From 92568edbf7a6023f897b8d7e5f9f1ea985f28fa2 Mon Sep 17 00:00:00 2001 From: Sing_chan <51314274+betterpig@users.noreply.github.com> Date: Sat, 4 Jun 2022 14:27:03 +0800 Subject: [PATCH] =?UTF-8?q?=E3=80=90code=20format=20check=20upgrade?= =?UTF-8?q?=E3=80=91=20step2=EF=BC=9Acmake-format=20(#43057)?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- .cmake-format.py | 125 ++ .pre-commit-config.yaml | 10 + cmake/FindGperftools.cmake | 45 +- cmake/FindNumPy.cmake | 13 +- cmake/cblas.cmake | 114 +- cmake/ccache.cmake | 49 +- cmake/configure.cmake | 227 +- cmake/coveralls.cmake | 177 +- cmake/coverallsGcovJsons.cmake | 480 +++-- cmake/cuda.cmake | 132 +- cmake/cudnn.cmake | 146 +- cmake/cupti.cmake | 59 +- cmake/experimental.cmake | 6 +- .../cuda_module_loading_lazy.cmake | 42 +- cmake/external/arm_brpc.cmake | 90 +- cmake/external/ascend.cmake | 103 +- cmake/external/boost.cmake | 41 +- cmake/external/box_ps.cmake | 85 +- cmake/external/brpc.cmake | 102 +- cmake/external/cinn.cmake | 40 +- cmake/external/concurrentqueue.cmake | 37 +- cmake/external/cryptopp.cmake | 111 +- cmake/external/cub.cmake | 34 +- cmake/external/dgc.cmake | 53 +- cmake/external/dirent.cmake | 30 +- cmake/external/dlpack.cmake | 22 +- cmake/external/eigen.cmake | 54 +- cmake/external/gflags.cmake | 140 +- cmake/external/glog.cmake | 148 +- cmake/external/gloo.cmake | 97 +- cmake/external/gtest.cmake | 140 +- cmake/external/lapack.cmake | 98 +- cmake/external/leveldb.cmake | 56 +- cmake/external/libmct.cmake | 76 +- cmake/external/libxsmm.cmake | 56 +- cmake/external/lite.cmake | 251 ++- cmake/external/llvm.cmake | 62 +- cmake/external/mkldnn.cmake | 183 +- cmake/external/mklml.cmake | 103 +- cmake/external/onnxruntime.cmake | 155 +- cmake/external/openblas.cmake | 140 +- cmake/external/paddle2onnx.cmake | 129 +- cmake/external/pocketfft.cmake | 31 +- cmake/external/poplar.cmake | 44 +- cmake/external/protobuf.cmake | 564 ++--- cmake/external/pslib.cmake | 89 +- cmake/external/pslib_brpc.cmake | 91 +- cmake/external/pybind11.cmake | 38 +- cmake/external/python.cmake | 100 +- cmake/external/rocksdb.cmake | 66 +- cmake/external/snappy.cmake | 79 +- cmake/external/threadpool.cmake | 36 +- cmake/external/utf8proc.cmake | 54 +- cmake/external/warpctc.cmake | 229 +- cmake/external/xbyak.cmake | 36 +- cmake/external/xpu.cmake | 210 +- cmake/external/xxhash.cmake | 113 +- cmake/external/zlib.cmake | 85 +- cmake/flags.cmake | 301 +-- cmake/generic.cmake | 773 ++++--- cmake/hip.cmake | 83 +- cmake/inference_lib.cmake | 767 ++++--- cmake/infrt_lib.cmake | 95 +- cmake/init.cmake | 69 +- cmake/miopen.cmake | 100 +- cmake/nccl.cmake | 84 +- cmake/neuware.cmake | 18 +- cmake/operators.cmake | 1029 ++++----- cmake/phi.cmake | 749 ++++--- cmake/phi_header.cmake | 50 +- cmake/python_module.cmake | 75 +- cmake/rccl.cmake | 34 +- cmake/simd.cmake | 89 +- cmake/system.cmake | 124 +- cmake/tensorrt.cmake | 150 +- cmake/third_party.cmake | 678 +++--- cmake/thrust.cmake | 13 +- cmake/unity_build.cmake | 240 ++- cmake/util.cmake | 79 +- cmake/version.cmake | 21 +- cmake/xpu_kp.cmake | 300 +-- paddle/CMakeLists.txt | 4 +- paddle/fluid/distributed/CMakeLists.txt | 48 +- .../distributed/collective/CMakeLists.txt | 65 +- .../fluid/distributed/common/CMakeLists.txt | 6 +- .../distributed/fleet_executor/CMakeLists.txt | 85 +- .../fleet_executor/test/CMakeLists.txt | 81 +- .../distributed/index_dataset/CMakeLists.txt | 15 +- .../distributed/ps/service/CMakeLists.txt | 157 +- .../ps/service/communicator/CMakeLists.txt | 19 +- .../fluid/distributed/ps/table/CMakeLists.txt | 132 +- .../distributed/ps/wrapper/CMakeLists.txt | 19 +- paddle/fluid/distributed/store/CMakeLists.txt | 5 +- paddle/fluid/distributed/test/CMakeLists.txt | 164 +- paddle/fluid/eager/CMakeLists.txt | 81 +- .../fluid/eager/accumulation/CMakeLists.txt | 5 +- paddle/fluid/eager/api/CMakeLists.txt | 5 +- .../fluid/eager/api/generated/CMakeLists.txt | 2 +- .../eager_generated/backwards/CMakeLists.txt | 12 +- .../eager_generated/forwards/CMakeLists.txt | 12 +- paddle/fluid/eager/api/utils/CMakeLists.txt | 15 +- .../eager/auto_code_generator/CMakeLists.txt | 218 +- .../final_state_generator/CMakeLists.txt | 101 +- .../eager/custom_operator/CMakeLists.txt | 5 +- paddle/fluid/eager/pylayer/CMakeLists.txt | 5 +- .../tests/data_structure_tests/CMakeLists.txt | 30 +- .../tests/performance_tests/CMakeLists.txt | 32 +- .../eager/tests/task_tests/CMakeLists.txt | 55 +- paddle/fluid/framework/CMakeLists.txt | 1321 +++++++++--- paddle/fluid/framework/details/CMakeLists.txt | 474 ++++- paddle/fluid/framework/fleet/CMakeLists.txt | 146 +- .../framework/fleet/heter_ps/CMakeLists.txt | 134 +- paddle/fluid/framework/io/CMakeLists.txt | 21 +- .../fluid/framework/io/crypto/CMakeLists.txt | 15 +- paddle/fluid/framework/ir/CMakeLists.txt | 579 +++-- .../ir/fuse_optimizer_ops_pass/CMakeLists.txt | 20 +- .../framework/ir/fusion_group/CMakeLists.txt | 26 +- .../ir/memory_optimize_pass/CMakeLists.txt | 88 +- .../multi_devices_graph_pass/CMakeLists.txt | 67 +- .../framework/new_executor/CMakeLists.txt | 180 +- .../garbage_collector/CMakeLists.txt | 26 +- .../new_executor/workqueue/CMakeLists.txt | 15 +- .../framework/paddle2cinn/CMakeLists.txt | 86 +- paddle/fluid/imperative/CMakeLists.txt | 243 ++- paddle/fluid/imperative/jit/CMakeLists.txt | 10 +- paddle/fluid/imperative/tests/CMakeLists.txt | 124 +- paddle/fluid/inference/CMakeLists.txt | 90 +- .../fluid/inference/analysis/CMakeLists.txt | 130 +- .../analysis/ir_passes/CMakeLists.txt | 71 +- .../inference/analysis/passes/CMakeLists.txt | 71 +- paddle/fluid/inference/api/CMakeLists.txt | 147 +- .../inference/api/demo_ci/CMakeLists.txt | 257 ++- .../inference/api/details/CMakeLists.txt | 33 +- paddle/fluid/inference/capi/CMakeLists.txt | 17 +- .../fluid/inference/capi_exp/CMakeLists.txt | 17 +- .../experimental/javaapi/CMakeLists.txt | 3 +- paddle/fluid/inference/lite/CMakeLists.txt | 25 +- .../fluid/inference/tensorrt/CMakeLists.txt | 26 +- .../inference/tensorrt/convert/CMakeLists.txt | 132 +- .../inference/tensorrt/plugin/CMakeLists.txt | 56 +- .../fluid/inference/tests/api/CMakeLists.txt | 1567 +++++++++----- .../inference/tests/infer_ut/CMakeLists.txt | 300 ++- .../infer_ut/external-cmake/gtest-cpp.cmake | 83 +- paddle/fluid/inference/tests/test.cmake | 171 +- paddle/fluid/inference/utils/CMakeLists.txt | 25 +- paddle/fluid/memory/CMakeLists.txt | 112 +- paddle/fluid/memory/allocation/CMakeLists.txt | 311 ++- paddle/fluid/memory/detail/CMakeLists.txt | 85 +- paddle/fluid/operators/amp/CMakeLists.txt | 10 +- .../operators/amp/unity_build_rule.cmake | 10 +- .../fluid/operators/benchmark/CMakeLists.txt | 17 +- paddle/fluid/operators/cinn/CMakeLists.txt | 70 +- .../fluid/operators/collective/CMakeLists.txt | 170 +- .../operators/controlflow/CMakeLists.txt | 53 +- .../controlflow/unity_build_rule.cmake | 32 +- .../fluid/operators/detection/CMakeLists.txt | 133 +- paddle/fluid/operators/dlnne/CMakeLists.txt | 58 +- .../operators/elementwise/CMakeLists.txt | 30 +- .../elementwise/unity_build_rule.cmake | 46 +- paddle/fluid/operators/fused/CMakeLists.txt | 224 +- .../operators/fused/unity_build_rule.cmake | 27 +- paddle/fluid/operators/jit/CMakeLists.txt | 42 +- paddle/fluid/operators/jit/gen/CMakeLists.txt | 71 +- .../fluid/operators/jit/more/CMakeLists.txt | 11 +- .../jit/more/intrinsic/CMakeLists.txt | 19 +- .../operators/jit/more/mix/CMakeLists.txt | 32 +- .../operators/jit/more/mkl/CMakeLists.txt | 40 +- .../fluid/operators/jit/refer/CMakeLists.txt | 78 +- paddle/fluid/operators/lite/CMakeLists.txt | 5 +- paddle/fluid/operators/math/CMakeLists.txt | 96 +- paddle/fluid/operators/metrics/CMakeLists.txt | 4 +- .../operators/metrics/unity_build_rule.cmake | 9 +- paddle/fluid/operators/mkldnn/CMakeLists.txt | 5 +- .../operators/mkldnn/caching_tests.cmake | 22 +- .../operators/mkldnn/inplace_op_tests.cmake | 14 +- .../operators/mkldnn/nhwc_op_tests.cmake | 15 +- paddle/fluid/operators/mlu/CMakeLists.txt | 15 +- paddle/fluid/operators/nccl/CMakeLists.txt | 28 +- .../fluid/operators/optimizers/CMakeLists.txt | 4 +- .../optimizers/unity_build_rule.cmake | 60 +- .../fluid/operators/prim_ops/CMakeLists.txt | 11 +- .../operators/prim_ops/unity_build_rule.cmake | 40 +- paddle/fluid/operators/pscore/CMakeLists.txt | 151 +- paddle/fluid/operators/reader/CMakeLists.txt | 40 +- .../fluid/operators/reduce_ops/CMakeLists.txt | 42 +- .../reduce_ops/unity_build_rule.cmake | 22 +- .../operators/sequence_ops/CMakeLists.txt | 4 +- .../sequence_ops/unity_build_rule.cmake | 68 +- .../operators/string/unity_build_rule.cmake | 3 +- .../fluid/operators/tensorrt/CMakeLists.txt | 10 +- paddle/fluid/operators/unity_build_rule.cmake | 1090 +++++----- paddle/fluid/platform/CMakeLists.txt | 503 +++-- paddle/fluid/platform/device/CMakeLists.txt | 21 +- .../fluid/platform/device/gpu/CMakeLists.txt | 39 +- .../platform/device/gpu/cuda/CMakeLists.txt | 15 +- .../platform/device/gpu/rocm/CMakeLists.txt | 5 +- .../fluid/platform/device/ipu/CMakeLists.txt | 45 +- .../fluid/platform/device/mlu/CMakeLists.txt | 38 +- .../fluid/platform/device/npu/CMakeLists.txt | 30 +- .../device/npu/dynload/CMakeLists.txt | 5 +- .../fluid/platform/device/xpu/CMakeLists.txt | 31 +- .../platform/device/xpu/tests/CMakeLists.txt | 5 +- paddle/fluid/platform/dynload/CMakeLists.txt | 80 +- paddle/fluid/platform/profiler/CMakeLists.txt | 62 +- .../platform/profiler/mlu/CMakeLists.txt | 5 +- paddle/fluid/platform/stream/CMakeLists.txt | 9 +- paddle/fluid/pybind/CMakeLists.txt | 449 ++-- paddle/infrt/CMakeLists.txt | 113 +- paddle/infrt/api/CMakeLists.txt | 7 +- paddle/infrt/backends/CMakeLists.txt | 4 +- paddle/infrt/backends/tensorrt/CMakeLists.txt | 9 +- paddle/infrt/common/CMakeLists.txt | 27 +- paddle/infrt/dialect/CMakeLists.txt | 24 +- .../infrt/dialect/infrt/common/CMakeLists.txt | 5 +- paddle/infrt/dialect/infrt/ir/CMakeLists.txt | 6 +- .../infrt/dialect/infrt/pass/CMakeLists.txt | 5 +- paddle/infrt/dialect/pd/common/CMakeLists.txt | 3 +- paddle/infrt/dialect/pd/ir/CMakeLists.txt | 4 +- paddle/infrt/dialect/pd/pass/CMakeLists.txt | 5 +- paddle/infrt/dialect/phi/CMakeLists.txt | 7 +- paddle/infrt/dialect/phi/ir/CMakeLists.txt | 5 +- paddle/infrt/dialect/phi/pass/CMakeLists.txt | 12 +- paddle/infrt/dialect/tensorrt/CMakeLists.txt | 17 +- paddle/infrt/external_kernels/CMakeLists.txt | 6 +- paddle/infrt/host_context/CMakeLists.txt | 47 +- paddle/infrt/kernel/CMakeLists.txt | 17 +- paddle/infrt/kernel/phi/CMakeLists.txt | 53 +- paddle/infrt/kernel/tensorrt/CMakeLists.txt | 10 +- paddle/infrt/paddle/CMakeLists.txt | 16 +- paddle/infrt/paddle/cpp/CMakeLists.txt | 10 +- paddle/infrt/paddle/pb/CMakeLists.txt | 17 +- paddle/infrt/tensor/CMakeLists.txt | 7 +- paddle/infrt/tensor/phi/CMakeLists.txt | 4 +- paddle/infrt/tests/CMakeLists.txt | 22 +- paddle/phi/CMakeLists.txt | 25 +- paddle/phi/api/CMakeLists.txt | 6 +- paddle/phi/api/lib/CMakeLists.txt | 435 ++-- paddle/phi/api/lib/utils/CMakeLists.txt | 15 +- paddle/phi/backends/CMakeLists.txt | 39 +- paddle/phi/backends/cpu/CMakeLists.txt | 10 +- paddle/phi/backends/custom/CMakeLists.txt | 17 +- paddle/phi/backends/dynload/CMakeLists.txt | 79 +- paddle/phi/backends/gpu/CMakeLists.txt | 20 +- paddle/phi/backends/gpu/cuda/CMakeLists.txt | 5 +- paddle/phi/backends/gpu/rocm/CMakeLists.txt | 5 +- paddle/phi/backends/xpu/CMakeLists.txt | 10 +- paddle/phi/common/CMakeLists.txt | 10 +- paddle/phi/core/CMakeLists.txt | 84 +- paddle/phi/core/compat/CMakeLists.txt | 15 +- paddle/phi/infermeta/CMakeLists.txt | 10 +- paddle/phi/infermeta/strings/CMakeLists.txt | 5 +- paddle/phi/kernels/CMakeLists.txt | 143 +- paddle/phi/kernels/autotune/CMakeLists.txt | 39 +- paddle/phi/kernels/funcs/blas/CMakeLists.txt | 5 +- paddle/phi/kernels/funcs/eigen/CMakeLists.txt | 25 +- .../phi/kernels/selected_rows/CMakeLists.txt | 14 +- paddle/phi/kernels/sparse/CMakeLists.txt | 14 +- paddle/phi/kernels/strings/CMakeLists.txt | 17 +- paddle/phi/ops/compat/CMakeLists.txt | 14 +- paddle/phi/tests/api/CMakeLists.txt | 130 +- paddle/phi/tests/common/CMakeLists.txt | 37 +- paddle/phi/tests/core/CMakeLists.txt | 71 +- paddle/phi/tests/kernels/CMakeLists.txt | 152 +- paddle/phi/tests/ops/CMakeLists.txt | 5 +- paddle/phi/tools/CMakeLists.txt | 4 +- paddle/scripts/CMakeLists.txt | 18 +- paddle/testing/CMakeLists.txt | 16 +- paddle/utils/CMakeLists.txt | 15 +- paddle/utils/string/CMakeLists.txt | 30 +- python/CMakeLists.txt | 121 +- .../fluid/contrib/slim/tests/CMakeLists.txt | 776 ++++--- .../paddle/fluid/contrib/tests/CMakeLists.txt | 22 +- python/paddle/fluid/tests/CMakeLists.txt | 6 +- python/paddle/fluid/tests/book/CMakeLists.txt | 9 +- .../fluid/tests/custom_op/CMakeLists.txt | 32 +- .../fluid/tests/unittests/CMakeLists.txt | 1877 ++++++++++------- .../fluid/tests/unittests/asp/CMakeLists.txt | 34 +- .../unittests/auto_parallel/CMakeLists.txt | 69 +- .../tests/unittests/autograd/CMakeLists.txt | 7 +- .../distributed_passes/CMakeLists.txt | 43 +- .../unittests/distribution/CMakeLists.txt | 7 +- .../dygraph_to_static/CMakeLists.txt | 70 +- .../fluid/tests/unittests/fft/CMakeLists.txt | 7 +- .../unittests/interpreter/CMakeLists.txt | 47 +- .../fluid/tests/unittests/ipu/CMakeLists.txt | 25 +- .../fluid/tests/unittests/ir/CMakeLists.txt | 11 +- .../unittests/ir/inference/CMakeLists.txt | 224 +- .../tests/unittests/mkldnn/CMakeLists.txt | 7 +- .../fluid/tests/unittests/mlu/CMakeLists.txt | 82 +- .../fluid/tests/unittests/npu/CMakeLists.txt | 48 +- .../fluid/tests/unittests/ps/CMakeLists.txt | 11 +- .../fluid/tests/unittests/rnn/CMakeLists.txt | 11 +- .../tests/unittests/sequence/CMakeLists.txt | 7 +- .../fluid/tests/unittests/xpu/CMakeLists.txt | 22 +- python/paddle/tests/CMakeLists.txt | 56 +- 295 files changed, 19199 insertions(+), 11435 deletions(-) create mode 100644 .cmake-format.py diff --git a/.cmake-format.py b/.cmake-format.py new file mode 100644 index 00000000000..62f5651fb1c --- /dev/null +++ b/.cmake-format.py @@ -0,0 +1,125 @@ +# Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +# ----------------------------- +# Options affecting formatting. +# ----------------------------- +with section("format"): + + # How wide to allow formatted cmake files + line_width = 80 + +# ------------------------------------------------ +# Options affecting comment reflow and formatting. +# ------------------------------------------------ +with section("markup"): + # enable comment markup parsing and reflow + enable_markup = False + + # If comment markup is enabled, don't reflow the first comment block in each + # listfile. Use this to preserve formatting of your copyright/license + # statements. + first_comment_is_literal = True + +# ---------------------------------- +# Options affecting listfile parsing +# ---------------------------------- +with section("parse"): + # Additional FLAGS and KWARGS for custom commands + additional_commands = { + "cc_library": { + "kwargs": { + "SRCS": '*', + "DEPS": '*', + } + }, + "nv_library": { + "kwargs": { + "SRCS": '*', + "DEPS": '*', + } + }, + "hip_library": { + "kwargs": { + "SRCS": '*', + "DEPS": '*', + } + }, + "xpu_library": { + "kwargs": { + "SRCS": '*', + "DEPS": '*', + } + }, + "hip_library": { + "kwargs": { + "SRCS": '*', + "DEPS": '*', + } + }, + "hip_library": { + "kwargs": { + "SRCS": '*', + "DEPS": '*', + } + }, + "go_library": { + "kwargs": { + "SRCS": '*', + "DEPS": '*', + } + }, + "copy": { + "kwargs": { + "SRCS": '*', + "DSTS": '*', + } + }, + "cc_test": { + "kwargs": { + "SRCS": '*', + "DEPS": '*', + } + }, + "nv_test": { + "kwargs": { + "SRCS": '*', + "DEPS": '*', + } + }, + "hip_test": { + "kwargs": { + "SRCS": '*', + "DEPS": '*', + } + }, + "xpu_test": { + "kwargs": { + "SRCS": '*', + "DEPS": '*', + } + }, + "go_test": { + "kwargs": { + "SRCS": '*', + "DEPS": '*', + } + }, + "py_test": { + "kwargs": { + "SRCS": '*', + "DEPS": '*', + } + } + } diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml index 06b9dfeb725..71f2699d5a3 100755 --- a/.pre-commit-config.yaml +++ b/.pre-commit-config.yaml @@ -55,3 +55,13 @@ repos: (?x)^( paddle/utils/.* )$ +- repo: https://github.com/cheshirekow/cmake-format-precommit + rev: v0.6.13 + hooks: + - id: cmake-format + # exclude paddle/fluid/operators/CMakeLists.txt, see the comment + # https://github.com/PaddlePaddle/Paddle/pull/43057#pullrequestreview-993471860 + exclude: | + (?x)^( + paddle/fluid/operators/CMakeLists.txt + )$ diff --git a/cmake/FindGperftools.cmake b/cmake/FindGperftools.cmake index 318f9f5fd3b..bb76469c750 100644 --- a/cmake/FindGperftools.cmake +++ b/cmake/FindGperftools.cmake @@ -17,47 +17,46 @@ # GPERFTOOLS_LIBRARIES The Gperftools libraries (tcmalloc & profiler) # GPERFTOOLS_INCLUDE_DIR The location of Gperftools headers -find_library(GPERFTOOLS_TCMALLOC +find_library( + GPERFTOOLS_TCMALLOC NAMES tcmalloc HINTS ${Gperftools_ROOT_DIR}/lib) - -find_library(GPERFTOOLS_PROFILER + +find_library( + GPERFTOOLS_PROFILER NAMES profiler HINTS ${Gperftools_ROOT_DIR}/lib) -find_library(GPERFTOOLS_TCMALLOC_AND_PROFILER +find_library( + GPERFTOOLS_TCMALLOC_AND_PROFILER NAMES tcmalloc_and_profiler HINTS ${Gperftools_ROOT_DIR}/lib) -find_path(GPERFTOOLS_INCLUDE_DIR +find_path( + GPERFTOOLS_INCLUDE_DIR NAMES gperftools/heap-profiler.h HINTS ${Gperftools_ROOT_DIR}/include) set(GPERFTOOLS_LIBRARIES ${GPERFTOOLS_TCMALLOC_AND_PROFILER}) include(FindPackageHandleStandardArgs) -find_package_handle_standard_args( - Gperftools - DEFAULT_MSG - GPERFTOOLS_LIBRARIES - GPERFTOOLS_INCLUDE_DIR) +find_package_handle_standard_args(Gperftools DEFAULT_MSG GPERFTOOLS_LIBRARIES + GPERFTOOLS_INCLUDE_DIR) mark_as_advanced( - Gperftools_ROOT_DIR - GPERFTOOLS_TCMALLOC - GPERFTOOLS_PROFILER - GPERFTOOLS_TCMALLOC_AND_PROFILER - GPERFTOOLS_LIBRARIES - GPERFTOOLS_INCLUDE_DIR) + Gperftools_ROOT_DIR GPERFTOOLS_TCMALLOC GPERFTOOLS_PROFILER + GPERFTOOLS_TCMALLOC_AND_PROFILER GPERFTOOLS_LIBRARIES GPERFTOOLS_INCLUDE_DIR) # create IMPORTED targets -if (Gperftools_FOUND AND NOT TARGET gperftools::tcmalloc) +if(Gperftools_FOUND AND NOT TARGET gperftools::tcmalloc) add_library(gperftools::tcmalloc UNKNOWN IMPORTED) - set_target_properties(gperftools::tcmalloc PROPERTIES - IMPORTED_LOCATION ${GPERFTOOLS_TCMALLOC} - INTERFACE_INCLUDE_DIRECTORIES "${GPERFTOOLS_INCLUDE_DIR}") + set_target_properties( + gperftools::tcmalloc + PROPERTIES IMPORTED_LOCATION ${GPERFTOOLS_TCMALLOC} + INTERFACE_INCLUDE_DIRECTORIES "${GPERFTOOLS_INCLUDE_DIR}") add_library(gperftools::profiler UNKNOWN IMPORTED) - set_target_properties(gperftools::profiler PROPERTIES - IMPORTED_LOCATION ${GPERFTOOLS_PROFILER} - INTERFACE_INCLUDE_DIRECTORIES "${GPERFTOOLS_INCLUDE_DIR}") + set_target_properties( + gperftools::profiler + PROPERTIES IMPORTED_LOCATION ${GPERFTOOLS_PROFILER} + INTERFACE_INCLUDE_DIRECTORIES "${GPERFTOOLS_INCLUDE_DIR}") endif() diff --git a/cmake/FindNumPy.cmake b/cmake/FindNumPy.cmake index 8cdd642ac01..fc7cdb8c192 100644 --- a/cmake/FindNumPy.cmake +++ b/cmake/FindNumPy.cmake @@ -14,13 +14,14 @@ if(NOT PYTHON_EXECUTABLE) endif() endif() -if (PYTHON_EXECUTABLE) +if(PYTHON_EXECUTABLE) # write a python script that finds the numpy path file(WRITE ${PROJECT_BINARY_DIR}/FindNumpyPath.py - "try: import numpy; print(numpy.get_include())\nexcept:pass\n") + "try: import numpy; print(numpy.get_include())\nexcept:pass\n") # execute the find script - exec_program("${PYTHON_EXECUTABLE}" ${PROJECT_BINARY_DIR} + exec_program( + "${PYTHON_EXECUTABLE}" ${PROJECT_BINARY_DIR} ARGS "FindNumpyPath.py" OUTPUT_VARIABLE NUMPY_PATH) elseif(_numpy_out) @@ -28,10 +29,12 @@ elseif(_numpy_out) endif(PYTHON_EXECUTABLE) find_path(PYTHON_NUMPY_INCLUDE_DIR numpy/arrayobject.h - HINTS "${NUMPY_PATH}" "${PYTHON_INCLUDE_PATH}") + HINTS "${NUMPY_PATH}" "${PYTHON_INCLUDE_PATH}") if(PYTHON_NUMPY_INCLUDE_DIR) - set(PYTHON_NUMPY_FOUND 1 CACHE INTERNAL "Python numpy found") + set(PYTHON_NUMPY_FOUND + 1 + CACHE INTERNAL "Python numpy found") endif(PYTHON_NUMPY_INCLUDE_DIR) include(FindPackageHandleStandardArgs) diff --git a/cmake/cblas.cmake b/cmake/cblas.cmake index 92a526a2b58..304246da4ae 100644 --- a/cmake/cblas.cmake +++ b/cmake/cblas.cmake @@ -31,9 +31,9 @@ endif() ## Find MKLML First. if(WITH_MKLML) - include(external/mklml) # download, install mklml package + include(external/mklml) # download, install mklml package set(CBLAS_PROVIDER MKLML) - set(CBLAS_INC_DIR ${MKLML_INC_DIR}) + set(CBLAS_INC_DIR ${MKLML_INC_DIR}) set(CBLAS_LIBRARIES ${MKLML_LIB}) add_definitions(-DPADDLE_WITH_MKLML) @@ -43,40 +43,48 @@ if(WITH_MKLML) target_link_libraries(cblas dynload_mklml) message(STATUS "Found cblas and lapack in MKLML " - "(include: ${CBLAS_INC_DIR}, library: ${CBLAS_LIBRARIES})") + "(include: ${CBLAS_INC_DIR}, library: ${CBLAS_LIBRARIES})") endif() ## Then find openblas. if(NOT DEFINED CBLAS_PROVIDER) - set(OPENBLAS_ROOT $ENV{OPENBLAS_ROOT} CACHE PATH "Folder contains Openblas") + set(OPENBLAS_ROOT + $ENV{OPENBLAS_ROOT} + CACHE PATH "Folder contains Openblas") set(OPENBLAS_INCLUDE_SEARCH_PATHS - ${OPENBLAS_ROOT}/include - /usr/include - /usr/include/lapacke - /usr/include/openblas - /usr/local/opt/openblas/include) + ${OPENBLAS_ROOT}/include /usr/include /usr/include/lapacke + /usr/include/openblas /usr/local/opt/openblas/include) set(OPENBLAS_LIB_SEARCH_PATHS - ${OPENBLAS_ROOT}/lib - /usr/lib - /usr/lib/blas/openblas - /usr/lib/openblas - /usr/local/opt/openblas/lib) - - find_path(OPENBLAS_INC_DIR NAMES cblas.h - PATHS ${OPENBLAS_INCLUDE_SEARCH_PATHS} NO_DEFAULT_PATH) - find_path(OPENBLAS_LAPACKE_INC_DIR NAMES lapacke.h + ${OPENBLAS_ROOT}/lib /usr/lib /usr/lib/blas/openblas /usr/lib/openblas + /usr/local/opt/openblas/lib) + + find_path( + OPENBLAS_INC_DIR + NAMES cblas.h + PATHS ${OPENBLAS_INCLUDE_SEARCH_PATHS} + NO_DEFAULT_PATH) + find_path( + OPENBLAS_LAPACKE_INC_DIR + NAMES lapacke.h PATHS ${OPENBLAS_INCLUDE_SEARCH_PATHS}) - find_path(OPENBLAS_CONFIG_INC_DIR NAMES openblas_config.h + find_path( + OPENBLAS_CONFIG_INC_DIR + NAMES openblas_config.h PATHS ${OPENBLAS_INCLUDE_SEARCH_PATHS}) - find_library(OPENBLAS_LIB NAMES openblas + find_library( + OPENBLAS_LIB + NAMES openblas PATHS ${OPENBLAS_LIB_SEARCH_PATHS}) - if(OPENBLAS_LAPACKE_INC_DIR AND OPENBLAS_INC_DIR AND OPENBLAS_CONFIG_INC_DIR AND OPENBLAS_LIB) + if(OPENBLAS_LAPACKE_INC_DIR + AND OPENBLAS_INC_DIR + AND OPENBLAS_CONFIG_INC_DIR + AND OPENBLAS_LIB) file(READ "${OPENBLAS_CONFIG_INC_DIR}/openblas_config.h" config_file) string(REGEX MATCH "OpenBLAS ([0-9]+\.[0-9]+\.[0-9]+)" tmp ${config_file}) string(REGEX MATCH "([0-9]+\.[0-9]+\.[0-9]+)" ver ${tmp}) - - if (${ver} VERSION_GREATER_EQUAL "0.3.5") + + if(${ver} VERSION_GREATER_EQUAL "0.3.5") set(CBLAS_PROVIDER OPENBLAS) set(CBLAS_INC_DIR ${OPENBLAS_INC_DIR} ${OPENBLAS_LAPACKE_INC_DIR}) set(CBLAS_LIBRARIES ${OPENBLAS_LIB}) @@ -84,52 +92,61 @@ if(NOT DEFINED CBLAS_PROVIDER) add_definitions(-DPADDLE_USE_OPENBLAS) add_definitions(-DLAPACK_FOUND) - message(STATUS "Found OpenBLAS (include: ${OPENBLAS_INC_DIR}, library: ${CBLAS_LIBRARIES})") - message(STATUS "Found lapack in OpenBLAS (include: ${OPENBLAS_LAPACKE_INC_DIR})") + message( + STATUS + "Found OpenBLAS (include: ${OPENBLAS_INC_DIR}, library: ${CBLAS_LIBRARIES})" + ) + message( + STATUS "Found lapack in OpenBLAS (include: ${OPENBLAS_LAPACKE_INC_DIR})" + ) endif() endif() endif() ## Then find the reference-cblas if WITH_SYSTEM_BLAS. www.netlib.org/blas/ if(NOT DEFINED CBLAS_PROVIDER AND WITH_SYSTEM_BLAS) - set(REFERENCE_CBLAS_ROOT $ENV{REFERENCE_CBLAS_ROOT} CACHE PATH - "Folder contains reference-cblas") - set(REFERENCE_CBLAS_INCLUDE_SEARCH_PATHS - ${REFERENCE_CBLAS_ROOT}/include - /usr/include - /usr/include/cblas - ) + set(REFERENCE_CBLAS_ROOT + $ENV{REFERENCE_CBLAS_ROOT} + CACHE PATH "Folder contains reference-cblas") + set(REFERENCE_CBLAS_INCLUDE_SEARCH_PATHS ${REFERENCE_CBLAS_ROOT}/include + /usr/include /usr/include/cblas) set(REFERENCE_CBLAS_LIB_SEARCH_PATHS - ${REFERENCE_CBLAS_ROOT}/lib - /usr/lib - /usr/lib/blas/reference/ - /usr/lib/reference/ - ) - - find_path(REFERENCE_CBLAS_INCLUDE_DIR NAMES cblas.h PATHS - ${REFERENCE_CBLAS_INCLUDE_SEARCH_PATHS}) - find_library(REFERENCE_CBLAS_LIBRARY NAMES cblas PATHS - ${REFERENCE_CBLAS_LIB_SEARCH_PATHS}) - find_library(REFERENCE_BLAS_LIBRARY NAMES blas PATHS - ${REFERENCE_CBLAS_LIB_SEARCH_PATHS}) + ${REFERENCE_CBLAS_ROOT}/lib /usr/lib /usr/lib/blas/reference/ + /usr/lib/reference/) + + find_path( + REFERENCE_CBLAS_INCLUDE_DIR + NAMES cblas.h + PATHS ${REFERENCE_CBLAS_INCLUDE_SEARCH_PATHS}) + find_library( + REFERENCE_CBLAS_LIBRARY + NAMES cblas + PATHS ${REFERENCE_CBLAS_LIB_SEARCH_PATHS}) + find_library( + REFERENCE_BLAS_LIBRARY + NAMES blas + PATHS ${REFERENCE_CBLAS_LIB_SEARCH_PATHS}) if(REFERENCE_CBLAS_INCLUDE_DIR AND REFERENCE_CBLAS_LIBRARY) set(CBLAS_PROVIDER REFERENCE_CBLAS) set(CBLAS_INC_DIR ${REFERENCE_CBLAS_INCLUDE_DIR}) set(CBLAS_LIBRARIES ${REFERENCE_CBLAS_LIBRARY}) add_definitions(-DPADDLE_USE_REFERENCE_CBLAS) - message(STATUS "Found reference-cblas (include: ${CBLAS_INC_DIR}, library: ${CBLAS_LIBRARIES})") + message( + STATUS + "Found reference-cblas (include: ${CBLAS_INC_DIR}, library: ${CBLAS_LIBRARIES})" + ) endif() endif() ## Then build openblas by external_project if(NOT DEFINED CBLAS_PROVIDER) - include(external/openblas) # download, build, install openblas + include(external/openblas) # download, build, install openblas set(CBLAS_PROVIDER EXTERN_OPENBLAS) add_dependencies(cblas extern_openblas) add_definitions(-DPADDLE_USE_OPENBLAS) message(STATUS "Build OpenBLAS by External Project " - "(include: ${CBLAS_INC_DIR}, library: ${CBLAS_LIBRARIES})") + "(include: ${CBLAS_INC_DIR}, library: ${CBLAS_LIBRARIES})") endif() # FIXME(gangliao): generate cblas target to track all high performance @@ -137,7 +154,8 @@ endif() include_directories(${CBLAS_INC_DIR}) if(${CBLAS_PROVIDER} STREQUAL REFERENCE_CBLAS) - target_link_libraries(cblas gfortran ${CBLAS_LIBRARIES} ${REFERENCE_BLAS_LIBRARY}) + target_link_libraries(cblas gfortran ${CBLAS_LIBRARIES} + ${REFERENCE_BLAS_LIBRARY}) elseif(NOT ${CBLAS_PROVIDER} STREQUAL MKLML) target_link_libraries(cblas ${CBLAS_LIBRARIES}) endif() diff --git a/cmake/ccache.cmake b/cmake/ccache.cmake index 5520720f7a6..85bc0e987a6 100644 --- a/cmake/ccache.cmake +++ b/cmake/ccache.cmake @@ -1,29 +1,34 @@ # Use ccache if found ccache program if(NOT WIN32) - find_program(CCACHE_PATH ccache) - if(CCACHE_PATH) - execute_process(COMMAND ccache -V OUTPUT_VARIABLE ccache_output) - execute_process(COMMAND ccache -s cache directory OUTPUT_VARIABLE cache_directory) - string(REGEX MATCH "[0-9]+.[0-9]+" ccache_version ${ccache_output}) - message(STATUS "ccache is founded, use ccache to speed up compile on Unix.") - # show statistics summary of ccache - message("ccache version\t\t\t " ${ccache_version} "\n" ${cache_directory}) - set_property(GLOBAL PROPERTY RULE_LAUNCH_COMPILE ${CCACHE_PATH}) - set_property(GLOBAL PROPERTY RULE_LAUNCH_LINK ${CCACHE_PATH}) - endif(CCACHE_PATH) + find_program(CCACHE_PATH ccache) + if(CCACHE_PATH) + execute_process(COMMAND ccache -V OUTPUT_VARIABLE ccache_output) + execute_process(COMMAND ccache -s cache directory + OUTPUT_VARIABLE cache_directory) + string(REGEX MATCH "[0-9]+.[0-9]+" ccache_version ${ccache_output}) + message(STATUS "ccache is founded, use ccache to speed up compile on Unix.") + # show statistics summary of ccache + message("ccache version\t\t\t " ${ccache_version} "\n" + ${cache_directory}) + set_property(GLOBAL PROPERTY RULE_LAUNCH_COMPILE ${CCACHE_PATH}) + set_property(GLOBAL PROPERTY RULE_LAUNCH_LINK ${CCACHE_PATH}) + endif(CCACHE_PATH) elseif("${CMAKE_GENERATOR}" STREQUAL "Ninja") - # (Note:zhouwei25) Only Ninja Generator can support sccache now - find_program(SCCACHE_PATH sccache) + # (Note:zhouwei25) Only Ninja Generator can support sccache now + find_program(SCCACHE_PATH sccache) - if(SCCACHE_PATH) - execute_process(COMMAND sccache -V OUTPUT_VARIABLE sccache_version) - message(STATUS "sccache is founded, use [${SCCACHE_PATH}] to speed up compile on Windows.") + if(SCCACHE_PATH) + execute_process(COMMAND sccache -V OUTPUT_VARIABLE sccache_version) + message( + STATUS + "sccache is founded, use [${SCCACHE_PATH}] to speed up compile on Windows." + ) - set(CMAKE_C_COMPILER_LAUNCHER ${SCCACHE_PATH}) - set(CMAKE_CXX_COMPILER_LAUNCHER ${SCCACHE_PATH}) - # (Note:zhouwei25) sccache for cuda compiler has bug so that it can't be hit - # refer to https://github.com/mozilla/sccache/issues/1017, so we fix it - set(CMAKE_CUDA_COMPILER_LAUNCHER ${SCCACHE_PATH}) - endif(SCCACHE_PATH) + set(CMAKE_C_COMPILER_LAUNCHER ${SCCACHE_PATH}) + set(CMAKE_CXX_COMPILER_LAUNCHER ${SCCACHE_PATH}) + # (Note:zhouwei25) sccache for cuda compiler has bug so that it can't be hit + # refer to https://github.com/mozilla/sccache/issues/1017, so we fix it + set(CMAKE_CUDA_COMPILER_LAUNCHER ${SCCACHE_PATH}) + endif(SCCACHE_PATH) endif() diff --git a/cmake/configure.cmake b/cmake/configure.cmake index 63ca901a940..91464b84ef0 100755 --- a/cmake/configure.cmake +++ b/cmake/configure.cmake @@ -13,192 +13,195 @@ # limitations under the License. if(NOT WITH_PYTHON) - add_definitions(-DPADDLE_NO_PYTHON) + add_definitions(-DPADDLE_NO_PYTHON) endif(NOT WITH_PYTHON) if(WITH_TESTING) - add_definitions(-DPADDLE_WITH_TESTING) + add_definitions(-DPADDLE_WITH_TESTING) endif(WITH_TESTING) if(WITH_INFERENCE_API_TEST) - add_definitions(-DPADDLE_WITH_INFERENCE_API_TEST) + add_definitions(-DPADDLE_WITH_INFERENCE_API_TEST) endif(WITH_INFERENCE_API_TEST) if(NOT WITH_PROFILER) - add_definitions(-DPADDLE_DISABLE_PROFILER) + add_definitions(-DPADDLE_DISABLE_PROFILER) endif(NOT WITH_PROFILER) if(WITH_AVX AND AVX_FOUND) - set(SIMD_FLAG ${AVX_FLAG}) - add_definitions(-DPADDLE_WITH_AVX) + set(SIMD_FLAG ${AVX_FLAG}) + add_definitions(-DPADDLE_WITH_AVX) elseif(SSE3_FOUND AND NOT WIN32) - set(SIMD_FLAG ${SSE3_FLAG}) + set(SIMD_FLAG ${SSE3_FLAG}) endif() -if (SSE3_FOUND) - # TODO: Runtime detection should be used here. - add_definitions(-DPADDLE_WITH_SSE3) +if(SSE3_FOUND) + # TODO: Runtime detection should be used here. + add_definitions(-DPADDLE_WITH_SSE3) endif() if(WIN32) # windows header option for all targets. add_definitions(-D_XKEYCHECK_H) - # Use symbols instead of absolute path, reduce the cmake link command length. - SET(CMAKE_C_USE_RESPONSE_FILE_FOR_LIBRARIES 1) - SET(CMAKE_CXX_USE_RESPONSE_FILE_FOR_LIBRARIES 1) - SET(CMAKE_C_USE_RESPONSE_FILE_FOR_OBJECTS 1) - SET(CMAKE_CXX_USE_RESPONSE_FILE_FOR_OBJECTS 1) - SET(CMAKE_C_USE_RESPONSE_FILE_FOR_INCLUDES 1) - SET(CMAKE_CXX_USE_RESPONSE_FILE_FOR_INCLUDES 1) - SET(CMAKE_C_RESPONSE_FILE_LINK_FLAG "@") - SET(CMAKE_CXX_RESPONSE_FILE_LINK_FLAG "@") + # Use symbols instead of absolute path, reduce the cmake link command length. + set(CMAKE_C_USE_RESPONSE_FILE_FOR_LIBRARIES 1) + set(CMAKE_CXX_USE_RESPONSE_FILE_FOR_LIBRARIES 1) + set(CMAKE_C_USE_RESPONSE_FILE_FOR_OBJECTS 1) + set(CMAKE_CXX_USE_RESPONSE_FILE_FOR_OBJECTS 1) + set(CMAKE_C_USE_RESPONSE_FILE_FOR_INCLUDES 1) + set(CMAKE_CXX_USE_RESPONSE_FILE_FOR_INCLUDES 1) + set(CMAKE_C_RESPONSE_FILE_LINK_FLAG "@") + set(CMAKE_CXX_RESPONSE_FILE_LINK_FLAG "@") add_definitions(-DPADDLE_DLL_INFERENCE) # set definition for the dll export - if (NOT MSVC) - message(FATAL "Windows build only support msvc. Which was binded by the nvcc compiler of NVIDIA.") + if(NOT MSVC) + message( + FATAL + "Windows build only support msvc. Which was binded by the nvcc compiler of NVIDIA." + ) endif(NOT MSVC) endif(WIN32) if(WITH_MUSL) - add_definitions(-DPADDLE_WITH_MUSL) + add_definitions(-DPADDLE_WITH_MUSL) - message(STATUS, "Set compile option WITH_MKL=OFF when WITH_MUSL=ON") - SET(WITH_MKL OFF) + message(STATUS, "Set compile option WITH_MKL=OFF when WITH_MUSL=ON") + set(WITH_MKL OFF) - message(STATUS, "Set compile option WITH_GPU=OFF when WITH_MUSL=ON") - SET(WITH_GPU OFF) + message(STATUS, "Set compile option WITH_GPU=OFF when WITH_MUSL=ON") + set(WITH_GPU OFF) endif() if(WITH_PSLIB) - add_definitions(-DPADDLE_WITH_PSLIB) + add_definitions(-DPADDLE_WITH_PSLIB) endif() if(WITH_ARM_BRPC) - add_definitions(-DPADDLE_WITH_ARM_BRPC) + add_definitions(-DPADDLE_WITH_ARM_BRPC) endif() -if(WITH_FLPS) - add_definitions(-DPADDLE_WITH_FLPS) +if(WITH_FLPS) + add_definitions(-DPADDLE_WITH_FLPS) endif() if(WITH_GLOO) - add_definitions(-DPADDLE_WITH_GLOO) + add_definitions(-DPADDLE_WITH_GLOO) endif() if(WITH_BOX_PS) - add_definitions(-DPADDLE_WITH_BOX_PS) + add_definitions(-DPADDLE_WITH_BOX_PS) endif() if(WITH_ASCEND) - add_definitions(-DPADDLE_WITH_ASCEND) + add_definitions(-DPADDLE_WITH_ASCEND) endif() if(WITH_ASCEND_CL) - add_definitions(-DPADDLE_WITH_ASCEND_CL) + add_definitions(-DPADDLE_WITH_ASCEND_CL) endif() if(WITH_ASCEND_INT64) - add_definitions(-DPADDLE_WITH_ASCEND_INT64) + add_definitions(-DPADDLE_WITH_ASCEND_INT64) endif() if(WITH_XPU) - message(STATUS "Compile with XPU!") - add_definitions(-DPADDLE_WITH_XPU) + message(STATUS "Compile with XPU!") + add_definitions(-DPADDLE_WITH_XPU) endif() if(WITH_XPU_KP) - message(STATUS "Compile with XPU_KP!") - add_definitions(-DPADDLE_WITH_XPU_KP) + message(STATUS "Compile with XPU_KP!") + add_definitions(-DPADDLE_WITH_XPU_KP) endif() if(WITH_IPU) - message(STATUS "Compile with IPU!") - add_definitions(-DPADDLE_WITH_IPU) + message(STATUS "Compile with IPU!") + add_definitions(-DPADDLE_WITH_IPU) endif() if(WITH_MLU) - message(STATUS "Compile with MLU!") - add_definitions(-DPADDLE_WITH_MLU) + message(STATUS "Compile with MLU!") + add_definitions(-DPADDLE_WITH_MLU) endif() if(WITH_GPU) - add_definitions(-DPADDLE_WITH_CUDA) - add_definitions(-DEIGEN_USE_GPU) + add_definitions(-DPADDLE_WITH_CUDA) + add_definitions(-DEIGEN_USE_GPU) - FIND_PACKAGE(CUDA REQUIRED) + find_package(CUDA REQUIRED) - if(${CMAKE_CUDA_COMPILER_VERSION} VERSION_LESS 10.1) - message(FATAL_ERROR "Paddle needs CUDA >= 10.1 to compile") - endif() + if(${CMAKE_CUDA_COMPILER_VERSION} VERSION_LESS 10.1) + message(FATAL_ERROR "Paddle needs CUDA >= 10.1 to compile") + endif() - if(NOT CUDNN_FOUND) - message(FATAL_ERROR "Paddle needs cudnn to compile") - endif() + if(NOT CUDNN_FOUND) + message(FATAL_ERROR "Paddle needs cudnn to compile") + endif() - if(${CUDNN_MAJOR_VERSION} VERSION_LESS 7) - message(FATAL_ERROR "Paddle needs CUDNN >= 7.0 to compile") - endif() + if(${CUDNN_MAJOR_VERSION} VERSION_LESS 7) + message(FATAL_ERROR "Paddle needs CUDNN >= 7.0 to compile") + endif() + + if(CUPTI_FOUND) + include_directories(${CUPTI_INCLUDE_DIR}) + add_definitions(-DPADDLE_WITH_CUPTI) + else() + message(STATUS "Cannot find CUPTI, GPU Profiling is incorrect.") + endif() + set(CMAKE_CUDA_FLAGS "${CMAKE_CUDA_FLAGS} -Xcompiler=\"${SIMD_FLAG}\"") + + # Include cuda and cudnn + include_directories(${CUDNN_INCLUDE_DIR}) + include_directories(${CUDA_TOOLKIT_INCLUDE}) - if(CUPTI_FOUND) - include_directories(${CUPTI_INCLUDE_DIR}) - add_definitions(-DPADDLE_WITH_CUPTI) + if(TENSORRT_FOUND) + if(WIN32) + if(${CMAKE_CUDA_COMPILER_VERSION} VERSION_LESS 9) + message(FATAL_ERROR "TensorRT needs CUDA >= 9.0 to compile on Windows") + endif() else() - message(STATUS "Cannot find CUPTI, GPU Profiling is incorrect.") - endif() - set(CMAKE_CUDA_FLAGS "${CMAKE_CUDA_FLAGS} -Xcompiler=\"${SIMD_FLAG}\"") - - # Include cuda and cudnn - include_directories(${CUDNN_INCLUDE_DIR}) - include_directories(${CUDA_TOOLKIT_INCLUDE}) - - if(TENSORRT_FOUND) - if(WIN32) - if(${CMAKE_CUDA_COMPILER_VERSION} VERSION_LESS 9) - message(FATAL_ERROR "TensorRT needs CUDA >= 9.0 to compile on Windows") - endif() - else() - if(${CMAKE_CUDA_COMPILER_VERSION} VERSION_LESS 8) - message(FATAL_ERROR "TensorRT needs CUDA >= 8.0 to compile") - endif() - if(${CUDNN_MAJOR_VERSION} VERSION_LESS 7) - message(FATAL_ERROR "TensorRT needs CUDNN >= 7.0 to compile") - endif() - if(${TENSORRT_MAJOR_VERSION} VERSION_LESS 4) - message(FATAL_ERROR "Paddle needs TensorRT >= 4.0 to compile") - endif() - endif() - include_directories(${TENSORRT_INCLUDE_DIR}) + if(${CMAKE_CUDA_COMPILER_VERSION} VERSION_LESS 8) + message(FATAL_ERROR "TensorRT needs CUDA >= 8.0 to compile") + endif() + if(${CUDNN_MAJOR_VERSION} VERSION_LESS 7) + message(FATAL_ERROR "TensorRT needs CUDNN >= 7.0 to compile") + endif() + if(${TENSORRT_MAJOR_VERSION} VERSION_LESS 4) + message(FATAL_ERROR "Paddle needs TensorRT >= 4.0 to compile") + endif() endif() + include_directories(${TENSORRT_INCLUDE_DIR}) + endif() elseif(WITH_ROCM) - add_definitions(-DPADDLE_WITH_HIP) - add_definitions(-DEIGEN_USE_GPU) - add_definitions(-DEIGEN_USE_HIP) + add_definitions(-DPADDLE_WITH_HIP) + add_definitions(-DEIGEN_USE_GPU) + add_definitions(-DEIGEN_USE_HIP) - if(NOT MIOPEN_FOUND) - message(FATAL_ERROR "Paddle needs MIOpen to compile") - endif() + if(NOT MIOPEN_FOUND) + message(FATAL_ERROR "Paddle needs MIOpen to compile") + endif() - if(${MIOPEN_VERSION} VERSION_LESS 2090) - message(FATAL_ERROR "Paddle needs MIOPEN >= 2.9 to compile") - endif() + if(${MIOPEN_VERSION} VERSION_LESS 2090) + message(FATAL_ERROR "Paddle needs MIOPEN >= 2.9 to compile") + endif() else() - add_definitions(-DHPPL_STUB_FUNC) - list(APPEND CMAKE_CXX_SOURCE_FILE_EXTENSIONS cu) + add_definitions(-DHPPL_STUB_FUNC) + list(APPEND CMAKE_CXX_SOURCE_FILE_EXTENSIONS cu) endif() -if (WITH_MKLML AND MKLML_IOMP_LIB) - message(STATUS "Enable Intel OpenMP with ${MKLML_IOMP_LIB}") - if(WIN32) - # openmp not support well for now on windows - set(OPENMP_FLAGS "") - else(WIN32) - set(OPENMP_FLAGS "-fopenmp") - endif(WIN32) - set(CMAKE_C_CREATE_SHARED_LIBRARY_FORBIDDEN_FLAGS ${OPENMP_FLAGS}) - set(CMAKE_CXX_CREATE_SHARED_LIBRARY_FORBIDDEN_FLAGS ${OPENMP_FLAGS}) - set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} ${OPENMP_FLAGS}") - set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} ${OPENMP_FLAGS}") +if(WITH_MKLML AND MKLML_IOMP_LIB) + message(STATUS "Enable Intel OpenMP with ${MKLML_IOMP_LIB}") + if(WIN32) + # openmp not support well for now on windows + set(OPENMP_FLAGS "") + else(WIN32) + set(OPENMP_FLAGS "-fopenmp") + endif(WIN32) + set(CMAKE_C_CREATE_SHARED_LIBRARY_FORBIDDEN_FLAGS ${OPENMP_FLAGS}) + set(CMAKE_CXX_CREATE_SHARED_LIBRARY_FORBIDDEN_FLAGS ${OPENMP_FLAGS}) + set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} ${OPENMP_FLAGS}") + set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} ${OPENMP_FLAGS}") endif() set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} ${SIMD_FLAG}") @@ -209,25 +212,25 @@ if(WITH_DISTRIBUTE) endif() if(WITH_PSCORE) - add_definitions(-DPADDLE_WITH_PSCORE) + add_definitions(-DPADDLE_WITH_PSCORE) endif() if(WITH_HETERPS) - add_definitions(-DPADDLE_WITH_HETERPS) + add_definitions(-DPADDLE_WITH_HETERPS) endif() if(WITH_BRPC_RDMA) - add_definitions(-DPADDLE_WITH_BRPC_RDMA) + add_definitions(-DPADDLE_WITH_BRPC_RDMA) endif(WITH_BRPC_RDMA) if(ON_INFER) - add_definitions(-DPADDLE_ON_INFERENCE) + add_definitions(-DPADDLE_ON_INFERENCE) endif(ON_INFER) if(WITH_CRYPTO) - add_definitions(-DPADDLE_WITH_CRYPTO) + add_definitions(-DPADDLE_WITH_CRYPTO) endif(WITH_CRYPTO) if(WITH_CUSTOM_DEVICE AND NOT WIN32) - add_definitions(-DPADDLE_WITH_CUSTOM_DEVICE) + add_definitions(-DPADDLE_WITH_CUSTOM_DEVICE) endif() diff --git a/cmake/coveralls.cmake b/cmake/coveralls.cmake index 598754bc9ef..02c1a136280 100644 --- a/cmake/coveralls.cmake +++ b/cmake/coveralls.cmake @@ -5,107 +5,106 @@ # Param _COVERALLS_UPLOAD Upload the result to coveralls. # Param _CMAKE_SCRIPT_PATH CMake script path. function(code_coverage _COVERAGE_SRCS _COVERALLS_UPLOAD _CMAKE_SCRIPT_PATH) - # clean previous gcov data. - file(REMOVE_RECURSE ${PROJECT_BINARY_DIR}/*.gcda) + # clean previous gcov data. + file(REMOVE_RECURSE ${PROJECT_BINARY_DIR}/*.gcda) - # find curl for upload JSON soon. - if (_COVERALLS_UPLOAD) - find_program(CURL_EXECUTABLE curl) - if (NOT CURL_EXECUTABLE) - message(FATAL_ERROR "Coveralls: curl not found!") - endif() + # find curl for upload JSON soon. + if(_COVERALLS_UPLOAD) + find_program(CURL_EXECUTABLE curl) + if(NOT CURL_EXECUTABLE) + message(FATAL_ERROR "Coveralls: curl not found!") endif() + endif() - # When passing a CMake list to an external process, the list - # will be converted from the format "1;2;3" to "1 2 3". - set(COVERAGE_SRCS "") - foreach (SINGLE_SRC ${_COVERAGE_SRCS}) - set(COVERAGE_SRCS "${COVERAGE_SRCS}*${SINGLE_SRC}") - endforeach() + # When passing a CMake list to an external process, the list + # will be converted from the format "1;2;3" to "1 2 3". + set(COVERAGE_SRCS "") + foreach(SINGLE_SRC ${_COVERAGE_SRCS}) + set(COVERAGE_SRCS "${COVERAGE_SRCS}*${SINGLE_SRC}") + endforeach() - # query number of logical cores - cmake_host_system_information(RESULT core_size QUERY NUMBER_OF_LOGICAL_CORES) - # coveralls json file. - set(COVERALLS_FILE ${PROJECT_BINARY_DIR}/coveralls.json) - add_custom_target(coveralls_generate - # Run regress tests. - COMMAND ${CMAKE_CTEST_COMMAND} - -j ${core_size} - --output-on-failure - # Generate Gcov and translate it into coveralls JSON. - COMMAND ${CMAKE_COMMAND} - -DCOVERAGE_SRCS="${COVERAGE_SRCS}" - -DCOVERALLS_OUTPUT_FILE="${COVERALLS_FILE}" - -DCOV_PATH="${PROJECT_BINARY_DIR}" - -DPROJECT_ROOT="${PROJECT_SOURCE_DIR}" - -P "${_CMAKE_SCRIPT_PATH}/coverallsGcovJsons.cmake" - WORKING_DIRECTORY ${PROJECT_BINARY_DIR} - COMMENT "Coveralls: generating coveralls output..." - ) + # query number of logical cores + cmake_host_system_information(RESULT core_size QUERY NUMBER_OF_LOGICAL_CORES) + # coveralls json file. + set(COVERALLS_FILE ${PROJECT_BINARY_DIR}/coveralls.json) + add_custom_target( + coveralls_generate + # Run regress tests. + COMMAND ${CMAKE_CTEST_COMMAND} -j ${core_size} --output-on-failure + # Generate Gcov and translate it into coveralls JSON. + COMMAND + ${CMAKE_COMMAND} -DCOVERAGE_SRCS="${COVERAGE_SRCS}" + -DCOVERALLS_OUTPUT_FILE="${COVERALLS_FILE}" + -DCOV_PATH="${PROJECT_BINARY_DIR}" -DPROJECT_ROOT="${PROJECT_SOURCE_DIR}" + -P "${_CMAKE_SCRIPT_PATH}/coverallsGcovJsons.cmake" + WORKING_DIRECTORY ${PROJECT_BINARY_DIR} + COMMENT "Coveralls: generating coveralls output...") - if (_COVERALLS_UPLOAD) - message("COVERALLS UPLOAD: ON") - # Upload the JSON to coveralls. - add_custom_target(coveralls_upload - COMMAND ${CURL_EXECUTABLE} - -S -F json_file=@${COVERALLS_FILE} - https://coveralls.io/api/v1/jobs - DEPENDS coveralls_generate - WORKING_DIRECTORY ${PROJECT_BINARY_DIR} - COMMENT "Coveralls: uploading coveralls output...") + if(_COVERALLS_UPLOAD) + message("COVERALLS UPLOAD: ON") + # Upload the JSON to coveralls. + add_custom_target( + coveralls_upload + COMMAND ${CURL_EXECUTABLE} -S -F json_file=@${COVERALLS_FILE} + https://coveralls.io/api/v1/jobs + DEPENDS coveralls_generate + WORKING_DIRECTORY ${PROJECT_BINARY_DIR} + COMMENT "Coveralls: uploading coveralls output...") - add_custom_target(coveralls DEPENDS coveralls_upload) - else() - message("COVERALLS UPLOAD: OFF") - add_custom_target(coveralls DEPENDS coveralls_generate) - endif() + add_custom_target(coveralls DEPENDS coveralls_upload) + else() + message("COVERALLS UPLOAD: OFF") + add_custom_target(coveralls DEPENDS coveralls_generate) + endif() endfunction() if(WITH_COVERAGE) - if (WITH_INCREMENTAL_COVERAGE) - # if *.h changed, generate coverage report totaly. - # if pybind.cc changed, generate coverage report totaly. - # Because if pybind.cc add '-g -O0 -fprofile-arcs -ftest-coverage' only, some testcase will fail. - if ( (NOT ("$ENV{PADDLE_GIT_DIFF_H_FILE}" STREQUAL "")) OR ("$ENV{PADDLE_GIT_DIFF_CC_FILE}" MATCHES "pybind.cc") ) - set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -g -O0 -fprofile-arcs -ftest-coverage") - set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} -g -O0 -fprofile-arcs -ftest-coverage") - endif() - else() - set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -g -O0 -fprofile-arcs -ftest-coverage") - set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} -g -O0 -fprofile-arcs -ftest-coverage") + if(WITH_INCREMENTAL_COVERAGE) + # if *.h changed, generate coverage report totaly. + # if pybind.cc changed, generate coverage report totaly. + # Because if pybind.cc add '-g -O0 -fprofile-arcs -ftest-coverage' only, some testcase will fail. + if((NOT ("$ENV{PADDLE_GIT_DIFF_H_FILE}" STREQUAL "")) + OR ("$ENV{PADDLE_GIT_DIFF_CC_FILE}" MATCHES "pybind.cc")) + set(CMAKE_CXX_FLAGS + "${CMAKE_CXX_FLAGS} -g -O0 -fprofile-arcs -ftest-coverage") + set(CMAKE_C_FLAGS + "${CMAKE_C_FLAGS} -g -O0 -fprofile-arcs -ftest-coverage") endif() - set(EXCLUDE_DIRS - "demo/" - "build/" - "tests/" - ".test_env/" - ) + else() + set(CMAKE_CXX_FLAGS + "${CMAKE_CXX_FLAGS} -g -O0 -fprofile-arcs -ftest-coverage") + set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} -g -O0 -fprofile-arcs -ftest-coverage") + endif() + set(EXCLUDE_DIRS "demo/" "build/" "tests/" ".test_env/") - if(WITH_GPU) - file(GLOB_RECURSE PADDLE_SOURCES RELATIVE "${PROJECT_SOURCE_DIR}" "*.cpp" "*.cc" ".c" "*.cu") - else() - file(GLOB_RECURSE PADDLE_SOURCES RELATIVE "${PROJECT_SOURCE_DIR}" "*.cpp" "*.cc" "*.c") - endif() + if(WITH_GPU) + file( + GLOB_RECURSE PADDLE_SOURCES + RELATIVE "${PROJECT_SOURCE_DIR}" + "*.cpp" "*.cc" ".c" "*.cu") + else() + file( + GLOB_RECURSE PADDLE_SOURCES + RELATIVE "${PROJECT_SOURCE_DIR}" + "*.cpp" "*.cc" "*.c") + endif() - # exclude trivial files in PADDLE_SOURCES - foreach(EXCLUDE_DIR ${EXCLUDE_DIRS}) - foreach(TMP_PATH ${PADDLE_SOURCES}) - string(FIND ${TMP_PATH} ${EXCLUDE_DIR} EXCLUDE_DIR_FOUND) - if(NOT ${EXCLUDE_DIR_FOUND} EQUAL -1) - list(REMOVE_ITEM PADDLE_SOURCES ${TMP_PATH}) - endif() - endforeach(TMP_PATH) - endforeach() + # exclude trivial files in PADDLE_SOURCES + foreach(EXCLUDE_DIR ${EXCLUDE_DIRS}) + foreach(TMP_PATH ${PADDLE_SOURCES}) + string(FIND ${TMP_PATH} ${EXCLUDE_DIR} EXCLUDE_DIR_FOUND) + if(NOT ${EXCLUDE_DIR_FOUND} EQUAL -1) + list(REMOVE_ITEM PADDLE_SOURCES ${TMP_PATH}) + endif() + endforeach(TMP_PATH) + endforeach() - # convert to absolute path - set(PADDLE_SRCS "") - foreach(PADDLE_SRC ${PADDLE_SOURCES}) - set(PADDLE_SRCS "${PADDLE_SRCS};${PROJECT_SOURCE_DIR}/${PADDLE_SRC}") - endforeach() + # convert to absolute path + set(PADDLE_SRCS "") + foreach(PADDLE_SRC ${PADDLE_SOURCES}) + set(PADDLE_SRCS "${PADDLE_SRCS};${PROJECT_SOURCE_DIR}/${PADDLE_SRC}") + endforeach() - code_coverage( - "${PADDLE_SRCS}" - ${COVERALLS_UPLOAD} - "${PROJECT_SOURCE_DIR}/cmake" - ) + code_coverage("${PADDLE_SRCS}" ${COVERALLS_UPLOAD} + "${PROJECT_SOURCE_DIR}/cmake") endif() diff --git a/cmake/coverallsGcovJsons.cmake b/cmake/coverallsGcovJsons.cmake index 4d813a0726d..6c1186f69f1 100644 --- a/cmake/coverallsGcovJsons.cmake +++ b/cmake/coverallsGcovJsons.cmake @@ -32,7 +32,7 @@ # https://coveralls.io/docs/api # -CMAKE_MINIMUM_REQUIRED(VERSION 2.8) +cmake_minimum_required(VERSION 2.8) # Since it's not possible to pass a CMake list properly in the # "1;2;3" format to an external process, we have replaced the @@ -41,44 +41,42 @@ CMAKE_MINIMUM_REQUIRED(VERSION 2.8) string(REGEX REPLACE "\\*" ";" COVERAGE_SRCS ${COVERAGE_SRCS}) find_program(GCOV_EXECUTABLE gcov) -if (NOT GCOV_EXECUTABLE) - message(FATAL_ERROR "gcov not found! Aborting...") +if(NOT GCOV_EXECUTABLE) + message(FATAL_ERROR "gcov not found! Aborting...") endif() find_package(Git) # TODO: Add these git things to the coveralls json. -if (GIT_FOUND) - # Branch. - execute_process( - COMMAND ${GIT_EXECUTABLE} rev-parse --abbrev-ref HEAD - WORKING_DIRECTORY ${CMAKE_SOURCE_DIR} - OUTPUT_VARIABLE GIT_BRANCH - OUTPUT_STRIP_TRAILING_WHITESPACE - ) - - macro (git_log_format FORMAT_CHARS VAR_NAME) - execute_process( - COMMAND ${GIT_EXECUTABLE} log -1 --pretty=format:%${FORMAT_CHARS} - WORKING_DIRECTORY ${CMAKE_SOURCE_DIR} - OUTPUT_VARIABLE ${VAR_NAME} - OUTPUT_STRIP_TRAILING_WHITESPACE - ) - endmacro() - - git_log_format(an GIT_AUTHOR_EMAIL) - git_log_format(ae GIT_AUTHOR_EMAIL) - git_log_format(cn GIT_COMMITTER_NAME) - git_log_format(ce GIT_COMMITTER_EMAIL) - git_log_format(B GIT_COMMIT_MESSAGE) - - message("Git exe: ${GIT_EXECUTABLE}") - message("Git branch: ${GIT_BRANCH}") - message("Git author: ${GIT_AUTHOR_NAME}") - message("Git e-mail: ${GIT_AUTHOR_EMAIL}") - message("Git commiter name: ${GIT_COMMITTER_NAME}") - message("Git commiter e-mail: ${GIT_COMMITTER_EMAIL}") - message("Git commit message: ${GIT_COMMIT_MESSAGE}") +if(GIT_FOUND) + # Branch. + execute_process( + COMMAND ${GIT_EXECUTABLE} rev-parse --abbrev-ref HEAD + WORKING_DIRECTORY ${CMAKE_SOURCE_DIR} + OUTPUT_VARIABLE GIT_BRANCH + OUTPUT_STRIP_TRAILING_WHITESPACE) + + macro(git_log_format FORMAT_CHARS VAR_NAME) + execute_process( + COMMAND ${GIT_EXECUTABLE} log -1 --pretty=format:%${FORMAT_CHARS} + WORKING_DIRECTORY ${CMAKE_SOURCE_DIR} + OUTPUT_VARIABLE ${VAR_NAME} + OUTPUT_STRIP_TRAILING_WHITESPACE) + endmacro() + + git_log_format(an GIT_AUTHOR_EMAIL) + git_log_format(ae GIT_AUTHOR_EMAIL) + git_log_format(cn GIT_COMMITTER_NAME) + git_log_format(ce GIT_COMMITTER_EMAIL) + git_log_format(B GIT_COMMIT_MESSAGE) + + message("Git exe: ${GIT_EXECUTABLE}") + message("Git branch: ${GIT_BRANCH}") + message("Git author: ${GIT_AUTHOR_NAME}") + message("Git e-mail: ${GIT_AUTHOR_EMAIL}") + message("Git commiter name: ${GIT_COMMITTER_NAME}") + message("Git commiter e-mail: ${GIT_COMMITTER_EMAIL}") + message("Git commit message: ${GIT_COMMIT_MESSAGE}") endif() @@ -95,15 +93,15 @@ endif() # macro(get_source_path_from_gcov_filename _SRC_FILENAME _GCOV_FILENAME) - # /path/to/project/root/build/#path#to#project#root#subdir#the_file.c.gcov - # -> - # #path#to#project#root#subdir#the_file.c.gcov - get_filename_component(_GCOV_FILENAME_WEXT ${_GCOV_FILENAME} NAME) + # /path/to/project/root/build/#path#to#project#root#subdir#the_file.c.gcov + # -> + # #path#to#project#root#subdir#the_file.c.gcov + get_filename_component(_GCOV_FILENAME_WEXT ${_GCOV_FILENAME} NAME) - # #path#to#project#root#subdir#the_file.c.gcov -> /path/to/project/root/subdir/the_file.c - string(REGEX REPLACE "\\.gcov$" "" SRC_FILENAME_TMP ${_GCOV_FILENAME_WEXT}) - string(REGEX REPLACE "\#" "/" SRC_FILENAME_TMP ${SRC_FILENAME_TMP}) - set(${_SRC_FILENAME} "${SRC_FILENAME_TMP}") + # #path#to#project#root#subdir#the_file.c.gcov -> /path/to/project/root/subdir/the_file.c + string(REGEX REPLACE "\\.gcov$" "" SRC_FILENAME_TMP ${_GCOV_FILENAME_WEXT}) + string(REGEX REPLACE "\#" "/" SRC_FILENAME_TMP ${SRC_FILENAME_TMP}) + set(${_SRC_FILENAME} "${SRC_FILENAME_TMP}") endmacro() ############################################################################## @@ -117,26 +115,24 @@ message("===============================") # (The directories the .gcda files and .o files are found in) # and run gcov on those. foreach(GCDA ${GCDA_FILES}) - get_filename_component(GCDA_DIR ${GCDA} PATH) - - # - # The -p below refers to "Preserve path components", - # This means that the generated gcov filename of a source file will - # keep the original files entire filepath, but / is replaced with #. - # Example: - # - # /path/to/project/root/build/CMakeFiles/the_file.dir/subdir/the_file.c.gcda - # ------------------------------------------------------------------------------ - # File '/path/to/project/root/subdir/the_file.c' - # Lines executed:68.34% of 199 - # /path/to/project/root/subdir/the_file.c:creating '#path#to#project#root#subdir#the_file.c.gcov' - # - # If -p is not specified then the file is named only "the_file.c.gcov" - # - execute_process( - COMMAND ${GCOV_EXECUTABLE} -p -o ${GCDA_DIR} ${GCDA} >/dev/null - WORKING_DIRECTORY ${GCDA_DIR} - ) + get_filename_component(GCDA_DIR ${GCDA} PATH) + + # + # The -p below refers to "Preserve path components", + # This means that the generated gcov filename of a source file will + # keep the original files entire filepath, but / is replaced with #. + # Example: + # + # /path/to/project/root/build/CMakeFiles/the_file.dir/subdir/the_file.c.gcda + # ------------------------------------------------------------------------------ + # File '/path/to/project/root/subdir/the_file.c' + # Lines executed:68.34% of 199 + # /path/to/project/root/subdir/the_file.c:creating '#path#to#project#root#subdir#the_file.c.gcov' + # + # If -p is not specified then the file is named only "the_file.c.gcov" + # + execute_process(COMMAND ${GCOV_EXECUTABLE} -p -o ${GCDA_DIR} ${GCDA} + >/dev/null WORKING_DIRECTORY ${GCDA_DIR}) endforeach() # TODO: Make these be absolute path @@ -164,9 +160,9 @@ file(GLOB_RECURSE ALL_GCOV_FILES "${COV_PATH}" "*.gcov") # ALL_GCOV_FILES = # /path/to/project/root/build/#path#to#project#root#subdir#the_file.c.gcov # /path/to/project/root/build/#path#to#project#root#subdir#other_file.c.gcov -# +# # Result should be: -# GCOV_FILES = +# GCOV_FILES = # /path/to/project/root/build/#path#to#project#root#subdir#the_file.c.gcov # set(GCOV_FILES "") @@ -176,29 +172,29 @@ message("===============================") set(COVERAGE_SRCS_REMAINING ${COVERAGE_SRCS}) -foreach (GCOV_FILE ${ALL_GCOV_FILES}) - - # - # /path/to/project/root/build/#path#to#project#root#subdir#the_file.c.gcov - # -> - # /path/to/project/root/subdir/the_file.c - get_source_path_from_gcov_filename(GCOV_SRC_PATH ${GCOV_FILE}) - - # Is this in the list of source files? - # TODO: We want to match against relative path filenames from the source file root... - list(FIND COVERAGE_SRCS ${GCOV_SRC_PATH} WAS_FOUND) - - if (NOT WAS_FOUND EQUAL -1) - message("YES: ${GCOV_FILE}") - list(APPEND GCOV_FILES ${GCOV_FILE}) - - # We remove it from the list, so we don't bother searching for it again. - # Also files left in COVERAGE_SRCS_REMAINING after this loop ends should - # have coverage data generated from them (no lines are covered). - list(REMOVE_ITEM COVERAGE_SRCS_REMAINING ${GCOV_SRC_PATH}) - else() - message("NO: ${GCOV_FILE}") - endif() +foreach(GCOV_FILE ${ALL_GCOV_FILES}) + + # + # /path/to/project/root/build/#path#to#project#root#subdir#the_file.c.gcov + # -> + # /path/to/project/root/subdir/the_file.c + get_source_path_from_gcov_filename(GCOV_SRC_PATH ${GCOV_FILE}) + + # Is this in the list of source files? + # TODO: We want to match against relative path filenames from the source file root... + list(FIND COVERAGE_SRCS ${GCOV_SRC_PATH} WAS_FOUND) + + if(NOT WAS_FOUND EQUAL -1) + message("YES: ${GCOV_FILE}") + list(APPEND GCOV_FILES ${GCOV_FILE}) + + # We remove it from the list, so we don't bother searching for it again. + # Also files left in COVERAGE_SRCS_REMAINING after this loop ends should + # have coverage data generated from them (no lines are covered). + list(REMOVE_ITEM COVERAGE_SRCS_REMAINING ${GCOV_SRC_PATH}) + else() + message("NO: ${GCOV_FILE}") + endif() endforeach() # TODO: Enable setting these @@ -206,20 +202,18 @@ set(JSON_SERVICE_NAME "travis-ci") set(JSON_SERVICE_JOB_ID $ENV{TRAVIS_JOB_ID}) set(JSON_TEMPLATE -"{ + "{ \"service_name\": \"\@JSON_SERVICE_NAME\@\", \"service_job_id\": \"\@JSON_SERVICE_JOB_ID\@\", \"source_files\": \@JSON_GCOV_FILES\@ -}" -) +}") set(SRC_FILE_TEMPLATE -"{ + "{ \"name\": \"\@GCOV_SRC_REL_PATH\@\", \"source_digest\": \"\@GCOV_CONTENTS_MD5\@\", \"coverage\": \@GCOV_FILE_COVERAGE\@ - }" -) + }") message("\nGenerate JSON for files:") message("=========================") @@ -227,163 +221,163 @@ message("=========================") set(JSON_GCOV_FILES "[") # Read the GCOV files line by line and get the coverage data. -foreach (GCOV_FILE ${GCOV_FILES}) - - get_source_path_from_gcov_filename(GCOV_SRC_PATH ${GCOV_FILE}) - file(RELATIVE_PATH GCOV_SRC_REL_PATH "${PROJECT_ROOT}" "${GCOV_SRC_PATH}") - - # The new coveralls API doesn't need the entire source (Yay!) - # However, still keeping that part for now. Will cleanup in the future. - file(MD5 "${GCOV_SRC_PATH}" GCOV_CONTENTS_MD5) - message("MD5: ${GCOV_SRC_PATH} = ${GCOV_CONTENTS_MD5}") - - # Loads the gcov file as a list of lines. - # (We first open the file and replace all occurrences of [] with _ - # because CMake will fail to parse a line containing unmatched brackets... - # also the \ to escaped \n in macros screws up things.) - # https://public.kitware.com/Bug/view.php?id=15369 - file(READ ${GCOV_FILE} GCOV_CONTENTS) - string(REPLACE "[" "_" GCOV_CONTENTS "${GCOV_CONTENTS}") - string(REPLACE "]" "_" GCOV_CONTENTS "${GCOV_CONTENTS}") - string(REPLACE "\\" "_" GCOV_CONTENTS "${GCOV_CONTENTS}") - file(WRITE ${GCOV_FILE}_tmp "${GCOV_CONTENTS}") - - file(STRINGS ${GCOV_FILE}_tmp GCOV_LINES) - list(LENGTH GCOV_LINES LINE_COUNT) - - # Instead of trying to parse the source from the - # gcov file, simply read the file contents from the source file. - # (Parsing it from the gcov is hard because C-code uses ; in many places - # which also happens to be the same as the CMake list delimeter). - file(READ ${GCOV_SRC_PATH} GCOV_FILE_SOURCE) - - string(REPLACE "\\" "\\\\" GCOV_FILE_SOURCE "${GCOV_FILE_SOURCE}") - string(REGEX REPLACE "\"" "\\\\\"" GCOV_FILE_SOURCE "${GCOV_FILE_SOURCE}") - string(REPLACE "\t" "\\\\t" GCOV_FILE_SOURCE "${GCOV_FILE_SOURCE}") - string(REPLACE "\r" "\\\\r" GCOV_FILE_SOURCE "${GCOV_FILE_SOURCE}") - string(REPLACE "\n" "\\\\n" GCOV_FILE_SOURCE "${GCOV_FILE_SOURCE}") - # According to http://json.org/ these should be escaped as well. - # Don't know how to do that in CMake however... - #string(REPLACE "\b" "\\\\b" GCOV_FILE_SOURCE "${GCOV_FILE_SOURCE}") - #string(REPLACE "\f" "\\\\f" GCOV_FILE_SOURCE "${GCOV_FILE_SOURCE}") - #string(REGEX REPLACE "\u([a-fA-F0-9]{4})" "\\\\u\\1" GCOV_FILE_SOURCE "${GCOV_FILE_SOURCE}") - - # We want a json array of coverage data as a single string - # start building them from the contents of the .gcov - set(GCOV_FILE_COVERAGE "[") - - set(GCOV_LINE_COUNT 1) # Line number for the .gcov. - set(DO_SKIP 0) - foreach (GCOV_LINE ${GCOV_LINES}) - #message("${GCOV_LINE}") - # Example of what we're parsing: - # Hitcount |Line | Source - # " 8: 26: if (!allowed || (strlen(allowed) == 0))" - string(REGEX REPLACE - "^([^:]*):([^:]*):(.*)$" - "\\1;\\2;\\3" - RES - "${GCOV_LINE}") - - # Check if we should exclude lines using the Lcov syntax. - string(REGEX MATCH "LCOV_EXCL_START" START_SKIP "${GCOV_LINE}") - string(REGEX MATCH "LCOV_EXCL_END" END_SKIP "${GCOV_LINE}") - string(REGEX MATCH "LCOV_EXCL_LINE" LINE_SKIP "${GCOV_LINE}") - - set(RESET_SKIP 0) - if (LINE_SKIP AND NOT DO_SKIP) - set(DO_SKIP 1) - set(RESET_SKIP 1) - endif() - - if (START_SKIP) - set(DO_SKIP 1) - message("${GCOV_LINE_COUNT}: Start skip") - endif() - - if (END_SKIP) - set(DO_SKIP 0) - endif() - - list(LENGTH RES RES_COUNT) - - if (RES_COUNT GREATER 2) - list(GET RES 0 HITCOUNT) - list(GET RES 1 LINE) - list(GET RES 2 SOURCE) - - string(STRIP ${HITCOUNT} HITCOUNT) - string(STRIP ${LINE} LINE) - - # Lines with 0 line numbers are metadata and can be ignored. - if (NOT ${LINE} EQUAL 0) - - if (DO_SKIP) - set(GCOV_FILE_COVERAGE "${GCOV_FILE_COVERAGE}null, ") - else() - # Translate the hitcount into valid JSON values. - if (${HITCOUNT} STREQUAL "#####") - set(GCOV_FILE_COVERAGE "${GCOV_FILE_COVERAGE}0, ") - elseif (${HITCOUNT} STREQUAL "-") - set(GCOV_FILE_COVERAGE "${GCOV_FILE_COVERAGE}null, ") - else() - set(GCOV_FILE_COVERAGE "${GCOV_FILE_COVERAGE}${HITCOUNT}, ") - endif() - endif() - endif() - else() - message(WARNING "Failed to properly parse line (RES_COUNT = ${RES_COUNT}) ${GCOV_FILE}:${GCOV_LINE_COUNT}\n-->${GCOV_LINE}") - endif() - - if (RESET_SKIP) - set(DO_SKIP 0) - endif() - math(EXPR GCOV_LINE_COUNT "${GCOV_LINE_COUNT}+1") - endforeach() - - message("${GCOV_LINE_COUNT} of ${LINE_COUNT} lines read!") - - # Advanced way of removing the trailing comma in the JSON array. - # "[1, 2, 3, " -> "[1, 2, 3" - string(REGEX REPLACE ",[ ]*$" "" GCOV_FILE_COVERAGE ${GCOV_FILE_COVERAGE}) - - # Append the trailing ] to complete the JSON array. - set(GCOV_FILE_COVERAGE "${GCOV_FILE_COVERAGE}]") - - # Generate the final JSON for this file. - message("Generate JSON for file: ${GCOV_SRC_REL_PATH}...") - string(CONFIGURE ${SRC_FILE_TEMPLATE} FILE_JSON) - - set(JSON_GCOV_FILES "${JSON_GCOV_FILES}${FILE_JSON}, ") +foreach(GCOV_FILE ${GCOV_FILES}) + + get_source_path_from_gcov_filename(GCOV_SRC_PATH ${GCOV_FILE}) + file(RELATIVE_PATH GCOV_SRC_REL_PATH "${PROJECT_ROOT}" "${GCOV_SRC_PATH}") + + # The new coveralls API doesn't need the entire source (Yay!) + # However, still keeping that part for now. Will cleanup in the future. + file(MD5 "${GCOV_SRC_PATH}" GCOV_CONTENTS_MD5) + message("MD5: ${GCOV_SRC_PATH} = ${GCOV_CONTENTS_MD5}") + + # Loads the gcov file as a list of lines. + # (We first open the file and replace all occurrences of [] with _ + # because CMake will fail to parse a line containing unmatched brackets... + # also the \ to escaped \n in macros screws up things.) + # https://public.kitware.com/Bug/view.php?id=15369 + file(READ ${GCOV_FILE} GCOV_CONTENTS) + string(REPLACE "[" "_" GCOV_CONTENTS "${GCOV_CONTENTS}") + string(REPLACE "]" "_" GCOV_CONTENTS "${GCOV_CONTENTS}") + string(REPLACE "\\" "_" GCOV_CONTENTS "${GCOV_CONTENTS}") + file(WRITE ${GCOV_FILE}_tmp "${GCOV_CONTENTS}") + + file(STRINGS ${GCOV_FILE}_tmp GCOV_LINES) + list(LENGTH GCOV_LINES LINE_COUNT) + + # Instead of trying to parse the source from the + # gcov file, simply read the file contents from the source file. + # (Parsing it from the gcov is hard because C-code uses ; in many places + # which also happens to be the same as the CMake list delimeter). + file(READ ${GCOV_SRC_PATH} GCOV_FILE_SOURCE) + + string(REPLACE "\\" "\\\\" GCOV_FILE_SOURCE "${GCOV_FILE_SOURCE}") + string(REGEX REPLACE "\"" "\\\\\"" GCOV_FILE_SOURCE "${GCOV_FILE_SOURCE}") + string(REPLACE "\t" "\\\\t" GCOV_FILE_SOURCE "${GCOV_FILE_SOURCE}") + string(REPLACE "\r" "\\\\r" GCOV_FILE_SOURCE "${GCOV_FILE_SOURCE}") + string(REPLACE "\n" "\\\\n" GCOV_FILE_SOURCE "${GCOV_FILE_SOURCE}") + # According to http://json.org/ these should be escaped as well. + # Don't know how to do that in CMake however... + #string(REPLACE "\b" "\\\\b" GCOV_FILE_SOURCE "${GCOV_FILE_SOURCE}") + #string(REPLACE "\f" "\\\\f" GCOV_FILE_SOURCE "${GCOV_FILE_SOURCE}") + #string(REGEX REPLACE "\u([a-fA-F0-9]{4})" "\\\\u\\1" GCOV_FILE_SOURCE "${GCOV_FILE_SOURCE}") + + # We want a json array of coverage data as a single string + # start building them from the contents of the .gcov + set(GCOV_FILE_COVERAGE "[") + + set(GCOV_LINE_COUNT 1) # Line number for the .gcov. + set(DO_SKIP 0) + foreach(GCOV_LINE ${GCOV_LINES}) + #message("${GCOV_LINE}") + # Example of what we're parsing: + # Hitcount |Line | Source + # " 8: 26: if (!allowed || (strlen(allowed) == 0))" + string(REGEX REPLACE "^([^:]*):([^:]*):(.*)$" "\\1;\\2;\\3" RES + "${GCOV_LINE}") + + # Check if we should exclude lines using the Lcov syntax. + string(REGEX MATCH "LCOV_EXCL_START" START_SKIP "${GCOV_LINE}") + string(REGEX MATCH "LCOV_EXCL_END" END_SKIP "${GCOV_LINE}") + string(REGEX MATCH "LCOV_EXCL_LINE" LINE_SKIP "${GCOV_LINE}") + + set(RESET_SKIP 0) + if(LINE_SKIP AND NOT DO_SKIP) + set(DO_SKIP 1) + set(RESET_SKIP 1) + endif() + + if(START_SKIP) + set(DO_SKIP 1) + message("${GCOV_LINE_COUNT}: Start skip") + endif() + + if(END_SKIP) + set(DO_SKIP 0) + endif() + + list(LENGTH RES RES_COUNT) + + if(RES_COUNT GREATER 2) + list(GET RES 0 HITCOUNT) + list(GET RES 1 LINE) + list(GET RES 2 SOURCE) + + string(STRIP ${HITCOUNT} HITCOUNT) + string(STRIP ${LINE} LINE) + + # Lines with 0 line numbers are metadata and can be ignored. + if(NOT ${LINE} EQUAL 0) + + if(DO_SKIP) + set(GCOV_FILE_COVERAGE "${GCOV_FILE_COVERAGE}null, ") + else() + # Translate the hitcount into valid JSON values. + if(${HITCOUNT} STREQUAL "#####") + set(GCOV_FILE_COVERAGE "${GCOV_FILE_COVERAGE}0, ") + elseif(${HITCOUNT} STREQUAL "-") + set(GCOV_FILE_COVERAGE "${GCOV_FILE_COVERAGE}null, ") + else() + set(GCOV_FILE_COVERAGE "${GCOV_FILE_COVERAGE}${HITCOUNT}, ") + endif() + endif() + endif() + else() + message( + WARNING + "Failed to properly parse line (RES_COUNT = ${RES_COUNT}) ${GCOV_FILE}:${GCOV_LINE_COUNT}\n-->${GCOV_LINE}" + ) + endif() + + if(RESET_SKIP) + set(DO_SKIP 0) + endif() + math(EXPR GCOV_LINE_COUNT "${GCOV_LINE_COUNT}+1") + endforeach() + + message("${GCOV_LINE_COUNT} of ${LINE_COUNT} lines read!") + + # Advanced way of removing the trailing comma in the JSON array. + # "[1, 2, 3, " -> "[1, 2, 3" + string(REGEX REPLACE ",[ ]*$" "" GCOV_FILE_COVERAGE ${GCOV_FILE_COVERAGE}) + + # Append the trailing ] to complete the JSON array. + set(GCOV_FILE_COVERAGE "${GCOV_FILE_COVERAGE}]") + + # Generate the final JSON for this file. + message("Generate JSON for file: ${GCOV_SRC_REL_PATH}...") + string(CONFIGURE ${SRC_FILE_TEMPLATE} FILE_JSON) + + set(JSON_GCOV_FILES "${JSON_GCOV_FILES}${FILE_JSON}, ") endforeach() # Loop through all files we couldn't find any coverage for # as well, and generate JSON for those as well with 0% coverage. foreach(NOT_COVERED_SRC ${COVERAGE_SRCS_REMAINING}) - # Loads the source file as a list of lines. - file(STRINGS ${NOT_COVERED_SRC} SRC_LINES) + # Loads the source file as a list of lines. + file(STRINGS ${NOT_COVERED_SRC} SRC_LINES) - set(GCOV_FILE_COVERAGE "[") - set(GCOV_FILE_SOURCE "") + set(GCOV_FILE_COVERAGE "[") + set(GCOV_FILE_SOURCE "") - foreach (SOURCE ${SRC_LINES}) - set(GCOV_FILE_COVERAGE "${GCOV_FILE_COVERAGE}0, ") + foreach(SOURCE ${SRC_LINES}) + set(GCOV_FILE_COVERAGE "${GCOV_FILE_COVERAGE}0, ") - string(REPLACE "\\" "\\\\" SOURCE "${SOURCE}") - string(REGEX REPLACE "\"" "\\\\\"" SOURCE "${SOURCE}") - string(REPLACE "\t" "\\\\t" SOURCE "${SOURCE}") - string(REPLACE "\r" "\\\\r" SOURCE "${SOURCE}") - set(GCOV_FILE_SOURCE "${GCOV_FILE_SOURCE}${SOURCE}\\n") - endforeach() + string(REPLACE "\\" "\\\\" SOURCE "${SOURCE}") + string(REGEX REPLACE "\"" "\\\\\"" SOURCE "${SOURCE}") + string(REPLACE "\t" "\\\\t" SOURCE "${SOURCE}") + string(REPLACE "\r" "\\\\r" SOURCE "${SOURCE}") + set(GCOV_FILE_SOURCE "${GCOV_FILE_SOURCE}${SOURCE}\\n") + endforeach() - # Remove trailing comma, and complete JSON array with ] - string(REGEX REPLACE ",[ ]*$" "" GCOV_FILE_COVERAGE ${GCOV_FILE_COVERAGE}) - set(GCOV_FILE_COVERAGE "${GCOV_FILE_COVERAGE}]") + # Remove trailing comma, and complete JSON array with ] + string(REGEX REPLACE ",[ ]*$" "" GCOV_FILE_COVERAGE ${GCOV_FILE_COVERAGE}) + set(GCOV_FILE_COVERAGE "${GCOV_FILE_COVERAGE}]") - # Generate the final JSON for this file. - string(CONFIGURE ${SRC_FILE_TEMPLATE} FILE_JSON) - set(JSON_GCOV_FILES "${JSON_GCOV_FILES}${FILE_JSON}, ") + # Generate the final JSON for this file. + string(CONFIGURE ${SRC_FILE_TEMPLATE} FILE_JSON) + set(JSON_GCOV_FILES "${JSON_GCOV_FILES}${FILE_JSON}, ") endforeach() # Get rid of trailing comma. @@ -395,7 +389,9 @@ message("Generate final JSON...") string(CONFIGURE ${JSON_TEMPLATE} JSON) file(WRITE "${COVERALLS_OUTPUT_FILE}" "${JSON}") -message("###########################################################################") -message("Generated coveralls JSON containing coverage data:") +message( + "###########################################################################") +message("Generated coveralls JSON containing coverage data:") message("${COVERALLS_OUTPUT_FILE}") -message("###########################################################################") +message( + "###########################################################################") diff --git a/cmake/cuda.cmake b/cmake/cuda.cmake index 4894d615c2a..aa958786cb8 100644 --- a/cmake/cuda.cmake +++ b/cmake/cuda.cmake @@ -1,5 +1,5 @@ if(NOT WITH_GPU) - return() + return() endif() if(WITH_NV_JETSON) @@ -38,7 +38,9 @@ function(detect_installed_gpus out_variable) if(NOT CUDA_gpu_detect_output) set(cufile ${PROJECT_BINARY_DIR}/detect_cuda_archs.cu) - file(WRITE ${cufile} "" + file( + WRITE ${cufile} + "" "#include \"stdio.h\"\n" "#include \"cuda.h\"\n" "#include \"cuda_runtime.h\"\n" @@ -54,55 +56,86 @@ function(detect_installed_gpus out_variable) " return 0;\n" "}\n") - execute_process(COMMAND "${CUDA_NVCC_EXECUTABLE}" - "--run" "${cufile}" - WORKING_DIRECTORY "${PROJECT_BINARY_DIR}/CMakeFiles/" - RESULT_VARIABLE nvcc_res OUTPUT_VARIABLE nvcc_out - ERROR_QUIET OUTPUT_STRIP_TRAILING_WHITESPACE) + execute_process( + COMMAND "${CUDA_NVCC_EXECUTABLE}" "--run" "${cufile}" + WORKING_DIRECTORY "${PROJECT_BINARY_DIR}/CMakeFiles/" + RESULT_VARIABLE nvcc_res + OUTPUT_VARIABLE nvcc_out + ERROR_QUIET OUTPUT_STRIP_TRAILING_WHITESPACE) if(nvcc_res EQUAL 0) # only keep the last line of nvcc_out - STRING(REGEX REPLACE ";" "\\\\;" nvcc_out "${nvcc_out}") - STRING(REGEX REPLACE "\n" ";" nvcc_out "${nvcc_out}") + string(REGEX REPLACE ";" "\\\\;" nvcc_out "${nvcc_out}") + string(REGEX REPLACE "\n" ";" nvcc_out "${nvcc_out}") list(GET nvcc_out -1 nvcc_out) string(REPLACE "2.1" "2.1(2.0)" nvcc_out "${nvcc_out}") - set(CUDA_gpu_detect_output ${nvcc_out} CACHE INTERNAL "Returned GPU architetures from detect_installed_gpus tool" FORCE) + set(CUDA_gpu_detect_output + ${nvcc_out} + CACHE INTERNAL + "Returned GPU architetures from detect_installed_gpus tool" + FORCE) endif() endif() if(NOT CUDA_gpu_detect_output) - message(STATUS "Automatic GPU detection failed. Building for all known architectures.") - set(${out_variable} ${paddle_known_gpu_archs} PARENT_SCOPE) + message( + STATUS + "Automatic GPU detection failed. Building for all known architectures.") + set(${out_variable} + ${paddle_known_gpu_archs} + PARENT_SCOPE) else() - set(${out_variable} ${CUDA_gpu_detect_output} PARENT_SCOPE) + set(${out_variable} + ${CUDA_gpu_detect_output} + PARENT_SCOPE) endif() endfunction() - ######################################################################## # Function for selecting GPU arch flags for nvcc based on CUDA_ARCH_NAME # Usage: # select_nvcc_arch_flags(out_variable) function(select_nvcc_arch_flags out_variable) # List of arch names - set(archs_names "Kepler" "Maxwell" "Pascal" "Volta" "Turing" "Ampere" "All" "Manual") + set(archs_names + "Kepler" + "Maxwell" + "Pascal" + "Volta" + "Turing" + "Ampere" + "All" + "Manual") set(archs_name_default "Auto") list(APPEND archs_names "Auto") # set CUDA_ARCH_NAME strings (so it will be seen as dropbox in CMake-Gui) - set(CUDA_ARCH_NAME ${archs_name_default} CACHE STRING "Select target NVIDIA GPU achitecture.") - set_property( CACHE CUDA_ARCH_NAME PROPERTY STRINGS "" ${archs_names} ) + set(CUDA_ARCH_NAME + ${archs_name_default} + CACHE STRING "Select target NVIDIA GPU achitecture.") + set_property(CACHE CUDA_ARCH_NAME PROPERTY STRINGS "" ${archs_names}) mark_as_advanced(CUDA_ARCH_NAME) # verify CUDA_ARCH_NAME value if(NOT ";${archs_names};" MATCHES ";${CUDA_ARCH_NAME};") string(REPLACE ";" ", " archs_names "${archs_names}") - message(FATAL_ERROR "Only ${archs_names} architectures names are supported.") + message( + FATAL_ERROR "Only ${archs_names} architectures names are supported.") endif() if(${CUDA_ARCH_NAME} STREQUAL "Manual") - set(CUDA_ARCH_BIN ${paddle_known_gpu_archs} CACHE STRING "Specify 'real' GPU architectures to build binaries for, BIN(PTX) format is supported") - set(CUDA_ARCH_PTX "" CACHE STRING "Specify 'virtual' PTX architectures to build PTX intermediate code for") + set(CUDA_ARCH_BIN + ${paddle_known_gpu_archs} + CACHE + STRING + "Specify 'real' GPU architectures to build binaries for, BIN(PTX) format is supported" + ) + set(CUDA_ARCH_PTX + "" + CACHE + STRING + "Specify 'virtual' PTX architectures to build PTX intermediate code for" + ) mark_as_advanced(CUDA_ARCH_BIN CUDA_ARCH_PTX) else() unset(CUDA_ARCH_BIN CACHE) @@ -112,19 +145,19 @@ function(select_nvcc_arch_flags out_variable) if(${CUDA_ARCH_NAME} STREQUAL "Kepler") set(cuda_arch_bin "30 35") elseif(${CUDA_ARCH_NAME} STREQUAL "Maxwell") - if (WITH_NV_JETSON) + if(WITH_NV_JETSON) set(cuda_arch_bin "53") else() set(cuda_arch_bin "50") endif() elseif(${CUDA_ARCH_NAME} STREQUAL "Pascal") - if (WITH_NV_JETSON) + if(WITH_NV_JETSON) set(cuda_arch_bin "62") else() set(cuda_arch_bin "60 61") endif() elseif(${CUDA_ARCH_NAME} STREQUAL "Volta") - if (WITH_NV_JETSON) + if(WITH_NV_JETSON) set(cuda_arch_bin "72") else() set(cuda_arch_bin "70") @@ -132,35 +165,37 @@ function(select_nvcc_arch_flags out_variable) elseif(${CUDA_ARCH_NAME} STREQUAL "Turing") set(cuda_arch_bin "75") elseif(${CUDA_ARCH_NAME} STREQUAL "Ampere") - if (${CMAKE_CUDA_COMPILER_VERSION} LESS 11.1) # CUDA 11.0 + if(${CMAKE_CUDA_COMPILER_VERSION} LESS 11.1) # CUDA 11.0 set(cuda_arch_bin "80") - elseif (${CMAKE_CUDA_COMPILER_VERSION} LESS 12.0) # CUDA 11.1+ + elseif(${CMAKE_CUDA_COMPILER_VERSION} LESS 12.0) # CUDA 11.1+ set(cuda_arch_bin "80 86") endif() elseif(${CUDA_ARCH_NAME} STREQUAL "All") set(cuda_arch_bin ${paddle_known_gpu_archs}) elseif(${CUDA_ARCH_NAME} STREQUAL "Auto") - message(STATUS "WARNING: This is just a warning for publishing release. + message( + STATUS + "WARNING: This is just a warning for publishing release. You are building GPU version without supporting different architectures. So the wheel package may fail on other GPU architectures. You can add -DCUDA_ARCH_NAME=All in cmake command to get a full wheel package to resolve this warning. While, this version will still work on local GPU architecture.") detect_installed_gpus(cuda_arch_bin) - else() # (${CUDA_ARCH_NAME} STREQUAL "Manual") + else() # (${CUDA_ARCH_NAME} STREQUAL "Manual") set(cuda_arch_bin ${CUDA_ARCH_BIN}) endif() if(NEW_RELEASE_JIT) - set(cuda_arch_ptx "${cuda_arch_ptx}${cuda_arch_bin}") - set(cuda_arch_bin "") + set(cuda_arch_ptx "${cuda_arch_ptx}${cuda_arch_bin}") + set(cuda_arch_bin "") endif() # remove dots and convert to lists string(REGEX REPLACE "\\." "" cuda_arch_bin "${cuda_arch_bin}") string(REGEX REPLACE "\\." "" cuda_arch_ptx "${cuda_arch_ptx}") string(REGEX MATCHALL "[0-9()]+" cuda_arch_bin "${cuda_arch_bin}") - string(REGEX MATCHALL "[0-9]+" cuda_arch_ptx "${cuda_arch_ptx}") + string(REGEX MATCHALL "[0-9]+" cuda_arch_ptx "${cuda_arch_ptx}") list(REMOVE_DUPLICATES cuda_arch_bin) list(REMOVE_DUPLICATES cuda_arch_ptx) @@ -172,7 +207,8 @@ function(select_nvcc_arch_flags out_variable) foreach(arch ${cuda_arch_bin}) if(arch MATCHES "([0-9]+)\\(([0-9]+)\\)") # User explicitly specified PTX for the concrete BIN - string(APPEND nvcc_flags " -gencode arch=compute_${CMAKE_MATCH_2},code=sm_${CMAKE_MATCH_1}") + string(APPEND nvcc_flags + " -gencode arch=compute_${CMAKE_MATCH_2},code=sm_${CMAKE_MATCH_1}") string(APPEND nvcc_archs_readable " sm_${CMAKE_MATCH_1}") else() # User didn't explicitly specify PTX for the concrete BIN, we assume PTX=BIN @@ -183,34 +219,39 @@ function(select_nvcc_arch_flags out_variable) # Tell NVCC to add PTX intermediate code for the specified architectures foreach(arch ${cuda_arch_ptx}) - string(APPEND nvcc_flags " -gencode arch=compute_${arch},code=compute_${arch}") + string(APPEND nvcc_flags + " -gencode arch=compute_${arch},code=compute_${arch}") string(APPEND nvcc_archs_readable " compute_${arch}") endforeach() string(REPLACE ";" " " nvcc_archs_readable "${nvcc_archs_readable}") - set(${out_variable} ${nvcc_flags} PARENT_SCOPE) - set(${out_variable}_readable ${nvcc_archs_readable} PARENT_SCOPE) + set(${out_variable} + ${nvcc_flags} + PARENT_SCOPE) + set(${out_variable}_readable + ${nvcc_archs_readable} + PARENT_SCOPE) endfunction() message(STATUS "CUDA detected: " ${CMAKE_CUDA_COMPILER_VERSION}) -if (${CMAKE_CUDA_COMPILER_VERSION} LESS 11.0) # CUDA 10.x +if(${CMAKE_CUDA_COMPILER_VERSION} LESS 11.0) # CUDA 10.x set(paddle_known_gpu_archs ${paddle_known_gpu_archs10}) set(CMAKE_CUDA_FLAGS "${CMAKE_CUDA_FLAGS} -D_MWAITXINTRIN_H_INCLUDED") set(CMAKE_CUDA_FLAGS "${CMAKE_CUDA_FLAGS} -D__STRICT_ANSI__") set(CMAKE_CUDA_FLAGS "${CMAKE_CUDA_FLAGS} -Wno-deprecated-gpu-targets") -elseif (${CMAKE_CUDA_COMPILER_VERSION} LESS 11.2) # CUDA 11.0/11.1 +elseif(${CMAKE_CUDA_COMPILER_VERSION} LESS 11.2) # CUDA 11.0/11.1 set(paddle_known_gpu_archs ${paddle_known_gpu_archs11}) set(CMAKE_CUDA_FLAGS "${CMAKE_CUDA_FLAGS} -D_MWAITXINTRIN_H_INCLUDED") set(CMAKE_CUDA_FLAGS "${CMAKE_CUDA_FLAGS} -D__STRICT_ANSI__") set(CMAKE_CUDA_FLAGS "${CMAKE_CUDA_FLAGS} -Wno-deprecated-gpu-targets") -elseif (${CMAKE_CUDA_COMPILER_VERSION} LESS 12.0) # CUDA 11.2+ +elseif(${CMAKE_CUDA_COMPILER_VERSION} LESS 12.0) # CUDA 11.2+ set(paddle_known_gpu_archs "${paddle_known_gpu_archs11} 86") set(CMAKE_CUDA_FLAGS "${CMAKE_CUDA_FLAGS} -D_MWAITXINTRIN_H_INCLUDED") set(CMAKE_CUDA_FLAGS "${CMAKE_CUDA_FLAGS} -D__STRICT_ANSI__") set(CMAKE_CUDA_FLAGS "${CMAKE_CUDA_FLAGS} -Wno-deprecated-gpu-targets") endif() -if (NOT ${CMAKE_CUDA_COMPILER_VERSION} LESS 10.0) +if(NOT ${CMAKE_CUDA_COMPILER_VERSION} LESS 10.0) add_definitions("-DTRT_PLUGIN_FP16_AVALIABLE") endif() @@ -231,7 +272,7 @@ set(CMAKE_CUDA_STANDARD 14) # (Note) For windows, if delete /W[1-4], /W1 will be added defaultly and conflic with -w # So replace /W[1-4] with /W0 -if (WIN32) +if(WIN32) string(REGEX REPLACE "/W[1-4]" " /W0 " CMAKE_CUDA_FLAGS "${CMAKE_CUDA_FLAGS}") endif(WIN32) # in cuda9, suppress cuda warning on eigen @@ -242,15 +283,16 @@ set(CMAKE_CUDA_FLAGS "${CMAKE_CUDA_FLAGS} --expt-relaxed-constexpr") set(CMAKE_CUDA_FLAGS "${CMAKE_CUDA_FLAGS} --expt-extended-lambda") if(WIN32) - set(CMAKE_CUDA_FLAGS "${CMAKE_CUDA_FLAGS} -Xcompiler \"/wd4244 /wd4267 /wd4819 \"") + set(CMAKE_CUDA_FLAGS + "${CMAKE_CUDA_FLAGS} -Xcompiler \"/wd4244 /wd4267 /wd4819 \"") set(CMAKE_CUDA_FLAGS "${CMAKE_CUDA_FLAGS} -Xcompiler /bigobj") if(MSVC_STATIC_CRT) foreach(flag_var - CMAKE_CUDA_FLAGS CMAKE_CUDA_FLAGS_DEBUG CMAKE_CUDA_FLAGS_RELEASE - CMAKE_CUDA_FLAGS_MINSIZEREL CMAKE_CUDA_FLAGS_RELWITHDEBINFO) - if(${flag_var} MATCHES "-MD") - string(REGEX REPLACE "-MD" "-MT" ${flag_var} "${${flag_var}}") - endif() + CMAKE_CUDA_FLAGS CMAKE_CUDA_FLAGS_DEBUG CMAKE_CUDA_FLAGS_RELEASE + CMAKE_CUDA_FLAGS_MINSIZEREL CMAKE_CUDA_FLAGS_RELWITHDEBINFO) + if(${flag_var} MATCHES "-MD") + string(REGEX REPLACE "-MD" "-MT" ${flag_var} "${${flag_var}}") + endif() endforeach(flag_var) endif() endif() diff --git a/cmake/cudnn.cmake b/cmake/cudnn.cmake index c82847100ab..2e5131d217a 100644 --- a/cmake/cudnn.cmake +++ b/cmake/cudnn.cmake @@ -1,107 +1,113 @@ if(NOT WITH_GPU) - return() + return() endif() if(WIN32) - set(CUDNN_ROOT ${CUDA_TOOLKIT_ROOT_DIR}) + set(CUDNN_ROOT ${CUDA_TOOLKIT_ROOT_DIR}) else(WIN32) - set(CUDNN_ROOT "/usr" CACHE PATH "CUDNN ROOT") + set(CUDNN_ROOT + "/usr" + CACHE PATH "CUDNN ROOT") endif(WIN32) -find_path(CUDNN_INCLUDE_DIR cudnn.h - PATHS ${CUDNN_ROOT} ${CUDNN_ROOT}/include - $ENV{CUDNN_ROOT} $ENV{CUDNN_ROOT}/include ${CUDA_TOOLKIT_INCLUDE} - NO_DEFAULT_PATH -) +find_path( + CUDNN_INCLUDE_DIR cudnn.h + PATHS ${CUDNN_ROOT} ${CUDNN_ROOT}/include $ENV{CUDNN_ROOT} + $ENV{CUDNN_ROOT}/include ${CUDA_TOOLKIT_INCLUDE} + NO_DEFAULT_PATH) get_filename_component(__libpath_hist ${CUDA_CUDART_LIBRARY} PATH) set(TARGET_ARCH "x86_64") if(NOT ${CMAKE_SYSTEM_PROCESSOR}) - set(TARGET_ARCH ${CMAKE_SYSTEM_PROCESSOR}) + set(TARGET_ARCH ${CMAKE_SYSTEM_PROCESSOR}) endif() -list(APPEND CUDNN_CHECK_LIBRARY_DIRS - ${CUDNN_ROOT} - ${CUDNN_ROOT}/lib64 - ${CUDNN_ROOT}/lib - ${CUDNN_ROOT}/lib/${TARGET_ARCH}-linux-gnu - ${CUDNN_ROOT}/local/cuda-${CUDA_VERSION}/targets/${TARGET_ARCH}-linux/lib/ - $ENV{CUDNN_ROOT} - $ENV{CUDNN_ROOT}/lib64 - $ENV{CUDNN_ROOT}/lib - /usr/lib - ${CUDA_TOOLKIT_ROOT_DIR} - ${CUDA_TOOLKIT_ROOT_DIR}/lib/x64 - ) +list( + APPEND + CUDNN_CHECK_LIBRARY_DIRS + ${CUDNN_ROOT} + ${CUDNN_ROOT}/lib64 + ${CUDNN_ROOT}/lib + ${CUDNN_ROOT}/lib/${TARGET_ARCH}-linux-gnu + ${CUDNN_ROOT}/local/cuda-${CUDA_VERSION}/targets/${TARGET_ARCH}-linux/lib/ + $ENV{CUDNN_ROOT} + $ENV{CUDNN_ROOT}/lib64 + $ENV{CUDNN_ROOT}/lib + /usr/lib + ${CUDA_TOOLKIT_ROOT_DIR} + ${CUDA_TOOLKIT_ROOT_DIR}/lib/x64) set(CUDNN_LIB_NAME "") -if (LINUX) - set(CUDNN_LIB_NAME "libcudnn.so") +if(LINUX) + set(CUDNN_LIB_NAME "libcudnn.so") endif(LINUX) if(WIN32) - # only support cudnn7 - set(CUDNN_LIB_NAME "cudnn.lib" "cudnn64_7.dll") + # only support cudnn7 + set(CUDNN_LIB_NAME "cudnn.lib" "cudnn64_7.dll") endif(WIN32) if(APPLE) - set(CUDNN_LIB_NAME "libcudnn.dylib" "libcudnn.so") + set(CUDNN_LIB_NAME "libcudnn.dylib" "libcudnn.so") endif(APPLE) -find_library(CUDNN_LIBRARY NAMES ${CUDNN_LIB_NAME} # libcudnn_static.a - PATHS ${CUDNN_CHECK_LIBRARY_DIRS} ${CUDNN_INCLUDE_DIR} ${__libpath_hist} - NO_DEFAULT_PATH - DOC "Path to cuDNN library.") - +find_library( + CUDNN_LIBRARY + NAMES ${CUDNN_LIB_NAME} # libcudnn_static.a + PATHS ${CUDNN_CHECK_LIBRARY_DIRS} ${CUDNN_INCLUDE_DIR} ${__libpath_hist} + NO_DEFAULT_PATH + DOC "Path to cuDNN library.") if(CUDNN_INCLUDE_DIR AND CUDNN_LIBRARY) - set(CUDNN_FOUND ON) + set(CUDNN_FOUND ON) else() - set(CUDNN_FOUND OFF) + set(CUDNN_FOUND OFF) endif() -macro(find_cudnn_version cudnn_header_file) - file(READ ${cudnn_header_file} CUDNN_VERSION_FILE_CONTENTS) - get_filename_component(CUDNN_LIB_PATH ${CUDNN_LIBRARY} DIRECTORY) - - string(REGEX MATCH "define CUDNN_VERSION +([0-9]+)" - CUDNN_VERSION "${CUDNN_VERSION_FILE_CONTENTS}") - string(REGEX REPLACE "define CUDNN_VERSION +([0-9]+)" "\\1" - CUDNN_VERSION "${CUDNN_VERSION}") - - if("${CUDNN_VERSION}" STREQUAL "2000") - message(STATUS "Current cuDNN version is v2. ") +macro(find_cudnn_version cudnn_header_file) + file(READ ${cudnn_header_file} CUDNN_VERSION_FILE_CONTENTS) + get_filename_component(CUDNN_LIB_PATH ${CUDNN_LIBRARY} DIRECTORY) + + string(REGEX MATCH "define CUDNN_VERSION +([0-9]+)" CUDNN_VERSION + "${CUDNN_VERSION_FILE_CONTENTS}") + string(REGEX REPLACE "define CUDNN_VERSION +([0-9]+)" "\\1" CUDNN_VERSION + "${CUDNN_VERSION}") + + if("${CUDNN_VERSION}" STREQUAL "2000") + message(STATUS "Current cuDNN version is v2. ") + else() + string(REGEX MATCH "define CUDNN_MAJOR +([0-9]+)" CUDNN_MAJOR_VERSION + "${CUDNN_VERSION_FILE_CONTENTS}") + string(REGEX REPLACE "define CUDNN_MAJOR +([0-9]+)" "\\1" + CUDNN_MAJOR_VERSION "${CUDNN_MAJOR_VERSION}") + string(REGEX MATCH "define CUDNN_MINOR +([0-9]+)" CUDNN_MINOR_VERSION + "${CUDNN_VERSION_FILE_CONTENTS}") + string(REGEX REPLACE "define CUDNN_MINOR +([0-9]+)" "\\1" + CUDNN_MINOR_VERSION "${CUDNN_MINOR_VERSION}") + string(REGEX MATCH "define CUDNN_PATCHLEVEL +([0-9]+)" + CUDNN_PATCHLEVEL_VERSION "${CUDNN_VERSION_FILE_CONTENTS}") + string(REGEX REPLACE "define CUDNN_PATCHLEVEL +([0-9]+)" "\\1" + CUDNN_PATCHLEVEL_VERSION "${CUDNN_PATCHLEVEL_VERSION}") + + if(NOT CUDNN_MAJOR_VERSION) + set(CUDNN_VERSION "???") else() - string(REGEX MATCH "define CUDNN_MAJOR +([0-9]+)" CUDNN_MAJOR_VERSION - "${CUDNN_VERSION_FILE_CONTENTS}") - string(REGEX REPLACE "define CUDNN_MAJOR +([0-9]+)" "\\1" - CUDNN_MAJOR_VERSION "${CUDNN_MAJOR_VERSION}") - string(REGEX MATCH "define CUDNN_MINOR +([0-9]+)" CUDNN_MINOR_VERSION - "${CUDNN_VERSION_FILE_CONTENTS}") - string(REGEX REPLACE "define CUDNN_MINOR +([0-9]+)" "\\1" - CUDNN_MINOR_VERSION "${CUDNN_MINOR_VERSION}") - string(REGEX MATCH "define CUDNN_PATCHLEVEL +([0-9]+)" - CUDNN_PATCHLEVEL_VERSION "${CUDNN_VERSION_FILE_CONTENTS}") - string(REGEX REPLACE "define CUDNN_PATCHLEVEL +([0-9]+)" "\\1" - CUDNN_PATCHLEVEL_VERSION "${CUDNN_PATCHLEVEL_VERSION}") - - if(NOT CUDNN_MAJOR_VERSION) - set(CUDNN_VERSION "???") - else() - add_definitions("-DCUDNN_MAJOR_VERSION=\"${CUDNN_MAJOR_VERSION}\"") - math(EXPR CUDNN_VERSION - "${CUDNN_MAJOR_VERSION} * 1000 + + add_definitions("-DCUDNN_MAJOR_VERSION=\"${CUDNN_MAJOR_VERSION}\"") + math(EXPR CUDNN_VERSION "${CUDNN_MAJOR_VERSION} * 1000 + ${CUDNN_MINOR_VERSION} * 100 + ${CUDNN_PATCHLEVEL_VERSION}") - message(STATUS "Current cuDNN header is ${cudnn_header_file} " - "Current cuDNN version is v${CUDNN_MAJOR_VERSION}.${CUDNN_MINOR_VERSION}.${CUDNN_PATCHLEVEL_VERSION}. ") - endif() + message( + STATUS + "Current cuDNN header is ${cudnn_header_file} " + "Current cuDNN version is v${CUDNN_MAJOR_VERSION}.${CUDNN_MINOR_VERSION}.${CUDNN_PATCHLEVEL_VERSION}. " + ) endif() + endif() endmacro() if(CUDNN_FOUND) - find_cudnn_version(${CUDNN_INCLUDE_DIR}/cudnn.h) - if (NOT CUDNN_MAJOR_VERSION) - find_cudnn_version(${CUDNN_INCLUDE_DIR}/cudnn_version.h) + find_cudnn_version(${CUDNN_INCLUDE_DIR}/cudnn.h) + if(NOT CUDNN_MAJOR_VERSION) + find_cudnn_version(${CUDNN_INCLUDE_DIR}/cudnn_version.h) endif() endif() diff --git a/cmake/cupti.cmake b/cmake/cupti.cmake index 2d7b1917b68..6bf0141c208 100644 --- a/cmake/cupti.cmake +++ b/cmake/cupti.cmake @@ -1,44 +1,51 @@ if(NOT WITH_GPU) - return() + return() endif() - -set(CUPTI_ROOT "/usr" CACHE PATH "CUPTI ROOT") -find_path(CUPTI_INCLUDE_DIR cupti.h - PATHS ${CUPTI_ROOT} ${CUPTI_ROOT}/include - $ENV{CUPTI_ROOT} $ENV{CUPTI_ROOT}/include +set(CUPTI_ROOT + "/usr" + CACHE PATH "CUPTI ROOT") +find_path( + CUPTI_INCLUDE_DIR cupti.h + PATHS ${CUPTI_ROOT} + ${CUPTI_ROOT}/include + $ENV{CUPTI_ROOT} + $ENV{CUPTI_ROOT}/include ${CUDA_TOOLKIT_ROOT_DIR}/extras/CUPTI/include ${CUDA_TOOLKIT_ROOT_DIR}/targets/x86_64-linux/include ${CUDA_TOOLKIT_ROOT_DIR}/targets/aarch64-linux/include - NO_DEFAULT_PATH - ) + NO_DEFAULT_PATH) get_filename_component(__libpath_hist ${CUDA_CUDART_LIBRARY} PATH) set(TARGET_ARCH "x86_64") if(NOT ${CMAKE_SYSTEM_PROCESSOR}) - set(TARGET_ARCH ${CMAKE_SYSTEM_PROCESSOR}) + set(TARGET_ARCH ${CMAKE_SYSTEM_PROCESSOR}) endif() -list(APPEND CUPTI_CHECK_LIBRARY_DIRS - ${CUPTI_ROOT} - ${CUPTI_ROOT}/lib64 - ${CUPTI_ROOT}/lib - ${CUPTI_ROOT}/lib/${TARGET_ARCH}-linux-gnu - $ENV{CUPTI_ROOT} - $ENV{CUPTI_ROOT}/lib64 - $ENV{CUPTI_ROOT}/lib - /usr/lib - ${CUDA_TOOLKIT_ROOT_DIR}/targets/x86_64-linux/lib64 - ${CUDA_TOOLKIT_ROOT_DIR}/extras/CUPTI/lib64) -find_library(CUPTI_LIBRARY NAMES libcupti.so libcupti.dylib # libcupti_static.a - PATHS ${CUPTI_CHECK_LIBRARY_DIRS} ${CUPTI_INCLUDE_DIR} ${__libpath_hist} - NO_DEFAULT_PATH - DOC "Path to cuPTI library.") +list( + APPEND + CUPTI_CHECK_LIBRARY_DIRS + ${CUPTI_ROOT} + ${CUPTI_ROOT}/lib64 + ${CUPTI_ROOT}/lib + ${CUPTI_ROOT}/lib/${TARGET_ARCH}-linux-gnu + $ENV{CUPTI_ROOT} + $ENV{CUPTI_ROOT}/lib64 + $ENV{CUPTI_ROOT}/lib + /usr/lib + ${CUDA_TOOLKIT_ROOT_DIR}/targets/x86_64-linux/lib64 + ${CUDA_TOOLKIT_ROOT_DIR}/extras/CUPTI/lib64) +find_library( + CUPTI_LIBRARY + NAMES libcupti.so libcupti.dylib # libcupti_static.a + PATHS ${CUPTI_CHECK_LIBRARY_DIRS} ${CUPTI_INCLUDE_DIR} ${__libpath_hist} + NO_DEFAULT_PATH + DOC "Path to cuPTI library.") get_filename_component(CUPTI_LIBRARY_PATH ${CUPTI_LIBRARY} DIRECTORY) if(CUPTI_INCLUDE_DIR AND CUPTI_LIBRARY) - set(CUPTI_FOUND ON) + set(CUPTI_FOUND ON) else() - set(CUPTI_FOUND OFF) + set(CUPTI_FOUND OFF) endif() diff --git a/cmake/experimental.cmake b/cmake/experimental.cmake index 55e7fe263f9..0e4b1976456 100644 --- a/cmake/experimental.cmake +++ b/cmake/experimental.cmake @@ -1,11 +1,11 @@ # Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserved. -# +# # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. # You may obtain a copy of the License at -# +# # http://www.apache.org/licenses/LICENSE-2.0 -# +# # Unless required by applicable law or agreed to in writing, software # distributed under the License is distributed on an "AS IS" BASIS, # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. diff --git a/cmake/experiments/cuda_module_loading_lazy.cmake b/cmake/experiments/cuda_module_loading_lazy.cmake index ef6a51b594b..0f0793a8ee3 100644 --- a/cmake/experiments/cuda_module_loading_lazy.cmake +++ b/cmake/experiments/cuda_module_loading_lazy.cmake @@ -1,11 +1,11 @@ # Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserved. -# +# # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. # You may obtain a copy of the License at -# +# # http://www.apache.org/licenses/LICENSE-2.0 -# +# # Unless required by applicable law or agreed to in writing, software # distributed under the License is distributed on an "AS IS" BASIS, # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. @@ -16,25 +16,35 @@ # cuda moduel lazy loading is supported by CUDA 11.6+ # this experiment option makes Paddle supports lazy loading before CUDA 11.6. -option(EXP_CUDA_MODULE_LOADING_LAZY "enable lazy cuda module loading" OFF) -if (${EXP_CUDA_MODULE_LOADING_LAZY}) - if (NOT ${ON_INFER} OR NOT ${LINUX}) - message("EXP_CUDA_MODULE_LOADING_LAZY only works with ON_INFER=ON on Linux platforms") +option(EXP_CUDA_MODULE_LOADING_LAZY "enable lazy cuda module loading" OFF) +if(${EXP_CUDA_MODULE_LOADING_LAZY}) + if(NOT ${ON_INFER} OR NOT ${LINUX}) + message( + "EXP_CUDA_MODULE_LOADING_LAZY only works with ON_INFER=ON on Linux platforms" + ) return() - endif () - if (NOT ${CUDA_FOUND}) + endif() + if(NOT ${CUDA_FOUND}) message("EXP_CUDA_MODULE_LOADING_LAZY only works with CUDA") return() - endif () - if (${CUDA_VERSION} VERSION_GREATER_EQUAL "11.6") + endif() + if(${CUDA_VERSION} VERSION_GREATER_EQUAL "11.6") message("cuda 11.6+ already support lazy module loading") return() - endif () + endif() - message("for cuda before 11.6, libcudart.so must be used for the lazy module loading trick to work, instead of libcudart_static.a") - set(CUDA_USE_STATIC_CUDA_RUNTIME OFF CACHE BOOL "" FORCE) + message( + "for cuda before 11.6, libcudart.so must be used for the lazy module loading trick to work, instead of libcudart_static.a" + ) + set(CUDA_USE_STATIC_CUDA_RUNTIME + OFF + CACHE BOOL "" FORCE) set(CMAKE_CUDA_FLAGS "--cudart shared") enable_language(CUDA) - set(CUDA_NVCC_EXECUTABLE "${CMAKE_SOURCE_DIR}/tools/nvcc_lazy" CACHE FILEPATH "" FORCE) - set(CMAKE_CUDA_COMPILER "${CMAKE_SOURCE_DIR}/tools/nvcc_lazy" CACHE FILEPATH "" FORCE) + set(CUDA_NVCC_EXECUTABLE + "${CMAKE_SOURCE_DIR}/tools/nvcc_lazy" + CACHE FILEPATH "" FORCE) + set(CMAKE_CUDA_COMPILER + "${CMAKE_SOURCE_DIR}/tools/nvcc_lazy" + CACHE FILEPATH "" FORCE) endif() diff --git a/cmake/external/arm_brpc.cmake b/cmake/external/arm_brpc.cmake index 83935ae0c63..660261d3ffc 100755 --- a/cmake/external/arm_brpc.cmake +++ b/cmake/external/arm_brpc.cmake @@ -12,7 +12,7 @@ # See the License for the specific language governing permissions and # limitations under the License. -INCLUDE(ExternalProject) +include(ExternalProject) #find_package(OpenSSL REQUIRED) @@ -25,52 +25,56 @@ INCLUDE(ExternalProject) #ADD_LIBRARY(crypto SHARED IMPORTED GLOBAL) #SET_PROPERTY(TARGET crypto PROPERTY IMPORTED_LOCATION ${OPENSSL_CRYPTO_LIBRARY}) -IF((NOT DEFINED ARM_BRPC_NAME) OR (NOT DEFINED ARM_BRPC_URL)) - SET(ARM_BRPC_VER "1.1.0" CACHE STRING "" FORCE) - SET(ARM_BRPC_NAME "arm_brpc" CACHE STRING "" FORCE) -ENDIF() +if((NOT DEFINED ARM_BRPC_NAME) OR (NOT DEFINED ARM_BRPC_URL)) + set(ARM_BRPC_VER + "1.1.0" + CACHE STRING "" FORCE) + set(ARM_BRPC_NAME + "arm_brpc" + CACHE STRING "" FORCE) +endif() -MESSAGE(STATUS "ARM_BRPC_NAME: ${ARM_BRPC_NAME}, ARM_BRPC_URL: ${ARM_BRPC_URL}") -SET(ARM_BRPC_PREFIX_DIR "${THIRD_PARTY_PATH}/arm_brpc") -SET(ARM_BRPC_PROJECT "extern_arm_brpc") -SET(ARM_BRPC_DOWNLOAD_DIR "${ARM_BRPC_PREFIX_DIR}/src/${ARM_BRPC_PROJECT}") -SET(ARM_BRPC_DST_DIR "output") -SET(ARM_BRPC_INSTALL_ROOT "${THIRD_PARTY_PATH}/install") -SET(ARM_BRPC_INSTALL_DIR ${ARM_BRPC_INSTALL_ROOT}/arm_brpc/output) -SET(ARM_BRPC_ROOT ${ARM_BRPC_INSTALL_DIR}) -SET(ARM_BRPC_INC_DIR ${ARM_BRPC_ROOT}/include) -SET(ARM_BRPC_LIB_DIR ${ARM_BRPC_ROOT}/lib) -SET(ARM_BRPC_LIB ${ARM_BRPC_LIB_DIR}/libbrpc.a) -SET(CMAKE_INSTALL_RPATH "${CMAKE_INSTALL_RPATH}" "${ARM_BRPC_ROOT}/lib") +message(STATUS "ARM_BRPC_NAME: ${ARM_BRPC_NAME}, ARM_BRPC_URL: ${ARM_BRPC_URL}") +set(ARM_BRPC_PREFIX_DIR "${THIRD_PARTY_PATH}/arm_brpc") +set(ARM_BRPC_PROJECT "extern_arm_brpc") +set(ARM_BRPC_DOWNLOAD_DIR "${ARM_BRPC_PREFIX_DIR}/src/${ARM_BRPC_PROJECT}") +set(ARM_BRPC_DST_DIR "output") +set(ARM_BRPC_INSTALL_ROOT "${THIRD_PARTY_PATH}/install") +set(ARM_BRPC_INSTALL_DIR ${ARM_BRPC_INSTALL_ROOT}/arm_brpc/output) +set(ARM_BRPC_ROOT ${ARM_BRPC_INSTALL_DIR}) +set(ARM_BRPC_INC_DIR ${ARM_BRPC_ROOT}/include) +set(ARM_BRPC_LIB_DIR ${ARM_BRPC_ROOT}/lib) +set(ARM_BRPC_LIB ${ARM_BRPC_LIB_DIR}/libbrpc.a) +set(CMAKE_INSTALL_RPATH "${CMAKE_INSTALL_RPATH}" "${ARM_BRPC_ROOT}/lib") -INCLUDE_DIRECTORIES(${ARM_BRPC_INSTALL_ROOT}/${ARM_BRPC_NAME}/output/include) +include_directories(${ARM_BRPC_INSTALL_ROOT}/${ARM_BRPC_NAME}/output/include) -FILE(WRITE ${ARM_BRPC_DOWNLOAD_DIR}/CMakeLists.txt - "PROJECT(ARM_BRPC)\n" - "cmake_minimum_required(VERSION 3.0)\n" +file( + WRITE ${ARM_BRPC_DOWNLOAD_DIR}/CMakeLists.txt + "PROJECT(ARM_BRPC)\n" "cmake_minimum_required(VERSION 3.0)\n" "install(DIRECTORY ${ARM_BRPC_DST_DIR} ${ARM_BRPC_DST_DIR} \n" " DESTINATION ${ARM_BRPC_NAME})\n") - -SET(ARM_BRPC_URL "https://paddlerec.bj.bcebos.com/online_infer/arm_brpc_ubuntu18/output.tar.gz" CACHE STRING "" FORCE) + +set(ARM_BRPC_URL + "https://paddlerec.bj.bcebos.com/online_infer/arm_brpc_ubuntu18/output.tar.gz" + CACHE STRING "" FORCE) ExternalProject_Add( - ${ARM_BRPC_PROJECT} - ${EXTERNAL_PROJECT_LOG_ARGS} - PREFIX ${ARM_BRPC_PREFIX_DIR} - DOWNLOAD_DIR ${ARM_BRPC_DOWNLOAD_DIR} - DOWNLOAD_COMMAND rm -rf output.tar.gz - && wget --no-check-certificate ${ARM_BRPC_URL} - && tar zxvf output.tar.gz - #DOWNLOAD_COMMAND cp /home/wangbin44/Paddle/build/output.tar.gz . - # && tar zxvf output.tar.gz - DOWNLOAD_NO_PROGRESS 1 - UPDATE_COMMAND "" - CMAKE_ARGS -DCMAKE_INSTALL_PREFIX=${ARM_BRPC_INSTALL_ROOT} - -DCMAKE_BUILD_TYPE=${THIRD_PARTY_BUILD_TYPE} - CMAKE_CACHE_ARGS -DCMAKE_INSTALL_PREFIX:PATH=${ARM_BRPC_INSTALL_ROOT} - -DCMAKE_BUILD_TYPE=${THIRD_PARTY_BUILD_TYPE} - BUILD_BYPRODUCTS ${ARM_BRPC_LIB} -) + ${ARM_BRPC_PROJECT} + ${EXTERNAL_PROJECT_LOG_ARGS} + PREFIX ${ARM_BRPC_PREFIX_DIR} + DOWNLOAD_DIR ${ARM_BRPC_DOWNLOAD_DIR} + DOWNLOAD_COMMAND rm -rf output.tar.gz && wget --no-check-certificate + ${ARM_BRPC_URL} && tar zxvf output.tar.gz + #DOWNLOAD_COMMAND cp /home/wangbin44/Paddle/build/output.tar.gz . + # && tar zxvf output.tar.gz + DOWNLOAD_NO_PROGRESS 1 + UPDATE_COMMAND "" + CMAKE_ARGS -DCMAKE_INSTALL_PREFIX=${ARM_BRPC_INSTALL_ROOT} + -DCMAKE_BUILD_TYPE=${THIRD_PARTY_BUILD_TYPE} + CMAKE_CACHE_ARGS -DCMAKE_INSTALL_PREFIX:PATH=${ARM_BRPC_INSTALL_ROOT} + -DCMAKE_BUILD_TYPE=${THIRD_PARTY_BUILD_TYPE} + BUILD_BYPRODUCTS ${ARM_BRPC_LIB}) -ADD_LIBRARY(arm_brpc STATIC IMPORTED GLOBAL) # 直接导入已经生成的库 -SET_PROPERTY(TARGET arm_brpc PROPERTY IMPORTED_LOCATION ${ARM_BRPC_LIB}) -ADD_DEPENDENCIES(arm_brpc ${ARM_BRPC_PROJECT}) +add_library(arm_brpc STATIC IMPORTED GLOBAL) # 直接导入已经生成的库 +set_property(TARGET arm_brpc PROPERTY IMPORTED_LOCATION ${ARM_BRPC_LIB}) +add_dependencies(arm_brpc ${ARM_BRPC_PROJECT}) diff --git a/cmake/external/ascend.cmake b/cmake/external/ascend.cmake index d02f47142e7..3dbe7e6e8aa 100644 --- a/cmake/external/ascend.cmake +++ b/cmake/external/ascend.cmake @@ -12,21 +12,20 @@ # See the License for the specific language governing permissions and # limitations under the License. - #NOTE: Logic is from # https://github.com/mindspore-ai/graphengine/blob/master/CMakeLists.txt if(DEFINED ENV{ASCEND_CUSTOM_PATH}) - set(ASCEND_DIR $ENV{ASCEND_CUSTOM_PATH}) + set(ASCEND_DIR $ENV{ASCEND_CUSTOM_PATH}) else() - set(ASCEND_DIR /usr/local/Ascend) + set(ASCEND_DIR /usr/local/Ascend) endif() -if(EXISTS ${ASCEND_DIR}/ascend-toolkit/latest/fwkacllib/include/graph/ascend_string.h) +if(EXISTS + ${ASCEND_DIR}/ascend-toolkit/latest/fwkacllib/include/graph/ascend_string.h) # It means CANN 20.2 + add_definitions(-DPADDLE_WITH_ASCEND_STRING) endif() - if(WITH_ASCEND OR WITH_ASCEND_CL) set(ASCEND_DRIVER_DIR ${ASCEND_DIR}/driver/lib64) set(ASCEND_DRIVER_COMMON_DIR ${ASCEND_DIR}/driver/lib64/common) @@ -36,28 +35,32 @@ if(WITH_ASCEND OR WITH_ASCEND_CL) set(ASCEND_ACL_DIR ${ASCEND_DIR}/acllib/lib64) set(STATIC_ACL_LIB ${ASCEND_ACL_DIR}) - set(ASCEND_MS_RUNTIME_PATH ${ASCEND_RUNTIME_DIR} ${ASCEND_ACL_DIR} ${ASCEND_ATC_DIR}) + set(ASCEND_MS_RUNTIME_PATH ${ASCEND_RUNTIME_DIR} ${ASCEND_ACL_DIR} + ${ASCEND_ATC_DIR}) set(ASCEND_MS_DRIVER_PATH ${ASCEND_DRIVER_DIR} ${ASCEND_DRIVER_COMMON_DIR}) set(ATLAS_RUNTIME_DIR ${ASCEND_DIR}/ascend-toolkit/latest/fwkacllib/lib64) - set(ATLAS_RUNTIME_INC_DIR ${ASCEND_DIR}/ascend-toolkit/latest/fwkacllib/include) + set(ATLAS_RUNTIME_INC_DIR + ${ASCEND_DIR}/ascend-toolkit/latest/fwkacllib/include) set(ATLAS_ACL_DIR ${ASCEND_DIR}/ascend-toolkit/latest/acllib/lib64) set(ATLAS_ATC_DIR ${ASCEND_DIR}/ascend-toolkit/latest/atc/lib64) - set(ATLAS_MS_RUNTIME_PATH ${ATLAS_RUNTIME_DIR} ${ATLAS_ACL_DIR} ${ATLAS_ATC_DIR}) + set(ATLAS_MS_RUNTIME_PATH ${ATLAS_RUNTIME_DIR} ${ATLAS_ACL_DIR} + ${ATLAS_ATC_DIR}) set(atlas_graph_lib ${ATLAS_RUNTIME_DIR}/libgraph.so) set(atlas_ge_runner_lib ${ATLAS_RUNTIME_DIR}/libge_runner.so) set(atlas_acl_lib ${ATLAS_RUNTIME_DIR}/libascendcl.so) - INCLUDE_DIRECTORIES(${ATLAS_RUNTIME_INC_DIR}) - + include_directories(${ATLAS_RUNTIME_INC_DIR}) - ADD_LIBRARY(ascend_ge SHARED IMPORTED GLOBAL) - SET_PROPERTY(TARGET ascend_ge PROPERTY IMPORTED_LOCATION ${atlas_ge_runner_lib}) + add_library(ascend_ge SHARED IMPORTED GLOBAL) + set_property(TARGET ascend_ge PROPERTY IMPORTED_LOCATION + ${atlas_ge_runner_lib}) - ADD_LIBRARY(ascend_graph SHARED IMPORTED GLOBAL) - SET_PROPERTY(TARGET ascend_graph PROPERTY IMPORTED_LOCATION ${atlas_graph_lib}) + add_library(ascend_graph SHARED IMPORTED GLOBAL) + set_property(TARGET ascend_graph PROPERTY IMPORTED_LOCATION + ${atlas_graph_lib}) - ADD_LIBRARY(atlas_acl SHARED IMPORTED GLOBAL) - SET_PROPERTY(TARGET atlas_acl PROPERTY IMPORTED_LOCATION ${atlas_acl_lib}) + add_library(atlas_acl SHARED IMPORTED GLOBAL) + set_property(TARGET atlas_acl PROPERTY IMPORTED_LOCATION ${atlas_acl_lib}) add_custom_target(extern_ascend DEPENDS ascend_ge ascend_graph atlas_acl) endif() @@ -73,52 +76,60 @@ if(WITH_ASCEND_CL) message(STATUS "FWKACLLIB_INC_DIR ${FWKACLLIB_INC_DIR}") message(STATUS "ASCEND_CL_DIR ${ASCEND_CL_DIR}") - INCLUDE_DIRECTORIES(${FWKACLLIB_INC_DIR}) - INCLUDE_DIRECTORIES(${ACLLIB_INC_DIR}) + include_directories(${FWKACLLIB_INC_DIR}) + include_directories(${ACLLIB_INC_DIR}) - ADD_LIBRARY(ascendcl SHARED IMPORTED GLOBAL) - SET_PROPERTY(TARGET ascendcl PROPERTY IMPORTED_LOCATION ${ascendcl_lib}) + add_library(ascendcl SHARED IMPORTED GLOBAL) + set_property(TARGET ascendcl PROPERTY IMPORTED_LOCATION ${ascendcl_lib}) - ADD_LIBRARY(ascend_hccl SHARED IMPORTED GLOBAL) - SET_PROPERTY(TARGET ascend_hccl PROPERTY IMPORTED_LOCATION ${ascend_hccl_lib}) + add_library(ascend_hccl SHARED IMPORTED GLOBAL) + set_property(TARGET ascend_hccl PROPERTY IMPORTED_LOCATION ${ascend_hccl_lib}) - ADD_LIBRARY(acl_op_compiler SHARED IMPORTED GLOBAL) - SET_PROPERTY(TARGET acl_op_compiler PROPERTY IMPORTED_LOCATION ${acl_op_compiler_lib}) + add_library(acl_op_compiler SHARED IMPORTED GLOBAL) + set_property(TARGET acl_op_compiler PROPERTY IMPORTED_LOCATION + ${acl_op_compiler_lib}) add_custom_target(extern_ascend_cl DEPENDS ascendcl acl_op_compiler) endif() -if (WITH_ASCEND_CL) -macro(find_ascend_toolkit_version ascend_toolkit_version_info) +if(WITH_ASCEND_CL) + macro(find_ascend_toolkit_version ascend_toolkit_version_info) file(READ ${ascend_toolkit_version_info} ASCEND_TOOLKIT_VERSION_CONTENTS) - string(REGEX MATCH "version=([0-9]+\.[0-9]+\.(RC)?[0-9][.a-z0-9]*)" ASCEND_TOOLKIT_VERSION "${ASCEND_TOOLKIT_VERSION_CONTENTS}") - string(REGEX REPLACE "version=([0-9]+\.[0-9]+\.(RC)?[0-9][.a-z0-9]*)" "\\1" ASCEND_TOOLKIT_VERSION "${ASCEND_TOOLKIT_VERSION}") - string(REGEX REPLACE "[A-Z]|[a-z|\.]" "" CANN_VERSION ${ASCEND_TOOLKIT_VERSION}) - STRING(SUBSTRING "${CANN_VERSION}000" 0 6 CANN_VERSION) + string(REGEX MATCH "version=([0-9]+\.[0-9]+\.(RC)?[0-9][.a-z0-9]*)" + ASCEND_TOOLKIT_VERSION "${ASCEND_TOOLKIT_VERSION_CONTENTS}") + string(REGEX REPLACE "version=([0-9]+\.[0-9]+\.(RC)?[0-9][.a-z0-9]*)" "\\1" + ASCEND_TOOLKIT_VERSION "${ASCEND_TOOLKIT_VERSION}") + string(REGEX REPLACE "[A-Z]|[a-z|\.]" "" CANN_VERSION + ${ASCEND_TOOLKIT_VERSION}) + string(SUBSTRING "${CANN_VERSION}000" 0 6 CANN_VERSION) add_definitions("-DCANN_VERSION_CODE=${CANN_VERSION}") if(NOT ASCEND_TOOLKIT_VERSION) - set(ASCEND_TOOLKIT_VERSION "???") + set(ASCEND_TOOLKIT_VERSION "???") else() - message(STATUS "Current Ascend Toolkit version is ${ASCEND_TOOLKIT_VERSION}") + message( + STATUS "Current Ascend Toolkit version is ${ASCEND_TOOLKIT_VERSION}") endif() -endmacro() + endmacro() -macro(find_ascend_driver_version ascend_driver_version_info) + macro(find_ascend_driver_version ascend_driver_version_info) file(READ ${ascend_driver_version_info} ASCEND_DRIVER_VERSION_CONTENTS) - string(REGEX MATCH "Version=([0-9]+\.[0-9]+\.[0-9]+)" ASCEND_DRIVER_VERSION "${ASCEND_DRIVER_VERSION_CONTENTS}") - string(REGEX REPLACE "Version=([0-9]+\.[0-9]+\.[0-9]+)" "\\1" ASCEND_DRIVER_VERSION "${ASCEND_DRIVER_VERSION}") + string(REGEX MATCH "Version=([0-9]+\.[0-9]+\.[0-9]+)" ASCEND_DRIVER_VERSION + "${ASCEND_DRIVER_VERSION_CONTENTS}") + string(REGEX REPLACE "Version=([0-9]+\.[0-9]+\.[0-9]+)" "\\1" + ASCEND_DRIVER_VERSION "${ASCEND_DRIVER_VERSION}") if(NOT ASCEND_DRIVER_VERSION) - set(ASCEND_DRIVER_VERSION "???") + set(ASCEND_DRIVER_VERSION "???") else() - message(STATUS "Current Ascend Driver version is ${ASCEND_DRIVER_VERSION}") + message( + STATUS "Current Ascend Driver version is ${ASCEND_DRIVER_VERSION}") endif() -endmacro() + endmacro() -if (WITH_ARM) - set(ASCEND_TOOLKIT_DIR ${ASCEND_DIR}/ascend-toolkit/latest/arm64-linux) -else() - set(ASCEND_TOOLKIT_DIR ${ASCEND_DIR}/ascend-toolkit/latest/x86_64-linux) -endif() + if(WITH_ARM) + set(ASCEND_TOOLKIT_DIR ${ASCEND_DIR}/ascend-toolkit/latest/arm64-linux) + else() + set(ASCEND_TOOLKIT_DIR ${ASCEND_DIR}/ascend-toolkit/latest/x86_64-linux) + endif() -find_ascend_toolkit_version(${ASCEND_TOOLKIT_DIR}/ascend_toolkit_install.info) -find_ascend_driver_version(${ASCEND_DIR}/driver/version.info) + find_ascend_toolkit_version(${ASCEND_TOOLKIT_DIR}/ascend_toolkit_install.info) + find_ascend_driver_version(${ASCEND_DIR}/driver/version.info) endif() diff --git a/cmake/external/boost.cmake b/cmake/external/boost.cmake index e47b608341b..810796831e2 100644 --- a/cmake/external/boost.cmake +++ b/cmake/external/boost.cmake @@ -14,7 +14,7 @@ include(ExternalProject) -set(BOOST_PROJECT "extern_boost") +set(BOOST_PROJECT "extern_boost") # To release PaddlePaddle as a pip package, we have to follow the # manylinux1 standard, which features as old Linux kernels and # compilers as possible and recommends CentOS 5. Indeed, the earliest @@ -22,36 +22,41 @@ set(BOOST_PROJECT "extern_boost") # version of boost, say, 1.66.0, doesn't build on CentOS 6. We # checked that the devtools package of CentOS 6 installs boost 1.41.0. # So we use 1.41.0 here. -set(BOOST_VER "1.41.0") +set(BOOST_VER "1.41.0") # boost_1_41_0_2021_10.tar.gz is almost the same with boost_1_41_0.tar.gz, # except in visualc.hpp i comment a warning of "unknown compiler version", # so if you need to change boost, you may need to block the warning similarly. -set(BOOST_TAR "boost_1_41_0_2021_10" CACHE STRING "" FORCE) -set(BOOST_URL "http://paddlepaddledeps.bj.bcebos.com/${BOOST_TAR}.tar.gz" CACHE STRING "" FORCE) +set(BOOST_TAR + "boost_1_41_0_2021_10" + CACHE STRING "" FORCE) +set(BOOST_URL + "http://paddlepaddledeps.bj.bcebos.com/${BOOST_TAR}.tar.gz" + CACHE STRING "" FORCE) -MESSAGE(STATUS "BOOST_VERSION: ${BOOST_VER}, BOOST_URL: ${BOOST_URL}") +message(STATUS "BOOST_VERSION: ${BOOST_VER}, BOOST_URL: ${BOOST_URL}") set(BOOST_PREFIX_DIR ${THIRD_PARTY_PATH}/boost) -set(BOOST_INCLUDE_DIR "${THIRD_PARTY_PATH}/boost/src/extern_boost" CACHE PATH "boost include directory." FORCE) +set(BOOST_INCLUDE_DIR + "${THIRD_PARTY_PATH}/boost/src/extern_boost" + CACHE PATH "boost include directory." FORCE) set_directory_properties(PROPERTIES CLEAN_NO_CUSTOM 1) include_directories(${BOOST_INCLUDE_DIR}) if(WIN32 AND MSVC_VERSION GREATER_EQUAL 1600) - add_definitions(-DBOOST_HAS_STATIC_ASSERT) + add_definitions(-DBOOST_HAS_STATIC_ASSERT) endif() ExternalProject_Add( - ${BOOST_PROJECT} - ${EXTERNAL_PROJECT_LOG_ARGS} - URL ${BOOST_URL} - URL_MD5 51be7cc203628dc0848e97eee32d79e3 - PREFIX ${BOOST_PREFIX_DIR} - DOWNLOAD_NO_PROGRESS 1 - CONFIGURE_COMMAND "" - BUILD_COMMAND "" - INSTALL_COMMAND "" - UPDATE_COMMAND "" - ) + ${BOOST_PROJECT} + ${EXTERNAL_PROJECT_LOG_ARGS} + URL ${BOOST_URL} + URL_MD5 51be7cc203628dc0848e97eee32d79e3 + PREFIX ${BOOST_PREFIX_DIR} + DOWNLOAD_NO_PROGRESS 1 + CONFIGURE_COMMAND "" + BUILD_COMMAND "" + INSTALL_COMMAND "" + UPDATE_COMMAND "") add_library(boost INTERFACE) diff --git a/cmake/external/box_ps.cmake b/cmake/external/box_ps.cmake index 85e1f94fd2c..2bb1fe0a0d1 100644 --- a/cmake/external/box_ps.cmake +++ b/cmake/external/box_ps.cmake @@ -12,48 +12,53 @@ # See the License for the specific language governing permissions and # limitations under the License. -INCLUDE(ExternalProject) +include(ExternalProject) -SET(BOX_PS_PROJECT "extern_box_ps") -IF((NOT DEFINED BOX_PS_VER) OR (NOT DEFINED BOX_PS_URL)) - MESSAGE(STATUS "use pre defined download url") - SET(BOX_PS_VER "0.1.1" CACHE STRING "" FORCE) - SET(BOX_PS_NAME "box_ps" CACHE STRING "" FORCE) - SET(BOX_PS_URL "http://box-ps.gz.bcebos.com/box_ps.tar.gz" CACHE STRING "" FORCE) -ENDIF() -MESSAGE(STATUS "BOX_PS_NAME: ${BOX_PS_NAME}, BOX_PS_URL: ${BOX_PS_URL}") -SET(BOX_PS_SOURCE_DIR "${THIRD_PARTY_PATH}/box_ps") -SET(BOX_PS_DOWNLOAD_DIR "${BOX_PS_SOURCE_DIR}/src/${BOX_PS_PROJECT}") -SET(BOX_PS_DST_DIR "box_ps") -SET(BOX_PS_INSTALL_ROOT "${THIRD_PARTY_PATH}/install") -SET(BOX_PS_INSTALL_DIR ${BOX_PS_INSTALL_ROOT}/${BOX_PS_DST_DIR}) -SET(BOX_PS_ROOT ${BOX_PS_INSTALL_DIR}) -SET(BOX_PS_INC_DIR ${BOX_PS_ROOT}/include) -SET(BOX_PS_LIB_DIR ${BOX_PS_ROOT}/lib) -SET(BOX_PS_LIB ${BOX_PS_LIB_DIR}/libbox_ps.so) -SET(CMAKE_INSTALL_RPATH "${CMAKE_INSTALL_RPATH}" "${BOX_PS_ROOT}/lib") +set(BOX_PS_PROJECT "extern_box_ps") +if((NOT DEFINED BOX_PS_VER) OR (NOT DEFINED BOX_PS_URL)) + message(STATUS "use pre defined download url") + set(BOX_PS_VER + "0.1.1" + CACHE STRING "" FORCE) + set(BOX_PS_NAME + "box_ps" + CACHE STRING "" FORCE) + set(BOX_PS_URL + "http://box-ps.gz.bcebos.com/box_ps.tar.gz" + CACHE STRING "" FORCE) +endif() +message(STATUS "BOX_PS_NAME: ${BOX_PS_NAME}, BOX_PS_URL: ${BOX_PS_URL}") +set(BOX_PS_SOURCE_DIR "${THIRD_PARTY_PATH}/box_ps") +set(BOX_PS_DOWNLOAD_DIR "${BOX_PS_SOURCE_DIR}/src/${BOX_PS_PROJECT}") +set(BOX_PS_DST_DIR "box_ps") +set(BOX_PS_INSTALL_ROOT "${THIRD_PARTY_PATH}/install") +set(BOX_PS_INSTALL_DIR ${BOX_PS_INSTALL_ROOT}/${BOX_PS_DST_DIR}) +set(BOX_PS_ROOT ${BOX_PS_INSTALL_DIR}) +set(BOX_PS_INC_DIR ${BOX_PS_ROOT}/include) +set(BOX_PS_LIB_DIR ${BOX_PS_ROOT}/lib) +set(BOX_PS_LIB ${BOX_PS_LIB_DIR}/libbox_ps.so) +set(CMAKE_INSTALL_RPATH "${CMAKE_INSTALL_RPATH}" "${BOX_PS_ROOT}/lib") -INCLUDE_DIRECTORIES(${BOX_PS_INC_DIR}) -FILE(WRITE ${BOX_PS_DOWNLOAD_DIR}/CMakeLists.txt - "PROJECT(BOX_PS)\n" - "cmake_minimum_required(VERSION 3.0)\n" +include_directories(${BOX_PS_INC_DIR}) +file( + WRITE ${BOX_PS_DOWNLOAD_DIR}/CMakeLists.txt + "PROJECT(BOX_PS)\n" "cmake_minimum_required(VERSION 3.0)\n" "install(DIRECTORY ${BOX_PS_NAME}/include ${BOX_PS_NAME}/lib \n" " DESTINATION ${BOX_PS_DST_DIR})\n") ExternalProject_Add( - ${BOX_PS_PROJECT} - ${EXTERNAL_PROJECT_LOG_ARGS} - PREFIX ${BOX_PS_SOURCE_DIR} - DOWNLOAD_DIR ${BOX_PS_DOWNLOAD_DIR} - DOWNLOAD_COMMAND wget --no-check-certificate ${BOX_PS_URL} -c -q -O ${BOX_PS_NAME}.tar.gz - && tar zxvf ${BOX_PS_NAME}.tar.gz - DOWNLOAD_NO_PROGRESS 1 - UPDATE_COMMAND "" - CMAKE_ARGS -DCMAKE_INSTALL_PREFIX=${BOX_PS_INSTALL_ROOT} - -DCMAKE_BUILD_TYPE=${THIRD_PARTY_BUILD_TYPE} - CMAKE_CACHE_ARGS -DCMAKE_INSTALL_PREFIX:PATH=${BOX_PS_INSTALL_ROOT} - -DCMAKE_BUILD_TYPE=${THIRD_PARTY_BUILD_TYPE} - BUILD_BYPRODUCTS ${BOX_PS_LIB} -) -ADD_LIBRARY(box_ps SHARED IMPORTED GLOBAL) -SET_PROPERTY(TARGET box_ps PROPERTY IMPORTED_LOCATION ${BOX_PS_LIB}) -ADD_DEPENDENCIES(box_ps ${BOX_PS_PROJECT}) + ${BOX_PS_PROJECT} + ${EXTERNAL_PROJECT_LOG_ARGS} + PREFIX ${BOX_PS_SOURCE_DIR} + DOWNLOAD_DIR ${BOX_PS_DOWNLOAD_DIR} + DOWNLOAD_COMMAND wget --no-check-certificate ${BOX_PS_URL} -c -q -O + ${BOX_PS_NAME}.tar.gz && tar zxvf ${BOX_PS_NAME}.tar.gz + DOWNLOAD_NO_PROGRESS 1 + UPDATE_COMMAND "" + CMAKE_ARGS -DCMAKE_INSTALL_PREFIX=${BOX_PS_INSTALL_ROOT} + -DCMAKE_BUILD_TYPE=${THIRD_PARTY_BUILD_TYPE} + CMAKE_CACHE_ARGS -DCMAKE_INSTALL_PREFIX:PATH=${BOX_PS_INSTALL_ROOT} + -DCMAKE_BUILD_TYPE=${THIRD_PARTY_BUILD_TYPE} + BUILD_BYPRODUCTS ${BOX_PS_LIB}) +add_library(box_ps SHARED IMPORTED GLOBAL) +set_property(TARGET box_ps PROPERTY IMPORTED_LOCATION ${BOX_PS_LIB}) +add_dependencies(box_ps ${BOX_PS_PROJECT}) diff --git a/cmake/external/brpc.cmake b/cmake/external/brpc.cmake index c891708751a..4434e3fbed1 100755 --- a/cmake/external/brpc.cmake +++ b/cmake/external/brpc.cmake @@ -12,66 +12,80 @@ # See the License for the specific language governing permissions and # limitations under the License. -INCLUDE(ExternalProject) +include(ExternalProject) find_package(OpenSSL REQUIRED) message(STATUS "ssl:" ${OPENSSL_SSL_LIBRARY}) message(STATUS "crypto:" ${OPENSSL_CRYPTO_LIBRARY}) -ADD_LIBRARY(ssl SHARED IMPORTED GLOBAL) -SET_PROPERTY(TARGET ssl PROPERTY IMPORTED_LOCATION ${OPENSSL_SSL_LIBRARY}) +add_library(ssl SHARED IMPORTED GLOBAL) +set_property(TARGET ssl PROPERTY IMPORTED_LOCATION ${OPENSSL_SSL_LIBRARY}) -ADD_LIBRARY(crypto SHARED IMPORTED GLOBAL) -SET_PROPERTY(TARGET crypto PROPERTY IMPORTED_LOCATION ${OPENSSL_CRYPTO_LIBRARY}) +add_library(crypto SHARED IMPORTED GLOBAL) +set_property(TARGET crypto PROPERTY IMPORTED_LOCATION ${OPENSSL_CRYPTO_LIBRARY}) -SET(BRPC_PREFIX_DIR ${THIRD_PARTY_PATH}/brpc) -SET(BRPC_INSTALL_DIR ${THIRD_PARTY_PATH}/install/brpc) -SET(BRPC_INCLUDE_DIR "${BRPC_INSTALL_DIR}/include" CACHE PATH "brpc include directory." FORCE) -SET(BRPC_LIBRARIES "${BRPC_INSTALL_DIR}/lib/libbrpc.a" CACHE FILEPATH "brpc library." FORCE) +set(BRPC_PREFIX_DIR ${THIRD_PARTY_PATH}/brpc) +set(BRPC_INSTALL_DIR ${THIRD_PARTY_PATH}/install/brpc) +set(BRPC_INCLUDE_DIR + "${BRPC_INSTALL_DIR}/include" + CACHE PATH "brpc include directory." FORCE) +set(BRPC_LIBRARIES + "${BRPC_INSTALL_DIR}/lib/libbrpc.a" + CACHE FILEPATH "brpc library." FORCE) -INCLUDE_DIRECTORIES(${BRPC_INCLUDE_DIR}) +include_directories(${BRPC_INCLUDE_DIR}) # Reference https://stackoverflow.com/questions/45414507/pass-a-list-of-prefix-paths-to-externalproject-add-in-cmake-args -set(prefix_path "${THIRD_PARTY_PATH}/install/gflags|${THIRD_PARTY_PATH}/install/leveldb|${THIRD_PARTY_PATH}/install/snappy|${THIRD_PARTY_PATH}/install/gtest|${THIRD_PARTY_PATH}/install/protobuf|${THIRD_PARTY_PATH}/install/zlib|${THIRD_PARTY_PATH}/install/glog") +set(prefix_path + "${THIRD_PARTY_PATH}/install/gflags|${THIRD_PARTY_PATH}/install/leveldb|${THIRD_PARTY_PATH}/install/snappy|${THIRD_PARTY_PATH}/install/gtest|${THIRD_PARTY_PATH}/install/protobuf|${THIRD_PARTY_PATH}/install/zlib|${THIRD_PARTY_PATH}/install/glog" +) # If minimal .a is need, you can set WITH_DEBUG_SYMBOLS=OFF ExternalProject_Add( - extern_brpc - ${EXTERNAL_PROJECT_LOG_ARGS} - # TODO(gongwb): change to de newst repo when they changed - GIT_REPOSITORY "https://github.com/wangjiawei04/brpc" - #GIT_REPOSITORY "https://github.com/ziyoujiyi/brpc" # ssl error in the previous repo(can be mannual fixed) - GIT_TAG "e203afb794caf027da0f1e0776443e7d20c0c28e" - PREFIX ${BRPC_PREFIX_DIR} - UPDATE_COMMAND "" - CMAKE_ARGS -DCMAKE_CXX_COMPILER=${CMAKE_CXX_COMPILER} - -DCMAKE_C_COMPILER=${CMAKE_C_COMPILER} - -DCMAKE_CXX_FLAGS=${CMAKE_CXX_FLAGS} - -DCMAKE_C_FLAGS=${CMAKE_C_FLAGS} - -DCMAKE_INSTALL_PREFIX=${BRPC_INSTALL_DIR} - -DCMAKE_INSTALL_LIBDIR=${BRPC_INSTALL_DIR}/lib - -DCMAKE_POSITION_INDEPENDENT_CODE=ON - -DCMAKE_BUILD_TYPE=${THIRD_PARTY_BUILD_TYPE} - -DCMAKE_PREFIX_PATH=${prefix_path} - -DWITH_GLOG=ON - -DIOBUF_WITH_HUGE_BLOCK=ON - -DBRPC_WITH_RDMA=${WITH_BRPC_RDMA} - ${EXTERNAL_OPTIONAL_ARGS} - LIST_SEPARATOR | - CMAKE_CACHE_ARGS -DCMAKE_INSTALL_PREFIX:PATH=${BRPC_INSTALL_DIR} - -DCMAKE_INSTALL_LIBDIR:PATH=${BRPC_INSTALL_DIR}/lib - -DCMAKE_POSITION_INDEPENDENT_CODE:BOOL=ON - -DCMAKE_BUILD_TYPE:STRING=${THIRD_PARTY_BUILD_TYPE} - BUILD_BYPRODUCTS ${BRPC_LIBRARIES} -) + extern_brpc + ${EXTERNAL_PROJECT_LOG_ARGS} + # TODO(gongwb): change to de newst repo when they changed + GIT_REPOSITORY "https://github.com/wangjiawei04/brpc" + #GIT_REPOSITORY "https://github.com/ziyoujiyi/brpc" # ssl error in the previous repo(can be mannual fixed) + GIT_TAG "e203afb794caf027da0f1e0776443e7d20c0c28e" + PREFIX ${BRPC_PREFIX_DIR} + UPDATE_COMMAND "" + CMAKE_ARGS -DCMAKE_CXX_COMPILER=${CMAKE_CXX_COMPILER} + -DCMAKE_C_COMPILER=${CMAKE_C_COMPILER} + -DCMAKE_CXX_FLAGS=${CMAKE_CXX_FLAGS} + -DCMAKE_C_FLAGS=${CMAKE_C_FLAGS} + -DCMAKE_INSTALL_PREFIX=${BRPC_INSTALL_DIR} + -DCMAKE_INSTALL_LIBDIR=${BRPC_INSTALL_DIR}/lib + -DCMAKE_POSITION_INDEPENDENT_CODE=ON + -DCMAKE_BUILD_TYPE=${THIRD_PARTY_BUILD_TYPE} + -DCMAKE_PREFIX_PATH=${prefix_path} + -DWITH_GLOG=ON + -DIOBUF_WITH_HUGE_BLOCK=ON + -DBRPC_WITH_RDMA=${WITH_BRPC_RDMA} + ${EXTERNAL_OPTIONAL_ARGS} + LIST_SEPARATOR | + CMAKE_CACHE_ARGS + -DCMAKE_INSTALL_PREFIX:PATH=${BRPC_INSTALL_DIR} + -DCMAKE_INSTALL_LIBDIR:PATH=${BRPC_INSTALL_DIR}/lib + -DCMAKE_POSITION_INDEPENDENT_CODE:BOOL=ON + -DCMAKE_BUILD_TYPE:STRING=${THIRD_PARTY_BUILD_TYPE} + BUILD_BYPRODUCTS ${BRPC_LIBRARIES}) # ADD_DEPENDENCIES(extern_brpc protobuf ssl crypto leveldb gflags glog gtest snappy) -ADD_DEPENDENCIES(extern_brpc protobuf ssl crypto leveldb gflags glog snappy) -ADD_LIBRARY(brpc STATIC IMPORTED GLOBAL) -SET_PROPERTY(TARGET brpc PROPERTY IMPORTED_LOCATION ${BRPC_LIBRARIES}) -ADD_DEPENDENCIES(brpc extern_brpc) +add_dependencies( + extern_brpc + protobuf + ssl + crypto + leveldb + gflags + glog + snappy) +add_library(brpc STATIC IMPORTED GLOBAL) +set_property(TARGET brpc PROPERTY IMPORTED_LOCATION ${BRPC_LIBRARIES}) +add_dependencies(brpc extern_brpc) add_definitions(-DBRPC_WITH_GLOG) -LIST(APPEND external_project_dependencies brpc) +list(APPEND external_project_dependencies brpc) diff --git a/cmake/external/cinn.cmake b/cmake/external/cinn.cmake index 2ec9a3faa07..5dd84657c86 100644 --- a/cmake/external/cinn.cmake +++ b/cmake/external/cinn.cmake @@ -12,7 +12,7 @@ # See the License for the specific language governing permissions and # limitations under the License. -if (NOT WITH_CINN) +if(NOT WITH_CINN) return() endif() @@ -27,36 +27,33 @@ add_definitions(-w) include(ExternalProject) set(CINN_PREFIX_DIR ${THIRD_PARTY_PATH}/CINN) set(CINN_GIT_TAG release/v0.2) -set(CINN_OPTIONAL_ARGS -DPY_VERSION=${PY_VERSION} - -DWITH_CUDA=${WITH_GPU} - -DWITH_CUDNN=${WITH_GPU} - -DWITH_MKL_CBLAS=${WITH_MKL} - -DWITH_MKLDNN=${WITH_MKL} - -DPUBLISH_LIBS=ON - -DWITH_TESTING=ON -) +set(CINN_OPTIONAL_ARGS + -DPY_VERSION=${PY_VERSION} + -DWITH_CUDA=${WITH_GPU} + -DWITH_CUDNN=${WITH_GPU} + -DWITH_MKL_CBLAS=${WITH_MKL} + -DWITH_MKLDNN=${WITH_MKL} + -DPUBLISH_LIBS=ON + -DWITH_TESTING=ON) set(CINN_BUILD_COMMAND $(MAKE) cinnapi -j) ExternalProject_Add( external_cinn ${EXTERNAL_PROJECT_LOG_ARGS} - GIT_REPOSITORY "${GIT_URL}/PaddlePaddle/CINN.git" - GIT_TAG ${CINN_GIT_TAG} - PREFIX ${CINN_PREFIX_DIR} - BUILD_COMMAND ${CINN_BUILD_COMMAND} - INSTALL_COMMAND "" - CMAKE_ARGS ${CINN_OPTIONAL_ARGS}) + GIT_REPOSITORY "${GIT_URL}/PaddlePaddle/CINN.git" + GIT_TAG ${CINN_GIT_TAG} + PREFIX ${CINN_PREFIX_DIR} + BUILD_COMMAND ${CINN_BUILD_COMMAND} + INSTALL_COMMAND "" + CMAKE_ARGS ${CINN_OPTIONAL_ARGS}) - - -ExternalProject_Get_property(external_cinn BINARY_DIR) -ExternalProject_Get_property(external_cinn SOURCE_DIR) +ExternalProject_Get_Property(external_cinn BINARY_DIR) +ExternalProject_Get_Property(external_cinn SOURCE_DIR) set(CINN_BINARY_DIR ${BINARY_DIR}) set(CINN_SOURCE_DIR ${SOURCE_DIR}) message(STATUS "CINN BINARY_DIR: ${CINN_BINARY_DIR}") message(STATUS "CINN SOURCE_DIR: ${CINN_SOURCE_DIR}") - ###################################### # Add CINN's dependencies header files ###################################### @@ -82,6 +79,7 @@ set(CINN_LIB_LOCATION "${CINN_BINARY_DIR}/dist/cinn/lib") set(CINN_INCLUDE_DIR "${CINN_BINARY_DIR}/dist/cinn/include") add_library(cinn SHARED IMPORTED GLOBAL) -set_target_properties(cinn PROPERTIES IMPORTED_LOCATION "${CINN_LIB_LOCATION}/${CINN_LIB_NAME}") +set_target_properties(cinn PROPERTIES IMPORTED_LOCATION + "${CINN_LIB_LOCATION}/${CINN_LIB_NAME}") include_directories(${CINN_INCLUDE_DIR}) add_dependencies(cinn external_cinn) diff --git a/cmake/external/concurrentqueue.cmake b/cmake/external/concurrentqueue.cmake index 9e4331ae6fd..0ff3612efed 100644 --- a/cmake/external/concurrentqueue.cmake +++ b/cmake/external/concurrentqueue.cmake @@ -16,27 +16,32 @@ include(ExternalProject) set(CONCURRENTQUEUE_PROJECT "extern_concurrentqueue") set(CONCURRENTQUEUE_VER "v1.0.3") -SET(CONCURRENTQUEUE_URL_MD5 118e5bb661b567634647312991e10222) -set(CONCURRENTQUEUE_PREFIX_URL "https://github.com/cameron314/concurrentqueue/archive/refs/tags") -set(CONCURRENTQUEUE_URL "${CONCURRENTQUEUE_PREFIX_URL}/${CONCURRENTQUEUE_VER}.tar.gz") +set(CONCURRENTQUEUE_URL_MD5 118e5bb661b567634647312991e10222) +set(CONCURRENTQUEUE_PREFIX_URL + "https://github.com/cameron314/concurrentqueue/archive/refs/tags") +set(CONCURRENTQUEUE_URL + "${CONCURRENTQUEUE_PREFIX_URL}/${CONCURRENTQUEUE_VER}.tar.gz") -MESSAGE(STATUS "CONCURRENTQUEUE_VERSION: ${CONCURRENTQUEUE_VER}, CONCURRENTQUEUE_URL: ${CONCURRENTQUEUE_URL}") +message( + STATUS + "CONCURRENTQUEUE_VERSION: ${CONCURRENTQUEUE_VER}, CONCURRENTQUEUE_URL: ${CONCURRENTQUEUE_URL}" +) set(CONCURRENTQUEUE_PREFIX_DIR ${THIRD_PARTY_PATH}/concurrentqueue) set(CONCURRENTQUEUE_SOURCE_DIR ${THIRD_PARTY_PATH}/concurrentqueue/src/) -set(CONCURRENTQUEUE_INCLUDE_DIR "${CONCURRENTQUEUE_SOURCE_DIR}/extern_concurrentqueue") +set(CONCURRENTQUEUE_INCLUDE_DIR + "${CONCURRENTQUEUE_SOURCE_DIR}/extern_concurrentqueue") ExternalProject_Add( - ${CONCURRENTQUEUE_PROJECT} - ${EXTERNAL_PROJECT_LOG_ARGS} - URL ${CONCURRENTQUEUE_URL} - URL_MD5 ${CONCURRENTQUEUE_URL_MD5} - PREFIX ${CONCURRENTQUEUE_PREFIX_DIR} - DOWNLOAD_NO_PROGRESS 1 - CONFIGURE_COMMAND "" - BUILD_COMMAND "" - INSTALL_COMMAND "" - UPDATE_COMMAND "" - ) + ${CONCURRENTQUEUE_PROJECT} + ${EXTERNAL_PROJECT_LOG_ARGS} + URL ${CONCURRENTQUEUE_URL} + URL_MD5 ${CONCURRENTQUEUE_URL_MD5} + PREFIX ${CONCURRENTQUEUE_PREFIX_DIR} + DOWNLOAD_NO_PROGRESS 1 + CONFIGURE_COMMAND "" + BUILD_COMMAND "" + INSTALL_COMMAND "" + UPDATE_COMMAND "") include_directories(${CONCURRENTQUEUE_INCLUDE_DIR}) diff --git a/cmake/external/cryptopp.cmake b/cmake/external/cryptopp.cmake index 27a013c1763..ff4d3b5c9ea 100644 --- a/cmake/external/cryptopp.cmake +++ b/cmake/external/cryptopp.cmake @@ -12,68 +12,77 @@ # See the License for the specific language governing permissions and # limitations under the License. -INCLUDE(ExternalProject) +include(ExternalProject) -SET(CRYPTOPP_PREFIX_DIR ${THIRD_PARTY_PATH}/cryptopp) -SET(CRYPTOPP_INSTALL_DIR ${THIRD_PARTY_PATH}/install/cryptopp) -SET(CRYPTOPP_INCLUDE_DIR "${CRYPTOPP_INSTALL_DIR}/include" CACHE PATH "cryptopp include directory." FORCE) -SET(CRYPTOPP_REPOSITORY ${GIT_URL}/weidai11/cryptopp.git) -SET(CRYPTOPP_TAG CRYPTOPP_8_2_0) +set(CRYPTOPP_PREFIX_DIR ${THIRD_PARTY_PATH}/cryptopp) +set(CRYPTOPP_INSTALL_DIR ${THIRD_PARTY_PATH}/install/cryptopp) +set(CRYPTOPP_INCLUDE_DIR + "${CRYPTOPP_INSTALL_DIR}/include" + CACHE PATH "cryptopp include directory." FORCE) +set(CRYPTOPP_REPOSITORY ${GIT_URL}/weidai11/cryptopp.git) +set(CRYPTOPP_TAG CRYPTOPP_8_2_0) -IF(WIN32) - SET(CRYPTOPP_LIBRARIES "${CRYPTOPP_INSTALL_DIR}/lib/cryptopp-static.lib" CACHE FILEPATH "cryptopp library." FORCE) +if(WIN32) + set(CRYPTOPP_LIBRARIES + "${CRYPTOPP_INSTALL_DIR}/lib/cryptopp-static.lib" + CACHE FILEPATH "cryptopp library." FORCE) # There is a compilation parameter "/FI\"winapifamily.h\"" or "/FIwinapifamily.h" can't be used correctly # with Ninja on Windows. The only difference between the patch file and original # file is that the compilation parameters are changed to '/nologo'. This # patch command can be removed when upgrading to a higher version. if("${CMAKE_GENERATOR}" STREQUAL "Ninja") - set(CRYPTOPP_PATCH_COMMAND ${CMAKE_COMMAND} -E copy_if_different "${PADDLE_SOURCE_DIR}/patches/cryptopp/CMakeLists.txt" "/") + set(CRYPTOPP_PATCH_COMMAND + ${CMAKE_COMMAND} -E copy_if_different + "${PADDLE_SOURCE_DIR}/patches/cryptopp/CMakeLists.txt" "/") endif() -ELSE(WIN32) - SET(CRYPTOPP_LIBRARIES "${CRYPTOPP_INSTALL_DIR}/lib/libcryptopp.a" CACHE FILEPATH "cryptopp library." FORCE) -ENDIF(WIN32) +else(WIN32) + set(CRYPTOPP_LIBRARIES + "${CRYPTOPP_INSTALL_DIR}/lib/libcryptopp.a" + CACHE FILEPATH "cryptopp library." FORCE) +endif(WIN32) -IF(APPLE AND WITH_ARM) - SET(CMAKE_CXX_FLAGS "-DCRYPTOPP_ARM_CRC32_AVAILABLE=0") -ENDIF() +if(APPLE AND WITH_ARM) + set(CMAKE_CXX_FLAGS "-DCRYPTOPP_ARM_CRC32_AVAILABLE=0") +endif() -set(CRYPTOPP_CMAKE_ARGS ${COMMON_CMAKE_ARGS} - -DBUILD_SHARED=ON - -DBUILD_STATIC=ON - -DBUILD_TESTING=OFF - -DCMAKE_INSTALL_LIBDIR=${CRYPTOPP_INSTALL_DIR}/lib - -DCMAKE_INSTALL_PREFIX=${CRYPTOPP_INSTALL_DIR} - -DCMAKE_BUILD_TYPE=${THIRD_PARTY_BUILD_TYPE} - -DCMAKE_CXX_FLAGS=${CMAKE_CXX_FLAGS} - -DCMAKE_CXX_FLAGS_RELEASE=${CMAKE_CXX_FLAGS_RELEASE} - -DCMAKE_C_COMPILER=${CMAKE_C_COMPILER} - -DCMAKE_CXX_COMPILER=${CMAKE_CXX_COMPILER} -) +set(CRYPTOPP_CMAKE_ARGS + ${COMMON_CMAKE_ARGS} + -DBUILD_SHARED=ON + -DBUILD_STATIC=ON + -DBUILD_TESTING=OFF + -DCMAKE_INSTALL_LIBDIR=${CRYPTOPP_INSTALL_DIR}/lib + -DCMAKE_INSTALL_PREFIX=${CRYPTOPP_INSTALL_DIR} + -DCMAKE_BUILD_TYPE=${THIRD_PARTY_BUILD_TYPE} + -DCMAKE_CXX_FLAGS=${CMAKE_CXX_FLAGS} + -DCMAKE_CXX_FLAGS_RELEASE=${CMAKE_CXX_FLAGS_RELEASE} + -DCMAKE_C_COMPILER=${CMAKE_C_COMPILER} + -DCMAKE_CXX_COMPILER=${CMAKE_CXX_COMPILER}) -INCLUDE_DIRECTORIES(${CRYPTOPP_INCLUDE_DIR}) +include_directories(${CRYPTOPP_INCLUDE_DIR}) ExternalProject_Add( - extern_cryptopp - ${EXTERNAL_PROJECT_LOG_ARGS} - ${SHALLOW_CLONE} - GIT_REPOSITORY ${CRYPTOPP_REPOSITORY} - GIT_TAG ${CRYPTOPP_TAG} - PREFIX ${CRYPTOPP_PREFIX_DIR} - UPDATE_COMMAND "" - PATCH_COMMAND - COMMAND ${CMAKE_COMMAND} -E remove_directory "/cmake/" - COMMAND git clone ${GIT_URL}/noloader/cryptopp-cmake "/cmake" - COMMAND cd "/cmake" && git checkout tags/${CRYPTOPP_TAG} -b ${CRYPTOPP_TAG} - COMMAND ${CMAKE_COMMAND} -E copy_directory "/cmake/" "/" - COMMAND ${CRYPTOPP_PATCH_COMMAND} - INSTALL_DIR ${CRYPTOPP_INSTALL_DIR} - CMAKE_ARGS ${CRYPTOPP_CMAKE_ARGS} - CMAKE_CACHE_ARGS -DCMAKE_INSTALL_PREFIX:PATH=${CRYPTOPP_INSTALL_DIR} - -DCMAKE_POSITION_INDEPENDENT_CODE:BOOL=ON - -DCMAKE_BUILD_TYPE:STRING=${THIRD_PARTY_BUILD_TYPE} - BUILD_BYPRODUCTS ${CRYPTOPP_LIBRARIES} -) + extern_cryptopp + ${EXTERNAL_PROJECT_LOG_ARGS} ${SHALLOW_CLONE} + GIT_REPOSITORY ${CRYPTOPP_REPOSITORY} + GIT_TAG ${CRYPTOPP_TAG} + PREFIX ${CRYPTOPP_PREFIX_DIR} + UPDATE_COMMAND "" + PATCH_COMMAND + COMMAND ${CMAKE_COMMAND} -E remove_directory "/cmake/" + COMMAND git clone ${GIT_URL}/noloader/cryptopp-cmake "/cmake" + COMMAND cd "/cmake" && git checkout tags/${CRYPTOPP_TAG} -b + ${CRYPTOPP_TAG} + COMMAND ${CMAKE_COMMAND} -E copy_directory "/cmake/" + "/" + COMMAND ${CRYPTOPP_PATCH_COMMAND} + INSTALL_DIR ${CRYPTOPP_INSTALL_DIR} + CMAKE_ARGS ${CRYPTOPP_CMAKE_ARGS} + CMAKE_CACHE_ARGS + -DCMAKE_INSTALL_PREFIX:PATH=${CRYPTOPP_INSTALL_DIR} + -DCMAKE_POSITION_INDEPENDENT_CODE:BOOL=ON + -DCMAKE_BUILD_TYPE:STRING=${THIRD_PARTY_BUILD_TYPE} + BUILD_BYPRODUCTS ${CRYPTOPP_LIBRARIES}) -ADD_LIBRARY(cryptopp STATIC IMPORTED GLOBAL) -SET_PROPERTY(TARGET cryptopp PROPERTY IMPORTED_LOCATION ${CRYPTOPP_LIBRARIES}) -ADD_DEPENDENCIES(cryptopp extern_cryptopp) +add_library(cryptopp STATIC IMPORTED GLOBAL) +set_property(TARGET cryptopp PROPERTY IMPORTED_LOCATION ${CRYPTOPP_LIBRARIES}) +add_dependencies(cryptopp extern_cryptopp) diff --git a/cmake/external/cub.cmake b/cmake/external/cub.cmake index f263086e8be..04fad252dac 100644 --- a/cmake/external/cub.cmake +++ b/cmake/external/cub.cmake @@ -14,32 +14,32 @@ include(ExternalProject) -# Note(zhouwei): extern_cub has code __FILE_, If the path of extern_cub is changed, -# it will effect about 30+ cu files sccache hit and slow compile speed on windows. +# Note(zhouwei): extern_cub has code __FILE_, If the path of extern_cub is changed, +# it will effect about 30+ cu files sccache hit and slow compile speed on windows. # Therefore, a fixed CUB_PATH will be input to increase the sccache hit rate. -set(CUB_PATH "${THIRD_PARTY_PATH}/cub" CACHE STRING "A path setting for external_cub path.") -set(CUB_PREFIX_DIR ${CUB_PATH}) +set(CUB_PATH + "${THIRD_PARTY_PATH}/cub" + CACHE STRING "A path setting for external_cub path.") +set(CUB_PREFIX_DIR ${CUB_PATH}) -set(CUB_REPOSITORY ${GIT_URL}/NVlabs/cub.git) -set(CUB_TAG 1.8.0) +set(CUB_REPOSITORY ${GIT_URL}/NVlabs/cub.git) +set(CUB_TAG 1.8.0) -SET(CUB_INCLUDE_DIR ${CUB_PREFIX_DIR}/src/extern_cub) +set(CUB_INCLUDE_DIR ${CUB_PREFIX_DIR}/src/extern_cub) message("CUB_INCLUDE_DIR is ${CUB_INCLUDE_DIR}") include_directories(${CUB_INCLUDE_DIR}) ExternalProject_Add( extern_cub - ${EXTERNAL_PROJECT_LOG_ARGS} - ${SHALLOW_CLONE} - GIT_REPOSITORY ${CUB_REPOSITORY} - GIT_TAG ${CUB_TAG} - PREFIX ${CUB_PREFIX_DIR} - UPDATE_COMMAND "" + ${EXTERNAL_PROJECT_LOG_ARGS} ${SHALLOW_CLONE} + GIT_REPOSITORY ${CUB_REPOSITORY} + GIT_TAG ${CUB_TAG} + PREFIX ${CUB_PREFIX_DIR} + UPDATE_COMMAND "" CONFIGURE_COMMAND "" - BUILD_COMMAND "" - INSTALL_COMMAND "" - TEST_COMMAND "" -) + BUILD_COMMAND "" + INSTALL_COMMAND "" + TEST_COMMAND "") add_library(cub INTERFACE) diff --git a/cmake/external/dgc.cmake b/cmake/external/dgc.cmake index 711d6c5b10a..9c22ee89d48 100644 --- a/cmake/external/dgc.cmake +++ b/cmake/external/dgc.cmake @@ -12,32 +12,35 @@ # See the License for the specific language governing permissions and # limitations under the License. -INCLUDE(ExternalProject) +include(ExternalProject) -SET(DGC_PREFIX_DIR "${THIRD_PARTY_PATH}/dgc") -SET(DGC_SOURCES_DIR "${THIRD_PARTY_PATH}/dgc/src/extern_dgc") -SET(DGC_INSTALL_DIR "${THIRD_PARTY_PATH}/install/dgc") -SET(DGC_INCLUDE_DIR "${DGC_INSTALL_DIR}/include" CACHE PATH "dgc include directory." FORCE) -SET(DGC_LIBRARIES "${DGC_INSTALL_DIR}/lib/libdgc.a" CACHE FILEPATH "dgc library." FORCE) -SET(DGC_URL "https://fleet.bj.bcebos.com/dgc/collective_f66ef73.tgz") -INCLUDE_DIRECTORIES(${DGC_INCLUDE_DIR}) +set(DGC_PREFIX_DIR "${THIRD_PARTY_PATH}/dgc") +set(DGC_SOURCES_DIR "${THIRD_PARTY_PATH}/dgc/src/extern_dgc") +set(DGC_INSTALL_DIR "${THIRD_PARTY_PATH}/install/dgc") +set(DGC_INCLUDE_DIR + "${DGC_INSTALL_DIR}/include" + CACHE PATH "dgc include directory." FORCE) +set(DGC_LIBRARIES + "${DGC_INSTALL_DIR}/lib/libdgc.a" + CACHE FILEPATH "dgc library." FORCE) +set(DGC_URL "https://fleet.bj.bcebos.com/dgc/collective_f66ef73.tgz") +include_directories(${DGC_INCLUDE_DIR}) ExternalProject_Add( - extern_dgc - ${EXTERNAL_PROJECT_LOG_ARGS} - URL ${DGC_URL} - URL_MD5 "94e6fa1bc97169d0e1aad44570fe3251" - PREFIX "${DGC_PREFIX_DIR}" - CONFIGURE_COMMAND "" - BUILD_COMMAND make -j $(nproc) - INSTALL_COMMAND mkdir -p ${DGC_INSTALL_DIR}/lib/ ${DGC_INCLUDE_DIR}/dgc - && cp ${DGC_SOURCES_DIR}/build/lib/libdgc.a ${DGC_LIBRARIES} - && cp ${DGC_SOURCES_DIR}/build/include/dgc.h ${DGC_INCLUDE_DIR}/dgc/ - BUILD_IN_SOURCE 1 - BUILD_BYPRODUCTS ${DGC_LIBRARIES} -) - -ADD_LIBRARY(dgc STATIC IMPORTED GLOBAL) -SET_PROPERTY(TARGET dgc PROPERTY IMPORTED_LOCATION ${DGC_LIBRARIES}) -ADD_DEPENDENCIES(dgc extern_dgc) + extern_dgc + ${EXTERNAL_PROJECT_LOG_ARGS} + URL ${DGC_URL} + URL_MD5 "94e6fa1bc97169d0e1aad44570fe3251" + PREFIX "${DGC_PREFIX_DIR}" + CONFIGURE_COMMAND "" + BUILD_COMMAND make -j $(nproc) + INSTALL_COMMAND + mkdir -p ${DGC_INSTALL_DIR}/lib/ ${DGC_INCLUDE_DIR}/dgc && cp + ${DGC_SOURCES_DIR}/build/lib/libdgc.a ${DGC_LIBRARIES} && cp + ${DGC_SOURCES_DIR}/build/include/dgc.h ${DGC_INCLUDE_DIR}/dgc/ + BUILD_IN_SOURCE 1 + BUILD_BYPRODUCTS ${DGC_LIBRARIES}) +add_library(dgc STATIC IMPORTED GLOBAL) +set_property(TARGET dgc PROPERTY IMPORTED_LOCATION ${DGC_LIBRARIES}) +add_dependencies(dgc extern_dgc) diff --git a/cmake/external/dirent.cmake b/cmake/external/dirent.cmake index 59caa437415..51d8eaac29e 100644 --- a/cmake/external/dirent.cmake +++ b/cmake/external/dirent.cmake @@ -15,30 +15,28 @@ # Note(chenxin33): dirent.h is only exist in Linux, so get it from github when build in windows. # use dirent tag v1.23.2 on 09/05//2018 https://github.com/tronkko/dirent.git -INCLUDE (ExternalProject) +include(ExternalProject) -SET(DIRENT_PREFIX_DIR ${THIRD_PARTY_PATH}/dirent) -SET(DIRENT_INCLUDE_DIR ${THIRD_PARTY_PATH}/dirent/src/extern_dirent/include) +set(DIRENT_PREFIX_DIR ${THIRD_PARTY_PATH}/dirent) +set(DIRENT_INCLUDE_DIR ${THIRD_PARTY_PATH}/dirent/src/extern_dirent/include) include_directories(${DIRENT_INCLUDE_DIR}) -set(DIRENT_REPOSITORY ${GIT_URL}/tronkko/dirent) -set(DIRENT_TAG 1.23.2) +set(DIRENT_REPOSITORY ${GIT_URL}/tronkko/dirent) +set(DIRENT_TAG 1.23.2) ExternalProject_Add( extern_dirent - ${EXTERNAL_PROJECT_LOG_ARGS} - ${SHALLOW_CLONE} - GIT_REPOSITORY ${DIRENT_REPOSITORY} - GIT_TAG ${DIRENT_TAG} - PREFIX ${DIRENT_PREFIX_DIR} - UPDATE_COMMAND "" + ${EXTERNAL_PROJECT_LOG_ARGS} ${SHALLOW_CLONE} + GIT_REPOSITORY ${DIRENT_REPOSITORY} + GIT_TAG ${DIRENT_TAG} + PREFIX ${DIRENT_PREFIX_DIR} + UPDATE_COMMAND "" CONFIGURE_COMMAND "" - BUILD_COMMAND "" - INSTALL_COMMAND "" - TEST_COMMAND "" -) + BUILD_COMMAND "" + INSTALL_COMMAND "" + TEST_COMMAND "") add_library(dirent INTERFACE) -add_dependencies(dirent extern_dirent) \ No newline at end of file +add_dependencies(dirent extern_dirent) diff --git a/cmake/external/dlpack.cmake b/cmake/external/dlpack.cmake index 1aeea752e66..727202a4346 100644 --- a/cmake/external/dlpack.cmake +++ b/cmake/external/dlpack.cmake @@ -17,24 +17,22 @@ include(ExternalProject) set(DLPACK_PREFIX_DIR ${THIRD_PARTY_PATH}/dlpack) set(DLPACK_REPOSITORY ${GIT_URL}/dmlc/dlpack.git) -set(DLPACK_TAG v0.4) +set(DLPACK_TAG v0.4) -set(DLPACK_INCLUDE_DIR ${THIRD_PARTY_PATH}/dlpack/src/extern_dlpack/include) +set(DLPACK_INCLUDE_DIR ${THIRD_PARTY_PATH}/dlpack/src/extern_dlpack/include) include_directories(${DLPACK_INCLUDE_DIR}) ExternalProject_Add( extern_dlpack - ${EXTERNAL_PROJECT_LOG_ARGS} - ${SHALLOW_CLONE} - GIT_REPOSITORY ${DLPACK_REPOSITORY} - GIT_TAG ${DLPACK_TAG} - PREFIX ${DLPACK_PREFIX_DIR} - UPDATE_COMMAND "" + ${EXTERNAL_PROJECT_LOG_ARGS} ${SHALLOW_CLONE} + GIT_REPOSITORY ${DLPACK_REPOSITORY} + GIT_TAG ${DLPACK_TAG} + PREFIX ${DLPACK_PREFIX_DIR} + UPDATE_COMMAND "" CONFIGURE_COMMAND "" - BUILD_COMMAND "" - INSTALL_COMMAND "" - TEST_COMMAND "" -) + BUILD_COMMAND "" + INSTALL_COMMAND "" + TEST_COMMAND "") add_library(dlpack INTERFACE) diff --git a/cmake/external/eigen.cmake b/cmake/external/eigen.cmake index f8bac96b68f..443b7aa7d56 100644 --- a/cmake/external/eigen.cmake +++ b/cmake/external/eigen.cmake @@ -18,39 +18,43 @@ include(ExternalProject) set(EIGEN_PREFIX_DIR ${THIRD_PARTY_PATH}/eigen3) set(EIGEN_SOURCE_DIR ${THIRD_PARTY_PATH}/eigen3/src/extern_eigen3) set(EIGEN_REPOSITORY https://gitlab.com/libeigen/eigen.git) -set(EIGEN_TAG f612df273689a19d25b45ca4f8269463207c4fee) +set(EIGEN_TAG f612df273689a19d25b45ca4f8269463207c4fee) if(WIN32) - add_definitions(-DEIGEN_STRONG_INLINE=inline) + add_definitions(-DEIGEN_STRONG_INLINE=inline) elseif(LINUX) - if(WITH_ROCM) - # For HIPCC Eigen::internal::device::numeric_limits is not EIGEN_DEVICE_FUNC - # which will cause compiler error of using __host__ funciont in __host__ __device__ - file(TO_NATIVE_PATH ${PADDLE_SOURCE_DIR}/patches/eigen/Meta.h native_src) - file(TO_NATIVE_PATH ${EIGEN_SOURCE_DIR}/Eigen/src/Core/util/Meta.h native_dst) - file(TO_NATIVE_PATH ${PADDLE_SOURCE_DIR}/patches/eigen/TensorReductionGpu.h native_src1) - file(TO_NATIVE_PATH ${EIGEN_SOURCE_DIR}/unsupported/Eigen/CXX11/src/Tensor/TensorReductionGpu.h native_dst1) - set(EIGEN_PATCH_COMMAND cp ${native_src} ${native_dst} && cp ${native_src1} ${native_dst1}) - endif() + if(WITH_ROCM) + # For HIPCC Eigen::internal::device::numeric_limits is not EIGEN_DEVICE_FUNC + # which will cause compiler error of using __host__ funciont in __host__ __device__ + file(TO_NATIVE_PATH ${PADDLE_SOURCE_DIR}/patches/eigen/Meta.h native_src) + file(TO_NATIVE_PATH ${EIGEN_SOURCE_DIR}/Eigen/src/Core/util/Meta.h + native_dst) + file(TO_NATIVE_PATH ${PADDLE_SOURCE_DIR}/patches/eigen/TensorReductionGpu.h + native_src1) + file( + TO_NATIVE_PATH + ${EIGEN_SOURCE_DIR}/unsupported/Eigen/CXX11/src/Tensor/TensorReductionGpu.h + native_dst1) + set(EIGEN_PATCH_COMMAND cp ${native_src} ${native_dst} && cp ${native_src1} + ${native_dst1}) + endif() endif() set(EIGEN_INCLUDE_DIR ${EIGEN_SOURCE_DIR}) -INCLUDE_DIRECTORIES(${EIGEN_INCLUDE_DIR}) +include_directories(${EIGEN_INCLUDE_DIR}) ExternalProject_Add( - extern_eigen3 - ${EXTERNAL_PROJECT_LOG_ARGS} - ${SHALLOW_CLONE} - GIT_REPOSITORY ${EIGEN_REPOSITORY} - GIT_TAG ${EIGEN_TAG} - PREFIX ${EIGEN_PREFIX_DIR} - UPDATE_COMMAND "" - PATCH_COMMAND ${EIGEN_PATCH_COMMAND} - CONFIGURE_COMMAND "" - BUILD_COMMAND "" - INSTALL_COMMAND "" - TEST_COMMAND "" -) + extern_eigen3 + ${EXTERNAL_PROJECT_LOG_ARGS} ${SHALLOW_CLONE} + GIT_REPOSITORY ${EIGEN_REPOSITORY} + GIT_TAG ${EIGEN_TAG} + PREFIX ${EIGEN_PREFIX_DIR} + UPDATE_COMMAND "" + PATCH_COMMAND ${EIGEN_PATCH_COMMAND} + CONFIGURE_COMMAND "" + BUILD_COMMAND "" + INSTALL_COMMAND "" + TEST_COMMAND "") add_library(eigen3 INTERFACE) diff --git a/cmake/external/gflags.cmake b/cmake/external/gflags.cmake index 056ff32c8c0..783e1c0d442 100755 --- a/cmake/external/gflags.cmake +++ b/cmake/external/gflags.cmake @@ -12,90 +12,94 @@ # See the License for the specific language governing permissions and # limitations under the License. -INCLUDE(ExternalProject) +include(ExternalProject) -SET(GFLAGS_PREFIX_DIR ${THIRD_PARTY_PATH}/gflags) -SET(GFLAGS_INSTALL_DIR ${THIRD_PARTY_PATH}/install/gflags) -SET(GFLAGS_INCLUDE_DIR "${GFLAGS_INSTALL_DIR}/include" CACHE PATH "gflags include directory." FORCE) +set(GFLAGS_PREFIX_DIR ${THIRD_PARTY_PATH}/gflags) +set(GFLAGS_INSTALL_DIR ${THIRD_PARTY_PATH}/install/gflags) +set(GFLAGS_INCLUDE_DIR + "${GFLAGS_INSTALL_DIR}/include" + CACHE PATH "gflags include directory." FORCE) set(GFLAGS_REPOSITORY ${GIT_URL}/gflags/gflags.git) set(GFLAGS_TAG "v2.2.2") -IF(WIN32) - set(GFLAGS_LIBRARIES "${GFLAGS_INSTALL_DIR}/lib/gflags_static.lib" CACHE FILEPATH "GFLAGS_LIBRARIES" FORCE) -ELSE(WIN32) - set(GFLAGS_LIBRARIES "${GFLAGS_INSTALL_DIR}/lib/libgflags.a" CACHE FILEPATH "GFLAGS_LIBRARIES" FORCE) +if(WIN32) + set(GFLAGS_LIBRARIES + "${GFLAGS_INSTALL_DIR}/lib/gflags_static.lib" + CACHE FILEPATH "GFLAGS_LIBRARIES" FORCE) +else(WIN32) + set(GFLAGS_LIBRARIES + "${GFLAGS_INSTALL_DIR}/lib/libgflags.a" + CACHE FILEPATH "GFLAGS_LIBRARIES" FORCE) set(BUILD_COMMAND $(MAKE) --silent) set(INSTALL_COMMAND $(MAKE) install) -ENDIF(WIN32) +endif(WIN32) -INCLUDE_DIRECTORIES(${GFLAGS_INCLUDE_DIR}) +include_directories(${GFLAGS_INCLUDE_DIR}) if(WITH_ARM_BRPC) - SET(ARM_GFLAGS_URL "https://paddlerec.bj.bcebos.com/online_infer/arm_brpc_ubuntu18/arm_gflags.tar.gz" CACHE STRING "" FORCE) - set(GFLAGS_SOURCE_DIR ${THIRD_PARTY_PATH}/gflags/src/extern_gflags) - FILE(WRITE ${GFLAGS_SOURCE_DIR}/CMakeLists.txt - "PROJECT(ARM_GFLAGS)\n" - "cmake_minimum_required(VERSION 3.0)\n" + set(ARM_GFLAGS_URL + "https://paddlerec.bj.bcebos.com/online_infer/arm_brpc_ubuntu18/arm_gflags.tar.gz" + CACHE STRING "" FORCE) + set(GFLAGS_SOURCE_DIR ${THIRD_PARTY_PATH}/gflags/src/extern_gflags) + file( + WRITE ${GFLAGS_SOURCE_DIR}/CMakeLists.txt + "PROJECT(ARM_GFLAGS)\n" "cmake_minimum_required(VERSION 3.0)\n" "install(DIRECTORY arm_gflags/bin arm_gflags/include arm_gflags/lib \n" " DESTINATION . USE_SOURCE_PERMISSIONS)\n") - ExternalProject_Add( - extern_gflags - ${EXTERNAL_PROJECT_LOG_ARGS} - ${SHALLOW_CLONE} - PREFIX ${GFLAGS_PREFIX_DIR} - DOWNLOAD_DIR ${GFLAGS_SOURCE_DIR} - DOWNLOAD_COMMAND rm -rf arm_gflags.tar.gz && - wget --no-check-certificate ${ARM_GFLAGS_URL} - && tar zxvf arm_gflags.tar.gz - #DOWNLOAD_COMMAND cp /home/wangbin44/Paddle/build/arm_gflags.tar.gz . - # && tar zxvf arm_gflags.tar.gz - UPDATE_COMMAND "" - CMAKE_ARGS -DCMAKE_INSTALL_PREFIX:PATH=${GFLAGS_INSTALL_DIR} - -DCMAKE_BUILD_TYPE:STRING=${THIRD_PARTY_BUILD_TYPE} - CMAKE_CACHE_ARGS -DCMAKE_INSTALL_PREFIX:PATH=${GFLAGS_INSTALL_DIR} - -DCMAKE_BUILD_TYPE:STRING=${THIRD_PARTY_BUILD_TYPE} - BUILD_BYPRODUCTS ${GFLAGS_LIBRARIES} - ) + ExternalProject_Add( + extern_gflags + ${EXTERNAL_PROJECT_LOG_ARGS} ${SHALLOW_CLONE} + PREFIX ${GFLAGS_PREFIX_DIR} + DOWNLOAD_DIR ${GFLAGS_SOURCE_DIR} + DOWNLOAD_COMMAND rm -rf arm_gflags.tar.gz && wget --no-check-certificate + ${ARM_GFLAGS_URL} && tar zxvf arm_gflags.tar.gz + #DOWNLOAD_COMMAND cp /home/wangbin44/Paddle/build/arm_gflags.tar.gz . + # && tar zxvf arm_gflags.tar.gz + UPDATE_COMMAND "" + CMAKE_ARGS -DCMAKE_INSTALL_PREFIX:PATH=${GFLAGS_INSTALL_DIR} + -DCMAKE_BUILD_TYPE:STRING=${THIRD_PARTY_BUILD_TYPE} + CMAKE_CACHE_ARGS -DCMAKE_INSTALL_PREFIX:PATH=${GFLAGS_INSTALL_DIR} + -DCMAKE_BUILD_TYPE:STRING=${THIRD_PARTY_BUILD_TYPE} + BUILD_BYPRODUCTS ${GFLAGS_LIBRARIES}) else() - ExternalProject_Add( - extern_gflags - ${EXTERNAL_PROJECT_LOG_ARGS} - ${SHALLOW_CLONE} - GIT_REPOSITORY ${GFLAGS_REPOSITORY} - GIT_TAG ${GFLAGS_TAG} - PREFIX ${GFLAGS_PREFIX_DIR} - UPDATE_COMMAND "" - BUILD_COMMAND ${BUILD_COMMAND} - INSTALL_COMMAND ${INSTALL_COMMAND} - CMAKE_ARGS -DCMAKE_CXX_COMPILER=${CMAKE_CXX_COMPILER} - -DCMAKE_C_COMPILER=${CMAKE_C_COMPILER} - -DCMAKE_CXX_FLAGS=${CMAKE_CXX_FLAGS} - -DCMAKE_CXX_FLAGS_RELEASE=${CMAKE_CXX_FLAGS_RELEASE} - -DCMAKE_CXX_FLAGS_DEBUG=${CMAKE_CXX_FLAGS_DEBUG} - -DCMAKE_C_FLAGS=${CMAKE_C_FLAGS} - -DCMAKE_C_FLAGS_DEBUG=${CMAKE_C_FLAGS_DEBUG} - -DCMAKE_C_FLAGS_RELEASE=${CMAKE_C_FLAGS_RELEASE} - -DBUILD_STATIC_LIBS=ON - -DCMAKE_INSTALL_PREFIX=${GFLAGS_INSTALL_DIR} - -DCMAKE_POSITION_INDEPENDENT_CODE=ON - -DBUILD_TESTING=OFF - -DCMAKE_BUILD_TYPE=${THIRD_PARTY_BUILD_TYPE} - ${EXTERNAL_OPTIONAL_ARGS} - CMAKE_CACHE_ARGS -DCMAKE_INSTALL_PREFIX:PATH=${GFLAGS_INSTALL_DIR} - -DCMAKE_POSITION_INDEPENDENT_CODE:BOOL=ON - -DCMAKE_BUILD_TYPE:STRING=${THIRD_PARTY_BUILD_TYPE} - BUILD_BYPRODUCTS ${GFLAGS_LIBRARIES} - ) + ExternalProject_Add( + extern_gflags + ${EXTERNAL_PROJECT_LOG_ARGS} ${SHALLOW_CLONE} + GIT_REPOSITORY ${GFLAGS_REPOSITORY} + GIT_TAG ${GFLAGS_TAG} + PREFIX ${GFLAGS_PREFIX_DIR} + UPDATE_COMMAND "" + BUILD_COMMAND ${BUILD_COMMAND} + INSTALL_COMMAND ${INSTALL_COMMAND} + CMAKE_ARGS -DCMAKE_CXX_COMPILER=${CMAKE_CXX_COMPILER} + -DCMAKE_C_COMPILER=${CMAKE_C_COMPILER} + -DCMAKE_CXX_FLAGS=${CMAKE_CXX_FLAGS} + -DCMAKE_CXX_FLAGS_RELEASE=${CMAKE_CXX_FLAGS_RELEASE} + -DCMAKE_CXX_FLAGS_DEBUG=${CMAKE_CXX_FLAGS_DEBUG} + -DCMAKE_C_FLAGS=${CMAKE_C_FLAGS} + -DCMAKE_C_FLAGS_DEBUG=${CMAKE_C_FLAGS_DEBUG} + -DCMAKE_C_FLAGS_RELEASE=${CMAKE_C_FLAGS_RELEASE} + -DBUILD_STATIC_LIBS=ON + -DCMAKE_INSTALL_PREFIX=${GFLAGS_INSTALL_DIR} + -DCMAKE_POSITION_INDEPENDENT_CODE=ON + -DBUILD_TESTING=OFF + -DCMAKE_BUILD_TYPE=${THIRD_PARTY_BUILD_TYPE} + ${EXTERNAL_OPTIONAL_ARGS} + CMAKE_CACHE_ARGS + -DCMAKE_INSTALL_PREFIX:PATH=${GFLAGS_INSTALL_DIR} + -DCMAKE_POSITION_INDEPENDENT_CODE:BOOL=ON + -DCMAKE_BUILD_TYPE:STRING=${THIRD_PARTY_BUILD_TYPE} + BUILD_BYPRODUCTS ${GFLAGS_LIBRARIES}) endif() -ADD_LIBRARY(gflags STATIC IMPORTED GLOBAL) -SET_PROPERTY(TARGET gflags PROPERTY IMPORTED_LOCATION ${GFLAGS_LIBRARIES}) -ADD_DEPENDENCIES(gflags extern_gflags) +add_library(gflags STATIC IMPORTED GLOBAL) +set_property(TARGET gflags PROPERTY IMPORTED_LOCATION ${GFLAGS_LIBRARIES}) +add_dependencies(gflags extern_gflags) # On Windows (including MinGW), the Shlwapi library is used by gflags if available. -if (WIN32) +if(WIN32) include(CheckIncludeFileCXX) check_include_file_cxx("shlwapi.h" HAVE_SHLWAPI) - if (HAVE_SHLWAPI) + if(HAVE_SHLWAPI) set_property(GLOBAL PROPERTY OS_DEPENDENCY_MODULES shlwapi.lib) endif(HAVE_SHLWAPI) -endif (WIN32) +endif(WIN32) diff --git a/cmake/external/glog.cmake b/cmake/external/glog.cmake index b2f3afdabf4..a9942a6bca6 100755 --- a/cmake/external/glog.cmake +++ b/cmake/external/glog.cmake @@ -12,86 +12,90 @@ # See the License for the specific language governing permissions and # limitations under the License. -INCLUDE(ExternalProject) +include(ExternalProject) -SET(GLOG_PREFIX_DIR ${THIRD_PARTY_PATH}/glog) -SET(GLOG_INSTALL_DIR ${THIRD_PARTY_PATH}/install/glog) -SET(GLOG_INCLUDE_DIR "${GLOG_INSTALL_DIR}/include" CACHE PATH "glog include directory." FORCE) -SET(GLOG_REPOSITORY ${GIT_URL}/google/glog.git) -SET(GLOG_TAG v0.4.0) +set(GLOG_PREFIX_DIR ${THIRD_PARTY_PATH}/glog) +set(GLOG_INSTALL_DIR ${THIRD_PARTY_PATH}/install/glog) +set(GLOG_INCLUDE_DIR + "${GLOG_INSTALL_DIR}/include" + CACHE PATH "glog include directory." FORCE) +set(GLOG_REPOSITORY ${GIT_URL}/google/glog.git) +set(GLOG_TAG v0.4.0) -IF(WIN32) - SET(GLOG_LIBRARIES "${GLOG_INSTALL_DIR}/lib/glog.lib" CACHE FILEPATH "glog library." FORCE) - SET(GLOG_CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} /wd4267 /wd4530") +if(WIN32) + set(GLOG_LIBRARIES + "${GLOG_INSTALL_DIR}/lib/glog.lib" + CACHE FILEPATH "glog library." FORCE) + set(GLOG_CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} /wd4267 /wd4530") add_definitions("/DGOOGLE_GLOG_DLL_DECL=") -ELSE(WIN32) - SET(GLOG_LIBRARIES "${GLOG_INSTALL_DIR}/lib/libglog.a" CACHE FILEPATH "glog library." FORCE) - SET(GLOG_CMAKE_CXX_FLAGS ${CMAKE_CXX_FLAGS}) -ENDIF(WIN32) +else(WIN32) + set(GLOG_LIBRARIES + "${GLOG_INSTALL_DIR}/lib/libglog.a" + CACHE FILEPATH "glog library." FORCE) + set(GLOG_CMAKE_CXX_FLAGS ${CMAKE_CXX_FLAGS}) +endif(WIN32) -INCLUDE_DIRECTORIES(${GLOG_INCLUDE_DIR}) +include_directories(${GLOG_INCLUDE_DIR}) if(WITH_ARM_BRPC) - SET(ARM_GLOG_URL "https://paddlerec.bj.bcebos.com/online_infer/arm_brpc_ubuntu18/arm_glog.tar.gz" CACHE STRING "" FORCE) - set(GLOG_SOURCE_DIR ${THIRD_PARTY_PATH}/glog/src/extern_glog) - FILE(WRITE ${GLOG_SOURCE_DIR}/CMakeLists.txt - "PROJECT(ARM_GLOGS)\n" - "cmake_minimum_required(VERSION 3.0)\n" + set(ARM_GLOG_URL + "https://paddlerec.bj.bcebos.com/online_infer/arm_brpc_ubuntu18/arm_glog.tar.gz" + CACHE STRING "" FORCE) + set(GLOG_SOURCE_DIR ${THIRD_PARTY_PATH}/glog/src/extern_glog) + file( + WRITE ${GLOG_SOURCE_DIR}/CMakeLists.txt + "PROJECT(ARM_GLOGS)\n" "cmake_minimum_required(VERSION 3.0)\n" "install(DIRECTORY arm_glog/include arm_glog/lib \n" " DESTINATION . USE_SOURCE_PERMISSIONS)\n") - ExternalProject_Add( - extern_glog - ${EXTERNAL_PROJECT_LOG_ARGS} - ${SHALLOW_CLONE} - DEPENDS gflags - PREFIX ${GLOG_PREFIX_DIR} - DOWNLOAD_DIR ${GLOG_SOURCE_DIR} - DOWNLOAD_COMMAND rm -rf arm_glog.tar.gz && - wget --no-check-certificate ${ARM_GLOG_URL} - && tar zxvf arm_glog.tar.gz - #DOWNLOAD_COMMAND cp /home/wangbin44/Paddle/build/arm_glog.tar.gz . - # && tar zxvf arm_glog.tar.gz - UPDATE_COMMAND "" - CMAKE_ARGS -DCMAKE_INSTALL_PREFIX:PATH=${GLOG_INSTALL_DIR} - -DCMAKE_BUILD_TYPE:STRING=${THIRD_PARTY_BUILD_TYPE} - CMAKE_CACHE_ARGS -DCMAKE_INSTALL_PREFIX:PATH=${GLOG_INSTALL_DIR} - -DCMAKE_BUILD_TYPE:STRING=${THIRD_PARTY_BUILD_TYPE} - BUILD_BYPRODUCTS ${GLOG_LIBRARIES} - ) + ExternalProject_Add( + extern_glog + ${EXTERNAL_PROJECT_LOG_ARGS} ${SHALLOW_CLONE} + DEPENDS gflags + PREFIX ${GLOG_PREFIX_DIR} + DOWNLOAD_DIR ${GLOG_SOURCE_DIR} + DOWNLOAD_COMMAND rm -rf arm_glog.tar.gz && wget --no-check-certificate + ${ARM_GLOG_URL} && tar zxvf arm_glog.tar.gz + #DOWNLOAD_COMMAND cp /home/wangbin44/Paddle/build/arm_glog.tar.gz . + # && tar zxvf arm_glog.tar.gz + UPDATE_COMMAND "" + CMAKE_ARGS -DCMAKE_INSTALL_PREFIX:PATH=${GLOG_INSTALL_DIR} + -DCMAKE_BUILD_TYPE:STRING=${THIRD_PARTY_BUILD_TYPE} + CMAKE_CACHE_ARGS -DCMAKE_INSTALL_PREFIX:PATH=${GLOG_INSTALL_DIR} + -DCMAKE_BUILD_TYPE:STRING=${THIRD_PARTY_BUILD_TYPE} + BUILD_BYPRODUCTS ${GLOG_LIBRARIES}) else() - ExternalProject_Add( - extern_glog - ${EXTERNAL_PROJECT_LOG_ARGS} - ${SHALLOW_CLONE} - GIT_REPOSITORY ${GLOG_REPOSITORY} - GIT_TAG ${GLOG_TAG} - DEPENDS gflags - PREFIX ${GLOG_PREFIX_DIR} - UPDATE_COMMAND "" - CMAKE_ARGS -DCMAKE_CXX_COMPILER=${CMAKE_CXX_COMPILER} - -DCMAKE_C_COMPILER=${CMAKE_C_COMPILER} - -DCMAKE_CXX_FLAGS=${GLOG_CMAKE_CXX_FLAGS} - -DCMAKE_CXX_FLAGS_RELEASE=${CMAKE_CXX_FLAGS_RELEASE} - -DCMAKE_CXX_FLAGS_DEBUG=${CMAKE_CXX_FLAGS_DEBUG} - -DCMAKE_C_FLAGS=${CMAKE_C_FLAGS} - -DCMAKE_C_FLAGS_DEBUG=${CMAKE_C_FLAGS_DEBUG} - -DCMAKE_C_FLAGS_RELEASE=${CMAKE_C_FLAGS_RELEASE} - -DCMAKE_INSTALL_PREFIX=${GLOG_INSTALL_DIR} - -DCMAKE_INSTALL_LIBDIR=${GLOG_INSTALL_DIR}/lib - -DCMAKE_POSITION_INDEPENDENT_CODE=ON - -DWITH_GFLAGS=OFF - -DBUILD_TESTING=OFF - -DCMAKE_BUILD_TYPE=${THIRD_PARTY_BUILD_TYPE} - ${EXTERNAL_OPTIONAL_ARGS} - CMAKE_CACHE_ARGS -DCMAKE_INSTALL_PREFIX:PATH=${GLOG_INSTALL_DIR} - -DCMAKE_INSTALL_LIBDIR:PATH=${GLOG_INSTALL_DIR}/lib - -DCMAKE_POSITION_INDEPENDENT_CODE:BOOL=ON - -DCMAKE_BUILD_TYPE:STRING=${THIRD_PARTY_BUILD_TYPE} - BUILD_BYPRODUCTS ${GLOG_LIBRARIES} - ) + ExternalProject_Add( + extern_glog + ${EXTERNAL_PROJECT_LOG_ARGS} ${SHALLOW_CLONE} + GIT_REPOSITORY ${GLOG_REPOSITORY} + GIT_TAG ${GLOG_TAG} + DEPENDS gflags + PREFIX ${GLOG_PREFIX_DIR} + UPDATE_COMMAND "" + CMAKE_ARGS -DCMAKE_CXX_COMPILER=${CMAKE_CXX_COMPILER} + -DCMAKE_C_COMPILER=${CMAKE_C_COMPILER} + -DCMAKE_CXX_FLAGS=${GLOG_CMAKE_CXX_FLAGS} + -DCMAKE_CXX_FLAGS_RELEASE=${CMAKE_CXX_FLAGS_RELEASE} + -DCMAKE_CXX_FLAGS_DEBUG=${CMAKE_CXX_FLAGS_DEBUG} + -DCMAKE_C_FLAGS=${CMAKE_C_FLAGS} + -DCMAKE_C_FLAGS_DEBUG=${CMAKE_C_FLAGS_DEBUG} + -DCMAKE_C_FLAGS_RELEASE=${CMAKE_C_FLAGS_RELEASE} + -DCMAKE_INSTALL_PREFIX=${GLOG_INSTALL_DIR} + -DCMAKE_INSTALL_LIBDIR=${GLOG_INSTALL_DIR}/lib + -DCMAKE_POSITION_INDEPENDENT_CODE=ON + -DWITH_GFLAGS=OFF + -DBUILD_TESTING=OFF + -DCMAKE_BUILD_TYPE=${THIRD_PARTY_BUILD_TYPE} + ${EXTERNAL_OPTIONAL_ARGS} + CMAKE_CACHE_ARGS + -DCMAKE_INSTALL_PREFIX:PATH=${GLOG_INSTALL_DIR} + -DCMAKE_INSTALL_LIBDIR:PATH=${GLOG_INSTALL_DIR}/lib + -DCMAKE_POSITION_INDEPENDENT_CODE:BOOL=ON + -DCMAKE_BUILD_TYPE:STRING=${THIRD_PARTY_BUILD_TYPE} + BUILD_BYPRODUCTS ${GLOG_LIBRARIES}) endif() -ADD_LIBRARY(glog STATIC IMPORTED GLOBAL) -SET_PROPERTY(TARGET glog PROPERTY IMPORTED_LOCATION ${GLOG_LIBRARIES}) -ADD_DEPENDENCIES(glog extern_glog gflags) -LINK_LIBRARIES(glog) +add_library(glog STATIC IMPORTED GLOBAL) +set_property(TARGET glog PROPERTY IMPORTED_LOCATION ${GLOG_LIBRARIES}) +add_dependencies(glog extern_glog gflags) +link_libraries(glog) diff --git a/cmake/external/gloo.cmake b/cmake/external/gloo.cmake index 778d7c2a0ae..cd7b254892e 100644 --- a/cmake/external/gloo.cmake +++ b/cmake/external/gloo.cmake @@ -12,58 +12,65 @@ # See the License for the specific language governing permissions and # limitations under the License. -INCLUDE(ExternalProject) +include(ExternalProject) -SET(GLOO_PROJECT "extern_gloo") -SET(GLOO_PREFIX_DIR ${THIRD_PARTY_PATH}/gloo) -SET(GLOO_SOURCE_DIR ${THIRD_PARTY_PATH}/gloo/src/extern_gloo) -SET(GLOO_INSTALL_DIR ${THIRD_PARTY_PATH}/install/gloo) -SET(GLOO_INCLUDE_DIR "${GLOO_INSTALL_DIR}/include" CACHE PATH "gloo include directory." FORCE) -SET(GLOO_LIBRARY_DIR "${GLOO_INSTALL_DIR}/lib" CACHE PATH "gloo library directory." FORCE) +set(GLOO_PROJECT "extern_gloo") +set(GLOO_PREFIX_DIR ${THIRD_PARTY_PATH}/gloo) +set(GLOO_SOURCE_DIR ${THIRD_PARTY_PATH}/gloo/src/extern_gloo) +set(GLOO_INSTALL_DIR ${THIRD_PARTY_PATH}/install/gloo) +set(GLOO_INCLUDE_DIR + "${GLOO_INSTALL_DIR}/include" + CACHE PATH "gloo include directory." FORCE) +set(GLOO_LIBRARY_DIR + "${GLOO_INSTALL_DIR}/lib" + CACHE PATH "gloo library directory." FORCE) # As we add extra features for gloo, we use the non-official repo -SET(GLOO_REPOSITORY ${GIT_URL}/sandyhouse/gloo.git) -SET(GLOO_TAG v0.0.2) -SET(GLOO_LIBRARIES "${GLOO_INSTALL_DIR}/lib/libgloo.a" CACHE FILEPATH "gloo library." FORCE) +set(GLOO_REPOSITORY ${GIT_URL}/sandyhouse/gloo.git) +set(GLOO_TAG v0.0.2) +set(GLOO_LIBRARIES + "${GLOO_INSTALL_DIR}/lib/libgloo.a" + CACHE FILEPATH "gloo library." FORCE) -INCLUDE_DIRECTORIES(${GLOO_INCLUDE_DIR}) +include_directories(${GLOO_INCLUDE_DIR}) if(WITH_ASCEND OR WITH_ASCEND_CL) ExternalProject_Add( - ${GLOO_PROJECT} - ${EXTERNAL_PROJECT_LOG_ARGS} - ${SHALLOW_CLONE} - GIT_REPOSITORY ${GLOO_REPOSITORY} - GIT_TAG ${GLOO_TAG} - PREFIX "${GLOO_PREFIX_DIR}" - UPDATE_COMMAND "" - CONFIGURE_COMMAND "" - BUILD_COMMAND mkdir -p ${GLOO_SOURCE_DIR}/build - && cd ${GLOO_SOURCE_DIR}/build && cmake .. -DCMAKE_CXX_FLAGS=${CMAKE_CXX_FLAGS} && make - && mkdir -p ${GLOO_LIBRARY_DIR} ${GLOO_INCLUDE_DIR}/gloo - INSTALL_COMMAND ${CMAKE_COMMAND} -E copy ${GLOO_SOURCE_DIR}/build/gloo/libgloo.a ${GLOO_LIBRARY_DIR} - COMMAND ${CMAKE_COMMAND} -E copy_directory "${GLOO_SOURCE_DIR}/gloo/" "${GLOO_INCLUDE_DIR}/gloo" - BUILD_BYPRODUCTS ${GLOO_LIBRARIES} - ) + ${GLOO_PROJECT} + ${EXTERNAL_PROJECT_LOG_ARGS} ${SHALLOW_CLONE} + GIT_REPOSITORY ${GLOO_REPOSITORY} + GIT_TAG ${GLOO_TAG} + PREFIX "${GLOO_PREFIX_DIR}" + UPDATE_COMMAND "" + CONFIGURE_COMMAND "" + BUILD_COMMAND + mkdir -p ${GLOO_SOURCE_DIR}/build && cd ${GLOO_SOURCE_DIR}/build && cmake + .. -DCMAKE_CXX_FLAGS=${CMAKE_CXX_FLAGS} && make && mkdir -p + ${GLOO_LIBRARY_DIR} ${GLOO_INCLUDE_DIR}/gloo + INSTALL_COMMAND ${CMAKE_COMMAND} -E copy + ${GLOO_SOURCE_DIR}/build/gloo/libgloo.a ${GLOO_LIBRARY_DIR} + COMMAND ${CMAKE_COMMAND} -E copy_directory "${GLOO_SOURCE_DIR}/gloo/" + "${GLOO_INCLUDE_DIR}/gloo" + BUILD_BYPRODUCTS ${GLOO_LIBRARIES}) else() ExternalProject_Add( - ${GLOO_PROJECT} - ${EXTERNAL_PROJECT_LOG_ARGS} - ${SHALLOW_CLONE} - GIT_REPOSITORY ${GLOO_REPOSITORY} - GIT_TAG ${GLOO_TAG} - PREFIX "${GLOO_PREFIX_DIR}" - UPDATE_COMMAND "" - CONFIGURE_COMMAND "" - BUILD_COMMAND mkdir -p ${GLOO_SOURCE_DIR}/build - && cd ${GLOO_SOURCE_DIR}/build && cmake .. -DCMAKE_CXX_FLAGS=${CMAKE_CXX_FLAGS} && make - && mkdir -p ${GLOO_LIBRARY_DIR} ${GLOO_INCLUDE_DIR}/gloo - INSTALL_COMMAND ${CMAKE_COMMAND} -E copy ${GLOO_SOURCE_DIR}/build/gloo/libgloo.a ${GLOO_LIBRARY_DIR} - COMMAND ${CMAKE_COMMAND} -E copy_directory "${GLOO_SOURCE_DIR}/gloo/" "${GLOO_INCLUDE_DIR}/gloo" - BUILD_BYPRODUCTS ${GLOO_LIBRARIES} - ) + ${GLOO_PROJECT} + ${EXTERNAL_PROJECT_LOG_ARGS} ${SHALLOW_CLONE} + GIT_REPOSITORY ${GLOO_REPOSITORY} + GIT_TAG ${GLOO_TAG} + PREFIX "${GLOO_PREFIX_DIR}" + UPDATE_COMMAND "" + CONFIGURE_COMMAND "" + BUILD_COMMAND + mkdir -p ${GLOO_SOURCE_DIR}/build && cd ${GLOO_SOURCE_DIR}/build && cmake + .. -DCMAKE_CXX_FLAGS=${CMAKE_CXX_FLAGS} && make && mkdir -p + ${GLOO_LIBRARY_DIR} ${GLOO_INCLUDE_DIR}/gloo + INSTALL_COMMAND ${CMAKE_COMMAND} -E copy + ${GLOO_SOURCE_DIR}/build/gloo/libgloo.a ${GLOO_LIBRARY_DIR} + COMMAND ${CMAKE_COMMAND} -E copy_directory "${GLOO_SOURCE_DIR}/gloo/" + "${GLOO_INCLUDE_DIR}/gloo" + BUILD_BYPRODUCTS ${GLOO_LIBRARIES}) endif() - -ADD_LIBRARY(gloo STATIC IMPORTED GLOBAL) -SET_PROPERTY(TARGET gloo PROPERTY IMPORTED_LOCATION ${GLOO_LIBRARIES}) -ADD_DEPENDENCIES(gloo ${GLOO_PROJECT}) +add_library(gloo STATIC IMPORTED GLOBAL) +set_property(TARGET gloo PROPERTY IMPORTED_LOCATION ${GLOO_LIBRARIES}) +add_dependencies(gloo ${GLOO_PROJECT}) diff --git a/cmake/external/gtest.cmake b/cmake/external/gtest.cmake index 3c740af6e0b..00527ceecdc 100644 --- a/cmake/external/gtest.cmake +++ b/cmake/external/gtest.cmake @@ -14,79 +14,85 @@ #FIXME:(gongwb) Move brpc's gtest dependency. -IF(WITH_TESTING) - ENABLE_TESTING() -ENDIF() +if(WITH_TESTING) + enable_testing() +endif() -INCLUDE(GNUInstallDirs) -INCLUDE(ExternalProject) +include(GNUInstallDirs) +include(ExternalProject) -SET(GTEST_PREFIX_DIR ${THIRD_PARTY_PATH}/gtest) -SET(GTEST_INSTALL_DIR ${THIRD_PARTY_PATH}/install/gtest) -SET(GTEST_INCLUDE_DIR "${GTEST_INSTALL_DIR}/include" CACHE PATH "gtest include directory." FORCE) -set(GTEST_REPOSITORY ${GIT_URL}/google/googletest.git) -set(GTEST_TAG release-1.8.1) +set(GTEST_PREFIX_DIR ${THIRD_PARTY_PATH}/gtest) +set(GTEST_INSTALL_DIR ${THIRD_PARTY_PATH}/install/gtest) +set(GTEST_INCLUDE_DIR + "${GTEST_INSTALL_DIR}/include" + CACHE PATH "gtest include directory." FORCE) +set(GTEST_REPOSITORY ${GIT_URL}/google/googletest.git) +set(GTEST_TAG release-1.8.1) -INCLUDE_DIRECTORIES(${GTEST_INCLUDE_DIR}) +include_directories(${GTEST_INCLUDE_DIR}) -IF(WIN32) - set(GTEST_LIBRARIES - "${GTEST_INSTALL_DIR}/${CMAKE_INSTALL_LIBDIR}/gtest.lib" CACHE FILEPATH "gtest libraries." FORCE) - set(GTEST_MAIN_LIBRARIES - "${GTEST_INSTALL_DIR}/${CMAKE_INSTALL_LIBDIR}/gtest_main.lib" CACHE FILEPATH "gtest main libraries." FORCE) - string(REPLACE "/w " "" GTEST_CMAKE_C_FLAGS "${CMAKE_C_FLAGS}") - string(REPLACE "/w " "" GTEST_CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS}") - string(REPLACE "/W0 " "" GTEST_CMAKE_C_FLAGS "${GTEST_CMAKE_C_FLAGS}") - string(REPLACE "/W0 " "" GTEST_CMAKE_CXX_FLAGS "${GTEST_CMAKE_CXX_FLAGS}") -ELSE(WIN32) - set(GTEST_LIBRARIES - "${GTEST_INSTALL_DIR}/${CMAKE_INSTALL_LIBDIR}/libgtest.a" CACHE FILEPATH "gtest libraries." FORCE) - set(GTEST_MAIN_LIBRARIES - "${GTEST_INSTALL_DIR}/${CMAKE_INSTALL_LIBDIR}/libgtest_main.a" CACHE FILEPATH "gtest main libraries." FORCE) - set(GTEST_CMAKE_C_FLAGS "${CMAKE_C_FLAGS}") - set(GTEST_CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS}") -ENDIF(WIN32) +if(WIN32) + set(GTEST_LIBRARIES + "${GTEST_INSTALL_DIR}/${CMAKE_INSTALL_LIBDIR}/gtest.lib" + CACHE FILEPATH "gtest libraries." FORCE) + set(GTEST_MAIN_LIBRARIES + "${GTEST_INSTALL_DIR}/${CMAKE_INSTALL_LIBDIR}/gtest_main.lib" + CACHE FILEPATH "gtest main libraries." FORCE) + string(REPLACE "/w " "" GTEST_CMAKE_C_FLAGS "${CMAKE_C_FLAGS}") + string(REPLACE "/w " "" GTEST_CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS}") + string(REPLACE "/W0 " "" GTEST_CMAKE_C_FLAGS "${GTEST_CMAKE_C_FLAGS}") + string(REPLACE "/W0 " "" GTEST_CMAKE_CXX_FLAGS "${GTEST_CMAKE_CXX_FLAGS}") +else(WIN32) + set(GTEST_LIBRARIES + "${GTEST_INSTALL_DIR}/${CMAKE_INSTALL_LIBDIR}/libgtest.a" + CACHE FILEPATH "gtest libraries." FORCE) + set(GTEST_MAIN_LIBRARIES + "${GTEST_INSTALL_DIR}/${CMAKE_INSTALL_LIBDIR}/libgtest_main.a" + CACHE FILEPATH "gtest main libraries." FORCE) + set(GTEST_CMAKE_C_FLAGS "${CMAKE_C_FLAGS}") + set(GTEST_CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS}") +endif(WIN32) -IF(WITH_MKLML) - # wait for mklml downloading completed - SET(GTEST_DEPENDS ${MKLML_PROJECT}) -ENDIF() +if(WITH_MKLML) + # wait for mklml downloading completed + set(GTEST_DEPENDS ${MKLML_PROJECT}) +endif() ExternalProject_Add( - extern_gtest - ${EXTERNAL_PROJECT_LOG_ARGS} - ${SHALLOW_CLONE} - GIT_REPOSITORY ${GTEST_REPOSITORY} - GIT_TAG ${GTEST_TAG} - DEPENDS ${GTEST_DEPENDS} - PREFIX ${GTEST_PREFIX_DIR} - UPDATE_COMMAND "" - CMAKE_ARGS -DCMAKE_CXX_COMPILER=${CMAKE_CXX_COMPILER} - -DCMAKE_C_COMPILER=${CMAKE_C_COMPILER} - -DCMAKE_CXX_FLAGS=${GTEST_CMAKE_CXX_FLAGS} - -DCMAKE_CXX_FLAGS_RELEASE=${CMAKE_CXX_FLAGS_RELEASE} - -DCMAKE_CXX_FLAGS_DEBUG=${CMAKE_CXX_FLAGS_DEBUG} - -DCMAKE_C_FLAGS=${GTEST_CMAKE_C_FLAGS} - -DCMAKE_C_FLAGS_DEBUG=${CMAKE_C_FLAGS_DEBUG} - -DCMAKE_C_FLAGS_RELEASE=${CMAKE_C_FLAGS_RELEASE} - -DCMAKE_INSTALL_PREFIX=${GTEST_INSTALL_DIR} - -DCMAKE_POSITION_INDEPENDENT_CODE=ON - -DBUILD_GMOCK=ON - -Dgtest_disable_pthreads=ON - -Dgtest_force_shared_crt=ON - -DCMAKE_BUILD_TYPE=${THIRD_PARTY_BUILD_TYPE} - ${EXTERNAL_OPTIONAL_ARGS} - CMAKE_CACHE_ARGS -DCMAKE_INSTALL_PREFIX:PATH=${GTEST_INSTALL_DIR} - -DCMAKE_POSITION_INDEPENDENT_CODE:BOOL=ON - -DCMAKE_BUILD_TYPE:STRING=${THIRD_PARTY_BUILD_TYPE} - BUILD_BYPRODUCTS ${GTEST_LIBRARIES} - BUILD_BYPRODUCTS ${GTEST_MAIN_LIBRARIES} -) + extern_gtest + ${EXTERNAL_PROJECT_LOG_ARGS} ${SHALLOW_CLONE} + GIT_REPOSITORY ${GTEST_REPOSITORY} + GIT_TAG ${GTEST_TAG} + DEPENDS ${GTEST_DEPENDS} + PREFIX ${GTEST_PREFIX_DIR} + UPDATE_COMMAND "" + CMAKE_ARGS -DCMAKE_CXX_COMPILER=${CMAKE_CXX_COMPILER} + -DCMAKE_C_COMPILER=${CMAKE_C_COMPILER} + -DCMAKE_CXX_FLAGS=${GTEST_CMAKE_CXX_FLAGS} + -DCMAKE_CXX_FLAGS_RELEASE=${CMAKE_CXX_FLAGS_RELEASE} + -DCMAKE_CXX_FLAGS_DEBUG=${CMAKE_CXX_FLAGS_DEBUG} + -DCMAKE_C_FLAGS=${GTEST_CMAKE_C_FLAGS} + -DCMAKE_C_FLAGS_DEBUG=${CMAKE_C_FLAGS_DEBUG} + -DCMAKE_C_FLAGS_RELEASE=${CMAKE_C_FLAGS_RELEASE} + -DCMAKE_INSTALL_PREFIX=${GTEST_INSTALL_DIR} + -DCMAKE_POSITION_INDEPENDENT_CODE=ON + -DBUILD_GMOCK=ON + -Dgtest_disable_pthreads=ON + -Dgtest_force_shared_crt=ON + -DCMAKE_BUILD_TYPE=${THIRD_PARTY_BUILD_TYPE} + ${EXTERNAL_OPTIONAL_ARGS} + CMAKE_CACHE_ARGS + -DCMAKE_INSTALL_PREFIX:PATH=${GTEST_INSTALL_DIR} + -DCMAKE_POSITION_INDEPENDENT_CODE:BOOL=ON + -DCMAKE_BUILD_TYPE:STRING=${THIRD_PARTY_BUILD_TYPE} + BUILD_BYPRODUCTS ${GTEST_LIBRARIES} + BUILD_BYPRODUCTS ${GTEST_MAIN_LIBRARIES}) -ADD_LIBRARY(gtest STATIC IMPORTED GLOBAL) -SET_PROPERTY(TARGET gtest PROPERTY IMPORTED_LOCATION ${GTEST_LIBRARIES}) -ADD_DEPENDENCIES(gtest extern_gtest) +add_library(gtest STATIC IMPORTED GLOBAL) +set_property(TARGET gtest PROPERTY IMPORTED_LOCATION ${GTEST_LIBRARIES}) +add_dependencies(gtest extern_gtest) -ADD_LIBRARY(gtest_main STATIC IMPORTED GLOBAL) -SET_PROPERTY(TARGET gtest_main PROPERTY IMPORTED_LOCATION ${GTEST_MAIN_LIBRARIES}) -ADD_DEPENDENCIES(gtest_main extern_gtest) +add_library(gtest_main STATIC IMPORTED GLOBAL) +set_property(TARGET gtest_main PROPERTY IMPORTED_LOCATION + ${GTEST_MAIN_LIBRARIES}) +add_dependencies(gtest_main extern_gtest) diff --git a/cmake/external/lapack.cmake b/cmake/external/lapack.cmake index 4cca61681c6..43305223fe2 100644 --- a/cmake/external/lapack.cmake +++ b/cmake/external/lapack.cmake @@ -12,56 +12,68 @@ # See the License for the specific language governing permissions and # limitations under the License. -INCLUDE (ExternalProject) +include(ExternalProject) -SET(LAPACK_PREFIX_DIR ${THIRD_PARTY_PATH}/lapack) -SET(LAPACK_SOURCE_DIR ${THIRD_PARTY_PATH}/lapack/src/extern_lapack) -SET(LAPACK_INSTALL_DIR ${THIRD_PARTY_PATH}/install/lapack) -SET(LAPACK_LIB_DIR ${LAPACK_INSTALL_DIR}/lib) +set(LAPACK_PREFIX_DIR ${THIRD_PARTY_PATH}/lapack) +set(LAPACK_SOURCE_DIR ${THIRD_PARTY_PATH}/lapack/src/extern_lapack) +set(LAPACK_INSTALL_DIR ${THIRD_PARTY_PATH}/install/lapack) +set(LAPACK_LIB_DIR ${LAPACK_INSTALL_DIR}/lib) # Note(zhouwei): lapack need fortan compiler which many machines don't have, so use precompiled library. # use lapack tag v3.10.0 on 06/28/2021 https://github.com/Reference-LAPACK/lapack if(LINUX) - SET(LAPACK_VER "lapack_lnx_v3.10.0.20210628" CACHE STRING "" FORCE) - SET(LAPACK_URL "https://paddlepaddledeps.bj.bcebos.com/${LAPACK_VER}.tar.gz" CACHE STRING "" FORCE) - SET(LAPACK_URL_MD5 71f8cc8237a8571692f3e07f9a4f25f6) - SET(GNU_RT_LIB_1 "${LAPACK_LIB_DIR}/libquadmath.so.0") - SET(GFORTRAN_LIB "${LAPACK_LIB_DIR}/libgfortran.so.3") - SET(BLAS_LIB "${LAPACK_LIB_DIR}/libblas.so.3") - SET(LAPACK_LIB "${LAPACK_LIB_DIR}/liblapack.so.3") + set(LAPACK_VER + "lapack_lnx_v3.10.0.20210628" + CACHE STRING "" FORCE) + set(LAPACK_URL + "https://paddlepaddledeps.bj.bcebos.com/${LAPACK_VER}.tar.gz" + CACHE STRING "" FORCE) + set(LAPACK_URL_MD5 71f8cc8237a8571692f3e07f9a4f25f6) + set(GNU_RT_LIB_1 "${LAPACK_LIB_DIR}/libquadmath.so.0") + set(GFORTRAN_LIB "${LAPACK_LIB_DIR}/libgfortran.so.3") + set(BLAS_LIB "${LAPACK_LIB_DIR}/libblas.so.3") + set(LAPACK_LIB "${LAPACK_LIB_DIR}/liblapack.so.3") elseif(WIN32) - # Refer to [lapack-for-windows] http://icl.cs.utk.edu/lapack-for-windows/lapack/#lapacke - SET(LAPACK_VER "lapack_win_v3.10.0.20210628" CACHE STRING "" FORCE) - SET(LAPACK_URL "https://paddlepaddledeps.bj.bcebos.com/${LAPACK_VER}.zip" CACHE STRING "" FORCE) - SET(LAPACK_URL_MD5 590d080392dcd5abbd5dca767a50b63a) - SET(GNU_RT_LIB_1 "${LAPACK_LIB_DIR}/libquadmath-0.dll") - SET(GNU_RT_LIB_2 "${LAPACK_LIB_DIR}/libgcc_s_seh-1.dll") - SET(GFORTRAN_LIB "${LAPACK_LIB_DIR}/libgfortran-3.dll") - SET(BLAS_LIB "${LAPACK_LIB_DIR}/libblas.dll") - SET(LAPACK_LIB "${LAPACK_LIB_DIR}/liblapack.dll") + # Refer to [lapack-for-windows] http://icl.cs.utk.edu/lapack-for-windows/lapack/#lapacke + set(LAPACK_VER + "lapack_win_v3.10.0.20210628" + CACHE STRING "" FORCE) + set(LAPACK_URL + "https://paddlepaddledeps.bj.bcebos.com/${LAPACK_VER}.zip" + CACHE STRING "" FORCE) + set(LAPACK_URL_MD5 590d080392dcd5abbd5dca767a50b63a) + set(GNU_RT_LIB_1 "${LAPACK_LIB_DIR}/libquadmath-0.dll") + set(GNU_RT_LIB_2 "${LAPACK_LIB_DIR}/libgcc_s_seh-1.dll") + set(GFORTRAN_LIB "${LAPACK_LIB_DIR}/libgfortran-3.dll") + set(BLAS_LIB "${LAPACK_LIB_DIR}/libblas.dll") + set(LAPACK_LIB "${LAPACK_LIB_DIR}/liblapack.dll") else() - SET(LAPACK_VER "lapack_mac_v3.10.0.20210628" CACHE STRING "" FORCE) - SET(LAPACK_URL "https://paddlepaddledeps.bj.bcebos.com/${LAPACK_VER}.tar.gz" CACHE STRING "" FORCE) - SET(LAPACK_URL_MD5 427aecf8dee8523de3566ca8e47944d7) - SET(GNU_RT_LIB_1 "${LAPACK_LIB_DIR}/libquadmath.0.dylib") - SET(GNU_RT_LIB_2 "${LAPACK_LIB_DIR}/libgcc_s.1.dylib") - SET(GFORTRAN_LIB "${LAPACK_LIB_DIR}/libgfortran.5.dylib") - SET(BLAS_LIB "${LAPACK_LIB_DIR}/libblas.3.dylib") - SET(LAPACK_LIB "${LAPACK_LIB_DIR}/liblapack.3.dylib") + set(LAPACK_VER + "lapack_mac_v3.10.0.20210628" + CACHE STRING "" FORCE) + set(LAPACK_URL + "https://paddlepaddledeps.bj.bcebos.com/${LAPACK_VER}.tar.gz" + CACHE STRING "" FORCE) + set(LAPACK_URL_MD5 427aecf8dee8523de3566ca8e47944d7) + set(GNU_RT_LIB_1 "${LAPACK_LIB_DIR}/libquadmath.0.dylib") + set(GNU_RT_LIB_2 "${LAPACK_LIB_DIR}/libgcc_s.1.dylib") + set(GFORTRAN_LIB "${LAPACK_LIB_DIR}/libgfortran.5.dylib") + set(BLAS_LIB "${LAPACK_LIB_DIR}/libblas.3.dylib") + set(LAPACK_LIB "${LAPACK_LIB_DIR}/liblapack.3.dylib") endif() ExternalProject_Add( - extern_lapack - ${EXTERNAL_PROJECT_LOG_ARGS} - URL ${LAPACK_URL} - URL_MD5 ${LAPACK_URL_MD5} - PREFIX ${LAPACK_PREFIX_DIR} - DOWNLOAD_NO_PROGRESS 1 - PATCH_COMMAND "" - UPDATE_COMMAND "" - CONFIGURE_COMMAND "" - BUILD_COMMAND "" - INSTALL_COMMAND ${CMAKE_COMMAND} -E copy_directory ${LAPACK_SOURCE_DIR} ${LAPACK_LIB_DIR} - BUILD_BYPRODUCTS ${BLAS_LIB} - BUILD_BYPRODUCTS ${LAPACK_LIB} -) + extern_lapack + ${EXTERNAL_PROJECT_LOG_ARGS} + URL ${LAPACK_URL} + URL_MD5 ${LAPACK_URL_MD5} + PREFIX ${LAPACK_PREFIX_DIR} + DOWNLOAD_NO_PROGRESS 1 + PATCH_COMMAND "" + UPDATE_COMMAND "" + CONFIGURE_COMMAND "" + BUILD_COMMAND "" + INSTALL_COMMAND ${CMAKE_COMMAND} -E copy_directory ${LAPACK_SOURCE_DIR} + ${LAPACK_LIB_DIR} + BUILD_BYPRODUCTS ${BLAS_LIB} + BUILD_BYPRODUCTS ${LAPACK_LIB}) diff --git a/cmake/external/leveldb.cmake b/cmake/external/leveldb.cmake index 65a21a87dbd..b1f2345794e 100644 --- a/cmake/external/leveldb.cmake +++ b/cmake/external/leveldb.cmake @@ -12,35 +12,39 @@ # See the License for the specific language governing permissions and # limitations under the License. -INCLUDE(ExternalProject) +include(ExternalProject) -SET(LEVELDB_PREFIX_DIR ${THIRD_PARTY_PATH}/leveldb) -SET(LEVELDB_INSTALL_DIR ${THIRD_PARTY_PATH}/install/leveldb) -SET(LEVELDB_INCLUDE_DIR "${LEVELDB_INSTALL_DIR}/include" CACHE PATH "leveldb include directory." FORCE) -SET(LEVELDB_LIBRARIES "${LEVELDB_INSTALL_DIR}/lib/libleveldb.a" CACHE FILEPATH "leveldb library." FORCE) -INCLUDE_DIRECTORIES(${LEVELDB_INCLUDE_DIR}) +set(LEVELDB_PREFIX_DIR ${THIRD_PARTY_PATH}/leveldb) +set(LEVELDB_INSTALL_DIR ${THIRD_PARTY_PATH}/install/leveldb) +set(LEVELDB_INCLUDE_DIR + "${LEVELDB_INSTALL_DIR}/include" + CACHE PATH "leveldb include directory." FORCE) +set(LEVELDB_LIBRARIES + "${LEVELDB_INSTALL_DIR}/lib/libleveldb.a" + CACHE FILEPATH "leveldb library." FORCE) +include_directories(${LEVELDB_INCLUDE_DIR}) ExternalProject_Add( - extern_leveldb - ${EXTERNAL_PROJECT_LOG_ARGS} - PREFIX ${LEVELDB_PREFIX_DIR} - GIT_REPOSITORY "https://github.com/google/leveldb" - GIT_TAG v1.18 - UPDATE_COMMAND "" - CONFIGURE_COMMAND "" - BUILD_COMMAND CXXFLAGS=-fPIC make -j ${NUM_OF_PROCESSOR} libleveldb.a - INSTALL_COMMAND mkdir -p ${LEVELDB_INSTALL_DIR}/lib/ - && cp ${LEVELDB_PREFIX_DIR}/src/extern_leveldb/libleveldb.a ${LEVELDB_LIBRARIES} - && cp -r ${LEVELDB_PREFIX_DIR}/src/extern_leveldb/include ${LEVELDB_INSTALL_DIR}/ - BUILD_IN_SOURCE 1 - BUILD_BYPRODUCTS ${LEVELDB_LIBRARIES} -) + extern_leveldb + ${EXTERNAL_PROJECT_LOG_ARGS} + PREFIX ${LEVELDB_PREFIX_DIR} + GIT_REPOSITORY "https://github.com/google/leveldb" + GIT_TAG v1.18 + UPDATE_COMMAND "" + CONFIGURE_COMMAND "" + BUILD_COMMAND CXXFLAGS=-fPIC make -j ${NUM_OF_PROCESSOR} libleveldb.a + INSTALL_COMMAND + mkdir -p ${LEVELDB_INSTALL_DIR}/lib/ && cp + ${LEVELDB_PREFIX_DIR}/src/extern_leveldb/libleveldb.a ${LEVELDB_LIBRARIES} + && cp -r ${LEVELDB_PREFIX_DIR}/src/extern_leveldb/include + ${LEVELDB_INSTALL_DIR}/ + BUILD_IN_SOURCE 1 + BUILD_BYPRODUCTS ${LEVELDB_LIBRARIES}) -ADD_DEPENDENCIES(extern_leveldb snappy) +add_dependencies(extern_leveldb snappy) -ADD_LIBRARY(leveldb STATIC IMPORTED GLOBAL) -SET_PROPERTY(TARGET leveldb PROPERTY IMPORTED_LOCATION ${LEVELDB_LIBRARIES}) -ADD_DEPENDENCIES(leveldb extern_leveldb) - -LIST(APPEND external_project_dependencies leveldb) +add_library(leveldb STATIC IMPORTED GLOBAL) +set_property(TARGET leveldb PROPERTY IMPORTED_LOCATION ${LEVELDB_LIBRARIES}) +add_dependencies(leveldb extern_leveldb) +list(APPEND external_project_dependencies leveldb) diff --git a/cmake/external/libmct.cmake b/cmake/external/libmct.cmake index a166e43c7b9..28bf083f779 100644 --- a/cmake/external/libmct.cmake +++ b/cmake/external/libmct.cmake @@ -12,48 +12,54 @@ # See the License for the specific language governing permissions and # limitations under the License. -INCLUDE(ExternalProject) +include(ExternalProject) -SET(LIBMCT_PROJECT "extern_libmct") -IF((NOT DEFINED LIBMCT_VER) OR (NOT DEFINED LIBMCT_URL)) - MESSAGE(STATUS "use pre defined download url") - SET(LIBMCT_VER "0.1.0" CACHE STRING "" FORCE) - SET(LIBMCT_NAME "libmct" CACHE STRING "" FORCE) - SET(LIBMCT_URL "https://pslib.bj.bcebos.com/libmct/libmct.tar.gz" CACHE STRING "" FORCE) -ENDIF() -MESSAGE(STATUS "LIBMCT_NAME: ${LIBMCT_NAME}, LIBMCT_URL: ${LIBMCT_URL}") -SET(LIBMCT_PREFIX_DIR "${THIRD_PARTY_PATH}/libmct") -SET(LIBMCT_DOWNLOAD_DIR "${LIBMCT_PREFIX_DIR}/src/${LIBMCT_PROJECT}") -SET(LIBMCT_DST_DIR "libmct") -SET(LIBMCT_INSTALL_ROOT "${THIRD_PARTY_PATH}/install") -SET(LIBMCT_INSTALL_DIR ${LIBMCT_INSTALL_ROOT}/${LIBMCT_DST_DIR}) -SET(LIBMCT_ROOT ${LIBMCT_INSTALL_DIR}) -SET(LIBMCT_INC_DIR ${LIBMCT_ROOT}/include) -SET(CMAKE_INSTALL_RPATH "${CMAKE_INSTALL_RPATH}" "${LIBMCT_ROOT}/lib") +set(LIBMCT_PROJECT "extern_libmct") +if((NOT DEFINED LIBMCT_VER) OR (NOT DEFINED LIBMCT_URL)) + message(STATUS "use pre defined download url") + set(LIBMCT_VER + "0.1.0" + CACHE STRING "" FORCE) + set(LIBMCT_NAME + "libmct" + CACHE STRING "" FORCE) + set(LIBMCT_URL + "https://pslib.bj.bcebos.com/libmct/libmct.tar.gz" + CACHE STRING "" FORCE) +endif() +message(STATUS "LIBMCT_NAME: ${LIBMCT_NAME}, LIBMCT_URL: ${LIBMCT_URL}") +set(LIBMCT_PREFIX_DIR "${THIRD_PARTY_PATH}/libmct") +set(LIBMCT_DOWNLOAD_DIR "${LIBMCT_PREFIX_DIR}/src/${LIBMCT_PROJECT}") +set(LIBMCT_DST_DIR "libmct") +set(LIBMCT_INSTALL_ROOT "${THIRD_PARTY_PATH}/install") +set(LIBMCT_INSTALL_DIR ${LIBMCT_INSTALL_ROOT}/${LIBMCT_DST_DIR}) +set(LIBMCT_ROOT ${LIBMCT_INSTALL_DIR}) +set(LIBMCT_INC_DIR ${LIBMCT_ROOT}/include) +set(CMAKE_INSTALL_RPATH "${CMAKE_INSTALL_RPATH}" "${LIBMCT_ROOT}/lib") -INCLUDE_DIRECTORIES(${LIBMCT_INC_DIR}) +include_directories(${LIBMCT_INC_DIR}) -FILE(WRITE ${LIBMCT_DOWNLOAD_DIR}/CMakeLists.txt - "PROJECT(LIBMCT)\n" - "cmake_minimum_required(VERSION 3.0)\n" +file( + WRITE ${LIBMCT_DOWNLOAD_DIR}/CMakeLists.txt + "PROJECT(LIBMCT)\n" "cmake_minimum_required(VERSION 3.0)\n" "install(DIRECTORY ${LIBMCT_NAME}/include ${LIBMCT_NAME}/lib \n" " DESTINATION ${LIBMCT_DST_DIR})\n") ExternalProject_Add( - ${LIBMCT_PROJECT} - ${EXTERNAL_PROJECT_LOG_ARGS} - PREFIX ${LIBMCT_PREFIX_DIR} - DOWNLOAD_DIR ${LIBMCT_DOWNLOAD_DIR} - DOWNLOAD_COMMAND wget --no-check-certificate ${LIBMCT_URL} -c -q -O ${LIBMCT_NAME}.tar.gz - && tar --no-same-owner -zxvf ${LIBMCT_NAME}.tar.gz - DOWNLOAD_NO_PROGRESS 1 - UPDATE_COMMAND "" - CMAKE_ARGS -DCMAKE_INSTALL_PREFIX=${LIBMCT_INSTALL_ROOT} - -DCMAKE_BUILD_TYPE=${THIRD_PARTY_BUILD_TYPE} - CMAKE_CACHE_ARGS -DCMAKE_INSTALL_PREFIX:PATH=${LIBMCT_INSTALL_ROOT} - -DCMAKE_BUILD_TYPE=${THIRD_PARTY_BUILD_TYPE} -) + ${LIBMCT_PROJECT} + ${EXTERNAL_PROJECT_LOG_ARGS} + PREFIX ${LIBMCT_PREFIX_DIR} + DOWNLOAD_DIR ${LIBMCT_DOWNLOAD_DIR} + DOWNLOAD_COMMAND + wget --no-check-certificate ${LIBMCT_URL} -c -q -O ${LIBMCT_NAME}.tar.gz && + tar --no-same-owner -zxvf ${LIBMCT_NAME}.tar.gz + DOWNLOAD_NO_PROGRESS 1 + UPDATE_COMMAND "" + CMAKE_ARGS -DCMAKE_INSTALL_PREFIX=${LIBMCT_INSTALL_ROOT} + -DCMAKE_BUILD_TYPE=${THIRD_PARTY_BUILD_TYPE} + CMAKE_CACHE_ARGS -DCMAKE_INSTALL_PREFIX:PATH=${LIBMCT_INSTALL_ROOT} + -DCMAKE_BUILD_TYPE=${THIRD_PARTY_BUILD_TYPE}) add_library(libmct INTERFACE) -ADD_DEPENDENCIES(libmct ${LIBMCT_PROJECT}) +add_dependencies(libmct ${LIBMCT_PROJECT}) diff --git a/cmake/external/libxsmm.cmake b/cmake/external/libxsmm.cmake index da7cb696ef8..1efb95cc0cf 100644 --- a/cmake/external/libxsmm.cmake +++ b/cmake/external/libxsmm.cmake @@ -12,34 +12,38 @@ # See the License for the specific language governing permissions and # limitations under the License. -INCLUDE (ExternalProject) +include(ExternalProject) -SET(LIBXSMM_PREFIX_DIR ${THIRD_PARTY_PATH}/libxsmm) -SET(LIBXSMM_INSTALL_DIR ${THIRD_PARTY_PATH}/install/libxsmm) -SET(LIBXSMM_INCLUDE_DIR "${LIBXSMM_INSTALL_DIR}/include" CACHE PATH "LIBXSMM include directory." FORCE) -SET(LIBXSMM_LIBRARY_DIR "${LIBXSMM_INSTALL_DIR}/lib" CACHE PATH "LIBXSMM library directory." FORCE) -SET(LIBXSMM_LIB "${LIBXSMM_LIBRARY_DIR}/libxsmm.a") -SET(LIBXSMMNOBLAS_LIB "${LIBXSMM_LIBRARY_DIR}/libxsmmnoblas.a") +set(LIBXSMM_PREFIX_DIR ${THIRD_PARTY_PATH}/libxsmm) +set(LIBXSMM_INSTALL_DIR ${THIRD_PARTY_PATH}/install/libxsmm) +set(LIBXSMM_INCLUDE_DIR + "${LIBXSMM_INSTALL_DIR}/include" + CACHE PATH "LIBXSMM include directory." FORCE) +set(LIBXSMM_LIBRARY_DIR + "${LIBXSMM_INSTALL_DIR}/lib" + CACHE PATH "LIBXSMM library directory." FORCE) +set(LIBXSMM_LIB "${LIBXSMM_LIBRARY_DIR}/libxsmm.a") +set(LIBXSMMNOBLAS_LIB "${LIBXSMM_LIBRARY_DIR}/libxsmmnoblas.a") ExternalProject_Add( - extern_libxsmm - ${SHALLOW_CLONE} - GIT_REPOSITORY "${GIT_URL}/hfp/libxsmm.git" - GIT_TAG "7cc03b5b342fdbc6b6d990b190671c5dbb8489a2" - PREFIX ${LIBXSMM_PREFIX_DIR} - UPDATE_COMMAND "" - CONFIGURE_COMMAND "" - BUILD_IN_SOURCE 1 - BUILD_COMMAND $(MAKE) --silent PREFIX=${LIBXSMM_INSTALL_DIR} CXX=g++ CC=gcc WARP=0 install - INSTALL_COMMAND "" - BUILD_BYPRODUCTS ${LIBXSMM_LIB} - BUILD_BYPRODUCTS ${LIBXSMMNOBLAS_LIB} -) -ADD_LIBRARY(libxsmm STATIC IMPORTED GLOBAL) -SET_PROPERTY(TARGET libxsmm PROPERTY IMPORTED_LOCATION "${LIBXSMM_LIB}") -SET_PROPERTY(TARGET libxsmm PROPERTY IMPORTED_LOCATION "${LIBXSMMNOBLAS_LIB}") + extern_libxsmm + ${SHALLOW_CLONE} + GIT_REPOSITORY "${GIT_URL}/hfp/libxsmm.git" + GIT_TAG "7cc03b5b342fdbc6b6d990b190671c5dbb8489a2" + PREFIX ${LIBXSMM_PREFIX_DIR} + UPDATE_COMMAND "" + CONFIGURE_COMMAND "" + BUILD_IN_SOURCE 1 + BUILD_COMMAND $(MAKE) --silent PREFIX=${LIBXSMM_INSTALL_DIR} CXX=g++ CC=gcc + WARP=0 install + INSTALL_COMMAND "" + BUILD_BYPRODUCTS ${LIBXSMM_LIB} + BUILD_BYPRODUCTS ${LIBXSMMNOBLAS_LIB}) +add_library(libxsmm STATIC IMPORTED GLOBAL) +set_property(TARGET libxsmm PROPERTY IMPORTED_LOCATION "${LIBXSMM_LIB}") +set_property(TARGET libxsmm PROPERTY IMPORTED_LOCATION "${LIBXSMMNOBLAS_LIB}") -MESSAGE(STATUS "Libxsmm library: ${LIBXSMM_LIBS}") +message(STATUS "Libxsmm library: ${LIBXSMM_LIBS}") include_directories(${LIBXSMM_INCLUDE_DIR}) -ADD_DEFINITIONS(-DPADDLE_WITH_LIBXSMM) -ADD_DEPENDENCIES(libxsmm extern_libxsmm) +add_definitions(-DPADDLE_WITH_LIBXSMM) +add_dependencies(libxsmm extern_libxsmm) diff --git a/cmake/external/lite.cmake b/cmake/external/lite.cmake index 0031757467f..b994f407604 100644 --- a/cmake/external/lite.cmake +++ b/cmake/external/lite.cmake @@ -18,32 +18,34 @@ if(NOT LINUX) return() endif() -if (LITE_WITH_XPU) +if(LITE_WITH_XPU) add_definitions(-DLITE_SUBGRAPH_WITH_XPU) - IF(WITH_AARCH64) - SET(XPU_SDK_ENV "kylin_aarch64") - ELSEIF(WITH_SUNWAY) - SET(XPU_SDK_ENV "deepin_sw6_64") - ELSEIF(WITH_BDCENTOS) - SET(XPU_SDK_ENV "bdcentos_x86_64") - ELSEIF(WITH_UBUNTU) - SET(XPU_SDK_ENV "ubuntu_x86_64") - ELSEIF(WITH_CENTOS) - SET(XPU_SDK_ENV "centos7_x86_64") - ELSE () - SET(XPU_SDK_ENV "ubuntu_x86_64") - ENDIF() + if(WITH_AARCH64) + set(XPU_SDK_ENV "kylin_aarch64") + elseif(WITH_SUNWAY) + set(XPU_SDK_ENV "deepin_sw6_64") + elseif(WITH_BDCENTOS) + set(XPU_SDK_ENV "bdcentos_x86_64") + elseif(WITH_UBUNTU) + set(XPU_SDK_ENV "ubuntu_x86_64") + elseif(WITH_CENTOS) + set(XPU_SDK_ENV "centos7_x86_64") + else() + set(XPU_SDK_ENV "ubuntu_x86_64") + endif() endif() -if (LITE_WITH_NNADAPTER) - add_definitions(-DLITE_SUBGRAPH_WITH_NNADAPTER) - if (NNADAPTER_WITH_HUAWEI_ASCEND_NPU) +if(LITE_WITH_NNADAPTER) + add_definitions(-DLITE_SUBGRAPH_WITH_NNADAPTER) + if(NNADAPTER_WITH_HUAWEI_ASCEND_NPU) add_definitions(-DLITE_SUBGRAPH_WITH_NPU) - set(NPU_SDK_ROOT "/usr/local/Ascend/ascend-toolkit/latest" CACHE STRING "default NPU SDK ROOT") + set(NPU_SDK_ROOT + "/usr/local/Ascend/ascend-toolkit/latest" + CACHE STRING "default NPU SDK ROOT") endif() endif() -if (NOT LITE_SOURCE_DIR OR NOT LITE_BINARY_DIR) +if(NOT LITE_SOURCE_DIR OR NOT LITE_BINARY_DIR) include(ExternalProject) set(LITE_PROJECT extern_lite) set(LITE_PREFIX_DIR ${THIRD_PARTY_PATH}/lite) @@ -61,109 +63,118 @@ if (NOT LITE_SOURCE_DIR OR NOT LITE_BINARY_DIR) if(WITH_ARM) set(LITE_BUILD_COMMAND $(MAKE) publish_inference -j) message(WARNING "BUILD_COMMAND: ${LITE_BUILD_COMMAND}") - set(LITE_OPTIONAL_ARGS -DWITH_MKL=OFF - -DLITE_WITH_CUDA=OFF - -DWITH_MKLDNN=OFF - -DLITE_WITH_X86=OFF - -DLITE_WITH_LIGHT_WEIGHT_FRAMEWORK=ON - -DLITE_WITH_PROFILE=OFF - -DARM_TARGET_OS=armlinux - -DWITH_LITE=ON - -DWITH_PYTHON=OFF - -DWITH_TESTING=OFF - -DLITE_BUILD_EXTRA=ON - -DLITE_WITH_XPU=${LITE_WITH_XPU} - -DXPU_SDK_URL=${XPU_BASE_URL} - -DXPU_SDK_ENV=${XPU_SDK_ENV} - -DLITE_WITH_NNADAPTER=${LITE_WITH_NNADAPTER} - -DNNADAPTER_WITH_HUAWEI_ASCEND_NPU=${NNADAPTER_WITH_HUAWEI_ASCEND_NPU} - -DNNADAPTER_HUAWEI_ASCEND_NPU_SDK_ROOT=${NPU_SDK_ROOT} - -DLITE_WITH_CODE_META_INFO=OFF - -DLITE_WITH_ARM=ON) + set(LITE_OPTIONAL_ARGS + -DWITH_MKL=OFF + -DLITE_WITH_CUDA=OFF + -DWITH_MKLDNN=OFF + -DLITE_WITH_X86=OFF + -DLITE_WITH_LIGHT_WEIGHT_FRAMEWORK=ON + -DLITE_WITH_PROFILE=OFF + -DARM_TARGET_OS=armlinux + -DWITH_LITE=ON + -DWITH_PYTHON=OFF + -DWITH_TESTING=OFF + -DLITE_BUILD_EXTRA=ON + -DLITE_WITH_XPU=${LITE_WITH_XPU} + -DXPU_SDK_URL=${XPU_BASE_URL} + -DXPU_SDK_ENV=${XPU_SDK_ENV} + -DLITE_WITH_NNADAPTER=${LITE_WITH_NNADAPTER} + -DNNADAPTER_WITH_HUAWEI_ASCEND_NPU=${NNADAPTER_WITH_HUAWEI_ASCEND_NPU} + -DNNADAPTER_HUAWEI_ASCEND_NPU_SDK_ROOT=${NPU_SDK_ROOT} + -DLITE_WITH_CODE_META_INFO=OFF + -DLITE_WITH_ARM=ON) ExternalProject_Add( ${LITE_PROJECT} ${EXTERNAL_PROJECT_LOG_ARGS} - GIT_REPOSITORY "${GIT_URL}/PaddlePaddle/Paddle-Lite.git" - GIT_TAG ${LITE_GIT_TAG} - PREFIX ${LITE_PREFIX_DIR} - PATCH_COMMAND mkdir -p ${LITE_PREFIX_DIR}/src/extern_lite-build/lite/gen_code && touch ${LITE_PREFIX_DIR}/src/extern_lite-build/lite/gen_code/__generated_code__.cc && sed -i "/aarch64-linux-gnu-gcc/d" ${LITE_PREFIX_DIR}/src/extern_lite/cmake/os/armlinux.cmake && sed -i "/aarch64-linux-gnu-g++/d" ${LITE_PREFIX_DIR}/src/extern_lite/cmake/os/armlinux.cmake - UPDATE_COMMAND "" - BUILD_COMMAND ${LITE_BUILD_COMMAND} - INSTALL_COMMAND "" - CMAKE_ARGS -DCMAKE_CXX_COMPILER=${CMAKE_CXX_COMPILER} - -DCMAKE_C_COMPILER=${CMAKE_C_COMPILER} - -DCMAKE_CXX_FLAGS=${LITE_CMAKE_CXX_FLAGS} - -DCMAKE_CXX_FLAGS_RELEASE=${CMAKE_CXX_FLAGS_RELEASE} - -DCMAKE_CXX_FLAGS_DEBUG=${CMAKE_CXX_FLAGS_DEBUG} - -DCMAKE_C_FLAGS=${CMAKE_C_FLAGS} - -DCMAKE_C_FLAGS_DEBUG=${CMAKE_C_FLAGS_DEBUG} - -DCMAKE_C_FLAGS_RELEASE=${CMAKE_C_FLAGS_RELEASE} - -DCMAKE_POSITION_INDEPENDENT_CODE=ON - -DCMAKE_BUILD_TYPE=${THIRD_PARTY_BUILD_TYPE} - ${EXTERNAL_OPTIONAL_ARGS} - ${LITE_OPTIONAL_ARGS} - ) + GIT_REPOSITORY "${GIT_URL}/PaddlePaddle/Paddle-Lite.git" + GIT_TAG ${LITE_GIT_TAG} + PREFIX ${LITE_PREFIX_DIR} + PATCH_COMMAND + mkdir -p ${LITE_PREFIX_DIR}/src/extern_lite-build/lite/gen_code && touch + ${LITE_PREFIX_DIR}/src/extern_lite-build/lite/gen_code/__generated_code__.cc + && sed -i "/aarch64-linux-gnu-gcc/d" + ${LITE_PREFIX_DIR}/src/extern_lite/cmake/os/armlinux.cmake && sed -i + "/aarch64-linux-gnu-g++/d" + ${LITE_PREFIX_DIR}/src/extern_lite/cmake/os/armlinux.cmake + UPDATE_COMMAND "" + BUILD_COMMAND ${LITE_BUILD_COMMAND} + INSTALL_COMMAND "" + CMAKE_ARGS -DCMAKE_CXX_COMPILER=${CMAKE_CXX_COMPILER} + -DCMAKE_C_COMPILER=${CMAKE_C_COMPILER} + -DCMAKE_CXX_FLAGS=${LITE_CMAKE_CXX_FLAGS} + -DCMAKE_CXX_FLAGS_RELEASE=${CMAKE_CXX_FLAGS_RELEASE} + -DCMAKE_CXX_FLAGS_DEBUG=${CMAKE_CXX_FLAGS_DEBUG} + -DCMAKE_C_FLAGS=${CMAKE_C_FLAGS} + -DCMAKE_C_FLAGS_DEBUG=${CMAKE_C_FLAGS_DEBUG} + -DCMAKE_C_FLAGS_RELEASE=${CMAKE_C_FLAGS_RELEASE} + -DCMAKE_POSITION_INDEPENDENT_CODE=ON + -DCMAKE_BUILD_TYPE=${THIRD_PARTY_BUILD_TYPE} + ${EXTERNAL_OPTIONAL_ARGS} + ${LITE_OPTIONAL_ARGS}) else() set(LITE_BUILD_COMMAND $(MAKE) publish_inference -j) - set(LITE_OPTIONAL_ARGS -DWITH_MKL=ON - -DLITE_WITH_CUDA=${WITH_GPU} - -DWITH_MKLDNN=OFF - -DLITE_WITH_X86=ON - -DLITE_WITH_PROFILE=OFF - -DWITH_LITE=OFF - -DLITE_WITH_LIGHT_WEIGHT_FRAMEWORK=OFF - -DWITH_PYTHON=OFF - -DWITH_TESTING=OFF - -DLITE_BUILD_EXTRA=ON - -DCUDNN_ROOT=${CUDNN_ROOT} - -DLITE_WITH_STATIC_CUDA=OFF - -DCUDA_ARCH_NAME=${CUDA_ARCH_NAME} - -DLITE_WITH_XPU=${LITE_WITH_XPU} - -DXPU_SDK_URL=${XPU_BASE_URL} - -DXPU_SDK_ENV=${XPU_SDK_ENV} - -DLITE_WITH_NNADAPTER=${LITE_WITH_NNADAPTER} - -DNNADAPTER_WITH_HUAWEI_ASCEND_NPU=${NNADAPTER_WITH_HUAWEI_ASCEND_NPU} - -DNNADAPTER_HUAWEI_ASCEND_NPU_SDK_ROOT=${NPU_SDK_ROOT} - -DLITE_WITH_CODE_META_INFO=OFF - -DLITE_WITH_ARM=OFF) + set(LITE_OPTIONAL_ARGS + -DWITH_MKL=ON + -DLITE_WITH_CUDA=${WITH_GPU} + -DWITH_MKLDNN=OFF + -DLITE_WITH_X86=ON + -DLITE_WITH_PROFILE=OFF + -DWITH_LITE=OFF + -DLITE_WITH_LIGHT_WEIGHT_FRAMEWORK=OFF + -DWITH_PYTHON=OFF + -DWITH_TESTING=OFF + -DLITE_BUILD_EXTRA=ON + -DCUDNN_ROOT=${CUDNN_ROOT} + -DLITE_WITH_STATIC_CUDA=OFF + -DCUDA_ARCH_NAME=${CUDA_ARCH_NAME} + -DLITE_WITH_XPU=${LITE_WITH_XPU} + -DXPU_SDK_URL=${XPU_BASE_URL} + -DXPU_SDK_ENV=${XPU_SDK_ENV} + -DLITE_WITH_NNADAPTER=${LITE_WITH_NNADAPTER} + -DNNADAPTER_WITH_HUAWEI_ASCEND_NPU=${NNADAPTER_WITH_HUAWEI_ASCEND_NPU} + -DNNADAPTER_HUAWEI_ASCEND_NPU_SDK_ROOT=${NPU_SDK_ROOT} + -DLITE_WITH_CODE_META_INFO=OFF + -DLITE_WITH_ARM=OFF) ExternalProject_Add( - ${LITE_PROJECT} - ${EXTERNAL_PROJECT_LOG_ARGS} - GIT_REPOSITORY "${GIT_URL}/PaddlePaddle/Paddle-Lite.git" - GIT_TAG ${LITE_GIT_TAG} - PREFIX ${LITE_PREFIX_DIR} - UPDATE_COMMAND "" - PATCH_COMMAND sed -i "s?NNadapter_bridges_path = os.path.abspath('..')+\"\/lite\/kernels\/nnadapter\/bridges\/paddle_use_bridges.h\"?NNadapter_bridges_path = os.path.abspath(\'..\')+\"\/extern_lite\/lite\/kernels\/nnadapter\/bridges\/paddle_use_bridges.h\"?" ${LITE_PREFIX_DIR}/src/extern_lite//lite/tools/cmake_tools/record_supported_kernel_op.py - BUILD_COMMAND ${LITE_BUILD_COMMAND} - INSTALL_COMMAND "" - CMAKE_ARGS -DCMAKE_CXX_COMPILER=${CMAKE_CXX_COMPILER} - -DCMAKE_C_COMPILER=${CMAKE_C_COMPILER} - -DCMAKE_CXX_FLAGS=${LITE_CMAKE_CXX_FLAGS} - -DCMAKE_CXX_FLAGS_RELEASE=${CMAKE_CXX_FLAGS_RELEASE} - -DCMAKE_CXX_FLAGS_DEBUG=${CMAKE_CXX_FLAGS_DEBUG} - -DCMAKE_C_FLAGS=${CMAKE_C_FLAGS} - -DCMAKE_C_FLAGS_DEBUG=${CMAKE_C_FLAGS_DEBUG} - -DCMAKE_C_FLAGS_RELEASE=${CMAKE_C_FLAGS_RELEASE} - -DCMAKE_POSITION_INDEPENDENT_CODE=ON - -DCMAKE_BUILD_TYPE=${THIRD_PARTY_BUILD_TYPE} - ${EXTERNAL_OPTIONAL_ARGS} - ${LITE_OPTIONAL_ARGS} - ) + ${LITE_PROJECT} + ${EXTERNAL_PROJECT_LOG_ARGS} + GIT_REPOSITORY "${GIT_URL}/PaddlePaddle/Paddle-Lite.git" + GIT_TAG ${LITE_GIT_TAG} + PREFIX ${LITE_PREFIX_DIR} + UPDATE_COMMAND "" + PATCH_COMMAND + sed -i + "s?NNadapter_bridges_path = os.path.abspath('..')+\"\/lite\/kernels\/nnadapter\/bridges\/paddle_use_bridges.h\"?NNadapter_bridges_path = os.path.abspath(\'..\')+\"\/extern_lite\/lite\/kernels\/nnadapter\/bridges\/paddle_use_bridges.h\"?" + ${LITE_PREFIX_DIR}/src/extern_lite//lite/tools/cmake_tools/record_supported_kernel_op.py + BUILD_COMMAND ${LITE_BUILD_COMMAND} + INSTALL_COMMAND "" + CMAKE_ARGS -DCMAKE_CXX_COMPILER=${CMAKE_CXX_COMPILER} + -DCMAKE_C_COMPILER=${CMAKE_C_COMPILER} + -DCMAKE_CXX_FLAGS=${LITE_CMAKE_CXX_FLAGS} + -DCMAKE_CXX_FLAGS_RELEASE=${CMAKE_CXX_FLAGS_RELEASE} + -DCMAKE_CXX_FLAGS_DEBUG=${CMAKE_CXX_FLAGS_DEBUG} + -DCMAKE_C_FLAGS=${CMAKE_C_FLAGS} + -DCMAKE_C_FLAGS_DEBUG=${CMAKE_C_FLAGS_DEBUG} + -DCMAKE_C_FLAGS_RELEASE=${CMAKE_C_FLAGS_RELEASE} + -DCMAKE_POSITION_INDEPENDENT_CODE=ON + -DCMAKE_BUILD_TYPE=${THIRD_PARTY_BUILD_TYPE} + ${EXTERNAL_OPTIONAL_ARGS} + ${LITE_OPTIONAL_ARGS}) endif() - ExternalProject_Get_property(${LITE_PROJECT} BINARY_DIR) - ExternalProject_Get_property(${LITE_PROJECT} SOURCE_DIR) + ExternalProject_Get_Property(${LITE_PROJECT} BINARY_DIR) + ExternalProject_Get_Property(${LITE_PROJECT} SOURCE_DIR) set(LITE_BINARY_DIR ${BINARY_DIR}) set(LITE_SOURCE_DIR ${SOURCE_DIR}) endif() -if (WITH_ARM) +if(WITH_ARM) if(LITE_WITH_XPU) set(LITE_OUTPUT_BIN_DIR inference_lite_lib.armlinux.armv8.xpu) elseif(LITE_WITH_NNADAPTER) message("Enable LITE_WITH_NNADAPTER") - if (NNADAPTER_WITH_HUAWEI_ASCEND_NPU) + if(NNADAPTER_WITH_HUAWEI_ASCEND_NPU) set(LITE_OUTPUT_BIN_DIR inference_lite_lib.armlinux.armv8.nnadapter) endif() else() @@ -184,22 +195,32 @@ endif() function(external_lite_libs alias path) add_library(${alias} SHARED IMPORTED GLOBAL) - SET_PROPERTY(TARGET ${alias} PROPERTY IMPORTED_LOCATION - ${path}) - if (LITE_PROJECT) + set_property(TARGET ${alias} PROPERTY IMPORTED_LOCATION ${path}) + if(LITE_PROJECT) add_dependencies(${alias} ${LITE_PROJECT}) endif() endfunction() -external_lite_libs(lite_full_static ${LITE_BINARY_DIR}/${LITE_OUTPUT_BIN_DIR}/cxx/lib/libpaddle_full_api_shared.so) -set(LITE_SHARED_LIB ${LITE_BINARY_DIR}/${LITE_OUTPUT_BIN_DIR}/cxx/lib/libpaddle_full_api_shared.so) +external_lite_libs( + lite_full_static + ${LITE_BINARY_DIR}/${LITE_OUTPUT_BIN_DIR}/cxx/lib/libpaddle_full_api_shared.so +) +set(LITE_SHARED_LIB + ${LITE_BINARY_DIR}/${LITE_OUTPUT_BIN_DIR}/cxx/lib/libpaddle_full_api_shared.so +) -if (LITE_WITH_NNADAPTER) - set(LITE_NNADAPTER_LIB ${LITE_BINARY_DIR}/${LITE_OUTPUT_BIN_DIR}/cxx/lib/libnnadapter.so) - if (NNADAPTER_WITH_HUAWEI_ASCEND_NPU) - external_lite_libs(lite_nnadapter ${LITE_BINARY_DIR}/${LITE_OUTPUT_BIN_DIR}/cxx/lib/libnnadapter.so ${LITE_BINARY_DIR}/${LITE_OUTPUT_BIN_DIR}/cxx/lib/libhuawei_ascend_npu.so) +if(LITE_WITH_NNADAPTER) + set(LITE_NNADAPTER_LIB + ${LITE_BINARY_DIR}/${LITE_OUTPUT_BIN_DIR}/cxx/lib/libnnadapter.so) + if(NNADAPTER_WITH_HUAWEI_ASCEND_NPU) + external_lite_libs( + lite_nnadapter + ${LITE_BINARY_DIR}/${LITE_OUTPUT_BIN_DIR}/cxx/lib/libnnadapter.so + ${LITE_BINARY_DIR}/${LITE_OUTPUT_BIN_DIR}/cxx/lib/libhuawei_ascend_npu.so) set(LITE_DEPS lite_full_static lite_nnadapter) - set(LITE_NNADAPTER_NPU_LIB ${LITE_BINARY_DIR}/${LITE_OUTPUT_BIN_DIR}/cxx/lib/libhuawei_ascend_npu.so) + set(LITE_NNADAPTER_NPU_LIB + ${LITE_BINARY_DIR}/${LITE_OUTPUT_BIN_DIR}/cxx/lib/libhuawei_ascend_npu.so + ) endif() else() set(LITE_DEPS lite_full_static) diff --git a/cmake/external/llvm.cmake b/cmake/external/llvm.cmake index 5c48afa2806..8b33a73e24c 100644 --- a/cmake/external/llvm.cmake +++ b/cmake/external/llvm.cmake @@ -1,31 +1,33 @@ include(FetchContent) -set(LLVM_DOWNLOAD_URL https://paddle-inference-dist.bj.bcebos.com/infrt/llvm_b5149f4e66a49a98b67e8e2de4e24a4af8e2781b.tar.gz) +set(LLVM_DOWNLOAD_URL + https://paddle-inference-dist.bj.bcebos.com/infrt/llvm_b5149f4e66a49a98b67e8e2de4e24a4af8e2781b.tar.gz +) set(LLVM_MD5 022819bb5760817013cf4b8a37e97d5e) set(FETCHCONTENT_BASE_DIR ${THIRD_PARTY_PATH}/llvm) set(FETCHCONTENT_QUIET OFF) -FetchContent_Declare(external_llvm +FetchContent_Declare( + external_llvm URL ${LLVM_DOWNLOAD_URL} URL_MD5 ${LLVM_MD5} - PREFIX ${THIRD_PARTY_PATH}/llvm - SOURCE_DIR ${THIRD_PARTY_PATH}/install/llvm -) -if (NOT LLVM_PATH) + PREFIX ${THIRD_PARTY_PATH}/llvm SOURCE_DIR ${THIRD_PARTY_PATH}/install/llvm) +if(NOT LLVM_PATH) FetchContent_GetProperties(external_llvm) - if (NOT external_llvm_POPULATED) + if(NOT external_llvm_POPULATED) FetchContent_Populate(external_llvm) endif() set(LLVM_PATH ${THIRD_PARTY_PATH}/install/llvm) set(LLVM_DIR ${THIRD_PARTY_PATH}/install/llvm/lib/cmake/llvm) set(MLIR_DIR ${THIRD_PARTY_PATH}/install/llvm/lib/cmake/mlir) -else () +else() set(LLVM_DIR ${LLVM_PATH}/lib/cmake/llvm) set(MLIR_DIR ${LLVM_PATH}/lib/cmake/mlir) endif() -if (${CMAKE_CXX_COMPILER} STREQUAL "clang++") - set(CMAKE_EXE_LINKER_FLAGS "${CMAKE_EXE_LINKER_FLAGS} -stdlib=libc++ -lc++abi") +if(${CMAKE_CXX_COMPILER} STREQUAL "clang++") + set(CMAKE_EXE_LINKER_FLAGS + "${CMAKE_EXE_LINKER_FLAGS} -stdlib=libc++ -lc++abi") endif() message(STATUS "set LLVM_DIR: ${LLVM_DIR}") @@ -66,8 +68,17 @@ cmake ../llvm -G "Unix Makefiles" \ add_definitions(${LLVM_DEFINITIONS}) -llvm_map_components_to_libnames(llvm_libs Support Core irreader - X86 executionengine orcjit mcjit all codegen) +llvm_map_components_to_libnames( + llvm_libs + Support + Core + irreader + X86 + executionengine + orcjit + mcjit + all + codegen) message(STATUS "LLVM libs: ${llvm_libs}") @@ -75,23 +86,24 @@ get_property(mlir_libs GLOBAL PROPERTY MLIR_ALL_LIBS) message(STATUS "MLIR libs: ${mlir_libs}") add_definitions(${LLVM_DEFINITIONS}) - # The minimum needed libraries for MLIR IR parse and transform. set(MLIR_IR_LIBS MLIRAnalysis MLIRPass MLIRParser MLIRDialect MLIRIR MLIROptLib) - # tb_base is the name of a xxx.td file (without the .td suffix) function(mlir_tablegen_on td_base) set(options) set(oneValueArgs DIALECT) - cmake_parse_arguments(mlir_tablegen_on "${options}" "${oneValueArgs}" "${multiValueArgs}" ${ARGN}) + cmake_parse_arguments(mlir_tablegen_on "${options}" "${oneValueArgs}" + "${multiValueArgs}" ${ARGN}) set(LLVM_TARGET_DEFINITIONS ${td_base}.td) mlir_tablegen(${td_base}.hpp.inc -gen-op-decls) mlir_tablegen(${td_base}.cpp.inc -gen-op-defs) - if (mlir_tablegen_on_DIALECT) - mlir_tablegen(${td_base}_dialect.hpp.inc --gen-dialect-decls -dialect=${mlir_tablegen_on_DIALECT}) - mlir_tablegen(${td_base}_dialect.cpp.inc --gen-dialect-defs -dialect=${mlir_tablegen_on_DIALECT}) + if(mlir_tablegen_on_DIALECT) + mlir_tablegen(${td_base}_dialect.hpp.inc --gen-dialect-decls + -dialect=${mlir_tablegen_on_DIALECT}) + mlir_tablegen(${td_base}_dialect.cpp.inc --gen-dialect-defs + -dialect=${mlir_tablegen_on_DIALECT}) endif() add_public_tablegen_target(${td_base}_IncGen) add_custom_target(${td_base}_inc DEPENDS ${td_base}_IncGen) @@ -99,7 +111,9 @@ endfunction() function(mlir_add_rewriter td_base) set(LLVM_TARGET_DEFINITIONS ${td_base}.td) - set(LLVM_TARGET_DEPENDS ${LLVM_TARGET_DEPENDS} ${CMAKE_SOURCE_DIR}/paddle/infrt/dialect/infrt/ir/infrt_base.td) + set(LLVM_TARGET_DEPENDS + ${LLVM_TARGET_DEPENDS} + ${CMAKE_SOURCE_DIR}/paddle/infrt/dialect/infrt/ir/infrt_base.td) mlir_tablegen(${td_base}.cpp.inc -gen-rewriters) add_public_tablegen_target(MLIR${td_base}IncGen) add_dependencies(mlir-headers MLIR${td_base}IncGen) @@ -108,7 +122,11 @@ endfunction() # Execute the mlir script with infrt-exec program. # @name: name of the test # @script: path to the mlir script file -function (infrt_exec_check name script) - add_test(NAME ${name} - COMMAND sh -c "${CMAKE_BINARY_DIR}/paddle/infrt/host_context/infrt-exec -i ${CMAKE_CURRENT_SOURCE_DIR}/${script}| ${LLVM_PATH}/bin/FileCheck ${CMAKE_CURRENT_SOURCE_DIR}/${script}") +function(infrt_exec_check name script) + add_test( + NAME ${name} + COMMAND + sh -c + "${CMAKE_BINARY_DIR}/paddle/infrt/host_context/infrt-exec -i ${CMAKE_CURRENT_SOURCE_DIR}/${script}| ${LLVM_PATH}/bin/FileCheck ${CMAKE_CURRENT_SOURCE_DIR}/${script}" + ) endfunction() diff --git a/cmake/external/mkldnn.cmake b/cmake/external/mkldnn.cmake index 8f955008fa0..dfa20dd631f 100644 --- a/cmake/external/mkldnn.cmake +++ b/cmake/external/mkldnn.cmake @@ -12,108 +12,131 @@ # See the License for the specific language governing permissions and # limitations under the License. -INCLUDE(ExternalProject) - -SET(MKLDNN_PROJECT "extern_mkldnn") -SET(MKLDNN_PREFIX_DIR ${THIRD_PARTY_PATH}/mkldnn) -SET(MKLDNN_INSTALL_DIR ${THIRD_PARTY_PATH}/install/mkldnn) -SET(MKLDNN_INC_DIR "${MKLDNN_INSTALL_DIR}/include" CACHE PATH "mkldnn include directory." FORCE) -SET(MKLDNN_REPOSITORY ${GIT_URL}/oneapi-src/oneDNN.git) -SET(MKLDNN_TAG 9b186765dded79066e0cd9c17eb70b680b76fb8e) +include(ExternalProject) +set(MKLDNN_PROJECT "extern_mkldnn") +set(MKLDNN_PREFIX_DIR ${THIRD_PARTY_PATH}/mkldnn) +set(MKLDNN_INSTALL_DIR ${THIRD_PARTY_PATH}/install/mkldnn) +set(MKLDNN_INC_DIR + "${MKLDNN_INSTALL_DIR}/include" + CACHE PATH "mkldnn include directory." FORCE) +set(MKLDNN_REPOSITORY ${GIT_URL}/oneapi-src/oneDNN.git) +set(MKLDNN_TAG 9b186765dded79066e0cd9c17eb70b680b76fb8e) # Introduce variables: # * CMAKE_INSTALL_LIBDIR -INCLUDE(GNUInstallDirs) -SET(LIBDIR "lib") +include(GNUInstallDirs) +set(LIBDIR "lib") if(CMAKE_INSTALL_LIBDIR MATCHES ".*lib64$") - SET(LIBDIR "lib64") + set(LIBDIR "lib64") endif() -MESSAGE(STATUS "Set ${MKLDNN_INSTALL_DIR}/${LIBDIR} to runtime path") -SET(CMAKE_INSTALL_RPATH_USE_LINK_PATH TRUE) -SET(CMAKE_INSTALL_RPATH "${CMAKE_INSTALL_RPATH}" "${MKLDNN_INSTALL_DIR}/${LIBDIR}") - -INCLUDE_DIRECTORIES(${MKLDNN_INC_DIR}) # For MKLDNN code to include internal headers. +message(STATUS "Set ${MKLDNN_INSTALL_DIR}/${LIBDIR} to runtime path") +set(CMAKE_INSTALL_RPATH_USE_LINK_PATH TRUE) +set(CMAKE_INSTALL_RPATH "${CMAKE_INSTALL_RPATH}" + "${MKLDNN_INSTALL_DIR}/${LIBDIR}") +include_directories(${MKLDNN_INC_DIR} +)# For MKLDNN code to include internal headers. -IF(NOT WIN32) - SET(MKLDNN_FLAG "-Wno-error=strict-overflow -Wno-error=unused-result -Wno-error=array-bounds") - SET(MKLDNN_FLAG "${MKLDNN_FLAG} -Wno-unused-result -Wno-unused-value") - SET(MKLDNN_CFLAG "${CMAKE_C_FLAGS} ${MKLDNN_FLAG}") - SET(MKLDNN_CXXFLAG "${CMAKE_CXX_FLAGS} ${MKLDNN_FLAG}") - SET(MKLDNN_CXXFLAG_RELEASE "${CMAKE_CXX_FLAGS_RELEASE}") - SET(MKLDNN_CFLAG_RELEASE "${CMAKE_C_FLAGS_RELEASE}") - SET(MKLDNN_LIB "${MKLDNN_INSTALL_DIR}/${LIBDIR}/libdnnl.so" CACHE FILEPATH "mkldnn library." FORCE) -ELSE() - SET(MKLDNN_CXXFLAG "${CMAKE_CXX_FLAGS} /EHsc") - SET(MKLDNN_CFLAG "${CMAKE_C_FLAGS}") - string(REPLACE "/O2 " "" MKLDNN_CFLAG_RELEASE "${CMAKE_C_FLAGS_RELEASE}") - string(REPLACE "/O2 " "" MKLDNN_CXXFLAG_RELEASE "${CMAKE_CXX_FLAGS_RELEASE}") - SET(MKLDNN_LIB "${MKLDNN_INSTALL_DIR}/bin/mkldnn.lib" CACHE FILEPATH "mkldnn library." FORCE) -ENDIF(NOT WIN32) +if(NOT WIN32) + set(MKLDNN_FLAG + "-Wno-error=strict-overflow -Wno-error=unused-result -Wno-error=array-bounds" + ) + set(MKLDNN_FLAG "${MKLDNN_FLAG} -Wno-unused-result -Wno-unused-value") + set(MKLDNN_CFLAG "${CMAKE_C_FLAGS} ${MKLDNN_FLAG}") + set(MKLDNN_CXXFLAG "${CMAKE_CXX_FLAGS} ${MKLDNN_FLAG}") + set(MKLDNN_CXXFLAG_RELEASE "${CMAKE_CXX_FLAGS_RELEASE}") + set(MKLDNN_CFLAG_RELEASE "${CMAKE_C_FLAGS_RELEASE}") + set(MKLDNN_LIB + "${MKLDNN_INSTALL_DIR}/${LIBDIR}/libdnnl.so" + CACHE FILEPATH "mkldnn library." FORCE) +else() + set(MKLDNN_CXXFLAG "${CMAKE_CXX_FLAGS} /EHsc") + set(MKLDNN_CFLAG "${CMAKE_C_FLAGS}") + string(REPLACE "/O2 " "" MKLDNN_CFLAG_RELEASE "${CMAKE_C_FLAGS_RELEASE}") + string(REPLACE "/O2 " "" MKLDNN_CXXFLAG_RELEASE "${CMAKE_CXX_FLAGS_RELEASE}") + set(MKLDNN_LIB + "${MKLDNN_INSTALL_DIR}/bin/mkldnn.lib" + CACHE FILEPATH "mkldnn library." FORCE) +endif(NOT WIN32) ExternalProject_Add( - ${MKLDNN_PROJECT} - ${EXTERNAL_PROJECT_LOG_ARGS} - ${SHALLOW_CLONE} - GIT_REPOSITORY ${MKLDNN_REPOSITORY} - GIT_TAG ${MKLDNN_TAG} - DEPENDS ${MKLDNN_DEPENDS} - PREFIX ${MKLDNN_PREFIX_DIR} - UPDATE_COMMAND "" - #BUILD_ALWAYS 1 - CMAKE_ARGS -DCMAKE_CXX_COMPILER=${CMAKE_CXX_COMPILER} - -DCMAKE_C_COMPILER=${CMAKE_C_COMPILER} - -DCMAKE_CXX_FLAGS=${MKLDNN_CXXFLAG} - -DCMAKE_CXX_FLAGS_RELEASE=${MKLDNN_CXXFLAG_RELEASE} - -DCMAKE_CXX_FLAGS_DEBUG=${CMAKE_CXX_FLAGS_DEBUG} - -DCMAKE_C_FLAGS=${MKLDNN_CFLAG} - -DCMAKE_C_FLAGS_DEBUG=${CMAKE_C_FLAGS_DEBUG} - -DCMAKE_C_FLAGS_RELEASE=${MKLDNN_CFLAG_RELEASE} - -DCMAKE_INSTALL_PREFIX=${MKLDNN_INSTALL_DIR} - -DCMAKE_BUILD_TYPE=${CMAKE_BUILD_TYPE} - -DCMAKE_POSITION_INDEPENDENT_CODE=ON - -DDNNL_BUILD_TESTS=OFF -DDNNL_BUILD_EXAMPLES=OFF - CMAKE_CACHE_ARGS -DCMAKE_INSTALL_PREFIX:PATH=${MKLDNN_INSTALL_DIR} -) + ${MKLDNN_PROJECT} + ${EXTERNAL_PROJECT_LOG_ARGS} ${SHALLOW_CLONE} + GIT_REPOSITORY ${MKLDNN_REPOSITORY} + GIT_TAG ${MKLDNN_TAG} + DEPENDS ${MKLDNN_DEPENDS} + PREFIX ${MKLDNN_PREFIX_DIR} + UPDATE_COMMAND "" + #BUILD_ALWAYS 1 + CMAKE_ARGS -DCMAKE_CXX_COMPILER=${CMAKE_CXX_COMPILER} + -DCMAKE_C_COMPILER=${CMAKE_C_COMPILER} + -DCMAKE_CXX_FLAGS=${MKLDNN_CXXFLAG} + -DCMAKE_CXX_FLAGS_RELEASE=${MKLDNN_CXXFLAG_RELEASE} + -DCMAKE_CXX_FLAGS_DEBUG=${CMAKE_CXX_FLAGS_DEBUG} + -DCMAKE_C_FLAGS=${MKLDNN_CFLAG} + -DCMAKE_C_FLAGS_DEBUG=${CMAKE_C_FLAGS_DEBUG} + -DCMAKE_C_FLAGS_RELEASE=${MKLDNN_CFLAG_RELEASE} + -DCMAKE_INSTALL_PREFIX=${MKLDNN_INSTALL_DIR} + -DCMAKE_BUILD_TYPE=${CMAKE_BUILD_TYPE} + -DCMAKE_POSITION_INDEPENDENT_CODE=ON + -DDNNL_BUILD_TESTS=OFF + -DDNNL_BUILD_EXAMPLES=OFF + CMAKE_CACHE_ARGS -DCMAKE_INSTALL_PREFIX:PATH=${MKLDNN_INSTALL_DIR}) -MESSAGE(STATUS "MKLDNN library: ${MKLDNN_LIB}") +message(STATUS "MKLDNN library: ${MKLDNN_LIB}") add_definitions(-DPADDLE_WITH_MKLDNN) # copy the real so.0 lib to install dir # it can be directly contained in wheel or capi if(WIN32) - SET(MKLDNN_SHARED_LIB ${MKLDNN_INSTALL_DIR}/bin/mkldnn.dll) + set(MKLDNN_SHARED_LIB ${MKLDNN_INSTALL_DIR}/bin/mkldnn.dll) - file(TO_NATIVE_PATH ${MKLDNN_INSTALL_DIR} NATIVE_MKLDNN_INSTALL_DIR) - file(TO_NATIVE_PATH ${MKLDNN_SHARED_LIB} NATIVE_MKLDNN_SHARED_LIB) + file(TO_NATIVE_PATH ${MKLDNN_INSTALL_DIR} NATIVE_MKLDNN_INSTALL_DIR) + file(TO_NATIVE_PATH ${MKLDNN_SHARED_LIB} NATIVE_MKLDNN_SHARED_LIB) - ADD_CUSTOM_COMMAND(OUTPUT ${MKLDNN_LIB} - COMMAND (copy ${NATIVE_MKLDNN_INSTALL_DIR}\\bin\\dnnl.dll ${NATIVE_MKLDNN_SHARED_LIB} /Y) - COMMAND dumpbin /exports ${MKLDNN_INSTALL_DIR}/bin/mkldnn.dll > ${MKLDNN_INSTALL_DIR}/bin/exports.txt - COMMAND echo LIBRARY mkldnn > ${MKLDNN_INSTALL_DIR}/bin/mkldnn.def - COMMAND echo EXPORTS >> ${MKLDNN_INSTALL_DIR}/bin/mkldnn.def - COMMAND echo off && (for /f "skip=19 tokens=4" %A in (${MKLDNN_INSTALL_DIR}/bin/exports.txt) do echo %A >> ${MKLDNN_INSTALL_DIR}/bin/mkldnn.def) && echo on - COMMAND lib /def:${MKLDNN_INSTALL_DIR}/bin/mkldnn.def /out:${MKLDNN_LIB} /machine:x64 - COMMENT "Generate mkldnn.lib manually--->" - DEPENDS ${MKLDNN_PROJECT} - VERBATIM) - ADD_CUSTOM_TARGET(mkldnn_cmd ALL DEPENDS ${MKLDNN_LIB}) + add_custom_command( + OUTPUT ${MKLDNN_LIB} + COMMAND (copy ${NATIVE_MKLDNN_INSTALL_DIR}\\bin\\dnnl.dll + ${NATIVE_MKLDNN_SHARED_LIB} /Y) + COMMAND dumpbin /exports ${MKLDNN_INSTALL_DIR}/bin/mkldnn.dll > + ${MKLDNN_INSTALL_DIR}/bin/exports.txt + COMMAND echo LIBRARY mkldnn > ${MKLDNN_INSTALL_DIR}/bin/mkldnn.def + COMMAND echo EXPORTS >> ${MKLDNN_INSTALL_DIR}/bin/mkldnn.def + COMMAND + echo off && (for + /f + "skip=19 tokens=4" + %A + in + (${MKLDNN_INSTALL_DIR}/bin/exports.txt) + do + echo + %A + >> + ${MKLDNN_INSTALL_DIR}/bin/mkldnn.def) && echo on + COMMAND lib /def:${MKLDNN_INSTALL_DIR}/bin/mkldnn.def /out:${MKLDNN_LIB} + /machine:x64 + COMMENT "Generate mkldnn.lib manually--->" + DEPENDS ${MKLDNN_PROJECT} + VERBATIM) + add_custom_target(mkldnn_cmd ALL DEPENDS ${MKLDNN_LIB}) else(WIN32) - SET(MKLDNN_SHARED_LIB ${MKLDNN_INSTALL_DIR}/libmkldnn.so.0) - SET(MKLDNN_SHARED_LIB_1 ${MKLDNN_INSTALL_DIR}/libdnnl.so.1) - SET(MKLDNN_SHARED_LIB_2 ${MKLDNN_INSTALL_DIR}/libdnnl.so.2) - ADD_CUSTOM_COMMAND(OUTPUT ${MKLDNN_SHARED_LIB_2} - COMMAND ${CMAKE_COMMAND} -E copy ${MKLDNN_LIB} ${MKLDNN_SHARED_LIB} - COMMAND ${CMAKE_COMMAND} -E copy ${MKLDNN_LIB} ${MKLDNN_SHARED_LIB_1} - COMMAND ${CMAKE_COMMAND} -E copy ${MKLDNN_LIB} ${MKLDNN_SHARED_LIB_2} - DEPENDS ${MKLDNN_PROJECT}) - ADD_CUSTOM_TARGET(mkldnn_cmd ALL DEPENDS ${MKLDNN_SHARED_LIB_2}) + set(MKLDNN_SHARED_LIB ${MKLDNN_INSTALL_DIR}/libmkldnn.so.0) + set(MKLDNN_SHARED_LIB_1 ${MKLDNN_INSTALL_DIR}/libdnnl.so.1) + set(MKLDNN_SHARED_LIB_2 ${MKLDNN_INSTALL_DIR}/libdnnl.so.2) + add_custom_command( + OUTPUT ${MKLDNN_SHARED_LIB_2} + COMMAND ${CMAKE_COMMAND} -E copy ${MKLDNN_LIB} ${MKLDNN_SHARED_LIB} + COMMAND ${CMAKE_COMMAND} -E copy ${MKLDNN_LIB} ${MKLDNN_SHARED_LIB_1} + COMMAND ${CMAKE_COMMAND} -E copy ${MKLDNN_LIB} ${MKLDNN_SHARED_LIB_2} + DEPENDS ${MKLDNN_PROJECT}) + add_custom_target(mkldnn_cmd ALL DEPENDS ${MKLDNN_SHARED_LIB_2}) endif(WIN32) # generate a static dummy target to track mkldnn dependencies # for cc_library(xxx SRCS xxx.c DEPS mkldnn) generate_dummy_static_lib(LIB_NAME "mkldnn" GENERATOR "mkldnn.cmake") -TARGET_LINK_LIBRARIES(mkldnn ${MKLDNN_LIB} ${MKLML_IOMP_LIB}) -ADD_DEPENDENCIES(mkldnn ${MKLDNN_PROJECT} mkldnn_cmd) +target_link_libraries(mkldnn ${MKLDNN_LIB} ${MKLML_IOMP_LIB}) +add_dependencies(mkldnn ${MKLDNN_PROJECT} mkldnn_cmd) diff --git a/cmake/external/mklml.cmake b/cmake/external/mklml.cmake index a2fd2fe03c1..90d61f47a52 100644 --- a/cmake/external/mklml.cmake +++ b/cmake/external/mklml.cmake @@ -12,59 +12,68 @@ # See the License for the specific language governing permissions and # limitations under the License. -INCLUDE(ExternalProject) -SET(MKLML_INSTALL_DIR ${THIRD_PARTY_PATH}/install/mklml) -SET(MKLML_INC_DIR ${MKLML_INSTALL_DIR}/include) -SET(MKLML_LIB_DIR ${MKLML_INSTALL_DIR}/lib) -SET(CMAKE_INSTALL_RPATH "${CMAKE_INSTALL_RPATH}" "${MKLML_LIB_DIR}") +include(ExternalProject) +set(MKLML_INSTALL_DIR ${THIRD_PARTY_PATH}/install/mklml) +set(MKLML_INC_DIR ${MKLML_INSTALL_DIR}/include) +set(MKLML_LIB_DIR ${MKLML_INSTALL_DIR}/lib) +set(CMAKE_INSTALL_RPATH "${CMAKE_INSTALL_RPATH}" "${MKLML_LIB_DIR}") -IF(WIN32) - SET(MKLML_VER "mklml_win_2019.0.5.20190502" CACHE STRING "" FORCE) - SET(MKLML_URL "https://paddlepaddledeps.bj.bcebos.com/${MKLML_VER}.zip" CACHE STRING "" FORCE) - SET(MKLML_URL_MD5 ff8c5237570f03eea37377ccfc95a08a) - SET(MKLML_LIB ${MKLML_LIB_DIR}/mklml.lib) - SET(MKLML_IOMP_LIB ${MKLML_LIB_DIR}/libiomp5md.lib) - SET(MKLML_SHARED_LIB ${MKLML_LIB_DIR}/mklml.dll) - SET(MKLML_SHARED_IOMP_LIB ${MKLML_LIB_DIR}/libiomp5md.dll) -ELSE() - #TODO(intel-huying): - # Now enable csrmm function in mklml library temporarily, it will be updated as offical version later. - SET(MKLML_VER "csrmm_mklml_lnx_2019.0.5" CACHE STRING "" FORCE) - SET(MKLML_URL "http://paddlepaddledeps.bj.bcebos.com/${MKLML_VER}.tgz" CACHE STRING "" FORCE) - SET(MKLML_URL_MD5 bc6a7faea6a2a9ad31752386f3ae87da) - SET(MKLML_LIB ${MKLML_LIB_DIR}/libmklml_intel.so) - SET(MKLML_IOMP_LIB ${MKLML_LIB_DIR}/libiomp5.so) - SET(MKLML_SHARED_LIB ${MKLML_LIB_DIR}/libmklml_intel.so) - SET(MKLML_SHARED_IOMP_LIB ${MKLML_LIB_DIR}/libiomp5.so) -ENDIF() +if(WIN32) + set(MKLML_VER + "mklml_win_2019.0.5.20190502" + CACHE STRING "" FORCE) + set(MKLML_URL + "https://paddlepaddledeps.bj.bcebos.com/${MKLML_VER}.zip" + CACHE STRING "" FORCE) + set(MKLML_URL_MD5 ff8c5237570f03eea37377ccfc95a08a) + set(MKLML_LIB ${MKLML_LIB_DIR}/mklml.lib) + set(MKLML_IOMP_LIB ${MKLML_LIB_DIR}/libiomp5md.lib) + set(MKLML_SHARED_LIB ${MKLML_LIB_DIR}/mklml.dll) + set(MKLML_SHARED_IOMP_LIB ${MKLML_LIB_DIR}/libiomp5md.dll) +else() + #TODO(intel-huying): + # Now enable csrmm function in mklml library temporarily, it will be updated as offical version later. + set(MKLML_VER + "csrmm_mklml_lnx_2019.0.5" + CACHE STRING "" FORCE) + set(MKLML_URL + "http://paddlepaddledeps.bj.bcebos.com/${MKLML_VER}.tgz" + CACHE STRING "" FORCE) + set(MKLML_URL_MD5 bc6a7faea6a2a9ad31752386f3ae87da) + set(MKLML_LIB ${MKLML_LIB_DIR}/libmklml_intel.so) + set(MKLML_IOMP_LIB ${MKLML_LIB_DIR}/libiomp5.so) + set(MKLML_SHARED_LIB ${MKLML_LIB_DIR}/libmklml_intel.so) + set(MKLML_SHARED_IOMP_LIB ${MKLML_LIB_DIR}/libiomp5.so) +endif() -SET(MKLML_PROJECT "extern_mklml") -MESSAGE(STATUS "MKLML_VER: ${MKLML_VER}, MKLML_URL: ${MKLML_URL}") -SET(MKLML_PREFIX_DIR ${THIRD_PARTY_PATH}/mklml) -SET(MKLML_SOURCE_DIR ${THIRD_PARTY_PATH}/mklml/src/extern_mklml) +set(MKLML_PROJECT "extern_mklml") +message(STATUS "MKLML_VER: ${MKLML_VER}, MKLML_URL: ${MKLML_URL}") +set(MKLML_PREFIX_DIR ${THIRD_PARTY_PATH}/mklml) +set(MKLML_SOURCE_DIR ${THIRD_PARTY_PATH}/mklml/src/extern_mklml) -# Ninja Generator can not establish the correct dependency relationship between the imported library with target, +# Ninja Generator can not establish the correct dependency relationship between the imported library with target, # the product file in the ExternalProject need to be specified manually, please refer to # https://stackoverflow.com/questions/54866067/cmake-and-ninja-missing-and-no-known-rule-to-make-it # It is the same to all other ExternalProject. ExternalProject_Add( - ${MKLML_PROJECT} - ${EXTERNAL_PROJECT_LOG_ARGS} - URL ${MKLML_URL} - URL_MD5 ${MKLML_URL_MD5} - PREFIX ${MKLML_PREFIX_DIR} - DOWNLOAD_NO_PROGRESS 1 - CONFIGURE_COMMAND "" - BUILD_COMMAND "" - UPDATE_COMMAND "" - INSTALL_COMMAND ${CMAKE_COMMAND} -E copy_directory ${MKLML_SOURCE_DIR}/include ${MKLML_INC_DIR} && - ${CMAKE_COMMAND} -E copy_directory ${MKLML_SOURCE_DIR}/lib ${MKLML_LIB_DIR} - BUILD_BYPRODUCTS ${MKLML_LIB} - BUILD_BYPRODUCTS ${MKLML_IOMP_LIB} -) + ${MKLML_PROJECT} + ${EXTERNAL_PROJECT_LOG_ARGS} + URL ${MKLML_URL} + URL_MD5 ${MKLML_URL_MD5} + PREFIX ${MKLML_PREFIX_DIR} + DOWNLOAD_NO_PROGRESS 1 + CONFIGURE_COMMAND "" + BUILD_COMMAND "" + UPDATE_COMMAND "" + INSTALL_COMMAND + ${CMAKE_COMMAND} -E copy_directory ${MKLML_SOURCE_DIR}/include + ${MKLML_INC_DIR} && ${CMAKE_COMMAND} -E copy_directory + ${MKLML_SOURCE_DIR}/lib ${MKLML_LIB_DIR} + BUILD_BYPRODUCTS ${MKLML_LIB} + BUILD_BYPRODUCTS ${MKLML_IOMP_LIB}) -INCLUDE_DIRECTORIES(${MKLML_INC_DIR}) +include_directories(${MKLML_INC_DIR}) -ADD_LIBRARY(mklml SHARED IMPORTED GLOBAL) -SET_PROPERTY(TARGET mklml PROPERTY IMPORTED_LOCATION ${MKLML_LIB}) -ADD_DEPENDENCIES(mklml ${MKLML_PROJECT}) +add_library(mklml SHARED IMPORTED GLOBAL) +set_property(TARGET mklml PROPERTY IMPORTED_LOCATION ${MKLML_LIB}) +add_dependencies(mklml ${MKLML_PROJECT}) diff --git a/cmake/external/onnxruntime.cmake b/cmake/external/onnxruntime.cmake index 2162f87812d..9ace4caafd1 100644 --- a/cmake/external/onnxruntime.cmake +++ b/cmake/external/onnxruntime.cmake @@ -12,83 +12,114 @@ # See the License for the specific language governing permissions and # limitations under the License. -if (NOT WITH_ONNXRUNTIME) +if(NOT WITH_ONNXRUNTIME) return() -endif () +endif() -if (WITH_ARM) +if(WITH_ARM) message(SEND_ERROR "The current onnxruntime backend doesn't support ARM cpu") return() -endif () +endif() -INCLUDE(ExternalProject) +include(ExternalProject) add_definitions(-DPADDLE_WITH_ONNXRUNTIME) -SET(ONNXRUNTIME_PROJECT "extern_onnxruntime") -SET(ONNXRUNTIME_PREFIX_DIR ${THIRD_PARTY_PATH}/onnxruntime) -SET(ONNXRUNTIME_SOURCE_DIR ${THIRD_PARTY_PATH}/onnxruntime/src/${ONNXRUNTIME_PROJECT}) -SET(ONNXRUNTIME_INSTALL_DIR ${THIRD_PARTY_PATH}/install/onnxruntime) -SET(ONNXRUNTIME_INC_DIR "${ONNXRUNTIME_INSTALL_DIR}/include" CACHE PATH "onnxruntime include directory." FORCE) -SET(ONNXRUNTIME_LIB_DIR "${ONNXRUNTIME_INSTALL_DIR}/lib" CACHE PATH "onnxruntime lib directory." FORCE) -SET(CMAKE_BUILD_RPATH "${CMAKE_BUILD_RPATH}" "${ONNXRUNTIME_LIB_DIR}") - +set(ONNXRUNTIME_PROJECT "extern_onnxruntime") +set(ONNXRUNTIME_PREFIX_DIR ${THIRD_PARTY_PATH}/onnxruntime) +set(ONNXRUNTIME_SOURCE_DIR + ${THIRD_PARTY_PATH}/onnxruntime/src/${ONNXRUNTIME_PROJECT}) +set(ONNXRUNTIME_INSTALL_DIR ${THIRD_PARTY_PATH}/install/onnxruntime) +set(ONNXRUNTIME_INC_DIR + "${ONNXRUNTIME_INSTALL_DIR}/include" + CACHE PATH "onnxruntime include directory." FORCE) +set(ONNXRUNTIME_LIB_DIR + "${ONNXRUNTIME_INSTALL_DIR}/lib" + CACHE PATH "onnxruntime lib directory." FORCE) +set(CMAKE_BUILD_RPATH "${CMAKE_BUILD_RPATH}" "${ONNXRUNTIME_LIB_DIR}") -if (WIN32) - SET(ONNXRUNTIME_URL "https://github.com/microsoft/onnxruntime/releases/download/v1.10.0/onnxruntime-win-x64-1.10.0.zip") -elseif (APPLE) - SET(ONNXRUNTIME_URL "https://github.com/microsoft/onnxruntime/releases/download/v1.10.0/onnxruntime-osx-x86_64-1.10.0.tgz") -else () - SET(ONNXRUNTIME_URL "https://github.com/microsoft/onnxruntime/releases/download/v1.10.0/onnxruntime-linux-x64-1.10.0.tgz") +if(WIN32) + set(ONNXRUNTIME_URL + "https://github.com/microsoft/onnxruntime/releases/download/v1.10.0/onnxruntime-win-x64-1.10.0.zip" + ) +elseif(APPLE) + set(ONNXRUNTIME_URL + "https://github.com/microsoft/onnxruntime/releases/download/v1.10.0/onnxruntime-osx-x86_64-1.10.0.tgz" + ) +else() + set(ONNXRUNTIME_URL + "https://github.com/microsoft/onnxruntime/releases/download/v1.10.0/onnxruntime-linux-x64-1.10.0.tgz" + ) endif() +include_directories(${ONNXRUNTIME_INC_DIR} +)# For ONNXRUNTIME code to include internal headers. +if(WIN32) + set(ONNXRUNTIME_SOURCE_LIB + "${ONNXRUNTIME_SOURCE_DIR}/lib/onnxruntime.dll" + CACHE FILEPATH "ONNXRUNTIME source library." FORCE) + set(ONNXRUNTIME_SHARED_LIB + "${ONNXRUNTIME_INSTALL_DIR}/lib/onnxruntime.dll" + CACHE FILEPATH "ONNXRUNTIME shared library." FORCE) + set(ONNXRUNTIME_LIB + "${ONNXRUNTIME_INSTALL_DIR}/lib/onnxruntime.lib" + CACHE FILEPATH "ONNXRUNTIME static library." FORCE) +elseif(APPLE) + set(ONNXRUNTIME_SOURCE_LIB + "${ONNXRUNTIME_SOURCE_DIR}/lib/libonnxruntime.1.10.0.dylib" + CACHE FILEPATH "ONNXRUNTIME source library." FORCE) + set(ONNXRUNTIME_LIB + "${ONNXRUNTIME_INSTALL_DIR}/lib/libonnxruntime.1.10.0.dylib" + CACHE FILEPATH "ONNXRUNTIME static library." FORCE) + set(ONNXRUNTIME_SHARED_LIB + ${ONNXRUNTIME_LIB} + CACHE FILEPATH "ONNXRUNTIME shared library." FORCE) +else() + set(ONNXRUNTIME_SOURCE_LIB + "${ONNXRUNTIME_SOURCE_DIR}/lib/libonnxruntime.so.1.10.0" + CACHE FILEPATH "ONNXRUNTIME source library." FORCE) + set(ONNXRUNTIME_LIB + "${ONNXRUNTIME_INSTALL_DIR}/lib/libonnxruntime.so.1.10.0" + CACHE FILEPATH "ONNXRUNTIME static library." FORCE) + set(ONNXRUNTIME_SHARED_LIB + ${ONNXRUNTIME_LIB} + CACHE FILEPATH "ONNXRUNTIME shared library." FORCE) +endif() -INCLUDE_DIRECTORIES(${ONNXRUNTIME_INC_DIR}) # For ONNXRUNTIME code to include internal headers. -if (WIN32) - SET(ONNXRUNTIME_SOURCE_LIB "${ONNXRUNTIME_SOURCE_DIR}/lib/onnxruntime.dll" CACHE FILEPATH "ONNXRUNTIME source library." FORCE) - SET(ONNXRUNTIME_SHARED_LIB "${ONNXRUNTIME_INSTALL_DIR}/lib/onnxruntime.dll" CACHE FILEPATH "ONNXRUNTIME shared library." FORCE) - SET(ONNXRUNTIME_LIB "${ONNXRUNTIME_INSTALL_DIR}/lib/onnxruntime.lib" CACHE FILEPATH "ONNXRUNTIME static library." FORCE) -elseif (APPLE) - SET(ONNXRUNTIME_SOURCE_LIB "${ONNXRUNTIME_SOURCE_DIR}/lib/libonnxruntime.1.10.0.dylib" CACHE FILEPATH "ONNXRUNTIME source library." FORCE) - SET(ONNXRUNTIME_LIB "${ONNXRUNTIME_INSTALL_DIR}/lib/libonnxruntime.1.10.0.dylib" CACHE FILEPATH "ONNXRUNTIME static library." FORCE) - SET(ONNXRUNTIME_SHARED_LIB ${ONNXRUNTIME_LIB} CACHE FILEPATH "ONNXRUNTIME shared library." FORCE) -else () - SET(ONNXRUNTIME_SOURCE_LIB "${ONNXRUNTIME_SOURCE_DIR}/lib/libonnxruntime.so.1.10.0" CACHE FILEPATH "ONNXRUNTIME source library." FORCE) - SET(ONNXRUNTIME_LIB "${ONNXRUNTIME_INSTALL_DIR}/lib/libonnxruntime.so.1.10.0" CACHE FILEPATH "ONNXRUNTIME static library." FORCE) - SET(ONNXRUNTIME_SHARED_LIB ${ONNXRUNTIME_LIB} CACHE FILEPATH "ONNXRUNTIME shared library." FORCE) -endif () - -if (WIN32) +if(WIN32) ExternalProject_Add( - ${ONNXRUNTIME_PROJECT} - ${EXTERNAL_PROJECT_LOG_ARGS} - URL ${ONNXRUNTIME_URL} - PREFIX ${ONNXRUNTIME_PREFIX_DIR} - DOWNLOAD_NO_PROGRESS 1 - CONFIGURE_COMMAND "" - BUILD_COMMAND "" - UPDATE_COMMAND "" - INSTALL_COMMAND ${CMAKE_COMMAND} -E copy ${ONNXRUNTIME_SOURCE_LIB} ${ONNXRUNTIME_SHARED_LIB} && - ${CMAKE_COMMAND} -E copy ${ONNXRUNTIME_SOURCE_DIR}/lib/onnxruntime.lib ${ONNXRUNTIME_LIB} && - ${CMAKE_COMMAND} -E copy_directory ${ONNXRUNTIME_SOURCE_DIR}/include ${ONNXRUNTIME_INC_DIR} - BUILD_BYPRODUCTS ${ONNXRUNTIME_LIB} - ) -else () + ${ONNXRUNTIME_PROJECT} + ${EXTERNAL_PROJECT_LOG_ARGS} + URL ${ONNXRUNTIME_URL} + PREFIX ${ONNXRUNTIME_PREFIX_DIR} + DOWNLOAD_NO_PROGRESS 1 + CONFIGURE_COMMAND "" + BUILD_COMMAND "" + UPDATE_COMMAND "" + INSTALL_COMMAND + ${CMAKE_COMMAND} -E copy ${ONNXRUNTIME_SOURCE_LIB} + ${ONNXRUNTIME_SHARED_LIB} && ${CMAKE_COMMAND} -E copy + ${ONNXRUNTIME_SOURCE_DIR}/lib/onnxruntime.lib ${ONNXRUNTIME_LIB} && + ${CMAKE_COMMAND} -E copy_directory ${ONNXRUNTIME_SOURCE_DIR}/include + ${ONNXRUNTIME_INC_DIR} + BUILD_BYPRODUCTS ${ONNXRUNTIME_LIB}) +else() ExternalProject_Add( ${ONNXRUNTIME_PROJECT} ${EXTERNAL_PROJECT_LOG_ARGS} - URL ${ONNXRUNTIME_URL} - PREFIX ${ONNXRUNTIME_PREFIX_DIR} - DOWNLOAD_NO_PROGRESS 1 - CONFIGURE_COMMAND "" - BUILD_COMMAND "" - UPDATE_COMMAND "" - INSTALL_COMMAND ${CMAKE_COMMAND} -E copy ${ONNXRUNTIME_SOURCE_LIB} ${ONNXRUNTIME_LIB} && - ${CMAKE_COMMAND} -E copy_directory ${ONNXRUNTIME_SOURCE_DIR}/include ${ONNXRUNTIME_INC_DIR} - BUILD_BYPRODUCTS ${ONNXRUNTIME_LIB} - ) + URL ${ONNXRUNTIME_URL} + PREFIX ${ONNXRUNTIME_PREFIX_DIR} + DOWNLOAD_NO_PROGRESS 1 + CONFIGURE_COMMAND "" + BUILD_COMMAND "" + UPDATE_COMMAND "" + INSTALL_COMMAND + ${CMAKE_COMMAND} -E copy ${ONNXRUNTIME_SOURCE_LIB} ${ONNXRUNTIME_LIB} && + ${CMAKE_COMMAND} -E copy_directory ${ONNXRUNTIME_SOURCE_DIR}/include + ${ONNXRUNTIME_INC_DIR} + BUILD_BYPRODUCTS ${ONNXRUNTIME_LIB}) endif() -ADD_LIBRARY(onnxruntime STATIC IMPORTED GLOBAL) -SET_PROPERTY(TARGET onnxruntime PROPERTY IMPORTED_LOCATION ${ONNXRUNTIME_LIB}) -ADD_DEPENDENCIES(onnxruntime ${ONNXRUNTIME_PROJECT}) +add_library(onnxruntime STATIC IMPORTED GLOBAL) +set_property(TARGET onnxruntime PROPERTY IMPORTED_LOCATION ${ONNXRUNTIME_LIB}) +add_dependencies(onnxruntime ${ONNXRUNTIME_PROJECT}) diff --git a/cmake/external/openblas.cmake b/cmake/external/openblas.cmake index b0998317385..1cccfb86f42 100644 --- a/cmake/external/openblas.cmake +++ b/cmake/external/openblas.cmake @@ -12,80 +12,84 @@ # See the License for the specific language governing permissions and # limitations under the License. -INCLUDE(ExternalProject) +include(ExternalProject) -SET(CBLAS_PREFIX_DIR ${THIRD_PARTY_PATH}/openblas) -SET(CBLAS_INSTALL_DIR ${THIRD_PARTY_PATH}/install/openblas) -SET(CBLAS_REPOSITORY ${GIT_URL}/xianyi/OpenBLAS.git) -SET(CBLAS_TAG v0.3.7) +set(CBLAS_PREFIX_DIR ${THIRD_PARTY_PATH}/openblas) +set(CBLAS_INSTALL_DIR ${THIRD_PARTY_PATH}/install/openblas) +set(CBLAS_REPOSITORY ${GIT_URL}/xianyi/OpenBLAS.git) +set(CBLAS_TAG v0.3.7) if(APPLE AND WITH_ARM) - SET(CBLAS_TAG v0.3.13) + set(CBLAS_TAG v0.3.13) endif() if(WITH_MIPS) - SET(CBLAS_TAG v0.3.13) + set(CBLAS_TAG v0.3.13) endif() -IF(NOT WIN32) - SET(CBLAS_LIBRARIES - "${CBLAS_INSTALL_DIR}/lib/${CMAKE_STATIC_LIBRARY_PREFIX}openblas${CMAKE_STATIC_LIBRARY_SUFFIX}" - CACHE FILEPATH "openblas library." FORCE) - SET(CBLAS_INC_DIR "${CBLAS_INSTALL_DIR}/include" CACHE PATH "openblas include directory." FORCE) - SET(OPENBLAS_CC "${CMAKE_C_COMPILER} -Wno-unused-but-set-variable -Wno-unused-variable") +if(NOT WIN32) + set(CBLAS_LIBRARIES + "${CBLAS_INSTALL_DIR}/lib/${CMAKE_STATIC_LIBRARY_PREFIX}openblas${CMAKE_STATIC_LIBRARY_SUFFIX}" + CACHE FILEPATH "openblas library." FORCE) + set(CBLAS_INC_DIR + "${CBLAS_INSTALL_DIR}/include" + CACHE PATH "openblas include directory." FORCE) + set(OPENBLAS_CC + "${CMAKE_C_COMPILER} -Wno-unused-but-set-variable -Wno-unused-variable") - IF(APPLE) - SET(OPENBLAS_CC "${CMAKE_C_COMPILER} -isysroot ${CMAKE_OSX_SYSROOT}") - ENDIF() - SET(OPTIONAL_ARGS "") - IF(CMAKE_SYSTEM_PROCESSOR MATCHES "^x86(_64)?$") - SET(OPTIONAL_ARGS DYNAMIC_ARCH=1 NUM_THREADS=64) - ENDIF() + if(APPLE) + set(OPENBLAS_CC "${CMAKE_C_COMPILER} -isysroot ${CMAKE_OSX_SYSROOT}") + endif() + set(OPTIONAL_ARGS "") + if(CMAKE_SYSTEM_PROCESSOR MATCHES "^x86(_64)?$") + set(OPTIONAL_ARGS DYNAMIC_ARCH=1 NUM_THREADS=64) + endif() - SET(COMMON_ARGS CC=${OPENBLAS_CC} NO_SHARED=1 NO_LAPACK=1 libs) - ExternalProject_Add( - extern_openblas - ${EXTERNAL_PROJECT_LOG_ARGS} - ${SHALLOW_CLONE} - GIT_REPOSITORY ${CBLAS_REPOSITORY} - GIT_TAG ${CBLAS_TAG} - PREFIX ${CBLAS_PREFIX_DIR} - INSTALL_DIR ${CBLAS_INSTALL_DIR} - BUILD_IN_SOURCE 1 - BUILD_COMMAND make -j$(nproc) ${COMMON_ARGS} ${OPTIONAL_ARGS} - INSTALL_COMMAND make install NO_SHARED=1 NO_LAPACK=1 PREFIX= - UPDATE_COMMAND "" - CONFIGURE_COMMAND "" - BUILD_BYPRODUCTS ${CBLAS_LIBRARIES} - ) -ELSE(NOT WIN32) - SET(CBLAS_LIBRARIES - "${CBLAS_INSTALL_DIR}/lib/openblas${CMAKE_STATIC_LIBRARY_SUFFIX}" - CACHE FILEPATH "openblas library." FORCE) - SET(CBLAS_INC_DIR "${CBLAS_INSTALL_DIR}/include/openblas" CACHE PATH "openblas include directory." FORCE) - ExternalProject_Add( - extern_openblas - ${EXTERNAL_PROJECT_LOG_ARGS} - GIT_REPOSITORY ${CBLAS_REPOSITORY} - GIT_TAG ${CBLAS_TAG} - PREFIX ${CBLAS_PREFIX_DIR} - INSTALL_DIR ${CBLAS_INSTALL_DIR} - BUILD_IN_SOURCE 0 - UPDATE_COMMAND "" - CMAKE_ARGS -DCMAKE_C_COMPILER=${CMAKE_C_COMPILER} - -DCMAKE_CXX_COMPILER=${CMAKE_CXX_COMPILER} - -DCMAKE_C_FLAGS=${CMAKE_C_FLAGS} - -DCMAKE_CXX_FLAGS=${CMAKE_CXX_FLAGS} - -DCMAKE_INSTALL_PREFIX=${CBLAS_INSTALL_DIR} - -DCMAKE_POSITION_INDEPENDENT_CODE=ON - -DCMAKE_BUILD_TYPE=${THIRD_PARTY_BUILD_TYPE} - -DBUILD_SHARED_LIBS=ON - -DMSVC_STATIC_CRT=${MSVC_STATIC_CRT} - ${EXTERNAL_OPTIONAL_ARGS} - CMAKE_CACHE_ARGS -DCMAKE_INSTALL_PREFIX:PATH=${CBLAS_INSTALL_DIR} - -DCMAKE_POSITION_INDEPENDENT_CODE:BOOL=ON - -DCMAKE_BUILD_TYPE:STRING=${THIRD_PARTY_BUILD_TYPE} - # ninja need to know where openblas.lib comes from - BUILD_BYPRODUCTS ${CBLAS_LIBRARIES} - ) - SET(OPENBLAS_SHARED_LIB ${CBLAS_INSTALL_DIR}/bin/openblas${CMAKE_SHARED_LIBRARY_SUFFIX}) -ENDIF(NOT WIN32) + set(COMMON_ARGS CC=${OPENBLAS_CC} NO_SHARED=1 NO_LAPACK=1 libs) + ExternalProject_Add( + extern_openblas + ${EXTERNAL_PROJECT_LOG_ARGS} ${SHALLOW_CLONE} + GIT_REPOSITORY ${CBLAS_REPOSITORY} + GIT_TAG ${CBLAS_TAG} + PREFIX ${CBLAS_PREFIX_DIR} + INSTALL_DIR ${CBLAS_INSTALL_DIR} + BUILD_IN_SOURCE 1 + BUILD_COMMAND make -j$(nproc) ${COMMON_ARGS} ${OPTIONAL_ARGS} + INSTALL_COMMAND make install NO_SHARED=1 NO_LAPACK=1 PREFIX= + UPDATE_COMMAND "" + CONFIGURE_COMMAND "" + BUILD_BYPRODUCTS ${CBLAS_LIBRARIES}) +else(NOT WIN32) + set(CBLAS_LIBRARIES + "${CBLAS_INSTALL_DIR}/lib/openblas${CMAKE_STATIC_LIBRARY_SUFFIX}" + CACHE FILEPATH "openblas library." FORCE) + set(CBLAS_INC_DIR + "${CBLAS_INSTALL_DIR}/include/openblas" + CACHE PATH "openblas include directory." FORCE) + ExternalProject_Add( + extern_openblas + ${EXTERNAL_PROJECT_LOG_ARGS} + GIT_REPOSITORY ${CBLAS_REPOSITORY} + GIT_TAG ${CBLAS_TAG} + PREFIX ${CBLAS_PREFIX_DIR} + INSTALL_DIR ${CBLAS_INSTALL_DIR} + BUILD_IN_SOURCE 0 + UPDATE_COMMAND "" + CMAKE_ARGS -DCMAKE_C_COMPILER=${CMAKE_C_COMPILER} + -DCMAKE_CXX_COMPILER=${CMAKE_CXX_COMPILER} + -DCMAKE_C_FLAGS=${CMAKE_C_FLAGS} + -DCMAKE_CXX_FLAGS=${CMAKE_CXX_FLAGS} + -DCMAKE_INSTALL_PREFIX=${CBLAS_INSTALL_DIR} + -DCMAKE_POSITION_INDEPENDENT_CODE=ON + -DCMAKE_BUILD_TYPE=${THIRD_PARTY_BUILD_TYPE} + -DBUILD_SHARED_LIBS=ON + -DMSVC_STATIC_CRT=${MSVC_STATIC_CRT} + ${EXTERNAL_OPTIONAL_ARGS} + CMAKE_CACHE_ARGS + -DCMAKE_INSTALL_PREFIX:PATH=${CBLAS_INSTALL_DIR} + -DCMAKE_POSITION_INDEPENDENT_CODE:BOOL=ON + -DCMAKE_BUILD_TYPE:STRING=${THIRD_PARTY_BUILD_TYPE} + # ninja need to know where openblas.lib comes from + BUILD_BYPRODUCTS ${CBLAS_LIBRARIES}) + set(OPENBLAS_SHARED_LIB + ${CBLAS_INSTALL_DIR}/bin/openblas${CMAKE_SHARED_LIBRARY_SUFFIX}) +endif(NOT WIN32) diff --git a/cmake/external/paddle2onnx.cmake b/cmake/external/paddle2onnx.cmake index 2fc22578cae..8252b2a73e9 100644 --- a/cmake/external/paddle2onnx.cmake +++ b/cmake/external/paddle2onnx.cmake @@ -16,84 +16,91 @@ if(NOT WITH_ONNXRUNTIME) return() endif() -if (WITH_ARM) +if(WITH_ARM) message(SEND_ERROR "The current onnxruntime backend doesn't support ARM cpu") return() -endif () +endif() -INCLUDE(ExternalProject) +include(ExternalProject) -SET(PADDLE2ONNX_PROJECT "extern_paddle2onnx") -SET(PADDLE2ONNX_PREFIX_DIR ${THIRD_PARTY_PATH}/paddle2onnx) -SET(PADDLE2ONNX_INSTALL_DIR ${THIRD_PARTY_PATH}/install/paddle2onnx) -SET(PADDLE2ONNX_INC_DIR "${PADDLE2ONNX_INSTALL_DIR}/include" CACHE PATH "paddle2onnx include directory." FORCE) -SET(PADDLE2ONNX_REPOSITORY ${GIT_URL}/PaddlePaddle/Paddle2ONNX.git) -SET(PADDLE2ONNX_TAG cpp) -SET(LIBDIR "lib") -SET(CMAKE_BUILD_RPATH "${CMAKE_BUILD_RPATH}" "${PADDLE2ONNX_INSTALL_DIR}/${LIBDIR}") +set(PADDLE2ONNX_PROJECT "extern_paddle2onnx") +set(PADDLE2ONNX_PREFIX_DIR ${THIRD_PARTY_PATH}/paddle2onnx) +set(PADDLE2ONNX_INSTALL_DIR ${THIRD_PARTY_PATH}/install/paddle2onnx) +set(PADDLE2ONNX_INC_DIR + "${PADDLE2ONNX_INSTALL_DIR}/include" + CACHE PATH "paddle2onnx include directory." FORCE) +set(PADDLE2ONNX_REPOSITORY ${GIT_URL}/PaddlePaddle/Paddle2ONNX.git) +set(PADDLE2ONNX_TAG cpp) +set(LIBDIR "lib") +set(CMAKE_BUILD_RPATH "${CMAKE_BUILD_RPATH}" + "${PADDLE2ONNX_INSTALL_DIR}/${LIBDIR}") -INCLUDE_DIRECTORIES(${PADDLE2ONNX_INC_DIR}) # For PADDLE2ONNX code to include internal headers. +include_directories(${PADDLE2ONNX_INC_DIR} +)# For PADDLE2ONNX code to include internal headers. if(WIN32) - SET(PADDLE2ONNX_LIB "${PADDLE2ONNX_INSTALL_DIR}/${LIBDIR}/paddle2onnx.lib" CACHE FILEPATH "paddle2onnx static library." FORCE) - SET(PADDLE2ONNX_SHARED_LIB "${PADDLE2ONNX_INSTALL_DIR}/${LIBDIR}/paddle2onnx.dll" CACHE FILEPATH "paddle2onnx shared library." FORCE) + set(PADDLE2ONNX_LIB + "${PADDLE2ONNX_INSTALL_DIR}/${LIBDIR}/paddle2onnx.lib" + CACHE FILEPATH "paddle2onnx static library." FORCE) + set(PADDLE2ONNX_SHARED_LIB + "${PADDLE2ONNX_INSTALL_DIR}/${LIBDIR}/paddle2onnx.dll" + CACHE FILEPATH "paddle2onnx shared library." FORCE) elseif(APPLE) - SET(PADDLE2ONNX_LIB "${PADDLE2ONNX_INSTALL_DIR}/${LIBDIR}/libpaddle2onnx.dylib" CACHE FILEPATH "PADDLE2ONNX library." FORCE) + set(PADDLE2ONNX_LIB + "${PADDLE2ONNX_INSTALL_DIR}/${LIBDIR}/libpaddle2onnx.dylib" + CACHE FILEPATH "PADDLE2ONNX library." FORCE) else() - SET(PADDLE2ONNX_LIB "${PADDLE2ONNX_INSTALL_DIR}/${LIBDIR}/libpaddle2onnx.so" CACHE FILEPATH "PADDLE2ONNX library." FORCE) + set(PADDLE2ONNX_LIB + "${PADDLE2ONNX_INSTALL_DIR}/${LIBDIR}/libpaddle2onnx.so" + CACHE FILEPATH "PADDLE2ONNX library." FORCE) endif(WIN32) - # The protoc path is required to compile onnx. string(REPLACE "/" ";" PROTOC_BIN_PATH ${PROTOBUF_PROTOC_EXECUTABLE}) list(POP_BACK PROTOC_BIN_PATH) list(JOIN PROTOC_BIN_PATH "/" PROTOC_BIN_PATH) - set(PADDLE2ONNX_OPTIONAL_ARGS - -DCMAKE_CXX_COMPILER=${CMAKE_CXX_COMPILER} - -DCMAKE_C_COMPILER=${CMAKE_C_COMPILER} - -DCMAKE_CXX_FLAGS=${CMAKE_CXX_FLAGS} - -DCMAKE_CXX_STANDARD=14 - -DCMAKE_CXX_FLAGS_RELEASE=${CMAKE_CXX_FLAGS_RELEASE} - -DCMAKE_CXX_FLAGS_DEBUG=${CMAKE_CXX_FLAGS_DEBUG} - -DCMAKE_C_FLAGS=${CMAKE_C_FLAGS} - -DCMAKE_C_FLAGS_DEBUG=${CMAKE_C_FLAGS_DEBUG} - -DCMAKE_C_FLAGS_RELEASE=${CMAKE_C_FLAGS_RELEASE} - -DONNX_CUSTOM_PROTOC_PATH=${PROTOC_BIN_PATH} - -DWITH_STATIC=OFF - -DMSVC_STATIC_CRT=${MSVC_STATIC_CRT} - -DCMAKE_INSTALL_PREFIX=${PADDLE2ONNX_INSTALL_DIR} - -DCMAKE_INSTALL_LIBDIR=${PADDLE2ONNX_INSTALL_DIR}/${LIBDIR} - -DCMAKE_POSITION_INDEPENDENT_CODE=ON - -DCMAKE_BUILD_TYPE=${THIRD_PARTY_BUILD_TYPE} - ${EXTERNAL_OPTIONAL_ARGS} -) - -if (WITH_PYTHON) - set(PADDLE2ONNX_OPTIONAL_ARGS ${PADDLE2ONNX_OPTIONAL_ARGS} - -DPYTHON_EXECUTABLE:FILEPATH=${PYTHON_EXECUTABLE} - -DPYTHON_INCLUDE_DIR:PATH=${PYTHON_INCLUDE_DIR} - -DPYTHON_LIBRARY:FILEPATH=${PYTHON_LIBRARY} - ) -endif () + -DCMAKE_CXX_COMPILER=${CMAKE_CXX_COMPILER} + -DCMAKE_C_COMPILER=${CMAKE_C_COMPILER} + -DCMAKE_CXX_FLAGS=${CMAKE_CXX_FLAGS} + -DCMAKE_CXX_STANDARD=14 + -DCMAKE_CXX_FLAGS_RELEASE=${CMAKE_CXX_FLAGS_RELEASE} + -DCMAKE_CXX_FLAGS_DEBUG=${CMAKE_CXX_FLAGS_DEBUG} + -DCMAKE_C_FLAGS=${CMAKE_C_FLAGS} + -DCMAKE_C_FLAGS_DEBUG=${CMAKE_C_FLAGS_DEBUG} + -DCMAKE_C_FLAGS_RELEASE=${CMAKE_C_FLAGS_RELEASE} + -DONNX_CUSTOM_PROTOC_PATH=${PROTOC_BIN_PATH} + -DWITH_STATIC=OFF + -DMSVC_STATIC_CRT=${MSVC_STATIC_CRT} + -DCMAKE_INSTALL_PREFIX=${PADDLE2ONNX_INSTALL_DIR} + -DCMAKE_INSTALL_LIBDIR=${PADDLE2ONNX_INSTALL_DIR}/${LIBDIR} + -DCMAKE_POSITION_INDEPENDENT_CODE=ON + -DCMAKE_BUILD_TYPE=${THIRD_PARTY_BUILD_TYPE} + ${EXTERNAL_OPTIONAL_ARGS}) +if(WITH_PYTHON) + set(PADDLE2ONNX_OPTIONAL_ARGS + ${PADDLE2ONNX_OPTIONAL_ARGS} + -DPYTHON_EXECUTABLE:FILEPATH=${PYTHON_EXECUTABLE} + -DPYTHON_INCLUDE_DIR:PATH=${PYTHON_INCLUDE_DIR} + -DPYTHON_LIBRARY:FILEPATH=${PYTHON_LIBRARY}) +endif() ExternalProject_Add( - ${PADDLE2ONNX_PROJECT} - ${EXTERNAL_PROJECT_LOG_ARGS} - ${SHALLOW_CLONE} - GIT_REPOSITORY ${PADDLE2ONNX_REPOSITORY} - GIT_TAG ${PADDLE2ONNX_TAG} - DEPENDS protobuf - PREFIX ${PADDLE2ONNX_PREFIX_DIR} - UPDATE_COMMAND "" - CMAKE_ARGS ${PADDLE2ONNX_OPTIONAL_ARGS} - CMAKE_CACHE_ARGS -DCMAKE_INSTALL_PREFIX:PATH=${PADDLE2ONNX_INSTALL_DIR} - -DCMAKE_POSITION_INDEPENDENT_CODE:BOOL=ON - -DCMAKE_BUILD_TYPE:STRING=${THIRD_PARTY_BUILD_TYPE} - BUILD_BYPRODUCTS ${PADDLE2ONNX_LIB} -) + ${PADDLE2ONNX_PROJECT} + ${EXTERNAL_PROJECT_LOG_ARGS} ${SHALLOW_CLONE} + GIT_REPOSITORY ${PADDLE2ONNX_REPOSITORY} + GIT_TAG ${PADDLE2ONNX_TAG} + DEPENDS protobuf + PREFIX ${PADDLE2ONNX_PREFIX_DIR} + UPDATE_COMMAND "" + CMAKE_ARGS ${PADDLE2ONNX_OPTIONAL_ARGS} + CMAKE_CACHE_ARGS + -DCMAKE_INSTALL_PREFIX:PATH=${PADDLE2ONNX_INSTALL_DIR} + -DCMAKE_POSITION_INDEPENDENT_CODE:BOOL=ON + -DCMAKE_BUILD_TYPE:STRING=${THIRD_PARTY_BUILD_TYPE} + BUILD_BYPRODUCTS ${PADDLE2ONNX_LIB}) -ADD_LIBRARY(paddle2onnx STATIC IMPORTED GLOBAL) -SET_PROPERTY(TARGET paddle2onnx PROPERTY IMPORTED_LOCATION ${PADDLE2ONNX_LIB}) -ADD_DEPENDENCIES(paddle2onnx ${PADDLE2ONNX_PROJECT}) +add_library(paddle2onnx STATIC IMPORTED GLOBAL) +set_property(TARGET paddle2onnx PROPERTY IMPORTED_LOCATION ${PADDLE2ONNX_LIB}) +add_dependencies(paddle2onnx ${PADDLE2ONNX_PROJECT}) diff --git a/cmake/external/pocketfft.cmake b/cmake/external/pocketfft.cmake index 7323f67d115..2d809bbcf03 100644 --- a/cmake/external/pocketfft.cmake +++ b/cmake/external/pocketfft.cmake @@ -14,30 +14,29 @@ include(ExternalProject) +set(POCKETFFT_PATH + "${THIRD_PARTY_PATH}/pocketfft" + CACHE STRING "A path setting for external_pocketfft path.") +set(POCKETFFT_PREFIX_DIR ${POCKETFFT_PATH}) -set(POCKETFFT_PATH "${THIRD_PARTY_PATH}/pocketfft" CACHE STRING "A path setting for external_pocketfft path.") -set(POCKETFFT_PREFIX_DIR ${POCKETFFT_PATH}) +set(POCKETFFT_REPOSITORY https://gitlab.mpcdf.mpg.de/mtr/pocketfft.git) +set(POCKETFFT_TAG release_for_eigen) -set(POCKETFFT_REPOSITORY https://gitlab.mpcdf.mpg.de/mtr/pocketfft.git) -set(POCKETFFT_TAG release_for_eigen) - -SET(POCKETFFT_INCLUDE_DIR ${POCKETFFT_PREFIX_DIR}/src) +set(POCKETFFT_INCLUDE_DIR ${POCKETFFT_PREFIX_DIR}/src) message("POCKETFFT_INCLUDE_DIR is ${POCKETFFT_INCLUDE_DIR}") include_directories(${POCKETFFT_INCLUDE_DIR}) ExternalProject_Add( extern_pocketfft - ${EXTERNAL_PROJECT_LOG_ARGS} - ${SHALLOW_CLONE} - GIT_REPOSITORY ${POCKETFFT_REPOSITORY} - GIT_TAG ${POCKETFFT_TAG} - PREFIX ${POCKETFFT_PREFIX_DIR} - UPDATE_COMMAND "" + ${EXTERNAL_PROJECT_LOG_ARGS} ${SHALLOW_CLONE} + GIT_REPOSITORY ${POCKETFFT_REPOSITORY} + GIT_TAG ${POCKETFFT_TAG} + PREFIX ${POCKETFFT_PREFIX_DIR} + UPDATE_COMMAND "" CONFIGURE_COMMAND "" - BUILD_COMMAND "" - INSTALL_COMMAND "" - TEST_COMMAND "" -) + BUILD_COMMAND "" + INSTALL_COMMAND "" + TEST_COMMAND "") add_library(pocketfft INTERFACE) diff --git a/cmake/external/poplar.cmake b/cmake/external/poplar.cmake index 8b2de14e966..7589059e7b3 100644 --- a/cmake/external/poplar.cmake +++ b/cmake/external/poplar.cmake @@ -14,7 +14,12 @@ macro(find_popart_version popart_version_file) file(READ ${popart_version_file} popart_version_file_content) - string(REGEX MATCH "(POPART_VERSION_STRING)[ \t\r\n](\")([0-9]+\.[0-9]+\.[0-9]+)(\\+)([A-Za-z0-9_]*)(\")" POPART_VERSION ${popart_version_file_content}) + string( + REGEX + MATCH + "(POPART_VERSION_STRING)[ \t\r\n](\")([0-9]+\.[0-9]+\.[0-9]+)(\\+)([A-Za-z0-9_]*)(\")" + POPART_VERSION + ${popart_version_file_content}) string(REPLACE "POPART_VERSION_STRING" "" POPART_VERSION "${POPART_VERSION}") string(REPLACE "\"" "" POPART_VERSION "${POPART_VERSION}") string(REPLACE " " "" POPART_VERSION "${POPART_VERSION}") @@ -28,7 +33,11 @@ endmacro() if(WITH_IPU) set(POPLAR_DIR CACHE PATH "Path to a Poplar install") set(POPART_DIR CACHE PATH "Path to a Popart install") - set(POPLAR_SDK_DIR CACHE PATH "Path to an extracted SDK archive or to a Poplar & Popart install directory (Will populate POPLAR_DIR and POPART_DIR)") + set(POPLAR_SDK_DIR + CACHE + PATH + "Path to an extracted SDK archive or to a Poplar & Popart install directory (Will populate POPLAR_DIR and POPART_DIR)" + ) # support setting SDK both from environment variable or command line arguments @@ -36,10 +45,15 @@ if(WITH_IPU) set(POPLAR_SDK_DIR $ENV{POPLAR_SDK_DIR}) endif() if(EXISTS ${POPLAR_SDK_DIR}) - execute_process(COMMAND find ${POPLAR_SDK_DIR}/ -maxdepth 1 -type d -name "popart*" - OUTPUT_VARIABLE POPART_DIR OUTPUT_STRIP_TRAILING_WHITESPACE) - execute_process(COMMAND find ${POPLAR_SDK_DIR}/ -maxdepth 1 -type d -name "poplar-*" -o -name "poplar" - OUTPUT_VARIABLE POPLAR_DIR OUTPUT_STRIP_TRAILING_WHITESPACE) + execute_process( + COMMAND find ${POPLAR_SDK_DIR}/ -maxdepth 1 -type d -name "popart*" + OUTPUT_VARIABLE POPART_DIR + OUTPUT_STRIP_TRAILING_WHITESPACE) + execute_process( + COMMAND find ${POPLAR_SDK_DIR}/ -maxdepth 1 -type d -name "poplar-*" -o + -name "poplar" + OUTPUT_VARIABLE POPLAR_DIR + OUTPUT_STRIP_TRAILING_WHITESPACE) endif() if(DEFINED ENV{POPLAR_DIR}) set(POPLAR_DIR $ENV{POPLAR_DIR}) @@ -51,7 +65,10 @@ if(WITH_IPU) if(EXISTS ${POPLAR_DIR}) message("POPLAR_DIR is ${POPLAR_DIR}") if(NOT IS_DIRECTORY "${POPLAR_DIR}") - message(FATAL_ERROR "Couldn't find a \"poplar\" or \"poplar-*\" folder in '${POPLAR_SDK_DIR}'") + message( + FATAL_ERROR + "Couldn't find a \"poplar\" or \"poplar-*\" folder in '${POPLAR_SDK_DIR}'" + ) endif() list(APPEND CMAKE_PREFIX_PATH ${POPLAR_DIR}) set(ENABLE_POPLAR_CMD "source ${POPLAR_DIR}/enable.sh") @@ -60,12 +77,16 @@ if(WITH_IPU) link_directories("${POPLAR_DIR}/lib") endif() if(NOT poplar_FOUND) - message(FATAL_ERROR "You must provide a path to a Poplar install using -DPOPLAR_DIR=/path/to/popart/build/install") + message( + FATAL_ERROR + "You must provide a path to a Poplar install using -DPOPLAR_DIR=/path/to/popart/build/install" + ) endif() if(EXISTS ${POPART_DIR}) message("POPART_DIR is ${POPART_DIR}") if(NOT IS_DIRECTORY "${POPART_DIR}") - message(FATAL_ERROR "Couldn't find a \"popart*\" folder in '${POPLAR_SDK_DIR}'") + message( + FATAL_ERROR "Couldn't find a \"popart*\" folder in '${POPLAR_SDK_DIR}'") endif() list(APPEND CMAKE_PREFIX_PATH ${POPART_DIR}) set(ENABLE_POPART_CMD "source ${POPART_DIR}/enable.sh") @@ -74,7 +95,10 @@ if(WITH_IPU) link_directories("${POPART_DIR}/lib") endif() if(NOT popart_FOUND) - message(FATAL_ERROR "You must provide a path to a Popart build using -DPOPART_DIR=/path/to/popart/build") + message( + FATAL_ERROR + "You must provide a path to a Popart build using -DPOPART_DIR=/path/to/popart/build" + ) endif() find_popart_version("${POPART_DIR}/include/popart/version.hpp") diff --git a/cmake/external/protobuf.cmake b/cmake/external/protobuf.cmake index 3a59ea6bc92..1368081b58f 100755 --- a/cmake/external/protobuf.cmake +++ b/cmake/external/protobuf.cmake @@ -12,304 +12,346 @@ # See the License for the specific language governing permissions and # limitations under the License. -INCLUDE(ExternalProject) +include(ExternalProject) # Always invoke `FIND_PACKAGE(Protobuf)` for importing function protobuf_generate_cpp -IF(NOT WIN32) - FIND_PACKAGE(Protobuf QUIET) -ENDIF(NOT WIN32) +if(NOT WIN32) + find_package(Protobuf QUIET) +endif(NOT WIN32) -UNSET_VAR(PROTOBUF_INCLUDE_DIR) -UNSET_VAR(PROTOBUF_FOUND) -UNSET_VAR(PROTOBUF_PROTOC_EXECUTABLE) -UNSET_VAR(PROTOBUF_PROTOC_LIBRARY) -UNSET_VAR(PROTOBUF_LITE_LIBRARY) -UNSET_VAR(PROTOBUF_LIBRARY) -UNSET_VAR(PROTOBUF_INCLUDE_DIR) -UNSET_VAR(Protobuf_PROTOC_EXECUTABLE) +unset_var(PROTOBUF_INCLUDE_DIR) +unset_var(PROTOBUF_FOUND) +unset_var(PROTOBUF_PROTOC_EXECUTABLE) +unset_var(PROTOBUF_PROTOC_LIBRARY) +unset_var(PROTOBUF_LITE_LIBRARY) +unset_var(PROTOBUF_LIBRARY) +unset_var(PROTOBUF_INCLUDE_DIR) +unset_var(Protobuf_PROTOC_EXECUTABLE) function(protobuf_generate_python SRCS) - # shameless copy from https://github.com/Kitware/CMake/blob/master/Modules/FindProtobuf.cmake - if(NOT ARGN) - message(SEND_ERROR "Error: PROTOBUF_GENERATE_PYTHON() called without any proto files") - return() - endif() - - if(PROTOBUF_GENERATE_CPP_APPEND_PATH) - # Create an include path for each file specified - foreach(FIL ${ARGN}) - get_filename_component(ABS_FIL ${FIL} ABSOLUTE) - get_filename_component(ABS_PATH ${ABS_FIL} PATH) - list(FIND _protobuf_include_path ${ABS_PATH} _contains_already) - if(${_contains_already} EQUAL -1) - list(APPEND _protobuf_include_path -I ${ABS_PATH}) - endif() - endforeach() - else() - set(_protobuf_include_path -I ${CMAKE_CURRENT_SOURCE_DIR}) - endif() - if(DEFINED PROTOBUF_IMPORT_DIRS AND NOT DEFINED Protobuf_IMPORT_DIRS) - set(Protobuf_IMPORT_DIRS "${PROTOBUF_IMPORT_DIRS}") - endif() + # shameless copy from https://github.com/Kitware/CMake/blob/master/Modules/FindProtobuf.cmake + if(NOT ARGN) + message( + SEND_ERROR + "Error: PROTOBUF_GENERATE_PYTHON() called without any proto files") + return() + endif() - if(DEFINED Protobuf_IMPORT_DIRS) - foreach(DIR ${Protobuf_IMPORT_DIRS}) - get_filename_component(ABS_PATH ${DIR} ABSOLUTE) - list(FIND _protobuf_include_path ${ABS_PATH} _contains_already) - if(${_contains_already} EQUAL -1) - list(APPEND _protobuf_include_path -I ${ABS_PATH}) - endif() - endforeach() - endif() - - set(${SRCS}) + if(PROTOBUF_GENERATE_CPP_APPEND_PATH) + # Create an include path for each file specified foreach(FIL ${ARGN}) - get_filename_component(ABS_FIL ${FIL} ABSOLUTE) - get_filename_component(FIL_WE ${FIL} NAME_WE) - if(NOT PROTOBUF_GENERATE_CPP_APPEND_PATH) - get_filename_component(FIL_DIR ${FIL} DIRECTORY) - if(FIL_DIR) - set(FIL_WE "${FIL_DIR}/${FIL_WE}") - endif() - endif() - list(APPEND ${SRCS} "${CMAKE_CURRENT_BINARY_DIR}/${FIL_WE}_pb2.py") - add_custom_command( - OUTPUT "${CMAKE_CURRENT_BINARY_DIR}/${FIL_WE}_pb2.py" - COMMAND ${PROTOBUF_PROTOC_EXECUTABLE} --python_out ${CMAKE_CURRENT_BINARY_DIR} ${_protobuf_include_path} ${ABS_FIL} - DEPENDS ${ABS_FIL} ${PROTOBUF_PROTOC_EXECUTABLE} - COMMENT "Running Python protocol buffer compiler on ${FIL}" - VERBATIM ) + get_filename_component(ABS_FIL ${FIL} ABSOLUTE) + get_filename_component(ABS_PATH ${ABS_FIL} PATH) + list(FIND _protobuf_include_path ${ABS_PATH} _contains_already) + if(${_contains_already} EQUAL -1) + list(APPEND _protobuf_include_path -I ${ABS_PATH}) + endif() + endforeach() + else() + set(_protobuf_include_path -I ${CMAKE_CURRENT_SOURCE_DIR}) + endif() + if(DEFINED PROTOBUF_IMPORT_DIRS AND NOT DEFINED Protobuf_IMPORT_DIRS) + set(Protobuf_IMPORT_DIRS "${PROTOBUF_IMPORT_DIRS}") + endif() + + if(DEFINED Protobuf_IMPORT_DIRS) + foreach(DIR ${Protobuf_IMPORT_DIRS}) + get_filename_component(ABS_PATH ${DIR} ABSOLUTE) + list(FIND _protobuf_include_path ${ABS_PATH} _contains_already) + if(${_contains_already} EQUAL -1) + list(APPEND _protobuf_include_path -I ${ABS_PATH}) + endif() endforeach() + endif() - set(${SRCS} ${${SRCS}} PARENT_SCOPE) + set(${SRCS}) + foreach(FIL ${ARGN}) + get_filename_component(ABS_FIL ${FIL} ABSOLUTE) + get_filename_component(FIL_WE ${FIL} NAME_WE) + if(NOT PROTOBUF_GENERATE_CPP_APPEND_PATH) + get_filename_component(FIL_DIR ${FIL} DIRECTORY) + if(FIL_DIR) + set(FIL_WE "${FIL_DIR}/${FIL_WE}") + endif() + endif() + list(APPEND ${SRCS} "${CMAKE_CURRENT_BINARY_DIR}/${FIL_WE}_pb2.py") + add_custom_command( + OUTPUT "${CMAKE_CURRENT_BINARY_DIR}/${FIL_WE}_pb2.py" + COMMAND ${PROTOBUF_PROTOC_EXECUTABLE} --python_out + ${CMAKE_CURRENT_BINARY_DIR} ${_protobuf_include_path} ${ABS_FIL} + DEPENDS ${ABS_FIL} ${PROTOBUF_PROTOC_EXECUTABLE} + COMMENT "Running Python protocol buffer compiler on ${FIL}" + VERBATIM) + endforeach() + + set(${SRCS} + ${${SRCS}} + PARENT_SCOPE) endfunction() # Print and set the protobuf library information, # finish this cmake process and exit from this file. macro(PROMPT_PROTOBUF_LIB) - SET(protobuf_DEPS ${ARGN}) + set(protobuf_DEPS ${ARGN}) - MESSAGE(STATUS "Protobuf protoc executable: ${PROTOBUF_PROTOC_EXECUTABLE}") - MESSAGE(STATUS "Protobuf-lite library: ${PROTOBUF_LITE_LIBRARY}") - MESSAGE(STATUS "Protobuf library: ${PROTOBUF_LIBRARY}") - MESSAGE(STATUS "Protoc library: ${PROTOBUF_PROTOC_LIBRARY}") - MESSAGE(STATUS "Protobuf version: ${PROTOBUF_VERSION}") - INCLUDE_DIRECTORIES(${PROTOBUF_INCLUDE_DIR}) + message(STATUS "Protobuf protoc executable: ${PROTOBUF_PROTOC_EXECUTABLE}") + message(STATUS "Protobuf-lite library: ${PROTOBUF_LITE_LIBRARY}") + message(STATUS "Protobuf library: ${PROTOBUF_LIBRARY}") + message(STATUS "Protoc library: ${PROTOBUF_PROTOC_LIBRARY}") + message(STATUS "Protobuf version: ${PROTOBUF_VERSION}") + include_directories(${PROTOBUF_INCLUDE_DIR}) - # Assuming that all the protobuf libraries are of the same type. - IF(${PROTOBUF_LIBRARY} MATCHES ${CMAKE_STATIC_LIBRARY_SUFFIX}) - SET(protobuf_LIBTYPE STATIC) - ELSEIF(${PROTOBUF_LIBRARY} MATCHES "${CMAKE_SHARED_LIBRARY_SUFFIX}$") - SET(protobuf_LIBTYPE SHARED) - ELSE() - MESSAGE(FATAL_ERROR "Unknown library type: ${PROTOBUF_LIBRARY}") - ENDIF() + # Assuming that all the protobuf libraries are of the same type. + if(${PROTOBUF_LIBRARY} MATCHES ${CMAKE_STATIC_LIBRARY_SUFFIX}) + set(protobuf_LIBTYPE STATIC) + elseif(${PROTOBUF_LIBRARY} MATCHES "${CMAKE_SHARED_LIBRARY_SUFFIX}$") + set(protobuf_LIBTYPE SHARED) + else() + message(FATAL_ERROR "Unknown library type: ${PROTOBUF_LIBRARY}") + endif() - ADD_LIBRARY(protobuf ${protobuf_LIBTYPE} IMPORTED GLOBAL) - SET_PROPERTY(TARGET protobuf PROPERTY IMPORTED_LOCATION ${PROTOBUF_LIBRARY}) + add_library(protobuf ${protobuf_LIBTYPE} IMPORTED GLOBAL) + set_property(TARGET protobuf PROPERTY IMPORTED_LOCATION ${PROTOBUF_LIBRARY}) - ADD_LIBRARY(protobuf_lite ${protobuf_LIBTYPE} IMPORTED GLOBAL) - SET_PROPERTY(TARGET protobuf_lite PROPERTY IMPORTED_LOCATION ${PROTOBUF_LITE_LIBRARY}) + add_library(protobuf_lite ${protobuf_LIBTYPE} IMPORTED GLOBAL) + set_property(TARGET protobuf_lite PROPERTY IMPORTED_LOCATION + ${PROTOBUF_LITE_LIBRARY}) - ADD_LIBRARY(libprotoc ${protobuf_LIBTYPE} IMPORTED GLOBAL) - SET_PROPERTY(TARGET libprotoc PROPERTY IMPORTED_LOCATION ${PROTOC_LIBRARY}) + add_library(libprotoc ${protobuf_LIBTYPE} IMPORTED GLOBAL) + set_property(TARGET libprotoc PROPERTY IMPORTED_LOCATION ${PROTOC_LIBRARY}) - ADD_EXECUTABLE(protoc IMPORTED GLOBAL) - SET_PROPERTY(TARGET protoc PROPERTY IMPORTED_LOCATION ${PROTOBUF_PROTOC_EXECUTABLE}) - # FIND_Protobuf.cmake uses `Protobuf_PROTOC_EXECUTABLE`. - # make `protobuf_generate_cpp` happy. - SET(Protobuf_PROTOC_EXECUTABLE ${PROTOBUF_PROTOC_EXECUTABLE}) + add_executable(protoc IMPORTED GLOBAL) + set_property(TARGET protoc PROPERTY IMPORTED_LOCATION + ${PROTOBUF_PROTOC_EXECUTABLE}) + # FIND_Protobuf.cmake uses `Protobuf_PROTOC_EXECUTABLE`. + # make `protobuf_generate_cpp` happy. + set(Protobuf_PROTOC_EXECUTABLE ${PROTOBUF_PROTOC_EXECUTABLE}) - FOREACH(dep ${protobuf_DEPS}) - ADD_DEPENDENCIES(protobuf ${dep}) - ADD_DEPENDENCIES(protobuf_lite ${dep}) - ADD_DEPENDENCIES(libprotoc ${dep}) - ADD_DEPENDENCIES(protoc ${dep}) - ENDFOREACH() + foreach(dep ${protobuf_DEPS}) + add_dependencies(protobuf ${dep}) + add_dependencies(protobuf_lite ${dep}) + add_dependencies(libprotoc ${dep}) + add_dependencies(protoc ${dep}) + endforeach() - RETURN() + return() endmacro() macro(SET_PROTOBUF_VERSION) - EXEC_PROGRAM(${PROTOBUF_PROTOC_EXECUTABLE} ARGS --version OUTPUT_VARIABLE PROTOBUF_VERSION) - STRING(REGEX MATCH "[0-9]+.[0-9]+" PROTOBUF_VERSION "${PROTOBUF_VERSION}") + exec_program( + ${PROTOBUF_PROTOC_EXECUTABLE} ARGS + --version + OUTPUT_VARIABLE PROTOBUF_VERSION) + string(REGEX MATCH "[0-9]+.[0-9]+" PROTOBUF_VERSION "${PROTOBUF_VERSION}") endmacro() -set(PROTOBUF_ROOT "" CACHE PATH "Folder contains protobuf") -IF (WIN32) - SET(PROTOBUF_ROOT ${THIRD_PARTY_PATH}/install/protobuf) -ENDIF(WIN32) +set(PROTOBUF_ROOT + "" + CACHE PATH "Folder contains protobuf") +if(WIN32) + set(PROTOBUF_ROOT ${THIRD_PARTY_PATH}/install/protobuf) +endif(WIN32) -if (NOT "${PROTOBUF_ROOT}" STREQUAL "") - find_path(PROTOBUF_INCLUDE_DIR google/protobuf/message.h PATHS ${PROTOBUF_ROOT}/include NO_DEFAULT_PATH) - find_library(PROTOBUF_LIBRARY protobuf libprotobuf.lib PATHS ${PROTOBUF_ROOT}/lib NO_DEFAULT_PATH) - find_library(PROTOBUF_LITE_LIBRARY protobuf-lite libprotobuf-lite.lib PATHS ${PROTOBUF_ROOT}/lib NO_DEFAULT_PATH) - find_library(PROTOBUF_PROTOC_LIBRARY protoc libprotoc.lib PATHS ${PROTOBUF_ROOT}/lib NO_DEFAULT_PATH) - find_program(PROTOBUF_PROTOC_EXECUTABLE protoc PATHS ${PROTOBUF_ROOT}/bin NO_DEFAULT_PATH) - if (PROTOBUF_INCLUDE_DIR AND PROTOBUF_LIBRARY AND PROTOBUF_LITE_LIBRARY AND PROTOBUF_PROTOC_LIBRARY AND PROTOBUF_PROTOC_EXECUTABLE) - SET(PROTOBUF_FOUND true) - message(STATUS "Using custom protobuf library in ${PROTOBUF_ROOT}.") - SET_PROTOBUF_VERSION() - PROMPT_PROTOBUF_LIB() - endif() +if(NOT "${PROTOBUF_ROOT}" STREQUAL "") + find_path( + PROTOBUF_INCLUDE_DIR google/protobuf/message.h + PATHS ${PROTOBUF_ROOT}/include + NO_DEFAULT_PATH) + find_library( + PROTOBUF_LIBRARY protobuf libprotobuf.lib + PATHS ${PROTOBUF_ROOT}/lib + NO_DEFAULT_PATH) + find_library( + PROTOBUF_LITE_LIBRARY protobuf-lite libprotobuf-lite.lib + PATHS ${PROTOBUF_ROOT}/lib + NO_DEFAULT_PATH) + find_library( + PROTOBUF_PROTOC_LIBRARY protoc libprotoc.lib + PATHS ${PROTOBUF_ROOT}/lib + NO_DEFAULT_PATH) + find_program( + PROTOBUF_PROTOC_EXECUTABLE protoc + PATHS ${PROTOBUF_ROOT}/bin + NO_DEFAULT_PATH) + if(PROTOBUF_INCLUDE_DIR + AND PROTOBUF_LIBRARY + AND PROTOBUF_LITE_LIBRARY + AND PROTOBUF_PROTOC_LIBRARY + AND PROTOBUF_PROTOC_EXECUTABLE) + set(PROTOBUF_FOUND true) + message(STATUS "Using custom protobuf library in ${PROTOBUF_ROOT}.") + set_protobuf_version() + prompt_protobuf_lib() + endif() endif() -FUNCTION(build_protobuf TARGET_NAME BUILD_FOR_HOST) - STRING(REPLACE "extern_" "" TARGET_DIR_NAME "${TARGET_NAME}") - SET(PROTOBUF_PREFIX_DIR ${THIRD_PARTY_PATH}/${TARGET_DIR_NAME}) - SET(PROTOBUF_SOURCE_DIR ${THIRD_PARTY_PATH}/${TARGET_DIR_NAME}/src/${TARGET_NAME}) - SET(PROTOBUF_INSTALL_DIR ${THIRD_PARTY_PATH}/install/${TARGET_DIR_NAME}) - - SET(${TARGET_NAME}_INCLUDE_DIR "${PROTOBUF_INSTALL_DIR}/include" PARENT_SCOPE) - SET(PROTOBUF_INCLUDE_DIR "${PROTOBUF_INSTALL_DIR}/include" PARENT_SCOPE) - SET(${TARGET_NAME}_LITE_LIBRARY - "${PROTOBUF_INSTALL_DIR}/lib/libprotobuf-lite${CMAKE_STATIC_LIBRARY_SUFFIX}" - PARENT_SCOPE) - SET(${TARGET_NAME}_LIBRARY - "${PROTOBUF_INSTALL_DIR}/lib/libprotobuf${CMAKE_STATIC_LIBRARY_SUFFIX}" - PARENT_SCOPE) - SET(${TARGET_NAME}_PROTOC_LIBRARY - "${PROTOBUF_INSTALL_DIR}/lib/libprotoc${CMAKE_STATIC_LIBRARY_SUFFIX}" - PARENT_SCOPE) - SET(${TARGET_NAME}_PROTOC_EXECUTABLE - "${PROTOBUF_INSTALL_DIR}/bin/protoc${CMAKE_EXECUTABLE_SUFFIX}" - PARENT_SCOPE) +function(build_protobuf TARGET_NAME BUILD_FOR_HOST) + string(REPLACE "extern_" "" TARGET_DIR_NAME "${TARGET_NAME}") + set(PROTOBUF_PREFIX_DIR ${THIRD_PARTY_PATH}/${TARGET_DIR_NAME}) + set(PROTOBUF_SOURCE_DIR + ${THIRD_PARTY_PATH}/${TARGET_DIR_NAME}/src/${TARGET_NAME}) + set(PROTOBUF_INSTALL_DIR ${THIRD_PARTY_PATH}/install/${TARGET_DIR_NAME}) - SET(OPTIONAL_CACHE_ARGS "") - SET(OPTIONAL_ARGS "") - IF(BUILD_FOR_HOST) - SET(OPTIONAL_ARGS "-Dprotobuf_WITH_ZLIB=OFF") - ELSE() - SET(OPTIONAL_ARGS - "-DCMAKE_CXX_COMPILER=${CMAKE_CXX_COMPILER}" - "-DCMAKE_C_COMPILER=${CMAKE_C_COMPILER}" - "-DCMAKE_C_FLAGS=${CMAKE_C_FLAGS}" - "-DCMAKE_C_FLAGS_DEBUG=${CMAKE_C_FLAGS_DEBUG}" - "-DCMAKE_C_FLAGS_RELEASE=${CMAKE_C_FLAGS_RELEASE}" - "-DCMAKE_CXX_FLAGS=${CMAKE_CXX_FLAGS}" - "-DCMAKE_CXX_FLAGS_RELEASE=${CMAKE_CXX_FLAGS_RELEASE}" - "-DCMAKE_CXX_FLAGS_DEBUG=${CMAKE_CXX_FLAGS_DEBUG}" - "-Dprotobuf_WITH_ZLIB=ON" - "-DZLIB_ROOT:FILEPATH=${ZLIB_ROOT}" - ${EXTERNAL_OPTIONAL_ARGS}) - SET(OPTIONAL_CACHE_ARGS "-DZLIB_ROOT:STRING=${ZLIB_ROOT}") - ENDIF() - IF(WIN32) - SET(OPTIONAL_ARGS ${OPTIONAL_ARGS} - "-DCMAKE_GENERATOR=${CMAKE_GENERATOR}" - "-DCMAKE_GENERATOR_PLATFORM=${CMAKE_GENERATOR_PLATFORM}" - "-Dprotobuf_MSVC_STATIC_RUNTIME=${MSVC_STATIC_CRT}") - ENDIF() + set(${TARGET_NAME}_INCLUDE_DIR + "${PROTOBUF_INSTALL_DIR}/include" + PARENT_SCOPE) + set(PROTOBUF_INCLUDE_DIR + "${PROTOBUF_INSTALL_DIR}/include" + PARENT_SCOPE) + set(${TARGET_NAME}_LITE_LIBRARY + "${PROTOBUF_INSTALL_DIR}/lib/libprotobuf-lite${CMAKE_STATIC_LIBRARY_SUFFIX}" + PARENT_SCOPE) + set(${TARGET_NAME}_LIBRARY + "${PROTOBUF_INSTALL_DIR}/lib/libprotobuf${CMAKE_STATIC_LIBRARY_SUFFIX}" + PARENT_SCOPE) + set(${TARGET_NAME}_PROTOC_LIBRARY + "${PROTOBUF_INSTALL_DIR}/lib/libprotoc${CMAKE_STATIC_LIBRARY_SUFFIX}" + PARENT_SCOPE) + set(${TARGET_NAME}_PROTOC_EXECUTABLE + "${PROTOBUF_INSTALL_DIR}/bin/protoc${CMAKE_EXECUTABLE_SUFFIX}" + PARENT_SCOPE) + set(OPTIONAL_CACHE_ARGS "") + set(OPTIONAL_ARGS "") + if(BUILD_FOR_HOST) + set(OPTIONAL_ARGS "-Dprotobuf_WITH_ZLIB=OFF") + else() + set(OPTIONAL_ARGS + "-DCMAKE_CXX_COMPILER=${CMAKE_CXX_COMPILER}" + "-DCMAKE_C_COMPILER=${CMAKE_C_COMPILER}" + "-DCMAKE_C_FLAGS=${CMAKE_C_FLAGS}" + "-DCMAKE_C_FLAGS_DEBUG=${CMAKE_C_FLAGS_DEBUG}" + "-DCMAKE_C_FLAGS_RELEASE=${CMAKE_C_FLAGS_RELEASE}" + "-DCMAKE_CXX_FLAGS=${CMAKE_CXX_FLAGS}" + "-DCMAKE_CXX_FLAGS_RELEASE=${CMAKE_CXX_FLAGS_RELEASE}" + "-DCMAKE_CXX_FLAGS_DEBUG=${CMAKE_CXX_FLAGS_DEBUG}" + "-Dprotobuf_WITH_ZLIB=ON" + "-DZLIB_ROOT:FILEPATH=${ZLIB_ROOT}" + ${EXTERNAL_OPTIONAL_ARGS}) + set(OPTIONAL_CACHE_ARGS "-DZLIB_ROOT:STRING=${ZLIB_ROOT}") + endif() + if(WIN32) + set(OPTIONAL_ARGS + ${OPTIONAL_ARGS} "-DCMAKE_GENERATOR=${CMAKE_GENERATOR}" + "-DCMAKE_GENERATOR_PLATFORM=${CMAKE_GENERATOR_PLATFORM}" + "-Dprotobuf_MSVC_STATIC_RUNTIME=${MSVC_STATIC_CRT}") + endif() - if(WITH_ONNXRUNTIME) - SET(PROTOBUF_REPOSITORY ${GIT_URL}/protocolbuffers/protobuf.git) - SET(PROTOBUF_TAG v3.18.0) - elseif(WITH_ASCEND AND NOT WITH_ASCEND_CXX11) - SET(PROTOBUF_REPOSITORY https://gitee.com/tianjianhe/protobuf.git) - SET(PROTOBUF_TAG v3.8.0) - elseif(WITH_ASCEND_CL AND NOT WITH_ASCEND_CXX11) - SET(PROTOBUF_REPOSITORY https://gitee.com/tianjianhe/protobuf.git) - SET(PROTOBUF_TAG v3.8.0) - elseif(WITH_IPU) - SET(PROTOBUF_REPOSITORY ${GIT_URL}/protocolbuffers/protobuf.git) - SET(PROTOBUF_TAG d750fbf648256c7c631f51ffdbf67d7c18b0114e) - elseif(WIN32) - SET(PROTOBUF_REPOSITORY ${GIT_URL}/protocolbuffers/protobuf.git) - # Change the tag to support building with vs2019 - SET(PROTOBUF_TAG 01a05a53f40ca2ac5f0af10c6cc0810bee39b792) - else() - SET(PROTOBUF_REPOSITORY ${GIT_URL}/protocolbuffers/protobuf.git) - SET(PROTOBUF_TAG 9f75c5aa851cd877fb0d93ccc31b8567a6706546) - endif() - if(WITH_ARM_BRPC) - SET(ARM_PROTOBUF_URL "https://paddlerec.bj.bcebos.com/online_infer/arm_brpc_ubuntu18/arm_protobuf.tar.gz" CACHE STRING "" FORCE) - FILE(WRITE ${PROTOBUF_SOURCE_DIR}/CMakeLists.txt - "PROJECT(ARM_PROTOBUF)\n" - "cmake_minimum_required(VERSION 3.0)\n" - "install(DIRECTORY arm_protobuf/bin arm_protobuf/include arm_protobuf/lib \n" - " DESTINATION . USE_SOURCE_PERMISSIONS)\n") - ExternalProject_Add( - ${TARGET_NAME} - ${EXTERNAL_PROJECT_LOG_ARGS} - ${SHALLOW_CLONE} - PREFIX ${PROTOBUF_PREFIX_DIR} - DOWNLOAD_DIR ${PROTOBUF_SOURCE_DIR} - DOWNLOAD_COMMAND rm -rf arm_protobuf.tar.gz - && wget --no-check-certificate ${ARM_PROTOBUF_URL} - && tar zxvf arm_protobuf.tar.gz - #DOWNLOAD_COMMAND cp /home/wangbin44/Paddle/build/arm_protobuf.tar.gz . - # && tar zxvf arm_protobuf.tar.gz - UPDATE_COMMAND "" - CMAKE_ARGS -DCMAKE_INSTALL_PREFIX:PATH=${PROTOBUF_INSTALL_DIR} - -DCMAKE_BUILD_TYPE:STRING=${THIRD_PARTY_BUILD_TYPE} - CMAKE_CACHE_ARGS - -DCMAKE_INSTALL_PREFIX:PATH=${PROTOBUF_INSTALL_DIR} - -DCMAKE_BUILD_TYPE:STRING=${THIRD_PARTY_BUILD_TYPE} - BUILD_BYPRODUCTS ${PROTOBUF_INSTALL_DIR}/lib/libprotobuf${CMAKE_STATIC_LIBRARY_SUFFIX} - BUILD_BYPRODUCTS ${PROTOBUF_INSTALL_DIR}/lib/libprotobuf-lite${CMAKE_STATIC_LIBRARY_SUFFIX} - BUILD_BYPRODUCTS ${PROTOBUF_INSTALL_DIR}/lib/libprotoc${CMAKE_STATIC_LIBRARY_SUFFIX} - BUILD_BYPRODUCTS ${PROTOBUF_INSTALL_DIR}/bin/protoc${CMAKE_EXECUTABLE_SUFFIX} - ) - else() - ExternalProject_Add( - ${TARGET_NAME} - ${EXTERNAL_PROJECT_LOG_ARGS} - ${SHALLOW_CLONE} - GIT_REPOSITORY ${PROTOBUF_REPOSITORY} - GIT_TAG ${PROTOBUF_TAG} - PREFIX ${PROTOBUF_PREFIX_DIR} - UPDATE_COMMAND "" - DEPENDS zlib - CONFIGURE_COMMAND - ${CMAKE_COMMAND} ${PROTOBUF_SOURCE_DIR}/cmake - ${OPTIONAL_ARGS} - -Dprotobuf_BUILD_TESTS=OFF - -DCMAKE_SKIP_RPATH=ON - -DCMAKE_POSITION_INDEPENDENT_CODE=ON - -DCMAKE_BUILD_TYPE=${THIRD_PARTY_BUILD_TYPE} - -DCMAKE_INSTALL_PREFIX=${PROTOBUF_INSTALL_DIR} - -DCMAKE_INSTALL_LIBDIR=lib - -DBUILD_SHARED_LIBS=OFF - CMAKE_CACHE_ARGS - -DCMAKE_INSTALL_PREFIX:PATH=${PROTOBUF_INSTALL_DIR} - -DCMAKE_BUILD_TYPE:STRING=${THIRD_PARTY_BUILD_TYPE} - -DCMAKE_VERBOSE_MAKEFILE:BOOL=OFF - -DCMAKE_POSITION_INDEPENDENT_CODE:BOOL=ON - ${OPTIONAL_CACHE_ARGS} - BUILD_BYPRODUCTS ${PROTOBUF_INSTALL_DIR}/lib/libprotobuf${CMAKE_STATIC_LIBRARY_SUFFIX} - BUILD_BYPRODUCTS ${PROTOBUF_INSTALL_DIR}/lib/libprotobuf-lite${CMAKE_STATIC_LIBRARY_SUFFIX} - BUILD_BYPRODUCTS ${PROTOBUF_INSTALL_DIR}/lib/libprotoc${CMAKE_STATIC_LIBRARY_SUFFIX} - BUILD_BYPRODUCTS ${PROTOBUF_INSTALL_DIR}/bin/protoc${CMAKE_EXECUTABLE_SUFFIX} - ) - endif() -ENDFUNCTION() + if(WITH_ONNXRUNTIME) + set(PROTOBUF_REPOSITORY ${GIT_URL}/protocolbuffers/protobuf.git) + set(PROTOBUF_TAG v3.18.0) + elseif(WITH_ASCEND AND NOT WITH_ASCEND_CXX11) + set(PROTOBUF_REPOSITORY https://gitee.com/tianjianhe/protobuf.git) + set(PROTOBUF_TAG v3.8.0) + elseif(WITH_ASCEND_CL AND NOT WITH_ASCEND_CXX11) + set(PROTOBUF_REPOSITORY https://gitee.com/tianjianhe/protobuf.git) + set(PROTOBUF_TAG v3.8.0) + elseif(WITH_IPU) + set(PROTOBUF_REPOSITORY ${GIT_URL}/protocolbuffers/protobuf.git) + set(PROTOBUF_TAG d750fbf648256c7c631f51ffdbf67d7c18b0114e) + elseif(WIN32) + set(PROTOBUF_REPOSITORY ${GIT_URL}/protocolbuffers/protobuf.git) + # Change the tag to support building with vs2019 + set(PROTOBUF_TAG 01a05a53f40ca2ac5f0af10c6cc0810bee39b792) + else() + set(PROTOBUF_REPOSITORY ${GIT_URL}/protocolbuffers/protobuf.git) + set(PROTOBUF_TAG 9f75c5aa851cd877fb0d93ccc31b8567a6706546) + endif() + if(WITH_ARM_BRPC) + set(ARM_PROTOBUF_URL + "https://paddlerec.bj.bcebos.com/online_infer/arm_brpc_ubuntu18/arm_protobuf.tar.gz" + CACHE STRING "" FORCE) + file( + WRITE ${PROTOBUF_SOURCE_DIR}/CMakeLists.txt + "PROJECT(ARM_PROTOBUF)\n" + "cmake_minimum_required(VERSION 3.0)\n" + "install(DIRECTORY arm_protobuf/bin arm_protobuf/include arm_protobuf/lib \n" + " DESTINATION . USE_SOURCE_PERMISSIONS)\n") + ExternalProject_Add( + ${TARGET_NAME} + ${EXTERNAL_PROJECT_LOG_ARGS} ${SHALLOW_CLONE} + PREFIX ${PROTOBUF_PREFIX_DIR} + DOWNLOAD_DIR ${PROTOBUF_SOURCE_DIR} + DOWNLOAD_COMMAND rm -rf arm_protobuf.tar.gz && wget --no-check-certificate + ${ARM_PROTOBUF_URL} && tar zxvf arm_protobuf.tar.gz + #DOWNLOAD_COMMAND cp /home/wangbin44/Paddle/build/arm_protobuf.tar.gz . + # && tar zxvf arm_protobuf.tar.gz + UPDATE_COMMAND "" + CMAKE_ARGS -DCMAKE_INSTALL_PREFIX:PATH=${PROTOBUF_INSTALL_DIR} + -DCMAKE_BUILD_TYPE:STRING=${THIRD_PARTY_BUILD_TYPE} + CMAKE_CACHE_ARGS -DCMAKE_INSTALL_PREFIX:PATH=${PROTOBUF_INSTALL_DIR} + -DCMAKE_BUILD_TYPE:STRING=${THIRD_PARTY_BUILD_TYPE} + BUILD_BYPRODUCTS + ${PROTOBUF_INSTALL_DIR}/lib/libprotobuf${CMAKE_STATIC_LIBRARY_SUFFIX} + BUILD_BYPRODUCTS + ${PROTOBUF_INSTALL_DIR}/lib/libprotobuf-lite${CMAKE_STATIC_LIBRARY_SUFFIX} + BUILD_BYPRODUCTS + ${PROTOBUF_INSTALL_DIR}/lib/libprotoc${CMAKE_STATIC_LIBRARY_SUFFIX} + BUILD_BYPRODUCTS + ${PROTOBUF_INSTALL_DIR}/bin/protoc${CMAKE_EXECUTABLE_SUFFIX}) + else() + ExternalProject_Add( + ${TARGET_NAME} + ${EXTERNAL_PROJECT_LOG_ARGS} ${SHALLOW_CLONE} + GIT_REPOSITORY ${PROTOBUF_REPOSITORY} + GIT_TAG ${PROTOBUF_TAG} + PREFIX ${PROTOBUF_PREFIX_DIR} + UPDATE_COMMAND "" + DEPENDS zlib + CONFIGURE_COMMAND + ${CMAKE_COMMAND} ${PROTOBUF_SOURCE_DIR}/cmake ${OPTIONAL_ARGS} + -Dprotobuf_BUILD_TESTS=OFF -DCMAKE_SKIP_RPATH=ON + -DCMAKE_POSITION_INDEPENDENT_CODE=ON + -DCMAKE_BUILD_TYPE=${THIRD_PARTY_BUILD_TYPE} + -DCMAKE_INSTALL_PREFIX=${PROTOBUF_INSTALL_DIR} + -DCMAKE_INSTALL_LIBDIR=lib -DBUILD_SHARED_LIBS=OFF + CMAKE_CACHE_ARGS + -DCMAKE_INSTALL_PREFIX:PATH=${PROTOBUF_INSTALL_DIR} + -DCMAKE_BUILD_TYPE:STRING=${THIRD_PARTY_BUILD_TYPE} + -DCMAKE_VERBOSE_MAKEFILE:BOOL=OFF + -DCMAKE_POSITION_INDEPENDENT_CODE:BOOL=ON + ${OPTIONAL_CACHE_ARGS} + BUILD_BYPRODUCTS + ${PROTOBUF_INSTALL_DIR}/lib/libprotobuf${CMAKE_STATIC_LIBRARY_SUFFIX} + BUILD_BYPRODUCTS + ${PROTOBUF_INSTALL_DIR}/lib/libprotobuf-lite${CMAKE_STATIC_LIBRARY_SUFFIX} + BUILD_BYPRODUCTS + ${PROTOBUF_INSTALL_DIR}/lib/libprotoc${CMAKE_STATIC_LIBRARY_SUFFIX} + BUILD_BYPRODUCTS + ${PROTOBUF_INSTALL_DIR}/bin/protoc${CMAKE_EXECUTABLE_SUFFIX}) + endif() +endfunction() if(WITH_ONNXRUNTIME) - SET(PROTOBUF_VERSION 3.18.0) + set(PROTOBUF_VERSION 3.18.0) elseif(WITH_ASCEND OR WITH_ASCEND_CL) - SET(PROTOBUF_VERSION 3.8.0) + set(PROTOBUF_VERSION 3.8.0) elseif(WITH_IPU) - SET(PROTOBUF_VERSION 3.6.1) + set(PROTOBUF_VERSION 3.6.1) elseif(WITH_ARM_BRPC) - SET(PROTOBUF_VERSION 3.7.1-baidu-ee-common) + set(PROTOBUF_VERSION 3.7.1-baidu-ee-common) else() - SET(PROTOBUF_VERSION 3.1.0) + set(PROTOBUF_VERSION 3.1.0) endif() -IF(NOT PROTOBUF_FOUND) - build_protobuf(extern_protobuf FALSE) +if(NOT PROTOBUF_FOUND) + build_protobuf(extern_protobuf FALSE) - SET(PROTOBUF_INCLUDE_DIR ${extern_protobuf_INCLUDE_DIR} - CACHE PATH "protobuf include directory." FORCE) - SET(PROTOBUF_LITE_LIBRARY ${extern_protobuf_LITE_LIBRARY} - CACHE FILEPATH "protobuf lite library." FORCE) - SET(PROTOBUF_LIBRARY ${extern_protobuf_LIBRARY} - CACHE FILEPATH "protobuf library." FORCE) - SET(PROTOBUF_PROTOC_LIBRARY ${extern_protobuf_PROTOC_LIBRARY} - CACHE FILEPATH "protoc library." FORCE) + set(PROTOBUF_INCLUDE_DIR + ${extern_protobuf_INCLUDE_DIR} + CACHE PATH "protobuf include directory." FORCE) + set(PROTOBUF_LITE_LIBRARY + ${extern_protobuf_LITE_LIBRARY} + CACHE FILEPATH "protobuf lite library." FORCE) + set(PROTOBUF_LIBRARY + ${extern_protobuf_LIBRARY} + CACHE FILEPATH "protobuf library." FORCE) + set(PROTOBUF_PROTOC_LIBRARY + ${extern_protobuf_PROTOC_LIBRARY} + CACHE FILEPATH "protoc library." FORCE) - SET(PROTOBUF_PROTOC_EXECUTABLE ${extern_protobuf_PROTOC_EXECUTABLE} - CACHE FILEPATH "protobuf executable." FORCE) - # `EXTERN_PROTOBUF_DEPEND` used in cmake function `proto_library` to ensure - # `protoc.exe` existed before calling it. - set(EXTERN_PROTOBUF_DEPEND extern_protobuf) - PROMPT_PROTOBUF_LIB(extern_protobuf) -ENDIF(NOT PROTOBUF_FOUND) + set(PROTOBUF_PROTOC_EXECUTABLE + ${extern_protobuf_PROTOC_EXECUTABLE} + CACHE FILEPATH "protobuf executable." FORCE) + # `EXTERN_PROTOBUF_DEPEND` used in cmake function `proto_library` to ensure + # `protoc.exe` existed before calling it. + set(EXTERN_PROTOBUF_DEPEND extern_protobuf) + prompt_protobuf_lib(extern_protobuf) +endif(NOT PROTOBUF_FOUND) diff --git a/cmake/external/pslib.cmake b/cmake/external/pslib.cmake index 47a83d905e8..1b1298d6c6c 100644 --- a/cmake/external/pslib.cmake +++ b/cmake/external/pslib.cmake @@ -12,53 +12,58 @@ # See the License for the specific language governing permissions and # limitations under the License. -INCLUDE(ExternalProject) +include(ExternalProject) -SET(PSLIB_PROJECT "extern_pslib") -IF((NOT DEFINED PSLIB_VER) OR (NOT DEFINED PSLIB_URL)) - MESSAGE(STATUS "use pre defined download url") - SET(PSLIB_VER "0.1.1" CACHE STRING "" FORCE) - SET(PSLIB_NAME "pslib" CACHE STRING "" FORCE) - SET(PSLIB_URL "https://pslib.bj.bcebos.com/pslib.tar.gz" CACHE STRING "" FORCE) -ENDIF() -MESSAGE(STATUS "PSLIB_NAME: ${PSLIB_NAME}, PSLIB_URL: ${PSLIB_URL}") -SET(PSLIB_PREFIX_DIR "${THIRD_PARTY_PATH}/pslib") -SET(PSLIB_DOWNLOAD_DIR "${PSLIB_PREFIX_DIR}/src/${PSLIB_PROJECT}") -SET(PSLIB_DST_DIR "pslib") -SET(PSLIB_INSTALL_ROOT "${THIRD_PARTY_PATH}/install") -SET(PSLIB_INSTALL_DIR ${PSLIB_INSTALL_ROOT}/${PSLIB_DST_DIR}) -SET(PSLIB_ROOT ${PSLIB_INSTALL_DIR}) -SET(PSLIB_INC_DIR ${PSLIB_ROOT}/include) -SET(PSLIB_LIB_DIR ${PSLIB_ROOT}/lib) -SET(PSLIB_LIB ${PSLIB_LIB_DIR}/libps.so) -SET(PSLIB_VERSION_PY ${PSLIB_DOWNLOAD_DIR}/pslib/version.py) -SET(PSLIB_IOMP_LIB ${PSLIB_LIB_DIR}/libiomp5.so) #todo what is this -SET(CMAKE_INSTALL_RPATH "${CMAKE_INSTALL_RPATH}" "${PSLIB_ROOT}/lib") +set(PSLIB_PROJECT "extern_pslib") +if((NOT DEFINED PSLIB_VER) OR (NOT DEFINED PSLIB_URL)) + message(STATUS "use pre defined download url") + set(PSLIB_VER + "0.1.1" + CACHE STRING "" FORCE) + set(PSLIB_NAME + "pslib" + CACHE STRING "" FORCE) + set(PSLIB_URL + "https://pslib.bj.bcebos.com/pslib.tar.gz" + CACHE STRING "" FORCE) +endif() +message(STATUS "PSLIB_NAME: ${PSLIB_NAME}, PSLIB_URL: ${PSLIB_URL}") +set(PSLIB_PREFIX_DIR "${THIRD_PARTY_PATH}/pslib") +set(PSLIB_DOWNLOAD_DIR "${PSLIB_PREFIX_DIR}/src/${PSLIB_PROJECT}") +set(PSLIB_DST_DIR "pslib") +set(PSLIB_INSTALL_ROOT "${THIRD_PARTY_PATH}/install") +set(PSLIB_INSTALL_DIR ${PSLIB_INSTALL_ROOT}/${PSLIB_DST_DIR}) +set(PSLIB_ROOT ${PSLIB_INSTALL_DIR}) +set(PSLIB_INC_DIR ${PSLIB_ROOT}/include) +set(PSLIB_LIB_DIR ${PSLIB_ROOT}/lib) +set(PSLIB_LIB ${PSLIB_LIB_DIR}/libps.so) +set(PSLIB_VERSION_PY ${PSLIB_DOWNLOAD_DIR}/pslib/version.py) +set(PSLIB_IOMP_LIB ${PSLIB_LIB_DIR}/libiomp5.so) #todo what is this +set(CMAKE_INSTALL_RPATH "${CMAKE_INSTALL_RPATH}" "${PSLIB_ROOT}/lib") -INCLUDE_DIRECTORIES(${PSLIB_INC_DIR}) +include_directories(${PSLIB_INC_DIR}) -FILE(WRITE ${PSLIB_DOWNLOAD_DIR}/CMakeLists.txt - "PROJECT(PSLIB)\n" - "cmake_minimum_required(VERSION 3.0)\n" +file( + WRITE ${PSLIB_DOWNLOAD_DIR}/CMakeLists.txt + "PROJECT(PSLIB)\n" "cmake_minimum_required(VERSION 3.0)\n" "install(DIRECTORY ${PSLIB_NAME}/include ${PSLIB_NAME}/lib \n" " DESTINATION ${PSLIB_DST_DIR})\n") ExternalProject_Add( - ${PSLIB_PROJECT} - ${EXTERNAL_PROJECT_LOG_ARGS} - PREFIX ${PSLIB_PREFIX_DIR} - DOWNLOAD_DIR ${PSLIB_DOWNLOAD_DIR} - DOWNLOAD_COMMAND wget --no-check-certificate ${PSLIB_URL} -c -q -O ${PSLIB_NAME}.tar.gz - && tar zxvf ${PSLIB_NAME}.tar.gz - DOWNLOAD_NO_PROGRESS 1 - UPDATE_COMMAND "" - CMAKE_ARGS -DCMAKE_INSTALL_PREFIX=${PSLIB_INSTALL_ROOT} - -DCMAKE_BUILD_TYPE=${THIRD_PARTY_BUILD_TYPE} - CMAKE_CACHE_ARGS -DCMAKE_INSTALL_PREFIX:PATH=${PSLIB_INSTALL_ROOT} - -DCMAKE_BUILD_TYPE=${THIRD_PARTY_BUILD_TYPE} - BUILD_BYPRODUCTS ${PSLIB_LIB} -) + ${PSLIB_PROJECT} + ${EXTERNAL_PROJECT_LOG_ARGS} + PREFIX ${PSLIB_PREFIX_DIR} + DOWNLOAD_DIR ${PSLIB_DOWNLOAD_DIR} + DOWNLOAD_COMMAND wget --no-check-certificate ${PSLIB_URL} -c -q -O + ${PSLIB_NAME}.tar.gz && tar zxvf ${PSLIB_NAME}.tar.gz + DOWNLOAD_NO_PROGRESS 1 + UPDATE_COMMAND "" + CMAKE_ARGS -DCMAKE_INSTALL_PREFIX=${PSLIB_INSTALL_ROOT} + -DCMAKE_BUILD_TYPE=${THIRD_PARTY_BUILD_TYPE} + CMAKE_CACHE_ARGS -DCMAKE_INSTALL_PREFIX:PATH=${PSLIB_INSTALL_ROOT} + -DCMAKE_BUILD_TYPE=${THIRD_PARTY_BUILD_TYPE} + BUILD_BYPRODUCTS ${PSLIB_LIB}) -ADD_LIBRARY(pslib SHARED IMPORTED GLOBAL) -SET_PROPERTY(TARGET pslib PROPERTY IMPORTED_LOCATION ${PSLIB_LIB}) -ADD_DEPENDENCIES(pslib ${PSLIB_PROJECT}) +add_library(pslib SHARED IMPORTED GLOBAL) +set_property(TARGET pslib PROPERTY IMPORTED_LOCATION ${PSLIB_LIB}) +add_dependencies(pslib ${PSLIB_PROJECT}) diff --git a/cmake/external/pslib_brpc.cmake b/cmake/external/pslib_brpc.cmake index 27e2788aa21..eef91052a40 100644 --- a/cmake/external/pslib_brpc.cmake +++ b/cmake/external/pslib_brpc.cmake @@ -12,52 +12,61 @@ # See the License for the specific language governing permissions and # limitations under the License. -INCLUDE(ExternalProject) +include(ExternalProject) -SET(PSLIB_BRPC_PROJECT "extern_pslib_brpc") -IF((NOT DEFINED PSLIB_BRPC_NAME) OR (NOT DEFINED PSLIB_BRPC_URL)) - MESSAGE(STATUS "use pre defined download url") - SET(PSLIB_BRPC_VER "0.1.0" CACHE STRING "" FORCE) - SET(PSLIB_BRPC_NAME "pslib_brpc" CACHE STRING "" FORCE) - SET(PSLIB_BRPC_URL "https://pslib.bj.bcebos.com/pslib_brpc.tar.gz" CACHE STRING "" FORCE) -ENDIF() -MESSAGE(STATUS "PSLIB_BRPC_NAME: ${PSLIB_BRPC_NAME}, PSLIB_BRPC_URL: ${PSLIB_BRPC_URL}") -SET(PSLIB_BRPC_PREFIX_DIR "${THIRD_PARTY_PATH}/pslib_brpc") -SET(PSLIB_BRPC_DOWNLOAD_DIR "${PSLIB_BRPC_PREFIX_DIR}/src/${PSLIB_BRPC_PROJECT}") -SET(PSLIB_BRPC_DST_DIR "pslib_brpc") -SET(PSLIB_BRPC_INSTALL_ROOT "${THIRD_PARTY_PATH}/install") -SET(PSLIB_BRPC_INSTALL_DIR ${PSLIB_BRPC_INSTALL_ROOT}/${PSLIB_BRPC_DST_DIR}) -SET(PSLIB_BRPC_ROOT ${PSLIB_BRPC_INSTALL_DIR}) -SET(PSLIB_BRPC_INC_DIR ${PSLIB_BRPC_ROOT}/include) -SET(PSLIB_BRPC_LIB_DIR ${PSLIB_BRPC_ROOT}/lib) -SET(PSLIB_BRPC_LIB ${PSLIB_BRPC_LIB_DIR}/libbrpc.a) -SET(PSLIB_BRPC_IOMP_LIB ${PSLIB_BRPC_LIB_DIR}/libiomp5.so) #todo what is this -SET(CMAKE_INSTALL_RPATH "${CMAKE_INSTALL_RPATH}" "${PSLIB_BRPC_ROOT}/lib") +set(PSLIB_BRPC_PROJECT "extern_pslib_brpc") +if((NOT DEFINED PSLIB_BRPC_NAME) OR (NOT DEFINED PSLIB_BRPC_URL)) + message(STATUS "use pre defined download url") + set(PSLIB_BRPC_VER + "0.1.0" + CACHE STRING "" FORCE) + set(PSLIB_BRPC_NAME + "pslib_brpc" + CACHE STRING "" FORCE) + set(PSLIB_BRPC_URL + "https://pslib.bj.bcebos.com/pslib_brpc.tar.gz" + CACHE STRING "" FORCE) +endif() +message( + STATUS + "PSLIB_BRPC_NAME: ${PSLIB_BRPC_NAME}, PSLIB_BRPC_URL: ${PSLIB_BRPC_URL}") +set(PSLIB_BRPC_PREFIX_DIR "${THIRD_PARTY_PATH}/pslib_brpc") +set(PSLIB_BRPC_DOWNLOAD_DIR + "${PSLIB_BRPC_PREFIX_DIR}/src/${PSLIB_BRPC_PROJECT}") +set(PSLIB_BRPC_DST_DIR "pslib_brpc") +set(PSLIB_BRPC_INSTALL_ROOT "${THIRD_PARTY_PATH}/install") +set(PSLIB_BRPC_INSTALL_DIR ${PSLIB_BRPC_INSTALL_ROOT}/${PSLIB_BRPC_DST_DIR}) +set(PSLIB_BRPC_ROOT ${PSLIB_BRPC_INSTALL_DIR}) +set(PSLIB_BRPC_INC_DIR ${PSLIB_BRPC_ROOT}/include) +set(PSLIB_BRPC_LIB_DIR ${PSLIB_BRPC_ROOT}/lib) +set(PSLIB_BRPC_LIB ${PSLIB_BRPC_LIB_DIR}/libbrpc.a) +set(PSLIB_BRPC_IOMP_LIB ${PSLIB_BRPC_LIB_DIR}/libiomp5.so) #todo what is this +set(CMAKE_INSTALL_RPATH "${CMAKE_INSTALL_RPATH}" "${PSLIB_BRPC_ROOT}/lib") -INCLUDE_DIRECTORIES(${PSLIB_BRPC_INC_DIR}) +include_directories(${PSLIB_BRPC_INC_DIR}) -FILE(WRITE ${PSLIB_BRPC_DOWNLOAD_DIR}/CMakeLists.txt - "PROJECT(PSLIB_BRPC)\n" - "cmake_minimum_required(VERSION 3.0)\n" +file( + WRITE ${PSLIB_BRPC_DOWNLOAD_DIR}/CMakeLists.txt + "PROJECT(PSLIB_BRPC)\n" "cmake_minimum_required(VERSION 3.0)\n" "install(DIRECTORY ${PSLIB_BRPC_NAME}/include ${PSLIB_BRPC_NAME}/lib \n" " DESTINATION ${PSLIB_BRPC_DST_DIR})\n") ExternalProject_Add( - ${PSLIB_BRPC_PROJECT} - ${EXTERNAL_PROJECT_LOG_ARGS} - PREFIX ${PSLIB_BRPC_PREFIX_DIR} - DOWNLOAD_DIR ${PSLIB_BRPC_DOWNLOAD_DIR} - DOWNLOAD_COMMAND wget --no-check-certificate ${PSLIB_BRPC_URL} -c -q -O ${PSLIB_BRPC_NAME}.tar.gz - && tar zxvf ${PSLIB_BRPC_NAME}.tar.gz - DOWNLOAD_NO_PROGRESS 1 - UPDATE_COMMAND "" - CMAKE_ARGS -DCMAKE_INSTALL_PREFIX=${PSLIB_BRPC_INSTALL_ROOT} - -DCMAKE_BUILD_TYPE=${THIRD_PARTY_BUILD_TYPE} - CMAKE_CACHE_ARGS -DCMAKE_INSTALL_PREFIX:PATH=${PSLIB_BRPC_INSTALL_ROOT} - -DCMAKE_BUILD_TYPE=${THIRD_PARTY_BUILD_TYPE} - BUILD_BYPRODUCTS ${PSLIB_BRPC_LIB} -) + ${PSLIB_BRPC_PROJECT} + ${EXTERNAL_PROJECT_LOG_ARGS} + PREFIX ${PSLIB_BRPC_PREFIX_DIR} + DOWNLOAD_DIR ${PSLIB_BRPC_DOWNLOAD_DIR} + DOWNLOAD_COMMAND + wget --no-check-certificate ${PSLIB_BRPC_URL} -c -q -O + ${PSLIB_BRPC_NAME}.tar.gz && tar zxvf ${PSLIB_BRPC_NAME}.tar.gz + DOWNLOAD_NO_PROGRESS 1 + UPDATE_COMMAND "" + CMAKE_ARGS -DCMAKE_INSTALL_PREFIX=${PSLIB_BRPC_INSTALL_ROOT} + -DCMAKE_BUILD_TYPE=${THIRD_PARTY_BUILD_TYPE} + CMAKE_CACHE_ARGS -DCMAKE_INSTALL_PREFIX:PATH=${PSLIB_BRPC_INSTALL_ROOT} + -DCMAKE_BUILD_TYPE=${THIRD_PARTY_BUILD_TYPE} + BUILD_BYPRODUCTS ${PSLIB_BRPC_LIB}) -ADD_LIBRARY(pslib_brpc SHARED IMPORTED GLOBAL) -SET_PROPERTY(TARGET pslib_brpc PROPERTY IMPORTED_LOCATION ${PSLIB_BRPC_LIB}) -ADD_DEPENDENCIES(pslib_brpc ${PSLIB_BRPC_PROJECT}) +add_library(pslib_brpc SHARED IMPORTED GLOBAL) +set_property(TARGET pslib_brpc PROPERTY IMPORTED_LOCATION ${PSLIB_BRPC_LIB}) +add_dependencies(pslib_brpc ${PSLIB_BRPC_PROJECT}) diff --git a/cmake/external/pybind11.cmake b/cmake/external/pybind11.cmake index f87e73081ff..e236767cec1 100644 --- a/cmake/external/pybind11.cmake +++ b/cmake/external/pybind11.cmake @@ -14,31 +14,29 @@ include(ExternalProject) -set(PYBIND_PREFIX_DIR ${THIRD_PARTY_PATH}/pybind) -SET(PYBIND_REPOSITORY ${GIT_URL}/pybind/pybind11.git) -SET(PYBIND_TAG v2.4.3) +set(PYBIND_PREFIX_DIR ${THIRD_PARTY_PATH}/pybind) +set(PYBIND_REPOSITORY ${GIT_URL}/pybind/pybind11.git) +set(PYBIND_TAG v2.4.3) set(PYBIND_INCLUDE_DIR ${THIRD_PARTY_PATH}/pybind/src/extern_pybind/include) include_directories(${PYBIND_INCLUDE_DIR}) ExternalProject_Add( - extern_pybind - ${EXTERNAL_PROJECT_LOG_ARGS} - ${SHALLOW_CLONE} - GIT_REPOSITORY ${PYBIND_REPOSITORY} - GIT_TAG ${PYBIND_TAG} - PREFIX ${PYBIND_PREFIX_DIR} - # If we explicitly leave the `UPDATE_COMMAND` of the ExternalProject_Add - # function in CMakeLists blank, it will cause another parameter GIT_TAG - # to be modified without triggering incremental compilation, and the - # third-party library version changes cannot be incorporated. - # reference: https://cmake.org/cmake/help/latest/module/ExternalProject.html - UPDATE_COMMAND "" - CONFIGURE_COMMAND "" - BUILD_COMMAND "" - INSTALL_COMMAND "" - TEST_COMMAND "" -) + extern_pybind + ${EXTERNAL_PROJECT_LOG_ARGS} ${SHALLOW_CLONE} + GIT_REPOSITORY ${PYBIND_REPOSITORY} + GIT_TAG ${PYBIND_TAG} + PREFIX ${PYBIND_PREFIX_DIR} + # If we explicitly leave the `UPDATE_COMMAND` of the ExternalProject_Add + # function in CMakeLists blank, it will cause another parameter GIT_TAG + # to be modified without triggering incremental compilation, and the + # third-party library version changes cannot be incorporated. + # reference: https://cmake.org/cmake/help/latest/module/ExternalProject.html + UPDATE_COMMAND "" + CONFIGURE_COMMAND "" + BUILD_COMMAND "" + INSTALL_COMMAND "" + TEST_COMMAND "") add_library(pybind INTERFACE) diff --git a/cmake/external/python.cmake b/cmake/external/python.cmake index ab377608413..bc58c9d7b6c 100644 --- a/cmake/external/python.cmake +++ b/cmake/external/python.cmake @@ -12,68 +12,72 @@ # See the License for the specific language governing permissions and # limitations under the License. -INCLUDE(python_module) +include(python_module) -FIND_PACKAGE(PythonInterp ${PY_VERSION} REQUIRED) -FIND_PACKAGE(PythonLibs ${PY_VERSION} REQUIRED) +find_package(PythonInterp ${PY_VERSION} REQUIRED) +find_package(PythonLibs ${PY_VERSION} REQUIRED) if(WIN32) - execute_process(COMMAND "${PYTHON_EXECUTABLE}" "-c" -"from distutils import sysconfig as s;import sys;import struct; + execute_process( + COMMAND + "${PYTHON_EXECUTABLE}" "-c" + "from distutils import sysconfig as s;import sys;import struct; print(sys.prefix); print(s.get_config_var('LDVERSION') or s.get_config_var('VERSION')); " - RESULT_VARIABLE _PYTHON_SUCCESS - OUTPUT_VARIABLE _PYTHON_VALUES - ERROR_VARIABLE _PYTHON_ERROR_VALUE) + RESULT_VARIABLE _PYTHON_SUCCESS + OUTPUT_VARIABLE _PYTHON_VALUES + ERROR_VARIABLE _PYTHON_ERROR_VALUE) - if(NOT _PYTHON_SUCCESS EQUAL 0) - set(PYTHONLIBS_FOUND FALSE) - return() - endif() + if(NOT _PYTHON_SUCCESS EQUAL 0) + set(PYTHONLIBS_FOUND FALSE) + return() + endif() - # Convert the process output into a list - string(REGEX REPLACE ";" "\\\\;" _PYTHON_VALUES ${_PYTHON_VALUES}) - string(REGEX REPLACE "\n" ";" _PYTHON_VALUES ${_PYTHON_VALUES}) - list(GET _PYTHON_VALUES 0 PYTHON_PREFIX) - list(GET _PYTHON_VALUES 1 PYTHON_LIBRARY_SUFFIX) + # Convert the process output into a list + string(REGEX REPLACE ";" "\\\\;" _PYTHON_VALUES ${_PYTHON_VALUES}) + string(REGEX REPLACE "\n" ";" _PYTHON_VALUES ${_PYTHON_VALUES}) + list(GET _PYTHON_VALUES 0 PYTHON_PREFIX) + list(GET _PYTHON_VALUES 1 PYTHON_LIBRARY_SUFFIX) - # Make sure all directory separators are '/' - string(REGEX REPLACE "\\\\" "/" PYTHON_PREFIX ${PYTHON_PREFIX}) + # Make sure all directory separators are '/' + string(REGEX REPLACE "\\\\" "/" PYTHON_PREFIX ${PYTHON_PREFIX}) - set(PYTHON_LIBRARY - "${PYTHON_PREFIX}/libs/Python${PYTHON_LIBRARY_SUFFIX}.lib") + set(PYTHON_LIBRARY "${PYTHON_PREFIX}/libs/Python${PYTHON_LIBRARY_SUFFIX}.lib") - # when run in a venv, PYTHON_PREFIX points to it. But the libraries remain in the - # original python installation. They may be found relative to PYTHON_INCLUDE_DIR. - if(NOT EXISTS "${PYTHON_LIBRARY}") - get_filename_component(_PYTHON_ROOT ${PYTHON_INCLUDE_DIR} DIRECTORY) - set(PYTHON_LIBRARY - "${_PYTHON_ROOT}/libs/Python${PYTHON_LIBRARY_SUFFIX}.lib") - endif() + # when run in a venv, PYTHON_PREFIX points to it. But the libraries remain in the + # original python installation. They may be found relative to PYTHON_INCLUDE_DIR. + if(NOT EXISTS "${PYTHON_LIBRARY}") + get_filename_component(_PYTHON_ROOT ${PYTHON_INCLUDE_DIR} DIRECTORY) + set(PYTHON_LIBRARY + "${_PYTHON_ROOT}/libs/Python${PYTHON_LIBRARY_SUFFIX}.lib") + endif() - # raise an error if the python libs are still not found. - if(NOT EXISTS "${PYTHON_LIBRARY}") - message(FATAL_ERROR "Python libraries not found") - endif() - SET(PYTHON_LIBRARIES "${PYTHON_LIBRARY}") + # raise an error if the python libs are still not found. + if(NOT EXISTS "${PYTHON_LIBRARY}") + message(FATAL_ERROR "Python libraries not found") + endif() + set(PYTHON_LIBRARIES "${PYTHON_LIBRARY}") endif(WIN32) # Fixme: Maybe find a static library. Get SHARED/STATIC by FIND_PACKAGE. -ADD_LIBRARY(python SHARED IMPORTED GLOBAL) -SET_PROPERTY(TARGET python PROPERTY IMPORTED_LOCATION ${PYTHON_LIBRARIES}) +add_library(python SHARED IMPORTED GLOBAL) +set_property(TARGET python PROPERTY IMPORTED_LOCATION ${PYTHON_LIBRARIES}) -SET(py_env "") -IF(PYTHONINTERP_FOUND) - find_python_module(pip REQUIRED) - find_python_module(numpy REQUIRED) - find_python_module(wheel REQUIRED) - find_python_module(google.protobuf REQUIRED) - FIND_PACKAGE(NumPy REQUIRED) - IF(${PY_GOOGLE.PROTOBUF_VERSION} AND ${PY_GOOGLE.PROTOBUF_VERSION} VERSION_LESS "3.0.0") - MESSAGE(FATAL_ERROR "Found Python Protobuf ${PY_GOOGLE.PROTOBUF_VERSION} < 3.0.0, " +set(py_env "") +if(PYTHONINTERP_FOUND) + find_python_module(pip REQUIRED) + find_python_module(numpy REQUIRED) + find_python_module(wheel REQUIRED) + find_python_module(google.protobuf REQUIRED) + find_package(NumPy REQUIRED) + if(${PY_GOOGLE.PROTOBUF_VERSION} AND ${PY_GOOGLE.PROTOBUF_VERSION} + VERSION_LESS "3.0.0") + message( + FATAL_ERROR + "Found Python Protobuf ${PY_GOOGLE.PROTOBUF_VERSION} < 3.0.0, " "please use pip to upgrade protobuf. pip install -U protobuf") - ENDIF() -ENDIF(PYTHONINTERP_FOUND) -INCLUDE_DIRECTORIES(${PYTHON_INCLUDE_DIR}) -INCLUDE_DIRECTORIES(${PYTHON_NUMPY_INCLUDE_DIR}) + endif() +endif(PYTHONINTERP_FOUND) +include_directories(${PYTHON_INCLUDE_DIR}) +include_directories(${PYTHON_NUMPY_INCLUDE_DIR}) diff --git a/cmake/external/rocksdb.cmake b/cmake/external/rocksdb.cmake index befbc8138fc..2e90f50e3cd 100644 --- a/cmake/external/rocksdb.cmake +++ b/cmake/external/rocksdb.cmake @@ -12,40 +12,44 @@ # See the License for the specific language governing permissions and # limitations under the License. -INCLUDE(ExternalProject) +include(ExternalProject) -SET(ROCKSDB_PREFIX_DIR ${THIRD_PARTY_PATH}/rocksdb) -SET(ROCKSDB_INSTALL_DIR ${THIRD_PARTY_PATH}/install/rocksdb) -SET(ROCKSDB_INCLUDE_DIR "${ROCKSDB_INSTALL_DIR}/include" CACHE PATH "rocksdb include directory." FORCE) -SET(ROCKSDB_LIBRARIES "${ROCKSDB_INSTALL_DIR}/lib/librocksdb.a" CACHE FILEPATH "rocksdb library." FORCE) -SET(ROCKSDB_CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -fPIC") -INCLUDE_DIRECTORIES(${ROCKSDB_INCLUDE_DIR}) +set(ROCKSDB_PREFIX_DIR ${THIRD_PARTY_PATH}/rocksdb) +set(ROCKSDB_INSTALL_DIR ${THIRD_PARTY_PATH}/install/rocksdb) +set(ROCKSDB_INCLUDE_DIR + "${ROCKSDB_INSTALL_DIR}/include" + CACHE PATH "rocksdb include directory." FORCE) +set(ROCKSDB_LIBRARIES + "${ROCKSDB_INSTALL_DIR}/lib/librocksdb.a" + CACHE FILEPATH "rocksdb library." FORCE) +set(ROCKSDB_CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -fPIC") +include_directories(${ROCKSDB_INCLUDE_DIR}) ExternalProject_Add( - extern_rocksdb - ${EXTERNAL_PROJECT_LOG_ARGS} - PREFIX ${ROCKSDB_PREFIX_DIR} - GIT_REPOSITORY "https://github.com/facebook/rocksdb" - GIT_TAG v6.10.1 - UPDATE_COMMAND "" - CMAKE_ARGS -DCMAKE_CXX_COMPILER=${CMAKE_CXX_COMPILER} - -DCMAKE_C_COMPILER=${CMAKE_C_COMPILER} - -DWITH_BZ2=OFF - -DWITH_GFLAGS=OFF - -DCMAKE_CXX_FLAGS=${ROCKSDB_CMAKE_CXX_FLAGS} - -DCMAKE_C_FLAGS=${CMAKE_C_FLAGS} -# BUILD_BYPRODUCTS ${ROCKSDB_PREFIX_DIR}/src/extern_rocksdb/librocksdb.a - INSTALL_COMMAND mkdir -p ${ROCKSDB_INSTALL_DIR}/lib/ - && cp ${ROCKSDB_PREFIX_DIR}/src/extern_rocksdb/librocksdb.a ${ROCKSDB_LIBRARIES} - && cp -r ${ROCKSDB_PREFIX_DIR}/src/extern_rocksdb/include ${ROCKSDB_INSTALL_DIR}/ - BUILD_IN_SOURCE 1 -) + extern_rocksdb + ${EXTERNAL_PROJECT_LOG_ARGS} + PREFIX ${ROCKSDB_PREFIX_DIR} + GIT_REPOSITORY "https://github.com/facebook/rocksdb" + GIT_TAG v6.10.1 + UPDATE_COMMAND "" + CMAKE_ARGS -DCMAKE_CXX_COMPILER=${CMAKE_CXX_COMPILER} + -DCMAKE_C_COMPILER=${CMAKE_C_COMPILER} + -DWITH_BZ2=OFF + -DWITH_GFLAGS=OFF + -DCMAKE_CXX_FLAGS=${ROCKSDB_CMAKE_CXX_FLAGS} + -DCMAKE_C_FLAGS=${CMAKE_C_FLAGS} + # BUILD_BYPRODUCTS ${ROCKSDB_PREFIX_DIR}/src/extern_rocksdb/librocksdb.a + INSTALL_COMMAND + mkdir -p ${ROCKSDB_INSTALL_DIR}/lib/ && cp + ${ROCKSDB_PREFIX_DIR}/src/extern_rocksdb/librocksdb.a ${ROCKSDB_LIBRARIES} + && cp -r ${ROCKSDB_PREFIX_DIR}/src/extern_rocksdb/include + ${ROCKSDB_INSTALL_DIR}/ + BUILD_IN_SOURCE 1) -ADD_DEPENDENCIES(extern_rocksdb snappy) +add_dependencies(extern_rocksdb snappy) -ADD_LIBRARY(rocksdb STATIC IMPORTED GLOBAL) -SET_PROPERTY(TARGET rocksdb PROPERTY IMPORTED_LOCATION ${ROCKSDB_LIBRARIES}) -ADD_DEPENDENCIES(rocksdb extern_rocksdb) - -LIST(APPEND external_project_dependencies rocksdb) +add_library(rocksdb STATIC IMPORTED GLOBAL) +set_property(TARGET rocksdb PROPERTY IMPORTED_LOCATION ${ROCKSDB_LIBRARIES}) +add_dependencies(rocksdb extern_rocksdb) +list(APPEND external_project_dependencies rocksdb) diff --git a/cmake/external/snappy.cmake b/cmake/external/snappy.cmake index 42320df1397..dfb7192a71e 100644 --- a/cmake/external/snappy.cmake +++ b/cmake/external/snappy.cmake @@ -12,58 +12,61 @@ # See the License for the specific language governing permissions and # limitations under the License. -include (ExternalProject) +include(ExternalProject) # NOTE: snappy is needed when linking with recordio set(SNAPPY_PREFIX_DIR ${THIRD_PARTY_PATH}/snappy) set(SNAPPY_INSTALL_DIR ${THIRD_PARTY_PATH}/install/snappy) -set(SNAPPY_INCLUDE_DIR "${SNAPPY_INSTALL_DIR}/include" CACHE PATH "snappy include directory." FORCE) +set(SNAPPY_INCLUDE_DIR + "${SNAPPY_INSTALL_DIR}/include" + CACHE PATH "snappy include directory." FORCE) if(WIN32) - SET(SNAPPY_CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} /wd4244 /wd4267") - IF(NOT EXISTS "${SNAPPY_INSTALL_DIR}/lib/libsnappy.lib") - add_custom_command(TARGET extern_snappy POST_BUILD - COMMAND cmake -E copy ${SNAPPY_INSTALL_DIR}/lib/snappy.lib ${SNAPPY_INSTALL_DIR}/lib/libsnappy.lib - ) - ENDIF() - set(SNAPPY_LIBRARIES "${SNAPPY_INSTALL_DIR}/lib/libsnappy.lib") + set(SNAPPY_CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} /wd4244 /wd4267") + if(NOT EXISTS "${SNAPPY_INSTALL_DIR}/lib/libsnappy.lib") + add_custom_command( + TARGET extern_snappy + POST_BUILD + COMMAND cmake -E copy ${SNAPPY_INSTALL_DIR}/lib/snappy.lib + ${SNAPPY_INSTALL_DIR}/lib/libsnappy.lib) + endif() + set(SNAPPY_LIBRARIES "${SNAPPY_INSTALL_DIR}/lib/libsnappy.lib") else() - SET(SNAPPY_CMAKE_CXX_FLAGS ${CMAKE_CXX_FLAGS}) - set(SNAPPY_LIBRARIES "${SNAPPY_INSTALL_DIR}/lib/libsnappy.a") + set(SNAPPY_CMAKE_CXX_FLAGS ${CMAKE_CXX_FLAGS}) + set(SNAPPY_LIBRARIES "${SNAPPY_INSTALL_DIR}/lib/libsnappy.a") endif() ExternalProject_Add( - extern_snappy - GIT_REPOSITORY "https://github.com/google/snappy" - GIT_TAG "1.1.7" - PREFIX ${SNAPPY_PREFIX_DIR} - UPDATE_COMMAND "" - CMAKE_ARGS -DCMAKE_CXX_COMPILER=${CMAKE_CXX_COMPILER} - -DCMAKE_C_COMPILER=${CMAKE_C_COMPILER} - -DCMAKE_C_FLAGS=${CMAKE_C_FLAGS} - -DCMAKE_C_FLAGS_DEBUG=${CMAKE_C_FLAGS_DEBUG} - -DCMAKE_C_FLAGS_RELEASE=${CMAKE_C_FLAGS_RELEASE} - -DCMAKE_CXX_FLAGS=${SNAPPY_CMAKE_CXX_FLAGS} - -DCMAKE_CXX_FLAGS_RELEASE=${CMAKE_CXX_FLAGS_RELEASE} - -DCMAKE_CXX_FLAGS_DEBUG=${CMAKE_CXX_FLAGS_DEBUG} - -DCMAKE_INSTALL_PREFIX=${SNAPPY_INSTALL_DIR} - -DCMAKE_INSTALL_LIBDIR=${SNAPPY_INSTALL_DIR}/lib - -DCMAKE_POSITION_INDEPENDENT_CODE=ON - -DBUILD_TESTING=OFF - -DSNAPPY_BUILD_TESTS:BOOL=OFF - -DCMAKE_BUILD_TYPE=${THIRD_PARTY_BUILD_TYPE} - ${EXTERNAL_OPTIONAL_ARGS} - CMAKE_CACHE_ARGS -DCMAKE_INSTALL_PREFIX:PATH=${SNAPPY_INSTALL_DIR} - -DCMAKE_INSTALL_LIBDIR:PATH=${SNAPPY_INSTALL_DIR}/lib - -DCMAKE_POSITION_INDEPENDENT_CODE:BOOL=ON - -DCMAKE_BUILD_TYPE:STRING=${THIRD_PARTY_BUILD_TYPE} - BUILD_BYPRODUCTS ${SNAPPY_LIBRARIES} -) + extern_snappy + GIT_REPOSITORY "https://github.com/google/snappy" + GIT_TAG "1.1.7" + PREFIX ${SNAPPY_PREFIX_DIR} + UPDATE_COMMAND "" + CMAKE_ARGS -DCMAKE_CXX_COMPILER=${CMAKE_CXX_COMPILER} + -DCMAKE_C_COMPILER=${CMAKE_C_COMPILER} + -DCMAKE_C_FLAGS=${CMAKE_C_FLAGS} + -DCMAKE_C_FLAGS_DEBUG=${CMAKE_C_FLAGS_DEBUG} + -DCMAKE_C_FLAGS_RELEASE=${CMAKE_C_FLAGS_RELEASE} + -DCMAKE_CXX_FLAGS=${SNAPPY_CMAKE_CXX_FLAGS} + -DCMAKE_CXX_FLAGS_RELEASE=${CMAKE_CXX_FLAGS_RELEASE} + -DCMAKE_CXX_FLAGS_DEBUG=${CMAKE_CXX_FLAGS_DEBUG} + -DCMAKE_INSTALL_PREFIX=${SNAPPY_INSTALL_DIR} + -DCMAKE_INSTALL_LIBDIR=${SNAPPY_INSTALL_DIR}/lib + -DCMAKE_POSITION_INDEPENDENT_CODE=ON + -DBUILD_TESTING=OFF + -DSNAPPY_BUILD_TESTS:BOOL=OFF + -DCMAKE_BUILD_TYPE=${THIRD_PARTY_BUILD_TYPE} + ${EXTERNAL_OPTIONAL_ARGS} + CMAKE_CACHE_ARGS + -DCMAKE_INSTALL_PREFIX:PATH=${SNAPPY_INSTALL_DIR} + -DCMAKE_INSTALL_LIBDIR:PATH=${SNAPPY_INSTALL_DIR}/lib + -DCMAKE_POSITION_INDEPENDENT_CODE:BOOL=ON + -DCMAKE_BUILD_TYPE:STRING=${THIRD_PARTY_BUILD_TYPE} + BUILD_BYPRODUCTS ${SNAPPY_LIBRARIES}) add_library(snappy STATIC IMPORTED GLOBAL) set_property(TARGET snappy PROPERTY IMPORTED_LOCATION ${SNAPPY_LIBRARIES}) include_directories(${SNAPPY_INCLUDE_DIR}) add_dependencies(snappy extern_snappy) - diff --git a/cmake/external/threadpool.cmake b/cmake/external/threadpool.cmake index c4d978115bf..1047465095f 100644 --- a/cmake/external/threadpool.cmake +++ b/cmake/external/threadpool.cmake @@ -12,32 +12,30 @@ # See the License for the specific language governing permissions and # limitations under the License. -INCLUDE(ExternalProject) +include(ExternalProject) -SET(THREADPOOL_PREFIX_DIR ${THIRD_PARTY_PATH}/threadpool) +set(THREADPOOL_PREFIX_DIR ${THIRD_PARTY_PATH}/threadpool) if(WITH_ASCEND OR WITH_ASCEND_CL) - SET(THREADPOOL_REPOSITORY https://gitee.com/tianjianhe/ThreadPool.git) + set(THREADPOOL_REPOSITORY https://gitee.com/tianjianhe/ThreadPool.git) else() - SET(THREADPOOL_REPOSITORY ${GIT_URL}/progschj/ThreadPool.git) + set(THREADPOOL_REPOSITORY ${GIT_URL}/progschj/ThreadPool.git) endif() -SET(THREADPOOL_TAG 9a42ec1329f259a5f4881a291db1dcb8f2ad9040) +set(THREADPOOL_TAG 9a42ec1329f259a5f4881a291db1dcb8f2ad9040) -SET(THREADPOOL_INCLUDE_DIR ${THIRD_PARTY_PATH}/threadpool/src/extern_threadpool) -INCLUDE_DIRECTORIES(${THREADPOOL_INCLUDE_DIR}) +set(THREADPOOL_INCLUDE_DIR ${THIRD_PARTY_PATH}/threadpool/src/extern_threadpool) +include_directories(${THREADPOOL_INCLUDE_DIR}) ExternalProject_Add( - extern_threadpool - ${EXTERNAL_PROJECT_LOG_ARGS} - ${SHALLOW_CLONE} - GIT_REPOSITORY ${THREADPOOL_REPOSITORY} - GIT_TAG ${THREADPOOL_TAG} - PREFIX ${THREADPOOL_PREFIX_DIR} - UPDATE_COMMAND "" - CONFIGURE_COMMAND "" - BUILD_COMMAND "" - INSTALL_COMMAND "" - TEST_COMMAND "" -) + extern_threadpool + ${EXTERNAL_PROJECT_LOG_ARGS} ${SHALLOW_CLONE} + GIT_REPOSITORY ${THREADPOOL_REPOSITORY} + GIT_TAG ${THREADPOOL_TAG} + PREFIX ${THREADPOOL_PREFIX_DIR} + UPDATE_COMMAND "" + CONFIGURE_COMMAND "" + BUILD_COMMAND "" + INSTALL_COMMAND "" + TEST_COMMAND "") add_library(simple_threadpool INTERFACE) diff --git a/cmake/external/utf8proc.cmake b/cmake/external/utf8proc.cmake index a5de5c15c3b..13107c03cf1 100644 --- a/cmake/external/utf8proc.cmake +++ b/cmake/external/utf8proc.cmake @@ -12,40 +12,38 @@ # See the License for the specific language governing permissions and # limitations under the License. -INCLUDE(ExternalProject) +include(ExternalProject) -SET(UTF8PROC_PREFIX_DIR ${THIRD_PARTY_PATH}/utf8proc) -SET(UTF8PROC_INSTALL_DIR ${THIRD_PARTY_PATH}/install/utf8proc) +set(UTF8PROC_PREFIX_DIR ${THIRD_PARTY_PATH}/utf8proc) +set(UTF8PROC_INSTALL_DIR ${THIRD_PARTY_PATH}/install/utf8proc) # As we add extra features for utf8proc, we use the non-official repo -SET(UTF8PROC_REPOSITORY ${GIT_URL}/JuliaStrings/utf8proc.git) -SET(UTF8PROC_TAG v2.6.1) +set(UTF8PROC_REPOSITORY ${GIT_URL}/JuliaStrings/utf8proc.git) +set(UTF8PROC_TAG v2.6.1) -IF(WIN32) - SET(UTF8PROC_LIBRARIES "${UTF8PROC_INSTALL_DIR}/lib/utf8proc_static.lib") +if(WIN32) + set(UTF8PROC_LIBRARIES "${UTF8PROC_INSTALL_DIR}/lib/utf8proc_static.lib") add_definitions(-DUTF8PROC_STATIC) -ELSE(WIN32) - SET(UTF8PROC_LIBRARIES "${UTF8PROC_INSTALL_DIR}/lib/libutf8proc.a") -ENDIF(WIN32) +else(WIN32) + set(UTF8PROC_LIBRARIES "${UTF8PROC_INSTALL_DIR}/lib/libutf8proc.a") +endif(WIN32) -INCLUDE_DIRECTORIES(${UTF8PROC_INSTALL_DIR}/include) +include_directories(${UTF8PROC_INSTALL_DIR}/include) ExternalProject_Add( extern_utf8proc - ${EXTERNAL_PROJECT_LOG_ARGS} - ${SHALLOW_CLONE} - GIT_REPOSITORY ${UTF8PROC_REPOSITORY} - GIT_TAG ${UTF8PROC_TAG} - PREFIX ${UTF8PROC_PREFIX_DIR} - UPDATE_COMMAND "" - CMAKE_ARGS -DCMAKE_C_FLAGS=${CMAKE_C_FLAGS} - -DBUILD_SHARED=ON - -DBUILD_STATIC=ON - -DCMAKE_CXX_FLAGS=${CMAKE_CXX_FLAGS} - -DCMAKE_INSTALL_PREFIX:PATH=${UTF8PROC_INSTALL_DIR} - -DCMAKE_BUILD_TYPE:STRING=${CMAKE_BUILD_TYPE} - BUILD_BYPRODUCTS ${UTF8PROC_LIBRARIES} -) + ${EXTERNAL_PROJECT_LOG_ARGS} ${SHALLOW_CLONE} + GIT_REPOSITORY ${UTF8PROC_REPOSITORY} + GIT_TAG ${UTF8PROC_TAG} + PREFIX ${UTF8PROC_PREFIX_DIR} + UPDATE_COMMAND "" + CMAKE_ARGS -DCMAKE_C_FLAGS=${CMAKE_C_FLAGS} + -DBUILD_SHARED=ON + -DBUILD_STATIC=ON + -DCMAKE_CXX_FLAGS=${CMAKE_CXX_FLAGS} + -DCMAKE_INSTALL_PREFIX:PATH=${UTF8PROC_INSTALL_DIR} + -DCMAKE_BUILD_TYPE:STRING=${CMAKE_BUILD_TYPE} + BUILD_BYPRODUCTS ${UTF8PROC_LIBRARIES}) -ADD_LIBRARY(utf8proc STATIC IMPORTED GLOBAL) -SET_PROPERTY(TARGET utf8proc PROPERTY IMPORTED_LOCATION ${UTF8PROC_LIBRARIES}) -ADD_DEPENDENCIES(utf8proc extern_utf8proc) +add_library(utf8proc STATIC IMPORTED GLOBAL) +set_property(TARGET utf8proc PROPERTY IMPORTED_LOCATION ${UTF8PROC_LIBRARIES}) +add_dependencies(utf8proc extern_utf8proc) diff --git a/cmake/external/warpctc.cmake b/cmake/external/warpctc.cmake index f0d16fc7978..d38636c9c23 100644 --- a/cmake/external/warpctc.cmake +++ b/cmake/external/warpctc.cmake @@ -12,130 +12,139 @@ # See the License for the specific language governing permissions and # limitations under the License. -INCLUDE(ExternalProject) +include(ExternalProject) -IF(WITH_ROCM) - add_definitions(-DWARPCTC_WITH_HIP) -ENDIF() +if(WITH_ROCM) + add_definitions(-DWARPCTC_WITH_HIP) +endif() -SET(WARPCTC_PREFIX_DIR ${THIRD_PARTY_PATH}/warpctc) -SET(WARPCTC_INSTALL_DIR ${THIRD_PARTY_PATH}/install/warpctc) -# in case of low internet speed +set(WARPCTC_PREFIX_DIR ${THIRD_PARTY_PATH}/warpctc) +set(WARPCTC_INSTALL_DIR ${THIRD_PARTY_PATH}/install/warpctc) +# in case of low internet speed #set(WARPCTC_REPOSITORY https://gitee.com/tianjianhe/warp-ctc.git) -set(WARPCTC_REPOSITORY ${GIT_URL}/baidu-research/warp-ctc.git) -set(WARPCTC_TAG 37ece0e1bbe8a0019a63ac7e6462c36591c66a5b) +set(WARPCTC_REPOSITORY ${GIT_URL}/baidu-research/warp-ctc.git) +set(WARPCTC_TAG 37ece0e1bbe8a0019a63ac7e6462c36591c66a5b) -SET(WARPCTC_INCLUDE_DIR "${WARPCTC_INSTALL_DIR}/include" +set(WARPCTC_INCLUDE_DIR + "${WARPCTC_INSTALL_DIR}/include" CACHE PATH "Warp-ctc Directory" FORCE) # Used in unit test test_WarpCTCLayer -SET(WARPCTC_LIB_DIR "${WARPCTC_INSTALL_DIR}/lib" +set(WARPCTC_LIB_DIR + "${WARPCTC_INSTALL_DIR}/lib" CACHE PATH "Warp-ctc Library Directory" FORCE) -IF(WIN32) - SET(WARPCTC_LIBRARIES "${WARPCTC_INSTALL_DIR}/bin/warpctc${CMAKE_SHARED_LIBRARY_SUFFIX}" - CACHE FILEPATH "Warp-ctc Library" FORCE) +if(WIN32) + set(WARPCTC_LIBRARIES + "${WARPCTC_INSTALL_DIR}/bin/warpctc${CMAKE_SHARED_LIBRARY_SUFFIX}" + CACHE FILEPATH "Warp-ctc Library" FORCE) else(WIN32) - SET(WARPCTC_LIBRARIES "${WARPCTC_INSTALL_DIR}/lib/libwarpctc${CMAKE_SHARED_LIBRARY_SUFFIX}" - CACHE FILEPATH "Warp-ctc Library" FORCE) -ENDIF(WIN32) + set(WARPCTC_LIBRARIES + "${WARPCTC_INSTALL_DIR}/lib/libwarpctc${CMAKE_SHARED_LIBRARY_SUFFIX}" + CACHE FILEPATH "Warp-ctc Library" FORCE) +endif(WIN32) -IF(CMAKE_CXX_COMPILER_ID STREQUAL "Clang" OR CMAKE_CXX_COMPILER_ID STREQUAL "AppleClang" OR WIN32) - SET(USE_OMP OFF) -ELSE() - SET(USE_OMP ON) -ENDIF() +if(CMAKE_CXX_COMPILER_ID STREQUAL "Clang" + OR CMAKE_CXX_COMPILER_ID STREQUAL "AppleClang" + OR WIN32) + set(USE_OMP OFF) +else() + set(USE_OMP ON) +endif() if(WITH_ASCEND OR WITH_ASCEND_CL) - ExternalProject_Add( - extern_warpctc - ${EXTERNAL_PROJECT_LOG_ARGS} - ${SHALLOW_CLONE} - GIT_REPOSITORY ${WARPCTC_REPOSITORY} - GIT_TAG ${WARPCTC_TAG} - PREFIX ${WARPCTC_PREFIX_DIR} - #UPDATE_COMMAND "" - PATCH_COMMAND "" - BUILD_ALWAYS 1 - CMAKE_ARGS -DCMAKE_CXX_COMPILER=${CMAKE_CXX_COMPILER} - -DCMAKE_C_COMPILER=${CMAKE_C_COMPILER} - -DCMAKE_C_FLAGS=${CMAKE_C_FLAGS} - -DCMAKE_C_FLAGS_DEBUG=${CMAKE_C_FLAGS_DEBUG} - -DCMAKE_C_FLAGS_RELEASE=${CMAKE_C_FLAGS_RELEASE} - -DCMAKE_CXX_FLAGS=${CMAKE_CXX_FLAGS} - -DCMAKE_CXX_FLAGS_RELEASE=${CMAKE_CXX_FLAGS_RELEASE} - -DCMAKE_CXX_FLAGS_DEBUG=${CMAKE_CXX_FLAGS_DEBUG} - -DCMAKE_INSTALL_PREFIX=${WARPCTC_INSTALL_DIR} - -DWITH_GPU=${WITH_GPU} - -DWITH_ROCM=${WITH_ROCM} - -DWITH_OMP=${USE_OMP} - -DWITH_TORCH=OFF - -DCMAKE_DISABLE_FIND_PACKAGE_Torch=ON - -DBUILD_SHARED=ON - -DBUILD_TESTS=OFF - -DCMAKE_POSITION_INDEPENDENT_CODE=ON - -DCMAKE_BUILD_TYPE=${THIRD_PARTY_BUILD_TYPE} - ${EXTERNAL_OPTIONAL_ARGS} - CMAKE_CACHE_ARGS -DCMAKE_BUILD_TYPE:STRING=${THIRD_PARTY_BUILD_TYPE} - -DCMAKE_POSITION_INDEPENDENT_CODE:BOOL=ON - -DCMAKE_INSTALL_PREFIX:PATH=${WARPCTC_INSTALL_DIR} - BUILD_BYPRODUCTS ${WARPCTC_LIBRARIES} - ) + ExternalProject_Add( + extern_warpctc + ${EXTERNAL_PROJECT_LOG_ARGS} ${SHALLOW_CLONE} + GIT_REPOSITORY ${WARPCTC_REPOSITORY} + GIT_TAG ${WARPCTC_TAG} + PREFIX ${WARPCTC_PREFIX_DIR} + #UPDATE_COMMAND "" + PATCH_COMMAND "" + BUILD_ALWAYS 1 + CMAKE_ARGS -DCMAKE_CXX_COMPILER=${CMAKE_CXX_COMPILER} + -DCMAKE_C_COMPILER=${CMAKE_C_COMPILER} + -DCMAKE_C_FLAGS=${CMAKE_C_FLAGS} + -DCMAKE_C_FLAGS_DEBUG=${CMAKE_C_FLAGS_DEBUG} + -DCMAKE_C_FLAGS_RELEASE=${CMAKE_C_FLAGS_RELEASE} + -DCMAKE_CXX_FLAGS=${CMAKE_CXX_FLAGS} + -DCMAKE_CXX_FLAGS_RELEASE=${CMAKE_CXX_FLAGS_RELEASE} + -DCMAKE_CXX_FLAGS_DEBUG=${CMAKE_CXX_FLAGS_DEBUG} + -DCMAKE_INSTALL_PREFIX=${WARPCTC_INSTALL_DIR} + -DWITH_GPU=${WITH_GPU} + -DWITH_ROCM=${WITH_ROCM} + -DWITH_OMP=${USE_OMP} + -DWITH_TORCH=OFF + -DCMAKE_DISABLE_FIND_PACKAGE_Torch=ON + -DBUILD_SHARED=ON + -DBUILD_TESTS=OFF + -DCMAKE_POSITION_INDEPENDENT_CODE=ON + -DCMAKE_BUILD_TYPE=${THIRD_PARTY_BUILD_TYPE} + ${EXTERNAL_OPTIONAL_ARGS} + CMAKE_CACHE_ARGS + -DCMAKE_BUILD_TYPE:STRING=${THIRD_PARTY_BUILD_TYPE} + -DCMAKE_POSITION_INDEPENDENT_CODE:BOOL=ON + -DCMAKE_INSTALL_PREFIX:PATH=${WARPCTC_INSTALL_DIR} + BUILD_BYPRODUCTS ${WARPCTC_LIBRARIES}) else() - if(WIN32) - set(WARPCTC_C_FLAGS $) - set(WARPCTC_C_FLAGS_DEBUG $) - set(WARPCTC_C_FLAGS_RELEASE $) - set(WARPCTC_CXX_FLAGS $) - set(WARPCTC_CXX_FLAGS_RELEASE $) - set(WARPCTC_CXX_FLAGS_DEBUG $) - else() - set(WARPCTC_C_FLAGS ${CMAKE_C_FLAGS}) - set(WARPCTC_C_FLAGS_DEBUG ${CMAKE_C_FLAGS_DEBUG}) - set(WARPCTC_C_FLAGS_RELEASE ${CMAKE_C_FLAGS_RELEASE}) - set(WARPCTC_CXX_FLAGS ${CMAKE_CXX_FLAGS}) - set(WARPCTC_CXX_FLAGS_RELEASE ${CMAKE_CXX_FLAGS_RELEASE}) - set(WARPCTC_CXX_FLAGS_DEBUG ${CMAKE_CXX_FLAGS_DEBUG}) - endif() - ExternalProject_Add( - extern_warpctc - ${EXTERNAL_PROJECT_LOG_ARGS} - ${SHALLOW_CLONE} - GIT_REPOSITORY ${WARPCTC_REPOSITORY} - GIT_TAG ${WARPCTC_TAG} - PREFIX ${WARPCTC_PREFIX_DIR} - UPDATE_COMMAND "" - PATCH_COMMAND "" - #BUILD_ALWAYS 1 - CMAKE_ARGS -DCMAKE_CXX_COMPILER=${CMAKE_CXX_COMPILER} - -DCMAKE_C_COMPILER=${CMAKE_C_COMPILER} - -DCMAKE_C_FLAGS=${WARPCTC_C_FLAGS} - -DCMAKE_C_FLAGS_DEBUG=${WARPCTC_C_FLAGS_DEBUG} - -DCMAKE_C_FLAGS_RELEASE=${WARPCTC_C_FLAGS_RELEASE} - -DCMAKE_CXX_FLAGS=${WARPCTC_CXX_FLAGS} - -DCMAKE_CXX_FLAGS_RELEASE=${WARPCTC_CXX_FLAGS_RELEASE} - -DCMAKE_CXX_FLAGS_DEBUG=${WARPCTC_CXX_FLAGS_DEBUG} - -DCMAKE_INSTALL_PREFIX=${WARPCTC_INSTALL_DIR} - -DWITH_GPU=${WITH_GPU} - -DWITH_ROCM=${WITH_ROCM} - -DWITH_OMP=${USE_OMP} - -DWITH_TORCH=OFF - -DCMAKE_DISABLE_FIND_PACKAGE_Torch=ON - -DBUILD_SHARED=ON - -DBUILD_TESTS=OFF - -DCMAKE_POSITION_INDEPENDENT_CODE=ON - -DCMAKE_BUILD_TYPE=${THIRD_PARTY_BUILD_TYPE} - ${EXTERNAL_OPTIONAL_ARGS} - CMAKE_CACHE_ARGS -DCMAKE_BUILD_TYPE:STRING=${THIRD_PARTY_BUILD_TYPE} - -DCMAKE_POSITION_INDEPENDENT_CODE:BOOL=ON - -DCMAKE_INSTALL_PREFIX:PATH=${WARPCTC_INSTALL_DIR} - BUILD_BYPRODUCTS ${WARPCTC_LIBRARIES} - ) + if(WIN32) + set(WARPCTC_C_FLAGS $) + set(WARPCTC_C_FLAGS_DEBUG + $) + set(WARPCTC_C_FLAGS_RELEASE + $) + set(WARPCTC_CXX_FLAGS $) + set(WARPCTC_CXX_FLAGS_RELEASE + $) + set(WARPCTC_CXX_FLAGS_DEBUG + $) + else() + set(WARPCTC_C_FLAGS ${CMAKE_C_FLAGS}) + set(WARPCTC_C_FLAGS_DEBUG ${CMAKE_C_FLAGS_DEBUG}) + set(WARPCTC_C_FLAGS_RELEASE ${CMAKE_C_FLAGS_RELEASE}) + set(WARPCTC_CXX_FLAGS ${CMAKE_CXX_FLAGS}) + set(WARPCTC_CXX_FLAGS_RELEASE ${CMAKE_CXX_FLAGS_RELEASE}) + set(WARPCTC_CXX_FLAGS_DEBUG ${CMAKE_CXX_FLAGS_DEBUG}) + endif() + ExternalProject_Add( + extern_warpctc + ${EXTERNAL_PROJECT_LOG_ARGS} ${SHALLOW_CLONE} + GIT_REPOSITORY ${WARPCTC_REPOSITORY} + GIT_TAG ${WARPCTC_TAG} + PREFIX ${WARPCTC_PREFIX_DIR} + UPDATE_COMMAND "" + PATCH_COMMAND "" + #BUILD_ALWAYS 1 + CMAKE_ARGS -DCMAKE_CXX_COMPILER=${CMAKE_CXX_COMPILER} + -DCMAKE_C_COMPILER=${CMAKE_C_COMPILER} + -DCMAKE_C_FLAGS=${WARPCTC_C_FLAGS} + -DCMAKE_C_FLAGS_DEBUG=${WARPCTC_C_FLAGS_DEBUG} + -DCMAKE_C_FLAGS_RELEASE=${WARPCTC_C_FLAGS_RELEASE} + -DCMAKE_CXX_FLAGS=${WARPCTC_CXX_FLAGS} + -DCMAKE_CXX_FLAGS_RELEASE=${WARPCTC_CXX_FLAGS_RELEASE} + -DCMAKE_CXX_FLAGS_DEBUG=${WARPCTC_CXX_FLAGS_DEBUG} + -DCMAKE_INSTALL_PREFIX=${WARPCTC_INSTALL_DIR} + -DWITH_GPU=${WITH_GPU} + -DWITH_ROCM=${WITH_ROCM} + -DWITH_OMP=${USE_OMP} + -DWITH_TORCH=OFF + -DCMAKE_DISABLE_FIND_PACKAGE_Torch=ON + -DBUILD_SHARED=ON + -DBUILD_TESTS=OFF + -DCMAKE_POSITION_INDEPENDENT_CODE=ON + -DCMAKE_BUILD_TYPE=${THIRD_PARTY_BUILD_TYPE} + ${EXTERNAL_OPTIONAL_ARGS} + CMAKE_CACHE_ARGS + -DCMAKE_BUILD_TYPE:STRING=${THIRD_PARTY_BUILD_TYPE} + -DCMAKE_POSITION_INDEPENDENT_CODE:BOOL=ON + -DCMAKE_INSTALL_PREFIX:PATH=${WARPCTC_INSTALL_DIR} + BUILD_BYPRODUCTS ${WARPCTC_LIBRARIES}) endif() -MESSAGE(STATUS "warp-ctc library: ${WARPCTC_LIBRARIES}") +message(STATUS "warp-ctc library: ${WARPCTC_LIBRARIES}") get_filename_component(WARPCTC_LIBRARY_PATH ${WARPCTC_LIBRARIES} DIRECTORY) -INCLUDE_DIRECTORIES(${WARPCTC_INCLUDE_DIR}) # For warpctc code to include its headers. +include_directories(${WARPCTC_INCLUDE_DIR} +)# For warpctc code to include its headers. -ADD_LIBRARY(warpctc SHARED IMPORTED GLOBAL) -SET_PROPERTY(TARGET warpctc PROPERTY IMPORTED_LOCATION ${WARPCTC_LIBRARIES}) -ADD_DEPENDENCIES(warpctc extern_warpctc) +add_library(warpctc SHARED IMPORTED GLOBAL) +set_property(TARGET warpctc PROPERTY IMPORTED_LOCATION ${WARPCTC_LIBRARIES}) +add_dependencies(warpctc extern_warpctc) diff --git a/cmake/external/xbyak.cmake b/cmake/external/xbyak.cmake index 6ad15b3730d..589056458c1 100644 --- a/cmake/external/xbyak.cmake +++ b/cmake/external/xbyak.cmake @@ -14,12 +14,12 @@ include(ExternalProject) -set(XBYAK_PROJECT extern_xbyak) -set(XBYAK_PREFIX_DIR ${THIRD_PARTY_PATH}/xbyak) -set(XBYAK_INSTALL_ROOT ${THIRD_PARTY_PATH}/install/xbyak) -set(XBYAK_INC_DIR ${XBYAK_INSTALL_ROOT}/include) -set(XBYAK_REPOSITORY ${GIT_URL}/herumi/xbyak.git) -set(XBYAK_TAG v5.81) # Dec 19, 2019 +set(XBYAK_PROJECT extern_xbyak) +set(XBYAK_PREFIX_DIR ${THIRD_PARTY_PATH}/xbyak) +set(XBYAK_INSTALL_ROOT ${THIRD_PARTY_PATH}/install/xbyak) +set(XBYAK_INC_DIR ${XBYAK_INSTALL_ROOT}/include) +set(XBYAK_REPOSITORY ${GIT_URL}/herumi/xbyak.git) +set(XBYAK_TAG v5.81) # Dec 19, 2019 include_directories(${XBYAK_INC_DIR}) include_directories(${XBYAK_INC_DIR}/xbyak) @@ -31,19 +31,17 @@ add_definitions(-DXBYAK64) add_definitions(-DXBYAK_NO_OP_NAMES) ExternalProject_Add( - ${XBYAK_PROJECT} - ${EXTERNAL_PROJECT_LOG_ARGS} - ${SHALLOW_CLONE} - GIT_REPOSITORY ${XBYAK_REPOSITORY} - GIT_TAG ${XBYAK_TAG} - DEPENDS "" - PREFIX ${XBYAK_PREFIX_DIR} - UPDATE_COMMAND "" - CMAKE_ARGS -DCMAKE_INSTALL_PREFIX=${XBYAK_INSTALL_ROOT} - -DCMAKE_BUILD_TYPE=${THIRD_PARTY_BUILD_TYPE} - CMAKE_CACHE_ARGS -DCMAKE_INSTALL_PREFIX:PATH=${XBYAK_INSTALL_ROOT} - -DCMAKE_BUILD_TYPE=${THIRD_PARTY_BUILD_TYPE} -) + ${XBYAK_PROJECT} + ${EXTERNAL_PROJECT_LOG_ARGS} ${SHALLOW_CLONE} + GIT_REPOSITORY ${XBYAK_REPOSITORY} + GIT_TAG ${XBYAK_TAG} + DEPENDS "" + PREFIX ${XBYAK_PREFIX_DIR} + UPDATE_COMMAND "" + CMAKE_ARGS -DCMAKE_INSTALL_PREFIX=${XBYAK_INSTALL_ROOT} + -DCMAKE_BUILD_TYPE=${THIRD_PARTY_BUILD_TYPE} + CMAKE_CACHE_ARGS -DCMAKE_INSTALL_PREFIX:PATH=${XBYAK_INSTALL_ROOT} + -DCMAKE_BUILD_TYPE=${THIRD_PARTY_BUILD_TYPE}) add_library(xbyak INTERFACE) diff --git a/cmake/external/xpu.cmake b/cmake/external/xpu.cmake index d9b302e9ed3..af27500398f 100644 --- a/cmake/external/xpu.cmake +++ b/cmake/external/xpu.cmake @@ -1,127 +1,151 @@ -if (NOT WITH_XPU) - return() +if(NOT WITH_XPU) + return() endif() -INCLUDE(ExternalProject) -SET(XPU_PROJECT "extern_xpu") -SET(XPU_API_LIB_NAME "libxpuapi.so") -SET(XPU_RT_LIB_NAME "libxpurt.so") +include(ExternalProject) +set(XPU_PROJECT "extern_xpu") +set(XPU_API_LIB_NAME "libxpuapi.so") +set(XPU_RT_LIB_NAME "libxpurt.so") if(NOT DEFINED XPU_BASE_URL) - SET(XPU_BASE_URL_WITHOUT_DATE "https://baidu-kunlun-product.cdn.bcebos.com/KL-SDK/klsdk-dev") - SET(XPU_BASE_URL "${XPU_BASE_URL_WITHOUT_DATE}/20220601") + set(XPU_BASE_URL_WITHOUT_DATE + "https://baidu-kunlun-product.cdn.bcebos.com/KL-SDK/klsdk-dev") + set(XPU_BASE_URL "${XPU_BASE_URL_WITHOUT_DATE}/20220601") else() - SET(XPU_BASE_URL "${XPU_BASE_URL}") + set(XPU_BASE_URL "${XPU_BASE_URL}") endif() # ubuntu and centos: use output by XDNN API team if(NOT DEFINED XPU_XDNN_BASE_URL) - SET(XPU_XDNN_BASE_URL_WITHOUT_DATE "https://klx-sdk-release-public.su.bcebos.com/xdnn/dev") - SET(XPU_XDNN_BASE_URL "${XPU_XDNN_BASE_URL_WITHOUT_DATE}/20220601") + set(XPU_XDNN_BASE_URL_WITHOUT_DATE + "https://klx-sdk-release-public.su.bcebos.com/xdnn/dev") + set(XPU_XDNN_BASE_URL "${XPU_XDNN_BASE_URL_WITHOUT_DATE}/20220601") else() - SET(XPU_XDNN_BASE_URL "${XPU_XDNN_BASE_URL}") + set(XPU_XDNN_BASE_URL "${XPU_XDNN_BASE_URL}") endif() -IF(WITH_AARCH64) - SET(XPU_XRE_DIR_NAME "xre-kylin_aarch64") - SET(XPU_XDNN_DIR_NAME "XDNN-kylin_aarch64") - SET(XPU_XCCL_DIR_NAME "xccl-kylin_aarch64") - SET(XPU_XDNN_URL "${XPU_XDNN_BASE_URL}/${XPU_XDNN_DIR_NAME}.tar.gz" CACHE STRING "" FORCE) -ELSEIF(WITH_SUNWAY) - SET(XPU_XRE_DIR_NAME "xre-deepin_sw6_64") - SET(XPU_XDNN_DIR_NAME "xdnn-deepin_sw6_64") - SET(XPU_XCCL_DIR_NAME "xccl-deepin_sw6_64") - SET(XPU_XDNN_URL "${XPU_BASE_URL}/${XPU_XDNN_DIR_NAME}.tar.gz" CACHE STRING "" FORCE) -ELSEIF(WITH_BDCENTOS) - SET(XPU_XRE_DIR_NAME "xre-bdcentos_x86_64") - SET(XPU_XDNN_DIR_NAME "XDNN-bdcentos_x86_64") - SET(XPU_XCCL_DIR_NAME "xccl-bdcentos_x86_64") +if(WITH_AARCH64) + set(XPU_XRE_DIR_NAME "xre-kylin_aarch64") + set(XPU_XDNN_DIR_NAME "XDNN-kylin_aarch64") + set(XPU_XCCL_DIR_NAME "xccl-kylin_aarch64") + set(XPU_XDNN_URL + "${XPU_XDNN_BASE_URL}/${XPU_XDNN_DIR_NAME}.tar.gz" + CACHE STRING "" FORCE) +elseif(WITH_SUNWAY) + set(XPU_XRE_DIR_NAME "xre-deepin_sw6_64") + set(XPU_XDNN_DIR_NAME "xdnn-deepin_sw6_64") + set(XPU_XCCL_DIR_NAME "xccl-deepin_sw6_64") + set(XPU_XDNN_URL + "${XPU_BASE_URL}/${XPU_XDNN_DIR_NAME}.tar.gz" + CACHE STRING "" FORCE) +elseif(WITH_BDCENTOS) + set(XPU_XRE_DIR_NAME "xre-bdcentos_x86_64") + set(XPU_XDNN_DIR_NAME "XDNN-bdcentos_x86_64") + set(XPU_XCCL_DIR_NAME "xccl-bdcentos_x86_64") # ubuntu and centos: use output by XDNN API team - SET(XPU_XDNN_URL "${XPU_XDNN_BASE_URL}/${XPU_XDNN_DIR_NAME}.tar.gz" CACHE STRING "" FORCE) -ELSEIF(WITH_UBUNTU) - SET(XPU_XRE_DIR_NAME "xre-ubuntu_x86_64") - SET(XPU_XDNN_DIR_NAME "XDNN-ubuntu_x86_64") - SET(XPU_XCCL_DIR_NAME "xccl-bdcentos_x86_64") + set(XPU_XDNN_URL + "${XPU_XDNN_BASE_URL}/${XPU_XDNN_DIR_NAME}.tar.gz" + CACHE STRING "" FORCE) +elseif(WITH_UBUNTU) + set(XPU_XRE_DIR_NAME "xre-ubuntu_x86_64") + set(XPU_XDNN_DIR_NAME "XDNN-ubuntu_x86_64") + set(XPU_XCCL_DIR_NAME "xccl-bdcentos_x86_64") # ubuntu and centos: use output by XDNN API team - SET(XPU_XDNN_URL "${XPU_XDNN_BASE_URL}/${XPU_XDNN_DIR_NAME}.tar.gz" CACHE STRING "" FORCE) -ELSEIF(WITH_CENTOS) - SET(XPU_XRE_DIR_NAME "xre-centos7_x86_64") - SET(XPU_XDNN_DIR_NAME "XDNN-bdcentos_x86_64") - SET(XPU_XCCL_DIR_NAME "xccl-bdcentos_x86_64") + set(XPU_XDNN_URL + "${XPU_XDNN_BASE_URL}/${XPU_XDNN_DIR_NAME}.tar.gz" + CACHE STRING "" FORCE) +elseif(WITH_CENTOS) + set(XPU_XRE_DIR_NAME "xre-centos7_x86_64") + set(XPU_XDNN_DIR_NAME "XDNN-bdcentos_x86_64") + set(XPU_XCCL_DIR_NAME "xccl-bdcentos_x86_64") # ubuntu and centos: use output by XDNN API team - SET(XPU_XDNN_URL "${XPU_XDNN_BASE_URL}/${XPU_XDNN_DIR_NAME}.tar.gz" CACHE STRING "" FORCE) -ELSE() - SET(XPU_XRE_DIR_NAME "xre-ubuntu_x86_64") - SET(XPU_XDNN_DIR_NAME "XDNN-ubuntu_x86_64") - SET(XPU_XCCL_DIR_NAME "xccl-bdcentos_x86_64") + set(XPU_XDNN_URL + "${XPU_XDNN_BASE_URL}/${XPU_XDNN_DIR_NAME}.tar.gz" + CACHE STRING "" FORCE) +else() + set(XPU_XRE_DIR_NAME "xre-ubuntu_x86_64") + set(XPU_XDNN_DIR_NAME "XDNN-ubuntu_x86_64") + set(XPU_XCCL_DIR_NAME "xccl-bdcentos_x86_64") # default: use output by XDNN API team - SET(XPU_XDNN_URL "${XPU_XDNN_BASE_URL}/${XPU_XDNN_DIR_NAME}.tar.gz" CACHE STRING "" FORCE) -ENDIF() - -SET(XPU_XRE_URL "${XPU_BASE_URL}/${XPU_XRE_DIR_NAME}.tar.gz" CACHE STRING "" FORCE) -SET(XPU_XCCL_URL "${XPU_BASE_URL_WITHOUT_DATE}/20220411/${XPU_XCCL_DIR_NAME}.tar.gz" CACHE STRING "" FORCE) -SET(XPU_PACK_DEPENCE_URL "https://baidu-kunlun-public.su.bcebos.com/paddle_depence/pack_paddle_depence.sh" CACHE STRING "" FORCE) - -SET(SNAPPY_PREFIX_DIR "${THIRD_PARTY_PATH}/xpu") -SET(XPU_DOWNLOAD_DIR "${SNAPPY_PREFIX_DIR}/src/${XPU_PROJECT}") -SET(XPU_INSTALL_DIR "${THIRD_PARTY_PATH}/install/xpu") -SET(XPU_INC_DIR "${THIRD_PARTY_PATH}/install/xpu/include") -SET(XPU_LIB_DIR "${THIRD_PARTY_PATH}/install/xpu/lib") - -SET(XPU_API_LIB "${XPU_LIB_DIR}/${XPU_API_LIB_NAME}") -SET(XPU_RT_LIB "${XPU_LIB_DIR}/${XPU_RT_LIB_NAME}") - -SET(CMAKE_INSTALL_RPATH "${CMAKE_INSTALL_RPATH}" "${XPU_INSTALL_DIR}/lib") + set(XPU_XDNN_URL + "${XPU_XDNN_BASE_URL}/${XPU_XDNN_DIR_NAME}.tar.gz" + CACHE STRING "" FORCE) +endif() -FILE(WRITE ${XPU_DOWNLOAD_DIR}/CMakeLists.txt - "PROJECT(XPU)\n" - "cmake_minimum_required(VERSION 3.0)\n" +set(XPU_XRE_URL + "${XPU_BASE_URL}/${XPU_XRE_DIR_NAME}.tar.gz" + CACHE STRING "" FORCE) +set(XPU_XCCL_URL + "${XPU_BASE_URL_WITHOUT_DATE}/20220411/${XPU_XCCL_DIR_NAME}.tar.gz" + CACHE STRING "" FORCE) +set(XPU_PACK_DEPENCE_URL + "https://baidu-kunlun-public.su.bcebos.com/paddle_depence/pack_paddle_depence.sh" + CACHE STRING "" FORCE) + +set(SNAPPY_PREFIX_DIR "${THIRD_PARTY_PATH}/xpu") +set(XPU_DOWNLOAD_DIR "${SNAPPY_PREFIX_DIR}/src/${XPU_PROJECT}") +set(XPU_INSTALL_DIR "${THIRD_PARTY_PATH}/install/xpu") +set(XPU_INC_DIR "${THIRD_PARTY_PATH}/install/xpu/include") +set(XPU_LIB_DIR "${THIRD_PARTY_PATH}/install/xpu/lib") + +set(XPU_API_LIB "${XPU_LIB_DIR}/${XPU_API_LIB_NAME}") +set(XPU_RT_LIB "${XPU_LIB_DIR}/${XPU_RT_LIB_NAME}") + +set(CMAKE_INSTALL_RPATH "${CMAKE_INSTALL_RPATH}" "${XPU_INSTALL_DIR}/lib") + +file( + WRITE ${XPU_DOWNLOAD_DIR}/CMakeLists.txt + "PROJECT(XPU)\n" "cmake_minimum_required(VERSION 3.0)\n" "install(DIRECTORY xpu/include xpu/lib \n" " DESTINATION ${XPU_INSTALL_DIR})\n") ExternalProject_Add( - ${XPU_PROJECT} - ${EXTERNAL_PROJECT_LOG_ARGS} - PREFIX ${SNAPPY_PREFIX_DIR} - DOWNLOAD_DIR ${XPU_DOWNLOAD_DIR} - DOWNLOAD_COMMAND wget ${XPU_PACK_DEPENCE_URL} - && bash pack_paddle_depence.sh ${XPU_XRE_URL} ${XPU_XRE_DIR_NAME} ${XPU_XDNN_URL} ${XPU_XDNN_DIR_NAME} ${XPU_XCCL_URL} ${XPU_XCCL_DIR_NAME} - - DOWNLOAD_NO_PROGRESS 1 - UPDATE_COMMAND "" - CMAKE_ARGS -DCMAKE_INSTALL_PREFIX=${XPU_INSTALL_ROOT} - CMAKE_CACHE_ARGS -DCMAKE_INSTALL_PREFIX:PATH=${XPU_INSTALL_ROOT} - BUILD_BYPRODUCTS ${XPU_API_LIB} - BUILD_BYPRODUCTS ${XPU_RT_LIB} -) - -INCLUDE_DIRECTORIES(${XPU_INC_DIR}) -ADD_LIBRARY(shared_xpuapi SHARED IMPORTED GLOBAL) + ${XPU_PROJECT} + ${EXTERNAL_PROJECT_LOG_ARGS} + PREFIX ${SNAPPY_PREFIX_DIR} + DOWNLOAD_DIR ${XPU_DOWNLOAD_DIR} + DOWNLOAD_COMMAND + wget ${XPU_PACK_DEPENCE_URL} && bash pack_paddle_depence.sh ${XPU_XRE_URL} + ${XPU_XRE_DIR_NAME} ${XPU_XDNN_URL} ${XPU_XDNN_DIR_NAME} ${XPU_XCCL_URL} + ${XPU_XCCL_DIR_NAME} + DOWNLOAD_NO_PROGRESS 1 + UPDATE_COMMAND "" + CMAKE_ARGS -DCMAKE_INSTALL_PREFIX=${XPU_INSTALL_ROOT} + CMAKE_CACHE_ARGS -DCMAKE_INSTALL_PREFIX:PATH=${XPU_INSTALL_ROOT} + BUILD_BYPRODUCTS ${XPU_API_LIB} + BUILD_BYPRODUCTS ${XPU_RT_LIB}) + +include_directories(${XPU_INC_DIR}) +add_library(shared_xpuapi SHARED IMPORTED GLOBAL) set_property(TARGET shared_xpuapi PROPERTY IMPORTED_LOCATION "${XPU_API_LIB}") # generate a static dummy target to track xpulib dependencies # for cc_library(xxx SRCS xxx.c DEPS xpulib) generate_dummy_static_lib(LIB_NAME "xpulib" GENERATOR "xpu.cmake") -TARGET_LINK_LIBRARIES(xpulib ${XPU_API_LIB} ${XPU_RT_LIB}) +target_link_libraries(xpulib ${XPU_API_LIB} ${XPU_RT_LIB}) -IF(WITH_XPU_BKCL) - MESSAGE(STATUS "Compile with XPU BKCL!") - ADD_DEFINITIONS(-DPADDLE_WITH_XPU_BKCL) +if(WITH_XPU_BKCL) + message(STATUS "Compile with XPU BKCL!") + add_definitions(-DPADDLE_WITH_XPU_BKCL) - SET(XPU_BKCL_LIB_NAME "libbkcl.so") - SET(XPU_BKCL_LIB "${XPU_LIB_DIR}/${XPU_BKCL_LIB_NAME}") - SET(XPU_BKCL_INC_DIR "${THIRD_PARTY_PATH}/install/xpu/include") - INCLUDE_DIRECTORIES(${XPU_BKCL_INC_DIR}) - TARGET_LINK_LIBRARIES(xpulib ${XPU_API_LIB} ${XPU_RT_LIB} ${XPU_BKCL_LIB}) -ELSE(WITH_XPU_BKCL) - TARGET_LINK_LIBRARIES(xpulib ${XPU_API_LIB} ${XPU_RT_LIB}) -ENDIF(WITH_XPU_BKCL) + set(XPU_BKCL_LIB_NAME "libbkcl.so") + set(XPU_BKCL_LIB "${XPU_LIB_DIR}/${XPU_BKCL_LIB_NAME}") + set(XPU_BKCL_INC_DIR "${THIRD_PARTY_PATH}/install/xpu/include") + include_directories(${XPU_BKCL_INC_DIR}) + target_link_libraries(xpulib ${XPU_API_LIB} ${XPU_RT_LIB} ${XPU_BKCL_LIB}) +else(WITH_XPU_BKCL) + target_link_libraries(xpulib ${XPU_API_LIB} ${XPU_RT_LIB}) +endif(WITH_XPU_BKCL) -ADD_DEPENDENCIES(xpulib ${XPU_PROJECT}) +add_dependencies(xpulib ${XPU_PROJECT}) # Ensure that xpu/api.h can be included without dependency errors. -file(GENERATE OUTPUT ${CMAKE_CURRENT_BINARY_DIR}/.xpu_headers_dummy.cc CONTENT "") -add_library(xpu_headers_dummy STATIC ${CMAKE_CURRENT_BINARY_DIR}/.xpu_headers_dummy.cc) +file( + GENERATE + OUTPUT ${CMAKE_CURRENT_BINARY_DIR}/.xpu_headers_dummy.cc + CONTENT "") +add_library(xpu_headers_dummy STATIC + ${CMAKE_CURRENT_BINARY_DIR}/.xpu_headers_dummy.cc) add_dependencies(xpu_headers_dummy extern_xpu) link_libraries(xpu_headers_dummy) diff --git a/cmake/external/xxhash.cmake b/cmake/external/xxhash.cmake index fe17806e362..6e685bbde40 100644 --- a/cmake/external/xxhash.cmake +++ b/cmake/external/xxhash.cmake @@ -12,24 +12,39 @@ # See the License for the specific language governing permissions and # limitations under the License. -INCLUDE(ExternalProject) +include(ExternalProject) set(XXHASH_PREFIX_DIR ${THIRD_PARTY_PATH}/xxhash) set(XXHASH_SOURCE_DIR ${THIRD_PARTY_PATH}/xxhash/src/extern_xxhash) set(XXHASH_INSTALL_DIR ${THIRD_PARTY_PATH}/install/xxhash) set(XXHASH_INCLUDE_DIR "${XXHASH_INSTALL_DIR}/include") -set(XXHASH_REPOSITORY ${GIT_URL}/Cyan4973/xxHash.git) -set(XXHASH_TAG v0.6.5) +set(XXHASH_REPOSITORY ${GIT_URL}/Cyan4973/xxHash.git) +set(XXHASH_TAG v0.6.5) -INCLUDE_DIRECTORIES(${XXHASH_INCLUDE_DIR}) +include_directories(${XXHASH_INCLUDE_DIR}) -IF(APPLE) - SET(BUILD_CMD sed -i \"\" "s/-Wstrict-prototypes -Wundef/-Wstrict-prototypes -Wundef -fPIC/g" ${XXHASH_SOURCE_DIR}/Makefile && make lib) -ELSEIF(UNIX) - SET(BUILD_CMD sed -i "s/-Wstrict-prototypes -Wundef/-Wstrict-prototypes -Wundef -fPIC/g" ${XXHASH_SOURCE_DIR}/Makefile && make lib) -ENDIF() +if(APPLE) + set(BUILD_CMD + sed + -i + \"\" + "s/-Wstrict-prototypes -Wundef/-Wstrict-prototypes -Wundef -fPIC/g" + ${XXHASH_SOURCE_DIR}/Makefile + && + make + lib) +elseif(UNIX) + set(BUILD_CMD + sed + -i + "s/-Wstrict-prototypes -Wundef/-Wstrict-prototypes -Wundef -fPIC/g" + ${XXHASH_SOURCE_DIR}/Makefile + && + make + lib) +endif() -if (WIN32) +if(WIN32) set(XXHASH_LIBRARIES "${XXHASH_INSTALL_DIR}/lib/xxhash.lib") set(XXHASH_CMAKE_C_FLAGS "${CMAKE_C_FLAGS} /wd4710 /wd4711") set(XXHASH_CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} /wd4710 /wd4711") @@ -37,53 +52,47 @@ else() set(XXHASH_LIBRARIES "${XXHASH_INSTALL_DIR}/lib/libxxhash.a") set(XXHASH_CMAKE_C_FLAGS ${CMAKE_C_FLAGS}) set(XXHASH_CMAKE_CXX_FLAGS ${CMAKE_CXX_FLAGS}) -endif () +endif() if(WIN32) ExternalProject_Add( - extern_xxhash - ${EXTERNAL_PROJECT_LOG_ARGS} - ${SHALLOW_CLONE} - GIT_REPOSITORY ${XXHASH_REPOSITORY} - GIT_TAG ${XXHASH_TAG} - PREFIX ${XXHASH_PREFIX_DIR} - UPDATE_COMMAND "" - PATCH_COMMAND "" - CONFIGURE_COMMAND - ${CMAKE_COMMAND} ${XXHASH_SOURCE_DIR}/cmake_unofficial - -DCMAKE_INSTALL_PREFIX:PATH=${XXHASH_INSTALL_DIR} - -DCMAKE_BUILD_TYPE:STRING=${CMAKE_BUILD_TYPE} - -DCMAKE_VERBOSE_MAKEFILE:BOOL=OFF - -DCMAKE_POSITION_INDEPENDENT_CODE:BOOL=ON - -DBUILD_XXHSUM=OFF - -DCMAKE_GENERATOR=${CMAKE_GENERATOR} - -DCMAKE_GENERATOR_PLATFORM=${CMAKE_GENERATOR_PLATFORM} - -DBUILD_SHARED_LIBS=OFF - -DCMAKE_CXX_FLAGS=${XXHASH_CMAKE_CXX_FLAGS} - -DCMAKE_CXX_FLAGS_RELEASE=${CMAKE_CXX_FLAGS_RELEASE} - -DCMAKE_CXX_FLAGS_DEBUG=${CMAKE_CXX_FLAGS_DEBUG} - -DCMAKE_C_FLAGS=${XXHASH_CMAKE_C_FLAGS} - -DCMAKE_C_FLAGS_DEBUG=${CMAKE_C_FLAGS_DEBUG} - -DCMAKE_C_FLAGS_RELEASE=${CMAKE_C_FLAGS_RELEASE} - ${OPTIONAL_CACHE_ARGS} - TEST_COMMAND "" - BUILD_BYPRODUCTS ${XXHASH_LIBRARIES} - ) + extern_xxhash + ${EXTERNAL_PROJECT_LOG_ARGS} ${SHALLOW_CLONE} + GIT_REPOSITORY ${XXHASH_REPOSITORY} + GIT_TAG ${XXHASH_TAG} + PREFIX ${XXHASH_PREFIX_DIR} + UPDATE_COMMAND "" + PATCH_COMMAND "" + CONFIGURE_COMMAND + ${CMAKE_COMMAND} ${XXHASH_SOURCE_DIR}/cmake_unofficial + -DCMAKE_INSTALL_PREFIX:PATH=${XXHASH_INSTALL_DIR} + -DCMAKE_BUILD_TYPE:STRING=${CMAKE_BUILD_TYPE} + -DCMAKE_VERBOSE_MAKEFILE:BOOL=OFF + -DCMAKE_POSITION_INDEPENDENT_CODE:BOOL=ON -DBUILD_XXHSUM=OFF + -DCMAKE_GENERATOR=${CMAKE_GENERATOR} + -DCMAKE_GENERATOR_PLATFORM=${CMAKE_GENERATOR_PLATFORM} + -DBUILD_SHARED_LIBS=OFF -DCMAKE_CXX_FLAGS=${XXHASH_CMAKE_CXX_FLAGS} + -DCMAKE_CXX_FLAGS_RELEASE=${CMAKE_CXX_FLAGS_RELEASE} + -DCMAKE_CXX_FLAGS_DEBUG=${CMAKE_CXX_FLAGS_DEBUG} + -DCMAKE_C_FLAGS=${XXHASH_CMAKE_C_FLAGS} + -DCMAKE_C_FLAGS_DEBUG=${CMAKE_C_FLAGS_DEBUG} + -DCMAKE_C_FLAGS_RELEASE=${CMAKE_C_FLAGS_RELEASE} ${OPTIONAL_CACHE_ARGS} + TEST_COMMAND "" + BUILD_BYPRODUCTS ${XXHASH_LIBRARIES}) else() ExternalProject_Add( - extern_xxhash - ${EXTERNAL_PROJECT_LOG_ARGS} - GIT_REPOSITORY ${XXHASH_REPOSITORY} - GIT_TAG ${XXHASH_TAG} - PREFIX ${XXHASH_PREFIX_DIR} - UPDATE_COMMAND "" - CONFIGURE_COMMAND "" - BUILD_IN_SOURCE 1 - BUILD_COMMAND ${BUILD_CMD} - INSTALL_COMMAND make PREFIX=${XXHASH_INSTALL_DIR} install - TEST_COMMAND "" - BUILD_BYPRODUCTS ${XXHASH_LIBRARIES} - ) + extern_xxhash + ${EXTERNAL_PROJECT_LOG_ARGS} + GIT_REPOSITORY ${XXHASH_REPOSITORY} + GIT_TAG ${XXHASH_TAG} + PREFIX ${XXHASH_PREFIX_DIR} + UPDATE_COMMAND "" + CONFIGURE_COMMAND "" + BUILD_IN_SOURCE 1 + BUILD_COMMAND ${BUILD_CMD} + INSTALL_COMMAND make PREFIX=${XXHASH_INSTALL_DIR} install + TEST_COMMAND "" + BUILD_BYPRODUCTS ${XXHASH_LIBRARIES}) endif() add_library(xxhash STATIC IMPORTED GLOBAL) diff --git a/cmake/external/zlib.cmake b/cmake/external/zlib.cmake index 679e2064699..2cef053e325 100644 --- a/cmake/external/zlib.cmake +++ b/cmake/external/zlib.cmake @@ -12,48 +12,57 @@ # See the License for the specific language governing permissions and # limitations under the License. -INCLUDE(ExternalProject) +include(ExternalProject) -SET(ZLIB_PREFIX_DIR ${THIRD_PARTY_PATH}/zlib) -SET(ZLIB_INSTALL_DIR ${THIRD_PARTY_PATH}/install/zlib) -SET(ZLIB_ROOT ${ZLIB_INSTALL_DIR} CACHE FILEPATH "zlib root directory." FORCE) -SET(ZLIB_INCLUDE_DIR "${ZLIB_INSTALL_DIR}/include" CACHE PATH "zlib include directory." FORCE) +set(ZLIB_PREFIX_DIR ${THIRD_PARTY_PATH}/zlib) +set(ZLIB_INSTALL_DIR ${THIRD_PARTY_PATH}/install/zlib) +set(ZLIB_ROOT + ${ZLIB_INSTALL_DIR} + CACHE FILEPATH "zlib root directory." FORCE) +set(ZLIB_INCLUDE_DIR + "${ZLIB_INSTALL_DIR}/include" + CACHE PATH "zlib include directory." FORCE) set(ZLIB_REPOSITORY ${GIT_URL}/madler/zlib.git) -set(ZLIB_TAG v1.2.8) +set(ZLIB_TAG v1.2.8) -INCLUDE_DIRECTORIES(${ZLIB_INCLUDE_DIR}) # For zlib code to include its own headers. -INCLUDE_DIRECTORIES(${THIRD_PARTY_PATH}/install) # For Paddle code to include zlib.h. +include_directories(${ZLIB_INCLUDE_DIR} +)# For zlib code to include its own headers. +include_directories(${THIRD_PARTY_PATH}/install +)# For Paddle code to include zlib.h. -IF(WIN32) - SET(ZLIB_LIBRARIES "${ZLIB_INSTALL_DIR}/lib/zlibstatic.lib" CACHE FILEPATH "zlib library." FORCE) -ELSE(WIN32) - SET(ZLIB_LIBRARIES "${ZLIB_INSTALL_DIR}/lib/libz.a" CACHE FILEPATH "zlib library." FORCE) -ENDIF(WIN32) +if(WIN32) + set(ZLIB_LIBRARIES + "${ZLIB_INSTALL_DIR}/lib/zlibstatic.lib" + CACHE FILEPATH "zlib library." FORCE) +else(WIN32) + set(ZLIB_LIBRARIES + "${ZLIB_INSTALL_DIR}/lib/libz.a" + CACHE FILEPATH "zlib library." FORCE) +endif(WIN32) ExternalProject_Add( - extern_zlib - ${EXTERNAL_PROJECT_LOG_ARGS} - ${SHALLOW_CLONE} - GIT_REPOSITORY ${ZLIB_REPOSITORY} - GIT_TAG ${ZLIB_TAG} - PREFIX ${ZLIB_PREFIX_DIR} - UPDATE_COMMAND "" - CMAKE_ARGS -DCMAKE_C_COMPILER=${CMAKE_C_COMPILER} - -DCMAKE_CXX_COMPILER=${CMAKE_CXX_COMPILER} - -DCMAKE_C_FLAGS=${CMAKE_C_FLAGS} - -DCMAKE_CXX_FLAGS=${CMAKE_CXX_FLAGS} - -DCMAKE_INSTALL_PREFIX=${ZLIB_INSTALL_DIR} - -DBUILD_SHARED_LIBS=OFF - -DCMAKE_POSITION_INDEPENDENT_CODE=ON - -DCMAKE_MACOSX_RPATH=ON - -DCMAKE_BUILD_TYPE=${THIRD_PARTY_BUILD_TYPE} - ${EXTERNAL_OPTIONAL_ARGS} - CMAKE_CACHE_ARGS -DCMAKE_INSTALL_PREFIX:PATH=${ZLIB_INSTALL_DIR} - -DCMAKE_POSITION_INDEPENDENT_CODE:BOOL=ON - -DCMAKE_BUILD_TYPE:STRING=${THIRD_PARTY_BUILD_TYPE} - BUILD_BYPRODUCTS ${ZLIB_LIBRARIES} -) + extern_zlib + ${EXTERNAL_PROJECT_LOG_ARGS} ${SHALLOW_CLONE} + GIT_REPOSITORY ${ZLIB_REPOSITORY} + GIT_TAG ${ZLIB_TAG} + PREFIX ${ZLIB_PREFIX_DIR} + UPDATE_COMMAND "" + CMAKE_ARGS -DCMAKE_C_COMPILER=${CMAKE_C_COMPILER} + -DCMAKE_CXX_COMPILER=${CMAKE_CXX_COMPILER} + -DCMAKE_C_FLAGS=${CMAKE_C_FLAGS} + -DCMAKE_CXX_FLAGS=${CMAKE_CXX_FLAGS} + -DCMAKE_INSTALL_PREFIX=${ZLIB_INSTALL_DIR} + -DBUILD_SHARED_LIBS=OFF + -DCMAKE_POSITION_INDEPENDENT_CODE=ON + -DCMAKE_MACOSX_RPATH=ON + -DCMAKE_BUILD_TYPE=${THIRD_PARTY_BUILD_TYPE} + ${EXTERNAL_OPTIONAL_ARGS} + CMAKE_CACHE_ARGS + -DCMAKE_INSTALL_PREFIX:PATH=${ZLIB_INSTALL_DIR} + -DCMAKE_POSITION_INDEPENDENT_CODE:BOOL=ON + -DCMAKE_BUILD_TYPE:STRING=${THIRD_PARTY_BUILD_TYPE} + BUILD_BYPRODUCTS ${ZLIB_LIBRARIES}) -ADD_LIBRARY(zlib STATIC IMPORTED GLOBAL) -SET_PROPERTY(TARGET zlib PROPERTY IMPORTED_LOCATION ${ZLIB_LIBRARIES}) -ADD_DEPENDENCIES(zlib extern_zlib) +add_library(zlib STATIC IMPORTED GLOBAL) +set_property(TARGET zlib PROPERTY IMPORTED_LOCATION ${ZLIB_LIBRARIES}) +add_dependencies(zlib extern_zlib) diff --git a/cmake/flags.cmake b/cmake/flags.cmake index 0dbd3bc3283..e3c5545df8b 100644 --- a/cmake/flags.cmake +++ b/cmake/flags.cmake @@ -5,33 +5,39 @@ include(CheckCXXSymbolExists) include(CheckTypeSize) function(CheckCompilerCXX14Flag) - if(CMAKE_CXX_COMPILER_ID STREQUAL "GNU") - if(${CMAKE_CXX_COMPILER_VERSION} VERSION_LESS 5.4) - message(FATAL_ERROR "Unsupported GCC version. GCC >= 5.4 required.") - elseif(${CMAKE_CXX_COMPILER_VERSION} VERSION_GREATER 8.2) - message(WARNING "Found GCC ${CMAKE_CXX_COMPILER_VERSION} which is too high, recommended to use GCC 8.2") - endif() - elseif(CMAKE_CXX_COMPILER_ID STREQUAL "AppleClang" OR CMAKE_CXX_COMPILER_ID STREQUAL "Clang") - # cmake >= 3.0 compiler id "AppleClang" on Mac OS X, otherwise "Clang" - # Apple Clang is a different compiler than upstream Clang which havs different version numbers. - # https://gist.github.com/yamaya/2924292 - if(APPLE) # cmake < 3.0 compiler id "Clang" on Mac OS X - if(${CMAKE_CXX_COMPILER_VERSION} VERSION_LESS 5.1) - message(FATAL_ERROR "Unsupported AppleClang version. AppleClang >= 5.1 required.") - endif() - else() - if (${CMAKE_CXX_COMPILER_VERSION} VERSION_LESS 3.4) - message(FATAL_ERROR "Unsupported Clang version. Clang >= 3.4 required.") - endif() - endif() + if(CMAKE_CXX_COMPILER_ID STREQUAL "GNU") + if(${CMAKE_CXX_COMPILER_VERSION} VERSION_LESS 5.4) + message(FATAL_ERROR "Unsupported GCC version. GCC >= 5.4 required.") + elseif(${CMAKE_CXX_COMPILER_VERSION} VERSION_GREATER 8.2) + message( + WARNING + "Found GCC ${CMAKE_CXX_COMPILER_VERSION} which is too high, recommended to use GCC 8.2" + ) endif() + elseif(CMAKE_CXX_COMPILER_ID STREQUAL "AppleClang" OR CMAKE_CXX_COMPILER_ID + STREQUAL "Clang") + # cmake >= 3.0 compiler id "AppleClang" on Mac OS X, otherwise "Clang" + # Apple Clang is a different compiler than upstream Clang which havs different version numbers. + # https://gist.github.com/yamaya/2924292 + if(APPLE) # cmake < 3.0 compiler id "Clang" on Mac OS X + if(${CMAKE_CXX_COMPILER_VERSION} VERSION_LESS 5.1) + message( + FATAL_ERROR + "Unsupported AppleClang version. AppleClang >= 5.1 required.") + endif() + else() + if(${CMAKE_CXX_COMPILER_VERSION} VERSION_LESS 3.4) + message(FATAL_ERROR "Unsupported Clang version. Clang >= 3.4 required.") + endif() + endif() + endif() endfunction() -CheckCompilerCXX14Flag() +checkcompilercxx14flag() if(NOT WIN32) - set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -std=c++14") + set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -std=c++14") else() - set(CMAKE_CXX_STANDARD 14) + set(CMAKE_CXX_STANDARD 14) endif() # safe_set_flag @@ -42,56 +48,58 @@ endif() # flag_name: the flag name for compiler, such as '-Werror' '-Wall' etc # rest arguments: not used. function(safe_set_flag is_c src_list flag_name) - string(REPLACE "-" "_" safe_name ${flag_name}) - string(REPLACE "=" "_" safe_name ${safe_name}) + string(REPLACE "-" "_" safe_name ${flag_name}) + string(REPLACE "=" "_" safe_name ${safe_name}) - if(${flag_name} MATCHES "fsanitize") - set(CMAKE_REQUIRED_FLAGS_RETAINED ${CMAKE_REQUIRED_FLAGS}) - set(CMAKE_REQUIRED_FLAGS ${flag_name}) - endif() + if(${flag_name} MATCHES "fsanitize") + set(CMAKE_REQUIRED_FLAGS_RETAINED ${CMAKE_REQUIRED_FLAGS}) + set(CMAKE_REQUIRED_FLAGS ${flag_name}) + endif() - if(is_c) - CHECK_C_COMPILER_FLAG(${flag_name} C_COMPILER_SUPPORT_FLAG_${safe_name}) - set(safe_name C_COMPILER_SUPPORT_FLAG_${safe_name}) - else() - CHECK_CXX_COMPILER_FLAG(${flag_name} CXX_COMPILER_SUPPORT_FLAG_${safe_name}) - set(safe_name CXX_COMPILER_SUPPORT_FLAG_${safe_name}) - endif() - if(${safe_name}) - set(${src_list} "${${src_list}} ${flag_name}" PARENT_SCOPE) - endif() + if(is_c) + check_c_compiler_flag(${flag_name} C_COMPILER_SUPPORT_FLAG_${safe_name}) + set(safe_name C_COMPILER_SUPPORT_FLAG_${safe_name}) + else() + check_cxx_compiler_flag(${flag_name} CXX_COMPILER_SUPPORT_FLAG_${safe_name}) + set(safe_name CXX_COMPILER_SUPPORT_FLAG_${safe_name}) + endif() + if(${safe_name}) + set(${src_list} + "${${src_list}} ${flag_name}" + PARENT_SCOPE) + endif() - if(${flag_name} MATCHES "fsanitize") - set(CMAKE_REQUIRED_FLAGS ${CMAKE_REQUIRED_FLAGS_RETAINED}) - endif() + if(${flag_name} MATCHES "fsanitize") + set(CMAKE_REQUIRED_FLAGS ${CMAKE_REQUIRED_FLAGS_RETAINED}) + endif() endfunction() # helper macro to set cflag macro(safe_set_cflag src_list flag_name) - safe_set_flag(ON ${src_list} ${flag_name}) + safe_set_flag(ON ${src_list} ${flag_name}) endmacro() # helper macro to set cxxflag macro(safe_set_cxxflag src_list flag_name) - safe_set_flag(OFF ${src_list} ${flag_name}) + safe_set_flag(OFF ${src_list} ${flag_name}) endmacro() # helper macro to set nvcc flag macro(safe_set_nvflag flag_name) - string(REPLACE "-" "_" safe_name ${flag_name}) - string(REPLACE "=" "_" safe_name ${safe_name}) - CHECK_C_COMPILER_FLAG(${flag_name} C_COMPILER_SUPPORT_FLAG_${safe_name}) - set(safe_name C_COMPILER_SUPPORT_FLAG_${safe_name}) - if(${safe_name}) - set(SAFE_GPU_COMMON_FLAGS "${SAFE_GPU_COMMON_FLAGS} -Xcompiler=\"${flag_name}\"") - endif() + string(REPLACE "-" "_" safe_name ${flag_name}) + string(REPLACE "=" "_" safe_name ${safe_name}) + check_c_compiler_flag(${flag_name} C_COMPILER_SUPPORT_FLAG_${safe_name}) + set(safe_name C_COMPILER_SUPPORT_FLAG_${safe_name}) + if(${safe_name}) + set(SAFE_GPU_COMMON_FLAGS + "${SAFE_GPU_COMMON_FLAGS} -Xcompiler=\"${flag_name}\"") + endif() endmacro() - -CHECK_CXX_SYMBOL_EXISTS(UINT64_MAX "stdint.h" UINT64_MAX_EXISTS) +check_cxx_symbol_exists(UINT64_MAX "stdint.h" UINT64_MAX_EXISTS) if(NOT UINT64_MAX_EXISTS) set(CMAKE_REQUIRED_DEFINITIONS -D__STDC_LIMIT_MACROS) - CHECK_CXX_SYMBOL_EXISTS(UINT64_MAX "stdint.h" UINT64_MAX_EXISTS_HERE) + check_cxx_symbol_exists(UINT64_MAX "stdint.h" UINT64_MAX_EXISTS_HERE) if(UINT64_MAX_EXISTS_HERE) set(CMAKE_REQUIRED_DEFINITIONS) add_definitions(-D__STDC_LIMIT_MACROS) @@ -100,152 +108,151 @@ if(NOT UINT64_MAX_EXISTS) endif() endif() -SET(CMAKE_EXTRA_INCLUDE_FILES "pthread.h") -CHECK_TYPE_SIZE(pthread_spinlock_t SPINLOCK_FOUND) -CHECK_TYPE_SIZE(pthread_barrier_t BARRIER_FOUND) +set(CMAKE_EXTRA_INCLUDE_FILES "pthread.h") +check_type_size(pthread_spinlock_t SPINLOCK_FOUND) +check_type_size(pthread_barrier_t BARRIER_FOUND) if(SPINLOCK_FOUND) add_definitions(-DPADDLE_USE_PTHREAD_SPINLOCK) endif(SPINLOCK_FOUND) if(BARRIER_FOUND) add_definitions(-DPADDLE_USE_PTHREAD_BARRIER) endif(BARRIER_FOUND) -SET(CMAKE_EXTRA_INCLUDE_FILES "") +set(CMAKE_EXTRA_INCLUDE_FILES "") # Only one sanitizer is allowed in compile time string(TOLOWER "${SANITIZER_TYPE}" sanitizer_type) if(sanitizer_type STREQUAL "address") - set(fsanitize "-fsanitize=address") + set(fsanitize "-fsanitize=address") elseif(sanitizer_type STREQUAL "leak") - set(fsanitize "-fsanitize=leak") + set(fsanitize "-fsanitize=leak") elseif(sanitizer_type STREQUAL "memory") - set(fsanitize "-fsanitize=memory") + set(fsanitize "-fsanitize=memory") elseif(sanitizer_type STREQUAL "thread") - set(fsanitize "-fsanitize=thread") + set(fsanitize "-fsanitize=thread") elseif(sanitizer_type STREQUAL "undefined") - set(fsanitize "-fsanitize=undefined") + set(fsanitize "-fsanitize=undefined") endif() # Common flags. the compiler flag used for C/C++ sources whenever release or debug # Do not care if this flag is support for gcc. # https://github.com/PaddlePaddle/Paddle/issues/12773 -if (NOT WIN32) -set(COMMON_FLAGS - -fPIC - -fno-omit-frame-pointer - -Werror - -Wall - -Wextra - -Wnon-virtual-dtor - -Wdelete-non-virtual-dtor - -Wno-unused-parameter - -Wno-unused-function - -Wno-error=literal-suffix - -Wno-error=unused-local-typedefs - -Wno-error=ignored-attributes # Warnings in Eigen, gcc 6.3 - -Wno-error=terminate # Warning in PADDLE_ENFORCE - -Wno-error=int-in-bool-context # Warning in Eigen gcc 7.2 - -Wimplicit-fallthrough=0 # Warning in tinyformat.h - ${fsanitize} -) - -if(WITH_IPU) - set(COMMON_FLAGS ${COMMON_FLAGS} - -Wno-sign-compare # Warnings in Popart - -Wno-non-virtual-dtor # Warnings in Popart +if(NOT WIN32) + set(COMMON_FLAGS + -fPIC + -fno-omit-frame-pointer + -Werror + -Wall + -Wextra + -Wnon-virtual-dtor + -Wdelete-non-virtual-dtor + -Wno-unused-parameter + -Wno-unused-function + -Wno-error=literal-suffix + -Wno-error=unused-local-typedefs + -Wno-error=ignored-attributes # Warnings in Eigen, gcc 6.3 + -Wno-error=terminate # Warning in PADDLE_ENFORCE + -Wno-error=int-in-bool-context # Warning in Eigen gcc 7.2 + -Wimplicit-fallthrough=0 # Warning in tinyformat.h + ${fsanitize}) + + if(WITH_IPU) + set(COMMON_FLAGS ${COMMON_FLAGS} -Wno-sign-compare # Warnings in Popart + -Wno-non-virtual-dtor # Warnings in Popart ) -endif() + endif() -if(WITH_ASCEND_CL AND WITH_ARM_BRPC) + if(WITH_ASCEND_CL AND WITH_ARM_BRPC) set(COMMON_FLAGS ${COMMON_FLAGS} -faligned-new) -endif() + endif() -if(NOT APPLE) + if(NOT APPLE) if((${CMAKE_CXX_COMPILER_VERSION} VERSION_GREATER 8.0) OR (WITH_ROCM)) - set(COMMON_FLAGS - ${COMMON_FLAGS} - -Wno-format-truncation # Warning in boost gcc 8.2 - -Wno-error=parentheses # Warning in boost gcc 8.2 - -Wno-error=catch-value # Warning in boost gcc 8.2 - -Wno-error=nonnull-compare # Warning in boost gcc 8.2 - -Wno-error=address # Warning in boost gcc 8.2 - -Wno-ignored-qualifiers # Warning in boost gcc 8.2 - -Wno-ignored-attributes # Warning in Eigen gcc 8.3 - -Wno-parentheses # Warning in Eigen gcc 8.3 - ) + set(COMMON_FLAGS + ${COMMON_FLAGS} + -Wno-format-truncation # Warning in boost gcc 8.2 + -Wno-error=parentheses # Warning in boost gcc 8.2 + -Wno-error=catch-value # Warning in boost gcc 8.2 + -Wno-error=nonnull-compare # Warning in boost gcc 8.2 + -Wno-error=address # Warning in boost gcc 8.2 + -Wno-ignored-qualifiers # Warning in boost gcc 8.2 + -Wno-ignored-attributes # Warning in Eigen gcc 8.3 + -Wno-parentheses # Warning in Eigen gcc 8.3 + ) endif() -endif(NOT APPLE) - -set(GPU_COMMON_FLAGS - -fPIC - -fno-omit-frame-pointer - -Wnon-virtual-dtor - -Wdelete-non-virtual-dtor - -Wno-unused-parameter - -Wno-unused-function - -Wno-error=literal-suffix - -Wno-error=unused-local-typedefs - -Wno-error=unused-function # Warnings in Numpy Header. - -Wno-error=array-bounds # Warnings in Eigen::array -) -if (NOT WITH_NV_JETSON AND NOT WITH_ARM AND NOT WITH_SW AND NOT WITH_MIPS) - set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -m64") -endif() + endif(NOT APPLE) + + set(GPU_COMMON_FLAGS + -fPIC + -fno-omit-frame-pointer + -Wnon-virtual-dtor + -Wdelete-non-virtual-dtor + -Wno-unused-parameter + -Wno-unused-function + -Wno-error=literal-suffix + -Wno-error=unused-local-typedefs + -Wno-error=unused-function # Warnings in Numpy Header. + -Wno-error=array-bounds # Warnings in Eigen::array + ) + if(NOT WITH_NV_JETSON + AND NOT WITH_ARM + AND NOT WITH_SW + AND NOT WITH_MIPS) + set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -m64") + endif() endif(NOT WIN32) -if (APPLE) - if(WITH_ARM) - set (CMAKE_OSX_ARCHITECTURES "arm64" CACHE STRING "Build architectures for OSX" FORCE) - else(WITH_ARM) - set (CMAKE_OSX_ARCHITECTURES "x86_64" CACHE STRING "Build architectures for OSX" FORCE) - endif(WITH_ARM) - # On Mac OS X register class specifier is deprecated and will cause warning error on latest clang 10.0 - set (COMMON_FLAGS -Wno-deprecated-register) +if(APPLE) + if(WITH_ARM) + set(CMAKE_OSX_ARCHITECTURES + "arm64" + CACHE STRING "Build architectures for OSX" FORCE) + else(WITH_ARM) + set(CMAKE_OSX_ARCHITECTURES + "x86_64" + CACHE STRING "Build architectures for OSX" FORCE) + endif(WITH_ARM) + # On Mac OS X register class specifier is deprecated and will cause warning error on latest clang 10.0 + set(COMMON_FLAGS -Wno-deprecated-register) endif(APPLE) if(WITH_HETERPS AND WITH_PSLIB) - set(COMMON_FLAGS - -D_GLIBCXX_USE_CXX11_ABI=0 - ${COMMON_FLAGS}) + set(COMMON_FLAGS -D_GLIBCXX_USE_CXX11_ABI=0 ${COMMON_FLAGS}) - set(GPU_COMMON_FLAGS - -D_GLIBCXX_USE_CXX11_ABI=0 - ${GPU_COMMON_FLAGS}) + set(GPU_COMMON_FLAGS -D_GLIBCXX_USE_CXX11_ABI=0 ${GPU_COMMON_FLAGS}) endif() if(LINUX) - set(GPU_COMMON_FLAGS - -Wall - -Wextra - -Werror - ${GPU_COMMON_FLAGS}) + set(GPU_COMMON_FLAGS -Wall -Wextra -Werror ${GPU_COMMON_FLAGS}) endif(LINUX) foreach(flag ${COMMON_FLAGS}) - safe_set_cflag(CMAKE_C_FLAGS ${flag}) - safe_set_cxxflag(CMAKE_CXX_FLAGS ${flag}) + safe_set_cflag(CMAKE_C_FLAGS ${flag}) + safe_set_cxxflag(CMAKE_CXX_FLAGS ${flag}) endforeach() set(SAFE_GPU_COMMON_FLAGS "") foreach(flag ${GPU_COMMON_FLAGS}) - safe_set_nvflag(${flag}) + safe_set_nvflag(${flag}) endforeach() if(WITH_GPU) - set(CMAKE_CUDA_FLAGS "${CMAKE_CUDA_FLAGS} ${SAFE_GPU_COMMON_FLAGS}") + set(CMAKE_CUDA_FLAGS "${CMAKE_CUDA_FLAGS} ${SAFE_GPU_COMMON_FLAGS}") endif() if(WITH_ROCM) - set(HIP_HIPCC_FLAGS "${HIP_HIPCC_FLAGS} ${SAFE_GPU_COMMON_FLAGS}") + set(HIP_HIPCC_FLAGS "${HIP_HIPCC_FLAGS} ${SAFE_GPU_COMMON_FLAGS}") endif() - # Disable -Werror, otherwise the compile will fail for rocblas_gemm_ex +# Disable -Werror, otherwise the compile will fail for rocblas_gemm_ex if(WITH_ROCM) - string (REPLACE "-Werror" "-Wno-error" CMAKE_CXX_FLAGS ${CMAKE_CXX_FLAGS}) - string (REPLACE "-Werror" "-Wno-error" CMAKE_C_FLAGS ${CMAKE_C_FLAGS}) + string(REPLACE "-Werror" "-Wno-error" CMAKE_CXX_FLAGS ${CMAKE_CXX_FLAGS}) + string(REPLACE "-Werror" "-Wno-error" CMAKE_C_FLAGS ${CMAKE_C_FLAGS}) endif() if(WITH_PSCORE OR WITH_PSLIB) - string (REPLACE "-Wnon-virtual-dtor" "-Wno-non-virtual-dtor" CMAKE_CXX_FLAGS ${CMAKE_CXX_FLAGS}) - string (REPLACE "-Wnon-virtual-dtor" "-Wno-non-virtual-dtor" CMAKE_C_FLAGS ${CMAKE_C_FLAGS}) + string(REPLACE "-Wnon-virtual-dtor" "-Wno-non-virtual-dtor" CMAKE_CXX_FLAGS + ${CMAKE_CXX_FLAGS}) + string(REPLACE "-Wnon-virtual-dtor" "-Wno-non-virtual-dtor" CMAKE_C_FLAGS + ${CMAKE_C_FLAGS}) endif() diff --git a/cmake/generic.cmake b/cmake/generic.cmake index 35170b5198d..a6a7ab983b9 100644 --- a/cmake/generic.cmake +++ b/cmake/generic.cmake @@ -13,7 +13,6 @@ # limitations under the License. # - # generic.cmake defines CMakes functions that look like Bazel's # building rules (https://bazel.build/). # @@ -96,9 +95,11 @@ if(NOT APPLE AND NOT WIN32) find_package(Threads REQUIRED) link_libraries(${CMAKE_THREAD_LIBS_INIT}) if(WITH_PSLIB OR WITH_DISTRIBUTE) - set(CMAKE_CXX_LINK_EXECUTABLE "${CMAKE_CXX_LINK_EXECUTABLE} -pthread -ldl -lrt -lz -lssl") + set(CMAKE_CXX_LINK_EXECUTABLE + "${CMAKE_CXX_LINK_EXECUTABLE} -pthread -ldl -lrt -lz -lssl") else() - set(CMAKE_CXX_LINK_EXECUTABLE "${CMAKE_CXX_LINK_EXECUTABLE} -pthread -ldl -lrt") + set(CMAKE_CXX_LINK_EXECUTABLE + "${CMAKE_CXX_LINK_EXECUTABLE} -pthread -ldl -lrt") endif() endif() @@ -107,7 +108,8 @@ set_property(GLOBAL PROPERTY FLUID_MODULES "") # for building inference libs function(find_fluid_modules TARGET_NAME) get_filename_component(__target_path ${TARGET_NAME} ABSOLUTE) - string(REGEX REPLACE "^${PADDLE_SOURCE_DIR}/" "" __target_path ${__target_path}) + string(REGEX REPLACE "^${PADDLE_SOURCE_DIR}/" "" __target_path + ${__target_path}) string(FIND "${__target_path}" "fluid" pos) if(pos GREATER 1) get_property(fluid_modules GLOBAL PROPERTY FLUID_MODULES) @@ -121,7 +123,8 @@ set_property(GLOBAL PROPERTY PHI_MODULES "") # for building inference libs function(find_phi_modules TARGET_NAME) get_filename_component(__target_path ${TARGET_NAME} ABSOLUTE) - string(REGEX REPLACE "^${PADDLE_SOURCE_DIR}/" "" __target_path ${__target_path}) + string(REGEX REPLACE "^${PADDLE_SOURCE_DIR}/" "" __target_path + ${__target_path}) string(FIND "${__target_path}" "phi" pos) if(pos GREATER 1) get_property(phi_modules GLOBAL PROPERTY PHI_MODULES) @@ -131,7 +134,7 @@ function(find_phi_modules TARGET_NAME) endfunction(find_phi_modules) function(common_link TARGET_NAME) - if (WITH_PROFILER) + if(WITH_PROFILER) target_link_libraries(${TARGET_NAME} gperftools::profiler) endif() endfunction() @@ -141,7 +144,8 @@ endfunction() set_property(GLOBAL PROPERTY FLUID_THIRD_PARTY) function(find_fluid_thirdparties TARGET_NAME) get_filename_component(__target_path ${TARGET_NAME} ABSOLUTE) - string(REGEX REPLACE "^${PADDLE_SOURCE_DIR}/" "" __target_path ${__target_path}) + string(REGEX REPLACE "^${PADDLE_SOURCE_DIR}/" "" __target_path + ${__target_path}) string(FIND "${__target_path}" "third_party" pos) if(pos GREATER 1) get_property(fluid_ GLOBAL PROPERTY FLUID_THIRD_PARTY) @@ -162,13 +166,15 @@ function(create_static_lib TARGET_NAME) foreach(lib ${libs}) list(APPEND dummy_list ${lib}) list(LENGTH dummy_list listlen) - if ((${listlen} GREATER ${dummy_limit}) OR (${dummy_offset} EQUAL ${libs_len})) + if((${listlen} GREATER ${dummy_limit}) OR (${dummy_offset} EQUAL + ${libs_len})) merge_static_libs(${TARGET_NAME}_dummy_${dummy_index} ${dummy_list}) set(dummy_list) - list(APPEND ${TARGET_NAME}_dummy_list ${TARGET_NAME}_dummy_${dummy_index}) - MATH(EXPR dummy_index "${dummy_index}+1") + list(APPEND ${TARGET_NAME}_dummy_list + ${TARGET_NAME}_dummy_${dummy_index}) + math(EXPR dummy_index "${dummy_index}+1") endif() - MATH(EXPR dummy_offset "${dummy_offset}+1") + math(EXPR dummy_offset "${dummy_offset}+1") endforeach() merge_static_libs(${TARGET_NAME} ${${TARGET_NAME}_dummy_list}) else() @@ -180,7 +186,8 @@ function(create_dummy_static_lib TARGET_NAME) set(options "") set(oneValueArgs "") set(multiValueArgs LIBS DEPS LIMIT) - cmake_parse_arguments(merge "${options}" "${oneValueArgs}" "${multiValueArgs}" ${ARGN}) + cmake_parse_arguments(merge "${options}" "${oneValueArgs}" + "${multiValueArgs}" ${ARGN}) list(REMOVE_DUPLICATES merge_LIBS) set(index 1) @@ -191,17 +198,18 @@ function(create_dummy_static_lib TARGET_NAME) foreach(lib ${merge_LIBS}) list(APPEND merge_list ${lib}) list(LENGTH merge_list listlen) - if ((${listlen} GREATER ${limit}) OR (${offset} EQUAL ${libs_len})) - message("Merge and generate static library: ${TARGET_NAME}_static_${index}") + if((${listlen} GREATER ${limit}) OR (${offset} EQUAL ${libs_len})) + message( + "Merge and generate static library: ${TARGET_NAME}_static_${index}") merge_static_libs(${TARGET_NAME}_static_${index} ${merge_list}) if(merge_DEPS) target_link_libraries(${TARGET_NAME}_static_${index} ${merge_DEPS}) endif() set(merge_list) list(APPEND ${TARGET_NAME}_list ${TARGET_NAME}_static_${index}) - MATH(EXPR index "${index}+1") + math(EXPR index "${index}+1") endif() - MATH(EXPR offset "${offset}+1") + math(EXPR offset "${offset}+1") endforeach() cc_library(${TARGET_NAME} DEPS ${${TARGET_NAME}_list}) endfunction() @@ -226,12 +234,14 @@ function(merge_static_libs TARGET_NAME) # Make the generated dummy source file depended on all static input # libs. If input lib changes,the source file is touched # which causes the desired effect (relink). - add_custom_command(OUTPUT ${target_SRCS} + add_custom_command( + OUTPUT ${target_SRCS} COMMAND ${CMAKE_COMMAND} -E touch ${target_SRCS} DEPENDS ${libs}) - - # Generate dummy staic lib - generate_dummy_static_lib(LIB_NAME ${TARGET_NAME} FILE_PATH ${target_SRCS} GENERATOR "generic.cmake:merge_static_libs") + + # Generate dummy staic lib + generate_dummy_static_lib(LIB_NAME ${TARGET_NAME} FILE_PATH ${target_SRCS} + GENERATOR "generic.cmake:merge_static_libs") target_link_libraries(${TARGET_NAME} ${libs_deps}) # OSX: use 'libtool' to merge archives @@ -240,29 +250,41 @@ function(merge_static_libs TARGET_NAME) # Get the file names of the libraries to be merged set(libfiles ${libfiles} $) endforeach() - add_custom_command(TARGET ${TARGET_NAME} POST_BUILD + add_custom_command( + TARGET ${TARGET_NAME} + POST_BUILD COMMENT "Merge and generate static lib: lib${TARGET_NAME}.a" COMMAND rm "${CMAKE_CURRENT_BINARY_DIR}/lib${TARGET_NAME}.a" - COMMAND /usr/bin/libtool -static -o "${CMAKE_CURRENT_BINARY_DIR}/lib${TARGET_NAME}.a" ${libfiles} - ) + COMMAND /usr/bin/libtool -static -o + "${CMAKE_CURRENT_BINARY_DIR}/lib${TARGET_NAME}.a" ${libfiles}) endif() # LINUX: use "ar" to extract objects and re-add to a common lib if(LINUX) - set(mri_file ${CMAKE_CURRENT_BINARY_DIR}/${TARGET_NAME}.mri CACHE INTERNAL "phi_static.mri file") - get_property(ABS_MERGE_LIB_PATH TARGET ${TARGET_NAME} PROPERTY LOCATION) + set(mri_file + ${CMAKE_CURRENT_BINARY_DIR}/${TARGET_NAME}.mri + CACHE INTERNAL "phi_static.mri file") + get_property( + ABS_MERGE_LIB_PATH + TARGET ${TARGET_NAME} + PROPERTY LOCATION) file(WRITE ${mri_file} "create ${ABS_MERGE_LIB_PATH}\n") foreach(lib ${libs}) - get_property(ABS_LIB_PATH TARGET ${lib} PROPERTY LOCATION) + get_property( + ABS_LIB_PATH + TARGET ${lib} + PROPERTY LOCATION) file(APPEND ${mri_file} "addlib ${ABS_LIB_PATH}\n") endforeach() file(APPEND ${mri_file} "save\nend\n") - add_custom_command(TARGET ${TARGET_NAME} POST_BUILD - COMMENT "Merge and generate static lib: lib${TARGET_NAME}.a" - COMMAND ${CMAKE_AR} -M < ${mri_file} - COMMAND ${CMAKE_RANLIB} "$") + add_custom_command( + TARGET ${TARGET_NAME} + POST_BUILD + COMMENT "Merge and generate static lib: lib${TARGET_NAME}.a" + COMMAND ${CMAKE_AR} -M < ${mri_file} + COMMAND ${CMAKE_RANLIB} "$") endif() # Windows do not support gcc/nvcc combined compiling. Use msvc 'lib.exe' to merge libs. @@ -271,60 +293,70 @@ function(merge_static_libs TARGET_NAME) set(libfiles ${libfiles} $) endforeach() # msvc compiler will put libarary in directory of "/Release/xxxlib" by default - add_custom_command(TARGET ${TARGET_NAME} POST_BUILD + add_custom_command( + TARGET ${TARGET_NAME} + POST_BUILD COMMENT "Merge and generate static lib: lib${TARGET_NAME}.lib" COMMAND cmake -E make_directory $ - COMMAND lib /OUT:$ ${libfiles} - ) + COMMAND lib /OUT:$ ${libfiles}) endif() endfunction() function(check_coverage_opt TARGET_NAME SRCS) if(WITH_COVERAGE AND WITH_INCREMENTAL_COVERAGE) # if pybind.cc add '-g -O0 -fprofile-arcs -ftest-coverage' only, some testcase will fail. - if ("$ENV{PADDLE_GIT_DIFF_H_FILE}" STREQUAL "" AND (NOT ("$ENV{PADDLE_GIT_DIFF_CC_FILE}" MATCHES "pybind.cc"))) - if (NOT ("$ENV{PADDLE_GIT_DIFF_CC_FILE}" STREQUAL "")) + if("$ENV{PADDLE_GIT_DIFF_H_FILE}" STREQUAL "" + AND (NOT ("$ENV{PADDLE_GIT_DIFF_CC_FILE}" MATCHES "pybind.cc"))) + if(NOT ("$ENV{PADDLE_GIT_DIFF_CC_FILE}" STREQUAL "")) string(REPLACE "," ";" CC_FILE_LIST $ENV{PADDLE_GIT_DIFF_CC_FILE}) set(use_coverage_opt FALSE) - FOREACH(cc_file ${CC_FILE_LIST}) + foreach(cc_file ${CC_FILE_LIST}) if("${SRCS};" MATCHES "${cc_file}") set(use_coverage_opt TRUE) break() endif() - ENDFOREACH(cc_file) + endforeach(cc_file) - if (use_coverage_opt) + if(use_coverage_opt) message(STATUS "cc changed, add coverage opt for ${TARGET_NAME}") - target_compile_options(${TARGET_NAME} PRIVATE -g -O0 -fprofile-arcs -ftest-coverage) + target_compile_options(${TARGET_NAME} PRIVATE -g -O0 -fprofile-arcs + -ftest-coverage) target_link_libraries(${TARGET_NAME} -fprofile-arcs) - get_target_property(WH_TARGET_COMPILE_OPTIONS ${TARGET_NAME} COMPILE_OPTIONS) - message(STATUS "property for ${TARGET_NAME} is ${WH_TARGET_COMPILE_OPTIONS}") + get_target_property(WH_TARGET_COMPILE_OPTIONS ${TARGET_NAME} + COMPILE_OPTIONS) + message( + STATUS "property for ${TARGET_NAME} is ${WH_TARGET_COMPILE_OPTIONS}" + ) endif() endif() endif() endif() endfunction(check_coverage_opt) - function(cc_library TARGET_NAME) set(options STATIC static SHARED shared INTERFACE interface) set(oneValueArgs "") set(multiValueArgs SRCS DEPS) - cmake_parse_arguments(cc_library "${options}" "${oneValueArgs}" "${multiValueArgs}" ${ARGN}) + cmake_parse_arguments(cc_library "${options}" "${oneValueArgs}" + "${multiValueArgs}" ${ARGN}) if(WIN32) - # add libxxx.lib prefix in windows - set(${TARGET_NAME}_LIB_NAME "${CMAKE_STATIC_LIBRARY_PREFIX}${TARGET_NAME}${CMAKE_STATIC_LIBRARY_SUFFIX}" CACHE STRING "output library name for target ${TARGET_NAME}") + # add libxxx.lib prefix in windows + set(${TARGET_NAME}_LIB_NAME + "${CMAKE_STATIC_LIBRARY_PREFIX}${TARGET_NAME}${CMAKE_STATIC_LIBRARY_SUFFIX}" + CACHE STRING "output library name for target ${TARGET_NAME}") endif(WIN32) if(cc_library_SRCS) - if(cc_library_SHARED OR cc_library_shared) # build *.so - add_library(${TARGET_NAME} SHARED ${cc_library_SRCS}) - elseif(cc_library_INTERFACE OR cc_library_interface) - generate_dummy_static_lib(LIB_NAME ${TARGET_NAME} FILE_PATH ${target_SRCS} GENERATOR "generic.cmake:cc_library") - else() - add_library(${TARGET_NAME} STATIC ${cc_library_SRCS}) - find_fluid_modules(${TARGET_NAME}) - find_phi_modules(${TARGET_NAME}) - endif() + if(cc_library_SHARED OR cc_library_shared) # build *.so + add_library(${TARGET_NAME} SHARED ${cc_library_SRCS}) + elseif(cc_library_INTERFACE OR cc_library_interface) + generate_dummy_static_lib( + LIB_NAME ${TARGET_NAME} FILE_PATH ${target_SRCS} GENERATOR + "generic.cmake:cc_library") + else() + add_library(${TARGET_NAME} STATIC ${cc_library_SRCS}) + find_fluid_modules(${TARGET_NAME}) + find_phi_modules(${TARGET_NAME}) + endif() if(cc_library_DEPS) # Don't need link libwarpctc.so if("${cc_library_DEPS};" MATCHES "warpctc;") @@ -341,7 +373,8 @@ function(cc_library TARGET_NAME) if(WIN32) target_link_libraries(${TARGET_NAME} ${MKLML_IOMP_LIB}) else(WIN32) - target_link_libraries(${TARGET_NAME} "-L${MKLML_LIB_DIR} -liomp5 -Wl,--as-needed") + target_link_libraries(${TARGET_NAME} + "-L${MKLML_LIB_DIR} -liomp5 -Wl,--as-needed") endif(WIN32) endif() # remove link to python, see notes at: @@ -373,21 +406,26 @@ function(cc_library TARGET_NAME) if(cc_library_DEPS) list(REMOVE_DUPLICATES cc_library_DEPS) - generate_dummy_static_lib(LIB_NAME ${TARGET_NAME} FILE_PATH ${target_SRCS} GENERATOR "generic.cmake:cc_library") + generate_dummy_static_lib( + LIB_NAME ${TARGET_NAME} FILE_PATH ${target_SRCS} GENERATOR + "generic.cmake:cc_library") target_link_libraries(${TARGET_NAME} ${cc_library_DEPS}) else() - message(FATAL_ERROR "Please specify source files or libraries in cc_library(${TARGET_NAME} ...).") + message( + FATAL_ERROR + "Please specify source files or libraries in cc_library(${TARGET_NAME} ...)." + ) endif() endif(cc_library_SRCS) endfunction(cc_library) - function(cc_binary TARGET_NAME) set(options "") set(oneValueArgs "") set(multiValueArgs SRCS DEPS) - cmake_parse_arguments(cc_binary "${options}" "${oneValueArgs}" "${multiValueArgs}" ${ARGN}) + cmake_parse_arguments(cc_binary "${options}" "${oneValueArgs}" + "${multiValueArgs}" ${ARGN}) add_executable(${TARGET_NAME} ${cc_binary_SRCS}) if(cc_binary_DEPS) target_link_libraries(${TARGET_NAME} ${cc_binary_DEPS}) @@ -408,7 +446,8 @@ function(cc_test_build TARGET_NAME) if(WITH_TESTING AND NOT "$ENV{CI_SKIP_CPP_TEST}" STREQUAL "ON") set(oneValueArgs "") set(multiValueArgs SRCS DEPS) - cmake_parse_arguments(cc_test "${options}" "${oneValueArgs}" "${multiValueArgs}" ${ARGN}) + cmake_parse_arguments(cc_test "${options}" "${oneValueArgs}" + "${multiValueArgs}" ${ARGN}) add_executable(${TARGET_NAME} ${cc_test_SRCS}) if(WIN32) if("${cc_test_DEPS};" MATCHES "python;") @@ -417,8 +456,25 @@ function(cc_test_build TARGET_NAME) endif() endif(WIN32) get_property(os_dependency_modules GLOBAL PROPERTY OS_DEPENDENCY_MODULES) - target_link_libraries(${TARGET_NAME} ${cc_test_DEPS} ${os_dependency_modules} paddle_gtest_main lod_tensor memory gtest gflags glog) - add_dependencies(${TARGET_NAME} ${cc_test_DEPS} paddle_gtest_main lod_tensor memory gtest gflags glog) + target_link_libraries( + ${TARGET_NAME} + ${cc_test_DEPS} + ${os_dependency_modules} + paddle_gtest_main + lod_tensor + memory + gtest + gflags + glog) + add_dependencies( + ${TARGET_NAME} + ${cc_test_DEPS} + paddle_gtest_main + lod_tensor + memory + gtest + gflags + glog) common_link(${TARGET_NAME}) if(WITH_ROCM) target_link_libraries(${TARGET_NAME} ${ROCM_HIPRTC_LIB}) @@ -431,74 +487,80 @@ function(cc_test_run TARGET_NAME) if(WITH_TESTING AND NOT "$ENV{CI_SKIP_CPP_TEST}" STREQUAL "ON") set(oneValueArgs "") set(multiValueArgs COMMAND ARGS) - cmake_parse_arguments(cc_test "${options}" "${oneValueArgs}" "${multiValueArgs}" ${ARGN}) - add_test(NAME ${TARGET_NAME} - COMMAND ${cc_test_COMMAND} ${cc_test_ARGS} - WORKING_DIRECTORY ${CMAKE_CURRENT_BINARY_DIR}) - set_property(TEST ${TARGET_NAME} PROPERTY ENVIRONMENT FLAGS_cpu_deterministic=true) - set_property(TEST ${TARGET_NAME} PROPERTY ENVIRONMENT FLAGS_init_allocated_mem=true) - set_property(TEST ${TARGET_NAME} PROPERTY ENVIRONMENT FLAGS_cudnn_deterministic=true) + cmake_parse_arguments(cc_test "${options}" "${oneValueArgs}" + "${multiValueArgs}" ${ARGN}) + add_test( + NAME ${TARGET_NAME} + COMMAND ${cc_test_COMMAND} ${cc_test_ARGS} + WORKING_DIRECTORY ${CMAKE_CURRENT_BINARY_DIR}) + set_property(TEST ${TARGET_NAME} PROPERTY ENVIRONMENT + FLAGS_cpu_deterministic=true) + set_property(TEST ${TARGET_NAME} PROPERTY ENVIRONMENT + FLAGS_init_allocated_mem=true) + set_property(TEST ${TARGET_NAME} PROPERTY ENVIRONMENT + FLAGS_cudnn_deterministic=true) # No unit test should exceed 2 minutes. - if (WIN32) - set_tests_properties(${TARGET_NAME} PROPERTIES TIMEOUT 150) + if(WIN32) + set_tests_properties(${TARGET_NAME} PROPERTIES TIMEOUT 150) endif() - if (APPLE) - set_tests_properties(${TARGET_NAME} PROPERTIES TIMEOUT 20) + if(APPLE) + set_tests_properties(${TARGET_NAME} PROPERTIES TIMEOUT 20) endif() elseif(WITH_TESTING AND NOT TEST ${TARGET_NAME}) - add_test(NAME ${TARGET_NAME} COMMAND ${CMAKE_COMMAND} -E echo CI skip ${TARGET_NAME}.) + add_test(NAME ${TARGET_NAME} COMMAND ${CMAKE_COMMAND} -E echo CI skip + ${TARGET_NAME}.) endif() endfunction() function(cc_test TARGET_NAME) - # The environment variable `CI_SKIP_CPP_TEST` is used to skip the compilation - # and execution of test in CI. `CI_SKIP_CPP_TEST` is set to ON when no files + # The environment variable `CI_SKIP_CPP_TEST` is used to skip the compilation + # and execution of test in CI. `CI_SKIP_CPP_TEST` is set to ON when no files # other than *.py are modified. if(WITH_TESTING AND NOT "$ENV{CI_SKIP_CPP_TEST}" STREQUAL "ON") set(oneValueArgs "") set(multiValueArgs SRCS DEPS ARGS) - cmake_parse_arguments(cc_test "${options}" "${oneValueArgs}" "${multiValueArgs}" ${ARGN}) - cc_test_build(${TARGET_NAME} - SRCS ${cc_test_SRCS} - DEPS ${cc_test_DEPS}) + cmake_parse_arguments(cc_test "${options}" "${oneValueArgs}" + "${multiValueArgs}" ${ARGN}) + cc_test_build(${TARGET_NAME} SRCS ${cc_test_SRCS} DEPS ${cc_test_DEPS}) # we dont test hcom op, because it need complex configuration # with more than one machine - if(NOT ("${TARGET_NAME}" STREQUAL "c_broadcast_op_npu_test" OR - "${TARGET_NAME}" STREQUAL "c_allreduce_sum_op_npu_test" OR - "${TARGET_NAME}" STREQUAL "c_allreduce_max_op_npu_test" OR - "${TARGET_NAME}" STREQUAL "c_reducescatter_op_npu_test" OR - "${TARGET_NAME}" STREQUAL "c_allgather_op_npu_test" OR - "${TARGET_NAME}" STREQUAL "send_v2_op_npu_test" OR - "${TARGET_NAME}" STREQUAL "c_reduce_sum_op_npu_test" OR - "${TARGET_NAME}" STREQUAL "recv_v2_op_npu_test")) - cc_test_run(${TARGET_NAME} - COMMAND ${TARGET_NAME} - ARGS ${cc_test_ARGS}) + if(NOT + ("${TARGET_NAME}" STREQUAL "c_broadcast_op_npu_test" + OR "${TARGET_NAME}" STREQUAL "c_allreduce_sum_op_npu_test" + OR "${TARGET_NAME}" STREQUAL "c_allreduce_max_op_npu_test" + OR "${TARGET_NAME}" STREQUAL "c_reducescatter_op_npu_test" + OR "${TARGET_NAME}" STREQUAL "c_allgather_op_npu_test" + OR "${TARGET_NAME}" STREQUAL "send_v2_op_npu_test" + OR "${TARGET_NAME}" STREQUAL "c_reduce_sum_op_npu_test" + OR "${TARGET_NAME}" STREQUAL "recv_v2_op_npu_test")) + cc_test_run(${TARGET_NAME} COMMAND ${TARGET_NAME} ARGS ${cc_test_ARGS}) endif() elseif(WITH_TESTING AND NOT TEST ${TARGET_NAME}) - add_test(NAME ${TARGET_NAME} COMMAND ${CMAKE_COMMAND} -E echo CI skip ${TARGET_NAME}.) + add_test(NAME ${TARGET_NAME} COMMAND ${CMAKE_COMMAND} -E echo CI skip + ${TARGET_NAME}.) endif() endfunction(cc_test) function(nv_library TARGET_NAME) - if (WITH_GPU) + if(WITH_GPU) set(options STATIC static SHARED shared) set(oneValueArgs "") set(multiValueArgs SRCS DEPS) - cmake_parse_arguments(nv_library "${options}" "${oneValueArgs}" "${multiValueArgs}" ${ARGN}) + cmake_parse_arguments(nv_library "${options}" "${oneValueArgs}" + "${multiValueArgs}" ${ARGN}) if(nv_library_SRCS) # Attention: # 1. cuda_add_library is deprecated after cmake v3.10, use add_library for CUDA please. # 2. cuda_add_library does not support ccache. # Reference: https://cmake.org/cmake/help/v3.10/module/FindCUDA.html - if (nv_library_SHARED OR nv_library_shared) # build *.so + if(nv_library_SHARED OR nv_library_shared) # build *.so add_library(${TARGET_NAME} SHARED ${nv_library_SRCS}) else() add_library(${TARGET_NAME} STATIC ${nv_library_SRCS}) find_fluid_modules(${TARGET_NAME}) find_phi_modules(${TARGET_NAME}) endif() - if (nv_library_DEPS) + if(nv_library_DEPS) add_dependencies(${TARGET_NAME} ${nv_library_DEPS}) target_link_libraries(${TARGET_NAME} ${nv_library_DEPS}) endif() @@ -506,13 +568,16 @@ function(nv_library TARGET_NAME) foreach(source_file ${nv_library_SRCS}) string(REGEX REPLACE "\\.[^.]*$" "" source ${source_file}) if(EXISTS ${CMAKE_CURRENT_SOURCE_DIR}/${source}.h) - list(APPEND nv_library_HEADERS ${CMAKE_CURRENT_SOURCE_DIR}/${source}.h) + list(APPEND nv_library_HEADERS + ${CMAKE_CURRENT_SOURCE_DIR}/${source}.h) endif() endforeach() else(nv_library_SRCS) - if (nv_library_DEPS) + if(nv_library_DEPS) list(REMOVE_DUPLICATES nv_library_DEPS) - generate_dummy_static_lib(LIB_NAME ${TARGET_NAME} FILE_PATH ${target_SRCS} GENERATOR "generic.cmake:nv_library") + generate_dummy_static_lib( + LIB_NAME ${TARGET_NAME} FILE_PATH ${target_SRCS} GENERATOR + "generic.cmake:nv_library") target_link_libraries(${TARGET_NAME} ${nv_library_DEPS}) add_dependencies(${TARGET_NAME} ${nv_library_DEPS}) @@ -520,76 +585,112 @@ function(nv_library TARGET_NAME) message(FATAL "Please specify source file or library in nv_library.") endif() endif(nv_library_SRCS) - if((CUDA_VERSION GREATER 9.2) AND (CUDA_VERSION LESS 11.0) AND (MSVC_VERSION LESS 1910)) - set_target_properties(${TARGET_NAME} PROPERTIES VS_USER_PROPS ${WIN_PROPS}) + if((CUDA_VERSION GREATER 9.2) + AND (CUDA_VERSION LESS 11.0) + AND (MSVC_VERSION LESS 1910)) + set_target_properties(${TARGET_NAME} PROPERTIES VS_USER_PROPS + ${WIN_PROPS}) endif() endif() endfunction(nv_library) function(nv_binary TARGET_NAME) - if (WITH_GPU) + if(WITH_GPU) set(options "") set(oneValueArgs "") set(multiValueArgs SRCS DEPS) - cmake_parse_arguments(nv_binary "${options}" "${oneValueArgs}" "${multiValueArgs}" ${ARGN}) + cmake_parse_arguments(nv_binary "${options}" "${oneValueArgs}" + "${multiValueArgs}" ${ARGN}) add_executable(${TARGET_NAME} ${nv_binary_SRCS}) if(nv_binary_DEPS) target_link_libraries(${TARGET_NAME} ${nv_binary_DEPS}) add_dependencies(${TARGET_NAME} ${nv_binary_DEPS}) common_link(${TARGET_NAME}) endif() - if((CUDA_VERSION GREATER 9.2) AND (CUDA_VERSION LESS 11.0) AND (MSVC_VERSION LESS 1910)) - set_target_properties(${TARGET_NAME} PROPERTIES VS_USER_PROPS ${WIN_PROPS}) + if((CUDA_VERSION GREATER 9.2) + AND (CUDA_VERSION LESS 11.0) + AND (MSVC_VERSION LESS 1910)) + set_target_properties(${TARGET_NAME} PROPERTIES VS_USER_PROPS + ${WIN_PROPS}) endif() endif() endfunction(nv_binary) function(nv_test TARGET_NAME) - # The environment variable `CI_SKIP_CPP_TEST` is used to skip the compilation - # and execution of test in CI. `CI_SKIP_CPP_TEST` is set to ON when no files + # The environment variable `CI_SKIP_CPP_TEST` is used to skip the compilation + # and execution of test in CI. `CI_SKIP_CPP_TEST` is set to ON when no files # other than *.py are modified. - if (WITH_GPU AND WITH_TESTING AND NOT "$ENV{CI_SKIP_CPP_TEST}" STREQUAL "ON") + if(WITH_GPU + AND WITH_TESTING + AND NOT "$ENV{CI_SKIP_CPP_TEST}" STREQUAL "ON") set(oneValueArgs "") set(multiValueArgs SRCS DEPS) - cmake_parse_arguments(nv_test "${options}" "${oneValueArgs}" "${multiValueArgs}" ${ARGN}) + cmake_parse_arguments(nv_test "${options}" "${oneValueArgs}" + "${multiValueArgs}" ${ARGN}) # Attention: # 1. cuda_add_executable is deprecated after cmake v3.10, use cuda_add_executable for CUDA please. # 2. cuda_add_executable does not support ccache. # Reference: https://cmake.org/cmake/help/v3.10/module/FindCUDA.html add_executable(${TARGET_NAME} ${nv_test_SRCS}) get_property(os_dependency_modules GLOBAL PROPERTY OS_DEPENDENCY_MODULES) - target_link_libraries(${TARGET_NAME} ${nv_test_DEPS} paddle_gtest_main lod_tensor memory gtest gflags glog ${os_dependency_modules}) - add_dependencies(${TARGET_NAME} ${nv_test_DEPS} paddle_gtest_main lod_tensor memory gtest gflags glog) + target_link_libraries( + ${TARGET_NAME} + ${nv_test_DEPS} + paddle_gtest_main + lod_tensor + memory + gtest + gflags + glog + ${os_dependency_modules}) + add_dependencies( + ${TARGET_NAME} + ${nv_test_DEPS} + paddle_gtest_main + lod_tensor + memory + gtest + gflags + glog) common_link(${TARGET_NAME}) add_test(${TARGET_NAME} ${TARGET_NAME}) - set_property(TEST ${TARGET_NAME} PROPERTY ENVIRONMENT FLAGS_cpu_deterministic=true) - set_property(TEST ${TARGET_NAME} PROPERTY ENVIRONMENT FLAGS_init_allocated_mem=true) - set_property(TEST ${TARGET_NAME} PROPERTY ENVIRONMENT FLAGS_cudnn_deterministic=true) - if((CUDA_VERSION GREATER 9.2) AND (CUDA_VERSION LESS 11.0) AND (MSVC_VERSION LESS 1910)) - set_target_properties(${TARGET_NAME} PROPERTIES VS_USER_PROPS ${WIN_PROPS}) + set_property(TEST ${TARGET_NAME} PROPERTY ENVIRONMENT + FLAGS_cpu_deterministic=true) + set_property(TEST ${TARGET_NAME} PROPERTY ENVIRONMENT + FLAGS_init_allocated_mem=true) + set_property(TEST ${TARGET_NAME} PROPERTY ENVIRONMENT + FLAGS_cudnn_deterministic=true) + if((CUDA_VERSION GREATER 9.2) + AND (CUDA_VERSION LESS 11.0) + AND (MSVC_VERSION LESS 1910)) + set_target_properties(${TARGET_NAME} PROPERTIES VS_USER_PROPS + ${WIN_PROPS}) endif() endif() endfunction(nv_test) function(hip_library TARGET_NAME) - if (WITH_ROCM) + if(WITH_ROCM) set(options STATIC static SHARED shared) set(oneValueArgs "") set(multiValueArgs SRCS DEPS) - cmake_parse_arguments(hip_library "${options}" "${oneValueArgs}" "${multiValueArgs}" ${ARGN}) + cmake_parse_arguments(hip_library "${options}" "${oneValueArgs}" + "${multiValueArgs}" ${ARGN}) if(hip_library_SRCS) # FindHIP.cmake defined hip_add_library, HIP_SOURCE_PROPERTY_FORMAT is requried if no .cu files found - if(NOT (${CMAKE_CURRENT_SOURCE_DIR} MATCHES ".*/operators" OR ${CMAKE_CURRENT_SOURCE_DIR} MATCHES ".*/phi/kernels")) - set_source_files_properties(${hip_library_SRCS} PROPERTIES HIP_SOURCE_PROPERTY_FORMAT 1) + if(NOT (${CMAKE_CURRENT_SOURCE_DIR} MATCHES ".*/operators" + OR ${CMAKE_CURRENT_SOURCE_DIR} MATCHES ".*/phi/kernels")) + set_source_files_properties(${hip_library_SRCS} + PROPERTIES HIP_SOURCE_PROPERTY_FORMAT 1) endif() - if (hip_library_SHARED OR hip_library_shared) # build *.so + if(hip_library_SHARED OR hip_library_shared) # build *.so hip_add_library(${TARGET_NAME} SHARED ${hip_library_SRCS}) else() hip_add_library(${TARGET_NAME} STATIC ${hip_library_SRCS}) find_fluid_modules(${TARGET_NAME}) find_phi_modules(${TARGET_NAME}) endif() - if (hip_library_DEPS) + if(hip_library_DEPS) add_dependencies(${TARGET_NAME} ${hip_library_DEPS}) target_link_libraries(${TARGET_NAME} ${hip_library_DEPS}) endif() @@ -597,13 +698,16 @@ function(hip_library TARGET_NAME) foreach(source_file ${hip_library_SRCS}) string(REGEX REPLACE "\\.[^.]*$" "" source ${source_file}) if(EXISTS ${CMAKE_CURRENT_SOURCE_DIR}/${source}.h) - list(APPEND hip_library_HEADERS ${CMAKE_CURRENT_SOURCE_DIR}/${source}.h) + list(APPEND hip_library_HEADERS + ${CMAKE_CURRENT_SOURCE_DIR}/${source}.h) endif() endforeach() else(hip_library_SRCS) - if (hip_library_DEPS) + if(hip_library_DEPS) list(REMOVE_DUPLICATES hip_library_DEPS) - generate_dummy_static_lib(LIB_NAME ${TARGET_NAME} FILE_PATH ${target_SRCS} GENERATOR "generic.cmake:hip_library") + generate_dummy_static_lib( + LIB_NAME ${TARGET_NAME} FILE_PATH ${target_SRCS} GENERATOR + "generic.cmake:hip_library") target_link_libraries(${TARGET_NAME} ${hip_library_DEPS}) add_dependencies(${TARGET_NAME} ${hip_library_DEPS}) @@ -615,11 +719,12 @@ function(hip_library TARGET_NAME) endfunction(hip_library) function(hip_binary TARGET_NAME) - if (WITH_ROCM) + if(WITH_ROCM) set(options "") set(oneValueArgs "") set(multiValueArgs SRCS DEPS) - cmake_parse_arguments(hip_binary "${options}" "${oneValueArgs}" "${multiValueArgs}" ${ARGN}) + cmake_parse_arguments(hip_binary "${options}" "${oneValueArgs}" + "${multiValueArgs}" ${ARGN}) # FindHIP.cmake defined hip_add_executable, HIP_SOURCE_PROPERTY_FORMAT is requried for .cc files hip_add_executable(${TARGET_NAME} ${hip_binary_SRCS}) if(hip_binary_DEPS) @@ -634,42 +739,73 @@ function(hip_test TARGET_NAME) # The environment variable `CI_SKIP_CPP_TEST` is used to skip the compilation # and execution of test in CI. `CI_SKIP_CPP_TEST` is set to ON when no files # other than *.py are modified. - if (WITH_ROCM AND WITH_TESTING AND NOT "$ENV{CI_SKIP_CPP_TEST}" STREQUAL "ON") + if(WITH_ROCM + AND WITH_TESTING + AND NOT "$ENV{CI_SKIP_CPP_TEST}" STREQUAL "ON") set(oneValueArgs "") set(multiValueArgs SRCS DEPS) - cmake_parse_arguments(hip_test "${options}" "${oneValueArgs}" "${multiValueArgs}" ${ARGN}) + cmake_parse_arguments(hip_test "${options}" "${oneValueArgs}" + "${multiValueArgs}" ${ARGN}) # FindHIP.cmake defined hip_add_executable, HIP_SOURCE_PROPERTY_FORMAT is requried for .cc files hip_add_executable(${TARGET_NAME} ${hip_test_SRCS}) # "-pthread -ldl -lrt" is defined in CMAKE_CXX_LINK_EXECUTABLE target_link_options(${TARGET_NAME} PRIVATE -pthread -ldl -lrt) get_property(os_dependency_modules GLOBAL PROPERTY OS_DEPENDENCY_MODULES) - target_link_libraries(${TARGET_NAME} ${hip_test_DEPS} paddle_gtest_main lod_tensor memory gtest gflags glog ${os_dependency_modules}) - add_dependencies(${TARGET_NAME} ${hip_test_DEPS} paddle_gtest_main lod_tensor memory gtest gflags glog) + target_link_libraries( + ${TARGET_NAME} + ${hip_test_DEPS} + paddle_gtest_main + lod_tensor + memory + gtest + gflags + glog + ${os_dependency_modules}) + add_dependencies( + ${TARGET_NAME} + ${hip_test_DEPS} + paddle_gtest_main + lod_tensor + memory + gtest + gflags + glog) common_link(${TARGET_NAME}) add_test(${TARGET_NAME} ${TARGET_NAME}) - set_property(TEST ${TARGET_NAME} PROPERTY ENVIRONMENT FLAGS_cpu_deterministic=true) - set_property(TEST ${TARGET_NAME} PROPERTY ENVIRONMENT FLAGS_init_allocated_mem=true) - set_property(TEST ${TARGET_NAME} PROPERTY ENVIRONMENT FLAGS_cudnn_deterministic=true) - set_property(TEST ${TARGET_NAME} PROPERTY ENVIRONMENT "LD_LIBRARY_PATH=${CMAKE_BINARY_DIR}/python/paddle/libs:$LD_LIBRARY_PATH") + set_property(TEST ${TARGET_NAME} PROPERTY ENVIRONMENT + FLAGS_cpu_deterministic=true) + set_property(TEST ${TARGET_NAME} PROPERTY ENVIRONMENT + FLAGS_init_allocated_mem=true) + set_property(TEST ${TARGET_NAME} PROPERTY ENVIRONMENT + FLAGS_cudnn_deterministic=true) + set_property( + TEST ${TARGET_NAME} + PROPERTY + ENVIRONMENT + "LD_LIBRARY_PATH=${CMAKE_BINARY_DIR}/python/paddle/libs:$LD_LIBRARY_PATH" + ) endif() endfunction(hip_test) function(xpu_library TARGET_NAME) - if (WITH_XPU_KP) + if(WITH_XPU_KP) set(options STATIC static SHARED shared) set(oneValueArgs "") set(multiValueArgs SRCS DEPS) - cmake_parse_arguments(xpu_library "${options}" "${oneValueArgs}" "${multiValueArgs}" ${ARGN}) + cmake_parse_arguments(xpu_library "${options}" "${oneValueArgs}" + "${multiValueArgs}" ${ARGN}) if(xpu_library_SRCS) - if (xpu_library_SHARED OR xpu_library_shared) # build *.so - message(FATAL_ERROR "XPU kernel currently does not support dynamic links") + if(xpu_library_SHARED OR xpu_library_shared) # build *.so + message( + FATAL_ERROR "XPU kernel currently does not support dynamic links") else() - xpu_add_library(${TARGET_NAME} STATIC ${xpu_library_SRCS} DEPENDS ${xpu_library_DEPS}) + xpu_add_library(${TARGET_NAME} STATIC ${xpu_library_SRCS} DEPENDS + ${xpu_library_DEPS}) find_fluid_modules(${TARGET_NAME}) find_phi_modules(${TARGET_NAME}) endif() - if (xpu_library_DEPS) + if(xpu_library_DEPS) add_dependencies(${TARGET_NAME} ${xpu_library_DEPS}) target_link_libraries(${TARGET_NAME} ${xpu_library_DEPS}) endif() @@ -677,13 +813,16 @@ function(xpu_library TARGET_NAME) foreach(source_file ${xpu_library_SRCS}) string(REGEX REPLACE "\\.[^.]*$" "" source ${source_file}) if(EXISTS ${CMAKE_CURRENT_SOURCE_DIR}/${source}.h) - list(APPEND xpu_library_HEADERS ${CMAKE_CURRENT_SOURCE_DIR}/${source}.h) + list(APPEND xpu_library_HEADERS + ${CMAKE_CURRENT_SOURCE_DIR}/${source}.h) endif() endforeach() else(xpu_library_SRCS) - if (xpu_library_DEPS) + if(xpu_library_DEPS) list(REMOVE_DUPLICATES xpu_library_DEPS) - generate_dummy_static_lib(LIB_NAME ${TARGET_NAME} FILE_PATH ${target_SRCS} GENERATOR "generic.cmake:xpu_library") + generate_dummy_static_lib( + LIB_NAME ${TARGET_NAME} FILE_PATH ${target_SRCS} GENERATOR + "generic.cmake:xpu_library") target_link_libraries(${TARGET_NAME} ${xpu_library_DEPS}) add_dependencies(${TARGET_NAME} ${xpu_library_DEPS}) else() @@ -694,11 +833,12 @@ function(xpu_library TARGET_NAME) endfunction(xpu_library) function(xpu_binary TARGET_NAME) - if (WITH_XPU_KP) + if(WITH_XPU_KP) set(options "") set(oneValueArgs "") set(multiValueArgs SRCS DEPS) - cmake_parse_arguments(xpu_binary "${options}" "${oneValueArgs}" "${multiValueArgs}" ${ARGN}) + cmake_parse_arguments(xpu_binary "${options}" "${oneValueArgs}" + "${multiValueArgs}" ${ARGN}) add_executable(${TARGET_NAME} ${xpu_binary_SRCS}) if(xpu_binary_DEPS) target_link_libraries(${TARGET_NAME} ${xpu_binary_DEPS}) @@ -712,21 +852,44 @@ function(xpu_test TARGET_NAME) # The environment variable `CI_SKIP_CPP_TEST` is used to skip the compilation # and execution of test in CI. `CI_SKIP_CPP_TEST` is set to ON when no files # other than *.py are modified. - if (WITH_XPU_KP AND WITH_TESTING AND NOT "$ENV{CI_SKIP_CPP_TEST}" STREQUAL "ON") + if(WITH_XPU_KP + AND WITH_TESTING + AND NOT "$ENV{CI_SKIP_CPP_TEST}" STREQUAL "ON") set(oneValueArgs "") set(multiValueArgs SRCS DEPS) - cmake_parse_arguments(xpu_test "${options}" "${oneValueArgs}" "${multiValueArgs}" ${ARGN}) + cmake_parse_arguments(xpu_test "${options}" "${oneValueArgs}" + "${multiValueArgs}" ${ARGN}) add_executable(${TARGET_NAME} ${xpu_test_SRCS}) # "-pthread -ldl -lrt" is defined in CMAKE_CXX_LINK_EXECUTABLE target_link_options(${TARGET_NAME} PRIVATE -pthread -ldl -lrt) get_property(os_dependency_modules GLOBAL PROPERTY OS_DEPENDENCY_MODULES) - target_link_libraries(${TARGET_NAME} ${xpu_test_DEPS} paddle_gtest_main lod_tensor memory gtest gflags glog ${os_dependency_modules}) - add_dependencies(${TARGET_NAME} ${xpu_test_DEPS} paddle_gtest_main lod_tensor memory gtest gflags glog) + target_link_libraries( + ${TARGET_NAME} + ${xpu_test_DEPS} + paddle_gtest_main + lod_tensor + memory + gtest + gflags + glog + ${os_dependency_modules}) + add_dependencies( + ${TARGET_NAME} + ${xpu_test_DEPS} + paddle_gtest_main + lod_tensor + memory + gtest + gflags + glog) common_link(${TARGET_NAME}) add_test(${TARGET_NAME} ${TARGET_NAME}) - set_property(TEST ${TARGET_NAME} PROPERTY ENVIRONMENT FLAGS_cpu_deterministic=true) - set_property(TEST ${TARGET_NAME} PROPERTY ENVIRONMENT FLAGS_init_allocated_mem=true) - set_property(TEST ${TARGET_NAME} PROPERTY ENVIRONMENT FLAGS_cudnn_deterministic=true) + set_property(TEST ${TARGET_NAME} PROPERTY ENVIRONMENT + FLAGS_cpu_deterministic=true) + set_property(TEST ${TARGET_NAME} PROPERTY ENVIRONMENT + FLAGS_init_allocated_mem=true) + set_property(TEST ${TARGET_NAME} PROPERTY ENVIRONMENT + FLAGS_cudnn_deterministic=true) endif() endfunction(xpu_test) @@ -734,34 +897,36 @@ function(go_library TARGET_NAME) set(options STATIC static SHARED shared) set(oneValueArgs "") set(multiValueArgs DEPS) - cmake_parse_arguments(go_library "${options}" "${oneValueArgs}" "${multiValueArgs}" ${ARGN}) + cmake_parse_arguments(go_library "${options}" "${oneValueArgs}" + "${multiValueArgs}" ${ARGN}) - if (go_library_SHARED OR go_library_shared) + if(go_library_SHARED OR go_library_shared) set(BUILD_MODE "-buildmode=c-shared") - set(${TARGET_NAME}_LIB_NAME "${CMAKE_SHARED_LIBRARY_PREFIX}${TARGET_NAME}${CMAKE_SHARED_LIBRARY_SUFFIX}" CACHE STRING "output library name for target ${TARGET_NAME}") + set(${TARGET_NAME}_LIB_NAME + "${CMAKE_SHARED_LIBRARY_PREFIX}${TARGET_NAME}${CMAKE_SHARED_LIBRARY_SUFFIX}" + CACHE STRING "output library name for target ${TARGET_NAME}") else() set(BUILD_MODE "-buildmode=c-archive") - set(${TARGET_NAME}_LIB_NAME "${CMAKE_STATIC_LIBRARY_PREFIX}${TARGET_NAME}${CMAKE_STATIC_LIBRARY_SUFFIX}" CACHE STRING "output library name for target ${TARGET_NAME}") + set(${TARGET_NAME}_LIB_NAME + "${CMAKE_STATIC_LIBRARY_PREFIX}${TARGET_NAME}${CMAKE_STATIC_LIBRARY_SUFFIX}" + CACHE STRING "output library name for target ${TARGET_NAME}") endif() set(dummyfile ${CMAKE_CURRENT_BINARY_DIR}/${TARGET_NAME}_dummy.c) # This custom command will always run since it depends on a not # existing file. - add_custom_command( - OUTPUT dummy_rebulid_${TARGET_NAME} - COMMAND cmake -E touch ${dummyfile} - ) + add_custom_command(OUTPUT dummy_rebulid_${TARGET_NAME} COMMAND cmake -E touch + ${dummyfile}) # Create a custom target that depends on the custom command output # file, so the custom command can be referenced as a dependency by # `add_dependencies`. - add_custom_target(rebuild_${TARGET_NAME} - DEPENDS dummy_rebulid_${TARGET_NAME} - ) + add_custom_target(rebuild_${TARGET_NAME} DEPENDS dummy_rebulid_${TARGET_NAME}) # Add dummy code to support `make target_name` under Terminal Command - file(WRITE ${dummyfile} "const char *dummy_${TARGET_NAME} = \"${dummyfile}\";") - if (go_library_SHARED OR go_library_shared) + file(WRITE ${dummyfile} + "const char *dummy_${TARGET_NAME} = \"${dummyfile}\";") + if(go_library_SHARED OR go_library_shared) add_library(${TARGET_NAME} SHARED ${dummyfile}) else() add_library(${TARGET_NAME} STATIC ${dummyfile}) @@ -777,17 +942,26 @@ function(go_library TARGET_NAME) # rebuild will always happen. add_dependencies(${TARGET_NAME} rebuild_${TARGET_NAME}) - set(${TARGET_NAME}_LIB_PATH "${CMAKE_CURRENT_BINARY_DIR}/${${TARGET_NAME}_LIB_NAME}" CACHE STRING "output library path for target ${TARGET_NAME}") + set(${TARGET_NAME}_LIB_PATH + "${CMAKE_CURRENT_BINARY_DIR}/${${TARGET_NAME}_LIB_NAME}" + CACHE STRING "output library path for target ${TARGET_NAME}") - file(GLOB GO_SOURCE RELATIVE "${CMAKE_CURRENT_SOURCE_DIR}" "*.go") - string(REPLACE "${PADDLE_GO_PATH}/" "" CMAKE_CURRENT_SOURCE_REL_DIR ${CMAKE_CURRENT_SOURCE_DIR}) + file( + GLOB GO_SOURCE + RELATIVE "${CMAKE_CURRENT_SOURCE_DIR}" + "*.go") + string(REPLACE "${PADDLE_GO_PATH}/" "" CMAKE_CURRENT_SOURCE_REL_DIR + ${CMAKE_CURRENT_SOURCE_DIR}) - add_custom_command(TARGET ${TARGET_NAME} POST_BUILD + add_custom_command( + TARGET ${TARGET_NAME} + POST_BUILD COMMAND rm "${${TARGET_NAME}_LIB_PATH}" # Golang build source code - COMMAND GOPATH=${GOPATH} ${CMAKE_Go_COMPILER} build ${BUILD_MODE} - -o "${${TARGET_NAME}_LIB_PATH}" - "./${CMAKE_CURRENT_SOURCE_REL_DIR}/${GO_SOURCE}" + COMMAND + GOPATH=${GOPATH} ${CMAKE_Go_COMPILER} build ${BUILD_MODE} -o + "${${TARGET_NAME}_LIB_PATH}" + "./${CMAKE_CURRENT_SOURCE_REL_DIR}/${GO_SOURCE}" # must run under GOPATH WORKING_DIRECTORY "${PADDLE_IN_GOPATH}/go") add_dependencies(${TARGET_NAME} go_vendor) @@ -797,15 +971,21 @@ function(go_binary TARGET_NAME) set(options OPTIONAL) set(oneValueArgs "") set(multiValueArgs SRCS DEPS) - cmake_parse_arguments(go_binary "${options}" "${oneValueArgs}" "${multiValueArgs}" ${ARGN}) - string(REPLACE "${PADDLE_GO_PATH}/" "" CMAKE_CURRENT_SOURCE_REL_DIR ${CMAKE_CURRENT_SOURCE_DIR}) + cmake_parse_arguments(go_binary "${options}" "${oneValueArgs}" + "${multiValueArgs}" ${ARGN}) + string(REPLACE "${PADDLE_GO_PATH}/" "" CMAKE_CURRENT_SOURCE_REL_DIR + ${CMAKE_CURRENT_SOURCE_DIR}) - add_custom_command(OUTPUT ${TARGET_NAME}_timestamp - COMMAND env GOPATH=${GOPATH} ${CMAKE_Go_COMPILER} build - -o "${CMAKE_CURRENT_BINARY_DIR}/${TARGET_NAME}" - "./${CMAKE_CURRENT_SOURCE_REL_DIR}/${go_binary_SRCS}" + add_custom_command( + OUTPUT ${TARGET_NAME}_timestamp + COMMAND + env GOPATH=${GOPATH} ${CMAKE_Go_COMPILER} build -o + "${CMAKE_CURRENT_BINARY_DIR}/${TARGET_NAME}" + "./${CMAKE_CURRENT_SOURCE_REL_DIR}/${go_binary_SRCS}" WORKING_DIRECTORY "${PADDLE_IN_GOPATH}/go") - add_custom_target(${TARGET_NAME} ALL DEPENDS go_vendor ${TARGET_NAME}_timestamp ${go_binary_DEPS}) + add_custom_target( + ${TARGET_NAME} ALL DEPENDS go_vendor ${TARGET_NAME}_timestamp + ${go_binary_DEPS}) install(PROGRAMS ${CMAKE_CURRENT_BINARY_DIR}/${TARGET_NAME} DESTINATION bin) check_coverage_opt(${TARGET_NAME} ${go_binary_SRCS}) @@ -816,15 +996,21 @@ function(go_test TARGET_NAME) set(options OPTIONAL) set(oneValueArgs "") set(multiValueArgs DEPS) - cmake_parse_arguments(go_test "${options}" "${oneValueArgs}" "${multiValueArgs}" ${ARGN}) - string(REPLACE "${PADDLE_GO_PATH}" "" CMAKE_CURRENT_SOURCE_REL_DIR ${CMAKE_CURRENT_SOURCE_DIR}) + cmake_parse_arguments(go_test "${options}" "${oneValueArgs}" + "${multiValueArgs}" ${ARGN}) + string(REPLACE "${PADDLE_GO_PATH}" "" CMAKE_CURRENT_SOURCE_REL_DIR + ${CMAKE_CURRENT_SOURCE_DIR}) add_custom_target(${TARGET_NAME} ALL DEPENDS go_vendor ${go_test_DEPS}) - add_custom_command(TARGET ${TARGET_NAME} POST_BUILD - COMMAND env GOPATH=${GOPATH} ${CMAKE_Go_COMPILER} test -race - -c -o "${CMAKE_CURRENT_BINARY_DIR}/${TARGET_NAME}" - ".${CMAKE_CURRENT_SOURCE_REL_DIR}" + add_custom_command( + TARGET ${TARGET_NAME} + POST_BUILD + COMMAND + env GOPATH=${GOPATH} ${CMAKE_Go_COMPILER} test -race -c -o + "${CMAKE_CURRENT_BINARY_DIR}/${TARGET_NAME}" + ".${CMAKE_CURRENT_SOURCE_REL_DIR}" WORKING_DIRECTORY "${PADDLE_IN_GOPATH}/go") - add_test(NAME ${TARGET_NAME} + add_test( + NAME ${TARGET_NAME} COMMAND ${CMAKE_CURRENT_BINARY_DIR}/${TARGET_NAME} WORKING_DIRECTORY ${CMAKE_CURRENT_SOURCE_DIR}) endfunction(go_test) @@ -835,7 +1021,9 @@ endfunction(go_test) function(paddle_protobuf_generate_cpp SRCS HDRS) if(NOT ARGN) - message(SEND_ERROR "Error: paddle_protobuf_generate_cpp() called without any proto files") + message( + SEND_ERROR + "Error: paddle_protobuf_generate_cpp() called without any proto files") return() endif() @@ -852,40 +1040,45 @@ function(paddle_protobuf_generate_cpp SRCS HDRS) list(APPEND ${HDRS} "${_protobuf_protoc_hdr}") add_custom_command( - OUTPUT "${_protobuf_protoc_src}" - "${_protobuf_protoc_hdr}" - + OUTPUT "${_protobuf_protoc_src}" "${_protobuf_protoc_hdr}" COMMAND ${CMAKE_COMMAND} -E make_directory "${CMAKE_CURRENT_BINARY_DIR}" - COMMAND ${PROTOBUF_PROTOC_EXECUTABLE} - -I${CMAKE_CURRENT_SOURCE_DIR} - --cpp_out "${CMAKE_CURRENT_BINARY_DIR}" ${ABS_FIL} + COMMAND ${PROTOBUF_PROTOC_EXECUTABLE} -I${CMAKE_CURRENT_SOURCE_DIR} + --cpp_out "${CMAKE_CURRENT_BINARY_DIR}" ${ABS_FIL} # Set `EXTERN_PROTOBUF_DEPEND` only if need to compile `protoc.exe`. DEPENDS ${ABS_FIL} ${EXTERN_PROTOBUF_DEPEND} COMMENT "Running C++ protocol buffer compiler on ${FIL}" - VERBATIM ) + VERBATIM) endforeach() set_source_files_properties(${${SRCS}} ${${HDRS}} PROPERTIES GENERATED TRUE) - set(${SRCS} ${${SRCS}} PARENT_SCOPE) - set(${HDRS} ${${HDRS}} PARENT_SCOPE) + set(${SRCS} + ${${SRCS}} + PARENT_SCOPE) + set(${HDRS} + ${${HDRS}} + PARENT_SCOPE) endfunction() - function(proto_library TARGET_NAME) set(oneValueArgs "") set(multiValueArgs SRCS DEPS) - cmake_parse_arguments(proto_library "${options}" "${oneValueArgs}" "${multiValueArgs}" ${ARGN}) + cmake_parse_arguments(proto_library "${options}" "${oneValueArgs}" + "${multiValueArgs}" ${ARGN}) set(proto_srcs) set(proto_hdrs) paddle_protobuf_generate_cpp(proto_srcs proto_hdrs ${proto_library_SRCS}) - cc_library(${TARGET_NAME} SRCS ${proto_srcs} DEPS ${proto_library_DEPS} protobuf) + cc_library( + ${TARGET_NAME} + SRCS ${proto_srcs} + DEPS ${proto_library_DEPS} protobuf) add_dependencies(extern_xxhash ${TARGET_NAME}) endfunction() function(py_proto_compile TARGET_NAME) set(oneValueArgs "") set(multiValueArgs SRCS) - cmake_parse_arguments(py_proto_compile "${options}" "${oneValueArgs}" "${multiValueArgs}" ${ARGN}) + cmake_parse_arguments(py_proto_compile "${options}" "${oneValueArgs}" + "${multiValueArgs}" ${ARGN}) set(py_srcs) protobuf_generate_python(py_srcs ${py_proto_compile_SRCS}) add_custom_target(${TARGET_NAME} ALL DEPENDS ${py_srcs} protobuf) @@ -896,29 +1089,37 @@ function(py_test TARGET_NAME) set(options "") set(oneValueArgs "") set(multiValueArgs SRCS DEPS ARGS ENVS) - cmake_parse_arguments(py_test "${options}" "${oneValueArgs}" "${multiValueArgs}" ${ARGN}) - - if(WITH_COVERAGE AND NOT (WITH_INCREMENTAL_COVERAGE AND "$ENV{PADDLE_GIT_DIFF_PY_FILE}" STREQUAL "")) - add_test(NAME ${TARGET_NAME} - COMMAND ${CMAKE_COMMAND} -E env FLAGS_init_allocated_mem=true FLAGS_cudnn_deterministic=true - FLAGS_cpu_deterministic=true - PYTHONPATH=${PADDLE_BINARY_DIR}/python ${py_test_ENVS} - COVERAGE_FILE=${PADDLE_BINARY_DIR}/python-coverage.data - ${PYTHON_EXECUTABLE} -m coverage run --branch -p ${py_test_SRCS} ${py_test_ARGS} - WORKING_DIRECTORY ${CMAKE_CURRENT_BINARY_DIR}) + cmake_parse_arguments(py_test "${options}" "${oneValueArgs}" + "${multiValueArgs}" ${ARGN}) + + if(WITH_COVERAGE AND NOT (WITH_INCREMENTAL_COVERAGE + AND "$ENV{PADDLE_GIT_DIFF_PY_FILE}" STREQUAL "")) + add_test( + NAME ${TARGET_NAME} + COMMAND + ${CMAKE_COMMAND} -E env FLAGS_init_allocated_mem=true + FLAGS_cudnn_deterministic=true FLAGS_cpu_deterministic=true + PYTHONPATH=${PADDLE_BINARY_DIR}/python ${py_test_ENVS} + COVERAGE_FILE=${PADDLE_BINARY_DIR}/python-coverage.data + ${PYTHON_EXECUTABLE} -m coverage run --branch -p ${py_test_SRCS} + ${py_test_ARGS} + WORKING_DIRECTORY ${CMAKE_CURRENT_BINARY_DIR}) else() - add_test(NAME ${TARGET_NAME} - COMMAND ${CMAKE_COMMAND} -E env FLAGS_init_allocated_mem=true FLAGS_cudnn_deterministic=true - FLAGS_cpu_deterministic=true ${py_test_ENVS} - ${PYTHON_EXECUTABLE} -u ${py_test_SRCS} ${py_test_ARGS} - WORKING_DIRECTORY ${CMAKE_CURRENT_BINARY_DIR}) + add_test( + NAME ${TARGET_NAME} + COMMAND + ${CMAKE_COMMAND} -E env FLAGS_init_allocated_mem=true + FLAGS_cudnn_deterministic=true FLAGS_cpu_deterministic=true + ${py_test_ENVS} ${PYTHON_EXECUTABLE} -u ${py_test_SRCS} + ${py_test_ARGS} + WORKING_DIRECTORY ${CMAKE_CURRENT_BINARY_DIR}) endif() - if (WIN32) - set_tests_properties(${TARGET_NAME} PROPERTIES TIMEOUT 150) + if(WIN32) + set_tests_properties(${TARGET_NAME} PROPERTIES TIMEOUT 150) endif() - if (APPLE) - set_tests_properties(${TARGET_NAME} PROPERTIES TIMEOUT 20) + if(APPLE) + set_tests_properties(${TARGET_NAME} PROPERTIES TIMEOUT 20) endif() endif() @@ -936,7 +1137,8 @@ function(grpc_library TARGET_NAME) set(oneValueArgs PROTO) set(multiValueArgs SRCS DEPS) set(options "") - cmake_parse_arguments(grpc_library "${options}" "${oneValueArgs}" "${multiValueArgs}" ${ARGN}) + cmake_parse_arguments(grpc_library "${options}" "${oneValueArgs}" + "${multiValueArgs}" ${ARGN}) message(STATUS "generating grpc ${grpc_library_PROTO}") @@ -953,36 +1155,43 @@ function(grpc_library TARGET_NAME) cc_library("${TARGET_NAME}_proto" SRCS "${grpc_proto_srcs}") add_custom_command( - OUTPUT "${grpc_grpc_srcs}" "${grpc_grpc_hdrs}" - COMMAND ${PROTOBUF_PROTOC_EXECUTABLE} - ARGS --grpc_out "${CMAKE_CURRENT_BINARY_DIR}" -I "${PROTO_PATH}" - --plugin=protoc-gen-grpc="${GRPC_CPP_PLUGIN}" "${ABS_PROTO}" - COMMAND ${PROTOBUF_PROTOC_EXECUTABLE} - ARGS --cpp_out "${CMAKE_CURRENT_BINARY_DIR}" -I "${PROTO_PATH}" - "${ABS_PROTO}" - DEPENDS "${ABS_PROTO}" ${PROTOBUF_PROTOC_EXECUTABLE} extern_grpc) + OUTPUT "${grpc_grpc_srcs}" "${grpc_grpc_hdrs}" + COMMAND + ${PROTOBUF_PROTOC_EXECUTABLE} ARGS --grpc_out + "${CMAKE_CURRENT_BINARY_DIR}" -I "${PROTO_PATH}" + --plugin=protoc-gen-grpc="${GRPC_CPP_PLUGIN}" "${ABS_PROTO}" + COMMAND ${PROTOBUF_PROTOC_EXECUTABLE} ARGS --cpp_out + "${CMAKE_CURRENT_BINARY_DIR}" -I "${PROTO_PATH}" "${ABS_PROTO}" + DEPENDS "${ABS_PROTO}" ${PROTOBUF_PROTOC_EXECUTABLE} extern_grpc) # FIXME(typhoonzero): grpc generated code do not generate virtual-dtor, mark it # as compiler warnings instead of error. Should try remove the warnings also. set_source_files_properties( ${grpc_grpc_srcs} PROPERTIES - COMPILE_FLAGS "-Wno-non-virtual-dtor -Wno-error=non-virtual-dtor -Wno-error=delete-non-virtual-dtor") + COMPILE_FLAGS + "-Wno-non-virtual-dtor -Wno-error=non-virtual-dtor -Wno-error=delete-non-virtual-dtor" + ) cc_library("${TARGET_NAME}_grpc" SRCS "${grpc_grpc_srcs}") set_source_files_properties( ${grpc_library_SRCS} PROPERTIES - COMPILE_FLAGS "-Wno-non-virtual-dtor -Wno-error=non-virtual-dtor -Wno-error=delete-non-virtual-dtor") - cc_library("${TARGET_NAME}" SRCS "${grpc_library_SRCS}" DEPS "${TARGET_NAME}_grpc" "${TARGET_NAME}_proto" "${grpc_library_DEPS}") + COMPILE_FLAGS + "-Wno-non-virtual-dtor -Wno-error=non-virtual-dtor -Wno-error=delete-non-virtual-dtor" + ) + cc_library( + "${TARGET_NAME}" + SRCS "${grpc_library_SRCS}" + DEPS "${TARGET_NAME}_grpc" "${TARGET_NAME}_proto" "${grpc_library_DEPS}") endfunction() - function(brpc_library TARGET_NAME) set(oneValueArgs PROTO) set(multiValueArgs SRCS DEPS) set(options "") - cmake_parse_arguments(brpc_library "${options}" "${oneValueArgs}" "${multiValueArgs}" ${ARGN}) + cmake_parse_arguments(brpc_library "${options}" "${oneValueArgs}" + "${multiValueArgs}" ${ARGN}) message(STATUS "generating brpc ${brpc_library_PROTO}") @@ -992,7 +1201,10 @@ function(brpc_library TARGET_NAME) paddle_protobuf_generate_cpp(brpc_proto_srcs brpc_proto_hdrs "${ABS_PROTO}") cc_library("${TARGET_NAME}_proto" SRCS "${brpc_proto_srcs}") - cc_library("${TARGET_NAME}" SRCS "${brpc_library_SRCS}" DEPS "${TARGET_NAME}_proto" "${brpc_library_DEPS}") + cc_library( + "${TARGET_NAME}" + SRCS "${brpc_library_SRCS}" + DEPS "${TARGET_NAME}_proto" "${brpc_library_DEPS}") endfunction() # copy_if_different from src_file to dst_file At the beginning of the build. @@ -1000,11 +1212,11 @@ function(copy_if_different src_file dst_file) get_filename_component(FILE_NAME ${dst_file} NAME_WE) # this is a dummy target for custom command, should always be run firstly to update ${dst_file} - add_custom_target(copy_${FILE_NAME}_command ALL - COMMAND ${CMAKE_COMMAND} -E copy_if_different ${src_file} ${dst_file} - COMMENT "copy_if_different ${dst_file}" - VERBATIM - ) + add_custom_target( + copy_${FILE_NAME}_command ALL + COMMAND ${CMAKE_COMMAND} -E copy_if_different ${src_file} ${dst_file} + COMMENT "copy_if_different ${dst_file}" + VERBATIM) add_dependencies(extern_glog copy_${FILE_NAME}_command) endfunction() @@ -1019,7 +1231,8 @@ function(generate_dummy_static_lib) set(options "") set(oneValueArgs LIB_NAME FILE_PATH GENERATOR CONTENT) set(multiValueArgs "") - cmake_parse_arguments(dummy "${options}" "${oneValueArgs}" "${multiValueArgs}" ${ARGN}) + cmake_parse_arguments(dummy "${options}" "${oneValueArgs}" + "${multiValueArgs}" ${ARGN}) if(NOT dummy_LIB_NAME) message(FATAL_ERROR "You must provide a static lib name.") endif() @@ -1033,45 +1246,55 @@ function(generate_dummy_static_lib) set(dummy_CONTENT "${dummy_LIB_NAME}_dummy.c for lib ${dummy_LIB_NAME}") endif() - configure_file(${PROJECT_SOURCE_DIR}/cmake/dummy.c.in ${dummy_FILE_PATH} @ONLY) + configure_file(${PROJECT_SOURCE_DIR}/cmake/dummy.c.in ${dummy_FILE_PATH} + @ONLY) add_library(${dummy_LIB_NAME} STATIC ${dummy_FILE_PATH}) endfunction() function(math_library TARGET) - # math_library is a function to create math library. - # The interface is the same as cc_library. - # But it handle split GPU/CPU code and link some common library. - set(cc_srcs) - set(cu_srcs) - set(hip_srcs) - set(math_common_deps device_context framework_proto enforce) - if (WITH_GPU) - if (${CMAKE_CUDA_COMPILER_VERSION} LESS 11.0) - list(APPEND math_common_deps cub) - else() - list(APPEND math_common_deps) - endif() + # math_library is a function to create math library. + # The interface is the same as cc_library. + # But it handle split GPU/CPU code and link some common library. + set(cc_srcs) + set(cu_srcs) + set(hip_srcs) + set(math_common_deps device_context framework_proto enforce) + if(WITH_GPU) + if(${CMAKE_CUDA_COMPILER_VERSION} LESS 11.0) + list(APPEND math_common_deps cub) + else() + list(APPEND math_common_deps) endif() - set(multiValueArgs DEPS) - cmake_parse_arguments(math_library "${options}" "${oneValueArgs}" - "${multiValueArgs}" ${ARGN}) + endif() + set(multiValueArgs DEPS) + cmake_parse_arguments(math_library "${options}" "${oneValueArgs}" + "${multiValueArgs}" ${ARGN}) - if (EXISTS ${CMAKE_CURRENT_SOURCE_DIR}/${TARGET}.cc) - list(APPEND cc_srcs ${TARGET}.cc) - endif() - if (EXISTS ${CMAKE_CURRENT_SOURCE_DIR}/${TARGET}.cu) - list(APPEND cu_srcs ${TARGET}.cu) - endif() - if (EXISTS ${CMAKE_CURRENT_SOURCE_DIR}/${TARGET}.cu.cc) - list(APPEND cu_srcs ${TARGET}.cu.cc) - endif() + if(EXISTS ${CMAKE_CURRENT_SOURCE_DIR}/${TARGET}.cc) + list(APPEND cc_srcs ${TARGET}.cc) + endif() + if(EXISTS ${CMAKE_CURRENT_SOURCE_DIR}/${TARGET}.cu) + list(APPEND cu_srcs ${TARGET}.cu) + endif() + if(EXISTS ${CMAKE_CURRENT_SOURCE_DIR}/${TARGET}.cu.cc) + list(APPEND cu_srcs ${TARGET}.cu.cc) + endif() - list(LENGTH cc_srcs cc_srcs_len) - if (WITH_GPU) - nv_library(${TARGET} SRCS ${cc_srcs} ${cu_srcs} DEPS ${math_library_DEPS} ${math_common_deps}) - elseif (WITH_ROCM) - hip_library(${TARGET} SRCS ${cc_srcs} ${cu_srcs} DEPS ${math_library_DEPS} ${math_common_deps}) - elseif(${cc_srcs_len} GREATER 0) - cc_library(${TARGET} SRCS ${cc_srcs} DEPS ${math_library_DEPS} ${math_common_deps}) - endif() + list(LENGTH cc_srcs cc_srcs_len) + if(WITH_GPU) + nv_library( + ${TARGET} + SRCS ${cc_srcs} ${cu_srcs} + DEPS ${math_library_DEPS} ${math_common_deps}) + elseif(WITH_ROCM) + hip_library( + ${TARGET} + SRCS ${cc_srcs} ${cu_srcs} + DEPS ${math_library_DEPS} ${math_common_deps}) + elseif(${cc_srcs_len} GREATER 0) + cc_library( + ${TARGET} + SRCS ${cc_srcs} + DEPS ${math_library_DEPS} ${math_common_deps}) + endif() endfunction() diff --git a/cmake/hip.cmake b/cmake/hip.cmake index 14cb9e6f6be..3514882c944 100644 --- a/cmake/hip.cmake +++ b/cmake/hip.cmake @@ -1,15 +1,27 @@ if(NOT WITH_ROCM) - return() + return() endif() if(NOT DEFINED ENV{ROCM_PATH}) - set(ROCM_PATH "/opt/rocm" CACHE PATH "Path to which ROCm has been installed") - set(HIP_PATH ${ROCM_PATH}/hip CACHE PATH "Path to which HIP has been installed") - set(HIP_CLANG_PATH ${ROCM_PATH}/llvm/bin CACHE PATH "Path to which clang has been installed") + set(ROCM_PATH + "/opt/rocm" + CACHE PATH "Path to which ROCm has been installed") + set(HIP_PATH + ${ROCM_PATH}/hip + CACHE PATH "Path to which HIP has been installed") + set(HIP_CLANG_PATH + ${ROCM_PATH}/llvm/bin + CACHE PATH "Path to which clang has been installed") else() - set(ROCM_PATH $ENV{ROCM_PATH} CACHE PATH "Path to which ROCm has been installed") - set(HIP_PATH ${ROCM_PATH}/hip CACHE PATH "Path to which HIP has been installed") - set(HIP_CLANG_PATH ${ROCM_PATH}/llvm/bin CACHE PATH "Path to which clang has been installed") + set(ROCM_PATH + $ENV{ROCM_PATH} + CACHE PATH "Path to which ROCm has been installed") + set(HIP_PATH + ${ROCM_PATH}/hip + CACHE PATH "Path to which HIP has been installed") + set(HIP_CLANG_PATH + ${ROCM_PATH}/llvm/bin + CACHE PATH "Path to which clang has been installed") endif() set(CMAKE_MODULE_PATH "${HIP_PATH}/cmake" ${CMAKE_MODULE_PATH}) @@ -18,30 +30,39 @@ include_directories(${ROCM_PATH}/include) message(STATUS "HIP version: ${HIP_VERSION}") message(STATUS "HIP_CLANG_PATH: ${HIP_CLANG_PATH}") -macro(find_hip_version hip_header_file) - file(READ ${hip_header_file} HIP_VERSION_FILE_CONTENTS) +macro(find_hip_version hip_header_file) + file(READ ${hip_header_file} HIP_VERSION_FILE_CONTENTS) - string(REGEX MATCH "define HIP_VERSION_MAJOR +([0-9]+)" HIP_MAJOR_VERSION - "${HIP_VERSION_FILE_CONTENTS}") - string(REGEX REPLACE "define HIP_VERSION_MAJOR +([0-9]+)" "\\1" - HIP_MAJOR_VERSION "${HIP_MAJOR_VERSION}") - string(REGEX MATCH "define HIP_VERSION_MINOR +([0-9]+)" HIP_MINOR_VERSION - "${HIP_VERSION_FILE_CONTENTS}") - string(REGEX REPLACE "define HIP_VERSION_MINOR +([0-9]+)" "\\1" - HIP_MINOR_VERSION "${HIP_MINOR_VERSION}") - string(REGEX MATCH "define HIP_VERSION_PATCH +([0-9]+)" HIP_PATCH_VERSION - "${HIP_VERSION_FILE_CONTENTS}") - string(REGEX REPLACE "define HIP_VERSION_PATCH +([0-9]+)" "\\1" - HIP_PATCH_VERSION "${HIP_PATCH_VERSION}") + string(REGEX MATCH "define HIP_VERSION_MAJOR +([0-9]+)" HIP_MAJOR_VERSION + "${HIP_VERSION_FILE_CONTENTS}") + string(REGEX REPLACE "define HIP_VERSION_MAJOR +([0-9]+)" "\\1" + HIP_MAJOR_VERSION "${HIP_MAJOR_VERSION}") + string(REGEX MATCH "define HIP_VERSION_MINOR +([0-9]+)" HIP_MINOR_VERSION + "${HIP_VERSION_FILE_CONTENTS}") + string(REGEX REPLACE "define HIP_VERSION_MINOR +([0-9]+)" "\\1" + HIP_MINOR_VERSION "${HIP_MINOR_VERSION}") + string(REGEX MATCH "define HIP_VERSION_PATCH +([0-9]+)" HIP_PATCH_VERSION + "${HIP_VERSION_FILE_CONTENTS}") + string(REGEX REPLACE "define HIP_VERSION_PATCH +([0-9]+)" "\\1" + HIP_PATCH_VERSION "${HIP_PATCH_VERSION}") - if(NOT HIP_MAJOR_VERSION) - set(HIP_VERSION "???") - message(WARNING "Cannot find HIP version in ${HIP_PATH}/include/hip/hip_version.h") - else() - math(EXPR HIP_VERSION "${HIP_MAJOR_VERSION} * 10000000 + ${HIP_MINOR_VERSION} * 100000 + ${HIP_PATCH_VERSION}") - message(STATUS "Current HIP header is ${HIP_PATH}/include/hip/hip_version.h " - "Current HIP version is v${HIP_MAJOR_VERSION}.${HIP_MINOR_VERSION}.${HIP_PATCH_VERSION}. ") - endif() + if(NOT HIP_MAJOR_VERSION) + set(HIP_VERSION "???") + message( + WARNING "Cannot find HIP version in ${HIP_PATH}/include/hip/hip_version.h" + ) + else() + math( + EXPR + HIP_VERSION + "${HIP_MAJOR_VERSION} * 10000000 + ${HIP_MINOR_VERSION} * 100000 + ${HIP_PATCH_VERSION}" + ) + message( + STATUS + "Current HIP header is ${HIP_PATH}/include/hip/hip_version.h " + "Current HIP version is v${HIP_MAJOR_VERSION}.${HIP_MINOR_VERSION}.${HIP_PATCH_VERSION}. " + ) + endif() endmacro() find_hip_version(${HIP_PATH}/include/hip/hip_version.h) @@ -66,7 +87,8 @@ find_package_and_include(rocfft) # set CXX flags for HIP set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} -D__HIP_PLATFORM_HCC__") set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -D__HIP_PLATFORM_HCC__") -set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -DTHRUST_DEVICE_SYSTEM=THRUST_DEVICE_SYSTEM_HIP") +set(CMAKE_CXX_FLAGS + "${CMAKE_CXX_FLAGS} -DTHRUST_DEVICE_SYSTEM=THRUST_DEVICE_SYSTEM_HIP") set(THRUST_DEVICE_SYSTEM THRUST_DEVICE_SYSTEM_HIP) # define HIP_CXX_FLAGS @@ -103,7 +125,6 @@ list(APPEND HIP_CLANG_FLAGS -fno-gpu-rdc) list(APPEND HIP_CLANG_FLAGS --amdgpu-target=gfx906) list(APPEND HIP_CLANG_FLAGS --amdgpu-target=gfx908) - if(HIP_COMPILER STREQUAL clang) set(hip_library_name amdhip64) else() diff --git a/cmake/inference_lib.cmake b/cmake/inference_lib.cmake index a52047e1616..bf69ddc8fb4 100644 --- a/cmake/inference_lib.cmake +++ b/cmake/inference_lib.cmake @@ -13,290 +13,366 @@ # limitations under the License. # make package for paddle fluid shared and static library -set(PADDLE_INSTALL_DIR "${CMAKE_BINARY_DIR}/paddle_install_dir" CACHE STRING - "A path setting paddle shared and static libraries") +set(PADDLE_INSTALL_DIR + "${CMAKE_BINARY_DIR}/paddle_install_dir" + CACHE STRING "A path setting paddle shared and static libraries") + +set(PADDLE_INFERENCE_INSTALL_DIR + "${CMAKE_BINARY_DIR}/paddle_inference_install_dir" + CACHE STRING "A path setting paddle inference shared and static libraries") -set(PADDLE_INFERENCE_INSTALL_DIR "${CMAKE_BINARY_DIR}/paddle_inference_install_dir" CACHE STRING - "A path setting paddle inference shared and static libraries") - # At present, the size of static lib in Windows is very large, # so we need to crop the library size. if(WIN32) - #todo: remove the option - option(WITH_STATIC_LIB "Compile demo with static/shared library, default use dynamic." OFF) - if(NOT PYTHON_EXECUTABLE) - FIND_PACKAGE(PythonInterp REQUIRED) - endif() + #todo: remove the option + option(WITH_STATIC_LIB + "Compile demo with static/shared library, default use dynamic." OFF) + if(NOT PYTHON_EXECUTABLE) + find_package(PythonInterp REQUIRED) + endif() endif() set(COPY_SCRIPT_DIR ${PADDLE_SOURCE_DIR}/cmake) function(copy TARGET) - set(options "") - set(oneValueArgs "") - set(multiValueArgs SRCS DSTS) - cmake_parse_arguments(copy_lib "${options}" "${oneValueArgs}" "${multiValueArgs}" ${ARGN}) - - list(LENGTH copy_lib_SRCS copy_lib_SRCS_len) - list(LENGTH copy_lib_DSTS copy_lib_DSTS_len) - if (NOT ${copy_lib_SRCS_len} EQUAL ${copy_lib_DSTS_len}) - message(FATAL_ERROR "${TARGET} source numbers are not equal to destination numbers") - endif () - math(EXPR len "${copy_lib_SRCS_len} - 1") - foreach (index RANGE ${len}) - list(GET copy_lib_SRCS ${index} src) - list(GET copy_lib_DSTS ${index} dst) - if (WIN32) #windows - file(TO_NATIVE_PATH ${src} native_src) - file(TO_NATIVE_PATH ${dst} native_dst) - add_custom_command(TARGET ${TARGET} POST_BUILD - COMMAND ${PYTHON_EXECUTABLE} ${COPY_SCRIPT_DIR}/copyfile.py ${native_src} ${native_dst}) - else (WIN32) #not windows - add_custom_command(TARGET ${TARGET} POST_BUILD - COMMAND mkdir -p "${dst}" - COMMAND cp -r "${src}" "${dst}" - COMMENT "copying ${src} -> ${dst}") - endif (WIN32) # not windows - endforeach () + set(options "") + set(oneValueArgs "") + set(multiValueArgs SRCS DSTS) + cmake_parse_arguments(copy_lib "${options}" "${oneValueArgs}" + "${multiValueArgs}" ${ARGN}) + + list(LENGTH copy_lib_SRCS copy_lib_SRCS_len) + list(LENGTH copy_lib_DSTS copy_lib_DSTS_len) + if(NOT ${copy_lib_SRCS_len} EQUAL ${copy_lib_DSTS_len}) + message( + FATAL_ERROR + "${TARGET} source numbers are not equal to destination numbers") + endif() + math(EXPR len "${copy_lib_SRCS_len} - 1") + foreach(index RANGE ${len}) + list(GET copy_lib_SRCS ${index} src) + list(GET copy_lib_DSTS ${index} dst) + if(WIN32) #windows + file(TO_NATIVE_PATH ${src} native_src) + file(TO_NATIVE_PATH ${dst} native_dst) + add_custom_command( + TARGET ${TARGET} + POST_BUILD + COMMAND ${PYTHON_EXECUTABLE} ${COPY_SCRIPT_DIR}/copyfile.py + ${native_src} ${native_dst}) + else(WIN32) #not windows + add_custom_command( + TARGET ${TARGET} + POST_BUILD + COMMAND mkdir -p "${dst}" + COMMAND cp -r "${src}" "${dst}" + COMMENT "copying ${src} -> ${dst}") + endif(WIN32) # not windows + endforeach() endfunction() -function(copy_part_of_thrid_party TARGET DST) - if(${CBLAS_PROVIDER} STREQUAL MKLML) - set(dst_dir "${DST}/third_party/install/mklml") - if(WIN32) - copy(${TARGET} - SRCS ${MKLML_LIB} ${MKLML_IOMP_LIB} ${MKLML_SHARED_LIB} - ${MKLML_SHARED_IOMP_LIB} ${MKLML_INC_DIR} - DSTS ${dst_dir}/lib ${dst_dir}/lib ${dst_dir}/lib - ${dst_dir}/lib ${dst_dir}) - else() - copy(${TARGET} - SRCS ${MKLML_LIB} ${MKLML_IOMP_LIB} ${MKLML_INC_DIR} - DSTS ${dst_dir}/lib ${dst_dir}/lib ${dst_dir}) - if(WITH_STRIP) - add_custom_command(TARGET ${TARGET} POST_BUILD - COMMAND strip -s ${dst_dir}/lib/libiomp5.so - COMMAND strip -s ${dst_dir}/lib/libmklml_intel.so - COMMENT "striping libiomp5.so\nstriping libmklml_intel.so") - endif() - endif() - elseif(${CBLAS_PROVIDER} STREQUAL EXTERN_OPENBLAS) - set(dst_dir "${DST}/third_party/install/openblas") - if(WIN32) - copy(${TARGET} - SRCS ${CBLAS_INSTALL_DIR}/lib ${OPENBLAS_SHARED_LIB} ${CBLAS_INSTALL_DIR}/include - DSTS ${dst_dir} ${dst_dir}/lib ${dst_dir}) - else() - copy(${TARGET} - SRCS ${CBLAS_INSTALL_DIR}/lib ${CBLAS_INSTALL_DIR}/include - DSTS ${dst_dir} ${dst_dir}) - endif() - endif() - - if(WITH_MKLDNN) - set(dst_dir "${DST}/third_party/install/mkldnn") - if(WIN32) - copy(${TARGET} - SRCS ${MKLDNN_INC_DIR} ${MKLDNN_SHARED_LIB} ${MKLDNN_LIB} - DSTS ${dst_dir} ${dst_dir}/lib ${dst_dir}/lib) - else() - copy(${TARGET} - SRCS ${MKLDNN_INC_DIR} ${MKLDNN_SHARED_LIB} - DSTS ${dst_dir} ${dst_dir}/lib) - if(WITH_STRIP) - add_custom_command(TARGET ${TARGET} POST_BUILD - COMMAND strip -s ${dst_dir}/lib/libmkldnn.so.0 - COMMENT "striping libmkldnn.so.0") - endif() - add_custom_command(TARGET ${TARGET} POST_BUILD - COMMAND ${CMAKE_COMMAND} -E create_symlink libmkldnn.so.0 ${dst_dir}/lib/libdnnl.so.1 - COMMAND ${CMAKE_COMMAND} -E create_symlink libmkldnn.so.0 ${dst_dir}/lib/libdnnl.so.2 - COMMENT "Make a symbol link of libmkldnn.so.0") - endif() +function(copy_part_of_thrid_party TARGET DST) + if(${CBLAS_PROVIDER} STREQUAL MKLML) + set(dst_dir "${DST}/third_party/install/mklml") + if(WIN32) + copy( + ${TARGET} + SRCS ${MKLML_LIB} ${MKLML_IOMP_LIB} ${MKLML_SHARED_LIB} + ${MKLML_SHARED_IOMP_LIB} ${MKLML_INC_DIR} + DSTS ${dst_dir}/lib ${dst_dir}/lib ${dst_dir}/lib ${dst_dir}/lib + ${dst_dir}) + else() + copy( + ${TARGET} + SRCS ${MKLML_LIB} ${MKLML_IOMP_LIB} ${MKLML_INC_DIR} + DSTS ${dst_dir}/lib ${dst_dir}/lib ${dst_dir}) + if(WITH_STRIP) + add_custom_command( + TARGET ${TARGET} + POST_BUILD + COMMAND strip -s ${dst_dir}/lib/libiomp5.so + COMMAND strip -s ${dst_dir}/lib/libmklml_intel.so + COMMENT "striping libiomp5.so\nstriping libmklml_intel.so") + endif() endif() - - if (WITH_ONNXRUNTIME) - set(dst_dir "${DST}/third_party/install/onnxruntime") - copy(${TARGET} - SRCS ${ONNXRUNTIME_INC_DIR} ${ONNXRUNTIME_LIB_DIR} - DSTS ${dst_dir} ${dst_dir}) - - set(dst_dir "${DST}/third_party/install/paddle2onnx") - if(WIN32) - copy(${TARGET} - SRCS ${PADDLE2ONNX_INC_DIR}/paddle2onnx ${PADDLE2ONNX_SHARED_LIB} ${PADDLE2ONNX_LIB} - DSTS ${dst_dir}/include ${dst_dir}/lib ${dst_dir}/lib) - else() - copy(${TARGET} - SRCS ${PADDLE2ONNX_INC_DIR}/paddle2onnx ${PADDLE2ONNX_LIB} - DSTS ${dst_dir}/include ${dst_dir}/lib) - endif() + elseif(${CBLAS_PROVIDER} STREQUAL EXTERN_OPENBLAS) + set(dst_dir "${DST}/third_party/install/openblas") + if(WIN32) + copy( + ${TARGET} + SRCS ${CBLAS_INSTALL_DIR}/lib ${OPENBLAS_SHARED_LIB} + ${CBLAS_INSTALL_DIR}/include + DSTS ${dst_dir} ${dst_dir}/lib ${dst_dir}) + else() + copy( + ${TARGET} + SRCS ${CBLAS_INSTALL_DIR}/lib ${CBLAS_INSTALL_DIR}/include + DSTS ${dst_dir} ${dst_dir}) endif() - - set(dst_dir "${DST}/third_party/install/gflags") - copy(${TARGET} - SRCS ${GFLAGS_INCLUDE_DIR} ${GFLAGS_LIBRARIES} - DSTS ${dst_dir} ${dst_dir}/lib) - - set(dst_dir "${DST}/third_party/install/glog") - copy(${TARGET} - SRCS ${GLOG_INCLUDE_DIR} ${GLOG_LIBRARIES} - DSTS ${dst_dir} ${dst_dir}/lib) - - set(dst_dir "${DST}/third_party/install/utf8proc") - copy(${TARGET} - SRCS ${UTF8PROC_INSTALL_DIR}/include ${UTF8PROC_LIBRARIES} - DSTS ${dst_dir} ${dst_dir}/lib) - - if (WITH_CRYPTO) - set(dst_dir "${DST}/third_party/install/cryptopp") - copy(${TARGET} - SRCS ${CRYPTOPP_INCLUDE_DIR} ${CRYPTOPP_LIBRARIES} - DSTS ${dst_dir} ${dst_dir}/lib) + endif() + + if(WITH_MKLDNN) + set(dst_dir "${DST}/third_party/install/mkldnn") + if(WIN32) + copy( + ${TARGET} + SRCS ${MKLDNN_INC_DIR} ${MKLDNN_SHARED_LIB} ${MKLDNN_LIB} + DSTS ${dst_dir} ${dst_dir}/lib ${dst_dir}/lib) + else() + copy( + ${TARGET} + SRCS ${MKLDNN_INC_DIR} ${MKLDNN_SHARED_LIB} + DSTS ${dst_dir} ${dst_dir}/lib) + if(WITH_STRIP) + add_custom_command( + TARGET ${TARGET} + POST_BUILD + COMMAND strip -s ${dst_dir}/lib/libmkldnn.so.0 + COMMENT "striping libmkldnn.so.0") + endif() + add_custom_command( + TARGET ${TARGET} + POST_BUILD + COMMAND ${CMAKE_COMMAND} -E create_symlink libmkldnn.so.0 + ${dst_dir}/lib/libdnnl.so.1 + COMMAND ${CMAKE_COMMAND} -E create_symlink libmkldnn.so.0 + ${dst_dir}/lib/libdnnl.so.2 + COMMENT "Make a symbol link of libmkldnn.so.0") endif() - - set(dst_dir "${DST}/third_party/install/xxhash") - copy(${TARGET} - SRCS ${XXHASH_INCLUDE_DIR} ${XXHASH_LIBRARIES} - DSTS ${dst_dir} ${dst_dir}/lib) - - if (NOT PROTOBUF_FOUND OR WIN32) - set(dst_dir "${DST}/third_party/install/protobuf") - copy(${TARGET} - SRCS ${PROTOBUF_INCLUDE_DIR} ${PROTOBUF_LIBRARY} - DSTS ${dst_dir} ${dst_dir}/lib) - endif () - - if (LITE_BINARY_DIR) - set(dst_dir "${DST}/third_party/install/lite") - copy(${TARGET} - SRCS ${LITE_BINARY_DIR}/${LITE_OUTPUT_BIN_DIR}/* - DSTS ${dst_dir}) + endif() + + if(WITH_ONNXRUNTIME) + set(dst_dir "${DST}/third_party/install/onnxruntime") + copy( + ${TARGET} + SRCS ${ONNXRUNTIME_INC_DIR} ${ONNXRUNTIME_LIB_DIR} + DSTS ${dst_dir} ${dst_dir}) + + set(dst_dir "${DST}/third_party/install/paddle2onnx") + if(WIN32) + copy( + ${TARGET} + SRCS ${PADDLE2ONNX_INC_DIR}/paddle2onnx ${PADDLE2ONNX_SHARED_LIB} + ${PADDLE2ONNX_LIB} + DSTS ${dst_dir}/include ${dst_dir}/lib ${dst_dir}/lib) + else() + copy( + ${TARGET} + SRCS ${PADDLE2ONNX_INC_DIR}/paddle2onnx ${PADDLE2ONNX_LIB} + DSTS ${dst_dir}/include ${dst_dir}/lib) endif() + endif() + + set(dst_dir "${DST}/third_party/install/gflags") + copy( + ${TARGET} + SRCS ${GFLAGS_INCLUDE_DIR} ${GFLAGS_LIBRARIES} + DSTS ${dst_dir} ${dst_dir}/lib) + + set(dst_dir "${DST}/third_party/install/glog") + copy( + ${TARGET} + SRCS ${GLOG_INCLUDE_DIR} ${GLOG_LIBRARIES} + DSTS ${dst_dir} ${dst_dir}/lib) + + set(dst_dir "${DST}/third_party/install/utf8proc") + copy( + ${TARGET} + SRCS ${UTF8PROC_INSTALL_DIR}/include ${UTF8PROC_LIBRARIES} + DSTS ${dst_dir} ${dst_dir}/lib) + + if(WITH_CRYPTO) + set(dst_dir "${DST}/third_party/install/cryptopp") + copy( + ${TARGET} + SRCS ${CRYPTOPP_INCLUDE_DIR} ${CRYPTOPP_LIBRARIES} + DSTS ${dst_dir} ${dst_dir}/lib) + endif() + + set(dst_dir "${DST}/third_party/install/xxhash") + copy( + ${TARGET} + SRCS ${XXHASH_INCLUDE_DIR} ${XXHASH_LIBRARIES} + DSTS ${dst_dir} ${dst_dir}/lib) + + if(NOT PROTOBUF_FOUND OR WIN32) + set(dst_dir "${DST}/third_party/install/protobuf") + copy( + ${TARGET} + SRCS ${PROTOBUF_INCLUDE_DIR} ${PROTOBUF_LIBRARY} + DSTS ${dst_dir} ${dst_dir}/lib) + endif() + + if(LITE_BINARY_DIR) + set(dst_dir "${DST}/third_party/install/lite") + copy( + ${TARGET} + SRCS ${LITE_BINARY_DIR}/${LITE_OUTPUT_BIN_DIR}/* + DSTS ${dst_dir}) + endif() endfunction() # inference library for only inference -set(inference_lib_deps third_party paddle_inference paddle_inference_c paddle_inference_shared paddle_inference_c_shared) +set(inference_lib_deps third_party paddle_inference paddle_inference_c + paddle_inference_shared paddle_inference_c_shared) add_custom_target(inference_lib_dist DEPENDS ${inference_lib_deps}) - set(dst_dir "${PADDLE_INFERENCE_INSTALL_DIR}/third_party/threadpool") -copy(inference_lib_dist - SRCS ${THREADPOOL_INCLUDE_DIR}/ThreadPool.h - DSTS ${dst_dir}) +copy( + inference_lib_dist + SRCS ${THREADPOOL_INCLUDE_DIR}/ThreadPool.h + DSTS ${dst_dir}) # GPU must copy externalErrorMsg.pb -IF(WITH_GPU) - set(dst_dir "${PADDLE_INFERENCE_INSTALL_DIR}/third_party/externalError/data") - copy(inference_lib_dist - SRCS ${externalError_INCLUDE_DIR} - DSTS ${dst_dir}) -ENDIF() - -IF(WITH_XPU) - set(dst_dir "${PADDLE_INFERENCE_INSTALL_DIR}/third_party/install/xpu") - copy(inference_lib_dist - SRCS ${XPU_INC_DIR} ${XPU_LIB_DIR} - DSTS ${dst_dir} ${dst_dir}) -ENDIF() +if(WITH_GPU) + set(dst_dir "${PADDLE_INFERENCE_INSTALL_DIR}/third_party/externalError/data") + copy( + inference_lib_dist + SRCS ${externalError_INCLUDE_DIR} + DSTS ${dst_dir}) +endif() + +if(WITH_XPU) + set(dst_dir "${PADDLE_INFERENCE_INSTALL_DIR}/third_party/install/xpu") + copy( + inference_lib_dist + SRCS ${XPU_INC_DIR} ${XPU_LIB_DIR} + DSTS ${dst_dir} ${dst_dir}) +endif() # CMakeCache Info -copy(inference_lib_dist - SRCS ${CMAKE_CURRENT_BINARY_DIR}/CMakeCache.txt - DSTS ${PADDLE_INFERENCE_INSTALL_DIR}) +copy( + inference_lib_dist + SRCS ${CMAKE_CURRENT_BINARY_DIR}/CMakeCache.txt + DSTS ${PADDLE_INFERENCE_INSTALL_DIR}) copy_part_of_thrid_party(inference_lib_dist ${PADDLE_INFERENCE_INSTALL_DIR}) set(src_dir "${PADDLE_SOURCE_DIR}/paddle/fluid") if(WIN32) - if(WITH_STATIC_LIB) - set(paddle_inference_lib $/libpaddle_inference.lib - $/paddle_inference.*) - else() - set(paddle_inference_lib $/paddle_inference.dll - $/paddle_inference.lib) - endif() - copy(inference_lib_dist - SRCS ${src_dir}/inference/api/paddle_*.h ${paddle_inference_lib} - DSTS ${PADDLE_INFERENCE_INSTALL_DIR}/paddle/include ${PADDLE_INFERENCE_INSTALL_DIR}/paddle/lib - ${PADDLE_INFERENCE_INSTALL_DIR}/paddle/lib) + if(WITH_STATIC_LIB) + set(paddle_inference_lib + $/libpaddle_inference.lib + $/paddle_inference.*) + else() + set(paddle_inference_lib + $/paddle_inference.dll + $/paddle_inference.lib) + endif() + copy( + inference_lib_dist + SRCS ${src_dir}/inference/api/paddle_*.h ${paddle_inference_lib} + DSTS ${PADDLE_INFERENCE_INSTALL_DIR}/paddle/include + ${PADDLE_INFERENCE_INSTALL_DIR}/paddle/lib + ${PADDLE_INFERENCE_INSTALL_DIR}/paddle/lib) else(WIN32) - set(paddle_inference_lib ${PADDLE_BINARY_DIR}/paddle/fluid/inference/libpaddle_inference.*) - copy(inference_lib_dist - SRCS ${src_dir}/inference/api/paddle_*.h ${paddle_inference_lib} - DSTS ${PADDLE_INFERENCE_INSTALL_DIR}/paddle/include ${PADDLE_INFERENCE_INSTALL_DIR}/paddle/lib) + set(paddle_inference_lib + ${PADDLE_BINARY_DIR}/paddle/fluid/inference/libpaddle_inference.*) + copy( + inference_lib_dist + SRCS ${src_dir}/inference/api/paddle_*.h ${paddle_inference_lib} + DSTS ${PADDLE_INFERENCE_INSTALL_DIR}/paddle/include + ${PADDLE_INFERENCE_INSTALL_DIR}/paddle/lib) endif(WIN32) -copy(inference_lib_dist - SRCS ${CMAKE_BINARY_DIR}/paddle/fluid/framework/framework.pb.h - DSTS ${PADDLE_INFERENCE_INSTALL_DIR}/paddle/include/internal) -copy(inference_lib_dist - SRCS ${PADDLE_SOURCE_DIR}/paddle/fluid/framework/io/crypto/cipher.h - DSTS ${PADDLE_INFERENCE_INSTALL_DIR}/paddle/include/crypto/) +copy( + inference_lib_dist + SRCS ${CMAKE_BINARY_DIR}/paddle/fluid/framework/framework.pb.h + DSTS ${PADDLE_INFERENCE_INSTALL_DIR}/paddle/include/internal) +copy( + inference_lib_dist + SRCS ${PADDLE_SOURCE_DIR}/paddle/fluid/framework/io/crypto/cipher.h + DSTS ${PADDLE_INFERENCE_INSTALL_DIR}/paddle/include/crypto/) include_directories(${CMAKE_BINARY_DIR}/../paddle/fluid/framework/io) # copy api headers for phi & custom op -copy(inference_lib_dist - SRCS ${PADDLE_SOURCE_DIR}/paddle/phi/api/ext/*.h - DSTS ${PADDLE_INFERENCE_INSTALL_DIR}/paddle/include/experimental/phi/api/ext/) -copy(inference_lib_dist - SRCS ${PADDLE_SOURCE_DIR}/paddle/phi/api/include/*.h - DSTS ${PADDLE_INFERENCE_INSTALL_DIR}/paddle/include/experimental/phi/api/include/) -copy(inference_lib_dist - SRCS ${PADDLE_SOURCE_DIR}/paddle/phi/api/all.h - DSTS ${PADDLE_INFERENCE_INSTALL_DIR}/paddle/include/experimental/phi/api/) -copy(inference_lib_dist - SRCS ${PADDLE_SOURCE_DIR}/paddle/phi/common/*.h - DSTS ${PADDLE_INFERENCE_INSTALL_DIR}/paddle/include/experimental/phi/common/) -copy(inference_lib_dist - SRCS ${PADDLE_SOURCE_DIR}/paddle/phi/core/macros.h - DSTS ${PADDLE_INFERENCE_INSTALL_DIR}/paddle/include/experimental/phi/core/) -copy(inference_lib_dist - SRCS ${PADDLE_SOURCE_DIR}/paddle/phi/core/visit_type.h - DSTS ${PADDLE_INFERENCE_INSTALL_DIR}/paddle/include/experimental/phi/core/) -copy(inference_lib_dist - SRCS ${PADDLE_SOURCE_DIR}/paddle/utils/any.h - DSTS ${PADDLE_INFERENCE_INSTALL_DIR}/paddle/include/experimental/utils/) -copy(inference_lib_dist - SRCS ${PADDLE_SOURCE_DIR}/paddle/utils/optional.h - DSTS ${PADDLE_INFERENCE_INSTALL_DIR}/paddle/include/experimental/utils/) -copy(inference_lib_dist - SRCS ${PADDLE_SOURCE_DIR}/paddle/utils/none.h - DSTS ${PADDLE_INFERENCE_INSTALL_DIR}/paddle/include/experimental/utils/) -copy(inference_lib_dist - SRCS ${PADDLE_SOURCE_DIR}/paddle/utils/flat_hash_map.h - DSTS ${PADDLE_INFERENCE_INSTALL_DIR}/paddle/include/experimental/utils/) -copy(inference_lib_dist - SRCS ${PADDLE_SOURCE_DIR}/paddle/extension.h - DSTS ${PADDLE_INFERENCE_INSTALL_DIR}/paddle/include/experimental/) +copy( + inference_lib_dist + SRCS ${PADDLE_SOURCE_DIR}/paddle/phi/api/ext/*.h + DSTS ${PADDLE_INFERENCE_INSTALL_DIR}/paddle/include/experimental/phi/api/ext/) +copy( + inference_lib_dist + SRCS ${PADDLE_SOURCE_DIR}/paddle/phi/api/include/*.h + DSTS ${PADDLE_INFERENCE_INSTALL_DIR}/paddle/include/experimental/phi/api/include/ +) +copy( + inference_lib_dist + SRCS ${PADDLE_SOURCE_DIR}/paddle/phi/api/all.h + DSTS ${PADDLE_INFERENCE_INSTALL_DIR}/paddle/include/experimental/phi/api/) +copy( + inference_lib_dist + SRCS ${PADDLE_SOURCE_DIR}/paddle/phi/common/*.h + DSTS ${PADDLE_INFERENCE_INSTALL_DIR}/paddle/include/experimental/phi/common/) +copy( + inference_lib_dist + SRCS ${PADDLE_SOURCE_DIR}/paddle/phi/core/macros.h + DSTS ${PADDLE_INFERENCE_INSTALL_DIR}/paddle/include/experimental/phi/core/) +copy( + inference_lib_dist + SRCS ${PADDLE_SOURCE_DIR}/paddle/phi/core/visit_type.h + DSTS ${PADDLE_INFERENCE_INSTALL_DIR}/paddle/include/experimental/phi/core/) +copy( + inference_lib_dist + SRCS ${PADDLE_SOURCE_DIR}/paddle/utils/any.h + DSTS ${PADDLE_INFERENCE_INSTALL_DIR}/paddle/include/experimental/utils/) +copy( + inference_lib_dist + SRCS ${PADDLE_SOURCE_DIR}/paddle/utils/optional.h + DSTS ${PADDLE_INFERENCE_INSTALL_DIR}/paddle/include/experimental/utils/) +copy( + inference_lib_dist + SRCS ${PADDLE_SOURCE_DIR}/paddle/utils/none.h + DSTS ${PADDLE_INFERENCE_INSTALL_DIR}/paddle/include/experimental/utils/) +copy( + inference_lib_dist + SRCS ${PADDLE_SOURCE_DIR}/paddle/utils/flat_hash_map.h + DSTS ${PADDLE_INFERENCE_INSTALL_DIR}/paddle/include/experimental/utils/) +copy( + inference_lib_dist + SRCS ${PADDLE_SOURCE_DIR}/paddle/extension.h + DSTS ${PADDLE_INFERENCE_INSTALL_DIR}/paddle/include/experimental/) # the header file of phi is copied to the experimental directory, # the include path of phi needs to be changed to adapt to inference api path -add_custom_command(TARGET inference_lib_dist POST_BUILD - COMMAND ${CMAKE_COMMAND} -P "${PADDLE_SOURCE_DIR}/cmake/phi_header.cmake" - COMMENT "Change phi header include path to adapt to inference api path") +add_custom_command( + TARGET inference_lib_dist + POST_BUILD + COMMAND ${CMAKE_COMMAND} -P "${PADDLE_SOURCE_DIR}/cmake/phi_header.cmake" + COMMENT "Change phi header include path to adapt to inference api path") # CAPI inference library for only inference -set(PADDLE_INFERENCE_C_INSTALL_DIR "${CMAKE_BINARY_DIR}/paddle_inference_c_install_dir" CACHE STRING -"A path setting CAPI paddle inference shared") +set(PADDLE_INFERENCE_C_INSTALL_DIR + "${CMAKE_BINARY_DIR}/paddle_inference_c_install_dir" + CACHE STRING "A path setting CAPI paddle inference shared") copy_part_of_thrid_party(inference_lib_dist ${PADDLE_INFERENCE_C_INSTALL_DIR}) set(src_dir "${PADDLE_SOURCE_DIR}/paddle/fluid") if(WIN32) - set(paddle_inference_c_lib $/paddle_inference_c.*) + set(paddle_inference_c_lib + $/paddle_inference_c.*) else(WIN32) - set(paddle_inference_c_lib ${PADDLE_BINARY_DIR}/paddle/fluid/inference/capi_exp/libpaddle_inference_c.*) + set(paddle_inference_c_lib + ${PADDLE_BINARY_DIR}/paddle/fluid/inference/capi_exp/libpaddle_inference_c.* + ) endif(WIN32) -copy(inference_lib_dist - SRCS ${src_dir}/inference/capi_exp/pd_*.h ${paddle_inference_c_lib} - DSTS ${PADDLE_INFERENCE_C_INSTALL_DIR}/paddle/include ${PADDLE_INFERENCE_C_INSTALL_DIR}/paddle/lib) +copy( + inference_lib_dist + SRCS ${src_dir}/inference/capi_exp/pd_*.h ${paddle_inference_c_lib} + DSTS ${PADDLE_INFERENCE_C_INSTALL_DIR}/paddle/include + ${PADDLE_INFERENCE_C_INSTALL_DIR}/paddle/lib) if(WITH_STRIP AND NOT WIN32) - add_custom_command(TARGET inference_lib_dist POST_BUILD - COMMAND strip -s ${PADDLE_INFERENCE_C_INSTALL_DIR}/paddle/lib/libpaddle_inference_c.so - COMMAND strip -s ${PADDLE_INFERENCE_INSTALL_DIR}/paddle/lib/libpaddle_inference.so - COMMENT "striping libpaddle_inference_c.so\nstriping libpaddle_inference.so") + add_custom_command( + TARGET inference_lib_dist + POST_BUILD + COMMAND + strip -s + ${PADDLE_INFERENCE_C_INSTALL_DIR}/paddle/lib/libpaddle_inference_c.so + COMMAND strip -s + ${PADDLE_INFERENCE_INSTALL_DIR}/paddle/lib/libpaddle_inference.so + COMMENT "striping libpaddle_inference_c.so\nstriping libpaddle_inference.so" + ) endif() # fluid library for both train and inference @@ -306,36 +382,55 @@ add_custom_target(fluid_lib_dist ALL DEPENDS ${fluid_lib_deps}) set(dst_dir "${PADDLE_INSTALL_DIR}/paddle/fluid") set(module "inference") if(WIN32) - copy(fluid_lib_dist - SRCS ${src_dir}/${module}/*.h ${src_dir}/${module}/api/paddle_*.h ${paddle_inference_lib} - DSTS ${dst_dir}/${module} ${dst_dir}/${module} ${dst_dir}/${module} ${dst_dir}/${module} - ) - else() - copy(fluid_lib_dist - SRCS ${src_dir}/${module}/*.h ${src_dir}/${module}/api/paddle_*.h ${paddle_inference_lib} - DSTS ${dst_dir}/${module} ${dst_dir}/${module} ${dst_dir}/${module} - ) + copy( + fluid_lib_dist + SRCS ${src_dir}/${module}/*.h ${src_dir}/${module}/api/paddle_*.h + ${paddle_inference_lib} + DSTS ${dst_dir}/${module} ${dst_dir}/${module} ${dst_dir}/${module} + ${dst_dir}/${module}) +else() + copy( + fluid_lib_dist + SRCS ${src_dir}/${module}/*.h ${src_dir}/${module}/api/paddle_*.h + ${paddle_inference_lib} + DSTS ${dst_dir}/${module} ${dst_dir}/${module} ${dst_dir}/${module}) endif() set(module "framework") set(framework_lib_deps framework_proto data_feed_proto trainer_desc_proto) add_dependencies(fluid_lib_dist ${framework_lib_deps}) -copy(fluid_lib_dist - SRCS ${src_dir}/${module}/*.h ${src_dir}/${module}/details/*.h ${PADDLE_BINARY_DIR}/paddle/fluid/framework/trainer_desc.pb.h ${PADDLE_BINARY_DIR}/paddle/fluid/framework/framework.pb.h ${PADDLE_BINARY_DIR}/paddle/fluid/framework/data_feed.pb.h ${src_dir}/${module}/ir/memory_optimize_pass/*.h - ${src_dir}/${module}/ir/*.h ${src_dir}/${module}/fleet/*.h - DSTS ${dst_dir}/${module} ${dst_dir}/${module}/details ${dst_dir}/${module} ${dst_dir}/${module} ${dst_dir}/${module} ${dst_dir}/${module}/ir/memory_optimize_pass ${dst_dir}/${module}/ir ${dst_dir}/${module}/fleet) +copy( + fluid_lib_dist + SRCS ${src_dir}/${module}/*.h + ${src_dir}/${module}/details/*.h + ${PADDLE_BINARY_DIR}/paddle/fluid/framework/trainer_desc.pb.h + ${PADDLE_BINARY_DIR}/paddle/fluid/framework/framework.pb.h + ${PADDLE_BINARY_DIR}/paddle/fluid/framework/data_feed.pb.h + ${src_dir}/${module}/ir/memory_optimize_pass/*.h + ${src_dir}/${module}/ir/*.h + ${src_dir}/${module}/fleet/*.h + DSTS ${dst_dir}/${module} + ${dst_dir}/${module}/details + ${dst_dir}/${module} + ${dst_dir}/${module} + ${dst_dir}/${module} + ${dst_dir}/${module}/ir/memory_optimize_pass + ${dst_dir}/${module}/ir + ${dst_dir}/${module}/fleet) set(module "operators") -copy(fluid_lib_dist - SRCS ${src_dir}/${module}/reader/blocking_queue.h - DSTS ${dst_dir}/${module}/reader/ - ) +copy( + fluid_lib_dist + SRCS ${src_dir}/${module}/reader/blocking_queue.h + DSTS ${dst_dir}/${module}/reader/) set(module "memory") -copy(fluid_lib_dist - SRCS ${src_dir}/${module}/*.h ${src_dir}/${module}/detail/*.h ${src_dir}/${module}/allocation/*.h - DSTS ${dst_dir}/${module} ${dst_dir}/${module}/detail ${dst_dir}/${module}/allocation - ) +copy( + fluid_lib_dist + SRCS ${src_dir}/${module}/*.h ${src_dir}/${module}/detail/*.h + ${src_dir}/${module}/allocation/*.h + DSTS ${dst_dir}/${module} ${dst_dir}/${module}/detail + ${dst_dir}/${module}/allocation) set(module "platform") set(platform_lib_deps profiler_proto errors) @@ -344,99 +439,113 @@ if(WITH_GPU) endif(WITH_GPU) add_dependencies(fluid_lib_dist ${platform_lib_deps}) -copy(fluid_lib_dist - SRCS ${src_dir}/${module}/*.h ${src_dir}/${module}/dynload/*.h ${src_dir}/${module}/details/*.h ${PADDLE_BINARY_DIR}/paddle/fluid/platform/*.pb.h - DSTS ${dst_dir}/${module} ${dst_dir}/${module}/dynload ${dst_dir}/${module}/details ${dst_dir}/${module} - ) +copy( + fluid_lib_dist + SRCS ${src_dir}/${module}/*.h ${src_dir}/${module}/dynload/*.h + ${src_dir}/${module}/details/*.h + ${PADDLE_BINARY_DIR}/paddle/fluid/platform/*.pb.h + DSTS ${dst_dir}/${module} ${dst_dir}/${module}/dynload + ${dst_dir}/${module}/details ${dst_dir}/${module}) set(module "string") -copy(fluid_lib_dist - SRCS ${PADDLE_SOURCE_DIR}/paddle/utils/${module}/*.h ${PADDLE_SOURCE_DIR}/paddle/utils/${module}/tinyformat/*.h - DSTS ${dst_dir}/${module} ${dst_dir}/${module}/tinyformat - ) +copy( + fluid_lib_dist + SRCS ${PADDLE_SOURCE_DIR}/paddle/utils/${module}/*.h + ${PADDLE_SOURCE_DIR}/paddle/utils/${module}/tinyformat/*.h + DSTS ${dst_dir}/${module} ${dst_dir}/${module}/tinyformat) set(module "imperative") -copy(fluid_lib_dist - SRCS ${src_dir}/${module}/*.h ${src_dir}/${module}/jit/*.h - DSTS ${dst_dir}/${module} ${dst_dir}/${module}/jit - ) +copy( + fluid_lib_dist + SRCS ${src_dir}/${module}/*.h ${src_dir}/${module}/jit/*.h + DSTS ${dst_dir}/${module} ${dst_dir}/${module}/jit) set(module "pybind") -copy(fluid_lib_dist - SRCS ${CMAKE_CURRENT_BINARY_DIR}/paddle/fluid/${module}/pybind.h - DSTS ${dst_dir}/${module} - ) +copy( + fluid_lib_dist + SRCS ${CMAKE_CURRENT_BINARY_DIR}/paddle/fluid/${module}/pybind.h + DSTS ${dst_dir}/${module}) set(dst_dir "${PADDLE_INSTALL_DIR}/third_party/eigen3") -copy(inference_lib_dist - SRCS ${EIGEN_INCLUDE_DIR}/Eigen/Core ${EIGEN_INCLUDE_DIR}/Eigen/src ${EIGEN_INCLUDE_DIR}/unsupported/Eigen - DSTS ${dst_dir}/Eigen ${dst_dir}/Eigen ${dst_dir}/unsupported) +copy( + inference_lib_dist + SRCS ${EIGEN_INCLUDE_DIR}/Eigen/Core ${EIGEN_INCLUDE_DIR}/Eigen/src + ${EIGEN_INCLUDE_DIR}/unsupported/Eigen + DSTS ${dst_dir}/Eigen ${dst_dir}/Eigen ${dst_dir}/unsupported) set(dst_dir "${PADDLE_INSTALL_DIR}/third_party/boost") -copy(inference_lib_dist - SRCS ${BOOST_INCLUDE_DIR}/boost - DSTS ${dst_dir}) +copy( + inference_lib_dist + SRCS ${BOOST_INCLUDE_DIR}/boost + DSTS ${dst_dir}) set(dst_dir "${PADDLE_INSTALL_DIR}/third_party/dlpack") -copy(inference_lib_dist - SRCS ${DLPACK_INCLUDE_DIR}/dlpack - DSTS ${dst_dir}) +copy( + inference_lib_dist + SRCS ${DLPACK_INCLUDE_DIR}/dlpack + DSTS ${dst_dir}) set(dst_dir "${PADDLE_INSTALL_DIR}/third_party/install/zlib") -copy(inference_lib_dist - SRCS ${ZLIB_INCLUDE_DIR} ${ZLIB_LIBRARIES} - DSTS ${dst_dir} ${dst_dir}/lib) - +copy( + inference_lib_dist + SRCS ${ZLIB_INCLUDE_DIR} ${ZLIB_LIBRARIES} + DSTS ${dst_dir} ${dst_dir}/lib) # CMakeCache Info -copy(fluid_lib_dist - SRCS ${PADDLE_INFERENCE_INSTALL_DIR}/third_party ${CMAKE_CURRENT_BINARY_DIR}/CMakeCache.txt - DSTS ${PADDLE_INSTALL_DIR} ${PADDLE_INSTALL_DIR} - ) +copy( + fluid_lib_dist + SRCS ${PADDLE_INFERENCE_INSTALL_DIR}/third_party + ${CMAKE_CURRENT_BINARY_DIR}/CMakeCache.txt + DSTS ${PADDLE_INSTALL_DIR} ${PADDLE_INSTALL_DIR}) # paddle fluid version function(version version_file) - execute_process( - COMMAND ${GIT_EXECUTABLE} log --pretty=format:%H -1 - WORKING_DIRECTORY ${PADDLE_SOURCE_DIR} - OUTPUT_VARIABLE PADDLE_GIT_COMMIT) - file(WRITE ${version_file} - "GIT COMMIT ID: ${PADDLE_GIT_COMMIT}\n" - "WITH_MKL: ${WITH_MKL}\n" - "WITH_MKLDNN: ${WITH_MKLDNN}\n" - "WITH_GPU: ${WITH_GPU}\n" - "WITH_ROCM: ${WITH_ROCM}\n" - "WITH_ASCEND_CL: ${WITH_ASCEND_CL}\n" - "WITH_ASCEND_CXX11: ${WITH_ASCEND_CXX11}\n" - "WITH_IPU: ${WITH_IPU}\n") - if(WITH_GPU) - file(APPEND ${version_file} - "CUDA version: ${CUDA_VERSION}\n" - "CUDNN version: v${CUDNN_MAJOR_VERSION}.${CUDNN_MINOR_VERSION}\n") - endif() - if(WITH_ROCM) - file(APPEND ${version_file} - "HIP version: v${HIP_MAJOR_VERSION}.${HIP_MINOR_VERSION}\n" - "MIOpen version: v${MIOPEN_MAJOR_VERSION}.${MIOPEN_MINOR_VERSION}\n") - endif() - if(WITH_ASCEND_CL) - file(APPEND ${version_file} - "Ascend Toolkit version: ${ASCEND_TOOLKIT_VERSION}\n" - "Ascend Driver version: ${ASCEND_DRIVER_VERSION}\n") - endif() - if(WITH_IPU) - file(APPEND ${version_file} - "PopART version: ${POPART_VERSION}\n") - endif() - file(APPEND ${version_file} "CXX compiler version: ${CMAKE_CXX_COMPILER_VERSION}\n") - if(TENSORRT_FOUND) - file(APPEND ${version_file} - "WITH_TENSORRT: ${TENSORRT_FOUND}\n" "TensorRT version: v${TENSORRT_MAJOR_VERSION}.${TENSORRT_MINOR_VERSION}.${TENSORRT_PATCH_VERSION}.${TENSORRT_BUILD_VERSION}\n") - endif() - if(WITH_LITE) - file(APPEND ${version_file} "WITH_LITE: ${WITH_LITE}\n" "LITE_GIT_TAG: ${LITE_GIT_TAG}\n") - endif() - + execute_process( + COMMAND ${GIT_EXECUTABLE} log --pretty=format:%H -1 + WORKING_DIRECTORY ${PADDLE_SOURCE_DIR} + OUTPUT_VARIABLE PADDLE_GIT_COMMIT) + file( + WRITE ${version_file} + "GIT COMMIT ID: ${PADDLE_GIT_COMMIT}\n" + "WITH_MKL: ${WITH_MKL}\n" + "WITH_MKLDNN: ${WITH_MKLDNN}\n" + "WITH_GPU: ${WITH_GPU}\n" + "WITH_ROCM: ${WITH_ROCM}\n" + "WITH_ASCEND_CL: ${WITH_ASCEND_CL}\n" + "WITH_ASCEND_CXX11: ${WITH_ASCEND_CXX11}\n" + "WITH_IPU: ${WITH_IPU}\n") + if(WITH_GPU) + file(APPEND ${version_file} + "CUDA version: ${CUDA_VERSION}\n" + "CUDNN version: v${CUDNN_MAJOR_VERSION}.${CUDNN_MINOR_VERSION}\n") + endif() + if(WITH_ROCM) + file(APPEND ${version_file} + "HIP version: v${HIP_MAJOR_VERSION}.${HIP_MINOR_VERSION}\n" + "MIOpen version: v${MIOPEN_MAJOR_VERSION}.${MIOPEN_MINOR_VERSION}\n") + endif() + if(WITH_ASCEND_CL) + file(APPEND ${version_file} + "Ascend Toolkit version: ${ASCEND_TOOLKIT_VERSION}\n" + "Ascend Driver version: ${ASCEND_DRIVER_VERSION}\n") + endif() + if(WITH_IPU) + file(APPEND ${version_file} "PopART version: ${POPART_VERSION}\n") + endif() + file(APPEND ${version_file} + "CXX compiler version: ${CMAKE_CXX_COMPILER_VERSION}\n") + if(TENSORRT_FOUND) + file( + APPEND ${version_file} + "WITH_TENSORRT: ${TENSORRT_FOUND}\n" + "TensorRT version: v${TENSORRT_MAJOR_VERSION}.${TENSORRT_MINOR_VERSION}.${TENSORRT_PATCH_VERSION}.${TENSORRT_BUILD_VERSION}\n" + ) + endif() + if(WITH_LITE) + file(APPEND ${version_file} "WITH_LITE: ${WITH_LITE}\n" + "LITE_GIT_TAG: ${LITE_GIT_TAG}\n") + endif() + endfunction() version(${PADDLE_INSTALL_DIR}/version.txt) version(${PADDLE_INFERENCE_INSTALL_DIR}/version.txt) diff --git a/cmake/infrt_lib.cmake b/cmake/infrt_lib.cmake index 5b27c9d8400..21dcd0ef36d 100644 --- a/cmake/infrt_lib.cmake +++ b/cmake/infrt_lib.cmake @@ -12,65 +12,74 @@ # See the License for the specific language governing permissions and # limitations under the License. -set(INFRT_INSTALL_DIR "${CMAKE_BINARY_DIR}/paddle_infrt_install_dir" CACHE STRING - "A path setting paddle infrt shared and static libraries") - +set(INFRT_INSTALL_DIR + "${CMAKE_BINARY_DIR}/paddle_infrt_install_dir" + CACHE STRING "A path setting paddle infrt shared and static libraries") + function(copy TARGET) - set(options "") - set(oneValueArgs "") - set(multiValueArgs SRCS DSTS) - cmake_parse_arguments(copy_lib "${options}" "${oneValueArgs}" "${multiValueArgs}" ${ARGN}) + set(options "") + set(oneValueArgs "") + set(multiValueArgs SRCS DSTS) + cmake_parse_arguments(copy_lib "${options}" "${oneValueArgs}" + "${multiValueArgs}" ${ARGN}) - list(LENGTH copy_lib_SRCS copy_lib_SRCS_len) - list(LENGTH copy_lib_DSTS copy_lib_DSTS_len) - if (NOT ${copy_lib_SRCS_len} EQUAL ${copy_lib_DSTS_len}) - message(FATAL_ERROR "${TARGET} source numbers are not equal to destination numbers") - endif () - math(EXPR len "${copy_lib_SRCS_len} - 1") - foreach (index RANGE ${len}) - list(GET copy_lib_SRCS ${index} src) - list(GET copy_lib_DSTS ${index} dst) - add_custom_command(TARGET ${TARGET} POST_BUILD - COMMAND mkdir -p "${dst}" - COMMAND cp -r "${src}" "${dst}" - COMMENT "copying ${src} -> ${dst}") - endforeach () + list(LENGTH copy_lib_SRCS copy_lib_SRCS_len) + list(LENGTH copy_lib_DSTS copy_lib_DSTS_len) + if(NOT ${copy_lib_SRCS_len} EQUAL ${copy_lib_DSTS_len}) + message( + FATAL_ERROR + "${TARGET} source numbers are not equal to destination numbers") + endif() + math(EXPR len "${copy_lib_SRCS_len} - 1") + foreach(index RANGE ${len}) + list(GET copy_lib_SRCS ${index} src) + list(GET copy_lib_DSTS ${index} dst) + add_custom_command( + TARGET ${TARGET} + POST_BUILD + COMMAND mkdir -p "${dst}" + COMMAND cp -r "${src}" "${dst}" + COMMENT "copying ${src} -> ${dst}") + endforeach() endfunction() -function(copy_part_of_thrid_party TARGET DST) - set(dst_dir "${DST}/third_party/install/glog") - copy(${TARGET} - SRCS ${GLOG_INCLUDE_DIR} ${GLOG_LIBRARIES} - DSTS ${dst_dir} ${dst_dir}/lib) +function(copy_part_of_thrid_party TARGET DST) + set(dst_dir "${DST}/third_party/install/glog") + copy( + ${TARGET} + SRCS ${GLOG_INCLUDE_DIR} ${GLOG_LIBRARIES} + DSTS ${dst_dir} ${dst_dir}/lib) endfunction() # inference library for only inference set(infrt_lib_deps third_party infrt infrt_static) add_custom_target(infrt_lib_dist DEPENDS ${infrt_lib_deps}) - # CMakeCache Info -copy(infrt_lib_dist - SRCS ${CMAKE_BINARY_DIR}/CMakeCache.txt - DSTS ${INFRT_INSTALL_DIR}) +copy( + infrt_lib_dist + SRCS ${CMAKE_BINARY_DIR}/CMakeCache.txt + DSTS ${INFRT_INSTALL_DIR}) set(infrt_lib ${INFRT_BINARY_DIR}/libinfrt.*) -copy(infrt_lib_dist - SRCS ${INFRT_SOURCE_DIR}/api/infrt_api.h ${infrt_lib} - DSTS ${INFRT_INSTALL_DIR}/infrt/include ${INFRT_INSTALL_DIR}/infrt/lib) - +copy( + infrt_lib_dist + SRCS ${INFRT_SOURCE_DIR}/api/infrt_api.h ${infrt_lib} + DSTS ${INFRT_INSTALL_DIR}/infrt/include ${INFRT_INSTALL_DIR}/infrt/lib) -copy(infrt_lib_dist - SRCS ${INFRT_BINARY_DIR}/paddle/framework.pb.h - DSTS ${INFRT_INSTALL_DIR}/infrt/include/internal) +copy( + infrt_lib_dist + SRCS ${INFRT_BINARY_DIR}/paddle/framework.pb.h + DSTS ${INFRT_INSTALL_DIR}/infrt/include/internal) # paddle fluid version function(version version_file) - execute_process( - COMMAND ${GIT_EXECUTABLE} log --pretty=format:%H -1 - WORKING_DIRECTORY ${PADDLE_SOURCE_DIR} - OUTPUT_VARIABLE PADDLE_GIT_COMMIT) - file(WRITE ${version_file} "GIT COMMIT ID: ${PADDLE_GIT_COMMIT}\n") - file(APPEND ${version_file} "CXX compiler version: ${CMAKE_CXX_COMPILER_VERSION}\n") + execute_process( + COMMAND ${GIT_EXECUTABLE} log --pretty=format:%H -1 + WORKING_DIRECTORY ${PADDLE_SOURCE_DIR} + OUTPUT_VARIABLE PADDLE_GIT_COMMIT) + file(WRITE ${version_file} "GIT COMMIT ID: ${PADDLE_GIT_COMMIT}\n") + file(APPEND ${version_file} + "CXX compiler version: ${CMAKE_CXX_COMPILER_VERSION}\n") endfunction() version(${INFRT_INSTALL_DIR}/version.txt) diff --git a/cmake/init.cmake b/cmake/init.cmake index 0ebcdc8ceee..86c43cb233b 100644 --- a/cmake/init.cmake +++ b/cmake/init.cmake @@ -8,43 +8,44 @@ # MINSIZEREL: default: "-O2 -g -DNDEBUG" if(NOT WIN32) - set(CMAKE_C_FLAGS_DEBUG "-g") - set(CMAKE_C_FLAGS_RELEASE "-O3 -DNDEBUG") - set(CMAKE_C_FLAGS_RELWITHDEBINFO "-O2 -g -DNDEBUG") - set(CMAKE_C_FLAGS_MINSIZEREL "-Os -DNDEBUG") + set(CMAKE_C_FLAGS_DEBUG "-g") + set(CMAKE_C_FLAGS_RELEASE "-O3 -DNDEBUG") + set(CMAKE_C_FLAGS_RELWITHDEBINFO "-O2 -g -DNDEBUG") + set(CMAKE_C_FLAGS_MINSIZEREL "-Os -DNDEBUG") - set(CMAKE_CXX_FLAGS_DEBUG "-g") - set(CMAKE_CXX_FLAGS_RELEASE "-O3 -DNDEBUG") - set(CMAKE_CXX_FLAGS_RELWITHDEBINFO "-O2 -g -DNDEBUG") - set(CMAKE_CXX_FLAGS_MINSIZEREL "-Os -DNDEBUG") - - if(WITH_GPU) - set(CMAKE_CUDA_FLAGS_DEBUG "-g") - set(CMAKE_CUDA_FLAGS_RELEASE "-O3 -DNDEBUG") - set(CMAKE_CUDA_FLAGS_RELWITHDEBINFO "-O2 -g -DNDEBUG") - set(CMAKE_CUDA_FLAGS_MINSIZEREL "-O1 -DNDEBUG") - endif() + set(CMAKE_CXX_FLAGS_DEBUG "-g") + set(CMAKE_CXX_FLAGS_RELEASE "-O3 -DNDEBUG") + set(CMAKE_CXX_FLAGS_RELWITHDEBINFO "-O2 -g -DNDEBUG") + set(CMAKE_CXX_FLAGS_MINSIZEREL "-Os -DNDEBUG") + + if(WITH_GPU) + set(CMAKE_CUDA_FLAGS_DEBUG "-g") + set(CMAKE_CUDA_FLAGS_RELEASE "-O3 -DNDEBUG") + set(CMAKE_CUDA_FLAGS_RELWITHDEBINFO "-O2 -g -DNDEBUG") + set(CMAKE_CUDA_FLAGS_MINSIZEREL "-O1 -DNDEBUG") + endif() else() - set(CMAKE_C_FLAGS_DEBUG "/MDd /Zi /Ob0 /Od /RTC1") - set(CMAKE_C_FLAGS_RELEASE "/MD /O2 /Ob2 /DNDEBUG") - set(CMAKE_C_FLAGS_RELWITHDEBINFO "/MD /Zi /O2 /Ob1 /DNDEBUG") - set(CMAKE_C_FLAGS_MINSIZEREL "/MD /O1 /Ob1 /DNDEBUG") + set(CMAKE_C_FLAGS_DEBUG "/MDd /Zi /Ob0 /Od /RTC1") + set(CMAKE_C_FLAGS_RELEASE "/MD /O2 /Ob2 /DNDEBUG") + set(CMAKE_C_FLAGS_RELWITHDEBINFO "/MD /Zi /O2 /Ob1 /DNDEBUG") + set(CMAKE_C_FLAGS_MINSIZEREL "/MD /O1 /Ob1 /DNDEBUG") - set(CMAKE_CXX_FLAGS_DEBUG "/MDd /Zi /Ob0 /Od /RTC1") - set(CMAKE_CXX_FLAGS_RELEASE "/MD /O2 /Ob2 /DNDEBUG") - set(CMAKE_CXX_FLAGS_RELWITHDEBINFO "/MD /Zi /O2 /Ob1 /DNDEBUG") - set(CMAKE_CXX_FLAGS_MINSIZEREL "/MD /O1 /Ob1 /DNDEBUG") + set(CMAKE_CXX_FLAGS_DEBUG "/MDd /Zi /Ob0 /Od /RTC1") + set(CMAKE_CXX_FLAGS_RELEASE "/MD /O2 /Ob2 /DNDEBUG") + set(CMAKE_CXX_FLAGS_RELWITHDEBINFO "/MD /Zi /O2 /Ob1 /DNDEBUG") + set(CMAKE_CXX_FLAGS_MINSIZEREL "/MD /O1 /Ob1 /DNDEBUG") - if(WITH_GPU) - set(CMAKE_CUDA_FLAGS_DEBUG "-Xcompiler=\"-MDd -Zi -Ob0 -Od /RTC1\"") - set(CMAKE_CUDA_FLAGS_RELEASE "-Xcompiler=\"-MD -O2 -Ob2\" -DNDEBUG") - set(CMAKE_CUDA_FLAGS_RELWITHDEBINFO "-Xcompiler=\"-MD -Zi -O2 -Ob1\" -DNDEBUG") - set(CMAKE_CUDA_FLAGS_MINSIZEREL "-Xcompiler=\"-MD -O1 -Ob1\" -DNDEBUG") - endif() + if(WITH_GPU) + set(CMAKE_CUDA_FLAGS_DEBUG "-Xcompiler=\"-MDd -Zi -Ob0 -Od /RTC1\"") + set(CMAKE_CUDA_FLAGS_RELEASE "-Xcompiler=\"-MD -O2 -Ob2\" -DNDEBUG") + set(CMAKE_CUDA_FLAGS_RELWITHDEBINFO + "-Xcompiler=\"-MD -Zi -O2 -Ob1\" -DNDEBUG") + set(CMAKE_CUDA_FLAGS_MINSIZEREL "-Xcompiler=\"-MD -O1 -Ob1\" -DNDEBUG") + endif() - # It can specify CUDA compile flag manualy, - # its use is to remvoe /Zi to reduce GPU static library size. But it's dangerous - # because CUDA will update by nvidia, then error will occur. - # Now, it's only used in VS2015 + CUDA:[10.0, 10.2] - set(WIN_PROPS ${CMAKE_SOURCE_DIR}/cmake/paddle_win.props) + # It can specify CUDA compile flag manualy, + # its use is to remvoe /Zi to reduce GPU static library size. But it's dangerous + # because CUDA will update by nvidia, then error will occur. + # Now, it's only used in VS2015 + CUDA:[10.0, 10.2] + set(WIN_PROPS ${CMAKE_SOURCE_DIR}/cmake/paddle_win.props) endif() diff --git a/cmake/miopen.cmake b/cmake/miopen.cmake index 493c37955f7..392ff0401ea 100644 --- a/cmake/miopen.cmake +++ b/cmake/miopen.cmake @@ -1,65 +1,77 @@ if(NOT WITH_ROCM) - return() + return() endif() # Now we don't support ROCm on windows if(WIN32) - return() + return() endif() -set(MIOPEN_ROOT ${ROCM_PATH}/miopen CACHE PATH "MIOPEN ROOT") +set(MIOPEN_ROOT + ${ROCM_PATH}/miopen + CACHE PATH "MIOPEN ROOT") -find_path(MIOPEN_INCLUDE_DIR "miopen/miopen.h" - PATHS ${MIOPEN_ROOT} ${MIOPEN_ROOT}/include ${MIOPEN_ROOT}/local/include - $ENV{MIOPEN_ROOT} $ENV{MIOPEN_ROOT}/include $ENV{MIOPEN_ROOT}/local/include - NO_DEFAULT_PATH -) +find_path( + MIOPEN_INCLUDE_DIR "miopen/miopen.h" + PATHS ${MIOPEN_ROOT} ${MIOPEN_ROOT}/include ${MIOPEN_ROOT}/local/include + $ENV{MIOPEN_ROOT} $ENV{MIOPEN_ROOT}/include + $ENV{MIOPEN_ROOT}/local/include + NO_DEFAULT_PATH) -find_library(MIOPEN_LIBRARY NAMES "libMIOpen.so" - PATHS ${MIOPEN_ROOT} ${MIOPEN_ROOT}/lib ${MIOPEN_ROOT}/lib64 ${__libpath_hist} - $ENV{MIOPEN_ROOT} $ENV{MIOPEN_ROOT}/lib $ENV{MIOPEN_ROOT}/lib64 - NO_DEFAULT_PATH - DOC "Path to MIOpen library.") +find_library( + MIOPEN_LIBRARY + NAMES "libMIOpen.so" + PATHS ${MIOPEN_ROOT} + ${MIOPEN_ROOT}/lib + ${MIOPEN_ROOT}/lib64 + ${__libpath_hist} + $ENV{MIOPEN_ROOT} + $ENV{MIOPEN_ROOT}/lib + $ENV{MIOPEN_ROOT}/lib64 + NO_DEFAULT_PATH + DOC "Path to MIOpen library.") if(MIOPEN_INCLUDE_DIR AND MIOPEN_LIBRARY) - set(MIOPEN_FOUND ON) + set(MIOPEN_FOUND ON) else() - set(MIOPEN_FOUND OFF) + set(MIOPEN_FOUND OFF) endif() -macro(find_miopen_version miopen_header_file) - file(READ ${miopen_header_file} MIOPEN_VERSION_FILE_CONTENTS) - get_filename_component(MIOPEN_LIB_PATH ${MIOPEN_LIBRARY} DIRECTORY) +macro(find_miopen_version miopen_header_file) + file(READ ${miopen_header_file} MIOPEN_VERSION_FILE_CONTENTS) + get_filename_component(MIOPEN_LIB_PATH ${MIOPEN_LIBRARY} DIRECTORY) - string(REGEX MATCH "define MIOPEN_VERSION_MAJOR +([0-9]+)" MIOPEN_MAJOR_VERSION - "${MIOPEN_VERSION_FILE_CONTENTS}") - string(REGEX REPLACE "define MIOPEN_VERSION_MAJOR +([0-9]+)" "\\1" - MIOPEN_MAJOR_VERSION "${MIOPEN_MAJOR_VERSION}") - string(REGEX MATCH "define MIOPEN_VERSION_MINOR +([0-9]+)" MIOPEN_MINOR_VERSION - "${MIOPEN_VERSION_FILE_CONTENTS}") - string(REGEX REPLACE "define MIOPEN_VERSION_MINOR +([0-9]+)" "\\1" - MIOPEN_MINOR_VERSION "${MIOPEN_MINOR_VERSION}") - string(REGEX MATCH "define MIOPEN_VERSION_PATCH +([0-9]+)" MIOPEN_PATCH_VERSION - "${MIOPEN_VERSION_FILE_CONTENTS}") - string(REGEX REPLACE "define MIOPEN_VERSION_PATCH +([0-9]+)" "\\1" - MIOPEN_PATCH_VERSION "${MIOPEN_PATCH_VERSION}") - string(REGEX MATCH "define MIOPEN_VERSION_TWEAK +([0-9]+)" MIOPEN_TWEAK_VERSION - "${MIOPEN_VERSION_FILE_CONTENTS}") - string(REGEX REPLACE "define MIOPEN_VERSION_TWEAK +([0-9]+)" "\\1" - MIOPEN_TWEAK_VERSION "${MIOPEN_TWEAK_VERSION}") + string(REGEX MATCH "define MIOPEN_VERSION_MAJOR +([0-9]+)" + MIOPEN_MAJOR_VERSION "${MIOPEN_VERSION_FILE_CONTENTS}") + string(REGEX REPLACE "define MIOPEN_VERSION_MAJOR +([0-9]+)" "\\1" + MIOPEN_MAJOR_VERSION "${MIOPEN_MAJOR_VERSION}") + string(REGEX MATCH "define MIOPEN_VERSION_MINOR +([0-9]+)" + MIOPEN_MINOR_VERSION "${MIOPEN_VERSION_FILE_CONTENTS}") + string(REGEX REPLACE "define MIOPEN_VERSION_MINOR +([0-9]+)" "\\1" + MIOPEN_MINOR_VERSION "${MIOPEN_MINOR_VERSION}") + string(REGEX MATCH "define MIOPEN_VERSION_PATCH +([0-9]+)" + MIOPEN_PATCH_VERSION "${MIOPEN_VERSION_FILE_CONTENTS}") + string(REGEX REPLACE "define MIOPEN_VERSION_PATCH +([0-9]+)" "\\1" + MIOPEN_PATCH_VERSION "${MIOPEN_PATCH_VERSION}") + string(REGEX MATCH "define MIOPEN_VERSION_TWEAK +([0-9]+)" + MIOPEN_TWEAK_VERSION "${MIOPEN_VERSION_FILE_CONTENTS}") + string(REGEX REPLACE "define MIOPEN_VERSION_TWEAK +([0-9]+)" "\\1" + MIOPEN_TWEAK_VERSION "${MIOPEN_TWEAK_VERSION}") - if(NOT MIOPEN_MAJOR_VERSION) - set(MIOPEN_VERSION "???") - else() - add_definitions("-DMIOPEN_MAJOR_VERSION=\"${MIOPEN_MAJOR_VERSION}\"") - math(EXPR MIOPEN_VERSION - "${MIOPEN_MAJOR_VERSION} * 1000 + + if(NOT MIOPEN_MAJOR_VERSION) + set(MIOPEN_VERSION "???") + else() + add_definitions("-DMIOPEN_MAJOR_VERSION=\"${MIOPEN_MAJOR_VERSION}\"") + math(EXPR MIOPEN_VERSION "${MIOPEN_MAJOR_VERSION} * 1000 + ${MIOPEN_MINOR_VERSION} * 10 + ${MIOPEN_PATCH_VERSION}") - message(STATUS "Current MIOpen header is ${MIOPEN_INCLUDE_DIR}/miopen/miopen.h " - "Current MIOpen version is v${MIOPEN_MAJOR_VERSION}.${MIOPEN_MINOR_VERSION}.${MIOPEN_PATCH_VERSION}. ") - endif() + message( + STATUS + "Current MIOpen header is ${MIOPEN_INCLUDE_DIR}/miopen/miopen.h " + "Current MIOpen version is v${MIOPEN_MAJOR_VERSION}.${MIOPEN_MINOR_VERSION}.${MIOPEN_PATCH_VERSION}. " + ) + endif() endmacro() if(MIOPEN_FOUND) - find_miopen_version(${MIOPEN_INCLUDE_DIR}/miopen/version.h) + find_miopen_version(${MIOPEN_INCLUDE_DIR}/miopen/version.h) endif() diff --git a/cmake/nccl.cmake b/cmake/nccl.cmake index 9124fec0b85..8ce3cd91ac8 100644 --- a/cmake/nccl.cmake +++ b/cmake/nccl.cmake @@ -1,55 +1,59 @@ if(NOT WITH_GPU) - return() + return() endif() # Now we don't support NCCL on windows if(WIN32) - return() + return() endif() if(WITH_NCCL) - set(NCCL_ROOT "/usr" CACHE PATH "NCCL ROOT") - find_path(NCCL_INCLUDE_DIR nccl.h - PATHS ${NCCL_ROOT} ${NCCL_ROOT}/include ${NCCL_ROOT}/local/include - $ENV{NCCL_ROOT} $ENV{NCCL_ROOT}/include $ENV{NCCL_ROOT}/local/include - NO_DEFAULT_PATH - ) + set(NCCL_ROOT + "/usr" + CACHE PATH "NCCL ROOT") + find_path( + NCCL_INCLUDE_DIR nccl.h + PATHS ${NCCL_ROOT} ${NCCL_ROOT}/include ${NCCL_ROOT}/local/include + $ENV{NCCL_ROOT} $ENV{NCCL_ROOT}/include $ENV{NCCL_ROOT}/local/include + NO_DEFAULT_PATH) - file(READ ${NCCL_INCLUDE_DIR}/nccl.h NCCL_VERSION_FILE_CONTENTS) + file(READ ${NCCL_INCLUDE_DIR}/nccl.h NCCL_VERSION_FILE_CONTENTS) - string(REGEX MATCH "define NCCL_VERSION_CODE +([0-9]+)" - NCCL_VERSION "${NCCL_VERSION_FILE_CONTENTS}") - string(REGEX REPLACE "define NCCL_VERSION_CODE +([0-9]+)" "\\1" - NCCL_VERSION "${NCCL_VERSION}") + string(REGEX MATCH "define NCCL_VERSION_CODE +([0-9]+)" NCCL_VERSION + "${NCCL_VERSION_FILE_CONTENTS}") + string(REGEX REPLACE "define NCCL_VERSION_CODE +([0-9]+)" "\\1" NCCL_VERSION + "${NCCL_VERSION}") - if("${NCCL_VERSION}" GREATER "2000") - message(STATUS "Current NCCL header is ${NCCL_INCLUDE_DIR}/nccl.h. " - "Current NCCL version is v${NCCL_VERSION}. ") - else() - # in old version nccl, it may not define NCCL_VERSION_CODE - string(REGEX MATCH "define NCCL_MAJOR +([0-9]+)" NCCL_MAJOR_VERSION - "${NCCL_VERSION_FILE_CONTENTS}") - string(REGEX REPLACE "define NCCL_MAJOR +([0-9]+)" "\\1" - NCCL_MAJOR_VERSION "${NCCL_MAJOR_VERSION}") - string(REGEX MATCH "define NCCL_MINOR +([0-9]+)" NCCL_MINOR_VERSION - "${NCCL_VERSION_FILE_CONTENTS}") - string(REGEX REPLACE "define NCCL_MINOR +([0-9]+)" "\\1" - NCCL_MINOR_VERSION "${NCCL_MINOR_VERSION}") - string(REGEX MATCH "define NCCL_PATCH +([0-9]+)" - NCCL_PATCH_VERSION "${NCCL_VERSION_FILE_CONTENTS}") - string(REGEX REPLACE "define NCCL_PATCH +([0-9]+)" "\\1" - NCCL_PATCH_VERSION "${NCCL_PATCH_VERSION}") + if("${NCCL_VERSION}" GREATER "2000") + message(STATUS "Current NCCL header is ${NCCL_INCLUDE_DIR}/nccl.h. " + "Current NCCL version is v${NCCL_VERSION}. ") + else() + # in old version nccl, it may not define NCCL_VERSION_CODE + string(REGEX MATCH "define NCCL_MAJOR +([0-9]+)" NCCL_MAJOR_VERSION + "${NCCL_VERSION_FILE_CONTENTS}") + string(REGEX REPLACE "define NCCL_MAJOR +([0-9]+)" "\\1" NCCL_MAJOR_VERSION + "${NCCL_MAJOR_VERSION}") + string(REGEX MATCH "define NCCL_MINOR +([0-9]+)" NCCL_MINOR_VERSION + "${NCCL_VERSION_FILE_CONTENTS}") + string(REGEX REPLACE "define NCCL_MINOR +([0-9]+)" "\\1" NCCL_MINOR_VERSION + "${NCCL_MINOR_VERSION}") + string(REGEX MATCH "define NCCL_PATCH +([0-9]+)" NCCL_PATCH_VERSION + "${NCCL_VERSION_FILE_CONTENTS}") + string(REGEX REPLACE "define NCCL_PATCH +([0-9]+)" "\\1" NCCL_PATCH_VERSION + "${NCCL_PATCH_VERSION}") - if(NOT NCCL_MAJOR_VERSION) - set(NCCL_VERSION "0") - else() - math(EXPR NCCL_VERSION - "${NCCL_MAJOR_VERSION} * 1000 + + if(NOT NCCL_MAJOR_VERSION) + set(NCCL_VERSION "0") + else() + math(EXPR NCCL_VERSION "${NCCL_MAJOR_VERSION} * 1000 + ${NCCL_MINOR_VERSION} * 100 + ${NCCL_PATCH_VERSION}") - endif() - add_definitions("-DNCCL_VERSION_CODE=$NCCL_VERSION") - - message(STATUS "Current NCCL header is ${NCCL_INCLUDE_DIR}/nccl.h. " - "Current NCCL version is v${NCCL_MAJOR_VERSION}.${NCCL_MINOR_VERSION}.${NCCL_PATCH_VERSION} ") endif() + add_definitions("-DNCCL_VERSION_CODE=$NCCL_VERSION") + + message( + STATUS + "Current NCCL header is ${NCCL_INCLUDE_DIR}/nccl.h. " + "Current NCCL version is v${NCCL_MAJOR_VERSION}.${NCCL_MINOR_VERSION}.${NCCL_PATCH_VERSION} " + ) + endif() endif() diff --git a/cmake/neuware.cmake b/cmake/neuware.cmake index a371a0032d9..16dbf16899b 100644 --- a/cmake/neuware.cmake +++ b/cmake/neuware.cmake @@ -1,18 +1,18 @@ if(NOT WITH_MLU) - return() + return() endif() if(NOT ENV{NEUWARE_HOME}) - set(NEUWARE_HOME "/usr/local/neuware") + set(NEUWARE_HOME "/usr/local/neuware") else() - set(NEUWARE_HOME $ENV{NEUWARE_HOME}) + set(NEUWARE_HOME $ENV{NEUWARE_HOME}) endif() message(STATUS "NEUWARE_HOME: " ${NEUWARE_HOME}) set(NEUWARE_INCLUDE_DIR ${NEUWARE_HOME}/include) set(NEUWARE_LIB_DIR ${NEUWARE_HOME}/lib64) -INCLUDE_DIRECTORIES(${NEUWARE_INCLUDE_DIR}) +include_directories(${NEUWARE_INCLUDE_DIR}) set(CNNL_LIB ${NEUWARE_LIB_DIR}/libcnnl.so) set(CNRT_LIB ${NEUWARE_LIB_DIR}/libcnrt.so) @@ -23,10 +23,10 @@ generate_dummy_static_lib(LIB_NAME "neuware_lib" GENERATOR "neuware.cmake") set(NEUWARE_LIB_DEPS ${CNNL_LIB} ${CNRT_LIB} ${CNDRV_LIB} ${CNPAPI_LIB}) if(WITH_CNCL) - MESSAGE(STATUS "Compile with CNCL!") - ADD_DEFINITIONS(-DPADDLE_WITH_CNCL) - set(CNCL_LIB ${NEUWARE_LIB_DIR}/libcncl.so) - list(APPEND NEUWARE_LIB_DEPS ${CNCL_LIB}) + message(STATUS "Compile with CNCL!") + add_definitions(-DPADDLE_WITH_CNCL) + set(CNCL_LIB ${NEUWARE_LIB_DIR}/libcncl.so) + list(APPEND NEUWARE_LIB_DEPS ${CNCL_LIB}) endif() -TARGET_LINK_LIBRARIES(neuware_lib ${NEUWARE_LIB_DEPS}) +target_link_libraries(neuware_lib ${NEUWARE_LIB_DEPS}) diff --git a/cmake/operators.cmake b/cmake/operators.cmake index 51e4bd3ac41..4e0cc1027ef 100644 --- a/cmake/operators.cmake +++ b/cmake/operators.cmake @@ -3,538 +3,611 @@ include(unity_build) set(PART_CUDA_KERNEL_FILES) function(find_register FILENAME PATTERN OUTPUT) -# find the op_name of REGISTER_OPERATOR(op_name, ...), REGISTER_OP_CPU_KERNEL(op_name, ...) , etc. -# set op_name to OUTPUT - set(options "") - set(oneValueArgs "") - set(multiValueArgs "") - file(READ ${FILENAME} CONTENT) - # message ("number of arguments sent to function: ${ARGC}") - # message ("all function arguments: ${ARGV}") - # message("PATTERN ${PATTERN}") - string(REGEX MATCH "${PATTERN}\\([ \t\r\n]*[a-z0-9_]*," register "${CONTENT}") - if (NOT register STREQUAL "") - string(REPLACE "${PATTERN}(" "" register "${register}") - string(REPLACE "," "" register "${register}") - # [ \t\r\n]+ is used for blank characters. - # Here we use '+' instead of '*' since it is a REPLACE operation. - string(REGEX REPLACE "[ \t\r\n]+" "" register "${register}") - endif() - - set(${OUTPUT} ${register} PARENT_SCOPE) + # find the op_name of REGISTER_OPERATOR(op_name, ...), REGISTER_OP_CPU_KERNEL(op_name, ...) , etc. + # set op_name to OUTPUT + set(options "") + set(oneValueArgs "") + set(multiValueArgs "") + file(READ ${FILENAME} CONTENT) + # message ("number of arguments sent to function: ${ARGC}") + # message ("all function arguments: ${ARGV}") + # message("PATTERN ${PATTERN}") + string(REGEX MATCH "${PATTERN}\\([ \t\r\n]*[a-z0-9_]*," register "${CONTENT}") + if(NOT register STREQUAL "") + string(REPLACE "${PATTERN}(" "" register "${register}") + string(REPLACE "," "" register "${register}") + # [ \t\r\n]+ is used for blank characters. + # Here we use '+' instead of '*' since it is a REPLACE operation. + string(REGEX REPLACE "[ \t\r\n]+" "" register "${register}") + endif() + + set(${OUTPUT} + ${register} + PARENT_SCOPE) endfunction() function(op_library TARGET) - # op_library is a function to create op library. The interface is same as - # cc_library. But it handle split GPU/CPU code and link some common library - # for ops. - set(cc_srcs) - set(cu_srcs) - set(hip_srcs) - set(cu_cc_srcs) - set(hip_cc_srcs) - set(xpu_cc_srcs) - set(xpu_kp_cc_srcs) - set(npu_cc_srcs) - set(mlu_cc_srcs) - set(cudnn_cu_cc_srcs) - set(miopen_cu_cc_srcs) - set(cudnn_cu_srcs) - set(miopen_cu_srcs) - set(CUDNN_FILE) - set(MIOPEN_FILE) - set(mkldnn_cc_srcs) - set(MKLDNN_FILE) - set(op_common_deps operator op_registry math_function layer common_infer_shape_functions) - if (WITH_ASCEND_CL) - set(op_common_deps ${op_common_deps} npu_op_runner) - endif() - if (WITH_MLU) - set(op_common_deps ${op_common_deps} mlu_baseop) - endif() + # op_library is a function to create op library. The interface is same as + # cc_library. But it handle split GPU/CPU code and link some common library + # for ops. + set(cc_srcs) + set(cu_srcs) + set(hip_srcs) + set(cu_cc_srcs) + set(hip_cc_srcs) + set(xpu_cc_srcs) + set(xpu_kp_cc_srcs) + set(npu_cc_srcs) + set(mlu_cc_srcs) + set(cudnn_cu_cc_srcs) + set(miopen_cu_cc_srcs) + set(cudnn_cu_srcs) + set(miopen_cu_srcs) + set(CUDNN_FILE) + set(MIOPEN_FILE) + set(mkldnn_cc_srcs) + set(MKLDNN_FILE) + set(op_common_deps operator op_registry math_function layer + common_infer_shape_functions) + if(WITH_ASCEND_CL) + set(op_common_deps ${op_common_deps} npu_op_runner) + endif() + if(WITH_MLU) + set(op_common_deps ${op_common_deps} mlu_baseop) + endif() - # Option `UNITY` is used to specify that operator `TARGET` will compiles with Unity Build. - set(options UNITY) - set(oneValueArgs "") - set(multiValueArgs SRCS DEPS) - set(pybind_flag 0) - cmake_parse_arguments(op_library "${options}" "${oneValueArgs}" - "${multiValueArgs}" ${ARGN}) + # Option `UNITY` is used to specify that operator `TARGET` will compiles with Unity Build. + set(options UNITY) + set(oneValueArgs "") + set(multiValueArgs SRCS DEPS) + set(pybind_flag 0) + cmake_parse_arguments(op_library "${options}" "${oneValueArgs}" + "${multiValueArgs}" ${ARGN}) - list(LENGTH op_library_SRCS op_library_SRCS_len) - if (${op_library_SRCS_len} EQUAL 0) - if (EXISTS ${CMAKE_CURRENT_SOURCE_DIR}/${TARGET}.cc) - list(APPEND cc_srcs ${TARGET}.cc) - endif() - if(WITH_GPU) - if (EXISTS ${CMAKE_CURRENT_SOURCE_DIR}/${TARGET}.cu.cc) - list(APPEND cu_cc_srcs ${TARGET}.cu.cc) - endif() - if (EXISTS ${CMAKE_CURRENT_SOURCE_DIR}/${TARGET}.cu) - list(APPEND cu_srcs ${TARGET}.cu) - endif() - # rename in KP: .kps -> .cu - if (EXISTS ${CMAKE_CURRENT_SOURCE_DIR}/${TARGET}.kps) - file(COPY ${TARGET}.kps DESTINATION ${CMAKE_CURRENT_BINARY_DIR}) - file(RENAME ${CMAKE_CURRENT_BINARY_DIR}/${TARGET}.kps ${CMAKE_CURRENT_BINARY_DIR}/${TARGET}.cu) - list(APPEND cu_srcs ${CMAKE_CURRENT_BINARY_DIR}/${TARGET}.cu) - endif() - if (WITH_NV_JETSON) - list(REMOVE_ITEM cu_srcs "decode_jpeg_op.cu") - endif() - if (EXISTS ${CMAKE_CURRENT_SOURCE_DIR}/${TARGET}.part.cu) - set(PART_CUDA_KERNEL_FILES ${CMAKE_CURRENT_SOURCE_DIR}/${TARGET}.part.cu - ${PART_CUDA_KERNEL_FILES} PARENT_SCOPE) - list(APPEND cu_srcs ${CMAKE_CURRENT_SOURCE_DIR}/${TARGET}.part.cu) - endif() - string(REPLACE "_op" "_cudnn_op" CUDNN_FILE "${TARGET}") - if (EXISTS ${CMAKE_CURRENT_SOURCE_DIR}/${CUDNN_FILE}.cu.cc) - list(APPEND cudnn_cu_cc_srcs ${CUDNN_FILE}.cu.cc) - endif() - if (EXISTS ${CMAKE_CURRENT_SOURCE_DIR}/${CUDNN_FILE}.cu) - list(APPEND cudnn_cu_srcs ${CUDNN_FILE}.cu) - endif() - endif() - if(WITH_ROCM) - if (EXISTS ${CMAKE_CURRENT_SOURCE_DIR}/${TARGET}.cu.cc) - list(APPEND hip_cc_srcs ${TARGET}.cu.cc) - endif() - if (EXISTS ${CMAKE_CURRENT_SOURCE_DIR}/${TARGET}.cu) - list(APPEND hip_srcs ${TARGET}.cu) - endif() - # rename in KP: .kps -> .cu - if (EXISTS ${CMAKE_CURRENT_SOURCE_DIR}/${TARGET}.kps) - file(COPY ${TARGET}.kps DESTINATION ${CMAKE_CURRENT_BINARY_DIR}) - file(RENAME ${CMAKE_CURRENT_BINARY_DIR}/${TARGET}.kps ${CMAKE_CURRENT_BINARY_DIR}/${TARGET}.cu) - list(APPEND hip_srcs ${CMAKE_CURRENT_BINARY_DIR}/${TARGET}.cu) - endif() - if (EXISTS ${CMAKE_CURRENT_SOURCE_DIR}/${TARGET}.part.cu) - set(PART_CUDA_KERNEL_FILES ${CMAKE_CURRENT_SOURCE_DIR}/${TARGET}.part.cu - ${PART_CUDA_KERNEL_FILES} PARENT_SCOPE) - list(APPEND hip_srcs ${CMAKE_CURRENT_SOURCE_DIR}/${TARGET}.part.cu) - endif() - string(REPLACE "_op" "_cudnn_op" MIOPEN_FILE "${TARGET}") - if (EXISTS ${CMAKE_CURRENT_SOURCE_DIR}/${MIOPEN_FILE}.cu.cc) - list(APPEND miopen_cu_cc_srcs ${MIOPEN_FILE}.cu.cc) - endif() - if (EXISTS ${CMAKE_CURRENT_SOURCE_DIR}/${MIOPEN_FILE}.cu) - list(APPEND miopen_cu_srcs ${MIOPEN_FILE}.cu) - endif() - endif() - if(WITH_MKLDNN) - string(REPLACE "_op" "_mkldnn_op" MKLDNN_FILE "${TARGET}") - if (EXISTS ${CMAKE_CURRENT_SOURCE_DIR}/mkldnn/${MKLDNN_FILE}.cc) - list(APPEND mkldnn_cc_srcs mkldnn/${MKLDNN_FILE}.cc) - endif() - endif() - if(WITH_XPU) - string(REPLACE "_op" "_op_xpu" XPU_FILE "${TARGET}") - if (EXISTS ${CMAKE_CURRENT_SOURCE_DIR}/${XPU_FILE}.cc) - list(APPEND xpu_cc_srcs ${XPU_FILE}.cc) - endif() - endif() - if(WITH_XPU_KP) - if (EXISTS ${CMAKE_CURRENT_SOURCE_DIR}/${TARGET}.xpu) - list(APPEND xpu_kp_cc_srcs ${TARGET}.xpu) - endif() - if (EXISTS ${CMAKE_CURRENT_SOURCE_DIR}/${TARGET}.kps) - list(APPEND xpu_kp_cc_srcs ${TARGET}.kps) - endif() - endif() - if(WITH_ASCEND_CL) - string(REPLACE "_op" "_op_npu" NPU_FILE "${TARGET}") - if (EXISTS ${CMAKE_CURRENT_SOURCE_DIR}/${NPU_FILE}.cc) - list(APPEND npu_cc_srcs ${NPU_FILE}.cc) - endif() - endif() - if(WITH_MLU) - string(REPLACE "_op" "_op_mlu" MLU_FILE "${TARGET}") - if (EXISTS ${CMAKE_CURRENT_SOURCE_DIR}/${MLU_FILE}.cc) - list(APPEND mlu_cc_srcs ${MLU_FILE}.cc) - endif() - endif() - else() - foreach(src ${op_library_SRCS}) - if(WITH_ROCM AND ${src} MATCHES ".*_cudnn_op.cu$") - list(APPEND miopen_cu_srcs ${src}) - elseif(WITH_ROCM AND ${src} MATCHES ".*\\.cu$") - list(APPEND hip_srcs ${src}) - elseif(WITH_ROCM AND ${src} MATCHES ".*_cudnn_op.cu.cc$") - list(APPEND miopen_cu_cc_srcs ${src}) - elseif(WITH_ROCM AND ${src} MATCHES ".*\\.cu.cc$") - list(APPEND hip_cc_srcs ${src}) - elseif(WITH_GPU AND ${src} MATCHES ".*_cudnn_op.cu$") - list(APPEND cudnn_cu_srcs ${src}) - elseif (WITH_GPU AND ${src} MATCHES ".*\\.cu$") - list(APPEND cu_srcs ${src}) - elseif(WITH_GPU AND ${src} MATCHES ".*_cudnn_op.cu.cc$") - list(APPEND cudnn_cu_cc_srcs ${src}) - elseif(WITH_GPU AND ${src} MATCHES ".*\\.cu.cc$") - list(APPEND cu_cc_srcs ${src}) - elseif(WITH_MKLDNN AND ${src} MATCHES ".*_mkldnn_op.cc$") - list(APPEND mkldnn_cc_srcs ${src}) - elseif(WITH_XPU AND ${src} MATCHES ".*_op_xpu.cc$") - list(APPEND xpu_cc_srcs ${src}) - elseif(WITH_XPU_KP AND ${src} MATCHES ".*\\.xpu$") - list(APPEND xpu_kp_cc_srcs ${src}) - elseif(WITH_XPU_KP AND ${src} MATCHES ".*\\.kps$") - list(APPEND xpu_kp_cc_srcs ${src}) - elseif(WITH_ASCEND_CL AND ${src} MATCHES ".*_op_npu.cc$") - list(APPEND npu_cc_srcs ${src}) - elseif(WITH_MLU AND ${src} MATCHES ".*_op_mlu.cc$") - list(APPEND mlu_cc_srcs ${src}) - elseif(${src} MATCHES ".*\\.cc$") - list(APPEND cc_srcs ${src}) - else() - message(FATAL_ERROR "${TARGET} Source file ${src} should only be .cc or .cu or .xpu") - endif() - endforeach() + list(LENGTH op_library_SRCS op_library_SRCS_len) + if(${op_library_SRCS_len} EQUAL 0) + if(EXISTS ${CMAKE_CURRENT_SOURCE_DIR}/${TARGET}.cc) + list(APPEND cc_srcs ${TARGET}.cc) + endif() + if(WITH_GPU) + if(EXISTS ${CMAKE_CURRENT_SOURCE_DIR}/${TARGET}.cu.cc) + list(APPEND cu_cc_srcs ${TARGET}.cu.cc) + endif() + if(EXISTS ${CMAKE_CURRENT_SOURCE_DIR}/${TARGET}.cu) + list(APPEND cu_srcs ${TARGET}.cu) + endif() + # rename in KP: .kps -> .cu + if(EXISTS ${CMAKE_CURRENT_SOURCE_DIR}/${TARGET}.kps) + file(COPY ${TARGET}.kps DESTINATION ${CMAKE_CURRENT_BINARY_DIR}) + file(RENAME ${CMAKE_CURRENT_BINARY_DIR}/${TARGET}.kps + ${CMAKE_CURRENT_BINARY_DIR}/${TARGET}.cu) + list(APPEND cu_srcs ${CMAKE_CURRENT_BINARY_DIR}/${TARGET}.cu) + endif() + if(WITH_NV_JETSON) + list(REMOVE_ITEM cu_srcs "decode_jpeg_op.cu") + endif() + if(EXISTS ${CMAKE_CURRENT_SOURCE_DIR}/${TARGET}.part.cu) + set(PART_CUDA_KERNEL_FILES + ${CMAKE_CURRENT_SOURCE_DIR}/${TARGET}.part.cu + ${PART_CUDA_KERNEL_FILES} + PARENT_SCOPE) + list(APPEND cu_srcs ${CMAKE_CURRENT_SOURCE_DIR}/${TARGET}.part.cu) + endif() + string(REPLACE "_op" "_cudnn_op" CUDNN_FILE "${TARGET}") + if(EXISTS ${CMAKE_CURRENT_SOURCE_DIR}/${CUDNN_FILE}.cu.cc) + list(APPEND cudnn_cu_cc_srcs ${CUDNN_FILE}.cu.cc) + endif() + if(EXISTS ${CMAKE_CURRENT_SOURCE_DIR}/${CUDNN_FILE}.cu) + list(APPEND cudnn_cu_srcs ${CUDNN_FILE}.cu) + endif() + endif() + if(WITH_ROCM) + if(EXISTS ${CMAKE_CURRENT_SOURCE_DIR}/${TARGET}.cu.cc) + list(APPEND hip_cc_srcs ${TARGET}.cu.cc) + endif() + if(EXISTS ${CMAKE_CURRENT_SOURCE_DIR}/${TARGET}.cu) + list(APPEND hip_srcs ${TARGET}.cu) + endif() + # rename in KP: .kps -> .cu + if(EXISTS ${CMAKE_CURRENT_SOURCE_DIR}/${TARGET}.kps) + file(COPY ${TARGET}.kps DESTINATION ${CMAKE_CURRENT_BINARY_DIR}) + file(RENAME ${CMAKE_CURRENT_BINARY_DIR}/${TARGET}.kps + ${CMAKE_CURRENT_BINARY_DIR}/${TARGET}.cu) + list(APPEND hip_srcs ${CMAKE_CURRENT_BINARY_DIR}/${TARGET}.cu) + endif() + if(EXISTS ${CMAKE_CURRENT_SOURCE_DIR}/${TARGET}.part.cu) + set(PART_CUDA_KERNEL_FILES + ${CMAKE_CURRENT_SOURCE_DIR}/${TARGET}.part.cu + ${PART_CUDA_KERNEL_FILES} + PARENT_SCOPE) + list(APPEND hip_srcs ${CMAKE_CURRENT_SOURCE_DIR}/${TARGET}.part.cu) + endif() + string(REPLACE "_op" "_cudnn_op" MIOPEN_FILE "${TARGET}") + if(EXISTS ${CMAKE_CURRENT_SOURCE_DIR}/${MIOPEN_FILE}.cu.cc) + list(APPEND miopen_cu_cc_srcs ${MIOPEN_FILE}.cu.cc) + endif() + if(EXISTS ${CMAKE_CURRENT_SOURCE_DIR}/${MIOPEN_FILE}.cu) + list(APPEND miopen_cu_srcs ${MIOPEN_FILE}.cu) + endif() + endif() + if(WITH_MKLDNN) + string(REPLACE "_op" "_mkldnn_op" MKLDNN_FILE "${TARGET}") + if(EXISTS ${CMAKE_CURRENT_SOURCE_DIR}/mkldnn/${MKLDNN_FILE}.cc) + list(APPEND mkldnn_cc_srcs mkldnn/${MKLDNN_FILE}.cc) + endif() + endif() + if(WITH_XPU) + string(REPLACE "_op" "_op_xpu" XPU_FILE "${TARGET}") + if(EXISTS ${CMAKE_CURRENT_SOURCE_DIR}/${XPU_FILE}.cc) + list(APPEND xpu_cc_srcs ${XPU_FILE}.cc) + endif() + endif() + if(WITH_XPU_KP) + if(EXISTS ${CMAKE_CURRENT_SOURCE_DIR}/${TARGET}.xpu) + list(APPEND xpu_kp_cc_srcs ${TARGET}.xpu) + endif() + if(EXISTS ${CMAKE_CURRENT_SOURCE_DIR}/${TARGET}.kps) + list(APPEND xpu_kp_cc_srcs ${TARGET}.kps) + endif() + endif() + if(WITH_ASCEND_CL) + string(REPLACE "_op" "_op_npu" NPU_FILE "${TARGET}") + if(EXISTS ${CMAKE_CURRENT_SOURCE_DIR}/${NPU_FILE}.cc) + list(APPEND npu_cc_srcs ${NPU_FILE}.cc) + endif() endif() - - list(LENGTH xpu_cc_srcs xpu_cc_srcs_len) - list(LENGTH xpu_kp_cc_srcs xpu_kp_cc_srcs_len) - list(LENGTH cc_srcs cc_srcs_len) - if (${cc_srcs_len} EQUAL 0) - message(FATAL_ERROR "The op library ${TARGET} should contains at least one .cc file") + if(WITH_MLU) + string(REPLACE "_op" "_op_mlu" MLU_FILE "${TARGET}") + if(EXISTS ${CMAKE_CURRENT_SOURCE_DIR}/${MLU_FILE}.cc) + list(APPEND mlu_cc_srcs ${MLU_FILE}.cc) + endif() endif() - if (WIN32) + else() + foreach(src ${op_library_SRCS}) + if(WITH_ROCM AND ${src} MATCHES ".*_cudnn_op.cu$") + list(APPEND miopen_cu_srcs ${src}) + elseif(WITH_ROCM AND ${src} MATCHES ".*\\.cu$") + list(APPEND hip_srcs ${src}) + elseif(WITH_ROCM AND ${src} MATCHES ".*_cudnn_op.cu.cc$") + list(APPEND miopen_cu_cc_srcs ${src}) + elseif(WITH_ROCM AND ${src} MATCHES ".*\\.cu.cc$") + list(APPEND hip_cc_srcs ${src}) + elseif(WITH_GPU AND ${src} MATCHES ".*_cudnn_op.cu$") + list(APPEND cudnn_cu_srcs ${src}) + elseif(WITH_GPU AND ${src} MATCHES ".*\\.cu$") + list(APPEND cu_srcs ${src}) + elseif(WITH_GPU AND ${src} MATCHES ".*_cudnn_op.cu.cc$") + list(APPEND cudnn_cu_cc_srcs ${src}) + elseif(WITH_GPU AND ${src} MATCHES ".*\\.cu.cc$") + list(APPEND cu_cc_srcs ${src}) + elseif(WITH_MKLDNN AND ${src} MATCHES ".*_mkldnn_op.cc$") + list(APPEND mkldnn_cc_srcs ${src}) + elseif(WITH_XPU AND ${src} MATCHES ".*_op_xpu.cc$") + list(APPEND xpu_cc_srcs ${src}) + elseif(WITH_XPU_KP AND ${src} MATCHES ".*\\.xpu$") + list(APPEND xpu_kp_cc_srcs ${src}) + elseif(WITH_XPU_KP AND ${src} MATCHES ".*\\.kps$") + list(APPEND xpu_kp_cc_srcs ${src}) + elseif(WITH_ASCEND_CL AND ${src} MATCHES ".*_op_npu.cc$") + list(APPEND npu_cc_srcs ${src}) + elseif(WITH_MLU AND ${src} MATCHES ".*_op_mlu.cc$") + list(APPEND mlu_cc_srcs ${src}) + elseif(${src} MATCHES ".*\\.cc$") + list(APPEND cc_srcs ${src}) + else() + message( + FATAL_ERROR + "${TARGET} Source file ${src} should only be .cc or .cu or .xpu") + endif() + endforeach() + endif() + + list(LENGTH xpu_cc_srcs xpu_cc_srcs_len) + list(LENGTH xpu_kp_cc_srcs xpu_kp_cc_srcs_len) + list(LENGTH cc_srcs cc_srcs_len) + if(${cc_srcs_len} EQUAL 0) + message( + FATAL_ERROR + "The op library ${TARGET} should contains at least one .cc file") + endif() + if(WIN32) # remove windows unsupported op, because windows has no nccl, no warpctc such ops. foreach(windows_unsupport_op "nccl_op" "gen_nccl_id_op") - if ("${TARGET}" STREQUAL "${windows_unsupport_op}") - return() - endif() + if("${TARGET}" STREQUAL "${windows_unsupport_op}") + return() + endif() endforeach() - endif(WIN32) + endif(WIN32) + + # Unity Build relies on global option `WITH_UNITY_BUILD` and local option `UNITY`. + if(WITH_UNITY_BUILD AND op_library_UNITY) + # Generate the unity target name by the directory where source files located. + string(REPLACE "${PADDLE_SOURCE_DIR}/paddle/fluid/" "" UNITY_TARGET + ${CMAKE_CURRENT_SOURCE_DIR}) + string(REPLACE "/" "_" UNITY_TARGET ${UNITY_TARGET}) + set(UNITY_TARGET "paddle_${UNITY_TARGET}_unity") + if(NOT ${UNITY_TARGET} IN_LIST OP_LIBRARY) + set(OP_LIBRARY + ${UNITY_TARGET} ${OP_LIBRARY} + CACHE INTERNAL "op libs") + endif() + else() + set(OP_LIBRARY + ${TARGET} ${OP_LIBRARY} + CACHE INTERNAL "op libs") + endif() + list(LENGTH op_library_DEPS op_library_DEPS_len) + if(${op_library_DEPS_len} GREATER 0) + set(DEPS_OPS + ${TARGET} ${DEPS_OPS} + PARENT_SCOPE) + endif() + if(WITH_GPU) # Unity Build relies on global option `WITH_UNITY_BUILD` and local option `UNITY`. if(WITH_UNITY_BUILD AND op_library_UNITY) - # Generate the unity target name by the directory where source files located. - string(REPLACE "${PADDLE_SOURCE_DIR}/paddle/fluid/" "" UNITY_TARGET ${CMAKE_CURRENT_SOURCE_DIR}) - string(REPLACE "/" "_" UNITY_TARGET ${UNITY_TARGET}) - set(UNITY_TARGET "paddle_${UNITY_TARGET}_unity") - if(NOT ${UNITY_TARGET} IN_LIST OP_LIBRARY) - set(OP_LIBRARY ${UNITY_TARGET} ${OP_LIBRARY} CACHE INTERNAL "op libs") - endif() + # Combine the cc and cu source files. + compose_unity_target_sources(${UNITY_TARGET} cc ${cc_srcs} ${cu_cc_srcs} + ${cudnn_cu_cc_srcs} ${mkldnn_cc_srcs}) + compose_unity_target_sources(${UNITY_TARGET} cu ${cudnn_cu_srcs} + ${cu_srcs}) + if(TARGET ${UNITY_TARGET}) + # If `UNITY_TARGET` exists, add source files to `UNITY_TARGET`. + target_sources(${UNITY_TARGET} PRIVATE ${unity_target_cc_sources} + ${unity_target_cu_sources}) + else() + # If `UNITY_TARGET` does not exist, create `UNITY_TARGET` with source files. + nv_library( + ${UNITY_TARGET} + SRCS ${unity_target_cc_sources} ${unity_target_cu_sources} + DEPS ${op_library_DEPS} ${op_common_deps}) + endif() + # Add alias library to handle dependencies. + add_library(${TARGET} ALIAS ${UNITY_TARGET}) else() - set(OP_LIBRARY ${TARGET} ${OP_LIBRARY} CACHE INTERNAL "op libs") + nv_library( + ${TARGET} + SRCS ${cc_srcs} ${cu_cc_srcs} ${cudnn_cu_cc_srcs} ${cudnn_cu_srcs} + ${mkldnn_cc_srcs} ${cu_srcs} + DEPS ${op_library_DEPS} ${op_common_deps}) endif() - - list(LENGTH op_library_DEPS op_library_DEPS_len) - if (${op_library_DEPS_len} GREATER 0) - set(DEPS_OPS ${TARGET} ${DEPS_OPS} PARENT_SCOPE) + elseif(WITH_ROCM) + list(REMOVE_ITEM miopen_cu_cc_srcs "affine_grid_cudnn_op.cu.cc") + list(REMOVE_ITEM miopen_cu_cc_srcs "grid_sampler_cudnn_op.cu.cc") + list(REMOVE_ITEM hip_srcs "cholesky_op.cu") + list(REMOVE_ITEM hip_srcs "cholesky_solve_op.cu") + list(REMOVE_ITEM hip_srcs "lu_op.cu") + list(REMOVE_ITEM hip_srcs "matrix_rank_op.cu") + list(REMOVE_ITEM hip_srcs "svd_op.cu") + list(REMOVE_ITEM hip_srcs "eigvalsh_op.cu") + list(REMOVE_ITEM hip_srcs "qr_op.cu") + list(REMOVE_ITEM hip_srcs "eigh_op.cu") + list(REMOVE_ITEM hip_srcs "lstsq_op.cu") + list(REMOVE_ITEM hip_srcs "multinomial_op.cu") + list(REMOVE_ITEM hip_srcs "decode_jpeg_op.cu") + hip_library( + ${TARGET} + SRCS ${cc_srcs} ${hip_cc_srcs} ${miopen_cu_cc_srcs} ${miopen_cu_srcs} + ${mkldnn_cc_srcs} ${hip_srcs} + DEPS ${op_library_DEPS} ${op_common_deps}) + elseif(WITH_XPU_KP AND ${xpu_kp_cc_srcs_len} GREATER 0) + xpu_library( + ${TARGET} + SRCS ${cc_srcs} ${mkldnn_cc_srcs} ${xpu_cc_srcs} ${xpu_kp_cc_srcs} + DEPS ${op_library_DEPS} ${op_common_deps}) + else() + # deal with CANN version control while registering NPU operators before build + if(WITH_ASCEND_CL) + if(CANN_VERSION LESS 504000) + list(REMOVE_ITEM npu_cc_srcs "multinomial_op_npu.cc") + list(REMOVE_ITEM npu_cc_srcs "take_along_axis_op_npu.cc") + endif() endif() - if (WITH_GPU) - # Unity Build relies on global option `WITH_UNITY_BUILD` and local option `UNITY`. - if(WITH_UNITY_BUILD AND op_library_UNITY) - # Combine the cc and cu source files. - compose_unity_target_sources(${UNITY_TARGET} cc ${cc_srcs} ${cu_cc_srcs} ${cudnn_cu_cc_srcs} ${mkldnn_cc_srcs}) - compose_unity_target_sources(${UNITY_TARGET} cu ${cudnn_cu_srcs} ${cu_srcs}) - if(TARGET ${UNITY_TARGET}) - # If `UNITY_TARGET` exists, add source files to `UNITY_TARGET`. - target_sources(${UNITY_TARGET} PRIVATE ${unity_target_cc_sources} ${unity_target_cu_sources}) - else() - # If `UNITY_TARGET` does not exist, create `UNITY_TARGET` with source files. - nv_library(${UNITY_TARGET} SRCS ${unity_target_cc_sources} ${unity_target_cu_sources} DEPS ${op_library_DEPS} ${op_common_deps}) - endif() - # Add alias library to handle dependencies. - add_library(${TARGET} ALIAS ${UNITY_TARGET}) - else() - nv_library(${TARGET} SRCS ${cc_srcs} ${cu_cc_srcs} ${cudnn_cu_cc_srcs} ${cudnn_cu_srcs} ${mkldnn_cc_srcs} ${cu_srcs} DEPS ${op_library_DEPS} - ${op_common_deps}) - endif() - elseif (WITH_ROCM) - list(REMOVE_ITEM miopen_cu_cc_srcs "affine_grid_cudnn_op.cu.cc") - list(REMOVE_ITEM miopen_cu_cc_srcs "grid_sampler_cudnn_op.cu.cc") - list(REMOVE_ITEM hip_srcs "cholesky_op.cu") - list(REMOVE_ITEM hip_srcs "cholesky_solve_op.cu") - list(REMOVE_ITEM hip_srcs "lu_op.cu") - list(REMOVE_ITEM hip_srcs "matrix_rank_op.cu") - list(REMOVE_ITEM hip_srcs "svd_op.cu") - list(REMOVE_ITEM hip_srcs "eigvalsh_op.cu") - list(REMOVE_ITEM hip_srcs "qr_op.cu") - list(REMOVE_ITEM hip_srcs "eigh_op.cu") - list(REMOVE_ITEM hip_srcs "lstsq_op.cu") - list(REMOVE_ITEM hip_srcs "multinomial_op.cu") - list(REMOVE_ITEM hip_srcs "decode_jpeg_op.cu") - hip_library(${TARGET} SRCS ${cc_srcs} ${hip_cc_srcs} ${miopen_cu_cc_srcs} ${miopen_cu_srcs} ${mkldnn_cc_srcs} ${hip_srcs} DEPS ${op_library_DEPS} - ${op_common_deps}) - elseif (WITH_XPU_KP AND ${xpu_kp_cc_srcs_len} GREATER 0) - xpu_library(${TARGET} SRCS ${cc_srcs} ${mkldnn_cc_srcs} ${xpu_cc_srcs} ${xpu_kp_cc_srcs} DEPS ${op_library_DEPS} ${op_common_deps}) + # Unity Build relies on global option `WITH_UNITY_BUILD` and local option `UNITY`. + if(WITH_UNITY_BUILD AND op_library_UNITY) + # Combine the cc source files. + compose_unity_target_sources( + ${UNITY_TARGET} + cc + ${cc_srcs} + ${mkldnn_cc_srcs} + ${xpu_cc_srcs} + ${npu_cc_srcs} + ${mlu_cc_srcs}) + if(TARGET ${UNITY_TARGET}) + # If `UNITY_TARGET` exists, add source files to `UNITY_TARGET`. + target_sources(${UNITY_TARGET} PRIVATE ${unity_target_cc_sources}) + else() + # If `UNITY_TARGET` does not exist, create `UNITY_TARGET` with source files. + cc_library( + ${UNITY_TARGET} + SRCS ${unity_target_cc_sources} + DEPS ${op_library_DEPS} ${op_common_deps}) + endif() + # Add alias library to handle dependencies. + add_library(${TARGET} ALIAS ${UNITY_TARGET}) else() - # deal with CANN version control while registering NPU operators before build - if (WITH_ASCEND_CL) - if (CANN_VERSION LESS 504000) - list(REMOVE_ITEM npu_cc_srcs "multinomial_op_npu.cc") - list(REMOVE_ITEM npu_cc_srcs "take_along_axis_op_npu.cc") - endif() - endif() - # Unity Build relies on global option `WITH_UNITY_BUILD` and local option `UNITY`. - if(WITH_UNITY_BUILD AND op_library_UNITY) - # Combine the cc source files. - compose_unity_target_sources(${UNITY_TARGET} cc ${cc_srcs} ${mkldnn_cc_srcs} ${xpu_cc_srcs} ${npu_cc_srcs} ${mlu_cc_srcs}) - if(TARGET ${UNITY_TARGET}) - # If `UNITY_TARGET` exists, add source files to `UNITY_TARGET`. - target_sources(${UNITY_TARGET} PRIVATE ${unity_target_cc_sources}) - else() - # If `UNITY_TARGET` does not exist, create `UNITY_TARGET` with source files. - cc_library(${UNITY_TARGET} SRCS ${unity_target_cc_sources} DEPS ${op_library_DEPS} ${op_common_deps}) - endif() - # Add alias library to handle dependencies. - add_library(${TARGET} ALIAS ${UNITY_TARGET}) - else() - cc_library(${TARGET} SRCS ${cc_srcs} ${mkldnn_cc_srcs} ${xpu_cc_srcs} ${npu_cc_srcs} ${mlu_cc_srcs} DEPS ${op_library_DEPS} - ${op_common_deps}) - endif() + cc_library( + ${TARGET} + SRCS ${cc_srcs} ${mkldnn_cc_srcs} ${xpu_cc_srcs} ${npu_cc_srcs} + ${mlu_cc_srcs} + DEPS ${op_library_DEPS} ${op_common_deps}) endif() + endif() - list(LENGTH cu_srcs cu_srcs_len) - list(LENGTH hip_srcs hip_srcs_len) - list(LENGTH cu_cc_srcs cu_cc_srcs_len) - list(LENGTH hip_cc_srcs hip_cc_srcs_len) - list(LENGTH mkldnn_cc_srcs mkldnn_cc_srcs_len) - list(LENGTH xpu_cc_srcs xpu_cc_srcs_len) - list(LENGTH miopen_cu_cc_srcs miopen_cu_cc_srcs_len) - list(LENGTH npu_cc_srcs npu_cc_srcs_len) - list(LENGTH mlu_cc_srcs mlu_cc_srcs_len) + list(LENGTH cu_srcs cu_srcs_len) + list(LENGTH hip_srcs hip_srcs_len) + list(LENGTH cu_cc_srcs cu_cc_srcs_len) + list(LENGTH hip_cc_srcs hip_cc_srcs_len) + list(LENGTH mkldnn_cc_srcs mkldnn_cc_srcs_len) + list(LENGTH xpu_cc_srcs xpu_cc_srcs_len) + list(LENGTH miopen_cu_cc_srcs miopen_cu_cc_srcs_len) + list(LENGTH npu_cc_srcs npu_cc_srcs_len) + list(LENGTH mlu_cc_srcs mlu_cc_srcs_len) - # Define operators that don't need pybind here. - foreach(manual_pybind_op "compare_all_op" "compare_op" "logical_op" "bitwise_op" "nccl_op" - "tensor_array_read_write_op" "tensorrt_engine_op" "conv_fusion_op") + # Define operators that don't need pybind here. + foreach( + manual_pybind_op + "compare_all_op" + "compare_op" + "logical_op" + "bitwise_op" + "nccl_op" + "tensor_array_read_write_op" + "tensorrt_engine_op" + "conv_fusion_op") - if ("${TARGET}" STREQUAL "${manual_pybind_op}") - set(pybind_flag 1) - endif() - endforeach() + if("${TARGET}" STREQUAL "${manual_pybind_op}") + set(pybind_flag 1) + endif() + endforeach() - # The registration of USE_OP, please refer to paddle/fluid/framework/op_registry.h. - # Note that it's enough to just adding one operator to pybind in a *_op.cc file. - # And for detail pybind information, please see generated paddle/pybind/pybind.h. - set(ORIGINAL_TARGET ${TARGET}) - string(REGEX REPLACE "_op" "" TARGET "${TARGET}") + # The registration of USE_OP, please refer to paddle/fluid/framework/op_registry.h. + # Note that it's enough to just adding one operator to pybind in a *_op.cc file. + # And for detail pybind information, please see generated paddle/pybind/pybind.h. + set(ORIGINAL_TARGET ${TARGET}) + string(REGEX REPLACE "_op" "" TARGET "${TARGET}") - foreach(cc_src ${cc_srcs}) - # pybind USE_OP_ITSELF - set(op_name "") - find_register(${cc_src} "REGISTER_OPERATOR" op_name) - if(NOT ${op_name} EQUAL "") - file(APPEND ${pybind_file} "USE_OP_ITSELF(${op_name});\n") - # hack: for example, the target in conv_transpose_op.cc is conv2d_transpose, used in mkldnn - set(TARGET ${op_name}) - set(pybind_flag 1) - endif() - - set(op_name "") - find_register(${cc_src} "REGISTER_OP_WITHOUT_GRADIENT" op_name) - if(NOT ${op_name} EQUAL "") - file(APPEND ${pybind_file} "USE_OP_ITSELF(${op_name});\n") - # hack: for example, the target in conv_transpose_op.cc is conv2d_transpose, used in mkldnn - set(TARGET ${op_name}) - set(pybind_flag 1) - endif() + foreach(cc_src ${cc_srcs}) + # pybind USE_OP_ITSELF + set(op_name "") + find_register(${cc_src} "REGISTER_OPERATOR" op_name) + if(NOT ${op_name} EQUAL "") + file(APPEND ${pybind_file} "USE_OP_ITSELF(${op_name});\n") + # hack: for example, the target in conv_transpose_op.cc is conv2d_transpose, used in mkldnn + set(TARGET ${op_name}) + set(pybind_flag 1) + endif() - # pybind USE_OP_DEVICE_KERNEL for CPU - set(op_name "") - find_register(${cc_src} "REGISTER_OP_CPU_KERNEL" op_name) - if(NOT ${op_name} EQUAL "") - file(APPEND ${pybind_file} "USE_OP_DEVICE_KERNEL(${op_name}, CPU);\n") - # why change TARGET here? - # when building padle with on_infer, the REGISTER_OPERATOR(*_grad) will be removed before compiling (see details in remove_grad_op_and_kernel.py) - # in elementwise_op.cc, it will find REGISTER_OPERATOR(grad_add) and set TARGET to grad_add - # and, in the following "mkldnn" part, it will add USE_OP_DEVICE_KERNEL(grad_add, MKLDNN) to pybind.h - # however, grad_add has no mkldnn kernel. - set(TARGET ${op_name}) - set(pybind_flag 1) - endif() - endforeach() + set(op_name "") + find_register(${cc_src} "REGISTER_OP_WITHOUT_GRADIENT" op_name) + if(NOT ${op_name} EQUAL "") + file(APPEND ${pybind_file} "USE_OP_ITSELF(${op_name});\n") + # hack: for example, the target in conv_transpose_op.cc is conv2d_transpose, used in mkldnn + set(TARGET ${op_name}) + set(pybind_flag 1) + endif() - # pybind USE_OP_DEVICE_KERNEL for CUDA - list (APPEND cu_srcs ${cu_cc_srcs}) - # message("cu_srcs ${cu_srcs}") - foreach(cu_src ${cu_srcs}) - set(op_name "") - find_register(${cu_src} "REGISTER_OP_CUDA_KERNEL" op_name) - if(NOT ${op_name} EQUAL "") - file(APPEND ${pybind_file} "USE_OP_DEVICE_KERNEL(${op_name}, CUDA);\n") - set(pybind_flag 1) - endif() - endforeach() + # pybind USE_OP_DEVICE_KERNEL for CPU + set(op_name "") + find_register(${cc_src} "REGISTER_OP_CPU_KERNEL" op_name) + if(NOT ${op_name} EQUAL "") + file(APPEND ${pybind_file} "USE_OP_DEVICE_KERNEL(${op_name}, CPU);\n") + # why change TARGET here? + # when building padle with on_infer, the REGISTER_OPERATOR(*_grad) will be removed before compiling (see details in remove_grad_op_and_kernel.py) + # in elementwise_op.cc, it will find REGISTER_OPERATOR(grad_add) and set TARGET to grad_add + # and, in the following "mkldnn" part, it will add USE_OP_DEVICE_KERNEL(grad_add, MKLDNN) to pybind.h + # however, grad_add has no mkldnn kernel. + set(TARGET ${op_name}) + set(pybind_flag 1) + endif() + endforeach() - # pybind USE_OP_DEVICE_KERNEL for ROCm - list (APPEND hip_srcs ${hip_cc_srcs}) - # message("hip_srcs ${hip_srcs}") - foreach(hip_src ${hip_srcs}) - set(op_name "") - find_register(${hip_src} "REGISTER_OP_CUDA_KERNEL" op_name) - if(NOT ${op_name} EQUAL "") - file(APPEND ${pybind_file} "USE_OP_DEVICE_KERNEL(${op_name}, CUDA);\n") - set(pybind_flag 1) - endif() - endforeach() + # pybind USE_OP_DEVICE_KERNEL for CUDA + list(APPEND cu_srcs ${cu_cc_srcs}) + # message("cu_srcs ${cu_srcs}") + foreach(cu_src ${cu_srcs}) + set(op_name "") + find_register(${cu_src} "REGISTER_OP_CUDA_KERNEL" op_name) + if(NOT ${op_name} EQUAL "") + file(APPEND ${pybind_file} "USE_OP_DEVICE_KERNEL(${op_name}, CUDA);\n") + set(pybind_flag 1) + endif() + endforeach() - # pybind USE_OP_DEVICE_KERNEL for CUDNN/MIOPEN - list(APPEND cudnn_cu_srcs ${cudnn_cu_cc_srcs}) - list(APPEND cudnn_cu_srcs ${miopen_cu_cc_srcs}) - list(APPEND cudnn_cu_srcs ${miopen_cu_srcs}) - list(LENGTH cudnn_cu_srcs cudnn_cu_srcs_len) - #message("cudnn_cu_srcs ${cudnn_cu_srcs}") - if(${cudnn_cu_srcs_len} GREATER 0 AND ${ORIGINAL_TARGET} STREQUAL "activation_op") - file(APPEND ${pybind_file} "USE_OP_DEVICE_KERNEL(relu, CUDNN);\n") - else() - foreach(cudnn_src ${cudnn_cu_srcs}) - set(op_name "") - find_register(${cudnn_src} "REGISTER_OP_KERNEL" op_name) - if(NOT ${op_name} EQUAL "") - file(APPEND ${pybind_file} "USE_OP_DEVICE_KERNEL(${op_name}, CUDNN);\n") - set(pybind_flag 1) - endif() - endforeach() + # pybind USE_OP_DEVICE_KERNEL for ROCm + list(APPEND hip_srcs ${hip_cc_srcs}) + # message("hip_srcs ${hip_srcs}") + foreach(hip_src ${hip_srcs}) + set(op_name "") + find_register(${hip_src} "REGISTER_OP_CUDA_KERNEL" op_name) + if(NOT ${op_name} EQUAL "") + file(APPEND ${pybind_file} "USE_OP_DEVICE_KERNEL(${op_name}, CUDA);\n") + set(pybind_flag 1) endif() + endforeach() + # pybind USE_OP_DEVICE_KERNEL for CUDNN/MIOPEN + list(APPEND cudnn_cu_srcs ${cudnn_cu_cc_srcs}) + list(APPEND cudnn_cu_srcs ${miopen_cu_cc_srcs}) + list(APPEND cudnn_cu_srcs ${miopen_cu_srcs}) + list(LENGTH cudnn_cu_srcs cudnn_cu_srcs_len) + #message("cudnn_cu_srcs ${cudnn_cu_srcs}") + if(${cudnn_cu_srcs_len} GREATER 0 AND ${ORIGINAL_TARGET} STREQUAL + "activation_op") + file(APPEND ${pybind_file} "USE_OP_DEVICE_KERNEL(relu, CUDNN);\n") + else() + foreach(cudnn_src ${cudnn_cu_srcs}) + set(op_name "") + find_register(${cudnn_src} "REGISTER_OP_KERNEL" op_name) + if(NOT ${op_name} EQUAL "") + file(APPEND ${pybind_file} "USE_OP_DEVICE_KERNEL(${op_name}, CUDNN);\n") + set(pybind_flag 1) + endif() + endforeach() + endif() - if (WITH_XPU AND ${xpu_cc_srcs_len} GREATER 0) + if(WITH_XPU AND ${xpu_cc_srcs_len} GREATER 0) if(${ORIGINAL_TARGET} STREQUAL "activation_op") - file(APPEND ${pybind_file} "USE_OP_DEVICE_KERNEL(relu, XPU);\n") + file(APPEND ${pybind_file} "USE_OP_DEVICE_KERNEL(relu, XPU);\n") else() - foreach(xpu_src ${xpu_cc_srcs}) + foreach(xpu_src ${xpu_cc_srcs}) set(op_name "") find_register(${xpu_src} "REGISTER_OP_XPU_KERNEL" op_name) if(NOT ${op_name} EQUAL "") - file(APPEND ${pybind_file} "USE_OP_DEVICE_KERNEL(${op_name}, XPU);\n") - set(pybind_flag 1) + file(APPEND ${pybind_file} "USE_OP_DEVICE_KERNEL(${op_name}, XPU);\n") + set(pybind_flag 1) else() - find_register(${xpu_src} "REGISTER_OP_XPU_KERNEL_FUNCTOR" op_name) - if(NOT ${op_name} EQUAL "") - file(APPEND ${pybind_file} "USE_OP_DEVICE_KERNEL(${op_name}, XPU);\n") - set(pybind_flag 1) - endif() - endif() - endforeach() - endif() - endif() - - # pybind USE_OP_DEVICE_KERNEL for XPU KP - if (WITH_XPU_KP AND ${xpu_kp_cc_srcs_len} GREATER 0) - foreach(xpu_kp_src ${xpu_kp_cc_srcs}) - set(op_name "") - find_register(${xpu_kp_src} "REGISTER_OP_KERNEL" op_name) - if(NOT ${op_name} EQUAL "") - file(APPEND ${pybind_file} "USE_OP_DEVICE_KERNEL(${op_name}, KP);\n") - message(STATUS "Building KP Target: ${op_name}") + find_register(${xpu_src} "REGISTER_OP_XPU_KERNEL_FUNCTOR" op_name) + if(NOT ${op_name} EQUAL "") + file(APPEND ${pybind_file} + "USE_OP_DEVICE_KERNEL(${op_name}, XPU);\n") set(pybind_flag 1) + endif() endif() - endforeach() + endforeach() endif() + endif() - # pybind USE_OP_DEVICE_KERNEL for NPU - if (WITH_ASCEND_CL AND ${npu_cc_srcs_len} GREATER 0) - foreach(npu_src ${npu_cc_srcs}) - set(op_name "") - find_register(${npu_src} "REGISTER_OP_NPU_KERNEL" op_name) - if(NOT ${op_name} EQUAL "") - file(APPEND ${pybind_file} "USE_OP_DEVICE_KERNEL(${op_name}, NPU);\n") - set(pybind_flag 1) - endif() - endforeach() - endif() + # pybind USE_OP_DEVICE_KERNEL for XPU KP + if(WITH_XPU_KP AND ${xpu_kp_cc_srcs_len} GREATER 0) + foreach(xpu_kp_src ${xpu_kp_cc_srcs}) + set(op_name "") + find_register(${xpu_kp_src} "REGISTER_OP_KERNEL" op_name) + if(NOT ${op_name} EQUAL "") + file(APPEND ${pybind_file} "USE_OP_DEVICE_KERNEL(${op_name}, KP);\n") + message(STATUS "Building KP Target: ${op_name}") + set(pybind_flag 1) + endif() + endforeach() + endif() - # pybind USE_OP_DEVICE_KERNEL for MLU - if (WITH_MLU AND ${mlu_cc_srcs_len} GREATER 0) - foreach(mlu_src ${mlu_cc_srcs}) - set(op_name "") - find_register(${mlu_src} "REGISTER_OP_MLU_KERNEL" op_name) - if(NOT ${op_name} EQUAL "") - file(APPEND ${pybind_file} "USE_OP_DEVICE_KERNEL(${op_name}, MLU);\n") - set(pybind_flag 1) - endif() - endforeach() - endif() + # pybind USE_OP_DEVICE_KERNEL for NPU + if(WITH_ASCEND_CL AND ${npu_cc_srcs_len} GREATER 0) + foreach(npu_src ${npu_cc_srcs}) + set(op_name "") + find_register(${npu_src} "REGISTER_OP_NPU_KERNEL" op_name) + if(NOT ${op_name} EQUAL "") + file(APPEND ${pybind_file} "USE_OP_DEVICE_KERNEL(${op_name}, NPU);\n") + set(pybind_flag 1) + endif() + endforeach() + endif() - # pybind USE_OP_DEVICE_KERNEL for MKLDNN - if (WITH_MKLDNN AND ${mkldnn_cc_srcs_len} GREATER 0) - # Append first implemented MKLDNN activation operator - if (${MKLDNN_FILE} STREQUAL "activation_mkldnn_op") - file(APPEND ${pybind_file} "USE_OP_DEVICE_KERNEL(relu, MKLDNN);\n") - elseif(${MKLDNN_FILE} STREQUAL "conv_mkldnn_op") - file(APPEND ${pybind_file} "USE_OP_DEVICE_KERNEL_WITH_CUSTOM_TYPE(conv2d, MKLDNN, FP32);\n") - file(APPEND ${pybind_file} "USE_OP_DEVICE_KERNEL_WITH_CUSTOM_TYPE(conv2d, MKLDNN, S8);\n") - file(APPEND ${pybind_file} "USE_OP_DEVICE_KERNEL_WITH_CUSTOM_TYPE(conv2d, MKLDNN, U8);\n") - elseif(${MKLDNN_FILE} STREQUAL "transpose_mkldnn_op") - file(APPEND ${pybind_file} "USE_OP_DEVICE_KERNEL_WITH_CUSTOM_TYPE(transpose2, MKLDNN, FP32);\n") - file(APPEND ${pybind_file} "USE_OP_DEVICE_KERNEL_WITH_CUSTOM_TYPE(transpose2, MKLDNN, S8);\n") - file(APPEND ${pybind_file} "USE_OP_DEVICE_KERNEL_WITH_CUSTOM_TYPE(transpose2, MKLDNN, U8);\n") - elseif(${MKLDNN_FILE} STREQUAL "fc_mkldnn_op") - file(APPEND ${pybind_file} "USE_OP_DEVICE_KERNEL_WITH_CUSTOM_TYPE(fc, MKLDNN, FP32);\n") - file(APPEND ${pybind_file} "USE_OP_DEVICE_KERNEL_WITH_CUSTOM_TYPE(fc, MKLDNN, S8);\n") - file(APPEND ${pybind_file} "USE_OP_DEVICE_KERNEL_WITH_CUSTOM_TYPE(fc, MKLDNN, U8);\n") - else() - foreach(mkldnn_src ${mkldnn_cc_srcs}) + # pybind USE_OP_DEVICE_KERNEL for MLU + if(WITH_MLU AND ${mlu_cc_srcs_len} GREATER 0) + foreach(mlu_src ${mlu_cc_srcs}) + set(op_name "") + find_register(${mlu_src} "REGISTER_OP_MLU_KERNEL" op_name) + if(NOT ${op_name} EQUAL "") + file(APPEND ${pybind_file} "USE_OP_DEVICE_KERNEL(${op_name}, MLU);\n") + set(pybind_flag 1) + endif() + endforeach() + endif() + + # pybind USE_OP_DEVICE_KERNEL for MKLDNN + if(WITH_MKLDNN AND ${mkldnn_cc_srcs_len} GREATER 0) + # Append first implemented MKLDNN activation operator + if(${MKLDNN_FILE} STREQUAL "activation_mkldnn_op") + file(APPEND ${pybind_file} "USE_OP_DEVICE_KERNEL(relu, MKLDNN);\n") + elseif(${MKLDNN_FILE} STREQUAL "conv_mkldnn_op") + file(APPEND ${pybind_file} + "USE_OP_DEVICE_KERNEL_WITH_CUSTOM_TYPE(conv2d, MKLDNN, FP32);\n") + file(APPEND ${pybind_file} + "USE_OP_DEVICE_KERNEL_WITH_CUSTOM_TYPE(conv2d, MKLDNN, S8);\n") + file(APPEND ${pybind_file} + "USE_OP_DEVICE_KERNEL_WITH_CUSTOM_TYPE(conv2d, MKLDNN, U8);\n") + elseif(${MKLDNN_FILE} STREQUAL "transpose_mkldnn_op") + file(APPEND ${pybind_file} + "USE_OP_DEVICE_KERNEL_WITH_CUSTOM_TYPE(transpose2, MKLDNN, FP32);\n") + file(APPEND ${pybind_file} + "USE_OP_DEVICE_KERNEL_WITH_CUSTOM_TYPE(transpose2, MKLDNN, S8);\n") + file(APPEND ${pybind_file} + "USE_OP_DEVICE_KERNEL_WITH_CUSTOM_TYPE(transpose2, MKLDNN, U8);\n") + elseif(${MKLDNN_FILE} STREQUAL "fc_mkldnn_op") + file(APPEND ${pybind_file} + "USE_OP_DEVICE_KERNEL_WITH_CUSTOM_TYPE(fc, MKLDNN, FP32);\n") + file(APPEND ${pybind_file} + "USE_OP_DEVICE_KERNEL_WITH_CUSTOM_TYPE(fc, MKLDNN, S8);\n") + file(APPEND ${pybind_file} + "USE_OP_DEVICE_KERNEL_WITH_CUSTOM_TYPE(fc, MKLDNN, U8);\n") + else() + foreach(mkldnn_src ${mkldnn_cc_srcs}) set(op_name "") find_register(${mkldnn_src} "REGISTER_OP_KERNEL" op_name) if(NOT ${op_name} EQUAL "") - file(APPEND ${pybind_file} "USE_OP_DEVICE_KERNEL(${op_name}, MKLDNN);\n") - set(pybind_flag 1) + file(APPEND ${pybind_file} + "USE_OP_DEVICE_KERNEL(${op_name}, MKLDNN);\n") + set(pybind_flag 1) endif() - endforeach() - endif() + endforeach() endif() + endif() - # pybind USE_NO_KERNEL_OP - # HACK: if REGISTER_OP_CPU_KERNEL presents the operator must have kernel - string(REGEX MATCH "REGISTER_OP_CPU_KERNEL" regex_result "${TARGET_CONTENT}") - string(REPLACE "_op" "" TARGET "${TARGET}") - if (${pybind_flag} EQUAL 0 AND regex_result STREQUAL "") - file(APPEND ${pybind_file} "USE_NO_KERNEL_OP(${TARGET});\n") - set(pybind_flag 1) - endif() + # pybind USE_NO_KERNEL_OP + # HACK: if REGISTER_OP_CPU_KERNEL presents the operator must have kernel + string(REGEX MATCH "REGISTER_OP_CPU_KERNEL" regex_result "${TARGET_CONTENT}") + string(REPLACE "_op" "" TARGET "${TARGET}") + if(${pybind_flag} EQUAL 0 AND regex_result STREQUAL "") + file(APPEND ${pybind_file} "USE_NO_KERNEL_OP(${TARGET});\n") + set(pybind_flag 1) + endif() - # pybind USE_OP - if (${pybind_flag} EQUAL 0) - # NOTE(*): activation use macro to regist the kernels, set use_op manually. - if(${TARGET} STREQUAL "activation") - file(APPEND ${pybind_file} "USE_OP_ITSELF(relu);\n") - elseif(${TARGET} STREQUAL "fake_dequantize") - file(APPEND ${pybind_file} "USE_OP(fake_dequantize_max_abs);\n") - elseif(${TARGET} STREQUAL "fake_quantize") - file(APPEND ${pybind_file} "USE_OP(fake_quantize_abs_max);\n") - elseif(${TARGET} STREQUAL "tensorrt_engine_op") - message(STATUS "Pybind skips [tensorrt_engine_op], for this OP is only used in inference") - else() - file(APPEND ${pybind_file} "USE_OP(${TARGET});\n") - endif() + # pybind USE_OP + if(${pybind_flag} EQUAL 0) + # NOTE(*): activation use macro to regist the kernels, set use_op manually. + if(${TARGET} STREQUAL "activation") + file(APPEND ${pybind_file} "USE_OP_ITSELF(relu);\n") + elseif(${TARGET} STREQUAL "fake_dequantize") + file(APPEND ${pybind_file} "USE_OP(fake_dequantize_max_abs);\n") + elseif(${TARGET} STREQUAL "fake_quantize") + file(APPEND ${pybind_file} "USE_OP(fake_quantize_abs_max);\n") + elseif(${TARGET} STREQUAL "tensorrt_engine_op") + message( + STATUS + "Pybind skips [tensorrt_engine_op], for this OP is only used in inference" + ) + else() + file(APPEND ${pybind_file} "USE_OP(${TARGET});\n") endif() + endif() endfunction() function(register_operators) - set(options "") - set(oneValueArgs "") - set(multiValueArgs EXCLUDES DEPS) - cmake_parse_arguments(register_operators "${options}" "${oneValueArgs}" - "${multiValueArgs}" ${ARGN}) - file(GLOB OPS RELATIVE "${CMAKE_CURRENT_SOURCE_DIR}" "*_op.cc") - string(REPLACE "_mkldnn" "" OPS "${OPS}") - string(REPLACE "_xpu" "" OPS "${OPS}") - string(REPLACE "_npu" "" OPS "${OPS}") - string(REPLACE "_mlu" "" OPS "${OPS}") - string(REPLACE ".cc" "" OPS "${OPS}") - list(REMOVE_DUPLICATES OPS) - list(LENGTH register_operators_DEPS register_operators_DEPS_len) + set(options "") + set(oneValueArgs "") + set(multiValueArgs EXCLUDES DEPS) + cmake_parse_arguments(register_operators "${options}" "${oneValueArgs}" + "${multiValueArgs}" ${ARGN}) + file( + GLOB OPS + RELATIVE "${CMAKE_CURRENT_SOURCE_DIR}" + "*_op.cc") + string(REPLACE "_mkldnn" "" OPS "${OPS}") + string(REPLACE "_xpu" "" OPS "${OPS}") + string(REPLACE "_npu" "" OPS "${OPS}") + string(REPLACE "_mlu" "" OPS "${OPS}") + string(REPLACE ".cc" "" OPS "${OPS}") + list(REMOVE_DUPLICATES OPS) + list(LENGTH register_operators_DEPS register_operators_DEPS_len) - foreach(src ${OPS}) - list(FIND register_operators_EXCLUDES ${src} _index) - if (${_index} EQUAL -1) - if (${register_operators_DEPS_len} GREATER 0) - op_library(${src} UNITY DEPS ${register_operators_DEPS}) - else() - op_library(${src} UNITY) - endif() - endif() - endforeach() + foreach(src ${OPS}) + list(FIND register_operators_EXCLUDES ${src} _index) + if(${_index} EQUAL -1) + if(${register_operators_DEPS_len} GREATER 0) + op_library(${src} UNITY DEPS ${register_operators_DEPS}) + else() + op_library(${src} UNITY) + endif() + endif() + endforeach() - # Complete the processing of `UNITY_TARGET`. - if(WITH_UNITY_BUILD) - finish_unity_target(cc) - if(WITH_GPU) - finish_unity_target(cu) - endif() + # Complete the processing of `UNITY_TARGET`. + if(WITH_UNITY_BUILD) + finish_unity_target(cc) + if(WITH_GPU) + finish_unity_target(cu) endif() + endif() endfunction() diff --git a/cmake/phi.cmake b/cmake/phi.cmake index f147ef3a586..4555d892f11 100644 --- a/cmake/phi.cmake +++ b/cmake/phi.cmake @@ -13,366 +13,485 @@ # limitations under the License. function(generate_unify_header DIR_NAME) - set(options "") - set(oneValueArgs HEADER_NAME SKIP_SUFFIX) - set(multiValueArgs "") - cmake_parse_arguments(generate_unify_header "${options}" "${oneValueArgs}" - "${multiValueArgs}" ${ARGN}) + set(options "") + set(oneValueArgs HEADER_NAME SKIP_SUFFIX) + set(multiValueArgs "") + cmake_parse_arguments(generate_unify_header "${options}" "${oneValueArgs}" + "${multiValueArgs}" ${ARGN}) - # get header name and suffix - set(header_name "${DIR_NAME}") - list(LENGTH generate_unify_header_HEADER_NAME generate_unify_header_HEADER_NAME_len) - if(${generate_unify_header_HEADER_NAME_len} GREATER 0) - set(header_name "${generate_unify_header_HEADER_NAME}") - endif() - set(skip_suffix "") - list(LENGTH generate_unify_header_SKIP_SUFFIX generate_unify_header_SKIP_SUFFIX_len) - if(${generate_unify_header_SKIP_SUFFIX_len} GREATER 0) - set(skip_suffix "${generate_unify_header_SKIP_SUFFIX}") - endif() + # get header name and suffix + set(header_name "${DIR_NAME}") + list(LENGTH generate_unify_header_HEADER_NAME + generate_unify_header_HEADER_NAME_len) + if(${generate_unify_header_HEADER_NAME_len} GREATER 0) + set(header_name "${generate_unify_header_HEADER_NAME}") + endif() + set(skip_suffix "") + list(LENGTH generate_unify_header_SKIP_SUFFIX + generate_unify_header_SKIP_SUFFIX_len) + if(${generate_unify_header_SKIP_SUFFIX_len} GREATER 0) + set(skip_suffix "${generate_unify_header_SKIP_SUFFIX}") + endif() - # generate target header file - set(header_file ${CMAKE_CURRENT_SOURCE_DIR}/include/${header_name}.h) - file(WRITE ${header_file} "// Header file generated by paddle/phi/CMakeLists.txt for external users,\n// DO NOT edit or include it within paddle.\n\n#pragma once\n\n") + # generate target header file + set(header_file ${CMAKE_CURRENT_SOURCE_DIR}/include/${header_name}.h) + file( + WRITE ${header_file} + "// Header file generated by paddle/phi/CMakeLists.txt for external users,\n// DO NOT edit or include it within paddle.\n\n#pragma once\n\n" + ) - # get all top-level headers and write into header file - file(GLOB HEADERS "${CMAKE_CURRENT_SOURCE_DIR}\/${DIR_NAME}\/*.h") - foreach(header ${HEADERS}) - if("${skip_suffix}" STREQUAL "") - string(REPLACE "${PADDLE_SOURCE_DIR}\/" "" header "${header}") - file(APPEND ${header_file} "#include \"${header}\"\n") - else() - string(FIND "${header}" "${skip_suffix}.h" skip_suffix_found) - if(${skip_suffix_found} EQUAL -1) - string(REPLACE "${PADDLE_SOURCE_DIR}\/" "" header "${header}") - file(APPEND ${header_file} "#include \"${header}\"\n") - endif() - endif() - endforeach() - # append header into extension.h - string(REPLACE "${PADDLE_SOURCE_DIR}\/" "" header_file "${header_file}") - file(APPEND ${phi_extension_header_file} "#include \"${header_file}\"\n") + # get all top-level headers and write into header file + file(GLOB HEADERS "${CMAKE_CURRENT_SOURCE_DIR}\/${DIR_NAME}\/*.h") + foreach(header ${HEADERS}) + if("${skip_suffix}" STREQUAL "") + string(REPLACE "${PADDLE_SOURCE_DIR}\/" "" header "${header}") + file(APPEND ${header_file} "#include \"${header}\"\n") + else() + string(FIND "${header}" "${skip_suffix}.h" skip_suffix_found) + if(${skip_suffix_found} EQUAL -1) + string(REPLACE "${PADDLE_SOURCE_DIR}\/" "" header "${header}") + file(APPEND ${header_file} "#include \"${header}\"\n") + endif() + endif() + endforeach() + # append header into extension.h + string(REPLACE "${PADDLE_SOURCE_DIR}\/" "" header_file "${header_file}") + file(APPEND ${phi_extension_header_file} "#include \"${header_file}\"\n") endfunction() # call kernel_declare need to make sure whether the target of input exists function(kernel_declare TARGET_LIST) - foreach(kernel_path ${TARGET_LIST}) - file(READ ${kernel_path} kernel_impl) - string(REGEX MATCH "(PD_REGISTER_KERNEL|PD_REGISTER_GENERAL_KERNEL)\\([ \t\r\n]*[a-z0-9_]*,[ \t\r\n\/]*[a-z0-9_]*" first_registry "${kernel_impl}") - if (NOT first_registry STREQUAL "") - # some gpu kernel only can run on cuda, not support rocm, so we add this branch - if (WITH_ROCM) - string(FIND "${first_registry}" "cuda_only" pos) - if(pos GREATER 1) - continue() - endif() - endif() - # parse the first kernel name - string(REPLACE "PD_REGISTER_KERNEL(" "" kernel_name "${first_registry}") - string(REPLACE "PD_REGISTER_GENERAL_KERNEL(" "" kernel_name "${kernel_name}") - string(REPLACE "," "" kernel_name "${kernel_name}") - string(REGEX REPLACE "[ \t\r\n]+" "" kernel_name "${kernel_name}") - string(REGEX REPLACE "//cuda_only" "" kernel_name "${kernel_name}") - # append kernel declare into declarations.h - # TODO(chenweihang): default declare ALL_LAYOUT for each kernel - if (${kernel_path} MATCHES "./cpu\/") - file(APPEND ${kernel_declare_file} "PD_DECLARE_KERNEL(${kernel_name}, CPU, ALL_LAYOUT);\n") - elseif (${kernel_path} MATCHES "./gpu\/") - file(APPEND ${kernel_declare_file} "PD_DECLARE_KERNEL(${kernel_name}, GPU, ALL_LAYOUT);\n") - elseif (${kernel_path} MATCHES "./xpu\/") - file(APPEND ${kernel_declare_file} "PD_DECLARE_KERNEL(${kernel_name}, XPU, ALL_LAYOUT);\n") - elseif (${kernel_path} MATCHES "./gpudnn\/") - file(APPEND ${kernel_declare_file} "PD_DECLARE_KERNEL(${kernel_name}, GPUDNN, ALL_LAYOUT);\n") - elseif (${kernel_path} MATCHES "./kps\/") - file(APPEND ${kernel_declare_file} "PD_DECLARE_KERNEL(${kernel_name}, KPS, ALL_LAYOUT);\n") - else () - # deal with device independent kernel, now we use CPU temporaary - file(APPEND ${kernel_declare_file} "PD_DECLARE_KERNEL(${kernel_name}, CPU, ALL_LAYOUT);\n") - endif() + foreach(kernel_path ${TARGET_LIST}) + file(READ ${kernel_path} kernel_impl) + string( + REGEX + MATCH + "(PD_REGISTER_KERNEL|PD_REGISTER_GENERAL_KERNEL)\\([ \t\r\n]*[a-z0-9_]*,[ \t\r\n\/]*[a-z0-9_]*" + first_registry + "${kernel_impl}") + if(NOT first_registry STREQUAL "") + # some gpu kernel only can run on cuda, not support rocm, so we add this branch + if(WITH_ROCM) + string(FIND "${first_registry}" "cuda_only" pos) + if(pos GREATER 1) + continue() endif() - endforeach() + endif() + # parse the first kernel name + string(REPLACE "PD_REGISTER_KERNEL(" "" kernel_name "${first_registry}") + string(REPLACE "PD_REGISTER_GENERAL_KERNEL(" "" kernel_name + "${kernel_name}") + string(REPLACE "," "" kernel_name "${kernel_name}") + string(REGEX REPLACE "[ \t\r\n]+" "" kernel_name "${kernel_name}") + string(REGEX REPLACE "//cuda_only" "" kernel_name "${kernel_name}") + # append kernel declare into declarations.h + # TODO(chenweihang): default declare ALL_LAYOUT for each kernel + if(${kernel_path} MATCHES "./cpu\/") + file(APPEND ${kernel_declare_file} + "PD_DECLARE_KERNEL(${kernel_name}, CPU, ALL_LAYOUT);\n") + elseif(${kernel_path} MATCHES "./gpu\/") + file(APPEND ${kernel_declare_file} + "PD_DECLARE_KERNEL(${kernel_name}, GPU, ALL_LAYOUT);\n") + elseif(${kernel_path} MATCHES "./xpu\/") + file(APPEND ${kernel_declare_file} + "PD_DECLARE_KERNEL(${kernel_name}, XPU, ALL_LAYOUT);\n") + elseif(${kernel_path} MATCHES "./gpudnn\/") + file(APPEND ${kernel_declare_file} + "PD_DECLARE_KERNEL(${kernel_name}, GPUDNN, ALL_LAYOUT);\n") + elseif(${kernel_path} MATCHES "./kps\/") + file(APPEND ${kernel_declare_file} + "PD_DECLARE_KERNEL(${kernel_name}, KPS, ALL_LAYOUT);\n") + else() + # deal with device independent kernel, now we use CPU temporaary + file(APPEND ${kernel_declare_file} + "PD_DECLARE_KERNEL(${kernel_name}, CPU, ALL_LAYOUT);\n") + endif() + endif() + endforeach() endfunction() function(kernel_library TARGET) - set(common_srcs) - set(cpu_srcs) - set(gpu_srcs) - set(xpu_srcs) - set(gpudnn_srcs) - set(kps_srcs) - # parse and save the deps kerenl targets - set(all_srcs) - set(kernel_deps) + set(common_srcs) + set(cpu_srcs) + set(gpu_srcs) + set(xpu_srcs) + set(gpudnn_srcs) + set(kps_srcs) + # parse and save the deps kerenl targets + set(all_srcs) + set(kernel_deps) - set(oneValueArgs SUB_DIR) - set(multiValueArgs SRCS DEPS) - set(target_build_flag 1) + set(oneValueArgs SUB_DIR) + set(multiValueArgs SRCS DEPS) + set(target_build_flag 1) - cmake_parse_arguments(kernel_library "${options}" "${oneValueArgs}" - "${multiValueArgs}" ${ARGN}) + cmake_parse_arguments(kernel_library "${options}" "${oneValueArgs}" + "${multiValueArgs}" ${ARGN}) - # used for cc_library selected_rows dir target - set(target_suffix "") - if ("${kernel_library_SUB_DIR}" STREQUAL "selected_rows") - set(target_suffix "_sr") + # used for cc_library selected_rows dir target + set(target_suffix "") + if("${kernel_library_SUB_DIR}" STREQUAL "selected_rows") + set(target_suffix "_sr") + endif() + if("${kernel_library_SUB_DIR}" STREQUAL "sparse") + set(target_suffix "_sp") + endif() + + list(LENGTH kernel_library_SRCS kernel_library_SRCS_len) + # one kernel only match one impl file in each backend + if(${kernel_library_SRCS_len} EQUAL 0) + if(EXISTS ${CMAKE_CURRENT_SOURCE_DIR}/${TARGET}.cc) + list(APPEND common_srcs ${CMAKE_CURRENT_SOURCE_DIR}/${TARGET}.cc) endif() - if ("${kernel_library_SUB_DIR}" STREQUAL "sparse") - set(target_suffix "_sp") + if(EXISTS ${CMAKE_CURRENT_SOURCE_DIR}/cpu/${TARGET}.cc) + list(APPEND cpu_srcs ${CMAKE_CURRENT_SOURCE_DIR}/cpu/${TARGET}.cc) endif() - - list(LENGTH kernel_library_SRCS kernel_library_SRCS_len) - # one kernel only match one impl file in each backend - if (${kernel_library_SRCS_len} EQUAL 0) - if (EXISTS ${CMAKE_CURRENT_SOURCE_DIR}/${TARGET}.cc) - list(APPEND common_srcs ${CMAKE_CURRENT_SOURCE_DIR}/${TARGET}.cc) - endif() - if (EXISTS ${CMAKE_CURRENT_SOURCE_DIR}/cpu/${TARGET}.cc) - list(APPEND cpu_srcs ${CMAKE_CURRENT_SOURCE_DIR}/cpu/${TARGET}.cc) - endif() - if (WITH_GPU OR WITH_ROCM) - if (EXISTS ${CMAKE_CURRENT_SOURCE_DIR}/gpu/${TARGET}.cu) - list(APPEND gpu_srcs ${CMAKE_CURRENT_SOURCE_DIR}/gpu/${TARGET}.cu) - endif() - if (EXISTS ${CMAKE_CURRENT_SOURCE_DIR}/gpu/${TARGET}.cu.cc) - list(APPEND gpu_srcs ${CMAKE_CURRENT_SOURCE_DIR}/gpu/${TARGET}.cu.cc) - endif() - if (EXISTS ${CMAKE_CURRENT_SOURCE_DIR}/kps/${TARGET}.cu) - list(APPEND gpu_srcs ${CMAKE_CURRENT_SOURCE_DIR}/kps/${TARGET}.cu) - endif() - if (EXISTS ${CMAKE_CURRENT_SOURCE_DIR}/gpudnn/${TARGET}.cu) - list(APPEND gpudnn_srcs ${CMAKE_CURRENT_SOURCE_DIR}/gpudnn/${TARGET}.cu) - endif() - endif() - if (WITH_XPU) - if (EXISTS ${CMAKE_CURRENT_SOURCE_DIR}/xpu/${TARGET}.cc) - list(APPEND xpu_srcs ${CMAKE_CURRENT_SOURCE_DIR}/xpu/${TARGET}.cc) - endif() - endif() - if (WITH_XPU_KP) - if (EXISTS ${CMAKE_CURRENT_SOURCE_DIR}/kps/${TARGET}.cu) - # Change XPU2 file suffix - # NOTE(chenweihang): If we can be sure that the *.kps suffix is no longer used, it can be copied directly to *.xpu - file(COPY ${CMAKE_CURRENT_SOURCE_DIR}/kps/${TARGET}.cu DESTINATION ${CMAKE_CURRENT_BINARY_DIR}/kps) - file(RENAME ${CMAKE_CURRENT_BINARY_DIR}/kps/${TARGET}.cu ${CMAKE_CURRENT_BINARY_DIR}/kps/${TARGET}.kps) - list(APPEND kps_srcs ${CMAKE_CURRENT_BINARY_DIR}/kps/${TARGET}.kps) - endif() - endif() - else() - # TODO(chenweihang): impl compile by source later + if(WITH_GPU OR WITH_ROCM) + if(EXISTS ${CMAKE_CURRENT_SOURCE_DIR}/gpu/${TARGET}.cu) + list(APPEND gpu_srcs ${CMAKE_CURRENT_SOURCE_DIR}/gpu/${TARGET}.cu) + endif() + if(EXISTS ${CMAKE_CURRENT_SOURCE_DIR}/gpu/${TARGET}.cu.cc) + list(APPEND gpu_srcs ${CMAKE_CURRENT_SOURCE_DIR}/gpu/${TARGET}.cu.cc) + endif() + if(EXISTS ${CMAKE_CURRENT_SOURCE_DIR}/kps/${TARGET}.cu) + list(APPEND gpu_srcs ${CMAKE_CURRENT_SOURCE_DIR}/kps/${TARGET}.cu) + endif() + if(EXISTS ${CMAKE_CURRENT_SOURCE_DIR}/gpudnn/${TARGET}.cu) + list(APPEND gpudnn_srcs ${CMAKE_CURRENT_SOURCE_DIR}/gpudnn/${TARGET}.cu) + endif() endif() - - list(APPEND all_srcs ${CMAKE_CURRENT_SOURCE_DIR}/${TARGET}.h) - if (EXISTS ${CMAKE_CURRENT_SOURCE_DIR}/impl/${TARGET}_impl.h) - list(APPEND all_srcs ${CMAKE_CURRENT_SOURCE_DIR}/impl/${TARGET}_impl.h) + if(WITH_XPU) + if(EXISTS ${CMAKE_CURRENT_SOURCE_DIR}/xpu/${TARGET}.cc) + list(APPEND xpu_srcs ${CMAKE_CURRENT_SOURCE_DIR}/xpu/${TARGET}.cc) + endif() endif() - list(APPEND all_srcs ${common_srcs}) - list(APPEND all_srcs ${cpu_srcs}) - list(APPEND all_srcs ${gpu_srcs}) - list(APPEND all_srcs ${xpu_srcs}) - list(APPEND all_srcs ${gpudnn_srcs}) - list(APPEND all_srcs ${kps_srcs}) + if(WITH_XPU_KP) + if(EXISTS ${CMAKE_CURRENT_SOURCE_DIR}/kps/${TARGET}.cu) + # Change XPU2 file suffix + # NOTE(chenweihang): If we can be sure that the *.kps suffix is no longer used, it can be copied directly to *.xpu + file(COPY ${CMAKE_CURRENT_SOURCE_DIR}/kps/${TARGET}.cu + DESTINATION ${CMAKE_CURRENT_BINARY_DIR}/kps) + file(RENAME ${CMAKE_CURRENT_BINARY_DIR}/kps/${TARGET}.cu + ${CMAKE_CURRENT_BINARY_DIR}/kps/${TARGET}.kps) + list(APPEND kps_srcs ${CMAKE_CURRENT_BINARY_DIR}/kps/${TARGET}.kps) + endif() + endif() + else() + # TODO(chenweihang): impl compile by source later + endif() - set(all_include_kernels) - set(all_kernel_name) + list(APPEND all_srcs ${CMAKE_CURRENT_SOURCE_DIR}/${TARGET}.h) + if(EXISTS ${CMAKE_CURRENT_SOURCE_DIR}/impl/${TARGET}_impl.h) + list(APPEND all_srcs ${CMAKE_CURRENT_SOURCE_DIR}/impl/${TARGET}_impl.h) + endif() + list(APPEND all_srcs ${common_srcs}) + list(APPEND all_srcs ${cpu_srcs}) + list(APPEND all_srcs ${gpu_srcs}) + list(APPEND all_srcs ${xpu_srcs}) + list(APPEND all_srcs ${gpudnn_srcs}) + list(APPEND all_srcs ${kps_srcs}) - foreach(src ${all_srcs}) - file(READ ${src} target_content) - # "kernels/xxx"(DenseTensor Kernel) can only include each other, but can't include "SUB_DIR/xxx" (such as selected_rows Kernel) - string(REGEX MATCHALL "#include \"paddle\/phi\/kernels\/[a-z0-9_]+_kernel.h\"" include_kernels ${target_content}) - list(APPEND all_include_kernels ${include_kernels}) + set(all_include_kernels) + set(all_kernel_name) - # "SUB_DIR/xxx" can include "kernels/xx" and "SUB_DIR/xxx" - if (NOT "${kernel_library_SUB_DIR}" STREQUAL "") - string(REGEX MATCHALL "#include \"paddle\/phi\/kernels\/${kernel_library_SUB_DIR}\/[a-z0-9_]+_kernel.h\"" include_kernels ${target_content}) - list(APPEND all_include_kernels ${include_kernels}) - endif() + foreach(src ${all_srcs}) + file(READ ${src} target_content) + # "kernels/xxx"(DenseTensor Kernel) can only include each other, but can't include "SUB_DIR/xxx" (such as selected_rows Kernel) + string(REGEX MATCHALL + "#include \"paddle\/phi\/kernels\/[a-z0-9_]+_kernel.h\"" + include_kernels ${target_content}) + list(APPEND all_include_kernels ${include_kernels}) - foreach(include_kernel ${all_include_kernels}) - if ("${kernel_library_SUB_DIR}" STREQUAL "") - string(REGEX REPLACE "#include \"paddle\/phi\/kernels\/" "" kernel_name ${include_kernel}) - string(REGEX REPLACE ".h\"" "" kernel_name ${kernel_name}) - list(APPEND all_kernel_name ${kernel_name}) - else() - # NOTE(dev): we should firstly match kernel_library_SUB_DIR. - if (${include_kernel} MATCHES "#include \"paddle\/phi\/kernels\/${kernel_library_SUB_DIR}\/") - string(REGEX REPLACE "#include \"paddle\/phi\/kernels\/${kernel_library_SUB_DIR}\/" "" kernel_name ${include_kernel}) - # for selected_rows directory, add ${target_suffix}. - string(REGEX REPLACE ".h\"" "${target_suffix}" kernel_name ${kernel_name}) - list(APPEND all_kernel_name ${kernel_name}) - else() - string(REGEX REPLACE "#include \"paddle\/phi\/kernels\/" "" kernel_name ${include_kernel}) - string(REGEX REPLACE ".h\"" "" kernel_name ${kernel_name}) - list(APPEND all_kernel_name ${kernel_name}) - endif() - endif() - list(APPEND kernel_deps ${all_kernel_name}) - endforeach() + # "SUB_DIR/xxx" can include "kernels/xx" and "SUB_DIR/xxx" + if(NOT "${kernel_library_SUB_DIR}" STREQUAL "") + string( + REGEX + MATCHALL + "#include \"paddle\/phi\/kernels\/${kernel_library_SUB_DIR}\/[a-z0-9_]+_kernel.h\"" + include_kernels + ${target_content}) + list(APPEND all_include_kernels ${include_kernels}) + endif() + + foreach(include_kernel ${all_include_kernels}) + if("${kernel_library_SUB_DIR}" STREQUAL "") + string(REGEX REPLACE "#include \"paddle\/phi\/kernels\/" "" kernel_name + ${include_kernel}) + string(REGEX REPLACE ".h\"" "" kernel_name ${kernel_name}) + list(APPEND all_kernel_name ${kernel_name}) + else() + # NOTE(dev): we should firstly match kernel_library_SUB_DIR. + if(${include_kernel} MATCHES + "#include \"paddle\/phi\/kernels\/${kernel_library_SUB_DIR}\/") + string( + REGEX + REPLACE + "#include \"paddle\/phi\/kernels\/${kernel_library_SUB_DIR}\/" "" + kernel_name ${include_kernel}) + # for selected_rows directory, add ${target_suffix}. + string(REGEX REPLACE ".h\"" "${target_suffix}" kernel_name + ${kernel_name}) + list(APPEND all_kernel_name ${kernel_name}) + else() + string(REGEX REPLACE "#include \"paddle\/phi\/kernels\/" "" + kernel_name ${include_kernel}) + string(REGEX REPLACE ".h\"" "" kernel_name ${kernel_name}) + list(APPEND all_kernel_name ${kernel_name}) + endif() + endif() + list(APPEND kernel_deps ${all_kernel_name}) endforeach() - list(REMOVE_DUPLICATES kernel_deps) - list(REMOVE_ITEM kernel_deps ${TARGET}${target_suffix}) + endforeach() + list(REMOVE_DUPLICATES kernel_deps) + list(REMOVE_ITEM kernel_deps ${TARGET}${target_suffix}) - list(LENGTH common_srcs common_srcs_len) - list(LENGTH cpu_srcs cpu_srcs_len) - list(LENGTH gpu_srcs gpu_srcs_len) - list(LENGTH xpu_srcs xpu_srcs_len) - list(LENGTH gpudnn_srcs gpudnn_srcs_len) - list(LENGTH kps_srcs kps_srcs_len) + list(LENGTH common_srcs common_srcs_len) + list(LENGTH cpu_srcs cpu_srcs_len) + list(LENGTH gpu_srcs gpu_srcs_len) + list(LENGTH xpu_srcs xpu_srcs_len) + list(LENGTH gpudnn_srcs gpudnn_srcs_len) + list(LENGTH kps_srcs kps_srcs_len) - # kernel source file level - # level 1: base device kernel (if any device or dnn kernel exists, the cpu_kernel must be exists!!!) - # - cpu_srcs / gpu_srcs / xpu_srcs / kps_srcs - # = dnn srcs: gpudnn_srcs - # level 2: device-independent kernel - # - common_srcs + # kernel source file level + # level 1: base device kernel (if any device or dnn kernel exists, the cpu_kernel must be exists!!!) + # - cpu_srcs / gpu_srcs / xpu_srcs / kps_srcs + # = dnn srcs: gpudnn_srcs + # level 2: device-independent kernel + # - common_srcs - set(partial_build_flag 0) - set(base_build_flag 0) - if (${common_srcs_len} GREATER 0) - set(partial_build_flag 1) - endif() - if (${cpu_srcs_len} GREATER 0 OR ${gpu_srcs_len} GREATER 0 OR ${xpu_srcs_len} GREATER 0 OR ${kps_srcs_len} GREATER 0) - set(base_build_flag 1) - endif() + set(partial_build_flag 0) + set(base_build_flag 0) + if(${common_srcs_len} GREATER 0) + set(partial_build_flag 1) + endif() + if(${cpu_srcs_len} GREATER 0 + OR ${gpu_srcs_len} GREATER 0 + OR ${xpu_srcs_len} GREATER 0 + OR ${kps_srcs_len} GREATER 0) + set(base_build_flag 1) + endif() - # gpudnn or mkldnn needs to be compiled separately - set(dnn_kernels) - if (${gpudnn_srcs_len} GREATER 0) - if (WITH_GPU) - nv_library(${TARGET}_gpudnn${target_suffix} SRCS ${gpudnn_srcs} DEPS ${kernel_library_DEPS} ${kernel_deps}) - elseif (WITH_ROCM) - hip_library(${TARGET}_gpudnn${target_suffix} SRCS ${gpudnn_srcs} DEPS ${kernel_library_DEPS} ${kernel_deps}) - endif() - list(APPEND dnn_kernels ${TARGET}_gpudnn${target_suffix}) + # gpudnn or mkldnn needs to be compiled separately + set(dnn_kernels) + if(${gpudnn_srcs_len} GREATER 0) + if(WITH_GPU) + nv_library( + ${TARGET}_gpudnn${target_suffix} + SRCS ${gpudnn_srcs} + DEPS ${kernel_library_DEPS} ${kernel_deps}) + elseif(WITH_ROCM) + hip_library( + ${TARGET}_gpudnn${target_suffix} + SRCS ${gpudnn_srcs} + DEPS ${kernel_library_DEPS} ${kernel_deps}) endif() - list(LENGTH dnn_kernels dnn_kernels_len) + list(APPEND dnn_kernels ${TARGET}_gpudnn${target_suffix}) + endif() + list(LENGTH dnn_kernels dnn_kernels_len) - if (${partial_build_flag} EQUAL 0 AND ${base_build_flag} EQUAL 1) - if (WITH_GPU) - if (${dnn_kernels_len} GREATER 0) - nv_library(${TARGET}_base${target_suffix} SRCS ${cpu_srcs} ${gpu_srcs} DEPS ${kernel_library_DEPS} ${kernel_deps}) - nv_library(${TARGET}${target_suffix} DEPS ${TARGET}_base${target_suffix} ${dnn_kernels}) - else() - nv_library(${TARGET}${target_suffix} SRCS ${cpu_srcs} ${gpu_srcs} DEPS ${kernel_library_DEPS} ${kernel_deps}) - endif() - elseif (WITH_ROCM) - if (${dnn_kernels_len} GREATER 0) - hip_library(${TARGET}_base${target_suffix} SRCS ${cpu_srcs} ${gpu_srcs} DEPS ${kernel_library_DEPS} ${kernel_deps}) - hip_library(${TARGET}${target_suffix} DEPS ${TARGET}_base${target_suffix} ${dnn_kernels}) - else() - hip_library(${TARGET}${target_suffix} SRCS ${cpu_srcs} ${gpu_srcs} DEPS ${kernel_library_DEPS} ${kernel_deps}) - endif() - elseif (WITH_XPU_KP) - xpu_library(${TARGET}${target_suffix} SRCS ${cpu_srcs} ${kps_srcs} ${xpu_srcs} DEPS ${kernel_library_DEPS} ${kernel_deps}) - else() - cc_library(${TARGET}${target_suffix} SRCS ${cpu_srcs} ${xpu_srcs} DEPS ${kernel_library_DEPS} ${kernel_deps}) - endif() - elseif (${partial_build_flag} EQUAL 1 AND ${base_build_flag} EQUAL 1) - if (WITH_GPU) - nv_library(${TARGET}_base${target_suffix} SRCS ${cpu_srcs} ${gpu_srcs} DEPS ${kernel_library_DEPS} ${kernel_deps}) - nv_library(${TARGET}${target_suffix} SRCS ${common_srcs} DEPS ${TARGET}_base${target_suffix} ${dnn_kernels}) - elseif (WITH_ROCM) - hip_library(${TARGET}_base${target_suffix} SRCS ${cpu_srcs} ${gpu_srcs} DEPS ${kernel_library_DEPS} ${kernel_deps}) - hip_library(${TARGET}${target_suffix} SRCS ${common_srcs} DEPS ${TARGET}_base${target_suffix} ${dnn_kernels}) - elseif (WITH_XPU_KP) - xpu_library(${TARGET}_base${target_suffix} SRCS ${cpu_srcs} ${kps_srcs} ${xpu_srcs} DEPS ${kernel_library_DEPS} ${kernel_deps}) - xpu_library(${TARGET}${target_suffix} SRCS ${common_srcs} DEPS ${TARGET}_base${target_suffix}) - else() - cc_library(${TARGET}_base${target_suffix} SRCS ${cpu_srcs} ${xpu_srcs} DEPS ${kernel_library_DEPS} ${kernel_deps}) - cc_library(${TARGET}${target_suffix} SRCS ${common_srcs} DEPS ${TARGET}_base${target_suffix}) - endif() - elseif (${partial_build_flag} EQUAL 1 AND ${base_build_flag} EQUAL 0) - if (WITH_GPU) - nv_library(${TARGET}${target_suffix} SRCS ${common_srcs} DEPS ${kernel_library_DEPS} ${kernel_deps}) - elseif (WITH_ROCM) - hip_library(${TARGET}${target_suffix} SRCS ${common_srcs} DEPS ${kernel_library_DEPS} ${kernel_deps}) - elseif (WITH_XPU_KP) - xpu_library(${TARGET}${target_suffix} SRCS ${common_srcs} DEPS ${kernel_library_DEPS} ${kernel_deps}) - else() - cc_library(${TARGET}${target_suffix} SRCS ${common_srcs} DEPS ${kernel_library_DEPS} ${kernel_deps}) - endif() + if(${partial_build_flag} EQUAL 0 AND ${base_build_flag} EQUAL 1) + if(WITH_GPU) + if(${dnn_kernels_len} GREATER 0) + nv_library( + ${TARGET}_base${target_suffix} + SRCS ${cpu_srcs} ${gpu_srcs} + DEPS ${kernel_library_DEPS} ${kernel_deps}) + nv_library(${TARGET}${target_suffix} DEPS ${TARGET}_base${target_suffix} + ${dnn_kernels}) + else() + nv_library( + ${TARGET}${target_suffix} + SRCS ${cpu_srcs} ${gpu_srcs} + DEPS ${kernel_library_DEPS} ${kernel_deps}) + endif() + elseif(WITH_ROCM) + if(${dnn_kernels_len} GREATER 0) + hip_library( + ${TARGET}_base${target_suffix} + SRCS ${cpu_srcs} ${gpu_srcs} + DEPS ${kernel_library_DEPS} ${kernel_deps}) + hip_library(${TARGET}${target_suffix} + DEPS ${TARGET}_base${target_suffix} ${dnn_kernels}) + else() + hip_library( + ${TARGET}${target_suffix} + SRCS ${cpu_srcs} ${gpu_srcs} + DEPS ${kernel_library_DEPS} ${kernel_deps}) + endif() + elseif(WITH_XPU_KP) + xpu_library( + ${TARGET}${target_suffix} + SRCS ${cpu_srcs} ${kps_srcs} ${xpu_srcs} + DEPS ${kernel_library_DEPS} ${kernel_deps}) + else() + cc_library( + ${TARGET}${target_suffix} + SRCS ${cpu_srcs} ${xpu_srcs} + DEPS ${kernel_library_DEPS} ${kernel_deps}) + endif() + elseif(${partial_build_flag} EQUAL 1 AND ${base_build_flag} EQUAL 1) + if(WITH_GPU) + nv_library( + ${TARGET}_base${target_suffix} + SRCS ${cpu_srcs} ${gpu_srcs} + DEPS ${kernel_library_DEPS} ${kernel_deps}) + nv_library( + ${TARGET}${target_suffix} + SRCS ${common_srcs} + DEPS ${TARGET}_base${target_suffix} ${dnn_kernels}) + elseif(WITH_ROCM) + hip_library( + ${TARGET}_base${target_suffix} + SRCS ${cpu_srcs} ${gpu_srcs} + DEPS ${kernel_library_DEPS} ${kernel_deps}) + hip_library( + ${TARGET}${target_suffix} + SRCS ${common_srcs} + DEPS ${TARGET}_base${target_suffix} ${dnn_kernels}) + elseif(WITH_XPU_KP) + xpu_library( + ${TARGET}_base${target_suffix} + SRCS ${cpu_srcs} ${kps_srcs} ${xpu_srcs} + DEPS ${kernel_library_DEPS} ${kernel_deps}) + xpu_library( + ${TARGET}${target_suffix} + SRCS ${common_srcs} + DEPS ${TARGET}_base${target_suffix}) + else() + cc_library( + ${TARGET}_base${target_suffix} + SRCS ${cpu_srcs} ${xpu_srcs} + DEPS ${kernel_library_DEPS} ${kernel_deps}) + cc_library( + ${TARGET}${target_suffix} + SRCS ${common_srcs} + DEPS ${TARGET}_base${target_suffix}) + endif() + elseif(${partial_build_flag} EQUAL 1 AND ${base_build_flag} EQUAL 0) + if(WITH_GPU) + nv_library( + ${TARGET}${target_suffix} + SRCS ${common_srcs} + DEPS ${kernel_library_DEPS} ${kernel_deps}) + elseif(WITH_ROCM) + hip_library( + ${TARGET}${target_suffix} + SRCS ${common_srcs} + DEPS ${kernel_library_DEPS} ${kernel_deps}) + elseif(WITH_XPU_KP) + xpu_library( + ${TARGET}${target_suffix} + SRCS ${common_srcs} + DEPS ${kernel_library_DEPS} ${kernel_deps}) else() - set(target_build_flag 0) + cc_library( + ${TARGET}${target_suffix} + SRCS ${common_srcs} + DEPS ${kernel_library_DEPS} ${kernel_deps}) endif() + else() + set(target_build_flag 0) + endif() - if (${target_build_flag} EQUAL 1) - if (${common_srcs_len} GREATER 0 OR ${cpu_srcs_len} GREATER 0 OR - ${gpu_srcs_len} GREATER 0 OR ${xpu_srcs_len} GREATER 0 OR ${kps_srcs_len} GREATER 0 OR - ${gpudnn_srcs_len} GREATER 0) - # append target into PHI_KERNELS property - get_property(phi_kernels GLOBAL PROPERTY PHI_KERNELS) - set(phi_kernels ${phi_kernels} ${TARGET}${target_suffix}) - set_property(GLOBAL PROPERTY PHI_KERNELS ${phi_kernels}) - endif() + if(${target_build_flag} EQUAL 1) + if(${common_srcs_len} GREATER 0 + OR ${cpu_srcs_len} GREATER 0 + OR ${gpu_srcs_len} GREATER 0 + OR ${xpu_srcs_len} GREATER 0 + OR ${kps_srcs_len} GREATER 0 + OR ${gpudnn_srcs_len} GREATER 0) + # append target into PHI_KERNELS property + get_property(phi_kernels GLOBAL PROPERTY PHI_KERNELS) + set(phi_kernels ${phi_kernels} ${TARGET}${target_suffix}) + set_property(GLOBAL PROPERTY PHI_KERNELS ${phi_kernels}) + endif() - # parse kernel name and auto generate kernel declaration - # here, we don't need to check WITH_XXX, because if not WITH_XXX, the - # xxx_srcs_len will be equal to 0 - if (${common_srcs_len} GREATER 0) - kernel_declare(${common_srcs}) - endif() - if (${cpu_srcs_len} GREATER 0) - kernel_declare(${cpu_srcs}) - endif() - if (${gpu_srcs_len} GREATER 0) - kernel_declare(${gpu_srcs}) - endif() - if (${xpu_srcs_len} GREATER 0) - kernel_declare(${xpu_srcs}) - endif() - if (${gpudnn_srcs_len} GREATER 0) - kernel_declare(${gpudnn_srcs}) - endif() - if (${kps_srcs_len} GREATER 0) - kernel_declare(${kps_srcs}) - endif() + # parse kernel name and auto generate kernel declaration + # here, we don't need to check WITH_XXX, because if not WITH_XXX, the + # xxx_srcs_len will be equal to 0 + if(${common_srcs_len} GREATER 0) + kernel_declare(${common_srcs}) + endif() + if(${cpu_srcs_len} GREATER 0) + kernel_declare(${cpu_srcs}) + endif() + if(${gpu_srcs_len} GREATER 0) + kernel_declare(${gpu_srcs}) + endif() + if(${xpu_srcs_len} GREATER 0) + kernel_declare(${xpu_srcs}) endif() + if(${gpudnn_srcs_len} GREATER 0) + kernel_declare(${gpudnn_srcs}) + endif() + if(${kps_srcs_len} GREATER 0) + kernel_declare(${kps_srcs}) + endif() + endif() endfunction() function(register_kernels) - set(options "") - set(oneValueArgs SUB_DIR) - set(multiValueArgs EXCLUDES DEPS) - cmake_parse_arguments(register_kernels "${options}" "${oneValueArgs}" - "${multiValueArgs}" ${ARGN}) + set(options "") + set(oneValueArgs SUB_DIR) + set(multiValueArgs EXCLUDES DEPS) + cmake_parse_arguments(register_kernels "${options}" "${oneValueArgs}" + "${multiValueArgs}" ${ARGN}) - file(GLOB KERNELS RELATIVE "${CMAKE_CURRENT_SOURCE_DIR}" "*_kernel.h") - string(REPLACE ".h" "" KERNELS "${KERNELS}") - list(LENGTH register_kernels_DEPS register_kernels_DEPS_len) + file( + GLOB KERNELS + RELATIVE "${CMAKE_CURRENT_SOURCE_DIR}" + "*_kernel.h") + string(REPLACE ".h" "" KERNELS "${KERNELS}") + list(LENGTH register_kernels_DEPS register_kernels_DEPS_len) - foreach(target ${KERNELS}) - list(FIND register_kernels_EXCLUDES ${target} _index) - if (${_index} EQUAL -1) - if (${register_kernels_DEPS_len} GREATER 0) - kernel_library(${target} DEPS ${register_kernels_DEPS} SUB_DIR ${register_kernels_SUB_DIR}) - else() - kernel_library(${target} SUB_DIR ${register_kernels_SUB_DIR}) - endif() - endif() - endforeach() + foreach(target ${KERNELS}) + list(FIND register_kernels_EXCLUDES ${target} _index) + if(${_index} EQUAL -1) + if(${register_kernels_DEPS_len} GREATER 0) + kernel_library(${target} DEPS ${register_kernels_DEPS} SUB_DIR + ${register_kernels_SUB_DIR}) + else() + kernel_library(${target} SUB_DIR ${register_kernels_SUB_DIR}) + endif() + endif() + endforeach() endfunction() function(append_op_util_declare TARGET) - file(READ ${CMAKE_CURRENT_SOURCE_DIR}/${TARGET} target_content) - string(REGEX MATCH "(PD_REGISTER_BASE_KERNEL_NAME|PD_REGISTER_ARG_MAPPING_FN)\\([ \t\r\n]*[a-z0-9_]*" util_registrar "${target_content}") - string(REPLACE "PD_REGISTER_ARG_MAPPING_FN" "PD_DECLARE_ARG_MAPPING_FN" util_declare "${util_registrar}") - string(REPLACE "PD_REGISTER_BASE_KERNEL_NAME" "PD_DECLARE_BASE_KERNEL_NAME" util_declare "${util_declare}") - string(APPEND util_declare ");\n") - file(APPEND ${op_utils_header} "${util_declare}") + file(READ ${CMAKE_CURRENT_SOURCE_DIR}/${TARGET} target_content) + string( + REGEX + MATCH + "(PD_REGISTER_BASE_KERNEL_NAME|PD_REGISTER_ARG_MAPPING_FN)\\([ \t\r\n]*[a-z0-9_]*" + util_registrar + "${target_content}") + string(REPLACE "PD_REGISTER_ARG_MAPPING_FN" "PD_DECLARE_ARG_MAPPING_FN" + util_declare "${util_registrar}") + string(REPLACE "PD_REGISTER_BASE_KERNEL_NAME" "PD_DECLARE_BASE_KERNEL_NAME" + util_declare "${util_declare}") + string(APPEND util_declare ");\n") + file(APPEND ${op_utils_header} "${util_declare}") endfunction() function(register_op_utils TARGET_NAME) - set(utils_srcs) - set(options "") - set(oneValueArgs "") - set(multiValueArgs EXCLUDES DEPS) - cmake_parse_arguments(register_op_utils "${options}" "${oneValueArgs}" - "${multiValueArgs}" ${ARGN}) + set(utils_srcs) + set(options "") + set(oneValueArgs "") + set(multiValueArgs EXCLUDES DEPS) + cmake_parse_arguments(register_op_utils "${options}" "${oneValueArgs}" + "${multiValueArgs}" ${ARGN}) - file(GLOB SIGNATURES RELATIVE "${CMAKE_CURRENT_SOURCE_DIR}" "*_sig.cc") - foreach(target ${SIGNATURES}) - append_op_util_declare(${target}) - list(APPEND utils_srcs ${CMAKE_CURRENT_SOURCE_DIR}/${target}) - endforeach() + file( + GLOB SIGNATURES + RELATIVE "${CMAKE_CURRENT_SOURCE_DIR}" + "*_sig.cc") + foreach(target ${SIGNATURES}) + append_op_util_declare(${target}) + list(APPEND utils_srcs ${CMAKE_CURRENT_SOURCE_DIR}/${target}) + endforeach() - cc_library(${TARGET_NAME} SRCS ${utils_srcs} DEPS ${register_op_utils_DEPS}) + cc_library( + ${TARGET_NAME} + SRCS ${utils_srcs} + DEPS ${register_op_utils_DEPS}) endfunction() diff --git a/cmake/phi_header.cmake b/cmake/phi_header.cmake index b23b4086b18..fa5b6724ce8 100644 --- a/cmake/phi_header.cmake +++ b/cmake/phi_header.cmake @@ -12,32 +12,42 @@ # See the License for the specific language governing permissions and # limitations under the License. -set(PADDLE_INFERENCE_INSTALL_DIR "${CMAKE_BINARY_DIR}/paddle_inference_install_dir") +set(PADDLE_INFERENCE_INSTALL_DIR + "${CMAKE_BINARY_DIR}/paddle_inference_install_dir") function(phi_header_path_compat TARGET_PATH) -message(STATUS "phi header path compat processing: ${TARGET_PATH}") -string(FIND ${TARGET_PATH} "experimental" pos) -if (pos GREATER 1) + message(STATUS "phi header path compat processing: ${TARGET_PATH}") + string(FIND ${TARGET_PATH} "experimental" pos) + if(pos GREATER 1) file(GLOB HEADERS "${TARGET_PATH}/*" "*.h") foreach(header ${HEADERS}) - if (${header} MATCHES ".*.h$") - file(READ ${header} HEADER_CONTENT) - string(REPLACE "paddle/phi/" "paddle/include/experimental/phi/" HEADER_CONTENT "${HEADER_CONTENT}") - string(REPLACE "paddle/utils/" "paddle/include/experimental/utils/" HEADER_CONTENT "${HEADER_CONTENT}") - file(WRITE ${header} "${HEADER_CONTENT}") - message(STATUS "phi header path compat processing complete: ${header}") - endif() + if(${header} MATCHES ".*.h$") + file(READ ${header} HEADER_CONTENT) + string(REPLACE "paddle/phi/" "paddle/include/experimental/phi/" + HEADER_CONTENT "${HEADER_CONTENT}") + string(REPLACE "paddle/utils/" "paddle/include/experimental/utils/" + HEADER_CONTENT "${HEADER_CONTENT}") + file(WRITE ${header} "${HEADER_CONTENT}") + message(STATUS "phi header path compat processing complete: ${header}") + endif() endforeach() -endif() + endif() endfunction() -phi_header_path_compat(${PADDLE_INFERENCE_INSTALL_DIR}/paddle/include/experimental) -phi_header_path_compat(${PADDLE_INFERENCE_INSTALL_DIR}/paddle/include/experimental/phi/api) -phi_header_path_compat(${PADDLE_INFERENCE_INSTALL_DIR}/paddle/include/experimental/phi/api/ext) -phi_header_path_compat(${PADDLE_INFERENCE_INSTALL_DIR}/paddle/include/experimental/phi/api/include) -phi_header_path_compat(${PADDLE_INFERENCE_INSTALL_DIR}/paddle/include/experimental/phi/common) -phi_header_path_compat(${PADDLE_INFERENCE_INSTALL_DIR}/paddle/include/experimental/phi/core) +phi_header_path_compat( + ${PADDLE_INFERENCE_INSTALL_DIR}/paddle/include/experimental) +phi_header_path_compat( + ${PADDLE_INFERENCE_INSTALL_DIR}/paddle/include/experimental/phi/api) +phi_header_path_compat( + ${PADDLE_INFERENCE_INSTALL_DIR}/paddle/include/experimental/phi/api/ext) +phi_header_path_compat( + ${PADDLE_INFERENCE_INSTALL_DIR}/paddle/include/experimental/phi/api/include) +phi_header_path_compat( + ${PADDLE_INFERENCE_INSTALL_DIR}/paddle/include/experimental/phi/common) +phi_header_path_compat( + ${PADDLE_INFERENCE_INSTALL_DIR}/paddle/include/experimental/phi/core) # In order to be compatible with the original behavior, the header file name needs to be changed -file(RENAME ${PADDLE_INFERENCE_INSTALL_DIR}/paddle/include/experimental/extension.h - ${PADDLE_INFERENCE_INSTALL_DIR}/paddle/include/experimental/ext_all.h) +file(RENAME + ${PADDLE_INFERENCE_INSTALL_DIR}/paddle/include/experimental/extension.h + ${PADDLE_INFERENCE_INSTALL_DIR}/paddle/include/experimental/ext_all.h) diff --git a/cmake/python_module.cmake b/cmake/python_module.cmake index 1412b7f7f20..9367435b61b 100644 --- a/cmake/python_module.cmake +++ b/cmake/python_module.cmake @@ -2,42 +2,49 @@ # Found at http://www.cmake.org/pipermail/cmake/2011-January/041666.html # To use do: find_python_module(PyQt4 REQUIRED) function(find_python_module module) - string(TOUPPER ${module} module_upper) - if(NOT PY_${module_upper}) - if(ARGC GREATER 1 AND ARGV1 STREQUAL "REQUIRED") - set(${module}_FIND_REQUIRED TRUE) - else() - set(${module}_FIND_REQUIRED FALSE) - endif() - # A module's location is usually a directory, but for binary modules - # it's a .so file. - execute_process(COMMAND "${PYTHON_EXECUTABLE}" "-c" - "import re, ${module}; print(re.compile('/__init__.py.*').sub('',${module}.__file__))" - RESULT_VARIABLE _${module}_status - OUTPUT_VARIABLE _${module}_location - ERROR_QUIET - OUTPUT_STRIP_TRAILING_WHITESPACE) - if(NOT _${module}_status) - set(PY_${module_upper} ${_${module}_location} CACHE STRING - "Location of Python module ${module}") - endif(NOT _${module}_status) - endif(NOT PY_${module_upper}) - find_package_handle_standard_args(PY_${module} DEFAULT_MSG PY_${module_upper}) - if(NOT PY_${module_upper}_FOUND AND ${module}_FIND_REQUIRED) - message(FATAL_ERROR "python module ${module} is not found") + string(TOUPPER ${module} module_upper) + if(NOT PY_${module_upper}) + if(ARGC GREATER 1 AND ARGV1 STREQUAL "REQUIRED") + set(${module}_FIND_REQUIRED TRUE) + else() + set(${module}_FIND_REQUIRED FALSE) endif() - - execute_process(COMMAND "${PYTHON_EXECUTABLE}" "-c" - "import sys, ${module}; sys.stdout.write(${module}.__version__)" - OUTPUT_VARIABLE _${module}_version - RESULT_VARIABLE _${module}_status - ERROR_QUIET - OUTPUT_STRIP_TRAILING_WHITESPACE) + # A module's location is usually a directory, but for binary modules + # it's a .so file. + execute_process( + COMMAND + "${PYTHON_EXECUTABLE}" "-c" + "import re, ${module}; print(re.compile('/__init__.py.*').sub('',${module}.__file__))" + RESULT_VARIABLE _${module}_status + OUTPUT_VARIABLE _${module}_location + ERROR_QUIET OUTPUT_STRIP_TRAILING_WHITESPACE) if(NOT _${module}_status) - set(PY_${module_upper}_VERSION ${_${module}_version} CACHE STRING - "Version of Python module ${module}") + set(PY_${module_upper} + ${_${module}_location} + CACHE STRING "Location of Python module ${module}") endif(NOT _${module}_status) + endif(NOT PY_${module_upper}) + find_package_handle_standard_args(PY_${module} DEFAULT_MSG PY_${module_upper}) + if(NOT PY_${module_upper}_FOUND AND ${module}_FIND_REQUIRED) + message(FATAL_ERROR "python module ${module} is not found") + endif() + + execute_process( + COMMAND "${PYTHON_EXECUTABLE}" "-c" + "import sys, ${module}; sys.stdout.write(${module}.__version__)" + OUTPUT_VARIABLE _${module}_version + RESULT_VARIABLE _${module}_status + ERROR_QUIET OUTPUT_STRIP_TRAILING_WHITESPACE) + if(NOT _${module}_status) + set(PY_${module_upper}_VERSION + ${_${module}_version} + CACHE STRING "Version of Python module ${module}") + endif(NOT _${module}_status) - set(PY_${module_upper}_FOUND ${PY_${module_upper}_FOUND} PARENT_SCOPE) - set(PY_${module_upper}_VERSION ${PY_${module_upper}_VERSION} PARENT_SCOPE) + set(PY_${module_upper}_FOUND + ${PY_${module_upper}_FOUND} + PARENT_SCOPE) + set(PY_${module_upper}_VERSION + ${PY_${module_upper}_VERSION} + PARENT_SCOPE) endfunction(find_python_module) diff --git a/cmake/rccl.cmake b/cmake/rccl.cmake index f3a472ac930..1f78c74f40e 100644 --- a/cmake/rccl.cmake +++ b/cmake/rccl.cmake @@ -1,28 +1,30 @@ if(NOT WITH_ROCM) - return() + return() endif() # Now we don't support RCCL on windows if(WIN32) - return() + return() endif() if(WITH_RCCL) - set(RCCL_ROOT ${ROCM_PATH}/rccl CACHE PATH "RCCL ROOT") - find_path(RCCL_INCLUDE_DIR rccl.h - PATHS ${RCCL_ROOT} ${RCCL_ROOT}/include ${RCCL_ROOT}/local/include - $ENV{RCCL_ROOT} $ENV{RCCL_ROOT}/include $ENV{RCCL_ROOT}/local/include - NO_DEFAULT_PATH - ) + set(RCCL_ROOT + ${ROCM_PATH}/rccl + CACHE PATH "RCCL ROOT") + find_path( + RCCL_INCLUDE_DIR rccl.h + PATHS ${RCCL_ROOT} ${RCCL_ROOT}/include ${RCCL_ROOT}/local/include + $ENV{RCCL_ROOT} $ENV{RCCL_ROOT}/include $ENV{RCCL_ROOT}/local/include + NO_DEFAULT_PATH) - file(READ ${RCCL_INCLUDE_DIR}/rccl.h RCCL_VERSION_FILE_CONTENTS) + file(READ ${RCCL_INCLUDE_DIR}/rccl.h RCCL_VERSION_FILE_CONTENTS) - string(REGEX MATCH "define NCCL_VERSION_CODE +([0-9]+)" - RCCL_VERSION "${RCCL_VERSION_FILE_CONTENTS}") - string(REGEX REPLACE "define NCCL_VERSION_CODE +([0-9]+)" "\\1" - RCCL_VERSION "${RCCL_VERSION}") + string(REGEX MATCH "define NCCL_VERSION_CODE +([0-9]+)" RCCL_VERSION + "${RCCL_VERSION_FILE_CONTENTS}") + string(REGEX REPLACE "define NCCL_VERSION_CODE +([0-9]+)" "\\1" RCCL_VERSION + "${RCCL_VERSION}") - # 2604 for ROCM3.5 and 2708 for ROCM 3.9 - message(STATUS "Current RCCL header is ${RCCL_INCLUDE_DIR}/rccl.h. " - "Current RCCL version is v${RCCL_VERSION}. ") + # 2604 for ROCM3.5 and 2708 for ROCM 3.9 + message(STATUS "Current RCCL header is ${RCCL_INCLUDE_DIR}/rccl.h. " + "Current RCCL version is v${RCCL_VERSION}. ") endif() diff --git a/cmake/simd.cmake b/cmake/simd.cmake index 566dc75fda0..ff8b9d6f9a9 100644 --- a/cmake/simd.cmake +++ b/cmake/simd.cmake @@ -4,49 +4,62 @@ include(CheckCXXSourceRuns) include(CheckCXXSourceCompiles) -if(CMAKE_COMPILER_IS_GNUCC OR CMAKE_COMPILER_IS_GNUCXX OR CMAKE_CXX_COMPILER_ID MATCHES "Clang") - set(MMX_FLAG "-mmmx") - set(SSE2_FLAG "-msse2") - set(SSE3_FLAG "-msse3") - set(AVX_FLAG "-mavx") - set(AVX2_FLAG "-mavx2") - set(AVX512F_FLAG "-mavx512f") +if(CMAKE_COMPILER_IS_GNUCC + OR CMAKE_COMPILER_IS_GNUCXX + OR CMAKE_CXX_COMPILER_ID MATCHES "Clang") + set(MMX_FLAG "-mmmx") + set(SSE2_FLAG "-msse2") + set(SSE3_FLAG "-msse3") + set(AVX_FLAG "-mavx") + set(AVX2_FLAG "-mavx2") + set(AVX512F_FLAG "-mavx512f") elseif(MSVC) - set(MMX_FLAG "/arch:MMX") - set(SSE2_FLAG "/arch:SSE2") - set(SSE3_FLAG "/arch:SSE3") - SET(AVX_FLAG "/arch:AVX") - SET(AVX2_FLAG "/arch:AVX2") + set(MMX_FLAG "/arch:MMX") + set(SSE2_FLAG "/arch:SSE2") + set(SSE3_FLAG "/arch:SSE3") + set(AVX_FLAG "/arch:AVX") + set(AVX2_FLAG "/arch:AVX2") endif() set(CMAKE_REQUIRED_FLAGS_RETAINED ${CMAKE_REQUIRED_FLAGS}) # Check MMX set(CMAKE_REQUIRED_FLAGS ${MMX_FLAG}) -set(MMX_FOUND_EXITCODE 1 CACHE STRING "Result from TRY_RUN" FORCE) -CHECK_CXX_SOURCE_RUNS(" +set(MMX_FOUND_EXITCODE + 1 + CACHE STRING "Result from TRY_RUN" FORCE) +check_cxx_source_runs( + " #include int main() { _mm_setzero_si64(); return 0; -}" MMX_FOUND) +}" + MMX_FOUND) # Check SSE2 set(CMAKE_REQUIRED_FLAGS ${SSE2_FLAG}) -set(SSE2_FOUND_EXITCODE 1 CACHE STRING "Result from TRY_RUN" FORCE) -CHECK_CXX_SOURCE_RUNS(" +set(SSE2_FOUND_EXITCODE + 1 + CACHE STRING "Result from TRY_RUN" FORCE) +check_cxx_source_runs( + " #include int main() { _mm_setzero_si128(); return 0; -}" SSE2_FOUND) +}" + SSE2_FOUND) # Check SSE3 set(CMAKE_REQUIRED_FLAGS ${SSE3_FLAG}) -set(SSE3_FOUND_EXITCODE 1 CACHE STRING "Result from TRY_RUN" FORCE) -CHECK_CXX_SOURCE_RUNS(" +set(SSE3_FOUND_EXITCODE + 1 + CACHE STRING "Result from TRY_RUN" FORCE) +check_cxx_source_runs( + " #include int main() { @@ -55,12 +68,16 @@ int main() __m128d result = _mm_addsub_pd(a, b); result = _mm_movedup_pd(result); return 0; -}" SSE3_FOUND) +}" + SSE3_FOUND) # Check AVX set(CMAKE_REQUIRED_FLAGS ${AVX_FLAG}) -set(AVX_FOUND_EXITCODE 1 CACHE STRING "Result from TRY_RUN" FORCE) -CHECK_CXX_SOURCE_RUNS(" +set(AVX_FOUND_EXITCODE + 1 + CACHE STRING "Result from TRY_RUN" FORCE) +check_cxx_source_runs( + " #include int main() { @@ -68,24 +85,32 @@ int main() __m256 b = _mm256_set_ps (1.0f, 2.0f, 3.0f, 4.0f, 1.0f, 2.0f, 3.0f, 4.0f); __m256 result = _mm256_add_ps (a, b); return 0; -}" AVX_FOUND) +}" + AVX_FOUND) # Check AVX 2 set(CMAKE_REQUIRED_FLAGS ${AVX2_FLAG}) -set(AVX2_FOUND_EXITCODE 1 CACHE STRING "Result from TRY_RUN" FORCE) -CHECK_CXX_SOURCE_RUNS(" +set(AVX2_FOUND_EXITCODE + 1 + CACHE STRING "Result from TRY_RUN" FORCE) +check_cxx_source_runs( + " #include int main() { __m256i a = _mm256_set_epi32 (-1, 2, -3, 4, -1, 2, -3, 4); __m256i result = _mm256_abs_epi32 (a); return 0; -}" AVX2_FOUND) +}" + AVX2_FOUND) # Check AVX512F set(CMAKE_REQUIRED_FLAGS ${AVX512F_FLAG}) -set(AVX512F_FOUND_EXITCODE 1 CACHE STRING "Result from TRY_RUN" FORCE) -CHECK_CXX_SOURCE_RUNS(" +set(AVX512F_FOUND_EXITCODE + 1 + CACHE STRING "Result from TRY_RUN" FORCE) +check_cxx_source_runs( + " #include int main() { @@ -93,7 +118,9 @@ int main() 13, -5, 6, -7, 9, 2, -6, 3); __m512i result = _mm512_abs_epi32 (a); return 0; -}" AVX512F_FOUND) +}" + AVX512F_FOUND) set(CMAKE_REQUIRED_FLAGS ${CMAKE_REQUIRED_FLAGS_RETAINED}) -mark_as_advanced(MMX_FOUND SSE2_FOUND SSE3_FOUND AVX_FOUND AVX2_FOUND AVX512F_FOUND) +mark_as_advanced(MMX_FOUND SSE2_FOUND SSE3_FOUND AVX_FOUND AVX2_FOUND + AVX512F_FOUND) diff --git a/cmake/system.cmake b/cmake/system.cmake index c740136b93d..0562077eae1 100644 --- a/cmake/system.cmake +++ b/cmake/system.cmake @@ -1,11 +1,11 @@ # Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserved. -# +# # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. # You may obtain a copy of the License at -# +# # http://www.apache.org/licenses/LICENSE-2.0 -# +# # Unless required by applicable law or agreed to in writing, software # distributed under the License is distributed on an "AS IS" BASIS, # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. @@ -25,66 +25,82 @@ if(UNIX AND NOT APPLE) set(LINUX TRUE) endif(UNIX AND NOT APPLE) -IF(WIN32) - SET(HOST_SYSTEM "win32") -ELSE(WIN32) - IF(APPLE) - SET(HOST_SYSTEM "macosx") - EXEC_PROGRAM(sw_vers ARGS -productVersion OUTPUT_VARIABLE HOST_SYSTEM_VERSION) - STRING(REGEX MATCH "[0-9]+.[0-9]+" MACOS_VERSION "${HOST_SYSTEM_VERSION}") - IF(NOT DEFINED $ENV{MACOSX_DEPLOYMENT_TARGET}) - # Set cache variable - end user may change this during ccmake or cmake-gui configure. - SET(CMAKE_OSX_DEPLOYMENT_TARGET ${MACOS_VERSION} CACHE STRING - "Minimum OS X version to target for deployment (at runtime); newer APIs weak linked. Set to empty string for default value.") - ENDIF() - set(CMAKE_EXE_LINKER_FLAGS "-framework CoreFoundation -framework Security") - ELSE(APPLE) +if(WIN32) + set(HOST_SYSTEM "win32") +else(WIN32) + if(APPLE) + set(HOST_SYSTEM "macosx") + exec_program( + sw_vers ARGS + -productVersion + OUTPUT_VARIABLE HOST_SYSTEM_VERSION) + string(REGEX MATCH "[0-9]+.[0-9]+" MACOS_VERSION "${HOST_SYSTEM_VERSION}") + if(NOT DEFINED $ENV{MACOSX_DEPLOYMENT_TARGET}) + # Set cache variable - end user may change this during ccmake or cmake-gui configure. + set(CMAKE_OSX_DEPLOYMENT_TARGET + ${MACOS_VERSION} + CACHE + STRING + "Minimum OS X version to target for deployment (at runtime); newer APIs weak linked. Set to empty string for default value." + ) + endif() + set(CMAKE_EXE_LINKER_FLAGS "-framework CoreFoundation -framework Security") + else(APPLE) - IF(EXISTS "/etc/issue") - FILE(READ "/etc/issue" LINUX_ISSUE) - IF(LINUX_ISSUE MATCHES "CentOS") - SET(HOST_SYSTEM "centos") - ELSEIF(LINUX_ISSUE MATCHES "Debian") - SET(HOST_SYSTEM "debian") - ELSEIF(LINUX_ISSUE MATCHES "Ubuntu") - SET(HOST_SYSTEM "ubuntu") - ELSEIF(LINUX_ISSUE MATCHES "Red Hat") - SET(HOST_SYSTEM "redhat") - ELSEIF(LINUX_ISSUE MATCHES "Fedora") - SET(HOST_SYSTEM "fedora") - ENDIF() + if(EXISTS "/etc/issue") + file(READ "/etc/issue" LINUX_ISSUE) + if(LINUX_ISSUE MATCHES "CentOS") + set(HOST_SYSTEM "centos") + elseif(LINUX_ISSUE MATCHES "Debian") + set(HOST_SYSTEM "debian") + elseif(LINUX_ISSUE MATCHES "Ubuntu") + set(HOST_SYSTEM "ubuntu") + elseif(LINUX_ISSUE MATCHES "Red Hat") + set(HOST_SYSTEM "redhat") + elseif(LINUX_ISSUE MATCHES "Fedora") + set(HOST_SYSTEM "fedora") + endif() - STRING(REGEX MATCH "(([0-9]+)\\.)+([0-9]+)" HOST_SYSTEM_VERSION "${LINUX_ISSUE}") - ENDIF(EXISTS "/etc/issue") + string(REGEX MATCH "(([0-9]+)\\.)+([0-9]+)" HOST_SYSTEM_VERSION + "${LINUX_ISSUE}") + endif(EXISTS "/etc/issue") - IF(EXISTS "/etc/redhat-release") - FILE(READ "/etc/redhat-release" LINUX_ISSUE) - IF(LINUX_ISSUE MATCHES "CentOS") - SET(HOST_SYSTEM "centos") - ENDIF() - ENDIF(EXISTS "/etc/redhat-release") + if(EXISTS "/etc/redhat-release") + file(READ "/etc/redhat-release" LINUX_ISSUE) + if(LINUX_ISSUE MATCHES "CentOS") + set(HOST_SYSTEM "centos") + endif() + endif(EXISTS "/etc/redhat-release") - IF(NOT HOST_SYSTEM) - SET(HOST_SYSTEM ${CMAKE_SYSTEM_NAME}) - ENDIF() + if(NOT HOST_SYSTEM) + set(HOST_SYSTEM ${CMAKE_SYSTEM_NAME}) + endif() - ENDIF(APPLE) -ENDIF(WIN32) + endif(APPLE) +endif(WIN32) # query number of logical cores -CMAKE_HOST_SYSTEM_INFORMATION(RESULT CPU_CORES QUERY NUMBER_OF_LOGICAL_CORES) +cmake_host_system_information(RESULT CPU_CORES QUERY NUMBER_OF_LOGICAL_CORES) -MARK_AS_ADVANCED(HOST_SYSTEM CPU_CORES) +mark_as_advanced(HOST_SYSTEM CPU_CORES) -MESSAGE(STATUS "Found Paddle host system: ${HOST_SYSTEM}, version: ${HOST_SYSTEM_VERSION}") -MESSAGE(STATUS "Found Paddle host system's CPU: ${CPU_CORES} cores") +message( + STATUS + "Found Paddle host system: ${HOST_SYSTEM}, version: ${HOST_SYSTEM_VERSION}") +message(STATUS "Found Paddle host system's CPU: ${CPU_CORES} cores") # external dependencies log output -SET(EXTERNAL_PROJECT_LOG_ARGS - LOG_DOWNLOAD 0 # Wrap download in script to log output - LOG_UPDATE 1 # Wrap update in script to log output - LOG_CONFIGURE 1 # Wrap configure in script to log output - LOG_BUILD 0 # Wrap build in script to log output - LOG_TEST 1 # Wrap test in script to log output - LOG_INSTALL 0 # Wrap install in script to log output +set(EXTERNAL_PROJECT_LOG_ARGS + LOG_DOWNLOAD + 0 # Wrap download in script to log output + LOG_UPDATE + 1 # Wrap update in script to log output + LOG_CONFIGURE + 1 # Wrap configure in script to log output + LOG_BUILD + 0 # Wrap build in script to log output + LOG_TEST + 1 # Wrap test in script to log output + LOG_INSTALL + 0 # Wrap install in script to log output ) diff --git a/cmake/tensorrt.cmake b/cmake/tensorrt.cmake index e4b22befff8..5651ceb76e5 100644 --- a/cmake/tensorrt.cmake +++ b/cmake/tensorrt.cmake @@ -1,87 +1,103 @@ if(NOT WITH_GPU OR NOT WITH_TENSORRT) - return() + return() endif() if(WIN32) - string(REPLACE "\\" "/" TENSORRT_ROOT "${TENSORRT_ROOT}") - set(TR_INFER_LIB nvinfer.lib) - set(TR_INFER_RT nvinfer.dll) - set(TR_INFER_PLUGIN_RT nvinfer_plugin.dll) + string(REPLACE "\\" "/" TENSORRT_ROOT "${TENSORRT_ROOT}") + set(TR_INFER_LIB nvinfer.lib) + set(TR_INFER_RT nvinfer.dll) + set(TR_INFER_PLUGIN_RT nvinfer_plugin.dll) else() - set(TENSORRT_ROOT "/usr" CACHE PATH "TENSORRT ROOT") - set(TR_INFER_LIB libnvinfer.a) - set(TR_INFER_RT libnvinfer.so) - set(TR_INFER_PLUGIN_RT libnvinfer_plugin.so) + set(TENSORRT_ROOT + "/usr" + CACHE PATH "TENSORRT ROOT") + set(TR_INFER_LIB libnvinfer.a) + set(TR_INFER_RT libnvinfer.so) + set(TR_INFER_PLUGIN_RT libnvinfer_plugin.so) endif() -find_path(TENSORRT_INCLUDE_DIR NvInfer.h - PATHS ${TENSORRT_ROOT} ${TENSORRT_ROOT}/include - ${TENSORRT_ROOT}/include/${CMAKE_LIBRARY_ARCHITECTURE} - $ENV{TENSORRT_ROOT} $ENV{TENSORRT_ROOT}/include - $ENV{TENSORRT_ROOT}/include/${CMAKE_LIBRARY_ARCHITECTURE} - NO_DEFAULT_PATH -) +find_path( + TENSORRT_INCLUDE_DIR NvInfer.h + PATHS ${TENSORRT_ROOT} + ${TENSORRT_ROOT}/include + ${TENSORRT_ROOT}/include/${CMAKE_LIBRARY_ARCHITECTURE} + $ENV{TENSORRT_ROOT} + $ENV{TENSORRT_ROOT}/include + $ENV{TENSORRT_ROOT}/include/${CMAKE_LIBRARY_ARCHITECTURE} + NO_DEFAULT_PATH) -find_path(TENSORRT_LIBRARY_DIR NAMES ${TR_INFER_LIB} ${TR_INFER_RT} - PATHS ${TENSORRT_ROOT} ${TENSORRT_ROOT}/lib - ${TENSORRT_ROOT}/lib/${CMAKE_LIBRARY_ARCHITECTURE} - $ENV{TENSORRT_ROOT} $ENV{TENSORRT_ROOT}/lib - $ENV{TENSORRT_ROOT}/lib/${CMAKE_LIBRARY_ARCHITECTURE} - NO_DEFAULT_PATH - DOC "Path to TensorRT library." -) +find_path( + TENSORRT_LIBRARY_DIR + NAMES ${TR_INFER_LIB} ${TR_INFER_RT} + PATHS ${TENSORRT_ROOT} + ${TENSORRT_ROOT}/lib + ${TENSORRT_ROOT}/lib/${CMAKE_LIBRARY_ARCHITECTURE} + $ENV{TENSORRT_ROOT} + $ENV{TENSORRT_ROOT}/lib + $ENV{TENSORRT_ROOT}/lib/${CMAKE_LIBRARY_ARCHITECTURE} + NO_DEFAULT_PATH + DOC "Path to TensorRT library.") -find_library(TENSORRT_LIBRARY NAMES ${TR_INFER_LIB} ${TR_INFER_RT} - PATHS ${TENSORRT_LIBRARY_DIR} - NO_DEFAULT_PATH - DOC "Path to TensorRT library.") +find_library( + TENSORRT_LIBRARY + NAMES ${TR_INFER_LIB} ${TR_INFER_RT} + PATHS ${TENSORRT_LIBRARY_DIR} + NO_DEFAULT_PATH + DOC "Path to TensorRT library.") if(TENSORRT_INCLUDE_DIR AND TENSORRT_LIBRARY) - set(TENSORRT_FOUND ON) + set(TENSORRT_FOUND ON) else() - set(TENSORRT_FOUND OFF) - message(WARNING "TensorRT is disabled. You are compiling PaddlePaddle with option -DWITH_TENSORRT=ON, but TensorRT is not found, please configure path to TensorRT with option -DTENSORRT_ROOT or install it.") + set(TENSORRT_FOUND OFF) + message( + WARNING + "TensorRT is disabled. You are compiling PaddlePaddle with option -DWITH_TENSORRT=ON, but TensorRT is not found, please configure path to TensorRT with option -DTENSORRT_ROOT or install it." + ) endif() if(TENSORRT_FOUND) - file(READ ${TENSORRT_INCLUDE_DIR}/NvInfer.h TENSORRT_VERSION_FILE_CONTENTS) - string(REGEX MATCH "define NV_TENSORRT_MAJOR +([0-9]+)" TENSORRT_MAJOR_VERSION - "${TENSORRT_VERSION_FILE_CONTENTS}") - string(REGEX MATCH "define NV_TENSORRT_MINOR +([0-9]+)" TENSORRT_MINOR_VERSION - "${TENSORRT_VERSION_FILE_CONTENTS}") - string(REGEX MATCH "define NV_TENSORRT_PATCH +([0-9]+)" TENSORRT_PATCH_VERSION - "${TENSORRT_VERSION_FILE_CONTENTS}") - string(REGEX MATCH "define NV_TENSORRT_BUILD +([0-9]+)" TENSORRT_BUILD_VERSION - "${TENSORRT_VERSION_FILE_CONTENTS}") + file(READ ${TENSORRT_INCLUDE_DIR}/NvInfer.h TENSORRT_VERSION_FILE_CONTENTS) + string(REGEX MATCH "define NV_TENSORRT_MAJOR +([0-9]+)" + TENSORRT_MAJOR_VERSION "${TENSORRT_VERSION_FILE_CONTENTS}") + string(REGEX MATCH "define NV_TENSORRT_MINOR +([0-9]+)" + TENSORRT_MINOR_VERSION "${TENSORRT_VERSION_FILE_CONTENTS}") + string(REGEX MATCH "define NV_TENSORRT_PATCH +([0-9]+)" + TENSORRT_PATCH_VERSION "${TENSORRT_VERSION_FILE_CONTENTS}") + string(REGEX MATCH "define NV_TENSORRT_BUILD +([0-9]+)" + TENSORRT_BUILD_VERSION "${TENSORRT_VERSION_FILE_CONTENTS}") - if("${TENSORRT_MAJOR_VERSION}" STREQUAL "") - file(READ ${TENSORRT_INCLUDE_DIR}/NvInferVersion.h TENSORRT_VERSION_FILE_CONTENTS) - string(REGEX MATCH "define NV_TENSORRT_MAJOR +([0-9]+)" TENSORRT_MAJOR_VERSION - "${TENSORRT_VERSION_FILE_CONTENTS}") - string(REGEX MATCH "define NV_TENSORRT_MINOR +([0-9]+)" TENSORRT_MINOR_VERSION - "${TENSORRT_VERSION_FILE_CONTENTS}") - string(REGEX MATCH "define NV_TENSORRT_PATCH +([0-9]+)" TENSORRT_PATCH_VERSION - "${TENSORRT_VERSION_FILE_CONTENTS}") - string(REGEX MATCH "define NV_TENSORRT_BUILD +([0-9]+)" TENSORRT_BUILD_VERSION - "${TENSORRT_VERSION_FILE_CONTENTS}") - endif() + if("${TENSORRT_MAJOR_VERSION}" STREQUAL "") + file(READ ${TENSORRT_INCLUDE_DIR}/NvInferVersion.h + TENSORRT_VERSION_FILE_CONTENTS) + string(REGEX MATCH "define NV_TENSORRT_MAJOR +([0-9]+)" + TENSORRT_MAJOR_VERSION "${TENSORRT_VERSION_FILE_CONTENTS}") + string(REGEX MATCH "define NV_TENSORRT_MINOR +([0-9]+)" + TENSORRT_MINOR_VERSION "${TENSORRT_VERSION_FILE_CONTENTS}") + string(REGEX MATCH "define NV_TENSORRT_PATCH +([0-9]+)" + TENSORRT_PATCH_VERSION "${TENSORRT_VERSION_FILE_CONTENTS}") + string(REGEX MATCH "define NV_TENSORRT_BUILD +([0-9]+)" + TENSORRT_BUILD_VERSION "${TENSORRT_VERSION_FILE_CONTENTS}") + endif() - if("${TENSORRT_MAJOR_VERSION}" STREQUAL "") - message(SEND_ERROR "Failed to detect TensorRT version.") - endif() + if("${TENSORRT_MAJOR_VERSION}" STREQUAL "") + message(SEND_ERROR "Failed to detect TensorRT version.") + endif() - string(REGEX REPLACE "define NV_TENSORRT_MAJOR +([0-9]+)" "\\1" - TENSORRT_MAJOR_VERSION "${TENSORRT_MAJOR_VERSION}") - string(REGEX REPLACE "define NV_TENSORRT_MINOR +([0-9]+)" "\\1" - TENSORRT_MINOR_VERSION "${TENSORRT_MINOR_VERSION}") - string(REGEX REPLACE "define NV_TENSORRT_PATCH +([0-9]+)" "\\1" - TENSORRT_PATCH_VERSION "${TENSORRT_PATCH_VERSION}") - string(REGEX REPLACE "define NV_TENSORRT_BUILD +([0-9]+)" "\\1" - TENSORRT_BUILD_VERSION "${TENSORRT_BUILD_VERSION}") + string(REGEX REPLACE "define NV_TENSORRT_MAJOR +([0-9]+)" "\\1" + TENSORRT_MAJOR_VERSION "${TENSORRT_MAJOR_VERSION}") + string(REGEX REPLACE "define NV_TENSORRT_MINOR +([0-9]+)" "\\1" + TENSORRT_MINOR_VERSION "${TENSORRT_MINOR_VERSION}") + string(REGEX REPLACE "define NV_TENSORRT_PATCH +([0-9]+)" "\\1" + TENSORRT_PATCH_VERSION "${TENSORRT_PATCH_VERSION}") + string(REGEX REPLACE "define NV_TENSORRT_BUILD +([0-9]+)" "\\1" + TENSORRT_BUILD_VERSION "${TENSORRT_BUILD_VERSION}") - message(STATUS "Current TensorRT header is ${TENSORRT_INCLUDE_DIR}/NvInfer.h. " - "Current TensorRT version is v${TENSORRT_MAJOR_VERSION}.${TENSORRT_MINOR_VERSION}.${TENSORRT_PATCH_VERSION}.${TENSORRT_BUILD_VERSION} ") - include_directories(${TENSORRT_INCLUDE_DIR}) - link_directories(${TENSORRT_LIBRARY}) - add_definitions(-DPADDLE_WITH_TENSORRT) + message( + STATUS + "Current TensorRT header is ${TENSORRT_INCLUDE_DIR}/NvInfer.h. " + "Current TensorRT version is v${TENSORRT_MAJOR_VERSION}.${TENSORRT_MINOR_VERSION}.${TENSORRT_PATCH_VERSION}.${TENSORRT_BUILD_VERSION} " + ) + include_directories(${TENSORRT_INCLUDE_DIR}) + link_directories(${TENSORRT_LIBRARY}) + add_definitions(-DPADDLE_WITH_TENSORRT) endif() diff --git a/cmake/third_party.cmake b/cmake/third_party.cmake index eb6fa4ee13c..2004241ab1a 100755 --- a/cmake/third_party.cmake +++ b/cmake/third_party.cmake @@ -15,10 +15,14 @@ include(ExternalProject) # Creat a target named "third_party", which can compile external dependencies on all platform(windows/linux/mac) -set(THIRD_PARTY_PATH "${CMAKE_BINARY_DIR}/third_party" CACHE STRING - "A path setting third party libraries download & build directories.") -set(THIRD_PARTY_CACHE_PATH "${CMAKE_SOURCE_DIR}" CACHE STRING - "A path cache third party source code to avoid repeated download.") +set(THIRD_PARTY_PATH + "${CMAKE_BINARY_DIR}/third_party" + CACHE STRING + "A path setting third party libraries download & build directories.") +set(THIRD_PARTY_CACHE_PATH + "${CMAKE_SOURCE_DIR}" + CACHE STRING + "A path cache third party source code to avoid repeated download.") set(THIRD_PARTY_BUILD_TYPE Release) set(third_party_deps) @@ -39,389 +43,457 @@ set(third_party_deps) # TAG ${TARGET_TAG} # DIR ${TARGET_SOURCE_DIR}) -FUNCTION(cache_third_party TARGET) - SET(options "") - SET(oneValueArgs URL REPOSITORY TAG DIR) - SET(multiValueArgs "") - cmake_parse_arguments(cache_third_party "${optionps}" "${oneValueArgs}" "${multiValueArgs}" ${ARGN}) - - STRING(REPLACE "extern_" "" TARGET_NAME ${TARGET}) - STRING(REGEX REPLACE "[0-9]+" "" TARGET_NAME ${TARGET_NAME}) - STRING(TOUPPER ${TARGET_NAME} TARGET_NAME) - IF(cache_third_party_REPOSITORY) - SET(${TARGET_NAME}_DOWNLOAD_CMD - GIT_REPOSITORY ${cache_third_party_REPOSITORY}) - IF(cache_third_party_TAG) - LIST(APPEND ${TARGET_NAME}_DOWNLOAD_CMD - GIT_TAG ${cache_third_party_TAG}) - ENDIF() - ELSEIF(cache_third_party_URL) - SET(${TARGET_NAME}_DOWNLOAD_CMD - URL ${cache_third_party_URL}) - ELSE() - MESSAGE(FATAL_ERROR "Download link (Git repo or URL) must be specified for cache!") - ENDIF() - IF(WITH_TP_CACHE) - IF(NOT cache_third_party_DIR) - MESSAGE(FATAL_ERROR "Please input the ${TARGET_NAME}_SOURCE_DIR for overwriting when -DWITH_TP_CACHE=ON") - ENDIF() - # Generate and verify cache dir for third_party source code - SET(cache_third_party_REPOSITORY ${cache_third_party_REPOSITORY} ${cache_third_party_URL}) - IF(cache_third_party_REPOSITORY AND cache_third_party_TAG) - STRING(MD5 HASH_REPO ${cache_third_party_REPOSITORY}) - STRING(MD5 HASH_GIT ${cache_third_party_TAG}) - STRING(SUBSTRING ${HASH_REPO} 0 8 HASH_REPO) - STRING(SUBSTRING ${HASH_GIT} 0 8 HASH_GIT) - STRING(CONCAT HASH ${HASH_REPO} ${HASH_GIT}) - # overwrite the original SOURCE_DIR when cache directory - SET(${cache_third_party_DIR} ${THIRD_PARTY_CACHE_PATH}/third_party/${TARGET}_${HASH}) - ELSEIF(cache_third_party_REPOSITORY) - STRING(MD5 HASH_REPO ${cache_third_party_REPOSITORY}) - STRING(SUBSTRING ${HASH_REPO} 0 16 HASH) - # overwrite the original SOURCE_DIR when cache directory - SET(${cache_third_party_DIR} ${THIRD_PARTY_CACHE_PATH}/third_party/${TARGET}_${HASH}) - ENDIF() - - IF(EXISTS ${${cache_third_party_DIR}}) - # judge whether the cache dir is empty - FILE(GLOB files ${${cache_third_party_DIR}}/*) - LIST(LENGTH files files_len) - IF(files_len GREATER 0) - list(APPEND ${TARGET_NAME}_DOWNLOAD_CMD DOWNLOAD_COMMAND "") - ENDIF() - ENDIF() - SET(${cache_third_party_DIR} ${${cache_third_party_DIR}} PARENT_SCOPE) - ENDIF() - - # Pass ${TARGET_NAME}_DOWNLOAD_CMD to parent scope, the double quotation marks can't be removed - SET(${TARGET_NAME}_DOWNLOAD_CMD "${${TARGET_NAME}_DOWNLOAD_CMD}" PARENT_SCOPE) -ENDFUNCTION() - -MACRO(UNSET_VAR VAR_NAME) - UNSET(${VAR_NAME} CACHE) - UNSET(${VAR_NAME}) -ENDMACRO() +function(cache_third_party TARGET) + set(options "") + set(oneValueArgs URL REPOSITORY TAG DIR) + set(multiValueArgs "") + cmake_parse_arguments(cache_third_party "${optionps}" "${oneValueArgs}" + "${multiValueArgs}" ${ARGN}) + + string(REPLACE "extern_" "" TARGET_NAME ${TARGET}) + string(REGEX REPLACE "[0-9]+" "" TARGET_NAME ${TARGET_NAME}) + string(TOUPPER ${TARGET_NAME} TARGET_NAME) + if(cache_third_party_REPOSITORY) + set(${TARGET_NAME}_DOWNLOAD_CMD GIT_REPOSITORY + ${cache_third_party_REPOSITORY}) + if(cache_third_party_TAG) + list(APPEND ${TARGET_NAME}_DOWNLOAD_CMD GIT_TAG ${cache_third_party_TAG}) + endif() + elseif(cache_third_party_URL) + set(${TARGET_NAME}_DOWNLOAD_CMD URL ${cache_third_party_URL}) + else() + message( + FATAL_ERROR "Download link (Git repo or URL) must be specified for cache!" + ) + endif() + if(WITH_TP_CACHE) + if(NOT cache_third_party_DIR) + message( + FATAL_ERROR + "Please input the ${TARGET_NAME}_SOURCE_DIR for overwriting when -DWITH_TP_CACHE=ON" + ) + endif() + # Generate and verify cache dir for third_party source code + set(cache_third_party_REPOSITORY ${cache_third_party_REPOSITORY} + ${cache_third_party_URL}) + if(cache_third_party_REPOSITORY AND cache_third_party_TAG) + string(MD5 HASH_REPO ${cache_third_party_REPOSITORY}) + string(MD5 HASH_GIT ${cache_third_party_TAG}) + string(SUBSTRING ${HASH_REPO} 0 8 HASH_REPO) + string(SUBSTRING ${HASH_GIT} 0 8 HASH_GIT) + string(CONCAT HASH ${HASH_REPO} ${HASH_GIT}) + # overwrite the original SOURCE_DIR when cache directory + set(${cache_third_party_DIR} + ${THIRD_PARTY_CACHE_PATH}/third_party/${TARGET}_${HASH}) + elseif(cache_third_party_REPOSITORY) + string(MD5 HASH_REPO ${cache_third_party_REPOSITORY}) + string(SUBSTRING ${HASH_REPO} 0 16 HASH) + # overwrite the original SOURCE_DIR when cache directory + set(${cache_third_party_DIR} + ${THIRD_PARTY_CACHE_PATH}/third_party/${TARGET}_${HASH}) + endif() + + if(EXISTS ${${cache_third_party_DIR}}) + # judge whether the cache dir is empty + file(GLOB files ${${cache_third_party_DIR}}/*) + list(LENGTH files files_len) + if(files_len GREATER 0) + list(APPEND ${TARGET_NAME}_DOWNLOAD_CMD DOWNLOAD_COMMAND "") + endif() + endif() + set(${cache_third_party_DIR} + ${${cache_third_party_DIR}} + PARENT_SCOPE) + endif() + + # Pass ${TARGET_NAME}_DOWNLOAD_CMD to parent scope, the double quotation marks can't be removed + set(${TARGET_NAME}_DOWNLOAD_CMD + "${${TARGET_NAME}_DOWNLOAD_CMD}" + PARENT_SCOPE) +endfunction() + +macro(UNSET_VAR VAR_NAME) + unset(${VAR_NAME} CACHE) + unset(${VAR_NAME}) +endmacro() # Funciton to Download the dependencies during compilation # This function has 2 parameters, URL / DIRNAME: # 1. URL: The download url of 3rd dependencies # 2. NAME: The name of file, that determin the dirname # -FUNCTION(file_download_and_uncompress URL NAME) +function(file_download_and_uncompress URL NAME) set(options "") set(oneValueArgs MD5) set(multiValueArgs "") - cmake_parse_arguments(URL "${options}" "${oneValueArgs}" "${multiValueArgs}" ${ARGN}) - MESSAGE(STATUS "Download dependence[${NAME}] from ${URL}, MD5: ${URL_MD5}") - SET(${NAME}_INCLUDE_DIR ${THIRD_PARTY_PATH}/${NAME}/data PARENT_SCOPE) + cmake_parse_arguments(URL "${options}" "${oneValueArgs}" "${multiValueArgs}" + ${ARGN}) + message(STATUS "Download dependence[${NAME}] from ${URL}, MD5: ${URL_MD5}") + set(${NAME}_INCLUDE_DIR + ${THIRD_PARTY_PATH}/${NAME}/data + PARENT_SCOPE) ExternalProject_Add( - download_${NAME} - ${EXTERNAL_PROJECT_LOG_ARGS} - PREFIX ${THIRD_PARTY_PATH}/${NAME} - URL ${URL} - URL_MD5 ${URL_MD5} - TIMEOUT 120 - DOWNLOAD_DIR ${THIRD_PARTY_PATH}/${NAME}/data/ - SOURCE_DIR ${THIRD_PARTY_PATH}/${NAME}/data/ - DOWNLOAD_NO_PROGRESS 1 - CONFIGURE_COMMAND "" - BUILD_COMMAND "" - UPDATE_COMMAND "" - INSTALL_COMMAND "" - ) - set(third_party_deps ${third_party_deps} download_${NAME} PARENT_SCOPE) -ENDFUNCTION() - + download_${NAME} + ${EXTERNAL_PROJECT_LOG_ARGS} + PREFIX ${THIRD_PARTY_PATH}/${NAME} + URL ${URL} + URL_MD5 ${URL_MD5} + TIMEOUT 120 + DOWNLOAD_DIR ${THIRD_PARTY_PATH}/${NAME}/data/ + SOURCE_DIR ${THIRD_PARTY_PATH}/${NAME}/data/ + DOWNLOAD_NO_PROGRESS 1 + CONFIGURE_COMMAND "" + BUILD_COMMAND "" + UPDATE_COMMAND "" + INSTALL_COMMAND "") + set(third_party_deps + ${third_party_deps} download_${NAME} + PARENT_SCOPE) +endfunction() # Correction of flags on different Platform(WIN/MAC) and Print Warning Message -if (APPLE) - if(WITH_MKL) - MESSAGE(WARNING - "Mac is not supported with MKL in Paddle yet. Force WITH_MKL=OFF.") - set(WITH_MKL OFF CACHE STRING "Disable MKL for building on mac" FORCE) - endif() +if(APPLE) + if(WITH_MKL) + message( + WARNING "Mac is not supported with MKL in Paddle yet. Force WITH_MKL=OFF." + ) + set(WITH_MKL + OFF + CACHE STRING "Disable MKL for building on mac" FORCE) + endif() endif() if(WIN32 OR APPLE) - MESSAGE(STATUS "Disable XBYAK in Windows and MacOS") - SET(WITH_XBYAK OFF CACHE STRING "Disable XBYAK in Windows and MacOS" FORCE) - - if(WITH_LIBXSMM) - MESSAGE(WARNING - "Windows, Mac are not supported with libxsmm in Paddle yet." - "Force WITH_LIBXSMM=OFF") - SET(WITH_LIBXSMM OFF CACHE STRING "Disable LIBXSMM in Windows and MacOS" FORCE) - endif() - - if(WITH_BOX_PS) - MESSAGE(WARNING - "Windows or Mac is not supported with BOX_PS in Paddle yet." - "Force WITH_BOX_PS=OFF") - SET(WITH_BOX_PS OFF CACHE STRING "Disable BOX_PS package in Windows and MacOS" FORCE) - endif() - - if(WITH_PSLIB) - MESSAGE(WARNING - "Windows or Mac is not supported with PSLIB in Paddle yet." - "Force WITH_PSLIB=OFF") - SET(WITH_PSLIB OFF CACHE STRING "Disable PSLIB package in Windows and MacOS" FORCE) - endif() - - if(WITH_ARM_BRPC) - MESSAGE(WARNING - "Windows or Mac is not supported with ARM_BRPC in Paddle yet." - "Force WITH_ARM_BRPC=OFF") - SET(WITH_ARM_BRPC OFF CACHE STRING "Disable ARM_BRPC package in Windows and MacOS" FORCE) - endif() - - if(WITH_LIBMCT) - MESSAGE(WARNING - "Windows or Mac is not supported with LIBMCT in Paddle yet." - "Force WITH_LIBMCT=OFF") - SET(WITH_LIBMCT OFF CACHE STRING "Disable LIBMCT package in Windows and MacOS" FORCE) - endif() - - if(WITH_PSLIB_BRPC) - MESSAGE(WARNING - "Windows or Mac is not supported with PSLIB_BRPC in Paddle yet." - "Force WITH_PSLIB_BRPC=OFF") - SET(WITH_PSLIB_BRPC OFF CACHE STRING "Disable PSLIB_BRPC package in Windows and MacOS" FORCE) - endif() + message(STATUS "Disable XBYAK in Windows and MacOS") + set(WITH_XBYAK + OFF + CACHE STRING "Disable XBYAK in Windows and MacOS" FORCE) + + if(WITH_LIBXSMM) + message(WARNING "Windows, Mac are not supported with libxsmm in Paddle yet." + "Force WITH_LIBXSMM=OFF") + set(WITH_LIBXSMM + OFF + CACHE STRING "Disable LIBXSMM in Windows and MacOS" FORCE) + endif() + + if(WITH_BOX_PS) + message(WARNING "Windows or Mac is not supported with BOX_PS in Paddle yet." + "Force WITH_BOX_PS=OFF") + set(WITH_BOX_PS + OFF + CACHE STRING "Disable BOX_PS package in Windows and MacOS" FORCE) + endif() + + if(WITH_PSLIB) + message(WARNING "Windows or Mac is not supported with PSLIB in Paddle yet." + "Force WITH_PSLIB=OFF") + set(WITH_PSLIB + OFF + CACHE STRING "Disable PSLIB package in Windows and MacOS" FORCE) + endif() + + if(WITH_ARM_BRPC) + message( + WARNING "Windows or Mac is not supported with ARM_BRPC in Paddle yet." + "Force WITH_ARM_BRPC=OFF") + set(WITH_ARM_BRPC + OFF + CACHE STRING "Disable ARM_BRPC package in Windows and MacOS" FORCE) + endif() + + if(WITH_LIBMCT) + message(WARNING "Windows or Mac is not supported with LIBMCT in Paddle yet." + "Force WITH_LIBMCT=OFF") + set(WITH_LIBMCT + OFF + CACHE STRING "Disable LIBMCT package in Windows and MacOS" FORCE) + endif() + + if(WITH_PSLIB_BRPC) + message( + WARNING "Windows or Mac is not supported with PSLIB_BRPC in Paddle yet." + "Force WITH_PSLIB_BRPC=OFF") + set(WITH_PSLIB_BRPC + OFF + CACHE STRING "Disable PSLIB_BRPC package in Windows and MacOS" FORCE) + endif() endif() set(WITH_MKLML ${WITH_MKL}) if(NOT DEFINED WITH_MKLDNN) - if(WITH_MKL AND AVX2_FOUND) - set(WITH_MKLDNN ON) - else() - message(STATUS "Do not have AVX2 intrinsics and disabled MKL-DNN") - set(WITH_MKLDNN OFF) - endif() + if(WITH_MKL AND AVX2_FOUND) + set(WITH_MKLDNN ON) + else() + message(STATUS "Do not have AVX2 intrinsics and disabled MKL-DNN") + set(WITH_MKLDNN OFF) + endif() endif() -if(WIN32 OR APPLE OR NOT WITH_GPU OR ON_INFER) - set(WITH_DGC OFF) +if(WIN32 + OR APPLE + OR NOT WITH_GPU + OR ON_INFER) + set(WITH_DGC OFF) endif() if(${CMAKE_VERSION} VERSION_GREATER "3.5.2") - set(SHALLOW_CLONE "GIT_SHALLOW TRUE") # adds --depth=1 arg to git clone of External_Projects + set(SHALLOW_CLONE "GIT_SHALLOW TRUE" + )# adds --depth=1 arg to git clone of External_Projects endif() ########################### include third_party according to flags ############################### -include(external/zlib) # download, build, install zlib -include(external/gflags) # download, build, install gflags -include(external/glog) # download, build, install glog -include(external/boost) # download boost -include(external/eigen) # download eigen3 -include(external/threadpool)# download threadpool -include(external/dlpack) # download dlpack -include(external/xxhash) # download, build, install xxhash -include(external/warpctc) # download, build, install warpctc -include(external/utf8proc) # download, build, install utf8proc - -list(APPEND third_party_deps extern_eigen3 extern_gflags extern_glog extern_boost extern_xxhash) -list(APPEND third_party_deps extern_zlib extern_dlpack extern_warpctc extern_threadpool extern_utf8proc) -include(external/lapack) # download, build, install lapack - -list(APPEND third_party_deps extern_eigen3 extern_gflags extern_glog extern_boost extern_xxhash) -list(APPEND third_party_deps extern_zlib extern_dlpack extern_warpctc extern_threadpool extern_lapack) - -include(cblas) # find first, then download, build, install openblas +include(external/zlib) # download, build, install zlib +include(external/gflags) # download, build, install gflags +include(external/glog) # download, build, install glog +include(external/boost) # download boost +include(external/eigen) # download eigen3 +include(external/threadpool) # download threadpool +include(external/dlpack) # download dlpack +include(external/xxhash) # download, build, install xxhash +include(external/warpctc) # download, build, install warpctc +include(external/utf8proc) # download, build, install utf8proc + +list( + APPEND + third_party_deps + extern_eigen3 + extern_gflags + extern_glog + extern_boost + extern_xxhash) +list( + APPEND + third_party_deps + extern_zlib + extern_dlpack + extern_warpctc + extern_threadpool + extern_utf8proc) +include(external/lapack) # download, build, install lapack + +list( + APPEND + third_party_deps + extern_eigen3 + extern_gflags + extern_glog + extern_boost + extern_xxhash) +list( + APPEND + third_party_deps + extern_zlib + extern_dlpack + extern_warpctc + extern_threadpool + extern_lapack) + +include(cblas) # find first, then download, build, install openblas message(STATUS "CBLAS_PROVIDER: ${CBLAS_PROVIDER}") if(${CBLAS_PROVIDER} STREQUAL MKLML) - list(APPEND third_party_deps extern_mklml) + list(APPEND third_party_deps extern_mklml) elseif(${CBLAS_PROVIDER} STREQUAL EXTERN_OPENBLAS) - list(APPEND third_party_deps extern_openblas) + list(APPEND third_party_deps extern_openblas) endif() - if(WITH_MKLDNN) - include(external/mkldnn) # download, build, install mkldnn - list(APPEND third_party_deps extern_mkldnn) + include(external/mkldnn) # download, build, install mkldnn + list(APPEND third_party_deps extern_mkldnn) endif() -include(external/protobuf) # find first, then download, build, install protobuf +include(external/protobuf) # find first, then download, build, install protobuf if(TARGET extern_protobuf) - list(APPEND third_party_deps extern_protobuf) + list(APPEND third_party_deps extern_protobuf) endif() if(WITH_PYTHON) - include(external/python) # find python and python_module - include(external/pybind11) # download pybind11 - list(APPEND third_party_deps extern_pybind) + include(external/python) # find python and python_module + include(external/pybind11) # download pybind11 + list(APPEND third_party_deps extern_pybind) endif() -IF(WITH_TESTING OR WITH_DISTRIBUTE) - include(external/gtest) # download, build, install gtest - list(APPEND third_party_deps extern_gtest) -ENDIF() +if(WITH_TESTING OR WITH_DISTRIBUTE) + include(external/gtest) # download, build, install gtest + list(APPEND third_party_deps extern_gtest) +endif() if(WITH_ONNXRUNTIME) - include(external/onnxruntime) # download, build, install onnxruntime、paddle2onnx - include(external/paddle2onnx) - list(APPEND third_party_deps extern_onnxruntime extern_paddle2onnx) + include(external/onnxruntime + )# download, build, install onnxruntime、paddle2onnx + include(external/paddle2onnx) + list(APPEND third_party_deps extern_onnxruntime extern_paddle2onnx) endif() if(WITH_GPU) - if (${CMAKE_CUDA_COMPILER_VERSION} LESS 11.0) - include(external/cub) # download cub - list(APPEND third_party_deps extern_cub) - endif() - set(URL "https://paddlepaddledeps.bj.bcebos.com/externalErrorMsg_20210928.tar.gz" CACHE STRING "" FORCE) - file_download_and_uncompress(${URL} "externalError" MD5 a712a49384e77ca216ad866712f7cafa) # download file externalErrorMsg.tar.gz - if(WITH_TESTING) - # copy externalErrorMsg.pb, just for unittest can get error message correctly. - set(SRC_DIR ${THIRD_PARTY_PATH}/externalError/data) - if(WIN32 AND (NOT "${CMAKE_GENERATOR}" STREQUAL "Ninja")) - set(DST_DIR1 ${CMAKE_BINARY_DIR}/paddle/fluid/third_party/externalError/data) - else() - set(DST_DIR1 ${CMAKE_BINARY_DIR}/paddle/third_party/externalError/data) - endif() - set(DST_DIR2 ${CMAKE_BINARY_DIR}/python/paddle/include/third_party/externalError/data) - add_custom_command(TARGET download_externalError POST_BUILD - COMMAND ${CMAKE_COMMAND} -E copy_directory ${SRC_DIR} ${DST_DIR1} - COMMAND ${CMAKE_COMMAND} -E copy_directory ${SRC_DIR} ${DST_DIR2} - COMMENT "copy_directory from ${SRC_DIR} to ${DST_DIR}") + if(${CMAKE_CUDA_COMPILER_VERSION} LESS 11.0) + include(external/cub) # download cub + list(APPEND third_party_deps extern_cub) + endif() + set(URL + "https://paddlepaddledeps.bj.bcebos.com/externalErrorMsg_20210928.tar.gz" + CACHE STRING "" FORCE) + file_download_and_uncompress( + ${URL} "externalError" MD5 a712a49384e77ca216ad866712f7cafa + )# download file externalErrorMsg.tar.gz + if(WITH_TESTING) + # copy externalErrorMsg.pb, just for unittest can get error message correctly. + set(SRC_DIR ${THIRD_PARTY_PATH}/externalError/data) + if(WIN32 AND (NOT "${CMAKE_GENERATOR}" STREQUAL "Ninja")) + set(DST_DIR1 + ${CMAKE_BINARY_DIR}/paddle/fluid/third_party/externalError/data) + else() + set(DST_DIR1 ${CMAKE_BINARY_DIR}/paddle/third_party/externalError/data) endif() + set(DST_DIR2 + ${CMAKE_BINARY_DIR}/python/paddle/include/third_party/externalError/data + ) + add_custom_command( + TARGET download_externalError + POST_BUILD + COMMAND ${CMAKE_COMMAND} -E copy_directory ${SRC_DIR} ${DST_DIR1} + COMMAND ${CMAKE_COMMAND} -E copy_directory ${SRC_DIR} ${DST_DIR2} + COMMENT "copy_directory from ${SRC_DIR} to ${DST_DIR}") + endif() endif(WITH_GPU) if(WITH_XPU) - include(external/xpu) # download, build, install xpu - list(APPEND third_party_deps extern_xpu) + include(external/xpu) # download, build, install xpu + list(APPEND third_party_deps extern_xpu) endif(WITH_XPU) if(WITH_MLU) - include(external/concurrentqueue) # download, build, install concurrentqueue - list(APPEND third_party_deps extern_concurrentqueue) + include(external/concurrentqueue) # download, build, install concurrentqueue + list(APPEND third_party_deps extern_concurrentqueue) endif(WITH_MLU) if(WITH_PSLIB) - include(external/pslib) # download, build, install pslib - list(APPEND third_party_deps extern_pslib) - if(WITH_LIBMCT) - include(external/libmct) # download, build, install libmct - list(APPEND third_party_deps extern_libxsmm) - endif() - if(WITH_PSLIB_BRPC) - include(external/pslib_brpc) # download, build, install pslib_brpc - list(APPEND third_party_deps extern_pslib_brpc) - else() - include(external/snappy) - list(APPEND third_party_deps extern_snappy) - - include(external/leveldb) - list(APPEND third_party_deps extern_leveldb) - if(NOT WITH_HETERPS) - include(external/brpc) - list(APPEND third_party_deps extern_brpc) - endif() + include(external/pslib) # download, build, install pslib + list(APPEND third_party_deps extern_pslib) + if(WITH_LIBMCT) + include(external/libmct) # download, build, install libmct + list(APPEND third_party_deps extern_libxsmm) + endif() + if(WITH_PSLIB_BRPC) + include(external/pslib_brpc) # download, build, install pslib_brpc + list(APPEND third_party_deps extern_pslib_brpc) + else() + include(external/snappy) + list(APPEND third_party_deps extern_snappy) + + include(external/leveldb) + list(APPEND third_party_deps extern_leveldb) + if(NOT WITH_HETERPS) + include(external/brpc) + list(APPEND third_party_deps extern_brpc) endif() + endif() endif(WITH_PSLIB) if(NOT WIN32 AND NOT APPLE) - include(external/gloo) - list(APPEND third_party_deps extern_gloo) + include(external/gloo) + list(APPEND third_party_deps extern_gloo) endif() if(WITH_BOX_PS) - include(external/box_ps) - list(APPEND third_party_deps extern_box_ps) + include(external/box_ps) + list(APPEND third_party_deps extern_box_ps) endif(WITH_BOX_PS) if(WITH_ASCEND OR WITH_ASCEND_CL) - include(external/ascend) - if(WITH_ASCEND OR WITH_ASCEND_CL) - list(APPEND third_party_deps extern_ascend) - endif() - if(WITH_ASCEND_CL) - list(APPEND third_party_deps extern_ascend_cl) - endif() -endif () + include(external/ascend) + if(WITH_ASCEND OR WITH_ASCEND_CL) + list(APPEND third_party_deps extern_ascend) + endif() + if(WITH_ASCEND_CL) + list(APPEND third_party_deps extern_ascend_cl) + endif() +endif() -if (WITH_PSCORE) - include(external/snappy) - list(APPEND third_party_deps extern_snappy) +if(WITH_PSCORE) + include(external/snappy) + list(APPEND third_party_deps extern_snappy) - include(external/leveldb) - list(APPEND third_party_deps extern_leveldb) - - if (WITH_ARM_BRPC) - include(external/arm_brpc) - list(APPEND third_party_deps extern_arm_brpc) - else() - include(external/brpc) - list(APPEND third_party_deps extern_brpc) - endif() + include(external/leveldb) + list(APPEND third_party_deps extern_leveldb) + + if(WITH_ARM_BRPC) + include(external/arm_brpc) + list(APPEND third_party_deps extern_arm_brpc) + else() + include(external/brpc) + list(APPEND third_party_deps extern_brpc) + endif() - include(external/libmct) # download, build, install libmct - list(APPEND third_party_deps extern_libmct) + include(external/libmct) # download, build, install libmct + list(APPEND third_party_deps extern_libmct) - include(external/rocksdb) # download, build, install rocksdb - list(APPEND third_party_deps extern_rocksdb) + include(external/rocksdb) # download, build, install rocksdb + list(APPEND third_party_deps extern_rocksdb) endif() if(WITH_XBYAK) - include(external/xbyak) # download, build, install xbyak - list(APPEND third_party_deps extern_xbyak) + include(external/xbyak) # download, build, install xbyak + list(APPEND third_party_deps extern_xbyak) endif() if(WITH_LIBXSMM) - include(external/libxsmm) # download, build, install libxsmm - list(APPEND third_party_deps extern_libxsmm) + include(external/libxsmm) # download, build, install libxsmm + list(APPEND third_party_deps extern_libxsmm) endif() if(WITH_DGC) - message(STATUS "add dgc lib.") - include(external/dgc) # download, build, install dgc - add_definitions(-DPADDLE_WITH_DGC) - list(APPEND third_party_deps extern_dgc) + message(STATUS "add dgc lib.") + include(external/dgc) # download, build, install dgc + add_definitions(-DPADDLE_WITH_DGC) + list(APPEND third_party_deps extern_dgc) endif() -if (WITH_LITE) - message(STATUS "Compile Paddle with Lite Engine.") - include(external/lite) -endif (WITH_LITE) - -if (WITH_CINN) - message(STATUS "Compile Paddle with CINN.") - include(external/cinn) - add_definitions(-DPADDLE_WITH_CINN) - if (WITH_GPU) - add_definitions(-DCINN_WITH_CUDA) - add_definitions(-DCINN_WITH_CUDNN) - endif (WITH_GPU) - if (WITH_MKL) - add_definitions(-DCINN_WITH_MKL_CBLAS) - add_definitions(-DCINN_WITH_MKLDNN) - endif (WITH_MKL) -endif (WITH_CINN) - -if (WITH_CRYPTO) - include(external/cryptopp) # download, build, install cryptopp - list(APPEND third_party_deps extern_cryptopp) - add_definitions(-DPADDLE_WITH_CRYPTO) -endif (WITH_CRYPTO) - -if (WITH_POCKETFFT) - include(external/pocketfft) - list(APPEND third_party_deps extern_pocketfft) - add_definitions(-DPADDLE_WITH_POCKETFFT) -endif (WITH_POCKETFFT) - -if (WIN32) - include(external/dirent) - list(APPEND third_party_deps extern_dirent) -endif (WIN32) - -if (WITH_INFRT) - include(external/llvm) - list(APPEND third_party_deps ${llvm_libs}) +if(WITH_LITE) + message(STATUS "Compile Paddle with Lite Engine.") + include(external/lite) +endif(WITH_LITE) + +if(WITH_CINN) + message(STATUS "Compile Paddle with CINN.") + include(external/cinn) + add_definitions(-DPADDLE_WITH_CINN) + if(WITH_GPU) + add_definitions(-DCINN_WITH_CUDA) + add_definitions(-DCINN_WITH_CUDNN) + endif(WITH_GPU) + if(WITH_MKL) + add_definitions(-DCINN_WITH_MKL_CBLAS) + add_definitions(-DCINN_WITH_MKLDNN) + endif(WITH_MKL) +endif(WITH_CINN) + +if(WITH_CRYPTO) + include(external/cryptopp) # download, build, install cryptopp + list(APPEND third_party_deps extern_cryptopp) + add_definitions(-DPADDLE_WITH_CRYPTO) +endif(WITH_CRYPTO) + +if(WITH_POCKETFFT) + include(external/pocketfft) + list(APPEND third_party_deps extern_pocketfft) + add_definitions(-DPADDLE_WITH_POCKETFFT) +endif(WITH_POCKETFFT) + +if(WIN32) + include(external/dirent) + list(APPEND third_party_deps extern_dirent) +endif(WIN32) + +if(WITH_INFRT) + include(external/llvm) + list(APPEND third_party_deps ${llvm_libs}) endif() -if (WITH_IPU) - include(external/poplar) - list(APPEND third_party_deps extern_poplar) +if(WITH_IPU) + include(external/poplar) + list(APPEND third_party_deps extern_poplar) endif() add_custom_target(third_party ALL DEPENDS ${third_party_deps}) diff --git a/cmake/thrust.cmake b/cmake/thrust.cmake index ff415b1e3c4..73c2c29847a 100644 --- a/cmake/thrust.cmake +++ b/cmake/thrust.cmake @@ -1,6 +1,8 @@ function(add_thrust_patches_if_necessary) set(thrust_detect_file ${PROJECT_BINARY_DIR}/detect_thrust.cu) - file(WRITE ${thrust_detect_file} "" + file( + WRITE ${thrust_detect_file} + "" "#include \"thrust/version.h\"\n" "#include \"thrust/shuffle.h\"\n" "#include \"stdio.h\"\n" @@ -10,10 +12,11 @@ function(add_thrust_patches_if_necessary) " return 0;\n" "}\n") - execute_process(COMMAND "${CUDA_NVCC_EXECUTABLE}" - "--run" "${thrust_detect_file}" - WORKING_DIRECTORY "${PROJECT_BINARY_DIR}/CMakeFiles/" - RESULT_VARIABLE nvcc_res ERROR_QUIET) + execute_process( + COMMAND "${CUDA_NVCC_EXECUTABLE}" "--run" "${thrust_detect_file}" + WORKING_DIRECTORY "${PROJECT_BINARY_DIR}/CMakeFiles/" + RESULT_VARIABLE nvcc_res + ERROR_QUIET) if(NOT nvcc_res EQUAL 0) set(thrust_patches "${PADDLE_SOURCE_DIR}/patches/thrust") message(STATUS "Add thrust patches: ${thrust_patches}") diff --git a/cmake/unity_build.cmake b/cmake/unity_build.cmake index b7e5564b3a6..e18b2ef1ee6 100644 --- a/cmake/unity_build.cmake +++ b/cmake/unity_build.cmake @@ -1,12 +1,14 @@ # Add the following code before all include to avoid compilation failure. -set(UNITY_CC_BEFORE_CODE [[ +set(UNITY_CC_BEFORE_CODE + [[ #ifndef NOMINMAX #define NOMINMAX #endif #ifndef _USE_MATH_DEFINES #define _USE_MATH_DEFINES #endif]]) -set(UNITY_CU_BEFORE_CODE [[ +set(UNITY_CU_BEFORE_CODE + [[ #ifndef __CUDACC_VER_MAJOR__ #define __CUDACC_VER_MAJOR__ CUDA_COMPILER_MAJOR_VERSION #endif @@ -14,15 +16,13 @@ set(UNITY_CU_BEFORE_CODE [[ #define __CUDACC_VER_MINOR__ CUDA_COMPILER_MINOR_VERSION #endif]]) if(WITH_GPU) - string(REPLACE "." ";" CUDA_COMPILER_VERSION ${CMAKE_CUDA_COMPILER_VERSION}) - list(GET CUDA_COMPILER_VERSION 0 CUDA_COMPILER_MAJOR_VERSION) - list(GET CUDA_COMPILER_VERSION 1 CUDA_COMPILER_MINOR_VERSION) - string(REPLACE - "CUDA_COMPILER_MAJOR_VERSION" ${CUDA_COMPILER_MAJOR_VERSION} - UNITY_CU_BEFORE_CODE ${UNITY_CU_BEFORE_CODE}) - string(REPLACE - "CUDA_COMPILER_MINOR_VERSION" ${CUDA_COMPILER_MINOR_VERSION} - UNITY_CU_BEFORE_CODE ${UNITY_CU_BEFORE_CODE}) + string(REPLACE "." ";" CUDA_COMPILER_VERSION ${CMAKE_CUDA_COMPILER_VERSION}) + list(GET CUDA_COMPILER_VERSION 0 CUDA_COMPILER_MAJOR_VERSION) + list(GET CUDA_COMPILER_VERSION 1 CUDA_COMPILER_MINOR_VERSION) + string(REPLACE "CUDA_COMPILER_MAJOR_VERSION" ${CUDA_COMPILER_MAJOR_VERSION} + UNITY_CU_BEFORE_CODE ${UNITY_CU_BEFORE_CODE}) + string(REPLACE "CUDA_COMPILER_MINOR_VERSION" ${CUDA_COMPILER_MINOR_VERSION} + UNITY_CU_BEFORE_CODE ${UNITY_CU_BEFORE_CODE}) endif() # Group a list of source files that can be included together. @@ -30,37 +30,43 @@ endif() # do not have to exist. # Here you need to specify the source type which belongs to cc or cu. function(register_unity_group TYPE) - # Get UNITY_TARGET from CMAKE_CURRENT_SOURCE_DIR. - string(REPLACE "${PADDLE_SOURCE_DIR}/paddle/fluid/" "" UNITY_TARGET ${CMAKE_CURRENT_SOURCE_DIR}) - string(REPLACE "/" "_" UNITY_TARGET ${UNITY_TARGET}) - set(UNITY_TARGET "paddle_${UNITY_TARGET}_unity") + # Get UNITY_TARGET from CMAKE_CURRENT_SOURCE_DIR. + string(REPLACE "${PADDLE_SOURCE_DIR}/paddle/fluid/" "" UNITY_TARGET + ${CMAKE_CURRENT_SOURCE_DIR}) + string(REPLACE "/" "_" UNITY_TARGET ${UNITY_TARGET}) + set(UNITY_TARGET "paddle_${UNITY_TARGET}_unity") - # Variable unity_group_index is used to record the number of UNITY_TARGET groups. - get_property(unity_group_index GLOBAL PROPERTY ${UNITY_TARGET}_${TYPE}_group_index) - if("${unity_group_index}" STREQUAL "") - set(unity_group_index 0) - endif() + # Variable unity_group_index is used to record the number of UNITY_TARGET groups. + get_property(unity_group_index GLOBAL + PROPERTY ${UNITY_TARGET}_${TYPE}_group_index) + if("${unity_group_index}" STREQUAL "") + set(unity_group_index 0) + endif() - # Variable unity_group_sources is used to record the sources of one group. - set(unity_group_sources ${UNITY_TARGET}_${TYPE}_group_${unity_group_index}_sources) - set_property(GLOBAL PROPERTY ${unity_group_sources} "") - foreach(src ${ARGN}) - # UB use absolute path of source. - if(NOT IS_ABSOLUTE ${src}) - set(src ${CMAKE_CURRENT_SOURCE_DIR}/${src}) - endif() - set_property(GLOBAL APPEND PROPERTY ${unity_group_sources} ${src}) - endforeach() - - # If unity_file does not exists, nv_library or cc_library will use - # dummy_file. Touch unity_file to avoid to use dummy file. - set(unity_file ${CMAKE_CURRENT_BINARY_DIR}/${UNITY_TARGET}_${unity_group_index}_${TYPE}.${TYPE}) - if(NOT EXISTS ${unity_file}) - file(TOUCH ${unity_file}) + # Variable unity_group_sources is used to record the sources of one group. + set(unity_group_sources + ${UNITY_TARGET}_${TYPE}_group_${unity_group_index}_sources) + set_property(GLOBAL PROPERTY ${unity_group_sources} "") + foreach(src ${ARGN}) + # UB use absolute path of source. + if(NOT IS_ABSOLUTE ${src}) + set(src ${CMAKE_CURRENT_SOURCE_DIR}/${src}) endif() + set_property(GLOBAL APPEND PROPERTY ${unity_group_sources} ${src}) + endforeach() + + # If unity_file does not exists, nv_library or cc_library will use + # dummy_file. Touch unity_file to avoid to use dummy file. + set(unity_file + ${CMAKE_CURRENT_BINARY_DIR}/${UNITY_TARGET}_${unity_group_index}_${TYPE}.${TYPE} + ) + if(NOT EXISTS ${unity_file}) + file(TOUCH ${unity_file}) + endif() - math(EXPR unity_group_index "${unity_group_index} + 1") - set_property(GLOBAL PROPERTY ${UNITY_TARGET}_${TYPE}_group_index ${unity_group_index}) + math(EXPR unity_group_index "${unity_group_index} + 1") + set_property(GLOBAL PROPERTY ${UNITY_TARGET}_${TYPE}_group_index + ${unity_group_index}) endfunction(register_unity_group) # Combine the original source files used by `TARGET`, then use @@ -72,81 +78,105 @@ endfunction(register_unity_group) # directory on Windows. # Here you need to specify the source type which belongs to cc or cu. function(compose_unity_target_sources TARGET TYPE) - # Variable unity_target_sources represents the source file used in TARGET - set(unity_target_sources "") - get_property(unity_group_index_max GLOBAL PROPERTY ${TARGET}_${TYPE}_group_index) - foreach(src ${ARGN}) - set(unity_file "") - # Note(zhouwei25): UB use the path releative to CMAKE_SOURCE_DIR. - # If use absolute path, sccache/ccache hit rate will be reduced. - if(IS_ABSOLUTE ${src}) - set(src_absolute_path ${src}) - file(RELATIVE_PATH src_relative_path ${CMAKE_SOURCE_DIR} ${src}) - else() - set(src_absolute_path ${CMAKE_CURRENT_SOURCE_DIR}/${src}) - file(RELATIVE_PATH src_relative_path ${CMAKE_SOURCE_DIR} ${src_absolute_path}) - endif() - # If `unity_group_index_max` is empty, there is no combination - # relationship. - # TODO(Avin0323): Whether use target property `UNITY_BUILD` of CMAKE to - # combine source files. - if(NOT "${unity_group_index_max}" STREQUAL "") - # Search in each registed group. - foreach(unity_group_index RANGE ${unity_group_index_max}) - if(${unity_group_index} GREATER_EQUAL ${unity_group_index_max}) - break() - endif() - get_property(unity_group_sources GLOBAL PROPERTY ${TARGET}_${TYPE}_group_${unity_group_index}_sources) - if(${src_absolute_path} IN_LIST unity_group_sources) - set(unity_file ${CMAKE_CURRENT_BINARY_DIR}/${TARGET}_${unity_group_index}_${TYPE}.${TYPE}) - set(unity_file_sources ${TARGET}_${TYPE}_file_${unity_group_index}_sources) - get_property(set_unity_file_sources GLOBAL PROPERTY ${unity_file_sources} SET) - if(NOT ${set_unity_file_sources}) - # Add macro before include source files. - set_property(GLOBAL PROPERTY ${unity_file_sources} "// Generate by Unity Build") - set_property(GLOBAL APPEND PROPERTY ${unity_file_sources} ${UNITY_CC_BEFORE_CODE}) - if(WITH_GPU AND "${TYPE}" STREQUAL "cu") - set_property(GLOBAL APPEND PROPERTY ${unity_file_sources} ${UNITY_CU_BEFORE_CODE}) - endif() - endif() - set_property(GLOBAL APPEND PROPERTY ${unity_file_sources} "#include \"${src_relative_path}\"") - set(unity_target_sources ${unity_target_sources} ${unity_file}) - break() - endif() - endforeach() + # Variable unity_target_sources represents the source file used in TARGET + set(unity_target_sources "") + get_property(unity_group_index_max GLOBAL + PROPERTY ${TARGET}_${TYPE}_group_index) + foreach(src ${ARGN}) + set(unity_file "") + # Note(zhouwei25): UB use the path releative to CMAKE_SOURCE_DIR. + # If use absolute path, sccache/ccache hit rate will be reduced. + if(IS_ABSOLUTE ${src}) + set(src_absolute_path ${src}) + file(RELATIVE_PATH src_relative_path ${CMAKE_SOURCE_DIR} ${src}) + else() + set(src_absolute_path ${CMAKE_CURRENT_SOURCE_DIR}/${src}) + file(RELATIVE_PATH src_relative_path ${CMAKE_SOURCE_DIR} + ${src_absolute_path}) + endif() + # If `unity_group_index_max` is empty, there is no combination + # relationship. + # TODO(Avin0323): Whether use target property `UNITY_BUILD` of CMAKE to + # combine source files. + if(NOT "${unity_group_index_max}" STREQUAL "") + # Search in each registed group. + foreach(unity_group_index RANGE ${unity_group_index_max}) + if(${unity_group_index} GREATER_EQUAL ${unity_group_index_max}) + break() endif() - # Use original source file. - if("${unity_file}" STREQUAL "") - set(unity_target_sources ${unity_target_sources} ${src}) + get_property( + unity_group_sources GLOBAL + PROPERTY ${TARGET}_${TYPE}_group_${unity_group_index}_sources) + if(${src_absolute_path} IN_LIST unity_group_sources) + set(unity_file + ${CMAKE_CURRENT_BINARY_DIR}/${TARGET}_${unity_group_index}_${TYPE}.${TYPE} + ) + set(unity_file_sources + ${TARGET}_${TYPE}_file_${unity_group_index}_sources) + get_property( + set_unity_file_sources GLOBAL + PROPERTY ${unity_file_sources} + SET) + if(NOT ${set_unity_file_sources}) + # Add macro before include source files. + set_property(GLOBAL PROPERTY ${unity_file_sources} + "// Generate by Unity Build") + set_property(GLOBAL APPEND PROPERTY ${unity_file_sources} + ${UNITY_CC_BEFORE_CODE}) + if(WITH_GPU AND "${TYPE}" STREQUAL "cu") + set_property(GLOBAL APPEND PROPERTY ${unity_file_sources} + ${UNITY_CU_BEFORE_CODE}) + endif() + endif() + set_property( + GLOBAL APPEND PROPERTY ${unity_file_sources} + "#include \"${src_relative_path}\"") + set(unity_target_sources ${unity_target_sources} ${unity_file}) + break() endif() - endforeach() + endforeach() + endif() + # Use original source file. + if("${unity_file}" STREQUAL "") + set(unity_target_sources ${unity_target_sources} ${src}) + endif() + endforeach() - set(unity_target_${TYPE}_sources ${unity_target_sources} PARENT_SCOPE) + set(unity_target_${TYPE}_sources + ${unity_target_sources} + PARENT_SCOPE) endfunction(compose_unity_target_sources) # Write the unity files used by `UNITY_TARGET`. # Write dependent on whether the contents of the unity file have changed, which # protects incremental compilation speed. function(finish_unity_target TYPE) - # Get UNITY_TARGET from CMAKE_CURRENT_SOURCE_DIR. - string(REPLACE "${PADDLE_SOURCE_DIR}/paddle/fluid/" "" UNITY_TARGET ${CMAKE_CURRENT_SOURCE_DIR}) - string(REPLACE "/" "_" UNITY_TARGET ${UNITY_TARGET}) - set(UNITY_TARGET "paddle_${UNITY_TARGET}_unity") + # Get UNITY_TARGET from CMAKE_CURRENT_SOURCE_DIR. + string(REPLACE "${PADDLE_SOURCE_DIR}/paddle/fluid/" "" UNITY_TARGET + ${CMAKE_CURRENT_SOURCE_DIR}) + string(REPLACE "/" "_" UNITY_TARGET ${UNITY_TARGET}) + set(UNITY_TARGET "paddle_${UNITY_TARGET}_unity") - get_property(unity_group_index_max GLOBAL PROPERTY ${UNITY_TARGET}_${TYPE}_group_index) - if(NOT "${unity_group_index_max}" STREQUAL "") - foreach(unity_group_index RANGE ${unity_group_index_max}) - if(${unity_group_index} GREATER_EQUAL ${unity_group_index_max}) - break() - endif() - get_property(unity_file_sources GLOBAL PROPERTY ${UNITY_TARGET}_${TYPE}_file_${unity_group_index}_sources) - set(unity_file_read_content "") - string(JOIN "\n" unity_file_write_content ${unity_file_sources}) - set(unity_file ${CMAKE_CURRENT_BINARY_DIR}/${UNITY_TARGET}_${unity_group_index}_${TYPE}.${TYPE}) - file(READ ${unity_file} unity_file_read_content) - if(NOT "${unity_file_read_content}" STREQUAL "${unity_file_write_content}") - file(WRITE ${unity_file} ${unity_file_write_content}) - endif() - endforeach() - endif() + get_property(unity_group_index_max GLOBAL + PROPERTY ${UNITY_TARGET}_${TYPE}_group_index) + if(NOT "${unity_group_index_max}" STREQUAL "") + foreach(unity_group_index RANGE ${unity_group_index_max}) + if(${unity_group_index} GREATER_EQUAL ${unity_group_index_max}) + break() + endif() + get_property( + unity_file_sources GLOBAL + PROPERTY ${UNITY_TARGET}_${TYPE}_file_${unity_group_index}_sources) + set(unity_file_read_content "") + string(JOIN "\n" unity_file_write_content ${unity_file_sources}) + set(unity_file + ${CMAKE_CURRENT_BINARY_DIR}/${UNITY_TARGET}_${unity_group_index}_${TYPE}.${TYPE} + ) + file(READ ${unity_file} unity_file_read_content) + if(NOT "${unity_file_read_content}" STREQUAL + "${unity_file_write_content}") + file(WRITE ${unity_file} ${unity_file_write_content}) + endif() + endforeach() + endif() endfunction(finish_unity_target) diff --git a/cmake/util.cmake b/cmake/util.cmake index 02667dbce69..8e52831ebe9 100644 --- a/cmake/util.cmake +++ b/cmake/util.cmake @@ -6,50 +6,47 @@ # First Argument: target name want to be linked with libraries # Rest Arguments: libraries which link together. function(target_circle_link_libraries TARGET_NAME) - if(APPLE) - set(LIBS) - set(inArchive OFF) - set(libsInArgn) + if(APPLE) + set(LIBS) + set(inArchive OFF) + set(libsInArgn) - foreach(arg ${ARGN}) - if(${arg} STREQUAL "ARCHIVE_START") - set(inArchive ON) - elseif(${arg} STREQUAL "ARCHIVE_END") - set(inArchive OFF) - else() - if(inArchive) - list(APPEND LIBS "-Wl,-force_load") - endif() - list(APPEND LIBS ${arg}) - list(APPEND libsInArgn ${arg}) - endif() - endforeach() - if("${CMAKE_CXX_COMPILER_ID}" STREQUAL "Clang" OR "${CMAKE_CXX_COMPILER_ID}" STREQUAL "AppleClang") - if(NOT IOS_ENABLE_BITCODE) - list(APPEND LIBS "-undefined dynamic_lookup") - endif() + foreach(arg ${ARGN}) + if(${arg} STREQUAL "ARCHIVE_START") + set(inArchive ON) + elseif(${arg} STREQUAL "ARCHIVE_END") + set(inArchive OFF) + else() + if(inArchive) + list(APPEND LIBS "-Wl,-force_load") endif() - list(REVERSE libsInArgn) - target_link_libraries(${TARGET_NAME} - ${LIBS} - ${libsInArgn}) + list(APPEND LIBS ${arg}) + list(APPEND libsInArgn ${arg}) + endif() + endforeach() + if("${CMAKE_CXX_COMPILER_ID}" STREQUAL "Clang" OR "${CMAKE_CXX_COMPILER_ID}" + STREQUAL "AppleClang") + if(NOT IOS_ENABLE_BITCODE) + list(APPEND LIBS "-undefined dynamic_lookup") + endif() + endif() + list(REVERSE libsInArgn) + target_link_libraries(${TARGET_NAME} ${LIBS} ${libsInArgn}) - else() # LINUX - set(LIBS) + else() # LINUX + set(LIBS) - foreach(arg ${ARGN}) - if(${arg} STREQUAL "ARCHIVE_START") - list(APPEND LIBS "-Wl,--whole-archive") - elseif(${arg} STREQUAL "ARCHIVE_END") - list(APPEND LIBS "-Wl,--no-whole-archive") - else() - list(APPEND LIBS ${arg}) - endif() - endforeach() + foreach(arg ${ARGN}) + if(${arg} STREQUAL "ARCHIVE_START") + list(APPEND LIBS "-Wl,--whole-archive") + elseif(${arg} STREQUAL "ARCHIVE_END") + list(APPEND LIBS "-Wl,--no-whole-archive") + else() + list(APPEND LIBS ${arg}) + endif() + endforeach() - target_link_libraries(${TARGET_NAME} - "-Wl,--start-group" - ${LIBS} - "-Wl,--end-group") - endif() + target_link_libraries(${TARGET_NAME} "-Wl,--start-group" ${LIBS} + "-Wl,--end-group") + endif() endfunction() diff --git a/cmake/version.cmake b/cmake/version.cmake index 57ca750df6c..83bd3f1b1bc 100644 --- a/cmake/version.cmake +++ b/cmake/version.cmake @@ -3,7 +3,7 @@ set(PADDLE_VERSION $ENV{PADDLE_VERSION}) set(tmp_version "HEAD") set(TAG_VERSION_REGEX "[0-9]+\\.[0-9]+\\.[0-9]+(\\.(a|b|rc)\\.[0-9]+)?") set(COMMIT_VERSION_REGEX "[0-9a-f]+[0-9a-f]+[0-9a-f]+[0-9a-f]+[0-9a-f]+") -while ("${PADDLE_VERSION}" STREQUAL "") +while("${PADDLE_VERSION}" STREQUAL "") # Check current branch name execute_process( COMMAND ${GIT_EXECUTABLE} rev-parse --abbrev-ref ${tmp_version} @@ -11,23 +11,24 @@ while ("${PADDLE_VERSION}" STREQUAL "") OUTPUT_VARIABLE GIT_BRANCH_NAME RESULT_VARIABLE GIT_BRANCH_RESULT ERROR_QUIET OUTPUT_STRIP_TRAILING_WHITESPACE) - if (NOT ${GIT_BRANCH_RESULT}) + if(NOT ${GIT_BRANCH_RESULT}) execute_process( - COMMAND ${GIT_EXECUTABLE} describe --tags --abbrev=0 --always ${tmp_version} + COMMAND ${GIT_EXECUTABLE} describe --tags --abbrev=0 --always + ${tmp_version} WORKING_DIRECTORY ${PADDLE_SOURCE_DIR} OUTPUT_VARIABLE GIT_TAG_NAME RESULT_VARIABLE GIT_RESULT ERROR_QUIET OUTPUT_STRIP_TRAILING_WHITESPACE) - if (NOT ${GIT_RESULT}) + if(NOT ${GIT_RESULT}) # Check if current branch is release branch - if (${GIT_BRANCH_NAME} MATCHES "release/${TAG_VERSION_REGEX}") + if(${GIT_BRANCH_NAME} MATCHES "release/${TAG_VERSION_REGEX}") # Check the tag is a correct version - if (${GIT_TAG_NAME} MATCHES "${COMMIT_VERSION_REGEX}") + if(${GIT_TAG_NAME} MATCHES "${COMMIT_VERSION_REGEX}") # if no tag was found, set PADDLE_VERSION to 0.0.0 to represent latest set(PADDLE_VERSION "0.0.0") - elseif (${GIT_TAG_NAME} MATCHES "v${TAG_VERSION_REGEX}") + elseif(${GIT_TAG_NAME} MATCHES "v${TAG_VERSION_REGEX}") string(REPLACE "v" "" PADDLE_VERSION ${GIT_TAG_NAME}) - else() # otherwise, get the previous git tag name. + else() # otherwise, get the previous git tag name. set(tmp_version "${GIT_TAG_NAME}~1") endif() else() @@ -37,9 +38,9 @@ while ("${PADDLE_VERSION}" STREQUAL "") OUTPUT_VARIABLE GIT_EXACT_TAG_NAME RESULT_VARIABLE GIT_EXACT_TAG_RESULT ERROR_QUIET OUTPUT_STRIP_TRAILING_WHITESPACE) - if (NOT ${GIT_EXACT_TAG_NAME}) + if(NOT ${GIT_EXACT_TAG_NAME}) # Check if current branch is tag branch - if (${GIT_EXACT_TAG_NAME} MATCHES "v${TAG_VERSION_REGEX}") + if(${GIT_EXACT_TAG_NAME} MATCHES "v${TAG_VERSION_REGEX}") string(REPLACE "v" "" PADDLE_VERSION ${GIT_EXACT_TAG_NAME}) else() set(PADDLE_VERSION "0.0.0") diff --git a/cmake/xpu_kp.cmake b/cmake/xpu_kp.cmake index adf3d74c262..6692f24dd6a 100644 --- a/cmake/xpu_kp.cmake +++ b/cmake/xpu_kp.cmake @@ -13,11 +13,11 @@ # limitations under the License. if(NOT WITH_XPU_KP) - return() + return() endif() -set(LINK_FLAGS "-Wl,--allow-multiple-definition") -set(CMAKE_EXE_LINKER_FLAGS "${LINK_FLAGS}") +set(LINK_FLAGS "-Wl,--allow-multiple-definition") +set(CMAKE_EXE_LINKER_FLAGS "${LINK_FLAGS}") set(CMAKE_SHARED_LINKER_FLAGS "${LINK_FLAGS}") if(NOT XPU_TOOLCHAIN) @@ -31,7 +31,7 @@ message(STATUS "Build with XPU_TOOLCHAIN=" ${XPU_TOOLCHAIN}) set(XPU_CLANG ${XPU_TOOLCHAIN}/bin/clang++) message(STATUS "Build with XPU_CLANG=" ${XPU_CLANG}) -# The host sysroot of XPU compiler is gcc-8.2 +# The host sysroot of XPU compiler is gcc-8.2 if(NOT HOST_SYSROOT) set(HOST_SYSROOT /opt/compiler/gcc-8.2) endif() @@ -45,19 +45,19 @@ if(NOT API_ARCH) endif() if(API_ARCH MATCHES "x86_64") -if(EXISTS ${HOST_SYSROOT}/bin/g++) - set(HOST_CXX ${HOST_SYSROOT}/bin/g++) - set(HOST_AR ${HOST_SYSROOT}/bin/ar) -else() - set(HOST_CXX /usr/bin/g++) - set(HOST_AR /usr/bin/ar) -endif() + if(EXISTS ${HOST_SYSROOT}/bin/g++) + set(HOST_CXX ${HOST_SYSROOT}/bin/g++) + set(HOST_AR ${HOST_SYSROOT}/bin/ar) + else() + set(HOST_CXX /usr/bin/g++) + set(HOST_AR /usr/bin/ar) + endif() else() set(HOST_CXX ${CMAKE_CXX_COMPILER}) set(HOST_AR ${CMAKE_AR}) endif() -set(TOOLCHAIN_ARGS ) +set(TOOLCHAIN_ARGS) if(OPT_LEVEL) set(OPT_LEVEL ${OPT_LEVEL}) @@ -74,8 +74,16 @@ message(STATUS "Build with HOST_AR=" ${HOST_AR}) macro(compile_kernel COMPILE_ARGS) set(options "") set(oneValueArgs "") - set(multiValueArgs KERNEL DIRPATH XNAME DEVICE HOST XPU DEPENDS) - cmake_parse_arguments(xpu_add_library "${options}" "${oneValueArgs}" "${multiValueArgs}" ${ARGN}) + set(multiValueArgs + KERNEL + DIRPATH + XNAME + DEVICE + HOST + XPU + DEPENDS) + cmake_parse_arguments(xpu_add_library "${options}" "${oneValueArgs}" + "${multiValueArgs}" ${ARGN}) set(kernel_path ${xpu_add_library_DIRPATH}) set(kernel_name ${xpu_add_library_XNAME}) set(device_o_extra_flags ${xpu_add_library_DEVICE}) @@ -84,16 +92,12 @@ macro(compile_kernel COMPILE_ARGS) set(cc_depends ${xpu_add_library_DEPENDS}) set(kernel_target ${kernel_name}_kernel) - add_custom_target(${kernel_target} - WORKING_DIRECTORY - ${CMAKE_CURRENT_BINARY_DIR} - DEPENDS - kernel_build/${kernel_name}.host.o - kernel_build/${kernel_name}.bin.o - COMMENT - ${kernel_target} - VERBATIM - ) + add_custom_target( + ${kernel_target} + WORKING_DIRECTORY ${CMAKE_CURRENT_BINARY_DIR} + DEPENDS kernel_build/${kernel_name}.host.o kernel_build/${kernel_name}.bin.o + COMMENT ${kernel_target} + VERBATIM) if(cc_depends) add_dependencies(${kernel_target} ${xpu_add_library_DEPENDS}) @@ -106,24 +110,56 @@ macro(compile_kernel COMPILE_ARGS) set(XTDK_DIR ${XPU_TOOLCHAIN}) set(CXX_DIR ${HOST_SYSROOT}) - set(XPU_CXX_FLAGS -fforce-enable-int128 -Wno-error=pessimizing-move -Wno-error=constant-conversion -Wno-error=c++11-narrowing -Wno-error=shift-count-overflow -Wno-error=unused-local-typedef -Wno-error=deprecated-declarations -Wno-deprecated-declarations -std=c++14 -m64 -fPIC -fno-omit-frame-pointer -Wall -Wno-inconsistent-missing-override -Wextra -Wnon-virtual-dtor -Wdelete-non-virtual-dtor -Wno-unused-parameter -Wno-unused-function -Wno-error=unused-local-typedefs -Wno-error=ignored-attributes -Wno-error=int-in-bool-context -Wno-error=parentheses -Wno-error=address -Wno-ignored-qualifiers -Wno-ignored-attributes -Wno-parentheses -DNDEBUG ) + set(XPU_CXX_FLAGS + -fforce-enable-int128 + -Wno-error=pessimizing-move + -Wno-error=constant-conversion + -Wno-error=c++11-narrowing + -Wno-error=shift-count-overflow + -Wno-error=unused-local-typedef + -Wno-error=deprecated-declarations + -Wno-deprecated-declarations + -std=c++14 + -m64 + -fPIC + -fno-omit-frame-pointer + -Wall + -Wno-inconsistent-missing-override + -Wextra + -Wnon-virtual-dtor + -Wdelete-non-virtual-dtor + -Wno-unused-parameter + -Wno-unused-function + -Wno-error=unused-local-typedefs + -Wno-error=ignored-attributes + -Wno-error=int-in-bool-context + -Wno-error=parentheses + -Wno-error=address + -Wno-ignored-qualifiers + -Wno-ignored-attributes + -Wno-parentheses + -DNDEBUG) #include path - get_property(dirs DIRECTORY ${CMAKE_SOURCE_DIR} PROPERTY INCLUDE_DIRECTORIES) + get_property( + dirs + DIRECTORY ${CMAKE_SOURCE_DIR} + PROPERTY INCLUDE_DIRECTORIES) set(XPU_CXX_INCLUDES "") foreach(dir IN LISTS dirs) list(APPEND XPU_CXX_INCLUDES "-I${dir}") endforeach() - string(REPLACE ";" " " XPU_CXX_INCLUDES "${XPU_CXX_INCLUDES}" ) + string(REPLACE ";" " " XPU_CXX_INCLUDES "${XPU_CXX_INCLUDES}") separate_arguments(XPU_CXX_INCLUDES UNIX_COMMAND "${XPU_CXX_INCLUDES}") #related flags - get_directory_property( DirDefs DIRECTORY ${CMAKE_SOURCE_DIR} COMPILE_DEFINITIONS ) + get_directory_property(DirDefs DIRECTORY ${CMAKE_SOURCE_DIR} + COMPILE_DEFINITIONS) set(XPU_CXX_DEFINES "") foreach(def IN LISTS DirDefs) list(APPEND XPU_CXX_DEFINES "-D${def}") endforeach() - string(REPLACE ";" " " XPU_CXX_DEFINES "${XPU_CXX_DEFINES}" ) + string(REPLACE ";" " " XPU_CXX_DEFINES "${XPU_CXX_DEFINES}") separate_arguments(XPU_CXX_DEFINES UNIX_COMMAND "${XPU_CXX_DEFINES}") set(ABI_VERSION "") @@ -133,121 +169,119 @@ macro(compile_kernel COMPILE_ARGS) set(ABI_VERSION "-D_GLIBCXX_USE_CXX11_ABI=1") endif() add_custom_command( - OUTPUT - kernel_build/${kernel_name}.bin.o - COMMAND - ${CMAKE_COMMAND} -E make_directory kernel_build - COMMAND - ${CMAKE_COMMAND} -E copy ${kernel_path}/${kernel_name}.kps kernel_build/${kernel_name}.xpu + OUTPUT kernel_build/${kernel_name}.bin.o + COMMAND ${CMAKE_COMMAND} -E make_directory kernel_build + COMMAND ${CMAKE_COMMAND} -E copy ${kernel_path}/${kernel_name}.kps + kernel_build/${kernel_name}.xpu COMMAND - ${XPU_CLANG} --sysroot=${CXX_DIR} -std=c++11 ${ABI_VERSION} ${OPT_LEVEL} -fno-builtin -mcpu=xpu2 -fPIC ${XPU_CXX_DEFINES} ${XPU_CXX_FLAGS} ${XPU_CXX_INCLUDES} - -I. -o kernel_build/${kernel_name}.bin.o.sec kernel_build/${kernel_name}.xpu - --xpu-device-only -c -v - COMMAND - ${XTDK_DIR}/bin/xpu2-elfconv kernel_build/${kernel_name}.bin.o.sec kernel_build/${kernel_name}.bin.o ${XPU_CLANG} --sysroot=${CXX_DIR} - WORKING_DIRECTORY - ${CMAKE_CURRENT_BINARY_DIR} - DEPENDS - ${xpu_add_library_DEPENDS} - COMMENT - kernel_build/${kernel_name}.bin.o - VERBATIM - ) - list(APPEND xpu_kernel_depends kernel_build/${kernel_name}.bin.o) + ${XPU_CLANG} --sysroot=${CXX_DIR} -std=c++11 ${ABI_VERSION} ${OPT_LEVEL} + -fno-builtin -mcpu=xpu2 -fPIC ${XPU_CXX_DEFINES} ${XPU_CXX_FLAGS} + ${XPU_CXX_INCLUDES} -I. -o kernel_build/${kernel_name}.bin.o.sec + kernel_build/${kernel_name}.xpu --xpu-device-only -c -v + COMMAND ${XTDK_DIR}/bin/xpu2-elfconv kernel_build/${kernel_name}.bin.o.sec + kernel_build/${kernel_name}.bin.o ${XPU_CLANG} --sysroot=${CXX_DIR} + WORKING_DIRECTORY ${CMAKE_CURRENT_BINARY_DIR} + DEPENDS ${xpu_add_library_DEPENDS} + COMMENT kernel_build/${kernel_name}.bin.o + VERBATIM) + list(APPEND xpu_kernel_depends kernel_build/${kernel_name}.bin.o) add_custom_command( - OUTPUT - kernel_build/${kernel_name}.host.o - COMMAND - ${CMAKE_COMMAND} -E make_directory kernel_build - COMMAND - ${CMAKE_COMMAND} -E copy ${kernel_path}/${kernel_name}.kps kernel_build/${kernel_name}.xpu + OUTPUT kernel_build/${kernel_name}.host.o + COMMAND ${CMAKE_COMMAND} -E make_directory kernel_build + COMMAND ${CMAKE_COMMAND} -E copy ${kernel_path}/${kernel_name}.kps + kernel_build/${kernel_name}.xpu COMMAND - ${XPU_CLANG} --sysroot=${CXX_DIR} -std=c++11 ${ABI_VERSION} ${OPT_LEVEL} -fno-builtin -mcpu=xpu2 -fPIC ${XPU_CXX_DEFINES} ${XPU_CXX_FLAGS} ${XPU_CXX_INCLUDES} - -I. -o kernel_build/${kernel_name}.host.o kernel_build/${kernel_name}.xpu - --xpu-host-only -c -v - WORKING_DIRECTORY - ${CMAKE_CURRENT_BINARY_DIR} - DEPENDS - ${xpu_add_library_DEPENDS} - COMMENT - kernel_build/${kernel_name}.host.o - VERBATIM - ) - list(APPEND xpu_kernel_depends kernel_build/${kernel_name}.host.o) + ${XPU_CLANG} --sysroot=${CXX_DIR} -std=c++11 ${ABI_VERSION} ${OPT_LEVEL} + -fno-builtin -mcpu=xpu2 -fPIC ${XPU_CXX_DEFINES} ${XPU_CXX_FLAGS} + ${XPU_CXX_INCLUDES} -I. -o kernel_build/${kernel_name}.host.o + kernel_build/${kernel_name}.xpu --xpu-host-only -c -v + WORKING_DIRECTORY ${CMAKE_CURRENT_BINARY_DIR} + DEPENDS ${xpu_add_library_DEPENDS} + COMMENT kernel_build/${kernel_name}.host.o + VERBATIM) + list(APPEND xpu_kernel_depends kernel_build/${kernel_name}.host.o) endmacro() ############################################################################### # XPU_ADD_LIBRARY ############################################################################### macro(xpu_add_library TARGET_NAME) - # Separate the sources from the options - set(options "") - set(oneValueArgs "") - set(multiValueArgs STATIC DEPENDS) - cmake_parse_arguments(xpu_add_library "${options}" "${oneValueArgs}" "${multiValueArgs}" ${ARGN}) - set(xpu_srcs ${xpu_add_library_STATIC}) - set(xpu_target ${TARGET_NAME}) - set(cc_srcs_depends ${xpu_add_library_DEPENDS}) - - file(GLOB_RECURSE xpu_srcs_lists ${xpu_srcs}) - list(LENGTH xpu_srcs_lists xpu_srcs_lists_num) - - set(XPU1_DEVICE_O_EXTRA_FLAGS " ") - set(XPU1_HOST_O_EXTRA_FLAGS " ") - - # Distinguish .xpu file from other files - foreach(cur_xpu_src IN LISTS xpu_srcs_lists) - get_filename_component(language_type_name ${cur_xpu_src} EXT) - if(${language_type_name} STREQUAL ".kps") - list(APPEND xpu_kernel_lists ${cur_xpu_src}) - else() - list(APPEND cc_kernel_lists ${cur_xpu_src}) - endif() - endforeach() + # Separate the sources from the options + set(options "") + set(oneValueArgs "") + set(multiValueArgs STATIC DEPENDS) + cmake_parse_arguments(xpu_add_library "${options}" "${oneValueArgs}" + "${multiValueArgs}" ${ARGN}) + set(xpu_srcs ${xpu_add_library_STATIC}) + set(xpu_target ${TARGET_NAME}) + set(cc_srcs_depends ${xpu_add_library_DEPENDS}) - # Ensure that there is only one xpu kernel - list(LENGTH xpu_kernel_lists xpu_kernel_lists_num) - list(LENGTH cc_srcs_depends cc_srcs_depends_num) - - if(${xpu_kernel_lists_num}) - foreach(xpu_kernel IN LISTS xpu_kernel_lists) - get_filename_component(kernel_name ${xpu_kernel} NAME_WE) - get_filename_component(kernel_dir ${xpu_kernel} DIRECTORY) - set(kernel_rules ${kernel_dir}/${kernel_name}.rules) - set(kernel_name ${kernel_name}) - compile_kernel( KERNEL ${xpu_kernel} DIRPATH ${kernel_dir} XNAME ${kernel_name} DEVICE ${XPU1_DEVICE_O_EXTRA_FLAGS} HOST ${XPU1_HOST_O_EXTRA_FLAGS} XPU "xpu2" DEPENDS ${cc_srcs_depends}) - endforeach() - - add_custom_target(${xpu_target}_src ALL - WORKING_DIRECTORY - ${CMAKE_CURRENT_BINARY_DIR} - DEPENDS - ${xpu_kernel_depends} - ${CMAKE_CURRENT_BINARY_DIR}/lib${xpu_target}_xpu.a - COMMENT - ${xpu_target}_src - VERBATIM - ) - - add_custom_command( - OUTPUT - ${CMAKE_CURRENT_BINARY_DIR}/lib${xpu_target}_xpu.a - COMMAND - ${HOST_AR} rcs ${CMAKE_CURRENT_BINARY_DIR}/lib${xpu_target}_xpu.a ${xpu_kernel_depends} - WORKING_DIRECTORY - ${CMAKE_CURRENT_BINARY_DIR} - DEPENDS - ${xpu_kernel_depends} - COMMENT - ${CMAKE_CURRENT_BINARY_DIR}/lib${xpu_target}_xpu.a - VERBATIM - ) - - add_library(${xpu_target} STATIC ${cc_kernel_lists}) - add_dependencies(${xpu_target} ${xpu_target}_src) - target_link_libraries(${TARGET_NAME} ${CMAKE_CURRENT_BINARY_DIR}/lib${xpu_target}_xpu.a) + file(GLOB_RECURSE xpu_srcs_lists ${xpu_srcs}) + list(LENGTH xpu_srcs_lists xpu_srcs_lists_num) + + set(XPU1_DEVICE_O_EXTRA_FLAGS " ") + set(XPU1_HOST_O_EXTRA_FLAGS " ") + + # Distinguish .xpu file from other files + foreach(cur_xpu_src IN LISTS xpu_srcs_lists) + get_filename_component(language_type_name ${cur_xpu_src} EXT) + if(${language_type_name} STREQUAL ".kps") + list(APPEND xpu_kernel_lists ${cur_xpu_src}) else() - add_library(${xpu_target} STATIC ${cc_kernel_lists}) + list(APPEND cc_kernel_lists ${cur_xpu_src}) endif() + endforeach() + + # Ensure that there is only one xpu kernel + list(LENGTH xpu_kernel_lists xpu_kernel_lists_num) + list(LENGTH cc_srcs_depends cc_srcs_depends_num) + + if(${xpu_kernel_lists_num}) + foreach(xpu_kernel IN LISTS xpu_kernel_lists) + get_filename_component(kernel_name ${xpu_kernel} NAME_WE) + get_filename_component(kernel_dir ${xpu_kernel} DIRECTORY) + set(kernel_rules ${kernel_dir}/${kernel_name}.rules) + set(kernel_name ${kernel_name}) + compile_kernel( + KERNEL + ${xpu_kernel} + DIRPATH + ${kernel_dir} + XNAME + ${kernel_name} + DEVICE + ${XPU1_DEVICE_O_EXTRA_FLAGS} + HOST + ${XPU1_HOST_O_EXTRA_FLAGS} + XPU + "xpu2" + DEPENDS + ${cc_srcs_depends}) + endforeach() + + add_custom_target( + ${xpu_target}_src ALL + WORKING_DIRECTORY ${CMAKE_CURRENT_BINARY_DIR} + DEPENDS ${xpu_kernel_depends} + ${CMAKE_CURRENT_BINARY_DIR}/lib${xpu_target}_xpu.a + COMMENT ${xpu_target}_src + VERBATIM) + + add_custom_command( + OUTPUT ${CMAKE_CURRENT_BINARY_DIR}/lib${xpu_target}_xpu.a + COMMAND ${HOST_AR} rcs ${CMAKE_CURRENT_BINARY_DIR}/lib${xpu_target}_xpu.a + ${xpu_kernel_depends} + WORKING_DIRECTORY ${CMAKE_CURRENT_BINARY_DIR} + DEPENDS ${xpu_kernel_depends} + COMMENT ${CMAKE_CURRENT_BINARY_DIR}/lib${xpu_target}_xpu.a + VERBATIM) + + add_library(${xpu_target} STATIC ${cc_kernel_lists}) + add_dependencies(${xpu_target} ${xpu_target}_src) + target_link_libraries(${TARGET_NAME} + ${CMAKE_CURRENT_BINARY_DIR}/lib${xpu_target}_xpu.a) + else() + add_library(${xpu_target} STATIC ${cc_kernel_lists}) + endif() endmacro() diff --git a/paddle/CMakeLists.txt b/paddle/CMakeLists.txt index 9d801c9e224..07041455df4 100644 --- a/paddle/CMakeLists.txt +++ b/paddle/CMakeLists.txt @@ -1,7 +1,9 @@ add_subdirectory(utils) add_subdirectory(scripts) add_subdirectory(testing) -set(PYTHON_TESTS_DIR ${PADDLE_BINARY_DIR}/python/paddle/fluid/tests CACHE INTERNAL "python tests directory") +set(PYTHON_TESTS_DIR + ${PADDLE_BINARY_DIR}/python/paddle/fluid/tests + CACHE INTERNAL "python tests directory") add_subdirectory(phi) add_subdirectory(infrt) add_subdirectory(fluid) diff --git a/paddle/fluid/distributed/CMakeLists.txt b/paddle/fluid/distributed/CMakeLists.txt index a92932b4d32..304a764f5b8 100755 --- a/paddle/fluid/distributed/CMakeLists.txt +++ b/paddle/fluid/distributed/CMakeLists.txt @@ -2,35 +2,49 @@ add_subdirectory(collective) add_subdirectory(store) if(WITH_PYTHON) py_proto_compile(ps_py_proto SRCS the_one_ps.proto) - add_custom_target(ps_py_proto_init ALL - COMMAND ${CMAKE_COMMAND} -E make_directory ${PADDLE_BINARY_DIR}/python/paddle/distributed/fleet/proto) + add_custom_target( + ps_py_proto_init ALL + COMMAND ${CMAKE_COMMAND} -E make_directory + ${PADDLE_BINARY_DIR}/python/paddle/distributed/fleet/proto) add_dependencies(ps_py_proto ps_py_proto_init) - if (NOT WIN32) - add_custom_command(TARGET ps_py_proto POST_BUILD - COMMAND mv the_one_ps_pb2.py ${PADDLE_BINARY_DIR}/python/paddle/distributed/fleet/proto/) + if(NOT WIN32) + add_custom_command( + TARGET ps_py_proto + POST_BUILD + COMMAND mv the_one_ps_pb2.py + ${PADDLE_BINARY_DIR}/python/paddle/distributed/fleet/proto/) else(NOT WIN32) - string(REPLACE "/" "\\" fleet_proto_dstpath "${PADDLE_BINARY_DIR}/python/paddle/distributed/fleet/proto/") - add_custom_command(TARGET ps_py_proto POST_BUILD + string( + REPLACE "/" "\\" fleet_proto_dstpath + "${PADDLE_BINARY_DIR}/python/paddle/distributed/fleet/proto/") + add_custom_command( + TARGET ps_py_proto + POST_BUILD COMMAND copy /Y the_one_ps_pb2.py ${fleet_proto_dstpath} - COMMENT "Copy generated python the_one_ps_pb2 into directory ${fleet_proto_dstpath}.") + COMMENT + "Copy generated python the_one_ps_pb2 into directory ${fleet_proto_dstpath}." + ) endif(NOT WIN32) endif() if(NOT WITH_PSCORE) - add_subdirectory(fleet_executor) - return() + add_subdirectory(fleet_executor) + return() endif() proto_library(ps_framework_proto SRCS the_one_ps.proto) -add_custom_command(TARGET ps_framework_proto POST_BUILD - COMMAND mv the_one_ps.pb.h ps.pb.h - COMMAND mv the_one_ps.pb.cc ps.pb.cc) +add_custom_command( + TARGET ps_framework_proto + POST_BUILD + COMMAND mv the_one_ps.pb.h ps.pb.h + COMMAND mv the_one_ps.pb.cc ps.pb.cc) -set(DISTRIBUTE_COMPILE_FLAGS "-Wno-error=unused-value -Wno-non-virtual-dtor -Wno-error=non-virtual-dtor -Wno-error=delete-non-virtual-dtor -Wno-error=sign-compare -Wno-error=unused-variable -Wno-error=return-type -Wno-error=unused-but-set-variable -Wno-error=unknown-pragmas -Wno-error=parentheses -Wno-error=unused-result") +set(DISTRIBUTE_COMPILE_FLAGS + "-Wno-error=unused-value -Wno-non-virtual-dtor -Wno-error=non-virtual-dtor -Wno-error=delete-non-virtual-dtor -Wno-error=sign-compare -Wno-error=unused-variable -Wno-error=return-type -Wno-error=unused-but-set-variable -Wno-error=unknown-pragmas -Wno-error=parentheses -Wno-error=unused-result" +) -if (CMAKE_CXX_COMPILER_VERSION VERSION_GREATER 7.0) - set(DISTRIBUTE_COMPILE_FLAGS - "${DISTRIBUTE_COMPILE_FLAGS} -faligned-new") +if(CMAKE_CXX_COMPILER_VERSION VERSION_GREATER 7.0) + set(DISTRIBUTE_COMPILE_FLAGS "${DISTRIBUTE_COMPILE_FLAGS} -faligned-new") endif() add_subdirectory(common) diff --git a/paddle/fluid/distributed/collective/CMakeLists.txt b/paddle/fluid/distributed/collective/CMakeLists.txt index f6b1bd47c1e..0cfc8270963 100644 --- a/paddle/fluid/distributed/collective/CMakeLists.txt +++ b/paddle/fluid/distributed/collective/CMakeLists.txt @@ -1,20 +1,65 @@ -cc_library(processgroup SRCS ProcessGroup.cc DEPS phi_api eager_api) -cc_library(eager_reducer SRCS reducer.cc DEPS eager_api processgroup phi_api string_helper) +cc_library( + processgroup + SRCS ProcessGroup.cc + DEPS phi_api eager_api) +cc_library( + eager_reducer + SRCS reducer.cc + DEPS eager_api processgroup phi_api string_helper) -if (WITH_DISTRIBUTE) - cc_library(processgroup_gloo SRCS ProcessGroupGloo.cc DEPS phi_api eager_api gloo_wrapper) +if(WITH_DISTRIBUTE) + cc_library( + processgroup_gloo + SRCS ProcessGroupGloo.cc + DEPS phi_api eager_api gloo_wrapper) endif() if(WITH_NCCL) - cc_library(processgroup_nccl SRCS ProcessGroupNCCL.cc NCCLTools.cc Common.cc DEPS place cuda_stream enforce collective_helper device_context phi_api eager_api) - if (WITH_DISTRIBUTE AND WITH_PSCORE) - cc_library(processgroup_heter SRCS ProcessGroupHeter.cc NCCLTools.cc Common.cc DEPS place cuda_stream enforce collective_helper device_context phi_api eager_api) + cc_library( + processgroup_nccl + SRCS ProcessGroupNCCL.cc NCCLTools.cc Common.cc + DEPS place + cuda_stream + enforce + collective_helper + device_context + phi_api + eager_api) + if(WITH_DISTRIBUTE AND WITH_PSCORE) + cc_library( + processgroup_heter + SRCS ProcessGroupHeter.cc NCCLTools.cc Common.cc + DEPS place + cuda_stream + enforce + collective_helper + device_context + phi_api + eager_api) endif() endif() if(WITH_ASCEND_CL) - cc_library(processgroup_hccl SRCS ProcessGroupHCCL.cc HCCLTools.cc Common.cc DEPS place npu_stream enforce collective_helper device_context phi_api eager_api) - if (WITH_DISTRIBUTE AND WITH_PSCORE) - cc_library(processgroup_heter SRCS ProcessGroupHeter.cc HCCLTools.cc Common.cc DEPS place npu_stream enforce collective_helper device_context phi_api eager_api) + cc_library( + processgroup_hccl + SRCS ProcessGroupHCCL.cc HCCLTools.cc Common.cc + DEPS place + npu_stream + enforce + collective_helper + device_context + phi_api + eager_api) + if(WITH_DISTRIBUTE AND WITH_PSCORE) + cc_library( + processgroup_heter + SRCS ProcessGroupHeter.cc HCCLTools.cc Common.cc + DEPS place + npu_stream + enforce + collective_helper + device_context + phi_api + eager_api) endif() endif() diff --git a/paddle/fluid/distributed/common/CMakeLists.txt b/paddle/fluid/distributed/common/CMakeLists.txt index eab6165ca68..05f6a1d1cce 100644 --- a/paddle/fluid/distributed/common/CMakeLists.txt +++ b/paddle/fluid/distributed/common/CMakeLists.txt @@ -1,4 +1,6 @@ - -cc_library(afs_wrapper SRCS afs_warpper.cc DEPS fs ps_framework_proto) +cc_library( + afs_wrapper + SRCS afs_warpper.cc + DEPS fs ps_framework_proto) #set_property(GLOBAL PROPERTY COMMON_DEPS afs_warpper) diff --git a/paddle/fluid/distributed/fleet_executor/CMakeLists.txt b/paddle/fluid/distributed/fleet_executor/CMakeLists.txt index a36e8e648b1..3cafb0bdb5f 100755 --- a/paddle/fluid/distributed/fleet_executor/CMakeLists.txt +++ b/paddle/fluid/distributed/fleet_executor/CMakeLists.txt @@ -7,34 +7,81 @@ proto_library(interceptor_message_proto SRCS interceptor_message.proto) if(WITH_ARM_BRPC) set(BRPC_DEPS arm_brpc snappy gflags glog) elseif(WITH_DISTRIBUTE AND WITH_PSCORE) - set(BRPC_DEPS brpc ssl crypto protobuf zlib leveldb snappy gflags glog) + set(BRPC_DEPS + brpc + ssl + crypto + protobuf + zlib + leveldb + snappy + gflags + glog) else() set(BRPC_DEPS "") endif() -cc_library(task_loop_thread_pool SRCS task_loop_thread_pool.cc task_loop_thread.cc task_loop.cc DEPS enforce glog) +cc_library( + task_loop_thread_pool + SRCS task_loop_thread_pool.cc task_loop_thread.cc task_loop.cc + DEPS enforce glog) -cc_library(fleet_executor SRCS fleet_executor.cc carrier.cc task_node.cc runtime_graph.cc dist_model.cc interceptor.cc - compute_interceptor.cc amplifier_interceptor.cc source_interceptor.cc sink_interceptor.cc message_service.cc message_bus.cc dist_model_tensor_wrapper.cc - DEPS proto_desc fleet_executor_desc_proto interceptor_message_proto task_loop_thread_pool collective_helper - op_registry executor_gc_helper gflags glog ${BRPC_DEPS}) +cc_library( + fleet_executor + SRCS fleet_executor.cc + carrier.cc + task_node.cc + runtime_graph.cc + dist_model.cc + interceptor.cc + compute_interceptor.cc + amplifier_interceptor.cc + source_interceptor.cc + sink_interceptor.cc + message_service.cc + message_bus.cc + dist_model_tensor_wrapper.cc + DEPS proto_desc + fleet_executor_desc_proto + interceptor_message_proto + task_loop_thread_pool + collective_helper + op_registry + executor_gc_helper + gflags + glog + ${BRPC_DEPS}) if(WITH_DISTRIBUTE) - set(DISTRIBUTE_COMPILE_FLAGS "-Wno-non-virtual-dtor -Wno-error=non-virtual-dtor -Wno-error=delete-non-virtual-dtor") - if (CMAKE_CXX_COMPILER_VERSION VERSION_GREATER 7.0) + set(DISTRIBUTE_COMPILE_FLAGS + "-Wno-non-virtual-dtor -Wno-error=non-virtual-dtor -Wno-error=delete-non-virtual-dtor" + ) + if(CMAKE_CXX_COMPILER_VERSION VERSION_GREATER 7.0) set(DISTRIBUTE_COMPILE_FLAGS "${DISTRIBUTE_COMPILE_FLAGS} -faligned-new") endif() - set_source_files_properties(interceptor.cc PROPERTIES COMPILE_FLAGS ${DISTRIBUTE_COMPILE_FLAGS}) - set_source_files_properties(compute_interceptor.cc PROPERTIES COMPILE_FLAGS ${DISTRIBUTE_COMPILE_FLAGS}) - set_source_files_properties(amplifier_interceptor.cc PROPERTIES COMPILE_FLAGS ${DISTRIBUTE_COMPILE_FLAGS}) - set_source_files_properties(source_interceptor.cc PROPERTIES COMPILE_FLAGS ${DISTRIBUTE_COMPILE_FLAGS}) - set_source_files_properties(sink_interceptor.cc PROPERTIES COMPILE_FLAGS ${DISTRIBUTE_COMPILE_FLAGS}) - set_source_files_properties(message_bus.h PROPERTIES COMPILE_FLAGS ${DISTRIBUTE_COMPILE_FLAGS}) - set_source_files_properties(message_bus.cc PROPERTIES COMPILE_FLAGS ${DISTRIBUTE_COMPILE_FLAGS}) - set_source_files_properties(fleet_executor.cc PROPERTIES COMPILE_FLAGS ${DISTRIBUTE_COMPILE_FLAGS}) - set_source_files_properties(carrier.cc PROPERTIES COMPILE_FLAGS ${DISTRIBUTE_COMPILE_FLAGS}) - set_source_files_properties(message_service.h PROPERTIES COMPILE_FLAGS ${DISTRIBUTE_COMPILE_FLAGS}) - set_source_files_properties(message_service.cc PROPERTIES COMPILE_FLAGS ${DISTRIBUTE_COMPILE_FLAGS}) + set_source_files_properties( + interceptor.cc PROPERTIES COMPILE_FLAGS ${DISTRIBUTE_COMPILE_FLAGS}) + set_source_files_properties( + compute_interceptor.cc PROPERTIES COMPILE_FLAGS ${DISTRIBUTE_COMPILE_FLAGS}) + set_source_files_properties( + amplifier_interceptor.cc PROPERTIES COMPILE_FLAGS + ${DISTRIBUTE_COMPILE_FLAGS}) + set_source_files_properties( + source_interceptor.cc PROPERTIES COMPILE_FLAGS ${DISTRIBUTE_COMPILE_FLAGS}) + set_source_files_properties( + sink_interceptor.cc PROPERTIES COMPILE_FLAGS ${DISTRIBUTE_COMPILE_FLAGS}) + set_source_files_properties( + message_bus.h PROPERTIES COMPILE_FLAGS ${DISTRIBUTE_COMPILE_FLAGS}) + set_source_files_properties( + message_bus.cc PROPERTIES COMPILE_FLAGS ${DISTRIBUTE_COMPILE_FLAGS}) + set_source_files_properties( + fleet_executor.cc PROPERTIES COMPILE_FLAGS ${DISTRIBUTE_COMPILE_FLAGS}) + set_source_files_properties(carrier.cc PROPERTIES COMPILE_FLAGS + ${DISTRIBUTE_COMPILE_FLAGS}) + set_source_files_properties( + message_service.h PROPERTIES COMPILE_FLAGS ${DISTRIBUTE_COMPILE_FLAGS}) + set_source_files_properties( + message_service.cc PROPERTIES COMPILE_FLAGS ${DISTRIBUTE_COMPILE_FLAGS}) add_subdirectory(test) endif() diff --git a/paddle/fluid/distributed/fleet_executor/test/CMakeLists.txt b/paddle/fluid/distributed/fleet_executor/test/CMakeLists.txt index e0db8a261b5..0cd39b3aad6 100644 --- a/paddle/fluid/distributed/fleet_executor/test/CMakeLists.txt +++ b/paddle/fluid/distributed/fleet_executor/test/CMakeLists.txt @@ -1,25 +1,72 @@ -set_source_files_properties(interceptor_ping_pong_test.cc PROPERTIES COMPILE_FLAGS ${DISTRIBUTE_COMPILE_FLAGS}) -cc_test(interceptor_ping_pong_test SRCS interceptor_ping_pong_test.cc DEPS fleet_executor ${BRPC_DEPS}) +set_source_files_properties( + interceptor_ping_pong_test.cc PROPERTIES COMPILE_FLAGS + ${DISTRIBUTE_COMPILE_FLAGS}) +cc_test( + interceptor_ping_pong_test + SRCS interceptor_ping_pong_test.cc + DEPS fleet_executor ${BRPC_DEPS}) -set_source_files_properties(compute_interceptor_test.cc PROPERTIES COMPILE_FLAGS ${DISTRIBUTE_COMPILE_FLAGS}) -cc_test(compute_interceptor_test SRCS compute_interceptor_test.cc DEPS fleet_executor ${BRPC_DEPS}) +set_source_files_properties( + compute_interceptor_test.cc PROPERTIES COMPILE_FLAGS + ${DISTRIBUTE_COMPILE_FLAGS}) +cc_test( + compute_interceptor_test + SRCS compute_interceptor_test.cc + DEPS fleet_executor ${BRPC_DEPS}) -set_source_files_properties(source_interceptor_test.cc PROPERTIES COMPILE_FLAGS ${DISTRIBUTE_COMPILE_FLAGS}) -cc_test(source_interceptor_test SRCS source_interceptor_test.cc DEPS fleet_executor ${BRPC_DEPS}) +set_source_files_properties( + source_interceptor_test.cc PROPERTIES COMPILE_FLAGS + ${DISTRIBUTE_COMPILE_FLAGS}) +cc_test( + source_interceptor_test + SRCS source_interceptor_test.cc + DEPS fleet_executor ${BRPC_DEPS}) -set_source_files_properties(sink_interceptor_test.cc PROPERTIES COMPILE_FLAGS ${DISTRIBUTE_COMPILE_FLAGS}) -cc_test(sink_interceptor_test SRCS sink_interceptor_test.cc DEPS fleet_executor ${BRPC_DEPS}) +set_source_files_properties( + sink_interceptor_test.cc PROPERTIES COMPILE_FLAGS ${DISTRIBUTE_COMPILE_FLAGS}) +cc_test( + sink_interceptor_test + SRCS sink_interceptor_test.cc + DEPS fleet_executor ${BRPC_DEPS}) -set_source_files_properties(interceptor_pipeline_short_path_test.cc PROPERTIES COMPILE_FLAGS ${DISTRIBUTE_COMPILE_FLAGS}) -cc_test(interceptor_pipeline_short_path_test SRCS interceptor_pipeline_short_path_test.cc DEPS fleet_executor ${BRPC_DEPS}) +set_source_files_properties( + interceptor_pipeline_short_path_test.cc + PROPERTIES COMPILE_FLAGS ${DISTRIBUTE_COMPILE_FLAGS}) +cc_test( + interceptor_pipeline_short_path_test + SRCS interceptor_pipeline_short_path_test.cc + DEPS fleet_executor ${BRPC_DEPS}) -set_source_files_properties(interceptor_pipeline_long_path_test.cc PROPERTIES COMPILE_FLAGS ${DISTRIBUTE_COMPILE_FLAGS}) -cc_test(interceptor_pipeline_long_path_test SRCS interceptor_pipeline_long_path_test.cc DEPS fleet_executor ${BRPC_DEPS}) +set_source_files_properties( + interceptor_pipeline_long_path_test.cc PROPERTIES COMPILE_FLAGS + ${DISTRIBUTE_COMPILE_FLAGS}) +cc_test( + interceptor_pipeline_long_path_test + SRCS interceptor_pipeline_long_path_test.cc + DEPS fleet_executor ${BRPC_DEPS}) -set_source_files_properties(compute_interceptor_run_op_test.cc PROPERTIES COMPILE_FLAGS ${DISTRIBUTE_COMPILE_FLAGS}) -cc_test(compute_interceptor_run_op_test SRCS compute_interceptor_run_op_test.cc DEPS fleet_executor ${BRPC_DEPS} op_registry fill_constant_op elementwise_add_op scope device_context) +set_source_files_properties( + compute_interceptor_run_op_test.cc PROPERTIES COMPILE_FLAGS + ${DISTRIBUTE_COMPILE_FLAGS}) +cc_test( + compute_interceptor_run_op_test + SRCS compute_interceptor_run_op_test.cc + DEPS fleet_executor + ${BRPC_DEPS} + op_registry + fill_constant_op + elementwise_add_op + scope + device_context) -if(WITH_DISTRIBUTE AND WITH_PSCORE AND NOT (WITH_ASCEND OR WITH_ASCEND_CL)) -set_source_files_properties(interceptor_ping_pong_with_brpc_test.cc PROPERTIES COMPILE_FLAGS ${DISTRIBUTE_COMPILE_FLAGS}) -cc_test(interceptor_ping_pong_with_brpc_test SRCS interceptor_ping_pong_with_brpc_test.cc DEPS fleet_executor ${BRPC_DEPS}) +if(WITH_DISTRIBUTE + AND WITH_PSCORE + AND NOT (WITH_ASCEND OR WITH_ASCEND_CL)) + set_source_files_properties( + interceptor_ping_pong_with_brpc_test.cc + PROPERTIES COMPILE_FLAGS ${DISTRIBUTE_COMPILE_FLAGS}) + cc_test( + interceptor_ping_pong_with_brpc_test + SRCS interceptor_ping_pong_with_brpc_test.cc + DEPS fleet_executor ${BRPC_DEPS}) endif() diff --git a/paddle/fluid/distributed/index_dataset/CMakeLists.txt b/paddle/fluid/distributed/index_dataset/CMakeLists.txt index 98bc0a0ad4a..524245be5f2 100644 --- a/paddle/fluid/distributed/index_dataset/CMakeLists.txt +++ b/paddle/fluid/distributed/index_dataset/CMakeLists.txt @@ -1,9 +1,18 @@ proto_library(index_dataset_proto SRCS index_dataset.proto) -cc_library(index_wrapper SRCS index_wrapper.cc DEPS index_dataset_proto fs) +cc_library( + index_wrapper + SRCS index_wrapper.cc + DEPS index_dataset_proto fs) if(WITH_MKLDNN) - cc_library(index_sampler SRCS index_sampler.cc DEPS xxhash index_wrapper eigen3 mkldnn) + cc_library( + index_sampler + SRCS index_sampler.cc + DEPS xxhash index_wrapper eigen3 mkldnn) else() - cc_library(index_sampler SRCS index_sampler.cc DEPS xxhash index_wrapper eigen3) + cc_library( + index_sampler + SRCS index_sampler.cc + DEPS xxhash index_wrapper eigen3) endif() if(WITH_PYTHON) py_proto_compile(index_dataset_py_proto SRCS index_dataset.proto) diff --git a/paddle/fluid/distributed/ps/service/CMakeLists.txt b/paddle/fluid/distributed/ps/service/CMakeLists.txt index e7519ef4998..ad49b651e2e 100755 --- a/paddle/fluid/distributed/ps/service/CMakeLists.txt +++ b/paddle/fluid/distributed/ps/service/CMakeLists.txt @@ -1,57 +1,136 @@ set(BRPC_SRCS ps_client.cc server.cc) set_source_files_properties(${BRPC_SRCS}) - if(WITH_HETERPS) - set(BRPC_DEPS brpc ssl crypto protobuf gflags glog zlib leveldb snappy gflags glog device_context rocksdb) + set(BRPC_DEPS + brpc + ssl + crypto + protobuf + gflags + glog + zlib + leveldb + snappy + gflags + glog + device_context + rocksdb) else() - set(BRPC_DEPS brpc ssl crypto protobuf gflags glog zlib leveldb snappy gflags glog device_context) + set(BRPC_DEPS + brpc + ssl + crypto + protobuf + gflags + glog + zlib + leveldb + snappy + gflags + glog + device_context) endif() -brpc_library(sendrecv_rpc SRCS - ${BRPC_SRCS} - PROTO sendrecv.proto - DEPS ${BRPC_DEPS} ) +brpc_library( + sendrecv_rpc + SRCS + ${BRPC_SRCS} + PROTO + sendrecv.proto + DEPS + ${BRPC_DEPS}) #set_property(GLOBAL PROPERTY RPC_DEPS sendrecv_rpc ${BRPC_DEPS} string_helper) get_property(RPC_DEPS GLOBAL PROPERTY RPC_DEPS) -set_source_files_properties(communicator/communicator.cc PROPERTIES COMPILE_FLAGS ${DISTRIBUTE_COMPILE_FLAGS}) -set_source_files_properties(ps_service/service.cc PROPERTIES COMPILE_FLAGS ${DISTRIBUTE_COMPILE_FLAGS}) -set_source_files_properties(brpc_ps_server.cc PROPERTIES COMPILE_FLAGS ${DISTRIBUTE_COMPILE_FLAGS}) -set_source_files_properties(brpc_ps_client.cc PROPERTIES COMPILE_FLAGS ${DISTRIBUTE_COMPILE_FLAGS}) -set_source_files_properties(ps_local_client.cc PROPERTIES COMPILE_FLAGS ${DISTRIBUTE_COMPILE_FLAGS}) - -set_source_files_properties(brpc_utils.cc PROPERTIES COMPILE_FLAGS ${DISTRIBUTE_COMPILE_FLAGS}) -set_source_files_properties(heter_server.cc PROPERTIES COMPILE_FLAGS ${DISTRIBUTE_COMPILE_FLAGS}) -set_source_files_properties(heter_client.cc PROPERTIES COMPILE_FLAGS ${DISTRIBUTE_COMPILE_FLAGS}) - -set_source_files_properties(client.cc PROPERTIES COMPILE_FLAGS ${DISTRIBUTE_COMPILE_FLAGS}) -set_source_files_properties(ps_client.cc PROPERTIES COMPILE_FLAGS ${DISTRIBUTE_COMPILE_FLAGS}) -set_source_files_properties(server.cc PROPERTIES COMPILE_FLAGS ${DISTRIBUTE_COMPILE_FLAGS}) -set_source_files_properties(graph_brpc_server.cc PROPERTIES COMPILE_FLAGS ${DISTRIBUTE_COMPILE_FLAGS}) -set_source_files_properties(graph_brpc_client.cc PROPERTIES COMPILE_FLAGS ${DISTRIBUTE_COMPILE_FLAGS}) -cc_library(brpc_utils SRCS brpc_utils.cc DEPS tensor device_context ${COMMON_DEPS} ${RPC_DEPS}) - -cc_library(downpour_server SRCS graph_brpc_server.cc brpc_ps_server.cc DEPS boost eigen3 table brpc_utils simple_threadpool ${RPC_DEPS}) -cc_library(downpour_client SRCS graph_brpc_client.cc brpc_ps_client.cc -ps_local_client.cc DEPS boost eigen3 table brpc_utils simple_threadpool ${RPC_DEPS}) - -cc_library(client SRCS ps_client.cc DEPS downpour_client boost ${RPC_DEPS}) -cc_library(server SRCS server.cc DEPS downpour_server boost ${RPC_DEPS}) - -cc_library(communicator SRCS communicator/communicator.cc DEPS scope client boost table math_function selected_rows_functor ${RPC_DEPS}) -cc_library(ps_service SRCS ps_service/service.cc DEPS communicator client server boost ${RPC_DEPS}) - -cc_library(heter_client SRCS heter_client.cc DEPS brpc_utils ${COMMON_DEPS} ${RPC_DEPS}) -cc_library(heter_server SRCS heter_server.cc DEPS heter_client brpc_utils ${COMMON_DEPS} ${RPC_DEPS}) - -set_source_files_properties(ps_service/graph_py_service.cc PROPERTIES COMPILE_FLAGS ${DISTRIBUTE_COMPILE_FLAGS}) -cc_library(graph_py_service SRCS ps_service/graph_py_service.cc DEPS ps_service) +set_source_files_properties( + communicator/communicator.cc PROPERTIES COMPILE_FLAGS + ${DISTRIBUTE_COMPILE_FLAGS}) +set_source_files_properties( + ps_service/service.cc PROPERTIES COMPILE_FLAGS ${DISTRIBUTE_COMPILE_FLAGS}) +set_source_files_properties( + brpc_ps_server.cc PROPERTIES COMPILE_FLAGS ${DISTRIBUTE_COMPILE_FLAGS}) +set_source_files_properties( + brpc_ps_client.cc PROPERTIES COMPILE_FLAGS ${DISTRIBUTE_COMPILE_FLAGS}) +set_source_files_properties( + ps_local_client.cc PROPERTIES COMPILE_FLAGS ${DISTRIBUTE_COMPILE_FLAGS}) + +set_source_files_properties( + brpc_utils.cc PROPERTIES COMPILE_FLAGS ${DISTRIBUTE_COMPILE_FLAGS}) +set_source_files_properties( + heter_server.cc PROPERTIES COMPILE_FLAGS ${DISTRIBUTE_COMPILE_FLAGS}) +set_source_files_properties( + heter_client.cc PROPERTIES COMPILE_FLAGS ${DISTRIBUTE_COMPILE_FLAGS}) + +set_source_files_properties(client.cc PROPERTIES COMPILE_FLAGS + ${DISTRIBUTE_COMPILE_FLAGS}) +set_source_files_properties(ps_client.cc PROPERTIES COMPILE_FLAGS + ${DISTRIBUTE_COMPILE_FLAGS}) +set_source_files_properties(server.cc PROPERTIES COMPILE_FLAGS + ${DISTRIBUTE_COMPILE_FLAGS}) +set_source_files_properties( + graph_brpc_server.cc PROPERTIES COMPILE_FLAGS ${DISTRIBUTE_COMPILE_FLAGS}) +set_source_files_properties( + graph_brpc_client.cc PROPERTIES COMPILE_FLAGS ${DISTRIBUTE_COMPILE_FLAGS}) +cc_library( + brpc_utils + SRCS brpc_utils.cc + DEPS tensor device_context ${COMMON_DEPS} ${RPC_DEPS}) + +cc_library( + downpour_server + SRCS graph_brpc_server.cc brpc_ps_server.cc + DEPS boost eigen3 table brpc_utils simple_threadpool ${RPC_DEPS}) +cc_library( + downpour_client + SRCS graph_brpc_client.cc brpc_ps_client.cc ps_local_client.cc + DEPS boost eigen3 table brpc_utils simple_threadpool ${RPC_DEPS}) + +cc_library( + client + SRCS ps_client.cc + DEPS downpour_client boost ${RPC_DEPS}) +cc_library( + server + SRCS server.cc + DEPS downpour_server boost ${RPC_DEPS}) + +cc_library( + communicator + SRCS communicator/communicator.cc + DEPS scope + client + boost + table + math_function + selected_rows_functor + ${RPC_DEPS}) +cc_library( + ps_service + SRCS ps_service/service.cc + DEPS communicator client server boost ${RPC_DEPS}) + +cc_library( + heter_client + SRCS heter_client.cc + DEPS brpc_utils ${COMMON_DEPS} ${RPC_DEPS}) +cc_library( + heter_server + SRCS heter_server.cc + DEPS heter_client brpc_utils ${COMMON_DEPS} ${RPC_DEPS}) + +set_source_files_properties( + ps_service/graph_py_service.cc PROPERTIES COMPILE_FLAGS + ${DISTRIBUTE_COMPILE_FLAGS}) +cc_library( + graph_py_service + SRCS ps_service/graph_py_service.cc + DEPS ps_service) #add_subdirectory(communicator) diff --git a/paddle/fluid/distributed/ps/service/communicator/CMakeLists.txt b/paddle/fluid/distributed/ps/service/communicator/CMakeLists.txt index 3610729d74d..612358c71a6 100644 --- a/paddle/fluid/distributed/ps/service/communicator/CMakeLists.txt +++ b/paddle/fluid/distributed/ps/service/communicator/CMakeLists.txt @@ -1,8 +1,15 @@ - - get_property(RPC_DEPS GLOBAL PROPERTY RPC_DEPS) -set_source_files_properties(communicator.cc PROPERTIES COMPILE_FLAGS ${DISTRIBUTE_COMPILE_FLAGS}) - - -cc_library(communicator SRCS communicator.cc DEPS scope client boost table math_function selected_rows_functor ${RPC_DEPS}) +set_source_files_properties( + communicator.cc PROPERTIES COMPILE_FLAGS ${DISTRIBUTE_COMPILE_FLAGS}) + +cc_library( + communicator + SRCS communicator.cc + DEPS scope + client + boost + table + math_function + selected_rows_functor + ${RPC_DEPS}) diff --git a/paddle/fluid/distributed/ps/table/CMakeLists.txt b/paddle/fluid/distributed/ps/table/CMakeLists.txt index b8eff940a0d..fdda59420f0 100644 --- a/paddle/fluid/distributed/ps/table/CMakeLists.txt +++ b/paddle/fluid/distributed/ps/table/CMakeLists.txt @@ -1,49 +1,125 @@ set_property(GLOBAL PROPERTY TABLE_DEPS string_helper) set(graphDir graph) get_property(TABLE_DEPS GLOBAL PROPERTY TABLE_DEPS) -set_source_files_properties(${graphDir}/graph_edge.cc PROPERTIES COMPILE_FLAGS ${DISTRIBUTE_COMPILE_FLAGS}) +set_source_files_properties( + ${graphDir}/graph_edge.cc PROPERTIES COMPILE_FLAGS + ${DISTRIBUTE_COMPILE_FLAGS}) cc_library(graph_edge SRCS ${graphDir}/graph_edge.cc) -set_source_files_properties(${graphDir}/graph_weighted_sampler.cc PROPERTIES COMPILE_FLAGS ${DISTRIBUTE_COMPILE_FLAGS}) -cc_library(WeightedSampler SRCS ${graphDir}/graph_weighted_sampler.cc DEPS graph_edge) -set_source_files_properties(${graphDir}/graph_node.cc PROPERTIES COMPILE_FLAGS ${DISTRIBUTE_COMPILE_FLAGS}) -cc_library(graph_node SRCS ${graphDir}/graph_node.cc DEPS WeightedSampler) -set_source_files_properties(memory_dense_table.cc PROPERTIES COMPILE_FLAGS ${DISTRIBUTE_COMPILE_FLAGS}) -set_source_files_properties(barrier_table.cc PROPERTIES COMPILE_FLAGS ${DISTRIBUTE_COMPILE_FLAGS}) -set_source_files_properties(common_graph_table.cc PROPERTIES COMPILE_FLAGS ${DISTRIBUTE_COMPILE_FLAGS}) +set_source_files_properties( + ${graphDir}/graph_weighted_sampler.cc PROPERTIES COMPILE_FLAGS + ${DISTRIBUTE_COMPILE_FLAGS}) +cc_library( + WeightedSampler + SRCS ${graphDir}/graph_weighted_sampler.cc + DEPS graph_edge) +set_source_files_properties( + ${graphDir}/graph_node.cc PROPERTIES COMPILE_FLAGS + ${DISTRIBUTE_COMPILE_FLAGS}) +cc_library( + graph_node + SRCS ${graphDir}/graph_node.cc + DEPS WeightedSampler) +set_source_files_properties( + memory_dense_table.cc PROPERTIES COMPILE_FLAGS ${DISTRIBUTE_COMPILE_FLAGS}) +set_source_files_properties( + barrier_table.cc PROPERTIES COMPILE_FLAGS ${DISTRIBUTE_COMPILE_FLAGS}) +set_source_files_properties( + common_graph_table.cc PROPERTIES COMPILE_FLAGS ${DISTRIBUTE_COMPILE_FLAGS}) get_property(RPC_DEPS GLOBAL PROPERTY RPC_DEPS) set(PADDLE_LIB_THIRD_PARTY_PATH "${PADDLE_LIB}/third_party/") -include_directories(${PADDLE_LIB_THIRD_PARTY_PATH}libmct/src/extern_libmct/libmct/include) +include_directories( + ${PADDLE_LIB_THIRD_PARTY_PATH}libmct/src/extern_libmct/libmct/include) set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -fopenmp") set(TABLE_SRC memory_dense_table.cc barrier_table.cc common_graph_table.cc) #set(EXTERN_DEP rocksdb) -cc_library(common_table SRCS ${TABLE_SRC} DEPS ${TABLE_DEPS} -${RPC_DEPS} graph_edge graph_node device_context string_helper -simple_threadpool xxhash generator) +cc_library( + common_table + SRCS ${TABLE_SRC} + DEPS ${TABLE_DEPS} + ${RPC_DEPS} + graph_edge + graph_node + device_context + string_helper + simple_threadpool + xxhash + generator) -set_source_files_properties(tensor_accessor.cc PROPERTIES COMPILE_FLAGS ${DISTRIBUTE_COMPILE_FLAGS}) +set_source_files_properties( + tensor_accessor.cc PROPERTIES COMPILE_FLAGS ${DISTRIBUTE_COMPILE_FLAGS}) -cc_library(tensor_accessor SRCS tensor_accessor.cc DEPS ${TABLE_DEPS} eigen3 ps_framework_proto device_context) -cc_library(tensor_table SRCS DEPS eigen3 ps_framework_proto executor scope device_context tensor ${TABLE_DEPS}) -set_source_files_properties(table.cc PROPERTIES COMPILE_FLAGS ${DISTRIBUTE_COMPILE_FLAGS}) +cc_library( + tensor_accessor + SRCS tensor_accessor.cc + DEPS ${TABLE_DEPS} eigen3 ps_framework_proto device_context) +cc_library( + tensor_table + SRCS + DEPS eigen3 + ps_framework_proto + executor + scope + device_context + tensor + ${TABLE_DEPS}) +set_source_files_properties(table.cc PROPERTIES COMPILE_FLAGS + ${DISTRIBUTE_COMPILE_FLAGS}) -set_source_files_properties(sparse_sgd_rule.cc PROPERTIES COMPILE_FLAGS ${DISTRIBUTE_COMPILE_FLAGS}) -set_source_files_properties(ctr_double_accessor.cc PROPERTIES COMPILE_FLAGS ${DISTRIBUTE_COMPILE_FLAGS}) -set_source_files_properties(ctr_accessor.cc PROPERTIES COMPILE_FLAGS ${DISTRIBUTE_COMPILE_FLAGS}) -set_source_files_properties(sparse_accessor.cc PROPERTIES COMPILE_FLAGS ${DISTRIBUTE_COMPILE_FLAGS}) -set_source_files_properties(ctr_dymf_accessor.cc PROPERTIES COMPILE_FLAGS ${DISTRIBUTE_COMPILE_FLAGS}) -set_source_files_properties(memory_sparse_table.cc PROPERTIES COMPILE_FLAGS ${DISTRIBUTE_COMPILE_FLAGS}) -set_source_files_properties(ssd_sparse_table.cc PROPERTIES COMPILE_FLAGS ${DISTRIBUTE_COMPILE_FLAGS}) -set_source_files_properties(memory_sparse_geo_table.cc PROPERTIES COMPILE_FLAGS ${DISTRIBUTE_COMPILE_FLAGS}) +set_source_files_properties( + sparse_sgd_rule.cc PROPERTIES COMPILE_FLAGS ${DISTRIBUTE_COMPILE_FLAGS}) +set_source_files_properties( + ctr_double_accessor.cc PROPERTIES COMPILE_FLAGS ${DISTRIBUTE_COMPILE_FLAGS}) +set_source_files_properties( + ctr_accessor.cc PROPERTIES COMPILE_FLAGS ${DISTRIBUTE_COMPILE_FLAGS}) +set_source_files_properties( + sparse_accessor.cc PROPERTIES COMPILE_FLAGS ${DISTRIBUTE_COMPILE_FLAGS}) +set_source_files_properties( + ctr_dymf_accessor.cc PROPERTIES COMPILE_FLAGS ${DISTRIBUTE_COMPILE_FLAGS}) +set_source_files_properties( + memory_sparse_table.cc PROPERTIES COMPILE_FLAGS ${DISTRIBUTE_COMPILE_FLAGS}) +set_source_files_properties( + ssd_sparse_table.cc PROPERTIES COMPILE_FLAGS ${DISTRIBUTE_COMPILE_FLAGS}) +set_source_files_properties( + memory_sparse_geo_table.cc PROPERTIES COMPILE_FLAGS + ${DISTRIBUTE_COMPILE_FLAGS}) -cc_library(sparse_sgd_rule SRCS sparse_sgd_rule.cc DEPS ${TABLE_DEPS} ps_framework_proto) -cc_library(ctr_accessor SRCS ctr_accessor.cc ctr_double_accessor.cc sparse_accessor.cc ctr_dymf_accessor.cc DEPS ${TABLE_DEPS} ps_framework_proto sparse_sgd_rule) -cc_library(sparse_table SRCS memory_sparse_table.cc ssd_sparse_table.cc memory_sparse_geo_table.cc DEPS ps_framework_proto ${TABLE_DEPS} fs afs_wrapper ctr_accessor common_table rocksdb) +cc_library( + sparse_sgd_rule + SRCS sparse_sgd_rule.cc + DEPS ${TABLE_DEPS} ps_framework_proto) +cc_library( + ctr_accessor + SRCS ctr_accessor.cc ctr_double_accessor.cc sparse_accessor.cc + ctr_dymf_accessor.cc + DEPS ${TABLE_DEPS} ps_framework_proto sparse_sgd_rule) +cc_library( + sparse_table + SRCS memory_sparse_table.cc ssd_sparse_table.cc memory_sparse_geo_table.cc + DEPS ps_framework_proto + ${TABLE_DEPS} + fs + afs_wrapper + ctr_accessor + common_table + rocksdb) -cc_library(table SRCS table.cc DEPS sparse_table common_table tensor_accessor tensor_table ps_framework_proto string_helper device_context gflags glog boost) +cc_library( + table + SRCS table.cc + DEPS sparse_table + common_table + tensor_accessor + tensor_table + ps_framework_proto + string_helper + device_context + gflags + glog + boost) target_link_libraries(table -fopenmp) diff --git a/paddle/fluid/distributed/ps/wrapper/CMakeLists.txt b/paddle/fluid/distributed/ps/wrapper/CMakeLists.txt index 6279b6aa954..8b5457ef9ee 100644 --- a/paddle/fluid/distributed/ps/wrapper/CMakeLists.txt +++ b/paddle/fluid/distributed/ps/wrapper/CMakeLists.txt @@ -1,9 +1,18 @@ - get_property(RPC_DEPS GLOBAL PROPERTY RPC_DEPS) -set_source_files_properties(fleet.cc PROPERTIES COMPILE_FLAGS ${DISTRIBUTE_COMPILE_FLAGS}) -cc_library(fleet - SRCS fleet.cc - DEPS framework_proto ps_framework_proto ps_service variable_helper scope op_registry fs shell ${RPC_DEPS}) +set_source_files_properties(fleet.cc PROPERTIES COMPILE_FLAGS + ${DISTRIBUTE_COMPILE_FLAGS}) +cc_library( + fleet + SRCS fleet.cc + DEPS framework_proto + ps_framework_proto + ps_service + variable_helper + scope + op_registry + fs + shell + ${RPC_DEPS}) target_link_libraries(fleet z) diff --git a/paddle/fluid/distributed/store/CMakeLists.txt b/paddle/fluid/distributed/store/CMakeLists.txt index 1fde447d97d..cfab4aad5f7 100644 --- a/paddle/fluid/distributed/store/CMakeLists.txt +++ b/paddle/fluid/distributed/store/CMakeLists.txt @@ -1 +1,4 @@ -cc_library(tcp_store SRCS tcp_store.cc tcp_utils.cc DEPS enforce glog) +cc_library( + tcp_store + SRCS tcp_store.cc tcp_utils.cc + DEPS enforce glog) diff --git a/paddle/fluid/distributed/test/CMakeLists.txt b/paddle/fluid/distributed/test/CMakeLists.txt index 9f339d7ee2c..9b7a304b0a9 100644 --- a/paddle/fluid/distributed/test/CMakeLists.txt +++ b/paddle/fluid/distributed/test/CMakeLists.txt @@ -1,46 +1,144 @@ -set_source_files_properties(table_test.cc PROPERTIES COMPILE_FLAGS ${DISTRIBUTE_COMPILE_FLAGS}) -cc_test(table_test SRCS table_test.cc DEPS common_table table tensor_accessor -ps_framework_proto ${COMMON_DEPS} ${RPC_DEPS}) +set_source_files_properties( + table_test.cc PROPERTIES COMPILE_FLAGS ${DISTRIBUTE_COMPILE_FLAGS}) +cc_test( + table_test + SRCS table_test.cc + DEPS common_table table tensor_accessor ps_framework_proto ${COMMON_DEPS} + ${RPC_DEPS}) -set_source_files_properties(dense_table_test.cc PROPERTIES COMPILE_FLAGS ${DISTRIBUTE_COMPILE_FLAGS}) -cc_test(dense_table_test SRCS dense_table_test.cc DEPS common_table table -tensor_accessor ps_framework_proto ${COMMON_DEPS} ${RPC_DEPS}) +set_source_files_properties( + dense_table_test.cc PROPERTIES COMPILE_FLAGS ${DISTRIBUTE_COMPILE_FLAGS}) +cc_test( + dense_table_test + SRCS dense_table_test.cc + DEPS common_table table tensor_accessor ps_framework_proto ${COMMON_DEPS} + ${RPC_DEPS}) -set_source_files_properties(barrier_table_test.cc PROPERTIES COMPILE_FLAGS ${DISTRIBUTE_COMPILE_FLAGS}) -cc_test(barrier_table_test SRCS barrier_table_test.cc DEPS common_table table tensor_accessor ps_framework_proto ${COMMON_DEPS}) +set_source_files_properties( + barrier_table_test.cc PROPERTIES COMPILE_FLAGS ${DISTRIBUTE_COMPILE_FLAGS}) +cc_test( + barrier_table_test + SRCS barrier_table_test.cc + DEPS common_table table tensor_accessor ps_framework_proto ${COMMON_DEPS}) -set_source_files_properties(brpc_service_dense_sgd_test.cc PROPERTIES COMPILE_FLAGS ${DISTRIBUTE_COMPILE_FLAGS}) -cc_test(brpc_service_dense_sgd_test SRCS brpc_service_dense_sgd_test.cc DEPS scope server client communicator ps_service boost table ps_framework_proto ${COMMON_DEPS}) +set_source_files_properties( + brpc_service_dense_sgd_test.cc PROPERTIES COMPILE_FLAGS + ${DISTRIBUTE_COMPILE_FLAGS}) +cc_test( + brpc_service_dense_sgd_test + SRCS brpc_service_dense_sgd_test.cc + DEPS scope + server + client + communicator + ps_service + boost + table + ps_framework_proto + ${COMMON_DEPS}) -set_source_files_properties(brpc_service_sparse_sgd_test.cc PROPERTIES COMPILE_FLAGS ${DISTRIBUTE_COMPILE_FLAGS}) -cc_test(brpc_service_sparse_sgd_test SRCS brpc_service_sparse_sgd_test.cc DEPS scope server client communicator ps_service boost table ps_framework_proto ${COMMON_DEPS}) +set_source_files_properties( + brpc_service_sparse_sgd_test.cc PROPERTIES COMPILE_FLAGS + ${DISTRIBUTE_COMPILE_FLAGS}) +cc_test( + brpc_service_sparse_sgd_test + SRCS brpc_service_sparse_sgd_test.cc + DEPS scope + server + client + communicator + ps_service + boost + table + ps_framework_proto + ${COMMON_DEPS}) -set_source_files_properties(brpc_utils_test.cc PROPERTIES COMPILE_FLAGS ${DISTRIBUTE_COMPILE_FLAGS}) -cc_test(brpc_utils_test SRCS brpc_utils_test.cc DEPS brpc_utils scope math_function ${COMMON_DEPS} ${RPC_DEPS}) +set_source_files_properties( + brpc_utils_test.cc PROPERTIES COMPILE_FLAGS ${DISTRIBUTE_COMPILE_FLAGS}) +cc_test( + brpc_utils_test + SRCS brpc_utils_test.cc + DEPS brpc_utils scope math_function ${COMMON_DEPS} ${RPC_DEPS}) -set_source_files_properties(graph_node_test.cc PROPERTIES COMPILE_FLAGS ${DISTRIBUTE_COMPILE_FLAGS}) -cc_test(graph_node_test SRCS graph_node_test.cc DEPS graph_py_service scope server client communicator ps_service boost table ps_framework_proto ${COMMON_DEPS}) +set_source_files_properties( + graph_node_test.cc PROPERTIES COMPILE_FLAGS ${DISTRIBUTE_COMPILE_FLAGS}) +cc_test( + graph_node_test + SRCS graph_node_test.cc + DEPS graph_py_service + scope + server + client + communicator + ps_service + boost + table + ps_framework_proto + ${COMMON_DEPS}) -set_source_files_properties(graph_node_split_test.cc PROPERTIES COMPILE_FLAGS ${DISTRIBUTE_COMPILE_FLAGS}) -cc_test(graph_node_split_test SRCS graph_node_split_test.cc DEPS graph_py_service scope server client communicator ps_service boost table ps_framework_proto ${COMMON_DEPS}) +set_source_files_properties( + graph_node_split_test.cc PROPERTIES COMPILE_FLAGS ${DISTRIBUTE_COMPILE_FLAGS}) +cc_test( + graph_node_split_test + SRCS graph_node_split_test.cc + DEPS graph_py_service + scope + server + client + communicator + ps_service + boost + table + ps_framework_proto + ${COMMON_DEPS}) -set_source_files_properties(graph_table_sample_test.cc PROPERTIES COMPILE_FLAGS ${DISTRIBUTE_COMPILE_FLAGS}) -cc_test(graph_table_sample_test SRCS graph_table_sample_test.cc DEPS table ps_framework_proto ${COMMON_DEPS}) +set_source_files_properties( + graph_table_sample_test.cc PROPERTIES COMPILE_FLAGS + ${DISTRIBUTE_COMPILE_FLAGS}) +cc_test( + graph_table_sample_test + SRCS graph_table_sample_test.cc + DEPS table ps_framework_proto ${COMMON_DEPS}) -set_source_files_properties(feature_value_test.cc PROPERTIES COMPILE_FLAGS ${DISTRIBUTE_COMPILE_FLAGS}) -cc_test(feature_value_test SRCS feature_value_test.cc DEPS ${COMMON_DEPS} boost table) +set_source_files_properties( + feature_value_test.cc PROPERTIES COMPILE_FLAGS ${DISTRIBUTE_COMPILE_FLAGS}) +cc_test( + feature_value_test + SRCS feature_value_test.cc + DEPS ${COMMON_DEPS} boost table) -set_source_files_properties(sparse_sgd_rule_test.cc PROPERTIES COMPILE_FLAGS ${DISTRIBUTE_COMPILE_FLAGS}) -cc_test(sparse_sgd_rule_test SRCS sparse_sgd_rule_test.cc DEPS ${COMMON_DEPS} boost table) +set_source_files_properties( + sparse_sgd_rule_test.cc PROPERTIES COMPILE_FLAGS ${DISTRIBUTE_COMPILE_FLAGS}) +cc_test( + sparse_sgd_rule_test + SRCS sparse_sgd_rule_test.cc + DEPS ${COMMON_DEPS} boost table) -set_source_files_properties(ctr_accessor_test.cc PROPERTIES COMPILE_FLAGS ${DISTRIBUTE_COMPILE_FLAGS}) -cc_test(ctr_accessor_test SRCS ctr_accessor_test.cc DEPS ${COMMON_DEPS} boost table) -set_source_files_properties(ctr_dymf_accessor_test.cc PROPERTIES COMPILE_FLAGS ${DISTRIBUTE_COMPILE_FLAGS}) -cc_test(ctr_dymf_accessor_test SRCS ctr_dymf_accessor_test.cc DEPS ${COMMON_DEPS} boost table) +set_source_files_properties( + ctr_accessor_test.cc PROPERTIES COMPILE_FLAGS ${DISTRIBUTE_COMPILE_FLAGS}) +cc_test( + ctr_accessor_test + SRCS ctr_accessor_test.cc + DEPS ${COMMON_DEPS} boost table) +set_source_files_properties( + ctr_dymf_accessor_test.cc PROPERTIES COMPILE_FLAGS + ${DISTRIBUTE_COMPILE_FLAGS}) +cc_test( + ctr_dymf_accessor_test + SRCS ctr_dymf_accessor_test.cc + DEPS ${COMMON_DEPS} boost table) +set_source_files_properties( + memory_sparse_table_test.cc PROPERTIES COMPILE_FLAGS + ${DISTRIBUTE_COMPILE_FLAGS}) +cc_test( + memory_sparse_table_test + SRCS memory_sparse_table_test.cc + DEPS ${COMMON_DEPS} boost table) -set_source_files_properties(memory_sparse_table_test.cc PROPERTIES COMPILE_FLAGS ${DISTRIBUTE_COMPILE_FLAGS}) -cc_test(memory_sparse_table_test SRCS memory_sparse_table_test.cc DEPS ${COMMON_DEPS} boost table) - -set_source_files_properties(memory_geo_table_test.cc PROPERTIES COMPILE_FLAGS ${DISTRIBUTE_COMPILE_FLAGS}) -cc_test(memory_sparse_geo_table_test SRCS memory_geo_table_test.cc DEPS ${COMMON_DEPS} boost table) +set_source_files_properties( + memory_geo_table_test.cc PROPERTIES COMPILE_FLAGS ${DISTRIBUTE_COMPILE_FLAGS}) +cc_test( + memory_sparse_geo_table_test + SRCS memory_geo_table_test.cc + DEPS ${COMMON_DEPS} boost table) diff --git a/paddle/fluid/eager/CMakeLists.txt b/paddle/fluid/eager/CMakeLists.txt index 11c98e5da9d..73d8539329a 100644 --- a/paddle/fluid/eager/CMakeLists.txt +++ b/paddle/fluid/eager/CMakeLists.txt @@ -1,29 +1,82 @@ -set(eager_deps phi_api phi_dygraph_api hook_utils tensor_utils utils global_utils backward phi_tensor tracer layer autograd_meta eager_nan_inf_utils grad_node_info grad_tensor_holder accumulation_node custom_operator_node) +set(eager_deps + phi_api + phi_dygraph_api + hook_utils + tensor_utils + utils + global_utils + backward + phi_tensor + tracer + layer + autograd_meta + eager_nan_inf_utils + grad_node_info + grad_tensor_holder + accumulation_node + custom_operator_node) -set(fluid_deps tracer layer proto_desc operator op_registry variable_helper memcpy) -set(generated_deps final_dygraph_function final_dygraph_node dygraph_function dygraph_node) +set(fluid_deps + tracer + layer + proto_desc + operator + op_registry + variable_helper + memcpy) +set(generated_deps final_dygraph_function final_dygraph_node dygraph_function + dygraph_node) if(NOT ((NOT WITH_PYTHON) AND ON_INFER)) - message("Performing Eager Dygraph Auto Code Generation") - add_subdirectory(auto_code_generator) + message("Performing Eager Dygraph Auto Code Generation") + add_subdirectory(auto_code_generator) endif() add_subdirectory(api) add_subdirectory(accumulation) add_subdirectory(custom_operator) if(NOT ((NOT WITH_PYTHON) AND ON_INFER)) - add_subdirectory(pylayer) - cc_library(grad_tensor_holder SRCS grad_tensor_holder.cc DEPS grad_node_info gradient_accumulator) - add_dependencies(grad_tensor_holder eager_final_state_codegen) - cc_library(backward SRCS backward.cc DEPS grad_tensor_holder utils autograd_meta grad_node_info switch_autotune) + add_subdirectory(pylayer) + cc_library( + grad_tensor_holder + SRCS grad_tensor_holder.cc + DEPS grad_node_info gradient_accumulator) + add_dependencies(grad_tensor_holder eager_final_state_codegen) + cc_library( + backward + SRCS backward.cc + DEPS grad_tensor_holder utils autograd_meta grad_node_info switch_autotune) endif() -cc_library(eager_nan_inf_utils SRCS nan_inf_utils.cc DEPS phi_tensor nan_inf_utils enforce) -cc_library(grad_node_info SRCS grad_node_info.cc DEPS phi_api phi_tensor) +cc_library( + eager_nan_inf_utils + SRCS nan_inf_utils.cc + DEPS phi_tensor nan_inf_utils enforce) +cc_library( + grad_node_info + SRCS grad_node_info.cc + DEPS phi_api phi_tensor) -cc_library(autograd_meta SRCS autograd_meta.cc DEPS phi_api phi_tensor) -cc_library(utils SRCS utils.cc DEPS phi_api phi_tensor global_utils layer proto_desc operator op_registry variable_helper memcpy scale_op autograd_meta hook_utils) +cc_library( + autograd_meta + SRCS autograd_meta.cc + DEPS phi_api phi_tensor) +cc_library( + utils + SRCS utils.cc + DEPS phi_api + phi_tensor + global_utils + layer + proto_desc + operator + op_registry + variable_helper + memcpy + scale_op + autograd_meta + hook_utils) if(NOT ((NOT WITH_PYTHON) AND ON_INFER)) - add_subdirectory(tests) + add_subdirectory(tests) endif() diff --git a/paddle/fluid/eager/accumulation/CMakeLists.txt b/paddle/fluid/eager/accumulation/CMakeLists.txt index 0531aa5aab3..297e853947d 100644 --- a/paddle/fluid/eager/accumulation/CMakeLists.txt +++ b/paddle/fluid/eager/accumulation/CMakeLists.txt @@ -1 +1,4 @@ -cc_library(accumulation_node SRCS accumulation_node.cc DEPS gradient_accumulator phi_api grad_node_info) +cc_library( + accumulation_node + SRCS accumulation_node.cc + DEPS gradient_accumulator phi_api grad_node_info) diff --git a/paddle/fluid/eager/api/CMakeLists.txt b/paddle/fluid/eager/api/CMakeLists.txt index 4c241fd5b72..4525a58a44d 100644 --- a/paddle/fluid/eager/api/CMakeLists.txt +++ b/paddle/fluid/eager/api/CMakeLists.txt @@ -1,4 +1,7 @@ add_subdirectory(utils) add_subdirectory(generated) -cc_library(eager_api SRCS all.cc DEPS tensor_utils hook_utils global_utils eager_scale) +cc_library( + eager_api + SRCS all.cc + DEPS tensor_utils hook_utils global_utils eager_scale) diff --git a/paddle/fluid/eager/api/generated/CMakeLists.txt b/paddle/fluid/eager/api/generated/CMakeLists.txt index 4f634c6884b..3f6bb90d69b 100644 --- a/paddle/fluid/eager/api/generated/CMakeLists.txt +++ b/paddle/fluid/eager/api/generated/CMakeLists.txt @@ -1,5 +1,5 @@ add_subdirectory(eager_generated) if(NOT ((NOT WITH_PYTHON) AND ON_INFER)) - add_subdirectory(fluid_generated) + add_subdirectory(fluid_generated) endif() diff --git a/paddle/fluid/eager/api/generated/eager_generated/backwards/CMakeLists.txt b/paddle/fluid/eager/api/generated/eager_generated/backwards/CMakeLists.txt index 81ff07b8963..f704d2a4918 100644 --- a/paddle/fluid/eager/api/generated/eager_generated/backwards/CMakeLists.txt +++ b/paddle/fluid/eager/api/generated/eager_generated/backwards/CMakeLists.txt @@ -1,6 +1,12 @@ -cc_library(scale_node SRCS scale_node.cc DEPS global_utils phi phi_api grad_node_info) +cc_library( + scale_node + SRCS scale_node.cc + DEPS global_utils phi phi_api grad_node_info) if(NOT (NOT WITH_PYTHON AND ON_INFER)) -cc_library(final_dygraph_node SRCS nodes.cc DEPS ${eager_deps}) -add_dependencies(final_dygraph_node eager_final_state_codegen) + cc_library( + final_dygraph_node + SRCS nodes.cc + DEPS ${eager_deps}) + add_dependencies(final_dygraph_node eager_final_state_codegen) endif() diff --git a/paddle/fluid/eager/api/generated/eager_generated/forwards/CMakeLists.txt b/paddle/fluid/eager/api/generated/eager_generated/forwards/CMakeLists.txt index c70bb80c35c..8d6df647999 100644 --- a/paddle/fluid/eager/api/generated/eager_generated/forwards/CMakeLists.txt +++ b/paddle/fluid/eager/api/generated/eager_generated/forwards/CMakeLists.txt @@ -1,6 +1,12 @@ -cc_library(eager_scale SRCS scale.cc DEPS phi_api phi autograd_meta scale_node) +cc_library( + eager_scale + SRCS scale.cc + DEPS phi_api phi autograd_meta scale_node) if(NOT (NOT WITH_PYTHON AND ON_INFER)) -cc_library(final_dygraph_function SRCS dygraph_functions.cc DEPS ${eager_deps}) -add_dependencies(final_dygraph_function eager_final_state_codegen) + cc_library( + final_dygraph_function + SRCS dygraph_functions.cc + DEPS ${eager_deps}) + add_dependencies(final_dygraph_function eager_final_state_codegen) endif() diff --git a/paddle/fluid/eager/api/utils/CMakeLists.txt b/paddle/fluid/eager/api/utils/CMakeLists.txt index a2a380ebad6..1fd4905605e 100644 --- a/paddle/fluid/eager/api/utils/CMakeLists.txt +++ b/paddle/fluid/eager/api/utils/CMakeLists.txt @@ -1,3 +1,12 @@ -cc_library(tensor_utils SRCS tensor_utils.cc DEPS phi_api autograd_meta grad_node_info accumulation_node) -cc_library(hook_utils SRCS hook_utils.cc DEPS phi tensor_utils autograd_meta grad_node_info utils accumulation_node) -cc_library(global_utils SRCS global_utils.cc DEPS place tracer) +cc_library( + tensor_utils + SRCS tensor_utils.cc + DEPS phi_api autograd_meta grad_node_info accumulation_node) +cc_library( + hook_utils + SRCS hook_utils.cc + DEPS phi tensor_utils autograd_meta grad_node_info utils accumulation_node) +cc_library( + global_utils + SRCS global_utils.cc + DEPS place tracer) diff --git a/paddle/fluid/eager/auto_code_generator/CMakeLists.txt b/paddle/fluid/eager/auto_code_generator/CMakeLists.txt index d673c64d9da..8c067074d6e 100644 --- a/paddle/fluid/eager/auto_code_generator/CMakeLists.txt +++ b/paddle/fluid/eager/auto_code_generator/CMakeLists.txt @@ -1,99 +1,161 @@ add_subdirectory(final_state_generator) -set(EAGER_GENERETOR_DEPS ${GLOB_OP_LIB} ${GLOB_OPERATOR_DEPS} pybind proto_desc executor layer tracer engine imperative_profiler imperative_flag) +set(EAGER_GENERETOR_DEPS + ${GLOB_OP_LIB} + ${GLOB_OPERATOR_DEPS} + pybind + proto_desc + executor + layer + tracer + engine + imperative_profiler + imperative_flag) add_executable(eager_generator eager_generator.cc) target_link_libraries(eager_generator ${EAGER_GENERETOR_DEPS}) -get_property (os_dependency_modules GLOBAL PROPERTY OS_DEPENDENCY_MODULES) +get_property(os_dependency_modules GLOBAL PROPERTY OS_DEPENDENCY_MODULES) target_link_libraries(eager_generator ${os_dependency_modules}) if(WITH_ROCM) - target_link_libraries(eager_generator ${ROCM_HIPRTC_LIB}) + target_link_libraries(eager_generator ${ROCM_HIPRTC_LIB}) endif() # Prepare file structure -message("Generate dygraph file structure at path: ${PADDLE_SOURCE_DIR}/paddle/fluid/eager/generated") -execute_process( - COMMAND "${PYTHON_EXECUTABLE}" "${PADDLE_SOURCE_DIR}/paddle/fluid/eager/auto_code_generator/generate_file_structures.py" "${PADDLE_SOURCE_DIR}/paddle/fluid/eager/" +message( + "Generate dygraph file structure at path: ${PADDLE_SOURCE_DIR}/paddle/fluid/eager/generated" ) +execute_process( + COMMAND + "${PYTHON_EXECUTABLE}" + "${PADDLE_SOURCE_DIR}/paddle/fluid/eager/auto_code_generator/generate_file_structures.py" + "${PADDLE_SOURCE_DIR}/paddle/fluid/eager/") -set(tmp_dygraph_forward_h_path "${PADDLE_SOURCE_DIR}/paddle/fluid/eager/api/generated/fluid_generated/dygraph_forward_api.tmp.h") -set(tmp_dygraph_forward_cc_path "${PADDLE_SOURCE_DIR}/paddle/fluid/eager/api/generated/fluid_generated/forwards/dygraph_forward_functions.tmp.cc") -set(tmp_dygraph_node_h_path "${PADDLE_SOURCE_DIR}/paddle/fluid/eager/api/generated/fluid_generated/nodes/nodes.tmp.h") -set(tmp_dygraph_node_cc_path "${PADDLE_SOURCE_DIR}/paddle/fluid/eager/api/generated/fluid_generated/nodes/nodes.tmp.cc") -set(dygraph_forward_h_path "${PADDLE_SOURCE_DIR}/paddle/fluid/eager/api/generated/fluid_generated/dygraph_forward_api.h") -set(dygraph_forward_cc_path "${PADDLE_SOURCE_DIR}/paddle/fluid/eager/api/generated/fluid_generated/forwards/dygraph_forward_functions.cc") -set(dygraph_node_h_path "${PADDLE_SOURCE_DIR}/paddle/fluid/eager/api/generated/fluid_generated/nodes/nodes.h") -set(dygraph_node_cc_path "${PADDLE_SOURCE_DIR}/paddle/fluid/eager/api/generated/fluid_generated/nodes/nodes.cc") +set(tmp_dygraph_forward_h_path + "${PADDLE_SOURCE_DIR}/paddle/fluid/eager/api/generated/fluid_generated/dygraph_forward_api.tmp.h" +) +set(tmp_dygraph_forward_cc_path + "${PADDLE_SOURCE_DIR}/paddle/fluid/eager/api/generated/fluid_generated/forwards/dygraph_forward_functions.tmp.cc" +) +set(tmp_dygraph_node_h_path + "${PADDLE_SOURCE_DIR}/paddle/fluid/eager/api/generated/fluid_generated/nodes/nodes.tmp.h" +) +set(tmp_dygraph_node_cc_path + "${PADDLE_SOURCE_DIR}/paddle/fluid/eager/api/generated/fluid_generated/nodes/nodes.tmp.cc" +) +set(dygraph_forward_h_path + "${PADDLE_SOURCE_DIR}/paddle/fluid/eager/api/generated/fluid_generated/dygraph_forward_api.h" +) +set(dygraph_forward_cc_path + "${PADDLE_SOURCE_DIR}/paddle/fluid/eager/api/generated/fluid_generated/forwards/dygraph_forward_functions.cc" +) +set(dygraph_node_h_path + "${PADDLE_SOURCE_DIR}/paddle/fluid/eager/api/generated/fluid_generated/nodes/nodes.h" +) +set(dygraph_node_cc_path + "${PADDLE_SOURCE_DIR}/paddle/fluid/eager/api/generated/fluid_generated/nodes/nodes.cc" +) if(WIN32) - set(EAGER_CODEGEN_DEPS eager_generator) - if("${CMAKE_GENERATOR}" STREQUAL "Ninja") - set(eager_generator_path "${CMAKE_CURRENT_BINARY_DIR}") - else() - set(eager_generator_path "${CMAKE_CURRENT_BINARY_DIR}/${CMAKE_BUILD_TYPE}") - endif() - - if(${CBLAS_PROVIDER} STREQUAL MKLML) - message("Copied libiomp5md.dll for Eager AutoCodeGen") - ADD_CUSTOM_COMMAND(OUTPUT ${eager_generator_path}/libiomp5md.dll - COMMAND ${CMAKE_COMMAND} -E copy ${MKLML_SHARED_IOMP_LIB} ${eager_generator_path} - DEPENDS mklml) - list(APPEND EAGER_CODEGEN_DEPS ${eager_generator_path}/libiomp5md.dll) - else(${CBLAS_PROVIDER} STREQUAL EXTERN_OPENBLAS) - message("Copied openblas.dll for Eager AutoCodeGen") - ADD_CUSTOM_COMMAND(OUTPUT ${eager_generator_path}/openblas.dll - COMMAND ${CMAKE_COMMAND} -E copy ${OPENBLAS_SHARED_LIB} ${eager_generator_path} - DEPENDS extern_openblas) - list(APPEND EAGER_CODEGEN_DEPS ${eager_generator_path}/openblas.dll) - endif() + set(EAGER_CODEGEN_DEPS eager_generator) + if("${CMAKE_GENERATOR}" STREQUAL "Ninja") + set(eager_generator_path "${CMAKE_CURRENT_BINARY_DIR}") + else() + set(eager_generator_path "${CMAKE_CURRENT_BINARY_DIR}/${CMAKE_BUILD_TYPE}") + endif() + + if(${CBLAS_PROVIDER} STREQUAL MKLML) + message("Copied libiomp5md.dll for Eager AutoCodeGen") + add_custom_command( + OUTPUT ${eager_generator_path}/libiomp5md.dll + COMMAND ${CMAKE_COMMAND} -E copy ${MKLML_SHARED_IOMP_LIB} + ${eager_generator_path} + DEPENDS mklml) + list(APPEND EAGER_CODEGEN_DEPS ${eager_generator_path}/libiomp5md.dll) + else(${CBLAS_PROVIDER} STREQUAL EXTERN_OPENBLAS) + message("Copied openblas.dll for Eager AutoCodeGen") + add_custom_command( + OUTPUT ${eager_generator_path}/openblas.dll + COMMAND ${CMAKE_COMMAND} -E copy ${OPENBLAS_SHARED_LIB} + ${eager_generator_path} + DEPENDS extern_openblas) + list(APPEND EAGER_CODEGEN_DEPS ${eager_generator_path}/openblas.dll) + endif() - if(WITH_MKLDNN) - message("Copied mkldnn.dll for Eager AutoCodeGen") - ADD_CUSTOM_COMMAND(OUTPUT ${eager_generator_path}/mkldnn.dll - COMMAND ${CMAKE_COMMAND} -E copy ${MKLDNN_SHARED_LIB} ${eager_generator_path} - DEPENDS mkldnn) - list(APPEND EAGER_CODEGEN_DEPS ${eager_generator_path}/mkldnn.dll) - endif() + if(WITH_MKLDNN) + message("Copied mkldnn.dll for Eager AutoCodeGen") + add_custom_command( + OUTPUT ${eager_generator_path}/mkldnn.dll + COMMAND ${CMAKE_COMMAND} -E copy ${MKLDNN_SHARED_LIB} + ${eager_generator_path} + DEPENDS mkldnn) + list(APPEND EAGER_CODEGEN_DEPS ${eager_generator_path}/mkldnn.dll) + endif() - if(WITH_ONNXRUNTIME) - message("Copied onnxruntime for Eager AutoCodeGen") - ADD_CUSTOM_COMMAND(OUTPUT ${eager_generator_path}/onnxruntime.dll - COMMAND ${CMAKE_COMMAND} -E copy ${ONNXRUNTIME_SHARED_LIB} ${eager_generator_path} - DEPENDS onnxruntime) - list(APPEND EAGER_CODEGEN_DEPS ${eager_generator_path}/onnxruntime.dll) - ADD_CUSTOM_COMMAND(OUTPUT ${eager_generator_path}/paddle2onnx.dll - COMMAND ${CMAKE_COMMAND} -E copy ${PADDLE2ONNX_SHARED_LIB} ${eager_generator_path} - DEPENDS paddle2onnx) - list(APPEND EAGER_CODEGEN_DEPS ${eager_generator_path}/paddle2onnx.dll) - endif() + if(WITH_ONNXRUNTIME) + message("Copied onnxruntime for Eager AutoCodeGen") + add_custom_command( + OUTPUT ${eager_generator_path}/onnxruntime.dll + COMMAND ${CMAKE_COMMAND} -E copy ${ONNXRUNTIME_SHARED_LIB} + ${eager_generator_path} + DEPENDS onnxruntime) + list(APPEND EAGER_CODEGEN_DEPS ${eager_generator_path}/onnxruntime.dll) + add_custom_command( + OUTPUT ${eager_generator_path}/paddle2onnx.dll + COMMAND ${CMAKE_COMMAND} -E copy ${PADDLE2ONNX_SHARED_LIB} + ${eager_generator_path} + DEPENDS paddle2onnx) + list(APPEND EAGER_CODEGEN_DEPS ${eager_generator_path}/paddle2onnx.dll) + endif() - add_custom_target(eager_codegen - COMMAND "${eager_generator_path}/eager_generator.exe" "${PADDLE_SOURCE_DIR}/paddle/fluid/eager/api/generated/fluid_generated" - COMMAND ${CMAKE_COMMAND} -E copy_if_different ${tmp_dygraph_forward_h_path} ${dygraph_forward_h_path} - COMMENT "copy_if_different ${tmp_dygraph_forward_h_path} to ${dygraph_forward_h_path}" - COMMAND ${CMAKE_COMMAND} -E copy_if_different ${tmp_dygraph_forward_cc_path} ${dygraph_forward_cc_path} - COMMENT "copy_if_different ${tmp_dygraph_forward_cc_path} to ${dygraph_forward_cc_path}" - COMMAND ${CMAKE_COMMAND} -E copy_if_different ${tmp_dygraph_node_h_path} ${dygraph_node_h_path} - COMMENT "copy_if_different ${tmp_dygraph_node_h_path} to ${dygraph_node_h_path}" - COMMAND ${CMAKE_COMMAND} -E copy_if_different ${tmp_dygraph_node_cc_path} ${dygraph_node_cc_path} - COMMENT "copy_if_different ${tmp_dygraph_node_cc_path} to ${dygraph_node_cc_path}" - DEPENDS ${EAGER_CODEGEN_DEPS} - VERBATIM) + add_custom_target( + eager_codegen + COMMAND + "${eager_generator_path}/eager_generator.exe" + "${PADDLE_SOURCE_DIR}/paddle/fluid/eager/api/generated/fluid_generated" + COMMAND ${CMAKE_COMMAND} -E copy_if_different ${tmp_dygraph_forward_h_path} + ${dygraph_forward_h_path} + COMMENT + "copy_if_different ${tmp_dygraph_forward_h_path} to ${dygraph_forward_h_path}" + COMMAND ${CMAKE_COMMAND} -E copy_if_different ${tmp_dygraph_forward_cc_path} + ${dygraph_forward_cc_path} + COMMENT + "copy_if_different ${tmp_dygraph_forward_cc_path} to ${dygraph_forward_cc_path}" + COMMAND ${CMAKE_COMMAND} -E copy_if_different ${tmp_dygraph_node_h_path} + ${dygraph_node_h_path} + COMMENT + "copy_if_different ${tmp_dygraph_node_h_path} to ${dygraph_node_h_path}" + COMMAND ${CMAKE_COMMAND} -E copy_if_different ${tmp_dygraph_node_cc_path} + ${dygraph_node_cc_path} + COMMENT + "copy_if_different ${tmp_dygraph_node_cc_path} to ${dygraph_node_cc_path}" + DEPENDS ${EAGER_CODEGEN_DEPS} + VERBATIM) else() - add_custom_target(eager_codegen - COMMAND ${CMAKE_COMMAND} -E env "LD_LIBRARY_PATH=$ENV{LD_LIBRARY_PATH}:${CMAKE_CURRENT_BINARY_DIR}/../../pybind" - "${CMAKE_CURRENT_BINARY_DIR}/eager_generator" - "${PADDLE_SOURCE_DIR}/paddle/fluid/eager/api/generated/fluid_generated" - COMMAND ${CMAKE_COMMAND} -E copy_if_different ${tmp_dygraph_forward_h_path} ${dygraph_forward_h_path} - COMMENT "copy_if_different ${tmp_dygraph_forward_h_path} to ${dygraph_forward_h_path}" - COMMAND ${CMAKE_COMMAND} -E copy_if_different ${tmp_dygraph_forward_cc_path} ${dygraph_forward_cc_path} - COMMENT "copy_if_different ${tmp_dygraph_forward_cc_path} to ${dygraph_forward_cc_path}" - COMMAND ${CMAKE_COMMAND} -E copy_if_different ${tmp_dygraph_node_h_path} ${dygraph_node_h_path} - COMMENT "copy_if_different ${tmp_dygraph_node_h_path} to ${dygraph_node_h_path}" - COMMAND ${CMAKE_COMMAND} -E copy_if_different ${tmp_dygraph_node_cc_path} ${dygraph_node_cc_path} - COMMENT "copy_if_different ${tmp_dygraph_node_cc_path} to ${dygraph_node_cc_path}" - DEPENDS eager_generator - VERBATIM) + add_custom_target( + eager_codegen + COMMAND + ${CMAKE_COMMAND} -E env + "LD_LIBRARY_PATH=$ENV{LD_LIBRARY_PATH}:${CMAKE_CURRENT_BINARY_DIR}/../../pybind" + "${CMAKE_CURRENT_BINARY_DIR}/eager_generator" + "${PADDLE_SOURCE_DIR}/paddle/fluid/eager/api/generated/fluid_generated" + COMMAND ${CMAKE_COMMAND} -E copy_if_different ${tmp_dygraph_forward_h_path} + ${dygraph_forward_h_path} + COMMENT + "copy_if_different ${tmp_dygraph_forward_h_path} to ${dygraph_forward_h_path}" + COMMAND ${CMAKE_COMMAND} -E copy_if_different ${tmp_dygraph_forward_cc_path} + ${dygraph_forward_cc_path} + COMMENT + "copy_if_different ${tmp_dygraph_forward_cc_path} to ${dygraph_forward_cc_path}" + COMMAND ${CMAKE_COMMAND} -E copy_if_different ${tmp_dygraph_node_h_path} + ${dygraph_node_h_path} + COMMENT + "copy_if_different ${tmp_dygraph_node_h_path} to ${dygraph_node_h_path}" + COMMAND ${CMAKE_COMMAND} -E copy_if_different ${tmp_dygraph_node_cc_path} + ${dygraph_node_cc_path} + COMMENT + "copy_if_different ${tmp_dygraph_node_cc_path} to ${dygraph_node_cc_path}" + DEPENDS eager_generator + VERBATIM) endif() diff --git a/paddle/fluid/eager/auto_code_generator/final_state_generator/CMakeLists.txt b/paddle/fluid/eager/auto_code_generator/final_state_generator/CMakeLists.txt index 50dab6ce840..06668fa7365 100644 --- a/paddle/fluid/eager/auto_code_generator/final_state_generator/CMakeLists.txt +++ b/paddle/fluid/eager/auto_code_generator/final_state_generator/CMakeLists.txt @@ -1,39 +1,72 @@ -set(api_yaml_path "${PADDLE_SOURCE_DIR}/python/paddle/utils/code_gen/api.yaml,${PADDLE_SOURCE_DIR}/python/paddle/utils/code_gen/new_api.yaml,${PADDLE_SOURCE_DIR}/python/paddle/utils/code_gen/sparse_api.yaml") -set(backward_yaml_path "${PADDLE_SOURCE_DIR}/python/paddle/utils/code_gen/backward.yaml,${PADDLE_SOURCE_DIR}/python/paddle/utils/code_gen/new_backward.yaml,${PADDLE_SOURCE_DIR}/python/paddle/utils/code_gen/sparse_bw_api.yaml") -set(tmp_forwards_cc_path "${PADDLE_SOURCE_DIR}/paddle/fluid/eager/api/generated/eager_generated/forwards/tmp_dygraph_functions.cc") -set(tmp_forwards_h_path "${PADDLE_SOURCE_DIR}/paddle/fluid/eager/api/generated/eager_generated/forwards/tmp_dygraph_functions.h") -set(tmp_nodes_cc_path "${PADDLE_SOURCE_DIR}/paddle/fluid/eager/api/generated/eager_generated/backwards/tmp_nodes.cc") -set(tmp_nodes_h_path "${PADDLE_SOURCE_DIR}/paddle/fluid/eager/api/generated/eager_generated/backwards/tmp_nodes.h") -set(forwards_cc_path "${PADDLE_SOURCE_DIR}/paddle/fluid/eager/api/generated/eager_generated/forwards/dygraph_functions.cc") -set(forwards_h_path "${PADDLE_SOURCE_DIR}/paddle/fluid/eager/api/generated/eager_generated/forwards/dygraph_functions.h") -set(nodes_cc_path "${PADDLE_SOURCE_DIR}/paddle/fluid/eager/api/generated/eager_generated/backwards/nodes.cc") -set(nodes_h_path "${PADDLE_SOURCE_DIR}/paddle/fluid/eager/api/generated/eager_generated/backwards/nodes.h") +set(api_yaml_path + "${PADDLE_SOURCE_DIR}/python/paddle/utils/code_gen/api.yaml,${PADDLE_SOURCE_DIR}/python/paddle/utils/code_gen/new_api.yaml,${PADDLE_SOURCE_DIR}/python/paddle/utils/code_gen/sparse_api.yaml" +) +set(backward_yaml_path + "${PADDLE_SOURCE_DIR}/python/paddle/utils/code_gen/backward.yaml,${PADDLE_SOURCE_DIR}/python/paddle/utils/code_gen/new_backward.yaml,${PADDLE_SOURCE_DIR}/python/paddle/utils/code_gen/sparse_bw_api.yaml" +) +set(tmp_forwards_cc_path + "${PADDLE_SOURCE_DIR}/paddle/fluid/eager/api/generated/eager_generated/forwards/tmp_dygraph_functions.cc" +) +set(tmp_forwards_h_path + "${PADDLE_SOURCE_DIR}/paddle/fluid/eager/api/generated/eager_generated/forwards/tmp_dygraph_functions.h" +) +set(tmp_nodes_cc_path + "${PADDLE_SOURCE_DIR}/paddle/fluid/eager/api/generated/eager_generated/backwards/tmp_nodes.cc" +) +set(tmp_nodes_h_path + "${PADDLE_SOURCE_DIR}/paddle/fluid/eager/api/generated/eager_generated/backwards/tmp_nodes.h" +) +set(forwards_cc_path + "${PADDLE_SOURCE_DIR}/paddle/fluid/eager/api/generated/eager_generated/forwards/dygraph_functions.cc" +) +set(forwards_h_path + "${PADDLE_SOURCE_DIR}/paddle/fluid/eager/api/generated/eager_generated/forwards/dygraph_functions.h" +) +set(nodes_cc_path + "${PADDLE_SOURCE_DIR}/paddle/fluid/eager/api/generated/eager_generated/backwards/nodes.cc" +) +set(nodes_h_path + "${PADDLE_SOURCE_DIR}/paddle/fluid/eager/api/generated/eager_generated/backwards/nodes.h" +) # StringTensor only needs forward api -set(fwd_api_yaml_path "${PADDLE_SOURCE_DIR}/python/paddle/utils/code_gen/strings_api.yaml") +set(fwd_api_yaml_path + "${PADDLE_SOURCE_DIR}/python/paddle/utils/code_gen/strings_api.yaml") message("Final State Eager CodeGen") -add_custom_target(eager_final_state_codegen - COMMAND "${PYTHON_EXECUTABLE}" "${PADDLE_SOURCE_DIR}/paddle/fluid/eager/auto_code_generator/final_state_generator/eager_gen.py" - "--api_yaml_path=${api_yaml_path}" - "--backward_yaml_path=${backward_yaml_path}" - "--forwards_cc_path=${tmp_forwards_cc_path}" - "--forwards_h_path=${tmp_forwards_h_path}" - "--nodes_cc_path=${tmp_nodes_cc_path}" - "--nodes_h_path=${tmp_nodes_h_path}" - COMMAND ${CMAKE_COMMAND} -E copy_if_different ${tmp_forwards_cc_path} ${forwards_cc_path} - COMMAND ${CMAKE_COMMAND} -E copy_if_different ${tmp_forwards_h_path} ${forwards_h_path} - COMMAND ${CMAKE_COMMAND} -E copy_if_different ${tmp_nodes_cc_path} ${nodes_cc_path} - COMMAND ${CMAKE_COMMAND} -E copy_if_different ${tmp_nodes_h_path} ${nodes_h_path} - VERBATIM -) +add_custom_target( + eager_final_state_codegen + COMMAND + "${PYTHON_EXECUTABLE}" + "${PADDLE_SOURCE_DIR}/paddle/fluid/eager/auto_code_generator/final_state_generator/eager_gen.py" + "--api_yaml_path=${api_yaml_path}" + "--backward_yaml_path=${backward_yaml_path}" + "--forwards_cc_path=${tmp_forwards_cc_path}" + "--forwards_h_path=${tmp_forwards_h_path}" + "--nodes_cc_path=${tmp_nodes_cc_path}" "--nodes_h_path=${tmp_nodes_h_path}" + COMMAND ${CMAKE_COMMAND} -E copy_if_different ${tmp_forwards_cc_path} + ${forwards_cc_path} + COMMAND ${CMAKE_COMMAND} -E copy_if_different ${tmp_forwards_h_path} + ${forwards_h_path} + COMMAND ${CMAKE_COMMAND} -E copy_if_different ${tmp_nodes_cc_path} + ${nodes_cc_path} + COMMAND ${CMAKE_COMMAND} -E copy_if_different ${tmp_nodes_h_path} + ${nodes_h_path} + VERBATIM) -set(tmp_python_c_output_path "${PADDLE_SOURCE_DIR}/paddle/fluid/pybind/tmp_eager_final_state_op_function_impl.h") -set(python_c_output_path "${PADDLE_SOURCE_DIR}/paddle/fluid/pybind/eager_final_state_op_function_impl.h") - -add_custom_target(eager_final_state_python_c_codegen - COMMAND "${PYTHON_EXECUTABLE}" "${PADDLE_SOURCE_DIR}/paddle/fluid/eager/auto_code_generator/final_state_generator/python_c_gen.py" - "--api_yaml_path=${api_yaml_path},${fwd_api_yaml_path}" - "--output_path=${tmp_python_c_output_path}" - COMMAND ${CMAKE_COMMAND} -E copy_if_different ${tmp_python_c_output_path} ${python_c_output_path} - VERBATIM +set(tmp_python_c_output_path + "${PADDLE_SOURCE_DIR}/paddle/fluid/pybind/tmp_eager_final_state_op_function_impl.h" +) +set(python_c_output_path + "${PADDLE_SOURCE_DIR}/paddle/fluid/pybind/eager_final_state_op_function_impl.h" ) + +add_custom_target( + eager_final_state_python_c_codegen + COMMAND + "${PYTHON_EXECUTABLE}" + "${PADDLE_SOURCE_DIR}/paddle/fluid/eager/auto_code_generator/final_state_generator/python_c_gen.py" + "--api_yaml_path=${api_yaml_path},${fwd_api_yaml_path}" + "--output_path=${tmp_python_c_output_path}" + COMMAND ${CMAKE_COMMAND} -E copy_if_different ${tmp_python_c_output_path} + ${python_c_output_path} + VERBATIM) diff --git a/paddle/fluid/eager/custom_operator/CMakeLists.txt b/paddle/fluid/eager/custom_operator/CMakeLists.txt index ccc9a03a556..424194557dd 100644 --- a/paddle/fluid/eager/custom_operator/CMakeLists.txt +++ b/paddle/fluid/eager/custom_operator/CMakeLists.txt @@ -1 +1,4 @@ -cc_library(custom_operator_node SRCS custom_operator_node.cc DEPS phi_tensor phi_api grad_node_info custom_operator op_meta_info) +cc_library( + custom_operator_node + SRCS custom_operator_node.cc + DEPS phi_tensor phi_api grad_node_info custom_operator op_meta_info) diff --git a/paddle/fluid/eager/pylayer/CMakeLists.txt b/paddle/fluid/eager/pylayer/CMakeLists.txt index 59030342ecc..4b0ad071117 100644 --- a/paddle/fluid/eager/pylayer/CMakeLists.txt +++ b/paddle/fluid/eager/pylayer/CMakeLists.txt @@ -1 +1,4 @@ -cc_library(py_layer_node SRCS py_layer_node.cc DEPS pybind phi_api grad_node_info) +cc_library( + py_layer_node + SRCS py_layer_node.cc + DEPS pybind phi_api grad_node_info) diff --git a/paddle/fluid/eager/tests/data_structure_tests/CMakeLists.txt b/paddle/fluid/eager/tests/data_structure_tests/CMakeLists.txt index 76c59561fc0..90159e9b8c3 100644 --- a/paddle/fluid/eager/tests/data_structure_tests/CMakeLists.txt +++ b/paddle/fluid/eager/tests/data_structure_tests/CMakeLists.txt @@ -1,9 +1,27 @@ -cc_test(test_egr_ds_eager_tensor SRCS eager_tensor_test.cc DEPS ${eager_deps}) -cc_test(test_egr_ds_auotgrad_meta SRCS autograd_meta_test.cc DEPS ${eager_deps}) -cc_test(test_egr_ds_grad_node_info SRCS grad_node_info_test.cc DEPS ${eager_deps}) -cc_test(test_egr_ds_accumulation_node SRCS accumulation_node_test.cc DEPS ${eager_deps}) -cc_test(test_egr_ds_tensor_wrapper SRCS tensor_wrapper_test.cc DEPS ${eager_deps}) +cc_test( + test_egr_ds_eager_tensor + SRCS eager_tensor_test.cc + DEPS ${eager_deps}) +cc_test( + test_egr_ds_auotgrad_meta + SRCS autograd_meta_test.cc + DEPS ${eager_deps}) +cc_test( + test_egr_ds_grad_node_info + SRCS grad_node_info_test.cc + DEPS ${eager_deps}) +cc_test( + test_egr_ds_accumulation_node + SRCS accumulation_node_test.cc + DEPS ${eager_deps}) +cc_test( + test_egr_ds_tensor_wrapper + SRCS tensor_wrapper_test.cc + DEPS ${eager_deps}) if(NOT ((NOT WITH_PYTHON) AND ON_INFER)) - cc_test(test_egr_ds_grad_tensor_holder SRCS grad_tensor_holder_test.cc DEPS ${eager_deps} ${generated_deps}) + cc_test( + test_egr_ds_grad_tensor_holder + SRCS grad_tensor_holder_test.cc + DEPS ${eager_deps} ${generated_deps}) endif() diff --git a/paddle/fluid/eager/tests/performance_tests/CMakeLists.txt b/paddle/fluid/eager/tests/performance_tests/CMakeLists.txt index 516789cbb8c..7b6dfae729f 100644 --- a/paddle/fluid/eager/tests/performance_tests/CMakeLists.txt +++ b/paddle/fluid/eager/tests/performance_tests/CMakeLists.txt @@ -1,7 +1,29 @@ -cc_library(performance_benchmark_utils SRCS benchmark_utils.cc DEPS ${eager_deps} ${fluid_deps} ${generated_deps} eager_scale scale_node scale_op matmul_v2_op dygraph_function) +cc_library( + performance_benchmark_utils + SRCS benchmark_utils.cc + DEPS ${eager_deps} + ${fluid_deps} + ${generated_deps} + eager_scale + scale_node + scale_op + matmul_v2_op + dygraph_function) -cc_test(test_egr_performance_benchmark_eager_cpu SRCS benchmark_eager_cpu.cc DEPS performance_benchmark_utils ${eager_deps} ${fluid_deps}) -cc_test(test_egr_performance_benchmark_fluid_cpu SRCS benchmark_fluid_cpu.cc DEPS performance_benchmark_utils ${eager_deps} ${fluid_deps}) +cc_test( + test_egr_performance_benchmark_eager_cpu + SRCS benchmark_eager_cpu.cc + DEPS performance_benchmark_utils ${eager_deps} ${fluid_deps}) +cc_test( + test_egr_performance_benchmark_fluid_cpu + SRCS benchmark_fluid_cpu.cc + DEPS performance_benchmark_utils ${eager_deps} ${fluid_deps}) -cc_test(test_egr_performance_benchmark_eager_cuda SRCS benchmark_eager_cuda.cc DEPS performance_benchmark_utils ${eager_deps} ${fluid_deps}) -cc_test(test_egr_performance_benchmark_fluid_cuda SRCS benchmark_fluid_cuda.cc DEPS performance_benchmark_utils ${eager_deps} ${fluid_deps}) +cc_test( + test_egr_performance_benchmark_eager_cuda + SRCS benchmark_eager_cuda.cc + DEPS performance_benchmark_utils ${eager_deps} ${fluid_deps}) +cc_test( + test_egr_performance_benchmark_fluid_cuda + SRCS benchmark_fluid_cuda.cc + DEPS performance_benchmark_utils ${eager_deps} ${fluid_deps}) diff --git a/paddle/fluid/eager/tests/task_tests/CMakeLists.txt b/paddle/fluid/eager/tests/task_tests/CMakeLists.txt index 719ef6673c0..2f57489999f 100644 --- a/paddle/fluid/eager/tests/task_tests/CMakeLists.txt +++ b/paddle/fluid/eager/tests/task_tests/CMakeLists.txt @@ -1,14 +1,47 @@ -cc_test(test_egr_task_tensor_utils SRCS tensor_utils_test.cc DEPS ${eager_deps}) -cc_test(test_egr_task_eager_utils SRCS eager_utils_test.cc DEPS ${eager_deps}) -cc_test(test_egr_task_forward_autograd SRCS forward_autograd_test.cc DEPS ${eager_deps} ${fluid_deps} eager_scale scale_node) -cc_test(test_egr_task_nan_inf_utils SRCS nan_inf_utils_test.cc DEPS eager_nan_inf_utils) +cc_test( + test_egr_task_tensor_utils + SRCS tensor_utils_test.cc + DEPS ${eager_deps}) +cc_test( + test_egr_task_eager_utils + SRCS eager_utils_test.cc + DEPS ${eager_deps}) +cc_test( + test_egr_task_forward_autograd + SRCS forward_autograd_test.cc + DEPS ${eager_deps} ${fluid_deps} eager_scale scale_node) +cc_test( + test_egr_task_nan_inf_utils + SRCS nan_inf_utils_test.cc + DEPS eager_nan_inf_utils) if(NOT ((NOT WITH_PYTHON) AND ON_INFER)) - cc_test(test_egr_task_hook SRCS hook_test.cc DEPS ${eager_deps} ${fluid_deps} ${generated_deps} eager_scale scale_node) - cc_test(test_egr_task_backward SRCS backward_test.cc DEPS ${eager_deps} ${fluid_deps} ${generated_deps} eager_scale scale_node) - cc_test(test_egr_task_grad SRCS grad_test.cc DEPS ${eager_deps} ${fluid_deps} ${generated_deps} eager_scale scale_node) - cc_test(test_egr_task_fwd_bwd_joint SRCS fwd_bwd_joint_test.cc DEPS ${eager_deps} ${fluid_deps} ${generated_deps} eager_scale scale_node) - cc_test(test_egr_task_cross_batch SRCS cross_batch_accumulation_test.cc DEPS ${eager_deps} ${fluid_deps} ${generated_deps} eager_scale scale_node) - cc_test(test_egr_task_hook_intermidiate SRCS hook_test_intermidiate.cc DEPS ${eager_deps} ${fluid_deps} ${generated_deps} dygraph_node) - cc_test(test_egr_task_autocodegen SRCS generated_test.cc DEPS ${eager_deps} ${fluid_deps} ${generated_deps}) + cc_test( + test_egr_task_hook + SRCS hook_test.cc + DEPS ${eager_deps} ${fluid_deps} ${generated_deps} eager_scale scale_node) + cc_test( + test_egr_task_backward + SRCS backward_test.cc + DEPS ${eager_deps} ${fluid_deps} ${generated_deps} eager_scale scale_node) + cc_test( + test_egr_task_grad + SRCS grad_test.cc + DEPS ${eager_deps} ${fluid_deps} ${generated_deps} eager_scale scale_node) + cc_test( + test_egr_task_fwd_bwd_joint + SRCS fwd_bwd_joint_test.cc + DEPS ${eager_deps} ${fluid_deps} ${generated_deps} eager_scale scale_node) + cc_test( + test_egr_task_cross_batch + SRCS cross_batch_accumulation_test.cc + DEPS ${eager_deps} ${fluid_deps} ${generated_deps} eager_scale scale_node) + cc_test( + test_egr_task_hook_intermidiate + SRCS hook_test_intermidiate.cc + DEPS ${eager_deps} ${fluid_deps} ${generated_deps} dygraph_node) + cc_test( + test_egr_task_autocodegen + SRCS generated_test.cc + DEPS ${eager_deps} ${fluid_deps} ${generated_deps}) endif() diff --git a/paddle/fluid/framework/CMakeLists.txt b/paddle/fluid/framework/CMakeLists.txt index bb7f3f26463..5402beb49e6 100755 --- a/paddle/fluid/framework/CMakeLists.txt +++ b/paddle/fluid/framework/CMakeLists.txt @@ -1,22 +1,30 @@ - #windows treat symbolic file as a real file, which is different with unix #We create a hidden file and compile it instead of origin source file. function(windows_symbolic TARGET) set(oneValueArgs "") set(multiValueArgs SRCS PATH) - cmake_parse_arguments(windows_symbolic "${options}" "${oneValueArgs}" "${multiValueArgs}" ${ARGN}) + cmake_parse_arguments(windows_symbolic "${options}" "${oneValueArgs}" + "${multiValueArgs}" ${ARGN}) set(final_path ${CMAKE_CURRENT_SOURCE_DIR}/${windows_symbolic_PATH}) foreach(src ${windows_symbolic_SRCS}) get_filename_component(src ${src} NAME_WE) - if (NOT EXISTS ${final_path}/${src}.cc OR NOT EXISTS ${final_path}/${src}.cu) - message(FATAL " ${src}.cc and ${src}.cu must exsits, and ${src}.cu must be symbolic file.") + if(NOT EXISTS ${final_path}/${src}.cc OR NOT EXISTS ${final_path}/${src}.cu) + message( + FATAL + " ${src}.cc and ${src}.cu must exsits, and ${src}.cu must be symbolic file." + ) endif() - file(GENERATE OUTPUT ${final_path}/.${src}.cu INPUT ${final_path}/${src}.cc) + file( + GENERATE + OUTPUT ${final_path}/.${src}.cu + INPUT ${final_path}/${src}.cc) - add_custom_command(OUTPUT ${final_path}/.${src}.cu - COMMAND ${CMAKE_COMMAND} -E copy_if_different "${final_path}/${src}.cc" "${final_path}/.${src}.cu" - COMMENT "create hidden file of ${src}.cu") + add_custom_command( + OUTPUT ${final_path}/.${src}.cu + COMMAND ${CMAKE_COMMAND} -E copy_if_different "${final_path}/${src}.cc" + "${final_path}/.${src}.cu" + COMMENT "create hidden file of ${src}.cu") add_custom_target(${TARGET} ALL DEPENDS ${final_path}/.${src}.cu) endforeach() endfunction() @@ -26,7 +34,7 @@ add_subdirectory(details) add_subdirectory(fleet) add_subdirectory(io) add_subdirectory(new_executor) -if (WITH_CINN) +if(WITH_CINN) add_subdirectory(paddle2cinn) endif() #ddim lib @@ -34,420 +42,1101 @@ proto_library(framework_proto SRCS framework.proto) proto_library(pass_desc_proto SRCS pass_desc.proto DEPS framework_proto) proto_library(op_def_proto SRCS op_def.proto DEPS framework_proto) -cc_library(op_def_api SRCS op_def_api.cc DEPS op_def_proto boost) - -FILE(GLOB OP_DEF_FILES ${PADDLE_SOURCE_DIR}/paddle/fluid/operators/compat/*.pbtxt) -FILE(WRITE ${CMAKE_CURRENT_BINARY_DIR}/op_def.pbtxt +cc_library( + op_def_api + SRCS op_def_api.cc + DEPS op_def_proto boost) + +file(GLOB OP_DEF_FILES + ${PADDLE_SOURCE_DIR}/paddle/fluid/operators/compat/*.pbtxt) +file(WRITE ${CMAKE_CURRENT_BINARY_DIR}/op_def.pbtxt "namespace { \n" "const std::unordered_map op_def_map = { \n") foreach(OP_DEF_FILE ${OP_DEF_FILES}) - FILE(READ ${OP_DEF_FILE} OP_DEF_CONTENT) - get_filename_component(OP_NAME ${OP_DEF_FILE} NAME_WE) - FILE(APPEND ${CMAKE_CURRENT_BINARY_DIR}/op_def.pbtxt - "{\"${OP_NAME}\",R\"(${OP_DEF_CONTENT})\"},\n") + file(READ ${OP_DEF_FILE} OP_DEF_CONTENT) + get_filename_component(OP_NAME ${OP_DEF_FILE} NAME_WE) + file(APPEND ${CMAKE_CURRENT_BINARY_DIR}/op_def.pbtxt + "{\"${OP_NAME}\",R\"(${OP_DEF_CONTENT})\"},\n") endforeach(OP_DEF_FILE) -FILE(APPEND ${CMAKE_CURRENT_BINARY_DIR}/op_def.pbtxt "{\"\",\"\"}};\n}") +file(APPEND ${CMAKE_CURRENT_BINARY_DIR}/op_def.pbtxt "{\"\",\"\"}};\n}") proto_library(heter_service_proto SRCS heter_service.proto) proto_library(data_feed_proto SRCS data_feed.proto) proto_library(trainer_desc_proto SRCS trainer_desc.proto DEPS framework_proto - data_feed_proto) - -cc_library(string_array SRCS string_array.cc DEPS utf8proc) - -cc_library(data_type SRCS data_type.cc DEPS framework_proto ddim device_context) -cc_test(data_type_test SRCS data_type_test.cc DEPS data_type place tensor) + data_feed_proto) + +cc_library( + string_array + SRCS string_array.cc + DEPS utf8proc) + +cc_library( + data_type + SRCS data_type.cc + DEPS framework_proto ddim device_context) +cc_test( + data_type_test + SRCS data_type_test.cc + DEPS data_type place tensor) if(WITH_GPU) - if (WIN32) + if(WIN32) windows_symbolic(tensor_util SRCS tensor_util.cu) - nv_library(tensor SRCS tensor.cc .tensor_util.cu DEPS place memory data_type device_context dense_tensor) + nv_library( + tensor + SRCS tensor.cc .tensor_util.cu + DEPS place memory data_type device_context dense_tensor) add_dependencies(tensor tensor_util) else() - nv_library(tensor SRCS tensor.cc tensor_util.cu DEPS place memory data_type device_context profiler dense_tensor) + nv_library( + tensor + SRCS tensor.cc tensor_util.cu + DEPS place memory data_type device_context profiler dense_tensor) endif(WIN32) elseif(WITH_ROCM) - hip_library(tensor SRCS tensor.cc tensor_util.cu DEPS place memory data_type device_context profiler dense_tensor) + hip_library( + tensor + SRCS tensor.cc tensor_util.cu + DEPS place memory data_type device_context profiler dense_tensor) else() - cc_library(tensor SRCS tensor.cc tensor_util.cc DEPS place memory data_type device_context profiler dense_tensor) + cc_library( + tensor + SRCS tensor.cc tensor_util.cc + DEPS place memory data_type device_context profiler dense_tensor) endif() -cc_test(tensor_test SRCS tensor_test.cc DEPS tensor) +cc_test( + tensor_test + SRCS tensor_test.cc + DEPS tensor) if(WITH_GPU) - nv_test(tensor_util_test SRCS tensor_util_test.cc tensor_util_test.cu DEPS tensor dlpack_tensor) + nv_test( + tensor_util_test + SRCS tensor_util_test.cc tensor_util_test.cu + DEPS tensor dlpack_tensor) elseif(WITH_ROCM) - hip_test(tensor_util_test SRCS tensor_util_test.cc tensor_util_test.cu DEPS tensor dlpack_tensor) + hip_test( + tensor_util_test + SRCS tensor_util_test.cc tensor_util_test.cu + DEPS tensor dlpack_tensor) else() - cc_test(tensor_util_test SRCS tensor_util_test.cc DEPS tensor dlpack_tensor) + cc_test( + tensor_util_test + SRCS tensor_util_test.cc + DEPS tensor dlpack_tensor) endif() -cc_test(copy_same_tensor_test SRCS copy_same_tensor_test.cc DEPS tensor) +cc_test( + copy_same_tensor_test + SRCS copy_same_tensor_test.cc + DEPS tensor) -cc_test(eigen_test SRCS eigen_test.cc DEPS tensor) -cc_library(mixed_vector SRCS mixed_vector.cc DEPS device_context place memory) +cc_test( + eigen_test + SRCS eigen_test.cc + DEPS tensor) +cc_library( + mixed_vector + SRCS mixed_vector.cc + DEPS device_context place memory) if(WITH_GPU) - nv_test(mixed_vector_test SRCS mixed_vector_test.cc mixed_vector_test.cu DEPS mixed_vector place memory device_context tensor) + nv_test( + mixed_vector_test + SRCS mixed_vector_test.cc mixed_vector_test.cu + DEPS mixed_vector place memory device_context tensor) elseif(WITH_ROCM) - hip_test(mixed_vector_test SRCS mixed_vector_test.cc mixed_vector_test.cu DEPS mixed_vector place memory device_context tensor) + hip_test( + mixed_vector_test + SRCS mixed_vector_test.cc mixed_vector_test.cu + DEPS mixed_vector place memory device_context tensor) else() - cc_test(mixed_vector_test SRCS mixed_vector_test.cc DEPS mixed_vector place memory device_context tensor) + cc_test( + mixed_vector_test + SRCS mixed_vector_test.cc + DEPS mixed_vector place memory device_context tensor) endif() -cc_library(lod_tensor SRCS lod_tensor.cc DEPS ddim mixed_vector place tensor framework_proto version) +cc_library( + lod_tensor + SRCS lod_tensor.cc + DEPS ddim mixed_vector place tensor framework_proto version) -cc_test(lod_tensor_test SRCS lod_tensor_test.cc DEPS lod_utils lod_tensor memory) +cc_test( + lod_tensor_test + SRCS lod_tensor_test.cc + DEPS lod_utils lod_tensor memory) if(WITH_GPU) - nv_test(lod_tensor_gpu_test SRCS lod_tensor_test.cu DEPS lod_tensor) + nv_test( + lod_tensor_gpu_test + SRCS lod_tensor_test.cu + DEPS lod_tensor) elseif(WITH_ROCM) - hip_test(lod_tensor_gpu_test SRCS lod_tensor_test.cu DEPS lod_tensor) + hip_test( + lod_tensor_gpu_test + SRCS lod_tensor_test.cu + DEPS lod_tensor) endif() -cc_library(garbage_collector SRCS garbage_collector.cc DEPS device_context memory gflags glog) - -cc_library(reader SRCS reader.cc DEPS lod_tensor ddim) -cc_test(reader_test SRCS reader_test.cc DEPS reader) - -cc_library(threadpool SRCS threadpool.cc DEPS enforce) -cc_test(threadpool_test SRCS threadpool_test.cc DEPS threadpool) - -cc_library(var_type_traits SRCS var_type_traits.cc DEPS lod_tensor selected_rows_utils framework_proto scope) -if (WITH_GPU) +cc_library( + garbage_collector + SRCS garbage_collector.cc + DEPS device_context memory gflags glog) + +cc_library( + reader + SRCS reader.cc + DEPS lod_tensor ddim) +cc_test( + reader_test + SRCS reader_test.cc + DEPS reader) + +cc_library( + threadpool + SRCS threadpool.cc + DEPS enforce) +cc_test( + threadpool_test + SRCS threadpool_test.cc + DEPS threadpool) + +cc_library( + var_type_traits + SRCS var_type_traits.cc + DEPS lod_tensor selected_rows_utils framework_proto scope) +if(WITH_GPU) target_link_libraries(var_type_traits dynload_cuda) endif() -cc_test(var_type_traits_test SRCS var_type_traits_test.cc DEPS var_type_traits) +cc_test( + var_type_traits_test + SRCS var_type_traits_test.cc + DEPS var_type_traits) set(BRPC_DEPS "") if(WITH_PSCORE) - set(BRPC_DEPS brpc ssl crypto) + set(BRPC_DEPS brpc ssl crypto) endif() if(WITH_PSLIB) - if(WITH_PSLIB_BRPC) - set(BRPC_DEPS pslib_brpc) - elseif(NOT WITH_HETERPS) - set(BRPC_DEPS brpc ssl crypto) - endif() - if (WITH_ARM_BRPC) - set(BRPC_DEPS arm_brpc) - endif() + if(WITH_PSLIB_BRPC) + set(BRPC_DEPS pslib_brpc) + elseif(NOT WITH_HETERPS) + set(BRPC_DEPS brpc ssl crypto) + endif() + if(WITH_ARM_BRPC) + set(BRPC_DEPS arm_brpc) + endif() endif() -cc_library(scope SRCS scope.cc DEPS glog threadpool xxhash var_type_traits) -cc_library(device_worker SRCS device_worker.cc DEPS trainer_desc_proto lod_tensor scope ${BRPC_DEPS}) -cc_test(device_worker_test SRCS device_worker_test.cc DEPS device_worker) - -cc_library(scope_pool SRCS scope_pool.cc DEPS scope) -cc_test(scope_test SRCS scope_test.cc DEPS scope) -cc_test(variable_test SRCS variable_test.cc DEPS tensor var_type_traits) - -cc_library(data_device_transform SRCS data_device_transform.cc DEPS tensor) +cc_library( + scope + SRCS scope.cc + DEPS glog threadpool xxhash var_type_traits) +cc_library( + device_worker + SRCS device_worker.cc + DEPS trainer_desc_proto lod_tensor scope ${BRPC_DEPS}) +cc_test( + device_worker_test + SRCS device_worker_test.cc + DEPS device_worker) + +cc_library( + scope_pool + SRCS scope_pool.cc + DEPS scope) +cc_test( + scope_test + SRCS scope_test.cc + DEPS scope) +cc_test( + variable_test + SRCS variable_test.cc + DEPS tensor var_type_traits) + +cc_library( + data_device_transform + SRCS data_device_transform.cc + DEPS tensor) if(WITH_GPU) - nv_test(data_device_transform_test SRCS data_device_transform_test.cu - DEPS operator op_registry device_context math_function scope) + nv_test( + data_device_transform_test + SRCS data_device_transform_test.cu + DEPS operator op_registry device_context math_function scope) elseif(WITH_ROCM) - hip_test(data_device_transform_test SRCS data_device_transform_test.cu - DEPS operator op_registry device_context math_function scope) + hip_test( + data_device_transform_test + SRCS data_device_transform_test.cu + DEPS operator op_registry device_context math_function scope) endif() if(WITH_GPU) - if (WIN32) -#windows treat symbolic file as a real file, which is different with unix -#We create a hidden file and compile it instead of origin source file. - windows_symbolic(hidden_file SRCS data_type_transform.cu) - nv_library(data_type_transform SRCS .data_type_transform.cu DEPS tensor) - add_dependencies(data_type_transform hidden_file) + if(WIN32) + #windows treat symbolic file as a real file, which is different with unix + #We create a hidden file and compile it instead of origin source file. + windows_symbolic(hidden_file SRCS data_type_transform.cu) + nv_library( + data_type_transform + SRCS .data_type_transform.cu + DEPS tensor) + add_dependencies(data_type_transform hidden_file) else() - nv_library(data_type_transform SRCS data_type_transform.cu DEPS tensor) + nv_library( + data_type_transform + SRCS data_type_transform.cu + DEPS tensor) endif(WIN32) - nv_test(data_type_transform_test SRCS data_type_transform_test.cc data_type_transform_test.cu DEPS data_type_transform) + nv_test( + data_type_transform_test + SRCS data_type_transform_test.cc data_type_transform_test.cu + DEPS data_type_transform) elseif(WITH_ROCM) - hip_library(data_type_transform SRCS data_type_transform.cu DEPS tensor) - hip_test(data_type_transform_test SRCS data_type_transform_test.cc data_type_transform_test.cu DEPS data_type_transform) + hip_library( + data_type_transform + SRCS data_type_transform.cu + DEPS tensor) + hip_test( + data_type_transform_test + SRCS data_type_transform_test.cc data_type_transform_test.cu + DEPS data_type_transform) else() - cc_library(data_type_transform SRCS data_type_transform.cc DEPS tensor) - cc_test(data_type_transform_test SRCS data_type_transform_test.cc DEPS data_type_transform) + cc_library( + data_type_transform + SRCS data_type_transform.cc + DEPS tensor) + cc_test( + data_type_transform_test + SRCS data_type_transform_test.cc + DEPS data_type_transform) endif() -cc_library(data_layout_transform SRCS data_layout_transform.cc DEPS tensor math_function) -cc_test(data_layout_transform_test SRCS data_layout_transform_test.cc DEPS data_layout_transform) - -cc_library(data_transform SRCS data_transform.cc DEPS math_function tensor - framework_proto selected_rows_utils data_device_transform data_type_transform data_layout_transform) - -cc_library(attribute SRCS attribute.cc DEPS framework_proto boost enforce) -cc_test(attribute_test SRCS attribute_test.cc DEPS attribute framework_proto proto_desc) -cc_test(program_desc_test SRCS program_desc_test.cc DEPS proto_desc -device_context) - -cc_library(op_version_proto SRCS op_version_proto.cc DEPS framework_proto boost) - -cc_library(op_version_registry SRCS op_version_registry.cc DEPS op_version_proto framework_proto boost) -cc_test(op_version_registry_test SRCS op_version_registry_test.cc DEPS op_version_registry) - -cc_library(op_proto_maker SRCS op_proto_maker.cc DEPS framework_proto attribute glog) -cc_test(op_proto_maker_test SRCS op_proto_maker_test.cc DEPS op_proto_maker) -cc_library(no_need_buffer_vars_inference SRCS no_need_buffer_vars_inference.cc DEPS attribute device_context) -cc_library(op_info SRCS op_info.cc DEPS attribute framework_proto no_need_buffer_vars_inference) -cc_library(shape_inference SRCS shape_inference.cc DEPS ddim attribute device_context) - -cc_test(no_need_buffer_vars_inference_test SRCS no_need_buffer_vars_inference_test.cc DEPS no_need_buffer_vars_inference layer) - -cc_library(transfer_scope_cache SRCS transfer_scope_cache.cc DEPS scope framework_proto device_context) - -cc_library(unused_var_check SRCS unused_var_check.cc DEPS glog no_need_buffer_vars_inference) - -cc_library(op_kernel_type SRCS op_kernel_type.cc DEPS device_context place) - -IF(WITH_XPU) -cc_library(phi_utils SRCS phi_utils.cc DEPS lod_tensor selected_rows_utils place phi var_type_traits phi_api_utils op_info xpu_op_list) -ELSE() -cc_library(phi_utils SRCS phi_utils.cc DEPS lod_tensor selected_rows_utils place phi var_type_traits phi_api_utils op_info) -ENDIF() +cc_library( + data_layout_transform + SRCS data_layout_transform.cc + DEPS tensor math_function) +cc_test( + data_layout_transform_test + SRCS data_layout_transform_test.cc + DEPS data_layout_transform) + +cc_library( + data_transform + SRCS data_transform.cc + DEPS math_function + tensor + framework_proto + selected_rows_utils + data_device_transform + data_type_transform + data_layout_transform) + +cc_library( + attribute + SRCS attribute.cc + DEPS framework_proto boost enforce) +cc_test( + attribute_test + SRCS attribute_test.cc + DEPS attribute framework_proto proto_desc) +cc_test( + program_desc_test + SRCS program_desc_test.cc + DEPS proto_desc device_context) + +cc_library( + op_version_proto + SRCS op_version_proto.cc + DEPS framework_proto boost) + +cc_library( + op_version_registry + SRCS op_version_registry.cc + DEPS op_version_proto framework_proto boost) +cc_test( + op_version_registry_test + SRCS op_version_registry_test.cc + DEPS op_version_registry) + +cc_library( + op_proto_maker + SRCS op_proto_maker.cc + DEPS framework_proto attribute glog) +cc_test( + op_proto_maker_test + SRCS op_proto_maker_test.cc + DEPS op_proto_maker) +cc_library( + no_need_buffer_vars_inference + SRCS no_need_buffer_vars_inference.cc + DEPS attribute device_context) +cc_library( + op_info + SRCS op_info.cc + DEPS attribute framework_proto no_need_buffer_vars_inference) +cc_library( + shape_inference + SRCS shape_inference.cc + DEPS ddim attribute device_context) + +cc_test( + no_need_buffer_vars_inference_test + SRCS no_need_buffer_vars_inference_test.cc + DEPS no_need_buffer_vars_inference layer) + +cc_library( + transfer_scope_cache + SRCS transfer_scope_cache.cc + DEPS scope framework_proto device_context) + +cc_library( + unused_var_check + SRCS unused_var_check.cc + DEPS glog no_need_buffer_vars_inference) + +cc_library( + op_kernel_type + SRCS op_kernel_type.cc + DEPS device_context place) + +if(WITH_XPU) + cc_library( + phi_utils + SRCS phi_utils.cc + DEPS lod_tensor + selected_rows_utils + place + phi + var_type_traits + phi_api_utils + op_info + xpu_op_list) +else() + cc_library( + phi_utils + SRCS phi_utils.cc + DEPS lod_tensor + selected_rows_utils + place + phi + var_type_traits + phi_api_utils + op_info) +endif() -IF(WITH_XPU) -cc_library(operator SRCS operator.cc DEPS xpu_op_list op_info device_context tensor scope glog trainer_desc_proto data_feed_proto - shape_inference data_transform lod_tensor profiler transfer_scope_cache op_kernel_type op_call_stack unused_var_check nan_inf_utils - phi_utils kernel_factory infershape_utils op_utils) -ELSE() -cc_library(operator SRCS operator.cc DEPS op_info device_context tensor scope glog trainer_desc_proto data_feed_proto - shape_inference data_transform lod_tensor profiler transfer_scope_cache op_kernel_type op_call_stack unused_var_check nan_inf_utils - phi_utils kernel_factory infershape_utils op_utils) -ENDIF() +if(WITH_XPU) + cc_library( + operator + SRCS operator.cc + DEPS xpu_op_list + op_info + device_context + tensor + scope + glog + trainer_desc_proto + data_feed_proto + shape_inference + data_transform + lod_tensor + profiler + transfer_scope_cache + op_kernel_type + op_call_stack + unused_var_check + nan_inf_utils + phi_utils + kernel_factory + infershape_utils + op_utils) +else() + cc_library( + operator + SRCS operator.cc + DEPS op_info + device_context + tensor + scope + glog + trainer_desc_proto + data_feed_proto + shape_inference + data_transform + lod_tensor + profiler + transfer_scope_cache + op_kernel_type + op_call_stack + unused_var_check + nan_inf_utils + phi_utils + kernel_factory + infershape_utils + op_utils) +endif() -cc_test(operator_test SRCS operator_test.cc DEPS operator op_registry device_context) -cc_test(operator_exception_test SRCS operator_exception_test.cc DEPS operator op_registry device_context) +cc_test( + operator_test + SRCS operator_test.cc + DEPS operator op_registry device_context) +cc_test( + operator_exception_test + SRCS operator_exception_test.cc + DEPS operator op_registry device_context) cc_library(version SRCS version.cc) -cc_test(version_test SRCS version_test.cc DEPS version) - -cc_library(proto_desc SRCS var_desc.cc op_desc.cc block_desc.cc program_desc.cc process_mesh_desc.cc DEPS attribute shape_inference op_info operator glog version) - -cc_library(op_registry SRCS op_registry.cc DEPS op_proto_maker op_info operator glog proto_desc) - -cc_library(op_call_stack SRCS op_call_stack.cc DEPS op_proto_maker enforce) -cc_test(op_call_stack_test SRCS op_call_stack_test.cc DEPS op_call_stack) - -cc_library(program_processing SRCS program_processing.cc DEPS boost proto_desc) -cc_test(program_processing_test SRCS program_processing_test.cc DEPS proto_desc program_processing) +cc_test( + version_test + SRCS version_test.cc + DEPS version) + +cc_library( + proto_desc + SRCS var_desc.cc op_desc.cc block_desc.cc program_desc.cc process_mesh_desc.cc + DEPS attribute shape_inference op_info operator glog version) + +cc_library( + op_registry + SRCS op_registry.cc + DEPS op_proto_maker op_info operator glog proto_desc) + +cc_library( + op_call_stack + SRCS op_call_stack.cc + DEPS op_proto_maker enforce) +cc_test( + op_call_stack_test + SRCS op_call_stack_test.cc + DEPS op_call_stack) + +cc_library( + program_processing + SRCS program_processing.cc + DEPS boost proto_desc) +cc_test( + program_processing_test + SRCS program_processing_test.cc + DEPS proto_desc program_processing) if(WITH_GPU) - nv_test(op_registry_test SRCS op_registry_test.cc DEPS op_registry) + nv_test( + op_registry_test + SRCS op_registry_test.cc + DEPS op_registry) elseif(WITH_ROCM) - hip_test(op_registry_test SRCS op_registry_test.cc DEPS op_registry) + hip_test( + op_registry_test + SRCS op_registry_test.cc + DEPS op_registry) endif() if(WITH_PYTHON) py_proto_compile(framework_py_proto SRCS framework.proto data_feed.proto) py_proto_compile(trainer_py_proto SRCS trainer_desc.proto data_feed.proto) - py_proto_compile(distributed_strategy_py_proto SRCS distributed_strategy.proto) + py_proto_compile(distributed_strategy_py_proto SRCS + distributed_strategy.proto) py_proto_compile(pass_desc_py_proto SRCS pass_desc.proto) -#Generate an empty \ - #__init__.py to make framework_py_proto as a valid python module. - add_custom_target(fleet_proto_init ALL - COMMAND ${CMAKE_COMMAND} -E make_directory ${PADDLE_BINARY_DIR}/python/paddle/distributed/fleet/proto - COMMAND ${CMAKE_COMMAND} -E touch ${PADDLE_BINARY_DIR}/python/paddle/distributed/fleet/proto/__init__.py - ) - add_custom_target(framework_py_proto_init ALL COMMAND ${CMAKE_COMMAND} -E touch __init__.py) - add_dependencies(framework_py_proto framework_py_proto_init trainer_py_proto distributed_strategy_py_proto fleet_proto_init pass_desc_py_proto ps_py_proto ps_py_proto_init) - if (NOT WIN32) - add_custom_command(TARGET framework_py_proto POST_BUILD - COMMAND ${CMAKE_COMMAND} -E make_directory ${PADDLE_BINARY_DIR}/python/paddle/fluid/proto + #Generate an empty \ + #__init__.py to make framework_py_proto as a valid python module. + add_custom_target( + fleet_proto_init ALL + COMMAND ${CMAKE_COMMAND} -E make_directory + ${PADDLE_BINARY_DIR}/python/paddle/distributed/fleet/proto + COMMAND + ${CMAKE_COMMAND} -E touch + ${PADDLE_BINARY_DIR}/python/paddle/distributed/fleet/proto/__init__.py) + add_custom_target(framework_py_proto_init ALL COMMAND ${CMAKE_COMMAND} -E + touch __init__.py) + add_dependencies( + framework_py_proto + framework_py_proto_init + trainer_py_proto + distributed_strategy_py_proto + fleet_proto_init + pass_desc_py_proto + ps_py_proto + ps_py_proto_init) + if(NOT WIN32) + add_custom_command( + TARGET framework_py_proto + POST_BUILD + COMMAND ${CMAKE_COMMAND} -E make_directory + ${PADDLE_BINARY_DIR}/python/paddle/fluid/proto COMMAND cp *.py ${PADDLE_BINARY_DIR}/python/paddle/fluid/proto/ - COMMAND cp distributed_strategy_*.py ${PADDLE_BINARY_DIR}/python/paddle/distributed/fleet/proto + COMMAND cp distributed_strategy_*.py + ${PADDLE_BINARY_DIR}/python/paddle/distributed/fleet/proto COMMENT "Copy generated python proto into directory paddle/fluid/proto." WORKING_DIRECTORY ${CMAKE_CURRENT_BINARY_DIR}) - add_custom_target(fleet_executor_proto_init ALL DEPENDS fleet_proto_init fleet_executor_desc_py_proto - COMMAND cp ${PADDLE_BINARY_DIR}/paddle/fluid/distributed/fleet_executor/fleet_executor_*.py ${PADDLE_BINARY_DIR}/python/paddle/distributed/fleet/proto - COMMENT "Copy generated python proto into directory paddle/distributed/fleet/proto.") + add_custom_target( + fleet_executor_proto_init ALL + DEPENDS fleet_proto_init fleet_executor_desc_py_proto + COMMAND + cp + ${PADDLE_BINARY_DIR}/paddle/fluid/distributed/fleet_executor/fleet_executor_*.py + ${PADDLE_BINARY_DIR}/python/paddle/distributed/fleet/proto + COMMENT + "Copy generated python proto into directory paddle/distributed/fleet/proto." + ) else(NOT WIN32) - string(REPLACE "/" "\\" proto_dstpath "${PADDLE_BINARY_DIR}/python/paddle/fluid/proto/") - string(REPLACE "/" "\\" fleet_proto_dstpath "${PADDLE_BINARY_DIR}/python/paddle/distributed/fleet/proto/") - add_custom_command(TARGET framework_py_proto POST_BUILD - COMMAND ${CMAKE_COMMAND} -E make_directory ${PADDLE_BINARY_DIR}/python/paddle/fluid/proto - COMMAND copy /Y *.py ${proto_dstpath} - COMMAND copy /Y distributed_strategy_*.py ${fleet_proto_dstpath} - COMMENT "Copy generated python proto into directory paddle/fluid/proto." - COMMENT "Copy generated python proto into directory paddle/distributed/fleet/proto." - WORKING_DIRECTORY ${CMAKE_CURRENT_BINARY_DIR}) + string(REPLACE "/" "\\" proto_dstpath + "${PADDLE_BINARY_DIR}/python/paddle/fluid/proto/") + string( + REPLACE "/" "\\" fleet_proto_dstpath + "${PADDLE_BINARY_DIR}/python/paddle/distributed/fleet/proto/") + add_custom_command( + TARGET framework_py_proto + POST_BUILD + COMMAND ${CMAKE_COMMAND} -E make_directory + ${PADDLE_BINARY_DIR}/python/paddle/fluid/proto + COMMAND copy /Y *.py ${proto_dstpath} + COMMAND copy /Y distributed_strategy_*.py ${fleet_proto_dstpath} + COMMENT "Copy generated python proto into directory paddle/fluid/proto." + COMMENT + "Copy generated python proto into directory paddle/distributed/fleet/proto." + WORKING_DIRECTORY ${CMAKE_CURRENT_BINARY_DIR}) endif(NOT WIN32) endif() -if (WITH_PSCORE) - add_custom_target(index_dataset_proto_init ALL DEPENDS fleet_proto_init index_dataset_py_proto - COMMAND cp ${PADDLE_BINARY_DIR}/paddle/fluid/distributed/index_dataset/index_dataset_*.py ${PADDLE_BINARY_DIR}/python/paddle/distributed/fleet/proto - COMMENT "Copy generated python proto into directory paddle/distributed/fleet/proto.") +if(WITH_PSCORE) + add_custom_target( + index_dataset_proto_init ALL + DEPENDS fleet_proto_init index_dataset_py_proto + COMMAND + cp + ${PADDLE_BINARY_DIR}/paddle/fluid/distributed/index_dataset/index_dataset_*.py + ${PADDLE_BINARY_DIR}/python/paddle/distributed/fleet/proto + COMMENT + "Copy generated python proto into directory paddle/distributed/fleet/proto." + ) endif(WITH_PSCORE) -cc_library(lod_rank_table SRCS lod_rank_table.cc DEPS lod_tensor) - -cc_library(feed_fetch_method SRCS feed_fetch_method.cc DEPS lod_tensor scope glog) -cc_library(variable_helper SRCS variable_helper.cc DEPS lod_tensor) - -if (TENSORRT_FOUND) -cc_library(naive_executor SRCS naive_executor.cc DEPS op_registry denormal device_context scope framework_proto glog lod_rank_table feed_fetch_method graph_to_program_pass variable_helper tensorrt_engine_op) +cc_library( + lod_rank_table + SRCS lod_rank_table.cc + DEPS lod_tensor) + +cc_library( + feed_fetch_method + SRCS feed_fetch_method.cc + DEPS lod_tensor scope glog) +cc_library( + variable_helper + SRCS variable_helper.cc + DEPS lod_tensor) + +if(TENSORRT_FOUND) + cc_library( + naive_executor + SRCS naive_executor.cc + DEPS op_registry + denormal + device_context + scope + framework_proto + glog + lod_rank_table + feed_fetch_method + graph_to_program_pass + variable_helper + tensorrt_engine_op) else() -cc_library(naive_executor SRCS naive_executor.cc DEPS op_registry denormal device_context scope framework_proto glog lod_rank_table feed_fetch_method graph_to_program_pass variable_helper) + cc_library( + naive_executor + SRCS naive_executor.cc + DEPS op_registry + denormal + device_context + scope + framework_proto + glog + lod_rank_table + feed_fetch_method + graph_to_program_pass + variable_helper) endif(TENSORRT_FOUND) -cc_library(executor_gc_helper SRCS executor_gc_helper.cc DEPS scope proto_desc operator garbage_collector op_registry while_op_helper recurrent_op_helper conditional_block_op_helper) +cc_library( + executor_gc_helper + SRCS executor_gc_helper.cc + DEPS scope + proto_desc + operator + garbage_collector + op_registry + while_op_helper + recurrent_op_helper + conditional_block_op_helper) if(WITH_DISTRIBUTE) if(WITH_PSLIB) - cc_library(executor SRCS executor.cc multi_trainer.cc pipeline_trainer.cc dataset_factory.cc - dist_multi_trainer.cc trainer_factory.cc trainer.cc data_feed_factory.cc - heterxpu_trainer.cc - data_feed.cc device_worker.cc hogwild_worker.cc hetercpu_worker.cc ps_gpu_worker.cc - ps_gpu_trainer.cc downpour_worker.cc downpour_worker_opt.cc data_feed.cu - pull_dense_worker.cc section_worker.cc device_worker_factory.cc data_set.cc DEPS op_registry - device_context scope framework_proto trainer_desc_proto glog fs shell - fleet_wrapper heter_wrapper ps_gpu_wrapper box_wrapper metrics lodtensor_printer - lod_rank_table feed_fetch_method collective_helper ${GLOB_DISTRIBUTE_DEPS} - graph_to_program_pass variable_helper data_feed_proto timer monitor - heter_service_proto fleet_executor ${BRPC_DEP}) - set(DISTRIBUTE_COMPILE_FLAGS "-Wno-non-virtual-dtor -Wno-error=non-virtual-dtor -Wno-error=delete-non-virtual-dtor -Wno-error=parentheses") - if (CMAKE_CXX_COMPILER_VERSION VERSION_GREATER 7.0) - set(DISTRIBUTE_COMPILE_FLAGS - "${DISTRIBUTE_COMPILE_FLAGS} -faligned-new") + cc_library( + executor + SRCS executor.cc + multi_trainer.cc + pipeline_trainer.cc + dataset_factory.cc + dist_multi_trainer.cc + trainer_factory.cc + trainer.cc + data_feed_factory.cc + heterxpu_trainer.cc + data_feed.cc + device_worker.cc + hogwild_worker.cc + hetercpu_worker.cc + ps_gpu_worker.cc + ps_gpu_trainer.cc + downpour_worker.cc + downpour_worker_opt.cc + data_feed.cu + pull_dense_worker.cc + section_worker.cc + device_worker_factory.cc + data_set.cc + DEPS op_registry + device_context + scope + framework_proto + trainer_desc_proto + glog + fs + shell + fleet_wrapper + heter_wrapper + ps_gpu_wrapper + box_wrapper + metrics + lodtensor_printer + lod_rank_table + feed_fetch_method + collective_helper + ${GLOB_DISTRIBUTE_DEPS} + graph_to_program_pass + variable_helper + data_feed_proto + timer + monitor + heter_service_proto + fleet_executor + ${BRPC_DEP}) + set(DISTRIBUTE_COMPILE_FLAGS + "-Wno-non-virtual-dtor -Wno-error=non-virtual-dtor -Wno-error=delete-non-virtual-dtor -Wno-error=parentheses" + ) + if(CMAKE_CXX_COMPILER_VERSION VERSION_GREATER 7.0) + set(DISTRIBUTE_COMPILE_FLAGS "${DISTRIBUTE_COMPILE_FLAGS} -faligned-new") endif() - set_source_files_properties(executor.cc PROPERTIES COMPILE_FLAGS ${DISTRIBUTE_COMPILE_FLAGS}) - set_source_files_properties(device_worker.cc PROPERTIES COMPILE_FLAGS ${DISTRIBUTE_COMPILE_FLAGS}) - set_source_files_properties(hetercpu_worker.cc PROPERTIES COMPILE_FLAGS ${DISTRIBUTE_COMPILE_FLAGS}) - set_source_files_properties(heterxpu_trainer.cc PROPERTIES COMPILE_FLAGS ${DISTRIBUTE_COMPILE_FLAGS}) + set_source_files_properties( + executor.cc PROPERTIES COMPILE_FLAGS ${DISTRIBUTE_COMPILE_FLAGS}) + set_source_files_properties( + device_worker.cc PROPERTIES COMPILE_FLAGS ${DISTRIBUTE_COMPILE_FLAGS}) + set_source_files_properties( + hetercpu_worker.cc PROPERTIES COMPILE_FLAGS ${DISTRIBUTE_COMPILE_FLAGS}) + set_source_files_properties( + heterxpu_trainer.cc PROPERTIES COMPILE_FLAGS ${DISTRIBUTE_COMPILE_FLAGS}) elseif(WITH_PSCORE) - cc_library(executor SRCS executor.cc multi_trainer.cc pipeline_trainer.cc dataset_factory.cc - dist_multi_trainer.cc trainer_factory.cc trainer.cc data_feed_factory.cc - heterxpu_trainer.cc heter_pipeline_trainer.cc - data_feed.cc device_worker.cc hogwild_worker.cc hetercpu_worker.cc - downpour_worker.cc downpour_lite_worker.cc downpour_worker_opt.cc data_feed.cu - pull_dense_worker.cc section_worker.cc heter_section_worker.cc device_worker_factory.cc data_set.cc DEPS op_registry - device_context scope framework_proto data_feed_proto heter_service_proto trainer_desc_proto glog - index_sampler index_wrapper sampler index_dataset_proto - lod_rank_table fs shell fleet_wrapper heter_wrapper box_wrapper metrics lodtensor_printer feed_fetch_method - graph_to_program_pass variable_helper timer monitor heter_service_proto fleet heter_server brpc fleet_executor) - set(DISTRIBUTE_COMPILE_FLAGS "-Wno-non-virtual-dtor -Wno-error=non-virtual-dtor -Wno-error=delete-non-virtual-dtor -Wno-error=parentheses") - if (CMAKE_CXX_COMPILER_VERSION VERSION_GREATER 7.0) - set(DISTRIBUTE_COMPILE_FLAGS - "${DISTRIBUTE_COMPILE_FLAGS} -faligned-new") + cc_library( + executor + SRCS executor.cc + multi_trainer.cc + pipeline_trainer.cc + dataset_factory.cc + dist_multi_trainer.cc + trainer_factory.cc + trainer.cc + data_feed_factory.cc + heterxpu_trainer.cc + heter_pipeline_trainer.cc + data_feed.cc + device_worker.cc + hogwild_worker.cc + hetercpu_worker.cc + downpour_worker.cc + downpour_lite_worker.cc + downpour_worker_opt.cc + data_feed.cu + pull_dense_worker.cc + section_worker.cc + heter_section_worker.cc + device_worker_factory.cc + data_set.cc + DEPS op_registry + device_context + scope + framework_proto + data_feed_proto + heter_service_proto + trainer_desc_proto + glog + index_sampler + index_wrapper + sampler + index_dataset_proto + lod_rank_table + fs + shell + fleet_wrapper + heter_wrapper + box_wrapper + metrics + lodtensor_printer + feed_fetch_method + graph_to_program_pass + variable_helper + timer + monitor + heter_service_proto + fleet + heter_server + brpc + fleet_executor) + set(DISTRIBUTE_COMPILE_FLAGS + "-Wno-non-virtual-dtor -Wno-error=non-virtual-dtor -Wno-error=delete-non-virtual-dtor -Wno-error=parentheses" + ) + if(CMAKE_CXX_COMPILER_VERSION VERSION_GREATER 7.0) + set(DISTRIBUTE_COMPILE_FLAGS "${DISTRIBUTE_COMPILE_FLAGS} -faligned-new") endif() - set_source_files_properties(executor.cc PROPERTIES COMPILE_FLAGS ${DISTRIBUTE_COMPILE_FLAGS}) - set_source_files_properties(device_worker.cc PROPERTIES COMPILE_FLAGS ${DISTRIBUTE_COMPILE_FLAGS}) - set_source_files_properties(multi_trainer.cc PROPERTIES COMPILE_FLAGS ${DISTRIBUTE_COMPILE_FLAGS}) - set_source_files_properties(hogwild_worker.cc PROPERTIES COMPILE_FLAGS ${DISTRIBUTE_COMPILE_FLAGS}) - set_source_files_properties(downpour_lite_worker.cc PROPERTIES COMPILE_FLAGS ${DISTRIBUTE_COMPILE_FLAGS}) - set_source_files_properties(heter_section_worker.cc PROPERTIES COMPILE_FLAGS ${DISTRIBUTE_COMPILE_FLAGS}) - set_source_files_properties(heter_pipeline_trainer.cc PROPERTIES COMPILE_FLAGS ${DISTRIBUTE_COMPILE_FLAGS}) + set_source_files_properties( + executor.cc PROPERTIES COMPILE_FLAGS ${DISTRIBUTE_COMPILE_FLAGS}) + set_source_files_properties( + device_worker.cc PROPERTIES COMPILE_FLAGS ${DISTRIBUTE_COMPILE_FLAGS}) + set_source_files_properties( + multi_trainer.cc PROPERTIES COMPILE_FLAGS ${DISTRIBUTE_COMPILE_FLAGS}) + set_source_files_properties( + hogwild_worker.cc PROPERTIES COMPILE_FLAGS ${DISTRIBUTE_COMPILE_FLAGS}) + set_source_files_properties( + downpour_lite_worker.cc PROPERTIES COMPILE_FLAGS + ${DISTRIBUTE_COMPILE_FLAGS}) + set_source_files_properties( + heter_section_worker.cc PROPERTIES COMPILE_FLAGS + ${DISTRIBUTE_COMPILE_FLAGS}) + set_source_files_properties( + heter_pipeline_trainer.cc PROPERTIES COMPILE_FLAGS + ${DISTRIBUTE_COMPILE_FLAGS}) else() - cc_library(executor SRCS executor.cc multi_trainer.cc pipeline_trainer.cc dataset_factory.cc - dist_multi_trainer.cc trainer_factory.cc trainer.cc data_feed_factory.cc - heterxpu_trainer.cc - data_feed.cc device_worker.cc hogwild_worker.cc hetercpu_worker.cc ps_gpu_worker.cc - ps_gpu_trainer.cc downpour_worker.cc downpour_worker_opt.cc data_feed.cu - pull_dense_worker.cc section_worker.cc device_worker_factory.cc data_set.cc DEPS op_registry - device_context scope framework_proto data_feed_proto heter_service_proto trainer_desc_proto glog - lod_rank_table fs shell fleet_wrapper heter_wrapper ps_gpu_wrapper box_wrapper metrics lodtensor_printer feed_fetch_method - graph_to_program_pass variable_helper timer monitor fleet_executor) + cc_library( + executor + SRCS executor.cc + multi_trainer.cc + pipeline_trainer.cc + dataset_factory.cc + dist_multi_trainer.cc + trainer_factory.cc + trainer.cc + data_feed_factory.cc + heterxpu_trainer.cc + data_feed.cc + device_worker.cc + hogwild_worker.cc + hetercpu_worker.cc + ps_gpu_worker.cc + ps_gpu_trainer.cc + downpour_worker.cc + downpour_worker_opt.cc + data_feed.cu + pull_dense_worker.cc + section_worker.cc + device_worker_factory.cc + data_set.cc + DEPS op_registry + device_context + scope + framework_proto + data_feed_proto + heter_service_proto + trainer_desc_proto + glog + lod_rank_table + fs + shell + fleet_wrapper + heter_wrapper + ps_gpu_wrapper + box_wrapper + metrics + lodtensor_printer + feed_fetch_method + graph_to_program_pass + variable_helper + timer + monitor + fleet_executor) endif() elseif(WITH_PSLIB) - set(DISTRIBUTE_COMPILE_FLAGS "-Wno-non-virtual-dtor -Wno-error=non-virtual-dtor -Wno-error=delete-non-virtual-dtor") - if (CMAKE_CXX_COMPILER_VERSION VERSION_GREATER 7.0) - set(DISTRIBUTE_COMPILE_FLAGS - "${DISTRIBUTE_COMPILE_FLAGS} -faligned-new") + set(DISTRIBUTE_COMPILE_FLAGS + "-Wno-non-virtual-dtor -Wno-error=non-virtual-dtor -Wno-error=delete-non-virtual-dtor" + ) + if(CMAKE_CXX_COMPILER_VERSION VERSION_GREATER 7.0) + set(DISTRIBUTE_COMPILE_FLAGS "${DISTRIBUTE_COMPILE_FLAGS} -faligned-new") endif() - set_source_files_properties(executor.cc PROPERTIES COMPILE_FLAGS ${DISTRIBUTE_COMPILE_FLAGS}) - set_source_files_properties(device_worker.cc PROPERTIES COMPILE_FLAGS ${DISTRIBUTE_COMPILE_FLAGS}) - set_source_files_properties(hetercpu_worker.cc PROPERTIES COMPILE_FLAGS ${DISTRIBUTE_COMPILE_FLAGS}) - set_source_files_properties(heterxpu_trainer.cc PROPERTIES COMPILE_FLAGS ${DISTRIBUTE_COMPILE_FLAGS}) - cc_library(executor SRCS executor.cc multi_trainer.cc pipeline_trainer.cc dataset_factory.cc - dist_multi_trainer.cc trainer_factory.cc trainer.cc data_feed_factory.cc - heterxpu_trainer.cc - data_feed.cc device_worker.cc hogwild_worker.cc hetercpu_worker.cc ps_gpu_worker.cc - ps_gpu_trainer.cc downpour_worker.cc downpour_worker_opt.cc data_feed.cu - pull_dense_worker.cc section_worker.cc device_worker_factory.cc data_set.cc DEPS op_registry - device_context scope framework_proto data_feed_proto heter_service_proto trainer_desc_proto glog - lod_rank_table fs shell fleet_wrapper heter_wrapper ps_gpu_wrapper box_wrapper lodtensor_printer feed_fetch_method - graph_to_program_pass variable_helper timer monitor fleet_executor ${BRPC_DEP}) + set_source_files_properties( + executor.cc PROPERTIES COMPILE_FLAGS ${DISTRIBUTE_COMPILE_FLAGS}) + set_source_files_properties( + device_worker.cc PROPERTIES COMPILE_FLAGS ${DISTRIBUTE_COMPILE_FLAGS}) + set_source_files_properties( + hetercpu_worker.cc PROPERTIES COMPILE_FLAGS ${DISTRIBUTE_COMPILE_FLAGS}) + set_source_files_properties( + heterxpu_trainer.cc PROPERTIES COMPILE_FLAGS ${DISTRIBUTE_COMPILE_FLAGS}) + cc_library( + executor + SRCS executor.cc + multi_trainer.cc + pipeline_trainer.cc + dataset_factory.cc + dist_multi_trainer.cc + trainer_factory.cc + trainer.cc + data_feed_factory.cc + heterxpu_trainer.cc + data_feed.cc + device_worker.cc + hogwild_worker.cc + hetercpu_worker.cc + ps_gpu_worker.cc + ps_gpu_trainer.cc + downpour_worker.cc + downpour_worker_opt.cc + data_feed.cu + pull_dense_worker.cc + section_worker.cc + device_worker_factory.cc + data_set.cc + DEPS op_registry + device_context + scope + framework_proto + data_feed_proto + heter_service_proto + trainer_desc_proto + glog + lod_rank_table + fs + shell + fleet_wrapper + heter_wrapper + ps_gpu_wrapper + box_wrapper + lodtensor_printer + feed_fetch_method + graph_to_program_pass + variable_helper + timer + monitor + fleet_executor + ${BRPC_DEP}) else() - cc_library(executor SRCS executor.cc multi_trainer.cc pipeline_trainer.cc dataset_factory.cc - dist_multi_trainer.cc trainer_factory.cc trainer.cc data_feed_factory.cc - heterxpu_trainer.cc - data_feed.cc device_worker.cc hogwild_worker.cc hetercpu_worker.cc ps_gpu_worker.cc - ps_gpu_trainer.cc downpour_worker.cc downpour_worker_opt.cc data_feed.cu - pull_dense_worker.cc section_worker.cc device_worker_factory.cc data_set.cc DEPS op_registry - device_context scope framework_proto data_feed_proto heter_service_proto trainer_desc_proto glog - lod_rank_table fs shell fleet_wrapper heter_wrapper ps_gpu_wrapper box_wrapper lodtensor_printer feed_fetch_method - graph_to_program_pass variable_helper timer monitor fleet_executor) + cc_library( + executor + SRCS executor.cc + multi_trainer.cc + pipeline_trainer.cc + dataset_factory.cc + dist_multi_trainer.cc + trainer_factory.cc + trainer.cc + data_feed_factory.cc + heterxpu_trainer.cc + data_feed.cc + device_worker.cc + hogwild_worker.cc + hetercpu_worker.cc + ps_gpu_worker.cc + ps_gpu_trainer.cc + downpour_worker.cc + downpour_worker_opt.cc + data_feed.cu + pull_dense_worker.cc + section_worker.cc + device_worker_factory.cc + data_set.cc + DEPS op_registry + device_context + scope + framework_proto + data_feed_proto + heter_service_proto + trainer_desc_proto + glog + lod_rank_table + fs + shell + fleet_wrapper + heter_wrapper + ps_gpu_wrapper + box_wrapper + lodtensor_printer + feed_fetch_method + graph_to_program_pass + variable_helper + timer + monitor + fleet_executor) endif() -target_link_libraries(executor while_op_helper executor_gc_helper recurrent_op_helper conditional_block_op_helper) - -cc_library(parallel_executor SRCS parallel_executor.cc DEPS - threaded_ssa_graph_executor scope_buffered_ssa_graph_executor parallel_ssa_graph_executor async_ssa_graph_executor - graph build_strategy bind_threaded_ssa_graph_executor collective_helper - fast_threaded_ssa_graph_executor variable_helper) - -cc_library(executor_cache SRCS executor_cache.cc DEPS parallel_executor) +target_link_libraries(executor while_op_helper executor_gc_helper + recurrent_op_helper conditional_block_op_helper) + +cc_library( + parallel_executor + SRCS parallel_executor.cc + DEPS threaded_ssa_graph_executor + scope_buffered_ssa_graph_executor + parallel_ssa_graph_executor + async_ssa_graph_executor + graph + build_strategy + bind_threaded_ssa_graph_executor + collective_helper + fast_threaded_ssa_graph_executor + variable_helper) + +cc_library( + executor_cache + SRCS executor_cache.cc + DEPS parallel_executor) if(WITH_PSCORE) - get_property(RPC_DEPS GLOBAL PROPERTY RPC_DEPS) - cc_test(dist_multi_trainer_test SRCS dist_multi_trainer_test.cc DEPS - conditional_block_op executor gloo_wrapper ${RPC_DEPS}) - cc_test(heter_pipeline_trainer_test SRCS heter_pipeline_trainer_test.cc DEPS - conditional_block_op scale_op heter_listen_and_serv_op executor heter_server gloo_wrapper eigen_function ${RPC_DEPS}) + get_property(RPC_DEPS GLOBAL PROPERTY RPC_DEPS) + cc_test( + dist_multi_trainer_test + SRCS dist_multi_trainer_test.cc + DEPS conditional_block_op executor gloo_wrapper ${RPC_DEPS}) + cc_test( + heter_pipeline_trainer_test + SRCS heter_pipeline_trainer_test.cc + DEPS conditional_block_op + scale_op + heter_listen_and_serv_op + executor + heter_server + gloo_wrapper + eigen_function + ${RPC_DEPS}) else() - cc_test(dist_multi_trainer_test SRCS dist_multi_trainer_test.cc DEPS - conditional_block_op executor gloo_wrapper) + cc_test( + dist_multi_trainer_test + SRCS dist_multi_trainer_test.cc + DEPS conditional_block_op executor gloo_wrapper) endif() -cc_library(prune SRCS prune.cc DEPS framework_proto boost) -cc_test(prune_test SRCS prune_test.cc DEPS op_info prune recurrent_op device_context) -cc_test(var_type_inference_test SRCS var_type_inference_test.cc DEPS op_registry - proto_desc) -cc_library(selected_rows_utils SRCS selected_rows_utils.cc DEPS selected_rows) -cc_test(selected_rows_utils_test SRCS selected_rows_utils_test.cc DEPS selected_rows_utils) - -cc_test(op_kernel_type_test SRCS op_kernel_type_test.cc DEPS place device_context framework_proto op_kernel_type) +cc_library( + prune + SRCS prune.cc + DEPS framework_proto boost) +cc_test( + prune_test + SRCS prune_test.cc + DEPS op_info prune recurrent_op device_context) +cc_test( + var_type_inference_test + SRCS var_type_inference_test.cc + DEPS op_registry proto_desc) +cc_library( + selected_rows_utils + SRCS selected_rows_utils.cc + DEPS selected_rows) +cc_test( + selected_rows_utils_test + SRCS selected_rows_utils_test.cc + DEPS selected_rows_utils) + +cc_test( + op_kernel_type_test + SRCS op_kernel_type_test.cc + DEPS place device_context framework_proto op_kernel_type) cc_test(cow_ptr_tests SRCS details/cow_ptr_test.cc) -cc_test(tuple_test SRCS tuple_test.cc ) +cc_test(tuple_test SRCS tuple_test.cc) cc_test(inlined_vector_test SRCS inlined_vector_test.cc) -cc_library(dlpack_tensor SRCS dlpack_tensor.cc DEPS tensor dlpack) -cc_test(dlpack_tensor_test SRCS dlpack_tensor_test.cc DEPS dlpack_tensor glog) - -cc_library(op_compatible_info SRCS op_compatible_info.cc DEPS string_helper proto_desc) -cc_test(op_compatible_info_test SRCS op_compatible_info_test.cc DEPS op_compatible_info proto_desc string_helper glog) - -cc_library(save_load_util SRCS save_load_util.cc DEPS tensor scope layer) -cc_test(save_load_util_test SRCS save_load_util_test.cc DEPS save_load_util tensor scope layer) -cc_library(generator SRCS generator.cc DEPS enforce place) - -cc_library(infershape_utils SRCS infershape_utils.cc DEPS lod_tensor selected_rows_utils attribute place var_type_traits phi phi_api_utils op_info shape_inference) -cc_test(infershape_utils_test SRCS infershape_utils_test.cc DEPS infershape_utils infermeta_utils meta_tensor) +cc_library( + dlpack_tensor + SRCS dlpack_tensor.cc + DEPS tensor dlpack) +cc_test( + dlpack_tensor_test + SRCS dlpack_tensor_test.cc + DEPS dlpack_tensor glog) + +cc_library( + op_compatible_info + SRCS op_compatible_info.cc + DEPS string_helper proto_desc) +cc_test( + op_compatible_info_test + SRCS op_compatible_info_test.cc + DEPS op_compatible_info proto_desc string_helper glog) + +cc_library( + save_load_util + SRCS save_load_util.cc + DEPS tensor scope layer) +cc_test( + save_load_util_test + SRCS save_load_util_test.cc + DEPS save_load_util tensor scope layer) +cc_library( + generator + SRCS generator.cc + DEPS enforce place) + +cc_library( + infershape_utils + SRCS infershape_utils.cc + DEPS lod_tensor + selected_rows_utils + attribute + place + var_type_traits + phi + phi_api_utils + op_info + shape_inference) +cc_test( + infershape_utils_test + SRCS infershape_utils_test.cc + DEPS infershape_utils infermeta_utils meta_tensor) # Get the current working branch execute_process( COMMAND git rev-parse --abbrev-ref HEAD - WORKING_DIRECTORY ${CMAKE_SOURCE_DIR} - OUTPUT_VARIABLE PADDLE_BRANCH - OUTPUT_STRIP_TRAILING_WHITESPACE - ) + WORKING_DIRECTORY ${CMAKE_SOURCE_DIR} + OUTPUT_VARIABLE PADDLE_BRANCH + OUTPUT_STRIP_TRAILING_WHITESPACE) # Get the latest abbreviated commit hash of the working branch execute_process( COMMAND git log -1 --format=%h - WORKING_DIRECTORY ${CMAKE_SOURCE_DIR} - OUTPUT_VARIABLE PADDLE_COMMIT - OUTPUT_STRIP_TRAILING_WHITESPACE - ) + WORKING_DIRECTORY ${CMAKE_SOURCE_DIR} + OUTPUT_VARIABLE PADDLE_COMMIT + OUTPUT_STRIP_TRAILING_WHITESPACE) message(STATUS "commit: ${PADDLE_COMMIT}") message(STATUS "branch: ${PADDLE_BRANCH}") configure_file(commit.h.in commit.h) -cc_library(custom_operator SRCS custom_operator.cc DEPS tensor attribute framework_proto op_registry operator dynamic_loader string_helper phi_tensor op_meta_info phi_api) +cc_library( + custom_operator + SRCS custom_operator.cc + DEPS tensor + attribute + framework_proto + op_registry + operator + dynamic_loader + string_helper + phi_tensor + op_meta_info + phi_api) #cc_binary(test_executor SRCS test_executor.cc DEPS executor op_registry ${GLOB_OP_LIB} ${GLOB_OPERATOR_DEPS} ) #cc_binary(new_executor SRCS new_exec_test.cc DEPS operator op_registry executor ${GLOB_OP_LIB} ${GLOB_OPERATOR_DEPS} profiler) -set(FLUID_FRAMEWORK_MODULES proto_desc memory lod_tensor executor data_feed_proto layer dynamic_loader custom_operator) +set(FLUID_FRAMEWORK_MODULES + proto_desc + memory + lod_tensor + executor + data_feed_proto + layer + dynamic_loader + custom_operator) cc_library(paddle_framework DEPS ${FLUID_FRAMEWORK_MODULES}) @@ -456,11 +1145,23 @@ if(WITH_TESTING AND TEST selected_rows_utils_test) endif() cc_test(scope_guard_test SRCS scope_guard_test.cc) -cc_test(phi_utils_test SRCS phi_utils_test.cc DEPS phi_utils) +cc_test( + phi_utils_test + SRCS phi_utils_test.cc + DEPS phi_utils) if(WITH_GPU OR WITH_ROCM) - cc_library(fluid_convert_utils SRCS convert_utils.cc DEPS data_type place gpu_info) + cc_library( + fluid_convert_utils + SRCS convert_utils.cc + DEPS data_type place gpu_info) else() - cc_library(fluid_convert_utils SRCS convert_utils.cc DEPS data_type place) + cc_library( + fluid_convert_utils + SRCS convert_utils.cc + DEPS data_type place) endif() -cc_test(convert_utils_test SRCS convert_utils_test.cc DEPS fluid_convert_utils) +cc_test( + convert_utils_test + SRCS convert_utils_test.cc + DEPS fluid_convert_utils) diff --git a/paddle/fluid/framework/details/CMakeLists.txt b/paddle/fluid/framework/details/CMakeLists.txt index 948eaab40b4..e193274ff21 100644 --- a/paddle/fluid/framework/details/CMakeLists.txt +++ b/paddle/fluid/framework/details/CMakeLists.txt @@ -1,96 +1,284 @@ -cc_library(var_handle SRCS var_handle.cc DEPS place framework_proto node) -cc_library(op_handle_base SRCS op_handle_base.cc DEPS var_handle device_context lod_tensor) +cc_library( + var_handle + SRCS var_handle.cc + DEPS place framework_proto node) +cc_library( + op_handle_base + SRCS op_handle_base.cc + DEPS var_handle device_context lod_tensor) -cc_library(scale_loss_grad_op_handle SRCS scale_loss_grad_op_handle.cc DEPS op_handle_base scope lod_tensor ddim memory) -cc_library(fetch_op_handle SRCS fetch_op_handle.cc DEPS op_handle_base scope lod_tensor ddim memory) -cc_library(fetch_async_op_handle SRCS fetch_async_op_handle.cc DEPS op_handle_base scope lod_tensor ddim memory) +cc_library( + scale_loss_grad_op_handle + SRCS scale_loss_grad_op_handle.cc + DEPS op_handle_base scope lod_tensor ddim memory) +cc_library( + fetch_op_handle + SRCS fetch_op_handle.cc + DEPS op_handle_base scope lod_tensor ddim memory) +cc_library( + fetch_async_op_handle + SRCS fetch_async_op_handle.cc + DEPS op_handle_base scope lod_tensor ddim memory) -cc_library(share_tensor_buffer_functor SRCS share_tensor_buffer_functor.cc DEPS framework_proto scope place operator op_registry) -cc_library(computation_op_handle SRCS computation_op_handle.cc DEPS framework_proto scope place operator op_registry) -cc_library(share_tensor_buffer_op_handle SRCS share_tensor_buffer_op_handle.cc DEPS op_handle_base scope computation_op_handle share_tensor_buffer_functor) -cc_library(rpc_op_handle SRCS rpc_op_handle.cc DEPS framework_proto scope place operator op_registry) -cc_library(fetch_barrier_op_handle SRCS fetch_barrier_op_handle.cc DEPS framework_proto scope place operator op_registry) -cc_library(multi_devices_helper SRCS multi_devices_helper.cc DEPS graph graph_helper) +cc_library( + share_tensor_buffer_functor + SRCS share_tensor_buffer_functor.cc + DEPS framework_proto scope place operator op_registry) +cc_library( + computation_op_handle + SRCS computation_op_handle.cc + DEPS framework_proto scope place operator op_registry) +cc_library( + share_tensor_buffer_op_handle + SRCS share_tensor_buffer_op_handle.cc + DEPS op_handle_base scope computation_op_handle share_tensor_buffer_functor) +cc_library( + rpc_op_handle + SRCS rpc_op_handle.cc + DEPS framework_proto scope place operator op_registry) +cc_library( + fetch_barrier_op_handle + SRCS fetch_barrier_op_handle.cc + DEPS framework_proto scope place operator op_registry) +cc_library( + multi_devices_helper + SRCS multi_devices_helper.cc + DEPS graph graph_helper) -cc_library(variable_visitor SRCS variable_visitor.cc DEPS lod_tensor selected_rows_utils) +cc_library( + variable_visitor + SRCS variable_visitor.cc + DEPS lod_tensor selected_rows_utils) if(WITH_PSCORE) - set(DISTRIBUTE_COMPILE_FLAGS "-Wno-non-virtual-dtor -Wno-error=non-virtual-dtor -Wno-error=delete-non-virtual-dtor") - set_source_files_properties(reduce_op_handle.cc PROPERTIES COMPILE_FLAGS ${DISTRIBUTE_COMPILE_FLAGS}) - set_source_files_properties(threaded_ssa_graph_executor.cc PROPERTIES COMPILE_FLAGS ${DISTRIBUTE_COMPILE_FLAGS}) - set_source_files_properties(async_ssa_graph_executor.cc PROPERTIES COMPILE_FLAGS ${DISTRIBUTE_COMPILE_FLAGS}) + set(DISTRIBUTE_COMPILE_FLAGS + "-Wno-non-virtual-dtor -Wno-error=non-virtual-dtor -Wno-error=delete-non-virtual-dtor" + ) + set_source_files_properties( + reduce_op_handle.cc PROPERTIES COMPILE_FLAGS ${DISTRIBUTE_COMPILE_FLAGS}) + set_source_files_properties( + threaded_ssa_graph_executor.cc PROPERTIES COMPILE_FLAGS + ${DISTRIBUTE_COMPILE_FLAGS}) + set_source_files_properties( + async_ssa_graph_executor.cc PROPERTIES COMPILE_FLAGS + ${DISTRIBUTE_COMPILE_FLAGS}) endif() - if(WITH_GPU) - nv_library(nan_inf_utils SRCS nan_inf_utils_detail.cc nan_inf_utils_detail.cu DEPS framework_proto scope place) - nv_library(all_reduce_op_handle SRCS all_reduce_op_handle.cc DEPS op_handle_base scope lod_tensor ddim memory - dynload_cuda variable_visitor) - nv_library(fused_all_reduce_op_handle SRCS fused_all_reduce_op_handle.cc DEPS op_handle_base scope lod_tensor ddim memory - dynload_cuda variable_visitor place device_memory_aligment) - nv_library(grad_merge_all_reduce_op_handle SRCS grad_merge_all_reduce_op_handle.cc DEPS op_handle_base scope lod_tensor - ddim memory dynload_cuda variable_visitor place device_memory_aligment all_reduce_op_handle fused_all_reduce_op_handle) - - if(WITH_DGC) - nv_library(sparse_all_reduce_op_handle SRCS sparse_all_reduce_op_handle.cc DEPS op_handle_base scope - lod_tensor ddim memory dynload_cuda variable_visitor dgc all_reduce_op_handle) - endif() - - if(WITH_DISTRIBUTE) - nv_library(reduce_op_handle SRCS reduce_op_handle.cc DEPS op_handle_base variable_visitor scope - ddim dynload_cuda selected_rows_functor) - else() - nv_library(reduce_op_handle SRCS reduce_op_handle.cc DEPS op_handle_base variable_visitor scope - ddim dynload_cuda selected_rows_functor) - endif() - nv_library(broadcast_op_handle SRCS broadcast_op_handle.cc DEPS op_handle_base scope ddim memory variable_visitor dynload_cuda) - nv_library(fused_broadcast_op_handle SRCS fused_broadcast_op_handle.cc DEPS broadcast_op_handle) + nv_library( + nan_inf_utils + SRCS nan_inf_utils_detail.cc nan_inf_utils_detail.cu + DEPS framework_proto scope place) + nv_library( + all_reduce_op_handle + SRCS all_reduce_op_handle.cc + DEPS op_handle_base + scope + lod_tensor + ddim + memory + dynload_cuda + variable_visitor) + nv_library( + fused_all_reduce_op_handle + SRCS fused_all_reduce_op_handle.cc + DEPS op_handle_base + scope + lod_tensor + ddim + memory + dynload_cuda + variable_visitor + place + device_memory_aligment) + nv_library( + grad_merge_all_reduce_op_handle + SRCS grad_merge_all_reduce_op_handle.cc + DEPS op_handle_base + scope + lod_tensor + ddim + memory + dynload_cuda + variable_visitor + place + device_memory_aligment + all_reduce_op_handle + fused_all_reduce_op_handle) + + if(WITH_DGC) + nv_library( + sparse_all_reduce_op_handle + SRCS sparse_all_reduce_op_handle.cc + DEPS op_handle_base + scope + lod_tensor + ddim + memory + dynload_cuda + variable_visitor + dgc + all_reduce_op_handle) + endif() + + if(WITH_DISTRIBUTE) + nv_library( + reduce_op_handle + SRCS reduce_op_handle.cc + DEPS op_handle_base variable_visitor scope ddim dynload_cuda + selected_rows_functor) + else() + nv_library( + reduce_op_handle + SRCS reduce_op_handle.cc + DEPS op_handle_base variable_visitor scope ddim dynload_cuda + selected_rows_functor) + endif() + nv_library( + broadcast_op_handle + SRCS broadcast_op_handle.cc + DEPS op_handle_base scope ddim memory variable_visitor dynload_cuda) + nv_library( + fused_broadcast_op_handle + SRCS fused_broadcast_op_handle.cc + DEPS broadcast_op_handle) elseif(WITH_ROCM) - hip_library(nan_inf_utils SRCS nan_inf_utils_detail.cc nan_inf_utils_detail.cu DEPS framework_proto scope place) - hip_library(all_reduce_op_handle SRCS all_reduce_op_handle.cc DEPS op_handle_base scope lod_tensor ddim memory - dynload_cuda variable_visitor) - hip_library(fused_all_reduce_op_handle SRCS fused_all_reduce_op_handle.cc DEPS op_handle_base scope lod_tensor ddim memory - dynload_cuda variable_visitor place device_memory_aligment) - hip_library(grad_merge_all_reduce_op_handle SRCS grad_merge_all_reduce_op_handle.cc DEPS op_handle_base scope lod_tensor - ddim memory dynload_cuda variable_visitor place device_memory_aligment all_reduce_op_handle fused_all_reduce_op_handle) - - if(WITH_DISTRIBUTE) - hip_library(reduce_op_handle SRCS reduce_op_handle.cc DEPS op_handle_base variable_visitor scope - ddim dynload_cuda selected_rows_functor) - else() - hip_library(reduce_op_handle SRCS reduce_op_handle.cc DEPS op_handle_base variable_visitor scope - ddim dynload_cuda selected_rows_functor) - endif() - hip_library(broadcast_op_handle SRCS broadcast_op_handle.cc DEPS op_handle_base scope ddim memory variable_visitor dynload_cuda) - hip_library(fused_broadcast_op_handle SRCS fused_broadcast_op_handle.cc DEPS broadcast_op_handle) + hip_library( + nan_inf_utils + SRCS nan_inf_utils_detail.cc nan_inf_utils_detail.cu + DEPS framework_proto scope place) + hip_library( + all_reduce_op_handle + SRCS all_reduce_op_handle.cc + DEPS op_handle_base + scope + lod_tensor + ddim + memory + dynload_cuda + variable_visitor) + hip_library( + fused_all_reduce_op_handle + SRCS fused_all_reduce_op_handle.cc + DEPS op_handle_base + scope + lod_tensor + ddim + memory + dynload_cuda + variable_visitor + place + device_memory_aligment) + hip_library( + grad_merge_all_reduce_op_handle + SRCS grad_merge_all_reduce_op_handle.cc + DEPS op_handle_base + scope + lod_tensor + ddim + memory + dynload_cuda + variable_visitor + place + device_memory_aligment + all_reduce_op_handle + fused_all_reduce_op_handle) + + if(WITH_DISTRIBUTE) + hip_library( + reduce_op_handle + SRCS reduce_op_handle.cc + DEPS op_handle_base variable_visitor scope ddim dynload_cuda + selected_rows_functor) + else() + hip_library( + reduce_op_handle + SRCS reduce_op_handle.cc + DEPS op_handle_base variable_visitor scope ddim dynload_cuda + selected_rows_functor) + endif() + hip_library( + broadcast_op_handle + SRCS broadcast_op_handle.cc + DEPS op_handle_base scope ddim memory variable_visitor dynload_cuda) + hip_library( + fused_broadcast_op_handle + SRCS fused_broadcast_op_handle.cc + DEPS broadcast_op_handle) else() - if (WITH_ASCEND_CL) - cc_library(nan_inf_utils SRCS nan_inf_utils_detail.cc DEPS npu_op_runner framework_proto scope place) - else() - cc_library(nan_inf_utils SRCS nan_inf_utils_detail.cc DEPS framework_proto scope place) - endif() - cc_library(all_reduce_op_handle SRCS all_reduce_op_handle.cc DEPS op_handle_base scope lod_tensor ddim memory - variable_visitor) - cc_library(fused_all_reduce_op_handle SRCS fused_all_reduce_op_handle.cc DEPS op_handle_base scope lod_tensor ddim memory - variable_visitor place device_memory_aligment) - cc_library(grad_merge_all_reduce_op_handle SRCS grad_merge_all_reduce_op_handle.cc DEPS op_handle_base scope lod_tensor - ddim memory variable_visitor place device_memory_aligment all_reduce_op_handle fused_all_reduce_op_handle) - if(WITH_DISTRIBUTE) - cc_library(reduce_op_handle SRCS reduce_op_handle.cc DEPS op_handle_base variable_visitor scope - ddim selected_rows_functor) - else() - cc_library(reduce_op_handle SRCS reduce_op_handle.cc DEPS op_handle_base variable_visitor scope - ddim selected_rows_functor) - endif() - cc_library(broadcast_op_handle SRCS broadcast_op_handle.cc DEPS op_handle_base scope ddim memory variable_visitor) - cc_library(fused_broadcast_op_handle SRCS fused_broadcast_op_handle.cc DEPS broadcast_op_handle) + if(WITH_ASCEND_CL) + cc_library( + nan_inf_utils + SRCS nan_inf_utils_detail.cc + DEPS npu_op_runner framework_proto scope place) + else() + cc_library( + nan_inf_utils + SRCS nan_inf_utils_detail.cc + DEPS framework_proto scope place) + endif() + cc_library( + all_reduce_op_handle + SRCS all_reduce_op_handle.cc + DEPS op_handle_base scope lod_tensor ddim memory variable_visitor) + cc_library( + fused_all_reduce_op_handle + SRCS fused_all_reduce_op_handle.cc + DEPS op_handle_base + scope + lod_tensor + ddim + memory + variable_visitor + place + device_memory_aligment) + cc_library( + grad_merge_all_reduce_op_handle + SRCS grad_merge_all_reduce_op_handle.cc + DEPS op_handle_base + scope + lod_tensor + ddim + memory + variable_visitor + place + device_memory_aligment + all_reduce_op_handle + fused_all_reduce_op_handle) + if(WITH_DISTRIBUTE) + cc_library( + reduce_op_handle + SRCS reduce_op_handle.cc + DEPS op_handle_base variable_visitor scope ddim selected_rows_functor) + else() + cc_library( + reduce_op_handle + SRCS reduce_op_handle.cc + DEPS op_handle_base variable_visitor scope ddim selected_rows_functor) + endif() + cc_library( + broadcast_op_handle + SRCS broadcast_op_handle.cc + DEPS op_handle_base scope ddim memory variable_visitor) + cc_library( + fused_broadcast_op_handle + SRCS fused_broadcast_op_handle.cc + DEPS broadcast_op_handle) endif() -cc_library(gather_op_handle SRCS gather_op_handle.cc DEPS op_handle_base scope ddim memory variable_visitor) +cc_library( + gather_op_handle + SRCS gather_op_handle.cc + DEPS op_handle_base scope ddim memory variable_visitor) -cc_library(eager_deletion_op_handle SRCS eager_deletion_op_handle.cc DEPS lod_tensor selected_rows_utils reference_count_pass_helper) +cc_library( + eager_deletion_op_handle + SRCS eager_deletion_op_handle.cc + DEPS lod_tensor selected_rows_utils reference_count_pass_helper) -set(SSA_GRAPH_EXECUTOR_DEPS graph framework_proto +set(SSA_GRAPH_EXECUTOR_DEPS + graph + framework_proto multi_devices_helper reference_count_pass eager_deletion_pass @@ -98,60 +286,122 @@ set(SSA_GRAPH_EXECUTOR_DEPS graph framework_proto buffer_shared_cross_op_memory_reuse_pass inplace_addto_op_pass set_reader_device_info_utils) -cc_library(ssa_graph_executor SRCS ssa_graph_executor.cc DEPS ${SSA_GRAPH_EXECUTOR_DEPS}) +cc_library( + ssa_graph_executor + SRCS ssa_graph_executor.cc + DEPS ${SSA_GRAPH_EXECUTOR_DEPS}) -cc_library(threaded_ssa_graph_executor SRCS threaded_ssa_graph_executor.cc DEPS fetch_op_handle ssa_graph_executor scope - simple_threadpool device_context) +cc_library( + threaded_ssa_graph_executor + SRCS threaded_ssa_graph_executor.cc + DEPS fetch_op_handle ssa_graph_executor scope simple_threadpool + device_context) -cc_library(parallel_ssa_graph_executor SRCS parallel_ssa_graph_executor.cc DEPS threaded_ssa_graph_executor) +cc_library( + parallel_ssa_graph_executor + SRCS parallel_ssa_graph_executor.cc + DEPS threaded_ssa_graph_executor) set(ASYNC_SSA_GRAPH_EXECUTOR_DEPS threaded_ssa_graph_executor) -cc_library(async_ssa_graph_executor SRCS async_ssa_graph_executor.cc DEPS ${ASYNC_SSA_GRAPH_EXECUTOR_DEPS}) +cc_library( + async_ssa_graph_executor + SRCS async_ssa_graph_executor.cc + DEPS ${ASYNC_SSA_GRAPH_EXECUTOR_DEPS}) -cc_test(broadcast_op_test SRCS broadcast_op_handle_test.cc DEPS var_handle op_handle_base scope ddim memory - device_context broadcast_op_handle) -cc_test(gather_op_test SRCS gather_op_handle_test.cc DEPS var_handle op_handle_base scope ddim memory - device_context gather_op_handle) +cc_test( + broadcast_op_test + SRCS broadcast_op_handle_test.cc + DEPS var_handle + op_handle_base + scope + ddim + memory + device_context + broadcast_op_handle) +cc_test( + gather_op_test + SRCS gather_op_handle_test.cc + DEPS var_handle + op_handle_base + scope + ddim + memory + device_context + gather_op_handle) -cc_library(scope_buffered_monitor SRCS scope_buffered_monitor.cc DEPS scope profiler selected_rows_utils) -cc_library(scope_buffered_ssa_graph_executor SRCS scope_buffered_ssa_graph_executor.cc DEPS ssa_graph_executor scope_buffered_monitor) +cc_library( + scope_buffered_monitor + SRCS scope_buffered_monitor.cc + DEPS scope profiler selected_rows_utils) +cc_library( + scope_buffered_ssa_graph_executor + SRCS scope_buffered_ssa_graph_executor.cc + DEPS ssa_graph_executor scope_buffered_monitor) #cc_test(reduce_op_handle_test SRCS reduce_op_handle_test.cc DEPS var_handle op_handle_base scope ddim memory # device_context reduce_op_handle ) -cc_library(bind_threaded_ssa_graph_executor SRCS bind_threaded_ssa_graph_executor.cc - DEPS fetch_op_handle gflags ssa_graph_executor scope simple_threadpool device_context) -cc_library(fast_threaded_ssa_graph_executor SRCS fast_threaded_ssa_graph_executor.cc - DEPS fetch_async_op_handle ssa_graph_executor scope simple_threadpool device_context) -cc_test(fused_broadcast_op_test SRCS fused_broadcast_op_handle_test.cc DEPS fused_broadcast_op_handle) - -cc_test(exception_holder_test SRCS exception_holder_test.cc ) - -set(IR_PASS_DEPS graph_viz_pass multi_devices_graph_pass - multi_devices_graph_print_pass multi_devices_graph_check_pass - fuse_elewise_add_act_pass fuse_bn_act_pass fuse_bn_add_act_pass - multi_batch_merge_pass +cc_library( + bind_threaded_ssa_graph_executor + SRCS bind_threaded_ssa_graph_executor.cc + DEPS fetch_op_handle gflags ssa_graph_executor scope simple_threadpool + device_context) +cc_library( + fast_threaded_ssa_graph_executor + SRCS fast_threaded_ssa_graph_executor.cc + DEPS fetch_async_op_handle ssa_graph_executor scope simple_threadpool + device_context) +cc_test( + fused_broadcast_op_test + SRCS fused_broadcast_op_handle_test.cc + DEPS fused_broadcast_op_handle) + +cc_test(exception_holder_test SRCS exception_holder_test.cc) + +set(IR_PASS_DEPS + graph_viz_pass + multi_devices_graph_pass + multi_devices_graph_print_pass + multi_devices_graph_check_pass + fuse_elewise_add_act_pass + fuse_bn_act_pass + fuse_bn_add_act_pass + multi_batch_merge_pass fuse_relu_depthwise_conv_pass lock_free_optimize_pass sequential_execution_pass all_reduce_deps_pass add_reader_dependency_pass modify_op_lock_and_record_event_pass - coalesce_grad_tensor_pass fuse_all_reduce_op_pass backward_optimizer_op_deps_pass - fuse_adam_op_pass fuse_sgd_op_pass fuse_momentum_op_pass - sync_batch_norm_pass runtime_context_cache_pass graph_to_program_pass - fix_op_run_order_pass fuse_gemm_epilogue_pass) + coalesce_grad_tensor_pass + fuse_all_reduce_op_pass + backward_optimizer_op_deps_pass + fuse_adam_op_pass + fuse_sgd_op_pass + fuse_momentum_op_pass + sync_batch_norm_pass + runtime_context_cache_pass + graph_to_program_pass + fix_op_run_order_pass + fuse_gemm_epilogue_pass) -if (WITH_CINN) +if(WITH_CINN) set(IR_PASS_DEPS ${IR_PASS_DEPS} build_cinn_pass) endif() -if(NOT APPLE AND NOT WIN32 AND (WITH_GPU OR WITH_ROCM)) +if(NOT APPLE + AND NOT WIN32 + AND (WITH_GPU OR WITH_ROCM)) set(IR_PASS_DEPS ${IR_PASS_DEPS} fusion_group_pass) endif() -cc_library(build_strategy SRCS build_strategy.cc DEPS pass_builder ${IR_PASS_DEPS}) -cc_test(build_strategy_test SRCS build_strategy_test.cc - DEPS build_strategy op_registry op_proto_maker graph string_helper) +cc_library( + build_strategy + SRCS build_strategy.cc + DEPS pass_builder ${IR_PASS_DEPS}) +cc_test( + build_strategy_test + SRCS build_strategy_test.cc + DEPS build_strategy op_registry op_proto_maker graph string_helper) -if (WITH_MKLDNN) +if(WITH_MKLDNN) target_link_libraries(build_strategy mkldnn_placement_pass) endif() diff --git a/paddle/fluid/framework/fleet/CMakeLists.txt b/paddle/fluid/framework/fleet/CMakeLists.txt index 2e9104f40cc..3b22a4b0d5d 100644 --- a/paddle/fluid/framework/fleet/CMakeLists.txt +++ b/paddle/fluid/framework/fleet/CMakeLists.txt @@ -1,71 +1,125 @@ if(WITH_PSLIB) - if(WITH_PSLIB_BRPC) - set(BRPC_DEPS pslib_brpc) - else() - if(NOT WITH_HETERPS) - set(BRPC_DEPS brpc) - endif() - endif(WITH_PSLIB_BRPC) - cc_library(fleet_wrapper SRCS fleet_wrapper.cc DEPS framework_proto proto_desc op_registry variable_helper scope ${BRPC_DEPS} pslib) + if(WITH_PSLIB_BRPC) + set(BRPC_DEPS pslib_brpc) + else() + if(NOT WITH_HETERPS) + set(BRPC_DEPS brpc) + endif() + endif(WITH_PSLIB_BRPC) + cc_library( + fleet_wrapper + SRCS fleet_wrapper.cc + DEPS framework_proto + proto_desc + op_registry + variable_helper + scope + ${BRPC_DEPS} + pslib) else() - cc_library(fleet_wrapper SRCS fleet_wrapper.cc DEPS framework_proto variable_helper scope) + cc_library( + fleet_wrapper + SRCS fleet_wrapper.cc + DEPS framework_proto variable_helper scope) endif(WITH_PSLIB) if(WITH_HETERPS) - if(WITH_NCCL AND WITH_GPU) - nv_library(ps_gpu_wrapper SRCS ps_gpu_wrapper.cu ps_gpu_wrapper.cc - DEPS heter_ps gloo_wrapper ${BRPC_DEPS}) - add_subdirectory(heter_ps) - elseif(WITH_XPU_KP) - xpu_library(ps_gpu_wrapper SRCS ps_gpu_wrapper.kps ps_gpu_wrapper.cc - DEPS heter_ps gloo_wrapper ${BRPC_DEPS}) - add_subdirectory(heter_ps) - elseif(WITH_RCCL) - hip_library(ps_gpu_wrapper SRCS ps_gpu_wrapper.cu ps_gpu_wrapper.cc - DEPS heter_ps gloo_wrapper ${BRPC_DEPS}) - add_subdirectory(heter_ps) - endif() + if(WITH_NCCL AND WITH_GPU) + nv_library( + ps_gpu_wrapper + SRCS ps_gpu_wrapper.cu ps_gpu_wrapper.cc + DEPS heter_ps gloo_wrapper ${BRPC_DEPS}) + add_subdirectory(heter_ps) + elseif(WITH_XPU_KP) + xpu_library( + ps_gpu_wrapper + SRCS ps_gpu_wrapper.kps ps_gpu_wrapper.cc + DEPS heter_ps gloo_wrapper ${BRPC_DEPS}) + add_subdirectory(heter_ps) + elseif(WITH_RCCL) + hip_library( + ps_gpu_wrapper + SRCS ps_gpu_wrapper.cu ps_gpu_wrapper.cc + DEPS heter_ps gloo_wrapper ${BRPC_DEPS}) + add_subdirectory(heter_ps) + endif() else() - cc_library(ps_gpu_wrapper SRCS ps_gpu_wrapper.cc DEPS gloo_wrapper) + cc_library( + ps_gpu_wrapper + SRCS ps_gpu_wrapper.cc + DEPS gloo_wrapper) endif(WITH_HETERPS) if(WITH_NCCL OR WITH_RCCL) - cc_library(nccl_wrapper SRCS nccl_wrapper.cc DEPS framework_proto variable_helper scope) + cc_library( + nccl_wrapper + SRCS nccl_wrapper.cc + DEPS framework_proto variable_helper scope) endif() if(WITH_BOX_PS) - if(WITH_GPU) - nv_library(box_wrapper SRCS box_wrapper.cc box_wrapper.cu DEPS framework_proto lod_tensor box_ps) - endif() - if(WITH_ROCM) - hip_library(box_wrapper SRCS box_wrapper.cc box_wrapper.cu DEPS framework_proto lod_tensor box_ps) - endif() + if(WITH_GPU) + nv_library( + box_wrapper + SRCS box_wrapper.cc box_wrapper.cu + DEPS framework_proto lod_tensor box_ps) + endif() + if(WITH_ROCM) + hip_library( + box_wrapper + SRCS box_wrapper.cc box_wrapper.cu + DEPS framework_proto lod_tensor box_ps) + endif() else() - cc_library(box_wrapper SRCS box_wrapper.cc DEPS framework_proto lod_tensor) + cc_library( + box_wrapper + SRCS box_wrapper.cc + DEPS framework_proto lod_tensor) endif(WITH_BOX_PS) - if(WITH_GLOO) - cc_library(gloo_wrapper SRCS gloo_wrapper.cc DEPS framework_proto variable_helper scope gloo) - cc_library(metrics SRCS metrics.cc DEPS gloo_wrapper) + cc_library( + gloo_wrapper + SRCS gloo_wrapper.cc + DEPS framework_proto variable_helper scope gloo) + cc_library( + metrics + SRCS metrics.cc + DEPS gloo_wrapper) else() - cc_library(gloo_wrapper SRCS gloo_wrapper.cc DEPS framework_proto variable_helper scope) - cc_library(metrics SRCS metrics.cc DEPS gloo_wrapper) + cc_library( + gloo_wrapper + SRCS gloo_wrapper.cc + DEPS framework_proto variable_helper scope) + cc_library( + metrics + SRCS metrics.cc + DEPS gloo_wrapper) endif(WITH_GLOO) if(WITH_PSLIB) -set(DISTRIBUTE_COMPILE_FLAGS "-Wno-non-virtual-dtor -Wno-error=non-virtual-dtor -Wno-error=delete-non-virtual-dtor") -if (CMAKE_CXX_COMPILER_VERSION VERSION_GREATER 7.0) - set(DISTRIBUTE_COMPILE_FLAGS - "${DISTRIBUTE_COMPILE_FLAGS} -faligned-new") -endif() -set_source_files_properties(heter_wrapper.cc PROPERTIES COMPILE_FLAGS ${DISTRIBUTE_COMPILE_FLAGS}) + set(DISTRIBUTE_COMPILE_FLAGS + "-Wno-non-virtual-dtor -Wno-error=non-virtual-dtor -Wno-error=delete-non-virtual-dtor" + ) + if(CMAKE_CXX_COMPILER_VERSION VERSION_GREATER 7.0) + set(DISTRIBUTE_COMPILE_FLAGS "${DISTRIBUTE_COMPILE_FLAGS} -faligned-new") + endif() + set_source_files_properties( + heter_wrapper.cc PROPERTIES COMPILE_FLAGS ${DISTRIBUTE_COMPILE_FLAGS}) endif() -cc_library(heter_wrapper SRCS heter_wrapper.cc DEPS framework_proto -device_context heter_service_proto ${BRPC_DEPS}) +cc_library( + heter_wrapper + SRCS heter_wrapper.cc + DEPS framework_proto device_context heter_service_proto ${BRPC_DEPS}) -cc_test(test_fleet_cc SRCS test_fleet.cc DEPS fleet_wrapper gloo_wrapper fs shell) +cc_test( + test_fleet_cc + SRCS test_fleet.cc + DEPS fleet_wrapper gloo_wrapper fs shell) if(WITH_ASCEND OR WITH_ASCEND_CL) - cc_library(ascend_wrapper SRCS ascend_wrapper.cc DEPS framework_proto lod_tensor ascend_ge ascend_graph) + cc_library( + ascend_wrapper + SRCS ascend_wrapper.cc + DEPS framework_proto lod_tensor ascend_ge ascend_graph) endif() diff --git a/paddle/fluid/framework/fleet/heter_ps/CMakeLists.txt b/paddle/fluid/framework/fleet/heter_ps/CMakeLists.txt index d62fc1c0849..7540c6147f4 100644 --- a/paddle/fluid/framework/fleet/heter_ps/CMakeLists.txt +++ b/paddle/fluid/framework/fleet/heter_ps/CMakeLists.txt @@ -1,38 +1,96 @@ -IF(WITH_GPU) - SET(HETERPS_DEPS device_context) - if (${CMAKE_CUDA_COMPILER_VERSION} LESS 11.0) - SET(HETERPS_DEPS ${HETERPS_DEPS} cub) - endif() - if(WITH_PSCORE) - get_property(RPC_DEPS GLOBAL PROPERTY RPC_DEPS) - SET(HETERPS_DEPS ${HETERPS_DEPS} ${RPC_DEPS}) - endif() - nv_library(heter_comm_kernel SRCS heter_comm_kernel.cu feature_value.h DEPS ${HETERPS_DEPS}) - nv_library(hashtable_kernel SRCS hashtable_kernel.cu feature_value.h DEPS ${HETERPS_DEPS}) - nv_library(heter_comm SRCS heter_comm.h feature_value.h heter_resource.cc heter_resource.h mem_pool.h DEPS ${HETERPS_DEPS} heter_comm_kernel hashtable_kernel) - nv_test(test_heter_comm SRCS feature_value.h DEPS heter_comm) - nv_library(heter_ps SRCS heter_ps.cu DEPS heter_comm) - if(WITH_PSCORE) - nv_library(graph_gpu_ps SRCS graph_gpu_ps_table_inl.cu DEPS heter_comm table hashtable_kernel) - nv_library(graph_sampler SRCS graph_sampler_inl.h DEPS graph_gpu_ps) - nv_library(graph_gpu_wrapper SRCS graph_gpu_wrapper.cu DEPS heter_comm table heter_comm_kernel hashtable_kernel heter_ps ${HETERPS_DEPS} graph_gpu_ps) - nv_test(test_cpu_query SRCS test_cpu_query.cu DEPS heter_comm table heter_comm_kernel hashtable_kernel heter_ps ${HETERPS_DEPS} graph_gpu_ps graph_gpu_wrapper) - #ADD_EXECUTABLE(test_sample_rate test_sample_rate.cu) - #target_link_libraries(test_sample_rate heter_comm table heter_comm_kernel hashtable_kernel heter_ps ${HETERPS_DEPS}) - #nv_test(test_sample_rate SRCS test_sample_rate.cu DEPS heter_comm table heter_comm_kernel hashtable_kernel heter_ps ${HETERPS_DEPS}) - #ADD_EXECUTABLE(test_cpu_query test_cpu_query.cu) - #target_link_libraries(test_cpu_query graph_gpu_ps) - endif() -ENDIF() -IF(WITH_XPU_KP) - SET(HETERPS_DEPS device_context) - xpu_library(heter_comm_kernel SRCS heter_comm_kernel.h heter_comm_kernel.kps feature_value.h) - xpu_library(hashtable_kernel SRCS hashtable.h hashtable_kernel.kps) - cc_library(heter_comm SRCS heter_comm.h heter_resource.cc DEPS ${HETERPS_DEPS} heter_comm_kernel hashtable_kernel) - cc_library(heter_ps SRCS heter_ps.cc DEPS heter_comm) -ENDIF() -IF(WITH_ROCM) - hip_library(heter_comm SRCS heter_comm.h feature_value.h heter_resource.cc heter_resource.h hashtable.h DEPS cub device_context) - hip_test(test_heter_comm SRCS feature_value.h DEPS heter_comm) - hip_library(heter_ps SRCS heter_ps.cu DEPS heter_comm) -ENDIF() +if(WITH_GPU) + set(HETERPS_DEPS device_context) + if(${CMAKE_CUDA_COMPILER_VERSION} LESS 11.0) + set(HETERPS_DEPS ${HETERPS_DEPS} cub) + endif() + if(WITH_PSCORE) + get_property(RPC_DEPS GLOBAL PROPERTY RPC_DEPS) + set(HETERPS_DEPS ${HETERPS_DEPS} ${RPC_DEPS}) + endif() + nv_library( + heter_comm_kernel + SRCS heter_comm_kernel.cu feature_value.h + DEPS ${HETERPS_DEPS}) + nv_library( + hashtable_kernel + SRCS hashtable_kernel.cu feature_value.h + DEPS ${HETERPS_DEPS}) + nv_library( + heter_comm + SRCS heter_comm.h feature_value.h heter_resource.cc heter_resource.h + mem_pool.h + DEPS ${HETERPS_DEPS} heter_comm_kernel hashtable_kernel) + nv_test( + test_heter_comm + SRCS feature_value.h + DEPS heter_comm) + nv_library( + heter_ps + SRCS heter_ps.cu + DEPS heter_comm) + if(WITH_PSCORE) + nv_library( + graph_gpu_ps + SRCS graph_gpu_ps_table_inl.cu + DEPS heter_comm table hashtable_kernel) + nv_library( + graph_sampler + SRCS graph_sampler_inl.h + DEPS graph_gpu_ps) + nv_library( + graph_gpu_wrapper + SRCS graph_gpu_wrapper.cu + DEPS heter_comm + table + heter_comm_kernel + hashtable_kernel + heter_ps + ${HETERPS_DEPS} + graph_gpu_ps) + nv_test( + test_cpu_query + SRCS test_cpu_query.cu + DEPS heter_comm + table + heter_comm_kernel + hashtable_kernel + heter_ps + ${HETERPS_DEPS} + graph_gpu_ps + graph_gpu_wrapper) + #ADD_EXECUTABLE(test_sample_rate test_sample_rate.cu) + #target_link_libraries(test_sample_rate heter_comm table heter_comm_kernel hashtable_kernel heter_ps ${HETERPS_DEPS}) + #nv_test(test_sample_rate SRCS test_sample_rate.cu DEPS heter_comm table heter_comm_kernel hashtable_kernel heter_ps ${HETERPS_DEPS}) + #ADD_EXECUTABLE(test_cpu_query test_cpu_query.cu) + #target_link_libraries(test_cpu_query graph_gpu_ps) + endif() +endif() +if(WITH_XPU_KP) + set(HETERPS_DEPS device_context) + xpu_library(heter_comm_kernel SRCS heter_comm_kernel.h heter_comm_kernel.kps + feature_value.h) + xpu_library(hashtable_kernel SRCS hashtable.h hashtable_kernel.kps) + cc_library( + heter_comm + SRCS heter_comm.h heter_resource.cc + DEPS ${HETERPS_DEPS} heter_comm_kernel hashtable_kernel) + cc_library( + heter_ps + SRCS heter_ps.cc + DEPS heter_comm) +endif() +if(WITH_ROCM) + hip_library( + heter_comm + SRCS heter_comm.h feature_value.h heter_resource.cc heter_resource.h + hashtable.h + DEPS cub device_context) + hip_test( + test_heter_comm + SRCS feature_value.h + DEPS heter_comm) + hip_library( + heter_ps + SRCS heter_ps.cu + DEPS heter_comm) +endif() diff --git a/paddle/fluid/framework/io/CMakeLists.txt b/paddle/fluid/framework/io/CMakeLists.txt index 85b45f1a5bb..0033e825172 100644 --- a/paddle/fluid/framework/io/CMakeLists.txt +++ b/paddle/fluid/framework/io/CMakeLists.txt @@ -1,7 +1,16 @@ -cc_library(shell SRCS shell.cc DEPS string_helper glog timer enforce) -cc_library(fs SRCS fs.cc DEPS string_helper glog boost enforce shell) +cc_library( + shell + SRCS shell.cc + DEPS string_helper glog timer enforce) +cc_library( + fs + SRCS fs.cc + DEPS string_helper glog boost enforce shell) -cc_test(test_fs SRCS test_fs.cc DEPS fs shell) -if (WITH_CRYPTO) - add_subdirectory(crypto) -endif (WITH_CRYPTO) +cc_test( + test_fs + SRCS test_fs.cc + DEPS fs shell) +if(WITH_CRYPTO) + add_subdirectory(crypto) +endif(WITH_CRYPTO) diff --git a/paddle/fluid/framework/io/crypto/CMakeLists.txt b/paddle/fluid/framework/io/crypto/CMakeLists.txt index ae16353ec92..e2de877c39e 100644 --- a/paddle/fluid/framework/io/crypto/CMakeLists.txt +++ b/paddle/fluid/framework/io/crypto/CMakeLists.txt @@ -1,3 +1,12 @@ -cc_library(paddle_crypto SRCS cipher_utils.cc cipher.cc aes_cipher.cc DEPS cryptopp enforce) -cc_test(aes_cipher_test SRCS aes_cipher_test.cc DEPS paddle_crypto) -cc_test(cipher_utils_test SRCS cipher_utils_test.cc DEPS paddle_crypto) +cc_library( + paddle_crypto + SRCS cipher_utils.cc cipher.cc aes_cipher.cc + DEPS cryptopp enforce) +cc_test( + aes_cipher_test + SRCS aes_cipher_test.cc + DEPS paddle_crypto) +cc_test( + cipher_utils_test + SRCS cipher_utils_test.cc + DEPS paddle_crypto) diff --git a/paddle/fluid/framework/ir/CMakeLists.txt b/paddle/fluid/framework/ir/CMakeLists.txt index 3fc938f7641..374b5490d5d 100755 --- a/paddle/fluid/framework/ir/CMakeLists.txt +++ b/paddle/fluid/framework/ir/CMakeLists.txt @@ -1,6 +1,11 @@ -set(pass_file ${PADDLE_BINARY_DIR}/paddle/fluid/inference/api/paddle_inference_pass.h.tmp) -set(pass_file_final ${PADDLE_BINARY_DIR}/paddle/fluid/inference/api/paddle_inference_pass.h) -file(WRITE ${pass_file} "// Generated by the paddle/fluid/framework/ir/CMakeLists.txt. DO NOT EDIT!\n\n") +set(pass_file + ${PADDLE_BINARY_DIR}/paddle/fluid/inference/api/paddle_inference_pass.h.tmp) +set(pass_file_final + ${PADDLE_BINARY_DIR}/paddle/fluid/inference/api/paddle_inference_pass.h) +file( + WRITE ${pass_file} + "// Generated by the paddle/fluid/framework/ir/CMakeLists.txt. DO NOT EDIT!\n\n" +) file(APPEND ${pass_file} "\#pragma once\n") file(APPEND ${pass_file} "\#include \"paddle/fluid/framework/ir/pass.h\"\n") @@ -9,54 +14,103 @@ copy_if_different(${pass_file} ${pass_file_final}) add_subdirectory(fuse_optimizer_ops_pass) add_subdirectory(memory_optimize_pass) add_subdirectory(multi_devices_graph_pass) -if(NOT APPLE AND NOT WIN32 AND (WITH_GPU OR WITH_ROCM)) - add_subdirectory(fusion_group) +if(NOT APPLE + AND NOT WIN32 + AND (WITH_GPU OR WITH_ROCM)) + add_subdirectory(fusion_group) endif() # Usage: pass_library(target inference) will append to paddle_inference_pass.h unset(INFER_IR_PASSES CACHE) # clear the global variable function(pass_library TARGET DEST) - set(options "") - set(oneValueArgs "") - set(multiValueArgs SRCS DEPS DIR) - set(targetPrefix "") - - cmake_parse_arguments(pass_library "${options}" "${oneValueArgs}" "${multiValueArgs}" ${ARGN}) - if(pass_library_DIR) - cc_library(${TARGET} SRCS ${pass_library_DIR}/${TARGET}.cc DEPS graph_pattern_detector pass fuse_pass_base op_version_registry ${pass_library_DEPS}) - else() - cc_library(${TARGET} SRCS ${TARGET}.cc DEPS graph_pattern_detector pass fuse_pass_base op_version_registry ${pass_library_DEPS}) - endif() + set(options "") + set(oneValueArgs "") + set(multiValueArgs SRCS DEPS DIR) + set(targetPrefix "") + + cmake_parse_arguments(pass_library "${options}" "${oneValueArgs}" + "${multiValueArgs}" ${ARGN}) + if(pass_library_DIR) + cc_library( + ${TARGET} + SRCS ${pass_library_DIR}/${TARGET}.cc + DEPS graph_pattern_detector pass fuse_pass_base op_version_registry + ${pass_library_DEPS}) + else() + cc_library( + ${TARGET} + SRCS ${TARGET}.cc + DEPS graph_pattern_detector pass fuse_pass_base op_version_registry + ${pass_library_DEPS}) + endif() - # add more DEST here, such as train, dist and collect USE_PASS into a file automatically. - if (${DEST} STREQUAL "base" OR ${DEST} STREQUAL "inference") - if(NOT CMAKE_BUILD_TYPE STREQUAL "Release") - message(STATUS "add pass ${TARGET} ${DEST}") - endif() - file(APPEND ${pass_file} "USE_PASS(${TARGET});\n") - set(INFER_IR_PASSES ${INFER_IR_PASSES} ${TARGET} CACHE INTERNAL "") + # add more DEST here, such as train, dist and collect USE_PASS into a file automatically. + if(${DEST} STREQUAL "base" OR ${DEST} STREQUAL "inference") + if(NOT CMAKE_BUILD_TYPE STREQUAL "Release") + message(STATUS "add pass ${TARGET} ${DEST}") endif() + file(APPEND ${pass_file} "USE_PASS(${TARGET});\n") + set(INFER_IR_PASSES + ${INFER_IR_PASSES} ${TARGET} + CACHE INTERNAL "") + endif() endfunction() -cc_library(node SRCS node.cc DEPS proto_desc) -cc_library(graph SRCS graph.cc DEPS node pretty_log) -cc_library(graph_helper SRCS graph_helper.cc DEPS graph) -cc_library(pass SRCS pass.cc DEPS graph node graph_helper) -cc_library(graph_traits SRCS graph_traits.cc DEPS graph) -cc_library(cost_model SRCS cost_model.cc DEPS executor graph profiler proto_desc device_tracer) +cc_library( + node + SRCS node.cc + DEPS proto_desc) +cc_library( + graph + SRCS graph.cc + DEPS node pretty_log) +cc_library( + graph_helper + SRCS graph_helper.cc + DEPS graph) +cc_library( + pass + SRCS pass.cc + DEPS graph node graph_helper) +cc_library( + graph_traits + SRCS graph_traits.cc + DEPS graph) +cc_library( + cost_model + SRCS cost_model.cc + DEPS executor graph profiler proto_desc device_tracer) -SET(GRAPH_PATTERN_DETECTOR_DEPS graph graph_helper graph_traits) -if (WITH_TESTING) - SET(GRAPH_PATTERN_DETECTOR_DEPS ${GRAPH_PATTERN_DETECTOR_DEPS} gtest) +set(GRAPH_PATTERN_DETECTOR_DEPS graph graph_helper graph_traits) +if(WITH_TESTING) + set(GRAPH_PATTERN_DETECTOR_DEPS ${GRAPH_PATTERN_DETECTOR_DEPS} gtest) endif(WITH_TESTING) -cc_library(graph_pattern_detector SRCS graph_pattern_detector.cc DEPS ${GRAPH_PATTERN_DETECTOR_DEPS}) +cc_library( + graph_pattern_detector + SRCS graph_pattern_detector.cc + DEPS ${GRAPH_PATTERN_DETECTOR_DEPS}) -cc_library(op_compat_sensible_pass SRCS op_compat_sensible_pass.cc DEPS graph_pattern_detector op_def_api pass) -cc_library(subgraph_detector SRCS subgraph_detector.cc DEPS graph_pattern_detector executor) -cc_library(fuse_pass_base SRCS fuse_pass_base.cc DEPS op_compat_sensible_pass) -cc_library(placement_pass_base SRCS placement_pass_base.cc DEPS pass) +cc_library( + op_compat_sensible_pass + SRCS op_compat_sensible_pass.cc + DEPS graph_pattern_detector op_def_api pass) +cc_library( + subgraph_detector + SRCS subgraph_detector.cc + DEPS graph_pattern_detector executor) +cc_library( + fuse_pass_base + SRCS fuse_pass_base.cc + DEPS op_compat_sensible_pass) +cc_library( + placement_pass_base + SRCS placement_pass_base.cc + DEPS pass) -cc_library(coalesce_grad_tensor_pass SRCS coalesce_grad_tensor_pass.cc DEPS graph graph_helper) +cc_library( + coalesce_grad_tensor_pass + SRCS coalesce_grad_tensor_pass.cc + DEPS graph graph_helper) pass_library(graph_to_program_pass base) pass_library(graph_viz_pass base) @@ -106,141 +160,348 @@ pass_library(generate_pass DEPS pass_desc_proto) target_link_libraries(generate_pass pass_desc_proto) if(WITH_TENSORRT) - pass_library(trt_map_matmul_to_mul_pass inference) - pass_library(trt_embedding_eltwise_layernorm_fuse_pass inference) - pass_library(trt_multihead_matmul_fuse_pass inference) - pass_library(trt_skip_layernorm_fuse_pass inference) - pass_library(preln_embedding_eltwise_layernorm_fuse_pass inference) - pass_library(preln_skip_layernorm_fuse_pass inference) - pass_library(set_transformer_input_convert_pass inference) - pass_library(remove_padding_recover_padding_pass inference) - pass_library(delete_remove_padding_recover_padding_pass inference) + pass_library(trt_map_matmul_to_mul_pass inference) + pass_library(trt_embedding_eltwise_layernorm_fuse_pass inference) + pass_library(trt_multihead_matmul_fuse_pass inference) + pass_library(trt_skip_layernorm_fuse_pass inference) + pass_library(preln_embedding_eltwise_layernorm_fuse_pass inference) + pass_library(preln_skip_layernorm_fuse_pass inference) + pass_library(set_transformer_input_convert_pass inference) + pass_library(remove_padding_recover_padding_pass inference) + pass_library(delete_remove_padding_recover_padding_pass inference) endif() if(WITH_GPU OR WITH_ROCM) - pass_library(cudnn_placement_pass base DEPS placement_pass_base) - pass_library(embedding_eltwise_layernorm_fuse_pass inference) + pass_library(cudnn_placement_pass base DEPS placement_pass_base) + pass_library(embedding_eltwise_layernorm_fuse_pass inference) endif() if(WITH_MKLDNN) - pass_library(mkldnn_placement_pass base DEPS placement_pass_base DIR mkldnn) - pass_library(mkldnn_inplace_pass inference DEPS mkldnn_placement_pass op_registry elementwise_add_op gelu_op activation_op softmax_op softmax DIR mkldnn) - pass_library(depthwise_conv_mkldnn_pass base DIR mkldnn) - pass_library(conv_affine_channel_mkldnn_fuse_pass inference DIR mkldnn) - pass_library(conv_bias_mkldnn_fuse_pass inference DIR mkldnn) - pass_library(conv_activation_mkldnn_fuse_pass inference DIR mkldnn) - pass_library(conv_concat_relu_mkldnn_fuse_pass inference DIR mkldnn) - pass_library(conv_elementwise_add_mkldnn_fuse_pass inference DIR mkldnn) - pass_library(int8_scale_calculation_mkldnn_pass inference DIR mkldnn) - pass_library(fc_elementwise_add_mkldnn_fuse_pass inference DIR mkldnn) - pass_library(scale_matmul_fuse_pass inference DIR mkldnn) - pass_library(cpu_bfloat16_placement_pass inference DIR mkldnn) - pass_library(cpu_bfloat16_pass inference DIR mkldnn) - pass_library(fc_mkldnn_pass inference DIR mkldnn) - pass_library(interpolate_mkldnn_pass inference DIR mkldnn) - pass_library(softplus_activation_mkldnn_fuse_pass inference DIR mkldnn) - pass_library(shuffle_channel_mkldnn_detect_pass inference DIR mkldnn) - pass_library(fc_act_mkldnn_fuse_pass inference DIR mkldnn) - pass_library(elt_act_mkldnn_fuse_pass inference DIR mkldnn) - pass_library(cpu_quantize_placement_pass base DIR mkldnn) - pass_library(cpu_quantize_pass inference DIR mkldnn) - pass_library(cpu_quantize_squash_pass inference DIR mkldnn) - pass_library(reshape_transpose_matmul_mkldnn_fuse_pass inference DIR mkldnn) - pass_library(reshape_transpose_matmul_v2_mkldnn_fuse_pass inference DIR mkldnn) - pass_library(matmul_transpose_reshape_fuse_pass inference DIR mkldnn) - pass_library(matmul_v2_transpose_reshape_fuse_pass inference DIR mkldnn) - pass_library(batch_norm_act_fuse_pass inference DIR mkldnn) - pass_library(multi_gru_fuse_pass inference DIR mkldnn) - pass_library(multi_gru_seq_fuse_pass inference DIR mkldnn) - pass_library(quant_dequant_mkldnn_pass inference DIR mkldnn) - pass_library(compute_propagate_scales_mkldnn_pass inference DIR mkldnn) + pass_library(mkldnn_placement_pass base DEPS placement_pass_base DIR mkldnn) + pass_library( + mkldnn_inplace_pass + inference + DEPS + mkldnn_placement_pass + op_registry + elementwise_add_op + gelu_op + activation_op + softmax_op + softmax + DIR + mkldnn) + pass_library(depthwise_conv_mkldnn_pass base DIR mkldnn) + pass_library(conv_affine_channel_mkldnn_fuse_pass inference DIR mkldnn) + pass_library(conv_bias_mkldnn_fuse_pass inference DIR mkldnn) + pass_library(conv_activation_mkldnn_fuse_pass inference DIR mkldnn) + pass_library(conv_concat_relu_mkldnn_fuse_pass inference DIR mkldnn) + pass_library(conv_elementwise_add_mkldnn_fuse_pass inference DIR mkldnn) + pass_library(int8_scale_calculation_mkldnn_pass inference DIR mkldnn) + pass_library(fc_elementwise_add_mkldnn_fuse_pass inference DIR mkldnn) + pass_library(scale_matmul_fuse_pass inference DIR mkldnn) + pass_library(cpu_bfloat16_placement_pass inference DIR mkldnn) + pass_library(cpu_bfloat16_pass inference DIR mkldnn) + pass_library(fc_mkldnn_pass inference DIR mkldnn) + pass_library(interpolate_mkldnn_pass inference DIR mkldnn) + pass_library(softplus_activation_mkldnn_fuse_pass inference DIR mkldnn) + pass_library(shuffle_channel_mkldnn_detect_pass inference DIR mkldnn) + pass_library(fc_act_mkldnn_fuse_pass inference DIR mkldnn) + pass_library(elt_act_mkldnn_fuse_pass inference DIR mkldnn) + pass_library(cpu_quantize_placement_pass base DIR mkldnn) + pass_library(cpu_quantize_pass inference DIR mkldnn) + pass_library(cpu_quantize_squash_pass inference DIR mkldnn) + pass_library(reshape_transpose_matmul_mkldnn_fuse_pass inference DIR mkldnn) + pass_library(reshape_transpose_matmul_v2_mkldnn_fuse_pass inference DIR + mkldnn) + pass_library(matmul_transpose_reshape_fuse_pass inference DIR mkldnn) + pass_library(matmul_v2_transpose_reshape_fuse_pass inference DIR mkldnn) + pass_library(batch_norm_act_fuse_pass inference DIR mkldnn) + pass_library(multi_gru_fuse_pass inference DIR mkldnn) + pass_library(multi_gru_seq_fuse_pass inference DIR mkldnn) + pass_library(quant_dequant_mkldnn_pass inference DIR mkldnn) + pass_library(compute_propagate_scales_mkldnn_pass inference DIR mkldnn) endif() if(WITH_IPU) - pass_library(forward_graph_extract_pass base DIR ipu) - pass_library(optimizer_extract_pass base DIR ipu) - pass_library(optimizer_state_align_pass base DIR ipu) - pass_library(ipu_graph_builder_pass base DIR ipu) - pass_library(ipu_runtime_replacer_pass base DIR ipu) - pass_library(inference_process_pass base DIR ipu) - pass_library(inference_postprocess_pass base DIR ipu) - pass_library(popart_canonicalization_pass base DIR ipu) - pass_library(ipu_inplace_pass base DIR ipu) - pass_library(infer_shape_pass base DIR ipu) - pass_library(delete_scale_op_pass base DIR ipu) - pass_library(avg_shard_pass base DIR ipu) + pass_library(forward_graph_extract_pass base DIR ipu) + pass_library(optimizer_extract_pass base DIR ipu) + pass_library(optimizer_state_align_pass base DIR ipu) + pass_library(ipu_graph_builder_pass base DIR ipu) + pass_library(ipu_runtime_replacer_pass base DIR ipu) + pass_library(inference_process_pass base DIR ipu) + pass_library(inference_postprocess_pass base DIR ipu) + pass_library(popart_canonicalization_pass base DIR ipu) + pass_library(ipu_inplace_pass base DIR ipu) + pass_library(infer_shape_pass base DIR ipu) + pass_library(delete_scale_op_pass base DIR ipu) + pass_library(avg_shard_pass base DIR ipu) endif() -cc_library(fuse_bn_act_pass SRCS fuse_bn_act_pass.cc DEPS pass graph_pattern_detector ) -cc_library(fuse_bn_add_act_pass SRCS fuse_bn_add_act_pass.cc DEPS pass graph_pattern_detector ) -cc_library(fuse_elewise_add_act_pass SRCS fuse_elewise_add_act_pass.cc DEPS pass graph_pattern_detector ) -cc_library(fuse_gemm_epilogue_pass SRCS fuse_gemm_epilogue_pass.cc DEPS pass graph_pattern_detector ) -cc_library(fuse_relu_depthwise_conv_pass SRCS fuse_relu_depthwise_conv_pass.cc DEPS pass graph_pattern_detector ) - -set(GLOB_PASS_LIB ${PASS_LIBRARY} CACHE INTERNAL "Global PASS library") - -cc_library(pass_builder SRCS pass_builder.cc DEPS pass) -cc_library(pass_test_util SRCS pass_test_util.cc DEPS graph pass) - -cc_test(node_test SRCS node_test.cc DEPS node) -cc_test(pass_test SRCS pass_test.cc DEPS graph pass graph_helper) -cc_test(graph_test SRCS graph_test.cc DEPS graph graph_helper op_registry) -cc_test(graph_helper_test SRCS graph_helper_test.cc DEPS graph graph_helper op_registry) -cc_test(graph_to_program_pass_test SRCS graph_to_program_pass_test.cc DEPS graph_to_program_pass) -cc_test(cost_model_test SRCS cost_model_test.cc DEPS cost_model op_registry) -cc_test(test_graph_pattern_detector SRCS graph_pattern_detector_tester.cc DEPS graph_pattern_detector) -cc_test(test_op_compat_sensible_pass SRCS op_compat_sensible_pass_tester.cc DEPS op_compat_sensible_pass) -cc_test(test_fc_fuse_pass_cc SRCS fc_fuse_pass_tester.cc DEPS fc_fuse_pass framework_proto) -cc_test(test_fc_lstm_fuse_pass_cc SRCS fc_lstm_fuse_pass_tester.cc DEPS fc_lstm_fuse_pass framework_proto) -cc_test(test_fc_gru_fuse_pass_cc SRCS fc_gru_fuse_pass_tester.cc DEPS fc_gru_fuse_pass framework_proto) -cc_test(test_seqpool_concat_fuse_pass SRCS seqpool_concat_fuse_pass_tester.cc DEPS seqpool_concat_fuse_pass framework_proto) -cc_test(test_seqpool_cvm_concat_fuse_pass SRCS seqpool_cvm_concat_fuse_pass_tester.cc DEPS seqpool_cvm_concat_fuse_pass framework_proto) -cc_test(test_repeated_fc_relu_fuse_pass_cc SRCS repeated_fc_relu_fuse_pass_tester.cc DEPS repeated_fc_relu_fuse_pass framework_proto) -cc_test(test_is_test_pass SRCS is_test_pass_tester.cc DEPS is_test_pass) -cc_test(test_simplify_with_basic_ops_pass SRCS simplify_with_basic_ops_pass_tester.cc DEPS simplify_with_basic_ops_pass) -cc_test(test_fc_elementwise_layernorm_fuse_pass_cc SRCS fc_elementwise_layernorm_fuse_pass_tester.cc DEPS fc_elementwise_layernorm_fuse_pass) -cc_test(test_skip_layernorm_fuse_pass SRCS skip_layernorm_fuse_pass_tester.cc DEPS skip_layernorm_fuse_pass) -cc_test(test_multihead_matmul_fuse_pass SRCS multihead_matmul_fuse_pass_tester.cc DEPS multihead_matmul_fuse_pass) -cc_test(test_conv_bn_fuse_pass_cc SRCS conv_bn_fuse_pass_tester.cc DEPS conv_bn_fuse_pass) -cc_test(test_adaptive_pool2d_convert_global_pass SRCS adaptive_pool2d_convert_global_pass_tester.cc DEPS adaptive_pool2d_convert_global_pass) -cc_test(test_unsqueeze2_eltwise_fuse_pass_cc SRCS unsqueeze2_eltwise_fuse_pass_tester.cc DEPS unsqueeze2_eltwise_fuse_pass) -cc_test(test_generate_pass_cc SRCS generate_pass_tester.cc DEPS generate_pass pass_desc_proto) +cc_library( + fuse_bn_act_pass + SRCS fuse_bn_act_pass.cc + DEPS pass graph_pattern_detector) +cc_library( + fuse_bn_add_act_pass + SRCS fuse_bn_add_act_pass.cc + DEPS pass graph_pattern_detector) +cc_library( + fuse_elewise_add_act_pass + SRCS fuse_elewise_add_act_pass.cc + DEPS pass graph_pattern_detector) +cc_library( + fuse_gemm_epilogue_pass + SRCS fuse_gemm_epilogue_pass.cc + DEPS pass graph_pattern_detector) +cc_library( + fuse_relu_depthwise_conv_pass + SRCS fuse_relu_depthwise_conv_pass.cc + DEPS pass graph_pattern_detector) + +set(GLOB_PASS_LIB + ${PASS_LIBRARY} + CACHE INTERNAL "Global PASS library") + +cc_library( + pass_builder + SRCS pass_builder.cc + DEPS pass) +cc_library( + pass_test_util + SRCS pass_test_util.cc + DEPS graph pass) + +cc_test( + node_test + SRCS node_test.cc + DEPS node) +cc_test( + pass_test + SRCS pass_test.cc + DEPS graph pass graph_helper) +cc_test( + graph_test + SRCS graph_test.cc + DEPS graph graph_helper op_registry) +cc_test( + graph_helper_test + SRCS graph_helper_test.cc + DEPS graph graph_helper op_registry) +cc_test( + graph_to_program_pass_test + SRCS graph_to_program_pass_test.cc + DEPS graph_to_program_pass) +cc_test( + cost_model_test + SRCS cost_model_test.cc + DEPS cost_model op_registry) +cc_test( + test_graph_pattern_detector + SRCS graph_pattern_detector_tester.cc + DEPS graph_pattern_detector) +cc_test( + test_op_compat_sensible_pass + SRCS op_compat_sensible_pass_tester.cc + DEPS op_compat_sensible_pass) +cc_test( + test_fc_fuse_pass_cc + SRCS fc_fuse_pass_tester.cc + DEPS fc_fuse_pass framework_proto) +cc_test( + test_fc_lstm_fuse_pass_cc + SRCS fc_lstm_fuse_pass_tester.cc + DEPS fc_lstm_fuse_pass framework_proto) +cc_test( + test_fc_gru_fuse_pass_cc + SRCS fc_gru_fuse_pass_tester.cc + DEPS fc_gru_fuse_pass framework_proto) +cc_test( + test_seqpool_concat_fuse_pass + SRCS seqpool_concat_fuse_pass_tester.cc + DEPS seqpool_concat_fuse_pass framework_proto) +cc_test( + test_seqpool_cvm_concat_fuse_pass + SRCS seqpool_cvm_concat_fuse_pass_tester.cc + DEPS seqpool_cvm_concat_fuse_pass framework_proto) +cc_test( + test_repeated_fc_relu_fuse_pass_cc + SRCS repeated_fc_relu_fuse_pass_tester.cc + DEPS repeated_fc_relu_fuse_pass framework_proto) +cc_test( + test_is_test_pass + SRCS is_test_pass_tester.cc + DEPS is_test_pass) +cc_test( + test_simplify_with_basic_ops_pass + SRCS simplify_with_basic_ops_pass_tester.cc + DEPS simplify_with_basic_ops_pass) +cc_test( + test_fc_elementwise_layernorm_fuse_pass_cc + SRCS fc_elementwise_layernorm_fuse_pass_tester.cc + DEPS fc_elementwise_layernorm_fuse_pass) +cc_test( + test_skip_layernorm_fuse_pass + SRCS skip_layernorm_fuse_pass_tester.cc + DEPS skip_layernorm_fuse_pass) +cc_test( + test_multihead_matmul_fuse_pass + SRCS multihead_matmul_fuse_pass_tester.cc + DEPS multihead_matmul_fuse_pass) +cc_test( + test_conv_bn_fuse_pass_cc + SRCS conv_bn_fuse_pass_tester.cc + DEPS conv_bn_fuse_pass) +cc_test( + test_adaptive_pool2d_convert_global_pass + SRCS adaptive_pool2d_convert_global_pass_tester.cc + DEPS adaptive_pool2d_convert_global_pass) +cc_test( + test_unsqueeze2_eltwise_fuse_pass_cc + SRCS unsqueeze2_eltwise_fuse_pass_tester.cc + DEPS unsqueeze2_eltwise_fuse_pass) +cc_test( + test_generate_pass_cc + SRCS generate_pass_tester.cc + DEPS generate_pass pass_desc_proto) if(WITH_GPU OR WITH_ROCM) - cc_test(test_embedding_eltwise_layernorm_fuse_pass SRCS embedding_eltwise_layernorm_fuse_pass_tester.cc DEPS embedding_eltwise_layernorm_fuse_pass) - cc_test(test_cudnn_placement_pass SRCS cudnn_placement_pass_tester.cc DEPS cudnn_placement_pass) + cc_test( + test_embedding_eltwise_layernorm_fuse_pass + SRCS embedding_eltwise_layernorm_fuse_pass_tester.cc + DEPS embedding_eltwise_layernorm_fuse_pass) + cc_test( + test_cudnn_placement_pass + SRCS cudnn_placement_pass_tester.cc + DEPS cudnn_placement_pass) endif() if(NOT WIN32) - cc_test(test_sync_batch_norm_pass SRCS sync_batch_norm_pass_tester.cc DEPS sync_batch_norm_pass) + cc_test( + test_sync_batch_norm_pass + SRCS sync_batch_norm_pass_tester.cc + DEPS sync_batch_norm_pass) endif() -if (WITH_MKLDNN) - cc_test(test_depthwise_conv_mkldnn_pass SRCS mkldnn/depthwise_conv_mkldnn_pass_tester.cc DEPS depthwise_conv_mkldnn_pass) - cc_test(test_conv_bias_mkldnn_fuse_pass_cc SRCS mkldnn/conv_bias_mkldnn_fuse_pass_tester.cc DEPS conv_bias_mkldnn_fuse_pass naive_executor) - cc_test(test_conv_activation_mkldnn_fuse_pass SRCS mkldnn/conv_activation_mkldnn_fuse_pass_tester.cc DEPS conv_activation_mkldnn_fuse_pass) - cc_test(test_conv_concat_relu_mkldnn_fuse_pass SRCS mkldnn/conv_concat_relu_mkldnn_fuse_pass_tester.cc DEPS conv_concat_relu_mkldnn_fuse_pass) - cc_test(test_conv_elementwise_add_mkldnn_fuse_pass SRCS mkldnn/conv_elementwise_add_mkldnn_fuse_pass_tester.cc DEPS conv_elementwise_add_mkldnn_fuse_pass pass_test_util) - cc_test(test_int8_scale_calculation_mkldnn_pass SRCS mkldnn/int8_scale_calculation_mkldnn_pass_tester.cc DEPS int8_scale_calculation_mkldnn_pass pass_test_util) - cc_test(test_fc_elementwise_add_mkldnn_fuse_pass SRCS mkldnn/fc_elementwise_add_mkldnn_fuse_pass_tester.cc DEPS fc_elementwise_add_mkldnn_fuse_pass pass_test_util) - cc_test(test_fc_act_mkldnn_fuse_pass SRCS mkldnn/fc_act_mkldnn_fuse_pass_tester.cc DEPS fc_act_mkldnn_fuse_pass pass_test_util) - cc_test(test_batch_norm_act_fuse_pass SRCS mkldnn/batch_norm_act_fuse_pass_tester.cc DEPS batch_norm_act_fuse_pass pass_test_util) - set(TEST_CONV_BN_PASS_DEPS conv_bn_fuse_pass graph_to_program_pass conv_op conv_transpose_op math_function im2col vol2col batch_norm_op gelu_op activation_op elementwise_add_op concat_and_split naive_executor device_context eigen_function) -if (WITH_GPU OR WITH_ROCM) +if(WITH_MKLDNN) + cc_test( + test_depthwise_conv_mkldnn_pass + SRCS mkldnn/depthwise_conv_mkldnn_pass_tester.cc + DEPS depthwise_conv_mkldnn_pass) + cc_test( + test_conv_bias_mkldnn_fuse_pass_cc + SRCS mkldnn/conv_bias_mkldnn_fuse_pass_tester.cc + DEPS conv_bias_mkldnn_fuse_pass naive_executor) + cc_test( + test_conv_activation_mkldnn_fuse_pass + SRCS mkldnn/conv_activation_mkldnn_fuse_pass_tester.cc + DEPS conv_activation_mkldnn_fuse_pass) + cc_test( + test_conv_concat_relu_mkldnn_fuse_pass + SRCS mkldnn/conv_concat_relu_mkldnn_fuse_pass_tester.cc + DEPS conv_concat_relu_mkldnn_fuse_pass) + cc_test( + test_conv_elementwise_add_mkldnn_fuse_pass + SRCS mkldnn/conv_elementwise_add_mkldnn_fuse_pass_tester.cc + DEPS conv_elementwise_add_mkldnn_fuse_pass pass_test_util) + cc_test( + test_int8_scale_calculation_mkldnn_pass + SRCS mkldnn/int8_scale_calculation_mkldnn_pass_tester.cc + DEPS int8_scale_calculation_mkldnn_pass pass_test_util) + cc_test( + test_fc_elementwise_add_mkldnn_fuse_pass + SRCS mkldnn/fc_elementwise_add_mkldnn_fuse_pass_tester.cc + DEPS fc_elementwise_add_mkldnn_fuse_pass pass_test_util) + cc_test( + test_fc_act_mkldnn_fuse_pass + SRCS mkldnn/fc_act_mkldnn_fuse_pass_tester.cc + DEPS fc_act_mkldnn_fuse_pass pass_test_util) + cc_test( + test_batch_norm_act_fuse_pass + SRCS mkldnn/batch_norm_act_fuse_pass_tester.cc + DEPS batch_norm_act_fuse_pass pass_test_util) + set(TEST_CONV_BN_PASS_DEPS + conv_bn_fuse_pass + graph_to_program_pass + conv_op + conv_transpose_op + math_function + im2col + vol2col + batch_norm_op + gelu_op + activation_op + elementwise_add_op + concat_and_split + naive_executor + device_context + eigen_function) + if(WITH_GPU OR WITH_ROCM) set(TEST_CONV_BN_PASS_DEPS ${TEST_CONV_BN_PASS_DEPS} depthwise_conv) + endif() + cc_test( + test_conv_batch_norm_mkldnn_fuse_pass + SRCS mkldnn/mkldnn_conv_bn_fuse_pass_tester.cc + DEPS ${TEST_CONV_BN_PASS_DEPS}) + cc_test( + test_scale_matmul_fuse_pass + SRCS mkldnn/scale_matmul_fuse_pass_tester.cc + DEPS scale_matmul_fuse_pass) + cc_test( + test_mkldnn_placement_pass + SRCS mkldnn/mkldnn_placement_pass_tester.cc + DEPS mkldnn_placement_pass) + cc_test( + test_mkldnn_inplace_pass + SRCS mkldnn/mkldnn_inplace_pass_tester.cc + DEPS mkldnn_inplace_pass) + cc_test( + test_compute_propagate_scales_mkldnn_pass + SRCS mkldnn/compute_propagate_scales_mkldnn_pass_tester.cc + DEPS compute_propagate_scales_mkldnn_pass naive_executor) + cc_test( + test_cpu_quantize_placement_pass + SRCS mkldnn/cpu_quantize_placement_pass_tester.cc + DEPS cpu_quantize_placement_pass) + cc_test( + test_cpu_quantize_pass + SRCS mkldnn/cpu_quantize_pass_tester.cc + DEPS cpu_quantize_pass naive_executor) + cc_test( + test_cpu_quantize_squash_pass + SRCS mkldnn/cpu_quantize_squash_pass_tester.cc + DEPS cpu_quantize_squash_pass naive_executor) + cc_test( + test_reshape_transpose_matmul_mkldnn_fuse_pass + SRCS mkldnn/reshape_transpose_matmul_mkldnn_fuse_pass_tester.cc + DEPS reshape_transpose_matmul_mkldnn_fuse_pass + reshape_transpose_matmul_v2_mkldnn_fuse_pass) + cc_test( + test_matmul_transpose_reshape_fuse_pass + SRCS mkldnn/matmul_transpose_reshape_fuse_pass_tester.cc + DEPS matmul_transpose_reshape_fuse_pass + matmul_v2_transpose_reshape_fuse_pass) + cc_test( + test_shuffle_channel_mkldnn_detect_pass + SRCS mkldnn/shuffle_channel_mkldnn_detect_pass_tester.cc + DEPS shuffle_channel_mkldnn_detect_pass) + cc_test( + test_cpu_bfloat16_placement_pass + SRCS mkldnn/cpu_bfloat16_placement_pass_tester.cc + DEPS cpu_bfloat16_placement_pass) + cc_test( + test_cpu_bfloat16_pass + SRCS mkldnn/cpu_bfloat16_pass_tester.cc + DEPS cpu_bfloat16_pass) + cc_test( + test_multi_gru_fuse_pass + SRCS mkldnn/multi_gru_fuse_pass_tester.cc + DEPS multi_gru_fuse_pass) + cc_test( + test_multi_gru_seq_fuse_pass + SRCS mkldnn/multi_gru_seq_fuse_pass_tester.cc + DEPS multi_gru_seq_fuse_pass) + set(TEST_FC_RNN_PASS_DEPS fc_gru_fuse_pass fc_lstm_fuse_pass + mkldnn_placement_pass) + cc_test( + test_fc_rnn_mkldnn_fuse_pass + SRCS mkldnn/mkldnn_fc_rnn_fuse_pass_tester.cc + DEPS ${TEST_FC_RNN_PASS_DEPS}) endif() - cc_test(test_conv_batch_norm_mkldnn_fuse_pass SRCS mkldnn/mkldnn_conv_bn_fuse_pass_tester.cc DEPS ${TEST_CONV_BN_PASS_DEPS}) - cc_test(test_scale_matmul_fuse_pass SRCS mkldnn/scale_matmul_fuse_pass_tester.cc DEPS scale_matmul_fuse_pass) - cc_test(test_mkldnn_placement_pass SRCS mkldnn/mkldnn_placement_pass_tester.cc DEPS mkldnn_placement_pass) - cc_test(test_mkldnn_inplace_pass SRCS mkldnn/mkldnn_inplace_pass_tester.cc DEPS mkldnn_inplace_pass) - cc_test(test_compute_propagate_scales_mkldnn_pass SRCS mkldnn/compute_propagate_scales_mkldnn_pass_tester.cc DEPS compute_propagate_scales_mkldnn_pass naive_executor) - cc_test(test_cpu_quantize_placement_pass SRCS mkldnn/cpu_quantize_placement_pass_tester.cc DEPS cpu_quantize_placement_pass) - cc_test(test_cpu_quantize_pass SRCS mkldnn/cpu_quantize_pass_tester.cc DEPS cpu_quantize_pass naive_executor) - cc_test(test_cpu_quantize_squash_pass SRCS mkldnn/cpu_quantize_squash_pass_tester.cc DEPS cpu_quantize_squash_pass naive_executor) - cc_test(test_reshape_transpose_matmul_mkldnn_fuse_pass SRCS mkldnn/reshape_transpose_matmul_mkldnn_fuse_pass_tester.cc DEPS reshape_transpose_matmul_mkldnn_fuse_pass reshape_transpose_matmul_v2_mkldnn_fuse_pass) - cc_test(test_matmul_transpose_reshape_fuse_pass SRCS mkldnn/matmul_transpose_reshape_fuse_pass_tester.cc DEPS matmul_transpose_reshape_fuse_pass matmul_v2_transpose_reshape_fuse_pass) - cc_test(test_shuffle_channel_mkldnn_detect_pass SRCS mkldnn/shuffle_channel_mkldnn_detect_pass_tester.cc DEPS shuffle_channel_mkldnn_detect_pass) - cc_test(test_cpu_bfloat16_placement_pass SRCS mkldnn/cpu_bfloat16_placement_pass_tester.cc DEPS cpu_bfloat16_placement_pass) - cc_test(test_cpu_bfloat16_pass SRCS mkldnn/cpu_bfloat16_pass_tester.cc DEPS cpu_bfloat16_pass) - cc_test(test_multi_gru_fuse_pass SRCS mkldnn/multi_gru_fuse_pass_tester.cc DEPS multi_gru_fuse_pass) - cc_test(test_multi_gru_seq_fuse_pass SRCS mkldnn/multi_gru_seq_fuse_pass_tester.cc DEPS multi_gru_seq_fuse_pass) - set(TEST_FC_RNN_PASS_DEPS fc_gru_fuse_pass fc_lstm_fuse_pass mkldnn_placement_pass) - cc_test(test_fc_rnn_mkldnn_fuse_pass SRCS mkldnn/mkldnn_fc_rnn_fuse_pass_tester.cc DEPS ${TEST_FC_RNN_PASS_DEPS}) -endif () diff --git a/paddle/fluid/framework/ir/fuse_optimizer_ops_pass/CMakeLists.txt b/paddle/fluid/framework/ir/fuse_optimizer_ops_pass/CMakeLists.txt index 22876e962a0..7146e991919 100644 --- a/paddle/fluid/framework/ir/fuse_optimizer_ops_pass/CMakeLists.txt +++ b/paddle/fluid/framework/ir/fuse_optimizer_ops_pass/CMakeLists.txt @@ -1,4 +1,16 @@ -cc_library(fuse_optimizer_op_pass SRCS fuse_optimizer_op_pass.cc DEPS graph graph_helper) -cc_library(fuse_adam_op_pass SRCS fuse_adam_op_pass.cc DEPS fuse_optimizer_op_pass) -cc_library(fuse_sgd_op_pass SRCS fuse_sgd_op_pass.cc DEPS fuse_optimizer_op_pass) -cc_library(fuse_momentum_op_pass SRCS fuse_momentum_op_pass.cc DEPS fuse_optimizer_op_pass) +cc_library( + fuse_optimizer_op_pass + SRCS fuse_optimizer_op_pass.cc + DEPS graph graph_helper) +cc_library( + fuse_adam_op_pass + SRCS fuse_adam_op_pass.cc + DEPS fuse_optimizer_op_pass) +cc_library( + fuse_sgd_op_pass + SRCS fuse_sgd_op_pass.cc + DEPS fuse_optimizer_op_pass) +cc_library( + fuse_momentum_op_pass + SRCS fuse_momentum_op_pass.cc + DEPS fuse_optimizer_op_pass) diff --git a/paddle/fluid/framework/ir/fusion_group/CMakeLists.txt b/paddle/fluid/framework/ir/fusion_group/CMakeLists.txt index 78b15398cc7..7df678fbdd7 100644 --- a/paddle/fluid/framework/ir/fusion_group/CMakeLists.txt +++ b/paddle/fluid/framework/ir/fusion_group/CMakeLists.txt @@ -1,14 +1,22 @@ -cc_library(code_generator - SRCS operation.cc code_generator.cc code_generator_helper.cc - DEPS graph subgraph_detector) +cc_library( + code_generator + SRCS operation.cc code_generator.cc code_generator_helper.cc + DEPS graph subgraph_detector) if(WITH_GPU OR WITH_ROCM) - cc_test(test_code_generator SRCS code_generator_tester.cc DEPS code_generator device_code lod_tensor graph_viz_pass) + cc_test( + test_code_generator + SRCS code_generator_tester.cc + DEPS code_generator device_code lod_tensor graph_viz_pass) endif() -cc_library(fusion_group_pass - SRCS fusion_group_pass.cc elementwise_group_detector.cc - DEPS subgraph_detector fuse_pass_base code_generator device_code) -cc_test(test_fusion_group_pass SRCS fusion_group_pass_tester.cc DEPS fusion_group_pass graph_viz_pass) +cc_library( + fusion_group_pass + SRCS fusion_group_pass.cc elementwise_group_detector.cc + DEPS subgraph_detector fuse_pass_base code_generator device_code) +cc_test( + test_fusion_group_pass + SRCS fusion_group_pass_tester.cc + DEPS fusion_group_pass graph_viz_pass) if(WITH_TESTING AND TEST test_code_generator) - set_tests_properties(test_code_generator PROPERTIES TIMEOUT 120) + set_tests_properties(test_code_generator PROPERTIES TIMEOUT 120) endif() diff --git a/paddle/fluid/framework/ir/memory_optimize_pass/CMakeLists.txt b/paddle/fluid/framework/ir/memory_optimize_pass/CMakeLists.txt index 25b07ddf414..32d02902e86 100644 --- a/paddle/fluid/framework/ir/memory_optimize_pass/CMakeLists.txt +++ b/paddle/fluid/framework/ir/memory_optimize_pass/CMakeLists.txt @@ -1,24 +1,80 @@ -cc_library(op_graph_view SRCS op_graph_view.cc DEPS op_handle_base) -cc_library(conditional_block_op_eager_deletion_pass SRCS conditional_block_op_eager_deletion_pass.cc DEPS conditional_block_op_helper graph_helper pass computation_op_handle) -cc_library(while_op_eager_deletion_pass SRCS while_op_eager_deletion_pass.cc DEPS while_op_helper graph_helper pass computation_op_handle) -cc_library(recurrent_op_eager_deletion_pass SRCS recurrent_op_eager_deletion_pass.cc DEPS recurrent_op_helper graph_helper pass computation_op_handle) -cc_library(reference_count_pass_helper SRCS reference_count_pass_helper.cc DEPS garbage_collector computation_op_handle var_handle) -cc_library(reference_count_pass SRCS reference_count_pass.cc DEPS computation_op_handle graph graph_helper pass op_graph_view reference_count_pass_helper) +cc_library( + op_graph_view + SRCS op_graph_view.cc + DEPS op_handle_base) +cc_library( + conditional_block_op_eager_deletion_pass + SRCS conditional_block_op_eager_deletion_pass.cc + DEPS conditional_block_op_helper graph_helper pass computation_op_handle) +cc_library( + while_op_eager_deletion_pass + SRCS while_op_eager_deletion_pass.cc + DEPS while_op_helper graph_helper pass computation_op_handle) +cc_library( + recurrent_op_eager_deletion_pass + SRCS recurrent_op_eager_deletion_pass.cc + DEPS recurrent_op_helper graph_helper pass computation_op_handle) +cc_library( + reference_count_pass_helper + SRCS reference_count_pass_helper.cc + DEPS garbage_collector computation_op_handle var_handle) +cc_library( + reference_count_pass + SRCS reference_count_pass.cc + DEPS computation_op_handle graph graph_helper pass op_graph_view + reference_count_pass_helper) -SET(EAGER_DELETETION_PASS_DEPS computation_op_handle eager_deletion_op_handle graph graph_helper pass conditional_block_op_eager_deletion_pass while_op_eager_deletion_pass recurrent_op_eager_deletion_pass reference_count_pass_helper) -if (WITH_CINN) - cc_library(share_varinfo_into_cinn_pass SRCS share_varinfo_into_cinn_pass.cc DEPS pass enforce graph_helper computation_op_handle eager_deletion_op_handle cinn_compiler) - cc_test(share_varinfo_into_cinn_pass_test SRCS share_varinfo_into_cinn_pass_test.cc DEPS share_varinfo_into_cinn_pass parallel_executor cinn_compiler elementwise_add_op mul_op cinn_launch_op) +set(EAGER_DELETETION_PASS_DEPS + computation_op_handle + eager_deletion_op_handle + graph + graph_helper + pass + conditional_block_op_eager_deletion_pass + while_op_eager_deletion_pass + recurrent_op_eager_deletion_pass + reference_count_pass_helper) +if(WITH_CINN) + cc_library( + share_varinfo_into_cinn_pass + SRCS share_varinfo_into_cinn_pass.cc + DEPS pass enforce graph_helper computation_op_handle + eager_deletion_op_handle cinn_compiler) + cc_test( + share_varinfo_into_cinn_pass_test + SRCS share_varinfo_into_cinn_pass_test.cc + DEPS share_varinfo_into_cinn_pass parallel_executor cinn_compiler + elementwise_add_op mul_op cinn_launch_op) list(APPEND EAGER_DELETETION_PASS_DEPS share_varinfo_into_cinn_pass) endif() -cc_library(eager_deletion_pass SRCS eager_deletion_pass.cc DEPS ${EAGER_DELETETION_PASS_DEPS}) +cc_library( + eager_deletion_pass + SRCS eager_deletion_pass.cc + DEPS ${EAGER_DELETETION_PASS_DEPS}) -cc_library(memory_reuse_pass SRCS memory_reuse_pass.cc DEPS computation_op_handle reference_count_pass_helper share_tensor_buffer_op_handle graph pass multi_devices_helper) +cc_library( + memory_reuse_pass + SRCS memory_reuse_pass.cc + DEPS computation_op_handle reference_count_pass_helper + share_tensor_buffer_op_handle graph pass multi_devices_helper) -cc_library(buffer_shared_inplace_op_pass SRCS buffer_shared_inplace_op_pass.cc DEPS memory_reuse_pass executor_gc_helper) -cc_library(buffer_shared_cross_op_memory_reuse_pass SRCS buffer_shared_cross_op_memory_reuse_pass.cc DEPS memory_reuse_pass) +cc_library( + buffer_shared_inplace_op_pass + SRCS buffer_shared_inplace_op_pass.cc + DEPS memory_reuse_pass executor_gc_helper) +cc_library( + buffer_shared_cross_op_memory_reuse_pass + SRCS buffer_shared_cross_op_memory_reuse_pass.cc + DEPS memory_reuse_pass) -cc_library(inplace_addto_op_pass SRCS inplace_addto_op_pass.cc DEPS memory_reuse_pass) +cc_library( + inplace_addto_op_pass + SRCS inplace_addto_op_pass.cc + DEPS memory_reuse_pass) -cc_test(test_reference_count_pass_last_lived_ops SRCS test_reference_count_pass_last_lived_ops.cc DEPS parallel_executor elementwise_mul_op elementwise_add_op scale_op eigen_function) +cc_test( + test_reference_count_pass_last_lived_ops + SRCS test_reference_count_pass_last_lived_ops.cc + DEPS parallel_executor elementwise_mul_op elementwise_add_op scale_op + eigen_function) diff --git a/paddle/fluid/framework/ir/multi_devices_graph_pass/CMakeLists.txt b/paddle/fluid/framework/ir/multi_devices_graph_pass/CMakeLists.txt index fea12baf065..e97331bc87a 100644 --- a/paddle/fluid/framework/ir/multi_devices_graph_pass/CMakeLists.txt +++ b/paddle/fluid/framework/ir/multi_devices_graph_pass/CMakeLists.txt @@ -1,7 +1,17 @@ -cc_library(modify_op_lock_and_record_event_pass SRCS modify_op_lock_and_record_event_pass.cc DEPS computation_op_handle scale_loss_grad_op_handle op_graph_view multi_devices_helper) +cc_library( + modify_op_lock_and_record_event_pass + SRCS modify_op_lock_and_record_event_pass.cc + DEPS computation_op_handle scale_loss_grad_op_handle op_graph_view + multi_devices_helper) -cc_library(multi_devices_graph_print_pass SRCS multi_devices_graph_print_pass.cc DEPS multi_devices_helper) -cc_library(multi_devices_graph_check_pass SRCS multi_devices_graph_check_pass.cc DEPS multi_devices_helper) +cc_library( + multi_devices_graph_print_pass + SRCS multi_devices_graph_print_pass.cc + DEPS multi_devices_helper) +cc_library( + multi_devices_graph_check_pass + SRCS multi_devices_graph_check_pass.cc + DEPS multi_devices_helper) set(ALL_REDUCE_OP_HANDLES all_reduce_op_handle) set(ALL_REDUCE_OP_HANDLES grad_merge_all_reduce_op_handle) @@ -9,13 +19,46 @@ if(WITH_GPU AND WITH_DGC) list(APPEND ALL_REDUCE_OP_HANDLES sparse_all_reduce_op_handle) endif() -cc_library(multi_devices_graph_pass SRCS multi_devices_graph_pass.cc DEPS multi_devices_helper computation_op_handle - scale_loss_grad_op_handle rpc_op_handle fetch_barrier_op_handle ${ALL_REDUCE_OP_HANDLES} reduce_op_handle broadcast_op_handle fused_broadcast_op_handle) -cc_library(sequential_execution_pass SRCS sequential_execution_pass.cc DEPS graph graph_helper pass) -cc_library(set_reader_device_info_utils SRCS set_reader_device_info_utils.cc DEPS graph graph_helper pass multi_devices_graph_pass) +cc_library( + multi_devices_graph_pass + SRCS multi_devices_graph_pass.cc + DEPS multi_devices_helper + computation_op_handle + scale_loss_grad_op_handle + rpc_op_handle + fetch_barrier_op_handle + ${ALL_REDUCE_OP_HANDLES} + reduce_op_handle + broadcast_op_handle + fused_broadcast_op_handle) +cc_library( + sequential_execution_pass + SRCS sequential_execution_pass.cc + DEPS graph graph_helper pass) +cc_library( + set_reader_device_info_utils + SRCS set_reader_device_info_utils.cc + DEPS graph graph_helper pass multi_devices_graph_pass) -cc_library(fuse_all_reduce_op_pass SRCS fuse_all_reduce_op_pass.cc DEPS graph graph_helper fused_all_reduce_op_handle grad_merge_all_reduce_op_handle) -cc_library(all_reduce_deps_pass SRCS all_reduce_deps_pass.cc DEPS all_reduce_op_handle graph graph_helper pass) -cc_library(backward_optimizer_op_deps_pass SRCS backward_optimizer_op_deps_pass.cc DEPS graph graph_helper pass) -cc_library(add_reader_dependency_pass SRCS add_reader_dependency_pass.cc DEPS graph graph_helper pass) -cc_library(fix_op_run_order_pass SRCS fix_op_run_order_pass.cc DEPS graph graph_helper multi_devices_helper pass op_handle_base eager_deletion_op_handle) +cc_library( + fuse_all_reduce_op_pass + SRCS fuse_all_reduce_op_pass.cc + DEPS graph graph_helper fused_all_reduce_op_handle + grad_merge_all_reduce_op_handle) +cc_library( + all_reduce_deps_pass + SRCS all_reduce_deps_pass.cc + DEPS all_reduce_op_handle graph graph_helper pass) +cc_library( + backward_optimizer_op_deps_pass + SRCS backward_optimizer_op_deps_pass.cc + DEPS graph graph_helper pass) +cc_library( + add_reader_dependency_pass + SRCS add_reader_dependency_pass.cc + DEPS graph graph_helper pass) +cc_library( + fix_op_run_order_pass + SRCS fix_op_run_order_pass.cc + DEPS graph graph_helper multi_devices_helper pass op_handle_base + eager_deletion_op_handle) diff --git a/paddle/fluid/framework/new_executor/CMakeLists.txt b/paddle/fluid/framework/new_executor/CMakeLists.txt index 60460007399..44d540769f2 100644 --- a/paddle/fluid/framework/new_executor/CMakeLists.txt +++ b/paddle/fluid/framework/new_executor/CMakeLists.txt @@ -1,76 +1,136 @@ -set(INTERPRETERCORE_DEPS op_registry device_context scope framework_proto data_feed_proto heter_service_proto trainer_desc_proto glog -lod_rank_table fs shell fleet_wrapper heter_wrapper ps_gpu_wrapper box_wrapper lodtensor_printer feed_fetch_method -graph_to_program_pass variable_helper timer monitor nan_inf_utils) - +set(INTERPRETERCORE_DEPS + op_registry + device_context + scope + framework_proto + data_feed_proto + heter_service_proto + trainer_desc_proto + glog + lod_rank_table + fs + shell + fleet_wrapper + heter_wrapper + ps_gpu_wrapper + box_wrapper + lodtensor_printer + feed_fetch_method + graph_to_program_pass + variable_helper + timer + monitor + nan_inf_utils) add_subdirectory(workqueue) add_subdirectory(garbage_collector) -cc_library(data_transfer SRCS data_transfer.cc DEPS enforce scope glog) -cc_library(new_executor_defs SRCS new_executor_defs.cc DEPS enforce glog scope) -cc_library(interpretercore_util SRCS interpretercore_util.cc DEPS ${INTERPRETERCORE_DEPS} workqueue new_executor_defs data_transfer) -cc_library(event_manager SRCS event_manager.cc DEPS ${DEVICE_EVENT_LIBS} glog new_executor_defs) -cc_library(stream_analyzer SRCS stream_analyzer.cc DEPS ${DEVICE_EVENT_LIBS} glog device_context new_executor_defs) +cc_library( + data_transfer + SRCS data_transfer.cc + DEPS enforce scope glog) +cc_library( + new_executor_defs + SRCS new_executor_defs.cc + DEPS enforce glog scope) +cc_library( + interpretercore_util + SRCS interpretercore_util.cc + DEPS ${INTERPRETERCORE_DEPS} workqueue new_executor_defs data_transfer) +cc_library( + event_manager + SRCS event_manager.cc + DEPS ${DEVICE_EVENT_LIBS} glog new_executor_defs) +cc_library( + stream_analyzer + SRCS stream_analyzer.cc + DEPS ${DEVICE_EVENT_LIBS} glog device_context new_executor_defs) if(WITH_GPU OR WITH_ROCM) -cc_library(interpretercore SRCS interpretercore.cc DEPS workqueue ${DEVICE_EVENT_LIBS} interpretercore_util interpretercore_event_garbage_collector interpretercore_fast_garbage_collector stream_analyzer event_manager) + cc_library( + interpretercore + SRCS interpretercore.cc + DEPS workqueue + ${DEVICE_EVENT_LIBS} + interpretercore_util + interpretercore_event_garbage_collector + interpretercore_fast_garbage_collector + stream_analyzer + event_manager) else() -cc_library(interpretercore SRCS interpretercore.cc DEPS workqueue ${DEVICE_EVENT_LIBS} interpretercore_util interpretercore_event_garbage_collector stream_analyzer event_manager) + cc_library( + interpretercore + SRCS interpretercore.cc + DEPS workqueue ${DEVICE_EVENT_LIBS} interpretercore_util + interpretercore_event_garbage_collector stream_analyzer event_manager) endif() -cc_library(standalone_executor SRCS standalone_executor.cc DEPS interpretercore) +cc_library( + standalone_executor + SRCS standalone_executor.cc + DEPS interpretercore) -cc_library(staticgraph_executor_statistics SRCS executor_statistics.cc DEPS enforce glog os_info) +cc_library( + staticgraph_executor_statistics + SRCS executor_statistics.cc + DEPS enforce glog os_info) # cc_binary(standalone_executor_test SRCS standalone_executor_test.cc DEPS interpretercore standalone_executor operator op_registry executor ${GLOB_OP_LIB} ${GLOB_OPERATOR_DEPS} profiler) # skip win32 since wget is not installed by default on windows machine. -if (WITH_GPU AND WITH_TESTING AND NOT WIN32 AND NOT "$ENV{CI_SKIP_CPP_TEST}" STREQUAL "ON") - add_custom_target( - download_program - COMMAND wget -nc https://paddle-ci.gz.bcebos.com/new_exec/lm_main_program - COMMAND wget -nc https://paddle-ci.gz.bcebos.com/new_exec/lm_startup_program - ) - - # all operators used in the program - set(OPS - fill_constant_op - uniform_random_op - lookup_table_op - transpose_op - reshape_op - split_op - slice_op - concat_op - matmul_op - elementwise_add_op - elementwise_mul_op - softmax_with_cross_entropy_op - reduce_mean_op - reduce_sum_op - activation_op - sum_op - elementwise_max_op - elementwise_div_op - sgd_op - squared_l2_norm_op - memcpy_h2d_op - memcpy_d2h_op) - - # All deps of the operators above, part of GLOB_OPERATOR_DEPS. - set(OP_DEPS - generator - softmax - selected_rows_functor - jit_kernel_helper - concat_and_split - cross_entropy) +if(WITH_GPU + AND WITH_TESTING + AND NOT WIN32 + AND NOT "$ENV{CI_SKIP_CPP_TEST}" STREQUAL "ON") + add_custom_target( + download_program + COMMAND wget -nc https://paddle-ci.gz.bcebos.com/new_exec/lm_main_program + COMMAND wget -nc + https://paddle-ci.gz.bcebos.com/new_exec/lm_startup_program) + + # all operators used in the program + set(OPS + fill_constant_op + uniform_random_op + lookup_table_op + transpose_op + reshape_op + split_op + slice_op + concat_op + matmul_op + elementwise_add_op + elementwise_mul_op + softmax_with_cross_entropy_op + reduce_mean_op + reduce_sum_op + activation_op + sum_op + elementwise_max_op + elementwise_div_op + sgd_op + squared_l2_norm_op + memcpy_h2d_op + memcpy_d2h_op) + + # All deps of the operators above, part of GLOB_OPERATOR_DEPS. + set(OP_DEPS generator softmax selected_rows_functor jit_kernel_helper + concat_and_split cross_entropy) - cc_test(standalone_executor_test SRCS standalone_executor_test.cc DEPS interpretercore standalone_executor operator op_registry executor ${OPS} ${OP_DEPS}) - set_tests_properties(standalone_executor_test PROPERTIES TIMEOUT 100) + cc_test( + standalone_executor_test + SRCS standalone_executor_test.cc + DEPS interpretercore + standalone_executor + operator + op_registry + executor + ${OPS} + ${OP_DEPS}) + set_tests_properties(standalone_executor_test PROPERTIES TIMEOUT 100) - add_dependencies(standalone_executor_test download_program) - if (WITH_PROFILER) - target_link_libraries(standalone_executor_test profiler) - add_dependencies(standalone_executor_test profiler) - endif() + add_dependencies(standalone_executor_test download_program) + if(WITH_PROFILER) + target_link_libraries(standalone_executor_test profiler) + add_dependencies(standalone_executor_test profiler) + endif() endif() diff --git a/paddle/fluid/framework/new_executor/garbage_collector/CMakeLists.txt b/paddle/fluid/framework/new_executor/garbage_collector/CMakeLists.txt index 2033eba88f9..359c56c561a 100644 --- a/paddle/fluid/framework/new_executor/garbage_collector/CMakeLists.txt +++ b/paddle/fluid/framework/new_executor/garbage_collector/CMakeLists.txt @@ -1,10 +1,22 @@ -cc_library(interpretercore_garbage_collector SRCS garbage_collector.cc DEPS garbage_collector) -cc_library(interpretercore_event_garbage_collector SRCS event_garbage_collector.cc DEPS interpretercore_garbage_collector) +cc_library( + interpretercore_garbage_collector + SRCS garbage_collector.cc + DEPS garbage_collector) +cc_library( + interpretercore_event_garbage_collector + SRCS event_garbage_collector.cc + DEPS interpretercore_garbage_collector) if(WITH_GPU OR WITH_ROCM) - if(WITH_GPU) - nv_library(interpretercore_fast_garbage_collector SRCS fast_garbage_collector.cc DEPS interpretercore_garbage_collector) - elseif(WITH_ROCM) - hip_library(interpretercore_fast_garbage_collector SRCS fast_garbage_collector.cc DEPS interpretercore_garbage_collector) - endif() + if(WITH_GPU) + nv_library( + interpretercore_fast_garbage_collector + SRCS fast_garbage_collector.cc + DEPS interpretercore_garbage_collector) + elseif(WITH_ROCM) + hip_library( + interpretercore_fast_garbage_collector + SRCS fast_garbage_collector.cc + DEPS interpretercore_garbage_collector) + endif() endif() diff --git a/paddle/fluid/framework/new_executor/workqueue/CMakeLists.txt b/paddle/fluid/framework/new_executor/workqueue/CMakeLists.txt index 2690b29e01b..781ef9a64a2 100644 --- a/paddle/fluid/framework/new_executor/workqueue/CMakeLists.txt +++ b/paddle/fluid/framework/new_executor/workqueue/CMakeLists.txt @@ -1,3 +1,12 @@ -cc_library(workqueue_utils SRCS workqueue_utils.cc events_waiter.cc DEPS enforce glog) -cc_library(workqueue SRCS workqueue.cc DEPS workqueue_utils enforce glog os_info) -cc_test(workqueue_test SRCS workqueue_test.cc DEPS workqueue) +cc_library( + workqueue_utils + SRCS workqueue_utils.cc events_waiter.cc + DEPS enforce glog) +cc_library( + workqueue + SRCS workqueue.cc + DEPS workqueue_utils enforce glog os_info) +cc_test( + workqueue_test + SRCS workqueue_test.cc + DEPS workqueue) diff --git a/paddle/fluid/framework/paddle2cinn/CMakeLists.txt b/paddle/fluid/framework/paddle2cinn/CMakeLists.txt index 75e258d1476..7cb9cf254fb 100644 --- a/paddle/fluid/framework/paddle2cinn/CMakeLists.txt +++ b/paddle/fluid/framework/paddle2cinn/CMakeLists.txt @@ -1,29 +1,85 @@ -cc_library(cinn_cache_key SRCS cinn_cache_key.cc DEPS boost graph graph_helper lod_tensor proto_desc) -cc_library(build_cinn_pass SRCS build_cinn_pass.cc DEPS pass subgraph_detector graph_pattern_detector cinn_compiler errors enforce) -cc_library(transform_desc SRCS transform_desc.cc DEPS proto_desc cinn) -cc_library(transform_type SRCS transform_type.cc DEPS errors enforce cinn) -cc_library(cinn_graph_symbolization SRCS cinn_graph_symbolization.cc DEPS lod_tensor graph transform_desc cinn) -cc_library(cinn_compiler SRCS cinn_compiler.cc DEPS framework_proto graph lod_tensor cinn_cache_key cinn_graph_symbolization cinn cinn_launch_context) +cc_library( + cinn_cache_key + SRCS cinn_cache_key.cc + DEPS boost graph graph_helper lod_tensor proto_desc) +cc_library( + build_cinn_pass + SRCS build_cinn_pass.cc + DEPS pass subgraph_detector graph_pattern_detector cinn_compiler errors + enforce) +cc_library( + transform_desc + SRCS transform_desc.cc + DEPS proto_desc cinn) +cc_library( + transform_type + SRCS transform_type.cc + DEPS errors enforce cinn) +cc_library( + cinn_graph_symbolization + SRCS cinn_graph_symbolization.cc + DEPS lod_tensor graph transform_desc cinn) +cc_library( + cinn_compiler + SRCS cinn_compiler.cc + DEPS framework_proto + graph + lod_tensor + cinn_cache_key + cinn_graph_symbolization + cinn + cinn_launch_context) -if (WITH_TESTING) - cc_test(cinn_lib_test SRCS cinn_lib_test.cc DEPS cinn) +if(WITH_TESTING) + cc_test( + cinn_lib_test + SRCS cinn_lib_test.cc + DEPS cinn) set_tests_properties(cinn_lib_test PROPERTIES LABELS "RUN_TYPE=CINN") - cc_test(cinn_cache_key_test SRCS cinn_cache_key_test.cc DEPS cinn_cache_key) + cc_test( + cinn_cache_key_test + SRCS cinn_cache_key_test.cc + DEPS cinn_cache_key) set_tests_properties(cinn_cache_key_test PROPERTIES LABELS "RUN_TYPE=CINN") - cc_test(build_cinn_pass_test SRCS build_cinn_pass_test.cc DEPS build_cinn_pass cinn_compiler op_registry mul_op activation_op elementwise_add_op) + cc_test( + build_cinn_pass_test + SRCS build_cinn_pass_test.cc + DEPS build_cinn_pass cinn_compiler op_registry mul_op activation_op + elementwise_add_op) set_tests_properties(build_cinn_pass_test PROPERTIES LABELS "RUN_TYPE=CINN") - cc_test(transform_desc_test SRCS transform_desc_test.cc DEPS transform_desc) + cc_test( + transform_desc_test + SRCS transform_desc_test.cc + DEPS transform_desc) set_tests_properties(transform_desc_test PROPERTIES LABELS "RUN_TYPE=CINN") - cc_test(transform_type_test SRCS transform_type_test.cc DEPS transform_type) + cc_test( + transform_type_test + SRCS transform_type_test.cc + DEPS transform_type) set_tests_properties(transform_type_test PROPERTIES LABELS "RUN_TYPE=CINN") - cc_test(cinn_graph_symbolization_test SRCS cinn_graph_symbolization_test.cc DEPS cinn_graph_symbolization) - set_tests_properties(cinn_graph_symbolization_test PROPERTIES LABELS "RUN_TYPE=CINN") + cc_test( + cinn_graph_symbolization_test + SRCS cinn_graph_symbolization_test.cc + DEPS cinn_graph_symbolization) + set_tests_properties(cinn_graph_symbolization_test PROPERTIES LABELS + "RUN_TYPE=CINN") - cc_test(cinn_compiler_test SRCS cinn_compiler_test.cc DEPS cinn_compiler place proto_desc graph_viz_pass build_cinn_pass cinn mul_op activation_op elementwise_add_op) + cc_test( + cinn_compiler_test + SRCS cinn_compiler_test.cc + DEPS cinn_compiler + place + proto_desc + graph_viz_pass + build_cinn_pass + cinn + mul_op + activation_op + elementwise_add_op) set_tests_properties(cinn_compiler_test PROPERTIES LABELS "RUN_TYPE=CINN") endif() diff --git a/paddle/fluid/imperative/CMakeLists.txt b/paddle/fluid/imperative/CMakeLists.txt index 92af1901b71..eaf0a09541d 100644 --- a/paddle/fluid/imperative/CMakeLists.txt +++ b/paddle/fluid/imperative/CMakeLists.txt @@ -1,65 +1,214 @@ -cc_library(imperative_flag SRCS flags.cc DEPS gflags flags) -cc_library(var_helper SRCS var_helper.cc DEPS tensor phi_api) -IF(WITH_XPU) -cc_library(prepared_operator SRCS prepared_operator.cc DEPS xpu_op_list proto_desc operator device_context lod_tensor selected_rows_utils var_type_traits op_kernel_type data_transform nan_inf_utils phi_api phi_utils var_helper) -ELSE() -cc_library(prepared_operator SRCS prepared_operator.cc DEPS proto_desc operator device_context lod_tensor selected_rows_utils var_type_traits op_kernel_type data_transform nan_inf_utils phi_api phi_utils var_helper) -ENDIF() -cc_library(layer SRCS layer.cc DEPS prepared_operator math_function imperative_flag variable_helper op_registry var_helper phi_api) +cc_library( + imperative_flag + SRCS flags.cc + DEPS gflags flags) +cc_library( + var_helper + SRCS var_helper.cc + DEPS tensor phi_api) +if(WITH_XPU) + cc_library( + prepared_operator + SRCS prepared_operator.cc + DEPS xpu_op_list + proto_desc + operator + device_context + lod_tensor + selected_rows_utils + var_type_traits + op_kernel_type + data_transform + nan_inf_utils + phi_api + phi_utils + var_helper) +else() + cc_library( + prepared_operator + SRCS prepared_operator.cc + DEPS proto_desc + operator + device_context + lod_tensor + selected_rows_utils + var_type_traits + op_kernel_type + data_transform + nan_inf_utils + phi_api + phi_utils + var_helper) +endif() +cc_library( + layer + SRCS layer.cc + DEPS prepared_operator + math_function + imperative_flag + variable_helper + op_registry + var_helper + phi_api) add_subdirectory(jit) -if (WITH_GPU) -cc_library(layout_autotune SRCS layout_autotune.cc DEPS op_info phi_gpu_info) +if(WITH_GPU) + cc_library( + layout_autotune + SRCS layout_autotune.cc + DEPS op_info phi_gpu_info) else() -cc_library(layout_autotune SRCS layout_autotune.cc DEPS op_info) + cc_library( + layout_autotune + SRCS layout_autotune.cc + DEPS op_info) endif() -cc_library(amp SRCS amp_auto_cast.cc DEPS layer var_helper) -cc_library(tracer SRCS tracer.cc DEPS layer engine program_desc_tracer amp denormal garbage_collector var_helper layout_autotune) -cc_library(basic_engine SRCS basic_engine.cc DEPS layer gradient_accumulator switch_autotune) -cc_library(engine SRCS basic_engine.cc partial_grad_engine.cc DEPS layer gradient_accumulator switch_autotune) -cc_library(imperative_profiler SRCS profiler.cc DEPS flags) +cc_library( + amp + SRCS amp_auto_cast.cc + DEPS layer var_helper) +cc_library( + tracer + SRCS tracer.cc + DEPS layer + engine + program_desc_tracer + amp + denormal + garbage_collector + var_helper + layout_autotune) +cc_library( + basic_engine + SRCS basic_engine.cc + DEPS layer gradient_accumulator switch_autotune) +cc_library( + engine + SRCS basic_engine.cc partial_grad_engine.cc + DEPS layer gradient_accumulator switch_autotune) +cc_library( + imperative_profiler + SRCS profiler.cc + DEPS flags) if(NOT WIN32) - if(WITH_NCCL OR WITH_RCCL) - cc_library(imperative_all_reduce SRCS all_reduce.cc DEPS collective_helper device_context selected_rows_utils tensor) - cc_library(nccl_context SRCS nccl_context.cc DEPS collective_helper device_context imperative_all_reduce var_type_traits) - if(WITH_NCCL) - nv_library(reducer SRCS reducer.cc reducer.cu DEPS layer imperative_all_reduce) - endif() - if(WITH_RCCL) - hip_library(reducer SRCS reducer.cc reducer.cu DEPS layer imperative_all_reduce) - endif() - endif() - if(WITH_XPU_BKCL) - cc_library(bkcl_context SRCS bkcl_context.cc DEPS collective_helper device_context tensor var_type_traits) - cc_library(reducer SRCS reducer.cc DEPS layer) + if(WITH_NCCL OR WITH_RCCL) + cc_library( + imperative_all_reduce + SRCS all_reduce.cc + DEPS collective_helper device_context selected_rows_utils tensor) + cc_library( + nccl_context + SRCS nccl_context.cc + DEPS collective_helper device_context imperative_all_reduce + var_type_traits) + if(WITH_NCCL) + nv_library( + reducer + SRCS reducer.cc reducer.cu + DEPS layer imperative_all_reduce) endif() - if(WITH_ASCEND_CL) - cc_library(hccl_context SRCS hccl_context.cc DEPS collective_helper device_context tensor var_type_traits) - cc_library(reducer SRCS reducer.cc DEPS layer) + if(WITH_RCCL) + hip_library( + reducer + SRCS reducer.cc reducer.cu + DEPS layer imperative_all_reduce) endif() - if(WITH_CNCL) - cc_library(cncl_context SRCS cncl_context.cc DEPS collective_helper device_context tensor var_type_traits) - cc_library(reducer SRCS reducer.cc DEPS layer) - endif() - if(WITH_NCCL OR WITH_RCCL OR WITH_XPU_BKCL OR WITH_ASCEND_CL) - cc_library(heter_ccl_context SRCS heter_ccl_context.cc DEPS collective_helper device_context tensor var_type_traits) - endif() - cc_library(data_loader SRCS data_loader.cc DEPS enforce) + endif() + if(WITH_XPU_BKCL) + cc_library( + bkcl_context + SRCS bkcl_context.cc + DEPS collective_helper device_context tensor var_type_traits) + cc_library( + reducer + SRCS reducer.cc + DEPS layer) + endif() + if(WITH_ASCEND_CL) + cc_library( + hccl_context + SRCS hccl_context.cc + DEPS collective_helper device_context tensor var_type_traits) + cc_library( + reducer + SRCS reducer.cc + DEPS layer) + endif() + if(WITH_CNCL) + cc_library( + cncl_context + SRCS cncl_context.cc + DEPS collective_helper device_context tensor var_type_traits) + cc_library( + reducer + SRCS reducer.cc + DEPS layer) + endif() + if(WITH_NCCL + OR WITH_RCCL + OR WITH_XPU_BKCL + OR WITH_ASCEND_CL) + cc_library( + heter_ccl_context + SRCS heter_ccl_context.cc + DEPS collective_helper device_context tensor var_type_traits) + endif() + cc_library( + data_loader + SRCS data_loader.cc + DEPS enforce) endif(NOT WIN32) if(WITH_GLOO) - cc_library(imperative_gloo_context SRCS gloo_context.cc DEPS collective_helper device_context tensor var_type_traits) - if ( WIN32 OR (NOT (WITH_NCCL OR WITH_RCCL OR WITH_XPU_BKCL OR WITH_ASCEND_CL OR WITH_CNCL) )) - cc_library(reducer SRCS reducer.cc DEPS layer) - endif() + cc_library( + imperative_gloo_context + SRCS gloo_context.cc + DEPS collective_helper device_context tensor var_type_traits) + if(WIN32 + OR (NOT + (WITH_NCCL + OR WITH_RCCL + OR WITH_XPU_BKCL + OR WITH_ASCEND_CL + OR WITH_CNCL) + )) + cc_library( + reducer + SRCS reducer.cc + DEPS layer) + endif() endif() if(WITH_MLU) - SET(MLU_DEPS mlu_baseop) + set(MLU_DEPS mlu_baseop) endif() if(NOT WITH_ASCEND_CL) -cc_library(gradient_accumulator SRCS gradient_accumulator.cc DEPS blas operator lod_tensor selected_rows_utils selected_rows_functor var_type_traits layer math_function phi_tensor ${MLU_DEPS}) + cc_library( + gradient_accumulator + SRCS gradient_accumulator.cc + DEPS blas + operator + lod_tensor + selected_rows_utils + selected_rows_functor + var_type_traits + layer + math_function + phi_tensor + ${MLU_DEPS}) else() -cc_library(gradient_accumulator SRCS gradient_accumulator.cc DEPS blas operator lod_tensor selected_rows_utils selected_rows_functor var_type_traits layer math_function npu_op_runner phi_tensor) + cc_library( + gradient_accumulator + SRCS gradient_accumulator.cc + DEPS blas + operator + lod_tensor + selected_rows_utils + selected_rows_functor + var_type_traits + layer + math_function + npu_op_runner + phi_tensor) endif() add_subdirectory(tests) diff --git a/paddle/fluid/imperative/jit/CMakeLists.txt b/paddle/fluid/imperative/jit/CMakeLists.txt index 66f2a984079..bcc1c0746b8 100644 --- a/paddle/fluid/imperative/jit/CMakeLists.txt +++ b/paddle/fluid/imperative/jit/CMakeLists.txt @@ -1,2 +1,8 @@ -cc_library(op_desc_meta SRCS op_desc_meta.cc DEPS proto_desc layer) -cc_library(program_desc_tracer SRCS program_desc_tracer.cc DEPS op_desc_meta) +cc_library( + op_desc_meta + SRCS op_desc_meta.cc + DEPS proto_desc layer) +cc_library( + program_desc_tracer + SRCS program_desc_tracer.cc + DEPS op_desc_meta) diff --git a/paddle/fluid/imperative/tests/CMakeLists.txt b/paddle/fluid/imperative/tests/CMakeLists.txt index 09de0106ed6..5084363b9c1 100644 --- a/paddle/fluid/imperative/tests/CMakeLists.txt +++ b/paddle/fluid/imperative/tests/CMakeLists.txt @@ -1,26 +1,108 @@ if(WIN32) - cc_test(nccl_context_test SRCS nccl_context_test.cc DEPS device_context) + cc_test( + nccl_context_test + SRCS nccl_context_test.cc + DEPS device_context) else() - if (WITH_GLOO AND (WITH_NCCL OR WITH_RCCL)) - cc_test(nccl_context_test SRCS nccl_context_test.cc DEPS nccl_context) - cc_test(heter_ccl_context_test SRCS heter_ccl_context_test.cc DEPS heter_ccl_context nccl_context imperative_gloo_context gloo_context gloo_wrapper gloo fs shell) - #set_tests_properties(heter_ccl_context_test PROPERTIES LABELS "RUN_TYPE=DIST") - endif() - if (WITH_XPU_BKCL) - cc_test(bkcl_context_test SRCS bkcl_context_test.cc DEPS bkcl_context) - endif() - if (WITH_CNCL) - cc_test(cncl_context_test SRCS cncl_context_test.cc DEPS cncl_context) - endif() + if(WITH_GLOO AND (WITH_NCCL OR WITH_RCCL)) + cc_test( + nccl_context_test + SRCS nccl_context_test.cc + DEPS nccl_context) + cc_test( + heter_ccl_context_test + SRCS heter_ccl_context_test.cc + DEPS heter_ccl_context + nccl_context + imperative_gloo_context + gloo_context + gloo_wrapper + gloo + fs + shell) + #set_tests_properties(heter_ccl_context_test PROPERTIES LABELS "RUN_TYPE=DIST") + endif() + if(WITH_XPU_BKCL) + cc_test( + bkcl_context_test + SRCS bkcl_context_test.cc + DEPS bkcl_context) + endif() + if(WITH_CNCL) + cc_test( + cncl_context_test + SRCS cncl_context_test.cc + DEPS cncl_context) + endif() endif(WIN32) - -cc_test(test_gradient_accmulator SRCS test_gradient_accmulator.cc DEPS memcpy selected_rows_utils selected_rows_functor gradient_accumulator math_function phi_tensor phi_api phi_api_utils) -cc_test(test_layer SRCS test_layer.cc DEPS layer proto_desc operator op_registry variable_helper mul_op memcpy) -cc_test(test_prepare_op SRCS test_prepare_op.cc DEPS prepared_operator op_info split_op layer concat_and_split activation_op place) -cc_test(test_tracer SRCS test_tracer.cc DEPS tracer layer proto_desc operator op_registry variable_helper mul_op reduce_sum_op elementwise_add_op memcpy) -cc_test(test_hooks SRCS test_hooks.cc DEPS tracer basic_engine layer proto_desc operator op_registry variable_helper mul_op elementwise_add_op memcpy) -cc_test(test_eager SRCS test_eager.cc DEPS tracer layer prepared_operator mul_op) -if (WITH_NCCL OR WITH_RCCL OR WITH_XPU_BKCL OR WITH_CNCL) -cc_test(test_group SRCS test_group.cc DEPS reducer concat_and_split memcpy) +cc_test( + test_gradient_accmulator + SRCS test_gradient_accmulator.cc + DEPS memcpy + selected_rows_utils + selected_rows_functor + gradient_accumulator + math_function + phi_tensor + phi_api + phi_api_utils) +cc_test( + test_layer + SRCS test_layer.cc + DEPS layer + proto_desc + operator + op_registry + variable_helper + mul_op + memcpy) +cc_test( + test_prepare_op + SRCS test_prepare_op.cc + DEPS prepared_operator + op_info + split_op + layer + concat_and_split + activation_op + place) +cc_test( + test_tracer + SRCS test_tracer.cc + DEPS tracer + layer + proto_desc + operator + op_registry + variable_helper + mul_op + reduce_sum_op + elementwise_add_op + memcpy) +cc_test( + test_hooks + SRCS test_hooks.cc + DEPS tracer + basic_engine + layer + proto_desc + operator + op_registry + variable_helper + mul_op + elementwise_add_op + memcpy) +cc_test( + test_eager + SRCS test_eager.cc + DEPS tracer layer prepared_operator mul_op) +if(WITH_NCCL + OR WITH_RCCL + OR WITH_XPU_BKCL + OR WITH_CNCL) + cc_test( + test_group + SRCS test_group.cc + DEPS reducer concat_and_split memcpy) endif() diff --git a/paddle/fluid/inference/CMakeLists.txt b/paddle/fluid/inference/CMakeLists.txt index 633f481df80..109cb5d8fe0 100644 --- a/paddle/fluid/inference/CMakeLists.txt +++ b/paddle/fluid/inference/CMakeLists.txt @@ -17,19 +17,20 @@ if(WITH_TESTING) include(tests/test.cmake) # some generic cmake function for inference endif() -cc_library(paddle_inference_io - SRCS io.cc - DEPS paddle_framework ${GLOB_OP_LIB} ${GLOB_OPERATOR_DEPS}) +cc_library( + paddle_inference_io + SRCS io.cc + DEPS paddle_framework ${GLOB_OP_LIB} ${GLOB_OPERATOR_DEPS}) # analysis and tensorrt must be added before creating static library, # otherwise, there would be undefined reference to them in static library. add_subdirectory(analysis) add_subdirectory(utils) -if (TENSORRT_FOUND) +if(TENSORRT_FOUND) add_subdirectory(tensorrt) endif() -if (WITH_LITE) +if(WITH_LITE) add_subdirectory(lite) endif() @@ -42,20 +43,30 @@ add_subdirectory(api) # Create static inference library if needed # All static libs in inference/api -set(STATIC_INFERENCE_API paddle_inference_api analysis_predictor - zero_copy_tensor reset_tensor_array - analysis_config paddle_pass_builder activation_functions ${mkldnn_quantizer_cfg}) +set(STATIC_INFERENCE_API + paddle_inference_api + analysis_predictor + zero_copy_tensor + reset_tensor_array + analysis_config + paddle_pass_builder + activation_functions + ${mkldnn_quantizer_cfg}) #windows GPU static library over the limit, so not create_static_lib, and cc_library is dummy if(WIN32 AND WITH_GPU) - cc_library(paddle_inference DEPS ${fluid_modules} phi ${STATIC_INFERENCE_API} ${utils_modules}) + cc_library(paddle_inference DEPS ${fluid_modules} phi ${STATIC_INFERENCE_API} + ${utils_modules}) else() - create_static_lib(paddle_inference ${fluid_modules} ${phi_modules} ${STATIC_INFERENCE_API} ${utils_modules}) + create_static_lib(paddle_inference ${fluid_modules} ${phi_modules} + ${STATIC_INFERENCE_API} ${utils_modules}) endif() if(NOT APPLE) # TODO(liuyiqu: Temporarily disable the link flag because it is not support on Mac. - set(LINK_FLAGS "-Wl,--retain-symbols-file ${CMAKE_CURRENT_SOURCE_DIR}/paddle_inference.sym") + set(LINK_FLAGS + "-Wl,--retain-symbols-file ${CMAKE_CURRENT_SOURCE_DIR}/paddle_inference.sym" + ) set_target_properties(paddle_inference PROPERTIES LINK_FLAGS "${LINK_FLAGS}") endif() @@ -63,7 +74,7 @@ endif() add_subdirectory(capi_exp) if(WITH_TESTING AND WITH_INFERENCE_API_TEST) - add_subdirectory(tests/api) + add_subdirectory(tests/api) endif() set(SHARED_INFERENCE_SRCS @@ -80,43 +91,53 @@ set(SHARED_INFERENCE_SRCS ${PADDLE_CUSTOM_OP_SRCS}) # shared inference library deps -set(SHARED_INFERENCE_DEPS ${fluid_modules} phi analysis_predictor ${utils_modules}) +set(SHARED_INFERENCE_DEPS ${fluid_modules} phi analysis_predictor + ${utils_modules}) -if (WITH_CRYPTO) - set(SHARED_INFERENCE_DEPS ${SHARED_INFERENCE_DEPS} paddle_crypto) -endif (WITH_CRYPTO) +if(WITH_CRYPTO) + set(SHARED_INFERENCE_DEPS ${SHARED_INFERENCE_DEPS} paddle_crypto) +endif(WITH_CRYPTO) -if (WITH_PSCORE) - set(SHARED_INFERENCE_DEPS ${SHARED_INFERENCE_DEPS} fleet ps_service tensor_table) -endif () +if(WITH_PSCORE) + set(SHARED_INFERENCE_DEPS ${SHARED_INFERENCE_DEPS} fleet ps_service + tensor_table) +endif() -if (WITH_ONNXRUNTIME) - set(SHARED_INFERENCE_SRCS ${SHARED_INFERENCE_SRCS} - ${CMAKE_CURRENT_SOURCE_DIR}/api/onnxruntime_predictor.cc - ) -endif (WITH_ONNXRUNTIME) +if(WITH_ONNXRUNTIME) + set(SHARED_INFERENCE_SRCS + ${SHARED_INFERENCE_SRCS} + ${CMAKE_CURRENT_SOURCE_DIR}/api/onnxruntime_predictor.cc) +endif(WITH_ONNXRUNTIME) # Create shared inference library -cc_library(paddle_inference_shared SHARED SRCS ${SHARED_INFERENCE_SRCS} - DEPS ${SHARED_INFERENCE_DEPS}) +cc_library( + paddle_inference_shared SHARED + SRCS ${SHARED_INFERENCE_SRCS} + DEPS ${SHARED_INFERENCE_DEPS}) get_property(os_dependency_modules GLOBAL PROPERTY OS_DEPENDENCY_MODULES) target_link_libraries(paddle_inference_shared ${os_dependency_modules}) if(WIN32) - target_link_libraries(paddle_inference_shared gflags) + target_link_libraries(paddle_inference_shared gflags) endif() -set_target_properties(paddle_inference_shared PROPERTIES OUTPUT_NAME paddle_inference) +set_target_properties(paddle_inference_shared PROPERTIES OUTPUT_NAME + paddle_inference) if(NOT APPLE AND NOT WIN32) # TODO(liuyiqun): Temporarily disable the link flag because it is not support on Mac. - if (WITH_CUSTOM_DEVICE) - set(LINK_FLAGS "-Wl,--version-script ${CMAKE_CURRENT_SOURCE_DIR}/paddle_inference_custom_device.map") + if(WITH_CUSTOM_DEVICE) + set(LINK_FLAGS + "-Wl,--version-script ${CMAKE_CURRENT_SOURCE_DIR}/paddle_inference_custom_device.map" + ) else() - set(LINK_FLAGS "-Wl,--version-script ${CMAKE_CURRENT_SOURCE_DIR}/paddle_inference.map") + set(LINK_FLAGS + "-Wl,--version-script ${CMAKE_CURRENT_SOURCE_DIR}/paddle_inference.map") endif() - set_target_properties(paddle_inference_shared PROPERTIES LINK_FLAGS "${LINK_FLAGS}") + set_target_properties(paddle_inference_shared PROPERTIES LINK_FLAGS + "${LINK_FLAGS}") # check symbol hidden - FILE(WRITE ${CMAKE_CURRENT_BINARY_DIR}/check_symbol.cmake + file( + WRITE ${CMAKE_CURRENT_BINARY_DIR}/check_symbol.cmake "execute_process(COMMAND sh -c \"${CMAKE_CURRENT_SOURCE_DIR}/check_symbol.sh" " ${CMAKE_CURRENT_BINARY_DIR}/libpaddle_inference.so\" RESULT_VARIABLE symbol_res)\n" "if(NOT \"\${symbol_res}\" STREQUAL \"0\")\n" @@ -126,5 +147,6 @@ if(NOT APPLE AND NOT WIN32) OUTPUT "${CMAKE_CURRENT_BINARY_DIR}/.check_symbol" COMMAND ${CMAKE_COMMAND} -P "${CMAKE_CURRENT_BINARY_DIR}/check_symbol.cmake" DEPENDS paddle_inference_shared) - add_custom_target(check_symbol ALL DEPENDS "${CMAKE_CURRENT_BINARY_DIR}/.check_symbol") + add_custom_target(check_symbol ALL + DEPENDS "${CMAKE_CURRENT_BINARY_DIR}/.check_symbol") endif() diff --git a/paddle/fluid/inference/analysis/CMakeLists.txt b/paddle/fluid/inference/analysis/CMakeLists.txt index 3d1a467565c..f374c5c7cc2 100644 --- a/paddle/fluid/inference/analysis/CMakeLists.txt +++ b/paddle/fluid/inference/analysis/CMakeLists.txt @@ -1,72 +1,112 @@ unset(analysis_deps CACHE) set(analysis_deps # analysis_deps can be extended across the project - framework_proto proto_desc graph pass paddle_inference_io executor pretty_log - ir_pass_manager - CACHE INTERNAL "") + framework_proto + proto_desc + graph + pass + paddle_inference_io + executor + pretty_log + ir_pass_manager + CACHE INTERNAL "") add_subdirectory(ir_passes) add_subdirectory(passes) -cc_library(analysis_helper SRCS helper.cc DEPS framework_proto proto_desc graph paddle_inference_io) +cc_library( + analysis_helper + SRCS helper.cc + DEPS framework_proto proto_desc graph paddle_inference_io) -cc_library(ir_pass_manager SRCS ir_pass_manager.cc DEPS graph pass ${INFER_IR_PASSES} analysis_helper) +cc_library( + ir_pass_manager + SRCS ir_pass_manager.cc + DEPS graph pass ${INFER_IR_PASSES} analysis_helper) -cc_library(argument INTERFACE SRCS argument.cc DEPS scope proto_desc) -cc_library(analysis_pass INTERFACE SRCS analysis_pass.cc DEPS proto_desc) +cc_library( + argument INTERFACE + SRCS argument.cc + DEPS scope proto_desc) +cc_library( + analysis_pass INTERFACE + SRCS analysis_pass.cc + DEPS proto_desc) -cc_library(analysis SRCS analyzer.cc - DEPS ${analysis_deps} analysis_helper - analysis_pass ${INFER_IR_PASSES} - ) +cc_library( + analysis + SRCS analyzer.cc + DEPS ${analysis_deps} analysis_helper analysis_pass ${INFER_IR_PASSES}) function(inference_analysis_test_build TARGET) if(WITH_TESTING) - set(options "") - set(oneValueArgs "") - set(multiValueArgs SRCS EXTRA_DEPS) - cmake_parse_arguments(analysis_test "${options}" "${oneValueArgs}" "${multiValueArgs}" ${ARGN}) - inference_base_test_build(${TARGET} - SRCS ${analysis_test_SRCS} - DEPS analysis pass ${GLOB_PASS_LIB} ${analysis_test_EXTRA_DEPS}) + set(options "") + set(oneValueArgs "") + set(multiValueArgs SRCS EXTRA_DEPS) + cmake_parse_arguments(analysis_test "${options}" "${oneValueArgs}" + "${multiValueArgs}" ${ARGN}) + inference_base_test_build( + ${TARGET} + SRCS + ${analysis_test_SRCS} + DEPS + analysis + pass + ${GLOB_PASS_LIB} + ${analysis_test_EXTRA_DEPS}) endif() endfunction() function(inference_analysis_test_run TARGET) if(WITH_TESTING) - set(options "") - set(oneValueArgs "") - set(multiValueArgs COMMAND ARGS) - cmake_parse_arguments(analysis_test "${options}" "${oneValueArgs}" "${multiValueArgs}" ${ARGN}) - inference_base_test_run(${TARGET} - COMMAND ${analysis_test_COMMAND} - ARGS ${analysis_test_ARGS}) + set(options "") + set(oneValueArgs "") + set(multiValueArgs COMMAND ARGS) + cmake_parse_arguments(analysis_test "${options}" "${oneValueArgs}" + "${multiValueArgs}" ${ARGN}) + inference_base_test_run(${TARGET} COMMAND ${analysis_test_COMMAND} ARGS + ${analysis_test_ARGS}) endif() endfunction() function(inference_analysis_test TARGET) if(WITH_TESTING) - set(options "") - set(oneValueArgs "") - set(multiValueArgs SRCS ARGS EXTRA_DEPS) - cmake_parse_arguments(analysis_test "${options}" "${oneValueArgs}" "${multiValueArgs}" ${ARGN}) - inference_base_test_build(${TARGET} - SRCS ${analysis_test_SRCS} - DEPS analysis pass ${GLOB_PASS_LIB} ${analysis_test_EXTRA_DEPS}) - inference_base_test_run(${TARGET} - COMMAND ${TARGET} - ARGS ${analysis_test_ARGS}) + set(options "") + set(oneValueArgs "") + set(multiValueArgs SRCS ARGS EXTRA_DEPS) + cmake_parse_arguments(analysis_test "${options}" "${oneValueArgs}" + "${multiValueArgs}" ${ARGN}) + inference_base_test_build( + ${TARGET} + SRCS + ${analysis_test_SRCS} + DEPS + analysis + pass + ${GLOB_PASS_LIB} + ${analysis_test_EXTRA_DEPS}) + inference_base_test_run(${TARGET} COMMAND ${TARGET} ARGS + ${analysis_test_ARGS}) endif() endfunction(inference_analysis_test) - -if (NOT APPLE AND NOT WIN32) - inference_analysis_test(test_analyzer - SRCS analyzer_tester.cc - EXTRA_DEPS reset_tensor_array paddle_inference_shared - ARGS --inference_model_dir=${WORD2VEC_MODEL_DIR}) +if(NOT APPLE AND NOT WIN32) + inference_analysis_test( + test_analyzer + SRCS + analyzer_tester.cc + EXTRA_DEPS + reset_tensor_array + paddle_inference_shared + ARGS + --inference_model_dir=${WORD2VEC_MODEL_DIR}) elseif(WIN32) - inference_analysis_test(test_analyzer - SRCS analyzer_tester.cc - EXTRA_DEPS reset_tensor_array paddle_inference_api - ARGS --inference_model_dir=${WORD2VEC_MODEL_DIR}) + inference_analysis_test( + test_analyzer + SRCS + analyzer_tester.cc + EXTRA_DEPS + reset_tensor_array + paddle_inference_api + ARGS + --inference_model_dir=${WORD2VEC_MODEL_DIR}) endif() diff --git a/paddle/fluid/inference/analysis/ir_passes/CMakeLists.txt b/paddle/fluid/inference/analysis/ir_passes/CMakeLists.txt index 7faef7d391f..a7a561b7b37 100644 --- a/paddle/fluid/inference/analysis/ir_passes/CMakeLists.txt +++ b/paddle/fluid/inference/analysis/ir_passes/CMakeLists.txt @@ -1,34 +1,63 @@ -cc_library(subgraph_util SRCS subgraph_util.cc DEPS subgraph_detector) +cc_library( + subgraph_util + SRCS subgraph_util.cc + DEPS subgraph_detector) -if (WITH_GPU AND TENSORRT_FOUND) - cc_library(tensorrt_subgraph_pass SRCS tensorrt_subgraph_pass.cc DEPS subgraph_util tensorrt_op_teller infer_io_utils) +if(WITH_GPU AND TENSORRT_FOUND) + cc_library( + tensorrt_subgraph_pass + SRCS tensorrt_subgraph_pass.cc + DEPS subgraph_util tensorrt_op_teller infer_io_utils) - set(analysis_deps ${analysis_deps} - subgraph_util tensorrt_subgraph_pass - CACHE INTERNAL "") + set(analysis_deps + ${analysis_deps} subgraph_util tensorrt_subgraph_pass + CACHE INTERNAL "") - set(pass_file ${PADDLE_BINARY_DIR}/paddle/fluid/inference/api/paddle_inference_pass.h.tmp) + set(pass_file + ${PADDLE_BINARY_DIR}/paddle/fluid/inference/api/paddle_inference_pass.h.tmp + ) file(APPEND ${pass_file} "USE_PASS(tensorrt_subgraph_pass);\n") - set(INFER_IR_PASSES ${INFER_IR_PASSES} tensorrt_subgraph_pass CACHE INTERNAL "") + set(INFER_IR_PASSES + ${INFER_IR_PASSES} tensorrt_subgraph_pass + CACHE INTERNAL "") endif() -if (WITH_LITE) - cc_library(lite_subgraph_pass SRCS lite_subgraph_pass.cc DEPS ${analysis_deps} subgraph_util lite_op_teller) - set(analysis_deps ${analysis_deps} subgraph_util lite_subgraph_pass CACHE INTERNAL "") - set(pass_file ${PADDLE_BINARY_DIR}/paddle/fluid/inference/api/paddle_inference_pass.h.tmp) +if(WITH_LITE) + cc_library( + lite_subgraph_pass + SRCS lite_subgraph_pass.cc + DEPS ${analysis_deps} subgraph_util lite_op_teller) + set(analysis_deps + ${analysis_deps} subgraph_util lite_subgraph_pass + CACHE INTERNAL "") + set(pass_file + ${PADDLE_BINARY_DIR}/paddle/fluid/inference/api/paddle_inference_pass.h.tmp + ) file(APPEND ${pass_file} "USE_PASS(lite_subgraph_pass);\n") - set(INFER_IR_PASSES ${INFER_IR_PASSES} lite_subgraph_pass CACHE INTERNAL "") - cc_test(lite_subgraph_pass_tester SRCS lite_subgraph_pass_tester.cc DEPS lite_subgraph_pass gtest glog) + set(INFER_IR_PASSES + ${INFER_IR_PASSES} lite_subgraph_pass + CACHE INTERNAL "") + cc_test( + lite_subgraph_pass_tester + SRCS lite_subgraph_pass_tester.cc + DEPS lite_subgraph_pass gtest glog) endif() -MESSAGE("WITH_DLNNE:${WITH_DLNNE}") +message("WITH_DLNNE:${WITH_DLNNE}") if(WITH_DLNNE) - cc_library(dlnne_subgraph_pass SRCS dlnne_subgraph_pass.cc DEPS ${analysis_deps} subgraph_util) - set(analysis_deps ${analysis_deps} - subgraph_util dlnne_subgraph_pass - CACHE INTERNAL "") + cc_library( + dlnne_subgraph_pass + SRCS dlnne_subgraph_pass.cc + DEPS ${analysis_deps} subgraph_util) + set(analysis_deps + ${analysis_deps} subgraph_util dlnne_subgraph_pass + CACHE INTERNAL "") - set(pass_file ${PADDLE_BINARY_DIR}/paddle/fluid/inference/api/paddle_inference_pass.h.tmp) + set(pass_file + ${PADDLE_BINARY_DIR}/paddle/fluid/inference/api/paddle_inference_pass.h.tmp + ) file(APPEND ${pass_file} "USE_PASS(dlnne_subgraph_pass);\n") - set(INFER_IR_PASSES ${INFER_IR_PASSES} dlnne_subgraph_pass CACHE INTERNAL "") + set(INFER_IR_PASSES + ${INFER_IR_PASSES} dlnne_subgraph_pass + CACHE INTERNAL "") endif() diff --git a/paddle/fluid/inference/analysis/passes/CMakeLists.txt b/paddle/fluid/inference/analysis/passes/CMakeLists.txt index a950899a8a4..17bb8b6c62a 100644 --- a/paddle/fluid/inference/analysis/passes/CMakeLists.txt +++ b/paddle/fluid/inference/analysis/passes/CMakeLists.txt @@ -1,28 +1,55 @@ -cc_library(ir_graph_build_pass SRCS ir_graph_build_pass.cc DEPS analysis_pass argument ir_pass_manager) -cc_library(ir_analysis_pass SRCS ir_analysis_pass.cc DEPS analysis_pass argument ir_pass_manager) -cc_library(memory_optim_pass SRCS memory_optimize_pass.cc DEPS analysis_pass zero_copy_tensor) -cc_library(ir_params_sync_among_devices_pass SRCS ir_params_sync_among_devices_pass.cc DEPS analysis_pass argument ir_pass_manager) -cc_library(ir_graph_to_program_pass SRCS ir_graph_to_program_pass.cc DEPS analysis_pass graph_to_program_pass) -cc_library(adjust_cudnn_workspace_size_pass SRCS adjust_cudnn_workspace_size_pass.cc DEPS analysis_pass graph_to_program_pass) -cc_library(inference_op_replace_pass SRCS inference_op_replace_pass.cc DEPS analysis_pass graph_to_program_pass) -IF(WITH_TESTING) - cc_library(ir_graph_clean_pass SRCS ir_graph_clean_pass.cc DEPS analysis_pass gtest) -ELSE() - cc_library(ir_graph_clean_pass SRCS ir_graph_clean_pass.cc DEPS analysis_pass) -ENDIF() - -cc_library(analysis_passes SRCS passes.cc DEPS +cc_library( ir_graph_build_pass + SRCS ir_graph_build_pass.cc + DEPS analysis_pass argument ir_pass_manager) +cc_library( ir_analysis_pass + SRCS ir_analysis_pass.cc + DEPS analysis_pass argument ir_pass_manager) +cc_library( + memory_optim_pass + SRCS memory_optimize_pass.cc + DEPS analysis_pass zero_copy_tensor) +cc_library( ir_params_sync_among_devices_pass + SRCS ir_params_sync_among_devices_pass.cc + DEPS analysis_pass argument ir_pass_manager) +cc_library( + ir_graph_to_program_pass + SRCS ir_graph_to_program_pass.cc + DEPS analysis_pass graph_to_program_pass) +cc_library( adjust_cudnn_workspace_size_pass - memory_optim_pass + SRCS adjust_cudnn_workspace_size_pass.cc + DEPS analysis_pass graph_to_program_pass) +cc_library( inference_op_replace_pass - ir_graph_to_program_pass - ir_graph_clean_pass -) + SRCS inference_op_replace_pass.cc + DEPS analysis_pass graph_to_program_pass) +if(WITH_TESTING) + cc_library( + ir_graph_clean_pass + SRCS ir_graph_clean_pass.cc + DEPS analysis_pass gtest) +else() + cc_library( + ir_graph_clean_pass + SRCS ir_graph_clean_pass.cc + DEPS analysis_pass) +endif() + +cc_library( + analysis_passes + SRCS passes.cc + DEPS ir_graph_build_pass + ir_analysis_pass + ir_params_sync_among_devices_pass + adjust_cudnn_workspace_size_pass + memory_optim_pass + inference_op_replace_pass + ir_graph_to_program_pass + ir_graph_clean_pass) -set(analysis_deps ${analysis_deps} - analysis_passes - subgraph_detector - CACHE INTERNAL "") +set(analysis_deps + ${analysis_deps} analysis_passes subgraph_detector + CACHE INTERNAL "") diff --git a/paddle/fluid/inference/api/CMakeLists.txt b/paddle/fluid/inference/api/CMakeLists.txt index 56cc4aa755b..e25c5e96398 100755 --- a/paddle/fluid/inference/api/CMakeLists.txt +++ b/paddle/fluid/inference/api/CMakeLists.txt @@ -14,7 +14,7 @@ # if(APPLE) - set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -Wno-error=pessimizing-move") + set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -Wno-error=pessimizing-move") endif(APPLE) add_subdirectory(details) @@ -22,76 +22,139 @@ add_subdirectory(details) if(WITH_MKLDNN) set(mkldnn_quantizer_cfg mkldnn_quantizer_config) set(mkldnn_quantizer_src ${CMAKE_CURRENT_SOURCE_DIR}/mkldnn_quantizer.cc) - cc_library(${mkldnn_quantizer_cfg} SRCS mkldnn_quantizer_config.cc DEPS lod_tensor paddle_pass_builder) - set(mkldnn_quantizer_cfg ${mkldnn_quantizer_cfg} PARENT_SCOPE) + cc_library( + ${mkldnn_quantizer_cfg} + SRCS mkldnn_quantizer_config.cc + DEPS lod_tensor paddle_pass_builder) + set(mkldnn_quantizer_cfg + ${mkldnn_quantizer_cfg} + PARENT_SCOPE) endif() -cc_library(analysis_config SRCS analysis_config.cc DEPS ${mkldnn_quantizer_cfg} lod_tensor paddle_pass_builder table_printer utf8proc) -cc_library(paddle_infer_contrib SRCS paddle_infer_contrib.cc DEPS zero_copy_tensor) +cc_library( + analysis_config + SRCS analysis_config.cc + DEPS ${mkldnn_quantizer_cfg} lod_tensor paddle_pass_builder table_printer + utf8proc) +cc_library( + paddle_infer_contrib + SRCS paddle_infer_contrib.cc + DEPS zero_copy_tensor) cc_library(paddle_pass_builder SRCS paddle_pass_builder.cc) -set(paddle_inference_api_deps lod_tensor scope reset_tensor_array - analysis_config paddle_infer_contrib zero_copy_tensor trainer_desc_proto custom_operator) +set(paddle_inference_api_deps + lod_tensor + scope + reset_tensor_array + analysis_config + paddle_infer_contrib + zero_copy_tensor + trainer_desc_proto + custom_operator) if(WITH_CRYPTO) - list(APPEND paddle_inference_api_deps paddle_crypto) + list(APPEND paddle_inference_api_deps paddle_crypto) endif() -cc_library(paddle_inference_api SRCS api.cc api_impl.cc helper.cc DEPS ${paddle_inference_api_deps}) +cc_library( + paddle_inference_api + SRCS api.cc api_impl.cc helper.cc + DEPS ${paddle_inference_api_deps}) if(WIN32) - target_link_libraries(paddle_inference_api gflags) + target_link_libraries(paddle_inference_api gflags) endif() -set(inference_deps ${analysis_deps} paddle_inference_api analysis naive_executor ${GLOB_PASS_LIB}) +set(inference_deps ${analysis_deps} paddle_inference_api analysis + naive_executor ${GLOB_PASS_LIB}) if(WITH_GPU AND TENSORRT_FOUND) - set(inference_deps ${inference_deps} tensorrt_engine tensorrt_converter) + set(inference_deps ${inference_deps} tensorrt_engine tensorrt_converter) endif() -if (WITH_ONNXRUNTIME) - cc_library(analysis_predictor SRCS analysis_predictor.cc onnxruntime_predictor.cc resource_manager.cc infer_context.cc ${mkldnn_quantizer_src} DEPS ${inference_deps} - zero_copy_tensor ir_pass_manager op_compatible_info infer_io_utils onnxruntime paddle2onnx) -else (WITH_ONNXRUNTIME) - cc_library(analysis_predictor SRCS analysis_predictor.cc resource_manager.cc infer_context.cc ${mkldnn_quantizer_src} DEPS ${inference_deps} - zero_copy_tensor ir_pass_manager op_compatible_info infer_io_utils) -endif (WITH_ONNXRUNTIME) - - -cc_test(test_paddle_inference_api SRCS api_tester.cc DEPS paddle_inference_api) +if(WITH_ONNXRUNTIME) + cc_library( + analysis_predictor + SRCS analysis_predictor.cc onnxruntime_predictor.cc resource_manager.cc + infer_context.cc ${mkldnn_quantizer_src} + DEPS ${inference_deps} + zero_copy_tensor + ir_pass_manager + op_compatible_info + infer_io_utils + onnxruntime + paddle2onnx) +else(WITH_ONNXRUNTIME) + cc_library( + analysis_predictor + SRCS analysis_predictor.cc resource_manager.cc infer_context.cc + ${mkldnn_quantizer_src} + DEPS ${inference_deps} zero_copy_tensor ir_pass_manager op_compatible_info + infer_io_utils) +endif(WITH_ONNXRUNTIME) + +cc_test( + test_paddle_inference_api + SRCS api_tester.cc + DEPS paddle_inference_api) if(WITH_TESTING) - if (NOT APPLE AND NOT WIN32) - if (WITH_GPU) - inference_base_test(test_api_impl SRCS api_impl_tester.cc DEPS paddle_inference_shared - ARGS --word2vec_dirname=${WORD2VEC_MODEL_DIR} --book_dirname=${IMG_CLS_RESNET_INSTALL_DIR}) + if(NOT APPLE AND NOT WIN32) + if(WITH_GPU) + inference_base_test( + test_api_impl + SRCS + api_impl_tester.cc + DEPS + paddle_inference_shared + ARGS + --word2vec_dirname=${WORD2VEC_MODEL_DIR} + --book_dirname=${IMG_CLS_RESNET_INSTALL_DIR}) endif() elseif(WIN32) - inference_base_test(test_api_impl SRCS api_impl_tester.cc DEPS ${inference_deps} - ARGS --word2vec_dirname=${WORD2VEC_MODEL_DIR} --book_dirname=${IMG_CLS_RESNET_INSTALL_DIR}) + inference_base_test( + test_api_impl + SRCS + api_impl_tester.cc + DEPS + ${inference_deps} + ARGS + --word2vec_dirname=${WORD2VEC_MODEL_DIR} + --book_dirname=${IMG_CLS_RESNET_INSTALL_DIR}) endif() endif() -if (NOT APPLE AND NOT WIN32) - cc_test(test_analysis_predictor SRCS analysis_predictor_tester.cc DEPS paddle_inference_shared - ARGS --dirname=${WORD2VEC_MODEL_DIR}) -elseif (WIN32) - cc_test(test_analysis_predictor SRCS analysis_predictor_tester.cc DEPS analysis_predictor benchmark ${inference_deps} - ARGS --dirname=${WORD2VEC_MODEL_DIR}) +if(NOT APPLE AND NOT WIN32) + cc_test( + test_analysis_predictor + SRCS analysis_predictor_tester.cc + DEPS paddle_inference_shared ARGS --dirname=${WORD2VEC_MODEL_DIR}) +elseif(WIN32) + cc_test( + test_analysis_predictor + SRCS analysis_predictor_tester.cc + DEPS analysis_predictor benchmark ${inference_deps} ARGS + --dirname=${WORD2VEC_MODEL_DIR}) endif() if(WITH_TESTING AND WITH_MKLDNN) - if (NOT APPLE AND NOT WIN32) - cc_test(test_mkldnn_quantizer SRCS mkldnn_quantizer_tester.cc DEPS paddle_inference_shared ARGS --dirname=${WORD2VEC_MODEL_DIR}) - elseif (WIN32) - cc_test(test_mkldnn_quantizer SRCS mkldnn_quantizer_tester.cc DEPS analysis_predictor benchmark ${inference_deps} - ARGS --dirname=${WORD2VEC_MODEL_DIR}) + if(NOT APPLE AND NOT WIN32) + cc_test( + test_mkldnn_quantizer + SRCS mkldnn_quantizer_tester.cc + DEPS paddle_inference_shared ARGS --dirname=${WORD2VEC_MODEL_DIR}) + elseif(WIN32) + cc_test( + test_mkldnn_quantizer + SRCS mkldnn_quantizer_tester.cc + DEPS analysis_predictor benchmark ${inference_deps} ARGS + --dirname=${WORD2VEC_MODEL_DIR}) endif() endif() if(WITH_TESTING AND TEST test_api_impl) - if(NOT APPLE) - set_tests_properties(test_api_impl PROPERTIES TIMEOUT 120) - endif() + if(NOT APPLE) + set_tests_properties(test_api_impl PROPERTIES TIMEOUT 120) + endif() endif() diff --git a/paddle/fluid/inference/api/demo_ci/CMakeLists.txt b/paddle/fluid/inference/api/demo_ci/CMakeLists.txt index 547e265d2fd..a76ed63f106 100644 --- a/paddle/fluid/inference/api/demo_ci/CMakeLists.txt +++ b/paddle/fluid/inference/api/demo_ci/CMakeLists.txt @@ -1,31 +1,33 @@ cmake_minimum_required(VERSION 3.0) project(cpp_inference_demo CXX C) -option(WITH_MKL "Compile demo with MKL/OpenBlas support, default use MKL." ON) -option(WITH_GPU "Compile demo with GPU/CPU, default use CPU." OFF) -option(WITH_STATIC_LIB "Compile demo with static/shared library, default use static." ON) -option(USE_TENSORRT "Compile demo with TensorRT." OFF) -option(WITH_ONNXRUNTIME "Compile demo with ONNXRuntime" OFF) +option(WITH_MKL "Compile demo with MKL/OpenBlas support, default use MKL." ON) +option(WITH_GPU "Compile demo with GPU/CPU, default use CPU." OFF) +option(WITH_STATIC_LIB + "Compile demo with static/shared library, default use static." ON) +option(USE_TENSORRT "Compile demo with TensorRT." OFF) +option(WITH_ONNXRUNTIME "Compile demo with ONNXRuntime" OFF) if(NOT WITH_STATIC_LIB) add_definitions("-DPADDLE_WITH_SHARED_LIB") else() - # PD_INFER_DECL is mainly used to set the dllimport/dllexport attribute in dynamic library mode. + # PD_INFER_DECL is mainly used to set the dllimport/dllexport attribute in dynamic library mode. # Set it to empty in static library mode to avoid compilation issues. add_definitions("/DPD_INFER_DECL=") endif() macro(safe_set_static_flag) - foreach(flag_var - CMAKE_CXX_FLAGS CMAKE_CXX_FLAGS_DEBUG CMAKE_CXX_FLAGS_RELEASE - CMAKE_CXX_FLAGS_MINSIZEREL CMAKE_CXX_FLAGS_RELWITHDEBINFO) - if(${flag_var} MATCHES "/MD") - string(REGEX REPLACE "/MD" "/MT" ${flag_var} "${${flag_var}}") - endif(${flag_var} MATCHES "/MD") - endforeach(flag_var) + foreach(flag_var + CMAKE_CXX_FLAGS CMAKE_CXX_FLAGS_DEBUG CMAKE_CXX_FLAGS_RELEASE + CMAKE_CXX_FLAGS_MINSIZEREL CMAKE_CXX_FLAGS_RELWITHDEBINFO) + if(${flag_var} MATCHES "/MD") + string(REGEX REPLACE "/MD" "/MT" ${flag_var} "${${flag_var}}") + endif(${flag_var} MATCHES "/MD") + endforeach(flag_var) endmacro() if(NOT DEFINED PADDLE_LIB) - message(FATAL_ERROR "please set PADDLE_LIB with -DPADDLE_LIB=/path/paddle/lib") + message( + FATAL_ERROR "please set PADDLE_LIB with -DPADDLE_LIB=/path/paddle/lib") endif() if(NOT DEFINED DEMO_NAME) message(FATAL_ERROR "please set DEMO_NAME with -DDEMO_NAME=demo_name") @@ -47,7 +49,7 @@ link_directories("${PADDLE_LIB_THIRD_PARTY_PATH}gflags/lib") link_directories("${PADDLE_LIB_THIRD_PARTY_PATH}xxhash/lib") link_directories("${PADDLE_LIB_THIRD_PARTY_PATH}cryptopp/lib") link_directories("${PADDLE_LIB}/paddle/lib") -if (WITH_ONNXRUNTIME) +if(WITH_ONNXRUNTIME) include_directories("${PADDLE_LIB_THIRD_PARTY_PATH}onnxruntime/include") include_directories("${PADDLE_LIB_THIRD_PARTY_PATH}paddle2onnx/include") @@ -55,21 +57,25 @@ if (WITH_ONNXRUNTIME) link_directories("${PADDLE_LIB_THIRD_PARTY_PATH}paddle2onnx/lib") endif() -if (WIN32) +if(WIN32) add_definitions("/DGOOGLE_GLOG_DLL_DECL=") option(MSVC_STATIC_CRT "use static C Runtime library by default" ON) - if (MSVC_STATIC_CRT) - if (WITH_MKL) + if(MSVC_STATIC_CRT) + if(WITH_MKL) set(FLAG_OPENMP "/openmp") endif() set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} /wd4244 /wd4251 /wd4267 /wd4305") - set(CMAKE_C_FLAGS_DEBUG "${CMAKE_C_FLAGS_DEBUG} /bigobj /MTd ${FLAG_OPENMP}") - set(CMAKE_C_FLAGS_RELEASE "${CMAKE_C_FLAGS_RELEASE} /bigobj /MT ${FLAG_OPENMP}") + set(CMAKE_C_FLAGS_DEBUG + "${CMAKE_C_FLAGS_DEBUG} /bigobj /MTd ${FLAG_OPENMP}") + set(CMAKE_C_FLAGS_RELEASE + "${CMAKE_C_FLAGS_RELEASE} /bigobj /MT ${FLAG_OPENMP}") set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} /wd4244 /wd4251 /wd4267 /wd4305") - set(CMAKE_CXX_FLAGS_DEBUG "${CMAKE_CXX_FLAGS_DEBUG} /bigobj /MTd ${FLAG_OPENMP}") - set(CMAKE_CXX_FLAGS_RELEASE "${CMAKE_CXX_FLAGS_RELEASE} /bigobj /MT ${FLAG_OPENMP}") + set(CMAKE_CXX_FLAGS_DEBUG + "${CMAKE_CXX_FLAGS_DEBUG} /bigobj /MTd ${FLAG_OPENMP}") + set(CMAKE_CXX_FLAGS_RELEASE + "${CMAKE_CXX_FLAGS_RELEASE} /bigobj /MT ${FLAG_OPENMP}") safe_set_static_flag() - if (WITH_STATIC_LIB) + if(WITH_STATIC_LIB) add_definitions(-DSTATIC_LIB) endif() endif() @@ -82,42 +88,55 @@ endif() if(WITH_GPU) if(NOT WIN32) - set(CUDA_LIB "/usr/local/cuda/lib64/" CACHE STRING "CUDA Library") + set(CUDA_LIB + "/usr/local/cuda/lib64/" + CACHE STRING "CUDA Library") else() - set(CUDA_LIB "" CACHE STRING "CUDA_LIB") + set(CUDA_LIB + "" + CACHE STRING "CUDA_LIB") if("${CUDA_LIB}" STREQUAL "") if(DEFINED ENV{CUDA_PATH}) set(CUDA_LIB "$ENV{CUDA_PATH}\\lib\\x64") else() - set(CUDA_LIB "C:\\Program\ Files\\NVIDIA GPU Computing Toolkit\\CUDA\\v10.2\\lib\\x64") + set(CUDA_LIB + "C:\\Program\ Files\\NVIDIA GPU Computing Toolkit\\CUDA\\v10.2\\lib\\x64" + ) endif() endif() message(STATUS "Current CUDA lib path: ${CUDA_LIB}") endif(NOT WIN32) endif() -if (USE_TENSORRT AND WITH_GPU) - set(TENSORRT_ROOT "" CACHE STRING "The root directory of TensorRT library") +if(USE_TENSORRT AND WITH_GPU) + set(TENSORRT_ROOT + "" + CACHE STRING "The root directory of TensorRT library") if("${TENSORRT_ROOT}" STREQUAL "") - message(FATAL_ERROR "The TENSORRT_ROOT is empty, you must assign it a value with CMake command. Such as: -DTENSORRT_ROOT=TENSORRT_ROOT_PATH ") + message( + FATAL_ERROR + "The TENSORRT_ROOT is empty, you must assign it a value with CMake command. Such as: -DTENSORRT_ROOT=TENSORRT_ROOT_PATH " + ) endif() set(TENSORRT_INCLUDE_DIR ${TENSORRT_ROOT}/include) set(TENSORRT_LIB_DIR ${TENSORRT_ROOT}/lib) file(READ ${TENSORRT_INCLUDE_DIR}/NvInfer.h TENSORRT_VERSION_FILE_CONTENTS) - string(REGEX MATCH "define NV_TENSORRT_MAJOR +([0-9]+)" TENSORRT_MAJOR_VERSION - "${TENSORRT_VERSION_FILE_CONTENTS}") + string(REGEX MATCH "define NV_TENSORRT_MAJOR +([0-9]+)" + TENSORRT_MAJOR_VERSION "${TENSORRT_VERSION_FILE_CONTENTS}") if("${TENSORRT_MAJOR_VERSION}" STREQUAL "") - file(READ ${TENSORRT_INCLUDE_DIR}/NvInferVersion.h TENSORRT_VERSION_FILE_CONTENTS) - string(REGEX MATCH "define NV_TENSORRT_MAJOR +([0-9]+)" TENSORRT_MAJOR_VERSION - "${TENSORRT_VERSION_FILE_CONTENTS}") + file(READ ${TENSORRT_INCLUDE_DIR}/NvInferVersion.h + TENSORRT_VERSION_FILE_CONTENTS) + string(REGEX MATCH "define NV_TENSORRT_MAJOR +([0-9]+)" + TENSORRT_MAJOR_VERSION "${TENSORRT_VERSION_FILE_CONTENTS}") endif() if("${TENSORRT_MAJOR_VERSION}" STREQUAL "") message(SEND_ERROR "Failed to detect TensorRT version.") endif() string(REGEX REPLACE "define NV_TENSORRT_MAJOR +([0-9]+)" "\\1" - TENSORRT_MAJOR_VERSION "${TENSORRT_MAJOR_VERSION}") - message(STATUS "Current TensorRT header is ${TENSORRT_INCLUDE_DIR}/NvInfer.h. " - "Current TensorRT version is v${TENSORRT_MAJOR_VERSION}. ") + TENSORRT_MAJOR_VERSION "${TENSORRT_MAJOR_VERSION}") + message( + STATUS "Current TensorRT header is ${TENSORRT_INCLUDE_DIR}/NvInfer.h. " + "Current TensorRT version is v${TENSORRT_MAJOR_VERSION}. ") include_directories("${TENSORRT_INCLUDE_DIR}") link_directories("${TENSORRT_LIB_DIR}") endif() @@ -129,8 +148,9 @@ if(WITH_MKL) set(MATH_LIB ${MATH_LIB_PATH}/lib/mklml${CMAKE_STATIC_LIBRARY_SUFFIX} ${MATH_LIB_PATH}/lib/libiomp5md${CMAKE_STATIC_LIBRARY_SUFFIX}) else() - set(MATH_LIB ${MATH_LIB_PATH}/lib/libmklml_intel${CMAKE_SHARED_LIBRARY_SUFFIX} - ${MATH_LIB_PATH}/lib/libiomp5${CMAKE_SHARED_LIBRARY_SUFFIX}) + set(MATH_LIB + ${MATH_LIB_PATH}/lib/libmklml_intel${CMAKE_SHARED_LIBRARY_SUFFIX} + ${MATH_LIB_PATH}/lib/libiomp5${CMAKE_SHARED_LIBRARY_SUFFIX}) endif() set(MKLDNN_PATH "${PADDLE_LIB_THIRD_PARTY_PATH}mkldnn") if(EXISTS ${MKLDNN_PATH}) @@ -145,65 +165,99 @@ else() set(OPENBLAS_LIB_PATH "${PADDLE_LIB_THIRD_PARTY_PATH}openblas") include_directories("${OPENBLAS_LIB_PATH}/include/openblas") if(WIN32) - set(MATH_LIB ${OPENBLAS_LIB_PATH}/lib/openblas${CMAKE_STATIC_LIBRARY_SUFFIX}) + set(MATH_LIB + ${OPENBLAS_LIB_PATH}/lib/openblas${CMAKE_STATIC_LIBRARY_SUFFIX}) else() - set(MATH_LIB ${OPENBLAS_LIB_PATH}/lib/libopenblas${CMAKE_STATIC_LIBRARY_SUFFIX}) + set(MATH_LIB + ${OPENBLAS_LIB_PATH}/lib/libopenblas${CMAKE_STATIC_LIBRARY_SUFFIX}) endif() endif() if(WITH_STATIC_LIB) - set(DEPS ${PADDLE_LIB}/paddle/lib/libpaddle_inference${CMAKE_STATIC_LIBRARY_SUFFIX}) + set(DEPS + ${PADDLE_LIB}/paddle/lib/libpaddle_inference${CMAKE_STATIC_LIBRARY_SUFFIX} + ) else() if(WIN32) - set(DEPS ${PADDLE_LIB}/paddle/lib/paddle_inference${CMAKE_STATIC_LIBRARY_SUFFIX}) + set(DEPS + ${PADDLE_LIB}/paddle/lib/paddle_inference${CMAKE_STATIC_LIBRARY_SUFFIX}) else() - set(DEPS ${PADDLE_LIB}/paddle/lib/libpaddle_inference${CMAKE_SHARED_LIBRARY_SUFFIX}) + set(DEPS + ${PADDLE_LIB}/paddle/lib/libpaddle_inference${CMAKE_SHARED_LIBRARY_SUFFIX} + ) endif() endif() -if (WITH_ONNXRUNTIME) +if(WITH_ONNXRUNTIME) if(WIN32) - set(DEPS ${DEPS} ${PADDLE_LIB_THIRD_PARTY_PATH}onnxruntime/lib/onnxruntime.lib paddle2onnx) + set(DEPS + ${DEPS} ${PADDLE_LIB_THIRD_PARTY_PATH}onnxruntime/lib/onnxruntime.lib + paddle2onnx) elseif(APPLE) - set(DEPS ${DEPS} ${PADDLE_LIB_THIRD_PARTY_PATH}onnxruntime/lib/libonnxruntime.1.10.0.dylib paddle2onnx) + set(DEPS + ${DEPS} + ${PADDLE_LIB_THIRD_PARTY_PATH}onnxruntime/lib/libonnxruntime.1.10.0.dylib + paddle2onnx) else() - set(DEPS ${DEPS} ${PADDLE_LIB_THIRD_PARTY_PATH}onnxruntime/lib/libonnxruntime.so.1.10.0 paddle2onnx) + set(DEPS + ${DEPS} + ${PADDLE_LIB_THIRD_PARTY_PATH}onnxruntime/lib/libonnxruntime.so.1.10.0 + paddle2onnx) endif() endif() - -if (NOT WIN32) +if(NOT WIN32) set(EXTERNAL_LIB "-lrt -ldl -lpthread") - set(DEPS ${DEPS} - ${MATH_LIB} ${MKLDNN_LIB} - glog gflags protobuf xxhash cryptopp utf8proc + set(DEPS + ${DEPS} + ${MATH_LIB} + ${MKLDNN_LIB} + glog + gflags + protobuf + xxhash + cryptopp + utf8proc ${EXTERNAL_LIB}) else() - set(DEPS ${DEPS} - ${MATH_LIB} ${MKLDNN_LIB} - glog gflags_static libprotobuf xxhash cryptopp-static utf8proc_static + set(DEPS + ${DEPS} + ${MATH_LIB} + ${MKLDNN_LIB} + glog + gflags_static + libprotobuf + xxhash + cryptopp-static + utf8proc_static ${EXTERNAL_LIB}) set(DEPS ${DEPS} shlwapi.lib) endif(NOT WIN32) if(WITH_GPU) if(NOT WIN32) - if (USE_TENSORRT) - set(DEPS ${DEPS} ${TENSORRT_LIB_DIR}/libnvinfer${CMAKE_SHARED_LIBRARY_SUFFIX}) - set(DEPS ${DEPS} ${TENSORRT_LIB_DIR}/libnvinfer_plugin${CMAKE_SHARED_LIBRARY_SUFFIX}) + if(USE_TENSORRT) + set(DEPS ${DEPS} + ${TENSORRT_LIB_DIR}/libnvinfer${CMAKE_SHARED_LIBRARY_SUFFIX}) + set(DEPS + ${DEPS} + ${TENSORRT_LIB_DIR}/libnvinfer_plugin${CMAKE_SHARED_LIBRARY_SUFFIX}) endif() set(DEPS ${DEPS} ${CUDA_LIB}/libcudart${CMAKE_SHARED_LIBRARY_SUFFIX}) else() if(USE_TENSORRT) - set(DEPS ${DEPS} ${TENSORRT_LIB_DIR}/nvinfer${CMAKE_STATIC_LIBRARY_SUFFIX}) - set(DEPS ${DEPS} ${TENSORRT_LIB_DIR}/nvinfer_plugin${CMAKE_STATIC_LIBRARY_SUFFIX}) + set(DEPS ${DEPS} + ${TENSORRT_LIB_DIR}/nvinfer${CMAKE_STATIC_LIBRARY_SUFFIX}) + set(DEPS ${DEPS} + ${TENSORRT_LIB_DIR}/nvinfer_plugin${CMAKE_STATIC_LIBRARY_SUFFIX}) if(${TENSORRT_MAJOR_VERSION} GREATER_EQUAL 7) - set(DEPS ${DEPS} ${TENSORRT_LIB_DIR}/myelin64_1${CMAKE_STATIC_LIBRARY_SUFFIX}) + set(DEPS ${DEPS} + ${TENSORRT_LIB_DIR}/myelin64_1${CMAKE_STATIC_LIBRARY_SUFFIX}) endif() endif() - set(DEPS ${DEPS} ${CUDA_LIB}/cudart${CMAKE_STATIC_LIBRARY_SUFFIX} ) - set(DEPS ${DEPS} ${CUDA_LIB}/cublas${CMAKE_STATIC_LIBRARY_SUFFIX} ) - set(DEPS ${DEPS} ${CUDA_LIB}/cudnn${CMAKE_STATIC_LIBRARY_SUFFIX} ) + set(DEPS ${DEPS} ${CUDA_LIB}/cudart${CMAKE_STATIC_LIBRARY_SUFFIX}) + set(DEPS ${DEPS} ${CUDA_LIB}/cublas${CMAKE_STATIC_LIBRARY_SUFFIX}) + set(DEPS ${DEPS} ${CUDA_LIB}/cudnn${CMAKE_STATIC_LIBRARY_SUFFIX}) endif() endif() @@ -217,40 +271,61 @@ if(WIN32) endif() if(USE_TENSORRT) - add_custom_command(TARGET ${DEMO_NAME} POST_BUILD - COMMAND ${CMAKE_COMMAND} -E copy ${TENSORRT_LIB_DIR}/nvinfer${CMAKE_SHARED_LIBRARY_SUFFIX} - ${LIB_PATH} - COMMAND ${CMAKE_COMMAND} -E copy ${TENSORRT_LIB_DIR}/nvinfer_plugin${CMAKE_SHARED_LIBRARY_SUFFIX} - ${LIB_PATH} - ) + add_custom_command( + TARGET ${DEMO_NAME} + POST_BUILD + COMMAND + ${CMAKE_COMMAND} -E copy + ${TENSORRT_LIB_DIR}/nvinfer${CMAKE_SHARED_LIBRARY_SUFFIX} ${LIB_PATH} + COMMAND + ${CMAKE_COMMAND} -E copy + ${TENSORRT_LIB_DIR}/nvinfer_plugin${CMAKE_SHARED_LIBRARY_SUFFIX} + ${LIB_PATH}) if(${TENSORRT_MAJOR_VERSION} GREATER_EQUAL 7) - add_custom_command(TARGET ${DEMO_NAME} POST_BUILD - COMMAND ${CMAKE_COMMAND} -E copy ${TENSORRT_LIB_DIR}/myelin64_1${CMAKE_SHARED_LIBRARY_SUFFIX} - ${LIB_PATH}) + add_custom_command( + TARGET ${DEMO_NAME} + POST_BUILD + COMMAND + ${CMAKE_COMMAND} -E copy + ${TENSORRT_LIB_DIR}/myelin64_1${CMAKE_SHARED_LIBRARY_SUFFIX} + ${LIB_PATH}) endif() endif() if(WITH_MKL) - add_custom_command(TARGET ${DEMO_NAME} POST_BUILD - COMMAND ${CMAKE_COMMAND} -E copy ${MATH_LIB_PATH}/lib/mklml.dll ${LIB_PATH} - COMMAND ${CMAKE_COMMAND} -E copy ${MATH_LIB_PATH}/lib/libiomp5md.dll ${LIB_PATH} - COMMAND ${CMAKE_COMMAND} -E copy ${MKLDNN_PATH}/lib/mkldnn.dll ${LIB_PATH} - ) + add_custom_command( + TARGET ${DEMO_NAME} + POST_BUILD + COMMAND ${CMAKE_COMMAND} -E copy ${MATH_LIB_PATH}/lib/mklml.dll + ${LIB_PATH} + COMMAND ${CMAKE_COMMAND} -E copy ${MATH_LIB_PATH}/lib/libiomp5md.dll + ${LIB_PATH} + COMMAND ${CMAKE_COMMAND} -E copy ${MKLDNN_PATH}/lib/mkldnn.dll + ${LIB_PATH}) else() - add_custom_command(TARGET ${DEMO_NAME} POST_BUILD - COMMAND ${CMAKE_COMMAND} -E copy ${OPENBLAS_LIB_PATH}/lib/openblas.dll ${LIB_PATH} - ) + add_custom_command( + TARGET ${DEMO_NAME} + POST_BUILD + COMMAND ${CMAKE_COMMAND} -E copy ${OPENBLAS_LIB_PATH}/lib/openblas.dll + ${LIB_PATH}) endif() if(WITH_ONNXRUNTIME) - add_custom_command(TARGET ${DEMO_NAME} POST_BUILD - COMMAND ${CMAKE_COMMAND} -E copy ${PADDLE_LIB_THIRD_PARTY_PATH}onnxruntime/lib/onnxruntime.dll - ${LIB_PATH} - COMMAND ${CMAKE_COMMAND} -E copy ${PADDLE_LIB_THIRD_PARTY_PATH}paddle2onnx/lib/paddle2onnx.dll - ${LIB_PATH} - ) + add_custom_command( + TARGET ${DEMO_NAME} + POST_BUILD + COMMAND + ${CMAKE_COMMAND} -E copy + ${PADDLE_LIB_THIRD_PARTY_PATH}onnxruntime/lib/onnxruntime.dll + ${LIB_PATH} + COMMAND + ${CMAKE_COMMAND} -E copy + ${PADDLE_LIB_THIRD_PARTY_PATH}paddle2onnx/lib/paddle2onnx.dll + ${LIB_PATH}) endif() if(NOT WITH_STATIC_LIB) - add_custom_command(TARGET ${DEMO_NAME} POST_BUILD - COMMAND ${CMAKE_COMMAND} -E copy "${PADDLE_LIB}/paddle/lib/paddle_inference.dll" ${LIB_PATH} - ) + add_custom_command( + TARGET ${DEMO_NAME} + POST_BUILD + COMMAND ${CMAKE_COMMAND} -E copy + "${PADDLE_LIB}/paddle/lib/paddle_inference.dll" ${LIB_PATH}) endif() endif() diff --git a/paddle/fluid/inference/api/details/CMakeLists.txt b/paddle/fluid/inference/api/details/CMakeLists.txt index 0d7a8d57a9c..c1ff6ea68a2 100644 --- a/paddle/fluid/inference/api/details/CMakeLists.txt +++ b/paddle/fluid/inference/api/details/CMakeLists.txt @@ -13,13 +13,28 @@ # limitations under the License. # -cc_library(reset_tensor_array SRCS reset_tensor_array.cc DEPS lod_tensor scope) -if (WITH_ONNXRUNTIME) - cc_library(zero_copy_tensor SRCS zero_copy_tensor.cc DEPS scope lod_tensor enforce onnxruntime) - cc_library(zero_copy_tensor_dummy SRCS zero_copy_tensor_dummy.cc DEPS onnxruntime) -else (WITH_ONNXRUNTIME) - cc_library(zero_copy_tensor SRCS zero_copy_tensor.cc DEPS scope lod_tensor enforce) - cc_library(zero_copy_tensor_dummy SRCS zero_copy_tensor_dummy.cc) -endif (WITH_ONNXRUNTIME) +cc_library( + reset_tensor_array + SRCS reset_tensor_array.cc + DEPS lod_tensor scope) +if(WITH_ONNXRUNTIME) + cc_library( + zero_copy_tensor + SRCS zero_copy_tensor.cc + DEPS scope lod_tensor enforce onnxruntime) + cc_library( + zero_copy_tensor_dummy + SRCS zero_copy_tensor_dummy.cc + DEPS onnxruntime) +else(WITH_ONNXRUNTIME) + cc_library( + zero_copy_tensor + SRCS zero_copy_tensor.cc + DEPS scope lod_tensor enforce) + cc_library(zero_copy_tensor_dummy SRCS zero_copy_tensor_dummy.cc) +endif(WITH_ONNXRUNTIME) -cc_test(zero_copy_tensor_test SRCS zero_copy_tensor_test.cc DEPS paddle_inference_api) +cc_test( + zero_copy_tensor_test + SRCS zero_copy_tensor_test.cc + DEPS paddle_inference_api) diff --git a/paddle/fluid/inference/capi/CMakeLists.txt b/paddle/fluid/inference/capi/CMakeLists.txt index 32f780122bc..73ba41607aa 100644 --- a/paddle/fluid/inference/capi/CMakeLists.txt +++ b/paddle/fluid/inference/capi/CMakeLists.txt @@ -15,15 +15,22 @@ set(C_API_SRCS pd_config.cc pd_predictor.cc pd_tensor.cc c_api.cc) -cc_library(paddle_inference_c SRCS ${C_API_SRCS} DEPS paddle_inference) +cc_library( + paddle_inference_c + SRCS ${C_API_SRCS} + DEPS paddle_inference) if(NOT ON_INFER) - return() + return() endif() # Create inference capi shared library -cc_library(paddle_inference_c_shared SHARED SRCS ${C_API_SRCS} DEPS paddle_inference) -set_target_properties(paddle_inference_c_shared PROPERTIES OUTPUT_NAME paddle_inference_c) +cc_library( + paddle_inference_c_shared SHARED + SRCS ${C_API_SRCS} + DEPS paddle_inference) +set_target_properties(paddle_inference_c_shared PROPERTIES OUTPUT_NAME + paddle_inference_c) if(WIN32) - target_link_libraries(paddle_inference_c_shared shlwapi.lib) + target_link_libraries(paddle_inference_c_shared shlwapi.lib) endif() diff --git a/paddle/fluid/inference/capi_exp/CMakeLists.txt b/paddle/fluid/inference/capi_exp/CMakeLists.txt index 521d24329d4..e35e14a0c02 100644 --- a/paddle/fluid/inference/capi_exp/CMakeLists.txt +++ b/paddle/fluid/inference/capi_exp/CMakeLists.txt @@ -15,15 +15,22 @@ set(C_API_SRCS pd_config.cc pd_predictor.cc pd_tensor.cc pd_utils.cc) -cc_library(paddle_inference_c SRCS ${C_API_SRCS} DEPS paddle_inference) +cc_library( + paddle_inference_c + SRCS ${C_API_SRCS} + DEPS paddle_inference) if(NOT ON_INFER) - return() + return() endif() # Create inference capi shared library -cc_library(paddle_inference_c_shared SHARED SRCS ${C_API_SRCS} DEPS paddle_inference) -set_target_properties(paddle_inference_c_shared PROPERTIES OUTPUT_NAME paddle_inference_c) +cc_library( + paddle_inference_c_shared SHARED + SRCS ${C_API_SRCS} + DEPS paddle_inference) +set_target_properties(paddle_inference_c_shared PROPERTIES OUTPUT_NAME + paddle_inference_c) if(WIN32) - target_link_libraries(paddle_inference_c_shared shlwapi.lib) + target_link_libraries(paddle_inference_c_shared shlwapi.lib) endif() diff --git a/paddle/fluid/inference/experimental/javaapi/CMakeLists.txt b/paddle/fluid/inference/experimental/javaapi/CMakeLists.txt index 5b66d1de919..fc4a3c408df 100644 --- a/paddle/fluid/inference/experimental/javaapi/CMakeLists.txt +++ b/paddle/fluid/inference/experimental/javaapi/CMakeLists.txt @@ -1,5 +1,6 @@ include_directories($ENV{jni_path} $ENV{jni_sub_path} $ENV{paddle_path}) -find_library(PADDLE_INFERENCE_C libpaddle_inference_c.so HINTS $ENV{paddle_inference_lib}) +find_library(PADDLE_INFERENCE_C libpaddle_inference_c.so + HINTS $ENV{paddle_inference_lib}) aux_source_directory(native JNI_SRCS) add_library(paddle_inference SHARED ${JNI_SRCS}) target_link_libraries(paddle_inference ${PADDLE_INFERENCE_C}) diff --git a/paddle/fluid/inference/lite/CMakeLists.txt b/paddle/fluid/inference/lite/CMakeLists.txt index 6d981d007e7..7aa010cb006 100644 --- a/paddle/fluid/inference/lite/CMakeLists.txt +++ b/paddle/fluid/inference/lite/CMakeLists.txt @@ -2,8 +2,23 @@ if(XPU_SDK_ROOT) set(XPU_DEPS xpuapi xpurt) endif() -cc_library(lite_op_teller SRCS op_teller.cc DEPS ${LITE_DEPS} framework_proto device_context boost xxhash) -cc_library(lite_engine SRCS engine.cc DEPS ${LITE_DEPS} framework_proto ${XPU_DEPS}) -cc_library(lite_tensor_utils SRCS tensor_utils.cc DEPS memcpy ${LITE_DEPS} framework_proto boost device_context ${XPU_DEPS}) -cc_test(test_lite_engine SRCS test_engine_lite.cc DEPS lite_engine protobuf framework_proto glog gtest analysis) -cc_test(test_lite_tensor_utils SRCS test_tensor_utils.cc DEPS lite_engine lite_tensor_utils) +cc_library( + lite_op_teller + SRCS op_teller.cc + DEPS ${LITE_DEPS} framework_proto device_context boost xxhash) +cc_library( + lite_engine + SRCS engine.cc + DEPS ${LITE_DEPS} framework_proto ${XPU_DEPS}) +cc_library( + lite_tensor_utils + SRCS tensor_utils.cc + DEPS memcpy ${LITE_DEPS} framework_proto boost device_context ${XPU_DEPS}) +cc_test( + test_lite_engine + SRCS test_engine_lite.cc + DEPS lite_engine protobuf framework_proto glog gtest analysis) +cc_test( + test_lite_tensor_utils + SRCS test_tensor_utils.cc + DEPS lite_engine lite_tensor_utils) diff --git a/paddle/fluid/inference/tensorrt/CMakeLists.txt b/paddle/fluid/inference/tensorrt/CMakeLists.txt index c713e3a66ac..abd00ef9de6 100644 --- a/paddle/fluid/inference/tensorrt/CMakeLists.txt +++ b/paddle/fluid/inference/tensorrt/CMakeLists.txt @@ -1,11 +1,27 @@ # Compiling with WITH_PYTHON=ON and WITH_TENSORRT=ON failed on windows. Temporarily add paddle_inference_api dependency to solve the problem if(WIN32) - nv_library(tensorrt_engine SRCS engine.cc trt_int8_calibrator.cc DEPS ${GLOB_OPERATOR_DEPS} framework_proto device_context boost paddle_inference_api) + nv_library( + tensorrt_engine + SRCS engine.cc trt_int8_calibrator.cc + DEPS ${GLOB_OPERATOR_DEPS} framework_proto device_context boost + paddle_inference_api) else() - nv_library(tensorrt_engine SRCS engine.cc trt_int8_calibrator.cc DEPS ${GLOB_OPERATOR_DEPS} framework_proto device_context boost) + nv_library( + tensorrt_engine + SRCS engine.cc trt_int8_calibrator.cc + DEPS ${GLOB_OPERATOR_DEPS} framework_proto device_context boost) endif() -nv_library(tensorrt_op_teller SRCS op_teller.cc DEPS framework_proto device_context boost) -nv_test(test_tensorrt SRCS test_tensorrt.cc DEPS dynload_cuda device_context dynamic_loader) -nv_test(test_tensorrt_engine SRCS test_engine.cc DEPS dynload_cuda tensorrt_engine) +nv_library( + tensorrt_op_teller + SRCS op_teller.cc + DEPS framework_proto device_context boost) +nv_test( + test_tensorrt + SRCS test_tensorrt.cc + DEPS dynload_cuda device_context dynamic_loader) +nv_test( + test_tensorrt_engine + SRCS test_engine.cc + DEPS dynload_cuda tensorrt_engine) add_subdirectory(plugin) add_subdirectory(convert) diff --git a/paddle/fluid/inference/tensorrt/convert/CMakeLists.txt b/paddle/fluid/inference/tensorrt/convert/CMakeLists.txt index 05ab3fb53e5..b27a584de2b 100644 --- a/paddle/fluid/inference/tensorrt/convert/CMakeLists.txt +++ b/paddle/fluid/inference/tensorrt/convert/CMakeLists.txt @@ -1,66 +1,70 @@ # Add TRT tests -nv_library(tensorrt_converter - SRCS matmul_op.cc - conv2d_op.cc - fc_op.cc - pool2d_op.cc - elementwise_op.cc - batch_norm_op.cc - activation_op.cc - unary_op.cc - softmax_op.cc - concat_op.cc - dropout_op.cc - group_norm_op.cc - pad_op.cc - split_op.cc - prelu_op.cc - leaky_relu_op.cc - gelu_op.cc - layer_norm_op.cc - multihead_matmul_op.cc - shuffle_channel_op.cc - swish_op.cc - instance_norm_op.cc - stack_op.cc - transpose_op.cc - flatten_op.cc - flatten_contiguous_range_op.cc - emb_eltwise_layernorm.cc - skip_layernorm.cc - scale_op.cc - slice_op.cc - hard_sigmoid_op.cc - hard_swish_op.cc - clip_op.cc - gather_op.cc - anchor_generator_op.cc - yolo_box_op.cc - yolo_box_head_op.cc - arg_max_op.cc - roi_align_op.cc - affine_channel_op.cc - multiclass_nms_op.cc - multiclass_nms3_op.cc - nearest_interp_op.cc - reshape_op.cc - reduce_op.cc - gather_nd_op.cc - tile_op.cc - conv3d_op.cc - mish_op.cc - nearest_interp_v2_op.cc - pool3d_op.cc - deformable_conv_op.cc - preln_emb_eltwise_layernorm.cc - strided_slice_op.cc - preln_skip_layernorm.cc - roll_op.cc - transformer_input_convert_op.cc - remove_padding_op.cc - recover_padding_op.cc - DEPS tensorrt_engine tensorrt_plugin operator scope framework_proto op_registry) +nv_library( + tensorrt_converter + SRCS matmul_op.cc + conv2d_op.cc + fc_op.cc + pool2d_op.cc + elementwise_op.cc + batch_norm_op.cc + activation_op.cc + unary_op.cc + softmax_op.cc + concat_op.cc + dropout_op.cc + group_norm_op.cc + pad_op.cc + split_op.cc + prelu_op.cc + leaky_relu_op.cc + gelu_op.cc + layer_norm_op.cc + multihead_matmul_op.cc + shuffle_channel_op.cc + swish_op.cc + instance_norm_op.cc + stack_op.cc + transpose_op.cc + flatten_op.cc + flatten_contiguous_range_op.cc + emb_eltwise_layernorm.cc + skip_layernorm.cc + scale_op.cc + slice_op.cc + hard_sigmoid_op.cc + hard_swish_op.cc + clip_op.cc + gather_op.cc + anchor_generator_op.cc + yolo_box_op.cc + yolo_box_head_op.cc + arg_max_op.cc + roi_align_op.cc + affine_channel_op.cc + multiclass_nms_op.cc + multiclass_nms3_op.cc + nearest_interp_op.cc + reshape_op.cc + reduce_op.cc + gather_nd_op.cc + tile_op.cc + conv3d_op.cc + mish_op.cc + nearest_interp_v2_op.cc + pool3d_op.cc + deformable_conv_op.cc + preln_emb_eltwise_layernorm.cc + strided_slice_op.cc + preln_skip_layernorm.cc + roll_op.cc + transformer_input_convert_op.cc + remove_padding_op.cc + recover_padding_op.cc + DEPS tensorrt_engine tensorrt_plugin operator scope framework_proto + op_registry) -nv_test(test_op_converter SRCS test_op_converter.cc DEPS - paddle_framework ${GLOB_OPERATOR_DEPS} tensorrt_engine tensorrt_converter) - +nv_test( + test_op_converter + SRCS test_op_converter.cc + DEPS paddle_framework ${GLOB_OPERATOR_DEPS} tensorrt_engine + tensorrt_converter) diff --git a/paddle/fluid/inference/tensorrt/plugin/CMakeLists.txt b/paddle/fluid/inference/tensorrt/plugin/CMakeLists.txt index ee1d6c1dc7d..0377c82838b 100644 --- a/paddle/fluid/inference/tensorrt/plugin/CMakeLists.txt +++ b/paddle/fluid/inference/tensorrt/plugin/CMakeLists.txt @@ -1,23 +1,35 @@ -nv_library(tensorrt_plugin - SRCS trt_plugin.cc split_op_plugin.cu elementwise_op_plugin.cu - prelu_op_plugin.cu gelu_op_plugin.cu - pool_op_plugin.cu swish_op_plugin.cu layer_norm_op_plugin.cu - instance_norm_op_plugin.cu emb_eltwise_layernorm_plugin.cu - qkv_to_context_plugin.cu skip_layernorm_op_plugin.cu slice_op_plugin.cu - hard_swish_op_plugin.cu stack_op_plugin.cu - anchor_generator_op_plugin.cu - yolo_box_op_plugin.cu - yolo_box_head_op_plugin.cu - roi_align_op_plugin.cu - gather_nd_op_plugin.cu - mish_op_plugin.cu - pool3d_op_plugin.cu - deformable_conv_op_plugin.cu - matmul_op_int8_plugin.cu - transformer_input_convert_plugin.cu - remove_padding_plugin.cu - recover_padding_plugin.cu - DEPS enforce tensorrt_engine prelu tensor bert_encoder_functor) +nv_library( + tensorrt_plugin + SRCS trt_plugin.cc + split_op_plugin.cu + elementwise_op_plugin.cu + prelu_op_plugin.cu + gelu_op_plugin.cu + pool_op_plugin.cu + swish_op_plugin.cu + layer_norm_op_plugin.cu + instance_norm_op_plugin.cu + emb_eltwise_layernorm_plugin.cu + qkv_to_context_plugin.cu + skip_layernorm_op_plugin.cu + slice_op_plugin.cu + hard_swish_op_plugin.cu + stack_op_plugin.cu + anchor_generator_op_plugin.cu + yolo_box_op_plugin.cu + yolo_box_head_op_plugin.cu + roi_align_op_plugin.cu + gather_nd_op_plugin.cu + mish_op_plugin.cu + pool3d_op_plugin.cu + deformable_conv_op_plugin.cu + matmul_op_int8_plugin.cu + transformer_input_convert_plugin.cu + remove_padding_plugin.cu + recover_padding_plugin.cu + DEPS enforce tensorrt_engine prelu tensor bert_encoder_functor) -nv_test(test_split_plugin SRCS test_split_plugin.cc DEPS - paddle_framework ${GLOB_OPERATOR_DEPS} tensorrt_plugin) +nv_test( + test_split_plugin + SRCS test_split_plugin.cc + DEPS paddle_framework ${GLOB_OPERATOR_DEPS} tensorrt_plugin) diff --git a/paddle/fluid/inference/tests/api/CMakeLists.txt b/paddle/fluid/inference/tests/api/CMakeLists.txt index fc85f836618..307af84fa36 100644 --- a/paddle/fluid/inference/tests/api/CMakeLists.txt +++ b/paddle/fluid/inference/tests/api/CMakeLists.txt @@ -1,409 +1,592 @@ -if (NOT APPLE AND NOT WIN32) - set(INFERENCE_EXTRA_DEPS paddle_inference_shared) +if(NOT APPLE AND NOT WIN32) + set(INFERENCE_EXTRA_DEPS paddle_inference_shared) else() - set(INFERENCE_EXTRA_DEPS paddle_inference_api paddle_inference_io ir_pass_manager analysis_predictor benchmark) + set(INFERENCE_EXTRA_DEPS paddle_inference_api paddle_inference_io + ir_pass_manager analysis_predictor benchmark) endif() if(WITH_GPU AND TENSORRT_FOUND) - set(INFERENCE_EXTRA_DEPS ${INFERENCE_EXTRA_DEPS} analysis ${analysis_deps}) + set(INFERENCE_EXTRA_DEPS ${INFERENCE_EXTRA_DEPS} analysis ${analysis_deps}) endif() function(download_data install_dir data_file check_sum) - string(REGEX MATCH "[^/\\]+$" file_name ${data_file}) - if (NOT EXISTS ${install_dir}/${file_name}) - inference_download_and_uncompress(${install_dir} ${INFERENCE_URL} ${data_file} ${check_sum}) - endif() + string(REGEX MATCH "[^/\\]+$" file_name ${data_file}) + if(NOT EXISTS ${install_dir}/${file_name}) + inference_download_and_uncompress(${install_dir} ${INFERENCE_URL} + ${data_file} ${check_sum}) + endif() endfunction() function(download_data_without_verify install_dir data_file) - string(REGEX MATCH "[^/\\]+$" file_name ${data_file}) - if (NOT EXISTS ${install_dir}/${file_name}) - inference_download_and_uncompress_without_verify(${install_dir} ${INFERENCE_URL} ${data_file}) - endif() + string(REGEX MATCH "[^/\\]+$" file_name ${data_file}) + if(NOT EXISTS ${install_dir}/${file_name}) + inference_download_and_uncompress_without_verify( + ${install_dir} ${INFERENCE_URL} ${data_file}) + endif() endfunction() function(download_int8_data install_dir data_file check_sum) - if (NOT EXISTS ${install_dir}/${data_file}) - inference_download_and_uncompress(${install_dir} ${INFERENCE_URL}/int8 ${data_file} ${check_sum}) - endif() + if(NOT EXISTS ${install_dir}/${data_file}) + inference_download_and_uncompress(${install_dir} ${INFERENCE_URL}/int8 + ${data_file} ${check_sum}) + endif() endfunction() function(download_int8_data_without_verify install_dir data_file) - if (NOT EXISTS ${install_dir}/${data_file}) - inference_download_and_uncompress_without_verify(${install_dir} ${INFERENCE_URL}/int8 ${data_file}) - endif() + if(NOT EXISTS ${install_dir}/${data_file}) + inference_download_and_uncompress_without_verify( + ${install_dir} ${INFERENCE_URL}/int8 ${data_file}) + endif() endfunction() function(download_bfloat16_data install_dir data_file check_sum) - if (NOT EXISTS ${install_dir}/${data_file}) - inference_download_and_uncompress(${install_dir} ${INFERENCE_URL}/bfloat16 ${data_file} ${check_sum}) - endif() + if(NOT EXISTS ${install_dir}/${data_file}) + inference_download_and_uncompress(${install_dir} ${INFERENCE_URL}/bfloat16 + ${data_file} ${check_sum}) + endif() endfunction() function(download_bfloat16_data_without_verify install_dir data_file) - if (NOT EXISTS ${install_dir}/${data_file}) - inference_download_and_uncompress_without_verify(${install_dir} ${INFERENCE_URL}/bfloat16 ${data_file}) - endif() + if(NOT EXISTS ${install_dir}/${data_file}) + inference_download_and_uncompress_without_verify( + ${install_dir} ${INFERENCE_URL}/bfloat16 ${data_file}) + endif() endfunction() function(download_GRU_data install_dir data_file check_sum) - if (NOT EXISTS ${install_dir}/${data_file}) - inference_download_and_uncompress(${install_dir} ${INFERENCE_URL}/gru ${data_file} ${check_sum}) - endif() + if(NOT EXISTS ${install_dir}/${data_file}) + inference_download_and_uncompress(${install_dir} ${INFERENCE_URL}/gru + ${data_file} ${check_sum}) + endif() endfunction() function(download_GRU_data_without_verify install_dir data_file) - if (NOT EXISTS ${install_dir}/${data_file}) - inference_download_and_uncompress_without_verify(${install_dir} ${INFERENCE_URL}/gru ${data_file}) - endif() + if(NOT EXISTS ${install_dir}/${data_file}) + inference_download_and_uncompress_without_verify( + ${install_dir} ${INFERENCE_URL}/gru ${data_file}) + endif() endfunction() function(download_quant_data install_dir data_file check_sum) - if (NOT EXISTS ${install_dir}/${data_file}) - inference_download_and_uncompress(${install_dir} ${INFERENCE_URL}/int8/QAT_models ${data_file} ${check_sum}) - endif() + if(NOT EXISTS ${install_dir}/${data_file}) + inference_download_and_uncompress( + ${install_dir} ${INFERENCE_URL}/int8/QAT_models ${data_file} ${check_sum}) + endif() endfunction() function(download_quant_data_without_verify install_dir data_file) - if (NOT EXISTS ${install_dir}/${data_file}) - inference_download_and_uncompress_without_verify(${install_dir} ${INFERENCE_URL}/int8/QAT_models ${data_file}) - endif() + if(NOT EXISTS ${install_dir}/${data_file}) + inference_download_and_uncompress_without_verify( + ${install_dir} ${INFERENCE_URL}/int8/QAT_models ${data_file}) + endif() endfunction() -function(download_model_and_data install_dir model_name model_check_sum data_name data_check_sum) - download_data(${install_dir} ${model_name} ${model_check_sum}) - download_data(${install_dir} ${data_name} ${data_check_sum}) +function(download_model_and_data install_dir model_name model_check_sum + data_name data_check_sum) + download_data(${install_dir} ${model_name} ${model_check_sum}) + download_data(${install_dir} ${data_name} ${data_check_sum}) endfunction() -function(download_model_and_data_without_verify install_dir model_name data_name) - download_data_without_verify(${install_dir} ${model_name}) - download_data_without_verify(${install_dir} ${data_name}) +function(download_model_and_data_without_verify install_dir model_name + data_name) + download_data_without_verify(${install_dir} ${model_name}) + download_data_without_verify(${install_dir} ${data_name}) endfunction() function(download_result install_dir result_name check_sum) - download_data(${install_dir} ${result_name} ${check_sum}) + download_data(${install_dir} ${result_name} ${check_sum}) endfunction() function(download_result_without_verify install_dir result_name) - download_data_without_verify(${install_dir} ${result_name}) + download_data_without_verify(${install_dir} ${result_name}) endfunction() function(inference_analysis_api_test target install_dir filename) - inference_analysis_test(${target} SRCS ${filename} - EXTRA_DEPS ${INFERENCE_EXTRA_DEPS} - ARGS --infer_model=${install_dir}/model --infer_data=${install_dir}/data.txt --refer_result=${install_dir}/result.txt) + inference_analysis_test( + ${target} + SRCS + ${filename} + EXTRA_DEPS + ${INFERENCE_EXTRA_DEPS} + ARGS + --infer_model=${install_dir}/model + --infer_data=${install_dir}/data.txt + --refer_result=${install_dir}/result.txt) endfunction() function(inference_analysis_api_int8_test target install_dir filename) - inference_analysis_test(${target} SRCS ${filename} - EXTRA_DEPS ${INFERENCE_EXTRA_DEPS} - ARGS --infer_model=${install_dir}/model - --infer_data=${install_dir}/data.txt - --refer_result=${install_dir}/result.txt - --accuracy=0.8 - --batch_size=5 - --enable_int8=true) + inference_analysis_test( + ${target} + SRCS + ${filename} + EXTRA_DEPS + ${INFERENCE_EXTRA_DEPS} + ARGS + --infer_model=${install_dir}/model + --infer_data=${install_dir}/data.txt + --refer_result=${install_dir}/result.txt + --accuracy=0.8 + --batch_size=5 + --enable_int8=true) endfunction() -function(inference_multiple_models_analysis_api_test target install_dir filename) - inference_analysis_test(${target} SRCS ${filename} - EXTRA_DEPS ${INFERENCE_EXTRA_DEPS} - ARGS --infer_model=${install_dir}/mobilenet_v2_models/1 --infer_model2=${install_dir}/mobilenet_v2_models/xx --infer_model3=${install_dir}/mobilenet_v2_models/3) +function(inference_multiple_models_analysis_api_test target install_dir + filename) + inference_analysis_test( + ${target} + SRCS + ${filename} + EXTRA_DEPS + ${INFERENCE_EXTRA_DEPS} + ARGS + --infer_model=${install_dir}/mobilenet_v2_models/1 + --infer_model2=${install_dir}/mobilenet_v2_models/xx + --infer_model3=${install_dir}/mobilenet_v2_models/3) endfunction() function(inference_analysis_api_test_build TARGET_NAME filename) - inference_analysis_test_build(${TARGET_NAME} SRCS ${filename} - EXTRA_DEPS ${INFERENCE_EXTRA_DEPS}) + inference_analysis_test_build(${TARGET_NAME} SRCS ${filename} EXTRA_DEPS + ${INFERENCE_EXTRA_DEPS}) endfunction() -function(inference_analysis_api_int8_test_run TARGET_NAME test_binary model_dir data_path) - inference_analysis_test_run(${TARGET_NAME} - COMMAND ${test_binary} - ARGS --infer_model=${model_dir}/model - --infer_data=${data_path} - --warmup_batch_size=${WARMUP_BATCH_SIZE} - --batch_size=50 - --enable_int8=true - --cpu_num_threads=${CPU_NUM_THREADS_ON_CI} - --iterations=2) +function(inference_analysis_api_int8_test_run TARGET_NAME test_binary model_dir + data_path) + inference_analysis_test_run( + ${TARGET_NAME} + COMMAND + ${test_binary} + ARGS + --infer_model=${model_dir}/model + --infer_data=${data_path} + --warmup_batch_size=${WARMUP_BATCH_SIZE} + --batch_size=50 + --enable_int8=true + --cpu_num_threads=${CPU_NUM_THREADS_ON_CI} + --iterations=2) endfunction() -function(inference_analysis_api_int8_test_run_custom_warmup_batch_size TARGET_NAME test_binary model_dir data_path warmup_batch_size) - set(WARMUP_BATCH_SIZE ${warmup_batch_size}) - inference_analysis_api_int8_test_run(${TARGET_NAME} ${test_binary} ${model_dir} ${data_path}) +function(inference_analysis_api_int8_test_run_custom_warmup_batch_size + TARGET_NAME test_binary model_dir data_path warmup_batch_size) + set(WARMUP_BATCH_SIZE ${warmup_batch_size}) + inference_analysis_api_int8_test_run(${TARGET_NAME} ${test_binary} + ${model_dir} ${data_path}) endfunction() -function(inference_analysis_api_bfloat16_test_run TARGET_NAME test_binary model_dir data_path) - inference_analysis_test_run(${TARGET_NAME} - COMMAND ${test_binary} - ARGS --infer_model=${model_dir}/model - --infer_data=${data_path} - --batch_size=50 - --enable_bf16=true - --paddle_num_threads=${CPU_NUM_THREADS_ON_CI} - --iterations=2) +function(inference_analysis_api_bfloat16_test_run TARGET_NAME test_binary + model_dir data_path) + inference_analysis_test_run( + ${TARGET_NAME} + COMMAND + ${test_binary} + ARGS + --infer_model=${model_dir}/model + --infer_data=${data_path} + --batch_size=50 + --enable_bf16=true + --paddle_num_threads=${CPU_NUM_THREADS_ON_CI} + --iterations=2) endfunction() -function(inference_analysis_api_object_dection_int8_test_run TARGET_NAME test_binary model_dir data_path) - inference_analysis_test_run(${TARGET_NAME} - COMMAND ${test_binary} - ARGS --infer_model=${model_dir}/model - --infer_data=${data_path} - --warmup_batch_size=10 - --batch_size=300 - --enable_int8=true - --cpu_num_threads=${CPU_NUM_THREADS_ON_CI} - --iterations=1) +function(inference_analysis_api_object_dection_int8_test_run TARGET_NAME + test_binary model_dir data_path) + inference_analysis_test_run( + ${TARGET_NAME} + COMMAND + ${test_binary} + ARGS + --infer_model=${model_dir}/model + --infer_data=${data_path} + --warmup_batch_size=10 + --batch_size=300 + --enable_int8=true + --cpu_num_threads=${CPU_NUM_THREADS_ON_CI} + --iterations=1) endfunction() function(inference_analysis_api_test_with_fake_data_build TARGET_NAME filename) - inference_analysis_test_build(${TARGET_NAME} SRCS ${filename} - EXTRA_DEPS ${INFERENCE_EXTRA_DEPS}) + inference_analysis_test_build(${TARGET_NAME} SRCS ${filename} EXTRA_DEPS + ${INFERENCE_EXTRA_DEPS}) endfunction() -function(inference_analysis_api_test_with_fake_data_run TARGET_NAME test_binary model_dir disable_fc) - inference_analysis_test_run(${TARGET_NAME} - COMMAND ${test_binary} - ARGS --infer_model=${model_dir}/model - --disable_mkldnn_fc=${disable_fc}) +function(inference_analysis_api_test_with_fake_data_run TARGET_NAME test_binary + model_dir disable_fc) + inference_analysis_test_run( + ${TARGET_NAME} COMMAND ${test_binary} ARGS --infer_model=${model_dir}/model + --disable_mkldnn_fc=${disable_fc}) endfunction() -function(inference_analysis_api_quant_test_run TARGET_NAME test_binary fp32_model_dir int8_model_dir data_path enable_quant_int8) - inference_analysis_test_run(${TARGET_NAME} - COMMAND ${test_binary} - ARGS --fp32_model=${fp32_model_dir} - --int8_model=${int8_model_dir} - --infer_data=${data_path} - --batch_size=50 - --enable_int8=true - --enable_quant_int8=${enable_quant_int8} - --cpu_num_threads=${CPU_NUM_THREADS_ON_CI} - --with_accuracy_layer=false - --iterations=2) +function( + inference_analysis_api_quant_test_run + TARGET_NAME + test_binary + fp32_model_dir + int8_model_dir + data_path + enable_quant_int8) + inference_analysis_test_run( + ${TARGET_NAME} + COMMAND + ${test_binary} + ARGS + --fp32_model=${fp32_model_dir} + --int8_model=${int8_model_dir} + --infer_data=${data_path} + --batch_size=50 + --enable_int8=true + --enable_quant_int8=${enable_quant_int8} + --cpu_num_threads=${CPU_NUM_THREADS_ON_CI} + --with_accuracy_layer=false + --iterations=2) endfunction() -function(inference_analysis_api_lexical_test_run TARGET_NAME test_binary infer_model data_path) - inference_analysis_test_run(${TARGET_NAME} - COMMAND ${test_binary} - ARGS --infer_model=${infer_model} - --infer_data=${data_path} - --batch_size=50 - --cpu_num_threads=${CPU_NUM_THREADS_ON_CI} - --with_accuracy_layer=true - --use_analysis=true - --iterations=2) +function(inference_analysis_api_lexical_test_run TARGET_NAME test_binary + infer_model data_path) + inference_analysis_test_run( + ${TARGET_NAME} + COMMAND + ${test_binary} + ARGS + --infer_model=${infer_model} + --infer_data=${data_path} + --batch_size=50 + --cpu_num_threads=${CPU_NUM_THREADS_ON_CI} + --with_accuracy_layer=true + --use_analysis=true + --iterations=2) endfunction() -function(inference_analysis_api_lexical_bfloat16_test_run TARGET_NAME test_binary infer_model data_path) - inference_analysis_test_run(${TARGET_NAME} - COMMAND ${test_binary} - ARGS --infer_model=${infer_model} - --infer_data=${data_path} - --batch_size=50 - --cpu_num_threads=${CPU_NUM_THREADS_ON_CI} - --with_accuracy_layer=true - --use_analysis=true - --enable_bf16=true - --iterations=2) +function(inference_analysis_api_lexical_bfloat16_test_run TARGET_NAME + test_binary infer_model data_path) + inference_analysis_test_run( + ${TARGET_NAME} + COMMAND + ${test_binary} + ARGS + --infer_model=${infer_model} + --infer_data=${data_path} + --batch_size=50 + --cpu_num_threads=${CPU_NUM_THREADS_ON_CI} + --with_accuracy_layer=true + --use_analysis=true + --enable_bf16=true + --iterations=2) endfunction() -function(inference_analysis_api_lexical_int8_test_run TARGET_NAME test_binary infer_model data_path fuse_multi_gru) - inference_analysis_test_run(${TARGET_NAME} - COMMAND ${test_binary} - ARGS --infer_model=${infer_model} - --infer_data=${data_path} - --batch_size=100 - --cpu_num_threads=${CPU_NUM_THREADS_ON_CI} - --with_accuracy_layer=true - --use_analysis=true - --enable_int8=true - --quantized_accuracy=0.01 - --fuse_multi_gru=${fuse_multi_gru} - --iterations=4) +function(inference_analysis_api_lexical_int8_test_run TARGET_NAME test_binary + infer_model data_path fuse_multi_gru) + inference_analysis_test_run( + ${TARGET_NAME} + COMMAND + ${test_binary} + ARGS + --infer_model=${infer_model} + --infer_data=${data_path} + --batch_size=100 + --cpu_num_threads=${CPU_NUM_THREADS_ON_CI} + --with_accuracy_layer=true + --use_analysis=true + --enable_int8=true + --quantized_accuracy=0.01 + --fuse_multi_gru=${fuse_multi_gru} + --iterations=4) endfunction() -function(preprocess_data2bin_test_run target py_script_source data_dir output_file) - py_test(${target} SRCS ${CMAKE_CURRENT_SOURCE_DIR}/${py_script_source} - ARGS --data_dir=${data_dir} - --output_file=${output_file} - --local) +function(preprocess_data2bin_test_run target py_script_source data_dir + output_file) + py_test(${target} + SRCS ${CMAKE_CURRENT_SOURCE_DIR}/${py_script_source} ARGS + --data_dir=${data_dir} --output_file=${output_file} --local) endfunction() if(NOT APPLE AND WITH_MKLML) - # RNN1 - set(RNN1_INSTALL_DIR "${INFERENCE_DEMO_INSTALL_DIR}/rnn1") - download_model_and_data_without_verify(${RNN1_INSTALL_DIR} "rnn1/model.tar.gz" "rnn1/data.txt.tar.gz") - inference_analysis_api_test(test_analyzer_rnn1 ${RNN1_INSTALL_DIR} analyzer_rnn1_tester.cc) - - # seq_pool1 - set(SEQ_POOL1_INSTALL_DIR "${INFERENCE_DEMO_INSTALL_DIR}/seq_pool") - download_model_and_data_without_verify(${SEQ_POOL1_INSTALL_DIR} "seq_pool1_model_.tar.gz" "seq_pool1_data.txt.tar.gz") - inference_analysis_api_test(test_analyzer_seq_pool1_compare_determine ${SEQ_POOL1_INSTALL_DIR} analyzer_seq_pool1_compare_determine_tester.cc) - inference_analysis_api_test(test_analyzer_seq_pool1 ${SEQ_POOL1_INSTALL_DIR} analyzer_seq_pool1_compare_tester.cc) - inference_analysis_api_test(test_analyzer_seq_pool1_fuse_compare_zero_copy ${SEQ_POOL1_INSTALL_DIR} analyzer_seq_pool1_fuse_compare_zero_copy_tester.cc) - inference_analysis_api_test(test_analyzer_seq_pool1_fuse_statis ${SEQ_POOL1_INSTALL_DIR} analyzer_seq_pool1_fuse_statis_tester.cc) - inference_analysis_api_test(test_analyzer_seq_pool1_profile ${SEQ_POOL1_INSTALL_DIR} analyzer_seq_pool1_profile_tester.cc) - if(NOT WIN32 AND NOT "$ENV{CI_SKIP_CPP_TEST}" STREQUAL "ON") - set_tests_properties(test_analyzer_seq_pool1_compare_determine PROPERTIES TIMEOUT 120) - set_tests_properties(test_analyzer_seq_pool1 PROPERTIES TIMEOUT 120) - set_tests_properties(test_analyzer_seq_pool1_fuse_compare_zero_copy PROPERTIES TIMEOUT 120) - set_tests_properties(test_analyzer_seq_pool1_fuse_statis PROPERTIES TIMEOUT 120) - set_tests_properties(test_analyzer_seq_pool1_profile PROPERTIES TIMEOUT 120) - endif() + # RNN1 + set(RNN1_INSTALL_DIR "${INFERENCE_DEMO_INSTALL_DIR}/rnn1") + download_model_and_data_without_verify( + ${RNN1_INSTALL_DIR} "rnn1/model.tar.gz" "rnn1/data.txt.tar.gz") + inference_analysis_api_test(test_analyzer_rnn1 ${RNN1_INSTALL_DIR} + analyzer_rnn1_tester.cc) + + # seq_pool1 + set(SEQ_POOL1_INSTALL_DIR "${INFERENCE_DEMO_INSTALL_DIR}/seq_pool") + download_model_and_data_without_verify( + ${SEQ_POOL1_INSTALL_DIR} "seq_pool1_model_.tar.gz" + "seq_pool1_data.txt.tar.gz") + inference_analysis_api_test( + test_analyzer_seq_pool1_compare_determine ${SEQ_POOL1_INSTALL_DIR} + analyzer_seq_pool1_compare_determine_tester.cc) + inference_analysis_api_test(test_analyzer_seq_pool1 ${SEQ_POOL1_INSTALL_DIR} + analyzer_seq_pool1_compare_tester.cc) + inference_analysis_api_test( + test_analyzer_seq_pool1_fuse_compare_zero_copy ${SEQ_POOL1_INSTALL_DIR} + analyzer_seq_pool1_fuse_compare_zero_copy_tester.cc) + inference_analysis_api_test( + test_analyzer_seq_pool1_fuse_statis ${SEQ_POOL1_INSTALL_DIR} + analyzer_seq_pool1_fuse_statis_tester.cc) + inference_analysis_api_test( + test_analyzer_seq_pool1_profile ${SEQ_POOL1_INSTALL_DIR} + analyzer_seq_pool1_profile_tester.cc) + if(NOT WIN32 AND NOT "$ENV{CI_SKIP_CPP_TEST}" STREQUAL "ON") + set_tests_properties(test_analyzer_seq_pool1_compare_determine + PROPERTIES TIMEOUT 120) + set_tests_properties(test_analyzer_seq_pool1 PROPERTIES TIMEOUT 120) + set_tests_properties(test_analyzer_seq_pool1_fuse_compare_zero_copy + PROPERTIES TIMEOUT 120) + set_tests_properties(test_analyzer_seq_pool1_fuse_statis PROPERTIES TIMEOUT + 120) + set_tests_properties(test_analyzer_seq_pool1_profile PROPERTIES TIMEOUT 120) + endif() else() - # TODO: fix this test on MACOS and OPENBLAS, the reason is that - # fusion_seqexpand_concat_fc_op is not supported on MACOS and OPENBLAS - message(WARNING "These tests has been disabled in OSX or WITH_MKL=OFF before being fixed: \n test_analyzer_rnn1") - message(WARNING "These tests has been disabled in OSX or WITH_MKL=OFF before being fixed: \n test_analyzer_seq_pool1") + # TODO: fix this test on MACOS and OPENBLAS, the reason is that + # fusion_seqexpand_concat_fc_op is not supported on MACOS and OPENBLAS + message( + WARNING + "These tests has been disabled in OSX or WITH_MKL=OFF before being fixed: \n test_analyzer_rnn1" + ) + message( + WARNING + "These tests has been disabled in OSX or WITH_MKL=OFF before being fixed: \n test_analyzer_seq_pool1" + ) endif() - # RNN2 set(RNN2_INSTALL_DIR "${INFERENCE_DEMO_INSTALL_DIR}/rnn2") -download_model_and_data_without_verify(${RNN2_INSTALL_DIR} "rnn2_model.tar.gz" "rnn2_data.txt.tar.gz") -inference_analysis_api_test(test_analyzer_rnn2 ${RNN2_INSTALL_DIR} analyzer_rnn2_tester.cc) +download_model_and_data_without_verify(${RNN2_INSTALL_DIR} "rnn2_model.tar.gz" + "rnn2_data.txt.tar.gz") +inference_analysis_api_test(test_analyzer_rnn2 ${RNN2_INSTALL_DIR} + analyzer_rnn2_tester.cc) # TODO(luotao, Superjom) Disable DAM test, temporarily fix # https://github.com/PaddlePaddle/Paddle/issues/15032#issuecomment-455990914. # After inference framework refactor, will reopen it. # normal DAM set(DAM_INSTALL_DIR "${INFERENCE_DEMO_INSTALL_DIR}/dam") -download_model_and_data_without_verify(${DAM_INSTALL_DIR} "DAM_model.tar.gz" "DAM_data.txt.tar.gz") +download_model_and_data_without_verify(${DAM_INSTALL_DIR} "DAM_model.tar.gz" + "DAM_data.txt.tar.gz") #inference_analysis_api_test(test_analyzer_dam ${DAM_INSTALL_DIR} analyzer_dam_tester.cc EXTRA_DEPS legacy_allocator) # small DAM set(DAM_SMALL_INSTALL_DIR "${INFERENCE_DEMO_INSTALL_DIR}/small_dam") -download_model_and_data_without_verify(${DAM_SMALL_INSTALL_DIR} "dam_small_model.tar.gz" "dam_small_data.txt.tar.gz") -inference_analysis_test(test_analyzer_small_dam SRCS analyzer_dam_tester.cc - EXTRA_DEPS ${INFERENCE_EXTRA_DEPS} - ARGS --infer_model=${DAM_SMALL_INSTALL_DIR}/model --infer_data=${DAM_SMALL_INSTALL_DIR}/data.txt) - -#save model -inference_analysis_api_test(test_analyzer_save_model ${DAM_SMALL_INSTALL_DIR} analyzer_save_model_tester.cc) +download_model_and_data_without_verify( + ${DAM_SMALL_INSTALL_DIR} "dam_small_model.tar.gz" "dam_small_data.txt.tar.gz") +inference_analysis_test( + test_analyzer_small_dam + SRCS + analyzer_dam_tester.cc + EXTRA_DEPS + ${INFERENCE_EXTRA_DEPS} + ARGS + --infer_model=${DAM_SMALL_INSTALL_DIR}/model + --infer_data=${DAM_SMALL_INSTALL_DIR}/data.txt) + +#save model +inference_analysis_api_test(test_analyzer_save_model ${DAM_SMALL_INSTALL_DIR} + analyzer_save_model_tester.cc) # chinese_ner set(CHINESE_NER_INSTALL_DIR "${INFERENCE_DEMO_INSTALL_DIR}/chinese_ner") -download_model_and_data_without_verify(${CHINESE_NER_INSTALL_DIR} "chinese_ner_model.tar.gz" "chinese_ner-data.txt.tar.gz") -inference_analysis_api_test(test_analyzer_ner ${CHINESE_NER_INSTALL_DIR} analyzer_ner_tester.cc) +download_model_and_data_without_verify( + ${CHINESE_NER_INSTALL_DIR} "chinese_ner_model.tar.gz" + "chinese_ner-data.txt.tar.gz") +inference_analysis_api_test(test_analyzer_ner ${CHINESE_NER_INSTALL_DIR} + analyzer_ner_tester.cc) # lac set(LAC_INSTALL_DIR "${INFERENCE_DEMO_INSTALL_DIR}/lac") -download_model_and_data(${LAC_INSTALL_DIR} "lac_model.tar.gz" 419ca6eb85f57a01bfe173591910aec5 "lac_data.txt.tar.gz" 9983539cd6b34fbdc411e43422776bfd) -inference_analysis_api_test(test_analyzer_lac ${LAC_INSTALL_DIR} analyzer_lac_tester.cc) +download_model_and_data( + ${LAC_INSTALL_DIR} "lac_model.tar.gz" 419ca6eb85f57a01bfe173591910aec5 + "lac_data.txt.tar.gz" 9983539cd6b34fbdc411e43422776bfd) +inference_analysis_api_test(test_analyzer_lac ${LAC_INSTALL_DIR} + analyzer_lac_tester.cc) # Pyramid DNN set(PYRAMID_DNN_INSTALL_DIR "${INFERENCE_DEMO_INSTALL_DIR}/pyramid_dnn") -download_model_and_data_without_verify(${PYRAMID_DNN_INSTALL_DIR} "PyramidDNN_model.tar.gz" "PyramidDNN_data.txt.tar.gz") -inference_analysis_api_test(test_analyzer_pyramid_dnn ${PYRAMID_DNN_INSTALL_DIR} analyzer_pyramid_dnn_tester.cc) +download_model_and_data_without_verify( + ${PYRAMID_DNN_INSTALL_DIR} "PyramidDNN_model.tar.gz" + "PyramidDNN_data.txt.tar.gz") +inference_analysis_api_test( + test_analyzer_pyramid_dnn ${PYRAMID_DNN_INSTALL_DIR} + analyzer_pyramid_dnn_tester.cc) # Ernie set(ERNIE_INSTALL_DIR "${INFERENCE_DEMO_INSTALL_DIR}/Ernie") -download_model_and_data(${ERNIE_INSTALL_DIR} "Ernie_model.tar.gz" aa59192dd41ed377f9f168e3a1309fa6 "Ernie_data.txt.tar.gz" 5396e63548edad7ca561e7e26a9476d1) -download_result(${ERNIE_INSTALL_DIR} "Ernie_result.txt.tar.gz" 73beea65abda2edb61c1662cd3180c62) -if (WITH_GPU) - inference_analysis_api_test(test_analyzer_ernie ${ERNIE_INSTALL_DIR} analyzer_ernie_tester.cc) +download_model_and_data( + ${ERNIE_INSTALL_DIR} "Ernie_model.tar.gz" aa59192dd41ed377f9f168e3a1309fa6 + "Ernie_data.txt.tar.gz" 5396e63548edad7ca561e7e26a9476d1) +download_result(${ERNIE_INSTALL_DIR} "Ernie_result.txt.tar.gz" + 73beea65abda2edb61c1662cd3180c62) +if(WITH_GPU) + inference_analysis_api_test(test_analyzer_ernie ${ERNIE_INSTALL_DIR} + analyzer_ernie_tester.cc) endif() -inference_analysis_api_int8_test(test_analyzer_ernie_int8 ${ERNIE_INSTALL_DIR} analyzer_ernie_int8_tester.cc) +inference_analysis_api_int8_test(test_analyzer_ernie_int8 ${ERNIE_INSTALL_DIR} + analyzer_ernie_int8_tester.cc) # Ernie large set(ERNIE_INSTALL_DIR "${INFERENCE_DEMO_INSTALL_DIR}/Ernie_Large") -download_model_and_data(${ERNIE_INSTALL_DIR} "Ernie_large_model.tar.gz" af7715245ed32cc77374625d4c80f7ef "Ernie_large_data.txt.tar.gz" edb2113eec93783cad56ed76d47ba57f) -download_result(${ERNIE_INSTALL_DIR} "Ernie_large_result.txt.tar.gz" 1facda98eef1085dc9d435ebf3f23a73) -inference_analysis_test(test_analyzer_ernie_large SRCS analyzer_ernie_tester.cc - EXTRA_DEPS ${INFERENCE_EXTRA_DEPS} - ARGS --infer_model=${ERNIE_INSTALL_DIR}/model --infer_data=${ERNIE_INSTALL_DIR}/data.txt --refer_result=${ERNIE_INSTALL_DIR}/result.txt --ernie_large=true) -if(NOT WIN32 AND NOT APPLE AND TEST test_analyzer_ernie_large) - set_tests_properties(test_analyzer_ernie_large PROPERTIES TIMEOUT 150 LABELS "RUN_TYPE=NIGHTLY") +download_model_and_data( + ${ERNIE_INSTALL_DIR} "Ernie_large_model.tar.gz" + af7715245ed32cc77374625d4c80f7ef "Ernie_large_data.txt.tar.gz" + edb2113eec93783cad56ed76d47ba57f) +download_result(${ERNIE_INSTALL_DIR} "Ernie_large_result.txt.tar.gz" + 1facda98eef1085dc9d435ebf3f23a73) +inference_analysis_test( + test_analyzer_ernie_large + SRCS + analyzer_ernie_tester.cc + EXTRA_DEPS + ${INFERENCE_EXTRA_DEPS} + ARGS + --infer_model=${ERNIE_INSTALL_DIR}/model + --infer_data=${ERNIE_INSTALL_DIR}/data.txt + --refer_result=${ERNIE_INSTALL_DIR}/result.txt + --ernie_large=true) +if(NOT WIN32 + AND NOT APPLE + AND TEST test_analyzer_ernie_large) + set_tests_properties(test_analyzer_ernie_large PROPERTIES TIMEOUT 150 LABELS + "RUN_TYPE=NIGHTLY") endif() -if (WIN32 AND TEST test_analyzer_ernie_large) - set_tests_properties(test_analyzer_ernie_large PROPERTIES TIMEOUT 200) +if(WIN32 AND TEST test_analyzer_ernie_large) + set_tests_properties(test_analyzer_ernie_large PROPERTIES TIMEOUT 200) endif() # text_classification -set(TEXT_CLASSIFICATION_INSTALL_DIR "${INFERENCE_DEMO_INSTALL_DIR}/text_classification") -download_model_and_data(${TEXT_CLASSIFICATION_INSTALL_DIR} "text-classification-Senta.tar.gz" 3f0f440313ca50e26184e65ffd5809ab "text_classification_data.txt.tar.gz" 36ae620020cc3377f45ed330dd36238f) -inference_analysis_api_test(test_analyzer_text_classification ${TEXT_CLASSIFICATION_INSTALL_DIR} analyzer_text_classification_tester.cc) +set(TEXT_CLASSIFICATION_INSTALL_DIR + "${INFERENCE_DEMO_INSTALL_DIR}/text_classification") +download_model_and_data( + ${TEXT_CLASSIFICATION_INSTALL_DIR} "text-classification-Senta.tar.gz" + 3f0f440313ca50e26184e65ffd5809ab "text_classification_data.txt.tar.gz" + 36ae620020cc3377f45ed330dd36238f) +inference_analysis_api_test( + test_analyzer_text_classification ${TEXT_CLASSIFICATION_INSTALL_DIR} + analyzer_text_classification_tester.cc) # seq_conv1 set(SEQ_CONV1_INSTALL_DIR "${INFERENCE_DEMO_INSTALL_DIR}/seq_conv1") -download_model_and_data_without_verify(${SEQ_CONV1_INSTALL_DIR} "seq_conv1_model.tar.gz" "seq_conv1_data.txt.tar.gz") -inference_analysis_api_test(test_analyzer_seq_conv1 ${SEQ_CONV1_INSTALL_DIR} analyzer_seq_conv1_tester.cc) +download_model_and_data_without_verify( + ${SEQ_CONV1_INSTALL_DIR} "seq_conv1_model.tar.gz" "seq_conv1_data.txt.tar.gz") +inference_analysis_api_test(test_analyzer_seq_conv1 ${SEQ_CONV1_INSTALL_DIR} + analyzer_seq_conv1_tester.cc) # transformer, the dataset only works on batch_size=8 now set(TRANSFORMER_INSTALL_DIR "${INFERENCE_DEMO_INSTALL_DIR}/transformer") -download_model_and_data_without_verify(${TRANSFORMER_INSTALL_DIR} "temp/transformer_model.tar.gz" "temp/transformer_data.txt.tar.gz") -inference_analysis_test(test_analyzer_transformer SRCS analyzer_transformer_compare_tester.cc - EXTRA_DEPS ${INFERENCE_EXTRA_DEPS} - ARGS --infer_model=${TRANSFORMER_INSTALL_DIR}/model --infer_data=${TRANSFORMER_INSTALL_DIR}/data.txt --batch_size=8 - --cpu_num_threads=${CPU_NUM_THREADS_ON_CI}) -inference_analysis_test(test_analyzer_transformer_fuse SRCS analyzer_transformer_fuse_tester.cc - EXTRA_DEPS ${INFERENCE_EXTRA_DEPS} - ARGS --infer_model=${TRANSFORMER_INSTALL_DIR}/model --infer_data=${TRANSFORMER_INSTALL_DIR}/data.txt --batch_size=8 - --cpu_num_threads=${CPU_NUM_THREADS_ON_CI}) -inference_analysis_test(test_analyzer_transformer_profile SRCS analyzer_transformer_profile_tester.cc - EXTRA_DEPS ${INFERENCE_EXTRA_DEPS} - ARGS --infer_model=${TRANSFORMER_INSTALL_DIR}/model --infer_data=${TRANSFORMER_INSTALL_DIR}/data.txt --batch_size=8 - --cpu_num_threads=${CPU_NUM_THREADS_ON_CI}) +download_model_and_data_without_verify( + ${TRANSFORMER_INSTALL_DIR} "temp/transformer_model.tar.gz" + "temp/transformer_data.txt.tar.gz") +inference_analysis_test( + test_analyzer_transformer + SRCS + analyzer_transformer_compare_tester.cc + EXTRA_DEPS + ${INFERENCE_EXTRA_DEPS} + ARGS + --infer_model=${TRANSFORMER_INSTALL_DIR}/model + --infer_data=${TRANSFORMER_INSTALL_DIR}/data.txt + --batch_size=8 + --cpu_num_threads=${CPU_NUM_THREADS_ON_CI}) +inference_analysis_test( + test_analyzer_transformer_fuse + SRCS + analyzer_transformer_fuse_tester.cc + EXTRA_DEPS + ${INFERENCE_EXTRA_DEPS} + ARGS + --infer_model=${TRANSFORMER_INSTALL_DIR}/model + --infer_data=${TRANSFORMER_INSTALL_DIR}/data.txt + --batch_size=8 + --cpu_num_threads=${CPU_NUM_THREADS_ON_CI}) +inference_analysis_test( + test_analyzer_transformer_profile + SRCS + analyzer_transformer_profile_tester.cc + EXTRA_DEPS + ${INFERENCE_EXTRA_DEPS} + ARGS + --infer_model=${TRANSFORMER_INSTALL_DIR}/model + --infer_data=${TRANSFORMER_INSTALL_DIR}/data.txt + --batch_size=8 + --cpu_num_threads=${CPU_NUM_THREADS_ON_CI}) # VIT-OCR set(VIT_OCR_INSTALL_DIR "${INFERENCE_DEMO_INSTALL_DIR}/vit") -if (NOT EXISTS ${VIT_OCR_INSTALL_DIR}/vit_ocr.tgz) - inference_download_and_uncompress_without_verify(${VIT_OCR_INSTALL_DIR} ${INFERENCE_URL} "ocr/vit_ocr.tgz") +if(NOT EXISTS ${VIT_OCR_INSTALL_DIR}/vit_ocr.tgz) + inference_download_and_uncompress_without_verify( + ${VIT_OCR_INSTALL_DIR} ${INFERENCE_URL} "ocr/vit_ocr.tgz") endif() -inference_analysis_test(test_analyzer_vit_ocr SRCS analyzer_vit_ocr_tester.cc - EXTRA_DEPS ${INFERENCE_EXTRA_DEPS} - ARGS --infer_model=${VIT_OCR_INSTALL_DIR}/vit_ocr/model --infer_data=${VIT_OCR_INSTALL_DIR}/vit_ocr/datavit.txt) +inference_analysis_test( + test_analyzer_vit_ocr + SRCS + analyzer_vit_ocr_tester.cc + EXTRA_DEPS + ${INFERENCE_EXTRA_DEPS} + ARGS + --infer_model=${VIT_OCR_INSTALL_DIR}/vit_ocr/model + --infer_data=${VIT_OCR_INSTALL_DIR}/vit_ocr/datavit.txt) # ocr set(OCR_INSTALL_DIR "${INFERENCE_DEMO_INSTALL_DIR}/ocr") -if (NOT EXISTS ${OCR_INSTALL_DIR}/ocr.tar.gz) - inference_download_and_uncompress_without_verify(${OCR_INSTALL_DIR} "http://paddlemodels.bj.bcebos.com/" "inference-vis-demos/ocr.tar.gz") +if(NOT EXISTS ${OCR_INSTALL_DIR}/ocr.tar.gz) + inference_download_and_uncompress_without_verify( + ${OCR_INSTALL_DIR} "http://paddlemodels.bj.bcebos.com/" + "inference-vis-demos/ocr.tar.gz") endif() -inference_analysis_api_test(test_analyzer_ocr ${OCR_INSTALL_DIR} analyzer_vis_tester.cc) +inference_analysis_api_test(test_analyzer_ocr ${OCR_INSTALL_DIR} + analyzer_vis_tester.cc) # densebox set(DENSEBOX_INSTALL_DIR "${INFERENCE_DEMO_INSTALL_DIR}/densebox") download_data_without_verify(${DENSEBOX_INSTALL_DIR} "densebox.tar.gz") -inference_analysis_test(test_analyzer_detect_functional_mkldnn SRCS analyzer_detect_functional_mkldnn_tester.cc - EXTRA_DEPS ${INFERENCE_EXTRA_DEPS} - ARGS --infer_model=${DENSEBOX_INSTALL_DIR}/model --infer_data=${DENSEBOX_INSTALL_DIR}/detect_input_50.txt - --infer_shape=${DENSEBOX_INSTALL_DIR}/shape_50.txt) +inference_analysis_test( + test_analyzer_detect_functional_mkldnn + SRCS + analyzer_detect_functional_mkldnn_tester.cc + EXTRA_DEPS + ${INFERENCE_EXTRA_DEPS} + ARGS + --infer_model=${DENSEBOX_INSTALL_DIR}/model + --infer_data=${DENSEBOX_INSTALL_DIR}/detect_input_50.txt + --infer_shape=${DENSEBOX_INSTALL_DIR}/shape_50.txt) # mobilenet with transpose op set(MOBILENET_INSTALL_DIR "${INFERENCE_DEMO_INSTALL_DIR}/mobilenet") -if (NOT EXISTS ${MOBILENET_INSTALL_DIR}/mobilenet.tar.gz) - inference_download_and_uncompress_without_verify(${MOBILENET_INSTALL_DIR} "http://paddlemodels.bj.bcebos.com/" "inference-vis-demos/mobilenet.tar.gz") +if(NOT EXISTS ${MOBILENET_INSTALL_DIR}/mobilenet.tar.gz) + inference_download_and_uncompress_without_verify( + ${MOBILENET_INSTALL_DIR} "http://paddlemodels.bj.bcebos.com/" + "inference-vis-demos/mobilenet.tar.gz") endif() -inference_analysis_api_test(test_analyzer_mobilenet_transpose ${MOBILENET_INSTALL_DIR} analyzer_vis_tester.cc) +inference_analysis_api_test(test_analyzer_mobilenet_transpose + ${MOBILENET_INSTALL_DIR} analyzer_vis_tester.cc) ### Image classification tests with fake data set(IMG_CLASS_TEST_APP "test_analyzer_image_classification") set(IMG_CLASS_TEST_APP_SRC "analyzer_image_classification_tester.cc") # build test binary to be used in subsequent tests -inference_analysis_api_test_with_fake_data_build(${IMG_CLASS_TEST_APP} ${IMG_CLASS_TEST_APP_SRC}) +inference_analysis_api_test_with_fake_data_build(${IMG_CLASS_TEST_APP} + ${IMG_CLASS_TEST_APP_SRC}) # googlenet set(GOOGLENET_MODEL_DIR "${INFERENCE_DEMO_INSTALL_DIR}/googlenet") download_data_without_verify(${GOOGLENET_MODEL_DIR} "googlenet.tar.gz") -inference_analysis_api_test_with_fake_data_run(test_analyzer_googlenet ${IMG_CLASS_TEST_APP} - ${GOOGLENET_MODEL_DIR} false) +inference_analysis_api_test_with_fake_data_run( + test_analyzer_googlenet ${IMG_CLASS_TEST_APP} ${GOOGLENET_MODEL_DIR} false) # resnet50 set(RESNET50_MODEL_DIR "${INFERENCE_DEMO_INSTALL_DIR}/resnet50") download_data_without_verify(${RESNET50_MODEL_DIR} "resnet50_model.tar.gz") -inference_analysis_api_test_with_fake_data_run(test_analyzer_resnet50 ${IMG_CLASS_TEST_APP} - ${RESNET50_MODEL_DIR} true) -if (WIN32) - set_tests_properties(test_analyzer_resnet50 PROPERTIES TIMEOUT 200) +inference_analysis_api_test_with_fake_data_run( + test_analyzer_resnet50 ${IMG_CLASS_TEST_APP} ${RESNET50_MODEL_DIR} true) +if(WIN32) + set_tests_properties(test_analyzer_resnet50 PROPERTIES TIMEOUT 200) endif() - # mobilenet with depthwise_conv op -set(MOBILENET_MODEL_DIR "${INFERENCE_DEMO_INSTALL_DIR}/mobilenet_depthwise_conv") +set(MOBILENET_MODEL_DIR + "${INFERENCE_DEMO_INSTALL_DIR}/mobilenet_depthwise_conv") download_data_without_verify(${MOBILENET_MODEL_DIR} "mobilenet_model.tar.gz") -inference_analysis_api_test_with_fake_data_run(test_analyzer_mobilenet_depthwise_conv ${IMG_CLASS_TEST_APP} - ${MOBILENET_MODEL_DIR} false) +inference_analysis_api_test_with_fake_data_run( + test_analyzer_mobilenet_depthwise_conv ${IMG_CLASS_TEST_APP} + ${MOBILENET_MODEL_DIR} false) if(WITH_MKLDNN) @@ -418,97 +601,135 @@ if(WITH_MKLDNN) set(IMAGENET_DATA_ARCHIVE "imagenet_val_100_tail.tar.gz") set(IMAGENET_DATA_DIR "${INFERENCE_DEMO_INSTALL_DIR}/imagenet") set(IMAGENET_DATA_PATH "${IMAGENET_DATA_DIR}/data.bin") - download_int8_data_without_verify(${IMAGENET_DATA_DIR} ${IMAGENET_DATA_ARCHIVE}) + download_int8_data_without_verify(${IMAGENET_DATA_DIR} + ${IMAGENET_DATA_ARCHIVE}) # build test binary to be used in subsequent tests set(INT8_IMG_CLASS_TEST_APP "test_analyzer_int8_image_classification") - set(INT8_IMG_CLASS_TEST_APP_SRC "analyzer_int8_image_classification_tester.cc") - inference_analysis_api_test_build(${INT8_IMG_CLASS_TEST_APP} ${INT8_IMG_CLASS_TEST_APP_SRC}) + set(INT8_IMG_CLASS_TEST_APP_SRC + "analyzer_int8_image_classification_tester.cc") + inference_analysis_api_test_build(${INT8_IMG_CLASS_TEST_APP} + ${INT8_IMG_CLASS_TEST_APP_SRC}) # resnet50 int8 set(INT8_RESNET50_MODEL_DIR "${INT8_DATA_DIR}/resnet50") - download_int8_data_without_verify(${INT8_RESNET50_MODEL_DIR} "resnet50_int8_model.tar.gz" ) - inference_analysis_api_int8_test_run(test_analyzer_int8_resnet50 ${INT8_IMG_CLASS_TEST_APP} ${INT8_RESNET50_MODEL_DIR} ${IMAGENET_DATA_PATH}) + download_int8_data_without_verify(${INT8_RESNET50_MODEL_DIR} + "resnet50_int8_model.tar.gz") + inference_analysis_api_int8_test_run( + test_analyzer_int8_resnet50 ${INT8_IMG_CLASS_TEST_APP} + ${INT8_RESNET50_MODEL_DIR} ${IMAGENET_DATA_PATH}) # mobilenetv1 int8 set(INT8_MOBILENETV1_MODEL_DIR "${INT8_DATA_DIR}/mobilenetv1") - download_int8_data_without_verify(${INT8_MOBILENETV1_MODEL_DIR} "mobilenetv1_int8_model.tar.gz" ) - inference_analysis_api_int8_test_run(test_analyzer_int8_mobilenetv1 ${INT8_IMG_CLASS_TEST_APP} ${INT8_MOBILENETV1_MODEL_DIR} ${IMAGENET_DATA_PATH}) - + download_int8_data_without_verify(${INT8_MOBILENETV1_MODEL_DIR} + "mobilenetv1_int8_model.tar.gz") + inference_analysis_api_int8_test_run( + test_analyzer_int8_mobilenetv1 ${INT8_IMG_CLASS_TEST_APP} + ${INT8_MOBILENETV1_MODEL_DIR} ${IMAGENET_DATA_PATH}) + # mobilenetv2 int8 set(INT8_MOBILENETV2_MODEL_DIR "${INT8_DATA_DIR}/mobilenetv2") - download_int8_data_without_verify(${INT8_MOBILENETV2_MODEL_DIR} "mobilenet_v2_int8_model.tar.gz" ) - inference_analysis_api_int8_test_run(test_analyzer_int8_mobilenetv2 ${INT8_IMG_CLASS_TEST_APP} ${INT8_MOBILENETV2_MODEL_DIR} ${IMAGENET_DATA_PATH}) - + download_int8_data_without_verify(${INT8_MOBILENETV2_MODEL_DIR} + "mobilenet_v2_int8_model.tar.gz") + inference_analysis_api_int8_test_run( + test_analyzer_int8_mobilenetv2 ${INT8_IMG_CLASS_TEST_APP} + ${INT8_MOBILENETV2_MODEL_DIR} ${IMAGENET_DATA_PATH}) + # resnet101 int8 # TODO(grygielski) Enable after MKL-DNN 1.0 merge set(INT8_RESNET101_MODEL_DIR "${INT8_DATA_DIR}/resnet101") - download_int8_data_without_verify(${INT8_RESNET101_MODEL_DIR} "Res101_int8_model.tar.gz" ) -# inference_analysis_api_int8_test_run(test_analyzer_int8_resnet101 ${INT8_IMG_CLASS_TEST_APP} ${INT8_RESNET101_MODEL_DIR} ${IMAGENET_DATA_PATH}) - + download_int8_data_without_verify(${INT8_RESNET101_MODEL_DIR} + "Res101_int8_model.tar.gz") + # inference_analysis_api_int8_test_run(test_analyzer_int8_resnet101 ${INT8_IMG_CLASS_TEST_APP} ${INT8_RESNET101_MODEL_DIR} ${IMAGENET_DATA_PATH}) + # vgg16 int8 # TODO(grygielski) Enable after MKL-DNN 1.0 merge set(INT8_VGG16_MODEL_DIR "${INT8_DATA_DIR}/vgg16") - download_int8_data_without_verify(${INT8_VGG16_MODEL_DIR} "VGG16_int8_model.tar.gz" ) -# inference_analysis_api_int8_test_run(test_analyzer_int8_vgg16 ${INT8_IMG_CLASS_TEST_APP} ${INT8_VGG16_MODEL_DIR} ${IMAGENET_DATA_PATH}) - + download_int8_data_without_verify(${INT8_VGG16_MODEL_DIR} + "VGG16_int8_model.tar.gz") + # inference_analysis_api_int8_test_run(test_analyzer_int8_vgg16 ${INT8_IMG_CLASS_TEST_APP} ${INT8_VGG16_MODEL_DIR} ${IMAGENET_DATA_PATH}) + # vgg19 int8 # TODO(grygielski) Enable after MKL-DNN 1.0 merge set(INT8_VGG19_MODEL_DIR "${INT8_DATA_DIR}/vgg19") - download_int8_data_without_verify(${INT8_VGG19_MODEL_DIR} "VGG19_int8_model.tar.gz" ) -# inference_analysis_api_int8_test_run(test_analyzer_int8_vgg19 ${INT8_IMG_CLASS_TEST_APP} ${INT8_VGG19_MODEL_DIR} ${IMAGENET_DATA_PATH}) + download_int8_data_without_verify(${INT8_VGG19_MODEL_DIR} + "VGG19_int8_model.tar.gz") + # inference_analysis_api_int8_test_run(test_analyzer_int8_vgg19 ${INT8_IMG_CLASS_TEST_APP} ${INT8_VGG19_MODEL_DIR} ${IMAGENET_DATA_PATH}) # googlenet int8 set(INT8_GOOGLENET_MODEL_DIR "${INT8_DATA_DIR}/googlenet") - download_int8_data_without_verify(${INT8_GOOGLENET_MODEL_DIR} "GoogleNet_int8_model.tar.gz" ) - inference_analysis_api_int8_test_run_custom_warmup_batch_size(test_analyzer_int8_googlenet ${INT8_IMG_CLASS_TEST_APP} ${INT8_GOOGLENET_MODEL_DIR} ${IMAGENET_DATA_PATH} 10) - - # mobilenetv3_large_x1_0 int8 - set(INT8_MOBILENETV3_LARGE_MODEL_DIR "${INT8_DATA_DIR}/mobilenetv3_large") - set(INT8_MOBILENETV3_FILE_NAME "MobileNetV3_large_x1_0_infer.tar") - if (NOT EXISTS ${INT8_MOBILENETV3_LARGE_MODEL_DIR}/${INT8_MOBILENETV3_FILE_NAME}) - inference_download_and_uncompress_without_verify(${INT8_MOBILENETV3_LARGE_MODEL_DIR} "https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/inference/" ${INT8_MOBILENETV3_FILE_NAME}) - endif() - inference_analysis_test_run(test_analyzer_int8_mobilenetv3_large - COMMAND ${INT8_IMG_CLASS_TEST_APP} - ARGS --infer_model=${INT8_MOBILENETV3_LARGE_MODEL_DIR}/MobileNetV3_large_x1_0_infer - --infer_data=${IMAGENET_DATA_PATH} - --warmup_batch_size=50 - --batch_size=1 - --enable_int8=true - --cpu_num_threads=${CPU_NUM_THREADS_ON_CI} - --iterations=100 - --with_accuracy_layer=false) + download_int8_data_without_verify(${INT8_GOOGLENET_MODEL_DIR} + "GoogleNet_int8_model.tar.gz") + inference_analysis_api_int8_test_run_custom_warmup_batch_size( + test_analyzer_int8_googlenet ${INT8_IMG_CLASS_TEST_APP} + ${INT8_GOOGLENET_MODEL_DIR} ${IMAGENET_DATA_PATH} 10) + + # mobilenetv3_large_x1_0 int8 + set(INT8_MOBILENETV3_LARGE_MODEL_DIR "${INT8_DATA_DIR}/mobilenetv3_large") + set(INT8_MOBILENETV3_FILE_NAME "MobileNetV3_large_x1_0_infer.tar") + if(NOT EXISTS + ${INT8_MOBILENETV3_LARGE_MODEL_DIR}/${INT8_MOBILENETV3_FILE_NAME}) + inference_download_and_uncompress_without_verify( + ${INT8_MOBILENETV3_LARGE_MODEL_DIR} + "https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/inference/" + ${INT8_MOBILENETV3_FILE_NAME}) + endif() + inference_analysis_test_run( + test_analyzer_int8_mobilenetv3_large + COMMAND + ${INT8_IMG_CLASS_TEST_APP} + ARGS + --infer_model=${INT8_MOBILENETV3_LARGE_MODEL_DIR}/MobileNetV3_large_x1_0_infer + --infer_data=${IMAGENET_DATA_PATH} + --warmup_batch_size=50 + --batch_size=1 + --enable_int8=true + --cpu_num_threads=${CPU_NUM_THREADS_ON_CI} + --iterations=100 + --with_accuracy_layer=false) ### BFLOAT16 tests # build test binary to be used in subsequent tests set(BF16_IMG_CLASS_TEST_APP "test_analyzer_bfloat16_image_classification") - set(BF16_IMG_CLASS_TEST_APP_SRC "analyzer_bfloat16_image_classification_tester.cc") - inference_analysis_api_test_build(${BF16_IMG_CLASS_TEST_APP} ${BF16_IMG_CLASS_TEST_APP_SRC}) + set(BF16_IMG_CLASS_TEST_APP_SRC + "analyzer_bfloat16_image_classification_tester.cc") + inference_analysis_api_test_build(${BF16_IMG_CLASS_TEST_APP} + ${BF16_IMG_CLASS_TEST_APP_SRC}) # resnet50 bfloat16 - inference_analysis_api_bfloat16_test_run(test_analyzer_bfloat16_resnet50 ${BF16_IMG_CLASS_TEST_APP} ${INT8_RESNET50_MODEL_DIR} ${IMAGENET_DATA_PATH}) - + inference_analysis_api_bfloat16_test_run( + test_analyzer_bfloat16_resnet50 ${BF16_IMG_CLASS_TEST_APP} + ${INT8_RESNET50_MODEL_DIR} ${IMAGENET_DATA_PATH}) + # googlenet bfloat16 - inference_analysis_api_bfloat16_test_run(test_analyzer_bfloat16_googlenet ${BF16_IMG_CLASS_TEST_APP} ${INT8_GOOGLENET_MODEL_DIR} ${IMAGENET_DATA_PATH}) + inference_analysis_api_bfloat16_test_run( + test_analyzer_bfloat16_googlenet ${BF16_IMG_CLASS_TEST_APP} + ${INT8_GOOGLENET_MODEL_DIR} ${IMAGENET_DATA_PATH}) # mobilenetv1 bfloat16 - inference_analysis_api_bfloat16_test_run(test_analyzer_bfloat16_mobilenetv1 ${BF16_IMG_CLASS_TEST_APP} ${INT8_MOBILENETV1_MODEL_DIR} ${IMAGENET_DATA_PATH}) + inference_analysis_api_bfloat16_test_run( + test_analyzer_bfloat16_mobilenetv1 ${BF16_IMG_CLASS_TEST_APP} + ${INT8_MOBILENETV1_MODEL_DIR} ${IMAGENET_DATA_PATH}) # mobilenetv2 bfloat16 - inference_analysis_api_bfloat16_test_run(test_analyzer_bfloat16_mobilenetv2 ${BF16_IMG_CLASS_TEST_APP} ${INT8_MOBILENETV2_MODEL_DIR} ${IMAGENET_DATA_PATH}) - - # mobilenetv3_large - inference_analysis_test_run(test_analyzer_bfloat16_mobilenetv3_large - COMMAND ${BF16_IMG_CLASS_TEST_APP} - ARGS --infer_model=${INT8_MOBILENETV3_LARGE_MODEL_DIR}/MobileNetV3_large_x1_0_infer - --infer_data=${IMAGENET_DATA_PATH} - --batch_size=1 - --enable_bf16=true - --paddle_num_threads=${CPU_NUM_THREADS_ON_CI} - --iterations=100 - --with_accuracy_layer=false) + inference_analysis_api_bfloat16_test_run( + test_analyzer_bfloat16_mobilenetv2 ${BF16_IMG_CLASS_TEST_APP} + ${INT8_MOBILENETV2_MODEL_DIR} ${IMAGENET_DATA_PATH}) + + # mobilenetv3_large + inference_analysis_test_run( + test_analyzer_bfloat16_mobilenetv3_large + COMMAND + ${BF16_IMG_CLASS_TEST_APP} + ARGS + --infer_model=${INT8_MOBILENETV3_LARGE_MODEL_DIR}/MobileNetV3_large_x1_0_infer + --infer_data=${IMAGENET_DATA_PATH} + --batch_size=1 + --enable_bf16=true + --paddle_num_threads=${CPU_NUM_THREADS_ON_CI} + --iterations=100 + --with_accuracy_layer=false) ### Object detection models set(PASCALVOC_DATA_PATH "${INT8_DATA_DIR}/pascalvoc_val_head_300.bin") @@ -516,21 +737,25 @@ if(WITH_MKLDNN) set(INT8_OBJ_DETECT_TEST_APP_SRC "analyzer_int8_object_detection_tester.cc") # download dataset if necessary - download_int8_data_without_verify(${INT8_DATA_DIR} "pascalvoc_val_head_300.tar.gz") - + download_int8_data_without_verify(${INT8_DATA_DIR} + "pascalvoc_val_head_300.tar.gz") # build test binary to be used in subsequent tests - inference_analysis_api_test_build(${INT8_OBJ_DETECT_TEST_APP} ${INT8_OBJ_DETECT_TEST_APP_SRC}) + inference_analysis_api_test_build(${INT8_OBJ_DETECT_TEST_APP} + ${INT8_OBJ_DETECT_TEST_APP_SRC}) # mobilenet-ssd int8 set(INT8_MOBILENET_SSD_MODEL_DIR "${INT8_DATA_DIR}/mobilenet-ssd") - download_int8_data_without_verify(${INT8_MOBILENET_SSD_MODEL_DIR} "mobilenet_ssd_int8_model.tar.gz" ) - inference_analysis_api_object_dection_int8_test_run(test_analyzer_int8_mobilenet_ssd ${INT8_OBJ_DETECT_TEST_APP} ${INT8_MOBILENET_SSD_MODEL_DIR} ${PASCALVOC_DATA_PATH}) + download_int8_data_without_verify(${INT8_MOBILENET_SSD_MODEL_DIR} + "mobilenet_ssd_int8_model.tar.gz") + inference_analysis_api_object_dection_int8_test_run( + test_analyzer_int8_mobilenet_ssd ${INT8_OBJ_DETECT_TEST_APP} + ${INT8_MOBILENET_SSD_MODEL_DIR} ${PASCALVOC_DATA_PATH}) ### Lexcial analysis GRU model set(GRU_PATH "${INFERENCE_DEMO_INSTALL_DIR}/gru") - download_GRU_data_without_verify("${GRU_PATH}" "GRU_eval_data.tar.gz") - download_GRU_data_without_verify("${GRU_PATH}" "GRU_eval_model_v2.tar.gz") + download_gru_data_without_verify("${GRU_PATH}" "GRU_eval_data.tar.gz") + download_gru_data_without_verify("${GRU_PATH}" "GRU_eval_model_v2.tar.gz") set(GRU_DATA_PATH "${GRU_PATH}/GRU_eval_data.bin") set(GRU_MODEL_PATH "${GRU_PATH}/GRU_eval_model_v2") set(LEXICAL_TEST_APP "test_analyzer_lexical_analysis") @@ -539,266 +764,497 @@ if(WITH_MKLDNN) # build test binary to be used in subsequent tests inference_analysis_api_test_build(${LEXICAL_TEST_APP} ${LEXICAL_TEST_APP_SRC}) # run lexcial analysis test - inference_analysis_api_lexical_test_run(test_analyzer_lexical_gru ${LEXICAL_TEST_APP} ${GRU_MODEL_PATH} ${GRU_DATA_PATH}) + inference_analysis_api_lexical_test_run( + test_analyzer_lexical_gru ${LEXICAL_TEST_APP} ${GRU_MODEL_PATH} + ${GRU_DATA_PATH}) # run bfloat16 lexical analysis test - inference_analysis_api_lexical_bfloat16_test_run(test_analyzer_lexical_gru_bfloat16 ${LEXICAL_TEST_APP} ${GRU_MODEL_PATH} ${GRU_DATA_PATH}) + inference_analysis_api_lexical_bfloat16_test_run( + test_analyzer_lexical_gru_bfloat16 ${LEXICAL_TEST_APP} ${GRU_MODEL_PATH} + ${GRU_DATA_PATH}) # run post-training quantization lexical analysis test - inference_analysis_api_lexical_int8_test_run(test_analyzer_lexical_gru_int8 ${LEXICAL_TEST_APP} ${GRU_MODEL_PATH} ${GRU_DATA_PATH} false) - # run post-training quantization lexical analysis test with multi_gru fuse - inference_analysis_api_lexical_int8_test_run(test_analyzer_lexical_gru_int8_multi_gru ${LEXICAL_TEST_APP} ${GRU_MODEL_PATH} ${GRU_DATA_PATH} true) + inference_analysis_api_lexical_int8_test_run( + test_analyzer_lexical_gru_int8 ${LEXICAL_TEST_APP} ${GRU_MODEL_PATH} + ${GRU_DATA_PATH} false) + # run post-training quantization lexical analysis test with multi_gru fuse + inference_analysis_api_lexical_int8_test_run( + test_analyzer_lexical_gru_int8_multi_gru ${LEXICAL_TEST_APP} + ${GRU_MODEL_PATH} ${GRU_DATA_PATH} true) ### optimized FP32 vs. Quant INT8 tests - + set(QUANT_DATA_DIR "${INFERENCE_DEMO_INSTALL_DIR}/quant") set(QUANT_IMG_CLASS_TEST_APP "test_analyzer_quant_image_classification") - set(QUANT_IMG_CLASS_TEST_APP_SRC "analyzer_quant_image_classification_tester.cc") + set(QUANT_IMG_CLASS_TEST_APP_SRC + "analyzer_quant_image_classification_tester.cc") # build test binary to be used in subsequent tests - inference_analysis_api_test_build(${QUANT_IMG_CLASS_TEST_APP} ${QUANT_IMG_CLASS_TEST_APP_SRC}) + inference_analysis_api_test_build(${QUANT_IMG_CLASS_TEST_APP} + ${QUANT_IMG_CLASS_TEST_APP_SRC}) # MobileNetV1 FP32 vs. Quant INT8 # The FP32 model should already be downloaded for slim Quant unit tests on Linux set(QUANT2_MobileNetV1_MODEL_DIR "${QUANT_DATA_DIR}/MobileNetV1_quant2") - set(QUANT2_INT8_MobileNetV1_MODEL_DIR "${QUANT_DATA_DIR}/MobileNetV1_quant2_int8") + set(QUANT2_INT8_MobileNetV1_MODEL_DIR + "${QUANT_DATA_DIR}/MobileNetV1_quant2_int8") if(NOT LINUX) - download_quant_data_without_verify(${QUANT2_MobileNetV1_MODEL_DIR} "MobileNet_qat_perf.tar.gz") + download_quant_data_without_verify(${QUANT2_MobileNetV1_MODEL_DIR} + "MobileNet_qat_perf.tar.gz") endif(NOT LINUX) - download_quant_data_without_verify(${QUANT2_INT8_MobileNetV1_MODEL_DIR} "MobileNet_qat_perf_int8.tar.gz") - inference_analysis_api_quant_test_run(test_analyzer_quant_performance_benchmark ${QUANT_IMG_CLASS_TEST_APP} ${QUANT2_MobileNetV1_MODEL_DIR}/MobileNet_qat_perf/float ${QUANT2_INT8_MobileNetV1_MODEL_DIR}/MobileNet_qat_perf_int8 ${IMAGENET_DATA_PATH} false) + download_quant_data_without_verify(${QUANT2_INT8_MobileNetV1_MODEL_DIR} + "MobileNet_qat_perf_int8.tar.gz") + inference_analysis_api_quant_test_run( + test_analyzer_quant_performance_benchmark + ${QUANT_IMG_CLASS_TEST_APP} + ${QUANT2_MobileNetV1_MODEL_DIR}/MobileNet_qat_perf/float + ${QUANT2_INT8_MobileNetV1_MODEL_DIR}/MobileNet_qat_perf_int8 + ${IMAGENET_DATA_PATH} + false) # Quant2 MobileNetV1 - inference_analysis_api_quant_test_run(test_analyzer_quant2_mobilenetv1_mkldnn ${QUANT_IMG_CLASS_TEST_APP} ${QUANT2_MobileNetV1_MODEL_DIR}/MobileNet_qat_perf/float ${QUANT2_MobileNetV1_MODEL_DIR}/MobileNet_qat_perf/float ${IMAGENET_DATA_PATH} true) + inference_analysis_api_quant_test_run( + test_analyzer_quant2_mobilenetv1_mkldnn + ${QUANT_IMG_CLASS_TEST_APP} + ${QUANT2_MobileNetV1_MODEL_DIR}/MobileNet_qat_perf/float + ${QUANT2_MobileNetV1_MODEL_DIR}/MobileNet_qat_perf/float + ${IMAGENET_DATA_PATH} + true) # Quant2 ResNet50 with input/output scales in `fake_quantize_range_abs_max` operators and the `out_threshold` attributes, # with weight scales in `fake_channel_wise_dequantize_max_abs` operators - set(QUANT2_RESNET50_CHANNELWISE_MODEL_DIR "${QUANT_DATA_DIR}/ResNet50_quant2_channelwise") - set(QUANT2_RESNET50_CHANNELWISE_MODEL_ARCHIVE "ResNet50_qat_channelwise.tar.gz") + set(QUANT2_RESNET50_CHANNELWISE_MODEL_DIR + "${QUANT_DATA_DIR}/ResNet50_quant2_channelwise") + set(QUANT2_RESNET50_CHANNELWISE_MODEL_ARCHIVE + "ResNet50_qat_channelwise.tar.gz") if(NOT LINUX) - download_quant_data_without_verify(${QUANT2_RESNET50_CHANNELWISE_MODEL_DIR} ${QUANT2_RESNET50_CHANNELWISE_MODEL_ARCHIVE}) + download_quant_data_without_verify( + ${QUANT2_RESNET50_CHANNELWISE_MODEL_DIR} + ${QUANT2_RESNET50_CHANNELWISE_MODEL_ARCHIVE}) endif(NOT LINUX) - set(QUANT2_RESNET50_MODEL ${QUANT2_RESNET50_CHANNELWISE_MODEL_DIR}/ResNet50_qat_channelwise) - inference_analysis_api_quant_test_run(test_analyzer_quant2_resnet50_channelwise_mkldnn ${QUANT_IMG_CLASS_TEST_APP} ${QUANT2_RESNET50_MODEL} ${QUANT2_RESNET50_MODEL} ${IMAGENET_DATA_PATH} true) + set(QUANT2_RESNET50_MODEL + ${QUANT2_RESNET50_CHANNELWISE_MODEL_DIR}/ResNet50_qat_channelwise) + inference_analysis_api_quant_test_run( + test_analyzer_quant2_resnet50_channelwise_mkldnn + ${QUANT_IMG_CLASS_TEST_APP} ${QUANT2_RESNET50_MODEL} + ${QUANT2_RESNET50_MODEL} ${IMAGENET_DATA_PATH} true) ### Other tests - + # MKLDNN quantizer config set(MKLDNN_QUANTIZER_CONFIG_TEST_APP "test_mkldnn_quantizer_config") set(MKLDNN_QUANTIZER_CONFIG_TEST_APP_SRC "mkldnn_quantizer_config_tester.cc") - inference_analysis_api_test_build(${MKLDNN_QUANTIZER_CONFIG_TEST_APP} ${MKLDNN_QUANTIZER_CONFIG_TEST_APP_SRC}) - inference_analysis_test_run(test_mkldnn_quantizer_config COMMAND ${MKLDNN_QUANTIZER_CONFIG_TEST_APP}) + inference_analysis_api_test_build(${MKLDNN_QUANTIZER_CONFIG_TEST_APP} + ${MKLDNN_QUANTIZER_CONFIG_TEST_APP_SRC}) + inference_analysis_test_run(test_mkldnn_quantizer_config COMMAND + ${MKLDNN_QUANTIZER_CONFIG_TEST_APP}) # preprocess data2bin imagenet - download_int8_data_without_verify(${INT8_DATA_DIR} "imagenet_small.tar.gz") - set(IMAGENET_SMALL_DATA_DIR "${INT8_DATA_DIR}/imagenet_small") - set(IMAGENET_SMALL_OUTPUT_FILE "imagenet_small.bin") - preprocess_data2bin_test_run(preprocess_local_imagenet "full_ILSVRC2012_val_preprocess.py" ${IMAGENET_SMALL_DATA_DIR} ${IMAGENET_SMALL_OUTPUT_FILE}) - + download_int8_data_without_verify(${INT8_DATA_DIR} "imagenet_small.tar.gz") + set(IMAGENET_SMALL_DATA_DIR "${INT8_DATA_DIR}/imagenet_small") + set(IMAGENET_SMALL_OUTPUT_FILE "imagenet_small.bin") + preprocess_data2bin_test_run( + preprocess_local_imagenet "full_ILSVRC2012_val_preprocess.py" + ${IMAGENET_SMALL_DATA_DIR} ${IMAGENET_SMALL_OUTPUT_FILE}) + # preprocess data2bin pascalvoc download_int8_data_without_verify(${INT8_DATA_DIR} "pascalvoc_small.tar.gz") set(PASCALVOC_SMALL_DATA_DIR "${INT8_DATA_DIR}/pascalvoc_small") set(PASCALVOC_SMALL_OUTPUT_FILE "pascalvoc_small.bin") - preprocess_data2bin_test_run(preprocess_local_pascalvoc "full_pascalvoc_test_preprocess.py" ${PASCALVOC_SMALL_DATA_DIR} ${PASCALVOC_SMALL_OUTPUT_FILE}) + preprocess_data2bin_test_run( + preprocess_local_pascalvoc "full_pascalvoc_test_preprocess.py" + ${PASCALVOC_SMALL_DATA_DIR} ${PASCALVOC_SMALL_OUTPUT_FILE}) endif() # bert, max_len=20, embedding_dim=128 set(BERT_INSTALL_DIR "${INFERENCE_DEMO_INSTALL_DIR}/bert_emb128") -download_model_and_data_without_verify(${BERT_INSTALL_DIR} "bert_emb128_model.tar.gz" "bert_data_len20.txt.tar.gz") -if (WITH_GPU) - inference_analysis_api_test(test_analyzer_bert ${BERT_INSTALL_DIR} analyzer_bert_tester.cc) +download_model_and_data_without_verify( + ${BERT_INSTALL_DIR} "bert_emb128_model.tar.gz" "bert_data_len20.txt.tar.gz") +if(WITH_GPU) + inference_analysis_api_test(test_analyzer_bert ${BERT_INSTALL_DIR} + analyzer_bert_tester.cc) endif() # multiple models prediction set(MMP_INSTALL_DIR "${INFERENCE_DEMO_INSTALL_DIR}/multi_model_prediction") -download_data_without_verify(${MMP_INSTALL_DIR} PaddleInference/mobilenet_v2_models.tar.gz) -inference_multiple_models_analysis_api_test(test_analyzer_multi_model_prediction ${MMP_INSTALL_DIR} analyzer_mmp_tester.cc) +download_data_without_verify(${MMP_INSTALL_DIR} + PaddleInference/mobilenet_v2_models.tar.gz) +inference_multiple_models_analysis_api_test( + test_analyzer_multi_model_prediction ${MMP_INSTALL_DIR} + analyzer_mmp_tester.cc) if(WITH_GPU AND TENSORRT_FOUND) - set(TRT_MODEL_INSTALL_DIR "${INFERENCE_DEMO_INSTALL_DIR}/trt_models") - if (NOT EXISTS ${TRT_MODEL_INSTALL_DIR}/trt_inference_test_models.tar.gz) - inference_download_and_uncompress(${TRT_MODEL_INSTALL_DIR} ${INFERENCE_URL}/tensorrt_test "trt_inference_test_models.tar.gz" 3dcccdc38b549b6b1b4089723757bd98) - endif() - set(TEST_SPLIT_CONVERTER_MODEL "${TRT_MODEL_INSTALL_DIR}/trt_split_op_converter_test") - if (NOT EXISTS ${TEST_SPLIT_CONVERTER_MODEL}/split_converter.tgz) - inference_download_and_uncompress_without_verify(${TEST_SPLIT_CONVERTER_MODEL} ${INFERENCE_URL}/tensorrt_test "split_converter.tgz") - endif() - inference_analysis_test(trt_mobilenet_test SRCS trt_mobilenet_test.cc - EXTRA_DEPS ${INFERENCE_EXTRA_DEPS} - ARGS --infer_model=${TRT_MODEL_INSTALL_DIR}/trt_inference_test_models) - inference_analysis_test(trt_resnet50_test SRCS trt_resnet50_test.cc - EXTRA_DEPS ${INFERENCE_EXTRA_DEPS} - ARGS --infer_model=${TRT_MODEL_INSTALL_DIR}/trt_inference_test_models) - inference_analysis_test(trt_resnext_test SRCS trt_resnext_test.cc - EXTRA_DEPS ${INFERENCE_EXTRA_DEPS} - ARGS --infer_model=${TRT_MODEL_INSTALL_DIR}/trt_inference_test_models) - inference_analysis_test(trt_fc_prelu_test SRCS trt_fc_prelu_test.cc - EXTRA_DEPS ${INFERENCE_EXTRA_DEPS} - ARGS --infer_model=${TRT_MODEL_INSTALL_DIR}/trt_inference_test_models) - inference_analysis_test(trt_cascade_rcnn_test SRCS trt_cascade_rcnn_test.cc - EXTRA_DEPS ${INFERENCE_EXTRA_DEPS} - ARGS --infer_model=${TRT_MODEL_INSTALL_DIR}/trt_inference_test_models) - inference_analysis_test(trt_split_converter_test SRCS trt_split_converter_test.cc - EXTRA_DEPS ${INFERENCE_EXTRA_DEPS} - ARGS --infer_model=${TEST_SPLIT_CONVERTER_MODEL}/) - inference_analysis_test(test_analyzer_capi_exp_gpu SRCS analyzer_capi_exp_gpu_tester.cc - EXTRA_DEPS ${INFERENCE_EXTRA_DEPS} paddle_inference_c - ARGS --infer_model=${TRT_MODEL_INSTALL_DIR}/trt_inference_test_models) - inference_analysis_test(test_analyzer_capi_exp_xpu SRCS analyzer_capi_exp_xpu_tester.cc - EXTRA_DEPS ${INFERENCE_EXTRA_DEPS} paddle_inference_c - ARGS --infer_model=${TRT_MODEL_INSTALL_DIR}/trt_inference_test_models) - - set(TRT_MODEL_QUANT_RESNET_DIR "${INFERENCE_DEMO_INSTALL_DIR}/small_quant_model") - if (NOT EXISTS ${INFERENCE_DEMO_INSTALL_DIR}/small_quant_model.tgz) - inference_download_and_uncompress_without_verify(${INFERENCE_DEMO_INSTALL_DIR} ${INFERENCE_URL}/tensorrt_test "small_quant_model.tgz") - endif() - inference_analysis_test(trt_quant_int8_test SRCS trt_quant_int8_test.cc - EXTRA_DEPS ${INFERENCE_EXTRA_DEPS} - ARGS --infer_model=${TRT_MODEL_QUANT_RESNET_DIR}) - - set(TRT_MODEL_QUANT_YOLOV3_DIR "${INFERENCE_DEMO_INSTALL_DIR}/yolov3_r50_quant_aware") - if (NOT EXISTS ${INFERENCE_DEMO_INSTALL_DIR}/yolov3_r50_quant_aware.tgz) - inference_download_and_uncompress_without_verify(${INFERENCE_DEMO_INSTALL_DIR} ${INFERENCE_URL}/tensorrt_test "yolov3_r50_quant_aware.tgz") - endif() - inference_analysis_test(trt_quant_int8_yolov3_r50_test SRCS trt_quant_int8_yolov3_r50_test.cc - EXTRA_DEPS ${INFERENCE_EXTRA_DEPS} - ARGS --infer_model=${TRT_MODEL_QUANT_YOLOV3_DIR}) - - set(TEST_TRT_DYNAMIC_MODEL2 "${TRT_MODEL_INSTALL_DIR}/complex_model_dynamic") - if (NOT EXISTS ${TEST_TRT_DYNAMIC_MODEL2}/complex_model_dynamic2.tar.gz) - inference_download_and_uncompress_without_verify(${TEST_TRT_DYNAMIC_MODEL2} ${INFERENCE_URL}/tensorrt_test "complex_model_dynamic2.tar.gz") - endif() - - set(TEST_TRT_DYNAMIC_MODEL "${TRT_MODEL_INSTALL_DIR}/conv_bn_swish_split_gelu") - if (NOT EXISTS ${TEST_TRT_DYNAMIC_MODEL}/conv_bn_swish_split_gelu.tar.gz) - inference_download_and_uncompress(${TEST_TRT_DYNAMIC_MODEL} ${INFERENCE_URL}/tensorrt_test "conv_bn_swish_split_gelu.tar.gz" 2a5e8791e47b221b4f782151d76da9c6) - endif() - inference_analysis_test(trt_dynamic_shape_test SRCS trt_dynamic_shape_test.cc - EXTRA_DEPS ${INFERENCE_EXTRA_DEPS} - ARGS --infer_model=${TRT_MODEL_INSTALL_DIR}) - - set(TEST_TRT_ERNIE_MODEL "${TRT_MODEL_INSTALL_DIR}/ernie_test") - if (NOT EXISTS ${TEST_TRT_ERNIE_MODEL}/ernie_model_4.tar.gz) - inference_download_and_uncompress(${TEST_TRT_ERNIE_MODEL} ${INFERENCE_URL}/tensorrt_test "ernie_model_4.tar.gz" 5fa371efa75706becbaad79195d2ca68) - endif() - - inference_analysis_test(test_trt_dynamic_shape_ernie SRCS trt_dynamic_shape_ernie_test.cc - EXTRA_DEPS ${INFERENCE_EXTRA_DEPS} - ARGS --infer_model=${TEST_TRT_ERNIE_MODEL}/ernie_model_4) - - set(TEST_TRT_TRANSFORMER_PRUNE_MODEL "${TRT_MODEL_INSTALL_DIR}/transformer_prune") - if (NOT EXISTS ${TEST_TRT_TRANSFORMER_PRUNE_MODEL}/transformer_prune.tar.gz) - inference_download_and_uncompress(${TEST_TRT_TRANSFORMER_PRUNE_MODEL} ${INFERENCE_URL}/tensorrt_test "transformer_prune.tar.gz" 77b56dc73ff0cf44ddb1ce9ca0b0f471) - endif() - - inference_analysis_test(test_trt_dynamic_shape_transformer_prune SRCS trt_dynamic_shape_transformer_prune_test.cc - EXTRA_DEPS ${INFERENCE_EXTRA_DEPS} - ARGS --infer_model=${TEST_TRT_TRANSFORMER_PRUNE_MODEL}/transformer_prune) - - if (NOT EXISTS ${TEST_TRT_ERNIE_MODEL}/ernie_model_4_unserialized.tgz) - inference_download_and_uncompress(${TEST_TRT_ERNIE_MODEL} ${INFERENCE_URL}/tensorrt_test "ernie_model_4_unserialized.tgz" 833d73fc6a7f7e1ee4a1fd6419209e55) - endif() - - inference_analysis_test(test_trt_dynamic_shape_ernie_ser_deser SRCS trt_dynamic_shape_ernie_serialize_deserialize_test.cc - EXTRA_DEPS ${INFERENCE_EXTRA_DEPS} - ARGS --infer_model=${TEST_TRT_ERNIE_MODEL}/ernie_model_4_unserialized) - - if (NOT EXISTS ${TEST_TRT_ERNIE_MODEL}/ernie_model_4_fp16_unserialized.tgz) - inference_download_and_uncompress(${TEST_TRT_ERNIE_MODEL} ${INFERENCE_URL}/tensorrt_test "ernie_model_4_fp16_unserialized.tgz" c5ff2d0cad79953ffbf2b8b9e2fae6e4) - endif() - - inference_analysis_test(test_trt_dynamic_shape_ernie_fp16_ser_deser SRCS trt_dynamic_shape_ernie_fp16_serialize_deserialize_test.cc - EXTRA_DEPS ${INFERENCE_EXTRA_DEPS} - ARGS --infer_model=${TEST_TRT_ERNIE_MODEL}/ernie_model_4_fp16_unserialized) + set(TRT_MODEL_INSTALL_DIR "${INFERENCE_DEMO_INSTALL_DIR}/trt_models") + if(NOT EXISTS ${TRT_MODEL_INSTALL_DIR}/trt_inference_test_models.tar.gz) + inference_download_and_uncompress( + ${TRT_MODEL_INSTALL_DIR} ${INFERENCE_URL}/tensorrt_test + "trt_inference_test_models.tar.gz" 3dcccdc38b549b6b1b4089723757bd98) + endif() + set(TEST_SPLIT_CONVERTER_MODEL + "${TRT_MODEL_INSTALL_DIR}/trt_split_op_converter_test") + if(NOT EXISTS ${TEST_SPLIT_CONVERTER_MODEL}/split_converter.tgz) + inference_download_and_uncompress_without_verify( + ${TEST_SPLIT_CONVERTER_MODEL} ${INFERENCE_URL}/tensorrt_test + "split_converter.tgz") + endif() + inference_analysis_test( + trt_mobilenet_test + SRCS + trt_mobilenet_test.cc + EXTRA_DEPS + ${INFERENCE_EXTRA_DEPS} + ARGS + --infer_model=${TRT_MODEL_INSTALL_DIR}/trt_inference_test_models) + inference_analysis_test( + trt_resnet50_test + SRCS + trt_resnet50_test.cc + EXTRA_DEPS + ${INFERENCE_EXTRA_DEPS} + ARGS + --infer_model=${TRT_MODEL_INSTALL_DIR}/trt_inference_test_models) + inference_analysis_test( + trt_resnext_test + SRCS + trt_resnext_test.cc + EXTRA_DEPS + ${INFERENCE_EXTRA_DEPS} + ARGS + --infer_model=${TRT_MODEL_INSTALL_DIR}/trt_inference_test_models) + inference_analysis_test( + trt_fc_prelu_test + SRCS + trt_fc_prelu_test.cc + EXTRA_DEPS + ${INFERENCE_EXTRA_DEPS} + ARGS + --infer_model=${TRT_MODEL_INSTALL_DIR}/trt_inference_test_models) + inference_analysis_test( + trt_cascade_rcnn_test + SRCS + trt_cascade_rcnn_test.cc + EXTRA_DEPS + ${INFERENCE_EXTRA_DEPS} + ARGS + --infer_model=${TRT_MODEL_INSTALL_DIR}/trt_inference_test_models) + inference_analysis_test( + trt_split_converter_test + SRCS + trt_split_converter_test.cc + EXTRA_DEPS + ${INFERENCE_EXTRA_DEPS} + ARGS + --infer_model=${TEST_SPLIT_CONVERTER_MODEL}/) + inference_analysis_test( + test_analyzer_capi_exp_gpu + SRCS + analyzer_capi_exp_gpu_tester.cc + EXTRA_DEPS + ${INFERENCE_EXTRA_DEPS} + paddle_inference_c + ARGS + --infer_model=${TRT_MODEL_INSTALL_DIR}/trt_inference_test_models) + inference_analysis_test( + test_analyzer_capi_exp_xpu + SRCS + analyzer_capi_exp_xpu_tester.cc + EXTRA_DEPS + ${INFERENCE_EXTRA_DEPS} + paddle_inference_c + ARGS + --infer_model=${TRT_MODEL_INSTALL_DIR}/trt_inference_test_models) + + set(TRT_MODEL_QUANT_RESNET_DIR + "${INFERENCE_DEMO_INSTALL_DIR}/small_quant_model") + if(NOT EXISTS ${INFERENCE_DEMO_INSTALL_DIR}/small_quant_model.tgz) + inference_download_and_uncompress_without_verify( + ${INFERENCE_DEMO_INSTALL_DIR} ${INFERENCE_URL}/tensorrt_test + "small_quant_model.tgz") + endif() + inference_analysis_test( + trt_quant_int8_test + SRCS + trt_quant_int8_test.cc + EXTRA_DEPS + ${INFERENCE_EXTRA_DEPS} + ARGS + --infer_model=${TRT_MODEL_QUANT_RESNET_DIR}) + + set(TRT_MODEL_QUANT_YOLOV3_DIR + "${INFERENCE_DEMO_INSTALL_DIR}/yolov3_r50_quant_aware") + if(NOT EXISTS ${INFERENCE_DEMO_INSTALL_DIR}/yolov3_r50_quant_aware.tgz) + inference_download_and_uncompress_without_verify( + ${INFERENCE_DEMO_INSTALL_DIR} ${INFERENCE_URL}/tensorrt_test + "yolov3_r50_quant_aware.tgz") + endif() + inference_analysis_test( + trt_quant_int8_yolov3_r50_test + SRCS + trt_quant_int8_yolov3_r50_test.cc + EXTRA_DEPS + ${INFERENCE_EXTRA_DEPS} + ARGS + --infer_model=${TRT_MODEL_QUANT_YOLOV3_DIR}) + + set(TEST_TRT_DYNAMIC_MODEL2 "${TRT_MODEL_INSTALL_DIR}/complex_model_dynamic") + if(NOT EXISTS ${TEST_TRT_DYNAMIC_MODEL2}/complex_model_dynamic2.tar.gz) + inference_download_and_uncompress_without_verify( + ${TEST_TRT_DYNAMIC_MODEL2} ${INFERENCE_URL}/tensorrt_test + "complex_model_dynamic2.tar.gz") + endif() + + set(TEST_TRT_DYNAMIC_MODEL + "${TRT_MODEL_INSTALL_DIR}/conv_bn_swish_split_gelu") + if(NOT EXISTS ${TEST_TRT_DYNAMIC_MODEL}/conv_bn_swish_split_gelu.tar.gz) + inference_download_and_uncompress( + ${TEST_TRT_DYNAMIC_MODEL} ${INFERENCE_URL}/tensorrt_test + "conv_bn_swish_split_gelu.tar.gz" 2a5e8791e47b221b4f782151d76da9c6) + endif() + inference_analysis_test( + trt_dynamic_shape_test + SRCS + trt_dynamic_shape_test.cc + EXTRA_DEPS + ${INFERENCE_EXTRA_DEPS} + ARGS + --infer_model=${TRT_MODEL_INSTALL_DIR}) + + set(TEST_TRT_ERNIE_MODEL "${TRT_MODEL_INSTALL_DIR}/ernie_test") + if(NOT EXISTS ${TEST_TRT_ERNIE_MODEL}/ernie_model_4.tar.gz) + inference_download_and_uncompress( + ${TEST_TRT_ERNIE_MODEL} ${INFERENCE_URL}/tensorrt_test + "ernie_model_4.tar.gz" 5fa371efa75706becbaad79195d2ca68) + endif() + + inference_analysis_test( + test_trt_dynamic_shape_ernie + SRCS + trt_dynamic_shape_ernie_test.cc + EXTRA_DEPS + ${INFERENCE_EXTRA_DEPS} + ARGS + --infer_model=${TEST_TRT_ERNIE_MODEL}/ernie_model_4) + + set(TEST_TRT_TRANSFORMER_PRUNE_MODEL + "${TRT_MODEL_INSTALL_DIR}/transformer_prune") + if(NOT EXISTS ${TEST_TRT_TRANSFORMER_PRUNE_MODEL}/transformer_prune.tar.gz) + inference_download_and_uncompress( + ${TEST_TRT_TRANSFORMER_PRUNE_MODEL} ${INFERENCE_URL}/tensorrt_test + "transformer_prune.tar.gz" 77b56dc73ff0cf44ddb1ce9ca0b0f471) + endif() + + inference_analysis_test( + test_trt_dynamic_shape_transformer_prune + SRCS + trt_dynamic_shape_transformer_prune_test.cc + EXTRA_DEPS + ${INFERENCE_EXTRA_DEPS} + ARGS + --infer_model=${TEST_TRT_TRANSFORMER_PRUNE_MODEL}/transformer_prune) + + if(NOT EXISTS ${TEST_TRT_ERNIE_MODEL}/ernie_model_4_unserialized.tgz) + inference_download_and_uncompress( + ${TEST_TRT_ERNIE_MODEL} ${INFERENCE_URL}/tensorrt_test + "ernie_model_4_unserialized.tgz" 833d73fc6a7f7e1ee4a1fd6419209e55) + endif() + + inference_analysis_test( + test_trt_dynamic_shape_ernie_ser_deser + SRCS + trt_dynamic_shape_ernie_serialize_deserialize_test.cc + EXTRA_DEPS + ${INFERENCE_EXTRA_DEPS} + ARGS + --infer_model=${TEST_TRT_ERNIE_MODEL}/ernie_model_4_unserialized) + + if(NOT EXISTS ${TEST_TRT_ERNIE_MODEL}/ernie_model_4_fp16_unserialized.tgz) + inference_download_and_uncompress( + ${TEST_TRT_ERNIE_MODEL} ${INFERENCE_URL}/tensorrt_test + "ernie_model_4_fp16_unserialized.tgz" c5ff2d0cad79953ffbf2b8b9e2fae6e4) + endif() + + inference_analysis_test( + test_trt_dynamic_shape_ernie_fp16_ser_deser + SRCS + trt_dynamic_shape_ernie_fp16_serialize_deserialize_test.cc + EXTRA_DEPS + ${INFERENCE_EXTRA_DEPS} + ARGS + --infer_model=${TEST_TRT_ERNIE_MODEL}/ernie_model_4_fp16_unserialized) endif() set(LITE_MODEL_INSTALL_DIR "${INFERENCE_DEMO_INSTALL_DIR}/lite") download_data_without_verify(${LITE_MODEL_INSTALL_DIR} "mul_model_fp32.tgz") -inference_analysis_test(lite_mul_model_test SRCS lite_mul_model_test.cc - EXTRA_DEPS ${INFERENCE_EXTRA_DEPS} - ARGS --infer_model=${LITE_MODEL_INSTALL_DIR}) -inference_analysis_test(lite_resnet50_test SRCS lite_resnet50_test.cc - EXTRA_DEPS ${INFERENCE_EXTRA_DEPS} - ARGS --infer_model=${RESNET50_MODEL_DIR}) - -inference_analysis_test(test_analyzer_capi_exp SRCS analyzer_capi_exp_tester.cc - EXTRA_DEPS ${INFERENCE_EXTRA_DEPS} paddle_inference_c - ARGS --infer_model=${RESNET50_MODEL_DIR}/model) - -inference_analysis_test(test_analyzer_capi_exp_pd_config SRCS analyzer_capi_exp_pd_config_tester.cc - EXTRA_DEPS ${INFERENCE_EXTRA_DEPS} paddle_inference_c - ARGS --infer_model=${MOBILENET_INSTALL_DIR}/model) - -inference_analysis_test(test_analyzer_capi_exp_pd_tensor SRCS analyzer_capi_exp_pd_tensor_tester.cc - EXTRA_DEPS ${INFERENCE_EXTRA_DEPS} paddle_inference_c - ARGS --infer_model=${MOBILENET_INSTALL_DIR}/model) - -if (NOT APPLE AND NOT WIN32) - inference_analysis_test(test_analyzer_capi_exp_pd_threads SRCS analyzer_capi_exp_pd_threads_tester.cc - EXTRA_DEPS ${INFERENCE_EXTRA_DEPS} paddle_inference_c - ARGS --infer_model=${MOBILENET_INSTALL_DIR}/model) +inference_analysis_test( + lite_mul_model_test + SRCS + lite_mul_model_test.cc + EXTRA_DEPS + ${INFERENCE_EXTRA_DEPS} + ARGS + --infer_model=${LITE_MODEL_INSTALL_DIR}) +inference_analysis_test( + lite_resnet50_test + SRCS + lite_resnet50_test.cc + EXTRA_DEPS + ${INFERENCE_EXTRA_DEPS} + ARGS + --infer_model=${RESNET50_MODEL_DIR}) + +inference_analysis_test( + test_analyzer_capi_exp + SRCS + analyzer_capi_exp_tester.cc + EXTRA_DEPS + ${INFERENCE_EXTRA_DEPS} + paddle_inference_c + ARGS + --infer_model=${RESNET50_MODEL_DIR}/model) + +inference_analysis_test( + test_analyzer_capi_exp_pd_config + SRCS + analyzer_capi_exp_pd_config_tester.cc + EXTRA_DEPS + ${INFERENCE_EXTRA_DEPS} + paddle_inference_c + ARGS + --infer_model=${MOBILENET_INSTALL_DIR}/model) + +inference_analysis_test( + test_analyzer_capi_exp_pd_tensor + SRCS + analyzer_capi_exp_pd_tensor_tester.cc + EXTRA_DEPS + ${INFERENCE_EXTRA_DEPS} + paddle_inference_c + ARGS + --infer_model=${MOBILENET_INSTALL_DIR}/model) + +if(NOT APPLE AND NOT WIN32) + inference_analysis_test( + test_analyzer_capi_exp_pd_threads + SRCS + analyzer_capi_exp_pd_threads_tester.cc + EXTRA_DEPS + ${INFERENCE_EXTRA_DEPS} + paddle_inference_c + ARGS + --infer_model=${MOBILENET_INSTALL_DIR}/model) endif() -inference_analysis_test(test_analyzer_zerocopytensor_tensor SRCS analyzer_zerocopy_tensor_tester.cc - EXTRA_DEPS ${INFERENCE_EXTRA_DEPS} - ARGS --infer_model=${OCR_INSTALL_DIR}/model) - -if(WITH_DISTRIBUTE AND WITH_PSCORE AND NOT (WITH_ASCEND OR WITH_ASCEND_CL)) - inference_analysis_test(test_analyzer_dist_model SRCS analyzer_dist_model_tester.cc - EXTRA_DEPS ${INFERENCE_EXTRA_DEPS} - ARGS --infer_model=${OCR_INSTALL_DIR}/model) +inference_analysis_test( + test_analyzer_zerocopytensor_tensor + SRCS + analyzer_zerocopy_tensor_tester.cc + EXTRA_DEPS + ${INFERENCE_EXTRA_DEPS} + ARGS + --infer_model=${OCR_INSTALL_DIR}/model) + +if(WITH_DISTRIBUTE + AND WITH_PSCORE + AND NOT (WITH_ASCEND OR WITH_ASCEND_CL)) + inference_analysis_test( + test_analyzer_dist_model + SRCS + analyzer_dist_model_tester.cc + EXTRA_DEPS + ${INFERENCE_EXTRA_DEPS} + ARGS + --infer_model=${OCR_INSTALL_DIR}/model) endif() -inference_analysis_test(test_analyzer_paddletensor_tensor SRCS analyzer_paddle_tensor_tester.cc - EXTRA_DEPS ${INFERENCE_EXTRA_DEPS} - ARGS --infer_model=${OCR_INSTALL_DIR}/model --infer_data=${OCR_INSTALL_DIR}/data.txt --refer_result=${OCR_INSTALL_DIR}/result.txt) - +inference_analysis_test( + test_analyzer_paddletensor_tensor + SRCS + analyzer_paddle_tensor_tester.cc + EXTRA_DEPS + ${INFERENCE_EXTRA_DEPS} + ARGS + --infer_model=${OCR_INSTALL_DIR}/model + --infer_data=${OCR_INSTALL_DIR}/data.txt + --refer_result=${OCR_INSTALL_DIR}/result.txt) + if(WITH_MKLDNN) - inference_analysis_test(test_analyzer_capi_exp_int SRCS analyzer_capi_exp_int_tester.cc - EXTRA_DEPS ${INFERENCE_EXTRA_DEPS} paddle_inference_c - ARGS --infer_model=${INT8_DATA_DIR}/resnet50/model) + inference_analysis_test( + test_analyzer_capi_exp_int + SRCS + analyzer_capi_exp_int_tester.cc + EXTRA_DEPS + ${INFERENCE_EXTRA_DEPS} + paddle_inference_c + ARGS + --infer_model=${INT8_DATA_DIR}/resnet50/model) endif() -inference_analysis_test(test_analyzer_capi_exp_ner SRCS analyzer_capi_exp_ner_tester.cc - EXTRA_DEPS ${INFERENCE_EXTRA_DEPS} paddle_inference_c - ARGS --infer_model=${CHINESE_NER_INSTALL_DIR}/model) +inference_analysis_test( + test_analyzer_capi_exp_ner + SRCS + analyzer_capi_exp_ner_tester.cc + EXTRA_DEPS + ${INFERENCE_EXTRA_DEPS} + paddle_inference_c + ARGS + --infer_model=${CHINESE_NER_INSTALL_DIR}/model) if(WITH_GPU) - inference_analysis_test(paddle_infer_api_test SRCS paddle_infer_api_test.cc - EXTRA_DEPS ${INFERENCE_EXTRA_DEPS} - ARGS --infer_model=${RESNET50_MODEL_DIR}) - - inference_analysis_test(paddle_infer_api_copy_tensor_tester SRCS paddle_infer_api_copy_tensor_tester.cc - EXTRA_DEPS ${INFERENCE_EXTRA_DEPS} - ARGS --infer_model=${RESNET50_MODEL_DIR}) - set_tests_properties(paddle_infer_api_copy_tensor_tester PROPERTIES TIMEOUT 30) + inference_analysis_test( + paddle_infer_api_test + SRCS + paddle_infer_api_test.cc + EXTRA_DEPS + ${INFERENCE_EXTRA_DEPS} + ARGS + --infer_model=${RESNET50_MODEL_DIR}) + + inference_analysis_test( + paddle_infer_api_copy_tensor_tester + SRCS + paddle_infer_api_copy_tensor_tester.cc + EXTRA_DEPS + ${INFERENCE_EXTRA_DEPS} + ARGS + --infer_model=${RESNET50_MODEL_DIR}) + set_tests_properties(paddle_infer_api_copy_tensor_tester PROPERTIES TIMEOUT + 30) endif() -cc_test(paddle_infer_api_errors_test SRCS paddle_infer_api_errors_tester.cc DEPS paddle_inference_api) +cc_test( + paddle_infer_api_errors_test + SRCS paddle_infer_api_errors_tester.cc + DEPS paddle_inference_api) if("$ENV{CI_SKIP_CPP_TEST}" STREQUAL "ON") - return() + return() endif() if(WITH_GPU AND TENSORRT_FOUND) - set_tests_properties(trt_resnext_test PROPERTIES TIMEOUT 300) - set_tests_properties(trt_quant_int8_yolov3_r50_test PROPERTIES TIMEOUT 300) - set_tests_properties(trt_resnet50_test PROPERTIES TIMEOUT 300) - set_tests_properties(trt_cascade_rcnn_test PROPERTIES TIMEOUT 300) - set_tests_properties(test_trt_dynamic_shape_ernie_ser_deser PROPERTIES TIMEOUT 300) - set_tests_properties(test_trt_dynamic_shape_ernie_fp16_ser_deser PROPERTIES TIMEOUT 300) - set_tests_properties(test_trt_dynamic_shape_ernie PROPERTIES TIMEOUT 300) + set_tests_properties(trt_resnext_test PROPERTIES TIMEOUT 300) + set_tests_properties(trt_quant_int8_yolov3_r50_test PROPERTIES TIMEOUT 300) + set_tests_properties(trt_resnet50_test PROPERTIES TIMEOUT 300) + set_tests_properties(trt_cascade_rcnn_test PROPERTIES TIMEOUT 300) + set_tests_properties(test_trt_dynamic_shape_ernie_ser_deser PROPERTIES TIMEOUT + 300) + set_tests_properties(test_trt_dynamic_shape_ernie_fp16_ser_deser + PROPERTIES TIMEOUT 300) + set_tests_properties(test_trt_dynamic_shape_ernie PROPERTIES TIMEOUT 300) endif() if(WITH_MKLDNN) - set_tests_properties(test_analyzer_int8_resnet50 PROPERTIES TIMEOUT 120) - set_tests_properties(test_analyzer_int8_mobilenet_ssd PROPERTIES TIMEOUT 120) - set_tests_properties(test_analyzer_quant_performance_benchmark PROPERTIES TIMEOUT 120) - set_tests_properties(test_analyzer_int8_mobilenetv2 PROPERTIES TIMEOUT 120) - set_tests_properties(test_analyzer_int8_mobilenetv1 PROPERTIES TIMEOUT 120) - set_tests_properties(test_analyzer_int8_mobilenetv3_large PROPERTIES TIMEOUT 120) - set_tests_properties(test_analyzer_quant2_mobilenetv1_mkldnn PROPERTIES TIMEOUT 120) - set_tests_properties(test_analyzer_quant2_resnet50_channelwise_mkldnn PROPERTIES TIMEOUT 120) + set_tests_properties(test_analyzer_int8_resnet50 PROPERTIES TIMEOUT 120) + set_tests_properties(test_analyzer_int8_mobilenet_ssd PROPERTIES TIMEOUT 120) + set_tests_properties(test_analyzer_quant_performance_benchmark + PROPERTIES TIMEOUT 120) + set_tests_properties(test_analyzer_int8_mobilenetv2 PROPERTIES TIMEOUT 120) + set_tests_properties(test_analyzer_int8_mobilenetv1 PROPERTIES TIMEOUT 120) + set_tests_properties(test_analyzer_int8_mobilenetv3_large PROPERTIES TIMEOUT + 120) + set_tests_properties(test_analyzer_quant2_mobilenetv1_mkldnn + PROPERTIES TIMEOUT 120) + set_tests_properties(test_analyzer_quant2_resnet50_channelwise_mkldnn + PROPERTIES TIMEOUT 120) endif() set_tests_properties(lite_resnet50_test PROPERTIES TIMEOUT 120) @@ -809,45 +1265,74 @@ set_tests_properties(test_analyzer_ernie_int8 PROPERTIES TIMEOUT 120) set_tests_properties(test_analyzer_googlenet PROPERTIES TIMEOUT 120) set_tests_properties(test_analyzer_small_dam PROPERTIES TIMEOUT 120) set_tests_properties(test_analyzer_transformer PROPERTIES TIMEOUT 120) -set_tests_properties(test_analyzer_mobilenet_depthwise_conv PROPERTIES TIMEOUT 120) -if (WITH_GPU) - set_tests_properties(test_analyzer_bert PROPERTIES TIMEOUT 120) - set_tests_properties(test_analyzer_ernie PROPERTIES TIMEOUT 120) +set_tests_properties(test_analyzer_mobilenet_depthwise_conv PROPERTIES TIMEOUT + 120) +if(WITH_GPU) + set_tests_properties(test_analyzer_bert PROPERTIES TIMEOUT 120) + set_tests_properties(test_analyzer_ernie PROPERTIES TIMEOUT 120) endif() if(WITH_GPU AND TENSORRT_FOUND) - set_tests_properties(trt_mobilenet_test PROPERTIES TIMEOUT 120) - if(WITH_MKLDNN) - set_tests_properties(test_analyzer_bfloat16_resnet50 PROPERTIES TIMEOUT 120) - endif() + set_tests_properties(trt_mobilenet_test PROPERTIES TIMEOUT 120) + if(WITH_MKLDNN) + set_tests_properties(test_analyzer_bfloat16_resnet50 PROPERTIES TIMEOUT 120) + endif() endif() if(ON_INFER OR WITH_GPU) - set_tests_properties(test_analyzer_transformer_profile PROPERTIES TIMEOUT 120) + set_tests_properties(test_analyzer_transformer_profile PROPERTIES TIMEOUT 120) endif() -if (WITH_IPU) - #word2vec sample - set(WORD2VEC_INSTALL_DIR "${INFERENCE_DEMO_INSTALL_DIR}/word2vec/word2vec.inference.model") - inference_analysis_test(ipu_word2vec_sample SRCS ipu_word2vec_sample.cc - EXTRA_DEPS ${INFERENCE_EXTRA_DEPS} - ARGS --infer_model=${WORD2VEC_INSTALL_DIR}) - - # ERNIE - set(ERNIE_INSTALL_DIR "${INFERENCE_DEMO_INSTALL_DIR}/Ernie") - inference_analysis_api_test(ipu_ernie_test ${ERNIE_INSTALL_DIR} ipu_ernie_test.cc - ARGS --warmup=true --repeat=10) - inference_analysis_api_test(ipu_ernie_fp16_test ${ERNIE_INSTALL_DIR} ipu_ernie_fp16_test.cc - ARGS --warmup=true --repeat=10) - - # Resnet50 - set(RESNET50_MODEL_DIR "${INFERENCE_DEMO_INSTALL_DIR}/resnet50") - inference_analysis_test(ipu_resnet50_test SRCS ipu_resnet50_test.cc - EXTRA_DEPS ${INFERENCE_EXTRA_DEPS} - ARGS --infer_model=${RESNET50_MODEL_DIR} --warmup=true --repeat=10) - inference_analysis_test(ipu_resnet50_fp16_test SRCS ipu_resnet50_fp16_test.cc - EXTRA_DEPS ${INFERENCE_EXTRA_DEPS} - ARGS --infer_model=${RESNET50_MODEL_DIR} --warmup=true --repeat=10) - - # Only support Resnet50 and Ernie currently - inference_analysis_api_test(ipu_multi_model_profile SRCS ipu_multi_model_profile.cc - ARGS --model_name="Resnet50" --infer_model=${RESNET50_MODEL_DIR} --warmup=true --repeat=10) +if(WITH_IPU) + #word2vec sample + set(WORD2VEC_INSTALL_DIR + "${INFERENCE_DEMO_INSTALL_DIR}/word2vec/word2vec.inference.model") + inference_analysis_test( + ipu_word2vec_sample + SRCS + ipu_word2vec_sample.cc + EXTRA_DEPS + ${INFERENCE_EXTRA_DEPS} + ARGS + --infer_model=${WORD2VEC_INSTALL_DIR}) + + # ERNIE + set(ERNIE_INSTALL_DIR "${INFERENCE_DEMO_INSTALL_DIR}/Ernie") + inference_analysis_api_test(ipu_ernie_test ${ERNIE_INSTALL_DIR} + ipu_ernie_test.cc ARGS --warmup=true --repeat=10) + inference_analysis_api_test( + ipu_ernie_fp16_test ${ERNIE_INSTALL_DIR} ipu_ernie_fp16_test.cc ARGS + --warmup=true --repeat=10) + + # Resnet50 + set(RESNET50_MODEL_DIR "${INFERENCE_DEMO_INSTALL_DIR}/resnet50") + inference_analysis_test( + ipu_resnet50_test + SRCS + ipu_resnet50_test.cc + EXTRA_DEPS + ${INFERENCE_EXTRA_DEPS} + ARGS + --infer_model=${RESNET50_MODEL_DIR} + --warmup=true + --repeat=10) + inference_analysis_test( + ipu_resnet50_fp16_test + SRCS + ipu_resnet50_fp16_test.cc + EXTRA_DEPS + ${INFERENCE_EXTRA_DEPS} + ARGS + --infer_model=${RESNET50_MODEL_DIR} + --warmup=true + --repeat=10) + + # Only support Resnet50 and Ernie currently + inference_analysis_api_test( + ipu_multi_model_profile + SRCS + ipu_multi_model_profile.cc + ARGS + --model_name="Resnet50" + --infer_model=${RESNET50_MODEL_DIR} + --warmup=true + --repeat=10) endif() diff --git a/paddle/fluid/inference/tests/infer_ut/CMakeLists.txt b/paddle/fluid/inference/tests/infer_ut/CMakeLists.txt index ad7ef0c04ce..5aef30bf335 100644 --- a/paddle/fluid/inference/tests/infer_ut/CMakeLists.txt +++ b/paddle/fluid/inference/tests/infer_ut/CMakeLists.txt @@ -1,32 +1,34 @@ cmake_minimum_required(VERSION 3.0) project(cpp_inference_demo CXX C) -option(WITH_MKL "Compile demo with MKL/OpenBlas support, default use MKL." ON) -option(WITH_GPU "Compile demo with GPU/CPU, default use CPU." OFF) -option(WITH_STATIC_LIB "Compile demo with static/shared library, default use static." OFF) -option(USE_TENSORRT "Compile demo with TensorRT." OFF) -option(WITH_GTEST "Compile demo with GTEST" OFF) -option(WITH_ONNXRUNTIME "Compile demo with ONNXRuntime" OFF) +option(WITH_MKL "Compile demo with MKL/OpenBlas support, default use MKL." ON) +option(WITH_GPU "Compile demo with GPU/CPU, default use CPU." OFF) +option(WITH_STATIC_LIB + "Compile demo with static/shared library, default use static." OFF) +option(USE_TENSORRT "Compile demo with TensorRT." OFF) +option(WITH_GTEST "Compile demo with GTEST" OFF) +option(WITH_ONNXRUNTIME "Compile demo with ONNXRuntime" OFF) if(NOT WITH_STATIC_LIB) add_definitions("-DPADDLE_WITH_SHARED_LIB") else() - # PD_INFER_DECL is mainly used to set the dllimport/dllexport attribute in dynamic library mode. + # PD_INFER_DECL is mainly used to set the dllimport/dllexport attribute in dynamic library mode. # Set it to empty in static library mode to avoid compilation issues. add_definitions("/DPD_INFER_DECL=") endif() macro(safe_set_static_flag) - foreach(flag_var - CMAKE_CXX_FLAGS CMAKE_CXX_FLAGS_DEBUG CMAKE_CXX_FLAGS_RELEASE - CMAKE_CXX_FLAGS_MINSIZEREL CMAKE_CXX_FLAGS_RELWITHDEBINFO) - if(${flag_var} MATCHES "/MD") - string(REGEX REPLACE "/MD" "/MT" ${flag_var} "${${flag_var}}") - endif(${flag_var} MATCHES "/MD") - endforeach(flag_var) + foreach(flag_var + CMAKE_CXX_FLAGS CMAKE_CXX_FLAGS_DEBUG CMAKE_CXX_FLAGS_RELEASE + CMAKE_CXX_FLAGS_MINSIZEREL CMAKE_CXX_FLAGS_RELWITHDEBINFO) + if(${flag_var} MATCHES "/MD") + string(REGEX REPLACE "/MD" "/MT" ${flag_var} "${${flag_var}}") + endif(${flag_var} MATCHES "/MD") + endforeach(flag_var) endmacro() if(NOT DEFINED PADDLE_LIB) - message(FATAL_ERROR "please set PADDLE_LIB with -DPADDLE_LIB=/path/paddle/lib") + message( + FATAL_ERROR "please set PADDLE_LIB with -DPADDLE_LIB=/path/paddle/lib") endif() if(NOT DEFINED DEMO_NAME) message(FATAL_ERROR "please set DEMO_NAME with -DDEMO_NAME=demo_name") @@ -46,7 +48,7 @@ link_directories("${PADDLE_LIB_THIRD_PARTY_PATH}gflags/lib") link_directories("${PADDLE_LIB_THIRD_PARTY_PATH}xxhash/lib") link_directories("${PADDLE_LIB_THIRD_PARTY_PATH}cryptopp/lib") link_directories("${PADDLE_LIB}/paddle/lib") -if (WITH_ONNXRUNTIME) +if(WITH_ONNXRUNTIME) include_directories("${PADDLE_LIB_THIRD_PARTY_PATH}onnxruntime/include") include_directories("${PADDLE_LIB_THIRD_PARTY_PATH}paddle2onnx/include") @@ -54,21 +56,25 @@ if (WITH_ONNXRUNTIME) link_directories("${PADDLE_LIB_THIRD_PARTY_PATH}paddle2onnx/lib") endif() -if (WIN32) +if(WIN32) add_definitions("/DGOOGLE_GLOG_DLL_DECL=") option(MSVC_STATIC_CRT "use static C Runtime library by default" ON) - if (MSVC_STATIC_CRT) - if (WITH_MKL) + if(MSVC_STATIC_CRT) + if(WITH_MKL) set(FLAG_OPENMP "/openmp") endif() set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} /wd4244 /wd4530") - set(CMAKE_C_FLAGS_DEBUG "${CMAKE_C_FLAGS_DEBUG} /bigobj /MTd ${FLAG_OPENMP}") - set(CMAKE_C_FLAGS_RELEASE "${CMAKE_C_FLAGS_RELEASE} /bigobj /MT ${FLAG_OPENMP}") + set(CMAKE_C_FLAGS_DEBUG + "${CMAKE_C_FLAGS_DEBUG} /bigobj /MTd ${FLAG_OPENMP}") + set(CMAKE_C_FLAGS_RELEASE + "${CMAKE_C_FLAGS_RELEASE} /bigobj /MT ${FLAG_OPENMP}") set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} /wd4244 /wd4530") - set(CMAKE_CXX_FLAGS_DEBUG "${CMAKE_CXX_FLAGS_DEBUG} /bigobj /MTd ${FLAG_OPENMP}") - set(CMAKE_CXX_FLAGS_RELEASE "${CMAKE_CXX_FLAGS_RELEASE} /bigobj /MT ${FLAG_OPENMP}") + set(CMAKE_CXX_FLAGS_DEBUG + "${CMAKE_CXX_FLAGS_DEBUG} /bigobj /MTd ${FLAG_OPENMP}") + set(CMAKE_CXX_FLAGS_RELEASE + "${CMAKE_CXX_FLAGS_RELEASE} /bigobj /MT ${FLAG_OPENMP}") safe_set_static_flag() - if (WITH_STATIC_LIB) + if(WITH_STATIC_LIB) add_definitions(-DSTATIC_LIB) endif() endif() @@ -81,60 +87,75 @@ endif() if(WITH_GPU) if(NOT WIN32) - set(CUDA_LIB "/usr/local/cuda/lib64/" CACHE STRING "CUDA Library") + set(CUDA_LIB + "/usr/local/cuda/lib64/" + CACHE STRING "CUDA Library") else() - set(CUDA_LIB "" CACHE STRING "CUDA_LIB") + set(CUDA_LIB + "" + CACHE STRING "CUDA_LIB") if("${CUDA_LIB}" STREQUAL "") if(DEFINED ENV{CUDA_PATH}) set(CUDA_LIB "$ENV{CUDA_PATH}\\lib\\x64") else() - set(CUDA_LIB "C:\\Program\ Files\\NVIDIA GPU Computing Toolkit\\CUDA\\v10.2\\lib\\x64") + set(CUDA_LIB + "C:\\Program\ Files\\NVIDIA GPU Computing Toolkit\\CUDA\\v10.2\\lib\\x64" + ) endif() endif() message(STATUS "Current CUDA lib path: ${CUDA_LIB}") endif(NOT WIN32) endif() -if (USE_TENSORRT AND WITH_GPU) - set(TENSORRT_ROOT "" CACHE STRING "The root directory of TensorRT library") +if(USE_TENSORRT AND WITH_GPU) + set(TENSORRT_ROOT + "" + CACHE STRING "The root directory of TensorRT library") if("${TENSORRT_ROOT}" STREQUAL "") - message(FATAL_ERROR "The TENSORRT_ROOT is empty, you must assign it a value with CMake command. Such as: -DTENSORRT_ROOT=TENSORRT_ROOT_PATH ") + message( + FATAL_ERROR + "The TENSORRT_ROOT is empty, you must assign it a value with CMake command. Such as: -DTENSORRT_ROOT=TENSORRT_ROOT_PATH " + ) endif() set(TENSORRT_INCLUDE_DIR ${TENSORRT_ROOT}/include) set(TENSORRT_LIB_DIR ${TENSORRT_ROOT}/lib) file(READ ${TENSORRT_INCLUDE_DIR}/NvInfer.h TENSORRT_VERSION_FILE_CONTENTS) - string(REGEX MATCH "define NV_TENSORRT_MAJOR +([0-9]+)" TENSORRT_MAJOR_VERSION - "${TENSORRT_VERSION_FILE_CONTENTS}") - string(REGEX MATCH "define NV_TENSORRT_MINOR +([0-9]+)" TENSORRT_MINOR_VERSION - "${TENSORRT_VERSION_FILE_CONTENTS}") - string(REGEX MATCH "define NV_TENSORRT_PATCH +([0-9]+)" TENSORRT_PATCH_VERSION - "${TENSORRT_VERSION_FILE_CONTENTS}") - string(REGEX MATCH "define NV_TENSORRT_BUILD +([0-9]+)" TENSORRT_BUILD_VERSION - "${TENSORRT_VERSION_FILE_CONTENTS}") + string(REGEX MATCH "define NV_TENSORRT_MAJOR +([0-9]+)" + TENSORRT_MAJOR_VERSION "${TENSORRT_VERSION_FILE_CONTENTS}") + string(REGEX MATCH "define NV_TENSORRT_MINOR +([0-9]+)" + TENSORRT_MINOR_VERSION "${TENSORRT_VERSION_FILE_CONTENTS}") + string(REGEX MATCH "define NV_TENSORRT_PATCH +([0-9]+)" + TENSORRT_PATCH_VERSION "${TENSORRT_VERSION_FILE_CONTENTS}") + string(REGEX MATCH "define NV_TENSORRT_BUILD +([0-9]+)" + TENSORRT_BUILD_VERSION "${TENSORRT_VERSION_FILE_CONTENTS}") if("${TENSORRT_MAJOR_VERSION}" STREQUAL "") - file(READ ${TENSORRT_INCLUDE_DIR}/NvInferVersion.h TENSORRT_VERSION_FILE_CONTENTS) - string(REGEX MATCH "define NV_TENSORRT_MAJOR +([0-9]+)" TENSORRT_MAJOR_VERSION - "${TENSORRT_VERSION_FILE_CONTENTS}") - string(REGEX MATCH "define NV_TENSORRT_MINOR +([0-9]+)" TENSORRT_MINOR_VERSION - "${TENSORRT_VERSION_FILE_CONTENTS}") - string(REGEX MATCH "define NV_TENSORRT_PATCH +([0-9]+)" TENSORRT_PATCH_VERSION - "${TENSORRT_VERSION_FILE_CONTENTS}") - string(REGEX MATCH "define NV_TENSORRT_BUILD +([0-9]+)" TENSORRT_BUILD_VERSION - "${TENSORRT_VERSION_FILE_CONTENTS}") + file(READ ${TENSORRT_INCLUDE_DIR}/NvInferVersion.h + TENSORRT_VERSION_FILE_CONTENTS) + string(REGEX MATCH "define NV_TENSORRT_MAJOR +([0-9]+)" + TENSORRT_MAJOR_VERSION "${TENSORRT_VERSION_FILE_CONTENTS}") + string(REGEX MATCH "define NV_TENSORRT_MINOR +([0-9]+)" + TENSORRT_MINOR_VERSION "${TENSORRT_VERSION_FILE_CONTENTS}") + string(REGEX MATCH "define NV_TENSORRT_PATCH +([0-9]+)" + TENSORRT_PATCH_VERSION "${TENSORRT_VERSION_FILE_CONTENTS}") + string(REGEX MATCH "define NV_TENSORRT_BUILD +([0-9]+)" + TENSORRT_BUILD_VERSION "${TENSORRT_VERSION_FILE_CONTENTS}") endif() if("${TENSORRT_MAJOR_VERSION}" STREQUAL "") message(SEND_ERROR "Failed to detect TensorRT version.") endif() string(REGEX REPLACE "define NV_TENSORRT_MAJOR +([0-9]+)" "\\1" - TENSORRT_MAJOR_VERSION "${TENSORRT_MAJOR_VERSION}") + TENSORRT_MAJOR_VERSION "${TENSORRT_MAJOR_VERSION}") string(REGEX REPLACE "define NV_TENSORRT_MINOR +([0-9]+)" "\\1" - TENSORRT_MINOR_VERSION "${TENSORRT_MINOR_VERSION}") + TENSORRT_MINOR_VERSION "${TENSORRT_MINOR_VERSION}") string(REGEX REPLACE "define NV_TENSORRT_PATCH +([0-9]+)" "\\1" - TENSORRT_PATCH_VERSION "${TENSORRT_PATCH_VERSION}") + TENSORRT_PATCH_VERSION "${TENSORRT_PATCH_VERSION}") string(REGEX REPLACE "define NV_TENSORRT_BUILD +([0-9]+)" "\\1" - TENSORRT_BUILD_VERSION "${TENSORRT_BUILD_VERSION}") - message(STATUS "Current TensorRT header is ${TENSORRT_INCLUDE_DIR}/NvInfer.h. " - "Current TensorRT version is v${TENSORRT_MAJOR_VERSION}.${TENSORRT_MINOR_VERSION}.${TENSORRT_PATCH_VERSION}.${TENSORRT_BUILD_VERSION} ") + TENSORRT_BUILD_VERSION "${TENSORRT_BUILD_VERSION}") + message( + STATUS + "Current TensorRT header is ${TENSORRT_INCLUDE_DIR}/NvInfer.h. " + "Current TensorRT version is v${TENSORRT_MAJOR_VERSION}.${TENSORRT_MINOR_VERSION}.${TENSORRT_PATCH_VERSION}.${TENSORRT_BUILD_VERSION} " + ) include_directories("${TENSORRT_INCLUDE_DIR}") link_directories("${TENSORRT_LIB_DIR}") add_compile_definitions(NV_TENSORRT_MAJOR=${TENSORRT_MAJOR_VERSION}) @@ -150,8 +171,9 @@ if(WITH_MKL) set(MATH_LIB ${MATH_LIB_PATH}/lib/mklml${CMAKE_STATIC_LIBRARY_SUFFIX} ${MATH_LIB_PATH}/lib/libiomp5md${CMAKE_STATIC_LIBRARY_SUFFIX}) else() - set(MATH_LIB ${MATH_LIB_PATH}/lib/libmklml_intel${CMAKE_SHARED_LIBRARY_SUFFIX} - ${MATH_LIB_PATH}/lib/libiomp5${CMAKE_SHARED_LIBRARY_SUFFIX}) + set(MATH_LIB + ${MATH_LIB_PATH}/lib/libmklml_intel${CMAKE_SHARED_LIBRARY_SUFFIX} + ${MATH_LIB_PATH}/lib/libiomp5${CMAKE_SHARED_LIBRARY_SUFFIX}) endif() set(MKLDNN_PATH "${PADDLE_LIB_THIRD_PARTY_PATH}mkldnn") if(EXISTS ${MKLDNN_PATH}) @@ -166,63 +188,97 @@ else() set(OPENBLAS_LIB_PATH "${PADDLE_LIB_THIRD_PARTY_PATH}openblas") include_directories("${OPENBLAS_LIB_PATH}/include/openblas") if(WIN32) - set(MATH_LIB ${OPENBLAS_LIB_PATH}/lib/openblas${CMAKE_STATIC_LIBRARY_SUFFIX}) + set(MATH_LIB + ${OPENBLAS_LIB_PATH}/lib/openblas${CMAKE_STATIC_LIBRARY_SUFFIX}) else() - set(MATH_LIB ${OPENBLAS_LIB_PATH}/lib/libopenblas${CMAKE_STATIC_LIBRARY_SUFFIX}) + set(MATH_LIB + ${OPENBLAS_LIB_PATH}/lib/libopenblas${CMAKE_STATIC_LIBRARY_SUFFIX}) endif() endif() if(WITH_STATIC_LIB) - set(DEPS ${PADDLE_LIB}/paddle/lib/libpaddle_inference${CMAKE_STATIC_LIBRARY_SUFFIX}) + set(DEPS + ${PADDLE_LIB}/paddle/lib/libpaddle_inference${CMAKE_STATIC_LIBRARY_SUFFIX} + ) else() if(WIN32) - set(DEPS ${PADDLE_LIB}/paddle/lib/paddle_inference${CMAKE_STATIC_LIBRARY_SUFFIX}) + set(DEPS + ${PADDLE_LIB}/paddle/lib/paddle_inference${CMAKE_STATIC_LIBRARY_SUFFIX}) else() - set(DEPS ${PADDLE_LIB}/paddle/lib/libpaddle_inference${CMAKE_SHARED_LIBRARY_SUFFIX}) + set(DEPS + ${PADDLE_LIB}/paddle/lib/libpaddle_inference${CMAKE_SHARED_LIBRARY_SUFFIX} + ) endif() endif() -if (WITH_ONNXRUNTIME) +if(WITH_ONNXRUNTIME) if(WIN32) - set(DEPS ${DEPS} ${PADDLE_LIB_THIRD_PARTY_PATH}onnxruntime/lib/onnxruntime.lib paddle2onnx) + set(DEPS + ${DEPS} ${PADDLE_LIB_THIRD_PARTY_PATH}onnxruntime/lib/onnxruntime.lib + paddle2onnx) elseif(APPLE) - set(DEPS ${DEPS} ${PADDLE_LIB_THIRD_PARTY_PATH}onnxruntime/lib/libonnxruntime.1.10.0.dylib paddle2onnx) + set(DEPS + ${DEPS} + ${PADDLE_LIB_THIRD_PARTY_PATH}onnxruntime/lib/libonnxruntime.1.10.0.dylib + paddle2onnx) else() - set(DEPS ${DEPS} ${PADDLE_LIB_THIRD_PARTY_PATH}onnxruntime/lib/libonnxruntime.so.1.10.0 paddle2onnx) + set(DEPS + ${DEPS} + ${PADDLE_LIB_THIRD_PARTY_PATH}onnxruntime/lib/libonnxruntime.so.1.10.0 + paddle2onnx) endif() endif() -if (NOT WIN32) +if(NOT WIN32) set(EXTERNAL_LIB "-lrt -ldl -lpthread") - set(DEPS ${DEPS} - ${MATH_LIB} ${MKLDNN_LIB} - glog gflags protobuf xxhash cryptopp + set(DEPS + ${DEPS} + ${MATH_LIB} + ${MKLDNN_LIB} + glog + gflags + protobuf + xxhash + cryptopp ${EXTERNAL_LIB}) else() - set(DEPS ${DEPS} - ${MATH_LIB} ${MKLDNN_LIB} - glog gflags_static libprotobuf xxhash cryptopp-static ${EXTERNAL_LIB}) + set(DEPS + ${DEPS} + ${MATH_LIB} + ${MKLDNN_LIB} + glog + gflags_static + libprotobuf + xxhash + cryptopp-static + ${EXTERNAL_LIB}) set(DEPS ${DEPS} shlwapi.lib) endif(NOT WIN32) if(WITH_GPU) if(NOT WIN32) - if (USE_TENSORRT) - set(DEPS ${DEPS} ${TENSORRT_LIB_DIR}/libnvinfer${CMAKE_SHARED_LIBRARY_SUFFIX}) - set(DEPS ${DEPS} ${TENSORRT_LIB_DIR}/libnvinfer_plugin${CMAKE_SHARED_LIBRARY_SUFFIX}) + if(USE_TENSORRT) + set(DEPS ${DEPS} + ${TENSORRT_LIB_DIR}/libnvinfer${CMAKE_SHARED_LIBRARY_SUFFIX}) + set(DEPS + ${DEPS} + ${TENSORRT_LIB_DIR}/libnvinfer_plugin${CMAKE_SHARED_LIBRARY_SUFFIX}) endif() set(DEPS ${DEPS} ${CUDA_LIB}/libcudart${CMAKE_SHARED_LIBRARY_SUFFIX}) else() if(USE_TENSORRT) - set(DEPS ${DEPS} ${TENSORRT_LIB_DIR}/nvinfer${CMAKE_STATIC_LIBRARY_SUFFIX}) - set(DEPS ${DEPS} ${TENSORRT_LIB_DIR}/nvinfer_plugin${CMAKE_STATIC_LIBRARY_SUFFIX}) + set(DEPS ${DEPS} + ${TENSORRT_LIB_DIR}/nvinfer${CMAKE_STATIC_LIBRARY_SUFFIX}) + set(DEPS ${DEPS} + ${TENSORRT_LIB_DIR}/nvinfer_plugin${CMAKE_STATIC_LIBRARY_SUFFIX}) if(${TENSORRT_MAJOR_VERSION} EQUAL 7) - set(DEPS ${DEPS} ${TENSORRT_LIB_DIR}/myelin64_1${CMAKE_STATIC_LIBRARY_SUFFIX}) + set(DEPS ${DEPS} + ${TENSORRT_LIB_DIR}/myelin64_1${CMAKE_STATIC_LIBRARY_SUFFIX}) endif() endif() - set(DEPS ${DEPS} ${CUDA_LIB}/cudart${CMAKE_STATIC_LIBRARY_SUFFIX} ) - set(DEPS ${DEPS} ${CUDA_LIB}/cublas${CMAKE_STATIC_LIBRARY_SUFFIX} ) - set(DEPS ${DEPS} ${CUDA_LIB}/cudnn${CMAKE_STATIC_LIBRARY_SUFFIX} ) + set(DEPS ${DEPS} ${CUDA_LIB}/cudart${CMAKE_STATIC_LIBRARY_SUFFIX}) + set(DEPS ${DEPS} ${CUDA_LIB}/cublas${CMAKE_STATIC_LIBRARY_SUFFIX}) + set(DEPS ${DEPS} ${CUDA_LIB}/cudnn${CMAKE_STATIC_LIBRARY_SUFFIX}) endif() endif() @@ -237,11 +293,14 @@ if(WITH_GTEST) include(GNUInstallDirs) include_directories(${GTEST_INSTALL_DIR}/include) add_dependencies(${DEMO_NAME} thirdparty_gtest) - IF(WIN32) + if(WIN32) target_link_libraries(${DEMO_NAME} ${GTEST_LIBRARIES}) - ELSE() - target_link_libraries(${DEMO_NAME} ${GTEST_INSTALL_DIR}/${CMAKE_INSTALL_LIBDIR}/libgtest${CMAKE_STATIC_LIBRARY_SUFFIX}) - ENDIF(WIN32) + else() + target_link_libraries( + ${DEMO_NAME} + ${GTEST_INSTALL_DIR}/${CMAKE_INSTALL_LIBDIR}/libgtest${CMAKE_STATIC_LIBRARY_SUFFIX} + ) + endif(WIN32) endif() if(WIN32) if("${CMAKE_GENERATOR}" MATCHES "Ninja") @@ -251,41 +310,62 @@ if(WIN32) endif() if(USE_TENSORRT) - add_custom_command(TARGET ${DEMO_NAME} POST_BUILD - COMMAND ${CMAKE_COMMAND} -E copy ${TENSORRT_LIB_DIR}/nvinfer${CMAKE_SHARED_LIBRARY_SUFFIX} - ${LIB_PATH} - COMMAND ${CMAKE_COMMAND} -E copy ${TENSORRT_LIB_DIR}/nvinfer_plugin${CMAKE_SHARED_LIBRARY_SUFFIX} - ${LIB_PATH} - ) + add_custom_command( + TARGET ${DEMO_NAME} + POST_BUILD + COMMAND + ${CMAKE_COMMAND} -E copy + ${TENSORRT_LIB_DIR}/nvinfer${CMAKE_SHARED_LIBRARY_SUFFIX} ${LIB_PATH} + COMMAND + ${CMAKE_COMMAND} -E copy + ${TENSORRT_LIB_DIR}/nvinfer_plugin${CMAKE_SHARED_LIBRARY_SUFFIX} + ${LIB_PATH}) if(${TENSORRT_MAJOR_VERSION} EQUAL 7) - add_custom_command(TARGET ${DEMO_NAME} POST_BUILD - COMMAND ${CMAKE_COMMAND} -E copy ${TENSORRT_LIB_DIR}/myelin64_1${CMAKE_SHARED_LIBRARY_SUFFIX} - ${LIB_PATH}) + add_custom_command( + TARGET ${DEMO_NAME} + POST_BUILD + COMMAND + ${CMAKE_COMMAND} -E copy + ${TENSORRT_LIB_DIR}/myelin64_1${CMAKE_SHARED_LIBRARY_SUFFIX} + ${LIB_PATH}) endif() endif() if(WITH_MKL) message("LIB_PATH IS ${LIB_PATH}") - add_custom_command(TARGET ${DEMO_NAME} POST_BUILD - COMMAND ${CMAKE_COMMAND} -E copy ${MATH_LIB_PATH}/lib/mklml.dll ${LIB_PATH} - COMMAND ${CMAKE_COMMAND} -E copy ${MATH_LIB_PATH}/lib/libiomp5md.dll ${LIB_PATH} - COMMAND ${CMAKE_COMMAND} -E copy ${MKLDNN_PATH}/lib/mkldnn.dll ${LIB_PATH} - ) + add_custom_command( + TARGET ${DEMO_NAME} + POST_BUILD + COMMAND ${CMAKE_COMMAND} -E copy ${MATH_LIB_PATH}/lib/mklml.dll + ${LIB_PATH} + COMMAND ${CMAKE_COMMAND} -E copy ${MATH_LIB_PATH}/lib/libiomp5md.dll + ${LIB_PATH} + COMMAND ${CMAKE_COMMAND} -E copy ${MKLDNN_PATH}/lib/mkldnn.dll + ${LIB_PATH}) else() - add_custom_command(TARGET ${DEMO_NAME} POST_BUILD - COMMAND ${CMAKE_COMMAND} -E copy ${OPENBLAS_LIB_PATH}/lib/openblas.dll ${LIB_PATH} - ) + add_custom_command( + TARGET ${DEMO_NAME} + POST_BUILD + COMMAND ${CMAKE_COMMAND} -E copy ${OPENBLAS_LIB_PATH}/lib/openblas.dll + ${LIB_PATH}) endif() if(WITH_ONNXRUNTIME) - add_custom_command(TARGET ${DEMO_NAME} POST_BUILD - COMMAND ${CMAKE_COMMAND} -E copy ${PADDLE_LIB_THIRD_PARTY_PATH}onnxruntime/lib/onnxruntime.dll - ${LIB_PATH} - COMMAND ${CMAKE_COMMAND} -E copy ${PADDLE_LIB_THIRD_PARTY_PATH}paddle2onnx/lib/paddle2onnx.dll - ${LIB_PATH} - ) + add_custom_command( + TARGET ${DEMO_NAME} + POST_BUILD + COMMAND + ${CMAKE_COMMAND} -E copy + ${PADDLE_LIB_THIRD_PARTY_PATH}onnxruntime/lib/onnxruntime.dll + ${LIB_PATH} + COMMAND + ${CMAKE_COMMAND} -E copy + ${PADDLE_LIB_THIRD_PARTY_PATH}paddle2onnx/lib/paddle2onnx.dll + ${LIB_PATH}) endif() if(NOT WITH_STATIC_LIB) - add_custom_command(TARGET ${DEMO_NAME} POST_BUILD - COMMAND ${CMAKE_COMMAND} -E copy "${PADDLE_LIB}/paddle/lib/paddle_inference.dll" ${LIB_PATH} - ) + add_custom_command( + TARGET ${DEMO_NAME} + POST_BUILD + COMMAND ${CMAKE_COMMAND} -E copy + "${PADDLE_LIB}/paddle/lib/paddle_inference.dll" ${LIB_PATH}) endif() endif() diff --git a/paddle/fluid/inference/tests/infer_ut/external-cmake/gtest-cpp.cmake b/paddle/fluid/inference/tests/infer_ut/external-cmake/gtest-cpp.cmake index b38984314ec..49b0a04197d 100644 --- a/paddle/fluid/inference/tests/infer_ut/external-cmake/gtest-cpp.cmake +++ b/paddle/fluid/inference/tests/infer_ut/external-cmake/gtest-cpp.cmake @@ -1,43 +1,50 @@ find_package(Git REQUIRED) message("${CMAKE_BUILD_TYPE}") -SET(GTEST_PREFIX_DIR ${CMAKE_CURRENT_BINARY_DIR}/gtest) -SET(GTEST_SOURCE_DIR ${CMAKE_CURRENT_BINARY_DIR}/gtest/src/extern_gtest) -SET(GTEST_INSTALL_DIR ${CMAKE_CURRENT_BINARY_DIR}/install/gtest) -SET(GTEST_INCLUDE_DIR "${GTEST_INSTALL_DIR}/include" CACHE PATH "gtest include directory." FORCE) -set(GTEST_REPOSITORY https://github.com/google/googletest.git) -set(GTEST_TAG release-1.8.1) -INCLUDE_DIRECTORIES(${GTEST_INCLUDE_DIR}) -IF(WIN32) - # if use CMAKE_INSTALL_LIBDIR, the path of lib actually is install/gtest/lib/gtest.lib but GTEST_LIBRARIES - # is install/gtest/gtest.lib - set(GTEST_LIBRARIES - "${GTEST_INSTALL_DIR}/lib/gtest.lib" CACHE FILEPATH "gtest libraries." FORCE) - set(GTEST_MAIN_LIBRARIES - "${GTEST_INSTALL_DIR}/lib/gtest_main.lib" CACHE FILEPATH "gtest main libraries." FORCE) -ELSE() - set(GTEST_LIBRARIES - "${GTEST_INSTALL_DIR}/${CMAKE_INSTALL_LIBDIR}/libgtest.a" CACHE FILEPATH "gtest libraries." FORCE) - set(GTEST_MAIN_LIBRARIES - "${GTEST_INSTALL_DIR}/${CMAKE_INSTALL_LIBDIR}/libgtest_main.a" CACHE FILEPATH "gtest main libraries." FORCE) -ENDIF(WIN32) +set(GTEST_PREFIX_DIR ${CMAKE_CURRENT_BINARY_DIR}/gtest) +set(GTEST_SOURCE_DIR ${CMAKE_CURRENT_BINARY_DIR}/gtest/src/extern_gtest) +set(GTEST_INSTALL_DIR ${CMAKE_CURRENT_BINARY_DIR}/install/gtest) +set(GTEST_INCLUDE_DIR + "${GTEST_INSTALL_DIR}/include" + CACHE PATH "gtest include directory." FORCE) +set(GTEST_REPOSITORY https://github.com/google/googletest.git) +set(GTEST_TAG release-1.8.1) +include_directories(${GTEST_INCLUDE_DIR}) +if(WIN32) + # if use CMAKE_INSTALL_LIBDIR, the path of lib actually is install/gtest/lib/gtest.lib but GTEST_LIBRARIES + # is install/gtest/gtest.lib + set(GTEST_LIBRARIES + "${GTEST_INSTALL_DIR}/lib/gtest.lib" + CACHE FILEPATH "gtest libraries." FORCE) + set(GTEST_MAIN_LIBRARIES + "${GTEST_INSTALL_DIR}/lib/gtest_main.lib" + CACHE FILEPATH "gtest main libraries." FORCE) +else() + set(GTEST_LIBRARIES + "${GTEST_INSTALL_DIR}/${CMAKE_INSTALL_LIBDIR}/libgtest.a" + CACHE FILEPATH "gtest libraries." FORCE) + set(GTEST_MAIN_LIBRARIES + "${GTEST_INSTALL_DIR}/${CMAKE_INSTALL_LIBDIR}/libgtest_main.a" + CACHE FILEPATH "gtest main libraries." FORCE) +endif(WIN32) ExternalProject_Add( - extern_gtest - PREFIX gtest - GIT_REPOSITORY ${GTEST_REPOSITORY} - GIT_TAG ${GTEST_TAG} - DOWNLOAD_DIR "${DOWNLOAD_LOCATION}" - UPDATE_COMMAND "" - CMAKE_ARGS -DCMAKE_INSTALL_PREFIX:PATH=${GTEST_INSTALL_DIR} - -DCMAKE_POSITION_INDEPENDENT_CODE:BOOL=ON - -DCMAKE_BUILD_TYPE:STRING=Release - BUILD_BYPRODUCTS ${GTEST_LIBRARIES} - BUILD_BYPRODUCTS ${GTEST_MAIN_LIBRARIES} -) + extern_gtest + PREFIX gtest + GIT_REPOSITORY ${GTEST_REPOSITORY} + GIT_TAG ${GTEST_TAG} + DOWNLOAD_DIR "${DOWNLOAD_LOCATION}" + UPDATE_COMMAND "" + CMAKE_ARGS -DCMAKE_INSTALL_PREFIX:PATH=${GTEST_INSTALL_DIR} + -DCMAKE_POSITION_INDEPENDENT_CODE:BOOL=ON + -DCMAKE_BUILD_TYPE:STRING=Release + BUILD_BYPRODUCTS ${GTEST_LIBRARIES} + BUILD_BYPRODUCTS ${GTEST_MAIN_LIBRARIES}) -ADD_LIBRARY(thirdparty_gtest STATIC IMPORTED GLOBAL) -SET_PROPERTY(TARGET thirdparty_gtest PROPERTY IMPORTED_LOCATION ${GTEST_LIBRARIES}) -ADD_DEPENDENCIES(thirdparty_gtest extern_gtest) +add_library(thirdparty_gtest STATIC IMPORTED GLOBAL) +set_property(TARGET thirdparty_gtest PROPERTY IMPORTED_LOCATION + ${GTEST_LIBRARIES}) +add_dependencies(thirdparty_gtest extern_gtest) -ADD_LIBRARY(thirdparty_gtest_main STATIC IMPORTED GLOBAL) -SET_PROPERTY(TARGET thirdparty_gtest_main PROPERTY IMPORTED_LOCATION ${GTEST_MAIN_LIBRARIES}) -ADD_DEPENDENCIES(thirdparty_gtest_main extern_gtest) +add_library(thirdparty_gtest_main STATIC IMPORTED GLOBAL) +set_property(TARGET thirdparty_gtest_main PROPERTY IMPORTED_LOCATION + ${GTEST_MAIN_LIBRARIES}) +add_dependencies(thirdparty_gtest_main extern_gtest) diff --git a/paddle/fluid/inference/tests/test.cmake b/paddle/fluid/inference/tests/test.cmake index 6b6c0cd22f0..d4b3ebdaa0b 100644 --- a/paddle/fluid/inference/tests/test.cmake +++ b/paddle/fluid/inference/tests/test.cmake @@ -1,26 +1,33 @@ include(ExternalProject) -set(INFERENCE_URL "http://paddle-inference-dist.bj.bcebos.com" CACHE STRING "inference download url") -set(INFERENCE_DEMO_INSTALL_DIR "${THIRD_PARTY_PATH}/inference_demo" CACHE STRING - "A path setting inference demo download directories.") -set(CPU_NUM_THREADS_ON_CI 4 CACHE STRING "Run multi-threads on CI to reduce CI time.") -set(WARMUP_BATCH_SIZE 100 CACHE STRING "Default warmup_batch_size.") +set(INFERENCE_URL + "http://paddle-inference-dist.bj.bcebos.com" + CACHE STRING "inference download url") +set(INFERENCE_DEMO_INSTALL_DIR + "${THIRD_PARTY_PATH}/inference_demo" + CACHE STRING "A path setting inference demo download directories.") +set(CPU_NUM_THREADS_ON_CI + 4 + CACHE STRING "Run multi-threads on CI to reduce CI time.") +set(WARMUP_BATCH_SIZE + 100 + CACHE STRING "Default warmup_batch_size.") function(inference_download INSTALL_DIR URL FILENAME) message(STATUS "Download inference test stuff from ${URL}/${FILENAME}") string(REGEX REPLACE "[-%.]" "_" FILENAME_EX ${FILENAME}) ExternalProject_Add( - extern_inference_download_${FILENAME_EX} - ${EXTERNAL_PROJECT_LOG_ARGS} - PREFIX ${INSTALL_DIR} - URL ${URL}/${FILENAME} - DOWNLOAD_COMMAND wget --no-check-certificate -q -O ${INSTALL_DIR}/${FILENAME} ${URL}/${FILENAME} - DOWNLOAD_DIR ${INSTALL_DIR} - DOWNLOAD_NO_PROGRESS 1 - CONFIGURE_COMMAND "" - BUILD_COMMAND "" - UPDATE_COMMAND "" - INSTALL_COMMAND "" - ) + extern_inference_download_${FILENAME_EX} + ${EXTERNAL_PROJECT_LOG_ARGS} + PREFIX ${INSTALL_DIR} + URL ${URL}/${FILENAME} + DOWNLOAD_COMMAND wget --no-check-certificate -q -O + ${INSTALL_DIR}/${FILENAME} ${URL}/${FILENAME} + DOWNLOAD_DIR ${INSTALL_DIR} + DOWNLOAD_NO_PROGRESS 1 + CONFIGURE_COMMAND "" + BUILD_COMMAND "" + UPDATE_COMMAND "" + INSTALL_COMMAND "") endfunction() function(inference_download_and_uncompress INSTALL_DIR URL FILENAME CHECK_SUM) @@ -30,93 +37,101 @@ function(inference_download_and_uncompress INSTALL_DIR URL FILENAME CHECK_SUM) set(EXTERNAL_PROJECT_NAME "extern_download_${FILENAME_EX}") set(UNPACK_DIR "${INSTALL_DIR}/src/${EXTERNAL_PROJECT_NAME}") ExternalProject_Add( - ${EXTERNAL_PROJECT_NAME} - ${EXTERNAL_PROJECT_LOG_ARGS} - PREFIX ${INSTALL_DIR} - URL ${URL}/${FILENAME} - URL_HASH MD5=${CHECK_SUM} - DOWNLOAD_DIR ${INSTALL_DIR} - DOWNLOAD_NO_EXTRACT 1 - DOWNLOAD_NO_PROGRESS 1 - CONFIGURE_COMMAND "" - BUILD_COMMAND ${CMAKE_COMMAND} -E chdir ${INSTALL_DIR} - ${CMAKE_COMMAND} -E tar xzf ${DOWNLOAD_NAME} - UPDATE_COMMAND "" - INSTALL_COMMAND "" - ) + ${EXTERNAL_PROJECT_NAME} + ${EXTERNAL_PROJECT_LOG_ARGS} + PREFIX ${INSTALL_DIR} + URL ${URL}/${FILENAME} + URL_HASH MD5=${CHECK_SUM} + DOWNLOAD_DIR ${INSTALL_DIR} + DOWNLOAD_NO_EXTRACT 1 + DOWNLOAD_NO_PROGRESS 1 + CONFIGURE_COMMAND "" + BUILD_COMMAND ${CMAKE_COMMAND} -E chdir ${INSTALL_DIR} ${CMAKE_COMMAND} -E + tar xzf ${DOWNLOAD_NAME} + UPDATE_COMMAND "" + INSTALL_COMMAND "") endfunction() -function(inference_download_and_uncompress_without_verify INSTALL_DIR URL FILENAME) +function(inference_download_and_uncompress_without_verify INSTALL_DIR URL + FILENAME) message(STATUS "Download inference test stuff from ${URL}/${FILENAME}") string(REGEX REPLACE "[-%./\\]" "_" FILENAME_EX ${FILENAME}) string(REGEX MATCH "[^/\\]+$" DOWNLOAD_NAME ${FILENAME}) set(EXTERNAL_PROJECT_NAME "extern_download_${FILENAME_EX}") set(UNPACK_DIR "${INSTALL_DIR}/src/${EXTERNAL_PROJECT_NAME}") ExternalProject_Add( - ${EXTERNAL_PROJECT_NAME} - ${EXTERNAL_PROJECT_LOG_ARGS} - PREFIX ${INSTALL_DIR} - URL ${URL}/${FILENAME} - DOWNLOAD_DIR ${INSTALL_DIR} - DOWNLOAD_NO_EXTRACT 1 - DOWNLOAD_NO_PROGRESS 1 - CONFIGURE_COMMAND "" - BUILD_COMMAND ${CMAKE_COMMAND} -E chdir ${INSTALL_DIR} - ${CMAKE_COMMAND} -E tar xzf ${DOWNLOAD_NAME} - UPDATE_COMMAND "" - INSTALL_COMMAND "" - ) + ${EXTERNAL_PROJECT_NAME} + ${EXTERNAL_PROJECT_LOG_ARGS} + PREFIX ${INSTALL_DIR} + URL ${URL}/${FILENAME} + DOWNLOAD_DIR ${INSTALL_DIR} + DOWNLOAD_NO_EXTRACT 1 + DOWNLOAD_NO_PROGRESS 1 + CONFIGURE_COMMAND "" + BUILD_COMMAND ${CMAKE_COMMAND} -E chdir ${INSTALL_DIR} ${CMAKE_COMMAND} -E + tar xzf ${DOWNLOAD_NAME} + UPDATE_COMMAND "" + INSTALL_COMMAND "") endfunction() set(WORD2VEC_INSTALL_DIR "${INFERENCE_DEMO_INSTALL_DIR}/word2vec") if(NOT EXISTS ${WORD2VEC_INSTALL_DIR}/word2vec.inference.model.tar.gz) - inference_download_and_uncompress_without_verify(${WORD2VEC_INSTALL_DIR} ${INFERENCE_URL} "word2vec.inference.model.tar.gz") + inference_download_and_uncompress_without_verify( + ${WORD2VEC_INSTALL_DIR} ${INFERENCE_URL} "word2vec.inference.model.tar.gz") endif() set(WORD2VEC_MODEL_DIR "${WORD2VEC_INSTALL_DIR}/word2vec.inference.model") -set(IMG_CLS_RESNET_INSTALL_DIR "${INFERENCE_DEMO_INSTALL_DIR}/image_classification_resnet") -if(NOT EXISTS ${IMG_CLS_RESNET_INSTALL_DIR}/image_classification_resnet.inference.model.tgz) - inference_download_and_uncompress_without_verify(${IMG_CLS_RESNET_INSTALL_DIR} ${INFERENCE_URL} "image_classification_resnet.inference.model.tgz") +set(IMG_CLS_RESNET_INSTALL_DIR + "${INFERENCE_DEMO_INSTALL_DIR}/image_classification_resnet") +if(NOT EXISTS + ${IMG_CLS_RESNET_INSTALL_DIR}/image_classification_resnet.inference.model.tgz +) + inference_download_and_uncompress_without_verify( + ${IMG_CLS_RESNET_INSTALL_DIR} ${INFERENCE_URL} + "image_classification_resnet.inference.model.tgz") endif() -set(IMG_CLS_RESNET_MODEL_DIR "${IMG_CLS_RESNET_INSTALL_DIR}/image_classification_resnet.inference.model") +set(IMG_CLS_RESNET_MODEL_DIR + "${IMG_CLS_RESNET_INSTALL_DIR}/image_classification_resnet.inference.model") if(WITH_ONNXRUNTIME) set(MOBILENETV2_INSTALL_DIR "${INFERENCE_DEMO_INSTALL_DIR}/MobileNetV2") if(NOT EXISTS ${MOBILENETV2_INSTALL_DIR}/MobileNetV2.inference.model.tar.gz) - inference_download_and_uncompress_without_verify(${MOBILENETV2_INSTALL_DIR} ${INFERENCE_URL} "MobileNetV2.inference.model.tar.gz") + inference_download_and_uncompress_without_verify( + ${MOBILENETV2_INSTALL_DIR} ${INFERENCE_URL} + "MobileNetV2.inference.model.tar.gz") endif() set(MOBILENETV2_MODEL_DIR "${MOBILENETV2_INSTALL_DIR}/MobileNetV2") endif() -function (inference_base_test_build TARGET) - set(options "") - set(oneValueArgs "") - set(multiValueArgs SRCS DEPS) - cmake_parse_arguments(base_test "${options}" "${oneValueArgs}" "${multiValueArgs}" ${ARGN}) - cc_test_build(${TARGET} SRCS ${base_test_SRCS} DEPS ${base_test_DEPS}) +function(inference_base_test_build TARGET) + set(options "") + set(oneValueArgs "") + set(multiValueArgs SRCS DEPS) + cmake_parse_arguments(base_test "${options}" "${oneValueArgs}" + "${multiValueArgs}" ${ARGN}) + cc_test_build(${TARGET} SRCS ${base_test_SRCS} DEPS ${base_test_DEPS}) endfunction() -function (inference_base_test_run TARGET) - set(options "") - set(oneValueArgs "") - set(multiValueArgs COMMAND ARGS) - cmake_parse_arguments(base_test "${options}" "${oneValueArgs}" "${multiValueArgs}" ${ARGN}) - if(WITH_GPU) - set(mem_opt "--fraction_of_gpu_memory_to_use=0.5") - endif() - cc_test_run(${TARGET} COMMAND ${base_test_COMMAND} ARGS ${mem_opt} ${base_test_ARGS}) +function(inference_base_test_run TARGET) + set(options "") + set(oneValueArgs "") + set(multiValueArgs COMMAND ARGS) + cmake_parse_arguments(base_test "${options}" "${oneValueArgs}" + "${multiValueArgs}" ${ARGN}) + if(WITH_GPU) + set(mem_opt "--fraction_of_gpu_memory_to_use=0.5") + endif() + cc_test_run(${TARGET} COMMAND ${base_test_COMMAND} ARGS ${mem_opt} + ${base_test_ARGS}) endfunction() -function (inference_base_test TARGET) - set(options "") - set(oneValueArgs "") - set(multiValueArgs SRCS ARGS DEPS) - cmake_parse_arguments(base_test "${options}" "${oneValueArgs}" "${multiValueArgs}" ${ARGN}) - inference_base_test_build(${TARGET} - SRCS ${base_test_SRCS} - DEPS ${base_test_DEPS}) - inference_base_test_run(${TARGET} - COMMAND ${TARGET} - ARGS ${base_test_ARGS}) +function(inference_base_test TARGET) + set(options "") + set(oneValueArgs "") + set(multiValueArgs SRCS ARGS DEPS) + cmake_parse_arguments(base_test "${options}" "${oneValueArgs}" + "${multiValueArgs}" ${ARGN}) + inference_base_test_build(${TARGET} SRCS ${base_test_SRCS} DEPS + ${base_test_DEPS}) + inference_base_test_run(${TARGET} COMMAND ${TARGET} ARGS ${base_test_ARGS}) endfunction() - diff --git a/paddle/fluid/inference/utils/CMakeLists.txt b/paddle/fluid/inference/utils/CMakeLists.txt index 9a495194a8a..a32a61842a5 100644 --- a/paddle/fluid/inference/utils/CMakeLists.txt +++ b/paddle/fluid/inference/utils/CMakeLists.txt @@ -1,8 +1,23 @@ -cc_library(benchmark SRCS benchmark.cc DEPS enforce) -cc_test(test_benchmark SRCS benchmark_tester.cc DEPS benchmark) -cc_library(infer_io_utils SRCS io_utils.cc DEPS paddle_inference_api lod_tensor shape_range_info_proto) -cc_test(infer_io_utils_tester SRCS io_utils_tester.cc DEPS infer_io_utils) +cc_library( + benchmark + SRCS benchmark.cc + DEPS enforce) +cc_test( + test_benchmark + SRCS benchmark_tester.cc + DEPS benchmark) +cc_library( + infer_io_utils + SRCS io_utils.cc + DEPS paddle_inference_api lod_tensor shape_range_info_proto) +cc_test( + infer_io_utils_tester + SRCS io_utils_tester.cc + DEPS infer_io_utils) cc_library(table_printer SRCS table_printer.cc) -cc_test(test_table_printer SRCS table_printer_tester.cc DEPS table_printer) +cc_test( + test_table_printer + SRCS table_printer_tester.cc + DEPS table_printer) proto_library(shape_range_info_proto SRCS shape_range_info.proto) diff --git a/paddle/fluid/memory/CMakeLists.txt b/paddle/fluid/memory/CMakeLists.txt index 53e79939455..1f72482eef7 100644 --- a/paddle/fluid/memory/CMakeLists.txt +++ b/paddle/fluid/memory/CMakeLists.txt @@ -1,57 +1,89 @@ add_subdirectory(detail) add_subdirectory(allocation) -if (WITH_MKLDNN) - set(MKLDNN_CTX_DEPS mkldnn) -else () - set(MKLDNN_CTX_DEPS) +if(WITH_MKLDNN) + set(MKLDNN_CTX_DEPS mkldnn) +else() + set(MKLDNN_CTX_DEPS) endif() -cc_library(malloc SRCS malloc.cc DEPS - place enforce allocator_facade profiler ${MKLDNN_CTX_DEPS}) -cc_library(memcpy SRCS memcpy.cc DEPS place device_context) -cc_library(stats SRCS stats.cc DEPS enforce) +cc_library( + malloc + SRCS malloc.cc + DEPS place enforce allocator_facade profiler ${MKLDNN_CTX_DEPS}) +cc_library( + memcpy + SRCS memcpy.cc + DEPS place device_context) +cc_library( + stats + SRCS stats.cc + DEPS enforce) cc_library(memory DEPS malloc memcpy stats) -cc_test(memory_stats_test SRCS memory_stats_test.cc DEPS memory) -cc_test(stats_test SRCS stats_test.cc DEPS stats) - -if (WITH_GPU) - nv_test(malloc_test - SRCS malloc_test.cu - DEPS device_context malloc) - nv_test(stream_safe_cuda_alloc_test - SRCS stream_safe_cuda_alloc_test.cu - DEPS malloc cuda_graph_with_memory_pool) - nv_test(cuda_managed_memory_test - SRCS cuda_managed_memory_test.cu - DEPS malloc gpu_info place) - - if(WITH_TESTING AND TEST stream_safe_cuda_alloc_test) - set_tests_properties(stream_safe_cuda_alloc_test PROPERTIES - ENVIRONMENT "FLAGS_use_stream_safe_cuda_allocator=true;FLAGS_allocator_strategy=auto_growth") - endif() +cc_test( + memory_stats_test + SRCS memory_stats_test.cc + DEPS memory) +cc_test( + stats_test + SRCS stats_test.cc + DEPS stats) + +if(WITH_GPU) + nv_test( + malloc_test + SRCS malloc_test.cu + DEPS device_context malloc) + nv_test( + stream_safe_cuda_alloc_test + SRCS stream_safe_cuda_alloc_test.cu + DEPS malloc cuda_graph_with_memory_pool) + nv_test( + cuda_managed_memory_test + SRCS cuda_managed_memory_test.cu + DEPS malloc gpu_info place) + + if(WITH_TESTING AND TEST stream_safe_cuda_alloc_test) + set_tests_properties( + stream_safe_cuda_alloc_test + PROPERTIES + ENVIRONMENT + "FLAGS_use_stream_safe_cuda_allocator=true;FLAGS_allocator_strategy=auto_growth" + ) + endif() endif() -if (WITH_ROCM) - hip_test(malloc_test - SRCS malloc_test.cu - DEPS device_context malloc) - hip_test(cuda_managed_memory_test - SRCS cuda_managed_memory_test.cu - DEPS malloc gpu_info place) +if(WITH_ROCM) + hip_test( + malloc_test + SRCS malloc_test.cu + DEPS device_context malloc) + hip_test( + cuda_managed_memory_test + SRCS cuda_managed_memory_test.cu + DEPS malloc gpu_info place) endif() if(WITH_TESTING AND TEST cuda_managed_memory_test) -set_tests_properties(cuda_managed_memory_test PROPERTIES - ENVIRONMENT "FLAGS_use_cuda_managed_memory=true;FLAGS_allocator_strategy=auto_growth" - TIMEOUT 50) + set_tests_properties( + cuda_managed_memory_test + PROPERTIES + ENVIRONMENT + "FLAGS_use_cuda_managed_memory=true;FLAGS_allocator_strategy=auto_growth" + TIMEOUT 50) endif() -if(WITH_GPU AND WITH_TESTING AND NOT "$ENV{CI_SKIP_CPP_TEST}" STREQUAL "ON") - nv_test(get_base_ptr_test SRCS get_base_ptr_test.cu DEPS malloc gpu_info) - set_tests_properties(get_base_ptr_test PROPERTIES - ENVIRONMENT "FLAGS_allocator_strategy=auto_growth; +if(WITH_GPU + AND WITH_TESTING + AND NOT "$ENV{CI_SKIP_CPP_TEST}" STREQUAL "ON") + nv_test( + get_base_ptr_test + SRCS get_base_ptr_test.cu + DEPS malloc gpu_info) + set_tests_properties( + get_base_ptr_test + PROPERTIES ENVIRONMENT "FLAGS_allocator_strategy=auto_growth; FLAGS_use_stream_safe_cuda_allocator=true;") endif() diff --git a/paddle/fluid/memory/allocation/CMakeLists.txt b/paddle/fluid/memory/allocation/CMakeLists.txt index 5af13f76b36..109afd06f4d 100644 --- a/paddle/fluid/memory/allocation/CMakeLists.txt +++ b/paddle/fluid/memory/allocation/CMakeLists.txt @@ -1,137 +1,264 @@ -cc_library(allocator SRCS allocator.cc DEPS place stats) -cc_library(cpu_allocator SRCS cpu_allocator.cc DEPS allocator) -cc_library(locked_allocator SRCS locked_allocator.cc DEPS allocator) -cc_library(buffered_allocator SRCS buffered_allocator.cc DEPS allocator) -cc_library(best_fit_allocator SRCS best_fit_allocator.cc DEPS allocator) -cc_library(naive_best_fit_allocator SRCS naive_best_fit_allocator.cc DEPS allocator buddy_allocator profiler) -cc_test(naive_best_fit_allocator_test SRCS naive_best_fit_allocator_test.cc DEPS naive_best_fit_allocator) -cc_test(buffered_allocator_test SRCS buffered_allocator_test.cc DEPS locked_allocator buffered_allocator cpu_allocator best_fit_allocator) - -if (WITH_MKLDNN) +cc_library( + allocator + SRCS allocator.cc + DEPS place stats) +cc_library( + cpu_allocator + SRCS cpu_allocator.cc + DEPS allocator) +cc_library( + locked_allocator + SRCS locked_allocator.cc + DEPS allocator) +cc_library( + buffered_allocator + SRCS buffered_allocator.cc + DEPS allocator) +cc_library( + best_fit_allocator + SRCS best_fit_allocator.cc + DEPS allocator) +cc_library( + naive_best_fit_allocator + SRCS naive_best_fit_allocator.cc + DEPS allocator buddy_allocator profiler) +cc_test( + naive_best_fit_allocator_test + SRCS naive_best_fit_allocator_test.cc + DEPS naive_best_fit_allocator) +cc_test( + buffered_allocator_test + SRCS buffered_allocator_test.cc + DEPS locked_allocator buffered_allocator cpu_allocator best_fit_allocator) + +if(WITH_MKLDNN) set(MKLDNN_CTX_DEPS mkldnn) -else () +else() set(MKLDNN_CTX_DEPS) endif() -if (WITH_GPU) - nv_library(cuda_allocator SRCS cuda_allocator.cc DEPS allocator cuda_device_guard stats) - nv_library(cuda_managed_allocator SRCS cuda_managed_allocator.cc DEPS allocator cuda_device_guard gpu_info) - nv_library(pinned_allocator SRCS pinned_allocator.cc DEPS allocator) - nv_library(stream_safe_cuda_allocator SRCS stream_safe_cuda_allocator.cc DEPS allocator cuda_graph) - nv_library(thread_local_allocator SRCS thread_local_allocator.cc DEPS allocator) +if(WITH_GPU) + nv_library( + cuda_allocator + SRCS cuda_allocator.cc + DEPS allocator cuda_device_guard stats) + nv_library( + cuda_managed_allocator + SRCS cuda_managed_allocator.cc + DEPS allocator cuda_device_guard gpu_info) + nv_library( + pinned_allocator + SRCS pinned_allocator.cc + DEPS allocator) + nv_library( + stream_safe_cuda_allocator + SRCS stream_safe_cuda_allocator.cc + DEPS allocator cuda_graph) + nv_library( + thread_local_allocator + SRCS thread_local_allocator.cc + DEPS allocator) - cc_test(thread_local_allocator_test SRCS thread_local_allocator_test.cc DEPS thread_local_allocator) + cc_test( + thread_local_allocator_test + SRCS thread_local_allocator_test.cc + DEPS thread_local_allocator) if(CUDA_VERSION GREATER_EQUAL 10.2) - nv_library(cuda_virtual_mem_allocator SRCS cuda_virtual_mem_allocator.cc DEPS dynload_cuda) + nv_library( + cuda_virtual_mem_allocator + SRCS cuda_virtual_mem_allocator.cc + DEPS dynload_cuda) endif() endif() -if (WITH_ROCM) - hip_library(cuda_allocator SRCS cuda_allocator.cc DEPS allocator cuda_device_guard stats) - hip_library(cuda_managed_allocator SRCS cuda_managed_allocator.cc DEPS allocator cuda_device_guard gpu_info) - hip_library(pinned_allocator SRCS pinned_allocator.cc DEPS allocator) - hip_library(stream_safe_cuda_allocator SRCS stream_safe_cuda_allocator.cc DEPS allocator) - hip_library(thread_local_allocator SRCS thread_local_allocator.cc DEPS allocator) - - cc_test(thread_local_allocator_test SRCS thread_local_allocator_test.cc DEPS thread_local_allocator) +if(WITH_ROCM) + hip_library( + cuda_allocator + SRCS cuda_allocator.cc + DEPS allocator cuda_device_guard stats) + hip_library( + cuda_managed_allocator + SRCS cuda_managed_allocator.cc + DEPS allocator cuda_device_guard gpu_info) + hip_library( + pinned_allocator + SRCS pinned_allocator.cc + DEPS allocator) + hip_library( + stream_safe_cuda_allocator + SRCS stream_safe_cuda_allocator.cc + DEPS allocator) + hip_library( + thread_local_allocator + SRCS thread_local_allocator.cc + DEPS allocator) + + cc_test( + thread_local_allocator_test + SRCS thread_local_allocator_test.cc + DEPS thread_local_allocator) endif() -if (WITH_ASCEND_CL) - cc_library(npu_allocator SRCS npu_allocator.cc DEPS allocator npu_info) - cc_library(npu_pinned_allocator SRCS npu_pinned_allocator.cc DEPS allocator npu_info) +if(WITH_ASCEND_CL) + cc_library( + npu_allocator + SRCS npu_allocator.cc + DEPS allocator npu_info) + cc_library( + npu_pinned_allocator + SRCS npu_pinned_allocator.cc + DEPS allocator npu_info) endif() -cc_library(retry_allocator SRCS retry_allocator.cc DEPS allocator) +cc_library( + retry_allocator + SRCS retry_allocator.cc + DEPS allocator) -if (WITH_GPU OR WITH_ROCM) - set(AllocatorFacadeDeps gpu_info cuda_allocator cuda_managed_allocator pinned_allocator cuda_device_guard thread_local_allocator stream_safe_cuda_allocator device_context) - if(CUDA_VERSION GREATER_EQUAL 10.2) - list(APPEND AllocatorFacadeDeps cuda_virtual_mem_allocator) - endif() +if(WITH_GPU OR WITH_ROCM) + set(AllocatorFacadeDeps + gpu_info + cuda_allocator + cuda_managed_allocator + pinned_allocator + cuda_device_guard + thread_local_allocator + stream_safe_cuda_allocator + device_context) + if(CUDA_VERSION GREATER_EQUAL 10.2) + list(APPEND AllocatorFacadeDeps cuda_virtual_mem_allocator) + endif() elseif(WITH_XPU) - set(AllocatorFacadeDeps xpu_info) + set(AllocatorFacadeDeps xpu_info) elseif(WITH_IPU) - set(AllocatorFacadeDeps ipu_info) + set(AllocatorFacadeDeps ipu_info) elseif(WITH_ASCEND) - set(AllocatorFacadeDeps ascend_npu_info) -else () - set(AllocatorFacadeDeps) + set(AllocatorFacadeDeps ascend_npu_info) +else() + set(AllocatorFacadeDeps) endif() -if (WITH_CUSTOM_DEVICE) - cc_library(custom_allocator SRCS custom_allocator.cc DEPS allocator device_manager) +if(WITH_CUSTOM_DEVICE) + cc_library( + custom_allocator + SRCS custom_allocator.cc + DEPS allocator device_manager) set(AllocatorFacadeDeps ${AllocatorFacadeDeps} custom_allocator) endif() -if (WITH_GPU) - nv_test(best_fit_allocator_test - SRCS best_fit_allocator_test.cc - best_fit_allocator_test.cu - DEPS best_fit_allocator - locked_allocator - cpu_allocator - cuda_allocator - device_context - memcpy) -elseif (WITH_ROCM) - hip_test(best_fit_allocator_test - SRCS best_fit_allocator_test.cc - best_fit_allocator_test.cu - DEPS best_fit_allocator - locked_allocator - cpu_allocator - cuda_allocator - device_context - memcpy) +if(WITH_GPU) + nv_test( + best_fit_allocator_test + SRCS best_fit_allocator_test.cc best_fit_allocator_test.cu + DEPS best_fit_allocator locked_allocator cpu_allocator cuda_allocator + device_context memcpy) +elseif(WITH_ROCM) + hip_test( + best_fit_allocator_test + SRCS best_fit_allocator_test.cc best_fit_allocator_test.cu + DEPS best_fit_allocator locked_allocator cpu_allocator cuda_allocator + device_context memcpy) else() - cc_test(best_fit_allocator_test - SRCS best_fit_allocator_test.cc - DEPS best_fit_allocator - locked_allocator - cpu_allocator) + cc_test( + best_fit_allocator_test + SRCS best_fit_allocator_test.cc + DEPS best_fit_allocator locked_allocator cpu_allocator) endif() -list(APPEND AllocatorFacadeDeps cpu_allocator locked_allocator aligned_allocator retry_allocator buffered_allocator naive_best_fit_allocator auto_growth_best_fit_allocator virtual_memory_auto_growth_best_fit_allocator best_fit_allocator) +list( + APPEND + AllocatorFacadeDeps + cpu_allocator + locked_allocator + aligned_allocator + retry_allocator + buffered_allocator + naive_best_fit_allocator + auto_growth_best_fit_allocator + virtual_memory_auto_growth_best_fit_allocator + best_fit_allocator) -if (WITH_ASCEND_CL) - list(APPEND AllocatorFacadeDeps npu_pinned_allocator) +if(WITH_ASCEND_CL) + list(APPEND AllocatorFacadeDeps npu_pinned_allocator) endif() +cc_library( + aligned_allocator + SRCS aligned_allocator.cc + DEPS allocator) +cc_test( + test_aligned_allocator + SRCS test_aligned_allocator.cc + DEPS aligned_allocator) +cc_library( + allocator_strategy + SRCS allocator_strategy.cc + DEPS gflags ${AllocatorFacadeDeps}) +cc_library( + allocator_facade + SRCS allocator_facade.cc + DEPS allocator_strategy stats) -cc_library(aligned_allocator SRCS aligned_allocator.cc DEPS allocator) -cc_test(test_aligned_allocator SRCS test_aligned_allocator.cc DEPS aligned_allocator) -cc_library(allocator_strategy SRCS allocator_strategy.cc DEPS gflags ${AllocatorFacadeDeps}) -cc_library(allocator_facade SRCS allocator_facade.cc DEPS allocator_strategy stats) - -if (WITH_GPU) +if(WITH_GPU) target_link_libraries(allocator_facade cuda_graph) endif() -cc_test(retry_allocator_test SRCS retry_allocator_test.cc DEPS retry_allocator locked_allocator cpu_allocator) -if (WITH_TESTING) - if ((WITH_GPU OR WITH_ROCM) AND TARGET retry_allocator_test) +cc_test( + retry_allocator_test + SRCS retry_allocator_test.cc + DEPS retry_allocator locked_allocator cpu_allocator) +if(WITH_TESTING) + if((WITH_GPU OR WITH_ROCM) AND TARGET retry_allocator_test) target_link_libraries(retry_allocator_test cuda_allocator) endif() - if (TEST retry_allocator_test) - set_tests_properties(retry_allocator_test PROPERTIES LABELS "RUN_TYPE=EXCLUSIVE") + if(TEST retry_allocator_test) + set_tests_properties(retry_allocator_test PROPERTIES LABELS + "RUN_TYPE=EXCLUSIVE") endif() endif() -cc_test(allocator_facade_abs_flags_test SRCS allocator_facade_abs_flags_test.cc DEPS allocator_facade) +cc_test( + allocator_facade_abs_flags_test + SRCS allocator_facade_abs_flags_test.cc + DEPS allocator_facade) -cc_test(allocator_facade_frac_flags_test SRCS allocator_facade_frac_flags_test.cc DEPS allocator_facade) +cc_test( + allocator_facade_frac_flags_test + SRCS allocator_facade_frac_flags_test.cc + DEPS allocator_facade) -cc_library(auto_growth_best_fit_allocator SRCS auto_growth_best_fit_allocator.cc DEPS allocator aligned_allocator flags) -cc_test(auto_growth_best_fit_allocator_facade_test SRCS auto_growth_best_fit_allocator_facade_test.cc DEPS cpu_allocator auto_growth_best_fit_allocator) -cc_test(auto_growth_best_fit_allocator_test SRCS auto_growth_best_fit_allocator_test.cc DEPS auto_growth_best_fit_allocator) +cc_library( + auto_growth_best_fit_allocator + SRCS auto_growth_best_fit_allocator.cc + DEPS allocator aligned_allocator flags) +cc_test( + auto_growth_best_fit_allocator_facade_test + SRCS auto_growth_best_fit_allocator_facade_test.cc + DEPS cpu_allocator auto_growth_best_fit_allocator) +cc_test( + auto_growth_best_fit_allocator_test + SRCS auto_growth_best_fit_allocator_test.cc + DEPS auto_growth_best_fit_allocator) -cc_library(virtual_memory_auto_growth_best_fit_allocator SRCS virtual_memory_auto_growth_best_fit_allocator.cc DEPS allocator aligned_allocator) +cc_library( + virtual_memory_auto_growth_best_fit_allocator + SRCS virtual_memory_auto_growth_best_fit_allocator.cc + DEPS allocator aligned_allocator) if(NOT WIN32) - cc_library(mmap_allocator SRCS mmap_allocator.cc DEPS allocator) - cc_test(mmap_allocator_test SRCS mmap_allocator_test.cc DEPS mmap_allocator allocator) - if (WITH_GPU) - cc_library(cuda_ipc_allocator SRCS cuda_ipc_allocator.cc DEPS allocator) + cc_library( + mmap_allocator + SRCS mmap_allocator.cc + DEPS allocator) + cc_test( + mmap_allocator_test + SRCS mmap_allocator_test.cc + DEPS mmap_allocator allocator) + if(WITH_GPU) + cc_library( + cuda_ipc_allocator + SRCS cuda_ipc_allocator.cc + DEPS allocator) endif() endif(NOT WIN32) diff --git a/paddle/fluid/memory/detail/CMakeLists.txt b/paddle/fluid/memory/detail/CMakeLists.txt index a039cd8f418..afe5c0dba0f 100644 --- a/paddle/fluid/memory/detail/CMakeLists.txt +++ b/paddle/fluid/memory/detail/CMakeLists.txt @@ -1,47 +1,78 @@ include(ExternalProject) -cc_library(memory_block SRCS memory_block.cc memory_block_desc.cc meta_cache.cc DEPS place) +cc_library( + memory_block + SRCS memory_block.cc memory_block_desc.cc meta_cache.cc + DEPS place) if(WITH_GPU) - nv_library(system_allocator SRCS system_allocator.cc DEPS gflags cpu_info gpu_info place) + nv_library( + system_allocator + SRCS system_allocator.cc + DEPS gflags cpu_info gpu_info place) elseif(WITH_ROCM) - hip_library(system_allocator SRCS system_allocator.cc DEPS gflags cpu_info gpu_info place) + hip_library( + system_allocator + SRCS system_allocator.cc + DEPS gflags cpu_info gpu_info place) elseif(${WITH_ASCEND_CL}) - cc_library(system_allocator SRCS system_allocator.cc DEPS gflags cpu_info npu_info place) + cc_library( + system_allocator + SRCS system_allocator.cc + DEPS gflags cpu_info npu_info place) elseif(WITH_MLU) - cc_library(system_allocator SRCS system_allocator.cc DEPS gflags cpu_info mlu_info place) + cc_library( + system_allocator + SRCS system_allocator.cc + DEPS gflags cpu_info mlu_info place) else() - cc_library(system_allocator SRCS system_allocator.cc DEPS gflags cpu_info place) + cc_library( + system_allocator + SRCS system_allocator.cc + DEPS gflags cpu_info place) endif() -cc_test(system_allocator_test SRCS system_allocator_test.cc DEPS system_allocator) +cc_test( + system_allocator_test + SRCS system_allocator_test.cc + DEPS system_allocator) -cc_library(buddy_allocator SRCS buddy_allocator.cc DEPS memory_block system_allocator glog) +cc_library( + buddy_allocator + SRCS buddy_allocator.cc + DEPS memory_block system_allocator glog) -cc_test(buddy_allocator_test SRCS buddy_allocator_test.cc DEPS buddy_allocator) +cc_test( + buddy_allocator_test + SRCS buddy_allocator_test.cc + DEPS buddy_allocator) -FUNCTION(file_download_and_uncompress URL NAME) - MESSAGE(STATUS "Download dependence[${NAME}] from ${URL}") - SET(${NAME}_INCLUDE_DIR ${THIRD_PARTY_PATH}/${NAME} PARENT_SCOPE) +function(file_download_and_uncompress URL NAME) + message(STATUS "Download dependence[${NAME}] from ${URL}") + set(${NAME}_INCLUDE_DIR + ${THIRD_PARTY_PATH}/${NAME} + PARENT_SCOPE) ExternalProject_Add( - extern_download_${NAME} - ${EXTERNAL_PROJECT_LOG_ARGS} - PREFIX ${THIRD_PARTY_PATH}/${NAME} - URL ${URL} - DOWNLOAD_DIR ${THIRD_PARTY_PATH}/${NAME} - SOURCE_DIR ${THIRD_PARTY_PATH}/${NAME} - DOWNLOAD_NO_PROGRESS 1 - CONFIGURE_COMMAND "" - BUILD_COMMAND "" - UPDATE_COMMAND "" - INSTALL_COMMAND "" - ) - set(third_party_deps ${third_party_deps} extern_download_${NAME} PARENT_SCOPE) -ENDFUNCTION() + extern_download_${NAME} + ${EXTERNAL_PROJECT_LOG_ARGS} + PREFIX ${THIRD_PARTY_PATH}/${NAME} + URL ${URL} + DOWNLOAD_DIR ${THIRD_PARTY_PATH}/${NAME} + SOURCE_DIR ${THIRD_PARTY_PATH}/${NAME} + DOWNLOAD_NO_PROGRESS 1 + CONFIGURE_COMMAND "" + BUILD_COMMAND "" + UPDATE_COMMAND "" + INSTALL_COMMAND "") + set(third_party_deps + ${third_party_deps} extern_download_${NAME} + PARENT_SCOPE) +endfunction() if(WITH_TESTING) if(TEST buddy_allocator_test) - set_tests_properties(buddy_allocator_test PROPERTIES LABELS "RUN_TYPE=EXCLUSIVE") + set_tests_properties(buddy_allocator_test PROPERTIES LABELS + "RUN_TYPE=EXCLUSIVE") endif() set(URL "https://paddle-ci.cdn.bcebos.com/buddy_allocator_test_data.tar") file_download_and_uncompress(URL "buddy_allocator") diff --git a/paddle/fluid/operators/amp/CMakeLists.txt b/paddle/fluid/operators/amp/CMakeLists.txt index 2ea8bbcbc61..cbedb02f868 100644 --- a/paddle/fluid/operators/amp/CMakeLists.txt +++ b/paddle/fluid/operators/amp/CMakeLists.txt @@ -1,10 +1,14 @@ include(operators) if(WITH_UNITY_BUILD) - # Load Unity Build rules for operators in paddle/fluid/operators/amp. - include(unity_build_rule.cmake) + # Load Unity Build rules for operators in paddle/fluid/operators/amp. + include(unity_build_rule.cmake) endif() register_operators() if(WITH_ASCEND_CL) - cc_test(check_finite_and_unscale_op_npu_test SRCS check_finite_and_unscale_op_npu_test.cc DEPS op_registry check_finite_and_unscale_op scope device_context enforce executor) + cc_test( + check_finite_and_unscale_op_npu_test + SRCS check_finite_and_unscale_op_npu_test.cc + DEPS op_registry check_finite_and_unscale_op scope device_context enforce + executor) endif() diff --git a/paddle/fluid/operators/amp/unity_build_rule.cmake b/paddle/fluid/operators/amp/unity_build_rule.cmake index bfdab0cd962..fa460e33c80 100644 --- a/paddle/fluid/operators/amp/unity_build_rule.cmake +++ b/paddle/fluid/operators/amp/unity_build_rule.cmake @@ -4,9 +4,7 @@ # Generally, the combination rules in this file do not need to be modified. # If there are some redefined error in compiling with the source file which # in combination rule, you can remove the source file from the following rules. -register_unity_group(cc - check_finite_and_unscale_op.cc - update_loss_scaling_op.cc) -register_unity_group(cu - check_finite_and_unscale_op.cu - update_loss_scaling_op.cu) +register_unity_group(cc check_finite_and_unscale_op.cc + update_loss_scaling_op.cc) +register_unity_group(cu check_finite_and_unscale_op.cu + update_loss_scaling_op.cu) diff --git a/paddle/fluid/operators/benchmark/CMakeLists.txt b/paddle/fluid/operators/benchmark/CMakeLists.txt index e5023d8eb35..e05011eaf6b 100644 --- a/paddle/fluid/operators/benchmark/CMakeLists.txt +++ b/paddle/fluid/operators/benchmark/CMakeLists.txt @@ -1,3 +1,14 @@ -cc_test(op_tester SRCS op_tester.cc op_tester_config.cc - DEPS memory timer framework_proto proto_desc lod_tensor op_registry - device_context scope ${GLOB_OP_LIB} ${GLOB_OPERATOR_DEPS} eigen_function) +cc_test( + op_tester + SRCS op_tester.cc op_tester_config.cc + DEPS memory + timer + framework_proto + proto_desc + lod_tensor + op_registry + device_context + scope + ${GLOB_OP_LIB} + ${GLOB_OPERATOR_DEPS} + eigen_function) diff --git a/paddle/fluid/operators/cinn/CMakeLists.txt b/paddle/fluid/operators/cinn/CMakeLists.txt index 862a0d04fbd..f2a4201fd96 100644 --- a/paddle/fluid/operators/cinn/CMakeLists.txt +++ b/paddle/fluid/operators/cinn/CMakeLists.txt @@ -1,19 +1,67 @@ include(operators) -cc_library(cinn_op_helper SRCS cinn_op_helper.cc DEPS operator device_context) -cc_library(cinn_launch_context SRCS cinn_launch_context.cc DEPS ddim lod_tensor scope proto_desc graph build_strategy device_context parallel_executor transform_type cinn) +cc_library( + cinn_op_helper + SRCS cinn_op_helper.cc + DEPS operator device_context) +cc_library( + cinn_launch_context + SRCS cinn_launch_context.cc + DEPS ddim + lod_tensor + scope + proto_desc + graph + build_strategy + device_context + parallel_executor + transform_type + cinn) -SET(CINN_OP_DEPS parallel_executor string_helper variable_helper cinn cinn_compiler cinn_op_helper cinn_launch_context transform_type) +set(CINN_OP_DEPS + parallel_executor + string_helper + variable_helper + cinn + cinn_compiler + cinn_op_helper + cinn_launch_context + transform_type) register_operators(DEPS ${CINN_OP_DEPS}) -if (WITH_TESTING) - cc_test(cinn_launch_context_test SRCS cinn_launch_context_test.cc DEPS ddim lod_tensor scope proto_desc graph cinn_launch_context cinn_instruction_run_op cinn) - set_tests_properties(cinn_launch_context_test PROPERTIES LABELS "RUN_TYPE=CINN") +if(WITH_TESTING) + cc_test( + cinn_launch_context_test + SRCS cinn_launch_context_test.cc + DEPS ddim + lod_tensor + scope + proto_desc + graph + cinn_launch_context + cinn_instruction_run_op + cinn) + set_tests_properties(cinn_launch_context_test PROPERTIES LABELS + "RUN_TYPE=CINN") - SET(CINN_RUN_ENVIRONMENT "OMP_NUM_THREADS=1;runtime_include_dir=${PADDLE_BINARY_DIR}/third_party/CINN/src/external_cinn/cinn/runtime/cuda") - cc_test(cinn_launch_op_test SRCS cinn_launch_op_test.cc DEPS cinn_compiler cinn_launch_op cinn_instruction_run_op elementwise_add_op gflags) - set_tests_properties(cinn_launch_op_test PROPERTIES LABELS "RUN_TYPE=CINN" ENVIRONMENT "${CINN_RUN_ENVIRONMENT}") + set(CINN_RUN_ENVIRONMENT + "OMP_NUM_THREADS=1;runtime_include_dir=${PADDLE_BINARY_DIR}/third_party/CINN/src/external_cinn/cinn/runtime/cuda" + ) + cc_test( + cinn_launch_op_test + SRCS cinn_launch_op_test.cc + DEPS cinn_compiler cinn_launch_op cinn_instruction_run_op + elementwise_add_op gflags) + set_tests_properties( + cinn_launch_op_test PROPERTIES LABELS "RUN_TYPE=CINN" ENVIRONMENT + "${CINN_RUN_ENVIRONMENT}") - cc_test(cinn_instruction_run_op_test SRCS cinn_instruction_run_op_test.cc DEPS cinn_compiler cinn_launch_op cinn_instruction_run_op elementwise_add_op) - set_tests_properties(cinn_instruction_run_op_test PROPERTIES LABELS "RUN_TYPE=CINN" ENVIRONMENT "${CINN_RUN_ENVIRONMENT}") + cc_test( + cinn_instruction_run_op_test + SRCS cinn_instruction_run_op_test.cc + DEPS cinn_compiler cinn_launch_op cinn_instruction_run_op + elementwise_add_op) + set_tests_properties( + cinn_instruction_run_op_test PROPERTIES LABELS "RUN_TYPE=CINN" ENVIRONMENT + "${CINN_RUN_ENVIRONMENT}") endif() diff --git a/paddle/fluid/operators/collective/CMakeLists.txt b/paddle/fluid/operators/collective/CMakeLists.txt index 89c573d2dcb..c94b0c93eb3 100644 --- a/paddle/fluid/operators/collective/CMakeLists.txt +++ b/paddle/fluid/operators/collective/CMakeLists.txt @@ -2,72 +2,154 @@ include(operators) set(COLLECTIVE_DEPS "") -set(COLLECTIVE_COMPILE_FLAGS "-Wno-non-virtual-dtor -Wno-error=non-virtual-dtor -Wno-error=delete-non-virtual-dtor") +set(COLLECTIVE_COMPILE_FLAGS + "-Wno-non-virtual-dtor -Wno-error=non-virtual-dtor -Wno-error=delete-non-virtual-dtor" +) -file(GLOB OPS RELATIVE "${CMAKE_CURRENT_SOURCE_DIR}" "*_op.cc") +file( + GLOB OPS + RELATIVE "${CMAKE_CURRENT_SOURCE_DIR}" + "*_op.cc") list(REMOVE_DUPLICATES OPS) foreach(src ${OPS}) - set_source_files_properties(${src} PROPERTIES COMPILE_FLAGS ${COLLECTIVE_COMPILE_FLAGS}) + set_source_files_properties(${src} PROPERTIES COMPILE_FLAGS + ${COLLECTIVE_COMPILE_FLAGS}) endforeach() -register_operators(EXCLUDES c_gen_bkcl_id_op gen_bkcl_id_op c_gen_nccl_id_op gen_nccl_id_op c_gen_hccl_id_op gen_hccl_id_op c_gen_cncl_id_op DEPS ${COLLECTIVE_DEPS}) +register_operators( + EXCLUDES + c_gen_bkcl_id_op + gen_bkcl_id_op + c_gen_nccl_id_op + gen_nccl_id_op + c_gen_hccl_id_op + gen_hccl_id_op + c_gen_cncl_id_op + DEPS + ${COLLECTIVE_DEPS}) if(WITH_NCCL OR WITH_RCCL) - set(COLLECTIVE_DEPS ${COLLECTIVE_DEPS} nccl_common collective_helper) - op_library(c_gen_nccl_id_op DEPS ${COLLECTIVE_DEPS}) - op_library(gen_nccl_id_op DEPS ${COLLECTIVE_DEPS}) + set(COLLECTIVE_DEPS ${COLLECTIVE_DEPS} nccl_common collective_helper) + op_library(c_gen_nccl_id_op DEPS ${COLLECTIVE_DEPS}) + op_library(gen_nccl_id_op DEPS ${COLLECTIVE_DEPS}) endif() if(WITH_GLOO) - set(COLLECTIVE_DEPS ${COLLECTIVE_DEPS} gloo_wrapper) + set(COLLECTIVE_DEPS ${COLLECTIVE_DEPS} gloo_wrapper) endif() if(WITH_XPU_BKCL) - set(COLLECTIVE_DEPS ${COLLECTIVE_DEPS} collective_helper) - op_library(c_gen_bkcl_id_op DEPS ${COLLECTIVE_DEPS}) - op_library(gen_bkcl_id_op DEPS ${COLLECTIVE_DEPS}) + set(COLLECTIVE_DEPS ${COLLECTIVE_DEPS} collective_helper) + op_library(c_gen_bkcl_id_op DEPS ${COLLECTIVE_DEPS}) + op_library(gen_bkcl_id_op DEPS ${COLLECTIVE_DEPS}) endif() if(WITH_CNCL) - set(COLLECTIVE_DEPS ${COLLECTIVE_DEPS} collective_helper) - op_library(c_gen_cncl_id_op DEPS ${COLLECTIVE_DEPS}) + set(COLLECTIVE_DEPS ${COLLECTIVE_DEPS} collective_helper) + op_library(c_gen_cncl_id_op DEPS ${COLLECTIVE_DEPS}) endif() if(WITH_ASCEND_CL) - cc_library(gen_hccl_id_op_helper SRCS gen_hccl_id_op_helper.cc DEPS dynload_warpctc dynamic_loader scope) - set(COLLECTIVE_DEPS ${COLLECTIVE_DEPS} collective_helper gen_hccl_id_op_helper) - op_library(c_gen_hccl_id_op DEPS ${COLLECTIVE_DEPS}) - op_library(gen_hccl_id_op DEPS ${COLLECTIVE_DEPS}) + cc_library( + gen_hccl_id_op_helper + SRCS gen_hccl_id_op_helper.cc + DEPS dynload_warpctc dynamic_loader scope) + set(COLLECTIVE_DEPS ${COLLECTIVE_DEPS} collective_helper + gen_hccl_id_op_helper) + op_library(c_gen_hccl_id_op DEPS ${COLLECTIVE_DEPS}) + op_library(gen_hccl_id_op DEPS ${COLLECTIVE_DEPS}) endif() -set(OPERATOR_DEPS ${OPERATOR_DEPS} ${COLLECTIVE_DEPS} PARENT_SCOPE) -set(GLOB_COLLECTIVE_DEPS ${COLLECTIVE_DEPS} CACHE INTERNAL "collective dependency") +set(OPERATOR_DEPS + ${OPERATOR_DEPS} ${COLLECTIVE_DEPS} + PARENT_SCOPE) +set(GLOB_COLLECTIVE_DEPS + ${COLLECTIVE_DEPS} + CACHE INTERNAL "collective dependency") if(WITH_ASCEND_CL) - set(COMMON_TEST_DEPS_FOR_HCOM c_comm_init_hccl_op c_gen_hccl_id_op gen_hccl_id_op_helper - gen_hccl_id_op op_registry ascend_hccl flags - dynamic_loader dynload_warpctc scope device_context enforce executor) - cc_test(c_broadcast_op_npu_test SRCS c_broadcast_op_npu_test.cc - DEPS c_broadcast_op ${COLLECTIVE_DEPS} ${COMMON_TEST_DEPS_FOR_HCOM}) - cc_test(c_allreduce_sum_op_npu_test SRCS c_allreduce_sum_op_npu_test.cc - DEPS c_allreduce_sum_op ${COLLECTIVE_DEPS} ${COMMON_TEST_DEPS_FOR_HCOM}) - cc_test(c_reducescatter_op_npu_test SRCS c_reducescatter_op_npu_test.cc - DEPS c_reducescatter_op ${COLLECTIVE_DEPS} ${COMMON_TEST_DEPS_FOR_HCOM}) - cc_test(c_allgather_op_npu_test SRCS c_allgather_op_npu_test.cc - DEPS c_allgather_op ${COLLECTIVE_DEPS} ${COMMON_TEST_DEPS_FOR_HCOM}) - cc_test(c_reduce_sum_op_npu_test SRCS c_reduce_sum_op_npu_test.cc - DEPS c_reduce_sum_op ${COLLECTIVE_DEPS} ${COMMON_TEST_DEPS_FOR_HCOM}) - cc_test(c_allreduce_max_op_npu_test SRCS c_allreduce_max_op_npu_test.cc - DEPS c_allreduce_max_op ${COLLECTIVE_DEPS} ${COMMON_TEST_DEPS_FOR_HCOM}) - cc_test(send_v2_op_npu_test SRCS send_v2_op_npu_test.cc - DEPS send_v2_op ${COLLECTIVE_DEPS} ${COMMON_TEST_DEPS_FOR_HCOM}) - cc_test(recv_v2_op_npu_test SRCS recv_v2_op_npu_test.cc - DEPS recv_v2_op ${COLLECTIVE_DEPS} ${COMMON_TEST_DEPS_FOR_HCOM}) - cc_test(checknumeric SRCS checknumeric_npu_test.cc - DEPS c_allreduce_sum_op ${COLLECTIVE_DEPS} ${COMMON_TEST_DEPS_FOR_HCOM}) - cc_test(c_sync_comm_stream_op_npu_test SRCS c_sync_comm_stream_op_npu_test.cc - DEPS op_registry c_broadcast_op c_comm_init_hccl_op c_sync_comm_stream_op c_gen_hccl_id_op gen_hccl_id_op_helper ${COLLECTIVE_DEPS} ascend_hccl dynamic_loader dynload_warpctc scope device_context enforce executor) - cc_test(c_sync_calc_stream_op_npu_test SRCS c_sync_calc_stream_op_npu_test.cc - DEPS op_registry elementwise_add_op c_sync_calc_stream_op c_gen_hccl_id_op gen_hccl_id_op_helper ${COLLECTIVE_DEPS} ascend_hccl dynamic_loader dynload_warpctc scope device_context enforce executor) + set(COMMON_TEST_DEPS_FOR_HCOM + c_comm_init_hccl_op + c_gen_hccl_id_op + gen_hccl_id_op_helper + gen_hccl_id_op + op_registry + ascend_hccl + flags + dynamic_loader + dynload_warpctc + scope + device_context + enforce + executor) + cc_test( + c_broadcast_op_npu_test + SRCS c_broadcast_op_npu_test.cc + DEPS c_broadcast_op ${COLLECTIVE_DEPS} ${COMMON_TEST_DEPS_FOR_HCOM}) + cc_test( + c_allreduce_sum_op_npu_test + SRCS c_allreduce_sum_op_npu_test.cc + DEPS c_allreduce_sum_op ${COLLECTIVE_DEPS} ${COMMON_TEST_DEPS_FOR_HCOM}) + cc_test( + c_reducescatter_op_npu_test + SRCS c_reducescatter_op_npu_test.cc + DEPS c_reducescatter_op ${COLLECTIVE_DEPS} ${COMMON_TEST_DEPS_FOR_HCOM}) + cc_test( + c_allgather_op_npu_test + SRCS c_allgather_op_npu_test.cc + DEPS c_allgather_op ${COLLECTIVE_DEPS} ${COMMON_TEST_DEPS_FOR_HCOM}) + cc_test( + c_reduce_sum_op_npu_test + SRCS c_reduce_sum_op_npu_test.cc + DEPS c_reduce_sum_op ${COLLECTIVE_DEPS} ${COMMON_TEST_DEPS_FOR_HCOM}) + cc_test( + c_allreduce_max_op_npu_test + SRCS c_allreduce_max_op_npu_test.cc + DEPS c_allreduce_max_op ${COLLECTIVE_DEPS} ${COMMON_TEST_DEPS_FOR_HCOM}) + cc_test( + send_v2_op_npu_test + SRCS send_v2_op_npu_test.cc + DEPS send_v2_op ${COLLECTIVE_DEPS} ${COMMON_TEST_DEPS_FOR_HCOM}) + cc_test( + recv_v2_op_npu_test + SRCS recv_v2_op_npu_test.cc + DEPS recv_v2_op ${COLLECTIVE_DEPS} ${COMMON_TEST_DEPS_FOR_HCOM}) + cc_test( + checknumeric + SRCS checknumeric_npu_test.cc + DEPS c_allreduce_sum_op ${COLLECTIVE_DEPS} ${COMMON_TEST_DEPS_FOR_HCOM}) + cc_test( + c_sync_comm_stream_op_npu_test + SRCS c_sync_comm_stream_op_npu_test.cc + DEPS op_registry + c_broadcast_op + c_comm_init_hccl_op + c_sync_comm_stream_op + c_gen_hccl_id_op + gen_hccl_id_op_helper + ${COLLECTIVE_DEPS} + ascend_hccl + dynamic_loader + dynload_warpctc + scope + device_context + enforce + executor) + cc_test( + c_sync_calc_stream_op_npu_test + SRCS c_sync_calc_stream_op_npu_test.cc + DEPS op_registry + elementwise_add_op + c_sync_calc_stream_op + c_gen_hccl_id_op + gen_hccl_id_op_helper + ${COLLECTIVE_DEPS} + ascend_hccl + dynamic_loader + dynload_warpctc + scope + device_context + enforce + executor) endif() diff --git a/paddle/fluid/operators/controlflow/CMakeLists.txt b/paddle/fluid/operators/controlflow/CMakeLists.txt index 0c18522fa32..193c5c45056 100644 --- a/paddle/fluid/operators/controlflow/CMakeLists.txt +++ b/paddle/fluid/operators/controlflow/CMakeLists.txt @@ -1,24 +1,51 @@ include(operators) if(WITH_UNITY_BUILD) - # Load Unity Build rules for operators in paddle/fluid/operators/controlflow. - include(unity_build_rule.cmake) + # Load Unity Build rules for operators in paddle/fluid/operators/controlflow. + include(unity_build_rule.cmake) endif() register_operators(EXCLUDES conditional_block_op DEPS naive_executor) -cc_library(conditional_block_op SRCS conditional_block_op.cc DEPS executor) -cc_library(op_variant SRCS op_variant.cc DEPS operator proto_desc) -cc_library(conditional_block_op_helper SRCS conditional_block_op_helper.cc DEPS operator op_variant conditional_block_op) -cc_library(recurrent_op_helper SRCS recurrent_op_helper.cc DEPS operator op_variant recurrent_op) -cc_library(while_op_helper SRCS while_op_helper.cc DEPS operator op_variant) +cc_library( + conditional_block_op + SRCS conditional_block_op.cc + DEPS executor) +cc_library( + op_variant + SRCS op_variant.cc + DEPS operator proto_desc) +cc_library( + conditional_block_op_helper + SRCS conditional_block_op_helper.cc + DEPS operator op_variant conditional_block_op) +cc_library( + recurrent_op_helper + SRCS recurrent_op_helper.cc + DEPS operator op_variant recurrent_op) +cc_library( + while_op_helper + SRCS while_op_helper.cc + DEPS operator op_variant) -cc_test(conditional_block_op_test SRCS conditional_block_op_test.cc DEPS conditional_block_op executor) +cc_test( + conditional_block_op_test + SRCS conditional_block_op_test.cc + DEPS conditional_block_op executor) if(WITH_UNITY_BUILD) - target_link_libraries(paddle_operators_controlflow_unity conditional_block_op) + target_link_libraries(paddle_operators_controlflow_unity conditional_block_op) else() - target_link_libraries(conditional_block_infer_op conditional_block_op) + target_link_libraries(conditional_block_infer_op conditional_block_op) endif() -file(APPEND ${pybind_file} "USE_OP_ITSELF(less_than);\nUSE_OP_ITSELF(equal_all);\nUSE_NO_KERNEL_OP(read_from_array);\n") -file(APPEND ${pybind_file} "USE_OP_ITSELF(logical_and);\nUSE_OP_ITSELF(logical_or);\nUSE_OP_ITSELF(logical_xor);\nUSE_OP_ITSELF(logical_not);\n") -file(APPEND ${pybind_file} "USE_OP_ITSELF(bitwise_and);\nUSE_OP_ITSELF(bitwise_or);\nUSE_OP_ITSELF(bitwise_xor);\nUSE_OP_ITSELF(bitwise_not);\n") +file( + APPEND ${pybind_file} + "USE_OP_ITSELF(less_than);\nUSE_OP_ITSELF(equal_all);\nUSE_NO_KERNEL_OP(read_from_array);\n" +) +file( + APPEND ${pybind_file} + "USE_OP_ITSELF(logical_and);\nUSE_OP_ITSELF(logical_or);\nUSE_OP_ITSELF(logical_xor);\nUSE_OP_ITSELF(logical_not);\n" +) +file( + APPEND ${pybind_file} + "USE_OP_ITSELF(bitwise_and);\nUSE_OP_ITSELF(bitwise_or);\nUSE_OP_ITSELF(bitwise_xor);\nUSE_OP_ITSELF(bitwise_not);\n" +) diff --git a/paddle/fluid/operators/controlflow/unity_build_rule.cmake b/paddle/fluid/operators/controlflow/unity_build_rule.cmake index 690a332d20b..594ae3a36cf 100644 --- a/paddle/fluid/operators/controlflow/unity_build_rule.cmake +++ b/paddle/fluid/operators/controlflow/unity_build_rule.cmake @@ -4,20 +4,18 @@ # Generally, the combination rules in this file do not need to be modified. # If there are some redefined error in compiling with the source file which # in combination rule, you can remove the source file from the following rules. -register_unity_group(cc - compare_all_op.cc - compare_op.cc - conditional_block_infer_op.cc - feed_op.cc - fetch_op.cc - fetch_v2_op.cc - get_places_op.cc - logical_op.cc - bitwise_op.cc - tensor_array_read_write_op.cc - while_op.cc) -register_unity_group(cu - logical_op.cu - bitwise_op.cu - compare_op.cu - compare_all_op.cu) +register_unity_group( + cc + compare_all_op.cc + compare_op.cc + conditional_block_infer_op.cc + feed_op.cc + fetch_op.cc + fetch_v2_op.cc + get_places_op.cc + logical_op.cc + bitwise_op.cc + tensor_array_read_write_op.cc + while_op.cc) +register_unity_group(cu logical_op.cu bitwise_op.cu compare_op.cu + compare_all_op.cu) diff --git a/paddle/fluid/operators/detection/CMakeLists.txt b/paddle/fluid/operators/detection/CMakeLists.txt index 99a69007aa5..6e5ea3e8aa7 100644 --- a/paddle/fluid/operators/detection/CMakeLists.txt +++ b/paddle/fluid/operators/detection/CMakeLists.txt @@ -1,50 +1,58 @@ set(LOCAL_DETECTION_LIBS) function(detection_library TARGET_NAME) - set(oneValueArgs "") - set(multiValueArgs SRCS DEPS) - set(options "") - set(common_deps op_registry) - set(pybind_flag 0) - cmake_parse_arguments(detection_library "${options}" "${oneValueArgs}" - "${multiValueArgs}" ${ARGN}) - set(srcs) - # filter cuda source file when not build with cuda/rocm - foreach(src ${detection_library_SRCS}) - if (NOT WITH_GPU AND NOT WITH_ROCM) - if(${src} MATCHES ".*\\.cc$") - list(APPEND srcs ${src}) - endif() - else() + set(oneValueArgs "") + set(multiValueArgs SRCS DEPS) + set(options "") + set(common_deps op_registry) + set(pybind_flag 0) + cmake_parse_arguments(detection_library "${options}" "${oneValueArgs}" + "${multiValueArgs}" ${ARGN}) + set(srcs) + # filter cuda source file when not build with cuda/rocm + foreach(src ${detection_library_SRCS}) + if(NOT WITH_GPU AND NOT WITH_ROCM) + if(${src} MATCHES ".*\\.cc$") list(APPEND srcs ${src}) endif() - endforeach() - - op_library(${TARGET_NAME} SRCS ${srcs} DEPS ${common_deps} ${detection_library_DEPS}) + else() + list(APPEND srcs ${src}) + endif() + endforeach() + + op_library(${TARGET_NAME} SRCS ${srcs} DEPS ${common_deps} + ${detection_library_DEPS}) - set(LOCAL_DETECTION_LIBS - ${TARGET_NAME} - ${LOCAL_DETECTION_LIBS} - PARENT_SCOPE) + set(LOCAL_DETECTION_LIBS + ${TARGET_NAME} ${LOCAL_DETECTION_LIBS} + PARENT_SCOPE) endfunction() -if (WITH_ASCEND_CL) - detection_library(box_coder_op SRCS box_coder_op.cc box_coder_op.cu box_coder_op_npu.cc) - detection_library(density_prior_box_op SRCS density_prior_box_op.cc density_prior_box_op.cu density_prior_box_op_npu.cc) +if(WITH_ASCEND_CL) + detection_library(box_coder_op SRCS box_coder_op.cc box_coder_op.cu + box_coder_op_npu.cc) + detection_library(density_prior_box_op SRCS density_prior_box_op.cc + density_prior_box_op.cu density_prior_box_op_npu.cc) else() - detection_library(box_coder_op SRCS box_coder_op.cc box_coder_op.cu) - detection_library(density_prior_box_op SRCS density_prior_box_op.cc density_prior_box_op.cu) + detection_library(box_coder_op SRCS box_coder_op.cc box_coder_op.cu) + detection_library(density_prior_box_op SRCS density_prior_box_op.cc + density_prior_box_op.cu) endif() if(WITH_XPU) - detection_library(iou_similarity_op SRCS iou_similarity_op.cc iou_similarity_op_xpu.cc) + detection_library(iou_similarity_op SRCS iou_similarity_op.cc + iou_similarity_op_xpu.cc) detection_library(prior_box_op SRCS prior_box_op.cc prior_box_op_xpu.cc) - detection_library(generate_proposals_v2_op SRCS generate_proposals_v2_op.cc generate_proposals_v2_op_xpu.cc) + detection_library(generate_proposals_v2_op SRCS generate_proposals_v2_op.cc + generate_proposals_v2_op_xpu.cc) elseif(WITH_ASCEND_CL) - detection_library(iou_similarity_op SRCS iou_similarity_op.cc iou_similarity_op_npu.cc) - detection_library(prior_box_op SRCS prior_box_op.cc prior_box_op.cu prior_box_op_npu.cc) + detection_library(iou_similarity_op SRCS iou_similarity_op.cc + iou_similarity_op_npu.cc) + detection_library(prior_box_op SRCS prior_box_op.cc prior_box_op.cu + prior_box_op_npu.cc) else() - detection_library(iou_similarity_op SRCS iou_similarity_op.cc iou_similarity_op.cu) + detection_library(iou_similarity_op SRCS iou_similarity_op.cc + iou_similarity_op.cu) detection_library(prior_box_op SRCS prior_box_op.cc prior_box_op.cu) # detection_library(generate_proposals_v2_op SRCS generate_proposals_v2_op.cc) endif() @@ -52,49 +60,70 @@ endif() detection_library(bipartite_match_op SRCS bipartite_match_op.cc) detection_library(mine_hard_examples_op SRCS mine_hard_examples_op.cc) detection_library(anchor_generator_op SRCS anchor_generator_op.cc -anchor_generator_op.cu) -detection_library(target_assign_op SRCS target_assign_op.cc -target_assign_op.cu) + anchor_generator_op.cu) +detection_library(target_assign_op SRCS target_assign_op.cc target_assign_op.cu) detection_library(polygon_box_transform_op SRCS polygon_box_transform_op.cc -polygon_box_transform_op.cu) + polygon_box_transform_op.cu) detection_library(rpn_target_assign_op SRCS rpn_target_assign_op.cc) -detection_library(generate_proposal_labels_op SRCS generate_proposal_labels_op.cc) +detection_library(generate_proposal_labels_op SRCS + generate_proposal_labels_op.cc) detection_library(multiclass_nms_op SRCS multiclass_nms_op.cc DEPS gpc) detection_library(locality_aware_nms_op SRCS locality_aware_nms_op.cc DEPS gpc) detection_library(matrix_nms_op SRCS matrix_nms_op.cc DEPS gpc) detection_library(box_clip_op SRCS box_clip_op.cc box_clip_op.cu) detection_library(yolov3_loss_op SRCS yolov3_loss_op.cc) detection_library(yolo_box_op SRCS yolo_box_op.cc) -detection_library(box_decoder_and_assign_op SRCS box_decoder_and_assign_op.cc box_decoder_and_assign_op.cu) -detection_library(sigmoid_focal_loss_op SRCS sigmoid_focal_loss_op.cc sigmoid_focal_loss_op.cu) -detection_library(retinanet_detection_output_op SRCS retinanet_detection_output_op.cc) +detection_library(box_decoder_and_assign_op SRCS box_decoder_and_assign_op.cc + box_decoder_and_assign_op.cu) +detection_library(sigmoid_focal_loss_op SRCS sigmoid_focal_loss_op.cc + sigmoid_focal_loss_op.cu) +detection_library(retinanet_detection_output_op SRCS + retinanet_detection_output_op.cc) detection_library(nms_op SRCS nms_op.cc nms_op.cu) if(WITH_GPU OR WITH_ROCM) set(TMPDEPS memory) if(WITH_GPU) - if (${CMAKE_CUDA_COMPILER_VERSION} LESS 11.0) - set(TMPDEPS memory cub) + if(${CMAKE_CUDA_COMPILER_VERSION} LESS 11.0) + set(TMPDEPS memory cub) endif() endif() - detection_library(generate_proposals_op SRCS generate_proposals_op.cc generate_proposals_op.cu DEPS ${TMPDEPS}) - detection_library(generate_proposals_v2_op SRCS generate_proposals_v2_op.cc generate_proposals_v2_op.cu DEPS ${TMPDEPS}) - detection_library(distribute_fpn_proposals_op SRCS distribute_fpn_proposals_op.cc distribute_fpn_proposals_op.cu DEPS ${TMPDEPS}) - detection_library(collect_fpn_proposals_op SRCS collect_fpn_proposals_op.cc collect_fpn_proposals_op.cu DEPS ${TMPDEPS}) + detection_library(generate_proposals_op SRCS generate_proposals_op.cc + generate_proposals_op.cu DEPS ${TMPDEPS}) + detection_library(generate_proposals_v2_op SRCS generate_proposals_v2_op.cc + generate_proposals_v2_op.cu DEPS ${TMPDEPS}) + detection_library( + distribute_fpn_proposals_op SRCS distribute_fpn_proposals_op.cc + distribute_fpn_proposals_op.cu DEPS ${TMPDEPS}) + detection_library(collect_fpn_proposals_op SRCS collect_fpn_proposals_op.cc + collect_fpn_proposals_op.cu DEPS ${TMPDEPS}) else() detection_library(generate_proposals_op SRCS generate_proposals_op.cc) if(NOT WITH_XPU) detection_library(generate_proposals_v2_op SRCS generate_proposals_v2_op.cc) endif() - detection_library(distribute_fpn_proposals_op SRCS distribute_fpn_proposals_op.cc) + detection_library(distribute_fpn_proposals_op SRCS + distribute_fpn_proposals_op.cc) detection_library(collect_fpn_proposals_op SRCS collect_fpn_proposals_op.cc) endif() -detection_library(roi_perspective_transform_op SRCS roi_perspective_transform_op.cc roi_perspective_transform_op.cu) +detection_library( + roi_perspective_transform_op SRCS roi_perspective_transform_op.cc + roi_perspective_transform_op.cu) #Export local libraries to parent # set(DETECTION_LIBRARY ${LOCAL_DETECTION_LIBS} PARENT_SCOPE) -cc_library(mask_util SRCS mask_util.cc DEPS memory) -cc_test(mask_util_test SRCS mask_util_test.cc DEPS memory mask_util) -cc_library(gpc SRCS gpc.cc DEPS op_registry) -detection_library(generate_mask_labels_op SRCS generate_mask_labels_op.cc DEPS mask_util) +cc_library( + mask_util + SRCS mask_util.cc + DEPS memory) +cc_test( + mask_util_test + SRCS mask_util_test.cc + DEPS memory mask_util) +cc_library( + gpc + SRCS gpc.cc + DEPS op_registry) +detection_library(generate_mask_labels_op SRCS generate_mask_labels_op.cc DEPS + mask_util) diff --git a/paddle/fluid/operators/dlnne/CMakeLists.txt b/paddle/fluid/operators/dlnne/CMakeLists.txt index 4fe9cf214ea..11347f0f94e 100644 --- a/paddle/fluid/operators/dlnne/CMakeLists.txt +++ b/paddle/fluid/operators/dlnne/CMakeLists.txt @@ -1,39 +1,30 @@ # compile flags -set(DLNNE_FLAGS - -Wno-error=non-virtual-dtor - -Wno-error=unused-variable - -Wno-error=attributes - ${fsanitize} -) +set(DLNNE_FLAGS -Wno-error=non-virtual-dtor -Wno-error=unused-variable + -Wno-error=attributes ${fsanitize}) foreach(flag ${DLNNE_FLAGS}) safe_set_cflag(CMAKE_C_FLAGS ${flag}) safe_set_cxxflag(CMAKE_CXX_FLAGS ${flag}) endforeach() - # add nne -find_path(DLNNE_INCLUDE_DIR dlnne.h - PATHS - $ENV{SOFTWARE_SOURCE_DIR} $ENV{SOFTWARE_SOURCE_DIR}/driver/nne/include - NO_DEFAULT_PATH -) - -find_library(DLNNE_LIB libdlnne.so - PATHS - $ENV{SOFTWARE_BUILD_DIR} $ENV{SOFTWARE_BUILD_DIR}/driver/nne - NO_DEFAULT_PATH -) +find_path( + DLNNE_INCLUDE_DIR dlnne.h + PATHS $ENV{SOFTWARE_SOURCE_DIR} $ENV{SOFTWARE_SOURCE_DIR}/driver/nne/include + NO_DEFAULT_PATH) -find_path(CUDA_INCLUDE_DIR cuda.h - $ENV{SOFTWARE_BUILD_DIR}/llvm-project-10/cuda/include -) +find_library( + DLNNE_LIB libdlnne.so + PATHS $ENV{SOFTWARE_BUILD_DIR} $ENV{SOFTWARE_BUILD_DIR}/driver/nne + NO_DEFAULT_PATH) -find_library(CURT_LIB libcurt.so - PATHS - $ENV{SOFTWARE_BUILD_DIR} $ENV{SOFTWARE_BUILD_DIR}/llvm-project-10/cuda/lib - NO_DEFAULT_PATH -) +find_path(CUDA_INCLUDE_DIR cuda.h + $ENV{SOFTWARE_BUILD_DIR}/llvm-project-10/cuda/include) +find_library( + CURT_LIB libcurt.so + PATHS $ENV{SOFTWARE_BUILD_DIR} + $ENV{SOFTWARE_BUILD_DIR}/llvm-project-10/cuda/lib + NO_DEFAULT_PATH) message("DLNNE_INCLUDE_DIR: "${DLNNE_INCLUDE_DIR}) message("DLNNE_LIB: "${DLNNE_LIB}) @@ -43,7 +34,15 @@ message("CURT_LIB: "${CURT_LIB}) include_directories("${DLNNE_INCLUDE_DIR}") include_directories("${CUDA_INCLUDE_DIR}") -op_library(dlnne_engine_op DEPS ${GLOB_OPERATOR_DEPS} framework_proto boost device_context op_registry scope) +op_library( + dlnne_engine_op + DEPS + ${GLOB_OPERATOR_DEPS} + framework_proto + boost + device_context + op_registry + scope) #message("PYBIND_FILE:${pybind_file}") #file(APPEND ${pybind_file} "USE_NO_KERNEL_OP(dlnne_engine);\n") @@ -51,4 +50,7 @@ op_library(dlnne_engine_op DEPS ${GLOB_OPERATOR_DEPS} framework_proto boost devi target_link_libraries(dlnne_engine_op ${DLNNE_LIB} ${CURT_LIB}) -cc_test(test_dlnne_engine_op SRCS dlnne_engine_op_test.cc DEPS dlnne_engine_op analysis) +cc_test( + test_dlnne_engine_op + SRCS dlnne_engine_op_test.cc + DEPS dlnne_engine_op analysis) diff --git a/paddle/fluid/operators/elementwise/CMakeLists.txt b/paddle/fluid/operators/elementwise/CMakeLists.txt index 216a3f79d6f..25b34a2c0a2 100644 --- a/paddle/fluid/operators/elementwise/CMakeLists.txt +++ b/paddle/fluid/operators/elementwise/CMakeLists.txt @@ -1,14 +1,32 @@ include(operators) if(WITH_UNITY_BUILD) - # Load Unity Build rules for operators in paddle/fluid/operators/elementwise. - include(unity_build_rule.cmake) + # Load Unity Build rules for operators in paddle/fluid/operators/elementwise. + include(unity_build_rule.cmake) endif() register_operators(DEPS op_version_registry) -cc_test(test_elementwise_add_op_inplace SRCS test_elementwise_add_op_inplace.cc DEPS op_registry elementwise_add_op scope device_context enforce executor) -cc_test(test_elementwise_div_grad_grad SRCS test_elementwise_div_grad_grad.cc DEPS op_registry elementwise_div_op scope device_context enforce executor) -cc_test(test_elementwise_add_grad_grad SRCS test_elementwise_add_grad_grad.cc DEPS op_registry elementwise_add_op scope device_context enforce executor) +cc_test( + test_elementwise_add_op_inplace + SRCS test_elementwise_add_op_inplace.cc + DEPS op_registry elementwise_add_op scope device_context enforce executor) +cc_test( + test_elementwise_div_grad_grad + SRCS test_elementwise_div_grad_grad.cc + DEPS op_registry elementwise_div_op scope device_context enforce executor) +cc_test( + test_elementwise_add_grad_grad + SRCS test_elementwise_add_grad_grad.cc + DEPS op_registry elementwise_add_op scope device_context enforce executor) if(WITH_ASCEND_CL) -cc_test(elementwise_op_npu_test SRCS elementwise_op_npu_test.cc DEPS op_registry elementwise_add_op elementwise_sub_op scope device_context enforce executor) + cc_test( + elementwise_op_npu_test + SRCS elementwise_op_npu_test.cc + DEPS op_registry + elementwise_add_op + elementwise_sub_op + scope + device_context + enforce + executor) endif() diff --git a/paddle/fluid/operators/elementwise/unity_build_rule.cmake b/paddle/fluid/operators/elementwise/unity_build_rule.cmake index ea001fe4385..060c990ea87 100644 --- a/paddle/fluid/operators/elementwise/unity_build_rule.cmake +++ b/paddle/fluid/operators/elementwise/unity_build_rule.cmake @@ -4,25 +4,27 @@ # Generally, the combination rules in this file do not need to be modified. # If there are some redefined error in compiling with the source file which # in combination rule, you can remove the source file from the following rules. -register_unity_group(cc - elementwise_add_op.cc - mkldnn/elementwise_add_mkldnn_op.cc - elementwise_div_op.cc - elementwise_floordiv_op.cc - elementwise_max_op.cc - elementwise_min_op.cc - elementwise_mod_op.cc - elementwise_mul_op.cc - mkldnn/elementwise_mul_mkldnn_op.cc - elementwise_pow_op.cc - elementwise_sub_op.cc) -register_unity_group(cu - elementwise_add_op.cu - elementwise_div_op.cu - elementwise_floordiv_op.cu - elementwise_max_op.cu - elementwise_min_op.cu - elementwise_mod_op.cu - elementwise_mul_op.cu - elementwise_pow_op.cu - elementwise_sub_op.cu) +register_unity_group( + cc + elementwise_add_op.cc + mkldnn/elementwise_add_mkldnn_op.cc + elementwise_div_op.cc + elementwise_floordiv_op.cc + elementwise_max_op.cc + elementwise_min_op.cc + elementwise_mod_op.cc + elementwise_mul_op.cc + mkldnn/elementwise_mul_mkldnn_op.cc + elementwise_pow_op.cc + elementwise_sub_op.cc) +register_unity_group( + cu + elementwise_add_op.cu + elementwise_div_op.cu + elementwise_floordiv_op.cu + elementwise_max_op.cu + elementwise_min_op.cu + elementwise_mod_op.cu + elementwise_mul_op.cu + elementwise_pow_op.cu + elementwise_sub_op.cu) diff --git a/paddle/fluid/operators/fused/CMakeLists.txt b/paddle/fluid/operators/fused/CMakeLists.txt index e23891d899d..4ffb96d3c51 100755 --- a/paddle/fluid/operators/fused/CMakeLists.txt +++ b/paddle/fluid/operators/fused/CMakeLists.txt @@ -1,97 +1,149 @@ include(operators) if(WITH_UNITY_BUILD) - # Load Unity Build rules for operators in paddle/fluid/operators/fused. - include(unity_build_rule.cmake) + # Load Unity Build rules for operators in paddle/fluid/operators/fused. + include(unity_build_rule.cmake) endif() -register_operators(EXCLUDES - fused_bn_activation_op - conv_fusion_op - fusion_transpose_flatten_concat_op - fusion_conv_inception_op - fused_fc_elementwise_layernorm_op - multihead_matmul_op - skip_layernorm_op - yolo_box_head_op - yolo_box_post_op - fused_embedding_eltwise_layernorm_op - fusion_group_op - fusion_gru_op - fusion_lstm_op - fused_bn_add_activation_op - fused_attention_op - fused_transformer_op - fused_feedforward_op - fused_multi_transformer_op - fused_bias_dropout_residual_layer_norm_op - resnet_unit_op - fused_gemm_epilogue_op - fused_gate_attention_op) +register_operators( + EXCLUDES + fused_bn_activation_op + conv_fusion_op + fusion_transpose_flatten_concat_op + fusion_conv_inception_op + fused_fc_elementwise_layernorm_op + multihead_matmul_op + skip_layernorm_op + yolo_box_head_op + yolo_box_post_op + fused_embedding_eltwise_layernorm_op + fusion_group_op + fusion_gru_op + fusion_lstm_op + fused_bn_add_activation_op + fused_attention_op + fused_transformer_op + fused_feedforward_op + fused_multi_transformer_op + fused_bias_dropout_residual_layer_norm_op + resnet_unit_op + fused_gemm_epilogue_op + fused_gate_attention_op) # fusion_gru_op does not have CUDA kernel op_library(fusion_gru_op) op_library(fusion_lstm_op) +if(WITH_GPU OR WITH_ROCM) + # fused_bn_activation_op needs cudnn 7.4.1 above + # HIP not support bn act fuse in MIOPEN + if((NOT WITH_ROCM) AND (NOT ${CUDNN_VERSION} VERSION_LESS 7401)) + op_library(fused_bn_activation_op) + endif() + # conv_fusion_op needs cudnn 7 above + if(NOT ${CUDNN_VERSION} VERSION_LESS 7100) + op_library(conv_fusion_op) + endif() + # fusion_transpose_flatten_concat_op + # HIP not support cudnnTransformTensor + if(NOT WITH_ROCM) + op_library(fusion_transpose_flatten_concat_op) + endif() + # fusion_conv_inception_op needs cudnn 7 above + # HIP not support cudnnConvolutionBiasActivationForward + if((NOT WITH_ROCM) AND (NOT ${CUDNN_VERSION} VERSION_LESS 7100)) + op_library(fusion_conv_inception_op) + endif() + # fused_fc_elementwise_layernorm_op + op_library(fused_fc_elementwise_layernorm_op) + # multihead_matmul_op + op_library(multihead_matmul_op) + op_library(skip_layernorm_op) + op_library(yolo_box_head_op) + op_library(yolo_box_post_op) + op_library(fused_embedding_eltwise_layernorm_op) + op_library(fused_gate_attention_op) + # fusion_group + if(NOT APPLE AND NOT WIN32) + op_library(fusion_group_op DEPS device_code) + cc_test( + test_fusion_group_op + SRCS fusion_group_op_test.cc + DEPS fusion_group_op) + endif() + # fused_bn_add_activation + # HIP not support bn act fuse in MIOPEN + if((NOT WITH_ROCM) AND (NOT ${CUDNN_VERSION} VERSION_LESS 7401)) + op_library(fused_bn_add_activation_op) + endif() + # fused_dropout + # only support CUDA + if(NOT WITH_ROCM) + nv_test( + test_fused_residual_dropout_bias + SRCS fused_residual_dropout_bias_test.cu + DEPS tensor + op_registry + dropout_op + layer_norm_op + device_context + generator + memory) + nv_test( + test_fused_dropout_act_bias + SRCS fused_dropout_act_bias_test.cu + DEPS tensor + op_registry + dropout_op + layer_norm_op + device_context + generator + memory) + nv_test( + test_fused_layernorm_residual_dropout_bias + SRCS fused_layernorm_residual_dropout_bias_test.cu + DEPS tensor + op_registry + dropout_op + layer_norm_op + device_context + generator + memory) -if (WITH_GPU OR WITH_ROCM) - # fused_bn_activation_op needs cudnn 7.4.1 above - # HIP not support bn act fuse in MIOPEN - if ((NOT WITH_ROCM) AND (NOT ${CUDNN_VERSION} VERSION_LESS 7401)) - op_library(fused_bn_activation_op) - endif() - # conv_fusion_op needs cudnn 7 above - if (NOT ${CUDNN_VERSION} VERSION_LESS 7100) - op_library(conv_fusion_op) - endif() - # fusion_transpose_flatten_concat_op - # HIP not support cudnnTransformTensor - if(NOT WITH_ROCM) - op_library(fusion_transpose_flatten_concat_op) - endif() - # fusion_conv_inception_op needs cudnn 7 above - # HIP not support cudnnConvolutionBiasActivationForward - if ((NOT WITH_ROCM) AND (NOT ${CUDNN_VERSION} VERSION_LESS 7100)) - op_library(fusion_conv_inception_op) - endif() - # fused_fc_elementwise_layernorm_op - op_library(fused_fc_elementwise_layernorm_op) - # multihead_matmul_op - op_library(multihead_matmul_op) - op_library(skip_layernorm_op) - op_library(yolo_box_head_op) - op_library(yolo_box_post_op) - op_library(fused_embedding_eltwise_layernorm_op) - op_library(fused_gate_attention_op) - # fusion_group - if(NOT APPLE AND NOT WIN32) - op_library(fusion_group_op DEPS device_code) - cc_test(test_fusion_group_op SRCS fusion_group_op_test.cc DEPS fusion_group_op) - endif() - # fused_bn_add_activation - # HIP not support bn act fuse in MIOPEN - if ((NOT WITH_ROCM) AND (NOT ${CUDNN_VERSION} VERSION_LESS 7401)) - op_library(fused_bn_add_activation_op) - endif() - # fused_dropout - # only support CUDA - if(NOT WITH_ROCM) - nv_test(test_fused_residual_dropout_bias SRCS fused_residual_dropout_bias_test.cu DEPS tensor op_registry dropout_op layer_norm_op device_context generator memory) - nv_test(test_fused_dropout_act_bias SRCS fused_dropout_act_bias_test.cu DEPS tensor op_registry dropout_op layer_norm_op device_context generator memory) - nv_test(test_fused_layernorm_residual_dropout_bias SRCS fused_layernorm_residual_dropout_bias_test.cu DEPS tensor op_registry dropout_op layer_norm_op device_context generator memory) + op_library(fused_feedforward_op) + # fused_attention_op + op_library(fused_attention_op) + op_library(fused_multi_transformer_op) + op_library(fused_bias_dropout_residual_layer_norm_op) + endif() + # resnet_unit needs cudnn 8.0 above + if((NOT WITH_ROCM) AND (NOT ${CUDNN_VERSION} VERSION_LESS 8000)) + op_library(resnet_unit_op) + cc_test( + test_cudnn_norm_conv + SRCS cudnn_norm_conv_test.cc + DEPS conv_op + blas + im2col + vol2col + depthwise_conv + eigen_function + tensor + op_registry + device_context + generator + memory) + cc_test( + test_cudnn_bn_add_relu + SRCS cudnn_bn_add_relu_test.cc + DEPS batch_norm_op + fused_bn_add_activation_op + tensor + op_registry + device_context + generator + memory) + endif() - op_library(fused_feedforward_op) - # fused_attention_op - op_library(fused_attention_op) - op_library(fused_multi_transformer_op) - op_library(fused_bias_dropout_residual_layer_norm_op) - endif() - # resnet_unit needs cudnn 8.0 above - if ((NOT WITH_ROCM) AND (NOT ${CUDNN_VERSION} VERSION_LESS 8000)) - op_library(resnet_unit_op) - cc_test(test_cudnn_norm_conv SRCS cudnn_norm_conv_test.cc DEPS conv_op blas im2col vol2col depthwise_conv eigen_function tensor op_registry device_context generator memory) - cc_test(test_cudnn_bn_add_relu SRCS cudnn_bn_add_relu_test.cc DEPS batch_norm_op fused_bn_add_activation_op tensor op_registry device_context generator memory) - endif() - - if (CUDA_VERSION GREATER_EQUAL 11.6) - op_library(fused_gemm_epilogue_op) - endif() + if(CUDA_VERSION GREATER_EQUAL 11.6) + op_library(fused_gemm_epilogue_op) + endif() endif() diff --git a/paddle/fluid/operators/fused/unity_build_rule.cmake b/paddle/fluid/operators/fused/unity_build_rule.cmake index c428b7456bb..8605cd3cdae 100644 --- a/paddle/fluid/operators/fused/unity_build_rule.cmake +++ b/paddle/fluid/operators/fused/unity_build_rule.cmake @@ -4,16 +4,17 @@ # Generally, the combination rules in this file do not need to be modified. # If there are some redefined error in compiling with the source file which # in combination rule, you can remove the source file from the following rules. -register_unity_group(cc - fused_elemwise_activation_op.cc - fused_embedding_fc_lstm_op.cc - fused_embedding_seq_pool_op.cc - fusion_lstm_op.cc - fusion_repeated_fc_relu_op.cc - fusion_seqconv_eltadd_relu_op.cc - fusion_seqexpand_concat_fc_op.cc - fusion_seqpool_concat_op.cc - fusion_squared_mat_sub_op.cc - multi_gru_op.cc - mkldnn/multi_gru_mkldnn_op.cc - fusion_seqpool_cvm_concat_op.cc) +register_unity_group( + cc + fused_elemwise_activation_op.cc + fused_embedding_fc_lstm_op.cc + fused_embedding_seq_pool_op.cc + fusion_lstm_op.cc + fusion_repeated_fc_relu_op.cc + fusion_seqconv_eltadd_relu_op.cc + fusion_seqexpand_concat_fc_op.cc + fusion_seqpool_concat_op.cc + fusion_squared_mat_sub_op.cc + multi_gru_op.cc + mkldnn/multi_gru_mkldnn_op.cc + fusion_seqpool_cvm_concat_op.cc) diff --git a/paddle/fluid/operators/jit/CMakeLists.txt b/paddle/fluid/operators/jit/CMakeLists.txt index 080e7f7d5e8..a6f10e5fbda 100644 --- a/paddle/fluid/operators/jit/CMakeLists.txt +++ b/paddle/fluid/operators/jit/CMakeLists.txt @@ -1,16 +1,25 @@ - set(jit_file ${PADDLE_BINARY_DIR}/paddle/fluid/operators/jit/kernels.h.tmp) set(jit_file_final ${PADDLE_BINARY_DIR}/paddle/fluid/operators/jit/kernels.h) -file(WRITE ${jit_file} "// Generated by the paddle/fluid/operators/jit/CMakeLists.txt. DO NOT EDIT!\n\n") +file( + WRITE ${jit_file} + "// Generated by the paddle/fluid/operators/jit/CMakeLists.txt. DO NOT EDIT!\n\n" +) file(APPEND ${jit_file} "\#pragma once\n") file(APPEND ${jit_file} "\#include \"paddle/fluid/operators/jit/helper.h\"\n") -file(APPEND ${jit_file} "\#include \"paddle/fluid/operators/jit/registry.h\"\n\n") +file(APPEND ${jit_file} + "\#include \"paddle/fluid/operators/jit/registry.h\"\n\n") set(JIT_KERNEL_DEPS cpu_info cblas gflags enforce place xxhash) -file(GLOB jit_kernel_cc_srcs RELATIVE "${CMAKE_CURRENT_SOURCE_DIR}" "*.cc") +file( + GLOB jit_kernel_cc_srcs + RELATIVE "${CMAKE_CURRENT_SOURCE_DIR}" + "*.cc") list(REMOVE_ITEM jit_kernel_cc_srcs test.cc benchmark.cc) -cc_library(jit_kernel_base SRCS ${jit_kernel_cc_srcs} DEPS ${JIT_KERNEL_DEPS}) +cc_library( + jit_kernel_base + SRCS ${jit_kernel_cc_srcs} + DEPS ${JIT_KERNEL_DEPS}) copy_if_different(${jit_file} ${jit_file_final}) @@ -18,14 +27,27 @@ copy_if_different(${jit_file} ${jit_file_final}) add_subdirectory(refer) add_subdirectory(more) if(WITH_XBYAK) - add_subdirectory(gen) + add_subdirectory(gen) endif() -cc_library(jit_kernel_helper INTERFACE SRCS ${jit_kernel_cc_srcs} DEPS jit_kernel_base ${JIT_KERNEL_DEPS}) -cc_test(jit_kernel_test SRCS test.cc DEPS jit_kernel_helper) +cc_library( + jit_kernel_helper INTERFACE + SRCS ${jit_kernel_cc_srcs} + DEPS jit_kernel_base ${JIT_KERNEL_DEPS}) +cc_test( + jit_kernel_test + SRCS test.cc + DEPS jit_kernel_helper) if(NOT WIN32) - cc_binary(jit_kernel_benchmark SRCS benchmark.cc DEPS jit_kernel_helper device_tracer tensor) + cc_binary( + jit_kernel_benchmark + SRCS + benchmark.cc + DEPS + jit_kernel_helper + device_tracer + tensor) endif() if(WITH_TESTING AND TEST jit_kernel_test) - set_tests_properties(jit_kernel_test PROPERTIES TIMEOUT 120) + set_tests_properties(jit_kernel_test PROPERTIES TIMEOUT 120) endif() diff --git a/paddle/fluid/operators/jit/gen/CMakeLists.txt b/paddle/fluid/operators/jit/gen/CMakeLists.txt index ab8829b7baf..60e29ea81d5 100644 --- a/paddle/fluid/operators/jit/gen/CMakeLists.txt +++ b/paddle/fluid/operators/jit/gen/CMakeLists.txt @@ -1,38 +1,45 @@ +file( + GLOB jitcode_cc_srcs + RELATIVE "${CMAKE_CURRENT_SOURCE_DIR}" + "*.cc") -file(GLOB jitcode_cc_srcs RELATIVE "${CMAKE_CURRENT_SOURCE_DIR}" "*.cc") - -cc_library(jit_kernel_jitcode SRCS ${jitcode_cc_srcs} DEPS jit_kernel_base xbyak) -set(JIT_KERNEL_DEPS ${JIT_KERNEL_DEPS} xbyak jit_kernel_jitcode PARENT_SCOPE) +cc_library( + jit_kernel_jitcode + SRCS ${jitcode_cc_srcs} + DEPS jit_kernel_base xbyak) +set(JIT_KERNEL_DEPS + ${JIT_KERNEL_DEPS} xbyak jit_kernel_jitcode + PARENT_SCOPE) function(USE_JITKERNEL_GEN TARGET) - file(APPEND ${jit_file} "USE_JITKERNEL_GEN(${TARGET});\n") + file(APPEND ${jit_file} "USE_JITKERNEL_GEN(${TARGET});\n") endfunction() # use gen jitcode kernel by name -USE_JITKERNEL_GEN(kMatMul) -USE_JITKERNEL_GEN(kVMul) -USE_JITKERNEL_GEN(kVAdd) -USE_JITKERNEL_GEN(kVSub) -USE_JITKERNEL_GEN(kVAddRelu) -USE_JITKERNEL_GEN(kVScal) -USE_JITKERNEL_GEN(kVAddBias) -USE_JITKERNEL_GEN(kVRelu) -USE_JITKERNEL_GEN(kVSquare) -USE_JITKERNEL_GEN(kVIdentity) -USE_JITKERNEL_GEN(kVExp) -USE_JITKERNEL_GEN(kVSigmoid) -USE_JITKERNEL_GEN(kVTanh) -USE_JITKERNEL_GEN(kLSTMCtHt) -USE_JITKERNEL_GEN(kLSTMC1H1) -USE_JITKERNEL_GEN(kGRUH1) -USE_JITKERNEL_GEN(kGRUHtPart1) -USE_JITKERNEL_GEN(kGRUHtPart2) -USE_JITKERNEL_GEN(kNCHW16CMulNC) -USE_JITKERNEL_GEN(kSeqPool) -USE_JITKERNEL_GEN(kHMax) -USE_JITKERNEL_GEN(kHSum) -USE_JITKERNEL_GEN(kEmbSeqPool) -USE_JITKERNEL_GEN(kAdam) -USE_JITKERNEL_GEN(kAdamW) -USE_JITKERNEL_GEN(kSgd) -USE_JITKERNEL_GEN(kVBroadcast) +use_jitkernel_gen(kMatMul) +use_jitkernel_gen(kVMul) +use_jitkernel_gen(kVAdd) +use_jitkernel_gen(kVSub) +use_jitkernel_gen(kVAddRelu) +use_jitkernel_gen(kVScal) +use_jitkernel_gen(kVAddBias) +use_jitkernel_gen(kVRelu) +use_jitkernel_gen(kVSquare) +use_jitkernel_gen(kVIdentity) +use_jitkernel_gen(kVExp) +use_jitkernel_gen(kVSigmoid) +use_jitkernel_gen(kVTanh) +use_jitkernel_gen(kLSTMCtHt) +use_jitkernel_gen(kLSTMC1H1) +use_jitkernel_gen(kGRUH1) +use_jitkernel_gen(kGRUHtPart1) +use_jitkernel_gen(kGRUHtPart2) +use_jitkernel_gen(kNCHW16CMulNC) +use_jitkernel_gen(kSeqPool) +use_jitkernel_gen(kHMax) +use_jitkernel_gen(kHSum) +use_jitkernel_gen(kEmbSeqPool) +use_jitkernel_gen(kAdam) +use_jitkernel_gen(kAdamW) +use_jitkernel_gen(kSgd) +use_jitkernel_gen(kVBroadcast) diff --git a/paddle/fluid/operators/jit/more/CMakeLists.txt b/paddle/fluid/operators/jit/more/CMakeLists.txt index fa503356baa..0851ca065b5 100644 --- a/paddle/fluid/operators/jit/more/CMakeLists.txt +++ b/paddle/fluid/operators/jit/more/CMakeLists.txt @@ -1,17 +1,18 @@ - function(USE_JITKERNEL_MORE TARGET TYPE) - file(APPEND ${jit_file} "USE_JITKERNEL_MORE(${TARGET} ${TYPE});\n") + file(APPEND ${jit_file} "USE_JITKERNEL_MORE(${TARGET} ${TYPE});\n") endfunction() if(WITH_MKLML) - add_subdirectory(mkl) + add_subdirectory(mkl) endif() if(WITH_AVX) - add_subdirectory(intrinsic) + add_subdirectory(intrinsic) endif() # mix should be last add_subdirectory(mix) -set(JIT_KERNEL_DEPS ${JIT_KERNEL_DEPS} PARENT_SCOPE) +set(JIT_KERNEL_DEPS + ${JIT_KERNEL_DEPS} + PARENT_SCOPE) diff --git a/paddle/fluid/operators/jit/more/intrinsic/CMakeLists.txt b/paddle/fluid/operators/jit/more/intrinsic/CMakeLists.txt index 468937a4f6b..c6222c9b29b 100644 --- a/paddle/fluid/operators/jit/more/intrinsic/CMakeLists.txt +++ b/paddle/fluid/operators/jit/more/intrinsic/CMakeLists.txt @@ -1,9 +1,16 @@ +file( + GLOB jit_kernel_cc_intrinsic + RELATIVE "${CMAKE_CURRENT_SOURCE_DIR}" + "*.cc") +cc_library( + jit_kernel_intrinsic + SRCS ${jit_kernel_cc_intrinsic} + DEPS jit_kernel_base) -file(GLOB jit_kernel_cc_intrinsic RELATIVE "${CMAKE_CURRENT_SOURCE_DIR}" "*.cc") -cc_library(jit_kernel_intrinsic SRCS ${jit_kernel_cc_intrinsic} DEPS jit_kernel_base) - -set(JIT_KERNEL_DEPS ${JIT_KERNEL_DEPS} jit_kernel_intrinsic PARENT_SCOPE) +set(JIT_KERNEL_DEPS + ${JIT_KERNEL_DEPS} jit_kernel_intrinsic + PARENT_SCOPE) # use mkl kernels by name and type -USE_JITKERNEL_MORE(kCRFDecoding, intrinsic) -USE_JITKERNEL_MORE(kLayerNorm, intrinsic) +use_jitkernel_more(kCRFDecoding, intrinsic) +use_jitkernel_more(kLayerNorm, intrinsic) diff --git a/paddle/fluid/operators/jit/more/mix/CMakeLists.txt b/paddle/fluid/operators/jit/more/mix/CMakeLists.txt index dd039d29152..b5bc6c84575 100644 --- a/paddle/fluid/operators/jit/more/mix/CMakeLists.txt +++ b/paddle/fluid/operators/jit/more/mix/CMakeLists.txt @@ -1,15 +1,21 @@ +file( + GLOB jit_kernel_mix_cc + RELATIVE "${CMAKE_CURRENT_SOURCE_DIR}" + "*.cc") +cc_library( + jit_kernel_mix + SRCS ${jit_kernel_mix_cc} + DEPS jit_kernel_base) +set(JIT_KERNEL_DEPS + ${JIT_KERNEL_DEPS} jit_kernel_mix + PARENT_SCOPE) -file(GLOB jit_kernel_mix_cc RELATIVE "${CMAKE_CURRENT_SOURCE_DIR}" "*.cc") -cc_library(jit_kernel_mix SRCS ${jit_kernel_mix_cc} DEPS jit_kernel_base) - -set(JIT_KERNEL_DEPS ${JIT_KERNEL_DEPS} jit_kernel_mix PARENT_SCOPE) - -USE_JITKERNEL_MORE(kVSigmoid, mix) -USE_JITKERNEL_MORE(kVTanh, mix) -USE_JITKERNEL_MORE(kLSTMCtHt, mix) -USE_JITKERNEL_MORE(kLSTMC1H1, mix) -USE_JITKERNEL_MORE(kGRUH1, mix) -USE_JITKERNEL_MORE(kGRUHtPart1, mix) -USE_JITKERNEL_MORE(kGRUHtPart2, mix) -USE_JITKERNEL_MORE(kSoftmax, mix) +use_jitkernel_more(kVSigmoid, mix) +use_jitkernel_more(kVTanh, mix) +use_jitkernel_more(kLSTMCtHt, mix) +use_jitkernel_more(kLSTMC1H1, mix) +use_jitkernel_more(kGRUH1, mix) +use_jitkernel_more(kGRUHtPart1, mix) +use_jitkernel_more(kGRUHtPart2, mix) +use_jitkernel_more(kSoftmax, mix) diff --git a/paddle/fluid/operators/jit/more/mkl/CMakeLists.txt b/paddle/fluid/operators/jit/more/mkl/CMakeLists.txt index 56f1a62ad4e..609ddd3c284 100644 --- a/paddle/fluid/operators/jit/more/mkl/CMakeLists.txt +++ b/paddle/fluid/operators/jit/more/mkl/CMakeLists.txt @@ -1,20 +1,24 @@ - -cc_library(jit_kernel_mkl SRCS mkl.cc DEPS jit_kernel_base dynload_mklml) -set(JIT_KERNEL_DEPS ${JIT_KERNEL_DEPS} dynload_mklml jit_kernel_mkl PARENT_SCOPE) +cc_library( + jit_kernel_mkl + SRCS mkl.cc + DEPS jit_kernel_base dynload_mklml) +set(JIT_KERNEL_DEPS + ${JIT_KERNEL_DEPS} dynload_mklml jit_kernel_mkl + PARENT_SCOPE) # use mkl kernels by name and type -USE_JITKERNEL_MORE(kMatMul, mkl) -USE_JITKERNEL_MORE(kVMul, mkl) -USE_JITKERNEL_MORE(kVAdd, mkl) -USE_JITKERNEL_MORE(kVScal, mkl) -USE_JITKERNEL_MORE(kStrideScal, mkl) -USE_JITKERNEL_MORE(kVExp, mkl) -USE_JITKERNEL_MORE(kVSquare, mkl) -USE_JITKERNEL_MORE(kVCopy, mkl) -USE_JITKERNEL_MORE(kVSigmoid, mkl) -USE_JITKERNEL_MORE(kVTanh, mkl) -USE_JITKERNEL_MORE(kSeqPool, mkl) -USE_JITKERNEL_MORE(kSoftmax, mkl) -USE_JITKERNEL_MORE(kEmbSeqPool, mkl) -USE_JITKERNEL_MORE(kSgd, mkl) -USE_JITKERNEL_MORE(kVBroadcast, mkl) +use_jitkernel_more(kMatMul, mkl) +use_jitkernel_more(kVMul, mkl) +use_jitkernel_more(kVAdd, mkl) +use_jitkernel_more(kVScal, mkl) +use_jitkernel_more(kStrideScal, mkl) +use_jitkernel_more(kVExp, mkl) +use_jitkernel_more(kVSquare, mkl) +use_jitkernel_more(kVCopy, mkl) +use_jitkernel_more(kVSigmoid, mkl) +use_jitkernel_more(kVTanh, mkl) +use_jitkernel_more(kSeqPool, mkl) +use_jitkernel_more(kSoftmax, mkl) +use_jitkernel_more(kEmbSeqPool, mkl) +use_jitkernel_more(kSgd, mkl) +use_jitkernel_more(kVBroadcast, mkl) diff --git a/paddle/fluid/operators/jit/refer/CMakeLists.txt b/paddle/fluid/operators/jit/refer/CMakeLists.txt index a1ee4508f72..5ef93f989df 100644 --- a/paddle/fluid/operators/jit/refer/CMakeLists.txt +++ b/paddle/fluid/operators/jit/refer/CMakeLists.txt @@ -1,42 +1,46 @@ - -cc_library(jit_kernel_refer SRCS refer.cc DEPS jit_kernel_base) -set(JIT_KERNEL_DEPS ${JIT_KERNEL_DEPS} jit_kernel_refer PARENT_SCOPE) +cc_library( + jit_kernel_refer + SRCS refer.cc + DEPS jit_kernel_base) +set(JIT_KERNEL_DEPS + ${JIT_KERNEL_DEPS} jit_kernel_refer + PARENT_SCOPE) function(USE_JITKERNEL_REFER TARGET) - file(APPEND ${jit_file} "USE_JITKERNEL_REFER(${TARGET});\n") + file(APPEND ${jit_file} "USE_JITKERNEL_REFER(${TARGET});\n") endfunction() # use refer kernel by name -USE_JITKERNEL_REFER(kVMul) -USE_JITKERNEL_REFER(kVAdd) -USE_JITKERNEL_REFER(kVAddRelu) -USE_JITKERNEL_REFER(kVSub) -USE_JITKERNEL_REFER(kVScal) -USE_JITKERNEL_REFER(kStrideScal) -USE_JITKERNEL_REFER(kVAddBias) -USE_JITKERNEL_REFER(kVCopy) -USE_JITKERNEL_REFER(kVRelu) -USE_JITKERNEL_REFER(kVIdentity) -USE_JITKERNEL_REFER(kVExp) -USE_JITKERNEL_REFER(kVSigmoid) -USE_JITKERNEL_REFER(kVTanh) -USE_JITKERNEL_REFER(kLSTMCtHt) -USE_JITKERNEL_REFER(kLSTMC1H1) -USE_JITKERNEL_REFER(kGRUH1) -USE_JITKERNEL_REFER(kGRUHtPart1) -USE_JITKERNEL_REFER(kGRUHtPart2) -USE_JITKERNEL_REFER(kCRFDecoding) -USE_JITKERNEL_REFER(kLayerNorm) -USE_JITKERNEL_REFER(kNCHW16CMulNC) -USE_JITKERNEL_REFER(kSeqPool) -USE_JITKERNEL_REFER(kMatMul) -USE_JITKERNEL_REFER(kVSquare) -USE_JITKERNEL_REFER(kHSum) -USE_JITKERNEL_REFER(kHMax) -USE_JITKERNEL_REFER(kStrideASum) -USE_JITKERNEL_REFER(kSoftmax) -USE_JITKERNEL_REFER(kEmbSeqPool) -USE_JITKERNEL_REFER(kAdam) -USE_JITKERNEL_REFER(kAdamW) -USE_JITKERNEL_REFER(kSgd) -USE_JITKERNEL_REFER(kVBroadcast) +use_jitkernel_refer(kVMul) +use_jitkernel_refer(kVAdd) +use_jitkernel_refer(kVAddRelu) +use_jitkernel_refer(kVSub) +use_jitkernel_refer(kVScal) +use_jitkernel_refer(kStrideScal) +use_jitkernel_refer(kVAddBias) +use_jitkernel_refer(kVCopy) +use_jitkernel_refer(kVRelu) +use_jitkernel_refer(kVIdentity) +use_jitkernel_refer(kVExp) +use_jitkernel_refer(kVSigmoid) +use_jitkernel_refer(kVTanh) +use_jitkernel_refer(kLSTMCtHt) +use_jitkernel_refer(kLSTMC1H1) +use_jitkernel_refer(kGRUH1) +use_jitkernel_refer(kGRUHtPart1) +use_jitkernel_refer(kGRUHtPart2) +use_jitkernel_refer(kCRFDecoding) +use_jitkernel_refer(kLayerNorm) +use_jitkernel_refer(kNCHW16CMulNC) +use_jitkernel_refer(kSeqPool) +use_jitkernel_refer(kMatMul) +use_jitkernel_refer(kVSquare) +use_jitkernel_refer(kHSum) +use_jitkernel_refer(kHMax) +use_jitkernel_refer(kStrideASum) +use_jitkernel_refer(kSoftmax) +use_jitkernel_refer(kEmbSeqPool) +use_jitkernel_refer(kAdam) +use_jitkernel_refer(kAdamW) +use_jitkernel_refer(kSgd) +use_jitkernel_refer(kVBroadcast) diff --git a/paddle/fluid/operators/lite/CMakeLists.txt b/paddle/fluid/operators/lite/CMakeLists.txt index 5bb78925908..3955c6e322b 100644 --- a/paddle/fluid/operators/lite/CMakeLists.txt +++ b/paddle/fluid/operators/lite/CMakeLists.txt @@ -1,2 +1,5 @@ op_library(lite_engine_op DEPS lite_engine lite_tensor_utils) -cc_test(test_lite_engine_op SRCS lite_engine_op_test.cc DEPS lite_engine_op analysis) +cc_test( + test_lite_engine_op + SRCS lite_engine_op_test.cc + DEPS lite_engine_op analysis) diff --git a/paddle/fluid/operators/math/CMakeLists.txt b/paddle/fluid/operators/math/CMakeLists.txt index 913ce07ec67..ac538cfbd5c 100644 --- a/paddle/fluid/operators/math/CMakeLists.txt +++ b/paddle/fluid/operators/math/CMakeLists.txt @@ -1,14 +1,17 @@ -if (WITH_ASCEND_CL) - cc_library(beam_search_npu SRCS beam_search_npu.cc DEPS npu_op_runner) +if(WITH_ASCEND_CL) + cc_library( + beam_search_npu + SRCS beam_search_npu.cc + DEPS npu_op_runner) endif() # please add new math_library in alphabetical order -if (WITH_ASCEND_CL) -math_library(concat_and_split DEPS concat_and_split_functor npu_op_runner) -elseif (WITH_MLU) -math_library(concat_and_split DEPS concat_and_split_functor mlu_baseop) +if(WITH_ASCEND_CL) + math_library(concat_and_split DEPS concat_and_split_functor npu_op_runner) +elseif(WITH_MLU) + math_library(concat_and_split DEPS concat_and_split_functor mlu_baseop) else() -math_library(concat_and_split DEPS concat_and_split_functor) + math_library(concat_and_split DEPS concat_and_split_functor) endif() math_library(context_project DEPS im2col math_function) math_library(cross_entropy) @@ -22,23 +25,30 @@ math_library(sampler DEPS generator) math_library(maxouting) if(WITH_MKLDNN) - math_library(selected_rows_functor DEPS selected_rows_utils math_function blas mkldnn_axpy_handler mixed_vector) + math_library( + selected_rows_functor + DEPS + selected_rows_utils + math_function + blas + mkldnn_axpy_handler + mixed_vector) else() - math_library(selected_rows_functor DEPS selected_rows_utils math_function blas mixed_vector) + math_library(selected_rows_functor DEPS selected_rows_utils math_function + blas mixed_vector) endif() math_library(sequence_padding) math_library(sequence_pooling DEPS math_function jit_kernel_helper) math_library(sequence_scale) math_library(softmax DEPS math_function jit_kernel_helper) -if (WITH_ASCEND_CL) - math_library(beam_search DEPS math_function beam_search_npu) +if(WITH_ASCEND_CL) + math_library(beam_search DEPS math_function beam_search_npu) else() - math_library(beam_search DEPS math_function) + math_library(beam_search DEPS math_function) endif() math_library(matrix_bit_code) - math_library(unpooling) math_library(vol2col) math_library(prelu) @@ -46,28 +56,58 @@ math_library(bert_encoder_functor) math_library(tree2col DEPS math_function) math_library(matrix_solve) -cc_test(selected_rows_functor_test SRCS selected_rows_functor_test.cc DEPS selected_rows_functor) -cc_test(im2col_test SRCS im2col_test.cc DEPS im2col) -cc_test(vol2col_test SRCS vol2col_test.cc DEPS vol2col) -cc_test(sequence_padding_test SRCS sequence_padding_test.cc DEPS sequence_padding) -cc_test(sequence_pooling_test SRCS sequence_pooling_test.cc DEPS sequence_pooling) -cc_test(beam_search_test SRCS beam_search_test.cc DEPS beam_search) +cc_test( + selected_rows_functor_test + SRCS selected_rows_functor_test.cc + DEPS selected_rows_functor) +cc_test( + im2col_test + SRCS im2col_test.cc + DEPS im2col) +cc_test( + vol2col_test + SRCS vol2col_test.cc + DEPS vol2col) +cc_test( + sequence_padding_test + SRCS sequence_padding_test.cc + DEPS sequence_padding) +cc_test( + sequence_pooling_test + SRCS sequence_pooling_test.cc + DEPS sequence_pooling) +cc_test( + beam_search_test + SRCS beam_search_test.cc + DEPS beam_search) if(WITH_GPU) - nv_test(selected_rows_functor_gpu_test SRCS selected_rows_functor_test.cu.cc DEPS selected_rows_functor math_function) + nv_test( + selected_rows_functor_gpu_test + SRCS selected_rows_functor_test.cu.cc + DEPS selected_rows_functor math_function) endif() if(WITH_ROCM) - hip_test(selected_rows_functor_gpu_test SRCS selected_rows_functor_test.cu.cc DEPS selected_rows_functor math_function) + hip_test( + selected_rows_functor_gpu_test + SRCS selected_rows_functor_test.cu.cc + DEPS selected_rows_functor math_function) endif() -cc_test(concat_test SRCS concat_test.cc DEPS concat_and_split) +cc_test( + concat_test + SRCS concat_test.cc + DEPS concat_and_split) if(WITH_GPU AND (NOT WITH_ROCM)) -#currenty not yet support ROCM -#the generic conversion APIs of dense and sparse are only supported after cuda11.2 - if((NOT ${CMAKE_CUDA_COMPILER_VERSION} VERSION_LESS 11.2)) - cc_test(cusparse_conversion_api_test SRCS cusparse_conversion_api_test.cc DEPS tensor) - endif() + #currenty not yet support ROCM + #the generic conversion APIs of dense and sparse are only supported after cuda11.2 + if((NOT ${CMAKE_CUDA_COMPILER_VERSION} VERSION_LESS 11.2)) + cc_test( + cusparse_conversion_api_test + SRCS cusparse_conversion_api_test.cc + DEPS tensor) + endif() endif() if(WITH_TESTING AND TEST im2col_test) - set_tests_properties(im2col_test PROPERTIES TIMEOUT 120) + set_tests_properties(im2col_test PROPERTIES TIMEOUT 120) endif() diff --git a/paddle/fluid/operators/metrics/CMakeLists.txt b/paddle/fluid/operators/metrics/CMakeLists.txt index 101939dde2c..b968dbf288e 100644 --- a/paddle/fluid/operators/metrics/CMakeLists.txt +++ b/paddle/fluid/operators/metrics/CMakeLists.txt @@ -1,6 +1,6 @@ include(operators) if(WITH_UNITY_BUILD) - # Load Unity Build rules for operators in paddle/fluid/operators/metrics. - include(unity_build_rule.cmake) + # Load Unity Build rules for operators in paddle/fluid/operators/metrics. + include(unity_build_rule.cmake) endif() register_operators() diff --git a/paddle/fluid/operators/metrics/unity_build_rule.cmake b/paddle/fluid/operators/metrics/unity_build_rule.cmake index fcb690a7b6a..58acbc3b1e6 100644 --- a/paddle/fluid/operators/metrics/unity_build_rule.cmake +++ b/paddle/fluid/operators/metrics/unity_build_rule.cmake @@ -4,10 +4,5 @@ # Generally, the combination rules in this file do not need to be modified. # If there are some redefined error in compiling with the source file which # in combination rule, you can remove the source file from the following rules. -register_unity_group(cc - accuracy_op.cc - auc_op.cc - precision_recall_op.cc) -register_unity_group(cu - accuracy_op.cu - auc_op.cu) +register_unity_group(cc accuracy_op.cc auc_op.cc precision_recall_op.cc) +register_unity_group(cu accuracy_op.cu auc_op.cu) diff --git a/paddle/fluid/operators/mkldnn/CMakeLists.txt b/paddle/fluid/operators/mkldnn/CMakeLists.txt index ce95ec560c2..f40286ad5d8 100644 --- a/paddle/fluid/operators/mkldnn/CMakeLists.txt +++ b/paddle/fluid/operators/mkldnn/CMakeLists.txt @@ -1 +1,4 @@ -cc_library(mkldnn_axpy_handler SRCS axpy_handler.cc DEPS place device_context enforce) +cc_library( + mkldnn_axpy_handler + SRCS axpy_handler.cc + DEPS place device_context enforce) diff --git a/paddle/fluid/operators/mkldnn/caching_tests.cmake b/paddle/fluid/operators/mkldnn/caching_tests.cmake index f48a5d822f8..49f08622265 100644 --- a/paddle/fluid/operators/mkldnn/caching_tests.cmake +++ b/paddle/fluid/operators/mkldnn/caching_tests.cmake @@ -1,6 +1,20 @@ -set(TEST_MKLDNN_CACHING_DEPS op_registry elementwise_mul_op elementwise_add_op activation_op softmax_op conv_op im2col vol2col softmax scope device_context enforce) -if (WITH_GPU OR WITH_ROCM) +set(TEST_MKLDNN_CACHING_DEPS + op_registry + elementwise_mul_op + elementwise_add_op + activation_op + softmax_op + conv_op + im2col + vol2col + softmax + scope + device_context + enforce) +if(WITH_GPU OR WITH_ROCM) set(TEST_MKLDNN_CACHING_DEPS ${TEST_MKLDNN_CACHING_DEPS} depthwise_conv) endif() -cc_test(test_mkldnn_caching SRCS mkldnn/test_mkldnn_caching.cc DEPS ${TEST_MKLDNN_CACHING_DEPS}) - +cc_test( + test_mkldnn_caching + SRCS mkldnn/test_mkldnn_caching.cc + DEPS ${TEST_MKLDNN_CACHING_DEPS}) diff --git a/paddle/fluid/operators/mkldnn/inplace_op_tests.cmake b/paddle/fluid/operators/mkldnn/inplace_op_tests.cmake index c03ce74df7d..18893e22ec8 100644 --- a/paddle/fluid/operators/mkldnn/inplace_op_tests.cmake +++ b/paddle/fluid/operators/mkldnn/inplace_op_tests.cmake @@ -1,2 +1,12 @@ -cc_test(test_mkldnn_op_inplace SRCS mkldnn/test_mkldnn_op_inplace.cc DEPS op_registry elementwise_add_op activation_op softmax_op softmax scope device_context enforce executor) - +cc_test( + test_mkldnn_op_inplace + SRCS mkldnn/test_mkldnn_op_inplace.cc + DEPS op_registry + elementwise_add_op + activation_op + softmax_op + softmax + scope + device_context + enforce + executor) diff --git a/paddle/fluid/operators/mkldnn/nhwc_op_tests.cmake b/paddle/fluid/operators/mkldnn/nhwc_op_tests.cmake index 8bad3e86b29..4c94bc3f3ad 100644 --- a/paddle/fluid/operators/mkldnn/nhwc_op_tests.cmake +++ b/paddle/fluid/operators/mkldnn/nhwc_op_tests.cmake @@ -1 +1,14 @@ -cc_test(test_mkldnn_op_nhwc SRCS mkldnn/test_mkldnn_op_nhwc.cc DEPS op_registry pool_op shape_op crop_op activation_op pooling transpose_op scope device_context enforce executor) +cc_test( + test_mkldnn_op_nhwc + SRCS mkldnn/test_mkldnn_op_nhwc.cc + DEPS op_registry + pool_op + shape_op + crop_op + activation_op + pooling + transpose_op + scope + device_context + enforce + executor) diff --git a/paddle/fluid/operators/mlu/CMakeLists.txt b/paddle/fluid/operators/mlu/CMakeLists.txt index efd6aeb8eeb..c383edecaac 100644 --- a/paddle/fluid/operators/mlu/CMakeLists.txt +++ b/paddle/fluid/operators/mlu/CMakeLists.txt @@ -1,5 +1,10 @@ - -IF(WITH_MLU) - cc_library(mlu_baseop SRCS mlu_baseop.cc DEPS neuware_lib device_context) - cc_test(activation_op_mlu_test SRCS activation_op_mlu_test.cc DEPS op_registry activation_op scope device_context executor) -ENDIF() +if(WITH_MLU) + cc_library( + mlu_baseop + SRCS mlu_baseop.cc + DEPS neuware_lib device_context) + cc_test( + activation_op_mlu_test + SRCS activation_op_mlu_test.cc + DEPS op_registry activation_op scope device_context executor) +endif() diff --git a/paddle/fluid/operators/nccl/CMakeLists.txt b/paddle/fluid/operators/nccl/CMakeLists.txt index b3d53f0d390..218d53aa630 100644 --- a/paddle/fluid/operators/nccl/CMakeLists.txt +++ b/paddle/fluid/operators/nccl/CMakeLists.txt @@ -1,24 +1,38 @@ -if (NOT (WITH_NCCL OR WITH_RCCL)) +if(NOT (WITH_NCCL OR WITH_RCCL)) return() endif() if(WITH_GPU AND NOT WIN32) - nv_library(nccl_common SRCS nccl_gpu_common.cc DEPS device_context operator ) + nv_library( + nccl_common + SRCS nccl_gpu_common.cc + DEPS device_context operator) endif() if(WITH_ROCM AND NOT WIN32) - hip_library(nccl_common SRCS nccl_gpu_common.cc DEPS device_context operator ) + hip_library( + nccl_common + SRCS nccl_gpu_common.cc + DEPS device_context operator) endif() if(WITH_GPU OR WITH_ROCM) - op_library(nccl_op DEPS nccl_common) - set(OPERATOR_DEPS ${OPERATOR_DEPS} nccl_common PARENT_SCOPE) + op_library(nccl_op DEPS nccl_common) + set(OPERATOR_DEPS + ${OPERATOR_DEPS} nccl_common + PARENT_SCOPE) endif() if(WITH_GPU AND NOT WIN32) - nv_test(nccl_op_test SRCS nccl_op_test.cu.cc DEPS nccl_op gpu_info device_context) + nv_test( + nccl_op_test + SRCS nccl_op_test.cu.cc + DEPS nccl_op gpu_info device_context) endif() if(WITH_ROCM AND NOT WIN32) - hip_test(nccl_op_test SRCS nccl_op_test.cu.cc DEPS nccl_op gpu_info device_context) + hip_test( + nccl_op_test + SRCS nccl_op_test.cu.cc + DEPS nccl_op gpu_info device_context) endif() diff --git a/paddle/fluid/operators/optimizers/CMakeLists.txt b/paddle/fluid/operators/optimizers/CMakeLists.txt index 6989447fc04..7a27dda735c 100644 --- a/paddle/fluid/operators/optimizers/CMakeLists.txt +++ b/paddle/fluid/operators/optimizers/CMakeLists.txt @@ -1,6 +1,6 @@ include(operators) if(WITH_UNITY_BUILD) - # Load Unity Build rules for operators in paddle/fluid/operators/optimizers. - include(unity_build_rule.cmake) + # Load Unity Build rules for operators in paddle/fluid/operators/optimizers. + include(unity_build_rule.cmake) endif() register_operators() diff --git a/paddle/fluid/operators/optimizers/unity_build_rule.cmake b/paddle/fluid/operators/optimizers/unity_build_rule.cmake index 769bb781d6e..61e63ad9a6e 100644 --- a/paddle/fluid/operators/optimizers/unity_build_rule.cmake +++ b/paddle/fluid/operators/optimizers/unity_build_rule.cmake @@ -4,32 +4,34 @@ # Generally, the combination rules in this file do not need to be modified. # If there are some redefined error in compiling with the source file which # in combination rule, you can remove the source file from the following rules. -register_unity_group(cc - ftrl_op.cc - lars_momentum_op.cc - momentum_op.cc - sgd_op.cc - proximal_adagrad_op.cc - adagrad_op.cc - adam_op.cc - adamax_op.cc - dgc_momentum_op.cc - proximal_gd_op.cc - decayed_adagrad_op.cc - adadelta_op.cc - lamb_op.cc - dpsgd_op.cc - rmsprop_op.cc) -register_unity_group(cu - ftrl_op.cu - lars_momentum_op.cu - momentum_op.cu - sgd_op.cu - proximal_adagrad_op.cu - adagrad_op.cu - adam_op.cu - adamax_op.cu - decayed_adagrad_op.cu - adadelta_op.cu - lamb_op.cu - rmsprop_op.cu) +register_unity_group( + cc + ftrl_op.cc + lars_momentum_op.cc + momentum_op.cc + sgd_op.cc + proximal_adagrad_op.cc + adagrad_op.cc + adam_op.cc + adamax_op.cc + dgc_momentum_op.cc + proximal_gd_op.cc + decayed_adagrad_op.cc + adadelta_op.cc + lamb_op.cc + dpsgd_op.cc + rmsprop_op.cc) +register_unity_group( + cu + ftrl_op.cu + lars_momentum_op.cu + momentum_op.cu + sgd_op.cu + proximal_adagrad_op.cu + adagrad_op.cu + adam_op.cu + adamax_op.cu + decayed_adagrad_op.cu + adadelta_op.cu + lamb_op.cu + rmsprop_op.cu) diff --git a/paddle/fluid/operators/prim_ops/CMakeLists.txt b/paddle/fluid/operators/prim_ops/CMakeLists.txt index a58ee6dc1f7..d29933bc196 100644 --- a/paddle/fluid/operators/prim_ops/CMakeLists.txt +++ b/paddle/fluid/operators/prim_ops/CMakeLists.txt @@ -1,11 +1,11 @@ include(operators) if(WITH_UNITY_BUILD) - # Load Unity Build rules for operators in paddle/fluid/operators/prim_ops. - include(unity_build_rule.cmake) + # Load Unity Build rules for operators in paddle/fluid/operators/prim_ops. + include(unity_build_rule.cmake) endif() register_operators() -SET(PRIM_OP_SRCS +set(PRIM_OP_SRCS reshape_p_op.cc broadcast_p_op.cc reduce_p_op.cc @@ -25,4 +25,7 @@ SET(PRIM_OP_SRCS matmul_p_op.cc fill_constant_p_op.cc) -cc_test(prim_op_test SRCS prim_op_test.cc ${PRIM_OP_SRCS} DEPS op_registry) +cc_test( + prim_op_test + SRCS prim_op_test.cc ${PRIM_OP_SRCS} + DEPS op_registry) diff --git a/paddle/fluid/operators/prim_ops/unity_build_rule.cmake b/paddle/fluid/operators/prim_ops/unity_build_rule.cmake index 5d6a732272b..74b04d234fc 100644 --- a/paddle/fluid/operators/prim_ops/unity_build_rule.cmake +++ b/paddle/fluid/operators/prim_ops/unity_build_rule.cmake @@ -1,20 +1,20 @@ -register_unity_group(cc - reshape_p_op.cc - broadcast_p_op.cc - reduce_p_op.cc - transpose_p_op.cc - split_p_op.cc - concat_p_op.cc - slice_select_p_op.cc - slice_assign_p_op.cc - gather_p_op.cc - scatter_add_p_op.cc - add_p_op.cc - sub_p_op.cc - mul_p_op.cc - div_p_op.cc - sqrt_p_op.cc - tanh_p_op.cc - matmul_p_op.cc - fill_constant_p_op.cc - ) +register_unity_group( + cc + reshape_p_op.cc + broadcast_p_op.cc + reduce_p_op.cc + transpose_p_op.cc + split_p_op.cc + concat_p_op.cc + slice_select_p_op.cc + slice_assign_p_op.cc + gather_p_op.cc + scatter_add_p_op.cc + add_p_op.cc + sub_p_op.cc + mul_p_op.cc + div_p_op.cc + sqrt_p_op.cc + tanh_p_op.cc + matmul_p_op.cc + fill_constant_p_op.cc) diff --git a/paddle/fluid/operators/pscore/CMakeLists.txt b/paddle/fluid/operators/pscore/CMakeLists.txt index de0ee481aa6..04407ea117d 100755 --- a/paddle/fluid/operators/pscore/CMakeLists.txt +++ b/paddle/fluid/operators/pscore/CMakeLists.txt @@ -1,49 +1,152 @@ -if (WITH_PSLIB) - return() +if(WITH_PSLIB) + return() endif() include(operators) set(DISTRIBUTE_DEPS "") -if (WITH_ARM_BRPC) - list(APPEND DISTRIBUTE_DEPS executor fleet ps_service brpc_utils heter_server heter_client ps_framework_proto framework_proto sendrecv_rpc arm_brpc gflags glog snappy device_context) +if(WITH_ARM_BRPC) + list( + APPEND + DISTRIBUTE_DEPS + executor + fleet + ps_service + brpc_utils + heter_server + heter_client + ps_framework_proto + framework_proto + sendrecv_rpc + arm_brpc + gflags + glog + snappy + device_context) else() - list(APPEND DISTRIBUTE_DEPS executor fleet ps_service brpc_utils heter_server heter_client ps_framework_proto framework_proto sendrecv_rpc brpc leveldb ssl crypto protobuf gflags glog zlib snappy device_context) + list( + APPEND + DISTRIBUTE_DEPS + executor + fleet + ps_service + brpc_utils + heter_server + heter_client + ps_framework_proto + framework_proto + sendrecv_rpc + brpc + leveldb + ssl + crypto + protobuf + gflags + glog + zlib + snappy + device_context) endif() -set(DISTRIBUTE_COMPILE_FLAGS "-Wno-non-virtual-dtor -Wno-error=non-virtual-dtor -Wno-error=delete-non-virtual-dtor -Wno-error=parentheses") +set(DISTRIBUTE_COMPILE_FLAGS + "-Wno-non-virtual-dtor -Wno-error=non-virtual-dtor -Wno-error=delete-non-virtual-dtor -Wno-error=parentheses" +) -if (CMAKE_CXX_COMPILER_VERSION VERSION_GREATER 7.0) - set(DISTRIBUTE_COMPILE_FLAGS - "${DISTRIBUTE_COMPILE_FLAGS} -faligned-new") +if(CMAKE_CXX_COMPILER_VERSION VERSION_GREATER 7.0) + set(DISTRIBUTE_COMPILE_FLAGS "${DISTRIBUTE_COMPILE_FLAGS} -faligned-new") endif() -file(GLOB OPS RELATIVE "${CMAKE_CURRENT_SOURCE_DIR}" "*_op.cc") +file( + GLOB OPS + RELATIVE "${CMAKE_CURRENT_SOURCE_DIR}" + "*_op.cc") list(REMOVE_DUPLICATES OPS) -foreach (src ${OPS}) - set_source_files_properties(${src} PROPERTIES COMPILE_FLAGS ${DISTRIBUTE_COMPILE_FLAGS}) -endforeach () +foreach(src ${OPS}) + set_source_files_properties(${src} PROPERTIES COMPILE_FLAGS + ${DISTRIBUTE_COMPILE_FLAGS}) +endforeach() register_operators(DEPS ${DISTRIBUTE_DEPS}) -set(OPERATOR_DEPS ${OPERATOR_DEPS} ${DISTRIBUTE_DEPS} PARENT_SCOPE) +set(OPERATOR_DEPS + ${OPERATOR_DEPS} ${DISTRIBUTE_DEPS} + PARENT_SCOPE) -set_source_files_properties(heter_server_test.cc PROPERTIES COMPILE_FLAGS ${DISTRIBUTE_COMPILE_FLAGS}) -cc_test(heter_server_test SRCS heter_server_test.cc DEPS ${RPC_DEPS} ${DISTRIBUTE_DEPS} executor scope proto_desc scale_op eigen_function) +set_source_files_properties( + heter_server_test.cc PROPERTIES COMPILE_FLAGS ${DISTRIBUTE_COMPILE_FLAGS}) +cc_test( + heter_server_test + SRCS heter_server_test.cc + DEPS ${RPC_DEPS} + ${DISTRIBUTE_DEPS} + executor + scope + proto_desc + scale_op + eigen_function) -set_source_files_properties(send_and_recv_op_cpu_test.cc PROPERTIES COMPILE_FLAGS ${DISTRIBUTE_COMPILE_FLAGS}) -cc_test(send_and_recv_cpu_test SRCS send_and_recv_op_cpu_test.cc DEPS executor scope proto_desc scale_op send_and_recv_op ${RPC_DEPS} ${DISTRIBUTE_DEPS} eigen_function) +set_source_files_properties( + send_and_recv_op_cpu_test.cc PROPERTIES COMPILE_FLAGS + ${DISTRIBUTE_COMPILE_FLAGS}) +cc_test( + send_and_recv_cpu_test + SRCS send_and_recv_op_cpu_test.cc + DEPS executor + scope + proto_desc + scale_op + send_and_recv_op + ${RPC_DEPS} + ${DISTRIBUTE_DEPS} + eigen_function) -set_source_files_properties(send_and_recv_op_gpu_test.cc PROPERTIES COMPILE_FLAGS ${DISTRIBUTE_COMPILE_FLAGS}) -cc_test(send_and_recv_gpu_test SRCS send_and_recv_op_gpu_test.cc DEPS executor scope proto_desc scale_op send_and_recv_op ${RPC_DEPS} ${DISTRIBUTE_DEPS} eigen_function) +set_source_files_properties( + send_and_recv_op_gpu_test.cc PROPERTIES COMPILE_FLAGS + ${DISTRIBUTE_COMPILE_FLAGS}) +cc_test( + send_and_recv_gpu_test + SRCS send_and_recv_op_gpu_test.cc + DEPS executor + scope + proto_desc + scale_op + send_and_recv_op + ${RPC_DEPS} + ${DISTRIBUTE_DEPS} + eigen_function) -set_source_files_properties(heter_listen_and_server_test.cc PROPERTIES COMPILE_FLAGS ${DISTRIBUTE_COMPILE_FLAGS}) -cc_test(heter_listen_and_server_test SRCS heter_listen_and_server_test.cc DEPS executor scope proto_desc scale_op heter_listen_and_serv_op ${RPC_DEPS} ${DISTRIBUTE_DEPS} eigen_function) +set_source_files_properties( + heter_listen_and_server_test.cc PROPERTIES COMPILE_FLAGS + ${DISTRIBUTE_COMPILE_FLAGS}) +cc_test( + heter_listen_and_server_test + SRCS heter_listen_and_server_test.cc + DEPS executor + scope + proto_desc + scale_op + heter_listen_and_serv_op + ${RPC_DEPS} + ${DISTRIBUTE_DEPS} + eigen_function) #set_source_files_properties(heter_cloud_comm_cpu_test.cc PROPERTIES COMPILE_FLAGS ${DISTRIBUTE_COMPILE_FLAGS}) #cc_test(heter_cloud_comm_cpu_test SRCS heter_cloud_comm_cpu_test.cc DEPS executor scope proto_desc scale_op heter_listen_and_serv_op ${RPC_DEPS} ${DISTRIBUTE_DEPS} eigen_function) -set_source_files_properties(switch_server_test.cc PROPERTIES COMPILE_FLAGS ${DISTRIBUTE_COMPILE_FLAGS}) -cc_binary(switch_server_test SRCS switch_server_test.cc DEPS executor scope proto_desc scale_op heter_listen_and_serv_op ${RPC_DEPS} ${DISTRIBUTE_DEPS} eigen_function) +set_source_files_properties( + switch_server_test.cc PROPERTIES COMPILE_FLAGS ${DISTRIBUTE_COMPILE_FLAGS}) +cc_binary( + switch_server_test + SRCS + switch_server_test.cc + DEPS + executor + scope + proto_desc + scale_op + heter_listen_and_serv_op + ${RPC_DEPS} + ${DISTRIBUTE_DEPS} + eigen_function) diff --git a/paddle/fluid/operators/reader/CMakeLists.txt b/paddle/fluid/operators/reader/CMakeLists.txt index 7e06b45943c..9dd59de98d5 100644 --- a/paddle/fluid/operators/reader/CMakeLists.txt +++ b/paddle/fluid/operators/reader/CMakeLists.txt @@ -1,26 +1,36 @@ include(operators) -cc_library(reader_op_registry SRCS reader_op_registry.cc DEPS operator op_registry reader) +cc_library( + reader_op_registry + SRCS reader_op_registry.cc + DEPS operator op_registry reader) set(LOCAL_READER_LIBS) function(reader_library TARGET_NAME) - set(oneValueArgs "") - set(multiValueArgs SRCS DEPS) - set(options "") - set(common_deps reader_op_registry) - cmake_parse_arguments(reader_library "${options}" "${oneValueArgs}" - "${multiValueArgs}" ${ARGN}) - op_library(${TARGET_NAME} SRCS ${reader_library_SRCS} DEPS ${common_deps} ${reader_library_DEPS}) - set(LOCAL_READER_LIBS - ${TARGET_NAME} - ${LOCAL_READER_LIBS} - PARENT_SCOPE) + set(oneValueArgs "") + set(multiValueArgs SRCS DEPS) + set(options "") + set(common_deps reader_op_registry) + cmake_parse_arguments(reader_library "${options}" "${oneValueArgs}" + "${multiValueArgs}" ${ARGN}) + op_library(${TARGET_NAME} SRCS ${reader_library_SRCS} DEPS ${common_deps} + ${reader_library_DEPS}) + set(LOCAL_READER_LIBS + ${TARGET_NAME} ${LOCAL_READER_LIBS} + PARENT_SCOPE) endfunction() -cc_library(py_reader SRCS py_reader.cc DEPS reader) -cc_library(buffered_reader SRCS buffered_reader.cc DEPS reader simple_threadpool) +cc_library( + py_reader + SRCS py_reader.cc + DEPS reader) +cc_library( + buffered_reader + SRCS buffered_reader.cc + DEPS reader simple_threadpool) -reader_library(create_double_buffer_reader_op SRCS create_double_buffer_reader_op.cc DEPS buffered_reader) +reader_library(create_double_buffer_reader_op SRCS + create_double_buffer_reader_op.cc DEPS buffered_reader) reader_library(create_py_reader_op SRCS create_py_reader_op.cc DEPS py_reader) op_library(read_op DEPS py_reader buffered_reader) diff --git a/paddle/fluid/operators/reduce_ops/CMakeLists.txt b/paddle/fluid/operators/reduce_ops/CMakeLists.txt index 9a2abfd93d0..7c2f91999e9 100644 --- a/paddle/fluid/operators/reduce_ops/CMakeLists.txt +++ b/paddle/fluid/operators/reduce_ops/CMakeLists.txt @@ -1,30 +1,42 @@ include(operators) if(WITH_UNITY_BUILD) - # Load Unity Build rules for operators in paddle/fluid/operators/reduce_ops. - include(unity_build_rule.cmake) + # Load Unity Build rules for operators in paddle/fluid/operators/reduce_ops. + include(unity_build_rule.cmake) endif() if(WITH_GPU) - if(${CMAKE_CUDA_COMPILER_VERSION} LESS 11.0) - register_operators(DEPS cub) - else() - register_operators() - endif() -else() + if(${CMAKE_CUDA_COMPILER_VERSION} LESS 11.0) + register_operators(DEPS cub) + else() register_operators() + endif() +else() + register_operators() endif() if(WITH_GPU) - if (${CMAKE_CUDA_COMPILER_VERSION} LESS 11.0) - nv_test(check_reduce_rank_test SRCS check_reduce_rank_test.cu DEPS tensor cub) - else() - nv_test(check_reduce_rank_test SRCS check_reduce_rank_test.cu DEPS tensor) - endif() + if(${CMAKE_CUDA_COMPILER_VERSION} LESS 11.0) + nv_test( + check_reduce_rank_test + SRCS check_reduce_rank_test.cu + DEPS tensor cub) + else() + nv_test( + check_reduce_rank_test + SRCS check_reduce_rank_test.cu + DEPS tensor) + endif() endif() if(WITH_ROCM) - hip_test(check_reduce_rank_test SRCS check_reduce_rank_test.cu DEPS tensor) + hip_test( + check_reduce_rank_test + SRCS check_reduce_rank_test.cu + DEPS tensor) endif() if(WITH_ASCEND_CL) - cc_test(reduce_any_op_npu_test SRCS reduce_any_op_npu_test.cc DEPS op_registry reduce_any_op scope device_context enforce executor) + cc_test( + reduce_any_op_npu_test + SRCS reduce_any_op_npu_test.cc + DEPS op_registry reduce_any_op scope device_context enforce executor) endif() diff --git a/paddle/fluid/operators/reduce_ops/unity_build_rule.cmake b/paddle/fluid/operators/reduce_ops/unity_build_rule.cmake index c4f32a8d257..f5c1af004f3 100644 --- a/paddle/fluid/operators/reduce_ops/unity_build_rule.cmake +++ b/paddle/fluid/operators/reduce_ops/unity_build_rule.cmake @@ -4,18 +4,16 @@ # Generally, the combination rules in this file do not need to be modified. # If there are some redefined error in compiling with the source file which # in combination rule, you can remove the source file from the following rules. -register_unity_group(cc - reduce_all_op.cc - reduce_any_op.cc - reduce_prod_op.cc - reduce_sum_op.cc) -register_unity_group(cu - reduce_all_op.cu - reduce_any_op.cu - reduce_prod_op.cu - reduce_prod_op.part.cu - reduce_sum_op.cu - reduce_sum_op.part.cu) +register_unity_group(cc reduce_all_op.cc reduce_any_op.cc reduce_prod_op.cc + reduce_sum_op.cc) +register_unity_group( + cu + reduce_all_op.cu + reduce_any_op.cu + reduce_prod_op.cu + reduce_prod_op.part.cu + reduce_sum_op.cu + reduce_sum_op.part.cu) # The following groups are to make better use of `/MP` which MSVC's parallel # compilation instruction when compiling in Unity Build. register_unity_group(cu frobenius_norm_op.cu) diff --git a/paddle/fluid/operators/sequence_ops/CMakeLists.txt b/paddle/fluid/operators/sequence_ops/CMakeLists.txt index 0ca88409f41..fe36afd96c5 100644 --- a/paddle/fluid/operators/sequence_ops/CMakeLists.txt +++ b/paddle/fluid/operators/sequence_ops/CMakeLists.txt @@ -1,6 +1,6 @@ include(operators) if(WITH_UNITY_BUILD) - # Load Unity Build rules for operators in paddle/fluid/operators/sequence_ops. - include(unity_build_rule.cmake) + # Load Unity Build rules for operators in paddle/fluid/operators/sequence_ops. + include(unity_build_rule.cmake) endif() register_operators() diff --git a/paddle/fluid/operators/sequence_ops/unity_build_rule.cmake b/paddle/fluid/operators/sequence_ops/unity_build_rule.cmake index 9ccc4432df5..9a87e27b241 100644 --- a/paddle/fluid/operators/sequence_ops/unity_build_rule.cmake +++ b/paddle/fluid/operators/sequence_ops/unity_build_rule.cmake @@ -4,36 +4,38 @@ # Generally, the combination rules in this file do not need to be modified. # If there are some redefined error in compiling with the source file which # in combination rule, you can remove the source file from the following rules. -register_unity_group(cc - sequence_concat_op.cc - sequence_conv_op.cc - sequence_enumerate_op.cc - sequence_erase_op.cc - sequence_expand_op.cc - sequence_mask_op.cc - sequence_pad_op.cc - sequence_pool_op.cc - sequence_expand_as_op.cc - sequence_reshape_op.cc - sequence_reverse_op.cc - sequence_scatter_op.cc - sequence_slice_op.cc - sequence_softmax_op.cc - sequence_topk_avg_pooling_op.cc - sequence_unpad_op.cc - sequence_concat_op.cu.cc - sequence_conv_op.cu.cc) -register_unity_group(cu - sequence_enumerate_op.cu - sequence_erase_op.cu - sequence_expand_op.cu - sequence_mask_op.cu - sequence_pad_op.cu - sequence_pool_op.cu - sequence_expand_as_op.cu - sequence_reshape_op.cu - sequence_reverse_op.cu - sequence_slice_op.cu - sequence_softmax_cudnn_op.cu.cc - sequence_softmax_op.cu - sequence_unpad_op.cu) +register_unity_group( + cc + sequence_concat_op.cc + sequence_conv_op.cc + sequence_enumerate_op.cc + sequence_erase_op.cc + sequence_expand_op.cc + sequence_mask_op.cc + sequence_pad_op.cc + sequence_pool_op.cc + sequence_expand_as_op.cc + sequence_reshape_op.cc + sequence_reverse_op.cc + sequence_scatter_op.cc + sequence_slice_op.cc + sequence_softmax_op.cc + sequence_topk_avg_pooling_op.cc + sequence_unpad_op.cc + sequence_concat_op.cu.cc + sequence_conv_op.cu.cc) +register_unity_group( + cu + sequence_enumerate_op.cu + sequence_erase_op.cu + sequence_expand_op.cu + sequence_mask_op.cu + sequence_pad_op.cu + sequence_pool_op.cu + sequence_expand_as_op.cu + sequence_reshape_op.cu + sequence_reverse_op.cu + sequence_slice_op.cu + sequence_softmax_cudnn_op.cu.cc + sequence_softmax_op.cu + sequence_unpad_op.cu) diff --git a/paddle/fluid/operators/string/unity_build_rule.cmake b/paddle/fluid/operators/string/unity_build_rule.cmake index a4b209d2df1..90922407ec7 100644 --- a/paddle/fluid/operators/string/unity_build_rule.cmake +++ b/paddle/fluid/operators/string/unity_build_rule.cmake @@ -4,5 +4,4 @@ # Generally, the combination rules in this file do not need to be modified. # If there are some redefined error in compiling with the source file which # in combination rule, you can remove the source file from the following rules. -register_unity_group(cc - faster_tokenizer_op.cc) \ No newline at end of file +register_unity_group(cc faster_tokenizer_op.cc) diff --git a/paddle/fluid/operators/tensorrt/CMakeLists.txt b/paddle/fluid/operators/tensorrt/CMakeLists.txt index a7f18245ab9..e0fed2804a9 100644 --- a/paddle/fluid/operators/tensorrt/CMakeLists.txt +++ b/paddle/fluid/operators/tensorrt/CMakeLists.txt @@ -1,4 +1,6 @@ -op_library(tensorrt_engine_op DEPS tensorrt_engine tensorrt_converter infer_io_utils analysis_helper) -nv_test(test_tensorrt_engine_op SRCS tensorrt_engine_op_test.cc - DEPS tensorrt_engine_op - analysis) +op_library(tensorrt_engine_op DEPS tensorrt_engine tensorrt_converter + infer_io_utils analysis_helper) +nv_test( + test_tensorrt_engine_op + SRCS tensorrt_engine_op_test.cc + DEPS tensorrt_engine_op analysis) diff --git a/paddle/fluid/operators/unity_build_rule.cmake b/paddle/fluid/operators/unity_build_rule.cmake index 1be8f3387db..62aa990ca7b 100644 --- a/paddle/fluid/operators/unity_build_rule.cmake +++ b/paddle/fluid/operators/unity_build_rule.cmake @@ -4,533 +4,569 @@ # Generally, the combination rules in this file do not need to be modified. # If there are some redefined error in compiling with the source file which # in combination rule, you can remove the source file from the following rules. -register_unity_group(cc - abs_op.cc - add_position_encoding_op.cc - addmm_op.cc - affine_channel_op.cc - affine_grid_op.cc - allclose_op.cc - argsort_op.cc - array_to_lod_tensor_op.cc - assert_op.cc - assign_op.cc - assign_value_op.cc - attention_lstm_op.cc - average_accumulates_op.cc - batch_fc_op.cc - bce_loss_op.cc - beam_search_op.cc - beam_search_decode_op.cc - bernoulli_op.cc - bilateral_slice_op.cc) -register_unity_group(cc - mkldnn/batch_norm_mkldnn_op.cc - bilinear_tensor_product_op.cc - bmm_op.cc - bpr_loss_op.cc - cast_op.cc - mkldnn/cast_mkldnn_op.cc - cholesky_op.cc - chunk_eval_op.cc - clip_by_norm_op.cc - clip_op.cc - coalesce_tensor_op.cc - mkldnn/activation_mkldnn_op.cc - mkldnn/interpolate_mkldnn_op.cc - mkldnn/pool_mkldnn_op.cc - mkldnn/softmax_mkldnn_op.cc) -register_unity_group(cc - center_loss_op.cc - mkldnn/concat_mkldnn_op.cc - mkldnn/conv_mkldnn_op.cc - mkldnn/conv_transpose_mkldnn_op.cc - correlation_op.cc - cos_sim_op.cc - crf_decoding_op.cc - crop_op.cc - ascend_trigger_op.cc - conj_op.cc - imag_op.cc - kldiv_loss_op.cc - memcpy_op.cc) -register_unity_group(cc - cross_entropy_op.cc - cross_op.cc - ctc_align_op.cc - cudnn_lstm_op.cc - cumsum_op.cc - cvm_op.cc - data_norm_op.cc - deformable_conv_op.cc - deformable_conv_v1_op.cc - deformable_psroi_pooling_op.cc - delete_var_op.cc - dequantize_abs_max_op.cc - dequantize_op.cc - mkldnn/dequantize_mkldnn_op.cc) -register_unity_group(cc - dequeue_op.cc - detection_map_op.cc - dgc_clip_by_norm_op.cc - diag_embed_op.cc - diag_op.cc - diag_v2_op.cc - dot_op.cc - edit_distance_op.cc - empty_op.cc - enqueue_op.cc - erf_op.cc - py_func_op.cc - real_op.cc - sync_batch_norm_op.cc - top_k_op.cc - conv_op.cc - conv_transpose_op.cc - gru_unit_op.cc) -register_unity_group(cc - expand_v2_op.cc - fake_dequantize_op.cc - fc_op.cc - mkldnn/fc_mkldnn_op.cc - fill_any_like_op.cc - fill_constant_batch_size_like_op.cc - fill_constant_op.cc - fill_op.cc - fill_zeros_like_op.cc - filter_by_instag_op.cc) -register_unity_group(cc - flatten_op.cc - flip_op.cc - fsp_op.cc - gather_nd_op.cc - gather_op.cc - gather_tree_op.cc - gaussian_random_batch_size_like_op.cc - gaussian_random_op.cc - mkldnn/gaussian_random_mkldnn_op.cc - group_norm_op.cc gru_op.cc) -register_unity_group(cc - hash_op.cc - hierarchical_sigmoid_op.cc - hinge_loss_op.cc - histogram_op.cc - huber_loss_op.cc - im2sequence_op.cc - increment_op.cc - index_sample_op.cc - index_select_op.cc - interpolate_op.cc - isfinite_v2_op.cc) -register_unity_group(cc - inplace_abn_op.cc - interpolate_v2_op.cc - inverse_op.cc - is_empty_op.cc - isfinite_op.cc - kron_op.cc - l1_norm_op.cc - label_smooth_op.cc - layer_norm_op.cc - mkldnn/layer_norm_mkldnn_op.cc - mkldnn/layer_norm_mkldnn_op.cc - linspace_op.cc - load_combine_op.cc - load_op.cc) -register_unity_group(cc - lod_array_length_op.cc - lod_rank_table_op.cc - lod_reset_op.cc - lod_tensor_to_array_op.cc - log_softmax_op.cc - lookup_table_dequant_op.cc - lrn_op.cc - mkldnn/lrn_mkldnn_op.cc - lstm_unit_op.cc - lstmp_op.cc) -register_unity_group(cc - log_loss_op.cc - lookup_table_v2_op.cc - margin_rank_loss_op.cc - masked_select_op.cc - match_matrix_tensor_op.cc - matmul_op.cc - mkldnn/matmul_mkldnn_op.cc - max_sequence_len_op.cc - maxout_op.cc - merge_lod_tensor_op.cc - merge_selected_rows_op.cc - meshgrid_op.cc) -register_unity_group(cc - concat_op.cc - conv_shift_op.cc - dequantize_log_op.cc - dropout_op.cc - expand_op.cc - fake_quantize_op.cc - gelu_op.cc - get_tensor_from_selected_rows_op.cc - lookup_table_op.cc - matmul_v2_op.cc) -register_unity_group(cc - mean_iou_op.cc - mean_op.cc - minus_op.cc - mish_op.cc - mul_op.cc - multinomial_op.cc - multiplex_op.cc - mv_op.cc - nce_op.cc - nll_loss_op.cc - norm_op.cc - one_hot_op.cc - one_hot_v2_op.cc - pad2d_op.cc - pad3d_op.cc - pad_constant_like_op.cc - pad_op.cc) -register_unity_group(cc - modified_huber_loss_op.cc - partial_sum_op.cc - pixel_shuffle_op.cc - pool_op.cc - pool_with_index_op.cc - positive_negative_pair_op.cc - prelu_op.cc - print_op.cc - prroi_pool_op.cc - psroi_pool_op.cc - pull_box_extended_sparse_op.cc - pull_box_sparse_op.cc - pull_sparse_op.cc - pull_sparse_v2_op.cc) -register_unity_group(cc - push_dense_op.cc - quantize_op.cc - mkldnn/quantize_mkldnn_op.cc - queue_generator_op.cc - randint_op.cc - random_crop_op.cc - randperm_op.cc - range_op.cc - rank_attention_op.cc - rank_loss_op.cc - recurrent_op.cc - reorder_lod_tensor_by_rank_op.cc - requantize_op.cc - mkldnn/requantize_mkldnn_op.cc - reshape_op.cc - reverse_op.cc) -register_unity_group(cc - rnn_memory_helper_op.cc - roi_align_op.cc - roll_op.cc - run_program_op.cc - sample_logits_op.cc - sampling_id_op.cc - save_combine_op.cc - save_op.cc - scale_op.cc - mkldnn/scale_mkldnn_op.cc - scatter_nd_add_op.cc - scatter_op.cc - seed_op.cc - select_input_op.cc - select_output_op.cc) -register_unity_group(cc - roi_pool_op.cc - selu_op.cc - shape_op.cc - shard_index_op.cc - shrink_rnn_memory_op.cc - shuffle_batch_op.cc - shuffle_channel_op.cc - sigmoid_cross_entropy_with_logits_op.cc - sign_op.cc - similarity_focus_op.cc - size_op.cc - slice_op.cc - softmax_op.cc) -register_unity_group(cc - space_to_depth_op.cc - spectral_norm_op.cc - split_lod_tensor_op.cc - split_op.cc - split_selected_rows_op.cc - spp_op.cc - squared_l2_norm_op.cc - squeeze_op.cc - stack_op.cc - strided_slice_op.cc - sum_op.cc - mkldnn/sum_mkldnn_op.cc - tdm_child_op.cc - tdm_sampler_op.cc - teacher_student_sigmoid_loss_op.cc - temporal_shift_op.cc) -register_unity_group(cc - row_conv_op.cc - tensor_array_to_tensor_op.cc - tile_op.cc - top_k_v2_op.cc - trace_op.cc - transpose_op.cc - mkldnn/transpose_mkldnn_op.cc - tree_conv_op.cc - tril_triu_op.cc - truncated_gaussian_random_op.cc - unbind_op.cc - unfold_op.cc) -register_unity_group(cc - smooth_l1_loss_op.cc - uniform_random_batch_size_like_op.cc - uniform_random_op.cc - unique_op.cc - unique_with_counts_op.cc - unpool_op.cc - unsqueeze_op.cc - unstack_op.cc - var_conv_2d_op.cc - where_index_op.cc - where_op.cc) -register_unity_group(cc - affine_grid_cudnn_op.cu.cc - beam_search_op.cu.cc - cudnn_lstm_op.cu.cc - empty_op.cu.cc - fc_op.cu.cc - fill_constant_batch_size_like_op.cu.cc - fill_constant_op.cu.cc - fill_op.cu.cc - fill_zeros_like_op.cu.cc - flatten_op.cu.cc - grid_sampler_cudnn_op.cu.cc - gru_op.cu.cc - inverse_op.cu.cc - is_empty_op.cu.cc - maxout_op.cu.cc - mul_op.cu.cc - concat_op.cu.cc - mul_op.cu.cc - pool_op.cu.cc - pool_cudnn_op.cu.cc - pool_with_index_op.cu.cc - run_program_op.cu.cc - softmax_op.cu.cc - softmax_cudnn_op.cu.cc - spp_op.cu.cc - squeeze_op.cu.cc - unbind_op.cu.cc - unpool_op.cu.cc - unsqueeze_op.cu.cc) -register_unity_group(cc - arg_max_op.cc - arg_min_op.cc - squared_l2_distance_op.cc) -register_unity_group(cc - linear_chain_crf_op.cc - lstm_op.cc - partial_concat_op.cc - pyramid_hash_op.cc - recurrent_op.cc - run_program_op.cc - softmax_with_cross_entropy_op.cc - warpctc_op.cc) -register_unity_group(cc - conv_op.cu.cc - lstm_op.cu.cc - rnn_op.cu.cc - split_op.cu.cc - activation_cudnn_op.cu.cc - assign_value_op.cu.cc - merge_selected_rows_op.cu.cc - run_program_op.cu.cc - warpctc_op.cu.cc) -register_unity_group(cu - addmm_op.cu - affine_channel_op.cu - allclose_op.cu - assign_value_op.cu - bce_loss_op.cu - bernoulli_op.cu - bilateral_slice_op.cu - batch_norm_op.cu) -register_unity_group(cu - bilinear_tensor_product_op.cu - bmm_op.cu - cast_op.cu - cholesky_op.cu - clip_by_norm_op.cu - clip_op.cu - conv_cudnn_op.cu - affine_grid_op.cu) -register_unity_group(cu - center_loss_op.cu - conv_op.cu - conv_transpose_cudnn_op.cu - conv_transpose_op.cu - cos_sim_op.cu - crop_op.cu - average_accumulates_op.cu - conj_op.cu - correlation_op.cu) -register_unity_group(cu - cross_entropy_op.cu - cross_op.cu - ctc_align_op.cu - cumsum_op.cu - cvm_op.cu - data_norm_op.cu - deformable_conv_op.cu - deformable_conv_v1_op.cu - dequantize_abs_max_op.cu) -register_unity_group(cu - dgc_clip_by_norm_op.cu - diag_embed_op.cu - diag_op.cu - diag_v2_op.cu - edit_distance_op.cu - erf_op.cu - meshgrid_op.cu - imag_op.cu) -register_unity_group(cu - expand_v2_op.cu - fake_dequantize_op.cu - fill_any_like_op.cu) -register_unity_group(cu - flip_op.cu - fsp_op.cu - gather_nd_op.cu - gather_op.cu - gather_tree_op.cu - gaussian_random_op.cu - grid_sampler_op.cu - group_norm_op.cu) -register_unity_group(cu - hinge_loss_op.cu - histogram_op.cu - huber_loss_op.cu - im2sequence_op.cu - increment_op.cu - index_sample_op.cu - index_select_op.cu - interpolate_op.cu - isfinite_v2_op.cu) -register_unity_group(cu - inplace_abn_op.cu - interpolate_v2_op.cu - isfinite_op.cu - l1_norm_op.cu - label_smooth_op.cu - linspace_op.cu - load_combine_op.cu - load_op.cu) -register_unity_group(cu - lod_reset_op.cu - log_softmax_op.cu - lrn_op.cu - lstm_unit_op.cu - dot_op.cu - psroi_pool_op.cu - rank_loss_op.cu - real_op.cu) -register_unity_group(cu - log_loss_op.cu - lookup_table_v2_op.cu - margin_rank_loss_op.cu - masked_select_op.cu - merge_selected_rows_op.cu - lstmp_op.cu - shuffle_channel_op.cu - softmax_cudnn_op.cu - squared_l2_distance_op.cu) -register_unity_group(cu - conv_shift_op.cu - dequantize_log_op.cu - dropout_op.cu - fake_quantize_op.cu - gelu_op.cu - lookup_table_op.cu - sigmoid_cross_entropy_with_logits_op.cu - softmax_with_cross_entropy_op.cu) -register_unity_group(cu - mean_iou_op.cu - mean_op.cu - minus_op.cu - mish_op.cu - multinomial_op.cu - multiplex_op.cu - mv_op.cu - nll_loss_op.cu - norm_op.cu - one_hot_op.cu - pad2d_op.cu - pad3d_op.cu - pad_constant_like_op.cu - pad_op.cu) -register_unity_group(cu - partial_sum_op.cu - pixel_shuffle_op.cu - prelu_op.cu - prroi_pool_op.cu - pull_box_extended_sparse_op.cu - pull_box_sparse_op.cu) -register_unity_group(cu - randint_op.cu - random_crop_op.cu - randperm_op.cu - range_op.cu - reverse_op.cu - partial_concat_op.cu - kldiv_loss_op.cu - instance_norm_op.cu) -register_unity_group(cu - roi_align_op.cu - roll_op.cu - sample_logits_op.cu - sampling_id_op.cu - save_combine_op.cu - save_op.cu - scale_op.cu - scatter_nd_add_op.cu - scatter_op.cu - seed_op.cu) -register_unity_group(cu - roi_pool_op.cu - selu_op.cu - shape_op.cu - shard_index_op.cu - sign_op.cu - size_op.cu - slice_op.cu) -register_unity_group(cu - space_to_depth_op.cu - spectral_norm_op.cu - split_op.cu - split_selected_rows_op.cu - squared_l2_norm_op.cu - sum_op.cu - temporal_shift_op.cu - arg_max_op.cu) -register_unity_group(cu - row_conv_op.cu - tree_conv_op.cu - tril_triu_op.cu - truncated_gaussian_random_op.cu - unfold_op.cu - arg_min_op.cu - crop_tensor_op.cu) -register_unity_group(cu - smooth_l1_loss_op.cu - uniform_random_op.cu - unstack_op.cu - where_index_op.cu - where_op.cu - layer_norm_op.cu) -register_unity_group(cu - expand_as_op.cu - stack_op.cu) +register_unity_group( + cc + abs_op.cc + add_position_encoding_op.cc + addmm_op.cc + affine_channel_op.cc + affine_grid_op.cc + allclose_op.cc + argsort_op.cc + array_to_lod_tensor_op.cc + assert_op.cc + assign_op.cc + assign_value_op.cc + attention_lstm_op.cc + average_accumulates_op.cc + batch_fc_op.cc + bce_loss_op.cc + beam_search_op.cc + beam_search_decode_op.cc + bernoulli_op.cc + bilateral_slice_op.cc) +register_unity_group( + cc + mkldnn/batch_norm_mkldnn_op.cc + bilinear_tensor_product_op.cc + bmm_op.cc + bpr_loss_op.cc + cast_op.cc + mkldnn/cast_mkldnn_op.cc + cholesky_op.cc + chunk_eval_op.cc + clip_by_norm_op.cc + clip_op.cc + coalesce_tensor_op.cc + mkldnn/activation_mkldnn_op.cc + mkldnn/interpolate_mkldnn_op.cc + mkldnn/pool_mkldnn_op.cc + mkldnn/softmax_mkldnn_op.cc) +register_unity_group( + cc + center_loss_op.cc + mkldnn/concat_mkldnn_op.cc + mkldnn/conv_mkldnn_op.cc + mkldnn/conv_transpose_mkldnn_op.cc + correlation_op.cc + cos_sim_op.cc + crf_decoding_op.cc + crop_op.cc + ascend_trigger_op.cc + conj_op.cc + imag_op.cc + kldiv_loss_op.cc + memcpy_op.cc) +register_unity_group( + cc + cross_entropy_op.cc + cross_op.cc + ctc_align_op.cc + cudnn_lstm_op.cc + cumsum_op.cc + cvm_op.cc + data_norm_op.cc + deformable_conv_op.cc + deformable_conv_v1_op.cc + deformable_psroi_pooling_op.cc + delete_var_op.cc + dequantize_abs_max_op.cc + dequantize_op.cc + mkldnn/dequantize_mkldnn_op.cc) +register_unity_group( + cc + dequeue_op.cc + detection_map_op.cc + dgc_clip_by_norm_op.cc + diag_embed_op.cc + diag_op.cc + diag_v2_op.cc + dot_op.cc + edit_distance_op.cc + empty_op.cc + enqueue_op.cc + erf_op.cc + py_func_op.cc + real_op.cc + sync_batch_norm_op.cc + top_k_op.cc + conv_op.cc + conv_transpose_op.cc + gru_unit_op.cc) +register_unity_group( + cc + expand_v2_op.cc + fake_dequantize_op.cc + fc_op.cc + mkldnn/fc_mkldnn_op.cc + fill_any_like_op.cc + fill_constant_batch_size_like_op.cc + fill_constant_op.cc + fill_op.cc + fill_zeros_like_op.cc + filter_by_instag_op.cc) +register_unity_group( + cc + flatten_op.cc + flip_op.cc + fsp_op.cc + gather_nd_op.cc + gather_op.cc + gather_tree_op.cc + gaussian_random_batch_size_like_op.cc + gaussian_random_op.cc + mkldnn/gaussian_random_mkldnn_op.cc + group_norm_op.cc + gru_op.cc) +register_unity_group( + cc + hash_op.cc + hierarchical_sigmoid_op.cc + hinge_loss_op.cc + histogram_op.cc + huber_loss_op.cc + im2sequence_op.cc + increment_op.cc + index_sample_op.cc + index_select_op.cc + interpolate_op.cc + isfinite_v2_op.cc) +register_unity_group( + cc + inplace_abn_op.cc + interpolate_v2_op.cc + inverse_op.cc + is_empty_op.cc + isfinite_op.cc + kron_op.cc + l1_norm_op.cc + label_smooth_op.cc + layer_norm_op.cc + mkldnn/layer_norm_mkldnn_op.cc + mkldnn/layer_norm_mkldnn_op.cc + linspace_op.cc + load_combine_op.cc + load_op.cc) +register_unity_group( + cc + lod_array_length_op.cc + lod_rank_table_op.cc + lod_reset_op.cc + lod_tensor_to_array_op.cc + log_softmax_op.cc + lookup_table_dequant_op.cc + lrn_op.cc + mkldnn/lrn_mkldnn_op.cc + lstm_unit_op.cc + lstmp_op.cc) +register_unity_group( + cc + log_loss_op.cc + lookup_table_v2_op.cc + margin_rank_loss_op.cc + masked_select_op.cc + match_matrix_tensor_op.cc + matmul_op.cc + mkldnn/matmul_mkldnn_op.cc + max_sequence_len_op.cc + maxout_op.cc + merge_lod_tensor_op.cc + merge_selected_rows_op.cc + meshgrid_op.cc) +register_unity_group( + cc + concat_op.cc + conv_shift_op.cc + dequantize_log_op.cc + dropout_op.cc + expand_op.cc + fake_quantize_op.cc + gelu_op.cc + get_tensor_from_selected_rows_op.cc + lookup_table_op.cc + matmul_v2_op.cc) +register_unity_group( + cc + mean_iou_op.cc + mean_op.cc + minus_op.cc + mish_op.cc + mul_op.cc + multinomial_op.cc + multiplex_op.cc + mv_op.cc + nce_op.cc + nll_loss_op.cc + norm_op.cc + one_hot_op.cc + one_hot_v2_op.cc + pad2d_op.cc + pad3d_op.cc + pad_constant_like_op.cc + pad_op.cc) +register_unity_group( + cc + modified_huber_loss_op.cc + partial_sum_op.cc + pixel_shuffle_op.cc + pool_op.cc + pool_with_index_op.cc + positive_negative_pair_op.cc + prelu_op.cc + print_op.cc + prroi_pool_op.cc + psroi_pool_op.cc + pull_box_extended_sparse_op.cc + pull_box_sparse_op.cc + pull_sparse_op.cc + pull_sparse_v2_op.cc) +register_unity_group( + cc + push_dense_op.cc + quantize_op.cc + mkldnn/quantize_mkldnn_op.cc + queue_generator_op.cc + randint_op.cc + random_crop_op.cc + randperm_op.cc + range_op.cc + rank_attention_op.cc + rank_loss_op.cc + recurrent_op.cc + reorder_lod_tensor_by_rank_op.cc + requantize_op.cc + mkldnn/requantize_mkldnn_op.cc + reshape_op.cc + reverse_op.cc) +register_unity_group( + cc + rnn_memory_helper_op.cc + roi_align_op.cc + roll_op.cc + run_program_op.cc + sample_logits_op.cc + sampling_id_op.cc + save_combine_op.cc + save_op.cc + scale_op.cc + mkldnn/scale_mkldnn_op.cc + scatter_nd_add_op.cc + scatter_op.cc + seed_op.cc + select_input_op.cc + select_output_op.cc) +register_unity_group( + cc + roi_pool_op.cc + selu_op.cc + shape_op.cc + shard_index_op.cc + shrink_rnn_memory_op.cc + shuffle_batch_op.cc + shuffle_channel_op.cc + sigmoid_cross_entropy_with_logits_op.cc + sign_op.cc + similarity_focus_op.cc + size_op.cc + slice_op.cc + softmax_op.cc) +register_unity_group( + cc + space_to_depth_op.cc + spectral_norm_op.cc + split_lod_tensor_op.cc + split_op.cc + split_selected_rows_op.cc + spp_op.cc + squared_l2_norm_op.cc + squeeze_op.cc + stack_op.cc + strided_slice_op.cc + sum_op.cc + mkldnn/sum_mkldnn_op.cc + tdm_child_op.cc + tdm_sampler_op.cc + teacher_student_sigmoid_loss_op.cc + temporal_shift_op.cc) +register_unity_group( + cc + row_conv_op.cc + tensor_array_to_tensor_op.cc + tile_op.cc + top_k_v2_op.cc + trace_op.cc + transpose_op.cc + mkldnn/transpose_mkldnn_op.cc + tree_conv_op.cc + tril_triu_op.cc + truncated_gaussian_random_op.cc + unbind_op.cc + unfold_op.cc) +register_unity_group( + cc + smooth_l1_loss_op.cc + uniform_random_batch_size_like_op.cc + uniform_random_op.cc + unique_op.cc + unique_with_counts_op.cc + unpool_op.cc + unsqueeze_op.cc + unstack_op.cc + var_conv_2d_op.cc + where_index_op.cc + where_op.cc) +register_unity_group( + cc + affine_grid_cudnn_op.cu.cc + beam_search_op.cu.cc + cudnn_lstm_op.cu.cc + empty_op.cu.cc + fc_op.cu.cc + fill_constant_batch_size_like_op.cu.cc + fill_constant_op.cu.cc + fill_op.cu.cc + fill_zeros_like_op.cu.cc + flatten_op.cu.cc + grid_sampler_cudnn_op.cu.cc + gru_op.cu.cc + inverse_op.cu.cc + is_empty_op.cu.cc + maxout_op.cu.cc + mul_op.cu.cc + concat_op.cu.cc + mul_op.cu.cc + pool_op.cu.cc + pool_cudnn_op.cu.cc + pool_with_index_op.cu.cc + run_program_op.cu.cc + softmax_op.cu.cc + softmax_cudnn_op.cu.cc + spp_op.cu.cc + squeeze_op.cu.cc + unbind_op.cu.cc + unpool_op.cu.cc + unsqueeze_op.cu.cc) +register_unity_group(cc arg_max_op.cc arg_min_op.cc squared_l2_distance_op.cc) +register_unity_group( + cc + linear_chain_crf_op.cc + lstm_op.cc + partial_concat_op.cc + pyramid_hash_op.cc + recurrent_op.cc + run_program_op.cc + softmax_with_cross_entropy_op.cc + warpctc_op.cc) +register_unity_group( + cc + conv_op.cu.cc + lstm_op.cu.cc + rnn_op.cu.cc + split_op.cu.cc + activation_cudnn_op.cu.cc + assign_value_op.cu.cc + merge_selected_rows_op.cu.cc + run_program_op.cu.cc + warpctc_op.cu.cc) +register_unity_group( + cu + addmm_op.cu + affine_channel_op.cu + allclose_op.cu + assign_value_op.cu + bce_loss_op.cu + bernoulli_op.cu + bilateral_slice_op.cu + batch_norm_op.cu) +register_unity_group( + cu + bilinear_tensor_product_op.cu + bmm_op.cu + cast_op.cu + cholesky_op.cu + clip_by_norm_op.cu + clip_op.cu + conv_cudnn_op.cu + affine_grid_op.cu) +register_unity_group( + cu + center_loss_op.cu + conv_op.cu + conv_transpose_cudnn_op.cu + conv_transpose_op.cu + cos_sim_op.cu + crop_op.cu + average_accumulates_op.cu + conj_op.cu + correlation_op.cu) +register_unity_group( + cu + cross_entropy_op.cu + cross_op.cu + ctc_align_op.cu + cumsum_op.cu + cvm_op.cu + data_norm_op.cu + deformable_conv_op.cu + deformable_conv_v1_op.cu + dequantize_abs_max_op.cu) +register_unity_group( + cu + dgc_clip_by_norm_op.cu + diag_embed_op.cu + diag_op.cu + diag_v2_op.cu + edit_distance_op.cu + erf_op.cu + meshgrid_op.cu + imag_op.cu) +register_unity_group(cu expand_v2_op.cu fake_dequantize_op.cu + fill_any_like_op.cu) +register_unity_group( + cu + flip_op.cu + fsp_op.cu + gather_nd_op.cu + gather_op.cu + gather_tree_op.cu + gaussian_random_op.cu + grid_sampler_op.cu + group_norm_op.cu) +register_unity_group( + cu + hinge_loss_op.cu + histogram_op.cu + huber_loss_op.cu + im2sequence_op.cu + increment_op.cu + index_sample_op.cu + index_select_op.cu + interpolate_op.cu + isfinite_v2_op.cu) +register_unity_group( + cu + inplace_abn_op.cu + interpolate_v2_op.cu + isfinite_op.cu + l1_norm_op.cu + label_smooth_op.cu + linspace_op.cu + load_combine_op.cu + load_op.cu) +register_unity_group( + cu + lod_reset_op.cu + log_softmax_op.cu + lrn_op.cu + lstm_unit_op.cu + dot_op.cu + psroi_pool_op.cu + rank_loss_op.cu + real_op.cu) +register_unity_group( + cu + log_loss_op.cu + lookup_table_v2_op.cu + margin_rank_loss_op.cu + masked_select_op.cu + merge_selected_rows_op.cu + lstmp_op.cu + shuffle_channel_op.cu + softmax_cudnn_op.cu + squared_l2_distance_op.cu) +register_unity_group( + cu + conv_shift_op.cu + dequantize_log_op.cu + dropout_op.cu + fake_quantize_op.cu + gelu_op.cu + lookup_table_op.cu + sigmoid_cross_entropy_with_logits_op.cu + softmax_with_cross_entropy_op.cu) +register_unity_group( + cu + mean_iou_op.cu + mean_op.cu + minus_op.cu + mish_op.cu + multinomial_op.cu + multiplex_op.cu + mv_op.cu + nll_loss_op.cu + norm_op.cu + one_hot_op.cu + pad2d_op.cu + pad3d_op.cu + pad_constant_like_op.cu + pad_op.cu) +register_unity_group( + cu + partial_sum_op.cu + pixel_shuffle_op.cu + prelu_op.cu + prroi_pool_op.cu + pull_box_extended_sparse_op.cu + pull_box_sparse_op.cu) +register_unity_group( + cu + randint_op.cu + random_crop_op.cu + randperm_op.cu + range_op.cu + reverse_op.cu + partial_concat_op.cu + kldiv_loss_op.cu + instance_norm_op.cu) +register_unity_group( + cu + roi_align_op.cu + roll_op.cu + sample_logits_op.cu + sampling_id_op.cu + save_combine_op.cu + save_op.cu + scale_op.cu + scatter_nd_add_op.cu + scatter_op.cu + seed_op.cu) +register_unity_group( + cu + roi_pool_op.cu + selu_op.cu + shape_op.cu + shard_index_op.cu + sign_op.cu + size_op.cu + slice_op.cu) +register_unity_group( + cu + space_to_depth_op.cu + spectral_norm_op.cu + split_op.cu + split_selected_rows_op.cu + squared_l2_norm_op.cu + sum_op.cu + temporal_shift_op.cu + arg_max_op.cu) +register_unity_group( + cu + row_conv_op.cu + tree_conv_op.cu + tril_triu_op.cu + truncated_gaussian_random_op.cu + unfold_op.cu + arg_min_op.cu + crop_tensor_op.cu) +register_unity_group( + cu + smooth_l1_loss_op.cu + uniform_random_op.cu + unstack_op.cu + where_index_op.cu + where_op.cu + layer_norm_op.cu) +register_unity_group(cu expand_as_op.cu stack_op.cu) # The following groups are to make better use of `/MP` which MSVC's parallel # compilation instruction when compiling in Unity Build. register_unity_group(cu activation_op.cu) diff --git a/paddle/fluid/platform/CMakeLists.txt b/paddle/fluid/platform/CMakeLists.txt index 24d39c25cf3..247ff43b8a0 100644 --- a/paddle/fluid/platform/CMakeLists.txt +++ b/paddle/fluid/platform/CMakeLists.txt @@ -1,229 +1,448 @@ -proto_library(profiler_proto SRCS profiler.proto DEPS framework_proto simple_threadpool) +proto_library(profiler_proto SRCS profiler.proto DEPS framework_proto + simple_threadpool) if(WITH_GPU) proto_library(external_error_proto SRCS external_error.proto) endif(WITH_GPU) -if (WITH_PYTHON) +if(WITH_PYTHON) py_proto_compile(profiler_py_proto SRCS profiler.proto) - add_custom_target(profiler_py_proto_init ALL COMMAND ${CMAKE_COMMAND} -E touch __init__.py) + add_custom_target(profiler_py_proto_init ALL COMMAND ${CMAKE_COMMAND} -E + touch __init__.py) add_dependencies(profiler_py_proto profiler_py_proto_init) - if (NOT WIN32) - add_custom_command(TARGET profiler_py_proto POST_BUILD - COMMAND ${CMAKE_COMMAND} -E make_directory ${PADDLE_BINARY_DIR}/python/paddle/fluid/proto/profiler - COMMAND cp *.py ${PADDLE_BINARY_DIR}/python/paddle/fluid/proto/profiler - COMMENT "Copy generated python proto into directory paddle/fluid/proto/profiler." - WORKING_DIRECTORY ${CMAKE_CURRENT_BINARY_DIR}) + if(NOT WIN32) + add_custom_command( + TARGET profiler_py_proto + POST_BUILD + COMMAND ${CMAKE_COMMAND} -E make_directory + ${PADDLE_BINARY_DIR}/python/paddle/fluid/proto/profiler + COMMAND cp *.py ${PADDLE_BINARY_DIR}/python/paddle/fluid/proto/profiler + COMMENT + "Copy generated python proto into directory paddle/fluid/proto/profiler." + WORKING_DIRECTORY ${CMAKE_CURRENT_BINARY_DIR}) else(NOT WIN32) - string(REPLACE "/" "\\" proto_dstpath "${PADDLE_BINARY_DIR}/python/paddle/fluid/proto/profiler/") - add_custom_command(TARGET profiler_py_proto POST_BUILD - COMMAND ${CMAKE_COMMAND} -E make_directory ${PADDLE_BINARY_DIR}/python/paddle/fluid/proto/profiler - COMMAND copy /Y *.py ${proto_dstpath} - COMMENT "Copy generated python proto into directory paddle/fluid/proto/profiler." - WORKING_DIRECTORY ${CMAKE_CURRENT_BINARY_DIR}) + string(REPLACE "/" "\\" proto_dstpath + "${PADDLE_BINARY_DIR}/python/paddle/fluid/proto/profiler/") + add_custom_command( + TARGET profiler_py_proto + POST_BUILD + COMMAND ${CMAKE_COMMAND} -E make_directory + ${PADDLE_BINARY_DIR}/python/paddle/fluid/proto/profiler + COMMAND copy /Y *.py ${proto_dstpath} + COMMENT + "Copy generated python proto into directory paddle/fluid/proto/profiler." + WORKING_DIRECTORY ${CMAKE_CURRENT_BINARY_DIR}) endif(NOT WIN32) endif() -cc_library(flags SRCS flags.cc DEPS gflags boost) -cc_library(denormal SRCS denormal.cc DEPS) +cc_library( + flags + SRCS flags.cc + DEPS gflags boost) +cc_library( + denormal + SRCS denormal.cc + DEPS) -cc_test(errors_test SRCS errors_test.cc DEPS errors enforce) +cc_test( + errors_test + SRCS errors_test.cc + DEPS errors enforce) set(enforce_deps flags errors boost flags phi_enforce) if(WITH_GPU) set(enforce_deps ${enforce_deps} external_error_proto) endif() -cc_library(enforce INTERFACE SRCS enforce.cc DEPS ${enforce_deps}) +cc_library( + enforce INTERFACE + SRCS enforce.cc + DEPS ${enforce_deps}) cc_library(monitor SRCS monitor.cc) -cc_test(enforce_test SRCS enforce_test.cc DEPS stringpiece enforce) +cc_test( + enforce_test + SRCS enforce_test.cc + DEPS stringpiece enforce) set(CPU_INFO_DEPS gflags glog enforce) -IF(WITH_XBYAK) - list(APPEND CPU_INFO_DEPS xbyak) -ENDIF() -cc_library(cpu_info SRCS cpu_info.cc DEPS ${CPU_INFO_DEPS}) -cc_test(cpu_info_test SRCS cpu_info_test.cc DEPS cpu_info) -cc_library(os_info SRCS os_info.cc DEPS enforce) -cc_test(os_info_test SRCS os_info_test.cc DEPS os_info) - -IF(WITH_GPU) - nv_library(cuda_graph_with_memory_pool SRCS cuda_graph_with_memory_pool.cc DEPS device_context allocator_facade cuda_graph) -ELSE() - cc_library(cuda_graph_with_memory_pool SRCS cuda_graph_with_memory_pool.cc DEPS device_context allocator_facade) -ENDIF() - -cc_library(place SRCS place.cc DEPS enforce boost phi_place) -cc_test(place_test SRCS place_test.cc DEPS place glog gflags) - -IF(WITH_MKLDNN) - set(MKLDNN_CTX_DEPS mkldnn) -ELSE() - set(MKLDNN_CTX_DEPS) -ENDIF() +if(WITH_XBYAK) + list(APPEND CPU_INFO_DEPS xbyak) +endif() +cc_library( + cpu_info + SRCS cpu_info.cc + DEPS ${CPU_INFO_DEPS}) +cc_test( + cpu_info_test + SRCS cpu_info_test.cc + DEPS cpu_info) +cc_library( + os_info + SRCS os_info.cc + DEPS enforce) +cc_test( + os_info_test + SRCS os_info_test.cc + DEPS os_info) + +if(WITH_GPU) + nv_library( + cuda_graph_with_memory_pool + SRCS cuda_graph_with_memory_pool.cc + DEPS device_context allocator_facade cuda_graph) +else() + cc_library( + cuda_graph_with_memory_pool + SRCS cuda_graph_with_memory_pool.cc + DEPS device_context allocator_facade) +endif() + +cc_library( + place + SRCS place.cc + DEPS enforce boost phi_place) +cc_test( + place_test + SRCS place_test.cc + DEPS place glog gflags) + +if(WITH_MKLDNN) + set(MKLDNN_CTX_DEPS mkldnn) +else() + set(MKLDNN_CTX_DEPS) +endif() add_subdirectory(device) add_subdirectory(dynload) add_subdirectory(stream) -cc_library(cpu_helper SRCS cpu_helper.cc DEPS cblas enforce) -cc_test(cpu_helper_test SRCS cpu_helper_test.cc DEPS cpu_helper) +cc_library( + cpu_helper + SRCS cpu_helper.cc + DEPS cblas enforce) +cc_test( + cpu_helper_test + SRCS cpu_helper_test.cc + DEPS cpu_helper) set(dgc_deps "") -IF(WITH_DGC) - set(dgc_deps dgc) -ENDIF() - -IF(WITH_GPU OR WITH_ROCM) - set(GPU_CTX_DEPS dynload_cuda dynamic_loader cuda_stream) -ENDIF() - -IF(WITH_IPU) - set(IPU_CTX_DEPS ipu_info) -ELSE() - set(IPU_CTX_DEPS) -ENDIF(WITH_IPU) - -IF(WITH_ASCEND_CL) - set(NPU_CTX_DEPS npu_stream npu_info) -ENDIF() - -IF(WITH_MLU) - set(MLU_CTX_DEPS mlu_device_context) -ENDIF() - -IF(WITH_ASCEND_CL OR WITH_MLU) -cc_library(stream_callback_manager SRCS stream_callback_manager.cc DEPS simple_threadpool enforce) -ENDIF() - -IF(WITH_GPU) - nv_library(stream_callback_manager SRCS stream_callback_manager.cc DEPS simple_threadpool enforce) -ENDIF() -IF(WITH_ROCM) - hip_library(stream_callback_manager SRCS stream_callback_manager.cc DEPS simple_threadpool enforce) -ENDIF() - -IF(WITH_GPU OR WITH_ROCM) +if(WITH_DGC) + set(dgc_deps dgc) +endif() + +if(WITH_GPU OR WITH_ROCM) + set(GPU_CTX_DEPS dynload_cuda dynamic_loader cuda_stream) +endif() + +if(WITH_IPU) + set(IPU_CTX_DEPS ipu_info) +else() + set(IPU_CTX_DEPS) +endif(WITH_IPU) + +if(WITH_ASCEND_CL) + set(NPU_CTX_DEPS npu_stream npu_info) +endif() + +if(WITH_MLU) + set(MLU_CTX_DEPS mlu_device_context) +endif() + +if(WITH_ASCEND_CL OR WITH_MLU) + cc_library( + stream_callback_manager + SRCS stream_callback_manager.cc + DEPS simple_threadpool enforce) +endif() + +if(WITH_GPU) + nv_library( + stream_callback_manager + SRCS stream_callback_manager.cc + DEPS simple_threadpool enforce) +endif() +if(WITH_ROCM) + hip_library( + stream_callback_manager + SRCS stream_callback_manager.cc + DEPS simple_threadpool enforce) +endif() + +if(WITH_GPU OR WITH_ROCM) set(STREAM_CALLBACK_DEPS stream_callback_manager) -ELSEIF(WITH_ASCEND_CL) +elseif(WITH_ASCEND_CL) set(STREAM_CALLBACK_DEPS stream_callback_manager) -ELSE() +else() set(STREAM_CALLBACK_DEPS) -ENDIF() +endif() if(WITH_GLOO) - cc_library(gloo_context SRCS gloo_context.cc DEPS framework_proto gloo_wrapper enforce) + cc_library( + gloo_context + SRCS gloo_context.cc + DEPS framework_proto gloo_wrapper enforce) endif() -cc_library(cudnn_workspace_helper SRCS cudnn_workspace_helper.cc DEPS boost) +cc_library( + cudnn_workspace_helper + SRCS cudnn_workspace_helper.cc + DEPS boost) # separate init from device_context to avoid cycle dependencies -cc_library(init SRCS init.cc DEPS device_context custom_kernel context_pool) +cc_library( + init + SRCS init.cc + DEPS device_context custom_kernel context_pool) # memcpy depends on device_context, here add deps individually for # avoiding cycle dependencies -cc_library(device_context SRCS device_context.cc DEPS simple_threadpool malloc xxhash ${STREAM_CALLBACK_DEPS} - place phi_place eigen3 stringpiece cpu_helper cpu_info framework_proto ${IPU_CTX_DEPS} ${GPU_CTX_DEPS} ${NPU_CTX_DEPS} ${MKLDNN_CTX_DEPS} - ${dgc_deps} dlpack cudnn_workspace_helper ${XPU_CTX_DEPS} ${MLU_CTX_DEPS} eigen3 cpu_context generator) +cc_library( + device_context + SRCS device_context.cc + DEPS simple_threadpool + malloc + xxhash + ${STREAM_CALLBACK_DEPS} + place + phi_place + eigen3 + stringpiece + cpu_helper + cpu_info + framework_proto + ${IPU_CTX_DEPS} + ${GPU_CTX_DEPS} + ${NPU_CTX_DEPS} + ${MKLDNN_CTX_DEPS} + ${dgc_deps} + dlpack + cudnn_workspace_helper + ${XPU_CTX_DEPS} + ${MLU_CTX_DEPS} + eigen3 + cpu_context + generator) if(WITH_XPU) target_link_libraries(device_context xpu_context xpu_resource_pool) endif() -cc_library(collective_helper SRCS collective_helper.cc gen_comm_id_helper.cc DEPS framework_proto device_context enforce) +cc_library( + collective_helper + SRCS collective_helper.cc gen_comm_id_helper.cc + DEPS framework_proto device_context enforce) if(WITH_ASCEND_CL) - target_link_libraries(collective_helper npu_collective_helper) + target_link_libraries(collective_helper npu_collective_helper) endif() if(WITH_CNCL) - target_link_libraries(collective_helper mlu_collective_helper) + target_link_libraries(collective_helper mlu_collective_helper) endif() if(WITH_GPU OR WITH_ROCM) - target_link_libraries(device_context gpu_info gpu_context phi_gpu_info) - target_link_libraries(device_context gpu_resource_pool) + target_link_libraries(device_context gpu_info gpu_context phi_gpu_info) + target_link_libraries(device_context gpu_resource_pool) endif() -if (WITH_CUSTOM_DEVICE) - target_link_libraries(device_context custom_context) +if(WITH_CUSTOM_DEVICE) + target_link_libraries(device_context custom_context) endif() if(WITH_ASCEND_CL) - target_link_libraries(device_context npu_resource_pool) + target_link_libraries(device_context npu_resource_pool) endif() if(WITH_MLU) - target_link_libraries(device_context mlu_resource_pool) + target_link_libraries(device_context mlu_resource_pool) endif() if(WITH_CUSTOM_DEVICE) - target_link_libraries(device_context custom_context) + target_link_libraries(device_context custom_context) endif() -cc_test(init_test SRCS init_test.cc DEPS device_context) +cc_test( + init_test + SRCS init_test.cc + DEPS device_context) # Manage all device event library set(DEVICE_EVENT_LIBS) -cc_library(device_event_base SRCS device_event_base.cc DEPS place enforce device_context op_registry) -set(DEVICE_EVENT_LIBS device_event_base CACHE INTERNAL "device event libs") - +cc_library( + device_event_base + SRCS device_event_base.cc + DEPS place enforce device_context op_registry) +set(DEVICE_EVENT_LIBS + device_event_base + CACHE INTERNAL "device event libs") if(WITH_GPU) - nv_library(device_event_gpu SRCS device_event_gpu.cc DEPS device_event_base) - set(DEVICE_EVENT_LIBS device_event_gpu CACHE INTERNAL "device event libs") - nv_test(device_event_test SRCS device_event_test.cc DEPS device_event_gpu) - - nv_test(device_context_test SRCS device_context_test.cu DEPS device_context gpu_info) - nv_test(transform_test SRCS transform_test.cu DEPS memory place device_context) + nv_library( + device_event_gpu + SRCS device_event_gpu.cc + DEPS device_event_base) + set(DEVICE_EVENT_LIBS + device_event_gpu + CACHE INTERNAL "device event libs") + nv_test( + device_event_test + SRCS device_event_test.cc + DEPS device_event_gpu) + + nv_test( + device_context_test + SRCS device_context_test.cu + DEPS device_context gpu_info) + nv_test( + transform_test + SRCS transform_test.cu + DEPS memory place device_context) endif() if(WITH_ROCM) - hip_library(device_event_gpu SRCS device_event_gpu.cc DEPS device_event_base) - set(DEVICE_EVENT_LIBS device_event_gpu CACHE INTERNAL "device event libs") - hip_test(device_event_test SRCS device_event_test.cc DEPS device_event_gpu) - - hip_test(device_context_test SRCS device_context_test.cu DEPS device_context gpu_info) - hip_test(transform_test SRCS transform_test.cu DEPS memory place device_context) + hip_library( + device_event_gpu + SRCS device_event_gpu.cc + DEPS device_event_base) + set(DEVICE_EVENT_LIBS + device_event_gpu + CACHE INTERNAL "device event libs") + hip_test( + device_event_test + SRCS device_event_test.cc + DEPS device_event_gpu) + + hip_test( + device_context_test + SRCS device_context_test.cu + DEPS device_context gpu_info) + hip_test( + transform_test + SRCS transform_test.cu + DEPS memory place device_context) endif() cc_library(timer SRCS timer.cc) -cc_test(timer_test SRCS timer_test.cc DEPS timer) - -cc_library(lodtensor_printer SRCS lodtensor_printer.cc DEPS ddim place tensor scope lod_tensor variable_helper framework_proto) -cc_test(lodtensor_printer_test SRCS lodtensor_printer_test.cc DEPS lodtensor_printer) +cc_test( + timer_test + SRCS timer_test.cc + DEPS timer) + +cc_library( + lodtensor_printer + SRCS lodtensor_printer.cc + DEPS ddim + place + tensor + scope + lod_tensor + variable_helper + framework_proto) +cc_test( + lodtensor_printer_test + SRCS lodtensor_printer_test.cc + DEPS lodtensor_printer) add_subdirectory(profiler) -cc_library(device_tracer SRCS device_tracer.cc DEPS boost profiler_proto framework_proto ${GPU_CTX_DEPS}) +cc_library( + device_tracer + SRCS device_tracer.cc + DEPS boost profiler_proto framework_proto ${GPU_CTX_DEPS}) if(WITH_GPU) - nv_library(profiler SRCS profiler.cc profiler.cu DEPS os_info device_tracer gpu_info enforce dynload_cuda new_profiler stats) - nv_library(device_memory_aligment SRCS device_memory_aligment.cc DEPS cpu_info gpu_info place) + nv_library( + profiler + SRCS profiler.cc profiler.cu + DEPS os_info + device_tracer + gpu_info + enforce + dynload_cuda + new_profiler + stats) + nv_library( + device_memory_aligment + SRCS device_memory_aligment.cc + DEPS cpu_info gpu_info place) elseif(WITH_ROCM) - hip_library(profiler SRCS profiler.cc profiler.cu DEPS os_info device_tracer gpu_info enforce new_profiler stats) - hip_library(device_memory_aligment SRCS device_memory_aligment.cc DEPS cpu_info gpu_info place) + hip_library( + profiler + SRCS profiler.cc profiler.cu + DEPS os_info device_tracer gpu_info enforce new_profiler stats) + hip_library( + device_memory_aligment + SRCS device_memory_aligment.cc + DEPS cpu_info gpu_info place) else() - cc_library(profiler SRCS profiler.cc DEPS os_info device_tracer enforce new_profiler stats) - cc_library(device_memory_aligment SRCS device_memory_aligment.cc DEPS cpu_info place) + cc_library( + profiler + SRCS profiler.cc + DEPS os_info device_tracer enforce new_profiler stats) + cc_library( + device_memory_aligment + SRCS device_memory_aligment.cc + DEPS cpu_info place) endif() -cc_test(profiler_test SRCS profiler_test.cc DEPS profiler) -cc_test(float16_test SRCS float16_test.cc DEPS lod_tensor) -cc_test(bfloat16_test SRCS bfloat16_test.cc DEPS lod_tensor) -cc_test(complex_test SRCS complex_test.cc DEPS lod_tensor) +cc_test( + profiler_test + SRCS profiler_test.cc + DEPS profiler) +cc_test( + float16_test + SRCS float16_test.cc + DEPS lod_tensor) +cc_test( + bfloat16_test + SRCS bfloat16_test.cc + DEPS lod_tensor) +cc_test( + complex_test + SRCS complex_test.cc + DEPS lod_tensor) -IF(WITH_GPU) - nv_test(float16_gpu_test SRCS float16_test.cu DEPS lod_tensor) - nv_test(bfloat16_gpu_test SRCS bfloat16_test.cu DEPS lod_tensor) - nv_test(complex_gpu_test SRCS complex_test.cu DEPS lod_tensor) - nv_test(test_limit_gpu_memory SRCS test_limit_gpu_memory.cu DEPS gpu_info flags) - nv_library(cuda_device_guard SRCS cuda_device_guard.cc DEPS gpu_info) -ENDIF() +if(WITH_GPU) + nv_test( + float16_gpu_test + SRCS float16_test.cu + DEPS lod_tensor) + nv_test( + bfloat16_gpu_test + SRCS bfloat16_test.cu + DEPS lod_tensor) + nv_test( + complex_gpu_test + SRCS complex_test.cu + DEPS lod_tensor) + nv_test( + test_limit_gpu_memory + SRCS test_limit_gpu_memory.cu + DEPS gpu_info flags) + nv_library( + cuda_device_guard + SRCS cuda_device_guard.cc + DEPS gpu_info) +endif() -IF(WITH_ROCM) - hip_test(float16_gpu_test SRCS float16_test.cu DEPS lod_tensor) - hip_test(test_limit_gpu_memory SRCS test_limit_gpu_memory.cu DEPS gpu_info flags) - hip_library(cuda_device_guard SRCS cuda_device_guard.cc DEPS gpu_info) -ENDIF() +if(WITH_ROCM) + hip_test( + float16_gpu_test + SRCS float16_test.cu + DEPS lod_tensor) + hip_test( + test_limit_gpu_memory + SRCS test_limit_gpu_memory.cu + DEPS gpu_info flags) + hip_library( + cuda_device_guard + SRCS cuda_device_guard.cc + DEPS gpu_info) +endif() if(NOT APPLE AND NOT WIN32) - cc_library(device_code SRCS device_code.cc DEPS device_context) + cc_library( + device_code + SRCS device_code.cc + DEPS device_context) if(WITH_GPU OR WITH_ROCM) - cc_test(device_code_test SRCS device_code_test.cc DEPS device_code lod_tensor) + cc_test( + device_code_test + SRCS device_code_test.cc + DEPS device_code lod_tensor) endif() endif() diff --git a/paddle/fluid/platform/device/CMakeLists.txt b/paddle/fluid/platform/device/CMakeLists.txt index cbf3fdd263b..62745883023 100644 --- a/paddle/fluid/platform/device/CMakeLists.txt +++ b/paddle/fluid/platform/device/CMakeLists.txt @@ -1,27 +1,26 @@ - set(DEV_LIBS custom_device) # GPU -IF(WITH_GPU OR WITH_ROCM) +if(WITH_GPU OR WITH_ROCM) add_subdirectory(gpu) -ENDIF() +endif() # XPU -IF(WITH_XPU) +if(WITH_XPU) add_subdirectory(xpu) -ENDIF() +endif() # NPU -IF(WITH_ASCEND OR WITH_ASCEND_CL) +if(WITH_ASCEND OR WITH_ASCEND_CL) add_subdirectory(npu) -ENDIF() +endif() # IPU -IF(WITH_IPU) +if(WITH_IPU) add_subdirectory(ipu) -ENDIF() +endif() # MLU -IF(WITH_MLU) +if(WITH_MLU) add_subdirectory(mlu) -ENDIF() +endif() diff --git a/paddle/fluid/platform/device/gpu/CMakeLists.txt b/paddle/fluid/platform/device/gpu/CMakeLists.txt index f7c13ec7ed5..66120f55f7c 100644 --- a/paddle/fluid/platform/device/gpu/CMakeLists.txt +++ b/paddle/fluid/platform/device/gpu/CMakeLists.txt @@ -1,15 +1,30 @@ -IF(WITH_GPU) - add_subdirectory(cuda) - nv_library(gpu_info SRCS gpu_info.cc DEPS phi_gpu_info gflags glog enforce monitor dynload_cuda) +if(WITH_GPU) + add_subdirectory(cuda) + nv_library( + gpu_info + SRCS gpu_info.cc + DEPS phi_gpu_info gflags glog enforce monitor dynload_cuda) - nv_test(cuda_helper_test SRCS cuda_helper_test.cu) - nv_test(cudnn_desc_test SRCS cudnn_desc_test.cc DEPS dynload_cuda) -ELSEIF(WITH_ROCM) - add_subdirectory(rocm) - hip_library(gpu_info SRCS gpu_info.cc DEPS phi_gpu_info gflags glog enforce monitor dynload_cuda) + nv_test(cuda_helper_test SRCS cuda_helper_test.cu) + nv_test( + cudnn_desc_test + SRCS cudnn_desc_test.cc + DEPS dynload_cuda) +elseif(WITH_ROCM) + add_subdirectory(rocm) + hip_library( + gpu_info + SRCS gpu_info.cc + DEPS phi_gpu_info gflags glog enforce monitor dynload_cuda) - hip_test(cuda_helper_test SRCS cuda_helper_test.cu) - hip_test(cudnn_desc_test SRCS cudnn_desc_test.cc DEPS dynload_cuda) -ENDIF() + hip_test(cuda_helper_test SRCS cuda_helper_test.cu) + hip_test( + cudnn_desc_test + SRCS cudnn_desc_test.cc + DEPS dynload_cuda) +endif() -cc_library(gpu_resource_pool SRCS gpu_resource_pool.cc DEPS gpu_info) +cc_library( + gpu_resource_pool + SRCS gpu_resource_pool.cc + DEPS gpu_info) diff --git a/paddle/fluid/platform/device/gpu/cuda/CMakeLists.txt b/paddle/fluid/platform/device/gpu/cuda/CMakeLists.txt index 85050038d5a..da9121550e0 100644 --- a/paddle/fluid/platform/device/gpu/cuda/CMakeLists.txt +++ b/paddle/fluid/platform/device/gpu/cuda/CMakeLists.txt @@ -1,4 +1,13 @@ -nv_library(cuda_graph SRCS cuda_graph.cc DEPS enforce allocator_facade) -nv_library(cuda_profiler SRCS cuda_profiler.cc DEPS enforce) +nv_library( + cuda_graph + SRCS cuda_graph.cc + DEPS enforce allocator_facade) +nv_library( + cuda_profiler + SRCS cuda_profiler.cc + DEPS enforce) -nv_test(cudnn_helper_test SRCS cudnn_helper_test.cc DEPS dynload_cuda phi) +nv_test( + cudnn_helper_test + SRCS cudnn_helper_test.cc + DEPS dynload_cuda phi) diff --git a/paddle/fluid/platform/device/gpu/rocm/CMakeLists.txt b/paddle/fluid/platform/device/gpu/rocm/CMakeLists.txt index 988807258c1..070312adbc2 100644 --- a/paddle/fluid/platform/device/gpu/rocm/CMakeLists.txt +++ b/paddle/fluid/platform/device/gpu/rocm/CMakeLists.txt @@ -1 +1,4 @@ -hip_test(miopen_helper_test SRCS miopen_helper_test.cc DEPS dynload_cuda) +hip_test( + miopen_helper_test + SRCS miopen_helper_test.cc + DEPS dynload_cuda) diff --git a/paddle/fluid/platform/device/ipu/CMakeLists.txt b/paddle/fluid/platform/device/ipu/CMakeLists.txt index 7712ede8fd2..29f2a2955e0 100644 --- a/paddle/fluid/platform/device/ipu/CMakeLists.txt +++ b/paddle/fluid/platform/device/ipu/CMakeLists.txt @@ -1,35 +1,42 @@ if(WITH_IPU) set(paddle_ipu_handler ${CMAKE_CURRENT_BINARY_DIR}/paddle_ipu_handler.h.tmp) set(paddle_ipu_handler_final ${CMAKE_CURRENT_BINARY_DIR}/paddle_ipu_handler.h) - file(WRITE ${paddle_ipu_handler} "// Auto generated from CMake. DO NOT EDIT!\n\n") + file(WRITE ${paddle_ipu_handler} + "// Auto generated from CMake. DO NOT EDIT!\n\n") file(APPEND ${paddle_ipu_handler} "\#pragma once\n") - file(APPEND ${paddle_ipu_handler} "\#include \"paddle/fluid/platform/device/ipu/popart_canonicalization/canonicalization_utils.h\"\n\n") - file(GLOB POPART_CANONICALIZATION_SRC ${CMAKE_CURRENT_SOURCE_DIR}/popart_canonicalization/*.cc) + file( + APPEND ${paddle_ipu_handler} + "\#include \"paddle/fluid/platform/device/ipu/popart_canonicalization/canonicalization_utils.h\"\n\n" + ) + file(GLOB POPART_CANONICALIZATION_SRC + ${CMAKE_CURRENT_SOURCE_DIR}/popart_canonicalization/*.cc) copy_if_different(${paddle_ipu_handler} ${paddle_ipu_handler_final}) foreach(file_path ${POPART_CANONICALIZATION_SRC}) file(READ ${file_path} file_content) - string(REGEX MATCHALL "(REGISTER_HANDLER)(\\()([A-Za-z0-9_]+)(,)" op_handlers ${file_content}) + string(REGEX MATCHALL "(REGISTER_HANDLER)(\\()([A-Za-z0-9_]+)(,)" + op_handlers ${file_content}) string(REPLACE "REGISTER_HANDLER(" "" op_handlers "${op_handlers}") string(REPLACE "," "" op_handlers "${op_handlers}") foreach(op_handler ${op_handlers}) file(APPEND ${paddle_ipu_handler} "USE_HANDLER(${op_handler});\n") endforeach() endforeach() - - set(IPU_BACKEND_SRC - "ipu_strategy.cc" - "ipu_executor.cc" - "ipu_compiler.cc" - "ipu_backend.cc" - "ipu_utils.cc" - ) - set(IPU_INFO_SRC - "ipu_info.cc" - "ipu_device.cc" - ) - cc_library(popart_canonicalization SRCS ${POPART_CANONICALIZATION_SRC} DEPS graph) - cc_library(ipu_backend SRCS ${IPU_BACKEND_SRC} DEPS popart-only graph graph_helper popdist popart_canonicalization) - cc_library(ipu_info SRCS ${IPU_INFO_SRC} DEPS popart-only enforce) + set(IPU_BACKEND_SRC "ipu_strategy.cc" "ipu_executor.cc" "ipu_compiler.cc" + "ipu_backend.cc" "ipu_utils.cc") + set(IPU_INFO_SRC "ipu_info.cc" "ipu_device.cc") + + cc_library( + popart_canonicalization + SRCS ${POPART_CANONICALIZATION_SRC} + DEPS graph) + cc_library( + ipu_backend + SRCS ${IPU_BACKEND_SRC} + DEPS popart-only graph graph_helper popdist popart_canonicalization) + cc_library( + ipu_info + SRCS ${IPU_INFO_SRC} + DEPS popart-only enforce) endif() diff --git a/paddle/fluid/platform/device/mlu/CMakeLists.txt b/paddle/fluid/platform/device/mlu/CMakeLists.txt index 1f3a7670849..08b33c9b58f 100644 --- a/paddle/fluid/platform/device/mlu/CMakeLists.txt +++ b/paddle/fluid/platform/device/mlu/CMakeLists.txt @@ -1,12 +1,32 @@ - if(NOT WITH_MLU) - return() + return() endif() -cc_test(mlu_enforce_test SRCS enforce_test.cc DEPS stringpiece) -cc_library(mlu_info SRCS mlu_info.cc DEPS enforce glog monitor neuware_lib) -cc_library(mlu_stream SRCS mlu_stream.cc DEPS boost mlu_info stream_callback_manager eigen3 ${MKLDNN_CTX_DEPS}) -cc_library(mlu_device_context SRCS device_context.cc DEPS mlu_stream) -cc_test(mlu_device_context_test SRCS device_context_test.cc DEPS mlu_device_context) -cc_library(mlu_collective_helper SRCS mlu_collective_helper.cc DEPS mlu_stream mlu_info) -cc_library(mlu_resource_pool SRCS mlu_resource_pool.cc DEPS mlu_info) +cc_test( + mlu_enforce_test + SRCS enforce_test.cc + DEPS stringpiece) +cc_library( + mlu_info + SRCS mlu_info.cc + DEPS enforce glog monitor neuware_lib) +cc_library( + mlu_stream + SRCS mlu_stream.cc + DEPS boost mlu_info stream_callback_manager eigen3 ${MKLDNN_CTX_DEPS}) +cc_library( + mlu_device_context + SRCS device_context.cc + DEPS mlu_stream) +cc_test( + mlu_device_context_test + SRCS device_context_test.cc + DEPS mlu_device_context) +cc_library( + mlu_collective_helper + SRCS mlu_collective_helper.cc + DEPS mlu_stream mlu_info) +cc_library( + mlu_resource_pool + SRCS mlu_resource_pool.cc + DEPS mlu_info) diff --git a/paddle/fluid/platform/device/npu/CMakeLists.txt b/paddle/fluid/platform/device/npu/CMakeLists.txt index 52db36d131e..9015a76e9cd 100644 --- a/paddle/fluid/platform/device/npu/CMakeLists.txt +++ b/paddle/fluid/platform/device/npu/CMakeLists.txt @@ -3,13 +3,31 @@ add_subdirectory(dynload) if(WITH_ASCEND) - cc_library(ascend_npu_info SRCS ascend_npu_info.cc DEPS gflags glog enforce atlas_acl) + cc_library( + ascend_npu_info + SRCS ascend_npu_info.cc + DEPS gflags glog enforce atlas_acl) endif() if(WITH_ASCEND_CL) - cc_library(npu_info SRCS npu_info.cc DEPS gflags glog enforce monitor ascendcl acl_op_compiler) - cc_library(npu_resource_pool SRCS npu_resource_pool.cc DEPS npu_info) - cc_library(npu_stream SRCS npu_stream.cc DEPS enforce boost stream_callback_manager) - cc_library(npu_collective_helper SRCS npu_collective_helper.cc DEPS npu_stream npu_info data_type) - cc_library(npu_op_runner SRCS npu_op_runner.cc DEPS operator npu_info) + cc_library( + npu_info + SRCS npu_info.cc + DEPS gflags glog enforce monitor ascendcl acl_op_compiler) + cc_library( + npu_resource_pool + SRCS npu_resource_pool.cc + DEPS npu_info) + cc_library( + npu_stream + SRCS npu_stream.cc + DEPS enforce boost stream_callback_manager) + cc_library( + npu_collective_helper + SRCS npu_collective_helper.cc + DEPS npu_stream npu_info data_type) + cc_library( + npu_op_runner + SRCS npu_op_runner.cc + DEPS operator npu_info) endif() diff --git a/paddle/fluid/platform/device/npu/dynload/CMakeLists.txt b/paddle/fluid/platform/device/npu/dynload/CMakeLists.txt index 7232d51a602..9f36942524b 100644 --- a/paddle/fluid/platform/device/npu/dynload/CMakeLists.txt +++ b/paddle/fluid/platform/device/npu/dynload/CMakeLists.txt @@ -1,3 +1,6 @@ if(WITH_ASCEND_CL) - cc_library(npu_hccl SRCS hccl.cc DEPS dynamic_loader warpctc) + cc_library( + npu_hccl + SRCS hccl.cc + DEPS dynamic_loader warpctc) endif() diff --git a/paddle/fluid/platform/device/xpu/CMakeLists.txt b/paddle/fluid/platform/device/xpu/CMakeLists.txt index 3399fff087f..19656bf1cce 100644 --- a/paddle/fluid/platform/device/xpu/CMakeLists.txt +++ b/paddle/fluid/platform/device/xpu/CMakeLists.txt @@ -2,11 +2,32 @@ if(NOT WITH_XPU) return() endif() -set(XPU_CTX_DEPS xpulib ssl crypto rt z resolv dl) +set(XPU_CTX_DEPS + xpulib + ssl + crypto + rt + z + resolv + dl) - -cc_library(xpu_info SRCS xpu_info.cc DEPS gflags glog enforce xpulib device_context place phi_xpu_info) -cc_library(xpu_op_list SRCS xpu_op_list.cc DEPS gflags glog enforce xpulib device_context op_kernel_type) -cc_library(xpu_resource_pool SRCS xpu_resource_pool.cc DEPS xpu_info) +cc_library( + xpu_info + SRCS xpu_info.cc + DEPS gflags + glog + enforce + xpulib + device_context + place + phi_xpu_info) +cc_library( + xpu_op_list + SRCS xpu_op_list.cc + DEPS gflags glog enforce xpulib device_context op_kernel_type) +cc_library( + xpu_resource_pool + SRCS xpu_resource_pool.cc + DEPS xpu_info) add_subdirectory(tests) diff --git a/paddle/fluid/platform/device/xpu/tests/CMakeLists.txt b/paddle/fluid/platform/device/xpu/tests/CMakeLists.txt index 6d98fefcf83..e51896df615 100644 --- a/paddle/fluid/platform/device/xpu/tests/CMakeLists.txt +++ b/paddle/fluid/platform/device/xpu/tests/CMakeLists.txt @@ -1 +1,4 @@ -cc_test(enforce_xpu_test SRCS enforce_xpu_test.cc DEPS stringpiece) +cc_test( + enforce_xpu_test + SRCS enforce_xpu_test.cc + DEPS stringpiece) diff --git a/paddle/fluid/platform/dynload/CMakeLists.txt b/paddle/fluid/platform/dynload/CMakeLists.txt index 1f95e121271..bba0ad35e02 100644 --- a/paddle/fluid/platform/dynload/CMakeLists.txt +++ b/paddle/fluid/platform/dynload/CMakeLists.txt @@ -1,55 +1,89 @@ -cc_library(dynamic_loader SRCS dynamic_loader.cc DEPS glog gflags enforce phi_dynamic_loader) +cc_library( + dynamic_loader + SRCS dynamic_loader.cc + DEPS glog gflags enforce phi_dynamic_loader) -list(APPEND CUDA_SRCS cublas.cc cublasLt.cc cudnn.cc curand.cc cusolver.cc cusparse.cc nvtx.cc cufft.cc) +list( + APPEND + CUDA_SRCS + cublas.cc + cublasLt.cc + cudnn.cc + curand.cc + cusolver.cc + cusparse.cc + nvtx.cc + cufft.cc) -if (NOT WITH_NV_JETSON) - list(APPEND CUDA_SRCS nvjpeg.cc) +if(NOT WITH_NV_JETSON) + list(APPEND CUDA_SRCS nvjpeg.cc) endif() -if (WITH_ROCM) +if(WITH_ROCM) list(APPEND HIP_SRCS rocblas.cc miopen.cc hiprand.cc hipfft.cc) endif() # There is no macOS version of NCCL. # Disable nvrtc and cuda_driver api on MacOS, and only do a early test on Linux and Windows. -if (NOT APPLE) - list(APPEND CUDA_SRCS nvrtc.cc cuda_driver.cc) - if (WITH_NCCL) +if(NOT APPLE) + list(APPEND CUDA_SRCS nvrtc.cc cuda_driver.cc) + if(WITH_NCCL) list(APPEND CUDA_SRCS nccl.cc) endif() - if (WITH_ROCM) + if(WITH_ROCM) list(APPEND HIP_SRCS hiprtc.cc rocm_driver.cc) - if (WITH_RCCL) + if(WITH_RCCL) list(APPEND HIP_SRCS rccl.cc) endif() endif() endif() -if (TENSORRT_FOUND) +if(TENSORRT_FOUND) list(APPEND CUDA_SRCS tensorrt.cc) endif() configure_file(cupti_lib_path.h.in ${CMAKE_CURRENT_BINARY_DIR}/cupti_lib_path.h) -if (CUPTI_FOUND) - list(APPEND CUDA_SRCS cupti.cc) +if(CUPTI_FOUND) + list(APPEND CUDA_SRCS cupti.cc) endif(CUPTI_FOUND) if(WITH_ROCM) - hip_library(dynload_cuda SRCS ${HIP_SRCS} DEPS dynamic_loader phi_dynload_cuda) - cc_library(dynload_warpctc SRCS warpctc.cc DEPS dynamic_loader warpctc phi_dynload_warpctc) -elseif (WITH_ASCEND_CL) - cc_library(dynload_warpctc SRCS warpctc.cc DEPS dynamic_loader warpctc npu_hccl phi_dynload_warpctc) + hip_library( + dynload_cuda + SRCS ${HIP_SRCS} + DEPS dynamic_loader phi_dynload_cuda) + cc_library( + dynload_warpctc + SRCS warpctc.cc + DEPS dynamic_loader warpctc phi_dynload_warpctc) +elseif(WITH_ASCEND_CL) + cc_library( + dynload_warpctc + SRCS warpctc.cc + DEPS dynamic_loader warpctc npu_hccl phi_dynload_warpctc) else() - nv_library(dynload_cuda SRCS ${CUDA_SRCS} DEPS dynamic_loader phi_dynload_cuda) - cc_library(dynload_warpctc SRCS warpctc.cc DEPS dynamic_loader warpctc phi_dynload_warpctc) + nv_library( + dynload_cuda + SRCS ${CUDA_SRCS} + DEPS dynamic_loader phi_dynload_cuda) + cc_library( + dynload_warpctc + SRCS warpctc.cc + DEPS dynamic_loader warpctc phi_dynload_warpctc) endif() -if (WITH_MKLML) - cc_library(dynload_mklml SRCS mklml.cc DEPS dynamic_loader mklml phi_dynload_mklml) +if(WITH_MKLML) + cc_library( + dynload_mklml + SRCS mklml.cc + DEPS dynamic_loader mklml phi_dynload_mklml) endif() # TODO(TJ): add iomp, mkldnn? -if (MKL_FOUND AND WITH_ONEMKL) +if(MKL_FOUND AND WITH_ONEMKL) message("ONEMKL INCLUDE directory is ${MKL_INCLUDE}") - cc_library(dynload_mklrt SRCS mklrt.cc DEPS dynamic_loader phi_dynload_mklrt) + cc_library( + dynload_mklrt + SRCS mklrt.cc + DEPS dynamic_loader phi_dynload_mklrt) target_include_directories(dynload_mklrt PRIVATE ${MKL_INCLUDE}) endif() diff --git a/paddle/fluid/platform/profiler/CMakeLists.txt b/paddle/fluid/platform/profiler/CMakeLists.txt index 084bc44dbc7..ea3111b7361 100644 --- a/paddle/fluid/platform/profiler/CMakeLists.txt +++ b/paddle/fluid/platform/profiler/CMakeLists.txt @@ -1,14 +1,52 @@ -cc_library(host_tracer SRCS host_tracer.cc DEPS enforce) -cc_library(cuda_tracer SRCS cuda_tracer.cc cupti_data_process.cc DEPS workqueue_utils enforce glog) +cc_library( + host_tracer + SRCS host_tracer.cc + DEPS enforce) +cc_library( + cuda_tracer + SRCS cuda_tracer.cc cupti_data_process.cc + DEPS workqueue_utils enforce glog) add_subdirectory(mlu) -cc_library(event_node SRCS event_node.cc DEPS enforce) -cc_library(profiler_utils SRCS utils.cc DEPS enforce glog) +cc_library( + event_node + SRCS event_node.cc + DEPS enforce) +cc_library( + profiler_utils + SRCS utils.cc + DEPS enforce glog) add_subdirectory(dump) -cc_library(profiler_logger SRCS chrometracing_logger.cc dump/serialization_logger.cc dump/deserialization_reader.cc DEPS nodetreeproto event_node profiler_utils) -cc_library(event_bind SRCS event_python.cc DEPS profiler_logger) -cc_library(cpu_utilization SRCS cpu_utilization.cc DEPS cpu_info os_info enforce glog) -cc_library(new_profiler SRCS profiler.cc DEPS host_tracer cuda_tracer profiler_utils cpu_utilization event_bind mlu_tracer) -cc_test(test_event_node SRCS test_event_node.cc DEPS event_node profiler_logger) -cc_test(test_extra_info SRCS test_extra_info.cc DEPS profiler_utils) -cc_test(test_serialization_logger SRCS dump/test_serialization_logger.cc DEPS event_bind) -cc_test(new_profiler_test SRCS profiler_test.cc DEPS new_profiler) +cc_library( + profiler_logger + SRCS chrometracing_logger.cc dump/serialization_logger.cc + dump/deserialization_reader.cc + DEPS nodetreeproto event_node profiler_utils) +cc_library( + event_bind + SRCS event_python.cc + DEPS profiler_logger) +cc_library( + cpu_utilization + SRCS cpu_utilization.cc + DEPS cpu_info os_info enforce glog) +cc_library( + new_profiler + SRCS profiler.cc + DEPS host_tracer cuda_tracer profiler_utils cpu_utilization event_bind + mlu_tracer) +cc_test( + test_event_node + SRCS test_event_node.cc + DEPS event_node profiler_logger) +cc_test( + test_extra_info + SRCS test_extra_info.cc + DEPS profiler_utils) +cc_test( + test_serialization_logger + SRCS dump/test_serialization_logger.cc + DEPS event_bind) +cc_test( + new_profiler_test + SRCS profiler_test.cc + DEPS new_profiler) diff --git a/paddle/fluid/platform/profiler/mlu/CMakeLists.txt b/paddle/fluid/platform/profiler/mlu/CMakeLists.txt index 01b3757ea69..d510edb0457 100644 --- a/paddle/fluid/platform/profiler/mlu/CMakeLists.txt +++ b/paddle/fluid/platform/profiler/mlu/CMakeLists.txt @@ -2,4 +2,7 @@ if(WITH_MLU) set(MLU_INFO mlu_info) endif() -cc_library(mlu_tracer SRCS mlu_tracer.cc cnpapi_data_process.cc DEPS workqueue_utils enforce glog ${MLU_INFO}) +cc_library( + mlu_tracer + SRCS mlu_tracer.cc cnpapi_data_process.cc + DEPS workqueue_utils enforce glog ${MLU_INFO}) diff --git a/paddle/fluid/platform/stream/CMakeLists.txt b/paddle/fluid/platform/stream/CMakeLists.txt index 6a825e9077c..25d2874ca04 100644 --- a/paddle/fluid/platform/stream/CMakeLists.txt +++ b/paddle/fluid/platform/stream/CMakeLists.txt @@ -1,3 +1,6 @@ -IF(WITH_GPU OR WITH_ROCM) - cc_library(cuda_stream SRCS cuda_stream.cc DEPS enforce boost eigen3 ${MKLDNN_CTX_DEPS}) -ENDIF() +if(WITH_GPU OR WITH_ROCM) + cc_library( + cuda_stream + SRCS cuda_stream.cc + DEPS enforce boost eigen3 ${MKLDNN_CTX_DEPS}) +endif() diff --git a/paddle/fluid/pybind/CMakeLists.txt b/paddle/fluid/pybind/CMakeLists.txt index 90a86aaf31f..bf74d118432 100755 --- a/paddle/fluid/pybind/CMakeLists.txt +++ b/paddle/fluid/pybind/CMakeLists.txt @@ -1,46 +1,82 @@ -set(PYBIND_DEPS init pybind python proto_desc memory executor fleet_wrapper box_wrapper metrics prune - feed_fetch_method pass generate_pass pass_builder parallel_executor profiler layer tracer engine scope_pool - analysis_predictor imperative_profiler imperative_flag save_load_util dlpack_tensor device_context - gloo_wrapper infer_io_utils heter_wrapper generator op_version_registry ps_gpu_wrapper custom_operator - cost_model cuda_graph_with_memory_pool fleet_executor global_utils phi_utils tcp_store new_profiler) - -if (WITH_PSCORE) +set(PYBIND_DEPS + init + pybind + python + proto_desc + memory + executor + fleet_wrapper + box_wrapper + metrics + prune + feed_fetch_method + pass + generate_pass + pass_builder + parallel_executor + profiler + layer + tracer + engine + scope_pool + analysis_predictor + imperative_profiler + imperative_flag + save_load_util + dlpack_tensor + device_context + gloo_wrapper + infer_io_utils + heter_wrapper + generator + op_version_registry + ps_gpu_wrapper + custom_operator + cost_model + cuda_graph_with_memory_pool + fleet_executor + global_utils + phi_utils + tcp_store + new_profiler) + +if(WITH_PSCORE) set(PYBIND_DEPS ${PYBIND_DEPS} ps_service) set(PYBIND_DEPS ${PYBIND_DEPS} graph_py_service) - if (WITH_HETERPS) + if(WITH_HETERPS) set(PYBIND_DEPS ${PYBIND_DEPS} graph_gpu_wrapper) endif() endif() -if (WITH_GPU OR WITH_ROCM) +if(WITH_GPU OR WITH_ROCM) set(PYBIND_DEPS ${PYBIND_DEPS} dynload_cuda) set(PYBIND_DEPS ${PYBIND_DEPS} cuda_device_guard) endif() -if (WITH_GPU) +if(WITH_GPU) set(PYBIND_DEPS ${PYBIND_DEPS} cuda_profiler) endif() -if (WITH_IPU) +if(WITH_IPU) set(PYBIND_DEPS ${PYBIND_DEPS} ipu_info) endif() -if (WITH_NCCL OR WITH_RCCL) +if(WITH_NCCL OR WITH_RCCL) set(PYBIND_DEPS ${PYBIND_DEPS} nccl_wrapper) set(PYBIND_DEPS ${PYBIND_DEPS} reducer) endif() -if (WITH_XPU_BKCL) +if(WITH_XPU_BKCL) set(PYBIND_DEPS ${PYBIND_DEPS} reducer) set(PYBIND_DEPS ${PYBIND_DEPS} bkcl_context) set(PYBIND_DEPS ${PYBIND_DEPS} heter_ccl_context) endif() -if (WITH_ASCEND_CL) +if(WITH_ASCEND_CL) set(PYBIND_DEPS ${PYBIND_DEPS} reducer) set(PYBIND_DEPS ${PYBIND_DEPS} hccl_context) set(PYBIND_DEPS ${PYBIND_DEPS} heter_ccl_context) endif() -if (WITH_CNCL) +if(WITH_CNCL) set(PYBIND_DEPS ${PYBIND_DEPS} reducer) set(PYBIND_DEPS ${PYBIND_DEPS} cncl_context) endif() @@ -48,10 +84,10 @@ endif() if(NOT WIN32) set(PYBIND_DEPS ${PYBIND_DEPS} data_loader) set(PYBIND_DEPS ${PYBIND_DEPS} mmap_allocator) - if (WITH_GPU) + if(WITH_GPU) set(PYBIND_DEPS ${PYBIND_DEPS} cuda_ipc_allocator) endif() - if (WITH_NCCL OR WITH_RCCL) + if(WITH_NCCL OR WITH_RCCL) set(PYBIND_DEPS ${PYBIND_DEPS} nccl_context) set(PYBIND_DEPS ${PYBIND_DEPS} heter_ccl_context) endif() @@ -63,45 +99,45 @@ if(WITH_PYTHON) endif() set(PYBIND_SRCS - pybind.cc - exception.cc - protobuf.cc - const_value.cc - global_value_getter_setter.cc - reader_py.cc - fleet_wrapper_py.cc - heter_wrapper_py.cc - ps_gpu_wrapper_py.cc - gloo_wrapper_py.cc - box_helper_py.cc - metrics_py.cc - data_set_py.cc - imperative.cc - ir.cc - bind_cost_model.cc - bind_fleet_executor.cc - inference_api.cc - compatible.cc - io.cc - generator_py.cc - communication.cc - cuda_streams_py.cc) + pybind.cc + exception.cc + protobuf.cc + const_value.cc + global_value_getter_setter.cc + reader_py.cc + fleet_wrapper_py.cc + heter_wrapper_py.cc + ps_gpu_wrapper_py.cc + gloo_wrapper_py.cc + box_helper_py.cc + metrics_py.cc + data_set_py.cc + imperative.cc + ir.cc + bind_cost_model.cc + bind_fleet_executor.cc + inference_api.cc + compatible.cc + io.cc + generator_py.cc + communication.cc + cuda_streams_py.cc) if(NOT ON_INFER) - set (PYBIND_DEPS ${PYBIND_DEPS} processgroup eager_reducer) - if (WITH_NCCL) - set (PYBIND_DEPS ${PYBIND_DEPS} processgroup_nccl) - if (WITH_PSCORE) - set (PYBIND_DEPS ${PYBIND_DEPS} processgroup_heter) + set(PYBIND_DEPS ${PYBIND_DEPS} processgroup eager_reducer) + if(WITH_NCCL) + set(PYBIND_DEPS ${PYBIND_DEPS} processgroup_nccl) + if(WITH_PSCORE) + set(PYBIND_DEPS ${PYBIND_DEPS} processgroup_heter) endif() endif() - if (WITH_GLOO) - set (PYBIND_DEPS ${PYBIND_DEPS} processgroup_gloo) + if(WITH_GLOO) + set(PYBIND_DEPS ${PYBIND_DEPS} processgroup_gloo) endif() if(WITH_ASCEND_CL) - set (PYBIND_DEPS ${PYBIND_DEPS} processgroup_hccl) - if (WITH_PSCORE) - set (PYBIND_DEPS ${PYBIND_DEPS} processgroup_heter) + set(PYBIND_DEPS ${PYBIND_DEPS} processgroup_hccl) + if(WITH_PSCORE) + set(PYBIND_DEPS ${PYBIND_DEPS} processgroup_heter) endif() endif() set(PYBIND_SRCS ${PYBIND_SRCS} distributed_py.cc) @@ -119,45 +155,69 @@ if(WITH_GLOO) set(PYBIND_DEPS ${PYBIND_DEPS} reducer) endif(WITH_GLOO) -if (WITH_CRYPTO) +if(WITH_CRYPTO) set(PYBIND_DEPS ${PYBIND_DEPS} paddle_crypto) set(PYBIND_SRCS ${PYBIND_SRCS} crypto.cc) -endif (WITH_CRYPTO) - -if (WITH_PSLIB) - set(DISTRIBUTE_COMPILE_FLAGS "-Wno-non-virtual-dtor -Wno-error=non-virtual-dtor -Wno-error=delete-non-virtual-dtor -Wno-error=sign-compare -Wno-error=unused-variable -Wno-error=return-type -Wno-error=unused-but-set-variable -Wno-error=type-limits -Wno-error=unknown-pragmas -Wno-error=parentheses -Wno-error=unused-result") - if (CMAKE_CXX_COMPILER_VERSION VERSION_GREATER 7.0) - set(DISTRIBUTE_COMPILE_FLAGS - "${DISTRIBUTE_COMPILE_FLAGS} -faligned-new") +endif(WITH_CRYPTO) + +if(WITH_PSLIB) + set(DISTRIBUTE_COMPILE_FLAGS + "-Wno-non-virtual-dtor -Wno-error=non-virtual-dtor -Wno-error=delete-non-virtual-dtor -Wno-error=sign-compare -Wno-error=unused-variable -Wno-error=return-type -Wno-error=unused-but-set-variable -Wno-error=type-limits -Wno-error=unknown-pragmas -Wno-error=parentheses -Wno-error=unused-result" + ) + if(CMAKE_CXX_COMPILER_VERSION VERSION_GREATER 7.0) + set(DISTRIBUTE_COMPILE_FLAGS "${DISTRIBUTE_COMPILE_FLAGS} -faligned-new") endif() - set_source_files_properties(heter_wrapper_py.cc PROPERTIES COMPILE_FLAGS ${DISTRIBUTE_COMPILE_FLAGS}) + set_source_files_properties( + heter_wrapper_py.cc PROPERTIES COMPILE_FLAGS ${DISTRIBUTE_COMPILE_FLAGS}) endif(WITH_PSLIB) -if (WITH_PSCORE) - if (WITH_ARM_BRPC) - set(DISTRIBUTE_COMPILE_FLAGS "-faligned-new -Wno-non-virtual-dtor -Wno-error=non-virtual-dtor -Wno-error=delete-non-virtual-dtor -Wno-error=sign-compare -Wno-error=unused-variable -Wno-error=return-type -Wno-error=unused-but-set-variable -Wno-error=unknown-pragmas -Wno-error=parentheses -Wno-error=unused-result") +if(WITH_PSCORE) + if(WITH_ARM_BRPC) + set(DISTRIBUTE_COMPILE_FLAGS + "-faligned-new -Wno-non-virtual-dtor -Wno-error=non-virtual-dtor -Wno-error=delete-non-virtual-dtor -Wno-error=sign-compare -Wno-error=unused-variable -Wno-error=return-type -Wno-error=unused-but-set-variable -Wno-error=unknown-pragmas -Wno-error=parentheses -Wno-error=unused-result" + ) else() - set(DISTRIBUTE_COMPILE_FLAGS "-Wno-non-virtual-dtor -Wno-error=non-virtual-dtor -Wno-error=delete-non-virtual-dtor -Wno-error=sign-compare -Wno-error=unused-variable -Wno-error=return-type -Wno-error=unused-but-set-variable -Wno-error=unknown-pragmas -Wno-error=parentheses -Wno-error=unused-result") + set(DISTRIBUTE_COMPILE_FLAGS + "-Wno-non-virtual-dtor -Wno-error=non-virtual-dtor -Wno-error=delete-non-virtual-dtor -Wno-error=sign-compare -Wno-error=unused-variable -Wno-error=return-type -Wno-error=unused-but-set-variable -Wno-error=unknown-pragmas -Wno-error=parentheses -Wno-error=unused-result" + ) endif() - set_source_files_properties(fleet_py.cc PROPERTIES COMPILE_FLAGS ${DISTRIBUTE_COMPILE_FLAGS}) + set_source_files_properties( + fleet_py.cc PROPERTIES COMPILE_FLAGS ${DISTRIBUTE_COMPILE_FLAGS}) list(APPEND PYBIND_DEPS fleet communicator index_wrapper index_sampler) list(APPEND PYBIND_SRCS fleet_py.cc) endif() -if (WITH_NCCL OR WITH_RCCL) +if(WITH_NCCL OR WITH_RCCL) list(APPEND PYBIND_SRCS nccl_wrapper_py.cc) endif() if(WITH_PYTHON) # generate op pybind functions automatically for dygraph. - if (WITH_ASCEND_CL) - set(OP_FUNCTION_GENERETOR_DEPS pybind proto_desc executor layer tracer engine imperative_profiler imperative_flag ascend_wrapper) + if(WITH_ASCEND_CL) + set(OP_FUNCTION_GENERETOR_DEPS + pybind + proto_desc + executor + layer + tracer + engine + imperative_profiler + imperative_flag + ascend_wrapper) else() - set(OP_FUNCTION_GENERETOR_DEPS pybind proto_desc executor layer tracer engine imperative_profiler imperative_flag) + set(OP_FUNCTION_GENERETOR_DEPS + pybind + proto_desc + executor + layer + tracer + engine + imperative_profiler + imperative_flag) endif() list(APPEND OP_FUNCTION_GENERETOR_DEPS ${GLOB_OP_LIB}) list(APPEND OP_FUNCTION_GENERETOR_DEPS ${GLOB_OPERATOR_DEPS}) - if (WITH_NCCL OR WITH_RCCL) + if(WITH_NCCL OR WITH_RCCL) list(APPEND OP_FUNCTION_GENERETOR_DEPS nccl_context) endif() @@ -176,13 +236,15 @@ if(WITH_PYTHON) add_executable(op_function_generator op_function_generator.cc) target_link_libraries(op_function_generator ${OP_FUNCTION_GENERETOR_DEPS}) add_executable(eager_op_function_generator eager_op_function_generator.cc) - target_link_libraries(eager_op_function_generator ${OP_FUNCTION_GENERETOR_DEPS}) + target_link_libraries(eager_op_function_generator + ${OP_FUNCTION_GENERETOR_DEPS}) if(NOT WIN32) add_executable(kernel_signature_generator kernel_signature_generator.cc) - target_link_libraries(kernel_signature_generator ${OP_FUNCTION_GENERETOR_DEPS}) + target_link_libraries(kernel_signature_generator + ${OP_FUNCTION_GENERETOR_DEPS}) endif() - get_property (os_dependency_modules GLOBAL PROPERTY OS_DEPENDENCY_MODULES) + get_property(os_dependency_modules GLOBAL PROPERTY OS_DEPENDENCY_MODULES) target_link_libraries(op_function_generator ${os_dependency_modules}) target_link_libraries(eager_op_function_generator ${os_dependency_modules}) if(WITH_ROCM) @@ -193,11 +255,13 @@ if(WITH_PYTHON) set(impl_file ${CMAKE_SOURCE_DIR}/paddle/fluid/pybind/op_function_impl.h) set(tmp_impl_file ${impl_file}.tmp) - set(eager_impl_file ${CMAKE_SOURCE_DIR}/paddle/fluid/pybind/eager_op_function_impl.h) + set(eager_impl_file + ${CMAKE_SOURCE_DIR}/paddle/fluid/pybind/eager_op_function_impl.h) set(tmp_eager_impl_file ${eager_impl_file}.tmp) set(OP_IMPL_DEPS op_function_generator) - set(EAGER_OP_IMPL_DEPS eager_op_function_generator eager_final_state_python_c_codegen) + set(EAGER_OP_IMPL_DEPS eager_op_function_generator + eager_final_state_python_c_codegen) if(WIN32) if("${CMAKE_GENERATOR}" STREQUAL "Ninja") @@ -206,81 +270,103 @@ if(WITH_PYTHON) set(op_impl_path "${CMAKE_CURRENT_BINARY_DIR}/${CMAKE_BUILD_TYPE}") endif() - file(WRITE ${CMAKE_BINARY_DIR}/paddle/fluid/pybind/op_function_generator_retry.bat "" - "set build_times=1\n" - ":retry\n" - "ECHO op_function_generator run %build_times% time\n" - "taskkill /f /im op_function_generator.exe 2>NUL\n" - "${op_impl_path}/op_function_generator.exe ${tmp_impl_file}\n" - "if %ERRORLEVEL% NEQ 0 (\n" - " set /a build_times=%build_times%+1\n" - " if %build_times% GEQ 10 (\n" - " exit /b 1\n" - " ) else (\n" - " goto :retry\n" - " )\n" - ")\n" - "exit /b 0") - - file(WRITE ${CMAKE_BINARY_DIR}/paddle/fluid/pybind/eager_op_function_generator_retry.bat "" - "set build_times=1\n" - ":retry\n" - "ECHO eager_op_function_generator run %build_times% time\n" - "taskkill /f /im eager_op_function_generator.exe 2>NUL\n" - "${op_impl_path}/eager_op_function_generator.exe ${tmp_eager_impl_file}\n" - "if %ERRORLEVEL% NEQ 0 (\n" - " set /a build_times=%build_times%+1\n" - " if %build_times% GEQ 10 (\n" - " exit /b 1\n" - " ) else (\n" - " goto :retry\n" - " )\n" - ")\n" - "exit /b 0") + file( + WRITE + ${CMAKE_BINARY_DIR}/paddle/fluid/pybind/op_function_generator_retry.bat + "" + "set build_times=1\n" + ":retry\n" + "ECHO op_function_generator run %build_times% time\n" + "taskkill /f /im op_function_generator.exe 2>NUL\n" + "${op_impl_path}/op_function_generator.exe ${tmp_impl_file}\n" + "if %ERRORLEVEL% NEQ 0 (\n" + " set /a build_times=%build_times%+1\n" + " if %build_times% GEQ 10 (\n" + " exit /b 1\n" + " ) else (\n" + " goto :retry\n" + " )\n" + ")\n" + "exit /b 0") + + file( + WRITE + ${CMAKE_BINARY_DIR}/paddle/fluid/pybind/eager_op_function_generator_retry.bat + "" + "set build_times=1\n" + ":retry\n" + "ECHO eager_op_function_generator run %build_times% time\n" + "taskkill /f /im eager_op_function_generator.exe 2>NUL\n" + "${op_impl_path}/eager_op_function_generator.exe ${tmp_eager_impl_file}\n" + "if %ERRORLEVEL% NEQ 0 (\n" + " set /a build_times=%build_times%+1\n" + " if %build_times% GEQ 10 (\n" + " exit /b 1\n" + " ) else (\n" + " goto :retry\n" + " )\n" + ")\n" + "exit /b 0") if(${CBLAS_PROVIDER} STREQUAL MKLML) - ADD_CUSTOM_COMMAND(OUTPUT ${op_impl_path}/libiomp5md.dll - COMMAND ${CMAKE_COMMAND} -E copy ${MKLML_SHARED_IOMP_LIB} ${op_impl_path} + add_custom_command( + OUTPUT ${op_impl_path}/libiomp5md.dll + COMMAND ${CMAKE_COMMAND} -E copy ${MKLML_SHARED_IOMP_LIB} + ${op_impl_path} DEPENDS mklml) list(APPEND OP_IMPL_DEPS ${op_impl_path}/libiomp5md.dll) list(APPEND EAGER_OP_IMPL_DEPS ${op_impl_path}/libiomp5md.dll) else(${CBLAS_PROVIDER} STREQUAL EXTERN_OPENBLAS) - ADD_CUSTOM_COMMAND(OUTPUT ${op_impl_path}/openblas.dll + add_custom_command( + OUTPUT ${op_impl_path}/openblas.dll COMMAND ${CMAKE_COMMAND} -E copy ${OPENBLAS_SHARED_LIB} ${op_impl_path} DEPENDS extern_openblas) list(APPEND OP_IMPL_DEPS ${op_impl_path}/openblas.dll) list(APPEND EAGER_OP_IMPL_DEPS ${op_impl_path}/openblas.dll) endif() if(WITH_MKLDNN) - ADD_CUSTOM_COMMAND(OUTPUT ${op_impl_path}/mkldnn.dll + add_custom_command( + OUTPUT ${op_impl_path}/mkldnn.dll COMMAND ${CMAKE_COMMAND} -E copy ${MKLDNN_SHARED_LIB} ${op_impl_path} DEPENDS mkldnn) - list(APPEND OP_IMPL_DEPS ${op_impl_path}/mkldnn.dll) - list(APPEND EAGER_OP_IMPL_DEPS ${op_impl_path}/mkldnn.dll) + list(APPEND OP_IMPL_DEPS ${op_impl_path}/mkldnn.dll) + list(APPEND EAGER_OP_IMPL_DEPS ${op_impl_path}/mkldnn.dll) endif() if(WITH_ONNXRUNTIME) - ADD_CUSTOM_COMMAND(OUTPUT ${CMAKE_CURRENT_BINARY_DIR}/paddle2onnx.dll - COMMAND ${CMAKE_COMMAND} -E copy ${PADDLE2ONNX_SHARED_LIB} ${CMAKE_CURRENT_BINARY_DIR} + add_custom_command( + OUTPUT ${CMAKE_CURRENT_BINARY_DIR}/paddle2onnx.dll + COMMAND ${CMAKE_COMMAND} -E copy ${PADDLE2ONNX_SHARED_LIB} + ${CMAKE_CURRENT_BINARY_DIR} DEPENDS paddle2onnx) list(APPEND OP_IMPL_DEPS ${CMAKE_CURRENT_BINARY_DIR}/paddle2onnx.dll) - list(APPEND EAGER_OP_IMPL_DEPS ${CMAKE_CURRENT_BINARY_DIR}/paddle2onnx.dll) + list(APPEND EAGER_OP_IMPL_DEPS + ${CMAKE_CURRENT_BINARY_DIR}/paddle2onnx.dll) - ADD_CUSTOM_COMMAND(OUTPUT ${CMAKE_CURRENT_BINARY_DIR}/onnxruntime.dll - COMMAND ${CMAKE_COMMAND} -E copy ${ONNXRUNTIME_SHARED_LIB} ${CMAKE_CURRENT_BINARY_DIR} + add_custom_command( + OUTPUT ${CMAKE_CURRENT_BINARY_DIR}/onnxruntime.dll + COMMAND ${CMAKE_COMMAND} -E copy ${ONNXRUNTIME_SHARED_LIB} + ${CMAKE_CURRENT_BINARY_DIR} DEPENDS onnxruntime) list(APPEND OP_IMPL_DEPS ${CMAKE_CURRENT_BINARY_DIR}/onnxruntime.dll) - list(APPEND EAGER_OP_IMPL_DEPS ${CMAKE_CURRENT_BINARY_DIR}/onnxruntime.dll) + list(APPEND EAGER_OP_IMPL_DEPS + ${CMAKE_CURRENT_BINARY_DIR}/onnxruntime.dll) endif() - add_custom_command(OUTPUT ${impl_file} - COMMAND ${CMAKE_BINARY_DIR}/paddle/fluid/pybind/op_function_generator_retry.bat - COMMAND ${CMAKE_COMMAND} -E copy_if_different ${tmp_impl_file} ${impl_file} + add_custom_command( + OUTPUT ${impl_file} + COMMAND + ${CMAKE_BINARY_DIR}/paddle/fluid/pybind/op_function_generator_retry.bat + COMMAND ${CMAKE_COMMAND} -E copy_if_different ${tmp_impl_file} + ${impl_file} COMMENT "copy_if_different ${tmp_impl_file} to ${impl_file}" DEPENDS ${OP_IMPL_DEPS}) if(NOT ((NOT WITH_PYTHON) AND ON_INFER)) - add_custom_command(OUTPUT ${eager_impl_file} - COMMAND ${CMAKE_BINARY_DIR}/paddle/fluid/pybind/eager_op_function_generator_retry.bat - COMMAND ${CMAKE_COMMAND} -E copy_if_different ${tmp_eager_impl_file} ${eager_impl_file} + add_custom_command( + OUTPUT ${eager_impl_file} + COMMAND + ${CMAKE_BINARY_DIR}/paddle/fluid/pybind/eager_op_function_generator_retry.bat + COMMAND ${CMAKE_COMMAND} -E copy_if_different ${tmp_eager_impl_file} + ${eager_impl_file} COMMENT "copy_if_different ${tmp_eager_impl_file} to ${eager_impl_file}" DEPENDS ${EAGER_OP_IMPL_DEPS}) endif() @@ -290,79 +376,120 @@ if(WITH_PYTHON) # LD_LIBRARY_PATH. This is different with Windows platformm, which search # *.dll in current directory automatically. if(WITH_ONNXRUNTIME) - if (APPLE) - set(PADDLE2ONNX_PYBIND_OUT ${CMAKE_CURRENT_BINARY_DIR}/libpaddle2onnx.dylib) - set(ONNXRUNTIME_PYBIND_OUT ${CMAKE_CURRENT_BINARY_DIR}/libonnxruntime.dylib) + if(APPLE) + set(PADDLE2ONNX_PYBIND_OUT + ${CMAKE_CURRENT_BINARY_DIR}/libpaddle2onnx.dylib) + set(ONNXRUNTIME_PYBIND_OUT + ${CMAKE_CURRENT_BINARY_DIR}/libonnxruntime.dylib) else() - set(PADDLE2ONNX_PYBIND_OUT ${CMAKE_CURRENT_BINARY_DIR}/libpaddle2onnx.so) - set(ONNXRUNTIME_PYBIND_OUT ${CMAKE_CURRENT_BINARY_DIR}/libonnxruntime.so) + set(PADDLE2ONNX_PYBIND_OUT + ${CMAKE_CURRENT_BINARY_DIR}/libpaddle2onnx.so) + set(ONNXRUNTIME_PYBIND_OUT + ${CMAKE_CURRENT_BINARY_DIR}/libonnxruntime.so) endif() - ADD_CUSTOM_COMMAND(OUTPUT ${PADDLE2ONNX_PYBIND_OUT} - COMMAND ${CMAKE_COMMAND} -E copy ${PADDLE2ONNX_LIB} ${CMAKE_CURRENT_BINARY_DIR} + add_custom_command( + OUTPUT ${PADDLE2ONNX_PYBIND_OUT} + COMMAND ${CMAKE_COMMAND} -E copy ${PADDLE2ONNX_LIB} + ${CMAKE_CURRENT_BINARY_DIR} DEPENDS paddle2onnx) list(APPEND OP_IMPL_DEPS ${PADDLE2ONNX_PYBIND_OUT}) list(APPEND EAGER_OP_IMPL_DEPS ${PADDLE2ONNX_PYBIND_OUT}) - ADD_CUSTOM_COMMAND(OUTPUT ${ONNXRUNTIME_PYBIND_OUT} - COMMAND ${CMAKE_COMMAND} -E copy ${ONNXRUNTIME_LIB} ${CMAKE_CURRENT_BINARY_DIR} + add_custom_command( + OUTPUT ${ONNXRUNTIME_PYBIND_OUT} + COMMAND ${CMAKE_COMMAND} -E copy ${ONNXRUNTIME_LIB} + ${CMAKE_CURRENT_BINARY_DIR} DEPENDS onnxruntime) list(APPEND OP_IMPL_DEPS ${ONNXRUNTIME_PYBIND_OUT}) list(APPEND EAGER_OP_IMPL_DEPS ${ONNXRUNTIME_PYBIND_OUT}) endif() if(WITH_MKLML) - ADD_CUSTOM_COMMAND(OUTPUT ${CMAKE_CURRENT_BINARY_DIR}/libiomp5.so - COMMAND ${CMAKE_COMMAND} -E copy ${MKLML_SHARED_IOMP_LIB} ${CMAKE_CURRENT_BINARY_DIR} + add_custom_command( + OUTPUT ${CMAKE_CURRENT_BINARY_DIR}/libiomp5.so + COMMAND ${CMAKE_COMMAND} -E copy ${MKLML_SHARED_IOMP_LIB} + ${CMAKE_CURRENT_BINARY_DIR} DEPENDS mklml) list(APPEND OP_IMPL_DEPS ${CMAKE_CURRENT_BINARY_DIR}/libiomp5.so) list(APPEND EAGER_OP_IMPL_DEPS ${CMAKE_CURRENT_BINARY_DIR}/libiomp5.so) endif() if(WITH_MKLDNN) - ADD_CUSTOM_COMMAND(OUTPUT ${CMAKE_CURRENT_BINARY_DIR}/libdnnl.so.0 - COMMAND ${CMAKE_COMMAND} -E copy ${MKLDNN_SHARED_LIB} ${CMAKE_CURRENT_BINARY_DIR} + add_custom_command( + OUTPUT ${CMAKE_CURRENT_BINARY_DIR}/libdnnl.so.0 + COMMAND ${CMAKE_COMMAND} -E copy ${MKLDNN_SHARED_LIB} + ${CMAKE_CURRENT_BINARY_DIR} DEPENDS mkldnn) list(APPEND OP_IMPL_DEPS ${CMAKE_CURRENT_BINARY_DIR}/libdnnl.so.0) list(APPEND EAGER_OP_IMPL_DEPS ${CMAKE_CURRENT_BINARY_DIR}/libdnnl.so.0) endif() - add_custom_command(OUTPUT ${impl_file} - COMMAND ${CMAKE_COMMAND} -E env "LD_LIBRARY_PATH=$ENV{LD_LIBRARY_PATH}:." - "${CMAKE_CURRENT_BINARY_DIR}/op_function_generator" - "${tmp_impl_file}" - COMMAND ${CMAKE_COMMAND} -E copy_if_different ${tmp_impl_file} ${impl_file} - COMMENT "copy_if_different ${tmp_impl_file} to ${impl_file}" - DEPENDS ${OP_IMPL_DEPS} - VERBATIM) + add_custom_command( + OUTPUT ${impl_file} + COMMAND + ${CMAKE_COMMAND} -E env "LD_LIBRARY_PATH=$ENV{LD_LIBRARY_PATH}:." + "${CMAKE_CURRENT_BINARY_DIR}/op_function_generator" "${tmp_impl_file}" + COMMAND ${CMAKE_COMMAND} -E copy_if_different ${tmp_impl_file} + ${impl_file} + COMMENT "copy_if_different ${tmp_impl_file} to ${impl_file}" + DEPENDS ${OP_IMPL_DEPS} + VERBATIM) if(NOT ((NOT WITH_PYTHON) AND ON_INFER)) - add_custom_command(OUTPUT ${eager_impl_file} - COMMAND ${CMAKE_COMMAND} -E env "LD_LIBRARY_PATH=$ENV{LD_LIBRARY_PATH}:." - "${CMAKE_CURRENT_BINARY_DIR}/eager_op_function_generator" - "${tmp_eager_impl_file}" - COMMAND ${CMAKE_COMMAND} -E copy_if_different ${tmp_eager_impl_file} ${eager_impl_file} - COMMENT "copy_if_different ${tmp_eager_impl_file} to ${eager_impl_file}" - DEPENDS ${EAGER_OP_IMPL_DEPS} - VERBATIM) - endif() + add_custom_command( + OUTPUT ${eager_impl_file} + COMMAND + ${CMAKE_COMMAND} -E env "LD_LIBRARY_PATH=$ENV{LD_LIBRARY_PATH}:." + "${CMAKE_CURRENT_BINARY_DIR}/eager_op_function_generator" + "${tmp_eager_impl_file}" + COMMAND ${CMAKE_COMMAND} -E copy_if_different ${tmp_eager_impl_file} + ${eager_impl_file} + COMMENT "copy_if_different ${tmp_eager_impl_file} to ${eager_impl_file}" + DEPENDS ${EAGER_OP_IMPL_DEPS} + VERBATIM) + endif() endif(WIN32) add_custom_target(op_function_generator_cmd ALL DEPENDS ${impl_file}) if(NOT ((NOT WITH_PYTHON) AND ON_INFER)) - add_custom_target(eager_op_function_generator_cmd ALL DEPENDS ${eager_impl_file}) + add_custom_target(eager_op_function_generator_cmd ALL + DEPENDS ${eager_impl_file}) endif() - list(APPEND PYBIND_DEPS interpretercore standalone_executor staticgraph_executor_statistics) - cc_library(op_function_common SRCS op_function_common.cc DEPS ${PYBIND_DEPS}) + list(APPEND PYBIND_DEPS interpretercore standalone_executor + staticgraph_executor_statistics) + cc_library( + op_function_common + SRCS op_function_common.cc + DEPS ${PYBIND_DEPS}) list(APPEND PYBIND_DEPS op_function_common) if(NOT ((NOT WITH_PYTHON) AND ON_INFER)) - cc_library(paddle_eager - SRCS eager.cc eager_functions.cc eager_method.cc eager_properties.cc eager_utils.cc eager_py_layer.cc - DEPS eager_api autograd_meta backward grad_node_info phi op_function_common final_dygraph_function final_dygraph_node dygraph_function dygraph_node accumulation_node py_layer_node global_utils utils python custom_operator custom_operator_node) + cc_library( + paddle_eager + SRCS eager.cc eager_functions.cc eager_method.cc eager_properties.cc + eager_utils.cc eager_py_layer.cc + DEPS eager_api + autograd_meta + backward + grad_node_info + phi + op_function_common + final_dygraph_function + final_dygraph_node + dygraph_function + dygraph_node + accumulation_node + py_layer_node + global_utils + utils + python + custom_operator + custom_operator_node) add_dependencies(paddle_eager eager_codegen) add_dependencies(paddle_eager eager_op_function_generator_cmd) list(APPEND PYBIND_DEPS paddle_eager) endif() - cc_library(paddle_pybind SHARED + cc_library( + paddle_pybind SHARED SRCS ${PYBIND_SRCS} DEPS ${PYBIND_DEPS} ${GLOB_OP_LIB} ${GLOB_OPERATOR_DEPS} ${GLOB_DEV_LIB}) @@ -374,7 +501,7 @@ if(WITH_PYTHON) target_link_libraries(paddle_pybind ${ROCM_HIPRTC_LIB}) endif() - get_property (os_dependency_modules GLOBAL PROPERTY OS_DEPENDENCY_MODULES) + get_property(os_dependency_modules GLOBAL PROPERTY OS_DEPENDENCY_MODULES) target_link_libraries(paddle_pybind ${os_dependency_modules}) add_dependencies(paddle_pybind op_function_generator_cmd) endif(WITH_PYTHON) diff --git a/paddle/infrt/CMakeLists.txt b/paddle/infrt/CMakeLists.txt index e5f224bf6ad..3846acbde48 100644 --- a/paddle/infrt/CMakeLists.txt +++ b/paddle/infrt/CMakeLists.txt @@ -1,10 +1,10 @@ -if (NOT WITH_INFRT) - return() +if(NOT WITH_INFRT) + return() endif() -option(INFRT_WITH_PHI "Compile INFRT with PHI" ON) -option(INFRT_WITH_GPU "Compile INFRT with GPU" OFF) -option(INFRT_WITH_TRT "Compile INFRT with TensorRT" OFF) +option(INFRT_WITH_PHI "Compile INFRT with PHI" ON) +option(INFRT_WITH_GPU "Compile INFRT with GPU" OFF) +option(INFRT_WITH_TRT "Compile INFRT with TensorRT" OFF) #TODO(xiaowei) remove fluid include_directories(${PADDLE_SOURCE_DIR}/paddle/fluid/platform) @@ -13,13 +13,13 @@ if(WITH_GPU) set(INFRT_WITH_GPU ON) endif() -if (INFRT_WITH_PHI) +if(INFRT_WITH_PHI) add_definitions("-DINFRT_WITH_PHI") # TODO(wilber): Now Infrt gpu/trt depends on phi's components, Modify compile dependency options later. - if (INFRT_WITH_GPU) + if(INFRT_WITH_GPU) add_definitions("-DINFRT_WITH_GPU") - if (INFRT_WITH_TRT) + if(INFRT_WITH_TRT) add_definitions("-DINFRT_WITH_TRT") endif() endif() @@ -32,8 +32,8 @@ foreach(flag ${INFRT_FLAGS}) safe_set_cxxflag(CMAKE_CXX_FLAGS ${flag}) endforeach() -set(INFRT_SOURCE_DIR "${PADDLE_SOURCE_DIR}/paddle/infrt" ) -set(INFRT_BINARY_DIR "${PADDLE_BINARY_DIR}/paddle/infrt" ) +set(INFRT_SOURCE_DIR "${PADDLE_SOURCE_DIR}/paddle/infrt") +set(INFRT_BINARY_DIR "${PADDLE_BINARY_DIR}/paddle/infrt") set(INFRT_TEST_TARGETS CACHE INTERNAL "") include(infrt_lib) @@ -41,21 +41,29 @@ set(infrt_src CACHE INTERNAL "" FORCE) # Gather headers for library publish. function(core_gather_headers) - file(GLOB includes LIST_DIRECTORIES false RELATIVE ${CMAKE_SOURCE_DIR} *.h) - - foreach(header ${includes}) - set(core_includes "${core_includes};${header}" CACHE INTERNAL "") - endforeach() + file( + GLOB includes + LIST_DIRECTORIES false + RELATIVE ${CMAKE_SOURCE_DIR} + *.h) + + foreach(header ${includes}) + set(core_includes + "${core_includes};${header}" + CACHE INTERNAL "") + endforeach() endfunction() function(gather_srcs SRC_GROUP) - set(options) - set(oneValueArgs) - set(multiValueArgs "SRCS") - cmake_parse_arguments(prefix "" "" "${multiValueArgs}" ${ARGN}) - foreach(cpp ${prefix_SRCS}) - set(${SRC_GROUP} "${${SRC_GROUP}};${CMAKE_CURRENT_SOURCE_DIR}/${cpp}" CACHE INTERNAL "") - endforeach() + set(options) + set(oneValueArgs) + set(multiValueArgs "SRCS") + cmake_parse_arguments(prefix "" "" "${multiValueArgs}" ${ARGN}) + foreach(cpp ${prefix_SRCS}) + set(${SRC_GROUP} + "${${SRC_GROUP}};${CMAKE_CURRENT_SOURCE_DIR}/${cpp}" + CACHE INTERNAL "") + endforeach() endfunction() # This method is similar to the global cc_test, but discard the huge amount default dependencies those are @@ -65,28 +73,36 @@ function(cc_test_tiny TARGET_NAME) set(options SERIAL) set(oneValueArgs "") set(multiValueArgs SRCS DEPS ARGS) - cmake_parse_arguments(cc_test_tiny "${options}" "${oneValueArgs}" "${multiValueArgs}" ${ARGN}) + cmake_parse_arguments(cc_test_tiny "${options}" "${oneValueArgs}" + "${multiValueArgs}" ${ARGN}) add_executable(${TARGET_NAME} ${cc_test_tiny_SRCS}) get_property(os_dependency_modules GLOBAL PROPERTY OS_DEPENDENCY_MODULES) - target_link_libraries(${TARGET_NAME} ${cc_test_tiny_DEPS} ${os_dependency_modules} infrt_gtest_main gtest ) - add_dependencies(${TARGET_NAME} ${cc_test_tiny_DEPS} infrt_gtest_main gtest extern_gtest) + target_link_libraries(${TARGET_NAME} ${cc_test_tiny_DEPS} + ${os_dependency_modules} infrt_gtest_main gtest) + add_dependencies(${TARGET_NAME} ${cc_test_tiny_DEPS} infrt_gtest_main gtest + extern_gtest) - add_test(NAME ${TARGET_NAME} + add_test( + NAME ${TARGET_NAME} COMMAND ${TARGET_NAME} "${cc_test_tiny_ARGS}" - WORKING_DIRECTORY ${CMAKE_CURRENT_BINARY_DIR}) - if (${cc_test_tiny_SERIAL}) + WORKING_DIRECTORY ${CMAKE_CURRENT_BINARY_DIR}) + if(${cc_test_tiny_SERIAL}) set_property(TEST ${TARGET_NAME} PROPERTY RUN_SERIAL 1) endif() - set(INFRT_TEST_TARGETS ${INFRT_TEST_TARGETS} ${TARGET_NAME} CACHE INTERNAL "") + set(INFRT_TEST_TARGETS + ${INFRT_TEST_TARGETS} ${TARGET_NAME} + CACHE INTERNAL "") endif() endfunction() -if (WITH_TESTING) - cc_library(infrt_gtest_main SRCS gtest_main.cc DEPS gtest glog gflags) +if(WITH_TESTING) + cc_library( + infrt_gtest_main + SRCS gtest_main.cc + DEPS gtest glog gflags) endif() - add_subdirectory(api) add_subdirectory(backends) add_subdirectory(common) @@ -99,27 +115,24 @@ add_subdirectory(external_kernels) add_subdirectory(paddle) add_subdirectory(tests) - # MLIR td file generations -set(infrt_mlir_incs - basic_kernels_inc - test_kernels_inc - tensor_shape_inc - dense_tensor_inc - pd_extra_ops_inc - trt_ops_inc - ) - -if (INFRT_WITH_PHI) - set(phi_libs phi) - set(infrt_mlir_incs ${infrt_mlir_incs} - MLIRinfrt_phi_tensorIncGen - MLIRinfrt_phi_baseIncGen - ) +set(infrt_mlir_incs basic_kernels_inc test_kernels_inc tensor_shape_inc + dense_tensor_inc pd_extra_ops_inc trt_ops_inc) + +if(INFRT_WITH_PHI) + set(phi_libs phi) + set(infrt_mlir_incs ${infrt_mlir_incs} MLIRinfrt_phi_tensorIncGen + MLIRinfrt_phi_baseIncGen) endif() -cc_library(infrt SHARED SRCS ${infrt_src} DEPS glog boost ${mlir_libs} ${phi_libs} paddle_framework_proto infrt_naive) -cc_library(infrt_static SRCS ${infrt_src} DEPS glog boost ${mlir_libs} ${phi_libs} paddle_framework_proto) +cc_library( + infrt SHARED + SRCS ${infrt_src} + DEPS glog boost ${mlir_libs} ${phi_libs} paddle_framework_proto infrt_naive) +cc_library( + infrt_static + SRCS ${infrt_src} + DEPS glog boost ${mlir_libs} ${phi_libs} paddle_framework_proto) add_dependencies(infrt ${infrt_mlir_incs} mlir-headers) add_custom_target(test_infrt_exec DEPENDS ${INFRT_TEST_TARGETS}) diff --git a/paddle/infrt/api/CMakeLists.txt b/paddle/infrt/api/CMakeLists.txt index 6d4604edee6..2d88af7d5b5 100644 --- a/paddle/infrt/api/CMakeLists.txt +++ b/paddle/infrt/api/CMakeLists.txt @@ -1,9 +1,8 @@ core_gather_headers() -gather_srcs(infrt_src SRCS - infrt_api.cc - ) -configure_file(${CMAKE_CURRENT_SOURCE_DIR}/infrt_api_test.cc.in ${CMAKE_CURRENT_SOURCE_DIR}/infrt_api_test.cc) +gather_srcs(infrt_src SRCS infrt_api.cc) +configure_file(${CMAKE_CURRENT_SOURCE_DIR}/infrt_api_test.cc.in + ${CMAKE_CURRENT_SOURCE_DIR}/infrt_api_test.cc) # Disable temporarily for the external-kernel's mkldnn is outdate cc_test_tiny(test_infrt_api SRCS infrt_api_test.cc DEPS infrt ${MLIR_IR_LIBS}) diff --git a/paddle/infrt/backends/CMakeLists.txt b/paddle/infrt/backends/CMakeLists.txt index b639f892925..606fec5d92d 100644 --- a/paddle/infrt/backends/CMakeLists.txt +++ b/paddle/infrt/backends/CMakeLists.txt @@ -1,3 +1,5 @@ -if (INFRT_WITH_PHI AND WITH_GPU AND WITH_TENSORRT) +if(INFRT_WITH_PHI + AND WITH_GPU + AND WITH_TENSORRT) add_subdirectory(tensorrt) endif() diff --git a/paddle/infrt/backends/tensorrt/CMakeLists.txt b/paddle/infrt/backends/tensorrt/CMakeLists.txt index 672515ea4b7..9a9db6b737c 100644 --- a/paddle/infrt/backends/tensorrt/CMakeLists.txt +++ b/paddle/infrt/backends/tensorrt/CMakeLists.txt @@ -4,4 +4,11 @@ core_gather_headers() gather_srcs(infrt_src SRCS trt_engine.cc) -cc_test_tiny(test_infrt_trt SRCS test_trt_engine.cc DEPS infrt phi_dynload_cuda tensorrt_converter) +cc_test_tiny( + test_infrt_trt + SRCS + test_trt_engine.cc + DEPS + infrt + phi_dynload_cuda + tensorrt_converter) diff --git a/paddle/infrt/common/CMakeLists.txt b/paddle/infrt/common/CMakeLists.txt index 931e3e42307..c77f099aef4 100644 --- a/paddle/infrt/common/CMakeLists.txt +++ b/paddle/infrt/common/CMakeLists.txt @@ -1,14 +1,17 @@ core_gather_headers() -set(core_includes "${core_includes};infrt/common/dtype.def" CACHE INTERNAL "") +set(core_includes + "${core_includes};infrt/common/dtype.def" + CACHE INTERNAL "") -gather_srcs(infrt_src SRCS - dtype.cc - global.cc - target.cc - type.cc - shared.cc - object.cc - string.cc - buffer.cc - memory.cc - ) +gather_srcs( + infrt_src + SRCS + dtype.cc + global.cc + target.cc + type.cc + shared.cc + object.cc + string.cc + buffer.cc + memory.cc) diff --git a/paddle/infrt/dialect/CMakeLists.txt b/paddle/infrt/dialect/CMakeLists.txt index cf3906c32e5..33206dbd56b 100644 --- a/paddle/infrt/dialect/CMakeLists.txt +++ b/paddle/infrt/dialect/CMakeLists.txt @@ -1,13 +1,14 @@ core_gather_headers() -gather_srcs(infrt_src SRCS - dialect.cc - init_dialects.cc - tensor_shape.cc - dense_tensor.cc - mlir_loader.cc - diagnostic_utils.cc - ) +gather_srcs( + infrt_src + SRCS + dialect.cc + init_dialects.cc + tensor_shape.cc + dense_tensor.cc + mlir_loader.cc + diagnostic_utils.cc) mlir_tablegen_on(tensor_shape DIALECT ts) mlir_tablegen_on(dense_tensor DIALECT dt) @@ -18,12 +19,13 @@ target_link_libraries(infrtopt infrt) add_executable(print-ir print_ir.cc) target_link_libraries(print-ir infrt ${mlir_libs}) -cc_test_tiny(test_infrt_mlir_loader SRCS mlir_loader_test.cc DEPS infrt ${MLIR_IR_LIBS}) +cc_test_tiny(test_infrt_mlir_loader SRCS mlir_loader_test.cc DEPS infrt + ${MLIR_IR_LIBS}) add_subdirectory(infrt) add_subdirectory(pd) add_subdirectory(tensorrt) -if (INFRT_WITH_PHI) - add_subdirectory(phi) +if(INFRT_WITH_PHI) + add_subdirectory(phi) endif() diff --git a/paddle/infrt/dialect/infrt/common/CMakeLists.txt b/paddle/infrt/dialect/infrt/common/CMakeLists.txt index f693c82b506..593030be0a5 100644 --- a/paddle/infrt/dialect/infrt/common/CMakeLists.txt +++ b/paddle/infrt/dialect/infrt/common/CMakeLists.txt @@ -1,6 +1,3 @@ core_gather_headers() -gather_srcs(infrt_src SRCS - types.cc - utils.cc - ) +gather_srcs(infrt_src SRCS types.cc utils.cc) diff --git a/paddle/infrt/dialect/infrt/ir/CMakeLists.txt b/paddle/infrt/dialect/infrt/ir/CMakeLists.txt index 7c009bdb267..103c603e765 100644 --- a/paddle/infrt/dialect/infrt/ir/CMakeLists.txt +++ b/paddle/infrt/dialect/infrt/ir/CMakeLists.txt @@ -1,10 +1,6 @@ core_gather_headers() -gather_srcs(infrt_src SRCS - infrt_dialect.cc - basic_kernels.cc - test_kernels.cc - ) +gather_srcs(infrt_src SRCS infrt_dialect.cc basic_kernels.cc test_kernels.cc) add_mlir_dialect(infrt_ops infrt) diff --git a/paddle/infrt/dialect/infrt/pass/CMakeLists.txt b/paddle/infrt/dialect/infrt/pass/CMakeLists.txt index ab06c00d143..7fa0ee1c716 100644 --- a/paddle/infrt/dialect/infrt/pass/CMakeLists.txt +++ b/paddle/infrt/dialect/infrt/pass/CMakeLists.txt @@ -1,8 +1,5 @@ core_gather_headers() -gather_srcs(infrt_src SRCS - infrt_op_fuse_pass.cc - infrt_weights_unfold_pass.cc - ) +gather_srcs(infrt_src SRCS infrt_op_fuse_pass.cc infrt_weights_unfold_pass.cc) mlir_add_rewriter(infrt_op_fuse) diff --git a/paddle/infrt/dialect/pd/common/CMakeLists.txt b/paddle/infrt/dialect/pd/common/CMakeLists.txt index ee1b0d4c30d..d253a847557 100644 --- a/paddle/infrt/dialect/pd/common/CMakeLists.txt +++ b/paddle/infrt/dialect/pd/common/CMakeLists.txt @@ -1,4 +1,3 @@ core_gather_headers() -gather_srcs(infrt_src SRCS - ) +gather_srcs(infrt_src SRCS) diff --git a/paddle/infrt/dialect/pd/ir/CMakeLists.txt b/paddle/infrt/dialect/pd/ir/CMakeLists.txt index 8aacfc97623..7c1c99a97a0 100644 --- a/paddle/infrt/dialect/pd/ir/CMakeLists.txt +++ b/paddle/infrt/dialect/pd/ir/CMakeLists.txt @@ -1,7 +1,5 @@ core_gather_headers() -gather_srcs(infrt_src SRCS - pd_ops.cc - ) +gather_srcs(infrt_src SRCS pd_ops.cc) add_mlir_dialect(pd_ops pd) mlir_tablegen_on(pd_extra_ops) diff --git a/paddle/infrt/dialect/pd/pass/CMakeLists.txt b/paddle/infrt/dialect/pd/pass/CMakeLists.txt index 827df597b76..be87052794e 100644 --- a/paddle/infrt/dialect/pd/pass/CMakeLists.txt +++ b/paddle/infrt/dialect/pd/pass/CMakeLists.txt @@ -1,8 +1,5 @@ - core_gather_headers() -gather_srcs(infrt_src SRCS - pd_op_fuse_pass.cc - ) +gather_srcs(infrt_src SRCS pd_op_fuse_pass.cc) mlir_add_rewriter(pd_op_fuse) diff --git a/paddle/infrt/dialect/phi/CMakeLists.txt b/paddle/infrt/dialect/phi/CMakeLists.txt index 67f6bb8a2d7..f07c6f70fb6 100644 --- a/paddle/infrt/dialect/phi/CMakeLists.txt +++ b/paddle/infrt/dialect/phi/CMakeLists.txt @@ -1,5 +1,5 @@ -if (NOT INFRT_WITH_PHI) - return() +if(NOT INFRT_WITH_PHI) + return() endif() add_subdirectory(ir) @@ -8,5 +8,4 @@ add_subdirectory(pass) add_executable(phi-exec phi_exec.cc) target_link_libraries(phi-exec infrt) -gather_srcs(infrt_src SRCS - data_type.cc) +gather_srcs(infrt_src SRCS data_type.cc) diff --git a/paddle/infrt/dialect/phi/ir/CMakeLists.txt b/paddle/infrt/dialect/phi/ir/CMakeLists.txt index 0497b983211..e038da564be 100644 --- a/paddle/infrt/dialect/phi/ir/CMakeLists.txt +++ b/paddle/infrt/dialect/phi/ir/CMakeLists.txt @@ -6,7 +6,4 @@ add_mlir_dialect(phi_gpu_kernels phi_gpu) #mlir_tablegen_on(infrt_phi_tensor) -gather_srcs(infrt_src SRCS - phi_base.cc - infrt_phi_tensor.cc - phi_kernels.cc) +gather_srcs(infrt_src SRCS phi_base.cc infrt_phi_tensor.cc phi_kernels.cc) diff --git a/paddle/infrt/dialect/phi/pass/CMakeLists.txt b/paddle/infrt/dialect/phi/pass/CMakeLists.txt index dc60ecf63fe..e664e05f9dd 100644 --- a/paddle/infrt/dialect/phi/pass/CMakeLists.txt +++ b/paddle/infrt/dialect/phi/pass/CMakeLists.txt @@ -1,9 +1,9 @@ core_gather_headers() -gather_srcs(infrt_src SRCS - proto_arg_map_context.cc - phi_op_convert_pass.cc - kernel_op_desc.cc - ) +gather_srcs(infrt_src SRCS proto_arg_map_context.cc phi_op_convert_pass.cc + kernel_op_desc.cc) -cc_test(test_kernel_op_desc SRCS kernel_op_desc_test.cc DEPS infrt) +cc_test( + test_kernel_op_desc + SRCS kernel_op_desc_test.cc + DEPS infrt) diff --git a/paddle/infrt/dialect/tensorrt/CMakeLists.txt b/paddle/infrt/dialect/tensorrt/CMakeLists.txt index 5b62b78e4da..68c6da27464 100755 --- a/paddle/infrt/dialect/tensorrt/CMakeLists.txt +++ b/paddle/infrt/dialect/tensorrt/CMakeLists.txt @@ -1,13 +1,14 @@ core_gather_headers() -gather_srcs(infrt_src SRCS - trt_ops.cc - trt_op_converter_pass.cc - trt_op_teller_pass.cc - trt_graph_fuse_pass.cc - trt_graph_split_pass.cc - trt_type_convert_pass.cc - ) +gather_srcs( + infrt_src + SRCS + trt_ops.cc + trt_op_converter_pass.cc + trt_op_teller_pass.cc + trt_graph_fuse_pass.cc + trt_graph_split_pass.cc + trt_type_convert_pass.cc) mlir_tablegen_on(trt_ops) mlir_add_rewriter(pd_lower_to_trt) diff --git a/paddle/infrt/external_kernels/CMakeLists.txt b/paddle/infrt/external_kernels/CMakeLists.txt index 9e90c1896c7..96cfe2b73d8 100644 --- a/paddle/infrt/external_kernels/CMakeLists.txt +++ b/paddle/infrt/external_kernels/CMakeLists.txt @@ -8,6 +8,8 @@ set(external_kernels_lib "${CMAKE_CURRENT_BINARY_DIR}/libexternal_kernels.so") message(STATUS "basic_mlir: ${basic_mlir}") message(STATUS "external_kernels_lib: ${external_kernels_lib}") add_test( - NAME run_and_check_external_kernels - COMMAND sh -c "${CMAKE_BINARY_DIR}/infrt/host_context/infrtexec -i ${basic_mlir} --shared_libs=${external_kernels_lib} | ${LLVM_PATH}/bin/FileCheck ${basic_mlir}" + NAME run_and_check_external_kernels + COMMAND + sh -c + "${CMAKE_BINARY_DIR}/infrt/host_context/infrtexec -i ${basic_mlir} --shared_libs=${external_kernels_lib} | ${LLVM_PATH}/bin/FileCheck ${basic_mlir}" ) diff --git a/paddle/infrt/host_context/CMakeLists.txt b/paddle/infrt/host_context/CMakeLists.txt index 14cbea70ca8..2901a282cda 100644 --- a/paddle/infrt/host_context/CMakeLists.txt +++ b/paddle/infrt/host_context/CMakeLists.txt @@ -1,26 +1,33 @@ core_gather_headers() -gather_srcs(infrt_src SRCS - kernel_frame.cc - kernel_registry.cc - value.cc - kernel_utils.cc - symbol_table.cc - op_executable.cc - core_runtime.cc - mlir_to_runtime_translate.cc - function.cc - mlir_function_executable.cc - mlir_program_executor.cc - paddle_mlir.cc - ) +gather_srcs( + infrt_src + SRCS + kernel_frame.cc + kernel_registry.cc + value.cc + kernel_utils.cc + symbol_table.cc + op_executable.cc + core_runtime.cc + mlir_to_runtime_translate.cc + function.cc + mlir_function_executable.cc + mlir_program_executor.cc + paddle_mlir.cc) -cc_test_tiny(test_infrt_host_context_value SRCS value_test.cc DEPS infrt ${MLIR_IR_LIBS}) -cc_test_tiny(test_infrt_kernel_utils SRCS kernel_utils_test.cc DEPS infrt ${MLIR_IR_LIBS}) -cc_test_tiny(test_infrt_kernel_registry SRCS kernel_registry_test.cc DEPS infrt ${MLIR_IR_LIBS}) -cc_test_tiny(test_infrt_op_executable SRCS op_executable_test.cc DEPS infrt ${MLIR_IR_LIBS}) -cc_test_tiny(test_infrt_core_runtime SRCS core_runtime_test.cc DEPS infrt ${MLIR_IR_LIBS}) -cc_test_tiny(test_infrt_mlir_to_runtime_translate SRCS mlir_to_runtime_translate_test.cc DEPS infrt ${MLIR_IR_LIBS}) +cc_test_tiny(test_infrt_host_context_value SRCS value_test.cc DEPS infrt + ${MLIR_IR_LIBS}) +cc_test_tiny(test_infrt_kernel_utils SRCS kernel_utils_test.cc DEPS infrt + ${MLIR_IR_LIBS}) +cc_test_tiny(test_infrt_kernel_registry SRCS kernel_registry_test.cc DEPS infrt + ${MLIR_IR_LIBS}) +cc_test_tiny(test_infrt_op_executable SRCS op_executable_test.cc DEPS infrt + ${MLIR_IR_LIBS}) +cc_test_tiny(test_infrt_core_runtime SRCS core_runtime_test.cc DEPS infrt + ${MLIR_IR_LIBS}) +cc_test_tiny(test_infrt_mlir_to_runtime_translate SRCS + mlir_to_runtime_translate_test.cc DEPS infrt ${MLIR_IR_LIBS}) add_executable(paddle-mlir-convert paddle_mlir_converter.cc) target_link_libraries(paddle-mlir-convert infrt ${MLIR_IR_LIBS}) diff --git a/paddle/infrt/kernel/CMakeLists.txt b/paddle/infrt/kernel/CMakeLists.txt index f20344f6f6b..6a18047885d 100644 --- a/paddle/infrt/kernel/CMakeLists.txt +++ b/paddle/infrt/kernel/CMakeLists.txt @@ -3,11 +3,12 @@ add_subdirectory(tensorrt) core_gather_headers() -gather_srcs(infrt_src SRCS - basic_kernels.cc - # phi_kernels.cc - test_kernels.cc - tensor_shape_kernels.cc - tensor_kernels.cc - control_flow_kernels.cc - ) +gather_srcs( + infrt_src + SRCS + basic_kernels.cc + # phi_kernels.cc + test_kernels.cc + tensor_shape_kernels.cc + tensor_kernels.cc + control_flow_kernels.cc) diff --git a/paddle/infrt/kernel/phi/CMakeLists.txt b/paddle/infrt/kernel/phi/CMakeLists.txt index 22a59ab2faf..92e4a49cd84 100644 --- a/paddle/infrt/kernel/phi/CMakeLists.txt +++ b/paddle/infrt/kernel/phi/CMakeLists.txt @@ -1,34 +1,39 @@ -if (NOT INFRT_WITH_PHI) - return() +if(NOT INFRT_WITH_PHI) + return() endif() core_gather_headers() -gather_srcs(infrt_src SRCS - registry.cc - dense_tensor_kernels.cc - context_kernels.cc -) +gather_srcs(infrt_src SRCS registry.cc dense_tensor_kernels.cc + context_kernels.cc) -set(infrt_register_phi_kernels_gen_source_file ${CMAKE_SOURCE_DIR}/paddle/infrt/kernel/phi/infershaped/infershaped_kernel_launchers.cc) -set(infrt_register_phi_kernels_gen_file ${CMAKE_SOURCE_DIR}/tools/infrt/get_phi_kernel_function.sh) -set(wrapped_infermeta_header_file ${CMAKE_SOURCE_DIR}/paddle/phi/infermeta/generated.h) -set(wrapped_infermeta_source_file ${CMAKE_SOURCE_DIR}/paddle/phi/infermeta/generated.cc) +set(infrt_register_phi_kernels_gen_source_file + ${CMAKE_SOURCE_DIR}/paddle/infrt/kernel/phi/infershaped/infershaped_kernel_launchers.cc +) +set(infrt_register_phi_kernels_gen_file + ${CMAKE_SOURCE_DIR}/tools/infrt/get_phi_kernel_function.sh) +set(wrapped_infermeta_header_file + ${CMAKE_SOURCE_DIR}/paddle/phi/infermeta/generated.h) +set(wrapped_infermeta_source_file + ${CMAKE_SOURCE_DIR}/paddle/phi/infermeta/generated.cc) add_custom_command( - OUTPUT ${infrt_register_phi_kernels_gen_source_file} - COMMAND bash ${infrt_register_phi_kernels_gen_file} - DEPENDS wrapped_infermeta - VERBATIM) -add_custom_target(infrt_register_phi_kernel - COMMAND bash ${infrt_register_phi_kernels_gen_file} - DEPENDS wrapped_infermeta - COMMENT "infrt generate ${infrt_register_phi_kernels_gen_source_file}" - VERBATIM) + OUTPUT ${infrt_register_phi_kernels_gen_source_file} + COMMAND bash ${infrt_register_phi_kernels_gen_file} + DEPENDS wrapped_infermeta + VERBATIM) +add_custom_target( + infrt_register_phi_kernel + COMMAND bash ${infrt_register_phi_kernels_gen_file} + DEPENDS wrapped_infermeta + COMMENT "infrt generate ${infrt_register_phi_kernels_gen_source_file}" + VERBATIM) -cc_library(infrt_naive SRCS infershaped/infershaped_kernel_launcher.cc - infershaped/infershaped_kernel_launchers.cc - DEPS phi wrapped_infermeta) +cc_library( + infrt_naive + SRCS infershaped/infershaped_kernel_launcher.cc + infershaped/infershaped_kernel_launchers.cc + DEPS phi wrapped_infermeta) cc_test_tiny(test_infrt_infershape_launchers SRCS -infershaped/infershape_launchers_test.cc DEPS infrt) + infershaped/infershape_launchers_test.cc DEPS infrt) diff --git a/paddle/infrt/kernel/tensorrt/CMakeLists.txt b/paddle/infrt/kernel/tensorrt/CMakeLists.txt index cd35fccbe2a..2cb595f7ba4 100644 --- a/paddle/infrt/kernel/tensorrt/CMakeLists.txt +++ b/paddle/infrt/kernel/tensorrt/CMakeLists.txt @@ -1,10 +1,10 @@ -if (NOT (INFRT_WITH_PHI AND INFRT_WITH_GPU AND INFRT_WITH_TRT)) +if(NOT + (INFRT_WITH_PHI + AND INFRT_WITH_GPU + AND INFRT_WITH_TRT)) return() endif() core_gather_headers() -gather_srcs(infrt_src SRCS - registry.cc - trt_kernels.cc -) +gather_srcs(infrt_src SRCS registry.cc trt_kernels.cc) diff --git a/paddle/infrt/paddle/CMakeLists.txt b/paddle/infrt/paddle/CMakeLists.txt index 21c117535fe..5f894626f80 100644 --- a/paddle/infrt/paddle/CMakeLists.txt +++ b/paddle/infrt/paddle/CMakeLists.txt @@ -5,14 +5,16 @@ add_subdirectory(pb) core_gather_headers() -gather_srcs(infrt_src SRCS - model_parser.cc - scope.cc - tensor.cc - ) +gather_srcs(infrt_src SRCS model_parser.cc scope.cc tensor.cc) -file(GLOB includes LIST_DIRECTORIES false RELATIVE ${CMAKE_SOURCE_DIR} *.h) +file( + GLOB includes + LIST_DIRECTORIES false + RELATIVE ${CMAKE_SOURCE_DIR} + *.h) foreach(header ${includes}) - set(core_includes "${core_includes};${header}" CACHE INTERNAL "") + set(core_includes + "${core_includes};${header}" + CACHE INTERNAL "") endforeach() diff --git a/paddle/infrt/paddle/cpp/CMakeLists.txt b/paddle/infrt/paddle/cpp/CMakeLists.txt index 8b48603bddf..99477471084 100644 --- a/paddle/infrt/paddle/cpp/CMakeLists.txt +++ b/paddle/infrt/paddle/cpp/CMakeLists.txt @@ -1,5 +1,11 @@ -file(GLOB includes LIST_DIRECTORIES false RELATIVE ${CMAKE_SOURCE_DIR} *.h) +file( + GLOB includes + LIST_DIRECTORIES false + RELATIVE ${CMAKE_SOURCE_DIR} + *.h) foreach(header ${includes}) - set(core_includes "${core_includes};${header}" CACHE INTERNAL "") + set(core_includes + "${core_includes};${header}" + CACHE INTERNAL "") endforeach() diff --git a/paddle/infrt/paddle/pb/CMakeLists.txt b/paddle/infrt/paddle/pb/CMakeLists.txt index b3491cfe136..3614201a95f 100644 --- a/paddle/infrt/paddle/pb/CMakeLists.txt +++ b/paddle/infrt/paddle/pb/CMakeLists.txt @@ -1,12 +1,13 @@ -gather_srcs(infrt_src SRCS - var_desc.cc - op_desc.cc - block_desc.cc - program_desc.cc - ) +gather_srcs(infrt_src SRCS var_desc.cc op_desc.cc block_desc.cc program_desc.cc) -file(GLOB includes LIST_DIRECTORIES false RELATIVE ${CMAKE_SOURCE_DIR} *.h) +file( + GLOB includes + LIST_DIRECTORIES false + RELATIVE ${CMAKE_SOURCE_DIR} + *.h) foreach(header ${includes}) - set(core_includes "${core_includes};${header}" CACHE INTERNAL "") + set(core_includes + "${core_includes};${header}" + CACHE INTERNAL "") endforeach() diff --git a/paddle/infrt/tensor/CMakeLists.txt b/paddle/infrt/tensor/CMakeLists.txt index 95d4090a9a3..b1c3149276c 100644 --- a/paddle/infrt/tensor/CMakeLists.txt +++ b/paddle/infrt/tensor/CMakeLists.txt @@ -2,13 +2,14 @@ core_gather_headers() add_subdirectory(phi) -gather_srcs(infrt_src SRCS +gather_srcs( + infrt_src + SRCS tensor_map.cc tensor_metadata.cc dense_tensor_view.cc dense_host_tensor.cc - tensor_shape.cc - ) + tensor_shape.cc) # set(tensor_map_mlir "${CMAKE_SOURCE_DIR}/infrt/dialect/mlir_tests/tensor_map.mlir") # set(external_kernels_lib "${CMAKE_BINARY_DIR}/paddle/libexternal_kernels.so") diff --git a/paddle/infrt/tensor/phi/CMakeLists.txt b/paddle/infrt/tensor/phi/CMakeLists.txt index 97e26661266..94658e223e2 100644 --- a/paddle/infrt/tensor/phi/CMakeLists.txt +++ b/paddle/infrt/tensor/phi/CMakeLists.txt @@ -1,3 +1 @@ -gather_srcs(infrt_src SRCS - tensor_map.cc -) +gather_srcs(infrt_src SRCS tensor_map.cc) diff --git a/paddle/infrt/tests/CMakeLists.txt b/paddle/infrt/tests/CMakeLists.txt index a720ad82479..22e5e232d54 100644 --- a/paddle/infrt/tests/CMakeLists.txt +++ b/paddle/infrt/tests/CMakeLists.txt @@ -1,11 +1,21 @@ cc_test_tiny(test_abs_model SRCS models/test_abs.cc DEPS infrt ${MLIR_IR_LIBS}) -configure_file(lit.cfg.py.in "${CMAKE_SOURCE_DIR}/paddle/infrt/tests/lit.cfg.py") +configure_file(lit.cfg.py.in + "${CMAKE_SOURCE_DIR}/paddle/infrt/tests/lit.cfg.py") -add_test(NAME test_infrt_by_lit COMMAND sh -c "lit -v ${CMAKE_SOURCE_DIR}/paddle/infrt/tests --filter-out \"disabled_*\"" +add_test( + NAME test_infrt_by_lit + COMMAND + sh -c + "lit -v ${CMAKE_SOURCE_DIR}/paddle/infrt/tests --filter-out \"disabled_*\"" DEPENDS infrtopt infrtexec) -configure_file(${CMAKE_CURRENT_SOURCE_DIR}/dialect/tensor/tensor_map.mlir.in ${CMAKE_CURRENT_SOURCE_DIR}/dialect/tensor/tensor_map.mlir) -configure_file(${CMAKE_CURRENT_SOURCE_DIR}/dialect/phi/linear_cpu.mlir.in ${CMAKE_CURRENT_SOURCE_DIR}/dialect/phi/linear_cpu.mlir) -configure_file(${CMAKE_CURRENT_SOURCE_DIR}/dialect/phi/resnet50.mlir.in ${CMAKE_CURRENT_SOURCE_DIR}/dialect/phi/resnet50.mlir) -configure_file(${CMAKE_CURRENT_SOURCE_DIR}/dialect/tensorrt/disabled_linear.mlir.in ${CMAKE_CURRENT_SOURCE_DIR}/dialect/tensorrt/disabled_linear.mlir) +configure_file(${CMAKE_CURRENT_SOURCE_DIR}/dialect/tensor/tensor_map.mlir.in + ${CMAKE_CURRENT_SOURCE_DIR}/dialect/tensor/tensor_map.mlir) +configure_file(${CMAKE_CURRENT_SOURCE_DIR}/dialect/phi/linear_cpu.mlir.in + ${CMAKE_CURRENT_SOURCE_DIR}/dialect/phi/linear_cpu.mlir) +configure_file(${CMAKE_CURRENT_SOURCE_DIR}/dialect/phi/resnet50.mlir.in + ${CMAKE_CURRENT_SOURCE_DIR}/dialect/phi/resnet50.mlir) +configure_file( + ${CMAKE_CURRENT_SOURCE_DIR}/dialect/tensorrt/disabled_linear.mlir.in + ${CMAKE_CURRENT_SOURCE_DIR}/dialect/tensorrt/disabled_linear.mlir) diff --git a/paddle/phi/CMakeLists.txt b/paddle/phi/CMakeLists.txt index 58ad42ddd1f..7f3dd1ddc38 100644 --- a/paddle/phi/CMakeLists.txt +++ b/paddle/phi/CMakeLists.txt @@ -23,14 +23,33 @@ add_subdirectory(tools) add_subdirectory(tests) # make an unity target for compile deps -set(PHI_DEPS convert_utils dense_tensor phi_context kernel_factory kernel_context arg_map_context infermeta lod_utils op_compat_infos sparse_csr_tensor sparse_coo_tensor string_tensor api_scalar api_int_array) +set(PHI_DEPS + convert_utils + dense_tensor + phi_context + kernel_factory + kernel_context + arg_map_context + infermeta + lod_utils + op_compat_infos + sparse_csr_tensor + sparse_coo_tensor + string_tensor + api_scalar + api_int_array) get_property(phi_kernels GLOBAL PROPERTY PHI_KERNELS) set(PHI_DEPS ${PHI_DEPS} ${phi_kernels}) create_dummy_static_lib(phi LIBS ${PHI_DEPS} LIMIT 100) -set(phi_extension_header_file ${CMAKE_CURRENT_SOURCE_DIR}/extension.h CACHE INTERNAL "phi/extension.h file") -file(WRITE ${phi_extension_header_file} "// Header file generated by paddle/phi/CMakeLists.txt for external users,\n// DO NOT edit or include it within paddle.\n\n#pragma once\n\n") +set(phi_extension_header_file + ${CMAKE_CURRENT_SOURCE_DIR}/extension.h + CACHE INTERNAL "phi/extension.h file") +file( + WRITE ${phi_extension_header_file} + "// Header file generated by paddle/phi/CMakeLists.txt for external users,\n// DO NOT edit or include it within paddle.\n\n#pragma once\n\n" +) # generate inner headers include dir for users generate_unify_header(backends) diff --git a/paddle/phi/api/CMakeLists.txt b/paddle/phi/api/CMakeLists.txt index d575759db32..b1d97cbc7fa 100644 --- a/paddle/phi/api/CMakeLists.txt +++ b/paddle/phi/api/CMakeLists.txt @@ -1,2 +1,6 @@ add_subdirectory(lib) -cc_library(phi_api SRCS all.cc DEPS phi_function_api phi_bw_function_api sparse_api sparse_bw_api strings_api) +cc_library( + phi_api + SRCS all.cc + DEPS phi_function_api phi_bw_function_api sparse_api sparse_bw_api + strings_api) diff --git a/paddle/phi/api/lib/CMakeLists.txt b/paddle/phi/api/lib/CMakeLists.txt index 004ed8de520..a1c6989555f 100644 --- a/paddle/phi/api/lib/CMakeLists.txt +++ b/paddle/phi/api/lib/CMakeLists.txt @@ -1,11 +1,20 @@ add_subdirectory(utils) -if (WITH_GPU) - nv_library(phi_tensor_raw SRCS tensor.cc DEPS tensor_base dense_tensor phi_api_utils phi_enforce) -elseif (WITH_ROCM) - hip_library(phi_tensor_raw SRCS tensor.cc DEPS tensor_base dense_tensor phi_api_utils phi_enforce) +if(WITH_GPU) + nv_library( + phi_tensor_raw + SRCS tensor.cc + DEPS tensor_base dense_tensor phi_api_utils phi_enforce) +elseif(WITH_ROCM) + hip_library( + phi_tensor_raw + SRCS tensor.cc + DEPS tensor_base dense_tensor phi_api_utils phi_enforce) else() - cc_library(phi_tensor_raw SRCS tensor.cc DEPS tensor_base dense_tensor phi_api_utils phi_enforce) + cc_library( + phi_tensor_raw + SRCS tensor.cc + DEPS tensor_base dense_tensor phi_api_utils phi_enforce) endif() set(api_gen_base ${CMAKE_SOURCE_DIR}/python/paddle/utils/code_gen/api_base.py) @@ -13,71 +22,94 @@ set(api_gen_base ${CMAKE_SOURCE_DIR}/python/paddle/utils/code_gen/api_base.py) # forward api file set(api_gen_file ${CMAKE_SOURCE_DIR}/python/paddle/utils/code_gen/api_gen.py) set(api_yaml_file ${CMAKE_SOURCE_DIR}/python/paddle/utils/code_gen/api.yaml) -set(new_api_yaml_file ${CMAKE_SOURCE_DIR}/python/paddle/utils/code_gen/new_api.yaml) +set(new_api_yaml_file + ${CMAKE_SOURCE_DIR}/python/paddle/utils/code_gen/new_api.yaml) set(api_header_file ${CMAKE_SOURCE_DIR}/paddle/phi/api/include/api.h) set(api_source_file ${CMAKE_SOURCE_DIR}/paddle/phi/api/lib/api.cc) set(api_header_file_tmp ${api_header_file}.tmp) set(api_source_file_tmp ${api_source_file}.tmp) # backward api file -set(bw_api_gen_file ${CMAKE_SOURCE_DIR}/python/paddle/utils/code_gen/backward_api_gen.py) -set(bw_api_yaml_file ${CMAKE_SOURCE_DIR}/python/paddle/utils/code_gen/backward.yaml) -set(new_bw_api_yaml_file ${CMAKE_SOURCE_DIR}/python/paddle/utils/code_gen/new_backward.yaml) -set(bw_api_header_file ${CMAKE_SOURCE_DIR}/paddle/phi/api/backward/backward_api.h) +set(bw_api_gen_file + ${CMAKE_SOURCE_DIR}/python/paddle/utils/code_gen/backward_api_gen.py) +set(bw_api_yaml_file + ${CMAKE_SOURCE_DIR}/python/paddle/utils/code_gen/backward.yaml) +set(new_bw_api_yaml_file + ${CMAKE_SOURCE_DIR}/python/paddle/utils/code_gen/new_backward.yaml) +set(bw_api_header_file + ${CMAKE_SOURCE_DIR}/paddle/phi/api/backward/backward_api.h) set(bw_api_source_file ${CMAKE_SOURCE_DIR}/paddle/phi/api/lib/backward_api.cc) set(bw_api_header_file_tmp ${bw_api_header_file}.tmp) set(bw_api_source_file_tmp ${bw_api_source_file}.tmp) # dygraph(intermediate) api file -set(im_api_gen_file ${CMAKE_SOURCE_DIR}/python/paddle/utils/code_gen/intermediate_api_gen.py) -set(dygraph_api_header_file ${CMAKE_SOURCE_DIR}/paddle/phi/api/lib/dygraph_api.h) -set(dygraph_api_source_file ${CMAKE_SOURCE_DIR}/paddle/phi/api/lib/dygraph_api.cc) +set(im_api_gen_file + ${CMAKE_SOURCE_DIR}/python/paddle/utils/code_gen/intermediate_api_gen.py) +set(dygraph_api_header_file + ${CMAKE_SOURCE_DIR}/paddle/phi/api/lib/dygraph_api.h) +set(dygraph_api_source_file + ${CMAKE_SOURCE_DIR}/paddle/phi/api/lib/dygraph_api.cc) set(dygraph_api_header_file_tmp ${dygraph_api_header_file}.tmp) set(dygraph_api_source_file_tmp ${dygraph_api_source_file}.tmp) # sparse api file -set(sparse_api_gen_file ${CMAKE_SOURCE_DIR}/python/paddle/utils/code_gen/sparse_api_gen.py) -set(sparse_api_yaml_file ${CMAKE_SOURCE_DIR}/python/paddle/utils/code_gen/sparse_api.yaml) -set(sparse_api_header_file ${CMAKE_SOURCE_DIR}/paddle/phi/api/include/sparse_api.h) +set(sparse_api_gen_file + ${CMAKE_SOURCE_DIR}/python/paddle/utils/code_gen/sparse_api_gen.py) +set(sparse_api_yaml_file + ${CMAKE_SOURCE_DIR}/python/paddle/utils/code_gen/sparse_api.yaml) +set(sparse_api_header_file + ${CMAKE_SOURCE_DIR}/paddle/phi/api/include/sparse_api.h) set(sparse_api_source_file ${CMAKE_SOURCE_DIR}/paddle/phi/api/lib/sparse_api.cc) set(sparse_api_header_file_tmp ${sparse_api_header_file}.tmp) set(sparse_api_source_file_tmp ${sparse_api_source_file}.tmp) # sparse bw api file -set(sparse_bw_api_gen_file ${CMAKE_SOURCE_DIR}/python/paddle/utils/code_gen/sparse_bw_api_gen.py) -set(sparse_bw_api_yaml_file ${CMAKE_SOURCE_DIR}/python/paddle/utils/code_gen/sparse_bw_api.yaml) -set(sparse_bw_api_header_file ${CMAKE_SOURCE_DIR}/paddle/phi/api/backward/sparse_bw_api.h) -set(sparse_bw_api_source_file ${CMAKE_SOURCE_DIR}/paddle/phi/api/lib/sparse_bw_api.cc) +set(sparse_bw_api_gen_file + ${CMAKE_SOURCE_DIR}/python/paddle/utils/code_gen/sparse_bw_api_gen.py) +set(sparse_bw_api_yaml_file + ${CMAKE_SOURCE_DIR}/python/paddle/utils/code_gen/sparse_bw_api.yaml) +set(sparse_bw_api_header_file + ${CMAKE_SOURCE_DIR}/paddle/phi/api/backward/sparse_bw_api.h) +set(sparse_bw_api_source_file + ${CMAKE_SOURCE_DIR}/paddle/phi/api/lib/sparse_bw_api.cc) set(sparse_bw_api_header_file_tmp ${sparse_bw_api_header_file}.tmp) set(sparse_bw_api_source_file_tmp ${sparse_bw_api_source_file}.tmp) # strings api file -set(strings_api_gen_file ${CMAKE_SOURCE_DIR}/python/paddle/utils/code_gen/strings_api_gen.py) -set(strings_api_yaml_file ${CMAKE_SOURCE_DIR}/python/paddle/utils/code_gen/strings_api.yaml) -set(strings_api_header_file ${CMAKE_SOURCE_DIR}/paddle/phi/api/include/strings_api.h) -set(strings_api_source_file ${CMAKE_SOURCE_DIR}/paddle/phi/api/lib/strings_api.cc) +set(strings_api_gen_file + ${CMAKE_SOURCE_DIR}/python/paddle/utils/code_gen/strings_api_gen.py) +set(strings_api_yaml_file + ${CMAKE_SOURCE_DIR}/python/paddle/utils/code_gen/strings_api.yaml) +set(strings_api_header_file + ${CMAKE_SOURCE_DIR}/paddle/phi/api/include/strings_api.h) +set(strings_api_source_file + ${CMAKE_SOURCE_DIR}/paddle/phi/api/lib/strings_api.cc) set(strings_api_header_file_tmp ${strings_api_header_file}.tmp) set(strings_api_source_file_tmp ${strings_api_source_file}.tmp) # wrapped infermeta file -set(wrapped_infermeta_gen_file ${CMAKE_SOURCE_DIR}/python/paddle/utils/code_gen/wrapped_infermeta_gen.py) -set(wrapped_infermeta_header_file ${CMAKE_SOURCE_DIR}/paddle/phi/infermeta/generated.h) -set(wrapped_infermeta_source_file ${CMAKE_SOURCE_DIR}/paddle/phi/infermeta/generated.cc) +set(wrapped_infermeta_gen_file + ${CMAKE_SOURCE_DIR}/python/paddle/utils/code_gen/wrapped_infermeta_gen.py) +set(wrapped_infermeta_header_file + ${CMAKE_SOURCE_DIR}/paddle/phi/infermeta/generated.h) +set(wrapped_infermeta_source_file + ${CMAKE_SOURCE_DIR}/paddle/phi/infermeta/generated.cc) -if (NOT PYTHON_EXECUTABLE) +if(NOT PYTHON_EXECUTABLE) find_package(PythonInterp REQUIRED) endif() # install extra dependencies -execute_process( - COMMAND ${PYTHON_EXECUTABLE} -m pip install -U pyyaml jinja2 -) +execute_process(COMMAND ${PYTHON_EXECUTABLE} -m pip install -U pyyaml jinja2) # parse apis set(parsed_api_dir ${CMAKE_SOURCE_DIR}/python/paddle/utils/code_gen/parsed_apis) -set(generated_op_path ${CMAKE_SOURCE_DIR}/paddle/fluid/operators/generated_op.cc) -set(generated_argument_mapping_path ${CMAKE_SOURCE_DIR}/paddle/phi/ops/compat/generated_sig.cc) -message("parse api yamls: +set(generated_op_path + ${CMAKE_SOURCE_DIR}/paddle/fluid/operators/generated_op.cc) +set(generated_argument_mapping_path + ${CMAKE_SOURCE_DIR}/paddle/phi/ops/compat/generated_sig.cc) +message( + "parse api yamls: - ${api_yaml_file} - ${new_api_yaml_file} - ${bw_api_yaml_file} @@ -85,24 +117,18 @@ message("parse api yamls: execute_process( WORKING_DIRECTORY ${CMAKE_SOURCE_DIR}/python/paddle/utils/code_gen COMMAND ${CMAKE_COMMAND} -E make_directory ${parsed_api_dir} - COMMAND ${PYTHON_EXECUTABLE} parse_api.py - --api_yaml_path ./api.yaml - --output_path ./parsed_apis/api.parsed.yaml - COMMAND ${PYTHON_EXECUTABLE} parse_api.py - --api_yaml_path ./new_api.yaml - --output_path ./parsed_apis/new_api.parsed.yaml - COMMAND ${PYTHON_EXECUTABLE} parse_api.py - --api_yaml_path ./backward.yaml - --output_path ./parsed_apis/backward_api.parsed.yaml - --backward - COMMAND ${PYTHON_EXECUTABLE} parse_api.py - --api_yaml_path ./new_backward.yaml - --output_path ./parsed_apis/new_backward_api.parsed.yaml - --backward - RESULTS_VARIABLE _results -) + COMMAND ${PYTHON_EXECUTABLE} parse_api.py --api_yaml_path ./api.yaml + --output_path ./parsed_apis/api.parsed.yaml + COMMAND ${PYTHON_EXECUTABLE} parse_api.py --api_yaml_path ./new_api.yaml + --output_path ./parsed_apis/new_api.parsed.yaml + COMMAND ${PYTHON_EXECUTABLE} parse_api.py --api_yaml_path ./backward.yaml + --output_path ./parsed_apis/backward_api.parsed.yaml --backward + COMMAND + ${PYTHON_EXECUTABLE} parse_api.py --api_yaml_path ./new_backward.yaml + --output_path ./parsed_apis/new_backward_api.parsed.yaml --backward + RESULTS_VARIABLE _results) foreach(_result in ${_results}) - if (${_result}) + if(${_result}) message(FATAL_ERROR "api yaml parsing failed, exiting.") endif() endforeach() @@ -113,52 +139,67 @@ message("validate api yaml: - ${parsed_api_dir}/new_backward_api.parsed.yaml") execute_process( WORKING_DIRECTORY ${CMAKE_SOURCE_DIR}/python/paddle/utils/code_gen - COMMAND ${PYTHON_EXECUTABLE} cross_validate.py - --forward_yaml_paths ./parsed_apis/api.parsed.yaml ./parsed_apis/new_api.parsed.yaml - --backward_yaml_paths ./parsed_apis/backward_api.parsed.yaml ./parsed_apis/new_backward_api.parsed.yaml - RESULT_VARIABLE _result -) -if (${_result}) - message(FATAL_ERROR "api validation failed, exiting." ) + COMMAND + ${PYTHON_EXECUTABLE} cross_validate.py --forward_yaml_paths + ./parsed_apis/api.parsed.yaml ./parsed_apis/new_api.parsed.yaml + --backward_yaml_paths ./parsed_apis/backward_api.parsed.yaml + ./parsed_apis/new_backward_api.parsed.yaml + RESULT_VARIABLE _result) +if(${_result}) + message(FATAL_ERROR "api validation failed, exiting.") endif() # code generation for op, op makers, and argument mapping functions -message("create or remove auto-geneated operators: ${generated_op_path}.tmp -create or remove auto-geneated argument mappings: ${generated_argument_mapping_path}.tmp") +message( + "create or remove auto-geneated operators: ${generated_op_path}.tmp +create or remove auto-geneated argument mappings: ${generated_argument_mapping_path}.tmp" +) execute_process( WORKING_DIRECTORY ${CMAKE_SOURCE_DIR}/python/paddle/utils/code_gen - COMMAND ${PYTHON_EXECUTABLE} generate_op.py - --api_yaml_path ./parsed_apis/new_api.parsed.yaml - --backward_api_yaml_path ./parsed_apis/new_backward_api.parsed.yaml - --output_op_path "${generated_op_path}.tmp" - --output_arg_map_path "${generated_argument_mapping_path}.tmp" - RESULT_VARIABLE _result -) -if (${_result}) - message(FATAL_ERROR "operator codegen failed, exiting." ) + COMMAND + ${PYTHON_EXECUTABLE} generate_op.py --api_yaml_path + ./parsed_apis/new_api.parsed.yaml --backward_api_yaml_path + ./parsed_apis/new_backward_api.parsed.yaml --output_op_path + "${generated_op_path}.tmp" --output_arg_map_path + "${generated_argument_mapping_path}.tmp" + RESULT_VARIABLE _result) +if(${_result}) + message(FATAL_ERROR "operator codegen failed, exiting.") endif() - if(EXISTS "${generated_op_path}.tmp" AND EXISTS "${generated_op_path}") - execute_process(COMMAND ${CMAKE_COMMAND} -E copy_if_different "${generated_op_path}.tmp" "${generated_op_path}") + execute_process(COMMAND ${CMAKE_COMMAND} -E copy_if_different + "${generated_op_path}.tmp" "${generated_op_path}") message("copy if different ${generated_op_path}.tmp ${generated_op_path}") elseif(EXISTS "${generated_op_path}.tmp") - execute_process(COMMAND ${CMAKE_COMMAND} -E copy "${generated_op_path}.tmp" "${generated_op_path}") + execute_process(COMMAND ${CMAKE_COMMAND} -E copy "${generated_op_path}.tmp" + "${generated_op_path}") message("copy ${generated_op_path}.tmp ${generated_op_path}") else() execute_process(COMMAND ${CMAKE_COMMAND} -E remove -f "${generated_op_path}") message("remove ${generated_op_path}") endif() - -if(EXISTS "${generated_argument_mapping_path}.tmp" AND EXISTS "${generated_argument_mapping_path}") - execute_process(COMMAND ${CMAKE_COMMAND} -E copy_if_different "${generated_argument_mapping_path}.tmp" "${generated_argument_mapping_path}") - message("copy if different ${generated_argument_mapping_path}.tmp ${generated_argument_mapping_path}") +if(EXISTS "${generated_argument_mapping_path}.tmp" + AND EXISTS "${generated_argument_mapping_path}") + execute_process( + COMMAND + ${CMAKE_COMMAND} -E copy_if_different + "${generated_argument_mapping_path}.tmp" + "${generated_argument_mapping_path}") + message( + "copy if different ${generated_argument_mapping_path}.tmp ${generated_argument_mapping_path}" + ) elseif(EXISTS "${generated_argument_mapping_path}.tmp") - execute_process(COMMAND ${CMAKE_COMMAND} -E copy "${generated_argument_mapping_path}.tmp" "${generated_argument_mapping_path}") - message("copy ${generated_argument_mapping_path}.tmp ${generated_argument_mapping_path}") + execute_process( + COMMAND ${CMAKE_COMMAND} -E copy "${generated_argument_mapping_path}.tmp" + "${generated_argument_mapping_path}") + message( + "copy ${generated_argument_mapping_path}.tmp ${generated_argument_mapping_path}" + ) else() - execute_process(COMMAND ${CMAKE_COMMAND} -E remove -f "${generated_argument_mapping_path}") + execute_process(COMMAND ${CMAKE_COMMAND} -E remove -f + "${generated_argument_mapping_path}") message("remove ${generated_argument_mapping_path}") endif() @@ -166,26 +207,31 @@ endif() add_custom_command( OUTPUT ${api_header_file} ${api_source_file} COMMAND ${PYTHON_EXECUTABLE} -m pip install pyyaml - COMMAND ${PYTHON_EXECUTABLE} ${api_gen_file} - --api_yaml_path ${api_yaml_file} ${new_api_yaml_file} - --api_header_path ${api_header_file_tmp} - --api_header_path ${api_header_file_tmp} - --api_source_path ${api_source_file_tmp} - COMMAND ${CMAKE_COMMAND} -E copy_if_different ${api_header_file_tmp} ${api_header_file} - COMMAND ${CMAKE_COMMAND} -E copy_if_different ${api_source_file_tmp} ${api_source_file} + COMMAND + ${PYTHON_EXECUTABLE} ${api_gen_file} --api_yaml_path ${api_yaml_file} + ${new_api_yaml_file} --api_header_path ${api_header_file_tmp} + --api_header_path ${api_header_file_tmp} --api_source_path + ${api_source_file_tmp} + COMMAND ${CMAKE_COMMAND} -E copy_if_different ${api_header_file_tmp} + ${api_header_file} + COMMAND ${CMAKE_COMMAND} -E copy_if_different ${api_source_file_tmp} + ${api_source_file} COMMENT "copy_if_different ${api_header_file} ${api_source_file}" DEPENDS ${api_yaml_file} ${api_gen_file} ${api_gen_base} VERBATIM) # generate backward api add_custom_command( - OUTPUT ${bw_api_header_file} ${bw_api_source_file} ${bw_api_header_file_tmp} ${bw_api_source_file_tmp} - COMMAND ${PYTHON_EXECUTABLE} ${bw_api_gen_file} - --backward_yaml_path ${bw_api_yaml_file} ${new_bw_api_yaml_file} - --backward_header_path ${bw_api_header_file_tmp} - --backward_source_path ${bw_api_source_file_tmp} - COMMAND ${CMAKE_COMMAND} -E copy_if_different ${bw_api_header_file_tmp} ${bw_api_header_file} - COMMAND ${CMAKE_COMMAND} -E copy_if_different ${bw_api_source_file_tmp} ${bw_api_source_file} + OUTPUT ${bw_api_header_file} ${bw_api_source_file} ${bw_api_header_file_tmp} + ${bw_api_source_file_tmp} + COMMAND + ${PYTHON_EXECUTABLE} ${bw_api_gen_file} --backward_yaml_path + ${bw_api_yaml_file} ${new_bw_api_yaml_file} --backward_header_path + ${bw_api_header_file_tmp} --backward_source_path ${bw_api_source_file_tmp} + COMMAND ${CMAKE_COMMAND} -E copy_if_different ${bw_api_header_file_tmp} + ${bw_api_header_file} + COMMAND ${CMAKE_COMMAND} -E copy_if_different ${bw_api_source_file_tmp} + ${bw_api_source_file} COMMENT "copy_if_different ${bw_api_header_file} ${bw_api_source_file}" DEPENDS ${bw_api_yaml_file} ${bw_api_gen_file} ${api_gen_base} VERBATIM) @@ -193,82 +239,177 @@ add_custom_command( # generate sparse api add_custom_command( OUTPUT ${sparse_api_header_file} ${sparse_api_source_file} - COMMAND ${PYTHON_EXECUTABLE} ${sparse_api_gen_file} - --api_yaml_path ${sparse_api_yaml_file} - --api_header_path ${sparse_api_header_file_tmp} - --api_source_path ${sparse_api_source_file_tmp} - COMMAND ${CMAKE_COMMAND} -E copy_if_different ${sparse_api_header_file_tmp} ${sparse_api_header_file} - COMMAND ${CMAKE_COMMAND} -E copy_if_different ${sparse_api_source_file_tmp} ${sparse_api_source_file} - COMMENT "copy_if_different ${sparse_api_header_file} ${sparse_sparse_api_source_file}" - DEPENDS ${sparse_api_yaml_file} ${sparse_api_gen_file} ${api_gen_base} ${api_gen_file} + COMMAND + ${PYTHON_EXECUTABLE} ${sparse_api_gen_file} --api_yaml_path + ${sparse_api_yaml_file} --api_header_path ${sparse_api_header_file_tmp} + --api_source_path ${sparse_api_source_file_tmp} + COMMAND ${CMAKE_COMMAND} -E copy_if_different ${sparse_api_header_file_tmp} + ${sparse_api_header_file} + COMMAND ${CMAKE_COMMAND} -E copy_if_different ${sparse_api_source_file_tmp} + ${sparse_api_source_file} + COMMENT + "copy_if_different ${sparse_api_header_file} ${sparse_sparse_api_source_file}" + DEPENDS ${sparse_api_yaml_file} ${sparse_api_gen_file} ${api_gen_base} + ${api_gen_file} VERBATIM) # generate backward sparse api add_custom_command( OUTPUT ${sparse_bw_api_header_file} ${sparse_bw_api_source_file} - COMMAND ${PYTHON_EXECUTABLE} ${sparse_bw_api_gen_file} - --api_yaml_path ${sparse_bw_api_yaml_file} - --api_header_path ${sparse_bw_api_header_file_tmp} - --api_source_path ${sparse_bw_api_source_file_tmp} - COMMAND ${CMAKE_COMMAND} -E copy_if_different ${sparse_bw_api_header_file_tmp} ${sparse_bw_api_header_file} - COMMAND ${CMAKE_COMMAND} -E copy_if_different ${sparse_bw_api_source_file_tmp} ${sparse_bw_api_source_file} - COMMENT "copy_if_different ${sparse_bw_api_header_file} ${sparse_bw_sparse_api_source_file}" - DEPENDS ${sparse_bw_api_yaml_file} ${sparse_bw_api_gen_file} ${api_gen_base} ${api_gen_file} ${sparse_api_gen_file} ${bw_api_gen_file} + COMMAND + ${PYTHON_EXECUTABLE} ${sparse_bw_api_gen_file} --api_yaml_path + ${sparse_bw_api_yaml_file} --api_header_path + ${sparse_bw_api_header_file_tmp} --api_source_path + ${sparse_bw_api_source_file_tmp} + COMMAND ${CMAKE_COMMAND} -E copy_if_different ${sparse_bw_api_header_file_tmp} + ${sparse_bw_api_header_file} + COMMAND ${CMAKE_COMMAND} -E copy_if_different ${sparse_bw_api_source_file_tmp} + ${sparse_bw_api_source_file} + COMMENT + "copy_if_different ${sparse_bw_api_header_file} ${sparse_bw_sparse_api_source_file}" + DEPENDS ${sparse_bw_api_yaml_file} ${sparse_bw_api_gen_file} ${api_gen_base} + ${api_gen_file} ${sparse_api_gen_file} ${bw_api_gen_file} VERBATIM) # generate strings api add_custom_command( OUTPUT ${strings_api_header_file} ${strings_api_source_file} - COMMAND ${PYTHON_EXECUTABLE} ${strings_api_gen_file} - --api_yaml_path ${strings_api_yaml_file} - --api_header_path ${strings_api_header_file_tmp} - --api_source_path ${strings_api_source_file_tmp} - COMMAND ${CMAKE_COMMAND} -E copy_if_different ${strings_api_header_file_tmp} ${strings_api_header_file} - COMMAND ${CMAKE_COMMAND} -E copy_if_different ${strings_api_source_file_tmp} ${strings_api_source_file} - COMMENT "copy_if_different ${strings_api_header_file} ${strings_strings_api_source_file}" - DEPENDS ${strings_api_yaml_file} ${strings_api_gen_file} ${api_gen_base} ${api_gen_file} + COMMAND + ${PYTHON_EXECUTABLE} ${strings_api_gen_file} --api_yaml_path + ${strings_api_yaml_file} --api_header_path ${strings_api_header_file_tmp} + --api_source_path ${strings_api_source_file_tmp} + COMMAND ${CMAKE_COMMAND} -E copy_if_different ${strings_api_header_file_tmp} + ${strings_api_header_file} + COMMAND ${CMAKE_COMMAND} -E copy_if_different ${strings_api_source_file_tmp} + ${strings_api_source_file} + COMMENT + "copy_if_different ${strings_api_header_file} ${strings_strings_api_source_file}" + DEPENDS ${strings_api_yaml_file} ${strings_api_gen_file} ${api_gen_base} + ${api_gen_file} VERBATIM) # generate dygraph(intermediate) api add_custom_command( OUTPUT ${dygraph_api_header_file} ${dygraph_api_source_file} - COMMAND ${PYTHON_EXECUTABLE} ${im_api_gen_file} - --api_yaml_path ${api_yaml_file} ${new_api_yaml_file} - --sparse_api_yaml_path ${sparse_api_yaml_file} - --dygraph_api_header_path ${dygraph_api_header_file_tmp} - --dygraph_api_source_path ${dygraph_api_source_file_tmp} - COMMAND ${CMAKE_COMMAND} -E copy_if_different ${dygraph_api_header_file_tmp} ${dygraph_api_header_file} - COMMAND ${CMAKE_COMMAND} -E copy_if_different ${dygraph_api_source_file_tmp} ${dygraph_api_source_file} - DEPENDS ${api_yaml_file} ${sparse_api_yaml_file} ${im_api_gen_file} ${api_gen_base} ${api_gen_file} + COMMAND + ${PYTHON_EXECUTABLE} ${im_api_gen_file} --api_yaml_path ${api_yaml_file} + ${new_api_yaml_file} --sparse_api_yaml_path ${sparse_api_yaml_file} + --dygraph_api_header_path ${dygraph_api_header_file_tmp} + --dygraph_api_source_path ${dygraph_api_source_file_tmp} + COMMAND ${CMAKE_COMMAND} -E copy_if_different ${dygraph_api_header_file_tmp} + ${dygraph_api_header_file} + COMMAND ${CMAKE_COMMAND} -E copy_if_different ${dygraph_api_source_file_tmp} + ${dygraph_api_source_file} + DEPENDS ${api_yaml_file} ${sparse_api_yaml_file} ${im_api_gen_file} + ${api_gen_base} ${api_gen_file} VERBATIM) # generate wrapped infermeta add_custom_command( OUTPUT ${wrapped_infermeta_header_file} ${wrapped_infermeta_source_file} - COMMAND ${PYTHON_EXECUTABLE} ${wrapped_infermeta_gen_file} - --api_yaml_path ${api_yaml_file} ${new_api_yaml_file} - --wrapped_infermeta_header_path ${wrapped_infermeta_header_file} - --wrapped_infermeta_source_path ${wrapped_infermeta_source_file} + COMMAND + ${PYTHON_EXECUTABLE} ${wrapped_infermeta_gen_file} --api_yaml_path + ${api_yaml_file} ${new_api_yaml_file} --wrapped_infermeta_header_path + ${wrapped_infermeta_header_file} --wrapped_infermeta_source_path + ${wrapped_infermeta_source_file} DEPENDS ${api_yaml_file} ${wrapped_infermeta_gen_file} ${api_gen_base} VERBATIM) -cc_library(op_meta_info SRCS op_meta_info.cc DEPS phi_tensor_raw) -cc_library(wrapped_infermeta SRCS ${wrapped_infermeta_source_file} DEPS phi) -cc_library(context_pool SRCS context_pool.cc DEPS phi_context phi_enforce place) +cc_library( + op_meta_info + SRCS op_meta_info.cc + DEPS phi_tensor_raw) +cc_library( + wrapped_infermeta + SRCS ${wrapped_infermeta_source_file} + DEPS phi) +cc_library( + context_pool + SRCS context_pool.cc + DEPS phi_context phi_enforce place) -cc_library(kernel_dispatch SRCS kernel_dispatch.cc DEPS phi_tensor_raw phi_context kernel_factory context_pool) -cc_library(api_gen_utils SRCS api_gen_utils.cc DEPS phi_tensor_raw selected_rows sparse_csr_tensor sparse_coo_tensor) -cc_library(phi_data_transform SRCS data_transform.cc DEPS phi_tensor_raw transfer_layout_kernel cast_kernel copy_kernel tensor) -cc_library(api_custom_impl SRCS api_custom_impl.cc DEPS phi_tensor_raw phi kernel_dispatch api_gen_utils backward_infermeta phi_data_transform) -cc_library(sparse_api_custom_impl SRCS sparse_api_custom_impl.cc DEPS phi_tensor_raw phi kernel_dispatch api_gen_utils phi_data_transform tensor_copy) +cc_library( + kernel_dispatch + SRCS kernel_dispatch.cc + DEPS phi_tensor_raw phi_context kernel_factory context_pool) +cc_library( + api_gen_utils + SRCS api_gen_utils.cc + DEPS phi_tensor_raw selected_rows sparse_csr_tensor sparse_coo_tensor) +cc_library( + phi_data_transform + SRCS data_transform.cc + DEPS phi_tensor_raw transfer_layout_kernel cast_kernel copy_kernel tensor) +cc_library( + api_custom_impl + SRCS api_custom_impl.cc + DEPS phi_tensor_raw phi kernel_dispatch api_gen_utils backward_infermeta + phi_data_transform) +cc_library( + sparse_api_custom_impl + SRCS sparse_api_custom_impl.cc + DEPS phi_tensor_raw phi kernel_dispatch api_gen_utils phi_data_transform + tensor_copy) -cc_library(phi_function_api SRCS ${api_source_file} DEPS phi_tensor_raw phi kernel_dispatch api_gen_utils phi_data_transform api_custom_impl) -cc_library(phi_bw_function_api SRCS ${bw_api_source_file} DEPS phi_tensor_raw phi kernel_dispatch api_gen_utils backward_infermeta phi_data_transform phi_function_api api_custom_impl global_utils) -cc_library(sparse_api SRCS ${sparse_api_source_file} DEPS phi_tensor_raw phi kernel_dispatch api_gen_utils sparse_api_custom_impl) -cc_library(sparse_bw_api SRCS ${sparse_bw_api_source_file} DEPS phi_tensor_raw phi kernel_dispatch api_gen_utils sparse_api sparse_api_custom_impl) -cc_library(phi_dygraph_api SRCS ${dygraph_api_source_file} DEPS phi_tensor_raw phi kernel_dispatch api_gen_utils phi_data_transform phi_function_api sparse_api) -cc_library(strings_api SRCS ${strings_api_source_file} DEPS phi_tensor_raw phi kernel_dispatch api_gen_utils) -cc_library(phi_tensor SRCS tensor_method.cc DEPS phi_tensor_raw phi_function_api api_gen_utils kernel_dispatch infermeta sparse_api strings_api) -cc_library(tensor_copy SRCS tensor_copy.cc DEPS phi_tensor_raw copy_kernel kernel_dispatch api_gen_utils) -cc_library(api_scalar SRCS scalar.cc DEPS tensor_copy) -cc_library(api_int_array SRCS int_array.cc DEPS tensor_copy) +cc_library( + phi_function_api + SRCS ${api_source_file} + DEPS phi_tensor_raw phi kernel_dispatch api_gen_utils phi_data_transform + api_custom_impl) +cc_library( + phi_bw_function_api + SRCS ${bw_api_source_file} + DEPS phi_tensor_raw + phi + kernel_dispatch + api_gen_utils + backward_infermeta + phi_data_transform + phi_function_api + api_custom_impl + global_utils) +cc_library( + sparse_api + SRCS ${sparse_api_source_file} + DEPS phi_tensor_raw phi kernel_dispatch api_gen_utils sparse_api_custom_impl) +cc_library( + sparse_bw_api + SRCS ${sparse_bw_api_source_file} + DEPS phi_tensor_raw phi kernel_dispatch api_gen_utils sparse_api + sparse_api_custom_impl) +cc_library( + phi_dygraph_api + SRCS ${dygraph_api_source_file} + DEPS phi_tensor_raw + phi + kernel_dispatch + api_gen_utils + phi_data_transform + phi_function_api + sparse_api) +cc_library( + strings_api + SRCS ${strings_api_source_file} + DEPS phi_tensor_raw phi kernel_dispatch api_gen_utils) +cc_library( + phi_tensor + SRCS tensor_method.cc + DEPS phi_tensor_raw + phi_function_api + api_gen_utils + kernel_dispatch + infermeta + sparse_api + strings_api) +cc_library( + tensor_copy + SRCS tensor_copy.cc + DEPS phi_tensor_raw copy_kernel kernel_dispatch api_gen_utils) +cc_library( + api_scalar + SRCS scalar.cc + DEPS tensor_copy) +cc_library( + api_int_array + SRCS int_array.cc + DEPS tensor_copy) diff --git a/paddle/phi/api/lib/utils/CMakeLists.txt b/paddle/phi/api/lib/utils/CMakeLists.txt index 0e1cd0cb83f..ef99a158628 100644 --- a/paddle/phi/api/lib/utils/CMakeLists.txt +++ b/paddle/phi/api/lib/utils/CMakeLists.txt @@ -1,2 +1,13 @@ -cc_library(phi_api_utils SRCS tensor_utils.cc DEPS -tensor_base convert_utils dense_tensor lod_tensor selected_rows_utils place var_type_traits string_tensor int_array scalar) +cc_library( + phi_api_utils + SRCS tensor_utils.cc + DEPS tensor_base + convert_utils + dense_tensor + lod_tensor + selected_rows_utils + place + var_type_traits + string_tensor + int_array + scalar) diff --git a/paddle/phi/backends/CMakeLists.txt b/paddle/phi/backends/CMakeLists.txt index 5f616155546..c981b625192 100644 --- a/paddle/phi/backends/CMakeLists.txt +++ b/paddle/phi/backends/CMakeLists.txt @@ -12,7 +12,10 @@ if(WITH_XPU) add_subdirectory(xpu) endif() -cc_library(phi_context SRCS all_context.cc DEPS device_context cpu_context) +cc_library( + phi_context + SRCS all_context.cc + DEPS device_context cpu_context) if(WITH_XPU) add_dependencies(phi_context xpu_context) @@ -24,11 +27,31 @@ endif() if(WITH_CUSTOM_DEVICE) add_dependencies(phi_context custom_context) - cc_library(callback_manager SRCS callback_manager.cc DEPS enforce place) - cc_library(device_guard SRCS device_guard.cc DEPS enforce place) - cc_library(stream SRCS stream.cc DEPS callback_manager) - cc_library(event SRCS event.cc DEPS enforce place) - cc_library(device_base SRCS device_base.cc DEPS stream event callback_manager device_guard device_context flags) - cc_library(device_manager SRCS device_manager.cc DEPS custom_device) - set(GLOB_DEV_LIB device_manager custom_device CACHE INTERNAL "Global DEV library") + cc_library( + callback_manager + SRCS callback_manager.cc + DEPS enforce place) + cc_library( + device_guard + SRCS device_guard.cc + DEPS enforce place) + cc_library( + stream + SRCS stream.cc + DEPS callback_manager) + cc_library( + event + SRCS event.cc + DEPS enforce place) + cc_library( + device_base + SRCS device_base.cc + DEPS stream event callback_manager device_guard device_context flags) + cc_library( + device_manager + SRCS device_manager.cc + DEPS custom_device) + set(GLOB_DEV_LIB + device_manager custom_device + CACHE INTERNAL "Global DEV library") endif() diff --git a/paddle/phi/backends/cpu/CMakeLists.txt b/paddle/phi/backends/cpu/CMakeLists.txt index 82ea42566fc..e32aa17758b 100644 --- a/paddle/phi/backends/cpu/CMakeLists.txt +++ b/paddle/phi/backends/cpu/CMakeLists.txt @@ -1,6 +1,12 @@ if(WITH_MKLDNN) # TODO(wilber): support mkldnn context. - cc_library(cpu_context SRCS cpu_context.cc DEPS phi_device_context mkldnn eigen3) + cc_library( + cpu_context + SRCS cpu_context.cc + DEPS phi_device_context mkldnn eigen3) else() - cc_library(cpu_context SRCS cpu_context.cc DEPS phi_device_context eigen3) + cc_library( + cpu_context + SRCS cpu_context.cc + DEPS phi_device_context eigen3) endif() diff --git a/paddle/phi/backends/custom/CMakeLists.txt b/paddle/phi/backends/custom/CMakeLists.txt index 5b46afb4ce9..d8ed6706eba 100644 --- a/paddle/phi/backends/custom/CMakeLists.txt +++ b/paddle/phi/backends/custom/CMakeLists.txt @@ -1,5 +1,14 @@ -if (WITH_CUSTOM_DEVICE) - cc_library(custom_context SRCS custom_context.cc DEPS phi_device_context device_manager) - cc_library(custom_device SRCS custom_device.cc DEPS device_base device_context) - cc_test(custom_device_test SRCS custom_device_test.cc DEPS device_manager device_context) +if(WITH_CUSTOM_DEVICE) + cc_library( + custom_context + SRCS custom_context.cc + DEPS phi_device_context device_manager) + cc_library( + custom_device + SRCS custom_device.cc + DEPS device_base device_context) + cc_test( + custom_device_test + SRCS custom_device_test.cc + DEPS device_manager device_context) endif() diff --git a/paddle/phi/backends/dynload/CMakeLists.txt b/paddle/phi/backends/dynload/CMakeLists.txt index bc5ef3cd5c0..91dbafe0cd3 100644 --- a/paddle/phi/backends/dynload/CMakeLists.txt +++ b/paddle/phi/backends/dynload/CMakeLists.txt @@ -1,57 +1,94 @@ -cc_library(phi_dynamic_loader SRCS dynamic_loader.cc DEPS enforce glog gflags) +cc_library( + phi_dynamic_loader + SRCS dynamic_loader.cc + DEPS enforce glog gflags) -list(APPEND CUDA_SRCS cublas.cc cublasLt.cc cudnn.cc curand.cc cusolver.cc cusparse.cc nvtx.cc cufft.cc) +list( + APPEND + CUDA_SRCS + cublas.cc + cublasLt.cc + cudnn.cc + curand.cc + cusolver.cc + cusparse.cc + nvtx.cc + cufft.cc) -if (NOT WITH_NV_JETSON) +if(NOT WITH_NV_JETSON) list(APPEND CUDA_SRCS nvjpeg.cc) endif() -if (WITH_ROCM) +if(WITH_ROCM) list(APPEND HIP_SRCS rocblas.cc miopen.cc hiprand.cc hipfft.cc) endif() # There is no macOS version of NCCL. # Disable nvrtc and cuda_driver api on MacOS, and only do a early test on Linux and Windows. -if (NOT APPLE) +if(NOT APPLE) list(APPEND CUDA_SRCS nvrtc.cc cuda_driver.cc) - if (WITH_NCCL) + if(WITH_NCCL) list(APPEND CUDA_SRCS nccl.cc) endif() - if (WITH_ROCM) + if(WITH_ROCM) list(APPEND HIP_SRCS hiprtc.cc rocm_driver.cc) - if (WITH_RCCL) + if(WITH_RCCL) list(APPEND HIP_SRCS rccl.cc) endif() endif() endif() -if (TENSORRT_FOUND) +if(TENSORRT_FOUND) list(APPEND CUDA_SRCS tensorrt.cc) endif() configure_file(cupti_lib_path.h.in ${CMAKE_CURRENT_BINARY_DIR}/cupti_lib_path.h) -if (CUPTI_FOUND) +if(CUPTI_FOUND) list(APPEND CUDA_SRCS cupti.cc) endif(CUPTI_FOUND) if(WITH_ROCM) - hip_library(phi_dynload_cuda SRCS ${HIP_SRCS} DEPS phi_dynamic_loader) - cc_library(phi_dynload_warpctc SRCS warpctc.cc DEPS phi_dynamic_loader warpctc) -elseif (WITH_ASCEND_CL) - cc_library(phi_dynload_warpctc SRCS warpctc.cc DEPS phi_dynamic_loader warpctc npu_hccl) + hip_library( + phi_dynload_cuda + SRCS ${HIP_SRCS} + DEPS phi_dynamic_loader) + cc_library( + phi_dynload_warpctc + SRCS warpctc.cc + DEPS phi_dynamic_loader warpctc) +elseif(WITH_ASCEND_CL) + cc_library( + phi_dynload_warpctc + SRCS warpctc.cc + DEPS phi_dynamic_loader warpctc npu_hccl) else() - nv_library(phi_dynload_cuda SRCS ${CUDA_SRCS} DEPS phi_dynamic_loader) - cc_library(phi_dynload_warpctc SRCS warpctc.cc DEPS phi_dynamic_loader warpctc) + nv_library( + phi_dynload_cuda + SRCS ${CUDA_SRCS} + DEPS phi_dynamic_loader) + cc_library( + phi_dynload_warpctc + SRCS warpctc.cc + DEPS phi_dynamic_loader warpctc) endif() -if (WITH_MKLML) - cc_library(phi_dynload_mklml SRCS mklml.cc DEPS phi_dynamic_loader mklml) +if(WITH_MKLML) + cc_library( + phi_dynload_mklml + SRCS mklml.cc + DEPS phi_dynamic_loader mklml) endif() -cc_library(phi_dynload_lapack SRCS lapack.cc DEPS phi_dynamic_loader) +cc_library( + phi_dynload_lapack + SRCS lapack.cc + DEPS phi_dynamic_loader) add_dependencies(phi_dynload_lapack extern_lapack) # TODO(TJ): add iomp, mkldnn? -if (MKL_FOUND AND WITH_ONEMKL) +if(MKL_FOUND AND WITH_ONEMKL) message("ONEMKL INCLUDE directory is ${MKL_INCLUDE}") - cc_library(phi_dynload_mklrt SRCS mklrt.cc DEPS phi_dynamic_loader) + cc_library( + phi_dynload_mklrt + SRCS mklrt.cc + DEPS phi_dynamic_loader) target_include_directories(phi_dynload_mklrt PRIVATE ${MKL_INCLUDE}) endif() diff --git a/paddle/phi/backends/gpu/CMakeLists.txt b/paddle/phi/backends/gpu/CMakeLists.txt index ebe8f1ca4c1..6d9f2de67d5 100644 --- a/paddle/phi/backends/gpu/CMakeLists.txt +++ b/paddle/phi/backends/gpu/CMakeLists.txt @@ -1,10 +1,22 @@ if(WITH_GPU) add_subdirectory(cuda) - nv_library(phi_gpu_info SRCS gpu_info.cc DEPS phi_cuda_info gflags glog enforce phi_dynload_cuda) + nv_library( + phi_gpu_info + SRCS gpu_info.cc + DEPS phi_cuda_info gflags glog enforce phi_dynload_cuda) elseif(WITH_ROCM) add_subdirectory(rocm) - hip_library(phi_gpu_info SRCS gpu_info.cc DEPS phi_rocm_info gflags glog enforce phi_dynload_cuda) + hip_library( + phi_gpu_info + SRCS gpu_info.cc + DEPS phi_rocm_info gflags glog enforce phi_dynload_cuda) endif() -cc_library(gpu_resources SRCS gpu_resources.cc DEPS phi_device_context phi_gpu_info) -cc_library(gpu_context SRCS gpu_context.cc DEPS phi_device_context phi_gpu_info eigen3 gpu_resources) +cc_library( + gpu_resources + SRCS gpu_resources.cc + DEPS phi_device_context phi_gpu_info) +cc_library( + gpu_context + SRCS gpu_context.cc + DEPS phi_device_context phi_gpu_info eigen3 gpu_resources) diff --git a/paddle/phi/backends/gpu/cuda/CMakeLists.txt b/paddle/phi/backends/gpu/cuda/CMakeLists.txt index a3393f97d75..9765f5dc03b 100644 --- a/paddle/phi/backends/gpu/cuda/CMakeLists.txt +++ b/paddle/phi/backends/gpu/cuda/CMakeLists.txt @@ -1 +1,4 @@ -nv_library(phi_cuda_info SRCS cuda_info.cc DEPS gflags glog enforce phi_dynload_cuda) +nv_library( + phi_cuda_info + SRCS cuda_info.cc + DEPS gflags glog enforce phi_dynload_cuda) diff --git a/paddle/phi/backends/gpu/rocm/CMakeLists.txt b/paddle/phi/backends/gpu/rocm/CMakeLists.txt index 257e4cc8afb..730aad5d2fd 100644 --- a/paddle/phi/backends/gpu/rocm/CMakeLists.txt +++ b/paddle/phi/backends/gpu/rocm/CMakeLists.txt @@ -1 +1,4 @@ -hip_library(phi_rocm_info SRCS rocm_info.cc DEPS gflags glog enforce phi_dynload_cuda) +hip_library( + phi_rocm_info + SRCS rocm_info.cc + DEPS gflags glog enforce phi_dynload_cuda) diff --git a/paddle/phi/backends/xpu/CMakeLists.txt b/paddle/phi/backends/xpu/CMakeLists.txt index 4d885757bb1..861b57956ba 100644 --- a/paddle/phi/backends/xpu/CMakeLists.txt +++ b/paddle/phi/backends/xpu/CMakeLists.txt @@ -1,2 +1,8 @@ -cc_library(phi_xpu_info SRCS xpu_info.cc DEPS enforce xpulib phi_place) -cc_library(xpu_context SRCS xpu_context.cc DEPS phi_device_context phi_xpu_info) +cc_library( + phi_xpu_info + SRCS xpu_info.cc + DEPS enforce xpulib phi_place) +cc_library( + xpu_context + SRCS xpu_context.cc + DEPS phi_device_context phi_xpu_info) diff --git a/paddle/phi/common/CMakeLists.txt b/paddle/phi/common/CMakeLists.txt index b1ca4d1f8a8..d9266bd06d2 100644 --- a/paddle/phi/common/CMakeLists.txt +++ b/paddle/phi/common/CMakeLists.txt @@ -1,3 +1,9 @@ cc_library(phi_place SRCS place.cc) -cc_library(scalar SRCS scalar.cc DEPS phi_enforce tensor) -cc_library(int_array SRCS int_array.cc DEPS phi_enforce tensor) +cc_library( + scalar + SRCS scalar.cc + DEPS phi_enforce tensor) +cc_library( + int_array + SRCS int_array.cc + DEPS phi_enforce tensor) diff --git a/paddle/phi/core/CMakeLists.txt b/paddle/phi/core/CMakeLists.txt index 41f654bfc8f..8b180a2c2ae 100644 --- a/paddle/phi/core/CMakeLists.txt +++ b/paddle/phi/core/CMakeLists.txt @@ -6,30 +6,78 @@ set(phi_enforce_deps errors flags) if(WITH_GPU) set(phi_enforce_deps ${phi_enforce_deps} external_error_proto) endif() -cc_library(phi_enforce SRCS enforce.cc DEPS ${phi_enforce_deps}) +cc_library( + phi_enforce + SRCS enforce.cc + DEPS ${phi_enforce_deps}) -cc_library(kernel_factory SRCS kernel_factory.cc DEPS phi_enforce fluid_convert_utils) -cc_library(kernel_context SRCS kernel_context.cc DEPS phi_enforce phi_context) +cc_library( + kernel_factory + SRCS kernel_factory.cc + DEPS phi_enforce fluid_convert_utils) +cc_library( + kernel_context + SRCS kernel_context.cc + DEPS phi_enforce phi_context) -cc_library(ddim SRCS ddim.cc DEPS phi_enforce) -cc_library(tensor_base SRCS tensor_base.cc allocator.cc DEPS phi_enforce) -cc_library(tensor_meta SRCS tensor_meta.cc DEPS phi_enforce) -cc_library(lod_utils SRCS lod_utils.cc DEPS phi_enforce) +cc_library( + ddim + SRCS ddim.cc + DEPS phi_enforce) +cc_library( + tensor_base + SRCS tensor_base.cc allocator.cc + DEPS phi_enforce) +cc_library( + tensor_meta + SRCS tensor_meta.cc + DEPS phi_enforce) +cc_library( + lod_utils + SRCS lod_utils.cc + DEPS phi_enforce) -cc_library(dense_tensor SRCS dense_tensor.cc dense_tensor_impl.cc DEPS convert_utils fluid_convert_utils tensor_meta tensor_base) -cc_library(sparse_coo_tensor SRCS sparse_coo_tensor.cc DEPS tensor_meta tensor_base) -cc_library(sparse_csr_tensor SRCS sparse_csr_tensor.cc DEPS dense_tensor tensor_base) -cc_library(string_tensor SRCS string_tensor.cc DEPS convert_utils tensor_meta tensor_base) +cc_library( + dense_tensor + SRCS dense_tensor.cc dense_tensor_impl.cc + DEPS convert_utils fluid_convert_utils tensor_meta tensor_base) +cc_library( + sparse_coo_tensor + SRCS sparse_coo_tensor.cc + DEPS tensor_meta tensor_base) +cc_library( + sparse_csr_tensor + SRCS sparse_csr_tensor.cc + DEPS dense_tensor tensor_base) +cc_library( + string_tensor + SRCS string_tensor.cc + DEPS convert_utils tensor_meta tensor_base) -cc_library(meta_tensor SRCS meta_tensor.cc DEPS tensor_base tensor_meta dense_tensor) -cc_library(infermeta_utils SRCS infermeta_utils.cc DEPS meta_tensor) -cc_library(selected_rows SRCS selected_rows_impl.cc selected_rows.cc DEPS tensor_base dense_tensor phi_enforce ddim memcpy) -cc_library(phi_device_context SRCS device_context.cc DEPS dense_tensor selected_rows) +cc_library( + meta_tensor + SRCS meta_tensor.cc + DEPS tensor_base tensor_meta dense_tensor) +cc_library( + infermeta_utils + SRCS infermeta_utils.cc + DEPS meta_tensor) +cc_library( + selected_rows + SRCS selected_rows_impl.cc selected_rows.cc + DEPS tensor_base dense_tensor phi_enforce ddim memcpy) +cc_library( + phi_device_context + SRCS device_context.cc + DEPS dense_tensor selected_rows) -cc_library(custom_kernel SRCS custom_kernel.cc DEPS kernel_factory) +cc_library( + custom_kernel + SRCS custom_kernel.cc + DEPS kernel_factory) # Will remove once we implemented MKLDNN_Tensor if(WITH_MKLDNN) - add_dependencies(dense_tensor mkldnn) - add_dependencies(tensor_base mkldnn) + add_dependencies(dense_tensor mkldnn) + add_dependencies(tensor_base mkldnn) endif() diff --git a/paddle/phi/core/compat/CMakeLists.txt b/paddle/phi/core/compat/CMakeLists.txt index 3423e380970..3fd9b74255c 100644 --- a/paddle/phi/core/compat/CMakeLists.txt +++ b/paddle/phi/core/compat/CMakeLists.txt @@ -1,5 +1,11 @@ -cc_library(arg_map_context SRCS arg_map_context.cc DEPS phi_enforce) -cc_library(op_utils SRCS op_utils.cc DEPS arg_map_context enforce) +cc_library( + arg_map_context + SRCS arg_map_context.cc + DEPS phi_enforce) +cc_library( + op_utils + SRCS op_utils.cc + DEPS arg_map_context enforce) set(convert_utils_deps data_type place op_utils) @@ -13,4 +19,7 @@ endif() if(WITH_CUSTOM_DEVICE) set(convert_utils_deps ${convert_utils_deps} device_manager) endif() -cc_library(convert_utils SRCS convert_utils.cc DEPS ${convert_utils_deps}) +cc_library( + convert_utils + SRCS convert_utils.cc + DEPS ${convert_utils_deps}) diff --git a/paddle/phi/infermeta/CMakeLists.txt b/paddle/phi/infermeta/CMakeLists.txt index 1a19fd00322..92b64ab4e66 100644 --- a/paddle/phi/infermeta/CMakeLists.txt +++ b/paddle/phi/infermeta/CMakeLists.txt @@ -1,3 +1,9 @@ -cc_library(infermeta SRCS nullary.cc unary.cc binary.cc ternary.cc multiary.cc DEPS convert_utils meta_tensor infermeta_utils) -cc_library(backward_infermeta SRCS backward.cc DEPS meta_tensor convert_utils) +cc_library( + infermeta + SRCS nullary.cc unary.cc binary.cc ternary.cc multiary.cc + DEPS convert_utils meta_tensor infermeta_utils) +cc_library( + backward_infermeta + SRCS backward.cc + DEPS meta_tensor convert_utils) add_subdirectory(strings) diff --git a/paddle/phi/infermeta/strings/CMakeLists.txt b/paddle/phi/infermeta/strings/CMakeLists.txt index 3e1a947728f..c2f891fe712 100644 --- a/paddle/phi/infermeta/strings/CMakeLists.txt +++ b/paddle/phi/infermeta/strings/CMakeLists.txt @@ -1 +1,4 @@ -cc_library(string_infermeta SRCS nullary.cc unary.cc DEPS convert_utils infermeta_utils) +cc_library( + string_infermeta + SRCS nullary.cc unary.cc + DEPS convert_utils infermeta_utils) diff --git a/paddle/phi/kernels/CMakeLists.txt b/paddle/phi/kernels/CMakeLists.txt index 437c55c840f..67795c2a8aa 100644 --- a/paddle/phi/kernels/CMakeLists.txt +++ b/paddle/phi/kernels/CMakeLists.txt @@ -1,7 +1,14 @@ -set(kernel_declare_file ${PADDLE_BINARY_DIR}/paddle/phi/kernels/declarations.h.tmp CACHE INTERNAL "declarations.h file") -set(kernel_declare_file_final ${PADDLE_BINARY_DIR}/paddle/phi/kernels/declarations.h) -file(WRITE ${kernel_declare_file} "// Generated by the paddle/phi/kernels/CMakeLists.txt. DO NOT EDIT!\n\n#pragma once\n\n") -file(APPEND ${kernel_declare_file} "#include \"paddle/phi/core/kernel_registry.h\"\n\n") +set(kernel_declare_file + ${PADDLE_BINARY_DIR}/paddle/phi/kernels/declarations.h.tmp + CACHE INTERNAL "declarations.h file") +set(kernel_declare_file_final + ${PADDLE_BINARY_DIR}/paddle/phi/kernels/declarations.h) +file( + WRITE ${kernel_declare_file} + "// Generated by the paddle/phi/kernels/CMakeLists.txt. DO NOT EDIT!\n\n#pragma once\n\n" +) +file(APPEND ${kernel_declare_file} + "#include \"paddle/phi/core/kernel_registry.h\"\n\n") # phi functors and functions called by kernels add_subdirectory(funcs) @@ -13,8 +20,25 @@ add_subdirectory(autotune) set_property(GLOBAL PROPERTY PHI_KERNELS "") # [ 1. Common kernel compilation dependencies ] -set(COMMON_KERNEL_DEPS dense_tensor sparse_coo_tensor sparse_csr_tensor kernel_context kernel_factory arg_map_context convert_utils lod_utils custom_kernel) -set(COMMON_KERNEL_DEPS ${COMMON_KERNEL_DEPS} eigen_function blas math_function im2col vol2col concat_and_split_functor selected_rows_functor) +set(COMMON_KERNEL_DEPS + dense_tensor + sparse_coo_tensor + sparse_csr_tensor + kernel_context + kernel_factory + arg_map_context + convert_utils + lod_utils + custom_kernel) +set(COMMON_KERNEL_DEPS + ${COMMON_KERNEL_DEPS} + eigen_function + blas + math_function + im2col + vol2col + concat_and_split_functor + selected_rows_functor) # remove this dep after removing fluid deps on tensor creation set(COMMON_KERNEL_DEPS ${COMMON_KERNEL_DEPS} phi_api_utils) set(COMMON_KERNEL_DEPS ${COMMON_KERNEL_DEPS} infermeta) @@ -30,50 +54,105 @@ kernel_library(full_kernel DEPS ${COMMON_KERNEL_DEPS} empty_kernel) # Some kernels depend on some targets that are not commonly used. # These targets are not suitable for common dependencies. # In this case, you need to manually generate them here. -set(AUTOTUNE_KERNELS conv_kernel conv_grad_kernel conv_grad_grad_kernel conv_transpose_kernel conv_transpose_grad_kernel) -set(MANUAL_BUILD_KERNELS ${AUTOTUNE_KERNELS} cross_entropy_kernel adam_kernel adamw_kernel deformable_conv_kernel deformable_conv_grad_kernel eigh_kernel - gumbel_softmax_kernel gumbel_softmax_grad_kernel hierarchical_sigmoid_kernel hierarchical_sigmoid_grad_kernel - matrix_power_kernel matrix_power_grad_kernel maxout_kernel maxout_grad_kernel pool_kernel - put_along_axis_kernel put_along_axis_grad_kernel segment_pool_kernel segment_pool_grad_kernel - softmax_kernel softmax_grad_kernel take_along_axis_kernel take_along_axis_grad_kernel - triangular_solve_grad_kernel determinant_grad_kernel reduce_sum_kernel reduce_mean_kernel rnn_kernel rnn_grad_kernel warpctc_kernel warpctc_grad_kernel) +set(AUTOTUNE_KERNELS conv_kernel conv_grad_kernel conv_grad_grad_kernel + conv_transpose_kernel conv_transpose_grad_kernel) +set(MANUAL_BUILD_KERNELS + ${AUTOTUNE_KERNELS} + cross_entropy_kernel + adam_kernel + adamw_kernel + deformable_conv_kernel + deformable_conv_grad_kernel + eigh_kernel + gumbel_softmax_kernel + gumbel_softmax_grad_kernel + hierarchical_sigmoid_kernel + hierarchical_sigmoid_grad_kernel + matrix_power_kernel + matrix_power_grad_kernel + maxout_kernel + maxout_grad_kernel + pool_kernel + put_along_axis_kernel + put_along_axis_grad_kernel + segment_pool_kernel + segment_pool_grad_kernel + softmax_kernel + softmax_grad_kernel + take_along_axis_kernel + take_along_axis_grad_kernel + triangular_solve_grad_kernel + determinant_grad_kernel + reduce_sum_kernel + reduce_mean_kernel + rnn_kernel + rnn_grad_kernel + warpctc_kernel + warpctc_grad_kernel) foreach(src ${AUTOTUNE_KERNELS}) kernel_library(${src} DEPS ${COMMON_KERNEL_DEPS} switch_autotune) endforeach() -kernel_library(adam_kernel DEPS gflags glog flags ${COMMON_KERNEL_DEPS} selected_rows_functor threadpool jit_kernel_helper) +kernel_library( + adam_kernel + DEPS + gflags + glog + flags + ${COMMON_KERNEL_DEPS} + selected_rows_functor + threadpool + jit_kernel_helper) kernel_library(adamw_kernel DEPS ${COMMON_KERNEL_DEPS} adam_kernel) -kernel_library(cross_entropy_kernel DEPS ${COMMON_KERNEL_DEPS} softmax cross_entropy) -kernel_library(deformable_conv_kernel DEPS ${COMMON_KERNEL_DEPS} deformable_conv_functor) -kernel_library(deformable_conv_grad_kernel DEPS ${COMMON_KERNEL_DEPS} deformable_conv_functor) -kernel_library(determinant_grad_kernel DEPS ${COMMON_KERNEL_DEPS} matrix_inverse) +kernel_library(cross_entropy_kernel DEPS ${COMMON_KERNEL_DEPS} softmax + cross_entropy) +kernel_library(deformable_conv_kernel DEPS ${COMMON_KERNEL_DEPS} + deformable_conv_functor) +kernel_library(deformable_conv_grad_kernel DEPS ${COMMON_KERNEL_DEPS} + deformable_conv_functor) +kernel_library(determinant_grad_kernel DEPS ${COMMON_KERNEL_DEPS} + matrix_inverse) kernel_library(eigh_kernel DEPS ${COMMON_KERNEL_DEPS} lapack_function) -kernel_library(hierarchical_sigmoid_kernel DEPS ${COMMON_KERNEL_DEPS} matrix_bit_code) -kernel_library(hierarchical_sigmoid_grad_kernel DEPS ${COMMON_KERNEL_DEPS} matrix_bit_code) +kernel_library(hierarchical_sigmoid_kernel DEPS ${COMMON_KERNEL_DEPS} + matrix_bit_code) +kernel_library(hierarchical_sigmoid_grad_kernel DEPS ${COMMON_KERNEL_DEPS} + matrix_bit_code) kernel_library(gumbel_softmax_kernel DEPS ${COMMON_KERNEL_DEPS} softmax) kernel_library(gumbel_softmax_grad_kernel DEPS ${COMMON_KERNEL_DEPS} softmax) kernel_library(reduce_sum_kernel DEPS ${COMMON_KERNEL_DEPS} cast_kernel) kernel_library(reduce_mean_kernel DEPS ${COMMON_KERNEL_DEPS} cast_kernel) kernel_library(matrix_power_kernel DEPS ${COMMON_KERNEL_DEPS} matrix_inverse) -kernel_library(matrix_power_grad_kernel DEPS ${COMMON_KERNEL_DEPS} matrix_inverse) +kernel_library(matrix_power_grad_kernel DEPS ${COMMON_KERNEL_DEPS} + matrix_inverse) kernel_library(maxout_kernel DEPS ${COMMON_KERNEL_DEPS} maxouting) kernel_library(maxout_grad_kernel DEPS ${COMMON_KERNEL_DEPS} maxouting) kernel_library(pool_kernel DEPS ${COMMON_KERNEL_DEPS} pooling) -kernel_library(put_along_axis_kernel DEPS ${COMMON_KERNEL_DEPS} gather_scatter_kernel) -kernel_library(put_along_axis_grad_kernel DEPS ${COMMON_KERNEL_DEPS} gather_scatter_kernel) +kernel_library(put_along_axis_kernel DEPS ${COMMON_KERNEL_DEPS} + gather_scatter_kernel) +kernel_library(put_along_axis_grad_kernel DEPS ${COMMON_KERNEL_DEPS} + gather_scatter_kernel) kernel_library(segment_pool_kernel DEPS ${COMMON_KERNEL_DEPS} segment_pooling) -kernel_library(segment_pool_grad_kernel DEPS ${COMMON_KERNEL_DEPS} segment_pooling) +kernel_library(segment_pool_grad_kernel DEPS ${COMMON_KERNEL_DEPS} + segment_pooling) kernel_library(softmax_kernel DEPS ${COMMON_KERNEL_DEPS} softmax) kernel_library(softmax_grad_kernel DEPS ${COMMON_KERNEL_DEPS} softmax) -kernel_library(take_along_axis_kernel DEPS ${COMMON_KERNEL_DEPS} gather_scatter_kernel) -kernel_library(take_along_axis_grad_kernel DEPS ${COMMON_KERNEL_DEPS} gather_scatter_kernel) -kernel_library(triangular_solve_grad_kernel DEPS ${COMMON_KERNEL_DEPS} matrix_reduce) -kernel_library(rnn_kernel DEPS ${COMMON_KERNEL_DEPS} concat_and_split_functor lstm_compute gru_compute) -kernel_library(rnn_grad_kernel DEPS ${COMMON_KERNEL_DEPS} concat_and_split_functor lstm_compute gru_compute) -kernel_library(warpctc_kernel DEPS ${COMMON_KERNEL_DEPS} phi_dynload_warpctc sequence_padding sequence_scale) -kernel_library(warpctc_grad_kernel DEPS ${COMMON_KERNEL_DEPS} phi_dynload_warpctc sequence_padding sequence_scale) +kernel_library(take_along_axis_kernel DEPS ${COMMON_KERNEL_DEPS} + gather_scatter_kernel) +kernel_library(take_along_axis_grad_kernel DEPS ${COMMON_KERNEL_DEPS} + gather_scatter_kernel) +kernel_library(triangular_solve_grad_kernel DEPS ${COMMON_KERNEL_DEPS} + matrix_reduce) +kernel_library(rnn_kernel DEPS ${COMMON_KERNEL_DEPS} concat_and_split_functor + lstm_compute gru_compute) +kernel_library(rnn_grad_kernel DEPS ${COMMON_KERNEL_DEPS} + concat_and_split_functor lstm_compute gru_compute) +kernel_library(warpctc_kernel DEPS ${COMMON_KERNEL_DEPS} phi_dynload_warpctc + sequence_padding sequence_scale) +kernel_library(warpctc_grad_kernel DEPS ${COMMON_KERNEL_DEPS} + phi_dynload_warpctc sequence_padding sequence_scale) # 4. auto parse and build kernel targets by cmake -register_kernels(EXCLUDES ${COMMON_BAISC_KERNELS} ${MANUAL_BUILD_KERNELS} DEPS ${COMMON_KERNEL_DEPS} ${COMMON_BAISC_KERNELS} ) +register_kernels(EXCLUDES ${COMMON_BAISC_KERNELS} ${MANUAL_BUILD_KERNELS} DEPS + ${COMMON_KERNEL_DEPS} ${COMMON_BAISC_KERNELS}) # phi sparse kernels add_subdirectory(sparse) diff --git a/paddle/phi/kernels/autotune/CMakeLists.txt b/paddle/phi/kernels/autotune/CMakeLists.txt index 63dc2245944..a7a6c2f8e4d 100644 --- a/paddle/phi/kernels/autotune/CMakeLists.txt +++ b/paddle/phi/kernels/autotune/CMakeLists.txt @@ -1,12 +1,33 @@ -if (WITH_GPU) - nv_test(gpu_timer_test SRCS gpu_timer_test.cu DEPS gtest) - nv_test(auto_tune_test SRCS auto_tune_test.cu DEPS gtest) -elseif (WITH_ROCM) - hip_test(gpu_timer_test SRCS gpu_timer_test.cu DEPS gtest) - hip_test(auto_tune_test SRCS auto_tune_test.cu DEPS gtest) +if(WITH_GPU) + nv_test( + gpu_timer_test + SRCS gpu_timer_test.cu + DEPS gtest) + nv_test( + auto_tune_test + SRCS auto_tune_test.cu + DEPS gtest) +elseif(WITH_ROCM) + hip_test( + gpu_timer_test + SRCS gpu_timer_test.cu + DEPS gtest) + hip_test( + auto_tune_test + SRCS auto_tune_test.cu + DEPS gtest) endif() -cc_library(cache SRCS cache.cc DEPS boost) -cc_library(switch_autotune SRCS switch_autotune.cc DEPS cache flags) +cc_library( + cache + SRCS cache.cc + DEPS boost) +cc_library( + switch_autotune + SRCS switch_autotune.cc + DEPS cache flags) -cc_test(cache_test SRCS cache_test.cc DEPS gtest cache) +cc_test( + cache_test + SRCS cache_test.cc + DEPS gtest cache) diff --git a/paddle/phi/kernels/funcs/blas/CMakeLists.txt b/paddle/phi/kernels/funcs/blas/CMakeLists.txt index cb054cc76e1..732114f2a6e 100644 --- a/paddle/phi/kernels/funcs/blas/CMakeLists.txt +++ b/paddle/phi/kernels/funcs/blas/CMakeLists.txt @@ -1 +1,4 @@ -cc_library(blas SRCS blas.cc DEPS cblas framework_proto device_context) +cc_library( + blas + SRCS blas.cc + DEPS cblas framework_proto device_context) diff --git a/paddle/phi/kernels/funcs/eigen/CMakeLists.txt b/paddle/phi/kernels/funcs/eigen/CMakeLists.txt index 8b64e35b935..de771f12fbf 100644 --- a/paddle/phi/kernels/funcs/eigen/CMakeLists.txt +++ b/paddle/phi/kernels/funcs/eigen/CMakeLists.txt @@ -1,9 +1,24 @@ -file(GLOB EIGEN_CC_SOURCES RELATIVE "${CMAKE_CURRENT_SOURCE_DIR}" "*.cc") -file(GLOB EIGEN_CU_SOURCES RELATIVE "${CMAKE_CURRENT_SOURCE_DIR}" "*.cu") +file( + GLOB EIGEN_CC_SOURCES + RELATIVE "${CMAKE_CURRENT_SOURCE_DIR}" + "*.cc") +file( + GLOB EIGEN_CU_SOURCES + RELATIVE "${CMAKE_CURRENT_SOURCE_DIR}" + "*.cu") if(WITH_GPU) - nv_library(eigen_function SRCS ${EIGEN_CC_SOURCES} ${EIGEN_CU_SOURCES} DEPS eigen3) + nv_library( + eigen_function + SRCS ${EIGEN_CC_SOURCES} ${EIGEN_CU_SOURCES} + DEPS eigen3) elseif(WITH_ROCM) - hip_library(eigen_function SRCS ${EIGEN_CC_SOURCES} ${EIGEN_CU_SOURCES} DEPS eigen3) + hip_library( + eigen_function + SRCS ${EIGEN_CC_SOURCES} ${EIGEN_CU_SOURCES} + DEPS eigen3) else() - cc_library(eigen_function SRCS ${EIGEN_CC_SOURCES} DEPS eigen3) + cc_library( + eigen_function + SRCS ${EIGEN_CC_SOURCES} + DEPS eigen3) endif() diff --git a/paddle/phi/kernels/selected_rows/CMakeLists.txt b/paddle/phi/kernels/selected_rows/CMakeLists.txt index c6fb621ffc0..520536d8235 100644 --- a/paddle/phi/kernels/selected_rows/CMakeLists.txt +++ b/paddle/phi/kernels/selected_rows/CMakeLists.txt @@ -1,3 +1,13 @@ - -set(SELECTED_ROWS_KERNEL_DEPS dense_tensor selected_rows selected_rows_functor sparse_csr_tensor kernel_context kernel_factory arg_map_context convert_utils lod_utils math_function custom_kernel) +set(SELECTED_ROWS_KERNEL_DEPS + dense_tensor + selected_rows + selected_rows_functor + sparse_csr_tensor + kernel_context + kernel_factory + arg_map_context + convert_utils + lod_utils + math_function + custom_kernel) register_kernels(DEPS ${SELECTED_ROWS_KERNEL_DEPS} SUB_DIR "selected_rows") diff --git a/paddle/phi/kernels/sparse/CMakeLists.txt b/paddle/phi/kernels/sparse/CMakeLists.txt index 479d5304294..6c5e7dee4cb 100644 --- a/paddle/phi/kernels/sparse/CMakeLists.txt +++ b/paddle/phi/kernels/sparse/CMakeLists.txt @@ -1,3 +1,13 @@ - -set(SPARSE_KERNEL_DEPS dense_tensor sparse_coo_tensor sparse_csr_tensor kernel_context kernel_factory arg_map_context convert_utils lod_utils math_function custom_kernel copy_kernel) +set(SPARSE_KERNEL_DEPS + dense_tensor + sparse_coo_tensor + sparse_csr_tensor + kernel_context + kernel_factory + arg_map_context + convert_utils + lod_utils + math_function + custom_kernel + copy_kernel) register_kernels(DEPS ${SPARSE_KERNEL_DEPS} SUB_DIR "sparse") diff --git a/paddle/phi/kernels/strings/CMakeLists.txt b/paddle/phi/kernels/strings/CMakeLists.txt index 54eeeb290e1..7cbba08e161 100644 --- a/paddle/phi/kernels/strings/CMakeLists.txt +++ b/paddle/phi/kernels/strings/CMakeLists.txt @@ -3,10 +3,23 @@ if(WITH_GPU OR WITH_ROCM) add_subdirectory(gpu) endif() -cc_library(unicode SRCS unicode.cc DEPS utf8proc) +cc_library( + unicode + SRCS unicode.cc + DEPS utf8proc) set_property(GLOBAL PROPERTY STRING_KERNELS "") -set(STRING_KERNEL_DEPS dense_tensor string_tensor sparse_coo_tensor sparse_csr_tensor kernel_context kernel_factory arg_map_context convert_utils lod_utils custom_kernel) +set(STRING_KERNEL_DEPS + dense_tensor + string_tensor + sparse_coo_tensor + sparse_csr_tensor + kernel_context + kernel_factory + arg_map_context + convert_utils + lod_utils + custom_kernel) set(STRING_KERNEL_DEPS ${STRING_KERNEL_DEPS} eigen_function blas math_function) # remove this dep after removing fluid deps on tensor creation set(STRING_KERNEL_DEPS ${STRING_KERNEL_DEPS} phi_api_utils) diff --git a/paddle/phi/ops/compat/CMakeLists.txt b/paddle/phi/ops/compat/CMakeLists.txt index baae70903c6..34ded6653cf 100644 --- a/paddle/phi/ops/compat/CMakeLists.txt +++ b/paddle/phi/ops/compat/CMakeLists.txt @@ -1,7 +1,13 @@ -set(op_utils_header ${PADDLE_BINARY_DIR}/paddle/phi/ops/compat/signatures.h.tmp CACHE INTERNAL "op_args_fns.cc file") -set(op_utils_header_final ${PADDLE_BINARY_DIR}/paddle/phi/ops/compat/signatures.h) -file(WRITE ${op_utils_header} "// Generated by the paddle/phi/ops/compat/CMakeLists.txt. DO NOT EDIT!\n\n") -file(APPEND ${op_utils_header} "#include \"paddle/phi/core/compat/op_utils.h\"\n\n") +set(op_utils_header + ${PADDLE_BINARY_DIR}/paddle/phi/ops/compat/signatures.h.tmp + CACHE INTERNAL "op_args_fns.cc file") +set(op_utils_header_final + ${PADDLE_BINARY_DIR}/paddle/phi/ops/compat/signatures.h) +file( + WRITE ${op_utils_header} + "// Generated by the paddle/phi/ops/compat/CMakeLists.txt. DO NOT EDIT!\n\n") +file(APPEND ${op_utils_header} + "#include \"paddle/phi/core/compat/op_utils.h\"\n\n") # Automatically generate the registration code of all arg map functions # and compile the corresponding target to avoid frequent code conflicts diff --git a/paddle/phi/tests/api/CMakeLists.txt b/paddle/phi/tests/api/CMakeLists.txt index 2333f82d626..a337e4ee4bd 100644 --- a/paddle/phi/tests/api/CMakeLists.txt +++ b/paddle/phi/tests/api/CMakeLists.txt @@ -1,33 +1,111 @@ if(WITH_GPU) - nv_test(test_phi_tensor SRCS test_pten_tensor.cc DEPS phi_tensor glog) + nv_test( + test_phi_tensor + SRCS test_pten_tensor.cc + DEPS phi_tensor glog) elseif(WITH_ROCM) - hip_test(test_phi_tensor SRCS test_pten_tensor.cc DEPS phi_tensor glog) + hip_test( + test_phi_tensor + SRCS test_pten_tensor.cc + DEPS phi_tensor glog) else() - cc_test(test_phi_tensor SRCS test_pten_tensor.cc DEPS phi_tensor glog) + cc_test( + test_phi_tensor + SRCS test_pten_tensor.cc + DEPS phi_tensor glog) endif() -cc_test(test_phi_exception SRCS test_pten_exception.cc DEPS gtest) +cc_test( + test_phi_exception + SRCS test_pten_exception.cc + DEPS gtest) set(COMMON_API_TEST_DEPS phi_tensor phi_api phi_api_utils) -cc_test(test_mean_api SRCS test_mean_api.cc DEPS ${COMMON_API_TEST_DEPS}) -cc_test(test_dot_api SRCS test_dot_api.cc DEPS ${COMMON_API_TEST_DEPS}) -cc_test(test_matmul_api SRCS test_matmul_api.cc DEPS ${COMMON_API_TEST_DEPS}) -cc_test(test_empty_api SRCS test_empty_api.cc DEPS ${COMMON_API_TEST_DEPS}) -cc_test(test_fill_api SRCS test_fill_api.cc DEPS ${COMMON_API_TEST_DEPS} api_scalar) -cc_test(test_elementwise_api SRCS test_elementwise_api.cc DEPS ${COMMON_API_TEST_DEPS}) -cc_test(test_embedding_api SRCS test_embedding_api.cc DEPS ${COMMON_API_TEST_DEPS}) -cc_test(test_cast_api SRCS test_cast_api.cc DEPS ${COMMON_API_TEST_DEPS}) -cc_test(test_reshape_api SRCS test_reshape_api.cc DEPS ${COMMON_API_TEST_DEPS}) -cc_test(test_to_api SRCS test_to_api.cc DEPS ${COMMON_API_TEST_DEPS}) -cc_test(test_slice_api SRCS test_slice_api.cc DEPS ${COMMON_API_TEST_DEPS}) -cc_test(test_sum_api SRCS test_sum_api.cc DEPS ${COMMON_API_TEST_DEPS}) -cc_test(test_scale_api SRCS test_scale_api.cc DEPS ${COMMON_API_TEST_DEPS} api_scalar) -cc_test(test_scale_benchmark SRCS test_scale_benchmark.cc DEPS ${COMMON_API_TEST_DEPS}) -cc_test(test_conj_api SRCS test_conj_api.cc DEPS ${COMMON_API_TEST_DEPS}) -cc_test(test_concat_api SRCS test_concat_api.cc DEPS ${COMMON_API_TEST_DEPS}) -cc_test(test_split_api SRCS test_split_api.cc DEPS ${COMMON_API_TEST_DEPS}) -cc_test(test_data_transform SRCS test_data_transform.cc DEPS ${COMMON_API_TEST_DEPS}) -cc_test(test_sparse_utils_api SRCS test_sparse_utils_api.cc DEPS ${COMMON_API_TEST_DEPS}) -cc_test(test_sparse_conv_api SRCS test_sparse_conv_api.cc DEPS ${COMMON_API_TEST_DEPS}) -cc_test(test_strings_empty_api SRCS test_strings_empty_api.cc DEPS ${COMMON_API_TEST_DEPS}) -cc_test(test_strings_lower_upper_api SRCS test_strings_lower_upper_api.cc DEPS ${COMMON_API_TEST_DEPS}) +cc_test( + test_mean_api + SRCS test_mean_api.cc + DEPS ${COMMON_API_TEST_DEPS}) +cc_test( + test_dot_api + SRCS test_dot_api.cc + DEPS ${COMMON_API_TEST_DEPS}) +cc_test( + test_matmul_api + SRCS test_matmul_api.cc + DEPS ${COMMON_API_TEST_DEPS}) +cc_test( + test_empty_api + SRCS test_empty_api.cc + DEPS ${COMMON_API_TEST_DEPS}) +cc_test( + test_fill_api + SRCS test_fill_api.cc + DEPS ${COMMON_API_TEST_DEPS} api_scalar) +cc_test( + test_elementwise_api + SRCS test_elementwise_api.cc + DEPS ${COMMON_API_TEST_DEPS}) +cc_test( + test_embedding_api + SRCS test_embedding_api.cc + DEPS ${COMMON_API_TEST_DEPS}) +cc_test( + test_cast_api + SRCS test_cast_api.cc + DEPS ${COMMON_API_TEST_DEPS}) +cc_test( + test_reshape_api + SRCS test_reshape_api.cc + DEPS ${COMMON_API_TEST_DEPS}) +cc_test( + test_to_api + SRCS test_to_api.cc + DEPS ${COMMON_API_TEST_DEPS}) +cc_test( + test_slice_api + SRCS test_slice_api.cc + DEPS ${COMMON_API_TEST_DEPS}) +cc_test( + test_sum_api + SRCS test_sum_api.cc + DEPS ${COMMON_API_TEST_DEPS}) +cc_test( + test_scale_api + SRCS test_scale_api.cc + DEPS ${COMMON_API_TEST_DEPS} api_scalar) +cc_test( + test_scale_benchmark + SRCS test_scale_benchmark.cc + DEPS ${COMMON_API_TEST_DEPS}) +cc_test( + test_conj_api + SRCS test_conj_api.cc + DEPS ${COMMON_API_TEST_DEPS}) +cc_test( + test_concat_api + SRCS test_concat_api.cc + DEPS ${COMMON_API_TEST_DEPS}) +cc_test( + test_split_api + SRCS test_split_api.cc + DEPS ${COMMON_API_TEST_DEPS}) +cc_test( + test_data_transform + SRCS test_data_transform.cc + DEPS ${COMMON_API_TEST_DEPS}) +cc_test( + test_sparse_utils_api + SRCS test_sparse_utils_api.cc + DEPS ${COMMON_API_TEST_DEPS}) +cc_test( + test_sparse_conv_api + SRCS test_sparse_conv_api.cc + DEPS ${COMMON_API_TEST_DEPS}) +cc_test( + test_strings_empty_api + SRCS test_strings_empty_api.cc + DEPS ${COMMON_API_TEST_DEPS}) +cc_test( + test_strings_lower_upper_api + SRCS test_strings_lower_upper_api.cc + DEPS ${COMMON_API_TEST_DEPS}) diff --git a/paddle/phi/tests/common/CMakeLists.txt b/paddle/phi/tests/common/CMakeLists.txt index 150336a1ed6..3499489541d 100644 --- a/paddle/phi/tests/common/CMakeLists.txt +++ b/paddle/phi/tests/common/CMakeLists.txt @@ -1,11 +1,32 @@ -cc_test(phi_test_backend SRCS test_backend.cc DEPS gtest) -cc_test(phi_test_data_layout SRCS test_data_layout.cc DEPS gtest) -cc_test(phi_test_data_type SRCS test_data_type.cc DEPS gtest) -cc_test(phi_test_place SRCS test_place.cc DEPS phi_place) -cc_test(phi_test_int_array SRCS test_int_array.cc DEPS int_array api_int_array phi phi_api) -if (WITH_GPU) - nv_test(phi_test_scalar SRCS test_scalar.cu DEPS scalar api_scalar) +cc_test( + phi_test_backend + SRCS test_backend.cc + DEPS gtest) +cc_test( + phi_test_data_layout + SRCS test_data_layout.cc + DEPS gtest) +cc_test( + phi_test_data_type + SRCS test_data_type.cc + DEPS gtest) +cc_test( + phi_test_place + SRCS test_place.cc + DEPS phi_place) +cc_test( + phi_test_int_array + SRCS test_int_array.cc + DEPS int_array api_int_array phi phi_api) +if(WITH_GPU) + nv_test( + phi_test_scalar + SRCS test_scalar.cu + DEPS scalar api_scalar) endif() if(WITH_ROCM) - hip_test(phi_test_scalar SRCS test_scalar.cu DEPS scalar api_scalar) + hip_test( + phi_test_scalar + SRCS test_scalar.cu + DEPS scalar api_scalar) endif() diff --git a/paddle/phi/tests/core/CMakeLists.txt b/paddle/phi/tests/core/CMakeLists.txt index 7d2fd90e6bb..57a55963d5c 100644 --- a/paddle/phi/tests/core/CMakeLists.txt +++ b/paddle/phi/tests/core/CMakeLists.txt @@ -1,27 +1,66 @@ -cc_test(test_custom_kernel SRCS test_custom_kernel.cc DEPS custom_kernel scalar) -cc_test(test_dense_tensor SRCS test_dense_tensor.cc DEPS dense_tensor) +cc_test( + test_custom_kernel + SRCS test_custom_kernel.cc + DEPS custom_kernel scalar) +cc_test( + test_dense_tensor + SRCS test_dense_tensor.cc + DEPS dense_tensor) cc_test(test_intrusive_ptr SRCS test_intrusive_ptr.cc) cc_test(test_type_info SRCS test_type_info.cc) -cc_test(test_kernel_factory SRCS test_kernel_factory.cc DEPS kernel_factory scale_kernel) -cc_test(test_sparse_coo_tensor SRCS test_sparse_coo_tensor.cc DEPS dense_tensor sparse_coo_tensor) -cc_test(test_sparse_csr_tensor SRCS test_sparse_csr_tensor.cc DEPS dense_tensor sparse_csr_tensor) -cc_test(test_op_utils SRCS test_op_utils.cc DEPS op_compat_infos) -cc_test(test_phi_device_context SRCS test_device_context.cc DEPS phi_context cpu_context) -cc_test(test_meta_fn_utils SRCS test_meta_fn_utils.cc DEPS dense_tensor wrapped_infermeta infermeta infermeta_utils) +cc_test( + test_kernel_factory + SRCS test_kernel_factory.cc + DEPS kernel_factory scale_kernel) +cc_test( + test_sparse_coo_tensor + SRCS test_sparse_coo_tensor.cc + DEPS dense_tensor sparse_coo_tensor) +cc_test( + test_sparse_csr_tensor + SRCS test_sparse_csr_tensor.cc + DEPS dense_tensor sparse_csr_tensor) +cc_test( + test_op_utils + SRCS test_op_utils.cc + DEPS op_compat_infos) +cc_test( + test_phi_device_context + SRCS test_device_context.cc + DEPS phi_context cpu_context) +cc_test( + test_meta_fn_utils + SRCS test_meta_fn_utils.cc + DEPS dense_tensor wrapped_infermeta infermeta infermeta_utils) -cc_test(test_ddim SRCS test_ddim.cc DEPS ddim) +cc_test( + test_ddim + SRCS test_ddim.cc + DEPS ddim) if(WITH_GPU) - nv_test(test_dim SRCS test_dim.cu DEPS ddim) + nv_test( + test_dim + SRCS test_dim.cu + DEPS ddim) elseif(WITH_ROCM) - hip_test(test_dim SRCS test_dim.cu DEPS ddim) + hip_test( + test_dim + SRCS test_dim.cu + DEPS ddim) endif() -cc_test(selected_rows_test SRCS test_selected_rows.cc DEPS selected_rows) +cc_test( + selected_rows_test + SRCS test_selected_rows.cc + DEPS selected_rows) if(WITH_TESTING AND TEST selected_rows_test) set_tests_properties(selected_rows_test PROPERTIES TIMEOUT 120) endif() -if (NOT WIN32) -cc_test(test_rw_lock SRCS test_rw_lock.cc) -endif (NOT WIN32) -cc_test(test_string_tensor SRCS test_string_tensor.cc DEPS string_tensor) +if(NOT WIN32) + cc_test(test_rw_lock SRCS test_rw_lock.cc) +endif(NOT WIN32) +cc_test( + test_string_tensor + SRCS test_string_tensor.cc + DEPS string_tensor) cc_test(unroll_array_ops_test SRCS unroll_array_ops_test.cc) diff --git a/paddle/phi/tests/kernels/CMakeLists.txt b/paddle/phi/tests/kernels/CMakeLists.txt index a02e4f3d57a..b7d53b31bc3 100644 --- a/paddle/phi/tests/kernels/CMakeLists.txt +++ b/paddle/phi/tests/kernels/CMakeLists.txt @@ -1,43 +1,127 @@ -cc_test(test_copy_dev_api SRCS test_copy_dev_api.cc DEPS phi phi_api_utils) -cc_test(test_dot_dev_api SRCS test_dot_dev_api.cc DEPS phi phi_api_utils) -cc_test(test_creation_dev_api SRCS test_creation_dev_api.cc DEPS phi phi_api_utils) -cc_test(test_flatten_dev_api SRCS test_flatten_dev_api.cc DEPS phi phi_api_utils) -cc_test(test_matmul_dev_api SRCS test_matmul_dev_api.cc DEPS phi phi_api_utils) -cc_test(test_mean_dev_api SRCS test_mean_dev_api.cc DEPS phi phi_api_utils) -cc_test(test_scale_dev_api SRCS test_scale_dev_api.cc DEPS phi phi_api_utils) -cc_test(test_cast_dev_api SRCS test_cast_dev_api.cc DEPS phi phi_api_utils) -cc_test(test_elementwise_dev_api SRCS test_elementwise_dev_api.cc DEPS phi phi_api_utils) -cc_test(test_reshape_dev_api SRCS test_reshape_dev_api.cc DEPS phi phi_api_utils) -cc_test(test_sum_dev_api SRCS test_sum_dev_api.cc DEPS phi phi_api_utils) -cc_test(test_conj_dev_api SRCS test_conj_dev_api.cc DEPS phi phi_api_utils) -cc_test(test_concat_dev_api SRCS test_concat_dev_api.cc DEPS phi phi_api_utils) -cc_test(test_split_dev_api SRCS test_split_dev_api.cc DEPS phi phi_api_utils) -cc_test(test_sparse_utils_dev_api SRCS test_sparse_utils_dev_api.cc DEPS phi phi_api_utils) -cc_test(test_sparse_conv3d_dev_api SRCS test_sparse_conv3d_dev_api.cc DEPS phi phi_api_utils) -cc_test(test_sparse_pool_dev_api SRCS test_sparse_pool_dev_api.cc DEPS phi phi_api_utils) -cc_test(test_sparse_activation_dev_api SRCS test_sparse_activation_dev_api.cc DEPS phi phi_api_utils) +cc_test( + test_copy_dev_api + SRCS test_copy_dev_api.cc + DEPS phi phi_api_utils) +cc_test( + test_dot_dev_api + SRCS test_dot_dev_api.cc + DEPS phi phi_api_utils) +cc_test( + test_creation_dev_api + SRCS test_creation_dev_api.cc + DEPS phi phi_api_utils) +cc_test( + test_flatten_dev_api + SRCS test_flatten_dev_api.cc + DEPS phi phi_api_utils) +cc_test( + test_matmul_dev_api + SRCS test_matmul_dev_api.cc + DEPS phi phi_api_utils) +cc_test( + test_mean_dev_api + SRCS test_mean_dev_api.cc + DEPS phi phi_api_utils) +cc_test( + test_scale_dev_api + SRCS test_scale_dev_api.cc + DEPS phi phi_api_utils) +cc_test( + test_cast_dev_api + SRCS test_cast_dev_api.cc + DEPS phi phi_api_utils) +cc_test( + test_elementwise_dev_api + SRCS test_elementwise_dev_api.cc + DEPS phi phi_api_utils) +cc_test( + test_reshape_dev_api + SRCS test_reshape_dev_api.cc + DEPS phi phi_api_utils) +cc_test( + test_sum_dev_api + SRCS test_sum_dev_api.cc + DEPS phi phi_api_utils) +cc_test( + test_conj_dev_api + SRCS test_conj_dev_api.cc + DEPS phi phi_api_utils) +cc_test( + test_concat_dev_api + SRCS test_concat_dev_api.cc + DEPS phi phi_api_utils) +cc_test( + test_split_dev_api + SRCS test_split_dev_api.cc + DEPS phi phi_api_utils) +cc_test( + test_sparse_utils_dev_api + SRCS test_sparse_utils_dev_api.cc + DEPS phi phi_api_utils) +cc_test( + test_sparse_conv3d_dev_api + SRCS test_sparse_conv3d_dev_api.cc + DEPS phi phi_api_utils) +cc_test( + test_sparse_pool_dev_api + SRCS test_sparse_pool_dev_api.cc + DEPS phi phi_api_utils) +cc_test( + test_sparse_activation_dev_api + SRCS test_sparse_activation_dev_api.cc + DEPS phi phi_api_utils) -cc_test(test_math_function SRCS test_math_function.cc DEPS math_function) +cc_test( + test_math_function + SRCS test_math_function.cc + DEPS math_function) if(WITH_GPU) - nv_test(test_math_function_gpu SRCS test_math_function.cu DEPS math_function) + nv_test( + test_math_function_gpu + SRCS test_math_function.cu + DEPS math_function) endif() if(WITH_ROCM) - hip_test(test_math_function_gpu SRCS test_math_function.cu DEPS math_function) + hip_test( + test_math_function_gpu + SRCS test_math_function.cu + DEPS math_function) endif() -cc_test(test_cpu_vec SRCS test_cpu_vec.cc DEPS blas cpu_info) +cc_test( + test_cpu_vec + SRCS test_cpu_vec.cc + DEPS blas cpu_info) # For String Kernels -cc_test(test_strings_lower_upper_dev_api SRCS test_strings_lower_upper_dev_api.cc DEPS phi phi_api_utils) -IF(WITH_GPU) - nv_test(test_strings_lower_upper_dev_gpu_api SRCS test_strings_lower_upper_dev_api.cu DEPS phi phi_api_utils) -ELSEIF(WITH_ROCM) - hip_test(test_strings_lower_upper_dev_gpu_api SRCS test_strings_lower_upper_dev_api.cu DEPS phi phi_api_utils) -ENDIF() +cc_test( + test_strings_lower_upper_dev_api + SRCS test_strings_lower_upper_dev_api.cc + DEPS phi phi_api_utils) +if(WITH_GPU) + nv_test( + test_strings_lower_upper_dev_gpu_api + SRCS test_strings_lower_upper_dev_api.cu + DEPS phi phi_api_utils) +elseif(WITH_ROCM) + hip_test( + test_strings_lower_upper_dev_gpu_api + SRCS test_strings_lower_upper_dev_api.cu + DEPS phi phi_api_utils) +endif() -cc_test(test_strings_copy_dev_api SRCS test_strings_copy_dev_api.cc DEPS phi phi_api_utils) -IF(WITH_GPU) - nv_test(test_strings_copy_dev_gpu_api SRCS test_strings_copy_dev_api.cu DEPS phi phi_api_utils) -ELSEIF(WITH_ROCM) - hip_test(test_strings_copy_dev_gpu_api SRCS test_strings_copy_dev_api.cu DEPS phi phi_api_utils) -ENDIF() +cc_test( + test_strings_copy_dev_api + SRCS test_strings_copy_dev_api.cc + DEPS phi phi_api_utils) +if(WITH_GPU) + nv_test( + test_strings_copy_dev_gpu_api + SRCS test_strings_copy_dev_api.cu + DEPS phi phi_api_utils) +elseif(WITH_ROCM) + hip_test( + test_strings_copy_dev_gpu_api + SRCS test_strings_copy_dev_api.cu + DEPS phi phi_api_utils) +endif() diff --git a/paddle/phi/tests/ops/CMakeLists.txt b/paddle/phi/tests/ops/CMakeLists.txt index 58ad3276696..634af80f05a 100644 --- a/paddle/phi/tests/ops/CMakeLists.txt +++ b/paddle/phi/tests/ops/CMakeLists.txt @@ -1 +1,4 @@ -cc_test(test_op_signature SRCS test_op_signature.cc DEPS op_utils) +cc_test( + test_op_signature + SRCS test_op_signature.cc + DEPS op_utils) diff --git a/paddle/phi/tools/CMakeLists.txt b/paddle/phi/tools/CMakeLists.txt index 5693a46d977..d1df5ec3275 100644 --- a/paddle/phi/tools/CMakeLists.txt +++ b/paddle/phi/tools/CMakeLists.txt @@ -1,8 +1,8 @@ add_executable(print_pten_kernels print_pten_kernels.cc) target_link_libraries(print_pten_kernels phi phi_api_utils) if(WIN32) - target_link_libraries(print_pten_kernels shlwapi.lib) + target_link_libraries(print_pten_kernels shlwapi.lib) endif() if(WITH_ROCM) - target_link_libraries(print_pten_kernels ${ROCM_HIPRTC_LIB}) + target_link_libraries(print_pten_kernels ${ROCM_HIPRTC_LIB}) endif() diff --git a/paddle/scripts/CMakeLists.txt b/paddle/scripts/CMakeLists.txt index 68cb5a19f99..ced98cc643e 100644 --- a/paddle/scripts/CMakeLists.txt +++ b/paddle/scripts/CMakeLists.txt @@ -1,7 +1,13 @@ -configure_file(submit_local.sh.in - paddle - @ONLY) +configure_file(submit_local.sh.in paddle @ONLY) -install(FILES ${CMAKE_CURRENT_BINARY_DIR}/paddle DESTINATION bin - PERMISSIONS OWNER_EXECUTE OWNER_WRITE OWNER_READ - GROUP_EXECUTE GROUP_READ WORLD_EXECUTE WORLD_READ) +install( + FILES ${CMAKE_CURRENT_BINARY_DIR}/paddle + DESTINATION bin + PERMISSIONS + OWNER_EXECUTE + OWNER_WRITE + OWNER_READ + GROUP_EXECUTE + GROUP_READ + WORLD_EXECUTE + WORLD_READ) diff --git a/paddle/testing/CMakeLists.txt b/paddle/testing/CMakeLists.txt index f5cfd14e6b8..7b02aef22e8 100644 --- a/paddle/testing/CMakeLists.txt +++ b/paddle/testing/CMakeLists.txt @@ -1,11 +1,21 @@ # for paddle test case if(WITH_TESTING) - set(paddle_gtest_main_deps device_context gtest gflags init memory phi_utils proto_desc) + set(paddle_gtest_main_deps + device_context + gtest + gflags + init + memory + phi_utils + proto_desc) - if (WITH_GPU OR WITH_ROCM) + if(WITH_GPU OR WITH_ROCM) list(APPEND paddle_gtest_main_deps gpu_info) endif() - cc_library(paddle_gtest_main SRCS paddle_gtest_main.cc DEPS ${paddle_gtest_main_deps}) + cc_library( + paddle_gtest_main + SRCS paddle_gtest_main.cc + DEPS ${paddle_gtest_main_deps}) endif() diff --git a/paddle/utils/CMakeLists.txt b/paddle/utils/CMakeLists.txt index 7669c06b2c2..a428b176d67 100644 --- a/paddle/utils/CMakeLists.txt +++ b/paddle/utils/CMakeLists.txt @@ -1,5 +1,14 @@ add_subdirectory(string) -cc_test(array_ref_test SRCS array_ref_test.cc DEPS gtest gflags) -cc_test(small_vector_test SRCS small_vector_test.cc DEPS gtest gflags) -cc_test(variant_test SRCS variant_test.cc DEPS gtest) +cc_test( + array_ref_test + SRCS array_ref_test.cc + DEPS gtest gflags) +cc_test( + small_vector_test + SRCS small_vector_test.cc + DEPS gtest gflags) +cc_test( + variant_test + SRCS variant_test.cc + DEPS gtest) diff --git a/paddle/utils/string/CMakeLists.txt b/paddle/utils/string/CMakeLists.txt index db3cb542ba3..3e35da9d62d 100644 --- a/paddle/utils/string/CMakeLists.txt +++ b/paddle/utils/string/CMakeLists.txt @@ -1,8 +1,26 @@ -cc_library(stringpiece SRCS piece.cc DEPS flags) -cc_library(pretty_log SRCS pretty_log.cc DEPS flags) -cc_library(string_helper SRCS string_helper.cc DEPS flags) -cc_test(stringpiece_test SRCS piece_test.cc DEPS stringpiece gflags) -cc_test(stringprintf_test SRCS printf_test.cc DEPS gflags) +cc_library( + stringpiece + SRCS piece.cc + DEPS flags) +cc_library( + pretty_log + SRCS pretty_log.cc + DEPS flags) +cc_library( + string_helper + SRCS string_helper.cc + DEPS flags) +cc_test( + stringpiece_test + SRCS piece_test.cc + DEPS stringpiece gflags) +cc_test( + stringprintf_test + SRCS printf_test.cc + DEPS gflags) cc_test(to_string_test SRCS to_string_test.cc) cc_test(split_test SRCS split_test.cc) -cc_test(string_helper_test SRCS string_helper_test.cc DEPS string_helper) +cc_test( + string_helper_test + SRCS string_helper_test.cc + DEPS string_helper) diff --git a/python/CMakeLists.txt b/python/CMakeLists.txt index fdcd5606581..0c1089b1fd4 100644 --- a/python/CMakeLists.txt +++ b/python/CMakeLists.txt @@ -1,23 +1,21 @@ file(GLOB UTILS_PY_FILES . ./paddle/legacy/utils/*.py) file(GLOB_RECURSE FLUID_PY_FILES ./paddle/fluid/*.py) -set(PY_FILES paddle/__init__.py - ${UTILS_PY_FILES} - ${FLUID_PY_FILES}) +set(PY_FILES paddle/__init__.py ${UTILS_PY_FILES} ${FLUID_PY_FILES}) if(WITH_GPU) - SET(PACKAGE_NAME "paddlepaddle-gpu") + set(PACKAGE_NAME "paddlepaddle-gpu") elseif(WITH_MLU) - SET(PACKAGE_NAME "paddlepaddle-mlu") + set(PACKAGE_NAME "paddlepaddle-mlu") elseif(WITH_ROCM) - SET(PACKAGE_NAME "paddlepaddle-rocm") + set(PACKAGE_NAME "paddlepaddle-rocm") elseif(WITH_ASCEND_CL) - SET(PACKAGE_NAME "paddlepaddle-npu") + set(PACKAGE_NAME "paddlepaddle-npu") elseif(WITH_XPU) - SET(PACKAGE_NAME "paddlepaddle-xpu") + set(PACKAGE_NAME "paddlepaddle-xpu") elseif(WITH_IPU) - SET(PACKAGE_NAME "paddlepaddle-ipu") + set(PACKAGE_NAME "paddlepaddle-ipu") else() - SET(PACKAGE_NAME "paddlepaddle") + set(PACKAGE_NAME "paddlepaddle") endif() set(SETUP_LOG_FILE "setup.py.log") @@ -26,7 +24,9 @@ set(FLUID_CORE_NAME "core") if(WITH_AVX AND AVX_FOUND) set(FLUID_CORE_NAME "${FLUID_CORE_NAME}_avx") if(NOT DEFINED NOAVX_CORE_FILE OR NOAVX_CORE_FILE STREQUAL "") - message(STATUS "MESSAGE: This is just a message for publishing release. + message( + STATUS + "MESSAGE: This is just a message for publishing release. You are building AVX version without NOAVX core. So the wheel package may fail on NOAVX machine. You can add -DNOAVX_CORE_FILE=/path/to/your/core_noavx.* in cmake command @@ -44,29 +44,31 @@ else() endif() configure_file(${CMAKE_CURRENT_SOURCE_DIR}/setup.py.in - ${CMAKE_CURRENT_BINARY_DIR}/setup.py) + ${CMAKE_CURRENT_BINARY_DIR}/setup.py) set(FLUID_DST_DIR ${PADDLE_BINARY_DIR}/python/paddle/fluid/) -IF(WIN32) - # Python would use the .pyd by default under Windows series platform - set(FLUID_CORE ${FLUID_DST_DIR}/${FLUID_CORE_NAME}.pyd) - set(FLUID_CORE_LIB ${FLUID_DST_DIR}/${FLUID_CORE_NAME}.lib) - - add_custom_command(OUTPUT ${FLUID_CORE} - COMMAND cmake -E copy $ ${FLUID_CORE} - COMMAND cmake -E copy $ ${FLUID_CORE_LIB} - DEPENDS paddle_pybind) - - set(FLUID_NOAVX_CORE ${FLUID_DST_DIR}/core_noavx.pyd) -ELSE() - set(FLUID_CORE ${FLUID_DST_DIR}/${FLUID_CORE_NAME}.so) - add_custom_command(OUTPUT ${FLUID_CORE} - COMMAND cmake -E copy $ ${FLUID_CORE} - DEPENDS paddle_pybind) - - set(FLUID_NOAVX_CORE ${FLUID_DST_DIR}/core_noavx.so) -ENDIF() +if(WIN32) + # Python would use the .pyd by default under Windows series platform + set(FLUID_CORE ${FLUID_DST_DIR}/${FLUID_CORE_NAME}.pyd) + set(FLUID_CORE_LIB ${FLUID_DST_DIR}/${FLUID_CORE_NAME}.lib) + + add_custom_command( + OUTPUT ${FLUID_CORE} + COMMAND cmake -E copy $ ${FLUID_CORE} + COMMAND cmake -E copy $ ${FLUID_CORE_LIB} + DEPENDS paddle_pybind) + + set(FLUID_NOAVX_CORE ${FLUID_DST_DIR}/core_noavx.pyd) +else() + set(FLUID_CORE ${FLUID_DST_DIR}/${FLUID_CORE_NAME}.so) + add_custom_command( + OUTPUT ${FLUID_CORE} + COMMAND cmake -E copy $ ${FLUID_CORE} + DEPENDS paddle_pybind) + + set(FLUID_NOAVX_CORE ${FLUID_DST_DIR}/core_noavx.so) +endif() set(FLUID_CORE_DEPS ${FLUID_CORE}) @@ -75,40 +77,55 @@ if(HAS_NOAVX_CORE AND EXISTS "${NOAVX_CORE_FILE}") get_filename_component(NOAVX_CORE_EXT ${NOAVX_CORE_FILE} EXT) if(WIN32) if(NOT NOAVX_CORE_EXT STREQUAL ".pyd") - message(FATAL_ERROR "Wrong file ${NOAVX_CORE_NAME}, the ext does not match windows *.pyd!") + message( + FATAL_ERROR + "Wrong file ${NOAVX_CORE_NAME}, the ext does not match windows *.pyd!" + ) endif() else() if(NOT NOAVX_CORE_EXT STREQUAL ".so") - message(FATAL_ERROR "Wrong file ${NOAVX_CORE_NAME}, the ext does not match *.so!") + message( + FATAL_ERROR + "Wrong file ${NOAVX_CORE_NAME}, the ext does not match *.so!") endif() endif() - add_custom_command(OUTPUT ${FLUID_NOAVX_CORE} - COMMAND cmake -E copy ${NOAVX_CORE_FILE} ${FLUID_NOAVX_CORE} DEPENDS paddle_pybind) + add_custom_command( + OUTPUT ${FLUID_NOAVX_CORE} + COMMAND cmake -E copy ${NOAVX_CORE_FILE} ${FLUID_NOAVX_CORE} + DEPENDS paddle_pybind) list(APPEND FLUID_CORE_DEPS ${FLUID_NOAVX_CORE}) endif() add_custom_target(copy_paddle_pybind ALL DEPENDS ${FLUID_CORE_DEPS}) -IF(WIN32) - add_custom_command(OUTPUT ${PADDLE_PYTHON_BUILD_DIR}/.timestamp - COMMAND ${CMAKE_COMMAND} -E copy_directory ${PADDLE_SOURCE_DIR}/python/paddle ${PADDLE_BINARY_DIR}/python/paddle/ - COMMAND ${CMAKE_COMMAND} -E env ${py_env} ${PYTHON_EXECUTABLE} setup.py bdist_wheel +if(WIN32) + add_custom_command( + OUTPUT ${PADDLE_PYTHON_BUILD_DIR}/.timestamp + COMMAND + ${CMAKE_COMMAND} -E copy_directory ${PADDLE_SOURCE_DIR}/python/paddle + ${PADDLE_BINARY_DIR}/python/paddle/ + COMMAND ${CMAKE_COMMAND} -E env ${py_env} ${PYTHON_EXECUTABLE} setup.py + bdist_wheel COMMENT "Packing whl packages------>>>" - DEPENDS copy_paddle_pybind ${FLUID_CORE} framework_py_proto profiler_py_proto pass_desc_py_proto ${PY_FILES}) -ELSE(WIN32) - add_custom_command(OUTPUT ${PADDLE_PYTHON_BUILD_DIR}/.timestamp + DEPENDS copy_paddle_pybind ${FLUID_CORE} framework_py_proto + profiler_py_proto pass_desc_py_proto ${PY_FILES}) +else(WIN32) + add_custom_command( + OUTPUT ${PADDLE_PYTHON_BUILD_DIR}/.timestamp COMMAND touch stub.cc COMMAND cp -r ${PADDLE_SOURCE_DIR}/python/paddle ${PADDLE_BINARY_DIR}/python COMMAND env ${py_env} ${PYTHON_EXECUTABLE} setup.py bdist_wheel COMMENT "Packing whl packages------>>>" - DEPENDS copy_paddle_pybind ${FLUID_CORE} framework_py_proto profiler_py_proto pass_desc_py_proto ${PY_FILES}) -ENDIF() + DEPENDS copy_paddle_pybind ${FLUID_CORE} framework_py_proto + profiler_py_proto pass_desc_py_proto ${PY_FILES}) +endif() -add_custom_target(paddle_python ALL DEPENDS ${PADDLE_PYTHON_BUILD_DIR}/.timestamp) +add_custom_target(paddle_python ALL + DEPENDS ${PADDLE_PYTHON_BUILD_DIR}/.timestamp) set(PADDLE_PYTHON_PACKAGE_DIR ${CMAKE_CURRENT_BINARY_DIR}/dist/) -if (WITH_TESTING) +if(WITH_TESTING) add_subdirectory(paddle/reader/tests) add_subdirectory(paddle/dataset/tests) add_subdirectory(paddle/tests) @@ -117,8 +134,7 @@ if (WITH_TESTING) add_subdirectory(paddle/fluid/contrib/slim/tests) endif() install(DIRECTORY ${PADDLE_PYTHON_PACKAGE_DIR} - DESTINATION opt/paddle/share/wheels -) + DESTINATION opt/paddle/share/wheels) if(APPLE) find_program(INSTALL_NAME_TOOL_EXECUTABLE install_name_tool) @@ -126,10 +142,13 @@ if(APPLE) message(FATAL_ERROR "install_name_tool not found, please check.\n") endif() endif() -if(LINUX AND NOT WITH_SW AND NOT WITH_ARM) +if(LINUX + AND NOT WITH_SW + AND NOT WITH_ARM) find_program(PATCHELF_EXECUTABLE patchelf) if(NOT PATCHELF_EXECUTABLE) - message(FATAL_ERROR "patchelf not found, please install it.\n" - "For Ubuntu, the command is: apt-get install -y patchelf.") + message( + FATAL_ERROR "patchelf not found, please install it.\n" + "For Ubuntu, the command is: apt-get install -y patchelf.") endif() endif() diff --git a/python/paddle/fluid/contrib/slim/tests/CMakeLists.txt b/python/paddle/fluid/contrib/slim/tests/CMakeLists.txt index 0140283b915..88dc33f581a 100644 --- a/python/paddle/fluid/contrib/slim/tests/CMakeLists.txt +++ b/python/paddle/fluid/contrib/slim/tests/CMakeLists.txt @@ -1,352 +1,523 @@ -file(GLOB TEST_OPS RELATIVE "${CMAKE_CURRENT_SOURCE_DIR}" "test_*.py") +file( + GLOB TEST_OPS + RELATIVE "${CMAKE_CURRENT_SOURCE_DIR}" + "test_*.py") string(REPLACE ".py" "" TEST_OPS "${TEST_OPS}") -function(_inference_analysis_python_api_int8_test target model_dir data_path filename use_mkldnn) - py_test(${target} SRCS ${filename} - ENVS CPU_NUM_THREADS=${CPU_NUM_THREADS_ON_CI} - FLAGS_use_mkldnn=${use_mkldnn} - ARGS --infer_model ${model_dir}/model - --infer_data ${data_path} - --int8_model_save_path int8_models/${target} - --warmup_batch_size ${WARMUP_BATCH_SIZE} - --batch_size 50) +function(_inference_analysis_python_api_int8_test target model_dir data_path + filename use_mkldnn) + py_test( + ${target} + SRCS ${filename} + ENVS + CPU_NUM_THREADS=${CPU_NUM_THREADS_ON_CI} + FLAGS_use_mkldnn=${use_mkldnn} + ARGS + --infer_model + ${model_dir}/model + --infer_data + ${data_path} + --int8_model_save_path + int8_models/${target} + --warmup_batch_size + ${WARMUP_BATCH_SIZE} + --batch_size + 50) endfunction() -function(inference_analysis_python_api_int8_test target model_dir data_path filename) - _inference_analysis_python_api_int8_test(${target} ${model_dir} ${data_path} ${filename} False) +function(inference_analysis_python_api_int8_test target model_dir data_path + filename) + _inference_analysis_python_api_int8_test(${target} ${model_dir} ${data_path} + ${filename} False) endfunction() -function(inference_analysis_python_api_int8_test_custom_warmup_batch_size target model_dir data_dir filename warmup_batch_size) - set(WARMUP_BATCH_SIZE ${warmup_batch_size}) - inference_analysis_python_api_int8_test(${target} ${model_dir} ${data_dir} ${filename}) +function(inference_analysis_python_api_int8_test_custom_warmup_batch_size + target model_dir data_dir filename warmup_batch_size) + set(WARMUP_BATCH_SIZE ${warmup_batch_size}) + inference_analysis_python_api_int8_test(${target} ${model_dir} ${data_dir} + ${filename}) endfunction() -function(inference_analysis_python_api_int8_test_mkldnn target model_dir data_path filename) - _inference_analysis_python_api_int8_test(${target} ${model_dir} ${data_path} ${filename} True) +function(inference_analysis_python_api_int8_test_mkldnn target model_dir + data_path filename) + _inference_analysis_python_api_int8_test(${target} ${model_dir} ${data_path} + ${filename} True) endfunction() function(download_data install_dir url data_file check_sum) - if (NOT EXISTS ${install_dir}/${data_file}) - inference_download_and_uncompress(${install_dir} ${url} ${data_file} ${check_sum}) - endif() + if(NOT EXISTS ${install_dir}/${data_file}) + inference_download_and_uncompress(${install_dir} ${url} ${data_file} + ${check_sum}) + endif() endfunction() function(download_quant_data install_dir data_file check_sum) - if (NOT EXISTS ${install_dir}/${data_file}) - inference_download_and_uncompress(${install_dir} ${INFERENCE_URL}/int8 ${data_file} ${check_sum}) - endif() + if(NOT EXISTS ${install_dir}/${data_file}) + inference_download_and_uncompress(${install_dir} ${INFERENCE_URL}/int8 + ${data_file} ${check_sum}) + endif() endfunction() function(download_quant_model install_dir data_file check_sum) - if (NOT EXISTS ${install_dir}/${data_file}) - inference_download_and_uncompress(${install_dir} ${INFERENCE_URL}/int8/QAT_models ${data_file} ${check_sum}) - endif() + if(NOT EXISTS ${install_dir}/${data_file}) + inference_download_and_uncompress( + ${install_dir} ${INFERENCE_URL}/int8/QAT_models ${data_file} ${check_sum}) + endif() endfunction() function(download_quant_fp32_model install_dir data_file check_sum) - if (NOT EXISTS ${install_dir}/${data_file}) - inference_download_and_uncompress(${install_dir} ${INFERENCE_URL}/int8/QAT_models/fp32 ${data_file} ${check_sum}) - endif() + if(NOT EXISTS ${install_dir}/${data_file}) + inference_download_and_uncompress( + ${install_dir} ${INFERENCE_URL}/int8/QAT_models/fp32 ${data_file} + ${check_sum}) + endif() endfunction() function(download_lstm_model install_dir data_file check_sum) - if (NOT EXISTS ${install_dir}/${data_file}) - inference_download_and_uncompress(${install_dir} ${INFERENCE_URL}/lstm ${data_file} ${check_sum}) - endif() + if(NOT EXISTS ${install_dir}/${data_file}) + inference_download_and_uncompress(${install_dir} ${INFERENCE_URL}/lstm + ${data_file} ${check_sum}) + endif() endfunction() -function(inference_quant_int8_image_classification_test target quant_model_dir dataset_path) - py_test(${target} SRCS "${CMAKE_CURRENT_SOURCE_DIR}/quant_int8_image_classification_comparison.py" - ENVS FLAGS_OMP_NUM_THREADS=${CPU_NUM_THREADS_ON_CI} - OMP_NUM_THREADS=${CPU_NUM_THREADS_ON_CI} - FLAGS_use_mkldnn=true - ARGS --quant_model ${quant_model_dir} - --infer_data ${dataset_path} - --batch_size 25 - --batch_num 2 - --acc_diff_threshold 0.1) +function(inference_quant_int8_image_classification_test target quant_model_dir + dataset_path) + py_test( + ${target} + SRCS "${CMAKE_CURRENT_SOURCE_DIR}/quant_int8_image_classification_comparison.py" + ENVS + FLAGS_OMP_NUM_THREADS=${CPU_NUM_THREADS_ON_CI} + OMP_NUM_THREADS=${CPU_NUM_THREADS_ON_CI} + FLAGS_use_mkldnn=true + ARGS + --quant_model + ${quant_model_dir} + --infer_data + ${dataset_path} + --batch_size + 25 + --batch_num + 2 + --acc_diff_threshold + 0.1) endfunction() - -# set batch_size 10 for UT only (avoid OOM). For whole dataset, use batch_size 25 -function(inference_quant2_int8_image_classification_test target quant_model_dir fp32_model_dir dataset_path) - py_test(${target} SRCS "${CMAKE_CURRENT_SOURCE_DIR}/quant2_int8_image_classification_comparison.py" - ENVS FLAGS_OMP_NUM_THREADS=${CPU_NUM_THREADS_ON_CI} - OMP_NUM_THREADS=${CPU_NUM_THREADS_ON_CI} - FLAGS_use_mkldnn=true - ARGS --quant_model ${quant_model_dir} - --fp32_model ${fp32_model_dir} - --infer_data ${dataset_path} - --batch_size 50 - --batch_num 2 - --acc_diff_threshold 0.1) +# set batch_size 10 for UT only (avoid OOM). For whole dataset, use batch_size 25 +function(inference_quant2_int8_image_classification_test target quant_model_dir + fp32_model_dir dataset_path) + py_test( + ${target} + SRCS "${CMAKE_CURRENT_SOURCE_DIR}/quant2_int8_image_classification_comparison.py" + ENVS + FLAGS_OMP_NUM_THREADS=${CPU_NUM_THREADS_ON_CI} + OMP_NUM_THREADS=${CPU_NUM_THREADS_ON_CI} + FLAGS_use_mkldnn=true + ARGS + --quant_model + ${quant_model_dir} + --fp32_model + ${fp32_model_dir} + --infer_data + ${dataset_path} + --batch_size + 50 + --batch_num + 2 + --acc_diff_threshold + 0.1) endfunction() -# set batch_size 10 for UT only (avoid OOM). For whole dataset, use batch_size 20 -function(inference_quant2_int8_nlp_test target quant_model_dir fp32_model_dir dataset_path labels_path ops_to_quantize) - py_test(${target} SRCS "${CMAKE_CURRENT_SOURCE_DIR}/quant2_int8_nlp_comparison.py" - ENVS FLAGS_OMP_NUM_THREADS=${CPU_NUM_THREADS_ON_CI} - OMP_NUM_THREADS=${CPU_NUM_THREADS_ON_CI} - FLAGS_use_mkldnn=true - ARGS --quant_model ${quant_model_dir} - --fp32_model ${fp32_model_dir} - --infer_data ${dataset_path} - --labels ${labels_path} - --batch_size 10 - --batch_num 2 - --acc_diff_threshold 0.1 - --ops_to_quantize ${ops_to_quantize}) +# set batch_size 10 for UT only (avoid OOM). For whole dataset, use batch_size 20 +function( + inference_quant2_int8_nlp_test + target + quant_model_dir + fp32_model_dir + dataset_path + labels_path + ops_to_quantize) + py_test( + ${target} + SRCS "${CMAKE_CURRENT_SOURCE_DIR}/quant2_int8_nlp_comparison.py" + ENVS + FLAGS_OMP_NUM_THREADS=${CPU_NUM_THREADS_ON_CI} + OMP_NUM_THREADS=${CPU_NUM_THREADS_ON_CI} + FLAGS_use_mkldnn=true + ARGS + --quant_model + ${quant_model_dir} + --fp32_model + ${fp32_model_dir} + --infer_data + ${dataset_path} + --labels + ${labels_path} + --batch_size + 10 + --batch_num + 2 + --acc_diff_threshold + 0.1 + --ops_to_quantize + ${ops_to_quantize}) endfunction() -function(inference_quant2_int8_lstm_model_test target fp32_model quant_model dataset_path) - py_test(${target} SRCS "${CMAKE_CURRENT_SOURCE_DIR}/quant2_int8_lstm_model.py" - ARGS --fp32_model ${fp32_model} - --quant_model ${quant_model} - --infer_data ${dataset_path} - --num_threads 1 - --mkldnn_cache_capacity 100 - --warmup_iter 100 - --acc_diff_threshold 0.11) +function(inference_quant2_int8_lstm_model_test target fp32_model quant_model + dataset_path) + py_test( + ${target} + SRCS "${CMAKE_CURRENT_SOURCE_DIR}/quant2_int8_lstm_model.py" + ARGS + --fp32_model + ${fp32_model} + --quant_model + ${quant_model} + --infer_data + ${dataset_path} + --num_threads + 1 + --mkldnn_cache_capacity + 100 + --warmup_iter + 100 + --acc_diff_threshold + 0.11) endfunction() function(download_quant_data install_dir data_file check_sum) - if (NOT EXISTS ${install_dir}/${data_file}) - inference_download_and_uncompress(${install_dir} ${INFERENCE_URL}/int8 ${data_file} ${check_sum}) - endif() + if(NOT EXISTS ${install_dir}/${data_file}) + inference_download_and_uncompress(${install_dir} ${INFERENCE_URL}/int8 + ${data_file} ${check_sum}) + endif() endfunction() function(download_quant_model install_dir data_file check_sum) - if (NOT EXISTS ${install_dir}/${data_file}) - inference_download_and_uncompress(${install_dir} ${INFERENCE_URL}/int8/QAT_models ${data_file} ${check_sum}) - endif() + if(NOT EXISTS ${install_dir}/${data_file}) + inference_download_and_uncompress( + ${install_dir} ${INFERENCE_URL}/int8/QAT_models ${data_file} ${check_sum}) + endif() endfunction() function(save_quant_ic_model_test target quant_model_dir int8_model_save_path) - py_test(${target} SRCS ${CMAKE_CURRENT_SOURCE_DIR}/save_quant_model.py - ARGS --quant_model_path ${quant_model_dir} - --int8_model_save_path ${int8_model_save_path} - --debug) + py_test( + ${target} + SRCS ${CMAKE_CURRENT_SOURCE_DIR}/save_quant_model.py + ARGS + --quant_model_path + ${quant_model_dir} + --int8_model_save_path + ${int8_model_save_path} + --debug) endfunction() -function(save_quant_nlp_model_test target quant_model_dir int8_model_save_path ops_to_quantize) - py_test(${target} SRCS ${CMAKE_CURRENT_SOURCE_DIR}/save_quant_model.py - ARGS --quant_model_path ${quant_model_dir} - --int8_model_save_path ${int8_model_save_path} - --ops_to_quantize ${ops_to_quantize}) +function(save_quant_nlp_model_test target quant_model_dir int8_model_save_path + ops_to_quantize) + py_test( + ${target} + SRCS ${CMAKE_CURRENT_SOURCE_DIR}/save_quant_model.py + ARGS + --quant_model_path + ${quant_model_dir} + --int8_model_save_path + ${int8_model_save_path} + --ops_to_quantize + ${ops_to_quantize}) endfunction() -function(convert_model2dot_test target model_path save_graph_dir save_graph_name) - py_test(${target} SRCS ${CMAKE_CURRENT_SOURCE_DIR}/convert_model2dot.py - ARGS --model_path ${model_path} - --save_graph_dir ${save_graph_dir} - --save_graph_name ${save_graph_name}) +function(convert_model2dot_test target model_path save_graph_dir + save_graph_name) + py_test( + ${target} + SRCS ${CMAKE_CURRENT_SOURCE_DIR}/convert_model2dot.py + ARGS + --model_path + ${model_path} + --save_graph_dir + ${save_graph_dir} + --save_graph_name + ${save_graph_name}) endfunction() if(WIN32) - list(REMOVE_ITEM TEST_OPS test_light_nas) - list(REMOVE_ITEM TEST_OPS test_post_training_quantization_mnist) - list(REMOVE_ITEM TEST_OPS test_post_training_quantization_while) - list(REMOVE_ITEM TEST_OPS test_post_training_quantization_mobilenetv1) - list(REMOVE_ITEM TEST_OPS test_post_training_quantization_resnet50) - list(REMOVE_ITEM TEST_OPS test_post_training_quantization_lstm_model) - list(REMOVE_ITEM TEST_OPS test_imperative_ptq) - list(REMOVE_ITEM TEST_OPS test_weight_quantization_mobilenetv1) - list(REMOVE_ITEM TEST_OPS test_quantize_transpiler_v2) - list(REMOVE_ITEM TEST_OPS test_imperative_qat_amp) + list(REMOVE_ITEM TEST_OPS test_light_nas) + list(REMOVE_ITEM TEST_OPS test_post_training_quantization_mnist) + list(REMOVE_ITEM TEST_OPS test_post_training_quantization_while) + list(REMOVE_ITEM TEST_OPS test_post_training_quantization_mobilenetv1) + list(REMOVE_ITEM TEST_OPS test_post_training_quantization_resnet50) + list(REMOVE_ITEM TEST_OPS test_post_training_quantization_lstm_model) + list(REMOVE_ITEM TEST_OPS test_imperative_ptq) + list(REMOVE_ITEM TEST_OPS test_weight_quantization_mobilenetv1) + list(REMOVE_ITEM TEST_OPS test_quantize_transpiler_v2) + list(REMOVE_ITEM TEST_OPS test_imperative_qat_amp) endif() if(LINUX AND WITH_MKLDNN) - #### Image classification dataset: ImageNet (small) - # The dataset should already be downloaded for INT8v2 unit tests - set(IMAGENET_DATA_PATH "${INFERENCE_DEMO_INSTALL_DIR}/imagenet/data.bin") - - #### INT8 image classification python api test - # Models should be already downloaded for INT8v2 unit tests - - set(INT8_INSTALL_DIR "${INFERENCE_DEMO_INSTALL_DIR}/int8v2") - - #### QUANT & INT8 comparison python api tests - - set(QUANT_INSTALL_DIR "${INFERENCE_DEMO_INSTALL_DIR}/quant") - - ### Quant1 for image classification - - # Quant ResNet50 - set(QUANT_RESNET50_MODEL_DIR "${QUANT_INSTALL_DIR}/ResNet50_quant") - set(QUANT_RESNET50_MODEL_ARCHIVE "ResNet50_qat_model.tar.gz") - download_quant_model(${QUANT_RESNET50_MODEL_DIR} ${QUANT_RESNET50_MODEL_ARCHIVE} ff89b934ab961c3a4a844193ece2e8a7) - inference_quant_int8_image_classification_test(test_quant_int8_resnet50_mkldnn ${QUANT_RESNET50_MODEL_DIR}/model ${IMAGENET_DATA_PATH}) - - # Quant ResNet101 - set(QUANT_RESNET101_MODEL_DIR "${QUANT_INSTALL_DIR}/ResNet101_quant") - set(QUANT_RESNET101_MODEL_ARCHIVE "ResNet101_qat_model.tar.gz") - download_quant_model(${QUANT_RESNET101_MODEL_DIR} ${QUANT_RESNET101_MODEL_ARCHIVE} 95c6d01e3aeba31c13efb2ba8057d558) - # inference_quant_int8_image_classification_test(test_quant_int8_resnet101_mkldnn ${QUANT_RESNET101_MODEL_DIR}/model ${IMAGENET_DATA_PATH}) - - # Quant GoogleNet - set(QUANT_GOOGLENET_MODEL_DIR "${QUANT_INSTALL_DIR}/GoogleNet_quant") - set(QUANT_GOOGLENET_MODEL_ARCHIVE "GoogleNet_qat_model.tar.gz") - download_quant_model(${QUANT_GOOGLENET_MODEL_DIR} ${QUANT_GOOGLENET_MODEL_ARCHIVE} 1d4a7383baa63e7d1c423e8db2b791d5) - inference_quant_int8_image_classification_test(test_quant_int8_googlenet_mkldnn ${QUANT_GOOGLENET_MODEL_DIR}/model ${IMAGENET_DATA_PATH}) - - # Quant MobileNetV1 - set(QUANT_MOBILENETV1_MODEL_DIR "${QUANT_INSTALL_DIR}/MobileNetV1_quant") - set(QUANT_MOBILENETV1_MODEL_ARCHIVE "MobileNetV1_qat_model.tar.gz") - download_quant_model(${QUANT_MOBILENETV1_MODEL_DIR} ${QUANT_MOBILENETV1_MODEL_ARCHIVE} 3b774d94a9fcbb604d09bdb731fc1162) - inference_quant_int8_image_classification_test(test_quant_int8_mobilenetv1_mkldnn ${QUANT_MOBILENETV1_MODEL_DIR}/model ${IMAGENET_DATA_PATH}) - - # Quant MobileNetV2 - set(QUANT_MOBILENETV2_MODEL_DIR "${QUANT_INSTALL_DIR}/MobileNetV2_quant") - set(QUANT_MOBILENETV2_MODEL_ARCHIVE "MobileNetV2_qat_model.tar.gz") - download_quant_model(${QUANT_MOBILENETV2_MODEL_DIR} ${QUANT_MOBILENETV2_MODEL_ARCHIVE} 758a99d9225d8b73e1a8765883f96cdd) - inference_quant_int8_image_classification_test(test_quant_int8_mobilenetv2_mkldnn ${QUANT_MOBILENETV2_MODEL_DIR}/model ${IMAGENET_DATA_PATH}) - - # Quant VGG16 - set(QUANT_VGG16_MODEL_DIR "${QUANT_INSTALL_DIR}/VGG16_quant") - set(QUANT_VGG16_MODEL_ARCHIVE "VGG16_qat_model.tar.gz") - download_quant_model(${QUANT_VGG16_MODEL_DIR} ${QUANT_VGG16_MODEL_ARCHIVE} c37e63ca82a102f47be266f8068b0b55) - # inference_quant_int8_image_classification_test(test_quant_int8_vgg16_mkldnn ${QUANT_VGG16_MODEL_DIR}/model ${IMAGENET_DATA_PATH}) - - # Quant VGG19 - set(QUANT_VGG19_MODEL_DIR "${QUANT_INSTALL_DIR}/VGG19_quant") - set(QUANT_VGG19_MODEL_ARCHIVE "VGG19_qat_model.tar.gz") - download_quant_model(${QUANT_VGG19_MODEL_DIR} ${QUANT_VGG19_MODEL_ARCHIVE} 62bcd4b6c3ca2af67e8251d1c96ea18f) - # inference_quant_int8_image_classification_test(test_quant_int8_vgg19_mkldnn ${QUANT_VGG19_MODEL_DIR}/model ${IMAGENET_DATA_PATH}) - - ### Quant2 for image classification - - # Quant2 ResNet50 with input/output scales in `fake_quantize_moving_average_abs_max` operators, - # with weight scales in `fake_dequantize_max_abs` operators - set(QUANT2_RESNET50_MODEL_DIR "${QUANT_INSTALL_DIR}/ResNet50_quant2") - set(QUANT2_RESNET50_MODEL_ARCHIVE "ResNet50_qat_perf.tar.gz") - download_quant_model(${QUANT2_RESNET50_MODEL_DIR} ${QUANT2_RESNET50_MODEL_ARCHIVE} e87309457e8c462a579340607f064d66) - set(FP32_RESNET50_MODEL_DIR "${INT8_INSTALL_DIR}/resnet50") - inference_quant2_int8_image_classification_test(test_quant2_int8_resnet50_mkldnn ${QUANT2_RESNET50_MODEL_DIR}/ResNet50_qat_perf/float ${FP32_RESNET50_MODEL_DIR}/model ${IMAGENET_DATA_PATH}) - - # Quant2 ResNet50 with input/output scales in `fake_quantize_range_abs_max` operators and the `out_threshold` attributes, - # with weight scales in `fake_dequantize_max_abs` operators - set(QUANT2_RESNET50_RANGE_MODEL_DIR "${QUANT_INSTALL_DIR}/ResNet50_quant2_range") - set(QUANT2_RESNET50_RANGE_MODEL_ARCHIVE "ResNet50_qat_range.tar.gz") - download_quant_model(${QUANT2_RESNET50_RANGE_MODEL_DIR} ${QUANT2_RESNET50_RANGE_MODEL_ARCHIVE} 2fdc8a139f041c0d270abec826b2d304) - inference_quant2_int8_image_classification_test(test_quant2_int8_resnet50_range_mkldnn ${QUANT2_RESNET50_RANGE_MODEL_DIR}/ResNet50_qat_range ${FP32_RESNET50_MODEL_DIR}/model ${IMAGENET_DATA_PATH}) - - # Quant2 ResNet50 with input/output scales in `fake_quantize_range_abs_max` operators and the `out_threshold` attributes, - # with weight scales in `fake_channel_wise_dequantize_max_abs` operators - set(QUANT2_RESNET50_CHANNELWISE_MODEL_DIR "${QUANT_INSTALL_DIR}/ResNet50_quant2_channelwise") - set(QUANT2_RESNET50_CHANNELWISE_MODEL_ARCHIVE "ResNet50_qat_channelwise.tar.gz") - download_quant_model(${QUANT2_RESNET50_CHANNELWISE_MODEL_DIR} ${QUANT2_RESNET50_CHANNELWISE_MODEL_ARCHIVE} 887a1b1b0e9a4efd10f263a43764db26) - inference_quant2_int8_image_classification_test(test_quant2_int8_resnet50_channelwise_mkldnn ${QUANT2_RESNET50_CHANNELWISE_MODEL_DIR}/ResNet50_qat_channelwise ${FP32_RESNET50_MODEL_DIR}/model ${IMAGENET_DATA_PATH}) - - # Quant2 MobileNetV1 - set(QUANT2_MOBILENETV1_MODEL_DIR "${QUANT_INSTALL_DIR}/MobileNetV1_quant2") - set(QUANT2_MOBILENETV1_MODEL_ARCHIVE "MobileNet_qat_perf.tar.gz") - download_quant_model(${QUANT2_MOBILENETV1_MODEL_DIR} ${QUANT2_MOBILENETV1_MODEL_ARCHIVE} 7f626e453db2d56fed6c2538621ffacf) - set(FP32_MOBILENETV1_MODEL_DIR "${INT8_INSTALL_DIR}/mobilenetv1") - inference_quant2_int8_image_classification_test(test_quant2_int8_mobilenetv1_mkldnn ${QUANT2_MOBILENETV1_MODEL_DIR}/MobileNet_qat_perf/float ${FP32_MOBILENETV1_MODEL_DIR}/model ${IMAGENET_DATA_PATH}) - - ### Quant2 for NLP - - set(NLP_DATA_ARCHIVE "Ernie_dataset.tar.gz") - set(NLP_DATA_DIR "${INFERENCE_DEMO_INSTALL_DIR}/Ernie_dataset") - set(NLP_DATA_PATH "${NLP_DATA_DIR}/Ernie_dataset/1.8w.bs1") - set(NLP_LABLES_PATH "${NLP_DATA_DIR}/Ernie_dataset/label.xnli.dev") - download_quant_data(${NLP_DATA_DIR} ${NLP_DATA_ARCHIVE} e650ce0cbc1fadbed5cc2c01d4e734dc) - - # Quant2 Ernie - set(QUANT2_ERNIE_MODEL_ARCHIVE "ernie_qat.tar.gz") - set(QUANT2_ERNIE_MODEL_DIR "${QUANT_INSTALL_DIR}/Ernie_quant2") - download_quant_model(${QUANT2_ERNIE_MODEL_DIR} ${QUANT2_ERNIE_MODEL_ARCHIVE} f7cdf4720755ecf66efbc8044e9922d9) - set(FP32_ERNIE_MODEL_ARCHIVE "ernie_fp32_model.tar.gz") - set(FP32_ERNIE_MODEL_DIR "${QUANT_INSTALL_DIR}/Ernie_float") - download_quant_fp32_model(${FP32_ERNIE_MODEL_DIR} ${FP32_ERNIE_MODEL_ARCHIVE} 114f38804a3ef8c45e7259e68bbd838b) - set(QUANT2_ERNIE_OPS_TO_QUANTIZE "fc,reshape2,transpose2,matmul,elementwise_add,slice") - inference_quant2_int8_nlp_test(test_quant2_int8_ernie_mkldnn ${QUANT2_ERNIE_MODEL_DIR}/Ernie_qat/float ${FP32_ERNIE_MODEL_DIR}/ernie_fp32_model ${NLP_DATA_PATH} ${NLP_LABLES_PATH} ${QUANT2_ERNIE_OPS_TO_QUANTIZE}) - - # Quant2 GRU - set(QUANT2_GRU_MODEL_ARCHIVE "GRU_quant_acc.tar.gz") - set(QUANT2_GRU_MODEL_DIR "${QUANT_INSTALL_DIR}/GRU_quant2") - download_quant_model(${QUANT2_GRU_MODEL_DIR} ${QUANT2_GRU_MODEL_ARCHIVE} cf207f8076dcfb8b74d8b6bdddf9090c) - set(QUANT2_GRU_OPS_TO_QUANTIZE "multi_gru") - - # Quant2 LSTM - set(QUANT2_LSTM_MODEL_ARCHIVE "lstm_quant.tar.gz") - set(QUANT2_LSTM_MODEL_DIR "${QUANT_INSTALL_DIR}/lstm_quant_test") - download_quant_model(${QUANT2_LSTM_MODEL_DIR} ${QUANT2_LSTM_MODEL_ARCHIVE} 40a693803b12ee9e251258f32559abcb) - set(QUANT2_LSTM_OPS_TO_QUANTIZE "fusion_lstm") - - ### Save FP32 model or INT8 model from Quant model - - set(QUANT2_INT8_RESNET50_SAVE_PATH "${QUANT_INSTALL_DIR}/ResNet50_quant2_int8") - save_quant_ic_model_test(save_quant2_model_resnet50 ${QUANT2_RESNET50_MODEL_DIR}/ResNet50_qat_perf/float ${QUANT2_INT8_RESNET50_SAVE_PATH}) - - set(QUANT2_INT8_ERNIE_SAVE_PATH "${QUANT_INSTALL_DIR}/Ernie_quant2_int8") - save_quant_nlp_model_test(save_quant2_model_ernie ${QUANT2_ERNIE_MODEL_DIR}/Ernie_qat/float ${QUANT2_INT8_ERNIE_SAVE_PATH} ${QUANT2_ERNIE_OPS_TO_QUANTIZE}) - - set(QUANT2_INT8_GRU_SAVE_PATH "${QUANT_INSTALL_DIR}/GRU_quant2_int8") - save_quant_nlp_model_test(save_quant2_model_gru ${QUANT2_GRU_MODEL_DIR}/GRU_quant_acc ${QUANT2_INT8_GRU_SAVE_PATH} ${QUANT2_GRU_OPS_TO_QUANTIZE}) - - set(QUANT2_INT8_LSTM_SAVE_PATH "${QUANT_INSTALL_DIR}/lstm_quant2_int8") - save_quant_nlp_model_test(save_quant2_model_lstm ${QUANT2_LSTM_MODEL_DIR}/lstm_quant ${QUANT2_INT8_LSTM_SAVE_PATH} ${QUANT2_LSTM_OPS_TO_QUANTIZE}) - - # Convert Quant2 model to dot and pdf files - set(QUANT2_INT8_ERNIE_DOT_SAVE_PATH "${QUANT_INSTALL_DIR}/Ernie_quant2_int8_dot_file") - convert_model2dot_test(convert_model2dot_ernie ${QUANT2_ERNIE_MODEL_DIR}/Ernie_qat/float ${QUANT2_INT8_ERNIE_DOT_SAVE_PATH} "Ernie_quant2_int8") - - ### PTQ INT8 - - # PTQ int8 lstm model - set(LSTM_DATA_FILE "quant_lstm_input_data.tar.gz") - set(LSTM_URL "${INFERENCE_URL}/int8/unittest_model_data") - download_data(${QUANT2_INT8_LSTM_SAVE_PATH} ${LSTM_URL} ${LSTM_DATA_FILE} add84c754e9b792fea1fbd728d134ab7) - set(QUANT2_FP32_LSTM_MODEL_ARCHIVE "lstm_fp32_model.tar.gz") - download_lstm_model(${QUANT2_INT8_LSTM_SAVE_PATH} ${QUANT2_FP32_LSTM_MODEL_ARCHIVE} eecd9f44d69a84acc1cf2235c4b8b743) - inference_quant2_int8_lstm_model_test(test_quant2_int8_lstm_mkldnn ${QUANT2_INT8_LSTM_SAVE_PATH}/lstm_fp32_model ${QUANT2_LSTM_MODEL_DIR}/lstm_quant ${QUANT2_INT8_LSTM_SAVE_PATH}/quant_lstm_input_data) + #### Image classification dataset: ImageNet (small) + # The dataset should already be downloaded for INT8v2 unit tests + set(IMAGENET_DATA_PATH "${INFERENCE_DEMO_INSTALL_DIR}/imagenet/data.bin") + + #### INT8 image classification python api test + # Models should be already downloaded for INT8v2 unit tests + + set(INT8_INSTALL_DIR "${INFERENCE_DEMO_INSTALL_DIR}/int8v2") + + #### QUANT & INT8 comparison python api tests + + set(QUANT_INSTALL_DIR "${INFERENCE_DEMO_INSTALL_DIR}/quant") + + ### Quant1 for image classification + + # Quant ResNet50 + set(QUANT_RESNET50_MODEL_DIR "${QUANT_INSTALL_DIR}/ResNet50_quant") + set(QUANT_RESNET50_MODEL_ARCHIVE "ResNet50_qat_model.tar.gz") + download_quant_model( + ${QUANT_RESNET50_MODEL_DIR} ${QUANT_RESNET50_MODEL_ARCHIVE} + ff89b934ab961c3a4a844193ece2e8a7) + inference_quant_int8_image_classification_test( + test_quant_int8_resnet50_mkldnn ${QUANT_RESNET50_MODEL_DIR}/model + ${IMAGENET_DATA_PATH}) + + # Quant ResNet101 + set(QUANT_RESNET101_MODEL_DIR "${QUANT_INSTALL_DIR}/ResNet101_quant") + set(QUANT_RESNET101_MODEL_ARCHIVE "ResNet101_qat_model.tar.gz") + download_quant_model( + ${QUANT_RESNET101_MODEL_DIR} ${QUANT_RESNET101_MODEL_ARCHIVE} + 95c6d01e3aeba31c13efb2ba8057d558) + # inference_quant_int8_image_classification_test(test_quant_int8_resnet101_mkldnn ${QUANT_RESNET101_MODEL_DIR}/model ${IMAGENET_DATA_PATH}) + + # Quant GoogleNet + set(QUANT_GOOGLENET_MODEL_DIR "${QUANT_INSTALL_DIR}/GoogleNet_quant") + set(QUANT_GOOGLENET_MODEL_ARCHIVE "GoogleNet_qat_model.tar.gz") + download_quant_model( + ${QUANT_GOOGLENET_MODEL_DIR} ${QUANT_GOOGLENET_MODEL_ARCHIVE} + 1d4a7383baa63e7d1c423e8db2b791d5) + inference_quant_int8_image_classification_test( + test_quant_int8_googlenet_mkldnn ${QUANT_GOOGLENET_MODEL_DIR}/model + ${IMAGENET_DATA_PATH}) + + # Quant MobileNetV1 + set(QUANT_MOBILENETV1_MODEL_DIR "${QUANT_INSTALL_DIR}/MobileNetV1_quant") + set(QUANT_MOBILENETV1_MODEL_ARCHIVE "MobileNetV1_qat_model.tar.gz") + download_quant_model( + ${QUANT_MOBILENETV1_MODEL_DIR} ${QUANT_MOBILENETV1_MODEL_ARCHIVE} + 3b774d94a9fcbb604d09bdb731fc1162) + inference_quant_int8_image_classification_test( + test_quant_int8_mobilenetv1_mkldnn ${QUANT_MOBILENETV1_MODEL_DIR}/model + ${IMAGENET_DATA_PATH}) + + # Quant MobileNetV2 + set(QUANT_MOBILENETV2_MODEL_DIR "${QUANT_INSTALL_DIR}/MobileNetV2_quant") + set(QUANT_MOBILENETV2_MODEL_ARCHIVE "MobileNetV2_qat_model.tar.gz") + download_quant_model( + ${QUANT_MOBILENETV2_MODEL_DIR} ${QUANT_MOBILENETV2_MODEL_ARCHIVE} + 758a99d9225d8b73e1a8765883f96cdd) + inference_quant_int8_image_classification_test( + test_quant_int8_mobilenetv2_mkldnn ${QUANT_MOBILENETV2_MODEL_DIR}/model + ${IMAGENET_DATA_PATH}) + + # Quant VGG16 + set(QUANT_VGG16_MODEL_DIR "${QUANT_INSTALL_DIR}/VGG16_quant") + set(QUANT_VGG16_MODEL_ARCHIVE "VGG16_qat_model.tar.gz") + download_quant_model(${QUANT_VGG16_MODEL_DIR} ${QUANT_VGG16_MODEL_ARCHIVE} + c37e63ca82a102f47be266f8068b0b55) + # inference_quant_int8_image_classification_test(test_quant_int8_vgg16_mkldnn ${QUANT_VGG16_MODEL_DIR}/model ${IMAGENET_DATA_PATH}) + + # Quant VGG19 + set(QUANT_VGG19_MODEL_DIR "${QUANT_INSTALL_DIR}/VGG19_quant") + set(QUANT_VGG19_MODEL_ARCHIVE "VGG19_qat_model.tar.gz") + download_quant_model(${QUANT_VGG19_MODEL_DIR} ${QUANT_VGG19_MODEL_ARCHIVE} + 62bcd4b6c3ca2af67e8251d1c96ea18f) + # inference_quant_int8_image_classification_test(test_quant_int8_vgg19_mkldnn ${QUANT_VGG19_MODEL_DIR}/model ${IMAGENET_DATA_PATH}) + + ### Quant2 for image classification + + # Quant2 ResNet50 with input/output scales in `fake_quantize_moving_average_abs_max` operators, + # with weight scales in `fake_dequantize_max_abs` operators + set(QUANT2_RESNET50_MODEL_DIR "${QUANT_INSTALL_DIR}/ResNet50_quant2") + set(QUANT2_RESNET50_MODEL_ARCHIVE "ResNet50_qat_perf.tar.gz") + download_quant_model( + ${QUANT2_RESNET50_MODEL_DIR} ${QUANT2_RESNET50_MODEL_ARCHIVE} + e87309457e8c462a579340607f064d66) + set(FP32_RESNET50_MODEL_DIR "${INT8_INSTALL_DIR}/resnet50") + inference_quant2_int8_image_classification_test( + test_quant2_int8_resnet50_mkldnn + ${QUANT2_RESNET50_MODEL_DIR}/ResNet50_qat_perf/float + ${FP32_RESNET50_MODEL_DIR}/model ${IMAGENET_DATA_PATH}) + + # Quant2 ResNet50 with input/output scales in `fake_quantize_range_abs_max` operators and the `out_threshold` attributes, + # with weight scales in `fake_dequantize_max_abs` operators + set(QUANT2_RESNET50_RANGE_MODEL_DIR + "${QUANT_INSTALL_DIR}/ResNet50_quant2_range") + set(QUANT2_RESNET50_RANGE_MODEL_ARCHIVE "ResNet50_qat_range.tar.gz") + download_quant_model( + ${QUANT2_RESNET50_RANGE_MODEL_DIR} ${QUANT2_RESNET50_RANGE_MODEL_ARCHIVE} + 2fdc8a139f041c0d270abec826b2d304) + inference_quant2_int8_image_classification_test( + test_quant2_int8_resnet50_range_mkldnn + ${QUANT2_RESNET50_RANGE_MODEL_DIR}/ResNet50_qat_range + ${FP32_RESNET50_MODEL_DIR}/model ${IMAGENET_DATA_PATH}) + + # Quant2 ResNet50 with input/output scales in `fake_quantize_range_abs_max` operators and the `out_threshold` attributes, + # with weight scales in `fake_channel_wise_dequantize_max_abs` operators + set(QUANT2_RESNET50_CHANNELWISE_MODEL_DIR + "${QUANT_INSTALL_DIR}/ResNet50_quant2_channelwise") + set(QUANT2_RESNET50_CHANNELWISE_MODEL_ARCHIVE + "ResNet50_qat_channelwise.tar.gz") + download_quant_model( + ${QUANT2_RESNET50_CHANNELWISE_MODEL_DIR} + ${QUANT2_RESNET50_CHANNELWISE_MODEL_ARCHIVE} + 887a1b1b0e9a4efd10f263a43764db26) + inference_quant2_int8_image_classification_test( + test_quant2_int8_resnet50_channelwise_mkldnn + ${QUANT2_RESNET50_CHANNELWISE_MODEL_DIR}/ResNet50_qat_channelwise + ${FP32_RESNET50_MODEL_DIR}/model ${IMAGENET_DATA_PATH}) + + # Quant2 MobileNetV1 + set(QUANT2_MOBILENETV1_MODEL_DIR "${QUANT_INSTALL_DIR}/MobileNetV1_quant2") + set(QUANT2_MOBILENETV1_MODEL_ARCHIVE "MobileNet_qat_perf.tar.gz") + download_quant_model( + ${QUANT2_MOBILENETV1_MODEL_DIR} ${QUANT2_MOBILENETV1_MODEL_ARCHIVE} + 7f626e453db2d56fed6c2538621ffacf) + set(FP32_MOBILENETV1_MODEL_DIR "${INT8_INSTALL_DIR}/mobilenetv1") + inference_quant2_int8_image_classification_test( + test_quant2_int8_mobilenetv1_mkldnn + ${QUANT2_MOBILENETV1_MODEL_DIR}/MobileNet_qat_perf/float + ${FP32_MOBILENETV1_MODEL_DIR}/model ${IMAGENET_DATA_PATH}) + + ### Quant2 for NLP + + set(NLP_DATA_ARCHIVE "Ernie_dataset.tar.gz") + set(NLP_DATA_DIR "${INFERENCE_DEMO_INSTALL_DIR}/Ernie_dataset") + set(NLP_DATA_PATH "${NLP_DATA_DIR}/Ernie_dataset/1.8w.bs1") + set(NLP_LABLES_PATH "${NLP_DATA_DIR}/Ernie_dataset/label.xnli.dev") + download_quant_data(${NLP_DATA_DIR} ${NLP_DATA_ARCHIVE} + e650ce0cbc1fadbed5cc2c01d4e734dc) + + # Quant2 Ernie + set(QUANT2_ERNIE_MODEL_ARCHIVE "ernie_qat.tar.gz") + set(QUANT2_ERNIE_MODEL_DIR "${QUANT_INSTALL_DIR}/Ernie_quant2") + download_quant_model(${QUANT2_ERNIE_MODEL_DIR} ${QUANT2_ERNIE_MODEL_ARCHIVE} + f7cdf4720755ecf66efbc8044e9922d9) + set(FP32_ERNIE_MODEL_ARCHIVE "ernie_fp32_model.tar.gz") + set(FP32_ERNIE_MODEL_DIR "${QUANT_INSTALL_DIR}/Ernie_float") + download_quant_fp32_model(${FP32_ERNIE_MODEL_DIR} ${FP32_ERNIE_MODEL_ARCHIVE} + 114f38804a3ef8c45e7259e68bbd838b) + set(QUANT2_ERNIE_OPS_TO_QUANTIZE + "fc,reshape2,transpose2,matmul,elementwise_add,slice") + inference_quant2_int8_nlp_test( + test_quant2_int8_ernie_mkldnn ${QUANT2_ERNIE_MODEL_DIR}/Ernie_qat/float + ${FP32_ERNIE_MODEL_DIR}/ernie_fp32_model ${NLP_DATA_PATH} + ${NLP_LABLES_PATH} ${QUANT2_ERNIE_OPS_TO_QUANTIZE}) + + # Quant2 GRU + set(QUANT2_GRU_MODEL_ARCHIVE "GRU_quant_acc.tar.gz") + set(QUANT2_GRU_MODEL_DIR "${QUANT_INSTALL_DIR}/GRU_quant2") + download_quant_model(${QUANT2_GRU_MODEL_DIR} ${QUANT2_GRU_MODEL_ARCHIVE} + cf207f8076dcfb8b74d8b6bdddf9090c) + set(QUANT2_GRU_OPS_TO_QUANTIZE "multi_gru") + + # Quant2 LSTM + set(QUANT2_LSTM_MODEL_ARCHIVE "lstm_quant.tar.gz") + set(QUANT2_LSTM_MODEL_DIR "${QUANT_INSTALL_DIR}/lstm_quant_test") + download_quant_model(${QUANT2_LSTM_MODEL_DIR} ${QUANT2_LSTM_MODEL_ARCHIVE} + 40a693803b12ee9e251258f32559abcb) + set(QUANT2_LSTM_OPS_TO_QUANTIZE "fusion_lstm") + + ### Save FP32 model or INT8 model from Quant model + + set(QUANT2_INT8_RESNET50_SAVE_PATH + "${QUANT_INSTALL_DIR}/ResNet50_quant2_int8") + save_quant_ic_model_test( + save_quant2_model_resnet50 + ${QUANT2_RESNET50_MODEL_DIR}/ResNet50_qat_perf/float + ${QUANT2_INT8_RESNET50_SAVE_PATH}) + + set(QUANT2_INT8_ERNIE_SAVE_PATH "${QUANT_INSTALL_DIR}/Ernie_quant2_int8") + save_quant_nlp_model_test( + save_quant2_model_ernie ${QUANT2_ERNIE_MODEL_DIR}/Ernie_qat/float + ${QUANT2_INT8_ERNIE_SAVE_PATH} ${QUANT2_ERNIE_OPS_TO_QUANTIZE}) + + set(QUANT2_INT8_GRU_SAVE_PATH "${QUANT_INSTALL_DIR}/GRU_quant2_int8") + save_quant_nlp_model_test( + save_quant2_model_gru ${QUANT2_GRU_MODEL_DIR}/GRU_quant_acc + ${QUANT2_INT8_GRU_SAVE_PATH} ${QUANT2_GRU_OPS_TO_QUANTIZE}) + + set(QUANT2_INT8_LSTM_SAVE_PATH "${QUANT_INSTALL_DIR}/lstm_quant2_int8") + save_quant_nlp_model_test( + save_quant2_model_lstm ${QUANT2_LSTM_MODEL_DIR}/lstm_quant + ${QUANT2_INT8_LSTM_SAVE_PATH} ${QUANT2_LSTM_OPS_TO_QUANTIZE}) + + # Convert Quant2 model to dot and pdf files + set(QUANT2_INT8_ERNIE_DOT_SAVE_PATH + "${QUANT_INSTALL_DIR}/Ernie_quant2_int8_dot_file") + convert_model2dot_test( + convert_model2dot_ernie ${QUANT2_ERNIE_MODEL_DIR}/Ernie_qat/float + ${QUANT2_INT8_ERNIE_DOT_SAVE_PATH} "Ernie_quant2_int8") + + ### PTQ INT8 + + # PTQ int8 lstm model + set(LSTM_DATA_FILE "quant_lstm_input_data.tar.gz") + set(LSTM_URL "${INFERENCE_URL}/int8/unittest_model_data") + download_data(${QUANT2_INT8_LSTM_SAVE_PATH} ${LSTM_URL} ${LSTM_DATA_FILE} + add84c754e9b792fea1fbd728d134ab7) + set(QUANT2_FP32_LSTM_MODEL_ARCHIVE "lstm_fp32_model.tar.gz") + download_lstm_model( + ${QUANT2_INT8_LSTM_SAVE_PATH} ${QUANT2_FP32_LSTM_MODEL_ARCHIVE} + eecd9f44d69a84acc1cf2235c4b8b743) + inference_quant2_int8_lstm_model_test( + test_quant2_int8_lstm_mkldnn ${QUANT2_INT8_LSTM_SAVE_PATH}/lstm_fp32_model + ${QUANT2_LSTM_MODEL_DIR}/lstm_quant + ${QUANT2_INT8_LSTM_SAVE_PATH}/quant_lstm_input_data) endif() -# Since the tests for Quant & INT8 comparison support only testing on Linux +# Since the tests for Quant & INT8 comparison support only testing on Linux # with MKL-DNN, we remove it here to not test it on other systems. -list(REMOVE_ITEM TEST_OPS - test_mkldnn_int8_quantization_strategy - quant_int8_image_classification_comparison - quant_int8_nlp_comparison) +list(REMOVE_ITEM TEST_OPS test_mkldnn_int8_quantization_strategy + quant_int8_image_classification_comparison quant_int8_nlp_comparison) #TODO(wanghaoshuang): Fix this unitest failed on GCC8. -LIST(REMOVE_ITEM TEST_OPS test_auto_pruning) -LIST(REMOVE_ITEM TEST_OPS test_filter_pruning) - +list(REMOVE_ITEM TEST_OPS test_auto_pruning) +list(REMOVE_ITEM TEST_OPS test_filter_pruning) + # fix if(WIN32) - SET(SINGLE_CARD_TEST_OPS - test_user_defined_quantization - test_quantization_scale_pass - test_quantization_pass - test_moving_average_abs_max_scale_op - test_imperative_qat_channelwise - test_imperative_qat - test_imperative_out_scale - test_graph) - LIST(REMOVE_ITEM TEST_OPS ${SINGLE_CARD_TEST_OPS}) - foreach(src ${SINGLE_CARD_TEST_OPS}) - py_test(${src} SRCS ${src}.py ENVS CUDA_VISIBLE_DEVICES=0) - endforeach() + set(SINGLE_CARD_TEST_OPS + test_user_defined_quantization + test_quantization_scale_pass + test_quantization_pass + test_moving_average_abs_max_scale_op + test_imperative_qat_channelwise + test_imperative_qat + test_imperative_out_scale + test_graph) + list(REMOVE_ITEM TEST_OPS ${SINGLE_CARD_TEST_OPS}) + foreach(src ${SINGLE_CARD_TEST_OPS}) + py_test(${src} SRCS ${src}.py ENVS CUDA_VISIBLE_DEVICES=0) + endforeach() endif() - foreach(src ${TEST_OPS}) - py_test(${src} SRCS ${src}.py) + py_test(${src} SRCS ${src}.py) endforeach() # setting timeout value for old unittests if(NOT WIN32) - set_tests_properties(test_post_training_quantization_lstm_model PROPERTIES TIMEOUT 120) - set_tests_properties(test_post_training_quantization_mobilenetv1 PROPERTIES TIMEOUT 600 LABELS "RUN_TYPE=NIGHTLY") - set_tests_properties(test_post_training_quantization_resnet50 PROPERTIES TIMEOUT 600 LABELS "RUN_TYPE=NIGHTLY") - set_tests_properties(test_post_training_quantization_mnist PROPERTIES TIMEOUT 120) - set_tests_properties(test_post_training_quantization_while PROPERTIES TIMEOUT 120) - set_tests_properties(test_imperative_ptq PROPERTIES TIMEOUT 120) - set_tests_properties(test_weight_quantization_mobilenetv1 PROPERTIES TIMEOUT 120) + set_tests_properties(test_post_training_quantization_lstm_model + PROPERTIES TIMEOUT 120) + set_tests_properties(test_post_training_quantization_mobilenetv1 + PROPERTIES TIMEOUT 600 LABELS "RUN_TYPE=NIGHTLY") + set_tests_properties(test_post_training_quantization_resnet50 + PROPERTIES TIMEOUT 600 LABELS "RUN_TYPE=NIGHTLY") + set_tests_properties(test_post_training_quantization_mnist PROPERTIES TIMEOUT + 120) + set_tests_properties(test_post_training_quantization_while PROPERTIES TIMEOUT + 120) + set_tests_properties(test_imperative_ptq PROPERTIES TIMEOUT 120) + set_tests_properties(test_weight_quantization_mobilenetv1 PROPERTIES TIMEOUT + 120) endif() set_tests_properties(test_graph PROPERTIES TIMEOUT 120) @@ -359,23 +530,30 @@ set_tests_properties(test_imperative_out_scale PROPERTIES TIMEOUT 200) set_tests_properties(test_imperative_qat_user_defined PROPERTIES TIMEOUT 200) if(LINUX AND WITH_MKLDNN) - set_tests_properties(test_quant2_int8_mobilenetv1_mkldnn PROPERTIES TIMEOUT 120) - set_tests_properties(convert_model2dot_ernie PROPERTIES TIMEOUT 120) - set_tests_properties(test_quant2_int8_resnet50_channelwise_mkldnn PROPERTIES TIMEOUT 120) - set_tests_properties(test_quant_int8_mobilenetv2_mkldnn PROPERTIES TIMEOUT 120) - set_tests_properties(test_quant2_int8_resnet50_range_mkldnn PROPERTIES TIMEOUT 120) - set_tests_properties(save_quant2_model_resnet50 PROPERTIES TIMEOUT 120) - set_tests_properties(test_quant_int8_resnet50_mkldnn PROPERTIES TIMEOUT 120) - set_tests_properties(test_quant_int8_mobilenetv1_mkldnn PROPERTIES TIMEOUT 120) - set_tests_properties(test_quant2_int8_ernie_mkldnn PROPERTIES TIMEOUT 120) - set_tests_properties(test_quant_int8_googlenet_mkldnn PROPERTIES TIMEOUT 120) - set_tests_properties(test_quant2_int8_resnet50_mkldnn PROPERTIES TIMEOUT 120) - set_tests_properties(test_quant2_int8_lstm_mkldnn PROPERTIES TIMEOUT 120) + set_tests_properties(test_quant2_int8_mobilenetv1_mkldnn PROPERTIES TIMEOUT + 120) + set_tests_properties(convert_model2dot_ernie PROPERTIES TIMEOUT 120) + set_tests_properties(test_quant2_int8_resnet50_channelwise_mkldnn + PROPERTIES TIMEOUT 120) + set_tests_properties(test_quant_int8_mobilenetv2_mkldnn PROPERTIES TIMEOUT + 120) + set_tests_properties(test_quant2_int8_resnet50_range_mkldnn PROPERTIES TIMEOUT + 120) + set_tests_properties(save_quant2_model_resnet50 PROPERTIES TIMEOUT 120) + set_tests_properties(test_quant_int8_resnet50_mkldnn PROPERTIES TIMEOUT 120) + set_tests_properties(test_quant_int8_mobilenetv1_mkldnn PROPERTIES TIMEOUT + 120) + set_tests_properties(test_quant2_int8_ernie_mkldnn PROPERTIES TIMEOUT 120) + set_tests_properties(test_quant_int8_googlenet_mkldnn PROPERTIES TIMEOUT 120) + set_tests_properties(test_quant2_int8_resnet50_mkldnn PROPERTIES TIMEOUT 120) + set_tests_properties(test_quant2_int8_lstm_mkldnn PROPERTIES TIMEOUT 120) endif() if(APPLE) - set_tests_properties(test_post_training_quantization_mnist PROPERTIES TIMEOUT 300) - set_tests_properties(test_post_training_quantization_while PROPERTIES TIMEOUT 300) - set_tests_properties(test_imperative_ptq PROPERTIES TIMEOUT 300) - set_tests_properties(test_imperative_skip_op PROPERTIES TIMEOUT 300) + set_tests_properties(test_post_training_quantization_mnist PROPERTIES TIMEOUT + 300) + set_tests_properties(test_post_training_quantization_while PROPERTIES TIMEOUT + 300) + set_tests_properties(test_imperative_ptq PROPERTIES TIMEOUT 300) + set_tests_properties(test_imperative_skip_op PROPERTIES TIMEOUT 300) endif() diff --git a/python/paddle/fluid/contrib/tests/CMakeLists.txt b/python/paddle/fluid/contrib/tests/CMakeLists.txt index b4c5ad057f9..48e107c4b4d 100644 --- a/python/paddle/fluid/contrib/tests/CMakeLists.txt +++ b/python/paddle/fluid/contrib/tests/CMakeLists.txt @@ -1,19 +1,29 @@ -file(GLOB TEST_OPS RELATIVE "${CMAKE_CURRENT_SOURCE_DIR}" "test_*.py") +file( + GLOB TEST_OPS + RELATIVE "${CMAKE_CURRENT_SOURCE_DIR}" + "test_*.py") string(REPLACE ".py" "" TEST_OPS "${TEST_OPS}") list(REMOVE_ITEM TEST_OPS test_multi_precision_fp16_train) foreach(src ${TEST_OPS}) - py_test(${src} SRCS ${src}.py) + py_test(${src} SRCS ${src}.py) endforeach() -py_test_modules(test_multi_precision_fp16_train MODULES test_multi_precision_fp16_train ENVS FLAGS_cudnn_deterministic=true FLAGS_cudnn_batchnorm_spatial_persistent=true FLAGS_conv_workspace_size_limit=1000) +py_test_modules( + test_multi_precision_fp16_train + MODULES + test_multi_precision_fp16_train + ENVS + FLAGS_cudnn_deterministic=true + FLAGS_cudnn_batchnorm_spatial_persistent=true + FLAGS_conv_workspace_size_limit=1000) set_tests_properties(test_image_classification_fp16 PROPERTIES TIMEOUT 120) set_tests_properties(test_weight_decay_extend PROPERTIES TIMEOUT 120) set_tests_properties(test_multi_precision_fp16_train PROPERTIES TIMEOUT 120) -if (APPLE) - set_tests_properties(test_model_cast_to_bf16 PROPERTIES TIMEOUT 300) - set_tests_properties(test_quantize_transpiler PROPERTIES TIMEOUT 300) +if(APPLE) + set_tests_properties(test_model_cast_to_bf16 PROPERTIES TIMEOUT 300) + set_tests_properties(test_quantize_transpiler PROPERTIES TIMEOUT 300) endif() diff --git a/python/paddle/fluid/tests/CMakeLists.txt b/python/paddle/fluid/tests/CMakeLists.txt index 587d4aee34c..6acee6dc11c 100644 --- a/python/paddle/fluid/tests/CMakeLists.txt +++ b/python/paddle/fluid/tests/CMakeLists.txt @@ -1,7 +1,9 @@ -file(GLOB TEST_OPS RELATIVE "${CMAKE_CURRENT_SOURCE_DIR}" "test_*.py") +file( + GLOB TEST_OPS + RELATIVE "${CMAKE_CURRENT_SOURCE_DIR}" + "test_*.py") string(REPLACE ".py" "" TEST_OPS "${TEST_OPS}") - foreach(src ${TEST_OPS}) py_test(${src} SRCS ${src}.py) endforeach() diff --git a/python/paddle/fluid/tests/book/CMakeLists.txt b/python/paddle/fluid/tests/book/CMakeLists.txt index 09c650f16e2..9e807a79353 100644 --- a/python/paddle/fluid/tests/book/CMakeLists.txt +++ b/python/paddle/fluid/tests/book/CMakeLists.txt @@ -1,10 +1,13 @@ -file(GLOB TEST_OPS RELATIVE "${CMAKE_CURRENT_SOURCE_DIR}" "test_*.py") +file( + GLOB TEST_OPS + RELATIVE "${CMAKE_CURRENT_SOURCE_DIR}" + "test_*.py") string(REPLACE ".py" "" TEST_OPS "${TEST_OPS}") # default test foreach(src ${TEST_OPS}) - py_test(${src} SRCS ${src}.py) - set_tests_properties(${src} PROPERTIES FIXTURES_SETUP ${src}_infer_model) + py_test(${src} SRCS ${src}.py) + set_tests_properties(${src} PROPERTIES FIXTURES_SETUP ${src}_infer_model) endforeach() set_tests_properties(test_word2vec_book PROPERTIES TIMEOUT 120) set_tests_properties(test_recognize_digits PROPERTIES TIMEOUT 120) diff --git a/python/paddle/fluid/tests/custom_op/CMakeLists.txt b/python/paddle/fluid/tests/custom_op/CMakeLists.txt index b4adeb9575a..f21fc730fc8 100644 --- a/python/paddle/fluid/tests/custom_op/CMakeLists.txt +++ b/python/paddle/fluid/tests/custom_op/CMakeLists.txt @@ -1,20 +1,22 @@ # New custom OP can support Windows/Linux/Mac now if(WITH_GPU OR APPLE) - py_test(test_custom_relu_op_setup SRCS test_custom_relu_op_setup.py) - py_test(test_custom_relu_op_jit SRCS test_custom_relu_op_jit.py) - py_test(test_custom_relu_model SRCS test_custom_relu_model.py) - py_test(test_context_pool SRCS test_context_pool.py) + py_test(test_custom_relu_op_setup SRCS test_custom_relu_op_setup.py) + py_test(test_custom_relu_op_jit SRCS test_custom_relu_op_jit.py) + py_test(test_custom_relu_model SRCS test_custom_relu_model.py) + py_test(test_context_pool SRCS test_context_pool.py) - # Compiling shared library will cost some time, but running process is very fast. - set_tests_properties(test_custom_relu_op_setup PROPERTIES TIMEOUT 250) - set_tests_properties(test_custom_relu_op_jit PROPERTIES TIMEOUT 180) - set_tests_properties(test_custom_relu_model PROPERTIES TIMEOUT 180) - set_tests_properties(test_context_pool PROPERTIES TIMEOUT 180) - if($ENV{USE_STANDALONE_EXECUTOR}) - # these test will fail in some server due to PR#42149, temporarily set it use old executor. - set_tests_properties(test_custom_relu_op_setup PROPERTIES ENVIRONMENT FLAGS_USE_STANDALONE_EXECUTOR=0) - set_tests_properties(test_custom_relu_model PROPERTIES ENVIRONMENT FLAGS_USE_STANDALONE_EXECUTOR=0) - endif() + # Compiling shared library will cost some time, but running process is very fast. + set_tests_properties(test_custom_relu_op_setup PROPERTIES TIMEOUT 250) + set_tests_properties(test_custom_relu_op_jit PROPERTIES TIMEOUT 180) + set_tests_properties(test_custom_relu_model PROPERTIES TIMEOUT 180) + set_tests_properties(test_context_pool PROPERTIES TIMEOUT 180) + if($ENV{USE_STANDALONE_EXECUTOR}) + # these test will fail in some server due to PR#42149, temporarily set it use old executor. + set_tests_properties(test_custom_relu_op_setup + PROPERTIES ENVIRONMENT FLAGS_USE_STANDALONE_EXECUTOR=0) + set_tests_properties(test_custom_relu_model + PROPERTIES ENVIRONMENT FLAGS_USE_STANDALONE_EXECUTOR=0) + endif() endif() py_test(test_custom_raw_op_kernel_op SRCS test_custom_raw_op_kernel_op.py) @@ -35,5 +37,5 @@ py_test(test_sysconfig SRCS test_sysconfig.py) py_test(test_check_abi SRCS test_check_abi.py) if(APPLE) - set_tests_properties(test_custom_simple_slice PROPERTIES TIMEOUT 300) + set_tests_properties(test_custom_simple_slice PROPERTIES TIMEOUT 300) endif() diff --git a/python/paddle/fluid/tests/unittests/CMakeLists.txt b/python/paddle/fluid/tests/unittests/CMakeLists.txt index 34237d47a56..214c68c250e 100755 --- a/python/paddle/fluid/tests/unittests/CMakeLists.txt +++ b/python/paddle/fluid/tests/unittests/CMakeLists.txt @@ -1,22 +1,30 @@ -file(GLOB TEST_OPS RELATIVE -"${CMAKE_CURRENT_SOURCE_DIR}" "test_*.py") +file( + GLOB TEST_OPS + RELATIVE "${CMAKE_CURRENT_SOURCE_DIR}" + "test_*.py") string(REPLACE ".py" "" TEST_OPS "${TEST_OPS}") -set(GC_ENVS FLAGS_eager_delete_tensor_gb=0.0 FLAGS_fast_eager_deletion_mode=1 FLAGS_memory_fraction_of_eager_deletion=1.0) +set(GC_ENVS FLAGS_eager_delete_tensor_gb=0.0 FLAGS_fast_eager_deletion_mode=1 + FLAGS_memory_fraction_of_eager_deletion=1.0) set(dist_ENVS http_proxy="" https_proxy="") -file(GLOB DIST_TEST_OPS RELATIVE "${CMAKE_CURRENT_SOURCE_DIR}" "test_dist_*.py") +file( + GLOB DIST_TEST_OPS + RELATIVE "${CMAKE_CURRENT_SOURCE_DIR}" + "test_dist_*.py") list(REMOVE_ITEM DIST_TEST_OPS "test_dist_op") -if ((NOT WITH_NCCL) AND (NOT WITH_RCCL)) - list(REMOVE_ITEM DIST_TEST_OPS "test_dist_mnist_dgc_nccl") +if((NOT WITH_NCCL) AND (NOT WITH_RCCL)) + list(REMOVE_ITEM DIST_TEST_OPS "test_dist_mnist_dgc_nccl") endif() string(REPLACE ".py" "" DIST_TEST_OPS "${DIST_TEST_OPS}") -if ((NOT WITH_GPU) AND (NOT WITH_XPU) AND NOT (WITH_ASCEND OR WITH_ASCEND_CL)) - list(REMOVE_ITEM DIST_TEST_OPS "test_dist_fleet_grad_clip") - list(REMOVE_ITEM DIST_TEST_OPS "test_dist_fleet_heter_ctr") - list(REMOVE_ITEM DIST_TEST_OPS "test_dist_fleet_ps_gpu_ctr") - list(REMOVE_ITEM DIST_TEST_OPS "test_dist_mnist_batch_merge") +if((NOT WITH_GPU) + AND (NOT WITH_XPU) + AND NOT (WITH_ASCEND OR WITH_ASCEND_CL)) + list(REMOVE_ITEM DIST_TEST_OPS "test_dist_fleet_grad_clip") + list(REMOVE_ITEM DIST_TEST_OPS "test_dist_fleet_heter_ctr") + list(REMOVE_ITEM DIST_TEST_OPS "test_dist_fleet_ps_gpu_ctr") + list(REMOVE_ITEM DIST_TEST_OPS "test_dist_mnist_batch_merge") endif() list(APPEND DIST_TEST_OPS test_parallel_dygraph_mnist) @@ -30,13 +38,16 @@ list(APPEND DIST_TEST_OPS test_parallel_dygraph_se_resnext) list(APPEND DIST_TEST_OPS test_parallel_dygraph_sparse_embedding) list(APPEND DIST_TEST_OPS test_parallel_dygraph_sparse_embedding_over_height) list(APPEND DIST_TEST_OPS test_parallel_dygraph_transformer) -if (WITH_GPU OR WITH_XPU OR WITH_ASCEND OR WITH_ASCEND_CL) - list(APPEND DIST_TEST_OPS test_fleet_graph_execution_meta_optimizer) - list(APPEND DIST_TEST_OPS test_fleet_pipeline_meta_optimizer) - list(APPEND DIST_TEST_OPS test_fleet_pipeline_meta_optimizer_with_recompute) - list(APPEND DIST_TEST_OPS test_fleet_raw_program_meta_optimizer) - list(APPEND DIST_TEST_OPS test_gen_nccl_id_op) - list(APPEND DIST_TEST_OPS test_rnn_dp) +if(WITH_GPU + OR WITH_XPU + OR WITH_ASCEND + OR WITH_ASCEND_CL) + list(APPEND DIST_TEST_OPS test_fleet_graph_execution_meta_optimizer) + list(APPEND DIST_TEST_OPS test_fleet_pipeline_meta_optimizer) + list(APPEND DIST_TEST_OPS test_fleet_pipeline_meta_optimizer_with_recompute) + list(APPEND DIST_TEST_OPS test_fleet_raw_program_meta_optimizer) + list(APPEND DIST_TEST_OPS test_gen_nccl_id_op) + list(APPEND DIST_TEST_OPS test_rnn_dp) endif() list(APPEND DIST_TEST_OPS test_parallel_dygraph_unused_variables) list(APPEND DIST_TEST_OPS test_parallel_dygraph_control_flow) @@ -89,7 +100,8 @@ list(APPEND MIXED_DIST_TEST_OPS test_fleet_base_2) list(APPEND MIXED_DIST_TEST_OPS test_fleet_base_3) list(APPEND MIXED_DIST_TEST_OPS test_fleet_recompute_meta_optimizer) list(APPEND MIXED_DIST_TEST_OPS test_fleet_pipeline_meta_optimizer) -list(APPEND MIXED_DIST_TEST_OPS test_fleet_pipeline_meta_optimizer_with_recompute) +list(APPEND MIXED_DIST_TEST_OPS + test_fleet_pipeline_meta_optimizer_with_recompute) list(APPEND MIXED_DIST_TEST_OPS test_fleet_raw_program_meta_optimizer) list(APPEND MIXED_DIST_TEST_OPS test_rnn_dp) list(APPEND MIXED_DIST_TEST_OPS test_fleet_amp_meta_optimizer) @@ -122,237 +134,252 @@ foreach(TEST_OP ${MIXED_DIST_TEST_OPS}) endforeach() if(NOT WITH_PYTHON AND ON_INFER) - LIST(REMOVE_ITEM TEST_OPS test_eager_trace_op) + list(REMOVE_ITEM TEST_OPS test_eager_trace_op) endif() if(NOT WITH_GPU) - LIST(REMOVE_ITEM TEST_OPS test_fused_feedforward_op) - LIST(REMOVE_ITEM TEST_OPS test_fused_attention_op) - LIST(REMOVE_ITEM TEST_OPS test_fused_attention_op_api) - LIST(REMOVE_ITEM TEST_OPS test_fused_multi_transformer_op) - LIST(REMOVE_ITEM TEST_OPS test_fused_transformer_encoder_layer) - LIST(REMOVE_ITEM TEST_OPS test_fused_bias_dropout_residual_layer_norm_op) - LIST(REMOVE_ITEM TEST_OPS test_fused_bias_dropout_residual_layer_norm_op_api) + list(REMOVE_ITEM TEST_OPS test_fused_feedforward_op) + list(REMOVE_ITEM TEST_OPS test_fused_attention_op) + list(REMOVE_ITEM TEST_OPS test_fused_attention_op_api) + list(REMOVE_ITEM TEST_OPS test_fused_multi_transformer_op) + list(REMOVE_ITEM TEST_OPS test_fused_transformer_encoder_layer) + list(REMOVE_ITEM TEST_OPS test_fused_bias_dropout_residual_layer_norm_op) + list(REMOVE_ITEM TEST_OPS test_fused_bias_dropout_residual_layer_norm_op_api) endif() -LIST(REMOVE_ITEM TEST_OPS test_fused_gemm_epilogue_op) -LIST(REMOVE_ITEM TEST_OPS test_fused_gemm_epilogue_grad_op) -LIST(REMOVE_ITEM TEST_OPS test_fuse_gemm_epilogue_pass) +list(REMOVE_ITEM TEST_OPS test_fused_gemm_epilogue_op) +list(REMOVE_ITEM TEST_OPS test_fused_gemm_epilogue_grad_op) +list(REMOVE_ITEM TEST_OPS test_fuse_gemm_epilogue_pass) if(((NOT WITH_ROCM) AND (NOT WITH_GPU)) OR WIN32) - LIST(REMOVE_ITEM TEST_OPS test_c_comm_init_all_op) - LIST(REMOVE_ITEM TEST_OPS test_c_concat) - LIST(REMOVE_ITEM TEST_OPS test_c_split) - LIST(REMOVE_ITEM TEST_OPS test_allgather) - LIST(REMOVE_ITEM TEST_OPS test_c_identity) - LIST(REMOVE_ITEM TEST_OPS test_c_embedding_op) - LIST(REMOVE_ITEM TEST_OPS test_allreduce) - LIST(REMOVE_ITEM TEST_OPS test_broadcast) - LIST(REMOVE_ITEM TEST_OPS test_collective_reduce) - LIST(REMOVE_ITEM TEST_OPS test_pipeline_parallel) - LIST(REMOVE_ITEM TEST_OPS test_collective_scatter) - LIST(REMOVE_ITEM TEST_OPS test_collective_sendrecv) - LIST(REMOVE_ITEM TEST_OPS test_reducescatter) - LIST(REMOVE_ITEM TEST_OPS test_reducescatter_api) - LIST(REMOVE_ITEM TEST_OPS test_collective_split_embedding) - LIST(REMOVE_ITEM TEST_OPS test_collective_split_embedding_none_divisible) - LIST(REMOVE_ITEM TEST_OPS test_collective_split_row_linear) - LIST(REMOVE_ITEM TEST_OPS test_collective_split_col_linear) - LIST(REMOVE_ITEM TEST_OPS test_collective_reduce_api) - LIST(REMOVE_ITEM TEST_OPS test_collective_scatter_api) - LIST(REMOVE_ITEM TEST_OPS test_collective_barrier_api) - LIST(REMOVE_ITEM TEST_OPS test_collective_allreduce_api) - LIST(REMOVE_ITEM TEST_OPS test_new_group_api) - LIST(REMOVE_ITEM TEST_OPS test_collective_broadcast_api) - LIST(REMOVE_ITEM TEST_OPS test_collective_allgather_api) - LIST(REMOVE_ITEM TEST_OPS test_collective_alltoall_api) - LIST(REMOVE_ITEM TEST_OPS test_collective_global_gather) - LIST(REMOVE_ITEM TEST_OPS test_collective_global_scatter) - LIST(REMOVE_ITEM TEST_OPS test_collective_sendrecv_api) - LIST(REMOVE_ITEM TEST_OPS test_collective_wait) - LIST(REMOVE_ITEM TEST_OPS test_memcpy_op) - LIST(REMOVE_ITEM TEST_OPS test_raw_program_optimizer) - LIST(REMOVE_ITEM TEST_OPS test_fleet_gradient_scale) - LIST(REMOVE_ITEM TEST_OPS test_disable_signal_handler) - LIST(REMOVE_ITEM TEST_OPS test_fleet_executor) - LIST(REMOVE_ITEM TEST_OPS test_fleet_executor_with_task_nodes) - LIST(REMOVE_ITEM TEST_OPS test_fleet_executor_multi_devices) - LIST(REMOVE_ITEM TEST_OPS test_fleet_executor_origin_scheduler) - LIST(REMOVE_ITEM TEST_OPS test_auto_parallel_mapper) - LIST(REMOVE_ITEM TEST_OPS test_fleet_executor_task_node) - LIST(REMOVE_ITEM TEST_OPS test_fleet_exe_dist_model_run) - LIST(REMOVE_ITEM TEST_OPS test_fleet_exe_dist_model_tensor) + list(REMOVE_ITEM TEST_OPS test_c_comm_init_all_op) + list(REMOVE_ITEM TEST_OPS test_c_concat) + list(REMOVE_ITEM TEST_OPS test_c_split) + list(REMOVE_ITEM TEST_OPS test_allgather) + list(REMOVE_ITEM TEST_OPS test_c_identity) + list(REMOVE_ITEM TEST_OPS test_c_embedding_op) + list(REMOVE_ITEM TEST_OPS test_allreduce) + list(REMOVE_ITEM TEST_OPS test_broadcast) + list(REMOVE_ITEM TEST_OPS test_collective_reduce) + list(REMOVE_ITEM TEST_OPS test_pipeline_parallel) + list(REMOVE_ITEM TEST_OPS test_collective_scatter) + list(REMOVE_ITEM TEST_OPS test_collective_sendrecv) + list(REMOVE_ITEM TEST_OPS test_reducescatter) + list(REMOVE_ITEM TEST_OPS test_reducescatter_api) + list(REMOVE_ITEM TEST_OPS test_collective_split_embedding) + list(REMOVE_ITEM TEST_OPS test_collective_split_embedding_none_divisible) + list(REMOVE_ITEM TEST_OPS test_collective_split_row_linear) + list(REMOVE_ITEM TEST_OPS test_collective_split_col_linear) + list(REMOVE_ITEM TEST_OPS test_collective_reduce_api) + list(REMOVE_ITEM TEST_OPS test_collective_scatter_api) + list(REMOVE_ITEM TEST_OPS test_collective_barrier_api) + list(REMOVE_ITEM TEST_OPS test_collective_allreduce_api) + list(REMOVE_ITEM TEST_OPS test_new_group_api) + list(REMOVE_ITEM TEST_OPS test_collective_broadcast_api) + list(REMOVE_ITEM TEST_OPS test_collective_allgather_api) + list(REMOVE_ITEM TEST_OPS test_collective_alltoall_api) + list(REMOVE_ITEM TEST_OPS test_collective_global_gather) + list(REMOVE_ITEM TEST_OPS test_collective_global_scatter) + list(REMOVE_ITEM TEST_OPS test_collective_sendrecv_api) + list(REMOVE_ITEM TEST_OPS test_collective_wait) + list(REMOVE_ITEM TEST_OPS test_memcpy_op) + list(REMOVE_ITEM TEST_OPS test_raw_program_optimizer) + list(REMOVE_ITEM TEST_OPS test_fleet_gradient_scale) + list(REMOVE_ITEM TEST_OPS test_disable_signal_handler) + list(REMOVE_ITEM TEST_OPS test_fleet_executor) + list(REMOVE_ITEM TEST_OPS test_fleet_executor_with_task_nodes) + list(REMOVE_ITEM TEST_OPS test_fleet_executor_multi_devices) + list(REMOVE_ITEM TEST_OPS test_fleet_executor_origin_scheduler) + list(REMOVE_ITEM TEST_OPS test_auto_parallel_mapper) + list(REMOVE_ITEM TEST_OPS test_fleet_executor_task_node) + list(REMOVE_ITEM TEST_OPS test_fleet_exe_dist_model_run) + list(REMOVE_ITEM TEST_OPS test_fleet_exe_dist_model_tensor) endif() # Temporally disable test_deprecated_decorator -LIST(REMOVE_ITEM TEST_OPS test_deprecated_decorator) +list(REMOVE_ITEM TEST_OPS test_deprecated_decorator) -LIST(REMOVE_ITEM TEST_OPS test_tensordot) +list(REMOVE_ITEM TEST_OPS test_tensordot) if(WIN32) - LIST(REMOVE_ITEM TEST_OPS test_multiprocess_reader_exception) - LIST(REMOVE_ITEM TEST_OPS test_trainer_desc) - LIST(REMOVE_ITEM TEST_OPS test_checkpoint_notify_op) - LIST(REMOVE_ITEM TEST_OPS test_downpoursgd) - LIST(REMOVE_ITEM TEST_OPS test_fleet) - LIST(REMOVE_ITEM TEST_OPS test_fleet_nocvm_1) - LIST(REMOVE_ITEM TEST_OPS test_fleet_rolemaker) - LIST(REMOVE_ITEM TEST_OPS test_fleet_rolemaker_3) - LIST(REMOVE_ITEM TEST_OPS test_fleet_unitaccessor) - LIST(REMOVE_ITEM TEST_OPS test_ps_dispatcher) - LIST(REMOVE_ITEM TEST_OPS test_ir_memory_optimize_nlp) - LIST(REMOVE_ITEM TEST_OPS test_nvprof) - - # TODO: Fix these unittests failed on Windows - LIST(REMOVE_ITEM TEST_OPS test_debugger) - if (WITH_GPU) - LIST(REMOVE_ITEM TEST_OPS test_update_loss_scaling_op) - endif() + list(REMOVE_ITEM TEST_OPS test_multiprocess_reader_exception) + list(REMOVE_ITEM TEST_OPS test_trainer_desc) + list(REMOVE_ITEM TEST_OPS test_checkpoint_notify_op) + list(REMOVE_ITEM TEST_OPS test_downpoursgd) + list(REMOVE_ITEM TEST_OPS test_fleet) + list(REMOVE_ITEM TEST_OPS test_fleet_nocvm_1) + list(REMOVE_ITEM TEST_OPS test_fleet_rolemaker) + list(REMOVE_ITEM TEST_OPS test_fleet_rolemaker_3) + list(REMOVE_ITEM TEST_OPS test_fleet_unitaccessor) + list(REMOVE_ITEM TEST_OPS test_ps_dispatcher) + list(REMOVE_ITEM TEST_OPS test_ir_memory_optimize_nlp) + list(REMOVE_ITEM TEST_OPS test_nvprof) + + # TODO: Fix these unittests failed on Windows + list(REMOVE_ITEM TEST_OPS test_debugger) + if(WITH_GPU) + list(REMOVE_ITEM TEST_OPS test_update_loss_scaling_op) + endif() endif() if(NOT WITH_DISTRIBUTE OR WIN32) - # DISTRIBUTE related - LIST(REMOVE_ITEM TEST_OPS test_avoid_twice_initialization) - LIST(REMOVE_ITEM TEST_OPS test_distributed_strategy) - LIST(REMOVE_ITEM TEST_OPS test_fleet_metric) - LIST(REMOVE_ITEM TEST_OPS test_fleet_ps) - LIST(REMOVE_ITEM TEST_OPS test_fleet_rolemaker_2) - LIST(REMOVE_ITEM TEST_OPS test_fleet_utils) - LIST(REMOVE_ITEM TEST_OPS test_collective_cpu_barrier_with_gloo) - - # TODO: Fix these unittests failed on Windows - list(REMOVE_ITEM TEST_OPS test_fake_init_op) + # DISTRIBUTE related + list(REMOVE_ITEM TEST_OPS test_avoid_twice_initialization) + list(REMOVE_ITEM TEST_OPS test_distributed_strategy) + list(REMOVE_ITEM TEST_OPS test_fleet_metric) + list(REMOVE_ITEM TEST_OPS test_fleet_ps) + list(REMOVE_ITEM TEST_OPS test_fleet_rolemaker_2) + list(REMOVE_ITEM TEST_OPS test_fleet_utils) + list(REMOVE_ITEM TEST_OPS test_collective_cpu_barrier_with_gloo) + + # TODO: Fix these unittests failed on Windows + list(REMOVE_ITEM TEST_OPS test_fake_init_op) endif() if(NOT WITH_DISTRIBUTE) - LIST(REMOVE_ITEM TEST_OPS test_fleet_rolemaker_new) - LIST(REMOVE_ITEM TEST_OPS test_desc_clone_dist) + list(REMOVE_ITEM TEST_OPS test_fleet_rolemaker_new) + list(REMOVE_ITEM TEST_OPS test_desc_clone_dist) endif() if(WIN32) - LIST(REMOVE_ITEM TEST_OPS test_complex_matmul) - LIST(REMOVE_ITEM TEST_OPS test_ops_nms) + list(REMOVE_ITEM TEST_OPS test_complex_matmul) + list(REMOVE_ITEM TEST_OPS test_ops_nms) endif() -LIST(REMOVE_ITEM TEST_OPS test_fleet_checkpoint) -LIST(REMOVE_ITEM TEST_OPS test_auto_checkpoint) -LIST(REMOVE_ITEM TEST_OPS test_auto_checkpoint1) -LIST(REMOVE_ITEM TEST_OPS test_auto_checkpoint2) -LIST(REMOVE_ITEM TEST_OPS test_auto_checkpoint3) -LIST(REMOVE_ITEM TEST_OPS test_auto_checkpoint_multiple) -LIST(REMOVE_ITEM TEST_OPS test_auto_checkpoint_dist_basic) -LIST(REMOVE_ITEM TEST_OPS test_hdfs1) -LIST(REMOVE_ITEM TEST_OPS test_hdfs2) -LIST(REMOVE_ITEM TEST_OPS test_hdfs3) -LIST(REMOVE_ITEM TEST_OPS test_checkpoint_saver) +list(REMOVE_ITEM TEST_OPS test_fleet_checkpoint) +list(REMOVE_ITEM TEST_OPS test_auto_checkpoint) +list(REMOVE_ITEM TEST_OPS test_auto_checkpoint1) +list(REMOVE_ITEM TEST_OPS test_auto_checkpoint2) +list(REMOVE_ITEM TEST_OPS test_auto_checkpoint3) +list(REMOVE_ITEM TEST_OPS test_auto_checkpoint_multiple) +list(REMOVE_ITEM TEST_OPS test_auto_checkpoint_dist_basic) +list(REMOVE_ITEM TEST_OPS test_hdfs1) +list(REMOVE_ITEM TEST_OPS test_hdfs2) +list(REMOVE_ITEM TEST_OPS test_hdfs3) +list(REMOVE_ITEM TEST_OPS test_checkpoint_saver) if(APPLE OR WIN32) - LIST(REMOVE_ITEM TEST_OPS test_fs_interface) - LIST(REMOVE_ITEM TEST_OPS test_fleet_metric) + list(REMOVE_ITEM TEST_OPS test_fs_interface) + list(REMOVE_ITEM TEST_OPS test_fleet_metric) endif() list(REMOVE_ITEM TEST_OPS test_parallel_dygraph_hybrid_parallel) -LIST(REMOVE_ITEM TEST_OPS test_parallel_dygraph_transformer_gloo) # NOTE: @xiongkun03, cpu is too slow, fix it in next PR +list(REMOVE_ITEM TEST_OPS test_parallel_dygraph_transformer_gloo +)# NOTE: @xiongkun03, cpu is too slow, fix it in next PR -if (NOT WITH_GLOO) - LIST(REMOVE_ITEM TEST_OPS test_parallel_dygraph_dataparallel_cpuonly) +if(NOT WITH_GLOO) + list(REMOVE_ITEM TEST_OPS test_parallel_dygraph_dataparallel_cpuonly) - LIST(REMOVE_ITEM TEST_OPS test_parallel_dygraph_unused_variables_gloo) - LIST(REMOVE_ITEM TEST_OPS test_parallel_dygraph_sparse_embedding_over_height_gloo) - LIST(REMOVE_ITEM TEST_OPS test_parallel_dygraph_sparse_embedding_gloo) - LIST(REMOVE_ITEM TEST_OPS test_parallel_dygraph_sparse_embedding_diff_length_gloo) + list(REMOVE_ITEM TEST_OPS test_parallel_dygraph_unused_variables_gloo) + list(REMOVE_ITEM TEST_OPS + test_parallel_dygraph_sparse_embedding_over_height_gloo) + list(REMOVE_ITEM TEST_OPS test_parallel_dygraph_sparse_embedding_gloo) + list(REMOVE_ITEM TEST_OPS + test_parallel_dygraph_sparse_embedding_diff_length_gloo) endif() -if ((NOT WITH_GPU) AND (NOT WITH_ROCM)) - LIST(REMOVE_ITEM TEST_OPS test_conv2d_fusion_op) - LIST(REMOVE_ITEM TEST_OPS test_rank_attention_op) # TODO(shenliang03): rank_attention_op support CPU device in future - LIST(REMOVE_ITEM TEST_OPS test_batch_fc_op) # TODO(shenliang03): batch_fc_op support CPU device in future - LIST(REMOVE_ITEM TEST_OPS test_parallel_dygraph_mnist) # TODO(Yancey1989): parallel dygraph support CPU device in future - list(REMOVE_ITEM TEST_OPS test_parallel_dygraph_unused_variables) - list(REMOVE_ITEM TEST_OPS test_parallel_dygraph_se_resnext) - LIST(REMOVE_ITEM TEST_OPS test_parallel_dygraph_sparse_embedding) - LIST(REMOVE_ITEM TEST_OPS test_parallel_dygraph_sparse_embedding_over_height) - LIST(REMOVE_ITEM TEST_OPS test_parallel_dygraph_transformer) - LIST(REMOVE_ITEM TEST_OPS test_parallel_dygraph_sync_batch_norm) - list(REMOVE_ITEM TEST_OPS test_parallel_dygraph_control_flow) - list(REMOVE_ITEM TEST_OPS test_parallel_dygraph_no_sync) - list(REMOVE_ITEM TEST_OPS test_parallel_dygraph_no_sync_gradient_check) - list(REMOVE_ITEM TEST_OPS test_parallel_dygraph_dataparallel) - list(REMOVE_ITEM TEST_OPS test_parallel_dygraph_pipeline_parallel) - list(REMOVE_ITEM TEST_OPS test_parallel_dygraph_tensor_parallel) - list(REMOVE_ITEM TEST_OPS test_parallel_dygraph_sharding_parallel) - list(REMOVE_ITEM TEST_OPS test_dygraph_sharding_optimizer_stage2) - list(REMOVE_ITEM TEST_OPS test_dygraph_sharding_stage2) - list(REMOVE_ITEM TEST_OPS test_dygraph_sharding_stage3) - list(REMOVE_ITEM TEST_OPS test_dygraph_group_sharded_api) - list(REMOVE_ITEM TEST_OPS test_auto_parallel_parallelizer) - list(REMOVE_ITEM TEST_OPS test_parallel_dygraph_mp_layers) - LIST(REMOVE_ITEM TEST_OPS test_imperative_auto_mixed_precision) - LIST(REMOVE_ITEM TEST_OPS test_mixed_precision) - LIST(REMOVE_ITEM TEST_OPS test_fleet_base_single) - LIST(REMOVE_ITEM TEST_OPS test_dygraph_recompute) - list(REMOVE_ITEM TEST_OPS test_hybrid_parallel_inference_helper) - list(REMOVE_ITEM TEST_OPS test_parallel_class_center_sample) - LIST(REMOVE_ITEM TEST_OPS test_parallel_margin_cross_entropy) - LIST(REMOVE_ITEM TEST_OPS test_auto_parallel_partitioner) - LIST(REMOVE_ITEM TEST_OPS test_auto_parallel_partitioner_gpt) - LIST(REMOVE_ITEM TEST_OPS test_auto_parallel_searcher) - LIST(REMOVE_ITEM TEST_OPS test_auto_parallel_reshard) - LIST(REMOVE_ITEM TEST_OPS test_auto_parallel_dist_tensor) - LIST(REMOVE_ITEM TEST_OPS test_auto_parallel_reshard_serial) - LIST(REMOVE_ITEM TEST_OPS test_auto_parallel_reshard_mppp) - LIST(REMOVE_ITEM TEST_OPS test_auto_parallel_reshard_dpmppp) - LIST(REMOVE_ITEM TEST_OPS test_auto_parallel_cost_model) - LIST(REMOVE_ITEM TEST_OPS test_auto_parallel_data_unshard) - LIST(REMOVE_ITEM TEST_OPS test_auto_parallel_save_load) - LIST(REMOVE_ITEM TEST_OPS test_auto_parallel_autoconvert) - LIST(REMOVE_ITEM TEST_OPS test_collective_process_group) - LIST(REMOVE_ITEM TEST_OPS test_eager_dist_api) +if((NOT WITH_GPU) AND (NOT WITH_ROCM)) + list(REMOVE_ITEM TEST_OPS test_conv2d_fusion_op) + list(REMOVE_ITEM TEST_OPS test_rank_attention_op + )# TODO(shenliang03): rank_attention_op support CPU device in future + list(REMOVE_ITEM TEST_OPS test_batch_fc_op + )# TODO(shenliang03): batch_fc_op support CPU device in future + list(REMOVE_ITEM TEST_OPS test_parallel_dygraph_mnist + )# TODO(Yancey1989): parallel dygraph support CPU device in future + list(REMOVE_ITEM TEST_OPS test_parallel_dygraph_unused_variables) + list(REMOVE_ITEM TEST_OPS test_parallel_dygraph_se_resnext) + list(REMOVE_ITEM TEST_OPS test_parallel_dygraph_sparse_embedding) + list(REMOVE_ITEM TEST_OPS test_parallel_dygraph_sparse_embedding_over_height) + list(REMOVE_ITEM TEST_OPS test_parallel_dygraph_transformer) + list(REMOVE_ITEM TEST_OPS test_parallel_dygraph_sync_batch_norm) + list(REMOVE_ITEM TEST_OPS test_parallel_dygraph_control_flow) + list(REMOVE_ITEM TEST_OPS test_parallel_dygraph_no_sync) + list(REMOVE_ITEM TEST_OPS test_parallel_dygraph_no_sync_gradient_check) + list(REMOVE_ITEM TEST_OPS test_parallel_dygraph_dataparallel) + list(REMOVE_ITEM TEST_OPS test_parallel_dygraph_pipeline_parallel) + list(REMOVE_ITEM TEST_OPS test_parallel_dygraph_tensor_parallel) + list(REMOVE_ITEM TEST_OPS test_parallel_dygraph_sharding_parallel) + list(REMOVE_ITEM TEST_OPS test_dygraph_sharding_optimizer_stage2) + list(REMOVE_ITEM TEST_OPS test_dygraph_sharding_stage2) + list(REMOVE_ITEM TEST_OPS test_dygraph_sharding_stage3) + list(REMOVE_ITEM TEST_OPS test_dygraph_group_sharded_api) + list(REMOVE_ITEM TEST_OPS test_auto_parallel_parallelizer) + list(REMOVE_ITEM TEST_OPS test_parallel_dygraph_mp_layers) + list(REMOVE_ITEM TEST_OPS test_imperative_auto_mixed_precision) + list(REMOVE_ITEM TEST_OPS test_mixed_precision) + list(REMOVE_ITEM TEST_OPS test_fleet_base_single) + list(REMOVE_ITEM TEST_OPS test_dygraph_recompute) + list(REMOVE_ITEM TEST_OPS test_hybrid_parallel_inference_helper) + list(REMOVE_ITEM TEST_OPS test_parallel_class_center_sample) + list(REMOVE_ITEM TEST_OPS test_parallel_margin_cross_entropy) + list(REMOVE_ITEM TEST_OPS test_auto_parallel_partitioner) + list(REMOVE_ITEM TEST_OPS test_auto_parallel_partitioner_gpt) + list(REMOVE_ITEM TEST_OPS test_auto_parallel_searcher) + list(REMOVE_ITEM TEST_OPS test_auto_parallel_reshard) + list(REMOVE_ITEM TEST_OPS test_auto_parallel_dist_tensor) + list(REMOVE_ITEM TEST_OPS test_auto_parallel_reshard_serial) + list(REMOVE_ITEM TEST_OPS test_auto_parallel_reshard_mppp) + list(REMOVE_ITEM TEST_OPS test_auto_parallel_reshard_dpmppp) + list(REMOVE_ITEM TEST_OPS test_auto_parallel_cost_model) + list(REMOVE_ITEM TEST_OPS test_auto_parallel_data_unshard) + list(REMOVE_ITEM TEST_OPS test_auto_parallel_save_load) + list(REMOVE_ITEM TEST_OPS test_auto_parallel_autoconvert) + list(REMOVE_ITEM TEST_OPS test_collective_process_group) + list(REMOVE_ITEM TEST_OPS test_eager_dist_api) elseif(WITH_GPU) - if (${CUDNN_VERSION} VERSION_LESS 7100) - LIST(REMOVE_ITEM TEST_OPS test_conv2d_fusion_op) - endif() + if(${CUDNN_VERSION} VERSION_LESS 7100) + list(REMOVE_ITEM TEST_OPS test_conv2d_fusion_op) + endif() endif() -if (WITH_NCCL) - if (${NCCL_VERSION} VERSION_LESS 2212) - LIST(REMOVE_ITEM DIST_TEST_OPS test_parallel_dygraph_sparse_embedding) - LIST(REMOVE_ITEM DIST_TEST_OPS test_parallel_dygraph_sparse_embedding_over_height) - LIST(REMOVE_ITEM DIST_TEST_OPS test_parallel_dygraph_transformer) - endif() +if(WITH_NCCL) + if(${NCCL_VERSION} VERSION_LESS 2212) + list(REMOVE_ITEM DIST_TEST_OPS test_parallel_dygraph_sparse_embedding) + list(REMOVE_ITEM DIST_TEST_OPS + test_parallel_dygraph_sparse_embedding_over_height) + list(REMOVE_ITEM DIST_TEST_OPS test_parallel_dygraph_transformer) + endif() endif() -if ((NOT WITH_NCCL) AND (NOT WITH_RCCL)) - list(REMOVE_ITEM TEST_OPS test_imperative_group) - LIST(REMOVE_ITEM TEST_OPS test_new_group_api) +if((NOT WITH_NCCL) AND (NOT WITH_RCCL)) + list(REMOVE_ITEM TEST_OPS test_imperative_group) + list(REMOVE_ITEM TEST_OPS test_new_group_api) endif() if(((NOT WITH_ROCM) AND (NOT WITH_GPU)) OR WIN32) - LIST(REMOVE_ITEM TEST_OPS test_fused_gate_attention_op) - LIST(REMOVE_ITEM TEST_OPS test_boxps) + list(REMOVE_ITEM TEST_OPS test_fused_gate_attention_op) + list(REMOVE_ITEM TEST_OPS test_boxps) endif() -list(REMOVE_ITEM TEST_OPS test_seq_concat_op) # FIXME(helin): https://github.com/PaddlePaddle/Paddle/issues/8290 -list(REMOVE_ITEM TEST_OPS test_lstm_unit_op) # # FIXME(qijun) https://github.com/PaddlePaddle/Paddle/issues/5185 -list(REMOVE_ITEM TEST_OPS test_cond_op) # FIXME(qijun): https://github.com/PaddlePaddle/Paddle/issues/5101#issuecomment-339814957 +list(REMOVE_ITEM TEST_OPS test_seq_concat_op +)# FIXME(helin): https://github.com/PaddlePaddle/Paddle/issues/8290 +list(REMOVE_ITEM TEST_OPS test_lstm_unit_op +)# # FIXME(qijun) https://github.com/PaddlePaddle/Paddle/issues/5185 +list(REMOVE_ITEM TEST_OPS test_cond_op) + +# FIXME(qijun): https://github.com/PaddlePaddle/Paddle/issues/5101#issuecomment-339814957 list(REMOVE_ITEM TEST_OPS op_test) # op_test is a helper python file, not a test -list(REMOVE_ITEM TEST_OPS decorator_helper) # decorator_helper is a helper python file, not a test +list(REMOVE_ITEM TEST_OPS decorator_helper +)# decorator_helper is a helper python file, not a test if(APPLE) - if(NOT WITH_DISTRIBUTE) - list(REMOVE_ITEM TEST_OPS test_desc_clone) - list(REMOVE_ITEM TEST_OPS test_program_code) - endif(NOT WITH_DISTRIBUTE) - message(WARNING "These tests has been disabled in OSX before being fixed:\n test_fuse_elewise_add_act_pass \n test_detection_map_op \n test_dist_se_resnext_*") - # this op is not support on mac - list(REMOVE_ITEM TEST_OPS test_fusion_seqexpand_concat_fc_op) - list(REMOVE_ITEM TEST_OPS test_detection_map_op) - list(REMOVE_ITEM TEST_OPS test_fuse_elewise_add_act_pass) + if(NOT WITH_DISTRIBUTE) + list(REMOVE_ITEM TEST_OPS test_desc_clone) + list(REMOVE_ITEM TEST_OPS test_program_code) + endif(NOT WITH_DISTRIBUTE) + message( + WARNING + "These tests has been disabled in OSX before being fixed:\n test_fuse_elewise_add_act_pass \n test_detection_map_op \n test_dist_se_resnext_*" + ) + # this op is not support on mac + list(REMOVE_ITEM TEST_OPS test_fusion_seqexpand_concat_fc_op) + list(REMOVE_ITEM TEST_OPS test_detection_map_op) + list(REMOVE_ITEM TEST_OPS test_fuse_elewise_add_act_pass) endif() if(NOT WITH_MKLML) - # this op is not support on openblas - list(REMOVE_ITEM TEST_OPS test_fusion_seqexpand_concat_fc_op) + # this op is not support on openblas + list(REMOVE_ITEM TEST_OPS test_fusion_seqexpand_concat_fc_op) endif() if(NOT WITH_MKL OR NOT WITH_AVX) @@ -360,19 +387,21 @@ if(NOT WITH_MKL OR NOT WITH_AVX) list(REMOVE_ITEM TEST_OPS test_var_conv_2d) endif() -if(WITH_COVERAGE OR WIN32 OR WITH_NV_JETSON) +if(WITH_COVERAGE + OR WIN32 + OR WITH_NV_JETSON) list(REMOVE_ITEM TEST_OPS test_pyramid_hash_op) endif() list(REMOVE_ITEM TEST_OPS test_fleet_pyramid_hash) if((WITH_ROCM OR WITH_GPU) OR NOT WITH_MKLML) - # matmul with multiple heads need MKL support - LIST(REMOVE_ITEM TEST_OPS test_matmul_op_with_head) + # matmul with multiple heads need MKL support + list(REMOVE_ITEM TEST_OPS test_matmul_op_with_head) endif() if(NOT WITH_CRYPTO) - LIST(REMOVE_ITEM TEST_OPS test_crypto) + list(REMOVE_ITEM TEST_OPS test_crypto) endif() function(py_test_modules TARGET_NAME) @@ -380,126 +409,158 @@ function(py_test_modules TARGET_NAME) set(options SERIAL) set(oneValueArgs "") set(multiValueArgs MODULES DEPS ENVS) - cmake_parse_arguments(py_test_modules "${options}" "${oneValueArgs}" "${multiValueArgs}" ${ARGN}) - - if(WITH_COVERAGE AND NOT (WITH_INCREMENTAL_COVERAGE AND "$ENV{PADDLE_GIT_DIFF_PY_FILE}" STREQUAL "")) - if(WITH_ASCEND_CL) - add_test(NAME ${TARGET_NAME} - COMMAND ${CMAKE_COMMAND} -E env PYTHONPATH=${PADDLE_BINARY_DIR}/python:$ENV{PYTHONPATH} ${py_test_modules_ENVS} - COVERAGE_FILE=${PADDLE_BINARY_DIR}/python-coverage.data - ${PYTHON_EXECUTABLE} -m coverage run --branch -p ${PADDLE_SOURCE_DIR}/tools/test_runner.py ${py_test_modules_MODULES} - WORKING_DIRECTORY ${CMAKE_CURRENT_BINARY_DIR}) - else() - add_test(NAME ${TARGET_NAME} - COMMAND ${CMAKE_COMMAND} -E env PYTHONPATH=${PADDLE_BINARY_DIR}/python ${py_test_modules_ENVS} - COVERAGE_FILE=${PADDLE_BINARY_DIR}/python-coverage.data - ${PYTHON_EXECUTABLE} -m coverage run --branch -p ${PADDLE_SOURCE_DIR}/tools/test_runner.py ${py_test_modules_MODULES} - WORKING_DIRECTORY ${CMAKE_CURRENT_BINARY_DIR}) - endif() + cmake_parse_arguments(py_test_modules "${options}" "${oneValueArgs}" + "${multiValueArgs}" ${ARGN}) + + if(WITH_COVERAGE AND NOT (WITH_INCREMENTAL_COVERAGE + AND "$ENV{PADDLE_GIT_DIFF_PY_FILE}" STREQUAL "")) + if(WITH_ASCEND_CL) + add_test( + NAME ${TARGET_NAME} + COMMAND + ${CMAKE_COMMAND} -E env + PYTHONPATH=${PADDLE_BINARY_DIR}/python:$ENV{PYTHONPATH} + ${py_test_modules_ENVS} + COVERAGE_FILE=${PADDLE_BINARY_DIR}/python-coverage.data + ${PYTHON_EXECUTABLE} -m coverage run --branch -p + ${PADDLE_SOURCE_DIR}/tools/test_runner.py ${py_test_modules_MODULES} + WORKING_DIRECTORY ${CMAKE_CURRENT_BINARY_DIR}) + else() + add_test( + NAME ${TARGET_NAME} + COMMAND + ${CMAKE_COMMAND} -E env PYTHONPATH=${PADDLE_BINARY_DIR}/python + ${py_test_modules_ENVS} + COVERAGE_FILE=${PADDLE_BINARY_DIR}/python-coverage.data + ${PYTHON_EXECUTABLE} -m coverage run --branch -p + ${PADDLE_SOURCE_DIR}/tools/test_runner.py ${py_test_modules_MODULES} + WORKING_DIRECTORY ${CMAKE_CURRENT_BINARY_DIR}) + endif() else() - if(WITH_ASCEND_CL) - add_test(NAME ${TARGET_NAME} - COMMAND ${CMAKE_COMMAND} -E env PYTHONPATH=${PADDLE_BINARY_DIR}/python:$ENV{PYTHONPATH} ${py_test_modules_ENVS} - ${PYTHON_EXECUTABLE} ${PADDLE_SOURCE_DIR}/tools/test_runner.py ${py_test_modules_MODULES} - WORKING_DIRECTORY ${CMAKE_CURRENT_BINARY_DIR}) - else() - add_test(NAME ${TARGET_NAME} - COMMAND ${CMAKE_COMMAND} -E env PYTHONPATH=${PADDLE_BINARY_DIR}/python ${py_test_modules_ENVS} - ${PYTHON_EXECUTABLE} ${PADDLE_SOURCE_DIR}/tools/test_runner.py ${py_test_modules_MODULES} - WORKING_DIRECTORY ${CMAKE_CURRENT_BINARY_DIR}) - endif() + if(WITH_ASCEND_CL) + add_test( + NAME ${TARGET_NAME} + COMMAND + ${CMAKE_COMMAND} -E env + PYTHONPATH=${PADDLE_BINARY_DIR}/python:$ENV{PYTHONPATH} + ${py_test_modules_ENVS} ${PYTHON_EXECUTABLE} + ${PADDLE_SOURCE_DIR}/tools/test_runner.py ${py_test_modules_MODULES} + WORKING_DIRECTORY ${CMAKE_CURRENT_BINARY_DIR}) + else() + add_test( + NAME ${TARGET_NAME} + COMMAND + ${CMAKE_COMMAND} -E env PYTHONPATH=${PADDLE_BINARY_DIR}/python + ${py_test_modules_ENVS} ${PYTHON_EXECUTABLE} + ${PADDLE_SOURCE_DIR}/tools/test_runner.py ${py_test_modules_MODULES} + WORKING_DIRECTORY ${CMAKE_CURRENT_BINARY_DIR}) + endif() endif() - if (py_test_modules_SERIAL) - set_property(TEST ${TARGET_NAME} PROPERTY RUN_SERIAL 1) + if(py_test_modules_SERIAL) + set_property(TEST ${TARGET_NAME} PROPERTY RUN_SERIAL 1) endif() if(WIN32) - set_tests_properties(${TARGET_NAME} PROPERTIES TIMEOUT 150) + set_tests_properties(${TARGET_NAME} PROPERTIES TIMEOUT 150) endif() endif() endfunction() - function(bash_test_modules TARGET_NAME) - if(NOT WITH_TESTING) - return() - endif() - - set(options SERIAL) - set(oneValueArgs TIMEOUT START_BASH) - set(multiValueArgs DEPS ENVS LABELS) - cmake_parse_arguments(bash_test_modules "${options}" "${oneValueArgs}" "${multiValueArgs}" ${ARGN}) + if(NOT WITH_TESTING) + return() + endif() + set(options SERIAL) + set(oneValueArgs TIMEOUT START_BASH) + set(multiValueArgs DEPS ENVS LABELS) + cmake_parse_arguments(bash_test_modules "${options}" "${oneValueArgs}" + "${multiValueArgs}" ${ARGN}) - set(timeout 350) - if(${bash_test_modules_TIMEOUT}) - set(timeout ${bash_test_modules_TIMEOUT}) - endif() + set(timeout 350) + if(${bash_test_modules_TIMEOUT}) + set(timeout ${bash_test_modules_TIMEOUT}) + endif() - if(WITH_COVERAGE) - add_test(NAME ${TARGET_NAME} - COMMAND ${CMAKE_COMMAND} -E env PYTHONPATH=${PADDLE_BINARY_DIR}/python - TEST_TARGET_NAME=${TARGET_NAME} TEST_TIMEOUT=${timeout} ${bash_test_modules_ENVS} - WITH_COVERAGE=ON COVERAGE_FILE=${PADDLE_BINARY_DIR}/python-coverage.data - bash ${CMAKE_CURRENT_BINARY_DIR}/${bash_test_modules_START_BASH} - WORKING_DIRECTORY ${CMAKE_CURRENT_BINARY_DIR}) - else() - add_test(NAME ${TARGET_NAME} - COMMAND ${CMAKE_COMMAND} -E env PYTHONPATH=${PADDLE_BINARY_DIR}/python - TEST_TARGET_NAME=${TARGET_NAME} TEST_TIMEOUT=${timeout} ${bash_test_modules_ENVS} - bash ${CMAKE_CURRENT_BINARY_DIR}/${bash_test_modules_START_BASH} - WORKING_DIRECTORY ${CMAKE_CURRENT_BINARY_DIR}) - endif() + if(WITH_COVERAGE) + add_test( + NAME ${TARGET_NAME} + COMMAND + ${CMAKE_COMMAND} -E env PYTHONPATH=${PADDLE_BINARY_DIR}/python + TEST_TARGET_NAME=${TARGET_NAME} TEST_TIMEOUT=${timeout} + ${bash_test_modules_ENVS} WITH_COVERAGE=ON + COVERAGE_FILE=${PADDLE_BINARY_DIR}/python-coverage.data bash + ${CMAKE_CURRENT_BINARY_DIR}/${bash_test_modules_START_BASH} + WORKING_DIRECTORY ${CMAKE_CURRENT_BINARY_DIR}) + else() + add_test( + NAME ${TARGET_NAME} + COMMAND + ${CMAKE_COMMAND} -E env PYTHONPATH=${PADDLE_BINARY_DIR}/python + TEST_TARGET_NAME=${TARGET_NAME} TEST_TIMEOUT=${timeout} + ${bash_test_modules_ENVS} bash + ${CMAKE_CURRENT_BINARY_DIR}/${bash_test_modules_START_BASH} + WORKING_DIRECTORY ${CMAKE_CURRENT_BINARY_DIR}) + endif() - if (bash_test_modules_SERIAL) - set_property(TEST ${TARGET_NAME} PROPERTY RUN_SERIAL 1) - endif() + if(bash_test_modules_SERIAL) + set_property(TEST ${TARGET_NAME} PROPERTY RUN_SERIAL 1) + endif() - if(bash_test_modules_LABELS) - set_tests_properties(${TARGET_NAME} PROPERTIES LABELS ${bash_test_modules_LABELS}) - endif() + if(bash_test_modules_LABELS) + set_tests_properties(${TARGET_NAME} PROPERTIES LABELS + ${bash_test_modules_LABELS}) + endif() endfunction() function(parallel_bash_test_modules TARGET_NAME) - if(NOT WITH_TESTING) - return() - endif() - - set(options SERIAL) - set(oneValueArgs TIMEOUT START_BASH) - set(multiValueArgs DEPS ENVS LABELS UnitTests) - cmake_parse_arguments(parallel_bash_test_modules "${options}" "${oneValueArgs}" "${multiValueArgs}" ${ARGN}) + if(NOT WITH_TESTING) + return() + endif() + set(options SERIAL) + set(oneValueArgs TIMEOUT START_BASH) + set(multiValueArgs DEPS ENVS LABELS UnitTests) + cmake_parse_arguments(parallel_bash_test_modules "${options}" + "${oneValueArgs}" "${multiValueArgs}" ${ARGN}) - set(timeout 120) - if(${parallel_bash_test_modules_TIMEOUT}) - set(timeout ${parallel_bash_test_modules_TIMEOUT}) - endif() - - list(JOIN parallel_bash_test_modules_UnitTests " " uts_string) + set(timeout 120) + if(${parallel_bash_test_modules_TIMEOUT}) + set(timeout ${parallel_bash_test_modules_TIMEOUT}) + endif() - if(WITH_COVERAGE) - add_test(NAME ${TARGET_NAME} - COMMAND ${CMAKE_COMMAND} -E env PYTHONPATH=${PADDLE_BINARY_DIR}/python - TEST_TARGET_NAME=${TARGET_NAME} TEST_TIMEOUT=${timeout} ${parallel_bash_test_modules_ENVS} UnitTests=${uts_string} - WITH_COVERAGE=ON COVERAGE_FILE=${PADDLE_BINARY_DIR}/python-coverage.data - bash ${CMAKE_CURRENT_BINARY_DIR}/${parallel_bash_test_modules_START_BASH} - WORKING_DIRECTORY ${CMAKE_CURRENT_BINARY_DIR}) - else() - add_test(NAME ${TARGET_NAME} - COMMAND ${CMAKE_COMMAND} -E env PYTHONPATH=${PADDLE_BINARY_DIR}/python - TEST_TARGET_NAME=${TARGET_NAME} TEST_TIMEOUT=${timeout} ${parallel_bash_test_modules_ENVS} UnitTests=${uts_string} - bash ${CMAKE_CURRENT_BINARY_DIR}/${parallel_bash_test_modules_START_BASH} - WORKING_DIRECTORY ${CMAKE_CURRENT_BINARY_DIR}) - endif() + list(JOIN parallel_bash_test_modules_UnitTests " " uts_string) + + if(WITH_COVERAGE) + add_test( + NAME ${TARGET_NAME} + COMMAND + ${CMAKE_COMMAND} -E env PYTHONPATH=${PADDLE_BINARY_DIR}/python + TEST_TARGET_NAME=${TARGET_NAME} TEST_TIMEOUT=${timeout} + ${parallel_bash_test_modules_ENVS} UnitTests=${uts_string} + WITH_COVERAGE=ON COVERAGE_FILE=${PADDLE_BINARY_DIR}/python-coverage.data + bash + ${CMAKE_CURRENT_BINARY_DIR}/${parallel_bash_test_modules_START_BASH} + WORKING_DIRECTORY ${CMAKE_CURRENT_BINARY_DIR}) + else() + add_test( + NAME ${TARGET_NAME} + COMMAND + ${CMAKE_COMMAND} -E env PYTHONPATH=${PADDLE_BINARY_DIR}/python + TEST_TARGET_NAME=${TARGET_NAME} TEST_TIMEOUT=${timeout} + ${parallel_bash_test_modules_ENVS} UnitTests=${uts_string} bash + ${CMAKE_CURRENT_BINARY_DIR}/${parallel_bash_test_modules_START_BASH} + WORKING_DIRECTORY ${CMAKE_CURRENT_BINARY_DIR}) + endif() - if (parallel_bash_test_modules_SERIAL) - set_property(TEST ${TARGET_NAME} PROPERTY RUN_SERIAL 1) - endif() + if(parallel_bash_test_modules_SERIAL) + set_property(TEST ${TARGET_NAME} PROPERTY RUN_SERIAL 1) + endif() - if(parallel_bash_test_modules_LABELS) - set_tests_properties(${TARGET_NAME} PROPERTIES LABELS ${parallel_bash_test_modules_LABELS}) - endif() + if(parallel_bash_test_modules_LABELS) + set_tests_properties(${TARGET_NAME} + PROPERTIES LABELS ${parallel_bash_test_modules_LABELS}) + endif() endfunction() list(REMOVE_ITEM TEST_OPS test_feed_data_check_shape_type) @@ -522,7 +583,8 @@ list(REMOVE_ITEM TEST_OPS test_ir_memory_optimize_transformer) list(REMOVE_ITEM TEST_OPS test_layers) list(REMOVE_ITEM TEST_OPS test_parallel_executor_seresnext_base_cpu) list(REMOVE_ITEM TEST_OPS test_parallel_executor_seresnext_with_reduce_cpu) -list(REMOVE_ITEM TEST_OPS test_parallel_executor_seresnext_with_fuse_all_reduce_cpu) +list(REMOVE_ITEM TEST_OPS + test_parallel_executor_seresnext_with_fuse_all_reduce_cpu) list(REMOVE_ITEM TEST_OPS test_imperative_ocr_attention_model) list(REMOVE_ITEM TEST_OPS test_async_ssa_graph_executor_mnist) list(REMOVE_ITEM TEST_OPS test_install_check) @@ -542,11 +604,14 @@ list(REMOVE_ITEM TEST_OPS test_imperative_static_runner_while) list(REMOVE_ITEM TEST_OPS test_imperative_data_loader_exception) # disable sparse_attention which not in suitable env -if ( (NOT WITH_GPU) OR (WIN32) OR (PADDLE_WITH_ARM) OR (WITH_ROCM) ) - list(REMOVE_ITEM TEST_OPS test_sparse_attention_op) +if((NOT WITH_GPU) + OR (WIN32) + OR (PADDLE_WITH_ARM) + OR (WITH_ROCM)) + list(REMOVE_ITEM TEST_OPS test_sparse_attention_op) endif() -if (APPLE OR WIN32) +if(APPLE OR WIN32) list(REMOVE_ITEM TEST_OPS test_dataset) list(REMOVE_ITEM TEST_OPS test_dataset_dataloader) list(REMOVE_ITEM TEST_OPS test_imperative_data_loader_base) @@ -563,33 +628,35 @@ if (APPLE OR WIN32) list(REMOVE_ITEM TEST_OPS test_paddle_multiprocessing) endif() -if (NOT WITH_GLOO) - LIST(REMOVE_ITEM TEST_OPS test_cpuonly_spawn) +if(NOT WITH_GLOO) + list(REMOVE_ITEM TEST_OPS test_cpuonly_spawn) endif() -if(NOT WITH_GPU OR WIN32 OR APPLE) +if(NOT WITH_GPU + OR WIN32 + OR APPLE) list(REMOVE_ITEM TEST_OPS test_build_strategy_fusion_group_pass) endif() # Some ops need to check results when gc is enabled # Currently, only ops that register NoNeedBufferVarsInference need to do this test set(TEST_OPS_WITH_GC - test_affine_channel_op - test_concat_op - test_elementwise_add_op - test_elementwise_sub_op - test_fill_zeros_like2_op - test_gather_op - test_gather_nd_op - test_linear_chain_crf_op - test_lod_reset_op - test_lookup_table_op - test_mean_op - test_pad2d_op - test_scatter_op - test_slice_op - test_space_to_depth_op - test_squared_l2_distance_op) + test_affine_channel_op + test_concat_op + test_elementwise_add_op + test_elementwise_sub_op + test_fill_zeros_like2_op + test_gather_op + test_gather_nd_op + test_linear_chain_crf_op + test_lod_reset_op + test_lookup_table_op + test_mean_op + test_pad2d_op + test_scatter_op + test_slice_op + test_space_to_depth_op + test_squared_l2_distance_op) foreach(TEST_OP ${TEST_OPS_WITH_GC}) list(REMOVE_ITEM TEST_OPS ${TEST_OP}) @@ -603,184 +670,310 @@ foreach(TEST_OP ${TEST_EAGER_OPS}) py_test_modules(${TEST_OP} MODULES ${TEST_OP} ENVS FLAGS_enable_eager_mode=1) endforeach() -if ((NOT WITH_GPU) AND (NOT WITH_XPU) AND NOT (WITH_ASCEND OR WITH_ASCEND_CL)) - list(REMOVE_ITEM TEST_OPS "test_fleet_graph_execution_meta_optimizer") - list(REMOVE_ITEM TEST_OPS "test_gen_nccl_id_op") - list(REMOVE_ITEM TEST_OPS "test_dist_fleet_grad_clip") - list(REMOVE_ITEM TEST_OPS "test_dist_fleet_heter_ctr") - list(REMOVE_ITEM TEST_OPS "test_dist_fleet_ps_gpu_ctr") - list(REMOVE_ITEM TEST_OPS "test_dist_mnist_batch_merge") +if((NOT WITH_GPU) + AND (NOT WITH_XPU) + AND NOT (WITH_ASCEND OR WITH_ASCEND_CL)) + list(REMOVE_ITEM TEST_OPS "test_fleet_graph_execution_meta_optimizer") + list(REMOVE_ITEM TEST_OPS "test_gen_nccl_id_op") + list(REMOVE_ITEM TEST_OPS "test_dist_fleet_grad_clip") + list(REMOVE_ITEM TEST_OPS "test_dist_fleet_heter_ctr") + list(REMOVE_ITEM TEST_OPS "test_dist_fleet_ps_gpu_ctr") + list(REMOVE_ITEM TEST_OPS "test_dist_mnist_batch_merge") endif() foreach(TEST_OP ${TEST_OPS}) - py_test_modules(${TEST_OP} MODULES ${TEST_OP}) + py_test_modules(${TEST_OP} MODULES ${TEST_OP}) endforeach(TEST_OP) -py_test_modules(test_adam_op_multi_thread MODULES test_adam_op ENVS FLAGS_inner_op_parallelism=4) -if (WITH_GPU OR WITH_XPU OR WITH_ASCEND OR WITH_ASCEND_CL OR APPLE) - py_test_modules(test_warpctc_op MODULES test_warpctc_op) - set_tests_properties(test_warpctc_op PROPERTIES TIMEOUT 120) +py_test_modules(test_adam_op_multi_thread MODULES test_adam_op ENVS + FLAGS_inner_op_parallelism=4) +if(WITH_GPU + OR WITH_XPU + OR WITH_ASCEND + OR WITH_ASCEND_CL + OR APPLE) + py_test_modules(test_warpctc_op MODULES test_warpctc_op) + set_tests_properties(test_warpctc_op PROPERTIES TIMEOUT 120) endif() -py_test_modules(test_bilinear_interp_op MODULES test_bilinear_interp_op ENVS ${GC_ENVS}) -py_test_modules(test_nearest_interp_op MODULES test_nearest_interp_op ENVS ${GC_ENVS}) +py_test_modules(test_bilinear_interp_op MODULES test_bilinear_interp_op ENVS + ${GC_ENVS}) +py_test_modules(test_nearest_interp_op MODULES test_nearest_interp_op ENVS + ${GC_ENVS}) py_test_modules(test_imperative_resnet MODULES test_imperative_resnet ENVS - FLAGS_cudnn_deterministic=1) -set_tests_properties(test_imperative_resnet PROPERTIES LABELS "RUN_TYPE=EXCLUSIVE:NIGHTLY") -py_test_modules(test_imperative_resnet_sorted_gradient MODULES test_imperative_resnet_sorted_gradient ENVS - FLAGS_cudnn_deterministic=1) -set_tests_properties(test_imperative_resnet_sorted_gradient PROPERTIES LABELS "RUN_TYPE=EXCLUSIVE:NIGHTLY") + FLAGS_cudnn_deterministic=1) +set_tests_properties(test_imperative_resnet + PROPERTIES LABELS "RUN_TYPE=EXCLUSIVE:NIGHTLY") +py_test_modules( + test_imperative_resnet_sorted_gradient MODULES + test_imperative_resnet_sorted_gradient ENVS FLAGS_cudnn_deterministic=1) +set_tests_properties(test_imperative_resnet_sorted_gradient + PROPERTIES LABELS "RUN_TYPE=EXCLUSIVE:NIGHTLY") py_test_modules(test_imperative_mnist MODULES test_imperative_mnist ENVS - FLAGS_cudnn_deterministic=1) -py_test_modules(test_imperative_mnist_sorted_gradient MODULES test_imperative_mnist_sorted_gradient ENVS - FLAGS_cudnn_deterministic=1) -py_test_modules(test_imperative_se_resnext MODULES test_imperative_se_resnext ENVS - FLAGS_cudnn_deterministic=1) -set_tests_properties(test_imperative_se_resnext PROPERTIES LABELS "RUN_TYPE=EXCLUSIVE:NIGHTLY") -py_test_modules(test_imperative_ocr_attention_model MODULES test_imperative_ocr_attention_model ENVS - FLAGS_cudnn_deterministic=1) + FLAGS_cudnn_deterministic=1) +py_test_modules( + test_imperative_mnist_sorted_gradient MODULES + test_imperative_mnist_sorted_gradient ENVS FLAGS_cudnn_deterministic=1) +py_test_modules(test_imperative_se_resnext MODULES test_imperative_se_resnext + ENVS FLAGS_cudnn_deterministic=1) +set_tests_properties(test_imperative_se_resnext + PROPERTIES LABELS "RUN_TYPE=EXCLUSIVE:NIGHTLY") +py_test_modules( + test_imperative_ocr_attention_model MODULES + test_imperative_ocr_attention_model ENVS FLAGS_cudnn_deterministic=1) py_test_modules(test_install_check MODULES test_install_check ENVS - FLAGS_cudnn_deterministic=1) + FLAGS_cudnn_deterministic=1) set_tests_properties(test_install_check PROPERTIES LABELS "RUN_TYPE=DIST") -py_test_modules(test_imperative_static_runner_mnist MODULES test_imperative_static_runner_mnist ENVS - FLAGS_cudnn_deterministic=1) -py_test_modules(test_imperative_static_runner_while MODULES test_imperative_static_runner_while ENVS - FLAGS_cudnn_deterministic=1) - -if ((WITH_GPU) AND (CUDA_VERSION GREATER_EQUAL 11.6)) - py_test_modules(test_fused_gemm_epilogue_op MODULES test_fused_gemm_epilogue_op) - py_test_modules(test_fused_gemm_epilogue_grad_op MODULES test_fused_gemm_epilogue_grad_op) - py_test_modules(test_fused_gemm_epilogue_op_with_es MODULES test_fused_gemm_epilogue_op ENVS FLAGS_cublaslt_exhaustive_search_times=30) - py_test_modules(test_fused_gemm_epilogue_grad_op_with_es MODULES test_fused_gemm_epilogue_grad_op ENVS FLAGS_cublaslt_exhaustive_search_times=30) - py_test_modules(test_fuse_gemm_epilogue_pass MODULES test_fuse_gemm_epilogue_pass) +py_test_modules( + test_imperative_static_runner_mnist MODULES + test_imperative_static_runner_mnist ENVS FLAGS_cudnn_deterministic=1) +py_test_modules( + test_imperative_static_runner_while MODULES + test_imperative_static_runner_while ENVS FLAGS_cudnn_deterministic=1) + +if((WITH_GPU) AND (CUDA_VERSION GREATER_EQUAL 11.6)) + py_test_modules(test_fused_gemm_epilogue_op MODULES + test_fused_gemm_epilogue_op) + py_test_modules(test_fused_gemm_epilogue_grad_op MODULES + test_fused_gemm_epilogue_grad_op) + py_test_modules( + test_fused_gemm_epilogue_op_with_es MODULES test_fused_gemm_epilogue_op + ENVS FLAGS_cublaslt_exhaustive_search_times=30) + py_test_modules( + test_fused_gemm_epilogue_grad_op_with_es MODULES + test_fused_gemm_epilogue_grad_op ENVS + FLAGS_cublaslt_exhaustive_search_times=30) + py_test_modules(test_fuse_gemm_epilogue_pass MODULES + test_fuse_gemm_epilogue_pass) endif() set_tests_properties(test_conv2d_op PROPERTIES LABELS "RUN_TYPE=EXCLUSIVE") -set_tests_properties(test_faster_tokenizer_op PROPERTIES LABELS "RUN_TYPE=EXCLUSIVE") -set_tests_properties(test_conv2d_op_depthwise_conv PROPERTIES LABELS "RUN_TYPE=EXCLUSIVE") +set_tests_properties(test_faster_tokenizer_op PROPERTIES LABELS + "RUN_TYPE=EXCLUSIVE") +set_tests_properties(test_conv2d_op_depthwise_conv + PROPERTIES LABELS "RUN_TYPE=EXCLUSIVE") set_tests_properties(test_conv2d_api PROPERTIES LABELS "RUN_TYPE=EXCLUSIVE") set_tests_properties(test_conv_nn_grad PROPERTIES LABELS "RUN_TYPE=EXCLUSIVE") set_tests_properties(test_norm_nn_grad PROPERTIES LABELS "RUN_TYPE=EXCLUSIVE") set_tests_properties(test_nn_grad PROPERTIES LABELS "RUN_TYPE=EXCLUSIVE") if(WITH_DISTRIBUTE) - add_subdirectory(distributed_passes) - add_subdirectory(ps) - add_subdirectory(auto_parallel) - - # FIXME(typhoonzero): add these tests back - list(REMOVE_ITEM DIST_TEST_OPS "test_dist_transformer") - list(REMOVE_ITEM DIST_TEST_OPS "test_dist_transpiler") - - # TODO(sandyhouse): fix and add the ut back - list(REMOVE_ITEM DIST_TEST_OPS "test_dist_mnist_hallreduce") - - #not need - list(REMOVE_ITEM DIST_TEST_OPS "test_dist_base") - list(REMOVE_ITEM DIST_TEST_OPS "test_dist_fleet_base") - - - list(REMOVE_ITEM DIST_TEST_OPS "test_dist_ctr") - list(REMOVE_ITEM DIST_TEST_OPS "test_dist_mnist_lars") - list(REMOVE_ITEM DIST_TEST_OPS "test_dist_mnist_train") - list(REMOVE_ITEM DIST_TEST_OPS "test_dist_save_load") - list(REMOVE_ITEM DIST_TEST_OPS "test_dist_text_classification") - list(REMOVE_ITEM DIST_TEST_OPS "test_dist_train") - list(REMOVE_ITEM DIST_TEST_OPS "test_dist_word2vec") - - list(REMOVE_ITEM DIST_TEST_OPS "test_dist_fleet_gloo") - - py_test_modules(test_recv_save_op MODULES test_recv_save_op ENVS ${dist_ENVS}) - py_test_modules(test_communicator_async MODULES test_communicator_async ENVS ${dist_ENVS}) - py_test_modules(test_communicator_ps_gpu MODULES test_communicator_ps_gpu ENVS ${dist_ENVS}) - py_test_modules(test_communicator_geo MODULES test_communicator_geo ENVS ${dist_ENVS}) - py_test_modules(test_communicator_half_async MODULES test_communicator_half_async ENVS ${dist_ENVS} FLAGS_communicator_send_queue_size=1 FLAGS_communicator_max_merge_var_num=1) - py_test_modules(test_communicator_sync MODULES test_communicator_sync ENVS ${dist_ENVS} FLAGS_communicator_send_queue_size=1 FLAGS_communicator_max_merge_var_num=1) - py_test_modules(test_collective_optimizer MODULES test_collective_optimizer) - if(NOT APPLE) - py_test_modules(test_fleet_base MODULES test_fleet_base ENVS ${dist_ENVS}) - py_test_modules(test_fleet_base_2 MODULES test_fleet_base_2 ENVS ${dist_ENVS}) - py_test_modules(test_fleet_base_3 MODULES test_fleet_base_3 ENVS ${dist_ENVS}) - py_test_modules(test_fleet_amp_init MODULES test_fleet_amp_init ENVS ${dist_ENVS}) - py_test_modules(test_fleet_fp16_allreduce_meta_optimizer MODULES test_fleet_fp16_allreduce_meta_optimizer ENVS ${dist_ENVS}) - py_test_modules(test_fleet_private_function MODULES test_fleet_private_function ENVS ${dist_ENVS}) - py_test_modules(test_fleet_meta_optimizer_base MODULES test_fleet_meta_optimizer_base ENVS ${dist_ENVS}) - py_test_modules(test_fleet_distributed_strategy MODULES test_fleet_distributed_strategy) - py_test_modules(test_fleet_static_mp_layers MODULES test_fleet_static_mp_layers) - #py_test_modules(test_fleet_auto MODULES test_fleet_auto ENVS ${dist_ENVS}) - if (WITH_GPU OR WITH_XPU OR WITH_ASCEND OR WITH_ASCEND_CL) - py_test_modules(test_fleet_amp_meta_optimizer MODULES test_fleet_amp_meta_optimizer ENVS ${dist_ENVS}) - py_test_modules(test_fleet_gradient_merge_meta_optimizer MODULES test_fleet_gradient_merge_meta_optimizer ENVS ${dist_ENVS}) - py_test_modules(test_fleet_graph_executor MODULES test_fleet_graph_executor ENVS ${dist_ENVS}) - py_test_modules(test_fleet_hybrid_meta_optimizer MODULES test_fleet_hybrid_meta_optimizer ENVS ${dist_ENVS}) - py_test_modules(test_fleet_recompute_meta_optimizer MODULES test_fleet_recompute_meta_optimizer ENVS ${dist_ENVS}) - py_test_modules(test_fleet_sharding_meta_optimizer MODULES test_fleet_sharding_meta_optimizer ENVS ${dist_ENVS}) - endif() - if(NOT WIN32) - py_test_modules(test_auto_parallel_partitioner MODULES test_auto_parallel_partitioner ENVS ${dist_ENVS}) - py_test_modules(test_auto_parallel_partitioner_gpt MODULES test_auto_parallel_partitioner_gpt ENVS ${dist_ENVS}) - py_test_modules(test_auto_parallel_searcher MODULES test_auto_parallel_searcher ENVS ${dist_ENVS}) - py_test_modules(test_auto_parallel_reshard MODULES test_auto_parallel_reshard ENVS ${dist_ENVS}) - py_test_modules(test_auto_parallel_dist_tensor MODULES test_auto_parallel_dist_tensor ENVS ${dist_ENVS}) - py_test_modules(test_auto_parallel_reshard_serial MODULES test_auto_parallel_reshard_serial ENVS ${dist_ENVS}) - py_test_modules(test_auto_parallel_reshard_mppp MODULES test_auto_parallel_reshard_mppp ENVS ${dist_ENVS}) - py_test_modules(test_auto_parallel_reshard_dpmppp MODULES test_auto_parallel_reshard_dpmppp ENVS ${dist_ENVS}) - py_test_modules(test_auto_parallel_cost_model MODULES test_auto_parallel_cost_model ENVS ${dist_ENVS}) - if (WITH_GPU OR WITH_XPU OR WITH_ASCEND OR WITH_ASCEND_CL) - py_test_modules(test_fleet_lamb_meta_optimizer MODULES test_fleet_lamb_meta_optimizer ENVS ${dist_ENVS}) - py_test_modules(test_fleet_lars_meta_optimizer MODULES test_fleet_lars_meta_optimizer ENVS ${dist_ENVS}) - py_test_modules(test_fleet_localsgd_meta_optimizer MODULES test_fleet_localsgd_meta_optimizer ENVS ${dist_ENVS}) - - - - endif() - endif(NOT WIN32) - endif(NOT APPLE) - if(WITH_DGC) - # if with dgc, test all dgc tests. - # NOTE. dist dgc tests is already in DIST_TEST_OPS - py_test_modules(test_dgc_op MODULES test_dgc_op) - py_test_modules(test_dgc_momentum_op MODULES test_dgc_momentum_op) - py_test_modules(test_dgc_optimizer MODULES test_dgc_optimizer) - py_test_modules(test_fleet_dgc_meta_optimizer MODULES test_fleet_dgc_meta_optimizer) - else() - # if not with dgc, must close all dgc tests - list(REMOVE_ITEM DIST_TEST_OPS "test_dist_mnist_dgc_nccl") - list(REMOVE_ITEM DIST_TEST_OPS "test_dist_se_resnext_dgc") + add_subdirectory(distributed_passes) + add_subdirectory(ps) + add_subdirectory(auto_parallel) + + # FIXME(typhoonzero): add these tests back + list(REMOVE_ITEM DIST_TEST_OPS "test_dist_transformer") + list(REMOVE_ITEM DIST_TEST_OPS "test_dist_transpiler") + + # TODO(sandyhouse): fix and add the ut back + list(REMOVE_ITEM DIST_TEST_OPS "test_dist_mnist_hallreduce") + + #not need + list(REMOVE_ITEM DIST_TEST_OPS "test_dist_base") + list(REMOVE_ITEM DIST_TEST_OPS "test_dist_fleet_base") + + list(REMOVE_ITEM DIST_TEST_OPS "test_dist_ctr") + list(REMOVE_ITEM DIST_TEST_OPS "test_dist_mnist_lars") + list(REMOVE_ITEM DIST_TEST_OPS "test_dist_mnist_train") + list(REMOVE_ITEM DIST_TEST_OPS "test_dist_save_load") + list(REMOVE_ITEM DIST_TEST_OPS "test_dist_text_classification") + list(REMOVE_ITEM DIST_TEST_OPS "test_dist_train") + list(REMOVE_ITEM DIST_TEST_OPS "test_dist_word2vec") + + list(REMOVE_ITEM DIST_TEST_OPS "test_dist_fleet_gloo") + + py_test_modules(test_recv_save_op MODULES test_recv_save_op ENVS ${dist_ENVS}) + py_test_modules(test_communicator_async MODULES test_communicator_async ENVS + ${dist_ENVS}) + py_test_modules(test_communicator_ps_gpu MODULES test_communicator_ps_gpu + ENVS ${dist_ENVS}) + py_test_modules(test_communicator_geo MODULES test_communicator_geo ENVS + ${dist_ENVS}) + py_test_modules( + test_communicator_half_async + MODULES + test_communicator_half_async + ENVS + ${dist_ENVS} + FLAGS_communicator_send_queue_size=1 + FLAGS_communicator_max_merge_var_num=1) + py_test_modules( + test_communicator_sync + MODULES + test_communicator_sync + ENVS + ${dist_ENVS} + FLAGS_communicator_send_queue_size=1 + FLAGS_communicator_max_merge_var_num=1) + py_test_modules(test_collective_optimizer MODULES test_collective_optimizer) + if(NOT APPLE) + py_test_modules(test_fleet_base MODULES test_fleet_base ENVS ${dist_ENVS}) + py_test_modules(test_fleet_base_2 MODULES test_fleet_base_2 ENVS + ${dist_ENVS}) + py_test_modules(test_fleet_base_3 MODULES test_fleet_base_3 ENVS + ${dist_ENVS}) + py_test_modules(test_fleet_amp_init MODULES test_fleet_amp_init ENVS + ${dist_ENVS}) + py_test_modules(test_fleet_fp16_allreduce_meta_optimizer MODULES + test_fleet_fp16_allreduce_meta_optimizer ENVS ${dist_ENVS}) + py_test_modules(test_fleet_private_function MODULES + test_fleet_private_function ENVS ${dist_ENVS}) + py_test_modules(test_fleet_meta_optimizer_base MODULES + test_fleet_meta_optimizer_base ENVS ${dist_ENVS}) + py_test_modules(test_fleet_distributed_strategy MODULES + test_fleet_distributed_strategy) + py_test_modules(test_fleet_static_mp_layers MODULES + test_fleet_static_mp_layers) + #py_test_modules(test_fleet_auto MODULES test_fleet_auto ENVS ${dist_ENVS}) + if(WITH_GPU + OR WITH_XPU + OR WITH_ASCEND + OR WITH_ASCEND_CL) + py_test_modules(test_fleet_amp_meta_optimizer MODULES + test_fleet_amp_meta_optimizer ENVS ${dist_ENVS}) + py_test_modules( + test_fleet_gradient_merge_meta_optimizer MODULES + test_fleet_gradient_merge_meta_optimizer ENVS ${dist_ENVS}) + py_test_modules(test_fleet_graph_executor MODULES + test_fleet_graph_executor ENVS ${dist_ENVS}) + py_test_modules(test_fleet_hybrid_meta_optimizer MODULES + test_fleet_hybrid_meta_optimizer ENVS ${dist_ENVS}) + py_test_modules(test_fleet_recompute_meta_optimizer MODULES + test_fleet_recompute_meta_optimizer ENVS ${dist_ENVS}) + py_test_modules(test_fleet_sharding_meta_optimizer MODULES + test_fleet_sharding_meta_optimizer ENVS ${dist_ENVS}) + endif() + if(NOT WIN32) + py_test_modules(test_auto_parallel_partitioner MODULES + test_auto_parallel_partitioner ENVS ${dist_ENVS}) + py_test_modules(test_auto_parallel_partitioner_gpt MODULES + test_auto_parallel_partitioner_gpt ENVS ${dist_ENVS}) + py_test_modules(test_auto_parallel_searcher MODULES + test_auto_parallel_searcher ENVS ${dist_ENVS}) + py_test_modules(test_auto_parallel_reshard MODULES + test_auto_parallel_reshard ENVS ${dist_ENVS}) + py_test_modules(test_auto_parallel_dist_tensor MODULES + test_auto_parallel_dist_tensor ENVS ${dist_ENVS}) + py_test_modules(test_auto_parallel_reshard_serial MODULES + test_auto_parallel_reshard_serial ENVS ${dist_ENVS}) + py_test_modules(test_auto_parallel_reshard_mppp MODULES + test_auto_parallel_reshard_mppp ENVS ${dist_ENVS}) + py_test_modules(test_auto_parallel_reshard_dpmppp MODULES + test_auto_parallel_reshard_dpmppp ENVS ${dist_ENVS}) + py_test_modules(test_auto_parallel_cost_model MODULES + test_auto_parallel_cost_model ENVS ${dist_ENVS}) + if(WITH_GPU + OR WITH_XPU + OR WITH_ASCEND + OR WITH_ASCEND_CL) + py_test_modules(test_fleet_lamb_meta_optimizer MODULES + test_fleet_lamb_meta_optimizer ENVS ${dist_ENVS}) + py_test_modules(test_fleet_lars_meta_optimizer MODULES + test_fleet_lars_meta_optimizer ENVS ${dist_ENVS}) + py_test_modules(test_fleet_localsgd_meta_optimizer MODULES + test_fleet_localsgd_meta_optimizer ENVS ${dist_ENVS}) + + endif() + endif(NOT WIN32) + endif(NOT APPLE) + if(WITH_DGC) + # if with dgc, test all dgc tests. + # NOTE. dist dgc tests is already in DIST_TEST_OPS + py_test_modules(test_dgc_op MODULES test_dgc_op) + py_test_modules(test_dgc_momentum_op MODULES test_dgc_momentum_op) + py_test_modules(test_dgc_optimizer MODULES test_dgc_optimizer) + py_test_modules(test_fleet_dgc_meta_optimizer MODULES + test_fleet_dgc_meta_optimizer) + else() + # if not with dgc, must close all dgc tests + list(REMOVE_ITEM DIST_TEST_OPS "test_dist_mnist_dgc_nccl") + list(REMOVE_ITEM DIST_TEST_OPS "test_dist_se_resnext_dgc") + endif() + if(NOT APPLE) + if(WITH_GPU OR WITH_ROCM) + bash_test_modules(test_c_comm_init_op START_BASH test_c_comm_init_op.sh + ENVS PADDLE_BINARY_DIR=${PADDLE_BINARY_DIR}) + py_test_modules(test_launch_coverage MODULES test_launch_coverage) + endif() + + bash_test_modules(test_fleetrun START_BASH test_fleetrun.sh ENVS + PADDLE_BINARY_DIR=${PADDLE_BINARY_DIR}) + if(WITH_GPU + OR WITH_XPU + OR WITH_ASCEND + OR WITH_ASCEND_CL) + bash_test_modules( + test_fleet_launch_nproc START_BASH test_fleet_launch_nproc.sh ENVS + PADDLE_BINARY_DIR=${PADDLE_BINARY_DIR}) + bash_test_modules( + test_fleet_run_random_port START_BASH test_fleet_run_random_port.sh + ENVS PADDLE_BINARY_DIR=${PADDLE_BINARY_DIR}) + bash_test_modules( + test_fleet_launch_async START_BASH test_fleet_launch_async.sh ENVS + PADDLE_BINARY_DIR=${PADDLE_BINARY_DIR}) + bash_test_modules( + test_fleet_launch_cloud START_BASH test_fleet_launch_cloud.sh ENVS + PADDLE_BINARY_DIR=${PADDLE_BINARY_DIR}) + endif() + if(WITH_ASCEND OR WITH_ASCEND_CL) + bash_test_modules( + test_fleet_launch_ascend START_BASH test_fleet_launch_ascend.sh ENVS + PADDLE_BINARY_DIR=${PADDLE_BINARY_DIR}) + bash_test_modules(test_ascend_group START_BASH test_ascend_group.sh ENVS + PADDLE_BINARY_DIR=${PADDLE_BINARY_DIR}) endif() - if(NOT APPLE) - if(WITH_GPU OR WITH_ROCM) - bash_test_modules(test_c_comm_init_op START_BASH test_c_comm_init_op.sh ENVS PADDLE_BINARY_DIR=${PADDLE_BINARY_DIR}) - py_test_modules(test_launch_coverage MODULES test_launch_coverage) - endif() - - bash_test_modules(test_fleetrun START_BASH test_fleetrun.sh ENVS PADDLE_BINARY_DIR=${PADDLE_BINARY_DIR}) - if (WITH_GPU OR WITH_XPU OR WITH_ASCEND OR WITH_ASCEND_CL) - bash_test_modules(test_fleet_launch_nproc START_BASH test_fleet_launch_nproc.sh ENVS PADDLE_BINARY_DIR=${PADDLE_BINARY_DIR}) - bash_test_modules(test_fleet_run_random_port START_BASH test_fleet_run_random_port.sh ENVS PADDLE_BINARY_DIR=${PADDLE_BINARY_DIR}) - bash_test_modules(test_fleet_launch_async START_BASH test_fleet_launch_async.sh ENVS PADDLE_BINARY_DIR=${PADDLE_BINARY_DIR}) - bash_test_modules(test_fleet_launch_cloud START_BASH test_fleet_launch_cloud.sh ENVS PADDLE_BINARY_DIR=${PADDLE_BINARY_DIR}) - endif() - if(WITH_ASCEND OR WITH_ASCEND_CL) - bash_test_modules(test_fleet_launch_ascend START_BASH test_fleet_launch_ascend.sh ENVS PADDLE_BINARY_DIR=${PADDLE_BINARY_DIR}) - bash_test_modules(test_ascend_group START_BASH test_ascend_group.sh ENVS PADDLE_BINARY_DIR=${PADDLE_BINARY_DIR}) - endif() - - # port range (20000, 23000) is reserved for dist-ops - set(dist_ut_port 20001) - foreach(TEST_OP ${DIST_TEST_OPS}) - bash_test_modules(${TEST_OP} START_BASH dist_test.sh LABELS "RUN_TYPE=EXCLUSIVE" ENVS "PADDLE_DIST_UT_PORT=${dist_ut_port}") - MATH(EXPR dist_ut_port "${dist_ut_port}+20") - if(dist_ut_port GREATER_EQUAL 22998) - message(FATAL_ERROR "available ports have been exhausted:${dist_ut_port}") - endif() - endforeach(TEST_OP) - # solve it later. - bash_test_modules(test_fleet_launch_ps START_BASH test_fleet_launch_ps.sh LABELS "RUN_TYPE=EXCLUSIVE" ENVS "PADDLE_DIST_UT_PORT=${dist_ut_port}" PADDLE_BINARY_DIR=${PADDLE_BINARY_DIR} ) - if (WITH_GLOO) - bash_test_modules(test_cpuonly_launch START_BASH test_cpuonly_launch.sh LABELS "RUN_TYPE=EXCLUSIVE" ENVS "PADDLE_DIST_UT_PORT=${dist_ut_port}" PADDLE_BINARY_DIR=${PADDLE_BINARY_DIR} ) - endif() - if (WITH_GPU OR WITH_XPU OR WITH_ASCEND OR WITH_ASCEND_CL) - bash_test_modules(test_new_group START_BASH test_new_group.sh LABELS "RUN_TYPE=EXCLUSIVE" ENVS "PADDLE_DIST_UT_PORT=${dist_ut_port}+20" PADDLE_BINARY_DIR=${PADDLE_BINARY_DIR} ) - endif() - endif(NOT APPLE) + + # port range (20000, 23000) is reserved for dist-ops + set(dist_ut_port 20001) + foreach(TEST_OP ${DIST_TEST_OPS}) + bash_test_modules( + ${TEST_OP} + START_BASH + dist_test.sh + LABELS + "RUN_TYPE=EXCLUSIVE" + ENVS + "PADDLE_DIST_UT_PORT=${dist_ut_port}") + math(EXPR dist_ut_port "${dist_ut_port}+20") + if(dist_ut_port GREATER_EQUAL 22998) + message( + FATAL_ERROR "available ports have been exhausted:${dist_ut_port}") + endif() + endforeach(TEST_OP) + # solve it later. + bash_test_modules( + test_fleet_launch_ps + START_BASH + test_fleet_launch_ps.sh + LABELS + "RUN_TYPE=EXCLUSIVE" + ENVS + "PADDLE_DIST_UT_PORT=${dist_ut_port}" + PADDLE_BINARY_DIR=${PADDLE_BINARY_DIR}) + if(WITH_GLOO) + bash_test_modules( + test_cpuonly_launch + START_BASH + test_cpuonly_launch.sh + LABELS + "RUN_TYPE=EXCLUSIVE" + ENVS + "PADDLE_DIST_UT_PORT=${dist_ut_port}" + PADDLE_BINARY_DIR=${PADDLE_BINARY_DIR}) + endif() + if(WITH_GPU + OR WITH_XPU + OR WITH_ASCEND + OR WITH_ASCEND_CL) + bash_test_modules( + test_new_group + START_BASH + test_new_group.sh + LABELS + "RUN_TYPE=EXCLUSIVE" + ENVS + "PADDLE_DIST_UT_PORT=${dist_ut_port}+20" + PADDLE_BINARY_DIR=${PADDLE_BINARY_DIR}) + endif() + endif(NOT APPLE) endif() py_test_modules(test_parallel_executor_crf MODULES test_parallel_executor_crf) @@ -789,65 +982,172 @@ py_test_modules(test_parallel_executor_crf MODULES test_parallel_executor_crf) # We guess there are some bugs in cuda 10.1 or 10.2, # since this unittest is stable in cuda 11 (py3 pipeline) now. if(NOT WITH_COVERAGE) - py_test_modules(test_parallel_executor_profiler MODULES test_parallel_executor_profiler) - set_tests_properties(test_parallel_executor_profiler PROPERTIES LABELS "RUN_TYPE=DIST") + py_test_modules(test_parallel_executor_profiler MODULES + test_parallel_executor_profiler) + set_tests_properties(test_parallel_executor_profiler + PROPERTIES LABELS "RUN_TYPE=DIST") set_tests_properties(test_parallel_executor_profiler PROPERTIES TIMEOUT 120) endif() -py_test_modules(test_parallel_executor_transformer MODULES test_parallel_executor_transformer) +py_test_modules(test_parallel_executor_transformer MODULES + test_parallel_executor_transformer) if(WIN32) - py_test_modules(test_parallel_executor_transformer_auto_growth MODULES test_parallel_executor_transformer_auto_growth ENVS FLAGS_allocator_strategy=auto_growth CUDA_VISIBLE_DEVICES=0) - py_test_modules(test_fuse_all_reduce_pass MODULES test_fuse_all_reduce_pass ENVS CUDA_VISIBLE_DEVICES=0) - py_test_modules(test_feed_data_check_shape_type MODULES test_feed_data_check_shape_type ENVS CUDA_VISIBLE_DEVICES=0) - py_test_modules(test_fetch_lod_tensor_array MODULES test_fetch_lod_tensor_array ENVS CUDA_VISIBLE_DEVICES=0) + py_test_modules( + test_parallel_executor_transformer_auto_growth MODULES + test_parallel_executor_transformer_auto_growth ENVS + FLAGS_allocator_strategy=auto_growth CUDA_VISIBLE_DEVICES=0) + py_test_modules(test_fuse_all_reduce_pass MODULES test_fuse_all_reduce_pass + ENVS CUDA_VISIBLE_DEVICES=0) + py_test_modules(test_feed_data_check_shape_type MODULES + test_feed_data_check_shape_type ENVS CUDA_VISIBLE_DEVICES=0) + py_test_modules(test_fetch_lod_tensor_array MODULES + test_fetch_lod_tensor_array ENVS CUDA_VISIBLE_DEVICES=0) else() - py_test_modules(test_parallel_executor_transformer_auto_growth MODULES test_parallel_executor_transformer_auto_growth ENVS FLAGS_allocator_strategy=auto_growth) - py_test_modules(test_fuse_all_reduce_pass MODULES test_fuse_all_reduce_pass) - py_test_modules(test_feed_data_check_shape_type MODULES test_feed_data_check_shape_type) - py_test_modules(test_fetch_lod_tensor_array MODULES test_fetch_lod_tensor_array) + py_test_modules( + test_parallel_executor_transformer_auto_growth MODULES + test_parallel_executor_transformer_auto_growth ENVS + FLAGS_allocator_strategy=auto_growth) + py_test_modules(test_fuse_all_reduce_pass MODULES test_fuse_all_reduce_pass) + py_test_modules(test_feed_data_check_shape_type MODULES + test_feed_data_check_shape_type) + py_test_modules(test_fetch_lod_tensor_array MODULES + test_fetch_lod_tensor_array) endif() py_test_modules(test_data_norm_op MODULES test_data_norm_op) -py_test_modules(test_fuse_bn_act_pass MODULES test_fuse_bn_act_pass ENVS FLAGS_cudnn_deterministic=1 FLAGS_cudnn_batchnorm_spatial_persistent=1 FLAGS_conv_workspace_size_limit=1000) -py_test_modules(test_fuse_bn_add_act_pass MODULES test_fuse_bn_add_act_pass ENVS FLAGS_cudnn_deterministic=1 FLAGS_cudnn_batchnorm_spatial_persistent=1 FLAGS_conv_workspace_size_limit=1000) +py_test_modules( + test_fuse_bn_act_pass + MODULES + test_fuse_bn_act_pass + ENVS + FLAGS_cudnn_deterministic=1 + FLAGS_cudnn_batchnorm_spatial_persistent=1 + FLAGS_conv_workspace_size_limit=1000) +py_test_modules( + test_fuse_bn_add_act_pass + MODULES + test_fuse_bn_add_act_pass + ENVS + FLAGS_cudnn_deterministic=1 + FLAGS_cudnn_batchnorm_spatial_persistent=1 + FLAGS_conv_workspace_size_limit=1000) # NOTE: These unittests will appear NaN steadily in windows CI. After analysis, # it is found that windows CI will run all the training unittests with the ON_INFER option turned on, # which will not appear in other CIs. The calculation behavior of some ops in inference mode is # inconsistent with that in non-inference mode. if(NOT ON_INFER) - py_test_modules(test_parallel_executor_seresnext_base_cpu MODULES test_parallel_executor_seresnext_base_cpu) - py_test_modules(test_parallel_executor_seresnext_with_reduce_cpu MODULES test_parallel_executor_seresnext_with_reduce_cpu) - py_test_modules(test_parallel_executor_seresnext_with_fuse_all_reduce_cpu MODULES test_parallel_executor_seresnext_with_fuse_all_reduce_cpu) - set_tests_properties(test_parallel_executor_seresnext_base_cpu PROPERTIES TIMEOUT 900) - set_tests_properties(test_parallel_executor_seresnext_base_cpu PROPERTIES LABELS "RUN_TYPE=NIGHTLY") - set_tests_properties(test_parallel_executor_seresnext_with_reduce_cpu PROPERTIES TIMEOUT 750) - set_tests_properties(test_parallel_executor_seresnext_with_reduce_cpu PROPERTIES LABELS "RUN_TYPE=NIGHTLY") - set_tests_properties(test_parallel_executor_seresnext_with_fuse_all_reduce_cpu PROPERTIES TIMEOUT 750) - set_tests_properties(test_parallel_executor_seresnext_with_fuse_all_reduce_cpu PROPERTIES LABELS "RUN_TYPE=NIGHTLY") + py_test_modules(test_parallel_executor_seresnext_base_cpu MODULES + test_parallel_executor_seresnext_base_cpu) + py_test_modules(test_parallel_executor_seresnext_with_reduce_cpu MODULES + test_parallel_executor_seresnext_with_reduce_cpu) + py_test_modules( + test_parallel_executor_seresnext_with_fuse_all_reduce_cpu MODULES + test_parallel_executor_seresnext_with_fuse_all_reduce_cpu) + set_tests_properties(test_parallel_executor_seresnext_base_cpu + PROPERTIES TIMEOUT 900) + set_tests_properties(test_parallel_executor_seresnext_base_cpu + PROPERTIES LABELS "RUN_TYPE=NIGHTLY") + set_tests_properties(test_parallel_executor_seresnext_with_reduce_cpu + PROPERTIES TIMEOUT 750) + set_tests_properties(test_parallel_executor_seresnext_with_reduce_cpu + PROPERTIES LABELS "RUN_TYPE=NIGHTLY") + set_tests_properties(test_parallel_executor_seresnext_with_fuse_all_reduce_cpu + PROPERTIES TIMEOUT 750) + set_tests_properties(test_parallel_executor_seresnext_with_fuse_all_reduce_cpu + PROPERTIES LABELS "RUN_TYPE=NIGHTLY") endif() if(NOT WIN32) - # TODO: fix these unittests failure on Windows - py_test_modules(test_layers MODULES test_layers ENVS FLAGS_cudnn_deterministic=1) - py_test_modules(test_ir_memory_optimize_transformer MODULES test_ir_memory_optimize_transformer) - # FIXME(zcd): temporally disable test_parallel_executor_fetch_feed in Windows CI because of the random failure. - py_test_modules(test_parallel_executor_fetch_feed MODULES test_parallel_executor_fetch_feed) - set_tests_properties(test_parallel_executor_fetch_feed PROPERTIES TIMEOUT 450) + # TODO: fix these unittests failure on Windows + py_test_modules(test_layers MODULES test_layers ENVS + FLAGS_cudnn_deterministic=1) + py_test_modules(test_ir_memory_optimize_transformer MODULES + test_ir_memory_optimize_transformer) + # FIXME(zcd): temporally disable test_parallel_executor_fetch_feed in Windows CI because of the random failure. + py_test_modules(test_parallel_executor_fetch_feed MODULES + test_parallel_executor_fetch_feed) + set_tests_properties(test_parallel_executor_fetch_feed PROPERTIES TIMEOUT 450) endif() -if(WITH_DISTRIBUTE AND NOT APPLE AND NOT WIN32) - py_test_modules(test_fleet_checkpoint MODULES test_fleet_checkpoint) - set_tests_properties(test_fleet_checkpoint PROPERTIES TIMEOUT 200) - set_tests_properties(test_fleet_checkpoint PROPERTIES LABELS "RUN_TYPE=EXCLUSIVE:NIGHTLY") - bash_test_modules(test_auto_checkpoint START_BASH dist_test.sh TIMEOUT 200 LABELS "RUN_TYPE=EXCLUSIVE:NIGHTLY") - bash_test_modules(test_auto_checkpoint1 START_BASH dist_test.sh TIMEOUT 200 LABELS "RUN_TYPE=EXCLUSIVE:NIGHTLY") - bash_test_modules(test_auto_checkpoint2 START_BASH dist_test.sh TIMEOUT 200 LABELS "RUN_TYPE=EXCLUSIVE:NIGHTLY") - bash_test_modules(test_auto_checkpoint3 START_BASH dist_test.sh TIMEOUT 200 LABELS "RUN_TYPE=EXCLUSIVE:NIGHTLY") - bash_test_modules(test_auto_checkpoint_multiple START_BASH dist_test.sh TIMEOUT 200 LABELS "RUN_TYPE=EXCLUSIVE:NIGHTLY") - bash_test_modules(test_auto_checkpoint_dist_basic START_BASH dist_test.sh TIMEOUT 200 LABELS "RUN_TYPE=EXCLUSIVE:NIGHTLY") - bash_test_modules(test_hdfs1 START_BASH dist_test.sh TIMEOUT 200 LABELS "RUN_TYPE=EXCLUSIVE:NIGHTLY") - bash_test_modules(test_hdfs2 START_BASH dist_test.sh TIMEOUT 200 LABELS "RUN_TYPE=EXCLUSIVE:NIGHTLY") - bash_test_modules(test_hdfs3 START_BASH dist_test.sh TIMEOUT 200 LABELS "RUN_TYPE=EXCLUSIVE:NIGHTLY") +if(WITH_DISTRIBUTE + AND NOT APPLE + AND NOT WIN32) + py_test_modules(test_fleet_checkpoint MODULES test_fleet_checkpoint) + set_tests_properties(test_fleet_checkpoint PROPERTIES TIMEOUT 200) + set_tests_properties(test_fleet_checkpoint + PROPERTIES LABELS "RUN_TYPE=EXCLUSIVE:NIGHTLY") + bash_test_modules( + test_auto_checkpoint + START_BASH + dist_test.sh + TIMEOUT + 200 + LABELS + "RUN_TYPE=EXCLUSIVE:NIGHTLY") + bash_test_modules( + test_auto_checkpoint1 + START_BASH + dist_test.sh + TIMEOUT + 200 + LABELS + "RUN_TYPE=EXCLUSIVE:NIGHTLY") + bash_test_modules( + test_auto_checkpoint2 + START_BASH + dist_test.sh + TIMEOUT + 200 + LABELS + "RUN_TYPE=EXCLUSIVE:NIGHTLY") + bash_test_modules( + test_auto_checkpoint3 + START_BASH + dist_test.sh + TIMEOUT + 200 + LABELS + "RUN_TYPE=EXCLUSIVE:NIGHTLY") + bash_test_modules( + test_auto_checkpoint_multiple + START_BASH + dist_test.sh + TIMEOUT + 200 + LABELS + "RUN_TYPE=EXCLUSIVE:NIGHTLY") + bash_test_modules( + test_auto_checkpoint_dist_basic + START_BASH + dist_test.sh + TIMEOUT + 200 + LABELS + "RUN_TYPE=EXCLUSIVE:NIGHTLY") + bash_test_modules( + test_hdfs1 + START_BASH + dist_test.sh + TIMEOUT + 200 + LABELS + "RUN_TYPE=EXCLUSIVE:NIGHTLY") + bash_test_modules( + test_hdfs2 + START_BASH + dist_test.sh + TIMEOUT + 200 + LABELS + "RUN_TYPE=EXCLUSIVE:NIGHTLY") + bash_test_modules( + test_hdfs3 + START_BASH + dist_test.sh + TIMEOUT + 200 + LABELS + "RUN_TYPE=EXCLUSIVE:NIGHTLY") endif() add_subdirectory(sequence) @@ -856,38 +1156,42 @@ add_subdirectory(rnn) add_subdirectory(autograd) add_subdirectory(distribution) -if (NOT WIN32 OR NOT WITH_GPU) - add_subdirectory(fft) +if(NOT WIN32 OR NOT WITH_GPU) + add_subdirectory(fft) endif() -if (WITH_XPU) - add_subdirectory(xpu) +if(WITH_XPU) + add_subdirectory(xpu) endif() # dist xpu tests: -if (WITH_XPU_BKCL) - #py_test(test_collective_reduce_api_xpu SRCS "test_collective_reduce_api.py") - py_test(test_collective_allreduce_api_xpu SRCS "test_collective_allreduce_api.py") +if(WITH_XPU_BKCL) + #py_test(test_collective_reduce_api_xpu SRCS "test_collective_reduce_api.py") + py_test(test_collective_allreduce_api_xpu + SRCS "test_collective_allreduce_api.py") endif() if(WIN32) - cc_test(cc_imp_py_test SRCS cc_imp_py_test.cc DEPS python) + cc_test( + cc_imp_py_test + SRCS cc_imp_py_test.cc + DEPS python) endif() -if (WITH_ASCEND_CL) - add_subdirectory(npu) +if(WITH_ASCEND_CL) + add_subdirectory(npu) endif() -if (WITH_MKLDNN) - add_subdirectory(mkldnn) +if(WITH_MKLDNN) + add_subdirectory(mkldnn) endif() -if (WITH_IPU) - add_subdirectory(ipu) +if(WITH_IPU) + add_subdirectory(ipu) endif() -if (WITH_MLU) - add_subdirectory(mlu) +if(WITH_MLU) + add_subdirectory(mlu) endif() add_subdirectory(asp) @@ -896,92 +1200,120 @@ add_subdirectory(ir) add_subdirectory(interpreter) -if (WITH_TESTING) - set_property(TEST test_parallel_executor_mnist PROPERTY ENVIRONMENT GLOG_vmodule=all_reduce_deps_pass=10) - set_property(TEST test_parallel_executor_fix_op_run_order PROPERTY ENVIRONMENT GLOG_vmodule=fix_op_run_order_pass=10) +if(WITH_TESTING) + set_property(TEST test_parallel_executor_mnist + PROPERTY ENVIRONMENT GLOG_vmodule=all_reduce_deps_pass=10) + set_property(TEST test_parallel_executor_fix_op_run_order + PROPERTY ENVIRONMENT GLOG_vmodule=fix_op_run_order_pass=10) endif() -set_tests_properties(test_parallel_executor_test_while_train test_parallel_executor_mnist - test_parallel_executor_feed_persistable_var - test_buffer_shared_memory_reuse_pass_and_fuse_optimization_op_pass - test_data_norm_op - test_dataloader_keep_order - test_dataloader_unkeep_order - test_parallel_executor_inference_feed_partial_data - test_parallel_ssa_graph_inference_feed_partial_data - test_fetch_unmerged - test_buffer_shared_memory_reuse_pass PROPERTIES LABELS "RUN_TYPE=DIST") +set_tests_properties( + test_parallel_executor_test_while_train + test_parallel_executor_mnist + test_parallel_executor_feed_persistable_var + test_buffer_shared_memory_reuse_pass_and_fuse_optimization_op_pass + test_data_norm_op + test_dataloader_keep_order + test_dataloader_unkeep_order + test_parallel_executor_inference_feed_partial_data + test_parallel_ssa_graph_inference_feed_partial_data + test_fetch_unmerged + test_buffer_shared_memory_reuse_pass + PROPERTIES LABELS "RUN_TYPE=DIST") # disable test_parallel_executor_fetch_isolated_var # set_tests_properties(test_parallel_executor_fetch_isolated_var PROPERTIES LABELS "RUN_TYPE=DIST") -set_tests_properties(test_parallel_executor_crf test_sync_batch_norm_op test_inplace_abn_op - test_parallel_executor_seresnext_base_gpu - test_parallel_executor_seresnext_with_reduce_gpu - test_parallel_executor_seresnext_with_fuse_all_reduce_gpu - test_distributed_fused_lamb_op_with_clip - test_distributed_fused_lamb_op_without_clip - test_distributed_fused_lamb_op_with_gradient_merge - test_parallel_executor_fetch_isolated_var - PROPERTIES LABELS "RUN_TYPE=DIST") +set_tests_properties( + test_parallel_executor_crf + test_sync_batch_norm_op + test_inplace_abn_op + test_parallel_executor_seresnext_base_gpu + test_parallel_executor_seresnext_with_reduce_gpu + test_parallel_executor_seresnext_with_fuse_all_reduce_gpu + test_distributed_fused_lamb_op_with_clip + test_distributed_fused_lamb_op_without_clip + test_distributed_fused_lamb_op_with_gradient_merge + test_parallel_executor_fetch_isolated_var + PROPERTIES LABELS "RUN_TYPE=DIST") if(NOT WIN32 AND NOT APPLE) - set_tests_properties(test_imperative_signal_handler PROPERTIES LABELS "RUN_TYPE=EXCLUSIVE") - set_tests_properties(test_imperative_data_loader_base PROPERTIES LABELS "RUN_TYPE=EXCLUSIVE") - set_tests_properties(test_imperative_data_loader_fds_clear PROPERTIES LABELS "RUN_TYPE=EXCLUSIVE") - # set_tests_properties(test_imperative_data_loader_exception PROPERTIES LABELS "RUN_TYPE=EXCLUSIVE") - set_tests_properties(test_multiprocess_dataloader_static PROPERTIES LABELS "RUN_TYPE=EXCLUSIVE") - set_tests_properties(test_multiprocess_dataloader_dynamic PROPERTIES LABELS "RUN_TYPE=EXCLUSIVE") - set_tests_properties(test_multiprocess_dataloader_exception PROPERTIES LABELS "RUN_TYPE=EXCLUSIVE") - set_tests_properties(test_multiprocess_dataloader_iterable_dataset_static PROPERTIES LABELS "RUN_TYPE=EXCLUSIVE") - set_tests_properties(test_multiprocess_dataloader_iterable_dataset_dynamic PROPERTIES LABELS "RUN_TYPE=EXCLUSIVE") - set_tests_properties(test_multiprocess_dataloader_dataset PROPERTIES LABELS "RUN_TYPE=EXCLUSIVE") - set_tests_properties(test_multiprocess_dataloader_static PROPERTIES TIMEOUT 120) + set_tests_properties(test_imperative_signal_handler + PROPERTIES LABELS "RUN_TYPE=EXCLUSIVE") + set_tests_properties(test_imperative_data_loader_base + PROPERTIES LABELS "RUN_TYPE=EXCLUSIVE") + set_tests_properties(test_imperative_data_loader_fds_clear + PROPERTIES LABELS "RUN_TYPE=EXCLUSIVE") + # set_tests_properties(test_imperative_data_loader_exception PROPERTIES LABELS "RUN_TYPE=EXCLUSIVE") + set_tests_properties(test_multiprocess_dataloader_static + PROPERTIES LABELS "RUN_TYPE=EXCLUSIVE") + set_tests_properties(test_multiprocess_dataloader_dynamic + PROPERTIES LABELS "RUN_TYPE=EXCLUSIVE") + set_tests_properties(test_multiprocess_dataloader_exception + PROPERTIES LABELS "RUN_TYPE=EXCLUSIVE") + set_tests_properties(test_multiprocess_dataloader_iterable_dataset_static + PROPERTIES LABELS "RUN_TYPE=EXCLUSIVE") + set_tests_properties(test_multiprocess_dataloader_iterable_dataset_dynamic + PROPERTIES LABELS "RUN_TYPE=EXCLUSIVE") + set_tests_properties(test_multiprocess_dataloader_dataset + PROPERTIES LABELS "RUN_TYPE=EXCLUSIVE") + set_tests_properties(test_multiprocess_dataloader_static PROPERTIES TIMEOUT + 120) endif() -if (NOT WIN32) - set_tests_properties(test_multiprocess_reader_exception PROPERTIES LABELS "RUN_TYPE=EXCLUSIVE") - set_tests_properties(test_layers PROPERTIES TIMEOUT 120) - if (WITH_NV_JETSON) - set_tests_properties(test_ir_memory_optimize_transformer PROPERTIES TIMEOUT 1200) - else () - set_tests_properties(test_ir_memory_optimize_transformer PROPERTIES TIMEOUT 120) - endif () +if(NOT WIN32) + set_tests_properties(test_multiprocess_reader_exception + PROPERTIES LABELS "RUN_TYPE=EXCLUSIVE") + set_tests_properties(test_layers PROPERTIES TIMEOUT 120) + if(WITH_NV_JETSON) + set_tests_properties(test_ir_memory_optimize_transformer PROPERTIES TIMEOUT + 1200) + else() + set_tests_properties(test_ir_memory_optimize_transformer PROPERTIES TIMEOUT + 120) + endif() endif() -if (WITH_DISTRIBUTE AND NOT WIN32) - set_tests_properties(test_fleet_utils PROPERTIES TIMEOUT 120) - set_tests_properties(test_collective_cpu_barrier_with_gloo PROPERTIES TIMEOUT 40) +if(WITH_DISTRIBUTE AND NOT WIN32) + set_tests_properties(test_fleet_utils PROPERTIES TIMEOUT 120) + set_tests_properties(test_collective_cpu_barrier_with_gloo PROPERTIES TIMEOUT + 40) endif() -if (WITH_DISTRIBUTE) - set_tests_properties(test_communicator_half_async PROPERTIES TIMEOUT 120) - set_tests_properties(test_dist_fleet_ctr2 PROPERTIES TIMEOUT 200) - set_tests_properties(test_dist_fleet_sparse_embedding_ctr PROPERTIES TIMEOUT 200) - set_tests_properties(test_dist_fleet_infer PROPERTIES TIMEOUT 200) - set_tests_properties(test_dist_fleet_raw_program_optimizer PROPERTIES TIMEOUT 120) - set_tests_properties(test_dist_fleet_raw_program_optimizer_fuse_allreduce PROPERTIES TIMEOUT 60) - set_tests_properties(test_dist_dygraph_apis PROPERTIES TIMEOUT 120) +if(WITH_DISTRIBUTE) + set_tests_properties(test_communicator_half_async PROPERTIES TIMEOUT 120) + set_tests_properties(test_dist_fleet_ctr2 PROPERTIES TIMEOUT 200) + set_tests_properties(test_dist_fleet_sparse_embedding_ctr PROPERTIES TIMEOUT + 200) + set_tests_properties(test_dist_fleet_infer PROPERTIES TIMEOUT 200) + set_tests_properties(test_dist_fleet_raw_program_optimizer PROPERTIES TIMEOUT + 120) + set_tests_properties(test_dist_fleet_raw_program_optimizer_fuse_allreduce + PROPERTIES TIMEOUT 60) + set_tests_properties(test_dist_dygraph_apis PROPERTIES TIMEOUT 120) endif() -if (WITH_DISTRIBUTE AND NOT APPLE) - if(WITH_GPU OR WITH_ROCM) - set_tests_properties(test_c_comm_init_op PROPERTIES TIMEOUT 120) - set_tests_properties(test_dist_mnist_gradient_merge PROPERTIES TIMEOUT 360) - endif() +if(WITH_DISTRIBUTE AND NOT APPLE) + if(WITH_GPU OR WITH_ROCM) + set_tests_properties(test_c_comm_init_op PROPERTIES TIMEOUT 120) + set_tests_properties(test_dist_mnist_gradient_merge PROPERTIES TIMEOUT 360) + endif() endif() # setting timeout value as 15S set_tests_properties(test_run PROPERTIES TIMEOUT 120) set_tests_properties(test_sync_batch_norm_op PROPERTIES TIMEOUT 120) set_tests_properties(test_cross_op PROPERTIES TIMEOUT 120) -set_tests_properties(test_imperative_lod_tensor_to_selected_rows PROPERTIES TIMEOUT 200) +set_tests_properties(test_imperative_lod_tensor_to_selected_rows + PROPERTIES TIMEOUT 200) set_tests_properties(test_lstm_op PROPERTIES TIMEOUT 120) -set_tests_properties(test_imperative_star_gan_with_gradient_penalty PROPERTIES TIMEOUT 120) +set_tests_properties(test_imperative_star_gan_with_gradient_penalty + PROPERTIES TIMEOUT 120) set_tests_properties(test_bicubic_interp_op PROPERTIES TIMEOUT 120) set_tests_properties(test_deformable_conv_op PROPERTIES TIMEOUT 200) set_tests_properties(test_nearest_interp_op PROPERTIES TIMEOUT 120) set_tests_properties(test_profiler PROPERTIES TIMEOUT 120) -set_tests_properties(test_inplace_softmax_with_cross_entropy PROPERTIES TIMEOUT 120) +set_tests_properties(test_inplace_softmax_with_cross_entropy PROPERTIES TIMEOUT + 120) set_tests_properties(test_cross_entropy2_op PROPERTIES TIMEOUT 120) set_tests_properties(test_cross_entropy_loss PROPERTIES TIMEOUT 180) set_tests_properties(test_fetch_unmerged PROPERTIES TIMEOUT 120) @@ -993,18 +1325,20 @@ set_tests_properties(test_elementwise_div_op PROPERTIES TIMEOUT 120) set_tests_properties(test_regularizer_api PROPERTIES TIMEOUT 150) set_tests_properties(test_multiclass_nms_op PROPERTIES TIMEOUT 120) if(NOT WIN32) - if (WITH_NV_JETSON) + if(WITH_NV_JETSON) set_tests_properties(test_ir_memory_optimize_nlp PROPERTIES TIMEOUT 1200) - else () + else() set_tests_properties(test_ir_memory_optimize_nlp PROPERTIES TIMEOUT 120) - endif () + endif() endif() set_tests_properties(test_add_reader_dependency PROPERTIES TIMEOUT 120) set_tests_properties(test_bilateral_slice_op PROPERTIES TIMEOUT 120) -set_tests_properties(test_buffer_shared_memory_reuse_pass PROPERTIES TIMEOUT 120) +set_tests_properties(test_buffer_shared_memory_reuse_pass PROPERTIES TIMEOUT + 120) set_tests_properties(test_fuse_relu_depthwise_conv_pass PROPERTIES TIMEOUT 120) set_tests_properties(test_fleet_util PROPERTIES TIMEOUT 120) -set_tests_properties(test_imperative_transformer_sorted_gradient PROPERTIES TIMEOUT 120) +set_tests_properties(test_imperative_transformer_sorted_gradient + PROPERTIES TIMEOUT 120) set_tests_properties(test_matmul_op PROPERTIES TIMEOUT 120) set_tests_properties(test_nearest_interp_v2_op PROPERTIES TIMEOUT 120) set_tests_properties(test_trilinear_interp_op PROPERTIES TIMEOUT 120) @@ -1013,56 +1347,66 @@ set_tests_properties(test_gather_op PROPERTIES TIMEOUT 120) set_tests_properties(test_static_save_load PROPERTIES TIMEOUT 250) set_tests_properties(test_pylayer_op PROPERTIES TIMEOUT 120) set_tests_properties(test_paddle_save_load_binary PROPERTIES TIMEOUT 120) -if (WIN32) - set_tests_properties(test_static_save_load_large PROPERTIES TIMEOUT 900) - set_tests_properties(test_paddle_save_load PROPERTIES TIMEOUT 250) +if(WIN32) + set_tests_properties(test_static_save_load_large PROPERTIES TIMEOUT 900) + set_tests_properties(test_paddle_save_load PROPERTIES TIMEOUT 250) else() - set_tests_properties(test_static_save_load_large PROPERTIES TIMEOUT 600) - set_tests_properties(test_paddle_save_load PROPERTIES TIMEOUT 250) + set_tests_properties(test_static_save_load_large PROPERTIES TIMEOUT 600) + set_tests_properties(test_paddle_save_load PROPERTIES TIMEOUT 250) endif() -if (WITH_NV_JETSON) - set_tests_properties(test_concat_op PROPERTIES TIMEOUT 1200) - set_tests_properties(test_conv3d_transpose_part2_op PROPERTIES TIMEOUT 1200) - set_tests_properties(test_conv3d_transpose_op PROPERTIES TIMEOUT 1200) - set_tests_properties(test_conv3d_op PROPERTIES TIMEOUT 1200) - set_tests_properties(test_norm_op PROPERTIES TIMEOUT 1200) - set_tests_properties(test_layer_norm_op PROPERTIES TIMEOUT 1500) - set_tests_properties(test_pool3d_op PROPERTIES TIMEOUT 1500) +if(WITH_NV_JETSON) + set_tests_properties(test_concat_op PROPERTIES TIMEOUT 1200) + set_tests_properties(test_conv3d_transpose_part2_op PROPERTIES TIMEOUT 1200) + set_tests_properties(test_conv3d_transpose_op PROPERTIES TIMEOUT 1200) + set_tests_properties(test_conv3d_op PROPERTIES TIMEOUT 1200) + set_tests_properties(test_norm_op PROPERTIES TIMEOUT 1200) + set_tests_properties(test_layer_norm_op PROPERTIES TIMEOUT 1500) + set_tests_properties(test_pool3d_op PROPERTIES TIMEOUT 1500) else() - set_tests_properties(test_concat_op PROPERTIES TIMEOUT 120) - set_tests_properties(test_conv3d_transpose_part2_op PROPERTIES TIMEOUT 120) - set_tests_properties(test_conv3d_transpose_op PROPERTIES TIMEOUT 120) - set_tests_properties(test_conv3d_op PROPERTIES TIMEOUT 120) - set_tests_properties(test_norm_op PROPERTIES TIMEOUT 120) - set_tests_properties(test_layer_norm_op PROPERTIES TIMEOUT 150) - set_tests_properties(test_pool3d_op PROPERTIES TIMEOUT 150) + set_tests_properties(test_concat_op PROPERTIES TIMEOUT 120) + set_tests_properties(test_conv3d_transpose_part2_op PROPERTIES TIMEOUT 120) + set_tests_properties(test_conv3d_transpose_op PROPERTIES TIMEOUT 120) + set_tests_properties(test_conv3d_op PROPERTIES TIMEOUT 120) + set_tests_properties(test_norm_op PROPERTIES TIMEOUT 120) + set_tests_properties(test_layer_norm_op PROPERTIES TIMEOUT 150) + set_tests_properties(test_pool3d_op PROPERTIES TIMEOUT 150) endif() -set_tests_properties(test_imperative_selected_rows_to_lod_tensor PROPERTIES TIMEOUT 200) +set_tests_properties(test_imperative_selected_rows_to_lod_tensor + PROPERTIES TIMEOUT 200) set_tests_properties(test_index_select_op PROPERTIES TIMEOUT 120) -set_tests_properties(test_parallel_ssa_graph_inference_feed_partial_data PROPERTIES TIMEOUT 120) +set_tests_properties(test_parallel_ssa_graph_inference_feed_partial_data + PROPERTIES TIMEOUT 120) set_tests_properties(test_parallel_executor_crf PROPERTIES TIMEOUT 120) #set_tests_properties(test_tensordot PROPERTIES TIMEOUT 200) set_tests_properties(test_imperative_save_load PROPERTIES TIMEOUT 120) -set_tests_properties(test_partial_eager_deletion_transformer PROPERTIES TIMEOUT 120) -set_tests_properties(test_parallel_executor_seresnext_with_reduce_gpu PROPERTIES TIMEOUT 120) +set_tests_properties(test_partial_eager_deletion_transformer PROPERTIES TIMEOUT + 120) +set_tests_properties(test_parallel_executor_seresnext_with_reduce_gpu + PROPERTIES TIMEOUT 120) set_tests_properties(test_dropout_op PROPERTIES TIMEOUT 120) set_tests_properties(test_argsort_op PROPERTIES TIMEOUT 120) set_tests_properties(test_gather_nd_op PROPERTIES TIMEOUT 120) set_tests_properties(test_nn_grad PROPERTIES TIMEOUT 180) set_tests_properties(test_elementwise_sub_op PROPERTIES TIMEOUT 120) set_tests_properties(test_row_conv_op PROPERTIES TIMEOUT 120) -set_tests_properties(test_parallel_executor_seresnext_with_fuse_all_reduce_gpu PROPERTIES TIMEOUT 120) -set_tests_properties(test_distributed_fused_lamb_op_with_clip PROPERTIES TIMEOUT 120) -set_tests_properties(test_distributed_fused_lamb_op_without_clip PROPERTIES TIMEOUT 120) -set_tests_properties(test_distributed_fused_lamb_op_with_gradient_merge PROPERTIES TIMEOUT 120) +set_tests_properties(test_parallel_executor_seresnext_with_fuse_all_reduce_gpu + PROPERTIES TIMEOUT 120) +set_tests_properties(test_distributed_fused_lamb_op_with_clip PROPERTIES TIMEOUT + 120) +set_tests_properties(test_distributed_fused_lamb_op_without_clip + PROPERTIES TIMEOUT 120) +set_tests_properties(test_distributed_fused_lamb_op_with_gradient_merge + PROPERTIES TIMEOUT 120) set_tests_properties(test_elementwise_min_op PROPERTIES TIMEOUT 120) set_tests_properties(test_nan_inf PROPERTIES TIMEOUT 120) set_tests_properties(test_deformable_conv_v1_op PROPERTIES TIMEOUT 300) -set_tests_properties(test_parallel_executor_transformer_auto_growth PROPERTIES TIMEOUT 120) +set_tests_properties(test_parallel_executor_transformer_auto_growth + PROPERTIES TIMEOUT 120) set_tests_properties(test_py_reader_using_executor PROPERTIES TIMEOUT 120) set_tests_properties(test_elementwise_add_op PROPERTIES TIMEOUT 120) set_tests_properties(test_weight_decay PROPERTIES TIMEOUT 120) -set_tests_properties(test_imperative_ptb_rnn_sorted_gradient PROPERTIES TIMEOUT 120) +set_tests_properties(test_imperative_ptb_rnn_sorted_gradient PROPERTIES TIMEOUT + 120) set_tests_properties(test_crop_tensor_op PROPERTIES TIMEOUT 120) set_tests_properties(test_eager_deletion_lstm_net PROPERTIES TIMEOUT 120) set_tests_properties(test_parallel_executor_mnist PROPERTIES TIMEOUT 120) @@ -1070,7 +1414,8 @@ set_tests_properties(test_imperative_ptb_rnn PROPERTIES TIMEOUT 120) set_tests_properties(test_imperative_save_load_v2 PROPERTIES TIMEOUT 120) set_tests_properties(test_conv2d_transpose_op PROPERTIES TIMEOUT 120) set_tests_properties(test_prroi_pool_op PROPERTIES TIMEOUT 120) -set_tests_properties(test_multiprocess_dataloader_iterable_dataset_static PROPERTIES TIMEOUT 120) +set_tests_properties(test_multiprocess_dataloader_iterable_dataset_static + PROPERTIES TIMEOUT 120) set_tests_properties(test_lstm_cudnn_op PROPERTIES TIMEOUT 120) set_tests_properties(test_stack_op PROPERTIES TIMEOUT 120) set_tests_properties(test_bilinear_interp_v2_op PROPERTIES TIMEOUT 120) @@ -1081,14 +1426,16 @@ set_tests_properties(test_deformable_psroi_pooling PROPERTIES TIMEOUT 120) set_tests_properties(test_trilinear_interp_v2_op PROPERTIES TIMEOUT 120) set_tests_properties(test_imperative_static_runner_mnist PROPERTIES TIMEOUT 120) set_tests_properties(test_masked_select_op PROPERTIES TIMEOUT 120) -set_tests_properties(test_sigmoid_cross_entropy_with_logits_op PROPERTIES TIMEOUT 120) +set_tests_properties(test_sigmoid_cross_entropy_with_logits_op + PROPERTIES TIMEOUT 120) set_tests_properties(test_imperative_optimizer_v2 PROPERTIES TIMEOUT 150) set_tests_properties(test_partial_sum_op PROPERTIES TIMEOUT 120) set_tests_properties(test_cond PROPERTIES TIMEOUT 120) set_tests_properties(test_space_to_depth_op PROPERTIES TIMEOUT 200) set_tests_properties(test_dyn_rnn PROPERTIES TIMEOUT 120) set_tests_properties(test_sgd_op PROPERTIES TIMEOUT 250) -set_tests_properties(test_parallel_executor_seresnext_base_gpu PROPERTIES TIMEOUT 120) +set_tests_properties(test_parallel_executor_seresnext_base_gpu + PROPERTIES TIMEOUT 120) set_tests_properties(test_norm_nn_grad PROPERTIES TIMEOUT 180) set_tests_properties(test_matrix_nms_op PROPERTIES TIMEOUT 120) set_tests_properties(test_generator_dataloader PROPERTIES TIMEOUT 120) @@ -1098,7 +1445,9 @@ set_tests_properties(test_softmax_with_cross_entropy_op PROPERTIES TIMEOUT 220) set_tests_properties(test_reduce_op PROPERTIES TIMEOUT 500) set_tests_properties(test_adam_optimizer_fp32_fp64 PROPERTIES TIMEOUT 120) set_tests_properties(test_elementwise_nn_grad PROPERTIES TIMEOUT 120) -set_tests_properties(test_buffer_shared_memory_reuse_pass_and_fuse_optimization_op_pass PROPERTIES TIMEOUT 120) +set_tests_properties( + test_buffer_shared_memory_reuse_pass_and_fuse_optimization_op_pass + PROPERTIES TIMEOUT 120) set_tests_properties(test_conv_nn_grad PROPERTIES TIMEOUT 120) set_tests_properties(test_program_prune_backward PROPERTIES TIMEOUT 120) set_tests_properties(test_group_norm_op PROPERTIES TIMEOUT 120) @@ -1123,17 +1472,20 @@ set_tests_properties(test_dygraph_multi_forward PROPERTIES TIMEOUT 120) set_tests_properties(test_imperative_ocr_attention_model PROPERTIES TIMEOUT 120) set_tests_properties(test_imperative_mnist PROPERTIES TIMEOUT 120) set_tests_properties(test_fused_elemwise_activation_op PROPERTIES TIMEOUT 270) -set_tests_properties(test_fused_elemwise_activation_op PROPERTIES LABELS "RUN_TYPE=NIGHTLY") +set_tests_properties(test_fused_elemwise_activation_op + PROPERTIES LABELS "RUN_TYPE=NIGHTLY") set_tests_properties(test_gru_op PROPERTIES TIMEOUT 200) set_tests_properties(test_regularizer PROPERTIES TIMEOUT 150) set_tests_properties(test_imperative_resnet PROPERTIES TIMEOUT 200) -set_tests_properties(test_imperative_resnet_sorted_gradient PROPERTIES TIMEOUT 200) +set_tests_properties(test_imperative_resnet_sorted_gradient PROPERTIES TIMEOUT + 200) set_tests_properties(test_imperative_se_resnext PROPERTIES TIMEOUT 200) set_tests_properties(test_matmul_v2_op PROPERTIES TIMEOUT 120) set_tests_properties(test_slice_op PROPERTIES TIMEOUT 120) set_tests_properties(test_strided_slice_op PROPERTIES TIMEOUT 120) set_tests_properties(test_translated_layer PROPERTIES TIMEOUT 120) -set_tests_properties(test_parallel_executor_inference_feed_partial_data PROPERTIES TIMEOUT 120) +set_tests_properties(test_parallel_executor_inference_feed_partial_data + PROPERTIES TIMEOUT 120) set_tests_properties(test_pad3d_op PROPERTIES TIMEOUT 120) set_tests_properties(test_dataloader_keep_order PROPERTIES TIMEOUT 120) set_tests_properties(test_mean_op PROPERTIES TIMEOUT 120) @@ -1142,118 +1494,155 @@ set_tests_properties(test_reader_reset PROPERTIES TIMEOUT 120) set_tests_properties(test_pool3d_api PROPERTIES TIMEOUT 120) set_tests_properties(test_cumprod_op PROPERTIES TIMEOUT 120) set_tests_properties(test_split_program PROPERTIES TIMEOUT 120) -if(WITH_DISTRIBUTE AND WITH_GPU AND WITH_NCCL) - set_tests_properties(test_parallel_dygraph_dataparallel PROPERTIES TIMEOUT 120) - set_tests_properties(test_parallel_dygraph_mnist PROPERTIES TIMEOUT 200) - set_tests_properties(test_parallel_dygraph_se_resnext PROPERTIES TIMEOUT 200) - set_tests_properties(test_parallel_dygraph_unused_variables PROPERTIES TIMEOUT 350) - set_tests_properties(test_parallel_dygraph_control_flow PROPERTIES TIMEOUT 350) - set_tests_properties(test_parallel_dygraph_no_sync PROPERTIES TIMEOUT 300) - set_tests_properties(test_parallel_dygraph_no_sync_gradient_check PROPERTIES TIMEOUT 30) - set_tests_properties(test_parallel_dygraph_pipeline_parallel PROPERTIES TIMEOUT 500) - set_tests_properties(test_parallel_dygraph_tensor_parallel PROPERTIES TIMEOUT 200) - set_tests_properties(test_parallel_dygraph_sharding_parallel PROPERTIES TIMEOUT 120) - set_tests_properties(test_dygraph_sharding_optimizer_stage2 PROPERTIES TIMEOUT 120) - set_tests_properties(test_dygraph_sharding_stage2 PROPERTIES TIMEOUT 200) - set_tests_properties(test_dygraph_sharding_stage3 PROPERTIES TIMEOUT 350) - set_tests_properties(test_dygraph_group_sharded_api PROPERTIES TIMEOUT 120) - set_tests_properties(test_auto_parallel_parallelizer PROPERTIES TIMEOUT 120) - set_tests_properties(test_parallel_dygraph_mp_layers PROPERTIES TIMEOUT 120) - set_tests_properties(test_hybrid_parallel_inference_helper PROPERTIES TIMEOUT 120) - set_tests_properties(test_parallel_class_center_sample PROPERTIES TIMEOUT 120) - set_tests_properties(test_parallel_margin_cross_entropy PROPERTIES TIMEOUT 120) - set_tests_properties(test_auto_parallel_data_unshard PROPERTIES TIMEOUT 120) - set_tests_properties(test_auto_parallel_save_load PROPERTIES TIMEOUT 120) - set_tests_properties(test_auto_parallel_autoconvert PROPERTIES TIMEOUT 120) - set_tests_properties(test_collective_process_group PROPERTIES TIMEOUT 120) - set_tests_properties(test_eager_dist_api PROPERTIES TIMEOUT 100) - - if(${NCCL_VERSION} VERSION_GREATER_EQUAL 2212) - set_tests_properties(test_parallel_dygraph_sparse_embedding PROPERTIES TIMEOUT 200) - set_tests_properties(test_parallel_dygraph_transformer PROPERTIES TIMEOUT 200) - set_tests_properties(test_parallel_dygraph_sparse_embedding_over_height PROPERTIES TIMEOUT 150) - endif() +if(WITH_DISTRIBUTE + AND WITH_GPU + AND WITH_NCCL) + set_tests_properties(test_parallel_dygraph_dataparallel PROPERTIES TIMEOUT + 120) + set_tests_properties(test_parallel_dygraph_mnist PROPERTIES TIMEOUT 200) + set_tests_properties(test_parallel_dygraph_se_resnext PROPERTIES TIMEOUT 200) + set_tests_properties(test_parallel_dygraph_unused_variables PROPERTIES TIMEOUT + 350) + set_tests_properties(test_parallel_dygraph_control_flow PROPERTIES TIMEOUT + 350) + set_tests_properties(test_parallel_dygraph_no_sync PROPERTIES TIMEOUT 300) + set_tests_properties(test_parallel_dygraph_no_sync_gradient_check + PROPERTIES TIMEOUT 30) + set_tests_properties(test_parallel_dygraph_pipeline_parallel + PROPERTIES TIMEOUT 500) + set_tests_properties(test_parallel_dygraph_tensor_parallel PROPERTIES TIMEOUT + 200) + set_tests_properties(test_parallel_dygraph_sharding_parallel + PROPERTIES TIMEOUT 120) + set_tests_properties(test_dygraph_sharding_optimizer_stage2 PROPERTIES TIMEOUT + 120) + set_tests_properties(test_dygraph_sharding_stage2 PROPERTIES TIMEOUT 200) + set_tests_properties(test_dygraph_sharding_stage3 PROPERTIES TIMEOUT 350) + set_tests_properties(test_dygraph_group_sharded_api PROPERTIES TIMEOUT 120) + set_tests_properties(test_auto_parallel_parallelizer PROPERTIES TIMEOUT 120) + set_tests_properties(test_parallel_dygraph_mp_layers PROPERTIES TIMEOUT 120) + set_tests_properties(test_hybrid_parallel_inference_helper PROPERTIES TIMEOUT + 120) + set_tests_properties(test_parallel_class_center_sample PROPERTIES TIMEOUT 120) + set_tests_properties(test_parallel_margin_cross_entropy PROPERTIES TIMEOUT + 120) + set_tests_properties(test_auto_parallel_data_unshard PROPERTIES TIMEOUT 120) + set_tests_properties(test_auto_parallel_save_load PROPERTIES TIMEOUT 120) + set_tests_properties(test_auto_parallel_autoconvert PROPERTIES TIMEOUT 120) + set_tests_properties(test_collective_process_group PROPERTIES TIMEOUT 120) + set_tests_properties(test_eager_dist_api PROPERTIES TIMEOUT 100) + + if(${NCCL_VERSION} VERSION_GREATER_EQUAL 2212) + set_tests_properties(test_parallel_dygraph_sparse_embedding + PROPERTIES TIMEOUT 200) + set_tests_properties(test_parallel_dygraph_transformer PROPERTIES TIMEOUT + 200) + set_tests_properties(test_parallel_dygraph_sparse_embedding_over_height + PROPERTIES TIMEOUT 150) + endif() endif() if(APPLE) - set_tests_properties(test_imperative_transformer_sorted_gradient PROPERTIES TIMEOUT 300) - set_tests_properties(test_multiclass_nms_op PROPERTIES TIMEOUT 300) - set_tests_properties(test_weight_decay PROPERTIES TIMEOUT 300) - set_tests_properties(test_imperative_static_runner_mnist PROPERTIES TIMEOUT 300) + set_tests_properties(test_imperative_transformer_sorted_gradient + PROPERTIES TIMEOUT 300) + set_tests_properties(test_multiclass_nms_op PROPERTIES TIMEOUT 300) + set_tests_properties(test_weight_decay PROPERTIES TIMEOUT 300) + set_tests_properties(test_imperative_static_runner_mnist PROPERTIES TIMEOUT + 300) endif() if((WITH_ROCM OR WITH_GPU) AND NOT WIN32) - set_tests_properties(test_collective_allgather_api PROPERTIES TIMEOUT 120) - set_tests_properties(test_collective_alltoall_api PROPERTIES TIMEOUT 120) - set_tests_properties(test_collective_global_gather PROPERTIES TIMEOUT 200) - set_tests_properties(test_collective_global_scatter PROPERTIES TIMEOUT 200) - set_tests_properties(test_collective_sendrecv_api PROPERTIES TIMEOUT 120) - set_tests_properties(test_collective_broadcast_api PROPERTIES TIMEOUT 120) - set_tests_properties(test_collective_allreduce_api PROPERTIES TIMEOUT 120) - if(WITH_DISTRIBUTE) - set_tests_properties(test_new_group_api PROPERTIES TIMEOUT 120) - set_tests_properties(test_pipeline PROPERTIES TIMEOUT 120) - set_tests_properties(test_ir_pass_pipeline PROPERTIES TIMEOUT 120) - set_tests_properties(test_static_model_parallel PROPERTIES TIMEOUT 240) - set_tests_properties(test_static_model_parallel_fused_feedforward PROPERTIES TIMEOUT 120) - set_tests_properties(test_static_model_parallel_fused_attention PROPERTIES TIMEOUT 120) - set_tests_properties(test_static_model_parallel_fused_multi_transformer PROPERTIES TIMEOUT 120) - set_tests_properties(test_collective_split_embedding - test_collective_split_embedding_none_divisible - test_collective_split_row_linear - test_collective_split_col_linear - test_collective_scatter_api - test_collective_barrier_api - test_collective_reduce_api - test_pipeline_parallel - test_collective_allreduce_api - test_new_group_api - test_collective_broadcast_api - test_collective_allgather_api - test_collective_alltoall_api - test_collective_global_gather - test_collective_global_scatter - PROPERTIES LABELS "RUN_TYPE=DIST") - endif() - set_tests_properties(test_paddle_multiprocessing PROPERTIES TIMEOUT 120) - set_tests_properties(test_reducescatter_api PROPERTIES TIMEOUT 120) - set_tests_properties(test_broadcast PROPERTIES TIMEOUT 120) - set_tests_properties(test_reducescatter PROPERTIES TIMEOUT 120) - set_tests_properties(test_collective_reduce_api PROPERTIES TIMEOUT 120) - set_tests_properties(test_pipeline_parallel PROPERTIES TIMEOUT 120) - set_tests_properties(test_collective_reduce PROPERTIES TIMEOUT 120) - set_tests_properties(test_allreduce PROPERTIES TIMEOUT 120) - set_tests_properties(test_c_concat PROPERTIES TIMEOUT 120) - set_tests_properties(test_c_split PROPERTIES TIMEOUT 120) - set_tests_properties(test_allgather PROPERTIES TIMEOUT 120) - set_tests_properties(test_c_identity PROPERTIES TIMEOUT 120) - set_tests_properties(test_collective_scatter_api PROPERTIES TIMEOUT 120) - set_tests_properties(test_collective_barrier_api PROPERTIES TIMEOUT 120) - set_tests_properties(test_collective_scatter PROPERTIES TIMEOUT 120) - set_tests_properties(test_collective_sendrecv PROPERTIES TIMEOUT 120) + set_tests_properties(test_collective_allgather_api PROPERTIES TIMEOUT 120) + set_tests_properties(test_collective_alltoall_api PROPERTIES TIMEOUT 120) + set_tests_properties(test_collective_global_gather PROPERTIES TIMEOUT 200) + set_tests_properties(test_collective_global_scatter PROPERTIES TIMEOUT 200) + set_tests_properties(test_collective_sendrecv_api PROPERTIES TIMEOUT 120) + set_tests_properties(test_collective_broadcast_api PROPERTIES TIMEOUT 120) + set_tests_properties(test_collective_allreduce_api PROPERTIES TIMEOUT 120) + if(WITH_DISTRIBUTE) + set_tests_properties(test_new_group_api PROPERTIES TIMEOUT 120) + set_tests_properties(test_pipeline PROPERTIES TIMEOUT 120) + set_tests_properties(test_ir_pass_pipeline PROPERTIES TIMEOUT 120) + set_tests_properties(test_static_model_parallel PROPERTIES TIMEOUT 240) + set_tests_properties(test_static_model_parallel_fused_feedforward + PROPERTIES TIMEOUT 120) + set_tests_properties(test_static_model_parallel_fused_attention + PROPERTIES TIMEOUT 120) + set_tests_properties(test_static_model_parallel_fused_multi_transformer + PROPERTIES TIMEOUT 120) + set_tests_properties( + test_collective_split_embedding + test_collective_split_embedding_none_divisible + test_collective_split_row_linear + test_collective_split_col_linear + test_collective_scatter_api + test_collective_barrier_api + test_collective_reduce_api + test_pipeline_parallel + test_collective_allreduce_api + test_new_group_api + test_collective_broadcast_api + test_collective_allgather_api + test_collective_alltoall_api + test_collective_global_gather + test_collective_global_scatter + PROPERTIES LABELS "RUN_TYPE=DIST") + endif() + set_tests_properties(test_paddle_multiprocessing PROPERTIES TIMEOUT 120) + set_tests_properties(test_reducescatter_api PROPERTIES TIMEOUT 120) + set_tests_properties(test_broadcast PROPERTIES TIMEOUT 120) + set_tests_properties(test_reducescatter PROPERTIES TIMEOUT 120) + set_tests_properties(test_collective_reduce_api PROPERTIES TIMEOUT 120) + set_tests_properties(test_pipeline_parallel PROPERTIES TIMEOUT 120) + set_tests_properties(test_collective_reduce PROPERTIES TIMEOUT 120) + set_tests_properties(test_allreduce PROPERTIES TIMEOUT 120) + set_tests_properties(test_c_concat PROPERTIES TIMEOUT 120) + set_tests_properties(test_c_split PROPERTIES TIMEOUT 120) + set_tests_properties(test_allgather PROPERTIES TIMEOUT 120) + set_tests_properties(test_c_identity PROPERTIES TIMEOUT 120) + set_tests_properties(test_collective_scatter_api PROPERTIES TIMEOUT 120) + set_tests_properties(test_collective_barrier_api PROPERTIES TIMEOUT 120) + set_tests_properties(test_collective_scatter PROPERTIES TIMEOUT 120) + set_tests_properties(test_collective_sendrecv PROPERTIES TIMEOUT 120) endif() if(WITH_GPU OR WITH_ROCM) - set_tests_properties(test_imperative_auto_mixed_precision PROPERTIES TIMEOUT 300) - set_tests_properties(test_parallel_dygraph_sync_batch_norm PROPERTIES TIMEOUT 120) - set_tests_properties(test_rank_attention_op PROPERTIES TIMEOUT 120) + set_tests_properties(test_imperative_auto_mixed_precision PROPERTIES TIMEOUT + 300) + set_tests_properties(test_parallel_dygraph_sync_batch_norm PROPERTIES TIMEOUT + 120) + set_tests_properties(test_rank_attention_op PROPERTIES TIMEOUT 120) endif() set_tests_properties(test_inplace_addto_strategy PROPERTIES TIMEOUT 120) set_tests_properties(test_eigvals_op PROPERTIES TIMEOUT 400) -set_tests_properties(test_cuda_memory_reserved PROPERTIES ENVIRONMENT "FLAGS_allocator_strategy=auto_growth") -if (WITH_GLOO) - set_tests_properties(test_parallel_dygraph_dataparallel_cpuonly PROPERTIES TIMEOUT 30) - set_tests_properties(test_parallel_dygraph_unused_variables_gloo PROPERTIES TIMEOUT 120) - set_tests_properties(test_parallel_dygraph_sparse_embedding_gloo PROPERTIES TIMEOUT 120) - set_tests_properties(test_parallel_dygraph_sparse_embedding_over_height_gloo PROPERTIES TIMEOUT 120) +set_tests_properties( + test_cuda_memory_reserved PROPERTIES ENVIRONMENT + "FLAGS_allocator_strategy=auto_growth") +if(WITH_GLOO) + set_tests_properties(test_parallel_dygraph_dataparallel_cpuonly + PROPERTIES TIMEOUT 30) + set_tests_properties(test_parallel_dygraph_unused_variables_gloo + PROPERTIES TIMEOUT 120) + set_tests_properties(test_parallel_dygraph_sparse_embedding_gloo + PROPERTIES TIMEOUT 120) + set_tests_properties(test_parallel_dygraph_sparse_embedding_over_height_gloo + PROPERTIES TIMEOUT 120) endif() if($ENV{USE_STANDALONE_EXECUTOR}) - # these test will fail in some server due to PR#42149, temporarily set it use old executor. - set_tests_properties(test_apply_pass_to_program PROPERTIES ENVIRONMENT FLAGS_USE_STANDALONE_EXECUTOR=0) - set_tests_properties(test_buffer_shared_memory_reuse_pass PROPERTIES ENVIRONMENT FLAGS_USE_STANDALONE_EXECUTOR=0) - set_tests_properties(test_buffer_shared_memory_reuse_pass_and_fuse_optimization_op_pass PROPERTIES ENVIRONMENT FLAGS_USE_STANDALONE_EXECUTOR=0) - set_tests_properties(test_imperative_optimizer PROPERTIES ENVIRONMENT FLAGS_USE_STANDALONE_EXECUTOR=0) - set_tests_properties(test_imperative_star_gan_with_gradient_penalty PROPERTIES ENVIRONMENT FLAGS_USE_STANDALONE_EXECUTOR=0) - set_tests_properties(test_switch_autotune PROPERTIES ENVIRONMENT FLAGS_USE_STANDALONE_EXECUTOR=0) - set_tests_properties(test_imperative_mnist_sorted_gradient PROPERTIES ENVIRONMENT FLAGS_USE_STANDALONE_EXECUTOR=0) + # these test will fail in some server due to PR#42149, temporarily set it use old executor. + set_tests_properties(test_apply_pass_to_program + PROPERTIES ENVIRONMENT FLAGS_USE_STANDALONE_EXECUTOR=0) + set_tests_properties(test_buffer_shared_memory_reuse_pass + PROPERTIES ENVIRONMENT FLAGS_USE_STANDALONE_EXECUTOR=0) + set_tests_properties( + test_buffer_shared_memory_reuse_pass_and_fuse_optimization_op_pass + PROPERTIES ENVIRONMENT FLAGS_USE_STANDALONE_EXECUTOR=0) + set_tests_properties(test_imperative_optimizer + PROPERTIES ENVIRONMENT FLAGS_USE_STANDALONE_EXECUTOR=0) + set_tests_properties(test_imperative_star_gan_with_gradient_penalty + PROPERTIES ENVIRONMENT FLAGS_USE_STANDALONE_EXECUTOR=0) + set_tests_properties(test_switch_autotune + PROPERTIES ENVIRONMENT FLAGS_USE_STANDALONE_EXECUTOR=0) + set_tests_properties(test_imperative_mnist_sorted_gradient + PROPERTIES ENVIRONMENT FLAGS_USE_STANDALONE_EXECUTOR=0) endif() diff --git a/python/paddle/fluid/tests/unittests/asp/CMakeLists.txt b/python/paddle/fluid/tests/unittests/asp/CMakeLists.txt index 76856d88e17..4fd16354e6c 100644 --- a/python/paddle/fluid/tests/unittests/asp/CMakeLists.txt +++ b/python/paddle/fluid/tests/unittests/asp/CMakeLists.txt @@ -1,4 +1,7 @@ -file(GLOB TEST_OPS RELATIVE "${CMAKE_CURRENT_SOURCE_DIR}" "test_*.py") +file( + GLOB TEST_OPS + RELATIVE "${CMAKE_CURRENT_SOURCE_DIR}" + "test_*.py") string(REPLACE ".py" "" TEST_OPS "${TEST_OPS}") list(REMOVE_ITEM TEST_OPS "test_fleet_with_asp_static") @@ -6,20 +9,31 @@ list(REMOVE_ITEM TEST_OPS "test_fleet_with_asp_dynamic") list(REMOVE_ITEM TEST_OPS "test_fleet_with_asp_sharding") foreach(TEST_OP ${TEST_OPS}) - py_test_modules(${TEST_OP} MODULES ${TEST_OP}) + py_test_modules(${TEST_OP} MODULES ${TEST_OP}) endforeach(TEST_OP) if(WITH_DISTRIBUTE) - if (WITH_GPU OR WITH_XPU OR WITH_ASCEND OR WITH_ASCEND_CL) - py_test_modules(test_fleet_with_asp_dynamic MODULES test_fleet_with_asp_dynamic ENVS ${dist_ENVS}) - py_test_modules(test_fleet_with_asp_static MODULES test_fleet_with_asp_static ENVS ${dist_ENVS}) - endif() + if(WITH_GPU + OR WITH_XPU + OR WITH_ASCEND + OR WITH_ASCEND_CL) + py_test_modules(test_fleet_with_asp_dynamic MODULES + test_fleet_with_asp_dynamic ENVS ${dist_ENVS}) + py_test_modules(test_fleet_with_asp_static MODULES + test_fleet_with_asp_static ENVS ${dist_ENVS}) + endif() endif() -if((WITH_DISTRIBUTE) AND (NOT WIN32) AND (NOT APPLE)) - if (WITH_GPU OR WITH_XPU OR WITH_ASCEND OR WITH_ASCEND_CL) - py_test_modules(test_fleet_with_asp_sharding MODULES test_fleet_with_asp_sharding ENVS ${dist_ENVS}) - endif() +if((WITH_DISTRIBUTE) + AND (NOT WIN32) + AND (NOT APPLE)) + if(WITH_GPU + OR WITH_XPU + OR WITH_ASCEND + OR WITH_ASCEND_CL) + py_test_modules(test_fleet_with_asp_sharding MODULES + test_fleet_with_asp_sharding ENVS ${dist_ENVS}) + endif() endif() set_tests_properties(test_asp_pruning_dynamic PROPERTIES TIMEOUT 30) diff --git a/python/paddle/fluid/tests/unittests/auto_parallel/CMakeLists.txt b/python/paddle/fluid/tests/unittests/auto_parallel/CMakeLists.txt index 8c8a6823105..10498bf48e9 100644 --- a/python/paddle/fluid/tests/unittests/auto_parallel/CMakeLists.txt +++ b/python/paddle/fluid/tests/unittests/auto_parallel/CMakeLists.txt @@ -1,36 +1,51 @@ # file(GLOB TEST_OPS RELATIVE "${CMAKE_CURRENT_SOURCE_DIR}" "test_*.py") # string(REPLACE ".py" "" TEST_OPS "${TEST_OPS}") if(WITH_DISTRIBUTE AND WITH_GPU) - py_test_modules(test_auto_parallel_relaunch MODULES test_auto_parallel_relaunch ENVS ${dist_ENVS}) - set_tests_properties(test_auto_parallel_relaunch PROPERTIES LABELS "RUN_TYPE=EXCLUSIVE" TIMEOUT 120) + py_test_modules(test_auto_parallel_relaunch MODULES + test_auto_parallel_relaunch ENVS ${dist_ENVS}) + set_tests_properties(test_auto_parallel_relaunch + PROPERTIES LABELS "RUN_TYPE=EXCLUSIVE" TIMEOUT 120) - py_test_modules(test_relaunch_with_planner MODULES test_relaunch_with_planner ENVS ${dist_ENVS}) - set_tests_properties(test_relaunch_with_planner PROPERTIES LABELS "RUN_TYPE=EXCLUSIVE" TIMEOUT 120) + py_test_modules(test_relaunch_with_planner MODULES test_relaunch_with_planner + ENVS ${dist_ENVS}) + set_tests_properties(test_relaunch_with_planner + PROPERTIES LABELS "RUN_TYPE=EXCLUSIVE" TIMEOUT 120) - py_test_modules(test_relaunch_with_gpt_planner MODULES test_relaunch_with_gpt_planner ENVS ${dist_ENVS}) - set_tests_properties(test_relaunch_with_gpt_planner PROPERTIES LABELS "RUN_TYPE=EXCLUSIVE" TIMEOUT 240) + py_test_modules(test_relaunch_with_gpt_planner MODULES + test_relaunch_with_gpt_planner ENVS ${dist_ENVS}) + set_tests_properties(test_relaunch_with_gpt_planner + PROPERTIES LABELS "RUN_TYPE=EXCLUSIVE" TIMEOUT 240) - py_test_modules(test_engine_api MODULES test_engine_api ENVS ${dist_ENVS}) - set_tests_properties(test_engine_api PROPERTIES LABELS "RUN_TYPE=EXCLUSIVE" TIMEOUT 80) + py_test_modules(test_engine_api MODULES test_engine_api ENVS ${dist_ENVS}) + set_tests_properties(test_engine_api PROPERTIES LABELS "RUN_TYPE=EXCLUSIVE" + TIMEOUT 80) - py_test_modules(test_converter MODULES test_converter ENVS ${dist_ENVS}) - set_tests_properties(test_converter PROPERTIES LABELS "RUN_TYPE=EXCLUSIVE" TIMEOUT 50) - py_test_modules(test_high_order_grad MODULES test_high_order_grad ENVS ${dist_ENVS}) - set_tests_properties(test_high_order_grad PROPERTIES LABELS "RUN_TYPE=EXCLUSIVE" TIMEOUT 50) + py_test_modules(test_converter MODULES test_converter ENVS ${dist_ENVS}) + set_tests_properties(test_converter PROPERTIES LABELS "RUN_TYPE=EXCLUSIVE" + TIMEOUT 50) + py_test_modules(test_high_order_grad MODULES test_high_order_grad ENVS + ${dist_ENVS}) + set_tests_properties(test_high_order_grad + PROPERTIES LABELS "RUN_TYPE=EXCLUSIVE" TIMEOUT 50) - py_test_modules(test_while_op_completion MODULES test_while_op_completion ENVS ${dist_ENVS}) - py_test_modules(test_while_op_partition MODULES test_while_op_partition ENVS ${dist_ENVS}) - py_test_modules(test_tunable_variable MODULES test_tunable_variable ENVS ${dist_ENVS}) - py_test_modules(test_tunable_space MODULES test_tunable_space ENVS ${dist_ENVS}) - py_test_modules(test_recorder MODULES test_recorder ENVS ${dist_ENVS}) - py_test_modules(test_trial MODULES test_trial ENVS ${dist_ENVS}) - py_test_modules(test_new_cost_model MODULES test_new_cost_model ENVS ${dist_ENVS}) - py_test_modules(test_dist_reshape MODULES test_dist_reshape ENVS ${dist_ENVS}) - py_test_modules(test_dist_pnorm MODULES test_dist_pnorm ENVS ${dist_ENVS}) - py_test_modules(test_dist_slice MODULES test_dist_slice ENVS ${dist_ENVS}) - py_test_modules(test_cluster MODULES test_cluster ENVS ${dist_ENVS}) - py_test_modules(test_comm_cost MODULES test_comm_cost ENVS ${dist_ENVS}) - py_test_modules(test_comp_cost MODULES test_comp_cost ENVS ${dist_ENVS}) - py_test_modules(test_dist_context MODULES test_dist_context ENVS ${dist_ENVS}) - py_test_modules(test_prim_dist_op MODULES test_prim_dist_op ENVS ${dist_ENVS}) + py_test_modules(test_while_op_completion MODULES test_while_op_completion + ENVS ${dist_ENVS}) + py_test_modules(test_while_op_partition MODULES test_while_op_partition ENVS + ${dist_ENVS}) + py_test_modules(test_tunable_variable MODULES test_tunable_variable ENVS + ${dist_ENVS}) + py_test_modules(test_tunable_space MODULES test_tunable_space ENVS + ${dist_ENVS}) + py_test_modules(test_recorder MODULES test_recorder ENVS ${dist_ENVS}) + py_test_modules(test_trial MODULES test_trial ENVS ${dist_ENVS}) + py_test_modules(test_new_cost_model MODULES test_new_cost_model ENVS + ${dist_ENVS}) + py_test_modules(test_dist_reshape MODULES test_dist_reshape ENVS ${dist_ENVS}) + py_test_modules(test_dist_pnorm MODULES test_dist_pnorm ENVS ${dist_ENVS}) + py_test_modules(test_dist_slice MODULES test_dist_slice ENVS ${dist_ENVS}) + py_test_modules(test_cluster MODULES test_cluster ENVS ${dist_ENVS}) + py_test_modules(test_comm_cost MODULES test_comm_cost ENVS ${dist_ENVS}) + py_test_modules(test_comp_cost MODULES test_comp_cost ENVS ${dist_ENVS}) + py_test_modules(test_dist_context MODULES test_dist_context ENVS ${dist_ENVS}) + py_test_modules(test_prim_dist_op MODULES test_prim_dist_op ENVS ${dist_ENVS}) endif() diff --git a/python/paddle/fluid/tests/unittests/autograd/CMakeLists.txt b/python/paddle/fluid/tests/unittests/autograd/CMakeLists.txt index 37216241b8f..b5ebeb659a6 100644 --- a/python/paddle/fluid/tests/unittests/autograd/CMakeLists.txt +++ b/python/paddle/fluid/tests/unittests/autograd/CMakeLists.txt @@ -1,9 +1,12 @@ -file(GLOB TEST_OPS RELATIVE "${CMAKE_CURRENT_SOURCE_DIR}" "test_*.py") +file( + GLOB TEST_OPS + RELATIVE "${CMAKE_CURRENT_SOURCE_DIR}" + "test_*.py") string(REPLACE ".py" "" TEST_OPS "${TEST_OPS}") set(GC_ENVS FLAGS_eager_delete_tensor_gb=0.0) foreach(TEST_OP ${TEST_OPS}) - py_test_modules(${TEST_OP} MODULES ${TEST_OP} ENVS ${GC_ENVS}) + py_test_modules(${TEST_OP} MODULES ${TEST_OP} ENVS ${GC_ENVS}) endforeach(TEST_OP) set_tests_properties(test_autograd_functional_dynamic PROPERTIES TIMEOUT 160) diff --git a/python/paddle/fluid/tests/unittests/distributed_passes/CMakeLists.txt b/python/paddle/fluid/tests/unittests/distributed_passes/CMakeLists.txt index 764a862d30f..c68cebaa25b 100644 --- a/python/paddle/fluid/tests/unittests/distributed_passes/CMakeLists.txt +++ b/python/paddle/fluid/tests/unittests/distributed_passes/CMakeLists.txt @@ -1,25 +1,30 @@ -file(GLOB TEST_OPS RELATIVE "${CMAKE_CURRENT_SOURCE_DIR}" "test_*.py") +file( + GLOB TEST_OPS + RELATIVE "${CMAKE_CURRENT_SOURCE_DIR}" + "test_*.py") string(REPLACE ".py" "" TEST_OPS "${TEST_OPS}") -if ((NOT WITH_GPU) AND (NOT WITH_XPU) AND NOT (WITH_ASCEND OR WITH_ASCEND_CL)) - list(REMOVE_ITEM TEST_OPS "test_dist_fuse_adam_pass") - list(REMOVE_ITEM TEST_OPS "test_dist_fuse_all_reduce_pass") - list(REMOVE_ITEM TEST_OPS "test_dist_fuse_bn_act_pass") - list(REMOVE_ITEM TEST_OPS "test_dist_fuse_bn_add_act_pass") - list(REMOVE_ITEM TEST_OPS "test_dist_fuse_momentum_pass") - list(REMOVE_ITEM TEST_OPS "test_dist_fuse_relu_depthwise_conv_pass") - list(REMOVE_ITEM TEST_OPS "test_dist_fuse_sgd_pass") - list(REMOVE_ITEM TEST_OPS "test_dist_gradient_merge_pass") - list(REMOVE_ITEM TEST_OPS "test_dist_inplace_addto_pass") - list(REMOVE_ITEM TEST_OPS "test_auto_parallel_amp_pass") - list(REMOVE_ITEM TEST_OPS "test_auto_parallel_recompute_pass") - list(REMOVE_ITEM TEST_OPS "test_auto_parallel_sharding_pass") - list(REMOVE_ITEM TEST_OPS "test_auto_parallel_fp16_pass") +if((NOT WITH_GPU) + AND (NOT WITH_XPU) + AND NOT (WITH_ASCEND OR WITH_ASCEND_CL)) + list(REMOVE_ITEM TEST_OPS "test_dist_fuse_adam_pass") + list(REMOVE_ITEM TEST_OPS "test_dist_fuse_all_reduce_pass") + list(REMOVE_ITEM TEST_OPS "test_dist_fuse_bn_act_pass") + list(REMOVE_ITEM TEST_OPS "test_dist_fuse_bn_add_act_pass") + list(REMOVE_ITEM TEST_OPS "test_dist_fuse_momentum_pass") + list(REMOVE_ITEM TEST_OPS "test_dist_fuse_relu_depthwise_conv_pass") + list(REMOVE_ITEM TEST_OPS "test_dist_fuse_sgd_pass") + list(REMOVE_ITEM TEST_OPS "test_dist_gradient_merge_pass") + list(REMOVE_ITEM TEST_OPS "test_dist_inplace_addto_pass") + list(REMOVE_ITEM TEST_OPS "test_auto_parallel_amp_pass") + list(REMOVE_ITEM TEST_OPS "test_auto_parallel_recompute_pass") + list(REMOVE_ITEM TEST_OPS "test_auto_parallel_sharding_pass") + list(REMOVE_ITEM TEST_OPS "test_auto_parallel_fp16_pass") endif() foreach(TEST_OP ${TEST_OPS}) - py_test_modules(${TEST_OP} MODULES ${TEST_OP}) - list(APPEND DIST_TEST_OPS ${TEST_OP}) - set_tests_properties(${TEST_OP} PROPERTIES TIMEOUT 120) - set_tests_properties(${TEST_OP} PROPERTIES LABELS "RUN_TYPE=DIST") + py_test_modules(${TEST_OP} MODULES ${TEST_OP}) + list(APPEND DIST_TEST_OPS ${TEST_OP}) + set_tests_properties(${TEST_OP} PROPERTIES TIMEOUT 120) + set_tests_properties(${TEST_OP} PROPERTIES LABELS "RUN_TYPE=DIST") endforeach(TEST_OP) diff --git a/python/paddle/fluid/tests/unittests/distribution/CMakeLists.txt b/python/paddle/fluid/tests/unittests/distribution/CMakeLists.txt index f71e04c09aa..e3bf89c4882 100644 --- a/python/paddle/fluid/tests/unittests/distribution/CMakeLists.txt +++ b/python/paddle/fluid/tests/unittests/distribution/CMakeLists.txt @@ -1,6 +1,9 @@ -file(GLOB TEST_OPS RELATIVE "${CMAKE_CURRENT_SOURCE_DIR}" "test_*.py") +file( + GLOB TEST_OPS + RELATIVE "${CMAKE_CURRENT_SOURCE_DIR}" + "test_*.py") string(REPLACE ".py" "" TEST_OPS "${TEST_OPS}") foreach(TEST_OP ${TEST_OPS}) - py_test_modules(${TEST_OP} MODULES ${TEST_OP}) + py_test_modules(${TEST_OP} MODULES ${TEST_OP}) endforeach(TEST_OP) diff --git a/python/paddle/fluid/tests/unittests/dygraph_to_static/CMakeLists.txt b/python/paddle/fluid/tests/unittests/dygraph_to_static/CMakeLists.txt index ddc959a29a2..f9a1e83d381 100644 --- a/python/paddle/fluid/tests/unittests/dygraph_to_static/CMakeLists.txt +++ b/python/paddle/fluid/tests/unittests/dygraph_to_static/CMakeLists.txt @@ -1,35 +1,61 @@ -file(GLOB TEST_OPS RELATIVE "${CMAKE_CURRENT_SOURCE_DIR}" "test_*.py") +file( + GLOB TEST_OPS + RELATIVE "${CMAKE_CURRENT_SOURCE_DIR}" + "test_*.py") string(REPLACE ".py" "" TEST_OPS "${TEST_OPS}") set(GC_ENVS FLAGS_eager_delete_tensor_gb=0.0) set(DY2ST_EAGER_TEST_ENVS ${GC_ENVS} FLAGS_enable_eager_mode=1) -set(TEST_EAGER_OPS test_bmn test_break_continue test_ifelse test_loop test_mnist_amp - test_mnist_pure_fp16 test_mobile_net test_program_translator test_ptb_lm test_reinforcement_learning - test_resnet test_resnet_amp test_resnet_pure_fp16 test_se_resnet test_sentiment test_seq2seq - test_tsm test_word2vec test_yolov3 test_bert test_cycle_gan test_lstm test_simnet test_transformer) +set(TEST_EAGER_OPS + test_bmn + test_break_continue + test_ifelse + test_loop + test_mnist_amp + test_mnist_pure_fp16 + test_mobile_net + test_program_translator + test_ptb_lm + test_reinforcement_learning + test_resnet + test_resnet_amp + test_resnet_pure_fp16 + test_se_resnet + test_sentiment + test_seq2seq + test_tsm + test_word2vec + test_yolov3 + test_bert + test_cycle_gan + test_lstm + test_simnet + test_transformer) list(REMOVE_ITEM TEST_OPS test_lac) # NOTE(Aurelius84): In case of Windows CI, if open ON_INFER, RWLOCK of Scope will # be removed and will cause some random failed in multi-thread. if(NOT ON_INFER) - py_test_modules(test_lac MODULES test_lac ENVS FLAGS_enable_eager_mode=1) - set_tests_properties(test_lac PROPERTIES TIMEOUT 120) + py_test_modules(test_lac MODULES test_lac ENVS FLAGS_enable_eager_mode=1) + set_tests_properties(test_lac PROPERTIES TIMEOUT 120) endif() if(WIN32 AND NOT WITH_GPU) - list(REMOVE_ITEM TEST_OPS test_resnet_amp) # disable on Windows CPU CI for timeout + list(REMOVE_ITEM TEST_OPS test_resnet_amp + )# disable on Windows CPU CI for timeout endif() foreach(TEST_OP ${TEST_OPS}) - list(FIND TEST_EAGER_OPS ${TEST_OP} WAS_FOUND) - if (NOT WAS_FOUND EQUAL -1) - py_test_modules(${TEST_OP} MODULES ${TEST_OP} ENVS ${DY2ST_EAGER_TEST_ENVS}) - else() - py_test_modules(${TEST_OP} MODULES ${TEST_OP} ENVS ${GC_ENVS}) - endif() + list(FIND TEST_EAGER_OPS ${TEST_OP} WAS_FOUND) + if(NOT WAS_FOUND EQUAL -1) + py_test_modules(${TEST_OP} MODULES ${TEST_OP} ENVS ${DY2ST_EAGER_TEST_ENVS}) + else() + py_test_modules(${TEST_OP} MODULES ${TEST_OP} ENVS ${GC_ENVS}) + endif() endforeach(TEST_OP) set_tests_properties(test_se_resnet PROPERTIES TIMEOUT 900) -set_tests_properties(test_yolov3 PROPERTIES TIMEOUT 900 LABELS "RUN_TYPE=EXCLUSIVE") +set_tests_properties(test_yolov3 PROPERTIES TIMEOUT 900 LABELS + "RUN_TYPE=EXCLUSIVE") set_tests_properties(test_mobile_net PROPERTIES TIMEOUT 120) set_tests_properties(test_seq2seq PROPERTIES TIMEOUT 120) set_tests_properties(test_cycle_gan PROPERTIES TIMEOUT 150) @@ -42,14 +68,14 @@ set_tests_properties(test_bmn PROPERTIES TIMEOUT 120) set_tests_properties(test_build_strategy PROPERTIES TIMEOUT 120) if(NOT WIN32) - set_tests_properties(test_resnet_v2 PROPERTIES TIMEOUT 120) - set_tests_properties(test_tsm PROPERTIES TIMEOUT 900) - #set_tests_properties(test_resnet PROPERTIES TIMEOUT 120) + set_tests_properties(test_resnet_v2 PROPERTIES TIMEOUT 120) + set_tests_properties(test_tsm PROPERTIES TIMEOUT 900) + #set_tests_properties(test_resnet PROPERTIES TIMEOUT 120) endif() if(APPLE) - set_tests_properties(test_bmn PROPERTIES TIMEOUT 300) - set_tests_properties(test_build_strategy PROPERTIES TIMEOUT 300) - set_tests_properties(test_mobile_net PROPERTIES TIMEOUT 300) - set_tests_properties(test_resnet_v2 PROPERTIES TIMEOUT 300) + set_tests_properties(test_bmn PROPERTIES TIMEOUT 300) + set_tests_properties(test_build_strategy PROPERTIES TIMEOUT 300) + set_tests_properties(test_mobile_net PROPERTIES TIMEOUT 300) + set_tests_properties(test_resnet_v2 PROPERTIES TIMEOUT 300) endif() diff --git a/python/paddle/fluid/tests/unittests/fft/CMakeLists.txt b/python/paddle/fluid/tests/unittests/fft/CMakeLists.txt index f71e04c09aa..e3bf89c4882 100644 --- a/python/paddle/fluid/tests/unittests/fft/CMakeLists.txt +++ b/python/paddle/fluid/tests/unittests/fft/CMakeLists.txt @@ -1,6 +1,9 @@ -file(GLOB TEST_OPS RELATIVE "${CMAKE_CURRENT_SOURCE_DIR}" "test_*.py") +file( + GLOB TEST_OPS + RELATIVE "${CMAKE_CURRENT_SOURCE_DIR}" + "test_*.py") string(REPLACE ".py" "" TEST_OPS "${TEST_OPS}") foreach(TEST_OP ${TEST_OPS}) - py_test_modules(${TEST_OP} MODULES ${TEST_OP}) + py_test_modules(${TEST_OP} MODULES ${TEST_OP}) endforeach(TEST_OP) diff --git a/python/paddle/fluid/tests/unittests/interpreter/CMakeLists.txt b/python/paddle/fluid/tests/unittests/interpreter/CMakeLists.txt index 09cc6ed5b5f..976a36b7615 100644 --- a/python/paddle/fluid/tests/unittests/interpreter/CMakeLists.txt +++ b/python/paddle/fluid/tests/unittests/interpreter/CMakeLists.txt @@ -1,9 +1,46 @@ -file(GLOB TEST_INTERP_CASES RELATIVE "${CMAKE_CURRENT_SOURCE_DIR}" "test_*.py") +file( + GLOB TEST_INTERP_CASES + RELATIVE "${CMAKE_CURRENT_SOURCE_DIR}" + "test_*.py") string(REPLACE ".py" "" TEST_INTERP_CASES "${TEST_INTERP_CASES}") foreach(target ${TEST_INTERP_CASES}) - py_test_modules(${target} MODULES ${target} ENVS FLAGS_host_trace_level=10 FLAGS_static_executor_perfstat_filepath=./perfstat FLAGS_allocator_strategy=auto_growth FLAGS_use_stream_safe_cuda_allocator=true FLAGS_fast_eager_deletion_mode=false FLAGS_eager_delete_tensor_gb=0) - py_test_modules(${target}_non_eager_deletion MODULES ${target} ENVS FLAGS_allocator_strategy=auto_growth FLAGS_use_stream_safe_cuda_allocator=true FLAGS_fast_eager_deletion_mode=false FLAGS_eager_delete_tensor_gb=0.000001) - py_test_modules(${target}_fast_gc MODULES ${target} ENVS FLAGS_allocator_strategy=auto_growth FLAGS_use_stream_safe_cuda_allocator=true FLAGS_fast_eager_deletion_mode=true FLAGS_eager_delete_tensor_gb=0) - py_test_modules(${target}_fast_gc_non_eager_deletion MODULES ${target} ENVS FLAGS_allocator_strategy=auto_growth FLAGS_use_stream_safe_cuda_allocator=true FLAGS_fast_eager_deletion_mode=true FLAGS_eager_delete_tensor_gb=0.000001) + py_test_modules( + ${target} + MODULES + ${target} + ENVS + FLAGS_host_trace_level=10 + FLAGS_static_executor_perfstat_filepath=./perfstat + FLAGS_allocator_strategy=auto_growth + FLAGS_use_stream_safe_cuda_allocator=true + FLAGS_fast_eager_deletion_mode=false + FLAGS_eager_delete_tensor_gb=0) + py_test_modules( + ${target}_non_eager_deletion + MODULES + ${target} + ENVS + FLAGS_allocator_strategy=auto_growth + FLAGS_use_stream_safe_cuda_allocator=true + FLAGS_fast_eager_deletion_mode=false + FLAGS_eager_delete_tensor_gb=0.000001) + py_test_modules( + ${target}_fast_gc + MODULES + ${target} + ENVS + FLAGS_allocator_strategy=auto_growth + FLAGS_use_stream_safe_cuda_allocator=true + FLAGS_fast_eager_deletion_mode=true + FLAGS_eager_delete_tensor_gb=0) + py_test_modules( + ${target}_fast_gc_non_eager_deletion + MODULES + ${target} + ENVS + FLAGS_allocator_strategy=auto_growth + FLAGS_use_stream_safe_cuda_allocator=true + FLAGS_fast_eager_deletion_mode=true + FLAGS_eager_delete_tensor_gb=0.000001) endforeach() diff --git a/python/paddle/fluid/tests/unittests/ipu/CMakeLists.txt b/python/paddle/fluid/tests/unittests/ipu/CMakeLists.txt index 4826b375126..6b709d85d75 100644 --- a/python/paddle/fluid/tests/unittests/ipu/CMakeLists.txt +++ b/python/paddle/fluid/tests/unittests/ipu/CMakeLists.txt @@ -1,15 +1,18 @@ if(WITH_IPU) - file(GLOB TEST_OPS RELATIVE "${CMAKE_CURRENT_SOURCE_DIR}" "test_*.py") - string(REPLACE ".py" "" TEST_OPS "${TEST_OPS}") + file( + GLOB TEST_OPS + RELATIVE "${CMAKE_CURRENT_SOURCE_DIR}" + "test_*.py") + string(REPLACE ".py" "" TEST_OPS "${TEST_OPS}") - foreach(TEST_OP ${TEST_OPS}) - py_test_modules(${TEST_OP} MODULES ${TEST_OP}) - # set all UTs timeout to 200s - set_tests_properties(${TEST_OP} PROPERTIES TIMEOUT 200) - endforeach(TEST_OP) + foreach(TEST_OP ${TEST_OPS}) + py_test_modules(${TEST_OP} MODULES ${TEST_OP}) + # set all UTs timeout to 200s + set_tests_properties(${TEST_OP} PROPERTIES TIMEOUT 200) + endforeach(TEST_OP) - set_tests_properties(test_conv_op_ipu PROPERTIES TIMEOUT 300) - set_tests_properties(test_elemetwise_x_op_ipu PROPERTIES TIMEOUT 300) - set_tests_properties(test_reduce_x_op_ipu PROPERTIES TIMEOUT 600) - set_tests_properties(test_save_load_ipu PROPERTIES TIMEOUT 600) + set_tests_properties(test_conv_op_ipu PROPERTIES TIMEOUT 300) + set_tests_properties(test_elemetwise_x_op_ipu PROPERTIES TIMEOUT 300) + set_tests_properties(test_reduce_x_op_ipu PROPERTIES TIMEOUT 600) + set_tests_properties(test_save_load_ipu PROPERTIES TIMEOUT 600) endif() diff --git a/python/paddle/fluid/tests/unittests/ir/CMakeLists.txt b/python/paddle/fluid/tests/unittests/ir/CMakeLists.txt index 3d80d92595b..d34ee9380ea 100644 --- a/python/paddle/fluid/tests/unittests/ir/CMakeLists.txt +++ b/python/paddle/fluid/tests/unittests/ir/CMakeLists.txt @@ -1,8 +1,13 @@ -file(GLOB TEST_IR_PASSES RELATIVE "${CMAKE_CURRENT_SOURCE_DIR}" "test_*.py") +file( + GLOB TEST_IR_PASSES + RELATIVE "${CMAKE_CURRENT_SOURCE_DIR}" + "test_*.py") string(REPLACE ".py" "" TEST_IR_PASSES "${TEST_IR_PASSES}") -if(((NOT WITH_GPU) AND (NOT WITH_ROCM)) OR WIN32 OR APPLE) - LIST(REMOVE_ITEM TEST_IR_PASSES test_ir_fusion_group_pass) +if(((NOT WITH_GPU) AND (NOT WITH_ROCM)) + OR WIN32 + OR APPLE) + list(REMOVE_ITEM TEST_IR_PASSES test_ir_fusion_group_pass) endif() foreach(target ${TEST_IR_PASSES}) diff --git a/python/paddle/fluid/tests/unittests/ir/inference/CMakeLists.txt b/python/paddle/fluid/tests/unittests/ir/inference/CMakeLists.txt index 4717dfa1eab..3687d09653f 100755 --- a/python/paddle/fluid/tests/unittests/ir/inference/CMakeLists.txt +++ b/python/paddle/fluid/tests/unittests/ir/inference/CMakeLists.txt @@ -1,10 +1,19 @@ -file(GLOB TEST_INFERENCE_IR_PASSES RELATIVE "${CMAKE_CURRENT_SOURCE_DIR}" "test_*.py") +file( + GLOB TEST_INFERENCE_IR_PASSES + RELATIVE "${CMAKE_CURRENT_SOURCE_DIR}" + "test_*.py") string(REPLACE ".py" "" TEST_INFERENCE_IR_PASSES "${TEST_INFERENCE_IR_PASSES}") -file(GLOB TEST_TRT_IR_PASSES RELATIVE "${CMAKE_CURRENT_SOURCE_DIR}" "test_trt_*.py") +file( + GLOB TEST_TRT_IR_PASSES + RELATIVE "${CMAKE_CURRENT_SOURCE_DIR}" + "test_trt_*.py") string(REPLACE ".py" "" TEST_TRT_IR_PASSES "${TEST_TRT_IR_PASSES}") -file(GLOB TEST_TRT_CONVERTER RELATIVE "${CMAKE_CURRENT_SOURCE_DIR}" "test_trt_convert_*.py") +file( + GLOB TEST_TRT_CONVERTER + RELATIVE "${CMAKE_CURRENT_SOURCE_DIR}" + "test_trt_convert_*.py") string(REPLACE ".py" "" TEST_TRT_CONVERTER "${TEST_TRT_CONVERTER}") # Only for cpu(mkl + openblas) @@ -27,7 +36,8 @@ if(WITH_GPU AND TENSORRT_FOUND) foreach(target ${TEST_TRT_IR_PASSES}) if(${target} STREQUAL "test_trt_slice_dynamic_plugin") - if("${TENSORRT_MAJOR_VERSION}.${TENSORRT_MINOR_VERSION}" VERSION_GREATER "7.1") + if("${TENSORRT_MAJOR_VERSION}.${TENSORRT_MINOR_VERSION}" VERSION_GREATER + "7.1") py_test_modules(${target} MODULES ${target}) set_tests_properties(${target} PROPERTIES TIMEOUT 60) endif() @@ -42,7 +52,10 @@ if(WITH_GPU AND TENSORRT_FOUND) endforeach() endif() -file(GLOB TEST_MKLDNN_IR_PASSES RELATIVE "${CMAKE_CURRENT_SOURCE_DIR}" "test_mkldnn_*.py") +file( + GLOB TEST_MKLDNN_IR_PASSES + RELATIVE "${CMAKE_CURRENT_SOURCE_DIR}" + "test_mkldnn_*.py") string(REPLACE ".py" "" TEST_MKLDNN_IR_PASSES "${TEST_MKLDNN_IR_PASSES}") foreach(TEST_INFERENCE_IR_PASS ${TEST_MKLDNN_IR_PASSES}) list(REMOVE_ITEM TEST_INFERENCE_IR_PASSES ${TEST_INFERENCE_IR_PASS}) @@ -54,95 +67,142 @@ if(WITH_MKLDNN) endforeach() endif() -if (WITH_MKLDNN AND TENSORRT_FOUND AND WITH_GPU) +if(WITH_MKLDNN + AND TENSORRT_FOUND + AND WITH_GPU) foreach(target ${TEST_INFERENCE_IR_PASSES}) py_test_modules(${target} MODULES ${target}) endforeach() endif() -if (NOT WITH_MKLDNN AND NOT TENSORRT_FOUND AND NOT WITH_GPU) +if(NOT WITH_MKLDNN + AND NOT TENSORRT_FOUND + AND NOT WITH_GPU) foreach(target ${TEST_INFERENCE_CPU_UT}) py_test_modules(${target} MODULES ${target}) endforeach() -set_tests_properties(test_mul_lstm_fuse_pass PROPERTIES TIMEOUT 300) -set_tests_properties(test_mul_gru_fuse_pass PROPERTIES TIMEOUT 300) + set_tests_properties(test_mul_lstm_fuse_pass PROPERTIES TIMEOUT 300) + set_tests_properties(test_mul_gru_fuse_pass PROPERTIES TIMEOUT 300) endif() if(WITH_GPU AND TENSORRT_FOUND) -set_tests_properties(test_trt_subgraph_pass PROPERTIES TIMEOUT 120) -set_tests_properties(test_trt_activation_pass PROPERTIES TIMEOUT 120) -set_tests_properties(test_trt_conv_pass PROPERTIES TIMEOUT 120) -#set_tests_properties(test_trt_multiclass_nms_op PROPERTIES TIMEOUT 200) -set_tests_properties(test_trt_dynamic_shape PROPERTIES TIMEOUT 120) -set_tests_properties(test_trt_inspector PROPERTIES TIMEOUT 60) -if(WITH_NV_JETSON) - set_tests_properties(test_trt_pool_op PROPERTIES ENVIRONMENT FLAGS_fraction_of_gpu_memory_to_use=0.1 TIMEOUT 450) - set_tests_properties(test_trt_pool3d_op PROPERTIES ENVIRONMENT FLAGS_fraction_of_gpu_memory_to_use=0.1 TIMEOUT 450) -else() - set_tests_properties(test_trt_pool_op PROPERTIES ENVIRONMENT FLAGS_fraction_of_gpu_memory_to_use=0.1 TIMEOUT 300) - set_tests_properties(test_trt_pool3d_op PROPERTIES ENVIRONMENT FLAGS_fraction_of_gpu_memory_to_use=0.1 TIMEOUT 45) -endif() -set_tests_properties(test_trt_reduce_mean_op PROPERTIES TIMEOUT 60) -set_tests_properties(test_trt_tile_op PROPERTIES TIMEOUT 60) -set_tests_properties(test_trt_fc_fuse_quant_dequant_pass PROPERTIES TIMEOUT 100) -set_tests_properties(test_trt_conv_quant_dequant_pass PROPERTIES TIMEOUT 100) -set_tests_properties(test_trt_matmul_quant_dequant PROPERTIES TIMEOUT 100) -set_tests_properties(test_trt_conv3d_op PROPERTIES TIMEOUT 60) -set_tests_properties(test_trt_conv3d_transpose_op PROPERTIES TIMEOUT 60) -set_tests_properties(test_trt_nearest_interp_v2_op PROPERTIES TIMEOUT 30) -set_tests_properties(test_trt_multiclass_nms3_op PROPERTIES TIMEOUT 60 ENVIRONMENT FLAGS_USE_STANDALONE_EXECUTOR=0) - -if (WITH_MKLDNN AND TENSORRT_FOUND AND WITH_GPU) - set_tests_properties(test_emb_eltwise_layernorm_fuse_pass PROPERTIES TIMEOUT 120) - set_tests_properties(test_fc_fuse_pass PROPERTIES TIMEOUT 240) - set_tests_properties(test_simplify_with_basic_ops_pass_autoscan PROPERTIES TIMEOUT 60) - set_tests_properties(test_adaptive_pool2d_convert_global_pass_autoscan PROPERTIES TIMEOUT 100) - set_tests_properties(test_conv_act_mkldnn_fuse_pass PROPERTIES TIMEOUT 120) - set_tests_properties(test_conv_elementwise_add2_act_fuse_pass PROPERTIES TIMEOUT 120) - set_tests_properties(test_conv_elementwise_add_act_fuse_pass PROPERTIES TIMEOUT 120) - set_tests_properties(test_flatten2_matmul_fuse_pass PROPERTIES TIMEOUT 240) - set_tests_properties(test_squeeze2_matmul_fuse_pass PROPERTIES TIMEOUT 240) - set_tests_properties(test_reshape2_matmul_fuse_pass PROPERTIES TIMEOUT 240) - set_tests_properties(test_trt_flatten2_matmul_fuse_pass PROPERTIES TIMEOUT 240) - set_tests_properties(test_trt_squeeze2_matmul_fuse_pass PROPERTIES TIMEOUT 240) - set_tests_properties(test_trt_reshape2_matmul_fuse_pass PROPERTIES TIMEOUT 240) - set_tests_properties(test_shuffle_channel_detect_pass PROPERTIES TIMEOUT 120) - if (WIN32) - set_tests_properties(test_matmul_scale_fuse_pass PROPERTIES TIMEOUT 300) - set_tests_properties(test_matmul_v2_scale_fuse_pass PROPERTIES TIMEOUT 300) - set_tests_properties(test_map_matmul_v2_to_matmul_pass PROPERTIES TIMEOUT 360) - set_tests_properties(test_map_matmul_v2_to_mul_pass PROPERTIES TIMEOUT 360) - set_tests_properties(test_map_matmul_to_mul_pass PROPERTIES TIMEOUT 360) - else () - set_tests_properties(test_matmul_scale_fuse_pass PROPERTIES TIMEOUT 60) - set_tests_properties(test_matmul_v2_scale_fuse_pass PROPERTIES TIMEOUT 60) - set_tests_properties(test_map_matmul_v2_to_matmul_pass PROPERTIES TIMEOUT 120) - set_tests_properties(test_map_matmul_v2_to_mul_pass PROPERTIES TIMEOUT 120) - set_tests_properties(test_map_matmul_to_mul_pass PROPERTIES TIMEOUT 120) + set_tests_properties(test_trt_subgraph_pass PROPERTIES TIMEOUT 120) + set_tests_properties(test_trt_activation_pass PROPERTIES TIMEOUT 120) + set_tests_properties(test_trt_conv_pass PROPERTIES TIMEOUT 120) + #set_tests_properties(test_trt_multiclass_nms_op PROPERTIES TIMEOUT 200) + set_tests_properties(test_trt_dynamic_shape PROPERTIES TIMEOUT 120) + set_tests_properties(test_trt_inspector PROPERTIES TIMEOUT 60) + if(WITH_NV_JETSON) + set_tests_properties( + test_trt_pool_op + PROPERTIES ENVIRONMENT FLAGS_fraction_of_gpu_memory_to_use=0.1 TIMEOUT + 450) + set_tests_properties( + test_trt_pool3d_op + PROPERTIES ENVIRONMENT FLAGS_fraction_of_gpu_memory_to_use=0.1 TIMEOUT + 450) + else() + set_tests_properties( + test_trt_pool_op + PROPERTIES ENVIRONMENT FLAGS_fraction_of_gpu_memory_to_use=0.1 TIMEOUT + 300) + set_tests_properties( + test_trt_pool3d_op + PROPERTIES ENVIRONMENT FLAGS_fraction_of_gpu_memory_to_use=0.1 TIMEOUT 45) + endif() + set_tests_properties(test_trt_reduce_mean_op PROPERTIES TIMEOUT 60) + set_tests_properties(test_trt_tile_op PROPERTIES TIMEOUT 60) + set_tests_properties(test_trt_fc_fuse_quant_dequant_pass PROPERTIES TIMEOUT + 100) + set_tests_properties(test_trt_conv_quant_dequant_pass PROPERTIES TIMEOUT 100) + set_tests_properties(test_trt_matmul_quant_dequant PROPERTIES TIMEOUT 100) + set_tests_properties(test_trt_conv3d_op PROPERTIES TIMEOUT 60) + set_tests_properties(test_trt_conv3d_transpose_op PROPERTIES TIMEOUT 60) + set_tests_properties(test_trt_nearest_interp_v2_op PROPERTIES TIMEOUT 30) + set_tests_properties( + test_trt_multiclass_nms3_op PROPERTIES TIMEOUT 60 ENVIRONMENT + FLAGS_USE_STANDALONE_EXECUTOR=0) + + if(WITH_MKLDNN + AND TENSORRT_FOUND + AND WITH_GPU) + set_tests_properties(test_emb_eltwise_layernorm_fuse_pass PROPERTIES TIMEOUT + 120) + set_tests_properties(test_fc_fuse_pass PROPERTIES TIMEOUT 240) + set_tests_properties(test_simplify_with_basic_ops_pass_autoscan + PROPERTIES TIMEOUT 60) + set_tests_properties(test_adaptive_pool2d_convert_global_pass_autoscan + PROPERTIES TIMEOUT 100) + set_tests_properties(test_conv_act_mkldnn_fuse_pass PROPERTIES TIMEOUT 120) + set_tests_properties(test_conv_elementwise_add2_act_fuse_pass + PROPERTIES TIMEOUT 120) + set_tests_properties(test_conv_elementwise_add_act_fuse_pass + PROPERTIES TIMEOUT 120) + set_tests_properties(test_flatten2_matmul_fuse_pass PROPERTIES TIMEOUT 240) + set_tests_properties(test_squeeze2_matmul_fuse_pass PROPERTIES TIMEOUT 240) + set_tests_properties(test_reshape2_matmul_fuse_pass PROPERTIES TIMEOUT 240) + set_tests_properties(test_trt_flatten2_matmul_fuse_pass PROPERTIES TIMEOUT + 240) + set_tests_properties(test_trt_squeeze2_matmul_fuse_pass PROPERTIES TIMEOUT + 240) + set_tests_properties(test_trt_reshape2_matmul_fuse_pass PROPERTIES TIMEOUT + 240) + set_tests_properties(test_shuffle_channel_detect_pass PROPERTIES TIMEOUT + 120) + if(WIN32) + set_tests_properties(test_matmul_scale_fuse_pass PROPERTIES TIMEOUT 300) + set_tests_properties(test_matmul_v2_scale_fuse_pass PROPERTIES TIMEOUT + 300) + set_tests_properties(test_map_matmul_v2_to_matmul_pass PROPERTIES TIMEOUT + 360) + set_tests_properties(test_map_matmul_v2_to_mul_pass PROPERTIES TIMEOUT + 360) + set_tests_properties(test_map_matmul_to_mul_pass PROPERTIES TIMEOUT 360) + else() + set_tests_properties(test_matmul_scale_fuse_pass PROPERTIES TIMEOUT 60) + set_tests_properties(test_matmul_v2_scale_fuse_pass PROPERTIES TIMEOUT 60) + set_tests_properties(test_map_matmul_v2_to_matmul_pass PROPERTIES TIMEOUT + 120) + set_tests_properties(test_map_matmul_v2_to_mul_pass PROPERTIES TIMEOUT + 120) + set_tests_properties(test_map_matmul_to_mul_pass PROPERTIES TIMEOUT 120) + endif() endif() -endif() -if (WITH_MKLDNN) - set_tests_properties(test_mkldnn_conv_elementwise_add_fuse_pass PROPERTIES TIMEOUT 120) - set_tests_properties(test_mkldnn_depthwise_conv_pass PROPERTIES TIMEOUT 120) - set_tests_properties(test_mkldnn_reshape_transpose_matmul_fuse_pass PROPERTIES TIMEOUT 100) - set_tests_properties(test_mkldnn_mish_op PROPERTIES TIMEOUT 300) - set_tests_properties(test_mkldnn_conv3d_op PROPERTIES TIMEOUT 300) - set_tests_properties(test_mkldnn_prelu_op PROPERTIES TIMEOUT 300) - set_tests_properties(test_conv_act_mkldnn_fuse_pass PROPERTIES TIMEOUT 120) - set_tests_properties(test_conv_transpose_eltwiseadd_bn_fuse_pass PROPERTIES TIMEOUT 250) - set_tests_properties(test_mkldnn_matmul_transpose_reshape_fuse_pass PROPERTIES TIMEOUT 100) - set_tests_properties(test_conv_transpose_bn_fuse_pass PROPERTIES TIMEOUT 300) - set_tests_properties(test_mkldnn_conv_hard_sigmoid_fuse_pass PROPERTIES TIMEOUT 300) - set_tests_properties(test_mkldnn_conv_hard_swish_fuse_pass PROPERTIES TIMEOUT 300) - set_tests_properties(test_mkldnn_batch_norm_act_fuse_pass PROPERTIES TIMEOUT 100) - set_tests_properties(test_mkldnn_matmul_v2_transpose_reshape_fuse_pass PROPERTIES TIMEOUT 100) - set_tests_properties(test_mkldnn_conv_transpose_bias_fuse_pass PROPERTIES TIMEOUT 100) - set_tests_properties(test_conv_eltwiseadd_bn_fuse_pass PROPERTIES TIMEOUT 300) - set_tests_properties(test_mkldnn_conv_mish_fuse_pass PROPERTIES TIMEOUT 300) - set_tests_properties(test_mkldnn_fc_mish_fuse_pass PROPERTIES TIMEOUT 300) - set_tests_properties(test_mkldnn_fc_elementwise_add_fuse_pass PROPERTIES TIMEOUT 120) - set_tests_properties(test_mkldnn_conv_affine_channel_fuse_pass PROPERTIES TIMEOUT 60) -endif() + if(WITH_MKLDNN) + set_tests_properties(test_mkldnn_conv_elementwise_add_fuse_pass + PROPERTIES TIMEOUT 120) + set_tests_properties(test_mkldnn_depthwise_conv_pass PROPERTIES TIMEOUT 120) + set_tests_properties(test_mkldnn_reshape_transpose_matmul_fuse_pass + PROPERTIES TIMEOUT 100) + set_tests_properties(test_mkldnn_mish_op PROPERTIES TIMEOUT 300) + set_tests_properties(test_mkldnn_conv3d_op PROPERTIES TIMEOUT 300) + set_tests_properties(test_mkldnn_prelu_op PROPERTIES TIMEOUT 300) + set_tests_properties(test_conv_act_mkldnn_fuse_pass PROPERTIES TIMEOUT 120) + set_tests_properties(test_conv_transpose_eltwiseadd_bn_fuse_pass + PROPERTIES TIMEOUT 250) + set_tests_properties(test_mkldnn_matmul_transpose_reshape_fuse_pass + PROPERTIES TIMEOUT 100) + set_tests_properties(test_conv_transpose_bn_fuse_pass PROPERTIES TIMEOUT + 300) + set_tests_properties(test_mkldnn_conv_hard_sigmoid_fuse_pass + PROPERTIES TIMEOUT 300) + set_tests_properties(test_mkldnn_conv_hard_swish_fuse_pass + PROPERTIES TIMEOUT 300) + set_tests_properties(test_mkldnn_batch_norm_act_fuse_pass PROPERTIES TIMEOUT + 100) + set_tests_properties(test_mkldnn_matmul_v2_transpose_reshape_fuse_pass + PROPERTIES TIMEOUT 100) + set_tests_properties(test_mkldnn_conv_transpose_bias_fuse_pass + PROPERTIES TIMEOUT 100) + set_tests_properties(test_conv_eltwiseadd_bn_fuse_pass PROPERTIES TIMEOUT + 300) + set_tests_properties(test_mkldnn_conv_mish_fuse_pass PROPERTIES TIMEOUT 300) + set_tests_properties(test_mkldnn_fc_mish_fuse_pass PROPERTIES TIMEOUT 300) + set_tests_properties(test_mkldnn_fc_elementwise_add_fuse_pass + PROPERTIES TIMEOUT 120) + set_tests_properties(test_mkldnn_conv_affine_channel_fuse_pass + PROPERTIES TIMEOUT 60) + endif() endif() diff --git a/python/paddle/fluid/tests/unittests/mkldnn/CMakeLists.txt b/python/paddle/fluid/tests/unittests/mkldnn/CMakeLists.txt index 69991a446d7..7ed1529ea4c 100644 --- a/python/paddle/fluid/tests/unittests/mkldnn/CMakeLists.txt +++ b/python/paddle/fluid/tests/unittests/mkldnn/CMakeLists.txt @@ -1,8 +1,11 @@ -file(GLOB TEST_OPS RELATIVE "${CMAKE_CURRENT_SOURCE_DIR}" "test_*.py") +file( + GLOB TEST_OPS + RELATIVE "${CMAKE_CURRENT_SOURCE_DIR}" + "test_*.py") string(REPLACE ".py" "" TEST_OPS "${TEST_OPS}") foreach(TEST_OP ${TEST_OPS}) - py_test_modules(${TEST_OP} MODULES ${TEST_OP}) + py_test_modules(${TEST_OP} MODULES ${TEST_OP}) endforeach(TEST_OP) set_tests_properties(test_concat_mkldnn_op PROPERTIES TIMEOUT 120) set_tests_properties(test_conv3d_mkldnn_op PROPERTIES TIMEOUT 120) diff --git a/python/paddle/fluid/tests/unittests/mlu/CMakeLists.txt b/python/paddle/fluid/tests/unittests/mlu/CMakeLists.txt index 229a2c1792c..5c680c564f4 100644 --- a/python/paddle/fluid/tests/unittests/mlu/CMakeLists.txt +++ b/python/paddle/fluid/tests/unittests/mlu/CMakeLists.txt @@ -1,40 +1,54 @@ -file(GLOB TEST_OPS RELATIVE "${CMAKE_CURRENT_SOURCE_DIR}" "test_*.py") +file( + GLOB TEST_OPS + RELATIVE "${CMAKE_CURRENT_SOURCE_DIR}" + "test_*.py") string(REPLACE ".py" "" TEST_OPS "${TEST_OPS}") -file(GLOB TEST_DIST_OPS RELATIVE "${CMAKE_CURRENT_SOURCE_DIR}" "test_collective_*.py") +file( + GLOB TEST_DIST_OPS + RELATIVE "${CMAKE_CURRENT_SOURCE_DIR}" + "test_collective_*.py") string(REPLACE ".py" "" TEST_DIST_OPS "${TEST_DIST_OPS}") -if (WITH_MLU) - foreach(TEST_OP ${TEST_DIST_OPS}) - LIST(REMOVE_ITEM TEST_OPS ${TEST_OP}) - endforeach(TEST_OP) - LIST(REMOVE_ITEM TEST_OPS "test_spawn_mlu") +if(WITH_MLU) + foreach(TEST_OP ${TEST_DIST_OPS}) + list(REMOVE_ITEM TEST_OPS ${TEST_OP}) + endforeach(TEST_OP) + list(REMOVE_ITEM TEST_OPS "test_spawn_mlu") - foreach(TEST_OP ${TEST_OPS}) - py_test_modules(${TEST_OP} MODULES ${TEST_OP}) - endforeach(TEST_OP) + foreach(TEST_OP ${TEST_OPS}) + py_test_modules(${TEST_OP} MODULES ${TEST_OP}) + endforeach(TEST_OP) - if(WITH_CNCL) - LIST(APPEND TEST_DIST_OPS "test_spawn_mlu") - foreach(TEST_OP ${TEST_DIST_OPS}) - py_test_modules(${TEST_OP} MODULES ${TEST_OP}) - endforeach(TEST_OP) - bash_test_modules(test_launch_async_mlu START_BASH test_launch_async_mlu.sh ENVS PADDLE_BINARY_DIR=${PADDLE_BINARY_DIR}) - bash_test_modules(test_launch_cloud_mlu START_BASH test_launch_cloud_mlu.sh ENVS PADDLE_BINARY_DIR=${PADDLE_BINARY_DIR}) - bash_test_modules(test_launch_nproc_mlu START_BASH test_launch_nproc_mlu.sh ENVS PADDLE_BINARY_DIR=${PADDLE_BINARY_DIR}) - bash_test_modules(test_c_comm_init_op_mlu START_BASH test_c_comm_init_op_mlu.sh ENVS PADDLE_BINARY_DIR=${PADDLE_BINARY_DIR}) - set_tests_properties(test_collective_broadcast PROPERTIES TIMEOUT 120) - set_tests_properties(test_collective_allreduce_sum PROPERTIES TIMEOUT 120) - set_tests_properties(test_collective_allreduce_max PROPERTIES TIMEOUT 120) - set_tests_properties(test_collective_allreduce_min PROPERTIES TIMEOUT 120) - set_tests_properties(test_collective_allreduce_prod PROPERTIES TIMEOUT 120) - set_tests_properties(test_collective_allgather PROPERTIES TIMEOUT 120) - set_tests_properties(test_collective_reduce_sum PROPERTIES TIMEOUT 120) - set_tests_properties(test_collective_reduce_max PROPERTIES TIMEOUT 120) - set_tests_properties(test_collective_reduce_min PROPERTIES TIMEOUT 120) - set_tests_properties(test_collective_reduce_prod PROPERTIES TIMEOUT 120) - set_tests_properties(test_collective_broadcast_api_mlu PROPERTIES TIMEOUT 120) - set_tests_properties(test_collective_allreduce_api_mlu PROPERTIES TIMEOUT 120) - set_tests_properties(test_collective_allgather_api_mlu PROPERTIES TIMEOUT 120) - set_tests_properties(test_c_comm_init_op_mlu PROPERTIES TIMEOUT 120) - endif(WITH_CNCL) + if(WITH_CNCL) + list(APPEND TEST_DIST_OPS "test_spawn_mlu") + foreach(TEST_OP ${TEST_DIST_OPS}) + py_test_modules(${TEST_OP} MODULES ${TEST_OP}) + endforeach(TEST_OP) + bash_test_modules(test_launch_async_mlu START_BASH test_launch_async_mlu.sh + ENVS PADDLE_BINARY_DIR=${PADDLE_BINARY_DIR}) + bash_test_modules(test_launch_cloud_mlu START_BASH test_launch_cloud_mlu.sh + ENVS PADDLE_BINARY_DIR=${PADDLE_BINARY_DIR}) + bash_test_modules(test_launch_nproc_mlu START_BASH test_launch_nproc_mlu.sh + ENVS PADDLE_BINARY_DIR=${PADDLE_BINARY_DIR}) + bash_test_modules( + test_c_comm_init_op_mlu START_BASH test_c_comm_init_op_mlu.sh ENVS + PADDLE_BINARY_DIR=${PADDLE_BINARY_DIR}) + set_tests_properties(test_collective_broadcast PROPERTIES TIMEOUT 120) + set_tests_properties(test_collective_allreduce_sum PROPERTIES TIMEOUT 120) + set_tests_properties(test_collective_allreduce_max PROPERTIES TIMEOUT 120) + set_tests_properties(test_collective_allreduce_min PROPERTIES TIMEOUT 120) + set_tests_properties(test_collective_allreduce_prod PROPERTIES TIMEOUT 120) + set_tests_properties(test_collective_allgather PROPERTIES TIMEOUT 120) + set_tests_properties(test_collective_reduce_sum PROPERTIES TIMEOUT 120) + set_tests_properties(test_collective_reduce_max PROPERTIES TIMEOUT 120) + set_tests_properties(test_collective_reduce_min PROPERTIES TIMEOUT 120) + set_tests_properties(test_collective_reduce_prod PROPERTIES TIMEOUT 120) + set_tests_properties(test_collective_broadcast_api_mlu PROPERTIES TIMEOUT + 120) + set_tests_properties(test_collective_allreduce_api_mlu PROPERTIES TIMEOUT + 120) + set_tests_properties(test_collective_allgather_api_mlu PROPERTIES TIMEOUT + 120) + set_tests_properties(test_c_comm_init_op_mlu PROPERTIES TIMEOUT 120) + endif(WITH_CNCL) endif() diff --git a/python/paddle/fluid/tests/unittests/npu/CMakeLists.txt b/python/paddle/fluid/tests/unittests/npu/CMakeLists.txt index e9d9af5c113..7498fa72194 100644 --- a/python/paddle/fluid/tests/unittests/npu/CMakeLists.txt +++ b/python/paddle/fluid/tests/unittests/npu/CMakeLists.txt @@ -1,26 +1,32 @@ -file(GLOB TEST_OPS RELATIVE "${CMAKE_CURRENT_SOURCE_DIR}" "test_*.py") +file( + GLOB TEST_OPS + RELATIVE "${CMAKE_CURRENT_SOURCE_DIR}" + "test_*.py") string(REPLACE ".py" "" TEST_OPS "${TEST_OPS}") -if (WITH_ASCEND_CL) - foreach(TEST_OP ${TEST_OPS}) - py_test_modules(${TEST_OP} MODULES ${TEST_OP}) - endforeach(TEST_OP) +if(WITH_ASCEND_CL) + foreach(TEST_OP ${TEST_OPS}) + py_test_modules(${TEST_OP} MODULES ${TEST_OP}) + endforeach(TEST_OP) - # NOTE: NPU `get_float_status` read the value from register, During the test, - # it is found that this register will be overwritten by any program on the card. - # In order to prevent the interference of nan/inf in the other unittests, we - # need to set the unittests related to `float_status` to exclusive. - set_tests_properties(test_amp_check_finite_and_scale_op_npu PROPERTIES LABELS "RUN_TYPE=EXCLUSIVE") - set_tests_properties(test_flags_check_nan_inf_npu PROPERTIES LABELS "RUN_TYPE=EXCLUSIVE") - set_tests_properties(test_float_status_op_npu PROPERTIES LABELS "RUN_TYPE=EXCLUSIVE") + # NOTE: NPU `get_float_status` read the value from register, During the test, + # it is found that this register will be overwritten by any program on the card. + # In order to prevent the interference of nan/inf in the other unittests, we + # need to set the unittests related to `float_status` to exclusive. + set_tests_properties(test_amp_check_finite_and_scale_op_npu + PROPERTIES LABELS "RUN_TYPE=EXCLUSIVE") + set_tests_properties(test_flags_check_nan_inf_npu + PROPERTIES LABELS "RUN_TYPE=EXCLUSIVE") + set_tests_properties(test_float_status_op_npu PROPERTIES LABELS + "RUN_TYPE=EXCLUSIVE") - # Note: the following test cases has running time more than 120s - set_tests_properties(test_nearest_interp_op_npu PROPERTIES TIMEOUT 200) - set_tests_properties(test_nearest_interp_v2_op_npu PROPERTIES TIMEOUT 200) - set_tests_properties(test_bilinear_interp_v2_op_npu PROPERTIES TIMEOUT 200) - set_tests_properties(test_stack_op_npu PROPERTIES TIMEOUT 300) - set_tests_properties(test_conv2d_transpose_op_npu PROPERTIES TIMEOUT 200) - set_tests_properties(test_conv2d_op_npu PROPERTIES TIMEOUT 300) - set_tests_properties(test_matmulv2_op_npu PROPERTIES TIMEOUT 300) - set_tests_properties(test_elementwise_add_op_npu PROPERTIES TIMEOUT 200) + # Note: the following test cases has running time more than 120s + set_tests_properties(test_nearest_interp_op_npu PROPERTIES TIMEOUT 200) + set_tests_properties(test_nearest_interp_v2_op_npu PROPERTIES TIMEOUT 200) + set_tests_properties(test_bilinear_interp_v2_op_npu PROPERTIES TIMEOUT 200) + set_tests_properties(test_stack_op_npu PROPERTIES TIMEOUT 300) + set_tests_properties(test_conv2d_transpose_op_npu PROPERTIES TIMEOUT 200) + set_tests_properties(test_conv2d_op_npu PROPERTIES TIMEOUT 300) + set_tests_properties(test_matmulv2_op_npu PROPERTIES TIMEOUT 300) + set_tests_properties(test_elementwise_add_op_npu PROPERTIES TIMEOUT 200) endif() diff --git a/python/paddle/fluid/tests/unittests/ps/CMakeLists.txt b/python/paddle/fluid/tests/unittests/ps/CMakeLists.txt index 9af32a8aca7..ab985d73d53 100755 --- a/python/paddle/fluid/tests/unittests/ps/CMakeLists.txt +++ b/python/paddle/fluid/tests/unittests/ps/CMakeLists.txt @@ -1,8 +1,11 @@ -file(GLOB TEST_OPS RELATIVE "${CMAKE_CURRENT_SOURCE_DIR}" "test_*.py") +file( + GLOB TEST_OPS + RELATIVE "${CMAKE_CURRENT_SOURCE_DIR}" + "test_*.py") string(REPLACE ".py" "" TEST_OPS "${TEST_OPS}") foreach(TEST_OP ${TEST_OPS}) - py_test_modules(${TEST_OP} MODULES ${TEST_OP}) - list(APPEND TEST_OPS ${TEST_OP}) - set_tests_properties(${TEST_OP} PROPERTIES TIMEOUT 50) + py_test_modules(${TEST_OP} MODULES ${TEST_OP}) + list(APPEND TEST_OPS ${TEST_OP}) + set_tests_properties(${TEST_OP} PROPERTIES TIMEOUT 50) endforeach(TEST_OP) diff --git a/python/paddle/fluid/tests/unittests/rnn/CMakeLists.txt b/python/paddle/fluid/tests/unittests/rnn/CMakeLists.txt index ffc78d33347..35a95749880 100644 --- a/python/paddle/fluid/tests/unittests/rnn/CMakeLists.txt +++ b/python/paddle/fluid/tests/unittests/rnn/CMakeLists.txt @@ -1,10 +1,13 @@ -file(GLOB TEST_OPS RELATIVE "${CMAKE_CURRENT_SOURCE_DIR}" "test_*.py") +file( + GLOB TEST_OPS + RELATIVE "${CMAKE_CURRENT_SOURCE_DIR}" + "test_*.py") string(REPLACE ".py" "" TEST_OPS "${TEST_OPS}") foreach(TEST_OP ${TEST_OPS}) - py_test_modules(${TEST_OP} MODULES ${TEST_OP}) + py_test_modules(${TEST_OP} MODULES ${TEST_OP}) endforeach(TEST_OP) if(NOT WIN32) - set_tests_properties(test_rnn_nets_static PROPERTIES TIMEOUT 120) - set_tests_properties(test_rnn_nets PROPERTIES TIMEOUT 120) + set_tests_properties(test_rnn_nets_static PROPERTIES TIMEOUT 120) + set_tests_properties(test_rnn_nets PROPERTIES TIMEOUT 120) endif() diff --git a/python/paddle/fluid/tests/unittests/sequence/CMakeLists.txt b/python/paddle/fluid/tests/unittests/sequence/CMakeLists.txt index c6ba82f8cbf..5c13f56d446 100644 --- a/python/paddle/fluid/tests/unittests/sequence/CMakeLists.txt +++ b/python/paddle/fluid/tests/unittests/sequence/CMakeLists.txt @@ -1,8 +1,11 @@ -file(GLOB TEST_OPS RELATIVE "${CMAKE_CURRENT_SOURCE_DIR}" "test_*.py") +file( + GLOB TEST_OPS + RELATIVE "${CMAKE_CURRENT_SOURCE_DIR}" + "test_*.py") string(REPLACE ".py" "" TEST_OPS "${TEST_OPS}") foreach(TEST_OP ${TEST_OPS}) - py_test_modules(${TEST_OP} MODULES ${TEST_OP}) + py_test_modules(${TEST_OP} MODULES ${TEST_OP}) endforeach(TEST_OP) set_tests_properties(test_sequence_conv PROPERTIES TIMEOUT 120) set_tests_properties(test_sequence_concat PROPERTIES TIMEOUT 120) diff --git a/python/paddle/fluid/tests/unittests/xpu/CMakeLists.txt b/python/paddle/fluid/tests/unittests/xpu/CMakeLists.txt index 512a76b3f60..233c4e61436 100644 --- a/python/paddle/fluid/tests/unittests/xpu/CMakeLists.txt +++ b/python/paddle/fluid/tests/unittests/xpu/CMakeLists.txt @@ -1,24 +1,30 @@ -file(GLOB TEST_OPS RELATIVE "${CMAKE_CURRENT_SOURCE_DIR}" "test_*.py") +file( + GLOB TEST_OPS + RELATIVE "${CMAKE_CURRENT_SOURCE_DIR}" + "test_*.py") string(REPLACE ".py" "" TEST_OPS "${TEST_OPS}") -if (WITH_XPU_BKCL) - list(REMOVE_ITEM TEST_OPS "test_gen_bkcl_id_op") +if(WITH_XPU_BKCL) + list(REMOVE_ITEM TEST_OPS "test_gen_bkcl_id_op") endif() -file(GLOB DIST_TEST_OPS RELATIVE "${CMAKE_CURRENT_SOURCE_DIR}" "test_dist_*.py") -if (WITH_XPU_BKCL) - list(APPEND DIST_TEST_OPS test_gen_bkcl_id_op) +file( + GLOB DIST_TEST_OPS + RELATIVE "${CMAKE_CURRENT_SOURCE_DIR}" + "test_dist_*.py") +if(WITH_XPU_BKCL) + list(APPEND DIST_TEST_OPS test_gen_bkcl_id_op) endif() list(REMOVE_ITEM TEST_OPS test_concat_op_xpu) list(REMOVE_ITEM TEST_OPS test_mean_op_xpu) foreach(TEST_OP ${TEST_OPS}) - py_test_modules(${TEST_OP} MODULES ${TEST_OP}) + py_test_modules(${TEST_OP} MODULES ${TEST_OP}) endforeach(TEST_OP) foreach(TEST_OP ${DIST_TEST_OPS}) - py_test_modules(${TEST_OP} MODULES ${TEST_OP}) + py_test_modules(${TEST_OP} MODULES ${TEST_OP}) endforeach(TEST_OP) set_tests_properties(test_mul_op_xpu PROPERTIES TIMEOUT 120) diff --git a/python/paddle/tests/CMakeLists.txt b/python/paddle/tests/CMakeLists.txt index bc9f402ed96..62cf9afddc2 100644 --- a/python/paddle/tests/CMakeLists.txt +++ b/python/paddle/tests/CMakeLists.txt @@ -1,19 +1,25 @@ -file(GLOB TEST_OPS RELATIVE "${CMAKE_CURRENT_SOURCE_DIR}" "test_*.py") +file( + GLOB TEST_OPS + RELATIVE "${CMAKE_CURRENT_SOURCE_DIR}" + "test_*.py") string(REPLACE ".py" "" TEST_OPS "${TEST_OPS}") -file(GLOB DIST_TEST_OPS RELATIVE "${CMAKE_CURRENT_SOURCE_DIR}" "test_dist_*.py") +file( + GLOB DIST_TEST_OPS + RELATIVE "${CMAKE_CURRENT_SOURCE_DIR}" + "test_dist_*.py") string(REPLACE ".py" "" DIST_TEST_OPS "${DIST_TEST_OPS}") foreach(TEST_OP ${DIST_TEST_OPS}) - list(REMOVE_ITEM TEST_OPS ${TEST_OP}) + list(REMOVE_ITEM TEST_OPS ${TEST_OP}) endforeach() if(NOT WITH_COVERAGE) - LIST(REMOVE_ITEM TEST_OPS test_hapi_hub) + list(REMOVE_ITEM TEST_OPS test_hapi_hub) endif() foreach(src ${TEST_OPS}) - py_test(${src} SRCS ${src}.py) + py_test(${src} SRCS ${src}.py) endforeach() function(py_dist_test TARGET_NAME) @@ -21,27 +27,34 @@ function(py_dist_test TARGET_NAME) set(options "") set(oneValueArgs "") set(multiValueArgs SRCS DEPS ARGS ENVS) - cmake_parse_arguments(py_dist_test "${options}" "${oneValueArgs}" "${multiValueArgs}" ${ARGN}) + cmake_parse_arguments(py_dist_test "${options}" "${oneValueArgs}" + "${multiValueArgs}" ${ARGN}) - if(WITH_COVERAGE AND (WITH_GPU OR WITH_ROCM) AND (WITH_NCCL OR WITH_RCCL) AND NOT WIN32) - add_test(NAME ${TARGET_NAME} - COMMAND ${CMAKE_COMMAND} -E env FLAGS_init_allocated_mem=true FLAGS_cudnn_deterministic=true - FLAGS_cpu_deterministic=true NCCL_P2P_DISABLE=1 NCCL_SHM_DISABLE=1 - PYTHONPATH=${PADDLE_BINARY_DIR}/python ${py_dist_test_ENVS} - COVERAGE_FILE=${PADDLE_BINARY_DIR}/python-coverage.data - ${PYTHON_EXECUTABLE} -u ${py_dist_test_SRCS} ${py_dist_test_ARGS} - WORKING_DIRECTORY ${CMAKE_CURRENT_BINARY_DIR}) + if(WITH_COVERAGE + AND (WITH_GPU OR WITH_ROCM) + AND (WITH_NCCL OR WITH_RCCL) + AND NOT WIN32) + add_test( + NAME ${TARGET_NAME} + COMMAND + ${CMAKE_COMMAND} -E env FLAGS_init_allocated_mem=true + FLAGS_cudnn_deterministic=true FLAGS_cpu_deterministic=true + NCCL_P2P_DISABLE=1 NCCL_SHM_DISABLE=1 + PYTHONPATH=${PADDLE_BINARY_DIR}/python ${py_dist_test_ENVS} + COVERAGE_FILE=${PADDLE_BINARY_DIR}/python-coverage.data + ${PYTHON_EXECUTABLE} -u ${py_dist_test_SRCS} ${py_dist_test_ARGS} + WORKING_DIRECTORY ${CMAKE_CURRENT_BINARY_DIR}) # No unit test should exceed 10 minutes. - set_tests_properties(${TARGET_NAME} PROPERTIES TIMEOUT 600 LABELS "RUN_TYPE=DIST") + set_tests_properties(${TARGET_NAME} PROPERTIES TIMEOUT 600 LABELS + "RUN_TYPE=DIST") endif() - endif() endfunction() foreach(src ${DIST_TEST_OPS}) - message(STATUS ${src}) - py_dist_test(${src} SRCS ${src}.py) + message(STATUS ${src}) + py_dist_test(${src} SRCS ${src}.py) endforeach() set_tests_properties(test_dataset_cifar PROPERTIES TIMEOUT 120) set_tests_properties(test_pretrained_model PROPERTIES TIMEOUT 120) @@ -52,13 +65,14 @@ set_tests_properties(test_dataset_wmt PROPERTIES TIMEOUT 120) set_tests_properties(test_vision_models PROPERTIES TIMEOUT 120) set_tests_properties(test_dataset_uci_housing PROPERTIES TIMEOUT 120) set_tests_properties(test_dataset_imdb PROPERTIES TIMEOUT 300) -set_tests_properties(test_pretrained_model PROPERTIES TIMEOUT 600) +set_tests_properties(test_pretrained_model PROPERTIES TIMEOUT 600) if(WITH_COVERAGE) - set_tests_properties(test_hapi_hub PROPERTIES TIMEOUT 300) + set_tests_properties(test_hapi_hub PROPERTIES TIMEOUT 300) endif() if(APPLE) set_tests_properties(test_callback_early_stop PROPERTIES TIMEOUT 300) - set_tests_properties(test_callback_reduce_lr_on_plateau PROPERTIES TIMEOUT 300) + set_tests_properties(test_callback_reduce_lr_on_plateau PROPERTIES TIMEOUT + 300) set_tests_properties(test_vision_models PROPERTIES TIMEOUT 300) endif() -- GitLab