Add aarch64 (nvcc sbsa) CUDA cross-build support.

* CMakeLists.txt: when a toolchain file is given, force the CUDA host
  compiler to the C++ cross compiler and fall back to "nvcc" if
  check_language(CUDA) finds nothing; let aarch64 take the same
  default-gencode branch as x86_64/i386; link the TensorRT >= 7 myelin
  libraries through imported targets.
* cmake/tensorrt.cmake: declare the imported myelin targets.
* scripts/cmake-build/create_cuda_build_libs.py: new helper that unpacks
  the CUDA/cuDNN/TensorRT packages into ./output.
* scripts/cmake-build/cross_build_linux_arm_inference.sh: new -c option
  (MGE_WITH_CUDA=ON); the C/C++ flags move here from the toolchain file.

Note: the "index" blob ids below are those of the originally submitted patch.

diff --git a/CMakeLists.txt b/CMakeLists.txt
index 7b2d187f622294bd2c798f03b4190765068eb958..06d5fa716768c4271a12bed8cc21051c5aba4e21 100644
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@@ -257,11 +257,25 @@ if(MGE_WITH_JIT_MLIR AND MGE_WITH_HALIDE)
 endif()

 if(MGE_WITH_CUDA)
+    # FIXME: check_language(CUDA) failed when sbsa mode!
+    # detail: https://gitlab.kitware.com/cmake/cmake/-/issues/20676
+    if(CMAKE_TOOLCHAIN_FILE)
+        set(CMAKE_CUDA_HOST_COMPILER "${CMAKE_CXX_COMPILER}")
+        message(WARNING "force set CMAKE_CUDA_HOST_COMPILER to CMAKE_CXX_COMPILER when nvcc sbsa mode!!")
+    endif()
+
     include(CheckLanguage)
     check_language(CUDA)
-    if(NOT CMAKE_CUDA_COMPILER)
+    if(NOT CMAKE_CUDA_COMPILER AND NOT CMAKE_TOOLCHAIN_FILE)
         message(FATAL_ERROR "CUDA compiler not found in PATH")
     endif()
+
+    # remove this after CMAKE fix nvcc sbsa
+    if(NOT CMAKE_CUDA_COMPILER AND CMAKE_TOOLCHAIN_FILE)
+        set(CMAKE_CUDA_COMPILER "nvcc")
+        message(WARNING "force set CMAKE_CUDA_COMPILER to nvcc when nvcc sbsa mode!!")
+    endif()
+
     enable_language(CUDA)
     set(CMAKE_CUDA_STANDARD 14)
     set(CMAKE_CUDA_STANDARD_REQUIRED ON)
@@ -375,7 +389,7 @@ if(MGE_WITH_CUDA)
         set(CMAKE_CUDA_FLAGS "${CMAKE_CUDA_FLAGS} -Xcompiler -fno-exceptions")
     endif()
     if(NOT MGE_CUDA_GENCODE)
-        if(${MGE_ARCH} STREQUAL "x86_64" OR ${MGE_ARCH} STREQUAL "i386")
+        if(MGE_ARCH STREQUAL "x86_64" OR MGE_ARCH STREQUAL "i386" OR MGE_ARCH STREQUAL "aarch64")
             set(MEGDNN_THREADS_512 0)
             if(MGE_WITH_CUDA AND MGE_CUDA_USE_STATIC AND ("${CUDNN_VERSION}" VERSION_GREATER "8.0.0" OR "${CUDNN_VERSION}" VERSION_EQUAL "8.0.0") AND (NOT MGE_WITH_CUDNN_SHARED))
                 message(WARNING "Static link CUDNN8 with many sm is unworkable, we only enable sm61 sm70 sm75 by default, and enable MGE_WITH_LARGE_ARCHIVE=ON")
@@ -429,14 +443,14 @@ if(MGE_WITH_CUDA)
     if(MGE_CUDA_USE_STATIC)
         if(MGE_WITH_TRT)
             if(MSVC OR WIN32)
-                list(APPEND MGE_CUDA_LIBS ${TRT_LIBRARY} ${CUDNN_LIBRARY})
                 message(STATUS "windows TRT_LIBRARY: ${TRT_LIBRARY}")
+                list(APPEND MGE_CUDA_LIBS ${TRT_LIBRARY})
             else()
-                if(TensorRT_VERSION_MAJOR GREATER_EQUAL 7)
-                    list(APPEND MGE_CUDA_LIBS -Wl,--whole-archive libnvinfer myelin_compiler_static myelin_executor_static myelin_pattern_runtime_static myelin_pattern_library_static -Wl,--no-whole-archive)
-                else()
-                    list(APPEND MGE_CUDA_LIBS -Wl,--whole-archive libnvinfer -Wl,--no-whole-archive)
-                endif()
+                list(APPEND MGE_CUDA_LIBS -Wl,--whole-archive libnvinfer -Wl,--no-whole-archive)
+            endif()
+            if(TensorRT_VERSION_MAJOR GREATER_EQUAL 7)
+                message(STATUS "handle trt myelin lib after trt7")
+                list(APPEND MGE_CUDA_LIBS libmyelin_compiler libmyelin_executor libmyelin_pattern_runtime libmyelin_pattern_library)
             endif()
         endif()

@@ -497,6 +511,10 @@ if(MGE_WITH_CUDA)
     else()
         if(MGE_WITH_TRT)
             list(APPEND MGE_CUDA_LIBS libnvinfer)
+            if(TensorRT_VERSION_MAJOR GREATER_EQUAL 7)
+                message(STATUS "handle trt myelin lib after trt7")
+                list(APPEND MGE_CUDA_LIBS libmyelin)
+            endif()
         endif()
         list(APPEND MGE_CUDA_LIBS libcudnn)
         if(${CMAKE_CUDA_COMPILER_VERSION} VERSION_GREATER "10.1.0" OR ${CMAKE_CUDA_COMPILER_VERSION} VERSION_EQUAL "10.1.0")
@@ -779,6 +797,13 @@ if(MGE_ARCH STREQUAL "aarch64")
         endif()
     endif()

+    if(MGE_WITH_CUDA)
+        message(WARNING "aarch64 ld will add -mfix-cortex-a53-843419 and -mfix-cortex-a53-835769, "
+            "when cuda enable and CMAKE with DEBUG build type,ld will take about 14min+, "
+            "for save link time(14min->1min), you may open below flags if not deploy on "
+            "arm a53 platform, or just build release type!")
+        #set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -mno-fix-cortex-a53-843419 -mno-fix-cortex-a53-835769")
+    endif()
 endif()

 if(MGE_ARCH STREQUAL "riscv64")
@@ -948,4 +973,4 @@ if(MGE_WITH_CUDA AND MGE_CUDA_USE_STATIC AND("${CUDNN_VERSION}" VERSION_GREATER
     message(WARNING "Static link CUDNN8 with many sm is unworkable, please use -DMGE_WITH_CUDNN_SHARED=ON or -DMGE_WITH_LARGE_ARCHIVE=ON -DMGE_CUDA_GENCODE=\"-gencode arch=compute_70,code=sm_70 arch=compute_75,code=sm_75\" ")
     message(WARNING "Static link CUDNN8 with many sm is unworkable, please use -DMGE_WITH_CUDNN_SHARED=ON or -DMGE_WITH_LARGE_ARCHIVE=ON -DMGE_CUDA_GENCODE=\"-gencode arch=compute_70,code=sm_70 arch=compute_75,code=sm_75\" ")
     message(WARNING "Static link CUDNN8 with many sm is unworkable, please use -DMGE_WITH_CUDNN_SHARED=ON or -DMGE_WITH_LARGE_ARCHIVE=ON -DMGE_CUDA_GENCODE=\"-gencode arch=compute_70,code=sm_70 arch=compute_75,code=sm_75\" ")
-endif()
\ No newline at end of file
+endif()
diff --git a/cmake/tensorrt.cmake b/cmake/tensorrt.cmake
index 7ac5b5b2cc109a1f5becdf663f5eee524ae43d34..1958a643da2b360374f8563a75143e8faef26c90 100644
--- a/cmake/tensorrt.cmake
+++ b/cmake/tensorrt.cmake
@@ -65,3 +65,80 @@ set_target_properties(libnvinfer PROPERTIES

 message(STATUS "Found TensorRT: ${__found_trt_root} (found version: ${TRT_VERSION_STRING})")

+# For TensorRT >= 7, look up the myelin libraries under ${__found_trt_root}/lib
+# and expose them as imported targets: four static archives when
+# MGE_CUDA_USE_STATIC is set, otherwise the single shared libmyelin.
+if(TensorRT_VERSION_MAJOR GREATER_EQUAL 7)
+    if(MGE_CUDA_USE_STATIC)
+        find_library(LIBMYELIN_COMPILER
+            NAMES libmyelin_compiler_static.a myelin_compiler_static.lib
+            PATHS ${__found_trt_root}/lib
+            )
+        if(NOT LIBMYELIN_COMPILER)
+            message(FATAL_ERROR "Can not find LIBMYELIN_COMPILER Library")
+        else()
+            message(STATUS "Found TensorRT myelin_compiler: ${LIBMYELIN_COMPILER}")
+        endif()
+        add_library(libmyelin_compiler STATIC IMPORTED)
+        set_target_properties(libmyelin_compiler PROPERTIES
+            IMPORTED_LOCATION "${LIBMYELIN_COMPILER}"
+            )
+
+        find_library(LIBMYELIN_EXECUTOR
+            NAMES libmyelin_executor_static.a myelin_executor_static.lib
+            PATHS ${__found_trt_root}/lib
+            )
+        if(NOT LIBMYELIN_EXECUTOR)
+            message(FATAL_ERROR "Can not find LIBMYELIN_EXECUTOR Library")
+        else()
+            message(STATUS "Found TensorRT libmyelin_executor: ${LIBMYELIN_EXECUTOR}")
+        endif()
+        add_library(libmyelin_executor STATIC IMPORTED)
+        set_target_properties(libmyelin_executor PROPERTIES
+            IMPORTED_LOCATION "${LIBMYELIN_EXECUTOR}"
+            )
+
+        find_library(LIBMYELIN_PATTERN_RUNTIME
+            NAMES libmyelin_pattern_runtime_static.a myelin_pattern_runtime_static.lib
+            PATHS ${__found_trt_root}/lib
+            )
+        if(NOT LIBMYELIN_PATTERN_RUNTIME)
+            message(FATAL_ERROR "Can not find LIBMYELIN_PATTERN_RUNTIME Library")
+        else()
+            message(STATUS "Found TensorRT libmyelin_pattern_runtime: ${LIBMYELIN_PATTERN_RUNTIME}")
+        endif()
+        add_library(libmyelin_pattern_runtime STATIC IMPORTED)
+        set_target_properties(libmyelin_pattern_runtime PROPERTIES
+            IMPORTED_LOCATION "${LIBMYELIN_PATTERN_RUNTIME}"
+            )
+
+        find_library(LIBMYELIN_PATTERN_LIBRARY
+            NAMES libmyelin_pattern_library_static.a myelin_pattern_library_static.lib
+            PATHS ${__found_trt_root}/lib
+            )
+        if(NOT LIBMYELIN_PATTERN_LIBRARY)
+            message(FATAL_ERROR "Can not find LIBMYELIN_PATTERN_LIBRARY Library")
+        else()
+            message(STATUS "Found TensorRT libmyelin_pattern_library: ${LIBMYELIN_PATTERN_LIBRARY}")
+        endif()
+        add_library(libmyelin_pattern_library STATIC IMPORTED)
+        set_target_properties(libmyelin_pattern_library PROPERTIES
+            IMPORTED_LOCATION "${LIBMYELIN_PATTERN_LIBRARY}"
+            )
+    else()
+        find_library(LIBMYELIN_SHARED
+            NAMES libmyelin.so myelin.dll
+            PATHS ${__found_trt_root}/lib
+            )
+
+        if(NOT LIBMYELIN_SHARED)
+            message(FATAL_ERROR "Can not find LIBMYELIN_SHARED Library")
+        else()
+            message(STATUS "Found TensorRT libmyelin_shared: ${LIBMYELIN_SHARED}")
+        endif()
+        add_library(libmyelin SHARED IMPORTED)
+        set_target_properties(libmyelin PROPERTIES
+            IMPORTED_LOCATION "${LIBMYELIN_SHARED}"
+            )
+    endif()
+endif()
diff --git a/scripts/cmake-build/BUILD_README.md b/scripts/cmake-build/BUILD_README.md
index 877dce51089cf36f47b3a8fa8aed6c4ba818b049..c3e1ddba4c728128a555b07b488cc6ccfe0cf3b6 100755
--- a/scripts/cmake-build/BUILD_README.md
+++ b/scripts/cmake-build/BUILD_README.md
@@ -66,7 +66,7 @@ Now we support ARM-Linux on Linux and Windows fully, also experimental on MacOS

 * commands:
 ```
-1: download toolchains from https://releases.linaro.org/components/toolchain/gcc-linaro/ or https://developer.arm.com/tools-and-software/open-source-software/developer-tools/gnu-toolchain/gnu-a/downloads if use Windows or Linux
+1: download toolchains from https://releases.linaro.org/components/toolchain/binaries/ or https://developer.arm.com/tools-and-software/open-source-software/developer-tools/gnu-toolchain/gnu-a/downloads if use Windows or Linux
 2: download toolchains from https://github.com/thinkski/osx-arm-linux-toolchains if use MacOS
 ```

diff --git a/scripts/cmake-build/create_cuda_build_libs.py b/scripts/cmake-build/create_cuda_build_libs.py
new file mode 100755
index 0000000000000000000000000000000000000000..bdbf70b25809a1aafd1e4b9931a9fc4d70361535
--- /dev/null
+++ b/scripts/cmake-build/create_cuda_build_libs.py
@@ -0,0 +1,192 @@
+#!/usr/bin/env python3
+"""Unpack CUDA, cuDNN and TensorRT packages into ./output for CUDA builds.
+
+With -s/--sbsa_mode the x86-64 CUDA package (the one providing nvcc) is
+merged with an aarch64 CUDA package, so that an x86-64 host can cross
+build for aarch64.
+"""
+
+import argparse
+import os
+import subprocess
+import glob
+
+def handle_cuda_libs(path):
+    """Extract the .deb at `path` and copy its payload into ./output.
+
+    The package is unpacked into ./tmp; each .deb found inside it is then
+    unpacked into ./tmp_sub, and whatever that ships under usr/share/ and
+    usr/local/ is copied into ./output.
+    """
+    subprocess.check_call('rm -rf tmp && rm -rf tmp_sub', shell=True)
+    print('\nhandle cuda file from.{}'.format(path))
+    # argument lists (no shell) keep package paths with spaces intact
+    cmd = ['dpkg-deb', '-xv', path, 'tmp']
+    subprocess.check_call(cmd)
+    sub_debs = glob.glob('tmp/**/*.deb', recursive=True)
+    assert(len(sub_debs) > 0)
+    for sub_deb in sub_debs:
+        subprocess.check_call('rm -rf tmp_sub', shell=True)
+        print('handle sub_deb: {}'.format(sub_deb))
+        cmd = ['dpkg-deb', '-xv', sub_deb, 'tmp_sub']
+        subprocess.check_call(cmd)
+        sub_sub_debs = glob.glob('tmp_sub/**/*.deb', recursive=True)
+        assert(len(sub_sub_debs) == 0)
+        if (os.path.isdir('tmp_sub/usr/share/')):
+            subprocess.check_call('cp -v tmp_sub/usr/share/* output/ -rf', shell=True)
+        if (os.path.isdir('tmp_sub/usr/local/')):
+            subprocess.check_call('cp -v tmp_sub/usr/local/* output/ -rf', shell=True)
+
+def main():
+    """Parse the command line, then unpack and sanity-check every package."""
+    parser = argparse.ArgumentParser()
+
+    parser.add_argument(
+        "-s",
+        "--sbsa_mode",
+        action="store_true",
+        help="create cuda sbsa libs, which means use to x86 cross build for aarch64 cuda libs",
+    )
+
+    parser.add_argument(
+        "-t",
+        "--target_aarch",
+        type=str,
+        choices=['x86-64', 'aarch64'],
+        help="create libs build for, now support x86-64 and aarch64",
+        dest="target_aarch",
+        required=True,
+    )
+
+    parser.add_argument(
+        "-d",
+        "--cudnn_deb",
+        help="cudnn deb package, download from: https://developer.nvidia.com/cudnn-download-survey",
+        dest="cudnn_deb",
+        type=str,
+        required=True,
+    )
+
+    parser.add_argument(
+        "-r",
+        "--trt_deb",
+        help="trt deb package, download from: https://developer.nvidia.com/nvidia-tensorrt-download",
+        dest="trt_deb",
+        type=str,
+        required=True,
+    )
+
+    parser.add_argument(
+        "-c",
+        "--cuda_deb",
+        help="cuda deb package, download from: https://developer.nvidia.com/cuda-downloads",
+        dest="cuda_deb",
+        type=str,
+        required=True,
+    )
+
+    parser.add_argument(
+        "-a",
+        "--cuda_aarch64_deb",
+        help="cuda aarch64 libs package: download from: https://developer.nvidia.com/cuda-downloads",
+        type=str,
+        dest="cuda_aarch64_deb",
+    )
+
+    args = parser.parse_args()
+
+    if (args.target_aarch == 'x86-64' and args.sbsa_mode):
+        print('ERROR: sbsa_mode only support target_aarch = \'aarch64\' now')
+        exit(-1)
+
+    if (args.sbsa_mode and not args.cuda_aarch64_deb):
+        print('ERROR: sbsa_mode need -a/--cuda_aarch64_deb to provide cuda aarch64 libs package')
+        exit(-1)
+
+    if (not os.path.isfile(args.cuda_deb)):
+        print('ERROR: can not find file:{}'.format(args.cuda_deb))
+        exit(-1)
+
+    if (args.sbsa_mode and not os.path.isfile(args.cuda_aarch64_deb)):
+        print('ERROR: can not find file:{}'.format(args.cuda_aarch64_deb))
+        exit(-1)
+
+    if (not os.path.isfile(args.cudnn_deb)):
+        print('ERROR: can not find file:{}'.format(args.cudnn_deb))
+        exit(-1)
+
+    if (not os.path.isfile(args.trt_deb)):
+        print('ERROR: can not find file:{}'.format(args.trt_deb))
+        exit(-1)
+
+    print("CONFIG SUMMARY: create cuda cmake build libs for {}, is for sbsa_mode: {}".format(args.target_aarch, args.sbsa_mode))
+
+    cmd = 'rm -rf output && mkdir output'
+    subprocess.check_call(cmd, shell=True)
+
+    #handle cuda
+    handle_cuda_libs(args.cuda_deb)
+
+    #handle sbsa_mode
+    if (args.sbsa_mode):
+        handle_cuda_libs(args.cuda_aarch64_deb)
+
+    # check cuda/sbsa_mode valid and handle link
+    nvcc = glob.glob('./output/*/bin/nvcc', recursive=True)
+    # exactly one nvcc must have been unpacked before nvcc[0] is indexed
+    assert(len(nvcc) == 1)
+    # drop the leading './output/' and the trailing '/bin/nvcc' (9 chars each)
+    cuda_version = nvcc[0][9:-9]
+    print('cuda version: {}'.format(cuda_version))
+    if (args.sbsa_mode):
+        subprocess.check_call('file {} | grep {}'.format(nvcc[0], 'x86-64'), shell=True)
+        remove_x86_64_libs = ['targets/x86_64-linux', 'include', 'lib64']
+        for remove_lib in remove_x86_64_libs:
+            subprocess.check_call('rm -rf ./output/{}/{}'.format(cuda_version, remove_lib), shell=True)
+        #create link for sbsa
+        cwd = os.getcwd()
+        os.chdir('output/{}'.format(cuda_version))
+        cmd = 'ln -s targets/sbsa-linux/include/ include && ln -s targets/sbsa-linux/lib/ lib64'
+        subprocess.check_call(cmd, shell=True)
+        #handle libnvrtc.so
+        readelf_nvrtc = os.popen('readelf -d lib64/stubs/libnvrtc.so | grep SONAME').read().split('\n')[0]
+        loc = readelf_nvrtc.find('[')
+        libnvrtc_with_version = readelf_nvrtc[loc+1:-1]
+        print('libnvrtc_with_version: {}'.format(libnvrtc_with_version))
+        cmd = 'cp lib64/stubs/libnvrtc.so lib64/{}'.format(libnvrtc_with_version)
+        subprocess.check_call(cmd, shell=True)
+        os.chdir(cwd)
+    else:
+        subprocess.check_call('file {} | grep {}'.format(nvcc[0], args.target_aarch), shell=True)
+
+    # handle cudnn
+    subprocess.check_call('rm -rf tmp && rm -rf tmp_sub && mkdir tmp', shell=True)
+    print('\nhandle cudnn file from.{}'.format(args.cudnn_deb))
+    # FIXME: later release cudnn may dir not with cuda, nvidia may fix later!!
+    # NOTE(review): despite the option name, this package is unpacked with tar
+    subprocess.check_call(['tar', '-xvf', args.cudnn_deb, '-C', 'tmp'])
+    subprocess.check_call(['mv', 'tmp/cuda', 'output/cudnn'])
+    cudnn_libs = glob.glob('output/cudnn/lib64/libcudnn.so*')
+    cudnn_real_libs = []
+    for lib in cudnn_libs:
+        if (not os.path.islink(lib)):
+            cudnn_real_libs.append(lib)
+    assert(len(cudnn_real_libs) > 0)
+    for lib in cudnn_real_libs:
+        subprocess.check_call('file {} | grep {}'.format(lib, args.target_aarch), shell=True)
+
+    # handle trt
+    print('\nhandle trt file from.{}'.format(args.trt_deb))
+    cmd = ['tar', '-xvf', args.trt_deb, '-C', 'output']
+    subprocess.check_call(cmd)
+    trt_libs = glob.glob('output/TensorRT-*/lib/libnvinfer.so.*')
+    trt_real_libs = []
+    for lib in trt_libs:
+        if (not os.path.islink(lib)):
+            trt_real_libs.append(lib)
+    assert(len(trt_real_libs) > 0)
+    for lib in trt_real_libs:
+        subprocess.check_call('file {} | grep {}'.format(lib, args.target_aarch), shell=True)
+
+if __name__ == "__main__":
+    main()
diff --git a/scripts/cmake-build/cross_build_linux_arm_inference.sh b/scripts/cmake-build/cross_build_linux_arm_inference.sh
index ee4f1108dce37dd1ed23ec60ebbec3459e727f3f..dc1cfe258e25674583c48ba5fc773ec76d042863 100755
--- a/scripts/cmake-build/cross_build_linux_arm_inference.sh
+++ b/scripts/cmake-build/cross_build_linux_arm_inference.sh
@@ -3,17 +3,21 @@ set -e

 ARCHS=("arm64-v8a" "armeabi-v7a-softfp" "armeabi-v7a-hardfp")
 BUILD_TYPE=Release
+MGE_WITH_CUDA=OFF
 MGE_ARMV8_2_FEATURE_FP16=OFF
 MGE_ARMV8_2_FEATURE_DOTPROD=OFF
 MGE_DISABLE_FLOAT16=OFF
 ARCH=arm64-v8a
 REMOVE_OLD_BUILD=false
+CMAKE_C_FLAGS="-Wno-psabi"
+CMAKE_CXX_FLAGS="-Wno-psabi"
 echo "EXTRA_CMAKE_ARGS: ${EXTRA_CMAKE_ARGS}"

 function usage() {
     echo "$0 args1 args2 .."
     echo "available args detail:"
     echo "-d : Build with Debug mode, default Release mode"
+    echo "-c : Build with CUDA, default without CUDA(for arm with cuda, example tx1)"
     echo "-f : enable MGE_ARMV8_2_FEATURE_FP16 for ARM64, need toolchain and hardware support"
     echo "-p : enable MGE_ARMV8_2_FEATURE_DOTPROD for ARM64, need toolchain and hardware support"
     echo "-k : open MGE_DISABLE_FLOAT16 for NEON "
@@ -25,13 +29,17 @@ function usage() {
     exit -1
 }

-while getopts "rkhdfpa:" arg
+while getopts "rkhdcfpa:" arg
 do
     case $arg in
         d)
             echo "Build with Debug mode"
             BUILD_TYPE=Debug
             ;;
+        c)
+            echo "Build with CUDA"
+            MGE_WITH_CUDA=ON
+            ;;
         f)
             echo "enable MGE_ARMV8_2_FEATURE_FP16 for ARM64"
             MGE_ARMV8_2_FEATURE_FP16=ON
@@ -77,6 +85,7 @@ done
 echo "----------------------------------------------------"
 echo "build config summary:"
 echo "BUILD_TYPE: $BUILD_TYPE"
+echo "MGE_WITH_CUDA: $MGE_WITH_CUDA"
 echo "MGE_ARMV8_2_FEATURE_FP16: $MGE_ARMV8_2_FEATURE_FP16"
 echo "MGE_ARMV8_2_FEATURE_DOTPROD: $MGE_ARMV8_2_FEATURE_DOTPROD"
 echo "MGE_DISABLE_FLOAT16: $MGE_DISABLE_FLOAT16"
@@ -94,17 +103,35 @@ elif [[ $OS =~ "NT" ]]; then
     MAKEFILE_TYPE="Unix"
 fi

+if [ ! "$OS" = "Linux" ] && [ "$MGE_WITH_CUDA" = "ON" ];then
+    echo "cross build for arm with cuda only support from Linux"
+    exit -1
+fi
+
+if [ "$MGE_WITH_CUDA" = "ON" ] && [ ! "$ARCH" = "arm64-v8a" ];then
+    echo "arm with cuda only support ARCH: arm64-v8a"
+    exit -1
+fi
+
+if [ "$MGE_WITH_CUDA" = "OFF" ];then
+    echo "config -Werror=unused-parameter when cuda off for CI check"
+    CMAKE_C_FLAGS="-Werror=unused-parameter -Wno-psabi"
+    CMAKE_CXX_FLAGS="-Werror=unused-parameter -Wno-psabi"
+fi
+
 SRC_DIR=$($READLINK -f "`dirname $0`/../../")
 source $SRC_DIR/scripts/cmake-build/utils/utils.sh

 function cmake_build() {
-    BUILD_DIR=$SRC_DIR/build_dir/gnu-linux/$1/$BUILD_TYPE/build
+    BUILD_DIR=$SRC_DIR/build_dir/gnu-linux/MGE_WITH_CUDA_$3/$1/$BUILD_TYPE/build
     INSTALL_DIR=$BUILD_DIR/../install
     TOOLCHAIN=$SRC_DIR/toolchains/$2
+    MGE_WITH_CUDA=$3
     echo "build dir: $BUILD_DIR"
     echo "install dir: $INSTALL_DIR"
     echo "build type: $BUILD_TYPE"
     echo "build toolchain: $TOOLCHAIN"
+    echo "MGE_WITH_CUDA: $MGE_WITH_CUDA"
     echo "BUILD MAKEFILE_TYPE: $MAKEFILE_TYPE"
     try_remove_old_build $REMOVE_OLD_BUILD $BUILD_DIR $INSTALL_DIR

@@ -113,10 +140,12 @@ function cmake_build() {
     mkdir -p $INSTALL_DIR
     cd $BUILD_DIR
     cmake -G "$MAKEFILE_TYPE Makefiles" \
+        -DCMAKE_C_FLAGS="$CMAKE_C_FLAGS" \
+        -DCMAKE_CXX_FLAGS="$CMAKE_CXX_FLAGS" \
         -DCMAKE_TOOLCHAIN_FILE=$TOOLCHAIN \
         -DCMAKE_BUILD_TYPE=$BUILD_TYPE \
         -DMGE_INFERENCE_ONLY=ON \
-        -DMGE_WITH_CUDA=OFF \
-        -DMGE_ARMV8_2_FEATURE_FP16= $MGE_ARMV8_2_FEATURE_FP16 \
+        -DMGE_WITH_CUDA=$MGE_WITH_CUDA \
+        -DMGE_ARMV8_2_FEATURE_FP16=$MGE_ARMV8_2_FEATURE_FP16 \
         -DMGE_ARMV8_2_FEATURE_DOTPROD=$MGE_ARMV8_2_FEATURE_DOTPROD \
         -DMGE_DISABLE_FLOAT16=$MGE_DISABLE_FLOAT16 \
@@ -141,4 +170,4 @@ else
     echo "ERR CONFIG ABORT NOW!!"
     exit -1
 fi
-cmake_build $ARCH $toolchain
+cmake_build $ARCH $toolchain $MGE_WITH_CUDA
diff --git a/toolchains/aarch64-linux-gnu.toolchain.cmake b/toolchains/aarch64-linux-gnu.toolchain.cmake
index d8dc530558951c45404c1bbcfe970a6f29d30192..dddd009171592d8453014ddbb1934b5c8f348d2d 100644
--- a/toolchains/aarch64-linux-gnu.toolchain.cmake
+++ b/toolchains/aarch64-linux-gnu.toolchain.cmake
@@ -1,8 +1,6 @@
 set(ARM_CROSS_BUILD_ARCH aarch64)
 set(CMAKE_C_COMPILER "aarch64-linux-gnu-gcc")
 set(CMAKE_CXX_COMPILER "aarch64-linux-gnu-g++")
-set(CMAKE_C_FLAGS "-Werror=unused-parameter -Wno-psabi")
-set(CMAKE_CXX_FLAGS "-Werror=unused-parameter -Wno-psabi")
 set(CMAKE_STRIP "aarch64-linux-gnu-strip")
 set(CMAKE_SYSTEM_PROCESSOR aarch64)
 set(CMAKE_SYSTEM_NAME Linux)