提交 cf27dd64 编写于 作者: M Megvii Engine Team

fix(cuda): use cudnn8.0.4 as cu111 default libs

GitOrigin-RevId: 721ca73bae696b06872614fb095f5282dbb36662
上级 649e4dd7
...@@ -216,7 +216,7 @@ TEST_F(CUDA, CONV_BIAS_FORWARD_QS8) { ...@@ -216,7 +216,7 @@ TEST_F(CUDA, CONV_BIAS_FORWARD_QS8) {
} }
} }
//! close for cu111 ci, reopen it when bug fixed //! close for cu111 ci, reopen it when bug fixed
#if 0 #if CUDA_VERSION < 11000
TEST_F(CUDA, CONV_BIAS_NCHW_QS8) { TEST_F(CUDA, CONV_BIAS_NCHW_QS8) {
//! not support NonlineMode::SIGMOID and NonlineMode::H_SWISH //! not support NonlineMode::SIGMOID and NonlineMode::H_SWISH
require_compute_capability(6, 1); require_compute_capability(6, 1);
......
...@@ -63,6 +63,7 @@ add_custom_command( ...@@ -63,6 +63,7 @@ add_custom_command(
TARGET ${MODULE_NAME} POST_BUILD TARGET ${MODULE_NAME} POST_BUILD
COMMAND ${CMAKE_COMMAND} -E copy ${PROJECT_SOURCE_DIR}/LICENSE ${PROJECT_SOURCE_DIR}/ACKNOWLEDGMENTS ${PROJECT_BINARY_DIR} COMMAND ${CMAKE_COMMAND} -E copy ${PROJECT_SOURCE_DIR}/LICENSE ${PROJECT_SOURCE_DIR}/ACKNOWLEDGMENTS ${PROJECT_BINARY_DIR}
COMMAND ${CMAKE_COMMAND} -E remove -f ${CMAKE_CURRENT_SOURCE_DIR}/python/megengine/core/$<TARGET_FILE_NAME:${MODULE_NAME}> # clean develop COMMAND ${CMAKE_COMMAND} -E remove -f ${CMAKE_CURRENT_SOURCE_DIR}/python/megengine/core/$<TARGET_FILE_NAME:${MODULE_NAME}> # clean develop
COMMAND ${CMAKE_COMMAND} -E remove -f ${CMAKE_CURRENT_SOURCE_DIR}/python/megengine/version.py # clean develop
COMMAND ${CMAKE_COMMAND} -E copy_directory ${CMAKE_CURRENT_SOURCE_DIR}/python/megengine ${CMAKE_CURRENT_BINARY_DIR}/python/megengine COMMAND ${CMAKE_COMMAND} -E copy_directory ${CMAKE_CURRENT_SOURCE_DIR}/python/megengine ${CMAKE_CURRENT_BINARY_DIR}/python/megengine
COMMAND ${CMAKE_COMMAND} -E copy_directory ${CMAKE_CURRENT_SOURCE_DIR}/python/test ${CMAKE_CURRENT_BINARY_DIR}/python/test COMMAND ${CMAKE_COMMAND} -E copy_directory ${CMAKE_CURRENT_SOURCE_DIR}/python/test ${CMAKE_CURRENT_BINARY_DIR}/python/test
COMMAND ${CMAKE_COMMAND} -E copy ${CMAKE_CURRENT_SOURCE_DIR}/python/setup.py ${CMAKE_CURRENT_BINARY_DIR}/python/setup.py COMMAND ${CMAKE_COMMAND} -E copy ${CMAKE_CURRENT_SOURCE_DIR}/python/setup.py ${CMAKE_CURRENT_BINARY_DIR}/python/setup.py
......
...@@ -74,7 +74,7 @@ ALL_PYTHON="36m" ./scripts/whl/manylinux2014/build_wheel_common.sh -sdk cu101 ...@@ -74,7 +74,7 @@ ALL_PYTHON="36m" ./scripts/whl/manylinux2014/build_wheel_common.sh -sdk cu101
* If you just want to build the CPU-only version, you can set the `BUILD_WHL_CPU_ONLY` environment variable to 'ON', e.g.: * If you just want to build the CPU-only version, you can set the `BUILD_WHL_CPU_ONLY` environment variable to 'ON', e.g.:
```bash ```bash
BUILD_WHL_CPU_ONLY="ON" ALL_PYTHON="36m" ./scripts/whl/manylinux2014/build_wheel_common.sh -sdk cu101 ALL_PYTHON="36m" ./scripts/whl/manylinux2014/build_wheel_common.sh -sdk cpu
``` ```
## Build for MacOS ## Build for MacOS
......
...@@ -9,16 +9,16 @@ local_path=$(dirname $(readlink -f $0)) ...@@ -9,16 +9,16 @@ local_path=$(dirname $(readlink -f $0))
CUDNN_LIB_DIR="/opt/cudnn/lib64/" CUDNN_LIB_DIR="/opt/cudnn/lib64/"
CUDA_LIB_DIR="/usr/local/cuda/lib64/" CUDA_LIB_DIR="/usr/local/cuda/lib64/"
CUDA_SDK="unknown" SDK_NAME="unknown"
function usage() { function usage() {
echo "use '-sdk cu111' to specify cuda toolkit config, also support cu101, cu112" echo "use '-sdk cu111' to specify cuda toolkit config, also support cu101, cu112, cpu"
} }
while [ "$1" != "" ]; do while [ "$1" != "" ]; do
case $1 in case $1 in
-sdk) -sdk)
shift shift
CUDA_SDK=$1 SDK_NAME=$1
shift shift
;; ;;
*) *)
...@@ -27,17 +27,16 @@ while [ "$1" != "" ]; do ...@@ -27,17 +27,16 @@ while [ "$1" != "" ]; do
esac esac
done done
echo "Build with ${CUDA_SDK}" echo "Build with ${SDK_NAME}"
if [ $CUDA_SDK == "cu101" ];then if [ $SDK_NAME == "cu101" ];then
COPY_LIB_LIST="${CUDA_LIB_DIR}/libnvrtc.so.10.1" COPY_LIB_LIST="${CUDA_LIB_DIR}/libnvrtc.so.10.1"
EXTRA_CMAKE_FLAG=" -DMGE_WITH_CUDNN_SHARED=OFF" EXTRA_CMAKE_FLAG=" -DMGE_WITH_CUDNN_SHARED=OFF"
OUT_DIR="cu101"
BUILD_GCC8="ON" BUILD_GCC8="ON"
REQUIR_CUDA_VERSION="10010" REQUIR_CUDA_VERSION="10010"
REQUIR_CUDNN_VERSION="7.6.3" REQUIR_CUDNN_VERSION="7.6.3"
REQUIR_TENSORRT_VERSION="6.0.1.5" REQUIR_TENSORRT_VERSION="6.0.1.5"
elif [ $CUDA_SDK == "cu111" ];then elif [ $SDK_NAME == "cu111" ];then
COPY_LIB_LIST="\ COPY_LIB_LIST="\
${CUDA_LIB_DIR}/libnvrtc.so.11.1:\ ${CUDA_LIB_DIR}/libnvrtc.so.11.1:\
${CUDA_LIB_DIR}/libcublasLt.so.11:\ ${CUDA_LIB_DIR}/libcublasLt.so.11:\
...@@ -56,11 +55,10 @@ elif [ $CUDA_SDK == "cu111" ];then ...@@ -56,11 +55,10 @@ elif [ $CUDA_SDK == "cu111" ];then
arch=compute_80,code=sm_80 \ arch=compute_80,code=sm_80 \
arch=compute_86,code=sm_86 \ arch=compute_86,code=sm_86 \
arch=compute_86,code=compute_86" arch=compute_86,code=compute_86"
OUT_DIR="cu111"
REQUIR_CUDA_VERSION="11010" REQUIR_CUDA_VERSION="11010"
REQUIR_CUDNN_VERSION="8.0.5" REQUIR_CUDNN_VERSION="8.0.4"
REQUIR_TENSORRT_VERSION="7.2.2.3" REQUIR_TENSORRT_VERSION="7.2.2.3"
elif [ $CUDA_SDK == "cu112" ];then elif [ $SDK_NAME == "cu112" ];then
COPY_LIB_LIST="\ COPY_LIB_LIST="\
${CUDA_LIB_DIR}/libnvrtc.so.11.2:\ ${CUDA_LIB_DIR}/libnvrtc.so.11.2:\
${CUDA_LIB_DIR}/libcublasLt.so.11:\ ${CUDA_LIB_DIR}/libcublasLt.so.11:\
...@@ -79,16 +77,17 @@ elif [ $CUDA_SDK == "cu112" ];then ...@@ -79,16 +77,17 @@ elif [ $CUDA_SDK == "cu112" ];then
arch=compute_80,code=sm_80 \ arch=compute_80,code=sm_80 \
arch=compute_86,code=sm_86 \ arch=compute_86,code=sm_86 \
arch=compute_86,code=compute_86" arch=compute_86,code=compute_86"
OUT_DIR="cu112"
REQUIR_CUDA_VERSION="11020" REQUIR_CUDA_VERSION="11020"
REQUIR_CUDNN_VERSION="8.0.5" REQUIR_CUDNN_VERSION="8.0.4"
REQUIR_TENSORRT_VERSION="7.2.2.3" REQUIR_TENSORRT_VERSION="7.2.2.3"
elif [ $SDK_NAME == "cpu" ];then
echo "use $SDK_NAME without cuda support"
BUILD_WHL_CPU_ONLY="ON"
else else
echo "no support sdk ${CUDA_SDK}, please set by '-sdk cu111'" echo "no support sdk ${SDK_NAME}, please set by '-sdk cu111'"
exit -1 exit -1
fi fi
BUILD_WHL_CPU_ONLY=${BUILD_WHL_CPU_ONLY}
if [[ -z ${BUILD_WHL_CPU_ONLY} ]] if [[ -z ${BUILD_WHL_CPU_ONLY} ]]
then then
BUILD_WHL_CPU_ONLY="OFF" BUILD_WHL_CPU_ONLY="OFF"
...@@ -205,7 +204,7 @@ docker run --rm -it $TMPFS_ARGS \ ...@@ -205,7 +204,7 @@ docker run --rm -it $TMPFS_ARGS \
-e ALL_PYTHON="${ALL_PYTHON}" \ -e ALL_PYTHON="${ALL_PYTHON}" \
-e EXTRA_CMAKE_FLAG="$EXTRA_CMAKE_FLAG" \ -e EXTRA_CMAKE_FLAG="$EXTRA_CMAKE_FLAG" \
-e COPY_LIB_LIST="$COPY_LIB_LIST" \ -e COPY_LIB_LIST="$COPY_LIB_LIST" \
-e OUT_DIR="$OUT_DIR" \ -e SDK_NAME="$SDK_NAME" \
-v ${CUDA_ROOT_DIR}:/usr/local/cuda \ -v ${CUDA_ROOT_DIR}:/usr/local/cuda \
-v ${CUDNN_ROOT_DIR}:/opt/cudnn \ -v ${CUDNN_ROOT_DIR}:/opt/cudnn \
-v ${TENSORRT_ROOT_DIR}:/opt/tensorrt \ -v ${TENSORRT_ROOT_DIR}:/opt/tensorrt \
......
...@@ -119,13 +119,13 @@ do ...@@ -119,13 +119,13 @@ do
if [ ${USE_AUDITWHEEL} = "ON" ]; then if [ ${USE_AUDITWHEEL} = "ON" ]; then
LD_LIBRARY_PATH=${BUILD_DIR}/dnn/cuda-stub:$LD_LIBRARY_PATH auditwheel repair -L ${NEW_LIB_PATH} ${BUILD_DIR}/staging/dist/Meg*.whl LD_LIBRARY_PATH=${BUILD_DIR}/dnn/cuda-stub:$LD_LIBRARY_PATH auditwheel repair -L ${NEW_LIB_PATH} ${BUILD_DIR}/staging/dist/Meg*.whl
else else
mkdir -p ${SRC_DIR}/scripts/whl/manylinux2014/output/wheelhouse/${OUT_DIR} mkdir -p ${SRC_DIR}/scripts/whl/manylinux2014/output/wheelhouse/${SDK_NAME}
cd ${BUILD_DIR}/staging/dist/ cd ${BUILD_DIR}/staging/dist/
org_whl_name=`ls Meg*${ver}*.whl` org_whl_name=`ls Meg*${ver}*.whl`
compat_whl_name=`echo ${org_whl_name} | sed 's/linux/manylinux2014/'` compat_whl_name=`echo ${org_whl_name} | sed 's/linux/manylinux2014/'`
echo "org whl name: ${org_whl_name}" echo "org whl name: ${org_whl_name}"
echo "comapt whl name: ${compat_whl_name}" echo "comapt whl name: ${compat_whl_name}"
mv ${org_whl_name} ${SRC_DIR}/scripts/whl/manylinux2014/output/wheelhouse/${OUT_DIR}/${compat_whl_name} mv ${org_whl_name} ${SRC_DIR}/scripts/whl/manylinux2014/output/wheelhouse/${SDK_NAME}/${compat_whl_name}
cd /home/output cd /home/output
fi fi
chown -R ${UID}.${UID} . chown -R ${UID}.${UID} .
......
...@@ -1836,7 +1836,7 @@ TEST(TestEnableTensorCore, SmallInputShape) { ...@@ -1836,7 +1836,7 @@ TEST(TestEnableTensorCore, SmallInputShape) {
} }
//! close for cu111 ci, reopen it when bug fixed //! close for cu111 ci, reopen it when bug fixed
#if 0 #if CUDA_VERSION < 11000
TEST(TestEnableTensorCore, Nchw4Nchw) { TEST(TestEnableTensorCore, Nchw4Nchw) {
REQUIRE_GPU(1); REQUIRE_GPU(1);
auto cn = CompNode::load("gpu0"); auto cn = CompNode::load("gpu0");
...@@ -1936,7 +1936,7 @@ TEST(TestEnableTensorCore, Nchw4Nchw) { ...@@ -1936,7 +1936,7 @@ TEST(TestEnableTensorCore, Nchw4Nchw) {
#endif #endif
//! close for cu111 ci, reopen it when bug fixed //! close for cu111 ci, reopen it when bug fixed
#if 0 #if CUDA_VERSION < 11000
TEST(TestEnableTensorCore, ConvBiasWithZ) { TEST(TestEnableTensorCore, ConvBiasWithZ) {
REQUIRE_GPU(1); REQUIRE_GPU(1);
auto cn = CompNode::load("gpu0"); auto cn = CompNode::load("gpu0");
...@@ -2002,7 +2002,7 @@ TEST(TestEnableTensorCore, ConvBiasWithZ) { ...@@ -2002,7 +2002,7 @@ TEST(TestEnableTensorCore, ConvBiasWithZ) {
#endif #endif
//! close for cu111 ci, reopen it when bug fixed //! close for cu111 ci, reopen it when bug fixed
#if 0 #if CUDA_VERSION < 11000
TEST(TestEnableTensorCore, Pooling) { TEST(TestEnableTensorCore, Pooling) {
REQUIRE_GPU(1); REQUIRE_GPU(1);
auto cn = CompNode::load("gpu0"); auto cn = CompNode::load("gpu0");
...@@ -2148,7 +2148,7 @@ TEST(TestGoptInference, EnableTensorCore) { ...@@ -2148,7 +2148,7 @@ TEST(TestGoptInference, EnableTensorCore) {
} }
//! close for cu111 ci, reopen it when bug fixed //! close for cu111 ci, reopen it when bug fixed
#if 0 #if CUDA_VERSION < 11000
TEST(FuseConvBiasZPass, BlockFuse) { TEST(FuseConvBiasZPass, BlockFuse) {
REQUIRE_GPU(1); REQUIRE_GPU(1);
auto cn = CompNode::load("gpu0"); auto cn = CompNode::load("gpu0");
...@@ -2273,7 +2273,7 @@ TEST(FuseConvBiasZPass, BlockFuse) { ...@@ -2273,7 +2273,7 @@ TEST(FuseConvBiasZPass, BlockFuse) {
#endif #endif
//! close for cu111 ci, reopen it when bug fixed //! close for cu111 ci, reopen it when bug fixed
#if 0 #if CUDA_VERSION < 11000
TEST(TestEnableTensorCore, ShuffleMerge) { TEST(TestEnableTensorCore, ShuffleMerge) {
REQUIRE_GPU(1); REQUIRE_GPU(1);
auto cn = CompNode::load("gpu0"); auto cn = CompNode::load("gpu0");
...@@ -2450,7 +2450,7 @@ TEST(FuseConvBiasZPass, Basic) { ...@@ -2450,7 +2450,7 @@ TEST(FuseConvBiasZPass, Basic) {
#if MGB_CUDA #if MGB_CUDA
//! close for cu111 ci, reopen it when bug fixed //! close for cu111 ci, reopen it when bug fixed
#if 0 #if CUDA_VERSION < 11000
TEST(TestGoptInference, EnableCHWN4) { TEST(TestGoptInference, EnableCHWN4) {
REQUIRE_GPU(1); REQUIRE_GPU(1);
auto cn = CompNode::load("gpu0"); auto cn = CompNode::load("gpu0");
...@@ -2552,7 +2552,7 @@ TEST(TestGoptInference, EnableCHWN4) { ...@@ -2552,7 +2552,7 @@ TEST(TestGoptInference, EnableCHWN4) {
#endif #endif
//! close for cu111 ci, reopen it when bug fixed //! close for cu111 ci, reopen it when bug fixed
#if 0 #if CUDA_VERSION < 11000
TEST(TestGoptInference, EnableCHWN4WarpPespective) { TEST(TestGoptInference, EnableCHWN4WarpPespective) {
REQUIRE_GPU(1); REQUIRE_GPU(1);
auto cn = CompNode::load("gpu0"); auto cn = CompNode::load("gpu0");
...@@ -2731,7 +2731,7 @@ TEST(TestGoptInference, EnableCHWN4Pooling) { ...@@ -2731,7 +2731,7 @@ TEST(TestGoptInference, EnableCHWN4Pooling) {
} }
//! close for cu111 ci, reopen it when bug fixed //! close for cu111 ci, reopen it when bug fixed
#if 0 #if CUDA_VERSION < 11000
TEST(TestGoptInference, EnableCHWN4ShuffleRemove) { TEST(TestGoptInference, EnableCHWN4ShuffleRemove) {
REQUIRE_GPU(1); REQUIRE_GPU(1);
auto cn = CompNode::load("gpu0"); auto cn = CompNode::load("gpu0");
...@@ -2857,7 +2857,7 @@ TEST(TestGoptInference, EnableCHWN4ShuffleRemove) { ...@@ -2857,7 +2857,7 @@ TEST(TestGoptInference, EnableCHWN4ShuffleRemove) {
#endif #endif
//! close for cu111 ci, reopen it when bug fixed //! close for cu111 ci, reopen it when bug fixed
#if 0 #if CUDA_VERSION < 11000
TEST(TestGoptInference, ConvertFormatNCHW4GPU) { TEST(TestGoptInference, ConvertFormatNCHW4GPU) {
REQUIRE_GPU(1); REQUIRE_GPU(1);
auto cn = CompNode::load("gpu0"); auto cn = CompNode::load("gpu0");
...@@ -3076,7 +3076,7 @@ TEST(TestGoptInference, ConvertFormatNCHW4) { ...@@ -3076,7 +3076,7 @@ TEST(TestGoptInference, ConvertFormatNCHW4) {
} }
//! close for cu111 ci, reopen it when bug fixed //! close for cu111 ci, reopen it when bug fixed
#if 0 #if CUDA_VERSION < 11000
TEST(TestGoptInference, ConvertFormatNCHW4Ic3) { TEST(TestGoptInference, ConvertFormatNCHW4Ic3) {
REQUIRE_GPU(1); REQUIRE_GPU(1);
auto cn = CompNode::load("gpu0"); auto cn = CompNode::load("gpu0");
...@@ -3945,7 +3945,7 @@ TEST(TestGoptInference, FoldingConvDimshuffle) { ...@@ -3945,7 +3945,7 @@ TEST(TestGoptInference, FoldingConvDimshuffle) {
} }
//! close for cu111 ci, reopen it when bug fixed //! close for cu111 ci, reopen it when bug fixed
#if 0 #if CUDA_VERSION < 11000
TEST(TestGoptInference, FoldingConvDimshuffleNCHW4NCHW32) { TEST(TestGoptInference, FoldingConvDimshuffleNCHW4NCHW32) {
REQUIRE_GPU(1); REQUIRE_GPU(1);
auto cn = CompNode::load("gpu0"); auto cn = CompNode::load("gpu0");
......
...@@ -1978,7 +1978,7 @@ TEST(TestTensorRTReplace, FuseConvAdd) { ...@@ -1978,7 +1978,7 @@ TEST(TestTensorRTReplace, FuseConvAdd) {
MGB_ASSERT_TENSOR_NEAR(outputs[1], outputs[3], 1e-3); MGB_ASSERT_TENSOR_NEAR(outputs[1], outputs[3], 1e-3);
} }
//! close for cu111 ci, reopen it when bug fixed //! close for cu111 ci, reopen it when bug fixed
#if 0 #if CUDA_VERSION < 11000
TEST(TestTensorRTReplace, FuseConvAddNchw2nchw4) { TEST(TestTensorRTReplace, FuseConvAddNchw2nchw4) {
REQUIRE_GPU(1); REQUIRE_GPU(1);
auto cn = CompNode::load("gpu0"); auto cn = CompNode::load("gpu0");
......
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册