未验证 提交 d170a54a 编写于 作者: S Shenghang Tsai 提交者: GitHub

enable CMake first class cuda support (#5858)

* cmake first class cuda support

* refine

* refine

* refine

* refine

* refine

* refine

* refine

* refine

* refine

* refine

* refine

* refine

* refine

* refine

* rm useless

* refine

* refine

* also link cuda libs if build static

* refine

* refine

* add

* Revert "add"

This reverts commit d9e67ad1.

* fix

* refine

* refine
Co-authored-by: Noneflow-ci-bot <69100618+oneflow-ci-bot@users.noreply.github.com>
上级 45ec2370
......@@ -122,33 +122,27 @@ jobs:
CI_PERSONAL_ACCESS_TOKEN: ${{ secrets.CI_PERSONAL_ACCESS_TOKEN }}
changed_files:
name: "Change files"
name: "Changed files"
runs-on: ubuntu-latest
outputs:
all: ${{ steps.changes.outputs.all}}
python: ${{ steps.changes.outputs.python }}
should_run_single_client_tests: ${{ steps.should_run_single_client_tests.outputs.result }}
should_run_single_client_tests: ${{ steps.changes.outputs.should_run_single_client_tests }}
steps:
- name: Checkout OneFlow
uses: actions/checkout@v2
with:
fetch-depth: 0
- name: Set extra args
if: contains(github.event.pull_request.labels.*.name, 'need-single-client-tests')
run: |
echo "extra_args_from_labels=--need_single_client_tests" >> $GITHUB_ENV
- name: Find changes
id: changes
run: |
git diff --name-only --diff-filter=ACMRT ${{ github.event.pull_request.base.sha }} ${{ github.sha }}
echo "::set-output name=all::$(git diff --name-only --diff-filter=ACMRT ${{ github.event.pull_request.base.sha }} ${{ github.sha }} | xargs)"
echo "::set-output name=python::$(git diff --name-only --diff-filter=ACMRT ${{ github.event.pull_request.base.sha }} ${{ github.sha }} | grep .py$ | xargs)"
- name: echo changed files
run: |
echo ${{steps.changes.outputs.all}}
echo ${{steps.changes.outputs.python}}
- name: Should run single client tests
id: should_run_single_client_tests
if: contains(steps.changes.outputs.python, 'python/oneflow/compatible/single_client') || steps.changes.outputs.python != steps.changes.outputs.all
python3 tools/flags_from_git_diff.py --base ${{ github.event.pull_request.base.sha }} --head ${{ github.sha }} ${{ env.extra_args_from_labels }}
- name: Will run single client tests
if: steps.changes.outputs.should_run_single_client_tests == '1'
run: |
echo "::set-output name=result::true"
echo "yes"
build:
name: Build
needs:
......@@ -248,7 +242,7 @@ jobs:
if: env.is_built != '1'
- name: Build OneFlow
timeout-minutes: 45
if: env.is_built != '1' && ((contains(fromJson('["cuda", "cpu"]'), matrix.test_suite) || (contains(fromJson('["xla", "xla_cpu"]'), matrix.test_suite) && needs.changed_files.outputs.should_run_single_client_tests)))
if: env.is_built != '1' && ((contains(fromJson('["cuda", "cpu"]'), matrix.test_suite) || (contains(fromJson('["xla", "xla_cpu"]'), matrix.test_suite) && needs.changed_files.outputs.should_run_single_client_tests == '1')))
uses: ./.github/actions/whl
with:
tmp_dir: ${ci_tmp_dir}
......@@ -258,7 +252,7 @@ jobs:
python_version: ${{ matrix.python_version }}
- name: Single client custom Op test (run by oneflow build docker)
timeout-minutes: 45
if: matrix.test_suite == 'cpu' && env.is_built != '1' && needs.changed_files.outputs.should_run_single_client_tests
if: matrix.test_suite == 'cpu' && env.is_built != '1' && needs.changed_files.outputs.should_run_single_client_tests == '1'
run: |
set -x
docker run --shm-size=8g --rm -w $PWD -v $PWD:$PWD -v /dataset:/dataset -v /model_zoo:/model_zoo \
......@@ -452,21 +446,21 @@ jobs:
bash -c "python3 -m pip config set global.index-url ${{ env.pip_index_mirror }} && bash ci/test/try_install.sh && bash ci/test/build_docs.sh"
- name: Single client op test (distributed, 1st try)
timeout-minutes: 45
if: matrix.test_suite == 'cuda' && needs.changed_files.outputs.should_run_single_client_tests
if: matrix.test_suite == 'cuda' && needs.changed_files.outputs.should_run_single_client_tests == '1'
continue-on-error: true
id: distributed_try_1
run: |
python3 ci/test/distributed_run.py --mode=single_client --bash_script=ci/test/2node_op_test.sh --custom_img_tag=${{ env.image_name }} --oneflow_wheel_path=${{ env.wheelhouse_dir }} --oneflow_wheel_python_version=3.6
- name: Single client op test (distributed, 2nd try)
timeout-minutes: 45
if: matrix.test_suite == 'cuda' && steps.distributed_try_1.outcome=='failure' && needs.changed_files.outputs.should_run_single_client_tests
if: matrix.test_suite == 'cuda' && steps.distributed_try_1.outcome=='failure' && needs.changed_files.outputs.should_run_single_client_tests == '1'
continue-on-error: true
id: distributed_try_2
run: |
python3 ci/test/distributed_run.py --mode=single_client --bash_script=ci/test/2node_op_test.sh --custom_img_tag=${{ env.image_name }} --oneflow_wheel_path=${{ env.wheelhouse_dir }} --oneflow_wheel_python_version=3.6
- name: Single client op test (distributed, 3rd try)
timeout-minutes: 45
if: matrix.test_suite == 'cuda' && steps.distributed_try_2.outcome=='failure' && needs.changed_files.outputs.should_run_single_client_tests
if: matrix.test_suite == 'cuda' && steps.distributed_try_2.outcome=='failure' && needs.changed_files.outputs.should_run_single_client_tests == '1'
continue-on-error: false
id: distributed_try_3
run: |
......@@ -482,7 +476,7 @@ jobs:
bash -c "python3 -m pip config set global.index-url ${{ env.pip_index_mirror }} && bash ci/test/try_install.sh && bash ci/test/doctest.sh"
- name: Single client dry run test (run without runtime)
timeout-minutes: 45
if: matrix.test_suite == 'cuda' && needs.changed_files.outputs.should_run_single_client_tests
if: matrix.test_suite == 'cuda' && needs.changed_files.outputs.should_run_single_client_tests == '1'
run: |
set -x
docker run ${{ env.extra_docker_args }} ${{ env.pip_cache_docker_args }} \
......@@ -608,7 +602,7 @@ jobs:
})
- name: Single client op test
timeout-minutes: 45
if: contains(fromJson('["cuda", "cuda_op"]'), matrix.test_suite) && needs.changed_files.outputs.should_run_single_client_tests
if: contains(fromJson('["cuda_op", "cpu"]'), matrix.test_suite) && needs.changed_files.outputs.should_run_single_client_tests == '1'
run: |
set -x
docker run \
......@@ -617,7 +611,7 @@ jobs:
bash -c "python3 -m pip config set global.index-url ${{ env.pip_index_mirror }} && bash ci/test/try_install.sh && bash ci/test/1node_op_test.sh"
- name: Single client model test
timeout-minutes: 45
if: contains(fromJson('["cuda", "cuda"]'), matrix.test_suite) && needs.changed_files.outputs.should_run_single_client_tests
if: contains(fromJson('["cuda", "cpu"]'), matrix.test_suite) && needs.changed_files.outputs.should_run_single_client_tests == '1'
run: |
set -x
docker run \
......@@ -627,7 +621,7 @@ jobs:
- name: Single client model serve test
timeout-minutes: 45
id: model_serve_test
if: matrix.test_suite == 'cuda' && needs.changed_files.outputs.should_run_single_client_tests
if: matrix.test_suite == 'cuda' && needs.changed_files.outputs.should_run_single_client_tests == '1'
run: |
set -x
docker run ${{ env.extra_docker_args }} ${{ env.pip_cache_docker_args }} \
......@@ -642,7 +636,7 @@ jobs:
${image_name} bash ci/test/print_stack_from_core.sh python3 serving-tmp
- name: Single client benchmark (mainly for backward compatibility)
timeout-minutes: 45
if: matrix.test_suite == 'cuda' && needs.changed_files.outputs.should_run_single_client_tests
if: matrix.test_suite == 'cuda' && needs.changed_files.outputs.should_run_single_client_tests == '1'
run: |
set -x
docker run ${{ env.extra_docker_args }} ${{ env.pip_cache_docker_args }} \
......@@ -650,7 +644,7 @@ jobs:
bash -c "python3 -m pip config set global.index-url ${{ env.pip_index_mirror }} && bash ci/test/try_install.sh && bash ci/test/1node_benchmark_test.sh"
- name: Single client benchmark FP16 (mainly for backward compatibility)
timeout-minutes: 45
if: matrix.test_suite == 'cuda' && needs.changed_files.outputs.should_run_single_client_tests
if: matrix.test_suite == 'cuda' && needs.changed_files.outputs.should_run_single_client_tests == '1'
run: |
set -x
docker run ${{ env.extra_docker_args }} ${{ env.pip_cache_docker_args }} \
......@@ -658,14 +652,14 @@ jobs:
bash -c "python3 -m pip config set global.index-url ${{ env.pip_index_mirror }} && bash ci/test/try_install.sh && bash ci/test/1node_benchmark_test_fp16.sh"
- name: Single client XLA Test
timeout-minutes: 45
if: contains(fromJson('["xla", "xla_cpu"]'), matrix.test_suite) && needs.changed_files.outputs.should_run_single_client_tests
if: contains(fromJson('["xla", "xla_cpu"]'), matrix.test_suite) && needs.changed_files.outputs.should_run_single_client_tests == '1'
run: |
set -x
docker run ${{ env.extra_docker_args }} ${{ env.pip_cache_docker_args }} \
${image_name} \
bash -c "python3 -m pip config set global.index-url ${{ env.pip_index_mirror }} && bash ci/test/try_install.sh && bash ci/test/test_xla.sh"
- name: Remove automerge
if: contains(fromJson('["cuda_new_interface", "cpu_new_interface"]'), matrix.test_suite) && failure() && cancelled() == false && contains(github.event.pull_request.labels.*.name, 'automerge')
if: contains(fromJson('["cuda_new_interface", "cpu_new_interface", "cpu", "cuda", "cuda_op"]'), matrix.test_suite) && failure() && cancelled() == false && contains(github.event.pull_request.labels.*.name, 'automerge')
uses: actions/github-script@v4
with:
script: |
......
......@@ -142,6 +142,8 @@ endif()
include(third_party)
if (BUILD_CUDA)
enable_language(CUDA)
include_directories(${CMAKE_CUDA_TOOLKIT_INCLUDE_DIRECTORIES})
message(STATUS "CUDA_VERSION: ${CUDA_VERSION}")
message(STATUS "CUDA_TOOLKIT_ROOT_DIR: ${CUDA_TOOLKIT_ROOT_DIR}")
set(CUDA_SEPARABLE_COMPILATION OFF)
......@@ -182,6 +184,7 @@ if (BUILD_CUDA)
list(APPEND CUDA_NVCC_FLAGS -gencode ${CUDA_NVCC_GENCODE})
endforeach()
message(STATUS "CUDA_NVCC_FLAGS: " ${CUDA_NVCC_FLAGS})
list(JOIN CUDA_NVCC_FLAGS " " CMAKE_CUDA_FLAGS)
endif()
message(STATUS "CMAKE_CXX_COMPILER_VERSION: " ${CMAKE_CXX_COMPILER_VERSION})
......
......@@ -5,3 +5,6 @@ set(PIP_INDEX_MIRROR "https://pypi.tuna.tsinghua.edu.cn/simple" CACHE STRING "")
set(CMAKE_BUILD_TYPE RelWithDebInfo CACHE STRING "")
set(CMAKE_GENERATOR Ninja CACHE STRING "")
set(CUDA_NVCC_GENCODES "arch=compute_61,code=sm_61" CACHE STRING "")
set(CMAKE_C_COMPILER_LAUNCHER sccache CACHE STRING "")
set(CMAKE_CXX_COMPILER_LAUNCHER sccache CACHE STRING "")
set(CMAKE_CUDA_COMPILER_LAUNCHER sccache CACHE STRING "")
......@@ -5,3 +5,6 @@ set(PIP_INDEX_MIRROR "https://pypi.tuna.tsinghua.edu.cn/simple" CACHE STRING "")
set(CMAKE_BUILD_TYPE RelWithDebInfo CACHE STRING "")
set(CMAKE_GENERATOR Ninja CACHE STRING "")
set(CUDA_NVCC_GENCODES "arch=compute_75,code=sm_75" CACHE STRING "")
set(CMAKE_C_COMPILER_LAUNCHER sccache CACHE STRING "")
set(CMAKE_CXX_COMPILER_LAUNCHER sccache CACHE STRING "")
set(CMAKE_CUDA_COMPILER_LAUNCHER sccache CACHE STRING "")
......@@ -2,19 +2,11 @@ include(python)
include(CheckCXXCompilerFlag)
function(oneflow_add_executable)
if (BUILD_CUDA)
cuda_add_executable(${ARGV})
else()
add_executable(${ARGV})
endif()
add_executable(${ARGV})
endfunction()
function(oneflow_add_library)
if (BUILD_CUDA)
cuda_add_library(${ARGV})
else()
add_library(${ARGV})
endif()
add_library(${ARGV})
endfunction()
function(target_try_compile_option target flag)
......@@ -45,8 +37,8 @@ function(target_treat_warnings_as_errors target)
target_try_compile_options(${target} -Wno-error=deprecated-declarations)
# disable unused-* for different compile mode (maybe unused in cpu.cmake, but used in cuda.cmake)
target_try_compile_options(${target}
-Wno-error=unused-const-variable
target_try_compile_options(${target}
-Wno-error=unused-const-variable
-Wno-error=unused-variable
-Wno-error=unused-local-typedefs
-Wno-error=unused-private-field
......@@ -289,7 +281,9 @@ if(BUILD_CUDA)
target_link_libraries(of_cudaobj ${oneflow_third_party_libs})
set(ONEFLOW_CUDA_LIBS of_cudaobj)
target_compile_options(of_cudaobj PRIVATE -Werror=return-type)
target_compile_options(of_cudaobj PRIVATE -Xcompiler -Werror=return-type)
# remove THRUST_IGNORE_CUB_VERSION_CHECK if starting using bundled cub
target_compile_definitions(of_cudaobj PRIVATE THRUST_IGNORE_CUB_VERSION_CHECK)
endif()
# cc obj lib
......@@ -303,9 +297,7 @@ if (USE_CLANG_FORMAT)
add_dependencies(of_ccobj of_format)
endif()
if (BUILD_SHARED_LIBS)
target_link_libraries(of_ccobj of_protoobj of_cfgobj ${ONEFLOW_CUDA_LIBS} glog_imported)
endif()
target_link_libraries(of_ccobj of_protoobj of_cfgobj ${ONEFLOW_CUDA_LIBS} glog_imported)
target_compile_options(of_ccobj PRIVATE -Werror=return-type)
target_treat_warnings_as_errors(of_ccobj)
......@@ -322,7 +314,7 @@ target_compile_options(of_pyext_obj PRIVATE -Werror=return-type)
target_treat_warnings_as_errors(of_pyext_obj)
if(APPLE)
set(of_libs -Wl,-force_load ${ONEFLOW_CUDA_LIBS} of_ccobj of_protoobj of_cfgobj)
set(of_libs -Wl,-force_load of_ccobj of_protoobj of_cfgobj)
elseif(UNIX)
set(of_libs -Wl,--whole-archive ${ONEFLOW_CUDA_LIBS} of_ccobj of_protoobj of_cfgobj -Wl,--no-whole-archive -ldl -lrt)
elseif(WIN32)
......@@ -362,20 +354,6 @@ add_dependencies(of_pyscript_copy of_protoobj)
file(RELATIVE_PATH PROJECT_BINARY_DIR_RELATIVE ${PROJECT_SOURCE_DIR} ${PROJECT_BINARY_DIR})
# get_property(include_dirs DIRECTORY ${CMAKE_CURRENT_SOURCE_DIR} PROPERTY INCLUDE_DIRECTORIES)
# foreach(dir ${include_dirs})
# message("-I'${dir}' ")
# endforeach()
# build main
set(RUNTIME_OUTPUT_DIRECTORY ${PROJECT_BINARY_DIR}/bin)
foreach(cc ${of_main_cc})
get_filename_component(main_name ${cc} NAME_WE)
oneflow_add_executable(${main_name} ${cc})
target_link_libraries(${main_name} ${of_libs} ${oneflow_third_party_libs} ${oneflow_exe_third_party_libs})
set_target_properties(${main_name} PROPERTIES RUNTIME_OUTPUT_DIRECTORY "${PROJECT_BINARY_DIR}/bin")
endforeach()
# build test
if(BUILD_TESTING)
if (of_all_test_cc)
......@@ -384,14 +362,6 @@ if(BUILD_TESTING)
set_target_properties(oneflow_testexe PROPERTIES RUNTIME_OUTPUT_DIRECTORY "${PROJECT_BINARY_DIR}/bin")
add_test(NAME oneflow_test COMMAND oneflow_testexe)
endif()
if (of_separate_test_cc)
foreach(cc ${of_separate_test_cc})
get_filename_component(test_name ${cc} NAME_WE)
string(CONCAT test_exe_name ${test_name} exe)
oneflow_add_executable(${test_exe_name} ${cc})
target_link_libraries(${test_exe_name} ${of_libs} ${oneflow_third_party_libs} ${oneflow_exe_third_party_libs})
endforeach()
endif()
endif()
# build include
......
......@@ -98,6 +98,8 @@ if (BUILD_CUDA)
message(FATAL_ERROR "cuda lib not found: ${cublas_lib_dir}/libcublas_static.a or ${cuda_lib_dir}/libcublas_static.a")
endif()
endif()
set(CMAKE_CUDA_COMPILER ${CUDA_NVCC_EXECUTABLE})
set(CMAKE_CUDA_STANDARD 11)
find_package(CUDNN REQUIRED)
endif()
......
......@@ -382,10 +382,15 @@ if __name__ == "__main__":
parser.add_argument("--cpu", default=False, action="store_true", required=False)
parser.add_argument("--bash", default=False, action="store_true", required=False)
parser.add_argument("--inplace", default=False, action="store_true", required=False)
parser.add_argument(
"--shared_lib", default=False, action="store_true", required=False
)
parser.add_argument("--retry", default=0, type=int)
args = parser.parse_args()
if args.skip_img:
"Arg skip_img is deprecated. Setting it has no effect. If you want to build image, use --build_img"
if args.skip_wheel:
args.skip_audit = True
print("args.extra_oneflow_cmake_args", args.extra_oneflow_cmake_args)
assert args.package_name
extra_oneflow_cmake_args = " ".join(
......@@ -396,6 +401,8 @@ if __name__ == "__main__":
cuda_versions = []
if args.use_aliyun_mirror:
extra_oneflow_cmake_args += " -DTHIRD_PARTY_MIRROR=aliyun"
if args.shared_lib:
extra_oneflow_cmake_args += " -DBUILD_SHARED_LIBS=ON"
if args.cpu:
extra_oneflow_cmake_args += " -DBUILD_CUDA=OFF"
cuda_versions = ["10.2"]
......@@ -499,6 +506,8 @@ gcc --version
if args.cpu:
assert len(cuda_versions) == 1
sub_dir += "-cpu"
if args.shared_lib:
sub_dir += "-shared"
cache_dir = os.path.join(cache_dir, sub_dir)
if args.build_img:
return
......
......@@ -61,10 +61,10 @@ def _test_instancenorm1d(test_case, device):
)
x = flow.Tensor(input_arr, device=flow.device(device))
y = m(x)
test_case.assertTrue(np.allclose(y.numpy(), output_arr, rtol=0.0001, atol=0.0001))
test_case.assertTrue(np.allclose(y.numpy(), output_arr, rtol=1e-3, atol=1e-3))
m.eval()
y = m(x)
test_case.assertTrue(np.allclose(y.numpy(), output_arr, rtol=0.0001, atol=0.0001))
test_case.assertTrue(np.allclose(y.numpy(), output_arr, rtol=1e-3, atol=1e-3))
def _test_instancenorm2d(test_case, device):
......@@ -419,7 +419,7 @@ class TestInstanceNorm(flow.unittest.TestCase):
for arg in GenArgList(arg_dict):
arg[0](test_case, *arg[1:])
@autotest(n=5, auto_backward=True, rtol=1e-4, atol=1e-4)
@autotest(n=5, auto_backward=True, rtol=1e-3, atol=1e-3)
def test_instancenorm_with_random_data(test_case):
height = random(1, 6).to(int)
width = random(1, 6).to(int)
......@@ -437,7 +437,7 @@ class TestInstanceNorm(flow.unittest.TestCase):
y = m(x)
return y
@autotest(n=5, auto_backward=True, rtol=1e-4, atol=1e-4)
@autotest(n=5, auto_backward=True, rtol=1e-3, atol=1e-3)
def test_instancenorm_with_random_data(test_case):
channel = random(1, 6).to(int)
height = random(1, 6).to(int)
......@@ -458,7 +458,7 @@ class TestInstanceNorm(flow.unittest.TestCase):
y = m(x)
return y
@autotest(n=5, auto_backward=False, rtol=1e-4, atol=1e-4)
@autotest(n=5, auto_backward=False, rtol=1e-3, atol=1e-3)
def test_instancenorm_with_random_data(test_case):
channel = random(1, 6).to(int)
depth = random(1, 6).to(int)
......
import subprocess
def get_changed_files(base=None, head=None):
    """Return the paths git reports as changed between two revisions.

    Runs ``git diff --name-only --diff-filter=ACMRT <base> <head>`` through
    the shell. The filter letters select added, copied, modified, renamed
    and type-changed paths, so deleted files are not listed.

    Args:
        base: Revision to diff from; interpolated verbatim into the command.
        head: Revision to diff to; interpolated verbatim into the command.

    Returns:
        A list with one path string per line of git's output.

    Raises:
        subprocess.CalledProcessError: if the git command exits non-zero.
    """
    diff_command = f"git diff --name-only --diff-filter=ACMRT {base} {head}"
    # text=True makes check_output return str rather than bytes.
    raw_output = subprocess.check_output(diff_command, shell=True, text=True)
    return str(raw_output).splitlines()
def should_run_single_client_tests(changed=None):
    """Decide whether the changed files call for the single-client tests.

    A changed file is counted as "not single client" when it is either

    * a ``.py`` file outside ``python/oneflow/compatible/single_client``, or
    * a file ending in ``.yml``, ``.rst``, ``.md``, ``.cmake`` or
      ``CMakeLists.txt``.

    The tests are requested as soon as at least one changed file falls
    outside that set.

    Args:
        changed: Iterable-with-length of changed file paths (for example the
            list returned by ``get_changed_files``). ``None`` is treated as
            "nothing changed".

    Returns:
        ``True`` if some changed file is not in the "not single client" set,
        ``False`` otherwise (including when nothing changed).
    """
    if changed is None:
        changed = []
    # str.endswith accepts a tuple, which replaces the chain of `or` checks.
    other_suffixes = (".yml", ".rst", ".md", ".cmake", "CMakeLists.txt")
    not_single_client_files = [
        f
        for f in changed
        if (
            f.endswith(".py")
            and not f.startswith("python/oneflow/compatible/single_client")
        )
        or f.endswith(other_suffixes)
    ]
    # Log the full input list here (previously the filtered list was printed
    # twice, hiding which files actually triggered the tests).
    print("[changed]", changed)
    print("[not_single_client_files]", not_single_client_files)
    return len(not_single_client_files) < len(changed)
def print_github_action_output(name=None, value=None):
    """Publish a step output for GitHub Actions.

    When the ``GITHUB_OUTPUT`` environment variable names a writable file,
    ``name=value`` is appended to it, which is the mechanism that replaces
    the deprecated ``::set-output`` workflow command. If the variable is
    unset or the file cannot be written, the legacy
    ``::set-output name=<name>::<value>`` line is printed to stdout instead.

    Args:
        name: Name of the step output.
        value: Value of the step output; expected to be a single line.
    """
    import os

    output_path = os.environ.get("GITHUB_OUTPUT")
    if output_path:
        try:
            with open(output_path, "a", encoding="utf-8") as output_file:
                output_file.write(f"{name}={value}\n")
            return
        except OSError:
            # Fall through to the legacy command rather than lose the output.
            pass
    print(f"::set-output name={name}::{value}")
if __name__ == "__main__":
    import argparse

    # Command line: two required revisions to diff, plus an opt-in flag that
    # forces the single-client tests regardless of what changed.
    cli = argparse.ArgumentParser()
    cli.add_argument("--base", type=str, required=True)
    cli.add_argument("--head", type=str, required=True)
    cli.add_argument("--need_single_client_tests", action="store_true")
    options = cli.parse_args()

    changed_paths = get_changed_files(base=options.base, head=options.head)
    # Run the diff-based check first so its diagnostic prints always appear,
    # then let the explicit flag override a negative result.
    run_tests = should_run_single_client_tests(changed=changed_paths)
    if not run_tests:
        run_tests = options.need_single_client_tests
    # The output is only emitted in the positive case; otherwise it stays unset.
    if run_tests:
        print_github_action_output(name="should_run_single_client_tests", value="1")
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册