提交 d6a46850 作者: A. Unique TensorFlower 提交者: TensorFlower Gardener

Improve build rules to compile NCCL from source, in particular for clang.

PiperOrigin-RevId: 225051897
上级 e3d751c2
# NVIDIA NCCL 2 # NVIDIA NCCL 2
# A package of optimized primitives for collective multi-GPU communication. # A package of optimized primitives for collective multi-GPU communication.
licenses(["restricted"]) licenses(["notice"])
exports_files(["LICENSE.txt"]) exports_files(["LICENSE.txt"])
load( load(
"@local_config_nccl//:build_defs.bzl", "@local_config_nccl//:build_defs.bzl",
"gen_nccl_h", "cuda_rdc_library",
"nccl_library", "gen_device_srcs",
"rdc_copts", "process_srcs",
"rdc_library",
)
load(
"@local_config_cuda//cuda:build_defs.bzl",
"cuda_default_copts",
) )
load("@org_tensorflow//tensorflow:tensorflow.bzl", "tf_cuda_library")
# Generate the nccl.h header file. process_srcs(
gen_nccl_h( name = "process_srcs",
name = "nccl_h", srcs = glob([
output = "src/nccl.h", "**/*.cc",
template = "src/nccl.h.in", "**/*.h",
]),
) )
nccl_library( cc_library(
name = "src_hdrs", name = "src_hdrs",
hdrs = [ hdrs = [
"src/nccl.h",
# src/include/common_coll.h #includes "collectives/collectives.h".
# All other #includes of collectives.h are patched in process_srcs.
"src/collectives/collectives.h", "src/collectives/collectives.h",
"src/nccl.h",
], ],
data = [":process_srcs"],
strip_include_prefix = "src", strip_include_prefix = "src",
) )
nccl_library( cc_library(
name = "include_hdrs", name = "include_hdrs",
hdrs = glob(["src/include/*.h"]), hdrs = glob(["src/include/*.h"]),
data = [":process_srcs"],
strip_include_prefix = "src/include", strip_include_prefix = "src/include",
) )
filegroup( cc_library(
name = "device_hdrs", name = "device_hdrs",
srcs = glob(["src/collectives/device/*.h"]), hdrs = glob(["src/collectives/device/*.h"]),
strip_include_prefix = "src/collectives/device",
) )
filegroup( filegroup(
name = "device_srcs", name = "device_srcs",
srcs = [ srcs = [
"src/collectives/device/all_gather.cu", "src/collectives/device/all_gather.cu.cc",
"src/collectives/device/all_reduce.cu", "src/collectives/device/all_reduce.cu.cc",
"src/collectives/device/broadcast.cu", "src/collectives/device/broadcast.cu.cc",
"src/collectives/device/reduce.cu", "src/collectives/device/reduce.cu.cc",
"src/collectives/device/reduce_scatter.cu", "src/collectives/device/reduce_scatter.cu.cc",
], ],
) )
nccl_library( # NCCL compiles the same source files with different NCCL_OP defines. RDC
# compilation requires that each compiled module has a unique ID. Clang derives
# the module ID from the path only so we need to rename the files to get
# different IDs for different parts of compilation. NVCC does not have that
# problem because it generates IDs based on preprocessed content.
gen_device_srcs(
name = "sum", name = "sum",
srcs = [ srcs = [":device_srcs"],
":device_hdrs", NCCL_OP = 0,
":device_srcs",
],
copts = ["-DNCCL_OP=0"] + rdc_copts(),
linkstatic = True,
prefix = "sum_",
deps = [
":include_hdrs",
":src_hdrs",
"@local_config_cuda//cuda:cuda_headers",
],
) )
nccl_library( gen_device_srcs(
name = "prod", name = "prod",
srcs = [ srcs = [":device_srcs"],
":device_hdrs", NCCL_OP = 1,
":device_srcs",
],
copts = ["-DNCCL_OP=1"] + rdc_copts(),
linkstatic = True,
prefix = "_prod",
deps = [
":include_hdrs",
":src_hdrs",
"@local_config_cuda//cuda:cuda_headers",
],
) )
nccl_library( gen_device_srcs(
name = "min", name = "min",
srcs = [ srcs = [":device_srcs"],
":device_hdrs", NCCL_OP = 2,
":device_srcs",
],
copts = ["-DNCCL_OP=2"] + rdc_copts(),
linkstatic = True,
prefix = "min_",
deps = [
":include_hdrs",
":src_hdrs",
"@local_config_cuda//cuda:cuda_headers",
],
) )
nccl_library( gen_device_srcs(
name = "max", name = "max",
srcs = [ srcs = [":device_srcs"],
":device_hdrs", NCCL_OP = 3,
":device_srcs",
],
copts = ["-DNCCL_OP=3"] + rdc_copts(),
linkstatic = True,
prefix = "max_",
deps = [
":include_hdrs",
":src_hdrs",
"@local_config_cuda//cuda:cuda_headers",
],
) )
nccl_library( cuda_rdc_library(
name = "functions", name = "device",
srcs = [ srcs = [
"src/collectives/device/functions.cu", "src/collectives/device/functions.cu.cc",
":device_hdrs",
],
copts = rdc_copts(),
linkstatic = True,
deps = [
":include_hdrs",
":src_hdrs",
"@local_config_cuda//cuda:cuda_headers",
],
)
rdc_library(
name = "device_code",
deps = [
":functions",
":max", ":max",
":min", ":min",
":prod", ":prod",
":sum", ":sum",
], ],
deps = [
":device_hdrs",
":include_hdrs",
":src_hdrs",
],
) )
# Primary NCCL target. # Primary NCCL target.
nccl_library( tf_cuda_library(
name = "nccl", name = "nccl",
srcs = glob( srcs = glob(
include = ["src/**/*.cu"], include = ["src/**/*.cu.cc"],
# Exclude device-library code. # Exclude device-library code.
exclude = ["src/collectives/device/**"], exclude = ["src/collectives/device/**"],
) + [ ) + [
...@@ -162,13 +115,14 @@ nccl_library( ...@@ -162,13 +115,14 @@ nccl_library(
"src/nccl.h", "src/nccl.h",
], ],
hdrs = ["src/nccl.h"], hdrs = ["src/nccl.h"],
copts = cuda_default_copts(), copts = ["-Wno-vla"],
include_prefix = "third_party/nccl", include_prefix = "third_party/nccl",
strip_include_prefix = "src", strip_include_prefix = "src",
visibility = ["//visibility:public"], visibility = ["//visibility:public"],
deps = [ deps = [
":device_code", ":device",
":include_hdrs", ":include_hdrs",
":src_hdrs", ":src_hdrs",
"@local_config_cuda//cuda:cudart_static",
], ],
) )
此差异已折叠。
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册