Skip to content
体验新版
项目
组织
正在加载...
登录
切换导航
打开侧边栏
wux_labs
Tensorflow
提交
d6a46850
T
Tensorflow
项目概览
wux_labs
/
Tensorflow
通知
1
Star
0
Fork
0
代码
文件
提交
分支
Tags
贡献者
分支图
Diff
Issue
0
列表
看板
标记
里程碑
合并请求
0
DevOps
流水线
流水线任务
计划
Wiki
0
Wiki
分析
仓库
DevOps
项目成员
Pages
T
Tensorflow
项目概览
项目概览
详情
发布
仓库
仓库
文件
提交
分支
标签
贡献者
分支图
比较
Issue
0
Issue
0
列表
看板
标记
里程碑
合并请求
0
合并请求
0
Pages
DevOps
DevOps
流水线
流水线任务
计划
分析
分析
仓库分析
DevOps
Wiki
0
Wiki
成员
成员
收起侧边栏
关闭侧边栏
动态
分支图
创建新Issue
流水线任务
提交
Issue看板
体验新版 GitCode,发现更多精彩内容 >>
提交
d6a46850
编写于
12月 11, 2018
作者:
A
A. Unique TensorFlower
提交者:
TensorFlower Gardener
12月 11, 2018
浏览文件
操作
浏览文件
下载
电子邮件补丁
差异文件
Improve build rules to compile NCCL from source, in particular for clang.
PiperOrigin-RevId: 225051897
上级
e3d751c2
变更
2
展开全部
隐藏空白更改
内联
并排
Showing
2 changed files
with
351 additions
and
270 deletions
+351
-270
third_party/nccl/archive.BUILD
third_party/nccl/archive.BUILD
+54
-100
third_party/nccl/build_defs.bzl.tpl
third_party/nccl/build_defs.bzl.tpl
+297
-170
未找到文件。
third_party/nccl/archive.BUILD
浏览文件 @
d6a46850
# NVIDIA NCCL 2
# A package of optimized primitives for collective multi-GPU communication.
licenses(["
restricted
"])
licenses(["
notice
"])
exports_files(["LICENSE.txt"])
load(
"@local_config_nccl//:build_defs.bzl",
"gen_nccl_h",
"nccl_library",
"rdc_copts",
"rdc_library",
)
load(
"@local_config_cuda//cuda:build_defs.bzl",
"cuda_default_copts",
"cuda_rdc_library",
"gen_device_srcs",
"process_srcs",
)
load("@org_tensorflow//tensorflow:tensorflow.bzl", "tf_cuda_library")
# Generate the nccl.h header file.
gen_nccl_h(
name = "nccl_h",
output = "src/nccl.h",
template = "src/nccl.h.in",
process_srcs(
name = "process_srcs",
srcs = glob([
"**/*.cc",
"**/*.h",
]),
)
nccl
_library(
cc
_library(
name = "src_hdrs",
hdrs = [
"src/nccl.h",
# src/include/common_coll.h #includes "collectives/collectives.h".
# All other #includes of collectives.h are patched in process_srcs.
"src/collectives/collectives.h",
"src/nccl.h",
],
data = [":process_srcs"],
strip_include_prefix = "src",
)
nccl
_library(
cc
_library(
name = "include_hdrs",
hdrs = glob(["src/include/*.h"]),
data = [":process_srcs"],
strip_include_prefix = "src/include",
)
filegroup
(
cc_library
(
name = "device_hdrs",
srcs = glob(["src/collectives/device/*.h"]),
hdrs = glob(["src/collectives/device/*.h"]),
strip_include_prefix = "src/collectives/device",
)
filegroup(
name = "device_srcs",
srcs = [
"src/collectives/device/all_gather.cu",
"src/collectives/device/all_reduce.cu",
"src/collectives/device/broadcast.cu",
"src/collectives/device/reduce.cu",
"src/collectives/device/reduce_scatter.cu",
"src/collectives/device/all_gather.cu
.cc
",
"src/collectives/device/all_reduce.cu
.cc
",
"src/collectives/device/broadcast.cu
.cc
",
"src/collectives/device/reduce.cu
.cc
",
"src/collectives/device/reduce_scatter.cu
.cc
",
],
)
nccl_library(
# NCCL compiles the same source files with different NCCL_OP defines. RDC
# compilation requires that each compiled module has a unique ID. Clang derives
# the module ID from the path only so we need to rename the files to get
# different IDs for different parts of compilation. NVCC does not have that
# problem because it generates IDs based on preprocessed content.
gen_device_srcs(
name = "sum",
srcs = [
":device_hdrs",
":device_srcs",
],
copts = ["-DNCCL_OP=0"] + rdc_copts(),
linkstatic = True,
prefix = "sum_",
deps = [
":include_hdrs",
":src_hdrs",
"@local_config_cuda//cuda:cuda_headers",
],
srcs = [":device_srcs"],
NCCL_OP = 0,
)
nccl_library
(
gen_device_srcs
(
name = "prod",
srcs = [
":device_hdrs",
":device_srcs",
],
copts = ["-DNCCL_OP=1"] + rdc_copts(),
linkstatic = True,
prefix = "_prod",
deps = [
":include_hdrs",
":src_hdrs",
"@local_config_cuda//cuda:cuda_headers",
],
srcs = [":device_srcs"],
NCCL_OP = 1,
)
nccl_library
(
gen_device_srcs
(
name = "min",
srcs = [
":device_hdrs",
":device_srcs",
],
copts = ["-DNCCL_OP=2"] + rdc_copts(),
linkstatic = True,
prefix = "min_",
deps = [
":include_hdrs",
":src_hdrs",
"@local_config_cuda//cuda:cuda_headers",
],
srcs = [":device_srcs"],
NCCL_OP = 2,
)
nccl_library
(
gen_device_srcs
(
name = "max",
srcs = [
":device_hdrs",
":device_srcs",
],
copts = ["-DNCCL_OP=3"] + rdc_copts(),
linkstatic = True,
prefix = "max_",
deps = [
":include_hdrs",
":src_hdrs",
"@local_config_cuda//cuda:cuda_headers",
],
srcs = [":device_srcs"],
NCCL_OP = 3,
)
nccl
_library(
name = "
functions
",
cuda_rdc
_library(
name = "
device
",
srcs = [
"src/collectives/device/functions.cu",
":device_hdrs",
],
copts = rdc_copts(),
linkstatic = True,
deps = [
":include_hdrs",
":src_hdrs",
"@local_config_cuda//cuda:cuda_headers",
],
)
rdc_library(
name = "device_code",
deps = [
":functions",
"src/collectives/device/functions.cu.cc",
":max",
":min",
":prod",
":sum",
],
deps = [
":device_hdrs",
":include_hdrs",
":src_hdrs",
],
)
# Primary NCCL target.
nccl
_library(
tf_cuda
_library(
name = "nccl",
srcs = glob(
include = ["src/**/*.cu"],
include = ["src/**/*.cu
.cc
"],
# Exclude device-library code.
exclude = ["src/collectives/device/**"],
) + [
...
...
@@ -162,13 +115,14 @@ nccl_library(
"src/nccl.h",
],
hdrs = ["src/nccl.h"],
copts =
cuda_default_copts()
,
copts =
["-Wno-vla"]
,
include_prefix = "third_party/nccl",
strip_include_prefix = "src",
visibility = ["//visibility:public"],
deps = [
":device
_code
",
":device",
":include_hdrs",
":src_hdrs",
"@local_config_cuda//cuda:cudart_static",
],
)
third_party/nccl/build_defs.bzl.tpl
浏览文件 @
d6a46850
此差异已折叠。
点击以展开。
编辑
预览
Markdown
is supported
0%
请重试
或
添加新附件
.
添加附件
取消
You are about to add
0
people
to the discussion. Proceed with caution.
先完成此消息的编辑!
取消
想要评论请
注册
或
登录