Commit 67cdc8e4 authored by S storypku

Bazel: add cuda support

Parent 29b5b550
# load bazelrc from the legacy location
# as recommended in https://github.com/bazelbuild/bazel/issues/6319
import %workspace%/tools/bazel.rc
try-import %workspace%/tools/bazel.rc
try-import %workspace%/.apollo.bazelrc
......@@ -73,3 +73,6 @@ docs/demo_guide/*.record
# bazel cache and others
.cache/
tools/python_bin_path.sh
.apollo.bazelrc
......@@ -3,6 +3,9 @@ workspace(name = "apollo")
load("@bazel_tools//tools/build_defs/repo:http.bzl", "http_archive")
load("@bazel_tools//tools/build_defs/repo:utils.bzl", "maybe")
load("//tools/gpus:cuda_configure.bzl", "cuda_configure")
cuda_configure(name = "local_config_cuda")
maybe(
http_archive,
name = "bazel_skylib",
......@@ -80,9 +83,9 @@ new_local_repository(
# strip_prefix = "glog-0.4.0",
# url = "https://github.com/google/glog/archive/v0.4.0.tar.gz",
#)
# See https://github.com/bazelbuild/bazel/issues/11406
http_archive(
maybe(
http_archive,
name = "boringssl",
sha256 = "fb236ae74676dba515e1230aef4cc69ab265af72fc08784a6755a319dd013ca6",
urls = ["http://182.92.10.148:8310/archive/6.0/boringssl-83da28a68f32023fd3b95a8ae94991a07b1f6c62.tar.gz"],
......
......@@ -479,16 +479,12 @@ function clean() {
}
function buildify() {
local buildifier_url=https://github.com/bazelbuild/buildtools/releases/download/0.4.5/buildifier
wget $buildifier_url -O ~/.buildifier
chmod +x ~/.buildifier
find . -name '*BUILD' -type f -exec ~/.buildifier -showlog -mode=fix {} +
find . \( -name '*BUILD' -or -name '*.bzl' \) -type f -exec buildifier -showlog -mode=fix {} +
if [ $? -eq 0 ]; then
success 'Buildify worked!'
else
fail 'Buildify failed!'
fi
rm ~/.buildifier
}
function build_fe() {
......@@ -585,10 +581,27 @@ function print_usage() {
"
}
function bootstrap() {
if [ -z "$PYTHON_BIN_PATH" ]; then
PYTHON_BIN_PATH=$(which python3 || true)
fi
if [[ -f "${APOLLO_ROOT_DIR}/.apollo.bazelrc" ]]; then
return
fi
cp -f "${TOP_DIR}/tools/sample.bazelrc" "${APOLLO_ROOT_DIR}/.apollo.bazelrc"
# Set all env variables
# TODO(storypku): enable bootstrap.py inside docker
# $PYTHON_BIN_PATH ${APOLLO_ROOT_DIR}/tools/bootstrap.py $@
echo "bootstrap done"
}
function main() {
check_machine_arch
apollo_check_system_config
bootstrap
check_esd_files
DEFINES="--define ARCH=${MACHINE_ARCH} --define CAN_CARD=${CAN_CARD} --cxxopt=-DUSE_ESD_CAN=${USE_ESD_CAN}"
......
#!/usr/bin/env bash
set -eo pipefail
TOP_DIR=$(dirname "$0")
if [[ "$1" == "--noninteractive" ]]; then
cp -f "${TOP_DIR}/tools/sample.bazelrc" "${TOP_DIR}/.storydev.bazelrc"
exit 0
fi
if [ -z "$PYTHON_BIN_PATH" ]; then
PYTHON_BIN_PATH=$(which python3 || true)
fi
# Set all env variables
"$PYTHON_BIN_PATH" "${TOP_DIR}/tools/bootstrap.py" "$@"
echo "Done bootstrap.sh"
load("@rules_cc//cc:defs.bzl", "cc_library", "cc_test")
load("@local_config_cuda//cuda:build_defs.bzl", "cuda_library")
load("//tools:cpplint.bzl", "cpplint")
load("//tools:cuda_library.bzl", "cuda_library")
package(default_visibility = ["//visibility:public"])
......@@ -10,7 +10,8 @@ cuda_library(
hdrs = ["cuda_util.h"],
deps = [
"//cyber/common:log",
"@cuda",
"//modules/common/math:geometry",
"@local_config_cuda//cuda:cudart",
],
)
......@@ -87,6 +88,7 @@ cc_test(
deps = [
":path",
"//modules/common/util",
"//modules/routing/proto:routing_cc_proto",
"@com_google_googletest//:gtest_main",
],
)
......
# bazelrc file
# https://docs.bazel.build/versions/master/user-manual.html
# bazel >= 0.18 looks for %workspace%/.bazelrc (which redirects here)
# Older bazel versions look for %workspace%/tools/bazel.rc (this file)
# See https://github.com/bazelbuild/bazel/issues/6319
# Apollo Bazel configuration file.
# This file tries to group and simplify build options for Apollo
# +------------------------------------------------------------+
# | Startup Options |
# +------------------------------------------------------------+
startup --batch_cpu_scheduling
startup --host_jvm_args="-XX:-UseParallelGC"
startup --output_user_root="/apollo/.cache/bazel"
startup --output_user_root=/apollo/.cache/bazel
# +------------------------------------------------------------+
# | Test Configurations |
# | Common Options |
# +------------------------------------------------------------+
# By default prints output only from failed tests.
test --test_output=errors
# +------------------------------------------------------------+
# | CPP Lint Tests & Unit Tests |
# +------------------------------------------------------------+
# By default, cpplint tests are run as part of `bazel test` alongside all of
# the other compilation and test targets. This is a convenience shortcut to
# only do the cpplint testing and nothing else.
# Do bazel test --config=cpplint <target> to enable this configuration.
# To enable the lint test, the BUILD *must* load the cpplint.bzl by having
# 'load("//tools:cpplint.bzl", "cpplint")' at the beginning and 'cpplint()'
# at the end.
test:cpplint --test_tag_filters=cpplint
test:cpplint --build_tests_only
# Regular unit tests.
test:unit_test --test_tag_filters=-cpplint
# Coverage tests
test:coverage --test_tag_filters=-cpplint
test:coverage --copt=--coverage
test:coverage --cxxopt=--coverage
test:coverage --cxxopt=-fprofile-arcs
test:coverage --cxxopt=-ftest-coverage
test:coverage --linkopt=-coverage
test:coverage --linkopt=-lgcov
test:coverage --linkopt=-lgcc
test:coverage --linkopt=-lc
# Force bazel output to use colors (good for jenkins) and print useful errors.
common --color=yes
# +------------------------------------------------------------+
# | Build Configurations |
# +------------------------------------------------------------+
# build with profiling
build:cpu_prof --linkopt=-lprofiler
# Specify protobuf cc toolchain
build --proto_toolchain_for_cc="@com_google_protobuf//:cc_toolchain"
# Make Bazel print out all options from rc files.
build --announce_rc
build --show_timestamps
# Work around the sandbox issue.
build --spawn_strategy=standalone
# Enable colorful output of GCC
build --cxxopt="-fdiagnostics-color=always"
# Do not show warnings from external dependencies.
# build --output_filter="^//"
build --show_timestamps
# build --copt="-I/usr/include/python3.6m"
# TODO(storypku): disable the following line temporarily as
# external/upb/upb/decode.c:164 can't compile
#build --copt="-Werror=sign-compare"
build --copt="-Werror=sign-compare"
build --copt="-Werror=return-type"
build --copt="-Werror=unused-variable"
build --copt="-Werror=unused-but-set-variable"
build --copt="-Werror=switch"
build --cxxopt="-Werror=sign-compare"
build --cxxopt="-Werror=reorder"
# Enable C++14
build --cxxopt="-std=c++1y"
# Default paths for SYSTEM LIBRARIES
build --define=PREFIX=/usr
build --define=LIBDIR=$(PREFIX)/lib
build --define=INCLUDEDIR=$(PREFIX)/include
# Enable colorful output of GCC
build --cxxopt="-fdiagnostics-color=always"
# build --enable_platform_specific_config
# dbg config, as a shorthand for '--config=opt -c dbg'
build:dbg --config=opt -c dbg
## build -c opt
build:opt --copt=-march=native
build:opt --host_copt=-march=native
build:opt --define with_default_optimizations=true
# Instruction set optimizations
build:native_arch_linux --copt=-march=native
# Build Apollo with C++ 17 features.
build:c++17 --cxxopt=-std=c++1z
build:c++17 --cxxopt=-stdlib=libc++
build:c++1z --config=c++17
# Enable C++14 (aka c++1y) by default
build --cxxopt="-std=c++14"
build --host_cxxopt="-std=c++14"
# build with profiling
build:cpu_prof --linkopt=-lprofiler
# +------------------------------------------------------------+
# | Test Configurations |
# +------------------------------------------------------------+
test --flaky_test_attempts=3
test --test_size_filters=small,medium
test --test_env=LD_LIBRARY_PATH
# By default prints output only from failed tests.
test --test_output=errors
test:coverage --copt=--coverage
test:coverage --cxxopt=--coverage
test:coverage --cxxopt=-fprofile-arcs
test:coverage --cxxopt=-ftest-coverage
test:coverage --linkopt=-coverage
test:coverage --linkopt=-lgcov
test:coverage --linkopt=-lgcc
test:coverage --linkopt=-lc
# +------------------------------------------------------------+
# | CPP Lint Tests & Unit Tests |
# +------------------------------------------------------------+
# By default, cpplint tests are run as part of `bazel test` alongside all of
# the other compilation and test targets. This is a convenience shortcut to
# only do the cpplint testing and nothing else.
# Do bazel test --config=cpplint <target> to enable this configuration.
# To enable the lint test, the BUILD *must* load the cpplint.bzl by having
# 'load("//tools:cpplint.bzl", "cpplint")' at the beginning and 'cpplint()'
# at the end.
test:cpplint --test_tag_filters=cpplint
test:cpplint --build_tests_only
# Regular unit tests.
test:unit_test --test_tag_filters=-cpplint
# Coverage tests
test:coverage --test_tag_filters=-cpplint
# +------------------------------------------------------------+
# | Python Configurations |
# +------------------------------------------------------------+
run --python_path=/usr/bin/python3
# build --copt="-I/usr/include/python3.6m"
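
As the cpplint comments above note, a BUILD file must load cpplint.bzl at the top and call cpplint() at the end for the lint tests to exist. A minimal sketch of such a BUILD file (package and target names are hypothetical):

load("@rules_cc//cc:defs.bzl", "cc_library")
load("//tools:cpplint.bzl", "cpplint")

package(default_visibility = ["//visibility:public"])

cc_library(
    name = "example",  # hypothetical target
    srcs = ["example.cc"],
    hdrs = ["example.h"],
)

cpplint()

With that in place, `bazel test --config=cpplint <target>` runs only the generated lint tests, as described in the comments above.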
This diff has been collapsed.
cuda_srcs = [".cu", ".cc", ".cpp"]
cuda_headers = [".h", ".hpp"]
cuda_arch = " ".join([
"-arch=sm_30",
"-gencode=arch=compute_30,code=sm_30",
"-gencode=arch=compute_50,code=sm_50",
"-gencode=arch=compute_52,code=sm_52",
"-gencode=arch=compute_60,code=sm_60",
"-gencode=arch=compute_61,code=sm_61",
"-gencode=arch=compute_61,code=compute_61",
])
def cuda_library_impl(ctx):
flags = " ".join(ctx.attr.flags)
output = ctx.outputs.out
lib_flags = ["-std=c++11", "--shared", "--compiler-options -fPIC"]
args = [f.path for f in ctx.files.srcs]
deps_flags = []
for f in ctx.attr.deps:
deps_flags += f.cc.link_flags
deps_flags += ["-I" + d for d in f.cc.include_directories]
deps_flags += ["-I" + d for d in f.cc.quote_include_directories]
deps_flags += ["-I" + d for d in f.cc.system_include_directories]
ctx.actions.run_shell(
inputs = ctx.files.srcs + ctx.files.hdrs + ctx.files.deps,
outputs = [ctx.outputs.out],
arguments = args,
env = {"PATH": "/usr/local/cuda/bin:/usr/local/bin:/usr/bin:/bin"},
command = "nvcc %s %s %s -I. %s -o %s" % (cuda_arch, " ".join(lib_flags), " ".join(args), " ".join(deps_flags), output.path),
)
def cuda_binary_impl(ctx):
flags = " ".join(ctx.attr.flags)
args = ctx.attr.flags + [f.path for f in ctx.files.srcs] + [f.path for f in ctx.files.hdrs]
deps_flags = []
for f in ctx.attr.deps:
deps_flags += f.cc.link_flags
deps_flags += ["-I" + d for d in f.cc.include_directories]
deps_flags += ["-I" + d for d in f.cc.quote_include_directories]
deps_flags += ["-I" + d for d in f.cc.system_include_directories]
output = ctx.outputs.out
ctx.actions.run_shell(
inputs = ctx.files.srcs + ctx.files.hdrs + ctx.files.deps,
outputs = [ctx.outputs.out],
arguments = args,
env = {"PATH": "/usr/local/cuda/bin:/usr/local/bin:/usr/bin:/bin"},
command = "nvcc %s %s %s -o %s" % (" ".join(cuda_arch), " ".join(args), " ".join(deps_flags), output.path),
)
cuda_library = rule(
attrs = {
"hdrs": attr.label_list(allow_files = cuda_headers),
"srcs": attr.label_list(allow_files = cuda_srcs),
"deps": attr.label_list(allow_files = False),
"flags": attr.label_list(allow_files = False),
},
outputs = {"out": "lib%{name}.so"},
implementation = cuda_library_impl,
)
cuda_binary = rule(
attrs = {
"hdrs": attr.label_list(allow_files = cuda_headers),
"srcs": attr.label_list(allow_files = cuda_srcs),
"deps": attr.label_list(allow_files = False),
"flags": attr.label_list(allow_files = False),
},
executable = True,
outputs = {"out": "%{name}"},
implementation = cuda_binary_impl,
)
package(
default_visibility = ["//visibility:public"],
)
## How to generate "find_cuda_config.py.gz.base64"
```
$ # In tools/gpus/ dir
$ python3 compress_find_cuda_config.py
```
# Copyright 2020 The TensorFlow Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ==============================================================================
"""Verifies that a list of libraries is installed on the system.
Takes a list of arguments in which every two consecutive arguments form a logical
tuple of (path, check_soname): the path to the library, and either True or False
to indicate whether to check the soname field on the shared library.
Example Usage:
./check_cuda_libs.py /path/to/lib1.so True /path/to/lib2.so False
"""
import os
import os.path
import platform
import subprocess
import sys
# pylint: disable=g-import-not-at-top,g-importing-member
try:
from shutil import which
except ImportError:
from distutils.spawn import find_executable as which
# pylint: enable=g-import-not-at-top,g-importing-member
class ConfigError(Exception):
pass
def _is_windows():
return platform.system() == "Windows"
def check_cuda_lib(path, check_soname=True):
"""Tests if a library exists on disk and whether its soname matches the filename.
Args:
path: the path to the library.
check_soname: whether to check the soname as well.
Raises:
ConfigError: If the library does not exist or if its soname does not match
the filename.
"""
if not os.path.isfile(path):
raise ConfigError("No library found under: " + path)
objdump = which("objdump")
if check_soname and objdump is not None and not _is_windows():
# Decode is necessary as in py3 the return type changed from str to bytes
output = subprocess.check_output([objdump, "-p", path]).decode("utf-8")
output = [line for line in output.splitlines() if "SONAME" in line]
sonames = [line.strip().split(" ")[-1] for line in output]
if not any([soname == os.path.basename(path) for soname in sonames]):
raise ConfigError("None of the libraries match their SONAME: " + path)
def main():
try:
args = [argv for argv in sys.argv[1:]]
if len(args) % 2 == 1:
raise ConfigError("Expected even number of arguments")
checked_paths = []
for i in range(0, len(args), 2):
path = args[i]
check_cuda_lib(path, check_soname=args[i + 1] == "True")
checked_paths.append(path)
# pylint: disable=superfluous-parens
print(os.linesep.join(checked_paths))
# pylint: enable=superfluous-parens
except ConfigError as e:
sys.stderr.write(str(e))
sys.exit(1)
if __name__ == "__main__":
main()
#! /usr/bin/env python3
# Copyright 2020 The TensorFlow Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ==============================================================================
"""Compresses the contents of find_cuda_config.py
The compressed file is what is actually being used. It works around remote
config not being able to upload files yet.
"""
import base64
import zlib
def main():
with open('find_cuda_config.py', 'rb') as f:
data = f.read()
compressed = zlib.compress(data)
b64encoded = base64.b64encode(compressed)
with open('find_cuda_config.py.gz.base64', 'wb') as f:
f.write(b64encoded)
if __name__ == '__main__':
main()
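
For reference, decoding reverses those two steps. A minimal sketch (the function name is hypothetical, and this is independent of how cuda_configure.bzl actually expands the file):

import base64
import zlib


def decompress_find_cuda_config(path='find_cuda_config.py.gz.base64'):
    """Reverses compress_find_cuda_config.py: base64-decode, then zlib-decompress."""
    with open(path, 'rb') as f:
        b64encoded = f.read()
    return zlib.decompress(base64.b64decode(b64encoded))


if __name__ == '__main__':
    print(decompress_find_cuda_config().decode('utf-8'))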
# This file is expanded from a template by cuda_configure.bzl
# Update cuda_configure.bzl#verify_build_defines when adding new variables.
load(":cc_toolchain_config.bzl", "cc_toolchain_config")
licenses(["restricted"])
package(default_visibility = ["//visibility:public"])
toolchain(
name = "toolchain-linux-x86_64",
exec_compatible_with = [
"@bazel_tools//platforms:linux",
"@bazel_tools//platforms:x86_64",
],
target_compatible_with = [
"@bazel_tools//platforms:linux",
"@bazel_tools//platforms:x86_64",
],
toolchain = ":cc-compiler-local",
toolchain_type = "@bazel_tools//tools/cpp:toolchain_type",
)
cc_toolchain_suite(
name = "toolchain",
toolchains = {
"local|compiler": ":cc-compiler-local",
"aarch64": ":cc-compiler-local",
"k8": ":cc-compiler-local",
},
)
cc_toolchain(
name = "cc-compiler-local",
all_files = "%{compiler_deps}",
compiler_files = "%{compiler_deps}",
ar_files = "%{compiler_deps}",
as_files = "%{compiler_deps}",
dwp_files = ":empty",
linker_files = "%{compiler_deps}",
objcopy_files = ":empty",
strip_files = ":empty",
# To support linker flags that need to go to the start of command line
# we need the toolchain to support parameter files. Parameter files are
# last on the command line and contain all shared libraries to link, so all
# regular options will be left of them.
supports_param_files = 1,
toolchain_identifier = "local_linux",
toolchain_config = ":cc-compiler-local-config",
)
cc_toolchain_config(
name = "cc-compiler-local-config",
cpu = "local",
builtin_include_directories = [%{cxx_builtin_include_directories}],
extra_no_canonical_prefixes_flags = [%{extra_no_canonical_prefixes_flags}],
host_compiler_path = "%{host_compiler_path}",
host_compiler_prefix = "%{host_compiler_prefix}",
host_compiler_warnings = [%{host_compiler_warnings}],
host_unfiltered_compile_flags = [%{unfiltered_compile_flags}],
linker_bin_path = "%{linker_bin_path}",
builtin_sysroot = "%{builtin_sysroot}",
cuda_path = "%{cuda_toolkit_path}",
compiler = "%{compiler}",
)
filegroup(
name = "empty",
srcs = [],
)
filegroup(
name = "crosstool_wrapper_driver_is_not_gcc",
srcs = ["clang/bin/crosstool_wrapper_driver_is_not_gcc"],
)
Copyright 2015 The TensorFlow Authors. All rights reserved.
Apache License
Version 2.0, January 2004
http://www.apache.org/licenses/
TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION
1. Definitions.
"License" shall mean the terms and conditions for use, reproduction,
and distribution as defined by Sections 1 through 9 of this document.
"Licensor" shall mean the copyright owner or entity authorized by
the copyright owner that is granting the License.
"Legal Entity" shall mean the union of the acting entity and all
other entities that control, are controlled by, or are under common
control with that entity. For the purposes of this definition,
"control" means (i) the power, direct or indirect, to cause the
direction or management of such entity, whether by contract or
otherwise, or (ii) ownership of fifty percent (50%) or more of the
outstanding shares, or (iii) beneficial ownership of such entity.
"You" (or "Your") shall mean an individual or Legal Entity
exercising permissions granted by this License.
"Source" form shall mean the preferred form for making modifications,
including but not limited to software source code, documentation
source, and configuration files.
"Object" form shall mean any form resulting from mechanical
transformation or translation of a Source form, including but
not limited to compiled object code, generated documentation,
and conversions to other media types.
"Work" shall mean the work of authorship, whether in Source or
Object form, made available under the License, as indicated by a
copyright notice that is included in or attached to the work
(an example is provided in the Appendix below).
"Derivative Works" shall mean any work, whether in Source or Object
form, that is based on (or derived from) the Work and for which the
editorial revisions, annotations, elaborations, or other modifications
represent, as a whole, an original work of authorship. For the purposes
of this License, Derivative Works shall not include works that remain
separable from, or merely link (or bind by name) to the interfaces of,
the Work and Derivative Works thereof.
"Contribution" shall mean any work of authorship, including
the original version of the Work and any modifications or additions
to that Work or Derivative Works thereof, that is intentionally
submitted to Licensor for inclusion in the Work by the copyright owner
or by an individual or Legal Entity authorized to submit on behalf of
the copyright owner. For the purposes of this definition, "submitted"
means any form of electronic, verbal, or written communication sent
to the Licensor or its representatives, including but not limited to
communication on electronic mailing lists, source code control systems,
and issue tracking systems that are managed by, or on behalf of, the
Licensor for the purpose of discussing and improving the Work, but
excluding communication that is conspicuously marked or otherwise
designated in writing by the copyright owner as "Not a Contribution."
"Contributor" shall mean Licensor and any individual or Legal Entity
on behalf of whom a Contribution has been received by Licensor and
subsequently incorporated within the Work.
2. Grant of Copyright License. Subject to the terms and conditions of
this License, each Contributor hereby grants to You a perpetual,
worldwide, non-exclusive, no-charge, royalty-free, irrevocable
copyright license to reproduce, prepare Derivative Works of,
publicly display, publicly perform, sublicense, and distribute the
Work and such Derivative Works in Source or Object form.
3. Grant of Patent License. Subject to the terms and conditions of
this License, each Contributor hereby grants to You a perpetual,
worldwide, non-exclusive, no-charge, royalty-free, irrevocable
(except as stated in this section) patent license to make, have made,
use, offer to sell, sell, import, and otherwise transfer the Work,
where such license applies only to those patent claims licensable
by such Contributor that are necessarily infringed by their
Contribution(s) alone or by combination of their Contribution(s)
with the Work to which such Contribution(s) was submitted. If You
institute patent litigation against any entity (including a
cross-claim or counterclaim in a lawsuit) alleging that the Work
or a Contribution incorporated within the Work constitutes direct
or contributory patent infringement, then any patent licenses
granted to You under this License for that Work shall terminate
as of the date such litigation is filed.
4. Redistribution. You may reproduce and distribute copies of the
Work or Derivative Works thereof in any medium, with or without
modifications, and in Source or Object form, provided that You
meet the following conditions:
(a) You must give any other recipients of the Work or
Derivative Works a copy of this License; and
(b) You must cause any modified files to carry prominent notices
stating that You changed the files; and
(c) You must retain, in the Source form of any Derivative Works
that You distribute, all copyright, patent, trademark, and
attribution notices from the Source form of the Work,
excluding those notices that do not pertain to any part of
the Derivative Works; and
(d) If the Work includes a "NOTICE" text file as part of its
distribution, then any Derivative Works that You distribute must
include a readable copy of the attribution notices contained
within such NOTICE file, excluding those notices that do not
pertain to any part of the Derivative Works, in at least one
of the following places: within a NOTICE text file distributed
as part of the Derivative Works; within the Source form or
documentation, if provided along with the Derivative Works; or,
within a display generated by the Derivative Works, if and
wherever such third-party notices normally appear. The contents
of the NOTICE file are for informational purposes only and
do not modify the License. You may add Your own attribution
notices within Derivative Works that You distribute, alongside
or as an addendum to the NOTICE text from the Work, provided
that such additional attribution notices cannot be construed
as modifying the License.
You may add Your own copyright statement to Your modifications and
may provide additional or different license terms and conditions
for use, reproduction, or distribution of Your modifications, or
for any such Derivative Works as a whole, provided Your use,
reproduction, and distribution of the Work otherwise complies with
the conditions stated in this License.
5. Submission of Contributions. Unless You explicitly state otherwise,
any Contribution intentionally submitted for inclusion in the Work
by You to the Licensor shall be under the terms and conditions of
this License, without any additional terms or conditions.
Notwithstanding the above, nothing herein shall supersede or modify
the terms of any separate license agreement you may have executed
with Licensor regarding such Contributions.
6. Trademarks. This License does not grant permission to use the trade
names, trademarks, service marks, or product names of the Licensor,
except as required for reasonable and customary use in describing the
origin of the Work and reproducing the content of the NOTICE file.
7. Disclaimer of Warranty. Unless required by applicable law or
agreed to in writing, Licensor provides the Work (and each
Contributor provides its Contributions) on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
implied, including, without limitation, any warranties or conditions
of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A
PARTICULAR PURPOSE. You are solely responsible for determining the
appropriateness of using or redistributing the Work and assume any
risks associated with Your exercise of permissions under this License.
8. Limitation of Liability. In no event and under no legal theory,
whether in tort (including negligence), contract, or otherwise,
unless required by applicable law (such as deliberate and grossly
negligent acts) or agreed to in writing, shall any Contributor be
liable to You for damages, including any direct, indirect, special,
incidental, or consequential damages of any character arising as a
result of this License or out of the use or inability to use the
Work (including but not limited to damages for loss of goodwill,
work stoppage, computer failure or malfunction, or any and all
other commercial damages or losses), even if such Contributor
has been advised of the possibility of such damages.
9. Accepting Warranty or Additional Liability. While redistributing
the Work or Derivative Works thereof, You may choose to offer,
and charge a fee for, acceptance of support, warranty, indemnity,
or other liability obligations and/or rights consistent with this
License. However, in accepting such obligations, You may act only
on Your own behalf and on Your sole responsibility, not on behalf
of any other Contributor, and only if You agree to indemnify,
defend, and hold each Contributor harmless for any liability
incurred by, or claims asserted against, such Contributor by reason
of your accepting any such warranty or additional liability.
END OF TERMS AND CONDITIONS
APPENDIX: How to apply the Apache License to your work.
To apply the Apache License to your work, attach the following
boilerplate notice, with the fields enclosed by brackets "[]"
replaced with your own identifying information. (Don't include
the brackets!) The text should be enclosed in the appropriate
comment syntax for the file format. We also recommend that a
file or class name and description of purpose be included on the
same "printed page" as the copyright notice for easier
identification within third-party archives.
Copyright 2015, The TensorFlow Authors.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
This diff has been collapsed.
#!/usr/bin/env python
# Copyright 2015 The TensorFlow Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ==============================================================================
"""Crosstool wrapper for compiling CUDA programs.
SYNOPSIS:
crosstool_wrapper_is_not_gcc [options passed in by cc_library()
or cc_binary() rule]
DESCRIPTION:
This script is expected to be called by the cc_library() or cc_binary() bazel
rules. When the option "-x cuda" is present in the list of arguments passed
to this script, it invokes the nvcc CUDA compiler. Most arguments are passed
as is as a string to --compiler-options of nvcc. When "-x cuda" is not
present, this wrapper invokes hybrid_driver_is_not_gcc with the input
arguments as is.
NOTES(storypku): Move this file to
//tools/gpus/crosstool/crosstool_wrapper_is_not_gcc ?
"""
from __future__ import print_function
__author__ = 'keveman@google.com (Manjunath Kudlur)'
from argparse import ArgumentParser
import os
import subprocess
import re
import sys
import pipes
# Template values set by cuda_autoconf.
CPU_COMPILER = ('%{cpu_compiler}')
GCC_HOST_COMPILER_PATH = ('%{gcc_host_compiler_path}')
NVCC_PATH = '%{nvcc_path}'
PREFIX_DIR = os.path.dirname(GCC_HOST_COMPILER_PATH)
NVCC_VERSION = '%{cuda_version}'
def Log(s):
print('//tools/gpus/crosstool: {0}'.format(s))
def GetOptionValue(argv, option):
"""Extract the list of values for option from the argv list.
Args:
argv: A list of strings, possibly the argv passed to main().
option: The option whose value to extract, without the leading '-'.
Returns:
A list of values, either directly following the option,
(eg., -opt val1 val2) or values collected from multiple occurrences of
the option (eg., -opt val1 -opt val2).
"""
parser = ArgumentParser()
parser.add_argument(option, nargs='*', action='append')
option = option.lstrip('-').replace('-', '_')
args, _ = parser.parse_known_args(argv)
if not args or not vars(args)[option]:
return []
else:
return sum(vars(args)[option], [])
def GetHostCompilerOptions(argv):
"""Collect the -isystem, -iquote, and --sysroot option values from argv.
Args:
argv: A list of strings, possibly the argv passed to main().
Returns:
The string that can be used as the --compiler-options to nvcc.
"""
parser = ArgumentParser()
parser.add_argument('-isystem', nargs='*', action='append')
parser.add_argument('-iquote', nargs='*', action='append')
parser.add_argument('--sysroot', nargs=1)
parser.add_argument('-g', nargs='*', action='append')
parser.add_argument('-fno-canonical-system-headers', action='store_true')
parser.add_argument('-no-canonical-prefixes', action='store_true')
args, _ = parser.parse_known_args(argv)
opts = ''
if args.isystem:
opts += ' -isystem ' + ' -isystem '.join(sum(args.isystem, []))
if args.iquote:
opts += ' -iquote ' + ' -iquote '.join(sum(args.iquote, []))
if args.g:
opts += ' -g' + ' -g'.join(sum(args.g, []))
if args.fno_canonical_system_headers:
opts += ' -fno-canonical-system-headers'
if args.no_canonical_prefixes:
opts += ' -no-canonical-prefixes'
if args.sysroot:
opts += ' --sysroot ' + args.sysroot[0]
return opts
def _update_options(nvcc_options):
if NVCC_VERSION in ("7.0",):
return nvcc_options
update_options = { "relaxed-constexpr" : "expt-relaxed-constexpr" }
return [ update_options[opt] if opt in update_options else opt
for opt in nvcc_options ]
def GetNvccOptions(argv):
"""Collect the -nvcc_options values from argv.
Args:
argv: A list of strings, possibly the argv passed to main().
Returns:
The string that can be passed directly to nvcc.
"""
parser = ArgumentParser()
parser.add_argument('-nvcc_options', nargs='*', action='append')
args, _ = parser.parse_known_args(argv)
if args.nvcc_options:
options = _update_options(sum(args.nvcc_options, []))
return ' '.join(['--'+a for a in options])
return ''
def system(cmd):
"""Invokes cmd with os.system().
Args:
cmd: The command.
Returns:
The exit code if the process exited with exit() or -signal
if the process was terminated by a signal.
"""
retv = os.system(cmd)
if os.WIFEXITED(retv):
return os.WEXITSTATUS(retv)
else:
return -os.WTERMSIG(retv)
def InvokeNvcc(argv, log=False):
"""Call nvcc with arguments assembled from argv.
Args:
argv: A list of strings, possibly the argv passed to main().
log: True if logging is requested.
Returns:
The return value of calling system('nvcc ' + args)
"""
host_compiler_options = GetHostCompilerOptions(argv)
nvcc_compiler_options = GetNvccOptions(argv)
opt_option = GetOptionValue(argv, '-O')
m_options = GetOptionValue(argv, '-m')
m_options = ''.join([' -m' + m for m in m_options if m in ['32', '64']])
include_options = GetOptionValue(argv, '-I')
out_file = GetOptionValue(argv, '-o')
depfiles = GetOptionValue(argv, '-MF')
defines = GetOptionValue(argv, '-D')
defines = ''.join([' -D' + define for define in defines])
undefines = GetOptionValue(argv, '-U')
undefines = ''.join([' -U' + define for define in undefines])
std_options = GetOptionValue(argv, '-std')
# Supported -std flags as of CUDA 10.2. Only keep last to mimic gcc/clang.
# See: https://gist.github.com/ax3l/9489132#device-side-c-standard-support
# Updated by storypku
nvcc_allowed_std_options = ["c++03", "c++11", "c++14"]
std_options = ''.join([' -std=' + define
for define in std_options if define in nvcc_allowed_std_options][-1:])
# The list of source files gets passed after the -c option. I don't know of
# any other reliable way to just get the list of source files to be compiled.
src_files = GetOptionValue(argv, '-c')
# Pass -w through from host to nvcc, but don't do anything fancier with
# warnings-related flags, since they're not necessarily the same across
# compilers.
warning_options = ' -w' if '-w' in argv else ''
if len(src_files) == 0:
return 1
if len(out_file) != 1:
return 1
opt = (' -O2' if (len(opt_option) > 0 and int(opt_option[0]) > 0)
else ' -g')
includes = (' -I ' + ' -I '.join(include_options)
if len(include_options) > 0
else '')
# Unfortunately, there are other options that have a -c prefix too.
# So allow only those that look like C/C++ files.
src_files = [f for f in src_files if
re.search('\.cpp$|\.cc$|\.c$|\.cxx$|\.cu$|\.C$', f)]
srcs = ' '.join(src_files)
out = ' -o ' + out_file[0]
nvccopts = '-D_FORCE_INLINES '
for capability in GetOptionValue(argv, "--cuda-gpu-arch"):
capability = capability[len('sm_'):]
nvccopts += r'-gencode=arch=compute_%s,\"code=sm_%s\" ' % (capability,
capability)
for capability in GetOptionValue(argv, '--cuda-include-ptx'):
capability = capability[len('sm_'):]
nvccopts += r'-gencode=arch=compute_%s,\"code=compute_%s\" ' % (capability,
capability)
nvccopts += nvcc_compiler_options
nvccopts += undefines
nvccopts += defines
nvccopts += std_options
nvccopts += m_options
nvccopts += warning_options
if depfiles:
# Generate the dependency file
depfile = depfiles[0]
cmd = (NVCC_PATH + ' ' + nvccopts +
' --compiler-options "' + host_compiler_options + '"' +
' --compiler-bindir=' + GCC_HOST_COMPILER_PATH +
' -I .' +
' -x cu ' + opt + includes + ' ' + srcs + ' -M -o ' + depfile)
if log: Log(cmd)
exit_status = system(cmd)
if exit_status != 0:
return exit_status
cmd = (NVCC_PATH + ' ' + nvccopts +
' --compiler-options "' + host_compiler_options + ' -fPIC"' +
' --compiler-bindir=' + GCC_HOST_COMPILER_PATH +
' -I .' +
' -x cu ' + opt + includes + ' -c ' + srcs + out)
# TODO(zhengxq): for some reason, 'gcc' needs this help to find 'as'.
# Need to investigate and fix.
cmd = 'PATH=' + PREFIX_DIR + ':$PATH ' + cmd
if log: Log(cmd)
return system(cmd)
def main():
parser = ArgumentParser()
parser.add_argument('-x', nargs=1)
parser.add_argument('--cuda_log', action='store_true')
args, leftover = parser.parse_known_args(sys.argv[1:])
if args.x and args.x[0] == 'cuda':
if args.cuda_log: Log('-x cuda')
leftover = [pipes.quote(s) for s in leftover]
args.cuda_log = True
if args.cuda_log: Log('using nvcc')
return InvokeNvcc(leftover, log=args.cuda_log)
# Strip our flags before passing through to the CPU compiler for files which
# are not -x cuda. We can't just pass 'leftover' because it also strips -x.
# We not only want to pass -x to the CPU compiler, but also keep it in its
# relative location in the argv list (the compiler is actually sensitive to
# this).
cpu_compiler_flags = [flag for flag in sys.argv[1:]
if not flag.startswith(('--cuda_log'))]
return subprocess.call([CPU_COMPILER] + cpu_compiler_flags)
if __name__ == '__main__':
sys.exit(main())
load(":build_defs.bzl", "cuda_header_library")
load("@bazel_skylib//:bzl_library.bzl", "bzl_library")
licenses(["restricted"]) # MPL2, portions GPL v3, LGPL v3, BSD-like
package(default_visibility = ["//visibility:public"])
config_setting(
name = "using_nvcc",
values = {
"define": "using_cuda_nvcc=true",
},
)
config_setting(
name = "using_clang",
values = {
"define": "using_cuda_clang=true",
},
)
# Equivalent to using_clang && -c opt.
config_setting(
name = "using_clang_opt",
values = {
"define": "using_cuda_clang=true",
"compilation_mode": "opt",
},
)
cuda_header_library(
name = "cuda_headers",
hdrs = [
"cuda/cuda_config.h",
":cuda-include"
],
include_prefix = "tools/gpus",
includes = [
".", # required to include cuda/cuda/cuda_config.h as cuda/config.h
"cuda/include",
],
)
cc_library(
name = "cudart_static",
srcs = ["cuda/lib/%{cudart_static_lib}"],
linkopts = [
"-ldl",
"-lpthread",
%{cudart_static_linkopt}
],
)
cc_library(
name = "cuda_driver",
srcs = ["cuda/lib/%{cuda_driver_lib}"],
)
cc_library(
name = "cudart",
srcs = ["cuda/lib/%{cudart_lib}"],
data = ["cuda/lib/%{cudart_lib}"],
linkstatic = 1,
)
cuda_header_library(
name = "cublas_headers",
hdrs = [":cublas-include"],
include_prefix = "tools/gpus/cuda/include",
strip_include_prefix = "cublas/include",
deps = [":cuda_headers"],
includes = ["cublas/include"],
)
cc_library(
name = "cublas",
srcs = ["cuda/lib/%{cublas_lib}"],
data = ["cuda/lib/%{cublas_lib}"],
linkstatic = 1,
)
cc_library(
name = "cusolver",
srcs = ["cuda/lib/%{cusolver_lib}"],
data = ["cuda/lib/%{cusolver_lib}"],
linkopts = ["-lgomp"],
linkstatic = 1,
)
cc_library(
name = "cudnn",
srcs = ["cuda/lib/%{cudnn_lib}"],
data = ["cuda/lib/%{cudnn_lib}"],
linkstatic = 1,
)
cc_library(
name = "cudnn_header",
hdrs = [":cudnn-include"],
include_prefix = "tools/gpus/cudnn",
strip_include_prefix = "cudnn/include",
deps = [":cuda_headers"],
)
cc_library(
name = "cufft",
srcs = ["cuda/lib/%{cufft_lib}"],
data = ["cuda/lib/%{cufft_lib}"],
linkstatic = 1,
)
cc_library(
name = "curand",
srcs = ["cuda/lib/%{curand_lib}"],
data = ["cuda/lib/%{curand_lib}"],
linkstatic = 1,
)
cc_library(
name = "cuda",
deps = [
":cublas",
":cuda_headers",
":cudart",
":cudnn",
":cufft",
":curand",
],
)
cuda_header_library(
name = "cupti_headers",
hdrs = [":cuda-extras"],
include_prefix="tools/gpus",
includes = ["cuda/extras/CUPTI/include/"],
deps = [":cuda_headers"],
)
cc_library(
name = "cupti_dsos",
data = ["cuda/lib/%{cupti_lib}"],
)
cc_library(
name = "cusparse",
srcs = ["cuda/lib/%{cusparse_lib}"],
data = ["cuda/lib/%{cusparse_lib}"],
linkopts = ["-lgomp"],
linkstatic = 1,
)
cc_library(
name = "libdevice_root",
data = [":cuda-nvvm"],
)
filegroup(
name = "cuda_root",
srcs = [
"cuda/bin/fatbinary",
"cuda/bin/bin2c",
],
)
bzl_library(
name = "build_defs_bzl",
srcs = ["build_defs.bzl"],
deps = [
"@bazel_skylib//lib:selects",
],
)
%{copy_rules}
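
A hedged consumer sketch for the targets generated from this template: selecting sources on the :using_nvcc config_setting defined above and linking against :cudart, mirroring the cuda_util dependency change earlier in this commit (target and file names are hypothetical):

load("@rules_cc//cc:defs.bzl", "cc_library")

cc_library(
    name = "gpu_backend",  # hypothetical target
    srcs = select({
        "@local_config_cuda//cuda:using_nvcc": ["backend_cuda.cc"],
        "//conditions:default": ["backend_cpu.cc"],
    }),
    deps = ["@local_config_cuda//cuda:cudart"],
)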
# Macros for building CUDA code.
def if_cuda(if_true, if_false = []):
"""Shorthand for select()'ing on whether we're building with CUDA.
Returns a select statement which evaluates to if_true if we're building
with CUDA enabled. Otherwise, the select statement evaluates to if_false.
"""
return select({
"@local_config_cuda//cuda:using_nvcc": if_true,
"@local_config_cuda//cuda:using_clang": if_true,
"//conditions:default": if_false,
})
def if_cuda_clang(if_true, if_false = []):
"""Shorthand for select()'ing on wheteher we're building with cuda-clang.
Returns a select statement which evaluates to if_true if we're building
with cuda-clang. Otherwise, the select statement evaluates to if_false.
"""
return select({
"@local_config_cuda//cuda:using_clang": if_true,
"//conditions:default": if_false
})
def if_cuda_clang_opt(if_true, if_false = []):
"""Shorthand for select()'ing on wheteher we're building with cuda-clang
in opt mode.
Returns a select statement which evaluates to if_true if we're building
with cuda-clang in opt mode. Otherwise, the select statement evaluates to
if_false.
"""
return select({
"@local_config_cuda//cuda:using_clang_opt": if_true,
"//conditions:default": if_false
})
def cuda_default_copts():
"""Default options for all CUDA compilations."""
return if_cuda(
["-x", "cuda", "-DAPOLLO_CUDA=1"]
) + if_cuda_clang_opt(
# Some important CUDA optimizations are only enabled at O3.
["-O3"]
) + %{cuda_extra_copts}
def cuda_is_configured():
"""Returns true if CUDA was enabled during the configure process."""
return %{cuda_is_configured}
def cuda_gpu_architectures():
"""Returns a list of supported GPU architectures."""
return %{cuda_gpu_architectures}
def if_cuda_is_configured(x):
"""Tests if the CUDA was enabled during the configure process.
Unlike if_cuda(), this does not require that we are building with
--config=cuda. Used to allow non-CUDA code to depend on CUDA libraries.
"""
if cuda_is_configured():
return select({"//conditions:default": x})
return select({"//conditions:default": []})
def cuda_header_library(
name,
hdrs,
include_prefix = None,
strip_include_prefix = None,
deps = [],
**kwargs):
"""Generates a cc_library containing both virtual and system include paths.
Generates both a header-only target with virtual includes plus the full
target without virtual includes. This works around the fact that bazel can't
mix 'includes' and 'include_prefix' in the same target."""
native.cc_library(
name = name + "_virtual",
hdrs = hdrs,
include_prefix = include_prefix,
strip_include_prefix = strip_include_prefix,
deps = deps,
visibility = ["//visibility:private"],
)
native.cc_library(
name = name,
textual_hdrs = hdrs,
deps = deps + [":%s_virtual" % name],
**kwargs
)
def cuda_library(copts = [], **kwargs):
"""Wrapper over cc_library which adds default CUDA options."""
native.cc_library(copts = cuda_default_copts() + copts, **kwargs)
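
A minimal usage sketch for the cuda_library macro defined above, matching the load statement used in the BUILD diff earlier in this commit (target and source names are hypothetical):

load("@local_config_cuda//cuda:build_defs.bzl", "cuda_library")

cuda_library(
    name = "vector_add",          # hypothetical target
    srcs = ["vector_add.cu.cc"],  # hypothetical CUDA source, compiled through the nvcc crosstool wrapper
    hdrs = ["vector_add.h"],
    deps = ["@local_config_cuda//cuda:cudart"],
)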
/* Copyright 2015 The TensorFlow Authors. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
==============================================================================*/
#ifndef CUDA_CUDA_CONFIG_H_
#define CUDA_CUDA_CONFIG_H_
#define TF_CUDA_VERSION "%{cuda_version}"
#define TF_CUDA_LIB_VERSION "%{cuda_lib_version}"
#define TF_CUDNN_VERSION "%{cudnn_version}"
#define TF_CUDA_TOOLKIT_PATH "%{cuda_toolkit_path}"
#endif // CUDA_CUDA_CONFIG_H_
This diff has been collapsed.
# Copyright 2019 The TensorFlow Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ==============================================================================
# NOTE(storypku): Tailored to support Linux ONLY.
# Origin: tensorflow/third_party/gpus/find_cuda_config.py
# =============================================================================
"""Prints CUDA library and header directories and versions found on the system.
The script searches for CUDA library and header files on the system, inspects
them to determine their version and prints the configuration to stdout.
The paths to inspect and the required versions are specified through environment
variables. If no valid configuration is found, the script prints to stderr and
returns an error code.
The list of libraries to find is specified as arguments. Supported libraries are
CUDA (includes cuBLAS), cuDNN, NCCL, and TensorRT.
The script takes a list of base directories specified by the TF_CUDA_PATHS
environment variable as comma-separated glob list. The script looks for headers
and library files in a hard-coded set of subdirectories from these base paths.
If TF_CUDA_PATHS is not specified, an OS-specific default is used:
Linux: /usr/local/cuda, /usr, and paths from 'ldconfig -p'.
For backwards compatibility, some libraries also use alternative base
directories from other environment variables if they are specified. List of
library-specific environment variables:
  Library   Version env variable   Additional base directories
  --------------------------------------------------------------
  CUDA      TF_CUDA_VERSION        CUDA_TOOLKIT_PATH
  cuBLAS    TF_CUBLAS_VERSION      CUDA_TOOLKIT_PATH
  cuDNN     TF_CUDNN_VERSION       CUDNN_INSTALL_PATH
  NCCL      TF_NCCL_VERSION        NCCL_INSTALL_PATH, NCCL_HDR_PATH
  TensorRT  TF_TENSORRT_VERSION    TENSORRT_INSTALL_PATH
Versions environment variables can be of the form 'x' or 'x.y' to request a
specific version, empty or unspecified to accept any version.
The output of a found library is of the form:
tf_<library>_version: x.y.z
tf_<library>_header_dir: ...
tf_<library>_library_dir: ...
"""
import io
import os
import glob
import platform
import re
import subprocess
import sys
# pylint: disable=g-import-not-at-top
try:
from shutil import which
except ImportError:
from distutils.spawn import find_executable as which
# pylint: enable=g-import-not-at-top
class ConfigError(Exception):
pass
def _is_linux():
return platform.system() == "Linux"
def _matches_version(actual_version, required_version):
"""Checks whether some version meets the requirements.
All elements of the required_version need to be present in the
actual_version.
required_version  actual_version  result
-----------------------------------------
1                 1.1             True
1.2               1               False
1.2               1.3             False
(empty)           1               True
Args:
actual_version: The version detected from the CUDA installation.
required_version: The version specified by the user.
Returns: Whether the actual version matches the required one.
"""
if actual_version is None:
return False
# Strip spaces from the versions.
actual_version = actual_version.strip()
required_version = required_version.strip()
return actual_version.startswith(required_version)
def _at_least_version(actual_version, required_version):
actual = [int(v) for v in actual_version.split(".")]
required = [int(v) for v in required_version.split(".")]
return actual >= required
def _get_header_version(path, name):
"""Returns preprocessor defines in C header file."""
for line in io.open(path, "r", encoding="utf-8").readlines():
match = re.match("#define %s +(\d+)" % name, line)
if match:
return match.group(1)
return ""
def _cartesian_product(first, second):
"""Returns all path combinations of first and second."""
return [os.path.join(f, s) for f in first for s in second]
def _get_ld_config_paths():
"""Returns all directories from 'ldconfig -p'."""
if not _is_linux():
return []
ldconfig_path = which("ldconfig") or "/sbin/ldconfig"
output = subprocess.check_output([ldconfig_path, "-p"])
pattern = re.compile(".* => (.*)")
result = set()
for line in output.splitlines():
try:
match = pattern.match(line.decode("ascii"))
except UnicodeDecodeError:
match = False
if match:
result.add(os.path.dirname(match.group(1)))
return sorted(list(result))
def _get_default_cuda_paths(cuda_version):
if not cuda_version:
cuda_version = "*"
elif not "." in cuda_version:
cuda_version = cuda_version + ".*"
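# e.g. "10" is turned into the glob "10.*", so the default path below becomes "/usr/local/cuda-10.*".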
return [
"/usr/local/cuda-%s" % cuda_version, "/usr/local/cuda", "/usr",
"/usr/local/cudnn"
] + _get_ld_config_paths()
def _header_paths():
"""Returns hard-coded set of relative paths to look for header files."""
return [
"",
"include",
"include/cuda",
"include/*-linux-gnu",
"extras/CUPTI/include",
"include/cuda/CUPTI",
]
def _library_paths():
"""Returns hard-coded set of relative paths to look for library files."""
return [
"",
"lib64",
"lib",
"lib/*-linux-gnu",
"lib/x64",
"extras/CUPTI/*",
]
def _not_found_error(base_paths, relative_paths, filepattern):
base_paths = "".join(
["\n '%s'" % path for path in sorted(base_paths)])
relative_paths = "".join(
["\n '%s'" % path for path in relative_paths])
return ConfigError(
"Could not find any %s in any subdirectory:%s\nof:%s\n" %
(filepattern, relative_paths, base_paths))
def _find_file(base_paths, relative_paths, filepattern):
for path in _cartesian_product(base_paths, relative_paths):
for file in glob.glob(os.path.join(path, filepattern)):
return file
raise _not_found_error(base_paths, relative_paths, filepattern)
def _find_library(base_paths, library_name, required_version):
"""Returns first valid path to the requested library."""
filepattern = ".".join(
["lib" + library_name, "so"] + required_version.split(".")[:1]) + "*"
return _find_file(base_paths, _library_paths(), filepattern)
def _find_versioned_file(base_paths, relative_paths, filepattern,
required_version, get_version):
"""Returns first valid path to a file that matches the requested version."""
for path in _cartesian_product(base_paths, relative_paths):
for file in glob.glob(os.path.join(path, filepattern)):
actual_version = get_version(file)
if _matches_version(actual_version, required_version):
return file, actual_version
raise _not_found_error(
base_paths, relative_paths,
filepattern + " matching version '%s'" % required_version)
def _find_header(base_paths, header_name, required_version, get_version):
"""Returns first valid path to a header that matches the requested version."""
return _find_versioned_file(base_paths, _header_paths(), header_name,
required_version, get_version)
def _find_cuda_config(base_paths, required_version):
def get_header_version(path):
version = int(_get_header_version(path, "CUDA_VERSION"))
if not version:
return None
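# cuda.h defines CUDA_VERSION as 1000 * major + 10 * minor, e.g. 10020 for CUDA 10.2.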
return "%d.%d" % (version // 1000, version % 1000 // 10)
cuda_header_path, header_version = _find_header(
base_paths, "cuda.h", required_version, get_header_version)
cuda_version = header_version # x.y, see above.
cuda_library_path = _find_library(base_paths, "cudart", cuda_version)
def get_nvcc_version(path):
pattern = "Cuda compilation tools, release \d+\.\d+, V(\d+\.\d+\.\d+)"
for line in subprocess.check_output([path, "--version"]).splitlines():
match = re.match(pattern, line.decode("ascii"))
if match:
return match.group(1)
return None
nvcc_name = "nvcc"
nvcc_path, nvcc_version = _find_versioned_file(base_paths, [
"",
"bin",
], nvcc_name, cuda_version, get_nvcc_version)
nvvm_path = _find_file(base_paths, [
"nvvm/libdevice",
"share/cuda",
"lib/nvidia-cuda-toolkit/libdevice",
], "libdevice*.10.bc")
cupti_header_path = _find_file(base_paths, _header_paths(), "cupti.h")
cupti_library_path = _find_library(base_paths, "cupti", required_version)
cuda_binary_dir = os.path.dirname(nvcc_path)
nvvm_library_dir = os.path.dirname(nvvm_path)
# XLA requires the toolkit path to find ptxas and libdevice.
# TODO(csigg): pass in both directories instead.
cuda_toolkit_paths = (
os.path.normpath(os.path.join(cuda_binary_dir, "..")),
os.path.normpath(os.path.join(nvvm_library_dir, "../..")),
)
if cuda_toolkit_paths[0] != cuda_toolkit_paths[1]:
raise ConfigError(
"Inconsistent CUDA toolkit path: %s vs %s" % cuda_toolkit_paths)
return {
"cuda_version": cuda_version,
"cuda_include_dir": os.path.dirname(cuda_header_path),
"cuda_library_dir": os.path.dirname(cuda_library_path),
"cuda_binary_dir": cuda_binary_dir,
"nvvm_library_dir": nvvm_library_dir,
"cupti_include_dir": os.path.dirname(cupti_header_path),
"cupti_library_dir": os.path.dirname(cupti_library_path),
"cuda_toolkit_path": cuda_toolkit_paths[0],
}
def _find_cublas_config(base_paths, required_version, cuda_version):
if _at_least_version(cuda_version, "10.1"):
def get_header_version(path):
version = (_get_header_version(path, name)
for name in ("CUBLAS_VER_MAJOR", "CUBLAS_VER_MINOR",
"CUBLAS_VER_PATCH"))
return ".".join(version)
header_path, header_version = _find_header(
base_paths, "cublas_api.h", required_version, get_header_version)
# cuBLAS uses the major version only.
cublas_version = header_version.split(".")[0]
if not _matches_version(cuda_version, cublas_version):
raise ConfigError(
"cuBLAS version %s does not match CUDA version %s" %
(cublas_version, cuda_version))
else:
# There is no version info available before CUDA 10.1, just find the file.
header_path = _find_file(base_paths, _header_paths(), "cublas_api.h")
# cuBLAS version is the same as CUDA version (x.y).
cublas_version = required_version
library_path = _find_library(base_paths, "cublas", cublas_version)
return {
"cublas_include_dir": os.path.dirname(header_path),
"cublas_library_dir": os.path.dirname(library_path),
}
def _find_cudnn_config(base_paths, required_version):
def get_header_version(path):
version = (_get_header_version(path, name)
for name in ("CUDNN_MAJOR", "CUDNN_MINOR",
"CUDNN_PATCHLEVEL"))
return ".".join(version)
header_path, header_version = _find_header(
base_paths, "cudnn.h", required_version, get_header_version)
cudnn_version = header_version.split(".")[0]
library_path = _find_library(base_paths, "cudnn", cudnn_version)
return {
"cudnn_version": cudnn_version,
"cudnn_include_dir": os.path.dirname(header_path),
"cudnn_library_dir": os.path.dirname(library_path),
}
def _find_nccl_config(base_paths, required_version):
def get_header_version(path):
version = (_get_header_version(path, name)
for name in ("NCCL_MAJOR", "NCCL_MINOR", "NCCL_PATCH"))
return ".".join(version)
header_path, header_version = _find_header(
base_paths, "nccl.h", required_version, get_header_version)
nccl_version = header_version.split(".")[0]
library_path = _find_library(base_paths, "nccl", nccl_version)
return {
"nccl_version": nccl_version,
"nccl_include_dir": os.path.dirname(header_path),
"nccl_library_dir": os.path.dirname(library_path),
}
def _find_tensorrt_config(base_paths, required_version):
def get_header_version(path):
version = (_get_header_version(path, name)
for name in ("NV_TENSORRT_MAJOR", "NV_TENSORRT_MINOR",
"NV_TENSORRT_PATCH"))
# `version` is a generator object, so we convert it to a list before using
# it (multiple times below).
version = list(version)
if not all(version):
return None  # Versions not found; make _matches_version return False.
return ".".join(version)
try:
header_path, header_version = _find_header(
base_paths, "NvInfer.h", required_version, get_header_version)
except ConfigError:
# TensorRT 6 moved the version information to NvInferVersion.h.
header_path, header_version = _find_header(
base_paths, "NvInferVersion.h", required_version,
get_header_version)
tensorrt_version = header_version.split(".")[0]
library_path = _find_library(base_paths, "nvinfer", tensorrt_version)
return {
"tensorrt_version": tensorrt_version,
"tensorrt_include_dir": os.path.dirname(header_path),
"tensorrt_library_dir": os.path.dirname(library_path),
}
def _list_from_env(env_name, default=[]):
"""Returns comma-separated list from environment variable."""
if env_name in os.environ:
return os.environ[env_name].split(",")
return default
def _get_legacy_path(env_name, default=[]):
"""Returns a path specified by a legacy environment variable.
CUDNN_INSTALL_PATH, NCCL_INSTALL_PATH, TENSORRT_INSTALL_PATH set to
'/usr/lib/x86_64-linux-gnu' would previously find both library and header
paths. Detect those and return '/usr', otherwise forward to _list_from_env().
"""
if env_name in os.environ:
match = re.match("^(/[^/ ]*)+/lib/\w+-linux-gnu/?$",
os.environ[env_name])
if match:
return [match.group(1)]
return _list_from_env(env_name, default)
def _normalize_path(path):
"""Returns normalized path, with forward slashes on Windows."""
return os.path.realpath(path)
def find_cuda_config():
"""Returns a dictionary of CUDA library and header file paths."""
libraries = [argv.lower() for argv in sys.argv[1:]]
cuda_version = os.environ.get("TF_CUDA_VERSION", "")
base_paths = _list_from_env("TF_CUDA_PATHS",
_get_default_cuda_paths(cuda_version))
base_paths = [path for path in base_paths if os.path.exists(path)]
result = {}
if "cuda" in libraries:
cuda_paths = _list_from_env("CUDA_TOOLKIT_PATH", base_paths)
result.update(_find_cuda_config(cuda_paths, cuda_version))
cuda_version = result["cuda_version"]
cublas_paths = base_paths
if tuple(int(v) for v in cuda_version.split(".")) < (10, 1):
# Before CUDA 10.1, cuBLAS was in the same directory as the toolkit.
cublas_paths = cuda_paths
cublas_version = os.environ.get("TF_CUBLAS_VERSION", "")
result.update(
_find_cublas_config(cublas_paths, cublas_version, cuda_version))
if "cudnn" in libraries:
cudnn_paths = _get_legacy_path("CUDNN_INSTALL_PATH", base_paths)
cudnn_version = os.environ.get("TF_CUDNN_VERSION", "")
result.update(_find_cudnn_config(cudnn_paths, cudnn_version))
if "nccl" in libraries:
nccl_paths = _get_legacy_path("NCCL_INSTALL_PATH", base_paths)
nccl_version = os.environ.get("TF_NCCL_VERSION", "")
result.update(_find_nccl_config(nccl_paths, nccl_version))
if "tensorrt" in libraries:
tensorrt_paths = _get_legacy_path("TENSORRT_INSTALL_PATH", base_paths)
tensorrt_version = os.environ.get("TF_TENSORRT_VERSION", "")
result.update(_find_tensorrt_config(tensorrt_paths, tensorrt_version))
for k, v in result.items():
if k.endswith("_dir") or k.endswith("_path"):
result[k] = _normalize_path(v)
return result
def main():
try:
for key, value in sorted(find_cuda_config().items()):
print("{}: {}".format(key, value))
except ConfigError as e:
sys.stderr.write(str(e))
sys.exit(1)
if __name__ == "__main__":
main()
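For reference, here is a minimal sketch of how the "key: value" lines printed by main() could be consumed from another Python script. The script location (tools/gpus/find_cuda_config.py) and the python3 interpreter name are assumptions; the actual cuda_configure rule may drive the script differently.

# Minimal consumer sketch; the script path and interpreter name are assumptions.
import subprocess

def run_find_cuda_config(libraries=("cuda", "cudnn")):
    """Run find_cuda_config.py and parse its 'key: value' output into a dict."""
    output = subprocess.check_output(
        ["python3", "tools/gpus/find_cuda_config.py"] + list(libraries),
        universal_newlines=True)
    config = {}
    for line in output.splitlines():
        key, _, value = line.partition(": ")
        config[key] = value
    return config

# Example use:
#   cfg = run_find_cuda_config()
#   print(cfg.get("cuda_version"))  # e.g. "10.2"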
"""Open source build configurations for CUDA."""
load("@local_config_cuda//cuda:build_defs.bzl", _if_cuda_is_configured = "if_cuda_is_configured")
# We perform this indirection so that the copybara tool can distinguish this
# macro from others provided by the same file.
def if_cuda_is_configured(x):
return _if_cuda_is_configured(x)
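A minimal sketch of how a BUILD file could use if_cuda_is_configured to make a CUDA dependency optional; the target and file names here are made up for illustration and are not taken from the repository:

# Illustrative BUILD snippet; "gpu_helper" and "gpu_helper.cc" are hypothetical.
load("@local_config_cuda//cuda:build_defs.bzl", "if_cuda_is_configured")
load("@rules_cc//cc:defs.bzl", "cc_library")

cc_library(
    name = "gpu_helper",
    srcs = ["gpu_helper.cc"],
    # The CUDA runtime dependency is added only when CUDA was configured.
    deps = if_cuda_is_configured(["@local_config_cuda//cuda:cudart"]),
)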
build --action_env PYTHON_BIN_PATH="/usr/bin/python3"
build --action_env PYTHON_LIB_PATH="/usr/lib/python3/dist-packages"
build --python_path="/usr/bin/python3"
# build --config=tensorrt
build --action_env CUDA_TOOLKIT_PATH="/usr/local/cuda-10.2"
build --action_env TF_CUDA_COMPUTE_CAPABILITIES="6.0,6.1,7.0,7.2,7.5"
build --action_env GCC_HOST_COMPILER_PATH="/usr/bin/x86_64-linux-gnu-gcc-7"
build --config=cuda
## The following was adapted from tensorflow/.bazelrc
# This config refers to building with CUDA available. It does not necessarily
# mean that we build CUDA op kernels.
build:using_cuda --define=using_cuda=true
build:using_cuda --action_env TF_NEED_CUDA=1
build:using_cuda --crosstool_top=@local_config_cuda//crosstool:toolchain
# This config refers to building CUDA op kernels with nvcc.
build:cuda --config=using_cuda
build:cuda --define=using_cuda_nvcc=true
# This config refers to building CUDA op kernels with clang.
build:cuda_clang --config=using_cuda
build:cuda_clang --define=using_cuda_clang=true
build:cuda_clang --define=using_clang=true
build:cuda_clang --action_env TF_CUDA_CLANG=1
build:tensorrt --action_env TF_NEED_TENSORRT=1
build:nonccl --define=no_nccl_support=true
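# Note (illustrative, not part of the original sample file): the configs above
# are selected on the bazel command line, e.g.
#   bazel build --config=cuda //cyber/...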