Commit d57572e9 authored by Martin Wicke, committed by TensorFlower Gardener

Merge changes from github.

PiperOrigin-RevId: 167401527
Parent ddba1e0a
......@@ -36,7 +36,15 @@ and discussion, and please direct specific questions to [Stack Overflow](https:/
People who are a little more adventurous can also try our nightly binaries:
**Nightly pip packages**
* We are pleased to announce that TensorFlow now offers nightly pip packages
under the [tf-nightly](https://pypi.python.org/pypi/tf-nightly) project on pypi.
Simply run `pip install tf-nightly` in a clean environment to install the nightly
tensorflow build. We currently only support CPU-only packages on Linux and Mac.
GPU packages on all platforms and Windows CPU-only packages will arrive soon!
**Individual whl files**
* Linux CPU-only: [Python 2](https://ci.tensorflow.org/view/Nightly/job/nightly-matrix-cpu/TF_BUILD_IS_OPT=OPT,TF_BUILD_IS_PIP=PIP,TF_BUILD_PYTHON_VERSION=PYTHON2,label=cpu-slave/lastSuccessfulBuild/artifact/pip_test/whl/tensorflow-1.3.0-cp27-none-linux_x86_64.whl) ([build history](https://ci.tensorflow.org/view/Nightly/job/nightly-matrix-cpu/TF_BUILD_IS_OPT=OPT,TF_BUILD_IS_PIP=PIP,TF_BUILD_PYTHON_VERSION=PYTHON2,label=cpu-slave)) / [Python 3.4](https://ci.tensorflow.org/view/Nightly/job/nightly-matrix-cpu/TF_BUILD_IS_OPT=OPT,TF_BUILD_IS_PIP=PIP,TF_BUILD_PYTHON_VERSION=PYTHON3,label=cpu-slave/lastSuccessfulBuild/artifact/pip_test/whl/tensorflow-1.3.0-cp34-cp34m-linux_x86_64.whl) ([build history](https://ci.tensorflow.org/view/Nightly/job/nightly-matrix-cpu/TF_BUILD_IS_OPT=OPT,TF_BUILD_IS_PIP=PIP,TF_BUILD_PYTHON_VERSION=PYTHON3,label=cpu-slave/)) / [Python 3.5](https://ci.tensorflow.org/view/Nightly/job/nightly-python35-linux-cpu/lastSuccessfulBuild/artifact/pip_test/whl/tensorflow-1.3.0-cp35-cp35m-linux_x86_64.whl) ([build history](https://ci.tensorflow.org/view/Nightly/job/nightly-python35-linux-cpu/))
* Linux GPU: [Python 2](https://ci.tensorflow.org/view/Nightly/job/nightly-matrix-linux-gpu/TF_BUILD_IS_OPT=OPT,TF_BUILD_IS_PIP=PIP,TF_BUILD_PYTHON_VERSION=PYTHON2,label=gpu-linux/lastSuccessfulBuild/artifact/pip_test/whl/tensorflow_gpu-1.3.0-cp27-none-linux_x86_64.whl) ([build history](https://ci.tensorflow.org/view/Nightly/job/nightly-matrix-linux-gpu/TF_BUILD_IS_OPT=OPT,TF_BUILD_IS_PIP=PIP,TF_BUILD_PYTHON_VERSION=PYTHON2,label=gpu-linux/)) / [Python 3.4](https://ci.tensorflow.org/view/Nightly/job/nightly-matrix-linux-gpu/TF_BUILD_IS_OPT=OPT,TF_BUILD_IS_PIP=PIP,TF_BUILD_PYTHON_VERSION=PYTHON3,label=gpu-linux/lastSuccessfulBuild/artifact/pip_test/whl/tensorflow_gpu-1.3.0-cp34-cp34m-linux_x86_64.whl) ([build history](https://ci.tensorflow.org/view/Nightly/job/nightly-matrix-linux-gpu/TF_BUILD_IS_OPT=OPT,TF_BUILD_IS_PIP=PIP,TF_BUILD_PYTHON_VERSION=PYTHON3,label=gpu-linux/)) / [Python 3.5](https://ci.tensorflow.org/view/Nightly/job/nightly-matrix-linux-gpu/TF_BUILD_IS_OPT=OPT,TF_BUILD_IS_PIP=PIP,TF_BUILD_PYTHON_VERSION=PYTHON3.5,label=gpu-linux/lastSuccessfulBuild/artifact/pip_test/whl/tensorflow_gpu-1.3.0-cp35-cp35m-linux_x86_64.whl) ([build history](https://ci.tensorflow.org/view/Nightly/job/nightly-matrix-linux-gpu/TF_BUILD_IS_OPT=OPT,TF_BUILD_IS_PIP=PIP,TF_BUILD_PYTHON_VERSION=PYTHON3.5,label=gpu-linux/))
* Mac CPU-only: [Python 2](https://ci.tensorflow.org/view/Nightly/job/nightly-matrix-cpu/TF_BUILD_IS_OPT=OPT,TF_BUILD_IS_PIP=PIP,TF_BUILD_PYTHON_VERSION=PYTHON2,label=mac-slave/lastSuccessfulBuild/artifact/pip_test/whl/tensorflow-1.3.0-py2-none-any.whl) ([build history](https://ci.tensorflow.org/view/Nightly/job/nightly-matrix-cpu/TF_BUILD_IS_OPT=OPT,TF_BUILD_IS_PIP=PIP,TF_BUILD_PYTHON_VERSION=PYTHON2,label=mac-slave/)) / [Python 3](https://ci.tensorflow.org/view/Nightly/job/nightly-matrix-cpu/TF_BUILD_IS_OPT=OPT,TF_BUILD_IS_PIP=PIP,TF_BUILD_PYTHON_VERSION=PYTHON3,label=mac-slave/lastSuccessfulBuild/artifact/pip_test/whl/tensorflow-1.3.0-py3-none-any.whl) ([build history](https://ci.tensorflow.org/view/Nightly/job/nightly-matrix-cpu/TF_BUILD_IS_OPT=OPT,TF_BUILD_IS_PIP=PIP,TF_BUILD_PYTHON_VERSION=PYTHON3,label=mac-slave/))
......
......@@ -46,7 +46,7 @@ See also [TensorBoard 0.1.4](https://github.com/tensorflow/tensorboard/releases/
* Display feed values with the `print_feed` or `pf` command and clickable links in the curses UI.
* Runtime profiler at the op level and the Python source line level with the `run -p` command.
* Initial release of the statistical distribution library `tf.distributions`.
* GPU kernels and speed improvements for for unary `tf.where` and `tf.nn.top_k`.
* GPU kernels and speed improvements for unary `tf.where` and `tf.nn.top_k`.
* Monotonic Attention wrappers added to `tf.contrib.seq2seq`.
* Added `tf.contrib.signal`, a library for signal processing primitives.
* Added `tf.contrib.resampler`, containing CPU and GPU ops for differentiable resampling of images.
......
......@@ -2,11 +2,11 @@ workspace(name = "org_tensorflow")
http_archive(
name = "io_bazel_rules_closure",
sha256 = "bc41b80486413aaa551860fc37471dbc0666e1dbb5236fb6177cb83b0c105846",
strip_prefix = "rules_closure-dec425a4ff3faf09a56c85d082e4eed05d8ce38f",
sha256 = "25f5399f18d8bf9ce435f85c6bbf671ec4820bc4396b3022cc5dc4bc66303609",
strip_prefix = "rules_closure-0.4.2",
urls = [
"http://mirror.bazel.build/github.com/bazelbuild/rules_closure/archive/dec425a4ff3faf09a56c85d082e4eed05d8ce38f.tar.gz", # 2017-06-02
"https://github.com/bazelbuild/rules_closure/archive/dec425a4ff3faf09a56c85d082e4eed05d8ce38f.tar.gz",
"http://mirror.bazel.build/github.com/bazelbuild/rules_closure/archive/0.4.2.tar.gz", # 2017-08-29
"https://github.com/bazelbuild/rules_closure/archive/0.4.2.tar.gz",
],
)
......
......@@ -143,7 +143,7 @@ def run_shell(cmd, allow_non_zero=False):
def cygpath(path):
"""Convert path from posix to windows."""
return run_shell(['cygpath', '-m', path])
return os.path.abspath(path).replace('\\', '/')
def get_python_path(environ_cp, python_bin_path):
......@@ -196,7 +196,7 @@ def setup_python(environ_cp, bazel_version):
environ_cp['PYTHON_BIN_PATH'] = ''
# Convert python path to Windows style before checking lib and version
if is_cygwin():
if is_windows() or is_cygwin():
python_bin_path = cygpath(python_bin_path)
# Get PYTHON_LIB_PATH
......@@ -219,7 +219,7 @@ def setup_python(environ_cp, bazel_version):
python_major_version = get_python_major_version(python_bin_path)
# Convert python path to Windows style before writing into bazel.rc
if is_cygwin():
if is_windows() or is_cygwin():
python_lib_path = cygpath(python_lib_path)
# Set-up env variables used by python_configure.bzl
......@@ -600,7 +600,7 @@ def set_tf_cuda_version(environ_cp):
# Find out where the CUDA toolkit is installed
default_cuda_path = _DEFAULT_CUDA_PATH
if is_cygwin():
if is_windows() or is_cygwin():
default_cuda_path = cygpath(
environ_cp.get('CUDA_PATH', _DEFAULT_CUDA_PATH_WIN))
elif is_linux():
......@@ -660,7 +660,7 @@ def set_tf_cunn_version(environ_cp):
# unusable. Going through one more level of expansion to handle that.
cudnn_install_path = os.path.realpath(
os.path.expanduser(cudnn_install_path))
if is_cygwin():
if is_windows() or is_cygwin():
cudnn_install_path = cygpath(cudnn_install_path)
if is_windows():
......
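Because the `is_windows() or is_cygwin()` checks above now also fire on plain Windows, where the `cygpath` binary is typically unavailable, the helper is rewritten in pure Python: normalize to an absolute path and use forward slashes. A minimal sketch of that behavior (the function name below is illustrative, not part of the commit):

```python
import os

# Mirrors the replacement body of cygpath(): absolute path with forward
# slashes, so the result can be written into bazel.rc on native Windows.
def to_bazel_path(path):
    return os.path.abspath(path).replace('\\', '/')

# On a Windows machine this would print: C:/Python35/python.exe
print(to_bazel_path(r'C:\Python35\python.exe'))
```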
......@@ -95,6 +95,14 @@ Status SeluGradHelper(const Scope& scope, const Operation& op,
}
REGISTER_GRADIENT_OP("Selu", SeluGradHelper);
Status L2LossGrad(const Scope& scope, const Operation& op,
const std::vector<Output>& grad_inputs,
std::vector<Output>* grad_outputs) {
grad_outputs->push_back(Mul(scope, op.input(0), grad_inputs[0]));
return scope.status();
}
REGISTER_GRADIENT_OP("L2Loss", L2LossGrad);
Status BiasAddGradHelper(const Scope& scope, const Operation& op,
const std::vector<Output>& grad_inputs,
std::vector<Output>* grad_outputs) {
......
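For reference, `L2Loss(x)` computes `0.5 * sum(x²)`, so its input gradient is just `x` scaled by the incoming gradient, which is exactly what `L2LossGrad` pushes back via `Mul(scope, op.input(0), grad_inputs[0])`. A quick NumPy sketch of that identity (purely illustrative, not part of the commit):

```python
import numpy as np

x = np.array([1.0, -2.0, 3.0])
upstream = 2.0  # gradient flowing in from the scalar loss

analytic = x * upstream  # what L2LossGrad emits: Mul(x, grad_inputs[0])

# Finite-difference check of d(0.5 * sum(x**2)) / dx_i, scaled by upstream.
eps = 1e-6
numeric = np.array([
    (0.5 * np.sum((x + eps * e) ** 2) - 0.5 * np.sum((x - eps * e) ** 2)) / (2 * eps)
    for e in np.eye(3)
]) * upstream

assert np.allclose(analytic, numeric, atol=1e-4)
```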
......@@ -122,6 +122,14 @@ TEST_F(NNGradTest, SeluGrad) {
RunTest(x, x_init_value, y, shape);
}
TEST_F(NNGradTest, L2LossGrad) {
TensorShape x_shape({5, 2});
TensorShape y_shape({1});
auto x = Placeholder(scope_, DT_FLOAT, Placeholder::Shape(x_shape));
auto y = L2Loss(scope_, x);
RunTest(x, x_shape, y, y_shape);
}
TEST_F(NNGradTest, BiasAddGradHelper) {
TensorShape shape({4, 5});
TensorShape bias_shape({5});
......
......@@ -389,7 +389,7 @@ StatusOr<string> CompileModuleToPtx(llvm::Module* module,
// Loop unrolling exposes more opportunities for SROA. Therefore, we run SROA
// again after the standard optimization passes [http://b/13329423].
// TODO(jingyue): SROA may further expose more optimization opportunities, such
// TODO(jingyue): SROA may further expose more optimization opportunities such
// as more precise alias analysis and more function inlining (SROA may change
// the inlining cost of a function). For now, running SROA already emits good
// enough code for the evaluated benchmarks. We may want to run more
......
......@@ -82,6 +82,7 @@ function(AddTest)
set_tests_properties(${_AT_TARGET}
PROPERTIES ENVIRONMENT "TEST_TMPDIR=${tempdir};TEST_SRCDIR=${testdir}"
)
set_tests_properties(${_AT_TARGET} PROPERTIES TIMEOUT "600")
foreach(datafile ${_AT_DATA})
file(RELATIVE_PATH datafile_rel ${tensorflow_source_dir} ${datafile})
......@@ -117,6 +118,7 @@ function(AddPythonTests)
if (_AT_DEPENDS)
add_dependencies(${_AT_TARGET} ${_AT_DEPENDS})
endif()
set_tests_properties(${sourcefile} PROPERTIES TIMEOUT "600")
endforeach()
endfunction(AddPythonTests)
......
......@@ -62,6 +62,7 @@ tf_cuda_library(
}),
deps = [
":gdr_proto_cc",
"//tensorflow/core:core_cpu_internal",
"//tensorflow/core:framework",
"//tensorflow/core:gpu_runtime",
"//tensorflow/core:lib",
......
......@@ -16,14 +16,9 @@ limitations under the License.
#ifndef GDR_MEMORY_MANAGER_H_
#define GDR_MEMORY_MANAGER_H_
#include "google/protobuf/any.pb.h"
#include "tensorflow/core/lib/core/status.h"
namespace google {
namespace protobuf {
class Any;
}
}
namespace tensorflow {
class Device;
......
......@@ -115,6 +115,7 @@ _allowed_symbols = ['bias_add',
'legacy_linear',
'legacy_relu',
'OPTIMIZER_CLS_NAMES',
'OPTIMIZER_SUMMARIES',
'regression_target',
'SPARSE_FEATURE_CROSS_DEFAULT_HASH_KEY',
'summaries']
......
......@@ -129,8 +129,9 @@ def optimize_loss(loss,
`None` to use all trainable variables.
name: The name for this operation is used to scope operations and summaries.
summaries: List of internal quantities to visualize on tensorboard. If not
set only the loss and the learning rate will be reported. The
complete list is in OPTIMIZER_SUMMARIES.
set, the loss, the learning rate, and the global norm of the
gradients will be reported. The complete list of possible values
is in OPTIMIZER_SUMMARIES.
colocate_gradients_with_ops: If True, try colocating gradients with the
corresponding op.
increment_global_step: Whether to increment `global_step`. If your model
......
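As a usage sketch of the `summaries` argument documented above (this example is not part of the commit; the toy loss only makes the call self-contained):

```python
import tensorflow as tf

w = tf.Variable([1.0, 2.0])
loss = tf.reduce_sum(tf.square(w))
global_step = tf.contrib.framework.get_or_create_global_step()

train_op = tf.contrib.layers.optimize_loss(
    loss=loss,
    global_step=global_step,
    learning_rate=0.01,
    optimizer='SGD',
    # With summaries=None the loss, the learning rate, and the global norm of
    # the gradients are reported (per the docstring above); an explicit list
    # drawn from OPTIMIZER_SUMMARIES narrows the reported set.
    summaries=['loss', 'learning_rate'])
```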
......@@ -28,7 +28,6 @@ import six
from six.moves import xrange # pylint: disable=redefined-builtin
from tensorflow.python.framework import dtypes
from tensorflow.python.framework import ops
from tensorflow.python.ops import array_ops
from tensorflow.python.platform import tf_logging as logging
......@@ -44,7 +43,7 @@ def _get_in_out_shape(x_shape, y_shape, n_classes, batch_size=None):
x_is_dict, y_is_dict = isinstance(
x_shape, dict), y_shape is not None and isinstance(y_shape, dict)
if y_is_dict and n_classes is not None:
assert (isinstance(n_classes, dict))
assert isinstance(n_classes, dict)
if batch_size is None:
batch_size = list(x_shape.values())[0][0] if x_is_dict else x_shape[0]
......@@ -322,10 +321,12 @@ class DataFeeder(object):
self._x = dict([(k, check_array(v, v.dtype)) for k, v in list(x.items())
]) if x_is_dict else check_array(x, x.dtype)
self._y = None if y is None else \
dict([(k, check_array(v, v.dtype)) for k, v in list(y.items())]) if x_is_dict else check_array(y, y.dtype)
self._y = None if y is None else (
dict([(k, check_array(v, v.dtype)) for k, v in list(y.items())])
if y_is_dict else check_array(y, y.dtype))
# self.n_classes is not None means we're converting raw target indices to one-hot.
# self.n_classes is not None means we're converting raw target indices
# to one-hot.
if n_classes is not None:
if not y_is_dict:
y_dtype = (np.int64
......@@ -344,12 +345,15 @@ class DataFeeder(object):
x_shape, y_shape, n_classes, batch_size)
# Input dtype matches dtype of x.
self._input_dtype = dict([(k, _check_dtype(v.dtype)) for k, v in list(self._x.items())]) if x_is_dict \
else _check_dtype(self._x.dtype)
# note: self._output_dtype = np.float32 when y is None
self._output_dtype = dict([(k, _check_dtype(v.dtype)) for k, v in list(self._y.items())]) if y_is_dict \
else _check_dtype(self._y.dtype) if y is not None else np.float32
self._input_dtype = (
dict([(k, _check_dtype(v.dtype)) for k, v in list(self._x.items())])
if x_is_dict else _check_dtype(self._x.dtype))
# self._output_dtype == np.float32 when y is None
self._output_dtype = (
dict([(k, _check_dtype(v.dtype)) for k, v in list(self._y.items())])
if y_is_dict else (
_check_dtype(self._y.dtype) if y is not None else np.float32))
# self.n_classes is None means we're passing in raw target indices
if n_classes is not None and y_is_dict:
......
......@@ -316,14 +316,14 @@ ifeq ($(TARGET),IOS)
IPHONESIMULATOR_SYSROOT := $(shell xcrun --sdk iphonesimulator \
--show-sdk-path)
IOS_SDK_VERSION := $(shell xcrun --sdk iphoneos --show-sdk-version)
MIN_SDK_VERSION := 8.0
MIN_SDK_VERSION := 9.0
# Override IOS_ARCH with ARMV7, ARMV7S, ARM64, or I386.
IOS_ARCH := X86_64
ifeq ($(IOS_ARCH),ARMV7)
CXXFLAGS += -miphoneos-version-min=$(MIN_SDK_VERSION) \
-arch armv7 \
-fembed-bitcode \
-D__thread= \
-D__thread=thread_local \
-DUSE_GEMM_FOR_CONV \
-Wno-c++11-narrowing \
-mno-thumb \
......@@ -347,7 +347,7 @@ ifeq ($(TARGET),IOS)
CXXFLAGS += -miphoneos-version-min=$(MIN_SDK_VERSION) \
-arch armv7s \
-fembed-bitcode \
-D__thread= \
-D__thread=thread_local \
-DUSE_GEMM_FOR_CONV \
-Wno-c++11-narrowing \
-mno-thumb \
......@@ -371,7 +371,7 @@ ifeq ($(TARGET),IOS)
CXXFLAGS += -miphoneos-version-min=$(MIN_SDK_VERSION) \
-arch arm64 \
-fembed-bitcode \
-D__thread= \
-D__thread=thread_local \
-DUSE_GEMM_FOR_CONV \
-Wno-c++11-narrowing \
-DTF_LEAN_BINARY \
......@@ -395,7 +395,7 @@ ifeq ($(TARGET),IOS)
-arch i386 \
-mno-sse \
-fembed-bitcode \
-D__thread= \
-D__thread=thread_local \
-DUSE_GEMM_FOR_CONV \
-Wno-c++11-narrowing \
-DTF_LEAN_BINARY \
......@@ -418,7 +418,7 @@ ifeq ($(TARGET),IOS)
CXXFLAGS += -mios-simulator-version-min=$(MIN_SDK_VERSION) \
-arch x86_64 \
-fembed-bitcode \
-D__thread= \
-D__thread=thread_local \
-DUSE_GEMM_FOR_CONV \
-Wno-c++11-narrowing \
-DTF_LEAN_BINARY \
......
......@@ -201,7 +201,8 @@ tensorflow/contrib/makefile/compile_ios_protobuf.sh
Then, you will need to compile the nsync library for iOS:
```export HOST_NSYNC_LIB=`tensorflow/contrib/makefile/compile_nsync.sh`
```bash
export HOST_NSYNC_LIB=`tensorflow/contrib/makefile/compile_nsync.sh`
export TARGET_NSYNC_LIB=`tensorflow/contrib/makefile/compile_nsync.sh -t ios`
```
......
......@@ -35,14 +35,18 @@ limitations under the License.
namespace tensorflow {
static void StartAbortRendevous(Rendezvous* rendez, const Status& s) {
rendez->StartAbort(s);
rendez->Unref();
}
BaseRendezvousMgr::BaseRendezvousMgr(const WorkerEnv* worker_env)
: worker_env_(worker_env) {}
BaseRendezvousMgr::~BaseRendezvousMgr() {
for (auto& p : table_) {
BaseRemoteRendezvous* rendez = p.second;
rendez->StartAbort(errors::Aborted("Shutdown"));
rendez->Unref();
auto rendez = p.second;
StartAbortRendevous(rendez, errors::Aborted("Shutdown"));
}
}
......@@ -52,7 +56,7 @@ RemoteRendezvous* BaseRendezvousMgr::Find(int64 step_id) {
BaseRemoteRendezvous* BaseRendezvousMgr::FindOrCreate(int64 step_id) {
mutex_lock l(mu_);
Table::iterator iter = table_.find(step_id);
auto iter = table_.find(step_id);
if (iter == table_.end()) {
auto rr = Create(step_id, worker_env_);
iter = table_.insert({step_id, rr}).first;
......@@ -64,7 +68,7 @@ BaseRemoteRendezvous* BaseRendezvousMgr::FindOrCreate(int64 step_id) {
void BaseRendezvousMgr::RecvLocalAsync(int64 step_id,
const Rendezvous::ParsedKey& parsed,
Rendezvous::DoneCallback done) {
BaseRemoteRendezvous* rendez = FindOrCreate(step_id);
auto rendez = FindOrCreate(step_id);
using namespace std::placeholders;
Rendezvous::DoneCallback done_cb = std::bind(
[rendez](Rendezvous::DoneCallback done,
......@@ -101,15 +105,15 @@ void BaseRendezvousMgr::Cleanup(int64 step_id) {
Rendezvous* rendez = nullptr;
{
mutex_lock l(mu_);
Table::iterator iter = table_.find(step_id);
auto iter = table_.find(step_id);
if (iter != table_.end()) {
rendez = iter->second;
table_.erase(iter);
}
}
if (!rendez) return;
rendez->StartAbort(errors::Aborted("Cleanup ", step_id));
rendez->Unref();
if (rendez) {
StartAbortRendevous(rendez, errors::Aborted("Cleanup ", step_id));
}
}
void BaseRendezvousMgr::CleanupAll() {
......@@ -122,8 +126,7 @@ void BaseRendezvousMgr::CleanupAll() {
table_.clear();
}
for (auto rendez : rendezs) {
rendez->StartAbort(errors::Aborted("Shutdown"));
rendez->Unref();
StartAbortRendevous(rendez, errors::Aborted("Shutdown"));
}
}
......@@ -165,7 +168,7 @@ Status BaseRemoteRendezvous::Initialize(WorkerSession* session) {
session_ = session;
std::swap(deferred_calls, deferred_calls_);
}
for (DeferredCall& call : deferred_calls) {
for (auto& call : deferred_calls) {
RecvLocalAsyncInternal(call.parsed, std::move(call.done));
}
return Status::OK();
......
......@@ -310,7 +310,7 @@ class OpKernelConstruction {
FunctionLibraryRuntime* function_library() const { return flib_; }
// The GraphDef version whose behavior we should follow.
const int graph_def_version() const { return graph_def_version_; }
int graph_def_version() const { return graph_def_version_; }
// Helper routines for the OP_REQUIRES macros
void CtxFailure(Status s);
......
......@@ -86,7 +86,7 @@ For example:
* Checks RecvTensor RPC latency and bandwidth.
* Checks CPU/Memory utilization of the job.
####AcceleratorUtilization Checker
#### AcceleratorUtilization Checker
* Checks what percentage of time the accelerator spends on computation.
#### OperationChecker
......@@ -100,7 +100,7 @@ For example:
* Checks the most expensive graph nodes.
* Checks the most expensive graph-building Python codes.
####Contribute Your Checker
#### Contribute Your Checker
Follow examples of accelerator_utilization_checker.h
......
......@@ -51,7 +51,7 @@ It defines _checkpoint_variable op type. It also provides checkpointed tensors'
Note: this feature is not well maintained now.
###Start `tfprof`
### Start `tfprof`
#### Build `tfprof`
......@@ -140,9 +140,9 @@ tfprof>
-output
```
###Examples
### Examples
####Profile Python Time
#### Profile Python Time
```shell
# Requires --graph_path --op_log_path
tfprof> code -max_depth 1000 -show_name_regexes .*model_analyzer.*py.* -select micros -account_type_regexes .* -order_by micros
......
##Options
## Options
###Overview
### Overview
For all tfprof views, the profiles are processed with the following procedures
......@@ -35,7 +35,7 @@ For all tfprof views, the profiles are processed with the following procedures
4) Finally, the filtered data structure is output in a format depending
on the `-output` option.
####Option Semantics In Different View
#### Option Semantics In Different View
options usually have the same semantics in different views. However, some
can vary. For example `-max_depth` in scope view means the depth of
name scope <b>tree</b>. In op view, it means the length of operation <b>list</b>.
......@@ -68,7 +68,7 @@ output_bytes: The memory output by the operation. It's not necessarily requested
by the current operation. For example, it can be a tensor
forwarded from input to output, with in-place mutation.
###Docs
### Docs
`-max_depth`: Show nodes that are at most this number of hops from starting node in the data structure.
......
##Profile Memory
## Profile Memory
It is generally a good idea to visualize the memory usage in timeline.
It allows you to see the memory consumption of each GPU over time.
......
##Profile Model Architecture
## Profile Model Architecture
* [Profile Model Parameters](#profile-model-parameters)
* [Profile Model Float Operations](#profile-model-float-operations)
###Profile Model Parameters
### Profile Model Parameters
<b>Notes:</b>
`VariableV2` operation type might contain variables created by TensorFlow
......@@ -39,9 +39,9 @@ param_stats = tf.profiler.profile(
sys.stdout.write('total_params: %d\n' % param_stats.total_parameters)
```
###Profile Model Float Operations
### Profile Model Float Operations
####Caveats
#### Caveats
For an operation to have float operation statistics:
......
##Profile Time
## Profile Time
* [Times in TensorFlow and tfprof](#times-in-tensorflow-and-tfprof)
* [Profile by Python Code](#profile-by-python-code)
......@@ -7,7 +7,7 @@
* [Profile by Name Scope](#profile-by-name-scope)
###Times in TensorFlow and tfprof
### Times in TensorFlow and tfprof
When we run a model, Tensorflow schedules and runs the nodes (operations)
in the graph. An operation can be placed on an accelerator or on CPU.
......@@ -37,7 +37,7 @@ When an operation is placed on CPU, it will completely run on CPU. Hence,
should be 0.
###Profile by Python Code
### Profile by Python Code
```python
# In code view, the time of each line of Python code is the aggregated
# times of all operations created by that line.
......@@ -112,7 +112,7 @@ Set ```-output timeline:outfile=<filename>``` to generate timeline instead of st
</left>
###Profile by Operation Type
### Profile by Operation Type
```python
# In op view, you can view the aggregated time of each operation type.
tfprof> op -select micros,occurrence -order_by micros
......@@ -138,7 +138,7 @@ MatMul 618.97ms (63.56%, 16.51%), |/job:worker/replica:0/
```
###Profile by Graph
### Profile by Graph
Usually, use graph view to generate a timeline to visualize the result.
......@@ -163,7 +163,7 @@ Open a Chrome browser, enter URL chrome://tracing and load the timeline file.
******************************************************
```
###Profile by Name Scope
### Profile by Name Scope
Usually scope view allows you to pin point the problematic places if you
have properly named your operations with tf.name_scope or tf.variable_scope.
......
......@@ -151,10 +151,10 @@ Take the following steps to install TensorFlow with Virtualenv:
(tensorflow)$ <b>pip install --upgrade tensorflow-gpu</b> # for Python 2.7 and GPU
(tensorflow)$ <b>pip3 install --upgrade tensorflow-gpu</b> # for Python 3.n and GPU</pre>
If the preceding command succeeds, skip Step 5. If the preceding
command fails, perform Step 5.
If the preceding command succeeds, skip Step 6. If the preceding
command fails, perform Step 6.
5. (Optional) If Step 4 failed (typically because you invoked a pip version
6. (Optional) If Step 5 failed (typically because you invoked a pip version
lower than 8.1), install TensorFlow in the active virtualenv environment
by issuing a command of the following format:
......
......@@ -71,12 +71,14 @@ Use that package at your own risk.
## Installing with native pip
If the following version of Python is not installed on your machine,
If one of the following versions of Python is not installed on your machine,
install it now:
* [Python 3.5.x 64-bit from python.org](https://www.python.org/downloads/release/python-352/)
* [Python 3.6.x 64-bit from python.org](https://www.python.org/downloads/release/python-362/)
Note that Python 3.5.x comes with the pip3 package manager, which is the
-TensorFlow supports Python 3.5.x and 3.6.x on Windows.
Note that Python 3 comes with the pip3 package manager, which is the
program you'll use to install TensorFlow.
To install TensorFlow, start a terminal. Then issue the appropriate
......
# Speech Commands Example
This is a basic speech recognition example. For more information, see the
tutorial at http://tensorflow.org/tutorials/audio_recognition.
tutorial at https://www.tensorflow.org/versions/master/tutorials/audio_recognition.
......@@ -2473,7 +2473,7 @@ class _IndicatorColumn(_DenseColumn,
weighted_column = sparse_ops.sparse_merge(
sp_ids=id_tensor,
sp_values=weight_tensor,
vocab_size=self._variable_shape[-1])
vocab_size=int(self._variable_shape[-1]))
return sparse_ops.sparse_tensor_to_dense(weighted_column)
dense_id_tensor = sparse_ops.sparse_tensor_to_dense(
......
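The `int(...)` cast above matters because `_variable_shape` is a `TensorShape`, so indexing it yields a `Dimension` object rather than a plain Python integer, which evidently `sparse_ops.sparse_merge` did not accept for `vocab_size` (GitHub issue 12557, exercised by the new test below). A small illustration, not part of the commit:

```python
import tensorflow as tf

shape = tf.TensorShape([None, 3])
print(type(shape[-1]))  # tensor_shape.Dimension, not a plain int
print(int(shape[-1]))   # 3 -- the integer value sparse_merge expects
```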
......@@ -3206,6 +3206,20 @@ class IndicatorColumnTest(test.TestCase):
with _initialized_session():
self.assertAllEqual([[0, 0, 1], [1, 0, 0]], indicator_tensor.eval())
def test_transform_with_weighted_column(self):
# Github issue 12557
ids = fc.categorical_column_with_vocabulary_list(
key='ids', vocabulary_list=('a', 'b', 'c'))
weights = fc.weighted_categorical_column(ids, 'weights')
indicator = fc.indicator_column(weights)
features = {
'ids': constant_op.constant(['c', 'b', 'a'], shape=(1, 3)),
'weights': constant_op.constant([2., 4., 6.], shape=(1, 3))
}
indicator_tensor = _transform_features(features, [indicator])[indicator]
with _initialized_session():
self.assertAllEqual([[6., 4., 2.]], indicator_tensor.eval())
def test_linear_model(self):
animal = fc.indicator_column(
fc.categorical_column_with_identity('animal', num_buckets=4))
......
......@@ -82,7 +82,7 @@ class DeviceDescription {
// Returns the limit on the number of simultaneously resident blocks
// on a multiprocessor.
const uint64 blocks_per_core_limit() const { return blocks_per_core_limit_; }
uint64 blocks_per_core_limit() const { return blocks_per_core_limit_; }
// Returns the limit on the total number of threads that can be launched in a
// single block; i.e. the limit on x * y * z dimensions of a ThreadDim.
......@@ -141,7 +141,7 @@ class DeviceDescription {
uint64 device_memory_size() const { return device_memory_size_; }
// Returns the device's core clock rate in GHz.
const float clock_rate_ghz() const { return clock_rate_ghz_; }
float clock_rate_ghz() const { return clock_rate_ghz_; }
// Returns whether ECC is enabled.
bool ecc_enabled() const { return ecc_enabled_; }
......
......@@ -302,7 +302,7 @@ class KernelArgIterator {
//
// Returns a default-constructed KernelArg if there is no next argument.
KernelArg next() {
KernelArg result;
KernelArg result = {};
if (!has_next()) {
return result;
} else if ((shmem_indices_iter_ != shmem_indices_end_) &&
......
......@@ -276,8 +276,9 @@ def check_for_lingering_string(lingering_string):
"""Check for given lingering strings."""
formatted_string = lingering_string.replace(".", r"\.")
try:
linger_strs = subprocess.check_output(
['grep', '-rnoH', formatted_string, TF_SRC_DIR]).split("\n")
linger_str_output = subprocess.check_output(
["grep", "-rnoH", formatted_string, TF_SRC_DIR])
linger_strs = linger_str_output.decode("utf8").split("\n")
except subprocess.CalledProcessError:
linger_strs = []
......
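The `.decode("utf8")` added above is needed because `subprocess.check_output` returns `bytes` on Python 3, and splitting a `bytes` object with a `str` separator raises a `TypeError`. A minimal illustration (not part of the commit):

```python
import subprocess

out = subprocess.check_output(["echo", "hello"])
print(type(out))                      # <class 'bytes'> on Python 3
lines = out.decode("utf8").split("\n")
print(lines)                          # ['hello', '']
```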
......@@ -84,6 +84,7 @@ py_binary(
"//tensorflow/python/saved_model",
"//tensorflow/python:spectral_ops_test_util",
"//tensorflow/python/tools:tools_pip",
"//tensorflow/python/eager:eager_pip",
# These targets don't build on Windows yet. Exclude them for now.
# "//tensorflow/contrib/ndlstm",
# "//tensorflow/contrib/slim",
......
......@@ -2,9 +2,9 @@
# Sqlite3 library. Provides utilities for interacting
# with sqlite3 databases.
licenses(["notice"]) # BSD/MIT-like license
licenses(["unencumbered"]) # Public Domain
exports_files(["LICENSE"])
# exports_files(["LICENSE"])
cc_library(
name = "sqlite",
......