Commit b1b2dc89 authored by: Martin Wicke; committed by: TensorFlower Gardener

Merge changes from github.

Change: 124183870
Parent: ca2a66bd
......@@ -7,7 +7,7 @@ The contrib directory contains project directories, each of which has designated
owners. It is meant to contain features and contributions that eventually should
get merged into core TensorFlow, but whose interfaces may still change, or which
require some testing to see whether they can find broader acceptance. We are
trying to keep dupliction within contrib to a minimum, so you may be asked to
trying to keep duplication within contrib to a minimum, so you may be asked to
refactor code in contrib to use some feature inside core or in another project
in contrib rather than reimplementing the feature.
......
......@@ -291,7 +291,7 @@
"$(SRCROOT)/../../../..",
);
INFOPLIST_FILE = "$(SRCROOT)/Info.plist";
IPHONEOS_DEPLOYMENT_TARGET = 7.0;
IPHONEOS_DEPLOYMENT_TARGET = 9.2;
LIBRARY_SEARCH_PATHS = (
"$(SRCROOT)/../../makefile/gen/lib",
"$(SRCROOT)/../../makefile/gen/protobuf_ios/lib",
......@@ -350,7 +350,7 @@
"$(SRCROOT)/../../../..",
);
INFOPLIST_FILE = "$(SRCROOT)/Info.plist";
IPHONEOS_DEPLOYMENT_TARGET = 7.0;
IPHONEOS_DEPLOYMENT_TARGET = 9.2;
LIBRARY_SEARCH_PATHS = (
"$(SRCROOT)/../../makefile/gen/lib",
"$(SRCROOT)/../../makefile/gen/protobuf_ios/lib",
......
......@@ -7,6 +7,8 @@
objects = {
/* Begin PBXBuildFile section */
590E7D881D02091F00DF5523 /* libprotobuf-lite.a in Frameworks */ = {isa = PBXBuildFile; fileRef = 590E7D861D02091F00DF5523 /* libprotobuf-lite.a */; };
590E7D8A1D0209DD00DF5523 /* libprotobuf.a in Frameworks */ = {isa = PBXBuildFile; fileRef = 590E7D871D02091F00DF5523 /* libprotobuf.a */; };
59A3D0011CF4E68100C4259F /* AppDelegate.mm in Sources */ = {isa = PBXBuildFile; fileRef = 59A3CFF21CF4E68100C4259F /* AppDelegate.mm */; };
59A3D0031CF4E68100C4259F /* grace_hopper.jpg in Resources */ = {isa = PBXBuildFile; fileRef = 59A3CFF51CF4E68100C4259F /* grace_hopper.jpg */; };
59A3D0051CF4E68100C4259F /* imagenet_comp_graph_label_strings.txt in Resources */ = {isa = PBXBuildFile; fileRef = 59A3CFF71CF4E68100C4259F /* imagenet_comp_graph_label_strings.txt */; };
......@@ -20,6 +22,8 @@
/* End PBXBuildFile section */
/* Begin PBXFileReference section */
590E7D861D02091F00DF5523 /* libprotobuf-lite.a */ = {isa = PBXFileReference; lastKnownFileType = archive.ar; name = "libprotobuf-lite.a"; path = "../../makefile/gen/protobuf_ios/lib/libprotobuf-lite.a"; sourceTree = "<group>"; };
590E7D871D02091F00DF5523 /* libprotobuf.a */ = {isa = PBXFileReference; lastKnownFileType = archive.ar; name = libprotobuf.a; path = ../../makefile/gen/protobuf_ios/lib/libprotobuf.a; sourceTree = "<group>"; };
5911579B1CF4011C00C31E3A /* tf_ios_makefile_example.app */ = {isa = PBXFileReference; explicitFileType = wrapper.application; includeInIndex = 0; path = tf_ios_makefile_example.app; sourceTree = BUILT_PRODUCTS_DIR; };
59A3CFF11CF4E68100C4259F /* AppDelegate.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; path = AppDelegate.h; sourceTree = "<group>"; };
59A3CFF21CF4E68100C4259F /* AppDelegate.mm */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.objcpp; path = AppDelegate.mm; sourceTree = "<group>"; };
......@@ -46,6 +50,8 @@
isa = PBXFrameworksBuildPhase;
buildActionMask = 2147483647;
files = (
590E7D8A1D0209DD00DF5523 /* libprotobuf.a in Frameworks */,
590E7D881D02091F00DF5523 /* libprotobuf-lite.a in Frameworks */,
59A3D0181CF4E86100C4259F /* UIKit.framework in Frameworks */,
59A3D0141CF4E82500C4259F /* CoreGraphics.framework in Frameworks */,
);
......@@ -57,6 +63,8 @@
591157921CF4011C00C31E3A = {
isa = PBXGroup;
children = (
590E7D861D02091F00DF5523 /* libprotobuf-lite.a */,
590E7D871D02091F00DF5523 /* libprotobuf.a */,
59A3D0171CF4E86100C4259F /* UIKit.framework */,
59A3D0151CF4E83D00C4259F /* Foundation.framework */,
59A3D0131CF4E82500C4259F /* CoreGraphics.framework */,
......@@ -272,6 +280,7 @@
"$(SRCROOT)/../../makefile/gen/proto",
);
INFOPLIST_FILE = "$(SRCROOT)/RunModel-Info.plist";
IPHONEOS_DEPLOYMENT_TARGET = 9.2;
LD_RUNPATH_SEARCH_PATHS = "$(inherited) @executable_path/Frameworks";
LIBRARY_SEARCH_PATHS = (
"$(SRCROOT)/../../makefile/gen/protobuf_ios/lib",
......@@ -299,11 +308,13 @@
"$(SRCROOT)/../../makefile/gen/proto",
);
INFOPLIST_FILE = "$(SRCROOT)/RunModel-Info.plist";
IPHONEOS_DEPLOYMENT_TARGET = 9.2;
LD_RUNPATH_SEARCH_PATHS = "$(inherited) @executable_path/Frameworks";
LIBRARY_SEARCH_PATHS = (
"$(SRCROOT)/../../makefile/gen/protobuf_ios/lib",
"$(SRCROOT)/../../makefile/gen/lib",
);
ONLY_ACTIVE_ARCH = NO;
OTHER_LDFLAGS = (
"-force_load",
"$(SRCROOT)/../../makefile/gen/lib/libtensorflow-core.a",
......
......@@ -36,7 +36,7 @@ HOST_BINDIR := $(MAKEFILE_DIR)/gen/host_bin/
HOST_GENDIR := $(MAKEFILE_DIR)/gen/host_obj/
# Which Eigen version we're using.
EIGEN_HASH := f3a13643ac1f
EIGEN_HASH := d02e6a705c30
# Settings for the host compiler.
HOST_CXX := gcc
......@@ -168,6 +168,9 @@ ifeq ($(TARGET),IOS)
-D__thread= \
-Wno-c++11-narrowing \
-mno-thumb \
-DTF_LEAN_BINARY \
-DMIN_LOG_LEVEL=0 \
-fno-exceptions \
-isysroot \
${IPHONEOS_SYSROOT}
LDFLAGS := -arch armv7 \
......@@ -182,10 +185,16 @@ ifeq ($(TARGET),IOS)
-D__thread= \
-Wno-c++11-narrowing \
-mno-thumb \
-DTF_LEAN_BINARY \
-DMIN_LOG_LEVEL=0 \
-fno-exceptions \
-isysroot \
${IPHONEOS_SYSROOT}
LDFLAGS := -arch armv7s \
-miphoneos-version-min=${MIN_SDK_VERSION} \
-Xlinker -S \
-Xlinker -x \
-Xlinker -dead_strip \
-all_load \
-L$(GENDIR)protobuf_ios/lib \
-lz
......@@ -195,10 +204,16 @@ ifeq ($(TARGET),IOS)
-arch arm64 \
-D__thread= \
-Wno-c++11-narrowing \
-DTF_LEAN_BINARY \
-DMIN_LOG_LEVEL=0 \
-fno-exceptions \
-isysroot \
${IPHONEOS_SYSROOT}
LDFLAGS := -arch arm64 \
-miphoneos-version-min=${MIN_SDK_VERSION} \
-Xlinker -S \
-Xlinker -x \
-Xlinker -dead_strip \
-all_load \
-L$(GENDIR)protobuf_ios/lib \
-lz
......@@ -208,10 +223,16 @@ ifeq ($(TARGET),IOS)
-arch i386 \
-D__thread= \
-Wno-c++11-narrowing \
-DTF_LEAN_BINARY \
-DMIN_LOG_LEVEL=0 \
-fno-exceptions \
-isysroot \
${IPHONESIMULATOR_SYSROOT}
LDFLAGS := -arch i386 \
-mios-simulator-version-min=${MIN_SDK_VERSION} \
-Xlinker -S \
-Xlinker -x \
-Xlinker -dead_strip \
-all_load \
-L$(GENDIR)protobuf_ios/lib \
-lz
......@@ -221,10 +242,16 @@ ifeq ($(TARGET),IOS)
-arch x86_64 \
-D__thread= \
-Wno-c++11-narrowing \
-DTF_LEAN_BINARY \
-DMIN_LOG_LEVEL=0 \
-fno-exceptions \
-isysroot \
${IPHONESIMULATOR_SYSROOT}
LDFLAGS := -arch x86_64 \
-mios-simulator-version-min=${MIN_SDK_VERSION} \
-Xlinker -S \
-Xlinker -x \
-Xlinker -dead_strip \
-all_load \
-L$(GENDIR)protobuf_ios/lib \
-lz
......
#!/bin/bash
# Copyright 2015 The TensorFlow Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ==============================================================================
# Builds protobuf 3 for Android. Pass in the location of your NDK as the first
# argument to the script, for example:
# tensorflow/contrib/makefile/compile_android_protobuf.sh \
# ${HOME}/toolchains/clang-21-stl-gnu
if [[ $# -ne 1 ]]
then
echo "You need to pass in the Android NDK as the first argument, e.g:"
echo "tensorflow/contrib/makefile/compile_android_protobuf.sh \
${HOME}/toolchains/clang-21-stl-gnu"
exit 1
fi
cd tensorflow/contrib/makefile
GENDIR=`pwd`/gen/protobuf/
LIBDIR=${GENDIR}lib
mkdir -p ${LIBDIR}
export NDK=$1
export PATH=${NDK}/bin:$PATH
export SYSROOT=${NDK}/sysroot
export CC="arm-linux-androideabi-gcc --sysroot $SYSROOT"
export CXX="arm-linux-androideabi-g++ --sysroot $SYSROOT"
export CXXSTL=$NDK/sources/cxx-stl/gnu-libstdc++/4.6
cd downloads/protobuf
mkdir build
./autogen.sh
if [ $? -ne 0 ]
then
echo "./autogen.sh command failed."
exit 1
fi
./configure --prefix=$(pwd)/build \
--host=arm-linux-androideabi \
--with-sysroot=$SYSROOT \
--disable-shared \
--enable-cross-compile \
--with-protoc=protoc \
CFLAGS="-march=armv7-a" \
CXXFLAGS="-march=armv7-a -I$CXXSTL/include -I$CXXSTL/libs/armeabi-v7a/include"
if [ $? -ne 0 ]
then
echo "./configure command failed."
exit 1
fi
make
if [ $? -ne 0 ]
then
echo "make command failed."
exit 1
fi
cp src/.libs/* ${LIBDIR}
if [ $? -ne 0 ]
then
echo "cp command failed."
exit 1
fi
#!/bin/bash -x
# Copyright 2015 The TensorFlow Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ==============================================================================
# Builds protobuf 3 for iOS.
cd tensorflow/contrib/makefile
GENDIR=`pwd`/gen/protobuf_ios/
LIBDIR=${GENDIR}lib
mkdir -p ${LIBDIR}
OSX_VERSION=darwin14.0.0
IPHONEOS_PLATFORM=`xcrun --sdk iphoneos --show-sdk-platform-path`
IPHONEOS_SYSROOT=`xcrun --sdk iphoneos --show-sdk-path`
IPHONESIMULATOR_PLATFORM=`xcrun --sdk iphonesimulator --show-sdk-platform-path`
IPHONESIMULATOR_SYSROOT=`xcrun --sdk iphonesimulator --show-sdk-path`
IOS_SDK_VERSION=`xcrun --sdk iphoneos --show-sdk-version`
MIN_SDK_VERSION=9.2
CFLAGS="-DNDEBUG -g -O0 -pipe -fPIC -fcxx-exceptions"
CXXFLAGS="${CFLAGS} -std=c++11 -stdlib=libc++"
LDFLAGS="-stdlib=libc++"
LIBS="-lc++ -lc++abi"
cd downloads/protobuf
./autogen.sh
if [ $? -ne 0 ]
then
echo "./autogen.sh command failed."
exit 1
fi
make distclean
./configure \
--build=x86_64-apple-${OSX_VERSION} \
--host=i386-apple-${OSX_VERSION} \
--disable-shared \
--enable-cross-compile \
--with-protoc=protoc \
--prefix=${LIBDIR}/iossim_386 \
--exec-prefix=${LIBDIR}/iossim_386 \
"CFLAGS=${CFLAGS} \
-mios-simulator-version-min=${MIN_SDK_VERSION} \
-arch i386 \
-isysroot ${IPHONESIMULATOR_SYSROOT}" \
"CXX=${CXX}" \
"CXXFLAGS=${CXXFLAGS} \
-mios-simulator-version-min=${MIN_SDK_VERSION} \
-arch i386 \
-isysroot \
${IPHONESIMULATOR_SYSROOT}" \
LDFLAGS="-arch i386 \
-mios-simulator-version-min=${MIN_SDK_VERSION} \
${LDFLAGS} \
-L${IPHONESIMULATOR_SYSROOT}/usr/lib/ \
-L${IPHONESIMULATOR_SYSROOT}/usr/lib/system" \
"LIBS=${LIBS}"
make
make install
make distclean
./configure \
--build=x86_64-apple-${OSX_VERSION} \
--host=x86_64-apple-${OSX_VERSION} \
--disable-shared \
--enable-cross-compile \
--with-protoc=protoc \
--prefix=${LIBDIR}/iossim_x86_64 \
--exec-prefix=${LIBDIR}/iossim_x86_64 \
"CFLAGS=${CFLAGS} \
-mios-simulator-version-min=${MIN_SDK_VERSION} \
-arch x86_64 \
-isysroot ${IPHONESIMULATOR_SYSROOT}" \
"CXX=${CXX}" \
"CXXFLAGS=${CXXFLAGS} \
-mios-simulator-version-min=${MIN_SDK_VERSION} \
-arch x86_64 \
-isysroot \
${IPHONESIMULATOR_SYSROOT}" \
LDFLAGS="-arch x86_64 \
-mios-simulator-version-min=${MIN_SDK_VERSION} \
${LDFLAGS} \
-L${IPHONESIMULATOR_SYSROOT}/usr/lib/ \
-L${IPHONESIMULATOR_SYSROOT}/usr/lib/system" \
"LIBS=${LIBS}"
make
make install
make distclean
./configure \
--build=x86_64-apple-${OSX_VERSION} \
--host=armv7-apple-${OSX_VERSION} \
--with-protoc=protoc \
--disable-shared \
--prefix=${LIBDIR}/ios_arm7 \
--exec-prefix=${LIBDIR}/ios_arm7 \
"CFLAGS=${CFLAGS} \
-miphoneos-version-min=${MIN_SDK_VERSION} \
-arch armv7 \
-isysroot ${IPHONEOS_SYSROOT}" \
"CXX=${CXX}" \
"CXXFLAGS=${CXXFLAGS} \
-miphoneos-version-min=${MIN_SDK_VERSION} \
-arch armv7 \
-isysroot ${IPHONEOS_SYSROOT}" \
LDFLAGS="-arch armv7 \
-miphoneos-version-min=${MIN_SDK_VERSION} \
${LDFLAGS}" \
"LIBS=${LIBS}"
make
make install
make distclean
./configure \
--build=x86_64-apple-${OSX_VERSION} \
--host=armv7s-apple-${OSX_VERSION} \
--with-protoc=protoc \
--disable-shared \
--prefix=${LIBDIR}/ios_arm7s \
--exec-prefix=${LIBDIR}/ios_arm7s \
"CFLAGS=${CFLAGS} \
-miphoneos-version-min=${MIN_SDK_VERSION} \
-arch armv7s \
-isysroot ${IPHONEOS_SYSROOT}" \
"CXX=${CXX}" \
"CXXFLAGS=${CXXFLAGS} \
-miphoneos-version-min=${MIN_SDK_VERSION} \
-arch armv7s \
-isysroot ${IPHONEOS_SYSROOT}" \
LDFLAGS="-arch armv7s \
-miphoneos-version-min=${MIN_SDK_VERSION} \
${LDFLAGS}" \
"LIBS=${LIBS}"
make
make install
make distclean
./configure \
--build=x86_64-apple-${OSX_VERSION} \
--host=arm \
--with-protoc=protoc \
--disable-shared \
--prefix=${LIBDIR}/ios_arm64 \
--exec-prefix=${LIBDIR}/ios_arm64 \
"CFLAGS=${CFLAGS} \
-miphoneos-version-min=${MIN_SDK_VERSION} \
-arch arm64 \
-isysroot ${IPHONEOS_SYSROOT}" \
"CXXFLAGS=${CXXFLAGS} \
-miphoneos-version-min=${MIN_SDK_VERSION} \
-arch arm64 \
-isysroot ${IPHONEOS_SYSROOT}" \
LDFLAGS="-arch arm64 \
-miphoneos-version-min=${MIN_SDK_VERSION} \
${LDFLAGS}" \
"LIBS=${LIBS}"
make
make install
lipo \
${LIBDIR}/iossim_386/lib/libprotobuf.a \
${LIBDIR}/iossim_x86_64/lib/libprotobuf.a \
${LIBDIR}/ios_arm7/lib/libprotobuf.a \
${LIBDIR}/ios_arm7s/lib/libprotobuf.a \
${LIBDIR}/ios_arm64/lib/libprotobuf.a \
-create \
-output ${LIBDIR}/libprotobuf.a
lipo \
${LIBDIR}/iossim_386/lib/libprotobuf-lite.a \
${LIBDIR}/iossim_x86_64/lib/libprotobuf-lite.a \
${LIBDIR}/ios_arm7/lib/libprotobuf-lite.a \
${LIBDIR}/ios_arm7s/lib/libprotobuf-lite.a \
${LIBDIR}/ios_arm64/lib/libprotobuf-lite.a \
-create \
-output ${LIBDIR}/libprotobuf-lite.a
#!/bin/bash -x
# Copyright 2015 The TensorFlow Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ==============================================================================
# Builds the TensorFlow core library with ARM and x86 architectures for iOS, and
# packs them into a fat file.
GENDIR=tensorflow/contrib/makefile/gen/
LIBDIR=${GENDIR}lib
LIB_PREFIX=libtensorflow-core
make -f tensorflow/contrib/makefile/Makefile cleantarget
make -f tensorflow/contrib/makefile/Makefile \
TARGET=IOS IOS_ARCH=ARMV7 LIB_NAME=${LIB_PREFIX}-armv7.a OPTFLAGS="$1" $2 $3
if [ $? -ne 0 ]
then
echo "armv7 compilation failed."
exit 1
fi
make -f tensorflow/contrib/makefile/Makefile cleantarget
make -f tensorflow/contrib/makefile/Makefile \
TARGET=IOS IOS_ARCH=ARMV7S LIB_NAME=${LIB_PREFIX}-armv7s.a OPTFLAGS="$1" $2 $3
if [ $? -ne 0 ]
then
echo "arm7vs compilation failed."
exit 1
fi
make -f tensorflow/contrib/makefile/Makefile cleantarget
make -f tensorflow/contrib/makefile/Makefile \
TARGET=IOS IOS_ARCH=ARM64 LIB_NAME=${LIB_PREFIX}-arm64.a OPTFLAGS="$1" $2 $3
if [ $? -ne 0 ]
then
echo "arm64 compilation failed."
exit 1
fi
make -f tensorflow/contrib/makefile/Makefile cleantarget
make -f tensorflow/contrib/makefile/Makefile \
TARGET=IOS IOS_ARCH=I386 LIB_NAME=${LIB_PREFIX}-i386.a OPTFLAGS="$1" $2 $3
if [ $? -ne 0 ]
then
echo "i386 compilation failed."
exit 1
fi
make -f tensorflow/contrib/makefile/Makefile cleantarget
make -f tensorflow/contrib/makefile/Makefile \
TARGET=IOS IOS_ARCH=X86_64 LIB_NAME=${LIB_PREFIX}-x86_64.a OPTFLAGS="$1" $2 $3
if [ $? -ne 0 ]
then
echo "x86_64 compilation failed."
exit 1
fi
lipo \
${LIBDIR}/${LIB_PREFIX}-armv7.a \
${LIBDIR}/${LIB_PREFIX}-armv7s.a \
${LIBDIR}/${LIB_PREFIX}-arm64.a \
${LIBDIR}/${LIB_PREFIX}-i386.a \
${LIBDIR}/${LIB_PREFIX}-x86_64.a \
-create \
-output ${LIBDIR}/${LIB_PREFIX}.a
#!/bin/bash -x
# Copyright 2015 The TensorFlow Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ==============================================================================
# Builds protobuf 3 for iOS.
cd tensorflow/contrib/makefile
GENDIR=`pwd`/gen/protobuf_pi/
LIBDIR=${GENDIR}
mkdir -p ${LIBDIR}
CXX=arm-linux-gnueabihf-g++
cd downloads/protobuf
./autogen.sh
if [ $? -ne 0 ]
then
echo "./autogen.sh command failed."
exit 1
fi
make distclean
./configure \
--build=i686-pc-linux-gnu \
--host=arm-linux \
--target=arm-linux \
--disable-shared \
--enable-cross-compile \
--with-protoc=protoc \
--prefix=${LIBDIR} \
--exec-prefix=${LIBDIR} \
"CXX=${CXX}" \
make
make install
#!/bin/bash -x
# Copyright 2015 The TensorFlow Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ==============================================================================
DOWNLOADS_DIR=tensorflow/contrib/makefile/downloads
mkdir ${DOWNLOADS_DIR}
EIGEN_HASH=d02e6a705c30
curl "https://bitbucket.org/eigen/eigen/get/${EIGEN_HASH}.tar.gz" \
-o /tmp/eigen-${EIGEN_HASH}.tar.gz
tar xzf /tmp/eigen-${EIGEN_HASH}.tar.gz -C ${DOWNLOADS_DIR}
git clone https://github.com/google/re2.git ${DOWNLOADS_DIR}/re2
git clone https://github.com/google/gemmlowp.git ${DOWNLOADS_DIR}/gemmlowp
git clone https://github.com/google/protobuf.git ${DOWNLOADS_DIR}/protobuf
# JPEG_VERSION=v9a
# curl "http://www.ijg.org/files/jpegsrc.${JPEG_VERSION}.tar.gz" \
# -o /tmp/jpegsrc.${JPEG_VERSION}.tar.gz
# tar xzf /tmp/jpegsrc.${JPEG_VERSION}.tar.gz -C ${DOWNLOADS_DIR}
# PNG_VERSION=v1.2.53
# curl -L "https://github.com/glennrp/libpng/archive/${PNG_VERSION}.zip" \
# -o /tmp/pngsrc.${PNG_VERSION}.zip
# unzip /tmp/pngsrc.${PNG_VERSION}.zip -d ${DOWNLOADS_DIR}
#!/bin/bash -x
# Copyright 2015 The TensorFlow Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ==============================================================================
# This script generates the source file lists needed by the makefile by querying
# the master Bazel build configuration.
bazel query 'kind("source file", deps(//tensorflow/core:android_tensorflow_lib))' | \
grep "//tensorflow/.*\.cc$" | \
grep -v "gen_proto_text" | \
grep -E -v "jpeg" | \
grep -E -v "png" | \
sed -E 's#^//##g' | \
sed -E 's#:#/#g' \
> make/tf_cc_files.txt
bazel query 'kind("source file", deps(//tensorflow/core:android_tensorflow_lib))' | \
grep "//tensorflow/.*\.proto$" | \
sed -E 's#^//##g' | \
sed -E 's#:#/#g' \
> make/tf_proto_files.txt
bazel query 'kind("generated file", deps(//tensorflow/core:proto_text))' | \
grep "pb_text\.cc$" | \
sed -E 's#^//##g' | \
sed -E 's#:#/#g' \
> make/tf_pb_text_files.txt
bazel query 'kind("source file", deps(//tensorflow/tools/proto_text:gen_proto_text_functions))' | \
grep -E "//tensorflow/.*\.cc$" | \
grep -E -v "jpeg" | \
grep -E -v "png" | \
sed -E 's#^//##g' | \
sed -E 's#:#/#g' \
> make/proto_text_cc_files.txt
bazel query 'kind("generated file", deps(//tensorflow/tools/proto_text:gen_proto_text_functions))' | \
grep -E "//tensorflow/.*\.cc$" | \
sed -E 's#^//##g' | \
sed -E 's#:#/#g' \
> make/proto_text_pb_cc_files.txt
bazel query 'kind("generated file", deps(//tensorflow/tools/proto_text:gen_proto_text_functions))' | \
grep -E "//tensorflow/.*\.h$" | \
sed -E 's#^//##g' | \
sed -E 's#:#/#g' \
> make/proto_text_pb_h_files.txt
......@@ -43,17 +43,18 @@ py_library(
],
)
py_test(
name = "learning_test",
srcs = ["python/slim/learning_test.py"],
srcs_version = "PY2AND3",
deps = [
"//tensorflow:tensorflow_py",
"//tensorflow/contrib/slim",
"//tensorflow/python:framework_test_lib",
"//tensorflow/python:platform_test",
],
)
# TODO(nsilberman): Fix this test and re-enable.
#py_test(
# name = "learning_test",
# srcs = ["python/slim/learning_test.py"],
# srcs_version = "PY2AND3",
# deps = [
# "//tensorflow:tensorflow_py",
# "//tensorflow/contrib/slim",
# "//tensorflow/python:framework_test_lib",
# "//tensorflow/python:platform_test",
# ],
#)
py_library(
name = "queues",
......
......@@ -16,6 +16,8 @@ limitations under the License.
#define USE_EIGEN_TENSOR
#define EIGEN_USE_THREADS
#include <array>
#include "tensorflow/core/kernels/cudnn_pooling_gpu.h"
#include "tensorflow/core/kernels/conv_2d.h"
#include "tensorflow/core/kernels/conv_3d.h"
......
......@@ -68,6 +68,7 @@ namespace functor {
extern template struct L2Loss<GPUDevice, T>;
DECLARE_GPU_SPEC(float);
DECLARE_GPU_SPEC(double);
DECLARE_GPU_SPEC(Eigen::half);
#undef DECLARE_GPU_SPEC
} // namespace functor
......@@ -79,6 +80,7 @@ DECLARE_GPU_SPEC(Eigen::half);
L2LossOp<GPUDevice, T>);
REGISTER_GPU_KERNEL(float);
REGISTER_GPU_KERNEL(double);
REGISTER_GPU_KERNEL(Eigen::half);
#undef REGISTER_GPU_KERNEL
......
......@@ -25,6 +25,7 @@ namespace tensorflow {
typedef Eigen::GpuDevice GPUDevice;
template struct functor::L2Loss<GPUDevice, float>;
template struct functor::L2Loss<GPUDevice, double>;
template struct functor::L2Loss<GPUDevice, Eigen::half>;
} // namespace tensorflow
......
......@@ -48,7 +48,9 @@ struct ApplyAdadelta<CPUDevice, T> {
typename TTypes<T>::ConstFlat grad) {
accum.device(d) =
accum * rho() + grad.square() * (static_cast<T>(1) - rho());
const auto update = accum_update * (accum + epsilon()).rsqrt() * grad;
const auto update =
(accum_update + epsilon()).sqrt() *
(accum + epsilon()).rsqrt() * grad;
accum_update.device(d) =
accum_update * rho() + update.square() * (static_cast<T>(1) - rho());
var.device(d) -= update * lr();
......
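For reference, the revised expression matches the standard Adadelta rule, where epsilon sits inside both square roots rather than only under the accumulator. Below is a minimal NumPy sketch of one such step; the scalar values are hypothetical and chosen only to mirror the test further down, not taken from the kernel itself.

```python
import numpy as np

# One Adadelta step with the corrected update:
#   update = sqrt(accum_update + eps) / sqrt(accum + eps) * grad
rho, eps, lr = 0.95, 1e-8, 1.0
grad, accum, accum_update, var = 0.1, 0.0, 0.0, 1.0

accum = rho * accum + (1.0 - rho) * grad**2                    # running average of squared gradients
update = np.sqrt(accum_update + eps) / np.sqrt(accum + eps) * grad
accum_update = rho * accum_update + (1.0 - rho) * update**2    # running average of squared updates
var -= lr * update
print(accum, accum_update, var)
```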
......@@ -1665,10 +1665,25 @@ class ComplexMakeRealImagTest(tf.test.TestCase):
delta=epsilon)
self.assertAllClose(jacob_t, jacob_n, rtol=epsilon, atol=epsilon)
def _compareBroadcastGradient(self, x):
x_ = tf.convert_to_tensor(x)
epsilon = 1e-3
with self.test_session():
for args in [(x_, 0.), (0., x_)]:
z = tf.reduce_sum(tf.complex_abs(tf.complex(*args)))
jacob_t, jacob_n = tf.test.compute_gradient(x_,
list(x.shape),
z,
[1],
x_init_value=x,
delta=epsilon)
self.assertAllClose(jacob_t, jacob_n, rtol=epsilon, atol=epsilon)
def testGradient(self):
# complex64
data = np.arange(1, 2, 0.10).reshape([5, 2]).astype(np.float32)
self._compareGradient(data)
self._compareBroadcastGradient(data)
# complex128
data = np.arange(1, 2, 0.10).reshape([5, 2]).astype(np.float64)
self._compareGradient(data)
......
......@@ -48,6 +48,15 @@ class ShapeOpsTest(tf.test.TestCase):
self.assertAllEqual(np_ans, result)
self.assertShapeEqual(np_ans, tf_ans)
def _compareShapeSparse(self, x_np, use_gpu=False):
np_ans = np.array(np.shape(x_np))
x_tf, unused_nnz = _sparsify(x_np)
with self.test_session(use_gpu=use_gpu):
tf_ans = tf.shape(x_tf)
result = tf_ans.eval()
self.assertAllEqual(np_ans, result)
self.assertShapeEqual(np_ans, tf_ans)
def _compareShapeN(self, x, use_gpu=False):
np_ans = np.array(np.shape(x))
with self.test_session(use_gpu=use_gpu) as sess:
......@@ -67,7 +76,7 @@ class ShapeOpsTest(tf.test.TestCase):
def _compareRankSparse(self, x_np, use_gpu=False):
np_ans = np.asarray(np.ndim(x_np))
x_tf, nnz = _sparsify(x_np)
x_tf, unused_nnz = _sparsify(x_np)
with self.test_session(use_gpu=use_gpu):
tf_ans = tf.rank(x_tf)
result = tf_ans.eval()
......@@ -87,6 +96,7 @@ class ShapeOpsTest(tf.test.TestCase):
self._compareShapeN(x, use_gpu=False)
self._compareRank(x, use_gpu=False)
self._compareSize(x, use_gpu=False)
self._compareShapeSparse(x, use_gpu=False)
self._compareRankSparse(x, use_gpu=False)
def _testGpu(self, x):
......@@ -94,6 +104,7 @@ class ShapeOpsTest(tf.test.TestCase):
self._compareShapeN(x, use_gpu=True)
self._compareRank(x, use_gpu=True)
self._compareSize(x, use_gpu=True)
self._compareShapeSparse(x, use_gpu=True)
self._compareRankSparse(x, use_gpu=True)
def _testAll(self, x):
......
......@@ -100,6 +100,32 @@ _baseslice = slice
listdiff = gen_array_ops.list_diff
def shape(input, name=None):
"""Returns the shape of a tensor.
This operation returns a 1-D integer tensor representing the shape of `input`.
For example:
```python
# 't' is [[[1, 1, 1], [2, 2, 2]], [[3, 3, 3], [4, 4, 4]]]
shape(t) ==> [2, 2, 3]
```
Args:
input: A `Tensor` or `SparseTensor`.
name: A name for the operation (optional).
Returns:
A `Tensor` of type `int32`.
"""
with ops.op_scope([input], name, "Shape") as name:
if isinstance(input, ops.SparseTensor):
return input.shape
else:
return gen_array_ops.shape(input, name=name)
def rank(input, name=None):
"""Returns the rank of a tensor.
......
......@@ -681,9 +681,15 @@ ops.NoGradient("LinSpace")
@ops.RegisterGradient("Complex")
def _ComplexGrad(_, grad):
def _ComplexGrad(op, grad):
"""Returns the real and imaginary components of 'grad', respectively."""
return math_ops.real(grad), math_ops.imag(grad)
x = op.inputs[0]
y = op.inputs[1]
sx = array_ops.shape(x)
sy = array_ops.shape(y)
rx, ry = gen_array_ops._broadcast_gradient_args(sx, sy)
return (array_ops.reshape(math_ops.reduce_sum(math_ops.real(grad), rx), sx),
array_ops.reshape(math_ops.reduce_sum(math_ops.imag(grad), ry), sy))
@ops.RegisterGradient("Real")
......
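The new gradient accounts for broadcasting between the real and imaginary inputs: the incoming gradient is reduce-summed over the broadcast dimensions and reshaped back to each input's shape, as other broadcasting binary ops do. A rough NumPy illustration of the idea follows; the shapes are hypothetical and this does not use TensorFlow's internal broadcast helpers.

```python
import numpy as np

# Suppose x has shape (5, 2) and y is a scalar broadcast against it in complex(x, y).
# The upstream gradient then has shape (5, 2); the gradient w.r.t. y must be
# reduced over the broadcast axes so that it matches y's shape.
grad = np.ones((5, 2)) + 1j * np.ones((5, 2))  # hypothetical incoming gradient
gx = np.real(grad)                             # same shape as x: (5, 2)
gy = np.imag(grad).sum()                       # reduced over broadcast axes to y's scalar shape
print(gx.shape, np.shape(gy))
```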
......@@ -116,11 +116,13 @@ class LogSoftmaxTest(tf.test.TestCase):
class L2LossTest(tf.test.TestCase):
def testL2Loss(self):
with self.test_session():
x = tf.constant([1.0, 0.0, 3.0, 2.0], shape=[2, 2], name="x")
l2loss = tf.nn.l2_loss(x)
value = l2loss.eval()
self.assertAllClose(7.0, value)
for dtype in [tf.float32, tf.float64]:
with self.test_session():
x = tf.constant([1.0, 0.0, 3.0, 2.0], shape=[2, 2], name="x",
dtype=dtype)
l2loss = tf.nn.l2_loss(x)
value = l2loss.eval()
self.assertAllClose(7.0, value)
def testGradient(self):
x_shape = [20, 7, 3]
......
......@@ -126,7 +126,8 @@ def rnn(cell, inputs, initial_state=None, dtype=None,
state = initial_state
else:
if not dtype:
raise ValueError("If no initial_state is provided, dtype must be.")
raise ValueError("If no initial_state is provided, "
"dtype must be specified")
state = cell.zero_state(batch_size, dtype)
if sequence_length is not None: # Prepare variables
......
......@@ -20,104 +20,94 @@ from __future__ import print_function
import tensorflow.python.platform
import numpy as np
from six.moves import xrange # pylint: disable=redefined-builtin
import tensorflow as tf
class AdadeltaOptimizerTest(tf.test.TestCase):
def testBasic(self):
num_updates = 4 # number of ADADELTA steps to perform
for dtype in [tf.half, tf.float32]:
with self.test_session():
var0 = tf.Variable([1.0, 2.0], dtype=dtype)
var1 = tf.Variable([3.0, 4.0], dtype=dtype)
grads0 = tf.constant([0.1, 0.1], dtype=dtype)
grads1 = tf.constant([0.01, 0.01], dtype=dtype)
lr = 1.0
rho = 0.95
epsilon = 1e-8
adadelta_opt = tf.train.AdadeltaOptimizer(lr, rho=rho, epsilon=epsilon)
adadelta_update = adadelta_opt.apply_gradients(zip(
[grads0, grads1], [var0, var1]))
tf.initialize_all_variables().run()
# Check we have slots
self.assertEqual(["accum", "accum_update"],
adadelta_opt.get_slot_names())
slot0 = adadelta_opt.get_slot(var0, "accum")
self.assertEquals(slot0.get_shape(), var0.get_shape())
self.assertFalse(slot0 in tf.trainable_variables())
slot0_update = adadelta_opt.get_slot(var0, "accum_update")
self.assertEquals(slot0_update.get_shape(), var0.get_shape())
self.assertFalse(slot0_update in tf.trainable_variables())
slot1 = adadelta_opt.get_slot(var1, "accum")
self.assertEquals(slot1.get_shape(), var1.get_shape())
self.assertFalse(slot1 in tf.trainable_variables())
slot1_update = adadelta_opt.get_slot(var1, "accum_update")
self.assertEquals(slot1_update.get_shape(), var1.get_shape())
self.assertFalse(slot1_update in tf.trainable_variables())
# Fetch params to validate initial values
self.assertAllClose([1.0, 2.0], var0.eval())
self.assertAllClose([3.0, 4.0], var1.eval())
adadelta_update.run()
# Check that the accumulators have been updated.
grad = 0.1
accum = 0
accum_update = 0
accum = accum * rho + (grad**2) * (1 - rho)
update1 = np.sqrt(accum_update + epsilon) * (
1. / np.sqrt(accum + epsilon)) * grad
accum_update = accum_update * rho + (update1**2) * (1.0 - rho)
self.assertAllCloseAccordingToType(
np.array([accum, accum]), slot0.eval())
self.assertAllCloseAccordingToType(
np.array([accum_update, accum_update]), slot0_update.eval())
# Check that the parameters have been updated.
self.assertAllCloseAccordingToType(
np.array([1.0 - update1 * lr, 2.0 - update1 * lr]),
var0.eval(),
rtol=1e-3)
self.assertAllCloseAccordingToType(
np.array([3.0 - update1 * lr, 4.0 - update1 * lr]),
var1.eval(),
rtol=1e-3)
# Step 2: the momentum accumulators contain the previous update.
accum = accum * rho + (grad**2) * (1 - rho)
update2 = ((accum_update + epsilon)**0.5 *
(1. / (accum + epsilon)**0.5) * grad)
accum_update = accum_update * rho + (update2**2) * (1.0 - rho)
adadelta_update.run()
# Check that the momentum accumulators have been updated.
self.assertAllCloseAccordingToType(
np.array([accum, accum]), slot0.eval())
self.assertAllCloseAccordingToType(
np.array([accum_update, accum_update]), slot0_update.eval())
# Check that the parameters have been updated.
self.assertAllCloseAccordingToType(
np.array([1.0 - update1 - update2, 2.0 - update1 - update2]),
var0.eval(),
rtol=1e-3)
self.assertAllCloseAccordingToType(
np.array([3.0 - update1 - update2, 4.0 - update1 - update2]),
var1.eval(),
rtol=1e-3)
for grad in [0.2, 0.1, 0.01]:
for lr in [1.0, 0.5, 0.1]:
with self.test_session():
var0_init = [1.0, 2.0]
var1_init = [3.0, 4.0]
var0 = tf.Variable(var0_init, dtype=dtype)
var1 = tf.Variable(var1_init, dtype=dtype)
grads = tf.constant([grad, grad], dtype=dtype)
accum = 0.0
accum_update = 0.0
# ADADELTA gradient optimizer
rho = 0.95
epsilon = 1e-8
adadelta_opt = tf.train.AdadeltaOptimizer(lr, rho, epsilon)
adadelta_update = adadelta_opt.apply_gradients(zip(
[grads, grads], [var0, var1]))
tf.initialize_all_variables().run()
# Assign slots
slot = [None] * 2
slot_update = [None] * 2
self.assertEqual(["accum", "accum_update"],
adadelta_opt.get_slot_names())
slot[0] = adadelta_opt.get_slot(var0, "accum")
self.assertEquals(slot[0].get_shape(), var0.get_shape())
self.assertFalse(slot[0] in tf.trainable_variables())
slot_update[0] = adadelta_opt.get_slot(var0, "accum_update")
self.assertEquals(slot_update[0].get_shape(), var0.get_shape())
self.assertFalse(slot_update[0] in tf.trainable_variables())
slot[1] = adadelta_opt.get_slot(var1, "accum")
self.assertEquals(slot[1].get_shape(), var1.get_shape())
self.assertFalse(slot[1] in tf.trainable_variables())
slot_update[1] = adadelta_opt.get_slot(var1, "accum_update")
self.assertEquals(slot_update[1].get_shape(), var1.get_shape())
self.assertFalse(slot_update[1] in tf.trainable_variables())
# Fetch params to validate initial values
self.assertAllClose(var0_init, var0.eval())
self.assertAllClose(var1_init, var1.eval())
update = [None] * num_updates
tot_update = 0
for step in range(num_updates):
# Run adadelta update for comparison
adadelta_update.run()
# Perform initial update without previous accum values
accum = accum * rho + (grad**2) * (1 - rho)
update[step] = (np.sqrt(accum_update + epsilon) *
(1. / np.sqrt(accum + epsilon)) * grad)
accum_update = (accum_update * rho + (update[step]**2) *
(1.0 - rho))
tot_update += update[step] * lr
# Check that the accumulators have been updated
for slot_idx in range(2):
self.assertAllCloseAccordingToType(
np.array([accum, accum], dtype=dtype.as_numpy_dtype()),
slot[slot_idx].eval())
self.assertAllCloseAccordingToType(
np.array([accum_update, accum_update],
dtype=dtype.as_numpy_dtype()),
slot_update[slot_idx].eval())
# Check that the parameters have been updated
self.assertAllCloseAccordingToType(
np.array([var0_init[0] - tot_update,
var0_init[1] - tot_update], dtype=dtype.as_numpy_dtype()),
var0.eval(), rtol=1e-3)
self.assertAllCloseAccordingToType(
np.array([var1_init[0] - tot_update,
var1_init[1] - tot_update], dtype=dtype.as_numpy_dtype()),
var1.eval(), rtol=1e-3)
if __name__ == "__main__":
tf.test.main()
......@@ -27,8 +27,7 @@ CudaContext* ExtractCudaContext(CUDAExecutor *cuda_exec);
CUDAExecutor *ExtractCudaExecutor(StreamExecutor *stream_exec);
ScopedActivateExecutorContext::ScopedActivateExecutorContext(
CUDAExecutor *cuda_exec)
: cuda_exec_(cuda_exec),
CUDAExecutor *cuda_exec):
driver_scoped_activate_context_(
new ScopedActivateContext{ExtractCudaContext(cuda_exec)}) { }
......
......@@ -51,8 +51,6 @@ class ScopedActivateExecutorContext {
~ScopedActivateExecutorContext();
private:
// The CUDA executor implementation whose context is activated.
CUDAExecutor* cuda_exec_;
// The cuda.h-using datatype that we wrap.
ScopedActivateContext* driver_scoped_activate_context_;
......
......@@ -457,6 +457,7 @@ class ScopedFilterDescriptor {
<< ToString(status);
}
#if CUDNN_VERSION >= 5000
// TODO(b/23032134): Even if the filter layout is not supported,
// cudnnSetFilter4DDescriptor_v4 will return CUDNN_STATUS_SUCCESS because it
// does not take layout as an input. Maybe force cuDNN by giving wrong
......@@ -471,6 +472,7 @@ class ScopedFilterDescriptor {
<< FilterLayoutString(filter_descriptor.layout());
break;
}
#endif
std::vector<int> dims(2 + filter_descriptor.ndims());
dims[0] = filter_descriptor.output_feature_map_count();
......@@ -666,7 +668,7 @@ class ScopedActivationDescriptor {
mode = CUDNN_ACTIVATION_TANH;
break;
default:
LOG(ERROR) << "unrecognized activation mode: "
LOG(FATAL) << "unrecognized activation mode: "
<< static_cast<int>(activation_mode);
}
......@@ -1916,6 +1918,7 @@ bool CudnnSupport::DoNormalize(
Stream* stream, const dnn::NormalizeDescriptor& normalize_descriptor,
const DeviceMemory<float>& input_data, DeviceMemory<float>* output_data) {
LOG(FATAL) << "not yet implemented"; // TODO(leary)
return false;
}
bool CudnnSupport::DoDepthConcatenate(
......@@ -1977,6 +1980,7 @@ bool CudnnSupport::DoElementwiseOperate(
const dnn::BatchDescriptor& output_dimensions,
DeviceMemory<float>* output_data) {
LOG(FATAL) << "not yet implemented"; // TODO(leary)
return false;
}
bool CudnnSupport::DoXYPad(Stream* stream,
......@@ -1985,6 +1989,7 @@ bool CudnnSupport::DoXYPad(Stream* stream,
int64 left_pad, int64 right_pad, int64 top_pad,
int64 bottom_pad, DeviceMemory<float>* output_data) {
LOG(FATAL) << "not yet implemented"; // TODO(leary)
return false;
}
bool CudnnSupport::DoXYSlice(Stream* stream,
......@@ -1994,6 +1999,7 @@ bool CudnnSupport::DoXYSlice(Stream* stream,
int64 bottom_trim,
DeviceMemory<float>* output_data) {
LOG(FATAL) << "not yet implemented"; // TODO(leary)
return false;
}
bool CudnnSupport::DoMemcpyD2HQuantized(
......
......@@ -32,7 +32,7 @@ namespace cuda {
class CUDAExecutor;
// Opaque and unique identifer for the cuDNN plugin.
// Opaque and unique identifier for the cuDNN plugin.
extern const PluginId kCuDnnPlugin;
// cudnn-library based DNN support. For details on overridden interface
......
......@@ -235,6 +235,8 @@ bool CUDAExecutor::GetKernel(const MultiKernelLoaderSpec &spec,
}
if (on_disk_spec != nullptr) {
LOG(WARNING) << "loading CUDA kernel from disk is not supported";
return false;
} else if (spec.has_cuda_ptx_in_memory()) {
kernelname = &spec.cuda_ptx_in_memory().kernelname();
......
......@@ -49,6 +49,7 @@ string QuantizedActivationModeString(QuantizedActivationMode mode) {
LOG(FATAL) << "Unknown quantized_activation_mode "
<< static_cast<int32>(mode);
}
return "unknown quantized_activation_mode";
}
string ActivationModeString(ActivationMode mode) {
......@@ -66,6 +67,7 @@ string ActivationModeString(ActivationMode mode) {
default:
LOG(FATAL) << "Unknown activation_mode " << static_cast<int32>(mode);
}
return "unknown activation_mode";
}
string ElementwiseOperationString(ElementwiseOperation op) {
......@@ -77,6 +79,7 @@ string ElementwiseOperationString(ElementwiseOperation op) {
default:
LOG(FATAL) << "Unknown elementwise op " << static_cast<int32>(op);
}
return "unknown element wise op";
}
string DataLayoutString(DataLayout layout) {
......@@ -92,6 +95,7 @@ string DataLayoutString(DataLayout layout) {
default:
LOG(FATAL) << "Unknown data layout " << static_cast<int32>(layout);
}
return "unknown data layout";
}
string FilterLayoutString(FilterLayout layout) {
......@@ -105,6 +109,7 @@ string FilterLayoutString(FilterLayout layout) {
default:
LOG(FATAL) << "Unknown filter layout " << static_cast<int32>(layout);
}
return "unknown filter layout";
}
string ShortPoolingModeString(PoolingMode mode) {
......@@ -116,6 +121,7 @@ string ShortPoolingModeString(PoolingMode mode) {
default:
LOG(FATAL) << "Unknown filter layout " << static_cast<int32>(mode);
}
return "unknown filter layout";
}
std::tuple<int, int, int> GetDimIndices(const DataLayout& layout,
......@@ -166,7 +172,7 @@ std::vector<int64> ReorderDims(const std::vector<int64>& input,
reordered[b_idx_to] = input[b_idx_from];
reordered[d_idx_to] = input[d_idx_from];
for (int i = 0; i < input.size() - 2;
for (size_t i = 0; i < input.size() - 2;
i++, spatial_idx_from++, spatial_idx_to++) {
reordered[spatial_idx_to] = input[spatial_idx_from];
}
......
......@@ -354,7 +354,7 @@ class FilterDescriptor {
// Arguments:
// - zero_padding_height: padding of the "y dimension" of the input data. Note
// that this is different from the height of the filter.
// - zero_padding_width: analogouus to the height above, but in the "x
// - zero_padding_width: analogous to the height above, but in the "x
// dimension".
// - vertical_filter_stride: the convolution slides a 2-dimensional window of
// filter-height-by-filter-width over the input layer -- the center of that
......@@ -767,7 +767,7 @@ class DnnSupport {
// filter_descriptor: dimensions of the convolution filter.
// filter_data: coefficients for the convolution filter.
// output_descriptor: dimensions of the output gradients, which is the same
// as the dimensions of the ouput.
// as the dimensions of the output.
// backward_output_data: un-owned device memory region which contains the
// backprop of the output.
// convolution_descriptor: stride of the convolution filter.
......@@ -813,7 +813,7 @@ class DnnSupport {
// input_data: un-owned device memory region which contains the
// convolution input.
// output_descriptor: dimensions of the output gradients, which is the same
// as the dimensions of the ouput.
// as the dimensions of the output.
// backward_output_data: un-owned device memory region which contains the
// backprop of the output.
// convolution_descriptor: stride of the convolution filter.
......
......@@ -63,10 +63,13 @@ class DeviceMemory;
class Timer;
namespace dnn {
struct BatchDescriptor;
struct FilterDescriptor;
struct ConvolutionDescriptor;
struct ProfileResult;
class BatchDescriptor;
class FilterDescriptor;
class ConvolutionDescriptor;
class BatchDescriptor;
class FilterDescriptor;
class ConvolutionDescriptor;
class ProfileResult;
typedef int64 AlgorithmType;
} // namespace dnn
......@@ -1257,7 +1260,7 @@ class Stream {
// back-end implementation will be appropriately seeded by default.
// At a minimum 16 bytes of data are required in the seed buffer.
//
// To seed with good (non-reproducable) data:
// To seed with good (non-reproducible) data:
// File* f = File::Open("/dev/random", "r");
// int64 bytes_read = f->Read(seed_data, bytes_to_read);
// < error checking >
......@@ -1297,7 +1300,7 @@ class Stream {
uint64 size);
// Alternative interface for memcpying from device to host that takes an
// array slice. Checks that the destination size can accomodate the host
// array slice. Checks that the destination size can accommodate the host
// slice size.
template <typename T>
Stream &ThenMemcpyD2H(const DeviceMemory<T> &gpu_src,
......@@ -1308,7 +1311,7 @@ class Stream {
}
// Alternative interface for memcpying from host to device that takes an
// array slice. Checks that the destination size can accomodate the host
// array slice. Checks that the destination size can accommodate the host
// slice size.
template <typename T>
Stream &ThenMemcpyH2D(port::ArraySlice<T> host_src,
......@@ -1339,7 +1342,7 @@ class Stream {
// Entrain onto the stream: a memset of a 32-bit pattern at a GPU location
// of
// size bytes, where bytes must be evenly 32-bit sized (i.e. evently
// size bytes, where bytes must be evenly 32-bit sized (i.e. evenly
// divisible
// by 4). The location must not be null.
Stream &ThenMemset32(DeviceMemoryBase *location, const uint32 &pattern,
......
......@@ -50,10 +50,6 @@ string StackTraceIfVLOG10() {
}
}
// Maximum stack depth to report when generating backtrace on mem allocation
// (for GPU memory leak checker)
static const int kMaxStackDepth = 256;
// Make sure the executor is done with its work; we know (because this isn't
// publicly visible) that all enqueued work is quick.
void BlockOnThreadExecutor(port::ThreadPool *executor) {
......
......@@ -119,7 +119,7 @@ DOCKER_IMG_NAME=$(echo "${DOCKER_IMG_NAME}" | sed -e 's/=/_/g' -e 's/,/-/g')
DOCKER_IMG_NAME=$(echo "${DOCKER_IMG_NAME}" | tr '[:upper:]' '[:lower:]')
# Print arguments.
echo "WORKSAPCE: ${WORKSPACE}"
echo "WORKSPACE: ${WORKSPACE}"
echo "CI_DOCKER_EXTRA_PARAMS: ${CI_DOCKER_EXTRA_PARAMS[@]}"
echo "COMMAND: ${COMMAND[@]}"
echo "CI_COMMAND_PREFIX: ${CI_COMMAND_PREFIX[@]}"
......
......@@ -56,7 +56,7 @@ using the command described at the end of the previous section.
**Asynchronous and synchronous parameter updates**
There are two modes for the coordination of the parameters from multiple
workers: asynchronous and synchrnous.
workers: asynchronous and synchronous.
In the asynchronous mode, the parameter updates (gradients) from the workers
are applied to the parameters without any explicit coordination. This is the
......
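As a toy illustration of the difference (plain NumPy, not the distributed runtime): in asynchronous mode each worker's gradient is applied to the parameters as it arrives, while in synchronous mode the gradients are first aggregated into a single update. The gradient values below are hypothetical.

```python
import numpy as np

# Hypothetical gradients reported by three workers for the same parameter vector.
worker_grads = [np.array([0.1, 0.2]), np.array([0.3, 0.1]), np.array([0.2, 0.2])]
lr = 0.5

# Asynchronous mode: each gradient is applied as soon as it arrives.
param_async = np.array([1.0, 1.0])
for g in worker_grads:
    param_async -= lr * g

# Synchronous mode: gradients are aggregated (here averaged) before one update.
param_sync = np.array([1.0, 1.0])
param_sync -= lr * np.mean(worker_grads, axis=0)

print(param_async, param_sync)
```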
......@@ -25,7 +25,7 @@ values for --worker_index. There should be exactly one invocation with
initialization. The other, non-master, sessions will wait for the master
session to finish the initialization before proceeding to the training stage.
The coordination between the multpile worker invocations occurs due to
The coordination between the multiple worker invocations occurs due to
the definition of the parameters on the same ps devices. The parameter updates
from one worker is visible to all other workers. As such, the workers can
perform forward computation and gradient calculation in parallel, which
......@@ -61,7 +61,7 @@ flags.DEFINE_integer("num_workers", None,
flags.DEFINE_integer("num_parameter_servers", 2,
"Total number of parameter servers (must be >= 1)")
flags.DEFINE_integer("replicas_to_aggregate", None,
"Number of replicas to aggregate before paramter update"
"Number of replicas to aggregate before parameter update"
"is applied (For sync_replicas mode only; default: "
"num_workers)")
flags.DEFINE_integer("grpc_port", 2222,
......@@ -77,7 +77,7 @@ flags.DEFINE_string("worker_grpc_url", None,
"grpc://tf-worker0:2222)")
flags.DEFINE_boolean("sync_replicas", False,
"Use the sync_replicas (synchronized replicas) mode, "
"wherein the parameter updates from workersare aggregated "
"wherein the parameter updates from workers are aggregated "
"before applied to avoid stale gradients")
FLAGS = flags.FLAGS
......
......@@ -19,7 +19,7 @@
# Usage:
# create_tf_cluster.sh <num_workers> <num_parameter_servers>
#
# In addition, this script obeys values in the folllowing environment variables:
# In addition, this script obeys values in the following environment variables:
# TF_DIST_LOCAL_CLUSTER: create TensorFlow cluster on local machine
# TF_DIST_SERVER_DOCKER_IMAGE: overrides the default docker image to launch
# TensorFlow (GRPC) servers with
......
......@@ -20,7 +20,7 @@
# This script tears down any existing TensorFlow cluster, consisting of
# services, replication controllers and pods, before creating a new cluster.
# The cluster containers a number of parameter server services and a number of
# worker services. The paramater servers will hold parameters of the ML model,
# worker services. The parameter servers will hold parameters of the ML model,
# e.g., weights and biases of the NN layers, while the workers will hold the
# TensorFlow ops.
#
......@@ -45,7 +45,7 @@
# updates.
#
#
# This script obeys values in the folllowing environment variables:
# This script obeys values in the following environment variables:
# TF_DIST_GRPC_SERVER_URLS: If it is set to a list of valid server urls,
# separated with spaces or commas
# (e.g., "grpc://1.2.3.4:2222 grpc//5.6.7.8:2222"),
......
......@@ -157,7 +157,7 @@ cc_library(
# This rule checks if Cuda libraries in the source tree has been properly configured.
# The output list makes bazel runs this rule first if the Cuda files are missing.
# This gives us an opportunity to check and print a meaningful error message.
# But we will need to create the output file list to make bazel happy in a successfull run.
# But we will need to create the output file list to make bazel happy in a successful run.
genrule(
name = "cuda_check",
srcs = [
......