Commit b1b2dc89 authored by: Martin Wicke; committed by: TensorFlower Gardener

Merge changes from github.

Change: 124183870
Parent: ca2a66bd
......@@ -7,7 +7,7 @@ The contrib directory contains project directories, each of which has designated
owners. It is meant to contain features and contributions that eventually should
get merged into core TensorFlow, but whose interfaces may still change, or which
require some testing to see whether they can find broader acceptance. We are
trying to keep dupliction within contrib to a minimum, so you may be asked to
trying to keep duplication within contrib to a minimum, so you may be asked to
refactor code in contrib to use some feature inside core or in another project
in contrib rather than reimplementing the feature.
......
......@@ -291,7 +291,7 @@
"$(SRCROOT)/../../../..",
);
INFOPLIST_FILE = "$(SRCROOT)/Info.plist";
IPHONEOS_DEPLOYMENT_TARGET = 7.0;
IPHONEOS_DEPLOYMENT_TARGET = 9.2;
LIBRARY_SEARCH_PATHS = (
"$(SRCROOT)/../../makefile/gen/lib",
"$(SRCROOT)/../../makefile/gen/protobuf_ios/lib",
......@@ -350,7 +350,7 @@
"$(SRCROOT)/../../../..",
);
INFOPLIST_FILE = "$(SRCROOT)/Info.plist";
IPHONEOS_DEPLOYMENT_TARGET = 7.0;
IPHONEOS_DEPLOYMENT_TARGET = 9.2;
LIBRARY_SEARCH_PATHS = (
"$(SRCROOT)/../../makefile/gen/lib",
"$(SRCROOT)/../../makefile/gen/protobuf_ios/lib",
......
......@@ -7,6 +7,8 @@
objects = {
/* Begin PBXBuildFile section */
590E7D881D02091F00DF5523 /* libprotobuf-lite.a in Frameworks */ = {isa = PBXBuildFile; fileRef = 590E7D861D02091F00DF5523 /* libprotobuf-lite.a */; };
590E7D8A1D0209DD00DF5523 /* libprotobuf.a in Frameworks */ = {isa = PBXBuildFile; fileRef = 590E7D871D02091F00DF5523 /* libprotobuf.a */; };
59A3D0011CF4E68100C4259F /* AppDelegate.mm in Sources */ = {isa = PBXBuildFile; fileRef = 59A3CFF21CF4E68100C4259F /* AppDelegate.mm */; };
59A3D0031CF4E68100C4259F /* grace_hopper.jpg in Resources */ = {isa = PBXBuildFile; fileRef = 59A3CFF51CF4E68100C4259F /* grace_hopper.jpg */; };
59A3D0051CF4E68100C4259F /* imagenet_comp_graph_label_strings.txt in Resources */ = {isa = PBXBuildFile; fileRef = 59A3CFF71CF4E68100C4259F /* imagenet_comp_graph_label_strings.txt */; };
......@@ -20,6 +22,8 @@
/* End PBXBuildFile section */
/* Begin PBXFileReference section */
590E7D861D02091F00DF5523 /* libprotobuf-lite.a */ = {isa = PBXFileReference; lastKnownFileType = archive.ar; name = "libprotobuf-lite.a"; path = "../../makefile/gen/protobuf_ios/lib/libprotobuf-lite.a"; sourceTree = "<group>"; };
590E7D871D02091F00DF5523 /* libprotobuf.a */ = {isa = PBXFileReference; lastKnownFileType = archive.ar; name = libprotobuf.a; path = ../../makefile/gen/protobuf_ios/lib/libprotobuf.a; sourceTree = "<group>"; };
5911579B1CF4011C00C31E3A /* tf_ios_makefile_example.app */ = {isa = PBXFileReference; explicitFileType = wrapper.application; includeInIndex = 0; path = tf_ios_makefile_example.app; sourceTree = BUILT_PRODUCTS_DIR; };
59A3CFF11CF4E68100C4259F /* AppDelegate.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; path = AppDelegate.h; sourceTree = "<group>"; };
59A3CFF21CF4E68100C4259F /* AppDelegate.mm */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.objcpp; path = AppDelegate.mm; sourceTree = "<group>"; };
......@@ -46,6 +50,8 @@
isa = PBXFrameworksBuildPhase;
buildActionMask = 2147483647;
files = (
590E7D8A1D0209DD00DF5523 /* libprotobuf.a in Frameworks */,
590E7D881D02091F00DF5523 /* libprotobuf-lite.a in Frameworks */,
59A3D0181CF4E86100C4259F /* UIKit.framework in Frameworks */,
59A3D0141CF4E82500C4259F /* CoreGraphics.framework in Frameworks */,
);
......@@ -57,6 +63,8 @@
591157921CF4011C00C31E3A = {
isa = PBXGroup;
children = (
590E7D861D02091F00DF5523 /* libprotobuf-lite.a */,
590E7D871D02091F00DF5523 /* libprotobuf.a */,
59A3D0171CF4E86100C4259F /* UIKit.framework */,
59A3D0151CF4E83D00C4259F /* Foundation.framework */,
59A3D0131CF4E82500C4259F /* CoreGraphics.framework */,
......@@ -272,6 +280,7 @@
"$(SRCROOT)/../../makefile/gen/proto",
);
INFOPLIST_FILE = "$(SRCROOT)/RunModel-Info.plist";
IPHONEOS_DEPLOYMENT_TARGET = 9.2;
LD_RUNPATH_SEARCH_PATHS = "$(inherited) @executable_path/Frameworks";
LIBRARY_SEARCH_PATHS = (
"$(SRCROOT)/../../makefile/gen/protobuf_ios/lib",
......@@ -299,11 +308,13 @@
"$(SRCROOT)/../../makefile/gen/proto",
);
INFOPLIST_FILE = "$(SRCROOT)/RunModel-Info.plist";
IPHONEOS_DEPLOYMENT_TARGET = 9.2;
LD_RUNPATH_SEARCH_PATHS = "$(inherited) @executable_path/Frameworks";
LIBRARY_SEARCH_PATHS = (
"$(SRCROOT)/../../makefile/gen/protobuf_ios/lib",
"$(SRCROOT)/../../makefile/gen/lib",
);
ONLY_ACTIVE_ARCH = NO;
OTHER_LDFLAGS = (
"-force_load",
"$(SRCROOT)/../../makefile/gen/lib/libtensorflow-core.a",
......
......@@ -36,7 +36,7 @@ HOST_BINDIR := $(MAKEFILE_DIR)/gen/host_bin/
HOST_GENDIR := $(MAKEFILE_DIR)/gen/host_obj/
# Which Eigen version we're using.
EIGEN_HASH := f3a13643ac1f
EIGEN_HASH := d02e6a705c30
# Settings for the host compiler.
HOST_CXX := gcc
......@@ -168,6 +168,9 @@ ifeq ($(TARGET),IOS)
-D__thread= \
-Wno-c++11-narrowing \
-mno-thumb \
-DTF_LEAN_BINARY \
-DMIN_LOG_LEVEL=0 \
-fno-exceptions \
-isysroot \
${IPHONEOS_SYSROOT}
LDFLAGS := -arch armv7 \
......@@ -182,10 +185,16 @@ ifeq ($(TARGET),IOS)
-D__thread= \
-Wno-c++11-narrowing \
-mno-thumb \
-DTF_LEAN_BINARY \
-DMIN_LOG_LEVEL=0 \
-fno-exceptions \
-isysroot \
${IPHONEOS_SYSROOT}
LDFLAGS := -arch armv7s \
-miphoneos-version-min=${MIN_SDK_VERSION} \
-Xlinker -S \
-Xlinker -x \
-Xlinker -dead_strip \
-all_load \
-L$(GENDIR)protobuf_ios/lib \
-lz
......@@ -195,10 +204,16 @@ ifeq ($(TARGET),IOS)
-arch arm64 \
-D__thread= \
-Wno-c++11-narrowing \
-DTF_LEAN_BINARY \
-DMIN_LOG_LEVEL=0 \
-fno-exceptions \
-isysroot \
${IPHONEOS_SYSROOT}
LDFLAGS := -arch arm64 \
-miphoneos-version-min=${MIN_SDK_VERSION} \
-Xlinker -S \
-Xlinker -x \
-Xlinker -dead_strip \
-all_load \
-L$(GENDIR)protobuf_ios/lib \
-lz
......@@ -208,10 +223,16 @@ ifeq ($(TARGET),IOS)
-arch i386 \
-D__thread= \
-Wno-c++11-narrowing \
-DTF_LEAN_BINARY \
-DMIN_LOG_LEVEL=0 \
-fno-exceptions \
-isysroot \
${IPHONESIMULATOR_SYSROOT}
LDFLAGS := -arch i386 \
-mios-simulator-version-min=${MIN_SDK_VERSION} \
-Xlinker -S \
-Xlinker -x \
-Xlinker -dead_strip \
-all_load \
-L$(GENDIR)protobuf_ios/lib \
-lz
......@@ -221,10 +242,16 @@ ifeq ($(TARGET),IOS)
-arch x86_64 \
-D__thread= \
-Wno-c++11-narrowing \
-DTF_LEAN_BINARY \
-DMIN_LOG_LEVEL=0 \
-fno-exceptions \
-isysroot \
${IPHONESIMULATOR_SYSROOT}
LDFLAGS := -arch x86_64 \
-mios-simulator-version-min=${MIN_SDK_VERSION} \
-Xlinker -S \
-Xlinker -x \
-Xlinker -dead_strip \
-all_load \
-L$(GENDIR)protobuf_ios/lib \
-lz
......
#!/bin/bash
# Copyright 2015 The TensorFlow Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ==============================================================================
# Builds protobuf 3 for Android. Pass in the location of your NDK as the first
# argument to the script, for example:
# tensorflow/contrib/makefile/compile_android_protobuf.sh \
# ${HOME}/toolchains/clang-21-stl-gnu
if [[ $# -ne 1 ]]
then
echo "You need to pass in the Android NDK as the first argument, e.g:"
echo "tensorflow/contrib/makefile/compile_android_protobuf.sh \
${HOME}/toolchains/clang-21-stl-gnu"
exit 1
fi
cd tensorflow/contrib/makefile
GENDIR=`pwd`/gen/protobuf/
LIBDIR=${GENDIR}lib
mkdir -p ${LIBDIR}
export NDK=$1
export PATH=${NDK}/bin:$PATH
export SYSROOT=${NDK}/sysroot
export CC="arm-linux-androideabi-gcc --sysroot $SYSROOT"
export CXX="arm-linux-androideabi-g++ --sysroot $SYSROOT"
export CXXSTL=$NDK/sources/cxx-stl/gnu-libstdc++/4.6
cd downloads/protobuf
mkdir build
./autogen.sh
if [ $? -ne 0 ]
then
echo "./autogen.sh command failed."
exit 1
fi
./configure --prefix=$(pwd)/build \
--host=arm-linux-androideabi \
--with-sysroot=$SYSROOT \
--disable-shared \
--enable-cross-compile \
--with-protoc=protoc \
CFLAGS="-march=armv7-a" \
CXXFLAGS="-march=armv7-a -I$CXXSTL/include -I$CXXSTL/libs/armeabi-v7a/include"
if [ $? -ne 0 ]
then
echo "./configure command failed."
exit 1
fi
make
if [ $? -ne 0 ]
then
echo "make command failed."
exit 1
fi
cp src/.libs/* ${LIBDIR}
if [ $? -ne 0 ]
then
echo "cp command failed."
exit 1
fi
#!/bin/bash -x
# Copyright 2015 The TensorFlow Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ==============================================================================
# Builds protobuf 3 for iOS.
cd tensorflow/contrib/makefile
GENDIR=`pwd`/gen/protobuf_ios/
LIBDIR=${GENDIR}lib
mkdir -p ${LIBDIR}
OSX_VERSION=darwin14.0.0
IPHONEOS_PLATFORM=`xcrun --sdk iphoneos --show-sdk-platform-path`
IPHONEOS_SYSROOT=`xcrun --sdk iphoneos --show-sdk-path`
IPHONESIMULATOR_PLATFORM=`xcrun --sdk iphonesimulator --show-sdk-platform-path`
IPHONESIMULATOR_SYSROOT=`xcrun --sdk iphonesimulator --show-sdk-path`
IOS_SDK_VERSION=`xcrun --sdk iphoneos --show-sdk-version`
MIN_SDK_VERSION=9.2
CFLAGS="-DNDEBUG -g -O0 -pipe -fPIC -fcxx-exceptions"
CXXFLAGS="${CFLAGS} -std=c++11 -stdlib=libc++"
LDFLAGS="-stdlib=libc++"
LIBS="-lc++ -lc++abi"
cd downloads/protobuf
./autogen.sh
if [ $? -ne 0 ]
then
echo "./autogen.sh command failed."
exit 1
fi
make distclean
./configure \
--build=x86_64-apple-${OSX_VERSION} \
--host=i386-apple-${OSX_VERSION} \
--disable-shared \
--enable-cross-compile \
--with-protoc=protoc \
--prefix=${LIBDIR}/iossim_386 \
--exec-prefix=${LIBDIR}/iossim_386 \
"CFLAGS=${CFLAGS} \
-mios-simulator-version-min=${MIN_SDK_VERSION} \
-arch i386 \
-isysroot ${IPHONESIMULATOR_SYSROOT}" \
"CXX=${CXX}" \
"CXXFLAGS=${CXXFLAGS} \
-mios-simulator-version-min=${MIN_SDK_VERSION} \
-arch i386 \
-isysroot \
${IPHONESIMULATOR_SYSROOT}" \
LDFLAGS="-arch i386 \
-mios-simulator-version-min=${MIN_SDK_VERSION} \
${LDFLAGS} \
-L${IPHONESIMULATOR_SYSROOT}/usr/lib/ \
-L${IPHONESIMULATOR_SYSROOT}/usr/lib/system" \
"LIBS=${LIBS}"
make
make install
make distclean
./configure \
--build=x86_64-apple-${OSX_VERSION} \
--host=x86_64-apple-${OSX_VERSION} \
--disable-shared \
--enable-cross-compile \
--with-protoc=protoc \
--prefix=${LIBDIR}/iossim_x86_64 \
--exec-prefix=${LIBDIR}/iossim_x86_64 \
"CFLAGS=${CFLAGS} \
-mios-simulator-version-min=${MIN_SDK_VERSION} \
-arch x86_64 \
-isysroot ${IPHONESIMULATOR_SYSROOT}" \
"CXX=${CXX}" \
"CXXFLAGS=${CXXFLAGS} \
-mios-simulator-version-min=${MIN_SDK_VERSION} \
-arch x86_64 \
-isysroot \
${IPHONESIMULATOR_SYSROOT}" \
LDFLAGS="-arch x86_64 \
-mios-simulator-version-min=${MIN_SDK_VERSION} \
${LDFLAGS} \
-L${IPHONESIMULATOR_SYSROOT}/usr/lib/ \
-L${IPHONESIMULATOR_SYSROOT}/usr/lib/system" \
"LIBS=${LIBS}"
make
make install
make distclean
./configure \
--build=x86_64-apple-${OSX_VERSION} \
--host=armv7-apple-${OSX_VERSION} \
--with-protoc=protoc \
--disable-shared \
--prefix=${LIBDIR}/ios_arm7 \
--exec-prefix=${LIBDIR}/ios_arm7 \
"CFLAGS=${CFLAGS} \
-miphoneos-version-min=${MIN_SDK_VERSION} \
-arch armv7 \
-isysroot ${IPHONEOS_SYSROOT}" \
"CXX=${CXX}" \
"CXXFLAGS=${CXXFLAGS} \
-miphoneos-version-min=${MIN_SDK_VERSION} \
-arch armv7 \
-isysroot ${IPHONEOS_SYSROOT}" \
LDFLAGS="-arch armv7 \
-miphoneos-version-min=${MIN_SDK_VERSION} \
${LDFLAGS}" \
"LIBS=${LIBS}"
make
make install
make distclean
./configure \
--build=x86_64-apple-${OSX_VERSION} \
--host=armv7s-apple-${OSX_VERSION} \
--with-protoc=protoc \
--disable-shared \
--prefix=${LIBDIR}/ios_arm7s \
--exec-prefix=${LIBDIR}/ios_arm7s \
"CFLAGS=${CFLAGS} \
-miphoneos-version-min=${MIN_SDK_VERSION} \
-arch armv7s \
-isysroot ${IPHONEOS_SYSROOT}" \
"CXX=${CXX}" \
"CXXFLAGS=${CXXFLAGS} \
-miphoneos-version-min=${MIN_SDK_VERSION} \
-arch armv7s \
-isysroot ${IPHONEOS_SYSROOT}" \
LDFLAGS="-arch armv7s \
-miphoneos-version-min=${MIN_SDK_VERSION} \
${LDFLAGS}" \
"LIBS=${LIBS}"
make
make install
make distclean
./configure \
--build=x86_64-apple-${OSX_VERSION} \
--host=arm \
--with-protoc=protoc \
--disable-shared \
--prefix=${LIBDIR}/ios_arm64 \
--exec-prefix=${LIBDIR}/ios_arm64 \
"CFLAGS=${CFLAGS} \
-miphoneos-version-min=${MIN_SDK_VERSION} \
-arch arm64 \
-isysroot ${IPHONEOS_SYSROOT}" \
"CXXFLAGS=${CXXFLAGS} \
-miphoneos-version-min=${MIN_SDK_VERSION} \
-arch arm64 \
-isysroot ${IPHONEOS_SYSROOT}" \
LDFLAGS="-arch arm64 \
-miphoneos-version-min=${MIN_SDK_VERSION} \
${LDFLAGS}" \
"LIBS=${LIBS}"
make
make install
lipo \
${LIBDIR}/iossim_386/lib/libprotobuf.a \
${LIBDIR}/iossim_x86_64/lib/libprotobuf.a \
${LIBDIR}/ios_arm7/lib/libprotobuf.a \
${LIBDIR}/ios_arm7s/lib/libprotobuf.a \
${LIBDIR}/ios_arm64/lib/libprotobuf.a \
-create \
-output ${LIBDIR}/libprotobuf.a
lipo \
${LIBDIR}/iossim_386/lib/libprotobuf-lite.a \
${LIBDIR}/iossim_x86_64/lib/libprotobuf-lite.a \
${LIBDIR}/ios_arm7/lib/libprotobuf-lite.a \
${LIBDIR}/ios_arm7s/lib/libprotobuf-lite.a \
${LIBDIR}/ios_arm64/lib/libprotobuf-lite.a \
-create \
-output ${LIBDIR}/libprotobuf-lite.a
#!/bin/bash -x
# Copyright 2015 The TensorFlow Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ==============================================================================
# Builds the TensorFlow core library with ARM and x86 architectures for iOS, and
# packs them into a fat file.
GENDIR=tensorflow/contrib/makefile/gen/
LIBDIR=${GENDIR}lib
LIB_PREFIX=libtensorflow-core
make -f tensorflow/contrib/makefile/Makefile cleantarget
make -f tensorflow/contrib/makefile/Makefile \
TARGET=IOS IOS_ARCH=ARMV7 LIB_NAME=${LIB_PREFIX}-armv7.a OPTFLAGS="$1" $2 $3
if [ $? -ne 0 ]
then
echo "armv7 compilation failed."
exit 1
fi
make -f tensorflow/contrib/makefile/Makefile cleantarget
make -f tensorflow/contrib/makefile/Makefile \
TARGET=IOS IOS_ARCH=ARMV7S LIB_NAME=${LIB_PREFIX}-armv7s.a OPTFLAGS="$1" $2 $3
if [ $? -ne 0 ]
then
echo "arm7vs compilation failed."
exit 1
fi
make -f tensorflow/contrib/makefile/Makefile cleantarget
make -f tensorflow/contrib/makefile/Makefile \
TARGET=IOS IOS_ARCH=ARM64 LIB_NAME=${LIB_PREFIX}-arm64.a OPTFLAGS="$1" $2 $3
if [ $? -ne 0 ]
then
echo "arm64 compilation failed."
exit 1
fi
make -f tensorflow/contrib/makefile/Makefile cleantarget
make -f tensorflow/contrib/makefile/Makefile \
TARGET=IOS IOS_ARCH=I386 LIB_NAME=${LIB_PREFIX}-i386.a OPTFLAGS="$1" $2 $3
if [ $? -ne 0 ]
then
echo "i386 compilation failed."
exit 1
fi
make -f tensorflow/contrib/makefile/Makefile cleantarget
make -f tensorflow/contrib/makefile/Makefile \
TARGET=IOS IOS_ARCH=X86_64 LIB_NAME=${LIB_PREFIX}-x86_64.a OPTFLAGS="$1" $2 $3
if [ $? -ne 0 ]
then
echo "x86_64 compilation failed."
exit 1
fi
lipo \
${LIBDIR}/${LIB_PREFIX}-armv7.a \
${LIBDIR}/${LIB_PREFIX}-armv7s.a \
${LIBDIR}/${LIB_PREFIX}-arm64.a \
${LIBDIR}/${LIB_PREFIX}-i386.a \
${LIBDIR}/${LIB_PREFIX}-x86_64.a \
-create \
-output ${LIBDIR}/${LIB_PREFIX}.a
#!/bin/bash -x
# Copyright 2015 The TensorFlow Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ==============================================================================
# Builds protobuf 3 for iOS.
cd tensorflow/contrib/makefile
GENDIR=`pwd`/gen/protobuf_pi/
LIBDIR=${GENDIR}
mkdir -p ${LIBDIR}
CXX=arm-linux-gnueabihf-g++
cd downloads/protobuf
./autogen.sh
if [ $? -ne 0 ]
then
echo "./autogen.sh command failed."
exit 1
fi
make distclean
./configure \
--build=i686-pc-linux-gnu \
--host=arm-linux \
--target=arm-linux \
--disable-shared \
--enable-cross-compile \
--with-protoc=protoc \
--prefix=${LIBDIR} \
--exec-prefix=${LIBDIR} \
"CXX=${CXX}" \
make
make install
#!/bin/bash -x
# Copyright 2015 The TensorFlow Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ==============================================================================
DOWNLOADS_DIR=tensorflow/contrib/makefile/downloads
mkdir ${DOWNLOADS_DIR}
EIGEN_HASH=d02e6a705c30
curl "https://bitbucket.org/eigen/eigen/get/${EIGEN_HASH}.tar.gz" \
-o /tmp/eigen-${EIGEN_HASH}.tar.gz
tar xzf /tmp/eigen-${EIGEN_HASH}.tar.gz -C ${DOWNLOADS_DIR}
git clone https://github.com/google/re2.git ${DOWNLOADS_DIR}/re2
git clone https://github.com/google/gemmlowp.git ${DOWNLOADS_DIR}/gemmlowp
git clone https://github.com/google/protobuf.git ${DOWNLOADS_DIR}/protobuf
# JPEG_VERSION=v9a
# curl "http://www.ijg.org/files/jpegsrc.${JPEG_VERSION}.tar.gz" \
# -o /tmp/jpegsrc.${JPEG_VERSION}.tar.gz
# tar xzf /tmp/jpegsrc.${JPEG_VERSION}.tar.gz -C ${DOWNLOADS_DIR}
# PNG_VERSION=v1.2.53
# curl -L "https://github.com/glennrp/libpng/archive/${PNG_VERSION}.zip" \
# -o /tmp/pngsrc.${PNG_VERSION}.zip
# unzip /tmp/pngsrc.${PNG_VERSION}.zip -d ${DOWNLOADS_DIR}
#!/bin/bash -x
# Copyright 2015 The TensorFlow Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ==============================================================================
# This script generates the source file lists needed by the makefile by querying
# the master Bazel build configuration.
bazel query 'kind("source file", deps(//tensorflow/core:android_tensorflow_lib))' | \
grep "//tensorflow/.*\.cc$" | \
grep -v "gen_proto_text" | \
grep -E -v "jpeg" | \
grep -E -v "png" | \
sed -E 's#^//##g' | \
sed -E 's#:#/#g' \
> make/tf_cc_files.txt
bazel query 'kind("source file", deps(//tensorflow/core:android_tensorflow_lib))' | \
grep "//tensorflow/.*\.proto$" | \
sed -E 's#^//##g' | \
sed -E 's#:#/#g' \
> make/tf_proto_files.txt
bazel query 'kind("generated file", deps(//tensorflow/core:proto_text))' | \
grep "pb_text\.cc$" | \
sed -E 's#^//##g' | \
sed -E 's#:#/#g' \
> make/tf_pb_text_files.txt
bazel query 'kind("source file", deps(//tensorflow/tools/proto_text:gen_proto_text_functions))' | \
grep -E "//tensorflow/.*\.cc$" | \
grep -E -v "jpeg" | \
grep -E -v "png" | \
sed -E 's#^//##g' | \
sed -E 's#:#/#g' \
> make/proto_text_cc_files.txt
bazel query 'kind("generated file", deps(//tensorflow/tools/proto_text:gen_proto_text_functions))' | \
grep -E "//tensorflow/.*\.cc$" | \
sed -E 's#^//##g' | \
sed -E 's#:#/#g' \
> make/proto_text_pb_cc_files.txt
bazel query 'kind("generated file", deps(//tensorflow/tools/proto_text:gen_proto_text_functions))' | \
grep -E "//tensorflow/.*\.h$" | \
sed -E 's#^//##g' | \
sed -E 's#:#/#g' \
> make/proto_text_pb_h_files.txt
......@@ -43,17 +43,18 @@ py_library(
],
)
py_test(
name = "learning_test",
srcs = ["python/slim/learning_test.py"],
srcs_version = "PY2AND3",
deps = [
"//tensorflow:tensorflow_py",
"//tensorflow/contrib/slim",
"//tensorflow/python:framework_test_lib",
"//tensorflow/python:platform_test",
],
)
# TODO(nsilberman): Fix this test and re-enable.
#py_test(
# name = "learning_test",
# srcs = ["python/slim/learning_test.py"],
# srcs_version = "PY2AND3",
# deps = [
# "//tensorflow:tensorflow_py",
# "//tensorflow/contrib/slim",
# "//tensorflow/python:framework_test_lib",
# "//tensorflow/python:platform_test",
# ],
#)
py_library(
name = "queues",
......
......@@ -16,6 +16,8 @@ limitations under the License.
#define USE_EIGEN_TENSOR
#define EIGEN_USE_THREADS
#include <array>
#include "tensorflow/core/kernels/cudnn_pooling_gpu.h"
#include "tensorflow/core/kernels/conv_2d.h"
#include "tensorflow/core/kernels/conv_3d.h"
......
......@@ -68,6 +68,7 @@ namespace functor {
extern template struct L2Loss<GPUDevice, T>;
DECLARE_GPU_SPEC(float);
DECLARE_GPU_SPEC(double);
DECLARE_GPU_SPEC(Eigen::half);
#undef DECLARE_GPU_SPEC
} // namespace functor
......@@ -79,6 +80,7 @@ DECLARE_GPU_SPEC(Eigen::half);
L2LossOp<GPUDevice, T>);
REGISTER_GPU_KERNEL(float);
REGISTER_GPU_KERNEL(double);
REGISTER_GPU_KERNEL(Eigen::half);
#undef REGISTER_GPU_KERNEL
......
......@@ -25,6 +25,7 @@ namespace tensorflow {
typedef Eigen::GpuDevice GPUDevice;
template struct functor::L2Loss<GPUDevice, float>;
template struct functor::L2Loss<GPUDevice, double>;
template struct functor::L2Loss<GPUDevice, Eigen::half>;
} // namespace tensorflow
......
......@@ -48,7 +48,9 @@ struct ApplyAdadelta<CPUDevice, T> {
typename TTypes<T>::ConstFlat grad) {
accum.device(d) =
accum * rho() + grad.square() * (static_cast<T>(1) - rho());
const auto update = accum_update * (accum + epsilon()).rsqrt() * grad;
const auto update =
(accum_update + epsilon()).sqrt() *
(accum + epsilon()).rsqrt() * grad;
accum_update.device(d) =
accum_update * rho() + update.square() * (static_cast<T>(1) - rho());
var.device(d) -= update * lr();
......
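For reference, the revised expression matches the standard Adadelta rule, where epsilon sits inside both square roots rather than only under the accumulator. Below is a minimal NumPy sketch of one such step; the scalar values are hypothetical and chosen only to mirror the test further down, not taken from the kernel itself.

```python
import numpy as np

# One Adadelta step with the corrected update:
#   update = sqrt(accum_update + eps) / sqrt(accum + eps) * grad
rho, eps, lr = 0.95, 1e-8, 1.0
grad, accum, accum_update, var = 0.1, 0.0, 0.0, 1.0

accum = rho * accum + (1.0 - rho) * grad**2                    # running average of squared gradients
update = np.sqrt(accum_update + eps) / np.sqrt(accum + eps) * grad
accum_update = rho * accum_update + (1.0 - rho) * update**2    # running average of squared updates
var -= lr * update
print(accum, accum_update, var)
```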
......@@ -1665,10 +1665,25 @@ class ComplexMakeRealImagTest(tf.test.TestCase):
delta=epsilon)
self.assertAllClose(jacob_t, jacob_n, rtol=epsilon, atol=epsilon)
def _compareBroadcastGradient(self, x):
x_ = tf.convert_to_tensor(x)
epsilon = 1e-3
with self.test_session():
for args in [(x_, 0.), (0., x_)]:
z = tf.reduce_sum(tf.complex_abs(tf.complex(*args)))
jacob_t, jacob_n = tf.test.compute_gradient(x_,
list(x.shape),
z,
[1],
x_init_value=x,
delta=epsilon)
self.assertAllClose(jacob_t, jacob_n, rtol=epsilon, atol=epsilon)
def testGradient(self):
# complex64
data = np.arange(1, 2, 0.10).reshape([5, 2]).astype(np.float32)
self._compareGradient(data)
self._compareBroadcastGradient(data)
# complex128
data = np.arange(1, 2, 0.10).reshape([5, 2]).astype(np.float64)
self._compareGradient(data)
......
......@@ -48,6 +48,15 @@ class ShapeOpsTest(tf.test.TestCase):
self.assertAllEqual(np_ans, result)
self.assertShapeEqual(np_ans, tf_ans)
def _compareShapeSparse(self, x_np, use_gpu=False):
np_ans = np.array(np.shape(x_np))
x_tf, unused_nnz = _sparsify(x_np)
with self.test_session(use_gpu=use_gpu):
tf_ans = tf.shape(x_tf)
result = tf_ans.eval()
self.assertAllEqual(np_ans, result)
self.assertShapeEqual(np_ans, tf_ans)
def _compareShapeN(self, x, use_gpu=False):
np_ans = np.array(np.shape(x))
with self.test_session(use_gpu=use_gpu) as sess:
......@@ -67,7 +76,7 @@ class ShapeOpsTest(tf.test.TestCase):
def _compareRankSparse(self, x_np, use_gpu=False):
np_ans = np.asarray(np.ndim(x_np))
x_tf, nnz = _sparsify(x_np)
x_tf, unused_nnz = _sparsify(x_np)
with self.test_session(use_gpu=use_gpu):
tf_ans = tf.rank(x_tf)
result = tf_ans.eval()
......@@ -87,6 +96,7 @@ class ShapeOpsTest(tf.test.TestCase):
self._compareShapeN(x, use_gpu=False)
self._compareRank(x, use_gpu=False)
self._compareSize(x, use_gpu=False)
self._compareShapeSparse(x, use_gpu=False)
self._compareRankSparse(x, use_gpu=False)
def _testGpu(self, x):
......@@ -94,6 +104,7 @@ class ShapeOpsTest(tf.test.TestCase):
self._compareShapeN(x, use_gpu=True)
self._compareRank(x, use_gpu=True)
self._compareSize(x, use_gpu=True)
self._compareShapeSparse(x, use_gpu=True)
self._compareRankSparse(x, use_gpu=True)
def _testAll(self, x):
......
......@@ -100,6 +100,32 @@ _baseslice = slice
listdiff = gen_array_ops.list_diff
def shape(input, name=None):
"""Returns the shape of a tensor.
This operation returns a 1-D integer tensor representing the shape of `input`.
For example:
```python
# 't' is [[[1, 1, 1], [2, 2, 2]], [[3, 3, 3], [4, 4, 4]]]
shape(t) ==> [2, 2, 3]
```
Args:
input: A `Tensor` or `SparseTensor`.
name: A name for the operation (optional).
Returns:
A `Tensor` of type `int32`.
"""
with ops.op_scope([input], name, "Shape") as name:
if isinstance(input, ops.SparseTensor):
return input.shape
else:
return gen_array_ops.shape(input, name=name)
def rank(input, name=None):
"""Returns the rank of a tensor.
......
......@@ -681,9 +681,15 @@ ops.NoGradient("LinSpace")
@ops.RegisterGradient("Complex")
def _ComplexGrad(_, grad):
def _ComplexGrad(op, grad):
"""Returns the real and imaginary components of 'grad', respectively."""
return math_ops.real(grad), math_ops.imag(grad)
x = op.inputs[0]
y = op.inputs[1]
sx = array_ops.shape(x)
sy = array_ops.shape(y)
rx, ry = gen_array_ops._broadcast_gradient_args(sx, sy)
return (array_ops.reshape(math_ops.reduce_sum(math_ops.real(grad), rx), sx),
array_ops.reshape(math_ops.reduce_sum(math_ops.imag(grad), ry), sy))
@ops.RegisterGradient("Real")
......
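The new gradient accounts for broadcasting between the real and imaginary inputs: the incoming gradient is reduce-summed over the broadcast dimensions and reshaped back to each input's shape, as other broadcasting binary ops do. A rough NumPy illustration of the idea follows; the shapes are hypothetical and this does not use TensorFlow's internal broadcast helpers.

```python
import numpy as np

# Suppose x has shape (5, 2) and y is a scalar broadcast against it in complex(x, y).
# The upstream gradient then has shape (5, 2); the gradient w.r.t. y must be
# reduced over the broadcast axes so that it matches y's shape.
grad = np.ones((5, 2)) + 1j * np.ones((5, 2))  # hypothetical incoming gradient
gx = np.real(grad)                             # same shape as x: (5, 2)
gy = np.imag(grad).sum()                       # reduced over broadcast axes to y's scalar shape
print(gx.shape, np.shape(gy))
```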
......@@ -116,11 +116,13 @@ class LogSoftmaxTest(tf.test.TestCase):
class L2LossTest(tf.test.TestCase):
def testL2Loss(self):
with self.test_session():
x = tf.constant([1.0, 0.0, 3.0, 2.0], shape=[2, 2], name="x")
l2loss = tf.nn.l2_loss(x)
value = l2loss.eval()
self.assertAllClose(7.0, value)
for dtype in [tf.float32, tf.float64]:
with self.test_session():
x = tf.constant([1.0, 0.0, 3.0, 2.0], shape=[2, 2], name="x",
dtype=dtype)
l2loss = tf.nn.l2_loss(x)
value = l2loss.eval()
self.assertAllClose(7.0, value)
def testGradient(self):
x_shape = [20, 7, 3]
......
......@@ -126,7 +126,8 @@ def rnn(cell, inputs, initial_state=None, dtype=None,
state = initial_state
else:
if not dtype:
raise ValueError("If no initial_state is provided, dtype must be.")
raise ValueError("If no initial_state is provided, "
"dtype must be specified")
state = cell.zero_state(batch_size, dtype)
if sequence_length is not None: # Prepare variables
......
......@@ -20,104 +20,94 @@ from __future__ import print_function
import tensorflow.python.platform
import numpy as np
from six.moves import xrange # pylint: disable=redefined-builtin
import tensorflow as tf
class AdadeltaOptimizerTest(tf.test.TestCase):
def testBasic(self):
num_updates = 4 # number of ADADELTA steps to perform
for dtype in [tf.half, tf.float32]:
with self.test_session():
var0 = tf.Variable([1.0, 2.0], dtype=dtype)
var1 = tf.Variable([3.0, 4.0], dtype=dtype)
grads0 = tf.constant([0.1, 0.1], dtype=dtype)
grads1 = tf.constant([0.01, 0.01], dtype=dtype)
lr = 1.0
rho = 0.95
epsilon = 1e-8
adadelta_opt = tf.train.AdadeltaOptimizer(lr, rho=rho, epsilon=epsilon)
adadelta_update = adadelta_opt.apply_gradients(zip(
[grads0, grads1], [var0, var1]))
tf.initialize_all_variables().run()
# Check we have slots
self.assertEqual(["accum", "accum_update"],
adadelta_opt.get_slot_names())
slot0 = adadelta_opt.get_slot(var0, "accum")
self.assertEquals(slot0.get_shape(), var0.get_shape())
self.assertFalse(slot0 in tf.trainable_variables())
slot0_update = adadelta_opt.get_slot(var0, "accum_update")
self.assertEquals(slot0_update.get_shape(), var0.get_shape())
self.assertFalse(slot0_update in tf.trainable_variables())
slot1 = adadelta_opt.get_slot(var1, "accum")
self.assertEquals(slot1.get_shape(), var1.get_shape())
self.assertFalse(slot1 in tf.trainable_variables())
slot1_update = adadelta_opt.get_slot(var1, "accum_update")
self.assertEquals(slot1_update.get_shape(), var1.get_shape())
self.assertFalse(slot1_update in tf.trainable_variables())
# Fetch params to validate initial values
self.assertAllClose([1.0, 2.0], var0.eval())
self.assertAllClose([3.0, 4.0], var1.eval())
adadelta_update.run()
# Check that the accumulators have been updated.
grad = 0.1
accum = 0
accum_update = 0
accum = accum * rho + (grad**2) * (1 - rho)
update1 = np.sqrt(accum_update + epsilon) * (
1. / np.sqrt(accum + epsilon)) * grad
accum_update = accum_update * rho + (update1**2) * (1.0 - rho)
self.assertAllCloseAccordingToType(
np.array([accum, accum]), slot0.eval())
self.assertAllCloseAccordingToType(
np.array([accum_update, accum_update]), slot0_update.eval())
# Check that the parameters have been updated.
self.assertAllCloseAccordingToType(
np.array([1.0 - update1 * lr, 2.0 - update1 * lr]),
var0.eval(),
rtol=1e-3)
self.assertAllCloseAccordingToType(
np.array([3.0 - update1 * lr, 4.0 - update1 * lr]),
var1.eval(),
rtol=1e-3)
# Step 2: the momentum accumulators contain the previous update.
accum = accum * rho + (grad**2) * (1 - rho)
update2 = ((accum_update + epsilon)**0.5 *
(1. / (accum + epsilon)**0.5) * grad)
accum_update = accum_update * rho + (update2**2) * (1.0 - rho)
adadelta_update.run()
# Check that the momentum accumulators have been updated.
self.assertAllCloseAccordingToType(
np.array([accum, accum]), slot0.eval())
self.assertAllCloseAccordingToType(
np.array([accum_update, accum_update]), slot0_update.eval())
# Check that the parameters have been updated.
self.assertAllCloseAccordingToType(
np.array([1.0 - update1 - update2, 2.0 - update1 - update2]),
var0.eval(),
rtol=1e-3)
self.assertAllCloseAccordingToType(
np.array([3.0 - update1 - update2, 4.0 - update1 - update2]),
var1.eval(),
rtol=1e-3)
for grad in [0.2, 0.1, 0.01]:
for lr in [1.0, 0.5, 0.1]:
with self.test_session():
var0_init = [1.0, 2.0]
var1_init = [3.0, 4.0]
var0 = tf.Variable(var0_init, dtype=dtype)
var1 = tf.Variable(var1_init, dtype=dtype)
grads = tf.constant([grad, grad], dtype=dtype)
accum = 0.0
accum_update = 0.0
# ADADELTA gradient optimizer
rho = 0.95
epsilon = 1e-8
adadelta_opt = tf.train.AdadeltaOptimizer(lr, rho, epsilon)
adadelta_update = adadelta_opt.apply_gradients(zip(
[grads, grads], [var0, var1]))
tf.initialize_all_variables().run()
# Assign slots
slot = [None] * 2
slot_update = [None] * 2
self.assertEqual(["accum", "accum_update"],
adadelta_opt.get_slot_names())
slot[0] = adadelta_opt.get_slot(var0, "accum")
self.assertEquals(slot[0].get_shape(), var0.get_shape())
self.assertFalse(slot[0] in tf.trainable_variables())
slot_update[0] = adadelta_opt.get_slot(var0, "accum_update")
self.assertEquals(slot_update[0].get_shape(), var0.get_shape())
self.assertFalse(slot_update[0] in tf.trainable_variables())
slot[1] = adadelta_opt.get_slot(var1, "accum")
self.assertEquals(slot[1].get_shape(), var1.get_shape())
self.assertFalse(slot[1] in tf.trainable_variables())
slot_update[1] = adadelta_opt.get_slot(var1, "accum_update")
self.assertEquals(slot_update[1].get_shape(), var1.get_shape())
self.assertFalse(slot_update[1] in tf.trainable_variables())
# Fetch params to validate initial values
self.assertAllClose(var0_init, var0.eval())
self.assertAllClose(var1_init, var1.eval())
update = [None] * num_updates
tot_update = 0
for step in range(num_updates):
# Run adadelta update for comparison
adadelta_update.run()
# Perform initial update without previous accum values
accum = accum * rho + (grad**2) * (1 - rho)
update[step] = (np.sqrt(accum_update + epsilon) *
(1. / np.sqrt(accum + epsilon)) * grad)
accum_update = (accum_update * rho + (update[step]**2) *
(1.0 - rho))
tot_update += update[step] * lr
# Check that the accumulators have been updated
for slot_idx in range(2):
self.assertAllCloseAccordingToType(
np.array([accum, accum], dtype=dtype.as_numpy_dtype()),
slot[slot_idx].eval())
self.assertAllCloseAccordingToType(
np.array([accum_update, accum_update],
dtype=dtype.as_numpy_dtype()),
slot_update[slot_idx].eval())
# Check that the parameters have been updated
self.assertAllCloseAccordingToType(
np.array([var0_init[0] - tot_update,
var0_init[1] - tot_update], dtype=dtype.as_numpy_dtype()),
var0.eval(), rtol=1e-3)
self.assertAllCloseAccordingToType(
np.array([var1_init[0] - tot_update,
var1_init[1] - tot_update], dtype=dtype.as_numpy_dtype()),
var1.eval(), rtol=1e-3)
if __name__ == "__main__":
tf.test.main()
......@@ -27,8 +27,7 @@ CudaContext* ExtractCudaContext(CUDAExecutor *cuda_exec);
CUDAExecutor *ExtractCudaExecutor(StreamExecutor *stream_exec);
ScopedActivateExecutorContext::ScopedActivateExecutorContext(
CUDAExecutor *cuda_exec)
: cuda_exec_(cuda_exec),
CUDAExecutor *cuda_exec):
driver_scoped_activate_context_(
new ScopedActivateContext{ExtractCudaContext(cuda_exec)}) { }
......
......@@ -51,8 +51,6 @@ class ScopedActivateExecutorContext {
~ScopedActivateExecutorContext();
private:
// The CUDA executor implementation whose context is activated.
CUDAExecutor* cuda_exec_;
// The cuda.h-using datatype that we wrap.
ScopedActivateContext* driver_scoped_activate_context_;
......
......@@ -457,6 +457,7 @@ class ScopedFilterDescriptor {
<< ToString(status);
}
#if CUDNN_VERSION >= 5000
// TODO(b/23032134): Even if the filter layout is not supported,
// cudnnSetFilter4DDescriptor_v4 will return CUDNN_STATUS_SUCCESS because it
// does not take layout as an input. Maybe force cuDNN by giving wrong
......@@ -471,6 +472,7 @@ class ScopedFilterDescriptor {
<< FilterLayoutString(filter_descriptor.layout());
break;
}
#endif
std::vector<int> dims(2 + filter_descriptor.ndims());
dims[0] = filter_descriptor.output_feature_map_count();
......@@ -666,7 +668,7 @@ class ScopedActivationDescriptor {
mode = CUDNN_ACTIVATION_TANH;
break;
default:
LOG(ERROR) << "unrecognized activation mode: "
LOG(FATAL) << "unrecognized activation mode: "
<< static_cast<int>(activation_mode);
}
......@@ -1916,6 +1918,7 @@ bool CudnnSupport::DoNormalize(
Stream* stream, const dnn::NormalizeDescriptor& normalize_descriptor,
const DeviceMemory<float>& input_data, DeviceMemory<float>* output_data) {
LOG(FATAL) << "not yet implemented"; // TODO(leary)
return false;
}
bool CudnnSupport::DoDepthConcatenate(
......@@ -1977,6 +1980,7 @@ bool CudnnSupport::DoElementwiseOperate(
const dnn::BatchDescriptor& output_dimensions,
DeviceMemory<float>* output_data) {
LOG(FATAL) << "not yet implemented"; // TODO(leary)
return false;
}
bool CudnnSupport::DoXYPad(Stream* stream,
......@@ -1985,6 +1989,7 @@ bool CudnnSupport::DoXYPad(Stream* stream,
int64 left_pad, int64 right_pad, int64 top_pad,
int64 bottom_pad, DeviceMemory<float>* output_data) {
LOG(FATAL) << "not yet implemented"; // TODO(leary)
return false;
}
bool CudnnSupport::DoXYSlice(Stream* stream,
......@@ -1994,6 +1999,7 @@ bool CudnnSupport::DoXYSlice(Stream* stream,
int64 bottom_trim,
DeviceMemory<float>* output_data) {
LOG(FATAL) << "not yet implemented"; // TODO(leary)
return false;
}
bool CudnnSupport::DoMemcpyD2HQuantized(
......
......@@ -32,7 +32,7 @@ namespace cuda {
class CUDAExecutor;
// Opaque and unique identifer for the cuDNN plugin.
// Opaque and unique identifier for the cuDNN plugin.
extern const PluginId kCuDnnPlugin;
// cudnn-library based DNN support. For details on overridden interface
......
......@@ -235,6 +235,8 @@ bool CUDAExecutor::GetKernel(const MultiKernelLoaderSpec &spec,
}
if (on_disk_spec != nullptr) {
LOG(WARNING) << "loading CUDA kernel from disk is not supported";
return false;
} else if (spec.has_cuda_ptx_in_memory()) {
kernelname = &spec.cuda_ptx_in_memory().kernelname();
......
......@@ -49,6 +49,7 @@ string QuantizedActivationModeString(QuantizedActivationMode mode) {
LOG(FATAL) << "Unknown quantized_activation_mode "
<< static_cast<int32>(mode);
}
return "unknown quantized_activation_mode";
}
string ActivationModeString(ActivationMode mode) {
......@@ -66,6 +67,7 @@ string ActivationModeString(ActivationMode mode) {
default:
LOG(FATAL) << "Unknown activation_mode " << static_cast<int32>(mode);
}
return "unknown activation_mode";
}
string ElementwiseOperationString(ElementwiseOperation op) {
......@@ -77,6 +79,7 @@ string ElementwiseOperationString(ElementwiseOperation op) {
default:
LOG(FATAL) << "Unknown elementwise op " << static_cast<int32>(op);
}
return "unknown element wise op";
}
string DataLayoutString(DataLayout layout) {
......@@ -92,6 +95,7 @@ string DataLayoutString(DataLayout layout) {
default:
LOG(FATAL) << "Unknown data layout " << static_cast<int32>(layout);
}
return "unknown data layout";
}
string FilterLayoutString(FilterLayout layout) {
......@@ -105,6 +109,7 @@ string FilterLayoutString(FilterLayout layout) {
default:
LOG(FATAL) << "Unknown filter layout " << static_cast<int32>(layout);
}
return "unknown filter layout";
}
string ShortPoolingModeString(PoolingMode mode) {
......@@ -116,6 +121,7 @@ string ShortPoolingModeString(PoolingMode mode) {
default:
LOG(FATAL) << "Unknown filter layout " << static_cast<int32>(mode);
}
return "unknown filter layout";
}
std::tuple<int, int, int> GetDimIndices(const DataLayout& layout,
......@@ -166,7 +172,7 @@ std::vector<int64> ReorderDims(const std::vector<int64>& input,
reordered[b_idx_to] = input[b_idx_from];
reordered[d_idx_to] = input[d_idx_from];
for (int i = 0; i < input.size() - 2;
for (size_t i = 0; i < input.size() - 2;
i++, spatial_idx_from++, spatial_idx_to++) {
reordered[spatial_idx_to] = input[spatial_idx_from];
}
......
......@@ -354,7 +354,7 @@ class FilterDescriptor {
// Arguments:
// - zero_padding_height: padding of the "y dimension" of the input data. Note
// that this is different from the height of the filter.
// - zero_padding_width: analogouus to the height above, but in the "x
// - zero_padding_width: analogous to the height above, but in the "x
// dimension".
// - vertical_filter_stride: the convolution slides a 2-dimensional window of
// filter-height-by-filter-width over the input layer -- the center of that
......@@ -767,7 +767,7 @@ class DnnSupport {
// filter_descriptor: dimensions of the convolution filter.
// filter_data: coefficients for the convolution filter.
// output_descriptor: dimensions of the output gradients, which is the same
// as the dimensions of the ouput.
// as the dimensions of the output.
// backward_output_data: un-owned device memory region which contains the
// backprop of the output.
// convolution_descriptor: stride of the convolution filter.
......@@ -813,7 +813,7 @@ class DnnSupport {
// input_data: un-owned device memory region which contains the
// convolution input.
// output_descriptor: dimensions of the output gradients, which is the same
// as the dimensions of the ouput.
// as the dimensions of the output.
// backward_output_data: un-owned device memory region which contains the
// backprop of the output.
// convolution_descriptor: stride of the convolution filter.
......
......@@ -63,10 +63,13 @@ class DeviceMemory;
class Timer;
namespace dnn {
struct BatchDescriptor;
struct FilterDescriptor;
struct ConvolutionDescriptor;
struct ProfileResult;
class BatchDescriptor;
class FilterDescriptor;
class ConvolutionDescriptor;
class BatchDescriptor;
class FilterDescriptor;
class ConvolutionDescriptor;
class ProfileResult;
typedef int64 AlgorithmType;
} // namespace dnn
......@@ -1257,7 +1260,7 @@ class Stream {
// back-end implementation will be appropriately seeded by default.
// At a minimum 16 bytes of data are required in the seed buffer.
//
// To seed with good (non-reproducable) data:
// To seed with good (non-reproducible) data:
// File* f = File::Open("/dev/random", "r");
// int64 bytes_read = f->Read(seed_data, bytes_to_read);
// < error checking >
......@@ -1297,7 +1300,7 @@ class Stream {
uint64 size);
// Alternative interface for memcpying from device to host that takes an
// array slice. Checks that the destination size can accomodate the host
// array slice. Checks that the destination size can accommodate the host
// slice size.
template <typename T>
Stream &ThenMemcpyD2H(const DeviceMemory<T> &gpu_src,
......@@ -1308,7 +1311,7 @@ class Stream {
}
// Alternative interface for memcpying from host to device that takes an
// array slice. Checks that the destination size can accomodate the host
// array slice. Checks that the destination size can accommodate the host
// slice size.
template <typename T>
Stream &ThenMemcpyH2D(port::ArraySlice<T> host_src,
......@@ -1339,7 +1342,7 @@ class Stream {
// Entrain onto the stream: a memset of a 32-bit pattern at a GPU location
// of
// size bytes, where bytes must be evenly 32-bit sized (i.e. evently
// size bytes, where bytes must be evenly 32-bit sized (i.e. evenly
// divisible
// by 4). The location must not be null.
Stream &ThenMemset32(DeviceMemoryBase *location, const uint32 &pattern,
......
......@@ -50,10 +50,6 @@ string StackTraceIfVLOG10() {
}
}
// Maximum stack depth to report when generating backtrace on mem allocation
// (for GPU memory leak checker)
static const int kMaxStackDepth = 256;
// Make sure the executor is done with its work; we know (because this isn't
// publicly visible) that all enqueued work is quick.
void BlockOnThreadExecutor(port::ThreadPool *executor) {
......
......@@ -119,7 +119,7 @@ DOCKER_IMG_NAME=$(echo "${DOCKER_IMG_NAME}" | sed -e 's/=/_/g' -e 's/,/-/g')
DOCKER_IMG_NAME=$(echo "${DOCKER_IMG_NAME}" | tr '[:upper:]' '[:lower:]')
# Print arguments.
echo "WORKSAPCE: ${WORKSPACE}"
echo "WORKSPACE: ${WORKSPACE}"
echo "CI_DOCKER_EXTRA_PARAMS: ${CI_DOCKER_EXTRA_PARAMS[@]}"
echo "COMMAND: ${COMMAND[@]}"
echo "CI_COMMAND_PREFIX: ${CI_COMMAND_PREFIX[@]}"
......
......@@ -56,7 +56,7 @@ using the command described at the end of the previous section.
**Asynchronous and synchronous parameter updates**
There are two modes for the coordination of the parameters from multiple
workers: asynchronous and synchrnous.
workers: asynchronous and synchronous.
In the asynchronous mode, the parameter updates (gradients) from the workers
are applied to the parameters without any explicit coordination. This is the
......
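As a toy illustration of the difference (plain NumPy, not the distributed runtime): in asynchronous mode each worker's gradient is applied to the parameters as it arrives, while in synchronous mode the gradients are first aggregated into a single update. The gradient values below are hypothetical.

```python
import numpy as np

# Hypothetical gradients reported by three workers for the same parameter vector.
worker_grads = [np.array([0.1, 0.2]), np.array([0.3, 0.1]), np.array([0.2, 0.2])]
lr = 0.5

# Asynchronous mode: each gradient is applied as soon as it arrives.
param_async = np.array([1.0, 1.0])
for g in worker_grads:
    param_async -= lr * g

# Synchronous mode: gradients are aggregated (here averaged) before one update.
param_sync = np.array([1.0, 1.0])
param_sync -= lr * np.mean(worker_grads, axis=0)

print(param_async, param_sync)
```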
......@@ -25,7 +25,7 @@ values for --worker_index. There should be exactly one invocation with
initialization. The other, non-master, sessions will wait for the master
session to finish the initialization before proceeding to the training stage.
The coordination between the multpile worker invocations occurs due to
The coordination between the multiple worker invocations occurs due to
the definition of the parameters on the same ps devices. The parameter updates
from one worker is visible to all other workers. As such, the workers can
perform forward computation and gradient calculation in parallel, which
......@@ -61,7 +61,7 @@ flags.DEFINE_integer("num_workers", None,
flags.DEFINE_integer("num_parameter_servers", 2,
"Total number of parameter servers (must be >= 1)")
flags.DEFINE_integer("replicas_to_aggregate", None,
"Number of replicas to aggregate before paramter update"
"Number of replicas to aggregate before parameter update"
"is applied (For sync_replicas mode only; default: "
"num_workers)")
flags.DEFINE_integer("grpc_port", 2222,
......@@ -77,7 +77,7 @@ flags.DEFINE_string("worker_grpc_url", None,
"grpc://tf-worker0:2222)")
flags.DEFINE_boolean("sync_replicas", False,
"Use the sync_replicas (synchronized replicas) mode, "
"wherein the parameter updates from workersare aggregated "
"wherein the parameter updates from workers are aggregated "
"before applied to avoid stale gradients")
FLAGS = flags.FLAGS
......
......@@ -19,7 +19,7 @@
# Usage:
# create_tf_cluster.sh <num_workers> <num_parameter_servers>
#
# In addition, this script obeys values in the folllowing environment variables:
# In addition, this script obeys values in the following environment variables:
# TF_DIST_LOCAL_CLUSTER: create TensorFlow cluster on local machine
# TF_DIST_SERVER_DOCKER_IMAGE: overrides the default docker image to launch
# TensorFlow (GRPC) servers with
......
......@@ -20,7 +20,7 @@
# This script tears down any existing TensorFlow cluster, consisting of
# services, replication controllers and pods, before creating a new cluster.
# The cluster containers a number of parameter server services and a number of
# worker services. The paramater servers will hold parameters of the ML model,
# worker services. The parameter servers will hold parameters of the ML model,
# e.g., weights and biases of the NN layers, while the workers will hold the
# TensorFlow ops.
#
......@@ -45,7 +45,7 @@
# updates.
#
#
# This script obeys values in the folllowing environment variables:
# This script obeys values in the following environment variables:
# TF_DIST_GRPC_SERVER_URLS: If it is set to a list of valid server urls,
# separated with spaces or commas
# (e.g., "grpc://1.2.3.4:2222 grpc//5.6.7.8:2222"),
......
......@@ -157,7 +157,7 @@ cc_library(
# This rule checks if Cuda libraries in the source tree has been properly configured.
# The output list makes bazel runs this rule first if the Cuda files are missing.
# This gives us an opportunity to check and print a meaningful error message.
# But we will need to create the output file list to make bazel happy in a successfull run.
# But we will need to create the output file list to make bazel happy in a successful run.
genrule(
name = "cuda_check",
srcs = [
......