Commit b1cc9da4 authored by dangqingqing

Merge branch 'develop' of https://github.com/baidu/Paddle into benchmark_cfg_doc

@@ -13,8 +13,6 @@
 # The document of clang-format is
 # http://clang.llvm.org/docs/ClangFormat.html
 # http://clang.llvm.org/docs/ClangFormatStyleOptions.html
-#
-# TODO(yuyang18): Add python and other language code style
 ---
 Language: Cpp
 BasedOnStyle: Google
@@ -22,8 +20,9 @@ IndentWidth: 2
 TabWidth: 2
 ContinuationIndentWidth: 4
 AccessModifierOffset: -2 # The private/protected/public has no indent in class
+PointerAlignment: Left # int* p/int& p, not int *p/int &p
 Standard: Cpp11
 AllowAllParametersOfDeclarationOnNextLine: true
+BinPackParameters: false
+BinPackArguments: false
 ...
@@ -5,4 +5,6 @@ build/
 .vscode
 .idea
 .project
+.cproject
 .pydevproject
+Makefile
- repo: https://github.com/Lucas-C/pre-commit-hooks.git
sha: c25201a00e6b0514370501050cf2a8538ac12270
hooks:
- id: remove-crlf
- repo: https://github.com/reyoung/mirrors-yapf.git
sha: v0.13.2
hooks:
- id: yapf
- repo: https://github.com/pre-commit/pre-commit-hooks
sha: 7539d8bd1a00a3c1bfd34cdb606d3a6372e83469
hooks:
- id: check-added-large-files
- id: check-merge-conflict
- id: check-symlinks
- id: detect-private-key
- id: end-of-file-fixer
- repo: https://github.com/PaddlePaddle/clang-format-pre-commit-hook.git
sha: 28c0ea8a67a3e2dbbf4822ef44e85b63a0080a29
hooks:
- id: clang-formater
[style]
based_on_style = pep8
column_limit = 80
@@ -35,11 +35,22 @@ addons:
 - libgoogle-glog-dev
 - libgflags-dev
 - libgtest-dev
+- curl
+- lcov
 - graphviz
+- swig
 before_install:
+- |
+  if [ ${JOB} == "BUILD_AND_TEST" ]; then
+    if ! git diff --name-only $TRAVIS_COMMIT_RANGE | grep -qvE '(\.md$)'
+    then
+      echo "Only markdown docs were updated, stopping build process."
+      exit
+    fi
+  fi
 - if [[ "$TRAVIS_OS_NAME" == "linux" ]]; then sudo paddle/scripts/travis/before_install.linux.sh; fi
 - if [[ "$TRAVIS_OS_NAME" == "osx" ]]; then paddle/scripts/travis/before_install.osx.sh; fi
-- pip install wheel protobuf sphinx breathe recommonmark
+- pip install wheel protobuf sphinx breathe recommonmark virtualenv numpy
 script:
 - paddle/scripts/travis/main.sh
 notifications:
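The `before_install` step added above skips the `BUILD_AND_TEST` job when a push touches nothing but Markdown files: `grep -qvE '(\.md$)'` succeeds only if at least one changed file does not end in `.md`, so the job exits early when that check fails. As a rough illustration of the same logic (the file names below are made up; the real check reads them from `git diff --name-only $TRAVIS_COMMIT_RANGE`):

```python
# Sketch of the "markdown-only change" check performed in before_install.
changed_files = ["README.md", "doc/getstarted.md"]  # stand-in for `git diff --name-only $TRAVIS_COMMIT_RANGE`

# grep -qvE '(\.md$)' succeeds when at least one changed file is NOT a .md file.
non_markdown = [path for path in changed_files if not path.endswith(".md")]

if not non_markdown:
    print("Only markdown docs were updated, stopping build process.")
```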
@@ -2,14 +2,14 @@ cmake_minimum_required(VERSION 2.8)
 project(paddle CXX C)
 set(PADDLE_MAJOR_VERSION 0)
-set(PADDLE_MINOR_VERSION 8)
-set(PADDLE_PATCH_VERSION 0b2)
+set(PADDLE_MINOR_VERSION 9)
+set(PADDLE_PATCH_VERSION 0a0)
 set(PADDLE_VERSION ${PADDLE_MAJOR_VERSION}.${PADDLE_MINOR_VERSION}.${PADDLE_PATCH_VERSION})
 set(CMAKE_MODULE_PATH ${CMAKE_MODULE_PATH} "${CMAKE_SOURCE_DIR}/cmake")
 set(PROJ_ROOT ${CMAKE_SOURCE_DIR})
 include(package)
-include(swig)
+find_package(SWIG 2.0)
 find_package(CUDA QUIET)
 find_package(Protobuf REQUIRED)
 find_package(PythonLibs 2.7 REQUIRED)
@@ -40,6 +40,9 @@ option(WITH_TESTING "Compile and run unittest for PaddlePaddle" ${GTEST_FOUND})
 option(WITH_DOC "Compile PaddlePaddle with documentation" OFF)
 option(WITH_SWIG_PY "Compile PaddlePaddle with py PaddlePaddle prediction api" ${SWIG_FOUND})
 option(ON_TRAVIS "Running test on travis-ci or not." OFF)
+option(ON_COVERALLS "Generating code coverage data on coveralls or not." OFF)
+option(COVERALLS_UPLOAD "Uploading the generated coveralls json." ON)
 if(NOT CMAKE_BUILD_TYPE)
 set(CMAKE_BUILD_TYPE "RelWithDebInfo" CACHE STRING
 "Choose the type of build, options are: Debug Release RelWithDebInfo MinSizeRel"
@@ -49,11 +52,16 @@ endif()
 include(enableCXX11)
 include(cpplint)
 include(ccache)
+if(WITH_RDMA)
+  include(rdma)
+endif()
 include(util)
 include(flags)
 include(cudnn)
 include(FindPythonModule)
 include(check_packages)
+include(swig)
+include(coveralls)
 # add PaddlePaddle version
 if(DEFINED ENV{PADDLE_VERSION})
@@ -87,11 +95,24 @@ if(NOT WITH_GPU)
 add_definitions(-DHPPL_STUB_FUNC)
 list(APPEND CMAKE_CXX_SOURCE_FILE_EXTENSIONS cu)
 else()
+  if(${CUDA_VERSION_MAJOR} GREATER 6)
+    if(COMPILER_SUPPORT_CXX11)
+      LIST(APPEND CUDA_NVCC_FLAGS -std=c++11)
+    endif()
+  endif()
 # TODO(yuyang18): Change it to remove std=c++11 in cuda compile.
 set(CUDA_PROPAGATE_HOST_FLAGS OFF)
 if(NOT CUDNN_FOUND)
 message(FATAL_ERROR "Paddle need cudnn to compile")
 endif()
+  set(CUDA_NVCC_FLAGS ${CUDA_NVCC_FLAGS} "-g -O3 --use_fast_math")
+  if(WITH_AVX)
+    set(CUDA_NVCC_FLAGS ${CUDA_NVCC_FLAGS} "-Xcompiler ${AVX_FLAG}")
+  else(WITH_AVX)
+    set(CUDA_NVCC_FLAGS ${CUDA_NVCC_FLAGS} "-Xcompiler ${SSE3_FLAG}")
+  endif(WITH_AVX)
 if(WITH_DSO)
 set(CUDA_LIBRARIES "")
@@ -115,11 +136,11 @@ if(NOT WITH_TIMER)
 endif(NOT WITH_TIMER)
 if(WITH_AVX)
-  set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} ${AVX_FLAGS}")
-  set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} ${AVX_FLAGS}")
+  set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} ${AVX_FLAG}")
+  set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} ${AVX_FLAG}")
 else(WITH_AVX)
-  set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} -msse3")
-  set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -msse3")
+  set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} ${SSE3_FLAG}")
+  set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} ${SSE3_FLAG}")
 endif(WITH_AVX)
 if(WITH_PYTHON)
@@ -129,12 +150,15 @@ else(WITH_PYTHON)
 add_definitions(-DPADDLE_NO_PYTHON)
 endif(WITH_PYTHON)
-if(NOT WITH_RDMA)
-  add_definitions(-DPADDLE_DISABLE_RDMA)
-endif()
+if(WITH_RDMA)
+  include_directories("${RDMA_INC_DIR}")
+else(WITH_RDMA)
+  add_definitions(-DPADDLE_DISABLE_RDMA)
+endif(WITH_RDMA)
 if(WITH_GLOG)
 add_definitions(-DPADDLE_USE_GLOG)
+  include_directories(${LIBGLOG_INCLUDE_DIR})
 endif()
 if(WITH_GFLAGS)
Thank you for contributing to PaddlePaddle. Submitting an issue is a great help for us.
Both Chinese and English issues are welcome.

It's hard to solve a problem when important details are missing.
Before submitting an issue, please check it against the following criteria.

- [ ] Was a similar issue submitted or resolved before? You can search the existing issues on GitHub.
- [ ] Did you search for your issue with widespread search engines?
- [ ] Is my description of the issue clear enough to reproduce the problem?
  * If an error occurred, we need details such as: How do you run your code? Which system do you use? Are you using a GPU or not?
  * If you use a recording tool such as [asciinema](https://asciinema.org/) to show what you are doing, that's awesome! It helps us solve the problem more quickly.
- [ ] Does my description of the issue use GitHub Markdown correctly?
  * Please use proper Markdown syntax to style all forms of writing, e.g., source code, error messages, etc.
  * Check out [this page](https://guides.github.com/features/mastering-markdown/) to learn more about Markdown.
 # PaddlePaddle
-| **`Linux`** | **`License`** | **`Chat Room`** |
-|----------------|---------------|-----------------|
-|[![Build Status](https://travis-ci.org/baidu/Paddle.svg?branch=master)](https://travis-ci.org/baidu/Paddle)|[![License](https://img.shields.io/badge/license-Apache%202.0-green.svg)](LICENSE)|[![Join the chat at https://gitter.im/PaddlePaddle/Deep_Learning](https://badges.gitter.im/Join%20Chat.svg)](https://gitter.im/PaddlePaddle/Deep_Learning?utm_source=badge&utm_medium=badge&utm_campaign=pr-badge&utm_content=badge)|
+[![Build Status](https://travis-ci.org/PaddlePaddle/Paddle.svg?branch=develop)](https://travis-ci.org/PaddlePaddle/Paddle)
+[![Documentation Status](https://img.shields.io/badge/docs-latest-brightgreen.svg?style=flat)](http://www.paddlepaddle.org/)
+[![Documentation Status](https://img.shields.io/badge/中文文档-最新-brightgreen.svg)](http://www.paddlepaddle.org/cn/index.html)
+[![Coverage Status](https://coveralls.io/repos/github/PaddlePaddle/Paddle/badge.svg?branch=develop)](https://coveralls.io/github/PaddlePaddle/Paddle?branch=develop)
+[![Release](https://img.shields.io/github/release/PaddlePaddle/Paddle.svg)](https://github.com/PaddlePaddle/Paddle/releases)
+[![License](https://img.shields.io/badge/license-Apache%202-blue.svg)](LICENSE)
 Welcome to the PaddlePaddle GitHub.
@@ -12,7 +17,7 @@ developed by Baidu scientists and engineers for the purpose of applying deep
 learning to many products at Baidu.
 Our vision is to enable deep learning for everyone via PaddlePaddle.
-Please refer to our [release log](https://github.com/baidu/Paddle/releases) to track the latest feature of PaddlePaddle.
+Please refer to our [release announcement](https://github.com/PaddlePaddle/Paddle/releases) to track the latest features of PaddlePaddle.
 ## Features
@@ -24,15 +29,15 @@ Please refer to our [release log](https://github.com/baidu/Paddle/releases) to t
 connection.
 - **Efficiency**
 In order to unleash the power of heterogeneous computing resource,
 optimization occurs at different levels of PaddlePaddle, including
 computing, memory, architecture and communication. The following are some
 examples:
 - Optimized math operations through SSE/AVX intrinsics, BLAS libraries
 (e.g. MKL, ATLAS, cuBLAS) or customized CPU/GPU kernels.
 - Highly optimized recurrent networks which can handle **variable-length**
 sequence without padding.
 - Optimized local and distributed training for models with high dimensional
 sparse data.
@@ -55,41 +60,39 @@ Please refer to our [release log](https://github.com/baidu/Paddle/releases) to t
 ## Installation
 Check out the [Install Guide](http://paddlepaddle.org/doc/build/) to install from
 pre-built packages (**docker image**, **deb package**) or
 directly build on **Linux** and **Mac OS X** from the source code.
 ## Documentation
 Both [English Docs](http://paddlepaddle.org/doc/) and [Chinese Docs](http://paddlepaddle.org/doc_cn/) are provided for our users and developers.
 - [Quick Start](http://paddlepaddle.org/doc/demo/quick_start/index_en) <br>
 You can follow the quick start tutorial to learn how to use PaddlePaddle
 step-by-step.
 - [Example and Demo](http://paddlepaddle.org/doc/demo/) <br>
 We provide five demos, including: image classification, sentiment analysis,
 sequence to sequence model, recommendation, semantic role labeling.
 - [Distributed Training](http://paddlepaddle.org/doc/cluster) <br>
 This system supports training deep learning models on multiple machines
 with data parallelism.
 - [Python API](http://paddlepaddle.org/doc/ui/) <br>
 PaddlePaddle supports using either Python interface or C++ to build your
 system. We also use SWIG to wrap C++ source code to create a user-friendly
 interface for Python. You can also use SWIG to create an interface for your
 favorite programming language.
 - [How to Contribute](http://paddlepaddle.org/doc/build/contribute_to_paddle.html) <br>
 We sincerely appreciate your interest and contributions. If you would like to
 contribute, please read the contribution guide.
 - [Source Code Documents](http://paddlepaddle.org/doc/source/) <br>
 ## Ask Questions
-Please join the [**gitter chat**](https://gitter.im/PaddlePaddle/Deep_Learning) or send email to
-**paddle-dev@baidu.com** to ask questions and talk about methods and models.
-Framework development discussions and
-bug reports are collected on [Issues](https://github.com/baidu/paddle/issues).
+You are welcome to submit questions and bug reports as [Github Issues](https://github.com/PaddlePaddle/Paddle/issues).
 ## Copyright and License
 PaddlePaddle is provided under the [Apache-2.0 license](LICENSE).
@@ -5,11 +5,11 @@ Machine:
 - CPU: 12-core Intel(R) Xeon(R) CPU E5-2620 v2 @2.10GHz
 - GPU: Tesla K40m
 - cuDNN: v5.1
-- system: Docker 1.12.1, all platform are tested in docker environment.
+- system: Docker 1.12.1, all platforms are tested in the Docker environment.
 Platforms:
-- PaddlePaddle:
+- PaddlePaddle: paddledev/paddle:gpu-devel-v0.9.0a0
 - Tensorflow: gcr.io/tensorflow/tensorflow:0.11.0rc0-gpu
 - Caffe: kaixhin/cuda-caffe
@@ -28,7 +28,7 @@ AlexNet, GoogleNet and a small network used in Caffe.
 - [SmallNet](https://github.com/BVLC/caffe/blob/master/examples/cifar10/cifar10\_quick\_train\_test.prototxt)
-### Singe-GPU
+### Single-GPU
 - AlexNet: input - 3 * 227 * 227, Time: ms/batch
@@ -61,7 +61,7 @@ All platforms use cuDNN-v5.1. We see that caffe is slower in this experiment, be
 **Notation**
-All the experiments in caffe use `caffe time` to execute, which does not include the time of parameter updating. The time in PaddlePaddle and TensorFlow contains it. But, compared with the total time, the time of parameter updating is relatively little.
+All the experiments in Caffe use `caffe time` to execute, which does not include the time of parameter updating, while the PaddlePaddle and TensorFlow timings do include it. Compared with the total time, however, the time spent on parameter updating is relatively small on a single machine.
 In TensorFlow, they implement their own algorithm-searching method instead of using the algorithm-searching interface in cuDNN.
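For reference, the `ms/batch` numbers above come from the per-framework scripts included later in this commit (for example `time_tensorflow_run` in the TensorFlow benchmark script). A minimal, framework-agnostic sketch of that measurement pattern, where `run_one_batch` is a hypothetical stand-in for a single training step, looks like this:

```python
import time

def benchmark_ms_per_batch(run_one_batch, num_batches=100, warmup=10):
    """Average wall-clock milliseconds per batch, excluding warm-up iterations."""
    for _ in range(warmup):
        run_one_batch()  # warm-up: cuDNN autotuning, lazy memory allocation, etc.
    start = time.time()
    for _ in range(num_batches):
        run_one_batch()
    return (time.time() - start) * 1000.0 / num_batches

# Usage with a dummy "batch" that just burns some CPU time.
print("%.3f ms/batch" % benchmark_ms_per_batch(lambda: sum(range(100000))))
```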
@@ -106,7 +106,7 @@ We use lstm network for text classfication to test benchmark.
 - Dictionary size=30000
 - Peephole connection is used in `lstmemory` by default in PaddlePaddle. It is also configured in TensorFlow.
-### Single GPU
+### Single-GPU
 #### LSTM in Text Classification
...@@ -2,56 +2,63 @@ ...@@ -2,56 +2,63 @@
from paddle.trainer_config_helpers import * from paddle.trainer_config_helpers import *
height=227 height = 227
width=227 width = 227
num_class = 1000 num_class = 1000
batch_size = get_config_arg('batch_size', int, 128) batch_size = get_config_arg('batch_size', int, 128)
args={'height':height, 'width':width, 'color':True, 'num_class':num_class}
define_py_data_sources2("train.list",
None,
module="provider",
obj="process",
args=args)
args = {'height': height, 'width': width, 'color': True, 'num_class': num_class}
define_py_data_sources2(
"train.list", None, module="provider", obj="process", args=args)
settings( settings(
batch_size = batch_size, batch_size=batch_size,
learning_rate = 0.01 / batch_size, learning_rate=0.01 / batch_size,
learning_method = MomentumOptimizer(0.9), learning_method=MomentumOptimizer(0.9),
regularization = L2Regularization(0.0005 * batch_size) regularization=L2Regularization(0.0005 * batch_size))
)
# conv1 # conv1
net = data_layer('data', size=height * width * 3) net = data_layer('data', size=height * width * 3)
net = img_conv_layer(input=net, filter_size=11, num_channels=3, net = img_conv_layer(
num_filters=96, stride=4, padding=1) input=net,
filter_size=11,
num_channels=3,
num_filters=96,
stride=4,
padding=1)
net = img_cmrnorm_layer(input=net, size=5, scale=0.0001, power=0.75) net = img_cmrnorm_layer(input=net, size=5, scale=0.0001, power=0.75)
net = img_pool_layer(input=net, pool_size=3, stride=2) net = img_pool_layer(input=net, pool_size=3, stride=2)
# conv2 # conv2
net = img_conv_layer(input=net, filter_size=5, num_filters=256, net = img_conv_layer(
stride=1, padding=2, groups=1) input=net, filter_size=5, num_filters=256, stride=1, padding=2, groups=1)
net = img_cmrnorm_layer(input=net, size=5, scale=0.0001, power=0.75) net = img_cmrnorm_layer(input=net, size=5, scale=0.0001, power=0.75)
net = img_pool_layer(input=net, pool_size=3, stride=2) net = img_pool_layer(input=net, pool_size=3, stride=2)
# conv3 # conv3
net = img_conv_layer(input=net, filter_size=3, num_filters=384, net = img_conv_layer(
stride=1, padding=1) input=net, filter_size=3, num_filters=384, stride=1, padding=1)
# conv4 # conv4
net = img_conv_layer(input=net, filter_size=3, num_filters=384, net = img_conv_layer(
stride=1, padding=1, groups=1) input=net, filter_size=3, num_filters=384, stride=1, padding=1, groups=1)
# conv5 # conv5
net = img_conv_layer(input=net, filter_size=3, num_filters=256, net = img_conv_layer(
stride=1, padding=1, groups=1) input=net, filter_size=3, num_filters=256, stride=1, padding=1, groups=1)
net = img_pool_layer(input=net, pool_size=3, stride=2) net = img_pool_layer(input=net, pool_size=3, stride=2)
net = fc_layer(input=net, size=4096, act=ReluActivation(), layer_attr=ExtraAttr(drop_rate=0.5)) net = fc_layer(
net = fc_layer(input=net, size=4096, act=ReluActivation(), layer_attr=ExtraAttr(drop_rate=0.5)) input=net,
size=4096,
act=ReluActivation(),
layer_attr=ExtraAttr(drop_rate=0.5))
net = fc_layer(
input=net,
size=4096,
act=ReluActivation(),
layer_attr=ExtraAttr(drop_rate=0.5))
net = fc_layer(input=net, size=1000, act=SoftmaxActivation()) net = fc_layer(input=net, size=1000, act=SoftmaxActivation())
lab = data_layer('label', num_class) lab = data_layer('label', num_class)
loss = cross_entropy(input=net, label=lab) loss = cross_entropy(input=net, label=lab)
outputs(loss) outputs(loss)
#!/usr/bin/env python #!/usr/bin/env python
from paddle.trainer_config_helpers import * from paddle.trainer_config_helpers import *
height=224 height = 224
width=224 width = 224
num_class = 1000 num_class = 1000
batch_size = get_config_arg('batch_size', int, 128) batch_size = get_config_arg('batch_size', int, 128)
args={'height':height, 'width':width, 'color':True, 'num_class':num_class} args = {'height': height, 'width': width, 'color': True, 'num_class': num_class}
define_py_data_sources2("train.list", define_py_data_sources2(
None, "train.list", None, module="provider", obj="process", args=args)
module="provider",
obj="process",
args=args)
settings( settings(
batch_size = batch_size, batch_size=batch_size,
learning_rate = 0.01 / batch_size, learning_rate=0.01 / batch_size,
learning_method = MomentumOptimizer(0.9), learning_method=MomentumOptimizer(0.9),
regularization = L2Regularization(0.0005 * batch_size) regularization=L2Regularization(0.0005 * batch_size))
)
def inception2(name, input, channels, \ def inception2(name, input, channels, \
filter1, filter1,
...@@ -34,26 +30,61 @@ def inception2(name, input, channels, \ ...@@ -34,26 +30,61 @@ def inception2(name, input, channels, \
maxpool = name + '_max' maxpool = name + '_max'
convproj = name + '_proj' convproj = name + '_proj'
cov1 = img_conv_layer(name=conv1, input=input, filter_size=1, cov1 = img_conv_layer(
num_channels=channels, num_filters=filter1, name=conv1,
stride=1, padding=0) input=input,
filter_size=1,
cov3r = img_conv_layer(name=conv3r, input=input, filter_size=1, num_channels=channels,
num_channels=channels, num_filters=filter3R, num_filters=filter1,
stride=1, padding=0) stride=1,
cov3 = img_conv_layer(name=conv3, input=cov3r, filter_size=3, padding=0)
num_filters=filter3, stride=1, padding=1)
cov3r = img_conv_layer(
cov5r = img_conv_layer(name=conv5r, input=input, filter_size=1, name=conv3r,
num_channels=channels, num_filters=filter5R, input=input,
stride=1, padding=0) filter_size=1,
cov5 = img_conv_layer(name=conv5, input=cov5r, filter_size=5, num_channels=channels,
num_filters=filter5, stride=1, padding=2) num_filters=filter3R,
stride=1,
pool1 = img_pool_layer(name=maxpool, input=input, pool_size=3, padding=0)
num_channels=channels, stride=1, padding=1) cov3 = img_conv_layer(
covprj = img_conv_layer(name=convproj, input=pool1, filter_size=1, name=conv3,
num_filters=proj, stride=1, padding=0) input=cov3r,
filter_size=3,
num_filters=filter3,
stride=1,
padding=1)
cov5r = img_conv_layer(
name=conv5r,
input=input,
filter_size=1,
num_channels=channels,
num_filters=filter5R,
stride=1,
padding=0)
cov5 = img_conv_layer(
name=conv5,
input=cov5r,
filter_size=5,
num_filters=filter5,
stride=1,
padding=2)
pool1 = img_pool_layer(
name=maxpool,
input=input,
pool_size=3,
num_channels=channels,
stride=1,
padding=1)
covprj = img_conv_layer(
name=convproj,
input=pool1,
filter_size=1,
num_filters=proj,
stride=1,
padding=0)
cat = concat_layer(name=name, input=[cov1, cov3, cov5, covprj]) cat = concat_layer(name=name, input=[cov1, cov3, cov5, covprj])
return cat return cat
...@@ -64,28 +95,51 @@ def inception(name, input, channels, \ ...@@ -64,28 +95,51 @@ def inception(name, input, channels, \
filter5R, filter5, filter5R, filter5,
proj): proj):
cov1 = conv_projection(input=input, filter_size=1, num_channels=channels, cov1 = conv_projection(
num_filters=filter1, stride=1, padding=0) input=input,
filter_size=1,
cov3r = img_conv_layer(name=name + '_3r', input=input, filter_size=1, num_channels=channels,
num_channels=channels, num_filters=filter3R, num_filters=filter1,
stride=1, padding=0) stride=1,
cov3 = conv_projection(input=cov3r, filter_size=3, num_filters=filter3, padding=0)
stride=1, padding=1)
cov3r = img_conv_layer(
cov5r = img_conv_layer(name=name + '_5r', input=input, filter_size=1, name=name + '_3r',
num_channels=channels, num_filters=filter5R, input=input,
stride=1, padding=0) filter_size=1,
cov5 = conv_projection(input=cov5r, filter_size=5, num_filters=filter5, num_channels=channels,
stride=1, padding=2) num_filters=filter3R,
stride=1,
pool1 = img_pool_layer(name=name + '_max', input=input, pool_size=3, padding=0)
num_channels=channels, stride=1, padding=1) cov3 = conv_projection(
covprj = conv_projection(input=pool1, filter_size=1, num_filters=proj, input=cov3r, filter_size=3, num_filters=filter3, stride=1, padding=1)
stride=1, padding=0)
cov5r = img_conv_layer(
cat = concat_layer(name=name, input=[cov1, cov3, cov5, covprj], name=name + '_5r',
bias_attr=True, act=ReluActivation()) input=input,
filter_size=1,
num_channels=channels,
num_filters=filter5R,
stride=1,
padding=0)
cov5 = conv_projection(
input=cov5r, filter_size=5, num_filters=filter5, stride=1, padding=2)
pool1 = img_pool_layer(
name=name + '_max',
input=input,
pool_size=3,
num_channels=channels,
stride=1,
padding=1)
covprj = conv_projection(
input=pool1, filter_size=1, num_filters=proj, stride=1, padding=0)
cat = concat_layer(
name=name,
input=[cov1, cov3, cov5, covprj],
bias_attr=True,
act=ReluActivation())
return cat return cat
...@@ -93,36 +147,60 @@ lab = data_layer(name="label", size=1000) ...@@ -93,36 +147,60 @@ lab = data_layer(name="label", size=1000)
data = data_layer(name="input", size=3 * height * width) data = data_layer(name="input", size=3 * height * width)
# stage 1 # stage 1
conv1 = img_conv_layer(name="conv1", input=data, filter_size=7, conv1 = img_conv_layer(
num_channels=3, num_filters=64, stride=2, padding=3) name="conv1",
pool1 = img_pool_layer(name="pool1", input=conv1, pool_size=3, input=data,
num_channels=64, stride=2) filter_size=7,
num_channels=3,
num_filters=64,
stride=2,
padding=3)
pool1 = img_pool_layer(
name="pool1", input=conv1, pool_size=3, num_channels=64, stride=2)
# stage 2 # stage 2
conv2_1 = img_conv_layer(name="conv2_1", input=pool1, filter_size=1, conv2_1 = img_conv_layer(
num_filters=64, stride=1, padding=0) name="conv2_1",
conv2_2 = img_conv_layer(name="conv2_2", input=conv2_1, filter_size=3, input=pool1,
num_filters=192, stride=1, padding=1) filter_size=1,
pool2 = img_pool_layer(name="pool2", input=conv2_2, pool_size=3, num_filters=64,
num_channels=192, stride=2) stride=1,
padding=0)
conv2_2 = img_conv_layer(
name="conv2_2",
input=conv2_1,
filter_size=3,
num_filters=192,
stride=1,
padding=1)
pool2 = img_pool_layer(
name="pool2", input=conv2_2, pool_size=3, num_channels=192, stride=2)
# stage 3 # stage 3
ince3a = inception("ince3a", pool2, 192, 64, 96, 128, 16, 32, 32) ince3a = inception("ince3a", pool2, 192, 64, 96, 128, 16, 32, 32)
ince3b = inception("ince3b", ince3a, 256, 128, 128,192, 32, 96, 64) ince3b = inception("ince3b", ince3a, 256, 128, 128, 192, 32, 96, 64)
pool3 = img_pool_layer(name="pool3", input=ince3b, num_channels=480, pool_size=3, stride=2) pool3 = img_pool_layer(
name="pool3", input=ince3b, num_channels=480, pool_size=3, stride=2)
# stage 4 # stage 4
ince4a = inception("ince4a", pool3, 480, 192, 96, 208, 16, 48, 64) ince4a = inception("ince4a", pool3, 480, 192, 96, 208, 16, 48, 64)
ince4b = inception("ince4b", ince4a, 512, 160, 112, 224, 24, 64, 64) ince4b = inception("ince4b", ince4a, 512, 160, 112, 224, 24, 64, 64)
ince4c = inception("ince4c", ince4b, 512, 128, 128, 256, 24, 64, 64) ince4c = inception("ince4c", ince4b, 512, 128, 128, 256, 24, 64, 64)
ince4d = inception("ince4d", ince4c, 512, 112, 144, 288, 32, 64, 64) ince4d = inception("ince4d", ince4c, 512, 112, 144, 288, 32, 64, 64)
ince4e = inception("ince4e", ince4d, 528, 256, 160, 320, 32, 128, 128) ince4e = inception("ince4e", ince4d, 528, 256, 160, 320, 32, 128, 128)
pool4 = img_pool_layer(name="pool4", input=ince4e, num_channels=832, pool_size=3, stride=2) pool4 = img_pool_layer(
name="pool4", input=ince4e, num_channels=832, pool_size=3, stride=2)
# stage 5 # stage 5
ince5a = inception("ince5a", pool4, 832, 256, 160, 320, 32, 128, 128) ince5a = inception("ince5a", pool4, 832, 256, 160, 320, 32, 128, 128)
ince5b = inception("ince5b", ince5a, 832, 384, 192, 384, 48, 128, 128) ince5b = inception("ince5b", ince5a, 832, 384, 192, 384, 48, 128, 128)
pool5 = img_pool_layer(name="pool5", input=ince5b, num_channels=1024, pool_size=7, stride=7, pool_type=AvgPooling()) pool5 = img_pool_layer(
name="pool5",
input=ince5b,
num_channels=1024,
pool_size=7,
stride=7,
pool_type=AvgPooling())
# We remove loss1 and loss2 for all system when testing benchmark # We remove loss1 and loss2 for all system when testing benchmark
# output 1 # output 1
...@@ -141,7 +219,8 @@ pool5 = img_pool_layer(name="pool5", input=ince5b, num_channels=1024, pool_size= ...@@ -141,7 +219,8 @@ pool5 = img_pool_layer(name="pool5", input=ince5b, num_channels=1024, pool_size=
# output 3 # output 3
dropout = dropout_layer(name="dropout", input=pool5, dropout_rate=0.4) dropout = dropout_layer(name="dropout", input=pool5, dropout_rate=0.4)
out3 = fc_layer(name="output3", input=dropout, size=1000, act=SoftmaxActivation()) out3 = fc_layer(
loss3 = cross_entropy(name='loss3', input=out3, label=lab) name="output3", input=dropout, size=1000, act=SoftmaxActivation())
loss3 = cross_entropy(name='loss3', input=out3, label=lab)
outputs(loss3) outputs(loss3)
import io,os import io, os
import random import random
import numpy as np import numpy as np
from paddle.trainer.PyDataProvider2 import * from paddle.trainer.PyDataProvider2 import *
def initHook(settings, height, width, color, num_class, **kwargs): def initHook(settings, height, width, color, num_class, **kwargs):
settings.height = height settings.height = height
settings.width = width settings.width = width
settings.color = color settings.color = color
settings.num_class = num_class settings.num_class = num_class
if settings.color: if settings.color:
settings.data_size = settings.height * settings.width * 3 settings.data_size = settings.height * settings.width * 3
else: else:
...@@ -15,7 +16,9 @@ def initHook(settings, height, width, color, num_class, **kwargs): ...@@ -15,7 +16,9 @@ def initHook(settings, height, width, color, num_class, **kwargs):
settings.slots = [dense_vector(settings.data_size), integer_value(1)] settings.slots = [dense_vector(settings.data_size), integer_value(1)]
@provider(init_hook=initHook, min_pool_size=-1, cache=CacheType.CACHE_PASS_IN_MEM)
@provider(
init_hook=initHook, min_pool_size=-1, cache=CacheType.CACHE_PASS_IN_MEM)
def process(settings, file_list): def process(settings, file_list):
with open(file_list, 'r') as fdata: with open(file_list, 'r') as fdata:
for line in fdata: for line in fdata:
......
...@@ -2,42 +2,44 @@ ...@@ -2,42 +2,44 @@
from paddle.trainer_config_helpers import * from paddle.trainer_config_helpers import *
height=32 height = 32
width=32 width = 32
num_class = 10 num_class = 10
batch_size = get_config_arg('batch_size', int, 128) batch_size = get_config_arg('batch_size', int, 128)
args={'height':height, 'width':width, 'color':True, 'num_class':num_class} args = {'height': height, 'width': width, 'color': True, 'num_class': num_class}
define_py_data_sources2("train.list", define_py_data_sources2(
None, "train.list", None, module="provider", obj="process", args=args)
module="provider",
obj="process",
args=args)
settings( settings(
batch_size = batch_size, batch_size=batch_size,
learning_rate = 0.01 / batch_size, learning_rate=0.01 / batch_size,
learning_method = MomentumOptimizer(0.9), learning_method=MomentumOptimizer(0.9),
regularization = L2Regularization(0.0005 * batch_size) regularization=L2Regularization(0.0005 * batch_size))
)
# conv1 # conv1
net = data_layer('data', size=height * width * 3) net = data_layer('data', size=height * width * 3)
net = img_conv_layer(input=net, filter_size=5, num_channels=3, net = img_conv_layer(
num_filters=32, stride=1, padding=2) input=net,
filter_size=5,
num_channels=3,
num_filters=32,
stride=1,
padding=2)
net = img_pool_layer(input=net, pool_size=3, stride=2, padding=1) net = img_pool_layer(input=net, pool_size=3, stride=2, padding=1)
# conv2 # conv2
net = img_conv_layer(input=net, filter_size=5, num_filters=32, net = img_conv_layer(
stride=1, padding=2) input=net, filter_size=5, num_filters=32, stride=1, padding=2)
net = img_pool_layer(input=net, pool_size=3, stride=2, padding=1, pool_type=AvgPooling()) net = img_pool_layer(
input=net, pool_size=3, stride=2, padding=1, pool_type=AvgPooling())
# conv3 # conv3
net = img_conv_layer(input=net, filter_size=3, num_filters=64, net = img_conv_layer(
stride=1, padding=1) input=net, filter_size=3, num_filters=64, stride=1, padding=1)
net = img_pool_layer(input=net, pool_size=3, stride=2, padding=1, pool_type=AvgPooling()) net = img_pool_layer(
input=net, pool_size=3, stride=2, padding=1, pool_type=AvgPooling())
net = fc_layer(input=net, size=64, act=ReluActivation()) net = fc_layer(input=net, size=64, act=ReluActivation())
net = fc_layer(input=net, size=10, act=SoftmaxActivation()) net = fc_layer(input=net, size=10, act=SoftmaxActivation())
......
...@@ -4,6 +4,7 @@ import gzip ...@@ -4,6 +4,7 @@ import gzip
import os import os
import numpy import numpy
def get_dataset_file(dataset, default_dataset, origin): def get_dataset_file(dataset, default_dataset, origin):
data_dir, data_file = os.path.split(dataset) data_dir, data_file = os.path.split(dataset)
if (not os.path.isfile(dataset)) and data_file == default_dataset: if (not os.path.isfile(dataset)) and data_file == default_dataset:
...@@ -13,13 +14,14 @@ def get_dataset_file(dataset, default_dataset, origin): ...@@ -13,13 +14,14 @@ def get_dataset_file(dataset, default_dataset, origin):
return dataset return dataset
def create_data(path="imdb.pkl"): def create_data(path="imdb.pkl"):
if (not os.path.isfile('imdb.train.pkl')): if (not os.path.isfile('imdb.train.pkl')):
path = get_dataset_file( path = get_dataset_file(
path, "imdb.pkl", path, "imdb.pkl",
"http://www.iro.umontreal.ca/~lisa/deep/data/imdb.pkl") "http://www.iro.umontreal.ca/~lisa/deep/data/imdb.pkl")
if path.endswith(".gz"): if path.endswith(".gz"):
f = gzip.open(path, 'rb') f = gzip.open(path, 'rb')
else: else:
...@@ -35,8 +37,10 @@ def create_data(path="imdb.pkl"): ...@@ -35,8 +37,10 @@ def create_data(path="imdb.pkl"):
if (not os.path.isfile('train.list')): if (not os.path.isfile('train.list')):
file('train.list', 'w').write('imdb.train.pkl\n') file('train.list', 'w').write('imdb.train.pkl\n')
def main(): def main():
create_data('imdb.pkl') create_data('imdb.pkl')
if __name__ == "__main__": if __name__ == "__main__":
main() main()
import io,os import io, os
import random import random
import numpy as np import numpy as np
import six.moves.cPickle as pickle import six.moves.cPickle as pickle
from paddle.trainer.PyDataProvider2 import * from paddle.trainer.PyDataProvider2 import *
def remove_unk(x, n_words): def remove_unk(x, n_words):
return [[1 if w >= n_words else w for w in sen] for sen in x] return [[1 if w >= n_words else w for w in sen] for sen in x]
# ============================================================== # ==============================================================
# tensorflow uses fixed length, but PaddlePaddle can process # tensorflow uses fixed length, but PaddlePaddle can process
# variable-length. Padding is used in benchmark in order to # variable-length. Padding is used in benchmark in order to
# compare with other platform. # compare with other platform.
# ============================================================== # ==============================================================
def pad_sequences(sequences, maxlen=None, dtype='int32', padding='post', def pad_sequences(sequences,
truncating='post', value=0.): maxlen=None,
dtype='int32',
padding='post',
truncating='post',
value=0.):
lengths = [len(s) for s in sequences] lengths = [len(s) for s in sequences]
nb_samples = len(sequences) nb_samples = len(sequences)
...@@ -43,12 +49,14 @@ def pad_sequences(sequences, maxlen=None, dtype='int32', padding='post', ...@@ -43,12 +49,14 @@ def pad_sequences(sequences, maxlen=None, dtype='int32', padding='post',
def initHook(settings, vocab_size, pad_seq, maxlen, **kwargs): def initHook(settings, vocab_size, pad_seq, maxlen, **kwargs):
settings.vocab_size = vocab_size settings.vocab_size = vocab_size
settings.pad_seq = pad_seq settings.pad_seq = pad_seq
settings.maxlen = maxlen settings.maxlen = maxlen
settings.input_types = [ settings.input_types = [
integer_value_sequence(vocab_size), integer_value_sequence(vocab_size), integer_value(2)
integer_value(2)] ]
@provider(init_hook=initHook, min_pool_size=-1, cache=CacheType.CACHE_PASS_IN_MEM) @provider(
init_hook=initHook, min_pool_size=-1, cache=CacheType.CACHE_PASS_IN_MEM)
def process(settings, file): def process(settings, file):
f = open(file, 'rb') f = open(file, 'rb')
train_set = pickle.load(f) train_set = pickle.load(f)
...@@ -57,8 +65,8 @@ def process(settings, file): ...@@ -57,8 +65,8 @@ def process(settings, file):
# remove unk, namely remove the words out of dictionary # remove unk, namely remove the words out of dictionary
x = remove_unk(x, settings.vocab_size) x = remove_unk(x, settings.vocab_size)
if settings.pad_seq: if settings.pad_seq:
x = pad_sequences(x, maxlen=settings.maxlen, value=0.) x = pad_sequences(x, maxlen=settings.maxlen, value=0.)
for i in range(len(y)): for i in range(len(y)):
yield map(int,x[i]), int(y[i]) yield map(int, x[i]), int(y[i])
...@@ -6,33 +6,29 @@ import imdb ...@@ -6,33 +6,29 @@ import imdb
num_class = 2 num_class = 2
vocab_size = 30000 vocab_size = 30000
fixedlen = 100 fixedlen = 100
batch_size = get_config_arg('batch_size', int, 128) batch_size = get_config_arg('batch_size', int, 128)
lstm_num = get_config_arg('lstm_num', int, 1) lstm_num = get_config_arg('lstm_num', int, 1)
hidden_size = get_config_arg('hidden_size', int, 128) hidden_size = get_config_arg('hidden_size', int, 128)
# whether to pad sequence into fixed length # whether to pad sequence into fixed length
pad_seq = get_config_arg('pad_seq', bool, True) pad_seq = get_config_arg('pad_seq', bool, True)
imdb.create_data('imdb.pkl') imdb.create_data('imdb.pkl')
args={'vocab_size':vocab_size, 'pad_seq':pad_seq, 'maxlen':fixedlen} args = {'vocab_size': vocab_size, 'pad_seq': pad_seq, 'maxlen': fixedlen}
define_py_data_sources2("train.list", define_py_data_sources2(
None, "train.list", None, module="provider", obj="process", args=args)
module="provider",
obj="process",
args=args)
settings( settings(
batch_size=batch_size, batch_size=batch_size,
learning_rate=2e-3, learning_rate=2e-3,
learning_method=AdamOptimizer(), learning_method=AdamOptimizer(),
regularization=L2Regularization(8e-4), regularization=L2Regularization(8e-4),
gradient_clipping_threshold=25 gradient_clipping_threshold=25)
)
net = data_layer('data', size=vocab_size) net = data_layer('data', size=vocab_size)
net = embedding_layer(input=net, size=128) net = embedding_layer(input=net, size=128)
for i in xrange(lstm_num): for i in xrange(lstm_num):
net = simple_lstm(input=net, size=hidden_size) net = simple_lstm(input=net, size=hidden_size)
net = last_seq(input=net) net = last_seq(input=net)
net = fc_layer(input=net, size=2, act=SoftmaxActivation()) net = fc_layer(input=net, size=2, act=SoftmaxActivation())
......
...@@ -8,10 +8,8 @@ import tensorflow as tf ...@@ -8,10 +8,8 @@ import tensorflow as tf
FLAGS = tf.app.flags.FLAGS FLAGS = tf.app.flags.FLAGS
tf.app.flags.DEFINE_integer('batch_size', 128, tf.app.flags.DEFINE_integer('batch_size', 128, """Batch size.""")
"""Batch size.""") tf.app.flags.DEFINE_integer('num_batches', 100, """Number of batches to run.""")
tf.app.flags.DEFINE_integer('num_batches', 100,
"""Number of batches to run.""")
tf.app.flags.DEFINE_boolean('forward_only', False, tf.app.flags.DEFINE_boolean('forward_only', False,
"""Only run the forward pass.""") """Only run the forward pass.""")
tf.app.flags.DEFINE_boolean('forward_backward_only', False, tf.app.flags.DEFINE_boolean('forward_backward_only', False,
...@@ -23,47 +21,64 @@ tf.app.flags.DEFINE_string('data_format', 'NCHW', ...@@ -23,47 +21,64 @@ tf.app.flags.DEFINE_string('data_format', 'NCHW',
tf.app.flags.DEFINE_boolean('log_device_placement', False, tf.app.flags.DEFINE_boolean('log_device_placement', False,
"""Whether to log device placement.""") """Whether to log device placement.""")
def _conv(name, inpOp, nIn, nOut, kH, kW, dH, dW, padType, wd=0.0005): def _conv(name, inpOp, nIn, nOut, kH, kW, dH, dW, padType, wd=0.0005):
with tf.name_scope(name) as scope: with tf.name_scope(name) as scope:
kernel = tf.get_variable(name + '_w',[kH, kW, nIn, nOut], kernel = tf.get_variable(
initializer=tf.truncated_normal_initializer(stddev=0.01, dtype=tf.float32), name + '_w', [kH, kW, nIn, nOut],
dtype=tf.float32) initializer=tf.truncated_normal_initializer(
stddev=0.01, dtype=tf.float32),
dtype=tf.float32)
if wd is not None and wd > 0: if wd is not None and wd > 0:
weight_decay = tf.mul(tf.nn.l2_loss(kernel), wd, name='weight_loss') weight_decay = tf.mul(tf.nn.l2_loss(kernel), wd, name='weight_loss')
tf.add_to_collection('losses', weight_decay) tf.add_to_collection('losses', weight_decay)
if FLAGS.data_format == 'NCHW': if FLAGS.data_format == 'NCHW':
strides = [1, 1, dH, dW] strides = [1, 1, dH, dW]
else: else:
strides = [1, dH, dW, 1] strides = [1, dH, dW, 1]
conv = tf.nn.conv2d(inpOp, kernel, strides, padding=padType, conv = tf.nn.conv2d(
data_format=FLAGS.data_format) inpOp,
kernel,
biases = tf.get_variable(name=name + '_b', shape=[nOut], strides,
initializer=tf.constant_initializer(value=0.0, dtype=tf.float32), padding=padType,
data_format=FLAGS.data_format)
biases = tf.get_variable(
name=name + '_b',
shape=[nOut],
initializer=tf.constant_initializer(
value=0.0, dtype=tf.float32),
dtype=tf.float32) dtype=tf.float32)
bias = tf.reshape( bias = tf.reshape(
tf.nn.bias_add(conv, biases, data_format=FLAGS.data_format), tf.nn.bias_add(
conv, biases, data_format=FLAGS.data_format),
conv.get_shape()) conv.get_shape())
conv1 = tf.nn.relu(bias, name=scope) conv1 = tf.nn.relu(bias, name=scope)
return conv1 return conv1
def _affine(name, inpOp, nIn, nOut, wd=0.0005, act=True, drop=None): def _affine(name, inpOp, nIn, nOut, wd=0.0005, act=True, drop=None):
with tf.name_scope(name) as scope: with tf.name_scope(name) as scope:
kernel = tf.get_variable(name + '_w', [nIn, nOut], kernel = tf.get_variable(
initializer=tf.truncated_normal_initializer(stddev=0.01, dtype=tf.float32), name + '_w', [nIn, nOut],
initializer=tf.truncated_normal_initializer(
stddev=0.01, dtype=tf.float32),
dtype=tf.float32) dtype=tf.float32)
if wd is not None and wd > 0: if wd is not None and wd > 0:
weight_decay = tf.mul(tf.nn.l2_loss(kernel), wd, name='weight_loss') weight_decay = tf.mul(tf.nn.l2_loss(kernel), wd, name='weight_loss')
tf.add_to_collection('losses', weight_decay) tf.add_to_collection('losses', weight_decay)
biases = tf.get_variable(name + '_b', [nOut], biases = tf.get_variable(
initializer=tf.constant_initializer(value=0.0, dtype=tf.float32), name + '_b', [nOut],
dtype=tf.float32,trainable=True) initializer=tf.constant_initializer(
value=0.0, dtype=tf.float32),
dtype=tf.float32,
trainable=True)
affine1 = tf.nn.relu_layer(inpOp, kernel, biases, name=name) if act else \ affine1 = tf.nn.relu_layer(inpOp, kernel, biases, name=name) if act else \
tf.matmul(inpOp, kernel) + biases tf.matmul(inpOp, kernel) + biases
...@@ -72,31 +87,36 @@ def _affine(name, inpOp, nIn, nOut, wd=0.0005, act=True, drop=None): ...@@ -72,31 +87,36 @@ def _affine(name, inpOp, nIn, nOut, wd=0.0005, act=True, drop=None):
return output return output
def _mpool(name, inpOp, kH, kW, dH, dW): def _mpool(name, inpOp, kH, kW, dH, dW):
if FLAGS.data_format == 'NCHW': if FLAGS.data_format == 'NCHW':
ksize = [1, 1, kH, kW] ksize = [1, 1, kH, kW]
strides = [1, 1, dH, dW] strides = [1, 1, dH, dW]
else: else:
ksize = [1, kH, kW, 1] ksize = [1, kH, kW, 1]
strides = [1, dH, dW, 1] strides = [1, dH, dW, 1]
return tf.nn.max_pool(inpOp, return tf.nn.max_pool(
ksize=ksize, inpOp,
strides=strides, ksize=ksize,
padding='VALID', strides=strides,
data_format=FLAGS.data_format, padding='VALID',
name=name) data_format=FLAGS.data_format,
name=name)
def _norm(name, l_input, lsize=4): def _norm(name, l_input, lsize=4):
return tf.nn.lrn(l_input, lsize, bias=1.0, return tf.nn.lrn(l_input,
lsize,
bias=1.0,
alpha=0.001 / 9.0, alpha=0.001 / 9.0,
beta=0.75, name=name) beta=0.75,
name=name)
def loss(logits, labels): def loss(logits, labels):
labels = tf.cast(labels, tf.int64) labels = tf.cast(labels, tf.int64)
cross_entropy = tf.nn.sparse_softmax_cross_entropy_with_logits( cross_entropy = tf.nn.sparse_softmax_cross_entropy_with_logits(
logits, labels, name='cross_entropy_per_example') logits, labels, name='cross_entropy_per_example')
cross_entropy_mean = tf.reduce_mean(cross_entropy, name='cross_entropy') cross_entropy_mean = tf.reduce_mean(cross_entropy, name='cross_entropy')
tf.add_to_collection('losses', cross_entropy_mean) tf.add_to_collection('losses', cross_entropy_mean)
...@@ -104,6 +124,7 @@ def loss(logits, labels): ...@@ -104,6 +124,7 @@ def loss(logits, labels):
# decay terms (L2 loss). # decay terms (L2 loss).
return tf.add_n(tf.get_collection('losses'), name='total_loss') return tf.add_n(tf.get_collection('losses'), name='total_loss')
def get_incoming_shape(incoming): def get_incoming_shape(incoming):
""" Returns the incoming data shape """ """ Returns the incoming data shape """
if isinstance(incoming, tf.Tensor): if isinstance(incoming, tf.Tensor):
...@@ -113,50 +134,52 @@ def get_incoming_shape(incoming): ...@@ -113,50 +134,52 @@ def get_incoming_shape(incoming):
else: else:
raise Exception("Invalid incoming layer.") raise Exception("Invalid incoming layer.")
def inference(images): def inference(images):
conv1 = _conv ('conv1', images, 3, 96, 11, 11, 4, 4, 'VALID') conv1 = _conv('conv1', images, 3, 96, 11, 11, 4, 4, 'VALID')
pool1 = _mpool('pool1', conv1, 3, 3, 2, 2) pool1 = _mpool('pool1', conv1, 3, 3, 2, 2)
norm1 = _norm ('norm1', pool1, lsize=5) norm1 = _norm('norm1', pool1, lsize=5)
conv2 = _conv ('conv2', norm1, 96, 256, 5, 5, 1, 1, 'SAME') conv2 = _conv('conv2', norm1, 96, 256, 5, 5, 1, 1, 'SAME')
pool2 = _mpool('pool2', conv2, 3, 3, 2, 2) pool2 = _mpool('pool2', conv2, 3, 3, 2, 2)
norm2 = _norm ('norm2', pool2, lsize=5) norm2 = _norm('norm2', pool2, lsize=5)
conv3 = _conv ('conv3', norm2, 256, 384, 3, 3, 1, 1, 'SAME') conv3 = _conv('conv3', norm2, 256, 384, 3, 3, 1, 1, 'SAME')
conv4 = _conv ('conv4', conv3, 384, 384, 3, 3, 1, 1, 'SAME') conv4 = _conv('conv4', conv3, 384, 384, 3, 3, 1, 1, 'SAME')
conv5 = _conv ('conv5', conv4, 384, 256, 3, 3, 1, 1, 'SAME') conv5 = _conv('conv5', conv4, 384, 256, 3, 3, 1, 1, 'SAME')
pool5 = _mpool('pool5', conv5, 3, 3, 2, 2) pool5 = _mpool('pool5', conv5, 3, 3, 2, 2)
resh1 = tf.reshape(pool5, [-1, 256 * 6 * 6]) resh1 = tf.reshape(pool5, [-1, 256 * 6 * 6])
affn1 = _affine('fc6', resh1, 256 * 6 * 6, 4096, 0.5) affn1 = _affine('fc6', resh1, 256 * 6 * 6, 4096, 0.5)
affn2 = _affine('fc7', affn1, 4096, 4096, 0.5) affn2 = _affine('fc7', affn1, 4096, 4096, 0.5)
affn3 = _affine('fc8', affn2, 4096, 1000, wd=None, act=False) # last fc affn3 = _affine('fc8', affn2, 4096, 1000, wd=None, act=False) # last fc
return affn3 return affn3
def time_tensorflow_run(session, target, info_string): def time_tensorflow_run(session, target, info_string):
num_steps_burn_in = 10 num_steps_burn_in = 10
total_duration = 0.0 total_duration = 0.0
total_duration_squared = 0.0 total_duration_squared = 0.0
if not isinstance(target, list): if not isinstance(target, list):
target = [target] target = [target]
target_op = tf.group(*target) target_op = tf.group(*target)
for i in xrange(FLAGS.num_batches + num_steps_burn_in): for i in xrange(FLAGS.num_batches + num_steps_burn_in):
start_time = time.time() start_time = time.time()
_ = session.run(target_op) _ = session.run(target_op)
duration = time.time() - start_time duration = time.time() - start_time
if i > num_steps_burn_in: if i > num_steps_burn_in:
if not i % 10: if not i % 10:
print ('%s: step %d, duration = %.3f' % print('%s: step %d, duration = %.3f' %
(datetime.now(), i - num_steps_burn_in, duration)) (datetime.now(), i - num_steps_burn_in, duration))
total_duration += duration total_duration += duration
total_duration_squared += duration * duration total_duration_squared += duration * duration
mn = total_duration / FLAGS.num_batches mn = total_duration / FLAGS.num_batches
vr = total_duration_squared / FLAGS.num_batches - mn * mn vr = total_duration_squared / FLAGS.num_batches - mn * mn
sd = math.sqrt(vr) sd = math.sqrt(vr)
print ('%s: %s across %d steps, %.3f +/- %.3f sec / batch' % print('%s: %s across %d steps, %.3f +/- %.3f sec / batch' %
(datetime.now(), info_string, FLAGS.num_batches, mn, sd)) (datetime.now(), info_string, FLAGS.num_batches, mn, sd))
def _add_loss_summaries(total_loss): def _add_loss_summaries(total_loss):
""" """
Generates moving average for all losses and associated summaries for Generates moving average for all losses and associated summaries for
visualizing the performance of the network. visualizing the performance of the network.
...@@ -165,96 +188,111 @@ def _add_loss_summaries(total_loss): ...@@ -165,96 +188,111 @@ def _add_loss_summaries(total_loss):
Returns: Returns:
loss_averages_op: op for generating moving averages of losses. loss_averages_op: op for generating moving averages of losses.
""" """
# Compute the moving average of all individual losses and the total loss. # Compute the moving average of all individual losses and the total loss.
loss_averages = tf.train.ExponentialMovingAverage(0.9, name='avg') loss_averages = tf.train.ExponentialMovingAverage(0.9, name='avg')
losses = tf.get_collection('losses') losses = tf.get_collection('losses')
loss_averages_op = loss_averages.apply(losses + [total_loss]) loss_averages_op = loss_averages.apply(losses + [total_loss])
# Attach a scalar summary to all individual losses and the total loss; do the
# same for the averaged version of the losses.
for l in losses + [total_loss]:
# Name each loss as '(raw)' and name the moving average version of the loss
# as the original loss name.
tf.scalar_summary(l.op.name +' (raw)', l)
tf.scalar_summary(l.op.name, loss_averages.average(l))
return loss_averages_op # Attach a scalar summary to all individual losses and the total loss; do the
# same for the averaged version of the losses.
for l in losses + [total_loss]:
# Name each loss as '(raw)' and name the moving average version of the loss
# as the original loss name.
tf.scalar_summary(l.op.name + ' (raw)', l)
tf.scalar_summary(l.op.name, loss_averages.average(l))
return loss_averages_op
def run_benchmark(): def run_benchmark():
with tf.Graph().as_default(): with tf.Graph().as_default():
with tf.device('/gpu:0'): with tf.device('/gpu:0'):
# Generate some dummy images. # Generate some dummy images.
image_size = 224 image_size = 224
# Note that our padding definition is slightly different the cuda-convnet. # Note that our padding definition is slightly different the cuda-convnet.
# In order to force the model to start with the same activations sizes, # In order to force the model to start with the same activations sizes,
# we add 3 to the image_size and employ VALID padding above. # we add 3 to the image_size and employ VALID padding above.
if FLAGS.data_format == 'NCHW': if FLAGS.data_format == 'NCHW':
image_shape = [FLAGS.batch_size, 3, image_size + 3, image_size + 3] image_shape = [
else: FLAGS.batch_size, 3, image_size + 3, image_size + 3
image_shape = [FLAGS.batch_size, image_size + 3, image_size + 3, 3] ]
images = tf.get_variable('image', image_shape, else:
initializer=tf.truncated_normal_initializer(stddev=0.1, dtype=tf.float32), image_shape = [
dtype=tf.float32, FLAGS.batch_size, image_size + 3, image_size + 3, 3
trainable=False) ]
images = tf.get_variable(
labels = tf.get_variable('label', [FLAGS.batch_size], 'image',
initializer=tf.constant_initializer(1), image_shape,
dtype=tf.int32, initializer=tf.truncated_normal_initializer(
trainable=False) stddev=0.1, dtype=tf.float32),
dtype=tf.float32,
# Build a Graph that computes the logits predictions from the trainable=False)
# inference model.
last_layer = inference(images) labels = tf.get_variable(
'label', [FLAGS.batch_size],
objective = loss(last_layer, labels) initializer=tf.constant_initializer(1),
# Compute the gradient with respect to all the parameters. dtype=tf.int32,
trainable=False)
# Compute gradients.
# opt = tf.train.GradientDescentOptimizer(0.001) # Build a Graph that computes the logits predictions from the
opt = tf.train.MomentumOptimizer(0.001, 0.9) # inference model.
grads = opt.compute_gradients(objective) last_layer = inference(images)
global_step = tf.get_variable('global_step', [],
initializer=tf.constant_initializer(0.0, dtype=tf.float32), objective = loss(last_layer, labels)
trainable=False, dtype=tf.float32) # Compute the gradient with respect to all the parameters.
apply_gradient_op = opt.apply_gradients(grads, global_step=global_step)
# Compute gradients.
# Track the moving averages of all trainable variables. # opt = tf.train.GradientDescentOptimizer(0.001)
variable_averages = tf.train.ExponentialMovingAverage( opt = tf.train.MomentumOptimizer(0.001, 0.9)
0.9, global_step) grads = opt.compute_gradients(objective)
variables_averages_op = variable_averages.apply(tf.trainable_variables()) global_step = tf.get_variable(
'global_step', [],
# Build an initialization operation. initializer=tf.constant_initializer(
init = tf.initialize_all_variables() 0.0, dtype=tf.float32),
trainable=False,
# Start running operations on the Graph. dtype=tf.float32)
sess = tf.Session(config=tf.ConfigProto( apply_gradient_op = opt.apply_gradients(
allow_soft_placement=True, grads, global_step=global_step)
log_device_placement=FLAGS.log_device_placement))
sess.run(init) # Track the moving averages of all trainable variables.
variable_averages = tf.train.ExponentialMovingAverage(0.9,
run_forward = True global_step)
run_forward_backward = True variables_averages_op = variable_averages.apply(
if FLAGS.forward_only and FLAGS.forward_backward_only: tf.trainable_variables())
raise ValueError("Cannot specify --forward_only and "
"--forward_backward_only at the same time.") # Build an initialization operation.
if FLAGS.forward_only: init = tf.initialize_all_variables()
run_forward_backward = False
elif FLAGS.forward_backward_only: # Start running operations on the Graph.
run_forward = False sess = tf.Session(config=tf.ConfigProto(
allow_soft_placement=True,
if run_forward: log_device_placement=FLAGS.log_device_placement))
time_tensorflow_run(sess, last_layer, "Forward") sess.run(init)
if run_forward_backward: run_forward = True
with tf.control_dependencies([apply_gradient_op, variables_averages_op]): run_forward_backward = True
train_op = tf.no_op(name='train') if FLAGS.forward_only and FLAGS.forward_backward_only:
time_tensorflow_run(sess, [train_op, objective], "Forward-backward") raise ValueError("Cannot specify --forward_only and "
"--forward_backward_only at the same time.")
if FLAGS.forward_only:
run_forward_backward = False
elif FLAGS.forward_backward_only:
run_forward = False
if run_forward:
time_tensorflow_run(sess, last_layer, "Forward")
if run_forward_backward:
with tf.control_dependencies(
[apply_gradient_op, variables_averages_op]):
train_op = tf.no_op(name='train')
time_tensorflow_run(sess, [train_op, objective],
"Forward-backward")
def main(_):
    run_benchmark()


if __name__ == '__main__':
    tf.app.run()
...@@ -9,10 +9,8 @@ import tensorflow as tf ...@@ -9,10 +9,8 @@ import tensorflow as tf
FLAGS = tf.app.flags.FLAGS FLAGS = tf.app.flags.FLAGS
tf.app.flags.DEFINE_integer('batch_size', 64, tf.app.flags.DEFINE_integer('batch_size', 64, """Batch size.""")
"""Batch size.""") tf.app.flags.DEFINE_integer('num_batches', 100, """Number of batches to run.""")
tf.app.flags.DEFINE_integer('num_batches', 100,
"""Number of batches to run.""")
tf.app.flags.DEFINE_string('data_format', 'NCHW', tf.app.flags.DEFINE_string('data_format', 'NCHW',
"""The data format for Convnet operations. """The data format for Convnet operations.
Can be either NHWC or NCHW. Can be either NHWC or NCHW.
...@@ -21,88 +19,110 @@ tf.app.flags.DEFINE_string('data_format', 'NCHW', ...@@ -21,88 +19,110 @@ tf.app.flags.DEFINE_string('data_format', 'NCHW',
tf.app.flags.DEFINE_string('train_dir', '/train_model', tf.app.flags.DEFINE_string('train_dir', '/train_model',
"""Directory where to write event logs """ """Directory where to write event logs """
"""and checkpoint.""") """and checkpoint.""")
tf.app.flags.DEFINE_integer('num_gpus', 4, tf.app.flags.DEFINE_integer('num_gpus', 4, """How many GPUs to use.""")
"""How many GPUs to use.""")
tf.app.flags.DEFINE_boolean('log_device_placement', False, tf.app.flags.DEFINE_boolean('log_device_placement', False,
"""Whether to log device placement.""") """Whether to log device placement.""")
NUM_EXAMPLES_PER_EPOCH_FOR_TRAIN = 50000
NUM_EPOCHS_PER_DECAY = 50
INITIAL_LEARNING_RATE = 0.1 INITIAL_LEARNING_RATE = 0.1
LEARNING_RATE_DECAY_FACTOR = 0.1 LEARNING_RATE_DECAY_FACTOR = 0.1
TOWER_NAME = 'tower' TOWER_NAME = 'tower'
def _conv(name, inpOp, nIn, nOut, kH, kW, dH, dW, padType, wd=0.005): def _conv(name, inpOp, nIn, nOut, kH, kW, dH, dW, padType, wd=0.005):
with tf.name_scope(name) as scope: with tf.name_scope(name) as scope:
kernel = tf.get_variable(name + '_w',[kH, kW, nIn, nOut], kernel = tf.get_variable(
initializer=tf.truncated_normal_initializer(stddev=0.01, dtype=tf.float32), name + '_w', [kH, kW, nIn, nOut],
dtype=tf.float32) initializer=tf.truncated_normal_initializer(
stddev=0.01, dtype=tf.float32),
dtype=tf.float32)
if wd is not None: if wd is not None:
weight_decay = tf.mul(tf.nn.l2_loss(kernel), wd, name='weight_loss') weight_decay = tf.mul(tf.nn.l2_loss(kernel), wd, name='weight_loss')
tf.add_to_collection('losses', weight_decay) tf.add_to_collection('losses', weight_decay)
if FLAGS.data_format == 'NCHW': if FLAGS.data_format == 'NCHW':
strides = [1, 1, dH, dW] strides = [1, 1, dH, dW]
else: else:
strides = [1, dH, dW, 1] strides = [1, dH, dW, 1]
conv = tf.nn.conv2d(inpOp, kernel, strides, padding=padType, conv = tf.nn.conv2d(
data_format=FLAGS.data_format) inpOp,
kernel,
biases = tf.get_variable(name=name + '_b', shape=[nOut], strides,
initializer=tf.constant_initializer(value=0.0, dtype=tf.float32), padding=padType,
data_format=FLAGS.data_format)
biases = tf.get_variable(
name=name + '_b',
shape=[nOut],
initializer=tf.constant_initializer(
value=0.0, dtype=tf.float32),
dtype=tf.float32) dtype=tf.float32)
bias = tf.reshape( bias = tf.reshape(
tf.nn.bias_add(conv, biases, data_format=FLAGS.data_format), tf.nn.bias_add(
conv, biases, data_format=FLAGS.data_format),
conv.get_shape()) conv.get_shape())
conv1 = tf.nn.relu(bias, name=scope) conv1 = tf.nn.relu(bias, name=scope)
return conv1 return conv1
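# Added note (not in the original file): the weight_decay term above is not
# applied to the kernel directly; the value pushed onto the 'losses' collection
# is wd * tf.nn.l2_loss(W), i.e. wd * sum(W ** 2) / 2, and tower_loss() below
# folds everything in that collection (cross entropy included) into the
# per-tower total loss.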
def _affine(name, inpOp, nIn, nOut, wd=0.005, act=True): def _affine(name, inpOp, nIn, nOut, wd=0.005, act=True):
with tf.name_scope(name) as scope: with tf.name_scope(name) as scope:
kernel = tf.get_variable(name + '_w', [nIn, nOut], kernel = tf.get_variable(
initializer=tf.truncated_normal_initializer(stddev=0.01, dtype=tf.float32), name + '_w', [nIn, nOut],
initializer=tf.truncated_normal_initializer(
stddev=0.01, dtype=tf.float32),
dtype=tf.float32) dtype=tf.float32)
if wd is not None: if wd is not None:
weight_decay = tf.mul(tf.nn.l2_loss(kernel), wd, name='weight_loss') weight_decay = tf.mul(tf.nn.l2_loss(kernel), wd, name='weight_loss')
tf.add_to_collection('losses', weight_decay) tf.add_to_collection('losses', weight_decay)
biases = tf.get_variable(name + '_b', [nOut], biases = tf.get_variable(
initializer=tf.constant_initializer(value=0.0, dtype=tf.float32), name + '_b', [nOut],
dtype=tf.float32,trainable=True) initializer=tf.constant_initializer(
value=0.0, dtype=tf.float32),
dtype=tf.float32,
trainable=True)
affine1 = tf.nn.relu_layer(inpOp, kernel, biases, name=name) if act else \ affine1 = tf.nn.relu_layer(inpOp, kernel, biases, name=name) if act else \
tf.matmul(inpOp, kernel) + biases tf.matmul(inpOp, kernel) + biases
return affine1 return affine1
def _mpool(name, inpOp, kH, kW, dH, dW): def _mpool(name, inpOp, kH, kW, dH, dW):
if FLAGS.data_format == 'NCHW': if FLAGS.data_format == 'NCHW':
ksize = [1, 1, kH, kW] ksize = [1, 1, kH, kW]
strides = [1, 1, dH, dW] strides = [1, 1, dH, dW]
else: else:
ksize = [1, kH, kW, 1] ksize = [1, kH, kW, 1]
strides = [1, dH, dW, 1] strides = [1, dH, dW, 1]
return tf.nn.max_pool(inpOp, return tf.nn.max_pool(
ksize=ksize, inpOp,
strides=strides, ksize=ksize,
padding='VALID', strides=strides,
data_format=FLAGS.data_format, padding='VALID',
name=name) data_format=FLAGS.data_format,
name=name)
def _norm(name, l_input, lsize=4): def _norm(name, l_input, lsize=4):
return tf.nn.lrn(l_input, lsize, bias=1.0, return tf.nn.lrn(l_input,
lsize,
bias=1.0,
alpha=0.001 / 9.0, alpha=0.001 / 9.0,
beta=0.75, name=name) beta=0.75,
name=name)
def loss(logits, labels): def loss(logits, labels):
labels = tf.cast(labels, tf.int64) labels = tf.cast(labels, tf.int64)
cross_entropy = tf.nn.sparse_softmax_cross_entropy_with_logits( cross_entropy = tf.nn.sparse_softmax_cross_entropy_with_logits(
logits, labels, name='cross_entropy_per_example') logits, labels, name='cross_entropy_per_example')
cross_entropy_mean = tf.reduce_mean(cross_entropy, name='cross_entropy') cross_entropy_mean = tf.reduce_mean(cross_entropy, name='cross_entropy')
tf.add_to_collection('losses', cross_entropy_mean) tf.add_to_collection('losses', cross_entropy_mean)
...@@ -120,24 +140,26 @@ def get_incoming_shape(incoming): ...@@ -120,24 +140,26 @@ def get_incoming_shape(incoming):
else: else:
raise Exception("Invalid incoming layer.") raise Exception("Invalid incoming layer.")
def inference(images):
    conv1 = _conv('conv1', images, 3, 96, 11, 11, 4, 4, 'VALID')
    pool1 = _mpool('pool1', conv1, 3, 3, 2, 2)
    norm1 = _norm('norm1', pool1, lsize=5)
    conv2 = _conv('conv2', norm1, 96, 256, 5, 5, 1, 1, 'SAME')
    pool2 = _mpool('pool2', conv2, 3, 3, 2, 2)
    norm2 = _norm('norm2', pool2, lsize=5)
    conv3 = _conv('conv3', norm2, 256, 384, 3, 3, 1, 1, 'SAME')
    conv4 = _conv('conv4', conv3, 384, 384, 3, 3, 1, 1, 'SAME')
    conv5 = _conv('conv5', conv4, 384, 256, 3, 3, 1, 1, 'SAME')
    pool5 = _mpool('pool5', conv5, 3, 3, 2, 2)
    resh1 = tf.reshape(pool5, [-1, 256 * 6 * 6])
    affn1 = _affine('fc6', resh1, 256 * 6 * 6, 4096)
    affn2 = _affine('fc7', affn1, 4096, 4096)
    affn3 = _affine('fc8', affn2, 4096, 1000, wd=None, act=False)  # last fc
    return affn3
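# Added shape note (not in the original file; assumes image_size = 224 as in
# the companion scripts, i.e. a 227-pixel input after the "+ 3" padding trick):
#   conv1 11x11 stride 4, VALID : (227 - 11) / 4 + 1 = 55
#   pool1  3x3  stride 2, VALID : (55 - 3) / 2 + 1   = 27
#   pool2  3x3  stride 2, VALID : (27 - 3) / 2 + 1   = 13
#   pool5  3x3  stride 2, VALID : (13 - 3) / 2 + 1   = 6
# so pool5 is 256 x 6 x 6, matching the reshape to 256 * 6 * 6 above.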
def tower_loss(scope): def tower_loss(scope):
"""Calculate the total loss on a single tower running the model. """Calculate the total loss on a single tower running the model.
Args: Args:
...@@ -150,15 +172,19 @@ def tower_loss(scope): ...@@ -150,15 +172,19 @@ def tower_loss(scope):
image_shape = [FLAGS.batch_size, 3, image_size + 3, image_size + 3] image_shape = [FLAGS.batch_size, 3, image_size + 3, image_size + 3]
else: else:
image_shape = [FLAGS.batch_size, image_size + 3, image_size + 3, 3] image_shape = [FLAGS.batch_size, image_size + 3, image_size + 3, 3]
images = tf.get_variable('image', image_shape, images = tf.get_variable(
initializer=tf.truncated_normal_initializer(stddev=0.1, dtype=tf.float32), 'image',
dtype=tf.float32, image_shape,
trainable=False) initializer=tf.truncated_normal_initializer(
stddev=0.1, dtype=tf.float32),
labels = tf.get_variable('label', [FLAGS.batch_size], dtype=tf.float32,
initializer=tf.constant_initializer(1), trainable=False)
dtype=tf.int32,
trainable=False) labels = tf.get_variable(
'label', [FLAGS.batch_size],
initializer=tf.constant_initializer(1),
dtype=tf.int32,
trainable=False)
# Build a Graph that computes the logits predictions from the # Build a Graph that computes the logits predictions from the
# inference model. # inference model.
...@@ -167,7 +193,7 @@ def tower_loss(scope): ...@@ -167,7 +193,7 @@ def tower_loss(scope):
# Build the portion of the Graph calculating the losses. Note that we will # Build the portion of the Graph calculating the losses. Note that we will
# assemble the total_loss using a custom function below. # assemble the total_loss using a custom function below.
_ = loss(last_layer, labels) _ = loss(last_layer, labels)
# Assemble all of the losses for the current tower only. # Assemble all of the losses for the current tower only.
losses = tf.get_collection('losses', scope) losses = tf.get_collection('losses', scope)
...@@ -186,7 +212,7 @@ def tower_loss(scope): ...@@ -186,7 +212,7 @@ def tower_loss(scope):
loss_name = re.sub('%s_[0-9]*/' % TOWER_NAME, '', l.op.name) loss_name = re.sub('%s_[0-9]*/' % TOWER_NAME, '', l.op.name)
# Name each loss as '(raw)' and name the moving average version of the loss # Name each loss as '(raw)' and name the moving average version of the loss
# as the original loss name. # as the original loss name.
tf.scalar_summary(loss_name +' (raw)', l) tf.scalar_summary(loss_name + ' (raw)', l)
tf.scalar_summary(loss_name, loss_averages.average(l)) tf.scalar_summary(loss_name, loss_averages.average(l))
with tf.control_dependencies([loss_averages_op]): with tf.control_dependencies([loss_averages_op]):
...@@ -195,7 +221,7 @@ def tower_loss(scope): ...@@ -195,7 +221,7 @@ def tower_loss(scope):
def average_gradients(tower_grads): def average_gradients(tower_grads):
"""Calculate the average gradient for each shared variable across all towers. """Calculate the average gradient for each shared variable across all towers.
Note that this function provides a synchronization point across all towers. Note that this function provides a synchronization point across all towers.
Args: Args:
tower_grads: List of lists of (gradient, variable) tuples. The outer list tower_grads: List of lists of (gradient, variable) tuples. The outer list
...@@ -205,130 +231,135 @@ def average_gradients(tower_grads): ...@@ -205,130 +231,135 @@ def average_gradients(tower_grads):
List of pairs of (gradient, variable) where the gradient has been averaged List of pairs of (gradient, variable) where the gradient has been averaged
across all towers. across all towers.
""" """
    average_grads = []
    for grad_and_vars in zip(*tower_grads):
        # Note that each grad_and_vars looks like the following:
        #   ((grad0_gpu0, var0_gpu0), ... , (grad0_gpuN, var0_gpuN))
        grads = []
        for g, _ in grad_and_vars:
            # Add 0 dimension to the gradients to represent the tower.
            expanded_g = tf.expand_dims(g, 0)

            # Append on a 'tower' dimension which we will average over below.
            grads.append(expanded_g)

        # Average over the 'tower' dimension.
        grad = tf.concat(0, grads)
        grad = tf.reduce_mean(grad, 0)

        # Keep in mind that the Variables are redundant because they are shared
        # across towers. So .. we will just return the first tower's pointer to
        # the Variable.
        v = grad_and_vars[0][1]
        grad_and_var = (grad, v)
        average_grads.append(grad_and_var)
    return average_grads
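# Toy illustration of the averaging above (added for clarity, not part of the
# original file): with 2 towers and one shared variable w,
#   tower_grads = [[(g_w_gpu0, w)], [(g_w_gpu1, w)]]
# zip(*tower_grads) yields ((g_w_gpu0, w), (g_w_gpu1, w)); the two gradients
# are stacked along a new 'tower' axis, reduced with reduce_mean, and the
# function returns [(0.5 * (g_w_gpu0 + g_w_gpu1), w)].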
def time_tensorflow_run(session, target):
    num_steps_burn_in = 50
    total_duration = 0.0
    total_duration_squared = 0.0
    for i in xrange(FLAGS.num_batches + num_steps_burn_in):
        start_time = time.time()
        _, loss_value = session.run(target)
        duration = time.time() - start_time
        if i > num_steps_burn_in:
            if not i % 10:
                num_examples_per_step = FLAGS.batch_size * FLAGS.num_gpus
                examples_per_sec = num_examples_per_step / duration
                sec_per_batch = duration

                format_str = (
                    '%s: step %d, loss = %.2f (%.1f examples/sec; %.3f '
                    'sec/batch batch_size = %d)')
                print(format_str %
                      (datetime.now(), i - num_steps_burn_in, loss_value,
                       examples_per_sec, sec_per_batch, num_examples_per_step))

            total_duration += duration
            total_duration_squared += duration * duration

    mn = total_duration / FLAGS.num_batches
    vr = total_duration_squared / FLAGS.num_batches - mn * mn
    sd = math.sqrt(vr)
    print('%s: FwdBwd across %d steps, %.3f +/- %.3f sec / batch' %
          (datetime.now(), FLAGS.num_batches, mn, sd))
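# Added note: the '+/-' figure printed above is the standard deviation derived
# from the running sums via the identity var = E[d^2] - (E[d])^2, computed
# over the post-burn-in batch durations.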
def run_benchmark(): def run_benchmark():
with tf.Graph().as_default(), tf.device('/cpu:0'): with tf.Graph().as_default(), tf.device('/cpu:0'):
# Create a variable to count the number of train() calls. This equals the # Create a variable to count the number of train() calls. This equals the
# number of batches processed * FLAGS.num_gpus. # number of batches processed * FLAGS.num_gpus.
global_step = tf.get_variable( global_step = tf.get_variable(
'global_step', [], 'global_step', [],
initializer=tf.constant_initializer(0), trainable=False) initializer=tf.constant_initializer(0),
trainable=False)
# Calculate the learning rate schedule.
num_batches_per_epoch = (NUM_EXAMPLES_PER_EPOCH_FOR_TRAIN / # Calculate the learning rate schedule.
FLAGS.batch_size) num_batches_per_epoch = (NUM_EXAMPLES_PER_EPOCH_FOR_TRAIN /
decay_steps = int(num_batches_per_epoch * NUM_EPOCHS_PER_DECAY) FLAGS.batch_size)
decay_steps = int(num_batches_per_epoch * NUM_EPOCHS_PER_DECAY)
# Decay the learning rate exponentially based on the number of steps.
lr = tf.train.exponential_decay(INITIAL_LEARNING_RATE, # Decay the learning rate exponentially based on the number of steps.
global_step, lr = tf.train.exponential_decay(
decay_steps, INITIAL_LEARNING_RATE,
LEARNING_RATE_DECAY_FACTOR, global_step,
staircase=True) decay_steps,
LEARNING_RATE_DECAY_FACTOR,
# Create an optimizer that performs gradient descent. staircase=True)
opt = tf.train.MomentumOptimizer(lr, 0.9)
# Create an optimizer that performs gradient descent.
# Calculate the gradients for each model tower. opt = tf.train.MomentumOptimizer(lr, 0.9)
tower_grads = []
for i in xrange(FLAGS.num_gpus): # Calculate the gradients for each model tower.
with tf.device('/gpu:%d' % i): tower_grads = []
with tf.name_scope('%s_%d' % (TOWER_NAME, i)) as scope: for i in xrange(FLAGS.num_gpus):
# Calculate the loss for one tower of the model. This function with tf.device('/gpu:%d' % i):
# constructs the entire model but shares the variables across with tf.name_scope('%s_%d' % (TOWER_NAME, i)) as scope:
# all towers. # Calculate the loss for one tower of the model. This function
loss = tower_loss(scope) # constructs the entire model but shares the variables across
# all towers.
# Reuse variables for the next tower. loss = tower_loss(scope)
tf.get_variable_scope().reuse_variables()
# Reuse variables for the next tower.
# Retain the summaries from the final tower. tf.get_variable_scope().reuse_variables()
summaries = tf.get_collection(tf.GraphKeys.SUMMARIES, scope)
# Retain the summaries from the final tower.
# Calculate the gradients for the batch of data on this tower. summaries = tf.get_collection(tf.GraphKeys.SUMMARIES, scope)
grads = opt.compute_gradients(loss)
# Calculate the gradients for the batch of data on this tower.
# Keep track of the gradients across all towers. grads = opt.compute_gradients(loss)
tower_grads.append(grads)
# Keep track of the gradients across all towers.
# We must calculate the mean of each gradient. Note that this is the tower_grads.append(grads)
# synchronization point across all towers.
grads = average_gradients(tower_grads) # We must calculate the mean of each gradient. Note that this is the
# synchronization point across all towers.
# Apply the gradients to adjust the shared variables. grads = average_gradients(tower_grads)
apply_gradient_op = opt.apply_gradients(grads, global_step=global_step)
# Apply the gradients to adjust the shared variables.
# Group all updates to into a single train op. apply_gradient_op = opt.apply_gradients(grads, global_step=global_step)
train_op = tf.group(apply_gradient_op)
# Group all updates to into a single train op.
# Build an initialization operation. train_op = tf.group(apply_gradient_op)
init = tf.initialize_all_variables()
# Build an initialization operation.
# Start running operations on the Graph. allow_soft_placement must be set to init = tf.initialize_all_variables()
# True to build towers on GPU, as some of the ops do not have GPU
# implementations. # Start running operations on the Graph. allow_soft_placement must be set to
sess = tf.Session(config=tf.ConfigProto( # True to build towers on GPU, as some of the ops do not have GPU
allow_soft_placement=True, # implementations.
log_device_placement=FLAGS.log_device_placement)) sess = tf.Session(config=tf.ConfigProto(
sess.run(init) allow_soft_placement=True,
time_tensorflow_run(sess, [train_op, loss]) log_device_placement=FLAGS.log_device_placement))
sess.run(init)
time_tensorflow_run(sess, [train_op, loss])
def main(_):
    run_benchmark()


if __name__ == '__main__':
    tf.app.run()
...@@ -8,10 +8,8 @@ import tensorflow as tf ...@@ -8,10 +8,8 @@ import tensorflow as tf
FLAGS = tf.app.flags.FLAGS FLAGS = tf.app.flags.FLAGS
tf.app.flags.DEFINE_integer('batch_size', 128, tf.app.flags.DEFINE_integer('batch_size', 128, """Batch size.""")
"""Batch size.""") tf.app.flags.DEFINE_integer('num_batches', 100, """Number of batches to run.""")
tf.app.flags.DEFINE_integer('num_batches', 100,
"""Number of batches to run.""")
tf.app.flags.DEFINE_boolean('forward_only', False, tf.app.flags.DEFINE_boolean('forward_only', False,
"""Only run the forward pass.""") """Only run the forward pass.""")
tf.app.flags.DEFINE_boolean('forward_backward_only', False, tf.app.flags.DEFINE_boolean('forward_backward_only', False,
...@@ -29,72 +27,92 @@ conv_counter = 1 ...@@ -29,72 +27,92 @@ conv_counter = 1
pool_counter = 1 pool_counter = 1
affine_counter = 1 affine_counter = 1
def _conv(inpOp, nIn, nOut, kH, kW, dH, dW, padType, wd = 0.0005):
def _conv(inpOp, nIn, nOut, kH, kW, dH, dW, padType, wd=0.0005):
global conv_counter global conv_counter
global parameters global parameters
name = 'conv' + str(conv_counter) name = 'conv' + str(conv_counter)
conv_counter += 1 conv_counter += 1
with tf.name_scope(name) as scope: with tf.name_scope(name) as scope:
kernel = tf.Variable(tf.truncated_normal([kH, kW, nIn, nOut], kernel = tf.Variable(
dtype=tf.float32, tf.truncated_normal(
stddev=1e-1), name='weights') [kH, kW, nIn, nOut], dtype=tf.float32, stddev=1e-1),
name='weights')
if wd is not None and wd > 0: if wd is not None and wd > 0:
weight_decay = tf.mul(tf.nn.l2_loss(kernel), wd, name='weight_loss') weight_decay = tf.mul(tf.nn.l2_loss(kernel), wd, name='weight_loss')
tf.add_to_collection('losses', weight_decay) tf.add_to_collection('losses', weight_decay)
if FLAGS.data_format == 'NCHW': if FLAGS.data_format == 'NCHW':
strides = [1, 1, dH, dW] strides = [1, 1, dH, dW]
else: else:
strides = [1, dH, dW, 1] strides = [1, dH, dW, 1]
conv = tf.nn.conv2d(inpOp, kernel, strides, padding=padType, conv = tf.nn.conv2d(
data_format=FLAGS.data_format) inpOp,
biases = tf.Variable(tf.constant(0.0, shape=[nOut], dtype=tf.float32), kernel,
trainable=True, name='biases') strides,
bias = tf.reshape(tf.nn.bias_add(conv, biases, padding=padType,
data_format=FLAGS.data_format), data_format=FLAGS.data_format)
conv.get_shape()) biases = tf.Variable(
tf.constant(
0.0, shape=[nOut], dtype=tf.float32),
trainable=True,
name='biases')
bias = tf.reshape(
tf.nn.bias_add(
conv, biases, data_format=FLAGS.data_format),
conv.get_shape())
conv1 = tf.nn.relu(bias, name=scope) conv1 = tf.nn.relu(bias, name=scope)
parameters += [kernel, biases] parameters += [kernel, biases]
return conv1 return conv1
def _affine(inpOp, nIn, nOut, act=True, wd = 0.0005):
def _affine(inpOp, nIn, nOut, act=True, wd=0.0005):
global affine_counter global affine_counter
global parameters global parameters
name = 'affine' + str(affine_counter) name = 'affine' + str(affine_counter)
affine_counter += 1 affine_counter += 1
with tf.name_scope(name) as scope: with tf.name_scope(name) as scope:
kernel = tf.Variable(tf.truncated_normal([nIn, nOut], kernel = tf.Variable(
dtype=tf.float32, tf.truncated_normal(
stddev=1e-1), name='weights') [nIn, nOut], dtype=tf.float32, stddev=1e-1),
name='weights')
if wd is not None and wd > 0: if wd is not None and wd > 0:
weight_decay = tf.mul(tf.nn.l2_loss(kernel), wd, name='weight_loss') weight_decay = tf.mul(tf.nn.l2_loss(kernel), wd, name='weight_loss')
tf.add_to_collection('losses', weight_decay) tf.add_to_collection('losses', weight_decay)
biases = tf.Variable(tf.constant(0.0, shape=[nOut], dtype=tf.float32), biases = tf.Variable(
trainable=True, name='biases') tf.constant(
affine1 = tf.nn.relu_layer(inpOp, kernel, biases, name=name) if act else tf.matmul(inpOp, kernel) + biases 0.0, shape=[nOut], dtype=tf.float32),
trainable=True,
name='biases')
affine1 = tf.nn.relu_layer(
inpOp, kernel, biases,
name=name) if act else tf.matmul(inpOp, kernel) + biases
parameters += [kernel, biases] parameters += [kernel, biases]
return affine1 return affine1
def _mpool(inpOp, kH, kW, dH, dW, padding): def _mpool(inpOp, kH, kW, dH, dW, padding):
global pool_counter global pool_counter
global parameters global parameters
name = 'pool' + str(pool_counter) name = 'pool' + str(pool_counter)
pool_counter += 1 pool_counter += 1
if FLAGS.data_format == 'NCHW': if FLAGS.data_format == 'NCHW':
ksize = [1, 1, kH, kW] ksize = [1, 1, kH, kW]
strides = [1, 1, dH, dW] strides = [1, 1, dH, dW]
else: else:
ksize = [1, kH, kW, 1] ksize = [1, kH, kW, 1]
strides = [1, dH, dW, 1] strides = [1, dH, dW, 1]
return tf.nn.max_pool(inpOp, return tf.nn.max_pool(
ksize=ksize, inpOp,
strides=strides, ksize=ksize,
padding=padding, strides=strides,
data_format=FLAGS.data_format, padding=padding,
name=name) data_format=FLAGS.data_format,
name=name)
def _apool(inpOp, kH, kW, dH, dW, padding): def _apool(inpOp, kH, kW, dH, dW, padding):
global pool_counter global pool_counter
...@@ -102,17 +120,19 @@ def _apool(inpOp, kH, kW, dH, dW, padding): ...@@ -102,17 +120,19 @@ def _apool(inpOp, kH, kW, dH, dW, padding):
name = 'pool' + str(pool_counter) name = 'pool' + str(pool_counter)
pool_counter += 1 pool_counter += 1
if FLAGS.data_format == 'NCHW': if FLAGS.data_format == 'NCHW':
ksize = [1, 1, kH, kW] ksize = [1, 1, kH, kW]
strides = [1, 1, dH, dW] strides = [1, 1, dH, dW]
else: else:
ksize = [1, kH, kW, 1] ksize = [1, kH, kW, 1]
strides = [1, dH, dW, 1] strides = [1, dH, dW, 1]
return tf.nn.avg_pool(inpOp, return tf.nn.avg_pool(
ksize=ksize, inpOp,
strides=strides, ksize=ksize,
padding=padding, strides=strides,
data_format=FLAGS.data_format, padding=padding,
name=name) data_format=FLAGS.data_format,
name=name)
def _inception(inp, inSize, o1s, o2s1, o2s2, o3s1, o3s2, o4s1, o4s2): def _inception(inp, inSize, o1s, o2s1, o2s2, o3s1, o3s2, o4s1, o4s2):
conv1 = _conv(inp, inSize, o1s, 1, 1, 1, 1, 'VALID') conv1 = _conv(inp, inSize, o1s, 1, 1, 1, 1, 'VALID')
...@@ -127,9 +147,9 @@ def _inception(inp, inSize, o1s, o2s1, o2s2, o3s1, o3s2, o4s1, o4s2): ...@@ -127,9 +147,9 @@ def _inception(inp, inSize, o1s, o2s1, o2s2, o3s1, o3s2, o4s1, o4s2):
pool = _conv(pool_, inSize, o4s2, 1, 1, 1, 1, 'VALID') pool = _conv(pool_, inSize, o4s2, 1, 1, 1, 1, 'VALID')
if FLAGS.data_format == 'NCHW': if FLAGS.data_format == 'NCHW':
channel_dim = 1 channel_dim = 1
else: else:
channel_dim = 3 channel_dim = 3
incept = tf.concat(channel_dim, [conv1, conv3, conv5, pool]) incept = tf.concat(channel_dim, [conv1, conv3, conv5, pool])
return incept return incept
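# Added note: tf.concat joins the four branches along the channel axis, so an
# inception block outputs o1s + o2s2 + o3s2 + o4s2 channels.  For incept3a
# below that is 64 + 128 + 32 + 32 = 256, which is exactly the inSize passed
# to incept3b.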
...@@ -139,40 +159,40 @@ def loss(logits, labels): ...@@ -139,40 +159,40 @@ def loss(logits, labels):
    labels = tf.expand_dims(labels, 1)
    indices = tf.expand_dims(tf.range(0, batch_size, 1), 1)
    concated = tf.concat(1, [indices, labels])
    onehot_labels = tf.sparse_to_dense(concated,
                                       tf.pack([batch_size, 1000]), 1.0, 0.0)
    cross_entropy = tf.nn.softmax_cross_entropy_with_logits(
        logits, onehot_labels, name='xentropy')
    loss = tf.reduce_mean(cross_entropy, name='xentropy_mean')
    return loss
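# Added walk-through of the one-hot construction above (not in the original):
# with batch_size = 2 and labels = [3, 7],
#   indices       -> [[0], [1]]
#   concated      -> [[0, 3], [1, 7]]
#   onehot_labels -> a [2, 1000] matrix with 1.0 at (0, 3) and (1, 7), 0.0
#                    elsewhere,
# which is the dense target softmax_cross_entropy_with_logits expects here.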
def inference(images): def inference(images):
# stage 1 # stage 1
conv1 = _conv (images, 3, 64, 7, 7, 2, 2, 'SAME') conv1 = _conv(images, 3, 64, 7, 7, 2, 2, 'SAME')
pool1 = _mpool(conv1, 3, 3, 2, 2, 'SAME') pool1 = _mpool(conv1, 3, 3, 2, 2, 'SAME')
# stage 2 # stage 2
conv2 = _conv (pool1, 64, 64, 1, 1, 1, 1, 'VALID') conv2 = _conv(pool1, 64, 64, 1, 1, 1, 1, 'VALID')
conv3 = _conv (conv2, 64, 192, 3, 3, 1, 1, 'SAME') conv3 = _conv(conv2, 64, 192, 3, 3, 1, 1, 'SAME')
pool3 = _mpool(conv3, 3, 3, 2, 2, 'SAME') pool3 = _mpool(conv3, 3, 3, 2, 2, 'SAME')
# stage 3 # stage 3
incept3a = _inception(pool3, 192, 64, 96, 128, 16, 32, 3, 32) incept3a = _inception(pool3, 192, 64, 96, 128, 16, 32, 3, 32)
incept3b = _inception(incept3a, 256, 128, 128, 192, 32, 96, 3, 64) incept3b = _inception(incept3a, 256, 128, 128, 192, 32, 96, 3, 64)
pool4 = _mpool(incept3b, 3, 3, 2, 2, 'SAME') pool4 = _mpool(incept3b, 3, 3, 2, 2, 'SAME')
# stage 4 # stage 4
incept4a = _inception(pool4, 480, 192, 96, 208, 16, 48, 3, 64) incept4a = _inception(pool4, 480, 192, 96, 208, 16, 48, 3, 64)
incept4b = _inception(incept4a, 512, 160, 112, 224, 24, 64, 3, 64) incept4b = _inception(incept4a, 512, 160, 112, 224, 24, 64, 3, 64)
incept4c = _inception(incept4b, 512, 128, 128, 256, 24, 64, 3, 64) incept4c = _inception(incept4b, 512, 128, 128, 256, 24, 64, 3, 64)
incept4d = _inception(incept4c, 512, 112, 144, 288, 32, 64, 3, 64) incept4d = _inception(incept4c, 512, 112, 144, 288, 32, 64, 3, 64)
incept4e = _inception(incept4d, 528, 256, 160, 320, 32, 128, 3, 128) incept4e = _inception(incept4d, 528, 256, 160, 320, 32, 128, 3, 128)
pool5 = _mpool(incept4e, 3, 3, 2, 2, 'SAME') pool5 = _mpool(incept4e, 3, 3, 2, 2, 'SAME')
# stage 5 # stage 5
incept5a = _inception(pool5, 832, 256, 160, 320, 32, 128, 3, 128) incept5a = _inception(pool5, 832, 256, 160, 320, 32, 128, 3, 128)
incept5b = _inception(incept5a, 832, 384, 192, 384, 48, 128, 3, 128) incept5b = _inception(incept5a, 832, 384, 192, 384, 48, 128, 3, 128)
pool6 = _apool(incept5b, 7, 7, 1, 1, 'VALID') pool6 = _apool(incept5b, 7, 7, 1, 1, 'VALID')
# output 1 # output 1
resh1 = tf.reshape(pool6, [-1, 1024]) resh1 = tf.reshape(pool6, [-1, 1024])
...@@ -183,100 +203,109 @@ def inference(images): ...@@ -183,100 +203,109 @@ def inference(images):
def time_tensorflow_run(session, target, info_string): def time_tensorflow_run(session, target, info_string):
num_steps_burn_in = 10 num_steps_burn_in = 10
total_duration = 0.0 total_duration = 0.0
total_duration_squared = 0.0 total_duration_squared = 0.0
if not isinstance(target, list): if not isinstance(target, list):
target = [target] target = [target]
target_op = tf.group(*target) target_op = tf.group(*target)
for i in range(FLAGS.num_batches + num_steps_burn_in): for i in range(FLAGS.num_batches + num_steps_burn_in):
start_time = time.time() start_time = time.time()
_ = session.run(target_op) _ = session.run(target_op)
duration = time.time() - start_time duration = time.time() - start_time
if i > num_steps_burn_in: if i > num_steps_burn_in:
if not i % 10: if not i % 10:
print ('%s: step %d, duration = %.3f' % print('%s: step %d, duration = %.3f' %
(datetime.now(), i - num_steps_burn_in, duration)) (datetime.now(), i - num_steps_burn_in, duration))
total_duration += duration total_duration += duration
total_duration_squared += duration * duration total_duration_squared += duration * duration
mn = total_duration / FLAGS.num_batches mn = total_duration / FLAGS.num_batches
vr = total_duration_squared / FLAGS.num_batches - mn * mn vr = total_duration_squared / FLAGS.num_batches - mn * mn
sd = math.sqrt(vr) sd = math.sqrt(vr)
print ('%s: %s across %d steps, %.3f +/- %.3f sec / batch' % print('%s: %s across %d steps, %.3f +/- %.3f sec / batch' %
(datetime.now(), info_string, FLAGS.num_batches, mn, sd)) (datetime.now(), info_string, FLAGS.num_batches, mn, sd))
def run_benchmark(): def run_benchmark():
global parameters global parameters
with tf.Graph().as_default(): with tf.Graph().as_default():
# Generate some dummy images. # Generate some dummy images.
image_size = 224 image_size = 224
if FLAGS.data_format == 'NCHW': if FLAGS.data_format == 'NCHW':
image_shape = [FLAGS.batch_size, 3, image_size, image_size] image_shape = [FLAGS.batch_size, 3, image_size, image_size]
else: else:
image_shape = [FLAGS.batch_size, image_size, image_size, 3] image_shape = [FLAGS.batch_size, image_size, image_size, 3]
images = tf.get_variable('image', image_shape, images = tf.get_variable(
initializer=tf.truncated_normal_initializer(stddev=0.1, dtype=tf.float32), 'image',
dtype=tf.float32, image_shape,
trainable=False) initializer=tf.truncated_normal_initializer(
stddev=0.1, dtype=tf.float32),
labels = tf.get_variable('label', [FLAGS.batch_size], dtype=tf.float32,
initializer=tf.constant_initializer(1), trainable=False)
dtype=tf.int32,
trainable=False) labels = tf.get_variable(
'label', [FLAGS.batch_size],
# Build a Graph that computes the logits predictions from the initializer=tf.constant_initializer(1),
# inference model. dtype=tf.int32,
last_layer = inference(images) trainable=False)
objective = loss(last_layer, labels) # Build a Graph that computes the logits predictions from the
# inference model.
# Compute gradients. last_layer = inference(images)
# opt = tf.train.GradientDescentOptimizer(0.001)
opt = tf.train.MomentumOptimizer(0.001, 0.9) objective = loss(last_layer, labels)
grads = opt.compute_gradients(objective)
global_step = tf.get_variable('global_step', [], # Compute gradients.
initializer=tf.constant_initializer(0.0, dtype=tf.float32), # opt = tf.train.GradientDescentOptimizer(0.001)
trainable=False, dtype=tf.float32) opt = tf.train.MomentumOptimizer(0.001, 0.9)
apply_gradient_op = opt.apply_gradients(grads, global_step=global_step) grads = opt.compute_gradients(objective)
global_step = tf.get_variable(
# Track the moving averages of all trainable variables. 'global_step', [],
variable_averages = tf.train.ExponentialMovingAverage( initializer=tf.constant_initializer(
0.9, global_step) 0.0, dtype=tf.float32),
variables_averages_op = variable_averages.apply(tf.trainable_variables()) trainable=False,
dtype=tf.float32)
# Build an initialization operation. apply_gradient_op = opt.apply_gradients(grads, global_step=global_step)
init = tf.initialize_all_variables()
# Track the moving averages of all trainable variables.
# Start running operations on the Graph. variable_averages = tf.train.ExponentialMovingAverage(0.9, global_step)
sess = tf.Session(config=tf.ConfigProto( variables_averages_op = variable_averages.apply(tf.trainable_variables(
allow_soft_placement=True, ))
log_device_placement=FLAGS.log_device_placement))
sess.run(init) # Build an initialization operation.
init = tf.initialize_all_variables()
run_forward = True
run_forward_backward = True # Start running operations on the Graph.
if FLAGS.forward_only and FLAGS.forward_backward_only: sess = tf.Session(config=tf.ConfigProto(
raise ValueError("Cannot specify --forward_only and " allow_soft_placement=True,
"--forward_backward_only at the same time.") log_device_placement=FLAGS.log_device_placement))
if FLAGS.forward_only: sess.run(init)
run_forward_backward = False
elif FLAGS.forward_backward_only: run_forward = True
run_forward = False run_forward_backward = True
if FLAGS.forward_only and FLAGS.forward_backward_only:
if run_forward: raise ValueError("Cannot specify --forward_only and "
# Run the forward benchmark. "--forward_backward_only at the same time.")
time_tensorflow_run(sess, last_layer, "Forward") if FLAGS.forward_only:
run_forward_backward = False
if run_forward_backward: elif FLAGS.forward_backward_only:
with tf.control_dependencies([apply_gradient_op, variables_averages_op]): run_forward = False
train_op = tf.no_op(name='train')
time_tensorflow_run(sess, [train_op, objective], "Forward-backward") if run_forward:
# Run the forward benchmark.
time_tensorflow_run(sess, last_layer, "Forward")
if run_forward_backward:
with tf.control_dependencies(
[apply_gradient_op, variables_averages_op]):
train_op = tf.no_op(name='train')
time_tensorflow_run(sess, [train_op, objective], "Forward-backward")
def main(_):
    run_benchmark()


if __name__ == '__main__':
    tf.app.run()
...@@ -8,10 +8,8 @@ import tensorflow as tf ...@@ -8,10 +8,8 @@ import tensorflow as tf
FLAGS = tf.app.flags.FLAGS FLAGS = tf.app.flags.FLAGS
tf.app.flags.DEFINE_integer('batch_size', 128, tf.app.flags.DEFINE_integer('batch_size', 128, """Batch size.""")
"""Batch size.""") tf.app.flags.DEFINE_integer('num_batches', 100, """Number of batches to run.""")
tf.app.flags.DEFINE_integer('num_batches', 100,
"""Number of batches to run.""")
tf.app.flags.DEFINE_boolean('forward_only', False, tf.app.flags.DEFINE_boolean('forward_only', False,
"""Only run the forward pass.""") """Only run the forward pass.""")
tf.app.flags.DEFINE_boolean('forward_backward_only', False, tf.app.flags.DEFINE_boolean('forward_backward_only', False,
...@@ -29,78 +27,97 @@ conv_counter = 1 ...@@ -29,78 +27,97 @@ conv_counter = 1
pool_counter = 1 pool_counter = 1
affine_counter = 1 affine_counter = 1
def _conv(inpOp, nIn, nOut, kH, kW, dH, dW, padType, wd=0.005, act=True): def _conv(inpOp, nIn, nOut, kH, kW, dH, dW, padType, wd=0.005, act=True):
global conv_counter global conv_counter
global parameters global parameters
name = 'conv' + str(conv_counter) name = 'conv' + str(conv_counter)
conv_counter += 1 conv_counter += 1
with tf.name_scope(name) as scope: with tf.name_scope(name) as scope:
kernel = tf.Variable(tf.truncated_normal([kH, kW, nIn, nOut], kernel = tf.Variable(
dtype=tf.float32, tf.truncated_normal(
stddev=1e-1), name='weights') [kH, kW, nIn, nOut], dtype=tf.float32, stddev=1e-1),
name='weights')
if wd is not None: if wd is not None:
weight_decay = tf.mul(tf.nn.l2_loss(kernel), wd, name='weight_loss') weight_decay = tf.mul(tf.nn.l2_loss(kernel), wd, name='weight_loss')
tf.add_to_collection('losses', weight_decay) tf.add_to_collection('losses', weight_decay)
if FLAGS.data_format == 'NCHW': if FLAGS.data_format == 'NCHW':
strides = [1, 1, dH, dW] strides = [1, 1, dH, dW]
else: else:
strides = [1, dH, dW, 1] strides = [1, dH, dW, 1]
conv = tf.nn.conv2d(inpOp, kernel, strides, padding=padType, conv = tf.nn.conv2d(
data_format=FLAGS.data_format) inpOp,
biases = tf.Variable(tf.constant(0.0, shape=[nOut], dtype=tf.float32), kernel,
trainable=True, name='biases') strides,
bias = tf.reshape(tf.nn.bias_add(conv, biases, padding=padType,
data_format=FLAGS.data_format), data_format=FLAGS.data_format)
conv.get_shape()) biases = tf.Variable(
tf.constant(
0.0, shape=[nOut], dtype=tf.float32),
trainable=True,
name='biases')
bias = tf.reshape(
tf.nn.bias_add(
conv, biases, data_format=FLAGS.data_format),
conv.get_shape())
conv1 = tf.nn.relu(bias, name=scope) if act else bias conv1 = tf.nn.relu(bias, name=scope) if act else bias
parameters += [kernel, biases] parameters += [kernel, biases]
return conv1 return conv1
def _affine(inpOp, nIn, nOut, wd=None, act=True): def _affine(inpOp, nIn, nOut, wd=None, act=True):
global affine_counter global affine_counter
global parameters global parameters
name = 'affine' + str(affine_counter) name = 'affine' + str(affine_counter)
affine_counter += 1 affine_counter += 1
with tf.name_scope(name) as scope: with tf.name_scope(name) as scope:
kernel = tf.Variable(tf.truncated_normal([nIn, nOut], kernel = tf.Variable(
dtype=tf.float32, tf.truncated_normal(
stddev=1e-1), name='weights') [nIn, nOut], dtype=tf.float32, stddev=1e-1),
name='weights')
if wd is not None: if wd is not None:
weight_decay = tf.mul(tf.nn.l2_loss(kernel), wd, name='weight_loss') weight_decay = tf.mul(tf.nn.l2_loss(kernel), wd, name='weight_loss')
tf.add_to_collection('losses', weight_decay) tf.add_to_collection('losses', weight_decay)
biases = tf.Variable(tf.constant(0.0, shape=[nOut], dtype=tf.float32), biases = tf.Variable(
trainable=True, name='biases') tf.constant(
0.0, shape=[nOut], dtype=tf.float32),
trainable=True,
name='biases')
affine1 = tf.nn.relu_layer(inpOp, kernel, biases, name=name) if act else tf.matmul(inpOp, kernel) + biases affine1 = tf.nn.relu_layer(
inpOp, kernel, biases,
name=name) if act else tf.matmul(inpOp, kernel) + biases
parameters += [kernel, biases] parameters += [kernel, biases]
return affine1 return affine1
def _mpool(inpOp, kH, kW, dH, dW, padding): def _mpool(inpOp, kH, kW, dH, dW, padding):
global pool_counter global pool_counter
global parameters global parameters
name = 'pool' + str(pool_counter) name = 'pool' + str(pool_counter)
pool_counter += 1 pool_counter += 1
if FLAGS.data_format == 'NCHW': if FLAGS.data_format == 'NCHW':
ksize = [1, 1, kH, kW] ksize = [1, 1, kH, kW]
strides = [1, 1, dH, dW] strides = [1, 1, dH, dW]
else: else:
ksize = [1, kH, kW, 1] ksize = [1, kH, kW, 1]
strides = [1, dH, dW, 1] strides = [1, dH, dW, 1]
return tf.nn.max_pool(inpOp, return tf.nn.max_pool(
ksize=ksize, inpOp,
strides=strides, ksize=ksize,
padding=padding, strides=strides,
data_format=FLAGS.data_format, padding=padding,
name=name) data_format=FLAGS.data_format,
name=name)
def _apool(inpOp, kH, kW, dH, dW, padding): def _apool(inpOp, kH, kW, dH, dW, padding):
...@@ -109,36 +126,42 @@ def _apool(inpOp, kH, kW, dH, dW, padding): ...@@ -109,36 +126,42 @@ def _apool(inpOp, kH, kW, dH, dW, padding):
name = 'pool' + str(pool_counter) name = 'pool' + str(pool_counter)
pool_counter += 1 pool_counter += 1
if FLAGS.data_format == 'NCHW': if FLAGS.data_format == 'NCHW':
ksize = [1, 1, kH, kW] ksize = [1, 1, kH, kW]
strides = [1, 1, dH, dW] strides = [1, 1, dH, dW]
else: else:
ksize = [1, kH, kW, 1] ksize = [1, kH, kW, 1]
strides = [1, dH, dW, 1] strides = [1, dH, dW, 1]
return tf.nn.avg_pool(inpOp, return tf.nn.avg_pool(
ksize=ksize, inpOp,
strides=strides, ksize=ksize,
padding=padding, strides=strides,
data_format=FLAGS.data_format, padding=padding,
name=name) data_format=FLAGS.data_format,
name=name)
def _norm(name, l_input, lsize=4): def _norm(name, l_input, lsize=4):
return tf.nn.lrn(l_input, lsize, bias=1.0, return tf.nn.lrn(l_input,
lsize,
bias=1.0,
alpha=0.001 / 9.0, alpha=0.001 / 9.0,
beta=0.75, name=name) beta=0.75,
name=name)
def loss(logits, labels): def loss(logits, labels):
batch_size = tf.size(labels) batch_size = tf.size(labels)
labels = tf.expand_dims(labels, 1) labels = tf.expand_dims(labels, 1)
indices = tf.expand_dims(tf.range(0, batch_size, 1), 1) indices = tf.expand_dims(tf.range(0, batch_size, 1), 1)
concated = tf.concat(1, [indices, labels]) concated = tf.concat(1, [indices, labels])
onehot_labels = tf.sparse_to_dense( onehot_labels = tf.sparse_to_dense(concated,
concated, tf.pack([batch_size, 10]), 1.0, 0.0) tf.pack([batch_size, 10]), 1.0, 0.0)
cross_entropy = tf.nn.softmax_cross_entropy_with_logits(logits, cross_entropy = tf.nn.softmax_cross_entropy_with_logits(
onehot_labels, logits, onehot_labels, name='xentropy')
name='xentropy')
loss = tf.reduce_mean(cross_entropy, name='xentropy_mean') loss = tf.reduce_mean(cross_entropy, name='xentropy_mean')
return loss return loss
def get_incoming_shape(incoming): def get_incoming_shape(incoming):
""" Returns the incoming data shape """ """ Returns the incoming data shape """
if isinstance(incoming, tf.Tensor): if isinstance(incoming, tf.Tensor):
...@@ -148,125 +171,134 @@ def get_incoming_shape(incoming): ...@@ -148,125 +171,134 @@ def get_incoming_shape(incoming):
else: else:
raise Exception("Invalid incoming layer.") raise Exception("Invalid incoming layer.")
def inference(images):
    conv1 = _conv(images, 3, 32, 5, 5, 1, 1, 'SAME')
    pool1 = _mpool(conv1, 3, 3, 2, 2, 'SAME')
    conv2 = _conv(pool1, 32, 32, 5, 5, 1, 1, 'SAME')
    pool2 = _apool(conv2, 3, 3, 2, 2, 'SAME')
    conv3 = _conv(pool2, 32, 64, 5, 5, 1, 1, 'SAME')
    pool3 = _apool(conv3, 3, 3, 2, 2, 'SAME')
    resh1 = tf.reshape(pool3, [-1, 64 * 4 * 4])
    affn1 = _affine(resh1, 64 * 4 * 4, 64)
    affn2 = _affine(affn1, 64, 10, act=False)
    print('conv1:', get_incoming_shape(conv1))
    print('pool1:', get_incoming_shape(pool1))
    print('conv2:', get_incoming_shape(conv2))
    print('pool2:', get_incoming_shape(pool2))
    print('conv3:', get_incoming_shape(conv3))
    print('pool3:', get_incoming_shape(pool3))
    return affn2
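# Added shape note (not in the original file): with the 32 x 32 input created
# in run_benchmark below, each stride-2 'SAME' pool halves the spatial size,
# 32 -> 16 -> 8 -> 4, so pool3 is 4 x 4 x 64 and the reshape to
# 64 * 4 * 4 = 1024 features is exact.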
def time_tensorflow_run(session, target, info_string): def time_tensorflow_run(session, target, info_string):
num_steps_burn_in = 10 num_steps_burn_in = 10
total_duration = 0.0 total_duration = 0.0
total_duration_squared = 0.0 total_duration_squared = 0.0
if not isinstance(target, list): if not isinstance(target, list):
target = [target] target = [target]
target_op = tf.group(*target) target_op = tf.group(*target)
for i in xrange(FLAGS.num_batches + num_steps_burn_in): for i in xrange(FLAGS.num_batches + num_steps_burn_in):
start_time = time.time() start_time = time.time()
_ = session.run(target_op) _ = session.run(target_op)
duration = time.time() - start_time duration = time.time() - start_time
if i > num_steps_burn_in: if i > num_steps_burn_in:
if not i % 10: if not i % 10:
print ('%s: step %d, duration = %.3f' % print('%s: step %d, duration = %.3f' %
(datetime.now(), i - num_steps_burn_in, duration)) (datetime.now(), i - num_steps_burn_in, duration))
total_duration += duration total_duration += duration
total_duration_squared += duration * duration total_duration_squared += duration * duration
mn = total_duration / FLAGS.num_batches mn = total_duration / FLAGS.num_batches
vr = total_duration_squared / FLAGS.num_batches - mn * mn vr = total_duration_squared / FLAGS.num_batches - mn * mn
sd = math.sqrt(vr) sd = math.sqrt(vr)
print ('%s: %s across %d steps, %.3f +/- %.3f sec / batch' % print('%s: %s across %d steps, %.3f +/- %.3f sec / batch' %
(datetime.now(), info_string, FLAGS.num_batches, mn, sd)) (datetime.now(), info_string, FLAGS.num_batches, mn, sd))
def run_benchmark(): def run_benchmark():
global parameters global parameters
with tf.Graph().as_default(): with tf.Graph().as_default():
# Generate some dummy images. # Generate some dummy images.
image_size = 32 image_size = 32
        # Note that our padding definition is slightly different from cuda-convnet.
        # In order to force the model to start with the same activations sizes,
        # we add 3 to the image_size and employ VALID padding above.
if FLAGS.data_format == 'NCHW': if FLAGS.data_format == 'NCHW':
image_shape = [FLAGS.batch_size, 3, image_size, image_size] image_shape = [FLAGS.batch_size, 3, image_size, image_size]
else: else:
image_shape = [FLAGS.batch_size, image_size, image_size, 3] image_shape = [FLAGS.batch_size, image_size, image_size, 3]
images = tf.get_variable('image', image_shape, images = tf.get_variable(
initializer=tf.truncated_normal_initializer(stddev=0.1, dtype=tf.float32), 'image',
dtype=tf.float32, image_shape,
trainable=False) initializer=tf.truncated_normal_initializer(
stddev=0.1, dtype=tf.float32),
labels = tf.get_variable('label', [FLAGS.batch_size], dtype=tf.float32,
initializer=tf.constant_initializer(1), trainable=False)
dtype=tf.int32,
trainable=False) labels = tf.get_variable(
'label', [FLAGS.batch_size],
# Build a Graph that computes the logits predictions from the initializer=tf.constant_initializer(1),
# inference model. dtype=tf.int32,
last_layer = inference(images) trainable=False)
objective = loss(last_layer, labels) # Build a Graph that computes the logits predictions from the
# inference model.
# Compute gradients. last_layer = inference(images)
opt = tf.train.MomentumOptimizer(0.001, 0.9)
grads = opt.compute_gradients(objective) objective = loss(last_layer, labels)
global_step = tf.get_variable('global_step', [],
initializer=tf.constant_initializer(0.0, dtype=tf.float32), # Compute gradients.
trainable=False, dtype=tf.float32) opt = tf.train.MomentumOptimizer(0.001, 0.9)
apply_gradient_op = opt.apply_gradients(grads, global_step=global_step) grads = opt.compute_gradients(objective)
global_step = tf.get_variable(
# Track the moving averages of all trainable variables. 'global_step', [],
variable_averages = tf.train.ExponentialMovingAverage( initializer=tf.constant_initializer(
0.9, global_step) 0.0, dtype=tf.float32),
variables_averages_op = variable_averages.apply(tf.trainable_variables()) trainable=False,
dtype=tf.float32)
apply_gradient_op = opt.apply_gradients(grads, global_step=global_step)
# Build an initialization operation.
init = tf.initialize_all_variables() # Track the moving averages of all trainable variables.
variable_averages = tf.train.ExponentialMovingAverage(0.9, global_step)
# Start running operations on the Graph. variables_averages_op = variable_averages.apply(tf.trainable_variables(
sess = tf.Session(config=tf.ConfigProto( ))
allow_soft_placement=True,
log_device_placement=FLAGS.log_device_placement)) # Build an initialization operation.
sess.run(init) init = tf.initialize_all_variables()
run_forward = True # Start running operations on the Graph.
run_forward_backward = True sess = tf.Session(config=tf.ConfigProto(
if FLAGS.forward_only and FLAGS.forward_backward_only: allow_soft_placement=True,
raise ValueError("Cannot specify --forward_only and " log_device_placement=FLAGS.log_device_placement))
"--forward_backward_only at the same time.") sess.run(init)
if FLAGS.forward_only:
run_forward_backward = False run_forward = True
elif FLAGS.forward_backward_only: run_forward_backward = True
run_forward = False if FLAGS.forward_only and FLAGS.forward_backward_only:
raise ValueError("Cannot specify --forward_only and "
if run_forward: "--forward_backward_only at the same time.")
# Run the forward benchmark. if FLAGS.forward_only:
time_tensorflow_run(sess, last_layer, "Forward") run_forward_backward = False
elif FLAGS.forward_backward_only:
if run_forward_backward: run_forward = False
with tf.control_dependencies([apply_gradient_op, variables_averages_op]):
train_op = tf.no_op(name='train') if run_forward:
time_tensorflow_run(sess, [train_op, objective], "Forward-backward") # Run the forward benchmark.
time_tensorflow_run(sess, last_layer, "Forward")
if run_forward_backward:
with tf.control_dependencies(
[apply_gradient_op, variables_averages_op]):
train_op = tf.no_op(name='train')
time_tensorflow_run(sess, [train_op, objective], "Forward-backward")
def main(_):
    run_benchmark()


if __name__ == '__main__':
    tf.app.run()
You should also install tflearn:
```bash
pip install -r requirements.txt
```
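A quick way to confirm the dependencies import cleanly before running the benchmark (illustrative only; the exact package set comes from requirements.txt):

```bash
python -c "import tensorflow, tflearn; print('benchmark deps OK')"
```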
...@@ -8,14 +8,13 @@ import tflearn ...@@ -8,14 +8,13 @@ import tflearn
from tflearn.data_utils import to_categorical, pad_sequences from tflearn.data_utils import to_categorical, pad_sequences
from tflearn.datasets import imdb from tflearn.datasets import imdb
FLAGS = tf.app.flags.FLAGS FLAGS = tf.app.flags.FLAGS
class DataSet(object): class DataSet(object):
def __init__(self, data, labels): def __init__(self, data, labels):
assert data.shape[0] == labels.shape[0], ( assert data.shape[0] == labels.shape[0], (
'data.shape: %s labels.shape: %s' % (data.shape, 'data.shape: %s labels.shape: %s' % (data.shape, labels.shape))
labels.shape))
self._num_examples = data.shape[0] self._num_examples = data.shape[0]
self._data = data self._data = data
...@@ -64,8 +63,11 @@ class DataSet(object): ...@@ -64,8 +63,11 @@ class DataSet(object):
def create_datasets(file_path, vocab_size=30000, val_fraction=0.0):
    # IMDB Dataset loading
    train, test, _ = imdb.load_data(
        path=file_path,
        n_words=vocab_size,
        valid_portion=val_fraction,
        sort_by_len=False)
    trainX, trainY = train
    testX, testY = test
......
...@@ -11,27 +11,22 @@ from tensorflow.python.ops import rnn ...@@ -11,27 +11,22 @@ from tensorflow.python.ops import rnn
FLAGS = tf.app.flags.FLAGS

tf.app.flags.DEFINE_integer('batch_size', 128, """Batch size.""")
tf.app.flags.DEFINE_integer('num_batches', 100, """Number of batches to run.""")
tf.app.flags.DEFINE_integer('num_layers', 1, """Number of stacked LSTM layers.""")
tf.app.flags.DEFINE_integer('max_len', 100, """Maximum sequence length.""")
tf.app.flags.DEFINE_boolean('forward_only', False,
                            """Only run the forward pass.""")
tf.app.flags.DEFINE_boolean('forward_backward_only', False,
                            """Only run the forward-backward pass.""")
tf.app.flags.DEFINE_integer('hidden_size', 128, """LSTM hidden-layer size.""")
tf.app.flags.DEFINE_integer('emb_size', 128, """Word embedding size.""")
tf.app.flags.DEFINE_boolean('log_device_placement', False,
                            """Whether to log device placement.""")

VOCAB_SIZE = 30000
NUM_CLASS = 2
def get_feed_dict(x_data, y_data=None): def get_feed_dict(x_data, y_data=None):
feed_dict = {} feed_dict = {}
...@@ -44,6 +39,7 @@ def get_feed_dict(x_data, y_data=None): ...@@ -44,6 +39,7 @@ def get_feed_dict(x_data, y_data=None):
return feed_dict return feed_dict
def get_incoming_shape(incoming): def get_incoming_shape(incoming):
""" Returns the incoming data shape """ """ Returns the incoming data shape """
if isinstance(incoming, tf.Tensor): if isinstance(incoming, tf.Tensor):
...@@ -56,53 +52,75 @@ def get_incoming_shape(incoming): ...@@ -56,53 +52,75 @@ def get_incoming_shape(incoming):
# Note input * W is done in LSTMCell, # Note input * W is done in LSTMCell,
# which is different from PaddlePaddle # which is different from PaddlePaddle
def single_lstm(name, incoming, n_units, use_peepholes=True, def single_lstm(name,
return_seq=False, return_state=False): incoming,
with tf.name_scope(name) as scope: n_units,
cell = tf.nn.rnn_cell.LSTMCell(n_units, use_peepholes=use_peepholes) use_peepholes=True,
output, _cell_state = rnn.rnn(cell, incoming, dtype=tf.float32) return_seq=False,
out = output if return_seq else output[-1] return_state=False):
return (out, _cell_state) if return_state else out with tf.name_scope(name) as scope:
cell = tf.nn.rnn_cell.LSTMCell(n_units, use_peepholes=use_peepholes)
def lstm(name, incoming, n_units, use_peepholes=True, output, _cell_state = rnn.rnn(cell, incoming, dtype=tf.float32)
return_seq=False, return_state=False, num_layers=1): out = output if return_seq else output[-1]
with tf.name_scope(name) as scope: return (out, _cell_state) if return_state else out
lstm_cell = tf.nn.rnn_cell.LSTMCell(n_units, use_peepholes=use_peepholes)
cell = tf.nn.rnn_cell.MultiRNNCell([lstm_cell] * num_layers)
initial_state = cell.zero_state(FLAGS.batch_size, dtype=tf.float32) def lstm(name,
if not isinstance(incoming, list): incoming,
# if the input is embeding, the Tensor shape : [None, time_step, emb_size] n_units,
incoming = [tf.squeeze(input_, [1]) use_peepholes=True,
for input_ in tf.split(1, FLAGS.max_len, incoming)] return_seq=False,
outputs, state = tf.nn.rnn(cell, incoming, initial_state=initial_state, return_state=False,
dtype=tf.float32) num_layers=1):
out = outputs if return_seq else outputs[-1] with tf.name_scope(name) as scope:
return (out, _cell_state) if return_state else out lstm_cell = tf.nn.rnn_cell.LSTMCell(
n_units, use_peepholes=use_peepholes)
cell = tf.nn.rnn_cell.MultiRNNCell([lstm_cell] * num_layers)
initial_state = cell.zero_state(FLAGS.batch_size, dtype=tf.float32)
if not isinstance(incoming, list):
# if the input is an embedding, the Tensor shape is [None, time_step, emb_size]
incoming = [
tf.squeeze(input_, [1])
for input_ in tf.split(1, FLAGS.max_len, incoming)
]
outputs, state = tf.nn.rnn(cell,
incoming,
initial_state=initial_state,
dtype=tf.float32)
out = outputs if return_seq else outputs[-1]
return (out, state) if return_state else out
def embedding(name, incoming, vocab_size, emb_size): def embedding(name, incoming, vocab_size, emb_size):
with tf.name_scope(name) as scope: with tf.name_scope(name) as scope:
#with tf.device("/cpu:0"): #with tf.device("/cpu:0"):
embedding = tf.get_variable( embedding = tf.get_variable(
name+'_emb', [vocab_size, emb_size], dtype=tf.float32) name + '_emb', [vocab_size, emb_size], dtype=tf.float32)
out = tf.nn.embedding_lookup(embedding, incoming) out = tf.nn.embedding_lookup(embedding, incoming)
return out return out
def fc(name, inpOp, nIn, nOut, act=True): def fc(name, inpOp, nIn, nOut, act=True):
with tf.name_scope(name) as scope: with tf.name_scope(name) as scope:
kernel = tf.get_variable(name + '_w', [nIn, nOut], kernel = tf.get_variable(
initializer=tf.truncated_normal_initializer(stddev=0.01, dtype=tf.float32), name + '_w', [nIn, nOut],
initializer=tf.truncated_normal_initializer(
stddev=0.01, dtype=tf.float32),
dtype=tf.float32) dtype=tf.float32)
biases = tf.get_variable(name + '_b', [nOut], biases = tf.get_variable(
initializer=tf.constant_initializer(value=0.0, dtype=tf.float32), name + '_b', [nOut],
dtype=tf.float32,trainable=True) initializer=tf.constant_initializer(
value=0.0, dtype=tf.float32),
dtype=tf.float32,
trainable=True)
net = tf.nn.relu_layer(inpOp, kernel, biases, name=name) if act else \ net = tf.nn.relu_layer(inpOp, kernel, biases, name=name) if act else \
tf.matmul(inpOp, kernel) + biases tf.matmul(inpOp, kernel) + biases
return net return net
def inference(seq): def inference(seq):
net = embedding('emb', seq, VOCAB_SIZE, FLAGS.emb_size) net = embedding('emb', seq, VOCAB_SIZE, FLAGS.emb_size)
print "emb:", get_incoming_shape(net) print "emb:", get_incoming_shape(net)
...@@ -111,91 +129,95 @@ def inference(seq): ...@@ -111,91 +129,95 @@ def inference(seq):
net = fc('fc1', net, FLAGS.hidden_size, 2) net = fc('fc1', net, FLAGS.hidden_size, 2)
return net return net
def loss(logits, labels): def loss(logits, labels):
# one label index for one sample # one label index for one sample
labels = tf.cast(labels, tf.float32) labels = tf.cast(labels, tf.float32)
cross_entropy = tf.nn.softmax_cross_entropy_with_logits( cross_entropy = tf.nn.softmax_cross_entropy_with_logits(
logits, labels, name='cross_entropy_per_example') logits, labels, name='cross_entropy_per_example')
cross_entropy_mean = tf.reduce_mean(cross_entropy, name='cross_entropy') cross_entropy_mean = tf.reduce_mean(cross_entropy, name='cross_entropy')
tf.add_to_collection('losses', cross_entropy_mean) tf.add_to_collection('losses', cross_entropy_mean)
return tf.add_n(tf.get_collection('losses'), name='total_loss') return tf.add_n(tf.get_collection('losses'), name='total_loss')
def time_tensorflow_run(session, target, x_input, y_input, info_string): def time_tensorflow_run(session, target, x_input, y_input, info_string):
num_steps_burn_in = 50 num_steps_burn_in = 50
total_duration = 0.0 total_duration = 0.0
total_duration_squared = 0.0 total_duration_squared = 0.0
if not isinstance(target, list): if not isinstance(target, list):
target = [target] target = [target]
target_op = tf.group(*target) target_op = tf.group(*target)
train_dataset = reader.create_datasets("imdb.pkl", VOCAB_SIZE) train_dataset = reader.create_datasets("imdb.pkl", VOCAB_SIZE)
for i in xrange(FLAGS.num_batches + num_steps_burn_in): for i in xrange(FLAGS.num_batches + num_steps_burn_in):
start_time = time.time() start_time = time.time()
data, label = train_dataset.next_batch(FLAGS.batch_size) data, label = train_dataset.next_batch(FLAGS.batch_size)
_ = session.run(target_op, feed_dict={x_input:data, y_input:label}) _ = session.run(target_op, feed_dict={x_input: data, y_input: label})
duration = time.time() - start_time duration = time.time() - start_time
if i > num_steps_burn_in: if i > num_steps_burn_in:
if not i % 10: if not i % 10:
print ('%s: step %d, duration = %.3f' % print('%s: step %d, duration = %.3f' %
(datetime.now(), i - num_steps_burn_in, duration)) (datetime.now(), i - num_steps_burn_in, duration))
total_duration += duration total_duration += duration
total_duration_squared += duration * duration total_duration_squared += duration * duration
mn = total_duration / FLAGS.num_batches mn = total_duration / FLAGS.num_batches
vr = total_duration_squared / FLAGS.num_batches - mn * mn vr = total_duration_squared / FLAGS.num_batches - mn * mn
sd = math.sqrt(vr) sd = math.sqrt(vr)
print ('%s: %s across %d steps, %.3f +/- %.3f sec / batch' % print('%s: %s across %d steps, %.3f +/- %.3f sec / batch' %
(datetime.now(), info_string, FLAGS.num_batches, mn, sd)) (datetime.now(), info_string, FLAGS.num_batches, mn, sd))
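time_tensorflow_run above discards the first num_steps_burn_in iterations and then reports the mean and standard deviation of the per-batch wall time via Var[x] = E[x^2] - E[x]^2 (note that `if i > num_steps_burn_in` accumulates one sample fewer than FLAGS.num_batches). Below is a minimal, framework-free sketch of the same bookkeeping; the function and argument names are illustrative only, not part of the benchmark script.

import math
import time

def time_batches(run_one_batch, num_batches=100, burn_in=50):
    # run_one_batch: any callable that executes a single timed step.
    total, total_sq = 0.0, 0.0
    for i in range(num_batches + burn_in):
        start = time.time()
        run_one_batch()
        duration = time.time() - start
        if i >= burn_in:  # discard warm-up iterations
            total += duration
            total_sq += duration * duration
    mean = total / num_batches
    var = total_sq / num_batches - mean * mean
    return mean, math.sqrt(max(var, 0.0))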
def run_benchmark(): def run_benchmark():
with tf.Graph().as_default(): with tf.Graph().as_default():
global_step=0 global_step = 0
with tf.device('/cpu:0'): with tf.device('/cpu:0'):
global_step = tf.Variable(0, trainable=False) global_step = tf.Variable(0, trainable=False)
with tf.device('/gpu:0'): with tf.device('/gpu:0'):
#x_input = tf.placeholder(tf.int32, [None, FLAGS.max_len], name="x_input") #x_input = tf.placeholder(tf.int32, [None, FLAGS.max_len], name="x_input")
#y_input = tf.placeholder(tf.int32, [None, NUM_CLASS], name="y_input") #y_input = tf.placeholder(tf.int32, [None, NUM_CLASS], name="y_input")
x_input = tf.placeholder(tf.int32, [FLAGS.batch_size, FLAGS.max_len], name="x_input") x_input = tf.placeholder(
y_input = tf.placeholder(tf.int32, [FLAGS.batch_size, NUM_CLASS], name="y_input") tf.int32, [FLAGS.batch_size, FLAGS.max_len], name="x_input")
# Generate some dummy sequence. y_input = tf.placeholder(
tf.int32, [FLAGS.batch_size, NUM_CLASS], name="y_input")
# Generate some dummy sequence.
last_layer = inference(x_input)
last_layer = inference(x_input)
objective = loss(last_layer, y_input)
opt = tf.train.AdamOptimizer(0.001) objective = loss(last_layer, y_input)
grads = opt.compute_gradients(objective) opt = tf.train.AdamOptimizer(0.001)
apply_gradient_op = opt.apply_gradients(grads, global_step=global_step) grads = opt.compute_gradients(objective)
apply_gradient_op = opt.apply_gradients(
init = tf.initialize_all_variables() grads, global_step=global_step)
sess = tf.Session(config=tf.ConfigProto(
allow_soft_placement=True, init = tf.initialize_all_variables()
log_device_placement=FLAGS.log_device_placement)) sess = tf.Session(config=tf.ConfigProto(
sess.run(init) allow_soft_placement=True,
log_device_placement=FLAGS.log_device_placement))
run_forward = True sess.run(init)
run_forward_backward = True
if FLAGS.forward_only and FLAGS.forward_backward_only: run_forward = True
raise ValueError("Cannot specify --forward_only and " run_forward_backward = True
"--forward_backward_only at the same time.") if FLAGS.forward_only and FLAGS.forward_backward_only:
if FLAGS.forward_only: raise ValueError("Cannot specify --forward_only and "
run_forward_backward = False "--forward_backward_only at the same time.")
elif FLAGS.forward_backward_only: if FLAGS.forward_only:
run_forward = False run_forward_backward = False
elif FLAGS.forward_backward_only:
if run_forward: run_forward = False
time_tensorflow_run(sess, last_layer, x_input, y_input, "Forward")
if run_forward:
if run_forward_backward: time_tensorflow_run(sess, last_layer, x_input, y_input,
with tf.control_dependencies([apply_gradient_op]): "Forward")
train_op = tf.no_op(name='train')
time_tensorflow_run(sess, [train_op, objective], x_input, y_input, "Forward-backward") if run_forward_backward:
with tf.control_dependencies([apply_gradient_op]):
train_op = tf.no_op(name='train')
time_tensorflow_run(sess, [train_op, objective], x_input,
y_input, "Forward-backward")
def main(_): def main(_):
run_benchmark() run_benchmark()
if __name__ == '__main__': if __name__ == '__main__':
tf.app.run() tf.app.run()
...@@ -12,35 +12,28 @@ from tensorflow.python.ops import rnn ...@@ -12,35 +12,28 @@ from tensorflow.python.ops import rnn
FLAGS = tf.app.flags.FLAGS FLAGS = tf.app.flags.FLAGS
tf.app.flags.DEFINE_integer('batch_size', 64, tf.app.flags.DEFINE_integer('batch_size', 64, """Batch size.""")
"""Batch size.""") tf.app.flags.DEFINE_integer('num_batches', 100, """Number of batches to run.""")
tf.app.flags.DEFINE_integer('num_batches', 100, tf.app.flags.DEFINE_integer('num_layers', 1, """Number of LSTM layers.""")
"""Number of batches to run.""") tf.app.flags.DEFINE_integer('max_len', 100, """Maximum sequence length.""")
tf.app.flags.DEFINE_integer('num_layers', 1, tf.app.flags.DEFINE_integer('hidden_size', 128, """Hidden layer size.""")
"""Number of LSTM layers.""") tf.app.flags.DEFINE_integer('emb_size', 64, """Embedding size.""")
tf.app.flags.DEFINE_integer('max_len', 100,
"""Maximum sequence length.""")
tf.app.flags.DEFINE_integer('hidden_size', 128,
"""Hidden layer size.""")
tf.app.flags.DEFINE_integer('emb_size', 64,
"""Embedding size.""")
tf.app.flags.DEFINE_boolean('log_device_placement', False, tf.app.flags.DEFINE_boolean('log_device_placement', False,
"""Whether to log device placement.""") """Whether to log device placement.""")
tf.app.flags.DEFINE_integer('num_gpus', 4, tf.app.flags.DEFINE_integer('num_gpus', 4, """How many GPUs to use.""")
"""How many GPUs to use.""")
VOCAB_SIZE=30000 VOCAB_SIZE = 30000
NUM_CLASS=2 NUM_CLASS = 2
NUM_EXAMPLES_PER_EPOCH_FOR_TRAIN = 50000
NUM_EXAMPLES_PER_EPOCH_FOR_TRAIN=50000 NUM_EPOCHS_PER_DECAY = 50
NUM_EPOCHS_PER_DECAY=50 INITIAL_LEARNING_RATE = 0.1
INITIAL_LEARNING_RATE = 0.1
LEARNING_RATE_DECAY_FACTOR = 0.1 LEARNING_RATE_DECAY_FACTOR = 0.1
TOWER_NAME = 'tower' TOWER_NAME = 'tower'
train_dataset = reader.create_datasets("imdb.pkl", VOCAB_SIZE) train_dataset = reader.create_datasets("imdb.pkl", VOCAB_SIZE)
def get_incoming_shape(incoming): def get_incoming_shape(incoming):
""" Returns the incoming data shape """ """ Returns the incoming data shape """
if isinstance(incoming, tf.Tensor): if isinstance(incoming, tf.Tensor):
...@@ -53,49 +46,68 @@ def get_incoming_shape(incoming): ...@@ -53,49 +46,68 @@ def get_incoming_shape(incoming):
# Note input * W is done in LSTMCell, # Note input * W is done in LSTMCell,
# which is different from PaddlePaddle # which is different from PaddlePaddle
def single_lstm(name, incoming, n_units, use_peepholes=True, def single_lstm(name,
return_seq=False, return_state=False): incoming,
with tf.name_scope(name) as scope: n_units,
cell = tf.nn.rnn_cell.LSTMCell(n_units, use_peepholes=use_peepholes) use_peepholes=True,
output, _cell_state = rnn.rnn(cell, incoming, dtype=tf.float32) return_seq=False,
out = output if return_seq else output[-1] return_state=False):
return (out, _cell_state) if return_state else out with tf.name_scope(name) as scope:
cell = tf.nn.rnn_cell.LSTMCell(n_units, use_peepholes=use_peepholes)
output, _cell_state = rnn.rnn(cell, incoming, dtype=tf.float32)
def lstm(name, incoming, n_units, use_peepholes=True, out = output if return_seq else output[-1]
return_seq=False, return_state=False, num_layers=1): return (out, _cell_state) if return_state else out
with tf.name_scope(name) as scope:
lstm_cell = tf.nn.rnn_cell.LSTMCell(n_units, use_peepholes=use_peepholes)
cell = tf.nn.rnn_cell.MultiRNNCell([lstm_cell] * num_layers) def lstm(name,
initial_state = cell.zero_state(FLAGS.batch_size, dtype=tf.float32) incoming,
if not isinstance(incoming, list): n_units,
# if the input is an embedding, the Tensor shape is [None, time_step, emb_size] use_peepholes=True,
incoming = [tf.squeeze(input_, [1]) return_seq=False,
for input_ in tf.split(1, FLAGS.max_len, incoming)] return_state=False,
outputs, state = tf.nn.rnn(cell, incoming, initial_state=initial_state, num_layers=1):
dtype=tf.float32) with tf.name_scope(name) as scope:
out = outputs if return_seq else outputs[-1] lstm_cell = tf.nn.rnn_cell.LSTMCell(
return (out, _cell_state) if return_state else out n_units, use_peepholes=use_peepholes)
cell = tf.nn.rnn_cell.MultiRNNCell([lstm_cell] * num_layers)
initial_state = cell.zero_state(FLAGS.batch_size, dtype=tf.float32)
if not isinstance(incoming, list):
# if the input is an embedding, the Tensor shape is [None, time_step, emb_size]
incoming = [
tf.squeeze(input_, [1])
for input_ in tf.split(1, FLAGS.max_len, incoming)
]
outputs, state = tf.nn.rnn(cell,
incoming,
initial_state=initial_state,
dtype=tf.float32)
out = outputs if return_seq else outputs[-1]
return (out, state) if return_state else out
def embedding(name, incoming, vocab_size, emb_size): def embedding(name, incoming, vocab_size, emb_size):
with tf.name_scope(name) as scope: with tf.name_scope(name) as scope:
#with tf.device("/cpu:0"): #with tf.device("/cpu:0"):
embedding = tf.get_variable( embedding = tf.get_variable(
name+'_emb', [vocab_size, emb_size], dtype=tf.float32) name + '_emb', [vocab_size, emb_size], dtype=tf.float32)
out = tf.nn.embedding_lookup(embedding, incoming) out = tf.nn.embedding_lookup(embedding, incoming)
return out return out
def fc(name, inpOp, nIn, nOut, act=True): def fc(name, inpOp, nIn, nOut, act=True):
with tf.name_scope(name) as scope: with tf.name_scope(name) as scope:
kernel = tf.get_variable(name + '_w', [nIn, nOut], kernel = tf.get_variable(
initializer=tf.truncated_normal_initializer(stddev=0.01, dtype=tf.float32), name + '_w', [nIn, nOut],
initializer=tf.truncated_normal_initializer(
stddev=0.01, dtype=tf.float32),
dtype=tf.float32) dtype=tf.float32)
biases = tf.get_variable(name + '_b', [nOut], biases = tf.get_variable(
initializer=tf.constant_initializer(value=0.0, dtype=tf.float32), name + '_b', [nOut],
dtype=tf.float32,trainable=True) initializer=tf.constant_initializer(
value=0.0, dtype=tf.float32),
dtype=tf.float32,
trainable=True)
net = tf.nn.relu_layer(inpOp, kernel, biases, name=name) if act else \ net = tf.nn.relu_layer(inpOp, kernel, biases, name=name) if act else \
tf.matmul(inpOp, kernel) + biases tf.matmul(inpOp, kernel) + biases
...@@ -119,7 +131,7 @@ def loss(logits, labels): ...@@ -119,7 +131,7 @@ def loss(logits, labels):
# logits, labels, name='cross_entropy_per_example') # logits, labels, name='cross_entropy_per_example')
labels = tf.cast(labels, tf.float32) labels = tf.cast(labels, tf.float32)
cross_entropy = tf.nn.softmax_cross_entropy_with_logits( cross_entropy = tf.nn.softmax_cross_entropy_with_logits(
logits, labels, name='cross_entropy_per_example') logits, labels, name='cross_entropy_per_example')
cross_entropy_mean = tf.reduce_mean(cross_entropy, name='cross_entropy') cross_entropy_mean = tf.reduce_mean(cross_entropy, name='cross_entropy')
tf.add_to_collection('losses', cross_entropy_mean) tf.add_to_collection('losses', cross_entropy_mean)
return tf.add_n(tf.get_collection('losses'), name='total_loss') return tf.add_n(tf.get_collection('losses'), name='total_loss')
...@@ -142,7 +154,7 @@ def tower_loss(scope): ...@@ -142,7 +154,7 @@ def tower_loss(scope):
# assemble the total_loss using a custom function below. # assemble the total_loss using a custom function below.
#_ = loss(last_layer, label) #_ = loss(last_layer, label)
_ = loss(last_layer, label) _ = loss(last_layer, label)
# Assemble all of the losses for the current tower only. # Assemble all of the losses for the current tower only.
losses = tf.get_collection('losses', scope) losses = tf.get_collection('losses', scope)
...@@ -161,7 +173,7 @@ def tower_loss(scope): ...@@ -161,7 +173,7 @@ def tower_loss(scope):
loss_name = re.sub('%s_[0-9]*/' % TOWER_NAME, '', l.op.name) loss_name = re.sub('%s_[0-9]*/' % TOWER_NAME, '', l.op.name)
# Name each loss as '(raw)' and name the moving average version of the loss # Name each loss as '(raw)' and name the moving average version of the loss
# as the original loss name. # as the original loss name.
tf.scalar_summary(loss_name +' (raw)', l) tf.scalar_summary(loss_name + ' (raw)', l)
#tf.scalar_summary(loss_name, loss_averages.average(l)) #tf.scalar_summary(loss_name, loss_averages.average(l))
with tf.control_dependencies([loss_averages_op]): with tf.control_dependencies([loss_averages_op]):
...@@ -170,7 +182,7 @@ def tower_loss(scope): ...@@ -170,7 +182,7 @@ def tower_loss(scope):
def average_gradients(tower_grads): def average_gradients(tower_grads):
"""Calculate the average gradient for each shared variable across all towers. """Calculate the average gradient for each shared variable across all towers.
Note that this function provides a synchronization point across all towers. Note that this function provides a synchronization point across all towers.
Args: Args:
tower_grads: List of lists of (gradient, variable) tuples. The outer list tower_grads: List of lists of (gradient, variable) tuples. The outer list
...@@ -180,127 +192,131 @@ def average_gradients(tower_grads): ...@@ -180,127 +192,131 @@ def average_gradients(tower_grads):
List of pairs of (gradient, variable) where the gradient has been averaged List of pairs of (gradient, variable) where the gradient has been averaged
across all towers. across all towers.
""" """
average_grads = [] average_grads = []
for grad_and_vars in zip(*tower_grads): for grad_and_vars in zip(*tower_grads):
# Note that each grad_and_vars looks like the following: # Note that each grad_and_vars looks like the following:
# ((grad0_gpu0, var0_gpu0), ... , (grad0_gpuN, var0_gpuN)) # ((grad0_gpu0, var0_gpu0), ... , (grad0_gpuN, var0_gpuN))
grads = [] grads = []
for g, _ in grad_and_vars: for g, _ in grad_and_vars:
# Add 0 dimension to the gradients to represent the tower. # Add 0 dimension to the gradients to represent the tower.
expanded_g = tf.expand_dims(g, 0) expanded_g = tf.expand_dims(g, 0)
# Append on a 'tower' dimension which we will average over below. # Append on a 'tower' dimension which we will average over below.
grads.append(expanded_g) grads.append(expanded_g)
# Average over the 'tower' dimension. # Average over the 'tower' dimension.
grad = tf.concat(0, grads) grad = tf.concat(0, grads)
grad = tf.reduce_mean(grad, 0) grad = tf.reduce_mean(grad, 0)
# Keep in mind that the Variables are redundant because they are shared # Keep in mind that the Variables are redundant because they are shared
# across towers. So .. we will just return the first tower's pointer to # across towers. So .. we will just return the first tower's pointer to
# the Variable. # the Variable.
v = grad_and_vars[0][1] v = grad_and_vars[0][1]
grad_and_var = (grad, v) grad_and_var = (grad, v)
average_grads.append(grad_and_var) average_grads.append(grad_and_var)
return average_grads return average_grads
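average_gradients stacks each variable's per-tower gradients along a new leading "tower" axis, averages over that axis, and keeps the variable handle from the first tower because the variables are shared. A small NumPy sketch of the same reduction, for illustration only (NumPy is an assumption here, not something the script uses):

import numpy as np

def average_gradients_np(tower_grads):
    # tower_grads: one list of (gradient, variable) pairs per tower.
    averaged = []
    for grad_and_vars in zip(*tower_grads):
        stacked = np.stack([g for g, _ in grad_and_vars], axis=0)  # add tower axis
        averaged.append((stacked.mean(axis=0), grad_and_vars[0][1]))  # keep shared var
    return averaged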
def time_tensorflow_run(session, target): def time_tensorflow_run(session, target):
num_steps_burn_in = 80 num_steps_burn_in = 80
total_duration = 0.0 total_duration = 0.0
total_duration_squared = 0.0 total_duration_squared = 0.0
for i in xrange(FLAGS.num_batches + num_steps_burn_in): for i in xrange(FLAGS.num_batches + num_steps_burn_in):
start_time = time.time() start_time = time.time()
_ = session.run(target, feed_dict={x_input:data, y_input:label}) _ = session.run(target, feed_dict={x_input: data, y_input: label})
_, loss_value = session.run(target) _, loss_value = session.run(target)
duration = time.time() - start_time duration = time.time() - start_time
if i > num_steps_burn_in: if i > num_steps_burn_in:
if not i % 10: if not i % 10:
num_examples_per_step = FLAGS.batch_size * FLAGS.num_gpus num_examples_per_step = FLAGS.batch_size * FLAGS.num_gpus
examples_per_sec = num_examples_per_step / duration examples_per_sec = num_examples_per_step / duration
# sec_per_batch = duration / FLAGS.num_gpus # sec_per_batch = duration / FLAGS.num_gpus
sec_per_batch = duration sec_per_batch = duration
format_str = ('%s: step %d, loss= %.2f (%.1f examples/sec; %.3f ' format_str = (
'sec/batch batch_size= %d)') '%s: step %d, loss= %.2f (%.1f examples/sec; %.3f '
print (format_str % 'sec/batch batch_size= %d)')
(datetime.now(), i - num_steps_burn_in, print(format_str %
loss_value, duration, sec_per_batch, num_examples_per_step)) (datetime.now(), i - num_steps_burn_in, loss_value,
duration, sec_per_batch, num_examples_per_step))
total_duration += duration
total_duration_squared += duration * duration total_duration += duration
total_duration_squared += duration * duration
mn = total_duration / FLAGS.num_batches mn = total_duration / FLAGS.num_batches
vr = total_duration_squared / FLAGS.num_batches - mn * mn vr = total_duration_squared / FLAGS.num_batches - mn * mn
sd = math.sqrt(vr) sd = math.sqrt(vr)
print ('%s: FwdBwd across %d steps, %.3f +/- %.3f sec / batch' % print('%s: FwdBwd across %d steps, %.3f +/- %.3f sec / batch' %
(datetime.now(), FLAGS.num_batches, mn, sd)) (datetime.now(), FLAGS.num_batches, mn, sd))
def run_benchmark(): def run_benchmark():
with tf.Graph().as_default(), tf.device('/cpu:0'): with tf.Graph().as_default(), tf.device('/cpu:0'):
# Create a variable to count the number of train() calls. This equals the # Create a variable to count the number of train() calls. This equals the
# number of batches processed * FLAGS.num_gpus. # number of batches processed * FLAGS.num_gpus.
global_step = tf.get_variable( global_step = tf.get_variable(
'global_step', [], 'global_step', [],
initializer=tf.constant_initializer(0), trainable=False) initializer=tf.constant_initializer(0),
trainable=False)
# Calculate the learning rate schedule.
num_batches_per_epoch = (NUM_EXAMPLES_PER_EPOCH_FOR_TRAIN / # Calculate the learning rate schedule.
FLAGS.batch_size) num_batches_per_epoch = (NUM_EXAMPLES_PER_EPOCH_FOR_TRAIN /
decay_steps = int(num_batches_per_epoch * NUM_EPOCHS_PER_DECAY) FLAGS.batch_size)
decay_steps = int(num_batches_per_epoch * NUM_EPOCHS_PER_DECAY)
# Create an optimizer that performs gradient descent.
opt = tf.train.AdamOptimizer(0.001) # Create an optimizer that performs gradient descent.
opt = tf.train.AdamOptimizer(0.001)
#train_dataset = reader.create_datasets("imdb.pkl", VOCAB_SIZE)
#train_dataset = reader.create_datasets("imdb.pkl", VOCAB_SIZE)
# Calculate the gradients for each model tower.
tower_grads = [] # Calculate the gradients for each model tower.
for i in xrange(FLAGS.num_gpus): tower_grads = []
with tf.device('/gpu:%d' % i): for i in xrange(FLAGS.num_gpus):
with tf.name_scope('%s_%d' % (TOWER_NAME, i)) as scope: with tf.device('/gpu:%d' % i):
# Calculate the loss for one tower of the model. This function with tf.name_scope('%s_%d' % (TOWER_NAME, i)) as scope:
# constructs the entire model but shares the variables across # Calculate the loss for one tower of the model. This function
# all towers. # constructs the entire model but shares the variables across
loss = tower_loss(scope) # all towers.
loss = tower_loss(scope)
# Reuse variables for the next tower.
tf.get_variable_scope().reuse_variables() # Reuse variables for the next tower.
tf.get_variable_scope().reuse_variables()
# Retain the summaries from the final tower.
# summaries = tf.get_collection(tf.GraphKeys.SUMMARIES, scope) # Retain the summaries from the final tower.
# summaries = tf.get_collection(tf.GraphKeys.SUMMARIES, scope)
# Calculate the gradients for the batch of data on this tower.
grads = opt.compute_gradients(loss) # Calculate the gradients for the batch of data on this tower.
grads = opt.compute_gradients(loss)
# Keep track of the gradients across all towers.
tower_grads.append(grads) # Keep track of the gradients across all towers.
tower_grads.append(grads)
# We must calculate the mean of each gradient. Note that this is the
# synchronization point across all towers. # We must calculate the mean of each gradient. Note that this is the
grads = average_gradients(tower_grads) # synchronization point across all towers.
grads = average_gradients(tower_grads)
# Apply the gradients to adjust the shared variables.
apply_gradient_op = opt.apply_gradients(grads, global_step=global_step) # Apply the gradients to adjust the shared variables.
apply_gradient_op = opt.apply_gradients(grads, global_step=global_step)
# Group all updates into a single train op.
train_op = tf.group(apply_gradient_op) # Group all updates into a single train op.
train_op = tf.group(apply_gradient_op)
# Build an initialization operation.
init = tf.initialize_all_variables() # Build an initialization operation.
init = tf.initialize_all_variables()
# Start running operations on the Graph. allow_soft_placement must be set to
# True to build towers on GPU, as some of the ops do not have GPU # Start running operations on the Graph. allow_soft_placement must be set to
# implementations. # True to build towers on GPU, as some of the ops do not have GPU
sess = tf.Session(config=tf.ConfigProto( # implementations.
allow_soft_placement=True, sess = tf.Session(config=tf.ConfigProto(
log_device_placement=FLAGS.log_device_placement)) allow_soft_placement=True,
sess.run(init) log_device_placement=FLAGS.log_device_placement))
time_tensorflow_run(sess, [train_op, loss]) sess.run(init)
time_tensorflow_run(sess, [train_op, loss])
def main(_): def main(_):
run_benchmark() run_benchmark()
if __name__ == '__main__': if __name__ == '__main__':
tf.app.run() tf.app.run()
...@@ -3,36 +3,55 @@ ...@@ -3,36 +3,55 @@
INCLUDE(CheckCXXSourceRuns) INCLUDE(CheckCXXSourceRuns)
SET(FIND_AVX_10)
SET(FIND_AVX_20)
SET(AVX_FLAGS)
SET(AVX_FOUND)
# Check AVX 2
SET(CMAKE_REQUIRED_FLAGS)
IF(CMAKE_COMPILER_IS_GNUCC OR CMAKE_COMPILER_IS_GNUCXX OR CMAKE_CXX_COMPILER_ID MATCHES "Clang") IF(CMAKE_COMPILER_IS_GNUCC OR CMAKE_COMPILER_IS_GNUCXX OR CMAKE_CXX_COMPILER_ID MATCHES "Clang")
SET(CMAKE_REQUIRED_FLAGS "-mavx2") set(MMX_FLAG "-mmmx")
ELSEIF(MSVC AND NOT CMAKE_CL_64) # reserve for WINDOWS set(SSE2_FLAG "-msse2")
SET(CMAKE_REQUIRED_FLAGS "/arch:AVX2") set(SSE3_FLAG "-msse3")
SET(AVX_FLAG "-mavx")
SET(AVX2_FLAG "-mavx2")
ELSEIF(MSVC)
set(MMX_FLAG "/arch:MMX")
set(SSE2_FLAG "/arch:SSE2")
set(SSE3_FLAG "/arch:SSE3")
SET(AVX_FLAG "/arch:AVX")
SET(AVX2_FLAG "/arch:AVX2")
ENDIF() ENDIF()
# Check MMX
set(CMAKE_REQUIRED_FLAGS ${MMX_FLAG})
CHECK_CXX_SOURCE_RUNS(" CHECK_CXX_SOURCE_RUNS("
#include <immintrin.h> #include <mmintrin.h>
int main() int main()
{ {
__m256i a = _mm256_set_epi32 (-1, 2, -3, 4, -1, 2, -3, 4); _mm_setzero_si64();
__m256i result = _mm256_abs_epi32 (a);
return 0; return 0;
}" FIND_AVX_20) }" MMX_FOUND)
# Check AVX # Check SSE2
SET(CMAKE_REQUIRED_FLAGS) set(CMAKE_REQUIRED_FLAGS ${SSE2_FLAG})
IF(CMAKE_COMPILER_IS_GNUCC OR CMAKE_COMPILER_IS_GNUCXX OR CMAKE_CXX_COMPILER_ID MATCHES "Clang") CHECK_CXX_SOURCE_RUNS("
SET(CMAKE_REQUIRED_FLAGS "-mavx") #include <emmintrin.h>
ELSEIF(MSVC AND NOT CMAKE_CL_64) int main()
SET(CMAKE_REQUIRED_FLAGS "/arch:AVX") {
endif() _mm_setzero_si128();
return 0;
}" SSE2_FOUND)
# Check SSE3
set(CMAKE_REQUIRED_FLAGS ${SSE3_FLAG})
CHECK_CXX_SOURCE_RUNS("
#include <pmmintrin.h>
int main()
{
__m128d a = _mm_set1_pd(6.28);
__m128d b = _mm_set1_pd(3.14);
__m128d result = _mm_addsub_pd(a, b);
result = _mm_movedup_pd(result);
return 0;
}" SSE3_FOUND)
# Check AVX
set(CMAKE_REQUIRED_FLAGS ${AVX_FLAG})
CHECK_CXX_SOURCE_RUNS(" CHECK_CXX_SOURCE_RUNS("
#include <immintrin.h> #include <immintrin.h>
int main() int main()
...@@ -41,25 +60,17 @@ int main() ...@@ -41,25 +60,17 @@ int main()
__m256 b = _mm256_set_ps (1.0f, 2.0f, 3.0f, 4.0f, 1.0f, 2.0f, 3.0f, 4.0f); __m256 b = _mm256_set_ps (1.0f, 2.0f, 3.0f, 4.0f, 1.0f, 2.0f, 3.0f, 4.0f);
__m256 result = _mm256_add_ps (a, b); __m256 result = _mm256_add_ps (a, b);
return 0; return 0;
}" FIND_AVX_10) }" AVX_FOUND)
IF(${FIND_AVX_20})
IF(CMAKE_COMPILER_IS_GNUCC OR CMAKE_COMPILER_IS_GNUCXX OR CMAKE_CXX_COMPILER_ID MATCHES "Clang")
SET(AVX_FLAGS "${AVX_FLAGS} -mavx2")
ELSEIF(MSVC)
SET(AVX_FLAGS "${AVX_FLAGS} /arch:AVX2")
ENDIF()
ENDIF()
IF(${FIND_AVX_10}) # Check AVX 2
IF(CMAKE_COMPILER_IS_GNUCC OR CMAKE_COMPILER_IS_GNUCXX OR CMAKE_CXX_COMPILER_ID MATCHES "Clang") set(CMAKE_REQUIRED_FLAGS ${AVX2_FLAG})
SET(AVX_FLAGS "${AVX_FLAGS} -mavx") CHECK_CXX_SOURCE_RUNS("
ELSEIF(MSVC) #include <immintrin.h>
SET(AVX_FLAGS "${AVX_FLAGS} /arch:AVX") int main()
ENDIF() {
ENDIF() __m256i a = _mm256_set_epi32 (-1, 2, -3, 4, -1, 2, -3, 4);
__m256i result = _mm256_abs_epi32 (a);
return 0;
}" AVX2_FOUND)
IF(${FIND_AVX_10}) mark_as_advanced(MMX_FOUND SSE2_FOUND SSE3_FOUND AVX_FOUND AVX2_FOUND)
SET(AVX_FOUND TRUE)
MESSAGE(STATUS "Find CPU supports ${AVX_FLAGS}.")
ENDIF()
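The checks above decide which SIMD flags to enable by compiling and running tiny intrinsic programs with the candidate flag. As a rough illustration only, the same question can be asked of a running Linux machine by reading /proc/cpuinfo; the flag names and file layout below are assumptions about a typical Linux kernel, not something this CMake code depends on.

def cpu_simd_flags(cpuinfo="/proc/cpuinfo"):
    # Return the subset of interesting SIMD features the CPU advertises (Linux only).
    wanted = {"mmx", "sse2", "avx", "avx2"}  # note: SSE3 is reported as "pni" here
    with open(cpuinfo) as f:
        for line in f:
            if line.startswith("flags"):
                present = set(line.split(":", 1)[1].split())
                return sorted(wanted & present)
    return []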
# Find the CBlas libraries # Find the CBlas and lapack libraries
# #
# It will search MKL, atlas, OpenBlas, reference-cblas in order. # It will search MKL, atlas, OpenBlas, reference-cblas in order.
# #
...@@ -19,6 +19,8 @@ set(MKL_ROOT $ENV{MKL_ROOT} CACHE PATH "Folder contains MKL") ...@@ -19,6 +19,8 @@ set(MKL_ROOT $ENV{MKL_ROOT} CACHE PATH "Folder contains MKL")
find_path(MKL_INCLUDE_DIR mkl.h PATHS find_path(MKL_INCLUDE_DIR mkl.h PATHS
${MKL_ROOT}/include) ${MKL_ROOT}/include)
find_path(MKL_INCLUDE_DIR mkl_lapacke.h PATHS
${MKL_ROOT}/include)
find_library(MKL_CORE_LIB NAMES mkl_core PATHS find_library(MKL_CORE_LIB NAMES mkl_core PATHS
${MKL_ROOT}/lib ${MKL_ROOT}/lib
${MKL_ROOT}/lib/intel64) ${MKL_ROOT}/lib/intel64)
...@@ -37,6 +39,7 @@ if(MKL_INCLUDE_DIR AND MKL_CORE_LIB AND MKL_SEQUENTIAL_LIB AND MKL_INTEL_LP64) ...@@ -37,6 +39,7 @@ if(MKL_INCLUDE_DIR AND MKL_CORE_LIB AND MKL_SEQUENTIAL_LIB AND MKL_INTEL_LP64)
${MKL_SEQUENTIAL_LIB} ${MKL_SEQUENTIAL_LIB}
${MKL_CORE_LIB}) ${MKL_CORE_LIB})
add_definitions(-DPADDLE_USE_MKL) add_definitions(-DPADDLE_USE_MKL)
message(STATUS "Found MKL (include: ${CBLAS_INC_DIR}, library: ${CBLAS_LIBS})")
return() # return file. return() # return file.
endif() endif()
...@@ -55,15 +58,19 @@ set(ATLAS_LIB_SEARCH_PATHS ...@@ -55,15 +58,19 @@ set(ATLAS_LIB_SEARCH_PATHS
) )
find_path(ATLAS_INC_DIR NAMES cblas.h find_path(ATLAS_INC_DIR NAMES cblas.h
PATHS ${ATLAS_INCLUDE_SEARCH_PATHS}) PATHS ${ATLAS_INCLUDE_SEARCH_PATHS})
find_path(ATLAS_CLAPACK_INC_DIR NAMES clapack.h
PATHS ${ATLAS_INCLUDE_SEARCH_PATHS})
find_library(ATLAS_CBLAS_LIB NAMES cblas libcblas.so.3 find_library(ATLAS_CBLAS_LIB NAMES cblas libcblas.so.3
PATHS ${ATLAS_LIB_SEARCH_PATHS}) PATHS ${ATLAS_LIB_SEARCH_PATHS})
find_library(ATLAS_LIB NAMES atlas libatlas.so.3 find_library(ATLAS_LIB NAMES lapack_atlas liblapack_atlas.so.3
PATHS ${ATLAS_LIB_SEARCH_PATHS}) PATHS ${ATLAS_LIB_SEARCH_PATHS})
if(ATLAS_INC_DIR AND ATLAS_CBLAS_LIB AND ATLAS_LIB) if(ATLAS_INC_DIR AND ATLAS_CBLAS_LIB AND ATLAS_LIB)
set(CBLAS_PROVIDER ATLAS) set(CBLAS_PROVIDER ATLAS)
set(CBLAS_INC_DIR ${ATLAS_INC_DIR}) set(CBLAS_INC_DIR ${ATLAS_INC_DIR} ${ATLAS_CLAPACK_INC_DIR})
set(CBLAS_LIBS ${ATLAS_LIB} ${ATLAS_CBLAS_LIB}) set(CBLAS_LIBS ${ATLAS_LIB} ${ATLAS_CBLAS_LIB})
add_definitions(-DPADDLE_USE_ATLAS)
message(STATUS "Found Atlas (include: ${CBLAS_INC_DIR}, library: ${CBLAS_LIBS})")
return() return()
endif() endif()
...@@ -83,6 +90,8 @@ set(OPENBLAS_LIB_SEARCH_PATHS ...@@ -83,6 +90,8 @@ set(OPENBLAS_LIB_SEARCH_PATHS
find_path(OPENBLAS_INC_DIR NAMES cblas.h find_path(OPENBLAS_INC_DIR NAMES cblas.h
PATHS ${OPENBLAS_INCLUDE_SEARCH_PATHS}) PATHS ${OPENBLAS_INCLUDE_SEARCH_PATHS})
find_path(OPENBLAS_LAPACKE_INC_DIR NAMES lapacke.h
PATHS ${OPENBLAS_INCLUDE_SEARCH_PATHS})
find_library(OPENBLAS_LIB NAMES openblas find_library(OPENBLAS_LIB NAMES openblas
PATHS ${OPENBLAS_LIB_SEARCH_PATHS}) PATHS ${OPENBLAS_LIB_SEARCH_PATHS})
...@@ -90,6 +99,7 @@ if(OPENBLAS_INC_DIR AND OPENBLAS_LIB) ...@@ -90,6 +99,7 @@ if(OPENBLAS_INC_DIR AND OPENBLAS_LIB)
set(CBLAS_PROVIDER OPENBLAS) set(CBLAS_PROVIDER OPENBLAS)
set(CBLAS_INC_DIR ${OPENBLAS_INC_DIR}) set(CBLAS_INC_DIR ${OPENBLAS_INC_DIR})
set(CBLAS_LIBS ${OPENBLAS_LIB}) set(CBLAS_LIBS ${OPENBLAS_LIB})
message(STATUS "Found OpenBlas (include: ${CBLAS_INC_DIR}, library: ${CBLAS_LIBS})")
return() return()
endif() endif()
......
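The CBlas finder above probes MKL, ATLAS and OpenBLAS in a fixed priority order and returns at the first complete match. A minimal Python sketch of the same "first provider that resolves wins" pattern; the library base names and the use of ctypes.util.find_library are purely illustrative:

from ctypes.util import find_library

def pick_cblas_provider():
    # Probe candidate BLAS providers in priority order; first complete hit wins.
    candidates = [
        ("MKL", ["mkl_core"]),
        ("ATLAS", ["cblas", "lapack_atlas"]),
        ("OPENBLAS", ["openblas"]),
    ]
    for provider, libs in candidates:
        if all(find_library(lib) for lib in libs):
            return provider
    return "REFERENCE_CBLAS"  # fall back to the reference implementation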
# CMake script for code coverage.
# If _COVERALLS_UPLOAD is ON, it will upload JSON files to coveralls.io automatically.
# Param _COVERAGE_SRCS A list of coverage source files.
# Param _COVERALLS_UPLOAD Upload the result to coveralls.
# Param _CMAKE_SCRIPT_PATH CMake script path.
function(code_coverage _COVERAGE_SRCS _COVERALLS_UPLOAD _CMAKE_SCRIPT_PATH)
# clean previous gcov data.
file(REMOVE_RECURSE ${PROJECT_BINARY_DIR}/*.gcda)
# find curl for uploading the JSON later.
if (_COVERALLS_UPLOAD)
find_program(CURL_EXECUTABLE curl)
if (NOT CURL_EXECUTABLE)
message(FATAL_ERROR "Coveralls: curl not found!")
endif()
endif()
# When passing a CMake list to an external process, the list
# will be converted from the format "1;2;3" to "1 2 3".
set(COVERAGE_SRCS "")
foreach (SINGLE_SRC ${_COVERAGE_SRCS})
set(COVERAGE_SRCS "${COVERAGE_SRCS}*${SINGLE_SRC}")
endforeach()
# query number of logical cores
cmake_host_system_information(RESULT core_size QUERY NUMBER_OF_LOGICAL_CORES)
# coveralls json file.
set(COVERALLS_FILE ${PROJECT_BINARY_DIR}/coveralls.json)
add_custom_target(coveralls_generate
# Run regress tests.
COMMAND ${CMAKE_CTEST_COMMAND}
-j ${core_size}
--output-on-failure
# Generate Gcov and translate it into coveralls JSON.
COMMAND ${CMAKE_COMMAND}
-DCOVERAGE_SRCS="${COVERAGE_SRCS}"
-DCOVERALLS_OUTPUT_FILE="${COVERALLS_FILE}"
-DCOV_PATH="${PROJECT_BINARY_DIR}"
-DPROJECT_ROOT="${PROJECT_SOURCE_DIR}"
-P "${_CMAKE_SCRIPT_PATH}/coverallsGcovJsons.cmake"
WORKING_DIRECTORY ${PROJECT_BINARY_DIR}
COMMENT "Coveralls: generating coveralls output..."
)
if (_COVERALLS_UPLOAD)
message("COVERALLS UPLOAD: ON")
# Upload the JSON to coveralls.
add_custom_target(coveralls_upload
COMMAND ${CURL_EXECUTABLE}
-S -F json_file=@${COVERALLS_FILE}
https://coveralls.io/api/v1/jobs
DEPENDS coveralls_generate
WORKING_DIRECTORY ${PROJECT_BINARY_DIR}
COMMENT "Coveralls: uploading coveralls output...")
add_custom_target(coveralls DEPENDS coveralls_upload)
else()
message("COVERALLS UPLOAD: OFF")
add_custom_target(coveralls DEPENDS coveralls_generate)
endif()
endfunction()
if(ON_COVERALLS)
set(CMAKE_BUILD_TYPE "Debug")
set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -g -O0 -fprofile-arcs -ftest-coverage")
set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} -g -O0 -fprofile-arcs -ftest-coverage")
set(EXCLUDE_DIRS
"demo/"
"build/"
"tests/"
".test_env/"
)
if(WITH_GPU)
file(GLOB_RECURSE PADDLE_SOURCES RELATIVE "${PROJECT_SOURCE_DIR}" "*.cpp" "*.cc" "*.c" "*.cu")
else()
file(GLOB_RECURSE PADDLE_SOURCES RELATIVE "${PROJECT_SOURCE_DIR}" "*.cpp" "*.cc" "*.c")
endif()
# exclude trivial files in PADDLE_SOURCES
foreach(EXCLUDE_DIR ${EXCLUDE_DIRS})
foreach(TMP_PATH ${PADDLE_SOURCES})
string(FIND ${TMP_PATH} ${EXCLUDE_DIR} EXCLUDE_DIR_FOUND)
if(NOT ${EXCLUDE_DIR_FOUND} EQUAL -1)
list(REMOVE_ITEM PADDLE_SOURCES ${TMP_PATH})
endif()
endforeach(TMP_PATH)
endforeach()
# convert to absolute path
set(PADDLE_SRCS "")
foreach(PADDLE_SRC ${PADDLE_SOURCES})
set(PADDLE_SRCS "${PADDLE_SRCS};${PROJECT_SOURCE_DIR}/${PADDLE_SRC}")
endforeach()
code_coverage(
"${PADDLE_SRCS}"
${COVERALLS_UPLOAD}
"${PROJECT_SOURCE_DIR}/cmake"
)
endif()
#
# Permission is hereby granted, free of charge, to any person obtaining a copy
# of this software and associated documentation files (the "Software"), to deal
# in the Software without restriction, including without limitation the rights
# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
# copies of the Software, and to permit persons to whom the Software is
# furnished to do so, subject to the following conditions:
#
# The above copyright notice and this permission notice shall be included in all
# copies or substantial portions of the Software.
#
# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
# SOFTWARE.
#
# Copyright (C) 2014 Joakim Söderberg <joakim.soderberg@gmail.com>
#
# This is intended to be run by a custom target in a CMake project like this.
# 0. Compile program with coverage support.
# 1. Clear coverage data. (Recursively delete *.gcda in build dir)
# 2. Run the unit tests.
# 3. Run this script specifying which source files the coverage should be performed on.
#
# This script will then use gcov to generate .gcov files in the directory specified
# via the COV_PATH var. This should probably be the same as your cmake build dir.
#
# It then parses the .gcov files to convert them into the Coveralls JSON format:
# https://coveralls.io/docs/api
#
CMAKE_MINIMUM_REQUIRED(VERSION 2.8)
# Since it's not possible to pass a CMake list properly in the
# "1;2;3" format to an external process, we have replaced the
# ";" with "*", so reverse that here so we get it back into the
# CMake list format.
string(REGEX REPLACE "\\*" ";" COVERAGE_SRCS ${COVERAGE_SRCS})
find_program(GCOV_EXECUTABLE gcov)
if (NOT GCOV_EXECUTABLE)
message(FATAL_ERROR "gcov not found! Aborting...")
endif()
find_package(Git)
# TODO: Add these git things to the coveralls json.
if (GIT_FOUND)
# Branch.
execute_process(
COMMAND ${GIT_EXECUTABLE} rev-parse --abbrev-ref HEAD
WORKING_DIRECTORY ${CMAKE_SOURCE_DIR}
OUTPUT_VARIABLE GIT_BRANCH
OUTPUT_STRIP_TRAILING_WHITESPACE
)
macro (git_log_format FORMAT_CHARS VAR_NAME)
execute_process(
COMMAND ${GIT_EXECUTABLE} log -1 --pretty=format:%${FORMAT_CHARS}
WORKING_DIRECTORY ${CMAKE_SOURCE_DIR}
OUTPUT_VARIABLE ${VAR_NAME}
OUTPUT_STRIP_TRAILING_WHITESPACE
)
endmacro()
git_log_format(an GIT_AUTHOR_NAME)
git_log_format(ae GIT_AUTHOR_EMAIL)
git_log_format(cn GIT_COMMITTER_NAME)
git_log_format(ce GIT_COMMITTER_EMAIL)
git_log_format(B GIT_COMMIT_MESSAGE)
message("Git exe: ${GIT_EXECUTABLE}")
message("Git branch: ${GIT_BRANCH}")
message("Git author: ${GIT_AUTHOR_NAME}")
message("Git e-mail: ${GIT_AUTHOR_EMAIL}")
message("Git commiter name: ${GIT_COMMITTER_NAME}")
message("Git commiter e-mail: ${GIT_COMMITTER_EMAIL}")
message("Git commit message: ${GIT_COMMIT_MESSAGE}")
endif()
############################# Macros #########################################
#
# This macro converts from the full path format gcov outputs:
#
# /path/to/project/root/build/#path#to#project#root#subdir#the_file.c.gcov
#
# to the original source file path the .gcov is for:
#
# /path/to/project/root/subdir/the_file.c
#
macro(get_source_path_from_gcov_filename _SRC_FILENAME _GCOV_FILENAME)
# /path/to/project/root/build/#path#to#project#root#subdir#the_file.c.gcov
# ->
# #path#to#project#root#subdir#the_file.c.gcov
get_filename_component(_GCOV_FILENAME_WEXT ${_GCOV_FILENAME} NAME)
# #path#to#project#root#subdir#the_file.c.gcov -> /path/to/project/root/subdir/the_file.c
string(REGEX REPLACE "\\.gcov$" "" SRC_FILENAME_TMP ${_GCOV_FILENAME_WEXT})
string(REGEX REPLACE "\#" "/" SRC_FILENAME_TMP ${SRC_FILENAME_TMP})
set(${_SRC_FILENAME} "${SRC_FILENAME_TMP}")
endmacro()
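gcov -p names each output file after the source's absolute path with every "/" replaced by "#", and the macro above inverts that mangling to recover the original source path. The same transform as a short Python sketch (the function name is illustrative):

import os

def source_path_from_gcov_name(gcov_path):
    # '.../build/#path#to#project#root#subdir#the_file.c.gcov'
    #   -> '/path/to/project/root/subdir/the_file.c'
    name = os.path.basename(gcov_path)
    if name.endswith(".gcov"):
        name = name[:-len(".gcov")]
    return name.replace("#", "/")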
##############################################################################
# Get the coverage data.
file(GLOB_RECURSE GCDA_FILES "${COV_PATH}" "*.gcda")
message("GCDA files:")
# Get a list of all the object directories needed by gcov
# (The directories the .gcda files and .o files are found in)
# and run gcov on those.
foreach(GCDA ${GCDA_FILES})
message("Process: ${GCDA}")
message("------------------------------------------------------------------------------")
get_filename_component(GCDA_DIR ${GCDA} PATH)
#
# The -p below refers to "Preserve path components",
# This means that the generated gcov filename of a source file will
# keep the original file's entire filepath, but / is replaced with #.
# Example:
#
# /path/to/project/root/build/CMakeFiles/the_file.dir/subdir/the_file.c.gcda
# ------------------------------------------------------------------------------
# File '/path/to/project/root/subdir/the_file.c'
# Lines executed:68.34% of 199
# /path/to/project/root/subdir/the_file.c:creating '#path#to#project#root#subdir#the_file.c.gcov'
#
# If -p is not specified then the file is named only "the_file.c.gcov"
#
execute_process(
COMMAND ${GCOV_EXECUTABLE} -p -o ${GCDA_DIR} ${GCDA}
WORKING_DIRECTORY ${GCDA_DIR}
)
endforeach()
# TODO: Make these absolute paths
file(GLOB_RECURSE ALL_GCOV_FILES "${COV_PATH}" "*.gcov")
# Get only the filenames to use for filtering.
#set(COVERAGE_SRCS_NAMES "")
#foreach (COVSRC ${COVERAGE_SRCS})
# get_filename_component(COVSRC_NAME ${COVSRC} NAME)
# message("${COVSRC} -> ${COVSRC_NAME}")
# list(APPEND COVERAGE_SRCS_NAMES "${COVSRC_NAME}")
#endforeach()
#
# Filter out all but the gcov files we want.
#
# We do this by comparing the list of COVERAGE_SRCS filepaths that the
# user wants the coverage data for with the paths of the generated .gcov files,
# so that we only keep the relevant gcov files.
#
# Example:
# COVERAGE_SRCS =
# /path/to/project/root/subdir/the_file.c
#
# ALL_GCOV_FILES =
# /path/to/project/root/build/#path#to#project#root#subdir#the_file.c.gcov
# /path/to/project/root/build/#path#to#project#root#subdir#other_file.c.gcov
#
# Result should be:
# GCOV_FILES =
# /path/to/project/root/build/#path#to#project#root#subdir#the_file.c.gcov
#
set(GCOV_FILES "")
#message("Look in coverage sources: ${COVERAGE_SRCS}")
message("\nFilter out unwanted GCOV files:")
message("===============================")
set(COVERAGE_SRCS_REMAINING ${COVERAGE_SRCS})
foreach (GCOV_FILE ${ALL_GCOV_FILES})
#
# /path/to/project/root/build/#path#to#project#root#subdir#the_file.c.gcov
# ->
# /path/to/project/root/subdir/the_file.c
get_source_path_from_gcov_filename(GCOV_SRC_PATH ${GCOV_FILE})
# Is this in the list of source files?
# TODO: We want to match against relative path filenames from the source file root...
list(FIND COVERAGE_SRCS ${GCOV_SRC_PATH} WAS_FOUND)
if (NOT WAS_FOUND EQUAL -1)
message("YES: ${GCOV_FILE}")
list(APPEND GCOV_FILES ${GCOV_FILE})
# We remove it from the list, so we don't bother searching for it again.
# Also, files left in COVERAGE_SRCS_REMAINING after this loop ends will
# still get coverage entries generated for them (with no lines covered).
list(REMOVE_ITEM COVERAGE_SRCS_REMAINING ${GCOV_SRC_PATH})
else()
message("NO: ${GCOV_FILE}")
endif()
endforeach()
# TODO: Enable setting these
set(JSON_SERVICE_NAME "travis-ci")
set(JSON_SERVICE_JOB_ID $ENV{TRAVIS_JOB_ID})
set(JSON_TEMPLATE
"{
\"service_name\": \"\@JSON_SERVICE_NAME\@\",
\"service_job_id\": \"\@JSON_SERVICE_JOB_ID\@\",
\"source_files\": \@JSON_GCOV_FILES\@
}"
)
set(SRC_FILE_TEMPLATE
"{
\"name\": \"\@GCOV_SRC_REL_PATH\@\",
\"source_digest\": \"\@GCOV_CONTENTS_MD5\@\",
\"coverage\": \@GCOV_FILE_COVERAGE\@
}"
)
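The two templates above are expanded per source file and concatenated into the payload described at https://coveralls.io/docs/api: one object per file with its relative name, an MD5 digest of the source, and a coverage array holding a per-line hit count (null for non-code lines). A hedged sketch of the same assembly using Python's json module; the helper name and tuple layout are assumptions for illustration:

import hashlib
import json

def coveralls_payload(service_name, job_id, files):
    # files: iterable of (relative_path, source_text, coverage) where coverage
    # is a list with an int hit count or None per source line.
    return json.dumps({
        "service_name": service_name,
        "service_job_id": job_id,
        "source_files": [
            {"name": path,
             "source_digest": hashlib.md5(src.encode("utf-8")).hexdigest(),
             "coverage": cov}
            for path, src, cov in files
        ],
    })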
message("\nGenerate JSON for files:")
message("=========================")
set(JSON_GCOV_FILES "[")
# Read the GCOV files line by line and get the coverage data.
foreach (GCOV_FILE ${GCOV_FILES})
get_source_path_from_gcov_filename(GCOV_SRC_PATH ${GCOV_FILE})
file(RELATIVE_PATH GCOV_SRC_REL_PATH "${PROJECT_ROOT}" "${GCOV_SRC_PATH}")
# The new coveralls API doesn't need the entire source (Yay!)
# However, still keeping that part for now. Will cleanup in the future.
file(MD5 "${GCOV_SRC_PATH}" GCOV_CONTENTS_MD5)
message("MD5: ${GCOV_SRC_PATH} = ${GCOV_CONTENTS_MD5}")
# Loads the gcov file as a list of lines.
# (We first open the file and replace all occurrences of [] with _
# because CMake will fail to parse a line containing unmatched brackets...
# also the \ to escaped \n in macros screws up things.)
# https://public.kitware.com/Bug/view.php?id=15369
file(READ ${GCOV_FILE} GCOV_CONTENTS)
string(REPLACE "[" "_" GCOV_CONTENTS "${GCOV_CONTENTS}")
string(REPLACE "]" "_" GCOV_CONTENTS "${GCOV_CONTENTS}")
string(REPLACE "\\" "_" GCOV_CONTENTS "${GCOV_CONTENTS}")
file(WRITE ${GCOV_FILE}_tmp "${GCOV_CONTENTS}")
file(STRINGS ${GCOV_FILE}_tmp GCOV_LINES)
list(LENGTH GCOV_LINES LINE_COUNT)
# Instead of trying to parse the source from the
# gcov file, simply read the file contents from the source file.
# (Parsing it from the gcov is hard because C-code uses ; in many places
# which also happens to be the same as the CMake list delimiter).
file(READ ${GCOV_SRC_PATH} GCOV_FILE_SOURCE)
string(REPLACE "\\" "\\\\" GCOV_FILE_SOURCE "${GCOV_FILE_SOURCE}")
string(REGEX REPLACE "\"" "\\\\\"" GCOV_FILE_SOURCE "${GCOV_FILE_SOURCE}")
string(REPLACE "\t" "\\\\t" GCOV_FILE_SOURCE "${GCOV_FILE_SOURCE}")
string(REPLACE "\r" "\\\\r" GCOV_FILE_SOURCE "${GCOV_FILE_SOURCE}")
string(REPLACE "\n" "\\\\n" GCOV_FILE_SOURCE "${GCOV_FILE_SOURCE}")
# According to http://json.org/ these should be escaped as well.
# Don't know how to do that in CMake however...
#string(REPLACE "\b" "\\\\b" GCOV_FILE_SOURCE "${GCOV_FILE_SOURCE}")
#string(REPLACE "\f" "\\\\f" GCOV_FILE_SOURCE "${GCOV_FILE_SOURCE}")
#string(REGEX REPLACE "\u([a-fA-F0-9]{4})" "\\\\u\\1" GCOV_FILE_SOURCE "${GCOV_FILE_SOURCE}")
# We want a json array of coverage data as a single string
# start building them from the contents of the .gcov
set(GCOV_FILE_COVERAGE "[")
set(GCOV_LINE_COUNT 1) # Line number for the .gcov.
set(DO_SKIP 0)
foreach (GCOV_LINE ${GCOV_LINES})
#message("${GCOV_LINE}")
# Example of what we're parsing:
# Hitcount |Line | Source
# " 8: 26: if (!allowed || (strlen(allowed) == 0))"
string(REGEX REPLACE
"^([^:]*):([^:]*):(.*)$"
"\\1;\\2;\\3"
RES
"${GCOV_LINE}")
# Check if we should exclude lines using the Lcov syntax.
string(REGEX MATCH "LCOV_EXCL_START" START_SKIP "${GCOV_LINE}")
string(REGEX MATCH "LCOV_EXCL_END" END_SKIP "${GCOV_LINE}")
string(REGEX MATCH "LCOV_EXCL_LINE" LINE_SKIP "${GCOV_LINE}")
set(RESET_SKIP 0)
if (LINE_SKIP AND NOT DO_SKIP)
set(DO_SKIP 1)
set(RESET_SKIP 1)
endif()
if (START_SKIP)
set(DO_SKIP 1)
message("${GCOV_LINE_COUNT}: Start skip")
endif()
if (END_SKIP)
set(DO_SKIP 0)
endif()
list(LENGTH RES RES_COUNT)
if (RES_COUNT GREATER 2)
list(GET RES 0 HITCOUNT)
list(GET RES 1 LINE)
list(GET RES 2 SOURCE)
string(STRIP ${HITCOUNT} HITCOUNT)
string(STRIP ${LINE} LINE)
# Lines with 0 line numbers are metadata and can be ignored.
if (NOT ${LINE} EQUAL 0)
if (DO_SKIP)
set(GCOV_FILE_COVERAGE "${GCOV_FILE_COVERAGE}null, ")
else()
# Translate the hitcount into valid JSON values.
if (${HITCOUNT} STREQUAL "#####")
set(GCOV_FILE_COVERAGE "${GCOV_FILE_COVERAGE}0, ")
elseif (${HITCOUNT} STREQUAL "-")
set(GCOV_FILE_COVERAGE "${GCOV_FILE_COVERAGE}null, ")
else()
set(GCOV_FILE_COVERAGE "${GCOV_FILE_COVERAGE}${HITCOUNT}, ")
endif()
endif()
endif()
else()
message(WARNING "Failed to properly parse line (RES_COUNT = ${RES_COUNT}) ${GCOV_FILE}:${GCOV_LINE_COUNT}\n-->${GCOV_LINE}")
endif()
if (RESET_SKIP)
set(DO_SKIP 0)
endif()
math(EXPR GCOV_LINE_COUNT "${GCOV_LINE_COUNT}+1")
endforeach()
message("${GCOV_LINE_COUNT} of ${LINE_COUNT} lines read!")
# Advanced way of removing the trailing comma in the JSON array.
# "[1, 2, 3, " -> "[1, 2, 3"
string(REGEX REPLACE ",[ ]*$" "" GCOV_FILE_COVERAGE ${GCOV_FILE_COVERAGE})
# Append the trailing ] to complete the JSON array.
set(GCOV_FILE_COVERAGE "${GCOV_FILE_COVERAGE}]")
# Generate the final JSON for this file.
message("Generate JSON for file: ${GCOV_SRC_REL_PATH}...")
string(CONFIGURE ${SRC_FILE_TEMPLATE} FILE_JSON)
set(JSON_GCOV_FILES "${JSON_GCOV_FILES}${FILE_JSON}, ")
endforeach()
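Each data line of a .gcov file is "hit-count : line-number : source"; a count of "#####" means the line was compiled but never executed (reported as 0), "-" means it is not executable (reported as null), and line number 0 marks metadata. The loop above applies these rules with CMake regexes; the same rules in a compact Python sketch (LCOV_EXCL handling is omitted for brevity):

def gcov_line_coverage(gcov_lines):
    # One entry per source line: an int hit count, 0, or None.
    coverage = []
    for line in gcov_lines:
        parts = line.split(":", 2)
        if len(parts) < 3:
            continue
        hits, lineno = parts[0].strip(), parts[1].strip()
        if lineno == "0":          # preamble/metadata, not a source line
            continue
        if hits == "-":
            coverage.append(None)  # line is not executable
        elif hits == "#####":
            coverage.append(0)     # executable but never hit
        else:
            try:
                coverage.append(int(hits))
            except ValueError:     # tolerate unusual markers such as '====='
                coverage.append(0)
    return coverage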
# Loop through all files we couldn't find any coverage for
# as well, and generate JSON for those with 0% coverage.
foreach(NOT_COVERED_SRC ${COVERAGE_SRCS_REMAINING})
# Loads the source file as a list of lines.
file(STRINGS ${NOT_COVERED_SRC} SRC_LINES)
set(GCOV_FILE_COVERAGE "[")
set(GCOV_FILE_SOURCE "")
foreach (SOURCE ${SRC_LINES})
set(GCOV_FILE_COVERAGE "${GCOV_FILE_COVERAGE}0, ")
string(REPLACE "\\" "\\\\" SOURCE "${SOURCE}")
string(REGEX REPLACE "\"" "\\\\\"" SOURCE "${SOURCE}")
string(REPLACE "\t" "\\\\t" SOURCE "${SOURCE}")
string(REPLACE "\r" "\\\\r" SOURCE "${SOURCE}")
set(GCOV_FILE_SOURCE "${GCOV_FILE_SOURCE}${SOURCE}\\n")
endforeach()
# Remove trailing comma, and complete JSON array with ]
string(REGEX REPLACE ",[ ]*$" "" GCOV_FILE_COVERAGE ${GCOV_FILE_COVERAGE})
set(GCOV_FILE_COVERAGE "${GCOV_FILE_COVERAGE}]")
# Generate the final JSON for this file.
message("Generate JSON for non-gcov file: ${NOT_COVERED_SRC}...")
string(CONFIGURE ${SRC_FILE_TEMPLATE} FILE_JSON)
set(JSON_GCOV_FILES "${JSON_GCOV_FILES}${FILE_JSON}, ")
endforeach()
# Get rid of trailing comma.
string(REGEX REPLACE ",[ ]*$" "" JSON_GCOV_FILES ${JSON_GCOV_FILES})
set(JSON_GCOV_FILES "${JSON_GCOV_FILES}]")
# Generate the final complete JSON!
message("Generate final JSON...")
string(CONFIGURE ${JSON_TEMPLATE} JSON)
file(WRITE "${COVERALLS_OUTPUT_FILE}" "${JSON}")
message("###########################################################################")
message("Generated coveralls JSON containing coverage data:")
message("${COVERALLS_OUTPUT_FILE}")
message("###########################################################################")
...@@ -21,12 +21,6 @@ function(safe_set_flag is_c src_list flag_name) ...@@ -21,12 +21,6 @@ function(safe_set_flag is_c src_list flag_name)
endif() endif()
if(${safe_name}) if(${safe_name})
set(${src_list} "${${src_list}} ${flag_name}" PARENT_SCOPE) set(${src_list} "${${src_list}} ${flag_name}" PARENT_SCOPE)
if(is_c)
set(CUDA_NVCC_FLAGS
--compiler-options;${flag_name}
${CUDA_NVCC_FLAGS}
PARENT_SCOPE)
endif()
endif() endif()
endfunction() endfunction()
...@@ -40,6 +34,20 @@ macro(safe_set_cxxflag src_list flag_name) ...@@ -40,6 +34,20 @@ macro(safe_set_cxxflag src_list flag_name)
safe_set_flag(OFF ${src_list} ${flag_name}) safe_set_flag(OFF ${src_list} ${flag_name})
endmacro() endmacro()
# helper macro to set nvcc flag
macro(safe_set_nvflag flag_name)
string(REPLACE "-" "_" safe_name ${flag_name})
string(REPLACE "=" "_" safe_name ${safe_name})
CHECK_C_COMPILER_FLAG(${flag_name} C_COMPILER_SUPPORT_FLAG_${safe_name})
set(safe_name C_COMPILER_SUPPORT_FLAG_${safe_name})
if(${safe_name})
set(CUDA_NVCC_FLAGS
--compiler-options;${flag_name}
${CUDA_NVCC_FLAGS})
endif()
endmacro()
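safe_set_cflag/safe_set_cxxflag/safe_set_nvflag only append a flag once CHECK_C_COMPILER_FLAG / CHECK_CXX_COMPILER_FLAG has shown the compiler accepts it, so an unknown warning option never breaks the build. A rough Python sketch of such a probe, compiling an empty translation unit with the candidate flag; the compiler name and extra options are assumptions:

import os
import shutil
import subprocess
import tempfile

def compiler_supports_flag(flag, compiler="cc"):
    # Compile an empty C file with the candidate flag; a non-zero exit status
    # (e.g. "unrecognized command line option") means the flag is unsupported.
    tmpdir = tempfile.mkdtemp()
    try:
        src = os.path.join(tmpdir, "probe.c")
        with open(src, "w") as f:
            f.write("int main(void) { return 0; }\n")
        cmd = [compiler, "-Werror", flag, "-c", src,
               "-o", os.path.join(tmpdir, "probe.o")]
        with open(os.devnull, "w") as devnull:
            return subprocess.call(cmd, stdout=devnull, stderr=devnull) == 0
    finally:
        shutil.rmtree(tmpdir)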
CHECK_CXX_SYMBOL_EXISTS(UINT64_MAX "stdint.h" UINT64_MAX_EXISTS) CHECK_CXX_SYMBOL_EXISTS(UINT64_MAX "stdint.h" UINT64_MAX_EXISTS)
if(NOT UINT64_MAX_EXISTS) if(NOT UINT64_MAX_EXISTS)
set(CMAKE_REQUIRED_DEFINITIONS -D__STDC_LIMIT_MACROS) set(CMAKE_REQUIRED_DEFINITIONS -D__STDC_LIMIT_MACROS)
...@@ -63,20 +71,43 @@ set(COMMON_FLAGS ...@@ -63,20 +71,43 @@ set(COMMON_FLAGS
-Wnon-virtual-dtor -Wnon-virtual-dtor
-Wdelete-non-virtual-dtor -Wdelete-non-virtual-dtor
-Wno-unused-parameter -Wno-unused-parameter
-Wno-unused-function
-Wno-error=literal-suffix
-Wno-error=unused-local-typedefs)
set(GPU_COMMON_FLAGS
-fPIC
-fno-omit-frame-pointer
-Wnon-virtual-dtor
-Wdelete-non-virtual-dtor
-Wno-unused-parameter
-Wno-unused-function
-Wno-error=literal-suffix -Wno-error=literal-suffix
-Wno-error=unused-local-typedefs -Wno-error=unused-local-typedefs
-Wno-error=unused-function # Warnings in Numpy Header. -Wno-error=unused-function # Warnings in Numpy Header.
) )
if (APPLE)
# On Mac OS X build fat binaries with x86_64 architectures by default.
set (CMAKE_OSX_ARCHITECTURES "x86_64" CACHE STRING "Build architectures for OSX" FORCE)
else()
set(GPU_COMMON_FLAGS
-Wall
-Wextra
-Werror
${GPU_COMMON_FLAGS})
endif()
foreach(flag ${COMMON_FLAGS}) foreach(flag ${COMMON_FLAGS})
safe_set_cflag(CMAKE_C_FLAGS ${flag}) safe_set_cflag(CMAKE_C_FLAGS ${flag})
safe_set_cxxflag(CMAKE_CXX_FLAGS ${flag}) safe_set_cxxflag(CMAKE_CXX_FLAGS ${flag})
endforeach() endforeach()
# On Mac OS X build fat binaries with x86_64 architectures by default. foreach(flag ${GPU_COMMON_FLAGS})
if (APPLE) safe_set_nvflag(${flag})
set (CMAKE_OSX_ARCHITECTURES "x86_64" CACHE STRING "Build architectures for OSX" FORCE) endforeach()
endif ()
# Release/Debug flags set by cmake. Such as -O3 -g -DNDEBUG etc. # Release/Debug flags set by cmake. Such as -O3 -g -DNDEBUG etc.
# So, don't set these flags here. # So, don't set these flags here.
......
# users should download rdma first from the subversion repository
# execute the following instructions to download it from svn manually
# svn co https://svn.baidu.com/sys/ip/trunk/rdma/sockrdmav1 rdma/
# svn co https://svn.baidu.com/sys/ip/trunk/rdma/thirdparty rdma/
# we use the static output in the svn repositories to avoid implicit bugs from a non-standard runtime env.
set(RDMA_ROOT $ENV{RDMA_ROOT} CACHE PATH "Folder contains RDMA sock library and thirdparty library")
function(generate_rdma_links)
#redirect to the current DIR to isolate the build from the system runtime environment;
#this gives unified control over different gcc environments.
#e.g., by default gcc48 does not refer to /usr/lib64, which could contain low-version
#runtime libraries that will crash the process while loading them. This redirect trick
#can fix it.
execute_process(
COMMAND mkdir -p librdma
COMMAND ln -s -f /usr/lib64/libibverbs.so.1.0.0 librdma/libibverbs.so.1
COMMAND ln -s -f /usr/lib64/libibverbs.so.1.0.0 librdma/libibverbs.so
COMMAND ln -s -f /usr/lib64/librdmacm.so.1.0.0 librdma/librdmacm.so.1
COMMAND ln -s -f /usr/lib64/librdmacm.so.1.0.0 librdma/librdmacm.so
WORKING_DIRECTORY ${CMAKE_CURRENT_BINARY_DIR}
)
endfunction(generate_rdma_links)
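generate_rdma_links shields the link step from whatever libibverbs/librdmacm the system linker would otherwise resolve by symlinking pinned copies into a local librdma/ directory that RDMA_LD_FLAGS then points at. A Python sketch of the same redirection; apart from the hard-coded /usr/lib64 paths quoted from above, the names are illustrative:

import os

RDMA_LINKS = [
    ("/usr/lib64/libibverbs.so.1.0.0", ["libibverbs.so.1", "libibverbs.so"]),
    ("/usr/lib64/librdmacm.so.1.0.0", ["librdmacm.so.1", "librdmacm.so"]),
]

def generate_rdma_links(workdir):
    # Mirror the `mkdir -p librdma && ln -s -f ...` commands above.
    target_dir = os.path.join(workdir, "librdma")
    if not os.path.isdir(target_dir):
        os.makedirs(target_dir)
    for source, link_names in RDMA_LINKS:
        for name in link_names:
            link = os.path.join(target_dir, name)
            if os.path.lexists(link):
                os.remove(link)
            os.symlink(source, link)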
#check and set headers
find_path(RDMA_INC_SXISOCK sxi_sock.h PATHS ${RDMA_ROOT}/sockrdmav1/output/include)
find_path(RDMA_INC_XIO libxio.h PATHS ${RDMA_ROOT}/thirdparty/output/accelio)
find_path(RDMA_INC_EVENT event2 PATHS ${RDMA_ROOT}/thirdparty/output/libevent)
find_path(RDMA_INC_NUMA numa.h PATHS ${RDMA_ROOT}/thirdparty/output/libnuma)
#check and set libs
find_library(RDMA_LIB_SXISOCK NAMES sxisock PATHS ${RDMA_ROOT}/sockrdmav1/output)
find_library(RDMA_LIB_XIO NAMES xio PATHS ${RDMA_ROOT}/thirdparty/output/accelio)
find_library(RDMA_LIB_EVENT NAMES event PATHS ${RDMA_ROOT}/thirdparty/output/libevent)
find_library(RDMA_LIB_EVENT_CORE NAMES event_core PATHS ${RDMA_ROOT}/thirdparty/output/libevent)
find_library(RDMA_LIB_EVENT_EXTRA NAMES event_extra PATHS ${RDMA_ROOT}/thirdparty/output/libevent)
find_library(RDMA_LIB_EVENT_PTHREADS NAMES event_pthreads PATHS ${RDMA_ROOT}/thirdparty/output/libevent)
find_library(RDMA_LIB_NUMA NAMES numa PATHS ${RDMA_ROOT}/thirdparty/output/libnuma)
if(
RDMA_INC_SXISOCK AND
RDMA_INC_XIO AND
RDMA_INC_EVENT AND
RDMA_INC_NUMA AND
RDMA_LIB_SXISOCK AND
RDMA_LIB_XIO AND
RDMA_LIB_EVENT AND
RDMA_LIB_EVENT_CORE AND
RDMA_LIB_EVENT_EXTRA AND
RDMA_LIB_EVENT_PTHREADS AND
RDMA_LIB_NUMA
)
set(RDMA_INC_DIR
${RDMA_INC_SXISOCK}
${RDMA_INC_XIO}
${RDMA_INC_EVENT}
${RDMA_INC_NUMA})
set(RDMA_LIBS
${RDMA_LIB_SXISOCK}
${RDMA_LIB_XIO}
${RDMA_LIB_EVENT}
${RDMA_LIB_EVENT_CORE}
${RDMA_LIB_EVENT_EXTRA}
${RDMA_LIB_EVENT_PTHREADS}
${RDMA_LIB_NUMA}
)
set(RDMA_LD_FLAGS "-L./librdma -libverbs -lrdmacm -Xlinker -rpath ./librdma")
return()
endif()
#if this module is not called, RDMA_INC_DIR and RDMA_LIBS will be empty, so the top-level module can always refer to these variables
message(FATAL_ERROR "RDMA libraries are not found, try to set RDMA_ROOT or check all related libraries.")
find_program(
SWIG_BINARY_PATH
swig)
if(${SWIG_BINARY_PATH} STREQUAL "SWIG_BINARY_PATH-NOTFOUND")
set(SWIG_FOUND OFF)
else()
set(SWIG_FOUND ON)
endif()
set(MIN_SWIG_VERSION 2)
if(SWIG_FOUND)
execute_process(COMMAND sh -c "${SWIG_BINARY_PATH} -version | grep Version | cut -f3 -d' '"
OUTPUT_VARIABLE _SWIG_VERSION
OUTPUT_STRIP_TRAILING_WHITESPACE)
if(${_SWIG_VERSION} VERSION_LESS ${MIN_SWIG_VERSION})
message("swig version ${MIN_SWIG_VERSION} or greater is needed for generating python api. "
"Only version ${_SWIG_VERSION} is found. Set SWIG_FOUND to FALSE")
set(SWIG_FOUND FALSE)
endif(${_SWIG_VERSION} VERSION_LESS ${MIN_SWIG_VERSION})
endif(SWIG_FOUND)
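The detection above shells out to `swig -version`, extracts the version number and rejects anything older than MIN_SWIG_VERSION. The same check as a short Python sketch; the exact "SWIG Version x.y.z" banner format is an assumption about SWIG's output:

import re
import subprocess

def swig_version_ok(min_major=2, swig="swig"):
    # Parse e.g. "SWIG Version 3.0.12" out of `swig -version`; return False if
    # swig is missing or its major version is below the minimum.
    try:
        banner = subprocess.check_output([swig, "-version"]).decode("utf-8")
    except (OSError, subprocess.CalledProcessError):
        return False
    match = re.search(r"SWIG Version (\d+)\.(\d+)", banner)
    return bool(match) and int(match.group(1)) >= min_major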
function(generate_python_api target_name) function(generate_python_api target_name)
add_custom_command(OUTPUT ${PROJ_ROOT}/paddle/py_paddle/swig_paddle.py add_custom_command(OUTPUT ${PROJ_ROOT}/paddle/py_paddle/swig_paddle.py
${PROJ_ROOT}/paddle/Paddle_wrap.cxx ${PROJ_ROOT}/paddle/Paddle_wrap.cxx
...@@ -27,6 +5,7 @@ function(generate_python_api target_name) ...@@ -27,6 +5,7 @@ function(generate_python_api target_name)
COMMAND swig -python -c++ -outcurrentdir -I../ api/Paddle.swig COMMAND swig -python -c++ -outcurrentdir -I../ api/Paddle.swig
&& mv ${PROJ_ROOT}/paddle/swig_paddle.py ${PROJ_ROOT}/paddle/py_paddle/swig_paddle.py && mv ${PROJ_ROOT}/paddle/swig_paddle.py ${PROJ_ROOT}/paddle/py_paddle/swig_paddle.py
DEPENDS ${PROJ_ROOT}/paddle/api/Paddle.swig DEPENDS ${PROJ_ROOT}/paddle/api/Paddle.swig
${PROJ_ROOT}/paddle/api/PaddleAPI.h
WORKING_DIRECTORY ${PROJ_ROOT}/paddle WORKING_DIRECTORY ${PROJ_ROOT}/paddle
COMMENT "Generate Python API from swig") COMMENT "Generate Python API from swig")
add_custom_target(${target_name} ALL DEPENDS add_custom_target(${target_name} ALL DEPENDS
......
...@@ -67,6 +67,10 @@ endmacro()
#
# It will handle WITH_PYTHON/WITH_GLOG etc.
function(link_paddle_exe TARGET_NAME)
  if(WITH_RDMA)
    generate_rdma_links()
  endif()

  if(WITH_METRIC)
    if(WITH_GPU)
      set(METRIC_LIBS paddle_metric_learning paddle_dserver_lib metric metric_cpu)
...@@ -109,6 +113,12 @@ function(link_paddle_exe TARGET_NAME)
        ${ZLIB_LIBRARIES}
        ${INTERAL_LIBS}
        ${CMAKE_DL_LIBS})

  if(WITH_RDMA)
    target_link_libraries(${TARGET_NAME}
        ${RDMA_LD_FLAGS}
        ${RDMA_LIBS})
  endif()

  if(WITH_PYTHON)
    target_link_libraries(${TARGET_NAME}
...@@ -178,14 +188,6 @@ macro(add_simple_unittest TARGET_NAME)
  add_unittest(${TARGET_NAME} ${TARGET_NAME}.cpp)
endmacro()

macro(add_paddle_culib TARGET_NAME)
  set(NVCC_FLAG ${CUDA_NVCC_FLAGS})
  set(CUDA_NVCC_FLAGS ${CUDA_NVCC_FLAGS};--use_fast_math)
  cuda_add_library(${TARGET_NAME} STATIC ${ARGN})
  set(CUDA_NVCC_FLAGS ${NVCC_FLAG})
endmacro()

# Creates C resources file from files in given resource file
function(create_resources res_file output)
  # Create empty output file
...
...@@ -5,3 +5,5 @@ plot.png
train.log
image_provider_copy_1.py
*pyc
train.list
test.list
File mode changed from 100644 to 100755
...@@ -16,7 +16,6 @@ import numpy as np
import sys
import os
import PIL.Image as Image
"""
Usage: python process_cifar input_dir output_dir
"""
...@@ -30,6 +29,7 @@ def mkdir_not_exist(path):
    if not os.path.exists(path):
        os.mkdir(path)


def create_dir_structure(output_dir):
    """
    Create the directory structure for the directory.
...@@ -39,8 +39,8 @@ def create_dir_structure(output_dir):
    mkdir_not_exist(os.path.join(output_dir, "train"))
    mkdir_not_exist(os.path.join(output_dir, "test"))


def convert_batch(batch_path, label_set, label_map, output_dir, data_split):
    """
    Convert CIFAR batch to the structure of Paddle format.
    batch_path: the batch to be converted.
...@@ -67,11 +67,23 @@ if __name__ == '__main__':
    output_dir = sys.argv[2]
    num_batch = 5
    create_dir_structure(output_dir)
    label_map = {
        0: "airplane",
        1: "automobile",
        2: "bird",
        3: "cat",
        4: "deer",
        5: "dog",
        6: "frog",
        7: "horse",
        8: "ship",
        9: "truck"
    }
    labels = {}
    for i in range(1, num_batch + 1):
        convert_batch(
            os.path.join(input_dir, "data_batch_%d" % i), labels, label_map,
            output_dir, "train")
    convert_batch(
        os.path.join(input_dir, "test_batch"), {}, label_map, output_dir,
        "test")
...@@ -17,3 +17,6 @@ set -e
data_dir=./data/cifar-out
python preprocess.py -i $data_dir -s 32 -c 1
echo "data/cifar-out/batches/train.txt" > train.list
echo "data/cifar-out/batches/test.txt" > test.list
This folder contains the scripts used in the PaddlePaddle introduction.
- use `bash train.sh` to train a simple linear regression model
- use `python evaluate_model.py` to read the model parameters. You can see that `w` and `b` are very close to [2, 0.3] (a quick sanity check is sketched below).
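
The values 2 and 0.3 come from the data provider further down, which generates samples of the form y = 2 * x + 0.3. As a minimal standalone sketch (not part of the demo, and using numpy rather than `evaluate_model.py`), an ordinary least-squares fit on the same kind of data recovers roughly those parameters:

```python
# Standalone illustration only -- not a file from this repository.
import random

import numpy as np

# The demo's data provider yields pairs (x, 2 * x + 0.3); mimic that here.
xs = np.array([random.random() for _ in range(2000)])
ys = 2 * xs + 0.3

# Fit y = w * x + b by least squares; w and b come out very close to 2 and 0.3.
w, b = np.polyfit(xs, ys, 1)
print("w = %.4f, b = %.4f" % (w, b))
```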
# Copyright (c) 2016 Baidu, Inc. All Rights Reserved
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
from paddle.trainer.PyDataProvider2 import *
import random
# Define the data types of the input: two real numbers (the feature x and the label y)
@provider(input_types=[dense_vector(1), dense_vector(1)], use_seq=False)
def process(settings, input_file):
for i in xrange(2000):
x = random.random()
yield [x], [2 * x + 0.3]
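
For context, such a provider is not run directly; the trainer configuration refers to it by module and function name. The snippet below is a minimal sketch of that wiring; the module name `dataprovider` and the `train.list` path are illustrative assumptions, not taken from this commit.

```python
# Sketch of a trainer-config snippet (assumed file and module names).
from paddle.trainer_config_helpers import *

define_py_data_sources2(
    train_list='train.list',   # each line of this file is passed to process() as input_file
    test_list=None,
    module='dataprovider',     # the Python module that defines the decorated process() above
    obj='process')
```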
...@@ -13,9 +13,9 @@
# limitations under the License.

o = open("./" + "train.list", "w")
o.write("./data/raw_data/train" + "\n")
o.close()

o = open("./" + "test.list", "w")
o.write("./data/raw_data/t10k" + "\n")
o.close()