diff --git a/.clang-format b/.clang-format
index 6bbd46d0ff956517991d4faad3f2c026487f412b..9ba433b17362424973626470d930356c2173dd84 100644
--- a/.clang-format
+++ b/.clang-format
@@ -13,8 +13,6 @@
# The document of clang-format is
# http://clang.llvm.org/docs/ClangFormat.html
# http://clang.llvm.org/docs/ClangFormatStyleOptions.html
-#
-# TODO(yuyang18): Add python and other language code style
---
Language: Cpp
BasedOnStyle: Google
@@ -22,8 +20,9 @@ IndentWidth: 2
TabWidth: 2
ContinuationIndentWidth: 4
AccessModifierOffset: -2 # The private/protected/public has no indent in class
-PointerAlignment: Left # int* p/int& p, not int *p/int &p
Standard: Cpp11
AllowAllParametersOfDeclarationOnNextLine: true
+BinPackParameters: false
+BinPackArguments: false
...
diff --git a/.dockerignore b/.dockerignore
new file mode 120000
index 0000000000000000000000000000000000000000..3e4e48b0b5fe6b468434d6767749b399319f2da2
--- /dev/null
+++ b/.dockerignore
@@ -0,0 +1 @@
+.gitignore
\ No newline at end of file
diff --git a/.gitignore b/.gitignore
index 65ba217de37c82287829eef105066aba86d69651..35bed0accdaa274f5966ca5b4b7180106325449b 100644
--- a/.gitignore
+++ b/.gitignore
@@ -5,4 +5,7 @@ build/
.vscode
.idea
.project
+.cproject
.pydevproject
+Makefile
+.test_env/
diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..90c25e435083d78ad4c123999a588aaf9092f719
--- /dev/null
+++ b/.pre-commit-config.yaml
@@ -0,0 +1,20 @@
+- repo: https://github.com/Lucas-C/pre-commit-hooks.git
+ sha: c25201a00e6b0514370501050cf2a8538ac12270
+ hooks:
+ - id: remove-crlf
+- repo: https://github.com/reyoung/mirrors-yapf.git
+ sha: v0.13.2
+ hooks:
+ - id: yapf
+- repo: https://github.com/pre-commit/pre-commit-hooks
+ sha: 7539d8bd1a00a3c1bfd34cdb606d3a6372e83469
+ hooks:
+ - id: check-added-large-files
+ - id: check-merge-conflict
+ - id: check-symlinks
+ - id: detect-private-key
+ - id: end-of-file-fixer
+- repo: https://github.com/PaddlePaddle/clang-format-pre-commit-hook.git
+ sha: 28c0ea8a67a3e2dbbf4822ef44e85b63a0080a29
+ hooks:
+ - id: clang-formater
diff --git a/.style.yapf b/.style.yapf
new file mode 100644
index 0000000000000000000000000000000000000000..4741fb4f3bbc6681088cf9e960321e7b857a93a8
--- /dev/null
+++ b/.style.yapf
@@ -0,0 +1,3 @@
+[style]
+based_on_style = pep8
+column_limit = 80
diff --git a/.travis.yml b/.travis.yml
index ffe3bc193b49eb3b3318cbbc7f1c3d86dc205c14..effcf90769647960d55b971af0939496dc850e7a 100644
--- a/.travis.yml
+++ b/.travis.yml
@@ -42,7 +42,7 @@ addons:
before_install:
- |
if [ ${JOB} == "BUILD_AND_TEST" ]; then
- if ! git diff --name-only $TRAVIS_COMMIT_RANGE | grep -qvE '(\.md$)'
+ if ! git diff --name-only $TRAVIS_COMMIT_RANGE | grep -qvE '(\.md$)|(\.rst$)|(\.jpg$)|(\.png$)'
then
echo "Only markdown docs were updated, stopping build process."
exit
diff --git a/CMakeLists.txt b/CMakeLists.txt
index d7e7e49e9a038acc6ca272433cd39b08c812eccc..7b4242374914b83a73454199a670c1bd77993b2d 100644
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@@ -1,10 +1,6 @@
cmake_minimum_required(VERSION 2.8)
project(paddle CXX C)
-set(PADDLE_MAJOR_VERSION 0)
-set(PADDLE_MINOR_VERSION 8)
-set(PADDLE_PATCH_VERSION 0b3)
-set(PADDLE_VERSION ${PADDLE_MAJOR_VERSION}.${PADDLE_MINOR_VERSION}.${PADDLE_PATCH_VERSION})
set(CMAKE_MODULE_PATH ${CMAKE_MODULE_PATH} "${CMAKE_SOURCE_DIR}/cmake")
set(PROJ_ROOT ${CMAKE_SOURCE_DIR})
@@ -12,6 +8,17 @@ include(package)
find_package(SWIG 2.0)
find_package(CUDA QUIET)
find_package(Protobuf REQUIRED)
+
+# Check protobuf library version; protoc prints "libprotoc X.Y.Z".
+execute_process(COMMAND ${PROTOBUF_PROTOC_EXECUTABLE} --version
+                OUTPUT_VARIABLE PROTOBUF_VERSION OUTPUT_STRIP_TRAILING_WHITESPACE)
+# Expansions are quoted so an empty result (protoc printed nothing) cannot break parsing.
+string(REPLACE "libprotoc " "" PROTOBUF_VERSION "${PROTOBUF_VERSION}")
+set(PROTOBUF_3 OFF)
+if(NOT "${PROTOBUF_VERSION}" VERSION_LESS "3.0.0")  # i.e. version >= 3.0.0
+    set(PROTOBUF_3 ON)
+endif()
+
find_package(PythonLibs 2.7 REQUIRED)
find_package(PythonInterp 2.7 REQUIRED)
find_package(ZLIB REQUIRED)
@@ -36,6 +43,7 @@ option(WITH_RDMA "Compile PaddlePaddle with rdma support" OFF)
option(WITH_GLOG "Compile PaddlePaddle use glog, otherwise use a log implement internally" ${LIBGLOG_FOUND})
option(WITH_GFLAGS "Compile PaddlePaddle use gflags, otherwise use a flag implement internally" ${GFLAGS_FOUND})
option(WITH_TIMER "Compile PaddlePaddle use timer" OFF)
+option(WITH_PROFILER "Compile PaddlePaddle use gpu profiler" OFF)
option(WITH_TESTING "Compile and run unittest for PaddlePaddle" ${GTEST_FOUND})
option(WITH_DOC "Compile PaddlePaddle with documentation" OFF)
option(WITH_SWIG_PY "Compile PaddlePaddle with py PaddlePaddle prediction api" ${SWIG_FOUND})
@@ -44,7 +52,7 @@ option(ON_COVERALLS "Generating code coverage data on coveralls or not." OFF)
option(COVERALLS_UPLOAD "Uploading the generated coveralls json." ON)
if(NOT CMAKE_BUILD_TYPE)
- set(CMAKE_BUILD_TYPE "RelWithDebInfo" CACHE STRING
+ set(CMAKE_BUILD_TYPE "RelWithDebInfo" CACHE STRING
"Choose the type of build, options are: Debug Release RelWithDebInfo MinSizeRel"
FORCE)
endif()
@@ -63,31 +71,11 @@ include(check_packages)
include(swig)
include(coveralls)
-# add PaddlePaddle version
-if(DEFINED ENV{PADDLE_VERSION})
- add_definitions(-DPADDLE_VERSION=\"$ENV{PADDLE_VERSION}\")
-else()
- if(EXISTS ${PROJ_ROOT}/.svn/)
- find_package(Subversion REQUIRED)
- if(SUBVERSION_FOUND)
- Subversion_WC_INFO(${PROJ_ROOT} Project)
- add_definitions(-DPADDLE_VERSION=${Project_WC_REVISION})
- endif()
- elseif(EXISTS ${PROJ_ROOT}/.git/)
- find_package(Git REQUIRED)
- execute_process(
- COMMAND ${GIT_EXECUTABLE} log -1 --format=%H
- WORKING_DIRECTORY ${PROJ_ROOT}
- OUTPUT_VARIABLE GIT_SHA1
- RESULT_VARIABLE GIT_RESULT
- ERROR_QUIET OUTPUT_STRIP_TRAILING_WHITESPACE)
- if(NOT ${GIT_RESULT})
- add_definitions(-DPADDLE_VERSION=\"${GIT_SHA1}\")
- else()
- message(WARNING "Cannot add paddle version from git tag")
- endif()
- endif()
-endif()
+# Set PaddlePaddle version to Git tag name or Git commit ID.
+find_package(Git REQUIRED)
+# include(version) loads version.cmake via CMAKE_MODULE_PATH; it is expected to set PADDLE_VERSION.
+include(version)
+add_definitions(-DPADDLE_VERSION=\"${PADDLE_VERSION}\")
if(NOT WITH_GPU)
@@ -115,7 +103,6 @@ else()
endif(WITH_AVX)
if(WITH_DSO)
- set(CUDA_LIBRARIES "")
add_definitions(-DPADDLE_USE_DSO)
endif(WITH_DSO)
@@ -135,6 +122,10 @@ if(NOT WITH_TIMER)
add_definitions(-DPADDLE_DISABLE_TIMER)
endif(NOT WITH_TIMER)
+if(NOT WITH_PROFILER)
+ add_definitions(-DPADDLE_DISABLE_PROFILER)
+endif(NOT WITH_PROFILER)
+
if(WITH_AVX)
set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} ${AVX_FLAG}")
set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} ${AVX_FLAG}")
diff --git a/README.md b/README.md
index 81ff8c7122ab8f1e39ef14a056532bb85cc57c77..8a8e15841586ae6a01bb93e94f6074189f556f5a 100644
--- a/README.md
+++ b/README.md
@@ -1,10 +1,13 @@
# PaddlePaddle
-[![Build Status](https://travis-ci.org/baidu/Paddle.svg?branch=master)](https://travis-ci.org/baidu/Paddle)
-[![Coverage Status](https://coveralls.io/repos/github/baidu/Paddle/badge.svg?branch=develop)](https://coveralls.io/github/baidu/Paddle?branch=develop)
-[![Join the chat at https://gitter.im/PaddlePaddle/Deep_Learning](https://badges.gitter.im/Join%20Chat.svg)](https://gitter.im/PaddlePaddle/Deep_Learning?utm_source=badge&utm_medium=badge&utm_campaign=pr-badge&utm_content=badge)
-[![License](https://img.shields.io/badge/license-Apache%202.0-green.svg)](LICENSE)
+[![Build Status](https://travis-ci.org/PaddlePaddle/Paddle.svg?branch=develop)](https://travis-ci.org/PaddlePaddle/Paddle)
+[![Documentation Status](https://img.shields.io/badge/docs-latest-brightgreen.svg?style=flat)](http://www.paddlepaddle.org/)
+[![Documentation Status](https://img.shields.io/badge/中文文档-最新-brightgreen.svg)](http://www.paddlepaddle.org/cn/index.html)
+[![Coverage Status](https://coveralls.io/repos/github/PaddlePaddle/Paddle/badge.svg?branch=develop)](https://coveralls.io/github/PaddlePaddle/Paddle?branch=develop)
+[![Release](https://img.shields.io/github/release/PaddlePaddle/Paddle.svg)](https://github.com/PaddlePaddle/Paddle/releases)
+[![License](https://img.shields.io/badge/license-Apache%202-blue.svg)](LICENSE)
+
Welcome to the PaddlePaddle GitHub.
@@ -14,7 +17,7 @@ developed by Baidu scientists and engineers for the purpose of applying deep
learning to many products at Baidu.
Our vision is to enable deep learning for everyone via PaddlePaddle.
-Please refer to our [release announcement](https://github.com/baidu/Paddle/releases) to track the latest feature of PaddlePaddle.
+Please refer to our [release announcement](https://github.com/PaddlePaddle/Paddle/releases) to track the latest features of PaddlePaddle.
## Features
@@ -26,15 +29,15 @@ Please refer to our [release announcement](https://github.com/baidu/Paddle/relea
connection.
- **Efficiency**
-
+
In order to unleash the power of heterogeneous computing resource,
optimization occurs at different levels of PaddlePaddle, including
computing, memory, architecture and communication. The following are some
examples:
- Optimized math operations through SSE/AVX intrinsics, BLAS libraries
- (e.g. MKL, ATLAS, cuBLAS) or customized CPU/GPU kernels.
- - Highly optimized recurrent networks which can handle **variable-length**
+ (e.g. MKL, ATLAS, cuBLAS) or customized CPU/GPU kernels.
+ - Highly optimized recurrent networks which can handle **variable-length**
sequence without padding.
- Optimized local and distributed training for models with high dimensional
sparse data.
@@ -57,41 +60,39 @@ Please refer to our [release announcement](https://github.com/baidu/Paddle/relea
## Installation
Check out the [Install Guide](http://paddlepaddle.org/doc/build/) to install from
-pre-built packages (**docker image**, **deb package**) or
+pre-built packages (**docker image**, **deb package**) or
directly build on **Linux** and **Mac OS X** from the source code.
-
+
## Documentation
Both [English Docs](http://paddlepaddle.org/doc/) and [Chinese Docs](http://paddlepaddle.org/doc_cn/) are provided for our users and developers.
- [Quick Start](http://paddlepaddle.org/doc/demo/quick_start/index_en)
You can follow the quick start tutorial to learn how use PaddlePaddle
step-by-step.
-
+
- [Example and Demo](http://paddlepaddle.org/doc/demo/)
We provide five demos, including: image classification, sentiment analysis,
- sequence to sequence model, recommendation, semantic role labeling.
-
+ sequence to sequence model, recommendation, semantic role labeling.
+
- [Distributed Training](http://paddlepaddle.org/doc/cluster)
This system supports training deep learning models on multiple machines
with data parallelism.
-
+
- [Python API](http://paddlepaddle.org/doc/ui/)
PaddlePaddle supports using either Python interface or C++ to build your
system. We also use SWIG to wrap C++ source code to create a user friendly
interface for Python. You can also use SWIG to create interface for your
favorite programming language.
-
+
- [How to Contribute](http://paddlepaddle.org/doc/build/contribute_to_paddle.html)
We sincerely appreciate your interest and contributions. If you would like to
- contribute, please read the contribution guide.
+ contribute, please read the contribution guide.
- [Source Code Documents](http://paddlepaddle.org/doc/source/)
## Ask Questions
-Please join the [**gitter chat**](https://gitter.im/PaddlePaddle/Deep_Learning) or send email to
-**paddle-dev@baidu.com** to ask questions and talk about methods and models.
-Framework development discussions and
-bug reports are collected on [Issues](https://github.com/baidu/paddle/issues).
+
+You are welcome to submit questions and bug reports as [GitHub Issues](https://github.com/PaddlePaddle/Paddle/issues).
## Copyright and License
PaddlePaddle is provided under the [Apache-2.0 license](LICENSE).
diff --git a/RELEASE.md b/RELEASE.md
new file mode 100644
index 0000000000000000000000000000000000000000..a8a245ab442ba0fc63d1f1fda932e7590a6fe4ca
--- /dev/null
+++ b/RELEASE.md
@@ -0,0 +1,69 @@
+# Release v0.9.0
+
+## New Features:
+
+* New Layers
+ * bilinear interpolation layer.
+ * spatial pyramid-pool layer.
+ * de-convolution layer.
+ * maxout layer.
+* Support rectangle padding, stride, window and input for Pooling Operation.
+* Add `--job=time` in trainer, which can be used to print time info without the build option `-DWITH_TIMER=ON`.
+* Expose cost_weight/nce_layer in `trainer_config_helpers`
+* Add FAQ, concepts, h-rnn docs.
+* Add Bidi-LSTM and DB-LSTM to quick start demo @alvations
+* Add usage track scripts.
+
+## Improvements
+
+* Add Travis-CI for Mac OS X. Enable swig unittest in Travis-CI. Skip Travis-CI when only docs are changed.
+* Add code coverage tools.
+* Refine convolution layer to speedup and reduce GPU memory.
+* Speed up PyDataProvider2
+* Add ubuntu deb package build scripts.
+* Make Paddle use git-flow branching model.
+* PServer support no parameter blocks.
+
+## Bug Fixes
+
+* add zlib link to py_paddle
+* add input sparse data check for sparse layer at runtime
+* Bug fix for sparse matrix multiplication
+* Fix floating-point overflow problem of tanh
+* Fix some nvcc compile options
+* Fix a bug in yield dictionary in DataProvider
+* Fix SRL hang when exit.
+
+# Release v0.8.0beta.1
+New features:
+
+* Mac OSX is supported by source code. #138
+ * Both GPU and CPU versions of PaddlePaddle are supported.
+
+* Support CUDA 8.0
+
+* Enhance `PyDataProvider2`
+ * Add dictionary yield format. `PyDataProvider2` can yield a dictionary with key is data_layer's name, value is features.
+ * Add `min_pool_size` to control memory pool in provider.
+
+* Add `deb` install package & docker image for no_avx machines.
+ * Especially for cloud computing and virtual machines
+
+* Automatically disable `avx` instructions in cmake when the machine's CPU doesn't support `avx` instructions.
+
+* Add Parallel NN api in trainer_config_helpers.
+
+* Add `travis ci` for Github
+
+Bug fixes:
+
+* Several bugs in trainer_config_helpers. Also complete the unittest for trainer_config_helpers
+* Check if PaddlePaddle is installed when running unit tests.
+* Fix bugs in GTX series GPU
+* Fix bug in MultinomialSampler
+
+Also more documentation was written since last release.
+
+# Release v0.8.0beta.0
+
+PaddlePaddle v0.8.0beta.0 release. The install package is not stable yet and it's a pre-release version.
diff --git a/benchmark/.gitignore b/benchmark/.gitignore
new file mode 100644
index 0000000000000000000000000000000000000000..7b66e8a5b5020fd847982db401665d24ba3a069c
--- /dev/null
+++ b/benchmark/.gitignore
@@ -0,0 +1,9 @@
+paddle/image/logs
+paddle/image/*.pyc
+paddle/image/train.list
+paddle/rnn/logs
+paddle/rnn/*.pyc
+paddle/rnn/imdb.pkl
+caffe/image/logs
+tensorflow/image/logs
+tensorflow/rnn/logs
diff --git a/benchmark/README.md b/benchmark/README.md
new file mode 100644
index 0000000000000000000000000000000000000000..367013f0457f9bbb9ae1335ea63dce181316d444
--- /dev/null
+++ b/benchmark/README.md
@@ -0,0 +1,168 @@
+# Benchmark
+
+Machine:
+
+- CPU: 12-core Intel(R) Xeon(R) CPU E5-2620 v2 @2.10GHz
+- GPU: Tesla K40m
+- cuDNN: v5.1
+- system: Docker 1.12.1, all platforms are tested in docker environment.
+
+Platforms:
+
+- PaddlePaddle: paddledev/paddle:gpu-devel-v0.9.0a0
+- Tensorflow: gcr.io/tensorflow/tensorflow:0.11.0rc0-gpu
+- Caffe: kaixhin/cuda-caffe
+
+Several convolutional neural networks and recurrent neural networks are used to test.
+
+## Image
+
+### Benchmark Model
+
+AlexNet, GoogleNet and a small network used in Caffe.
+
+- [AlexNet](https://github.com/BVLC/caffe/tree/master/models/bvlc_alexnet): but the group size is one.
+
+- [GoogleNet](https://github.com/BVLC/caffe/tree/master/models/bvlc_googlenet): but remove loss1 and loss2 when testing benchmark.
+
+- [SmallNet](https://github.com/BVLC/caffe/blob/master/examples/cifar10/cifar10\_quick\_train\_test.prototxt)
+
+
+### Single-GPU
+
+- AlexNet: input - 3 * 227 * 227, Time: ms/batch
+
+| BatchSize | 64 | 128 | 256 | 512 |
+|--------------|-----| -----| ------| -----|
+| PaddlePaddle | 195 | 334 | 602 | 1629 |
+| TensorFlow | 223 | 364 | 645 | 1235 |
+| Caffe | 324 | 627 | 1232 | 2513 |
+
+**Notation**
+
+All platforms use cuDNN-v5.1. We see that Caffe is slower in this experiment, because its workspace limit size for the cuDNN-conv interface is 8 * 1024 * 1024, which is smaller than the limits used in PaddlePaddle and TensorFlow. Note that Caffe will be faster if the workspace limit size is increased.
+
+- GoogleNet: input - 3 * 224 * 224, Time: ms/batch
+
+
+| BatchSize | 64 | 128 | 256 |
+|--------------|-------| -------| --------|
+| PaddlePaddle | 613 | 1149 | 2348 |
+| TensorFlow | 644 | 1176 | 2219 |
+| Caffe | 694 | 1364 | out of memory |
+
+- SmallNet: input - 3 * 32 * 32, Time ms/batch
+
+| BatchSize | 64 | 128 | 256 | 512 |
+|--------------|--------| -------- | --------|---------|
+| PaddlePaddle | 10.463 | 18.184 | 33.113 | 63.039 |
+| TensorFlow | 9 | 15 | 28 | 59 |
+| Caffe | 9.373 | 16.6606 | 31.4797 | 59.719 |
+
+**Notation**
+
+All the single-GPU experiments in Caffe use `caffe time` to calculate elapsed time, which does not include parameter updating time. However, both the PaddlePaddle and TensorFlow experiments include the parameter updating time. Compared with the total time, this part is relatively small on a single machine, so we can ignore it.
+
+TensorFlow implements its own algorithm-search method instead of using the algorithm-search interface in cuDNN.
+
+### Multi-GPU: 4 GPUs
+
+- AlexNet, ms / batch
+
+| total-BatchSize | 128 * 4 | 256 * 4 |
+|------------------|----------| -----------|
+| PaddlePaddle | 347 | 622 |
+| TensorFlow | 377 | 675 |
+| Caffe | 1229 | 2435 |
+
+For example, if `total-BatchSize = 128 * 4`, the speedup ratio is calculated by
+
+```
+ time_at_1gpu_batch_128 * 4 / time_at_4gpu_total_batch_512
+= (334 * 4)/347
+= 3.85
+```
+
+
+
+
+- GoogleNet, ms / batch
+
+| total-BatchSize | 128 * 4 | 256 * 4 |
+|-------------------|--------------| ----------- |
+| PaddlePaddle | 1178 | 2367 |
+| TensorFlow | 1210 | 2292 |
+| Caffe | 2007 | out of memory |
+
+
+
+
+## RNN
+We use an LSTM network for text classification as the benchmark.
+
+### Dataset
+- [IMDB](http://www.iro.umontreal.ca/~lisa/deep/data/imdb.pkl)
+- Sequence length is 100. In fact, PaddlePaddle supports training with variable-length sequence, but TensorFlow needs to pad. Thus, we also pad sequence length to 100 in PaddlePaddle in order to compare.
+- Dictionary size=30000
+- Peephole connection is used in `lstmemory` by default in PaddlePaddle. It is also configured in TensorFlow.
+
+### Single-GPU
+
+#### LSTM in Text Classification
+
+Testing `2 lstm layer + fc` network with different hidden size and batch size.
+
+- Batch size = 64, ms / batch
+
+| hidden_size | 256 | 512 | 1280 |
+|--------------|-------| -------| --------|
+| PaddlePaddle | 83 | 184 | 641 |
+| TensorFlow | 175 | 280 | 818 |
+
+- Batch size = 128, ms / batch
+
+| hidden_size | 256 | 512 | 1280 |
+|--------------|------- | -------| --------|
+| PaddlePaddle | 110 | 261 | 1007 |
+| TensorFlow | 181 | 361 | 1237 |
+
+
+- Batch size = 256, ms / batch
+
+| hidden_size | 256 | 512 | 1280 |
+|--------------|-------| -------| --------|
+| PaddlePaddle | 170 | 414 | 1655 |
+| TensorFlow | 238 | 536 | 1905 |
+
+
+
+#### Seq2Seq
+
+The benchmark of sequence-to-sequence network will be added later.
+
+
+### Multi GPU: 4 GPUs
+
+#### LSTM in Text Classification
+
+- hidden_size = 256, ms / batch
+
+| batch_size | 256 | 512 |
+|--------------| -------| --------|
+| PaddlePaddle | 90 | 118 |
+| TensorFlow | 226 | 118 |
+
+
+- hidden_size = 512, ms / batch
+
+| batch_size | 256 | 512 |
+|--------------| -------| --------|
+| PaddlePaddle | 189 | 268 |
+| TensorFlow | 297 | 383 |
+
+
+
+
+#### Seq2Seq
+
+The benchmark of sequence-to-sequence network will be added later.
diff --git a/benchmark/caffe/image/alexnet.prototxt b/benchmark/caffe/image/alexnet.prototxt
new file mode 100644
index 0000000000000000000000000000000000000000..aca184ddaf2ca2b5e2bea17d131055e0621b8271
--- /dev/null
+++ b/benchmark/caffe/image/alexnet.prototxt
@@ -0,0 +1,347 @@
+name: "alexnet"
+input: "data"
+input_dim: 64
+input_dim: 3
+input_dim: 227
+input_dim: 227
+input: "label"
+input_dim: 64
+input_dim: 1
+input_dim: 1
+input_dim: 1
+force_backward: true
+layer {
+ name: "conv1"
+ type: "Convolution"
+ bottom: "data"
+ top: "conv1"
+ param {
+ lr_mult: 1
+ decay_mult: 1
+ }
+ param {
+ lr_mult: 2
+ decay_mult: 0
+ }
+ convolution_param {
+ num_output: 96
+ kernel_size: 11
+ stride: 4
+ weight_filler {
+ type: "gaussian"
+ std: 0.01
+ }
+ bias_filler {
+ type: "constant"
+ value: 0
+ }
+ }
+}
+layer {
+ name: "relu1"
+ type: "ReLU"
+ bottom: "conv1"
+ top: "conv1"
+}
+layer {
+ name: "norm1"
+ type: "LRN"
+ bottom: "conv1"
+ top: "norm1"
+ lrn_param {
+ local_size: 5
+ alpha: 0.0001
+ beta: 0.75
+ }
+}
+layer {
+ name: "pool1"
+ type: "Pooling"
+ bottom: "norm1"
+ top: "pool1"
+ pooling_param {
+ pool: MAX
+ kernel_size: 3
+ stride: 2
+ }
+}
+layer {
+ name: "conv2"
+ type: "Convolution"
+ bottom: "pool1"
+ top: "conv2"
+ param {
+ lr_mult: 1
+ decay_mult: 1
+ }
+ param {
+ lr_mult: 2
+ decay_mult: 0
+ }
+ convolution_param {
+ num_output: 256
+ pad: 2
+ kernel_size: 5
+ group: 1
+ weight_filler {
+ type: "gaussian"
+ std: 0.01
+ }
+ bias_filler {
+ type: "constant"
+ value: 0.1
+ }
+ }
+}
+layer {
+ name: "relu2"
+ type: "ReLU"
+ bottom: "conv2"
+ top: "conv2"
+}
+layer {
+ name: "norm2"
+ type: "LRN"
+ bottom: "conv2"
+ top: "norm2"
+ lrn_param {
+ local_size: 5
+ alpha: 0.0001
+ beta: 0.75
+ }
+}
+layer {
+ name: "pool2"
+ type: "Pooling"
+ bottom: "norm2"
+ top: "pool2"
+ pooling_param {
+ pool: MAX
+ kernel_size: 3
+ stride: 2
+ }
+}
+layer {
+ name: "conv3"
+ type: "Convolution"
+ bottom: "pool2"
+ top: "conv3"
+ param {
+ lr_mult: 1
+ decay_mult: 1
+ }
+ param {
+ lr_mult: 2
+ decay_mult: 0
+ }
+ convolution_param {
+ num_output: 384
+ pad: 1
+ kernel_size: 3
+ weight_filler {
+ type: "gaussian"
+ std: 0.01
+ }
+ bias_filler {
+ type: "constant"
+ value: 0
+ }
+ }
+}
+layer {
+ name: "relu3"
+ type: "ReLU"
+ bottom: "conv3"
+ top: "conv3"
+}
+layer {
+ name: "conv4"
+ type: "Convolution"
+ bottom: "conv3"
+ top: "conv4"
+ param {
+ lr_mult: 1
+ decay_mult: 1
+ }
+ param {
+ lr_mult: 2
+ decay_mult: 0
+ }
+ convolution_param {
+ num_output: 384
+ pad: 1
+ kernel_size: 3
+ group: 1
+ weight_filler {
+ type: "gaussian"
+ std: 0.01
+ }
+ bias_filler {
+ type: "constant"
+ value: 0.1
+ }
+ }
+}
+layer {
+ name: "relu4"
+ type: "ReLU"
+ bottom: "conv4"
+ top: "conv4"
+}
+layer {
+ name: "conv5"
+ type: "Convolution"
+ bottom: "conv4"
+ top: "conv5"
+ param {
+ lr_mult: 1
+ decay_mult: 1
+ }
+ param {
+ lr_mult: 2
+ decay_mult: 0
+ }
+ convolution_param {
+ num_output: 256
+ pad: 1
+ kernel_size: 3
+ group: 1
+ weight_filler {
+ type: "gaussian"
+ std: 0.01
+ }
+ bias_filler {
+ type: "constant"
+ value: 0.1
+ }
+ }
+}
+layer {
+ name: "relu5"
+ type: "ReLU"
+ bottom: "conv5"
+ top: "conv5"
+}
+layer {
+ name: "pool5"
+ type: "Pooling"
+ bottom: "conv5"
+ top: "pool5"
+ pooling_param {
+ pool: MAX
+ kernel_size: 3
+ stride: 2
+ }
+}
+layer {
+ name: "fc6"
+ type: "InnerProduct"
+ bottom: "pool5"
+ top: "fc6"
+ param {
+ lr_mult: 1
+ decay_mult: 1
+ }
+ param {
+ lr_mult: 2
+ decay_mult: 0
+ }
+ inner_product_param {
+ num_output: 4096
+ weight_filler {
+ type: "gaussian"
+ std: 0.005
+ }
+ bias_filler {
+ type: "constant"
+ value: 0.1
+ }
+ }
+}
+layer {
+ name: "relu6"
+ type: "ReLU"
+ bottom: "fc6"
+ top: "fc6"
+}
+layer {
+ name: "drop6"
+ type: "Dropout"
+ bottom: "fc6"
+ top: "fc6"
+ dropout_param {
+ dropout_ratio: 0.5
+ }
+}
+layer {
+ name: "fc7"
+ type: "InnerProduct"
+ bottom: "fc6"
+ top: "fc7"
+ param {
+ lr_mult: 1
+ decay_mult: 1
+ }
+ param {
+ lr_mult: 2
+ decay_mult: 0
+ }
+ inner_product_param {
+ num_output: 4096
+ weight_filler {
+ type: "gaussian"
+ std: 0.005
+ }
+ bias_filler {
+ type: "constant"
+ value: 0.1
+ }
+ }
+}
+layer {
+ name: "relu7"
+ type: "ReLU"
+ bottom: "fc7"
+ top: "fc7"
+}
+layer {
+ name: "drop7"
+ type: "Dropout"
+ bottom: "fc7"
+ top: "fc7"
+ dropout_param {
+ dropout_ratio: 0.5
+ }
+}
+layer {
+ name: "fc8"
+ type: "InnerProduct"
+ bottom: "fc7"
+ top: "fc8"
+ param {
+ lr_mult: 1
+ decay_mult: 1
+ }
+ param {
+ lr_mult: 2
+ decay_mult: 0
+ }
+ inner_product_param {
+ num_output: 1000
+ weight_filler {
+ type: "gaussian"
+ std: 0.01
+ }
+ bias_filler {
+ type: "constant"
+ value: 0
+ }
+ }
+}
+layer {
+ name: "loss"
+ type: "SoftmaxWithLoss"
+ bottom: "fc8"
+ bottom: "label"
+ top: "loss"
+}
diff --git a/benchmark/caffe/image/googlenet.prototxt b/benchmark/caffe/image/googlenet.prototxt
new file mode 100644
index 0000000000000000000000000000000000000000..c5f3b4fe3efcb6f7397031c086997fa914c67b7f
--- /dev/null
+++ b/benchmark/caffe/image/googlenet.prototxt
@@ -0,0 +1,2334 @@
+name: "googlenet"
+input: "data"
+input_dim: 128
+input_dim: 3
+input_dim: 224
+input_dim: 224
+input: "label"
+input_dim: 128
+input_dim: 1
+input_dim: 1
+input_dim: 1
+layer {
+ name: "conv1/7x7_s2"
+ type: "Convolution"
+ bottom: "data"
+ top: "conv1/7x7_s2"
+ param {
+ lr_mult: 1
+ decay_mult: 1
+ }
+ param {
+ lr_mult: 2
+ decay_mult: 0
+ }
+ convolution_param {
+ num_output: 64
+ pad: 3
+ kernel_size: 7
+ stride: 2
+ weight_filler {
+ type: "xavier"
+ }
+ bias_filler {
+ type: "constant"
+ value: 0.2
+ }
+ }
+}
+layer {
+ name: "conv1/relu_7x7"
+ type: "ReLU"
+ bottom: "conv1/7x7_s2"
+ top: "conv1/7x7_s2"
+}
+layer {
+ name: "pool1/3x3_s2"
+ type: "Pooling"
+ bottom: "conv1/7x7_s2"
+ top: "pool1/3x3_s2"
+ pooling_param {
+ pool: MAX
+ kernel_size: 3
+ stride: 2
+ }
+}
+#layer {
+# name: "pool1/norm1"
+# type: "LRN"
+# bottom: "pool1/3x3_s2"
+# top: "pool1/norm1"
+# lrn_param {
+# local_size: 5
+# alpha: 0.0001
+# beta: 0.75
+# }
+#}
+layer {
+ name: "conv2/3x3_reduce"
+ type: "Convolution"
+# bottom: "pool1/norm1"
+ bottom: "pool1/3x3_s2"
+ top: "conv2/3x3_reduce"
+ param {
+ lr_mult: 1
+ decay_mult: 1
+ }
+ param {
+ lr_mult: 2
+ decay_mult: 0
+ }
+ convolution_param {
+ num_output: 64
+ kernel_size: 1
+ weight_filler {
+ type: "xavier"
+ }
+ bias_filler {
+ type: "constant"
+ value: 0.2
+ }
+ }
+}
+layer {
+ name: "conv2/relu_3x3_reduce"
+ type: "ReLU"
+ bottom: "conv2/3x3_reduce"
+ top: "conv2/3x3_reduce"
+}
+layer {
+ name: "conv2/3x3"
+ type: "Convolution"
+ bottom: "conv2/3x3_reduce"
+ top: "conv2/3x3"
+ param {
+ lr_mult: 1
+ decay_mult: 1
+ }
+ param {
+ lr_mult: 2
+ decay_mult: 0
+ }
+ convolution_param {
+ num_output: 192
+ pad: 1
+ kernel_size: 3
+ weight_filler {
+ type: "xavier"
+ }
+ bias_filler {
+ type: "constant"
+ value: 0.2
+ }
+ }
+}
+layer {
+ name: "conv2/relu_3x3"
+ type: "ReLU"
+ bottom: "conv2/3x3"
+ top: "conv2/3x3"
+}
+#layer {
+# name: "conv2/norm2"
+# type: "LRN"
+# bottom: "conv2/3x3"
+# top: "conv2/norm2"
+# lrn_param {
+# local_size: 5
+# alpha: 0.0001
+# beta: 0.75
+# }
+#}
+layer {
+ name: "pool2/3x3_s2"
+ type: "Pooling"
+# bottom: "conv2/norm2"
+ bottom: "conv2/3x3"
+ top: "pool2/3x3_s2"
+ pooling_param {
+ pool: MAX
+ kernel_size: 3
+ stride: 2
+ }
+}
+layer {
+ name: "inception_3a/1x1"
+ type: "Convolution"
+ bottom: "pool2/3x3_s2"
+ top: "inception_3a/1x1"
+ param {
+ lr_mult: 1
+ decay_mult: 1
+ }
+ param {
+ lr_mult: 2
+ decay_mult: 0
+ }
+ convolution_param {
+ num_output: 64
+ kernel_size: 1
+ weight_filler {
+ type: "xavier"
+ }
+ bias_filler {
+ type: "constant"
+ value: 0.2
+ }
+ }
+}
+layer {
+ name: "inception_3a/relu_1x1"
+ type: "ReLU"
+ bottom: "inception_3a/1x1"
+ top: "inception_3a/1x1"
+}
+layer {
+ name: "inception_3a/3x3_reduce"
+ type: "Convolution"
+ bottom: "pool2/3x3_s2"
+ top: "inception_3a/3x3_reduce"
+ param {
+ lr_mult: 1
+ decay_mult: 1
+ }
+ param {
+ lr_mult: 2
+ decay_mult: 0
+ }
+ convolution_param {
+ num_output: 96
+ kernel_size: 1
+ weight_filler {
+ type: "xavier"
+ }
+ bias_filler {
+ type: "constant"
+ value: 0.2
+ }
+ }
+}
+layer {
+ name: "inception_3a/relu_3x3_reduce"
+ type: "ReLU"
+ bottom: "inception_3a/3x3_reduce"
+ top: "inception_3a/3x3_reduce"
+}
+layer {
+ name: "inception_3a/3x3"
+ type: "Convolution"
+ bottom: "inception_3a/3x3_reduce"
+ top: "inception_3a/3x3"
+ param {
+ lr_mult: 1
+ decay_mult: 1
+ }
+ param {
+ lr_mult: 2
+ decay_mult: 0
+ }
+ convolution_param {
+ num_output: 128
+ pad: 1
+ kernel_size: 3
+ weight_filler {
+ type: "xavier"
+ }
+ bias_filler {
+ type: "constant"
+ value: 0.2
+ }
+ }
+}
+layer {
+ name: "inception_3a/relu_3x3"
+ type: "ReLU"
+ bottom: "inception_3a/3x3"
+ top: "inception_3a/3x3"
+}
+layer {
+ name: "inception_3a/5x5_reduce"
+ type: "Convolution"
+ bottom: "pool2/3x3_s2"
+ top: "inception_3a/5x5_reduce"
+ param {
+ lr_mult: 1
+ decay_mult: 1
+ }
+ param {
+ lr_mult: 2
+ decay_mult: 0
+ }
+ convolution_param {
+ num_output: 16
+ kernel_size: 1
+ weight_filler {
+ type: "xavier"
+ }
+ bias_filler {
+ type: "constant"
+ value: 0.2
+ }
+ }
+}
+layer {
+ name: "inception_3a/relu_5x5_reduce"
+ type: "ReLU"
+ bottom: "inception_3a/5x5_reduce"
+ top: "inception_3a/5x5_reduce"
+}
+layer {
+ name: "inception_3a/5x5"
+ type: "Convolution"
+ bottom: "inception_3a/5x5_reduce"
+ top: "inception_3a/5x5"
+ param {
+ lr_mult: 1
+ decay_mult: 1
+ }
+ param {
+ lr_mult: 2
+ decay_mult: 0
+ }
+ convolution_param {
+ num_output: 32
+ pad: 2
+ kernel_size: 5
+ weight_filler {
+ type: "xavier"
+ }
+ bias_filler {
+ type: "constant"
+ value: 0.2
+ }
+ }
+}
+layer {
+ name: "inception_3a/relu_5x5"
+ type: "ReLU"
+ bottom: "inception_3a/5x5"
+ top: "inception_3a/5x5"
+}
+layer {
+ name: "inception_3a/pool"
+ type: "Pooling"
+ bottom: "pool2/3x3_s2"
+ top: "inception_3a/pool"
+ pooling_param {
+ pool: MAX
+ kernel_size: 3
+ stride: 1
+ pad: 1
+ }
+}
+layer {
+ name: "inception_3a/pool_proj"
+ type: "Convolution"
+ bottom: "inception_3a/pool"
+ top: "inception_3a/pool_proj"
+ param {
+ lr_mult: 1
+ decay_mult: 1
+ }
+ param {
+ lr_mult: 2
+ decay_mult: 0
+ }
+ convolution_param {
+ num_output: 32
+ kernel_size: 1
+ weight_filler {
+ type: "xavier"
+ }
+ bias_filler {
+ type: "constant"
+ value: 0.2
+ }
+ }
+}
+layer {
+ name: "inception_3a/relu_pool_proj"
+ type: "ReLU"
+ bottom: "inception_3a/pool_proj"
+ top: "inception_3a/pool_proj"
+}
+layer {
+ name: "inception_3a/output"
+ type: "Concat"
+ bottom: "inception_3a/1x1"
+ bottom: "inception_3a/3x3"
+ bottom: "inception_3a/5x5"
+ bottom: "inception_3a/pool_proj"
+ top: "inception_3a/output"
+}
+layer {
+ name: "inception_3b/1x1"
+ type: "Convolution"
+ bottom: "inception_3a/output"
+ top: "inception_3b/1x1"
+ param {
+ lr_mult: 1
+ decay_mult: 1
+ }
+ param {
+ lr_mult: 2
+ decay_mult: 0
+ }
+ convolution_param {
+ num_output: 128
+ kernel_size: 1
+ weight_filler {
+ type: "xavier"
+ }
+ bias_filler {
+ type: "constant"
+ value: 0.2
+ }
+ }
+}
+layer {
+ name: "inception_3b/relu_1x1"
+ type: "ReLU"
+ bottom: "inception_3b/1x1"
+ top: "inception_3b/1x1"
+}
+layer {
+ name: "inception_3b/3x3_reduce"
+ type: "Convolution"
+ bottom: "inception_3a/output"
+ top: "inception_3b/3x3_reduce"
+ param {
+ lr_mult: 1
+ decay_mult: 1
+ }
+ param {
+ lr_mult: 2
+ decay_mult: 0
+ }
+ convolution_param {
+ num_output: 128
+ kernel_size: 1
+ weight_filler {
+ type: "xavier"
+ }
+ bias_filler {
+ type: "constant"
+ value: 0.2
+ }
+ }
+}
+layer {
+ name: "inception_3b/relu_3x3_reduce"
+ type: "ReLU"
+ bottom: "inception_3b/3x3_reduce"
+ top: "inception_3b/3x3_reduce"
+}
+layer {
+ name: "inception_3b/3x3"
+ type: "Convolution"
+ bottom: "inception_3b/3x3_reduce"
+ top: "inception_3b/3x3"
+ param {
+ lr_mult: 1
+ decay_mult: 1
+ }
+ param {
+ lr_mult: 2
+ decay_mult: 0
+ }
+ convolution_param {
+ num_output: 192
+ pad: 1
+ kernel_size: 3
+ weight_filler {
+ type: "xavier"
+ }
+ bias_filler {
+ type: "constant"
+ value: 0.2
+ }
+ }
+}
+layer {
+ name: "inception_3b/relu_3x3"
+ type: "ReLU"
+ bottom: "inception_3b/3x3"
+ top: "inception_3b/3x3"
+}
+layer {
+ name: "inception_3b/5x5_reduce"
+ type: "Convolution"
+ bottom: "inception_3a/output"
+ top: "inception_3b/5x5_reduce"
+ param {
+ lr_mult: 1
+ decay_mult: 1
+ }
+ param {
+ lr_mult: 2
+ decay_mult: 0
+ }
+ convolution_param {
+ num_output: 32
+ kernel_size: 1
+ weight_filler {
+ type: "xavier"
+ }
+ bias_filler {
+ type: "constant"
+ value: 0.2
+ }
+ }
+}
+layer {
+ name: "inception_3b/relu_5x5_reduce"
+ type: "ReLU"
+ bottom: "inception_3b/5x5_reduce"
+ top: "inception_3b/5x5_reduce"
+}
+layer {
+ name: "inception_3b/5x5"
+ type: "Convolution"
+ bottom: "inception_3b/5x5_reduce"
+ top: "inception_3b/5x5"
+ param {
+ lr_mult: 1
+ decay_mult: 1
+ }
+ param {
+ lr_mult: 2
+ decay_mult: 0
+ }
+ convolution_param {
+ num_output: 96
+ pad: 2
+ kernel_size: 5
+ weight_filler {
+ type: "xavier"
+ }
+ bias_filler {
+ type: "constant"
+ value: 0.2
+ }
+ }
+}
+layer {
+ name: "inception_3b/relu_5x5"
+ type: "ReLU"
+ bottom: "inception_3b/5x5"
+ top: "inception_3b/5x5"
+}
+layer {
+ name: "inception_3b/pool"
+ type: "Pooling"
+ bottom: "inception_3a/output"
+ top: "inception_3b/pool"
+ pooling_param {
+ pool: MAX
+ kernel_size: 3
+ stride: 1
+ pad: 1
+ }
+}
+layer {
+ name: "inception_3b/pool_proj"
+ type: "Convolution"
+ bottom: "inception_3b/pool"
+ top: "inception_3b/pool_proj"
+ param {
+ lr_mult: 1
+ decay_mult: 1
+ }
+ param {
+ lr_mult: 2
+ decay_mult: 0
+ }
+ convolution_param {
+ num_output: 64
+ kernel_size: 1
+ weight_filler {
+ type: "xavier"
+ }
+ bias_filler {
+ type: "constant"
+ value: 0.2
+ }
+ }
+}
+layer {
+ name: "inception_3b/relu_pool_proj"
+ type: "ReLU"
+ bottom: "inception_3b/pool_proj"
+ top: "inception_3b/pool_proj"
+}
+layer {
+ name: "inception_3b/output"
+ type: "Concat"
+ bottom: "inception_3b/1x1"
+ bottom: "inception_3b/3x3"
+ bottom: "inception_3b/5x5"
+ bottom: "inception_3b/pool_proj"
+ top: "inception_3b/output"
+}
+layer {
+ name: "pool3/3x3_s2"
+ type: "Pooling"
+ bottom: "inception_3b/output"
+ top: "pool3/3x3_s2"
+ pooling_param {
+ pool: MAX
+ kernel_size: 3
+ stride: 2
+ }
+}
+layer {
+ name: "inception_4a/1x1"
+ type: "Convolution"
+ bottom: "pool3/3x3_s2"
+ top: "inception_4a/1x1"
+ param {
+ lr_mult: 1
+ decay_mult: 1
+ }
+ param {
+ lr_mult: 2
+ decay_mult: 0
+ }
+ convolution_param {
+ num_output: 192
+ kernel_size: 1
+ weight_filler {
+ type: "xavier"
+ }
+ bias_filler {
+ type: "constant"
+ value: 0.2
+ }
+ }
+}
+layer {
+ name: "inception_4a/relu_1x1"
+ type: "ReLU"
+ bottom: "inception_4a/1x1"
+ top: "inception_4a/1x1"
+}
+layer {
+ name: "inception_4a/3x3_reduce"
+ type: "Convolution"
+ bottom: "pool3/3x3_s2"
+ top: "inception_4a/3x3_reduce"
+ param {
+ lr_mult: 1
+ decay_mult: 1
+ }
+ param {
+ lr_mult: 2
+ decay_mult: 0
+ }
+ convolution_param {
+ num_output: 96
+ kernel_size: 1
+ weight_filler {
+ type: "xavier"
+ }
+ bias_filler {
+ type: "constant"
+ value: 0.2
+ }
+ }
+}
+layer {
+ name: "inception_4a/relu_3x3_reduce"
+ type: "ReLU"
+ bottom: "inception_4a/3x3_reduce"
+ top: "inception_4a/3x3_reduce"
+}
+layer {
+ name: "inception_4a/3x3"
+ type: "Convolution"
+ bottom: "inception_4a/3x3_reduce"
+ top: "inception_4a/3x3"
+ param {
+ lr_mult: 1
+ decay_mult: 1
+ }
+ param {
+ lr_mult: 2
+ decay_mult: 0
+ }
+ convolution_param {
+ num_output: 208
+ pad: 1
+ kernel_size: 3
+ weight_filler {
+ type: "xavier"
+ }
+ bias_filler {
+ type: "constant"
+ value: 0.2
+ }
+ }
+}
+layer {
+ name: "inception_4a/relu_3x3"
+ type: "ReLU"
+ bottom: "inception_4a/3x3"
+ top: "inception_4a/3x3"
+}
+layer {
+ name: "inception_4a/5x5_reduce"
+ type: "Convolution"
+ bottom: "pool3/3x3_s2"
+ top: "inception_4a/5x5_reduce"
+ param {
+ lr_mult: 1
+ decay_mult: 1
+ }
+ param {
+ lr_mult: 2
+ decay_mult: 0
+ }
+ convolution_param {
+ num_output: 16
+ kernel_size: 1
+ weight_filler {
+ type: "xavier"
+ }
+ bias_filler {
+ type: "constant"
+ value: 0.2
+ }
+ }
+}
+layer {
+ name: "inception_4a/relu_5x5_reduce"
+ type: "ReLU"
+ bottom: "inception_4a/5x5_reduce"
+ top: "inception_4a/5x5_reduce"
+}
+layer {
+ name: "inception_4a/5x5"
+ type: "Convolution"
+ bottom: "inception_4a/5x5_reduce"
+ top: "inception_4a/5x5"
+ param {
+ lr_mult: 1
+ decay_mult: 1
+ }
+ param {
+ lr_mult: 2
+ decay_mult: 0
+ }
+ convolution_param {
+ num_output: 48
+ pad: 2
+ kernel_size: 5
+ weight_filler {
+ type: "xavier"
+ }
+ bias_filler {
+ type: "constant"
+ value: 0.2
+ }
+ }
+}
+layer {
+ name: "inception_4a/relu_5x5"
+ type: "ReLU"
+ bottom: "inception_4a/5x5"
+ top: "inception_4a/5x5"
+}
+layer {
+ name: "inception_4a/pool"
+ type: "Pooling"
+ bottom: "pool3/3x3_s2"
+ top: "inception_4a/pool"
+ pooling_param {
+ pool: MAX
+ kernel_size: 3
+ stride: 1
+ pad: 1
+ }
+}
+layer {
+ name: "inception_4a/pool_proj"
+ type: "Convolution"
+ bottom: "inception_4a/pool"
+ top: "inception_4a/pool_proj"
+ param {
+ lr_mult: 1
+ decay_mult: 1
+ }
+ param {
+ lr_mult: 2
+ decay_mult: 0
+ }
+ convolution_param {
+ num_output: 64
+ kernel_size: 1
+ weight_filler {
+ type: "xavier"
+ }
+ bias_filler {
+ type: "constant"
+ value: 0.2
+ }
+ }
+}
+layer {
+ name: "inception_4a/relu_pool_proj"
+ type: "ReLU"
+ bottom: "inception_4a/pool_proj"
+ top: "inception_4a/pool_proj"
+}
+layer {
+ name: "inception_4a/output"
+ type: "Concat"
+ bottom: "inception_4a/1x1"
+ bottom: "inception_4a/3x3"
+ bottom: "inception_4a/5x5"
+ bottom: "inception_4a/pool_proj"
+ top: "inception_4a/output"
+}
+#layer {
+# name: "loss1/ave_pool"
+# type: "Pooling"
+# bottom: "inception_4a/output"
+# top: "loss1/ave_pool"
+# pooling_param {
+# pool: AVE
+# kernel_size: 5
+# stride: 3
+# }
+#}
+#layer {
+# name: "loss1/conv"
+# type: "Convolution"
+# bottom: "loss1/ave_pool"
+# top: "loss1/conv"
+# param {
+# lr_mult: 1
+# decay_mult: 1
+# }
+# param {
+# lr_mult: 2
+# decay_mult: 0
+# }
+# convolution_param {
+# num_output: 128
+# kernel_size: 1
+# weight_filler {
+# type: "xavier"
+# }
+# bias_filler {
+# type: "constant"
+# value: 0.2
+# }
+# }
+#}
+#layer {
+# name: "loss1/relu_conv"
+# type: "ReLU"
+# bottom: "loss1/conv"
+# top: "loss1/conv"
+#}
+#layer {
+# name: "loss1/fc"
+# type: "InnerProduct"
+# bottom: "loss1/conv"
+# top: "loss1/fc"
+# param {
+# lr_mult: 1
+# decay_mult: 1
+# }
+# param {
+# lr_mult: 2
+# decay_mult: 0
+# }
+# inner_product_param {
+# num_output: 1024
+# weight_filler {
+# type: "xavier"
+# }
+# bias_filler {
+# type: "constant"
+# value: 0.2
+# }
+# }
+#}
+#layer {
+# name: "loss1/relu_fc"
+# type: "ReLU"
+# bottom: "loss1/fc"
+# top: "loss1/fc"
+#}
+#layer {
+# name: "loss1/drop_fc"
+# type: "Dropout"
+# bottom: "loss1/fc"
+# top: "loss1/fc"
+# dropout_param {
+# dropout_ratio: 0.7
+# }
+#}
+#layer {
+# name: "loss1/classifier"
+# type: "InnerProduct"
+# bottom: "loss1/fc"
+# top: "loss1/classifier"
+# param {
+# lr_mult: 1
+# decay_mult: 1
+# }
+# param {
+# lr_mult: 2
+# decay_mult: 0
+# }
+# inner_product_param {
+# num_output: 1000
+# weight_filler {
+# type: "xavier"
+# }
+# bias_filler {
+# type: "constant"
+# value: 0
+# }
+# }
+#}
+#layer {
+# name: "loss1/loss"
+# type: "SoftmaxWithLoss"
+# bottom: "loss1/classifier"
+# bottom: "label"
+# top: "loss1/loss1"
+# loss_weight: 0.3
+#}
+layer {
+ name: "inception_4b/1x1"
+ type: "Convolution"
+ bottom: "inception_4a/output"
+ top: "inception_4b/1x1"
+ param {
+ lr_mult: 1
+ decay_mult: 1
+ }
+ param {
+ lr_mult: 2
+ decay_mult: 0
+ }
+ convolution_param {
+ num_output: 160
+ kernel_size: 1
+ weight_filler {
+ type: "xavier"
+ }
+ bias_filler {
+ type: "constant"
+ value: 0.2
+ }
+ }
+}
+layer {
+ name: "inception_4b/relu_1x1"
+ type: "ReLU"
+ bottom: "inception_4b/1x1"
+ top: "inception_4b/1x1"
+}
+layer {
+ name: "inception_4b/3x3_reduce"
+ type: "Convolution"
+ bottom: "inception_4a/output"
+ top: "inception_4b/3x3_reduce"
+ param {
+ lr_mult: 1
+ decay_mult: 1
+ }
+ param {
+ lr_mult: 2
+ decay_mult: 0
+ }
+ convolution_param {
+ num_output: 112
+ kernel_size: 1
+ weight_filler {
+ type: "xavier"
+ }
+ bias_filler {
+ type: "constant"
+ value: 0.2
+ }
+ }
+}
+layer {
+ name: "inception_4b/relu_3x3_reduce"
+ type: "ReLU"
+ bottom: "inception_4b/3x3_reduce"
+ top: "inception_4b/3x3_reduce"
+}
+layer {
+ name: "inception_4b/3x3"
+ type: "Convolution"
+ bottom: "inception_4b/3x3_reduce"
+ top: "inception_4b/3x3"
+ param {
+ lr_mult: 1
+ decay_mult: 1
+ }
+ param {
+ lr_mult: 2
+ decay_mult: 0
+ }
+ convolution_param {
+ num_output: 224
+ pad: 1
+ kernel_size: 3
+ weight_filler {
+ type: "xavier"
+ }
+ bias_filler {
+ type: "constant"
+ value: 0.2
+ }
+ }
+}
+layer {
+ name: "inception_4b/relu_3x3"
+ type: "ReLU"
+ bottom: "inception_4b/3x3"
+ top: "inception_4b/3x3"
+}
+layer {
+ name: "inception_4b/5x5_reduce"
+ type: "Convolution"
+ bottom: "inception_4a/output"
+ top: "inception_4b/5x5_reduce"
+ param {
+ lr_mult: 1
+ decay_mult: 1
+ }
+ param {
+ lr_mult: 2
+ decay_mult: 0
+ }
+ convolution_param {
+ num_output: 24
+ kernel_size: 1
+ weight_filler {
+ type: "xavier"
+ }
+ bias_filler {
+ type: "constant"
+ value: 0.2
+ }
+ }
+}
+layer {
+ name: "inception_4b/relu_5x5_reduce"
+ type: "ReLU"
+ bottom: "inception_4b/5x5_reduce"
+ top: "inception_4b/5x5_reduce"
+}
+layer {
+ name: "inception_4b/5x5"
+ type: "Convolution"
+ bottom: "inception_4b/5x5_reduce"
+ top: "inception_4b/5x5"
+ param {
+ lr_mult: 1
+ decay_mult: 1
+ }
+ param {
+ lr_mult: 2
+ decay_mult: 0
+ }
+ convolution_param {
+ num_output: 64
+ pad: 2
+ kernel_size: 5
+ weight_filler {
+ type: "xavier"
+ }
+ bias_filler {
+ type: "constant"
+ value: 0.2
+ }
+ }
+}
+layer {
+ name: "inception_4b/relu_5x5"
+ type: "ReLU"
+ bottom: "inception_4b/5x5"
+ top: "inception_4b/5x5"
+}
+layer {
+ name: "inception_4b/pool"
+ type: "Pooling"
+ bottom: "inception_4a/output"
+ top: "inception_4b/pool"
+ pooling_param {
+ pool: MAX
+ kernel_size: 3
+ stride: 1
+ pad: 1
+ }
+}
+layer {
+ name: "inception_4b/pool_proj"
+ type: "Convolution"
+ bottom: "inception_4b/pool"
+ top: "inception_4b/pool_proj"
+ param {
+ lr_mult: 1
+ decay_mult: 1
+ }
+ param {
+ lr_mult: 2
+ decay_mult: 0
+ }
+ convolution_param {
+ num_output: 64
+ kernel_size: 1
+ weight_filler {
+ type: "xavier"
+ }
+ bias_filler {
+ type: "constant"
+ value: 0.2
+ }
+ }
+}
+layer {
+ name: "inception_4b/relu_pool_proj"
+ type: "ReLU"
+ bottom: "inception_4b/pool_proj"
+ top: "inception_4b/pool_proj"
+}
+layer {
+ name: "inception_4b/output"
+ type: "Concat"
+ bottom: "inception_4b/1x1"
+ bottom: "inception_4b/3x3"
+ bottom: "inception_4b/5x5"
+ bottom: "inception_4b/pool_proj"
+ top: "inception_4b/output"
+}
+layer {
+ name: "inception_4c/1x1"
+ type: "Convolution"
+ bottom: "inception_4b/output"
+ top: "inception_4c/1x1"
+ param {
+ lr_mult: 1
+ decay_mult: 1
+ }
+ param {
+ lr_mult: 2
+ decay_mult: 0
+ }
+ convolution_param {
+ num_output: 128
+ kernel_size: 1
+ weight_filler {
+ type: "xavier"
+ }
+ bias_filler {
+ type: "constant"
+ value: 0.2
+ }
+ }
+}
+layer {
+ name: "inception_4c/relu_1x1"
+ type: "ReLU"
+ bottom: "inception_4c/1x1"
+ top: "inception_4c/1x1"
+}
+layer {
+ name: "inception_4c/3x3_reduce"
+ type: "Convolution"
+ bottom: "inception_4b/output"
+ top: "inception_4c/3x3_reduce"
+ param {
+ lr_mult: 1
+ decay_mult: 1
+ }
+ param {
+ lr_mult: 2
+ decay_mult: 0
+ }
+ convolution_param {
+ num_output: 128
+ kernel_size: 1
+ weight_filler {
+ type: "xavier"
+ }
+ bias_filler {
+ type: "constant"
+ value: 0.2
+ }
+ }
+}
+layer {
+ name: "inception_4c/relu_3x3_reduce"
+ type: "ReLU"
+ bottom: "inception_4c/3x3_reduce"
+ top: "inception_4c/3x3_reduce"
+}
+layer {
+ name: "inception_4c/3x3"
+ type: "Convolution"
+ bottom: "inception_4c/3x3_reduce"
+ top: "inception_4c/3x3"
+ param {
+ lr_mult: 1
+ decay_mult: 1
+ }
+ param {
+ lr_mult: 2
+ decay_mult: 0
+ }
+ convolution_param {
+ num_output: 256
+ pad: 1
+ kernel_size: 3
+ weight_filler {
+ type: "xavier"
+ }
+ bias_filler {
+ type: "constant"
+ value: 0.2
+ }
+ }
+}
+layer {
+ name: "inception_4c/relu_3x3"
+ type: "ReLU"
+ bottom: "inception_4c/3x3"
+ top: "inception_4c/3x3"
+}
+layer {
+ name: "inception_4c/5x5_reduce"
+ type: "Convolution"
+ bottom: "inception_4b/output"
+ top: "inception_4c/5x5_reduce"
+ param {
+ lr_mult: 1
+ decay_mult: 1
+ }
+ param {
+ lr_mult: 2
+ decay_mult: 0
+ }
+ convolution_param {
+ num_output: 24
+ kernel_size: 1
+ weight_filler {
+ type: "xavier"
+ }
+ bias_filler {
+ type: "constant"
+ value: 0.2
+ }
+ }
+}
+layer {
+ name: "inception_4c/relu_5x5_reduce"
+ type: "ReLU"
+ bottom: "inception_4c/5x5_reduce"
+ top: "inception_4c/5x5_reduce"
+}
+layer {
+ name: "inception_4c/5x5"
+ type: "Convolution"
+ bottom: "inception_4c/5x5_reduce"
+ top: "inception_4c/5x5"
+ param {
+ lr_mult: 1
+ decay_mult: 1
+ }
+ param {
+ lr_mult: 2
+ decay_mult: 0
+ }
+ convolution_param {
+ num_output: 64
+ pad: 2
+ kernel_size: 5
+ weight_filler {
+ type: "xavier"
+ }
+ bias_filler {
+ type: "constant"
+ value: 0.2
+ }
+ }
+}
+layer {
+ name: "inception_4c/relu_5x5"
+ type: "ReLU"
+ bottom: "inception_4c/5x5"
+ top: "inception_4c/5x5"
+}
+layer {
+ name: "inception_4c/pool"
+ type: "Pooling"
+ bottom: "inception_4b/output"
+ top: "inception_4c/pool"
+ pooling_param {
+ pool: MAX
+ kernel_size: 3
+ stride: 1
+ pad: 1
+ }
+}
+layer {
+ name: "inception_4c/pool_proj"
+ type: "Convolution"
+ bottom: "inception_4c/pool"
+ top: "inception_4c/pool_proj"
+ param {
+ lr_mult: 1
+ decay_mult: 1
+ }
+ param {
+ lr_mult: 2
+ decay_mult: 0
+ }
+ convolution_param {
+ num_output: 64
+ kernel_size: 1
+ weight_filler {
+ type: "xavier"
+ }
+ bias_filler {
+ type: "constant"
+ value: 0.2
+ }
+ }
+}
+layer {
+ name: "inception_4c/relu_pool_proj"
+ type: "ReLU"
+ bottom: "inception_4c/pool_proj"
+ top: "inception_4c/pool_proj"
+}
+layer {
+ name: "inception_4c/output"
+ type: "Concat"
+ bottom: "inception_4c/1x1"
+ bottom: "inception_4c/3x3"
+ bottom: "inception_4c/5x5"
+ bottom: "inception_4c/pool_proj"
+ top: "inception_4c/output"
+}
+layer {
+ name: "inception_4d/1x1"
+ type: "Convolution"
+ bottom: "inception_4c/output"
+ top: "inception_4d/1x1"
+ param {
+ lr_mult: 1
+ decay_mult: 1
+ }
+ param {
+ lr_mult: 2
+ decay_mult: 0
+ }
+ convolution_param {
+ num_output: 112
+ kernel_size: 1
+ weight_filler {
+ type: "xavier"
+ }
+ bias_filler {
+ type: "constant"
+ value: 0.2
+ }
+ }
+}
+layer {
+ name: "inception_4d/relu_1x1"
+ type: "ReLU"
+ bottom: "inception_4d/1x1"
+ top: "inception_4d/1x1"
+}
+layer {
+ name: "inception_4d/3x3_reduce"
+ type: "Convolution"
+ bottom: "inception_4c/output"
+ top: "inception_4d/3x3_reduce"
+ param {
+ lr_mult: 1
+ decay_mult: 1
+ }
+ param {
+ lr_mult: 2
+ decay_mult: 0
+ }
+ convolution_param {
+ num_output: 144
+ kernel_size: 1
+ weight_filler {
+ type: "xavier"
+ }
+ bias_filler {
+ type: "constant"
+ value: 0.2
+ }
+ }
+}
+layer {
+ name: "inception_4d/relu_3x3_reduce"
+ type: "ReLU"
+ bottom: "inception_4d/3x3_reduce"
+ top: "inception_4d/3x3_reduce"
+}
+layer {
+ name: "inception_4d/3x3"
+ type: "Convolution"
+ bottom: "inception_4d/3x3_reduce"
+ top: "inception_4d/3x3"
+ param {
+ lr_mult: 1
+ decay_mult: 1
+ }
+ param {
+ lr_mult: 2
+ decay_mult: 0
+ }
+ convolution_param {
+ num_output: 288
+ pad: 1
+ kernel_size: 3
+ weight_filler {
+ type: "xavier"
+ }
+ bias_filler {
+ type: "constant"
+ value: 0.2
+ }
+ }
+}
+layer {
+ name: "inception_4d/relu_3x3"
+ type: "ReLU"
+ bottom: "inception_4d/3x3"
+ top: "inception_4d/3x3"
+}
+layer {
+ name: "inception_4d/5x5_reduce"
+ type: "Convolution"
+ bottom: "inception_4c/output"
+ top: "inception_4d/5x5_reduce"
+ param {
+ lr_mult: 1
+ decay_mult: 1
+ }
+ param {
+ lr_mult: 2
+ decay_mult: 0
+ }
+ convolution_param {
+ num_output: 32
+ kernel_size: 1
+ weight_filler {
+ type: "xavier"
+ }
+ bias_filler {
+ type: "constant"
+ value: 0.2
+ }
+ }
+}
+layer {
+ name: "inception_4d/relu_5x5_reduce"
+ type: "ReLU"
+ bottom: "inception_4d/5x5_reduce"
+ top: "inception_4d/5x5_reduce"
+}
+layer {
+ name: "inception_4d/5x5"
+ type: "Convolution"
+ bottom: "inception_4d/5x5_reduce"
+ top: "inception_4d/5x5"
+ param {
+ lr_mult: 1
+ decay_mult: 1
+ }
+ param {
+ lr_mult: 2
+ decay_mult: 0
+ }
+ convolution_param {
+ num_output: 64
+ pad: 2
+ kernel_size: 5
+ weight_filler {
+ type: "xavier"
+ }
+ bias_filler {
+ type: "constant"
+ value: 0.2
+ }
+ }
+}
+layer {
+ name: "inception_4d/relu_5x5"
+ type: "ReLU"
+ bottom: "inception_4d/5x5"
+ top: "inception_4d/5x5"
+}
+layer {
+ name: "inception_4d/pool"
+ type: "Pooling"
+ bottom: "inception_4c/output"
+ top: "inception_4d/pool"
+ pooling_param {
+ pool: MAX
+ kernel_size: 3
+ stride: 1
+ pad: 1
+ }
+}
+layer {
+ name: "inception_4d/pool_proj"
+ type: "Convolution"
+ bottom: "inception_4d/pool"
+ top: "inception_4d/pool_proj"
+ param {
+ lr_mult: 1
+ decay_mult: 1
+ }
+ param {
+ lr_mult: 2
+ decay_mult: 0
+ }
+ convolution_param {
+ num_output: 64
+ kernel_size: 1
+ weight_filler {
+ type: "xavier"
+ }
+ bias_filler {
+ type: "constant"
+ value: 0.2
+ }
+ }
+}
+layer {
+ name: "inception_4d/relu_pool_proj"
+ type: "ReLU"
+ bottom: "inception_4d/pool_proj"
+ top: "inception_4d/pool_proj"
+}
+layer {
+ name: "inception_4d/output"
+ type: "Concat"
+ bottom: "inception_4d/1x1"
+ bottom: "inception_4d/3x3"
+ bottom: "inception_4d/5x5"
+ bottom: "inception_4d/pool_proj"
+ top: "inception_4d/output"
+}
+#layer {
+# name: "loss2/ave_pool"
+# type: "Pooling"
+# bottom: "inception_4d/output"
+# top: "loss2/ave_pool"
+# pooling_param {
+# pool: AVE
+# kernel_size: 5
+# stride: 3
+# }
+#}
+#layer {
+# name: "loss2/conv"
+# type: "Convolution"
+# bottom: "loss2/ave_pool"
+# top: "loss2/conv"
+# param {
+# lr_mult: 1
+# decay_mult: 1
+# }
+# param {
+# lr_mult: 2
+# decay_mult: 0
+# }
+# convolution_param {
+# num_output: 128
+# kernel_size: 1
+# weight_filler {
+# type: "xavier"
+# }
+# bias_filler {
+# type: "constant"
+# value: 0.2
+# }
+# }
+#}
+#layer {
+# name: "loss2/relu_conv"
+# type: "ReLU"
+# bottom: "loss2/conv"
+# top: "loss2/conv"
+#}
+#layer {
+# name: "loss2/fc"
+# type: "InnerProduct"
+# bottom: "loss2/conv"
+# top: "loss2/fc"
+# param {
+# lr_mult: 1
+# decay_mult: 1
+# }
+# param {
+# lr_mult: 2
+# decay_mult: 0
+# }
+# inner_product_param {
+# num_output: 1024
+# weight_filler {
+# type: "xavier"
+# }
+# bias_filler {
+# type: "constant"
+# value: 0.2
+# }
+# }
+#}
+#layer {
+# name: "loss2/relu_fc"
+# type: "ReLU"
+# bottom: "loss2/fc"
+# top: "loss2/fc"
+#}
+#layer {
+# name: "loss2/drop_fc"
+# type: "Dropout"
+# bottom: "loss2/fc"
+# top: "loss2/fc"
+# dropout_param {
+# dropout_ratio: 0.7
+# }
+#}
+#layer {
+# name: "loss2/classifier"
+# type: "InnerProduct"
+# bottom: "loss2/fc"
+# top: "loss2/classifier"
+# param {
+# lr_mult: 1
+# decay_mult: 1
+# }
+# param {
+# lr_mult: 2
+# decay_mult: 0
+# }
+# inner_product_param {
+# num_output: 1000
+# weight_filler {
+# type: "xavier"
+# }
+# bias_filler {
+# type: "constant"
+# value: 0
+# }
+# }
+#}
+#layer {
+# name: "loss2/loss"
+# type: "SoftmaxWithLoss"
+# bottom: "loss2/classifier"
+# bottom: "label"
+# top: "loss2/loss1"
+# loss_weight: 0.3
+#}
+layer {
+ name: "inception_4e/1x1"
+ type: "Convolution"
+ bottom: "inception_4d/output"
+ top: "inception_4e/1x1"
+ param {
+ lr_mult: 1
+ decay_mult: 1
+ }
+ param {
+ lr_mult: 2
+ decay_mult: 0
+ }
+ convolution_param {
+ num_output: 256
+ kernel_size: 1
+ weight_filler {
+ type: "xavier"
+ }
+ bias_filler {
+ type: "constant"
+ value: 0.2
+ }
+ }
+}
+layer {
+ name: "inception_4e/relu_1x1"
+ type: "ReLU"
+ bottom: "inception_4e/1x1"
+ top: "inception_4e/1x1"
+}
+layer {
+ name: "inception_4e/3x3_reduce"
+ type: "Convolution"
+ bottom: "inception_4d/output"
+ top: "inception_4e/3x3_reduce"
+ param {
+ lr_mult: 1
+ decay_mult: 1
+ }
+ param {
+ lr_mult: 2
+ decay_mult: 0
+ }
+ convolution_param {
+ num_output: 160
+ kernel_size: 1
+ weight_filler {
+ type: "xavier"
+ }
+ bias_filler {
+ type: "constant"
+ value: 0.2
+ }
+ }
+}
+layer {
+ name: "inception_4e/relu_3x3_reduce"
+ type: "ReLU"
+ bottom: "inception_4e/3x3_reduce"
+ top: "inception_4e/3x3_reduce"
+}
+layer {
+ name: "inception_4e/3x3"
+ type: "Convolution"
+ bottom: "inception_4e/3x3_reduce"
+ top: "inception_4e/3x3"
+ param {
+ lr_mult: 1
+ decay_mult: 1
+ }
+ param {
+ lr_mult: 2
+ decay_mult: 0
+ }
+ convolution_param {
+ num_output: 320
+ pad: 1
+ kernel_size: 3
+ weight_filler {
+ type: "xavier"
+ }
+ bias_filler {
+ type: "constant"
+ value: 0.2
+ }
+ }
+}
+layer {
+ name: "inception_4e/relu_3x3"
+ type: "ReLU"
+ bottom: "inception_4e/3x3"
+ top: "inception_4e/3x3"
+}
+layer {
+ name: "inception_4e/5x5_reduce"
+ type: "Convolution"
+ bottom: "inception_4d/output"
+ top: "inception_4e/5x5_reduce"
+ param {
+ lr_mult: 1
+ decay_mult: 1
+ }
+ param {
+ lr_mult: 2
+ decay_mult: 0
+ }
+ convolution_param {
+ num_output: 32
+ kernel_size: 1
+ weight_filler {
+ type: "xavier"
+ }
+ bias_filler {
+ type: "constant"
+ value: 0.2
+ }
+ }
+}
+layer {
+ name: "inception_4e/relu_5x5_reduce"
+ type: "ReLU"
+ bottom: "inception_4e/5x5_reduce"
+ top: "inception_4e/5x5_reduce"
+}
+layer {
+ name: "inception_4e/5x5"
+ type: "Convolution"
+ bottom: "inception_4e/5x5_reduce"
+ top: "inception_4e/5x5"
+ param {
+ lr_mult: 1
+ decay_mult: 1
+ }
+ param {
+ lr_mult: 2
+ decay_mult: 0
+ }
+ convolution_param {
+ num_output: 128
+ pad: 2
+ kernel_size: 5
+ weight_filler {
+ type: "xavier"
+ }
+ bias_filler {
+ type: "constant"
+ value: 0.2
+ }
+ }
+}
+layer {
+ name: "inception_4e/relu_5x5"
+ type: "ReLU"
+ bottom: "inception_4e/5x5"
+ top: "inception_4e/5x5"
+}
+layer {
+ name: "inception_4e/pool"
+ type: "Pooling"
+ bottom: "inception_4d/output"
+ top: "inception_4e/pool"
+ pooling_param {
+ pool: MAX
+ kernel_size: 3
+ stride: 1
+ pad: 1
+ }
+}
+layer {
+ name: "inception_4e/pool_proj"
+ type: "Convolution"
+ bottom: "inception_4e/pool"
+ top: "inception_4e/pool_proj"
+ param {
+ lr_mult: 1
+ decay_mult: 1
+ }
+ param {
+ lr_mult: 2
+ decay_mult: 0
+ }
+ convolution_param {
+ num_output: 128
+ kernel_size: 1
+ weight_filler {
+ type: "xavier"
+ }
+ bias_filler {
+ type: "constant"
+ value: 0.2
+ }
+ }
+}
+layer {
+ name: "inception_4e/relu_pool_proj"
+ type: "ReLU"
+ bottom: "inception_4e/pool_proj"
+ top: "inception_4e/pool_proj"
+}
+layer {
+ name: "inception_4e/output"
+ type: "Concat"
+ bottom: "inception_4e/1x1"
+ bottom: "inception_4e/3x3"
+ bottom: "inception_4e/5x5"
+ bottom: "inception_4e/pool_proj"
+ top: "inception_4e/output"
+}
+layer {
+ name: "pool4/3x3_s2"
+ type: "Pooling"
+ bottom: "inception_4e/output"
+ top: "pool4/3x3_s2"
+ pooling_param {
+ pool: MAX
+ kernel_size: 3
+ stride: 2
+ }
+}
+layer {
+ name: "inception_5a/1x1"
+ type: "Convolution"
+ bottom: "pool4/3x3_s2"
+ top: "inception_5a/1x1"
+ param {
+ lr_mult: 1
+ decay_mult: 1
+ }
+ param {
+ lr_mult: 2
+ decay_mult: 0
+ }
+ convolution_param {
+ num_output: 256
+ kernel_size: 1
+ weight_filler {
+ type: "xavier"
+ }
+ bias_filler {
+ type: "constant"
+ value: 0.2
+ }
+ }
+}
+layer {
+ name: "inception_5a/relu_1x1"
+ type: "ReLU"
+ bottom: "inception_5a/1x1"
+ top: "inception_5a/1x1"
+}
+layer {
+ name: "inception_5a/3x3_reduce"
+ type: "Convolution"
+ bottom: "pool4/3x3_s2"
+ top: "inception_5a/3x3_reduce"
+ param {
+ lr_mult: 1
+ decay_mult: 1
+ }
+ param {
+ lr_mult: 2
+ decay_mult: 0
+ }
+ convolution_param {
+ num_output: 160
+ kernel_size: 1
+ weight_filler {
+ type: "xavier"
+ }
+ bias_filler {
+ type: "constant"
+ value: 0.2
+ }
+ }
+}
+layer {
+ name: "inception_5a/relu_3x3_reduce"
+ type: "ReLU"
+ bottom: "inception_5a/3x3_reduce"
+ top: "inception_5a/3x3_reduce"
+}
+layer {
+ name: "inception_5a/3x3"
+ type: "Convolution"
+ bottom: "inception_5a/3x3_reduce"
+ top: "inception_5a/3x3"
+ param {
+ lr_mult: 1
+ decay_mult: 1
+ }
+ param {
+ lr_mult: 2
+ decay_mult: 0
+ }
+ convolution_param {
+ num_output: 320
+ pad: 1
+ kernel_size: 3
+ weight_filler {
+ type: "xavier"
+ }
+ bias_filler {
+ type: "constant"
+ value: 0.2
+ }
+ }
+}
+layer {
+ name: "inception_5a/relu_3x3"
+ type: "ReLU"
+ bottom: "inception_5a/3x3"
+ top: "inception_5a/3x3"
+}
+layer {
+ name: "inception_5a/5x5_reduce"
+ type: "Convolution"
+ bottom: "pool4/3x3_s2"
+ top: "inception_5a/5x5_reduce"
+ param {
+ lr_mult: 1
+ decay_mult: 1
+ }
+ param {
+ lr_mult: 2
+ decay_mult: 0
+ }
+ convolution_param {
+ num_output: 32
+ kernel_size: 1
+ weight_filler {
+ type: "xavier"
+ }
+ bias_filler {
+ type: "constant"
+ value: 0.2
+ }
+ }
+}
+layer {
+ name: "inception_5a/relu_5x5_reduce"
+ type: "ReLU"
+ bottom: "inception_5a/5x5_reduce"
+ top: "inception_5a/5x5_reduce"
+}
+layer {
+ name: "inception_5a/5x5"
+ type: "Convolution"
+ bottom: "inception_5a/5x5_reduce"
+ top: "inception_5a/5x5"
+ param {
+ lr_mult: 1
+ decay_mult: 1
+ }
+ param {
+ lr_mult: 2
+ decay_mult: 0
+ }
+ convolution_param {
+ num_output: 128
+ pad: 2
+ kernel_size: 5
+ weight_filler {
+ type: "xavier"
+ }
+ bias_filler {
+ type: "constant"
+ value: 0.2
+ }
+ }
+}
+layer {
+ name: "inception_5a/relu_5x5"
+ type: "ReLU"
+ bottom: "inception_5a/5x5"
+ top: "inception_5a/5x5"
+}
+layer {
+ name: "inception_5a/pool"
+ type: "Pooling"
+ bottom: "pool4/3x3_s2"
+ top: "inception_5a/pool"
+ pooling_param {
+ pool: MAX
+ kernel_size: 3
+ stride: 1
+ pad: 1
+ }
+}
+layer {
+ name: "inception_5a/pool_proj"
+ type: "Convolution"
+ bottom: "inception_5a/pool"
+ top: "inception_5a/pool_proj"
+ param {
+ lr_mult: 1
+ decay_mult: 1
+ }
+ param {
+ lr_mult: 2
+ decay_mult: 0
+ }
+ convolution_param {
+ num_output: 128
+ kernel_size: 1
+ weight_filler {
+ type: "xavier"
+ }
+ bias_filler {
+ type: "constant"
+ value: 0.2
+ }
+ }
+}
+layer {
+ name: "inception_5a/relu_pool_proj"
+ type: "ReLU"
+ bottom: "inception_5a/pool_proj"
+ top: "inception_5a/pool_proj"
+}
+layer {
+ name: "inception_5a/output"
+ type: "Concat"
+ bottom: "inception_5a/1x1"
+ bottom: "inception_5a/3x3"
+ bottom: "inception_5a/5x5"
+ bottom: "inception_5a/pool_proj"
+ top: "inception_5a/output"
+}
+layer {
+ name: "inception_5b/1x1"
+ type: "Convolution"
+ bottom: "inception_5a/output"
+ top: "inception_5b/1x1"
+ param {
+ lr_mult: 1
+ decay_mult: 1
+ }
+ param {
+ lr_mult: 2
+ decay_mult: 0
+ }
+ convolution_param {
+ num_output: 384
+ kernel_size: 1
+ weight_filler {
+ type: "xavier"
+ }
+ bias_filler {
+ type: "constant"
+ value: 0.2
+ }
+ }
+}
+layer {
+ name: "inception_5b/relu_1x1"
+ type: "ReLU"
+ bottom: "inception_5b/1x1"
+ top: "inception_5b/1x1"
+}
+layer {
+ name: "inception_5b/3x3_reduce"
+ type: "Convolution"
+ bottom: "inception_5a/output"
+ top: "inception_5b/3x3_reduce"
+ param {
+ lr_mult: 1
+ decay_mult: 1
+ }
+ param {
+ lr_mult: 2
+ decay_mult: 0
+ }
+ convolution_param {
+ num_output: 192
+ kernel_size: 1
+ weight_filler {
+ type: "xavier"
+ }
+ bias_filler {
+ type: "constant"
+ value: 0.2
+ }
+ }
+}
+layer {
+ name: "inception_5b/relu_3x3_reduce"
+ type: "ReLU"
+ bottom: "inception_5b/3x3_reduce"
+ top: "inception_5b/3x3_reduce"
+}
+layer {
+ name: "inception_5b/3x3"
+ type: "Convolution"
+ bottom: "inception_5b/3x3_reduce"
+ top: "inception_5b/3x3"
+ param {
+ lr_mult: 1
+ decay_mult: 1
+ }
+ param {
+ lr_mult: 2
+ decay_mult: 0
+ }
+ convolution_param {
+ num_output: 384
+ pad: 1
+ kernel_size: 3
+ weight_filler {
+ type: "xavier"
+ }
+ bias_filler {
+ type: "constant"
+ value: 0.2
+ }
+ }
+}
+layer {
+ name: "inception_5b/relu_3x3"
+ type: "ReLU"
+ bottom: "inception_5b/3x3"
+ top: "inception_5b/3x3"
+}
+layer {
+ name: "inception_5b/5x5_reduce"
+ type: "Convolution"
+ bottom: "inception_5a/output"
+ top: "inception_5b/5x5_reduce"
+ param {
+ lr_mult: 1
+ decay_mult: 1
+ }
+ param {
+ lr_mult: 2
+ decay_mult: 0
+ }
+ convolution_param {
+ num_output: 48
+ kernel_size: 1
+ weight_filler {
+ type: "xavier"
+ }
+ bias_filler {
+ type: "constant"
+ value: 0.2
+ }
+ }
+}
+layer {
+ name: "inception_5b/relu_5x5_reduce"
+ type: "ReLU"
+ bottom: "inception_5b/5x5_reduce"
+ top: "inception_5b/5x5_reduce"
+}
+layer {
+ name: "inception_5b/5x5"
+ type: "Convolution"
+ bottom: "inception_5b/5x5_reduce"
+ top: "inception_5b/5x5"
+ param {
+ lr_mult: 1
+ decay_mult: 1
+ }
+ param {
+ lr_mult: 2
+ decay_mult: 0
+ }
+ convolution_param {
+ num_output: 128
+ pad: 2
+ kernel_size: 5
+ weight_filler {
+ type: "xavier"
+ }
+ bias_filler {
+ type: "constant"
+ value: 0.2
+ }
+ }
+}
+layer {
+ name: "inception_5b/relu_5x5"
+ type: "ReLU"
+ bottom: "inception_5b/5x5"
+ top: "inception_5b/5x5"
+}
+layer {
+ name: "inception_5b/pool"
+ type: "Pooling"
+ bottom: "inception_5a/output"
+ top: "inception_5b/pool"
+ pooling_param {
+ pool: MAX
+ kernel_size: 3
+ stride: 1
+ pad: 1
+ }
+}
+layer {
+ name: "inception_5b/pool_proj"
+ type: "Convolution"
+ bottom: "inception_5b/pool"
+ top: "inception_5b/pool_proj"
+ param {
+ lr_mult: 1
+ decay_mult: 1
+ }
+ param {
+ lr_mult: 2
+ decay_mult: 0
+ }
+ convolution_param {
+ num_output: 128
+ kernel_size: 1
+ weight_filler {
+ type: "xavier"
+ }
+ bias_filler {
+ type: "constant"
+ value: 0.2
+ }
+ }
+}
+layer {
+ name: "inception_5b/relu_pool_proj"
+ type: "ReLU"
+ bottom: "inception_5b/pool_proj"
+ top: "inception_5b/pool_proj"
+}
+layer {
+ name: "inception_5b/output"
+ type: "Concat"
+ bottom: "inception_5b/1x1"
+ bottom: "inception_5b/3x3"
+ bottom: "inception_5b/5x5"
+ bottom: "inception_5b/pool_proj"
+ top: "inception_5b/output"
+}
+layer {
+ name: "pool5/7x7_s1"
+ type: "Pooling"
+ bottom: "inception_5b/output"
+ top: "pool5/7x7_s1"
+ pooling_param {
+ pool: AVE
+ kernel_size: 7
+ stride: 1
+ }
+}
+layer {
+ name: "pool5/drop_7x7_s1"
+ type: "Dropout"
+ bottom: "pool5/7x7_s1"
+ top: "pool5/7x7_s1"
+ dropout_param {
+ dropout_ratio: 0.4
+ }
+}
+layer {
+ name: "loss3/classifier"
+ type: "InnerProduct"
+ bottom: "pool5/7x7_s1"
+ top: "loss3/classifier"
+ param {
+ lr_mult: 1
+ decay_mult: 1
+ }
+ param {
+ lr_mult: 2
+ decay_mult: 0
+ }
+ inner_product_param {
+ num_output: 1000
+ weight_filler {
+ type: "xavier"
+ }
+ bias_filler {
+ type: "constant"
+ value: 0
+ }
+ }
+}
+layer {
+ name: "loss3/loss3"
+ type: "SoftmaxWithLoss"
+ bottom: "loss3/classifier"
+ bottom: "label"
+ top: "loss3/loss3"
+ loss_weight: 1
+}
diff --git a/benchmark/caffe/image/run.sh b/benchmark/caffe/image/run.sh
new file mode 100755
index 0000000000000000000000000000000000000000..aa9ac20ca5cc1d48a07ce39f7d6c6d70ad4121ab
--- /dev/null
+++ b/benchmark/caffe/image/run.sh
@@ -0,0 +1,30 @@
+set -e
+
+function test() {
+ cfg=$1
+ batch=$2
+ prefix=$3
+ sed -i "/input: \"data\"/{n;s/^input_dim.*/input_dim: $batch/g}" $cfg
+ sed -i "/input: \"label\"/{n;s/^input_dim.*/input_dim: $batch/g}" $cfg
+ caffe time --model=$cfg --iterations=50 --gpu 0 > logs/$prefix-1gpu-batch${batch}.log 2>&1
+}
+
+if [ ! -d "logs" ]; then
+ mkdir logs
+fi
+
+# alexnet
+test alexnet.prototxt 64 alexnet
+test alexnet.prototxt 128 alexnet
+test alexnet.prototxt 256 alexnet
+test alexnet.prototxt 512 alexnet
+
+# googlenet
+test googlenet.prototxt 64 googlenet
+test googlenet.prototxt 128 googlenet
+
+# small net
+test smallnet_mnist_cifar.prototxt 64 smallnet
+test smallnet_mnist_cifar.prototxt 128 smallnet
+test smallnet_mnist_cifar.prototxt 256 smallnet
+test smallnet_mnist_cifar.prototxt 512 smallnet
diff --git a/benchmark/caffe/image/run_multi.sh b/benchmark/caffe/image/run_multi.sh
new file mode 100755
index 0000000000000000000000000000000000000000..9a0a71bc185a421842265ea6d2310429adb86913
--- /dev/null
+++ b/benchmark/caffe/image/run_multi.sh
@@ -0,0 +1,24 @@
+#!/bin/bash
+set -e
+
+function test() {
+ cfg=$1
+ batch=$2
+ prefix=$3
+ batch_per_gpu=`expr ${batch} / 4`
+ sed -i "/input: \"data\"/{n;s/^input_dim.*/input_dim: ${batch_per_gpu}/g}" $cfg
+ sed -i "/input: \"label\"/{n;s/^input_dim.*/input_dim: ${batch_per_gpu}/g}" $cfg
+ sed -i "1c\net : \"${cfg}\"" solver.prototxt
+ caffe train --solver=solver.prototxt -gpu 0,1,2,3 > logs/${prefix}-4gpu-batch${batch}.log 2>&1
+}
+
+if [ ! -d "logs" ]; then
+ mkdir logs
+fi
+
+# alexnet
+test alexnet.prototxt 512 alexnet
+test alexnet.prototxt 1024 alexnet
+
+# googlnet
+test googlenet.prototxt 512 googlenet
diff --git a/benchmark/caffe/image/smallnet_mnist_cifar.prototxt b/benchmark/caffe/image/smallnet_mnist_cifar.prototxt
new file mode 100644
index 0000000000000000000000000000000000000000..3cb0e32bbfb9f785ece6d428356987e5503dd25d
--- /dev/null
+++ b/benchmark/caffe/image/smallnet_mnist_cifar.prototxt
@@ -0,0 +1,198 @@
+name: "mnist/cifar"
+input: "data"
+input_dim: 128
+input_dim: 3
+input_dim: 32
+input_dim: 32
+input: "label"
+input_dim: 128
+input_dim: 1
+input_dim: 1
+input_dim: 1
+layer {
+ name: "conv1"
+ type: "Convolution"
+ bottom: "data"
+ top: "conv1"
+ param {
+ lr_mult: 1
+ }
+ param {
+ lr_mult: 2
+ }
+ convolution_param {
+ num_output: 32
+ pad: 2
+ kernel_size: 5
+ stride: 1
+ weight_filler {
+ type: "gaussian"
+ std: 0.0001
+ }
+ bias_filler {
+ type: "constant"
+ }
+ }
+}
+layer {
+ name: "pool1"
+ type: "Pooling"
+ bottom: "conv1"
+ top: "pool1"
+ pooling_param {
+ pool: MAX
+ kernel_size: 3
+ stride: 2
+ }
+}
+layer {
+ name: "relu1"
+ type: "ReLU"
+ bottom: "pool1"
+ top: "pool1"
+}
+layer {
+ name: "conv2"
+ type: "Convolution"
+ bottom: "pool1"
+ top: "conv2"
+ param {
+ lr_mult: 1
+ }
+ param {
+ lr_mult: 2
+ }
+ convolution_param {
+ num_output: 32
+ pad: 2
+ kernel_size: 5
+ stride: 1
+ weight_filler {
+ type: "gaussian"
+ std: 0.01
+ }
+ bias_filler {
+ type: "constant"
+ }
+ }
+}
+layer {
+ name: "relu2"
+ type: "ReLU"
+ bottom: "conv2"
+ top: "conv2"
+}
+layer {
+ name: "pool2"
+ type: "Pooling"
+ bottom: "conv2"
+ top: "pool2"
+ pooling_param {
+ pool: AVE
+ kernel_size: 3
+ stride: 2
+ }
+}
+layer {
+ name: "conv3"
+ type: "Convolution"
+ bottom: "pool2"
+ top: "conv3"
+ param {
+ lr_mult: 1
+ }
+ param {
+ lr_mult: 2
+ }
+ convolution_param {
+ num_output: 64
+ pad: 2
+ kernel_size: 5
+ stride: 1
+ weight_filler {
+ type: "gaussian"
+ std: 0.01
+ }
+ bias_filler {
+ type: "constant"
+ }
+ }
+}
+layer {
+ name: "relu3"
+ type: "ReLU"
+ bottom: "conv3"
+ top: "conv3"
+}
+layer {
+ name: "pool3"
+ type: "Pooling"
+ bottom: "conv3"
+ top: "pool3"
+ pooling_param {
+ pool: AVE
+ kernel_size: 3
+ stride: 2
+ }
+}
+layer {
+ name: "ip1"
+ type: "InnerProduct"
+ bottom: "pool3"
+ top: "ip1"
+ param {
+ lr_mult: 1
+ }
+ param {
+ lr_mult: 2
+ }
+ inner_product_param {
+ num_output: 64
+ weight_filler {
+ type: "gaussian"
+ std: 0.1
+ }
+ bias_filler {
+ type: "constant"
+ }
+ }
+}
+layer {
+ name: "ip2"
+ type: "InnerProduct"
+ bottom: "ip1"
+ top: "ip2"
+ param {
+ lr_mult: 1
+ }
+ param {
+ lr_mult: 2
+ }
+ inner_product_param {
+ num_output: 10
+ weight_filler {
+ type: "gaussian"
+ std: 0.1
+ }
+ bias_filler {
+ type: "constant"
+ }
+ }
+}
+layer {
+ name: "accuracy"
+ type: "Accuracy"
+ bottom: "ip2"
+ bottom: "label"
+ top: "accuracy"
+ include {
+ phase: TEST
+ }
+}
+layer {
+ name: "loss"
+ type: "SoftmaxWithLoss"
+ bottom: "ip2"
+ bottom: "label"
+ top: "loss"
+}
diff --git a/benchmark/caffe/image/solver.prototxt b/benchmark/caffe/image/solver.prototxt
new file mode 100644
index 0000000000000000000000000000000000000000..61c10284e6027b4cc0b3d4c8fcf949e0a5a22a85
--- /dev/null
+++ b/benchmark/caffe/image/solver.prototxt
@@ -0,0 +1,10 @@
+net: "alexnet.prototxt"
+base_lr: 0.01
+lr_policy: "fixed"
+display: 20
+max_iter: 200
+momentum: 0.9
+weight_decay: 0.0005
+snapshot: 10000
+snapshot_prefix: "models/caffe_alexnet_train"
+solver_mode: GPU
diff --git a/benchmark/figs/alexnet-4gpu.png b/benchmark/figs/alexnet-4gpu.png
new file mode 100644
index 0000000000000000000000000000000000000000..28b95a44508f0ee7ad270c9ccdf8659009406b03
Binary files /dev/null and b/benchmark/figs/alexnet-4gpu.png differ
diff --git a/benchmark/figs/googlenet-4gpu.png b/benchmark/figs/googlenet-4gpu.png
new file mode 100644
index 0000000000000000000000000000000000000000..9b5331f05a3e54cacf949f10b6603bf627a6d106
Binary files /dev/null and b/benchmark/figs/googlenet-4gpu.png differ
diff --git a/benchmark/figs/rnn_lstm_4gpus.png b/benchmark/figs/rnn_lstm_4gpus.png
new file mode 100644
index 0000000000000000000000000000000000000000..973ce2fa5f65e9681c972d4f5bd5776b5c4aa264
Binary files /dev/null and b/benchmark/figs/rnn_lstm_4gpus.png differ
diff --git a/benchmark/figs/rnn_lstm_cls.png b/benchmark/figs/rnn_lstm_cls.png
new file mode 100644
index 0000000000000000000000000000000000000000..26d05cac11aa7ae8cdfbcd8c4401f6547a9404f6
Binary files /dev/null and b/benchmark/figs/rnn_lstm_cls.png differ
diff --git a/benchmark/paddle/image/alexnet.py b/benchmark/paddle/image/alexnet.py
new file mode 100644
index 0000000000000000000000000000000000000000..3358d43a4b08c6a9b89d59e1a8be53ee1f12bbe0
--- /dev/null
+++ b/benchmark/paddle/image/alexnet.py
@@ -0,0 +1,64 @@
+#!/usr/bin/env python
+
+from paddle.trainer_config_helpers import *
+
+height = 227
+width = 227
+num_class = 1000
+batch_size = get_config_arg('batch_size', int, 128)
+
+args = {'height': height, 'width': width, 'color': True, 'num_class': num_class}
+define_py_data_sources2(
+ "train.list", None, module="provider", obj="process", args=args)
+
+settings(
+ batch_size=batch_size,
+ learning_rate=0.01 / batch_size,
+ learning_method=MomentumOptimizer(0.9),
+ regularization=L2Regularization(0.0005 * batch_size))
+
+# conv1
+net = data_layer('data', size=height * width * 3)
+net = img_conv_layer(
+ input=net,
+ filter_size=11,
+ num_channels=3,
+ num_filters=96,
+ stride=4,
+ padding=1)
+net = img_cmrnorm_layer(input=net, size=5, scale=0.0001, power=0.75)
+net = img_pool_layer(input=net, pool_size=3, stride=2)
+
+# conv2
+net = img_conv_layer(
+ input=net, filter_size=5, num_filters=256, stride=1, padding=2, groups=1)
+net = img_cmrnorm_layer(input=net, size=5, scale=0.0001, power=0.75)
+net = img_pool_layer(input=net, pool_size=3, stride=2)
+
+# conv3
+net = img_conv_layer(
+ input=net, filter_size=3, num_filters=384, stride=1, padding=1)
+# conv4
+net = img_conv_layer(
+ input=net, filter_size=3, num_filters=384, stride=1, padding=1, groups=1)
+
+# conv5
+net = img_conv_layer(
+ input=net, filter_size=3, num_filters=256, stride=1, padding=1, groups=1)
+net = img_pool_layer(input=net, pool_size=3, stride=2)
+
+net = fc_layer(
+ input=net,
+ size=4096,
+ act=ReluActivation(),
+ layer_attr=ExtraAttr(drop_rate=0.5))
+net = fc_layer(
+ input=net,
+ size=4096,
+ act=ReluActivation(),
+ layer_attr=ExtraAttr(drop_rate=0.5))
+net = fc_layer(input=net, size=1000, act=SoftmaxActivation())
+
+lab = data_layer('label', num_class)
+loss = cross_entropy(input=net, label=lab)
+outputs(loss)
diff --git a/benchmark/paddle/image/googlenet.py b/benchmark/paddle/image/googlenet.py
new file mode 100644
index 0000000000000000000000000000000000000000..bc893bab98c4d2e07c62fbd012d51a0939db4766
--- /dev/null
+++ b/benchmark/paddle/image/googlenet.py
@@ -0,0 +1,226 @@
+#!/usr/bin/env python
+from paddle.trainer_config_helpers import *
+
+height = 224
+width = 224
+num_class = 1000
+batch_size = get_config_arg('batch_size', int, 128)
+
+args = {'height': height, 'width': width, 'color': True, 'num_class': num_class}
+define_py_data_sources2(
+ "train.list", None, module="provider", obj="process", args=args)
+
+settings(
+ batch_size=batch_size,
+ learning_rate=0.01 / batch_size,
+ learning_method=MomentumOptimizer(0.9),
+ regularization=L2Regularization(0.0005 * batch_size))
+
+def inception2(name, input, channels, \
+ filter1,
+ filter3R, filter3,
+ filter5R, filter5,
+ proj):
+
+ conv1 = name + '_1'
+ conv3r = name + '_3r'
+ conv3 = name + '_3'
+ conv5r = name + '_5r'
+ conv5 = name + '_5'
+ maxpool = name + '_max'
+ convproj = name + '_proj'
+
+ cov1 = img_conv_layer(
+ name=conv1,
+ input=input,
+ filter_size=1,
+ num_channels=channels,
+ num_filters=filter1,
+ stride=1,
+ padding=0)
+
+ cov3r = img_conv_layer(
+ name=conv3r,
+ input=input,
+ filter_size=1,
+ num_channels=channels,
+ num_filters=filter3R,
+ stride=1,
+ padding=0)
+ cov3 = img_conv_layer(
+ name=conv3,
+ input=cov3r,
+ filter_size=3,
+ num_filters=filter3,
+ stride=1,
+ padding=1)
+
+ cov5r = img_conv_layer(
+ name=conv5r,
+ input=input,
+ filter_size=1,
+ num_channels=channels,
+ num_filters=filter5R,
+ stride=1,
+ padding=0)
+ cov5 = img_conv_layer(
+ name=conv5,
+ input=cov5r,
+ filter_size=5,
+ num_filters=filter5,
+ stride=1,
+ padding=2)
+
+ pool1 = img_pool_layer(
+ name=maxpool,
+ input=input,
+ pool_size=3,
+ num_channels=channels,
+ stride=1,
+ padding=1)
+ covprj = img_conv_layer(
+ name=convproj,
+ input=pool1,
+ filter_size=1,
+ num_filters=proj,
+ stride=1,
+ padding=0)
+
+ cat = concat_layer(name=name, input=[cov1, cov3, cov5, covprj])
+ return cat
+
+def inception(name, input, channels, \
+ filter1,
+ filter3R, filter3,
+ filter5R, filter5,
+ proj):
+
+ cov1 = conv_projection(
+ input=input,
+ filter_size=1,
+ num_channels=channels,
+ num_filters=filter1,
+ stride=1,
+ padding=0)
+
+ cov3r = img_conv_layer(
+ name=name + '_3r',
+ input=input,
+ filter_size=1,
+ num_channels=channels,
+ num_filters=filter3R,
+ stride=1,
+ padding=0)
+ cov3 = conv_projection(
+ input=cov3r, filter_size=3, num_filters=filter3, stride=1, padding=1)
+
+ cov5r = img_conv_layer(
+ name=name + '_5r',
+ input=input,
+ filter_size=1,
+ num_channels=channels,
+ num_filters=filter5R,
+ stride=1,
+ padding=0)
+ cov5 = conv_projection(
+ input=cov5r, filter_size=5, num_filters=filter5, stride=1, padding=2)
+
+ pool1 = img_pool_layer(
+ name=name + '_max',
+ input=input,
+ pool_size=3,
+ num_channels=channels,
+ stride=1,
+ padding=1)
+ covprj = conv_projection(
+ input=pool1, filter_size=1, num_filters=proj, stride=1, padding=0)
+
+ cat = concat_layer(
+ name=name,
+ input=[cov1, cov3, cov5, covprj],
+ bias_attr=True,
+ act=ReluActivation())
+ return cat
+
+
+lab = data_layer(name="label", size=1000)
+data = data_layer(name="input", size=3 * height * width)
+
+# stage 1
+conv1 = img_conv_layer(
+ name="conv1",
+ input=data,
+ filter_size=7,
+ num_channels=3,
+ num_filters=64,
+ stride=2,
+ padding=3)
+pool1 = img_pool_layer(
+ name="pool1", input=conv1, pool_size=3, num_channels=64, stride=2)
+
+# stage 2
+conv2_1 = img_conv_layer(
+ name="conv2_1",
+ input=pool1,
+ filter_size=1,
+ num_filters=64,
+ stride=1,
+ padding=0)
+conv2_2 = img_conv_layer(
+ name="conv2_2",
+ input=conv2_1,
+ filter_size=3,
+ num_filters=192,
+ stride=1,
+ padding=1)
+pool2 = img_pool_layer(
+ name="pool2", input=conv2_2, pool_size=3, num_channels=192, stride=2)
+
+# stage 3
+ince3a = inception("ince3a", pool2, 192, 64, 96, 128, 16, 32, 32)
+ince3b = inception("ince3b", ince3a, 256, 128, 128, 192, 32, 96, 64)
+pool3 = img_pool_layer(
+ name="pool3", input=ince3b, num_channels=480, pool_size=3, stride=2)
+
+# stage 4
+ince4a = inception("ince4a", pool3, 480, 192, 96, 208, 16, 48, 64)
+ince4b = inception("ince4b", ince4a, 512, 160, 112, 224, 24, 64, 64)
+ince4c = inception("ince4c", ince4b, 512, 128, 128, 256, 24, 64, 64)
+ince4d = inception("ince4d", ince4c, 512, 112, 144, 288, 32, 64, 64)
+ince4e = inception("ince4e", ince4d, 528, 256, 160, 320, 32, 128, 128)
+pool4 = img_pool_layer(
+ name="pool4", input=ince4e, num_channels=832, pool_size=3, stride=2)
+
+# stage 5
+ince5a = inception("ince5a", pool4, 832, 256, 160, 320, 32, 128, 128)
+ince5b = inception("ince5b", ince5a, 832, 384, 192, 384, 48, 128, 128)
+pool5 = img_pool_layer(
+ name="pool5",
+ input=ince5b,
+ num_channels=1024,
+ pool_size=7,
+ stride=7,
+ pool_type=AvgPooling())
+
+# We remove loss1 and loss2 for all system when testing benchmark
+# output 1
+# pool_o1 = img_pool_layer(name="pool_o1", input=ince4a, num_channels=512, pool_size=5, stride=3, pool_type=AvgPooling())
+# conv_o1 = img_conv_layer(name="conv_o1", input=pool_o1, filter_size=1, num_filters=128, stride=1, padding=0)
+# fc_o1 = fc_layer(name="fc_o1", input=conv_o1, size=1024, layer_attr=ExtraAttr(drop_rate=0.7), act=ReluActivation())
+# out1 = fc_layer(name="output1", input=fc_o1, size=1000, act=SoftmaxActivation())
+# loss1 = cross_entropy(name='loss1', input=out1, label=lab, coeff=0.3)
+
+# output 2
+#pool_o2 = img_pool_layer(name="pool_o2", input=ince4d, num_channels=528, pool_size=5, stride=3, pool_type=AvgPooling())
+#conv_o2 = img_conv_layer(name="conv_o2", input=pool_o2, filter_size=1, num_filters=128, stride=1, padding=0)
+#fc_o2 = fc_layer(name="fc_o2", input=conv_o2, size=1024, layer_attr=ExtraAttr(drop_rate=0.7), act=ReluActivation())
+#out2 = fc_layer(name="output2", input=fc_o2, size=1000, act=SoftmaxActivation())
+#loss2 = cross_entropy(name='loss2', input=out2, label=lab, coeff=0.3)
+
+# output 3
+dropout = dropout_layer(name="dropout", input=pool5, dropout_rate=0.4)
+out3 = fc_layer(
+ name="output3", input=dropout, size=1000, act=SoftmaxActivation())
+loss3 = cross_entropy(name='loss3', input=out3, label=lab)
+
+outputs(loss3)
diff --git a/benchmark/paddle/image/provider.py b/benchmark/paddle/image/provider.py
new file mode 100644
index 0000000000000000000000000000000000000000..1ac47212b5a75667e8e9d4465b33f575516e2836
--- /dev/null
+++ b/benchmark/paddle/image/provider.py
@@ -0,0 +1,26 @@
+import io, os
+import random
+import numpy as np
+from paddle.trainer.PyDataProvider2 import *
+
+
+def initHook(settings, height, width, color, num_class, **kwargs):
+ settings.height = height
+ settings.width = width
+ settings.color = color
+ settings.num_class = num_class
+ if settings.color:
+ settings.data_size = settings.height * settings.width * 3
+ else:
+ settings.data_size = settings.height * settings.width
+
+ settings.slots = [dense_vector(settings.data_size), integer_value(1)]
+
+
+@provider(
+    init_hook=initHook, min_pool_size=-1, cache=CacheType.CACHE_PASS_IN_MEM)
+def process(settings, file_list):
+    # Synthetic samples; randint() is inclusive on both ends, hence num_class - 1.
+    for i in xrange(1024):
+        img = np.random.rand(1, settings.data_size).reshape(-1, 1).flatten()
+        yield img.astype('float32'), random.randint(0, settings.num_class - 1)
diff --git a/benchmark/paddle/image/run.sh b/benchmark/paddle/image/run.sh
new file mode 100755
index 0000000000000000000000000000000000000000..717ed487ba7657db6535efcb1128a355a0f15eaf
--- /dev/null
+++ b/benchmark/paddle/image/run.sh
@@ -0,0 +1,51 @@
+set -e
+
+function train() {
+ cfg=$1
+ thread=$2
+ bz=$3
+ args="batch_size=$3"
+ prefix=$4
+ paddle train --job=time \
+ --config=$cfg \
+ --use_gpu=True \
+ --trainer_count=$thread \
+ --log_period=10 \
+ --test_period=100 \
+ --config_args=$args \
+ > logs/$prefix-${thread}gpu-$bz.log 2>&1
+}
+
+if [ ! -f "train.list" ]; then  # train.list is a regular file, so test with -f
+  echo " " > train.list
+fi
+if [ ! -d "logs" ]; then
+  mkdir logs
+fi
+
+#========single-gpu=========#
+# alexnet
+train alexnet.py 1 64 alexnet
+train alexnet.py 1 128 alexnet
+train alexnet.py 1 256 alexnet
+train alexnet.py 1 512 alexnet
+
+# googlenet
+train googlenet.py 1 64 googlenet
+train googlenet.py 1 128 googlenet
+train googlenet.py 1 256 googlenet
+
+# smallnet
+train smallnet_mnist_cifar.py 1 64 smallnet
+train smallnet_mnist_cifar.py 1 128 smallnet
+train smallnet_mnist_cifar.py 1 256 smallnet
+train smallnet_mnist_cifar.py 1 512 smallnet
+
+
+############################
+#========multi-gpus=========#
+train alexnet.py 4 512 alexnet
+train alexnet.py 4 1024 alexnet
+
+train googlenet.py 4 512 googlenet
+train googlenet.py 4 1024 googlenet
diff --git a/benchmark/paddle/image/smallnet_mnist_cifar.py b/benchmark/paddle/image/smallnet_mnist_cifar.py
new file mode 100644
index 0000000000000000000000000000000000000000..58879c454f37991405d83bbb593bb5d1e977ff53
--- /dev/null
+++ b/benchmark/paddle/image/smallnet_mnist_cifar.py
@@ -0,0 +1,49 @@
+#!/usr/bin/env python
+
+from paddle.trainer_config_helpers import *
+
+height = 32
+width = 32
+num_class = 10
+
+batch_size = get_config_arg('batch_size', int, 128)
+
+args = {'height': height, 'width': width, 'color': True, 'num_class': num_class}
+define_py_data_sources2(
+ "train.list", None, module="provider", obj="process", args=args)
+
+settings(
+ batch_size=batch_size,
+ learning_rate=0.01 / batch_size,
+ learning_method=MomentumOptimizer(0.9),
+ regularization=L2Regularization(0.0005 * batch_size))
+
+# conv1
+net = data_layer('data', size=height * width * 3)
+net = img_conv_layer(
+ input=net,
+ filter_size=5,
+ num_channels=3,
+ num_filters=32,
+ stride=1,
+ padding=2)
+net = img_pool_layer(input=net, pool_size=3, stride=2, padding=1)
+
+# conv2
+net = img_conv_layer(
+ input=net, filter_size=5, num_filters=32, stride=1, padding=2)
+net = img_pool_layer(
+ input=net, pool_size=3, stride=2, padding=1, pool_type=AvgPooling())
+
+# conv3
+net = img_conv_layer(
+ input=net, filter_size=3, num_filters=64, stride=1, padding=1)
+net = img_pool_layer(
+ input=net, pool_size=3, stride=2, padding=1, pool_type=AvgPooling())
+
+net = fc_layer(input=net, size=64, act=ReluActivation())
+net = fc_layer(input=net, size=10, act=SoftmaxActivation())
+
+lab = data_layer('label', num_class)
+loss = classification_cost(input=net, label=lab)
+outputs(loss)
diff --git a/benchmark/paddle/rnn/imdb.py b/benchmark/paddle/rnn/imdb.py
new file mode 100755
index 0000000000000000000000000000000000000000..fc4ed4025f9ed2e0a32a1709ff8df4af53521196
--- /dev/null
+++ b/benchmark/paddle/rnn/imdb.py
@@ -0,0 +1,46 @@
+from __future__ import print_function
+import six.moves.cPickle as pickle
+import gzip
+import os
+import numpy
+
+
+def get_dataset_file(dataset, default_dataset, origin):
+ data_dir, data_file = os.path.split(dataset)
+ if (not os.path.isfile(dataset)) and data_file == default_dataset:
+ from six.moves import urllib
+ print('Downloading data from %s' % origin)
+ urllib.request.urlretrieve(origin, dataset)
+
+ return dataset
+
+
+def create_data(path="imdb.pkl"):
+    """Create imdb.train.pkl / imdb.test.pkl (fetching imdb.pkl if absent) and train.list."""
+    if not os.path.isfile('imdb.train.pkl'):
+        path = get_dataset_file(
+            path, "imdb.pkl",
+            "http://www.iro.umontreal.ca/~lisa/deep/data/imdb.pkl")
+
+        opener = gzip.open if path.endswith(".gz") else open
+        with opener(path, 'rb') as f:
+            train_set = pickle.load(f)
+            test_set = pickle.load(f)
+
+        # Use context managers so the output pickles are flushed and closed.
+        with open('imdb.train.pkl', 'wb') as out:
+            pickle.dump(train_set, out)
+        with open('imdb.test.pkl', 'wb') as out:
+            pickle.dump(test_set, out)
+
+    if not os.path.isfile('train.list'):
+        with open('train.list', 'w') as out:  # file() is Python 2 only
+            out.write('imdb.train.pkl\n')
+
+
+def main():
+ create_data('imdb.pkl')
+
+
+if __name__ == "__main__":
+ main()
diff --git a/benchmark/paddle/rnn/provider.py b/benchmark/paddle/rnn/provider.py
new file mode 100644
index 0000000000000000000000000000000000000000..928ca75daf84ccebb775364b0be0d8b3d5eebff9
--- /dev/null
+++ b/benchmark/paddle/rnn/provider.py
@@ -0,0 +1,72 @@
+import io, os
+import random
+import numpy as np
+import six.moves.cPickle as pickle
+from paddle.trainer.PyDataProvider2 import *
+
+
+def remove_unk(x, n_words):
+ return [[1 if w >= n_words else w for w in sen] for sen in x]
+
+
+# ==============================================================
+# tensorflow uses fixed length, but PaddlePaddle can process
+# variable-length. Padding is used in benchmark in order to
+# compare with other platform.
+# ==============================================================
+def pad_sequences(sequences,
+                  maxlen=None,
+                  dtype='int32',
+                  padding='post',
+                  truncating='post',
+                  value=0.):
+    """Pad or truncate every sequence to `maxlen`; returns a (len(sequences), maxlen) array."""
+    lengths = [len(s) for s in sequences]
+    nb_samples = len(sequences)
+    if maxlen is None:
+        maxlen = np.max(lengths)
+
+    x = (np.ones((nb_samples, maxlen)) * value).astype(dtype)
+    for idx, s in enumerate(sequences):
+        if len(s) == 0:
+            continue  # empty list was found
+        if truncating == 'pre':
+            trunc = s[-maxlen:]
+        elif truncating == 'post':
+            trunc = s[:maxlen]
+        else:
+            raise ValueError("Truncating type '%s' not understood" % truncating)
+
+        if padding == 'post':
+            x[idx, :len(trunc)] = trunc
+        elif padding == 'pre':
+            x[idx, -len(trunc):] = trunc
+        else:
+            raise ValueError("Padding type '%s' not understood" % padding)
+    return x
+
+
+def initHook(settings, vocab_size, pad_seq, maxlen, **kwargs):
+ settings.vocab_size = vocab_size
+ settings.pad_seq = pad_seq
+ settings.maxlen = maxlen
+ settings.input_types = [
+ integer_value_sequence(vocab_size), integer_value(2)
+ ]
+
+
+@provider(
+ init_hook=initHook, min_pool_size=-1, cache=CacheType.CACHE_PASS_IN_MEM)
+def process(settings, file):
+ f = open(file, 'rb')
+ train_set = pickle.load(f)
+ f.close()
+ x, y = train_set
+
+ # remove unk, namely remove the words out of dictionary
+ x = remove_unk(x, settings.vocab_size)
+ if settings.pad_seq:
+ x = pad_sequences(x, maxlen=settings.maxlen, value=0.)
+
+ for i in range(len(y)):
+ yield map(int, x[i]), int(y[i])
diff --git a/benchmark/paddle/rnn/rnn.py b/benchmark/paddle/rnn/rnn.py
new file mode 100755
index 0000000000000000000000000000000000000000..83eb3e565473f7e7e91cddeaa3cd2aafb7e3df2c
--- /dev/null
+++ b/benchmark/paddle/rnn/rnn.py
@@ -0,0 +1,38 @@
+#!/usr/bin/env python
+
+from paddle.trainer_config_helpers import *
+import imdb
+
+num_class = 2
+vocab_size = 30000
+fixedlen = 100
+batch_size = get_config_arg('batch_size', int, 128)
+lstm_num = get_config_arg('lstm_num', int, 1)
+hidden_size = get_config_arg('hidden_size', int, 128)
+# whether to pad sequence into fixed length
+pad_seq = get_config_arg('pad_seq', bool, True)
+imdb.create_data('imdb.pkl')
+
+args = {'vocab_size': vocab_size, 'pad_seq': pad_seq, 'maxlen': fixedlen}
+define_py_data_sources2(
+ "train.list", None, module="provider", obj="process", args=args)
+
+settings(
+ batch_size=batch_size,
+ learning_rate=2e-3,
+ learning_method=AdamOptimizer(),
+ regularization=L2Regularization(8e-4),
+ gradient_clipping_threshold=25)
+
+net = data_layer('data', size=vocab_size)
+net = embedding_layer(input=net, size=128)
+
+for i in xrange(lstm_num):
+ net = simple_lstm(input=net, size=hidden_size)
+
+net = last_seq(input=net)
+net = fc_layer(input=net, size=2, act=SoftmaxActivation())
+
+lab = data_layer('label', num_class)
+loss = classification_cost(input=net, label=lab)
+outputs(loss)
diff --git a/benchmark/paddle/rnn/run.sh b/benchmark/paddle/rnn/run.sh
new file mode 100755
index 0000000000000000000000000000000000000000..e9dfeb2e525979f47e4ef48f7610dc1007900f2c
--- /dev/null
+++ b/benchmark/paddle/rnn/run.sh
@@ -0,0 +1,50 @@
+set -e
+
+function train() {
+  # rnn.py looks the padding flag up as 'pad_seq' (get_config_arg), so the key must match.
+  cfg=$1 thread=$2
+  args="lstm_num=${3},pad_seq=${4},hidden_size=${5},batch_size=${6}"
+  paddle train --job=time \
+    --config=$cfg \
+    --use_gpu=1 \
+    --trainer_count=$thread \
+    --log_period=10 \
+    --test_period=100 \
+    --num_passes=1 \
+    --feed_data=1 \
+    --config_args=$args \
+    >logs/rnn-pad${4}-${thread}gpu-lstm${3}-batch${6}-hid${5}.log 2>&1
+}
+
+if [ ! -d "logs" ]; then
+ mkdir logs
+fi
+
+## padding, single gpu
+#-----config--gpu--lstm_num--padding--hidden_size--batch_size
+## lstm_num=2, batch_size=64
+train rnn.py 1 2 1 256 64
+train rnn.py 1 2 1 512 64
+train rnn.py 1 2 1 1280 64
+
+## lstm_num=2, batch_size=128
+train rnn.py 1 2 1 256 128
+train rnn.py 1 2 1 512 128
+train rnn.py 1 2 1 1280 128
+
+## lstm_num=2, batch_size=256
+train rnn.py 1 2 1 256 256
+train rnn.py 1 2 1 512 256
+train rnn.py 1 2 1 1280 256
+
+
+#==================multi gpus=====================#
+# hidden_size=256, lstm_num=2, different batch size
+train rnn.py 4 2 1 256 128
+train rnn.py 4 2 1 256 256
+train rnn.py 4 2 1 256 512
+
+# hidden_size=512, lstm_num=2, different batch size
+train rnn.py 4 2 1 512 128
+train rnn.py 4 2 1 512 256
+train rnn.py 4 2 1 512 512
diff --git a/benchmark/tensorflow/image/alexnet.py b/benchmark/tensorflow/image/alexnet.py
new file mode 100644
index 0000000000000000000000000000000000000000..f6a39ef778e21bee7374718a1b1ddf43392825a8
--- /dev/null
+++ b/benchmark/tensorflow/image/alexnet.py
@@ -0,0 +1,298 @@
+from six.moves import xrange # pylint: disable=redefined-builtin
+from datetime import datetime
+import math
+import time
+
+import tensorflow.python.platform
+import tensorflow as tf
+
+FLAGS = tf.app.flags.FLAGS
+
+tf.app.flags.DEFINE_integer('batch_size', 128, """Batch size.""")
+tf.app.flags.DEFINE_integer('num_batches', 100, """Number of batches to run.""")
+tf.app.flags.DEFINE_boolean('forward_only', False,
+ """Only run the forward pass.""")
+tf.app.flags.DEFINE_boolean('forward_backward_only', False,
+                            """Only run the forward-backward pass.""")
+tf.app.flags.DEFINE_string('data_format', 'NCHW',
+ """The data format for Convnet operations.
+ Can be either NHWC or NCHW.
+ """)
+tf.app.flags.DEFINE_boolean('log_device_placement', False,
+ """Whether to log device placement.""")
+
+
+def _conv(name, inpOp, nIn, nOut, kH, kW, dH, dW, padType, wd=0.0005):
+ with tf.name_scope(name) as scope:
+ kernel = tf.get_variable(
+ name + '_w', [kH, kW, nIn, nOut],
+ initializer=tf.truncated_normal_initializer(
+ stddev=0.01, dtype=tf.float32),
+ dtype=tf.float32)
+
+ if wd is not None and wd > 0:
+ weight_decay = tf.mul(tf.nn.l2_loss(kernel), wd, name='weight_loss')
+ tf.add_to_collection('losses', weight_decay)
+
+ if FLAGS.data_format == 'NCHW':
+ strides = [1, 1, dH, dW]
+ else:
+ strides = [1, dH, dW, 1]
+ conv = tf.nn.conv2d(
+ inpOp,
+ kernel,
+ strides,
+ padding=padType,
+ data_format=FLAGS.data_format)
+
+ biases = tf.get_variable(
+ name=name + '_b',
+ shape=[nOut],
+ initializer=tf.constant_initializer(
+ value=0.0, dtype=tf.float32),
+ dtype=tf.float32)
+
+ bias = tf.reshape(
+ tf.nn.bias_add(
+ conv, biases, data_format=FLAGS.data_format),
+ conv.get_shape())
+
+ conv1 = tf.nn.relu(bias, name=scope)
+ return conv1
+
+
+def _affine(name, inpOp, nIn, nOut, wd=0.0005, act=True, drop=None):
+ with tf.name_scope(name) as scope:
+ kernel = tf.get_variable(
+ name + '_w', [nIn, nOut],
+ initializer=tf.truncated_normal_initializer(
+ stddev=0.01, dtype=tf.float32),
+ dtype=tf.float32)
+
+ if wd is not None and wd > 0:
+ weight_decay = tf.mul(tf.nn.l2_loss(kernel), wd, name='weight_loss')
+ tf.add_to_collection('losses', weight_decay)
+
+ biases = tf.get_variable(
+ name + '_b', [nOut],
+ initializer=tf.constant_initializer(
+ value=0.0, dtype=tf.float32),
+ dtype=tf.float32,
+ trainable=True)
+
+ affine1 = tf.nn.relu_layer(inpOp, kernel, biases, name=name) if act else \
+ tf.matmul(inpOp, kernel) + biases
+
+ output = tf.nn.dropout(affine1, drop) if drop else affine1
+
+ return output
+
+
+def _mpool(name, inpOp, kH, kW, dH, dW):
+ if FLAGS.data_format == 'NCHW':
+ ksize = [1, 1, kH, kW]
+ strides = [1, 1, dH, dW]
+ else:
+ ksize = [1, kH, kW, 1]
+ strides = [1, dH, dW, 1]
+ return tf.nn.max_pool(
+ inpOp,
+ ksize=ksize,
+ strides=strides,
+ padding='VALID',
+ data_format=FLAGS.data_format,
+ name=name)
+
+
+def _norm(name, l_input, lsize=4):
+ return tf.nn.lrn(l_input,
+ lsize,
+ bias=1.0,
+ alpha=0.001 / 9.0,
+ beta=0.75,
+ name=name)
+
+
+def loss(logits, labels):
+ labels = tf.cast(labels, tf.int64)
+ cross_entropy = tf.nn.sparse_softmax_cross_entropy_with_logits(
+ logits, labels, name='cross_entropy_per_example')
+ cross_entropy_mean = tf.reduce_mean(cross_entropy, name='cross_entropy')
+ tf.add_to_collection('losses', cross_entropy_mean)
+
+ # The total loss is defined as the cross entropy loss plus all of the weight
+ # decay terms (L2 loss).
+ return tf.add_n(tf.get_collection('losses'), name='total_loss')
+
+
+def get_incoming_shape(incoming):
+    """ Returns the shape of a tf.Tensor (as a list) or of an ndarray/list/tuple. """
+    import numpy as np  # local import: this module does not import numpy at file level
+    if isinstance(incoming, tf.Tensor):
+        return incoming.get_shape().as_list()
+    if isinstance(incoming, (np.ndarray, list, tuple)):  # np.array is a function, not a type
+        return np.shape(incoming)
+    raise Exception("Invalid incoming layer.")
+
+
+def inference(images):
+    conv1 = _conv('conv1', images, 3, 96, 11, 11, 4, 4, 'VALID')
+    pool1 = _mpool('pool1', conv1, 3, 3, 2, 2)
+    norm1 = _norm('norm1', pool1, lsize=5)
+    conv2 = _conv('conv2', norm1, 96, 256, 5, 5, 1, 1, 'SAME')
+    pool2 = _mpool('pool2', conv2, 3, 3, 2, 2)
+    norm2 = _norm('norm2', pool2, lsize=5)
+    conv3 = _conv('conv3', norm2, 256, 384, 3, 3, 1, 1, 'SAME')
+    conv4 = _conv('conv4', conv3, 384, 384, 3, 3, 1, 1, 'SAME')
+    conv5 = _conv('conv5', conv4, 384, 256, 3, 3, 1, 1, 'SAME')
+    pool5 = _mpool('pool5', conv5, 3, 3, 2, 2)
+    resh1 = tf.reshape(pool5, [-1, 256 * 6 * 6])
+    affn1 = _affine('fc6', resh1, 256 * 6 * 6, 4096, 0.5)
+    affn2 = _affine('fc7', affn1, 4096, 4096, 0.5)
+    affn3 = _affine('fc8', affn2, 4096, 1000, wd=None, act=False)  # last fc
+    # NOTE(review): the positional 0.5 on fc6/fc7 binds to `wd`, not `drop` -- confirm intent.
+    return affn3
+
+
+def time_tensorflow_run(session, target, info_string):
+    """Time FLAGS.num_batches runs of `target` after 10 warm-up runs; print mean and stddev."""
+    num_steps_burn_in = 10
+    total_duration = total_duration_squared = 0.0
+    target = target if isinstance(target, list) else [target]
+    target_op = tf.group(*target)
+    for i in xrange(FLAGS.num_batches + num_steps_burn_in):
+        start_time = time.time()
+        _ = session.run(target_op)
+        duration = time.time() - start_time
+        # '>=' so exactly num_batches steps are accumulated, matching the divisor below.
+        if i >= num_steps_burn_in:
+            if not i % 10:
+                print('%s: step %d, duration = %.3f' %
+                      (datetime.now(), i - num_steps_burn_in, duration))
+            total_duration += duration
+            total_duration_squared += duration * duration
+    mn = total_duration / FLAGS.num_batches
+    vr = total_duration_squared / FLAGS.num_batches - mn * mn
+    sd = math.sqrt(max(vr, 0.0))  # clamp tiny negative values caused by float rounding
+    print('%s: %s across %d steps, %.3f +/- %.3f sec / batch' %
+          (datetime.now(), info_string, FLAGS.num_batches, mn, sd))
+
+
+def _add_loss_summaries(total_loss):
+ """
+ Generates moving average for all losses and associated summaries for
+ visualizing the performance of the network.
+
+ Args:
+ total_loss: Total loss from loss().
+ Returns:
+ loss_averages_op: op for generating moving averages of losses.
+ """
+ # Compute the moving average of all individual losses and the total loss.
+ loss_averages = tf.train.ExponentialMovingAverage(0.9, name='avg')
+ losses = tf.get_collection('losses')
+ loss_averages_op = loss_averages.apply(losses + [total_loss])
+
+ # Attach a scalar summary to all individual losses and the total loss; do the
+ # same for the averaged version of the losses.
+ for l in losses + [total_loss]:
+ # Name each loss as '(raw)' and name the moving average version of the loss
+ # as the original loss name.
+ tf.scalar_summary(l.op.name + ' (raw)', l)
+ tf.scalar_summary(l.op.name, loss_averages.average(l))
+
+ return loss_averages_op
+
+
+def run_benchmark():
+ with tf.Graph().as_default():
+ with tf.device('/gpu:0'):
+ # Generate some dummy images.
+ image_size = 224
+ # Note that our padding definition is slightly different the cuda-convnet.
+ # In order to force the model to start with the same activations sizes,
+ # we add 3 to the image_size and employ VALID padding above.
+ if FLAGS.data_format == 'NCHW':
+ image_shape = [
+ FLAGS.batch_size, 3, image_size + 3, image_size + 3
+ ]
+ else:
+ image_shape = [
+ FLAGS.batch_size, image_size + 3, image_size + 3, 3
+ ]
+ images = tf.get_variable(
+ 'image',
+ image_shape,
+ initializer=tf.truncated_normal_initializer(
+ stddev=0.1, dtype=tf.float32),
+ dtype=tf.float32,
+ trainable=False)
+
+ labels = tf.get_variable(
+ 'label', [FLAGS.batch_size],
+ initializer=tf.constant_initializer(1),
+ dtype=tf.int32,
+ trainable=False)
+
+ # Build a Graph that computes the logits predictions from the
+ # inference model.
+ last_layer = inference(images)
+
+ objective = loss(last_layer, labels)
+ # Compute the gradient with respect to all the parameters.
+
+ # Compute gradients.
+ # opt = tf.train.GradientDescentOptimizer(0.001)
+ opt = tf.train.MomentumOptimizer(0.001, 0.9)
+ grads = opt.compute_gradients(objective)
+ global_step = tf.get_variable(
+ 'global_step', [],
+ initializer=tf.constant_initializer(
+ 0.0, dtype=tf.float32),
+ trainable=False,
+ dtype=tf.float32)
+ apply_gradient_op = opt.apply_gradients(
+ grads, global_step=global_step)
+
+ # Track the moving averages of all trainable variables.
+ variable_averages = tf.train.ExponentialMovingAverage(0.9,
+ global_step)
+ variables_averages_op = variable_averages.apply(
+ tf.trainable_variables())
+
+ # Build an initialization operation.
+ init = tf.initialize_all_variables()
+
+ # Start running operations on the Graph.
+ sess = tf.Session(config=tf.ConfigProto(
+ allow_soft_placement=True,
+ log_device_placement=FLAGS.log_device_placement))
+ sess.run(init)
+
+ run_forward = True
+ run_forward_backward = True
+ if FLAGS.forward_only and FLAGS.forward_backward_only:
+ raise ValueError("Cannot specify --forward_only and "
+ "--forward_backward_only at the same time.")
+ if FLAGS.forward_only:
+ run_forward_backward = False
+ elif FLAGS.forward_backward_only:
+ run_forward = False
+
+ if run_forward:
+ time_tensorflow_run(sess, last_layer, "Forward")
+
+ if run_forward_backward:
+ with tf.control_dependencies(
+ [apply_gradient_op, variables_averages_op]):
+ train_op = tf.no_op(name='train')
+ time_tensorflow_run(sess, [train_op, objective],
+ "Forward-backward")
+
+
+def main(_):
+ run_benchmark()
+
+
+if __name__ == '__main__':
+ tf.app.run()
diff --git a/benchmark/tensorflow/image/alexnet_multi_gpu.py b/benchmark/tensorflow/image/alexnet_multi_gpu.py
new file mode 100644
index 0000000000000000000000000000000000000000..7b5ee78f4dd5429abd85d75c092a6e3a2a39f922
--- /dev/null
+++ b/benchmark/tensorflow/image/alexnet_multi_gpu.py
@@ -0,0 +1,365 @@
+from six.moves import xrange # pylint: disable=redefined-builtin
+from datetime import datetime
+import math
+import re
+import time
+
+import tensorflow.python.platform
+import tensorflow as tf
+
+# Handle through which the command-line flags defined below are read.
+FLAGS = tf.app.flags.FLAGS
+
+# batch_size is the per-tower size: time_tensorflow_run multiplies it by
+# num_gpus to obtain the number of examples per step.
+tf.app.flags.DEFINE_integer('batch_size', 64, """Batch size.""")
+# Number of timed steps; time_tensorflow_run adds warm-up steps on top.
+tf.app.flags.DEFINE_integer('num_batches', 100, """Number of batches to run.""")
+tf.app.flags.DEFINE_string('data_format', 'NCHW',
+ """The data format for Convnet operations.
+ Can be either NHWC or NCHW.
+ """)
+
+# NOTE(review): train_dir is declared but not read anywhere in this script.
+tf.app.flags.DEFINE_string('train_dir', '/train_model',
+ """Directory where to write event logs """
+ """and checkpoint.""")
+tf.app.flags.DEFINE_integer('num_gpus', 4, """How many GPUs to use.""")
+tf.app.flags.DEFINE_boolean('log_device_placement', False,
+ """Whether to log device placement.""")
+
+# Inputs of the learning-rate schedule built in run_benchmark().
+NUM_EXAMPLES_PER_EPOCH_FOR_TRAIN = 50000
+NUM_EPOCHS_PER_DECAY = 50
+INITIAL_LEARNING_RATE = 0.1
+LEARNING_RATE_DECAY_FACTOR = 0.1
+# Name-scope prefix of each GPU tower ('tower_0', 'tower_1', ...).
+TOWER_NAME = 'tower'
+
+
+def _conv(name, inpOp, nIn, nOut, kH, kW, dH, dW, padType, wd=0.005):
+ """Build a conv2d + bias + ReLU layer and return the activation tensor.
+
+ Args:
+ name: layer name; also prefixes the '_w' and '_b' variable names.
+ inpOp: input tensor laid out according to FLAGS.data_format.
+ nIn: number of input channels.
+ nOut: number of output channels.
+ kH, kW: kernel height and width.
+ dH, dW: stride along height and width.
+ padType: padding mode forwarded to tf.nn.conv2d.
+ wd: L2 weight-decay coefficient; pass None to skip the penalty.
+ """
+ with tf.name_scope(name) as scope:
+ # tf.get_variable lets later towers share this kernel once the caller
+ # has switched the variable scope to reuse mode.
+ kernel = tf.get_variable(
+ name + '_w', [kH, kW, nIn, nOut],
+ initializer=tf.truncated_normal_initializer(
+ stddev=0.01, dtype=tf.float32),
+ dtype=tf.float32)
+
+ # Register the L2 penalty in the 'losses' collection so it gets summed
+ # into the total loss.
+ if wd is not None:
+ weight_decay = tf.mul(tf.nn.l2_loss(kernel), wd, name='weight_loss')
+ tf.add_to_collection('losses', weight_decay)
+
+ # The spatial strides sit at different indices in the two layouts.
+ if FLAGS.data_format == 'NCHW':
+ strides = [1, 1, dH, dW]
+ else:
+ strides = [1, dH, dW, 1]
+ conv = tf.nn.conv2d(
+ inpOp,
+ kernel,
+ strides,
+ padding=padType,
+ data_format=FLAGS.data_format)
+
+ biases = tf.get_variable(
+ name=name + '_b',
+ shape=[nOut],
+ initializer=tf.constant_initializer(
+ value=0.0, dtype=tf.float32),
+ dtype=tf.float32)
+
+ # Add the bias, then reshape the result to the shape reported by conv.
+ bias = tf.reshape(
+ tf.nn.bias_add(
+ conv, biases, data_format=FLAGS.data_format),
+ conv.get_shape())
+
+ # The ReLU op is named after the enclosing name scope.
+ conv1 = tf.nn.relu(bias, name=scope)
+ return conv1
+
+
+def _affine(name, inpOp, nIn, nOut, wd=0.005, act=True):
+ """Build a fully connected layer and return its output tensor.
+
+ Args:
+ name: layer name; also prefixes the '_w' and '_b' variable names.
+ inpOp: 2-D input tensor whose second dimension is nIn.
+ nIn: number of input features.
+ nOut: number of output features.
+ wd: L2 weight-decay coefficient; pass None to skip the penalty.
+ act: when True apply ReLU via tf.nn.relu_layer, otherwise return the
+ plain matmul-plus-bias result.
+ """
+ with tf.name_scope(name) as scope:
+ kernel = tf.get_variable(
+ name + '_w', [nIn, nOut],
+ initializer=tf.truncated_normal_initializer(
+ stddev=0.01, dtype=tf.float32),
+ dtype=tf.float32)
+
+ # Register the L2 penalty in the 'losses' collection so it gets summed
+ # into the total loss.
+ if wd is not None:
+ weight_decay = tf.mul(tf.nn.l2_loss(kernel), wd, name='weight_loss')
+ tf.add_to_collection('losses', weight_decay)
+
+ biases = tf.get_variable(
+ name + '_b', [nOut],
+ initializer=tf.constant_initializer(
+ value=0.0, dtype=tf.float32),
+ dtype=tf.float32,
+ trainable=True)
+
+ # Only the activated branch passes `name` to the op it creates.
+ affine1 = tf.nn.relu_layer(inpOp, kernel, biases, name=name) if act else \
+ tf.matmul(inpOp, kernel) + biases
+
+ return affine1
+
+
+def _mpool(name, inpOp, kH, kW, dH, dW):
+ if FLAGS.data_format == 'NCHW':
+ ksize = [1, 1, kH, kW]
+ strides = [1, 1, dH, dW]
+ else:
+ ksize = [1, kH, kW, 1]
+ strides = [1, dH, dW, 1]
+ return tf.nn.max_pool(
+ inpOp,
+ ksize=ksize,
+ strides=strides,
+ padding='VALID',
+ data_format=FLAGS.data_format,
+ name=name)
+
+
+def _norm(name, l_input, lsize=4):
+ return tf.nn.lrn(l_input,
+ lsize,
+ bias=1.0,
+ alpha=0.001 / 9.0,
+ beta=0.75,
+ name=name)
+
+
+def loss(logits, labels):
+ """Add the mean softmax cross entropy to 'losses' and return the collection sum.
+
+ Args:
+ logits: unscaled class scores produced by inference().
+ labels: integer class ids; cast to int64 before use.
+ Returns:
+ A tensor named 'total_loss' summing every entry currently in the
+ 'losses' collection (cross entropy plus the weight-decay terms).
+ """
+ labels = tf.cast(labels, tf.int64)
+ cross_entropy = tf.nn.sparse_softmax_cross_entropy_with_logits(
+ logits, labels, name='cross_entropy_per_example')
+ cross_entropy_mean = tf.reduce_mean(cross_entropy, name='cross_entropy')
+ tf.add_to_collection('losses', cross_entropy_mean)
+
+ # The total loss is defined as the cross entropy loss plus all of the weight
+ # decay terms (L2 loss).
+ # The collection is read without a scope filter, so every entry in it is
+ # included in this sum.
+ return tf.add_n(tf.get_collection('losses'), name='total_loss')
+
+
+def get_incoming_shape(incoming):
+ """ Returns the incoming data shape """
+ if isinstance(incoming, tf.Tensor):
+ return incoming.get_shape().as_list()
+ elif type(incoming) in [np.array, list, tuple]:
+ return np.shape(incoming)
+ else:
+ raise Exception("Invalid incoming layer.")
+
+
+def inference(images):
+ """Build the AlexNet forward graph and return the 1000-way logits.
+
+ Args:
+ images: input batch laid out according to FLAGS.data_format.
+ Returns:
+ The output of the last fully connected layer (no activation applied).
+ """
+ conv1 = _conv('conv1', images, 3, 96, 11, 11, 4, 4, 'VALID')
+ pool1 = _mpool('pool1', conv1, 3, 3, 2, 2)
+ norm1 = _norm('norm1', pool1, lsize=5)
+ conv2 = _conv('conv2', norm1, 96, 256, 5, 5, 1, 1, 'SAME')
+ pool2 = _mpool('pool2', conv2, 3, 3, 2, 2)
+ norm2 = _norm('norm2', pool2, lsize=5)
+ conv3 = _conv('conv3', norm2, 256, 384, 3, 3, 1, 1, 'SAME')
+ conv4 = _conv('conv4', conv3, 384, 384, 3, 3, 1, 1, 'SAME')
+ conv5 = _conv('conv5', conv4, 384, 256, 3, 3, 1, 1, 'SAME')
+ pool5 = _mpool('pool5', conv5, 3, 3, 2, 2)
+ # Flatten to 256 * 6 * 6 features per example; a 6x6 map is what the
+ # strides above yield for the 227x227 inputs created in tower_loss().
+ resh1 = tf.reshape(pool5, [-1, 256 * 6 * 6])
+ affn1 = _affine('fc6', resh1, 256 * 6 * 6, 4096)
+ affn2 = _affine('fc7', affn1, 4096, 4096)
+ affn3 = _affine('fc8', affn2, 4096, 1000, wd=None, act=False) # last fc
+
+ return affn3
+
+
+def tower_loss(scope):
+ """Calculate the total loss on a single tower running the model.
+ The tower is fed synthetic data: a randomly initialised, non-trainable
+ image variable and a label variable filled with the constant 1.
+ Args:
+ scope: unique prefix string identifying the tower, e.g. 'tower_0'
+ Returns:
+ Tensor of shape [] containing the total loss for a batch of data
+ """
+ image_size = 224
+ # Inputs are (image_size + 3) pixels on each side, i.e. 227x227.
+ if FLAGS.data_format == 'NCHW':
+ image_shape = [FLAGS.batch_size, 3, image_size + 3, image_size + 3]
+ else:
+ image_shape = [FLAGS.batch_size, image_size + 3, image_size + 3, 3]
+ images = tf.get_variable(
+ 'image',
+ image_shape,
+ initializer=tf.truncated_normal_initializer(
+ stddev=0.1, dtype=tf.float32),
+ dtype=tf.float32,
+ trainable=False)
+
+ labels = tf.get_variable(
+ 'label', [FLAGS.batch_size],
+ initializer=tf.constant_initializer(1),
+ dtype=tf.int32,
+ trainable=False)
+
+ # Build a Graph that computes the logits predictions from the
+ # inference model.
+ last_layer = inference(images)
+
+ # Build the portion of the Graph calculating the losses. Note that we will
+ # assemble the total_loss using a custom function below.
+ # The call is made for its side effect of adding the cross entropy to the
+ # 'losses' collection; its return value is discarded.
+ _ = loss(last_layer, labels)
+
+ # Assemble all of the losses for the current tower only.
+ losses = tf.get_collection('losses', scope)
+
+ # Calculate the total loss for the current tower.
+ total_loss = tf.add_n(losses, name='total_loss')
+
+ # Compute the moving average of all individual losses and the total loss.
+ loss_averages = tf.train.ExponentialMovingAverage(0.9, name='avg')
+ loss_averages_op = loss_averages.apply(losses + [total_loss])
+
+ # Attach a scalar summary to all individual losses and the total loss; do the
+ # same for the averaged version of the losses.
+ for l in losses + [total_loss]:
+ # Remove 'tower_[0-9]/' from the name in case this is a multi-GPU training
+ # session. This helps the clarity of presentation on tensorboard.
+ loss_name = re.sub('%s_[0-9]*/' % TOWER_NAME, '', l.op.name)
+ # Name each loss as '(raw)' and name the moving average version of the loss
+ # as the original loss name.
+ tf.scalar_summary(loss_name + ' (raw)', l)
+ tf.scalar_summary(loss_name, loss_averages.average(l))
+
+ # Make the returned tensor depend on the moving-average update op.
+ with tf.control_dependencies([loss_averages_op]):
+ total_loss = tf.identity(total_loss)
+ return total_loss
+
+
+def average_gradients(tower_grads):
+ """Calculate the average gradient for each shared variable across all towers.
+ Note that this function provides a synchronization point across all towers.
+ Args:
+ tower_grads: List of lists of (gradient, variable) tuples. The outer list
+ is over towers. The inner list is over the (gradient, variable) pairs
+ computed on that tower; zip(*tower_grads) regroups them per variable.
+ Returns:
+ List of pairs of (gradient, variable) where the gradient has been averaged
+ across all towers.
+ """
+ average_grads = []
+ for grad_and_vars in zip(*tower_grads):
+ # Note that each grad_and_vars looks like the following:
+ # ((grad0_gpu0, var0_gpu0), ... , (grad0_gpuN, var0_gpuN))
+ grads = []
+ for g, _ in grad_and_vars:
+ # Add 0 dimension to the gradients to represent the tower.
+ expanded_g = tf.expand_dims(g, 0)
+
+ # Append on a 'tower' dimension which we will average over below.
+ grads.append(expanded_g)
+
+ # Average over the 'tower' dimension.
+ grad = tf.concat(0, grads)
+ grad = tf.reduce_mean(grad, 0)
+
+ # Keep in mind that the Variables are redundant because they are shared
+ # across towers. So .. we will just return the first tower's pointer to
+ # the Variable.
+ v = grad_and_vars[0][1]
+ grad_and_var = (grad, v)
+ average_grads.append(grad_and_var)
+ return average_grads
+
+
+def time_tensorflow_run(session, target):
+ num_steps_burn_in = 50
+ total_duration = 0.0
+ total_duration_squared = 0.0
+ for i in xrange(FLAGS.num_batches + num_steps_burn_in):
+ start_time = time.time()
+ _, loss_value = session.run(target)
+ duration = time.time() - start_time
+ if i > num_steps_burn_in:
+ if not i % 10:
+ num_examples_per_step = FLAGS.batch_size * FLAGS.num_gpus
+ examples_per_sec = num_examples_per_step / duration
+ sec_per_batch = duration
+
+ format_str = (
+ '%s: step %d, loss = %.2f (%.1f examples/sec; %.3f '
+ 'sec/batch batch_size = %d)')
+ print(format_str %
+ (datetime.now(), i - num_steps_burn_in, loss_value,
+ duration, sec_per_batch, num_examples_per_step))
+
+ total_duration += duration
+ total_duration_squared += duration * duration
+
+ mn = total_duration / FLAGS.num_batches
+ vr = total_duration_squared / FLAGS.num_batches - mn * mn
+ sd = math.sqrt(vr)
+ print('%s: FwdBwd across %d steps, %.3f +/- %.3f sec / batch' %
+ (datetime.now(), FLAGS.num_batches, mn, sd))
+
+
+def run_benchmark():
+ """Build one model tower per GPU, average their gradients and time training.
+
+ Variables are placed under the '/cpu:0' device context and shared between
+ towers; each tower's ops are built under '/gpu:<i>'.
+ """
+ with tf.Graph().as_default(), tf.device('/cpu:0'):
+ # Create a variable to count the number of train() calls. This equals the
+ # number of batches processed * FLAGS.num_gpus.
+ global_step = tf.get_variable(
+ 'global_step', [],
+ initializer=tf.constant_initializer(0),
+ trainable=False)
+
+ # Calculate the learning rate schedule.
+ num_batches_per_epoch = (NUM_EXAMPLES_PER_EPOCH_FOR_TRAIN /
+ FLAGS.batch_size)
+ decay_steps = int(num_batches_per_epoch * NUM_EPOCHS_PER_DECAY)
+
+ # Decay the learning rate exponentially based on the number of steps.
+ lr = tf.train.exponential_decay(
+ INITIAL_LEARNING_RATE,
+ global_step,
+ decay_steps,
+ LEARNING_RATE_DECAY_FACTOR,
+ staircase=True)
+
+ # Create an optimizer that performs gradient descent.
+ opt = tf.train.MomentumOptimizer(lr, 0.9)
+
+ # Calculate the gradients for each model tower.
+ tower_grads = []
+ for i in xrange(FLAGS.num_gpus):
+ with tf.device('/gpu:%d' % i):
+ with tf.name_scope('%s_%d' % (TOWER_NAME, i)) as scope:
+ # Calculate the loss for one tower of the model. This function
+ # constructs the entire model but shares the variables across
+ # all towers.
+ # NOTE: this local `loss` shadows the module-level loss() inside
+ # run_benchmark only; tower_loss() still calls the function.
+ loss = tower_loss(scope)
+
+ # Reuse variables for the next tower.
+ tf.get_variable_scope().reuse_variables()
+
+ # Retain the summaries from the final tower.
+ # NOTE(review): `summaries` is not used after this assignment.
+ summaries = tf.get_collection(tf.GraphKeys.SUMMARIES, scope)
+
+ # Calculate the gradients for the batch of data on this tower.
+ grads = opt.compute_gradients(loss)
+
+ # Keep track of the gradients across all towers.
+ tower_grads.append(grads)
+
+ # We must calculate the mean of each gradient. Note that this is the
+ # synchronization point across all towers.
+ grads = average_gradients(tower_grads)
+
+ # Apply the gradients to adjust the shared variables.
+ apply_gradient_op = opt.apply_gradients(grads, global_step=global_step)
+
+ # Group all updates to into a single train op.
+ train_op = tf.group(apply_gradient_op)
+
+ # Build an initialization operation.
+ init = tf.initialize_all_variables()
+
+ # Start running operations on the Graph. allow_soft_placement must be set to
+ # True to build towers on GPU, as some of the ops do not have GPU
+ # implementations.
+ sess = tf.Session(config=tf.ConfigProto(
+ allow_soft_placement=True,
+ log_device_placement=FLAGS.log_device_placement))
+ sess.run(init)
+ # `loss` here is the loss tensor of the last tower built in the loop.
+ time_tensorflow_run(sess, [train_op, loss])
+
+
+def main(_):
+ """Run the benchmark; the argument supplied by tf.app.run() is ignored."""
+ run_benchmark()
+
+
+# Script entry point: tf.app.run() invokes main() defined above.
+if __name__ == '__main__':
+ tf.app.run()
diff --git a/benchmark/tensorflow/image/googlenet.py b/benchmark/tensorflow/image/googlenet.py
new file mode 100644
index 0000000000000000000000000000000000000000..decf855b54451efba5f6a7868fbcf631789f3572
--- /dev/null
+++ b/benchmark/tensorflow/image/googlenet.py
@@ -0,0 +1,311 @@
+from six.moves import xrange
+from datetime import datetime
+import math
+import time
+
+import tensorflow.python.platform
+import tensorflow as tf
+
+FLAGS = tf.app.flags.FLAGS
+
+tf.app.flags.DEFINE_integer('batch_size', 128, """Batch size.""")
+tf.app.flags.DEFINE_integer('num_batches', 100, """Number of batches to run.""")
+tf.app.flags.DEFINE_boolean('forward_only', False,
+ """Only run the forward pass.""")
+tf.app.flags.DEFINE_boolean('forward_backward_only', False,
+ """Only run the forward-forward pass.""")
+tf.app.flags.DEFINE_string('data_format', 'NCHW',
+ """The data format for Convnet operations.
+ Can be either NHWC or NCHW.
+ """)
+tf.app.flags.DEFINE_boolean('log_device_placement', False,
+ """Whether to log device placement.""")
+
+parameters = []
+
+conv_counter = 1
+pool_counter = 1
+affine_counter = 1
+
+
+def _conv(inpOp, nIn, nOut, kH, kW, dH, dW, padType, wd=0.0005):
+ """Build a conv2d + bias + ReLU layer and return the activation tensor.
+
+ The layer is named 'conv<N>' from the module-level conv_counter, and its
+ kernel and bias variables are appended to the module-level `parameters`.
+
+ Args:
+ inpOp: input tensor laid out according to FLAGS.data_format.
+ nIn: number of input channels.
+ nOut: number of output channels.
+ kH, kW: kernel height and width.
+ dH, dW: stride along height and width.
+ padType: padding mode forwarded to tf.nn.conv2d.
+ wd: L2 weight-decay coefficient; None or a non-positive value skips
+ the penalty.
+ """
+ global conv_counter
+ global parameters
+ name = 'conv' + str(conv_counter)
+ conv_counter += 1
+ with tf.name_scope(name) as scope:
+ kernel = tf.Variable(
+ tf.truncated_normal(
+ [kH, kW, nIn, nOut], dtype=tf.float32, stddev=1e-1),
+ name='weights')
+
+ # Register the L2 penalty in the 'losses' collection.
+ if wd is not None and wd > 0:
+ weight_decay = tf.mul(tf.nn.l2_loss(kernel), wd, name='weight_loss')
+ tf.add_to_collection('losses', weight_decay)
+
+ # The spatial strides sit at different indices in the two layouts.
+ if FLAGS.data_format == 'NCHW':
+ strides = [1, 1, dH, dW]
+ else:
+ strides = [1, dH, dW, 1]
+ conv = tf.nn.conv2d(
+ inpOp,
+ kernel,
+ strides,
+ padding=padType,
+ data_format=FLAGS.data_format)
+ biases = tf.Variable(
+ tf.constant(
+ 0.0, shape=[nOut], dtype=tf.float32),
+ trainable=True,
+ name='biases')
+ # Add the bias, then reshape the result to the shape reported by conv.
+ bias = tf.reshape(
+ tf.nn.bias_add(
+ conv, biases, data_format=FLAGS.data_format),
+ conv.get_shape())
+ # The ReLU op is named after the enclosing name scope.
+ conv1 = tf.nn.relu(bias, name=scope)
+ parameters += [kernel, biases]
+ return conv1
+
+
+def _affine(inpOp, nIn, nOut, act=True, wd=0.0005):
+ """Build a fully connected layer and return its output tensor.
+
+ The layer is named 'affine<N>' from the module-level affine_counter, and
+ its kernel and bias variables are appended to the module-level `parameters`.
+
+ Args:
+ inpOp: 2-D input tensor whose second dimension is nIn.
+ nIn: number of input features.
+ nOut: number of output features.
+ act: when True apply ReLU via tf.nn.relu_layer, otherwise return the
+ plain matmul-plus-bias result.
+ wd: L2 weight-decay coefficient; None or a non-positive value skips
+ the penalty.
+ """
+ global affine_counter
+ global parameters
+ name = 'affine' + str(affine_counter)
+ affine_counter += 1
+ with tf.name_scope(name) as scope:
+ kernel = tf.Variable(
+ tf.truncated_normal(
+ [nIn, nOut], dtype=tf.float32, stddev=1e-1),
+ name='weights')
+
+ # Register the L2 penalty in the 'losses' collection.
+ if wd is not None and wd > 0:
+ weight_decay = tf.mul(tf.nn.l2_loss(kernel), wd, name='weight_loss')
+ tf.add_to_collection('losses', weight_decay)
+
+ biases = tf.Variable(
+ tf.constant(
+ 0.0, shape=[nOut], dtype=tf.float32),
+ trainable=True,
+ name='biases')
+ # Only the activated branch passes `name` to the op it creates.
+ affine1 = tf.nn.relu_layer(
+ inpOp, kernel, biases,
+ name=name) if act else tf.matmul(inpOp, kernel) + biases
+ parameters += [kernel, biases]
+ return affine1
+
+
+def _mpool(inpOp, kH, kW, dH, dW, padding):
+ global pool_counter
+ global parameters
+ name = 'pool' + str(pool_counter)
+ pool_counter += 1
+ if FLAGS.data_format == 'NCHW':
+ ksize = [1, 1, kH, kW]
+ strides = [1, 1, dH, dW]
+ else:
+ ksize = [1, kH, kW, 1]
+ strides = [1, dH, dW, 1]
+ return tf.nn.max_pool(
+ inpOp,
+ ksize=ksize,
+ strides=strides,
+ padding=padding,
+ data_format=FLAGS.data_format,
+ name=name)
+
+
+def _apool(inpOp, kH, kW, dH, dW, padding):
+ global pool_counter
+ global parameters
+ name = 'pool' + str(pool_counter)
+ pool_counter += 1
+ if FLAGS.data_format == 'NCHW':
+ ksize = [1, 1, kH, kW]
+ strides = [1, 1, dH, dW]
+ else:
+ ksize = [1, kH, kW, 1]
+ strides = [1, dH, dW, 1]
+ return tf.nn.avg_pool(
+ inpOp,
+ ksize=ksize,
+ strides=strides,
+ padding=padding,
+ data_format=FLAGS.data_format,
+ name=name)
+
+
+def _inception(inp, inSize, o1s, o2s1, o2s2, o3s1, o3s2, o4s1, o4s2):
+ """Build one inception module and return the channel-wise concatenation.
+
+ Args:
+ inp: input tensor laid out according to FLAGS.data_format.
+ inSize: number of input channels.
+ o1s: output channels of the 1x1 branch.
+ o2s1, o2s2: channels of the 1x1 reduction and the 3x3 conv that follows.
+ o3s1, o3s2: channels of the 1x1 reduction and the 5x5 conv that follows.
+ o4s1: max-pool window size (used for both height and width).
+ o4s2: output channels of the 1x1 conv applied after the pool.
+ """
+ conv1 = _conv(inp, inSize, o1s, 1, 1, 1, 1, 'VALID')
+
+ conv3_ = _conv(inp, inSize, o2s1, 1, 1, 1, 1, 'VALID')
+ conv3 = _conv(conv3_, o2s1, o2s2, 3, 3, 1, 1, 'SAME')
+
+ conv5_ = _conv(inp, inSize, o3s1, 1, 1, 1, 1, 'VALID')
+ conv5 = _conv(conv5_, o3s1, o3s2, 5, 5, 1, 1, 'SAME')
+
+ pool_ = _mpool(inp, o4s1, o4s1, 1, 1, 'SAME')
+ pool = _conv(pool_, inSize, o4s2, 1, 1, 1, 1, 'VALID')
+
+ # Index of the channel axis in the configured layout.
+ if FLAGS.data_format == 'NCHW':
+ channel_dim = 1
+ else:
+ channel_dim = 3
+ incept = tf.concat(channel_dim, [conv1, conv3, conv5, pool])
+ return incept
+
+
+def loss(logits, labels):
+ """Return the mean softmax cross entropy of `logits` against `labels`.
+
+ Args:
+ logits: unscaled class scores with 1000 classes per example.
+ labels: 1-D tensor of integer class ids.
+ """
+ batch_size = tf.size(labels)
+ labels = tf.expand_dims(labels, 1)
+ indices = tf.expand_dims(tf.range(0, batch_size, 1), 1)
+ # Each row of `concated` is an (example index, class id) pair, which
+ # sparse_to_dense turns into a [batch_size, 1000] one-hot matrix.
+ concated = tf.concat(1, [indices, labels])
+ onehot_labels = tf.sparse_to_dense(concated,
+ tf.pack([batch_size, 1000]), 1.0, 0.0)
+ cross_entropy = tf.nn.softmax_cross_entropy_with_logits(
+ logits, onehot_labels, name='xentropy')
+ loss = tf.reduce_mean(cross_entropy, name='xentropy_mean')
+ return loss
+
+
+def inference(images):
+ """Build the GoogLeNet forward graph and return the 1000-way logits.
+
+ Args:
+ images: input batch laid out according to FLAGS.data_format.
+ Returns:
+ The output of the final fully connected layer (no activation applied).
+ """
+ # stage 1
+ conv1 = _conv(images, 3, 64, 7, 7, 2, 2, 'SAME')
+ pool1 = _mpool(conv1, 3, 3, 2, 2, 'SAME')
+ # stage 2
+ conv2 = _conv(pool1, 64, 64, 1, 1, 1, 1, 'VALID')
+ conv3 = _conv(conv2, 64, 192, 3, 3, 1, 1, 'SAME')
+ pool3 = _mpool(conv3, 3, 3, 2, 2, 'SAME')
+
+ # stage 3
+ incept3a = _inception(pool3, 192, 64, 96, 128, 16, 32, 3, 32)
+ incept3b = _inception(incept3a, 256, 128, 128, 192, 32, 96, 3, 64)
+ pool4 = _mpool(incept3b, 3, 3, 2, 2, 'SAME')
+
+ # stage 4
+ incept4a = _inception(pool4, 480, 192, 96, 208, 16, 48, 3, 64)
+ incept4b = _inception(incept4a, 512, 160, 112, 224, 24, 64, 3, 64)
+ incept4c = _inception(incept4b, 512, 128, 128, 256, 24, 64, 3, 64)
+ incept4d = _inception(incept4c, 512, 112, 144, 288, 32, 64, 3, 64)
+ incept4e = _inception(incept4d, 528, 256, 160, 320, 32, 128, 3, 128)
+ pool5 = _mpool(incept4e, 3, 3, 2, 2, 'SAME')
+
+ # stage 5
+ incept5a = _inception(pool5, 832, 256, 160, 320, 32, 128, 3, 128)
+ incept5b = _inception(incept5a, 832, 384, 192, 384, 48, 128, 3, 128)
+ pool6 = _apool(incept5b, 7, 7, 1, 1, 'VALID')
+
+ # output 1
+ resh1 = tf.reshape(pool6, [-1, 1024])
+ # NOTE(review): `drop` is never used -- the classifier below is fed `resh1`,
+ # so dropout is not part of the returned graph. Confirm this is intended.
+ drop = tf.nn.dropout(resh1, 0.4)
+ affn1 = _affine(resh1, 1024, 1000, act=False)
+
+ return affn1
+
+
+def time_tensorflow_run(session, target, info_string):
+ num_steps_burn_in = 10
+ total_duration = 0.0
+ total_duration_squared = 0.0
+ if not isinstance(target, list):
+ target = [target]
+ target_op = tf.group(*target)
+ for i in range(FLAGS.num_batches + num_steps_burn_in):
+ start_time = time.time()
+ _ = session.run(target_op)
+ duration = time.time() - start_time
+ if i > num_steps_burn_in:
+ if not i % 10:
+ print('%s: step %d, duration = %.3f' %
+ (datetime.now(), i - num_steps_burn_in, duration))
+ total_duration += duration
+ total_duration_squared += duration * duration
+ mn = total_duration / FLAGS.num_batches
+ vr = total_duration_squared / FLAGS.num_batches - mn * mn
+ sd = math.sqrt(vr)
+ print('%s: %s across %d steps, %.3f +/- %.3f sec / batch' %
+ (datetime.now(), info_string, FLAGS.num_batches, mn, sd))
+
+
+def run_benchmark():
+ """Build GoogLeNet on synthetic data and time forward and/or training steps.
+
+ Raises:
+ ValueError: if both --forward_only and --forward_backward_only are set.
+ """
+ global parameters
+ with tf.Graph().as_default():
+ # Generate some dummy images.
+ image_size = 224
+ if FLAGS.data_format == 'NCHW':
+ image_shape = [FLAGS.batch_size, 3, image_size, image_size]
+ else:
+ image_shape = [FLAGS.batch_size, image_size, image_size, 3]
+
+ # Inputs are non-trainable variables, so no data is fed at run time.
+ images = tf.get_variable(
+ 'image',
+ image_shape,
+ initializer=tf.truncated_normal_initializer(
+ stddev=0.1, dtype=tf.float32),
+ dtype=tf.float32,
+ trainable=False)
+
+ labels = tf.get_variable(
+ 'label', [FLAGS.batch_size],
+ initializer=tf.constant_initializer(1),
+ dtype=tf.int32,
+ trainable=False)
+
+ # Build a Graph that computes the logits predictions from the
+ # inference model.
+ last_layer = inference(images)
+
+ objective = loss(last_layer, labels)
+
+ # Compute gradients.
+ # opt = tf.train.GradientDescentOptimizer(0.001)
+ opt = tf.train.MomentumOptimizer(0.001, 0.9)
+ grads = opt.compute_gradients(objective)
+ global_step = tf.get_variable(
+ 'global_step', [],
+ initializer=tf.constant_initializer(
+ 0.0, dtype=tf.float32),
+ trainable=False,
+ dtype=tf.float32)
+ apply_gradient_op = opt.apply_gradients(grads, global_step=global_step)
+
+ # Track the moving averages of all trainable variables.
+ variable_averages = tf.train.ExponentialMovingAverage(0.9, global_step)
+ variables_averages_op = variable_averages.apply(tf.trainable_variables(
+ ))
+
+ # Build an initialization operation.
+ init = tf.initialize_all_variables()
+
+ # Start running operations on the Graph.
+ sess = tf.Session(config=tf.ConfigProto(
+ allow_soft_placement=True,
+ log_device_placement=FLAGS.log_device_placement))
+ sess.run(init)
+
+ # Both phases run by default; each *_only flag disables the other phase.
+ run_forward = True
+ run_forward_backward = True
+ if FLAGS.forward_only and FLAGS.forward_backward_only:
+ raise ValueError("Cannot specify --forward_only and "
+ "--forward_backward_only at the same time.")
+ if FLAGS.forward_only:
+ run_forward_backward = False
+ elif FLAGS.forward_backward_only:
+ run_forward = False
+
+ if run_forward:
+ # Run the forward benchmark.
+ time_tensorflow_run(sess, last_layer, "Forward")
+
+ if run_forward_backward:
+ # train_op is a no-op that depends on the gradient update and the
+ # moving-average update, so running it triggers both.
+ with tf.control_dependencies(
+ [apply_gradient_op, variables_averages_op]):
+ train_op = tf.no_op(name='train')
+ time_tensorflow_run(sess, [train_op, objective], "Forward-backward")
+
+
+def main(_):
+ """Run the benchmark; the argument supplied by tf.app.run() is ignored."""
+ run_benchmark()
+
+
+# Script entry point: tf.app.run() invokes main() defined above.
+if __name__ == '__main__':
+ tf.app.run()
diff --git a/benchmark/tensorflow/image/googlenet_multi_gpu.py b/benchmark/tensorflow/image/googlenet_multi_gpu.py
new file mode 100644
index 0000000000000000000000000000000000000000..31466faa37c47c66e4fe4628e28c867875e89f2e
--- /dev/null
+++ b/benchmark/tensorflow/image/googlenet_multi_gpu.py
@@ -0,0 +1,411 @@
+from six.moves import xrange # pylint: disable=redefined-builtin
+from datetime import datetime
+import math
+import re
+import time
+
+import tensorflow.python.platform
+import tensorflow as tf
+
+# Handle through which the command-line flags defined below are read.
+FLAGS = tf.app.flags.FLAGS
+
+# batch_size is the per-tower size: time_tensorflow_run multiplies it by
+# num_gpus to obtain the number of examples per step.
+tf.app.flags.DEFINE_integer('batch_size', 64, """Batch size.""")
+# Number of timed steps; time_tensorflow_run adds warm-up steps on top.
+tf.app.flags.DEFINE_integer('num_batches', 100, """Number of batches to run.""")
+tf.app.flags.DEFINE_string('data_format', 'NCHW',
+ """The data format for Convnet operations.
+ Can be either NHWC or NCHW.
+ """)
+
+# NOTE(review): train_dir is declared but not read anywhere in this script.
+tf.app.flags.DEFINE_string('train_dir', '/train_model',
+ """Directory where to write event logs """
+ """and checkpoint.""")
+tf.app.flags.DEFINE_integer('num_gpus', 4, """How many GPUs to use.""")
+tf.app.flags.DEFINE_boolean('log_device_placement', False,
+ """Whether to log device placement.""")
+
+# Inputs of the learning-rate schedule built in run_benchmark().
+NUM_EXAMPLES_PER_EPOCH_FOR_TRAIN = 50000
+NUM_EPOCHS_PER_DECAY = 50
+INITIAL_LEARNING_RATE = 0.1
+LEARNING_RATE_DECAY_FACTOR = 0.1
+# Name-scope prefix of each GPU tower ('tower_0', 'tower_1', ...).
+TOWER_NAME = 'tower'
+
+
+def _conv(name, inpOp, nIn, nOut, kH, kW, dH, dW, padType, wd=0.005):
+ """Build a conv2d + bias + ReLU layer and return the activation tensor.
+
+ Args:
+ name: layer name; also prefixes the '_w' and '_b' variable names.
+ inpOp: input tensor laid out according to FLAGS.data_format.
+ nIn: number of input channels.
+ nOut: number of output channels.
+ kH, kW: kernel height and width.
+ dH, dW: stride along height and width.
+ padType: padding mode forwarded to tf.nn.conv2d.
+ wd: L2 weight-decay coefficient; pass None to skip the penalty.
+ """
+ with tf.name_scope(name) as scope:
+ # tf.get_variable lets later towers share this kernel once the caller
+ # has switched the variable scope to reuse mode.
+ kernel = tf.get_variable(
+ name + '_w', [kH, kW, nIn, nOut],
+ initializer=tf.truncated_normal_initializer(
+ stddev=0.01, dtype=tf.float32),
+ dtype=tf.float32)
+
+ # Register the L2 penalty in the 'losses' collection so it gets summed
+ # into the total loss.
+ if wd is not None:
+ weight_decay = tf.mul(tf.nn.l2_loss(kernel), wd, name='weight_loss')
+ tf.add_to_collection('losses', weight_decay)
+
+ # The spatial strides sit at different indices in the two layouts.
+ if FLAGS.data_format == 'NCHW':
+ strides = [1, 1, dH, dW]
+ else:
+ strides = [1, dH, dW, 1]
+ conv = tf.nn.conv2d(
+ inpOp,
+ kernel,
+ strides,
+ padding=padType,
+ data_format=FLAGS.data_format)
+
+ biases = tf.get_variable(
+ name=name + '_b',
+ shape=[nOut],
+ initializer=tf.constant_initializer(
+ value=0.0, dtype=tf.float32),
+ dtype=tf.float32)
+
+ # Add the bias, then reshape the result to the shape reported by conv.
+ bias = tf.reshape(
+ tf.nn.bias_add(
+ conv, biases, data_format=FLAGS.data_format),
+ conv.get_shape())
+
+ # The ReLU op is named after the enclosing name scope.
+ conv1 = tf.nn.relu(bias, name=scope)
+ return conv1
+
+
+def _affine(name, inpOp, nIn, nOut, wd=0.005, act=True):
+ """Build a fully connected layer and return its output tensor.
+
+ Args:
+ name: layer name; also prefixes the '_w' and '_b' variable names.
+ inpOp: 2-D input tensor whose second dimension is nIn.
+ nIn: number of input features.
+ nOut: number of output features.
+ wd: L2 weight-decay coefficient; pass None to skip the penalty.
+ act: when True apply ReLU via tf.nn.relu_layer, otherwise return the
+ plain matmul-plus-bias result.
+ """
+ with tf.name_scope(name) as scope:
+ kernel = tf.get_variable(
+ name + '_w', [nIn, nOut],
+ initializer=tf.truncated_normal_initializer(
+ stddev=0.01, dtype=tf.float32),
+ dtype=tf.float32)
+
+ # Register the L2 penalty in the 'losses' collection so it gets summed
+ # into the total loss.
+ if wd is not None:
+ weight_decay = tf.mul(tf.nn.l2_loss(kernel), wd, name='weight_loss')
+ tf.add_to_collection('losses', weight_decay)
+
+ biases = tf.get_variable(
+ name + '_b', [nOut],
+ initializer=tf.constant_initializer(
+ value=0.0, dtype=tf.float32),
+ dtype=tf.float32,
+ trainable=True)
+
+ # Only the activated branch passes `name` to the op it creates.
+ affine1 = tf.nn.relu_layer(inpOp, kernel, biases, name=name) if act else \
+ tf.matmul(inpOp, kernel) + biases
+
+ return affine1
+
+
+def _mpool(name, inpOp, kH, kW, dH, dW, padding):
+ if FLAGS.data_format == 'NCHW':
+ ksize = [1, 1, kH, kW]
+ strides = [1, 1, dH, dW]
+ else:
+ ksize = [1, kH, kW, 1]
+ strides = [1, dH, dW, 1]
+ return tf.nn.max_pool(
+ inpOp,
+ ksize=ksize,
+ strides=strides,
+ padding=padding,
+ data_format=FLAGS.data_format,
+ name=name)
+
+
+def _apool(name, inpOp, kH, kW, dH, dW, padding):
+ if FLAGS.data_format == 'NCHW':
+ ksize = [1, 1, kH, kW]
+ strides = [1, 1, dH, dW]
+ else:
+ ksize = [1, kH, kW, 1]
+ strides = [1, dH, dW, 1]
+ return tf.nn.avg_pool(
+ inpOp,
+ ksize=ksize,
+ strides=strides,
+ padding=padding,
+ data_format=FLAGS.data_format,
+ name=name)
+
+
+def loss(logits, labels):
+ """Add the mean softmax cross entropy to 'losses' and return the collection sum.
+
+ Args:
+ logits: unscaled class scores produced by inference().
+ labels: integer class ids; cast to int64 before use.
+ Returns:
+ A tensor named 'total_loss' summing every entry currently in the
+ 'losses' collection (cross entropy plus the weight-decay terms).
+ """
+ labels = tf.cast(labels, tf.int64)
+ cross_entropy = tf.nn.sparse_softmax_cross_entropy_with_logits(
+ logits, labels, name='cross_entropy_per_example')
+ cross_entropy_mean = tf.reduce_mean(cross_entropy, name='cross_entropy')
+ tf.add_to_collection('losses', cross_entropy_mean)
+
+ # The total loss is defined as the cross entropy loss plus all of the weight
+ # decay terms (L2 loss).
+ # The collection is read without a scope filter, so every entry in it is
+ # included in this sum.
+ return tf.add_n(tf.get_collection('losses'), name='total_loss')
+
+
+def get_incoming_shape(incoming):
+ """ Returns the incoming data shape """
+ if isinstance(incoming, tf.Tensor):
+ return incoming.get_shape().as_list()
+ elif type(incoming) in [np.array, list, tuple]:
+ return np.shape(incoming)
+ else:
+ raise Exception("Invalid incoming layer.")
+
+
+def _inception(name, inp, inSize, o1s, o2s1, o2s2, o3s1, o3s2, o4s1, o4s2):
+ """Build one inception module and return the channel-wise concatenation.
+
+ Args:
+ name: prefix for the names of the layers created inside the module.
+ inp: input tensor laid out according to FLAGS.data_format.
+ inSize: number of input channels.
+ o1s: output channels of the 1x1 branch.
+ o2s1, o2s2: channels of the 1x1 reduction and the 3x3 conv that follows.
+ o3s1, o3s2: channels of the 1x1 reduction and the 5x5 conv that follows.
+ o4s1: max-pool window size (used for both height and width).
+ o4s2: output channels of the 1x1 conv applied after the pool.
+ """
+ conv1 = _conv(name + '_1', inp, inSize, o1s, 1, 1, 1, 1, 'VALID')
+
+ conv3_ = _conv(name + '_3r', inp, inSize, o2s1, 1, 1, 1, 1, 'VALID')
+ conv3 = _conv(name + '_3', conv3_, o2s1, o2s2, 3, 3, 1, 1, 'SAME')
+
+ conv5_ = _conv(name + '_5r', inp, inSize, o3s1, 1, 1, 1, 1, 'VALID')
+ # NOTE(review): the three suffixes below lack the '_' separator used by the
+ # layers above; they feed variable names, so they are left unchanged here.
+ conv5 = _conv(name + '5', conv5_, o3s1, o3s2, 5, 5, 1, 1, 'SAME')
+
+ pool_ = _mpool(name + 'pool', inp, o4s1, o4s1, 1, 1, 'SAME')
+ pool = _conv(name + 'proj', pool_, inSize, o4s2, 1, 1, 1, 1, 'VALID')
+
+ # Index of the channel axis in the configured layout.
+ if FLAGS.data_format == 'NCHW':
+ channel_dim = 1
+ else:
+ channel_dim = 3
+ incept = tf.concat(channel_dim, [conv1, conv3, conv5, pool])
+ return incept
+
+
+def inference(images):
+ """Build the GoogLeNet forward graph and return the 1000-way logits.
+
+ Args:
+ images: input batch laid out according to FLAGS.data_format.
+ Returns:
+ The output of the final fully connected layer (no activation applied).
+ """
+ # stage 1
+ conv1 = _conv('conv1', images, 3, 64, 7, 7, 2, 2, 'SAME')
+ pool1 = _mpool('pool1', conv1, 3, 3, 2, 2, 'SAME')
+
+ # stage 2
+ conv2 = _conv('conv2', pool1, 64, 64, 1, 1, 1, 1, 'VALID')
+ conv3 = _conv('conv3', conv2, 64, 192, 3, 3, 1, 1, 'SAME')
+ pool3 = _mpool('pool3', conv3, 3, 3, 2, 2, 'SAME')
+
+ # stage 3
+ incept3a = _inception('ince3a', pool3, 192, 64, 96, 128, 16, 32, 3, 32)
+ incept3b = _inception('ince3b', incept3a, 256, 128, 128, 192, 32, 96, 3, 64)
+ pool4 = _mpool('pool4', incept3b, 3, 3, 2, 2, 'SAME')
+
+ # stage 4
+ incept4a = _inception('ince4a', pool4, 480, 192, 96, 208, 16, 48, 3, 64)
+ incept4b = _inception('ince4b', incept4a, 512, 160, 112, 224, 24, 64, 3, 64)
+ incept4c = _inception('ince4c', incept4b, 512, 128, 128, 256, 24, 64, 3, 64)
+ incept4d = _inception('ince4d', incept4c, 512, 112, 144, 288, 32, 64, 3, 64)
+ incept4e = _inception('ince4e', incept4d, 528, 256, 160, 320, 32, 128, 3,
+ 128)
+ pool5 = _mpool('pool5', incept4e, 3, 3, 2, 2, 'SAME')
+
+ # stage 5
+ incept5a = _inception('ince5a', pool5, 832, 256, 160, 320, 32, 128, 3, 128)
+ incept5b = _inception('ince5b', incept5a, 832, 384, 192, 384, 48, 128, 3,
+ 128)
+ pool6 = _apool('pool6', incept5b, 7, 7, 1, 1, 'VALID')
+
+ # output 1
+ resh1 = tf.reshape(pool6, [-1, 1024])
+ # NOTE(review): `drop` is never used -- the classifier below is fed `resh1`,
+ # so dropout is not part of the returned graph. Confirm this is intended.
+ drop = tf.nn.dropout(resh1, 0.4)
+ affn1 = _affine('fc_out', resh1, 1024, 1000, act=False)
+
+ return affn1
+
+
+def tower_loss(scope):
+ """Calculate the total loss on a single tower running the model.
+ The tower is fed synthetic data: a randomly initialised, non-trainable
+ image variable and a label variable filled with the constant 1.
+ Args:
+ scope: unique prefix string identifying the tower, e.g. 'tower_0'
+ Returns:
+ Tensor of shape [] containing the total loss for a batch of data
+ """
+ image_size = 224
+ if FLAGS.data_format == 'NCHW':
+ image_shape = [FLAGS.batch_size, 3, image_size, image_size]
+ else:
+ image_shape = [FLAGS.batch_size, image_size, image_size, 3]
+ images = tf.get_variable(
+ 'image',
+ image_shape,
+ initializer=tf.truncated_normal_initializer(
+ stddev=0.1, dtype=tf.float32),
+ dtype=tf.float32,
+ trainable=False)
+
+ labels = tf.get_variable(
+ 'label', [FLAGS.batch_size],
+ initializer=tf.constant_initializer(1),
+ dtype=tf.int32,
+ trainable=False)
+
+ # Build a Graph that computes the logits predictions from the
+ # inference model.
+ last_layer = inference(images)
+
+ # Build the portion of the Graph calculating the losses. Note that we will
+ # assemble the total_loss using a custom function below.
+ # The call is made for its side effect of adding the cross entropy to the
+ # 'losses' collection; its return value is discarded.
+ _ = loss(last_layer, labels)
+
+ # Assemble all of the losses for the current tower only.
+ losses = tf.get_collection('losses', scope)
+
+ # Calculate the total loss for the current tower.
+ total_loss = tf.add_n(losses, name='total_loss')
+
+ # Compute the moving average of all individual losses and the total loss.
+ loss_averages = tf.train.ExponentialMovingAverage(0.9, name='avg')
+ loss_averages_op = loss_averages.apply(losses + [total_loss])
+
+ # Attach a scalar summary to all individual losses and the total loss; do the
+ # same for the averaged version of the losses.
+ for l in losses + [total_loss]:
+ # Remove 'tower_[0-9]/' from the name in case this is a multi-GPU training
+ # session. This helps the clarity of presentation on tensorboard.
+ loss_name = re.sub('%s_[0-9]*/' % TOWER_NAME, '', l.op.name)
+ # Name each loss as '(raw)' and name the moving average version of the loss
+ # as the original loss name.
+ tf.scalar_summary(loss_name + ' (raw)', l)
+ tf.scalar_summary(loss_name, loss_averages.average(l))
+
+ # Make the returned tensor depend on the moving-average update op.
+ with tf.control_dependencies([loss_averages_op]):
+ total_loss = tf.identity(total_loss)
+ return total_loss
+
+
+def average_gradients(tower_grads):
+ """Calculate the average gradient for each shared variable across all towers.
+ Note that this function provides a synchronization point across all towers.
+ Args:
+ tower_grads: List of lists of (gradient, variable) tuples. The outer list
+ is over towers. The inner list is over the (gradient, variable) pairs
+ computed on that tower; zip(*tower_grads) regroups them per variable.
+ Returns:
+ List of pairs of (gradient, variable) where the gradient has been averaged
+ across all towers.
+ """
+ average_grads = []
+ for grad_and_vars in zip(*tower_grads):
+ # Note that each grad_and_vars looks like the following:
+ # ((grad0_gpu0, var0_gpu0), ... , (grad0_gpuN, var0_gpuN))
+ grads = []
+ for g, _ in grad_and_vars:
+ # Add 0 dimension to the gradients to represent the tower.
+ expanded_g = tf.expand_dims(g, 0)
+
+ # Append on a 'tower' dimension which we will average over below.
+ grads.append(expanded_g)
+
+ # Average over the 'tower' dimension.
+ grad = tf.concat(0, grads)
+ grad = tf.reduce_mean(grad, 0)
+
+ # Keep in mind that the Variables are redundant because they are shared
+ # across towers. So .. we will just return the first tower's pointer to
+ # the Variable.
+ v = grad_and_vars[0][1]
+ grad_and_var = (grad, v)
+ average_grads.append(grad_and_var)
+ return average_grads
+
+
+def time_tensorflow_run(session, target):
+ num_steps_burn_in = 50
+ total_duration = 0.0
+ total_duration_squared = 0.0
+ for i in xrange(FLAGS.num_batches + num_steps_burn_in):
+ start_time = time.time()
+ _, loss_value = session.run(target)
+ duration = time.time() - start_time
+ if i > num_steps_burn_in:
+ if not i % 10:
+ num_examples_per_step = FLAGS.batch_size * FLAGS.num_gpus
+ examples_per_sec = num_examples_per_step / duration
+ sec_per_batch = duration
+
+ format_str = (
+ '%s: step %d, loss = %.2f (%.1f examples/sec; %.3f '
+ 'sec/batch batch_size = %d)')
+ print(format_str %
+ (datetime.now(), i - num_steps_burn_in, loss_value,
+ duration, sec_per_batch, num_examples_per_step))
+
+ total_duration += duration
+ total_duration_squared += duration * duration
+
+ mn = total_duration / FLAGS.num_batches
+ vr = total_duration_squared / FLAGS.num_batches - mn * mn
+ sd = math.sqrt(vr)
+ print('%s: FwdBwd across %d steps, %.3f +/- %.3f sec / batch' %
+ (datetime.now(), FLAGS.num_batches, mn, sd))
+
+
+def run_benchmark():
+ """Build one model tower per GPU, average their gradients and time training.
+
+ Variables are placed under the '/cpu:0' device context and shared between
+ towers; each tower's ops are built under '/gpu:<i>'.
+ """
+ with tf.Graph().as_default(), tf.device('/cpu:0'):
+ # Create a variable to count the number of train() calls. This equals the
+ # number of batches processed * FLAGS.num_gpus.
+ global_step = tf.get_variable(
+ 'global_step', [],
+ initializer=tf.constant_initializer(0),
+ trainable=False)
+
+ # Calculate the learning rate schedule.
+ num_batches_per_epoch = (NUM_EXAMPLES_PER_EPOCH_FOR_TRAIN /
+ FLAGS.batch_size)
+ decay_steps = int(num_batches_per_epoch * NUM_EPOCHS_PER_DECAY)
+
+ # Decay the learning rate exponentially based on the number of steps.
+ lr = tf.train.exponential_decay(
+ INITIAL_LEARNING_RATE,
+ global_step,
+ decay_steps,
+ LEARNING_RATE_DECAY_FACTOR,
+ staircase=True)
+
+ # Create an optimizer that performs gradient descent.
+ opt = tf.train.MomentumOptimizer(lr, 0.9)
+
+ # Calculate the gradients for each model tower.
+ tower_grads = []
+ for i in xrange(FLAGS.num_gpus):
+ with tf.device('/gpu:%d' % i):
+ with tf.name_scope('%s_%d' % (TOWER_NAME, i)) as scope:
+ # Calculate the loss for one tower of the model. This function
+ # constructs the entire model but shares the variables across
+ # all towers.
+ # NOTE: this local `loss` shadows the module-level loss() inside
+ # run_benchmark only; tower_loss() still calls the function.
+ loss = tower_loss(scope)
+
+ # Reuse variables for the next tower.
+ tf.get_variable_scope().reuse_variables()
+
+ # Retain the summaries from the final tower.
+ # NOTE(review): `summaries` is not used after this assignment.
+ summaries = tf.get_collection(tf.GraphKeys.SUMMARIES, scope)
+
+ # Calculate the gradients for the batch of data on this tower.
+ grads = opt.compute_gradients(loss)
+
+ # Keep track of the gradients across all towers.
+ tower_grads.append(grads)
+
+ # We must calculate the mean of each gradient. Note that this is the
+ # synchronization point across all towers.
+ grads = average_gradients(tower_grads)
+
+ # Apply the gradients to adjust the shared variables.
+ apply_gradient_op = opt.apply_gradients(grads, global_step=global_step)
+
+ # Group all updates to into a single train op.
+ train_op = tf.group(apply_gradient_op)
+
+ # Build an initialization operation.
+ init = tf.initialize_all_variables()
+
+ # Start running operations on the Graph. allow_soft_placement must be set to
+ # True to build towers on GPU, as some of the ops do not have GPU
+ # implementations.
+ sess = tf.Session(config=tf.ConfigProto(
+ allow_soft_placement=True,
+ log_device_placement=FLAGS.log_device_placement))
+ sess.run(init)
+ # `loss` here is the loss tensor of the last tower built in the loop.
+ time_tensorflow_run(sess, [train_op, loss])
+
+
+def main(_):
+ """Run the benchmark; the argument supplied by tf.app.run() is ignored."""
+ run_benchmark()
+
+
+# Script entry point: tf.app.run() invokes main() defined above.
+if __name__ == '__main__':
+ tf.app.run()
diff --git a/benchmark/tensorflow/image/run.sh b/benchmark/tensorflow/image/run.sh
new file mode 100755
index 0000000000000000000000000000000000000000..eade36beb9df5f8d3978939216e058203e024c1a
--- /dev/null
+++ b/benchmark/tensorflow/image/run.sh
@@ -0,0 +1,28 @@
+set -e
+
+function test() {
+ cfg=$1
+ batch_size=$2
+ prefix=$3
+ python $cfg --batch_size=$batch_size > logs/${prefix}-1gpu-${batch_size}.log 2>&1
+}
+
+if [ ! -d "logs" ]; then
+ mkdir logs
+fi
+
+# alexnet
+test alexnet.py 64 alexnet
+test alexnet.py 128 alexnet
+test alexnet.py 256 alexnet
+test alexnet.py 512 alexnet
+
+# googlenet
+test googlenet.py 64 googlenet
+test googlenet.py 128 googlenet
+
+# smallnet
+test smallnet_mnist_cifar.py 64 smallnet
+test smallnet_mnist_cifar.py 128 smallnet
+test smallnet_mnist_cifar.py 256 smallnet
+test smallnet_mnist_cifar.py 512 smallnet
diff --git a/benchmark/tensorflow/image/run_multi.sh b/benchmark/tensorflow/image/run_multi.sh
new file mode 100755
index 0000000000000000000000000000000000000000..69faa4331744f2276e7706185ae10bc507f95764
--- /dev/null
+++ b/benchmark/tensorflow/image/run_multi.sh
@@ -0,0 +1,22 @@
+set -e
+
+function test() {  # usage: test <script.py> <num_gpu> <total_batch_size> <log_prefix>
+  cfg=$1
+  num_gpu=$2
+  batch_size=$3
+  batch_per_gpu=$((batch_size / num_gpu))  # each GPU gets an equal share of the total batch
+  prefix=$4
+  python "$cfg" --num_gpus="$num_gpu" --batch_size="${batch_per_gpu}" > "logs/${prefix}-${num_gpu}gpu-${batch_size}.log" 2>&1
+}
+
+if [ ! -d "logs" ]; then
+ mkdir logs
+fi
+
+# alexnet
+test alexnet_multi_gpu.py 4 512 alexnet
+test alexnet_multi_gpu.py 4 1024 alexnet
+
+# googlenet
+test googlenet_multi_gpu.py 4 512 alexnet
+test googlenet_multi_gpu.py 4 1024 alexnet
diff --git a/benchmark/tensorflow/image/smallnet_mnist_cifar.py b/benchmark/tensorflow/image/smallnet_mnist_cifar.py
new file mode 100644
index 0000000000000000000000000000000000000000..1a625134a6c58586b29190ede9c66253f484d2cf
--- /dev/null
+++ b/benchmark/tensorflow/image/smallnet_mnist_cifar.py
@@ -0,0 +1,304 @@
+from six.moves import xrange # pylint: disable=redefined-builtin
+from datetime import datetime
+import math
+import time
+
+import tensorflow.python.platform
+import tensorflow as tf
+
+FLAGS = tf.app.flags.FLAGS
+
+tf.app.flags.DEFINE_integer('batch_size', 128, """Batch size.""")
+tf.app.flags.DEFINE_integer('num_batches', 100, """Number of batches to run.""")
+tf.app.flags.DEFINE_boolean('forward_only', False,
+ """Only run the forward pass.""")
+tf.app.flags.DEFINE_boolean('forward_backward_only', False,
+                            """Only run the forward-backward pass.""")
+tf.app.flags.DEFINE_string('data_format', 'NCHW',
+ """The data format for Convnet operations.
+ Can be either NHWC or NCHW.
+ """)
+tf.app.flags.DEFINE_boolean('log_device_placement', False,
+ """Whether to log device placement.""")
+
+parameters = []
+
+conv_counter = 1
+pool_counter = 1
+affine_counter = 1
+
+
+def _conv(inpOp, nIn, nOut, kH, kW, dH, dW, padType, wd=0.005, act=True):
+ global conv_counter
+ global parameters
+ name = 'conv' + str(conv_counter)
+ conv_counter += 1
+ with tf.name_scope(name) as scope:
+ kernel = tf.Variable(
+ tf.truncated_normal(
+ [kH, kW, nIn, nOut], dtype=tf.float32, stddev=1e-1),
+ name='weights')
+
+ if wd is not None:
+ weight_decay = tf.mul(tf.nn.l2_loss(kernel), wd, name='weight_loss')
+ tf.add_to_collection('losses', weight_decay)
+
+ if FLAGS.data_format == 'NCHW':
+ strides = [1, 1, dH, dW]
+ else:
+ strides = [1, dH, dW, 1]
+ conv = tf.nn.conv2d(
+ inpOp,
+ kernel,
+ strides,
+ padding=padType,
+ data_format=FLAGS.data_format)
+ biases = tf.Variable(
+ tf.constant(
+ 0.0, shape=[nOut], dtype=tf.float32),
+ trainable=True,
+ name='biases')
+ bias = tf.reshape(
+ tf.nn.bias_add(
+ conv, biases, data_format=FLAGS.data_format),
+ conv.get_shape())
+
+ conv1 = tf.nn.relu(bias, name=scope) if act else bias
+
+ parameters += [kernel, biases]
+
+ return conv1
+
+
+def _affine(inpOp, nIn, nOut, wd=None, act=True):
+ global affine_counter
+ global parameters
+ name = 'affine' + str(affine_counter)
+ affine_counter += 1
+ with tf.name_scope(name) as scope:
+ kernel = tf.Variable(
+ tf.truncated_normal(
+ [nIn, nOut], dtype=tf.float32, stddev=1e-1),
+ name='weights')
+
+ if wd is not None:
+ weight_decay = tf.mul(tf.nn.l2_loss(kernel), wd, name='weight_loss')
+ tf.add_to_collection('losses', weight_decay)
+
+ biases = tf.Variable(
+ tf.constant(
+ 0.0, shape=[nOut], dtype=tf.float32),
+ trainable=True,
+ name='biases')
+
+ affine1 = tf.nn.relu_layer(
+ inpOp, kernel, biases,
+ name=name) if act else tf.matmul(inpOp, kernel) + biases
+
+ parameters += [kernel, biases]
+
+ return affine1
+
+
+def _mpool(inpOp, kH, kW, dH, dW, padding):
+ global pool_counter
+ global parameters
+ name = 'pool' + str(pool_counter)
+ pool_counter += 1
+ if FLAGS.data_format == 'NCHW':
+ ksize = [1, 1, kH, kW]
+ strides = [1, 1, dH, dW]
+ else:
+ ksize = [1, kH, kW, 1]
+ strides = [1, dH, dW, 1]
+ return tf.nn.max_pool(
+ inpOp,
+ ksize=ksize,
+ strides=strides,
+ padding=padding,
+ data_format=FLAGS.data_format,
+ name=name)
+
+
+def _apool(inpOp, kH, kW, dH, dW, padding):
+ global pool_counter
+ global parameters
+ name = 'pool' + str(pool_counter)
+ pool_counter += 1
+ if FLAGS.data_format == 'NCHW':
+ ksize = [1, 1, kH, kW]
+ strides = [1, 1, dH, dW]
+ else:
+ ksize = [1, kH, kW, 1]
+ strides = [1, dH, dW, 1]
+ return tf.nn.avg_pool(
+ inpOp,
+ ksize=ksize,
+ strides=strides,
+ padding=padding,
+ data_format=FLAGS.data_format,
+ name=name)
+
+
+def _norm(name, l_input, lsize=4):
+ return tf.nn.lrn(l_input,
+ lsize,
+ bias=1.0,
+ alpha=0.001 / 9.0,
+ beta=0.75,
+ name=name)
+
+
+def loss(logits, labels):
+ batch_size = tf.size(labels)
+ labels = tf.expand_dims(labels, 1)
+ indices = tf.expand_dims(tf.range(0, batch_size, 1), 1)
+ concated = tf.concat(1, [indices, labels])
+ onehot_labels = tf.sparse_to_dense(concated,
+ tf.pack([batch_size, 10]), 1.0, 0.0)
+ cross_entropy = tf.nn.softmax_cross_entropy_with_logits(
+ logits, onehot_labels, name='xentropy')
+ loss = tf.reduce_mean(cross_entropy, name='xentropy_mean')
+ return loss
+
+
+def get_incoming_shape(incoming):
+ """ Returns the incoming data shape """
+ if isinstance(incoming, tf.Tensor):
+ return incoming.get_shape().as_list()
+ elif type(incoming) in [np.array, list, tuple]:
+ return np.shape(incoming)
+ else:
+ raise Exception("Invalid incoming layer.")
+
+
+def inference(images):
+ conv1 = _conv(images, 3, 32, 5, 5, 1, 1, 'SAME')
+ pool1 = _mpool(conv1, 3, 3, 2, 2, 'SAME')
+ conv2 = _conv(pool1, 32, 32, 5, 5, 1, 1, 'SAME')
+ pool2 = _apool(conv2, 3, 3, 2, 2, 'SAME')
+ conv3 = _conv(pool2, 32, 64, 5, 5, 1, 1, 'SAME')
+ pool3 = _apool(conv3, 3, 3, 2, 2, 'SAME')
+ resh1 = tf.reshape(pool3, [-1, 64 * 4 * 4])
+ affn1 = _affine(resh1, 64 * 4 * 4, 64)
+ affn2 = _affine(affn1, 64, 10, act=False)
+
+ print('conv1:', get_incoming_shape(conv1))
+ print('pool1:', get_incoming_shape(pool1))
+ print('conv2:', get_incoming_shape(conv2))
+ print('pool2:', get_incoming_shape(pool2))
+ print('conv3:', get_incoming_shape(conv3))
+ print('pool3:', get_incoming_shape(pool3))
+
+ return affn2
+
+
+def time_tensorflow_run(session, target, info_string):
+ num_steps_burn_in = 10
+ total_duration = 0.0
+ total_duration_squared = 0.0
+ if not isinstance(target, list):
+ target = [target]
+ target_op = tf.group(*target)
+ for i in xrange(FLAGS.num_batches + num_steps_burn_in):
+ start_time = time.time()
+ _ = session.run(target_op)
+ duration = time.time() - start_time
+ if i > num_steps_burn_in:
+ if not i % 10:
+ print('%s: step %d, duration = %.3f' %
+ (datetime.now(), i - num_steps_burn_in, duration))
+ total_duration += duration
+ total_duration_squared += duration * duration
+ mn = total_duration / FLAGS.num_batches
+ vr = total_duration_squared / FLAGS.num_batches - mn * mn
+ sd = math.sqrt(vr)
+ print('%s: %s across %d steps, %.3f +/- %.3f sec / batch' %
+ (datetime.now(), info_string, FLAGS.num_batches, mn, sd))
+
+
+def run_benchmark():
+ global parameters
+ with tf.Graph().as_default():
+ # Generate some dummy images.
+ image_size = 32
+        # The dummy input is 32x32 with 3 channels. The convolutions in
+        # inference() use SAME padding, so the spatial size only shrinks at
+        # the three stride-2 pooling layers (32 -> 16 -> 8 -> 4).
+ if FLAGS.data_format == 'NCHW':
+ image_shape = [FLAGS.batch_size, 3, image_size, image_size]
+ else:
+ image_shape = [FLAGS.batch_size, image_size, image_size, 3]
+
+ images = tf.get_variable(
+ 'image',
+ image_shape,
+ initializer=tf.truncated_normal_initializer(
+ stddev=0.1, dtype=tf.float32),
+ dtype=tf.float32,
+ trainable=False)
+
+ labels = tf.get_variable(
+ 'label', [FLAGS.batch_size],
+ initializer=tf.constant_initializer(1),
+ dtype=tf.int32,
+ trainable=False)
+
+ # Build a Graph that computes the logits predictions from the
+ # inference model.
+ last_layer = inference(images)
+
+ objective = loss(last_layer, labels)
+
+ # Compute gradients.
+ opt = tf.train.MomentumOptimizer(0.001, 0.9)
+ grads = opt.compute_gradients(objective)
+ global_step = tf.get_variable(
+ 'global_step', [],
+ initializer=tf.constant_initializer(
+ 0.0, dtype=tf.float32),
+ trainable=False,
+ dtype=tf.float32)
+ apply_gradient_op = opt.apply_gradients(grads, global_step=global_step)
+
+ # Track the moving averages of all trainable variables.
+ variable_averages = tf.train.ExponentialMovingAverage(0.9, global_step)
+ variables_averages_op = variable_averages.apply(tf.trainable_variables(
+ ))
+
+ # Build an initialization operation.
+ init = tf.initialize_all_variables()
+
+ # Start running operations on the Graph.
+ sess = tf.Session(config=tf.ConfigProto(
+ allow_soft_placement=True,
+ log_device_placement=FLAGS.log_device_placement))
+ sess.run(init)
+
+ run_forward = True
+ run_forward_backward = True
+ if FLAGS.forward_only and FLAGS.forward_backward_only:
+ raise ValueError("Cannot specify --forward_only and "
+ "--forward_backward_only at the same time.")
+ if FLAGS.forward_only:
+ run_forward_backward = False
+ elif FLAGS.forward_backward_only:
+ run_forward = False
+
+ if run_forward:
+ # Run the forward benchmark.
+ time_tensorflow_run(sess, last_layer, "Forward")
+
+ if run_forward_backward:
+ with tf.control_dependencies(
+ [apply_gradient_op, variables_averages_op]):
+ train_op = tf.no_op(name='train')
+ time_tensorflow_run(sess, [train_op, objective], "Forward-backward")
+
+
+def main(_):
+ run_benchmark()
+
+
+if __name__ == '__main__':
+ tf.app.run()
diff --git a/benchmark/tensorflow/rnn/README.md b/benchmark/tensorflow/rnn/README.md
new file mode 100644
index 0000000000000000000000000000000000000000..da8e7b8b07969051cbec3ac6a713eaf7fc738a55
--- /dev/null
+++ b/benchmark/tensorflow/rnn/README.md
@@ -0,0 +1,5 @@
+You should also install tflearn:
+
+```bash
+pip install -r requirements.txt
+```
diff --git a/benchmark/tensorflow/rnn/reader.py b/benchmark/tensorflow/rnn/reader.py
new file mode 100755
index 0000000000000000000000000000000000000000..f538329a15ea9ad9293c97c94340989e2c421eb2
--- /dev/null
+++ b/benchmark/tensorflow/rnn/reader.py
@@ -0,0 +1,92 @@
+import os.path
+import io
+import numpy as np
+import tensorflow as tf
+
+# tflearn
+import tflearn
+from tflearn.data_utils import to_categorical, pad_sequences
+from tflearn.datasets import imdb
+
+FLAGS = tf.app.flags.FLAGS
+
+
+class DataSet(object):
+ def __init__(self, data, labels):
+ assert data.shape[0] == labels.shape[0], (
+ 'data.shape: %s labels.shape: %s' % (data.shape, labels.shape))
+ self._num_examples = data.shape[0]
+
+ self._data = data
+ self._labels = labels
+ self._epochs_completed = 0
+ self._index_in_epoch = 0
+
+ @property
+ def data(self):
+ return self._data
+
+ @property
+ def labels(self):
+ return self._labels
+
+ @property
+ def num_examples(self):
+ return self._num_examples
+
+ @property
+ def epochs_completed(self):
+ return self._epochs_completed
+
+ def next_batch(self, batch_size):
+ assert batch_size <= self._num_examples
+
+ start = self._index_in_epoch
+ self._index_in_epoch += batch_size
+ if self._index_in_epoch > self._num_examples:
+ # Finished epoch
+ self._epochs_completed += 1
+ # Shuffle the data
+ perm = np.arange(self._num_examples)
+ np.random.shuffle(perm)
+ self._data = self._data[perm]
+ self._labels = self._labels[perm]
+ # Start next epoch
+ start = 0
+ self._index_in_epoch = batch_size
+
+ end = self._index_in_epoch
+
+ return self._data[start:end], self._labels[start:end]
+
+
+def create_datasets(file_path, vocab_size=30000, val_fraction=0.0):
+
+ # IMDB Dataset loading
+ train, test, _ = imdb.load_data(
+ path=file_path,
+ n_words=vocab_size,
+ valid_portion=val_fraction,
+ sort_by_len=False)
+ trainX, trainY = train
+ testX, testY = test
+
+ # Data preprocessing
+ # Sequence padding
+ trainX = pad_sequences(trainX, maxlen=FLAGS.max_len, value=0.)
+ testX = pad_sequences(testX, maxlen=FLAGS.max_len, value=0.)
+ # Converting labels to binary vectors
+ trainY = to_categorical(trainY, nb_classes=2)
+ testY = to_categorical(testY, nb_classes=2)
+
+ train_dataset = DataSet(trainX, trainY)
+
+ return train_dataset
+
+
+def main():
+ create_datasets('imdb.pkl')
+
+
+if __name__ == "__main__":
+ main()
diff --git a/benchmark/tensorflow/rnn/requirements.txt b/benchmark/tensorflow/rnn/requirements.txt
new file mode 100644
index 0000000000000000000000000000000000000000..4242e7d24fbbeb18e8fb9a760d76fa6d5363b03f
--- /dev/null
+++ b/benchmark/tensorflow/rnn/requirements.txt
@@ -0,0 +1 @@
+tflearn
diff --git a/benchmark/tensorflow/rnn/rnn.py b/benchmark/tensorflow/rnn/rnn.py
new file mode 100755
index 0000000000000000000000000000000000000000..f288083e13656563b511980553245142efec4e65
--- /dev/null
+++ b/benchmark/tensorflow/rnn/rnn.py
@@ -0,0 +1,223 @@
+#!/usr/bin/env python
+from six.moves import xrange # pylint: disable=redefined-builtin
+import math
+import time
+import numpy as np
+from datetime import datetime
+
+import reader
+import tensorflow as tf
+from tensorflow.python.ops import rnn
+
+FLAGS = tf.app.flags.FLAGS
+
+tf.app.flags.DEFINE_integer('batch_size', 128, """Batch size.""")
+tf.app.flags.DEFINE_integer('num_batches', 100, """Number of batches to run.""")
+tf.app.flags.DEFINE_integer('num_layers', 1, """Number of stacked LSTM layers.""")
+tf.app.flags.DEFINE_integer('max_len', 100, """Maximum (padded) sequence length.""")
+tf.app.flags.DEFINE_boolean('forward_only', False,
+                            """Only run the forward pass.""")
+tf.app.flags.DEFINE_boolean('forward_backward_only', False,
+                            """Only run the forward-backward pass.""")
+tf.app.flags.DEFINE_integer('hidden_size', 128, """Number of LSTM hidden units.""")
+tf.app.flags.DEFINE_integer('emb_size', 128, """Word embedding dimension.""")
+tf.app.flags.DEFINE_boolean('log_device_placement', False,
+                            """Whether to log device placement.""")
+
+VOCAB_SIZE = 30000
+NUM_CLASS = 2
+
+
+def get_feed_dict(x_data, y_data=None):
+ feed_dict = {}
+
+ if y_data is not None:
+ feed_dict[y_input] = y_data
+
+ for i in xrange(x_data.shape[0]):
+ feed_dict[x_input[i]] = x_data[i, :, :]
+
+ return feed_dict
+
+
+def get_incoming_shape(incoming):
+ """ Returns the incoming data shape """
+ if isinstance(incoming, tf.Tensor):
+ return incoming.get_shape().as_list()
+ elif type(incoming) in [np.array, list, tuple]:
+ return np.shape(incoming)
+ else:
+ raise Exception("Invalid incoming layer.")
+
+
+# Note input * W is done in LSTMCell,
+# which is different from PaddlePaddle
+def single_lstm(name,
+ incoming,
+ n_units,
+ use_peepholes=True,
+ return_seq=False,
+ return_state=False):
+ with tf.name_scope(name) as scope:
+ cell = tf.nn.rnn_cell.LSTMCell(n_units, use_peepholes=use_peepholes)
+ output, _cell_state = rnn.rnn(cell, incoming, dtype=tf.float32)
+ out = output if return_seq else output[-1]
+ return (out, _cell_state) if return_state else out
+
+
+def lstm(name,
+ incoming,
+ n_units,
+ use_peepholes=True,
+ return_seq=False,
+ return_state=False,
+ num_layers=1):
+ with tf.name_scope(name) as scope:
+ lstm_cell = tf.nn.rnn_cell.LSTMCell(
+ n_units, use_peepholes=use_peepholes)
+ cell = tf.nn.rnn_cell.MultiRNNCell([lstm_cell] * num_layers)
+ initial_state = cell.zero_state(FLAGS.batch_size, dtype=tf.float32)
+ if not isinstance(incoming, list):
+ # if the input is embeding, the Tensor shape : [None, time_step, emb_size]
+ incoming = [
+ tf.squeeze(input_, [1])
+ for input_ in tf.split(1, FLAGS.max_len, incoming)
+ ]
+ outputs, state = tf.nn.rnn(cell,
+ incoming,
+ initial_state=initial_state,
+ dtype=tf.float32)
+ out = outputs if return_seq else outputs[-1]
+ return (out, _cell_state) if return_state else out
+
+
+def embedding(name, incoming, vocab_size, emb_size):
+ with tf.name_scope(name) as scope:
+ #with tf.device("/cpu:0"):
+ embedding = tf.get_variable(
+ name + '_emb', [vocab_size, emb_size], dtype=tf.float32)
+ out = tf.nn.embedding_lookup(embedding, incoming)
+ return out
+
+
+def fc(name, inpOp, nIn, nOut, act=True):
+ with tf.name_scope(name) as scope:
+ kernel = tf.get_variable(
+ name + '_w', [nIn, nOut],
+ initializer=tf.truncated_normal_initializer(
+ stddev=0.01, dtype=tf.float32),
+ dtype=tf.float32)
+
+ biases = tf.get_variable(
+ name + '_b', [nOut],
+ initializer=tf.constant_initializer(
+ value=0.0, dtype=tf.float32),
+ dtype=tf.float32,
+ trainable=True)
+
+ net = tf.nn.relu_layer(inpOp, kernel, biases, name=name) if act else \
+ tf.matmul(inpOp, kernel) + biases
+
+ return net
+
+
+def inference(seq):
+ net = embedding('emb', seq, VOCAB_SIZE, FLAGS.emb_size)
+ print "emb:", get_incoming_shape(net)
+ net = lstm('lstm', net, FLAGS.hidden_size, num_layers=FLAGS.num_layers)
+ print "lstm:", get_incoming_shape(net)
+ net = fc('fc1', net, FLAGS.hidden_size, 2)
+ return net
+
+
+def loss(logits, labels):
+ # one label index for one sample
+ labels = tf.cast(labels, tf.float32)
+ cross_entropy = tf.nn.softmax_cross_entropy_with_logits(
+ logits, labels, name='cross_entropy_per_example')
+ cross_entropy_mean = tf.reduce_mean(cross_entropy, name='cross_entropy')
+ tf.add_to_collection('losses', cross_entropy_mean)
+ return tf.add_n(tf.get_collection('losses'), name='total_loss')
+
+
+def time_tensorflow_run(session, target, x_input, y_input, info_string):
+ num_steps_burn_in = 50
+ total_duration = 0.0
+ total_duration_squared = 0.0
+ if not isinstance(target, list):
+ target = [target]
+ target_op = tf.group(*target)
+ train_dataset = reader.create_datasets("imdb.pkl", VOCAB_SIZE)
+ for i in xrange(FLAGS.num_batches + num_steps_burn_in):
+ start_time = time.time()
+ data, label = train_dataset.next_batch(FLAGS.batch_size)
+ _ = session.run(target_op, feed_dict={x_input: data, y_input: label})
+ duration = time.time() - start_time
+ if i > num_steps_burn_in:
+ if not i % 10:
+ print('%s: step %d, duration = %.3f' %
+ (datetime.now(), i - num_steps_burn_in, duration))
+ total_duration += duration
+ total_duration_squared += duration * duration
+ mn = total_duration / FLAGS.num_batches
+ vr = total_duration_squared / FLAGS.num_batches - mn * mn
+ sd = math.sqrt(vr)
+ print('%s: %s across %d steps, %.3f +/- %.3f sec / batch' %
+ (datetime.now(), info_string, FLAGS.num_batches, mn, sd))
+
+
+def run_benchmark():
+ with tf.Graph().as_default():
+ global_step = 0
+ with tf.device('/cpu:0'):
+ global_step = tf.Variable(0, trainable=False)
+ with tf.device('/gpu:0'):
+ #x_input = tf.placeholder(tf.int32, [None, FLAGS.max_len], name="x_input")
+ #y_input = tf.placeholder(tf.int32, [None, NUM_CLASS], name="y_input")
+ x_input = tf.placeholder(
+ tf.int32, [FLAGS.batch_size, FLAGS.max_len], name="x_input")
+ y_input = tf.placeholder(
+ tf.int32, [FLAGS.batch_size, NUM_CLASS], name="y_input")
+            # Generate some dummy sequence.
+
+ last_layer = inference(x_input)
+
+ objective = loss(last_layer, y_input)
+ opt = tf.train.AdamOptimizer(0.001)
+ grads = opt.compute_gradients(objective)
+ apply_gradient_op = opt.apply_gradients(
+ grads, global_step=global_step)
+
+ init = tf.initialize_all_variables()
+ sess = tf.Session(config=tf.ConfigProto(
+ allow_soft_placement=True,
+ log_device_placement=FLAGS.log_device_placement))
+ sess.run(init)
+
+ run_forward = True
+ run_forward_backward = True
+ if FLAGS.forward_only and FLAGS.forward_backward_only:
+ raise ValueError("Cannot specify --forward_only and "
+ "--forward_backward_only at the same time.")
+ if FLAGS.forward_only:
+ run_forward_backward = False
+ elif FLAGS.forward_backward_only:
+ run_forward = False
+
+ if run_forward:
+ time_tensorflow_run(sess, last_layer, x_input, y_input,
+ "Forward")
+
+ if run_forward_backward:
+ with tf.control_dependencies([apply_gradient_op]):
+ train_op = tf.no_op(name='train')
+ time_tensorflow_run(sess, [train_op, objective], x_input,
+ y_input, "Forward-backward")
+
+
+def main(_):
+ run_benchmark()
+
+
+if __name__ == '__main__':
+ tf.app.run()
diff --git a/benchmark/tensorflow/rnn/rnn_multi_gpu.py b/benchmark/tensorflow/rnn/rnn_multi_gpu.py
new file mode 100755
index 0000000000000000000000000000000000000000..eabee4fa8fe6325212ace1c11be4862cd2720b08
--- /dev/null
+++ b/benchmark/tensorflow/rnn/rnn_multi_gpu.py
@@ -0,0 +1,322 @@
+#!/usr/bin/env python
+from six.moves import xrange # pylint: disable=redefined-builtin
+import re
+import math
+import time
+import numpy as np
+from datetime import datetime
+
+import reader
+import tensorflow as tf
+from tensorflow.python.ops import rnn
+
+FLAGS = tf.app.flags.FLAGS
+
+tf.app.flags.DEFINE_integer('batch_size', 64, """Batch size.""")
+tf.app.flags.DEFINE_integer('num_batches', 100, """Number of batches to run.""")
+tf.app.flags.DEFINE_integer('num_layers', 1, """Number of stacked LSTM layers.""")
+tf.app.flags.DEFINE_integer('max_len', 100, """Maximum (padded) sequence length.""")
+tf.app.flags.DEFINE_integer('hidden_size', 128, """Number of LSTM hidden units.""")
+tf.app.flags.DEFINE_integer('emb_size', 64, """Word embedding dimension.""")
+tf.app.flags.DEFINE_boolean('log_device_placement', False,
+                            """Whether to log device placement.""")
+tf.app.flags.DEFINE_integer('num_gpus', 4, """How many GPUs to use.""")
+
+VOCAB_SIZE = 30000
+NUM_CLASS = 2
+
+NUM_EXAMPLES_PER_EPOCH_FOR_TRAIN = 50000
+NUM_EPOCHS_PER_DECAY = 50
+INITIAL_LEARNING_RATE = 0.1
+LEARNING_RATE_DECAY_FACTOR = 0.1
+TOWER_NAME = 'tower'
+
+train_dataset = reader.create_datasets("imdb.pkl", VOCAB_SIZE)
+
+
+def get_incoming_shape(incoming):
+ """ Returns the incoming data shape """
+ if isinstance(incoming, tf.Tensor):
+ return incoming.get_shape().as_list()
+ elif type(incoming) in [np.array, list, tuple]:
+ return np.shape(incoming)
+ else:
+ raise Exception("Invalid incoming layer.")
+
+
+# Note input * W is done in LSTMCell,
+# which is different from PaddlePaddle
+def single_lstm(name,
+ incoming,
+ n_units,
+ use_peepholes=True,
+ return_seq=False,
+ return_state=False):
+ with tf.name_scope(name) as scope:
+ cell = tf.nn.rnn_cell.LSTMCell(n_units, use_peepholes=use_peepholes)
+ output, _cell_state = rnn.rnn(cell, incoming, dtype=tf.float32)
+ out = output if return_seq else output[-1]
+ return (out, _cell_state) if return_state else out
+
+
+def lstm(name,
+ incoming,
+ n_units,
+ use_peepholes=True,
+ return_seq=False,
+ return_state=False,
+ num_layers=1):
+ with tf.name_scope(name) as scope:
+ lstm_cell = tf.nn.rnn_cell.LSTMCell(
+ n_units, use_peepholes=use_peepholes)
+ cell = tf.nn.rnn_cell.MultiRNNCell([lstm_cell] * num_layers)
+ initial_state = cell.zero_state(FLAGS.batch_size, dtype=tf.float32)
+ if not isinstance(incoming, list):
+ # if the input is embeding, the Tensor shape : [None, time_step, emb_size]
+ incoming = [
+ tf.squeeze(input_, [1])
+ for input_ in tf.split(1, FLAGS.max_len, incoming)
+ ]
+ outputs, state = tf.nn.rnn(cell,
+ incoming,
+ initial_state=initial_state,
+ dtype=tf.float32)
+ out = outputs if return_seq else outputs[-1]
+ return (out, _cell_state) if return_state else out
+
+
+def embedding(name, incoming, vocab_size, emb_size):
+ with tf.name_scope(name) as scope:
+ #with tf.device("/cpu:0"):
+ embedding = tf.get_variable(
+ name + '_emb', [vocab_size, emb_size], dtype=tf.float32)
+ out = tf.nn.embedding_lookup(embedding, incoming)
+ return out
+
+
+def fc(name, inpOp, nIn, nOut, act=True):
+ with tf.name_scope(name) as scope:
+ kernel = tf.get_variable(
+ name + '_w', [nIn, nOut],
+ initializer=tf.truncated_normal_initializer(
+ stddev=0.01, dtype=tf.float32),
+ dtype=tf.float32)
+
+ biases = tf.get_variable(
+ name + '_b', [nOut],
+ initializer=tf.constant_initializer(
+ value=0.0, dtype=tf.float32),
+ dtype=tf.float32,
+ trainable=True)
+
+ net = tf.nn.relu_layer(inpOp, kernel, biases, name=name) if act else \
+ tf.matmul(inpOp, kernel) + biases
+
+ return net
+
+
+def inference(seq):
+ net = embedding('emb', seq, VOCAB_SIZE, FLAGS.emb_size)
+ print "emb:", get_incoming_shape(net)
+ net = lstm('lstm', net, FLAGS.hidden_size, num_layers=FLAGS.num_layers)
+ print "lstm:", get_incoming_shape(net)
+ net = fc('fc1', net, FLAGS.hidden_size, 2)
+ return net
+
+
+def loss(logits, labels):
+ # one label index for one sample
+ #labels = tf.cast(labels, tf.int64)
+ # cross_entropy = tf.nn.sparse_softmax_cross_entropy_with_logits(
+ # logits, labels, name='cross_entropy_per_example')
+ labels = tf.cast(labels, tf.float32)
+ cross_entropy = tf.nn.softmax_cross_entropy_with_logits(
+ logits, labels, name='cross_entropy_per_example')
+ cross_entropy_mean = tf.reduce_mean(cross_entropy, name='cross_entropy')
+ tf.add_to_collection('losses', cross_entropy_mean)
+ return tf.add_n(tf.get_collection('losses'), name='total_loss')
+
+
+def tower_loss(scope):
+ """Calculate the total loss on a single tower running the model.
+ Args:
+ scope: unique prefix string identifying the tower, e.g. 'tower_0'
+ Returns:
+ Tensor of shape [] containing the total loss for a batch of data
+ """
+ data, label = train_dataset.next_batch(FLAGS.batch_size)
+
+ # Build a Graph that computes the logits predictions from the
+ # inference model.
+ last_layer = inference(data)
+
+ # Build the portion of the Graph calculating the losses. Note that we will
+ # assemble the total_loss using a custom function below.
+ #_ = loss(last_layer, label)
+ _ = loss(last_layer, label)
+
+ # Assemble all of the losses for the current tower only.
+ losses = tf.get_collection('losses', scope)
+
+ # Calculate the total loss for the current tower.
+ total_loss = tf.add_n(losses, name='total_loss')
+
+ # Compute the moving average of all individual losses and the total loss.
+ loss_averages = tf.train.ExponentialMovingAverage(0.9, name='avg')
+ loss_averages_op = loss_averages.apply(losses + [total_loss])
+
+ # Attach a scalar summary to all individual losses and the total loss; do the
+ # same for the averaged version of the losses.
+ for l in losses + [total_loss]:
+ # Remove 'tower_[0-9]/' from the name in case this is a multi-GPU training
+ # session. This helps the clarity of presentation on tensorboard.
+ loss_name = re.sub('%s_[0-9]*/' % TOWER_NAME, '', l.op.name)
+ # Name each loss as '(raw)' and name the moving average version of the loss
+ # as the original loss name.
+ tf.scalar_summary(loss_name + ' (raw)', l)
+ #tf.scalar_summary(loss_name, loss_averages.average(l))
+
+ with tf.control_dependencies([loss_averages_op]):
+ total_loss = tf.identity(total_loss)
+ return total_loss
+
+
+def average_gradients(tower_grads):
+ """Calculate the average gradient for each shared variable across all towers.
+ Note that this function provides a synchronization point across all towers.
+ Args:
+ tower_grads: List of lists of (gradient, variable) tuples. The outer list
+ is over individual gradients. The inner list is over the gradient
+ calculation for each tower.
+ Returns:
+ List of pairs of (gradient, variable) where the gradient has been averaged
+ across all towers.
+ """
+ average_grads = []
+ for grad_and_vars in zip(*tower_grads):
+ # Note that each grad_and_vars looks like the following:
+ # ((grad0_gpu0, var0_gpu0), ... , (grad0_gpuN, var0_gpuN))
+ grads = []
+ for g, _ in grad_and_vars:
+ # Add 0 dimension to the gradients to represent the tower.
+ expanded_g = tf.expand_dims(g, 0)
+
+ # Append on a 'tower' dimension which we will average over below.
+ grads.append(expanded_g)
+
+ # Average over the 'tower' dimension.
+ grad = tf.concat(0, grads)
+ grad = tf.reduce_mean(grad, 0)
+
+ # Keep in mind that the Variables are redundant because they are shared
+ # across towers. So .. we will just return the first tower's pointer to
+ # the Variable.
+ v = grad_and_vars[0][1]
+ grad_and_var = (grad, v)
+ average_grads.append(grad_and_var)
+ return average_grads
+
+
+def time_tensorflow_run(session, target):
+ num_steps_burn_in = 80
+ total_duration = 0.0
+ total_duration_squared = 0.0
+ for i in xrange(FLAGS.num_batches + num_steps_burn_in):
+ start_time = time.time()
+ _ = session.run(target, feed_dict={x_input: data, y_input: label})
+ _, loss_value = session.run(target)
+ duration = time.time() - start_time
+ if i > num_steps_burn_in:
+ if not i % 10:
+ num_examples_per_step = FLAGS.batch_size * FLAGS.num_gpus
+ examples_per_sec = num_examples_per_step / duration
+ # sec_per_batch = duration / FLAGS.num_gpus
+ sec_per_batch = duration
+
+ format_str = (
+ '%s: step %d, loss= %.2f (%.1f examples/sec; %.3f '
+ 'sec/batch batch_size= %d)')
+ print(format_str %
+ (datetime.now(), i - num_steps_burn_in, loss_value,
+ duration, sec_per_batch, num_examples_per_step))
+
+ total_duration += duration
+ total_duration_squared += duration * duration
+
+ mn = total_duration / FLAGS.num_batches
+ vr = total_duration_squared / FLAGS.num_batches - mn * mn
+ sd = math.sqrt(vr)
+ print('%s: FwdBwd across %d steps, %.3f +/- %.3f sec / batch' %
+ (datetime.now(), FLAGS.num_batches, mn, sd))
+
+
+def run_benchmark():
+ with tf.Graph().as_default(), tf.device('/cpu:0'):
+ # Create a variable to count the number of train() calls. This equals the
+ # number of batches processed * FLAGS.num_gpus.
+ global_step = tf.get_variable(
+ 'global_step', [],
+ initializer=tf.constant_initializer(0),
+ trainable=False)
+
+ # Calculate the learning rate schedule.
+ num_batches_per_epoch = (NUM_EXAMPLES_PER_EPOCH_FOR_TRAIN /
+ FLAGS.batch_size)
+ decay_steps = int(num_batches_per_epoch * NUM_EPOCHS_PER_DECAY)
+
+ # Create an optimizer that performs gradient descent.
+ opt = tf.train.AdamOptimizer(0.001)
+
+ #train_dataset = reader.create_datasets("imdb.pkl", VOCAB_SIZE)
+
+ # Calculate the gradients for each model tower.
+ tower_grads = []
+ for i in xrange(FLAGS.num_gpus):
+ with tf.device('/gpu:%d' % i):
+ with tf.name_scope('%s_%d' % (TOWER_NAME, i)) as scope:
+ # Calculate the loss for one tower of the model. This function
+ # constructs the entire model but shares the variables across
+ # all towers.
+ loss = tower_loss(scope)
+
+ # Reuse variables for the next tower.
+ tf.get_variable_scope().reuse_variables()
+
+ # Retain the summaries from the final tower.
+ # summaries = tf.get_collection(tf.GraphKeys.SUMMARIES, scope)
+
+ # Calculate the gradients for the batch of data on this tower.
+ grads = opt.compute_gradients(loss)
+
+ # Keep track of the gradients across all towers.
+ tower_grads.append(grads)
+
+ # We must calculate the mean of each gradient. Note that this is the
+ # synchronization point across all towers.
+ grads = average_gradients(tower_grads)
+
+ # Apply the gradients to adjust the shared variables.
+ apply_gradient_op = opt.apply_gradients(grads, global_step=global_step)
+
+        # Group all updates into a single train op.
+ train_op = tf.group(apply_gradient_op)
+
+ # Build an initialization operation.
+ init = tf.initialize_all_variables()
+
+ # Start running operations on the Graph. allow_soft_placement must be set to
+ # True to build towers on GPU, as some of the ops do not have GPU
+ # implementations.
+ sess = tf.Session(config=tf.ConfigProto(
+ allow_soft_placement=True,
+ log_device_placement=FLAGS.log_device_placement))
+ sess.run(init)
+ time_tensorflow_run(sess, [train_op, loss])
+
+
+def main(_):
+ run_benchmark()
+
+
+if __name__ == '__main__':
+ tf.app.run()
diff --git a/benchmark/tensorflow/rnn/run.sh b/benchmark/tensorflow/rnn/run.sh
new file mode 100755
index 0000000000000000000000000000000000000000..bb4c69cb95f965eff35f1c5a60376bf1e84f841b
--- /dev/null
+++ b/benchmark/tensorflow/rnn/run.sh
@@ -0,0 +1,29 @@
+set -e
+
+function test() {  # usage: test LSTM_NUM BATCH_SIZE HID_SIZE
+  lstm_num=$1
+  batch_size=$2
+  hid_size=$3
+  # Run one benchmark configuration; stdout and stderr both go to the log file.
+  python rnn.py --num_layers="${lstm_num}" --batch_size="${batch_size}" \
+    --hidden_size="${hid_size}" \
+    --forward_backward_only=1 \
+    > "logs/1gpu-${lstm_num}lstm-batch${batch_size}-hid${hid_size}.log" 2>&1
+}
+
+if [ ! -d "logs" ]; then
+ mkdir logs
+fi
+
+#--lstm_num--batch_size--hidden_size--#
+test 2 64 256
+test 2 64 512
+test 2 64 1280
+
+test 2 128 256
+test 2 128 512
+test 2 128 1280
+
+test 2 256 256
+test 2 256 512
+test 2 256 1280
diff --git a/benchmark/tensorflow/rnn/run_multi.sh b/benchmark/tensorflow/rnn/run_multi.sh
new file mode 100755
index 0000000000000000000000000000000000000000..f7f52e01e38d304bb3bf8185c53bd0da26014d3a
--- /dev/null
+++ b/benchmark/tensorflow/rnn/run_multi.sh
@@ -0,0 +1,28 @@
+set -e
+
+function test() {  # usage: test NUM_GPU LSTM_NUM HID_SIZE TOTAL_BATCH_SIZE
+  num_gpu=$1
+  lstm_num=$2
+  hid_size=$3
+  batch_size=$4  # must be assigned before batch_per_gpu, which is derived from it
+  batch_per_gpu=$((batch_size / num_gpu))  # integer division: the share passed as --batch_size
+  python rnn_multi_gpu.py --num_layers="${lstm_num}" --batch_size="${batch_per_gpu}" \
+    --num_gpus="${num_gpu}" \
+    --hidden_size="${hid_size}" \
+    --forward_backward_only=1 \
+    > "logs/${num_gpu}gpu-${lstm_num}lstm-hid${hid_size}-batch${batch_size}.log" 2>&1
+}
+
+if [ ! -d "logs" ]; then
+ mkdir logs
+fi
+
+#--num_gpus--lstm_num--hidden_size--batch_size--#
+test 4 2 256 128
+test 4 2 256 256
+test 4 2 256 512
+
+test 4 2 512 128
+test 4 2 512 256
+test 4 2 512 512
+
diff --git a/cmake/version.cmake b/cmake/version.cmake
new file mode 100644
index 0000000000000000000000000000000000000000..a0518e07e88a1ff468c301523f888c7d95e15185
--- /dev/null
+++ b/cmake/version.cmake
@@ -0,0 +1,24 @@
+# Set PADDLE_VERSION from $ENV{PADDLE_VERSION}, else from the nearest version-like git tag.
+set(PADDLE_VERSION "$ENV{PADDLE_VERSION}")
+set(tmp_version "HEAD")
+while("${PADDLE_VERSION}" STREQUAL "")
+  execute_process(
+    COMMAND ${GIT_EXECUTABLE} describe --tags --abbrev=0 "${tmp_version}"
+    WORKING_DIRECTORY "${PROJ_ROOT}"
+    OUTPUT_VARIABLE GIT_TAG_NAME
+    RESULT_VARIABLE GIT_RESULT
+    ERROR_QUIET OUTPUT_STRIP_TRAILING_WHITESPACE)
+  if("${GIT_RESULT}" STREQUAL "0")  # on failure this may be an error string, so compare explicitly
+    # Check the tag is a correct version (quoted so an empty tag cannot break if()).
+    if("${GIT_TAG_NAME}" MATCHES "^v[0-9]+\\.[0-9]+\\.[0-9]+(\\.(a|b|rc)\\.[0-9]+)?")
+      string(REGEX REPLACE "^v" "" PADDLE_VERSION "${GIT_TAG_NAME}")  # drop only the leading "v"
+    else()  # otherwise, get the previous git tag name.
+      set(tmp_version "${GIT_TAG_NAME}~1")
+    endif()
+  else()
+    set(PADDLE_VERSION "0.0.0")
+    message(WARNING "Cannot add paddle version from git tag")
+  endif()
+endwhile()
+
+message(STATUS "Paddle version is ${PADDLE_VERSION}")
diff --git a/demo/gan/.gitignore b/demo/gan/.gitignore
new file mode 100644
index 0000000000000000000000000000000000000000..93a6f5080a16a601cffb0bff51af9aef3ba3bae7
--- /dev/null
+++ b/demo/gan/.gitignore
@@ -0,0 +1,11 @@
+output/
+uniform_params/
+cifar_params/
+mnist_params/
+*.png
+.pydevproject
+.project
+*.log
+*.pyc
+data/mnist_data/
+data/cifar-10-batches-py/
diff --git a/demo/gan/README.md b/demo/gan/README.md
new file mode 100644
index 0000000000000000000000000000000000000000..fdc970a07b488c3a4146c9baa76a133a456fc9ab
--- /dev/null
+++ b/demo/gan/README.md
@@ -0,0 +1,13 @@
+# Generative Adversarial Networks (GAN)
+
+This demo implements GAN training described in the original GAN paper (https://arxiv.org/abs/1406.2661) and DCGAN (https://arxiv.org/abs/1511.06434).
+
+The general training procedures are implemented in gan_trainer.py. The neural network configurations are specified in gan_conf.py (for synthetic data) and gan_conf_image.py (for image data).
+
+In order to run the model, first download the corresponding data by running the shell script in ./data.
+Then you can run the command below. The flag -d specifies the training data (cifar, mnist or uniform) and the flag --use_gpu specifies whether to use the GPU for training (0 is CPU, 1 is GPU).
+
+$ python gan_trainer.py -d cifar --use_gpu 1
+
+The generated images will be stored in ./cifar_samples/
+The corresponding models will be stored in ./cifar_params/
\ No newline at end of file
diff --git a/demo/gan/data/download_cifar.sh b/demo/gan/data/download_cifar.sh
new file mode 100755
index 0000000000000000000000000000000000000000..ea3be594cd08f829e94f2c692a44947baa62b759
--- /dev/null
+++ b/demo/gan/data/download_cifar.sh
@@ -0,0 +1,18 @@
+# Copyright (c) 2016 Baidu, Inc. All Rights Reserved
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+set -e
+wget https://www.cs.toronto.edu/~kriz/cifar-10-python.tar.gz
+tar zxf cifar-10-python.tar.gz
+rm cifar-10-python.tar.gz
+
diff --git a/demo/gan/data/get_mnist_data.sh b/demo/gan/data/get_mnist_data.sh
new file mode 100644
index 0000000000000000000000000000000000000000..d21bf7067135f1f8be486ef0f13fc3ec94ffc4ed
--- /dev/null
+++ b/demo/gan/data/get_mnist_data.sh
@@ -0,0 +1,19 @@
+#!/usr/bin/env sh
+# This script downloads the mnist data and unzips it.
+set -e
+DIR="$( cd "$(dirname "$0")" ; pwd -P )"
+rm -rf "$DIR/mnist_data"
+mkdir "$DIR/mnist_data"
+cd "$DIR/mnist_data"
+
+echo "Downloading..."
+
+for fname in train-images-idx3-ubyte train-labels-idx1-ubyte t10k-images-idx3-ubyte t10k-labels-idx1-ubyte
+do
+ if [ ! -e $fname ]; then
+ wget --no-check-certificate http://yann.lecun.com/exdb/mnist/${fname}.gz
+ gunzip ${fname}.gz
+ fi
+done
+
+
diff --git a/demo/gan/gan_conf.py b/demo/gan/gan_conf.py
new file mode 100644
index 0000000000000000000000000000000000000000..05eee3a9b9ce455eb3a5d47d3165ee7f42f1002e
--- /dev/null
+++ b/demo/gan/gan_conf.py
@@ -0,0 +1,134 @@
+# Copyright (c) 2016 Baidu, Inc. All Rights Reserved
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+from paddle.trainer_config_helpers import *
+
+mode = get_config_arg("mode", str, "generator")
+assert mode in set(["generator",
+ "discriminator",
+ "generator_training",
+ "discriminator_training"])
+
+is_generator_training = mode == "generator_training"
+is_discriminator_training = mode == "discriminator_training"
+is_generator = mode == "generator"
+is_discriminator = mode == "discriminator"
+
+# The network structure below follows the ref https://arxiv.org/abs/1406.2661
+# Here we used two hidden layers and batch_norm
+
+print('mode=%s' % mode)
+# the dim of the noise (z) as the input of the generator network
+noise_dim = 10
+# the dim of the hidden layer
+hidden_dim = 10
+# the dim of the generated sample
+sample_dim = 2
+
+settings(
+ batch_size=128,
+ learning_rate=1e-4,
+ learning_method=AdamOptimizer(beta1=0.5)
+)
+
+def discriminator(sample):
+    """
+    The discriminator outputs the probability that a sample comes from the
+    generator or from real data.
+    The output is two dimensional: dimension 0 is the probability that the
+    sample is from the generator and dimension 1 is the probability that the
+    sample is from real data.
+    Every parameter is created with is_static=is_generator_training.
+    """
+    frozen = is_generator_training
+    weight_attr = ParamAttr(is_static=frozen)
+    offset_attr = ParamAttr(is_static=frozen, initial_mean=1.0, initial_std=0)
+
+    # Two fully connected layers; the second stays linear because its
+    # activation is applied by the batch norm layer that follows.
+    first = fc_layer(
+        input=sample,
+        size=hidden_dim,
+        name="dis_hidden",
+        act=ReluActivation(),
+        param_attr=weight_attr,
+        bias_attr=offset_attr)
+    second = fc_layer(
+        input=first,
+        size=hidden_dim,
+        name="dis_hidden2",
+        act=LinearActivation(),
+        param_attr=weight_attr,
+        bias_attr=offset_attr)
+
+    bn_weight_attr = ParamAttr(
+        is_static=frozen, initial_mean=1.0, initial_std=0.02)
+    normalized = batch_norm_layer(
+        second,
+        name="dis_hidden_bn",
+        act=ReluActivation(),
+        param_attr=bn_weight_attr,
+        bias_attr=offset_attr,
+        use_global_stats=False)
+
+    # Two-way softmax over {generated, real}.
+    return fc_layer(
+        input=normalized,
+        size=2,
+        name="dis_prob",
+        act=SoftmaxActivation(),
+        param_attr=weight_attr,
+        bias_attr=offset_attr)
+
+def generator(noise):
+    """
+    The generator produces a sample of size sample_dim from the given noise.
+    Every parameter is created with is_static=is_discriminator_training.
+    """
+    frozen = is_discriminator_training
+    weight_attr = ParamAttr(is_static=frozen)
+    offset_attr = ParamAttr(is_static=frozen, initial_mean=1.0, initial_std=0)
+
+    # Two fully connected layers; the second stays linear because its
+    # activation is applied by the batch norm layer that follows.
+    first = fc_layer(
+        input=noise,
+        size=hidden_dim,
+        name="gen_layer_hidden",
+        act=ReluActivation(),
+        param_attr=weight_attr,
+        bias_attr=offset_attr)
+    second = fc_layer(
+        input=first,
+        size=hidden_dim,
+        name="gen_hidden2",
+        act=LinearActivation(),
+        param_attr=weight_attr,
+        bias_attr=offset_attr)
+
+    bn_weight_attr = ParamAttr(
+        is_static=frozen, initial_mean=1.0, initial_std=0.02)
+    normalized = batch_norm_layer(
+        second,
+        name="gen_layer_hidden_bn",
+        act=ReluActivation(),
+        param_attr=bn_weight_attr,
+        bias_attr=offset_attr,
+        use_global_stats=False)
+
+    # Linear output layer producing the generated sample.
+    return fc_layer(
+        input=normalized,
+        size=sample_dim,
+        name="gen_layer1",
+        act=LinearActivation(),
+        param_attr=weight_attr,
+        bias_attr=offset_attr)
+
+if is_generator_training:
+ noise = data_layer(name="noise", size=noise_dim)
+ sample = generator(noise)
+
+if is_discriminator_training:
+ sample = data_layer(name="sample", size=sample_dim)
+
+if is_generator_training or is_discriminator_training:
+ label = data_layer(name="label", size=1)
+ prob = discriminator(sample)
+ cost = cross_entropy(input=prob, label=label)
+ classification_error_evaluator(input=prob, label=label, name=mode+'_error')
+ outputs(cost)
+
+if is_generator:
+ noise = data_layer(name="noise", size=noise_dim)
+ outputs(generator(noise))
diff --git a/demo/gan/gan_conf_image.py b/demo/gan/gan_conf_image.py
new file mode 100644
index 0000000000000000000000000000000000000000..dc5910e9f02d7aac59207fdaa0222d01ac3bf609
--- /dev/null
+++ b/demo/gan/gan_conf_image.py
@@ -0,0 +1,264 @@
+# Copyright (c) 2016 Baidu, Inc. All Rights Reserved
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+from paddle.trainer_config_helpers import *
+
+mode = get_config_arg("mode", str, "generator")
+dataSource = get_config_arg("data", str, "mnist")
+assert mode in set(["generator",
+ "discriminator",
+ "generator_training",
+ "discriminator_training"])
+
+is_generator_training = mode == "generator_training"
+is_discriminator_training = mode == "discriminator_training"
+is_generator = mode == "generator"
+is_discriminator = mode == "discriminator"
+
+# The network structure below follows the dcgan paper
+# (https://arxiv.org/abs/1511.06434)
+
+print('mode=%s' % mode)
+# the dim of the noise (z) as the input of the generator network
+noise_dim = 100
+# the number of filters in the layer in generator/discriminator that is
+# closest to the image
+gf_dim = 64
+df_dim = 64
+if dataSource == "mnist":
+ sample_dim = 28 # image dim
+ c_dim = 1 # image color
+else:
+ sample_dim = 32
+ c_dim = 3
+s2, s4 = int(sample_dim/2), int(sample_dim/4),
+s8, s16 = int(sample_dim/8), int(sample_dim/16)
+
+settings(
+ batch_size=128,
+ learning_rate=2e-4,
+ learning_method=AdamOptimizer(beta1=0.5)
+)
+
+def conv_bn(input, channels, imgSize, num_filters, output_x, stride, name,
+            param_attr, bias_attr, param_attr_bn, bn, trans=False,
+            act=ReluActivation()):
+
+    """
+    conv_bn is a utility function that constructs a convolution/deconv layer
+    with an optional batch_norm layer. The filter size and padding are
+    derived from imgSize, output_x and stride.
+
+    :param bn: whether to use batch_norm_layer
+    :type bn: bool
+    :param trans: whether to use conv (False) or deconv (True)
+    :type trans: bool
+    :raises ValueError: if imgSize - (output_x - 1) * stride is outside 2..5
+    """
+
+    # calculate the filter_size and padding size based on the given
+    # imgSize and output size
+    tmp = imgSize - (output_x - 1) * stride
+    if tmp <= 1 or tmp > 5:
+        raise ValueError("conv input-output dimension does not fit")
+    elif tmp <= 3:
+        filter_size = tmp + 2
+        padding = 1
+    else:
+        filter_size = tmp
+        padding = 0
+
+    print (imgSize, output_x, stride, filter_size, padding)
+
+    # Layer-name suffix: "_convt" marks a transposed convolution (trans=True),
+    # "_conv" a plain convolution.
+    nameApx = "_convt" if trans else "_conv"
+
+    # When batch norm follows, the activation is applied by the batch_norm
+    # layer, so the convolution itself is kept linear.
+    conv = img_conv_layer(input, filter_size=filter_size,
+                          num_filters=num_filters,
+                          name=name + nameApx, num_channels=channels,
+                          act=LinearActivation() if bn else act,
+                          groups=1, stride=stride,
+                          padding=padding, bias_attr=bias_attr,
+                          param_attr=param_attr, shared_biases=True,
+                          layer_attr=None, filter_size_y=None,
+                          stride_y=None, padding_y=None,
+                          trans=trans)
+    if not bn:
+        return conv
+
+    return batch_norm_layer(conv,
+                            act=act,
+                            name=name + nameApx + "_bn",
+                            bias_attr=bias_attr,
+                            param_attr=param_attr_bn,
+                            use_global_stats=False)
+
+def generator(noise):
+ """
+ generator generates a sample given noise.
+
+ The noise goes through a fully connected layer with batch norm, then
+ three conv_bn stages built with trans=True. The last stage has no batch
+ norm and uses a tanh activation.
+ All parameters are created with is_static=is_discriminator_training.
+ """
+ param_attr = ParamAttr(is_static=is_discriminator_training,
+ initial_mean=0.0,
+ initial_std=0.02)
+ bias_attr = ParamAttr(is_static=is_discriminator_training,
+ initial_mean=0.0,
+ initial_std=0.0)
+
+ param_attr_bn=ParamAttr(is_static=is_discriminator_training,
+ initial_mean=1.0,
+ initial_std=0.02)
+
+ # Fully connected layer of size s8 * s8 * gf_dim * 4, kept linear because
+ # the batch norm layer below applies the ReLU.
+ h1 = fc_layer(input=noise,
+ name="gen_layer_h1",
+ size=s8 * s8 * gf_dim * 4,
+ bias_attr=bias_attr,
+ param_attr=param_attr,
+ act=LinearActivation())
+
+ h1_bn = batch_norm_layer(h1,
+ act=ReluActivation(),
+ name="gen_layer_h1_bn",
+ bias_attr=bias_attr,
+ param_attr=param_attr_bn,
+ use_global_stats=False)
+
+ # trans=True stage between sizes s8 and s4 (gf_dim*4 -> gf_dim*2 channels)
+ h2_bn = conv_bn(h1_bn,
+ channels=gf_dim*4,
+ output_x=s8,
+ num_filters=gf_dim*2,
+ imgSize=s4,
+ stride=2,
+ name="gen_layer_h2",
+ param_attr=param_attr,
+ bias_attr=bias_attr,
+ param_attr_bn=param_attr_bn,
+ bn=True,
+ trans=True)
+
+ # trans=True stage between sizes s4 and s2 (gf_dim*2 -> gf_dim channels)
+ h3_bn = conv_bn(h2_bn,
+ channels=gf_dim*2,
+ output_x=s4,
+ num_filters=gf_dim,
+ imgSize=s2,
+ stride=2,
+ name="gen_layer_h3",
+ param_attr=param_attr,
+ bias_attr=bias_attr,
+ param_attr_bn=param_attr_bn,
+ bn=True,
+ trans=True)
+
+
+ # Final trans=True stage between sizes s2 and sample_dim with c_dim
+ # filters: no batch norm, tanh activation.
+ return conv_bn(h3_bn,
+ channels=gf_dim,
+ output_x=s2,
+ num_filters=c_dim,
+ imgSize=sample_dim,
+ stride=2,
+ name="gen_layer_h4",
+ param_attr=param_attr,
+ bias_attr=bias_attr,
+ param_attr_bn=param_attr_bn,
+ bn=False,
+ trans=True,
+ act=TanhActivation())
+
+
+def discriminator(sample):
+ """
+ The discriminator outputs the probability that a sample comes from the
+ generator or from real data.
+ The output is two dimensional: dimension 0 is the probability that the
+ sample is from the generator and dimension 1 is the probability that the
+ sample is from real data.
+ All parameters are created with is_static=is_generator_training.
+ """
+ param_attr = ParamAttr(is_static=is_generator_training,
+ initial_mean=0.0,
+ initial_std=0.02)
+ bias_attr = ParamAttr(is_static=is_generator_training,
+ initial_mean=0.0,
+ initial_std=0.0)
+
+ param_attr_bn=ParamAttr(is_static=is_generator_training,
+ initial_mean=1.0,
+ initial_std=0.02)
+
+ # First stage (sample_dim -> s2, c_dim -> df_dim channels), no batch norm
+ h0 = conv_bn(sample,
+ channels=c_dim,
+ imgSize=sample_dim,
+ num_filters=df_dim,
+ output_x=s2,
+ stride=2,
+ name="dis_h0",
+ param_attr=param_attr,
+ bias_attr=bias_attr,
+ param_attr_bn=param_attr_bn,
+ bn=False)
+
+ # Second stage (s2 -> s4, df_dim -> df_dim*2 channels) with batch norm
+ h1_bn = conv_bn(h0,
+ channels=df_dim,
+ imgSize=s2,
+ num_filters=df_dim*2,
+ output_x=s4,
+ stride=2,
+ name="dis_h1",
+ param_attr=param_attr,
+ bias_attr=bias_attr,
+ param_attr_bn=param_attr_bn,
+ bn=True)
+
+ # Third stage (s4 -> s8, df_dim*2 -> df_dim*4 channels) with batch norm
+ h2_bn = conv_bn(h1_bn,
+ channels=df_dim*2,
+ imgSize=s4,
+ num_filters=df_dim*4,
+ output_x=s8,
+ stride=2,
+ name="dis_h2",
+ param_attr=param_attr,
+ bias_attr=bias_attr,
+ param_attr_bn=param_attr_bn,
+ bn=True)
+
+ # Two-way softmax over {generated, real}
+ return fc_layer(input=h2_bn, name="dis_prob", size=2,
+ bias_attr=bias_attr,
+ param_attr=param_attr,
+ act=SoftmaxActivation())
+
+
+
+if is_generator_training:
+ noise = data_layer(name="noise", size=noise_dim)
+ sample = generator(noise)
+
+if is_discriminator_training:
+ sample = data_layer(name="sample", size=sample_dim * sample_dim*c_dim)
+
+if is_generator_training or is_discriminator_training:
+ label = data_layer(name="label", size=1)
+ prob = discriminator(sample)
+ cost = cross_entropy(input=prob, label=label)
+ classification_error_evaluator(input=prob, label=label, name=mode+'_error')
+ outputs(cost)
+
+if is_generator:
+ noise = data_layer(name="noise", size=noise_dim)
+ outputs(generator(noise))
diff --git a/demo/gan/gan_trainer.py b/demo/gan/gan_trainer.py
new file mode 100644
index 0000000000000000000000000000000000000000..72699952b961cb5bf6ac14dd65eee1aeab5e2a7c
--- /dev/null
+++ b/demo/gan/gan_trainer.py
@@ -0,0 +1,329 @@
+# Copyright (c) 2016 Baidu, Inc. All Rights Reserved
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+import argparse
+import random
+import numpy
+import cPickle
+import sys,os
+from PIL import Image
+
+from paddle.trainer.config_parser import parse_config
+from paddle.trainer.config_parser import logger
+import py_paddle.swig_paddle as api
+import matplotlib.pyplot as plt
+
+def plot2DScatter(data, outputfile):
+    '''
+    Plot the data as a 2D scatter plot and save to outputfile.
+    Also logs the per-column mean and standard deviation of data.
+    :param data: array indexed as data[:, 0] (x) and data[:, 1] (y)
+    :param outputfile: path handed to plt.savefig
+    '''
+    x = data[:, 0]
+    y = data[:, 1]
+    logger.info("The mean vector is %s" % numpy.mean(data, 0))
+    logger.info("The std vector is %s" % numpy.std(data, 0))
+
+    # Clear the current figure first so points from an earlier call do not pile up.
+    plt.clf()
+    plt.scatter(x, y)
+    plt.savefig(outputfile, bbox_inches='tight')
+
+def CHECK_EQ(a, b):
+ """Assert that a equals b; the failure message shows both values."""
+ assert a == b, "a=%s, b=%s" % (a, b)
+
+def copy_shared_parameters(src, dst):
+    '''
+    Copy the value of every parameter of dst that also exists, under the
+    same name, in src. Parameters of dst with no namesake in src are left
+    untouched.
+    :param src: the source of the parameters
+    :type src: GradientMachine
+    :param dst: the destination of the parameters
+    :type dst: GradientMachine
+    '''
+    # Index the source parameters by name.
+    by_name = {}
+    for idx in xrange(src.getParameterSize()):
+        candidate = src.getParameter(idx)
+        by_name[candidate.getName()] = candidate
+
+    for idx in xrange(dst.getParameterSize()):
+        target = dst.getParameter(idx)
+        source = by_name.get(target.getName())
+        if source is not None:
+            source_buf = source.getBuf(api.PARAMETER_VALUE)
+            target_buf = target.getBuf(api.PARAMETER_VALUE)
+            CHECK_EQ(len(source_buf), len(target_buf))
+            target_buf.copyFrom(source_buf)
+            target.setValueUpdated()
+
+def print_parameters(src):
+    """Print the name and value buffer of every parameter of src."""
+    count = src.getParameterSize()
+    params = []
+    for idx in xrange(count):
+        params.append(src.getParameter(idx))
+
+    print "***************"
+    for param in params:
+        values = param.getBuf(api.PARAMETER_VALUE).copyToNumpyArray()
+        print "Name is %s" % param.getName()
+        print "value is %s \n" % values
+
+def load_mnist_data(imageFile):
+    """
+    Read an MNIST image file into a float32 array of shape (n, 28*28),
+    rescaled with x / 255 * 2 - 1.
+    n is 60000 when "train" occurs in imageFile and 10000 otherwise.
+    """
+    # Define number of samples for train/test
+    if "train" in imageFile:
+        n = 60000
+    else:
+        n = 10000
+
+    # The with-block closes the file even if reading or reshaping fails.
+    with open(imageFile, "rb") as f:
+        f.read(16)  # skip the first 16 bytes before the pixel data
+        data = numpy.fromfile(f, 'ubyte', count=n*28*28).reshape((n, 28*28))
+
+    data = data / 255.0 * 2.0 - 1.0
+    return data.astype('float32')
+
+def load_cifar_data(cifar_path):
+    """
+    Load data_batch_1 .. data_batch_5 from cifar_path into one float32 array
+    of shape (50000, 32*32*3), rescaled with x / 255 * 2 - 1.
+    """
+    batch_size = 10000
+    data = numpy.zeros((5*batch_size, 32*32*3), dtype = "float32")
+    for i in range(1, 6):
+        batch_file = cifar_path + "/data_batch_" + str(i)
+        # NOTE(review): unpickling can execute code embedded in the file;
+        # only load batches obtained from a trusted source.
+        with open(batch_file, 'rb') as fo:
+            batch = cPickle.load(fo)
+        data[(i - 1)*batch_size:(i*batch_size), :] = batch["data"]
+
+    data = data / 255.0 * 2.0 - 1.0
+    return data
+
+# synthesize 2-D uniform data
+def load_uniform_data():
+    """Return 1000000 two-dimensional float32 points from numpy.random.rand."""
+    return numpy.random.rand(1000000, 2).astype('float32')
+
+def merge(images, size):
+    """
+    Tile the first size[0] * size[1] rows of images into one uint8 image
+    made of size[0] rows and size[1] columns of tiles. Rows of length 28*28
+    become 28x28x1 tiles, anything else 32x32x3 tiles; values are mapped
+    with (x + 1) / 2 * 255.
+    """
+    if images.shape[1] == 28*28:
+        tile_h, tile_w, depth = 28, 28, 1
+    else:
+        tile_h, tile_w, depth = 32, 32, 3
+    n_rows, n_cols = size[0], size[1]
+    canvas = numpy.zeros((tile_h * n_rows, tile_w * n_cols, depth))
+    for row in xrange(n_rows):
+        for col in xrange(n_cols):
+            tile = images[row * n_cols + col, :].reshape(
+                (tile_h, tile_w, depth), order="F").transpose(1, 0, 2)
+            top, left = row * tile_h, col * tile_w
+            canvas[top:top + tile_h, left:left + tile_w, :] = \
+                ((tile + 1.0) / 2.0 * 255.0)
+    return canvas.astype('uint8')
+
+def save_images(images, path):
+    """Merge images into an 8x8 grid and save it to path as an RGB image."""
+    grid = merge(images, [8, 8])
+    single_channel = grid.shape[2] == 1
+    if single_channel:
+        # Drop the channel axis before converting to RGB.
+        picture = Image.fromarray(numpy.squeeze(grid)).convert('RGB')
+    else:
+        picture = Image.fromarray(grid, mode="RGB")
+    picture.save(path)
+
+def get_real_samples(batch_size, data_np):
+    """Return batch_size distinct rows of data_np chosen at random."""
+    picked = numpy.random.choice(data_np.shape[0], batch_size, replace=False)
+    return data_np[picked, :]
+
+def get_noise(batch_size, noise_dim):
+    """Return a (batch_size, noise_dim) float32 array from numpy.random.normal."""
+    shape = (batch_size, noise_dim)
+    return numpy.random.normal(size=shape).astype('float32')
+
+def get_fake_samples(generator_machine, batch_size, noise):
+    """
+    Forward noise through generator_machine with api.PASS_TEST and return
+    output slot 0 as a numpy matrix. batch_size is not used here.
+    """
+    feed = api.Arguments.createArguments(1)
+    feed.setSlotValue(0, api.Matrix.createDenseFromNumpy(noise))
+    result = api.Arguments.createArguments(0)
+    generator_machine.forward(feed, result, api.PASS_TEST)
+    return result.getSlotValue(0).copyToNumpyMat()
+
+def get_training_loss(training_machine, inputs):
+    """
+    Forward inputs through training_machine with api.PASS_TEST and return
+    the mean of output slot 0.
+    """
+    result = api.Arguments.createArguments(0)
+    training_machine.forward(inputs, result, api.PASS_TEST)
+    per_sample = result.getSlotValue(0).copyToNumpyMat()
+    return numpy.mean(per_sample)
+
+def prepare_discriminator_data_batch_pos(batch_size, data_np):
+    """
+    Build a discriminator batch of real samples drawn from data_np, all
+    labelled 1 (slot 0: samples, slot 1: labels).
+    """
+    samples = get_real_samples(batch_size, data_np)
+    ones = numpy.ones(batch_size, dtype='int32')
+    batch = api.Arguments.createArguments(2)
+    batch.setSlotValue(0, api.Matrix.createDenseFromNumpy(samples))
+    batch.setSlotIds(1, api.IVector.createVectorFromNumpy(ones))
+    return batch
+
+def prepare_discriminator_data_batch_neg(generator_machine, batch_size, noise):
+    """
+    Build a discriminator batch of samples generated from noise by
+    generator_machine, all labelled 0 (slot 0: samples, slot 1: labels).
+    """
+    samples = get_fake_samples(generator_machine, batch_size, noise)
+    zeros = numpy.zeros(batch_size, dtype='int32')
+    batch = api.Arguments.createArguments(2)
+    batch.setSlotValue(0, api.Matrix.createDenseFromNumpy(samples))
+    batch.setSlotIds(1, api.IVector.createVectorFromNumpy(zeros))
+    return batch
+
+def prepare_generator_data_batch(batch_size, noise):
+    """
+    Build a generator-training batch: noise in slot 0 and labels in slot 1,
+    with every label set to 1.
+    """
+    ones = numpy.ones(batch_size, dtype='int32')
+    batch = api.Arguments.createArguments(2)
+    batch.setSlotValue(0, api.Matrix.createDenseFromNumpy(noise))
+    batch.setSlotIds(1, api.IVector.createVectorFromNumpy(ones))
+    return batch
+
+
+def find(iterable, cond):
+    """Return the first item of iterable for which cond(item) is true, else None."""
+    return next((item for item in iterable if cond(item)), None)
+
+
+def get_layer_size(model_conf, layer_name):
+    """
+    Return the size of the layer named layer_name in model_conf.layers.
+    Fails an assertion when no layer has that name.
+    """
+    has_name = lambda layer: layer.name == layer_name
+    layer_conf = find(model_conf.layers, has_name)
+    assert layer_conf is not None, "Cannot find '%s' layer" % layer_name
+    return layer_conf.size
+
+
+def main():
+ """
+ Parse the command line, build the discriminator/generator gradient
+ machines and trainers from the selected config, then run 100 training
+ passes that alternate between the two networks, saving generated samples
+ at the end of every pass.
+ """
+ parser = argparse.ArgumentParser()
+ parser.add_argument("-d", "--data_source", help="mnist or cifar or uniform")
+ parser.add_argument("--use_gpu", default="1",
+ help="1 means use gpu for training")
+ parser.add_argument("--gpu_id", default="0",
+ help="the gpu_id parameter")
+ args = parser.parse_args()
+ data_source = args.data_source
+ use_gpu = args.use_gpu
+ # -d has no default, so omitting it fails the first assertion below.
+ assert data_source in ["mnist", "cifar", "uniform"]
+ assert use_gpu in ["0", "1"]
+
+ # Output directories for generated samples and saved parameters
+ if not os.path.exists("./%s_samples/" % data_source):
+ os.makedirs("./%s_samples/" % data_source)
+
+ if not os.path.exists("./%s_params/" % data_source):
+ os.makedirs("./%s_params/" % data_source)
+
+ api.initPaddle('--use_gpu=' + use_gpu, '--dot_period=10', '--log_period=100',
+ '--gpu_id=' + args.gpu_id, '--save_dir=' + "./%s_params/" % data_source)
+
+ # Synthetic data uses gan_conf.py; image data uses gan_conf_image.py
+ if data_source == "uniform":
+ conf = "gan_conf.py"
+ num_iter = 10000
+ else:
+ conf = "gan_conf_image.py"
+ num_iter = 1000
+
+ # The same config file is parsed three times, once per mode
+ gen_conf = parse_config(conf, "mode=generator_training,data=" + data_source)
+ dis_conf = parse_config(conf, "mode=discriminator_training,data=" + data_source)
+ generator_conf = parse_config(conf, "mode=generator,data=" + data_source)
+ batch_size = dis_conf.opt_config.batch_size
+ noise_dim = get_layer_size(gen_conf.model_config, "noise")
+
+ if data_source == "mnist":
+ data_np = load_mnist_data("./data/mnist_data/train-images-idx3-ubyte")
+ elif data_source == "cifar":
+ data_np = load_cifar_data("./data/cifar-10-batches-py/")
+ else:
+ data_np = load_uniform_data()
+
+ # this creates a gradient machine for discriminator
+ dis_training_machine = api.GradientMachine.createFromConfigProto(
+ dis_conf.model_config)
+ # this creates a gradient machine for generator
+ gen_training_machine = api.GradientMachine.createFromConfigProto(
+ gen_conf.model_config)
+
+ # generator_machine is used to generate data only, which is used for
+ # training discriminator
+ logger.info(str(generator_conf.model_config))
+ generator_machine = api.GradientMachine.createFromConfigProto(
+ generator_conf.model_config)
+
+ dis_trainer = api.Trainer.create(
+ dis_conf, dis_training_machine)
+
+ gen_trainer = api.Trainer.create(
+ gen_conf, gen_training_machine)
+
+ dis_trainer.startTrain()
+ gen_trainer.startTrain()
+
+ # Sync parameters between networks (GradientMachine) at the beginning
+ copy_shared_parameters(gen_training_machine, dis_training_machine)
+ copy_shared_parameters(gen_training_machine, generator_machine)
+
+ # constrain that either discriminator or generator can not be trained
+ # consecutively more than MAX_strike times
+ curr_train = "dis"
+ curr_strike = 0
+ MAX_strike = 5
+
+ for train_pass in xrange(100):
+ dis_trainer.startTrainPass()
+ gen_trainer.startTrainPass()
+ for i in xrange(num_iter):
+ # Do forward pass in discriminator to get the dis_loss
+ noise = get_noise(batch_size, noise_dim)
+ data_batch_dis_pos = prepare_discriminator_data_batch_pos(
+ batch_size, data_np)
+ dis_loss_pos = get_training_loss(dis_training_machine, data_batch_dis_pos)
+
+ data_batch_dis_neg = prepare_discriminator_data_batch_neg(
+ generator_machine, batch_size, noise)
+ dis_loss_neg = get_training_loss(dis_training_machine, data_batch_dis_neg)
+
+ dis_loss = (dis_loss_pos + dis_loss_neg) / 2.0
+
+ # Do forward pass in generator to get the gen_loss
+ data_batch_gen = prepare_generator_data_batch(
+ batch_size, noise)
+ gen_loss = get_training_loss(gen_training_machine, data_batch_gen)
+
+ if i % 100 == 0:
+ print "d_pos_loss is %s d_neg_loss is %s" % (dis_loss_pos, dis_loss_neg)
+ print "d_loss is %s g_loss is %s" % (dis_loss, gen_loss)
+
+ # Decide which network to train based on the training history
+ # And the relative size of the loss
+ # Train the discriminator unless it has already been trained MAX_strike
+ # times in a row, provided that either the generator has reached
+ # MAX_strike in a row or the discriminator loss is the larger one.
+ if (not (curr_train == "dis" and curr_strike == MAX_strike)) and \
+ ((curr_train == "gen" and curr_strike == MAX_strike) or dis_loss > gen_loss):
+ if curr_train == "dis":
+ curr_strike += 1
+ else:
+ curr_train = "dis"
+ curr_strike = 1
+ dis_trainer.trainOneDataBatch(batch_size, data_batch_dis_neg)
+ dis_trainer.trainOneDataBatch(batch_size, data_batch_dis_pos)
+ copy_shared_parameters(dis_training_machine, gen_training_machine)
+
+ else:
+ if curr_train == "gen":
+ curr_strike += 1
+ else:
+ curr_train = "gen"
+ curr_strike = 1
+ gen_trainer.trainOneDataBatch(batch_size, data_batch_gen)
+ # TODO: add API for paddle to allow true parameter sharing between different GradientMachines
+ # so that we do not need to copy shared parameters.
+ copy_shared_parameters(gen_training_machine, dis_training_machine)
+ copy_shared_parameters(gen_training_machine, generator_machine)
+
+ dis_trainer.finishTrainPass()
+ gen_trainer.finishTrainPass()
+ # At the end of each pass, save the generated samples/images
+ # (noise is the batch drawn in the last iteration of the loop above)
+ fake_samples = get_fake_samples(generator_machine, batch_size, noise)
+ if data_source == "uniform":
+ plot2DScatter(fake_samples, "./%s_samples/train_pass%s.png" % (data_source, train_pass))
+ else:
+ save_images(fake_samples, "./%s_samples/train_pass%s.png" % (data_source, train_pass))
+ dis_trainer.finishTrain()
+ gen_trainer.finishTrain()
+
+if __name__ == '__main__':
+ main()
diff --git a/demo/image_classification/data/process_cifar.py b/demo/image_classification/data/process_cifar.py
index b766118eb00737c7a196ed85850b3cebd690b0d0..b235010e4ece377beffaaa1b9247a77d7a96b712 100644
--- a/demo/image_classification/data/process_cifar.py
+++ b/demo/image_classification/data/process_cifar.py
@@ -16,7 +16,6 @@ import numpy as np
import sys
import os
import PIL.Image as Image
-
"""
Usage: python process_cifar input_dir output_dir
"""
@@ -30,6 +29,7 @@ def mkdir_not_exist(path):
if not os.path.exists(path):
os.mkdir(path)
+
def create_dir_structure(output_dir):
"""
Create the directory structure for the directory.
@@ -39,8 +39,8 @@ def create_dir_structure(output_dir):
mkdir_not_exist(os.path.join(output_dir, "train"))
mkdir_not_exist(os.path.join(output_dir, "test"))
-def convert_batch(batch_path, label_set, label_map,
- output_dir, data_split):
+
+def convert_batch(batch_path, label_set, label_map, output_dir, data_split):
"""
Convert CIFAR batch to the structure of Paddle format.
batch_path: the batch to be converted.
@@ -67,11 +67,23 @@ if __name__ == '__main__':
output_dir = sys.argv[2]
num_batch = 5
create_dir_structure(output_dir)
- label_map = {0: "airplane", 1: "automobile", 2: "bird", 3: "cat", 4: "deer",
- 5: "dog", 6: "frog", 7: "horse", 8: "ship", 9: "truck"}
+ label_map = {
+ 0: "airplane",
+ 1: "automobile",
+ 2: "bird",
+ 3: "cat",
+ 4: "deer",
+ 5: "dog",
+ 6: "frog",
+ 7: "horse",
+ 8: "ship",
+ 9: "truck"
+ }
labels = {}
for i in range(1, num_batch + 1):
- convert_batch(os.path.join(input_dir, "data_batch_%d" % i), labels,
- label_map, output_dir, "train")
- convert_batch(os.path.join(input_dir, "test_batch"), {},
- label_map, output_dir, "test")
\ No newline at end of file
+ convert_batch(
+ os.path.join(input_dir, "data_batch_%d" % i), labels, label_map,
+ output_dir, "train")
+ convert_batch(
+ os.path.join(input_dir, "test_batch"), {}, label_map, output_dir,
+ "test")
diff --git a/demo/image_classification/image_provider.py b/demo/image_classification/image_provider.py
index 305efbcdc6bb11f1dac65cc3af82fb997db97f27..28bf1bb02c1f08b2e8ec9acd38f0a8594b05ab66 100644
--- a/demo/image_classification/image_provider.py
+++ b/demo/image_classification/image_provider.py
@@ -46,14 +46,14 @@ def hook(settings, img_size, mean_img_size, num_classes, color, meta, use_jpeg,
settings.img_mean = image_util.load_meta(settings.meta_path,
settings.mean_img_size,
- settings.img_size,
- settings.color)
+ settings.img_size, settings.color)
settings.logger.info('Image size: %s', settings.img_size)
settings.logger.info('Meta path: %s', settings.meta_path)
settings.input_types = [
dense_vector(settings.img_raw_size), # image feature
- integer_value(settings.num_classes)] # labels
+ integer_value(settings.num_classes)
+ ] # labels
settings.logger.info('DataProvider Initialization finished')
@@ -79,8 +79,8 @@ def processData(settings, file_list):
img = image_util.decode_jpeg(data['images'][i])
else:
img = data['images'][i]
- img_feat = image_util.preprocess_img(img, settings.img_mean,
- settings.img_size, settings.is_train,
- settings.color)
+ img_feat = image_util.preprocess_img(
+ img, settings.img_mean, settings.img_size,
+ settings.is_train, settings.color)
label = data['labels'][i]
yield img_feat.astype('float32'), int(label)
diff --git a/demo/image_classification/image_util.py b/demo/image_classification/image_util.py
index c545d16aafbc741bce25f9469e7f67de5b88fa8c..b5c6431c06f77cef5c31ca844a8427eebaea2fce 100644
--- a/demo/image_classification/image_util.py
+++ b/demo/image_classification/image_util.py
@@ -16,17 +16,20 @@ import numpy as np
from PIL import Image
from cStringIO import StringIO
+
def resize_image(img, target_size):
"""
Resize an image so that the shorter edge has length target_size.
img: the input image to be resized.
target_size: the target resized image size.
"""
- percent = (target_size/float(min(img.size[0], img.size[1])))
- resized_size = int(round(img.size[0] * percent)), int(round(img.size[1] * percent))
+ percent = (target_size / float(min(img.size[0], img.size[1])))
+ resized_size = int(round(img.size[0] * percent)), int(
+ round(img.size[1] * percent))
img = img.resize(resized_size, Image.ANTIALIAS)
return img
+
def flip(im):
"""
Return the flipped image.
@@ -38,6 +41,7 @@ def flip(im):
else:
return im[:, ::-1]
+
def crop_img(im, inner_size, color=True, test=True):
"""
Return cropped image.
@@ -50,20 +54,22 @@ def crop_img(im, inner_size, color=True, test=True):
If True, crop the center of images.
"""
if color:
- height, width = max(inner_size, im.shape[1]), max(inner_size, im.shape[2])
+ height, width = max(inner_size, im.shape[1]), max(inner_size,
+ im.shape[2])
padded_im = np.zeros((3, height, width))
startY = (height - im.shape[1]) / 2
startX = (width - im.shape[2]) / 2
endY, endX = startY + im.shape[1], startX + im.shape[2]
- padded_im[:, startY: endY, startX: endX] = im
+ padded_im[:, startY:endY, startX:endX] = im
else:
im = im.astype('float32')
- height, width = max(inner_size, im.shape[0]), max(inner_size, im.shape[1])
+ height, width = max(inner_size, im.shape[0]), max(inner_size,
+ im.shape[1])
padded_im = np.zeros((height, width))
startY = (height - im.shape[0]) / 2
startX = (width - im.shape[1]) / 2
endY, endX = startY + im.shape[0], startX + im.shape[1]
- padded_im[startY: endY, startX: endX] = im
+ padded_im[startY:endY, startX:endX] = im
if test:
startY = (height - inner_size) / 2
startX = (width - inner_size) / 2
@@ -72,19 +78,21 @@ def crop_img(im, inner_size, color=True, test=True):
startX = np.random.randint(0, width - inner_size + 1)
endY, endX = startY + inner_size, startX + inner_size
if color:
- pic = padded_im[:, startY: endY, startX: endX]
+ pic = padded_im[:, startY:endY, startX:endX]
else:
- pic = padded_im[startY: endY, startX: endX]
+ pic = padded_im[startY:endY, startX:endX]
if (not test) and (np.random.randint(2) == 0):
pic = flip(pic)
return pic
+
def decode_jpeg(jpeg_string):
np_array = np.array(Image.open(StringIO(jpeg_string)))
if len(np_array.shape) == 3:
np_array = np.transpose(np_array, (2, 0, 1))
return np_array
+
def preprocess_img(im, img_mean, crop_size, is_train, color=True):
"""
Does data augmentation for images.
@@ -99,6 +107,7 @@ def preprocess_img(im, img_mean, crop_size, is_train, color=True):
pic -= img_mean
return pic.flatten()
+
def load_meta(meta_path, mean_img_size, crop_size, color=True):
"""
Return the loaded meta file.
@@ -109,17 +118,18 @@ def load_meta(meta_path, mean_img_size, crop_size, color=True):
mean = np.load(meta_path)['data_mean']
border = (mean_img_size - crop_size) / 2
if color:
- assert(mean_img_size * mean_img_size * 3 == mean.shape[0])
+ assert (mean_img_size * mean_img_size * 3 == mean.shape[0])
mean = mean.reshape(3, mean_img_size, mean_img_size)
- mean = mean[:, border: border + crop_size,
- border: border + crop_size].astype('float32')
+ mean = mean[:, border:border + crop_size, border:border +
+ crop_size].astype('float32')
else:
- assert(mean_img_size * mean_img_size == mean.shape[0])
+ assert (mean_img_size * mean_img_size == mean.shape[0])
mean = mean.reshape(mean_img_size, mean_img_size)
- mean = mean[border: border + crop_size,
- border: border + crop_size].astype('float32')
+ mean = mean[border:border + crop_size, border:border +
+ crop_size].astype('float32')
return mean
+
def load_image(img_path, is_color=True):
"""
Load image and return.
@@ -130,6 +140,7 @@ def load_image(img_path, is_color=True):
img.load()
return img
+
def oversample(img, crop_dims):
"""
image : iterable of (H x W x K) ndarrays
@@ -152,50 +163,53 @@ def oversample(img, crop_dims):
for j in w_indices:
crops_ix[curr] = (i, j, i + crop_dims[0], j + crop_dims[1])
curr += 1
- crops_ix[4] = np.tile(im_center, (1, 2)) + np.concatenate([
- -crop_dims / 2.0,
- crop_dims / 2.0
- ])
+ crops_ix[4] = np.tile(im_center, (1, 2)) + np.concatenate(
+ [-crop_dims / 2.0, crop_dims / 2.0])
crops_ix = np.tile(crops_ix, (2, 1))
# Extract crops
- crops = np.empty((10 * len(img), crop_dims[0], crop_dims[1],
- im_shape[-1]), dtype=np.float32)
+ crops = np.empty(
+ (10 * len(img), crop_dims[0], crop_dims[1], im_shape[-1]),
+ dtype=np.float32)
ix = 0
for im in img:
for crop in crops_ix:
crops[ix] = im[crop[0]:crop[2], crop[1]:crop[3], :]
ix += 1
- crops[ix-5:ix] = crops[ix-5:ix, :, ::-1, :] # flip for mirrors
+ crops[ix - 5:ix] = crops[ix - 5:ix, :, ::-1, :] # flip for mirrors
return crops
+
class ImageTransformer:
- def __init__(self, transpose = None,
- channel_swap = None, mean = None, is_color = True):
+ def __init__(self,
+ transpose=None,
+ channel_swap=None,
+ mean=None,
+ is_color=True):
self.transpose = transpose
self.channel_swap = None
self.mean = None
- self.is_color = is_color
+ self.is_color = is_color
- def set_transpose(self, order):
+ def set_transpose(self, order):
if self.is_color:
- assert 3 == len(order)
+ assert 3 == len(order)
self.transpose = order
- def set_channel_swap(self, order):
+ def set_channel_swap(self, order):
if self.is_color:
- assert 3 == len(order)
+ assert 3 == len(order)
self.channel_swap = order
def set_mean(self, mean):
# mean value, may be one value per channel
if mean.ndim == 1:
- mean = mean[:, np.newaxis, np.newaxis]
- else:
+ mean = mean[:, np.newaxis, np.newaxis]
+ else:
# elementwise mean
if self.is_color:
assert len(mean.shape) == 3
- self.mean = mean
+ self.mean = mean
def transformer(self, data):
if self.transpose is not None:
diff --git a/demo/image_classification/prediction.py b/demo/image_classification/prediction.py
index 5d9e93265867389ca6d2aa26e48fcfa08561e6ae..6a47bd5851c99635dd7d3f1d5df67dd081ca4584 100755
--- a/demo/image_classification/prediction.py
+++ b/demo/image_classification/prediction.py
@@ -12,7 +12,7 @@
# See the License for the specific language governing permissions and
# limitations under the License.
-import os,sys
+import os, sys
import numpy as np
import logging
from PIL import Image
@@ -24,9 +24,11 @@ from py_paddle import swig_paddle, DataProviderConverter
from paddle.trainer.PyDataProvider2 import dense_vector
from paddle.trainer.config_parser import parse_config
-logging.basicConfig(format='[%(levelname)s %(asctime)s %(filename)s:%(lineno)s] %(message)s')
+logging.basicConfig(
+ format='[%(levelname)s %(asctime)s %(filename)s:%(lineno)s] %(message)s')
logging.getLogger().setLevel(logging.INFO)
+
class ImageClassifier():
def __init__(self,
train_conf,
@@ -58,18 +60,19 @@ class ImageClassifier():
self.oversample = oversample
self.is_color = is_color
- self.transformer = image_util.ImageTransformer(is_color = is_color)
- self.transformer.set_transpose((2,0,1))
+ self.transformer = image_util.ImageTransformer(is_color=is_color)
+ self.transformer.set_transpose((2, 0, 1))
self.mean_file = mean_file
mean = np.load(self.mean_file)['data_mean']
mean = mean.reshape(3, self.crop_dims[0], self.crop_dims[1])
- self.transformer.set_mean(mean) # mean pixel
+ self.transformer.set_mean(mean) # mean pixel
gpu = 1 if use_gpu else 0
conf_args = "is_test=1,use_gpu=%d,is_predict=1" % (gpu)
conf = parse_config(train_conf, conf_args)
swig_paddle.initPaddle("--use_gpu=%d" % (gpu))
- self.network = swig_paddle.GradientMachine.createFromConfigProto(conf.model_config)
+ self.network = swig_paddle.GradientMachine.createFromConfigProto(
+ conf.model_config)
assert isinstance(self.network, swig_paddle.GradientMachine)
self.network.loadParameters(self.model_dir)
@@ -90,14 +93,14 @@ class ImageClassifier():
# image_util.resize_image: short side is self.resize_dim
image = image_util.resize_image(image, self.resize_dim)
image = np.array(image)
- input = np.zeros((1, image.shape[0], image.shape[1], 3),
- dtype=np.float32)
+ input = np.zeros(
+ (1, image.shape[0], image.shape[1], 3), dtype=np.float32)
input[0] = image.astype(np.float32)
input = image_util.oversample(input, self.crop_dims)
else:
image = image.resize(self.crop_dims, Image.ANTIALIAS)
- input = np.zeros((1, self.crop_dims[0], self.crop_dims[1], 3),
- dtype=np.float32)
+ input = np.zeros(
+ (1, self.crop_dims[0], self.crop_dims[1], 3), dtype=np.float32)
input[0] = np.array(image).astype(np.float32)
data_in = []
@@ -133,22 +136,24 @@ class ImageClassifier():
lab = np.argsort(-prob)
logging.info("Label of %s is: %d", image, lab[0])
+
if __name__ == '__main__':
- image_size=32
- crop_size=32
- multi_crop=True
- config="vgg_16_cifar.py"
- output_layer="__fc_layer_1__"
- mean_path="data/cifar-out/batches/batches.meta"
- model_path=sys.argv[1]
- image=sys.argv[2]
- use_gpu=bool(int(sys.argv[3]))
-
- obj = ImageClassifier(train_conf=config,
- model_dir=model_path,
- resize_dim=image_size,
- crop_dim=crop_size,
- mean_file=mean_path,
- use_gpu=use_gpu,
- oversample=multi_crop)
+ image_size = 32
+ crop_size = 32
+ multi_crop = True
+ config = "vgg_16_cifar.py"
+ output_layer = "__fc_layer_1__"
+ mean_path = "data/cifar-out/batches/batches.meta"
+ model_path = sys.argv[1]
+ image = sys.argv[2]
+ use_gpu = bool(int(sys.argv[3]))
+
+ obj = ImageClassifier(
+ train_conf=config,
+ model_dir=model_path,
+ resize_dim=image_size,
+ crop_dim=crop_size,
+ mean_file=mean_path,
+ use_gpu=use_gpu,
+ oversample=multi_crop)
obj.predict(image, output_layer)
diff --git a/demo/image_classification/preprocess.py b/demo/image_classification/preprocess.py
index fe7ea19bf02776629dff0f64f5b671dc457eae64..10b9c1691b5e51273c73a975545cd36f3822e901 100755
--- a/demo/image_classification/preprocess.py
+++ b/demo/image_classification/preprocess.py
@@ -19,24 +19,36 @@ from optparse import OptionParser
def option_parser():
parser = OptionParser(usage="usage: python preprcoess.py "\
"-i data_dir [options]")
- parser.add_option("-i", "--input", action="store",
- dest="input", help="Input data directory.")
- parser.add_option("-s", "--size", action="store",
- dest="size", help="Processed image size.")
- parser.add_option("-c", "--color", action="store",
- dest="color", help="whether to use color images.")
+ parser.add_option(
+ "-i",
+ "--input",
+ action="store",
+ dest="input",
+ help="Input data directory.")
+ parser.add_option(
+ "-s",
+ "--size",
+ action="store",
+ dest="size",
+ help="Processed image size.")
+ parser.add_option(
+ "-c",
+ "--color",
+ action="store",
+ dest="color",
+ help="whether to use color images.")
return parser.parse_args()
+
if __name__ == '__main__':
- options, args = option_parser()
- data_dir = options.input
- processed_image_size = int(options.size)
- color = options.color == "1"
- data_creator = ImageClassificationDatasetCreater(data_dir,
- processed_image_size,
- color)
- data_creator.train_list_name = "train.txt"
- data_creator.test_list_name = "test.txt"
- data_creator.num_per_batch = 1000
- data_creator.overwrite = True
- data_creator.create_batches()
+ options, args = option_parser()
+ data_dir = options.input
+ processed_image_size = int(options.size)
+ color = options.color == "1"
+ data_creator = ImageClassificationDatasetCreater(
+ data_dir, processed_image_size, color)
+ data_creator.train_list_name = "train.txt"
+ data_creator.test_list_name = "test.txt"
+ data_creator.num_per_batch = 1000
+ data_creator.overwrite = True
+ data_creator.create_batches()
diff --git a/demo/image_classification/train.sh b/demo/image_classification/train.sh
index ed9b5220fff6a434cd332f0972d39c4149b3ebfe..db0a057bf35b4ad04a08a1e3f1fad3bd6a486350 100755
--- a/demo/image_classification/train.sh
+++ b/demo/image_classification/train.sh
@@ -24,7 +24,7 @@ paddle train \
--test_all_data_in_one_period=1 \
--use_gpu=1 \
--trainer_count=1 \
---num_passes=200 \
+--num_passes=300 \
--save_dir=$output \
2>&1 | tee $log
diff --git a/demo/image_classification/vgg_16_cifar.py b/demo/image_classification/vgg_16_cifar.py
index edd6988c48acd6b554e09b721c37b291e21f46eb..58ceff5fc2f46cac9997b6d8af2b0db0c43e0c75 100755
--- a/demo/image_classification/vgg_16_cifar.py
+++ b/demo/image_classification/vgg_16_cifar.py
@@ -18,36 +18,38 @@ is_predict = get_config_arg("is_predict", bool, False)
####################Data Configuration ##################
if not is_predict:
- data_dir='data/cifar-out/batches/'
- meta_path=data_dir+'batches.meta'
-
- args = {'meta':meta_path,'mean_img_size': 32,
- 'img_size': 32,'num_classes': 10,
- 'use_jpeg': 1,'color': "color"}
-
- define_py_data_sources2(train_list="train.list",
- test_list="train.list",
- module='image_provider',
- obj='processData',
- args=args)
+ data_dir = 'data/cifar-out/batches/'
+ meta_path = data_dir + 'batches.meta'
+
+ args = {
+ 'meta': meta_path,
+ 'mean_img_size': 32,
+ 'img_size': 32,
+ 'num_classes': 10,
+ 'use_jpeg': 1,
+ 'color': "color"
+ }
+
+ define_py_data_sources2(
+ train_list="train.list",
+ test_list="train.list",
+ module='image_provider',
+ obj='processData',
+ args=args)
######################Algorithm Configuration #############
settings(
- batch_size = 128,
- learning_rate = 0.1 / 128.0,
- learning_method = MomentumOptimizer(0.9),
- regularization = L2Regularization(0.0005 * 128)
-)
+ batch_size=128,
+ learning_rate=0.1 / 128.0,
+ learning_method=MomentumOptimizer(0.9),
+ regularization=L2Regularization(0.0005 * 128))
#######################Network Configuration #############
-data_size=3*32*32
-label_size=10
-img = data_layer(name='image',
- size=data_size)
+data_size = 3 * 32 * 32
+label_size = 10
+img = data_layer(name='image', size=data_size)
# small_vgg is predefined in trainer_config_helpers.networks
-predict = small_vgg(input_image=img,
- num_channels=3,
- num_classes=label_size)
+predict = small_vgg(input_image=img, num_channels=3, num_classes=label_size)
if not is_predict:
lbl = data_layer(name="label", size=label_size)
diff --git a/demo/introduction/README.md b/demo/introduction/README.md
index bebf1d090d98691199ede55736dfe5b964a8fd42..0614a7afe645677ef0b65a17ea05f1dcfa45214f 100644
--- a/demo/introduction/README.md
+++ b/demo/introduction/README.md
@@ -1,4 +1,3 @@
This folder contains scripts used in PaddlePaddle introduction.
- use `bash train.sh` to train a simple linear regression model
- use `python evaluate_model.py` to read model parameters. You can see that `w` and `b` are very close to [2, 0.3].
-
diff --git a/demo/introduction/dataprovider.py b/demo/introduction/dataprovider.py
index be8c0bc89156cf843d9b08276b52f92a4d8c9706..8515022e18dc6bbf055e6db3121568acf1df1c55 100644
--- a/demo/introduction/dataprovider.py
+++ b/demo/introduction/dataprovider.py
@@ -15,10 +15,10 @@
from paddle.trainer.PyDataProvider2 import *
import random
+
# define data types of input: 2 real numbers
-@provider(input_types=[dense_vector(1), dense_vector(1)],use_seq=False)
+@provider(input_types=[dense_vector(1), dense_vector(1)], use_seq=False)
def process(settings, input_file):
for i in xrange(2000):
x = random.random()
- yield [x], [2*x+0.3]
-
+ yield [x], [2 * x + 0.3]
diff --git a/demo/introduction/evaluate_model.py b/demo/introduction/evaluate_model.py
index 8cfb843c42105757b0f63c4a00d034b47a37a0bb..ca4a1872731abde90e72cb167929b3d9e2e1ebf4 100755
--- a/demo/introduction/evaluate_model.py
+++ b/demo/introduction/evaluate_model.py
@@ -23,14 +23,17 @@ Usage:
import numpy as np
import os
+
def load(file_name):
with open(file_name, 'rb') as f:
- f.read(16) # skip header for float type.
+ f.read(16) # skip header for float type.
return np.fromfile(f, dtype=np.float32)
+
def main():
print 'w=%.6f, b=%.6f from pass 29' % (load('output/pass-00029/w'),
- load('output/pass-00029/b'))
+ load('output/pass-00029/b'))
+
if __name__ == '__main__':
main()
diff --git a/demo/introduction/trainer_config.py b/demo/introduction/trainer_config.py
index 3e3df5583282a4335ddea7b1cb30a84052d0adca..7c838c1a8f5b3cb6ac732197c85cd7c728eb013f 100644
--- a/demo/introduction/trainer_config.py
+++ b/demo/introduction/trainer_config.py
@@ -16,9 +16,14 @@ from paddle.trainer_config_helpers import *
# 1. read data. Suppose you saved above python code as dataprovider.py
data_file = 'empty.list'
-with open(data_file, 'w') as f: f.writelines(' ')
-define_py_data_sources2(train_list=data_file, test_list=None,
- module='dataprovider', obj='process',args={})
+with open(data_file, 'w') as f:
+ f.writelines(' ')
+define_py_data_sources2(
+ train_list=data_file,
+ test_list=None,
+ module='dataprovider',
+ obj='process',
+ args={})
# 2. learning algorithm
settings(batch_size=12, learning_rate=1e-3, learning_method=MomentumOptimizer())
@@ -26,7 +31,11 @@ settings(batch_size=12, learning_rate=1e-3, learning_method=MomentumOptimizer())
# 3. Network configuration
x = data_layer(name='x', size=1)
y = data_layer(name='y', size=1)
-y_predict = fc_layer(input=x, param_attr=ParamAttr(name='w'), size=1, act=LinearActivation(), bias_attr=ParamAttr(name='b'))
+y_predict = fc_layer(
+ input=x,
+ param_attr=ParamAttr(name='w'),
+ size=1,
+ act=LinearActivation(),
+ bias_attr=ParamAttr(name='b'))
cost = regression_cost(input=y_predict, label=y)
outputs(cost)
-
diff --git a/demo/mnist/data/generate_list.py b/demo/mnist/data/generate_list.py
index 1b929048b4d82b5e9d80585b6d0180f2e92200ce..d880721f94c68bbbc1740f82872462efdb368fa2 100644
--- a/demo/mnist/data/generate_list.py
+++ b/demo/mnist/data/generate_list.py
@@ -13,9 +13,9 @@
# limitations under the License.
o = open("./" + "train.list", "w")
-o.write("./data/raw_data/train" +"\n")
+o.write("./data/raw_data/train" + "\n")
o.close()
o = open("./" + "test.list", "w")
-o.write("./data/raw_data/t10k" +"\n")
-o.close()
\ No newline at end of file
+o.write("./data/raw_data/t10k" + "\n")
+o.close()
diff --git a/demo/mnist/data/get_mnist_data.sh b/demo/mnist/data/get_mnist_data.sh
index 9099b5ab6fb85d86d346a7ad819538fbd013c6ff..5a2e34026d4fe7f8315d4f5453bec7c4ee4f6885 100755
--- a/demo/mnist/data/get_mnist_data.sh
+++ b/demo/mnist/data/get_mnist_data.sh
@@ -19,4 +19,3 @@ done
cd $DIR
rm -f *.list
python generate_list.py
-
diff --git a/demo/mnist/mnist_provider.py b/demo/mnist/mnist_provider.py
index 32af29730a7365df1a98fe54a2edf8850ee93e8d..6df4676da3bdc2e6949cc911fa3720cb51ddc568 100644
--- a/demo/mnist/mnist_provider.py
+++ b/demo/mnist/mnist_provider.py
@@ -2,10 +2,9 @@ from paddle.trainer.PyDataProvider2 import *
# Define a py data provider
-@provider(input_types={
- 'pixel': dense_vector(28 * 28),
- 'label': integer_value(10)
-})
+@provider(
+ input_types={'pixel': dense_vector(28 * 28),
+ 'label': integer_value(10)})
def process(settings, filename): # settings is not used currently.
imgf = filename + "-images-idx3-ubyte"
labelf = filename + "-labels-idx1-ubyte"
diff --git a/demo/mnist/vgg_16_mnist.py b/demo/mnist/vgg_16_mnist.py
index 45a45bb061aa781231a944bb82ebfbc6b0dc9618..f9e89bc588abacd98a8f5fc82a00fae6bb2de10e 100644
--- a/demo/mnist/vgg_16_mnist.py
+++ b/demo/mnist/vgg_16_mnist.py
@@ -18,32 +18,29 @@ is_predict = get_config_arg("is_predict", bool, False)
####################Data Configuration ##################
-
if not is_predict:
- data_dir='./data/'
- define_py_data_sources2(train_list= data_dir + 'train.list',
- test_list= data_dir + 'test.list',
- module='mnist_provider',
- obj='process')
+ data_dir = './data/'
+ define_py_data_sources2(
+ train_list=data_dir + 'train.list',
+ test_list=data_dir + 'test.list',
+ module='mnist_provider',
+ obj='process')
######################Algorithm Configuration #############
settings(
- batch_size = 128,
- learning_rate = 0.1 / 128.0,
- learning_method = MomentumOptimizer(0.9),
- regularization = L2Regularization(0.0005 * 128)
-)
+ batch_size=128,
+ learning_rate=0.1 / 128.0,
+ learning_method=MomentumOptimizer(0.9),
+ regularization=L2Regularization(0.0005 * 128))
#######################Network Configuration #############
-data_size=1*28*28
-label_size=10
+data_size = 1 * 28 * 28
+label_size = 10
img = data_layer(name='pixel', size=data_size)
# small_vgg is predined in trainer_config_helpers.network
-predict = small_vgg(input_image=img,
- num_channels=1,
- num_classes=label_size)
+predict = small_vgg(input_image=img, num_channels=1, num_classes=label_size)
if not is_predict:
lbl = data_layer(name="label", size=label_size)
diff --git a/demo/model_zoo/embedding/extract_para.py b/demo/model_zoo/embedding/extract_para.py
index 17067792fc38d0d25bd28dc35bfb1b88ad5020cd..47e06fae9caa9c3d9e0d6eb2e3f6633a776c5b1d 100755
--- a/demo/model_zoo/embedding/extract_para.py
+++ b/demo/model_zoo/embedding/extract_para.py
@@ -12,7 +12,6 @@
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
-
"""
Example:
python extract_para.py --preModel PREMODEL --preDict PREDICT \
@@ -29,6 +28,7 @@ Options:
from optparse import OptionParser
import struct
+
def get_row_index(preDict, usrDict):
"""
Get the row positions for all words in user dictionary from pre-trained dictionary.
@@ -47,7 +47,9 @@ def get_row_index(preDict, usrDict):
pos.append(index[word])
return pos
-def extract_parameters_by_usrDict(preModel, preDict, usrModel, usrDict, paraDim):
+
+def extract_parameters_by_usrDict(preModel, preDict, usrModel, usrDict,
+ paraDim):
"""
Extract desired parameters from a pretrained embedding model based on user dictionary
"""
@@ -70,6 +72,7 @@ def extract_parameters_by_usrDict(preModel, preDict, usrModel, usrDict, paraDim)
print "extract parameters finish, total", len(rowIndex), "lines"
fi.close()
+
def main():
"""
Main entry for running paraconvert.py
@@ -78,19 +81,33 @@ def main():
"python %prog --preModel PREMODEL --preDict PREDICT" \
" --usrModel USRMODEL --usrDict USRDICT -d DIM"
parser = OptionParser(usage)
- parser.add_option("--preModel", action="store", dest="preModel",
- help="the name of pretrained embedding model")
- parser.add_option("--preDict", action="store", dest="preDict",
- help="the name of pretrained dictionary")
- parser.add_option("--usrModel", action="store", dest="usrModel",
- help="the name of output usr embedding model")
- parser.add_option("--usrDict", action="store", dest="usrDict",
- help="the name of user specified dictionary")
- parser.add_option("-d", action="store", dest="dim",
- help="dimension of parameter")
+ parser.add_option(
+ "--preModel",
+ action="store",
+ dest="preModel",
+ help="the name of pretrained embedding model")
+ parser.add_option(
+ "--preDict",
+ action="store",
+ dest="preDict",
+ help="the name of pretrained dictionary")
+ parser.add_option(
+ "--usrModel",
+ action="store",
+ dest="usrModel",
+ help="the name of output usr embedding model")
+ parser.add_option(
+ "--usrDict",
+ action="store",
+ dest="usrDict",
+ help="the name of user specified dictionary")
+ parser.add_option(
+ "-d", action="store", dest="dim", help="dimension of parameter")
(options, args) = parser.parse_args()
- extract_parameters_by_usrDict(options.preModel, options.preDict,
- options.usrModel, options.usrDict, int(options.dim))
+ extract_parameters_by_usrDict(options.preModel, options.preDict,
+ options.usrModel, options.usrDict,
+ int(options.dim))
+
if __name__ == '__main__':
main()
diff --git a/demo/model_zoo/embedding/paraconvert.py b/demo/model_zoo/embedding/paraconvert.py
index 523412303617a38035392e4bb99f8ce119be8ac8..54155eff8e26b16ff5303d8d279e81b4bf8a90f4 100755
--- a/demo/model_zoo/embedding/paraconvert.py
+++ b/demo/model_zoo/embedding/paraconvert.py
@@ -12,7 +12,6 @@
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
-
"""
Example:
python paraconvert.py --b2t -i INPUT -o OUTPUT -d DIM
@@ -29,6 +28,7 @@ Options:
from optparse import OptionParser
import struct
+
def binary2text(input, output, paraDim):
"""
Convert a binary parameter file of embedding model to be a text file.
@@ -76,12 +76,13 @@ def binary2text(input, output, paraDim):
fo.close()
print "binary2text finish, total", line, "lines"
+
def get_para_count(input):
"""
Compute the total number of embedding parameters in input text file.
input: the name of input text file
"""
- numRows = 1
+ numRows = 1
paraDim = 0
with open(input) as f:
line = f.readline()
@@ -90,6 +91,7 @@ def get_para_count(input):
numRows += 1
return numRows * paraDim
+
def text2binary(input, output, paddle_head=True):
"""
Convert a text parameter file of embedding model to be a binary file.
@@ -123,6 +125,7 @@ def text2binary(input, output, paddle_head=True):
fo.close()
print "text2binary finish, total", count, "lines"
+
def main():
"""
Main entry for running paraconvert.py
@@ -131,21 +134,26 @@ def main():
"python %prog --b2t -i INPUT -o OUTPUT -d DIM \n" \
"python %prog --t2b -i INPUT -o OUTPUT"
parser = OptionParser(usage)
- parser.add_option("--b2t", action="store_true",
- help="convert parameter file of embedding model from binary to text")
- parser.add_option("--t2b", action="store_true",
- help="convert parameter file of embedding model from text to binary")
- parser.add_option("-i", action="store", dest="input",
- help="input parameter file name")
- parser.add_option("-o", action="store", dest="output",
- help="output parameter file name")
- parser.add_option("-d", action="store", dest="dim",
- help="dimension of parameter")
+ parser.add_option(
+ "--b2t",
+ action="store_true",
+ help="convert parameter file of embedding model from binary to text")
+ parser.add_option(
+ "--t2b",
+ action="store_true",
+ help="convert parameter file of embedding model from text to binary")
+ parser.add_option(
+ "-i", action="store", dest="input", help="input parameter file name")
+ parser.add_option(
+ "-o", action="store", dest="output", help="output parameter file name")
+ parser.add_option(
+ "-d", action="store", dest="dim", help="dimension of parameter")
(options, args) = parser.parse_args()
if options.b2t:
binary2text(options.input, options.output, options.dim)
if options.t2b:
text2binary(options.input, options.output)
+
if __name__ == '__main__':
main()
diff --git a/demo/model_zoo/embedding/pre_DictAndModel.sh b/demo/model_zoo/embedding/pre_DictAndModel.sh
index 7821850fb25cc5b87aa305c2113efbf50b093ed1..6d647f5dd9368eaf81c19386511c7d231e4799e3 100755
--- a/demo/model_zoo/embedding/pre_DictAndModel.sh
+++ b/demo/model_zoo/embedding/pre_DictAndModel.sh
@@ -18,7 +18,5 @@ set -x
# download the dictionary and pretrained model
for file in baidu.dict model_32.emb model_64.emb model_128.emb model_256.emb
do
- # following is the google drive address
- # you can also directly download from https://pan.baidu.com/s/1o8q577s
- wget https://www.googledrive.com/host/0B7Q8d52jqeI9ejh6Q1RpMTFQT1k/embedding/$file --no-check-certificate
+ wget http://paddlepaddle.bj.bcebos.com/model_zoo/embedding/$file
done
diff --git a/demo/model_zoo/resnet/classify.py b/demo/model_zoo/resnet/classify.py
index 06d471722f8059804a59e6823bebccff85a8d542..7855126edcfec20de251e5bc08c08c7aab8f7a8e 100755
--- a/demo/model_zoo/resnet/classify.py
+++ b/demo/model_zoo/resnet/classify.py
@@ -26,16 +26,22 @@ from py_paddle import swig_paddle, DataProviderConverter
from paddle.trainer.PyDataProvider2 import dense_vector
from paddle.trainer.config_parser import parse_config
-logging.basicConfig(format='[%(levelname)s %(asctime)s %(filename)s:%(lineno)s] %(message)s')
+logging.basicConfig(
+ format='[%(levelname)s %(asctime)s %(filename)s:%(lineno)s] %(message)s')
logging.getLogger().setLevel(logging.INFO)
+
class ImageClassifier():
- def __init__(self, train_conf, model_dir=None,
- resize_dim=256, crop_dim=224,
+ def __init__(self,
+ train_conf,
+ model_dir=None,
+ resize_dim=256,
+ crop_dim=224,
use_gpu=True,
mean_file=None,
output_layer=None,
- oversample=False, is_color=True):
+ oversample=False,
+ is_color=True):
"""
train_conf: network configure.
model_dir: string, directory of model.
@@ -62,24 +68,25 @@ class ImageClassifier():
assert isinstance(self.output_layer, basestring)
self.output_layer = self.output_layer.split(",")
- self.transformer = image_util.ImageTransformer(is_color = is_color)
- self.transformer.set_transpose((2,0,1))
- self.transformer.set_channel_swap((2,1,0))
+ self.transformer = image_util.ImageTransformer(is_color=is_color)
+ self.transformer.set_transpose((2, 0, 1))
+ self.transformer.set_channel_swap((2, 1, 0))
self.mean_file = mean_file
if self.mean_file is not None:
mean = np.load(self.mean_file)['data_mean']
mean = mean.reshape(3, self.crop_dims[0], self.crop_dims[1])
- self.transformer.set_mean(mean) # mean pixel
+ self.transformer.set_mean(mean) # mean pixel
else:
# if you use three mean value, set like:
# this three mean value is calculated from ImageNet.
- self.transformer.set_mean(np.array([103.939,116.779,123.68]))
+ self.transformer.set_mean(np.array([103.939, 116.779, 123.68]))
conf_args = "is_test=1,use_gpu=%d,is_predict=1" % (int(use_gpu))
conf = parse_config(train_conf, conf_args)
swig_paddle.initPaddle("--use_gpu=%d" % (int(use_gpu)))
- self.network = swig_paddle.GradientMachine.createFromConfigProto(conf.model_config)
+ self.network = swig_paddle.GradientMachine.createFromConfigProto(
+ conf.model_config)
assert isinstance(self.network, swig_paddle.GradientMachine)
self.network.loadParameters(self.model_dir)
@@ -105,14 +112,14 @@ class ImageClassifier():
# image_util.resize_image: short side is self.resize_dim
image = image_util.resize_image(image, self.resize_dim)
image = np.array(image)
- input = np.zeros((1, image.shape[0], image.shape[1], 3),
- dtype=np.float32)
+ input = np.zeros(
+ (1, image.shape[0], image.shape[1], 3), dtype=np.float32)
input[0] = image.astype(np.float32)
input = image_util.oversample(input, self.crop_dims)
else:
image = image.resize(self.crop_dims, Image.ANTIALIAS)
- input = np.zeros((1, self.crop_dims[0], self.crop_dims[1], 3),
- dtype=np.float32)
+ input = np.zeros(
+ (1, self.crop_dims[0], self.crop_dims[1], 3), dtype=np.float32)
input[0] = np.array(image).astype(np.float32)
data_in = []
@@ -172,7 +179,7 @@ class ImageClassifier():
logging.info("Label of %s is: %d", image, lab[0])
return results
- def extract(self, data_file, output_dir, batch_size = 10000):
+ def extract(self, data_file, output_dir, batch_size=10000):
"""
extract and save features of output layers, which are
specify in Outputs() in network configure.
@@ -197,7 +204,7 @@ class ImageClassifier():
image_feature[file_name] = feature
sample_num += 1
if sample_num == batch_size:
- batch_name = os.path.join(output_dir, 'batch_%d' %(batch_num))
+ batch_name = os.path.join(output_dir, 'batch_%d' % (batch_num))
self.save_file(image_feature, batch_name)
logging.info('Finish batch %d', batch_num)
batch_num += 1
@@ -206,7 +213,7 @@ class ImageClassifier():
if idx % 1000 == 0:
logging.info('%d/%d, %s', idx, len(image_files), file_name)
if sample_num > 0:
- batch_name = os.path.join(output_dir, 'batch_%d' %(batch_num))
+ batch_name = os.path.join(output_dir, 'batch_%d' % (batch_num))
self.save_file(image_feature, batch_name)
logging.info('Finish batch %d', batch_num)
logging.info('Done: make image feature batch')
@@ -215,38 +222,64 @@ class ImageClassifier():
of = open(file, 'wb')
cPickle.dump(data, of, protocol=cPickle.HIGHEST_PROTOCOL)
+
def option_parser():
"""
Main entry for predciting
"""
usage = "%prog -c config -i data_list -w model_dir [options]"
parser = OptionParser(usage="usage: %s" % usage)
- parser.add_option("-j", "--job",
- action="store", dest="job_type",
- help="job type: predict, extract\
+ parser.add_option(
+ "-j",
+ "--job",
+ action="store",
+ dest="job_type",
+ help="job type: predict, extract\
predict: predicting,\
extract: extract features")
- parser.add_option("-c", "--conf",
- action="store", dest="train_conf",
- help="network config")
- parser.add_option("-i", "--data",
- action="store", dest="data_file",
- help="image list")
- parser.add_option("-w", "--model",
- action="store", dest="model_path",
- default=None, help="model path")
- parser.add_option("-g", "--use_gpu", action="store",
- dest="use_gpu", default=True,
- help="Whether to use gpu mode.")
- parser.add_option("-o", "--output_dir",
- action="store", dest="output_dir",
- default="output", help="output path")
- parser.add_option("-m", "--mean", action="store",
- dest="mean", default=None,
- help="mean file.")
- parser.add_option("-p", "--multi_crop", action="store_true",
- dest="multi_crop", default=False,
- help="Wether to use multiple crops on image.")
+ parser.add_option(
+ "-c",
+ "--conf",
+ action="store",
+ dest="train_conf",
+ help="network config")
+ parser.add_option(
+ "-i", "--data", action="store", dest="data_file", help="image list")
+ parser.add_option(
+ "-w",
+ "--model",
+ action="store",
+ dest="model_path",
+ default=None,
+ help="model path")
+ parser.add_option(
+ "-g",
+ "--use_gpu",
+ action="store",
+ dest="use_gpu",
+ default=True,
+ help="Whether to use gpu mode.")
+ parser.add_option(
+ "-o",
+ "--output_dir",
+ action="store",
+ dest="output_dir",
+ default="output",
+ help="output path")
+ parser.add_option(
+ "-m",
+ "--mean",
+ action="store",
+ dest="mean",
+ default=None,
+ help="mean file.")
+ parser.add_option(
+ "-p",
+ "--multi_crop",
+ action="store_true",
+ dest="multi_crop",
+ default=False,
+ help="Wether to use multiple crops on image.")
parser.add_option("-l", "--output_layer", action="store",
dest="output_layer", default=None,
help="--job=extract, specify layers to extract "\
@@ -254,24 +287,26 @@ def option_parser():
"classification probability, output in resnet.py.")
return parser.parse_args()
+
def main():
"""
1. parse input arguments.
2. predicting or extract features according job type.
"""
options, args = option_parser()
- obj = ImageClassifier(options.train_conf,
- options.model_path,
- use_gpu=options.use_gpu,
- mean_file=options.mean,
- output_layer=options.output_layer,
- oversample=options.multi_crop)
+ obj = ImageClassifier(
+ options.train_conf,
+ options.model_path,
+ use_gpu=options.use_gpu,
+ mean_file=options.mean,
+ output_layer=options.output_layer,
+ oversample=options.multi_crop)
if options.job_type == "predict":
obj.predict(options.data_file)
elif options.job_type == "extract":
- obj.extract(options.data_file,
- options.output_dir)
+ obj.extract(options.data_file, options.output_dir)
+
if __name__ == '__main__':
main()
diff --git a/demo/model_zoo/resnet/example/__init__.py b/demo/model_zoo/resnet/example/__init__.py
index 7f9e87eee6037666b86420fba194624859d356b3..c90af2ee000d46a032984ee23559e7e99b49ddad 100644
--- a/demo/model_zoo/resnet/example/__init__.py
+++ b/demo/model_zoo/resnet/example/__init__.py
@@ -11,4 +11,3 @@
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
-
diff --git a/demo/model_zoo/resnet/example/image_list_provider.py b/demo/model_zoo/resnet/example/image_list_provider.py
index ee457e1fffc7ed8629dc6bde63a8047818c0ff9d..9e415f76a53326c5809b7a8c508701c519ab443b 100644
--- a/demo/model_zoo/resnet/example/image_list_provider.py
+++ b/demo/model_zoo/resnet/example/image_list_provider.py
@@ -16,8 +16,7 @@ from paddle.utils.image_util import *
from paddle.trainer.PyDataProvider2 import *
-def hook(settings, image_size, crop_size, color, file_list,
- is_train, **kwargs):
+def hook(settings, image_size, crop_size, color, file_list, is_train, **kwargs):
"""
Description: Init with a list of data file
file_list is the name list of input files.
@@ -58,7 +57,7 @@ def hook(settings, image_size, crop_size, color, file_list,
sz = settings.crop_size * settings.crop_size
settings.img_mean = np.zeros(sz * 3, dtype=np.single)
for idx, value in enumerate(settings.mean_value):
- settings.img_mean[idx * sz: (idx + 1) * sz] = value
+ settings.img_mean[idx * sz:(idx + 1) * sz] = value
settings.img_mean = settings.img_mean.reshape(3, settings.crop_size,
settings.crop_size)
@@ -69,7 +68,8 @@ def hook(settings, image_size, crop_size, color, file_list,
settings.input_types = [
dense_vector(settings.img_input_size), # image feature
- integer_value(1)] # labels
+ integer_value(1)
+ ] # labels
settings.logger.info('Image short side: %s', settings.img_size)
settings.logger.info('Crop size: %s', settings.crop_size)
@@ -97,9 +97,6 @@ def processData(settings, file_list):
# swap channel
if settings.is_swap_channel:
img = img[settings.swap_channel, :, :]
- img_feat = preprocess_img(img,
- settings.img_mean,
- settings.crop_size,
- settings.is_train,
- settings.color)
+ img_feat = preprocess_img(img, settings.img_mean, settings.crop_size,
+ settings.is_train, settings.color)
yield img_feat.tolist(), int(lab.strip())
diff --git a/demo/model_zoo/resnet/get_model.sh b/demo/model_zoo/resnet/get_model.sh
index 89312d43edf8e4e7d639be73d5b3983ea916b902..133d08fca431540f2ed5cd6e63b51d9ce3a1b344 100755
--- a/demo/model_zoo/resnet/get_model.sh
+++ b/demo/model_zoo/resnet/get_model.sh
@@ -24,9 +24,7 @@ echo "Downloading ResNet models..."
for file in resnet_50.tar.gz resnet_101.tar.gz resnet_152.tar.gz mean_meta_224.tar.gz
do
- # following is the google drive address
- # you can also directly download from https://pan.baidu.com/s/1o8q577s
- wget https://www.googledrive.com/host/0B7Q8d52jqeI9ejh6Q1RpMTFQT1k/imagenet/$file --no-check-certificate
+ wget http://paddlepaddle.bj.bcebos.com/model_zoo/imagenet/$file
tar -xvf $file
rm $file
done
diff --git a/demo/model_zoo/resnet/load_feature.py b/demo/model_zoo/resnet/load_feature.py
index ee4930b7a17f7f21ceeba8db253eed64416ebf10..b0948b75fd0ac9a3fa89070aed04d523ce286f4e 100644
--- a/demo/model_zoo/resnet/load_feature.py
+++ b/demo/model_zoo/resnet/load_feature.py
@@ -17,9 +17,11 @@ import sys
import cPickle
import logging
-logging.basicConfig(format='[%(levelname)s %(asctime)s %(filename)s:%(lineno)s] %(message)s')
+logging.basicConfig(
+ format='[%(levelname)s %(asctime)s %(filename)s:%(lineno)s] %(message)s')
logging.getLogger().setLevel(logging.INFO)
+
def load_feature_c(file):
"""
Load feature extracted by C++ interface.
@@ -30,14 +32,15 @@ def load_feature_c(file):
f = open(file, 'r')
for line in f:
sample = []
- for slot in line.strip().split(";"):
- fea = [float(val) for val in slot.strip().split()]
+ for slot in line.strip().split(";"):
+ fea = [float(val) for val in slot.strip().split()]
if fea:
sample.append(fea)
features.append(sample)
f.close()
return features
+
def load_feature_py(feature_dir):
"""
Load feature extracted by python interface.
@@ -54,6 +57,7 @@ def load_feature_py(feature_dir):
logging.info('Load feature file %s', file_name)
return features
+
if __name__ == '__main__':
- print load_feature_py(sys.argv[1])
+ print load_feature_py(sys.argv[1])
#print load_feature_c(sys.argv[1])
diff --git a/demo/model_zoo/resnet/resnet.py b/demo/model_zoo/resnet/resnet.py
index 483e308ac804e13ca249ef4e47e9e9b00770ce1b..015b74cd484596039b9fcf010576ca340d044db7 100644
--- a/demo/model_zoo/resnet/resnet.py
+++ b/demo/model_zoo/resnet/resnet.py
@@ -13,7 +13,6 @@
# limitations under the License.
from paddle.trainer_config_helpers import *
-
"""
paper: https://arxiv.org/abs/1512.03385
"""
@@ -28,15 +27,19 @@ if not is_predict and data_provider:
# mean.meta size : 3 x 224 x 224.
# If you use three mean value, set like:
# "mean_value:103.939,116.779,123.68;"
- args={
+ args = {
'mean_meta': "model/mean_meta_224/mean.meta",
- 'image_size': 224, 'crop_size': 224,
- 'color': True,'swap_channel:': [2, 1, 0]}
- define_py_data_sources2(train_list,
- 'example/test.list',
- module="example.image_list_provider",
- obj="processData",
- args=args)
+ 'image_size': 224,
+ 'crop_size': 224,
+ 'color': True,
+ 'swap_channel:': [2, 1, 0]
+ }
+ define_py_data_sources2(
+ train_list,
+ 'example/test.list',
+ module="example.image_list_provider",
+ obj="processData",
+ args=args)
batch_size = 1
learning_rate = 0.1 / batch_size
@@ -54,12 +57,16 @@ Settings(
learning_method='momentum',
learning_rate_decay_a=0.5,
learning_rate_decay_b=1200000 * 10,
- learning_rate_schedule="discexp",
-)
+ learning_rate_schedule="discexp", )
-def conv_bn_layer(name, input, filter_size, num_filters,
- stride, padding, channels=None,
+def conv_bn_layer(name,
+ input,
+ filter_size,
+ num_filters,
+ stride,
+ padding,
+ channels=None,
active_type=ReluActivation()):
"""
A wrapper for conv layer with batch normalization layers.
@@ -67,19 +74,18 @@ def conv_bn_layer(name, input, filter_size, num_filters,
conv layer has no activation.
"""
- tmp = img_conv_layer(name=name + "_conv",
- input=input,
- filter_size=filter_size,
- num_channels=channels,
- num_filters=num_filters,
- stride=stride,
- padding=padding,
- act=LinearActivation(),
- bias_attr=False)
- return batch_norm_layer(name=name + "_bn",
- input=tmp,
- act=active_type,
- use_global_stats=is_test)
+ tmp = img_conv_layer(
+ name=name + "_conv",
+ input=input,
+ filter_size=filter_size,
+ num_channels=channels,
+ num_filters=num_filters,
+ stride=stride,
+ padding=padding,
+ act=LinearActivation(),
+ bias_attr=False)
+ return batch_norm_layer(
+ name=name + "_bn", input=tmp, act=active_type, use_global_stats=is_test)
def bottleneck_block(name, input, num_filters1, num_filters2):
@@ -88,29 +94,31 @@ def bottleneck_block(name, input, num_filters1, num_filters2):
Last conv_bn_layer has no activation.
Addto layer has activation of relu.
"""
- last_name = conv_bn_layer(name=name + '_branch2a',
- input=input,
- filter_size=1,
- num_filters=num_filters1,
- stride=1,
- padding=0)
- last_name = conv_bn_layer(name=name + '_branch2b',
- input=last_name,
- filter_size=3,
- num_filters=num_filters1,
- stride=1,
- padding=1)
- last_name = conv_bn_layer(name=name + '_branch2c',
- input=last_name,
- filter_size=1,
- num_filters=num_filters2,
- stride=1,
- padding=0,
- active_type=LinearActivation())
-
- return addto_layer(name=name + "_addto",
- input=[input, last_name],
- act=ReluActivation())
+ last_name = conv_bn_layer(
+ name=name + '_branch2a',
+ input=input,
+ filter_size=1,
+ num_filters=num_filters1,
+ stride=1,
+ padding=0)
+ last_name = conv_bn_layer(
+ name=name + '_branch2b',
+ input=last_name,
+ filter_size=3,
+ num_filters=num_filters1,
+ stride=1,
+ padding=1)
+ last_name = conv_bn_layer(
+ name=name + '_branch2c',
+ input=last_name,
+ filter_size=1,
+ num_filters=num_filters2,
+ stride=1,
+ padding=0,
+ active_type=LinearActivation())
+
+ return addto_layer(
+ name=name + "_addto", input=[input, last_name], act=ReluActivation())
def mid_projection(name, input, num_filters1, num_filters2, stride=2):
@@ -123,38 +131,41 @@ def mid_projection(name, input, num_filters1, num_filters2, stride=2):
branch2x: bottleneck building block, shortcuts are identity.
"""
# stride = 2
- branch1 = conv_bn_layer(name=name + '_branch1',
- input=input,
- filter_size=1,
- num_filters=num_filters2,
- stride=stride,
- padding=0,
- active_type=LinearActivation())
-
- last_name = conv_bn_layer(name=name + '_branch2a',
- input=input,
- filter_size=1,
- num_filters=num_filters1,
- stride=stride,
- padding=0)
- last_name = conv_bn_layer(name=name + '_branch2b',
- input=last_name,
- filter_size=3,
- num_filters=num_filters1,
- stride=1,
- padding=1)
-
- last_name = conv_bn_layer(name=name + '_branch2c',
- input=last_name,
- filter_size=1,
- num_filters=num_filters2,
- stride=1,
- padding=0,
- active_type=LinearActivation())
-
- return addto_layer(name=name + "_addto",
- input=[branch1, last_name],
- act=ReluActivation())
+ branch1 = conv_bn_layer(
+ name=name + '_branch1',
+ input=input,
+ filter_size=1,
+ num_filters=num_filters2,
+ stride=stride,
+ padding=0,
+ active_type=LinearActivation())
+
+ last_name = conv_bn_layer(
+ name=name + '_branch2a',
+ input=input,
+ filter_size=1,
+ num_filters=num_filters1,
+ stride=stride,
+ padding=0)
+ last_name = conv_bn_layer(
+ name=name + '_branch2b',
+ input=last_name,
+ filter_size=3,
+ num_filters=num_filters1,
+ stride=1,
+ padding=1)
+
+ last_name = conv_bn_layer(
+ name=name + '_branch2c',
+ input=last_name,
+ filter_size=1,
+ num_filters=num_filters2,
+ stride=1,
+ padding=0,
+ active_type=LinearActivation())
+
+ return addto_layer(
+ name=name + "_addto", input=[branch1, last_name], act=ReluActivation())
def deep_res_net(res2_num=3, res3_num=4, res4_num=6, res5_num=3):
@@ -168,67 +179,67 @@ def deep_res_net(res2_num=3, res3_num=4, res4_num=6, res5_num=3):
# For ImageNet
# conv1: 112x112
img = data_layer(name='input', size=224 * 224 * 3)
- tmp = conv_bn_layer("conv1", img,
- filter_size=7,
- channels=3,
- num_filters=64,
- stride=2,
- padding=3)
+ tmp = conv_bn_layer(
+ "conv1",
+ img,
+ filter_size=7,
+ channels=3,
+ num_filters=64,
+ stride=2,
+ padding=3)
tmp = img_pool_layer(name="pool1", input=tmp, pool_size=3, stride=2)
# conv2_x: 56x56
- tmp = mid_projection(name="res2_1",
- input=tmp,
- num_filters1=64,
- num_filters2=256,
- stride=1)
+ tmp = mid_projection(
+ name="res2_1", input=tmp, num_filters1=64, num_filters2=256, stride=1)
for i in xrange(2, res2_num + 1, 1):
- tmp = bottleneck_block(name="res2_" + str(i),
- input=tmp,
- num_filters1=64,
- num_filters2=256)
+ tmp = bottleneck_block(
+ name="res2_" + str(i), input=tmp, num_filters1=64, num_filters2=256)
# conv3_x: 28x28
- tmp = mid_projection(name="res3_1",
- input=tmp,
- num_filters1=128,
- num_filters2=512)
+ tmp = mid_projection(
+ name="res3_1", input=tmp, num_filters1=128, num_filters2=512)
for i in xrange(2, res3_num + 1, 1):
- tmp = bottleneck_block(name="res3_" + str(i),
- input=tmp, num_filters1=128,
- num_filters2=512)
+ tmp = bottleneck_block(
+ name="res3_" + str(i),
+ input=tmp,
+ num_filters1=128,
+ num_filters2=512)
# conv4_x: 14x14
- tmp = mid_projection(name="res4_1", input=tmp,
- num_filters1=256, num_filters2=1024)
+ tmp = mid_projection(
+ name="res4_1", input=tmp, num_filters1=256, num_filters2=1024)
for i in xrange(2, res4_num + 1, 1):
- tmp = bottleneck_block(name="res4_" + str(i),
- input=tmp,
- num_filters1=256,
- num_filters2=1024)
+ tmp = bottleneck_block(
+ name="res4_" + str(i),
+ input=tmp,
+ num_filters1=256,
+ num_filters2=1024)
# conv5_x: 7x7
- tmp = mid_projection(name="res5_1", input=tmp,
- num_filters1=512, num_filters2=2048)
+ tmp = mid_projection(
+ name="res5_1", input=tmp, num_filters1=512, num_filters2=2048)
for i in xrange(2, res5_num + 1, 1):
- tmp = bottleneck_block(name="res5_" + str(i),
- input=tmp, num_filters1=512,
- num_filters2=2048)
-
- tmp = img_pool_layer(name='avgpool',
- input=tmp,
- pool_size=7,
- stride=1,
- pool_type=AvgPooling())
-
- output = fc_layer(name='output',
- input=tmp,
- size=1000,
- act=SoftmaxActivation())
+ tmp = bottleneck_block(
+ name="res5_" + str(i),
+ input=tmp,
+ num_filters1=512,
+ num_filters2=2048)
+
+ tmp = img_pool_layer(
+ name='avgpool',
+ input=tmp,
+ pool_size=7,
+ stride=1,
+ pool_type=AvgPooling())
+
+ output = fc_layer(
+ name='output', input=tmp, size=1000, act=SoftmaxActivation())
if not is_predict:
- classification_cost(input=output, label=data_layer(name='label',
- size=1))
+ classification_cost(
+ input=output, label=data_layer(
+ name='label', size=1))
def res_net_50():
diff --git a/demo/quick_start/api_train.py b/demo/quick_start/api_train.py
index 5ae19b8d26534a9521a6da7630796edce36780e7..66cbb856484d231613a0026be129a7bc3a7cfdf5 100644
--- a/demo/quick_start/api_train.py
+++ b/demo/quick_start/api_train.py
@@ -22,27 +22,32 @@ from py_paddle import DataProviderConverter
from paddle.trainer.PyDataProvider2 \
import integer_value, integer_value_sequence, sparse_binary_vector
+
def parse_arguments():
parser = argparse.ArgumentParser()
- parser.add_argument("--train_data",
- type=str, required=False, help="train data file")
+ parser.add_argument(
+ "--train_data", type=str, required=False, help="train data file")
parser.add_argument("--test_data", type=str, help="test data file")
- parser.add_argument("--config",
- type=str, required=True, help="config file name")
+ parser.add_argument(
+ "--config", type=str, required=True, help="config file name")
parser.add_argument("--dict_file", required=True, help="dictionary file")
- parser.add_argument("--seq",
- default=1, type=int,
- help="whether use sequence training")
- parser.add_argument("--use_gpu", default=0, type=int,
- help="whether use GPU for training")
- parser.add_argument("--trainer_count", default=1, type=int,
- help="Number of threads for training")
- parser.add_argument("--num_passes", default=5, type=int,
- help="Number of training passes")
+ parser.add_argument(
+ "--seq", default=1, type=int, help="whether use sequence training")
+ parser.add_argument(
+ "--use_gpu", default=0, type=int, help="whether use GPU for training")
+ parser.add_argument(
+ "--trainer_count",
+ default=1,
+ type=int,
+ help="Number of threads for training")
+ parser.add_argument(
+ "--num_passes", default=5, type=int, help="Number of training passes")
return parser.parse_args()
+
UNK_IDX = 0
+
def load_data(file_name, word_dict):
with open(file_name, 'r') as f:
for line in f:
@@ -51,6 +56,7 @@ def load_data(file_name, word_dict):
word_slot = [word_dict.get(w, UNK_IDX) for w in words]
yield word_slot, int(label)
+
def load_dict(dict_file):
word_dict = dict()
with open(dict_file, 'r') as f:
@@ -59,6 +65,7 @@ def load_dict(dict_file):
word_dict[w] = i
return word_dict
+
def main():
options = parse_arguments()
api.initPaddle("--use_gpu=%s" % options.use_gpu,
@@ -86,9 +93,9 @@ def main():
# create a data converter which converts data to PaddlePaddle
# internal format
input_types = [
- integer_value_sequence(len(word_dict)) if options.seq
- else sparse_binary_vector(len(word_dict)),
- integer_value(2)]
+ integer_value_sequence(len(word_dict)) if options.seq else
+ sparse_binary_vector(len(word_dict)), integer_value(2)
+ ]
converter = DataProviderConverter(input_types)
batch_size = trainer_config.opt_config.batch_size
@@ -102,7 +109,7 @@ def main():
trainer.trainOneDataBatch(size, converter(batch))
trainer.finishTrainPass()
if test_dataset:
- trainer.startTestPeriod();
+ trainer.startTestPeriod()
for pos in xrange(0, len(test_dataset), batch_size):
batch = itertools.islice(test_dataset, pos, pos + batch_size)
size = min(batch_size, len(test_dataset) - pos)
@@ -110,5 +117,6 @@ def main():
trainer.finishTestPeriod()
trainer.finishTrain()
+
if __name__ == '__main__':
main()
diff --git a/demo/quick_start/data/README.md b/demo/quick_start/data/README.md
new file mode 100644
index 0000000000000000000000000000000000000000..63abcf7ebf31903213e44cf492b93e09f61db14e
--- /dev/null
+++ b/demo/quick_start/data/README.md
@@ -0,0 +1,9 @@
+This dataset consists of electronics product reviews associated with
+binary labels (positive/negative) for sentiment classification.
+
+The preprocessed data can be downloaded with the script `get_data.sh`.
+The data was derived from reviews_Electronics_5.json.gz, which is available at
+
+http://snap.stanford.edu/data/amazon/productGraph/categoryFiles/reviews_Electronics_5.json.gz
+
+If you want to process the raw data, you can use the script `proc_from_raw_data/get_data.sh`.
diff --git a/demo/quick_start/data/get_data.sh b/demo/quick_start/data/get_data.sh
index f355d63225b28ab495b34e72dd3be8d237ae08f4..952de3f3c8f52a7a6f84412f9b38f16ac2503ac2 100755
--- a/demo/quick_start/data/get_data.sh
+++ b/demo/quick_start/data/get_data.sh
@@ -17,14 +17,11 @@ set -e
DIR="$( cd "$(dirname "$0")" ; pwd -P )"
cd $DIR
-echo "Downloading Amazon Electronics reviews data..."
-# http://jmcauley.ucsd.edu/data/amazon/
-wget http://snap.stanford.edu/data/amazon/productGraph/categoryFiles/reviews_Electronics_5.json.gz
+# Download the preprocessed data
+wget http://paddlepaddle.bj.bcebos.com/demo/quick_start_preprocessed_data/preprocessed_data.tar.gz
-echo "Downloading mosesdecoder..."
-#https://github.com/moses-smt/mosesdecoder
-wget https://github.com/moses-smt/mosesdecoder/archive/master.zip
+# Extract package
+tar zxvf preprocessed_data.tar.gz
-unzip master.zip
-rm master.zip
-echo "Done."
+# Remove compressed package
+rm preprocessed_data.tar.gz
diff --git a/demo/quick_start/data/pred.list b/demo/quick_start/data/pred.list
deleted file mode 100644
index d88b2b63851101a8b40e706b32d8c17b5fabb201..0000000000000000000000000000000000000000
--- a/demo/quick_start/data/pred.list
+++ /dev/null
@@ -1 +0,0 @@
-./data/pred.txt
diff --git a/demo/quick_start/data/pred.txt b/demo/quick_start/data/pred.txt
deleted file mode 100644
index 6ed5f738ddaff6645448d5e606dcef1baf01b282..0000000000000000000000000000000000000000
--- a/demo/quick_start/data/pred.txt
+++ /dev/null
@@ -1,2 +0,0 @@
-the device is cute , but that 's just about all that 's good. the specs are what you 'd expect : it 's a wifi mic , with some noise filter options. the app has the option to upload your baby 's name and photo , which is a cutesy touch. but the app is otherwise unstable and useless unless you upgrade for $ 60 / year.set up involves downloading the app , turning on the mic , switching your phone to the wifi network of the mic , telling the app your wifi settings , switching your wifi back to your home router. the app is then directly connected to your mic.the app is adware ! the main screen says " cry notifications on / off : upgrade to evoz premium and receive a text message of email when your baby is crying " .but the adware points out an important limitation , this monitor is only intended to be used from your home network. if you want to access it remotely , get a webcam. this app would make a lot more sense of the premium features were included with the hardware .
-don 't be fooled by my one star rating. if there was a zero , i would have selected it. this product was a waste of my money.it has never worked like the company said it supposed to. i only have one device , an iphone 4gs. after charging the the iphone mid way , the i.sound portable power max 16,000 mah is completely drained. the led light no longer lit up. when plugging the isound portable power max into a wall outlet to charge , it would charge for about 20-30 minutes and then all four battery led indicator lit up showing a full charge. i would leave it on to charge for the full 8 hours or more but each time with the same result upon using. don 't buy this thing. put your money to good use elsewhere .
diff --git a/demo/quick_start/preprocess.sh b/demo/quick_start/data/proc_from_raw_data/get_data.sh
similarity index 63%
rename from demo/quick_start/preprocess.sh
rename to demo/quick_start/data/proc_from_raw_data/get_data.sh
index 58a72147c5e41351634395e770e9a214ed3cb01d..cd85e26842dfccea78e4f26bdfee938887021f03 100755
--- a/demo/quick_start/preprocess.sh
+++ b/demo/quick_start/data/proc_from_raw_data/get_data.sh
@@ -16,24 +16,40 @@
# 1. size of pos : neg = 1:1.
# 2. size of testing set = min(25k, len(all_data) * 0.1), others is traning set.
# 3. distinct train set and test set.
-# 4. build dict
set -e
+DIR="$( cd "$(dirname "$0")" ; pwd -P )"
+cd $DIR
+
+# Download data
+echo "Downloading Amazon Electronics reviews data..."
+# http://jmcauley.ucsd.edu/data/amazon/
+wget http://snap.stanford.edu/data/amazon/productGraph/categoryFiles/reviews_Electronics_5.json.gz
+echo "Downloading mosesdecoder..."
+# https://github.com/moses-smt/mosesdecoder
+wget https://github.com/moses-smt/mosesdecoder/archive/master.zip
+
+unzip master.zip
+rm master.zip
+
+##################
+# Preprocess data
+echo "Preprocess data..."
export LC_ALL=C
UNAME_STR=`uname`
-if [[ ${UNAME_STR} == 'Linux' ]]; then
+if [ ${UNAME_STR} == 'Linux' ]; then
SHUF_PROG='shuf'
else
SHUF_PROG='gshuf'
fi
-mkdir -p data/tmp
-python preprocess.py -i data/reviews_Electronics_5.json.gz
+mkdir -p tmp
+python preprocess.py -i reviews_Electronics_5.json.gz
# uniq and shuffle
-cd data/tmp
-echo 'uniq and shuffle...'
+cd tmp
+echo 'Uniq and shuffle...'
cat pos_*|sort|uniq|${SHUF_PROG}> pos.shuffed
cat neg_*|sort|uniq|${SHUF_PROG}> neg.shuffed
@@ -53,11 +69,11 @@ cat train.pos train.neg | ${SHUF_PROG} >../train.txt
cat test.pos test.neg | ${SHUF_PROG} >../test.txt
cd -
-echo 'data/train.txt' > data/train.list
-echo 'data/test.txt' > data/test.list
+echo 'train.txt' > train.list
+echo 'test.txt' > test.list
# use 30k dict
-rm -rf data/tmp
-mv data/dict.txt data/dict_all.txt
-cat data/dict_all.txt | head -n 30001 > data/dict.txt
-echo 'preprocess finished'
+rm -rf tmp
+mv dict.txt dict_all.txt
+cat dict_all.txt | head -n 30001 > dict.txt
+echo 'Done.'
diff --git a/demo/quick_start/preprocess.py b/demo/quick_start/data/proc_from_raw_data/preprocess.py
similarity index 93%
rename from demo/quick_start/preprocess.py
rename to demo/quick_start/data/proc_from_raw_data/preprocess.py
index 69fdbe44b5245bc2855847a1507e6eaed517eb96..56c2c5f16ceb63ff88fa51ed78c2e77ea5b64592 100755
--- a/demo/quick_start/preprocess.py
+++ b/demo/quick_start/data/proc_from_raw_data/preprocess.py
@@ -13,9 +13,8 @@
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
-
"""
-1. (remove HTML before or not)tokensizing
+1. Tokenize the words and punctuation
2. pos sample : rating score 5; neg sample: rating score 1-2.
Usage:
@@ -35,7 +34,8 @@ import multiprocessing
batch_size = 5000
word_count = {}
-num_tokenize = max(1, multiprocessing.cpu_count() - 2) # parse + tokenize + save
+num_tokenize = max(1,
+ multiprocessing.cpu_count() - 2) # parse + tokenize + save
max_queue_size = 8
parse_queue = Queue(maxsize=max_queue_size + num_tokenize)
tokenize_queue = Queue(maxsize=max_queue_size + num_tokenize)
@@ -76,7 +76,11 @@ def tokenize(sentences):
sentences : a list of input sentences.
return: a list of processed text.
"""
- dir = './data/mosesdecoder-master/scripts/tokenizer/tokenizer.perl'
+ dir = './mosesdecoder-master/scripts/tokenizer/tokenizer.perl'
+ if not os.path.exists(dir):
+ sys.exit(
+ "The ./mosesdecoder-master/scripts/tokenizer/tokenizer.perl does not exist."
+ )
tokenizer_cmd = [dir, '-l', 'en', '-q', '-']
assert isinstance(sentences, list)
text = "\n".join(sentences)
@@ -104,7 +108,7 @@ def tokenize_batch(id):
num_batch, instance, pre_fix = parse_queue.get()
if num_batch == -1: ### parse_queue finished
tokenize_queue.put((-1, None, None))
- sys.stderr.write("tokenize theread %s finish\n" % (id))
+ sys.stderr.write("Thread %s finish\n" % (id))
break
tokenize_instance = tokenize(instance)
tokenize_queue.put((num_batch, tokenize_instance, pre_fix))
diff --git a/demo/quick_start/dataprovider_bow.py b/demo/quick_start/dataprovider_bow.py
index f8cde189cf87d73aec05da4b34e064cddecff56b..a5156a2d40cc04c02e50d676045ae6da8937ba01 100644
--- a/demo/quick_start/dataprovider_bow.py
+++ b/demo/quick_start/dataprovider_bow.py
@@ -17,6 +17,7 @@ from paddle.trainer.PyDataProvider2 import *
# id of the word not in dictionary
UNK_IDX = 0
+
# initializer is called by the framework during initialization.
# It allows the user to describe the data types and setup the
# necessary data structure for later use.
@@ -38,7 +39,9 @@ def initializer(settings, dictionary, **kwargs):
# The second input is an integer. It represents the category id of the
# sample. 2 means there are two labels in the dataset.
# (1 for positive and 0 for negative)
- integer_value(2)]
+ integer_value(2)
+ ]
+
# Delaring a data provider. It has an initializer 'data_initialzer'.
# It will cache the generated data of the first pass in memory, so that
@@ -69,9 +72,8 @@ def process(settings, file_name):
def predict_initializer(settings, dictionary, **kwargs):
settings.word_dict = dictionary
- settings.input_types = [
- sparse_binary_vector(len(dictionary))
- ]
+ settings.input_types = [sparse_binary_vector(len(dictionary))]
+
# Declaring a data provider for prediction. The difference with process
# is that label is not generated.
diff --git a/demo/quick_start/dataprovider_emb.py b/demo/quick_start/dataprovider_emb.py
index f5632d5f3f8bd8bb83b12198e7450b239eb1f7f6..286f3f5c82081f1a6e02a26023969790792a78a3 100755
--- a/demo/quick_start/dataprovider_emb.py
+++ b/demo/quick_start/dataprovider_emb.py
@@ -24,7 +24,8 @@ def initializer(settings, dictionary, **kwargs):
# The value of the integers range from 0 to len(dictrionary)-1
integer_value_sequence(len(dictionary)),
# Define the second input for label id
- integer_value(2)]
+ integer_value(2)
+ ]
@provider(init_hook=initializer, cache=CacheType.CACHE_PASS_IN_MEM)
@@ -40,7 +41,8 @@ def process(settings, file_name):
def predict_initializer(settings, dictionary, **kwargs):
settings.word_dict = dictionary
settings.input_types = [
- integer_value(len(dictionary), seq_type=SequenceType.SEQUENCE)
+ integer_value(
+ len(dictionary), seq_type=SequenceType.SEQUENCE)
]
diff --git a/demo/quick_start/train.sh b/demo/quick_start/train.sh
index 49806292a4ec5bd4194ccb6f6a638b6b2b4f37ed..b3c471608c3248bfc714d5e44dd927f25dd23ea0 100755
--- a/demo/quick_start/train.sh
+++ b/demo/quick_start/train.sh
@@ -20,6 +20,7 @@ cfg=trainer_config.lr.py
#cfg=trainer_config.lstm.py
#cfg=trainer_config.bidi-lstm.py
#cfg=trainer_config.db-lstm.py
+#cfg=trainer_config.resnet-lstm.py
paddle train \
--config=$cfg \
--save_dir=./output \
diff --git a/demo/quick_start/trainer_config.bidi-lstm.py b/demo/quick_start/trainer_config.bidi-lstm.py
index 3be3d373422714c6b40e530cf112f9106b85d20b..51deaf31f94681b6b61f98f798cef14a65ec92cb 100644
--- a/demo/quick_start/trainer_config.bidi-lstm.py
+++ b/demo/quick_start/trainer_config.bidi-lstm.py
@@ -27,11 +27,12 @@ is_predict = get_config_arg('is_predict', bool, False)
trn = 'data/train.list' if not is_predict else None
tst = 'data/test.list' if not is_predict else 'data/pred.list'
process = 'process' if not is_predict else 'process_predict'
-define_py_data_sources2(train_list=trn,
- test_list=tst,
- module="dataprovider_emb",
- obj=process,
- args={"dictionary": word_dict})
+define_py_data_sources2(
+ train_list=trn,
+ test_list=tst,
+ module="dataprovider_emb",
+ obj=process,
+ args={"dictionary": word_dict})
batch_size = 128 if not is_predict else 1
settings(
@@ -39,19 +40,17 @@ settings(
learning_rate=2e-3,
learning_method=AdamOptimizer(),
regularization=L2Regularization(8e-4),
- gradient_clipping_threshold=25
-)
+ gradient_clipping_threshold=25)
-bias_attr = ParamAttr(initial_std=0.,l2_rate=0.)
+bias_attr = ParamAttr(initial_std=0., l2_rate=0.)
data = data_layer(name="word", size=len(word_dict))
emb = embedding_layer(input=data, size=128)
bi_lstm = bidirectional_lstm(input=emb, size=128)
dropout = dropout_layer(input=bi_lstm, dropout_rate=0.5)
-output = fc_layer(input=dropout, size=2,
- bias_attr=bias_attr,
- act=SoftmaxActivation())
+output = fc_layer(
+ input=dropout, size=2, bias_attr=bias_attr, act=SoftmaxActivation())
if is_predict:
maxid = maxid_layer(output)
diff --git a/demo/quick_start/trainer_config.cnn.py b/demo/quick_start/trainer_config.cnn.py
index 253ec0aee26cf42226d79726a75aad6c61c77565..388efa75f903e0c7c803c99cd50d73a004133a67 100644
--- a/demo/quick_start/trainer_config.cnn.py
+++ b/demo/quick_start/trainer_config.cnn.py
@@ -27,11 +27,12 @@ is_predict = get_config_arg('is_predict', bool, False)
trn = 'data/train.list' if not is_predict else None
tst = 'data/test.list' if not is_predict else 'data/pred.list'
process = 'process' if not is_predict else 'process_predict'
-define_py_data_sources2(train_list=trn,
- test_list=tst,
- module="dataprovider_emb",
- obj=process,
- args={"dictionary": word_dict})
+define_py_data_sources2(
+ train_list=trn,
+ test_list=tst,
+ module="dataprovider_emb",
+ obj=process,
+ args={"dictionary": word_dict})
batch_size = 128 if not is_predict else 1
settings(
@@ -39,8 +40,7 @@ settings(
learning_rate=2e-3,
learning_method=AdamOptimizer(),
regularization=L2Regularization(8e-4),
- gradient_clipping_threshold=25
-)
+ gradient_clipping_threshold=25)
data = data_layer(name="word", size=len(word_dict))
embedding = embedding_layer(input=data, size=128)
diff --git a/demo/quick_start/trainer_config.db-lstm.py b/demo/quick_start/trainer_config.db-lstm.py
index b35bdf5a61b4731cadb5eb992796c5e885efbd7e..02bc898d881efbd3bfaed95d45cd9e70ed046746 100644
--- a/demo/quick_start/trainer_config.db-lstm.py
+++ b/demo/quick_start/trainer_config.db-lstm.py
@@ -27,11 +27,12 @@ is_predict = get_config_arg('is_predict', bool, False)
trn = 'data/train.list' if not is_predict else None
tst = 'data/test.list' if not is_predict else 'data/pred.list'
process = 'process' if not is_predict else 'process_predict'
-define_py_data_sources2(train_list=trn,
- test_list=tst,
- module="dataprovider_emb",
- obj=process,
- args={"dictionary": word_dict})
+define_py_data_sources2(
+ train_list=trn,
+ test_list=tst,
+ module="dataprovider_emb",
+ obj=process,
+ args={"dictionary": word_dict})
batch_size = 128 if not is_predict else 1
settings(
@@ -39,10 +40,9 @@ settings(
learning_rate=2e-3,
learning_method=AdamOptimizer(),
regularization=L2Regularization(8e-4),
- gradient_clipping_threshold=25
-)
+ gradient_clipping_threshold=25)
-bias_attr = ParamAttr(initial_std=0.,l2_rate=0.)
+bias_attr = ParamAttr(initial_std=0., l2_rate=0.)
data = data_layer(name="word", size=len(word_dict))
emb = embedding_layer(input=data, size=128)
@@ -52,17 +52,18 @@ lstm_0 = lstmemory(input=hidden_0, layer_attr=ExtraAttr(drop_rate=0.1))
input_layers = [hidden_0, lstm_0]
-for i in range(1,8):
+for i in range(1, 8):
fc = fc_layer(input=input_layers, size=128)
- lstm = lstmemory(input=fc, layer_attr=ExtraAttr(drop_rate=0.1),
- reverse=(i % 2) == 1,)
+ lstm = lstmemory(
+ input=fc,
+ layer_attr=ExtraAttr(drop_rate=0.1),
+ reverse=(i % 2) == 1, )
input_layers = [fc, lstm]
lstm_last = pooling_layer(input=lstm, pooling_type=MaxPooling())
-output = fc_layer(input=lstm_last, size=2,
- bias_attr=bias_attr,
- act=SoftmaxActivation())
+output = fc_layer(
+ input=lstm_last, size=2, bias_attr=bias_attr, act=SoftmaxActivation())
if is_predict:
maxid = maxid_layer(output)
diff --git a/demo/quick_start/trainer_config.emb.py b/demo/quick_start/trainer_config.emb.py
index 34dd7b96f2f142159472b98a09fd0092fac15e43..8fd18a7aac704e62b137845edb46cce5bc373285 100644
--- a/demo/quick_start/trainer_config.emb.py
+++ b/demo/quick_start/trainer_config.emb.py
@@ -27,18 +27,16 @@ is_predict = get_config_arg('is_predict', bool, False)
trn = 'data/train.list' if not is_predict else None
tst = 'data/test.list' if not is_predict else 'data/pred.list'
process = 'process' if not is_predict else 'process_predict'
-define_py_data_sources2(train_list=trn,
- test_list=tst,
- module="dataprovider_emb",
- obj=process,
- args={"dictionary": word_dict})
+define_py_data_sources2(
+ train_list=trn,
+ test_list=tst,
+ module="dataprovider_emb",
+ obj=process,
+ args={"dictionary": word_dict})
batch_size = 128 if not is_predict else 1
settings(
- batch_size=batch_size,
- learning_rate=2e-3,
- learning_method=AdamOptimizer()
-)
+ batch_size=batch_size, learning_rate=2e-3, learning_method=AdamOptimizer())
data = data_layer(name="word", size=len(word_dict))
embedding = embedding_layer(input=data, size=128)
diff --git a/demo/quick_start/trainer_config.lr.py b/demo/quick_start/trainer_config.lr.py
index c6059947f30b32975d72155150de095ade01aa9d..b9c9441baac28a8a8f6078065b75664819d6cd04 100644
--- a/demo/quick_start/trainer_config.lr.py
+++ b/demo/quick_start/trainer_config.lr.py
@@ -32,11 +32,12 @@ process = 'process' if not is_predict else 'process_predict'
# We need to use different process for training and prediction.
# For training, the input data includes both word IDs and labels.
# For prediction, the input data only includs word Ids.
-define_py_data_sources2(train_list=trn,
- test_list=tst,
- module="dataprovider_bow",
- obj=process,
- args={"dictionary": word_dict})
+define_py_data_sources2(
+ train_list=trn,
+ test_list=tst,
+ module="dataprovider_bow",
+ obj=process,
+ args={"dictionary": word_dict})
batch_size = 128 if not is_predict else 1
settings(
@@ -44,8 +45,7 @@ settings(
learning_rate=2e-3,
learning_method=AdamOptimizer(),
regularization=L2Regularization(8e-4),
- gradient_clipping_threshold=25
-)
+ gradient_clipping_threshold=25)
# Define the data for text features. The size of the data layer is the number
# of words in the dictionary.
diff --git a/demo/quick_start/trainer_config.lstm.py b/demo/quick_start/trainer_config.lstm.py
index b412a9cbd914dc7abd70b93bbe250759552ee071..8821e02d9bd4a0d06b8afa99df8e0fac3e2fcefe 100644
--- a/demo/quick_start/trainer_config.lstm.py
+++ b/demo/quick_start/trainer_config.lstm.py
@@ -27,11 +27,12 @@ is_predict = get_config_arg('is_predict', bool, False)
trn = 'data/train.list' if not is_predict else None
tst = 'data/test.list' if not is_predict else 'data/pred.list'
process = 'process' if not is_predict else 'process_predict'
-define_py_data_sources2(train_list=trn,
- test_list=tst,
- module="dataprovider_emb",
- obj=process,
- args={"dictionary": word_dict})
+define_py_data_sources2(
+ train_list=trn,
+ test_list=tst,
+ module="dataprovider_emb",
+ obj=process,
+ args={"dictionary": word_dict})
batch_size = 128 if not is_predict else 1
settings(
@@ -39,17 +40,14 @@ settings(
learning_rate=2e-3,
learning_method=AdamOptimizer(),
regularization=L2Regularization(8e-4),
- gradient_clipping_threshold=25
-)
-
+ gradient_clipping_threshold=25)
data = data_layer(name="word", size=len(word_dict))
emb = embedding_layer(input=data, size=128)
-lstm = simple_lstm(input=emb, size=128,
- lstm_cell_attr=ExtraAttr(drop_rate=0.25))
+lstm = simple_lstm(
+ input=emb, size=128, lstm_cell_attr=ExtraAttr(drop_rate=0.25))
lstm_max = pooling_layer(input=lstm, pooling_type=MaxPooling())
-output = fc_layer(input=lstm_max, size=2,
- act=SoftmaxActivation())
+output = fc_layer(input=lstm_max, size=2, act=SoftmaxActivation())
if is_predict:
maxid = maxid_layer(output)
outputs([maxid, output])
diff --git a/demo/quick_start/trainer_config.resnet-lstm.py b/demo/quick_start/trainer_config.resnet-lstm.py
new file mode 100644
index 0000000000000000000000000000000000000000..91e1581c386eb880d481b7352c4d21f3a5ef5c9a
--- /dev/null
+++ b/demo/quick_start/trainer_config.resnet-lstm.py
@@ -0,0 +1,100 @@
+# edit-mode: -*- python -*-
+
+# Copyright (c) 2016 Baidu, Inc. All Rights Reserved
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+"""
+This configuration is a demonstration of how to implement the stacked LSTM
+with residual connections, i.e. an LSTM layer takes the sum of the hidden states
+and inputs of the previous LSTM layer instead of only the hidden states.
+This architecture is from:
+Yonghui Wu, Mike Schuster, Zhifeng Chen, Quoc V. Le, Mohammad Norouzi,
+Wolfgang Macherey, Maxim Krikun, Yuan Cao, Qin Gao, Klaus Macherey,
+Jeff Klingner, Apurva Shah, Melvin Johnson, Xiaobing Liu, Lukasz Kaiser,
+Stephan Gouws, Yoshikiyo Kato, Taku Kudo, Hideto Kazawa, Keith Stevens,
+George Kurian, Nishant Patil, Wei Wang, Cliff Young, Jason Smith, Jason Riesa,
+Alex Rudnick, Oriol Vinyals, Greg Corrado, Macduff Hughes, Jeffrey Dean. 2016.
+Google's Neural Machine Translation System: Bridging the Gap between Human and
+Machine Translation. In arXiv https://arxiv.org/pdf/1609.08144v2.pdf
+Different from the architecture described in the paper, we use a
+single-direction LSTM layer as the first layer instead of a bi-directional
+LSTM. Also, since this is demo code, to reduce computation time, we stack
+4 layers instead of 8 layers.
+"""
+
+from paddle.trainer_config_helpers import *
+
+# Build the word -> id mapping: the id of a word is the (0-based) index of
+# the line it appears on; only the first whitespace-separated token is used.
+dict_file = "./data/dict.txt"
+word_dict = dict()
+with open(dict_file, 'r') as f:
+    for i, line in enumerate(f):
+        w = line.strip().split()[0]
+        word_dict[w] = i
+
+# Prediction reads 'data/pred.list' through 'process_predict' and has no
+# training list; otherwise the train/test lists are read through 'process'.
+is_predict = get_config_arg('is_predict', bool, False)
+trn = 'data/train.list' if not is_predict else None
+tst = 'data/test.list' if not is_predict else 'data/pred.list'
+process = 'process' if not is_predict else 'process_predict'
+define_py_data_sources2(
+    train_list=trn,
+    test_list=tst,
+    module="dataprovider_emb",
+    obj=process,
+    args={"dictionary": word_dict})
+
+batch_size = 128 if not is_predict else 1
+settings(
+    batch_size=batch_size,
+    learning_rate=2e-3,
+    learning_method=AdamOptimizer(),
+    regularization=L2Regularization(8e-4),
+    gradient_clipping_threshold=25)
+
+bias_attr = ParamAttr(initial_std=0., l2_rate=0.)
+
+data = data_layer(name="word", size=len(word_dict))
+emb = embedding_layer(input=data, size=128)
+lstm = simple_lstm(
+    input=emb, size=128, lstm_cell_attr=ExtraAttr(drop_rate=0.1))
+
+previous_input, previous_hidden_state = emb, lstm
+
+# Stack 3 more LSTM layers on top of the first one (4 in total).
+for i in range(3):
+    # The input to the current layer is the sum of the hidden state
+    # and input of the previous layer.
+    current_input = addto_layer(input=[previous_input, previous_hidden_state])
+    hidden_state = simple_lstm(
+        input=current_input, size=128, lstm_cell_attr=ExtraAttr(drop_rate=0.1))
+    previous_input, previous_hidden_state = current_input, hidden_state
+
+lstm = previous_hidden_state
+
+lstm_last = pooling_layer(input=lstm, pooling_type=MaxPooling())
+output = fc_layer(
+    input=lstm_last, size=2, bias_attr=bias_attr, act=SoftmaxActivation())
+
+# Prediction outputs the max-id layer and the softmax output; training
+# outputs the classification cost against the "label" data layer.
+if is_predict:
+    maxid = maxid_layer(output)
+    outputs([maxid, output])
+else:
+    label = data_layer(name="label", size=2)
+    cls = classification_cost(input=output, label=label)
+    outputs(cls)
diff --git a/demo/recommendation/common_utils.py b/demo/recommendation/common_utils.py
index a5f00b3ef9ca00b42b8e31ddd6cfeca3580152b0..613e36b496e47edbc0eabd8f15a0abdcb50f6424 100755
--- a/demo/recommendation/common_utils.py
+++ b/demo/recommendation/common_utils.py
@@ -21,8 +21,9 @@ def meta_to_header(meta, name):
yield integer_value(each_meta['max'])
elif each_meta['type'] == 'embedding':
is_seq = each_meta['seq'] == 'sequence'
- yield integer_value(len(each_meta['dict']),
- seq_type=SequenceType.SEQUENCE if is_seq
- else SequenceType.NO_SEQUENCE)
+ yield integer_value(
+ len(each_meta['dict']),
+ seq_type=SequenceType.SEQUENCE
+ if is_seq else SequenceType.NO_SEQUENCE)
elif each_meta['type'] == 'one_hot_dense':
yield dense_vector(len(each_meta['dict']))
diff --git a/demo/recommendation/data/config.json b/demo/recommendation/data/config.json
index 71a9dd7be6bd10e177dfb443a94b719c3816d833..f26e74ce47bb7843a571e6033f051c046b31f054 100644
--- a/demo/recommendation/data/config.json
+++ b/demo/recommendation/data/config.json
@@ -14,4 +14,3 @@
"fields": ["id", "title", "genres"]
}
}
-
diff --git a/demo/recommendation/data/config_generator.py b/demo/recommendation/data/config_generator.py
index 29f38082693ad890ac4dfa302399663af8dbd27b..fa605458300f81da6772d88cfbad413e4dcf97fe 100644
--- a/demo/recommendation/data/config_generator.py
+++ b/demo/recommendation/data/config_generator.py
@@ -12,7 +12,6 @@
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
-
"""
config_generator.py
@@ -29,10 +28,7 @@ import json
import docopt
import copy
-DEFAULT_FILE = {
- "type": "split",
- "delimiter": ","
-}
+DEFAULT_FILE = {"type": "split", "delimiter": ","}
DEFAULT_FIELD = {
"id": {
@@ -107,19 +103,16 @@ def main(filename, fmt):
field = copy.deepcopy(DEFAULT_FIELD[field_key])
field['pos'] = pos
fields.append(field)
- obj[k] = {
- "file": file_dict,
- "fields": fields
- }
- meta = {
- "meta": obj
- }
+ obj[k] = {"file": file_dict, "fields": fields}
+ meta = {"meta": obj}
# print meta
if fmt == 'json':
+
def formatter(x):
import json
return json.dumps(x, indent=2)
elif fmt == 'yaml':
+
def formatter(x):
import yaml
return yaml.safe_dump(x, default_flow_style=False)
diff --git a/demo/recommendation/data/meta_generator.py b/demo/recommendation/data/meta_generator.py
index 8d1a33d02aea112e51f1d43bedc06fdcee1186f5..593c863670d5eb5d684adf643ff745f3914b656b 100644
--- a/demo/recommendation/data/meta_generator.py
+++ b/demo/recommendation/data/meta_generator.py
@@ -12,7 +12,6 @@
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
-
"""
Preprocess Movielens dataset, to get movie/user object.
@@ -66,8 +65,8 @@ class SortedIDGenerator(object):
self.__key_set__.add(key)
def finish_scan(self, compare=None, key=None, reverse=False):
- self.__key_set__ = sorted(list(self.__key_set__), cmp=compare,
- key=key, reverse=reverse)
+ self.__key_set__ = sorted(
+ list(self.__key_set__), cmp=compare, key=key, reverse=reverse)
self.dict = dict()
for idx, each_key in enumerate(self.__key_set__):
self.dict[each_key] = idx
@@ -207,11 +206,10 @@ class EmbeddingFieldParser(object):
self.dict = EmbeddingFieldParser.CharBasedEmbeddingDict(
self.seq_type == EmbeddingFieldParser.SEQUENCE)
elif config['dict']['type'] == 'split':
- self.dict = SplitEmbeddingDict(
- config['dict'].get('delimiter', ','))
+ self.dict = SplitEmbeddingDict(config['dict'].get('delimiter', ','))
elif config['dict']['type'] == 'whole_content':
- self.dict = EmbeddingFieldParser.WholeContentDict(
- config['dict']['sort'])
+ self.dict = EmbeddingFieldParser.WholeContentDict(config['dict'][
+ 'sort'])
else:
print config
assert False
@@ -333,8 +331,8 @@ class ContentExtractorFactory(object):
return PositionContentExtractor(config['pos'])
else:
extra_args = config['regex']
- return RegexPositionContentExtractor(pos=config['pos'],
- **extra_args)
+ return RegexPositionContentExtractor(
+ pos=config['pos'], **extra_args)
class MetaFile(object):
@@ -364,9 +362,10 @@ class MetaFile(object):
metas = map(lambda x: x.meta_field(), field_parsers)
# print metas
- key_index = filter(lambda x: x is not None, map(
- lambda (idx, meta): idx if 'is_key' in meta and meta['is_key']
- else None, enumerate(metas)))[0]
+ key_index = filter(
+ lambda x: x is not None,
+ map(lambda (idx, meta): idx if 'is_key' in meta and meta['is_key'] else None,
+ enumerate(metas)))[0]
key_map = []
for i in range(min(key_index, len(metas))):
@@ -374,12 +373,7 @@ class MetaFile(object):
for i in range(key_index + 1, len(metas)):
key_map.append(i)
- obj = {
- '__meta__': {
- 'raw_meta': metas,
- 'feature_map': key_map
- }
- }
+ obj = {'__meta__': {'raw_meta': metas, 'feature_map': key_map}}
for each_block in reader.read():
idx = field_parsers[key_index].parse(each_block)
diff --git a/demo/recommendation/data/split.py b/demo/recommendation/data/split.py
index ff1f7fab7befdb5bdfa39fd0f1753e6804e82d8f..8dd0cbd32af6074439e98dac024c5fed76cd52b2 100644
--- a/demo/recommendation/data/split.py
+++ b/demo/recommendation/data/split.py
@@ -12,7 +12,6 @@
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
-
"""
Separate movielens 1m dataset to train/test file.
diff --git a/demo/recommendation/dataprovider.py b/demo/recommendation/dataprovider.py
index 454467f40b44bb526d143934c4a7350d41e54c0e..ff3932be03f1e4a1fc1d0bdb189ab7fe1fbbeca0 100755
--- a/demo/recommendation/dataprovider.py
+++ b/demo/recommendation/dataprovider.py
@@ -15,6 +15,7 @@
from paddle.trainer.PyDataProvider2 import *
import common_utils # parse
+
def hook(settings, meta, **kwargs):
"""
Init hook is invoked before process data. It will set obj.slots and store
@@ -41,6 +42,7 @@ def hook(settings, meta, **kwargs):
settings.input_types = headers
settings.meta = meta
+
@provider(init_hook=hook, cache=CacheType.CACHE_PASS_IN_MEM)
def process(settings, filename):
with open(filename, 'r') as f:
diff --git a/demo/recommendation/prediction.py b/demo/recommendation/prediction.py
index f8044a3195ec25bc2fa7c9079e4977f971059352..e2a202cfd1a476046d7e1d1896b87d72c4906ff2 100755
--- a/demo/recommendation/prediction.py
+++ b/demo/recommendation/prediction.py
@@ -28,7 +28,8 @@ if __name__ == '__main__':
model_path = sys.argv[1]
swig_paddle.initPaddle('--use_gpu=0')
conf = parse_config("trainer_config.py", "is_predict=1")
- network = swig_paddle.GradientMachine.createFromConfigProto(conf.model_config)
+ network = swig_paddle.GradientMachine.createFromConfigProto(
+ conf.model_config)
assert isinstance(network, swig_paddle.GradientMachine)
network.loadParameters(model_path)
with open('./data/meta.bin', 'rb') as f:
@@ -39,11 +40,12 @@ if __name__ == '__main__':
while True:
movie_id = int(raw_input("Input movie_id: "))
user_id = int(raw_input("Input user_id: "))
- movie_meta = meta['movie'][movie_id] # Query Data From Meta.
+ movie_meta = meta['movie'][movie_id] # Query Data From Meta.
user_meta = meta['user'][user_id]
data = [movie_id - 1]
data.extend(movie_meta)
data.append(user_id - 1)
data.extend(user_meta)
- print "Prediction Score is %.2f" % ((network.forwardTest(
- cvt.convert([data]))[0]['value'][0][0] + 5) / 2)
+ print "Prediction Score is %.2f" % (
+ (network.forwardTest(cvt.convert([data]))[0]['value'][0][0] + 5)
+ / 2)
diff --git a/demo/recommendation/trainer_config.py b/demo/recommendation/trainer_config.py
index 624c22ec969dc98808863ad53573b9633f1791ac..cec340b0b65a841029a1c0538d9881bb38f026ff 100755
--- a/demo/recommendation/trainer_config.py
+++ b/demo/recommendation/trainer_config.py
@@ -27,8 +27,8 @@ with open(META_FILE, 'rb') as f:
# load meta file
meta = pickle.load(f)
-settings(batch_size=1600, learning_rate=1e-3,
- learning_method=RMSPropOptimizer())
+settings(
+ batch_size=1600, learning_rate=1e-3, learning_method=RMSPropOptimizer())
def construct_feature(name):
@@ -59,11 +59,10 @@ def construct_feature(name):
slot_name = each_meta.get('name', '%s_id' % name)
if type_name == 'id':
slot_dim = each_meta['max']
- embedding = embedding_layer(input=data_layer(slot_name,
- size=slot_dim),
- size=256)
- fusion.append(fc_layer(input=embedding,
- size=256))
+ embedding = embedding_layer(
+ input=data_layer(
+ slot_name, size=slot_dim), size=256)
+ fusion.append(fc_layer(input=embedding, size=256))
elif type_name == 'embedding':
is_seq = each_meta['seq'] == 'sequence'
slot_dim = len(each_meta['dict'])
@@ -71,17 +70,14 @@ def construct_feature(name):
embedding = embedding_layer(input=din, size=256)
if is_seq:
fusion.append(
- text_conv_pool(input=embedding, context_len=5,
- hidden_size=256))
+ text_conv_pool(
+ input=embedding, context_len=5, hidden_size=256))
else:
- fusion.append(fc_layer(input=embedding,
- size=256))
+ fusion.append(fc_layer(input=embedding, size=256))
elif type_name == 'one_hot_dense':
slot_dim = len(each_meta['dict'])
- hidden = fc_layer(input=data_layer(slot_name, slot_dim),
- size=256)
- fusion.append(fc_layer(input=hidden,
- size=256))
+ hidden = fc_layer(input=data_layer(slot_name, slot_dim), size=256)
+ fusion.append(fc_layer(input=hidden, size=256))
return fc_layer(name="%s_fusion" % name, input=fusion, size=256)
@@ -90,10 +86,16 @@ movie_feature = construct_feature("movie")
user_feature = construct_feature("user")
similarity = cos_sim(a=movie_feature, b=user_feature)
if not is_predict:
- outputs(regression_cost(input=similarity,
- label=data_layer('rating', size=1)))
-
- define_py_data_sources2('data/train.list', 'data/test.list', module='dataprovider',
- obj='process', args={'meta': meta})
+ outputs(
+ regression_cost(
+ input=similarity, label=data_layer(
+ 'rating', size=1)))
+
+ define_py_data_sources2(
+ 'data/train.list',
+ 'data/test.list',
+ module='dataprovider',
+ obj='process',
+ args={'meta': meta})
else:
outputs(similarity)
diff --git a/demo/semantic_role_labeling/data/extract_dict_feature.py b/demo/semantic_role_labeling/data/extract_dict_feature.py
index 2982e54c665b41400aab0a893ff3c76335404988..daca5f01cf2b3bd231bf530f17ec760272ce93e0 100644
--- a/demo/semantic_role_labeling/data/extract_dict_feature.py
+++ b/demo/semantic_role_labeling/data/extract_dict_feature.py
@@ -17,24 +17,15 @@ import os
from optparse import OptionParser
-def extract_dict_features(pair_file, feature_file, src_dict_file,
- tgt_dict_file):
- src_dict = set()
- tgt_dict = set()
-
- with open(pair_file) as fin, open(feature_file, 'w') as feature_out, open(
- src_dict_file, 'w') as src_dict_out, open(tgt_dict_file,
- 'w') as tgt_dict_out:
+def extract_dict_features(pair_file, feature_file):
+
+ with open(pair_file) as fin, open(feature_file, 'w') as feature_out:
for line in fin:
- sentence, labels = line.strip().split('\t')
+ sentence, predicate, labels = line.strip().split('\t')
sentence_list = sentence.split()
labels_list = labels.split()
- src_dict.update(sentence_list)
- tgt_dict.update(labels_list)
-
verb_index = labels_list.index('B-V')
- verb_feature = sentence_list[verb_index]
mark = [0] * len(labels_list)
if verb_index > 0:
@@ -42,47 +33,50 @@ def extract_dict_features(pair_file, feature_file, src_dict_file,
ctx_n1 = sentence_list[verb_index - 1]
else:
ctx_n1 = 'bos'
- ctx_n1_feature = ctx_n1
+
+ if verb_index > 1:
+ mark[verb_index - 2] = 1
+ ctx_n2 = sentence_list[verb_index - 2]
+ else:
+ ctx_n2 = 'bos'
mark[verb_index] = 1
- ctx_0_feature = sentence_list[verb_index]
+ ctx_0 = sentence_list[verb_index]
if verb_index < len(labels_list) - 2:
mark[verb_index + 1] = 1
ctx_p1 = sentence_list[verb_index + 1]
else:
ctx_p1 = 'eos'
- ctx_p1_feature = ctx_p1
+
+ if verb_index < len(labels_list) - 3:
+ mark[verb_index + 2] = 1
+ ctx_p2 = sentence_list[verb_index + 2]
+ else:
+ ctx_p2 = 'eos'
+
feature_str = sentence + '\t' \
- + verb_feature + '\t' \
- + ctx_n1_feature + '\t' \
- + ctx_0_feature + '\t' \
- + ctx_p1_feature + '\t' \
+ + predicate + '\t' \
+ + ctx_n2 + '\t' \
+ + ctx_n1 + '\t' \
+ + ctx_0 + '\t' \
+ + ctx_p1 + '\t' \
+ + ctx_p2 + '\t' \
+ ' '.join([str(i) for i in mark]) + '\t' \
+ labels
feature_out.write(feature_str + '\n')
- src_dict_out.write('\n')
- src_dict_out.write('\n'.join(list(src_dict)))
-
- tgt_dict_out.write('\n'.join(list(tgt_dict)))
if __name__ == '__main__':
- usage = '-p pair_file -f feature_file -s source dictionary -t target dictionary '
+ usage = '-p pair_file -f feature_file'
parser = OptionParser(usage)
parser.add_option('-p', dest='pair_file', help='the pair file')
- parser.add_option(
- '-f', dest='feature_file', help='the file to store feature')
- parser.add_option(
- '-s', dest='src_dict', help='the file to store source dictionary')
- parser.add_option(
- '-t', dest='tgt_dict', help='the file to store target dictionary')
+ parser.add_option('-f', dest='feature_file', help='the feature file')
(options, args) = parser.parse_args()
- extract_dict_features(options.pair_file, options.feature_file,
- options.src_dict, options.tgt_dict)
+ extract_dict_features(options.pair_file, options.feature_file)
diff --git a/demo/semantic_role_labeling/data/extract_pairs.py b/demo/semantic_role_labeling/data/extract_pairs.py
index 4d1bef8f958a62be9941d474a0b67542dcc5cfab..86ab00ce41723169de035a841d9e129a1b9e82a3 100644
--- a/demo/semantic_role_labeling/data/extract_pairs.py
+++ b/demo/semantic_role_labeling/data/extract_pairs.py
@@ -51,7 +51,7 @@ def read_sentences(words_file):
for line in fin:
line = line.strip()
if line == '':
- sentences.append(s.lower())
+ sentences.append(s)
s = ''
else:
s += line + ' '
@@ -64,6 +64,11 @@ def transform_labels(sentences, labels):
if len(labels[i]) == 1:
continue
else:
+ verb_list = []
+ for x in labels[i][0]:
+ if x !='-':
+ verb_list.append(x)
+
for j in xrange(1, len(labels[i])):
label_list = labels[i][j]
current_tag = 'O'
@@ -88,8 +93,7 @@ def transform_labels(sentences, labels):
is_in_bracket = True
else:
print 'error:', ll
-
- sen_lab_pair.append((sentences[i], label_seq))
+ sen_lab_pair.append((sentences[i], verb_list[j-1], label_seq))
return sen_lab_pair
@@ -97,9 +101,9 @@ def write_file(sen_lab_pair, output_file):
with open(output_file, 'w') as fout:
for x in sen_lab_pair:
sentence = x[0]
- label_seq = ' '.join(x[1])
- assert len(sentence.split()) == len(x[1])
- fout.write(sentence + '\t' + label_seq + '\n')
+ label_seq = ' '.join(x[2])
+ assert len(sentence.split()) == len(x[2])
+ fout.write(sentence + '\t' + x[1]+'\t' +label_seq + '\n')
if __name__ == '__main__':
diff --git a/demo/semantic_role_labeling/data/get_data.sh b/demo/semantic_role_labeling/data/get_data.sh
index 268c0995e27006ec62f38bdda9b0a0994dab096c..99487e0d9a8c31d884c4a338386ad0ff8e5d9dc7 100644
--- a/demo/semantic_role_labeling/data/get_data.sh
+++ b/demo/semantic_role_labeling/data/get_data.sh
@@ -14,6 +14,10 @@
# limitations under the License.
set -e
wget http://www.cs.upc.edu/~srlconll/conll05st-tests.tar.gz
+wget http://paddlepaddle.bj.bcebos.com/demo/srl_dict_and_embedding/verbDict.txt
+wget http://paddlepaddle.bj.bcebos.com/demo/srl_dict_and_embedding/targetDict.txt
+wget http://paddlepaddle.bj.bcebos.com/demo/srl_dict_and_embedding/wordDict.txt
+wget http://paddlepaddle.bj.bcebos.com/demo/srl_dict_and_embedding/emb
tar -xzvf conll05st-tests.tar.gz
rm conll05st-tests.tar.gz
cp ./conll05st-release/test.wsj/words/test.wsj.words.gz .
@@ -22,4 +26,4 @@ gunzip test.wsj.words.gz
gunzip test.wsj.props.gz
python extract_pairs.py -w test.wsj.words -p test.wsj.props -o test.wsj.seq_pair
-python extract_dict_feature.py -p test.wsj.seq_pair -f feature -s src.dict -t tgt.dict
+python extract_dict_feature.py -p test.wsj.seq_pair -f feature
diff --git a/demo/semantic_role_labeling/dataprovider.py b/demo/semantic_role_labeling/dataprovider.py
index 2ef25c42c1794c410fe85fd497a6ed9d2295dca9..2c8e13462730a2e980fa1c3fe342ef0e062ab5d7 100644
--- a/demo/semantic_role_labeling/dataprovider.py
+++ b/demo/semantic_role_labeling/dataprovider.py
@@ -17,9 +17,11 @@ from paddle.trainer.PyDataProvider2 import *
UNK_IDX = 0
-def hook(settings, word_dict, label_dict, **kwargs):
+def hook(settings, word_dict, label_dict, predicate_dict, **kwargs):
settings.word_dict = word_dict
settings.label_dict = label_dict
+ settings.predicate_dict = predicate_dict
+
#all inputs are integral and sequential type
settings.slots = [
integer_value_sequence(len(word_dict)),
@@ -27,31 +29,40 @@ def hook(settings, word_dict, label_dict, **kwargs):
integer_value_sequence(len(word_dict)),
integer_value_sequence(len(word_dict)),
integer_value_sequence(len(word_dict)),
+ integer_value_sequence(len(word_dict)),
+ integer_value_sequence(len(predicate_dict)),
integer_value_sequence(2),
- integer_value_sequence(len(label_dict))]
+ integer_value_sequence(len(label_dict))
+ ]
-@provider(init_hook=hook)
-def process(obj, file_name):
+def get_batch_size(yeild_data):
+ return len(yeild_data[0])
+
+
+@provider(init_hook=hook, should_shuffle=True, calc_batch_size=get_batch_size,
+ can_over_batch_size=False, cache=CacheType.CACHE_PASS_IN_MEM)
+def process(settings, file_name):
with open(file_name, 'r') as fdata:
for line in fdata:
- sentence, predicate, ctx_n1, ctx_0, ctx_p1, mark, label = \
+ sentence, predicate, ctx_n2, ctx_n1, ctx_0, ctx_p1, ctx_p2, mark, label = \
line.strip().split('\t')
-
+
words = sentence.split()
sen_len = len(words)
- word_slot = [obj.word_dict.get(w, UNK_IDX) for w in words]
+ word_slot = [settings.word_dict.get(w, UNK_IDX) for w in words]
- predicate_slot = [obj.word_dict.get(predicate, UNK_IDX)] * sen_len
- ctx_n1_slot = [obj.word_dict.get(ctx_n1, UNK_IDX)] * sen_len
- ctx_0_slot = [obj.word_dict.get(ctx_0, UNK_IDX)] * sen_len
- ctx_p1_slot = [obj.word_dict.get(ctx_p1, UNK_IDX)] * sen_len
+ predicate_slot = [settings.predicate_dict.get(predicate)] * sen_len
+ ctx_n2_slot = [settings.word_dict.get(ctx_n2, UNK_IDX)] * sen_len
+ ctx_n1_slot = [settings.word_dict.get(ctx_n1, UNK_IDX)] * sen_len
+ ctx_0_slot = [settings.word_dict.get(ctx_0, UNK_IDX)] * sen_len
+ ctx_p1_slot = [settings.word_dict.get(ctx_p1, UNK_IDX)] * sen_len
+ ctx_p2_slot = [settings.word_dict.get(ctx_p2, UNK_IDX)] * sen_len
marks = mark.split()
mark_slot = [int(w) for w in marks]
label_list = label.split()
- label_slot = [obj.label_dict.get(w) for w in label_list]
-
- yield word_slot, predicate_slot, ctx_n1_slot, \
- ctx_0_slot, ctx_p1_slot, mark_slot, label_slot
+ label_slot = [settings.label_dict.get(w) for w in label_list]
+ yield word_slot, ctx_n2_slot, ctx_n1_slot, \
+ ctx_0_slot, ctx_p1_slot, ctx_p2_slot, predicate_slot, mark_slot, label_slot
diff --git a/demo/semantic_role_labeling/db_lstm.py b/demo/semantic_role_labeling/db_lstm.py
index 364460afbe31caf42cd4f0836eba75e444b3f5b8..54ceff0e724220cc9ea96b9e0ec6844947a8343e 100644
--- a/demo/semantic_role_labeling/db_lstm.py
+++ b/demo/semantic_role_labeling/db_lstm.py
@@ -12,15 +12,15 @@
# See the License for the specific language governing permissions and
# limitations under the License.
-
import math
import os
import sys
from paddle.trainer_config_helpers import *
#file paths
-word_dict_file = './data/src.dict'
-label_dict_file = './data/tgt.dict'
+word_dict_file = './data/wordDict.txt'
+label_dict_file = './data/targetDict.txt'
+predicate_file= './data/verbDict.txt'
train_list_file = './data/train.list'
test_list_file = './data/test.list'
@@ -31,8 +31,10 @@ if not is_predict:
#load dictionaries
word_dict = dict()
label_dict = dict()
+ predicate_dict = dict()
with open(word_dict_file, 'r') as f_word, \
- open(label_dict_file, 'r') as f_label:
+ open(label_dict_file, 'r') as f_label, \
+ open(predicate_file, 'r') as f_pre:
for i, line in enumerate(f_word):
w = line.strip()
word_dict[w] = i
@@ -41,8 +43,13 @@ if not is_predict:
w = line.strip()
label_dict[w] = i
+ for i, line in enumerate(f_pre):
+ w = line.strip()
+ predicate_dict[w] = i
+
+
if is_test:
- train_list_file = None
+ train_list_file = None
#define data provider
define_py_data_sources2(
@@ -51,91 +58,157 @@ if not is_predict:
module='dataprovider',
obj='process',
args={'word_dict': word_dict,
- 'label_dict': label_dict})
+ 'label_dict': label_dict,
+ 'predicate_dict': predicate_dict })
word_dict_len = len(word_dict)
label_dict_len = len(label_dict)
+ pred_len = len(predicate_dict)
else:
word_dict_len = get_config_arg('dict_len', int)
label_dict_len = get_config_arg('label_len', int)
+ pred_len = get_config_arg('pred_len', int)
+############################## Hyper-parameters ##################################
mark_dict_len = 2
word_dim = 32
mark_dim = 5
-hidden_dim = 128
+hidden_dim = 512
depth = 8
-emb_lr = 1e-2
-fc_lr = 1e-2
-lstm_lr = 2e-2
+
+
+
+########################### Optimizer #######################################
+
settings(
batch_size=150,
- learning_method=AdamOptimizer(),
- learning_rate=1e-3,
+ learning_method=MomentumOptimizer(momentum=0),
+ learning_rate=2e-2,
regularization=L2Regularization(8e-4),
- gradient_clipping_threshold=25)
+ is_async=False,
+ model_average=ModelAverage(average_window=0.5,
+ max_average_window=10000),
+
+)
-#6 features
+
+
+
+####################################### network ##############################
+#8 features and 1 target
word = data_layer(name='word_data', size=word_dict_len)
-predicate = data_layer(name='verb_data', size=word_dict_len)
+predicate = data_layer(name='verb_data', size=pred_len)
+
+ctx_n2 = data_layer(name='ctx_n2_data', size=word_dict_len)
ctx_n1 = data_layer(name='ctx_n1_data', size=word_dict_len)
ctx_0 = data_layer(name='ctx_0_data', size=word_dict_len)
ctx_p1 = data_layer(name='ctx_p1_data', size=word_dict_len)
+ctx_p2 = data_layer(name='ctx_p2_data', size=word_dict_len)
mark = data_layer(name='mark_data', size=mark_dict_len)
+
if not is_predict:
target = data_layer(name='target', size=label_dict_len)
-ptt = ParameterAttribute(name='src_emb', learning_rate=emb_lr)
-layer_attr = ExtraLayerAttribute(drop_rate=0.5)
-fc_para_attr = ParameterAttribute(learning_rate=fc_lr)
-lstm_para_attr = ParameterAttribute(initial_std=0., learning_rate=lstm_lr)
-para_attr = [fc_para_attr, lstm_para_attr]
-word_embedding = embedding_layer(size=word_dim, input=word, param_attr=ptt)
-predicate_embedding = embedding_layer(
- size=word_dim, input=predicate, param_attr=ptt)
-ctx_n1_embedding = embedding_layer(size=word_dim, input=ctx_n1, param_attr=ptt)
-ctx_0_embedding = embedding_layer(size=word_dim, input=ctx_0, param_attr=ptt)
-ctx_p1_embedding = embedding_layer(size=word_dim, input=ctx_p1, param_attr=ptt)
-mark_embedding = embedding_layer(size=mark_dim, input=mark)
+default_std=1/math.sqrt(hidden_dim)/3.0
+
+emb_para = ParameterAttribute(name='emb', initial_std=0., learning_rate=0.)
+std_0 = ParameterAttribute(initial_std=0.)
+std_default = ParameterAttribute(initial_std=default_std)
+
+predicate_embedding = embedding_layer(size=word_dim, input=predicate, param_attr=ParameterAttribute(name='vemb',initial_std=default_std))
+mark_embedding = embedding_layer(name='word_ctx-in_embedding', size=mark_dim, input=mark, param_attr=std_0)
+
+word_input=[word, ctx_n2, ctx_n1, ctx_0, ctx_p1, ctx_p2]
+emb_layers = [embedding_layer(size=word_dim, input=x, param_attr=emb_para) for x in word_input]
+emb_layers.append(predicate_embedding)
+emb_layers.append(mark_embedding)
hidden_0 = mixed_layer(
+ name='hidden0',
size=hidden_dim,
- input=[
- full_matrix_projection(input=word_embedding),
- full_matrix_projection(input=predicate_embedding),
- full_matrix_projection(input=ctx_n1_embedding),
- full_matrix_projection(input=ctx_0_embedding),
- full_matrix_projection(input=ctx_p1_embedding),
- full_matrix_projection(input=mark_embedding),
- ])
+ bias_attr=std_default,
+ input=[ full_matrix_projection(input=emb, param_attr=std_default ) for emb in emb_layers ])
+
-lstm_0 = lstmemory(input=hidden_0, layer_attr=layer_attr)
+mix_hidden_lr = 1e-3
+lstm_para_attr = ParameterAttribute(initial_std=0.0, learning_rate=1.0)
+hidden_para_attr = ParameterAttribute(initial_std=default_std, learning_rate=mix_hidden_lr)
+
+lstm_0 = lstmemory(name='lstm0',
+ input=hidden_0,
+ act=ReluActivation(),
+ gate_act=SigmoidActivation(),
+ state_act=SigmoidActivation(),
+ bias_attr=std_0,
+ param_attr=lstm_para_attr)
#stack L-LSTM and R-LSTM with direct edges
input_tmp = [hidden_0, lstm_0]
+
for i in range(1, depth):
- fc = fc_layer(input=input_tmp, size=hidden_dim, param_attr=para_attr)
+ mix_hidden = mixed_layer(name='hidden'+str(i),
+ size=hidden_dim,
+ bias_attr=std_default,
+ input=[full_matrix_projection(input=input_tmp[0], param_attr=hidden_para_attr),
+ full_matrix_projection(input=input_tmp[1], param_attr=lstm_para_attr)
+ ]
+ )
+
+ lstm = lstmemory(name='lstm'+str(i),
+ input=mix_hidden,
+ act=ReluActivation(),
+ gate_act=SigmoidActivation(),
+ state_act=SigmoidActivation(),
+ reverse=((i % 2)==1),
+ bias_attr=std_0,
+ param_attr=lstm_para_attr)
+
+ input_tmp = [mix_hidden, lstm]
+
+feature_out = mixed_layer(name='output',
+ size=label_dict_len,
+ bias_attr=std_default,
+ input=[full_matrix_projection(input=input_tmp[0], param_attr=hidden_para_attr),
+ full_matrix_projection(input=input_tmp[1], param_attr=lstm_para_attr)
+ ],
+ )
- lstm = lstmemory(
- input=fc,
- act=ReluActivation(),
- reverse=(i % 2) == 1,
- layer_attr=layer_attr)
- input_tmp = [fc, lstm]
-prob = fc_layer(
- input=input_tmp,
- size=label_dict_len,
- act=SoftmaxActivation(),
- param_attr=para_attr)
if not is_predict:
- cls = classification_cost(input=prob, label=target)
- outputs(cls)
+ crf_l = crf_layer( name = 'crf',
+ size = label_dict_len,
+ input = feature_out,
+ label = target,
+ param_attr=ParameterAttribute(name='crfw',initial_std=default_std, learning_rate=mix_hidden_lr)
+
+ )
+
+
+ crf_dec_l = crf_decoding_layer(name = 'crf_dec_l',
+ size = label_dict_len,
+ input = feature_out,
+ label = target,
+ param_attr=ParameterAttribute(name='crfw')
+ )
+
+
+ eval = sum_evaluator(input=crf_dec_l)
+
+ outputs(crf_l)
+
else:
- outputs(prob)
+ crf_dec_l = crf_decoding_layer(name = 'crf_dec_l',
+ size = label_dict_len,
+ input = feature_out,
+ param_attr=ParameterAttribute(name='crfw')
+ )
+
+ outputs(crf_dec_l)
+
diff --git a/demo/semantic_role_labeling/predict.py b/demo/semantic_role_labeling/predict.py
index 9a27112828e449174e3da79dc7db9fed20bfed6f..a7f1e8f81f59f6fe95fd29593ef1a826e652e570 100644
--- a/demo/semantic_role_labeling/predict.py
+++ b/demo/semantic_role_labeling/predict.py
@@ -26,7 +26,7 @@ UNK_IDX = 0
class Prediction():
- def __init__(self, train_conf, dict_file, model_dir, label_file):
+ def __init__(self, train_conf, dict_file, model_dir, label_file, predicate_dict_file):
"""
train_conf: trainer configure.
dict_file: word dictionary file name.
@@ -35,16 +35,19 @@ class Prediction():
self.dict = {}
self.labels = {}
+ self.predicate_dict={}
self.labels_reverse = {}
- self.load_dict_label(dict_file, label_file)
+ self.load_dict_label(dict_file, label_file, predicate_dict_file)
len_dict = len(self.dict)
len_label = len(self.labels)
+ len_pred = len(self.predicate_dict)
conf = parse_config(
train_conf,
- 'dict_len=' + str(len_dict) +
+ 'dict_len=' + str(len_dict) +
',label_len=' + str(len_label) +
+ ',pred_len=' + str(len_pred) +
',is_predict=True')
self.network = swig_paddle.GradientMachine.createFromConfigProto(
conf.model_config)
@@ -56,11 +59,13 @@ class Prediction():
integer_value_sequence(len_dict),
integer_value_sequence(len_dict),
integer_value_sequence(len_dict),
+ integer_value_sequence(len_dict),
+ integer_value_sequence(len_pred),
integer_value_sequence(2)
- ]
+ ]
self.converter = DataProviderConverter(slots)
- def load_dict_label(self, dict_file, label_file):
+ def load_dict_label(self, dict_file, label_file, predicate_dict_file):
"""
Load dictionary from self.dict_file.
"""
@@ -71,52 +76,55 @@ class Prediction():
self.labels[line.strip()] = line_count
self.labels_reverse[line_count] = line.strip()
+ for line_count, line in enumerate(open(predicate_dict_file, 'r')):
+ self.predicate_dict[line.strip()] = line_count
def get_data(self, data_file):
"""
Get input data of paddle format.
"""
with open(data_file, 'r') as fdata:
for line in fdata:
- sentence, predicate, ctx_n1, ctx_0, ctx_p1, mark, label = line.strip(
+ sentence, predicate, ctx_n2, ctx_n1, ctx_0, ctx_p1, ctx_p2, mark, label = line.strip(
).split('\t')
words = sentence.split()
sen_len = len(words)
-
+
word_slot = [self.dict.get(w, UNK_IDX) for w in words]
- predicate_slot = [self.dict.get(predicate, UNK_IDX)] * sen_len
+ predicate_slot = [self.predicate_dict.get(predicate, UNK_IDX)] * sen_len
+ ctx_n2_slot = [self.dict.get(ctx_n2, UNK_IDX)] * sen_len
ctx_n1_slot = [self.dict.get(ctx_n1, UNK_IDX)] * sen_len
ctx_0_slot = [self.dict.get(ctx_0, UNK_IDX)] * sen_len
ctx_p1_slot = [self.dict.get(ctx_p1, UNK_IDX)] * sen_len
+ ctx_p2_slot = [self.dict.get(ctx_p2, UNK_IDX)] * sen_len
marks = mark.split()
mark_slot = [int(w) for w in marks]
+
+ yield word_slot, ctx_n2_slot, ctx_n1_slot, \
+ ctx_0_slot, ctx_p1_slot, ctx_p2_slot, predicate_slot, mark_slot
- yield word_slot, predicate_slot, ctx_n1_slot, \
- ctx_0_slot, ctx_p1_slot, mark_slot
-
- def predict(self, data_file):
+ def predict(self, data_file, output_file):
"""
data_file: file name of input data.
"""
input = self.converter(self.get_data(data_file))
output = self.network.forwardTest(input)
- prob = output[0]["value"]
- lab = list(np.argsort(-prob)[:, 0])
+ lab = output[0]["id"].tolist()
- with open(data_file, 'r') as fin, open('predict.res', 'w') as fout:
+ with open(data_file, 'r') as fin, open(output_file, 'w') as fout:
index = 0
for line in fin:
sen = line.split('\t')[0]
len_sen = len(sen.split())
line_labels = lab[index:index + len_sen]
index += len_sen
- fout.write(sen + '\t' + ' '.join([self.labels_reverse[
- i] for i in line_labels]) + '\n')
+ fout.write(sen + '\t' + ' '.join(
+ [self.labels_reverse[i] for i in line_labels]) + '\n')
def option_parser():
- usage = ("python predict.py -c config -w model_dir "
- "-d word dictionary -l label_file -i input_file")
+ usage = ("python predict.py -c config -w model_dir "
+ "-d word dictionary -l label_file -i input_file -p pred_dict_file")
parser = OptionParser(usage="usage: %s [options]" % usage)
parser.add_option(
"-c",
@@ -137,6 +145,13 @@ def option_parser():
dest="label_file",
default=None,
help="label file")
+ parser.add_option(
+ "-p",
+ "--predict_dict_file",
+ action="store",
+ dest="predict_dict_file",
+ default=None,
+ help="predict_dict_file")
parser.add_option(
"-i",
"--data",
@@ -150,6 +165,14 @@ def option_parser():
dest="model_path",
default=None,
help="model path")
+
+ parser.add_option(
+ "-o",
+ "--output_file",
+ action="store",
+ dest="output_file",
+ default=None,
+ help="output file")
return parser.parse_args()
@@ -160,10 +183,12 @@ def main():
dict_file = options.dict_file
model_path = options.model_path
label_file = options.label_file
+ predict_dict_file = options.predict_dict_file
+ output_file = options.output_file
swig_paddle.initPaddle("--use_gpu=0")
- predict = Prediction(train_conf, dict_file, model_path, label_file)
- predict.predict(data_file)
+ predict = Prediction(train_conf, dict_file, model_path, label_file, predict_dict_file)
+ predict.predict(data_file,output_file)
if __name__ == '__main__':
diff --git a/demo/semantic_role_labeling/predict.sh b/demo/semantic_role_labeling/predict.sh
index a545b9a5d591b41bdbd54905cbbffc410abc8fb0..88ab5898f7d41056f4fe549b3145760783b27bf9 100644
--- a/demo/semantic_role_labeling/predict.sh
+++ b/demo/semantic_role_labeling/predict.sh
@@ -18,7 +18,7 @@ set -e
function get_best_pass() {
cat $1 | grep -Pzo 'Test .*\n.*pass-.*' | \
sed -r 'N;s/Test.* cost=([0-9]+\.[0-9]+).*\n.*pass-([0-9]+)/\1 \2/g' | \
- sort | head -n 1
+ sort -n | head -n 1
}
log=train.log
@@ -26,15 +26,18 @@ LOG=`get_best_pass $log`
LOG=(${LOG})
best_model_path="output/pass-${LOG[1]}"
-
config_file=db_lstm.py
-dict_file=./data/src.dict
-label_file=./data/tgt.dict
+dict_file=./data/wordDict.txt
+label_file=./data/targetDict.txt
+predicate_dict_file=./data/verbDict.txt
input_file=./data/feature
+output_file=predict.res
python predict.py \
-c $config_file \
-w $best_model_path \
-l $label_file \
+ -p $predicate_dict_file \
-d $dict_file \
- -i $input_file
+ -i $input_file \
+ -o $output_file
diff --git a/demo/semantic_role_labeling/test.sh b/demo/semantic_role_labeling/test.sh
index 804f722e5b8e9ee5b54c778c54f7833f5e6c4de0..f9e1bdcd4c752474329d36c4de3378f7d58e7b4b 100644
--- a/demo/semantic_role_labeling/test.sh
+++ b/demo/semantic_role_labeling/test.sh
@@ -18,7 +18,7 @@ set -e
function get_best_pass() {
cat $1 | grep -Pzo 'Test .*\n.*pass-.*' | \
sed -r 'N;s/Test.* cost=([0-9]+\.[0-9]+).*\n.*pass-([0-9]+)/\1 \2/g' |\
- sort | head -n 1
+ sort -n | head -n 1
}
log=train.log
@@ -36,5 +36,5 @@ paddle train \
--job=test \
--use_gpu=false \
--config_args=is_test=1 \
+ --test_all_data_in_one_period=1 \
2>&1 | tee 'test.log'
-
diff --git a/demo/semantic_role_labeling/train.sh b/demo/semantic_role_labeling/train.sh
index 94c7b6f31df3b5e5e059d6e1323ae0c0bec74753..420768bb2b4ebed7b135a49c5eee5e5538426ae1 100644
--- a/demo/semantic_role_labeling/train.sh
+++ b/demo/semantic_role_labeling/train.sh
@@ -16,12 +16,14 @@
set -e
paddle train \
--config=./db_lstm.py \
+ --use_gpu=0 \
+ --log_period=5000 \
+ --trainer_count=1 \
+ --show_parameter_stats_period=5000 \
--save_dir=./output \
- --trainer_count=4 \
- --log_period=10 \
- --num_passes=500 \
- --use_gpu=false \
- --show_parameter_stats_period=10 \
+ --num_passes=10000 \
+ --average_test_period=10000000 \
+ --init_model_path=./data \
+ --load_missing_parameter_strategy=rand \
--test_all_data_in_one_period=1 \
-2>&1 | tee 'train.log'
-
+ 2>&1 | tee 'train.log'
diff --git a/demo/sentiment/data/get_imdb.sh b/demo/sentiment/data/get_imdb.sh
index 41523927afe75428ef1151cef8184ede14eea9a7..28fa86232d89964b3f1680080239cf8a4ebefa9a 100755
--- a/demo/sentiment/data/get_imdb.sh
+++ b/demo/sentiment/data/get_imdb.sh
@@ -38,11 +38,11 @@ unzip master.zip
mkdir -p imdb/train
mkdir -p imdb/test
-cp -r aclImdb/train/pos/ imdb/train/
-cp -r aclImdb/train/neg/ imdb/train/
+cp -r aclImdb/train/pos/ imdb/train/pos
+cp -r aclImdb/train/neg/ imdb/train/neg
-cp -r aclImdb/test/pos/ imdb/test/
-cp -r aclImdb/test/neg/ imdb/test/
+cp -r aclImdb/test/pos/ imdb/test/pos
+cp -r aclImdb/test/neg/ imdb/test/neg
#remove compressed package
rm aclImdb_v1.tar.gz
diff --git a/demo/sentiment/dataprovider.py b/demo/sentiment/dataprovider.py
index 9a9fd81f030cb1d2a10a5000fd1d12810d12112b..53e3d1d20df92b8815347bd8937064871f326b3f 100755
--- a/demo/sentiment/dataprovider.py
+++ b/demo/sentiment/dataprovider.py
@@ -17,8 +17,8 @@ from paddle.trainer.PyDataProvider2 import *
def hook(settings, dictionary, **kwargs):
settings.word_dict = dictionary
settings.input_types = [
- integer_value_sequence(len(settings.word_dict)),
- integer_value(2)]
+ integer_value_sequence(len(settings.word_dict)), integer_value(2)
+ ]
settings.logger.info('dict len : %d' % (len(settings.word_dict)))
@@ -29,6 +29,7 @@ def process(settings, file_name):
label, comment = line.strip().split('\t\t')
label = int(label)
words = comment.split()
- word_slot = [settings.word_dict[w] for w in words if w in
- settings.word_dict]
+ word_slot = [
+ settings.word_dict[w] for w in words if w in settings.word_dict
+ ]
yield word_slot, label
diff --git a/demo/sentiment/predict.py b/demo/sentiment/predict.py
index 7d0baeabbba68b2a160463364d05cd865bf0314f..bc0f6f31264294034ed38309f7fda370865b2845 100755
--- a/demo/sentiment/predict.py
+++ b/demo/sentiment/predict.py
@@ -18,14 +18,14 @@ from optparse import OptionParser
from py_paddle import swig_paddle, DataProviderConverter
from paddle.trainer.PyDataProvider2 import integer_value_sequence
from paddle.trainer.config_parser import parse_config
-
"""
Usage: run following command to show help message.
python predict.py -h
"""
+
class SentimentPrediction():
- def __init__(self, train_conf, dict_file, model_dir=None, label_file = None):
+ def __init__(self, train_conf, dict_file, model_dir=None, label_file=None):
"""
train_conf: trainer configure.
dict_file: word dictionary file name.
@@ -44,7 +44,8 @@ class SentimentPrediction():
self.load_label(label_file)
conf = parse_config(train_conf, "is_predict=1")
- self.network = swig_paddle.GradientMachine.createFromConfigProto(conf.model_config)
+ self.network = swig_paddle.GradientMachine.createFromConfigProto(
+ conf.model_config)
self.network.loadParameters(self.model_dir)
input_types = [integer_value_sequence(self.dict_dim)]
self.converter = DataProviderConverter(input_types)
@@ -61,7 +62,7 @@ class SentimentPrediction():
"""
Load label.
"""
- self.label={}
+ self.label = {}
for v in open(label_file, 'r'):
self.label[int(v.split('\t')[1])] = v.split('\t')[0]
@@ -72,7 +73,9 @@ class SentimentPrediction():
with open(data_file, 'r') as fdata:
for line in fdata:
words = line.strip().split()
- word_slot = [self.word_dict[w] for w in words if w in self.word_dict]
+ word_slot = [
+ self.word_dict[w] for w in words if w in self.word_dict
+ ]
if not word_slot:
print "all words are not in dictionary: %s", line
continue
@@ -89,25 +92,48 @@ class SentimentPrediction():
if self.label is None:
print("%s: predicting label is %d" % (data_file, lab[0][0]))
else:
- print("%s: predicting label is %s" % (data_file, self.label[lab[0][0]]))
+ print("%s: predicting label is %s" %
+ (data_file, self.label[lab[0][0]]))
+
def option_parser():
usage = "python predict.py -n config -w model_dir -d dictionary -i input_file "
parser = OptionParser(usage="usage: %s [options]" % usage)
- parser.add_option("-n", "--tconf", action="store",
- dest="train_conf", help="network config")
- parser.add_option("-d", "--dict", action="store",
- dest="dict_file",help="dictionary file")
- parser.add_option("-b", "--label", action="store",
- dest="label", default=None,
- help="dictionary file")
- parser.add_option("-i", "--data", action="store",
- dest="data", help="data file to predict")
- parser.add_option("-w", "--model", action="store",
- dest="model_path", default=None,
- help="model path")
+ parser.add_option(
+ "-n",
+ "--tconf",
+ action="store",
+ dest="train_conf",
+ help="network config")
+ parser.add_option(
+ "-d",
+ "--dict",
+ action="store",
+ dest="dict_file",
+ help="dictionary file")
+ parser.add_option(
+ "-b",
+ "--label",
+ action="store",
+ dest="label",
+ default=None,
+ help="dictionary file")
+ parser.add_option(
+ "-i",
+ "--data",
+ action="store",
+ dest="data",
+ help="data file to predict")
+ parser.add_option(
+ "-w",
+ "--model",
+ action="store",
+ dest="model_path",
+ default=None,
+ help="model path")
return parser.parse_args()
+
def main():
options, args = option_parser()
train_conf = options.train_conf
@@ -119,5 +145,6 @@ def main():
predict = SentimentPrediction(train_conf, dict_file, model_path, label)
predict.predict(data)
+
if __name__ == '__main__':
main()
diff --git a/demo/sentiment/preprocess.py b/demo/sentiment/preprocess.py
index 49b53d500a1bf816bde9c9675b251be8e9a68ae9..7146e95d751c4de649e204fab724085994dfa4d3 100755
--- a/demo/sentiment/preprocess.py
+++ b/demo/sentiment/preprocess.py
@@ -22,13 +22,13 @@ from os.path import join as join_path
from optparse import OptionParser
from paddle.utils.preprocess_util import *
-
"""
Usage: run following command to show help message.
python preprocess.py -h
"""
-def save_dict(dict, filename, is_reverse = True):
+
+def save_dict(dict, filename, is_reverse=True):
"""
Save dictionary into file.
dict: input dictionary.
@@ -39,9 +39,10 @@ def save_dict(dict, filename, is_reverse = True):
f = open(filename, 'w')
for k, v in sorted(dict.items(), key=operator.itemgetter(1),\
reverse=is_reverse):
- f.write('%s\t%s\n'%(k, v))
+ f.write('%s\t%s\n' % (k, v))
f.close()
+
def tokenize(sentences):
"""
Use tokenizer.perl to tokenize input sentences.
@@ -58,6 +59,7 @@ def tokenize(sentences):
toks = tok_text.split('\n')[:-1]
return toks
+
def read_lines(path):
"""
path: String, file path.
@@ -71,12 +73,17 @@ def read_lines(path):
seqs.append(line)
return seqs
+
class SentimentDataSetCreate():
"""
A class to process data for sentiment analysis task.
"""
- def __init__(self, data_path, output_path,
- use_okenizer = True, multi_lines = False):
+
+ def __init__(self,
+ data_path,
+ output_path,
+ use_okenizer=True,
+ multi_lines=False):
"""
data_path: string, traing and testing dataset path
output_path: string, output path, store processed dataset
@@ -164,23 +171,17 @@ class SentimentDataSetCreate():
# Preprocess train data.
train_data, train_lab_set = self.data_list(self.train_dir)
print "processing train set..."
- file_lists = self.save_data(train_data,
- "train",
- self.batch_size,
- True,
- True)
+ file_lists = self.save_data(train_data, "train", self.batch_size, True,
+ True)
save_list(file_lists, self.train_list)
# If have test data path, preprocess test data.
if os.path.exists(self.test_dir):
test_data, test_lab_set = self.data_list(self.test_dir)
- assert(train_lab_set == test_lab_set)
+ assert (train_lab_set == test_lab_set)
print "processing test set..."
- file_lists = self.save_data(test_data,
- "test",
- self.batch_size,
- False,
- self.dict_with_test)
+ file_lists = self.save_data(test_data, "test", self.batch_size,
+ False, self.dict_with_test)
save_list(file_lists, self.test_list)
# save labels set.
@@ -191,7 +192,9 @@ class SentimentDataSetCreate():
save_dict(self.word_count, self.dict_file, True)
self.dict_size = len(self.word_count)
- def save_data(self, data, prefix = "",
+ def save_data(self,
+ data,
+ prefix="",
batch_size=50000,
is_shuffle=False,
build_dict=False):
@@ -205,7 +208,8 @@ class SentimentDataSetCreate():
return: list of batch names
"""
if is_shuffle and self.multi_lines:
- return self.save_data_multi_lines(data, prefix, batch_size, build_dict)
+ return self.save_data_multi_lines(data, prefix, batch_size,
+ build_dict)
if is_shuffle:
random.shuffle(data)
@@ -213,7 +217,7 @@ class SentimentDataSetCreate():
batch_names = []
for i in range(num_batches):
batch_name = join_path(self.output_path,
- "%s_part_%03d" %(prefix, i))
+ "%s_part_%03d" % (prefix, i))
begin = i * batch_size
end = min((i + 1) * batch_size, len(data))
# read a batch of data
@@ -246,7 +250,9 @@ class SentimentDataSetCreate():
data_list = tokenize(data_list)
return label_list, data_list
- def save_data_multi_lines(self, data, prefix = "",
+ def save_data_multi_lines(self,
+ data,
+ prefix="",
batch_size=50000,
build_dict=False):
"""
@@ -274,14 +280,14 @@ class SentimentDataSetCreate():
self.create_dict(data_list)
length = len(label_list)
- perm_list = np.array([ i for i in xrange(length) ])
+ perm_list = np.array([i for i in xrange(length)])
random.shuffle(perm_list)
num_batches = int(math.ceil(length / float(batch_size)))
batch_names = []
for i in range(num_batches):
batch_name = join_path(self.output_path,
- "%s_part_%03d" %(prefix, i))
+ "%s_part_%03d" % (prefix, i))
begin = i * batch_size
end = min((i + 1) * batch_size, length)
sub_label = [label_list[perm_list[i]] for i in range(begin, end)]
@@ -304,35 +310,50 @@ class SentimentDataSetCreate():
f.write('%s\t\t%s\n' % (lab, seq))
f.close()
+
def option_parser():
parser = OptionParser(usage="usage: python preprcoess.py "\
"-i data_dir [options]")
- parser.add_option("-i", "--data", action="store",
- dest="input", help="Input data directory.")
- parser.add_option("-o", "--output", action="store",
- dest="output", default=None,
- help="Output directory.")
- parser.add_option("-t", "--tokenizer", action="store",
- dest="use_tokenizer", default=True,
- help="Whether to use tokenizer.")
+ parser.add_option(
+ "-i",
+ "--data",
+ action="store",
+ dest="input",
+ help="Input data directory.")
+ parser.add_option(
+ "-o",
+ "--output",
+ action="store",
+ dest="output",
+ default=None,
+ help="Output directory.")
+ parser.add_option(
+ "-t",
+ "--tokenizer",
+ action="store",
+ dest="use_tokenizer",
+ default=True,
+ help="Whether to use tokenizer.")
parser.add_option("-m", "--multi_lines", action="store",
dest="multi_lines", default=False,
help="If input text files have multi lines and they "\
"need to be shuffled, you should set -m True,")
return parser.parse_args()
+
def main():
options, args = option_parser()
- data_dir=options.input
- output_dir=options.output
- use_tokenizer=options.use_tokenizer
- multi_lines=options.multi_lines
+ data_dir = options.input
+ output_dir = options.output
+ use_tokenizer = options.use_tokenizer
+ multi_lines = options.multi_lines
if output_dir is None:
outname = os.path.basename(options.input)
output_dir = join_path(os.path.dirname(data_dir), 'pre-' + outname)
- data_creator = SentimentDataSetCreate(data_dir, output_dir,
- use_tokenizer, multi_lines)
+ data_creator = SentimentDataSetCreate(data_dir, output_dir, use_tokenizer,
+ multi_lines)
data_creator.create_dataset()
+
if __name__ == '__main__':
main()
diff --git a/demo/sentiment/sentiment_net.py b/demo/sentiment/sentiment_net.py
index 31e585edcaa111898c950ad016d3996fae15a7db..ff6a3624a404cb52d5d7ac0934fedba0d489dc22 100644
--- a/demo/sentiment/sentiment_net.py
+++ b/demo/sentiment/sentiment_net.py
@@ -47,10 +47,12 @@ def sentiment_data(data_dir=None,
for i, line in enumerate(open(dict_file, 'r')):
word_dict[line.split('\t')[0]] = i
- define_py_data_sources2(train_list, test_list,
- module="dataprovider",
- obj="process",
- args={'dictionary': word_dict})
+ define_py_data_sources2(
+ train_list,
+ test_list,
+ module="dataprovider",
+ obj="process",
+ args={'dictionary': word_dict})
return dict_dim, class_dim
@@ -64,8 +66,7 @@ def bidirectional_lstm_net(input_dim,
emb = embedding_layer(input=data, size=emb_dim)
bi_lstm = bidirectional_lstm(input=emb, size=lstm_dim)
dropout = dropout_layer(input=bi_lstm, dropout_rate=0.5)
- output = fc_layer(input=dropout, size=class_dim,
- act=SoftmaxActivation())
+ output = fc_layer(input=dropout, size=class_dim, act=SoftmaxActivation())
if not is_predict:
lbl = data_layer("label", 1)
@@ -109,27 +110,36 @@ def stacked_lstm_net(input_dim,
data = data_layer("word", input_dim)
emb = embedding_layer(input=data, size=emb_dim)
- fc1 = fc_layer(input=emb, size=hid_dim, act=linear,
- bias_attr=bias_attr)
- lstm1 = lstmemory(input=fc1, act=relu, bias_attr=bias_attr,
- layer_attr=layer_attr)
+ fc1 = fc_layer(input=emb, size=hid_dim, act=linear, bias_attr=bias_attr)
+ lstm1 = lstmemory(
+ input=fc1, act=relu, bias_attr=bias_attr, layer_attr=layer_attr)
inputs = [fc1, lstm1]
for i in range(2, stacked_num + 1):
- fc = fc_layer(input=inputs, size=hid_dim, act=linear,
- param_attr=para_attr, bias_attr=bias_attr)
- lstm = lstmemory(input=fc, reverse=(i % 2) == 0, act=relu,
- bias_attr=bias_attr, layer_attr=layer_attr)
+ fc = fc_layer(
+ input=inputs,
+ size=hid_dim,
+ act=linear,
+ param_attr=para_attr,
+ bias_attr=bias_attr)
+ lstm = lstmemory(
+ input=fc,
+ reverse=(i % 2) == 0,
+ act=relu,
+ bias_attr=bias_attr,
+ layer_attr=layer_attr)
inputs = [fc, lstm]
fc_last = pooling_layer(input=inputs[0], pooling_type=MaxPooling())
lstm_last = pooling_layer(input=inputs[1], pooling_type=MaxPooling())
- output = fc_layer(input=[fc_last, lstm_last], size=class_dim,
- act=SoftmaxActivation(),
- bias_attr=bias_attr, param_attr=para_attr)
+ output = fc_layer(
+ input=[fc_last, lstm_last],
+ size=class_dim,
+ act=SoftmaxActivation(),
+ bias_attr=bias_attr,
+ param_attr=para_attr)
if is_predict:
outputs(output)
else:
- outputs(
- classification_cost(input=output, label=data_layer('label', 1)))
+ outputs(classification_cost(input=output, label=data_layer('label', 1)))
diff --git a/demo/sentiment/test.sh b/demo/sentiment/test.sh
index 098fbb91389b89c8b69ccf2f5d308e4e715ac950..c8b12a0e89dbddea56b4ee069ebf66f8d8630615 100755
--- a/demo/sentiment/test.sh
+++ b/demo/sentiment/test.sh
@@ -17,7 +17,7 @@ set -e
function get_best_pass() {
cat $1 | grep -Pzo 'Test .*\n.*pass-.*' | \
sed -r 'N;s/Test.* classification_error_evaluator=([0-9]+\.[0-9]+).*\n.*pass-([0-9]+)/\1 \2/g' |\
- sort | head -n 1
+ sort -n | head -n 1
}
log=train.log
diff --git a/demo/sentiment/trainer_config.py b/demo/sentiment/trainer_config.py
index db24182a8d7359786bd1f3b2083892cf846605d1..114a9138ebfef054c7d3ba99b4a510a452f8f2cd 100644
--- a/demo/sentiment/trainer_config.py
+++ b/demo/sentiment/trainer_config.py
@@ -20,20 +20,20 @@ is_test = get_config_arg('is_test', bool, False)
# whether this config is used for prediction
is_predict = get_config_arg('is_predict', bool, False)
-data_dir = "./data/pre-imdb"
+data_dir = "./data/pre-imdb"
dict_dim, class_dim = sentiment_data(data_dir, is_test, is_predict)
################## Algorithm Config #####################
settings(
- batch_size=128,
- learning_rate=2e-3,
- learning_method=AdamOptimizer(),
- regularization=L2Regularization(8e-4),
- gradient_clipping_threshold=25
-)
+ batch_size=128,
+ learning_rate=2e-3,
+ learning_method=AdamOptimizer(),
+ average_window=0.5,
+ regularization=L2Regularization(8e-4),
+ gradient_clipping_threshold=25)
#################### Network Config ######################
-stacked_lstm_net(dict_dim, class_dim=class_dim,
- stacked_num=3, is_predict=is_predict)
+stacked_lstm_net(
+ dict_dim, class_dim=class_dim, stacked_num=3, is_predict=is_predict)
# bidirectional_lstm_net(dict_dim, class_dim=class_dim, is_predict=is_predict)
diff --git a/demo/seqToseq/data/paraphrase_data.sh b/demo/seqToseq/data/paraphrase_data.sh
index ea1f8dbcfad35699189f6cd4efc81d97e8c89148..1b3f1d45e11fbd5e600e58f583e503a603e484ff 100755
--- a/demo/seqToseq/data/paraphrase_data.sh
+++ b/demo/seqToseq/data/paraphrase_data.sh
@@ -16,9 +16,7 @@ set -e
set -x
# download the in-house paraphrase dataset
-# following is the google drive address
-# you can also directly download from https://pan.baidu.com/s/1o8q577s
-wget https://www.googledrive.com/host/0B7Q8d52jqeI9ejh6Q1RpMTFQT1k/embedding/paraphrase.tar.gz --no-check-certificate
+wget http://paddlepaddle.bj.bcebos.com/model_zoo/embedding/paraphrase.tar.gz
# untar the dataset
tar -zxvf paraphrase.tar.gz
diff --git a/demo/seqToseq/data/wmt14_model.sh b/demo/seqToseq/data/wmt14_model.sh
index 2cec30688d27a57902cdf64d7be5712d12c69bdd..d6e7a732644dc188a165215ddf3f69e1514425eb 100755
--- a/demo/seqToseq/data/wmt14_model.sh
+++ b/demo/seqToseq/data/wmt14_model.sh
@@ -16,9 +16,7 @@ set -e
set -x
# download the pretrained model
-# following is the google drive address
-# you can also directly download from https://pan.baidu.com/s/1o8q577s
-wget https://www.googledrive.com/host/0B7Q8d52jqeI9ejh6Q1RpMTFQT1k/wmt14_model.tar.gz --no-check-certificate
+wget http://paddlepaddle.bj.bcebos.com/model_zoo/wmt14_model.tar.gz
# untar the model
tar -zxvf wmt14_model.tar.gz
diff --git a/demo/seqToseq/dataprovider.py b/demo/seqToseq/dataprovider.py
index df19db109ed223c7515c3ebf2cb1918f41163930..c5da1b7685f47fda337921c7c60ac1497b9e48bb 100755
--- a/demo/seqToseq/dataprovider.py
+++ b/demo/seqToseq/dataprovider.py
@@ -30,14 +30,14 @@ def hook(settings, src_dict, trg_dict, file_list, **kwargs):
if settings.job_mode:
settings.trg_dict = trg_dict
settings.slots = [
- integer_value_sequence(len(settings.src_dict)),
- integer_value_sequence(len(settings.trg_dict)),
+ integer_value_sequence(len(settings.src_dict)),
+ integer_value_sequence(len(settings.trg_dict)),
integer_value_sequence(len(settings.trg_dict))
]
settings.logger.info("trg dict len : %d" % (len(settings.trg_dict)))
else:
settings.slots = [
- integer_value_sequence(len(settings.src_dict)),
+ integer_value_sequence(len(settings.src_dict)),
integer_value_sequence(len(open(file_list[0], "r").readlines()))
]
@@ -62,8 +62,7 @@ def process(settings, file_name):
if settings.job_mode:
trg_seq = line_split[1] # one target sequence
trg_words = trg_seq.split()
- trg_ids = [settings.trg_dict.get(w, UNK_IDX)
- for w in trg_words]
+ trg_ids = [settings.trg_dict.get(w, UNK_IDX) for w in trg_words]
# remove sequence whose length > 80 in training mode
if len(src_ids) > 80 or len(trg_ids) > 80:
diff --git a/demo/seqToseq/preprocess.py b/demo/seqToseq/preprocess.py
index 5efb17a664b9a2525972c29b9b5700b483b8c07e..bd1c51b1514b790ec385d48f49197b3e0285e736 100755
--- a/demo/seqToseq/preprocess.py
+++ b/demo/seqToseq/preprocess.py
@@ -12,7 +12,6 @@
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
-
"""
Example:
python preprocess.py -i INPUT [-d DICTSIZE] [-m]
@@ -24,12 +23,13 @@ Options:
-m --mergeDict merge source and target dictionary
"""
import os
-import sys
+import sys
import string
from optparse import OptionParser
from paddle.utils.preprocess_util import save_list, DatasetCreater
+
class SeqToSeqDatasetCreater(DatasetCreater):
"""
A class to process data for sequence to sequence application.
@@ -75,7 +75,7 @@ class SeqToSeqDatasetCreater(DatasetCreater):
if not os.path.exists(output):
os.system(cmd + '> ' + output)
- def build_dict(self, file_path, dict_path, dict_size = -1):
+ def build_dict(self, file_path, dict_path, dict_size=-1):
"""
Create the dictionary for the file, Note that
1. Valid characters include all printable characters
@@ -99,20 +99,23 @@ class SeqToSeqDatasetCreater(DatasetCreater):
for word in words:
if word not in dictory:
dictory[word] = 1
- else:
+ else:
dictory[word] += 1
output = open(dict_path, "w+")
output.write('\n\n\n')
count = 3
- for key, value in sorted(dictory.items(), key = lambda d:d[1], reverse = True):
+ for key, value in sorted(
+ dictory.items(), key=lambda d: d[1], reverse=True):
output.write(key + "\n")
count += 1
if count == dict_size:
break
self.dict_size = count
-
- def create_dataset(self, dict_size = -1, mergeDict = False,
- suffixes = ['.src', '.trg']):
+
+ def create_dataset(self,
+ dict_size=-1,
+ mergeDict=False,
+ suffixes=['.src', '.trg']):
"""
Create seqToseq dataset
"""
@@ -135,13 +138,14 @@ class SeqToSeqDatasetCreater(DatasetCreater):
# checkout dataset should be parallel corpora
suffix_len = len(suffixes[0])
for dataset in dataset_list:
- file_list = os.listdir(dataset)
- if len(file_list) % 2 == 1:
- raise RuntimeError("dataset should be parallel corpora")
- file_list.sort()
- for i in range(0, len(file_list), 2):
- if file_list[i][:-suffix_len] != file_list[i + 1][:-suffix_len]:
- raise RuntimeError("source and target file name should be equal")
+ file_list = os.listdir(dataset)
+ if len(file_list) % 2 == 1:
+ raise RuntimeError("dataset should be parallel corpora")
+ file_list.sort()
+ for i in range(0, len(file_list), 2):
+ if file_list[i][:-suffix_len] != file_list[i + 1][:-suffix_len]:
+ raise RuntimeError(
+ "source and target file name should be equal")
# cat all the files with the same suffix in dataset
for suffix in suffixes:
@@ -155,16 +159,18 @@ class SeqToSeqDatasetCreater(DatasetCreater):
list = ['train.list', 'test.list', 'gen.list']
for dataset in dataset_list:
outname = os.path.basename(dataset)
- self.concat_file(dataset, outname + suffixes[0],
+ self.concat_file(dataset, outname + suffixes[0],
outname + suffixes[1], dir_list[id], outname)
- save_list([os.path.join(dir_list[id], outname)],
+ save_list([os.path.join(dir_list[id], outname)],
os.path.join(self.output_path, list[id]))
id += 1
# build dictionary for train data
dict = ['src.dict', 'trg.dict']
- dict_path = [os.path.join(self.output_path, dict[0]),
- os.path.join(self.output_path, dict[1])]
+ dict_path = [
+ os.path.join(self.output_path, dict[0]),
+ os.path.join(self.output_path, dict[1])
+ ]
if mergeDict:
outname = os.path.join(train_dir, train_dataset.split('/')[-1])
print 'build src dictionary for train data'
@@ -173,22 +179,30 @@ class SeqToSeqDatasetCreater(DatasetCreater):
os.system('cp ' + dict_path[0] + ' ' + dict_path[1])
else:
outname = os.path.join(train_dataset, self.train_dir_name)
- for id in range(0,2):
+ for id in range(0, 2):
suffix = suffixes[id]
print 'build ' + suffix[1:] + ' dictionary for train data'
self.build_dict(outname + suffix, dict_path[id], dict_size)
print 'dictionary size is', self.dict_size
+
def main():
usage = "usage: \n" \
"python %prog -i INPUT [-d DICTSIZE] [-m]"
parser = OptionParser(usage)
- parser.add_option("-i", action="store", dest="input",
- help="input original dataset path")
- parser.add_option("-d", action="store", dest="dictsize",
- help="specified word count of dictionary")
- parser.add_option("-m", "--mergeDict", action="store_true", dest="mergeDict",
- help="merge source and target dictionary")
+ parser.add_option(
+ "-i", action="store", dest="input", help="input original dataset path")
+ parser.add_option(
+ "-d",
+ action="store",
+ dest="dictsize",
+ help="specified word count of dictionary")
+ parser.add_option(
+ "-m",
+ "--mergeDict",
+ action="store_true",
+ dest="mergeDict",
+ help="merge source and target dictionary")
(options, args) = parser.parse_args()
if options.input[-1] == os.path.sep:
options.input = options.input[:-1]
@@ -200,5 +214,6 @@ def main():
data_creator = SeqToSeqDatasetCreater(options.input, output_path)
data_creator.create_dataset(dictsize, options.mergeDict)
+
if __name__ == "__main__":
- main();
+ main()
diff --git a/demo/seqToseq/seqToseq_net.py b/demo/seqToseq/seqToseq_net.py
index edd6ad3f739b6cefc24d235be55c7a8f541e1ab7..ad5e3339c1461de06732eb62aca9e8323eea707b 100644
--- a/demo/seqToseq/seqToseq_net.py
+++ b/demo/seqToseq/seqToseq_net.py
@@ -50,16 +50,21 @@ def seq_to_seq_data(data_dir,
trg_dict = None
else:
train_list = os.path.join(data_dir, train_list)
- test_list = os.path.join(data_dir,test_list)
+ test_list = os.path.join(data_dir, test_list)
- define_py_data_sources2(train_list, test_list,
- module = "dataprovider",
- obj = "process",
- args = {"src_dict": src_dict,
- "trg_dict": trg_dict})
+ define_py_data_sources2(
+ train_list,
+ test_list,
+ module="dataprovider",
+ obj="process",
+ args={"src_dict": src_dict,
+ "trg_dict": trg_dict})
- return {"src_dict_path": src_lang_dict, "trg_dict_path": trg_lang_dict,
- "gen_result": gen_result}
+ return {
+ "src_dict_path": src_lang_dict,
+ "trg_dict_path": trg_lang_dict,
+ "gen_result": gen_result
+ }
def gru_encoder_decoder(data_conf,
@@ -90,51 +95,55 @@ def gru_encoder_decoder(data_conf,
size=word_vector_dim,
param_attr=ParamAttr(name='_source_language_embedding'))
src_forward = simple_gru(input=src_embedding, size=encoder_size)
- src_backward = simple_gru(input=src_embedding,
- size=encoder_size,
- reverse=True)
+ src_backward = simple_gru(
+ input=src_embedding, size=encoder_size, reverse=True)
encoded_vector = concat_layer(input=[src_forward, src_backward])
with mixed_layer(size=decoder_size) as encoded_proj:
encoded_proj += full_matrix_projection(input=encoded_vector)
backward_first = first_seq(input=src_backward)
- with mixed_layer(size=decoder_size,
- act=TanhActivation(), ) as decoder_boot:
+ with mixed_layer(
+ size=decoder_size,
+ act=TanhActivation(), ) as decoder_boot:
decoder_boot += full_matrix_projection(input=backward_first)
def gru_decoder_with_attention(enc_vec, enc_proj, current_word):
- decoder_mem = memory(name='gru_decoder',
- size=decoder_size,
- boot_layer=decoder_boot)
+ decoder_mem = memory(
+ name='gru_decoder', size=decoder_size, boot_layer=decoder_boot)
- context = simple_attention(encoded_sequence=enc_vec,
- encoded_proj=enc_proj,
- decoder_state=decoder_mem, )
+ context = simple_attention(
+ encoded_sequence=enc_vec,
+ encoded_proj=enc_proj,
+ decoder_state=decoder_mem, )
with mixed_layer(size=decoder_size * 3) as decoder_inputs:
decoder_inputs += full_matrix_projection(input=context)
decoder_inputs += full_matrix_projection(input=current_word)
- gru_step = gru_step_layer(name='gru_decoder',
- input=decoder_inputs,
- output_mem=decoder_mem,
- size=decoder_size)
+ gru_step = gru_step_layer(
+ name='gru_decoder',
+ input=decoder_inputs,
+ output_mem=decoder_mem,
+ size=decoder_size)
- with mixed_layer(size=target_dict_dim,
- bias_attr=True,
- act=SoftmaxActivation()) as out:
+ with mixed_layer(
+ size=target_dict_dim, bias_attr=True,
+ act=SoftmaxActivation()) as out:
out += full_matrix_projection(input=gru_step)
return out
decoder_group_name = "decoder_group"
- group_inputs=[StaticInput(input=encoded_vector,is_seq=True),
- StaticInput(input=encoded_proj,is_seq=True)]
+ group_inputs = [
+ StaticInput(
+ input=encoded_vector, is_seq=True), StaticInput(
+ input=encoded_proj, is_seq=True)
+ ]
if not is_generating:
trg_embedding = embedding_layer(
- input=data_layer(name='target_language_word',
- size=target_dict_dim),
+ input=data_layer(
+ name='target_language_word', size=target_dict_dim),
size=word_vector_dim,
param_attr=ParamAttr(name='_target_language_embedding'))
group_inputs.append(trg_embedding)
@@ -144,12 +153,12 @@ def gru_encoder_decoder(data_conf,
# while encoded source sequence is accessed to as an unbounded memory.
# Here, the StaticInput defines a read-only memory
# for the recurrent_group.
- decoder = recurrent_group(name=decoder_group_name,
- step=gru_decoder_with_attention,
- input=group_inputs)
+ decoder = recurrent_group(
+ name=decoder_group_name,
+ step=gru_decoder_with_attention,
+ input=group_inputs)
- lbl = data_layer(name='target_language_next_word',
- size=target_dict_dim)
+ lbl = data_layer(name='target_language_next_word', size=target_dict_dim)
cost = classification_cost(input=decoder, label=lbl)
outputs(cost)
else:
@@ -168,16 +177,19 @@ def gru_encoder_decoder(data_conf,
embedding_size=word_vector_dim)
group_inputs.append(trg_embedding)
- beam_gen = beam_search(name=decoder_group_name,
- step=gru_decoder_with_attention,
- input=group_inputs,
- bos_id=0,
- eos_id=1,
- beam_size=beam_size,
- max_length=max_length)
-
- seqtext_printer_evaluator(input=beam_gen,
- id_input=data_layer(name="sent_id", size=1),
- dict_file=trg_dict_path,
- result_file=gen_trans_file)
+ beam_gen = beam_search(
+ name=decoder_group_name,
+ step=gru_decoder_with_attention,
+ input=group_inputs,
+ bos_id=0,
+ eos_id=1,
+ beam_size=beam_size,
+ max_length=max_length)
+
+ seqtext_printer_evaluator(
+ input=beam_gen,
+ id_input=data_layer(
+ name="sent_id", size=1),
+ dict_file=trg_dict_path,
+ result_file=gen_trans_file)
outputs(beam_gen)
diff --git a/demo/sequence_tagging/dataprovider.py b/demo/sequence_tagging/dataprovider.py
index 6f412d6834be6d02397821215b1317353cd5df18..37dcb7aa17c0abd197ef2f3121bf8be6c54375c2 100644
--- a/demo/sequence_tagging/dataprovider.py
+++ b/demo/sequence_tagging/dataprovider.py
@@ -17,8 +17,7 @@ import gzip
import logging
logging.basicConfig(
- format='[%(levelname)s %(asctime)s %(filename)s:%(lineno)s] %(message)s',
-)
+ format='[%(levelname)s %(asctime)s %(filename)s:%(lineno)s] %(message)s', )
logger = logging.getLogger('paddle')
logger.setLevel(logging.INFO)
@@ -32,59 +31,58 @@ num_original_columns = 3
# [[-1,0], [0,0]] means previous token at column 0 and current token at
# column 0 are combined as one feature.
patterns = [
- [[-2,0]],
- [[-1,0]],
- [[0,0]],
- [[1,0]],
- [[2,0]],
-
- [[-1,0], [0,0]],
- [[0,0], [1,0]],
-
- [[-2,1]],
- [[-1,1]],
- [[0,1]],
- [[1,1]],
- [[2,1]],
- [[-2,1], [-1,1]],
- [[-1,1], [0,1]],
- [[0,1], [1,1]],
- [[1,1], [2,1]],
-
- [[-2,1], [-1,1], [0,1]],
- [[-1,1], [0,1], [1,1]],
- [[0,1], [1,1], [2,1]],
+ [[-2, 0]],
+ [[-1, 0]],
+ [[0, 0]],
+ [[1, 0]],
+ [[2, 0]],
+ [[-1, 0], [0, 0]],
+ [[0, 0], [1, 0]],
+ [[-2, 1]],
+ [[-1, 1]],
+ [[0, 1]],
+ [[1, 1]],
+ [[2, 1]],
+ [[-2, 1], [-1, 1]],
+ [[-1, 1], [0, 1]],
+ [[0, 1], [1, 1]],
+ [[1, 1], [2, 1]],
+ [[-2, 1], [-1, 1], [0, 1]],
+ [[-1, 1], [0, 1], [1, 1]],
+ [[0, 1], [1, 1], [2, 1]],
]
dict_label = {
- 'B-ADJP': 0,
- 'I-ADJP': 1,
- 'B-ADVP': 2,
- 'I-ADVP': 3,
- 'B-CONJP': 4,
- 'I-CONJP': 5,
- 'B-INTJ': 6,
- 'I-INTJ': 7,
- 'B-LST': 8,
- 'I-LST': 9,
- 'B-NP': 10,
- 'I-NP': 11,
- 'B-PP': 12,
- 'I-PP': 13,
- 'B-PRT': 14,
- 'I-PRT': 15,
- 'B-SBAR': 16,
- 'I-SBAR': 17,
- 'B-UCP': 18,
- 'I-UCP': 19,
- 'B-VP': 20,
- 'I-VP': 21,
- 'O': 22
+ 'B-ADJP': 0,
+ 'I-ADJP': 1,
+ 'B-ADVP': 2,
+ 'I-ADVP': 3,
+ 'B-CONJP': 4,
+ 'I-CONJP': 5,
+ 'B-INTJ': 6,
+ 'I-INTJ': 7,
+ 'B-LST': 8,
+ 'I-LST': 9,
+ 'B-NP': 10,
+ 'I-NP': 11,
+ 'B-PP': 12,
+ 'I-PP': 13,
+ 'B-PRT': 14,
+ 'I-PRT': 15,
+ 'B-SBAR': 16,
+ 'I-SBAR': 17,
+ 'B-UCP': 18,
+ 'I-UCP': 19,
+ 'B-VP': 20,
+ 'I-VP': 21,
+ 'O': 22
}
+
def make_features(sequence):
length = len(sequence)
num_features = len(sequence[0])
+
def get_features(pos):
if pos < 0:
return ['#B%s' % -pos] * num_features
@@ -94,9 +92,10 @@ def make_features(sequence):
for i in xrange(length):
for pattern in patterns:
- fname = '/'.join([get_features(i+pos)[f] for pos, f in pattern])
+ fname = '/'.join([get_features(i + pos)[f] for pos, f in pattern])
sequence[i].append(fname)
+
'''
Source file format:
Each line is for one timestep. The features are separated by space.
@@ -109,6 +108,8 @@ i-th column.
return a list of dict for each column
'''
+
+
def create_dictionaries(filename, cutoff, oov_policy):
def add_to_dict(sequence, dicts):
num_features = len(dicts)
@@ -140,7 +141,6 @@ def create_dictionaries(filename, cutoff, oov_policy):
features = line.split(' ')
sequence.append(features)
-
for i in xrange(num_features):
dct = dicts[i]
n = 1 if oov_policy[i] == OOV_POLICY_USE else 0
@@ -151,7 +151,7 @@ def create_dictionaries(filename, cutoff, oov_policy):
else:
dct[k] = n
n += 1
-
+
if oov_policy[i] == OOV_POLICY_USE:
# placeholder so that len(dct) will be the number of features
# including OOV
@@ -187,12 +187,15 @@ def initializer(settings, **xargs):
logger.info("feature size=%s" % dim)
settings.input_types = input_types
+
'''
if oov_policy[i] == OOV_POLICY_USE, features in i-th column which are not
existed in dicts[i] will be assigned to id 0.
if oov_policy[i] == OOV_POLICY_ERROR, all features in i-th column MUST exist
in dicts[i].
'''
+
+
@provider(init_hook=initializer, cache=CacheType.CACHE_PASS_IN_MEM)
def process(settings, filename):
input_file = filename
@@ -231,7 +234,7 @@ def process(settings, filename):
logger.fatal("Unknown token: %s" % features[i])
else:
vec.ids.append(dim + 0)
-
+
dim += len(dicts[i])
sample[-1].append(vec)
return sample
@@ -255,4 +258,3 @@ def process(settings, filename):
f.close()
logger.info("num_sequences=%s" % num_sequences)
-
diff --git a/demo/sequence_tagging/linear_crf.py b/demo/sequence_tagging/linear_crf.py
index 2bd1a20bc52fc546dcd0a0874bc09433e7212152..64895742e1b8c0a11cbedee0b88e61b5b63b007f 100644
--- a/demo/sequence_tagging/linear_crf.py
+++ b/demo/sequence_tagging/linear_crf.py
@@ -16,11 +16,11 @@ from paddle.trainer_config_helpers import *
import math
-define_py_data_sources2(train_list="data/train.list",
- test_list="data/test.list",
- module="dataprovider",
- obj="process")
-
+define_py_data_sources2(
+ train_list="data/train.list",
+ test_list="data/test.list",
+ module="dataprovider",
+ obj="process")
batch_size = 1
settings(
@@ -30,14 +30,15 @@ settings(
average_window=0.5,
learning_rate=1e-1,
learning_rate_decay_a=1e-5,
- learning_rate_decay_b=0.25,
-)
+ learning_rate_decay_b=0.25, )
+
+num_label_types = 23
-num_label_types=23
def get_simd_size(size):
return int(math.ceil(float(size) / 8)) * 8
+
# Currently, in order to use sparse_update=True,
# the size has to be aligned.
num_label_types = get_simd_size(num_label_types)
@@ -45,40 +46,37 @@ num_label_types = get_simd_size(num_label_types)
features = data_layer(name="features", size=76328)
word = data_layer(name="word", size=6778)
pos = data_layer(name="pos", size=44)
-chunk = data_layer(name="chunk",
- size=num_label_types)
+chunk = data_layer(name="chunk", size=num_label_types)
crf_input = fc_layer(
input=features,
size=num_label_types,
act=LinearActivation(),
bias_attr=False,
- param_attr=ParamAttr(initial_std=0, sparse_update=True))
+ param_attr=ParamAttr(
+ initial_std=0, sparse_update=True))
-crf=crf_layer(
+crf = crf_layer(
input=crf_input,
label=chunk,
- param_attr=ParamAttr(name="crfw", initial_std=0),
-)
+ param_attr=ParamAttr(
+ name="crfw", initial_std=0), )
-crf_decoding=crf_decoding_layer(
+crf_decoding = crf_decoding_layer(
size=num_label_types,
input=crf_input,
label=chunk,
- param_attr=ParamAttr(name="crfw"),
-)
+ param_attr=ParamAttr(name="crfw"), )
sum_evaluator(
name="error",
- input=crf_decoding,
-)
+ input=crf_decoding, )
chunk_evaluator(
name="chunk_f1",
- input =[crf_decoding, chunk],
+ input=[crf_decoding, chunk],
chunk_scheme="IOB",
- num_chunk_types=11,
-)
+ num_chunk_types=11, )
inputs(word, pos, chunk, features)
outputs(crf)
diff --git a/demo/sequence_tagging/rnn_crf.py b/demo/sequence_tagging/rnn_crf.py
index fb157bf3ea7193bca2c8a281e1afaf4b5f1d7309..90d4bbdddfdb4e38b930d54a2bc865df9fac589c 100644
--- a/demo/sequence_tagging/rnn_crf.py
+++ b/demo/sequence_tagging/rnn_crf.py
@@ -16,10 +16,11 @@ from paddle.trainer_config_helpers import *
import math
-define_py_data_sources2(train_list="data/train.list",
- test_list="data/test.list",
- module="dataprovider",
- obj="process")
+define_py_data_sources2(
+ train_list="data/train.list",
+ test_list="data/test.list",
+ module="dataprovider",
+ obj="process")
batch_size = 16
settings(
@@ -27,29 +28,27 @@ settings(
batch_size=batch_size,
regularization=L2Regularization(batch_size * 1e-5),
average_window=0.5,
- learning_rate = 2e-3,
- learning_rate_decay_a = 5e-7,
- learning_rate_decay_b = 0.5,
-)
+ learning_rate=2e-3,
+ learning_rate_decay_a=5e-7,
+ learning_rate_decay_b=0.5, )
-word_dim=128
+word_dim = 128
hidden_dim = 128
with_rnn = True
-initial_std=1/math.sqrt(hidden_dim)
-param_attr=ParamAttr(initial_std=initial_std)
-cpu_layer_attr=ExtraLayerAttribute(device=-1)
+initial_std = 1 / math.sqrt(hidden_dim)
+param_attr = ParamAttr(initial_std=initial_std)
+cpu_layer_attr = ExtraLayerAttribute(device=-1)
default_device(0)
-num_label_types=23
+num_label_types = 23
features = data_layer(name="features", size=76328)
word = data_layer(name="word", size=6778)
pos = data_layer(name="pos", size=44)
-chunk = data_layer(name="chunk",
- size=num_label_types,
- layer_attr=cpu_layer_attr)
+chunk = data_layer(
+ name="chunk", size=num_label_types, layer_attr=cpu_layer_attr)
emb = embedding_layer(
input=word, size=word_dim, param_attr=ParamAttr(initial_std=0))
@@ -58,73 +57,64 @@ hidden1 = mixed_layer(
size=hidden_dim,
act=STanhActivation(),
bias_attr=True,
- input=[full_matrix_projection(emb),
- table_projection(pos, param_attr=param_attr)]
-)
+ input=[
+ full_matrix_projection(emb), table_projection(
+ pos, param_attr=param_attr)
+ ])
if with_rnn:
rnn1 = recurrent_layer(
act=ReluActivation(),
bias_attr=True,
input=hidden1,
- param_attr=ParamAttr(initial_std=0),
- )
+ param_attr=ParamAttr(initial_std=0), )
hidden2 = mixed_layer(
size=hidden_dim,
act=STanhActivation(),
bias_attr=True,
- input=[full_matrix_projection(hidden1)
- ] + ([
- full_matrix_projection(rnn1, param_attr=ParamAttr(initial_std=0))
- ] if with_rnn else []),
-)
+ input=[full_matrix_projection(hidden1)] +
+ ([full_matrix_projection(
+ rnn1, param_attr=ParamAttr(initial_std=0))] if with_rnn else []), )
if with_rnn:
- rnn2=recurrent_layer(
+ rnn2 = recurrent_layer(
reverse=True,
act=ReluActivation(),
bias_attr=True,
input=hidden2,
- param_attr=ParamAttr(initial_std=0),
- )
+ param_attr=ParamAttr(initial_std=0), )
crf_input = mixed_layer(
size=num_label_types,
bias_attr=False,
- input=[
- full_matrix_projection(hidden2),
- ] + ([
- full_matrix_projection(rnn2, param_attr=ParamAttr(initial_std=0))
- ] if with_rnn else []),
-)
+ input=[full_matrix_projection(hidden2), ] +
+ ([full_matrix_projection(
+ rnn2, param_attr=ParamAttr(initial_std=0))] if with_rnn else []), )
crf = crf_layer(
input=crf_input,
label=chunk,
- param_attr=ParamAttr(name="crfw", initial_std=0),
- layer_attr=cpu_layer_attr,
-)
+ param_attr=ParamAttr(
+ name="crfw", initial_std=0),
+ layer_attr=cpu_layer_attr, )
crf_decoding = crf_decoding_layer(
size=num_label_types,
input=crf_input,
label=chunk,
param_attr=ParamAttr(name="crfw"),
- layer_attr=cpu_layer_attr,
-)
+ layer_attr=cpu_layer_attr, )
sum_evaluator(
name="error",
- input=crf_decoding,
-)
+ input=crf_decoding, )
chunk_evaluator(
name="chunk_f1",
- input =[crf_decoding, chunk],
+ input=[crf_decoding, chunk],
chunk_scheme="IOB",
- num_chunk_types=11,
-)
+ num_chunk_types=11, )
inputs(word, pos, chunk, features)
outputs(crf)
diff --git a/doc/CMakeLists.txt b/doc/CMakeLists.txt
index ef4e9d102d35fc95e96711175a57f7e181a946c6..efcf8b0ad3d6f2f831fe71f3c09163015cc1ac96 100644
--- a/doc/CMakeLists.txt
+++ b/doc/CMakeLists.txt
@@ -15,25 +15,11 @@ set(SPHINX_CACHE_DIR "${CMAKE_CURRENT_BINARY_DIR}/_doctrees")
# HTML output directory
set(SPHINX_HTML_DIR "${CMAKE_CURRENT_BINARY_DIR}/html")
-
-set(PADDLE_DOXYGEN_OUTPUT "${CMAKE_CURRENT_BINARY_DIR}/doxygen_xml")
-
configure_file(
"${CMAKE_CURRENT_SOURCE_DIR}/conf.py.in"
"${BINARY_BUILD_DIR}/conf.py"
@ONLY)
-configure_file(
- "${CMAKE_CURRENT_SOURCE_DIR}/Doxyfile.in"
- "${CMAKE_CURRENT_BINARY_DIR}/Doxyfile"
- @ONLY
- )
-
-add_custom_target(paddle_doxygen_docs ALL
- ${DOXYGEN_EXECUTABLE} ${CMAKE_CURRENT_BINARY_DIR}/Doxyfile
- WORKING_DIRECTORY ${CMAKE_CURRENT_BINARY_DIR}
-)
-
sphinx_add_target(paddle_docs
html
${BINARY_BUILD_DIR}
@@ -41,6 +27,5 @@ sphinx_add_target(paddle_docs
${CMAKE_CURRENT_SOURCE_DIR}
${SPHINX_HTML_DIR})
-add_dependencies(paddle_docs
- gen_proto_py
- paddle_doxygen_docs)
+add_dependencies(paddle_docs
+ gen_proto_py)
diff --git a/doc/Doxyfile.in b/doc/Doxyfile.in
deleted file mode 100644
index a1fc3801925dd340709ac77c9aa77c82051ee111..0000000000000000000000000000000000000000
--- a/doc/Doxyfile.in
+++ /dev/null
@@ -1,2384 +0,0 @@
-# Doxyfile 1.8.10
-
-# This file describes the settings to be used by the documentation system
-# doxygen (www.doxygen.org) for a project.
-#
-# All text after a double hash (##) is considered a comment and is placed in
-# front of the TAG it is preceding.
-#
-# All text after a single hash (#) is considered a comment and will be ignored.
-# The format is:
-# TAG = value [value, ...]
-# For lists, items can also be appended using:
-# TAG += value [value, ...]
-# Values that contain spaces should be placed between quotes (\" \").
-
-#---------------------------------------------------------------------------
-# Project related configuration options
-#---------------------------------------------------------------------------
-
-# This tag specifies the encoding used for all characters in the config file
-# that follow. The default is UTF-8 which is also the encoding used for all text
-# before the first occurrence of this tag. Doxygen uses libiconv (or the iconv
-# built into libc) for the transcoding. See http://www.gnu.org/software/libiconv
-# for the list of possible encodings.
-# The default value is: UTF-8.
-
-DOXYFILE_ENCODING = UTF-8
-
-# The PROJECT_NAME tag is a single word (or a sequence of words surrounded by
-# double-quotes, unless you are using Doxywizard) that should identify the
-# project for which the documentation is generated. This name is used in the
-# title of most generated pages and in a few other places.
-# The default value is: My Project.
-
-PROJECT_NAME = "paddle"
-
-# The PROJECT_NUMBER tag can be used to enter a project or revision number. This
-# could be handy for archiving the generated documentation or if some version
-# control system is used.
-
-PROJECT_NUMBER = 1.0.0
-
-# Using the PROJECT_BRIEF tag one can provide an optional one line description
-# for a project that appears at the top of each page and should give viewer a
-# quick idea about the purpose of the project. Keep the description short.
-
-PROJECT_BRIEF =
-
-# With the PROJECT_LOGO tag one can specify a logo or an icon that is included
-# in the documentation. The maximum height of the logo should not exceed 55
-# pixels and the maximum width should not exceed 200 pixels. Doxygen will copy
-# the logo to the output directory.
-
-PROJECT_LOGO =
-
-# The OUTPUT_DIRECTORY tag is used to specify the (relative or absolute) path
-# into which the generated documentation will be written. If a relative path is
-# entered, it will be relative to the location where doxygen was started. If
-# left blank the current directory will be used.
-
-OUTPUT_DIRECTORY = @PADDLE_DOXYGEN_OUTPUT@
-
-# If the CREATE_SUBDIRS tag is set to YES then doxygen will create 4096 sub-
-# directories (in 2 levels) under the output directory of each output format and
-# will distribute the generated files over these directories. Enabling this
-# option can be useful when feeding doxygen a huge amount of source files, where
-# putting all generated files in the same directory would otherwise causes
-# performance problems for the file system.
-# The default value is: NO.
-
-CREATE_SUBDIRS = NO
-
-# If the ALLOW_UNICODE_NAMES tag is set to YES, doxygen will allow non-ASCII
-# characters to appear in the names of generated files. If set to NO, non-ASCII
-# characters will be escaped, for example _xE3_x81_x84 will be used for Unicode
-# U+3044.
-# The default value is: NO.
-
-ALLOW_UNICODE_NAMES = NO
-
-# The OUTPUT_LANGUAGE tag is used to specify the language in which all
-# documentation generated by doxygen is written. Doxygen will use this
-# information to generate all constant output in the proper language.
-# Possible values are: Afrikaans, Arabic, Armenian, Brazilian, Catalan, Chinese,
-# Chinese-Traditional, Croatian, Czech, Danish, Dutch, English (United States),
-# Esperanto, Farsi (Persian), Finnish, French, German, Greek, Hungarian,
-# Indonesian, Italian, Japanese, Japanese-en (Japanese with English messages),
-# Korean, Korean-en (Korean with English messages), Latvian, Lithuanian,
-# Macedonian, Norwegian, Persian (Farsi), Polish, Portuguese, Romanian, Russian,
-# Serbian, Serbian-Cyrillic, Slovak, Slovene, Spanish, Swedish, Turkish,
-# Ukrainian and Vietnamese.
-# The default value is: English.
-
-OUTPUT_LANGUAGE = English
-
-# If the BRIEF_MEMBER_DESC tag is set to YES, doxygen will include brief member
-# descriptions after the members that are listed in the file and class
-# documentation (similar to Javadoc). Set to NO to disable this.
-# The default value is: YES.
-
-BRIEF_MEMBER_DESC = YES
-
-# If the REPEAT_BRIEF tag is set to YES, doxygen will prepend the brief
-# description of a member or function before the detailed description
-#
-# Note: If both HIDE_UNDOC_MEMBERS and BRIEF_MEMBER_DESC are set to NO, the
-# brief descriptions will be completely suppressed.
-# The default value is: YES.
-
-REPEAT_BRIEF = YES
-
-# This tag implements a quasi-intelligent brief description abbreviator that is
-# used to form the text in various listings. Each string in this list, if found
-# as the leading text of the brief description, will be stripped from the text
-# and the result, after processing the whole list, is used as the annotated
-# text. Otherwise, the brief description is used as-is. If left blank, the
-# following values are used ($name is automatically replaced with the name of
-# the entity):The $name class, The $name widget, The $name file, is, provides,
-# specifies, contains, represents, a, an and the.
-
-ABBREVIATE_BRIEF =
-
-# If the ALWAYS_DETAILED_SEC and REPEAT_BRIEF tags are both set to YES then
-# doxygen will generate a detailed section even if there is only a brief
-# description.
-# The default value is: NO.
-
-ALWAYS_DETAILED_SEC = NO
-
-# If the INLINE_INHERITED_MEMB tag is set to YES, doxygen will show all
-# inherited members of a class in the documentation of that class as if those
-# members were ordinary class members. Constructors, destructors and assignment
-# operators of the base classes will not be shown.
-# The default value is: NO.
-
-INLINE_INHERITED_MEMB = NO
-
-# If the FULL_PATH_NAMES tag is set to YES, doxygen will prepend the full path
-# before files name in the file list and in the header files. If set to NO the
-# shortest path that makes the file name unique will be used
-# The default value is: YES.
-
-FULL_PATH_NAMES = YES
-
-# The STRIP_FROM_PATH tag can be used to strip a user-defined part of the path.
-# Stripping is only done if one of the specified strings matches the left-hand
-# part of the path. The tag can be used to show relative paths in the file list.
-# If left blank the directory from which doxygen is run is used as the path to
-# strip.
-#
-# Note that you can specify absolute paths here, but also relative paths, which
-# will be relative from the directory where doxygen is started.
-# This tag requires that the tag FULL_PATH_NAMES is set to YES.
-
-STRIP_FROM_PATH =
-
-# The STRIP_FROM_INC_PATH tag can be used to strip a user-defined part of the
-# path mentioned in the documentation of a class, which tells the reader which
-# header file to include in order to use a class. If left blank only the name of
-# the header file containing the class definition is used. Otherwise one should
-# specify the list of include paths that are normally passed to the compiler
-# using the -I flag.
-
-STRIP_FROM_INC_PATH =
-
-# If the SHORT_NAMES tag is set to YES, doxygen will generate much shorter (but
-# less readable) file names. This can be useful is your file systems doesn't
-# support long names like on DOS, Mac, or CD-ROM.
-# The default value is: NO.
-
-SHORT_NAMES = NO
-
-# If the JAVADOC_AUTOBRIEF tag is set to YES then doxygen will interpret the
-# first line (until the first dot) of a Javadoc-style comment as the brief
-# description. If set to NO, the Javadoc-style will behave just like regular Qt-
-# style comments (thus requiring an explicit @brief command for a brief
-# description.)
-# The default value is: NO.
-
-JAVADOC_AUTOBRIEF = NO
-
-# If the QT_AUTOBRIEF tag is set to YES then doxygen will interpret the first
-# line (until the first dot) of a Qt-style comment as the brief description. If
-# set to NO, the Qt-style will behave just like regular Qt-style comments (thus
-# requiring an explicit \brief command for a brief description.)
-# The default value is: NO.
-
-QT_AUTOBRIEF = NO
-
-# The MULTILINE_CPP_IS_BRIEF tag can be set to YES to make doxygen treat a
-# multi-line C++ special comment block (i.e. a block of //! or /// comments) as
-# a brief description. This used to be the default behavior. The new default is
-# to treat a multi-line C++ comment block as a detailed description. Set this
-# tag to YES if you prefer the old behavior instead.
-#
-# Note that setting this tag to YES also means that rational rose comments are
-# not recognized any more.
-# The default value is: NO.
-
-MULTILINE_CPP_IS_BRIEF = NO
-
-# If the INHERIT_DOCS tag is set to YES then an undocumented member inherits the
-# documentation from any documented member that it re-implements.
-# The default value is: YES.
-
-INHERIT_DOCS = YES
-
-# If the SEPARATE_MEMBER_PAGES tag is set to YES then doxygen will produce a new
-# page for each member. If set to NO, the documentation of a member will be part
-# of the file/class/namespace that contains it.
-# The default value is: NO.
-
-SEPARATE_MEMBER_PAGES = NO
-
-# The TAB_SIZE tag can be used to set the number of spaces in a tab. Doxygen
-# uses this value to replace tabs by spaces in code fragments.
-# Minimum value: 1, maximum value: 16, default value: 4.
-
-TAB_SIZE = 2
-
-# This tag can be used to specify a number of aliases that act as commands in
-# the documentation. An alias has the form:
-# name=value
-# For example adding
-# "sideeffect=@par Side Effects:\n"
-# will allow you to put the command \sideeffect (or @sideeffect) in the
-# documentation, which will result in a user-defined paragraph with heading
-# "Side Effects:". You can put \n's in the value part of an alias to insert
-# newlines.
-
-ALIASES =
-
-# This tag can be used to specify a number of word-keyword mappings (TCL only).
-# A mapping has the form "name=value". For example adding "class=itcl::class"
-# will allow you to use the command class in the itcl::class meaning.
-
-TCL_SUBST =
-
-# Set the OPTIMIZE_OUTPUT_FOR_C tag to YES if your project consists of C sources
-# only. Doxygen will then generate output that is more tailored for C. For
-# instance, some of the names that are used will be different. The list of all
-# members will be omitted, etc.
-# The default value is: NO.
-
-OPTIMIZE_OUTPUT_FOR_C = NO
-
-# Set the OPTIMIZE_OUTPUT_JAVA tag to YES if your project consists of Java or
-# Python sources only. Doxygen will then generate output that is more tailored
-# for that language. For instance, namespaces will be presented as packages,
-# qualified scopes will look different, etc.
-# The default value is: NO.
-
-OPTIMIZE_OUTPUT_JAVA = NO
-
-# Set the OPTIMIZE_FOR_FORTRAN tag to YES if your project consists of Fortran
-# sources. Doxygen will then generate output that is tailored for Fortran.
-# The default value is: NO.
-
-OPTIMIZE_FOR_FORTRAN = NO
-
-# Set the OPTIMIZE_OUTPUT_VHDL tag to YES if your project consists of VHDL
-# sources. Doxygen will then generate output that is tailored for VHDL.
-# The default value is: NO.
-
-OPTIMIZE_OUTPUT_VHDL = NO
-
-# Doxygen selects the parser to use depending on the extension of the files it
-# parses. With this tag you can assign which parser to use for a given
-# extension. Doxygen has a built-in mapping, but you can override or extend it
-# using this tag. The format is ext=language, where ext is a file extension, and
-# language is one of the parsers supported by doxygen: IDL, Java, Javascript,
-# C#, C, C++, D, PHP, Objective-C, Python, Fortran (fixed format Fortran:
-# FortranFixed, free formatted Fortran: FortranFree, unknown formatted Fortran:
-# Fortran. In the latter case the parser tries to guess whether the code is fixed
-# or free formatted code, this is the default for Fortran type files), VHDL. For
-# instance to make doxygen treat .inc files as Fortran files (default is PHP),
-# and .f files as C (default is Fortran), use: inc=Fortran f=C.
-#
-# Note: For files without extension you can use no_extension as a placeholder.
-#
-# Note that for custom extensions you also need to set FILE_PATTERNS otherwise
-# the files are not read by doxygen.
-
-EXTENSION_MAPPING =
-
-# If the MARKDOWN_SUPPORT tag is enabled then doxygen pre-processes all comments
-# according to the Markdown format, which allows for more readable
-# documentation. See http://daringfireball.net/projects/markdown/ for details.
-# The output of markdown processing is further processed by doxygen, so you can
-# mix doxygen, HTML, and XML commands with Markdown formatting. Disable only in
-# case of backward compatibility issues.
-# The default value is: YES.
-
-MARKDOWN_SUPPORT = YES
-
-# When enabled doxygen tries to link words that correspond to documented
-# classes, or namespaces to their corresponding documentation. Such a link can
-# be prevented in individual cases by putting a % sign in front of the word or
-# globally by setting AUTOLINK_SUPPORT to NO.
-# The default value is: YES.
-
-AUTOLINK_SUPPORT = YES
-
-# If you use STL classes (i.e. std::string, std::vector, etc.) but do not want
-# to include (a tag file for) the STL sources as input, then you should set this
-# tag to YES in order to let doxygen match functions declarations and
-# definitions whose arguments contain STL classes (e.g. func(std::string);
-# versus func(std::string) {}). This also makes the inheritance and collaboration
-# diagrams that involve STL classes more complete and accurate.
-# The default value is: NO.
-
-BUILTIN_STL_SUPPORT = YES
-
-# If you use Microsoft's C++/CLI language, you should set this option to YES to
-# enable parsing support.
-# The default value is: NO.
-
-CPP_CLI_SUPPORT = NO
-
-# Set the SIP_SUPPORT tag to YES if your project consists of sip (see:
-# http://www.riverbankcomputing.co.uk/software/sip/intro) sources only. Doxygen
-# will parse them like normal C++ but will assume all classes use public instead
-# of private inheritance when no explicit protection keyword is present.
-# The default value is: NO.
-
-SIP_SUPPORT = NO
-
-# For Microsoft's IDL there are propget and propput attributes to indicate
-# getter and setter methods for a property. Setting this option to YES will make
-# doxygen replace the get and set methods by a property in the documentation.
-# This will only work if the methods are indeed getting or setting a simple
-# type. If this is not the case, or you want to show the methods anyway, you
-# should set this option to NO.
-# The default value is: YES.
-
-IDL_PROPERTY_SUPPORT = YES
-
-# If member grouping is used in the documentation and the DISTRIBUTE_GROUP_DOC
-# tag is set to YES then doxygen will reuse the documentation of the first
-# member in the group (if any) for the other members of the group. By default
-# all members of a group must be documented explicitly.
-# The default value is: NO.
-
-DISTRIBUTE_GROUP_DOC = NO
-
-# If one adds a struct or class to a group and this option is enabled, then also
-# any nested class or struct is added to the same group. By default this option
-# is disabled and one has to add nested compounds explicitly via \ingroup.
-# The default value is: NO.
-
-GROUP_NESTED_COMPOUNDS = NO
-
-# Set the SUBGROUPING tag to YES to allow class member groups of the same type
-# (for instance a group of public functions) to be put as a subgroup of that
-# type (e.g. under the Public Functions section). Set it to NO to prevent
-# subgrouping. Alternatively, this can be done per class using the
-# \nosubgrouping command.
-# The default value is: YES.
-
-SUBGROUPING = YES
-
-# When the INLINE_GROUPED_CLASSES tag is set to YES, classes, structs and unions
-# are shown inside the group in which they are included (e.g. using \ingroup)
-# instead of on a separate page (for HTML and Man pages) or section (for LaTeX
-# and RTF).
-#
-# Note that this feature does not work in combination with
-# SEPARATE_MEMBER_PAGES.
-# The default value is: NO.
-
-INLINE_GROUPED_CLASSES = NO
-
-# When the INLINE_SIMPLE_STRUCTS tag is set to YES, structs, classes, and unions
-# with only public data fields or simple typedef fields will be shown inline in
-# the documentation of the scope in which they are defined (i.e. file,
-# namespace, or group documentation), provided this scope is documented. If set
-# to NO, structs, classes, and unions are shown on a separate page (for HTML and
-# Man pages) or section (for LaTeX and RTF).
-# The default value is: NO.
-
-INLINE_SIMPLE_STRUCTS = NO
-
-# When TYPEDEF_HIDES_STRUCT tag is enabled, a typedef of a struct, union, or
-# enum is documented as struct, union, or enum with the name of the typedef. So
-# typedef struct TypeS {} TypeT, will appear in the documentation as a struct
-# with name TypeT. When disabled the typedef will appear as a member of a file,
-# namespace, or class. And the struct will be named TypeS. This can typically be
-# useful for C code in case the coding convention dictates that all compound
-# types are typedef'ed and only the typedef is referenced, never the tag name.
-# The default value is: NO.
-
-TYPEDEF_HIDES_STRUCT = NO
-
-# The size of the symbol lookup cache can be set using LOOKUP_CACHE_SIZE. This
-# cache is used to resolve symbols given their name and scope. Since this can be
-# an expensive process and often the same symbol appears multiple times in the
-# code, doxygen keeps a cache of pre-resolved symbols. If the cache is too small
-# doxygen will become slower. If the cache is too large, memory is wasted. The
-# cache size is given by this formula: 2^(16+LOOKUP_CACHE_SIZE). The valid range
-# is 0..9, the default is 0, corresponding to a cache size of 2^16=65536
-# symbols. At the end of a run doxygen will report the cache usage and suggest
-# the optimal cache size from a speed point of view.
-# Minimum value: 0, maximum value: 9, default value: 0.
-
-LOOKUP_CACHE_SIZE = 0
-
-#---------------------------------------------------------------------------
-# Build related configuration options
-#---------------------------------------------------------------------------
-
-# If the EXTRACT_ALL tag is set to YES, doxygen will assume all entities in
-# documentation are documented, even if no documentation was available. Private
-# class members and static file members will be hidden unless the
-# EXTRACT_PRIVATE respectively EXTRACT_STATIC tags are set to YES.
-# Note: This will also disable the warnings about undocumented members that are
-# normally produced when WARNINGS is set to YES.
-# The default value is: NO.
-
-EXTRACT_ALL = NO
-
-# If the EXTRACT_PRIVATE tag is set to YES, all private members of a class will
-# be included in the documentation.
-# The default value is: NO.
-
-EXTRACT_PRIVATE = NO
-
-# If the EXTRACT_PACKAGE tag is set to YES, all members with package or internal
-# scope will be included in the documentation.
-# The default value is: NO.
-
-EXTRACT_PACKAGE = NO
-
-# If the EXTRACT_STATIC tag is set to YES, all static members of a file will be
-# included in the documentation.
-# The default value is: NO.
-
-EXTRACT_STATIC = NO
-
-# If the EXTRACT_LOCAL_CLASSES tag is set to YES, classes (and structs) defined
-# locally in source files will be included in the documentation. If set to NO,
-# only classes defined in header files are included. Does not have any effect
-# for Java sources.
-# The default value is: YES.
-
-EXTRACT_LOCAL_CLASSES = YES
-
-# This flag is only useful for Objective-C code. If set to YES, local methods,
-# which are defined in the implementation section but not in the interface are
-# included in the documentation. If set to NO, only methods in the interface are
-# included.
-# The default value is: NO.
-
-EXTRACT_LOCAL_METHODS = NO
-
-# If this flag is set to YES, the members of anonymous namespaces will be
-# extracted and appear in the documentation as a namespace called
-# 'anonymous_namespace{file}', where file will be replaced with the base name of
-# the file that contains the anonymous namespace. By default anonymous
-# namespaces are hidden.
-# The default value is: NO.
-
-EXTRACT_ANON_NSPACES = NO
-
-# If the HIDE_UNDOC_MEMBERS tag is set to YES, doxygen will hide all
-# undocumented members inside documented classes or files. If set to NO these
-# members will be included in the various overviews, but no documentation
-# section is generated. This option has no effect if EXTRACT_ALL is enabled.
-# The default value is: NO.
-
-HIDE_UNDOC_MEMBERS = NO
-
-# If the HIDE_UNDOC_CLASSES tag is set to YES, doxygen will hide all
-# undocumented classes that are normally visible in the class hierarchy. If set
-# to NO, these classes will be included in the various overviews. This option
-# has no effect if EXTRACT_ALL is enabled.
-# The default value is: NO.
-
-HIDE_UNDOC_CLASSES = NO
-
-# If the HIDE_FRIEND_COMPOUNDS tag is set to YES, doxygen will hide all friend
-# (class|struct|union) declarations. If set to NO, these declarations will be
-# included in the documentation.
-# The default value is: NO.
-
-HIDE_FRIEND_COMPOUNDS = NO
-
-# If the HIDE_IN_BODY_DOCS tag is set to YES, doxygen will hide any
-# documentation blocks found inside the body of a function. If set to NO, these
-# blocks will be appended to the function's detailed documentation block.
-# The default value is: NO.
-
-HIDE_IN_BODY_DOCS = NO
-
-# The INTERNAL_DOCS tag determines if documentation that is typed after a
-# \internal command is included. If the tag is set to NO then the documentation
-# will be excluded. Set it to YES to include the internal documentation.
-# The default value is: NO.
-
-INTERNAL_DOCS = NO
-
-# If the CASE_SENSE_NAMES tag is set to NO then doxygen will only generate file
-# names in lower-case letters. If set to YES, upper-case letters are also
-# allowed. This is useful if you have classes or files whose names only differ
-# in case and if your file system supports case sensitive file names. Windows
-# and Mac users are advised to set this option to NO.
-# The default value is: system dependent.
-
-CASE_SENSE_NAMES = YES
-
-# If the HIDE_SCOPE_NAMES tag is set to NO then doxygen will show members with
-# their full class and namespace scopes in the documentation. If set to YES, the
-# scope will be hidden.
-# The default value is: NO.
-
-HIDE_SCOPE_NAMES = NO
-
-# If the HIDE_COMPOUND_REFERENCE tag is set to NO (default) then doxygen will
-# append additional text to a page's title, such as Class Reference. If set to
-# YES the compound reference will be hidden.
-# The default value is: NO.
-
-HIDE_COMPOUND_REFERENCE= NO
-
-# If the SHOW_INCLUDE_FILES tag is set to YES then doxygen will put a list of
-# the files that are included by a file in the documentation of that file.
-# The default value is: YES.
-
-SHOW_INCLUDE_FILES = NO
-
-# If the SHOW_GROUPED_MEMB_INC tag is set to YES then Doxygen will add for each
-# grouped member an include statement to the documentation, telling the reader
-# which file to include in order to use the member.
-# The default value is: NO.
-
-SHOW_GROUPED_MEMB_INC = NO
-
-# If the FORCE_LOCAL_INCLUDES tag is set to YES then doxygen will list include
-# files with double quotes in the documentation rather than with sharp brackets.
-# The default value is: NO.
-
-FORCE_LOCAL_INCLUDES = NO
-
-# If the INLINE_INFO tag is set to YES then a tag [inline] is inserted in the
-# documentation for inline members.
-# The default value is: YES.
-
-INLINE_INFO = YES
-
-# If the SORT_MEMBER_DOCS tag is set to YES then doxygen will sort the
-# (detailed) documentation of file and class members alphabetically by member
-# name. If set to NO, the members will appear in declaration order.
-# The default value is: YES.
-
-SORT_MEMBER_DOCS = YES
-
-# If the SORT_BRIEF_DOCS tag is set to YES then doxygen will sort the brief
-# descriptions of file, namespace and class members alphabetically by member
-# name. If set to NO, the members will appear in declaration order. Note that
-# this will also influence the order of the classes in the class list.
-# The default value is: NO.
-
-SORT_BRIEF_DOCS = NO
-
-# If the SORT_MEMBERS_CTORS_1ST tag is set to YES then doxygen will sort the
-# (brief and detailed) documentation of class members so that constructors and
-# destructors are listed first. If set to NO the constructors will appear in the
-# respective orders defined by SORT_BRIEF_DOCS and SORT_MEMBER_DOCS.
-# Note: If SORT_BRIEF_DOCS is set to NO this option is ignored for sorting brief
-# member documentation.
-# Note: If SORT_MEMBER_DOCS is set to NO this option is ignored for sorting
-# detailed member documentation.
-# The default value is: NO.
-
-SORT_MEMBERS_CTORS_1ST = NO
-
-# If the SORT_GROUP_NAMES tag is set to YES then doxygen will sort the hierarchy
-# of group names into alphabetical order. If set to NO the group names will
-# appear in their defined order.
-# The default value is: NO.
-
-SORT_GROUP_NAMES = NO
-
-# If the SORT_BY_SCOPE_NAME tag is set to YES, the class list will be sorted by
-# fully-qualified names, including namespaces. If set to NO, the class list will
-# be sorted only by class name, not including the namespace part.
-# Note: This option is not very useful if HIDE_SCOPE_NAMES is set to YES.
-# Note: This option applies only to the class list, not to the alphabetical
-# list.
-# The default value is: NO.
-
-SORT_BY_SCOPE_NAME = NO
-
-# If the STRICT_PROTO_MATCHING option is enabled and doxygen fails to do proper
-# type resolution of all parameters of a function it will reject a match between
-# the prototype and the implementation of a member function even if there is
-# only one candidate or it is obvious which candidate to choose by doing a
-# simple string match. By disabling STRICT_PROTO_MATCHING doxygen will still
-# accept a match between prototype and implementation in such cases.
-# The default value is: NO.
-
-STRICT_PROTO_MATCHING = NO
-
-# The GENERATE_TODOLIST tag can be used to enable (YES) or disable (NO) the todo
-# list. This list is created by putting \todo commands in the documentation.
-# The default value is: YES.
-
-GENERATE_TODOLIST = YES
-
-# The GENERATE_TESTLIST tag can be used to enable (YES) or disable (NO) the test
-# list. This list is created by putting \test commands in the documentation.
-# The default value is: YES.
-
-GENERATE_TESTLIST = YES
-
-# The GENERATE_BUGLIST tag can be used to enable (YES) or disable (NO) the bug
-# list. This list is created by putting \bug commands in the documentation.
-# The default value is: YES.
-
-GENERATE_BUGLIST = YES
-
-# The GENERATE_DEPRECATEDLIST tag can be used to enable (YES) or disable (NO)
-# the deprecated list. This list is created by putting \deprecated commands in
-# the documentation.
-# The default value is: YES.
-
-GENERATE_DEPRECATEDLIST= YES
-
-# The ENABLED_SECTIONS tag can be used to enable conditional documentation
-# sections, marked by \if <section_label> ... \endif and \cond <section_label>
-# ... \endcond blocks.
-
-ENABLED_SECTIONS =
-
-# The MAX_INITIALIZER_LINES tag determines the maximum number of lines that the
-# initial value of a variable or macro / define can have for it to appear in the
-# documentation. If the initializer consists of more lines than specified here
-# it will be hidden. Use a value of 0 to hide initializers completely. The
-# appearance of the value of individual variables and macros / defines can be
-# controlled using \showinitializer or \hideinitializer command in the
-# documentation regardless of this setting.
-# Minimum value: 0, maximum value: 10000, default value: 30.
-
-MAX_INITIALIZER_LINES = 30
-
-# Set the SHOW_USED_FILES tag to NO to disable the list of files generated at
-# the bottom of the documentation of classes and structs. If set to YES, the
-# list will mention the files that were used to generate the documentation.
-# The default value is: YES.
-
-SHOW_USED_FILES = YES
-
-# Set the SHOW_FILES tag to NO to disable the generation of the Files page. This
-# will remove the Files entry from the Quick Index and from the Folder Tree View
-# (if specified).
-# The default value is: YES.
-
-SHOW_FILES = YES
-
-# Set the SHOW_NAMESPACES tag to NO to disable the generation of the Namespaces
-# page. This will remove the Namespaces entry from the Quick Index and from the
-# Folder Tree View (if specified).
-# The default value is: YES.
-
-SHOW_NAMESPACES = YES
-
-# The FILE_VERSION_FILTER tag can be used to specify a program or script that
-# doxygen should invoke to get the current version for each file (typically from
-# the version control system). Doxygen will invoke the program by executing (via
-# popen()) the command command input-file, where command is the value of the
-# FILE_VERSION_FILTER tag, and input-file is the name of an input file provided
-# by doxygen. Whatever the program writes to standard output is used as the file
-# version. For an example see the documentation.
-
-FILE_VERSION_FILTER =
-
-# The LAYOUT_FILE tag can be used to specify a layout file which will be parsed
-# by doxygen. The layout file controls the global structure of the generated
-# output files in an output format independent way. To create the layout file
-# that represents doxygen's defaults, run doxygen with the -l option. You can
-# optionally specify a file name after the option, if omitted DoxygenLayout.xml
-# will be used as the name of the layout file.
-#
-# Note that if you run doxygen from a directory containing a file called
-# DoxygenLayout.xml, doxygen will parse it automatically even if the LAYOUT_FILE
-# tag is left empty.
-
-LAYOUT_FILE =
-
-# The CITE_BIB_FILES tag can be used to specify one or more bib files containing
-# the reference definitions. This must be a list of .bib files. The .bib
-# extension is automatically appended if omitted. This requires the bibtex tool
-# to be installed. See also http://en.wikipedia.org/wiki/BibTeX for more info.
-# For LaTeX the style of the bibliography can be controlled using
-# LATEX_BIB_STYLE. To use this feature you need bibtex and perl available in the
-# search path. See also \cite for info how to create references.
-
-CITE_BIB_FILES =
-
-#---------------------------------------------------------------------------
-# Configuration options related to warning and progress messages
-#---------------------------------------------------------------------------
-
-# The QUIET tag can be used to turn on/off the messages that are generated to
-# standard output by doxygen. If QUIET is set to YES this implies that the
-# messages are off.
-# The default value is: NO.
-
-QUIET = NO
-
-# The WARNINGS tag can be used to turn on/off the warning messages that are
-# generated to standard error (stderr) by doxygen. If WARNINGS is set to YES
-# this implies that the warnings are on.
-#
-# Tip: Turn warnings on while writing the documentation.
-# The default value is: YES.
-
-WARNINGS = YES
-
-# If the WARN_IF_UNDOCUMENTED tag is set to YES then doxygen will generate
-# warnings for undocumented members. If EXTRACT_ALL is set to YES then this flag
-# will automatically be disabled.
-# The default value is: YES.
-
-WARN_IF_UNDOCUMENTED = NO
-
-# If the WARN_IF_DOC_ERROR tag is set to YES, doxygen will generate warnings for
-# potential errors in the documentation, such as not documenting some parameters
-# in a documented function, or documenting parameters that don't exist or using
-# markup commands wrongly.
-# The default value is: YES.
-
-WARN_IF_DOC_ERROR = YES
-
-# This WARN_NO_PARAMDOC option can be enabled to get warnings for functions that
-# are documented, but have no documentation for their parameters or return
-# value. If set to NO, doxygen will only warn about wrong or incomplete
-# parameter documentation, but not about the absence of documentation.
-# The default value is: NO.
-
-WARN_NO_PARAMDOC = NO
-
-# The WARN_FORMAT tag determines the format of the warning messages that doxygen
-# can produce. The string should contain the $file, $line, and $text tags, which
-# will be replaced by the file and line number from which the warning originated
-# and the warning text. Optionally the format may contain $version, which will
-# be replaced by the version of the file (if it could be obtained via
-# FILE_VERSION_FILTER)
-# The default value is: $file:$line: $text.
-
-WARN_FORMAT = "$file:$line: $text"
-
-# The WARN_LOGFILE tag can be used to specify a file to which warning and error
-# messages should be written. If left blank the output is written to standard
-# error (stderr).
-
-WARN_LOGFILE =
-
-#---------------------------------------------------------------------------
-# Configuration options related to the input files
-#---------------------------------------------------------------------------
-
-# The INPUT tag is used to specify the files and/or directories that contain
-# documented source files. You may enter file names like myfile.cpp or
-# directories like /usr/src/myproject. Separate the files or directories with
-# spaces. See also FILE_PATTERNS and EXTENSION_MAPPING
-# Note: If this tag is empty the current directory is searched.
-
-INPUT = @PROJ_ROOT@/paddle
-
-# This tag can be used to specify the character encoding of the source files
-# that doxygen parses. Internally doxygen uses the UTF-8 encoding. Doxygen uses
-# libiconv (or the iconv built into libc) for the transcoding. See the libiconv
-# documentation (see: http://www.gnu.org/software/libiconv) for the list of
-# possible encodings.
-# The default value is: UTF-8.
-
-INPUT_ENCODING = UTF-8
-
-# If the value of the INPUT tag contains directories, you can use the
-# FILE_PATTERNS tag to specify one or more wildcard patterns (like *.cpp and
-# *.h) to filter out the source-files in the directories.
-#
-# Note that for custom extensions or not directly supported extensions you also
-# need to set EXTENSION_MAPPING for the extension otherwise the files are not
-# read by doxygen.
-#
-# If left blank the following patterns are tested:*.c, *.cc, *.cxx, *.cpp,
-# *.c++, *.java, *.ii, *.ixx, *.ipp, *.i++, *.inl, *.idl, *.ddl, *.odl, *.h,
-# *.hh, *.hxx, *.hpp, *.h++, *.cs, *.d, *.php, *.php4, *.php5, *.phtml, *.inc,
-# *.m, *.markdown, *.md, *.mm, *.dox, *.py, *.f90, *.f, *.for, *.tcl, *.vhd,
-# *.vhdl, *.ucf, *.qsf, *.as and *.js.
-
-FILE_PATTERNS = *.c *.cc *.cpp *.cu *.h *.hpp *.cuh *.ph
-
-# The RECURSIVE tag can be used to specify whether or not subdirectories should
-# be searched for input files as well.
-# The default value is: NO.
-
-RECURSIVE = YES
-
-# The EXCLUDE tag can be used to specify files and/or directories that should be
-# excluded from the INPUT source files. This way you can easily exclude a
-# subdirectory from a directory tree whose root is specified with the INPUT tag.
-#
-# Note that relative paths are relative to the directory from which doxygen is
-# run.
-
-EXCLUDE =
-
-# The EXCLUDE_SYMLINKS tag can be used to select whether or not files or
-# directories that are symbolic links (a Unix file system feature) are excluded
-# from the input.
-# The default value is: NO.
-
-EXCLUDE_SYMLINKS = NO
-
-# If the value of the INPUT tag contains directories, you can use the
-# EXCLUDE_PATTERNS tag to specify one or more wildcard patterns to exclude
-# certain files from those directories.
-#
-# Note that the wildcards are matched against the file with absolute path, so to
-# exclude all test directories for example use the pattern */test/*
-
-EXCLUDE_PATTERNS = */x86_64-scm-linux-gnu/* */internals/* */mkl/* */test/* */tests/* */platform/*
-
-# The EXCLUDE_SYMBOLS tag can be used to specify one or more symbol names
-# (namespaces, classes, functions, etc.) that should be excluded from the
-# output. The symbol name can be a fully qualified name, a word, or if the
-# wildcard * is used, a substring. Examples: ANamespace, AClass,
-# AClass::ANamespace, ANamespace::*Test
-#
-# Note that the wildcards are matched against the file with absolute path, so to
-# exclude all test directories use the pattern */test/*
-
-EXCLUDE_SYMBOLS =
-
-# The EXAMPLE_PATH tag can be used to specify one or more files or directories
-# that contain example code fragments that are included (see the \include
-# command).
-
-EXAMPLE_PATH =
-
-# If the value of the EXAMPLE_PATH tag contains directories, you can use the
-# EXAMPLE_PATTERNS tag to specify one or more wildcard pattern (like *.cpp and
-# *.h) to filter out the source-files in the directories. If left blank all
-# files are included.
-
-EXAMPLE_PATTERNS =
-
-# If the EXAMPLE_RECURSIVE tag is set to YES then subdirectories will be
-# searched for input files to be used with the \include or \dontinclude commands
-# irrespective of the value of the RECURSIVE tag.
-# The default value is: NO.
-
-EXAMPLE_RECURSIVE = NO
-
-# The IMAGE_PATH tag can be used to specify one or more files or directories
-# that contain images that are to be included in the documentation (see the
-# \image command).
-
-IMAGE_PATH =
-
-# The INPUT_FILTER tag can be used to specify a program that doxygen should
-# invoke to filter for each input file. Doxygen will invoke the filter program
-# by executing (via popen()) the command:
-#
-# <filter> <input-file>
-#
-# where <filter> is the value of the INPUT_FILTER tag, and <input-file> is the
-# name of an input file. Doxygen will then use the output that the filter
-# program writes to standard output. If FILTER_PATTERNS is specified, this tag
-# will be ignored.
-#
-# Note that the filter must not add or remove lines; it is applied before the
-# code is scanned, but not when the output code is generated. If lines are added
-# or removed, the anchors will not be placed correctly.
-
-INPUT_FILTER =
-
-# The FILTER_PATTERNS tag can be used to specify filters on a per file pattern
-# basis. Doxygen will compare the file name with each pattern and apply the
-# filter if there is a match. The filters are a list of the form: pattern=filter
-# (like *.cpp=my_cpp_filter). See INPUT_FILTER for further information on how
-# filters are used. If the FILTER_PATTERNS tag is empty or if none of the
-# patterns match the file name, INPUT_FILTER is applied.
-
-FILTER_PATTERNS =
-
-# If the FILTER_SOURCE_FILES tag is set to YES, the input filter (if set using
-# INPUT_FILTER) will also be used to filter the input files that are used for
-# producing the source files to browse (i.e. when SOURCE_BROWSER is set to YES).
-# The default value is: NO.
-
-FILTER_SOURCE_FILES = NO
-
-# The FILTER_SOURCE_PATTERNS tag can be used to specify source filters per file
-# pattern. A pattern will override the setting for FILTER_PATTERNS (if any) and
-# it is also possible to disable source filtering for a specific pattern using
-# *.ext= (so without naming a filter).
-# This tag requires that the tag FILTER_SOURCE_FILES is set to YES.
-
-FILTER_SOURCE_PATTERNS =
-
-# If the USE_MDFILE_AS_MAINPAGE tag refers to the name of a markdown file that
-# is part of the input, its contents will be placed on the main page
-# (index.html). This can be useful if you have a project on for instance GitHub
-# and want to reuse the introduction page also for the doxygen output.
-
-USE_MDFILE_AS_MAINPAGE =
-
-#---------------------------------------------------------------------------
-# Configuration options related to source browsing
-#---------------------------------------------------------------------------
-
-# If the SOURCE_BROWSER tag is set to YES then a list of source files will be
-# generated. Documented entities will be cross-referenced with these sources.
-#
-# Note: To get rid of all source code in the generated output, make sure that
-# also VERBATIM_HEADERS is set to NO.
-# The default value is: NO.
-
-SOURCE_BROWSER = NO
-
-# Setting the INLINE_SOURCES tag to YES will include the body of functions,
-# classes and enums directly into the documentation.
-# The default value is: NO.
-
-INLINE_SOURCES = NO
-
-# Setting the STRIP_CODE_COMMENTS tag to YES will instruct doxygen to hide any
-# special comment blocks from generated source code fragments. Normal C, C++ and
-# Fortran comments will always remain visible.
-# The default value is: YES.
-
-STRIP_CODE_COMMENTS = YES
-
-# If the REFERENCED_BY_RELATION tag is set to YES then for each documented
-# function all documented functions referencing it will be listed.
-# The default value is: NO.
-
-REFERENCED_BY_RELATION = NO
-
-# If the REFERENCES_RELATION tag is set to YES then for each documented function
-# all documented entities called/used by that function will be listed.
-# The default value is: NO.
-
-REFERENCES_RELATION = NO
-
-# If the REFERENCES_LINK_SOURCE tag is set to YES and SOURCE_BROWSER tag is set
-# to YES then the hyperlinks from functions in REFERENCES_RELATION and
-# REFERENCED_BY_RELATION lists will link to the source code. Otherwise they will
-# link to the documentation.
-# The default value is: YES.
-
-REFERENCES_LINK_SOURCE = YES
-
-# If SOURCE_TOOLTIPS is enabled (the default) then hovering a hyperlink in the
-# source code will show a tooltip with additional information such as prototype,
-# brief description and links to the definition and documentation. Since this
-# will make the HTML file larger and loading of large files a bit slower, you
-# can opt to disable this feature.
-# The default value is: YES.
-# This tag requires that the tag SOURCE_BROWSER is set to YES.
-
-SOURCE_TOOLTIPS = YES
-
-# If the USE_HTAGS tag is set to YES then the references to source code will
-# point to the HTML generated by the htags(1) tool instead of doxygen built-in
-# source browser. The htags tool is part of GNU's global source tagging system
-# (see http://www.gnu.org/software/global/global.html). You will need version
-# 4.8.6 or higher.
-#
-# To use it do the following:
-# - Install the latest version of global
-# - Enable SOURCE_BROWSER and USE_HTAGS in the config file
-# - Make sure the INPUT points to the root of the source tree
-# - Run doxygen as normal
-#
-# Doxygen will invoke htags (and that will in turn invoke gtags), so these
-# tools must be available from the command line (i.e. in the search path).
-#
-# The result: instead of the source browser generated by doxygen, the links to
-# source code will now point to the output of htags.
-# The default value is: NO.
-# This tag requires that the tag SOURCE_BROWSER is set to YES.
-
-USE_HTAGS = NO
-
-# If the VERBATIM_HEADERS tag is set to YES then doxygen will generate a
-# verbatim copy of the header file for each class for which an include is
-# specified. Set to NO to disable this.
-# See also: Section \class.
-# The default value is: YES.
-
-VERBATIM_HEADERS = YES
-
-#---------------------------------------------------------------------------
-# Configuration options related to the alphabetical class index
-#---------------------------------------------------------------------------
-
-# If the ALPHABETICAL_INDEX tag is set to YES, an alphabetical index of all
-# compounds will be generated. Enable this if the project contains a lot of
-# classes, structs, unions or interfaces.
-# The default value is: YES.
-
-ALPHABETICAL_INDEX = YES
-
-# The COLS_IN_ALPHA_INDEX tag can be used to specify the number of columns in
-# which the alphabetical index list will be split.
-# Minimum value: 1, maximum value: 20, default value: 5.
-# This tag requires that the tag ALPHABETICAL_INDEX is set to YES.
-
-COLS_IN_ALPHA_INDEX = 5
-
-# In case all classes in a project start with a common prefix, all classes will
-# be put under the same header in the alphabetical index. The IGNORE_PREFIX tag
-# can be used to specify a prefix (or a list of prefixes) that should be ignored
-# while generating the index headers.
-# This tag requires that the tag ALPHABETICAL_INDEX is set to YES.
-
-IGNORE_PREFIX =
-
-#---------------------------------------------------------------------------
-# Configuration options related to the HTML output
-#---------------------------------------------------------------------------
-
-# If the GENERATE_HTML tag is set to YES, doxygen will generate HTML output
-# The default value is: YES.
-
-GENERATE_HTML = NO
-
-# The HTML_OUTPUT tag is used to specify where the HTML docs will be put. If a
-# relative path is entered the value of OUTPUT_DIRECTORY will be put in front of
-# it.
-# The default directory is: html.
-# This tag requires that the tag GENERATE_HTML is set to YES.
-
-HTML_OUTPUT = html
-
-# The HTML_FILE_EXTENSION tag can be used to specify the file extension for each
-# generated HTML page (for example: .htm, .php, .asp).
-# The default value is: .html.
-# This tag requires that the tag GENERATE_HTML is set to YES.
-
-HTML_FILE_EXTENSION = .html
-
-# The HTML_HEADER tag can be used to specify a user-defined HTML header file for
-# each generated HTML page. If the tag is left blank doxygen will generate a
-# standard header.
-#
-# To get valid HTML the header file must include any scripts and style sheets
-# that doxygen needs, which is dependent on the configuration options used (e.g.
-# the setting GENERATE_TREEVIEW). It is highly recommended to start with a
-# default header using
-# doxygen -w html new_header.html new_footer.html new_stylesheet.css
-# YourConfigFile
-# and then modify the file new_header.html. See also section "Doxygen usage"
-# for information on how to generate the default header that doxygen normally
-# uses.
-# Note: The header is subject to change so you typically have to regenerate the
-# default header when upgrading to a newer version of doxygen. For a description
-# of the possible markers and block names see the documentation.
-# This tag requires that the tag GENERATE_HTML is set to YES.
-
-HTML_HEADER =
-
-# The HTML_FOOTER tag can be used to specify a user-defined HTML footer for each
-# generated HTML page. If the tag is left blank doxygen will generate a standard
-# footer. See HTML_HEADER for more information on how to generate a default
-# footer and what special commands can be used inside the footer. See also
-# section "Doxygen usage" for information on how to generate the default footer
-# that doxygen normally uses.
-# This tag requires that the tag GENERATE_HTML is set to YES.
-
-HTML_FOOTER =
-
-# The HTML_STYLESHEET tag can be used to specify a user-defined cascading style
-# sheet that is used by each HTML page. It can be used to fine-tune the look of
-# the HTML output. If left blank doxygen will generate a default style sheet.
-# See also section "Doxygen usage" for information on how to generate the style
-# sheet that doxygen normally uses.
-# Note: It is recommended to use HTML_EXTRA_STYLESHEET instead of this tag, as
-# it is more robust and this tag (HTML_STYLESHEET) will in the future become
-# obsolete.
-# This tag requires that the tag GENERATE_HTML is set to YES.
-
-HTML_STYLESHEET =
-
-# The HTML_EXTRA_STYLESHEET tag can be used to specify additional user-defined
-# cascading style sheets that are included after the standard style sheets
-# created by doxygen. Using this option one can overrule certain style aspects.
-# This is preferred over using HTML_STYLESHEET since it does not replace the
-# standard style sheet and is therefore more robust against future updates.
-# Doxygen will copy the style sheet files to the output directory.
-# Note: The order of the extra style sheet files is of importance (e.g. the last
-# style sheet in the list overrules the setting of the previous ones in the
-# list). For an example see the documentation.
-# This tag requires that the tag GENERATE_HTML is set to YES.
-
-HTML_EXTRA_STYLESHEET =
-
-# The HTML_EXTRA_FILES tag can be used to specify one or more extra images or
-# other source files which should be copied to the HTML output directory. Note
-# that these files will be copied to the base HTML output directory. Use the
-# $relpath^ marker in the HTML_HEADER and/or HTML_FOOTER files to load these
-# files. In the HTML_STYLESHEET file, use the file name only. Also note that the
-# files will be copied as-is; there are no commands or markers available.
-# This tag requires that the tag GENERATE_HTML is set to YES.
-
-HTML_EXTRA_FILES =
-
-# The HTML_COLORSTYLE_HUE tag controls the color of the HTML output. Doxygen
-# will adjust the colors in the style sheet and background images according to
-# this color. Hue is specified as an angle on a colorwheel, see
-# http://en.wikipedia.org/wiki/Hue for more information. For instance the value
-# 0 represents red, 60 is yellow, 120 is green, 180 is cyan, 240 is blue, 300
-# purple, and 360 is red again.
-# Minimum value: 0, maximum value: 359, default value: 220.
-# This tag requires that the tag GENERATE_HTML is set to YES.
-
-HTML_COLORSTYLE_HUE = 220
-
-# The HTML_COLORSTYLE_SAT tag controls the purity (or saturation) of the colors
-# in the HTML output. For a value of 0 the output will use grayscales only. A
-# value of 255 will produce the most vivid colors.
-# Minimum value: 0, maximum value: 255, default value: 100.
-# This tag requires that the tag GENERATE_HTML is set to YES.
-
-HTML_COLORSTYLE_SAT = 100
-
-# The HTML_COLORSTYLE_GAMMA tag controls the gamma correction applied to the
-# luminance component of the colors in the HTML output. Values below 100
-# gradually make the output lighter, whereas values above 100 make the output
-# darker. The value divided by 100 is the actual gamma applied, so 80 represents
-# a gamma of 0.8, the value 220 represents a gamma of 2.2, and 100 does not
-# change the gamma.
-# Minimum value: 40, maximum value: 240, default value: 80.
-# This tag requires that the tag GENERATE_HTML is set to YES.
-
-HTML_COLORSTYLE_GAMMA = 80
-
-# If the HTML_TIMESTAMP tag is set to YES then the footer of each generated HTML
-# page will contain the date and time when the page was generated. Setting this
-# to YES can help to show when doxygen was last run and thus if the
-# documentation is up to date.
-# The default value is: NO.
-# This tag requires that the tag GENERATE_HTML is set to YES.
-
-HTML_TIMESTAMP = NO
-
-# If the HTML_DYNAMIC_SECTIONS tag is set to YES then the generated HTML
-# documentation will contain sections that can be hidden and shown after the
-# page has loaded.
-# The default value is: NO.
-# This tag requires that the tag GENERATE_HTML is set to YES.
-
-HTML_DYNAMIC_SECTIONS = NO
-
-# With HTML_INDEX_NUM_ENTRIES one can control the preferred number of entries
-# shown in the various tree structured indices initially; the user can expand
-# and collapse entries dynamically later on. Doxygen will expand the tree to
-# such a level that at most the specified number of entries are visible (unless
-# a fully collapsed tree already exceeds this amount). So setting the number of
-# entries to 1 will produce a fully collapsed tree by default. 0 is a special
-# value representing an infinite number of entries and will result in a fully
-# expanded tree by default.
-# Minimum value: 0, maximum value: 9999, default value: 100.
-# This tag requires that the tag GENERATE_HTML is set to YES.
-
-HTML_INDEX_NUM_ENTRIES = 100
-
-# If the GENERATE_DOCSET tag is set to YES, additional index files will be
-# generated that can be used as input for Apple's Xcode 3 integrated development
-# environment (see: http://developer.apple.com/tools/xcode/), introduced with
-# OSX 10.5 (Leopard). To create a documentation set, doxygen will generate a
-# Makefile in the HTML output directory. Running make will produce the docset in
-# that directory and running make install will install the docset in
-# ~/Library/Developer/Shared/Documentation/DocSets so that Xcode will find it at
-# startup. See http://developer.apple.com/tools/creatingdocsetswithdoxygen.html
-# for more information.
-# The default value is: NO.
-# This tag requires that the tag GENERATE_HTML is set to YES.
-
-GENERATE_DOCSET = NO
-
-# This tag determines the name of the docset feed. A documentation feed provides
-# an umbrella under which multiple documentation sets from a single provider
-# (such as a company or product suite) can be grouped.
-# The default value is: Doxygen generated docs.
-# This tag requires that the tag GENERATE_DOCSET is set to YES.
-
-DOCSET_FEEDNAME = "Doxygen generated docs"
-
-# This tag specifies a string that should uniquely identify the documentation
-# set bundle. This should be a reverse domain-name style string, e.g.
-# com.mycompany.MyDocSet. Doxygen will append .docset to the name.
-# The default value is: org.doxygen.Project.
-# This tag requires that the tag GENERATE_DOCSET is set to YES.
-
-DOCSET_BUNDLE_ID = org.doxygen.Project
-
-# The DOCSET_PUBLISHER_ID tag specifies a string that should uniquely identify
-# the documentation publisher. This should be a reverse domain-name style
-# string, e.g. com.mycompany.MyDocSet.documentation.
-# The default value is: org.doxygen.Publisher.
-# This tag requires that the tag GENERATE_DOCSET is set to YES.
-
-DOCSET_PUBLISHER_ID = org.doxygen.Publisher
-
-# The DOCSET_PUBLISHER_NAME tag identifies the documentation publisher.
-# The default value is: Publisher.
-# This tag requires that the tag GENERATE_DOCSET is set to YES.
-
-DOCSET_PUBLISHER_NAME = Publisher
-
-# If the GENERATE_HTMLHELP tag is set to YES then doxygen generates three
-# additional HTML index files: index.hhp, index.hhc, and index.hhk. The
-# index.hhp is a project file that can be read by Microsoft's HTML Help Workshop
-# (see: http://www.microsoft.com/en-us/download/details.aspx?id=21138) on
-# Windows.
-#
-# The HTML Help Workshop contains a compiler that can convert all HTML output
-# generated by doxygen into a single compiled HTML file (.chm). Compiled HTML
-# files are now used as the Windows 98 help format, and will replace the old
-# Windows help format (.hlp) on all Windows platforms in the future. Compressed
-# HTML files also contain an index, a table of contents, and you can search for
-# words in the documentation. The HTML workshop also contains a viewer for
-# compressed HTML files.
-# The default value is: NO.
-# This tag requires that the tag GENERATE_HTML is set to YES.
-
-GENERATE_HTMLHELP = NO
-
-# The CHM_FILE tag can be used to specify the file name of the resulting .chm
-# file. You can add a path in front of the file if the result should not be
-# written to the html output directory.
-# This tag requires that the tag GENERATE_HTMLHELP is set to YES.
-
-CHM_FILE =
-
-# The HHC_LOCATION tag can be used to specify the location (absolute path
-# including file name) of the HTML help compiler (hhc.exe). If non-empty,
-# doxygen will try to run the HTML help compiler on the generated index.hhp.
-# The file has to be specified with full path.
-# This tag requires that the tag GENERATE_HTMLHELP is set to YES.
-
-HHC_LOCATION =
-
-# The GENERATE_CHI flag controls if a separate .chi index file is generated
-# (YES) or that it should be included in the master .chm file (NO).
-# The default value is: NO.
-# This tag requires that the tag GENERATE_HTMLHELP is set to YES.
-
-GENERATE_CHI = NO
-
-# The CHM_INDEX_ENCODING is used to encode HtmlHelp index (hhk), content (hhc)
-# and project file content.
-# This tag requires that the tag GENERATE_HTMLHELP is set to YES.
-
-CHM_INDEX_ENCODING =
-
-# The BINARY_TOC flag controls whether a binary table of contents is generated
-# (YES) or a normal table of contents (NO) in the .chm file. Furthermore it
-# enables the Previous and Next buttons.
-# The default value is: NO.
-# This tag requires that the tag GENERATE_HTMLHELP is set to YES.
-
-BINARY_TOC = NO
-
-# The TOC_EXPAND flag can be set to YES to add extra items for group members to
-# the table of contents of the HTML help documentation and to the tree view.
-# The default value is: NO.
-# This tag requires that the tag GENERATE_HTMLHELP is set to YES.
-
-TOC_EXPAND = NO
-
-# If the GENERATE_QHP tag is set to YES and both QHP_NAMESPACE and
-# QHP_VIRTUAL_FOLDER are set, an additional index file will be generated that
-# can be used as input for Qt's qhelpgenerator to generate a Qt Compressed Help
-# (.qch) of the generated HTML documentation.
-# The default value is: NO.
-# This tag requires that the tag GENERATE_HTML is set to YES.
-
-GENERATE_QHP = NO
-
-# If the QHG_LOCATION tag is specified, the QCH_FILE tag can be used to specify
-# the file name of the resulting .qch file. The path specified is relative to
-# the HTML output folder.
-# This tag requires that the tag GENERATE_QHP is set to YES.
-
-QCH_FILE =
-
-# The QHP_NAMESPACE tag specifies the namespace to use when generating Qt Help
-# Project output. For more information please see Qt Help Project / Namespace
-# (see: http://qt-project.org/doc/qt-4.8/qthelpproject.html#namespace).
-# The default value is: org.doxygen.Project.
-# This tag requires that the tag GENERATE_QHP is set to YES.
-
-QHP_NAMESPACE = org.doxygen.Project
-
-# The QHP_VIRTUAL_FOLDER tag specifies the namespace to use when generating Qt
-# Help Project output. For more information please see Qt Help Project / Virtual
-# Folders (see: http://qt-project.org/doc/qt-4.8/qthelpproject.html#virtual-
-# folders).
-# The default value is: doc.
-# This tag requires that the tag GENERATE_QHP is set to YES.
-
-QHP_VIRTUAL_FOLDER = doc
-
-# If the QHP_CUST_FILTER_NAME tag is set, it specifies the name of a custom
-# filter to add. For more information please see Qt Help Project / Custom
-# Filters (see: http://qt-project.org/doc/qt-4.8/qthelpproject.html#custom-
-# filters).
-# This tag requires that the tag GENERATE_QHP is set to YES.
-
-QHP_CUST_FILTER_NAME =
-
-# The QHP_CUST_FILTER_ATTRS tag specifies the list of the attributes of the
-# custom filter to add. For more information please see Qt Help Project / Custom
-# Filters (see: http://qt-project.org/doc/qt-4.8/qthelpproject.html#custom-
-# filters).
-# This tag requires that the tag GENERATE_QHP is set to YES.
-
-QHP_CUST_FILTER_ATTRS =
-
-# The QHP_SECT_FILTER_ATTRS tag specifies the list of the attributes this
-# project's filter section matches. Qt Help Project / Filter Attributes (see:
-# http://qt-project.org/doc/qt-4.8/qthelpproject.html#filter-attributes).
-# This tag requires that the tag GENERATE_QHP is set to YES.
-
-QHP_SECT_FILTER_ATTRS =
-
-# The QHG_LOCATION tag can be used to specify the location of Qt's
-# qhelpgenerator. If non-empty doxygen will try to run qhelpgenerator on the
-# generated .qhp file.
-# This tag requires that the tag GENERATE_QHP is set to YES.
-
-QHG_LOCATION =
-
-# If the GENERATE_ECLIPSEHELP tag is set to YES, additional index files will be
-# generated; together with the HTML files, they form an Eclipse help plugin. To
-# install this plugin and make it available under the help contents menu in
-# Eclipse, the contents of the directory containing the HTML and XML files needs
-# to be copied into the plugins directory of eclipse. The name of the directory
-# within the plugins directory should be the same as the ECLIPSE_DOC_ID value.
-# After copying Eclipse needs to be restarted before the help appears.
-# The default value is: NO.
-# This tag requires that the tag GENERATE_HTML is set to YES.
-
-GENERATE_ECLIPSEHELP = NO
-
-# A unique identifier for the Eclipse help plugin. When installing the plugin
-# the directory name containing the HTML and XML files should also have this
-# name. Each documentation set should have its own identifier.
-# The default value is: org.doxygen.Project.
-# This tag requires that the tag GENERATE_ECLIPSEHELP is set to YES.
-
-ECLIPSE_DOC_ID = org.doxygen.Project
-
-# If you want full control over the layout of the generated HTML pages it might
-# be necessary to disable the index and replace it with your own. The
-# DISABLE_INDEX tag can be used to turn on/off the condensed index (tabs) at top
-# of each HTML page. A value of NO enables the index and the value YES disables
-# it. Since the tabs in the index contain the same information as the navigation
-# tree, you can set this option to YES if you also set GENERATE_TREEVIEW to YES.
-# The default value is: NO.
-# This tag requires that the tag GENERATE_HTML is set to YES.
-
-DISABLE_INDEX = NO
-
-# The GENERATE_TREEVIEW tag is used to specify whether a tree-like index
-# structure should be generated to display hierarchical information. If the tag
-# value is set to YES, a side panel will be generated containing a tree-like
-# index structure (just like the one that is generated for HTML Help). For this
-# to work a browser that supports JavaScript, DHTML, CSS and frames is required
-# (i.e. any modern browser). Windows users are probably better off using the
-# HTML help feature. Via custom style sheets (see HTML_EXTRA_STYLESHEET) one can
-# further fine-tune the look of the index. As an example, the default style
-# sheet generated by doxygen has an example that shows how to put an image at
-# the root of the tree instead of the PROJECT_NAME. Since the tree basically has
-# the same information as the tab index, you could consider setting
-# DISABLE_INDEX to YES when enabling this option.
-# The default value is: NO.
-# This tag requires that the tag GENERATE_HTML is set to YES.
-
-GENERATE_TREEVIEW = NO
-
-# The ENUM_VALUES_PER_LINE tag can be used to set the number of enum values that
-# doxygen will group on one line in the generated HTML documentation.
-#
-# Note that a value of 0 will completely suppress the enum values from appearing
-# in the overview section.
-# Minimum value: 0, maximum value: 20, default value: 4.
-# This tag requires that the tag GENERATE_HTML is set to YES.
-
-ENUM_VALUES_PER_LINE = 4
-
-# If the treeview is enabled (see GENERATE_TREEVIEW) then this tag can be used
-# to set the initial width (in pixels) of the frame in which the tree is shown.
-# Minimum value: 0, maximum value: 1500, default value: 250.
-# This tag requires that the tag GENERATE_HTML is set to YES.
-
-TREEVIEW_WIDTH = 250
-
-# If the EXT_LINKS_IN_WINDOW option is set to YES, doxygen will open links to
-# external symbols imported via tag files in a separate window.
-# The default value is: NO.
-# This tag requires that the tag GENERATE_HTML is set to YES.
-
-EXT_LINKS_IN_WINDOW = NO
-
-# Use this tag to change the font size of LaTeX formulas included as images in
-# the HTML documentation. When you change the font size after a successful
-# doxygen run you need to manually remove any form_*.png images from the HTML
-# output directory to force them to be regenerated.
-# Minimum value: 8, maximum value: 50, default value: 10.
-# This tag requires that the tag GENERATE_HTML is set to YES.
-
-FORMULA_FONTSIZE = 10
-
-# Use the FORMULA_TRANSPARENT tag to determine whether or not the images
-# generated for formulas are transparent PNGs. Transparent PNGs are not
-# supported properly for IE 6.0, but are supported on all modern browsers.
-#
-# Note that when changing this option you need to delete any form_*.png files in
-# the HTML output directory before the changes have effect.
-# The default value is: YES.
-# This tag requires that the tag GENERATE_HTML is set to YES.
-
-FORMULA_TRANSPARENT = YES
-
-# Enable the USE_MATHJAX option to render LaTeX formulas using MathJax (see
-# http://www.mathjax.org) which uses client side Javascript for the rendering
-# instead of using pre-rendered bitmaps. Use this if you do not have LaTeX
-# installed or if you want formulas to look prettier in the HTML output. When
-# enabled you may also need to install MathJax separately and configure the path
-# to it using the MATHJAX_RELPATH option.
-# The default value is: NO.
-# This tag requires that the tag GENERATE_HTML is set to YES.
-
-USE_MATHJAX = NO
-
-# When MathJax is enabled you can set the default output format to be used for
-# the MathJax output. See the MathJax site (see:
-# http://docs.mathjax.org/en/latest/output.html) for more details.
-# Possible values are: HTML-CSS (which is slower, but has the best
-# compatibility), NativeMML (i.e. MathML) and SVG.
-# The default value is: HTML-CSS.
-# This tag requires that the tag USE_MATHJAX is set to YES.
-
-MATHJAX_FORMAT = HTML-CSS
-
-# When MathJax is enabled you need to specify the location relative to the HTML
-# output directory using the MATHJAX_RELPATH option. The destination directory
-# should contain the MathJax.js script. For instance, if the mathjax directory
-# is located at the same level as the HTML output directory, then
-# MATHJAX_RELPATH should be ../mathjax. The default value points to the MathJax
-# Content Delivery Network so you can quickly see the result without installing
-# MathJax. However, it is strongly recommended to install a local copy of
-# MathJax from http://www.mathjax.org before deployment.
-# The default value is: http://cdn.mathjax.org/mathjax/latest.
-# This tag requires that the tag USE_MATHJAX is set to YES.
-
-MATHJAX_RELPATH = http://cdn.mathjax.org/mathjax/latest
-
-# The MATHJAX_EXTENSIONS tag can be used to specify one or more MathJax
-# extension names that should be enabled during MathJax rendering. For example
-# MATHJAX_EXTENSIONS = TeX/AMSmath TeX/AMSsymbols
-# This tag requires that the tag USE_MATHJAX is set to YES.
-
-MATHJAX_EXTENSIONS =
-
-# The MATHJAX_CODEFILE tag can be used to specify a file with javascript pieces
-# of code that will be used on startup of the MathJax code. See the MathJax site
-# (see: http://docs.mathjax.org/en/latest/output.html) for more details. For an
-# example see the documentation.
-# This tag requires that the tag USE_MATHJAX is set to YES.
-
-MATHJAX_CODEFILE =
-
-# When the SEARCHENGINE tag is enabled doxygen will generate a search box for
-# the HTML output. The underlying search engine uses javascript and DHTML and
-# should work on any modern browser. Note that when using HTML help
-# (GENERATE_HTMLHELP), Qt help (GENERATE_QHP), or docsets (GENERATE_DOCSET)
-# there is already a search function so this one should typically be disabled.
-# For large projects the javascript based search engine can be slow, then
-# enabling SERVER_BASED_SEARCH may provide a better solution. It is possible to
-# search using the keyboard; to jump to the search box use <access key> + S
-# (what the <access key> is depends on the OS and browser, but it is typically
-# , /