diff --git a/.clang-format b/.clang-format
index 6bbd46d0ff956517991d4faad3f2c026487f412b..9ba433b17362424973626470d930356c2173dd84 100644
--- a/.clang-format
+++ b/.clang-format
@@ -13,8 +13,6 @@
# The document of clang-format is
# http://clang.llvm.org/docs/ClangFormat.html
# http://clang.llvm.org/docs/ClangFormatStyleOptions.html
-#
-# TODO(yuyang18): Add python and other language code style
---
Language: Cpp
BasedOnStyle: Google
@@ -22,8 +20,9 @@ IndentWidth: 2
TabWidth: 2
ContinuationIndentWidth: 4
AccessModifierOffset: -2 # The private/protected/public has no indent in class
-PointerAlignment: Left # int* p/int& p, not int *p/int &p
Standard: Cpp11
AllowAllParametersOfDeclarationOnNextLine: true
+BinPackParameters: false
+BinPackArguments: false
...
diff --git a/.gitignore b/.gitignore
index 65ba217de37c82287829eef105066aba86d69651..ee8489c1d71bd050b9a1d9358a664d2294165292 100644
--- a/.gitignore
+++ b/.gitignore
@@ -5,4 +5,6 @@ build/
.vscode
.idea
.project
+.cproject
.pydevproject
+Makefile
diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..90c25e435083d78ad4c123999a588aaf9092f719
--- /dev/null
+++ b/.pre-commit-config.yaml
@@ -0,0 +1,20 @@
+- repo: https://github.com/Lucas-C/pre-commit-hooks.git
+ sha: c25201a00e6b0514370501050cf2a8538ac12270
+ hooks:
+ - id: remove-crlf
+- repo: https://github.com/reyoung/mirrors-yapf.git
+ sha: v0.13.2
+ hooks:
+ - id: yapf
+- repo: https://github.com/pre-commit/pre-commit-hooks
+ sha: 7539d8bd1a00a3c1bfd34cdb606d3a6372e83469
+ hooks:
+ - id: check-added-large-files
+ - id: check-merge-conflict
+ - id: check-symlinks
+ - id: detect-private-key
+ - id: end-of-file-fixer
+- repo: https://github.com/PaddlePaddle/clang-format-pre-commit-hook.git
+ sha: 28c0ea8a67a3e2dbbf4822ef44e85b63a0080a29
+ hooks:
+ - id: clang-formater
diff --git a/.style.yapf b/.style.yapf
new file mode 100644
index 0000000000000000000000000000000000000000..4741fb4f3bbc6681088cf9e960321e7b857a93a8
--- /dev/null
+++ b/.style.yapf
@@ -0,0 +1,3 @@
+[style]
+based_on_style = pep8
+column_limit = 80
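
The three-line `.style.yapf` above is what produces every Python reformatting hunk later in this diff. As a minimal sketch of its effect (the function below is a stand-in for illustration, not a PaddlePaddle API): yapf, based on pep8 with an 80-column limit, splits a call that no longer fits onto continuation lines, one keyword argument per line, matching the `img_conv_layer` hunks in `benchmark/paddle/image/alexnet.py` below.

    # Stand-in function for illustration only; not a real PaddlePaddle call.
    def conv_sketch(**kwargs):
        return kwargs

    # Before yapf (over the 80-column limit):
    #   net = conv_sketch(input=None, filter_size=11, num_channels=3, num_filters=96, stride=4, padding=1)
    # After yapf with the style above:
    net = conv_sketch(
        input=None,
        filter_size=11,
        num_channels=3,
        num_filters=96,
        stride=4,
        padding=1)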
diff --git a/.travis.yml b/.travis.yml
index bf0e0b7bbddd4c1f69e287e0f5ad471a54a75600..ffe3bc193b49eb3b3318cbbc7f1c3d86dc205c14 100644
--- a/.travis.yml
+++ b/.travis.yml
@@ -35,11 +35,22 @@ addons:
- libgoogle-glog-dev
- libgflags-dev
- libgtest-dev
+ - curl
+ - lcov
- graphviz
+ - swig
before_install:
+ - |
+ if [ ${JOB} == "BUILD_AND_TEST" ]; then
+ if ! git diff --name-only $TRAVIS_COMMIT_RANGE | grep -qvE '(\.md$)'
+ then
+ echo "Only markdown docs were updated, stopping build process."
+ exit
+ fi
+ fi
- if [[ "$TRAVIS_OS_NAME" == "linux" ]]; then sudo paddle/scripts/travis/before_install.linux.sh; fi
- if [[ "$TRAVIS_OS_NAME" == "osx" ]]; then paddle/scripts/travis/before_install.osx.sh; fi
- - pip install wheel protobuf sphinx breathe recommonmark
+ - pip install wheel protobuf sphinx breathe recommonmark virtualenv numpy
script:
- paddle/scripts/travis/main.sh
notifications:
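
The `before_install` block added above short-circuits the BUILD_AND_TEST job for documentation-only pushes: `grep -qvE '(\.md$)'` exits successfully if any changed file does not end in `.md`, so the negated test is true only when every changed file is markdown. A small self-contained sketch of the same predicate, in Python rather than shell:

    import re

    # True when every changed file ends in .md, i.e. the build can be skipped.
    # Mirrors: ! git diff --name-only $TRAVIS_COMMIT_RANGE | grep -qvE '(\.md$)'
    def only_markdown_changed(changed_files):
        return all(re.search(r'\.md$', f) for f in changed_files)

    print(only_markdown_changed(['README.md', 'doc/intro.md']))    # True: skip
    print(only_markdown_changed(['README.md', 'CMakeLists.txt']))  # False: build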
diff --git a/CMakeLists.txt b/CMakeLists.txt
index 4613155f7700b25b2a8d7c250832722085b332fa..090ac9e188422099cc4270b87064b5590e7b620c 100644
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@@ -2,14 +2,14 @@ cmake_minimum_required(VERSION 2.8)
project(paddle CXX C)
set(PADDLE_MAJOR_VERSION 0)
-set(PADDLE_MINOR_VERSION 8)
-set(PADDLE_PATCH_VERSION 0b2)
+set(PADDLE_MINOR_VERSION 9)
+set(PADDLE_PATCH_VERSION 0a0)
set(PADDLE_VERSION ${PADDLE_MAJOR_VERSION}.${PADDLE_MINOR_VERSION}.${PADDLE_PATCH_VERSION})
set(CMAKE_MODULE_PATH ${CMAKE_MODULE_PATH} "${CMAKE_SOURCE_DIR}/cmake")
set(PROJ_ROOT ${CMAKE_SOURCE_DIR})
include(package)
-include(swig)
+find_package(SWIG 2.0)
find_package(CUDA QUIET)
find_package(Protobuf REQUIRED)
find_package(PythonLibs 2.7 REQUIRED)
@@ -40,6 +40,9 @@ option(WITH_TESTING "Compile and run unittest for PaddlePaddle" ${GTEST_FOUND})
option(WITH_DOC "Compile PaddlePaddle with documentation" OFF)
option(WITH_SWIG_PY "Compile PaddlePaddle with py PaddlePaddle prediction api" ${SWIG_FOUND})
option(ON_TRAVIS "Running test on travis-ci or not." OFF)
+option(ON_COVERALLS "Generating code coverage data on coveralls or not." OFF)
+option(COVERALLS_UPLOAD "Uploading the generated coveralls json." ON)
+
if(NOT CMAKE_BUILD_TYPE)
set(CMAKE_BUILD_TYPE "RelWithDebInfo" CACHE STRING
"Choose the type of build, options are: Debug Release RelWithDebInfo MinSizeRel"
@@ -49,11 +52,16 @@ endif()
include(enableCXX11)
include(cpplint)
include(ccache)
+if(WITH_RDMA)
+ include(rdma)
+endif()
include(util)
include(flags)
include(cudnn)
include(FindPythonModule)
include(check_packages)
+include(swig)
+include(coveralls)
# add PaddlePaddle version
if(DEFINED ENV{PADDLE_VERSION})
@@ -87,11 +95,24 @@ if(NOT WITH_GPU)
add_definitions(-DHPPL_STUB_FUNC)
list(APPEND CMAKE_CXX_SOURCE_FILE_EXTENSIONS cu)
else()
+ if(${CUDA_VERSION_MAJOR} GREATER 6)
+ if(COMPILER_SUPPORT_CXX11)
+ LIST(APPEND CUDA_NVCC_FLAGS -std=c++11)
+ endif()
+ endif()
+
# TODO(yuyang18): Change it to remove std=c++11 in cuda compile.
set(CUDA_PROPAGATE_HOST_FLAGS OFF)
if(NOT CUDNN_FOUND)
message(FATAL_ERROR "Paddle need cudnn to compile")
endif()
+ set(CUDA_NVCC_FLAGS ${CUDA_NVCC_FLAGS} "-g -O3 --use_fast_math")
+
+ if(WITH_AVX)
+ set(CUDA_NVCC_FLAGS ${CUDA_NVCC_FLAGS} "-Xcompiler ${AVX_FLAG}")
+ else(WITH_AVX)
+ set(CUDA_NVCC_FLAGS ${CUDA_NVCC_FLAGS} "-Xcompiler ${SSE3_FLAG}")
+ endif(WITH_AVX)
if(WITH_DSO)
set(CUDA_LIBRARIES "")
@@ -115,11 +136,11 @@ if(NOT WITH_TIMER)
endif(NOT WITH_TIMER)
if(WITH_AVX)
- set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} ${AVX_FLAGS}")
- set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} ${AVX_FLAGS}")
+ set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} ${AVX_FLAG}")
+ set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} ${AVX_FLAG}")
else(WITH_AVX)
- set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} -msse3")
- set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -msse3")
+ set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} ${SSE3_FLAG}")
+ set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} ${SSE3_FLAG}")
endif(WITH_AVX)
if(WITH_PYTHON)
@@ -129,12 +150,15 @@ else(WITH_PYTHON)
add_definitions(-DPADDLE_NO_PYTHON)
endif(WITH_PYTHON)
-if(NOT WITH_RDMA)
- add_definitions(-DPADDLE_DISABLE_RDMA)
-endif()
+if(WITH_RDMA)
+ include_directories("${RDMA_INC_DIR}")
+else(WITH_RDMA)
+ add_definitions(-DPADDLE_DISABLE_RDMA)
+endif(WITH_RDMA)
if(WITH_GLOG)
add_definitions(-DPADDLE_USE_GLOG)
+ include_directories(${LIBGLOG_INCLUDE_DIR})
endif()
if(WITH_GFLAGS)
diff --git a/ISSUE_TEMPLATE.md b/ISSUE_TEMPLATE.md
new file mode 100644
index 0000000000000000000000000000000000000000..6b2614b1011081a5e0e03a53fec2012bc7b81333
--- /dev/null
+++ b/ISSUE_TEMPLATE.md
@@ -0,0 +1,14 @@
+Thank you for contributing to PaddlePaddle. Submitting an issue is a great help for us.
+Both Chinese and English issues are welcome.
+
+It's hard to solve a problem when important details are missing.
+Before submitting an issue, please check your request against the following criteria.
+
+- [ ] Has a similar issue been submitted or resolved before? You can search existing issues on GitHub.
+- [ ] Have you searched for your issue on common search engines?
+- [ ] Is my description of the issue clear enough to reproduce the problem?
+  * If an error occurred, we need details such as `how do you run your code?`, `what system do you use?`, `are you using GPU or not?`, etc.
+  * If you use [asciinema](https://asciinema.org/) to record what you are doing to make it happen, that's awesome! It helps us solve the problem more quickly.
+- [ ] Does my description of the issue use GitHub Markdown correctly?
+  * Please use proper Markdown syntax for styling all forms of writing, e.g., source code, error information, etc.
+  * Check out [this page](https://guides.github.com/features/mastering-markdown/) to learn more about Markdown.
diff --git a/README.md b/README.md
index 1cc0444c0617af3da0ec1d9beaf2ae73e31bd7b2..8a8e15841586ae6a01bb93e94f6074189f556f5a 100644
--- a/README.md
+++ b/README.md
@@ -1,8 +1,13 @@
# PaddlePaddle
-| **`Linux`** | **`License`** | **`Chat Room`** |
-|----------------|---------------|-----------------|
-|[](https://travis-ci.org/baidu/Paddle)|[](LICENSE)|[](https://gitter.im/PaddlePaddle/Deep_Learning?utm_source=badge&utm_medium=badge&utm_campaign=pr-badge&utm_content=badge)|
+
+[](https://travis-ci.org/PaddlePaddle/Paddle)
+[](http://www.paddlepaddle.org/)
+[](http://www.paddlepaddle.org/cn/index.html)
+[](https://coveralls.io/github/PaddlePaddle/Paddle?branch=develop)
+[](https://github.com/PaddlePaddle/Paddle/releases)
+[](LICENSE)
+
Welcome to the PaddlePaddle GitHub.
@@ -12,7 +17,7 @@ developed by Baidu scientists and engineers for the purpose of applying deep
learning to many products at Baidu.
Our vision is to enable deep learning for everyone via PaddlePaddle.
-Please refer to our [release log](https://github.com/baidu/Paddle/releases) to track the latest feature of PaddlePaddle.
+Please refer to our [release announcement](https://github.com/PaddlePaddle/Paddle/releases) to track the latest features of PaddlePaddle.
## Features
@@ -24,15 +29,15 @@ Please refer to our [release log](https://github.com/baidu/Paddle/releases) to t
connection.
- **Efficiency**
-
+
In order to unleash the power of heterogeneous computing resource,
optimization occurs at different levels of PaddlePaddle, including
computing, memory, architecture and communication. The following are some
examples:
- Optimized math operations through SSE/AVX intrinsics, BLAS libraries
- (e.g. MKL, ATLAS, cuBLAS) or customized CPU/GPU kernels.
- - Highly optimized recurrent networks which can handle **variable-length**
+ (e.g. MKL, ATLAS, cuBLAS) or customized CPU/GPU kernels.
+ - Highly optimized recurrent networks which can handle **variable-length**
sequence without padding.
- Optimized local and distributed training for models with high dimensional
sparse data.
@@ -55,41 +60,39 @@ Please refer to our [release log](https://github.com/baidu/Paddle/releases) to t
## Installation
Check out the [Install Guide](http://paddlepaddle.org/doc/build/) to install from
-pre-built packages (**docker image**, **deb package**) or
+pre-built packages (**docker image**, **deb package**) or
directly build on **Linux** and **Mac OS X** from the source code.
-
+
## Documentation
Both [English Docs](http://paddlepaddle.org/doc/) and [Chinese Docs](http://paddlepaddle.org/doc_cn/) are provided for our users and developers.
- [Quick Start](http://paddlepaddle.org/doc/demo/quick_start/index_en)
You can follow the quick start tutorial to learn how use PaddlePaddle
step-by-step.
-
+
- [Example and Demo](http://paddlepaddle.org/doc/demo/)
We provide five demos, including: image classification, sentiment analysis,
- sequence to sequence model, recommendation, semantic role labeling.
-
+ sequence to sequence model, recommendation, semantic role labeling.
+
- [Distributed Training](http://paddlepaddle.org/doc/cluster)
This system supports training deep learning models on multiple machines
with data parallelism.
-
+
- [Python API](http://paddlepaddle.org/doc/ui/)
PaddlePaddle supports using either Python interface or C++ to build your
system. We also use SWIG to wrap C++ source code to create a user friendly
interface for Python. You can also use SWIG to create interface for your
favorite programming language.
-
+
- [How to Contribute](http://paddlepaddle.org/doc/build/contribute_to_paddle.html)
We sincerely appreciate your interest and contributions. If you would like to
- contribute, please read the contribution guide.
+ contribute, please read the contribution guide.
- [Source Code Documents](http://paddlepaddle.org/doc/source/)
## Ask Questions
-Please join the [**gitter chat**](https://gitter.im/PaddlePaddle/Deep_Learning) or send email to
-**paddle-dev@baidu.com** to ask questions and talk about methods and models.
-Framework development discussions and
-bug reports are collected on [Issues](https://github.com/baidu/paddle/issues).
+
+You are welcome to submit questions and bug reports as [GitHub Issues](https://github.com/PaddlePaddle/Paddle/issues).
## Copyright and License
PaddlePaddle is provided under the [Apache-2.0 license](LICENSE).
diff --git a/benchmark/README.md b/benchmark/README.md
index 8b453a7b59e9f19c7b96da1160cd348c74250bb7..29c7155a0f33ecf29a04d7c276f4531ae64e07a6 100644
--- a/benchmark/README.md
+++ b/benchmark/README.md
@@ -5,11 +5,11 @@ Machine:
- CPU: 12-core Intel(R) Xeon(R) CPU E5-2620 v2 @2.10GHz
- GPU: Tesla K40m
- cuDNN: v5.1
-- system: Docker 1.12.1, all platform are tested in docker environment.
+- system: Docker 1.12.1, all platforms are tested in a Docker environment.
Platforms:
-- PaddlePaddle:
+- PaddlePaddle: paddledev/paddle:gpu-devel-v0.9.0a0
- Tensorflow: gcr.io/tensorflow/tensorflow:0.11.0rc0-gpu
- Caffe: kaixhin/cuda-caffe
@@ -28,7 +28,7 @@ AlexNet, GoogleNet and a small network used in Caffe.
- [SmallNet](https://github.com/BVLC/caffe/blob/master/examples/cifar10/cifar10\_quick\_train\_test.prototxt)
-### Singe-GPU
+### Single-GPU
- AlexNet: input - 3 * 227 * 227, Time: ms/batch
@@ -61,7 +61,7 @@ All platforms use cuDNN-v5.1. We see that caffe is slower in this experiment, be
**Notation**
-All the experiments in caffe use `caffe time` to execute, which does not include the time of parameter updating. The time in PaddlePaddle and TensorFlow contains it. But, compared with the total time, the time of parameter updating is relatively little.
+All the experiments in caffe are executed with `caffe time`, which does not include the time of parameter updating, while the times for PaddlePaddle and TensorFlow do include it. But compared with the total time, the time of parameter updating is relatively small on a single machine.
In Tensorflow, they implement algorithm searching method instead of using the algorithm searching interface in cuDNN.
@@ -106,7 +106,7 @@ We use lstm network for text classfication to test benchmark.
- Dictionary size=30000
- Peephole connection is used in `lstmemory` by default in PaddlePaddle. It is also configured in TensorFlow.
-### Single GPU
+### Single-GPU
#### LSTM in Text Classification
diff --git a/benchmark/paddle/image/alexnet.py b/benchmark/paddle/image/alexnet.py
index 8b832473231f9894f99830149b96a14a923197f4..3358d43a4b08c6a9b89d59e1a8be53ee1f12bbe0 100644
--- a/benchmark/paddle/image/alexnet.py
+++ b/benchmark/paddle/image/alexnet.py
@@ -2,56 +2,63 @@
from paddle.trainer_config_helpers import *
-height=227
-width=227
+height = 227
+width = 227
num_class = 1000
-batch_size = get_config_arg('batch_size', int, 128)
-
-args={'height':height, 'width':width, 'color':True, 'num_class':num_class}
-define_py_data_sources2("train.list",
- None,
- module="provider",
- obj="process",
- args=args)
+batch_size = get_config_arg('batch_size', int, 128)
+args = {'height': height, 'width': width, 'color': True, 'num_class': num_class}
+define_py_data_sources2(
+ "train.list", None, module="provider", obj="process", args=args)
settings(
- batch_size = batch_size,
- learning_rate = 0.01 / batch_size,
- learning_method = MomentumOptimizer(0.9),
- regularization = L2Regularization(0.0005 * batch_size)
-)
-
+ batch_size=batch_size,
+ learning_rate=0.01 / batch_size,
+ learning_method=MomentumOptimizer(0.9),
+ regularization=L2Regularization(0.0005 * batch_size))
# conv1
net = data_layer('data', size=height * width * 3)
-net = img_conv_layer(input=net, filter_size=11, num_channels=3,
- num_filters=96, stride=4, padding=1)
+net = img_conv_layer(
+ input=net,
+ filter_size=11,
+ num_channels=3,
+ num_filters=96,
+ stride=4,
+ padding=1)
net = img_cmrnorm_layer(input=net, size=5, scale=0.0001, power=0.75)
-net = img_pool_layer(input=net, pool_size=3, stride=2)
+net = img_pool_layer(input=net, pool_size=3, stride=2)
# conv2
-net = img_conv_layer(input=net, filter_size=5, num_filters=256,
- stride=1, padding=2, groups=1)
+net = img_conv_layer(
+ input=net, filter_size=5, num_filters=256, stride=1, padding=2, groups=1)
net = img_cmrnorm_layer(input=net, size=5, scale=0.0001, power=0.75)
net = img_pool_layer(input=net, pool_size=3, stride=2)
# conv3
-net = img_conv_layer(input=net, filter_size=3, num_filters=384,
- stride=1, padding=1)
+net = img_conv_layer(
+ input=net, filter_size=3, num_filters=384, stride=1, padding=1)
# conv4
-net = img_conv_layer(input=net, filter_size=3, num_filters=384,
- stride=1, padding=1, groups=1)
+net = img_conv_layer(
+ input=net, filter_size=3, num_filters=384, stride=1, padding=1, groups=1)
# conv5
-net = img_conv_layer(input=net, filter_size=3, num_filters=256,
- stride=1, padding=1, groups=1)
+net = img_conv_layer(
+ input=net, filter_size=3, num_filters=256, stride=1, padding=1, groups=1)
net = img_pool_layer(input=net, pool_size=3, stride=2)
-net = fc_layer(input=net, size=4096, act=ReluActivation(), layer_attr=ExtraAttr(drop_rate=0.5))
-net = fc_layer(input=net, size=4096, act=ReluActivation(), layer_attr=ExtraAttr(drop_rate=0.5))
+net = fc_layer(
+ input=net,
+ size=4096,
+ act=ReluActivation(),
+ layer_attr=ExtraAttr(drop_rate=0.5))
+net = fc_layer(
+ input=net,
+ size=4096,
+ act=ReluActivation(),
+ layer_attr=ExtraAttr(drop_rate=0.5))
net = fc_layer(input=net, size=1000, act=SoftmaxActivation())
lab = data_layer('label', num_class)
-loss = cross_entropy(input=net, label=lab)
+loss = cross_entropy(input=net, label=lab)
outputs(loss)
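
A note on the `get_config_arg('batch_size', int, 128)` pattern used in this and the other benchmark configs: it reads an argument supplied to the trainer when the config is loaded and falls back to the given default. A rough stand-in sketch of those semantics, assuming a simple name-to-string mapping (PaddlePaddle's real implementation takes the values from the trainer invocation, not from this hypothetical dict):

    # Hypothetical stand-in for get_config_arg; for illustration only.
    _config_args = {'batch_size': '64'}  # pretend these were passed to the trainer

    def get_config_arg_sketch(name, arg_type, default):
        value = _config_args.get(name)
        return arg_type(value) if value is not None else default

    batch_size = get_config_arg_sketch('batch_size', int, 128)  # 64 (supplied)
    lstm_num = get_config_arg_sketch('lstm_num', int, 1)        # 1 (default)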
diff --git a/benchmark/paddle/image/googlenet.py b/benchmark/paddle/image/googlenet.py
index 1078136a2b40b69c7e4b361487d22c414af7501f..bc893bab98c4d2e07c62fbd012d51a0939db4766 100644
--- a/benchmark/paddle/image/googlenet.py
+++ b/benchmark/paddle/image/googlenet.py
@@ -1,24 +1,20 @@
#!/usr/bin/env python
from paddle.trainer_config_helpers import *
-height=224
-width=224
+height = 224
+width = 224
num_class = 1000
-batch_size = get_config_arg('batch_size', int, 128)
+batch_size = get_config_arg('batch_size', int, 128)
-args={'height':height, 'width':width, 'color':True, 'num_class':num_class}
-define_py_data_sources2("train.list",
- None,
- module="provider",
- obj="process",
- args=args)
+args = {'height': height, 'width': width, 'color': True, 'num_class': num_class}
+define_py_data_sources2(
+ "train.list", None, module="provider", obj="process", args=args)
settings(
- batch_size = batch_size,
- learning_rate = 0.01 / batch_size,
- learning_method = MomentumOptimizer(0.9),
- regularization = L2Regularization(0.0005 * batch_size)
-)
+ batch_size=batch_size,
+ learning_rate=0.01 / batch_size,
+ learning_method=MomentumOptimizer(0.9),
+ regularization=L2Regularization(0.0005 * batch_size))
def inception2(name, input, channels, \
filter1,
@@ -34,26 +30,61 @@ def inception2(name, input, channels, \
maxpool = name + '_max'
convproj = name + '_proj'
- cov1 = img_conv_layer(name=conv1, input=input, filter_size=1,
- num_channels=channels, num_filters=filter1,
- stride=1, padding=0)
-
- cov3r = img_conv_layer(name=conv3r, input=input, filter_size=1,
- num_channels=channels, num_filters=filter3R,
- stride=1, padding=0)
- cov3 = img_conv_layer(name=conv3, input=cov3r, filter_size=3,
- num_filters=filter3, stride=1, padding=1)
-
- cov5r = img_conv_layer(name=conv5r, input=input, filter_size=1,
- num_channels=channels, num_filters=filter5R,
- stride=1, padding=0)
- cov5 = img_conv_layer(name=conv5, input=cov5r, filter_size=5,
- num_filters=filter5, stride=1, padding=2)
-
- pool1 = img_pool_layer(name=maxpool, input=input, pool_size=3,
- num_channels=channels, stride=1, padding=1)
- covprj = img_conv_layer(name=convproj, input=pool1, filter_size=1,
- num_filters=proj, stride=1, padding=0)
+ cov1 = img_conv_layer(
+ name=conv1,
+ input=input,
+ filter_size=1,
+ num_channels=channels,
+ num_filters=filter1,
+ stride=1,
+ padding=0)
+
+ cov3r = img_conv_layer(
+ name=conv3r,
+ input=input,
+ filter_size=1,
+ num_channels=channels,
+ num_filters=filter3R,
+ stride=1,
+ padding=0)
+ cov3 = img_conv_layer(
+ name=conv3,
+ input=cov3r,
+ filter_size=3,
+ num_filters=filter3,
+ stride=1,
+ padding=1)
+
+ cov5r = img_conv_layer(
+ name=conv5r,
+ input=input,
+ filter_size=1,
+ num_channels=channels,
+ num_filters=filter5R,
+ stride=1,
+ padding=0)
+ cov5 = img_conv_layer(
+ name=conv5,
+ input=cov5r,
+ filter_size=5,
+ num_filters=filter5,
+ stride=1,
+ padding=2)
+
+ pool1 = img_pool_layer(
+ name=maxpool,
+ input=input,
+ pool_size=3,
+ num_channels=channels,
+ stride=1,
+ padding=1)
+ covprj = img_conv_layer(
+ name=convproj,
+ input=pool1,
+ filter_size=1,
+ num_filters=proj,
+ stride=1,
+ padding=0)
cat = concat_layer(name=name, input=[cov1, cov3, cov5, covprj])
return cat
@@ -64,28 +95,51 @@ def inception(name, input, channels, \
filter5R, filter5,
proj):
- cov1 = conv_projection(input=input, filter_size=1, num_channels=channels,
- num_filters=filter1, stride=1, padding=0)
-
- cov3r = img_conv_layer(name=name + '_3r', input=input, filter_size=1,
- num_channels=channels, num_filters=filter3R,
- stride=1, padding=0)
- cov3 = conv_projection(input=cov3r, filter_size=3, num_filters=filter3,
- stride=1, padding=1)
-
- cov5r = img_conv_layer(name=name + '_5r', input=input, filter_size=1,
- num_channels=channels, num_filters=filter5R,
- stride=1, padding=0)
- cov5 = conv_projection(input=cov5r, filter_size=5, num_filters=filter5,
- stride=1, padding=2)
-
- pool1 = img_pool_layer(name=name + '_max', input=input, pool_size=3,
- num_channels=channels, stride=1, padding=1)
- covprj = conv_projection(input=pool1, filter_size=1, num_filters=proj,
- stride=1, padding=0)
-
- cat = concat_layer(name=name, input=[cov1, cov3, cov5, covprj],
- bias_attr=True, act=ReluActivation())
+ cov1 = conv_projection(
+ input=input,
+ filter_size=1,
+ num_channels=channels,
+ num_filters=filter1,
+ stride=1,
+ padding=0)
+
+ cov3r = img_conv_layer(
+ name=name + '_3r',
+ input=input,
+ filter_size=1,
+ num_channels=channels,
+ num_filters=filter3R,
+ stride=1,
+ padding=0)
+ cov3 = conv_projection(
+ input=cov3r, filter_size=3, num_filters=filter3, stride=1, padding=1)
+
+ cov5r = img_conv_layer(
+ name=name + '_5r',
+ input=input,
+ filter_size=1,
+ num_channels=channels,
+ num_filters=filter5R,
+ stride=1,
+ padding=0)
+ cov5 = conv_projection(
+ input=cov5r, filter_size=5, num_filters=filter5, stride=1, padding=2)
+
+ pool1 = img_pool_layer(
+ name=name + '_max',
+ input=input,
+ pool_size=3,
+ num_channels=channels,
+ stride=1,
+ padding=1)
+ covprj = conv_projection(
+ input=pool1, filter_size=1, num_filters=proj, stride=1, padding=0)
+
+ cat = concat_layer(
+ name=name,
+ input=[cov1, cov3, cov5, covprj],
+ bias_attr=True,
+ act=ReluActivation())
return cat
@@ -93,36 +147,60 @@ lab = data_layer(name="label", size=1000)
data = data_layer(name="input", size=3 * height * width)
# stage 1
-conv1 = img_conv_layer(name="conv1", input=data, filter_size=7,
- num_channels=3, num_filters=64, stride=2, padding=3)
-pool1 = img_pool_layer(name="pool1", input=conv1, pool_size=3,
- num_channels=64, stride=2)
+conv1 = img_conv_layer(
+ name="conv1",
+ input=data,
+ filter_size=7,
+ num_channels=3,
+ num_filters=64,
+ stride=2,
+ padding=3)
+pool1 = img_pool_layer(
+ name="pool1", input=conv1, pool_size=3, num_channels=64, stride=2)
# stage 2
-conv2_1 = img_conv_layer(name="conv2_1", input=pool1, filter_size=1,
- num_filters=64, stride=1, padding=0)
-conv2_2 = img_conv_layer(name="conv2_2", input=conv2_1, filter_size=3,
- num_filters=192, stride=1, padding=1)
-pool2 = img_pool_layer(name="pool2", input=conv2_2, pool_size=3,
- num_channels=192, stride=2)
+conv2_1 = img_conv_layer(
+ name="conv2_1",
+ input=pool1,
+ filter_size=1,
+ num_filters=64,
+ stride=1,
+ padding=0)
+conv2_2 = img_conv_layer(
+ name="conv2_2",
+ input=conv2_1,
+ filter_size=3,
+ num_filters=192,
+ stride=1,
+ padding=1)
+pool2 = img_pool_layer(
+ name="pool2", input=conv2_2, pool_size=3, num_channels=192, stride=2)
# stage 3
-ince3a = inception("ince3a", pool2, 192, 64, 96, 128, 16, 32, 32)
-ince3b = inception("ince3b", ince3a, 256, 128, 128,192, 32, 96, 64)
-pool3 = img_pool_layer(name="pool3", input=ince3b, num_channels=480, pool_size=3, stride=2)
+ince3a = inception("ince3a", pool2, 192, 64, 96, 128, 16, 32, 32)
+ince3b = inception("ince3b", ince3a, 256, 128, 128, 192, 32, 96, 64)
+pool3 = img_pool_layer(
+ name="pool3", input=ince3b, num_channels=480, pool_size=3, stride=2)
# stage 4
-ince4a = inception("ince4a", pool3, 480, 192, 96, 208, 16, 48, 64)
-ince4b = inception("ince4b", ince4a, 512, 160, 112, 224, 24, 64, 64)
+ince4a = inception("ince4a", pool3, 480, 192, 96, 208, 16, 48, 64)
+ince4b = inception("ince4b", ince4a, 512, 160, 112, 224, 24, 64, 64)
ince4c = inception("ince4c", ince4b, 512, 128, 128, 256, 24, 64, 64)
-ince4d = inception("ince4d", ince4c, 512, 112, 144, 288, 32, 64, 64)
-ince4e = inception("ince4e", ince4d, 528, 256, 160, 320, 32, 128, 128)
-pool4 = img_pool_layer(name="pool4", input=ince4e, num_channels=832, pool_size=3, stride=2)
+ince4d = inception("ince4d", ince4c, 512, 112, 144, 288, 32, 64, 64)
+ince4e = inception("ince4e", ince4d, 528, 256, 160, 320, 32, 128, 128)
+pool4 = img_pool_layer(
+ name="pool4", input=ince4e, num_channels=832, pool_size=3, stride=2)
# stage 5
-ince5a = inception("ince5a", pool4, 832, 256, 160, 320, 32, 128, 128)
+ince5a = inception("ince5a", pool4, 832, 256, 160, 320, 32, 128, 128)
ince5b = inception("ince5b", ince5a, 832, 384, 192, 384, 48, 128, 128)
-pool5 = img_pool_layer(name="pool5", input=ince5b, num_channels=1024, pool_size=7, stride=7, pool_type=AvgPooling())
+pool5 = img_pool_layer(
+ name="pool5",
+ input=ince5b,
+ num_channels=1024,
+ pool_size=7,
+ stride=7,
+ pool_type=AvgPooling())
# We remove loss1 and loss2 for all system when testing benchmark
# output 1
@@ -141,7 +219,8 @@ pool5 = img_pool_layer(name="pool5", input=ince5b, num_channels=1024, pool_size=
# output 3
dropout = dropout_layer(name="dropout", input=pool5, dropout_rate=0.4)
-out3 = fc_layer(name="output3", input=dropout, size=1000, act=SoftmaxActivation())
-loss3 = cross_entropy(name='loss3', input=out3, label=lab)
+out3 = fc_layer(
+ name="output3", input=dropout, size=1000, act=SoftmaxActivation())
+loss3 = cross_entropy(name='loss3', input=out3, label=lab)
outputs(loss3)
diff --git a/benchmark/paddle/image/provider.py b/benchmark/paddle/image/provider.py
index 0d45268aa3f4900349e176a56acc9a9eb6eb120b..b6bc0e9aa21a1083ddc2c8f3ada4acf637425a62 100644
--- a/benchmark/paddle/image/provider.py
+++ b/benchmark/paddle/image/provider.py
@@ -1,13 +1,14 @@
-import io,os
+import io, os
import random
import numpy as np
from paddle.trainer.PyDataProvider2 import *
+
def initHook(settings, height, width, color, num_class, **kwargs):
- settings.height = height
- settings.width = width
- settings.color = color
- settings.num_class = num_class
+ settings.height = height
+ settings.width = width
+ settings.color = color
+ settings.num_class = num_class
if settings.color:
settings.data_size = settings.height * settings.width * 3
else:
@@ -15,7 +16,9 @@ def initHook(settings, height, width, color, num_class, **kwargs):
settings.slots = [dense_vector(settings.data_size), integer_value(1)]
-@provider(init_hook=initHook, min_pool_size=-1, cache=CacheType.CACHE_PASS_IN_MEM)
+
+@provider(
+ init_hook=initHook, min_pool_size=-1, cache=CacheType.CACHE_PASS_IN_MEM)
def process(settings, file_list):
with open(file_list, 'r') as fdata:
for line in fdata:
diff --git a/benchmark/paddle/image/smallnet_mnist_cifar.py b/benchmark/paddle/image/smallnet_mnist_cifar.py
index 78dba880d29250158326b23834a60273407eb111..58879c454f37991405d83bbb593bb5d1e977ff53 100644
--- a/benchmark/paddle/image/smallnet_mnist_cifar.py
+++ b/benchmark/paddle/image/smallnet_mnist_cifar.py
@@ -2,42 +2,44 @@
from paddle.trainer_config_helpers import *
-height=32
-width=32
+height = 32
+width = 32
num_class = 10
-batch_size = get_config_arg('batch_size', int, 128)
+batch_size = get_config_arg('batch_size', int, 128)
-args={'height':height, 'width':width, 'color':True, 'num_class':num_class}
-define_py_data_sources2("train.list",
- None,
- module="provider",
- obj="process",
- args=args)
+args = {'height': height, 'width': width, 'color': True, 'num_class': num_class}
+define_py_data_sources2(
+ "train.list", None, module="provider", obj="process", args=args)
settings(
- batch_size = batch_size,
- learning_rate = 0.01 / batch_size,
- learning_method = MomentumOptimizer(0.9),
- regularization = L2Regularization(0.0005 * batch_size)
-)
-
+ batch_size=batch_size,
+ learning_rate=0.01 / batch_size,
+ learning_method=MomentumOptimizer(0.9),
+ regularization=L2Regularization(0.0005 * batch_size))
# conv1
net = data_layer('data', size=height * width * 3)
-net = img_conv_layer(input=net, filter_size=5, num_channels=3,
- num_filters=32, stride=1, padding=2)
+net = img_conv_layer(
+ input=net,
+ filter_size=5,
+ num_channels=3,
+ num_filters=32,
+ stride=1,
+ padding=2)
net = img_pool_layer(input=net, pool_size=3, stride=2, padding=1)
# conv2
-net = img_conv_layer(input=net, filter_size=5, num_filters=32,
- stride=1, padding=2)
-net = img_pool_layer(input=net, pool_size=3, stride=2, padding=1, pool_type=AvgPooling())
+net = img_conv_layer(
+ input=net, filter_size=5, num_filters=32, stride=1, padding=2)
+net = img_pool_layer(
+ input=net, pool_size=3, stride=2, padding=1, pool_type=AvgPooling())
# conv3
-net = img_conv_layer(input=net, filter_size=3, num_filters=64,
- stride=1, padding=1)
-net = img_pool_layer(input=net, pool_size=3, stride=2, padding=1, pool_type=AvgPooling())
+net = img_conv_layer(
+ input=net, filter_size=3, num_filters=64, stride=1, padding=1)
+net = img_pool_layer(
+ input=net, pool_size=3, stride=2, padding=1, pool_type=AvgPooling())
net = fc_layer(input=net, size=64, act=ReluActivation())
net = fc_layer(input=net, size=10, act=SoftmaxActivation())
diff --git a/benchmark/paddle/rnn/imdb.py b/benchmark/paddle/rnn/imdb.py
index 93e1686854b447c4248ae1809fb5289a36e3e0f7..fc4ed4025f9ed2e0a32a1709ff8df4af53521196 100755
--- a/benchmark/paddle/rnn/imdb.py
+++ b/benchmark/paddle/rnn/imdb.py
@@ -4,6 +4,7 @@ import gzip
import os
import numpy
+
def get_dataset_file(dataset, default_dataset, origin):
data_dir, data_file = os.path.split(dataset)
if (not os.path.isfile(dataset)) and data_file == default_dataset:
@@ -13,13 +14,14 @@ def get_dataset_file(dataset, default_dataset, origin):
return dataset
+
def create_data(path="imdb.pkl"):
if (not os.path.isfile('imdb.train.pkl')):
path = get_dataset_file(
path, "imdb.pkl",
"http://www.iro.umontreal.ca/~lisa/deep/data/imdb.pkl")
-
+
if path.endswith(".gz"):
f = gzip.open(path, 'rb')
else:
@@ -35,8 +37,10 @@ def create_data(path="imdb.pkl"):
if (not os.path.isfile('train.list')):
file('train.list', 'w').write('imdb.train.pkl\n')
+
def main():
create_data('imdb.pkl')
+
if __name__ == "__main__":
main()
diff --git a/benchmark/paddle/rnn/provider.py b/benchmark/paddle/rnn/provider.py
index 90d3fee67601604b236b27fb2e5492e92095cb72..928ca75daf84ccebb775364b0be0d8b3d5eebff9 100644
--- a/benchmark/paddle/rnn/provider.py
+++ b/benchmark/paddle/rnn/provider.py
@@ -1,19 +1,25 @@
-import io,os
+import io, os
import random
import numpy as np
import six.moves.cPickle as pickle
from paddle.trainer.PyDataProvider2 import *
+
def remove_unk(x, n_words):
return [[1 if w >= n_words else w for w in sen] for sen in x]
+
# ==============================================================
# tensorflow uses fixed length, but PaddlePaddle can process
# variable-length. Padding is used in benchmark in order to
# compare with other platform.
# ==============================================================
-def pad_sequences(sequences, maxlen=None, dtype='int32', padding='post',
- truncating='post', value=0.):
+def pad_sequences(sequences,
+ maxlen=None,
+ dtype='int32',
+ padding='post',
+ truncating='post',
+ value=0.):
lengths = [len(s) for s in sequences]
nb_samples = len(sequences)
@@ -43,12 +49,14 @@ def pad_sequences(sequences, maxlen=None, dtype='int32', padding='post',
def initHook(settings, vocab_size, pad_seq, maxlen, **kwargs):
settings.vocab_size = vocab_size
settings.pad_seq = pad_seq
- settings.maxlen = maxlen
+ settings.maxlen = maxlen
settings.input_types = [
- integer_value_sequence(vocab_size),
- integer_value(2)]
+ integer_value_sequence(vocab_size), integer_value(2)
+ ]
+
-@provider(init_hook=initHook, min_pool_size=-1, cache=CacheType.CACHE_PASS_IN_MEM)
+@provider(
+ init_hook=initHook, min_pool_size=-1, cache=CacheType.CACHE_PASS_IN_MEM)
def process(settings, file):
f = open(file, 'rb')
train_set = pickle.load(f)
@@ -57,8 +65,8 @@ def process(settings, file):
# remove unk, namely remove the words out of dictionary
x = remove_unk(x, settings.vocab_size)
- if settings.pad_seq:
+ if settings.pad_seq:
x = pad_sequences(x, maxlen=settings.maxlen, value=0.)
for i in range(len(y)):
- yield map(int,x[i]), int(y[i])
+ yield map(int, x[i]), int(y[i])
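
The comment block in `provider.py` above explains that padding exists only so PaddlePaddle's variable-length input can be compared against TensorFlow's fixed-length input. A self-contained sketch of the post-padding/post-truncating case that `pad_sequences` is called with (`maxlen=settings.maxlen, value=0.`), simplified from the full function, which also infers `maxlen` and handles other dtypes:

    import numpy as np

    # Every sequence becomes exactly `maxlen` long: short sequences are padded
    # with `value` at the end, long ones are truncated at the end.
    def pad_post(sequences, maxlen, value=0):
        out = np.full((len(sequences), maxlen), value, dtype='int32')
        for i, seq in enumerate(sequences):
            trunc = seq[:maxlen]         # truncating='post'
            out[i, :len(trunc)] = trunc  # padding='post'
        return out

    print(pad_post([[1, 2], [3, 4, 5, 6, 7]], maxlen=4))
    # [[1 2 0 0]
    #  [3 4 5 6]]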
diff --git a/benchmark/paddle/rnn/rnn.py b/benchmark/paddle/rnn/rnn.py
index fc8221b1126649d3d1b6a2a8743d25fe4a8d4aec..83eb3e565473f7e7e91cddeaa3cd2aafb7e3df2c 100755
--- a/benchmark/paddle/rnn/rnn.py
+++ b/benchmark/paddle/rnn/rnn.py
@@ -6,33 +6,29 @@ import imdb
num_class = 2
vocab_size = 30000
fixedlen = 100
-batch_size = get_config_arg('batch_size', int, 128)
-lstm_num = get_config_arg('lstm_num', int, 1)
-hidden_size = get_config_arg('hidden_size', int, 128)
+batch_size = get_config_arg('batch_size', int, 128)
+lstm_num = get_config_arg('lstm_num', int, 1)
+hidden_size = get_config_arg('hidden_size', int, 128)
# whether to pad sequence into fixed length
pad_seq = get_config_arg('pad_seq', bool, True)
imdb.create_data('imdb.pkl')
-args={'vocab_size':vocab_size, 'pad_seq':pad_seq, 'maxlen':fixedlen}
-define_py_data_sources2("train.list",
- None,
- module="provider",
- obj="process",
- args=args)
+args = {'vocab_size': vocab_size, 'pad_seq': pad_seq, 'maxlen': fixedlen}
+define_py_data_sources2(
+ "train.list", None, module="provider", obj="process", args=args)
settings(
batch_size=batch_size,
learning_rate=2e-3,
learning_method=AdamOptimizer(),
regularization=L2Regularization(8e-4),
- gradient_clipping_threshold=25
-)
+ gradient_clipping_threshold=25)
net = data_layer('data', size=vocab_size)
net = embedding_layer(input=net, size=128)
for i in xrange(lstm_num):
- net = simple_lstm(input=net, size=hidden_size)
+ net = simple_lstm(input=net, size=hidden_size)
net = last_seq(input=net)
net = fc_layer(input=net, size=2, act=SoftmaxActivation())
diff --git a/benchmark/tensorflow/image/alexnet.py b/benchmark/tensorflow/image/alexnet.py
index 57b7ef6c323243c8e03324533d0022ab00bb8516..f6a39ef778e21bee7374718a1b1ddf43392825a8 100644
--- a/benchmark/tensorflow/image/alexnet.py
+++ b/benchmark/tensorflow/image/alexnet.py
@@ -8,10 +8,8 @@ import tensorflow as tf
FLAGS = tf.app.flags.FLAGS
-tf.app.flags.DEFINE_integer('batch_size', 128,
- """Batch size.""")
-tf.app.flags.DEFINE_integer('num_batches', 100,
- """Number of batches to run.""")
+tf.app.flags.DEFINE_integer('batch_size', 128, """Batch size.""")
+tf.app.flags.DEFINE_integer('num_batches', 100, """Number of batches to run.""")
tf.app.flags.DEFINE_boolean('forward_only', False,
"""Only run the forward pass.""")
tf.app.flags.DEFINE_boolean('forward_backward_only', False,
@@ -23,47 +21,64 @@ tf.app.flags.DEFINE_string('data_format', 'NCHW',
tf.app.flags.DEFINE_boolean('log_device_placement', False,
"""Whether to log device placement.""")
+
def _conv(name, inpOp, nIn, nOut, kH, kW, dH, dW, padType, wd=0.0005):
with tf.name_scope(name) as scope:
- kernel = tf.get_variable(name + '_w',[kH, kW, nIn, nOut],
- initializer=tf.truncated_normal_initializer(stddev=0.01, dtype=tf.float32),
- dtype=tf.float32)
+ kernel = tf.get_variable(
+ name + '_w', [kH, kW, nIn, nOut],
+ initializer=tf.truncated_normal_initializer(
+ stddev=0.01, dtype=tf.float32),
+ dtype=tf.float32)
if wd is not None and wd > 0:
weight_decay = tf.mul(tf.nn.l2_loss(kernel), wd, name='weight_loss')
tf.add_to_collection('losses', weight_decay)
if FLAGS.data_format == 'NCHW':
- strides = [1, 1, dH, dW]
+ strides = [1, 1, dH, dW]
else:
- strides = [1, dH, dW, 1]
- conv = tf.nn.conv2d(inpOp, kernel, strides, padding=padType,
- data_format=FLAGS.data_format)
-
- biases = tf.get_variable(name=name + '_b', shape=[nOut],
- initializer=tf.constant_initializer(value=0.0, dtype=tf.float32),
+ strides = [1, dH, dW, 1]
+ conv = tf.nn.conv2d(
+ inpOp,
+ kernel,
+ strides,
+ padding=padType,
+ data_format=FLAGS.data_format)
+
+ biases = tf.get_variable(
+ name=name + '_b',
+ shape=[nOut],
+ initializer=tf.constant_initializer(
+ value=0.0, dtype=tf.float32),
dtype=tf.float32)
bias = tf.reshape(
- tf.nn.bias_add(conv, biases, data_format=FLAGS.data_format),
+ tf.nn.bias_add(
+ conv, biases, data_format=FLAGS.data_format),
conv.get_shape())
conv1 = tf.nn.relu(bias, name=scope)
return conv1
+
def _affine(name, inpOp, nIn, nOut, wd=0.0005, act=True, drop=None):
with tf.name_scope(name) as scope:
- kernel = tf.get_variable(name + '_w', [nIn, nOut],
- initializer=tf.truncated_normal_initializer(stddev=0.01, dtype=tf.float32),
+ kernel = tf.get_variable(
+ name + '_w', [nIn, nOut],
+ initializer=tf.truncated_normal_initializer(
+ stddev=0.01, dtype=tf.float32),
dtype=tf.float32)
if wd is not None and wd > 0:
weight_decay = tf.mul(tf.nn.l2_loss(kernel), wd, name='weight_loss')
tf.add_to_collection('losses', weight_decay)
- biases = tf.get_variable(name + '_b', [nOut],
- initializer=tf.constant_initializer(value=0.0, dtype=tf.float32),
- dtype=tf.float32,trainable=True)
+ biases = tf.get_variable(
+ name + '_b', [nOut],
+ initializer=tf.constant_initializer(
+ value=0.0, dtype=tf.float32),
+ dtype=tf.float32,
+ trainable=True)
affine1 = tf.nn.relu_layer(inpOp, kernel, biases, name=name) if act else \
tf.matmul(inpOp, kernel) + biases
@@ -72,31 +87,36 @@ def _affine(name, inpOp, nIn, nOut, wd=0.0005, act=True, drop=None):
return output
+
def _mpool(name, inpOp, kH, kW, dH, dW):
if FLAGS.data_format == 'NCHW':
- ksize = [1, 1, kH, kW]
- strides = [1, 1, dH, dW]
+ ksize = [1, 1, kH, kW]
+ strides = [1, 1, dH, dW]
else:
- ksize = [1, kH, kW, 1]
- strides = [1, dH, dW, 1]
- return tf.nn.max_pool(inpOp,
- ksize=ksize,
- strides=strides,
- padding='VALID',
- data_format=FLAGS.data_format,
- name=name)
+ ksize = [1, kH, kW, 1]
+ strides = [1, dH, dW, 1]
+ return tf.nn.max_pool(
+ inpOp,
+ ksize=ksize,
+ strides=strides,
+ padding='VALID',
+ data_format=FLAGS.data_format,
+ name=name)
+
def _norm(name, l_input, lsize=4):
- return tf.nn.lrn(l_input, lsize, bias=1.0,
+ return tf.nn.lrn(l_input,
+ lsize,
+ bias=1.0,
alpha=0.001 / 9.0,
- beta=0.75, name=name)
-
+ beta=0.75,
+ name=name)
def loss(logits, labels):
labels = tf.cast(labels, tf.int64)
cross_entropy = tf.nn.sparse_softmax_cross_entropy_with_logits(
- logits, labels, name='cross_entropy_per_example')
+ logits, labels, name='cross_entropy_per_example')
cross_entropy_mean = tf.reduce_mean(cross_entropy, name='cross_entropy')
tf.add_to_collection('losses', cross_entropy_mean)
@@ -104,6 +124,7 @@ def loss(logits, labels):
# decay terms (L2 loss).
return tf.add_n(tf.get_collection('losses'), name='total_loss')
+
def get_incoming_shape(incoming):
""" Returns the incoming data shape """
if isinstance(incoming, tf.Tensor):
@@ -113,50 +134,52 @@ def get_incoming_shape(incoming):
else:
raise Exception("Invalid incoming layer.")
+
def inference(images):
- conv1 = _conv ('conv1', images, 3, 96, 11, 11, 4, 4, 'VALID')
- pool1 = _mpool('pool1', conv1, 3, 3, 2, 2)
- norm1 = _norm ('norm1', pool1, lsize=5)
- conv2 = _conv ('conv2', norm1, 96, 256, 5, 5, 1, 1, 'SAME')
- pool2 = _mpool('pool2', conv2, 3, 3, 2, 2)
- norm2 = _norm ('norm2', pool2, lsize=5)
- conv3 = _conv ('conv3', norm2, 256, 384, 3, 3, 1, 1, 'SAME')
- conv4 = _conv ('conv4', conv3, 384, 384, 3, 3, 1, 1, 'SAME')
- conv5 = _conv ('conv5', conv4, 384, 256, 3, 3, 1, 1, 'SAME')
- pool5 = _mpool('pool5', conv5, 3, 3, 2, 2)
+ conv1 = _conv('conv1', images, 3, 96, 11, 11, 4, 4, 'VALID')
+ pool1 = _mpool('pool1', conv1, 3, 3, 2, 2)
+ norm1 = _norm('norm1', pool1, lsize=5)
+ conv2 = _conv('conv2', norm1, 96, 256, 5, 5, 1, 1, 'SAME')
+ pool2 = _mpool('pool2', conv2, 3, 3, 2, 2)
+ norm2 = _norm('norm2', pool2, lsize=5)
+ conv3 = _conv('conv3', norm2, 256, 384, 3, 3, 1, 1, 'SAME')
+ conv4 = _conv('conv4', conv3, 384, 384, 3, 3, 1, 1, 'SAME')
+ conv5 = _conv('conv5', conv4, 384, 256, 3, 3, 1, 1, 'SAME')
+ pool5 = _mpool('pool5', conv5, 3, 3, 2, 2)
resh1 = tf.reshape(pool5, [-1, 256 * 6 * 6])
affn1 = _affine('fc6', resh1, 256 * 6 * 6, 4096, 0.5)
affn2 = _affine('fc7', affn1, 4096, 4096, 0.5)
- affn3 = _affine('fc8', affn2, 4096, 1000, wd=None, act=False) # last fc
+ affn3 = _affine('fc8', affn2, 4096, 1000, wd=None, act=False) # last fc
return affn3
def time_tensorflow_run(session, target, info_string):
- num_steps_burn_in = 10
- total_duration = 0.0
- total_duration_squared = 0.0
- if not isinstance(target, list):
- target = [target]
- target_op = tf.group(*target)
- for i in xrange(FLAGS.num_batches + num_steps_burn_in):
- start_time = time.time()
- _ = session.run(target_op)
- duration = time.time() - start_time
- if i > num_steps_burn_in:
- if not i % 10:
- print ('%s: step %d, duration = %.3f' %
- (datetime.now(), i - num_steps_burn_in, duration))
- total_duration += duration
- total_duration_squared += duration * duration
- mn = total_duration / FLAGS.num_batches
- vr = total_duration_squared / FLAGS.num_batches - mn * mn
- sd = math.sqrt(vr)
- print ('%s: %s across %d steps, %.3f +/- %.3f sec / batch' %
- (datetime.now(), info_string, FLAGS.num_batches, mn, sd))
+ num_steps_burn_in = 10
+ total_duration = 0.0
+ total_duration_squared = 0.0
+ if not isinstance(target, list):
+ target = [target]
+ target_op = tf.group(*target)
+ for i in xrange(FLAGS.num_batches + num_steps_burn_in):
+ start_time = time.time()
+ _ = session.run(target_op)
+ duration = time.time() - start_time
+ if i > num_steps_burn_in:
+ if not i % 10:
+ print('%s: step %d, duration = %.3f' %
+ (datetime.now(), i - num_steps_burn_in, duration))
+ total_duration += duration
+ total_duration_squared += duration * duration
+ mn = total_duration / FLAGS.num_batches
+ vr = total_duration_squared / FLAGS.num_batches - mn * mn
+ sd = math.sqrt(vr)
+ print('%s: %s across %d steps, %.3f +/- %.3f sec / batch' %
+ (datetime.now(), info_string, FLAGS.num_batches, mn, sd))
+
def _add_loss_summaries(total_loss):
- """
+ """
Generates moving average for all losses and associated summaries for
visualizing the performance of the network.
@@ -165,96 +188,111 @@ def _add_loss_summaries(total_loss):
Returns:
loss_averages_op: op for generating moving averages of losses.
"""
- # Compute the moving average of all individual losses and the total loss.
- loss_averages = tf.train.ExponentialMovingAverage(0.9, name='avg')
- losses = tf.get_collection('losses')
- loss_averages_op = loss_averages.apply(losses + [total_loss])
-
- # Attach a scalar summary to all individual losses and the total loss; do the
- # same for the averaged version of the losses.
- for l in losses + [total_loss]:
- # Name each loss as '(raw)' and name the moving average version of the loss
- # as the original loss name.
- tf.scalar_summary(l.op.name +' (raw)', l)
- tf.scalar_summary(l.op.name, loss_averages.average(l))
+ # Compute the moving average of all individual losses and the total loss.
+ loss_averages = tf.train.ExponentialMovingAverage(0.9, name='avg')
+ losses = tf.get_collection('losses')
+ loss_averages_op = loss_averages.apply(losses + [total_loss])
- return loss_averages_op
+ # Attach a scalar summary to all individual losses and the total loss; do the
+ # same for the averaged version of the losses.
+ for l in losses + [total_loss]:
+ # Name each loss as '(raw)' and name the moving average version of the loss
+ # as the original loss name.
+ tf.scalar_summary(l.op.name + ' (raw)', l)
+ tf.scalar_summary(l.op.name, loss_averages.average(l))
+ return loss_averages_op
def run_benchmark():
- with tf.Graph().as_default():
- with tf.device('/gpu:0'):
- # Generate some dummy images.
- image_size = 224
- # Note that our padding definition is slightly different the cuda-convnet.
- # In order to force the model to start with the same activations sizes,
- # we add 3 to the image_size and employ VALID padding above.
- if FLAGS.data_format == 'NCHW':
- image_shape = [FLAGS.batch_size, 3, image_size + 3, image_size + 3]
- else:
- image_shape = [FLAGS.batch_size, image_size + 3, image_size + 3, 3]
- images = tf.get_variable('image', image_shape,
- initializer=tf.truncated_normal_initializer(stddev=0.1, dtype=tf.float32),
- dtype=tf.float32,
- trainable=False)
-
- labels = tf.get_variable('label', [FLAGS.batch_size],
- initializer=tf.constant_initializer(1),
- dtype=tf.int32,
- trainable=False)
-
- # Build a Graph that computes the logits predictions from the
- # inference model.
- last_layer = inference(images)
-
- objective = loss(last_layer, labels)
- # Compute the gradient with respect to all the parameters.
-
- # Compute gradients.
- # opt = tf.train.GradientDescentOptimizer(0.001)
- opt = tf.train.MomentumOptimizer(0.001, 0.9)
- grads = opt.compute_gradients(objective)
- global_step = tf.get_variable('global_step', [],
- initializer=tf.constant_initializer(0.0, dtype=tf.float32),
- trainable=False, dtype=tf.float32)
- apply_gradient_op = opt.apply_gradients(grads, global_step=global_step)
-
- # Track the moving averages of all trainable variables.
- variable_averages = tf.train.ExponentialMovingAverage(
- 0.9, global_step)
- variables_averages_op = variable_averages.apply(tf.trainable_variables())
-
- # Build an initialization operation.
- init = tf.initialize_all_variables()
-
- # Start running operations on the Graph.
- sess = tf.Session(config=tf.ConfigProto(
- allow_soft_placement=True,
- log_device_placement=FLAGS.log_device_placement))
- sess.run(init)
-
- run_forward = True
- run_forward_backward = True
- if FLAGS.forward_only and FLAGS.forward_backward_only:
- raise ValueError("Cannot specify --forward_only and "
- "--forward_backward_only at the same time.")
- if FLAGS.forward_only:
- run_forward_backward = False
- elif FLAGS.forward_backward_only:
- run_forward = False
-
- if run_forward:
- time_tensorflow_run(sess, last_layer, "Forward")
-
- if run_forward_backward:
- with tf.control_dependencies([apply_gradient_op, variables_averages_op]):
- train_op = tf.no_op(name='train')
- time_tensorflow_run(sess, [train_op, objective], "Forward-backward")
+ with tf.Graph().as_default():
+ with tf.device('/gpu:0'):
+ # Generate some dummy images.
+ image_size = 224
+            # Note that our padding definition is slightly different from cuda-convnet's.
+ # In order to force the model to start with the same activations sizes,
+ # we add 3 to the image_size and employ VALID padding above.
+ if FLAGS.data_format == 'NCHW':
+ image_shape = [
+ FLAGS.batch_size, 3, image_size + 3, image_size + 3
+ ]
+ else:
+ image_shape = [
+ FLAGS.batch_size, image_size + 3, image_size + 3, 3
+ ]
+ images = tf.get_variable(
+ 'image',
+ image_shape,
+ initializer=tf.truncated_normal_initializer(
+ stddev=0.1, dtype=tf.float32),
+ dtype=tf.float32,
+ trainable=False)
+
+ labels = tf.get_variable(
+ 'label', [FLAGS.batch_size],
+ initializer=tf.constant_initializer(1),
+ dtype=tf.int32,
+ trainable=False)
+
+ # Build a Graph that computes the logits predictions from the
+ # inference model.
+ last_layer = inference(images)
+
+ objective = loss(last_layer, labels)
+ # Compute the gradient with respect to all the parameters.
+
+ # Compute gradients.
+ # opt = tf.train.GradientDescentOptimizer(0.001)
+ opt = tf.train.MomentumOptimizer(0.001, 0.9)
+ grads = opt.compute_gradients(objective)
+ global_step = tf.get_variable(
+ 'global_step', [],
+ initializer=tf.constant_initializer(
+ 0.0, dtype=tf.float32),
+ trainable=False,
+ dtype=tf.float32)
+ apply_gradient_op = opt.apply_gradients(
+ grads, global_step=global_step)
+
+ # Track the moving averages of all trainable variables.
+ variable_averages = tf.train.ExponentialMovingAverage(0.9,
+ global_step)
+ variables_averages_op = variable_averages.apply(
+ tf.trainable_variables())
+
+ # Build an initialization operation.
+ init = tf.initialize_all_variables()
+
+ # Start running operations on the Graph.
+ sess = tf.Session(config=tf.ConfigProto(
+ allow_soft_placement=True,
+ log_device_placement=FLAGS.log_device_placement))
+ sess.run(init)
+
+ run_forward = True
+ run_forward_backward = True
+ if FLAGS.forward_only and FLAGS.forward_backward_only:
+ raise ValueError("Cannot specify --forward_only and "
+ "--forward_backward_only at the same time.")
+ if FLAGS.forward_only:
+ run_forward_backward = False
+ elif FLAGS.forward_backward_only:
+ run_forward = False
+
+ if run_forward:
+ time_tensorflow_run(sess, last_layer, "Forward")
+
+ if run_forward_backward:
+ with tf.control_dependencies(
+ [apply_gradient_op, variables_averages_op]):
+ train_op = tf.no_op(name='train')
+ time_tensorflow_run(sess, [train_op, objective],
+ "Forward-backward")
+
def main(_):
- run_benchmark()
+ run_benchmark()
if __name__ == '__main__':
- tf.app.run()
+ tf.app.run()
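
`time_tensorflow_run` above reports per-batch time as mean and standard deviation from two running sums, using the identity var(x) = mean(x^2) - mean(x)^2. A framework-free sketch of the same pattern (the sketch uses `>=` on the burn-in guard so that exactly `num_batches` samples enter the sums; the code above uses `>`, which drops one extra sample):

    import math
    import time

    def time_op(run_once, num_batches=100, burn_in=10):
        total = total_sq = 0.0
        for i in range(num_batches + burn_in):
            start = time.time()
            run_once()
            duration = time.time() - start
            if i >= burn_in:  # discard warm-up iterations
                total += duration
                total_sq += duration * duration
        mean = total / num_batches
        var = total_sq / num_batches - mean * mean
        return mean, math.sqrt(max(var, 0.0))

    mean, sd = time_op(lambda: sum(range(100000)))
    print('%.6f +/- %.6f sec / batch' % (mean, sd))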
diff --git a/benchmark/tensorflow/image/alexnet_multi_gpu.py b/benchmark/tensorflow/image/alexnet_multi_gpu.py
index f006fb56af7bcdfd2912976fff3ec6c3fcb18fdb..7b5ee78f4dd5429abd85d75c092a6e3a2a39f922 100644
--- a/benchmark/tensorflow/image/alexnet_multi_gpu.py
+++ b/benchmark/tensorflow/image/alexnet_multi_gpu.py
@@ -9,10 +9,8 @@ import tensorflow as tf
FLAGS = tf.app.flags.FLAGS
-tf.app.flags.DEFINE_integer('batch_size', 64,
- """Batch size.""")
-tf.app.flags.DEFINE_integer('num_batches', 100,
- """Number of batches to run.""")
+tf.app.flags.DEFINE_integer('batch_size', 64, """Batch size.""")
+tf.app.flags.DEFINE_integer('num_batches', 100, """Number of batches to run.""")
tf.app.flags.DEFINE_string('data_format', 'NCHW',
"""The data format for Convnet operations.
Can be either NHWC or NCHW.
@@ -21,88 +19,110 @@ tf.app.flags.DEFINE_string('data_format', 'NCHW',
tf.app.flags.DEFINE_string('train_dir', '/train_model',
"""Directory where to write event logs """
"""and checkpoint.""")
-tf.app.flags.DEFINE_integer('num_gpus', 4,
- """How many GPUs to use.""")
+tf.app.flags.DEFINE_integer('num_gpus', 4, """How many GPUs to use.""")
tf.app.flags.DEFINE_boolean('log_device_placement', False,
"""Whether to log device placement.""")
-NUM_EXAMPLES_PER_EPOCH_FOR_TRAIN=50000
-NUM_EPOCHS_PER_DECAY=50
-INITIAL_LEARNING_RATE = 0.1
+NUM_EXAMPLES_PER_EPOCH_FOR_TRAIN = 50000
+NUM_EPOCHS_PER_DECAY = 50
+INITIAL_LEARNING_RATE = 0.1
LEARNING_RATE_DECAY_FACTOR = 0.1
TOWER_NAME = 'tower'
def _conv(name, inpOp, nIn, nOut, kH, kW, dH, dW, padType, wd=0.005):
with tf.name_scope(name) as scope:
- kernel = tf.get_variable(name + '_w',[kH, kW, nIn, nOut],
- initializer=tf.truncated_normal_initializer(stddev=0.01, dtype=tf.float32),
- dtype=tf.float32)
+ kernel = tf.get_variable(
+ name + '_w', [kH, kW, nIn, nOut],
+ initializer=tf.truncated_normal_initializer(
+ stddev=0.01, dtype=tf.float32),
+ dtype=tf.float32)
if wd is not None:
weight_decay = tf.mul(tf.nn.l2_loss(kernel), wd, name='weight_loss')
tf.add_to_collection('losses', weight_decay)
if FLAGS.data_format == 'NCHW':
- strides = [1, 1, dH, dW]
+ strides = [1, 1, dH, dW]
else:
- strides = [1, dH, dW, 1]
- conv = tf.nn.conv2d(inpOp, kernel, strides, padding=padType,
- data_format=FLAGS.data_format)
-
- biases = tf.get_variable(name=name + '_b', shape=[nOut],
- initializer=tf.constant_initializer(value=0.0, dtype=tf.float32),
+ strides = [1, dH, dW, 1]
+ conv = tf.nn.conv2d(
+ inpOp,
+ kernel,
+ strides,
+ padding=padType,
+ data_format=FLAGS.data_format)
+
+ biases = tf.get_variable(
+ name=name + '_b',
+ shape=[nOut],
+ initializer=tf.constant_initializer(
+ value=0.0, dtype=tf.float32),
dtype=tf.float32)
bias = tf.reshape(
- tf.nn.bias_add(conv, biases, data_format=FLAGS.data_format),
+ tf.nn.bias_add(
+ conv, biases, data_format=FLAGS.data_format),
conv.get_shape())
conv1 = tf.nn.relu(bias, name=scope)
return conv1
+
def _affine(name, inpOp, nIn, nOut, wd=0.005, act=True):
with tf.name_scope(name) as scope:
- kernel = tf.get_variable(name + '_w', [nIn, nOut],
- initializer=tf.truncated_normal_initializer(stddev=0.01, dtype=tf.float32),
+ kernel = tf.get_variable(
+ name + '_w', [nIn, nOut],
+ initializer=tf.truncated_normal_initializer(
+ stddev=0.01, dtype=tf.float32),
dtype=tf.float32)
if wd is not None:
weight_decay = tf.mul(tf.nn.l2_loss(kernel), wd, name='weight_loss')
tf.add_to_collection('losses', weight_decay)
- biases = tf.get_variable(name + '_b', [nOut],
- initializer=tf.constant_initializer(value=0.0, dtype=tf.float32),
- dtype=tf.float32,trainable=True)
+ biases = tf.get_variable(
+ name + '_b', [nOut],
+ initializer=tf.constant_initializer(
+ value=0.0, dtype=tf.float32),
+ dtype=tf.float32,
+ trainable=True)
affine1 = tf.nn.relu_layer(inpOp, kernel, biases, name=name) if act else \
tf.matmul(inpOp, kernel) + biases
return affine1
+
def _mpool(name, inpOp, kH, kW, dH, dW):
if FLAGS.data_format == 'NCHW':
- ksize = [1, 1, kH, kW]
- strides = [1, 1, dH, dW]
+ ksize = [1, 1, kH, kW]
+ strides = [1, 1, dH, dW]
else:
- ksize = [1, kH, kW, 1]
- strides = [1, dH, dW, 1]
- return tf.nn.max_pool(inpOp,
- ksize=ksize,
- strides=strides,
- padding='VALID',
- data_format=FLAGS.data_format,
- name=name)
+ ksize = [1, kH, kW, 1]
+ strides = [1, dH, dW, 1]
+ return tf.nn.max_pool(
+ inpOp,
+ ksize=ksize,
+ strides=strides,
+ padding='VALID',
+ data_format=FLAGS.data_format,
+ name=name)
+
def _norm(name, l_input, lsize=4):
- return tf.nn.lrn(l_input, lsize, bias=1.0,
+ return tf.nn.lrn(l_input,
+ lsize,
+ bias=1.0,
alpha=0.001 / 9.0,
- beta=0.75, name=name)
+ beta=0.75,
+ name=name)
+
def loss(logits, labels):
labels = tf.cast(labels, tf.int64)
cross_entropy = tf.nn.sparse_softmax_cross_entropy_with_logits(
- logits, labels, name='cross_entropy_per_example')
+ logits, labels, name='cross_entropy_per_example')
cross_entropy_mean = tf.reduce_mean(cross_entropy, name='cross_entropy')
tf.add_to_collection('losses', cross_entropy_mean)
@@ -120,24 +140,26 @@ def get_incoming_shape(incoming):
else:
raise Exception("Invalid incoming layer.")
+
def inference(images):
- conv1 = _conv ('conv1', images, 3, 96, 11, 11, 4, 4, 'VALID')
- pool1 = _mpool('pool1', conv1, 3, 3, 2, 2)
- norm1 = _norm ('norm1', pool1, lsize=5)
- conv2 = _conv ('conv2', norm1, 96, 256, 5, 5, 1, 1, 'SAME')
- pool2 = _mpool('pool2', conv2, 3, 3, 2, 2)
- norm2 = _norm ('norm2', pool2, lsize=5)
- conv3 = _conv ('conv3', norm2, 256, 384, 3, 3, 1, 1, 'SAME')
- conv4 = _conv ('conv4', conv3, 384, 384, 3, 3, 1, 1, 'SAME')
- conv5 = _conv ('conv5', conv4, 384, 256, 3, 3, 1, 1, 'SAME')
- pool5 = _mpool('pool5', conv5, 3, 3, 2, 2)
+ conv1 = _conv('conv1', images, 3, 96, 11, 11, 4, 4, 'VALID')
+ pool1 = _mpool('pool1', conv1, 3, 3, 2, 2)
+ norm1 = _norm('norm1', pool1, lsize=5)
+ conv2 = _conv('conv2', norm1, 96, 256, 5, 5, 1, 1, 'SAME')
+ pool2 = _mpool('pool2', conv2, 3, 3, 2, 2)
+ norm2 = _norm('norm2', pool2, lsize=5)
+ conv3 = _conv('conv3', norm2, 256, 384, 3, 3, 1, 1, 'SAME')
+ conv4 = _conv('conv4', conv3, 384, 384, 3, 3, 1, 1, 'SAME')
+ conv5 = _conv('conv5', conv4, 384, 256, 3, 3, 1, 1, 'SAME')
+ pool5 = _mpool('pool5', conv5, 3, 3, 2, 2)
resh1 = tf.reshape(pool5, [-1, 256 * 6 * 6])
affn1 = _affine('fc6', resh1, 256 * 6 * 6, 4096)
affn2 = _affine('fc7', affn1, 4096, 4096)
- affn3 = _affine('fc8', affn2, 4096, 1000, wd=None, act=False) # last fc
+ affn3 = _affine('fc8', affn2, 4096, 1000, wd=None, act=False) # last fc
return affn3
+
def tower_loss(scope):
"""Calculate the total loss on a single tower running the model.
Args:
@@ -150,15 +172,19 @@ def tower_loss(scope):
image_shape = [FLAGS.batch_size, 3, image_size + 3, image_size + 3]
else:
image_shape = [FLAGS.batch_size, image_size + 3, image_size + 3, 3]
- images = tf.get_variable('image', image_shape,
- initializer=tf.truncated_normal_initializer(stddev=0.1, dtype=tf.float32),
- dtype=tf.float32,
- trainable=False)
-
- labels = tf.get_variable('label', [FLAGS.batch_size],
- initializer=tf.constant_initializer(1),
- dtype=tf.int32,
- trainable=False)
+ images = tf.get_variable(
+ 'image',
+ image_shape,
+ initializer=tf.truncated_normal_initializer(
+ stddev=0.1, dtype=tf.float32),
+ dtype=tf.float32,
+ trainable=False)
+
+ labels = tf.get_variable(
+ 'label', [FLAGS.batch_size],
+ initializer=tf.constant_initializer(1),
+ dtype=tf.int32,
+ trainable=False)
# Build a Graph that computes the logits predictions from the
# inference model.
@@ -167,7 +193,7 @@ def tower_loss(scope):
# Build the portion of the Graph calculating the losses. Note that we will
# assemble the total_loss using a custom function below.
_ = loss(last_layer, labels)
-
+
# Assemble all of the losses for the current tower only.
losses = tf.get_collection('losses', scope)
@@ -186,7 +212,7 @@ def tower_loss(scope):
loss_name = re.sub('%s_[0-9]*/' % TOWER_NAME, '', l.op.name)
# Name each loss as '(raw)' and name the moving average version of the loss
# as the original loss name.
- tf.scalar_summary(loss_name +' (raw)', l)
+ tf.scalar_summary(loss_name + ' (raw)', l)
tf.scalar_summary(loss_name, loss_averages.average(l))
with tf.control_dependencies([loss_averages_op]):
@@ -195,7 +221,7 @@ def tower_loss(scope):
def average_gradients(tower_grads):
- """Calculate the average gradient for each shared variable across all towers.
+ """Calculate the average gradient for each shared variable across all towers.
Note that this function provides a synchronization point across all towers.
Args:
tower_grads: List of lists of (gradient, variable) tuples. The outer list
@@ -205,130 +231,135 @@ def average_gradients(tower_grads):
List of pairs of (gradient, variable) where the gradient has been averaged
across all towers.
"""
- average_grads = []
- for grad_and_vars in zip(*tower_grads):
- # Note that each grad_and_vars looks like the following:
- # ((grad0_gpu0, var0_gpu0), ... , (grad0_gpuN, var0_gpuN))
- grads = []
- for g, _ in grad_and_vars:
- # Add 0 dimension to the gradients to represent the tower.
- expanded_g = tf.expand_dims(g, 0)
-
- # Append on a 'tower' dimension which we will average over below.
- grads.append(expanded_g)
-
- # Average over the 'tower' dimension.
- grad = tf.concat(0, grads)
- grad = tf.reduce_mean(grad, 0)
-
- # Keep in mind that the Variables are redundant because they are shared
- # across towers. So .. we will just return the first tower's pointer to
- # the Variable.
- v = grad_and_vars[0][1]
- grad_and_var = (grad, v)
- average_grads.append(grad_and_var)
- return average_grads
+ average_grads = []
+ for grad_and_vars in zip(*tower_grads):
+ # Note that each grad_and_vars looks like the following:
+ # ((grad0_gpu0, var0_gpu0), ... , (grad0_gpuN, var0_gpuN))
+ grads = []
+ for g, _ in grad_and_vars:
+ # Add 0 dimension to the gradients to represent the tower.
+ expanded_g = tf.expand_dims(g, 0)
+
+ # Append on a 'tower' dimension which we will average over below.
+ grads.append(expanded_g)
+
+ # Average over the 'tower' dimension.
+ grad = tf.concat(0, grads)
+ grad = tf.reduce_mean(grad, 0)
+
+ # Keep in mind that the Variables are redundant because they are shared
+      # across towers. So we will just return the first tower's pointer to
+ # the Variable.
+ v = grad_and_vars[0][1]
+ grad_and_var = (grad, v)
+ average_grads.append(grad_and_var)
+ return average_grads
+
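The re-indented `average_gradients` is pure bookkeeping: for each variable, stack the per-tower gradients along a new leading axis, take the mean, and keep the first tower's variable handle. A minimal NumPy analogue (tower contents invented):

```python
import numpy as np

def average_gradients_np(tower_grads):
    # tower_grads: one list of (grad, var_name) pairs per tower; the i-th
    # entry of every inner list refers to the same shared variable.
    averaged = []
    for grad_and_vars in zip(*tower_grads):
        grads = np.stack([g for g, _ in grad_and_vars], axis=0)
        averaged.append((grads.mean(axis=0), grad_and_vars[0][1]))
    return averaged

tower0 = [(np.array([1.0, 2.0]), 'w'), (np.array([0.5]), 'b')]
tower1 = [(np.array([3.0, 4.0]), 'w'), (np.array([1.5]), 'b')]
print(average_gradients_np([tower0, tower1]))
# [(array([2., 3.]), 'w'), (array([1.]), 'b')]
```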
def time_tensorflow_run(session, target):
num_steps_burn_in = 50
total_duration = 0.0
total_duration_squared = 0.0
for i in xrange(FLAGS.num_batches + num_steps_burn_in):
- start_time = time.time()
- _, loss_value = session.run(target)
- duration = time.time() - start_time
- if i > num_steps_burn_in:
- if not i % 10:
- num_examples_per_step = FLAGS.batch_size * FLAGS.num_gpus
- examples_per_sec = num_examples_per_step / duration
- sec_per_batch = duration
-
- format_str = ('%s: step %d, loss = %.2f (%.1f examples/sec; %.3f '
- 'sec/batch batch_size = %d)')
- print (format_str %
- (datetime.now(), i - num_steps_burn_in,
- loss_value, duration, sec_per_batch, num_examples_per_step))
-
- total_duration += duration
- total_duration_squared += duration * duration
+ start_time = time.time()
+ _, loss_value = session.run(target)
+ duration = time.time() - start_time
+ if i > num_steps_burn_in:
+ if not i % 10:
+ num_examples_per_step = FLAGS.batch_size * FLAGS.num_gpus
+ examples_per_sec = num_examples_per_step / duration
+ sec_per_batch = duration
+
+ format_str = (
+ '%s: step %d, loss = %.2f (%.1f examples/sec; %.3f '
+ 'sec/batch batch_size = %d)')
+ print(format_str %
+ (datetime.now(), i - num_steps_burn_in, loss_value,
+                     examples_per_sec, sec_per_batch, num_examples_per_step))
+
+ total_duration += duration
+ total_duration_squared += duration * duration
mn = total_duration / FLAGS.num_batches
vr = total_duration_squared / FLAGS.num_batches - mn * mn
sd = math.sqrt(vr)
- print ('%s: FwdBwd across %d steps, %.3f +/- %.3f sec / batch' %
+ print('%s: FwdBwd across %d steps, %.3f +/- %.3f sec / batch' %
(datetime.now(), FLAGS.num_batches, mn, sd))
+
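The closing statistics rely on Var(x) = E[x^2] - E[x]^2, recovered from the two running sums. (Note the guard `i > num_steps_burn_in` appears to accumulate one step fewer than the `FLAGS.num_batches` the mean divides by; `i >= num_steps_burn_in` would make the two agree.) A standalone sketch of the same bookkeeping, with invented durations:

```python
import math

durations = [0.101, 0.098, 0.104, 0.099, 0.102]  # hypothetical per-batch times
n = len(durations)
total = sum(durations)
total_sq = sum(d * d for d in durations)
mn = total / n
sd = math.sqrt(total_sq / n - mn * mn)  # population std from the two sums
print('%.4f +/- %.4f sec / batch' % (mn, sd))
```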
def run_benchmark():
- with tf.Graph().as_default(), tf.device('/cpu:0'):
- # Create a variable to count the number of train() calls. This equals the
- # number of batches processed * FLAGS.num_gpus.
- global_step = tf.get_variable(
- 'global_step', [],
- initializer=tf.constant_initializer(0), trainable=False)
-
- # Calculate the learning rate schedule.
- num_batches_per_epoch = (NUM_EXAMPLES_PER_EPOCH_FOR_TRAIN /
- FLAGS.batch_size)
- decay_steps = int(num_batches_per_epoch * NUM_EPOCHS_PER_DECAY)
-
- # Decay the learning rate exponentially based on the number of steps.
- lr = tf.train.exponential_decay(INITIAL_LEARNING_RATE,
- global_step,
- decay_steps,
- LEARNING_RATE_DECAY_FACTOR,
- staircase=True)
-
- # Create an optimizer that performs gradient descent.
- opt = tf.train.MomentumOptimizer(lr, 0.9)
-
- # Calculate the gradients for each model tower.
- tower_grads = []
- for i in xrange(FLAGS.num_gpus):
- with tf.device('/gpu:%d' % i):
- with tf.name_scope('%s_%d' % (TOWER_NAME, i)) as scope:
- # Calculate the loss for one tower of the model. This function
- # constructs the entire model but shares the variables across
- # all towers.
- loss = tower_loss(scope)
-
- # Reuse variables for the next tower.
- tf.get_variable_scope().reuse_variables()
-
- # Retain the summaries from the final tower.
- summaries = tf.get_collection(tf.GraphKeys.SUMMARIES, scope)
-
- # Calculate the gradients for the batch of data on this tower.
- grads = opt.compute_gradients(loss)
-
- # Keep track of the gradients across all towers.
- tower_grads.append(grads)
-
- # We must calculate the mean of each gradient. Note that this is the
- # synchronization point across all towers.
- grads = average_gradients(tower_grads)
-
- # Apply the gradients to adjust the shared variables.
- apply_gradient_op = opt.apply_gradients(grads, global_step=global_step)
-
- # Group all updates to into a single train op.
- train_op = tf.group(apply_gradient_op)
-
- # Build an initialization operation.
- init = tf.initialize_all_variables()
-
- # Start running operations on the Graph. allow_soft_placement must be set to
- # True to build towers on GPU, as some of the ops do not have GPU
- # implementations.
- sess = tf.Session(config=tf.ConfigProto(
- allow_soft_placement=True,
- log_device_placement=FLAGS.log_device_placement))
- sess.run(init)
- time_tensorflow_run(sess, [train_op, loss])
+ with tf.Graph().as_default(), tf.device('/cpu:0'):
+ # Create a variable to count the number of train() calls. This equals the
+ # number of batches processed * FLAGS.num_gpus.
+ global_step = tf.get_variable(
+ 'global_step', [],
+ initializer=tf.constant_initializer(0),
+ trainable=False)
+
+ # Calculate the learning rate schedule.
+ num_batches_per_epoch = (NUM_EXAMPLES_PER_EPOCH_FOR_TRAIN /
+ FLAGS.batch_size)
+ decay_steps = int(num_batches_per_epoch * NUM_EPOCHS_PER_DECAY)
+
+ # Decay the learning rate exponentially based on the number of steps.
+ lr = tf.train.exponential_decay(
+ INITIAL_LEARNING_RATE,
+ global_step,
+ decay_steps,
+ LEARNING_RATE_DECAY_FACTOR,
+ staircase=True)
+
+ # Create an optimizer that performs gradient descent.
+ opt = tf.train.MomentumOptimizer(lr, 0.9)
+
+ # Calculate the gradients for each model tower.
+ tower_grads = []
+ for i in xrange(FLAGS.num_gpus):
+ with tf.device('/gpu:%d' % i):
+ with tf.name_scope('%s_%d' % (TOWER_NAME, i)) as scope:
+ # Calculate the loss for one tower of the model. This function
+ # constructs the entire model but shares the variables across
+ # all towers.
+ loss = tower_loss(scope)
+
+ # Reuse variables for the next tower.
+ tf.get_variable_scope().reuse_variables()
+
+ # Retain the summaries from the final tower.
+ summaries = tf.get_collection(tf.GraphKeys.SUMMARIES, scope)
+
+ # Calculate the gradients for the batch of data on this tower.
+ grads = opt.compute_gradients(loss)
+
+ # Keep track of the gradients across all towers.
+ tower_grads.append(grads)
+
+ # We must calculate the mean of each gradient. Note that this is the
+ # synchronization point across all towers.
+ grads = average_gradients(tower_grads)
+
+ # Apply the gradients to adjust the shared variables.
+ apply_gradient_op = opt.apply_gradients(grads, global_step=global_step)
+
+    # Group all updates into a single train op.
+ train_op = tf.group(apply_gradient_op)
+
+ # Build an initialization operation.
+ init = tf.initialize_all_variables()
+
+ # Start running operations on the Graph. allow_soft_placement must be set to
+ # True to build towers on GPU, as some of the ops do not have GPU
+ # implementations.
+ sess = tf.Session(config=tf.ConfigProto(
+ allow_soft_placement=True,
+ log_device_placement=FLAGS.log_device_placement))
+ sess.run(init)
+ time_tensorflow_run(sess, [train_op, loss])
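With `staircase=True`, the decayed rate is `INITIAL_LEARNING_RATE * LEARNING_RATE_DECAY_FACTOR ** floor(global_step / decay_steps)`, i.e. it drops by a factor of 10 once per `decay_steps` batches rather than continuously. The same arithmetic in plain Python (the `decay_steps` value below is invented):

```python
def staircase_lr(initial_lr, global_step, decay_steps, decay_factor):
    # lr = initial_lr * decay_factor ** floor(global_step / decay_steps)
    return initial_lr * decay_factor ** (global_step // decay_steps)

for step in (0, 9999, 10000, 25000):  # hypothetical decay_steps = 10000
    print('%d -> %g' % (step, staircase_lr(0.1, step, 10000, 0.1)))
# 0.1 until step 9999, 0.01 from step 10000, 0.001 from step 20000, ...
```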
def main(_):
- run_benchmark()
+ run_benchmark()
if __name__ == '__main__':
- tf.app.run()
+ tf.app.run()
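Taken together, `run_benchmark` is the standard TensorFlow data-parallel recipe: build the same graph once per GPU under a variable-reuse scope, collect per-tower gradients, average them, and apply the update once. Stripped of TensorFlow, the control flow reduces to the toy loop below (model, data shards, and learning rate are all invented for illustration):

```python
import numpy as np

def tower_grad(w, x, y):
    # d/dw of mean((w*x - y)^2) for one tower's shard of the batch.
    return np.mean(2.0 * (w * x - y) * x)

w = 0.0
shards = [(np.array([1.0, 2.0]), np.array([2.0, 4.0])),   # "gpu:0" shard
          (np.array([3.0, 4.0]), np.array([6.0, 8.0]))]   # "gpu:1" shard
for step in range(100):
    grads = [tower_grad(w, x, y) for x, y in shards]  # per-tower gradients
    w -= 0.1 * np.mean(grads)                         # averaged update
print(round(w, 3))  # converges to 2.0, the slope shared by both shards
```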
diff --git a/benchmark/tensorflow/image/googlenet.py b/benchmark/tensorflow/image/googlenet.py
index 097a8997b78ff55813897b7f32c4d7d931e8288d..decf855b54451efba5f6a7868fbcf631789f3572 100644
--- a/benchmark/tensorflow/image/googlenet.py
+++ b/benchmark/tensorflow/image/googlenet.py
@@ -8,10 +8,8 @@ import tensorflow as tf
FLAGS = tf.app.flags.FLAGS
-tf.app.flags.DEFINE_integer('batch_size', 128,
- """Batch size.""")
-tf.app.flags.DEFINE_integer('num_batches', 100,
- """Number of batches to run.""")
+tf.app.flags.DEFINE_integer('batch_size', 128, """Batch size.""")
+tf.app.flags.DEFINE_integer('num_batches', 100, """Number of batches to run.""")
tf.app.flags.DEFINE_boolean('forward_only', False,
"""Only run the forward pass.""")
tf.app.flags.DEFINE_boolean('forward_backward_only', False,
@@ -29,72 +27,92 @@ conv_counter = 1
pool_counter = 1
affine_counter = 1
-def _conv(inpOp, nIn, nOut, kH, kW, dH, dW, padType, wd = 0.0005):
+
+def _conv(inpOp, nIn, nOut, kH, kW, dH, dW, padType, wd=0.0005):
global conv_counter
global parameters
name = 'conv' + str(conv_counter)
conv_counter += 1
with tf.name_scope(name) as scope:
- kernel = tf.Variable(tf.truncated_normal([kH, kW, nIn, nOut],
- dtype=tf.float32,
- stddev=1e-1), name='weights')
+ kernel = tf.Variable(
+ tf.truncated_normal(
+ [kH, kW, nIn, nOut], dtype=tf.float32, stddev=1e-1),
+ name='weights')
if wd is not None and wd > 0:
weight_decay = tf.mul(tf.nn.l2_loss(kernel), wd, name='weight_loss')
tf.add_to_collection('losses', weight_decay)
if FLAGS.data_format == 'NCHW':
- strides = [1, 1, dH, dW]
+ strides = [1, 1, dH, dW]
else:
- strides = [1, dH, dW, 1]
- conv = tf.nn.conv2d(inpOp, kernel, strides, padding=padType,
- data_format=FLAGS.data_format)
- biases = tf.Variable(tf.constant(0.0, shape=[nOut], dtype=tf.float32),
- trainable=True, name='biases')
- bias = tf.reshape(tf.nn.bias_add(conv, biases,
- data_format=FLAGS.data_format),
- conv.get_shape())
+ strides = [1, dH, dW, 1]
+ conv = tf.nn.conv2d(
+ inpOp,
+ kernel,
+ strides,
+ padding=padType,
+ data_format=FLAGS.data_format)
+ biases = tf.Variable(
+ tf.constant(
+ 0.0, shape=[nOut], dtype=tf.float32),
+ trainable=True,
+ name='biases')
+ bias = tf.reshape(
+ tf.nn.bias_add(
+ conv, biases, data_format=FLAGS.data_format),
+ conv.get_shape())
conv1 = tf.nn.relu(bias, name=scope)
parameters += [kernel, biases]
return conv1
-def _affine(inpOp, nIn, nOut, act=True, wd = 0.0005):
+
+def _affine(inpOp, nIn, nOut, act=True, wd=0.0005):
global affine_counter
global parameters
name = 'affine' + str(affine_counter)
affine_counter += 1
with tf.name_scope(name) as scope:
- kernel = tf.Variable(tf.truncated_normal([nIn, nOut],
- dtype=tf.float32,
- stddev=1e-1), name='weights')
+ kernel = tf.Variable(
+ tf.truncated_normal(
+ [nIn, nOut], dtype=tf.float32, stddev=1e-1),
+ name='weights')
if wd is not None and wd > 0:
weight_decay = tf.mul(tf.nn.l2_loss(kernel), wd, name='weight_loss')
tf.add_to_collection('losses', weight_decay)
- biases = tf.Variable(tf.constant(0.0, shape=[nOut], dtype=tf.float32),
- trainable=True, name='biases')
- affine1 = tf.nn.relu_layer(inpOp, kernel, biases, name=name) if act else tf.matmul(inpOp, kernel) + biases
+ biases = tf.Variable(
+ tf.constant(
+ 0.0, shape=[nOut], dtype=tf.float32),
+ trainable=True,
+ name='biases')
+ affine1 = tf.nn.relu_layer(
+ inpOp, kernel, biases,
+ name=name) if act else tf.matmul(inpOp, kernel) + biases
parameters += [kernel, biases]
return affine1
+
def _mpool(inpOp, kH, kW, dH, dW, padding):
global pool_counter
global parameters
name = 'pool' + str(pool_counter)
pool_counter += 1
if FLAGS.data_format == 'NCHW':
- ksize = [1, 1, kH, kW]
- strides = [1, 1, dH, dW]
+ ksize = [1, 1, kH, kW]
+ strides = [1, 1, dH, dW]
else:
- ksize = [1, kH, kW, 1]
- strides = [1, dH, dW, 1]
- return tf.nn.max_pool(inpOp,
- ksize=ksize,
- strides=strides,
- padding=padding,
- data_format=FLAGS.data_format,
- name=name)
+ ksize = [1, kH, kW, 1]
+ strides = [1, dH, dW, 1]
+ return tf.nn.max_pool(
+ inpOp,
+ ksize=ksize,
+ strides=strides,
+ padding=padding,
+ data_format=FLAGS.data_format,
+ name=name)
+
def _apool(inpOp, kH, kW, dH, dW, padding):
global pool_counter
@@ -102,17 +120,19 @@ def _apool(inpOp, kH, kW, dH, dW, padding):
name = 'pool' + str(pool_counter)
pool_counter += 1
if FLAGS.data_format == 'NCHW':
- ksize = [1, 1, kH, kW]
- strides = [1, 1, dH, dW]
+ ksize = [1, 1, kH, kW]
+ strides = [1, 1, dH, dW]
else:
- ksize = [1, kH, kW, 1]
- strides = [1, dH, dW, 1]
- return tf.nn.avg_pool(inpOp,
- ksize=ksize,
- strides=strides,
- padding=padding,
- data_format=FLAGS.data_format,
- name=name)
+ ksize = [1, kH, kW, 1]
+ strides = [1, dH, dW, 1]
+ return tf.nn.avg_pool(
+ inpOp,
+ ksize=ksize,
+ strides=strides,
+ padding=padding,
+ data_format=FLAGS.data_format,
+ name=name)
+
def _inception(inp, inSize, o1s, o2s1, o2s2, o3s1, o3s2, o4s1, o4s2):
conv1 = _conv(inp, inSize, o1s, 1, 1, 1, 1, 'VALID')
@@ -127,9 +147,9 @@ def _inception(inp, inSize, o1s, o2s1, o2s2, o3s1, o3s2, o4s1, o4s2):
pool = _conv(pool_, inSize, o4s2, 1, 1, 1, 1, 'VALID')
if FLAGS.data_format == 'NCHW':
- channel_dim = 1
+ channel_dim = 1
else:
- channel_dim = 3
+ channel_dim = 3
incept = tf.concat(channel_dim, [conv1, conv3, conv5, pool])
return incept
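`_inception` only has to pick the concatenation axis: the channel dimension is 1 for NCHW and 3 for NHWC. A NumPy sketch of the resulting shape, with hypothetical branch widths:

```python
import numpy as np

n, h, w = 2, 28, 28
branches = [np.zeros((n, c, h, w)) for c in (64, 128, 32, 32)]  # NCHW branches
incept = np.concatenate(branches, axis=1)  # axis=3 for an NHWC layout
print(incept.shape)  # (2, 256, 28, 28): output channels are the branch sum
```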
@@ -139,40 +159,40 @@ def loss(logits, labels):
labels = tf.expand_dims(labels, 1)
indices = tf.expand_dims(tf.range(0, batch_size, 1), 1)
concated = tf.concat(1, [indices, labels])
- onehot_labels = tf.sparse_to_dense(
- concated, tf.pack([batch_size, 1000]), 1.0, 0.0)
- cross_entropy = tf.nn.softmax_cross_entropy_with_logits(logits,
- onehot_labels,
- name='xentropy')
+ onehot_labels = tf.sparse_to_dense(concated,
+ tf.pack([batch_size, 1000]), 1.0, 0.0)
+ cross_entropy = tf.nn.softmax_cross_entropy_with_logits(
+ logits, onehot_labels, name='xentropy')
loss = tf.reduce_mean(cross_entropy, name='xentropy_mean')
return loss
+
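The expand_dims/concat/sparse_to_dense sequence in `loss` is just one-hot encoding: each (row, label) coordinate of a `[batch_size, 1000]` zero matrix is set to 1.0. An equivalent NumPy sketch (the labels and the small class count are invented):

```python
import numpy as np

labels = np.array([3, 0, 2])             # hypothetical class ids
num_classes = 4                          # 1000 in the GoogLeNet code
onehot = np.zeros((len(labels), num_classes), dtype=np.float32)
onehot[np.arange(len(labels)), labels] = 1.0
print(onehot)
```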
def inference(images):
# stage 1
- conv1 = _conv (images, 3, 64, 7, 7, 2, 2, 'SAME')
- pool1 = _mpool(conv1, 3, 3, 2, 2, 'SAME')
+ conv1 = _conv(images, 3, 64, 7, 7, 2, 2, 'SAME')
+ pool1 = _mpool(conv1, 3, 3, 2, 2, 'SAME')
# stage 2
- conv2 = _conv (pool1, 64, 64, 1, 1, 1, 1, 'VALID')
- conv3 = _conv (conv2, 64, 192, 3, 3, 1, 1, 'SAME')
- pool3 = _mpool(conv3, 3, 3, 2, 2, 'SAME')
+ conv2 = _conv(pool1, 64, 64, 1, 1, 1, 1, 'VALID')
+ conv3 = _conv(conv2, 64, 192, 3, 3, 1, 1, 'SAME')
+ pool3 = _mpool(conv3, 3, 3, 2, 2, 'SAME')
# stage 3
- incept3a = _inception(pool3, 192, 64, 96, 128, 16, 32, 3, 32)
+ incept3a = _inception(pool3, 192, 64, 96, 128, 16, 32, 3, 32)
incept3b = _inception(incept3a, 256, 128, 128, 192, 32, 96, 3, 64)
- pool4 = _mpool(incept3b, 3, 3, 2, 2, 'SAME')
+ pool4 = _mpool(incept3b, 3, 3, 2, 2, 'SAME')
# stage 4
- incept4a = _inception(pool4, 480, 192, 96, 208, 16, 48, 3, 64)
+ incept4a = _inception(pool4, 480, 192, 96, 208, 16, 48, 3, 64)
incept4b = _inception(incept4a, 512, 160, 112, 224, 24, 64, 3, 64)
incept4c = _inception(incept4b, 512, 128, 128, 256, 24, 64, 3, 64)
incept4d = _inception(incept4c, 512, 112, 144, 288, 32, 64, 3, 64)
incept4e = _inception(incept4d, 528, 256, 160, 320, 32, 128, 3, 128)
- pool5 = _mpool(incept4e, 3, 3, 2, 2, 'SAME')
+ pool5 = _mpool(incept4e, 3, 3, 2, 2, 'SAME')
# stage 5
- incept5a = _inception(pool5, 832, 256, 160, 320, 32, 128, 3, 128)
+ incept5a = _inception(pool5, 832, 256, 160, 320, 32, 128, 3, 128)
incept5b = _inception(incept5a, 832, 384, 192, 384, 48, 128, 3, 128)
- pool6 = _apool(incept5b, 7, 7, 1, 1, 'VALID')
+ pool6 = _apool(incept5b, 7, 7, 1, 1, 'VALID')
# output 1
resh1 = tf.reshape(pool6, [-1, 1024])
@@ -183,100 +203,109 @@ def inference(images):
def time_tensorflow_run(session, target, info_string):
- num_steps_burn_in = 10
- total_duration = 0.0
- total_duration_squared = 0.0
- if not isinstance(target, list):
- target = [target]
- target_op = tf.group(*target)
- for i in range(FLAGS.num_batches + num_steps_burn_in):
- start_time = time.time()
- _ = session.run(target_op)
- duration = time.time() - start_time
- if i > num_steps_burn_in:
- if not i % 10:
- print ('%s: step %d, duration = %.3f' %
- (datetime.now(), i - num_steps_burn_in, duration))
- total_duration += duration
- total_duration_squared += duration * duration
- mn = total_duration / FLAGS.num_batches
- vr = total_duration_squared / FLAGS.num_batches - mn * mn
- sd = math.sqrt(vr)
- print ('%s: %s across %d steps, %.3f +/- %.3f sec / batch' %
- (datetime.now(), info_string, FLAGS.num_batches, mn, sd))
+ num_steps_burn_in = 10
+ total_duration = 0.0
+ total_duration_squared = 0.0
+ if not isinstance(target, list):
+ target = [target]
+ target_op = tf.group(*target)
+ for i in range(FLAGS.num_batches + num_steps_burn_in):
+ start_time = time.time()
+ _ = session.run(target_op)
+ duration = time.time() - start_time
+ if i > num_steps_burn_in:
+ if not i % 10:
+ print('%s: step %d, duration = %.3f' %
+ (datetime.now(), i - num_steps_burn_in, duration))
+ total_duration += duration
+ total_duration_squared += duration * duration
+ mn = total_duration / FLAGS.num_batches
+ vr = total_duration_squared / FLAGS.num_batches - mn * mn
+ sd = math.sqrt(vr)
+ print('%s: %s across %d steps, %.3f +/- %.3f sec / batch' %
+ (datetime.now(), info_string, FLAGS.num_batches, mn, sd))
+
def run_benchmark():
- global parameters
- with tf.Graph().as_default():
- # Generate some dummy images.
- image_size = 224
- if FLAGS.data_format == 'NCHW':
- image_shape = [FLAGS.batch_size, 3, image_size, image_size]
- else:
- image_shape = [FLAGS.batch_size, image_size, image_size, 3]
-
- images = tf.get_variable('image', image_shape,
- initializer=tf.truncated_normal_initializer(stddev=0.1, dtype=tf.float32),
- dtype=tf.float32,
- trainable=False)
-
- labels = tf.get_variable('label', [FLAGS.batch_size],
- initializer=tf.constant_initializer(1),
- dtype=tf.int32,
- trainable=False)
-
- # Build a Graph that computes the logits predictions from the
- # inference model.
- last_layer = inference(images)
-
- objective = loss(last_layer, labels)
-
- # Compute gradients.
- # opt = tf.train.GradientDescentOptimizer(0.001)
- opt = tf.train.MomentumOptimizer(0.001, 0.9)
- grads = opt.compute_gradients(objective)
- global_step = tf.get_variable('global_step', [],
- initializer=tf.constant_initializer(0.0, dtype=tf.float32),
- trainable=False, dtype=tf.float32)
- apply_gradient_op = opt.apply_gradients(grads, global_step=global_step)
-
- # Track the moving averages of all trainable variables.
- variable_averages = tf.train.ExponentialMovingAverage(
- 0.9, global_step)
- variables_averages_op = variable_averages.apply(tf.trainable_variables())
-
- # Build an initialization operation.
- init = tf.initialize_all_variables()
-
- # Start running operations on the Graph.
- sess = tf.Session(config=tf.ConfigProto(
- allow_soft_placement=True,
- log_device_placement=FLAGS.log_device_placement))
- sess.run(init)
-
- run_forward = True
- run_forward_backward = True
- if FLAGS.forward_only and FLAGS.forward_backward_only:
- raise ValueError("Cannot specify --forward_only and "
- "--forward_backward_only at the same time.")
- if FLAGS.forward_only:
- run_forward_backward = False
- elif FLAGS.forward_backward_only:
- run_forward = False
-
- if run_forward:
- # Run the forward benchmark.
- time_tensorflow_run(sess, last_layer, "Forward")
-
- if run_forward_backward:
- with tf.control_dependencies([apply_gradient_op, variables_averages_op]):
- train_op = tf.no_op(name='train')
- time_tensorflow_run(sess, [train_op, objective], "Forward-backward")
+ global parameters
+ with tf.Graph().as_default():
+ # Generate some dummy images.
+ image_size = 224
+ if FLAGS.data_format == 'NCHW':
+ image_shape = [FLAGS.batch_size, 3, image_size, image_size]
+ else:
+ image_shape = [FLAGS.batch_size, image_size, image_size, 3]
+
+ images = tf.get_variable(
+ 'image',
+ image_shape,
+ initializer=tf.truncated_normal_initializer(
+ stddev=0.1, dtype=tf.float32),
+ dtype=tf.float32,
+ trainable=False)
+
+ labels = tf.get_variable(
+ 'label', [FLAGS.batch_size],
+ initializer=tf.constant_initializer(1),
+ dtype=tf.int32,
+ trainable=False)
+
+ # Build a Graph that computes the logits predictions from the
+ # inference model.
+ last_layer = inference(images)
+
+ objective = loss(last_layer, labels)
+
+ # Compute gradients.
+ # opt = tf.train.GradientDescentOptimizer(0.001)
+ opt = tf.train.MomentumOptimizer(0.001, 0.9)
+ grads = opt.compute_gradients(objective)
+ global_step = tf.get_variable(
+ 'global_step', [],
+ initializer=tf.constant_initializer(
+ 0.0, dtype=tf.float32),
+ trainable=False,
+ dtype=tf.float32)
+ apply_gradient_op = opt.apply_gradients(grads, global_step=global_step)
+
+ # Track the moving averages of all trainable variables.
+ variable_averages = tf.train.ExponentialMovingAverage(0.9, global_step)
+        variables_averages_op = variable_averages.apply(tf.trainable_variables())
+
+ # Build an initialization operation.
+ init = tf.initialize_all_variables()
+
+ # Start running operations on the Graph.
+ sess = tf.Session(config=tf.ConfigProto(
+ allow_soft_placement=True,
+ log_device_placement=FLAGS.log_device_placement))
+ sess.run(init)
+
+ run_forward = True
+ run_forward_backward = True
+ if FLAGS.forward_only and FLAGS.forward_backward_only:
+ raise ValueError("Cannot specify --forward_only and "
+ "--forward_backward_only at the same time.")
+ if FLAGS.forward_only:
+ run_forward_backward = False
+ elif FLAGS.forward_backward_only:
+ run_forward = False
+
+ if run_forward:
+ # Run the forward benchmark.
+ time_tensorflow_run(sess, last_layer, "Forward")
+
+ if run_forward_backward:
+ with tf.control_dependencies(
+ [apply_gradient_op, variables_averages_op]):
+ train_op = tf.no_op(name='train')
+ time_tensorflow_run(sess, [train_op, objective], "Forward-backward")
def main(_):
- run_benchmark()
+ run_benchmark()
if __name__ == '__main__':
- tf.app.run()
+ tf.app.run()
diff --git a/benchmark/tensorflow/image/googlenet_multi_gpu.py b/benchmark/tensorflow/image/googlenet_multi_gpu.py
index e22a6b6253eedcbc2680309a29de10c9dd2bf4ff..31466faa37c47c66e4fe4628e28c867875e89f2e 100644
--- a/benchmark/tensorflow/image/googlenet_multi_gpu.py
+++ b/benchmark/tensorflow/image/googlenet_multi_gpu.py
@@ -9,10 +9,8 @@ import tensorflow as tf
FLAGS = tf.app.flags.FLAGS
-tf.app.flags.DEFINE_integer('batch_size', 64,
- """Batch size.""")
-tf.app.flags.DEFINE_integer('num_batches', 100,
- """Number of batches to run.""")
+tf.app.flags.DEFINE_integer('batch_size', 64, """Batch size.""")
+tf.app.flags.DEFINE_integer('num_batches', 100, """Number of batches to run.""")
tf.app.flags.DEFINE_string('data_format', 'NCHW',
"""The data format for Convnet operations.
Can be either NHWC or NCHW.
@@ -21,97 +19,117 @@ tf.app.flags.DEFINE_string('data_format', 'NCHW',
tf.app.flags.DEFINE_string('train_dir', '/train_model',
"""Directory where to write event logs """
"""and checkpoint.""")
-tf.app.flags.DEFINE_integer('num_gpus', 4,
- """How many GPUs to use.""")
+tf.app.flags.DEFINE_integer('num_gpus', 4, """How many GPUs to use.""")
tf.app.flags.DEFINE_boolean('log_device_placement', False,
"""Whether to log device placement.""")
-NUM_EXAMPLES_PER_EPOCH_FOR_TRAIN=50000
-NUM_EPOCHS_PER_DECAY=50
-INITIAL_LEARNING_RATE = 0.1
+NUM_EXAMPLES_PER_EPOCH_FOR_TRAIN = 50000
+NUM_EPOCHS_PER_DECAY = 50
+INITIAL_LEARNING_RATE = 0.1
LEARNING_RATE_DECAY_FACTOR = 0.1
TOWER_NAME = 'tower'
def _conv(name, inpOp, nIn, nOut, kH, kW, dH, dW, padType, wd=0.005):
with tf.name_scope(name) as scope:
- kernel = tf.get_variable(name + '_w',[kH, kW, nIn, nOut],
- initializer=tf.truncated_normal_initializer(stddev=0.01, dtype=tf.float32),
- dtype=tf.float32)
+ kernel = tf.get_variable(
+ name + '_w', [kH, kW, nIn, nOut],
+ initializer=tf.truncated_normal_initializer(
+ stddev=0.01, dtype=tf.float32),
+ dtype=tf.float32)
if wd is not None:
weight_decay = tf.mul(tf.nn.l2_loss(kernel), wd, name='weight_loss')
tf.add_to_collection('losses', weight_decay)
if FLAGS.data_format == 'NCHW':
- strides = [1, 1, dH, dW]
+ strides = [1, 1, dH, dW]
else:
- strides = [1, dH, dW, 1]
- conv = tf.nn.conv2d(inpOp, kernel, strides, padding=padType,
- data_format=FLAGS.data_format)
-
- biases = tf.get_variable(name=name + '_b', shape=[nOut],
- initializer=tf.constant_initializer(value=0.0, dtype=tf.float32),
+ strides = [1, dH, dW, 1]
+ conv = tf.nn.conv2d(
+ inpOp,
+ kernel,
+ strides,
+ padding=padType,
+ data_format=FLAGS.data_format)
+
+ biases = tf.get_variable(
+ name=name + '_b',
+ shape=[nOut],
+ initializer=tf.constant_initializer(
+ value=0.0, dtype=tf.float32),
dtype=tf.float32)
bias = tf.reshape(
- tf.nn.bias_add(conv, biases, data_format=FLAGS.data_format),
+ tf.nn.bias_add(
+ conv, biases, data_format=FLAGS.data_format),
conv.get_shape())
conv1 = tf.nn.relu(bias, name=scope)
return conv1
+
def _affine(name, inpOp, nIn, nOut, wd=0.005, act=True):
with tf.name_scope(name) as scope:
- kernel = tf.get_variable(name + '_w', [nIn, nOut],
- initializer=tf.truncated_normal_initializer(stddev=0.01, dtype=tf.float32),
+ kernel = tf.get_variable(
+ name + '_w', [nIn, nOut],
+ initializer=tf.truncated_normal_initializer(
+ stddev=0.01, dtype=tf.float32),
dtype=tf.float32)
if wd is not None:
weight_decay = tf.mul(tf.nn.l2_loss(kernel), wd, name='weight_loss')
tf.add_to_collection('losses', weight_decay)
- biases = tf.get_variable(name + '_b', [nOut],
- initializer=tf.constant_initializer(value=0.0, dtype=tf.float32),
- dtype=tf.float32,trainable=True)
+ biases = tf.get_variable(
+ name + '_b', [nOut],
+ initializer=tf.constant_initializer(
+ value=0.0, dtype=tf.float32),
+ dtype=tf.float32,
+ trainable=True)
affine1 = tf.nn.relu_layer(inpOp, kernel, biases, name=name) if act else \
tf.matmul(inpOp, kernel) + biases
return affine1
+
def _mpool(name, inpOp, kH, kW, dH, dW, padding):
if FLAGS.data_format == 'NCHW':
- ksize = [1, 1, kH, kW]
- strides = [1, 1, dH, dW]
+ ksize = [1, 1, kH, kW]
+ strides = [1, 1, dH, dW]
else:
- ksize = [1, kH, kW, 1]
- strides = [1, dH, dW, 1]
- return tf.nn.max_pool(inpOp,
- ksize=ksize,
- strides=strides,
- padding=padding,
- data_format=FLAGS.data_format,
- name=name)
+ ksize = [1, kH, kW, 1]
+ strides = [1, dH, dW, 1]
+ return tf.nn.max_pool(
+ inpOp,
+ ksize=ksize,
+ strides=strides,
+ padding=padding,
+ data_format=FLAGS.data_format,
+ name=name)
+
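A quick way to sanity-check the shapes these pooling wrappers produce: under TensorFlow's conventions, SAME padding yields ceil(in / stride) and VALID yields ceil((in - k + 1) / stride) per spatial dimension. A small helper under that assumption (sizes invented):

```python
import math

def pool_out(in_size, k, stride, padding):
    if padding == 'SAME':
        return int(math.ceil(in_size / float(stride)))
    return int(math.ceil((in_size - k + 1) / float(stride)))  # VALID

print(pool_out(224, 3, 2, 'SAME'))   # 112
print(pool_out(224, 3, 2, 'VALID'))  # 111
```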
def _apool(name, inpOp, kH, kW, dH, dW, padding):
if FLAGS.data_format == 'NCHW':
- ksize = [1, 1, kH, kW]
- strides = [1, 1, dH, dW]
+ ksize = [1, 1, kH, kW]
+ strides = [1, 1, dH, dW]
else:
- ksize = [1, kH, kW, 1]
- strides = [1, dH, dW, 1]
- return tf.nn.avg_pool(inpOp,
- ksize=ksize,
- strides=strides,
- padding=padding,
- data_format=FLAGS.data_format,
- name=name)
+ ksize = [1, kH, kW, 1]
+ strides = [1, dH, dW, 1]
+ return tf.nn.avg_pool(
+ inpOp,
+ ksize=ksize,
+ strides=strides,
+ padding=padding,
+ data_format=FLAGS.data_format,
+ name=name)
+
def loss(logits, labels):
labels = tf.cast(labels, tf.int64)
cross_entropy = tf.nn.sparse_softmax_cross_entropy_with_logits(
- logits, labels, name='cross_entropy_per_example')
+ logits, labels, name='cross_entropy_per_example')
cross_entropy_mean = tf.reduce_mean(cross_entropy, name='cross_entropy')
tf.add_to_collection('losses', cross_entropy_mean)
@@ -131,7 +149,7 @@ def get_incoming_shape(incoming):
def _inception(name, inp, inSize, o1s, o2s1, o2s2, o3s1, o3s2, o4s1, o4s2):
- conv1 = _conv(name + '_1' , inp, inSize, o1s, 1, 1, 1, 1, 'VALID')
+ conv1 = _conv(name + '_1', inp, inSize, o1s, 1, 1, 1, 1, 'VALID')
conv3_ = _conv(name + '_3r', inp, inSize, o2s1, 1, 1, 1, 1, 'VALID')
conv3 = _conv(name + '_3', conv3_, o2s1, o2s2, 3, 3, 1, 1, 'SAME')
@@ -143,40 +161,42 @@ def _inception(name, inp, inSize, o1s, o2s1, o2s2, o3s1, o3s2, o4s1, o4s2):
pool = _conv(name + 'proj', pool_, inSize, o4s2, 1, 1, 1, 1, 'VALID')
if FLAGS.data_format == 'NCHW':
- channel_dim = 1
+ channel_dim = 1
else:
- channel_dim = 3
+ channel_dim = 3
incept = tf.concat(channel_dim, [conv1, conv3, conv5, pool])
return incept
def inference(images):
# stage 1
- conv1 = _conv ('conv1', images, 3, 64, 7, 7, 2, 2, 'SAME')
- pool1 = _mpool('pool1', conv1, 3, 3, 2, 2, 'SAME')
+ conv1 = _conv('conv1', images, 3, 64, 7, 7, 2, 2, 'SAME')
+ pool1 = _mpool('pool1', conv1, 3, 3, 2, 2, 'SAME')
# stage 2
- conv2 = _conv ('conv2', pool1, 64, 64, 1, 1, 1, 1, 'VALID')
- conv3 = _conv ('conv3', conv2, 64, 192, 3, 3, 1, 1, 'SAME')
- pool3 = _mpool('pool3', conv3, 3, 3, 2, 2, 'SAME')
+ conv2 = _conv('conv2', pool1, 64, 64, 1, 1, 1, 1, 'VALID')
+ conv3 = _conv('conv3', conv2, 64, 192, 3, 3, 1, 1, 'SAME')
+ pool3 = _mpool('pool3', conv3, 3, 3, 2, 2, 'SAME')
# stage 3
- incept3a = _inception('ince3a', pool3, 192, 64, 96, 128, 16, 32, 3, 32)
+ incept3a = _inception('ince3a', pool3, 192, 64, 96, 128, 16, 32, 3, 32)
incept3b = _inception('ince3b', incept3a, 256, 128, 128, 192, 32, 96, 3, 64)
- pool4 = _mpool('pool4', incept3b, 3, 3, 2, 2, 'SAME')
+ pool4 = _mpool('pool4', incept3b, 3, 3, 2, 2, 'SAME')
# stage 4
- incept4a = _inception('ince4a', pool4, 480, 192, 96, 208, 16, 48, 3, 64)
+ incept4a = _inception('ince4a', pool4, 480, 192, 96, 208, 16, 48, 3, 64)
incept4b = _inception('ince4b', incept4a, 512, 160, 112, 224, 24, 64, 3, 64)
incept4c = _inception('ince4c', incept4b, 512, 128, 128, 256, 24, 64, 3, 64)
incept4d = _inception('ince4d', incept4c, 512, 112, 144, 288, 32, 64, 3, 64)
- incept4e = _inception('ince4e', incept4d, 528, 256, 160, 320, 32, 128, 3, 128)
- pool5 = _mpool('pool5', incept4e, 3, 3, 2, 2, 'SAME')
+ incept4e = _inception('ince4e', incept4d, 528, 256, 160, 320, 32, 128, 3,
+ 128)
+ pool5 = _mpool('pool5', incept4e, 3, 3, 2, 2, 'SAME')
# stage 5
- incept5a = _inception('ince5a', pool5, 832, 256, 160, 320, 32, 128, 3, 128)
- incept5b = _inception('ince5b', incept5a, 832, 384, 192, 384, 48, 128, 3, 128)
- pool6 = _apool('pool6', incept5b, 7, 7, 1, 1, 'VALID')
+ incept5a = _inception('ince5a', pool5, 832, 256, 160, 320, 32, 128, 3, 128)
+ incept5b = _inception('ince5b', incept5a, 832, 384, 192, 384, 48, 128, 3,
+ 128)
+ pool6 = _apool('pool6', incept5b, 7, 7, 1, 1, 'VALID')
# output 1
resh1 = tf.reshape(pool6, [-1, 1024])
@@ -185,6 +205,7 @@ def inference(images):
return affn1
+
def tower_loss(scope):
"""Calculate the total loss on a single tower running the model.
Args:
@@ -197,15 +218,19 @@ def tower_loss(scope):
image_shape = [FLAGS.batch_size, 3, image_size, image_size]
else:
image_shape = [FLAGS.batch_size, image_size, image_size, 3]
- images = tf.get_variable('image', image_shape,
- initializer=tf.truncated_normal_initializer(stddev=0.1, dtype=tf.float32),
- dtype=tf.float32,
- trainable=False)
-
- labels = tf.get_variable('label', [FLAGS.batch_size],
- initializer=tf.constant_initializer(1),
- dtype=tf.int32,
- trainable=False)
+ images = tf.get_variable(
+ 'image',
+ image_shape,
+ initializer=tf.truncated_normal_initializer(
+ stddev=0.1, dtype=tf.float32),
+ dtype=tf.float32,
+ trainable=False)
+
+ labels = tf.get_variable(
+ 'label', [FLAGS.batch_size],
+ initializer=tf.constant_initializer(1),
+ dtype=tf.int32,
+ trainable=False)
# Build a Graph that computes the logits predictions from the
# inference model.
@@ -214,7 +239,7 @@ def tower_loss(scope):
# Build the portion of the Graph calculating the losses. Note that we will
# assemble the total_loss using a custom function below.
_ = loss(last_layer, labels)
-
+
# Assemble all of the losses for the current tower only.
losses = tf.get_collection('losses', scope)
@@ -233,7 +258,7 @@ def tower_loss(scope):
loss_name = re.sub('%s_[0-9]*/' % TOWER_NAME, '', l.op.name)
# Name each loss as '(raw)' and name the moving average version of the loss
# as the original loss name.
- tf.scalar_summary(loss_name +' (raw)', l)
+ tf.scalar_summary(loss_name + ' (raw)', l)
tf.scalar_summary(loss_name, loss_averages.average(l))
with tf.control_dependencies([loss_averages_op]):
@@ -242,7 +267,7 @@ def tower_loss(scope):
def average_gradients(tower_grads):
- """Calculate the average gradient for each shared variable across all towers.
+ """Calculate the average gradient for each shared variable across all towers.
Note that this function provides a synchronization point across all towers.
Args:
tower_grads: List of lists of (gradient, variable) tuples. The outer list
@@ -252,130 +277,135 @@ def average_gradients(tower_grads):
List of pairs of (gradient, variable) where the gradient has been averaged
across all towers.
"""
- average_grads = []
- for grad_and_vars in zip(*tower_grads):
- # Note that each grad_and_vars looks like the following:
- # ((grad0_gpu0, var0_gpu0), ... , (grad0_gpuN, var0_gpuN))
- grads = []
- for g, _ in grad_and_vars:
- # Add 0 dimension to the gradients to represent the tower.
- expanded_g = tf.expand_dims(g, 0)
-
- # Append on a 'tower' dimension which we will average over below.
- grads.append(expanded_g)
-
- # Average over the 'tower' dimension.
- grad = tf.concat(0, grads)
- grad = tf.reduce_mean(grad, 0)
-
- # Keep in mind that the Variables are redundant because they are shared
- # across towers. So .. we will just return the first tower's pointer to
- # the Variable.
- v = grad_and_vars[0][1]
- grad_and_var = (grad, v)
- average_grads.append(grad_and_var)
- return average_grads
+ average_grads = []
+ for grad_and_vars in zip(*tower_grads):
+ # Note that each grad_and_vars looks like the following:
+ # ((grad0_gpu0, var0_gpu0), ... , (grad0_gpuN, var0_gpuN))
+ grads = []
+ for g, _ in grad_and_vars:
+ # Add 0 dimension to the gradients to represent the tower.
+ expanded_g = tf.expand_dims(g, 0)
+
+ # Append on a 'tower' dimension which we will average over below.
+ grads.append(expanded_g)
+
+ # Average over the 'tower' dimension.
+ grad = tf.concat(0, grads)
+ grad = tf.reduce_mean(grad, 0)
+
+ # Keep in mind that the Variables are redundant because they are shared
+      # across towers. So we will just return the first tower's pointer to
+ # the Variable.
+ v = grad_and_vars[0][1]
+ grad_and_var = (grad, v)
+ average_grads.append(grad_and_var)
+ return average_grads
+
def time_tensorflow_run(session, target):
num_steps_burn_in = 50
total_duration = 0.0
total_duration_squared = 0.0
for i in xrange(FLAGS.num_batches + num_steps_burn_in):
- start_time = time.time()
- _, loss_value = session.run(target)
- duration = time.time() - start_time
- if i > num_steps_burn_in:
- if not i % 10:
- num_examples_per_step = FLAGS.batch_size * FLAGS.num_gpus
- examples_per_sec = num_examples_per_step / duration
- sec_per_batch = duration
-
- format_str = ('%s: step %d, loss = %.2f (%.1f examples/sec; %.3f '
- 'sec/batch batch_size = %d)')
- print (format_str %
- (datetime.now(), i - num_steps_burn_in,
- loss_value, duration, sec_per_batch, num_examples_per_step))
-
- total_duration += duration
- total_duration_squared += duration * duration
+ start_time = time.time()
+ _, loss_value = session.run(target)
+ duration = time.time() - start_time
+ if i > num_steps_burn_in:
+ if not i % 10:
+ num_examples_per_step = FLAGS.batch_size * FLAGS.num_gpus
+ examples_per_sec = num_examples_per_step / duration
+ sec_per_batch = duration
+
+ format_str = (
+ '%s: step %d, loss = %.2f (%.1f examples/sec; %.3f '
+ 'sec/batch batch_size = %d)')
+ print(format_str %
+ (datetime.now(), i - num_steps_burn_in, loss_value,
+                     examples_per_sec, sec_per_batch, num_examples_per_step))
+
+ total_duration += duration
+ total_duration_squared += duration * duration
mn = total_duration / FLAGS.num_batches
vr = total_duration_squared / FLAGS.num_batches - mn * mn
sd = math.sqrt(vr)
- print ('%s: FwdBwd across %d steps, %.3f +/- %.3f sec / batch' %
+ print('%s: FwdBwd across %d steps, %.3f +/- %.3f sec / batch' %
(datetime.now(), FLAGS.num_batches, mn, sd))
+
def run_benchmark():
- with tf.Graph().as_default(), tf.device('/cpu:0'):
- # Create a variable to count the number of train() calls. This equals the
- # number of batches processed * FLAGS.num_gpus.
- global_step = tf.get_variable(
- 'global_step', [],
- initializer=tf.constant_initializer(0), trainable=False)
-
- # Calculate the learning rate schedule.
- num_batches_per_epoch = (NUM_EXAMPLES_PER_EPOCH_FOR_TRAIN /
- FLAGS.batch_size)
- decay_steps = int(num_batches_per_epoch * NUM_EPOCHS_PER_DECAY)
-
- # Decay the learning rate exponentially based on the number of steps.
- lr = tf.train.exponential_decay(INITIAL_LEARNING_RATE,
- global_step,
- decay_steps,
- LEARNING_RATE_DECAY_FACTOR,
- staircase=True)
-
- # Create an optimizer that performs gradient descent.
- opt = tf.train.MomentumOptimizer(lr, 0.9)
-
- # Calculate the gradients for each model tower.
- tower_grads = []
- for i in xrange(FLAGS.num_gpus):
- with tf.device('/gpu:%d' % i):
- with tf.name_scope('%s_%d' % (TOWER_NAME, i)) as scope:
- # Calculate the loss for one tower of the model. This function
- # constructs the entire model but shares the variables across
- # all towers.
- loss = tower_loss(scope)
-
- # Reuse variables for the next tower.
- tf.get_variable_scope().reuse_variables()
-
- # Retain the summaries from the final tower.
- summaries = tf.get_collection(tf.GraphKeys.SUMMARIES, scope)
-
- # Calculate the gradients for the batch of data on this tower.
- grads = opt.compute_gradients(loss)
-
- # Keep track of the gradients across all towers.
- tower_grads.append(grads)
-
- # We must calculate the mean of each gradient. Note that this is the
- # synchronization point across all towers.
- grads = average_gradients(tower_grads)
-
- # Apply the gradients to adjust the shared variables.
- apply_gradient_op = opt.apply_gradients(grads, global_step=global_step)
-
- # Group all updates to into a single train op.
- train_op = tf.group(apply_gradient_op)
-
- # Build an initialization operation.
- init = tf.initialize_all_variables()
-
- # Start running operations on the Graph. allow_soft_placement must be set to
- # True to build towers on GPU, as some of the ops do not have GPU
- # implementations.
- sess = tf.Session(config=tf.ConfigProto(
- allow_soft_placement=True,
- log_device_placement=FLAGS.log_device_placement))
- sess.run(init)
- time_tensorflow_run(sess, [train_op, loss])
+ with tf.Graph().as_default(), tf.device('/cpu:0'):
+ # Create a variable to count the number of train() calls. This equals the
+ # number of batches processed * FLAGS.num_gpus.
+ global_step = tf.get_variable(
+ 'global_step', [],
+ initializer=tf.constant_initializer(0),
+ trainable=False)
+
+ # Calculate the learning rate schedule.
+ num_batches_per_epoch = (NUM_EXAMPLES_PER_EPOCH_FOR_TRAIN /
+ FLAGS.batch_size)
+ decay_steps = int(num_batches_per_epoch * NUM_EPOCHS_PER_DECAY)
+
+ # Decay the learning rate exponentially based on the number of steps.
+ lr = tf.train.exponential_decay(
+ INITIAL_LEARNING_RATE,
+ global_step,
+ decay_steps,
+ LEARNING_RATE_DECAY_FACTOR,
+ staircase=True)
+
+ # Create an optimizer that performs gradient descent.
+ opt = tf.train.MomentumOptimizer(lr, 0.9)
+
+ # Calculate the gradients for each model tower.
+ tower_grads = []
+ for i in xrange(FLAGS.num_gpus):
+ with tf.device('/gpu:%d' % i):
+ with tf.name_scope('%s_%d' % (TOWER_NAME, i)) as scope:
+ # Calculate the loss for one tower of the model. This function
+ # constructs the entire model but shares the variables across
+ # all towers.
+ loss = tower_loss(scope)
+
+ # Reuse variables for the next tower.
+ tf.get_variable_scope().reuse_variables()
+
+ # Retain the summaries from the final tower.
+ summaries = tf.get_collection(tf.GraphKeys.SUMMARIES, scope)
+
+ # Calculate the gradients for the batch of data on this tower.
+ grads = opt.compute_gradients(loss)
+
+ # Keep track of the gradients across all towers.
+ tower_grads.append(grads)
+
+ # We must calculate the mean of each gradient. Note that this is the
+ # synchronization point across all towers.
+ grads = average_gradients(tower_grads)
+
+ # Apply the gradients to adjust the shared variables.
+ apply_gradient_op = opt.apply_gradients(grads, global_step=global_step)
+
+    # Group all updates into a single train op.
+ train_op = tf.group(apply_gradient_op)
+
+ # Build an initialization operation.
+ init = tf.initialize_all_variables()
+
+ # Start running operations on the Graph. allow_soft_placement must be set to
+ # True to build towers on GPU, as some of the ops do not have GPU
+ # implementations.
+ sess = tf.Session(config=tf.ConfigProto(
+ allow_soft_placement=True,
+ log_device_placement=FLAGS.log_device_placement))
+ sess.run(init)
+ time_tensorflow_run(sess, [train_op, loss])
def main(_):
- run_benchmark()
+ run_benchmark()
if __name__ == '__main__':
- tf.app.run()
+ tf.app.run()
diff --git a/benchmark/tensorflow/image/smallnet_mnist_cifar.py b/benchmark/tensorflow/image/smallnet_mnist_cifar.py
index 679dd1ab32293f73481dfcc03f6491af95519f94..1a625134a6c58586b29190ede9c66253f484d2cf 100644
--- a/benchmark/tensorflow/image/smallnet_mnist_cifar.py
+++ b/benchmark/tensorflow/image/smallnet_mnist_cifar.py
@@ -8,10 +8,8 @@ import tensorflow as tf
FLAGS = tf.app.flags.FLAGS
-tf.app.flags.DEFINE_integer('batch_size', 128,
- """Batch size.""")
-tf.app.flags.DEFINE_integer('num_batches', 100,
- """Number of batches to run.""")
+tf.app.flags.DEFINE_integer('batch_size', 128, """Batch size.""")
+tf.app.flags.DEFINE_integer('num_batches', 100, """Number of batches to run.""")
tf.app.flags.DEFINE_boolean('forward_only', False,
"""Only run the forward pass.""")
tf.app.flags.DEFINE_boolean('forward_backward_only', False,
@@ -29,78 +27,97 @@ conv_counter = 1
pool_counter = 1
affine_counter = 1
+
def _conv(inpOp, nIn, nOut, kH, kW, dH, dW, padType, wd=0.005, act=True):
global conv_counter
global parameters
name = 'conv' + str(conv_counter)
conv_counter += 1
with tf.name_scope(name) as scope:
- kernel = tf.Variable(tf.truncated_normal([kH, kW, nIn, nOut],
- dtype=tf.float32,
- stddev=1e-1), name='weights')
+ kernel = tf.Variable(
+ tf.truncated_normal(
+ [kH, kW, nIn, nOut], dtype=tf.float32, stddev=1e-1),
+ name='weights')
if wd is not None:
weight_decay = tf.mul(tf.nn.l2_loss(kernel), wd, name='weight_loss')
tf.add_to_collection('losses', weight_decay)
if FLAGS.data_format == 'NCHW':
- strides = [1, 1, dH, dW]
+ strides = [1, 1, dH, dW]
else:
- strides = [1, dH, dW, 1]
- conv = tf.nn.conv2d(inpOp, kernel, strides, padding=padType,
- data_format=FLAGS.data_format)
- biases = tf.Variable(tf.constant(0.0, shape=[nOut], dtype=tf.float32),
- trainable=True, name='biases')
- bias = tf.reshape(tf.nn.bias_add(conv, biases,
- data_format=FLAGS.data_format),
- conv.get_shape())
+ strides = [1, dH, dW, 1]
+ conv = tf.nn.conv2d(
+ inpOp,
+ kernel,
+ strides,
+ padding=padType,
+ data_format=FLAGS.data_format)
+ biases = tf.Variable(
+ tf.constant(
+ 0.0, shape=[nOut], dtype=tf.float32),
+ trainable=True,
+ name='biases')
+ bias = tf.reshape(
+ tf.nn.bias_add(
+ conv, biases, data_format=FLAGS.data_format),
+ conv.get_shape())
conv1 = tf.nn.relu(bias, name=scope) if act else bias
-
+
parameters += [kernel, biases]
return conv1
+
def _affine(inpOp, nIn, nOut, wd=None, act=True):
global affine_counter
global parameters
name = 'affine' + str(affine_counter)
affine_counter += 1
with tf.name_scope(name) as scope:
- kernel = tf.Variable(tf.truncated_normal([nIn, nOut],
- dtype=tf.float32,
- stddev=1e-1), name='weights')
+ kernel = tf.Variable(
+ tf.truncated_normal(
+ [nIn, nOut], dtype=tf.float32, stddev=1e-1),
+ name='weights')
if wd is not None:
weight_decay = tf.mul(tf.nn.l2_loss(kernel), wd, name='weight_loss')
tf.add_to_collection('losses', weight_decay)
- biases = tf.Variable(tf.constant(0.0, shape=[nOut], dtype=tf.float32),
- trainable=True, name='biases')
+ biases = tf.Variable(
+ tf.constant(
+ 0.0, shape=[nOut], dtype=tf.float32),
+ trainable=True,
+ name='biases')
- affine1 = tf.nn.relu_layer(inpOp, kernel, biases, name=name) if act else tf.matmul(inpOp, kernel) + biases
+ affine1 = tf.nn.relu_layer(
+ inpOp, kernel, biases,
+ name=name) if act else tf.matmul(inpOp, kernel) + biases
parameters += [kernel, biases]
return affine1
+
def _mpool(inpOp, kH, kW, dH, dW, padding):
global pool_counter
global parameters
name = 'pool' + str(pool_counter)
pool_counter += 1
if FLAGS.data_format == 'NCHW':
- ksize = [1, 1, kH, kW]
- strides = [1, 1, dH, dW]
+ ksize = [1, 1, kH, kW]
+ strides = [1, 1, dH, dW]
else:
- ksize = [1, kH, kW, 1]
- strides = [1, dH, dW, 1]
- return tf.nn.max_pool(inpOp,
- ksize=ksize,
- strides=strides,
- padding=padding,
- data_format=FLAGS.data_format,
- name=name)
+ ksize = [1, kH, kW, 1]
+ strides = [1, dH, dW, 1]
+ return tf.nn.max_pool(
+ inpOp,
+ ksize=ksize,
+ strides=strides,
+ padding=padding,
+ data_format=FLAGS.data_format,
+ name=name)
def _apool(inpOp, kH, kW, dH, dW, padding):
@@ -109,36 +126,42 @@ def _apool(inpOp, kH, kW, dH, dW, padding):
name = 'pool' + str(pool_counter)
pool_counter += 1
if FLAGS.data_format == 'NCHW':
- ksize = [1, 1, kH, kW]
- strides = [1, 1, dH, dW]
+ ksize = [1, 1, kH, kW]
+ strides = [1, 1, dH, dW]
else:
- ksize = [1, kH, kW, 1]
- strides = [1, dH, dW, 1]
- return tf.nn.avg_pool(inpOp,
- ksize=ksize,
- strides=strides,
- padding=padding,
- data_format=FLAGS.data_format,
- name=name)
+ ksize = [1, kH, kW, 1]
+ strides = [1, dH, dW, 1]
+ return tf.nn.avg_pool(
+ inpOp,
+ ksize=ksize,
+ strides=strides,
+ padding=padding,
+ data_format=FLAGS.data_format,
+ name=name)
+
def _norm(name, l_input, lsize=4):
- return tf.nn.lrn(l_input, lsize, bias=1.0,
+ return tf.nn.lrn(l_input,
+ lsize,
+ bias=1.0,
alpha=0.001 / 9.0,
- beta=0.75, name=name)
+ beta=0.75,
+ name=name)
+
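`_norm` wraps local response normalization with the usual AlexNet constants (bias 1.0, alpha 0.001/9, beta 0.75). Per the LRN definition, each activation is divided by `(bias + alpha * sum of squared activations over neighbouring channels) ** beta`, where TF's window spans `depth_radius` channels on each side. A 1-D NumPy rendering over the channel axis (input invented):

```python
import numpy as np

def lrn_1d(x, depth_radius=4, bias=1.0, alpha=0.001 / 9.0, beta=0.75):
    # x: [channels]; TF sums squares over 2*depth_radius+1 nearby channels.
    out = np.empty_like(x)
    for c in range(len(x)):
        lo, hi = max(0, c - depth_radius), min(len(x), c + depth_radius + 1)
        sqr_sum = np.sum(x[lo:hi] ** 2)
        out[c] = x[c] / (bias + alpha * sqr_sum) ** beta
    return out

print(lrn_1d(np.ones(8, dtype=np.float32)))
```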
def loss(logits, labels):
batch_size = tf.size(labels)
labels = tf.expand_dims(labels, 1)
indices = tf.expand_dims(tf.range(0, batch_size, 1), 1)
concated = tf.concat(1, [indices, labels])
- onehot_labels = tf.sparse_to_dense(
- concated, tf.pack([batch_size, 10]), 1.0, 0.0)
- cross_entropy = tf.nn.softmax_cross_entropy_with_logits(logits,
- onehot_labels,
- name='xentropy')
+ onehot_labels = tf.sparse_to_dense(concated,
+ tf.pack([batch_size, 10]), 1.0, 0.0)
+ cross_entropy = tf.nn.softmax_cross_entropy_with_logits(
+ logits, onehot_labels, name='xentropy')
loss = tf.reduce_mean(cross_entropy, name='xentropy_mean')
return loss
+
def get_incoming_shape(incoming):
""" Returns the incoming data shape """
if isinstance(incoming, tf.Tensor):
@@ -148,125 +171,134 @@ def get_incoming_shape(incoming):
else:
raise Exception("Invalid incoming layer.")
+
def inference(images):
- conv1 = _conv (images, 3, 32, 5, 5, 1, 1, 'SAME')
- pool1 = _mpool(conv1, 3, 3, 2, 2, 'SAME')
- conv2 = _conv (pool1, 32, 32, 5, 5, 1, 1, 'SAME')
- pool2 = _apool(conv2, 3, 3, 2, 2, 'SAME')
- conv3 = _conv (pool2, 32, 64, 5, 5, 1, 1, 'SAME')
- pool3 = _apool(conv3, 3, 3, 2, 2, 'SAME')
+ conv1 = _conv(images, 3, 32, 5, 5, 1, 1, 'SAME')
+ pool1 = _mpool(conv1, 3, 3, 2, 2, 'SAME')
+ conv2 = _conv(pool1, 32, 32, 5, 5, 1, 1, 'SAME')
+ pool2 = _apool(conv2, 3, 3, 2, 2, 'SAME')
+ conv3 = _conv(pool2, 32, 64, 5, 5, 1, 1, 'SAME')
+ pool3 = _apool(conv3, 3, 3, 2, 2, 'SAME')
resh1 = tf.reshape(pool3, [-1, 64 * 4 * 4])
affn1 = _affine(resh1, 64 * 4 * 4, 64)
affn2 = _affine(affn1, 64, 10, act=False)
- print ('conv1:', get_incoming_shape(conv1))
- print ('pool1:', get_incoming_shape(pool1))
- print ('conv2:', get_incoming_shape(conv2))
- print ('pool2:', get_incoming_shape(pool2))
- print ('conv3:', get_incoming_shape(conv3))
- print ('pool3:', get_incoming_shape(pool3))
-
+ print('conv1:', get_incoming_shape(conv1))
+ print('pool1:', get_incoming_shape(pool1))
+ print('conv2:', get_incoming_shape(conv2))
+ print('pool2:', get_incoming_shape(pool2))
+ print('conv3:', get_incoming_shape(conv3))
+ print('pool3:', get_incoming_shape(pool3))
+
return affn2
def time_tensorflow_run(session, target, info_string):
- num_steps_burn_in = 10
- total_duration = 0.0
- total_duration_squared = 0.0
- if not isinstance(target, list):
- target = [target]
- target_op = tf.group(*target)
- for i in xrange(FLAGS.num_batches + num_steps_burn_in):
- start_time = time.time()
- _ = session.run(target_op)
- duration = time.time() - start_time
- if i > num_steps_burn_in:
- if not i % 10:
- print ('%s: step %d, duration = %.3f' %
- (datetime.now(), i - num_steps_burn_in, duration))
- total_duration += duration
- total_duration_squared += duration * duration
- mn = total_duration / FLAGS.num_batches
- vr = total_duration_squared / FLAGS.num_batches - mn * mn
- sd = math.sqrt(vr)
- print ('%s: %s across %d steps, %.3f +/- %.3f sec / batch' %
- (datetime.now(), info_string, FLAGS.num_batches, mn, sd))
+ num_steps_burn_in = 10
+ total_duration = 0.0
+ total_duration_squared = 0.0
+ if not isinstance(target, list):
+ target = [target]
+ target_op = tf.group(*target)
+ for i in xrange(FLAGS.num_batches + num_steps_burn_in):
+ start_time = time.time()
+ _ = session.run(target_op)
+ duration = time.time() - start_time
+ if i > num_steps_burn_in:
+ if not i % 10:
+ print('%s: step %d, duration = %.3f' %
+ (datetime.now(), i - num_steps_burn_in, duration))
+ total_duration += duration
+ total_duration_squared += duration * duration
+ mn = total_duration / FLAGS.num_batches
+ vr = total_duration_squared / FLAGS.num_batches - mn * mn
+ sd = math.sqrt(vr)
+ print('%s: %s across %d steps, %.3f +/- %.3f sec / batch' %
+ (datetime.now(), info_string, FLAGS.num_batches, mn, sd))
+
def run_benchmark():
- global parameters
- with tf.Graph().as_default():
- # Generate some dummy images.
- image_size = 32
- # Note that our padding definition is slightly different the cuda-convnet.
- # In order to force the model to start with the same activations sizes,
- # we add 3 to the image_size and employ VALID padding above.
- if FLAGS.data_format == 'NCHW':
- image_shape = [FLAGS.batch_size, 3, image_size, image_size]
- else:
- image_shape = [FLAGS.batch_size, image_size, image_size, 3]
-
- images = tf.get_variable('image', image_shape,
- initializer=tf.truncated_normal_initializer(stddev=0.1, dtype=tf.float32),
- dtype=tf.float32,
- trainable=False)
-
- labels = tf.get_variable('label', [FLAGS.batch_size],
- initializer=tf.constant_initializer(1),
- dtype=tf.int32,
- trainable=False)
-
- # Build a Graph that computes the logits predictions from the
- # inference model.
- last_layer = inference(images)
-
- objective = loss(last_layer, labels)
-
- # Compute gradients.
- opt = tf.train.MomentumOptimizer(0.001, 0.9)
- grads = opt.compute_gradients(objective)
- global_step = tf.get_variable('global_step', [],
- initializer=tf.constant_initializer(0.0, dtype=tf.float32),
- trainable=False, dtype=tf.float32)
- apply_gradient_op = opt.apply_gradients(grads, global_step=global_step)
-
- # Track the moving averages of all trainable variables.
- variable_averages = tf.train.ExponentialMovingAverage(
- 0.9, global_step)
- variables_averages_op = variable_averages.apply(tf.trainable_variables())
-
-
- # Build an initialization operation.
- init = tf.initialize_all_variables()
-
- # Start running operations on the Graph.
- sess = tf.Session(config=tf.ConfigProto(
- allow_soft_placement=True,
- log_device_placement=FLAGS.log_device_placement))
- sess.run(init)
-
- run_forward = True
- run_forward_backward = True
- if FLAGS.forward_only and FLAGS.forward_backward_only:
- raise ValueError("Cannot specify --forward_only and "
- "--forward_backward_only at the same time.")
- if FLAGS.forward_only:
- run_forward_backward = False
- elif FLAGS.forward_backward_only:
- run_forward = False
-
- if run_forward:
- # Run the forward benchmark.
- time_tensorflow_run(sess, last_layer, "Forward")
-
- if run_forward_backward:
- with tf.control_dependencies([apply_gradient_op, variables_averages_op]):
- train_op = tf.no_op(name='train')
- time_tensorflow_run(sess, [train_op, objective], "Forward-backward")
+ global parameters
+ with tf.Graph().as_default():
+ # Generate some dummy images.
+ image_size = 32
+        # Note that our padding definition is slightly different from cuda-convnet.
+ # In order to force the model to start with the same activations sizes,
+ # we add 3 to the image_size and employ VALID padding above.
+ if FLAGS.data_format == 'NCHW':
+ image_shape = [FLAGS.batch_size, 3, image_size, image_size]
+ else:
+ image_shape = [FLAGS.batch_size, image_size, image_size, 3]
+
+ images = tf.get_variable(
+ 'image',
+ image_shape,
+ initializer=tf.truncated_normal_initializer(
+ stddev=0.1, dtype=tf.float32),
+ dtype=tf.float32,
+ trainable=False)
+
+ labels = tf.get_variable(
+ 'label', [FLAGS.batch_size],
+ initializer=tf.constant_initializer(1),
+ dtype=tf.int32,
+ trainable=False)
+
+ # Build a Graph that computes the logits predictions from the
+ # inference model.
+ last_layer = inference(images)
+
+ objective = loss(last_layer, labels)
+
+ # Compute gradients.
+ opt = tf.train.MomentumOptimizer(0.001, 0.9)
+ grads = opt.compute_gradients(objective)
+ global_step = tf.get_variable(
+ 'global_step', [],
+ initializer=tf.constant_initializer(
+ 0.0, dtype=tf.float32),
+ trainable=False,
+ dtype=tf.float32)
+ apply_gradient_op = opt.apply_gradients(grads, global_step=global_step)
+
+ # Track the moving averages of all trainable variables.
+ variable_averages = tf.train.ExponentialMovingAverage(0.9, global_step)
+        variables_averages_op = variable_averages.apply(tf.trainable_variables())
+
+ # Build an initialization operation.
+ init = tf.initialize_all_variables()
+
+ # Start running operations on the Graph.
+ sess = tf.Session(config=tf.ConfigProto(
+ allow_soft_placement=True,
+ log_device_placement=FLAGS.log_device_placement))
+ sess.run(init)
+
+ run_forward = True
+ run_forward_backward = True
+ if FLAGS.forward_only and FLAGS.forward_backward_only:
+ raise ValueError("Cannot specify --forward_only and "
+ "--forward_backward_only at the same time.")
+ if FLAGS.forward_only:
+ run_forward_backward = False
+ elif FLAGS.forward_backward_only:
+ run_forward = False
+
+ if run_forward:
+ # Run the forward benchmark.
+ time_tensorflow_run(sess, last_layer, "Forward")
+
+ if run_forward_backward:
+ with tf.control_dependencies(
+ [apply_gradient_op, variables_averages_op]):
+ train_op = tf.no_op(name='train')
+ time_tensorflow_run(sess, [train_op, objective], "Forward-backward")
def main(_):
- run_benchmark()
+ run_benchmark()
if __name__ == '__main__':
- tf.app.run()
+ tf.app.run()
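The `64 * 4 * 4` in smallnet's reshape is easy to verify: the SAME convolutions preserve the 32x32 input, and each of the three stride-2 SAME pools halves it with ceiling division, 32 -> 16 -> 8 -> 4. A one-line check:

```python
import math

size = 32                       # CIFAR-style input
for _ in range(3):              # pool1, pool2, pool3: stride 2, SAME padding
    size = int(math.ceil(size / 2.0))
print(size)  # 4, so pool3 flattens to 64 channels * 4 * 4 = 1024 features
```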
diff --git a/benchmark/tensorflow/rnn/README.md b/benchmark/tensorflow/rnn/README.md
index b5314d544608480a732f7d0d94ec69c53b4c8049..da8e7b8b07969051cbec3ac6a713eaf7fc738a55 100644
--- a/benchmark/tensorflow/rnn/README.md
+++ b/benchmark/tensorflow/rnn/README.md
@@ -1,5 +1,5 @@
You also should install tflearn:
```bash
-pip install tflearn
+pip install -r requirements.txt
```
diff --git a/benchmark/tensorflow/rnn/reader.py b/benchmark/tensorflow/rnn/reader.py
index 0d8308046ed6543b218f604480e9630e6b4b1091..f538329a15ea9ad9293c97c94340989e2c421eb2 100755
--- a/benchmark/tensorflow/rnn/reader.py
+++ b/benchmark/tensorflow/rnn/reader.py
@@ -8,14 +8,13 @@ import tflearn
from tflearn.data_utils import to_categorical, pad_sequences
from tflearn.datasets import imdb
-
FLAGS = tf.app.flags.FLAGS
+
class DataSet(object):
def __init__(self, data, labels):
assert data.shape[0] == labels.shape[0], (
- 'data.shape: %s labels.shape: %s' % (data.shape,
- labels.shape))
+ 'data.shape: %s labels.shape: %s' % (data.shape, labels.shape))
self._num_examples = data.shape[0]
self._data = data
@@ -64,8 +63,11 @@ class DataSet(object):
def create_datasets(file_path, vocab_size=30000, val_fraction=0.0):
# IMDB Dataset loading
- train, test, _ = imdb.load_data(path=file_path, n_words=vocab_size,
- valid_portion=val_fraction, sort_by_len=False)
+ train, test, _ = imdb.load_data(
+ path=file_path,
+ n_words=vocab_size,
+ valid_portion=val_fraction,
+ sort_by_len=False)
trainX, trainY = train
testX, testY = test
diff --git a/benchmark/tensorflow/rnn/requirements.txt b/benchmark/tensorflow/rnn/requirements.txt
new file mode 100644
index 0000000000000000000000000000000000000000..4242e7d24fbbeb18e8fb9a760d76fa6d5363b03f
--- /dev/null
+++ b/benchmark/tensorflow/rnn/requirements.txt
@@ -0,0 +1 @@
+tflearn
diff --git a/benchmark/tensorflow/rnn/rnn.py b/benchmark/tensorflow/rnn/rnn.py
index 5377187f39141be6b9884d8a75c1c1772710c525..f288083e13656563b511980553245142efec4e65 100755
--- a/benchmark/tensorflow/rnn/rnn.py
+++ b/benchmark/tensorflow/rnn/rnn.py
@@ -11,27 +11,22 @@ from tensorflow.python.ops import rnn
FLAGS = tf.app.flags.FLAGS
-tf.app.flags.DEFINE_integer('batch_size', 128,
- """Batch size.""")
-tf.app.flags.DEFINE_integer('num_batches', 100,
- """Number of batches to run.""")
-tf.app.flags.DEFINE_integer('num_layers', 1,
- """Number of batches to run.""")
-tf.app.flags.DEFINE_integer('max_len', 100,
- """Number of batches to run.""")
+tf.app.flags.DEFINE_integer('batch_size', 128, """Batch size.""")
+tf.app.flags.DEFINE_integer('num_batches', 100, """Number of batches to run.""")
+tf.app.flags.DEFINE_integer('num_layers', 1, """Number of LSTM layers.""")
+tf.app.flags.DEFINE_integer('max_len', 100, """Maximum sequence length.""")
tf.app.flags.DEFINE_boolean('forward_only', False,
"""Only run the forward pass.""")
tf.app.flags.DEFINE_boolean('forward_backward_only', False,
"""Only run the forward-forward pass.""")
-tf.app.flags.DEFINE_integer('hidden_size', 128,
- """Number of batches to run.""")
-tf.app.flags.DEFINE_integer('emb_size', 128,
- """Number of batches to run.""")
+tf.app.flags.DEFINE_integer('hidden_size', 128, """Hidden layer size.""")
+tf.app.flags.DEFINE_integer('emb_size', 128, """Embedding size.""")
tf.app.flags.DEFINE_boolean('log_device_placement', False,
"""Whether to log device placement.""")
-VOCAB_SIZE=30000
-NUM_CLASS=2
+VOCAB_SIZE = 30000
+NUM_CLASS = 2
+
def get_feed_dict(x_data, y_data=None):
feed_dict = {}
@@ -44,6 +39,7 @@ def get_feed_dict(x_data, y_data=None):
return feed_dict
+
def get_incoming_shape(incoming):
""" Returns the incoming data shape """
if isinstance(incoming, tf.Tensor):
@@ -56,53 +52,75 @@ def get_incoming_shape(incoming):
# Note input * W is done in LSTMCell,
# which is different from PaddlePaddle
-def single_lstm(name, incoming, n_units, use_peepholes=True,
- return_seq=False, return_state=False):
- with tf.name_scope(name) as scope:
- cell = tf.nn.rnn_cell.LSTMCell(n_units, use_peepholes=use_peepholes)
- output, _cell_state = rnn.rnn(cell, incoming, dtype=tf.float32)
- out = output if return_seq else output[-1]
- return (out, _cell_state) if return_state else out
-
-def lstm(name, incoming, n_units, use_peepholes=True,
- return_seq=False, return_state=False, num_layers=1):
- with tf.name_scope(name) as scope:
- lstm_cell = tf.nn.rnn_cell.LSTMCell(n_units, use_peepholes=use_peepholes)
- cell = tf.nn.rnn_cell.MultiRNNCell([lstm_cell] * num_layers)
- initial_state = cell.zero_state(FLAGS.batch_size, dtype=tf.float32)
- if not isinstance(incoming, list):
- # if the input is embeding, the Tensor shape : [None, time_step, emb_size]
- incoming = [tf.squeeze(input_, [1])
- for input_ in tf.split(1, FLAGS.max_len, incoming)]
- outputs, state = tf.nn.rnn(cell, incoming, initial_state=initial_state,
- dtype=tf.float32)
- out = outputs if return_seq else outputs[-1]
- return (out, _cell_state) if return_state else out
+def single_lstm(name,
+ incoming,
+ n_units,
+ use_peepholes=True,
+ return_seq=False,
+ return_state=False):
+ with tf.name_scope(name) as scope:
+ cell = tf.nn.rnn_cell.LSTMCell(n_units, use_peepholes=use_peepholes)
+ output, _cell_state = rnn.rnn(cell, incoming, dtype=tf.float32)
+ out = output if return_seq else output[-1]
+ return (out, _cell_state) if return_state else out
+
+
+def lstm(name,
+ incoming,
+ n_units,
+ use_peepholes=True,
+ return_seq=False,
+ return_state=False,
+ num_layers=1):
+ with tf.name_scope(name) as scope:
+ lstm_cell = tf.nn.rnn_cell.LSTMCell(
+ n_units, use_peepholes=use_peepholes)
+ cell = tf.nn.rnn_cell.MultiRNNCell([lstm_cell] * num_layers)
+ initial_state = cell.zero_state(FLAGS.batch_size, dtype=tf.float32)
+ if not isinstance(incoming, list):
+            # If the input is an embedding, the Tensor shape is [None, time_step, emb_size].
+ incoming = [
+ tf.squeeze(input_, [1])
+ for input_ in tf.split(1, FLAGS.max_len, incoming)
+ ]
+ outputs, state = tf.nn.rnn(cell,
+ incoming,
+ initial_state=initial_state,
+ dtype=tf.float32)
+ out = outputs if return_seq else outputs[-1]
+        return (out, state) if return_state else out
def embedding(name, incoming, vocab_size, emb_size):
- with tf.name_scope(name) as scope:
- #with tf.device("/cpu:0"):
- embedding = tf.get_variable(
- name+'_emb', [vocab_size, emb_size], dtype=tf.float32)
- out = tf.nn.embedding_lookup(embedding, incoming)
- return out
+ with tf.name_scope(name) as scope:
+ #with tf.device("/cpu:0"):
+ embedding = tf.get_variable(
+ name + '_emb', [vocab_size, emb_size], dtype=tf.float32)
+ out = tf.nn.embedding_lookup(embedding, incoming)
+ return out
+
def fc(name, inpOp, nIn, nOut, act=True):
with tf.name_scope(name) as scope:
- kernel = tf.get_variable(name + '_w', [nIn, nOut],
- initializer=tf.truncated_normal_initializer(stddev=0.01, dtype=tf.float32),
+ kernel = tf.get_variable(
+ name + '_w', [nIn, nOut],
+ initializer=tf.truncated_normal_initializer(
+ stddev=0.01, dtype=tf.float32),
dtype=tf.float32)
- biases = tf.get_variable(name + '_b', [nOut],
- initializer=tf.constant_initializer(value=0.0, dtype=tf.float32),
- dtype=tf.float32,trainable=True)
+ biases = tf.get_variable(
+ name + '_b', [nOut],
+ initializer=tf.constant_initializer(
+ value=0.0, dtype=tf.float32),
+ dtype=tf.float32,
+ trainable=True)
net = tf.nn.relu_layer(inpOp, kernel, biases, name=name) if act else \
tf.matmul(inpOp, kernel) + biases
return net
+
def inference(seq):
net = embedding('emb', seq, VOCAB_SIZE, FLAGS.emb_size)
print "emb:", get_incoming_shape(net)
@@ -111,91 +129,95 @@ def inference(seq):
net = fc('fc1', net, FLAGS.hidden_size, 2)
return net
+
def loss(logits, labels):
# one label index for one sample
labels = tf.cast(labels, tf.float32)
cross_entropy = tf.nn.softmax_cross_entropy_with_logits(
- logits, labels, name='cross_entropy_per_example')
+ logits, labels, name='cross_entropy_per_example')
cross_entropy_mean = tf.reduce_mean(cross_entropy, name='cross_entropy')
tf.add_to_collection('losses', cross_entropy_mean)
return tf.add_n(tf.get_collection('losses'), name='total_loss')
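+
+# Note: loss() returns the sum of everything in the 'losses' collection,
+# which at this point is just the mean cross-entropy added above.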
def time_tensorflow_run(session, target, x_input, y_input, info_string):
- num_steps_burn_in = 50
- total_duration = 0.0
- total_duration_squared = 0.0
- if not isinstance(target, list):
- target = [target]
- target_op = tf.group(*target)
- train_dataset = reader.create_datasets("imdb.pkl", VOCAB_SIZE)
- for i in xrange(FLAGS.num_batches + num_steps_burn_in):
- start_time = time.time()
- data, label = train_dataset.next_batch(FLAGS.batch_size)
- _ = session.run(target_op, feed_dict={x_input:data, y_input:label})
- duration = time.time() - start_time
- if i > num_steps_burn_in:
- if not i % 10:
- print ('%s: step %d, duration = %.3f' %
- (datetime.now(), i - num_steps_burn_in, duration))
- total_duration += duration
- total_duration_squared += duration * duration
- mn = total_duration / FLAGS.num_batches
- vr = total_duration_squared / FLAGS.num_batches - mn * mn
- sd = math.sqrt(vr)
- print ('%s: %s across %d steps, %.3f +/- %.3f sec / batch' %
- (datetime.now(), info_string, FLAGS.num_batches, mn, sd))
+ num_steps_burn_in = 50
+ total_duration = 0.0
+ total_duration_squared = 0.0
+ if not isinstance(target, list):
+ target = [target]
+ target_op = tf.group(*target)
+ train_dataset = reader.create_datasets("imdb.pkl", VOCAB_SIZE)
+ for i in xrange(FLAGS.num_batches + num_steps_burn_in):
+ start_time = time.time()
+ data, label = train_dataset.next_batch(FLAGS.batch_size)
+ _ = session.run(target_op, feed_dict={x_input: data, y_input: label})
+ duration = time.time() - start_time
+ if i > num_steps_burn_in:
+ if not i % 10:
+ print('%s: step %d, duration = %.3f' %
+ (datetime.now(), i - num_steps_burn_in, duration))
+ total_duration += duration
+ total_duration_squared += duration * duration
+ mn = total_duration / FLAGS.num_batches
+ vr = total_duration_squared / FLAGS.num_batches - mn * mn
+ sd = math.sqrt(vr)
+ print('%s: %s across %d steps, %.3f +/- %.3f sec / batch' %
+ (datetime.now(), info_string, FLAGS.num_batches, mn, sd))
def run_benchmark():
- with tf.Graph().as_default():
- global_step=0
- with tf.device('/cpu:0'):
- global_step = tf.Variable(0, trainable=False)
- with tf.device('/gpu:0'):
- #x_input = tf.placeholder(tf.int32, [None, FLAGS.max_len], name="x_input")
- #y_input = tf.placeholder(tf.int32, [None, NUM_CLASS], name="y_input")
- x_input = tf.placeholder(tf.int32, [FLAGS.batch_size, FLAGS.max_len], name="x_input")
- y_input = tf.placeholder(tf.int32, [FLAGS.batch_size, NUM_CLASS], name="y_input")
- # Generate some dummy sequnce.
-
-
- last_layer = inference(x_input)
-
- objective = loss(last_layer, y_input)
- opt = tf.train.AdamOptimizer(0.001)
- grads = opt.compute_gradients(objective)
- apply_gradient_op = opt.apply_gradients(grads, global_step=global_step)
-
- init = tf.initialize_all_variables()
- sess = tf.Session(config=tf.ConfigProto(
- allow_soft_placement=True,
- log_device_placement=FLAGS.log_device_placement))
- sess.run(init)
-
- run_forward = True
- run_forward_backward = True
- if FLAGS.forward_only and FLAGS.forward_backward_only:
- raise ValueError("Cannot specify --forward_only and "
- "--forward_backward_only at the same time.")
- if FLAGS.forward_only:
- run_forward_backward = False
- elif FLAGS.forward_backward_only:
- run_forward = False
-
- if run_forward:
- time_tensorflow_run(sess, last_layer, x_input, y_input, "Forward")
-
- if run_forward_backward:
- with tf.control_dependencies([apply_gradient_op]):
- train_op = tf.no_op(name='train')
- time_tensorflow_run(sess, [train_op, objective], x_input, y_input, "Forward-backward")
+ with tf.Graph().as_default():
+ global_step = 0
+ with tf.device('/cpu:0'):
+ global_step = tf.Variable(0, trainable=False)
+ with tf.device('/gpu:0'):
+ #x_input = tf.placeholder(tf.int32, [None, FLAGS.max_len], name="x_input")
+ #y_input = tf.placeholder(tf.int32, [None, NUM_CLASS], name="y_input")
+ x_input = tf.placeholder(
+ tf.int32, [FLAGS.batch_size, FLAGS.max_len], name="x_input")
+ y_input = tf.placeholder(
+ tf.int32, [FLAGS.batch_size, NUM_CLASS], name="y_input")
+            # Generate some dummy sequences.
+
+ last_layer = inference(x_input)
+
+ objective = loss(last_layer, y_input)
+ opt = tf.train.AdamOptimizer(0.001)
+ grads = opt.compute_gradients(objective)
+ apply_gradient_op = opt.apply_gradients(
+ grads, global_step=global_step)
+
+ init = tf.initialize_all_variables()
+ sess = tf.Session(config=tf.ConfigProto(
+ allow_soft_placement=True,
+ log_device_placement=FLAGS.log_device_placement))
+ sess.run(init)
+
+ run_forward = True
+ run_forward_backward = True
+ if FLAGS.forward_only and FLAGS.forward_backward_only:
+ raise ValueError("Cannot specify --forward_only and "
+ "--forward_backward_only at the same time.")
+ if FLAGS.forward_only:
+ run_forward_backward = False
+ elif FLAGS.forward_backward_only:
+ run_forward = False
+
+ if run_forward:
+ time_tensorflow_run(sess, last_layer, x_input, y_input,
+ "Forward")
+
+ if run_forward_backward:
+ with tf.control_dependencies([apply_gradient_op]):
+ train_op = tf.no_op(name='train')
+ time_tensorflow_run(sess, [train_op, objective], x_input,
+ y_input, "Forward-backward")
def main(_):
- run_benchmark()
+ run_benchmark()
if __name__ == '__main__':
- tf.app.run()
-
+ tf.app.run()
diff --git a/benchmark/tensorflow/rnn/rnn_multi_gpu.py b/benchmark/tensorflow/rnn/rnn_multi_gpu.py
index 97ba5d4c29672afe2756850430351b2abdeb20ca..eabee4fa8fe6325212ace1c11be4862cd2720b08 100755
--- a/benchmark/tensorflow/rnn/rnn_multi_gpu.py
+++ b/benchmark/tensorflow/rnn/rnn_multi_gpu.py
@@ -12,35 +12,28 @@ from tensorflow.python.ops import rnn
FLAGS = tf.app.flags.FLAGS
-tf.app.flags.DEFINE_integer('batch_size', 64,
- """Batch size.""")
-tf.app.flags.DEFINE_integer('num_batches', 100,
- """Number of batches to run.""")
-tf.app.flags.DEFINE_integer('num_layers', 1,
- """Number of batches to run.""")
-tf.app.flags.DEFINE_integer('max_len', 100,
- """Number of batches to run.""")
-tf.app.flags.DEFINE_integer('hidden_size', 128,
- """Number of batches to run.""")
-tf.app.flags.DEFINE_integer('emb_size', 64,
- """Number of batches to run.""")
+tf.app.flags.DEFINE_integer('batch_size', 64, """Batch size.""")
+tf.app.flags.DEFINE_integer('num_batches', 100, """Number of batches to run.""")
+tf.app.flags.DEFINE_integer('num_layers', 1, """Number of LSTM layers.""")
+tf.app.flags.DEFINE_integer('max_len', 100, """Maximum sequence length.""")
+tf.app.flags.DEFINE_integer('hidden_size', 128, """Hidden layer size.""")
+tf.app.flags.DEFINE_integer('emb_size', 64, """Embedding size.""")
tf.app.flags.DEFINE_boolean('log_device_placement', False,
"""Whether to log device placement.""")
-tf.app.flags.DEFINE_integer('num_gpus', 4,
- """How many GPUs to use.""")
+tf.app.flags.DEFINE_integer('num_gpus', 4, """How many GPUs to use.""")
-VOCAB_SIZE=30000
-NUM_CLASS=2
+VOCAB_SIZE = 30000
+NUM_CLASS = 2
-
-NUM_EXAMPLES_PER_EPOCH_FOR_TRAIN=50000
-NUM_EPOCHS_PER_DECAY=50
-INITIAL_LEARNING_RATE = 0.1
+NUM_EXAMPLES_PER_EPOCH_FOR_TRAIN = 50000
+NUM_EPOCHS_PER_DECAY = 50
+INITIAL_LEARNING_RATE = 0.1
LEARNING_RATE_DECAY_FACTOR = 0.1
TOWER_NAME = 'tower'
train_dataset = reader.create_datasets("imdb.pkl", VOCAB_SIZE)
+
def get_incoming_shape(incoming):
""" Returns the incoming data shape """
if isinstance(incoming, tf.Tensor):
@@ -53,49 +46,68 @@ def get_incoming_shape(incoming):
# Note input * W is done in LSTMCell,
# which is different from PaddlePaddle
-def single_lstm(name, incoming, n_units, use_peepholes=True,
- return_seq=False, return_state=False):
- with tf.name_scope(name) as scope:
- cell = tf.nn.rnn_cell.LSTMCell(n_units, use_peepholes=use_peepholes)
- output, _cell_state = rnn.rnn(cell, incoming, dtype=tf.float32)
- out = output if return_seq else output[-1]
- return (out, _cell_state) if return_state else out
-
-
-def lstm(name, incoming, n_units, use_peepholes=True,
- return_seq=False, return_state=False, num_layers=1):
- with tf.name_scope(name) as scope:
- lstm_cell = tf.nn.rnn_cell.LSTMCell(n_units, use_peepholes=use_peepholes)
- cell = tf.nn.rnn_cell.MultiRNNCell([lstm_cell] * num_layers)
- initial_state = cell.zero_state(FLAGS.batch_size, dtype=tf.float32)
- if not isinstance(incoming, list):
- # if the input is embeding, the Tensor shape : [None, time_step, emb_size]
- incoming = [tf.squeeze(input_, [1])
- for input_ in tf.split(1, FLAGS.max_len, incoming)]
- outputs, state = tf.nn.rnn(cell, incoming, initial_state=initial_state,
- dtype=tf.float32)
- out = outputs if return_seq else outputs[-1]
- return (out, _cell_state) if return_state else out
+def single_lstm(name,
+ incoming,
+ n_units,
+ use_peepholes=True,
+ return_seq=False,
+ return_state=False):
+ with tf.name_scope(name) as scope:
+ cell = tf.nn.rnn_cell.LSTMCell(n_units, use_peepholes=use_peepholes)
+ output, _cell_state = rnn.rnn(cell, incoming, dtype=tf.float32)
+ out = output if return_seq else output[-1]
+ return (out, _cell_state) if return_state else out
+
+
+def lstm(name,
+ incoming,
+ n_units,
+ use_peepholes=True,
+ return_seq=False,
+ return_state=False,
+ num_layers=1):
+ with tf.name_scope(name) as scope:
+ lstm_cell = tf.nn.rnn_cell.LSTMCell(
+ n_units, use_peepholes=use_peepholes)
+ cell = tf.nn.rnn_cell.MultiRNNCell([lstm_cell] * num_layers)
+ initial_state = cell.zero_state(FLAGS.batch_size, dtype=tf.float32)
+ if not isinstance(incoming, list):
+            # If the input is an embedding, the Tensor shape is [None, time_step, emb_size].
+ incoming = [
+ tf.squeeze(input_, [1])
+ for input_ in tf.split(1, FLAGS.max_len, incoming)
+ ]
+ outputs, state = tf.nn.rnn(cell,
+ incoming,
+ initial_state=initial_state,
+ dtype=tf.float32)
+ out = outputs if return_seq else outputs[-1]
+        return (out, state) if return_state else out
def embedding(name, incoming, vocab_size, emb_size):
- with tf.name_scope(name) as scope:
- #with tf.device("/cpu:0"):
- embedding = tf.get_variable(
- name+'_emb', [vocab_size, emb_size], dtype=tf.float32)
- out = tf.nn.embedding_lookup(embedding, incoming)
- return out
+ with tf.name_scope(name) as scope:
+ #with tf.device("/cpu:0"):
+ embedding = tf.get_variable(
+ name + '_emb', [vocab_size, emb_size], dtype=tf.float32)
+ out = tf.nn.embedding_lookup(embedding, incoming)
+ return out
def fc(name, inpOp, nIn, nOut, act=True):
with tf.name_scope(name) as scope:
- kernel = tf.get_variable(name + '_w', [nIn, nOut],
- initializer=tf.truncated_normal_initializer(stddev=0.01, dtype=tf.float32),
+ kernel = tf.get_variable(
+ name + '_w', [nIn, nOut],
+ initializer=tf.truncated_normal_initializer(
+ stddev=0.01, dtype=tf.float32),
dtype=tf.float32)
- biases = tf.get_variable(name + '_b', [nOut],
- initializer=tf.constant_initializer(value=0.0, dtype=tf.float32),
- dtype=tf.float32,trainable=True)
+ biases = tf.get_variable(
+ name + '_b', [nOut],
+ initializer=tf.constant_initializer(
+ value=0.0, dtype=tf.float32),
+ dtype=tf.float32,
+ trainable=True)
net = tf.nn.relu_layer(inpOp, kernel, biases, name=name) if act else \
tf.matmul(inpOp, kernel) + biases
@@ -119,7 +131,7 @@ def loss(logits, labels):
# logits, labels, name='cross_entropy_per_example')
labels = tf.cast(labels, tf.float32)
cross_entropy = tf.nn.softmax_cross_entropy_with_logits(
- logits, labels, name='cross_entropy_per_example')
+ logits, labels, name='cross_entropy_per_example')
cross_entropy_mean = tf.reduce_mean(cross_entropy, name='cross_entropy')
tf.add_to_collection('losses', cross_entropy_mean)
return tf.add_n(tf.get_collection('losses'), name='total_loss')
@@ -142,7 +154,7 @@ def tower_loss(scope):
# assemble the total_loss using a custom function below.
#_ = loss(last_layer, label)
_ = loss(last_layer, label)
-
+
# Assemble all of the losses for the current tower only.
losses = tf.get_collection('losses', scope)
@@ -161,7 +173,7 @@ def tower_loss(scope):
loss_name = re.sub('%s_[0-9]*/' % TOWER_NAME, '', l.op.name)
# Name each loss as '(raw)' and name the moving average version of the loss
# as the original loss name.
- tf.scalar_summary(loss_name +' (raw)', l)
+ tf.scalar_summary(loss_name + ' (raw)', l)
#tf.scalar_summary(loss_name, loss_averages.average(l))
with tf.control_dependencies([loss_averages_op]):
@@ -170,7 +182,7 @@ def tower_loss(scope):
def average_gradients(tower_grads):
- """Calculate the average gradient for each shared variable across all towers.
+ """Calculate the average gradient for each shared variable across all towers.
Note that this function provides a synchronization point across all towers.
Args:
tower_grads: List of lists of (gradient, variable) tuples. The outer list
@@ -180,127 +192,131 @@ def average_gradients(tower_grads):
List of pairs of (gradient, variable) where the gradient has been averaged
across all towers.
"""
- average_grads = []
- for grad_and_vars in zip(*tower_grads):
- # Note that each grad_and_vars looks like the following:
- # ((grad0_gpu0, var0_gpu0), ... , (grad0_gpuN, var0_gpuN))
- grads = []
- for g, _ in grad_and_vars:
- # Add 0 dimension to the gradients to represent the tower.
- expanded_g = tf.expand_dims(g, 0)
-
- # Append on a 'tower' dimension which we will average over below.
- grads.append(expanded_g)
-
- # Average over the 'tower' dimension.
- grad = tf.concat(0, grads)
- grad = tf.reduce_mean(grad, 0)
-
- # Keep in mind that the Variables are redundant because they are shared
- # across towers. So .. we will just return the first tower's pointer to
- # the Variable.
- v = grad_and_vars[0][1]
- grad_and_var = (grad, v)
- average_grads.append(grad_and_var)
- return average_grads
+ average_grads = []
+ for grad_and_vars in zip(*tower_grads):
+ # Note that each grad_and_vars looks like the following:
+ # ((grad0_gpu0, var0_gpu0), ... , (grad0_gpuN, var0_gpuN))
+ grads = []
+ for g, _ in grad_and_vars:
+ # Add 0 dimension to the gradients to represent the tower.
+ expanded_g = tf.expand_dims(g, 0)
+
+ # Append on a 'tower' dimension which we will average over below.
+ grads.append(expanded_g)
+
+ # Average over the 'tower' dimension.
+ grad = tf.concat(0, grads)
+ grad = tf.reduce_mean(grad, 0)
+
+ # Keep in mind that the Variables are redundant because they are shared
+ # across towers. So .. we will just return the first tower's pointer to
+ # the Variable.
+ v = grad_and_vars[0][1]
+ grad_and_var = (grad, v)
+ average_grads.append(grad_and_var)
+ return average_grads
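+
+# Worked example of the shapes involved (illustrative names only): with two
+# towers and a single shared variable v0,
+#   tower_grads = [[(g0_gpu0, v0)], [(g0_gpu1, v0)]]
+#   average_gradients(tower_grads) -> [(mean(g0_gpu0, g0_gpu1), v0)]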
+
def time_tensorflow_run(session, target):
num_steps_burn_in = 80
total_duration = 0.0
total_duration_squared = 0.0
for i in xrange(FLAGS.num_batches + num_steps_burn_in):
- start_time = time.time()
- _ = session.run(target, feed_dict={x_input:data, y_input:label})
- _, loss_value = session.run(target)
- duration = time.time() - start_time
- if i > num_steps_burn_in:
- if not i % 10:
- num_examples_per_step = FLAGS.batch_size * FLAGS.num_gpus
- examples_per_sec = num_examples_per_step / duration
- # sec_per_batch = duration / FLAGS.num_gpus
- sec_per_batch = duration
-
- format_str = ('%s: step %d, loss= %.2f (%.1f examples/sec; %.3f '
- 'sec/batch batch_size= %d)')
- print (format_str %
- (datetime.now(), i - num_steps_burn_in,
- loss_value, duration, sec_per_batch, num_examples_per_step))
-
- total_duration += duration
- total_duration_squared += duration * duration
+ start_time = time.time()
+        # Each tower builds its own inputs inside tower_loss(), so no
+        # feed_dict is needed here.
+        _, loss_value = session.run(target)
+ duration = time.time() - start_time
+ if i > num_steps_burn_in:
+ if not i % 10:
+ num_examples_per_step = FLAGS.batch_size * FLAGS.num_gpus
+ examples_per_sec = num_examples_per_step / duration
+ # sec_per_batch = duration / FLAGS.num_gpus
+ sec_per_batch = duration
+
+ format_str = (
+ '%s: step %d, loss= %.2f (%.1f examples/sec; %.3f '
+ 'sec/batch batch_size= %d)')
+                print(format_str %
+                      (datetime.now(), i - num_steps_burn_in, loss_value,
+                       examples_per_sec, sec_per_batch, num_examples_per_step))
+
+ total_duration += duration
+ total_duration_squared += duration * duration
mn = total_duration / FLAGS.num_batches
vr = total_duration_squared / FLAGS.num_batches - mn * mn
sd = math.sqrt(vr)
- print ('%s: FwdBwd across %d steps, %.3f +/- %.3f sec / batch' %
+ print('%s: FwdBwd across %d steps, %.3f +/- %.3f sec / batch' %
(datetime.now(), FLAGS.num_batches, mn, sd))
+
def run_benchmark():
- with tf.Graph().as_default(), tf.device('/cpu:0'):
- # Create a variable to count the number of train() calls. This equals the
- # number of batches processed * FLAGS.num_gpus.
- global_step = tf.get_variable(
- 'global_step', [],
- initializer=tf.constant_initializer(0), trainable=False)
-
- # Calculate the learning rate schedule.
- num_batches_per_epoch = (NUM_EXAMPLES_PER_EPOCH_FOR_TRAIN /
- FLAGS.batch_size)
- decay_steps = int(num_batches_per_epoch * NUM_EPOCHS_PER_DECAY)
-
- # Create an optimizer that performs gradient descent.
- opt = tf.train.AdamOptimizer(0.001)
-
- #train_dataset = reader.create_datasets("imdb.pkl", VOCAB_SIZE)
-
- # Calculate the gradients for each model tower.
- tower_grads = []
- for i in xrange(FLAGS.num_gpus):
- with tf.device('/gpu:%d' % i):
- with tf.name_scope('%s_%d' % (TOWER_NAME, i)) as scope:
- # Calculate the loss for one tower of the model. This function
- # constructs the entire model but shares the variables across
- # all towers.
- loss = tower_loss(scope)
-
- # Reuse variables for the next tower.
- tf.get_variable_scope().reuse_variables()
-
- # Retain the summaries from the final tower.
- # summaries = tf.get_collection(tf.GraphKeys.SUMMARIES, scope)
-
- # Calculate the gradients for the batch of data on this tower.
- grads = opt.compute_gradients(loss)
-
- # Keep track of the gradients across all towers.
- tower_grads.append(grads)
-
- # We must calculate the mean of each gradient. Note that this is the
- # synchronization point across all towers.
- grads = average_gradients(tower_grads)
-
- # Apply the gradients to adjust the shared variables.
- apply_gradient_op = opt.apply_gradients(grads, global_step=global_step)
-
- # Group all updates to into a single train op.
- train_op = tf.group(apply_gradient_op)
-
- # Build an initialization operation.
- init = tf.initialize_all_variables()
-
- # Start running operations on the Graph. allow_soft_placement must be set to
- # True to build towers on GPU, as some of the ops do not have GPU
- # implementations.
- sess = tf.Session(config=tf.ConfigProto(
- allow_soft_placement=True,
- log_device_placement=FLAGS.log_device_placement))
- sess.run(init)
- time_tensorflow_run(sess, [train_op, loss])
+ with tf.Graph().as_default(), tf.device('/cpu:0'):
+ # Create a variable to count the number of train() calls. This equals the
+ # number of batches processed * FLAGS.num_gpus.
+ global_step = tf.get_variable(
+ 'global_step', [],
+ initializer=tf.constant_initializer(0),
+ trainable=False)
+
+ # Calculate the learning rate schedule.
+ num_batches_per_epoch = (NUM_EXAMPLES_PER_EPOCH_FOR_TRAIN /
+ FLAGS.batch_size)
+ decay_steps = int(num_batches_per_epoch * NUM_EPOCHS_PER_DECAY)
+
+ # Create an optimizer that performs gradient descent.
+ opt = tf.train.AdamOptimizer(0.001)
+
+ #train_dataset = reader.create_datasets("imdb.pkl", VOCAB_SIZE)
+
+ # Calculate the gradients for each model tower.
+ tower_grads = []
+ for i in xrange(FLAGS.num_gpus):
+ with tf.device('/gpu:%d' % i):
+ with tf.name_scope('%s_%d' % (TOWER_NAME, i)) as scope:
+ # Calculate the loss for one tower of the model. This function
+ # constructs the entire model but shares the variables across
+ # all towers.
+ loss = tower_loss(scope)
+
+ # Reuse variables for the next tower.
+ tf.get_variable_scope().reuse_variables()
+
+ # Retain the summaries from the final tower.
+ # summaries = tf.get_collection(tf.GraphKeys.SUMMARIES, scope)
+
+ # Calculate the gradients for the batch of data on this tower.
+ grads = opt.compute_gradients(loss)
+
+ # Keep track of the gradients across all towers.
+ tower_grads.append(grads)
+
+ # We must calculate the mean of each gradient. Note that this is the
+ # synchronization point across all towers.
+ grads = average_gradients(tower_grads)
+
+ # Apply the gradients to adjust the shared variables.
+ apply_gradient_op = opt.apply_gradients(grads, global_step=global_step)
+
+ # Group all updates to into a single train op.
+ train_op = tf.group(apply_gradient_op)
+
+ # Build an initialization operation.
+ init = tf.initialize_all_variables()
+
+ # Start running operations on the Graph. allow_soft_placement must be set to
+ # True to build towers on GPU, as some of the ops do not have GPU
+ # implementations.
+ sess = tf.Session(config=tf.ConfigProto(
+ allow_soft_placement=True,
+ log_device_placement=FLAGS.log_device_placement))
+ sess.run(init)
+ time_tensorflow_run(sess, [train_op, loss])
def main(_):
- run_benchmark()
+ run_benchmark()
if __name__ == '__main__':
- tf.app.run()
+ tf.app.run()
diff --git a/cmake/FindAVX.cmake b/cmake/FindAVX.cmake
index f6103c6e667e8a8f6b8998d8eb467235fb49cb19..d380c996dfa95f0caa2b9cd9daa0ac9141e51fe0 100644
--- a/cmake/FindAVX.cmake
+++ b/cmake/FindAVX.cmake
@@ -3,36 +3,55 @@
INCLUDE(CheckCXXSourceRuns)
-SET(FIND_AVX_10)
-SET(FIND_AVX_20)
-SET(AVX_FLAGS)
-SET(AVX_FOUND)
-
-# Check AVX 2
-SET(CMAKE_REQUIRED_FLAGS)
IF(CMAKE_COMPILER_IS_GNUCC OR CMAKE_COMPILER_IS_GNUCXX OR CMAKE_CXX_COMPILER_ID MATCHES "Clang")
- SET(CMAKE_REQUIRED_FLAGS "-mavx2")
-ELSEIF(MSVC AND NOT CMAKE_CL_64) # reserve for WINDOWS
- SET(CMAKE_REQUIRED_FLAGS "/arch:AVX2")
+ set(MMX_FLAG "-mmmx")
+ set(SSE2_FLAG "-msse2")
+ set(SSE3_FLAG "-msse3")
+ SET(AVX_FLAG "-mavx")
+ SET(AVX2_FLAG "-mavx2")
+ELSEIF(MSVC)
+ set(MMX_FLAG "/arch:MMX")
+ set(SSE2_FLAG "/arch:SSE2")
+ set(SSE3_FLAG "/arch:SSE3")
+ SET(AVX_FLAG "/arch:AVX")
+ SET(AVX2_FLAG "/arch:AVX2")
ENDIF()
+# Check MMX
+set(CMAKE_REQUIRED_FLAGS ${MMX_FLAG})
CHECK_CXX_SOURCE_RUNS("
-#include <immintrin.h>
+#include <mmintrin.h>
int main()
{
- __m256i a = _mm256_set_epi32 (-1, 2, -3, 4, -1, 2, -3, 4);
- __m256i result = _mm256_abs_epi32 (a);
+ _mm_setzero_si64();
return 0;
-}" FIND_AVX_20)
+}" MMX_FOUND)
-# Check AVX
-SET(CMAKE_REQUIRED_FLAGS)
-IF(CMAKE_COMPILER_IS_GNUCC OR CMAKE_COMPILER_IS_GNUCXX OR CMAKE_CXX_COMPILER_ID MATCHES "Clang")
- SET(CMAKE_REQUIRED_FLAGS "-mavx")
-ELSEIF(MSVC AND NOT CMAKE_CL_64)
- SET(CMAKE_REQUIRED_FLAGS "/arch:AVX")
-endif()
+# Check SSE2
+set(CMAKE_REQUIRED_FLAGS ${SSE2_FLAG})
+CHECK_CXX_SOURCE_RUNS("
+#include <emmintrin.h>
+int main()
+{
+ _mm_setzero_si128();
+ return 0;
+}" SSE2_FOUND)
+# Check SSE3
+set(CMAKE_REQUIRED_FLAGS ${SSE3_FLAG})
+CHECK_CXX_SOURCE_RUNS("
+#include <pmmintrin.h>
+int main()
+{
+ __m128d a = _mm_set1_pd(6.28);
+ __m128d b = _mm_set1_pd(3.14);
+ __m128d result = _mm_addsub_pd(a, b);
+ result = _mm_movedup_pd(result);
+ return 0;
+}" SSE3_FOUND)
+
+# Check AVX
+set(CMAKE_REQUIRED_FLAGS ${AVX_FLAG})
CHECK_CXX_SOURCE_RUNS("
#include <immintrin.h>
int main()
@@ -41,25 +60,17 @@ int main()
__m256 b = _mm256_set_ps (1.0f, 2.0f, 3.0f, 4.0f, 1.0f, 2.0f, 3.0f, 4.0f);
__m256 result = _mm256_add_ps (a, b);
return 0;
-}" FIND_AVX_10)
-
-IF(${FIND_AVX_20})
- IF(CMAKE_COMPILER_IS_GNUCC OR CMAKE_COMPILER_IS_GNUCXX OR CMAKE_CXX_COMPILER_ID MATCHES "Clang")
- SET(AVX_FLAGS "${AVX_FLAGS} -mavx2")
- ELSEIF(MSVC)
- SET(AVX_FLAGS "${AVX_FLAGS} /arch:AVX2")
- ENDIF()
-ENDIF()
+}" AVX_FOUND)
-IF(${FIND_AVX_10})
- IF(CMAKE_COMPILER_IS_GNUCC OR CMAKE_COMPILER_IS_GNUCXX OR CMAKE_CXX_COMPILER_ID MATCHES "Clang")
- SET(AVX_FLAGS "${AVX_FLAGS} -mavx")
- ELSEIF(MSVC)
- SET(AVX_FLAGS "${AVX_FLAGS} /arch:AVX")
- ENDIF()
-ENDIF()
+# Check AVX 2
+set(CMAKE_REQUIRED_FLAGS ${AVX2_FLAG})
+CHECK_CXX_SOURCE_RUNS("
+#include <immintrin.h>
+int main()
+{
+ __m256i a = _mm256_set_epi32 (-1, 2, -3, 4, -1, 2, -3, 4);
+ __m256i result = _mm256_abs_epi32 (a);
+ return 0;
+}" AVX2_FOUND)
-IF(${FIND_AVX_10})
- SET(AVX_FOUND TRUE)
- MESSAGE(STATUS "Find CPU supports ${AVX_FLAGS}.")
-ENDIF()
+mark_as_advanced(MMX_FOUND SSE2_FOUND SSE3_FOUND AVX_FOUND AVX2_FOUND)
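+
+# Illustrative usage from a parent CMakeLists.txt (a sketch, not part of this
+# module): pick the strongest flag that was detected above, e.g.
+#   include(FindAVX)
+#   if(AVX2_FOUND)
+#     set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} ${AVX2_FLAG}")
+#   elseif(AVX_FOUND)
+#     set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} ${AVX_FLAG}")
+#   endif()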
diff --git a/cmake/cblas.cmake b/cmake/cblas.cmake
index 57c32a54cd727e3acb181eeb19f811fab4dc82fd..685334c6585060c0344e552c6f3fda2c7324de03 100644
--- a/cmake/cblas.cmake
+++ b/cmake/cblas.cmake
@@ -1,4 +1,4 @@
-# Find the CBlas libraries
+# Find the CBlas and lapack libraries
#
# It will search MKL, atlas, OpenBlas, reference-cblas in order.
#
@@ -19,6 +19,8 @@ set(MKL_ROOT $ENV{MKL_ROOT} CACHE PATH "Folder contains MKL")
find_path(MKL_INCLUDE_DIR mkl.h PATHS
${MKL_ROOT}/include)
+find_path(MKL_INCLUDE_DIR mkl_lapacke.h PATHS
+ ${MKL_ROOT}/include)
find_library(MKL_CORE_LIB NAMES mkl_core PATHS
${MKL_ROOT}/lib
${MKL_ROOT}/lib/intel64)
@@ -37,6 +39,7 @@ if(MKL_INCLUDE_DIR AND MKL_CORE_LIB AND MKL_SEQUENTIAL_LIB AND MKL_INTEL_LP64)
${MKL_SEQUENTIAL_LIB}
${MKL_CORE_LIB})
add_definitions(-DPADDLE_USE_MKL)
+ message(STATUS "Found MKL (include: ${CBLAS_INC_DIR}, library: ${CBLAS_LIBS})")
return() # return file.
endif()
@@ -55,15 +58,19 @@ set(ATLAS_LIB_SEARCH_PATHS
)
find_path(ATLAS_INC_DIR NAMES cblas.h
PATHS ${ATLAS_INCLUDE_SEARCH_PATHS})
+find_path(ATLAS_CLAPACK_INC_DIR NAMES clapack.h
+ PATHS ${ATLAS_INCLUDE_SEARCH_PATHS})
find_library(ATLAS_CBLAS_LIB NAMES cblas libcblas.so.3
PATHS ${ATLAS_LIB_SEARCH_PATHS})
-find_library(ATLAS_LIB NAMES atlas libatlas.so.3
+find_library(ATLAS_LIB NAMES lapack_atlas liblapack_atlas.so.3
PATHS ${ATLAS_LIB_SEARCH_PATHS})
if(ATLAS_INC_DIR AND ATLAS_CBLAS_LIB AND ATLAS_LIB)
set(CBLAS_PROVIDER ATLAS)
- set(CBLAS_INC_DIR ${ATLAS_INC_DIR})
+ set(CBLAS_INC_DIR ${ATLAS_INC_DIR} ${ATLAS_CLAPACK_INC_DIR})
set(CBLAS_LIBS ${ATLAS_LIB} ${ATLAS_CBLAS_LIB})
+ add_definitions(-DPADDLE_USE_ATLAS)
+ message(STATUS "Found Atlas (include: ${CBLAS_INC_DIR}, library: ${CBLAS_LIBS})")
return()
endif()
@@ -83,6 +90,8 @@ set(OPENBLAS_LIB_SEARCH_PATHS
find_path(OPENBLAS_INC_DIR NAMES cblas.h
PATHS ${OPENBLAS_INCLUDE_SEARCH_PATHS})
+find_path(OPENBLAS_LAPACKE_INC_DIR NAMES lapacke.h
+ PATHS ${OPENBLAS_INCLUDE_SEARCH_PATHS})
find_library(OPENBLAS_LIB NAMES openblas
PATHS ${OPENBLAS_LIB_SEARCH_PATHS})
@@ -90,6 +99,7 @@ if(OPENBLAS_INC_DIR AND OPENBLAS_LIB)
set(CBLAS_PROVIDER OPENBLAS)
set(CBLAS_INC_DIR ${OPENBLAS_INC_DIR})
set(CBLAS_LIBS ${OPENBLAS_LIB})
+ message(STATUS "Found OpenBlas (include: ${CBLAS_INC_DIR}, library: ${CBLAS_LIBS})")
return()
endif()
diff --git a/cmake/coveralls.cmake b/cmake/coveralls.cmake
new file mode 100644
index 0000000000000000000000000000000000000000..9be7643819efdde3f42e4d39b2849ecc17e0d9fb
--- /dev/null
+++ b/cmake/coveralls.cmake
@@ -0,0 +1,103 @@
+# CMake script for code coverage.
+# If _COVERALLS_UPLOAD is ON, it will upload JSON files to coveralls.io automatically.
+
+# Param _COVERAGE_SRCS A list of coverage source files.
+# Param _COVERALLS_UPLOAD Upload the result to coveralls.
+# Param _CMAKE_SCRIPT_PATH CMake script path.
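+#
+# Example invocation (mirroring the actual call at the bottom of this file):
+#   code_coverage("${PADDLE_SRCS}" ${COVERALLS_UPLOAD} "${PROJECT_SOURCE_DIR}/cmake")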
+function(code_coverage _COVERAGE_SRCS _COVERALLS_UPLOAD _CMAKE_SCRIPT_PATH)
+ # clean previous gcov data.
+ file(REMOVE_RECURSE ${PROJECT_BINARY_DIR}/*.gcda)
+
+  # Find curl, which is used to upload the JSON later.
+ if (_COVERALLS_UPLOAD)
+ find_program(CURL_EXECUTABLE curl)
+ if (NOT CURL_EXECUTABLE)
+ message(FATAL_ERROR "Coveralls: curl not found!")
+ endif()
+ endif()
+
+ # When passing a CMake list to an external process, the list
+ # will be converted from the format "1;2;3" to "1 2 3".
+ set(COVERAGE_SRCS "")
+ foreach (SINGLE_SRC ${_COVERAGE_SRCS})
+ set(COVERAGE_SRCS "${COVERAGE_SRCS}*${SINGLE_SRC}")
+ endforeach()
+
+ # query number of logical cores
+ cmake_host_system_information(RESULT core_size QUERY NUMBER_OF_LOGICAL_CORES)
+ # coveralls json file.
+ set(COVERALLS_FILE ${PROJECT_BINARY_DIR}/coveralls.json)
+ add_custom_target(coveralls_generate
+ # Run regress tests.
+ COMMAND ${CMAKE_CTEST_COMMAND}
+ -j ${core_size}
+ --output-on-failure
+ # Generate Gcov and translate it into coveralls JSON.
+ COMMAND ${CMAKE_COMMAND}
+ -DCOVERAGE_SRCS="${COVERAGE_SRCS}"
+ -DCOVERALLS_OUTPUT_FILE="${COVERALLS_FILE}"
+ -DCOV_PATH="${PROJECT_BINARY_DIR}"
+ -DPROJECT_ROOT="${PROJECT_SOURCE_DIR}"
+ -P "${_CMAKE_SCRIPT_PATH}/coverallsGcovJsons.cmake"
+ WORKING_DIRECTORY ${PROJECT_BINARY_DIR}
+ COMMENT "Coveralls: generating coveralls output..."
+ )
+
+ if (_COVERALLS_UPLOAD)
+ message("COVERALLS UPLOAD: ON")
+ # Upload the JSON to coveralls.
+ add_custom_target(coveralls_upload
+ COMMAND ${CURL_EXECUTABLE}
+ -S -F json_file=@${COVERALLS_FILE}
+ https://coveralls.io/api/v1/jobs
+ DEPENDS coveralls_generate
+ WORKING_DIRECTORY ${PROJECT_BINARY_DIR}
+ COMMENT "Coveralls: uploading coveralls output...")
+
+ add_custom_target(coveralls DEPENDS coveralls_upload)
+ else()
+ message("COVERALLS UPLOAD: OFF")
+ add_custom_target(coveralls DEPENDS coveralls_generate)
+ endif()
+endfunction()
+
+if(ON_COVERALLS)
+ set(CMAKE_BUILD_TYPE "Debug")
+ set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -g -O0 -fprofile-arcs -ftest-coverage")
+ set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} -g -O0 -fprofile-arcs -ftest-coverage")
+
+ set(EXCLUDE_DIRS
+ "demo/"
+ "build/"
+ "tests/"
+ ".test_env/"
+ )
+
+ if(WITH_GPU)
+    file(GLOB_RECURSE PADDLE_SOURCES RELATIVE "${PROJECT_SOURCE_DIR}" "*.cpp" "*.cc" "*.c" "*.cu")
+ else()
+ file(GLOB_RECURSE PADDLE_SOURCES RELATIVE "${PROJECT_SOURCE_DIR}" "*.cpp" "*.cc" "*.c")
+ endif()
+
+ # exclude trivial files in PADDLE_SOURCES
+ foreach(EXCLUDE_DIR ${EXCLUDE_DIRS})
+ foreach(TMP_PATH ${PADDLE_SOURCES})
+ string(FIND ${TMP_PATH} ${EXCLUDE_DIR} EXCLUDE_DIR_FOUND)
+ if(NOT ${EXCLUDE_DIR_FOUND} EQUAL -1)
+ list(REMOVE_ITEM PADDLE_SOURCES ${TMP_PATH})
+ endif()
+ endforeach(TMP_PATH)
+ endforeach()
+
+ # convert to absolute path
+ set(PADDLE_SRCS "")
+ foreach(PADDLE_SRC ${PADDLE_SOURCES})
+ set(PADDLE_SRCS "${PADDLE_SRCS};${PROJECT_SOURCE_DIR}/${PADDLE_SRC}")
+ endforeach()
+
+ code_coverage(
+ "${PADDLE_SRCS}"
+ ${COVERALLS_UPLOAD}
+ "${PROJECT_SOURCE_DIR}/cmake"
+ )
+endif()
diff --git a/cmake/coverallsGcovJsons.cmake b/cmake/coverallsGcovJsons.cmake
new file mode 100644
index 0000000000000000000000000000000000000000..ae3530c3a0eeb79ddbcbf4f2e99be75aa7968a2f
--- /dev/null
+++ b/cmake/coverallsGcovJsons.cmake
@@ -0,0 +1,403 @@
+#
+# Permission is hereby granted, free of charge, to any person obtaining a copy
+# of this software and associated documentation files (the "Software"), to deal
+# in the Software without restriction, including without limitation the rights
+# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+# copies of the Software, and to permit persons to whom the Software is
+# furnished to do so, subject to the following conditions:
+#
+# The above copyright notice and this permission notice shall be included in all
+# copies or substantial portions of the Software.
+#
+# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+# SOFTWARE.
+#
+# Copyright (C) 2014 Joakim Söderberg
+#
+# This is intended to be run by a custom target in a CMake project like this.
+# 0. Compile program with coverage support.
+# 1. Clear coverage data. (Recursively delete *.gcda in build dir)
+# 2. Run the unit tests.
+# 3. Run this script specifying which source files the coverage should be performed on.
+#
+# This script will then use gcov to generate .gcov files in the directory specified
+# via the COV_PATH var. This should probably be the same as your cmake build dir.
+#
+# It then parses the .gcov files to convert them into the Coveralls JSON format:
+# https://coveralls.io/docs/api
+#
+
+CMAKE_MINIMUM_REQUIRED(VERSION 2.8)
+
+# Since it's not possible to pass a CMake list properly in the
+# "1;2;3" format to an external process, we have replaced the
+# ";" with "*", so reverse that here so we get it back into the
+# CMake list format.
+string(REGEX REPLACE "\\*" ";" COVERAGE_SRCS ${COVERAGE_SRCS})
+
+find_program(GCOV_EXECUTABLE gcov)
+if (NOT GCOV_EXECUTABLE)
+ message(FATAL_ERROR "gcov not found! Aborting...")
+endif()
+
+find_package(Git)
+
+# TODO: Add these git things to the coveralls json.
+if (GIT_FOUND)
+ # Branch.
+ execute_process(
+ COMMAND ${GIT_EXECUTABLE} rev-parse --abbrev-ref HEAD
+ WORKING_DIRECTORY ${CMAKE_SOURCE_DIR}
+ OUTPUT_VARIABLE GIT_BRANCH
+ OUTPUT_STRIP_TRAILING_WHITESPACE
+ )
+
+ macro (git_log_format FORMAT_CHARS VAR_NAME)
+ execute_process(
+ COMMAND ${GIT_EXECUTABLE} log -1 --pretty=format:%${FORMAT_CHARS}
+ WORKING_DIRECTORY ${CMAKE_SOURCE_DIR}
+ OUTPUT_VARIABLE ${VAR_NAME}
+ OUTPUT_STRIP_TRAILING_WHITESPACE
+ )
+ endmacro()
+
+  git_log_format(an GIT_AUTHOR_NAME)
+ git_log_format(ae GIT_AUTHOR_EMAIL)
+ git_log_format(cn GIT_COMMITTER_NAME)
+ git_log_format(ce GIT_COMMITTER_EMAIL)
+ git_log_format(B GIT_COMMIT_MESSAGE)
+
+ message("Git exe: ${GIT_EXECUTABLE}")
+ message("Git branch: ${GIT_BRANCH}")
+ message("Git author: ${GIT_AUTHOR_NAME}")
+ message("Git e-mail: ${GIT_AUTHOR_EMAIL}")
+ message("Git commiter name: ${GIT_COMMITTER_NAME}")
+ message("Git commiter e-mail: ${GIT_COMMITTER_EMAIL}")
+ message("Git commit message: ${GIT_COMMIT_MESSAGE}")
+
+endif()
+
+############################# Macros #########################################
+
+#
+# This macro converts from the full path format gcov outputs:
+#
+# /path/to/project/root/build/#path#to#project#root#subdir#the_file.c.gcov
+#
+# to the original source file path the .gcov is for:
+#
+# /path/to/project/root/subdir/the_file.c
+#
+macro(get_source_path_from_gcov_filename _SRC_FILENAME _GCOV_FILENAME)
+
+ # /path/to/project/root/build/#path#to#project#root#subdir#the_file.c.gcov
+ # ->
+ # #path#to#project#root#subdir#the_file.c.gcov
+ get_filename_component(_GCOV_FILENAME_WEXT ${_GCOV_FILENAME} NAME)
+
+ # #path#to#project#root#subdir#the_file.c.gcov -> /path/to/project/root/subdir/the_file.c
+ string(REGEX REPLACE "\\.gcov$" "" SRC_FILENAME_TMP ${_GCOV_FILENAME_WEXT})
+ string(REGEX REPLACE "\#" "/" SRC_FILENAME_TMP ${SRC_FILENAME_TMP})
+ set(${_SRC_FILENAME} "${SRC_FILENAME_TMP}")
+endmacro()
+
+##############################################################################
+
+# Get the coverage data.
+file(GLOB_RECURSE GCDA_FILES "${COV_PATH}" "*.gcda")
+message("GCDA files:")
+
+# Get a list of all the object directories needed by gcov
+# (The directories the .gcda files and .o files are found in)
+# and run gcov on those.
+foreach(GCDA ${GCDA_FILES})
+ message("Process: ${GCDA}")
+ message("------------------------------------------------------------------------------")
+ get_filename_component(GCDA_DIR ${GCDA} PATH)
+
+ #
+ # The -p below refers to "Preserve path components",
+ # This means that the generated gcov filename of a source file will
+  # keep the original file's entire filepath, but / is replaced with #.
+ # Example:
+ #
+ # /path/to/project/root/build/CMakeFiles/the_file.dir/subdir/the_file.c.gcda
+ # ------------------------------------------------------------------------------
+ # File '/path/to/project/root/subdir/the_file.c'
+ # Lines executed:68.34% of 199
+ # /path/to/project/root/subdir/the_file.c:creating '#path#to#project#root#subdir#the_file.c.gcov'
+ #
+ # If -p is not specified then the file is named only "the_file.c.gcov"
+ #
+ execute_process(
+ COMMAND ${GCOV_EXECUTABLE} -p -o ${GCDA_DIR} ${GCDA}
+ WORKING_DIRECTORY ${GCDA_DIR}
+ )
+endforeach()
+
+# TODO: Make these absolute paths.
+file(GLOB_RECURSE ALL_GCOV_FILES "${COV_PATH}" "*.gcov")
+
+# Get only the filenames to use for filtering.
+#set(COVERAGE_SRCS_NAMES "")
+#foreach (COVSRC ${COVERAGE_SRCS})
+# get_filename_component(COVSRC_NAME ${COVSRC} NAME)
+# message("${COVSRC} -> ${COVSRC_NAME}")
+# list(APPEND COVERAGE_SRCS_NAMES "${COVSRC_NAME}")
+#endforeach()
+
+#
+# Filter out all but the gcov files we want.
+#
+# We do this by comparing the list of COVERAGE_SRCS filepaths that the
+# user wants the coverage data for with the paths of the generated .gcov files,
+# so that we only keep the relevant gcov files.
+#
+# Example:
+# COVERAGE_SRCS =
+# /path/to/project/root/subdir/the_file.c
+#
+# ALL_GCOV_FILES =
+# /path/to/project/root/build/#path#to#project#root#subdir#the_file.c.gcov
+# /path/to/project/root/build/#path#to#project#root#subdir#other_file.c.gcov
+#
+# Result should be:
+# GCOV_FILES =
+# /path/to/project/root/build/#path#to#project#root#subdir#the_file.c.gcov
+#
+set(GCOV_FILES "")
+#message("Look in coverage sources: ${COVERAGE_SRCS}")
+message("\nFilter out unwanted GCOV files:")
+message("===============================")
+
+set(COVERAGE_SRCS_REMAINING ${COVERAGE_SRCS})
+
+foreach (GCOV_FILE ${ALL_GCOV_FILES})
+
+ #
+ # /path/to/project/root/build/#path#to#project#root#subdir#the_file.c.gcov
+ # ->
+ # /path/to/project/root/subdir/the_file.c
+ get_source_path_from_gcov_filename(GCOV_SRC_PATH ${GCOV_FILE})
+
+ # Is this in the list of source files?
+ # TODO: We want to match against relative path filenames from the source file root...
+ list(FIND COVERAGE_SRCS ${GCOV_SRC_PATH} WAS_FOUND)
+
+ if (NOT WAS_FOUND EQUAL -1)
+ message("YES: ${GCOV_FILE}")
+ list(APPEND GCOV_FILES ${GCOV_FILE})
+
+    # We remove it from the list so we don't bother searching for it again.
+    # Files left in COVERAGE_SRCS_REMAINING after this loop are those with no
+    # generated coverage data; they are reported later with 0% coverage.
+ list(REMOVE_ITEM COVERAGE_SRCS_REMAINING ${GCOV_SRC_PATH})
+ else()
+ message("NO: ${GCOV_FILE}")
+ endif()
+endforeach()
+
+# TODO: Enable setting these
+set(JSON_SERVICE_NAME "travis-ci")
+set(JSON_SERVICE_JOB_ID $ENV{TRAVIS_JOB_ID})
+
+set(JSON_TEMPLATE
+"{
+ \"service_name\": \"\@JSON_SERVICE_NAME\@\",
+ \"service_job_id\": \"\@JSON_SERVICE_JOB_ID\@\",
+ \"source_files\": \@JSON_GCOV_FILES\@
+}"
+)
+
+set(SRC_FILE_TEMPLATE
+"{
+ \"name\": \"\@GCOV_SRC_REL_PATH\@\",
+ \"source_digest\": \"\@GCOV_CONTENTS_MD5\@\",
+ \"coverage\": \@GCOV_FILE_COVERAGE\@
+ }"
+)
+
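+# Both templates above are expanded per file via string(CONFIGURE ...) below;
+# each @VAR@ placeholder is replaced with the current value of that CMake
+# variable.
+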
+message("\nGenerate JSON for files:")
+message("=========================")
+
+set(JSON_GCOV_FILES "[")
+
+# Read the GCOV files line by line and get the coverage data.
+foreach (GCOV_FILE ${GCOV_FILES})
+
+ get_source_path_from_gcov_filename(GCOV_SRC_PATH ${GCOV_FILE})
+ file(RELATIVE_PATH GCOV_SRC_REL_PATH "${PROJECT_ROOT}" "${GCOV_SRC_PATH}")
+
+ # The new coveralls API doesn't need the entire source (Yay!)
+  # However, we still keep that part for now and will clean it up in the future.
+ file(MD5 "${GCOV_SRC_PATH}" GCOV_CONTENTS_MD5)
+ message("MD5: ${GCOV_SRC_PATH} = ${GCOV_CONTENTS_MD5}")
+
+ # Loads the gcov file as a list of lines.
+  # (We first open the file and replace all occurrences of [] with _
+ # because CMake will fail to parse a line containing unmatched brackets...
+ # also the \ to escaped \n in macros screws up things.)
+ # https://public.kitware.com/Bug/view.php?id=15369
+ file(READ ${GCOV_FILE} GCOV_CONTENTS)
+ string(REPLACE "[" "_" GCOV_CONTENTS "${GCOV_CONTENTS}")
+ string(REPLACE "]" "_" GCOV_CONTENTS "${GCOV_CONTENTS}")
+ string(REPLACE "\\" "_" GCOV_CONTENTS "${GCOV_CONTENTS}")
+ file(WRITE ${GCOV_FILE}_tmp "${GCOV_CONTENTS}")
+
+ file(STRINGS ${GCOV_FILE}_tmp GCOV_LINES)
+ list(LENGTH GCOV_LINES LINE_COUNT)
+
+ # Instead of trying to parse the source from the
+ # gcov file, simply read the file contents from the source file.
+ # (Parsing it from the gcov is hard because C-code uses ; in many places
+  # which also happens to be the same as the CMake list delimiter).
+ file(READ ${GCOV_SRC_PATH} GCOV_FILE_SOURCE)
+
+ string(REPLACE "\\" "\\\\" GCOV_FILE_SOURCE "${GCOV_FILE_SOURCE}")
+ string(REGEX REPLACE "\"" "\\\\\"" GCOV_FILE_SOURCE "${GCOV_FILE_SOURCE}")
+ string(REPLACE "\t" "\\\\t" GCOV_FILE_SOURCE "${GCOV_FILE_SOURCE}")
+ string(REPLACE "\r" "\\\\r" GCOV_FILE_SOURCE "${GCOV_FILE_SOURCE}")
+ string(REPLACE "\n" "\\\\n" GCOV_FILE_SOURCE "${GCOV_FILE_SOURCE}")
+ # According to http://json.org/ these should be escaped as well.
+ # Don't know how to do that in CMake however...
+ #string(REPLACE "\b" "\\\\b" GCOV_FILE_SOURCE "${GCOV_FILE_SOURCE}")
+ #string(REPLACE "\f" "\\\\f" GCOV_FILE_SOURCE "${GCOV_FILE_SOURCE}")
+ #string(REGEX REPLACE "\u([a-fA-F0-9]{4})" "\\\\u\\1" GCOV_FILE_SOURCE "${GCOV_FILE_SOURCE}")
+
+ # We want a json array of coverage data as a single string
+ # start building them from the contents of the .gcov
+ set(GCOV_FILE_COVERAGE "[")
+
+ set(GCOV_LINE_COUNT 1) # Line number for the .gcov.
+ set(DO_SKIP 0)
+ foreach (GCOV_LINE ${GCOV_LINES})
+ #message("${GCOV_LINE}")
+ # Example of what we're parsing:
+ # Hitcount |Line | Source
+ # " 8: 26: if (!allowed || (strlen(allowed) == 0))"
+ string(REGEX REPLACE
+ "^([^:]*):([^:]*):(.*)$"
+ "\\1;\\2;\\3"
+ RES
+ "${GCOV_LINE}")
+
+ # Check if we should exclude lines using the Lcov syntax.
+ string(REGEX MATCH "LCOV_EXCL_START" START_SKIP "${GCOV_LINE}")
+ string(REGEX MATCH "LCOV_EXCL_END" END_SKIP "${GCOV_LINE}")
+ string(REGEX MATCH "LCOV_EXCL_LINE" LINE_SKIP "${GCOV_LINE}")
+
+ set(RESET_SKIP 0)
+ if (LINE_SKIP AND NOT DO_SKIP)
+ set(DO_SKIP 1)
+ set(RESET_SKIP 1)
+ endif()
+
+ if (START_SKIP)
+ set(DO_SKIP 1)
+ message("${GCOV_LINE_COUNT}: Start skip")
+ endif()
+
+ if (END_SKIP)
+ set(DO_SKIP 0)
+ endif()
+
+ list(LENGTH RES RES_COUNT)
+
+ if (RES_COUNT GREATER 2)
+ list(GET RES 0 HITCOUNT)
+ list(GET RES 1 LINE)
+ list(GET RES 2 SOURCE)
+
+ string(STRIP ${HITCOUNT} HITCOUNT)
+ string(STRIP ${LINE} LINE)
+
+ # Lines with 0 line numbers are metadata and can be ignored.
+ if (NOT ${LINE} EQUAL 0)
+
+ if (DO_SKIP)
+ set(GCOV_FILE_COVERAGE "${GCOV_FILE_COVERAGE}null, ")
+ else()
+ # Translate the hitcount into valid JSON values.
+ if (${HITCOUNT} STREQUAL "#####")
+ set(GCOV_FILE_COVERAGE "${GCOV_FILE_COVERAGE}0, ")
+ elseif (${HITCOUNT} STREQUAL "-")
+ set(GCOV_FILE_COVERAGE "${GCOV_FILE_COVERAGE}null, ")
+ else()
+ set(GCOV_FILE_COVERAGE "${GCOV_FILE_COVERAGE}${HITCOUNT}, ")
+ endif()
+ endif()
+ endif()
+ else()
+ message(WARNING "Failed to properly parse line (RES_COUNT = ${RES_COUNT}) ${GCOV_FILE}:${GCOV_LINE_COUNT}\n-->${GCOV_LINE}")
+ endif()
+
+ if (RESET_SKIP)
+ set(DO_SKIP 0)
+ endif()
+ math(EXPR GCOV_LINE_COUNT "${GCOV_LINE_COUNT}+1")
+ endforeach()
+
+ message("${GCOV_LINE_COUNT} of ${LINE_COUNT} lines read!")
+
+ # Advanced way of removing the trailing comma in the JSON array.
+ # "[1, 2, 3, " -> "[1, 2, 3"
+ string(REGEX REPLACE ",[ ]*$" "" GCOV_FILE_COVERAGE ${GCOV_FILE_COVERAGE})
+
+ # Append the trailing ] to complete the JSON array.
+ set(GCOV_FILE_COVERAGE "${GCOV_FILE_COVERAGE}]")
+
+ # Generate the final JSON for this file.
+ message("Generate JSON for file: ${GCOV_SRC_REL_PATH}...")
+ string(CONFIGURE ${SRC_FILE_TEMPLATE} FILE_JSON)
+
+ set(JSON_GCOV_FILES "${JSON_GCOV_FILES}${FILE_JSON}, ")
+endforeach()
+
+# Loop through all files we couldn't find any coverage for
+# as well, and generate JSON for those as well with 0% coverage.
+foreach(NOT_COVERED_SRC ${COVERAGE_SRCS_REMAINING})
+
+ # Loads the source file as a list of lines.
+ file(STRINGS ${NOT_COVERED_SRC} SRC_LINES)
+
+ set(GCOV_FILE_COVERAGE "[")
+ set(GCOV_FILE_SOURCE "")
+
+ foreach (SOURCE ${SRC_LINES})
+ set(GCOV_FILE_COVERAGE "${GCOV_FILE_COVERAGE}0, ")
+
+ string(REPLACE "\\" "\\\\" SOURCE "${SOURCE}")
+ string(REGEX REPLACE "\"" "\\\\\"" SOURCE "${SOURCE}")
+ string(REPLACE "\t" "\\\\t" SOURCE "${SOURCE}")
+ string(REPLACE "\r" "\\\\r" SOURCE "${SOURCE}")
+ set(GCOV_FILE_SOURCE "${GCOV_FILE_SOURCE}${SOURCE}\\n")
+ endforeach()
+
+ # Remove trailing comma, and complete JSON array with ]
+ string(REGEX REPLACE ",[ ]*$" "" GCOV_FILE_COVERAGE ${GCOV_FILE_COVERAGE})
+ set(GCOV_FILE_COVERAGE "${GCOV_FILE_COVERAGE}]")
+
+ # Generate the final JSON for this file.
+ message("Generate JSON for non-gcov file: ${NOT_COVERED_SRC}...")
+ string(CONFIGURE ${SRC_FILE_TEMPLATE} FILE_JSON)
+ set(JSON_GCOV_FILES "${JSON_GCOV_FILES}${FILE_JSON}, ")
+endforeach()
+
+# Get rid of trailing comma.
+string(REGEX REPLACE ",[ ]*$" "" JSON_GCOV_FILES ${JSON_GCOV_FILES})
+set(JSON_GCOV_FILES "${JSON_GCOV_FILES}]")
+
+# Generate the final complete JSON!
+message("Generate final JSON...")
+string(CONFIGURE ${JSON_TEMPLATE} JSON)
+
+file(WRITE "${COVERALLS_OUTPUT_FILE}" "${JSON}")
+message("###########################################################################")
+message("Generated coveralls JSON containing coverage data:")
+message("${COVERALLS_OUTPUT_FILE}")
+message("###########################################################################")
diff --git a/cmake/flags.cmake b/cmake/flags.cmake
index dbad6be3f41b3f565d6bf275633d07198491ff3d..e087770991aefc17535d50c0539c50f6316520d7 100644
--- a/cmake/flags.cmake
+++ b/cmake/flags.cmake
@@ -21,12 +21,6 @@ function(safe_set_flag is_c src_list flag_name)
endif()
if(${safe_name})
set(${src_list} "${${src_list}} ${flag_name}" PARENT_SCOPE)
- if(is_c)
- set(CUDA_NVCC_FLAGS
- --compiler-options;${flag_name}
- ${CUDA_NVCC_FLAGS}
- PARENT_SCOPE)
- endif()
endif()
endfunction()
@@ -40,6 +34,20 @@ macro(safe_set_cxxflag src_list flag_name)
safe_set_flag(OFF ${src_list} ${flag_name})
endmacro()
+# helper macro to set nvcc flag
+macro(safe_set_nvflag flag_name)
+ string(REPLACE "-" "_" safe_name ${flag_name})
+ string(REPLACE "=" "_" safe_name ${safe_name})
+ CHECK_C_COMPILER_FLAG(${flag_name} C_COMPILER_SUPPORT_FLAG_${safe_name})
+ set(safe_name C_COMPILER_SUPPORT_FLAG_${safe_name})
+ if(${safe_name})
+ set(CUDA_NVCC_FLAGS
+ --compiler-options;${flag_name}
+ ${CUDA_NVCC_FLAGS})
+ endif()
+endmacro()
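+
+# Usage sketch: safe_set_nvflag(-Wno-unused-function) appends
+# "--compiler-options;-Wno-unused-function" to CUDA_NVCC_FLAGS when the host
+# C compiler supports that flag (see the GPU_COMMON_FLAGS loop below).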
+
+
CHECK_CXX_SYMBOL_EXISTS(UINT64_MAX "stdint.h" UINT64_MAX_EXISTS)
if(NOT UINT64_MAX_EXISTS)
set(CMAKE_REQUIRED_DEFINITIONS -D__STDC_LIMIT_MACROS)
@@ -63,20 +71,43 @@ set(COMMON_FLAGS
-Wnon-virtual-dtor
-Wdelete-non-virtual-dtor
-Wno-unused-parameter
+ -Wno-unused-function
+ -Wno-error=literal-suffix
+ -Wno-error=unused-local-typedefs)
+
+set(GPU_COMMON_FLAGS
+ -fPIC
+ -fno-omit-frame-pointer
+ -Wnon-virtual-dtor
+ -Wdelete-non-virtual-dtor
+ -Wno-unused-parameter
+ -Wno-unused-function
-Wno-error=literal-suffix
-Wno-error=unused-local-typedefs
-Wno-error=unused-function # Warnings in Numpy Header.
)
+if (APPLE)
+ # On Mac OS X build fat binaries with x86_64 architectures by default.
+ set (CMAKE_OSX_ARCHITECTURES "x86_64" CACHE STRING "Build architectures for OSX" FORCE)
+else()
+ set(GPU_COMMON_FLAGS
+ -Wall
+ -Wextra
+ -Werror
+ ${GPU_COMMON_FLAGS})
+endif()
+
+
foreach(flag ${COMMON_FLAGS})
safe_set_cflag(CMAKE_C_FLAGS ${flag})
safe_set_cxxflag(CMAKE_CXX_FLAGS ${flag})
endforeach()
-# On Mac OS X build fat binaries with x86_64 architectures by default.
-if (APPLE)
- set (CMAKE_OSX_ARCHITECTURES "x86_64" CACHE STRING "Build architectures for OSX" FORCE)
-endif ()
+foreach(flag ${GPU_COMMON_FLAGS})
+ safe_set_nvflag(${flag})
+endforeach()
+
# Release/Debug flags set by cmake. Such as -O3 -g -DNDEBUG etc.
# So, don't set these flags here.
diff --git a/cmake/rdma.cmake b/cmake/rdma.cmake
new file mode 100644
index 0000000000000000000000000000000000000000..e9a4da79aa92a92aa7e5d21bb795ab9aaf60ab8b
--- /dev/null
+++ b/cmake/rdma.cmake
@@ -0,0 +1,76 @@
+# The user should first download RDMA from the Subversion repository.
+
+# Execute the following instructions to download it manually:
+# svn co https://svn.baidu.com/sys/ip/trunk/rdma/sockrdmav1 rdma/
+# svn co https://svn.baidu.com/sys/ip/trunk/rdma/thirdparty rdma/
+# We use the static outputs from the svn repositories to avoid implicit bugs
+# caused by a non-standard runtime environment.
+
+set(RDMA_ROOT $ENV{RDMA_ROOT} CACHE PATH "Folder contains RDMA sock library and thirdparty library")
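+
+# Example (hypothetical path): export RDMA_ROOT=$HOME/rdma before running
+# cmake with -DWITH_RDMA=ON, so the find_path/find_library calls below can
+# locate the headers and static libraries.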
+
+function(generate_rdma_links)
+  # Redirect to the current dir to isolate pollution from the system runtime
+  # environment; this gives unified control across different gcc environments.
+  # E.g., by default gcc48 does not reference /usr/lib64, which could contain
+  # old runtime libraries that crash the process at load time. This
+  # redirection trick fixes that.
+ execute_process(
+ COMMAND mkdir -p librdma
+ COMMAND ln -s -f /usr/lib64/libibverbs.so.1.0.0 librdma/libibverbs.so.1
+ COMMAND ln -s -f /usr/lib64/libibverbs.so.1.0.0 librdma/libibverbs.so
+ COMMAND ln -s -f /usr/lib64/librdmacm.so.1.0.0 librdma/librdmacm.so.1
+ COMMAND ln -s -f /usr/lib64/librdmacm.so.1.0.0 librdma/librdmacm.so
+ WORKING_DIRECTORY ${CMAKE_CURRENT_BINARY_DIR}
+ )
+endfunction(generate_rdma_links)
+
+
+#check and set headers
+find_path(RDMA_INC_SXISOCK sxi_sock.h PATHS ${RDMA_ROOT}/sockrdmav1/output/include)
+find_path(RDMA_INC_XIO libxio.h PATHS ${RDMA_ROOT}/thirdparty/output/accelio)
+find_path(RDMA_INC_EVENT event2 PATHS ${RDMA_ROOT}/thirdparty/output/libevent)
+find_path(RDMA_INC_NUMA numa.h PATHS ${RDMA_ROOT}/thirdparty/output/libnuma)
+
+#check and set libs
+find_library(RDMA_LIB_SXISOCK NAMES sxisock PATHS ${RDMA_ROOT}/sockrdmav1/output)
+find_library(RDMA_LIB_XIO NAMES xio PATHS ${RDMA_ROOT}/thirdparty/output/accelio)
+find_library(RDMA_LIB_EVENT NAMES event PATHS ${RDMA_ROOT}/thirdparty/output/libevent)
+find_library(RDMA_LIB_EVENT_CORE NAMES event_core PATHS ${RDMA_ROOT}/thirdparty/output/libevent)
+find_library(RDMA_LIB_EVENT_EXTRA NAMES event_extra PATHS ${RDMA_ROOT}/thirdparty/output/libevent)
+find_library(RDMA_LIB_EVENT_PTHREADS NAMES event_pthreads PATHS ${RDMA_ROOT}/thirdparty/output/libevent)
+find_library(RDMA_LIB_NUMA NAMES numa PATHS ${RDMA_ROOT}/thirdparty/output/libnuma)
+
+if(
+ RDMA_INC_SXISOCK AND
+ RDMA_INC_XIO AND
+ RDMA_INC_EVENT AND
+ RDMA_INC_NUMA AND
+ RDMA_LIB_SXISOCK AND
+ RDMA_LIB_XIO AND
+ RDMA_LIB_EVENT AND
+ RDMA_LIB_EVENT_CORE AND
+ RDMA_LIB_EVENT_EXTRA AND
+ RDMA_LIB_EVENT_PTHREADS AND
+ RDMA_LIB_NUMA
+ )
+
+ set(RDMA_INC_DIR
+ ${RDMA_INC_SXISOCK}
+ ${RDMA_INC_XIO}
+ ${RDMA_INC_EVENT}
+ ${RDMA_INC_NUMA})
+ set(RDMA_LIBS
+ ${RDMA_LIB_SXISOCK}
+ ${RDMA_LIB_XIO}
+ ${RDMA_LIB_EVENT}
+ ${RDMA_LIB_EVENT_CORE}
+ ${RDMA_LIB_EVENT_EXTRA}
+ ${RDMA_LIB_EVENT_PTHREADS}
+ ${RDMA_LIB_NUMA}
+ )
+ set(RDMA_LD_FLAGS "-L./librdma -libverbs -lrdmacm -Xlinker -rpath ./librdma")
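+  # The -Xlinker -rpath ./librdma flags make the dynamic loader pick up the
+  # symlinks created by generate_rdma_links() above.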
+ return()
+endif()
+
+# If this module is not called, RDMA_INC_DIR and RDMA_LIBS will be empty, so
+# the top-level module can always refer to these variables.
+
+message(FATAL_ERROR "RDMA libraries were not found; set RDMA_ROOT or check all related libraries.")
diff --git a/cmake/swig.cmake b/cmake/swig.cmake
index f5c1bcc79b3dc0e6c4f4489ee9f72a084afe8847..97e87aa947791e2c5a88e7e554dec43bcd661664 100644
--- a/cmake/swig.cmake
+++ b/cmake/swig.cmake
@@ -1,25 +1,3 @@
-find_program(
- SWIG_BINARY_PATH
- swig)
-
-if(${SWIG_BINARY_PATH} STREQUAL "SWIG_BINARY_PATH-NOTFOUND")
- set(SWIG_FOUND OFF)
-else()
- set(SWIG_FOUND ON)
-endif()
-
-set(MIN_SWIG_VERSION 2)
-if(SWIG_FOUND)
- execute_process(COMMAND sh -c "${SWIG_BINARY_PATH} -version | grep Version | cut -f3 -d' '"
- OUTPUT_VARIABLE _SWIG_VERSION
- OUTPUT_STRIP_TRAILING_WHITESPACE)
- if(${_SWIG_VERSION} VERSION_LESS ${MIN_SWIG_VERSION})
- message("swig version ${MIN_SWIG_VERSION} or greater is needed for generating python api. "
- "Only version ${_SWIG_VERSION} is found. Set SWIG_FOUND to FALSE")
- set(SWIG_FOUND FALSE)
- endif(${_SWIG_VERSION} VERSION_LESS ${MIN_SWIG_VERSION})
-endif(SWIG_FOUND)
-
function(generate_python_api target_name)
add_custom_command(OUTPUT ${PROJ_ROOT}/paddle/py_paddle/swig_paddle.py
${PROJ_ROOT}/paddle/Paddle_wrap.cxx
@@ -27,6 +5,7 @@ function(generate_python_api target_name)
COMMAND swig -python -c++ -outcurrentdir -I../ api/Paddle.swig
&& mv ${PROJ_ROOT}/paddle/swig_paddle.py ${PROJ_ROOT}/paddle/py_paddle/swig_paddle.py
DEPENDS ${PROJ_ROOT}/paddle/api/Paddle.swig
+ ${PROJ_ROOT}/paddle/api/PaddleAPI.h
WORKING_DIRECTORY ${PROJ_ROOT}/paddle
COMMENT "Generate Python API from swig")
add_custom_target(${target_name} ALL DEPENDS
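
The block deleted from swig.cmake probed the installed swig by parsing `swig -version` and rejected versions below 2; with swig now installed through the Travis addon list, the probe is dropped. For reference, a rough Python equivalent of that check (the exact output format of `swig -version` is our assumption):

import re
import subprocess

MIN_SWIG_VERSION = (2,)

def swig_found():
    # Probe `swig -version` and compare against the minimum version,
    # mirroring the removed CMake logic.
    try:
        out = subprocess.check_output(['swig', '-version'])
    except (OSError, subprocess.CalledProcessError):
        return False
    match = re.search(r'SWIG Version (\d+(?:\.\d+)*)', out)
    if match is None:
        return False
    version = tuple(int(p) for p in match.group(1).split('.'))
    return version >= MIN_SWIG_VERSION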
diff --git a/cmake/util.cmake b/cmake/util.cmake
index 0fa36f070cc11be543efe9573b93173ec771b9be..a8282f07184c34f77d506ed7ef40206fbbd55b41 100644
--- a/cmake/util.cmake
+++ b/cmake/util.cmake
@@ -67,6 +67,10 @@ endmacro()
#
# It will handle WITH_PYTHON/WITH_GLOG etc.
function(link_paddle_exe TARGET_NAME)
+ if(WITH_RDMA)
+ generate_rdma_links()
+ endif()
+
if(WITH_METRIC)
if(WITH_GPU)
set(METRIC_LIBS paddle_metric_learning paddle_dserver_lib metric metric_cpu)
@@ -109,6 +113,12 @@ function(link_paddle_exe TARGET_NAME)
${ZLIB_LIBRARIES}
${INTERAL_LIBS}
${CMAKE_DL_LIBS})
+
+ if(WITH_RDMA)
+ target_link_libraries(${TARGET_NAME}
+ ${RDMA_LD_FLAGS}
+ ${RDMA_LIBS})
+ endif()
if(WITH_PYTHON)
target_link_libraries(${TARGET_NAME}
@@ -178,14 +188,6 @@ macro(add_simple_unittest TARGET_NAME)
add_unittest(${TARGET_NAME} ${TARGET_NAME}.cpp)
endmacro()
-macro(add_paddle_culib TARGET_NAME)
- set(NVCC_FLAG ${CUDA_NVCC_FLAGS})
- set(CUDA_NVCC_FLAGS ${CUDA_NVCC_FLAGS};--use_fast_math)
- cuda_add_library(${TARGET_NAME} STATIC ${ARGN})
- set(CUDA_NVCC_FLAGS ${NVCC_FLAG})
-endmacro()
-
-
# Creates C resources file from files in given resource file
function(create_resources res_file output)
# Create empty output file
diff --git a/demo/image_classification/.gitignore b/demo/image_classification/.gitignore
index 76961dd1436f859f85f75ff9ed7d3fefdec83dc4..6a05b8f6632db0977fceade8b48a89b9f7f6e6cc 100644
--- a/demo/image_classification/.gitignore
+++ b/demo/image_classification/.gitignore
@@ -5,3 +5,5 @@ plot.png
train.log
image_provider_copy_1.py
*pyc
+train.list
+test.list
diff --git a/demo/image_classification/data/download_cifar.sh b/demo/image_classification/data/download_cifar.sh
old mode 100644
new mode 100755
diff --git a/demo/image_classification/data/process_cifar.py b/demo/image_classification/data/process_cifar.py
index b766118eb00737c7a196ed85850b3cebd690b0d0..b235010e4ece377beffaaa1b9247a77d7a96b712 100644
--- a/demo/image_classification/data/process_cifar.py
+++ b/demo/image_classification/data/process_cifar.py
@@ -16,7 +16,6 @@ import numpy as np
import sys
import os
import PIL.Image as Image
-
"""
Usage: python process_cifar input_dir output_dir
"""
@@ -30,6 +29,7 @@ def mkdir_not_exist(path):
if not os.path.exists(path):
os.mkdir(path)
+
def create_dir_structure(output_dir):
"""
Create the directory structure for the directory.
@@ -39,8 +39,8 @@ def create_dir_structure(output_dir):
mkdir_not_exist(os.path.join(output_dir, "train"))
mkdir_not_exist(os.path.join(output_dir, "test"))
-def convert_batch(batch_path, label_set, label_map,
- output_dir, data_split):
+
+def convert_batch(batch_path, label_set, label_map, output_dir, data_split):
"""
Convert CIFAR batch to the structure of Paddle format.
batch_path: the batch to be converted.
@@ -67,11 +67,23 @@ if __name__ == '__main__':
output_dir = sys.argv[2]
num_batch = 5
create_dir_structure(output_dir)
- label_map = {0: "airplane", 1: "automobile", 2: "bird", 3: "cat", 4: "deer",
- 5: "dog", 6: "frog", 7: "horse", 8: "ship", 9: "truck"}
+ label_map = {
+ 0: "airplane",
+ 1: "automobile",
+ 2: "bird",
+ 3: "cat",
+ 4: "deer",
+ 5: "dog",
+ 6: "frog",
+ 7: "horse",
+ 8: "ship",
+ 9: "truck"
+ }
labels = {}
for i in range(1, num_batch + 1):
- convert_batch(os.path.join(input_dir, "data_batch_%d" % i), labels,
- label_map, output_dir, "train")
- convert_batch(os.path.join(input_dir, "test_batch"), {},
- label_map, output_dir, "test")
\ No newline at end of file
+ convert_batch(
+ os.path.join(input_dir, "data_batch_%d" % i), labels, label_map,
+ output_dir, "train")
+ convert_batch(
+ os.path.join(input_dir, "test_batch"), {}, label_map, output_dir,
+ "test")
diff --git a/demo/image_classification/image_provider.py b/demo/image_classification/image_provider.py
index 9e2f8b8949b39b930680e6d84758133eed566881..28bf1bb02c1f08b2e8ec9acd38f0a8594b05ab66 100644
--- a/demo/image_classification/image_provider.py
+++ b/demo/image_classification/image_provider.py
@@ -46,36 +46,41 @@ def hook(settings, img_size, mean_img_size, num_classes, color, meta, use_jpeg,
settings.img_mean = image_util.load_meta(settings.meta_path,
settings.mean_img_size,
- settings.img_size,
- settings.color)
+ settings.img_size, settings.color)
settings.logger.info('Image size: %s', settings.img_size)
settings.logger.info('Meta path: %s', settings.meta_path)
settings.input_types = [
dense_vector(settings.img_raw_size), # image feature
- integer_value(settings.num_classes)] # labels
+ integer_value(settings.num_classes)
+ ] # labels
settings.logger.info('DataProvider Initialization finished')
-@provider(init_hook=hook)
-def processData(settings, file_name):
+@provider(init_hook=hook, min_pool_size=0)
+def processData(settings, file_list):
"""
The main function for loading data.
Load the batch, iterate all the images and labels in this batch.
- file_name: the batch file name.
+ file_list: the batch file list.
"""
- data = cPickle.load(io.open(file_name, 'rb'))
- indexes = list(range(len(data['images'])))
- if settings.is_train:
- random.shuffle(indexes)
- for i in indexes:
- if settings.use_jpeg == 1:
- img = image_util.decode_jpeg(data['images'][i])
- else:
- img = data['images'][i]
- img_feat = image_util.preprocess_img(img, settings.img_mean,
- settings.img_size, settings.is_train,
- settings.color)
- label = data['labels'][i]
- yield img_feat.tolist(), int(label)
+ with open(file_list, 'r') as fdata:
+ lines = [line.strip() for line in fdata]
+ random.shuffle(lines)
+ for file_name in lines:
+ with io.open(file_name.strip(), 'rb') as file:
+ data = cPickle.load(file)
+ indexes = list(range(len(data['images'])))
+ if settings.is_train:
+ random.shuffle(indexes)
+ for i in indexes:
+ if settings.use_jpeg == 1:
+ img = image_util.decode_jpeg(data['images'][i])
+ else:
+ img = data['images'][i]
+ img_feat = image_util.preprocess_img(
+ img, settings.img_mean, settings.img_size,
+ settings.is_train, settings.color)
+ label = data['labels'][i]
+ yield img_feat.astype('float32'), int(label)
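
Note the new provider contract: processData no longer receives a single pickled batch but a list file whose lines each name a pickled batch file holding parallel 'images' and 'labels' entries. Stripped of the @provider decorator, the iteration pattern is roughly this sketch (the function name is ours):

import cPickle
import random

def iterate_samples(file_list):
    # file_list names one pickled batch file per line; each batch holds
    # parallel 'images' and 'labels' arrays.
    with open(file_list) as f:
        batch_files = [line.strip() for line in f]
    random.shuffle(batch_files)  # shuffle at the batch-file level
    for batch_file in batch_files:
        with open(batch_file, 'rb') as f:
            data = cPickle.load(f)
        for img, label in zip(data['images'], data['labels']):
            yield img, int(label)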
diff --git a/demo/image_classification/image_util.py b/demo/image_classification/image_util.py
index c545d16aafbc741bce25f9469e7f67de5b88fa8c..b5c6431c06f77cef5c31ca844a8427eebaea2fce 100644
--- a/demo/image_classification/image_util.py
+++ b/demo/image_classification/image_util.py
@@ -16,17 +16,20 @@ import numpy as np
from PIL import Image
from cStringIO import StringIO
+
def resize_image(img, target_size):
"""
Resize an image so that the shorter edge has length target_size.
img: the input image to be resized.
target_size: the target resized image size.
"""
- percent = (target_size/float(min(img.size[0], img.size[1])))
- resized_size = int(round(img.size[0] * percent)), int(round(img.size[1] * percent))
+ percent = (target_size / float(min(img.size[0], img.size[1])))
+ resized_size = int(round(img.size[0] * percent)), int(
+ round(img.size[1] * percent))
img = img.resize(resized_size, Image.ANTIALIAS)
return img
+
def flip(im):
"""
Return the flipped image.
@@ -38,6 +41,7 @@ def flip(im):
else:
return im[:, ::-1]
+
def crop_img(im, inner_size, color=True, test=True):
"""
Return cropped image.
@@ -50,20 +54,22 @@ def crop_img(im, inner_size, color=True, test=True):
If True, crop the center of images.
"""
if color:
- height, width = max(inner_size, im.shape[1]), max(inner_size, im.shape[2])
+ height, width = max(inner_size, im.shape[1]), max(inner_size,
+ im.shape[2])
padded_im = np.zeros((3, height, width))
startY = (height - im.shape[1]) / 2
startX = (width - im.shape[2]) / 2
endY, endX = startY + im.shape[1], startX + im.shape[2]
- padded_im[:, startY: endY, startX: endX] = im
+ padded_im[:, startY:endY, startX:endX] = im
else:
im = im.astype('float32')
- height, width = max(inner_size, im.shape[0]), max(inner_size, im.shape[1])
+ height, width = max(inner_size, im.shape[0]), max(inner_size,
+ im.shape[1])
padded_im = np.zeros((height, width))
startY = (height - im.shape[0]) / 2
startX = (width - im.shape[1]) / 2
endY, endX = startY + im.shape[0], startX + im.shape[1]
- padded_im[startY: endY, startX: endX] = im
+ padded_im[startY:endY, startX:endX] = im
if test:
startY = (height - inner_size) / 2
startX = (width - inner_size) / 2
@@ -72,19 +78,21 @@ def crop_img(im, inner_size, color=True, test=True):
startX = np.random.randint(0, width - inner_size + 1)
endY, endX = startY + inner_size, startX + inner_size
if color:
- pic = padded_im[:, startY: endY, startX: endX]
+ pic = padded_im[:, startY:endY, startX:endX]
else:
- pic = padded_im[startY: endY, startX: endX]
+ pic = padded_im[startY:endY, startX:endX]
if (not test) and (np.random.randint(2) == 0):
pic = flip(pic)
return pic
+
def decode_jpeg(jpeg_string):
np_array = np.array(Image.open(StringIO(jpeg_string)))
if len(np_array.shape) == 3:
np_array = np.transpose(np_array, (2, 0, 1))
return np_array
+
def preprocess_img(im, img_mean, crop_size, is_train, color=True):
"""
Does data augmentation for images.
@@ -99,6 +107,7 @@ def preprocess_img(im, img_mean, crop_size, is_train, color=True):
pic -= img_mean
return pic.flatten()
+
def load_meta(meta_path, mean_img_size, crop_size, color=True):
"""
Return the loaded meta file.
@@ -109,17 +118,18 @@ def load_meta(meta_path, mean_img_size, crop_size, color=True):
mean = np.load(meta_path)['data_mean']
border = (mean_img_size - crop_size) / 2
if color:
- assert(mean_img_size * mean_img_size * 3 == mean.shape[0])
+ assert (mean_img_size * mean_img_size * 3 == mean.shape[0])
mean = mean.reshape(3, mean_img_size, mean_img_size)
- mean = mean[:, border: border + crop_size,
- border: border + crop_size].astype('float32')
+ mean = mean[:, border:border + crop_size, border:border +
+ crop_size].astype('float32')
else:
- assert(mean_img_size * mean_img_size == mean.shape[0])
+ assert (mean_img_size * mean_img_size == mean.shape[0])
mean = mean.reshape(mean_img_size, mean_img_size)
- mean = mean[border: border + crop_size,
- border: border + crop_size].astype('float32')
+ mean = mean[border:border + crop_size, border:border +
+ crop_size].astype('float32')
return mean
+
def load_image(img_path, is_color=True):
"""
Load image and return.
@@ -130,6 +140,7 @@ def load_image(img_path, is_color=True):
img.load()
return img
+
def oversample(img, crop_dims):
"""
image : iterable of (H x W x K) ndarrays
@@ -152,50 +163,53 @@ def oversample(img, crop_dims):
for j in w_indices:
crops_ix[curr] = (i, j, i + crop_dims[0], j + crop_dims[1])
curr += 1
- crops_ix[4] = np.tile(im_center, (1, 2)) + np.concatenate([
- -crop_dims / 2.0,
- crop_dims / 2.0
- ])
+ crops_ix[4] = np.tile(im_center, (1, 2)) + np.concatenate(
+ [-crop_dims / 2.0, crop_dims / 2.0])
crops_ix = np.tile(crops_ix, (2, 1))
# Extract crops
- crops = np.empty((10 * len(img), crop_dims[0], crop_dims[1],
- im_shape[-1]), dtype=np.float32)
+ crops = np.empty(
+ (10 * len(img), crop_dims[0], crop_dims[1], im_shape[-1]),
+ dtype=np.float32)
ix = 0
for im in img:
for crop in crops_ix:
crops[ix] = im[crop[0]:crop[2], crop[1]:crop[3], :]
ix += 1
- crops[ix-5:ix] = crops[ix-5:ix, :, ::-1, :] # flip for mirrors
+ crops[ix - 5:ix] = crops[ix - 5:ix, :, ::-1, :] # flip for mirrors
return crops
+
class ImageTransformer:
- def __init__(self, transpose = None,
- channel_swap = None, mean = None, is_color = True):
+ def __init__(self,
+ transpose=None,
+ channel_swap=None,
+ mean=None,
+ is_color=True):
self.transpose = transpose
self.channel_swap = None
self.mean = None
- self.is_color = is_color
+ self.is_color = is_color
- def set_transpose(self, order):
+ def set_transpose(self, order):
if self.is_color:
- assert 3 == len(order)
+ assert 3 == len(order)
self.transpose = order
- def set_channel_swap(self, order):
+ def set_channel_swap(self, order):
if self.is_color:
- assert 3 == len(order)
+ assert 3 == len(order)
self.channel_swap = order
def set_mean(self, mean):
# mean value, may be one value per channel
if mean.ndim == 1:
- mean = mean[:, np.newaxis, np.newaxis]
- else:
+ mean = mean[:, np.newaxis, np.newaxis]
+ else:
# elementwise mean
if self.is_color:
assert len(mean.shape) == 3
- self.mean = mean
+ self.mean = mean
def transformer(self, data):
if self.transpose is not None:
diff --git a/demo/image_classification/prediction.py b/demo/image_classification/prediction.py
index 5d9e93265867389ca6d2aa26e48fcfa08561e6ae..6a47bd5851c99635dd7d3f1d5df67dd081ca4584 100755
--- a/demo/image_classification/prediction.py
+++ b/demo/image_classification/prediction.py
@@ -12,7 +12,7 @@
# See the License for the specific language governing permissions and
# limitations under the License.
-import os,sys
+import os, sys
import numpy as np
import logging
from PIL import Image
@@ -24,9 +24,11 @@ from py_paddle import swig_paddle, DataProviderConverter
from paddle.trainer.PyDataProvider2 import dense_vector
from paddle.trainer.config_parser import parse_config
-logging.basicConfig(format='[%(levelname)s %(asctime)s %(filename)s:%(lineno)s] %(message)s')
+logging.basicConfig(
+ format='[%(levelname)s %(asctime)s %(filename)s:%(lineno)s] %(message)s')
logging.getLogger().setLevel(logging.INFO)
+
class ImageClassifier():
def __init__(self,
train_conf,
@@ -58,18 +60,19 @@ class ImageClassifier():
self.oversample = oversample
self.is_color = is_color
- self.transformer = image_util.ImageTransformer(is_color = is_color)
- self.transformer.set_transpose((2,0,1))
+ self.transformer = image_util.ImageTransformer(is_color=is_color)
+ self.transformer.set_transpose((2, 0, 1))
self.mean_file = mean_file
mean = np.load(self.mean_file)['data_mean']
mean = mean.reshape(3, self.crop_dims[0], self.crop_dims[1])
- self.transformer.set_mean(mean) # mean pixel
+ self.transformer.set_mean(mean) # mean pixel
gpu = 1 if use_gpu else 0
conf_args = "is_test=1,use_gpu=%d,is_predict=1" % (gpu)
conf = parse_config(train_conf, conf_args)
swig_paddle.initPaddle("--use_gpu=%d" % (gpu))
- self.network = swig_paddle.GradientMachine.createFromConfigProto(conf.model_config)
+ self.network = swig_paddle.GradientMachine.createFromConfigProto(
+ conf.model_config)
assert isinstance(self.network, swig_paddle.GradientMachine)
self.network.loadParameters(self.model_dir)
@@ -90,14 +93,14 @@ class ImageClassifier():
# image_util.resize_image: short side is self.resize_dim
image = image_util.resize_image(image, self.resize_dim)
image = np.array(image)
- input = np.zeros((1, image.shape[0], image.shape[1], 3),
- dtype=np.float32)
+ input = np.zeros(
+ (1, image.shape[0], image.shape[1], 3), dtype=np.float32)
input[0] = image.astype(np.float32)
input = image_util.oversample(input, self.crop_dims)
else:
image = image.resize(self.crop_dims, Image.ANTIALIAS)
- input = np.zeros((1, self.crop_dims[0], self.crop_dims[1], 3),
- dtype=np.float32)
+ input = np.zeros(
+ (1, self.crop_dims[0], self.crop_dims[1], 3), dtype=np.float32)
input[0] = np.array(image).astype(np.float32)
data_in = []
@@ -133,22 +136,24 @@ class ImageClassifier():
lab = np.argsort(-prob)
logging.info("Label of %s is: %d", image, lab[0])
+
if __name__ == '__main__':
- image_size=32
- crop_size=32
- multi_crop=True
- config="vgg_16_cifar.py"
- output_layer="__fc_layer_1__"
- mean_path="data/cifar-out/batches/batches.meta"
- model_path=sys.argv[1]
- image=sys.argv[2]
- use_gpu=bool(int(sys.argv[3]))
-
- obj = ImageClassifier(train_conf=config,
- model_dir=model_path,
- resize_dim=image_size,
- crop_dim=crop_size,
- mean_file=mean_path,
- use_gpu=use_gpu,
- oversample=multi_crop)
+ image_size = 32
+ crop_size = 32
+ multi_crop = True
+ config = "vgg_16_cifar.py"
+ output_layer = "__fc_layer_1__"
+ mean_path = "data/cifar-out/batches/batches.meta"
+ model_path = sys.argv[1]
+ image = sys.argv[2]
+ use_gpu = bool(int(sys.argv[3]))
+
+ obj = ImageClassifier(
+ train_conf=config,
+ model_dir=model_path,
+ resize_dim=image_size,
+ crop_dim=crop_size,
+ mean_file=mean_path,
+ use_gpu=use_gpu,
+ oversample=multi_crop)
obj.predict(image, output_layer)
diff --git a/demo/image_classification/preprocess.py b/demo/image_classification/preprocess.py
index 0286a5d7e9dc8d0f546b18b1ed846c9452cdbe4b..10b9c1691b5e51273c73a975545cd36f3822e901 100755
--- a/demo/image_classification/preprocess.py
+++ b/demo/image_classification/preprocess.py
@@ -19,22 +19,36 @@ from optparse import OptionParser
def option_parser():
parser = OptionParser(usage="usage: python preprcoess.py "\
"-i data_dir [options]")
- parser.add_option("-i", "--input", action="store",
- dest="input", help="Input data directory.")
- parser.add_option("-s", "--size", action="store",
- dest="size", help="Processed image size.")
- parser.add_option("-c", "--color", action="store",
- dest="color", help="whether to use color images.")
+ parser.add_option(
+ "-i",
+ "--input",
+ action="store",
+ dest="input",
+ help="Input data directory.")
+ parser.add_option(
+ "-s",
+ "--size",
+ action="store",
+ dest="size",
+ help="Processed image size.")
+ parser.add_option(
+ "-c",
+ "--color",
+ action="store",
+ dest="color",
+ help="whether to use color images.")
return parser.parse_args()
+
if __name__ == '__main__':
- options, args = option_parser()
- data_dir = options.input
- processed_image_size = int(options.size)
- color = options.color == "1"
- data_creator = ImageClassificationDatasetCreater(data_dir,
- processed_image_size,
- color)
- data_creator.num_per_batch = 1000
- data_creator.overwrite = True
- data_creator.create_batches()
+ options, args = option_parser()
+ data_dir = options.input
+ processed_image_size = int(options.size)
+ color = options.color == "1"
+ data_creator = ImageClassificationDatasetCreater(
+ data_dir, processed_image_size, color)
+ data_creator.train_list_name = "train.txt"
+ data_creator.test_list_name = "test.txt"
+ data_creator.num_per_batch = 1000
+ data_creator.overwrite = True
+ data_creator.create_batches()
diff --git a/demo/image_classification/preprocess.sh b/demo/image_classification/preprocess.sh
index dfe3eb95d1ab8b2114fcf5e0f461ea0efb7cc1e5..e3e86ff10675c0622867af2eb0d26c87f4bc2db5 100755
--- a/demo/image_classification/preprocess.sh
+++ b/demo/image_classification/preprocess.sh
@@ -17,3 +17,6 @@ set -e
data_dir=./data/cifar-out
python preprocess.py -i $data_dir -s 32 -c 1
+
+echo "data/cifar-out/batches/train.txt" > train.list
+echo "data/cifar-out/batches/test.txt" > test.list
diff --git a/demo/image_classification/vgg_16_cifar.py b/demo/image_classification/vgg_16_cifar.py
index e8b8af4bd313d0738aafab8da93fc510e40cc3d6..58ceff5fc2f46cac9997b6d8af2b0db0c43e0c75 100755
--- a/demo/image_classification/vgg_16_cifar.py
+++ b/demo/image_classification/vgg_16_cifar.py
@@ -18,36 +18,38 @@ is_predict = get_config_arg("is_predict", bool, False)
####################Data Configuration ##################
if not is_predict:
- data_dir='data/cifar-out/batches/'
- meta_path=data_dir+'batches.meta'
-
- args = {'meta':meta_path,'mean_img_size': 32,
- 'img_size': 32,'num_classes': 10,
- 'use_jpeg': 1,'color': "color"}
-
- define_py_data_sources2(train_list=data_dir+"train.list",
- test_list=data_dir+'test.list',
- module='image_provider',
- obj='processData',
- args=args)
+ data_dir = 'data/cifar-out/batches/'
+ meta_path = data_dir + 'batches.meta'
+
+ args = {
+ 'meta': meta_path,
+ 'mean_img_size': 32,
+ 'img_size': 32,
+ 'num_classes': 10,
+ 'use_jpeg': 1,
+ 'color': "color"
+ }
+
+ define_py_data_sources2(
+ train_list="train.list",
+ test_list="train.list",
+ module='image_provider',
+ obj='processData',
+ args=args)
######################Algorithm Configuration #############
settings(
- batch_size = 128,
- learning_rate = 0.1 / 128.0,
- learning_method = MomentumOptimizer(0.9),
- regularization = L2Regularization(0.0005 * 128)
-)
+ batch_size=128,
+ learning_rate=0.1 / 128.0,
+ learning_method=MomentumOptimizer(0.9),
+ regularization=L2Regularization(0.0005 * 128))
#######################Network Configuration #############
-data_size=3*32*32
-label_size=10
-img = data_layer(name='image',
- size=data_size)
+data_size = 3 * 32 * 32
+label_size = 10
+img = data_layer(name='image', size=data_size)
# small_vgg is predefined in trainer_config_helpers.networks
-predict = small_vgg(input_image=img,
- num_channels=3,
- num_classes=label_size)
+predict = small_vgg(input_image=img, num_channels=3, num_classes=label_size)
if not is_predict:
lbl = data_layer(name="label", size=label_size)
diff --git a/demo/introduction/README.md b/demo/introduction/README.md
new file mode 100644
index 0000000000000000000000000000000000000000..0614a7afe645677ef0b65a17ea05f1dcfa45214f
--- /dev/null
+++ b/demo/introduction/README.md
@@ -0,0 +1,3 @@
+This folder contains scripts used in the PaddlePaddle introduction.
+- use `bash train.sh` to train a simple linear regression model
+- use `python evaluate_model.py` to read model parameters. You can see that `w` and `b` are very close to [2, 0.3].
diff --git a/demo/introduction/dataprovider.py b/demo/introduction/dataprovider.py
new file mode 100644
index 0000000000000000000000000000000000000000..8515022e18dc6bbf055e6db3121568acf1df1c55
--- /dev/null
+++ b/demo/introduction/dataprovider.py
@@ -0,0 +1,24 @@
+# Copyright (c) 2016 Baidu, Inc. All Rights Reserved
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+from paddle.trainer.PyDataProvider2 import *
+import random
+
+
+# define data types of input: 2 real numbers
+@provider(input_types=[dense_vector(1), dense_vector(1)], use_seq=False)
+def process(settings, input_file):
+ for i in xrange(2000):
+ x = random.random()
+ yield [x], [2 * x + 0.3]
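
Because the provider emits y = 2x + 0.3 with no noise, an ordinary least-squares fit recovers the parameters exactly; a numpy-only sanity check of what training should converge to (independent of Paddle):

import random
import numpy as np

# Fit the same synthetic data by least squares; the trained network's
# w and b should land near these values.
xs = np.array([random.random() for _ in xrange(2000)])
ys = 2 * xs + 0.3
A = np.vstack([xs, np.ones_like(xs)]).T
w, b = np.linalg.lstsq(A, ys)[0]
print 'w=%.6f, b=%.6f' % (w, b)  # ~2.000000, ~0.300000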
diff --git a/demo/introduction/evaluate_model.py b/demo/introduction/evaluate_model.py
new file mode 100755
index 0000000000000000000000000000000000000000..ca4a1872731abde90e72cb167929b3d9e2e1ebf4
--- /dev/null
+++ b/demo/introduction/evaluate_model.py
@@ -0,0 +1,39 @@
+#!/usr/bin/env python
+# -*- coding: UTF-8 -*-
+
+# Copyright (c) 2016 Baidu, Inc. All Rights Reserved
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+"""
+Print the model parameters saved in the last pass
+
+Usage:
+ python evaluate_model.py
+"""
+import numpy as np
+import os
+
+
+def load(file_name):
+ with open(file_name, 'rb') as f:
+ f.read(16) # skip header for float type.
+ return np.fromfile(f, dtype=np.float32)
+
+
+def main():
+ print 'w=%.6f, b=%.6f from pass 29' % (load('output/pass-00029/w'),
+ load('output/pass-00029/b'))
+
+
+if __name__ == '__main__':
+ main()
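
load() assumes the dense parameter file layout is a 16-byte header followed by raw float32 values; the layout is documented only by the skip above, so treat it as an assumption. A round-trip sketch under that assumption (the header written here is a dummy placeholder, not Paddle's real header):

import numpy as np

def save_dummy(file_name, values):
    # Write a fake parameter file: 16 placeholder header bytes, then the
    # raw float32 payload that load() expects after the skip.
    with open(file_name, 'wb') as f:
        f.write('\0' * 16)
        np.asarray(values, dtype=np.float32).tofile(f)

save_dummy('w.bin', [2.0])
with open('w.bin', 'rb') as f:
    f.read(16)  # skip the header, exactly as load() does
    print np.fromfile(f, dtype=np.float32)  # -> [ 2.]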
diff --git a/demo/introduction/train.sh b/demo/introduction/train.sh
new file mode 100755
index 0000000000000000000000000000000000000000..06db8edd105ada071597ed1aa5e42f7de547174d
--- /dev/null
+++ b/demo/introduction/train.sh
@@ -0,0 +1,21 @@
+#!/bin/bash
+# Copyright (c) 2016 Baidu, Inc. All Rights Reserved
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+set -e
+
+paddle train \
+ --config=trainer_config.py \
+ --save_dir=./output \
+ --num_passes=30 \
+ 2>&1 |tee 'train.log'
diff --git a/demo/introduction/trainer_config.py b/demo/introduction/trainer_config.py
new file mode 100644
index 0000000000000000000000000000000000000000..7c838c1a8f5b3cb6ac732197c85cd7c728eb013f
--- /dev/null
+++ b/demo/introduction/trainer_config.py
@@ -0,0 +1,41 @@
+# Copyright (c) 2016 Baidu, Inc. All Rights Reserved
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+from paddle.trainer_config_helpers import *
+
+# 1. read data. Suppose you saved the above Python code as dataprovider.py
+data_file = 'empty.list'
+with open(data_file, 'w') as f:
+    f.write(' ')
+define_py_data_sources2(
+ train_list=data_file,
+ test_list=None,
+ module='dataprovider',
+ obj='process',
+ args={})
+
+# 2. learning algorithm
+settings(batch_size=12, learning_rate=1e-3, learning_method=MomentumOptimizer())
+
+# 3. Network configuration
+x = data_layer(name='x', size=1)
+y = data_layer(name='y', size=1)
+y_predict = fc_layer(
+ input=x,
+ param_attr=ParamAttr(name='w'),
+ size=1,
+ act=LinearActivation(),
+ bias_attr=ParamAttr(name='b'))
+cost = regression_cost(input=y_predict, label=y)
+outputs(cost)
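
In equations, the network above is a single affine unit; assuming regression_cost is the usual squared-error cost, each pass minimizes

    \hat{y}_i = w x_i + b, \qquad L(w, b) = \frac{1}{2N} \sum_{i=1}^{N} (\hat{y}_i - y_i)^2

whose unique minimizer on the noiseless data from dataprovider.py is w = 2, b = 0.3, the values evaluate_model.py reads back.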
diff --git a/demo/mnist/data/generate_list.py b/demo/mnist/data/generate_list.py
index 1b929048b4d82b5e9d80585b6d0180f2e92200ce..d880721f94c68bbbc1740f82872462efdb368fa2 100644
--- a/demo/mnist/data/generate_list.py
+++ b/demo/mnist/data/generate_list.py
@@ -13,9 +13,9 @@
# limitations under the License.
o = open("./" + "train.list", "w")
-o.write("./data/raw_data/train" +"\n")
+o.write("./data/raw_data/train" + "\n")
o.close()
o = open("./" + "test.list", "w")
-o.write("./data/raw_data/t10k" +"\n")
-o.close()
\ No newline at end of file
+o.write("./data/raw_data/t10k" + "\n")
+o.close()
diff --git a/demo/mnist/data/get_mnist_data.sh b/demo/mnist/data/get_mnist_data.sh
index 9099b5ab6fb85d86d346a7ad819538fbd013c6ff..5a2e34026d4fe7f8315d4f5453bec7c4ee4f6885 100755
--- a/demo/mnist/data/get_mnist_data.sh
+++ b/demo/mnist/data/get_mnist_data.sh
@@ -19,4 +19,3 @@ done
cd $DIR
rm -f *.list
python generate_list.py
-
diff --git a/demo/mnist/mnist_provider.py b/demo/mnist/mnist_provider.py
index 32af29730a7365df1a98fe54a2edf8850ee93e8d..6df4676da3bdc2e6949cc911fa3720cb51ddc568 100644
--- a/demo/mnist/mnist_provider.py
+++ b/demo/mnist/mnist_provider.py
@@ -2,10 +2,9 @@ from paddle.trainer.PyDataProvider2 import *
# Define a py data provider
-@provider(input_types={
- 'pixel': dense_vector(28 * 28),
- 'label': integer_value(10)
-})
+@provider(
+ input_types={'pixel': dense_vector(28 * 28),
+ 'label': integer_value(10)})
def process(settings, filename): # settings is not used currently.
imgf = filename + "-images-idx3-ubyte"
labelf = filename + "-labels-idx1-ubyte"
diff --git a/demo/mnist/vgg_16_mnist.py b/demo/mnist/vgg_16_mnist.py
index 45a45bb061aa781231a944bb82ebfbc6b0dc9618..f9e89bc588abacd98a8f5fc82a00fae6bb2de10e 100644
--- a/demo/mnist/vgg_16_mnist.py
+++ b/demo/mnist/vgg_16_mnist.py
@@ -18,32 +18,29 @@ is_predict = get_config_arg("is_predict", bool, False)
####################Data Configuration ##################
-
if not is_predict:
- data_dir='./data/'
- define_py_data_sources2(train_list= data_dir + 'train.list',
- test_list= data_dir + 'test.list',
- module='mnist_provider',
- obj='process')
+ data_dir = './data/'
+ define_py_data_sources2(
+ train_list=data_dir + 'train.list',
+ test_list=data_dir + 'test.list',
+ module='mnist_provider',
+ obj='process')
######################Algorithm Configuration #############
settings(
- batch_size = 128,
- learning_rate = 0.1 / 128.0,
- learning_method = MomentumOptimizer(0.9),
- regularization = L2Regularization(0.0005 * 128)
-)
+ batch_size=128,
+ learning_rate=0.1 / 128.0,
+ learning_method=MomentumOptimizer(0.9),
+ regularization=L2Regularization(0.0005 * 128))
#######################Network Configuration #############
-data_size=1*28*28
-label_size=10
+data_size = 1 * 28 * 28
+label_size = 10
img = data_layer(name='pixel', size=data_size)
# small_vgg is predined in trainer_config_helpers.network
-predict = small_vgg(input_image=img,
- num_channels=1,
- num_classes=label_size)
+predict = small_vgg(input_image=img, num_channels=1, num_classes=label_size)
if not is_predict:
lbl = data_layer(name="label", size=label_size)
diff --git a/demo/model_zoo/embedding/extract_para.py b/demo/model_zoo/embedding/extract_para.py
index 17067792fc38d0d25bd28dc35bfb1b88ad5020cd..47e06fae9caa9c3d9e0d6eb2e3f6633a776c5b1d 100755
--- a/demo/model_zoo/embedding/extract_para.py
+++ b/demo/model_zoo/embedding/extract_para.py
@@ -12,7 +12,6 @@
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
-
"""
Example:
python extract_para.py --preModel PREMODEL --preDict PREDICT \
@@ -29,6 +28,7 @@ Options:
from optparse import OptionParser
import struct
+
def get_row_index(preDict, usrDict):
"""
Get the row positions for all words in user dictionary from pre-trained dictionary.
@@ -47,7 +47,9 @@ def get_row_index(preDict, usrDict):
pos.append(index[word])
return pos
-def extract_parameters_by_usrDict(preModel, preDict, usrModel, usrDict, paraDim):
+
+def extract_parameters_by_usrDict(preModel, preDict, usrModel, usrDict,
+ paraDim):
"""
Extract desired parameters from a pretrained embedding model based on user dictionary
"""
@@ -70,6 +72,7 @@ def extract_parameters_by_usrDict(preModel, preDict, usrModel, usrDict, paraDim)
print "extract parameters finish, total", len(rowIndex), "lines"
fi.close()
+
def main():
"""
Main entry for running paraconvert.py
@@ -78,19 +81,33 @@ def main():
"python %prog --preModel PREMODEL --preDict PREDICT" \
" --usrModel USRMODEL --usrDict USRDICT -d DIM"
parser = OptionParser(usage)
- parser.add_option("--preModel", action="store", dest="preModel",
- help="the name of pretrained embedding model")
- parser.add_option("--preDict", action="store", dest="preDict",
- help="the name of pretrained dictionary")
- parser.add_option("--usrModel", action="store", dest="usrModel",
- help="the name of output usr embedding model")
- parser.add_option("--usrDict", action="store", dest="usrDict",
- help="the name of user specified dictionary")
- parser.add_option("-d", action="store", dest="dim",
- help="dimension of parameter")
+ parser.add_option(
+ "--preModel",
+ action="store",
+ dest="preModel",
+ help="the name of pretrained embedding model")
+ parser.add_option(
+ "--preDict",
+ action="store",
+ dest="preDict",
+ help="the name of pretrained dictionary")
+ parser.add_option(
+ "--usrModel",
+ action="store",
+ dest="usrModel",
+ help="the name of output usr embedding model")
+ parser.add_option(
+ "--usrDict",
+ action="store",
+ dest="usrDict",
+ help="the name of user specified dictionary")
+ parser.add_option(
+ "-d", action="store", dest="dim", help="dimension of parameter")
(options, args) = parser.parse_args()
- extract_parameters_by_usrDict(options.preModel, options.preDict,
- options.usrModel, options.usrDict, int(options.dim))
+ extract_parameters_by_usrDict(options.preModel, options.preDict,
+ options.usrModel, options.usrDict,
+ int(options.dim))
+
if __name__ == '__main__':
main()
diff --git a/demo/model_zoo/embedding/paraconvert.py b/demo/model_zoo/embedding/paraconvert.py
index 523412303617a38035392e4bb99f8ce119be8ac8..54155eff8e26b16ff5303d8d279e81b4bf8a90f4 100755
--- a/demo/model_zoo/embedding/paraconvert.py
+++ b/demo/model_zoo/embedding/paraconvert.py
@@ -12,7 +12,6 @@
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
-
"""
Example:
python paraconvert.py --b2t -i INPUT -o OUTPUT -d DIM
@@ -29,6 +28,7 @@ Options:
from optparse import OptionParser
import struct
+
def binary2text(input, output, paraDim):
"""
Convert a binary parameter file of embedding model to be a text file.
@@ -76,12 +76,13 @@ def binary2text(input, output, paraDim):
fo.close()
print "binary2text finish, total", line, "lines"
+
def get_para_count(input):
"""
Compute the total number of embedding parameters in input text file.
input: the name of input text file
"""
- numRows = 1
+ numRows = 1
paraDim = 0
with open(input) as f:
line = f.readline()
@@ -90,6 +91,7 @@ def get_para_count(input):
numRows += 1
return numRows * paraDim
+
def text2binary(input, output, paddle_head=True):
"""
Convert a text parameter file of embedding model to be a binary file.
@@ -123,6 +125,7 @@ def text2binary(input, output, paddle_head=True):
fo.close()
print "text2binary finish, total", count, "lines"
+
def main():
"""
Main entry for running paraconvert.py
@@ -131,21 +134,26 @@ def main():
"python %prog --b2t -i INPUT -o OUTPUT -d DIM \n" \
"python %prog --t2b -i INPUT -o OUTPUT"
parser = OptionParser(usage)
- parser.add_option("--b2t", action="store_true",
- help="convert parameter file of embedding model from binary to text")
- parser.add_option("--t2b", action="store_true",
- help="convert parameter file of embedding model from text to binary")
- parser.add_option("-i", action="store", dest="input",
- help="input parameter file name")
- parser.add_option("-o", action="store", dest="output",
- help="output parameter file name")
- parser.add_option("-d", action="store", dest="dim",
- help="dimension of parameter")
+ parser.add_option(
+ "--b2t",
+ action="store_true",
+ help="convert parameter file of embedding model from binary to text")
+ parser.add_option(
+ "--t2b",
+ action="store_true",
+ help="convert parameter file of embedding model from text to binary")
+ parser.add_option(
+ "-i", action="store", dest="input", help="input parameter file name")
+ parser.add_option(
+ "-o", action="store", dest="output", help="output parameter file name")
+ parser.add_option(
+ "-d", action="store", dest="dim", help="dimension of parameter")
(options, args) = parser.parse_args()
if options.b2t:
binary2text(options.input, options.output, options.dim)
if options.t2b:
text2binary(options.input, options.output)
+
if __name__ == '__main__':
main()
diff --git a/demo/model_zoo/resnet/classify.py b/demo/model_zoo/resnet/classify.py
index 06d471722f8059804a59e6823bebccff85a8d542..7855126edcfec20de251e5bc08c08c7aab8f7a8e 100755
--- a/demo/model_zoo/resnet/classify.py
+++ b/demo/model_zoo/resnet/classify.py
@@ -26,16 +26,22 @@ from py_paddle import swig_paddle, DataProviderConverter
from paddle.trainer.PyDataProvider2 import dense_vector
from paddle.trainer.config_parser import parse_config
-logging.basicConfig(format='[%(levelname)s %(asctime)s %(filename)s:%(lineno)s] %(message)s')
+logging.basicConfig(
+ format='[%(levelname)s %(asctime)s %(filename)s:%(lineno)s] %(message)s')
logging.getLogger().setLevel(logging.INFO)
+
class ImageClassifier():
- def __init__(self, train_conf, model_dir=None,
- resize_dim=256, crop_dim=224,
+ def __init__(self,
+ train_conf,
+ model_dir=None,
+ resize_dim=256,
+ crop_dim=224,
use_gpu=True,
mean_file=None,
output_layer=None,
- oversample=False, is_color=True):
+ oversample=False,
+ is_color=True):
"""
train_conf: network configure.
model_dir: string, directory of model.
@@ -62,24 +68,25 @@ class ImageClassifier():
assert isinstance(self.output_layer, basestring)
self.output_layer = self.output_layer.split(",")
- self.transformer = image_util.ImageTransformer(is_color = is_color)
- self.transformer.set_transpose((2,0,1))
- self.transformer.set_channel_swap((2,1,0))
+ self.transformer = image_util.ImageTransformer(is_color=is_color)
+ self.transformer.set_transpose((2, 0, 1))
+ self.transformer.set_channel_swap((2, 1, 0))
self.mean_file = mean_file
if self.mean_file is not None:
mean = np.load(self.mean_file)['data_mean']
mean = mean.reshape(3, self.crop_dims[0], self.crop_dims[1])
- self.transformer.set_mean(mean) # mean pixel
+ self.transformer.set_mean(mean) # mean pixel
else:
# if you use three mean value, set like:
# this three mean value is calculated from ImageNet.
- self.transformer.set_mean(np.array([103.939,116.779,123.68]))
+ self.transformer.set_mean(np.array([103.939, 116.779, 123.68]))
conf_args = "is_test=1,use_gpu=%d,is_predict=1" % (int(use_gpu))
conf = parse_config(train_conf, conf_args)
swig_paddle.initPaddle("--use_gpu=%d" % (int(use_gpu)))
- self.network = swig_paddle.GradientMachine.createFromConfigProto(conf.model_config)
+ self.network = swig_paddle.GradientMachine.createFromConfigProto(
+ conf.model_config)
assert isinstance(self.network, swig_paddle.GradientMachine)
self.network.loadParameters(self.model_dir)
@@ -105,14 +112,14 @@ class ImageClassifier():
# image_util.resize_image: short side is self.resize_dim
image = image_util.resize_image(image, self.resize_dim)
image = np.array(image)
- input = np.zeros((1, image.shape[0], image.shape[1], 3),
- dtype=np.float32)
+ input = np.zeros(
+ (1, image.shape[0], image.shape[1], 3), dtype=np.float32)
input[0] = image.astype(np.float32)
input = image_util.oversample(input, self.crop_dims)
else:
image = image.resize(self.crop_dims, Image.ANTIALIAS)
- input = np.zeros((1, self.crop_dims[0], self.crop_dims[1], 3),
- dtype=np.float32)
+ input = np.zeros(
+ (1, self.crop_dims[0], self.crop_dims[1], 3), dtype=np.float32)
input[0] = np.array(image).astype(np.float32)
data_in = []
@@ -172,7 +179,7 @@ class ImageClassifier():
logging.info("Label of %s is: %d", image, lab[0])
return results
- def extract(self, data_file, output_dir, batch_size = 10000):
+ def extract(self, data_file, output_dir, batch_size=10000):
"""
extract and save features of output layers, which are
specify in Outputs() in network configure.
@@ -197,7 +204,7 @@ class ImageClassifier():
image_feature[file_name] = feature
sample_num += 1
if sample_num == batch_size:
- batch_name = os.path.join(output_dir, 'batch_%d' %(batch_num))
+ batch_name = os.path.join(output_dir, 'batch_%d' % (batch_num))
self.save_file(image_feature, batch_name)
logging.info('Finish batch %d', batch_num)
batch_num += 1
@@ -206,7 +213,7 @@ class ImageClassifier():
if idx % 1000 == 0:
logging.info('%d/%d, %s', idx, len(image_files), file_name)
if sample_num > 0:
- batch_name = os.path.join(output_dir, 'batch_%d' %(batch_num))
+ batch_name = os.path.join(output_dir, 'batch_%d' % (batch_num))
self.save_file(image_feature, batch_name)
logging.info('Finish batch %d', batch_num)
logging.info('Done: make image feature batch')
@@ -215,38 +222,64 @@ class ImageClassifier():
of = open(file, 'wb')
cPickle.dump(data, of, protocol=cPickle.HIGHEST_PROTOCOL)
+
def option_parser():
"""
Main entry for predciting
"""
usage = "%prog -c config -i data_list -w model_dir [options]"
parser = OptionParser(usage="usage: %s" % usage)
- parser.add_option("-j", "--job",
- action="store", dest="job_type",
- help="job type: predict, extract\
+ parser.add_option(
+ "-j",
+ "--job",
+ action="store",
+ dest="job_type",
+ help="job type: predict, extract\
predict: predicting,\
extract: extract features")
- parser.add_option("-c", "--conf",
- action="store", dest="train_conf",
- help="network config")
- parser.add_option("-i", "--data",
- action="store", dest="data_file",
- help="image list")
- parser.add_option("-w", "--model",
- action="store", dest="model_path",
- default=None, help="model path")
- parser.add_option("-g", "--use_gpu", action="store",
- dest="use_gpu", default=True,
- help="Whether to use gpu mode.")
- parser.add_option("-o", "--output_dir",
- action="store", dest="output_dir",
- default="output", help="output path")
- parser.add_option("-m", "--mean", action="store",
- dest="mean", default=None,
- help="mean file.")
- parser.add_option("-p", "--multi_crop", action="store_true",
- dest="multi_crop", default=False,
- help="Wether to use multiple crops on image.")
+ parser.add_option(
+ "-c",
+ "--conf",
+ action="store",
+ dest="train_conf",
+ help="network config")
+ parser.add_option(
+ "-i", "--data", action="store", dest="data_file", help="image list")
+ parser.add_option(
+ "-w",
+ "--model",
+ action="store",
+ dest="model_path",
+ default=None,
+ help="model path")
+ parser.add_option(
+ "-g",
+ "--use_gpu",
+ action="store",
+ dest="use_gpu",
+ default=True,
+ help="Whether to use gpu mode.")
+ parser.add_option(
+ "-o",
+ "--output_dir",
+ action="store",
+ dest="output_dir",
+ default="output",
+ help="output path")
+ parser.add_option(
+ "-m",
+ "--mean",
+ action="store",
+ dest="mean",
+ default=None,
+ help="mean file.")
+ parser.add_option(
+ "-p",
+ "--multi_crop",
+ action="store_true",
+ dest="multi_crop",
+ default=False,
+ help="Wether to use multiple crops on image.")
parser.add_option("-l", "--output_layer", action="store",
dest="output_layer", default=None,
help="--job=extract, specify layers to extract "\
@@ -254,24 +287,26 @@ def option_parser():
"classification probability, output in resnet.py.")
return parser.parse_args()
+
def main():
"""
1. parse input arguments.
2. predicting or extract features according job type.
"""
options, args = option_parser()
- obj = ImageClassifier(options.train_conf,
- options.model_path,
- use_gpu=options.use_gpu,
- mean_file=options.mean,
- output_layer=options.output_layer,
- oversample=options.multi_crop)
+ obj = ImageClassifier(
+ options.train_conf,
+ options.model_path,
+ use_gpu=options.use_gpu,
+ mean_file=options.mean,
+ output_layer=options.output_layer,
+ oversample=options.multi_crop)
if options.job_type == "predict":
obj.predict(options.data_file)
elif options.job_type == "extract":
- obj.extract(options.data_file,
- options.output_dir)
+ obj.extract(options.data_file, options.output_dir)
+
if __name__ == '__main__':
main()
diff --git a/demo/model_zoo/resnet/example/__init__.py b/demo/model_zoo/resnet/example/__init__.py
index 7f9e87eee6037666b86420fba194624859d356b3..c90af2ee000d46a032984ee23559e7e99b49ddad 100644
--- a/demo/model_zoo/resnet/example/__init__.py
+++ b/demo/model_zoo/resnet/example/__init__.py
@@ -11,4 +11,3 @@
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
-
diff --git a/demo/model_zoo/resnet/example/image_list_provider.py b/demo/model_zoo/resnet/example/image_list_provider.py
index ee457e1fffc7ed8629dc6bde63a8047818c0ff9d..9e415f76a53326c5809b7a8c508701c519ab443b 100644
--- a/demo/model_zoo/resnet/example/image_list_provider.py
+++ b/demo/model_zoo/resnet/example/image_list_provider.py
@@ -16,8 +16,7 @@ from paddle.utils.image_util import *
from paddle.trainer.PyDataProvider2 import *
-def hook(settings, image_size, crop_size, color, file_list,
- is_train, **kwargs):
+def hook(settings, image_size, crop_size, color, file_list, is_train, **kwargs):
"""
Description: Init with a list of data file
file_list is the name list of input files.
@@ -58,7 +57,7 @@ def hook(settings, image_size, crop_size, color, file_list,
sz = settings.crop_size * settings.crop_size
settings.img_mean = np.zeros(sz * 3, dtype=np.single)
for idx, value in enumerate(settings.mean_value):
- settings.img_mean[idx * sz: (idx + 1) * sz] = value
+ settings.img_mean[idx * sz:(idx + 1) * sz] = value
settings.img_mean = settings.img_mean.reshape(3, settings.crop_size,
settings.crop_size)
@@ -69,7 +68,8 @@ def hook(settings, image_size, crop_size, color, file_list,
settings.input_types = [
dense_vector(settings.img_input_size), # image feature
- integer_value(1)] # labels
+ integer_value(1)
+ ] # labels
settings.logger.info('Image short side: %s', settings.img_size)
settings.logger.info('Crop size: %s', settings.crop_size)
@@ -97,9 +97,6 @@ def processData(settings, file_list):
# swap channel
if settings.is_swap_channel:
img = img[settings.swap_channel, :, :]
- img_feat = preprocess_img(img,
- settings.img_mean,
- settings.crop_size,
- settings.is_train,
- settings.color)
+ img_feat = preprocess_img(img, settings.img_mean, settings.crop_size,
+ settings.is_train, settings.color)
yield img_feat.tolist(), int(lab.strip())
diff --git a/demo/model_zoo/resnet/load_feature.py b/demo/model_zoo/resnet/load_feature.py
index ee4930b7a17f7f21ceeba8db253eed64416ebf10..b0948b75fd0ac9a3fa89070aed04d523ce286f4e 100644
--- a/demo/model_zoo/resnet/load_feature.py
+++ b/demo/model_zoo/resnet/load_feature.py
@@ -17,9 +17,11 @@ import sys
import cPickle
import logging
-logging.basicConfig(format='[%(levelname)s %(asctime)s %(filename)s:%(lineno)s] %(message)s')
+logging.basicConfig(
+ format='[%(levelname)s %(asctime)s %(filename)s:%(lineno)s] %(message)s')
logging.getLogger().setLevel(logging.INFO)
+
def load_feature_c(file):
"""
Load feature extracted by C++ interface.
@@ -30,14 +32,15 @@ def load_feature_c(file):
f = open(file, 'r')
for line in f:
sample = []
- for slot in line.strip().split(";"):
- fea = [float(val) for val in slot.strip().split()]
+ for slot in line.strip().split(";"):
+ fea = [float(val) for val in slot.strip().split()]
if fea:
sample.append(fea)
features.append(sample)
f.close()
return features
+
def load_feature_py(feature_dir):
"""
Load feature extracted by python interface.
@@ -54,6 +57,7 @@ def load_feature_py(feature_dir):
logging.info('Load feature file %s', file_name)
return features
+
if __name__ == '__main__':
- print load_feature_py(sys.argv[1])
+ print load_feature_py(sys.argv[1])
#print load_feature_c(sys.argv[1])
diff --git a/demo/model_zoo/resnet/resnet.py b/demo/model_zoo/resnet/resnet.py
index 483e308ac804e13ca249ef4e47e9e9b00770ce1b..015b74cd484596039b9fcf010576ca340d044db7 100644
--- a/demo/model_zoo/resnet/resnet.py
+++ b/demo/model_zoo/resnet/resnet.py
@@ -13,7 +13,6 @@
# limitations under the License.
from paddle.trainer_config_helpers import *
-
"""
paper: https://arxiv.org/abs/1512.03385
"""
@@ -28,15 +27,19 @@ if not is_predict and data_provider:
# mean.meta size : 3 x 224 x 224.
# If you use three mean value, set like:
# "mean_value:103.939,116.779,123.68;"
- args={
+ args = {
'mean_meta': "model/mean_meta_224/mean.meta",
- 'image_size': 224, 'crop_size': 224,
- 'color': True,'swap_channel:': [2, 1, 0]}
- define_py_data_sources2(train_list,
- 'example/test.list',
- module="example.image_list_provider",
- obj="processData",
- args=args)
+ 'image_size': 224,
+ 'crop_size': 224,
+ 'color': True,
+ 'swap_channel:': [2, 1, 0]
+ }
+ define_py_data_sources2(
+ train_list,
+ 'example/test.list',
+ module="example.image_list_provider",
+ obj="processData",
+ args=args)
batch_size = 1
learning_rate = 0.1 / batch_size
@@ -54,12 +57,16 @@ Settings(
learning_method='momentum',
learning_rate_decay_a=0.5,
learning_rate_decay_b=1200000 * 10,
- learning_rate_schedule="discexp",
-)
+ learning_rate_schedule="discexp", )
-def conv_bn_layer(name, input, filter_size, num_filters,
- stride, padding, channels=None,
+def conv_bn_layer(name,
+ input,
+ filter_size,
+ num_filters,
+ stride,
+ padding,
+ channels=None,
active_type=ReluActivation()):
"""
A wrapper for conv layer with batch normalization layers.
@@ -67,19 +74,18 @@ def conv_bn_layer(name, input, filter_size, num_filters,
conv layer has no activation.
"""
- tmp = img_conv_layer(name=name + "_conv",
- input=input,
- filter_size=filter_size,
- num_channels=channels,
- num_filters=num_filters,
- stride=stride,
- padding=padding,
- act=LinearActivation(),
- bias_attr=False)
- return batch_norm_layer(name=name + "_bn",
- input=tmp,
- act=active_type,
- use_global_stats=is_test)
+ tmp = img_conv_layer(
+ name=name + "_conv",
+ input=input,
+ filter_size=filter_size,
+ num_channels=channels,
+ num_filters=num_filters,
+ stride=stride,
+ padding=padding,
+ act=LinearActivation(),
+ bias_attr=False)
+ return batch_norm_layer(
+ name=name + "_bn", input=tmp, act=active_type, use_global_stats=is_test)
def bottleneck_block(name, input, num_filters1, num_filters2):
@@ -88,29 +94,31 @@ def bottleneck_block(name, input, num_filters1, num_filters2):
Last conv_bn_layer has no activation.
Addto layer has activation of relu.
"""
- last_name = conv_bn_layer(name=name + '_branch2a',
- input=input,
- filter_size=1,
- num_filters=num_filters1,
- stride=1,
- padding=0)
- last_name = conv_bn_layer(name=name + '_branch2b',
- input=last_name,
- filter_size=3,
- num_filters=num_filters1,
- stride=1,
- padding=1)
- last_name = conv_bn_layer(name=name + '_branch2c',
- input=last_name,
- filter_size=1,
- num_filters=num_filters2,
- stride=1,
- padding=0,
- active_type=LinearActivation())
-
- return addto_layer(name=name + "_addto",
- input=[input, last_name],
- act=ReluActivation())
+ last_name = conv_bn_layer(
+ name=name + '_branch2a',
+ input=input,
+ filter_size=1,
+ num_filters=num_filters1,
+ stride=1,
+ padding=0)
+ last_name = conv_bn_layer(
+ name=name + '_branch2b',
+ input=last_name,
+ filter_size=3,
+ num_filters=num_filters1,
+ stride=1,
+ padding=1)
+ last_name = conv_bn_layer(
+ name=name + '_branch2c',
+ input=last_name,
+ filter_size=1,
+ num_filters=num_filters2,
+ stride=1,
+ padding=0,
+ active_type=LinearActivation())
+
+ return addto_layer(
+ name=name + "_addto", input=[input, last_name], act=ReluActivation())
def mid_projection(name, input, num_filters1, num_filters2, stride=2):
@@ -123,38 +131,41 @@ def mid_projection(name, input, num_filters1, num_filters2, stride=2):
branch2x: bottleneck building block, shortcuts are identity.
"""
# stride = 2
- branch1 = conv_bn_layer(name=name + '_branch1',
- input=input,
- filter_size=1,
- num_filters=num_filters2,
- stride=stride,
- padding=0,
- active_type=LinearActivation())
-
- last_name = conv_bn_layer(name=name + '_branch2a',
- input=input,
- filter_size=1,
- num_filters=num_filters1,
- stride=stride,
- padding=0)
- last_name = conv_bn_layer(name=name + '_branch2b',
- input=last_name,
- filter_size=3,
- num_filters=num_filters1,
- stride=1,
- padding=1)
-
- last_name = conv_bn_layer(name=name + '_branch2c',
- input=last_name,
- filter_size=1,
- num_filters=num_filters2,
- stride=1,
- padding=0,
- active_type=LinearActivation())
-
- return addto_layer(name=name + "_addto",
- input=[branch1, last_name],
- act=ReluActivation())
+ branch1 = conv_bn_layer(
+ name=name + '_branch1',
+ input=input,
+ filter_size=1,
+ num_filters=num_filters2,
+ stride=stride,
+ padding=0,
+ active_type=LinearActivation())
+
+ last_name = conv_bn_layer(
+ name=name + '_branch2a',
+ input=input,
+ filter_size=1,
+ num_filters=num_filters1,
+ stride=stride,
+ padding=0)
+ last_name = conv_bn_layer(
+ name=name + '_branch2b',
+ input=last_name,
+ filter_size=3,
+ num_filters=num_filters1,
+ stride=1,
+ padding=1)
+
+ last_name = conv_bn_layer(
+ name=name + '_branch2c',
+ input=last_name,
+ filter_size=1,
+ num_filters=num_filters2,
+ stride=1,
+ padding=0,
+ active_type=LinearActivation())
+
+ return addto_layer(
+ name=name + "_addto", input=[branch1, last_name], act=ReluActivation())
def deep_res_net(res2_num=3, res3_num=4, res4_num=6, res5_num=3):
@@ -168,67 +179,67 @@ def deep_res_net(res2_num=3, res3_num=4, res4_num=6, res5_num=3):
# For ImageNet
# conv1: 112x112
img = data_layer(name='input', size=224 * 224 * 3)
- tmp = conv_bn_layer("conv1", img,
- filter_size=7,
- channels=3,
- num_filters=64,
- stride=2,
- padding=3)
+ tmp = conv_bn_layer(
+ "conv1",
+ img,
+ filter_size=7,
+ channels=3,
+ num_filters=64,
+ stride=2,
+ padding=3)
tmp = img_pool_layer(name="pool1", input=tmp, pool_size=3, stride=2)
# conv2_x: 56x56
- tmp = mid_projection(name="res2_1",
- input=tmp,
- num_filters1=64,
- num_filters2=256,
- stride=1)
+ tmp = mid_projection(
+ name="res2_1", input=tmp, num_filters1=64, num_filters2=256, stride=1)
for i in xrange(2, res2_num + 1, 1):
- tmp = bottleneck_block(name="res2_" + str(i),
- input=tmp,
- num_filters1=64,
- num_filters2=256)
+ tmp = bottleneck_block(
+ name="res2_" + str(i), input=tmp, num_filters1=64, num_filters2=256)
# conv3_x: 28x28
- tmp = mid_projection(name="res3_1",
- input=tmp,
- num_filters1=128,
- num_filters2=512)
+ tmp = mid_projection(
+ name="res3_1", input=tmp, num_filters1=128, num_filters2=512)
for i in xrange(2, res3_num + 1, 1):
- tmp = bottleneck_block(name="res3_" + str(i),
- input=tmp, num_filters1=128,
- num_filters2=512)
+ tmp = bottleneck_block(
+ name="res3_" + str(i),
+ input=tmp,
+ num_filters1=128,
+ num_filters2=512)
# conv4_x: 14x14
- tmp = mid_projection(name="res4_1", input=tmp,
- num_filters1=256, num_filters2=1024)
+ tmp = mid_projection(
+ name="res4_1", input=tmp, num_filters1=256, num_filters2=1024)
for i in xrange(2, res4_num + 1, 1):
- tmp = bottleneck_block(name="res4_" + str(i),
- input=tmp,
- num_filters1=256,
- num_filters2=1024)
+ tmp = bottleneck_block(
+ name="res4_" + str(i),
+ input=tmp,
+ num_filters1=256,
+ num_filters2=1024)
# conv5_x: 7x7
- tmp = mid_projection(name="res5_1", input=tmp,
- num_filters1=512, num_filters2=2048)
+ tmp = mid_projection(
+ name="res5_1", input=tmp, num_filters1=512, num_filters2=2048)
for i in xrange(2, res5_num + 1, 1):
- tmp = bottleneck_block(name="res5_" + str(i),
- input=tmp, num_filters1=512,
- num_filters2=2048)
-
- tmp = img_pool_layer(name='avgpool',
- input=tmp,
- pool_size=7,
- stride=1,
- pool_type=AvgPooling())
-
- output = fc_layer(name='output',
- input=tmp,
- size=1000,
- act=SoftmaxActivation())
+ tmp = bottleneck_block(
+ name="res5_" + str(i),
+ input=tmp,
+ num_filters1=512,
+ num_filters2=2048)
+
+ tmp = img_pool_layer(
+ name='avgpool',
+ input=tmp,
+ pool_size=7,
+ stride=1,
+ pool_type=AvgPooling())
+
+ output = fc_layer(
+ name='output', input=tmp, size=1000, act=SoftmaxActivation())
if not is_predict:
- classification_cost(input=output, label=data_layer(name='label',
- size=1))
+ classification_cost(
+ input=output, label=data_layer(
+ name='label', size=1))
def res_net_50():
diff --git a/demo/quick_start/api_train.py b/demo/quick_start/api_train.py
new file mode 100644
index 0000000000000000000000000000000000000000..66cbb856484d231613a0026be129a7bc3a7cfdf5
--- /dev/null
+++ b/demo/quick_start/api_train.py
@@ -0,0 +1,122 @@
+# Copyright (c) 2016 Baidu, Inc. All Rights Reserved
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+import argparse
+import itertools
+import random
+
+from paddle.trainer.config_parser import parse_config
+from py_paddle import swig_paddle as api
+from py_paddle import DataProviderConverter
+from paddle.trainer.PyDataProvider2 \
+ import integer_value, integer_value_sequence, sparse_binary_vector
+
+
+def parse_arguments():
+ parser = argparse.ArgumentParser()
+ parser.add_argument(
+ "--train_data", type=str, required=False, help="train data file")
+ parser.add_argument("--test_data", type=str, help="test data file")
+ parser.add_argument(
+ "--config", type=str, required=True, help="config file name")
+ parser.add_argument("--dict_file", required=True, help="dictionary file")
+ parser.add_argument(
+ "--seq", default=1, type=int, help="whether use sequence training")
+ parser.add_argument(
+ "--use_gpu", default=0, type=int, help="whether use GPU for training")
+ parser.add_argument(
+ "--trainer_count",
+ default=1,
+ type=int,
+ help="Number of threads for training")
+ parser.add_argument(
+ "--num_passes", default=5, type=int, help="Number of training passes")
+ return parser.parse_args()
+
+
+UNK_IDX = 0
+
+
+def load_data(file_name, word_dict):
+ with open(file_name, 'r') as f:
+ for line in f:
+ label, comment = line.strip().split('\t')
+ words = comment.split()
+ word_slot = [word_dict.get(w, UNK_IDX) for w in words]
+ yield word_slot, int(label)
+
+
+def load_dict(dict_file):
+ word_dict = dict()
+ with open(dict_file, 'r') as f:
+ for i, line in enumerate(f):
+ w = line.strip().split()[0]
+ word_dict[w] = i
+ return word_dict
+
+
+def main():
+ options = parse_arguments()
+ api.initPaddle("--use_gpu=%s" % options.use_gpu,
+ "--trainer_count=%s" % options.trainer_count)
+
+ word_dict = load_dict(options.dict_file)
+ train_dataset = list(load_data(options.train_data, word_dict))
+ if options.test_data:
+ test_dataset = list(load_data(options.test_data, word_dict))
+ else:
+ test_dataset = None
+
+ trainer_config = parse_config(options.config,
+ "dict_file=%s" % options.dict_file)
+ # No need to have data provider for trainer
+ trainer_config.ClearField('data_config')
+ trainer_config.ClearField('test_data_config')
+
+    # create a GradientMachine from the model configuration
+ model = api.GradientMachine.createFromConfigProto(
+ trainer_config.model_config)
+ # create a trainer for the gradient machine
+ trainer = api.Trainer.create(trainer_config, model)
+
+ # create a data converter which converts data to PaddlePaddle
+ # internal format
+ input_types = [
+ integer_value_sequence(len(word_dict)) if options.seq else
+ sparse_binary_vector(len(word_dict)), integer_value(2)
+ ]
+ converter = DataProviderConverter(input_types)
+
+ batch_size = trainer_config.opt_config.batch_size
+ trainer.startTrain()
+ for train_pass in xrange(options.num_passes):
+ trainer.startTrainPass()
+ random.shuffle(train_dataset)
+ for pos in xrange(0, len(train_dataset), batch_size):
+ batch = itertools.islice(train_dataset, pos, pos + batch_size)
+ size = min(batch_size, len(train_dataset) - pos)
+ trainer.trainOneDataBatch(size, converter(batch))
+ trainer.finishTrainPass()
+ if test_dataset:
+ trainer.startTestPeriod()
+ for pos in xrange(0, len(test_dataset), batch_size):
+ batch = itertools.islice(test_dataset, pos, pos + batch_size)
+ size = min(batch_size, len(test_dataset) - pos)
+ trainer.testOneDataBatch(size, converter(batch))
+ trainer.finishTestPeriod()
+ trainer.finishTrain()
+
+
+if __name__ == '__main__':
+ main()
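
The training loop above feeds converter(batch) straight into trainOneDataBatch. The converter's only contract is that each sample tuple lines up with input_types; a toy sketch with hypothetical word ids (assumes api.initPaddle(...) has already run, as in main() above):

    from py_paddle import DataProviderConverter
    from paddle.trainer.PyDataProvider2 import integer_value, \
        sparse_binary_vector

    # one slot of word ids (bag of words), one slot with the label
    input_types = [sparse_binary_vector(10), integer_value(2)]
    converter = DataProviderConverter(input_types)
    batch = [([0, 3, 7], 1), ([2, 5], 0)]   # (word ids, label) pairs
    arguments = converter(batch)            # feed to trainOneDataBatch
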
diff --git a/demo/quick_start/api_train.sh b/demo/quick_start/api_train.sh
new file mode 100755
index 0000000000000000000000000000000000000000..40e9d0a09aaa6b672d6b3997c67c07a5e8a8c3d8
--- /dev/null
+++ b/demo/quick_start/api_train.sh
@@ -0,0 +1,29 @@
+#!/bin/bash
+# Copyright (c) 2016 Baidu, Inc. All Rights Reserved
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+set -e
+
+# Note: if using trainer_config.emb.py, trainer_config.cnn.py
+# or trainer_config.lstm.py, you need to set --seq=1 below,
+# because they are sequence models.
+python api_train.py \
+ --config=trainer_config.lr.py \
+ --trainer_count=2 \
+ --num_passes=15 \
+ --use_gpu=0 \
+ --seq=0 \
+ --train_data=data/train.txt \
+ --test_data=data/test.txt \
+ --dict_file=data/dict.txt \
+ 2>&1 | tee 'train.log'
diff --git a/demo/quick_start/dataprovider_bow.py b/demo/quick_start/dataprovider_bow.py
index f8cde189cf87d73aec05da4b34e064cddecff56b..a5156a2d40cc04c02e50d676045ae6da8937ba01 100644
--- a/demo/quick_start/dataprovider_bow.py
+++ b/demo/quick_start/dataprovider_bow.py
@@ -17,6 +17,7 @@ from paddle.trainer.PyDataProvider2 import *
# id of the word not in dictionary
UNK_IDX = 0
+
# initializer is called by the framework during initialization.
# It allows the user to describe the data types and setup the
# necessary data structure for later use.
@@ -38,7 +39,9 @@ def initializer(settings, dictionary, **kwargs):
# The second input is an integer. It represents the category id of the
# sample. 2 means there are two labels in the dataset.
# (1 for positive and 0 for negative)
- integer_value(2)]
+ integer_value(2)
+ ]
+
 # Declaring a data provider. It has an initializer 'data_initializer'.
# It will cache the generated data of the first pass in memory, so that
@@ -69,9 +72,8 @@ def process(settings, file_name):
def predict_initializer(settings, dictionary, **kwargs):
settings.word_dict = dictionary
- settings.input_types = [
- sparse_binary_vector(len(dictionary))
- ]
+ settings.input_types = [sparse_binary_vector(len(dictionary))]
+
# Declaring a data provider for prediction. The difference with process
# is that label is not generated.
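
A sparse_binary_vector slot, as used here for the bag of words, carries only the indices of the words that occur; a toy walk-through with a hypothetical dictionary:

    UNK_IDX = 0
    word_dict = {'the': 1, 'cat': 2, 'sat': 3}
    words = 'the cat sat on the mat'.split()
    word_slot = [word_dict.get(w, UNK_IDX) for w in words]
    # -> [1, 2, 3, 0, 1, 0]; unknown words collapse onto UNK_IDX
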
diff --git a/demo/quick_start/dataprovider_emb.py b/demo/quick_start/dataprovider_emb.py
index ca940a89e54770eaf93b7c704a8d1274de2dc693..286f3f5c82081f1a6e02a26023969790792a78a3 100755
--- a/demo/quick_start/dataprovider_emb.py
+++ b/demo/quick_start/dataprovider_emb.py
@@ -16,6 +16,7 @@ from paddle.trainer.PyDataProvider2 import *
UNK_IDX = 0
+
def initializer(settings, dictionary, **kwargs):
settings.word_dict = dictionary
settings.input_types = [
@@ -23,7 +24,8 @@ def initializer(settings, dictionary, **kwargs):
 # The value of the integers ranges from 0 to len(dictionary)-1
integer_value_sequence(len(dictionary)),
# Define the second input for label id
- integer_value(2)]
+ integer_value(2)
+ ]
@provider(init_hook=initializer, cache=CacheType.CACHE_PASS_IN_MEM)
@@ -39,7 +41,8 @@ def process(settings, file_name):
def predict_initializer(settings, dictionary, **kwargs):
settings.word_dict = dictionary
settings.input_types = [
- integer_value(len(dictionary), seq_type=SequenceType.SEQUENCE)
+ integer_value(
+ len(dictionary), seq_type=SequenceType.SEQUENCE)
]
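
The reformatted predict_initializer spells out the long form of the training slot: integer_value_sequence(n) is just shorthand for integer_value(n, seq_type=SequenceType.SEQUENCE), so the train and predict providers declare the same word-id sequence type:

    # the two declarations below describe the same slot
    integer_value_sequence(len(dictionary))
    integer_value(len(dictionary), seq_type=SequenceType.SEQUENCE)
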
diff --git a/demo/quick_start/preprocess.py b/demo/quick_start/preprocess.py
index 69fdbe44b5245bc2855847a1507e6eaed517eb96..d87fad632a7429f7d9682badabe4c72ca127354f 100755
--- a/demo/quick_start/preprocess.py
+++ b/demo/quick_start/preprocess.py
@@ -13,7 +13,6 @@
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
-
"""
 1. (remove HTML beforehand or not) tokenizing
2. pos sample : rating score 5; neg sample: rating score 1-2.
@@ -35,7 +34,8 @@ import multiprocessing
batch_size = 5000
word_count = {}
-num_tokenize = max(1, multiprocessing.cpu_count() - 2) # parse + tokenize + save
+num_tokenize = max(1,
+ multiprocessing.cpu_count() - 2) # parse + tokenize + save
max_queue_size = 8
parse_queue = Queue(maxsize=max_queue_size + num_tokenize)
tokenize_queue = Queue(maxsize=max_queue_size + num_tokenize)
diff --git a/demo/quick_start/preprocess.sh b/demo/quick_start/preprocess.sh
index fb2bee98beb268e88d82b64332273aa10399ff42..c9190e2dd2ef754bf3c7287006322b52493dc3a0 100755
--- a/demo/quick_start/preprocess.sh
+++ b/demo/quick_start/preprocess.sh
@@ -20,13 +20,22 @@
set -e
+export LC_ALL=C
+UNAME_STR=`uname`
+
+if [ ${UNAME_STR} == 'Linux' ]; then
+ SHUF_PROG='shuf'
+else
+ SHUF_PROG='gshuf'
+fi
+
mkdir -p data/tmp
python preprocess.py -i data/reviews_Electronics_5.json.gz
# uniq and shuffle
cd data/tmp
echo 'uniq and shuffle...'
-cat pos_*|sort|uniq|shuf> pos.shuffed
-cat neg_*|sort|uniq|shuf> neg.shuffed
+cat pos_*|sort|uniq|${SHUF_PROG}> pos.shuffed
+cat neg_*|sort|uniq|${SHUF_PROG}> neg.shuffed
min_len=`sed -n '$=' neg.shuffed`
test_num=$((min_len/10))
@@ -40,8 +49,8 @@ head -n$train_num neg.shuffed >train.neg
tail -n$test_num pos.shuffed >test.pos
tail -n$test_num neg.shuffed >test.neg
-cat train.pos train.neg|shuf>../train.txt
-cat test.pos test.neg|shuf>../test.txt
+cat train.pos train.neg | ${SHUF_PROG} >../train.txt
+cat test.pos test.neg | ${SHUF_PROG} >../test.txt
cd -
echo 'data/train.txt' > data/train.list
diff --git a/demo/quick_start/train.sh b/demo/quick_start/train.sh
index 1f0a137c8bd59498a327df4c0136314030bbaf7e..b3c471608c3248bfc714d5e44dd927f25dd23ea0 100755
--- a/demo/quick_start/train.sh
+++ b/demo/quick_start/train.sh
@@ -18,11 +18,14 @@ cfg=trainer_config.lr.py
#cfg=trainer_config.emb.py
#cfg=trainer_config.cnn.py
#cfg=trainer_config.lstm.py
+#cfg=trainer_config.bidi-lstm.py
+#cfg=trainer_config.db-lstm.py
+#cfg=trainer_config.resnet-lstm.py
paddle train \
--config=$cfg \
--save_dir=./output \
--trainer_count=4 \
- --log_period=20 \
+ --log_period=100 \
--num_passes=15 \
--use_gpu=false \
--show_parameter_stats_period=100 \
diff --git a/demo/quick_start/trainer_config.bidi-lstm.py b/demo/quick_start/trainer_config.bidi-lstm.py
new file mode 100644
index 0000000000000000000000000000000000000000..51deaf31f94681b6b61f98f798cef14a65ec92cb
--- /dev/null
+++ b/demo/quick_start/trainer_config.bidi-lstm.py
@@ -0,0 +1,61 @@
+# edit-mode: -*- python -*-
+
+# Copyright (c) 2016 Baidu, Inc. All Rights Reserved
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+from paddle.trainer_config_helpers import *
+
+dict_file = "./data/dict.txt"
+word_dict = dict()
+with open(dict_file, 'r') as f:
+ for i, line in enumerate(f):
+ w = line.strip().split()[0]
+ word_dict[w] = i
+
+is_predict = get_config_arg('is_predict', bool, False)
+trn = 'data/train.list' if not is_predict else None
+tst = 'data/test.list' if not is_predict else 'data/pred.list'
+process = 'process' if not is_predict else 'process_predict'
+define_py_data_sources2(
+ train_list=trn,
+ test_list=tst,
+ module="dataprovider_emb",
+ obj=process,
+ args={"dictionary": word_dict})
+
+batch_size = 128 if not is_predict else 1
+settings(
+ batch_size=batch_size,
+ learning_rate=2e-3,
+ learning_method=AdamOptimizer(),
+ regularization=L2Regularization(8e-4),
+ gradient_clipping_threshold=25)
+
+bias_attr = ParamAttr(initial_std=0., l2_rate=0.)
+data = data_layer(name="word", size=len(word_dict))
+emb = embedding_layer(input=data, size=128)
+
+bi_lstm = bidirectional_lstm(input=emb, size=128)
+dropout = dropout_layer(input=bi_lstm, dropout_rate=0.5)
+
+output = fc_layer(
+ input=dropout, size=2, bias_attr=bias_attr, act=SoftmaxActivation())
+
+if is_predict:
+ maxid = maxid_layer(output)
+ outputs([maxid, output])
+else:
+ label = data_layer(name="label", size=2)
+ cls = classification_cost(input=output, label=label)
+ outputs(cls)
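
Like the other quick_start configs, this file is reusable for inference because everything hangs off get_config_arg('is_predict', ...). A minimal sketch of loading it that way, following the same pattern as the demo's prediction scripts:

    from paddle.trainer.config_parser import parse_config

    # is_predict=1 drops the data sources and switches outputs to maxid
    conf = parse_config('trainer_config.bidi-lstm.py', 'is_predict=1')
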
diff --git a/demo/quick_start/trainer_config.cnn.py b/demo/quick_start/trainer_config.cnn.py
index 253ec0aee26cf42226d79726a75aad6c61c77565..388efa75f903e0c7c803c99cd50d73a004133a67 100644
--- a/demo/quick_start/trainer_config.cnn.py
+++ b/demo/quick_start/trainer_config.cnn.py
@@ -27,11 +27,12 @@ is_predict = get_config_arg('is_predict', bool, False)
trn = 'data/train.list' if not is_predict else None
tst = 'data/test.list' if not is_predict else 'data/pred.list'
process = 'process' if not is_predict else 'process_predict'
-define_py_data_sources2(train_list=trn,
- test_list=tst,
- module="dataprovider_emb",
- obj=process,
- args={"dictionary": word_dict})
+define_py_data_sources2(
+ train_list=trn,
+ test_list=tst,
+ module="dataprovider_emb",
+ obj=process,
+ args={"dictionary": word_dict})
batch_size = 128 if not is_predict else 1
settings(
@@ -39,8 +40,7 @@ settings(
learning_rate=2e-3,
learning_method=AdamOptimizer(),
regularization=L2Regularization(8e-4),
- gradient_clipping_threshold=25
-)
+ gradient_clipping_threshold=25)
data = data_layer(name="word", size=len(word_dict))
embedding = embedding_layer(input=data, size=128)
diff --git a/demo/quick_start/trainer_config.db-lstm.py b/demo/quick_start/trainer_config.db-lstm.py
new file mode 100644
index 0000000000000000000000000000000000000000..02bc898d881efbd3bfaed95d45cd9e70ed046746
--- /dev/null
+++ b/demo/quick_start/trainer_config.db-lstm.py
@@ -0,0 +1,74 @@
+# edit-mode: -*- python -*-
+
+# Copyright (c) 2016 Baidu, Inc. All Rights Reserved
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+from paddle.trainer_config_helpers import *
+
+dict_file = "./data/dict.txt"
+word_dict = dict()
+with open(dict_file, 'r') as f:
+ for i, line in enumerate(f):
+ w = line.strip().split()[0]
+ word_dict[w] = i
+
+is_predict = get_config_arg('is_predict', bool, False)
+trn = 'data/train.list' if not is_predict else None
+tst = 'data/test.list' if not is_predict else 'data/pred.list'
+process = 'process' if not is_predict else 'process_predict'
+define_py_data_sources2(
+ train_list=trn,
+ test_list=tst,
+ module="dataprovider_emb",
+ obj=process,
+ args={"dictionary": word_dict})
+
+batch_size = 128 if not is_predict else 1
+settings(
+ batch_size=batch_size,
+ learning_rate=2e-3,
+ learning_method=AdamOptimizer(),
+ regularization=L2Regularization(8e-4),
+ gradient_clipping_threshold=25)
+
+bias_attr = ParamAttr(initial_std=0., l2_rate=0.)
+
+data = data_layer(name="word", size=len(word_dict))
+emb = embedding_layer(input=data, size=128)
+
+hidden_0 = mixed_layer(size=128, input=[full_matrix_projection(input=emb)])
+lstm_0 = lstmemory(input=hidden_0, layer_attr=ExtraAttr(drop_rate=0.1))
+
+input_layers = [hidden_0, lstm_0]
+
+for i in range(1, 8):
+ fc = fc_layer(input=input_layers, size=128)
+ lstm = lstmemory(
+ input=fc,
+ layer_attr=ExtraAttr(drop_rate=0.1),
+ reverse=(i % 2) == 1, )
+ input_layers = [fc, lstm]
+
+lstm_last = pooling_layer(input=lstm, pooling_type=MaxPooling())
+
+output = fc_layer(
+ input=lstm_last, size=2, bias_attr=bias_attr, act=SoftmaxActivation())
+
+if is_predict:
+ maxid = maxid_layer(output)
+ outputs([maxid, output])
+else:
+ label = data_layer(name="label", size=2)
+ cls = classification_cost(input=output, label=label)
+ outputs(cls)
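
The depth loop alternates scan direction through reverse=(i % 2) == 1, so each fc_layer mixes left-to-right and right-to-left context from the level below. A quick check of which depths run reversed:

    print([(i, (i % 2) == 1) for i in range(1, 8)])
    # [(1, True), (2, False), (3, True), (4, False), ..., (7, True)]
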
diff --git a/demo/quick_start/trainer_config.emb.py b/demo/quick_start/trainer_config.emb.py
index 34dd7b96f2f142159472b98a09fd0092fac15e43..8fd18a7aac704e62b137845edb46cce5bc373285 100644
--- a/demo/quick_start/trainer_config.emb.py
+++ b/demo/quick_start/trainer_config.emb.py
@@ -27,18 +27,16 @@ is_predict = get_config_arg('is_predict', bool, False)
trn = 'data/train.list' if not is_predict else None
tst = 'data/test.list' if not is_predict else 'data/pred.list'
process = 'process' if not is_predict else 'process_predict'
-define_py_data_sources2(train_list=trn,
- test_list=tst,
- module="dataprovider_emb",
- obj=process,
- args={"dictionary": word_dict})
+define_py_data_sources2(
+ train_list=trn,
+ test_list=tst,
+ module="dataprovider_emb",
+ obj=process,
+ args={"dictionary": word_dict})
batch_size = 128 if not is_predict else 1
settings(
- batch_size=batch_size,
- learning_rate=2e-3,
- learning_method=AdamOptimizer()
-)
+ batch_size=batch_size, learning_rate=2e-3, learning_method=AdamOptimizer())
data = data_layer(name="word", size=len(word_dict))
embedding = embedding_layer(input=data, size=128)
diff --git a/demo/quick_start/trainer_config.lr.py b/demo/quick_start/trainer_config.lr.py
index 119e3849a4b7e01713bc983d83c000772a60b76d..b9c9441baac28a8a8f6078065b75664819d6cd04 100644
--- a/demo/quick_start/trainer_config.lr.py
+++ b/demo/quick_start/trainer_config.lr.py
@@ -16,7 +16,7 @@
from paddle.trainer_config_helpers import *
-dict_file = "./data/dict.txt"
+dict_file = get_config_arg('dict_file', str, "./data/dict.txt")
word_dict = dict()
with open(dict_file, 'r') as f:
for i, line in enumerate(f):
@@ -32,11 +32,12 @@ process = 'process' if not is_predict else 'process_predict'
# We need to use different process for training and prediction.
# For training, the input data includes both word IDs and labels.
 # For prediction, the input data only includes word IDs.
-define_py_data_sources2(train_list=trn,
- test_list=tst,
- module="dataprovider_bow",
- obj=process,
- args={"dictionary": word_dict})
+define_py_data_sources2(
+ train_list=trn,
+ test_list=tst,
+ module="dataprovider_bow",
+ obj=process,
+ args={"dictionary": word_dict})
batch_size = 128 if not is_predict else 1
settings(
@@ -44,8 +45,7 @@ settings(
learning_rate=2e-3,
learning_method=AdamOptimizer(),
regularization=L2Regularization(8e-4),
- gradient_clipping_threshold=25
-)
+ gradient_clipping_threshold=25)
# Define the data for text features. The size of the data layer is the number
# of words in the dictionary.
@@ -63,7 +63,6 @@ if not is_predict:
label = data_layer(name="label", size=2)
# Define cross-entropy classification loss and error.
- classification_cost(input=output, label=label)
cls = classification_cost(input=output, label=label)
outputs(cls)
else:
diff --git a/demo/quick_start/trainer_config.lstm.py b/demo/quick_start/trainer_config.lstm.py
index ec8a2cb00abd19ef80c327ac564e91661ecc3928..8821e02d9bd4a0d06b8afa99df8e0fac3e2fcefe 100644
--- a/demo/quick_start/trainer_config.lstm.py
+++ b/demo/quick_start/trainer_config.lstm.py
@@ -27,11 +27,12 @@ is_predict = get_config_arg('is_predict', bool, False)
trn = 'data/train.list' if not is_predict else None
tst = 'data/test.list' if not is_predict else 'data/pred.list'
process = 'process' if not is_predict else 'process_predict'
-define_py_data_sources2(train_list=trn,
- test_list=tst,
- module="dataprovider_emb",
- obj=process,
- args={"dictionary": word_dict})
+define_py_data_sources2(
+ train_list=trn,
+ test_list=tst,
+ module="dataprovider_emb",
+ obj=process,
+ args={"dictionary": word_dict})
batch_size = 128 if not is_predict else 1
settings(
@@ -39,24 +40,14 @@ settings(
learning_rate=2e-3,
learning_method=AdamOptimizer(),
regularization=L2Regularization(8e-4),
- gradient_clipping_threshold=25
-)
-
-bias_attr = ParamAttr(initial_std=0.,l2_rate=0.)
+ gradient_clipping_threshold=25)
data = data_layer(name="word", size=len(word_dict))
emb = embedding_layer(input=data, size=128)
-fc = fc_layer(input=emb, size=512,
- act=LinearActivation(),
- bias_attr=bias_attr,
- layer_attr=ExtraAttr(drop_rate=0.1))
-lstm = lstmemory(input=fc, act=TanhActivation(),
- bias_attr=bias_attr,
- layer_attr=ExtraAttr(drop_rate=0.25))
-lstm_last = pooling_layer(input=lstm, pooling_type=MaxPooling())
-output = fc_layer(input=lstm_last, size=2,
- bias_attr=bias_attr,
- act=SoftmaxActivation())
+lstm = simple_lstm(
+ input=emb, size=128, lstm_cell_attr=ExtraAttr(drop_rate=0.25))
+lstm_max = pooling_layer(input=lstm, pooling_type=MaxPooling())
+output = fc_layer(input=lstm_max, size=2, act=SoftmaxActivation())
if is_predict:
maxid = maxid_layer(output)
outputs([maxid, output])
diff --git a/demo/quick_start/trainer_config.resnet-lstm.py b/demo/quick_start/trainer_config.resnet-lstm.py
new file mode 100644
index 0000000000000000000000000000000000000000..91e1581c386eb880d481b7352c4d21f3a5ef5c9a
--- /dev/null
+++ b/demo/quick_start/trainer_config.resnet-lstm.py
@@ -0,0 +1,94 @@
+# edit-mode: -*- python -*-
+
+# Copyright (c) 2016 Baidu, Inc. All Rights Reserved
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+"""
+This configuration is a demonstration of how to implement the stacked LSTM
+with residual connections, i.e. an LSTM layer takes the sum of the hidden states
+and inputs of the previous LSTM layer instead of only the hidden states.
+This architecture is from:
+Yonghui Wu, Mike Schuster, Zhifeng Chen, Quoc V. Le, Mohammad Norouzi,
+Wolfgang Macherey, Maxim Krikun, Yuan Cao, Qin Gao, Klaus Macherey,
+Jeff Klingner, Apurva Shah, Melvin Johnson, Xiaobing Liu, Lukasz Kaiser,
+Stephan Gouws, Yoshikiyo Kato, Taku Kudo, Hideto Kazawa, Keith Stevens,
+George Kurian, Nishant Patil, Wei Wang, Cliff Young, Jason Smith, Jason Riesa,
+Alex Rudnick, Oriol Vinyals, Greg Corrado, Macduff Hughes, Jeffrey Dean. 2016.
+Google's Neural Machine Translation System: Bridging the Gap between Human and
+Machine Translation. In arXiv https://arxiv.org/pdf/1609.08144v2.pdf
+Different from the architecture described in the paper, we use a stack of
+single-direction LSTM layers as the first layer instead of a bi-directional
+LSTM. Also, since this is demo code, we stack 4 layers instead of 8 to
+reduce computation time.
+"""
+
+from paddle.trainer_config_helpers import *
+
+dict_file = "./data/dict.txt"
+word_dict = dict()
+with open(dict_file, 'r') as f:
+ for i, line in enumerate(f):
+ w = line.strip().split()[0]
+ word_dict[w] = i
+
+is_predict = get_config_arg('is_predict', bool, False)
+trn = 'data/train.list' if not is_predict else None
+tst = 'data/test.list' if not is_predict else 'data/pred.list'
+process = 'process' if not is_predict else 'process_predict'
+define_py_data_sources2(train_list=trn,
+ test_list=tst,
+ module="dataprovider_emb",
+ obj=process,
+ args={"dictionary": word_dict})
+
+batch_size = 128 if not is_predict else 1
+settings(
+ batch_size=batch_size,
+ learning_rate=2e-3,
+ learning_method=AdamOptimizer(),
+ regularization=L2Regularization(8e-4),
+ gradient_clipping_threshold=25
+)
+
+bias_attr = ParamAttr(initial_std=0.,l2_rate=0.)
+
+data = data_layer(name="word", size=len(word_dict))
+emb = embedding_layer(input=data, size=128)
+lstm = simple_lstm(input=emb, size=128, lstm_cell_attr=ExtraAttr(drop_rate=0.1))
+
+previous_input, previous_hidden_state = emb, lstm
+
+for i in range(3):
+ # The input to the current layer is the sum of the hidden state
+ # and input of the previous layer.
+ current_input = addto_layer(input=[previous_input, previous_hidden_state])
+ hidden_state = simple_lstm(input=current_input, size=128,
+ lstm_cell_attr=ExtraAttr(drop_rate=0.1))
+ previous_input, previous_hidden_state = current_input, hidden_state
+
+lstm = previous_hidden_state
+
+lstm_last = pooling_layer(input=lstm, pooling_type=MaxPooling())
+output = fc_layer(input=lstm_last, size=2,
+ bias_attr=bias_attr,
+ act=SoftmaxActivation())
+
+
+if is_predict:
+ maxid = maxid_layer(output)
+ outputs([maxid, output])
+else:
+ label = data_layer(name="label", size=2)
+ cls = classification_cost(input=output, label=label)
+ outputs(cls)
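
Stripped of the layer attributes, the loop above is exactly the residual recurrence input_{i+1} = input_i + hidden_i, hidden_{i+1} = LSTM(input_{i+1}); a condensed restatement of the same calls with shorter names:

    x, h = emb, lstm                      # layer 0 input and hidden state
    for i in range(3):
        x = addto_layer(input=[x, h])     # element-wise sum, not concat
        h = simple_lstm(
            input=x, size=128, lstm_cell_attr=ExtraAttr(drop_rate=0.1))
    lstm = h
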
diff --git a/demo/recommendation/common_utils.py b/demo/recommendation/common_utils.py
index a5f00b3ef9ca00b42b8e31ddd6cfeca3580152b0..613e36b496e47edbc0eabd8f15a0abdcb50f6424 100755
--- a/demo/recommendation/common_utils.py
+++ b/demo/recommendation/common_utils.py
@@ -21,8 +21,9 @@ def meta_to_header(meta, name):
yield integer_value(each_meta['max'])
elif each_meta['type'] == 'embedding':
is_seq = each_meta['seq'] == 'sequence'
- yield integer_value(len(each_meta['dict']),
- seq_type=SequenceType.SEQUENCE if is_seq
- else SequenceType.NO_SEQUENCE)
+ yield integer_value(
+ len(each_meta['dict']),
+ seq_type=SequenceType.SEQUENCE
+ if is_seq else SequenceType.NO_SEQUENCE)
elif each_meta['type'] == 'one_hot_dense':
yield dense_vector(len(each_meta['dict']))
diff --git a/demo/recommendation/data/config.json b/demo/recommendation/data/config.json
index 71a9dd7be6bd10e177dfb443a94b719c3816d833..f26e74ce47bb7843a571e6033f051c046b31f054 100644
--- a/demo/recommendation/data/config.json
+++ b/demo/recommendation/data/config.json
@@ -14,4 +14,3 @@
"fields": ["id", "title", "genres"]
}
}
-
diff --git a/demo/recommendation/data/config_generator.py b/demo/recommendation/data/config_generator.py
index 29f38082693ad890ac4dfa302399663af8dbd27b..fa605458300f81da6772d88cfbad413e4dcf97fe 100644
--- a/demo/recommendation/data/config_generator.py
+++ b/demo/recommendation/data/config_generator.py
@@ -12,7 +12,6 @@
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
-
"""
config_generator.py
@@ -29,10 +28,7 @@ import json
import docopt
import copy
-DEFAULT_FILE = {
- "type": "split",
- "delimiter": ","
-}
+DEFAULT_FILE = {"type": "split", "delimiter": ","}
DEFAULT_FIELD = {
"id": {
@@ -107,19 +103,16 @@ def main(filename, fmt):
field = copy.deepcopy(DEFAULT_FIELD[field_key])
field['pos'] = pos
fields.append(field)
- obj[k] = {
- "file": file_dict,
- "fields": fields
- }
- meta = {
- "meta": obj
- }
+ obj[k] = {"file": file_dict, "fields": fields}
+ meta = {"meta": obj}
# print meta
if fmt == 'json':
+
def formatter(x):
import json
return json.dumps(x, indent=2)
elif fmt == 'yaml':
+
def formatter(x):
import yaml
return yaml.safe_dump(x, default_flow_style=False)
diff --git a/demo/recommendation/data/meta_generator.py b/demo/recommendation/data/meta_generator.py
index 8d1a33d02aea112e51f1d43bedc06fdcee1186f5..593c863670d5eb5d684adf643ff745f3914b656b 100644
--- a/demo/recommendation/data/meta_generator.py
+++ b/demo/recommendation/data/meta_generator.py
@@ -12,7 +12,6 @@
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
-
"""
Preprocess Movielens dataset, to get movie/user object.
@@ -66,8 +65,8 @@ class SortedIDGenerator(object):
self.__key_set__.add(key)
def finish_scan(self, compare=None, key=None, reverse=False):
- self.__key_set__ = sorted(list(self.__key_set__), cmp=compare,
- key=key, reverse=reverse)
+ self.__key_set__ = sorted(
+ list(self.__key_set__), cmp=compare, key=key, reverse=reverse)
self.dict = dict()
for idx, each_key in enumerate(self.__key_set__):
self.dict[each_key] = idx
@@ -207,11 +206,10 @@ class EmbeddingFieldParser(object):
self.dict = EmbeddingFieldParser.CharBasedEmbeddingDict(
self.seq_type == EmbeddingFieldParser.SEQUENCE)
elif config['dict']['type'] == 'split':
- self.dict = SplitEmbeddingDict(
- config['dict'].get('delimiter', ','))
+ self.dict = SplitEmbeddingDict(config['dict'].get('delimiter', ','))
elif config['dict']['type'] == 'whole_content':
- self.dict = EmbeddingFieldParser.WholeContentDict(
- config['dict']['sort'])
+ self.dict = EmbeddingFieldParser.WholeContentDict(config['dict'][
+ 'sort'])
else:
print config
assert False
@@ -333,8 +331,8 @@ class ContentExtractorFactory(object):
return PositionContentExtractor(config['pos'])
else:
extra_args = config['regex']
- return RegexPositionContentExtractor(pos=config['pos'],
- **extra_args)
+ return RegexPositionContentExtractor(
+ pos=config['pos'], **extra_args)
class MetaFile(object):
@@ -364,9 +362,10 @@ class MetaFile(object):
metas = map(lambda x: x.meta_field(), field_parsers)
# print metas
- key_index = filter(lambda x: x is not None, map(
- lambda (idx, meta): idx if 'is_key' in meta and meta['is_key']
- else None, enumerate(metas)))[0]
+ key_index = filter(
+ lambda x: x is not None,
+ map(lambda (idx, meta): idx if 'is_key' in meta and meta['is_key'] else None,
+ enumerate(metas)))[0]
key_map = []
for i in range(min(key_index, len(metas))):
@@ -374,12 +373,7 @@ class MetaFile(object):
for i in range(key_index + 1, len(metas)):
key_map.append(i)
- obj = {
- '__meta__': {
- 'raw_meta': metas,
- 'feature_map': key_map
- }
- }
+ obj = {'__meta__': {'raw_meta': metas, 'feature_map': key_map}}
for each_block in reader.read():
idx = field_parsers[key_index].parse(each_block)
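
The reformatted filter/map pipeline above is still dense; all it computes is the position of the single meta entry flagged is_key. An equivalent spelling on toy data (hypothetical metas):

    metas = [{'type': 'embedding'}, {'type': 'id', 'is_key': True}]
    key_index = [idx for idx, meta in enumerate(metas)
                 if meta.get('is_key')][0]        # -> 1
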
diff --git a/demo/recommendation/data/split.py b/demo/recommendation/data/split.py
index ff1f7fab7befdb5bdfa39fd0f1753e6804e82d8f..8dd0cbd32af6074439e98dac024c5fed76cd52b2 100644
--- a/demo/recommendation/data/split.py
+++ b/demo/recommendation/data/split.py
@@ -12,7 +12,6 @@
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
-
"""
Separate movielens 1m dataset to train/test file.
diff --git a/demo/recommendation/dataprovider.py b/demo/recommendation/dataprovider.py
index 454467f40b44bb526d143934c4a7350d41e54c0e..ff3932be03f1e4a1fc1d0bdb189ab7fe1fbbeca0 100755
--- a/demo/recommendation/dataprovider.py
+++ b/demo/recommendation/dataprovider.py
@@ -15,6 +15,7 @@
from paddle.trainer.PyDataProvider2 import *
import common_utils # parse
+
def hook(settings, meta, **kwargs):
"""
Init hook is invoked before process data. It will set obj.slots and store
@@ -41,6 +42,7 @@ def hook(settings, meta, **kwargs):
settings.input_types = headers
settings.meta = meta
+
@provider(init_hook=hook, cache=CacheType.CACHE_PASS_IN_MEM)
def process(settings, filename):
with open(filename, 'r') as f:
diff --git a/demo/recommendation/prediction.py b/demo/recommendation/prediction.py
index f8044a3195ec25bc2fa7c9079e4977f971059352..e2a202cfd1a476046d7e1d1896b87d72c4906ff2 100755
--- a/demo/recommendation/prediction.py
+++ b/demo/recommendation/prediction.py
@@ -28,7 +28,8 @@ if __name__ == '__main__':
model_path = sys.argv[1]
swig_paddle.initPaddle('--use_gpu=0')
conf = parse_config("trainer_config.py", "is_predict=1")
- network = swig_paddle.GradientMachine.createFromConfigProto(conf.model_config)
+ network = swig_paddle.GradientMachine.createFromConfigProto(
+ conf.model_config)
assert isinstance(network, swig_paddle.GradientMachine)
network.loadParameters(model_path)
with open('./data/meta.bin', 'rb') as f:
@@ -39,11 +40,12 @@ if __name__ == '__main__':
while True:
movie_id = int(raw_input("Input movie_id: "))
user_id = int(raw_input("Input user_id: "))
- movie_meta = meta['movie'][movie_id] # Query Data From Meta.
+ movie_meta = meta['movie'][movie_id] # Query Data From Meta.
user_meta = meta['user'][user_id]
data = [movie_id - 1]
data.extend(movie_meta)
data.append(user_id - 1)
data.extend(user_meta)
- print "Prediction Score is %.2f" % ((network.forwardTest(
- cvt.convert([data]))[0]['value'][0][0] + 5) / 2)
+ print "Prediction Score is %.2f" % (
+ (network.forwardTest(cvt.convert([data]))[0]['value'][0][0] + 5)
+ / 2)
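
The final expression assumes the network's similarity output lies in [-5, 5] (the cosine of the movie and user features is regressed against 0-5 star ratings in the config above); the wrapped arithmetic just maps it back to the rating scale:

    score = network.forwardTest(cvt.convert([data]))[0]['value'][0][0]
    rating = (score + 5) / 2.0            # map [-5, 5] back to 0-5 stars
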
diff --git a/demo/recommendation/trainer_config.py b/demo/recommendation/trainer_config.py
index 624c22ec969dc98808863ad53573b9633f1791ac..cec340b0b65a841029a1c0538d9881bb38f026ff 100755
--- a/demo/recommendation/trainer_config.py
+++ b/demo/recommendation/trainer_config.py
@@ -27,8 +27,8 @@ with open(META_FILE, 'rb') as f:
# load meta file
meta = pickle.load(f)
-settings(batch_size=1600, learning_rate=1e-3,
- learning_method=RMSPropOptimizer())
+settings(
+ batch_size=1600, learning_rate=1e-3, learning_method=RMSPropOptimizer())
def construct_feature(name):
@@ -59,11 +59,10 @@ def construct_feature(name):
slot_name = each_meta.get('name', '%s_id' % name)
if type_name == 'id':
slot_dim = each_meta['max']
- embedding = embedding_layer(input=data_layer(slot_name,
- size=slot_dim),
- size=256)
- fusion.append(fc_layer(input=embedding,
- size=256))
+ embedding = embedding_layer(
+ input=data_layer(
+ slot_name, size=slot_dim), size=256)
+ fusion.append(fc_layer(input=embedding, size=256))
elif type_name == 'embedding':
is_seq = each_meta['seq'] == 'sequence'
slot_dim = len(each_meta['dict'])
@@ -71,17 +70,14 @@ def construct_feature(name):
embedding = embedding_layer(input=din, size=256)
if is_seq:
fusion.append(
- text_conv_pool(input=embedding, context_len=5,
- hidden_size=256))
+ text_conv_pool(
+ input=embedding, context_len=5, hidden_size=256))
else:
- fusion.append(fc_layer(input=embedding,
- size=256))
+ fusion.append(fc_layer(input=embedding, size=256))
elif type_name == 'one_hot_dense':
slot_dim = len(each_meta['dict'])
- hidden = fc_layer(input=data_layer(slot_name, slot_dim),
- size=256)
- fusion.append(fc_layer(input=hidden,
- size=256))
+ hidden = fc_layer(input=data_layer(slot_name, slot_dim), size=256)
+ fusion.append(fc_layer(input=hidden, size=256))
return fc_layer(name="%s_fusion" % name, input=fusion, size=256)
@@ -90,10 +86,16 @@ movie_feature = construct_feature("movie")
user_feature = construct_feature("user")
similarity = cos_sim(a=movie_feature, b=user_feature)
if not is_predict:
- outputs(regression_cost(input=similarity,
- label=data_layer('rating', size=1)))
-
- define_py_data_sources2('data/train.list', 'data/test.list', module='dataprovider',
- obj='process', args={'meta': meta})
+ outputs(
+ regression_cost(
+ input=similarity, label=data_layer(
+ 'rating', size=1)))
+
+ define_py_data_sources2(
+ 'data/train.list',
+ 'data/test.list',
+ module='dataprovider',
+ obj='process',
+ args={'meta': meta})
else:
outputs(similarity)
diff --git a/demo/semantic_role_labeling/.gitignore b/demo/semantic_role_labeling/.gitignore
new file mode 100644
index 0000000000000000000000000000000000000000..cd90ca7bbe9be46f54cb656a8067c794a55d8cfc
--- /dev/null
+++ b/demo/semantic_role_labeling/.gitignore
@@ -0,0 +1,10 @@
+*.pyc
+train.log
+data/feature
+data/conll05st-release/
+data/src.dict
+data/test.wsj.props
+data/test.wsj.seq_pair
+data/test.wsj.words
+data/tgt.dict
+output
diff --git a/demo/semantic_role_labeling/data/extract_dict_feature.py b/demo/semantic_role_labeling/data/extract_dict_feature.py
index 2982e54c665b41400aab0a893ff3c76335404988..daca5f01cf2b3bd231bf530f17ec760272ce93e0 100644
--- a/demo/semantic_role_labeling/data/extract_dict_feature.py
+++ b/demo/semantic_role_labeling/data/extract_dict_feature.py
@@ -17,24 +17,15 @@ import os
from optparse import OptionParser
-def extract_dict_features(pair_file, feature_file, src_dict_file,
- tgt_dict_file):
- src_dict = set()
- tgt_dict = set()
-
- with open(pair_file) as fin, open(feature_file, 'w') as feature_out, open(
- src_dict_file, 'w') as src_dict_out, open(tgt_dict_file,
- 'w') as tgt_dict_out:
+def extract_dict_features(pair_file, feature_file):
+
+ with open(pair_file) as fin, open(feature_file, 'w') as feature_out:
for line in fin:
- sentence, labels = line.strip().split('\t')
+ sentence, predicate, labels = line.strip().split('\t')
sentence_list = sentence.split()
labels_list = labels.split()
- src_dict.update(sentence_list)
- tgt_dict.update(labels_list)
-
verb_index = labels_list.index('B-V')
- verb_feature = sentence_list[verb_index]
mark = [0] * len(labels_list)
if verb_index > 0:
@@ -42,47 +33,50 @@ def extract_dict_features(pair_file, feature_file, src_dict_file,
ctx_n1 = sentence_list[verb_index - 1]
else:
ctx_n1 = 'bos'
- ctx_n1_feature = ctx_n1
+
+ if verb_index > 1:
+ mark[verb_index - 2] = 1
+ ctx_n2 = sentence_list[verb_index - 2]
+ else:
+ ctx_n2 = 'bos'
mark[verb_index] = 1
- ctx_0_feature = sentence_list[verb_index]
+ ctx_0 = sentence_list[verb_index]
if verb_index < len(labels_list) - 2:
mark[verb_index + 1] = 1
ctx_p1 = sentence_list[verb_index + 1]
else:
ctx_p1 = 'eos'
- ctx_p1_feature = ctx_p1
+
+ if verb_index < len(labels_list) - 3:
+ mark[verb_index + 2] = 1
+ ctx_p2 = sentence_list[verb_index + 2]
+ else:
+ ctx_p2 = 'eos'
+
feature_str = sentence + '\t' \
- + verb_feature + '\t' \
- + ctx_n1_feature + '\t' \
- + ctx_0_feature + '\t' \
- + ctx_p1_feature + '\t' \
+ + predicate + '\t' \
+ + ctx_n2 + '\t' \
+ + ctx_n1 + '\t' \
+ + ctx_0 + '\t' \
+ + ctx_p1 + '\t' \
+ + ctx_p2 + '\t' \
+ ' '.join([str(i) for i in mark]) + '\t' \
+ labels
feature_out.write(feature_str + '\n')
- src_dict_out.write('\n')
- src_dict_out.write('\n'.join(list(src_dict)))
-
- tgt_dict_out.write('\n'.join(list(tgt_dict)))
if __name__ == '__main__':
- usage = '-p pair_file -f feature_file -s source dictionary -t target dictionary '
+ usage = '-p pair_file -f feature_file'
parser = OptionParser(usage)
parser.add_option('-p', dest='pair_file', help='the pair file')
- parser.add_option(
- '-f', dest='feature_file', help='the file to store feature')
- parser.add_option(
- '-s', dest='src_dict', help='the file to store source dictionary')
- parser.add_option(
- '-t', dest='tgt_dict', help='the file to store target dictionary')
+ parser.add_option('-f', dest='feature_file', help='the feature file')
(options, args) = parser.parse_args()
- extract_dict_features(options.pair_file, options.feature_file,
- options.src_dict, options.tgt_dict)
+ extract_dict_features(options.pair_file, options.feature_file)
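
The widened ±2 context window and the mark vector are easiest to see on a toy line (hypothetical input, predicate 'chased' carrying the B-V tag):

    sentence_list = ['The', 'cat', 'chased', 'the', 'gray', 'mouse']
    verb_index = 2
    # ctx_n2='The', ctx_n1='cat', ctx_0='chased',
    # ctx_p1='the', ctx_p2='gray'
    # mark = [1, 1, 1, 1, 1, 0]  (the five window positions are flagged)
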
diff --git a/demo/semantic_role_labeling/data/extract_pairs.py b/demo/semantic_role_labeling/data/extract_pairs.py
index 4d1bef8f958a62be9941d474a0b67542dcc5cfab..86ab00ce41723169de035a841d9e129a1b9e82a3 100644
--- a/demo/semantic_role_labeling/data/extract_pairs.py
+++ b/demo/semantic_role_labeling/data/extract_pairs.py
@@ -51,7 +51,7 @@ def read_sentences(words_file):
for line in fin:
line = line.strip()
if line == '':
- sentences.append(s.lower())
+ sentences.append(s)
s = ''
else:
s += line + ' '
@@ -64,6 +64,11 @@ def transform_labels(sentences, labels):
if len(labels[i]) == 1:
continue
else:
+ verb_list = []
+ for x in labels[i][0]:
+                if x != '-':
+ verb_list.append(x)
+
for j in xrange(1, len(labels[i])):
label_list = labels[i][j]
current_tag = 'O'
@@ -88,8 +93,7 @@ def transform_labels(sentences, labels):
is_in_bracket = True
else:
print 'error:', ll
-
- sen_lab_pair.append((sentences[i], label_seq))
+            sen_lab_pair.append((sentences[i], verb_list[j - 1], label_seq))
return sen_lab_pair
@@ -97,9 +101,9 @@ def write_file(sen_lab_pair, output_file):
with open(output_file, 'w') as fout:
for x in sen_lab_pair:
sentence = x[0]
- label_seq = ' '.join(x[1])
- assert len(sentence.split()) == len(x[1])
- fout.write(sentence + '\t' + label_seq + '\n')
+ label_seq = ' '.join(x[2])
+ assert len(sentence.split()) == len(x[2])
+            fout.write(sentence + '\t' + x[1] + '\t' + label_seq + '\n')
if __name__ == '__main__':
diff --git a/demo/semantic_role_labeling/data/get_data.sh b/demo/semantic_role_labeling/data/get_data.sh
index 268c0995e27006ec62f38bdda9b0a0994dab096c..55e33f4685627ed483aa6642c518a33558091531 100644
--- a/demo/semantic_role_labeling/data/get_data.sh
+++ b/demo/semantic_role_labeling/data/get_data.sh
@@ -14,6 +14,10 @@
# limitations under the License.
set -e
wget http://www.cs.upc.edu/~srlconll/conll05st-tests.tar.gz
+wget https://www.googledrive.com/host/0B7Q8d52jqeI9ejh6Q1RpMTFQT1k/semantic_role_labeling/verbDict.txt --no-check-certificate
+wget https://www.googledrive.com/host/0B7Q8d52jqeI9ejh6Q1RpMTFQT1k/semantic_role_labeling/targetDict.txt --no-check-certificate
+wget https://www.googledrive.com/host/0B7Q8d52jqeI9ejh6Q1RpMTFQT1k/semantic_role_labeling/wordDict.txt --no-check-certificate
+wget https://www.googledrive.com/host/0B7Q8d52jqeI9ejh6Q1RpMTFQT1k/semantic_role_labeling/emb --no-check-certificate
tar -xzvf conll05st-tests.tar.gz
rm conll05st-tests.tar.gz
cp ./conll05st-release/test.wsj/words/test.wsj.words.gz .
@@ -22,4 +26,4 @@ gunzip test.wsj.words.gz
gunzip test.wsj.props.gz
python extract_pairs.py -w test.wsj.words -p test.wsj.props -o test.wsj.seq_pair
-python extract_dict_feature.py -p test.wsj.seq_pair -f feature -s src.dict -t tgt.dict
+python extract_dict_feature.py -p test.wsj.seq_pair -f feature
diff --git a/demo/semantic_role_labeling/dataprovider.py b/demo/semantic_role_labeling/dataprovider.py
index 2ef25c42c1794c410fe85fd497a6ed9d2295dca9..d4c137ef42c4e2ec609f3e6f809363e602dfd8dd 100644
--- a/demo/semantic_role_labeling/dataprovider.py
+++ b/demo/semantic_role_labeling/dataprovider.py
@@ -17,41 +17,51 @@ from paddle.trainer.PyDataProvider2 import *
UNK_IDX = 0
-def hook(settings, word_dict, label_dict, **kwargs):
+def hook(settings, word_dict, label_dict, predicate_dict, **kwargs):
settings.word_dict = word_dict
settings.label_dict = label_dict
+ settings.predicate_dict = predicate_dict
+
#all inputs are integral and sequential type
settings.slots = [
integer_value_sequence(len(word_dict)),
+ integer_value_sequence(len(predicate_dict)),
integer_value_sequence(len(word_dict)),
integer_value_sequence(len(word_dict)),
integer_value_sequence(len(word_dict)),
integer_value_sequence(len(word_dict)),
- integer_value_sequence(2),
- integer_value_sequence(len(label_dict))]
+ integer_value_sequence(len(word_dict)), integer_value_sequence(2),
+ integer_value_sequence(len(label_dict))
+ ]
-@provider(init_hook=hook)
-def process(obj, file_name):
+def get_batch_size(yield_data):
+    return len(yield_data[0])
+
+
+@provider(init_hook=hook, should_shuffle=True, calc_batch_size=get_batch_size,
+ can_over_batch_size=False, cache=CacheType.CACHE_PASS_IN_MEM)
+def process(settings, file_name):
with open(file_name, 'r') as fdata:
for line in fdata:
- sentence, predicate, ctx_n1, ctx_0, ctx_p1, mark, label = \
+ sentence, predicate, ctx_n2, ctx_n1, ctx_0, ctx_p1, ctx_p2, mark, label = \
line.strip().split('\t')
-
+
words = sentence.split()
sen_len = len(words)
- word_slot = [obj.word_dict.get(w, UNK_IDX) for w in words]
+ word_slot = [settings.word_dict.get(w, UNK_IDX) for w in words]
- predicate_slot = [obj.word_dict.get(predicate, UNK_IDX)] * sen_len
- ctx_n1_slot = [obj.word_dict.get(ctx_n1, UNK_IDX)] * sen_len
- ctx_0_slot = [obj.word_dict.get(ctx_0, UNK_IDX)] * sen_len
- ctx_p1_slot = [obj.word_dict.get(ctx_p1, UNK_IDX)] * sen_len
+ predicate_slot = [settings.predicate_dict.get(predicate)] * sen_len
+ ctx_n2_slot = [settings.word_dict.get(ctx_n2, UNK_IDX)] * sen_len
+ ctx_n1_slot = [settings.word_dict.get(ctx_n1, UNK_IDX)] * sen_len
+ ctx_0_slot = [settings.word_dict.get(ctx_0, UNK_IDX)] * sen_len
+ ctx_p1_slot = [settings.word_dict.get(ctx_p1, UNK_IDX)] * sen_len
+ ctx_p2_slot = [settings.word_dict.get(ctx_p2, UNK_IDX)] * sen_len
marks = mark.split()
mark_slot = [int(w) for w in marks]
label_list = label.split()
- label_slot = [obj.label_dict.get(w) for w in label_list]
-
- yield word_slot, predicate_slot, ctx_n1_slot, \
- ctx_0_slot, ctx_p1_slot, mark_slot, label_slot
+ label_slot = [settings.label_dict.get(w) for w in label_list]
+ yield word_slot, predicate_slot, ctx_n2_slot, ctx_n1_slot, \
+ ctx_0_slot, ctx_p1_slot, ctx_p2_slot, mark_slot, label_slot
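
With calc_batch_size, the trainer measures a batch in whatever unit get_batch_size returns — here the word count of the first slot — so the config's batch_size counts words rather than sentences, and can_over_batch_size=False keeps one long sentence from blowing the budget. Numerically (a sketch of my reading of that contract):

    def get_batch_size(yield_data):       # one sample from process()
        return len(yield_data[0])         # cost = its sentence length

    # with batch_size=150: sentences of 60 + 60 + 25 = 145 words fit;
    # a further 20-word sentence is held for the next batch
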
diff --git a/demo/semantic_role_labeling/db_lstm.py b/demo/semantic_role_labeling/db_lstm.py
index 364460afbe31caf42cd4f0836eba75e444b3f5b8..54ceff0e724220cc9ea96b9e0ec6844947a8343e 100644
--- a/demo/semantic_role_labeling/db_lstm.py
+++ b/demo/semantic_role_labeling/db_lstm.py
@@ -12,15 +12,15 @@
# See the License for the specific language governing permissions and
# limitations under the License.
-
import math
import os
import sys
from paddle.trainer_config_helpers import *
#file paths
-word_dict_file = './data/src.dict'
-label_dict_file = './data/tgt.dict'
+word_dict_file = './data/wordDict.txt'
+label_dict_file = './data/targetDict.txt'
+predicate_file = './data/verbDict.txt'
train_list_file = './data/train.list'
test_list_file = './data/test.list'
@@ -31,8 +31,10 @@ if not is_predict:
#load dictionaries
word_dict = dict()
label_dict = dict()
+ predicate_dict = dict()
with open(word_dict_file, 'r') as f_word, \
- open(label_dict_file, 'r') as f_label:
+ open(label_dict_file, 'r') as f_label, \
+ open(predicate_file, 'r') as f_pre:
for i, line in enumerate(f_word):
w = line.strip()
word_dict[w] = i
@@ -41,8 +43,13 @@ if not is_predict:
w = line.strip()
label_dict[w] = i
+ for i, line in enumerate(f_pre):
+ w = line.strip()
+ predicate_dict[w] = i
+
+
if is_test:
- train_list_file = None
+ train_list_file = None
#define data provider
define_py_data_sources2(
@@ -51,91 +58,157 @@ if not is_predict:
module='dataprovider',
obj='process',
args={'word_dict': word_dict,
- 'label_dict': label_dict})
+ 'label_dict': label_dict,
+              'predicate_dict': predicate_dict})
word_dict_len = len(word_dict)
label_dict_len = len(label_dict)
+ pred_len = len(predicate_dict)
else:
word_dict_len = get_config_arg('dict_len', int)
label_dict_len = get_config_arg('label_len', int)
+ pred_len = get_config_arg('pred_len', int)
+############################## Hyper-parameters ##################################
mark_dict_len = 2
word_dim = 32
mark_dim = 5
-hidden_dim = 128
+hidden_dim = 512
depth = 8
-emb_lr = 1e-2
-fc_lr = 1e-2
-lstm_lr = 2e-2
+
+
+########################### Optimizer #######################################
+
settings(
batch_size=150,
- learning_method=AdamOptimizer(),
- learning_rate=1e-3,
+ learning_method=MomentumOptimizer(momentum=0),
+ learning_rate=2e-2,
regularization=L2Regularization(8e-4),
- gradient_clipping_threshold=25)
+ is_async=False,
+    model_average=ModelAverage(
+        average_window=0.5, max_average_window=10000))
-#6 features
+
+
+####################################### network ##############################
+#8 features and 1 target
word = data_layer(name='word_data', size=word_dict_len)
-predicate = data_layer(name='verb_data', size=word_dict_len)
+predicate = data_layer(name='verb_data', size=pred_len)
+
+ctx_n2 = data_layer(name='ctx_n2_data', size=word_dict_len)
ctx_n1 = data_layer(name='ctx_n1_data', size=word_dict_len)
ctx_0 = data_layer(name='ctx_0_data', size=word_dict_len)
ctx_p1 = data_layer(name='ctx_p1_data', size=word_dict_len)
+ctx_p2 = data_layer(name='ctx_p2_data', size=word_dict_len)
mark = data_layer(name='mark_data', size=mark_dict_len)
+
if not is_predict:
target = data_layer(name='target', size=label_dict_len)
-ptt = ParameterAttribute(name='src_emb', learning_rate=emb_lr)
-layer_attr = ExtraLayerAttribute(drop_rate=0.5)
-fc_para_attr = ParameterAttribute(learning_rate=fc_lr)
-lstm_para_attr = ParameterAttribute(initial_std=0., learning_rate=lstm_lr)
-para_attr = [fc_para_attr, lstm_para_attr]
-word_embedding = embedding_layer(size=word_dim, input=word, param_attr=ptt)
-predicate_embedding = embedding_layer(
- size=word_dim, input=predicate, param_attr=ptt)
-ctx_n1_embedding = embedding_layer(size=word_dim, input=ctx_n1, param_attr=ptt)
-ctx_0_embedding = embedding_layer(size=word_dim, input=ctx_0, param_attr=ptt)
-ctx_p1_embedding = embedding_layer(size=word_dim, input=ctx_p1, param_attr=ptt)
-mark_embedding = embedding_layer(size=mark_dim, input=mark)
+default_std = 1 / math.sqrt(hidden_dim) / 3.0
+
+emb_para = ParameterAttribute(name='emb', initial_std=0., learning_rate=0.)
+std_0 = ParameterAttribute(initial_std=0.)
+std_default = ParameterAttribute(initial_std=default_std)
+
+predicate_embedding = embedding_layer(
+    size=word_dim,
+    input=predicate,
+    param_attr=ParameterAttribute(
+        name='vemb', initial_std=default_std))
+mark_embedding = embedding_layer(
+    name='word_ctx-in_embedding', size=mark_dim, input=mark, param_attr=std_0)
+
+word_input = [word, ctx_n2, ctx_n1, ctx_0, ctx_p1, ctx_p2]
+emb_layers = [
+    embedding_layer(
+        size=word_dim, input=x, param_attr=emb_para) for x in word_input
+]
+emb_layers.append(predicate_embedding)
+emb_layers.append(mark_embedding)
hidden_0 = mixed_layer(
+ name='hidden0',
size=hidden_dim,
- input=[
- full_matrix_projection(input=word_embedding),
- full_matrix_projection(input=predicate_embedding),
- full_matrix_projection(input=ctx_n1_embedding),
- full_matrix_projection(input=ctx_0_embedding),
- full_matrix_projection(input=ctx_p1_embedding),
- full_matrix_projection(input=mark_embedding),
- ])
+ bias_attr=std_default,
+    input=[
+        full_matrix_projection(
+            input=emb, param_attr=std_default) for emb in emb_layers
+    ])
+
-lstm_0 = lstmemory(input=hidden_0, layer_attr=layer_attr)
+mix_hidden_lr = 1e-3
+lstm_para_attr = ParameterAttribute(initial_std=0.0, learning_rate=1.0)
+hidden_para_attr = ParameterAttribute(
+    initial_std=default_std, learning_rate=mix_hidden_lr)
+
+lstm_0 = lstmemory(
+    name='lstm0',
+    input=hidden_0,
+    act=ReluActivation(),
+    gate_act=SigmoidActivation(),
+    state_act=SigmoidActivation(),
+    bias_attr=std_0,
+    param_attr=lstm_para_attr)
#stack L-LSTM and R-LSTM with direct edges
input_tmp = [hidden_0, lstm_0]
+
for i in range(1, depth):
- fc = fc_layer(input=input_tmp, size=hidden_dim, param_attr=para_attr)
+    mix_hidden = mixed_layer(
+        name='hidden' + str(i),
+        size=hidden_dim,
+        bias_attr=std_default,
+        input=[
+            full_matrix_projection(
+                input=input_tmp[0], param_attr=hidden_para_attr),
+            full_matrix_projection(
+                input=input_tmp[1], param_attr=lstm_para_attr)
+        ])
+
+    lstm = lstmemory(
+        name='lstm' + str(i),
+        input=mix_hidden,
+        act=ReluActivation(),
+        gate_act=SigmoidActivation(),
+        state_act=SigmoidActivation(),
+        reverse=((i % 2) == 1),
+        bias_attr=std_0,
+        param_attr=lstm_para_attr)
+
+    input_tmp = [mix_hidden, lstm]
+
+feature_out = mixed_layer(
+    name='output',
+    size=label_dict_len,
+    bias_attr=std_default,
+    input=[
+        full_matrix_projection(
+            input=input_tmp[0], param_attr=hidden_para_attr),
+        full_matrix_projection(
+            input=input_tmp[1], param_attr=lstm_para_attr)
+    ])
- lstm = lstmemory(
- input=fc,
- act=ReluActivation(),
- reverse=(i % 2) == 1,
- layer_attr=layer_attr)
- input_tmp = [fc, lstm]
-prob = fc_layer(
- input=input_tmp,
- size=label_dict_len,
- act=SoftmaxActivation(),
- param_attr=para_attr)
if not is_predict:
- cls = classification_cost(input=prob, label=target)
- outputs(cls)
+    crf_l = crf_layer(
+        name='crf',
+        size=label_dict_len,
+        input=feature_out,
+        label=target,
+        param_attr=ParameterAttribute(
+            name='crfw', initial_std=default_std,
+            learning_rate=mix_hidden_lr))
+
+    crf_dec_l = crf_decoding_layer(
+        name='crf_dec_l',
+        size=label_dict_len,
+        input=feature_out,
+        label=target,
+        param_attr=ParameterAttribute(name='crfw'))
+
+    eval = sum_evaluator(input=crf_dec_l)
+
+    outputs(crf_l)
+
else:
- outputs(prob)
+    crf_dec_l = crf_decoding_layer(
+        name='crf_dec_l',
+        size=label_dict_len,
+        input=feature_out,
+        param_attr=ParameterAttribute(name='crfw'))
+
+    outputs(crf_dec_l)
+
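
crf_layer and crf_decoding_layer share the parameter name 'crfw' on purpose: the cost layer learns the transition weights, and the decoding layer reuses the same matrix for Viterbi decoding (given a label it emits, as I read it, a per-sequence error that sum_evaluator totals). Because decoding yields label ids rather than probabilities, the matching change in predict.py below reads the 'id' field:

    output = network.forwardTest(input)
    lab = output[0]['id'].tolist()   # Viterbi-decoded label id per word
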
diff --git a/demo/semantic_role_labeling/predict.py b/demo/semantic_role_labeling/predict.py
index 9a27112828e449174e3da79dc7db9fed20bfed6f..2761814e1811e701122e0be4850526c5b290c457 100644
--- a/demo/semantic_role_labeling/predict.py
+++ b/demo/semantic_role_labeling/predict.py
@@ -26,7 +26,7 @@ UNK_IDX = 0
class Prediction():
- def __init__(self, train_conf, dict_file, model_dir, label_file):
+    def __init__(self, train_conf, dict_file, model_dir, label_file,
+                 predicate_dict_file):
"""
train_conf: trainer configure.
dict_file: word dictionary file name.
@@ -35,16 +35,19 @@ class Prediction():
self.dict = {}
self.labels = {}
+        self.predicate_dict = {}
self.labels_reverse = {}
- self.load_dict_label(dict_file, label_file)
+ self.load_dict_label(dict_file, label_file, predicate_dict_file)
len_dict = len(self.dict)
len_label = len(self.labels)
+ len_pred = len(self.predicate_dict)
conf = parse_config(
train_conf,
- 'dict_len=' + str(len_dict) +
+ 'dict_len=' + str(len_dict) +
',label_len=' + str(len_label) +
+ ',pred_len=' + str(len_pred) +
',is_predict=True')
self.network = swig_paddle.GradientMachine.createFromConfigProto(
conf.model_config)
@@ -52,15 +55,21 @@ class Prediction():
slots = [
integer_value_sequence(len_dict),
+ integer_value_sequence(len_pred),
integer_value_sequence(len_dict),
integer_value_sequence(len_dict),
integer_value_sequence(len_dict),
integer_value_sequence(len_dict),
+ integer_value_sequence(len_dict),
integer_value_sequence(2)
]
self.converter = DataProviderConverter(slots)
- def load_dict_label(self, dict_file, label_file):
+ def load_dict_label(self, dict_file, label_file, predicate_dict_file):
"""
Load dictionary from self.dict_file.
"""
@@ -71,52 +80,55 @@ class Prediction():
self.labels[line.strip()] = line_count
self.labels_reverse[line_count] = line.strip()
+ for line_count, line in enumerate(open(predicate_dict_file, 'r')):
+ self.predicate_dict[line.strip()] = line_count
def get_data(self, data_file):
"""
Get input data of paddle format.
"""
with open(data_file, 'r') as fdata:
for line in fdata:
- sentence, predicate, ctx_n1, ctx_0, ctx_p1, mark, label = line.strip(
+ sentence, predicate, ctx_n2, ctx_n1, ctx_0, ctx_p1, ctx_p2, mark, label = line.strip(
).split('\t')
words = sentence.split()
sen_len = len(words)
-
+
word_slot = [self.dict.get(w, UNK_IDX) for w in words]
- predicate_slot = [self.dict.get(predicate, UNK_IDX)] * sen_len
+                predicate_slot = [
+                    self.predicate_dict.get(predicate, UNK_IDX)
+                ] * sen_len
+ ctx_n2_slot = [self.dict.get(ctx_n2, UNK_IDX)] * sen_len
ctx_n1_slot = [self.dict.get(ctx_n1, UNK_IDX)] * sen_len
ctx_0_slot = [self.dict.get(ctx_0, UNK_IDX)] * sen_len
ctx_p1_slot = [self.dict.get(ctx_p1, UNK_IDX)] * sen_len
+ ctx_p2_slot = [self.dict.get(ctx_p2, UNK_IDX)] * sen_len
marks = mark.split()
mark_slot = [int(w) for w in marks]
+
+ yield word_slot, predicate_slot, ctx_n2_slot, ctx_n1_slot, \
+ ctx_0_slot, ctx_p1_slot, ctx_p2_slot, mark_slot
- yield word_slot, predicate_slot, ctx_n1_slot, \
- ctx_0_slot, ctx_p1_slot, mark_slot
-
- def predict(self, data_file):
+ def predict(self, data_file, output_file):
"""
data_file: file name of input data.
"""
input = self.converter(self.get_data(data_file))
output = self.network.forwardTest(input)
- prob = output[0]["value"]
- lab = list(np.argsort(-prob)[:, 0])
+ lab = output[0]["id"].tolist()
- with open(data_file, 'r') as fin, open('predict.res', 'w') as fout:
+ with open(data_file, 'r') as fin, open(output_file, 'w') as fout:
index = 0
for line in fin:
sen = line.split('\t')[0]
len_sen = len(sen.split())
line_labels = lab[index:index + len_sen]
index += len_sen
- fout.write(sen + '\t' + ' '.join([self.labels_reverse[
- i] for i in line_labels]) + '\n')
+ fout.write(sen + '\t' + ' '.join(
+ [self.labels_reverse[i] for i in line_labels]) + '\n')
def option_parser():
- usage = ("python predict.py -c config -w model_dir "
- "-d word dictionary -l label_file -i input_file")
+    usage = ("python predict.py -c config -w model_dir "
+             "-d word dictionary -l label_file -i input_file "
+             "-p pred_dict_file")
parser = OptionParser(usage="usage: %s [options]" % usage)
parser.add_option(
"-c",
@@ -137,6 +149,13 @@ def option_parser():
dest="label_file",
default=None,
help="label file")
+ parser.add_option(
+ "-p",
+ "--predict_dict_file",
+ action="store",
+ dest="predict_dict_file",
+ default=None,
+ help="predict_dict_file")
parser.add_option(
"-i",
"--data",
@@ -150,6 +169,14 @@ def option_parser():
dest="model_path",
default=None,
help="model path")
+
+ parser.add_option(
+ "-o",
+ "--output_file",
+ action="store",
+ dest="output_file",
+ default=None,
+ help="output file")
return parser.parse_args()
@@ -160,10 +187,12 @@ def main():
dict_file = options.dict_file
model_path = options.model_path
label_file = options.label_file
+ predict_dict_file = options.predict_dict_file
+ output_file = options.output_file
swig_paddle.initPaddle("--use_gpu=0")
- predict = Prediction(train_conf, dict_file, model_path, label_file)
- predict.predict(data_file)
+    predict = Prediction(train_conf, dict_file, model_path, label_file,
+                         predict_dict_file)
+    predict.predict(data_file, output_file)
if __name__ == '__main__':
diff --git a/demo/semantic_role_labeling/predict.sh b/demo/semantic_role_labeling/predict.sh
index a545b9a5d591b41bdbd54905cbbffc410abc8fb0..d0acdb0bd093974485475cf796c6d41ac7899135 100644
--- a/demo/semantic_role_labeling/predict.sh
+++ b/demo/semantic_role_labeling/predict.sh
@@ -26,15 +26,18 @@ LOG=`get_best_pass $log`
LOG=(${LOG})
best_model_path="output/pass-${LOG[1]}"
-
config_file=db_lstm.py
-dict_file=./data/src.dict
-label_file=./data/tgt.dict
+dict_file=./data/wordDict.txt
+label_file=./data/targetDict.txt
+predicate_dict_file=./data/verbDict.txt
input_file=./data/feature
+output_file=predict.res
python predict.py \
-c $config_file \
-w $best_model_path \
-l $label_file \
+ -p $predicate_dict_file \
-d $dict_file \
- -i $input_file
+ -i $input_file \
+ -o $output_file
diff --git a/demo/semantic_role_labeling/test.sh b/demo/semantic_role_labeling/test.sh
index 804f722e5b8e9ee5b54c778c54f7833f5e6c4de0..c4ab44f5ca08aefd18f2851a1410aa08563925a9 100644
--- a/demo/semantic_role_labeling/test.sh
+++ b/demo/semantic_role_labeling/test.sh
@@ -36,5 +36,5 @@ paddle train \
--job=test \
--use_gpu=false \
--config_args=is_test=1 \
+ --test_all_data_in_one_period=1 \
2>&1 | tee 'test.log'
-
diff --git a/demo/semantic_role_labeling/train.sh b/demo/semantic_role_labeling/train.sh
index 94c7b6f31df3b5e5e059d6e1323ae0c0bec74753..420768bb2b4ebed7b135a49c5eee5e5538426ae1 100644
--- a/demo/semantic_role_labeling/train.sh
+++ b/demo/semantic_role_labeling/train.sh
@@ -16,12 +16,14 @@
set -e
paddle train \
--config=./db_lstm.py \
+ --use_gpu=0 \
+ --log_period=5000 \
+ --trainer_count=1 \
+ --show_parameter_stats_period=5000 \
--save_dir=./output \
- --trainer_count=4 \
- --log_period=10 \
- --num_passes=500 \
- --use_gpu=false \
- --show_parameter_stats_period=10 \
+ --num_passes=10000 \
+ --average_test_period=10000000 \
+ --init_model_path=./data \
+ --load_missing_parameter_strategy=rand \
--test_all_data_in_one_period=1 \
-2>&1 | tee 'train.log'
-
+ 2>&1 | tee 'train.log'
diff --git a/demo/sentiment/data/get_imdb.sh b/demo/sentiment/data/get_imdb.sh
index 41523927afe75428ef1151cef8184ede14eea9a7..28fa86232d89964b3f1680080239cf8a4ebefa9a 100755
--- a/demo/sentiment/data/get_imdb.sh
+++ b/demo/sentiment/data/get_imdb.sh
@@ -38,11 +38,11 @@ unzip master.zip
mkdir -p imdb/train
mkdir -p imdb/test
-cp -r aclImdb/train/pos/ imdb/train/
-cp -r aclImdb/train/neg/ imdb/train/
+cp -r aclImdb/train/pos/ imdb/train/pos
+cp -r aclImdb/train/neg/ imdb/train/neg
-cp -r aclImdb/test/pos/ imdb/test/
-cp -r aclImdb/test/neg/ imdb/test/
+cp -r aclImdb/test/pos/ imdb/test/pos
+cp -r aclImdb/test/neg/ imdb/test/neg
#remove compressed package
rm aclImdb_v1.tar.gz
diff --git a/demo/sentiment/dataprovider.py b/demo/sentiment/dataprovider.py
index 9a9fd81f030cb1d2a10a5000fd1d12810d12112b..53e3d1d20df92b8815347bd8937064871f326b3f 100755
--- a/demo/sentiment/dataprovider.py
+++ b/demo/sentiment/dataprovider.py
@@ -17,8 +17,8 @@ from paddle.trainer.PyDataProvider2 import *
def hook(settings, dictionary, **kwargs):
settings.word_dict = dictionary
settings.input_types = [
- integer_value_sequence(len(settings.word_dict)),
- integer_value(2)]
+ integer_value_sequence(len(settings.word_dict)), integer_value(2)
+ ]
settings.logger.info('dict len : %d' % (len(settings.word_dict)))
@@ -29,6 +29,7 @@ def process(settings, file_name):
label, comment = line.strip().split('\t\t')
label = int(label)
words = comment.split()
- word_slot = [settings.word_dict[w] for w in words if w in
- settings.word_dict]
+ word_slot = [
+ settings.word_dict[w] for w in words if w in settings.word_dict
+ ]
yield word_slot, label
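The hook/process pair above forms a contract: each `yield` must match `settings.input_types`, i.e. a word-id sequence over the dictionary followed by an integer class label. A standalone sketch of that conversion, with a hypothetical dictionary:

```python
# Hedged sketch of the provider contract; word_dict is a hypothetical stand-in.
word_dict = {'this': 0, 'movie': 1, 'is': 2, 'great': 3}

def to_sample(line):
    label, comment = line.strip().split('\t\t')
    words = comment.split()
    # keep only in-vocabulary words, as the process() above does
    word_slot = [word_dict[w] for w in words if w in word_dict]
    return word_slot, int(label)

print(to_sample('1\t\tthis movie is great'))   # ([0, 1, 2, 3], 1)
```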
diff --git a/demo/sentiment/predict.py b/demo/sentiment/predict.py
index c61628d34db4a2bcecd8b367879045f7cb57d491..bc0f6f31264294034ed38309f7fda370865b2845 100755
--- a/demo/sentiment/predict.py
+++ b/demo/sentiment/predict.py
@@ -18,14 +18,14 @@ from optparse import OptionParser
from py_paddle import swig_paddle, DataProviderConverter
from paddle.trainer.PyDataProvider2 import integer_value_sequence
from paddle.trainer.config_parser import parse_config
-
"""
Usage: run following command to show help message.
python predict.py -h
"""
+
class SentimentPrediction():
- def __init__(self, train_conf, dict_file, model_dir=None, label_file = None):
+ def __init__(self, train_conf, dict_file, model_dir=None, label_file=None):
"""
train_conf: trainer configure.
dict_file: word dictionary file name.
@@ -44,10 +44,11 @@ class SentimentPrediction():
self.load_label(label_file)
conf = parse_config(train_conf, "is_predict=1")
- self.network = swig_paddle.GradientMachine.createFromConfigProto(conf.model_config)
+ self.network = swig_paddle.GradientMachine.createFromConfigProto(
+ conf.model_config)
self.network.loadParameters(self.model_dir)
- slots = [integer_value_sequence(self.dict_dim)]
- self.converter = DataProviderConverter(slots)
+ input_types = [integer_value_sequence(self.dict_dim)]
+ self.converter = DataProviderConverter(input_types)
def load_dict(self):
"""
@@ -61,7 +62,7 @@ class SentimentPrediction():
"""
Load label.
"""
- self.label={}
+ self.label = {}
for v in open(label_file, 'r'):
self.label[int(v.split('\t')[1])] = v.split('\t')[0]
@@ -72,7 +73,9 @@ class SentimentPrediction():
with open(data_file, 'r') as fdata:
for line in fdata:
words = line.strip().split()
- word_slot = [self.word_dict[w] for w in words if w in self.word_dict]
+ word_slot = [
+ self.word_dict[w] for w in words if w in self.word_dict
+ ]
if not word_slot:
print "all words are not in dictionary: %s", line
continue
@@ -89,25 +92,48 @@ class SentimentPrediction():
if self.label is None:
print("%s: predicting label is %d" % (data_file, lab[0][0]))
else:
- print("%s: predicting label is %s" % (data_file, self.label[lab[0][0]]))
+ print("%s: predicting label is %s" %
+ (data_file, self.label[lab[0][0]]))
+
def option_parser():
usage = "python predict.py -n config -w model_dir -d dictionary -i input_file "
parser = OptionParser(usage="usage: %s [options]" % usage)
- parser.add_option("-n", "--tconf", action="store",
- dest="train_conf", help="network config")
- parser.add_option("-d", "--dict", action="store",
- dest="dict_file",help="dictionary file")
- parser.add_option("-b", "--label", action="store",
- dest="label", default=None,
- help="dictionary file")
- parser.add_option("-i", "--data", action="store",
- dest="data", help="data file to predict")
- parser.add_option("-w", "--model", action="store",
- dest="model_path", default=None,
- help="model path")
+ parser.add_option(
+ "-n",
+ "--tconf",
+ action="store",
+ dest="train_conf",
+ help="network config")
+ parser.add_option(
+ "-d",
+ "--dict",
+ action="store",
+ dest="dict_file",
+ help="dictionary file")
+ parser.add_option(
+ "-b",
+ "--label",
+ action="store",
+ dest="label",
+ default=None,
+        help="label file")
+ parser.add_option(
+ "-i",
+ "--data",
+ action="store",
+ dest="data",
+ help="data file to predict")
+ parser.add_option(
+ "-w",
+ "--model",
+ action="store",
+ dest="model_path",
+ default=None,
+ help="model path")
return parser.parse_args()
+
def main():
options, args = option_parser()
train_conf = options.train_conf
@@ -119,5 +145,6 @@ def main():
predict = SentimentPrediction(train_conf, dict_file, model_path, label)
predict.predict(data)
+
if __name__ == '__main__':
main()
diff --git a/demo/sentiment/preprocess.py b/demo/sentiment/preprocess.py
index 49b53d500a1bf816bde9c9675b251be8e9a68ae9..7146e95d751c4de649e204fab724085994dfa4d3 100755
--- a/demo/sentiment/preprocess.py
+++ b/demo/sentiment/preprocess.py
@@ -22,13 +22,13 @@ from os.path import join as join_path
from optparse import OptionParser
from paddle.utils.preprocess_util import *
-
"""
Usage: run following command to show help message.
python preprocess.py -h
"""
-def save_dict(dict, filename, is_reverse = True):
+
+def save_dict(dict, filename, is_reverse=True):
"""
Save dictionary into file.
dict: input dictionary.
@@ -39,9 +39,10 @@ def save_dict(dict, filename, is_reverse = True):
f = open(filename, 'w')
for k, v in sorted(dict.items(), key=operator.itemgetter(1),\
reverse=is_reverse):
- f.write('%s\t%s\n'%(k, v))
+ f.write('%s\t%s\n' % (k, v))
f.close()
+
def tokenize(sentences):
"""
Use tokenizer.perl to tokenize input sentences.
@@ -58,6 +59,7 @@ def tokenize(sentences):
toks = tok_text.split('\n')[:-1]
return toks
+
def read_lines(path):
"""
path: String, file path.
@@ -71,12 +73,17 @@ def read_lines(path):
seqs.append(line)
return seqs
+
class SentimentDataSetCreate():
"""
A class to process data for sentiment analysis task.
"""
- def __init__(self, data_path, output_path,
- use_okenizer = True, multi_lines = False):
+
+ def __init__(self,
+ data_path,
+ output_path,
+ use_okenizer=True,
+ multi_lines=False):
"""
        data_path: string, training and testing dataset path
output_path: string, output path, store processed dataset
@@ -164,23 +171,17 @@ class SentimentDataSetCreate():
# Preprocess train data.
train_data, train_lab_set = self.data_list(self.train_dir)
print "processing train set..."
- file_lists = self.save_data(train_data,
- "train",
- self.batch_size,
- True,
- True)
+ file_lists = self.save_data(train_data, "train", self.batch_size, True,
+ True)
save_list(file_lists, self.train_list)
# If have test data path, preprocess test data.
if os.path.exists(self.test_dir):
test_data, test_lab_set = self.data_list(self.test_dir)
- assert(train_lab_set == test_lab_set)
+ assert (train_lab_set == test_lab_set)
print "processing test set..."
- file_lists = self.save_data(test_data,
- "test",
- self.batch_size,
- False,
- self.dict_with_test)
+ file_lists = self.save_data(test_data, "test", self.batch_size,
+ False, self.dict_with_test)
save_list(file_lists, self.test_list)
# save labels set.
@@ -191,7 +192,9 @@ class SentimentDataSetCreate():
save_dict(self.word_count, self.dict_file, True)
self.dict_size = len(self.word_count)
- def save_data(self, data, prefix = "",
+ def save_data(self,
+ data,
+ prefix="",
batch_size=50000,
is_shuffle=False,
build_dict=False):
@@ -205,7 +208,8 @@ class SentimentDataSetCreate():
return: list of batch names
"""
if is_shuffle and self.multi_lines:
- return self.save_data_multi_lines(data, prefix, batch_size, build_dict)
+ return self.save_data_multi_lines(data, prefix, batch_size,
+ build_dict)
if is_shuffle:
random.shuffle(data)
@@ -213,7 +217,7 @@ class SentimentDataSetCreate():
batch_names = []
for i in range(num_batches):
batch_name = join_path(self.output_path,
- "%s_part_%03d" %(prefix, i))
+ "%s_part_%03d" % (prefix, i))
begin = i * batch_size
end = min((i + 1) * batch_size, len(data))
# read a batch of data
@@ -246,7 +250,9 @@ class SentimentDataSetCreate():
data_list = tokenize(data_list)
return label_list, data_list
- def save_data_multi_lines(self, data, prefix = "",
+ def save_data_multi_lines(self,
+ data,
+ prefix="",
batch_size=50000,
build_dict=False):
"""
@@ -274,14 +280,14 @@ class SentimentDataSetCreate():
self.create_dict(data_list)
length = len(label_list)
- perm_list = np.array([ i for i in xrange(length) ])
+ perm_list = np.array([i for i in xrange(length)])
random.shuffle(perm_list)
num_batches = int(math.ceil(length / float(batch_size)))
batch_names = []
for i in range(num_batches):
batch_name = join_path(self.output_path,
- "%s_part_%03d" %(prefix, i))
+ "%s_part_%03d" % (prefix, i))
begin = i * batch_size
end = min((i + 1) * batch_size, length)
sub_label = [label_list[perm_list[i]] for i in range(begin, end)]
@@ -304,35 +310,50 @@ class SentimentDataSetCreate():
f.write('%s\t\t%s\n' % (lab, seq))
f.close()
+
def option_parser():
    parser = OptionParser(usage="usage: python preprocess.py "\
"-i data_dir [options]")
- parser.add_option("-i", "--data", action="store",
- dest="input", help="Input data directory.")
- parser.add_option("-o", "--output", action="store",
- dest="output", default=None,
- help="Output directory.")
- parser.add_option("-t", "--tokenizer", action="store",
- dest="use_tokenizer", default=True,
- help="Whether to use tokenizer.")
+ parser.add_option(
+ "-i",
+ "--data",
+ action="store",
+ dest="input",
+ help="Input data directory.")
+ parser.add_option(
+ "-o",
+ "--output",
+ action="store",
+ dest="output",
+ default=None,
+ help="Output directory.")
+ parser.add_option(
+ "-t",
+ "--tokenizer",
+ action="store",
+ dest="use_tokenizer",
+ default=True,
+ help="Whether to use tokenizer.")
parser.add_option("-m", "--multi_lines", action="store",
dest="multi_lines", default=False,
help="If input text files have multi lines and they "\
"need to be shuffled, you should set -m True,")
return parser.parse_args()
+
def main():
options, args = option_parser()
- data_dir=options.input
- output_dir=options.output
- use_tokenizer=options.use_tokenizer
- multi_lines=options.multi_lines
+ data_dir = options.input
+ output_dir = options.output
+ use_tokenizer = options.use_tokenizer
+ multi_lines = options.multi_lines
if output_dir is None:
outname = os.path.basename(options.input)
output_dir = join_path(os.path.dirname(data_dir), 'pre-' + outname)
- data_creator = SentimentDataSetCreate(data_dir, output_dir,
- use_tokenizer, multi_lines)
+ data_creator = SentimentDataSetCreate(data_dir, output_dir, use_tokenizer,
+ multi_lines)
data_creator.create_dataset()
+
if __name__ == '__main__':
main()
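The `save_data` methods above split the data into `ceil(N / batch_size)` batches named `%s_part_%03d`, each covering a half-open index range. The index arithmetic in isolation:

```python
import math

# Sketch of the batch-splitting arithmetic used by save_data() above.
def batch_ranges(n, batch_size):
    num_batches = int(math.ceil(n / float(batch_size)))
    return [(i * batch_size, min((i + 1) * batch_size, n))
            for i in range(num_batches)]

print(batch_ranges(120000, 50000))
# [(0, 50000), (50000, 100000), (100000, 120000)]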
diff --git a/demo/sentiment/sentiment_net.py b/demo/sentiment/sentiment_net.py
index 31e585edcaa111898c950ad016d3996fae15a7db..ff6a3624a404cb52d5d7ac0934fedba0d489dc22 100644
--- a/demo/sentiment/sentiment_net.py
+++ b/demo/sentiment/sentiment_net.py
@@ -47,10 +47,12 @@ def sentiment_data(data_dir=None,
for i, line in enumerate(open(dict_file, 'r')):
word_dict[line.split('\t')[0]] = i
- define_py_data_sources2(train_list, test_list,
- module="dataprovider",
- obj="process",
- args={'dictionary': word_dict})
+ define_py_data_sources2(
+ train_list,
+ test_list,
+ module="dataprovider",
+ obj="process",
+ args={'dictionary': word_dict})
return dict_dim, class_dim
@@ -64,8 +66,7 @@ def bidirectional_lstm_net(input_dim,
emb = embedding_layer(input=data, size=emb_dim)
bi_lstm = bidirectional_lstm(input=emb, size=lstm_dim)
dropout = dropout_layer(input=bi_lstm, dropout_rate=0.5)
- output = fc_layer(input=dropout, size=class_dim,
- act=SoftmaxActivation())
+ output = fc_layer(input=dropout, size=class_dim, act=SoftmaxActivation())
if not is_predict:
lbl = data_layer("label", 1)
@@ -109,27 +110,36 @@ def stacked_lstm_net(input_dim,
data = data_layer("word", input_dim)
emb = embedding_layer(input=data, size=emb_dim)
- fc1 = fc_layer(input=emb, size=hid_dim, act=linear,
- bias_attr=bias_attr)
- lstm1 = lstmemory(input=fc1, act=relu, bias_attr=bias_attr,
- layer_attr=layer_attr)
+ fc1 = fc_layer(input=emb, size=hid_dim, act=linear, bias_attr=bias_attr)
+ lstm1 = lstmemory(
+ input=fc1, act=relu, bias_attr=bias_attr, layer_attr=layer_attr)
inputs = [fc1, lstm1]
for i in range(2, stacked_num + 1):
- fc = fc_layer(input=inputs, size=hid_dim, act=linear,
- param_attr=para_attr, bias_attr=bias_attr)
- lstm = lstmemory(input=fc, reverse=(i % 2) == 0, act=relu,
- bias_attr=bias_attr, layer_attr=layer_attr)
+ fc = fc_layer(
+ input=inputs,
+ size=hid_dim,
+ act=linear,
+ param_attr=para_attr,
+ bias_attr=bias_attr)
+ lstm = lstmemory(
+ input=fc,
+ reverse=(i % 2) == 0,
+ act=relu,
+ bias_attr=bias_attr,
+ layer_attr=layer_attr)
inputs = [fc, lstm]
fc_last = pooling_layer(input=inputs[0], pooling_type=MaxPooling())
lstm_last = pooling_layer(input=inputs[1], pooling_type=MaxPooling())
- output = fc_layer(input=[fc_last, lstm_last], size=class_dim,
- act=SoftmaxActivation(),
- bias_attr=bias_attr, param_attr=para_attr)
+ output = fc_layer(
+ input=[fc_last, lstm_last],
+ size=class_dim,
+ act=SoftmaxActivation(),
+ bias_attr=bias_attr,
+ param_attr=para_attr)
if is_predict:
outputs(output)
else:
- outputs(
- classification_cost(input=output, label=data_layer('label', 1)))
+ outputs(classification_cost(input=output, label=data_layer('label', 1)))
diff --git a/demo/sentiment/trainer_config.py b/demo/sentiment/trainer_config.py
index db24182a8d7359786bd1f3b2083892cf846605d1..114a9138ebfef054c7d3ba99b4a510a452f8f2cd 100644
--- a/demo/sentiment/trainer_config.py
+++ b/demo/sentiment/trainer_config.py
@@ -20,20 +20,20 @@ is_test = get_config_arg('is_test', bool, False)
# whether this config is used for prediction
is_predict = get_config_arg('is_predict', bool, False)
-data_dir = "./data/pre-imdb"
+data_dir = "./data/pre-imdb"
dict_dim, class_dim = sentiment_data(data_dir, is_test, is_predict)
################## Algorithm Config #####################
settings(
- batch_size=128,
- learning_rate=2e-3,
- learning_method=AdamOptimizer(),
- regularization=L2Regularization(8e-4),
- gradient_clipping_threshold=25
-)
+ batch_size=128,
+ learning_rate=2e-3,
+ learning_method=AdamOptimizer(),
+ average_window=0.5,
+ regularization=L2Regularization(8e-4),
+ gradient_clipping_threshold=25)
#################### Network Config ######################
-stacked_lstm_net(dict_dim, class_dim=class_dim,
- stacked_num=3, is_predict=is_predict)
+stacked_lstm_net(
+ dict_dim, class_dim=class_dim, stacked_num=3, is_predict=is_predict)
# bidirectional_lstm_net(dict_dim, class_dim=class_dim, is_predict=is_predict)
diff --git a/demo/seqToseq/dataprovider.py b/demo/seqToseq/dataprovider.py
index df19db109ed223c7515c3ebf2cb1918f41163930..c5da1b7685f47fda337921c7c60ac1497b9e48bb 100755
--- a/demo/seqToseq/dataprovider.py
+++ b/demo/seqToseq/dataprovider.py
@@ -30,14 +30,14 @@ def hook(settings, src_dict, trg_dict, file_list, **kwargs):
if settings.job_mode:
settings.trg_dict = trg_dict
settings.slots = [
- integer_value_sequence(len(settings.src_dict)),
- integer_value_sequence(len(settings.trg_dict)),
+ integer_value_sequence(len(settings.src_dict)),
+ integer_value_sequence(len(settings.trg_dict)),
integer_value_sequence(len(settings.trg_dict))
]
settings.logger.info("trg dict len : %d" % (len(settings.trg_dict)))
else:
settings.slots = [
- integer_value_sequence(len(settings.src_dict)),
+ integer_value_sequence(len(settings.src_dict)),
integer_value_sequence(len(open(file_list[0], "r").readlines()))
]
@@ -62,8 +62,7 @@ def process(settings, file_name):
if settings.job_mode:
trg_seq = line_split[1] # one target sequence
trg_words = trg_seq.split()
- trg_ids = [settings.trg_dict.get(w, UNK_IDX)
- for w in trg_words]
+ trg_ids = [settings.trg_dict.get(w, UNK_IDX) for w in trg_words]
# remove sequence whose length > 80 in training mode
if len(src_ids) > 80 or len(trg_ids) > 80:
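For context on the three slots declared in `hook()` above: in training mode the provider yields source ids, decoder-input ids, and next-word ids. The `<s>`/`<e>` shifting below is an assumption inferred from the three `integer_value_sequence` slots; only the `trg_ids` lookup actually appears in this hunk:

```python
# Hedged sketch; dictionaries and the <s>/<e> shifting are assumptions.
UNK_IDX = 2
src_dict = {'<s>': 0, '<e>': 1, 'hello': 3}
trg_dict = {'<s>': 0, '<e>': 1, 'bonjour': 3}

src_ids = [src_dict.get(w, UNK_IDX) for w in 'hello world'.split()]
trg_ids = [trg_dict.get(w, UNK_IDX) for w in 'bonjour monde'.split()]
trg_ids_next = trg_ids + [trg_dict['<e>']]   # next-word targets end with <e>
trg_ids = [trg_dict['<s>']] + trg_ids        # decoder input starts with <s>

print((src_ids, trg_ids, trg_ids_next))      # ([3, 2], [0, 3, 2], [3, 2, 1])
```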
diff --git a/demo/seqToseq/preprocess.py b/demo/seqToseq/preprocess.py
index 5efb17a664b9a2525972c29b9b5700b483b8c07e..bd1c51b1514b790ec385d48f49197b3e0285e736 100755
--- a/demo/seqToseq/preprocess.py
+++ b/demo/seqToseq/preprocess.py
@@ -12,7 +12,6 @@
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
-
"""
Example:
python preprocess.py -i INPUT [-d DICTSIZE] [-m]
@@ -24,12 +23,13 @@ Options:
-m --mergeDict merge source and target dictionary
"""
import os
-import sys
+import sys
import string
from optparse import OptionParser
from paddle.utils.preprocess_util import save_list, DatasetCreater
+
class SeqToSeqDatasetCreater(DatasetCreater):
"""
A class to process data for sequence to sequence application.
@@ -75,7 +75,7 @@ class SeqToSeqDatasetCreater(DatasetCreater):
if not os.path.exists(output):
os.system(cmd + '> ' + output)
- def build_dict(self, file_path, dict_path, dict_size = -1):
+ def build_dict(self, file_path, dict_path, dict_size=-1):
"""
        Create the dictionary for the file. Note that
1. Valid characters include all printable characters
@@ -99,20 +99,23 @@ class SeqToSeqDatasetCreater(DatasetCreater):
for word in words:
if word not in dictory:
dictory[word] = 1
- else:
+ else:
dictory[word] += 1
output = open(dict_path, "w+")
output.write('\n\n\n')
count = 3
- for key, value in sorted(dictory.items(), key = lambda d:d[1], reverse = True):
+ for key, value in sorted(
+ dictory.items(), key=lambda d: d[1], reverse=True):
output.write(key + "\n")
count += 1
if count == dict_size:
break
self.dict_size = count
-
- def create_dataset(self, dict_size = -1, mergeDict = False,
- suffixes = ['.src', '.trg']):
+
+ def create_dataset(self,
+ dict_size=-1,
+ mergeDict=False,
+ suffixes=['.src', '.trg']):
"""
Create seqToseq dataset
"""
@@ -135,13 +138,14 @@ class SeqToSeqDatasetCreater(DatasetCreater):
        # check that the dataset is a parallel corpus
suffix_len = len(suffixes[0])
for dataset in dataset_list:
- file_list = os.listdir(dataset)
- if len(file_list) % 2 == 1:
- raise RuntimeError("dataset should be parallel corpora")
- file_list.sort()
- for i in range(0, len(file_list), 2):
- if file_list[i][:-suffix_len] != file_list[i + 1][:-suffix_len]:
- raise RuntimeError("source and target file name should be equal")
+ file_list = os.listdir(dataset)
+ if len(file_list) % 2 == 1:
+ raise RuntimeError("dataset should be parallel corpora")
+ file_list.sort()
+ for i in range(0, len(file_list), 2):
+ if file_list[i][:-suffix_len] != file_list[i + 1][:-suffix_len]:
+ raise RuntimeError(
+ "source and target file name should be equal")
# cat all the files with the same suffix in dataset
for suffix in suffixes:
@@ -155,16 +159,18 @@ class SeqToSeqDatasetCreater(DatasetCreater):
list = ['train.list', 'test.list', 'gen.list']
for dataset in dataset_list:
outname = os.path.basename(dataset)
- self.concat_file(dataset, outname + suffixes[0],
+ self.concat_file(dataset, outname + suffixes[0],
outname + suffixes[1], dir_list[id], outname)
- save_list([os.path.join(dir_list[id], outname)],
+ save_list([os.path.join(dir_list[id], outname)],
os.path.join(self.output_path, list[id]))
id += 1
# build dictionary for train data
dict = ['src.dict', 'trg.dict']
- dict_path = [os.path.join(self.output_path, dict[0]),
- os.path.join(self.output_path, dict[1])]
+ dict_path = [
+ os.path.join(self.output_path, dict[0]),
+ os.path.join(self.output_path, dict[1])
+ ]
if mergeDict:
outname = os.path.join(train_dir, train_dataset.split('/')[-1])
print 'build src dictionary for train data'
@@ -173,22 +179,30 @@ class SeqToSeqDatasetCreater(DatasetCreater):
os.system('cp ' + dict_path[0] + ' ' + dict_path[1])
else:
outname = os.path.join(train_dataset, self.train_dir_name)
- for id in range(0,2):
+ for id in range(0, 2):
suffix = suffixes[id]
print 'build ' + suffix[1:] + ' dictionary for train data'
self.build_dict(outname + suffix, dict_path[id], dict_size)
print 'dictionary size is', self.dict_size
+
def main():
usage = "usage: \n" \
"python %prog -i INPUT [-d DICTSIZE] [-m]"
parser = OptionParser(usage)
- parser.add_option("-i", action="store", dest="input",
- help="input original dataset path")
- parser.add_option("-d", action="store", dest="dictsize",
- help="specified word count of dictionary")
- parser.add_option("-m", "--mergeDict", action="store_true", dest="mergeDict",
- help="merge source and target dictionary")
+ parser.add_option(
+ "-i", action="store", dest="input", help="input original dataset path")
+ parser.add_option(
+ "-d",
+ action="store",
+ dest="dictsize",
+ help="specified word count of dictionary")
+ parser.add_option(
+ "-m",
+ "--mergeDict",
+ action="store_true",
+ dest="mergeDict",
+ help="merge source and target dictionary")
(options, args) = parser.parse_args()
if options.input[-1] == os.path.sep:
options.input = options.input[:-1]
@@ -200,5 +214,6 @@ def main():
data_creator = SeqToSeqDatasetCreater(options.input, output_path)
data_creator.create_dataset(dictsize, options.mergeDict)
+
if __name__ == "__main__":
- main();
+ main()
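`build_dict` above writes three reserved lines first (the `'\n\n\n'` header, with the word count starting at 3) and then emits words in descending frequency. The core of that logic as a standalone sketch:

```python
# Sketch of the frequency-sorted dictionary logic in build_dict() above;
# ids 0-2 are reserved, real words start at id 3.
counts = {}
for word in 'the cat the dog the'.split():
    counts[word] = counts.get(word, 0) + 1

vocab = ['<reserved>'] * 3
for key, value in sorted(counts.items(), key=lambda d: d[1], reverse=True):
    vocab.append(key)

print(vocab)  # ['<reserved>'] * 3 + ['the', ...] (ties in arbitrary order)
```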
diff --git a/demo/seqToseq/seqToseq_net.py b/demo/seqToseq/seqToseq_net.py
index edd6ad3f739b6cefc24d235be55c7a8f541e1ab7..ad5e3339c1461de06732eb62aca9e8323eea707b 100644
--- a/demo/seqToseq/seqToseq_net.py
+++ b/demo/seqToseq/seqToseq_net.py
@@ -50,16 +50,21 @@ def seq_to_seq_data(data_dir,
trg_dict = None
else:
train_list = os.path.join(data_dir, train_list)
- test_list = os.path.join(data_dir,test_list)
+ test_list = os.path.join(data_dir, test_list)
- define_py_data_sources2(train_list, test_list,
- module = "dataprovider",
- obj = "process",
- args = {"src_dict": src_dict,
- "trg_dict": trg_dict})
+ define_py_data_sources2(
+ train_list,
+ test_list,
+ module="dataprovider",
+ obj="process",
+ args={"src_dict": src_dict,
+ "trg_dict": trg_dict})
- return {"src_dict_path": src_lang_dict, "trg_dict_path": trg_lang_dict,
- "gen_result": gen_result}
+ return {
+ "src_dict_path": src_lang_dict,
+ "trg_dict_path": trg_lang_dict,
+ "gen_result": gen_result
+ }
def gru_encoder_decoder(data_conf,
@@ -90,51 +95,55 @@ def gru_encoder_decoder(data_conf,
size=word_vector_dim,
param_attr=ParamAttr(name='_source_language_embedding'))
src_forward = simple_gru(input=src_embedding, size=encoder_size)
- src_backward = simple_gru(input=src_embedding,
- size=encoder_size,
- reverse=True)
+ src_backward = simple_gru(
+ input=src_embedding, size=encoder_size, reverse=True)
encoded_vector = concat_layer(input=[src_forward, src_backward])
with mixed_layer(size=decoder_size) as encoded_proj:
encoded_proj += full_matrix_projection(input=encoded_vector)
backward_first = first_seq(input=src_backward)
- with mixed_layer(size=decoder_size,
- act=TanhActivation(), ) as decoder_boot:
+ with mixed_layer(
+ size=decoder_size,
+ act=TanhActivation(), ) as decoder_boot:
decoder_boot += full_matrix_projection(input=backward_first)
def gru_decoder_with_attention(enc_vec, enc_proj, current_word):
- decoder_mem = memory(name='gru_decoder',
- size=decoder_size,
- boot_layer=decoder_boot)
+ decoder_mem = memory(
+ name='gru_decoder', size=decoder_size, boot_layer=decoder_boot)
- context = simple_attention(encoded_sequence=enc_vec,
- encoded_proj=enc_proj,
- decoder_state=decoder_mem, )
+ context = simple_attention(
+ encoded_sequence=enc_vec,
+ encoded_proj=enc_proj,
+ decoder_state=decoder_mem, )
with mixed_layer(size=decoder_size * 3) as decoder_inputs:
decoder_inputs += full_matrix_projection(input=context)
decoder_inputs += full_matrix_projection(input=current_word)
- gru_step = gru_step_layer(name='gru_decoder',
- input=decoder_inputs,
- output_mem=decoder_mem,
- size=decoder_size)
+ gru_step = gru_step_layer(
+ name='gru_decoder',
+ input=decoder_inputs,
+ output_mem=decoder_mem,
+ size=decoder_size)
- with mixed_layer(size=target_dict_dim,
- bias_attr=True,
- act=SoftmaxActivation()) as out:
+ with mixed_layer(
+ size=target_dict_dim, bias_attr=True,
+ act=SoftmaxActivation()) as out:
out += full_matrix_projection(input=gru_step)
return out
decoder_group_name = "decoder_group"
- group_inputs=[StaticInput(input=encoded_vector,is_seq=True),
- StaticInput(input=encoded_proj,is_seq=True)]
+ group_inputs = [
+ StaticInput(
+ input=encoded_vector, is_seq=True), StaticInput(
+ input=encoded_proj, is_seq=True)
+ ]
if not is_generating:
trg_embedding = embedding_layer(
- input=data_layer(name='target_language_word',
- size=target_dict_dim),
+ input=data_layer(
+ name='target_language_word', size=target_dict_dim),
size=word_vector_dim,
param_attr=ParamAttr(name='_target_language_embedding'))
group_inputs.append(trg_embedding)
@@ -144,12 +153,12 @@ def gru_encoder_decoder(data_conf,
# while encoded source sequence is accessed to as an unbounded memory.
# Here, the StaticInput defines a read-only memory
# for the recurrent_group.
- decoder = recurrent_group(name=decoder_group_name,
- step=gru_decoder_with_attention,
- input=group_inputs)
+ decoder = recurrent_group(
+ name=decoder_group_name,
+ step=gru_decoder_with_attention,
+ input=group_inputs)
- lbl = data_layer(name='target_language_next_word',
- size=target_dict_dim)
+ lbl = data_layer(name='target_language_next_word', size=target_dict_dim)
cost = classification_cost(input=decoder, label=lbl)
outputs(cost)
else:
@@ -168,16 +177,19 @@ def gru_encoder_decoder(data_conf,
embedding_size=word_vector_dim)
group_inputs.append(trg_embedding)
- beam_gen = beam_search(name=decoder_group_name,
- step=gru_decoder_with_attention,
- input=group_inputs,
- bos_id=0,
- eos_id=1,
- beam_size=beam_size,
- max_length=max_length)
-
- seqtext_printer_evaluator(input=beam_gen,
- id_input=data_layer(name="sent_id", size=1),
- dict_file=trg_dict_path,
- result_file=gen_trans_file)
+ beam_gen = beam_search(
+ name=decoder_group_name,
+ step=gru_decoder_with_attention,
+ input=group_inputs,
+ bos_id=0,
+ eos_id=1,
+ beam_size=beam_size,
+ max_length=max_length)
+
+ seqtext_printer_evaluator(
+ input=beam_gen,
+ id_input=data_layer(
+ name="sent_id", size=1),
+ dict_file=trg_dict_path,
+ result_file=gen_trans_file)
outputs(beam_gen)
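To make the generation parameters above concrete: `bos_id=0` seeds every hypothesis, `eos_id=1` terminates one, `beam_size` caps the hypotheses kept per step, and `max_length` bounds the output. Below is a library-free sketch of those semantics; it is an illustration, not Paddle's implementation:

```python
import heapq
import math

# Generic beam search illustrating bos/eos/beam_size/max_length semantics.
def beam_search(step_probs, bos_id=0, eos_id=1, beam_size=3, max_length=10):
    beams = [(0.0, [bos_id])]                  # (negative log prob, tokens)
    for _ in range(max_length):
        candidates = []
        for score, seq in beams:
            if seq[-1] == eos_id:              # finished hypotheses pass through
                candidates.append((score, seq))
                continue
            for tok, p in step_probs(seq).items():
                candidates.append((score - math.log(p), seq + [tok]))
        beams = heapq.nsmallest(beam_size, candidates)
        if all(seq[-1] == eos_id for _, seq in beams):
            break
    return beams

# Toy next-token distribution: always 60% eos, 40% token 5.
print(beam_search(lambda seq: {1: 0.6, 5: 0.4}, beam_size=2))
```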
diff --git a/demo/sequence_tagging/data/get_data.sh b/demo/sequence_tagging/data/get_data.sh
new file mode 100755
index 0000000000000000000000000000000000000000..e579d6c46ce5ed96f442acc448b4cc61bf8394a3
--- /dev/null
+++ b/demo/sequence_tagging/data/get_data.sh
@@ -0,0 +1,21 @@
+#!/bin/bash
+# Copyright (c) 2016 Baidu, Inc. All Rights Reserved
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+set -e
+
+DIR="$( cd "$(dirname "$0")" ; pwd -P )"
+cd $DIR
+
+wget http://www.cnts.ua.ac.be/conll2000/chunking/train.txt.gz
+wget http://www.cnts.ua.ac.be/conll2000/chunking/test.txt.gz
diff --git a/demo/sequence_tagging/data/test.list b/demo/sequence_tagging/data/test.list
new file mode 100644
index 0000000000000000000000000000000000000000..073c0a0c9063ac55f762ac261746aa73057d70e8
--- /dev/null
+++ b/demo/sequence_tagging/data/test.list
@@ -0,0 +1 @@
+data/test.txt.gz
diff --git a/demo/sequence_tagging/data/train.list b/demo/sequence_tagging/data/train.list
new file mode 100644
index 0000000000000000000000000000000000000000..43c24d5f6484a90fe883ad5516fe100d27c9ce47
--- /dev/null
+++ b/demo/sequence_tagging/data/train.list
@@ -0,0 +1 @@
+data/train.txt.gz
diff --git a/demo/sequence_tagging/dataprovider.py b/demo/sequence_tagging/dataprovider.py
new file mode 100644
index 0000000000000000000000000000000000000000..37dcb7aa17c0abd197ef2f3121bf8be6c54375c2
--- /dev/null
+++ b/demo/sequence_tagging/dataprovider.py
@@ -0,0 +1,260 @@
+# Copyright (c) 2016 Baidu, Inc. All Rights Reserved
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+from paddle.trainer.PyDataProvider2 import *
+import gzip
+import logging
+
+logging.basicConfig(
+ format='[%(levelname)s %(asctime)s %(filename)s:%(lineno)s] %(message)s', )
+logger = logging.getLogger('paddle')
+logger.setLevel(logging.INFO)
+
+OOV_POLICY_IGNORE = 0
+OOV_POLICY_USE = 1
+OOV_POLICY_ERROR = 2
+
+num_original_columns = 3
+
+# Feature combination patterns.
+# [[-1,0], [0,0]] means previous token at column 0 and current token at
+# column 0 are combined as one feature.
+patterns = [
+ [[-2, 0]],
+ [[-1, 0]],
+ [[0, 0]],
+ [[1, 0]],
+ [[2, 0]],
+ [[-1, 0], [0, 0]],
+ [[0, 0], [1, 0]],
+ [[-2, 1]],
+ [[-1, 1]],
+ [[0, 1]],
+ [[1, 1]],
+ [[2, 1]],
+ [[-2, 1], [-1, 1]],
+ [[-1, 1], [0, 1]],
+ [[0, 1], [1, 1]],
+ [[1, 1], [2, 1]],
+ [[-2, 1], [-1, 1], [0, 1]],
+ [[-1, 1], [0, 1], [1, 1]],
+ [[0, 1], [1, 1], [2, 1]],
+]
+
+dict_label = {
+ 'B-ADJP': 0,
+ 'I-ADJP': 1,
+ 'B-ADVP': 2,
+ 'I-ADVP': 3,
+ 'B-CONJP': 4,
+ 'I-CONJP': 5,
+ 'B-INTJ': 6,
+ 'I-INTJ': 7,
+ 'B-LST': 8,
+ 'I-LST': 9,
+ 'B-NP': 10,
+ 'I-NP': 11,
+ 'B-PP': 12,
+ 'I-PP': 13,
+ 'B-PRT': 14,
+ 'I-PRT': 15,
+ 'B-SBAR': 16,
+ 'I-SBAR': 17,
+ 'B-UCP': 18,
+ 'I-UCP': 19,
+ 'B-VP': 20,
+ 'I-VP': 21,
+ 'O': 22
+}
+
+
+def make_features(sequence):
+ length = len(sequence)
+ num_features = len(sequence[0])
+
+ def get_features(pos):
+ if pos < 0:
+ return ['#B%s' % -pos] * num_features
+ if pos >= length:
+ return ['#E%s' % (pos - length + 1)] * num_features
+ return sequence[pos]
+
+ for i in xrange(length):
+ for pattern in patterns:
+ fname = '/'.join([get_features(i + pos)[f] for pos, f in pattern])
+ sequence[i].append(fname)
+
+
+'''
+Source file format:
+Each line is for one timestep. The features are separated by space.
+An empty line indicates end of a sequence.
+
+cutoff: a list of numbers. If count of a feature is smaller than this,
+ it will be ignored.
+if oov_policy[i] is OOV_POLICY_USE, id 0 is reserved for OOV features of
+i-th column.
+
+return a list of dict for each column
+'''
+
+
+def create_dictionaries(filename, cutoff, oov_policy):
+ def add_to_dict(sequence, dicts):
+ num_features = len(dicts)
+ for features in sequence:
+ l = len(features)
+ assert l == num_features, "Wrong number of features " + line
+ for i in xrange(l):
+ if features[i] in dicts[i]:
+ dicts[i][features[i]] += 1
+ else:
+ dicts[i][features[i]] = 1
+
+ num_features = len(cutoff)
+ dicts = []
+ for i in xrange(num_features):
+ dicts.append(dict())
+
+ f = gzip.open(filename, 'rb')
+
+ sequence = []
+
+ for line in f:
+ line = line.strip()
+ if not line:
+ make_features(sequence)
+ add_to_dict(sequence, dicts)
+ sequence = []
+ continue
+ features = line.split(' ')
+ sequence.append(features)
+
+ for i in xrange(num_features):
+ dct = dicts[i]
+ n = 1 if oov_policy[i] == OOV_POLICY_USE else 0
+ todo = []
+ for k, v in dct.iteritems():
+ if v < cutoff[i]:
+ todo.append(k)
+ else:
+ dct[k] = n
+ n += 1
+
+ if oov_policy[i] == OOV_POLICY_USE:
+ # placeholder so that len(dct) will be the number of features
+ # including OOV
+ dct['#OOV#'] = 0
+
+ logger.info('column %d dict size=%d, ignored %d' % (i, n, len(todo)))
+ for k in todo:
+ del dct[k]
+
+ f.close()
+ return dicts
+
+
+def initializer(settings, **xargs):
+ cutoff = [3, 1, 0]
+ cutoff += [3] * len(patterns)
+ oov_policy = [OOV_POLICY_IGNORE, OOV_POLICY_ERROR, OOV_POLICY_ERROR]
+ oov_policy += [OOV_POLICY_IGNORE] * len(patterns)
+ dicts = create_dictionaries('data/train.txt.gz', cutoff, oov_policy)
+ dicts[2] = dict_label
+ settings.dicts = dicts
+ settings.oov_policy = oov_policy
+ input_types = []
+ num_features = len(dicts)
+ for i in xrange(num_original_columns):
+ input_types.append(integer_sequence(len(dicts[i])))
+ logger.info("slot %s size=%s" % (i, len(dicts[i])))
+ if patterns:
+ dim = 0
+ for i in xrange(num_original_columns, num_features):
+ dim += len(dicts[i])
+ input_types.append(sparse_binary_vector_sequence(dim))
+ logger.info("feature size=%s" % dim)
+ settings.input_types = input_types
+
+
+'''
+if oov_policy[i] == OOV_POLICY_USE, features in i-th column which are not
+existed in dicts[i] will be assigned to id 0.
+if oov_policy[i] == OOV_POLICY_ERROR, all features in i-th column MUST exist
+in dicts[i].
+'''
+
+
+@provider(init_hook=initializer, cache=CacheType.CACHE_PASS_IN_MEM)
+def process(settings, filename):
+ input_file = filename
+ dicts = settings.dicts
+ oov_policy = settings.oov_policy
+
+ def gen_sample(sequence):
+ num_features = len(dicts)
+ sample = [list() for i in xrange(num_original_columns)]
+ if patterns:
+ sample.append([])
+ for features in sequence:
+ assert len(features) == num_features, \
+ "Wrong number of features: " + line
+ for i in xrange(num_original_columns):
+ id = dicts[i].get(features[i], -1)
+ if id != -1:
+ sample[i].append(id)
+ elif oov_policy[i] == OOV_POLICY_IGNORE:
+ sample[i].append(0xffffffff)
+ elif oov_policy[i] == OOV_POLICY_ERROR:
+ logger.fatal("Unknown token: %s" % features[i])
+ else:
+ sample[i].append(0)
+
+ if patterns:
+ dim = 0
+ vec = []
+ for i in xrange(num_original_columns, num_features):
+ id = dicts[i].get(features[i], -1)
+ if id != -1:
+ vec.append(dim + id)
+ elif oov_policy[i] == OOV_POLICY_IGNORE:
+ pass
+ elif oov_policy[i] == OOV_POLICY_ERROR:
+ logger.fatal("Unknown token: %s" % features[i])
+ else:
+                    vec.append(dim + 0)
+
+ dim += len(dicts[i])
+ sample[-1].append(vec)
+ return sample
+
+ num_features = len(dicts)
+ f = gzip.open(input_file, 'rb')
+
+ num_sequences = 0
+ sequence = []
+ for line in f:
+ line = line.strip()
+ if not line:
+ make_features(sequence)
+ yield gen_sample(sequence)
+ sequence = []
+ num_sequences += 1
+ continue
+ features = line.split(' ')
+ sequence.append(features)
+
+ f.close()
+
+ logger.info("num_sequences=%s" % num_sequences)
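To see what the pattern templates in this dataprovider produce, here is a standalone walk-through of the `make_features` logic on a two-token sequence, using a trimmed pattern list for brevity:

```python
# Standalone copy of the make_features() logic above on a tiny input
# (word column 0, POS column 1, chunk label column 2).
patterns = [[[-1, 0]], [[0, 0]], [[1, 0]], [[-1, 0], [0, 0]]]  # small subset
sequence = [['He', 'PRP', 'B-NP'], ['runs', 'VBZ', 'B-VP']]
length, num_features = len(sequence), len(sequence[0])

def get_features(pos):
    if pos < 0:
        return ['#B%s' % -pos] * num_features      # begin-of-sequence padding
    if pos >= length:
        return ['#E%s' % (pos - length + 1)] * num_features
    return sequence[pos]

for i in range(length):
    for pattern in patterns:
        print('/'.join(get_features(i + pos)[f] for pos, f in pattern))
# token 0: #B1, He, runs, #B1/He ; token 1: He, runs, #E1, He/runs
```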
diff --git a/demo/sequence_tagging/linear_crf.py b/demo/sequence_tagging/linear_crf.py
new file mode 100644
index 0000000000000000000000000000000000000000..64895742e1b8c0a11cbedee0b88e61b5b63b007f
--- /dev/null
+++ b/demo/sequence_tagging/linear_crf.py
@@ -0,0 +1,82 @@
+# Copyright (c) 2016 Baidu, Inc. All Rights Reserved
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+from paddle.trainer_config_helpers import *
+
+import math
+
+define_py_data_sources2(
+ train_list="data/train.list",
+ test_list="data/test.list",
+ module="dataprovider",
+ obj="process")
+
+batch_size = 1
+settings(
+ learning_method=MomentumOptimizer(),
+ batch_size=batch_size,
+ regularization=L2Regularization(batch_size * 1e-4),
+ average_window=0.5,
+ learning_rate=1e-1,
+ learning_rate_decay_a=1e-5,
+ learning_rate_decay_b=0.25, )
+
+num_label_types = 23
+
+
+def get_simd_size(size):
+ return int(math.ceil(float(size) / 8)) * 8
+
+
+# Currently, in order to use sparse_update=True,
+# the size has to be aligned.
+num_label_types = get_simd_size(num_label_types)
+
+features = data_layer(name="features", size=76328)
+word = data_layer(name="word", size=6778)
+pos = data_layer(name="pos", size=44)
+chunk = data_layer(name="chunk", size=num_label_types)
+
+crf_input = fc_layer(
+ input=features,
+ size=num_label_types,
+ act=LinearActivation(),
+ bias_attr=False,
+ param_attr=ParamAttr(
+ initial_std=0, sparse_update=True))
+
+crf = crf_layer(
+ input=crf_input,
+ label=chunk,
+ param_attr=ParamAttr(
+ name="crfw", initial_std=0), )
+
+crf_decoding = crf_decoding_layer(
+ size=num_label_types,
+ input=crf_input,
+ label=chunk,
+ param_attr=ParamAttr(name="crfw"), )
+
+sum_evaluator(
+ name="error",
+ input=crf_decoding, )
+
+chunk_evaluator(
+ name="chunk_f1",
+ input=[crf_decoding, chunk],
+ chunk_scheme="IOB",
+ num_chunk_types=11, )
+
+inputs(word, pos, chunk, features)
+outputs(crf)
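`get_simd_size` above rounds a layer size up to the next multiple of 8 so the parameter is SIMD-aligned, which the comment says `sparse_update=True` requires. Worked through on the 23 chunk labels:

```python
import math

# The alignment helper from linear_crf.py above.
def get_simd_size(size):
    return int(math.ceil(float(size) / 8)) * 8

print(get_simd_size(23))  # 24 -- the 23 chunk labels get one padding slot
print(get_simd_size(24))  # 24 -- already aligned
```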
diff --git a/demo/sequence_tagging/readme.md b/demo/sequence_tagging/readme.md
new file mode 100644
index 0000000000000000000000000000000000000000..2e17fffb83c532f5e5fec1227f169c97c1f20e22
--- /dev/null
+++ b/demo/sequence_tagging/readme.md
@@ -0,0 +1,45 @@
+# Sequence Tagging
+
+This demo is a sequence model that assigns a tag to each token in a sentence. The task is described in the CoNLL-2000 text chunking shared task.
+
+## Download data
+```bash
+cd demo/sequence_tagging
+./data/get_data.sh
+```
+
+## Train model
+```bash
+cd demo/sequence_tagging
+./train.sh
+```
+
+## Model description
+
+We provide two models. One is a linear CRF model (linear_crf.py), which is equivalent to the one at leon.bottou.org/projects/sgd. The other is a stacked bidirectional RNN plus CRF model (rnn_crf.py).
+
+| Model name | Number of parameters | F1 score |
+|:----------:|:--------------------:|:--------:|
+| linear_crf | 1.8M                 | 0.937    |
+| rnn_crf    | 960K                 | 0.941    |
+
diff --git a/demo/sequence_tagging/rnn_crf.py b/demo/sequence_tagging/rnn_crf.py
new file mode 100644
index 0000000000000000000000000000000000000000..90d4bbdddfdb4e38b930d54a2bc865df9fac589c
--- /dev/null
+++ b/demo/sequence_tagging/rnn_crf.py
@@ -0,0 +1,120 @@
+# Copyright (c) 2016 Baidu, Inc. All Rights Reserved
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+from paddle.trainer_config_helpers import *
+
+import math
+
+define_py_data_sources2(
+ train_list="data/train.list",
+ test_list="data/test.list",
+ module="dataprovider",
+ obj="process")
+
+batch_size = 16
+settings(
+ learning_method=MomentumOptimizer(),
+ batch_size=batch_size,
+ regularization=L2Regularization(batch_size * 1e-5),
+ average_window=0.5,
+ learning_rate=2e-3,
+ learning_rate_decay_a=5e-7,
+ learning_rate_decay_b=0.5, )
+
+word_dim = 128
+hidden_dim = 128
+with_rnn = True
+
+initial_std = 1 / math.sqrt(hidden_dim)
+param_attr = ParamAttr(initial_std=initial_std)
+cpu_layer_attr = ExtraLayerAttribute(device=-1)
+
+default_device(0)
+
+num_label_types = 23
+
+features = data_layer(name="features", size=76328)
+word = data_layer(name="word", size=6778)
+pos = data_layer(name="pos", size=44)
+chunk = data_layer(
+ name="chunk", size=num_label_types, layer_attr=cpu_layer_attr)
+
+emb = embedding_layer(
+ input=word, size=word_dim, param_attr=ParamAttr(initial_std=0))
+
+hidden1 = mixed_layer(
+ size=hidden_dim,
+ act=STanhActivation(),
+ bias_attr=True,
+ input=[
+ full_matrix_projection(emb), table_projection(
+ pos, param_attr=param_attr)
+ ])
+
+if with_rnn:
+ rnn1 = recurrent_layer(
+ act=ReluActivation(),
+ bias_attr=True,
+ input=hidden1,
+ param_attr=ParamAttr(initial_std=0), )
+
+hidden2 = mixed_layer(
+ size=hidden_dim,
+ act=STanhActivation(),
+ bias_attr=True,
+ input=[full_matrix_projection(hidden1)] +
+ ([full_matrix_projection(
+ rnn1, param_attr=ParamAttr(initial_std=0))] if with_rnn else []), )
+
+if with_rnn:
+ rnn2 = recurrent_layer(
+ reverse=True,
+ act=ReluActivation(),
+ bias_attr=True,
+ input=hidden2,
+ param_attr=ParamAttr(initial_std=0), )
+
+crf_input = mixed_layer(
+ size=num_label_types,
+ bias_attr=False,
+ input=[full_matrix_projection(hidden2), ] +
+ ([full_matrix_projection(
+ rnn2, param_attr=ParamAttr(initial_std=0))] if with_rnn else []), )
+
+crf = crf_layer(
+ input=crf_input,
+ label=chunk,
+ param_attr=ParamAttr(
+ name="crfw", initial_std=0),
+ layer_attr=cpu_layer_attr, )
+
+crf_decoding = crf_decoding_layer(
+ size=num_label_types,
+ input=crf_input,
+ label=chunk,
+ param_attr=ParamAttr(name="crfw"),
+ layer_attr=cpu_layer_attr, )
+
+sum_evaluator(
+ name="error",
+ input=crf_decoding, )
+
+chunk_evaluator(
+ name="chunk_f1",
+ input=[crf_decoding, chunk],
+ chunk_scheme="IOB",
+ num_chunk_types=11, )
+
+inputs(word, pos, chunk, features)
+outputs(crf)
diff --git a/demo/sequence_tagging/train.sh b/demo/sequence_tagging/train.sh
new file mode 100755
index 0000000000000000000000000000000000000000..9a706b98d8686101ba21b513644bdd791062ec26
--- /dev/null
+++ b/demo/sequence_tagging/train.sh
@@ -0,0 +1,10 @@
+#!/bin/bash
+
+paddle train \
+ --config rnn_crf.py \
+ --parallel_nn=1 \
+ --use_gpu=1 \
+ --dot_period=10 \
+ --log_period=1000 \
+ --test_period=0 \
+ --num_passes=10
diff --git a/demo/sequence_tagging/train_linear.sh b/demo/sequence_tagging/train_linear.sh
new file mode 100755
index 0000000000000000000000000000000000000000..597b5afea9c63a8e209b69b6a40e74556e27ac31
--- /dev/null
+++ b/demo/sequence_tagging/train_linear.sh
@@ -0,0 +1,9 @@
+#!/bin/bash
+
+paddle train \
+ --config linear_crf.py \
+ --use_gpu=0 \
+ --dot_period=100 \
+ --log_period=10000 \
+ --test_period=0 \
+ --num_passes=10
diff --git a/doc/algorithm/index.rst b/doc/algorithm/index.rst
new file mode 100644
index 0000000000000000000000000000000000000000..6073add3c0cbb12529eabb0f8d8a051bcb84e628
--- /dev/null
+++ b/doc/algorithm/index.rst
@@ -0,0 +1,7 @@
+Algorithm Tutorial
+==================
+
+.. toctree::
+ :maxdepth: 1
+
+ rnn/rnn.rst
diff --git a/doc/algorithm/rnn/rnn.rst b/doc/algorithm/rnn/rnn.rst
index 343f55a20e464f63f054ebe724b5ef90f848d5e9..01d2caefb5cdf4e949511fd0f5bbafe0e604e881 100644
--- a/doc/algorithm/rnn/rnn.rst
+++ b/doc/algorithm/rnn/rnn.rst
@@ -1,5 +1,5 @@
-Recurrent Neural Network Configuration
-======================================
+RNN Configuration
+=================
This tutorial explains how to configure recurrent neural networks in PaddlePaddle. PaddlePaddle supports highly flexible and efficient recurrent neural network configurations. In this tutorial, you will learn how to:
@@ -17,7 +17,7 @@ PaddlePaddle does not need any preprocessing to sequence data, such as padding.
.. code-block:: python
- settings.slots = [
+ settings.input_types = [
integer_value_sequence(len(settings.src_dict)),
integer_value_sequence(len(settings.trg_dict)),
integer_value_sequence(len(settings.trg_dict))]
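The rename from `settings.slots` to `settings.input_types` matches the change applied to the demo dataproviders earlier in this patch. A minimal init hook using the new attribute might look like this (the dictionaries are whatever the data sources pass in):

```python
# Minimal sketch of a PyDataProvider2 init hook using the renamed attribute.
from paddle.trainer.PyDataProvider2 import integer_value_sequence

def hook(settings, src_dict, trg_dict, **kwargs):
    settings.src_dict, settings.trg_dict = src_dict, trg_dict
    settings.input_types = [
        integer_value_sequence(len(settings.src_dict)),
        integer_value_sequence(len(settings.trg_dict)),
        integer_value_sequence(len(settings.trg_dict))
    ]
```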
diff --git a/doc/build/build_from_source.md b/doc/build/build_from_source.md
index f7db0a9b92e67e1ecf5e44f1edb17cb8cacd8d2d..e44fa0d38e9982e5d0ed159743994ce6acc51246 100644
--- a/doc/build/build_from_source.md
+++ b/doc/build/build_from_source.md
@@ -4,13 +4,12 @@ Installing from Sources
* [1. Download and Setup](#download)
* [2. Requirements](#requirements)
* [3. Build on Ubuntu](#ubuntu)
-* [4. Build on Mac OS X](#mac)
## Download and Setup
-You can download PaddlePaddle from the [github source](https://github.com/gangliao/Paddle).
+You can download PaddlePaddle from the [github source](https://github.com/PaddlePaddle/Paddle).
```bash
-git clone https://github.com/baidu/Paddle paddle
+git clone https://github.com/PaddlePaddle/Paddle paddle
cd paddle
```
@@ -153,12 +152,12 @@ As a simple example, consider the following:
- **Only CPU**
```bash
- cmake .. -DWITH_GPU=OFF -DWITH_DOC=OFF
+ cmake .. -DWITH_GPU=OFF
```
- **GPU**
```bash
- cmake .. -DWITH_GPU=ON -DWITH_DOC=OFF
+ cmake .. -DWITH_GPU=ON
```
- **GPU with doc and swig**
@@ -171,7 +170,7 @@ Finally, you can build PaddlePaddle:
```bash
# you can add build option here, such as:
-cmake .. -DWITH_GPU=ON -DWITH_DOC=OFF -DCMAKE_INSTALL_PREFIX=
+cmake .. -DWITH_GPU=ON -DCMAKE_INSTALL_PREFIX=
# please use sudo make install, if you want to install PaddlePaddle into the system
make -j `nproc` && make install
# set PaddlePaddle installation path in ~/.bashrc
@@ -191,122 +190,3 @@ sudo pip install /opt/paddle/share/wheels/*.whl
# or just run
sudo paddle version
```
-
-## Building on Mac OS X
-
-### Prerequisites
-This guide is based on Mac OS X 10.11 (El Capitan). Note that if you are running an up to date version of OS X,
-you will already have Python 2.7.10 and Numpy 1.8 installed.
-
-The best option is to use the package manager homebrew to handle installations and upgrades for you.
-To install [homebrew](http://brew.sh/), first open a terminal window (you can find Terminal in the Utilities folder in Applications), and issue the command:
-
-```bash
-# install brew
-/usr/bin/ruby -e "$(curl -fsSL https://raw.githubusercontent.com/Homebrew/install/master/install)"
-# install pip
-easy_install pip
-```
-
-### Install Dependencies
-
-- **CPU Dependencies**
-
- ```bash
- # Install fundamental dependents
- brew install glog gflags cmake protobuf openblas
-
- # Install google test on Mac OS X
- # Download gtest 1.7.0
- wget https://github.com/google/googletest/archive/release-1.7.0.tar.gz
- tar -xvf googletest-release-1.7.0.tar.gz && cd googletest-release-1.7.0
- # Build gtest
- mkdir build && cmake ..
- make
- # Install gtest library
- sudo cp -r ../include/gtest /usr/local/include/
- sudo cp lib*.a /usr/local/lib
- ```
-
-- **GPU Dependencies(optional)**
-
- To build GPU version, you will need the following installed:
-
- 1. a CUDA-capable GPU
- 2. Mac OS X 10.11 or later
- 2. the Clang compiler and toolchain installed using Xcode
- 3. NVIDIA CUDA Toolkit (available at http://developer.nvidia.com/cuda-downloads)
- 4. NVIDIA cuDNN Library (availabel at https://developer.nvidia.com/cudnn)
-
- The CUDA development environment relies on tight integration with the host development environment,
- including the host compiler and C runtime libraries, and is therefore only supported on
- distribution versions that have been qualified for this CUDA Toolkit release.
-
- 1. After downloading cuDNN library, issue the following commands:
-
- ```bash
- sudo tar -xzf cudnn-7.5-osx-x64-v5.0-ga.tgz -C /usr/local
- sudo chmod a+r /usr/local/cuda/include/cudnn.h /usr/local/cuda/lib64/libcudnn*
- ```
- 2. Then you need to set DYLD\_LIBRARY\_PATH, PATH environment variables in ~/.bashrc.
-
- ```bash
- export DYLD_LIBRARY_PATH=/usr/local/cuda/lib:$DYLD_LIBRARY_PATH
- export PATH=/usr/local/cuda/bin:$PATH
- ```
-
-### Build and Install
-
-As usual, the best option is to create build folder under paddle project directory.
-
-```bash
-mkdir build && cd build
-cmake ..
-```
-
-CMake first check PaddlePaddle's dependencies in system default path. After installing some optional
-libraries, corresponding build option will be set automatically (for instance, glog, gtest and gflags).
-If still not found, you can manually set it based on CMake error information from your screen.
-
-As a simple example, consider the following:
-
-- **Only CPU**
-
- ```bash
- cmake .. -DWITH_GPU=OFF -DWITH_DOC=OFF
- ```
-- **GPU**
-
- ```bash
- cmake .. -DWITH_GPU=ON -DWITH_DOC=OFF
- ```
-
-- **GPU with doc and swig**
-
- ```bash
- cmake .. -DWITH_GPU=ON -DWITH_DOC=ON -DWITH_SWIG_PY=ON
- ```
-
-Finally, you can build PaddlePaddle:
-
-```bash
-# you can add build option here, such as:
-cmake .. -DWITH_GPU=ON -DWITH_DOC=OFF -DCMAKE_INSTALL_PREFIX=
-# please use sudo make install, if you want to install PaddlePaddle into the system
-make -j `nproc` && make install
-# set PaddlePaddle installation path in ~/.bashrc
-export PATH=/bin:$PATH
-```
-**Note:**
-
-If you set `WITH_SWIG_PY=ON`, related python dependencies also need to be installed.
-Otherwise, PaddlePaddle will automatically install python dependencies
-at first time when user run paddle commands, such as `paddle version`, `paddle train`.
-It may require sudo privileges:
-
-```bash
-# you can run
-sudo pip install /opt/paddle/share/wheels/*.whl
-# or just run
-sudo paddle version
-```
diff --git a/doc/build/contribute_to_paddle.md b/doc/build/contribute_to_paddle.md
index 06fcff61720755432c5618500ac509c5b3f867df..1d03eb7362b1b6f2fcdac7b53f8b7f93fb75e49c 100644
--- a/doc/build/contribute_to_paddle.md
+++ b/doc/build/contribute_to_paddle.md
@@ -1,10 +1,10 @@
-# Contribute to PaddlePaddle
+# Contribute Code
We sincerely appreciate your contributions. You can use fork and pull request
workflow to merge your code.
## Code Requirements
-- Your code mush be fully documented by
+- Your code must be fully documented by
[doxygen](http://www.stack.nl/~dimitri/doxygen/) style.
- Make sure the compiler option WITH\_STYLE\_CHECK is on and the compiler
passes the code style check.
@@ -20,16 +20,30 @@ It's just that simple.
## Clone
+Paddle currently uses the [git-flow branching model](http://nvie.com/posts/a-successful-git-branching-model/).
+**develop** is the main branch, and users' own branches are feature branches.
+
Once you've created a fork, you can use your favorite git client to clone your
repo or just head straight to the command line:
```shell
# Clone your fork to your local machine
-git clone https://github.com/USERNAME/Paddle.git
+git clone --branch develop https://github.com/USERNAME/Paddle.git
+```
+If your repository doesn't contain a **develop** branch, create one yourself.
+
+```shell
+git clone https://github.com/USERNAME/Paddle.git Paddle
+cd Paddle
+git checkout -b develop # create develop branch.
+git remote add upstream https://github.com/baidu/Paddle.git # add upstream to baidu/Paddle
+git pull upstream develop # update to upstream
```
+
Then you can start to develop by making a local development branch
+
```shell
-git checkout -b MY_COOL_STUFF_BRANCH origin/master
+git checkout -b MY_COOL_STUFF_BRANCH
```
## Commit
@@ -41,7 +55,7 @@ Commit your changes by following command lines:
git status
# add modified files
git add xx
-git commit -m "commit info"
+env EDITOR=vim git commit # Write your commit message in vim/nano/emacs.
```
The first line of the commit information is the title. The second and later lines
are the details if any.
@@ -63,7 +77,7 @@ git remote -v
Update your fork with the latest upstream changes:
```shell
-git pull --rebase upstream HEAD
+git pull --rebase upstream develop
```
If there are no unique commits locally, git will simply perform a fast-forward.
@@ -76,7 +90,7 @@ Now, your local master branch is up-to-date with everything modified upstream.
```shell
# push to your repository in Github
-git push origin HEAD
+git push -u origin MY_COOL_STUFF_BRANCH # create remote branch MY_COOL_STUFF_BRANCH to origin.
```
## Pull Request
@@ -93,9 +107,24 @@ of conflict, you need to do the update manually. You need to do the following on
your local repository:
```shell
git checkout MY_COOL_STUFF_BRANCH
-git pull --rebase upstream HEAD
+git pull upstream develop
# You may need to resolve the conflict according to the git prompt.
# Make and test your code.
-git push -f origin HEAD
+git push origin MY_COOL_STUFF_BRANCH
```
Now your Pull Request is updated with the latest version.
+
+## Revise your pull request
+
+When you revise your pull request according to reviewers' comments, please use 'git commit' instead of 'git commit --amend' to commit your changes, so that reviewers can see the difference between the new pull request and the old one.
+
+The possible commands are
+
+```shell
+git checkout MY_COOL_STUFF_BRANCH
+git pull upstream develop # update local to newest code base.
+# Some conflicts may occur; resolve them as prompted.
+# And develop your cool stuff
+env EDITOR=vim git commit # add your revise log
+git push origin MY_COOL_STUFF_BRANCH
+```
diff --git a/doc/build/docker_install.rst b/doc/build/docker_install.rst
index 542b9bac27afb84d2b41e5295145540bf2aa5485..e95de35f4da35fee511551f13bc6026532cce5c3 100644
--- a/doc/build/docker_install.rst
+++ b/doc/build/docker_install.rst
@@ -69,7 +69,7 @@ If you want to launch container with GPU support, you need to set some environme
.. code-block:: bash
- export CUDA_SO="$(\ls /usr/lib64/libcuda* | xargs -I{} echo '-v {}:{}') $(\ls /usr/lib64/libnvidia* | xargs -I{} echo '-v {}:{}"
+ export CUDA_SO="$(\ls /usr/lib64/libcuda* | xargs -I{} echo '-v {}:{}') $(\ls /usr/lib64/libnvidia* | xargs -I{} echo '-v {}:{}')"
export DEVICES=$(\ls /dev/nvidia* | xargs -I{} echo '--device {}:{}')
docker run ${CUDA_SO} ${DEVICES} -it paddledev/paddle:gpu-latest
diff --git a/doc/build/index.rst b/doc/build/index.rst
index 511cdea145c7fd0e41566d0a85115dbb06f84058..b4fe4596047c7d201fdf36bc76c26d5134611560 100644
--- a/doc/build/index.rst
+++ b/doc/build/index.rst
@@ -1,5 +1,5 @@
-Build And Install PaddlePaddle
-================================
+Install and Build
+=================
Install PaddlePaddle
----------------------
@@ -18,11 +18,7 @@ Build from Source
.. warning::
- Please use :code:`deb` package or :code:`docker` image to install paddle. The building guide is used for hacking or contributing to PaddlePaddle.
-
-
-If you want to hack and contribute PaddlePaddle source code, following guides can help you\:
-
+ Please use :code:`deb` package or :code:`docker` image to install paddle. The building guide is used for hacking or contributing PaddlePaddle source code.
.. toctree::
:maxdepth: 1
@@ -30,4 +26,3 @@ If you want to hack and contribute PaddlePaddle source code, following guides ca
build_from_source.md
contribute_to_paddle.md
-
diff --git a/doc/cluster/opensource/cluster_train.md b/doc/cluster/opensource/cluster_train.md
index 4763ede39b049b6c49225dc9ae7add77325d704e..cb493a88f031850cb6a5eeed0ebe9e41bb7e01c3 100644
--- a/doc/cluster/opensource/cluster_train.md
+++ b/doc/cluster/opensource/cluster_train.md
@@ -1,26 +1,24 @@
-# Cluster Training
+# Distributed Training
-We provide some simple scripts ```paddle/scripts/cluster_train``` to help you to launch cluster training Job to harness PaddlePaddle's distributed trainning. For MPI and other cluster scheduler refer this naive script to implement more robust cluster training platform by yourself.
+In this article, we explain how to run distributed Paddle training jobs on clusters. We will create the distributed version of the single-process training example, [recommendation](https://github.com/baidu/Paddle/tree/develop/demo/recommendation).
-The following cluster demo is based on RECOMMENDATION local training demo in PaddlePaddle ```demo/recommendation``` directory. Assuming you enter the ```paddle/scripts/cluster_train/``` directory.
+[Scripts](https://github.com/baidu/Paddle/tree/develop/paddle/scripts/cluster_train) used in this article launch distributed jobs via SSH. They also work as a reference for users running more sophisticated cluster management systems like MPI and Kubernetes.
-## Pre-requirements
+## Prerequisite
-Firstly,
+1. Aforementioned scripts use a Python library [fabric](http://www.fabfile.org/) to run SSH commands. We can use `pip` to install fabric:
-```bash
+ ```bash
pip install fabric
-```
-
-Secondly, go through installing scripts to install PaddlePaddle at all nodes to make sure demo can run as local mode. For CUDA enabled training, we assume that CUDA is installed in ```/usr/local/cuda```, otherwise missed cuda runtime libraries error could be reported at cluster runtime. In one word, the local training environment should be well prepared for the simple scripts.
+ ```
-Then you should prepare same ROOT_DIR directory in all nodes. ROOT_DIR is from in cluster_train/conf.py. Assuming that the ROOT_DIR = /home/paddle, you can create ```paddle``` user account as well, at last ```paddle.py``` can ssh connections to all nodes with ```paddle``` user automatically.
+1. We need to install PaddlePaddle on all nodes in the cluster. To enable GPUs, we need to install CUDA in `/usr/local/cuda`; otherwise Paddle will report errors about missing CUDA runtime libraries.
-At last you can create ssh mutual trust relationship between all nodes for easy ssh login, otherwise ```password``` should be provided at runtime from ```paddle.py```.
+1. Set the `ROOT_DIR` variable in `cluster_train/conf.py` on all nodes. For convenience, we often create a Unix user `paddle` on all nodes and set `ROOT_DIR=/home/paddle`. In this way, we can write public SSH keys into `/home/paddle/.ssh/authorized_keys` so that user `paddle` can SSH to all nodes without a password.
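+
+The fabric sketch promised above: a minimal illustration, using the fabric 1.x API, of running one command on every node over SSH. The host names and the command are hypothetical, not part of the cluster scripts:
+
+```python
+# sketch only: fabric 1.x API; hosts and the command are assumptions
+from fabric.api import env, run, execute
+
+env.user = 'paddle'              # the Unix account described above
+env.hosts = ['node1', 'node2']   # hypothetical node names
+
+def check_root_dir():
+    # executed on every host in env.hosts over SSH
+    run('ls /home/paddle')
+
+if __name__ == '__main__':
+    execute(check_root_dir)
+```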
## Prepare Job Workspace
-```Job workspace``` is defined as one package directory which contains dependency libraries, train data, test data, model config file and all other related file dependencies.
+We refer to the directory where we put dependent libraries, config files, etc., as *workspace*.
The `train/test` data should be prepared before launching the cluster job. Because the train/test data may be placed in a different directory from the workspace, PaddlePaddle locates them through the index files `train.list` and `test.list`, which are referenced in the model config file; so the workspace also contains these two list files. Every local training demo already provides scripts to generate them, and all nodes of a cluster job handle the files with the same logic.
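+
+As a sketch of how such index files can be produced (the directory layout below is an assumption, not part of any demo):
+
+```python
+# hypothetical helper that writes one data file path per line into an
+# index file, which is the format the model config expects
+import os
+
+def write_list(list_file, data_dir):
+    with open(list_file, 'w') as f:
+        for name in sorted(os.listdir(data_dir)):
+            f.write(os.path.abspath(os.path.join(data_dir, name)) + '\n')
+
+write_list('train.list', 'data/train')
+write_list('test.list', 'data/test')
+```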
diff --git a/doc/demo/quick_start/index_en.md b/doc/demo/quick_start/index_en.md
index ee3fa2a2166f497524663574270b239a6170ab19..80d816a768a71156ce72cda6ea92b749fbcdbe1f 100644
--- a/doc/demo/quick_start/index_en.md
+++ b/doc/demo/quick_start/index_en.md
@@ -1,4 +1,4 @@
-# Quick Start Tutorial
+# Quick Start
This tutorial will teach the basics of deep learning (DL), including how to implement many different models in PaddlePaddle. You will learn how to:
- Prepare data into the standardized format that PaddlePaddle accepts.
@@ -134,7 +134,7 @@ def process(settings, file_name):
You need to add a data provider definition `define_py_data_sources2` in our network configuration. This definition specifies:
- The path of the training and testing data (`data/train.list`, `data/test.list`).
-- The location of the data provider file (`dataprovider_pow`).
+- The location of the data provider file (`dataprovider_bow`).
- The function to call to get data. (`process`).
- Additional arguments or data. Here it passes the path of word dictionary.
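+
+Putting the items above together, a sketch of such a definition might look like the following; the list paths and the `args` payload are assumptions, not the demo's exact values:
+
+```python
+from paddle.trainer_config_helpers import *
+
+# sketch of a define_py_data_sources2 call; paths and args are assumed
+define_py_data_sources2(
+    train_list='data/train.list',
+    test_list='data/test.list',
+    module='dataprovider_bow',
+    obj='process',
+    args={'dictionary': 'data/dict.txt'})
+```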
diff --git a/doc/demo/semantic_role_labeling/curve.jpg b/doc/demo/semantic_role_labeling/curve.jpg
new file mode 100644
index 0000000000000000000000000000000000000000..baa35ae7f0a0b6c246f3a0d331735477ab8bcd70
Binary files /dev/null and b/doc/demo/semantic_role_labeling/curve.jpg differ
diff --git a/doc/demo/semantic_role_labeling/semantic_role_labeling.md b/doc/demo/semantic_role_labeling/semantic_role_labeling.md
index 05fbc8278daf204df60ad19b742c920e47128c27..e2793b2b3494160a7a80f07ec2127bd1f1a4f2e4 100644
--- a/doc/demo/semantic_role_labeling/semantic_role_labeling.md
+++ b/doc/demo/semantic_role_labeling/semantic_role_labeling.md
@@ -30,8 +30,6 @@ Several new files appear in the `data `directory as follows.
conll05st-release:the test data set of CoNll-2005 shared task
test.wsj.words:the Wall Street Journal data sentences
test.wsj.props: the propositional arguments
-src.dict:the dictionary of words in sentences
-tgt.dict:the labels dictionary
feature: the extracted features from data set
```
@@ -67,6 +65,8 @@ def hook(settings, word_dict, label_dict, **kwargs):
settings.label_dict = label_dict
#all inputs are integral and sequential type
settings.slots = [
+ integer_value_sequence(len(word_dict)),
+ integer_value_sequence(len(predicate_dict)),
integer_value_sequence(len(word_dict)),
integer_value_sequence(len(word_dict)),
integer_value_sequence(len(word_dict)),
@@ -77,34 +77,39 @@ def hook(settings, word_dict, label_dict, **kwargs):
```
The corresponding data iterator is as following:
```
-@provider(use_seq=True, init_hook=hook)
-def process(obj, file_name):
+@provider(init_hook=hook, should_shuffle=True, calc_batch_size=get_batch_size,
+ can_over_batch_size=False, cache=CacheType.CACHE_PASS_IN_MEM)
+def process(settings, file_name):
with open(file_name, 'r') as fdata:
for line in fdata:
- sentence, predicate, ctx_n1, ctx_0, ctx_p1, mark, label = line.strip().split('\t')
+ sentence, predicate, ctx_n2, ctx_n1, ctx_0, ctx_p1, ctx_p2, mark, label = \
+ line.strip().split('\t')
+
words = sentence.split()
sen_len = len(words)
- word_slot = [obj.word_dict.get(w, UNK_IDX) for w in words]
+ word_slot = [settings.word_dict.get(w, UNK_IDX) for w in words]
- predicate_slot = [obj.word_dict.get(predicate, UNK_IDX)] * sen_len
- ctx_n1_slot = [obj.word_dict.get(ctx_n1, UNK_IDX) ] * sen_len
- ctx_0_slot = [obj.word_dict.get(ctx_0, UNK_IDX) ] * sen_len
- ctx_p1_slot = [obj.word_dict.get(ctx_p1, UNK_IDX) ] * sen_len
+ predicate_slot = [settings.predicate_dict.get(predicate)] * sen_len
+ ctx_n2_slot = [settings.word_dict.get(ctx_n2, UNK_IDX)] * sen_len
+ ctx_n1_slot = [settings.word_dict.get(ctx_n1, UNK_IDX)] * sen_len
+ ctx_0_slot = [settings.word_dict.get(ctx_0, UNK_IDX)] * sen_len
+ ctx_p1_slot = [settings.word_dict.get(ctx_p1, UNK_IDX)] * sen_len
+ ctx_p2_slot = [settings.word_dict.get(ctx_p2, UNK_IDX)] * sen_len
marks = mark.split()
mark_slot = [int(w) for w in marks]
label_list = label.split()
- label_slot = [obj.label_dict.get(w) for w in label_list]
-
- yield word_slot, predicate_slot, ctx_n1_slot, ctx_0_slot, ctx_p1_slot, mark_slot, label_slot
+ label_slot = [settings.label_dict.get(w) for w in label_list]
+ yield word_slot, predicate_slot, ctx_n2_slot, ctx_n1_slot, \
+ ctx_0_slot, ctx_p1_slot, ctx_p2_slot, mark_slot, label_slot
```
-The `process`function yield 7 lists which are six features and labels.
+The `process` function yields 9 lists: 8 features and the label.
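+
+The `@provider` decorator above references a `get_batch_size` helper that this excerpt does not show. A minimal sketch, assuming one sample's cost in the batch is its sentence length:
+
+```python
+# hypothetical sketch: calc_batch_size receives one yielded sample and
+# returns its contribution to the batch, here the number of words in
+# the first slot (word_slot)
+def get_batch_size(yield_data):
+    return len(yield_data[0])
+```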
### Neural Network Config
`db_lstm.py` is the neural network config file to load the dictionaries and define the data provider module and network architecture during the training procedure.
-Seven `data_layer` load instances from data provider. Six features are transformed into embedddings respectively, and mixed by `mixed_layer` . Deep bidirectional LSTM layers extract features for the softmax layer. The objective function is cross entropy of labels.
+Nine `data_layer`s load instances from the data provider. Eight features are transformed into embeddings respectively and mixed by `mixed_layer`. Deep bidirectional LSTM layers extract features for the softmax layer. The objective function is the cross entropy of the labels.
### Run Training
The script for training is `train.sh`, user just need to execute:
@@ -115,27 +120,36 @@ The content in `train.sh`:
```
paddle train \
--config=./db_lstm.py \
+ --use_gpu=0 \
+ --log_period=5000 \
+ --trainer_count=1 \
+ --show_parameter_stats_period=5000 \
--save_dir=./output \
- --trainer_count=4 \
- --log_period=10 \
- --num_passes=500 \
- --use_gpu=false \
- --show_parameter_stats_period=10 \
+ --num_passes=10000 \
+ --average_test_period=10000000 \
+ --init_model_path=./data \
+ --load_missing_parameter_strategy=rand \
--test_all_data_in_one_period=1 \
2>&1 | tee 'train.log'
```
- \--config=./db_lstm.py : network config file.
-- \--save_di=./output: output path to save models.
-- \--trainer_count=4 : set thread number (or GPU count).
-- \--log_period=10 : print log every 20 batches.
-- \--num_passes=500: set pass number, one pass in PaddlePaddle means training all samples in dataset one time.
-- \--use_gpu=false: use CPU to train, set true, if you install GPU version of PaddlePaddle and want to use GPU to train.
-- \--show_parameter_stats_period=10: show parameter statistic every 100 batches.
-- \--test_all_data_in_one_period=1: test all data in every testing.
-
-
-After training, the models will be saved in directory `output`.
+- \--use_gpu=0: use the CPU to train; set to 1 if you installed the GPU version of PaddlePaddle and want to train on GPU. Note that so far `crf_layer` does not support GPU.
+- \--log_period=5000: print a log every 5000 batches.
+- \--trainer_count=1: set the thread number (or GPU count).
+- \--show_parameter_stats_period=5000: show parameter statistics every 5000 batches.
+- \--save_dir=./output: output path to save models.
+- \--num_passes=10000: set the number of passes; one pass in PaddlePaddle means training on all samples in the dataset once.
+- \--average_test_period=10000000: test on the averaged parameters every `average_test_period` batches.
+- \--init_model_path=./data: parameter initialization path.
+- \--load_missing_parameter_strategy=rand: randomly initialize parameters that do not exist in the initialization path.
+- \--test_all_data_in_one_period=1: test all data in one period.
+
+
+After training, the models will be saved in the directory `output`. Our training curve is as follows:
+
+![curve](./curve.jpg)
+
### Run testing
The script for testing is `test.sh`, user just need to execute:
@@ -155,6 +169,7 @@ paddle train \
- \--model_list=$model_list.list: model list file
- \--job=test: indicate the test job
- \--config_args=is_test=1: flag to indicate test
+ - \--test_all_data_in_one_period=1: test all data in one period.
### Run prediction
@@ -166,11 +181,13 @@ The script for prediction is `predict.sh`, user just need to execute:
In `predict.sh`, user should offer the network config file, model path, label file, word dictionary file, feature file
```
python predict.py
- -c $config_file
- -w $model_path
- -l $label_file
- -d $dict_file
- -i $input_file
+ -c $config_file \
+ -w $best_model_path \
+ -l $label_file \
+ -p $predicate_dict_file \
+ -d $dict_file \
+ -i $input_file \
+ -o $output_file
```
`predict.py` is the main executable Python script, which includes the following functions: loading the model, loading the data, and predicting. The network model will output the probability distribution of labels. In the demo, we take the label with the maximum probability as the result. Users can also implement beam search or Viterbi decoding on top of the probability distribution matrix.
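+
+As a toy illustration of taking the label with maximum probability from the matrix that `predict.py` outputs (the numbers below are invented for illustration):
+
+```python
+# toy numpy sketch of max-probability decoding; rows are words,
+# columns are labels, and the values are made up
+import numpy as np
+
+prob = np.array([[0.1, 0.7, 0.2],
+                 [0.5, 0.3, 0.2]])
+best_labels = prob.argmax(axis=1)  # -> array([1, 0])
+```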
diff --git a/doc/demo/sentiment_analysis/sentiment_analysis.md b/doc/demo/sentiment_analysis/sentiment_analysis.md
index 385f49891dcd840c525f7d1c3aaf7f08a7e4903f..c53952c544de9fa88a6318432e34b0d05b149445 100644
--- a/doc/demo/sentiment_analysis/sentiment_analysis.md
+++ b/doc/demo/sentiment_analysis/sentiment_analysis.md
@@ -6,7 +6,7 @@ Sentiment analysis is also used to monitor social media based on large amount of
On the other hand, grabbing the user comments of products and analyzing their sentiment are useful to understand user preferences for companies, products, even competing products.
-This tutorial will guide you through the process of training a Long Short Term Memory (LSTM) Network to classify the sentiment of sentences from [Large Movie Review Dataset](http://ai.stanford.edu/~amaas/data/sentiment/), sometimes known as the [Internet Movie Database (IMDB)](http://ai.stanford.edu/~amaas/papers/wvSent_acl2011.pdf). This dataset contains movie reviews along with their associated binary sentiment polarity labels, namely positive and negative. So randomly guessing yields 50% accuracy.
+This tutorial will guide you through the process of training a Long Short Term Memory (LSTM) Network to classify the sentiment of sentences from [Large Movie Review Dataset](http://ai.stanford.edu/~amaas/data/sentiment/), sometimes known as the Internet Movie Database (IMDB). This dataset contains movie reviews along with their associated binary sentiment polarity labels, namely positive and negative. So randomly guessing yields 50% accuracy.
## Data Preparation
@@ -39,7 +39,7 @@ imdbEr.txt imdb.vocab README test train
* imdbEr.txt: expected rating for each token in imdb.vocab.
* README: data documentation.
-Both train and test set directory contains:
+The train set directory contains the following files. The test set directory contains the same files, except for `unsup` and `urls_unsup.txt`.
```
labeledBow.feat neg pos unsup unsupBow.feat urls_neg.txt urls_pos.txt urls_unsup.txt
@@ -151,6 +151,7 @@ settings(
batch_size=128,
learning_rate=2e-3,
learning_method=AdamOptimizer(),
+ average_window=0.5,
regularization=L2Regularization(8e-4),
gradient_clipping_threshold=25
)
@@ -163,17 +164,18 @@ stacked_lstm_net(dict_dim, class_dim=class_dim,
* **Data Definition**:
* get\_config\_arg(): get arguments set by `--config_args=xx` in command-line arguments.
- * Define TrainData and TestData provider, here using Python interface (PyDataProviderWrapper) of PaddlePaddle to load data. For details, you can refer to the document of PyDataProvider.
+ * Define the data provider; here we use the Python interface to load data. For details, refer to the documentation of PyDataProvider2.
* **Algorithm Configuration**:
- * use sgd algorithm.
- * use adam optimization.
* set batch size of 128.
- * set average sgd window.
* set global learning rate.
+ * use Adam optimization.
+ * set the average SGD window.
+ * set L2 regularization.
+ * set the gradient clipping threshold.
* **Network Configuration**:
- * dict_dim: get dictionary dimension.
- * class_dim: set category number, IMDB has two label, namely positive and negative label.
+ * dict_dim: the dictionary dimension.
+ * class_dim: the number of categories; IMDB has two labels, namely positive and negative.
* `stacked_lstm_net`: predefined network as shown in Figure 3, use this network by default.
* `bidirectional_lstm_net`: predefined network as shown in Figure 2.
diff --git a/doc/dev/index.rst b/doc/dev/index.rst
new file mode 100644
index 0000000000000000000000000000000000000000..0468dd492b6246cfe0771a05c3597ddee95b3ddd
--- /dev/null
+++ b/doc/dev/index.rst
@@ -0,0 +1,9 @@
+Development Guide
+=================
+
+.. toctree::
+ :maxdepth: 1
+
+ layer.md
+ new_layer/new_layer.rst
+ ../source/index.md
diff --git a/doc/dev/layer.md b/doc/dev/layer.md
new file mode 100644
index 0000000000000000000000000000000000000000..930fb0de1ac074b15d06197ed0e732f92288b411
--- /dev/null
+++ b/doc/dev/layer.md
@@ -0,0 +1,4 @@
+# Layer Documents
+
+* [Layer Source Code Document](../source/gserver/layers/index.rst)
+* [Layer Python API Document](../ui/api/trainer_config_helpers/index.rst)
diff --git a/doc/dev/new_layer/index.rst b/doc/dev/new_layer/index.rst
deleted file mode 100644
index 37dac3a14dedf2aaa99335e1b0ebe110dc746174..0000000000000000000000000000000000000000
--- a/doc/dev/new_layer/index.rst
+++ /dev/null
@@ -1,7 +0,0 @@
-Writing New Layers
-==================
-
-.. toctree::
- :maxdepth: 3
-
- new_layer.rst
diff --git a/doc/dev/new_layer/new_layer.rst b/doc/dev/new_layer/new_layer.rst
index bd4a4c46c87f6429338b4d220a80b6265a1f253f..af8b76a3075194ead9be40d2c943238b2cfadecc 100644
--- a/doc/dev/new_layer/new_layer.rst
+++ b/doc/dev/new_layer/new_layer.rst
@@ -1,3 +1,4 @@
+==================
Writing New Layers
==================
@@ -59,7 +60,7 @@ Implement C++ Class
The C++ class of the layer implements the initialization, forward, and backward part of the layer. The fully connected layer is at :code:`paddle/gserver/layers/FullyConnectedLayer.h` and :code:`paddle/gserver/layers/FullyConnectedLayer.cpp`. We list simplified version of the code below.
-It needs to derive the base class :code:`paddle::BaseLayer`, and it needs to override the following functions:
+It needs to derive the base class :code:`paddle::Layer`, and it needs to override the following functions:
- constructor and destructor.
- :code:`init` function. It is used to initialize the parameters and settings.
diff --git a/doc/index.md b/doc/index.md
deleted file mode 100644
index df03a33fac98c46635eef05d88639235ac72cf8f..0000000000000000000000000000000000000000
--- a/doc/index.md
+++ /dev/null
@@ -1,22 +0,0 @@
-PaddlePaddle Documentation
-==========================
-
-User Guide
-----------
-* [Quick Start](demo/quick_start/index_en.md)
-* [Build and Installation](build/index.rst)
-* [Contribute Code](build/contribute_to_paddle.md)
-* [User Interface](ui/index.md)
-* [Model Config Interface](ui/api/trainer_config_helpers/index.md)
-* [Example and Demo](demo/index.md)
-* [Cluster Train](cluster/index.md)
-
-Development Guide
------------------
-* [Layer Documents](layer.md)
-* [Writing New Layers](dev/new_layer/index.rst)
-* [Source Code Documents](source/index.md)
-
-Algorithm Tutorial
-------------------
-* [RNN Configuration](algorithm/rnn/rnn.rst)
diff --git a/doc/index.rst b/doc/index.rst
new file mode 100644
index 0000000000000000000000000000000000000000..668ad75a902bdd14c6198c41380ae93e29cec0d3
--- /dev/null
+++ b/doc/index.rst
@@ -0,0 +1,10 @@
+PaddlePaddle Documentation
+==========================
+
+.. toctree::
+ :maxdepth: 1
+
+ introduction/index.md
+ user_guide.rst
+ dev/index.rst
+ algorithm/index.rst
diff --git a/doc/introduction/index.md b/doc/introduction/index.md
new file mode 100644
index 0000000000000000000000000000000000000000..01f52031a1d0247cd0b885218c17001f23685239
--- /dev/null
+++ b/doc/introduction/index.md
@@ -0,0 +1,100 @@
+# Introduction
+
+PaddlePaddle is a deep learning platform open-sourced by Baidu. With PaddlePaddle, you can easily train a classic neural network within a couple of lines of configuration, or you can build sophisticated models that provide state-of-the-art performance on difficult learning tasks like sentiment analysis, machine translation, image captioning, and so on.
+
+## 1. A Classic Problem
+
+Now, to give you a hint of what using PaddlePaddle looks like, let's start with a fundamental learning problem - **simple linear regression**: you have observed a set of two-dimensional data points of `X` and `Y`, where `X` is an explanatory variable and `Y` is the corresponding dependent variable, and you want to recover the underlying correlation between `X` and `Y`. Linear regression can be used in many practical scenarios. For example, `X` can be a variable about house size, and `Y` a variable about house price. You can build a model that captures the relationship between them by observing real estate markets.
+
+## 2. Prepare the Data
+
+Suppose the true relationship can be characterized as `Y = 2X + 0.3`; let's see how to recover this pattern from observed data alone. Here is a piece of Python code that feeds synthetic data to PaddlePaddle. The code is pretty self-explanatory; the only extra thing you need to add for PaddlePaddle is a definition of the input data types.
+
+```python
+# dataprovider.py
+from paddle.trainer.PyDataProvider2 import *
+import random
+
+# define data types of input: 2 real numbers
+@provider(input_types=[dense_vector(1), dense_vector(1)], use_seq=False)
+def process(settings, input_file):
+ for i in xrange(2000):
+ x = random.random()
+ yield [x], [2*x+0.3]
+```
+
+## 3. Train a Neural Network in PaddlePaddle
+
+To recover the relationship between `X` and `Y`, we use a neural network with one layer of linear activation units and a square error cost layer. Don't worry if you are not familiar with this terminology; it just means that we start from a random line `Y' = wX + b` and gradually adapt `w` and `b` to minimize the difference between `Y'` and `Y`. Here is what it looks like in PaddlePaddle:
+
+```python
+# trainer_config.py
+from paddle.trainer_config_helpers import *
+
+# 1. read data. Suppose you saved the above Python code as dataprovider.py
+data_file = 'empty.list'
+with open(data_file, 'w') as f: f.writelines(' ')
+define_py_data_sources2(train_list=data_file, test_list=None,
+ module='dataprovider', obj='process',args={})
+
+# 2. learning algorithm
+settings(batch_size=12, learning_rate=1e-3, learning_method=MomentumOptimizer())
+
+# 3. Network configuration
+x = data_layer(name='x', size=1)
+y = data_layer(name='y', size=1)
+y_predict = fc_layer(input=x, param_attr=ParamAttr(name='w'), size=1,
+                     act=LinearActivation(), bias_attr=ParamAttr(name='b'))
+cost = regression_cost(input=y_predict, label=y)
+outputs(cost)
+```
+
+Some of the most fundamental usages of PaddlePaddle are demonstrated:
+
+- The first part shows how to feed data into PaddlePaddle. In general, PaddlePaddle reads raw data from a list of files and then does some user-defined processing to get the real input. In this case, we only need to create a placeholder file, since we are generating synthetic data on the fly.
+
+- The second part describes the learning algorithm. It defines how adjustments are made to model parameters. PaddlePaddle provides a rich set of optimizers, but a simple momentum-based optimizer will suffice here; it processes 12 data points each time.
+
+- Finally, the network configuration. It is usually as simple as "stacking" layers. Three kinds of layers are used in this configuration:
+ - **Data Layer**: a network always starts with one or more data layers. They provide input data to the rest of the network. In this problem, two data layers are used respectively for `X` and `Y`.
+ - **FC Layer**: FC layer is short for Fully Connected Layer, which connects all the input units to the current layer and does the actual computation specified by the activation function. Computation layers like this are the fundamental building blocks of a deeper model.
+ - **Cost Layer**: in the training phase, cost layers are usually the last layers of the network. They measure the performance of the current model and provide guidance for adjusting parameters.
+
+Now that everything is ready, you can train the network with a simple command line call:
+```
+paddle train --config=trainer_config.py --save_dir=./output --num_passes=30
+```
+
+This means that PaddlePaddle will train this network on the synthetic dataset for 30 passes and save all the models under the path `./output`. You will see from the messages printed out during the training phase that the model cost decreases as time goes by, which indicates that our guess is getting closer to the true parameters.
+
+
+## 4. Evaluate the Model
+
+Usually, a different dataset, held out during the training phase, should be used to evaluate the models. However, we are lucky enough to know the real answer: `w=2, b=0.3`; thus a better option is to check the model parameters directly.
+
+In PaddlePaddle, training just produces a collection of model parameters, which are `w` and `b` in this case. Each parameter is saved in an individual file in the popular `numpy` array format. Here is the code that reads the parameters from the last pass.
+
+```python
+import numpy as np
+import os
+
+def load(file_name):
+ with open(file_name, 'rb') as f:
+ f.read(16) # skip header for float type.
+ return np.fromfile(f, dtype=np.float32)
+
+print 'w=%.6f, b=%.6f' % (load('output/pass-00029/w'), load('output/pass-00029/b'))
+# w=1.999743, b=0.300137
+```
+
+