提交 4bc5bf97 编写于 作者: X Xingyi 提交者: GitHub

Merge pull request #1 from PaddlePaddle/master

Master
.gitignore
\ No newline at end of file
*.DS_Store
build/
*.user
.vscode
.idea
.project
.cproject
.pydevproject
Makefile
.test_env/
third_party/
*~
bazel-*
!build/*.deb
*.DS_Store
build/
build_doc/
*.user
.vscode
......@@ -7,6 +8,7 @@ build/
.project
.cproject
.pydevproject
.settings/
Makefile
.test_env/
third_party/
......
- repo: https://github.com/Lucas-C/pre-commit-hooks.git
sha: c25201a00e6b0514370501050cf2a8538ac12270
sha: v1.0.1
hooks:
- id: remove-crlf
files: (?!.*third_party)^.*$
files: (?!.*third_party)^.*$ | (?!.*book)^.*$
- repo: https://github.com/reyoung/mirrors-yapf.git
sha: v0.13.2
hooks:
- id: yapf
files: (.*\.(py|bzl)|BUILD|.*\.BUILD|WORKSPACE)$ # Bazel BUILD files follow Python syntax.
- id: yapf
files: (.*\.(py|bzl)|BUILD|.*\.BUILD|WORKSPACE)$
- repo: https://github.com/pre-commit/pre-commit-hooks
sha: 7539d8bd1a00a3c1bfd34cdb606d3a6372e83469
sha: 5bf6c09bfa1297d3692cadd621ef95f1284e33c0
hooks:
- id: check-added-large-files
- id: check-merge-conflict
- id: check-symlinks
- id: detect-private-key
files: (?!.*third_party)^.*$
files: (?!.*third_party)^.*$ | (?!.*book)^.*$
- id: end-of-file-fixer
- repo: https://github.com/PaddlePaddle/clang-format-pre-commit-hook.git
sha: 28c0ea8a67a3e2dbbf4822ef44e85b63a0080a29
......
language: cpp
cache: ccache
cache:
directories:
- $HOME/third_party
- $HOME/.ccache
- $HOME/.cache/pip
sudo: required
dist: trusty
os:
- linux
- osx
env:
- JOB=DOCS
- JOB=BUILD_AND_TEST
- JOB=PRE_COMMIT
matrix:
exclude:
- os: osx
env: JOB=DOCS # Only generate documentation in linux.
- os: osx
env: JOB=PRE_COMMIT # Only check pre-commit hook in linux
addons:
apt:
packages:
- gcc-4.8
- g++-4.8
- wget
- gfortran-4.8
- git
- build-essential
- libatlas-base-dev
- python
- python-pip
- python2.7-dev
- python-numpy
- python-wheel
- curl
- swig
- graphviz
- clang-format-3.8
- automake
- libtool
- ccache
before_install:
- |
if [ ${JOB} == "BUILD_AND_TEST" ]; then
......@@ -46,12 +44,16 @@ before_install:
fi
fi
fi
- if [[ "$TRAVIS_OS_NAME" == "osx" ]]; then paddle/scripts/travis/before_install.osx.sh; fi
- if [[ "$JOB" == "PRE_COMMIT" ]]; then sudo ln -s /usr/bin/clang-format-3.8 /usr/bin/clang-format; fi
- pip install --upgrade pip
- pip install wheel protobuf sphinx recommonmark sphinx_rtd_theme virtualenv pre-commit requests==2.9.2 LinkChecker
# Paddle is using protobuf 3.1 currently. Protobuf 3.2 breaks the compatibility. So we specify the python
# protobuf version.
- pip install numpy wheel 'protobuf==3.1' sphinx recommonmark sphinx-rtd-theme==0.1.9 virtualenv pre-commit requests==2.9.2 LinkChecker
- |
function timeout() { perl -e 'alarm shift; exec @ARGV' "$@"; }
script:
- paddle/scripts/travis/main.sh
- |
timeout 2580 paddle/scripts/travis/main.sh # 43min timeout
RESULT=$?; if [ $RESULT -eq 0 ] || [ $RESULT -eq 142 ]; then true; else false; fi;
notifications:
email:
on_success: change
......
......@@ -12,37 +12,70 @@
# See the License for the specific language governing permissions and
# limitations under the License
cmake_minimum_required(VERSION 3.0)
project(paddle CXX C)
set(CMAKE_MODULE_PATH ${CMAKE_MODULE_PATH} "${CMAKE_SOURCE_DIR}/cmake")
set(PROJ_ROOT ${CMAKE_SOURCE_DIR})
include(system)
if(ANDROID)
cmake_minimum_required(VERSION 3.7)
else()
cmake_minimum_required(VERSION 3.0)
endif()
project(paddle CXX C)
find_package(Sphinx)
find_package(CUDA QUIET)
if(NOT CMAKE_CROSSCOMPILING)
find_package(CUDA QUIET)
endif(NOT CMAKE_CROSSCOMPILING)
find_package(Git REQUIRED)
find_package(Threads REQUIRED)
include(system)
include(simd)
###################### Configurations ############################
option(WITH_DSO "Compile PaddlePaddle with dynamic linked libraries" ON)
option(WITH_GPU "Compile PaddlePaddle with gpu" ${CUDA_FOUND})
option(WITH_DOUBLE "Compile PaddlePaddle with double precision, otherwise use single precision" OFF)
option(WITH_AVX "Compile PaddlePaddle with avx intrinsics" ${AVX_FOUND})
option(WITH_PYTHON "Compile PaddlePaddle with python interpreter" ON)
option(WITH_STYLE_CHECK "Style Check for PaddlePaddle" ON)
option(WITH_RDMA "Compile PaddlePaddle with rdma support" OFF)
option(WITH_TIMER "Compile PaddlePaddle use timer" OFF)
option(WITH_PROFILER "Compile PaddlePaddle use gpu profiler" OFF)
option(WITH_TESTING "Compile and run unittest for PaddlePaddle" ON)
option(WITH_DOC "Compile PaddlePaddle with documentation" OFF)
option(WITH_SWIG_PY "Compile PaddlePaddle with py PaddlePaddle prediction api" ON)
option(ON_TRAVIS "Running test on travis-ci or not." OFF)
option(ON_COVERALLS "Generating code coverage data on coveralls or not." OFF)
option(COVERALLS_UPLOAD "Uploading the generated coveralls json." ON)
################################ Configurations #######################################
option(WITH_GPU "Compile PaddlePaddle with NVIDIA GPU" ${CUDA_FOUND})
option(WITH_AVX "Compile PaddlePaddle with AVX intrinsics" ${AVX_FOUND})
option(WITH_DSO "Compile PaddlePaddle with dynamic linked CUDA" ON)
option(WITH_TESTING "Compile PaddlePaddle with unit testing" ON)
option(WITH_SWIG_PY "Compile PaddlePaddle with inference api" ON)
option(WITH_STYLE_CHECK "Compile PaddlePaddle with style check" ON)
option(WITH_PYTHON "Compile PaddlePaddle with python interpreter" ON)
option(WITH_DOUBLE "Compile PaddlePaddle with double precision" OFF)
option(WITH_RDMA "Compile PaddlePaddle with RDMA support" OFF)
option(WITH_TIMER "Compile PaddlePaddle with stats timer" OFF)
option(WITH_PROFILER "Compile PaddlePaddle with GPU profiler" OFF)
option(WITH_DOC "Compile PaddlePaddle with documentation" OFF)
option(WITH_COVERAGE "Compile PaddlePaddle with code coverage" OFF)
option(COVERALLS_UPLOAD "Package code coverage data to coveralls" OFF)
option(ON_TRAVIS "Exclude special unit test on Travis CI" OFF)
# CMAKE_BUILD_TYPE
if(NOT CMAKE_BUILD_TYPE)
set(CMAKE_BUILD_TYPE "RelWithDebInfo" CACHE STRING
"Choose the type of build, options are: Debug Release RelWithDebInfo MinSizeRel"
FORCE)
endif()
if(ANDROID)
if(${CMAKE_SYSTEM_VERSION} VERSION_LESS "21")
message(FATAL_ERROR "Unsupport standalone toolchains with Android API level lower than 21")
endif()
set(WITH_GPU OFF CACHE STRING
"Disable GPU when cross-compiling for Android" FORCE)
set(WITH_AVX OFF CACHE STRING
"Disable AVX when cross-compiling for Android" FORCE)
set(WITH_PYTHON OFF CACHE STRING
"Disable PYTHON when cross-compiling for Android" FORCE)
set(WITH_RDMA OFF CACHE STRING
"Disable RDMA when cross-compiling for Android" FORCE)
endif(ANDROID)
set(THIRD_PARTY_PATH "${PROJ_ROOT}/third_party" CACHE STRING
"A path setting third party libraries download & build directories.")
########################################################################################
include(external/zlib) # download, build, install zlib
include(external/gflags) # download, build, install gflags
......@@ -53,6 +86,7 @@ include(external/python) # download, build, install python
include(external/openblas) # download, build, install openblas
include(external/swig) # download, build, install swig
include(external/warpctc) # download, build, install warpctc
include(external/any) # download libn::any
include(package) # set paddle packages
include(cpplint) # set paddle c++ style
......@@ -63,8 +97,6 @@ include(flags) # set paddle compile flags
include(cudnn) # set cudnn libraries
include(version) # set PADDLE_VERSION
include(coveralls) # set code coverage
include(python_module) # set python module
include(configure) # add paddle env configuration
include_directories("${PROJ_ROOT}")
......@@ -72,14 +104,21 @@ include_directories("${PROJ_ROOT}/paddle/cuda/include")
include_directories("${CMAKE_CURRENT_BINARY_DIR}/proto")
set(EXTERNAL_LIBS
# have not include gtest here.
${GFLAGS_LIBRARIES}
${GLOG_LIBRARIES}
${CBLAS_LIBRARIES}
${PROTOBUF_LIBRARY}
${ZLIB_LIBRARIES}
${PYTHON_LIBRARIES}
)
if(WITH_GPU)
list(APPEND EXTERNAL_LIB ${CUDA_LIBRARIES} ${CUDA_rt_LIBRARY})
if(NOT WITH_DSO)
list(APPEND EXTERNAL_LIB ${CUDNN_LIBRARY} ${CUDA_CUBLAS_LIBRARIES} ${CUDA_curand_LIBRARY})
endif(NOT WITH_DSO)
endif(WITH_GPU)
add_subdirectory(proto)
add_subdirectory(paddle)
add_subdirectory(python)
......
# A image for building paddle binaries
# Use cuda devel base image for both cpu and gpu environment
FROM nvidia/cuda:8.0-cudnn5-devel-ubuntu14.04
MAINTAINER PaddlePaddle Authors <paddle-dev@baidu.com>
ARG UBUNTU_MIRROR
RUN /bin/bash -c 'if [[ -n ${UBUNTU_MIRROR} ]]; then sed -i 's#http://archive.ubuntu.com/ubuntu#${UBUNTU_MIRROR}#g' /etc/apt/sources.list; fi'
# ENV variables
ARG WITH_GPU
ARG WITH_AVX
ARG WITH_DOC
ARG WITH_STYLE_CHECK
ENV WOBOQ OFF
ENV WITH_GPU=${WITH_GPU:-OFF}
ENV WITH_AVX=${WITH_AVX:-ON}
ENV WITH_DOC=${WITH_DOC:-OFF}
ENV WITH_STYLE_CHECK=${WITH_STYLE_CHECK:-OFF}
ENV HOME /root
# Add bash enhancements
COPY ./paddle/scripts/docker/root/ /root/
RUN apt-get update && \
apt-get install -y git python-pip python-dev openssh-server bison && \
apt-get install -y wget unzip tar xz-utils bzip2 gzip coreutils && \
apt-get install -y curl sed grep graphviz libjpeg-dev zlib1g-dev && \
apt-get install -y python-numpy python-matplotlib gcc g++ gfortran && \
apt-get install -y automake locales clang-format-3.8 swig doxygen && \
apt-get clean -y
# git credential to skip password typing
RUN git config --global credential.helper store
# Fix locales to en_US.UTF-8
RUN localedef -i en_US -f UTF-8 en_US.UTF-8
# FIXME: due to temporary ipykernel dependency issue, specify ipykernel jupyter
# version util jupyter fixes this issue.
RUN pip install --upgrade pip && \
pip install -U 'protobuf==3.1.0' && \
pip install -U wheel pillow BeautifulSoup && \
pip install -U docopt PyYAML sphinx && \
pip install -U sphinx-rtd-theme==0.1.9 recommonmark && \
pip install pre-commit 'requests==2.9.2' 'ipython==5.3.0' && \
pip install 'ipykernel==4.6.0' 'jupyter==1.0.0'
RUN curl -sSL https://cmake.org/files/v3.4/cmake-3.4.1.tar.gz | tar -xz && \
cd cmake-3.4.1 && ./bootstrap && make -j `nproc` && make install && \
cd .. && rm -rf cmake-3.4.1
VOLUME ["/woboq_out"]
# Configure OpenSSH server. c.f. https://docs.docker.com/engine/examples/running_ssh_service
RUN mkdir /var/run/sshd
RUN echo 'root:root' | chpasswd
RUN sed -ri 's/^PermitRootLogin\s+.*/PermitRootLogin yes/' /etc/ssh/sshd_config
RUN sed -ri 's/UsePAM yes/#UsePAM yes/g' /etc/ssh/sshd_config
EXPOSE 22
# development image default do build work
CMD ["bash", "/paddle/paddle/scripts/docker/build.sh"]
......@@ -2,8 +2,8 @@
[![Build Status](https://travis-ci.org/PaddlePaddle/Paddle.svg?branch=develop)](https://travis-ci.org/PaddlePaddle/Paddle)
[![Documentation Status](https://img.shields.io/badge/docs-latest-brightgreen.svg?style=flat)](http://www.paddlepaddle.org/)
[![Documentation Status](https://img.shields.io/badge/中文文档-最新-brightgreen.svg)](http://www.paddlepaddle.org/cn/index.html)
[![Documentation Status](https://img.shields.io/badge/docs-latest-brightgreen.svg?style=flat)](http://www.paddlepaddle.org/develop/doc/)
[![Documentation Status](https://img.shields.io/badge/中文文档-最新-brightgreen.svg)](http://www.paddlepaddle.org/doc_cn/)
[![Coverage Status](https://coveralls.io/repos/github/PaddlePaddle/Paddle/badge.svg?branch=develop)](https://coveralls.io/github/PaddlePaddle/Paddle?branch=develop)
[![Release](https://img.shields.io/github/release/PaddlePaddle/Paddle.svg)](https://github.com/PaddlePaddle/Paddle/releases)
[![License](https://img.shields.io/badge/license-Apache%202-blue.svg)](LICENSE)
......@@ -59,36 +59,36 @@ Please refer to our [release announcement](https://github.com/PaddlePaddle/Paddl
the capability of PaddlePaddle to make a huge impact for your product.
## Installation
Check out the [Install Guide](http://paddlepaddle.org/doc/build/) to install from
pre-built packages (**docker image**, **deb package**) or
directly build on **Linux** and **Mac OS X** from the source code.
It is recommended to check out the
[Docker installation guide](http://www.paddlepaddle.org/develop/doc/getstarted/build_and_install/docker_install_en.html)
before looking into the
[build from source guide](http://www.paddlepaddle.org/develop/doc/getstarted/build_and_install/build_from_source_en.html)
## Documentation
Both [English Docs](http://paddlepaddle.org/doc/) and [Chinese Docs](http://paddlepaddle.org/doc_cn/) are provided for our users and developers.
- [Quick Start](http://paddlepaddle.org/doc/demo/quick_start/index_en) <br>
You can follow the quick start tutorial to learn how use PaddlePaddle
step-by-step.
We provide [English](http://www.paddlepaddle.org/develop/doc/) and
[Chinese](http://www.paddlepaddle.org/doc_cn/) documentation.
- [Deep Learning 101](http://book.paddlepaddle.org/index.en.html)
You might want to start from the this online interactive book that can run in Jupyter Notebook.
- [Distributed Training](http://www.paddlepaddle.org/develop/doc/howto/usage/cluster/cluster_train_en.html)
You can run distributed training jobs on MPI clusters.
- [Distributed Training on Kubernetes](http://www.paddlepaddle.org/develop/doc/howto/usage/k8s/k8s_en.html)
- [Example and Demo](http://paddlepaddle.org/doc/demo/) <br>
We provide five demos, including: image classification, sentiment analysis,
sequence to sequence model, recommendation, semantic role labeling.
You can also run distributed training jobs on Kubernetes clusters.
- [Distributed Training](http://paddlepaddle.org/doc/cluster) <br>
This system supports training deep learning models on multiple machines
with data parallelism.
- [Python API](http://www.paddlepaddle.org/develop/doc/api/index_en.html)
- [Python API](http://paddlepaddle.org/doc/ui/) <br>
PaddlePaddle supports using either Python interface or C++ to build your
system. We also use SWIG to wrap C++ source code to create a user friendly
interface for Python. You can also use SWIG to create interface for your
favorite programming language.
Our new API enables much shorter programs.
- [How to Contribute](http://paddlepaddle.org/doc/build/contribute_to_paddle.html) <br>
We sincerely appreciate your interest and contributions. If you would like to
contribute, please read the contribution guide.
- [How to Contribute](http://www.paddlepaddle.org/develop/doc/howto/dev/contribute_to_paddle_en.html)
- [Source Code Documents](http://paddlepaddle.org/doc/source/) <br>
We appreciate your contributions!
## Ask Questions
......
# v0.10.0版本
我们非常高兴发布了PaddlePaddle V0.10.0版,并开发了新的[Python API](http://research.baidu.com/paddlepaddles-new-api-simplifies-deep-learning-programs/)
- 旧的Python API由于难以学习和使用已经过时了。使用旧版本的API至少需要两份python文件,分别是定义数据生成器和定义网络拓扑结构的文件。用户通过运行`paddle_trainer`的C++程序来启动PaddlePaddle任务,该程序调用Python解释器来运行定义网络拓扑结构的文件,然后通过迭代加载数据生成器提供的小批量数据启动训练循环。这与Python的现代编辑方式不符,比如Jupyter Notebook。
- 新版的API被称为 *V2 API*,允许我们在单个.py文件中,通过编辑更短的Python程序来定义网络结构和数据。此外,该Python程序也可以在Jupyter Notebook中运行,因为PaddlePaddle可以作为共享库来被Python程序加载和使用。
基于新的API,我们提供了一个在线的学习文档 [Deep Learning 101](http://book.paddlepaddle.org/index.en.html) 及其[中文版本](http://book.paddlepaddle.org/)
我们还致力于迭代更新新版API的在线文档,并将新版API引入分布式集群(包括MPI和Kubernetes)训练中。我们将在下一个版本中发布更多的内容。
## 新特点
* 发布新版[Python API](http://research.baidu.com/paddlepaddles-new-api-simplifies-deep-learning-programs/)
* 发布深度学习系列课程 [Deep Learning 101](http://book.paddlepaddle.org/index.en.html) 及其[中文版本](http://book.paddlepaddle.org/)
* 支持矩形输入的CNN。
* 为seqlastin和seqfirstin提供stride pooling。
*`trainer_config_helpers`中暴露`seq_concat_layer/seq_reshape_layer`
* 添加公共数据集包:CIFAR,MNIST,IMDB,WMT14,CONLL05,movielens,imikolov。
* 针对Single Shot Multibox Detection增加 Prior box layer。
* 增加光滑的L1损失。
* 在V2 API中增加 data reader 创建器和修饰器。
* 增加cmrnorm投影的CPU实现。
## 改进
* 提供`paddle_trainer`的Python virtualenv支持。
* 增加代码自动格式化的pre-commit hooks。
* 升级protobuf到3.x版本。
* 在Python数据生成器中提供一个检测数据类型的选项。
* 加速GPU中average层的后向反馈计算。
* 细化文档。
* 使用Travis-CI检查文档中的死链接。
* 增加解释`sparse_vector`的示例。
* 在layer_math.py中添加ReLU。
* 简化Quick Start示例中的数据处理流程。
* 支持CUDNN Deconv。
* 在v2 API中增加数据feeder。
* 在情感分析示例的演示中增加对标准输入流中样本的预测。
* 提供图像预处理的多进程接口。
* 增加V1 API的基准文档。
*`layer_math.py`中增加ReLU。
* 提供公共数据集的自动下载包。
*`Argument::sumCost`重新命名为`Argument::sum`,并暴露给python。
* 为矩阵相关的表达式评估增加一个新的`TensorExpression`实现。
* 增加延迟分配来优化批处理多表达式计算。
* 增加抽象的类函数及其实现:
* `PadFunc``PadGradFunc`
* `ContextProjectionForwardFunc``ContextProjectionBackwardFunc`
* `CosSimBackward``CosSimBackwardFunc`
* `CrossMapNormalFunc``CrossMapNormalGradFunc`
* `MulFunc`
* 增加`AutoCompare``FunctionCompare`类,使得编写比较gpu和cpu版本函数的单元测试更容易。
* 生成`libpaddle_test_main.a`并删除测试文件内的主函数。
* 支持PyDataProvider2中numpy的稠密向量。
* 清理代码库,删除一些复制粘贴的代码片段:
* 增加`SparseRowMatrix`的抽样类`RowBuffer`
* 清理`GradientMachine`的接口。
* 在layer中增加`override`关键字。
* 简化`Evaluator::create`,使用`ClassRegister`来创建`Evaluator`
* 下载演示的数据集时检查MD5校验。
* 添加`paddle::Error`,用于替代Paddle中的`LOG(FATAL)`
## 错误修复
* 检查`recurrent_group`的layer输入类型。
* 不要用.cu源文件运行`clang-format`
* 修复`LogActivation`的使用错误。
* 修复运行`test_layerHelpers`多次的错误。
* 修复seq2seq示例超出消息大小限制的错误。
* 修复在GPU模式下dataprovider转换的错误。
* 修复`GatedRecurrentLayer`中的错误。
* 修复在测试多个模型时`BatchNorm`的错误。
* 修复paramRelu在单元测试时崩溃的错误。
* 修复`CpuSparseMatrix`编译时相关的警告。
* 修复`MultiGradientMachine``trainer_count > batch_size`时的错误。
* 修复`PyDataProvider2`阻止异步加载数据的错误。
# Release v0.10.0
We are glad to release version 0.10.0. In this version, we are happy to
release the
new
[Python API](http://research.baidu.com/paddlepaddles-new-api-simplifies-deep-learning-programs/).
- Our old Python API is kind of out of date. It's hard to learn and hard to
use. To write a PaddlePaddle program using the old API, we'd have to write
at least two Python files: one `data provider` and another one that defines
the network topology. Users start a PaddlePaddle job by running the
`paddle_trainer` C++ program, which calls Python interpreter to run the
network topology configuration script and then start the training loop,
which iteratively calls the data provider function to load minibatches.
This prevents us from writing a Python program in a modern way, e.g., in the
Jupyter Notebook.
- The new API, which we often refer to as the *v2 API*, allows us to write
much shorter Python programs to define the network and the data in a single
.py file. Also, this program can run in Jupyter Notebook, since the entry
point is in Python program and PaddlePaddle runs as a shared library loaded
and invoked by this Python program.
Basing on the new API, we delivered an online interative
book, [Deep Learning 101](http://book.paddlepaddle.org/index.en.html)
and [its Chinese version](http://book.paddlepaddle.org/).
We also worked on updating our online documentation to describe the new API.
But this is an ongoing work. We will release more documentation improvements
in the next version.
We also worked on bring the new API to distributed model training (via MPI and
Kubernetes). This work is ongoing. We will release more about it in the next
version.
## New Features
* We release [new Python API](http://research.baidu.com/paddlepaddles-new-api-simplifies-deep-learning-programs/).
* Deep Learning 101 book in [English](http://book.paddlepaddle.org/index.en.html) and [Chinese](http://book.paddlepaddle.org/).
* Support rectangle input for CNN.
* Support stride pooling for seqlastin and seqfirstin.
* Expose `seq_concat_layer/seq_reshape_layer` in `trainer_config_helpers`.
* Add dataset package: CIFAR, MNIST, IMDB, WMT14, CONLL05, movielens, imikolov.
* Add Priorbox layer for Single Shot Multibox Detection.
* Add smooth L1 cost.
* Add data reader creator and data reader decorator for v2 API.
* Add the CPU implementation of cmrnorm projection.
## Improvements
* Support Python virtualenv for `paddle_trainer`.
* Add pre-commit hooks, used for automatically format our code.
* Upgrade protobuf to version 3.x.
* Add an option to check data type in Python data provider.
* Speedup the backward of average layer on GPU.
* Documentation refinement.
* Check dead links in documents using Travis-CI.
* Add a example for explaining `sparse_vector`.
* Add ReLU in layer_math.py
* Simplify data processing flow for Quick Start.
* Support CUDNN Deconv.
* Add data feeder in v2 API.
* Support predicting the samples from sys.stdin for sentiment demo.
* Provide multi-proccess interface for image preprocessing.
* Add benchmark document for v1 API.
* Add ReLU in `layer_math.py`.
* Add packages for automatically downloading public datasets.
* Rename `Argument::sumCost` to `Argument::sum` since class `Argument` is nothing with cost.
* Expose Argument::sum to Python
* Add a new `TensorExpression` implementation for matrix-related expression evaluations.
* Add lazy assignment for optimizing the calculation of a batch of multiple expressions.
* Add abstract calss `Function` and its implementation:
* `PadFunc` and `PadGradFunc`.
* `ContextProjectionForwardFunc` and `ContextProjectionBackwardFunc`.
* `CosSimBackward` and `CosSimBackwardFunc`.
* `CrossMapNormalFunc` and `CrossMapNormalGradFunc`.
* `MulFunc`.
* Add class `AutoCompare` and `FunctionCompare`, which make it easier to write unit tests for comparing gpu and cpu version of a function.
* Generate `libpaddle_test_main.a` and remove the main function inside the test file.
* Support dense numpy vector in PyDataProvider2.
* Clean code base, remove some copy-n-pasted code snippets:
* Extract `RowBuffer` class for `SparseRowMatrix`.
* Clean the interface of `GradientMachine`.
* Use `override` keyword in layer.
* Simplify `Evaluator::create`, use `ClassRegister` to create `Evaluator`s.
* Check MD5 checksum when downloading demo's dataset.
* Add `paddle::Error` which intentially replace `LOG(FATAL)` in Paddle.
## Bug Fixes
* Check layer input types for `recurrent_group`.
* Don't run `clang-format` with .cu source files.
* Fix bugs with `LogActivation`.
* Fix the bug that runs `test_layerHelpers` multiple times.
* Fix the bug that the seq2seq demo exceeds protobuf message size limit.
* Fix the bug in dataprovider converter in GPU mode.
* Fix a bug in `GatedRecurrentLayer`.
* Fix bug for `BatchNorm` when testing more than one models.
* Fix broken unit test of paramRelu.
* Fix some compile-time warnings about `CpuSparseMatrix`.
* Fix `MultiGradientMachine` error when `trainer_count > batch_size`.
* Fix bugs that prevents from asynchronous data loading in `PyDataProvider2`.
# Release v0.9.0
## New Features:
......
......@@ -29,13 +29,16 @@ Luo, Tao
Lyu, Qin
Mao, Hongyue
Qian, Xiaojun
Qiao, Longfei
Qi, Jun
Qin, Duohao
Shen, Guolong
Shi, Guangchuan
Song, Xiang
Wang, Helin
Wang, Jiang
Wang, Yanfei
Wang, Yi
Wang, Yong
Weng, Renliang
Xu, Tianbing
......
......@@ -72,7 +72,7 @@ function( Sphinx_add_target target_name builder conf cache source destination )
${source}
${destination}
COMMENT "Generating sphinx documentation: ${builder}"
COMMAND ln -sf ${destination}/index_*.html ${destination}/index.html
COMMAND cd ${destination} && ln -sf ./index_*.html index.html
)
set_property(
......
......@@ -16,11 +16,12 @@
set(CBLAS_FOUND OFF)
## Find MKL First.
set(MKL_ROOT $ENV{MKL_ROOT} CACHE PATH "Folder contains MKL")
set(INTEL_ROOT "/opt/intel" CACHE PATH "Folder contains intel libs")
set(MKL_ROOT ${INTEL_ROOT}/mkl CACHE PATH "Folder contains MKL")
find_path(MKL_INCLUDE_DIR mkl.h PATHS
find_path(MKL_INC_DIR mkl.h PATHS
${MKL_ROOT}/include)
find_path(MKL_INCLUDE_DIR mkl_lapacke.h PATHS
find_path(MKL_LAPACK_INC_DIR mkl_lapacke.h PATHS
${MKL_ROOT}/include)
find_library(MKL_CORE_LIB NAMES mkl_core PATHS
${MKL_ROOT}/lib
......@@ -33,15 +34,19 @@ find_library(MKL_INTEL_LP64 NAMES mkl_intel_lp64 PATHS
${MKL_ROOT}/lib/intel64)
if(MKL_INCLUDE_DIR AND MKL_CORE_LIB AND MKL_SEQUENTIAL_LIB AND MKL_INTEL_LP64)
if(MKL_INC_DIR AND MKL_CORE_LIB AND MKL_SEQUENTIAL_LIB AND MKL_INTEL_LP64)
set(CBLAS_PROVIDER MKL)
set(CBLAS_INC_DIR ${MKL_INCLUDE_DIR})
set(CBLAS_INC_DIR ${MKL_INC_DIR})
set(CBLAS_LIBRARIES ${MKL_INTEL_LP64}
${MKL_SEQUENTIAL_LIB}
${MKL_CORE_LIB})
add_definitions(-DPADDLE_USE_MKL)
message(STATUS "Found MKL (include: ${CBLAS_INC_DIR}, library: ${CBLAS_LIBRARIES})")
set(CBLAS_FOUND ON)
if(${MKL_LAPACK_INC_DIR})
add_definitions(-DPADDLE_USE_LAPACK)
message(STATUS "Found lapack in MKL (include: ${MKL_LAPACK_INC_DIR})")
endif()
return() # return file.
endif()
......@@ -67,13 +72,17 @@ find_library(ATLAS_CBLAS_LIB NAMES cblas libcblas.so.3
find_library(ATLAS_LIB NAMES lapack_atlas liblapack_atlas.so.3
PATHS ${ATLAS_LIB_SEARCH_PATHS})
if(ATLAS_INC_DIR AND ATLAS_CBLAS_LIB AND ATLAS_LIB)
if(ATLAS_INC_DIR AND ATLAS_CBLAS_LIB AND ATLAS_LIB AND NOT CBLAS_FOUND)
set(CBLAS_PROVIDER ATLAS)
set(CBLAS_INC_DIR ${ATLAS_INC_DIR} ${ATLAS_CLAPACK_INC_DIR})
set(CBLAS_INC_DIR ${ATLAS_INC_DIR})
set(CBLAS_LIBRARIES ${ATLAS_LIB} ${ATLAS_CBLAS_LIB})
add_definitions(-DPADDLE_USE_ATLAS)
message(STATUS "Found Atlas (include: ${CBLAS_INC_DIR}, library: ${CBLAS_LIBRARIES})")
message(STATUS "Found ATLAS (include: ${CBLAS_INC_DIR}, library: ${CBLAS_LIBRARIES})")
set(CBLAS_FOUND ON)
if(ATLAS_CLAPACK_INC_DIR)
add_definitions(-DPADDLE_USE_LAPACK)
message(STATUS "Found lapack in ATLAS (include: ${ATLAS_CLAPACK_INC_DIR})")
endif()
return()
endif()
......@@ -102,8 +111,12 @@ if(OPENBLAS_INC_DIR AND OPENBLAS_LIB)
set(CBLAS_PROVIDER OPENBLAS)
set(CBLAS_INC_DIR ${OPENBLAS_INC_DIR})
set(CBLAS_LIBRARIES ${OPENBLAS_LIB})
message(STATUS "Found OpenBlas (include: ${CBLAS_INC_DIR}, library: ${CBLAS_LIBRARIES})")
message(STATUS "Found OpenBLAS (include: ${CBLAS_INC_DIR}, library: ${CBLAS_LIBRARIES})")
set(CBLAS_FOUND ON)
if(OPENBLAS_LAPACKE_INC_DIR)
add_definitions(-DPADDLE_USE_LAPACK)
message(STATUS "Found lapack in OpenBLAS (include: ${OPENBLAS_LAPACKE_INC_DIR})")
endif()
return()
endif()
......
# Use ccache if found ccache program
find_program(CCACHE_FOUND ccache)
find_program(CCACHE_PATH ccache)
if(CCACHE_FOUND)
if(CCACHE_PATH)
message(STATUS "Ccache is founded, use ccache to speed up compile.")
set_property(GLOBAL PROPERTY RULE_LAUNCH_COMPILE ccache)
set_property(GLOBAL PROPERTY RULE_LAUNCH_LINK ccache)
endif(CCACHE_FOUND)
\ No newline at end of file
set_property(GLOBAL PROPERTY RULE_LAUNCH_COMPILE ${CCACHE_PATH})
set_property(GLOBAL PROPERTY RULE_LAUNCH_LINK ${CCACHE_PATH})
endif(CCACHE_PATH)
......@@ -12,6 +12,10 @@
# See the License for the specific language governing permissions and
# limitations under the License.
if(NOT WITH_PYTHON)
add_definitions(-DPADDLE_NO_PYTHON)
endif(NOT WITH_PYTHON)
if(WITH_DSO)
add_definitions(-DPADDLE_USE_DSO)
endif(WITH_DSO)
......@@ -28,6 +32,14 @@ if(NOT WITH_PROFILER)
add_definitions(-DPADDLE_DISABLE_PROFILER)
endif(NOT WITH_PROFILER)
if(NOT CMAKE_CROSSCOMPILING)
if(WITH_AVX AND AVX_FOUND)
set(SIMD_FLAG ${AVX_FLAG})
elseif(SSE3_FOUND)
set(SIMD_FLAG ${SSE3_FLAG})
endif()
endif()
if(NOT WITH_GPU)
add_definitions(-DPADDLE_ONLY_CPU)
add_definitions(-DHPPL_STUB_FUNC)
......@@ -44,21 +56,12 @@ else()
message(FATAL_ERROR "Paddle need cudnn to compile")
endif()
if(WITH_AVX)
set(CUDA_NVCC_FLAGS ${CUDA_NVCC_FLAGS} "-Xcompiler ${AVX_FLAG}")
else(WITH_AVX)
set(CUDA_NVCC_FLAGS ${CUDA_NVCC_FLAGS} "-Xcompiler ${SSE3_FLAG}")
endif(WITH_AVX)
set(CUDA_NVCC_FLAGS ${CUDA_NVCC_FLAGS} "-Xcompiler ${SIMD_FLAG}")
# Include cuda and cudnn
include_directories(${CUDNN_INCLUDE_DIR})
include_directories(${CUDA_TOOLKIT_INCLUDE})
endif(NOT WITH_GPU)
if(WITH_AVX)
set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} ${AVX_FLAG}")
set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} ${AVX_FLAG}")
else(WITH_AVX)
set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} ${SSE3_FLAG}")
set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} ${SSE3_FLAG}")
endif(WITH_AVX)
set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} ${SIMD_FLAG}")
set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} ${SIMD_FLAG}")
......@@ -61,7 +61,7 @@ function(code_coverage _COVERAGE_SRCS _COVERALLS_UPLOAD _CMAKE_SCRIPT_PATH)
endif()
endfunction()
if(ON_COVERALLS)
if(WITH_COVERAGE)
set(CMAKE_BUILD_TYPE "Debug")
set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -g -O0 -fprofile-arcs -ftest-coverage")
set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} -g -O0 -fprofile-arcs -ftest-coverage")
......
......@@ -110,14 +110,13 @@ endmacro()
# Get the coverage data.
file(GLOB_RECURSE GCDA_FILES "${COV_PATH}" "*.gcda")
message("GCDA files:")
message("Process GCDA files:")
message("===============================")
# Get a list of all the object directories needed by gcov
# (The directories the .gcda files and .o files are found in)
# and run gcov on those.
foreach(GCDA ${GCDA_FILES})
message("Process: ${GCDA}")
message("------------------------------------------------------------------------------")
get_filename_component(GCDA_DIR ${GCDA} PATH)
#
......@@ -135,7 +134,7 @@ foreach(GCDA ${GCDA_FILES})
# If -p is not specified then the file is named only "the_file.c.gcov"
#
execute_process(
COMMAND ${GCOV_EXECUTABLE} -p -o ${GCDA_DIR} ${GCDA}
COMMAND ${GCOV_EXECUTABLE} -p -o ${GCDA_DIR} ${GCDA} >/dev/null
WORKING_DIRECTORY ${GCDA_DIR}
)
endforeach()
......@@ -383,7 +382,6 @@ foreach(NOT_COVERED_SRC ${COVERAGE_SRCS_REMAINING})
set(GCOV_FILE_COVERAGE "${GCOV_FILE_COVERAGE}]")
# Generate the final JSON for this file.
message("Generate JSON for non-gcov file: ${NOT_COVERED_SRC}...")
string(CONFIGURE ${SRC_FILE_TEMPLATE} FILE_JSON)
set(JSON_GCOV_FILES "${JSON_GCOV_FILES}${FILE_JSON}, ")
endforeach()
......
if(NOT WITH_GPU)
return()
endif()
set(CUDNN_ROOT "" CACHE PATH "CUDNN ROOT")
find_path(CUDNN_INCLUDE_DIR cudnn.h
PATHS ${CUDNN_ROOT} ${CUDNN_ROOT}/include
......@@ -11,6 +15,7 @@ list(APPEND CUDNN_CHECK_LIBRARY_DIRS
${CUDNN_ROOT}
${CUDNN_ROOT}/lib64
${CUDNN_ROOT}/lib
${CUDNN_ROOT}/lib/x86_64-linux-gnu
$ENV{CUDNN_ROOT}
$ENV{CUDNN_ROOT}/lib64
$ENV{CUDNN_ROOT}/lib
......
INCLUDE(ExternalProject)
SET(ANY_SOURCE_DIR ${THIRD_PARTY_PATH}/any)
INCLUDE_DIRECTORIES(${ANY_SOURCE_DIR}/src/linb_any)
ExternalProject_Add(
linb_any
${EXTERNAL_PROJECT_LOG_ARGS}
GIT_REPOSITORY "https://github.com/thelink2012/any.git"
GIT_TAG "8fef1e93710a0edf8d7658999e284a1142c4c020"
PREFIX ${ANY_SOURCE_DIR}
UPDATE_COMMAND ""
CONFIGURE_COMMAND ""
BUILD_COMMAND ""
INSTALL_COMMAND ""
TEST_COMMAND ""
)
add_definitions(-DANY_IMPL_ANY_CAST_MOVEABLE)
......@@ -14,8 +14,8 @@
INCLUDE(ExternalProject)
SET(GFLAGS_SOURCES_DIR ${CMAKE_CURRENT_SOURCE_DIR}/third_party/gflags)
SET(GFLAGS_INSTALL_DIR ${CMAKE_CURRENT_SOURCE_DIR}/third_party/install/gflags)
SET(GFLAGS_SOURCES_DIR ${THIRD_PARTY_PATH}/gflags)
SET(GFLAGS_INSTALL_DIR ${THIRD_PARTY_PATH}/install/gflags)
SET(GFLAGS_INCLUDE_DIR "${GFLAGS_INSTALL_DIR}/include" CACHE PATH "gflags include directory." FORCE)
IF(WIN32)
set(GFLAGS_LIBRARIES "${GFLAGS_INSTALL_DIR}/lib/gflags.lib" CACHE FILEPATH "GFLAGS_LIBRARIES" FORCE)
......@@ -31,9 +31,17 @@ ExternalProject_Add(
GIT_REPOSITORY "https://github.com/gflags/gflags.git"
PREFIX ${GFLAGS_SOURCES_DIR}
UPDATE_COMMAND ""
CMAKE_ARGS -DCMAKE_CXX_COMPILER=${CMAKE_CXX_COMPILER}
CMAKE_ARGS -DCMAKE_C_COMPILER=${CMAKE_C_COMPILER}
CMAKE_ARGS -DCMAKE_CXX_FLAGS=${CMAKE_CXX_FLAGS}
CMAKE_ARGS -DCMAKE_C_FLAGS=${CMAKE_C_FLAGS}
CMAKE_ARGS -DCMAKE_INSTALL_PREFIX=${GFLAGS_INSTALL_DIR}
CMAKE_ARGS -DCMAKE_POSITION_INDEPENDENT_CODE=ON
CMAKE_ARGS -DBUILD_TESTING=OFF
CMAKE_ARGS -DCMAKE_BUILD_TYPE=Release
CMAKE_CACHE_ARGS -DCMAKE_INSTALL_PREFIX:PATH=${GFLAGS_INSTALL_DIR}
-DCMAKE_POSITION_INDEPENDENT_CODE:BOOL=ON
-DCMAKE_BUILD_TYPE:STRING=Release
)
LIST(APPEND external_project_dependencies gflags)
# Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserve.
#
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#
# http://www.apache.org/licenses/LICENSE-2.0
#
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
......@@ -14,8 +14,8 @@
INCLUDE(ExternalProject)
SET(GLOG_SOURCES_DIR ${CMAKE_CURRENT_SOURCE_DIR}/third_party/glog)
SET(GLOG_INSTALL_DIR ${CMAKE_CURRENT_SOURCE_DIR}/third_party/install/glog)
SET(GLOG_SOURCES_DIR ${THIRD_PARTY_PATH}/glog)
SET(GLOG_INSTALL_DIR ${THIRD_PARTY_PATH}/install/glog)
SET(GLOG_INCLUDE_DIR "${GLOG_INSTALL_DIR}/include" CACHE PATH "glog include directory." FORCE)
IF(WIN32)
......@@ -29,13 +29,23 @@ INCLUDE_DIRECTORIES(${GLOG_INCLUDE_DIR})
ExternalProject_Add(
glog
${EXTERNAL_PROJECT_LOG_ARGS}
DEPENDS gflags
GIT_REPOSITORY "https://github.com/google/glog.git"
PREFIX ${GLOG_SOURCES_DIR}
UPDATE_COMMAND ""
CMAKE_ARGS -DCMAKE_CXX_COMPILER=${CMAKE_CXX_COMPILER}
CMAKE_ARGS -DCMAKE_C_COMPILER=${CMAKE_C_COMPILER}
CMAKE_ARGS -DCMAKE_CXX_FLAGS=${CMAKE_CXX_FLAGS}
CMAKE_ARGS -DCMAKE_C_FLAGS=${CMAKE_C_FLAGS}
CMAKE_ARGS -DCMAKE_INSTALL_PREFIX=${GLOG_INSTALL_DIR}
CMAKE_ARGS -DCMAKE_POSITION_INDEPENDENT_CODE=ON
CMAKE_ARGS -DWITH_GFLAGS=OFF
CMAKE_ARGS -DWITH_GFLAGS=ON
CMAKE_ARGS -Dgflags_DIR=${GFLAGS_INSTALL_DIR}/lib/cmake/gflags
CMAKE_ARGS -DBUILD_TESTING=OFF
CMAKE_ARGS -DCMAKE_BUILD_TYPE=Release
CMAKE_CACHE_ARGS -DCMAKE_INSTALL_PREFIX:PATH=${GLOG_INSTALL_DIR}
-DCMAKE_POSITION_INDEPENDENT_CODE:BOOL=ON
-DCMAKE_BUILD_TYPE:STRING=Release
)
LIST(APPEND external_project_dependencies glog)
......@@ -16,8 +16,8 @@ IF(WITH_TESTING)
ENABLE_TESTING()
INCLUDE(ExternalProject)
SET(GTEST_SOURCES_DIR ${CMAKE_CURRENT_SOURCE_DIR}/third_party/gtest)
SET(GTEST_INSTALL_DIR ${CMAKE_CURRENT_SOURCE_DIR}/third_party/install/gtest)
SET(GTEST_SOURCES_DIR ${THIRD_PARTY_PATH}/gtest)
SET(GTEST_INSTALL_DIR ${THIRD_PARTY_PATH}/install/gtest)
SET(GTEST_INCLUDE_DIR "${GTEST_INSTALL_DIR}/include" CACHE PATH "gtest include directory." FORCE)
INCLUDE_DIRECTORIES(${GTEST_INCLUDE_DIR})
......@@ -41,11 +41,19 @@ IF(WITH_TESTING)
GIT_TAG "release-1.8.0"
PREFIX ${GTEST_SOURCES_DIR}
UPDATE_COMMAND ""
CMAKE_ARGS -DCMAKE_INSTALL_PREFIX:PATH=${GTEST_INSTALL_DIR}
CMAKE_ARGS -DCMAKE_CXX_COMPILER=${CMAKE_CXX_COMPILER}
CMAKE_ARGS -DCMAKE_C_COMPILER=${CMAKE_C_COMPILER}
CMAKE_ARGS -DCMAKE_CXX_FLAGS=${CMAKE_CXX_FLAGS}
CMAKE_ARGS -DCMAKE_C_FLAGS=${CMAKE_C_FLAGS}
CMAKE_ARGS -DCMAKE_INSTALL_PREFIX=${GTEST_INSTALL_DIR}
CMAKE_ARGS -DCMAKE_POSITION_INDEPENDENT_CODE=ON
CMAKE_ARGS -DBUILD_GMOCK=ON
CMAKE_ARGS -Dgtest_disable_pthreads=ON
CMAKE_ARGS -Dgtest_force_shared_crt=ON
CMAKE_ARGS -DCMAKE_BUILD_TYPE=Release
CMAKE_CACHE_ARGS -DCMAKE_INSTALL_PREFIX:PATH=${GTEST_INSTALL_DIR}
-DCMAKE_POSITION_INDEPENDENT_CODE:BOOL=ON
-DCMAKE_BUILD_TYPE:STRING=Release
)
LIST(APPEND external_project_dependencies gtest)
ENDIF(WITH_TESTING)
......@@ -17,8 +17,8 @@ INCLUDE(cblas)
IF(NOT ${CBLAS_FOUND})
INCLUDE(ExternalProject)
SET(CBLAS_SOURCES_DIR ${CMAKE_CURRENT_SOURCE_DIR}/third_party/openblas)
SET(CBLAS_INSTALL_DIR ${CMAKE_CURRENT_SOURCE_DIR}/third_party/install/openblas)
SET(CBLAS_SOURCES_DIR ${THIRD_PARTY_PATH}/openblas)
SET(CBLAS_INSTALL_DIR ${THIRD_PARTY_PATH}/install/openblas)
SET(CBLAS_INC_DIR "${CBLAS_INSTALL_DIR}/include" CACHE PATH "openblas include directory." FORCE)
IF(WIN32)
......@@ -27,20 +27,59 @@ IF(NOT ${CBLAS_FOUND})
SET(CBLAS_LIBRARIES "${CBLAS_INSTALL_DIR}/lib/libopenblas.a" CACHE FILEPATH "openblas library" FORCE)
ENDIF(WIN32)
IF(CMAKE_COMPILER_IS_GNUCC)
ENABLE_LANGUAGE(Fortran)
if (NOT CMAKE_Fortran_COMPILER_VERSION)
# cmake < 3.4 cannot get CMAKE_Fortran_COMPILER_VERSION directly.
execute_process(COMMAND ${CMAKE_Fortran_COMPILER} -dumpversion
OUTPUT_VARIABLE CMAKE_Fortran_COMPILER_VERSION)
endif()
string(REGEX MATCHALL "[0-9]+" Fortran_VERSION ${CMAKE_Fortran_COMPILER_VERSION})
list(GET Fortran_VERSION 0 Fortran_MAJOR)
list(GET Fortran_VERSION 1 Fortran_MINOR)
find_library(GFORTRAN_LIBRARY NAMES gfortran PATHS
/lib
/usr/lib
/usr/lib/gcc/x86_64-linux-gnu/${Fortran_MAJOR}.${Fortran_MINOR}/
/usr/lib/gcc/x86_64-linux-gnu/${Fortran_MAJOR}/)
if (NOT GFORTRAN_LIBRARY)
message(FATAL_ERROR "Cannot found gfortran library which it is used by openblas")
endif()
find_package(Threads REQUIRED)
LIST(APPEND CBLAS_LIBRARIES ${GFORTRAN_LIBRARY} ${CMAKE_THREAD_LIBS_INIT})
ENDIF(CMAKE_COMPILER_IS_GNUCC)
IF(NOT CMAKE_Fortran_COMPILER)
MESSAGE(FATAL_ERROR "To build lapack in libopenblas, "
"you need to set gfortran compiler: cmake .. -DCMAKE_Fortran_COMPILER=...")
ENDIF(NOT CMAKE_Fortran_COMPILER)
ADD_DEFINITIONS(-DPADDLE_USE_LAPACK)
ExternalProject_Add(
openblas
${EXTERNAL_PROJECT_LOG_ARGS}
URL "https://github.com/xianyi/OpenBLAS/archive/v0.2.19.tar.gz"
GIT_REPOSITORY https://github.com/xianyi/OpenBLAS.git
GIT_TAG v0.2.19
PREFIX ${CBLAS_SOURCES_DIR}
INSTALL_DIR ${CBLAS_INSTALL_DIR}
BUILD_IN_SOURCE 1
CONFIGURE_COMMAND ""
BUILD_COMMAND make CC=${CMAKE_C_COMPILER} FC=${CMAKE_Fortran_COMPILER}
INSTALL_COMMAND make install PREFIX=<INSTALL_DIR>
BUILD_COMMAND ${CMAKE_MAKE_PROGRAM} FC=${CMAKE_Fortran_COMPILER} CC=${CMAKE_C_COMPILER} HOSTCC=${CMAKE_C_COMPILER} DYNAMIC_ARCH=1 NO_SHARED=1 libs netlib
INSTALL_COMMAND ${CMAKE_MAKE_PROGRAM} install NO_SHARED=1 PREFIX=<INSTALL_DIR>
UPDATE_COMMAND ""
CONFIGURE_COMMAND ""
)
ExternalProject_Add_Step(
openblas lapacke_install
COMMAND ${CMAKE_COMMAND} -E copy "${CBLAS_SOURCES_DIR}/src/openblas/lapack-netlib/LAPACKE/include/lapacke_mangling_with_flags.h" "${CBLAS_INSTALL_DIR}/include/lapacke_mangling.h"
COMMAND ${CMAKE_COMMAND} -E copy "${CBLAS_SOURCES_DIR}/src/openblas/lapack-netlib/LAPACKE/include/lapacke.h" "${CBLAS_INSTALL_DIR}/include/lapacke.h"
COMMAND ${CMAKE_COMMAND} -E copy "${CBLAS_SOURCES_DIR}/src/openblas/lapack-netlib/LAPACKE/include/lapacke_config.h" "${CBLAS_INSTALL_DIR}/include/lapacke_config.h"
COMMAND ${CMAKE_COMMAND} -E copy "${CBLAS_SOURCES_DIR}/src/openblas/lapack-netlib/LAPACKE/include/lapacke_utils.h" "${CBLAS_INSTALL_DIR}/include/lapacke_utils.h"
DEPENDEES install
)
LIST(APPEND external_project_dependencies openblas)
ENDIF()
ENDIF(NOT ${CBLAS_FOUND})
INCLUDE_DIRECTORIES(${CBLAS_INC_DIR})
......@@ -14,49 +14,67 @@
INCLUDE(ExternalProject)
SET(PROTOBUF_SOURCES_DIR ${CMAKE_CURRENT_SOURCE_DIR}/third_party/protobuf)
SET(PROTOBUF_INSTALL_DIR ${CMAKE_CURRENT_SOURCE_DIR}/third_party/install/protobuf)
SET(PROTOBUF_INCLUDE_DIR "${PROTOBUF_INSTALL_DIR}/include" CACHE PATH "protobuf include directory." FORCE)
set(PROTOBUF_VERSION 3.1)
FIND_PACKAGE(Protobuf ${PROTOBUF_VERSION})
INCLUDE_DIRECTORIES(${PROTOBUF_INCLUDE_DIR})
IF(PROTOBUF_FOUND)
EXEC_PROGRAM(${PROTOBUF_PROTOC_EXECUTABLE} ARGS --version OUTPUT_VARIABLE PROTOBUF_VERSION)
STRING(REGEX MATCH "[0-9]+.[0-9]+" PROTOBUF_VERSION "${PROTOBUF_VERSION}")
IF (${PROTOBUF_VERSION} VERSION_LESS "3.1.0")
SET(PROTOBUF_FOUND OFF)
ENDIF()
ENDIF(PROTOBUF_FOUND)
IF(NOT PROTOBUF_FOUND)
SET(PROTOBUF_SOURCES_DIR ${THIRD_PARTY_PATH}/protobuf)
SET(PROTOBUF_INSTALL_DIR ${THIRD_PARTY_PATH}/install/protobuf)
SET(PROTOBUF_INCLUDE_DIR "${PROTOBUF_INSTALL_DIR}/include" CACHE PATH "protobuf include directory." FORCE)
IF(WIN32)
SET(PROTOBUF_LITE_LIBRARY
"${PROTOBUF_INSTALL_DIR}/lib/libprotobuf-lite.lib" CACHE FILEPATH "protobuf lite library." FORCE)
SET(PROTOBUF_LIBRARY
"${PROTOBUF_INSTALL_DIR}/lib/libprotobuf.lib" CACHE FILEPATH "protobuf library." FORCE)
SET(PROTOBUF_PROTOC_LIBRARY
"${PROTOBUF_INSTALL_DIR}/lib/libprotoc.lib" CACHE FILEPATH "protoc library." FORCE)
SET(PROTOBUF_PROTOC_EXECUTABLE "${PROTOBUF_INSTALL_DIR}/bin/protoc.exe" CACHE FILEPATH "protobuf executable." FORCE)
ELSE(WIN32)
SET(PROTOBUF_LITE_LIBRARY
"${PROTOBUF_INSTALL_DIR}/lib/libprotobuf-lite.a" CACHE FILEPATH "protobuf lite library." FORCE)
SET(PROTOBUF_LIBRARY
"${PROTOBUF_INSTALL_DIR}/lib/libprotobuf.a" CACHE FILEPATH "protobuf library." FORCE)
SET(PROTOBUF_PROTOC_LIBRARY
"${PROTOBUF_INSTALL_DIR}/lib/libprotoc.a" CACHE FILEPATH "protoc library." FORCE)
SET(PROTOBUF_PROTOC_EXECUTABLE "${PROTOBUF_INSTALL_DIR}/bin/protoc" CACHE FILEPATH "protobuf executable." FORCE)
ENDIF(WIN32)
IF(WIN32)
SET(PROTOBUF_LITE_LIBRARY
"${PROTOBUF_INSTALL_DIR}/lib/libprotobuf-lite.lib" CACHE FILEPATH "protobuf lite library." FORCE)
SET(PROTOBUF_LIBRARY
"${PROTOBUF_INSTALL_DIR}/lib/libprotobuf.lib" CACHE FILEPATH "protobuf library." FORCE)
SET(PROTOBUF_PROTOC_LIBRARY
"${PROTOBUF_INSTALL_DIR}/lib/libprotoc.lib" CACHE FILEPATH "protoc library." FORCE)
SET(PROTOBUF_PROTOC_EXECUTABLE "${PROTOBUF_INSTALL_DIR}/bin/protoc.exe" CACHE FILEPATH "protobuf executable." FORCE)
ELSE(WIN32)
IF(${HOST_SYSTEM} STREQUAL "centos")
SET(LIB "lib64")
ELSE()
SET(LIB "lib")
ENDIF()
SET(PROTOBUF_LITE_LIBRARY
"${PROTOBUF_INSTALL_DIR}/${LIB}/libprotobuf-lite.a" CACHE FILEPATH "protobuf lite library." FORCE)
SET(PROTOBUF_LIBRARY
"${PROTOBUF_INSTALL_DIR}/${LIB}/libprotobuf.a" CACHE FILEPATH "protobuf library." FORCE)
SET(PROTOBUF_PROTOC_LIBRARY
"${PROTOBUF_INSTALL_DIR}/${LIB}/libprotoc.a" CACHE FILEPATH "protoc library." FORCE)
SET(PROTOBUF_PROTOC_EXECUTABLE "${PROTOBUF_INSTALL_DIR}/bin/protoc" CACHE FILEPATH "protobuf executable." FORCE)
ENDIF(WIN32)
ExternalProject_Add(
protobuf
${EXTERNAL_PROJECT_LOG_ARGS}
PREFIX ${PROTOBUF_SOURCES_DIR}
UPDATE_COMMAND ""
DEPENDS zlib
GIT_REPOSITORY "https://github.com/google/protobuf.git"
GIT_TAG "9f75c5aa851cd877fb0d93ccc31b8567a6706546"
CONFIGURE_COMMAND
${CMAKE_COMMAND} ${PROTOBUF_SOURCES_DIR}/src/protobuf/cmake
-Dprotobuf_BUILD_TESTS=OFF
-DCMAKE_POSITION_INDEPENDENT_CODE=ON
-DCMAKE_BUILD_TYPE=Release
-DCMAKE_INSTALL_PREFIX=${PROTOBUF_INSTALL_DIR}
)
LIST(APPEND external_project_dependencies protobuf)
ExternalProject_Add(
protobuf
${EXTERNAL_PROJECT_LOG_ARGS}
PREFIX ${PROTOBUF_SOURCES_DIR}
UPDATE_COMMAND ""
DEPENDS zlib
GIT_REPOSITORY "https://github.com/google/protobuf.git"
GIT_TAG "9f75c5aa851cd877fb0d93ccc31b8567a6706546"
CONFIGURE_COMMAND
${CMAKE_COMMAND} ${PROTOBUF_SOURCES_DIR}/src/protobuf/cmake
-Dprotobuf_BUILD_TESTS=OFF
-DZLIB_ROOT:FILEPATH=${ZLIB_ROOT}
-DCMAKE_CXX_COMPILER=${CMAKE_CXX_COMPILER}
-DCMAKE_C_COMPILER=${CMAKE_C_COMPILER}
-DCMAKE_POSITION_INDEPENDENT_CODE=ON
-DCMAKE_BUILD_TYPE=Release
-DCMAKE_INSTALL_PREFIX=${PROTOBUF_INSTALL_DIR}
-DCMAKE_INSTALL_LIBDIR=lib
CMAKE_CACHE_ARGS
-DCMAKE_INSTALL_PREFIX:PATH=${PROTOBUF_INSTALL_DIR}
-DCMAKE_BUILD_TYPE:STRING=Release
-DCMAKE_VERBOSE_MAKEFILE:BOOL=OFF
-DCMAKE_POSITION_INDEPENDENT_CODE:BOOL=ON
-DZLIB_ROOT:STRING=${ZLIB_ROOT}
)
LIST(APPEND external_project_dependencies protobuf)
ENDIF(NOT PROTOBUF_FOUND)
INCLUDE_DIRECTORIES(${PROTOBUF_INCLUDE_DIR})
......@@ -13,192 +13,215 @@
# limitations under the License.
INCLUDE(ExternalProject)
INCLUDE(python_module)
FIND_PACKAGE(PythonInterp 2.7)
FIND_PACKAGE(PythonLibs 2.7)
SET(py_env "")
IF(PYTHONLIBS_FOUND AND PYTHONINTERP_FOUND)
find_python_module(pip REQUIRED)
find_python_module(numpy REQUIRED)
find_python_module(wheel REQUIRED)
find_python_module(google.protobuf REQUIRED)
FIND_PACKAGE(NumPy REQUIRED)
IF(${PY_GOOGLE.PROTOBUF_VERSION} AND ${PY_GOOGLE.PROTOBUF_VERSION} VERSION_LESS "3.0.0")
MESSAGE(FATAL_ERROR "Found Python Protobuf ${PY_GOOGLE.PROTOBUF_VERSION} < 3.0.0, "
"please use pip to upgrade protobuf. pip install -U protobuf")
ENDIF()
ELSE(PYTHONLIBS_FOUND AND PYTHONINTERP_FOUND)
MESSAGE(FATAL_ERROR "Please install python 2.7 before building PaddlePaddle.")
##################################### PYTHON ########################################
SET(PYTHON_SOURCES_DIR ${THIRD_PARTY_PATH}/python)
SET(PYTHON_INSTALL_DIR ${THIRD_PARTY_PATH}/install/python)
SET(_python_DIR ${PYTHON_INSTALL_DIR})
IF(UNIX)
SET(PYTHON_FOUND ON)
SET(PYTHON_INCLUDE_DIR "${PYTHON_INSTALL_DIR}/include/python2.7" CACHE PATH "Python include dir" FORCE)
SET(PYTHON_LIBRARIES "${PYTHON_INSTALL_DIR}/lib/libpython2.7.a" CACHE FILEPATH "Python library" FORCE)
SET(PYTHON_EXECUTABLE ${PYTHON_INSTALL_DIR}/bin/python CACHE FILEPATH "Python executable" FORCE)
SET(PY_SITE_PACKAGES_PATH "${PYTHON_INSTALL_DIR}/lib/python2.7/site-packages" CACHE PATH "Python site-packages path" FORCE)
ELSEIF(WIN32)
SET(PYTHON_FOUND ON)
SET(PYTHON_INCLUDE_DIR "${PYTHON_INSTALL_DIR}/include" CACHE PATH "Python include dir" FORCE)
SET(PYTHON_LIBRARIES "${PYTHON_INSTALL_DIR}/libs/python27.lib" CACHE FILEPATH "Python library" FORCE)
SET(PYTHON_EXECUTABLE "${PYTHON_INSTALL_DIR}/bin/python.exe" CACHE FILEPATH "Python executable" FORCE)
SET(PY_SITE_PACKAGES_PATH "${PYTHON_INSTALL_DIR}/Lib/site-packages" CACHE PATH "Python site-packages path" FORCE)
ELSE()
MESSAGE(FATAL_ERROR "Unknown system !")
ENDIF()
##################################### PYTHON ########################################
SET(PYTHON_SOURCES_DIR ${CMAKE_CURRENT_SOURCE_DIR}/third_party/python)
SET(PYTHON_INSTALL_DIR ${CMAKE_CURRENT_SOURCE_DIR}/third_party/install/python)
SET(_python_DIR ${PYTHON_INSTALL_DIR})
IF(UNIX)
SET(PYTHON_FOUND ON)
SET(PYTHON_INCLUDE_DIR "${PYTHON_INSTALL_DIR}/include/python2.7" CACHE PATH "Python include dir" FORCE)
SET(PYTHON_LIBRARIES "${PYTHON_INSTALL_DIR}/lib/libpython2.7.a" CACHE FILEPATH "Python library" FORCE)
SET(PYTHON_EXECUTABLE ${PYTHON_INSTALL_DIR}/bin/python CACHE FILEPATH "Python executable" FORCE)
SET(PY_SITE_PACKAGES_PATH "${PYTHON_INSTALL_DIR}/lib/python2.7/site-packages" CACHE PATH "Python site-packages path" FORCE)
ELSEIF(WIN32)
SET(PYTHON_FOUND ON)
SET(PYTHON_INCLUDE_DIR "${PYTHON_INSTALL_DIR}/include" CACHE PATH "Python include dir" FORCE)
SET(PYTHON_LIBRARIES "${PYTHON_INSTALL_DIR}/libs/python27.lib" CACHE FILEPATH "Python library" FORCE)
SET(PYTHON_EXECUTABLE "${PYTHON_INSTALL_DIR}/bin/python.exe" CACHE FILEPATH "Python executable" FORCE)
SET(PY_SITE_PACKAGES_PATH "${PYTHON_INSTALL_DIR}/Lib/site-packages" CACHE PATH "Python site-packages path" FORCE)
ELSE()
MESSAGE(FATAL_ERROR "Unknown system !")
ENDIF()
SET(py_env
PATH=${PYTHON_INSTALL_DIR}/bin/:$ENV{PATH}
PYTHONHOME=${PYTHON_INSTALL_DIR}
PYTHONPATH=${PYTHON_INSTALL_DIR}/lib:${PYTHON_INSTALL_DIR}/lib/python2.7:${PY_SITE_PACKAGES_PATH})
INCLUDE_DIRECTORIES(${PYTHON_INCLUDE_DIR})
IF(APPLE)
LIST(APPEND EXTERNAL_PROJECT_OPTIONAL_CMAKE_ARGS
-DCMAKE_BUILD_WITH_INSTALL_RPATH:BOOL=ON
)
ENDIF()
SET(EXTERNAL_PROJECT_OPTIONAL_CMAKE_CACHE_ARGS)
# Force Python build to "Release".
IF(CMAKE_CONFIGURATION_TYPES)
SET(SAVED_CMAKE_CFG_INTDIR ${CMAKE_CFG_INTDIR})
SET(CMAKE_CFG_INTDIR "Release")
IF(APPLE)
LIST(APPEND EXTERNAL_PROJECT_OPTIONAL_CMAKE_ARGS
-DCMAKE_BUILD_WITH_INSTALL_RPATH:BOOL=ON
)
ENDIF()
SET(EXTERNAL_PROJECT_OPTIONAL_CMAKE_CACHE_ARGS)
# Force Python build to "Release".
IF(CMAKE_CONFIGURATION_TYPES)
SET(SAVED_CMAKE_CFG_INTDIR ${CMAKE_CFG_INTDIR})
SET(CMAKE_CFG_INTDIR "Release")
ELSE()
LIST(APPEND EXTERNAL_PROJECT_OPTIONAL_CMAKE_CACHE_ARGS
-DCMAKE_BUILD_TYPE:STRING=Release
)
ENDIF()
ExternalProject_Add(python
${EXTERNAL_PROJECT_LOG_ARGS}
GIT_REPOSITORY "https://github.com/python-cmake-buildsystem/python-cmake-buildsystem.git"
PREFIX ${PYTHON_SOURCES_DIR}
UPDATE_COMMAND ""
CMAKE_ARGS -DPYTHON_VERSION=2.7.12
CMAKE_ARGS -DCMAKE_CXX_COMPILER=${CMAKE_CXX_COMPILER}
CMAKE_ARGS -DCMAKE_C_COMPILER=${CMAKE_C_COMPILER}
CMAKE_CACHE_ARGS
-DCMAKE_INSTALL_PREFIX:PATH=${PYTHON_INSTALL_DIR}
-DBUILD_LIBPYTHON_SHARED:BOOL=OFF
-DUSE_SYSTEM_LIBRARIES:BOOL=OFF
-DZLIB_ROOT:FILEPATH=${ZLIB_ROOT}
-DZLIB_INCLUDE_DIR:PATH=${ZLIB_INCLUDE_DIR}
-DZLIB_LIBRARY:FILEPATH=${ZLIB_LIBRARIES}
-DDOWNLOAD_SOURCES:BOOL=ON
-DINSTALL_WINDOWS_TRADITIONAL:BOOL=OFF
${EXTERNAL_PROJECT_OPTIONAL_CMAKE_CACHE_ARGS}
${EXTERNAL_PROJECT_OPTIONAL_CMAKE_ARGS}
DEPENDS zlib
)
SET(py_env
PATH=${PYTHON_INSTALL_DIR}/bin
PYTHONHOME=${PYTHON_INSTALL_DIR}
PYTHONPATH=${PYTHON_INSTALL_DIR}/lib:${PYTHON_INSTALL_DIR}/lib/python2.7:${PY_SITE_PACKAGES_PATH})
####################################################################################
##################################### SETUPTOOLS ###################################
SET(SETUPTOOLS_SOURCES_DIR ${PYTHON_SOURCES_DIR}/setuptools)
ExternalProject_Add(setuptools
${EXTERNAL_PROJECT_LOG_ARGS}
PREFIX ${SETUPTOOLS_SOURCES_DIR}
URL "https://pypi.python.org/packages/source/s/setuptools/setuptools-18.3.2.tar.gz"
BUILD_IN_SOURCE 1
PATCH_COMMAND ""
UPDATE_COMMAND ""
CONFIGURE_COMMAND ""
INSTALL_COMMAND ""
BUILD_COMMAND env ${py_env} ${PYTHON_EXECUTABLE} setup.py install
DEPENDS python zlib
)
#####################################################################################
##################################### SIX ###########################################
SET(SIX_SOURCES_DIR ${PYTHON_SOURCES_DIR}/six)
ExternalProject_Add(six
${EXTERNAL_PROJECT_LOG_ARGS}
PREFIX ${SIX_SOURCES_DIR}
URL https://pypi.python.org/packages/source/s/six/six-1.10.0.tar.gz
BUILD_IN_SOURCE 1
PATCH_COMMAND ""
UPDATE_COMMAND ""
CONFIGURE_COMMAND ""
INSTALL_COMMAND ""
BUILD_COMMAND env ${py_env} ${PYTHON_EXECUTABLE} setup.py install
DEPENDS python setuptools
)
#####################################################################################
##################################### CYTHON ########################################
SET(CYTHON_SOURCES_DIR ${PYTHON_SOURCES_DIR}/cython)
ExternalProject_Add(cython
${EXTERNAL_PROJECT_LOG_ARGS}
PREFIX ${CYTHON_SOURCES_DIR}
URL https://github.com/cython/cython/archive/0.25.2.tar.gz
GIT_TAG 0.25.2
BUILD_IN_SOURCE 1
CONFIGURE_COMMAND ""
PATCH_COMMAND ""
UPDATE_COMMAND ""
INSTALL_COMMAND ""
BUILD_COMMAND env ${py_env} ${PYTHON_EXECUTABLE} setup.py install
DEPENDS python
)
####################################################################################
##################################### NUMPY ########################################
SET(NUMPY_SOURCES_DIR ${PYTHON_SOURCES_DIR}/numpy)
SET(NUMPY_TAG_VERSION "v1.11.3")
SET(NUMPY_VERSION "1.11.3")
SET(EGG_NAME "")
SET(PYTHON_NUMPY_INCLUDE_DIR "")
IF(WIN32)
SET(EGG_NAME "numpy-${NUMPY_VERSION}-py2.7-${HOST_SYSTEM}.egg")
ELSE(WIN32)
IF(APPLE)
SET(EGG_NAME "numpy-${NUMPY_VERSION}-py2.7-${HOST_SYSTEM}-${MACOS_VERSION}")
ELSE(APPLE)
SET(EGG_NAME "numpy-${NUMPY_VERSION}-py2.7-linux")
SET(EGG_NAME "numpy-${NUMPY_VERSION}-py2.7-linux")
ENDIF(APPLE)
FOREACH(suffix x86_64 intel fat64 fat32 universal)
LIST(APPEND PYTHON_NUMPY_INCLUDE_DIR ${PY_SITE_PACKAGES_PATH}/${EGG_NAME}-${suffix}.egg/numpy/core/include)
ENDFOREACH()
ENDIF(WIN32)
ExternalProject_Add(numpy
${EXTERNAL_PROJECT_LOG_ARGS}
GIT_REPOSITORY https://github.com/numpy/numpy.git
GIT_TAG ${NUMPY_TAG_VERSION}
CONFIGURE_COMMAND ""
UPDATE_COMMAND ""
PREFIX ${NUMPY_SOURCES_DIR}
BUILD_COMMAND env ${py_env} ${PYTHON_EXECUTABLE} setup.py build
INSTALL_COMMAND env ${py_env} ${PYTHON_EXECUTABLE} setup.py install
BUILD_IN_SOURCE 1
DEPENDS python setuptools cython
)
####################################################################################
##################################### WHEEL ########################################
SET(WHEEL_SOURCES_DIR ${PYTHON_SOURCES_DIR}/wheel)
ExternalProject_Add(wheel
${EXTERNAL_PROJECT_LOG_ARGS}
URL https://pypi.python.org/packages/source/w/wheel/wheel-0.29.0.tar.gz
PREFIX ${WHEEL_SOURCES_DIR}
CONFIGURE_COMMAND ""
UPDATE_COMMAND ""
BUILD_COMMAND ""
INSTALL_COMMAND env ${py_env} ${PYTHON_EXECUTABLE} setup.py install
BUILD_IN_SOURCE 1
DEPENDS python setuptools
)
####################################################################################
################################### PROTOBUF #######################################
SET(PY_PROTOBUF_SOURCES_DIR ${PYTHON_SOURCES_DIR}/protobuf)
ExternalProject_Add(python-protobuf
${EXTERNAL_PROJECT_LOG_ARGS}
URL https://pypi.python.org/packages/e0/b0/0a1b364fe8a7d177b4b7d4dca5b798500dc57a7273b93cca73931b305a6a/protobuf-3.1.0.post1.tar.gz
URL_MD5 38b5fb160c768d2f8444d0c6d637ff91
PREFIX ${PY_PROTOBUF_SOURCES_DIR}
BUILD_IN_SOURCE 1
PATCH_COMMAND ""
CONFIGURE_COMMAND ""
BUILD_COMMAND env ${py_env} ${PYTHON_EXECUTABLE} setup.py build
INSTALL_COMMAND env ${py_env} ${PYTHON_EXECUTABLE} setup.py install
DEPENDS python setuptools six
)
####################################################################################
LIST(APPEND external_project_dependencies python setuptools six cython wheel python-protobuf numpy)
ENDIF(PYTHONLIBS_FOUND AND PYTHONINTERP_FOUND)
IF(WITH_PYTHON)
INCLUDE_DIRECTORIES(${PYTHON_INCLUDE_DIR})
INCLUDE_DIRECTORIES(${PYTHON_NUMPY_INCLUDE_DIR})
ELSE()
LIST(APPEND EXTERNAL_PROJECT_OPTIONAL_CMAKE_CACHE_ARGS
-DCMAKE_BUILD_TYPE:STRING=Release
)
SET(PYTHON_LIBRARIES "")
ENDIF()
ExternalProject_Add(python
${EXTERNAL_PROJECT_LOG_ARGS}
GIT_REPOSITORY "https://github.com/python-cmake-buildsystem/python-cmake-buildsystem.git"
PREFIX ${PYTHON_SOURCES_DIR}
UPDATE_COMMAND ""
CMAKE_ARGS -DPYTHON_VERSION=2.7.12
CMAKE_ARGS -DCMAKE_CXX_COMPILER=${CMAKE_CXX_COMPILER}
CMAKE_ARGS -DCMAKE_C_COMPILER=${CMAKE_C_COMPILER}
CMAKE_CACHE_ARGS
-DCMAKE_INSTALL_PREFIX:PATH=${PYTHON_INSTALL_DIR}
-DBUILD_LIBPYTHON_SHARED:BOOL=OFF
-DUSE_SYSTEM_LIBRARIES:BOOL=OFF
-DZLIB_ROOT:FILEPATH=${ZLIB_ROOT}
-DZLIB_INCLUDE_DIR:PATH=${ZLIB_INCLUDE_DIR}
-DZLIB_LIBRARY:FILEPATH=${ZLIB_LIBRARIES}
-DDOWNLOAD_SOURCES:BOOL=ON
-DINSTALL_WINDOWS_TRADITIONAL:BOOL=OFF
${EXTERNAL_PROJECT_OPTIONAL_CMAKE_CACHE_ARGS}
${EXTERNAL_PROJECT_OPTIONAL_CMAKE_ARGS}
DEPENDS zlib
)
####################################################################################
##################################### SETUPTOOLS ###################################
SET(SETUPTOOLS_SOURCES_DIR ${PYTHON_SOURCES_DIR}/setuptools)
ExternalProject_Add(setuptools
${EXTERNAL_PROJECT_LOG_ARGS}
PREFIX ${SETUPTOOLS_SOURCES_DIR}
URL "https://pypi.python.org/packages/source/s/setuptools/setuptools-18.3.2.tar.gz"
BUILD_IN_SOURCE 1
PATCH_COMMAND ""
UPDATE_COMMAND ""
CONFIGURE_COMMAND ""
INSTALL_COMMAND ""
BUILD_COMMAND env ${py_env} ${PYTHON_EXECUTABLE} setup.py install
DEPENDS python zlib
)
#####################################################################################
##################################### SIX ###########################################
SET(SIX_SOURCES_DIR ${PYTHON_SOURCES_DIR}/six)
ExternalProject_Add(six
${EXTERNAL_PROJECT_LOG_ARGS}
PREFIX ${SIX_SOURCES_DIR}
URL https://pypi.python.org/packages/source/s/six/six-1.10.0.tar.gz
BUILD_IN_SOURCE 1
PATCH_COMMAND ""
UPDATE_COMMAND ""
CONFIGURE_COMMAND ""
INSTALL_COMMAND ""
BUILD_COMMAND env ${py_env} ${PYTHON_EXECUTABLE} setup.py install
DEPENDS python setuptools
)
#####################################################################################
##################################### CYTHON ########################################
SET(CYTHON_SOURCES_DIR ${PYTHON_SOURCES_DIR}/cython)
ExternalProject_Add(cython
${EXTERNAL_PROJECT_LOG_ARGS}
PREFIX ${CYTHON_SOURCES_DIR}
URL https://github.com/cython/cython/archive/0.25.2.tar.gz
GIT_TAG 0.25.2
BUILD_IN_SOURCE 1
CONFIGURE_COMMAND ""
PATCH_COMMAND ""
UPDATE_COMMAND ""
INSTALL_COMMAND ""
BUILD_COMMAND env ${py_env} ${PYTHON_EXECUTABLE} setup.py install
DEPENDS python
)
####################################################################################
##################################### NUMPY ########################################
SET(NUMPY_SOURCES_DIR ${PYTHON_SOURCES_DIR}/numpy)
SET(NUMPY_TAG_VERSION "v1.11.3")
SET(NUMPY_VERSION "1.11.3")
SET(EGG_NAME "")
SET(PYTHON_NUMPY_INCLUDE_DIR "")
IF(WIN32)
SET(EGG_NAME "numpy-${NUMPY_VERSION}-py2.7-${HOST_SYSTEM}.egg")
ELSE(WIN32)
IF(APPLE)
SET(EGG_NAME "numpy-${NUMPY_VERSION}-py2.7-${HOST_SYSTEM}-${MACOS_VERSION}")
ELSE(APPLE)
SET(EGG_NAME "numpy-${NUMPY_VERSION}-py2.7-linux")
SET(EGG_NAME "numpy-${NUMPY_VERSION}-py2.7-linux")
ENDIF(APPLE)
FOREACH(suffix x86_64 intel fat64 fat32 universal)
LIST(APPEND PYTHON_NUMPY_INCLUDE_DIR ${PY_SITE_PACKAGES_PATH}/${EGG_NAME}-${suffix}.egg/numpy/core/include)
ENDFOREACH()
ENDIF(WIN32)
INCLUDE_DIRECTORIES(${PYTHON_NUMPY_INCLUDE_DIR})
ExternalProject_Add(numpy
${EXTERNAL_PROJECT_LOG_ARGS}
GIT_REPOSITORY https://github.com/numpy/numpy.git
GIT_TAG ${NUMPY_TAG_VERSION}
CONFIGURE_COMMAND ""
UPDATE_COMMAND ""
PREFIX ${NUMPY_SOURCES_DIR}
BUILD_COMMAND env ${py_env} ${PYTHON_EXECUTABLE} setup.py build
INSTALL_COMMAND env ${py_env} ${PYTHON_EXECUTABLE} setup.py install
BUILD_IN_SOURCE 1
DEPENDS python setuptools cython
)
####################################################################################
##################################### WHEEL ########################################
SET(WHEEL_SOURCES_DIR ${PYTHON_SOURCES_DIR}/wheel)
ExternalProject_Add(wheel
${EXTERNAL_PROJECT_LOG_ARGS}
URL https://pypi.python.org/packages/source/w/wheel/wheel-0.29.0.tar.gz
PREFIX ${WHEEL_SOURCES_DIR}
CONFIGURE_COMMAND ""
UPDATE_COMMAND ""
BUILD_COMMAND ""
INSTALL_COMMAND env ${py_env} ${PYTHON_EXECUTABLE} setup.py install
BUILD_IN_SOURCE 1
DEPENDS python setuptools
)
####################################################################################
################################### PROTOBUF #######################################
SET(PY_PROTOBUF_SOURCES_DIR ${PYTHON_SOURCES_DIR}/protobuf)
ExternalProject_Add(python-protobuf
${EXTERNAL_PROJECT_LOG_ARGS}
URL https://pypi.python.org/packages/e0/b0/0a1b364fe8a7d177b4b7d4dca5b798500dc57a7273b93cca73931b305a6a/protobuf-3.1.0.post1.tar.gz
URL_MD5 38b5fb160c768d2f8444d0c6d637ff91
PREFIX ${PY_PROTOBUF_SOURCES_DIR}
BUILD_IN_SOURCE 1
PATCH_COMMAND ""
CONFIGURE_COMMAND ""
BUILD_COMMAND env PATH=${PROTOBUF_INSTALL_DIR}/bin:$ENV{PATH} ${py_env} ${PYTHON_EXECUTABLE} setup.py build
INSTALL_COMMAND env PATH=${PROTOBUF_INSTALL_DIR}/bin:$ENV{PATH} ${py_env} ${PYTHON_EXECUTABLE} setup.py install
DEPENDS python setuptools six
)
LIST(APPEND external_project_dependencies python setuptools six cython numpy wheel python-protobuf)
......@@ -18,8 +18,8 @@ IF(NOT SWIG_FOUND)
# build swig as an external project
INCLUDE(ExternalProject)
SET(SWIG_SOURCES_DIR ${CMAKE_CURRENT_SOURCE_DIR}/third_party/swig)
SET(SWIG_INSTALL_DIR ${CMAKE_CURRENT_SOURCE_DIR}/third_party/install/swig)
SET(SWIG_SOURCES_DIR ${THIRD_PARTY_PATH}/swig)
SET(SWIG_INSTALL_DIR ${THIRD_PARTY_PATH}/install/swig)
SET(SWIG_TARGET_VERSION "3.0.2")
SET(SWIG_DOWNLOAD_SRC_MD5 "62f9b0d010cef36a13a010dc530d0d41")
SET(SWIG_DOWNLOAD_WIN_MD5 "3f18de4fc09ab9abb0d3be37c11fbc8f")
......@@ -38,14 +38,6 @@ IF(NOT SWIG_FOUND)
SET(SWIG_DIR ${SWIG_SOURCES_DIR} CACHE FILEPATH "SWIG Directory" FORCE)
SET(SWIG_EXECUTABLE ${SWIG_SOURCES_DIR}/swig.exe CACHE FILEPATH "SWIG Executable" FORCE)
ELSE(WIN32)
# From PCRE configure
ExternalProject_Add(pcre
${EXTERNAL_PROJECT_LOG_ARGS}
GIT_REPOSITORY https://github.com/svn2github/pcre.git
PREFIX ${SWIG_SOURCES_DIR}/pcre
CMAKE_ARGS -DCMAKE_INSTALL_PREFIX:PATH=${SWIG_INSTALL_DIR}/pcre
)
# swig uses bison find it by cmake and pass it down
FIND_PACKAGE(BISON)
......@@ -54,16 +46,11 @@ IF(NOT SWIG_FOUND)
GIT_REPOSITORY https://github.com/swig/swig.git
GIT_TAG rel-3.0.10
PREFIX ${SWIG_SOURCES_DIR}
CONFIGURE_COMMAND cd ${SWIG_SOURCES_DIR}/src/swig && ./autogen.sh
CONFIGURE_COMMAND cd ${SWIG_SOURCES_DIR}/src/swig &&
env "PCRE_LIBS=${SWIG_INSTALL_DIR}/pcre/lib/libpcre.a ${SWIG_INSTALL_DIR}/pcre/lib/libpcrecpp.a ${SWIG_INSTALL_DIR}/pcre/lib/libpcreposix.a"
./configure
--prefix=${SWIG_INSTALL_DIR}
--with-pcre-prefix=${SWIG_INSTALL_DIR}/pcre
BUILD_COMMAND cd ${SWIG_SOURCES_DIR}/src/swig && make
INSTALL_COMMAND cd ${SWIG_SOURCES_DIR}/src/swig && make install
UPDATE_COMMAND ""
DEPENDS pcre
CONFIGURE_COMMAND cd <SOURCE_DIR> && ./autogen.sh && ./configure
--prefix=${SWIG_INSTALL_DIR} --without-pcre
BUILD_COMMAND cd <SOURCE_DIR> && make
INSTALL_COMMAND cd <SOURCE_DIR> && make install
UPDATE_COMMAND ""
)
SET(SWIG_DIR ${SWIG_INSTALL_DIR}/share/swig/${SWIG_TARGET_VERSION})
......
......@@ -14,8 +14,8 @@
INCLUDE(ExternalProject)
SET(WARPCTC_SOURCES_DIR ${CMAKE_CURRENT_SOURCE_DIR}/third_party/warpctc)
SET(WARPCTC_INSTALL_DIR ${CMAKE_CURRENT_SOURCE_DIR}/third_party/install/warpctc)
SET(WARPCTC_SOURCES_DIR ${THIRD_PARTY_PATH}/warpctc)
SET(WARPCTC_INSTALL_DIR ${THIRD_PARTY_PATH}/install/warpctc)
SET(WARPCTC_INCLUDE_DIR "${WARPCTC_INSTALL_DIR}/include" CACHE PATH "Warp-ctc Directory" FORCE)
INCLUDE_DIRECTORIES(${WARPCTC_INCLUDE_DIR})
......@@ -50,9 +50,19 @@ ExternalProject_Add(
UPDATE_COMMAND ""
CMAKE_ARGS -DCMAKE_CXX_COMPILER=${CMAKE_CXX_COMPILER}
CMAKE_ARGS -DCMAKE_C_COMPILER=${CMAKE_C_COMPILER}
CMAKE_ARGS -DCMAKE_CXX_FLAGS=${CMAKE_CXX_FLAGS}
CMAKE_ARGS -DCMAKE_C_FLAGS=${CMAKE_C_FLAGS}
CMAKE_ARGS -DCMAKE_INSTALL_PREFIX=${WARPCTC_INSTALL_DIR}
CMAKE_ARGS -DWITH_GPU=${WITH_GPU}
CMAKE_ARGS -DWITH_OMP=${USE_OMP}
CMAKE_ARGS -DWITH_TORCH=OFF
CMAKE_ARGS -DCMAKE_DISABLE_FIND_PACKAGE_Torch=ON
CMAKE_ARGS -DBUILD_SHARED=ON
CMAKE_ARGS -DCMAKE_POSITION_INDEPENDENT_CODE=ON
CMAKE_ARGS -DCMAKE_BUILD_TYPE=Release
CMAKE_CACHE_ARGS -DCMAKE_BUILD_TYPE:STRING=Release
-DCMAKE_POSITION_INDEPENDENT_CODE:BOOL=ON
-DCMAKE_INSTALL_PREFIX:PATH=${WARPCTC_INSTALL_DIR}
)
LIST(APPEND external_project_dependencies warpctc)
......@@ -14,15 +14,15 @@
INCLUDE(ExternalProject)
SET(ZLIB_SOURCES_DIR ${CMAKE_CURRENT_SOURCE_DIR}/third_party/zlib)
SET(ZLIB_INSTALL_DIR ${CMAKE_CURRENT_SOURCE_DIR}/third_party/install/zlib)
SET(ZLIB_SOURCES_DIR ${THIRD_PARTY_PATH}/zlib)
SET(ZLIB_INSTALL_DIR ${THIRD_PARTY_PATH}/install/zlib)
SET(ZLIB_ROOT ${ZLIB_INSTALL_DIR} CACHE FILEPATH "zlib root directory." FORCE)
SET(ZLIB_INCLUDE_DIR "${ZLIB_INSTALL_DIR}/include" CACHE PATH "zlib include directory." FORCE)
IF(WIN32)
SET(ZLIB_LIBRARIES "${ZLIB_INSTALL_DIR}/lib/zlibstatic.lib" CACHE FILEPATH "zlib library." FORCE)
ELSE(WIN32)
set(ZLIB_LIBRARIES "${ZLIB_INSTALL_DIR}/lib/libz.a" CACHE FILEPATH "zlib library." FORCE)
SET(ZLIB_LIBRARIES "${ZLIB_INSTALL_DIR}/lib/libz.a" CACHE FILEPATH "zlib library." FORCE)
ENDIF(WIN32)
INCLUDE_DIRECTORIES(${ZLIB_INCLUDE_DIR})
......@@ -34,10 +34,18 @@ ExternalProject_Add(
GIT_TAG "v1.2.8"
PREFIX ${ZLIB_SOURCES_DIR}
UPDATE_COMMAND ""
CMAKE_ARGS -DCMAKE_CXX_COMPILER=${CMAKE_CXX_COMPILER}
CMAKE_ARGS -DCMAKE_C_COMPILER=${CMAKE_C_COMPILER}
CMAKE_ARGS -DCMAKE_CXX_FLAGS=${CMAKE_CXX_FLAGS}
CMAKE_ARGS -DCMAKE_C_FLAGS=${CMAKE_C_FLAGS}
CMAKE_ARGS -DCMAKE_INSTALL_PREFIX=${ZLIB_INSTALL_DIR}
CMAKE_ARGS -DBUILD_SHARED_LIBS=OFF
CMAKE_ARGS -DCMAKE_POSITION_INDEPENDENT_CODE=ON
CMAKE_ARGS -DCMAKE_MACOSX_RPATH=ON
CMAKE_ARGS -DCMAKE_BUILD_TYPE=Release
CMAKE_CACHE_ARGS -DCMAKE_INSTALL_PREFIX:PATH=${ZLIB_INSTALL_DIR}
-DCMAKE_POSITION_INDEPENDENT_CODE:BOOL=ON
-DCMAKE_BUILD_TYPE:STRING=Release
)
LIST(APPEND external_project_dependencies zlib)
......@@ -2,12 +2,7 @@
include(CheckCXXCompilerFlag)
include(CheckCCompilerFlag)
include(CheckCXXSymbolExists)
if(NOT CMAKE_BUILD_TYPE)
set(CMAKE_BUILD_TYPE "RelWithDebInfo" CACHE STRING
"Choose the type of build, options are: Debug Release RelWithDebInfo MinSizeRel"
FORCE)
endif()
include(CheckTypeSize)
function(CheckCompilerCXX11Flag)
if(CMAKE_CXX_COMPILER_ID STREQUAL "GNU")
......@@ -31,7 +26,7 @@ function(CheckCompilerCXX11Flag)
endfunction()
CheckCompilerCXX11Flag()
LIST(APPEND CMAKE_CXX_FLAGS -std=c++11)
set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -std=c++11")
# safe_set_flag
#
......@@ -89,6 +84,17 @@ if(NOT UINT64_MAX_EXISTS)
endif()
endif()
SET(CMAKE_EXTRA_INCLUDE_FILES "pthread.h")
CHECK_TYPE_SIZE(pthread_spinlock_t SPINLOCK_FOUND)
CHECK_TYPE_SIZE(pthread_barrier_t BARRIER_FOUND)
if(SPINLOCK_FOUND)
add_definitions(-DPADDLE_USE_PTHREAD_SPINLOCK)
endif(SPINLOCK_FOUND)
if(BARRIER_FOUND)
add_definitions(-DPADDLE_USE_PTHREAD_BARRIER)
endif(BARRIER_FOUND)
SET(CMAKE_EXTRA_INCLUDE_FILES "")
# Common flags. the compiler flag used for C/C++ sources whenever release or debug
# Do not care if this flag is support for gcc.
set(COMMON_FLAGS
......@@ -102,6 +108,7 @@ set(COMMON_FLAGS
-Wno-unused-parameter
-Wno-unused-function
-Wno-error=literal-suffix
-Wno-error=sign-compare
-Wno-error=unused-local-typedefs)
set(GPU_COMMON_FLAGS
......@@ -111,6 +118,7 @@ set(GPU_COMMON_FLAGS
-Wdelete-non-virtual-dtor
-Wno-unused-parameter
-Wno-unused-function
-Wno-error=sign-compare
-Wno-error=literal-suffix
-Wno-error=unused-local-typedefs
-Wno-error=unused-function # Warnings in Numpy Header.
......
set(CPACK_PACKAGE_NAME paddle)
set(CPACK_PACKAGE_DESCRIPTION_SUMMARY "")
set(CPACK_PACKAGE_VERSION_MAJOR ${PADDLE_MAJOR_VERSION})
set(CPACK_PACKAGE_VERSION_MINOR ${PADDLE_MINOR_VERSION})
set(CPACK_PACKAGE_VERSION_PATCH ${PADDLE_PATCH_VERSION})
......@@ -10,8 +9,9 @@ set(CPACK_DEBIAN_PACKAGE_ARCHITECTURE amd64)
set(CPACK_DEBIAN_PACKAGE_MAINTAINER PaddlePaddle Dev <paddle-dev@baidu.com>)
set(CPACK_PACKAGE_DESCRIPTION_SUMMARY "Paddle")
set(CPACK_PACKAGE_DESCRIPTION "")
set(CPACK_DEBIAN_PACKAGE_DEPENDS "libatlas3-base, libgflags2, libgoogle-glog0, libprotobuf8, libpython2.7, libstdc++6, python-numpy, python-pip, python-pip-whl, python-protobuf")
set(CPACK_DEBIAN_PACKAGE_DEPENDS "libpython2.7-dev, libstdc++6, python-pip, curl, libgfortran3, python-pip-whl")
set(CPACK_DEBIAN_PACKAGE_SECTION Devel)
set(CPACK_DEBIAN_PACKAGE_VERSION ${PADDLE_VERSION})
set(CPACK_DEBIAN_PACKAGE_CONTROL_EXTRA "${PROJ_ROOT}/paddle/scripts/deb/postinst")
#set(CPACK_GENERATOR "DEB")
# Start cpack
......
......@@ -26,5 +26,18 @@ function(find_python_module module)
if(NOT PY_${module_upper}_FOUND AND ${module}_FIND_REQUIRED)
message(FATAL_ERROR "python module ${module} is not found")
endif()
execute_process(COMMAND "${PYTHON_EXECUTABLE}" "-c"
"import sys, ${module}; sys.stdout.write(${module}.__version__)"
OUTPUT_VARIABLE _${module}_version
RESULT_VARIABLE _${module}_status
ERROR_QUIET
OUTPUT_STRIP_TRAILING_WHITESPACE)
if(NOT _${module}_status)
set(PY_${module_upper}_VERSION ${_${module}_version} CACHE STRING
"Version of Python module ${module}")
endif(NOT _${module}_status)
set(PY_${module_upper}_FOUND ${PY_${module_upper}_FOUND} PARENT_SCOPE)
set(PY_${module_upper}_VERSION ${PY_${module_upper}_VERSION} PARENT_SCOPE)
endfunction(find_python_module)
......@@ -2,6 +2,7 @@
# so that PaddlePaddle can unleash the vectorization power of muticore.
INCLUDE(CheckCXXSourceRuns)
INCLUDE(CheckCXXSourceCompiles)
IF(CMAKE_COMPILER_IS_GNUCC OR CMAKE_COMPILER_IS_GNUCXX OR CMAKE_CXX_COMPILER_ID MATCHES "Clang")
set(MMX_FLAG "-mmmx")
......@@ -17,6 +18,8 @@ ELSEIF(MSVC)
SET(AVX2_FLAG "/arch:AVX2")
ENDIF()
set(CMAKE_REQUIRED_FLAGS_RETAINED ${CMAKE_REQUIRED_FLAGS})
# Check MMX
set(CMAKE_REQUIRED_FLAGS ${MMX_FLAG})
CHECK_CXX_SOURCE_RUNS("
......@@ -73,4 +76,5 @@ int main()
return 0;
}" AVX2_FOUND)
set(CMAKE_REQUIRED_FLAGS ${CMAKE_REQUIRED_FLAGS_RETAINED})
mark_as_advanced(MMX_FOUND SSE2_FOUND SSE3_FOUND AVX_FOUND AVX2_FOUND)
......@@ -12,6 +12,14 @@
# See the License for the specific language governing permissions and
# limitations under the License.
# Detects the OS and sets appropriate variables.
# CMAKE_SYSTEM_NAME only give us a coarse-grained name,
# but the name like centos is necessary in some scenes
# to distinguish system for customization.
#
# for instance, protobuf libs path is <install_dir>/lib64
# on CentOS, but <install_dir>/lib on other systems.
IF(WIN32)
SET(HOST_SYSTEM "win32")
ELSE(WIN32)
......@@ -21,6 +29,7 @@ ELSE(WIN32)
SET(MACOS_VERSION ${VERSION})
SET(HOST_SYSTEM "macosx")
ELSE(APPLE)
IF(EXISTS "/etc/issue")
FILE(READ "/etc/issue" LINUX_ISSUE)
IF(LINUX_ISSUE MATCHES "CentOS")
......@@ -29,8 +38,24 @@ ELSE(WIN32)
SET(HOST_SYSTEM "debian")
ELSEIF(LINUX_ISSUE MATCHES "Ubuntu")
SET(HOST_SYSTEM "ubuntu")
ELSEIF(LINUX_ISSUE MATCHES "Red Hat")
SET(HOST_SYSTEM "redhat")
ELSEIF(LINUX_ISSUE MATCHES "Fedora")
SET(HOST_SYSTEM "fedora")
ENDIF()
ENDIF(EXISTS "/etc/issue")
IF(EXISTS "/etc/redhat-release")
FILE(READ "/etc/redhat-release" LINUX_ISSUE)
IF(LINUX_ISSUE MATCHES "CentOS")
SET(HOST_SYSTEM "centos")
ENDIF()
ENDIF(EXISTS "/etc/redhat-release")
IF(NOT HOST_SYSTEM)
SET(HOST_SYSTEM ${CMAKE_SYSTEM_NAME})
ENDIF()
ENDIF(APPLE)
ENDIF(WIN32)
......@@ -42,12 +67,18 @@ MARK_AS_ADVANCED(HOST_SYSTEM CPU_CORES)
MESSAGE(STATUS "Found Paddle host system: ${HOST_SYSTEM}")
MESSAGE(STATUS "Found Paddle host system's CPU: ${CPU_CORES} cores")
IF(DEFINED CMAKE_SYSTEM_NAME)
IF(${CMAKE_SYSTEM_NAME} STREQUAL "Android")
SET(ANDROID TRUE)
ENDIF()
ENDIF()
# external dependencies log output
SET(EXTERNAL_PROJECT_LOG_ARGS
LOG_DOWNLOAD 0 # Wrap download in script to log output
LOG_UPDATE 1 # Wrap update in script to log output
LOG_CONFIGURE 1 # Wrap configure in script to log output
LOG_BUILD 1 # Wrap build in script to log output
LOG_BUILD 0 # Wrap build in script to log output
LOG_TEST 1 # Wrap test in script to log output
LOG_INSTALL 1 # Wrap install in script to log output
LOG_INSTALL 0 # Wrap install in script to log output
)
......@@ -71,21 +71,10 @@ function(link_paddle_exe TARGET_NAME)
generate_rdma_links()
endif()
if(WITH_METRIC)
if(WITH_GPU)
set(METRIC_LIBS paddle_metric_learning paddle_dserver_lib metric metric_cpu)
else()
set(METRIC_LIBS paddle_metric_learning paddle_dserver_lib metric_cpu)
endif()
else()
set(METRIC_LIBS "")
endif()
target_circle_link_libraries(${TARGET_NAME}
ARCHIVE_START
paddle_gserver
paddle_function
${METRIC_LIBS}
ARCHIVE_END
paddle_pserver
paddle_trainer_lib
......@@ -95,31 +84,16 @@ function(link_paddle_exe TARGET_NAME)
paddle_parameter
paddle_proto
paddle_cuda
${METRIC_LIBS}
${EXTERNAL_LIBS}
${CMAKE_THREAD_LIBS_INIT}
${CMAKE_DL_LIBS}
${RDMA_LD_FLAGS}
${RDMA_LIBS})
if(WITH_PYTHON)
target_link_libraries(${TARGET_NAME}
${PYTHON_LIBRARIES} util)
endif()
if(ANDROID)
target_link_libraries(${TARGET_NAME} log)
endif(ANDROID)
if(WITH_GPU)
if(NOT WITH_DSO OR WITH_METRIC)
target_link_libraries(${TARGET_NAME}
${CUDNN_LIBRARY}
${CUDA_curand_LIBRARY})
CUDA_ADD_CUBLAS_TO_TARGET(${TARGET_NAME})
endif()
check_library_exists(rt clock_gettime "time.h" HAVE_CLOCK_GETTIME )
if(HAVE_CLOCK_GETTIME)
target_link_libraries(${TARGET_NAME} rt)
endif()
endif()
add_dependencies(${TARGET_NAME} ${external_project_dependencies})
endfunction()
......
# Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserved
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import paddle.v2 as paddle
__all__ = ['resnet_cifar10']
def conv_bn_layer(input,
ch_out,
filter_size,
stride,
padding,
active_type=paddle.activation.Relu(),
ch_in=None):
tmp = paddle.layer.img_conv(
input=input,
filter_size=filter_size,
num_channels=ch_in,
num_filters=ch_out,
stride=stride,
padding=padding,
act=paddle.activation.Linear(),
bias_attr=False)
return paddle.layer.batch_norm(input=tmp, act=active_type)
def shortcut(ipt, n_in, n_out, stride):
if n_in != n_out:
return conv_bn_layer(ipt, n_out, 1, stride, 0,
paddle.activation.Linear())
else:
return ipt
def basicblock(ipt, ch_out, stride):
ch_in = ch_out * 2
tmp = conv_bn_layer(ipt, ch_out, 3, stride, 1)
tmp = conv_bn_layer(tmp, ch_out, 3, 1, 1, paddle.activation.Linear())
short = shortcut(ipt, ch_in, ch_out, stride)
return paddle.layer.addto(input=[tmp, short], act=paddle.activation.Relu())
def layer_warp(block_func, ipt, features, count, stride):
tmp = block_func(ipt, features, stride)
for i in range(1, count):
tmp = block_func(tmp, features, 1)
return tmp
def resnet_cifar10(ipt, depth=32):
# depth should be one of 20, 32, 44, 56, 110, 1202
assert (depth - 2) % 6 == 0
n = (depth - 2) / 6
nStages = {16, 64, 128}
conv1 = conv_bn_layer(
ipt, ch_in=3, ch_out=16, filter_size=3, stride=1, padding=1)
res1 = layer_warp(basicblock, conv1, 16, n, 1)
res2 = layer_warp(basicblock, res1, 32, n, 2)
res3 = layer_warp(basicblock, res2, 64, n, 2)
pool = paddle.layer.img_pool(
input=res3, pool_size=8, stride=1, pool_type=paddle.pooling.Avg())
return pool
# Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserved
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License
import sys
import paddle.v2 as paddle
from api_v2_vgg import vgg_bn_drop
def main():
datadim = 3 * 32 * 32
classdim = 10
# PaddlePaddle init
paddle.init(use_gpu=False, trainer_count=1)
image = paddle.layer.data(
name="image", type=paddle.data_type.dense_vector(datadim))
# Add neural network config
# option 1. resnet
# net = resnet_cifar10(image, depth=32)
# option 2. vgg
net = vgg_bn_drop(image)
out = paddle.layer.fc(input=net,
size=classdim,
act=paddle.activation.Softmax())
lbl = paddle.layer.data(
name="label", type=paddle.data_type.integer_value(classdim))
cost = paddle.layer.classification_cost(input=out, label=lbl)
# Create parameters
parameters = paddle.parameters.create(cost)
# Create optimizer
momentum_optimizer = paddle.optimizer.Momentum(
momentum=0.9,
regularization=paddle.optimizer.L2Regularization(rate=0.0002 * 128),
learning_rate=0.1 / 128.0,
learning_rate_decay_a=0.1,
learning_rate_decay_b=50000 * 100,
learning_rate_schedule='discexp',
batch_size=128)
# End batch and end pass event handler
def event_handler(event):
if isinstance(event, paddle.event.EndIteration):
if event.batch_id % 100 == 0:
print "\nPass %d, Batch %d, Cost %f, %s" % (
event.pass_id, event.batch_id, event.cost, event.metrics)
else:
sys.stdout.write('.')
sys.stdout.flush()
if isinstance(event, paddle.event.EndPass):
result = trainer.test(
reader=paddle.batch(
paddle.dataset.cifar.test10(), batch_size=128),
feeding={'image': 0,
'label': 1})
print "\nTest with Pass %d, %s" % (event.pass_id, result.metrics)
# Create trainer
trainer = paddle.trainer.SGD(cost=cost,
parameters=parameters,
update_equation=momentum_optimizer)
trainer.train(
reader=paddle.batch(
paddle.reader.shuffle(
paddle.dataset.cifar.train10(), buf_size=50000),
batch_size=128),
num_passes=5,
event_handler=event_handler,
feeding={'image': 0,
'label': 1})
if __name__ == '__main__':
main()
# Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserved
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import paddle.v2 as paddle
__all__ = ['vgg_bn_drop']
def vgg_bn_drop(input):
def conv_block(ipt, num_filter, groups, dropouts, num_channels=None):
return paddle.networks.img_conv_group(
input=ipt,
num_channels=num_channels,
pool_size=2,
pool_stride=2,
conv_num_filter=[num_filter] * groups,
conv_filter_size=3,
conv_act=paddle.activation.Relu(),
conv_with_batchnorm=True,
conv_batchnorm_drop_rate=dropouts,
pool_type=paddle.pooling.Max())
conv1 = conv_block(input, 64, 2, [0.3, 0], 3)
conv2 = conv_block(conv1, 128, 2, [0.4, 0])
conv3 = conv_block(conv2, 256, 3, [0.4, 0.4, 0])
conv4 = conv_block(conv3, 512, 3, [0.4, 0.4, 0])
conv5 = conv_block(conv4, 512, 3, [0.4, 0.4, 0])
drop = paddle.layer.dropout(input=conv5, dropout_rate=0.5)
fc1 = paddle.layer.fc(input=drop, size=512, act=paddle.activation.Linear())
bn = paddle.layer.batch_norm(
input=fc1,
act=paddle.activation.Relu(),
layer_attr=paddle.attr.Extra(drop_rate=0.5))
fc2 = paddle.layer.fc(input=bn, size=512, act=paddle.activation.Linear())
return fc2
......@@ -126,7 +126,7 @@ class ImageClassifier():
# For oversampling, average predictions across crops.
# If not, the shape of output[name]: (1, class_number),
# the mean is also applicable.
return output[output_layer].mean(0)
return output[output_layer]['value'].mean(0)
def predict(self, image=None, output_layer=None):
assert isinstance(image, basestring)
......
......@@ -27,5 +27,6 @@ paddle train \
--num_passes=300 \
--save_dir=$output \
2>&1 | tee $log
paddle usage -l $log -e $? -n "image_classification_train" >/dev/null 2>&1
python -m paddle.utils.plotcurve -i $log > plot.png
import paddle.v2 as paddle
import paddle.v2.dataset.uci_housing as uci_housing
def main():
# init
paddle.init(use_gpu=False, trainer_count=1)
# network config
x = paddle.layer.data(name='x', type=paddle.data_type.dense_vector(13))
y_predict = paddle.layer.fc(input=x,
param_attr=paddle.attr.Param(name='w'),
size=1,
act=paddle.activation.Linear(),
bias_attr=paddle.attr.Param(name='b'))
y = paddle.layer.data(name='y', type=paddle.data_type.dense_vector(1))
cost = paddle.layer.mse_cost(input=y_predict, label=y)
# create parameters
parameters = paddle.parameters.create(cost)
# create optimizer
optimizer = paddle.optimizer.Momentum(momentum=0)
trainer = paddle.trainer.SGD(cost=cost,
parameters=parameters,
update_equation=optimizer)
# event_handler to print training and testing info
def event_handler(event):
if isinstance(event, paddle.event.EndIteration):
if event.batch_id % 100 == 0:
print "Pass %d, Batch %d, Cost %f" % (
event.pass_id, event.batch_id, event.cost)
if isinstance(event, paddle.event.EndPass):
if (event.pass_id + 1) % 10 == 0:
result = trainer.test(
reader=paddle.batch(
uci_housing.test(), batch_size=2),
feeding={'x': 0,
'y': 1})
print "Test %d, %.2f" % (event.pass_id, result.cost)
# training
trainer.train(
reader=paddle.batch(
paddle.reader.shuffle(
uci_housing.train(), buf_size=500),
batch_size=2),
feeding={'x': 0,
'y': 1},
event_handler=event_handler,
num_passes=30)
if __name__ == '__main__':
main()
......@@ -19,3 +19,4 @@ paddle train \
--save_dir=./output \
--num_passes=30 \
2>&1 |tee 'train.log'
paddle usage -l "train.log" -e $? -n "introduction" >/dev/null 2>&1
......@@ -34,5 +34,5 @@ y_predict = fc_layer(
size=1,
act=LinearActivation(),
bias_attr=ParamAttr(name='b'))
cost = regression_cost(input=y_predict, label=y)
cost = mse_cost(input=y_predict, label=y)
outputs(cost)
......@@ -5,3 +5,6 @@ plot.png
train.log
*pyc
.ipynb_checkpoints
params.pkl
params.tar
params.tar.gz
......@@ -6,33 +6,15 @@ passed to C++ side of Paddle.
The user api could be simpler and carefully designed.
"""
import py_paddle.swig_paddle as api
from py_paddle import DataProviderConverter
import paddle.trainer.PyDataProvider2 as dp
import numpy as np
import random
from mnist_util import read_from_mnist
from paddle.trainer_config_helpers import *
def optimizer_config():
settings(
learning_rate=1e-4,
learning_method=AdamOptimizer(),
batch_size=1000,
model_average=ModelAverage(average_window=0.5),
regularization=L2Regularization(rate=0.5))
import numpy as np
import paddle.v2 as paddle_v2
import py_paddle.swig_paddle as api
from paddle.trainer_config_helpers import *
from py_paddle import DataProviderConverter
def network_config():
imgs = data_layer(name='pixel', size=784)
hidden1 = fc_layer(input=imgs, size=200)
hidden2 = fc_layer(input=hidden1, size=200)
inference = fc_layer(input=hidden2, size=10, act=SoftmaxActivation())
cost = classification_cost(
input=inference, label=data_layer(
name='label', size=10))
outputs(cost)
from mnist_util import read_from_mnist
def init_parameter(network):
......@@ -75,19 +57,35 @@ def input_order_converter(generator):
def main():
api.initPaddle("-use_gpu=false", "-trainer_count=4") # use 4 cpu cores
# get enable_types for each optimizer.
# enable_types = [value, gradient, momentum, etc]
# For each optimizer(SGD, Adam), GradientMachine should enable different
# buffers.
opt_config_proto = parse_optimizer_config(optimizer_config)
opt_config = api.OptimizationConfig.createFromProto(opt_config_proto)
_temp_optimizer_ = api.ParameterOptimizer.create(opt_config)
enable_types = _temp_optimizer_.getParameterTypes()
optimizer = paddle_v2.optimizer.Adam(
learning_rate=1e-4,
batch_size=1000,
model_average=ModelAverage(average_window=0.5),
regularization=L2Regularization(rate=0.5))
# Create Local Updater. Local means not run in cluster.
# For a cluster training, here we can change to createRemoteUpdater
# in future.
updater = optimizer.create_local_updater()
assert isinstance(updater, api.ParameterUpdater)
# define network
images = paddle_v2.layer.data(
name='pixel', type=paddle_v2.data_type.dense_vector(784))
label = paddle_v2.layer.data(
name='label', type=paddle_v2.data_type.integer_value(10))
hidden1 = paddle_v2.layer.fc(input=images, size=200)
hidden2 = paddle_v2.layer.fc(input=hidden1, size=200)
inference = paddle_v2.layer.fc(input=hidden2,
size=10,
act=paddle_v2.activation.Softmax())
cost = paddle_v2.layer.classification_cost(input=inference, label=label)
# Create Simple Gradient Machine.
model_config = parse_network_config(network_config)
m = api.GradientMachine.createFromConfigProto(
model_config, api.CREATE_MODE_NORMAL, enable_types)
model_config = paddle_v2.layer.parse_network(cost)
m = api.GradientMachine.createFromConfigProto(model_config,
api.CREATE_MODE_NORMAL,
optimizer.enable_types())
# This type check is not useful. Only enable type hint in IDE.
# Such as PyCharm
......@@ -96,19 +94,12 @@ def main():
# Initialize Parameter by numpy.
init_parameter(network=m)
# Create Local Updater. Local means not run in cluster.
# For a cluster training, here we can change to createRemoteUpdater
# in future.
updater = api.ParameterUpdater.createLocalUpdater(opt_config)
assert isinstance(updater, api.ParameterUpdater)
# Initialize ParameterUpdater.
updater.init(m)
# DataProvider Converter is a utility convert Python Object to Paddle C++
# Input. The input format is as same as Paddle's DataProvider.
converter = DataProviderConverter(
input_types=[dp.dense_vector(784), dp.integer_value(10)])
converter = DataProviderConverter(input_types=[images.type, label.type])
train_file = './data/raw_data/train'
test_file = './data/raw_data/t10k'
......
import paddle.v2 as paddle
import gzip
def softmax_regression(img):
predict = paddle.layer.fc(input=img,
size=10,
act=paddle.activation.Softmax())
return predict
def multilayer_perceptron(img):
# The first fully-connected layer
hidden1 = paddle.layer.fc(input=img, size=128, act=paddle.activation.Relu())
# The second fully-connected layer and the according activation function
hidden2 = paddle.layer.fc(input=hidden1,
size=64,
act=paddle.activation.Relu())
# The thrid fully-connected layer, note that the hidden size should be 10,
# which is the number of unique digits
predict = paddle.layer.fc(input=hidden2,
size=10,
act=paddle.activation.Softmax())
return predict
def convolutional_neural_network(img):
# first conv layer
conv_pool_1 = paddle.networks.simple_img_conv_pool(
input=img,
filter_size=5,
num_filters=20,
num_channel=1,
pool_size=2,
pool_stride=2,
act=paddle.activation.Tanh())
# second conv layer
conv_pool_2 = paddle.networks.simple_img_conv_pool(
input=conv_pool_1,
filter_size=5,
num_filters=50,
num_channel=20,
pool_size=2,
pool_stride=2,
act=paddle.activation.Tanh())
# The first fully-connected layer
fc1 = paddle.layer.fc(input=conv_pool_2,
size=128,
act=paddle.activation.Tanh())
# The softmax layer, note that the hidden size should be 10,
# which is the number of unique digits
predict = paddle.layer.fc(input=fc1,
size=10,
act=paddle.activation.Softmax())
return predict
def main():
paddle.init(use_gpu=False, trainer_count=1)
# define network topology
images = paddle.layer.data(
name='pixel', type=paddle.data_type.dense_vector(784))
label = paddle.layer.data(
name='label', type=paddle.data_type.integer_value(10))
# Here we can build the prediction network in different ways. Please
# choose one by uncomment corresponding line.
predict = softmax_regression(images)
#predict = multilayer_perceptron(images)
#predict = convolutional_neural_network(images)
cost = paddle.layer.classification_cost(input=predict, label=label)
try:
with gzip.open('params.tar.gz', 'r') as f:
parameters = paddle.parameters.Parameters.from_tar(f)
except IOError:
parameters = paddle.parameters.create(cost)
optimizer = paddle.optimizer.Momentum(
learning_rate=0.1 / 128.0,
momentum=0.9,
regularization=paddle.optimizer.L2Regularization(rate=0.0005 * 128))
trainer = paddle.trainer.SGD(cost=cost,
parameters=parameters,
update_equation=optimizer)
lists = []
def event_handler(event):
if isinstance(event, paddle.event.EndIteration):
if event.batch_id % 1000 == 0:
print "Pass %d, Batch %d, Cost %f, %s" % (
event.pass_id, event.batch_id, event.cost, event.metrics)
with gzip.open('params.tar.gz', 'w') as f:
parameters.to_tar(f)
elif isinstance(event, paddle.event.EndPass):
result = trainer.test(reader=paddle.batch(
paddle.dataset.mnist.test(), batch_size=128))
print "Test with Pass %d, Cost %f, %s\n" % (
event.pass_id, result.cost, result.metrics)
lists.append((event.pass_id, result.cost,
result.metrics['classification_error_evaluator']))
trainer.train(
reader=paddle.batch(
paddle.reader.shuffle(
paddle.dataset.mnist.train(), buf_size=8192),
batch_size=128),
event_handler=event_handler,
num_passes=100)
# find the best pass
best = sorted(lists, key=lambda list: float(list[1]))[0]
print 'Best pass is %s, testing Avgcost is %s' % (best[0], best[1])
print 'The classification accuracy is %.2f%%' % (100 - float(best[2]) * 100)
test_creator = paddle.dataset.mnist.test()
test_data = []
for item in test_creator():
test_data.append((item[0], ))
if len(test_data) == 100:
break
# output is a softmax layer. It returns probabilities.
# Shape should be (100, 10)
probs = paddle.infer(
output_layer=predict, parameters=parameters, input=test_data)
print probs.shape
if __name__ == '__main__':
main()
......@@ -27,5 +27,6 @@ paddle train \
--num_passes=100 \
--save_dir=$output \
2>&1 | tee $log
paddle usage -l $log -e $? -n "mnist_train" >/dev/null 2>&1
python -m paddle.utils.plotcurve -i $log > plot.png
......@@ -156,7 +156,7 @@ class ImageClassifier():
# For oversampling, average predictions across crops.
# If not, the shape of output[name]: (1, class_number),
# the mean is also applicable.
res[name] = output[name].mean(0)
res[name] = output[name]['value'].mean(0)
return res
......
......@@ -25,6 +25,7 @@ log_file="$bin_dir/train.log"
pushd "$home_dir"
cfg=trainer_config.lr.py
paddle train \
--start_pserver=false \
--config=$cfg \
--save_dir=${model_dir} \
--trainer_count=4 \
......
......@@ -26,5 +26,7 @@ paddle train \
--init_model_path=$model \
--config_args=is_predict=1 \
--predict_output_dir=. \
2>&1 | tee 'predict.log'
paddle usage -l 'predict.log' -e $? -n "quick_start_predict_${cfg}" >/dev/null 2>&1
mv rank-00000 result.txt
......@@ -31,3 +31,4 @@ paddle train \
--show_parameter_stats_period=100 \
--test_all_data_in_one_period=1 \
2>&1 | tee 'train.log'
paddle usage -l "train.log" -e $? -n "quick_start_${cfg}" >/dev/null 2>&1
import paddle.v2 as paddle
import cPickle
import copy
def main():
paddle.init(use_gpu=False)
movie_title_dict = paddle.dataset.movielens.get_movie_title_dict()
uid = paddle.layer.data(
name='user_id',
type=paddle.data_type.integer_value(
paddle.dataset.movielens.max_user_id() + 1))
usr_emb = paddle.layer.embedding(input=uid, size=32)
usr_gender_id = paddle.layer.data(
name='gender_id', type=paddle.data_type.integer_value(2))
usr_gender_emb = paddle.layer.embedding(input=usr_gender_id, size=16)
usr_age_id = paddle.layer.data(
name='age_id',
type=paddle.data_type.integer_value(
len(paddle.dataset.movielens.age_table)))
usr_age_emb = paddle.layer.embedding(input=usr_age_id, size=16)
usr_job_id = paddle.layer.data(
name='job_id',
type=paddle.data_type.integer_value(paddle.dataset.movielens.max_job_id(
) + 1))
usr_job_emb = paddle.layer.embedding(input=usr_job_id, size=16)
usr_combined_features = paddle.layer.fc(
input=[usr_emb, usr_gender_emb, usr_age_emb, usr_job_emb],
size=200,
act=paddle.activation.Tanh())
mov_id = paddle.layer.data(
name='movie_id',
type=paddle.data_type.integer_value(
paddle.dataset.movielens.max_movie_id() + 1))
mov_emb = paddle.layer.embedding(input=mov_id, size=32)
mov_categories = paddle.layer.data(
name='category_id',
type=paddle.data_type.sparse_binary_vector(
len(paddle.dataset.movielens.movie_categories())))
mov_categories_hidden = paddle.layer.fc(input=mov_categories, size=32)
mov_title_id = paddle.layer.data(
name='movie_title',
type=paddle.data_type.integer_value_sequence(len(movie_title_dict)))
mov_title_emb = paddle.layer.embedding(input=mov_title_id, size=32)
mov_title_conv = paddle.networks.sequence_conv_pool(
input=mov_title_emb, hidden_size=32, context_len=3)
mov_combined_features = paddle.layer.fc(
input=[mov_emb, mov_categories_hidden, mov_title_conv],
size=200,
act=paddle.activation.Tanh())
inference = paddle.layer.cos_sim(
a=usr_combined_features, b=mov_combined_features, size=1, scale=5)
cost = paddle.layer.mse_cost(
input=inference,
label=paddle.layer.data(
name='score', type=paddle.data_type.dense_vector(1)))
parameters = paddle.parameters.create(cost)
trainer = paddle.trainer.SGD(cost=cost,
parameters=parameters,
update_equation=paddle.optimizer.Adam(
learning_rate=1e-4))
feeding = {
'user_id': 0,
'gender_id': 1,
'age_id': 2,
'job_id': 3,
'movie_id': 4,
'category_id': 5,
'movie_title': 6,
'score': 7
}
def event_handler(event):
if isinstance(event, paddle.event.EndIteration):
if event.batch_id % 100 == 0:
print "Pass %d Batch %d Cost %.2f" % (
event.pass_id, event.batch_id, event.cost)
trainer.train(
reader=paddle.batch(
paddle.reader.shuffle(
paddle.dataset.movielens.train(), buf_size=8192),
batch_size=256),
event_handler=event_handler,
feeding=feeding,
num_passes=1)
user_id = 234
movie_id = 345
user = paddle.dataset.movielens.user_info()[user_id]
movie = paddle.dataset.movielens.movie_info()[movie_id]
feature = user.value() + movie.value()
def reader():
yield feature
infer_dict = copy.copy(feeding)
del infer_dict['score']
prediction = paddle.infer(
output=inference,
parameters=parameters,
reader=paddle.batch(
reader, batch_size=32),
feeding=infer_dict)
print(prediction + 5) / 2
if __name__ == '__main__':
main()
......@@ -22,3 +22,4 @@ paddle train \
--log_period=100 \
--dot_period=1 \
--num_passes=50 2>&1 | tee 'log.txt'
paddle usage -l log.txt -e $? -n "recommendation" >/dev/null 2>&1
......@@ -86,10 +86,7 @@ movie_feature = construct_feature("movie")
user_feature = construct_feature("user")
similarity = cos_sim(a=movie_feature, b=user_feature)
if not is_predict:
outputs(
regression_cost(
input=similarity, label=data_layer(
'rating', size=1)))
outputs(mse_cost(input=similarity, label=data_layer('rating', size=1)))
define_py_data_sources2(
'data/train.list',
......
import sys
import math
import numpy as np
import paddle.v2 as paddle
import paddle.v2.dataset.conll05 as conll05
def db_lstm():
word_dict, verb_dict, label_dict = conll05.get_dict()
word_dict_len = len(word_dict)
label_dict_len = len(label_dict)
pred_len = len(verb_dict)
mark_dict_len = 2
word_dim = 32
mark_dim = 5
hidden_dim = 512
depth = 8
#8 features
def d_type(size):
return paddle.data_type.integer_value_sequence(size)
word = paddle.layer.data(name='word_data', type=d_type(word_dict_len))
predicate = paddle.layer.data(name='verb_data', type=d_type(pred_len))
ctx_n2 = paddle.layer.data(name='ctx_n2_data', type=d_type(word_dict_len))
ctx_n1 = paddle.layer.data(name='ctx_n1_data', type=d_type(word_dict_len))
ctx_0 = paddle.layer.data(name='ctx_0_data', type=d_type(word_dict_len))
ctx_p1 = paddle.layer.data(name='ctx_p1_data', type=d_type(word_dict_len))
ctx_p2 = paddle.layer.data(name='ctx_p2_data', type=d_type(word_dict_len))
mark = paddle.layer.data(name='mark_data', type=d_type(mark_dict_len))
target = paddle.layer.data(name='target', type=d_type(label_dict_len))
default_std = 1 / math.sqrt(hidden_dim) / 3.0
emb_para = paddle.attr.Param(name='emb', initial_std=0., learning_rate=0.)
std_0 = paddle.attr.Param(initial_std=0.)
std_default = paddle.attr.Param(initial_std=default_std)
predicate_embedding = paddle.layer.embedding(
size=word_dim,
input=predicate,
param_attr=paddle.attr.Param(
name='vemb', initial_std=default_std))
mark_embedding = paddle.layer.embedding(
size=mark_dim, input=mark, param_attr=std_0)
word_input = [word, ctx_n2, ctx_n1, ctx_0, ctx_p1, ctx_p2]
emb_layers = [
paddle.layer.embedding(
size=word_dim, input=x, param_attr=emb_para) for x in word_input
]
emb_layers.append(predicate_embedding)
emb_layers.append(mark_embedding)
hidden_0 = paddle.layer.mixed(
size=hidden_dim,
bias_attr=std_default,
input=[
paddle.layer.full_matrix_projection(
input=emb, param_attr=std_default) for emb in emb_layers
])
mix_hidden_lr = 1e-3
lstm_para_attr = paddle.attr.Param(initial_std=0.0, learning_rate=1.0)
hidden_para_attr = paddle.attr.Param(
initial_std=default_std, learning_rate=mix_hidden_lr)
lstm_0 = paddle.layer.lstmemory(
input=hidden_0,
act=paddle.activation.Relu(),
gate_act=paddle.activation.Sigmoid(),
state_act=paddle.activation.Sigmoid(),
bias_attr=std_0,
param_attr=lstm_para_attr)
#stack L-LSTM and R-LSTM with direct edges
input_tmp = [hidden_0, lstm_0]
for i in range(1, depth):
mix_hidden = paddle.layer.mixed(
size=hidden_dim,
bias_attr=std_default,
input=[
paddle.layer.full_matrix_projection(
input=input_tmp[0], param_attr=hidden_para_attr),
paddle.layer.full_matrix_projection(
input=input_tmp[1], param_attr=lstm_para_attr)
])
lstm = paddle.layer.lstmemory(
input=mix_hidden,
act=paddle.activation.Relu(),
gate_act=paddle.activation.Sigmoid(),
state_act=paddle.activation.Sigmoid(),
reverse=((i % 2) == 1),
bias_attr=std_0,
param_attr=lstm_para_attr)
input_tmp = [mix_hidden, lstm]
feature_out = paddle.layer.mixed(
size=label_dict_len,
bias_attr=std_default,
input=[
paddle.layer.full_matrix_projection(
input=input_tmp[0], param_attr=hidden_para_attr),
paddle.layer.full_matrix_projection(
input=input_tmp[1], param_attr=lstm_para_attr)
], )
crf_cost = paddle.layer.crf(size=label_dict_len,
input=feature_out,
label=target,
param_attr=paddle.attr.Param(
name='crfw',
initial_std=default_std,
learning_rate=mix_hidden_lr))
crf_dec = paddle.layer.crf_decoding(
name='crf_dec_l',
size=label_dict_len,
input=feature_out,
label=target,
param_attr=paddle.attr.Param(name='crfw'))
return crf_cost, crf_dec
def load_parameter(file_name, h, w):
with open(file_name, 'rb') as f:
f.read(16) # skip header.
return np.fromfile(f, dtype=np.float32).reshape(h, w)
def main():
paddle.init(use_gpu=False, trainer_count=1)
# define network topology
crf_cost, crf_dec = db_lstm()
# create parameters
parameters = paddle.parameters.create([crf_cost, crf_dec])
# create optimizer
optimizer = paddle.optimizer.Momentum(
momentum=0,
learning_rate=2e-2,
regularization=paddle.optimizer.L2Regularization(rate=8e-4),
model_average=paddle.optimizer.ModelAverage(
average_window=0.5, max_average_window=10000), )
def event_handler(event):
if isinstance(event, paddle.event.EndIteration):
if event.batch_id % 100 == 0:
print "Pass %d, Batch %d, Cost %f, %s" % (
event.pass_id, event.batch_id, event.cost, event.metrics)
trainer = paddle.trainer.SGD(cost=crf_cost,
parameters=parameters,
update_equation=optimizer)
parameters.set('emb', load_parameter(conll05.get_embedding(), 44068, 32))
trn_reader = paddle.batch(
paddle.reader.shuffle(
conll05.test(), buf_size=8192), batch_size=10)
feeding = {
'word_data': 0,
'ctx_n2_data': 1,
'ctx_n1_data': 2,
'ctx_0_data': 3,
'ctx_p1_data': 4,
'ctx_p2_data': 5,
'verb_data': 6,
'mark_data': 7,
'target': 8
}
trainer.train(
reader=trn_reader,
event_handler=event_handler,
num_passes=10000,
feeding=feeding)
if __name__ == '__main__':
main()
......@@ -38,3 +38,4 @@ paddle train \
--config_args=is_test=1 \
--test_all_data_in_one_period=1 \
2>&1 | tee 'test.log'
paddle usage -l test.log -e $? -n "semantic_role_labeling_test" >/dev/null 2>&1
......@@ -27,3 +27,4 @@ paddle train \
--load_missing_parameter_strategy=rand \
--test_all_data_in_one_period=1 \
2>&1 | tee 'train.log'
paddle usage -l train.log -e $? -n "semantic_role_labeling_train" >/dev/null 2>&1
......@@ -32,4 +32,6 @@ def process(settings, file_name):
word_slot = [
settings.word_dict[w] for w in words if w in settings.word_dict
]
if not word_slot:
continue
yield word_slot, label
......@@ -138,7 +138,11 @@ def main():
batch = []
for line in sys.stdin:
batch.append([predict.get_index(line)])
words = predict.get_index(line)
if words:
batch.append([words])
else:
print('All the words in [%s] are not in the dictionary.' % line)
if len(batch) == batch_size:
predict.batch_predict(batch)
batch = []
......
......@@ -37,3 +37,4 @@ paddle train --config=$net_conf \
--trainer_count=4 \
--config_args=is_test=1 \
2>&1 | tee 'test.log'
paddle usage -l test.log -e $? -n "sentiment_test" >/dev/null 2>&1
......@@ -27,3 +27,4 @@ paddle train --config=$config \
--show_parameter_stats_period=100 \
--test_all_data_in_one_period=1 \
2>&1 | tee 'train.log'
paddle usage -l train.log -e $? -n "sentiment_train" >/dev/null 2>&1
# Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserved
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import sys
import paddle.v2 as paddle
def convolution_net(input_dim, class_dim=2, emb_dim=128, hid_dim=128):
data = paddle.layer.data("word",
paddle.data_type.integer_value_sequence(input_dim))
emb = paddle.layer.embedding(input=data, size=emb_dim)
conv_3 = paddle.networks.sequence_conv_pool(
input=emb, context_len=3, hidden_size=hid_dim)
conv_4 = paddle.networks.sequence_conv_pool(
input=emb, context_len=4, hidden_size=hid_dim)
output = paddle.layer.fc(input=[conv_3, conv_4],
size=class_dim,
act=paddle.activation.Softmax())
lbl = paddle.layer.data("label", paddle.data_type.integer_value(2))
cost = paddle.layer.classification_cost(input=output, label=lbl)
return cost
def stacked_lstm_net(input_dim,
class_dim=2,
emb_dim=128,
hid_dim=512,
stacked_num=3):
"""
A Wrapper for sentiment classification task.
This network uses bi-directional recurrent network,
consisting three LSTM layers. This configure is referred to
the paper as following url, but use fewer layrs.
http://www.aclweb.org/anthology/P15-1109
input_dim: here is word dictionary dimension.
class_dim: number of categories.
emb_dim: dimension of word embedding.
hid_dim: dimension of hidden layer.
stacked_num: number of stacked lstm-hidden layer.
"""
assert stacked_num % 2 == 1
layer_attr = paddle.attr.Extra(drop_rate=0.5)
fc_para_attr = paddle.attr.Param(learning_rate=1e-3)
lstm_para_attr = paddle.attr.Param(initial_std=0., learning_rate=1.)
para_attr = [fc_para_attr, lstm_para_attr]
bias_attr = paddle.attr.Param(initial_std=0., l2_rate=0.)
relu = paddle.activation.Relu()
linear = paddle.activation.Linear()
data = paddle.layer.data("word",
paddle.data_type.integer_value_sequence(input_dim))
emb = paddle.layer.embedding(input=data, size=emb_dim)
fc1 = paddle.layer.fc(input=emb,
size=hid_dim,
act=linear,
bias_attr=bias_attr)
lstm1 = paddle.layer.lstmemory(
input=fc1, act=relu, bias_attr=bias_attr, layer_attr=layer_attr)
inputs = [fc1, lstm1]
for i in range(2, stacked_num + 1):
fc = paddle.layer.fc(input=inputs,
size=hid_dim,
act=linear,
param_attr=para_attr,
bias_attr=bias_attr)
lstm = paddle.layer.lstmemory(
input=fc,
reverse=(i % 2) == 0,
act=relu,
bias_attr=bias_attr,
layer_attr=layer_attr)
inputs = [fc, lstm]
fc_last = paddle.layer.pooling(
input=inputs[0], pooling_type=paddle.pooling.Max())
lstm_last = paddle.layer.pooling(
input=inputs[1], pooling_type=paddle.pooling.Max())
output = paddle.layer.fc(input=[fc_last, lstm_last],
size=class_dim,
act=paddle.activation.Softmax(),
bias_attr=bias_attr,
param_attr=para_attr)
lbl = paddle.layer.data("label", paddle.data_type.integer_value(2))
cost = paddle.layer.classification_cost(input=output, label=lbl)
return cost
if __name__ == '__main__':
# init
paddle.init(use_gpu=False)
#data
print 'load dictionary...'
word_dict = paddle.dataset.imdb.word_dict()
dict_dim = len(word_dict)
class_dim = 2
train_reader = paddle.batch(
paddle.reader.shuffle(
lambda: paddle.dataset.imdb.train(word_dict), buf_size=1000),
batch_size=100)
test_reader = paddle.batch(
lambda: paddle.dataset.imdb.test(word_dict), batch_size=100)
feeding = {'word': 0, 'label': 1}
# network config
# Please choose the way to build the network
# by uncommenting the corresponding line.
cost = convolution_net(dict_dim, class_dim=class_dim)
# cost = stacked_lstm_net(dict_dim, class_dim=class_dim, stacked_num=3)
# create parameters
parameters = paddle.parameters.create(cost)
# create optimizer
adam_optimizer = paddle.optimizer.Adam(
learning_rate=2e-3,
regularization=paddle.optimizer.L2Regularization(rate=8e-4),
model_average=paddle.optimizer.ModelAverage(average_window=0.5))
# End batch and end pass event handler
def event_handler(event):
if isinstance(event, paddle.event.EndIteration):
if event.batch_id % 100 == 0:
print "\nPass %d, Batch %d, Cost %f, %s" % (
event.pass_id, event.batch_id, event.cost, event.metrics)
else:
sys.stdout.write('.')
sys.stdout.flush()
if isinstance(event, paddle.event.EndPass):
result = trainer.test(reader=test_reader, feeding=feeding)
print "\nTest with Pass %d, %s" % (event.pass_id, result.metrics)
# create trainer
trainer = paddle.trainer.SGD(cost=cost,
parameters=parameters,
update_equation=adam_optimizer)
trainer.train(
reader=train_reader,
event_handler=event_handler,
feeding=feeding,
num_passes=2)
......@@ -29,7 +29,7 @@ settings(
batch_size=128,
learning_rate=2e-3,
learning_method=AdamOptimizer(),
average_window=0.5,
model_average=ModelAverage(0.5),
regularization=L2Regularization(8e-4),
gradient_clipping_threshold=25)
......
import sys
import paddle.v2 as paddle
def seqToseq_net(source_dict_dim, target_dict_dim, is_generating=False):
### Network Architecture
word_vector_dim = 512 # dimension of word vector
decoder_size = 512 # dimension of hidden unit in GRU Decoder network
encoder_size = 512 # dimension of hidden unit in GRU Encoder network
beam_size = 3
max_length = 250
#### Encoder
src_word_id = paddle.layer.data(
name='source_language_word',
type=paddle.data_type.integer_value_sequence(source_dict_dim))
src_embedding = paddle.layer.embedding(
input=src_word_id,
size=word_vector_dim,
param_attr=paddle.attr.ParamAttr(name='_source_language_embedding'))
src_forward = paddle.networks.simple_gru(
input=src_embedding, size=encoder_size)
src_backward = paddle.networks.simple_gru(
input=src_embedding, size=encoder_size, reverse=True)
encoded_vector = paddle.layer.concat(input=[src_forward, src_backward])
#### Decoder
with paddle.layer.mixed(size=decoder_size) as encoded_proj:
encoded_proj += paddle.layer.full_matrix_projection(
input=encoded_vector)
backward_first = paddle.layer.first_seq(input=src_backward)
with paddle.layer.mixed(
size=decoder_size, act=paddle.activation.Tanh()) as decoder_boot:
decoder_boot += paddle.layer.full_matrix_projection(
input=backward_first)
def gru_decoder_with_attention(enc_vec, enc_proj, current_word):
decoder_mem = paddle.layer.memory(
name='gru_decoder', size=decoder_size, boot_layer=decoder_boot)
context = paddle.networks.simple_attention(
encoded_sequence=enc_vec,
encoded_proj=enc_proj,
decoder_state=decoder_mem)
with paddle.layer.mixed(size=decoder_size * 3) as decoder_inputs:
decoder_inputs += paddle.layer.full_matrix_projection(input=context)
decoder_inputs += paddle.layer.full_matrix_projection(
input=current_word)
gru_step = paddle.layer.gru_step(
name='gru_decoder',
input=decoder_inputs,
output_mem=decoder_mem,
size=decoder_size)
with paddle.layer.mixed(
size=target_dict_dim,
bias_attr=True,
act=paddle.activation.Softmax()) as out:
out += paddle.layer.full_matrix_projection(input=gru_step)
return out
decoder_group_name = "decoder_group"
group_input1 = paddle.layer.StaticInputV2(input=encoded_vector, is_seq=True)
group_input2 = paddle.layer.StaticInputV2(input=encoded_proj, is_seq=True)
group_inputs = [group_input1, group_input2]
if not is_generating:
trg_embedding = paddle.layer.embedding(
input=paddle.layer.data(
name='target_language_word',
type=paddle.data_type.integer_value_sequence(target_dict_dim)),
size=word_vector_dim,
param_attr=paddle.attr.ParamAttr(name='_target_language_embedding'))
group_inputs.append(trg_embedding)
# For decoder equipped with attention mechanism, in training,
# target embeding (the groudtruth) is the data input,
# while encoded source sequence is accessed to as an unbounded memory.
# Here, the StaticInput defines a read-only memory
# for the recurrent_group.
decoder = paddle.layer.recurrent_group(
name=decoder_group_name,
step=gru_decoder_with_attention,
input=group_inputs)
lbl = paddle.layer.data(
name='target_language_next_word',
type=paddle.data_type.integer_value_sequence(target_dict_dim))
cost = paddle.layer.classification_cost(input=decoder, label=lbl)
return cost
else:
# In generation, the decoder predicts a next target word based on
# the encoded source sequence and the last generated target word.
# The encoded source sequence (encoder's output) must be specified by
# StaticInput, which is a read-only memory.
# Embedding of the last generated word is automatically gotten by
# GeneratedInputs, which is initialized by a start mark, such as <s>,
# and must be included in generation.
trg_embedding = paddle.layer.GeneratedInputV2(
size=target_dict_dim,
embedding_name='_target_language_embedding',
embedding_size=word_vector_dim)
group_inputs.append(trg_embedding)
beam_gen = paddle.layer.beam_search(
name=decoder_group_name,
step=gru_decoder_with_attention,
input=group_inputs,
bos_id=0,
eos_id=1,
beam_size=beam_size,
max_length=max_length)
return beam_gen
def main():
paddle.init(use_gpu=False, trainer_count=1)
is_generating = False
# source and target dict dim.
dict_size = 30000
source_dict_dim = target_dict_dim = dict_size
# train the network
if not is_generating:
cost = seqToseq_net(source_dict_dim, target_dict_dim)
parameters = paddle.parameters.create(cost)
# define optimize method and trainer
optimizer = paddle.optimizer.Adam(
learning_rate=5e-5,
regularization=paddle.optimizer.L2Regularization(rate=8e-4))
trainer = paddle.trainer.SGD(cost=cost,
parameters=parameters,
update_equation=optimizer)
# define data reader
wmt14_reader = paddle.batch(
paddle.reader.shuffle(
paddle.dataset.wmt14.train(dict_size), buf_size=8192),
batch_size=5)
# define event_handler callback
def event_handler(event):
if isinstance(event, paddle.event.EndIteration):
if event.batch_id % 10 == 0:
print "\nPass %d, Batch %d, Cost %f, %s" % (
event.pass_id, event.batch_id, event.cost,
event.metrics)
else:
sys.stdout.write('.')
sys.stdout.flush()
# start to train
trainer.train(
reader=wmt14_reader, event_handler=event_handler, num_passes=2)
# generate a english sequence to french
else:
# use the first 3 samples for generation
gen_creator = paddle.dataset.wmt14.gen(dict_size)
gen_data = []
gen_num = 3
for item in gen_creator():
gen_data.append((item[0], ))
if len(gen_data) == gen_num:
break
beam_gen = seqToseq_net(source_dict_dim, target_dict_dim, is_generating)
# get the pretrained model, whose bleu = 26.92
parameters = paddle.dataset.wmt14.model()
# prob is the prediction probabilities, and id is the prediction word.
beam_result = paddle.infer(
output_layer=beam_gen,
parameters=parameters,
input=gen_data,
field=['prob', 'id'])
# get the dictionary
src_dict, trg_dict = paddle.dataset.wmt14.get_dict(dict_size)
# the delimited element of generated sequences is -1,
# the first element of each generated sequence is the sequence length
seq_list = []
seq = []
for w in beam_result[1]:
if w != -1:
seq.append(w)
else:
seq_list.append(' '.join([trg_dict.get(w) for w in seq[1:]]))
seq = []
prob = beam_result[0]
beam_size = 3
for i in xrange(gen_num):
print "\n*******************************************************\n"
print "src:", ' '.join(
[src_dict.get(w) for w in gen_data[i][0]]), "\n"
for j in xrange(beam_size):
print "prob = %f:" % (prob[i][j]), seq_list[i * beam_size + j]
if __name__ == '__main__':
main()
......@@ -27,3 +27,4 @@ paddle train \
--log_period=10 \
--dot_period=5 \
2>&1 | tee 'paraphrase/train.log'
paddle usage -l 'paraphrase/train.log' -e $? -n "seqToseq_paraphrase_train" >/dev/null 2>&1
......@@ -69,7 +69,8 @@ def gru_encoder_decoder(data_conf,
encoder_size=512,
decoder_size=512,
beam_size=3,
max_length=250):
max_length=250,
error_clipping=50):
"""
A wrapper for an attention version of GRU Encoder-Decoder network
is_generating: whether this config is used for generating
......@@ -90,9 +91,19 @@ def gru_encoder_decoder(data_conf,
input=src_word_id,
size=word_vector_dim,
param_attr=ParamAttr(name='_source_language_embedding'))
src_forward = simple_gru(input=src_embedding, size=encoder_size)
src_forward = simple_gru(
input=src_embedding,
size=encoder_size,
naive=True,
gru_layer_attr=ExtraLayerAttribute(
error_clipping_threshold=error_clipping))
src_backward = simple_gru(
input=src_embedding, size=encoder_size, reverse=True)
input=src_embedding,
size=encoder_size,
reverse=True,
naive=True,
gru_layer_attr=ExtraLayerAttribute(
error_clipping_threshold=error_clipping))
encoded_vector = concat_layer(input=[src_forward, src_backward])
with mixed_layer(size=decoder_size) as encoded_proj:
......@@ -117,11 +128,13 @@ def gru_encoder_decoder(data_conf,
decoder_inputs += full_matrix_projection(input=context)
decoder_inputs += full_matrix_projection(input=current_word)
gru_step = gru_step_layer(
gru_step = gru_step_naive_layer(
name='gru_decoder',
input=decoder_inputs,
output_mem=decoder_mem,
size=decoder_size)
size=decoder_size,
layer_attr=ExtraLayerAttribute(
error_clipping_threshold=error_clipping))
with mixed_layer(
size=target_dict_dim, bias_attr=True,
......
......@@ -24,3 +24,4 @@ paddle train \
--test_pass=12 \
--trainer_count=1 \
2>&1 | tee 'translation/gen.log'
paddle usage -l 'translation/gen.log' -e $? -n "seqToseq_translation_gen" >/dev/null 2>&1
......@@ -25,3 +25,4 @@ paddle train \
--log_period=10 \
--dot_period=5 \
2>&1 | tee 'translation/train.log'
paddle usage -l 'translation/train.log' -e $? -n "seqToseq_translation_train" >/dev/null 2>&1
......@@ -27,7 +27,7 @@ settings(
learning_method=MomentumOptimizer(),
batch_size=batch_size,
regularization=L2Regularization(batch_size * 1e-4),
average_window=0.5,
model_average=ModelAverage(0.5),
learning_rate=1e-1,
learning_rate_decay_a=1e-5,
learning_rate_decay_b=0.25, )
......
......@@ -27,7 +27,7 @@ settings(
learning_method=MomentumOptimizer(),
batch_size=batch_size,
regularization=L2Regularization(batch_size * 1e-5),
average_window=0.5,
model_average=ModelAverage(0.5),
learning_rate=2e-3,
learning_rate_decay_a=5e-7,
learning_rate_decay_b=0.5, )
......
......@@ -7,4 +7,6 @@ paddle train \
--dot_period=10 \
--log_period=1000 \
--test_period=0 \
--num_passes=10
--num_passes=10 \
2>&1 | tee 'train.log'
paddle usage -l 'train.log' -e $? -n "sequence_tagging_train" >/dev/null 2>&1
......@@ -7,3 +7,5 @@ paddle train \
--log_period=10000 \
--test_period=0 \
--num_passes=10
2>&1 | tee 'train_linear.log'
paddle usage -l 'train_linear.log' -e $? -n "sequence_tagging_train_linear" >/dev/null 2>&1
......@@ -25,6 +25,6 @@ paddle train \
--config_args=is_predict=1 \
--predict_output_dir=.
python gen_result.py > result.txt
python gen_result.py > result.csv
rm -rf rank-00000
import math
import paddle.v2 as paddle
dictsize = 1953
embsize = 32
hiddensize = 256
N = 5
def wordemb(inlayer):
wordemb = paddle.layer.table_projection(
input=inlayer,
size=embsize,
param_attr=paddle.attr.Param(
name="_proj",
initial_std=0.001,
learning_rate=1,
l2_rate=0, ))
return wordemb
def main():
paddle.init(use_gpu=False, trainer_count=1)
word_dict = paddle.dataset.imikolov.build_dict()
dict_size = len(word_dict)
firstword = paddle.layer.data(
name="firstw", type=paddle.data_type.integer_value(dict_size))
secondword = paddle.layer.data(
name="secondw", type=paddle.data_type.integer_value(dict_size))
thirdword = paddle.layer.data(
name="thirdw", type=paddle.data_type.integer_value(dict_size))
fourthword = paddle.layer.data(
name="fourthw", type=paddle.data_type.integer_value(dict_size))
nextword = paddle.layer.data(
name="fifthw", type=paddle.data_type.integer_value(dict_size))
Efirst = wordemb(firstword)
Esecond = wordemb(secondword)
Ethird = wordemb(thirdword)
Efourth = wordemb(fourthword)
contextemb = paddle.layer.concat(input=[Efirst, Esecond, Ethird, Efourth])
hidden1 = paddle.layer.fc(input=contextemb,
size=hiddensize,
act=paddle.activation.Sigmoid(),
layer_attr=paddle.attr.Extra(drop_rate=0.5),
bias_attr=paddle.attr.Param(learning_rate=2),
param_attr=paddle.attr.Param(
initial_std=1. / math.sqrt(embsize * 8),
learning_rate=1))
predictword = paddle.layer.fc(input=hidden1,
size=dict_size,
bias_attr=paddle.attr.Param(learning_rate=2),
act=paddle.activation.Softmax())
def event_handler(event):
if isinstance(event, paddle.event.EndIteration):
if event.batch_id % 100 == 0:
result = trainer.test(
paddle.batch(
paddle.dataset.imikolov.test(word_dict, N), 32))
print "Pass %d, Batch %d, Cost %f, %s, Testing metrics %s" % (
event.pass_id, event.batch_id, event.cost, event.metrics,
result.metrics)
cost = paddle.layer.classification_cost(input=predictword, label=nextword)
parameters = paddle.parameters.create(cost)
adam_optimizer = paddle.optimizer.Adam(
learning_rate=3e-3,
regularization=paddle.optimizer.L2Regularization(8e-4))
trainer = paddle.trainer.SGD(cost, parameters, adam_optimizer)
trainer.train(
paddle.batch(paddle.dataset.imikolov.train(word_dict, N), 32),
num_passes=30,
event_handler=event_handler)
if __name__ == '__main__':
main()
API中文手册
============
DataProvider API
----------------
.. toctree::
:maxdepth: 1
data_provider/dataprovider_cn.rst
data_provider/pydataprovider2_cn.rst
.. _api_trainer_config:
Model Config API
----------------
.. toctree::
:maxdepth: 1
trainer_config_helpers/optimizers.rst
trainer_config_helpers/data_sources.rst
trainer_config_helpers/layers.rst
trainer_config_helpers/activations.rst
trainer_config_helpers/poolings.rst
trainer_config_helpers/networks.rst
trainer_config_helpers/evaluators.rst
trainer_config_helpers/attrs.rst
Applications API
----------------
API
===
.. toctree::
:maxdepth: 1
predict/swig_py_paddle_cn.rst
模型配置 <v2/model_configs.rst>
数据访问 <v2/data.rst>
训练与应用 <v2/run_logic.rst>
API
===
DataProvider API
----------------
.. toctree::
:maxdepth: 1
data_provider/dataprovider_en.rst
data_provider/pydataprovider2_en.rst
.. _api_trainer_config:
Model Config API
----------------
.. toctree::
:maxdepth: 1
trainer_config_helpers/optimizers.rst
trainer_config_helpers/data_sources.rst
trainer_config_helpers/layers.rst
trainer_config_helpers/activations.rst
trainer_config_helpers/poolings.rst
trainer_config_helpers/networks.rst
trainer_config_helpers/evaluators.rst
trainer_config_helpers/attrs.rst
Applications API
----------------
.. toctree::
:maxdepth: 1
predict/swig_py_paddle_en.rst
v2/model_configs.rst
v2/data.rst
v2/run_logic.rst
......@@ -178,7 +178,7 @@ input_types
+++++++++++
PaddlePaddle has four data types, and three sequence types.
The four data types are:
The four data types are:
* :code:`dense_vector`: dense float vector.
* :code:`sparse_binary_vector`: sparse binary vector, most of the value is 0, and
......@@ -231,7 +231,7 @@ Its parameters lists as follows:
* :code:`is_train` is a bool parameter that indicates the DataProvider is used in
training or testing.
* :code:`file_list` is the list of all files.
* User-defined parameters args can be set in training configuration.
Note, PaddlePaddle reserves the right to add pre-defined parameter, so please
......
API中文手册
============
DataProvider API
----------------
.. toctree::
:maxdepth: 1
data_provider/dataprovider_cn.rst
data_provider/pydataprovider2_cn.rst
.. _api_trainer_config:
Model Config API
----------------
.. toctree::
:maxdepth: 1
trainer_config_helpers/optimizers.rst
trainer_config_helpers/data_sources.rst
trainer_config_helpers/layers.rst
trainer_config_helpers/activations.rst
trainer_config_helpers/poolings.rst
trainer_config_helpers/networks.rst
trainer_config_helpers/evaluators.rst
trainer_config_helpers/attrs.rst
Applications API
----------------
.. toctree::
:maxdepth: 1
predict/swig_py_paddle_cn.rst
API
===
DataProvider API
----------------
.. toctree::
:maxdepth: 1
data_provider/dataprovider_en.rst
data_provider/pydataprovider2_en.rst
.. _api_trainer_config:
Model Config API
----------------
.. toctree::
:maxdepth: 1
trainer_config_helpers/optimizers.rst
trainer_config_helpers/data_sources.rst
trainer_config_helpers/layers.rst
trainer_config_helpers/activations.rst
trainer_config_helpers/poolings.rst
trainer_config_helpers/networks.rst
trainer_config_helpers/evaluators.rst
trainer_config_helpers/attrs.rst
Applications API
----------------
.. toctree::
:maxdepth: 1
predict/swig_py_paddle_en.rst
......@@ -139,24 +139,12 @@ lstmemory
:members: lstmemory
:noindex:
lstm_step_layer
---------------
.. automodule:: paddle.trainer_config_helpers.layers
:members: lstm_step_layer
:noindex:
grumemory
---------
.. automodule:: paddle.trainer_config_helpers.layers
:members: grumemory
:noindex:
gru_step_layer
---------------
.. automodule:: paddle.trainer_config_helpers.layers
:members: gru_step_layer
:noindex:
Recurrent Layer Group
=====================
......@@ -172,6 +160,18 @@ recurrent_group
:members: recurrent_group
:noindex:
lstm_step_layer
---------------
.. automodule:: paddle.trainer_config_helpers.layers
:members: lstm_step_layer
:noindex:
gru_step_layer
---------------
.. automodule:: paddle.trainer_config_helpers.layers
:members: gru_step_layer
:noindex:
beam_search
------------
.. automodule:: paddle.trainer_config_helpers.layers
......@@ -279,6 +279,12 @@ concat_layer
:members: concat_layer
:noindex:
seq_concat_layer
----------------
.. automodule:: paddle.trainer_config_helpers.layers
:members: seq_concat_layer
:noindex:
Reshaping Layers
================
......@@ -302,6 +308,18 @@ repeat_layer
:members: repeat_layer
:noindex:
rotate_layer
------------
.. automodule:: paddle.trainer_config_helpers.layers
:members: rotate_layer
:noindex:
seq_reshape_layer
-----------------
.. automodule:: paddle.trainer_config_helpers.layers
:members: seq_reshape_layer
:noindex:
Math Layers
===========
......@@ -382,6 +400,15 @@ sampling_id_layer
:members: sampling_id_layer
:noindex:
Slicing and Joining Layers
==========================
pad_layer
-----------
.. automodule:: paddle.trainer_config_helpers.layers
:members: pad_layer
:noindex:
.. _api_trainer_config_helpers_layers_cost_layers:
Cost Layers
......@@ -405,6 +432,12 @@ multi_binary_label_cross_entropy
:members: multi_binary_label_cross_entropy
:noindex:
mse_cost
---------
.. automodule:: paddle.trainer_config_helpers.layers
:members: mse_cost
:noindex:
huber_cost
----------
.. automodule:: paddle.trainer_config_helpers.layers
......@@ -423,6 +456,12 @@ rank_cost
:members: rank_cost
:noindex:
sum_cost
---------
.. automodule:: paddle.trainer_config_helpers.layers
:members: sum_cost
:noindex:
crf_layer
-----------------
.. automodule:: paddle.trainer_config_helpers.layers
......@@ -441,6 +480,12 @@ ctc_layer
:members: ctc_layer
:noindex:
warp_ctc_layer
--------------
.. automodule:: paddle.trainer_config_helpers.layers
:members: warp_ctc_layer
:noindex:
nce_layer
-----------
.. automodule:: paddle.trainer_config_helpers.layers
......@@ -453,12 +498,6 @@ hsigmoid
:members: hsigmoid
:noindex:
sum_cost
---------
.. automodule:: paddle.trainer_config_helpers.layers
:members: sum_cost
:noindex:
Check Layer
============
......
===========
Activation
===========
Abs
===
.. automodule:: paddle.v2.activation
:members: Abs
:noindex:
Exp
===
.. automodule:: paddle.v2.activation
:members: Exp
:noindex:
Identity
========
.. automodule:: paddle.v2.activation
:members: Identity
:noindex:
Linear
======
.. automodule:: paddle.v2.activation
:members: Linear
:noindex:
Log
===
.. automodule:: paddle.v2.activation
:members: Log
:noindex:
Square
======
.. automodule:: paddle.v2.activation
:members: Square
:noindex:
Sigmoid
=======
.. automodule:: paddle.v2.activation
:members: Sigmoid
:noindex:
Softmax
=======
.. automodule:: paddle.v2.activation
:members: Softmax
:noindex:
SequenceSoftmax
===============
.. automodule:: paddle.v2.activation
:members: SequenceSoftmax
:noindex:
Relu
====
.. automodule:: paddle.v2.activation
:members: Relu
:noindex:
BRelu
=====
.. automodule:: paddle.v2.activation
:members: BRelu
:noindex:
SoftRelu
========
.. automodule:: paddle.v2.activation
:members: SoftRelu
:noindex:
Tanh
====
.. automodule:: paddle.v2.activation
:members: Tanh
:noindex:
STanh
=====
.. automodule:: paddle.v2.activation
:members: STanh
:noindex:
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册