Commit 0986415c authored by liaogang

Fix conflict

.gitignore
*.DS_Store
build/
*.user
.vscode
.idea
.project
.cproject
.pydevproject
Makefile
.test_env/
third_party/
*~
bazel-*
!build/*.deb
......@@ -7,6 +7,7 @@ build/
.project
.cproject
.pydevproject
.settings/
Makefile
.test_env/
third_party/
......
......@@ -2,12 +2,12 @@
sha: c25201a00e6b0514370501050cf2a8538ac12270
hooks:
- id: remove-crlf
files: (?!.*third_party)^.*$
files: (?!.*third_party)(?!.*book)^.*$
- repo: https://github.com/reyoung/mirrors-yapf.git
sha: v0.13.2
hooks:
- id: yapf
files: (.*\.(py|bzl)|BUILD|.*\.BUILD|WORKSPACE)$ # Bazel BUILD files follow Python syntax.
files: (.*\.(py|bzl)|BUILD|.*\.BUILD|WORKSPACE)$
- repo: https://github.com/pre-commit/pre-commit-hooks
sha: 7539d8bd1a00a3c1bfd34cdb606d3a6372e83469
hooks:
......@@ -15,7 +15,7 @@
- id: check-merge-conflict
- id: check-symlinks
- id: detect-private-key
files: (?!.*third_party)^.*$
files: (?!.*third_party)(?!.*book)^.*$
- id: end-of-file-fixer
- repo: https://github.com/PaddlePaddle/clang-format-pre-commit-hook.git
sha: 28c0ea8a67a3e2dbbf4822ef44e85b63a0080a29
......
......@@ -4,22 +4,14 @@ cache:
- $HOME/third_party
- $HOME/.ccache
- $HOME/.cache/pip
- $HOME/Library/Caches/Homebrew
sudo: required
dist: trusty
os:
- linux
- osx
env:
- JOB=DOCS
- JOB=BUILD_AND_TEST
- JOB=PRE_COMMIT
matrix:
exclude:
- os: osx
env: JOB=DOCS # Only generate documentation in linux.
- os: osx
env: JOB=PRE_COMMIT # Only check pre-commit hook in linux
addons:
apt:
......@@ -53,11 +45,10 @@ before_install:
fi
fi
fi
- if [[ "$TRAVIS_OS_NAME" == "osx" ]]; then paddle/scripts/travis/before_install.osx.sh; fi
- if [[ "$JOB" == "PRE_COMMIT" ]]; then sudo ln -s /usr/bin/clang-format-3.8 /usr/bin/clang-format; fi
# Paddle currently uses protobuf 3.1; protobuf 3.2 breaks compatibility,
# so we pin the Python protobuf version.
- pip install numpy wheel 'protobuf==3.1' sphinx recommonmark sphinx_rtd_theme virtualenv pre-commit requests==2.9.2 LinkChecker
- pip install numpy wheel 'protobuf==3.1' sphinx recommonmark sphinx-rtd-theme==0.1.9 virtualenv pre-commit requests==2.9.2 LinkChecker
script:
- paddle/scripts/travis/main.sh
notifications:
......
......@@ -12,17 +12,23 @@
# See the License for the specific language governing permissions and
# limitations under the License
cmake_minimum_required(VERSION 3.0)
set(CMAKE_MODULE_PATH ${CMAKE_MODULE_PATH} "${CMAKE_SOURCE_DIR}/cmake")
set(PROJ_ROOT ${CMAKE_SOURCE_DIR})
include(system)
if(ANDROID)
cmake_minimum_required(VERSION 3.7)
else()
cmake_minimum_required(VERSION 3.0)
endif()
project(paddle CXX C)
find_package(Sphinx)
find_package(CUDA QUIET)
if(NOT CMAKE_CROSSCOMPILING)
find_package(CUDA QUIET)
endif(NOT CMAKE_CROSSCOMPILING)
find_package(Git REQUIRED)
find_package(Threads REQUIRED)
......@@ -41,7 +47,7 @@ option(WITH_RDMA "Compile PaddlePaddle with RDMA support" OFF)
option(WITH_TIMER "Compile PaddlePaddle with stats timer" OFF)
option(WITH_PROFILER "Compile PaddlePaddle with GPU profiler" OFF)
option(WITH_DOC "Compile PaddlePaddle with documentation" OFF)
option(ON_COVERALLS "Compile PaddlePaddle with code coverage" OFF)
option(WITH_COVERAGE "Compile PaddlePaddle with code coverage" OFF)
option(COVERALLS_UPLOAD "Package code coverage data to coveralls" OFF)
option(ON_TRAVIS "Exclude special unit test on Travis CI" OFF)
......@@ -52,6 +58,21 @@ if(NOT CMAKE_BUILD_TYPE)
FORCE)
endif()
if(ANDROID)
if(${CMAKE_SYSTEM_VERSION} VERSION_LESS "21")
message(FATAL_ERROR "Unsupport standalone toolchains with Android API level lower than 21")
endif()
set(WITH_GPU OFF CACHE STRING
"Disable GPU when cross-compiling for Android" FORCE)
set(WITH_AVX OFF CACHE STRING
"Disable AVX when cross-compiling for Android" FORCE)
set(WITH_PYTHON OFF CACHE STRING
"Disable PYTHON when cross-compiling for Android" FORCE)
set(WITH_RDMA OFF CACHE STRING
"Disable RDMA when cross-compiling for Android" FORCE)
endif(ANDROID)
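# Note: ANDROID is derived from CMAKE_SYSTEM_NAME in cmake/system.cmake (see
# that hunk below); a hypothetical configure line for this path would be, e.g.:
#   cmake .. -DCMAKE_SYSTEM_NAME=Android -DCMAKE_SYSTEM_VERSION=21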
set(THIRD_PARTY_PATH "${PROJ_ROOT}/third_party" CACHE STRING
"A path setting third party libraries download & build directories.")
########################################################################################
......@@ -65,6 +86,7 @@ include(external/python) # download, build, install python
include(external/openblas) # download, build, install openblas
include(external/swig) # download, build, install swig
include(external/warpctc) # download, build, install warpctc
include(external/any) # download linb::any
include(package) # set paddle packages
include(cpplint) # set paddle c++ style
......@@ -75,7 +97,6 @@ include(flags) # set paddle compile flags
include(cudnn) # set cudnn libraries
include(version) # set PADDLE_VERSION
include(coveralls) # set code coverage
include(configure) # add paddle env configuration
include_directories("${PROJ_ROOT}")
......@@ -83,14 +104,21 @@ include_directories("${PROJ_ROOT}/paddle/cuda/include")
include_directories("${CMAKE_CURRENT_BINARY_DIR}/proto")
set(EXTERNAL_LIBS
# have not include gtest here.
${GFLAGS_LIBRARIES}
${GLOG_LIBRARIES}
${CBLAS_LIBRARIES}
${PROTOBUF_LIBRARY}
${ZLIB_LIBRARIES}
${PYTHON_LIBRARIES}
)
if(WITH_GPU)
list(APPEND EXTERNAL_LIBS ${CUDA_LIBRARIES} ${CUDA_rt_LIBRARY})
if(NOT WITH_DSO)
list(APPEND EXTERNAL_LIBS ${CUDNN_LIBRARY} ${CUDA_CUBLAS_LIBRARIES} ${CUDA_curand_LIBRARY})
endif(NOT WITH_DSO)
endif(WITH_GPU)
add_subdirectory(proto)
add_subdirectory(paddle)
add_subdirectory(python)
......
FROM ubuntu:14.04
# An image for building paddle binaries
# Use cuda devel base image for both cpu and gpu environment
FROM nvidia/cuda:8.0-cudnn5-devel-ubuntu14.04
MAINTAINER PaddlePaddle Authors <paddle-dev@baidu.com>
ARG DEBIAN_FRONTEND=noninteractive
ARG UBUNTU_MIRROR
RUN /bin/bash -c 'if [[ -n ${UBUNTU_MIRROR} ]]; then sed -i 's#http://archive.ubuntu.com#${UBUNTU_MIRROR}#g' /etc/apt/sources.list; fi'
RUN /bin/bash -c 'if [[ -n ${UBUNTU_MIRROR} ]]; then sed -i 's#http://archive.ubuntu.com/ubuntu#${UBUNTU_MIRROR}#g' /etc/apt/sources.list; fi'
# ENV variables
ARG WITH_GPU
ARG WITH_AVX
ARG WITH_DOC
ARG WITH_STYLE_CHECK
ENV WOBOQ OFF
ENV WITH_GPU=${WITH_GPU:-OFF}
ENV WITH_AVX=${WITH_AVX:-ON}
ENV WITH_DOC=${WITH_DOC:-OFF}
ENV WITH_STYLE_CHECK=${WITH_STYLE_CHECK:-OFF}
ENV HOME /root
# Add bash enhancements
COPY ./paddle/scripts/docker/root/ /root/
RUN apt-get update && \
apt-get install -y git python-pip python-dev openssh-server bison && \
apt-get install -y wget unzip tar xz-utils bzip2 gzip coreutils && \
apt-get install -y curl sed grep graphviz libjpeg-dev zlib1g-dev && \
apt-get install -y python-numpy python-matplotlib gcc g++ gfortran && \
apt-get install -y automake && \
apt-get install -y automake locales clang-format-3.8 swig && \
apt-get clean -y
# git credential to skip password typing
RUN git config --global credential.helper store
# Fix locales to en_US.UTF-8
RUN localedef -i en_US -f UTF-8 en_US.UTF-8
# FIXME: due to a temporary ipykernel dependency issue, pin the ipykernel and
# jupyter versions until jupyter fixes this issue.
RUN pip install --upgrade pip && \
pip install -U "protobuf==3.1.0" && \
pip install -U 'protobuf==3.1.0' && \
pip install -U wheel pillow BeautifulSoup && \
pip install -U docopt PyYAML sphinx && \
pip install -U sphinx_rtd_theme recommonmark jupyter
pip install -U sphinx-rtd-theme==0.1.9 recommonmark && \
pip install pre-commit 'requests==2.9.2' 'ipykernel==4.6.0' 'jupyter==1.0.0'
RUN curl -sSL https://cmake.org/files/v3.4/cmake-3.4.1.tar.gz | tar -xz && \
cd cmake-3.4.1 && ./bootstrap && make -j `nproc` && make install && \
cd .. && rm -rf cmake-3.4.1
ARG BUILD_WOBOQ
ARG BUILD_AND_INSTALL
ARG WITH_AVX
ARG WITH_DOC
ARG WITH_STYLE_CHECK
ENV BUILD_WOBOQ=${BUILD_WOBOQ:-OFF}
ENV BUILD_AND_INSTALL=${BUILD_AND_INSTALL:-OFF}
ENV WITH_GPU=OFF
ENV WITH_AVX=${WITH_AVX:-ON}
ENV WITH_DOC=${WITH_DOC:-OFF}
ENV WITH_STYLE_CHECK=${WITH_STYLE_CHECK:-OFF}
RUN mkdir /paddle
COPY . /paddle/
RUN /paddle/paddle/scripts/docker/build.sh
VOLUME ["/usr/share/nginx/html/data", "/usr/share/nginx/html/paddle"]
VOLUME ["/woboq_out"]
# Configure OpenSSH server. c.f. https://docs.docker.com/engine/examples/running_ssh_service
RUN mkdir /var/run/sshd
......@@ -48,12 +58,5 @@ RUN sed -ri 's/^PermitRootLogin\s+.*/PermitRootLogin yes/' /etc/ssh/sshd_config
RUN sed -ri 's/UsePAM yes/#UsePAM yes/g' /etc/ssh/sshd_config
EXPOSE 22
# Jupyter Notebook directory.
RUN mkdir /notes/
WORKDIR "/notes"
EXPOSE 8888
RUN mkdir -p /opt/bin
COPY ./paddle/scripts/docker/entrypoint /opt/bin/
CMD ["/opt/bin/entrypoint"]
# development image default do build work
CMD ["bash", "/paddle/paddle/scripts/docker/build.sh"]
......@@ -2,8 +2,8 @@
[![Build Status](https://travis-ci.org/PaddlePaddle/Paddle.svg?branch=develop)](https://travis-ci.org/PaddlePaddle/Paddle)
[![Documentation Status](https://img.shields.io/badge/docs-latest-brightgreen.svg?style=flat)](http://www.paddlepaddle.org/)
[![Documentation Status](https://img.shields.io/badge/中文文档-最新-brightgreen.svg)](http://www.paddlepaddle.org/cn/index.html)
[![Documentation Status](https://img.shields.io/badge/docs-latest-brightgreen.svg?style=flat)](http://www.paddlepaddle.org/develop/doc/)
[![Documentation Status](https://img.shields.io/badge/中文文档-最新-brightgreen.svg)](http://www.paddlepaddle.org/doc_cn/)
[![Coverage Status](https://coveralls.io/repos/github/PaddlePaddle/Paddle/badge.svg?branch=develop)](https://coveralls.io/github/PaddlePaddle/Paddle?branch=develop)
[![Release](https://img.shields.io/github/release/PaddlePaddle/Paddle.svg)](https://github.com/PaddlePaddle/Paddle/releases)
[![License](https://img.shields.io/badge/license-Apache%202-blue.svg)](LICENSE)
......@@ -59,36 +59,36 @@ Please refer to our [release announcement](https://github.com/PaddlePaddle/Paddl
the capability of PaddlePaddle to make a huge impact for your product.
## Installation
Check out the [Install Guide](http://paddlepaddle.org/doc/build/) to install from
pre-built packages (**docker image**, **deb package**) or
directly build on **Linux** and **Mac OS X** from the source code.
It is recommended to check out the
[Docker installation guide](http://www.paddlepaddle.org/develop/doc/getstarted/build_and_install/docker_install_en.html)
before looking into the
[build from source guide](http://www.paddlepaddle.org/develop/doc/getstarted/build_and_install/build_from_source_en.html)
## Documentation
Both [English Docs](http://paddlepaddle.org/doc/) and [Chinese Docs](http://paddlepaddle.org/doc_cn/) are provided for our users and developers.
- [Quick Start](http://paddlepaddle.org/doc/demo/quick_start/index_en) <br>
You can follow the quick start tutorial to learn how use PaddlePaddle
step-by-step.
We provide [English](http://www.paddlepaddle.org/develop/doc/) and
[Chinese](http://www.paddlepaddle.org/doc_cn/) documentation.
- [Deep Learning 101](http://book.paddlepaddle.org/index.en.html)
You might want to start with this online interactive book, which runs in Jupyter Notebook.
- [Distributed Training](http://www.paddlepaddle.org/develop/doc/howto/usage/cluster/cluster_train_en.html)
You can run distributed training jobs on MPI clusters.
- [Distributed Training on Kubernetes](http://www.paddlepaddle.org/develop/doc/howto/usage/k8s/k8s_en.html)
- [Example and Demo](http://paddlepaddle.org/doc/demo/) <br>
We provide five demos, including: image classification, sentiment analysis,
sequence to sequence model, recommendation, semantic role labeling.
You can also run distributed training jobs on Kubernetes clusters.
- [Distributed Training](http://paddlepaddle.org/doc/cluster) <br>
This system supports training deep learning models on multiple machines
with data parallelism.
- [Python API](http://www.paddlepaddle.org/develop/doc/api/index_en.html)
- [Python API](http://paddlepaddle.org/doc/ui/) <br>
PaddlePaddle supports using either Python interface or C++ to build your
system. We also use SWIG to wrap C++ source code to create a user friendly
interface for Python. You can also use SWIG to create interface for your
favorite programming language.
Our new API enables much shorter programs; see the sketch at the end of this section.
- [How to Contribute](http://paddlepaddle.org/doc/build/contribute_to_paddle.html) <br>
We sincerely appreciate your interest and contributions. If you would like to
contribute, please read the contribution guide.
- [How to Contribute](http://www.paddlepaddle.org/develop/doc/howto/dev/contribute_to_paddle_en.html)
- [Source Code Documents](http://paddlepaddle.org/doc/source/) <br>
We appreciate your contributions!
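As a taste of the new API, here is a minimal sketch adapted from the
fit-a-line linear-regression demo included in this repository (illustrative
only, not the canonical example):

```python
import paddle.v2 as paddle
import paddle.v2.dataset.uci_housing as uci_housing

paddle.init(use_gpu=False, trainer_count=1)

# A one-layer linear model: 13 input features -> 1 predicted value.
x = paddle.layer.data(name='x', type=paddle.data_type.dense_vector(13))
y_predict = paddle.layer.fc(input=x, size=1, act=paddle.activation.Linear())
y = paddle.layer.data(name='y', type=paddle.data_type.dense_vector(1))
cost = paddle.layer.mse_cost(input=y_predict, label=y)

# Create parameters and a trainer, then fit the built-in housing dataset.
parameters = paddle.parameters.create(cost)
trainer = paddle.trainer.SGD(
    cost=cost,
    parameters=parameters,
    update_equation=paddle.optimizer.Momentum(momentum=0))
trainer.train(
    reader=paddle.batch(
        paddle.reader.shuffle(uci_housing.train(), buf_size=500),
        batch_size=2),
    feeding={'x': 0, 'y': 1},
    num_passes=30)
```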
## Ask Questions
......
......@@ -29,13 +29,16 @@ Luo, Tao
Lyu, Qin
Mao, Hongyue
Qian, Xiaojun
Qiao, Longfei
Qi, Jun
Qin, Duohao
Shen, Guolong
Shi, Guangchuan
Song, Xiang
Wang, Helin
Wang, Jiang
Wang, Yanfei
Wang, Yi
Wang, Yong
Weng, Renliang
Xu, Tianbing
......
......@@ -72,7 +72,7 @@ function( Sphinx_add_target target_name builder conf cache source destination )
${source}
${destination}
COMMENT "Generating sphinx documentation: ${builder}"
COMMAND ln -sf ${destination}/index_*.html ${destination}/index.html
COMMAND cd ${destination} && ln -sf ./index_*.html index.html
)
set_property(
......
......@@ -19,9 +19,9 @@ set(CBLAS_FOUND OFF)
set(INTEL_ROOT "/opt/intel" CACHE PATH "Folder contains intel libs")
set(MKL_ROOT ${INTEL_ROOT}/mkl CACHE PATH "Folder contains MKL")
find_path(MKL_INCLUDE_DIR mkl.h PATHS
find_path(MKL_INC_DIR mkl.h PATHS
${MKL_ROOT}/include)
find_path(MKL_INCLUDE_DIR mkl_lapacke.h PATHS
find_path(MKL_LAPACK_INC_DIR mkl_lapacke.h PATHS
${MKL_ROOT}/include)
find_library(MKL_CORE_LIB NAMES mkl_core PATHS
${MKL_ROOT}/lib
......@@ -34,15 +34,19 @@ find_library(MKL_INTEL_LP64 NAMES mkl_intel_lp64 PATHS
${MKL_ROOT}/lib/intel64)
if(MKL_INCLUDE_DIR AND MKL_CORE_LIB AND MKL_SEQUENTIAL_LIB AND MKL_INTEL_LP64)
if(MKL_INC_DIR AND MKL_CORE_LIB AND MKL_SEQUENTIAL_LIB AND MKL_INTEL_LP64)
set(CBLAS_PROVIDER MKL)
set(CBLAS_INC_DIR ${MKL_INCLUDE_DIR})
set(CBLAS_INC_DIR ${MKL_INC_DIR})
set(CBLAS_LIBRARIES ${MKL_INTEL_LP64}
${MKL_SEQUENTIAL_LIB}
${MKL_CORE_LIB})
add_definitions(-DPADDLE_USE_MKL)
message(STATUS "Found MKL (include: ${CBLAS_INC_DIR}, library: ${CBLAS_LIBRARIES})")
set(CBLAS_FOUND ON)
if(MKL_LAPACK_INC_DIR)
add_definitions(-DPADDLE_USE_LAPACK)
message(STATUS "Found lapack in MKL (include: ${MKL_LAPACK_INC_DIR})")
endif()
return() # return file.
endif()
......@@ -68,13 +72,17 @@ find_library(ATLAS_CBLAS_LIB NAMES cblas libcblas.so.3
find_library(ATLAS_LIB NAMES lapack_atlas liblapack_atlas.so.3
PATHS ${ATLAS_LIB_SEARCH_PATHS})
if(ATLAS_INC_DIR AND ATLAS_CBLAS_LIB AND ATLAS_LIB)
if(ATLAS_INC_DIR AND ATLAS_CBLAS_LIB AND ATLAS_LIB AND NOT CBLAS_FOUND)
set(CBLAS_PROVIDER ATLAS)
set(CBLAS_INC_DIR ${ATLAS_INC_DIR} ${ATLAS_CLAPACK_INC_DIR})
set(CBLAS_INC_DIR ${ATLAS_INC_DIR})
set(CBLAS_LIBRARIES ${ATLAS_LIB} ${ATLAS_CBLAS_LIB})
add_definitions(-DPADDLE_USE_ATLAS)
message(STATUS "Found Atlas (include: ${CBLAS_INC_DIR}, library: ${CBLAS_LIBRARIES})")
message(STATUS "Found ATLAS (include: ${CBLAS_INC_DIR}, library: ${CBLAS_LIBRARIES})")
set(CBLAS_FOUND ON)
if(ATLAS_CLAPACK_INC_DIR)
add_definitions(-DPADDLE_USE_LAPACK)
message(STATUS "Found lapack in ATLAS (include: ${ATLAS_CLAPACK_INC_DIR})")
endif()
return()
endif()
......@@ -103,8 +111,12 @@ if(OPENBLAS_INC_DIR AND OPENBLAS_LIB)
set(CBLAS_PROVIDER OPENBLAS)
set(CBLAS_INC_DIR ${OPENBLAS_INC_DIR})
set(CBLAS_LIBRARIES ${OPENBLAS_LIB})
message(STATUS "Found OpenBlas (include: ${CBLAS_INC_DIR}, library: ${CBLAS_LIBRARIES})")
message(STATUS "Found OpenBLAS (include: ${CBLAS_INC_DIR}, library: ${CBLAS_LIBRARIES})")
set(CBLAS_FOUND ON)
if(OPENBLAS_LAPACKE_INC_DIR)
add_definitions(-DPADDLE_USE_LAPACK)
message(STATUS "Found lapack in OpenBLAS (include: ${OPENBLAS_LAPACKE_INC_DIR})")
endif()
return()
endif()
......
# Use ccache if found ccache program
find_program(CCACHE_FOUND ccache)
find_program(CCACHE_PATH ccache)
if(CCACHE_FOUND)
if(CCACHE_PATH)
message(STATUS "Ccache is founded, use ccache to speed up compile.")
set_property(GLOBAL PROPERTY RULE_LAUNCH_COMPILE ccache)
set_property(GLOBAL PROPERTY RULE_LAUNCH_LINK ccache)
endif(CCACHE_FOUND)
\ No newline at end of file
set_property(GLOBAL PROPERTY RULE_LAUNCH_COMPILE ${CCACHE_PATH})
set_property(GLOBAL PROPERTY RULE_LAUNCH_LINK ${CCACHE_PATH})
endif(CCACHE_PATH)
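# RULE_LAUNCH_COMPILE/RULE_LAUNCH_LINK prefix every compile and link command
# with the given launcher, so all targets transparently go through ccache.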
......@@ -32,6 +32,14 @@ if(NOT WITH_PROFILER)
add_definitions(-DPADDLE_DISABLE_PROFILER)
endif(NOT WITH_PROFILER)
if(NOT CMAKE_CROSSCOMPILING)
if(WITH_AVX AND AVX_FOUND)
set(SIMD_FLAG ${AVX_FLAG})
elseif(SSE3_FOUND)
set(SIMD_FLAG ${SSE3_FLAG})
endif()
endif()
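# SIMD_FLAG is applied to the host C/C++ flags and, for GPU builds, passed to
# nvcc via -Xcompiler (see the uses of ${SIMD_FLAG} below).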
if(NOT WITH_GPU)
add_definitions(-DPADDLE_ONLY_CPU)
add_definitions(-DHPPL_STUB_FUNC)
......@@ -48,21 +56,12 @@ else()
message(FATAL_ERROR "Paddle need cudnn to compile")
endif()
if(WITH_AVX)
set(CUDA_NVCC_FLAGS ${CUDA_NVCC_FLAGS} "-Xcompiler ${AVX_FLAG}")
else(WITH_AVX)
set(CUDA_NVCC_FLAGS ${CUDA_NVCC_FLAGS} "-Xcompiler ${SSE3_FLAG}")
endif(WITH_AVX)
set(CUDA_NVCC_FLAGS ${CUDA_NVCC_FLAGS} "-Xcompiler ${SIMD_FLAG}")
# Include cuda and cudnn
include_directories(${CUDNN_INCLUDE_DIR})
include_directories(${CUDA_TOOLKIT_INCLUDE})
endif(NOT WITH_GPU)
if(WITH_AVX)
set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} ${AVX_FLAG}")
set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} ${AVX_FLAG}")
else(WITH_AVX)
set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} ${SSE3_FLAG}")
set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} ${SSE3_FLAG}")
endif(WITH_AVX)
set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} ${SIMD_FLAG}")
set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} ${SIMD_FLAG}")
......@@ -61,7 +61,7 @@ function(code_coverage _COVERAGE_SRCS _COVERALLS_UPLOAD _CMAKE_SCRIPT_PATH)
endif()
endfunction()
if(ON_COVERALLS)
if(WITH_COVERAGE)
set(CMAKE_BUILD_TYPE "Debug")
set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -g -O0 -fprofile-arcs -ftest-coverage")
set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} -g -O0 -fprofile-arcs -ftest-coverage")
......
......@@ -110,14 +110,13 @@ endmacro()
# Get the coverage data.
file(GLOB_RECURSE GCDA_FILES "${COV_PATH}" "*.gcda")
message("GCDA files:")
message("Process GCDA files:")
message("===============================")
# Get a list of all the object directories needed by gcov
# (The directories the .gcda files and .o files are found in)
# and run gcov on those.
foreach(GCDA ${GCDA_FILES})
message("Process: ${GCDA}")
message("------------------------------------------------------------------------------")
get_filename_component(GCDA_DIR ${GCDA} PATH)
#
......@@ -135,7 +134,7 @@ foreach(GCDA ${GCDA_FILES})
# If -p is not specified then the file is named only "the_file.c.gcov"
#
execute_process(
COMMAND ${GCOV_EXECUTABLE} -p -o ${GCDA_DIR} ${GCDA}
COMMAND ${GCOV_EXECUTABLE} -p -o ${GCDA_DIR} ${GCDA}
OUTPUT_QUIET # shell redirects such as >/dev/null are not interpreted by execute_process
WORKING_DIRECTORY ${GCDA_DIR}
)
endforeach()
......@@ -383,7 +382,6 @@ foreach(NOT_COVERED_SRC ${COVERAGE_SRCS_REMAINING})
set(GCOV_FILE_COVERAGE "${GCOV_FILE_COVERAGE}]")
# Generate the final JSON for this file.
message("Generate JSON for non-gcov file: ${NOT_COVERED_SRC}...")
string(CONFIGURE ${SRC_FILE_TEMPLATE} FILE_JSON)
set(JSON_GCOV_FILES "${JSON_GCOV_FILES}${FILE_JSON}, ")
endforeach()
......
if(NOT WITH_GPU)
return()
endif()
set(CUDNN_ROOT "" CACHE PATH "CUDNN ROOT")
find_path(CUDNN_INCLUDE_DIR cudnn.h
PATHS ${CUDNN_ROOT} ${CUDNN_ROOT}/include
......@@ -11,6 +15,7 @@ list(APPEND CUDNN_CHECK_LIBRARY_DIRS
${CUDNN_ROOT}
${CUDNN_ROOT}/lib64
${CUDNN_ROOT}/lib
${CUDNN_ROOT}/lib/x86_64-linux-gnu
$ENV{CUDNN_ROOT}
$ENV{CUDNN_ROOT}/lib64
$ENV{CUDNN_ROOT}/lib
......
INCLUDE(ExternalProject)
SET(ANY_SOURCE_DIR ${THIRD_PARTY_PATH}/any)
INCLUDE_DIRECTORIES(${ANY_SOURCE_DIR}/src/linb_any)
ExternalProject_Add(
linb_any
${EXTERNAL_PROJECT_LOG_ARGS}
GIT_REPOSITORY "https://github.com/thelink2012/any.git"
GIT_TAG "8fef1e93710a0edf8d7658999e284a1142c4c020"
PREFIX ${ANY_SOURCE_DIR}
UPDATE_COMMAND ""
CONFIGURE_COMMAND ""
BUILD_COMMAND ""
INSTALL_COMMAND ""
TEST_COMMAND ""
)
add_definitions(-DANY_IMPL_ANY_CAST_MOVEABLE)
......@@ -31,9 +31,17 @@ ExternalProject_Add(
GIT_REPOSITORY "https://github.com/gflags/gflags.git"
PREFIX ${GFLAGS_SOURCES_DIR}
UPDATE_COMMAND ""
CMAKE_ARGS -DCMAKE_CXX_COMPILER=${CMAKE_CXX_COMPILER}
CMAKE_ARGS -DCMAKE_C_COMPILER=${CMAKE_C_COMPILER}
CMAKE_ARGS -DCMAKE_CXX_FLAGS=${CMAKE_CXX_FLAGS}
CMAKE_ARGS -DCMAKE_C_FLAGS=${CMAKE_C_FLAGS}
CMAKE_ARGS -DCMAKE_INSTALL_PREFIX=${GFLAGS_INSTALL_DIR}
CMAKE_ARGS -DCMAKE_POSITION_INDEPENDENT_CODE=ON
CMAKE_ARGS -DBUILD_TESTING=OFF
CMAKE_ARGS -DCMAKE_BUILD_TYPE=Release
CMAKE_CACHE_ARGS -DCMAKE_INSTALL_PREFIX:PATH=${GFLAGS_INSTALL_DIR}
-DCMAKE_POSITION_INDEPENDENT_CODE:BOOL=ON
-DCMAKE_BUILD_TYPE:STRING=Release
)
LIST(APPEND external_project_dependencies gflags)
......@@ -33,11 +33,19 @@ ExternalProject_Add(
GIT_REPOSITORY "https://github.com/google/glog.git"
PREFIX ${GLOG_SOURCES_DIR}
UPDATE_COMMAND ""
CMAKE_ARGS -DCMAKE_CXX_COMPILER=${CMAKE_CXX_COMPILER}
CMAKE_ARGS -DCMAKE_C_COMPILER=${CMAKE_C_COMPILER}
CMAKE_ARGS -DCMAKE_CXX_FLAGS=${CMAKE_CXX_FLAGS}
CMAKE_ARGS -DCMAKE_C_FLAGS=${CMAKE_C_FLAGS}
CMAKE_ARGS -DCMAKE_INSTALL_PREFIX=${GLOG_INSTALL_DIR}
CMAKE_ARGS -DCMAKE_POSITION_INDEPENDENT_CODE=ON
CMAKE_ARGS -DWITH_GFLAGS=ON
CMAKE_ARGS -Dgflags_DIR=${GFLAGS_INSTALL_DIR}/lib/cmake/gflags
CMAKE_ARGS -DBUILD_TESTING=OFF
CMAKE_ARGS -DCMAKE_BUILD_TYPE=Release
CMAKE_CACHE_ARGS -DCMAKE_INSTALL_PREFIX:PATH=${GLOG_INSTALL_DIR}
-DCMAKE_POSITION_INDEPENDENT_CODE:BOOL=ON
-DCMAKE_BUILD_TYPE:STRING=Release
)
LIST(APPEND external_project_dependencies glog)
......@@ -41,11 +41,19 @@ IF(WITH_TESTING)
GIT_TAG "release-1.8.0"
PREFIX ${GTEST_SOURCES_DIR}
UPDATE_COMMAND ""
CMAKE_ARGS -DCMAKE_INSTALL_PREFIX:PATH=${GTEST_INSTALL_DIR}
CMAKE_ARGS -DCMAKE_CXX_COMPILER=${CMAKE_CXX_COMPILER}
CMAKE_ARGS -DCMAKE_C_COMPILER=${CMAKE_C_COMPILER}
CMAKE_ARGS -DCMAKE_CXX_FLAGS=${CMAKE_CXX_FLAGS}
CMAKE_ARGS -DCMAKE_C_FLAGS=${CMAKE_C_FLAGS}
CMAKE_ARGS -DCMAKE_INSTALL_PREFIX=${GTEST_INSTALL_DIR}
CMAKE_ARGS -DCMAKE_POSITION_INDEPENDENT_CODE=ON
CMAKE_ARGS -DBUILD_GMOCK=ON
CMAKE_ARGS -Dgtest_disable_pthreads=ON
CMAKE_ARGS -Dgtest_force_shared_crt=ON
CMAKE_ARGS -DCMAKE_BUILD_TYPE=Release
CMAKE_CACHE_ARGS -DCMAKE_INSTALL_PREFIX:PATH=${GTEST_INSTALL_DIR}
-DCMAKE_POSITION_INDEPENDENT_CODE:BOOL=ON
-DCMAKE_BUILD_TYPE:STRING=Release
)
LIST(APPEND external_project_dependencies gtest)
ENDIF(WITH_TESTING)
......@@ -29,7 +29,24 @@ IF(NOT ${CBLAS_FOUND})
IF(CMAKE_COMPILER_IS_GNUCC)
ENABLE_LANGUAGE(Fortran)
LIST(APPEND CBLAS_LIBRARIES gfortran pthread)
if (NOT CMAKE_Fortran_COMPILER_VERSION)
# cmake < 3.4 cannot get CMAKE_Fortran_COMPILER_VERSION directly.
execute_process(COMMAND ${CMAKE_Fortran_COMPILER} -dumpversion
OUTPUT_VARIABLE CMAKE_Fortran_COMPILER_VERSION)
endif()
string(REGEX MATCHALL "[0-9]+" Fortran_VERSION ${CMAKE_Fortran_COMPILER_VERSION})
list(GET Fortran_VERSION 0 Fortran_MAJOR)
list(GET Fortran_VERSION 1 Fortran_MINOR)
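# OpenBLAS compiles its netlib LAPACK routines with gfortran, so static builds
# must link libgfortran explicitly; locate it by the detected GCC version.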
find_library(GFORTRAN_LIBRARY NAMES gfortran PATHS
/lib
/usr/lib
/usr/lib/gcc/x86_64-linux-gnu/${Fortran_MAJOR}.${Fortran_MINOR}/
/usr/lib/gcc/x86_64-linux-gnu/${Fortran_MAJOR}/)
if (NOT GFORTRAN_LIBRARY)
message(FATAL_ERROR "Cannot found gfortran library which it is used by openblas")
endif()
find_package(Threads REQUIRED)
LIST(APPEND CBLAS_LIBRARIES ${GFORTRAN_LIBRARY} ${CMAKE_THREAD_LIBS_INIT})
ENDIF(CMAKE_COMPILER_IS_GNUCC)
IF(NOT CMAKE_Fortran_COMPILER)
......@@ -37,6 +54,8 @@ IF(NOT ${CBLAS_FOUND})
"you need to set gfortran compiler: cmake .. -DCMAKE_Fortran_COMPILER=...")
ENDIF(NOT CMAKE_Fortran_COMPILER)
ADD_DEFINITIONS(-DPADDLE_USE_LAPACK)
ExternalProject_Add(
openblas
${EXTERNAL_PROJECT_LOG_ARGS}
......@@ -45,7 +64,7 @@ IF(NOT ${CBLAS_FOUND})
PREFIX ${CBLAS_SOURCES_DIR}
INSTALL_DIR ${CBLAS_INSTALL_DIR}
BUILD_IN_SOURCE 1
BUILD_COMMAND ${CMAKE_MAKE_PROGRAM} FC=${CMAKE_Fortran_COMPILER} CC=${CMAKE_C_COMPILER} HOSTCC=${CMAKE_C_COMPILER} NO_SHARED=1 libs netlib
BUILD_COMMAND ${CMAKE_MAKE_PROGRAM} FC=${CMAKE_Fortran_COMPILER} CC=${CMAKE_C_COMPILER} HOSTCC=${CMAKE_C_COMPILER} DYNAMIC_ARCH=1 NO_SHARED=1 libs netlib
INSTALL_COMMAND ${CMAKE_MAKE_PROGRAM} install NO_SHARED=1 PREFIX=<INSTALL_DIR>
UPDATE_COMMAND ""
CONFIGURE_COMMAND ""
......
......@@ -14,46 +14,67 @@
INCLUDE(ExternalProject)
SET(PROTOBUF_SOURCES_DIR ${THIRD_PARTY_PATH}/protobuf)
SET(PROTOBUF_INSTALL_DIR ${THIRD_PARTY_PATH}/install/protobuf)
SET(PROTOBUF_INCLUDE_DIR "${PROTOBUF_INSTALL_DIR}/include" CACHE PATH "protobuf include directory." FORCE)
set(PROTOBUF_VERSION 3.1)
FIND_PACKAGE(Protobuf ${PROTOBUF_VERSION})
INCLUDE_DIRECTORIES(${PROTOBUF_INCLUDE_DIR})
IF(PROTOBUF_FOUND)
EXEC_PROGRAM(${PROTOBUF_PROTOC_EXECUTABLE} ARGS --version OUTPUT_VARIABLE PROTOBUF_VERSION)
STRING(REGEX MATCH "[0-9]+.[0-9]+" PROTOBUF_VERSION "${PROTOBUF_VERSION}")
IF (${PROTOBUF_VERSION} VERSION_LESS "3.1.0")
SET(PROTOBUF_FOUND OFF)
ENDIF()
ENDIF(PROTOBUF_FOUND)
IF(NOT PROTOBUF_FOUND)
SET(PROTOBUF_SOURCES_DIR ${THIRD_PARTY_PATH}/protobuf)
SET(PROTOBUF_INSTALL_DIR ${THIRD_PARTY_PATH}/install/protobuf)
SET(PROTOBUF_INCLUDE_DIR "${PROTOBUF_INSTALL_DIR}/include" CACHE PATH "protobuf include directory." FORCE)
IF(WIN32)
SET(PROTOBUF_LITE_LIBRARY
"${PROTOBUF_INSTALL_DIR}/lib/libprotobuf-lite.lib" CACHE FILEPATH "protobuf lite library." FORCE)
SET(PROTOBUF_LIBRARY
"${PROTOBUF_INSTALL_DIR}/lib/libprotobuf.lib" CACHE FILEPATH "protobuf library." FORCE)
SET(PROTOBUF_PROTOC_LIBRARY
"${PROTOBUF_INSTALL_DIR}/lib/libprotoc.lib" CACHE FILEPATH "protoc library." FORCE)
SET(PROTOBUF_PROTOC_EXECUTABLE "${PROTOBUF_INSTALL_DIR}/bin/protoc.exe" CACHE FILEPATH "protobuf executable." FORCE)
ELSE(WIN32)
SET(PROTOBUF_LITE_LIBRARY
"${PROTOBUF_INSTALL_DIR}/lib/libprotobuf-lite.a" CACHE FILEPATH "protobuf lite library." FORCE)
SET(PROTOBUF_LIBRARY
"${PROTOBUF_INSTALL_DIR}/lib/libprotobuf.a" CACHE FILEPATH "protobuf library." FORCE)
SET(PROTOBUF_PROTOC_LIBRARY
"${PROTOBUF_INSTALL_DIR}/lib/libprotoc.a" CACHE FILEPATH "protoc library." FORCE)
SET(PROTOBUF_PROTOC_EXECUTABLE "${PROTOBUF_INSTALL_DIR}/bin/protoc" CACHE FILEPATH "protobuf executable." FORCE)
ENDIF(WIN32)
IF(WIN32)
SET(PROTOBUF_LITE_LIBRARY
"${PROTOBUF_INSTALL_DIR}/lib/libprotobuf-lite.lib" CACHE FILEPATH "protobuf lite library." FORCE)
SET(PROTOBUF_LIBRARY
"${PROTOBUF_INSTALL_DIR}/lib/libprotobuf.lib" CACHE FILEPATH "protobuf library." FORCE)
SET(PROTOBUF_PROTOC_LIBRARY
"${PROTOBUF_INSTALL_DIR}/lib/libprotoc.lib" CACHE FILEPATH "protoc library." FORCE)
SET(PROTOBUF_PROTOC_EXECUTABLE "${PROTOBUF_INSTALL_DIR}/bin/protoc.exe" CACHE FILEPATH "protobuf executable." FORCE)
ELSE(WIN32)
SET(PROTOBUF_LITE_LIBRARY
"${PROTOBUF_INSTALL_DIR}/lib/libprotobuf-lite.a" CACHE FILEPATH "protobuf lite library." FORCE)
SET(PROTOBUF_LIBRARY
"${PROTOBUF_INSTALL_DIR}/lib/libprotobuf.a" CACHE FILEPATH "protobuf library." FORCE)
SET(PROTOBUF_PROTOC_LIBRARY
"${PROTOBUF_INSTALL_DIR}/lib/libprotoc.a" CACHE FILEPATH "protoc library." FORCE)
SET(PROTOBUF_PROTOC_EXECUTABLE "${PROTOBUF_INSTALL_DIR}/bin/protoc" CACHE FILEPATH "protobuf executable." FORCE)
ENDIF(WIN32)
ExternalProject_Add(
protobuf
${EXTERNAL_PROJECT_LOG_ARGS}
PREFIX ${PROTOBUF_SOURCES_DIR}
UPDATE_COMMAND ""
DEPENDS zlib
GIT_REPOSITORY "https://github.com/google/protobuf.git"
GIT_TAG "9f75c5aa851cd877fb0d93ccc31b8567a6706546"
CONFIGURE_COMMAND
${CMAKE_COMMAND} ${PROTOBUF_SOURCES_DIR}/src/protobuf/cmake
-Dprotobuf_BUILD_TESTS=OFF
-DZLIB_ROOT:FILEPATH=${ZLIB_ROOT}
-DCMAKE_POSITION_INDEPENDENT_CODE=ON
-DCMAKE_BUILD_TYPE=Release
-DCMAKE_INSTALL_PREFIX=${PROTOBUF_INSTALL_DIR}
-DCMAKE_INSTALL_LIBDIR=lib
)
LIST(APPEND external_project_dependencies protobuf)
ExternalProject_Add(
protobuf
${EXTERNAL_PROJECT_LOG_ARGS}
PREFIX ${PROTOBUF_SOURCES_DIR}
UPDATE_COMMAND ""
DEPENDS zlib
GIT_REPOSITORY "https://github.com/google/protobuf.git"
GIT_TAG "9f75c5aa851cd877fb0d93ccc31b8567a6706546"
CONFIGURE_COMMAND
${CMAKE_COMMAND} ${PROTOBUF_SOURCES_DIR}/src/protobuf/cmake
-Dprotobuf_BUILD_TESTS=OFF
-DZLIB_ROOT:FILEPATH=${ZLIB_ROOT}
-DCMAKE_CXX_COMPILER=${CMAKE_CXX_COMPILER}
-DCMAKE_C_COMPILER=${CMAKE_C_COMPILER}
-DCMAKE_POSITION_INDEPENDENT_CODE=ON
-DCMAKE_BUILD_TYPE=Release
-DCMAKE_INSTALL_PREFIX=${PROTOBUF_INSTALL_DIR}
-DCMAKE_INSTALL_LIBDIR=lib
CMAKE_CACHE_ARGS
-DCMAKE_INSTALL_PREFIX:PATH=${PROTOBUF_INSTALL_DIR}
-DCMAKE_BUILD_TYPE:STRING=Release
-DCMAKE_VERBOSE_MAKEFILE:BOOL=OFF
-DCMAKE_POSITION_INDEPENDENT_CODE:BOOL=ON
-DZLIB_ROOT:STRING=${ZLIB_ROOT}
)
LIST(APPEND external_project_dependencies protobuf)
ENDIF(NOT PROTOBUF_FOUND)
INCLUDE_DIRECTORIES(${PROTOBUF_INCLUDE_DIR})
......@@ -219,5 +219,9 @@ ELSE(PYTHONLIBS_FOUND AND PYTHONINTERP_FOUND)
ENDIF(PYTHONLIBS_FOUND AND PYTHONINTERP_FOUND)
INCLUDE_DIRECTORIES(${PYTHON_INCLUDE_DIR})
INCLUDE_DIRECTORIES(${PYTHON_NUMPY_INCLUDE_DIR})
IF(WITH_PYTHON)
INCLUDE_DIRECTORIES(${PYTHON_INCLUDE_DIR})
INCLUDE_DIRECTORIES(${PYTHON_NUMPY_INCLUDE_DIR})
ELSE()
SET(PYTHON_LIBRARIES "")
ENDIF()
......@@ -50,12 +50,19 @@ ExternalProject_Add(
UPDATE_COMMAND ""
CMAKE_ARGS -DCMAKE_CXX_COMPILER=${CMAKE_CXX_COMPILER}
CMAKE_ARGS -DCMAKE_C_COMPILER=${CMAKE_C_COMPILER}
CMAKE_ARGS -DCMAKE_CXX_FLAGS=${CMAKE_CXX_FLAGS}
CMAKE_ARGS -DCMAKE_C_FLAGS=${CMAKE_C_FLAGS}
CMAKE_ARGS -DCMAKE_INSTALL_PREFIX=${WARPCTC_INSTALL_DIR}
CMAKE_ARGS -DWITH_GPU=${WITH_GPU}
CMAKE_ARGS -DWITH_OMP=${USE_OMP}
CMAKE_ARGS -DWITH_TORCH=OFF
CMAKE_ARGS -DCMAKE_DISABLE_FIND_PACKAGE_Torch=TRUE
CMAKE_ARGS -DCMAKE_DISABLE_FIND_PACKAGE_Torch=ON
CMAKE_ARGS -DBUILD_SHARED=ON
CMAKE_ARGS -DCMAKE_POSITION_INDEPENDENT_CODE=ON
CMAKE_ARGS -DCMAKE_BUILD_TYPE=Release
CMAKE_CACHE_ARGS -DCMAKE_BUILD_TYPE:STRING=Release
-DCMAKE_POSITION_INDEPENDENT_CODE:BOOL=ON
-DCMAKE_INSTALL_PREFIX:PATH=${WARPCTC_INSTALL_DIR}
)
LIST(APPEND external_project_dependencies warpctc)
......@@ -22,7 +22,7 @@ SET(ZLIB_INCLUDE_DIR "${ZLIB_INSTALL_DIR}/include" CACHE PATH "zlib include dire
IF(WIN32)
SET(ZLIB_LIBRARIES "${ZLIB_INSTALL_DIR}/lib/zlibstatic.lib" CACHE FILEPATH "zlib library." FORCE)
ELSE(WIN32)
set(ZLIB_LIBRARIES "${ZLIB_INSTALL_DIR}/lib/libz.a" CACHE FILEPATH "zlib library." FORCE)
SET(ZLIB_LIBRARIES "${ZLIB_INSTALL_DIR}/lib/libz.a" CACHE FILEPATH "zlib library." FORCE)
ENDIF(WIN32)
INCLUDE_DIRECTORIES(${ZLIB_INCLUDE_DIR})
......@@ -34,10 +34,18 @@ ExternalProject_Add(
GIT_TAG "v1.2.8"
PREFIX ${ZLIB_SOURCES_DIR}
UPDATE_COMMAND ""
CMAKE_ARGS -DCMAKE_CXX_COMPILER=${CMAKE_CXX_COMPILER}
CMAKE_ARGS -DCMAKE_C_COMPILER=${CMAKE_C_COMPILER}
CMAKE_ARGS -DCMAKE_CXX_FLAGS=${CMAKE_CXX_FLAGS}
CMAKE_ARGS -DCMAKE_C_FLAGS=${CMAKE_C_FLAGS}
CMAKE_ARGS -DCMAKE_INSTALL_PREFIX=${ZLIB_INSTALL_DIR}
CMAKE_ARGS -DBUILD_SHARED_LIBS=OFF
CMAKE_ARGS -DCMAKE_POSITION_INDEPENDENT_CODE=ON
CMAKE_ARGS -DCMAKE_MACOSX_RPATH=ON
CMAKE_ARGS -DCMAKE_BUILD_TYPE=Release
CMAKE_CACHE_ARGS -DCMAKE_INSTALL_PREFIX:PATH=${ZLIB_INSTALL_DIR}
-DCMAKE_POSITION_INDEPENDENT_CODE:BOOL=ON
-DCMAKE_BUILD_TYPE:STRING=Release
)
LIST(APPEND external_project_dependencies zlib)
......@@ -2,6 +2,7 @@
include(CheckCXXCompilerFlag)
include(CheckCCompilerFlag)
include(CheckCXXSymbolExists)
include(CheckTypeSize)
function(CheckCompilerCXX11Flag)
if(CMAKE_CXX_COMPILER_ID STREQUAL "GNU")
......@@ -25,7 +26,7 @@ function(CheckCompilerCXX11Flag)
endfunction()
CheckCompilerCXX11Flag()
LIST(APPEND CMAKE_CXX_FLAGS -std=c++11)
set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -std=c++11")
# safe_set_flag
#
......@@ -83,6 +84,17 @@ if(NOT UINT64_MAX_EXISTS)
endif()
endif()
SET(CMAKE_EXTRA_INCLUDE_FILES "pthread.h")
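# CHECK_TYPE_SIZE only reports a size when the type exists in pthread.h, so
# these checks double as feature tests for optional spinlock/barrier support.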
CHECK_TYPE_SIZE(pthread_spinlock_t SPINLOCK_FOUND)
CHECK_TYPE_SIZE(pthread_barrier_t BARRIER_FOUND)
if(SPINLOCK_FOUND)
add_definitions(-DPADDLE_USE_PTHREAD_SPINLOCK)
endif(SPINLOCK_FOUND)
if(BARRIER_FOUND)
add_definitions(-DPADDLE_USE_PTHREAD_BARRIER)
endif(BARRIER_FOUND)
SET(CMAKE_EXTRA_INCLUDE_FILES "")
# Common flags. the compiler flag used for C/C++ sources whenever release or debug
# Do not care if this flag is support for gcc.
set(COMMON_FLAGS
......
......@@ -2,6 +2,7 @@
# so that PaddlePaddle can unleash the vectorization power of muticore.
INCLUDE(CheckCXXSourceRuns)
INCLUDE(CheckCXXSourceCompiles)
IF(CMAKE_COMPILER_IS_GNUCC OR CMAKE_COMPILER_IS_GNUCXX OR CMAKE_CXX_COMPILER_ID MATCHES "Clang")
set(MMX_FLAG "-mmmx")
......@@ -17,6 +18,8 @@ ELSEIF(MSVC)
SET(AVX2_FLAG "/arch:AVX2")
ENDIF()
set(CMAKE_REQUIRED_FLAGS_RETAINED ${CMAKE_REQUIRED_FLAGS})
# Check MMX
set(CMAKE_REQUIRED_FLAGS ${MMX_FLAG})
CHECK_CXX_SOURCE_RUNS("
......@@ -73,4 +76,5 @@ int main()
return 0;
}" AVX2_FOUND)
set(CMAKE_REQUIRED_FLAGS ${CMAKE_REQUIRED_FLAGS_RETAINED})
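# Each CHECK_CXX_SOURCE_RUNS above compiles and executes a probe with the
# corresponding ISA flag, so each *_FOUND reflects what the build machine can
# actually run; the original CMAKE_REQUIRED_FLAGS are restored here.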
mark_as_advanced(MMX_FOUND SSE2_FOUND SSE3_FOUND AVX_FOUND AVX2_FOUND)
......@@ -72,6 +72,12 @@ MARK_AS_ADVANCED(HOST_SYSTEM CPU_CORES)
MESSAGE(STATUS "Found Paddle host system: ${HOST_SYSTEM}")
MESSAGE(STATUS "Found Paddle host system's CPU: ${CPU_CORES} cores")
IF(DEFINED CMAKE_SYSTEM_NAME)
IF(${CMAKE_SYSTEM_NAME} STREQUAL "Android")
SET(ANDROID TRUE)
ENDIF()
ENDIF()
# external dependencies log output
SET(EXTERNAL_PROJECT_LOG_ARGS
LOG_DOWNLOAD 0 # Wrap download in script to log output
......
......@@ -71,21 +71,10 @@ function(link_paddle_exe TARGET_NAME)
generate_rdma_links()
endif()
if(WITH_METRIC)
if(WITH_GPU)
set(METRIC_LIBS paddle_metric_learning paddle_dserver_lib metric metric_cpu)
else()
set(METRIC_LIBS paddle_metric_learning paddle_dserver_lib metric_cpu)
endif()
else()
set(METRIC_LIBS "")
endif()
target_circle_link_libraries(${TARGET_NAME}
ARCHIVE_START
paddle_gserver
paddle_function
${METRIC_LIBS}
ARCHIVE_END
paddle_pserver
paddle_trainer_lib
......@@ -95,32 +84,15 @@ function(link_paddle_exe TARGET_NAME)
paddle_parameter
paddle_proto
paddle_cuda
${METRIC_LIBS}
${EXTERNAL_LIBS}
${CMAKE_THREAD_LIBS_INIT}
${CMAKE_DL_LIBS}
${RDMA_LD_FLAGS}
${RDMA_LIBS})
if(WITH_PYTHON)
target_link_libraries(${TARGET_NAME}
${PYTHON_LIBRARIES} util)
endif()
if(WITH_GPU)
target_link_libraries(${TARGET_NAME} ${CUDA_CUDART_LIBRARY})
if(NOT WITH_DSO OR WITH_METRIC)
target_link_libraries(${TARGET_NAME}
${CUDNN_LIBRARY}
${CUDA_curand_LIBRARY})
CUDA_ADD_CUBLAS_TO_TARGET(${TARGET_NAME})
endif()
check_library_exists(rt clock_gettime "time.h" HAVE_CLOCK_GETTIME )
if(HAVE_CLOCK_GETTIME)
target_link_libraries(${TARGET_NAME} rt)
endif()
endif()
if(ANDROID)
target_link_libraries(${TARGET_NAME} log)
endif(ANDROID)
add_dependencies(${TARGET_NAME} ${external_project_dependencies})
endfunction()
......
# Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserved
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import paddle.v2 as paddle
__all__ = ['resnet_cifar10']
def conv_bn_layer(input,
ch_out,
filter_size,
stride,
padding,
active_type=paddle.activation.Relu(),
ch_in=None):
tmp = paddle.layer.img_conv(
input=input,
filter_size=filter_size,
num_channels=ch_in,
num_filters=ch_out,
stride=stride,
padding=padding,
act=paddle.activation.Linear(),
bias_attr=False)
return paddle.layer.batch_norm(input=tmp, act=active_type)
def shortcut(ipt, n_in, n_out, stride):
if n_in != n_out:
return conv_bn_layer(ipt, n_out, 1, stride, 0,
paddle.activation.Linear())
else:
return ipt
def basicblock(ipt, ch_out, stride):
ch_in = ch_out * 2
tmp = conv_bn_layer(ipt, ch_out, 3, stride, 1)
tmp = conv_bn_layer(tmp, ch_out, 3, 1, 1, paddle.activation.Linear())
short = shortcut(ipt, ch_in, ch_out, stride)
return paddle.layer.addto(input=[tmp, short], act=paddle.activation.Relu())
def layer_warp(block_func, ipt, features, count, stride):
tmp = block_func(ipt, features, stride)
for i in range(1, count):
tmp = block_func(tmp, features, 1)
return tmp
def resnet_cifar10(ipt, depth=32):
# depth should be one of 20, 32, 44, 56, 110, 1202
assert (depth - 2) % 6 == 0
n = (depth - 2) / 6
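# Three stages of n basic blocks, each block holding 2 conv layers, account
# for 6n layers; the initial conv and the final classifier give depth = 6n + 2.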
nStages = {16, 64, 128}
conv1 = conv_bn_layer(
ipt, ch_in=3, ch_out=16, filter_size=3, stride=1, padding=1)
res1 = layer_warp(basicblock, conv1, 16, n, 1)
res2 = layer_warp(basicblock, res1, 32, n, 2)
res3 = layer_warp(basicblock, res2, 64, n, 2)
pool = paddle.layer.img_pool(
input=res3, pool_size=8, stride=1, pool_type=paddle.pooling.Avg())
return pool
# Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserved
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License
import sys
import paddle.v2 as paddle
from api_v2_vgg import vgg_bn_drop
def main():
datadim = 3 * 32 * 32
classdim = 10
# PaddlePaddle init
paddle.init(use_gpu=False, trainer_count=1)
image = paddle.layer.data(
name="image", type=paddle.data_type.dense_vector(datadim))
# Add neural network config
# option 1. resnet
# net = resnet_cifar10(image, depth=32)
# option 2. vgg
net = vgg_bn_drop(image)
out = paddle.layer.fc(input=net,
size=classdim,
act=paddle.activation.Softmax())
lbl = paddle.layer.data(
name="label", type=paddle.data_type.integer_value(classdim))
cost = paddle.layer.classification_cost(input=out, label=lbl)
# Create parameters
parameters = paddle.parameters.create(cost)
# Create optimizer
momentum_optimizer = paddle.optimizer.Momentum(
momentum=0.9,
regularization=paddle.optimizer.L2Regularization(rate=0.0002 * 128),
learning_rate=0.1 / 128.0,
learning_rate_decay_a=0.1,
learning_rate_decay_b=50000 * 100,
learning_rate_schedule='discexp',
batch_size=128)
# End batch and end pass event handler
def event_handler(event):
if isinstance(event, paddle.event.EndIteration):
if event.batch_id % 100 == 0:
print "\nPass %d, Batch %d, Cost %f, %s" % (
event.pass_id, event.batch_id, event.cost, event.metrics)
else:
sys.stdout.write('.')
sys.stdout.flush()
if isinstance(event, paddle.event.EndPass):
result = trainer.test(
reader=paddle.batch(
paddle.dataset.cifar.test10(), batch_size=128),
feeding={'image': 0,
'label': 1})
print "\nTest with Pass %d, %s" % (event.pass_id, result.metrics)
# Create trainer
trainer = paddle.trainer.SGD(cost=cost,
parameters=parameters,
update_equation=momentum_optimizer)
trainer.train(
reader=paddle.batch(
paddle.reader.shuffle(
paddle.dataset.cifar.train10(), buf_size=50000),
batch_size=128),
num_passes=5,
event_handler=event_handler,
feeding={'image': 0,
'label': 1})
if __name__ == '__main__':
main()
# Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserved
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import paddle.v2 as paddle
__all__ = ['vgg_bn_drop']
def vgg_bn_drop(input):
def conv_block(ipt, num_filter, groups, dropouts, num_channels=None):
return paddle.networks.img_conv_group(
input=ipt,
num_channels=num_channels,
pool_size=2,
pool_stride=2,
conv_num_filter=[num_filter] * groups,
conv_filter_size=3,
conv_act=paddle.activation.Relu(),
conv_with_batchnorm=True,
conv_batchnorm_drop_rate=dropouts,
pool_type=paddle.pooling.Max())
conv1 = conv_block(input, 64, 2, [0.3, 0], 3)
conv2 = conv_block(conv1, 128, 2, [0.4, 0])
conv3 = conv_block(conv2, 256, 3, [0.4, 0.4, 0])
conv4 = conv_block(conv3, 512, 3, [0.4, 0.4, 0])
conv5 = conv_block(conv4, 512, 3, [0.4, 0.4, 0])
drop = paddle.layer.dropout(input=conv5, dropout_rate=0.5)
fc1 = paddle.layer.fc(input=drop, size=512, act=paddle.activation.Linear())
bn = paddle.layer.batch_norm(
input=fc1,
act=paddle.activation.Relu(),
layer_attr=paddle.attr.Extra(drop_rate=0.5))
fc2 = paddle.layer.fc(input=bn, size=512, act=paddle.activation.Linear())
return fc2
......@@ -126,7 +126,7 @@ class ImageClassifier():
# For oversampling, average predictions across crops.
# If not, the shape of output[name]: (1, class_number),
# the mean is also applicable.
return output[output_layer].mean(0)
return output[output_layer]['value'].mean(0)
def predict(self, image=None, output_layer=None):
assert isinstance(image, basestring)
......
import paddle.v2 as paddle
import paddle.v2.dataset.uci_housing as uci_housing
def main():
# init
paddle.init(use_gpu=False, trainer_count=1)
# network config
x = paddle.layer.data(name='x', type=paddle.data_type.dense_vector(13))
y_predict = paddle.layer.fc(input=x,
param_attr=paddle.attr.Param(name='w'),
size=1,
act=paddle.activation.Linear(),
bias_attr=paddle.attr.Param(name='b'))
y = paddle.layer.data(name='y', type=paddle.data_type.dense_vector(1))
cost = paddle.layer.mse_cost(input=y_predict, label=y)
# create parameters
parameters = paddle.parameters.create(cost)
# create optimizer
optimizer = paddle.optimizer.Momentum(momentum=0)
trainer = paddle.trainer.SGD(cost=cost,
parameters=parameters,
update_equation=optimizer)
# event_handler to print training and testing info
def event_handler(event):
if isinstance(event, paddle.event.EndIteration):
if event.batch_id % 100 == 0:
print "Pass %d, Batch %d, Cost %f" % (
event.pass_id, event.batch_id, event.cost)
if isinstance(event, paddle.event.EndPass):
if (event.pass_id + 1) % 10 == 0:
result = trainer.test(
reader=paddle.batch(
uci_housing.test(), batch_size=2),
feeding={'x': 0,
'y': 1})
print "Test %d, %.2f" % (event.pass_id, result.cost)
# training
trainer.train(
reader=paddle.batch(
paddle.reader.shuffle(
uci_housing.train(), buf_size=500),
batch_size=2),
feeding={'x': 0,
'y': 1},
event_handler=event_handler,
num_passes=30)
if __name__ == '__main__':
main()
......@@ -34,5 +34,5 @@ y_predict = fc_layer(
size=1,
act=LinearActivation(),
bias_attr=ParamAttr(name='b'))
cost = regression_cost(input=y_predict, label=y)
cost = mse_cost(input=y_predict, label=y)
outputs(cost)
......@@ -5,3 +5,6 @@ plot.png
train.log
*pyc
.ipynb_checkpoints
params.pkl
params.tar
params.tar.gz
import numpy
import paddle.v2 as paddle
import gzip
import mnist_util
def softmax_regression(img):
predict = paddle.layer.fc(input=img,
size=10,
act=paddle.activation.Softmax())
return predict
def train_reader():
train_file = './data/raw_data/train'
generator = mnist_util.read_from_mnist(train_file)
for item in generator:
yield item
def multilayer_perceptron(img):
# The first fully-connected layer
hidden1 = paddle.layer.fc(input=img, size=128, act=paddle.activation.Relu())
# The second fully-connected layer and the according activation function
hidden2 = paddle.layer.fc(input=hidden1,
size=64,
act=paddle.activation.Relu())
# The third fully-connected layer; note that the size should be 10,
# which is the number of unique digits
predict = paddle.layer.fc(input=hidden2,
size=10,
act=paddle.activation.Softmax())
return predict
def convolutional_neural_network(img):
# first conv layer
conv_pool_1 = paddle.networks.simple_img_conv_pool(
input=img,
filter_size=5,
num_filters=20,
num_channel=1,
pool_size=2,
pool_stride=2,
act=paddle.activation.Tanh())
# second conv layer
conv_pool_2 = paddle.networks.simple_img_conv_pool(
input=conv_pool_1,
filter_size=5,
num_filters=50,
num_channel=20,
pool_size=2,
pool_stride=2,
act=paddle.activation.Tanh())
# The first fully-connected layer
fc1 = paddle.layer.fc(input=conv_pool_2,
size=128,
act=paddle.activation.Tanh())
# The softmax layer, note that the hidden size should be 10,
# which is the number of unique digits
predict = paddle.layer.fc(input=fc1,
size=10,
act=paddle.activation.Softmax())
return predict
def main():
......@@ -19,42 +63,74 @@ def main():
name='pixel', type=paddle.data_type.dense_vector(784))
label = paddle.layer.data(
name='label', type=paddle.data_type.integer_value(10))
hidden1 = paddle.layer.fc(input=images, size=200)
hidden2 = paddle.layer.fc(input=hidden1, size=200)
inference = paddle.layer.fc(input=hidden2,
size=10,
act=paddle.activation.Softmax())
cost = paddle.layer.classification_cost(input=inference, label=label)
parameters = paddle.parameters.create(cost)
for param_name in parameters.keys():
array = parameters.get(param_name)
array[:] = numpy.random.uniform(low=-1.0, high=1.0, size=array.shape)
parameters.set(parameter_name=param_name, value=array)
# Here we can build the prediction network in different ways. Please
# choose one by uncommenting the corresponding line.
predict = softmax_regression(images)
#predict = multilayer_perceptron(images)
#predict = convolutional_neural_network(images)
cost = paddle.layer.classification_cost(input=predict, label=label)
try:
with gzip.open('params.tar.gz', 'r') as f:
parameters = paddle.parameters.Parameters.from_tar(f)
except IOError:
parameters = paddle.parameters.create(cost)
optimizer = paddle.optimizer.Momentum(
learning_rate=0.1 / 128.0,
momentum=0.9,
regularization=paddle.optimizer.L2Regularization(rate=0.0005 * 128))
trainer = paddle.trainer.SGD(cost=cost,
parameters=parameters,
update_equation=optimizer)
adam_optimizer = paddle.optimizer.Adam(learning_rate=0.01)
lists = []
def event_handler(event):
if isinstance(event, paddle.event.EndIteration):
para = parameters.get('___fc_2__.w0')
print "Pass %d, Batch %d, Cost %f, Weight Mean Of Fc 2 is %f" % (
event.pass_id, event.batch_id, event.cost, para.mean())
else:
pass
trainer = paddle.trainer.SGD(update_equation=adam_optimizer)
trainer.train(train_data_reader=train_reader,
topology=cost,
parameters=parameters,
event_handler=event_handler,
batch_size=32, # batch size should be refactor in Data reader
data_types={ # data_types will be removed, It should be in
# network topology
'pixel': images.type,
'label': label.type
})
if event.batch_id % 1000 == 0:
print "Pass %d, Batch %d, Cost %f, %s" % (
event.pass_id, event.batch_id, event.cost, event.metrics)
with gzip.open('params.tar.gz', 'w') as f:
parameters.to_tar(f)
elif isinstance(event, paddle.event.EndPass):
result = trainer.test(reader=paddle.batch(
paddle.dataset.mnist.test(), batch_size=128))
print "Test with Pass %d, Cost %f, %s\n" % (
event.pass_id, result.cost, result.metrics)
lists.append((event.pass_id, result.cost,
result.metrics['classification_error_evaluator']))
trainer.train(
reader=paddle.batch(
paddle.reader.shuffle(
paddle.dataset.mnist.train(), buf_size=8192),
batch_size=128),
event_handler=event_handler,
num_passes=100)
# find the best pass
best = sorted(lists, key=lambda list: float(list[1]))[0]
print 'Best pass is %s, testing Avgcost is %s' % (best[0], best[1])
print 'The classification accuracy is %.2f%%' % (100 - float(best[2]) * 100)
test_creator = paddle.dataset.mnist.test()
test_data = []
for item in test_creator():
test_data.append((item[0], ))
if len(test_data) == 100:
break
# output is a softmax layer. It returns probabilities.
# Shape should be (100, 10)
probs = paddle.infer(
output_layer=predict, parameters=parameters, input=test_data)
print probs.shape
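# e.g. numpy.argmax(probs, axis=1) would give the predicted digit for each
# of the 100 test images.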
if __name__ == '__main__':
......
......@@ -156,7 +156,7 @@ class ImageClassifier():
# For oversampling, average predictions across crops.
# If not, the shape of output[name]: (1, class_number),
# the mean is also applicable.
res[name] = output[name].mean(0)
res[name] = output[name]['value'].mean(0)
return res
......
import paddle.v2 as paddle
import cPickle
import copy
def main():
paddle.init(use_gpu=False)
movie_title_dict = paddle.dataset.movielens.get_movie_title_dict()
uid = paddle.layer.data(
name='user_id',
type=paddle.data_type.integer_value(
paddle.dataset.movielens.max_user_id() + 1))
usr_emb = paddle.layer.embedding(input=uid, size=32)
usr_gender_id = paddle.layer.data(
name='gender_id', type=paddle.data_type.integer_value(2))
usr_gender_emb = paddle.layer.embedding(input=usr_gender_id, size=16)
usr_age_id = paddle.layer.data(
name='age_id',
type=paddle.data_type.integer_value(
len(paddle.dataset.movielens.age_table)))
usr_age_emb = paddle.layer.embedding(input=usr_age_id, size=16)
usr_job_id = paddle.layer.data(
name='job_id',
type=paddle.data_type.integer_value(paddle.dataset.movielens.max_job_id(
) + 1))
usr_job_emb = paddle.layer.embedding(input=usr_job_id, size=16)
usr_combined_features = paddle.layer.fc(
input=[usr_emb, usr_gender_emb, usr_age_emb, usr_job_emb],
size=200,
act=paddle.activation.Tanh())
mov_id = paddle.layer.data(
name='movie_id',
type=paddle.data_type.integer_value(
paddle.dataset.movielens.max_movie_id() + 1))
mov_emb = paddle.layer.embedding(input=mov_id, size=32)
mov_categories = paddle.layer.data(
name='category_id',
type=paddle.data_type.sparse_binary_vector(
len(paddle.dataset.movielens.movie_categories())))
mov_categories_hidden = paddle.layer.fc(input=mov_categories, size=32)
mov_title_id = paddle.layer.data(
name='movie_title',
type=paddle.data_type.integer_value_sequence(len(movie_title_dict)))
mov_title_emb = paddle.layer.embedding(input=mov_title_id, size=32)
mov_title_conv = paddle.networks.sequence_conv_pool(
input=mov_title_emb, hidden_size=32, context_len=3)
mov_combined_features = paddle.layer.fc(
input=[mov_emb, mov_categories_hidden, mov_title_conv],
size=200,
act=paddle.activation.Tanh())
inference = paddle.layer.cos_sim(
a=usr_combined_features, b=mov_combined_features, size=1, scale=5)
cost = paddle.layer.mse_cost(
input=inference,
label=paddle.layer.data(
name='score', type=paddle.data_type.dense_vector(1)))
parameters = paddle.parameters.create(cost)
trainer = paddle.trainer.SGD(cost=cost,
parameters=parameters,
update_equation=paddle.optimizer.Adam(
learning_rate=1e-4))
feeding = {
'user_id': 0,
'gender_id': 1,
'age_id': 2,
'job_id': 3,
'movie_id': 4,
'category_id': 5,
'movie_title': 6,
'score': 7
}
def event_handler(event):
if isinstance(event, paddle.event.EndIteration):
if event.batch_id % 100 == 0:
print "Pass %d Batch %d Cost %.2f" % (
event.pass_id, event.batch_id, event.cost)
trainer.train(
reader=paddle.batch(
paddle.reader.shuffle(
paddle.dataset.movielens.train(), buf_size=8192),
batch_size=256),
event_handler=event_handler,
feeding=feeding,
num_passes=1)
user_id = 234
movie_id = 345
user = paddle.dataset.movielens.user_info()[user_id]
movie = paddle.dataset.movielens.movie_info()[movie_id]
feature = user.value() + movie.value()
def reader():
yield feature
infer_dict = copy.copy(feeding)
del infer_dict['score']
prediction = paddle.infer(
output=inference,
parameters=parameters,
reader=paddle.batch(
reader, batch_size=32),
feeding=infer_dict)
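# cos_sim with scale=5 produces a score in [-5, 5]; shifting and halving it
# below maps the score onto the 0-5 rating scale.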
print(prediction + 5) / 2
if __name__ == '__main__':
main()
......@@ -86,10 +86,7 @@ movie_feature = construct_feature("movie")
user_feature = construct_feature("user")
similarity = cos_sim(a=movie_feature, b=user_feature)
if not is_predict:
outputs(
regression_cost(
input=similarity, label=data_layer(
'rating', size=1)))
outputs(mse_cost(input=similarity, label=data_layer('rating', size=1)))
define_py_data_sources2(
'data/train.list',
......
import sys
import math
import numpy as np
import paddle.v2 as paddle
import paddle.v2.dataset.conll05 as conll05
def db_lstm():
word_dict, verb_dict, label_dict = conll05.get_dict()
word_dict_len = len(word_dict)
label_dict_len = len(label_dict)
pred_len = len(verb_dict)
mark_dict_len = 2
word_dim = 32
mark_dim = 5
hidden_dim = 512
depth = 8
#8 features
def d_type(size):
return paddle.data_type.integer_value_sequence(size)
word = paddle.layer.data(name='word_data', type=d_type(word_dict_len))
predicate = paddle.layer.data(name='verb_data', type=d_type(pred_len))
ctx_n2 = paddle.layer.data(name='ctx_n2_data', type=d_type(word_dict_len))
ctx_n1 = paddle.layer.data(name='ctx_n1_data', type=d_type(word_dict_len))
ctx_0 = paddle.layer.data(name='ctx_0_data', type=d_type(word_dict_len))
ctx_p1 = paddle.layer.data(name='ctx_p1_data', type=d_type(word_dict_len))
ctx_p2 = paddle.layer.data(name='ctx_p2_data', type=d_type(word_dict_len))
mark = paddle.layer.data(name='mark_data', type=d_type(mark_dict_len))
target = paddle.layer.data(name='target', type=d_type(label_dict_len))
default_std = 1 / math.sqrt(hidden_dim) / 3.0
emb_para = paddle.attr.Param(name='emb', initial_std=0., learning_rate=0.)
std_0 = paddle.attr.Param(initial_std=0.)
std_default = paddle.attr.Param(initial_std=default_std)
predicate_embedding = paddle.layer.embedding(
size=word_dim,
input=predicate,
param_attr=paddle.attr.Param(
name='vemb', initial_std=default_std))
mark_embedding = paddle.layer.embedding(
size=mark_dim, input=mark, param_attr=std_0)
word_input = [word, ctx_n2, ctx_n1, ctx_0, ctx_p1, ctx_p2]
emb_layers = [
paddle.layer.embedding(
size=word_dim, input=x, param_attr=emb_para) for x in word_input
]
emb_layers.append(predicate_embedding)
emb_layers.append(mark_embedding)
hidden_0 = paddle.layer.mixed(
size=hidden_dim,
bias_attr=std_default,
input=[
paddle.layer.full_matrix_projection(
input=emb, param_attr=std_default) for emb in emb_layers
])
mix_hidden_lr = 1e-3
lstm_para_attr = paddle.attr.Param(initial_std=0.0, learning_rate=1.0)
hidden_para_attr = paddle.attr.Param(
initial_std=default_std, learning_rate=mix_hidden_lr)
lstm_0 = paddle.layer.lstmemory(
input=hidden_0,
act=paddle.activation.Relu(),
gate_act=paddle.activation.Sigmoid(),
state_act=paddle.activation.Sigmoid(),
bias_attr=std_0,
param_attr=lstm_para_attr)
#stack L-LSTM and R-LSTM with direct edges
input_tmp = [hidden_0, lstm_0]
for i in range(1, depth):
mix_hidden = paddle.layer.mixed(
size=hidden_dim,
bias_attr=std_default,
input=[
paddle.layer.full_matrix_projection(
input=input_tmp[0], param_attr=hidden_para_attr),
paddle.layer.full_matrix_projection(
input=input_tmp[1], param_attr=lstm_para_attr)
])
lstm = paddle.layer.lstmemory(
input=mix_hidden,
act=paddle.activation.Relu(),
gate_act=paddle.activation.Sigmoid(),
state_act=paddle.activation.Sigmoid(),
reverse=((i % 2) == 1),
bias_attr=std_0,
param_attr=lstm_para_attr)
input_tmp = [mix_hidden, lstm]
feature_out = paddle.layer.mixed(
size=label_dict_len,
bias_attr=std_default,
input=[
paddle.layer.full_matrix_projection(
input=input_tmp[0], param_attr=hidden_para_attr),
paddle.layer.full_matrix_projection(
input=input_tmp[1], param_attr=lstm_para_attr)
], )
crf_cost = paddle.layer.crf(size=label_dict_len,
input=feature_out,
label=target,
param_attr=paddle.attr.Param(
name='crfw',
initial_std=default_std,
learning_rate=mix_hidden_lr))
crf_dec = paddle.layer.crf_decoding(
name='crf_dec_l',
size=label_dict_len,
input=feature_out,
label=target,
param_attr=paddle.attr.Param(name='crfw'))
return crf_cost, crf_dec
def load_parameter(file_name, h, w):
with open(file_name, 'rb') as f:
f.read(16) # skip header.
return np.fromfile(f, dtype=np.float32).reshape(h, w)
def main():
paddle.init(use_gpu=False, trainer_count=1)
# define network topology
crf_cost, crf_dec = db_lstm()
# create parameters
parameters = paddle.parameters.create([crf_cost, crf_dec])
# create optimizer
optimizer = paddle.optimizer.Momentum(
momentum=0,
learning_rate=2e-2,
regularization=paddle.optimizer.L2Regularization(rate=8e-4),
model_average=paddle.optimizer.ModelAverage(
average_window=0.5, max_average_window=10000), )
def event_handler(event):
if isinstance(event, paddle.event.EndIteration):
if event.batch_id % 100 == 0:
print "Pass %d, Batch %d, Cost %f, %s" % (
event.pass_id, event.batch_id, event.cost, event.metrics)
trainer = paddle.trainer.SGD(cost=crf_cost,
parameters=parameters,
update_equation=optimizer)
parameters.set('emb', load_parameter(conll05.get_embedding(), 44068, 32))
trn_reader = paddle.batch(
paddle.reader.shuffle(
conll05.test(), buf_size=8192), batch_size=10)
feeding = {
'word_data': 0,
'ctx_n2_data': 1,
'ctx_n1_data': 2,
'ctx_0_data': 3,
'ctx_p1_data': 4,
'ctx_p2_data': 5,
'verb_data': 6,
'mark_data': 7,
'target': 8
}
trainer.train(
reader=trn_reader,
event_handler=event_handler,
num_passes=10000,
feeding=feeding)
if __name__ == '__main__':
main()
# Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserved
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import sys
import paddle.v2 as paddle
def convolution_net(input_dim, class_dim=2, emb_dim=128, hid_dim=128):
data = paddle.layer.data("word",
paddle.data_type.integer_value_sequence(input_dim))
emb = paddle.layer.embedding(input=data, size=emb_dim)
conv_3 = paddle.networks.sequence_conv_pool(
input=emb, context_len=3, hidden_size=hid_dim)
conv_4 = paddle.networks.sequence_conv_pool(
input=emb, context_len=4, hidden_size=hid_dim)
output = paddle.layer.fc(input=[conv_3, conv_4],
size=class_dim,
act=paddle.activation.Softmax())
lbl = paddle.layer.data("label", paddle.data_type.integer_value(2))
cost = paddle.layer.classification_cost(input=output, label=lbl)
return cost
def stacked_lstm_net(input_dim,
class_dim=2,
emb_dim=128,
hid_dim=512,
stacked_num=3):
"""
A Wrapper for sentiment classification task.
This network uses bi-directional recurrent network,
consisting three LSTM layers. This configure is referred to
the paper as following url, but use fewer layrs.
http://www.aclweb.org/anthology/P15-1109
input_dim: here is word dictionary dimension.
class_dim: number of categories.
emb_dim: dimension of word embedding.
hid_dim: dimension of hidden layer.
stacked_num: number of stacked lstm-hidden layer.
"""
assert stacked_num % 2 == 1
layer_attr = paddle.attr.Extra(drop_rate=0.5)
fc_para_attr = paddle.attr.Param(learning_rate=1e-3)
lstm_para_attr = paddle.attr.Param(initial_std=0., learning_rate=1.)
para_attr = [fc_para_attr, lstm_para_attr]
bias_attr = paddle.attr.Param(initial_std=0., l2_rate=0.)
relu = paddle.activation.Relu()
linear = paddle.activation.Linear()
data = paddle.layer.data("word",
paddle.data_type.integer_value_sequence(input_dim))
emb = paddle.layer.embedding(input=data, size=emb_dim)
fc1 = paddle.layer.fc(input=emb,
size=hid_dim,
act=linear,
bias_attr=bias_attr)
lstm1 = paddle.layer.lstmemory(
input=fc1, act=relu, bias_attr=bias_attr, layer_attr=layer_attr)
inputs = [fc1, lstm1]
for i in range(2, stacked_num + 1):
fc = paddle.layer.fc(input=inputs,
size=hid_dim,
act=linear,
param_attr=para_attr,
bias_attr=bias_attr)
lstm = paddle.layer.lstmemory(
input=fc,
reverse=(i % 2) == 0,
act=relu,
bias_attr=bias_attr,
layer_attr=layer_attr)
inputs = [fc, lstm]
fc_last = paddle.layer.pooling(
input=inputs[0], pooling_type=paddle.pooling.Max())
lstm_last = paddle.layer.pooling(
input=inputs[1], pooling_type=paddle.pooling.Max())
output = paddle.layer.fc(input=[fc_last, lstm_last],
size=class_dim,
act=paddle.activation.Softmax(),
bias_attr=bias_attr,
param_attr=para_attr)
lbl = paddle.layer.data("label", paddle.data_type.integer_value(2))
cost = paddle.layer.classification_cost(input=output, label=lbl)
return cost
if __name__ == '__main__':
# init
paddle.init(use_gpu=False)
# data
print 'load dictionary...'
word_dict = paddle.dataset.imdb.word_dict()
dict_dim = len(word_dict)
class_dim = 2
train_reader = paddle.batch(
paddle.reader.shuffle(
lambda: paddle.dataset.imdb.train(word_dict), buf_size=1000),
batch_size=100)
test_reader = paddle.batch(
lambda: paddle.dataset.imdb.test(word_dict), batch_size=100)
feeding = {'word': 0, 'label': 1}
# network config
# Please choose the way to build the network
# by uncommenting the corresponding line.
cost = convolution_net(dict_dim, class_dim=class_dim)
# cost = stacked_lstm_net(dict_dim, class_dim=class_dim, stacked_num=3)
# create parameters
parameters = paddle.parameters.create(cost)
# create optimizer
adam_optimizer = paddle.optimizer.Adam(
learning_rate=2e-3,
regularization=paddle.optimizer.L2Regularization(rate=8e-4),
model_average=paddle.optimizer.ModelAverage(average_window=0.5))
# End batch and end pass event handler
def event_handler(event):
if isinstance(event, paddle.event.EndIteration):
if event.batch_id % 100 == 0:
print "\nPass %d, Batch %d, Cost %f, %s" % (
event.pass_id, event.batch_id, event.cost, event.metrics)
else:
sys.stdout.write('.')
sys.stdout.flush()
if isinstance(event, paddle.event.EndPass):
result = trainer.test(reader=test_reader, feeding=feeding)
print "\nTest with Pass %d, %s" % (event.pass_id, result.metrics)
# create trainer
trainer = paddle.trainer.SGD(cost=cost,
parameters=parameters,
update_equation=adam_optimizer)
trainer.train(
reader=train_reader,
event_handler=event_handler,
feeding=feeding,
num_passes=2)
import sys
import paddle.v2 as paddle
def seqToseq_net(source_dict_dim, target_dict_dim, is_generating=False):
### Network Architecture
word_vector_dim = 512 # dimension of word vector
decoder_size = 512 # dimension of hidden unit in GRU Decoder network
encoder_size = 512 # dimension of hidden unit in GRU Encoder network
beam_size = 3
max_length = 250
#### Encoder
src_word_id = paddle.layer.data(
name='source_language_word',
type=paddle.data_type.integer_value_sequence(source_dict_dim))
src_embedding = paddle.layer.embedding(
input=src_word_id,
size=word_vector_dim,
param_attr=paddle.attr.ParamAttr(name='_source_language_embedding'))
src_forward = paddle.networks.simple_gru(
input=src_embedding, size=encoder_size)
src_backward = paddle.networks.simple_gru(
input=src_embedding, size=encoder_size, reverse=True)
encoded_vector = paddle.layer.concat(input=[src_forward, src_backward])
#### Decoder
with paddle.layer.mixed(size=decoder_size) as encoded_proj:
encoded_proj += paddle.layer.full_matrix_projection(
input=encoded_vector)
backward_first = paddle.layer.first_seq(input=src_backward)
with paddle.layer.mixed(
size=decoder_size, act=paddle.activation.Tanh()) as decoder_boot:
decoder_boot += paddle.layer.full_matrix_projection(
input=backward_first)
def gru_decoder_with_attention(enc_vec, enc_proj, current_word):
decoder_mem = paddle.layer.memory(
name='gru_decoder', size=decoder_size, boot_layer=decoder_boot)
context = paddle.networks.simple_attention(
encoded_sequence=enc_vec,
encoded_proj=enc_proj,
decoder_state=decoder_mem)
with paddle.layer.mixed(size=decoder_size * 3) as decoder_inputs:
decoder_inputs += paddle.layer.full_matrix_projection(input=context)
decoder_inputs += paddle.layer.full_matrix_projection(
input=current_word)
gru_step = paddle.layer.gru_step(
name='gru_decoder',
input=decoder_inputs,
output_mem=decoder_mem,
size=decoder_size)
with paddle.layer.mixed(
size=target_dict_dim,
bias_attr=True,
act=paddle.activation.Softmax()) as out:
out += paddle.layer.full_matrix_projection(input=gru_step)
return out
decoder_group_name = "decoder_group"
group_input1 = paddle.layer.StaticInputV2(input=encoded_vector, is_seq=True)
group_input2 = paddle.layer.StaticInputV2(input=encoded_proj, is_seq=True)
group_inputs = [group_input1, group_input2]
if not is_generating:
trg_embedding = paddle.layer.embedding(
input=paddle.layer.data(
name='target_language_word',
type=paddle.data_type.integer_value_sequence(target_dict_dim)),
size=word_vector_dim,
param_attr=paddle.attr.ParamAttr(name='_target_language_embedding'))
group_inputs.append(trg_embedding)
# For a decoder equipped with an attention mechanism, in training,
# the target embedding (the ground truth) is the data input,
# while the encoded source sequence is accessed as an unbounded memory.
# Here, the StaticInput defines a read-only memory
# for the recurrent_group.
decoder = paddle.layer.recurrent_group(
name=decoder_group_name,
step=gru_decoder_with_attention,
input=group_inputs)
lbl = paddle.layer.data(
name='target_language_next_word',
type=paddle.data_type.integer_value_sequence(target_dict_dim))
cost = paddle.layer.classification_cost(input=decoder, label=lbl)
return cost
else:
# In generation, the decoder predicts the next target word based on
# the encoded source sequence and the last generated target word.
# The encoded source sequence (the encoder's output) must be specified by
# StaticInput, which is a read-only memory.
# The embedding of the last generated word is automatically retrieved
# by GeneratedInput, which is initialized with a start mark, such as <s>,
# and must be included in generation.
trg_embedding = paddle.layer.GeneratedInputV2(
size=target_dict_dim,
embedding_name='_target_language_embedding',
embedding_size=word_vector_dim)
group_inputs.append(trg_embedding)
beam_gen = paddle.layer.beam_search(
name=decoder_group_name,
step=gru_decoder_with_attention,
input=group_inputs,
bos_id=0,
eos_id=1,
beam_size=beam_size,
max_length=max_length)
return beam_gen
def main():
paddle.init(use_gpu=False, trainer_count=1)
is_generating = False
# source and target dict dim.
dict_size = 30000
source_dict_dim = target_dict_dim = dict_size
# train the network
if not is_generating:
cost = seqToseq_net(source_dict_dim, target_dict_dim)
parameters = paddle.parameters.create(cost)
# define optimize method and trainer
optimizer = paddle.optimizer.Adam(
learning_rate=5e-5,
regularization=paddle.optimizer.L2Regularization(rate=8e-4))
trainer = paddle.trainer.SGD(cost=cost,
parameters=parameters,
update_equation=optimizer)
# define data reader
wmt14_reader = paddle.batch(
paddle.reader.shuffle(
paddle.dataset.wmt14.train(dict_size), buf_size=8192),
batch_size=5)
# define event_handler callback
def event_handler(event):
if isinstance(event, paddle.event.EndIteration):
if event.batch_id % 10 == 0:
print "\nPass %d, Batch %d, Cost %f, %s" % (
event.pass_id, event.batch_id, event.cost,
event.metrics)
else:
sys.stdout.write('.')
sys.stdout.flush()
# start to train
trainer.train(
reader=wmt14_reader, event_handler=event_handler, num_passes=2)
# generate French sequences from English input
else:
# use the first 3 samples for generation
gen_creator = paddle.dataset.wmt14.gen(dict_size)
gen_data = []
gen_num = 3
for item in gen_creator():
gen_data.append((item[0], ))
if len(gen_data) == gen_num:
break
beam_gen = seqToseq_net(source_dict_dim, target_dict_dim, is_generating)
# get the pretrained model, whose BLEU score is 26.92
parameters = paddle.dataset.wmt14.model()
# prob holds the prediction probabilities, and id holds the predicted words.
beam_result = paddle.infer(
output_layer=beam_gen,
parameters=parameters,
input=gen_data,
field=['prob', 'id'])
# get the dictionary
src_dict, trg_dict = paddle.dataset.wmt14.get_dict(dict_size)
# the delimiter element of generated sequences is -1,
# and the first element of each generated sequence is the sequence length
seq_list = []
seq = []
for w in beam_result[1]:
if w != -1:
seq.append(w)
else:
seq_list.append(' '.join([trg_dict.get(w) for w in seq[1:]]))
seq = []
prob = beam_result[0]
beam_size = 3
for i in xrange(gen_num):
print "\n*******************************************************\n"
print "src:", ' '.join(
[src_dict.get(w) for w in gen_data[i][0]]), "\n"
for j in xrange(beam_size):
print "prob = %f:" % (prob[i][j]), seq_list[i * beam_size + j]
if __name__ == '__main__':
main()
import math
import paddle.v2 as paddle
dictsize = 1953
embsize = 32
hiddensize = 256
N = 5
def wordemb(inlayer):
wordemb = paddle.layer.table_projection(
input=inlayer,
size=embsize,
param_attr=paddle.attr.Param(
name="_proj",
initial_std=0.001,
learning_rate=1,
l2_rate=0, ))
return wordemb
def main():
paddle.init(use_gpu=False, trainer_count=1)
word_dict = paddle.dataset.imikolov.build_dict()
dict_size = len(word_dict)
firstword = paddle.layer.data(
name="firstw", type=paddle.data_type.integer_value(dict_size))
secondword = paddle.layer.data(
name="secondw", type=paddle.data_type.integer_value(dict_size))
thirdword = paddle.layer.data(
name="thirdw", type=paddle.data_type.integer_value(dict_size))
fourthword = paddle.layer.data(
name="fourthw", type=paddle.data_type.integer_value(dict_size))
nextword = paddle.layer.data(
name="fifthw", type=paddle.data_type.integer_value(dict_size))
Efirst = wordemb(firstword)
Esecond = wordemb(secondword)
Ethird = wordemb(thirdword)
Efourth = wordemb(fourthword)
contextemb = paddle.layer.concat(input=[Efirst, Esecond, Ethird, Efourth])
hidden1 = paddle.layer.fc(input=contextemb,
size=hiddensize,
act=paddle.activation.Sigmoid(),
layer_attr=paddle.attr.Extra(drop_rate=0.5),
bias_attr=paddle.attr.Param(learning_rate=2),
param_attr=paddle.attr.Param(
initial_std=1. / math.sqrt(embsize * 8),
learning_rate=1))
predictword = paddle.layer.fc(input=hidden1,
size=dict_size,
bias_attr=paddle.attr.Param(learning_rate=2),
act=paddle.activation.Softmax())
def event_handler(event):
if isinstance(event, paddle.event.EndIteration):
if event.batch_id % 100 == 0:
result = trainer.test(
paddle.batch(
paddle.dataset.imikolov.test(word_dict, N), 32))
print "Pass %d, Batch %d, Cost %f, %s, Testing metrics %s" % (
event.pass_id, event.batch_id, event.cost, event.metrics,
result.metrics)
cost = paddle.layer.classification_cost(input=predictword, label=nextword)
parameters = paddle.parameters.create(cost)
adam_optimizer = paddle.optimizer.Adam(
learning_rate=3e-3,
regularization=paddle.optimizer.L2Regularization(8e-4))
trainer = paddle.trainer.SGD(cost, parameters, adam_optimizer)
trainer.train(
paddle.batch(paddle.dataset.imikolov.train(word_dict, N), 32),
num_passes=30,
event_handler=event_handler)
if __name__ == '__main__':
main()
API Chinese Manual
==================
DataProvider API
----------------
.. toctree::
:maxdepth: 1
data_provider/dataprovider_cn.rst
data_provider/pydataprovider2_cn.rst
.. _api_trainer_config:
Model Config API
----------------
.. toctree::
:maxdepth: 1
trainer_config_helpers/optimizers.rst
trainer_config_helpers/data_sources.rst
trainer_config_helpers/layers.rst
trainer_config_helpers/activations.rst
trainer_config_helpers/poolings.rst
trainer_config_helpers/networks.rst
trainer_config_helpers/evaluators.rst
trainer_config_helpers/attrs.rst
Applications API
----------------
API
===
.. toctree::
:maxdepth: 1
predict/swig_py_paddle_cn.rst
Model Configuration <v2/model_configs.rst>
Data Access <v2/data.rst>
Training and Inference <v2/run_logic.rst>
API
===
DataProvider API
----------------
.. toctree::
:maxdepth: 1
data_provider/dataprovider_en.rst
data_provider/pydataprovider2_en.rst
.. _api_trainer_config:
Model Config API
----------------
.. toctree::
:maxdepth: 1
trainer_config_helpers/optimizers.rst
trainer_config_helpers/data_sources.rst
trainer_config_helpers/layers.rst
trainer_config_helpers/activations.rst
trainer_config_helpers/poolings.rst
trainer_config_helpers/networks.rst
trainer_config_helpers/evaluators.rst
trainer_config_helpers/attrs.rst
Applications API
----------------
.. toctree::
:maxdepth: 1
predict/swig_py_paddle_en.rst
v2/model_configs.rst
v2/data.rst
v2/run_logic.rst
API Chinese Manual
==================
DataProvider API
----------------
.. toctree::
:maxdepth: 1
data_provider/dataprovider_cn.rst
data_provider/pydataprovider2_cn.rst
.. _api_trainer_config:
Model Config API
----------------
.. toctree::
:maxdepth: 1
trainer_config_helpers/optimizers.rst
trainer_config_helpers/data_sources.rst
trainer_config_helpers/layers.rst
trainer_config_helpers/activations.rst
trainer_config_helpers/poolings.rst
trainer_config_helpers/networks.rst
trainer_config_helpers/evaluators.rst
trainer_config_helpers/attrs.rst
Applications API
----------------
.. toctree::
:maxdepth: 1
predict/swig_py_paddle_cn.rst
API
===
DataProvider API
----------------
.. toctree::
:maxdepth: 1
data_provider/dataprovider_en.rst
data_provider/pydataprovider2_en.rst
.. _api_trainer_config:
Model Config API
----------------
.. toctree::
:maxdepth: 1
trainer_config_helpers/optimizers.rst
trainer_config_helpers/data_sources.rst
trainer_config_helpers/layers.rst
trainer_config_helpers/activations.rst
trainer_config_helpers/poolings.rst
trainer_config_helpers/networks.rst
trainer_config_helpers/evaluators.rst
trainer_config_helpers/attrs.rst
Applications API
----------------
.. toctree::
:maxdepth: 1
predict/swig_py_paddle_en.rst
......@@ -139,24 +139,12 @@ lstmemory
:members: lstmemory
:noindex:
lstm_step_layer
---------------
.. automodule:: paddle.trainer_config_helpers.layers
:members: lstm_step_layer
:noindex:
grumemory
---------
.. automodule:: paddle.trainer_config_helpers.layers
:members: grumemory
:noindex:
gru_step_layer
---------------
.. automodule:: paddle.trainer_config_helpers.layers
:members: gru_step_layer
:noindex:
Recurrent Layer Group
=====================
......@@ -172,6 +160,18 @@ recurrent_group
:members: recurrent_group
:noindex:
lstm_step_layer
---------------
.. automodule:: paddle.trainer_config_helpers.layers
:members: lstm_step_layer
:noindex:
gru_step_layer
---------------
.. automodule:: paddle.trainer_config_helpers.layers
:members: gru_step_layer
:noindex:
beam_search
------------
.. automodule:: paddle.trainer_config_helpers.layers
......@@ -308,6 +308,12 @@ repeat_layer
:members: repeat_layer
:noindex:
rotate_layer
------------
.. automodule:: paddle.trainer_config_helpers.layers
:members: rotate_layer
:noindex:
seq_reshape_layer
-----------------
.. automodule:: paddle.trainer_config_helpers.layers
......@@ -426,6 +432,12 @@ multi_binary_label_cross_entropy
:members: multi_binary_label_cross_entropy
:noindex:
mse_cost
---------
.. automodule:: paddle.trainer_config_helpers.layers
:members: mse_cost
:noindex:
huber_cost
----------
.. automodule:: paddle.trainer_config_helpers.layers
......@@ -444,6 +456,12 @@ rank_cost
:members: rank_cost
:noindex:
sum_cost
---------
.. automodule:: paddle.trainer_config_helpers.layers
:members: sum_cost
:noindex:
crf_layer
-----------------
.. automodule:: paddle.trainer_config_helpers.layers
......@@ -462,6 +480,12 @@ ctc_layer
:members: ctc_layer
:noindex:
warp_ctc_layer
--------------
.. automodule:: paddle.trainer_config_helpers.layers
:members: warp_ctc_layer
:noindex:
nce_layer
-----------
.. automodule:: paddle.trainer_config_helpers.layers
......@@ -474,12 +498,6 @@ hsigmoid
:members: hsigmoid
:noindex:
sum_cost
---------
.. automodule:: paddle.trainer_config_helpers.layers
:members: sum_cost
:noindex:
Check Layer
============
......
===========
Activation
===========
Abs
===
.. automodule:: paddle.v2.activation
:members: Abs
:noindex:
Exp
===
.. automodule:: paddle.v2.activation
:members: Exp
:noindex:
Identity
========
.. automodule:: paddle.v2.activation
:members: Identity
:noindex:
Linear
======
.. automodule:: paddle.v2.activation
:members: Linear
:noindex:
Log
===
.. automodule:: paddle.v2.activation
:members: Log
:noindex:
Square
======
.. automodule:: paddle.v2.activation
:members: Square
:noindex:
Sigmoid
=======
.. automodule:: paddle.v2.activation
:members: Sigmoid
:noindex:
Softmax
=======
.. automodule:: paddle.v2.activation
:members: Softmax
:noindex:
SequenceSoftmax
===============
.. automodule:: paddle.v2.activation
:members: SequenceSoftmax
:noindex:
Relu
====
.. automodule:: paddle.v2.activation
:members: Relu
:noindex:
BRelu
=====
.. automodule:: paddle.v2.activation
:members: BRelu
:noindex:
SoftRelu
========
.. automodule:: paddle.v2.activation
:members: SoftRelu
:noindex:
Tanh
====
.. automodule:: paddle.v2.activation
:members: Tanh
:noindex:
STanh
=====
.. automodule:: paddle.v2.activation
:members: STanh
:noindex:
Parameter Attribute
===================
.. automodule:: paddle.v2.attr
:members:
:noindex:
.. _api_v2.layer:
======
Layers
======
Data layer
===========
.. _api_v2.layer_data:
data
----
.. automodule:: paddle.v2.layer
:members: data
:noindex:
Fully Connected Layers
======================
.. _api_v2.layer_fc:
fc
--
.. automodule:: paddle.v2.layer
:members: fc
:noindex:
selective_fc
------------
.. automodule:: paddle.v2.layer
:members: selective_fc
:noindex:
Conv Layers
===========
conv_operator
-------------
.. automodule:: paddle.v2.layer
:members: conv_operator
:noindex:
conv_projection
---------------
.. automodule:: paddle.v2.layer
:members: conv_projection
:noindex:
conv_shift
----------
.. automodule:: paddle.v2.layer
:members: conv_shift
:noindex:
img_conv
--------
.. automodule:: paddle.v2.layer
:members: img_conv
:noindex:
.. _api_v2.layer_context_projection:
context_projection
------------------
.. automodule:: paddle.v2.layer
:members: context_projection
:noindex:
Image Pooling Layer
===================
img_pool
--------
.. automodule:: paddle.v2.layer
:members: img_pool
:noindex:
spp
---
.. automodule:: paddle.v2.layer
:members: spp
:noindex:
maxout
------
.. automodule:: paddle.v2.layer
:members: maxout
:noindex:
Norm Layer
==========
img_cmrnorm
-----------
.. automodule:: paddle.v2.layer
:members: img_cmrnorm
:noindex:
batch_norm
----------
.. automodule:: paddle.v2.layer
:members: batch_norm
:noindex:
sum_to_one_norm
---------------
.. automodule:: paddle.v2.layer
:members: sum_to_one_norm
:noindex:
cross_channel_norm
------------------
.. automodule:: paddle.v2.layer
:members: cross_channel_norm
:noindex:
Recurrent Layers
================
recurrent
---------
.. automodule:: paddle.v2.layer
:members: recurrent
:noindex:
lstmemory
---------
.. automodule:: paddle.v2.layer
:members: lstmemory
:noindex:
grumemory
---------
.. automodule:: paddle.v2.layer
:members: grumemory
:noindex:
Recurrent Layer Group
=====================
memory
------
.. automodule:: paddle.v2.layer
:members: memory
:noindex:
recurrent_group
---------------
.. automodule:: paddle.v2.layer
:members: recurrent_group
:noindex:
lstm_step
---------
.. automodule:: paddle.v2.layer
:members: lstm_step
:noindex:
gru_step
--------
.. automodule:: paddle.v2.layer
:members: gru_step
:noindex:
beam_search
------------
.. automodule:: paddle.v2.layer
:members: beam_search
:noindex:
get_output
----------
.. automodule:: paddle.v2.layer
:members: get_output
:noindex:
Mixed Layer
===========
.. _api_v2.layer_mixed:
mixed
-----
.. automodule:: paddle.v2.layer
:members: mixed
:noindex:
.. _api_v2.layer_embedding:
embedding
---------
.. automodule:: paddle.v2.layer
:members: embedding
:noindex:
scaling_projection
------------------
.. automodule:: paddle.v2.layer
:members: scaling_projection
:noindex:
dotmul_projection
-----------------
.. automodule:: paddle.v2.layer
:members: dotmul_projection
:noindex:
dotmul_operator
---------------
.. automodule:: paddle.v2.layer
:members: dotmul_operator
:noindex:
full_matrix_projection
----------------------
.. automodule:: paddle.v2.layer
:members: full_matrix_projection
:noindex:
identity_projection
-------------------
.. automodule:: paddle.v2.layer
:members: identity_projection
:noindex:
table_projection
----------------
.. automodule:: paddle.v2.layer
:members: table_projection
:noindex:
trans_full_matrix_projection
----------------------------
.. automodule:: paddle.v2.layer
:members: trans_full_matrix_projection
:noindex:
Aggregate Layers
================
.. _api_v2.layer_pooling:
pooling
-------
.. automodule:: paddle.v2.layer
:members: pooling
:noindex:
.. _api_v2.layer_last_seq:
last_seq
--------
.. automodule:: paddle.v2.layer
:members: last_seq
:noindex:
.. _api_v2.layer_first_seq:
first_seq
---------
.. automodule:: paddle.v2.layer
:members: first_seq
:noindex:
concat
------
.. automodule:: paddle.v2.layer
:members: concat
:noindex:
seq_concat
----------
.. automodule:: paddle.v2.layer
:members: seq_concat
:noindex:
Reshaping Layers
================
block_expand
------------
.. automodule:: paddle.v2.layer
:members: block_expand
:noindex:
.. _api_v2.layer_expand:
expand
------
.. automodule:: paddle.v2.layer
:members: expand
:noindex:
repeat
------
.. automodule:: paddle.v2.layer
:members: repeat
:noindex:
rotate
------
.. automodule:: paddle.v2.layer
:members: rotate
:noindex:
seq_reshape
-----------
.. automodule:: paddle.v2.layer
:members: seq_reshape
:noindex:
Math Layers
===========
addto
-----
.. automodule:: paddle.v2.layer
:members: addto
:noindex:
linear_comb
-----------
.. automodule:: paddle.v2.layer
:members: linear_comb
:noindex:
interpolation
-------------
.. automodule:: paddle.v2.layer
:members: interpolation
:noindex:
bilinear_interp
---------------
.. automodule:: paddle.v2.layer
:members: bilinear_interp
:noindex:
power
-----
.. automodule:: paddle.v2.layer
:members: power
:noindex:
scaling
-------
.. automodule:: paddle.v2.layer
:members: scaling
:noindex:
slope_intercept
---------------
.. automodule:: paddle.v2.layer
:members: slope_intercept
:noindex:
tensor
------
.. automodule:: paddle.v2.layer
:members: tensor
:noindex:
.. _api_v2.layer_cos_sim:
cos_sim
-------
.. automodule:: paddle.v2.layer
:members: cos_sim
:noindex:
trans
-----
.. automodule:: paddle.v2.layer
:members: trans
:noindex:
Sampling Layers
===============
maxid
-----
.. automodule:: paddle.v2.layer
:members: maxid
:noindex:
sampling_id
-----------
.. automodule:: paddle.v2.layer
:members: sampling_id
:noindex:
Slicing and Joining Layers
==========================
pad
----
.. automodule:: paddle.v2.layer
:members: pad
:noindex:
.. _api_v2.layer_costs:
Cost Layers
===========
cross_entropy_cost
------------------
.. automodule:: paddle.v2.layer
:members: cross_entropy_cost
:noindex:
cross_entropy_with_selfnorm_cost
--------------------------------
.. automodule:: paddle.v2.layer
:members: cross_entropy_with_selfnorm_cost
:noindex:
multi_binary_label_cross_entropy_cost
-------------------------------------
.. automodule:: paddle.v2.layer
:members: multi_binary_label_cross_entropy_cost
:noindex:
huber_cost
----------
.. automodule:: paddle.v2.layer
:members: huber_cost
:noindex:
lambda_cost
-----------
.. automodule:: paddle.v2.layer
:members: lambda_cost
:noindex:
rank_cost
---------
.. automodule:: paddle.v2.layer
:members: rank_cost
:noindex:
sum_cost
---------
.. automodule:: paddle.v2.layer
:members: sum_cost
:noindex:
crf
---
.. automodule:: paddle.v2.layer
:members: crf
:noindex:
crf_decoding
------------
.. automodule:: paddle.v2.layer
:members: crf_decoding
:noindex:
ctc
---
.. automodule:: paddle.v2.layer
:members: ctc
:noindex:
warp_ctc
--------
.. automodule:: paddle.v2.layer
:members: warp_ctc
:noindex:
nce
---
.. automodule:: paddle.v2.layer
:members: nce
:noindex:
hsigmoid
---------
.. automodule:: paddle.v2.layer
:members: hsigmoid
:noindex:
Check Layer
============
eos
---
.. automodule:: paddle.v2.layer
:members: eos
:noindex:
========
Networks
========
The v2.networks module contains commonly used network structures that combine multiple layers.
NLP
===
sequence_conv_pool
------------------
.. automodule:: paddle.v2.networks
:members: sequence_conv_pool
:noindex:
.. _api_trainer_config_helpers_network_text_conv_pool:
text_conv_pool
--------------
.. automodule:: paddle.v2.networks
:members: text_conv_pool
:noindex:
Images
======
img_conv_bn_pool
----------------
.. automodule:: paddle.v2.networks
:members: img_conv_bn_pool
:noindex:
img_conv_group
--------------
.. automodule:: paddle.v2.networks
:members: img_conv_group
:noindex:
.. _api_trainer_config_helpers_network_simple_img_conv_pool:
simple_img_conv_pool
--------------------
.. automodule:: paddle.v2.networks
:members: simple_img_conv_pool
:noindex:
vgg_16_network
---------------
.. automodule:: paddle.v2.networks
:members: vgg_16_network
:noindex:
Recurrent
=========
LSTM
----
lstmemory_unit
``````````````
.. automodule:: paddle.v2.networks
:members: lstmemory_unit
:noindex:
lstmemory_group
```````````````
.. automodule:: paddle.v2.networks
:members: lstmemory_group
:noindex:
simple_lstm
```````````
.. automodule:: paddle.v2.networks
:members: simple_lstm
:noindex:
bidirectional_lstm
``````````````````
.. automodule:: paddle.v2.networks
:members: bidirectional_lstm
:noindex:
GRU
---
gru_unit
````````
.. automodule:: paddle.v2.networks
:members: gru_unit
:noindex:
gru_group
`````````
.. automodule:: paddle.v2.networks
:members: gru_group
:noindex:
simple_gru
``````````
.. automodule:: paddle.v2.networks
:members: simple_gru
:noindex:
simple_attention
----------------
.. automodule:: paddle.v2.networks
:members: simple_attention
:noindex:
Miscs
=====
dropout_layer
--------------
.. automodule:: paddle.v2.networks
:members: dropout_layer
:noindex:
==========
Optimizer
==========
Momentum
========
.. automodule:: paddle.v2.optimizer
:members: Momentum
:noindex:
Adam
====
.. automodule:: paddle.v2.optimizer
:members: Adam
:noindex:
Adamax
======
.. automodule:: paddle.v2.optimizer
:members: Adamax
:noindex:
AdaGrad
=======
.. automodule:: paddle.v2.optimizer
:members: AdaGrad
:noindex:
DecayedAdaGrad
==============
.. automodule:: paddle.v2.optimizer
:members: DecayedAdaGrad
:noindex:
AdaDelta
========
.. automodule:: paddle.v2.optimizer
:members: AdaDelta
:noindex:
RMSProp
=======
.. automodule:: paddle.v2.optimizer
:members: RMSProp
:noindex:
=======
Pooling
=======
BasePool
========
.. automodule:: paddle.v2.pooling
:members: BasePool
:noindex:
Avg
===
.. automodule:: paddle.v2.pooling
:members: Avg
:noindex:
Max
===
.. automodule:: paddle.v2.pooling
:members: Max
:noindex:
Sum
===
.. automodule:: paddle.v2.pooling
:members: Sum
:noindex:
SquareRootN
===========
.. automodule:: paddle.v2.pooling
:members: SquareRootN
:noindex:
CudnnAvg
========
.. automodule:: paddle.v2.pooling
:members: CudnnAvg
:noindex:
CudnnMax
========
.. automodule:: paddle.v2.pooling
:members: CudnnMax
:noindex:
==================================
Data Reader Interface and DataSets
==================================
DataTypes
=========
.. automodule:: paddle.v2.data_type
:members:
:noindex:
DataFeeder
==========
.. automodule:: paddle.v2.data_feeder
:members:
:noindex:
Reader
======
.. automodule:: paddle.v2.reader
:members:
:noindex:
.. automodule:: paddle.v2.reader.creator
:members:
:noindex:
minibatch
=========
.. automodule:: paddle.v2.minibatch
:members:
:noindex:
Dataset
=======
.. automodule:: paddle.v2.dataset
:members:
:noindex:
mnist
+++++
.. automodule:: paddle.v2.dataset.mnist
:members:
:noindex:
cifar
+++++
.. automodule:: paddle.v2.dataset.cifar
:members:
:noindex:
conll05
+++++++
.. automodule:: paddle.v2.dataset.conll05
:members: get_dict,get_embedding,test
:noindex:
imdb
++++
.. automodule:: paddle.v2.dataset.imdb
:members:
:noindex:
imikolov
++++++++
.. automodule:: paddle.v2.dataset.imikolov
:members:
:noindex:
movielens
+++++++++
.. automodule:: paddle.v2.dataset.movielens
:members:
:noindex:
.. autoclass:: paddle.v2.dataset.movielens.MovieInfo
:noindex:
.. autoclass:: paddle.v2.dataset.movielens.UserInfo
:noindex:
sentiment
+++++++++
.. automodule:: paddle.v2.dataset.sentiment
:members:
:noindex:
uci_housing
+++++++++++
.. automodule:: paddle.v2.dataset.uci_housing
:members:
:noindex:
wmt14
+++++
.. automodule:: paddle.v2.dataset.wmt14
:members:
:noindex:
Model Configuration
===================
.. toctree::
:maxdepth: 1
config/activation.rst
config/layer.rst
config/optimizer.rst
config/pooling.rst
config/networks.rst
config/attr.rst
======================
Training and Inference
======================
Parameters
==========
.. automodule:: paddle.v2.parameters
:members: Parameters
:noindex:
Trainer
=======
.. automodule:: paddle.v2.trainer
:members: SGD
:noindex:
Event
=====
.. automodule:: paddle.v2.event
:members:
:noindex:
Inference
=========
.. autofunction:: paddle.v2.infer
:noindex:
# Design Doc: Distributed Training
## Objective
In [these slides](https://www.slideshare.net/cxwangyi/paddlepaddle-a-complete-solution-for-businesses), we explained that we'd like PaddlePaddle to run on general-purpose clusters, like those managed by Kubernetes, so as to address demands for AI from both Internet and non-Internet industries.
This poses technical challenges to PaddlePaddle:
1. Support fault-recovery.
1. Support both offline and online training.
1. [Serverless computing](https://en.wikipedia.org/wiki/Serverless_computing) of distributed training.
## Training Job
A training job will be created once a user asks the Paddle cloud to train a model. The training job is made up of different processes that collaboratively consume data and produce a trained model. There are three kinds of processes:
1. the *master process*, which dispatches tasks to
1. one or more *trainer processes*, which run distributed training and synchronize gradients/models via
1. one or more *parameter server processes*, where each holds a shard of the global model.
Their relation is illustrated in the following graph:
<img src="src/paddle-model-sharding.png"/>
### Master Process
The master process will:
- Partition a dataset into [tasks](#task) and dispatch tasks to trainers.
- Keep track of training progress on the dataset with the [task queues](#task-queue). A training job iterates over the dataset one full pass at a time.
#### Task
A task is a data shard to be trained. The total number of tasks will be much bigger than the total number of trainers. The number of data instances inside a task will be much bigger than the mini-batch size.
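To make this concrete, a minimal sketch of the per-task record the master could keep is shown below; the field names are illustrative assumptions, not part of the design:

```python
import collections

# A task is a shard of the dataset plus dispatch bookkeeping.
# The field names below are hypothetical illustrations.
Task = collections.namedtuple(
    "Task", ["task_id",         # unique ID used by the task queues
             "data_files",      # the data shard: one or more file paths
             "timeout_count"])  # incremented each time the task times out
```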
#### Task Queue
The master process has three task queues to track training progress. As illustrated in the graph below, Job A and Job B both have one master process. Each master process has three task queues.
<img src="src/paddle-task-queues.png"/>
- The todo queue holds tasks to be dispatched. When a job starts, the master process fills in the todo queue with all tasks.
- The pending queue holds tasks that are currently training by trainers.
- The done queue holds tasks that are already trained.
The life cycle of a single task is illustrated below:
<img src="src/paddle-task-states.png"/>
1. When a new pass of training starts, all tasks will be placed in the todo queue.
1. The master process will dispatch a few tasks to each trainer at a time, put them in the pending queue, and wait for completion.
1. The trainer will work on its tasks and tell the master process once a task is completed. The master process will dispatch a new task to that trainer.
1. If a task times out, the master process will move it back to the todo queue and increase its timeout count by one. If the timeout count is above a threshold, the task is likely to cause a trainer to crash, so it will be discarded.
1. The master process will move completed tasks to the done queue. When the todo queue is empty, the master process will start a new pass by moving all tasks in the done queue back to the todo queue and resetting the timeout counters of all tasks to zero (this bookkeeping is sketched below).
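The bookkeeping described above can be sketched as follows, building on the `Task` record sketched earlier (hypothetical helper names; an illustration, not the actual master implementation):

```python
MAX_TIMEOUT = 3  # assumed threshold; the design leaves the exact value open

def on_task_timeout(task, todo, pending):
    # Move a timed-out task back to todo, or discard it after too many retries.
    pending.remove(task)
    task = task._replace(timeout_count=task.timeout_count + 1)
    if task.timeout_count <= MAX_TIMEOUT:
        todo.append(task)
    # else: discard the task -- it is likely to crash trainers

def on_task_done(task, todo, pending, done):
    # Move a completed task to done; start a new pass once all tasks finish.
    pending.remove(task)
    done.append(task)
    if not todo and not pending:
        todo.extend(t._replace(timeout_count=0) for t in done)
        del done[:]
```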
### Trainer Process
The trainer process will:
- Receive tasks from the master.
- Work on the tasks: calculate and upload gradient to parameter servers, and update local model by downloading new parameters from parameter servers.
### Parameter Server Process
Parameter server processes hold the parameters collaboratively. The parameters are partitioned on different parameter servers.
The parameter server will:
- Receive gradients from the trainers, update its parameters, and give the trainers the latest parameters.
- Periodically save its parameters to the distributed file system, overwriting the previous save.
### Optimization Algorithms
The communication pattern between the trainers and the parameter servers depends on the category of optimization algorithm:
- Synchronous Stochastic Gradient Descent (sync-SGD)
The parameter servers wait for all trainers to finish the n-th mini-batch calculation and send their gradients before broadcasting new parameters to every trainer. Every trainer waits for the new parameters before starting the (n+1)-th mini-batch.
- Asynchronous Stochastic Gradient Descent (async-SGD)
There is no synchronization between different trainers, and a parameter server updates its parameters as soon as it receives a new gradient (the trainer side is sketched after this list):
- Each trainer uploads its accumulated gradient every n mini-batches.
- Every m mini-batches, the trainer downloads new parameters from parameter server.
- n and m do not have to be equal.
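A minimal sketch of the async-SGD trainer loop described above; `model` and `pserver` stand for assumed local-training and parameter-server client interfaces, not actual PaddlePaddle APIs:

```python
def async_sgd_train(model, pserver, batches, n=4, m=8):
    # Upload gradients every n mini-batches; download parameters every m.
    grads = []
    for step, batch in enumerate(batches, start=1):
        grads.append(model.backward(batch))   # local forward/backward pass
        if step % n == 0:
            pserver.upload_gradients(grads)   # accumulated since last upload
            grads = []
        if step % m == 0:
            model.set_parameters(pserver.download_parameters())
```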
## Fault Tolerance
The training job will pause if the master process is dead, or if any of the parameter server processes is dead. They will be restarted by [Kubernetes](https://kubernetes.io/) and recover in a few minutes. Please refer to [fault recovery](#fault-recovery).
The training job will continue to make progress if there is at least one training process running. The strategy depends on the type of optimization algorithm:
- sync-SGD
TODO
- async-SGD
Since async-SGD does not require synchronization between mini-batches, the system will by definition make progress if at least one trainer is running.
## Fault Recovery
PaddlePaddle uses [etcd](https://github.com/coreos/etcd) to keep track of the states of processes. Because etcd is a distributed reliable key-value store, the restarted process can recover its states from etcd. The model parameters are periodically saved into distributed file system, so a restarted parameter server can recover its parameters from the saved file.
Now we will introduce how each process recovers from a failure; the graph below shows how etcd is used:
<img src="src/paddle-etcd.png"/>
### Master Process
When the master is started by Kubernetes, it executes the following steps at startup:
1. Grabs a unique *master* lock in etcd, which prevents concurrent master instantiations.
1. Recovers the task queues from etcd if they already exist, otherwise, the master will create them.
1. Watches the trainer prefix keys `/trainer/` on etcd to find the live trainers.
1. Starts dispatching the tasks to the trainers, and updates the task queues using etcd transactions to ensure the lock is held during each update.
The master process will kill itself if its etcd lease expires.
When the master process is dead for any reason, Kubernetes will restart it. It will be online again with all states recovered from etcd in a few minutes.
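A minimal sketch of the startup sequence above, assuming the `python-etcd3` client (`serialize` and `build_task_queues` are hypothetical helpers):

```python
import etcd3

client = etcd3.client(host="127.0.0.1", port=2379)

with client.lock("master"):                          # 1. unique master lock
    value, _ = client.get("/master/task_queues")     # 2. recover task queues
    if value is None:
        client.put("/master/task_queues", serialize(build_task_queues()))
    trainers = list(client.get_prefix("/trainer/"))  # 3. find live trainers
    # 4. dispatch tasks; every queue update goes through an etcd transaction
```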
### Trainer Process
When the trainer is started by Kubernetes, it executes the following steps at startup:
1. Watches the available parameter server prefix keys `/ps/` on etcd and waits until the count of parameter servers reaches the desired count.
1. Generates a unique ID, and sets key `/trainer/<unique ID>` with its contact address as value. The key will be deleted when the lease expires, so the master will be aware of the trainer being online and offline.
1. Waits for tasks from the master to start training.
If the trainer's etcd lease expires, it will try to set the key `/trainer/<unique ID>` again so that the master process can rediscover the trainer.
### Parameter Server Process
When the parameter server is started by Kubernetes, it executes the following steps at startup:
1. Read the desired total number of parameter servers from the etcd key `/ps_desired`.
1. Search through the etcd keys `/ps/<index>` (`/ps/0`, `/ps/1`, ...) to find the first non-existent key whose index is smaller than the total number of parameter servers. Set the key using a transaction to avoid concurrent writes (see the sketch at the end of this section). The parameter server's index is inferred from the key name.
For example, suppose the desired number of parameter servers is 3:
<img src="src/paddle-ps-0.png"/>
The third parameter server joined:
<img src="src/paddle-ps-1.png"/>
1. The parameter server can load parameters if there are already saved parameters in the save path (inferred from its index).
1. Now the parameter server is ready for the trainers' requests.
If the parameter server's etcd lease expires, the parameter server will kill itself.
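A minimal sketch of the index-claiming step, again assuming the `python-etcd3` client; the `version == 0` comparison is the usual etcd idiom for "key does not exist":

```python
import etcd3

client = etcd3.client()
desired, _ = client.get("/ps_desired")

my_index = None
for i in range(int(desired)):
    key = "/ps/%d" % i
    ok, _ = client.transaction(
        compare=[client.transactions.version(key) == 0],  # key must not exist
        success=[client.transactions.put(key, "ps-address:port")],
        failure=[])
    if ok:  # we claimed this index atomically
        my_index = i
        break
```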
## Dynamic Scaling
### Trainer Scaling
TODO
### Parameter Server Scaling
Not planned for v1.
## Training Dataset Format
TODO
## User Interface
TODO
Files added (figures for this design doc):

- doc/design/dist/src/paddle-etcd.png (55.0 KB)
- doc/design/dist/src/paddle-model-sharding.png (37.5 KB)
- doc/design/dist/src/paddle-ps-0.png (20.9 KB)
- doc/design/dist/src/paddle-ps-1.png (27.7 KB)
- doc/design/dist/src/paddle-task-queues.png (33.9 KB)
- doc/design/dist/src/paddle-task-states.png (17.8 KB)
# Paddle Multi-Language Interface Implementation

## Background

Paddle needs a multi-language interface. The interface needs to:

* Come with standard, well-written documentation
  * For example, Python can use [Sphinx](http://www.sphinx-doc.org/en/stable/) to generate API documentation, and golang can use [GoDoc](https://godoc.org/golang.org/x/tools/cmd/godoc). Both require the interface to be annotated completely, following established conventions.
* Adapt to the characteristics of each language
  * For example, Java and Python handle errors by throwing exceptions, while golang error handling should use return values.

## Basic Requirements

Paddle's multi-language interface implementation covers the following points:

* We distribute Paddle as a dynamic library. No interpreter of any other language is embedded in this dynamic library, and no other dynamic libraries are used.
* The dynamic library exports functions through C99-standard header files; it does not use or export C++ symbols.
* Paddle-internal structs and classes are not exported; only `void*` pointers are used as type handles (handlers).
* We hand-write the language bindings instead of using a code generator such as SWIG.

## Rationale

### Distributing Paddle as a dynamic library

* Paddle's linking process is complicated.
  * If users want to link Paddle's static library (libpaddle.a) into their own programs, they must pass `--whole-archive` (for GCC) or `--force_load` (for Clang) to make sure all the symbols in libpaddle.a are written into their binaries. This is because Paddle's source code uses the [object factory design pattern](http://stackoverflow.com/a/1310326/724872).
* For compiled languages such as C/C++, static and dynamic libraries are about equally hard to use. But interpreted languages such as [Python](http://stackoverflow.com/questions/19560594/how-to-import-static-library-in-python) or [Java](http://stackoverflow.com/questions/24493337/linking-static-library-with-jni) can only call Paddle's dynamic library; otherwise the Paddle static library would have to be linked into the interpreter itself.
  * The binary an interpreted language actually runs is the interpreter itself, so calling a static library means linking it into the interpreter. For Java, that means adding the static library into the JVM, which is an unusual practice for typical Java developers.

### Embedding no interpreter of any other language in the dynamic library

* Paddle's current process model is that C++ internally drives a Python interpreter for model configuration parsing and data reading.
* Our final dynamic library will embed neither Python nor any other interpreter. Model configuration parsing and data reading are handed over to other languages.

A current problem is that if the Python interpreter embedded in Paddle and the externally used Python differ in version, Paddle reports an error and exits.

### The Paddle dynamic library references no other dynamic libraries

* That is, the dynamic library depends on no other files and can run on any machine.

### The dynamic library exports functions through C99-standard headers, without using/exporting C++ symbols

* C++ compilers have no specification for [name mangling](https://en.wikipedia.org/wiki/Name_mangling#C.2B.2B); different compiler versions may generate inconsistent symbols for the same C++ code. A multi-language interface reads the generated binary (the dynamic library) directly, so it needs stable exported symbols.
* C does have a standard for exported symbols, and on common platforms the C ABI calling convention is standardized.
* Most languages support calling C-language APIs.
* We use C99 rather than C89 because C99 supports [fixed-width integer types](https://en.wikipedia.org/wiki/C_data_types#Fixed-width_integer_types) and a [boolean type](https://en.wikipedia.org/wiki/C_data_types#Boolean_type).
* We use C99 rather than C11 because [C11](https://en.wikipedia.org/wiki/C11_(C_standard_revision)) offers no feature Paddle particularly needs, and C99 is more widely used than C11.

### Exporting no Paddle-internal structs or classes; using `void*` pointers as type handles

* Paddle's internal classes are written in C++ and are hard to export to a C interface directly.
* The C-API uses `void*` to represent Paddle-internal classes, and each API function checks the type itself.

In the C header file `paddle_matrix.h`:
```C
typedef void* paddle_matrix;
typedef int paddle_error;

#ifdef __cplusplus /* guard so the same header works from both C and C++ */
extern "C"
#endif
paddle_error paddle_matrix_shape(paddle_matrix matrix,
                                 uint64_t* width,
                                 uint64_t* height);
```
The C interface is implemented in C++, in the file `paddle_matrix.cpp`:
```cpp
#include "paddle/math/matrix.hpp"
extern "C"
paddle_error paddle_matrix_shape(paddle_matrix matrix,
                                 uint64_t *width,
                                 uint64_t *height) {
  auto m = (paddle::math::Matrix*)(matrix);
  *width = m->width();
  *height = m->height();
  return 0;  // assuming 0 denotes success in the paddle_error convention
}
```
where the file `paddle/math/matrix.hpp` contains:
```cpp
namespace paddle {
namespace math {
class Matrix {
//...
};
} // namespace math
} // namespace paddle
```
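To illustrate the point that most languages can consume a C header directly, here is a minimal sketch of calling the function declared above from Python via ctypes. The library name `libpaddle_capi.so` and the origin of the matrix handle are assumptions made for this example:

```python
import ctypes

# Assumption: the shipped shared library is named libpaddle_capi.so.
lib = ctypes.CDLL("libpaddle_capi.so")

lib.paddle_matrix_shape.restype = ctypes.c_int        # paddle_error
lib.paddle_matrix_shape.argtypes = [
    ctypes.c_void_p,                                  # paddle_matrix handle
    ctypes.POINTER(ctypes.c_uint64),                  # width (out)
    ctypes.POINTER(ctypes.c_uint64),                  # height (out)
]

def matrix_shape(matrix_handle):
    # matrix_handle is an opaque void* obtained from some other C-API call.
    width, height = ctypes.c_uint64(), ctypes.c_uint64()
    err = lib.paddle_matrix_shape(matrix_handle,
                                  ctypes.byref(width), ctypes.byref(height))
    if err != 0:
        raise RuntimeError("paddle_matrix_shape failed: %d" % err)
    return width.value, height.value
```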
### Hand-writing the language bindings instead of using a code generator such as SWIG

* [SWIG](http://www.swig.org/) is a code generator for multi-language interfaces. Its goal is that you write C/C++ code, and SWIG reads the C/C++ headers directly and generates binding code for various languages.
  * For a multi-language interface, SWIG requires an interface file with its own peculiar syntax, which has a high learning cost. Adding a third-party language also requires adding definitions for that language. Sometimes the interface file is very [tricky](https://github.com/PaddlePaddle/Paddle/blob/develop/paddle/api/Paddle.swig#L36) to write, raising the learning cost for community contributors.
* The interfaces SWIG exposes keep the C++ style, which makes a consistent code style across languages (function naming, error handling) hard to guarantee.
  * The function and class names SWIG exposes in a third-party language are identical to the C++ ones, and C++ naming conventions do not fit other languages. With SWIG we would have to rename lots of `SomeCppClass` to `some_python_class` or `SomeGoTypes` in the interface file.
  * Error handling differs across languages: for Java or Python the most common mechanism is exceptions, while for Golang it is return values. SWIG simply exposes the C++ interface and cannot adapt error handling to each language.
* For most languages, consuming a C-language `.h` directly is not hard, e.g. Python's [cffi](https://cffi.readthedocs.io/en/latest/overview.html#simple-example-abi-level-in-line) or [Cython](http://cython.org/), and golang's [cgo](https://golang.org/cmd/cgo/).
* The languages and interpreters SWIG supports are limited; e.g. for Python, SWIG supports only the CPython interpreter, not PyPy.
## Summary of Reasons

| Decision | Instead of | Reason |
|---| --- | --- |
| Use a dynamic library | A static library | Interpreted languages can only call dynamic libraries, and linking Paddle's static library is complicated |
| Embed no other-language interpreter | Embedding a Python interpreter | Paddle C++ currently embeds a Python interpreter, which causes bugs when two Python versions meet in one process |
| Reference no other dynamic libraries | | A single Paddle dynamic library can run on any Linux system |
| Use C99 for the interface | C++ for the interface | C has a standard ABI; C99 is the most widely used C standard and supports bool and fixed-width integer types (uint64_t, etc.) |
| Use void* as class handles | Spelling out what each class contains | Simple to implement, and it decouples the interface from implementation details |
| Hand-write the bindings | SWIG | SWIG requires binding developers to master SWIG configuration, which hinders community participation; SWIG-generated code cannot guarantee a consistent style across languages |
## Minimal Implementation

TBD
......@@ -4,9 +4,10 @@ At training and testing time, PaddlePaddle programs need to read data. To ease t
- A *reader* is a function that reads data (from file, network, random number generator, etc) and yields data items.
- A *reader creator* is a function that returns a reader function.
- A *reader* decorator is a function, which accepts one or more readers, and returns a reader.
- A *reader decorator* is a function, which accepts one or more readers, and returns a reader.
- A *batch reader* is a function that reads data (from *reader*, file, network, random number generator, etc) and yields a batch of data items.
and provide frequently used reader creators and reader decorators.
and provide a function that converts a reader into a batch reader, plus frequently used reader creators and reader decorators.
## Data Reader Interface
......@@ -22,24 +23,69 @@ An example implementation for single item data reader creator:
```python
def reader_creator_random_image(width, height):
def reader():
while True:
yield numpy.random.uniform(-1, 1, size=width*height)
return reader
def reader():
while True:
yield numpy.random.uniform(-1, 1, size=width*height)
return reader
```
An example implementation for multiple item data reader creator:
```python
def reader_creator_random_imageand_label(widht, height, label):
def reader():
while True:
yield numpy.random.uniform(-1, 1, size=width*height), label
return reader
def reader_creator_random_image_and_label(width, height, label):
def reader():
while True:
yield numpy.random.uniform(-1, 1, size=width*height), label
return reader
```
## Batch Reader Interface
A *batch reader* can be any function with no parameters that creates an iterable (anything that can be used in `for x in iterable`). The output of the iterable should be a batch (list) of data items. Each item inside the list must be a tuple.
Here are valid outputs:
```python
# a mini batch of three data items. Each data item consists of three columns of data.
[(1, 1, 1),
(2, 2, 2),
(3, 3, 3)]
# a mini batch of three data items, each data item is a list (single column).
[([1,1,1],),
([2,2,2],),
([3,3,3],)]
```
Please note that each item inside the list must be a tuple; below is an invalid output:
```python
# wrong, [1,1,1] needs to be inside a tuple: ([1,1,1],).
# Otherwise it's ambiguous whether [1,1,1] means a single column of data [1, 1, 1],
# or three columns of data, each of which is 1.
[[1,1,1],
[2,2,2],
[3,3,3]]
```
It's easy to convert from reader to batch reader:
```python
mnist_train = paddle.dataset.mnist.train()
mnist_train_batch_reader = paddle.batch(mnist_train, 128)
```
It's also easy to create a custom batch reader:
```python
def custom_batch_reader():
while True:
batch = []
for i in xrange(128):
batch.append((numpy.random.uniform(-1, 1, 28*28),)) # note that it's a tuple being appended.
yield batch
mnist_random_image_batch_reader = custom_batch_reader
```
## Usage
data reader, mapping from item(s) read to data layer, batch size and number of total pass will be passed into `paddle.train`:
batch reader, mapping from item(s) read to data layer, batch size and number of total pass will be passed into `paddle.train`:
```python
# two data layers are created:
......@@ -47,8 +93,8 @@ image_layer = paddle.layer.data("image", ...)
label_layer = paddle.layer.data("label", ...)
# ...
paddle.train(paddle.dataset.mnist, {"image":0, "label":1}, 128, 10, ...)
batch_reader = paddle.batch(paddle.dataset.mnist.train(), 128)
paddle.train(batch_reader, {"image":0, "label":1}, 128, 10, ...)
```
## Data Reader Decorator
......@@ -64,7 +110,7 @@ Since reading data may take time and training can not proceed without data. It i
Use `paddle.reader.buffered` to prefetch data:
```python
buffered_reader = paddle.reader.buffered(paddle.dataset.mnist, 100)
buffered_reader = paddle.reader.buffered(paddle.dataset.mnist.train(), 100)
```
`buffered_reader` will try to buffer (prefetch) `100` data entries.
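A minimal sketch of what such a prefetching decorator could look like (an illustration, not the library implementation): a background thread fills a bounded queue while the consumer reads from it.

```python
import threading
import Queue  # Python 2, matching the examples in this document

def buffered(reader, size):
    end = object()  # sentinel marking the end of the stream

    def buffered_reader():
        q = Queue.Queue(maxsize=size)

        def producer():
            for item in reader():
                q.put(item)
            q.put(end)

        t = threading.Thread(target=producer)
        t.daemon = True
        t.start()
        item = q.get()
        while item is not end:
            yield item
            item = q.get()

    return buffered_reader
```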
......@@ -77,24 +123,24 @@ We can do:
```python
def reader_creator_random_image(width, height):
def reader():
while True:
yield numpy.random.uniform(-1, 1, size=width*height)
return reader
def reader():
while True:
yield numpy.random.uniform(-1, 1, size=width*height)
return reader
def reader_creator_bool(t):
def reader:
while True:
yield t
return reader
def reader:
while True:
yield t
return reader
true_reader = reader_creator_bool(True)
false_reader = reader_creator_bool(False)
reader = paddle.reader.compose(paddle.dataset.mnist, data_reader_creator_random_image(20, 20), true_reader, false_reader)
# Skipped 1 because paddle.dataset.mnist produces two items per data entry.
reader = paddle.reader.compose(paddle.dataset.mnist.train(), data_reader_creator_random_image(20, 20), true_reader, false_reader)
# Skipped 1 because paddle.dataset.mnist.train() produces two items per data entry.
# And we don't care second item at this time.
paddle.train(reader, {"true_image":0, "fake_image": 2, "true_label": 3, "false_label": 4}, ...)
paddle.train(paddle.batch(reader, 128), {"true_image":0, "fake_image": 2, "true_label": 3, "false_label": 4}, ...)
```
### Shuffle
......@@ -103,16 +149,20 @@ Given shuffle buffer size `n`, `paddle.reader.shuffle` will return a data reader
Example:
```python
reader = paddle.reader.shuffle(paddle.dataset.mnist, 512)
reader = paddle.reader.shuffle(paddle.dataset.mnist.train(), 512)
```
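The idea behind such a buffered shuffle can be sketched as follows (an illustration, not `paddle.reader.shuffle` itself): accumulate up to `buf_size` entries, shuffle the buffer, and yield from it.

```python
import random

def shuffle(reader, buf_size):
    def shuffled_reader():
        buf = []
        for item in reader():
            buf.append(item)
            if len(buf) >= buf_size:
                random.shuffle(buf)
                for b in buf:
                    yield b
                buf = []
        random.shuffle(buf)  # flush the (possibly partial) last buffer
        for b in buf:
            yield b
    return shuffled_reader
```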
## Q & A
### Why return only a single entry, but not a mini batch?
### Why does a reader return only a single entry, but not a mini batch?
Always returning a single entry make reusing existing data readers much easier (e.g., if existing reader return not a single entry but 3 entries, training code will be more complex because it need to handle cases like batch size 2).
We provide the function `paddle.batch` to turn a (single entry) reader into a batch reader.
If a mini batch is returned, data reader need to take care of batch size. But batch size is a concept for training, it makes more sense for user to specify batch size as a parameter for `train`.
### Why do we need a batch reader? Isn't `train` taking a reader and batch_size as arguments sufficient?
Practically, always returning a single entry makes reusing existing data readers much easier (e.g., if an existing reader returns not a single entry but 3 entries, training code will be more complex because it needs to handle cases like a batch size of 2).
In most cases, `train` taking a reader and batch_size as arguments would be sufficient. However, sometimes users want to customize the order of data entries inside a mini batch, or even change the batch size dynamically; a sketch of a batching decorator follows.
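For reference, a batching decorator in the spirit of `paddle.batch` can be sketched in a few lines (an illustration, not the actual implementation):

```python
def batch(reader, batch_size):
    def batch_reader():
        b = []
        for item in reader():
            b.append(item)
            if len(b) == batch_size:
                yield b
                b = []
        if b:
            yield b  # final, possibly smaller, batch
    return batch_reader
```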
### Why use a dictionary but not a list to provide mapping?
......@@ -122,22 +172,22 @@ We decided to use dictionary (`{"image":0, "label":1}`) instead of list (`["imag
```python
def image_reader_creator(image_path, label_path, n):
def reader():
f = open(image_path)
l = open(label_path)
images = numpy.fromfile(
f, 'ubyte', count=n * 28 * 28).reshape((n, 28 * 28)).astype('float32')
images = images / 255.0 * 2.0 - 1.0
labels = numpy.fromfile(l, 'ubyte', count=n).astype("int")
for i in xrange(n):
yield images[i, :], labels[i] # a single entry of data is created each time
f.close()
l.close()
return reader
def reader():
f = open(image_path)
l = open(label_path)
images = numpy.fromfile(
f, 'ubyte', count=n * 28 * 28).reshape((n, 28 * 28)).astype('float32')
images = images / 255.0 * 2.0 - 1.0
labels = numpy.fromfile(l, 'ubyte', count=n).astype("int")
for i in xrange(n):
yield images[i, :], labels[i] # a single entry of data is created each time
f.close()
l.close()
return reader
# image_reader_creator creates a reader
reader = image_reader_creator("/path/to/image_file", "/path/to/label_file", 1024)
paddle.train(reader, {"image":0, "label":1}, ...)
paddle.train(paddle.batch(reader, 128), {"image":0, "label":1}, ...)
```
### How is `paddle.train` implemented
......@@ -145,17 +195,8 @@ paddle.train(reader, {"image":0, "label":1}, ...)
An example implementation of paddle.train could be:
```python
def make_minibatch(reader, minibatch_size):
def ret():
r = reader()
buf = [r.next() for x in xrange(minibatch_size)]
while len(buf) > 0:
yield buf
buf = [r.next() for x in xrange(minibatch_size)]
return ret
def train(reader, mapping, batch_size, total_pass):
for pass_idx in range(total_pass):
for mini_batch in make_minibatch(reader): # this loop will never end in online learning.
do_forward_backward(mini_batch, mapping)
def train(batch_reader, mapping, batch_size, total_pass):
for pass_idx in range(total_pass):
for mini_batch in batch_reader(): # this loop will never end in online learning.
do_forward_backward(mini_batch, mapping)
```
......@@ -286,3 +286,16 @@ PaddlePaddle parameters use the name :code:`name` as the parameter ID; parameters with the same name
.. code-block:: bash
paddle train --use_gpu=true --trainer_count=2 --gpu_id=2
12. 训练过程中出现 :code:`Floating point exception`, 训练因此退出怎么办?
------------------------------------------------------------------------
Paddle二进制在运行时捕获了浮点数异常,只要出现浮点数异常(即训练过程中出现NaN或者Inf),立刻退出。浮点异常通常的原因是浮点数溢出、除零等问题。
主要原因包括两个方面:
* 训练过程中参数或者训练过程中的梯度尺度过大,导致参数累加,乘除等时候,导致了浮点数溢出。
* 模型一直不收敛,发散到了一个数值特别大的地方。
* 训练数据有问题,导致参数收敛到了一些奇异的情况。或者输入数据尺度过大,有些特征的取值达到数百万,这时进行矩阵乘法运算就可能导致浮点数溢出。
主要的解决办法是减小学习律或者对数据进行归一化处理。
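For example, per-feature normalization with numpy (a minimal sketch; adapt it to your data pipeline) scales each input column to zero mean and unit variance:

.. code-block:: python

    import numpy as np

    def normalize(features):
        # features: 2-D array, one row per sample, one column per feature.
        mean = features.mean(axis=0)
        std = features.std(axis=0)
        std[std == 0] = 1.0  # avoid division by zero for constant columns
        return (features - mean) / std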
......@@ -55,7 +55,7 @@ PaddlePaddle is a deep learning platform originated at Baidu. This short introduction
# linear computation layer: ȳ = wx + b
ȳ = fc_layer(input=x, param_attr=ParamAttr(name='w'), size=1, act=LinearActivation(), bias_attr=ParamAttr(name='b'))
# compute the cost, i.e. the distance between ȳ and the true y
cost = regression_cost(input= ȳ, label=y)
cost = mse_cost(input= ȳ, label=y)
outputs(cost)
......@@ -69,7 +69,7 @@ PaddlePaddle is a deep learning platform originated at Baidu. This short introduction
- **Data layers**: the data layer `data_layer` is the entry point of the neural network; it reads data in and passes it on to the following layers. There are two data layers here, corresponding to the variables `x` and `y`.
- **Fully connected layer**: the fully connected layer `fc_layer` is a basic computation unit; here it models the linear relationship between variables. Computation units are the core of a neural network. PaddlePaddle supports a large number of computation units and arbitrarily deep network connections, and can therefore fit arbitrary functions to learn complex data relationships.
- **Regression cost layer**: the regression cost layer `regression_cost` is one of many cost function layers; these serve as the network's exit during training, computing the model's error, which is the objective function for parameter optimization.
- **Regression cost layer**: the regression cost layer `mse_cost` is one of many cost function layers; these serve as the network's exit during training, computing the model's error, which is the objective function for parameter optimization.

After defining the network structure and saving it as `trainer_config.py`, run the following training command:
......
......@@ -49,7 +49,7 @@ To recover this relationship between ``X`` and ``Y``, we use a neural network wi
x = data_layer(name='x', size=1)
y = data_layer(name='y', size=1)
y_predict = fc_layer(input=x, param_attr=ParamAttr(name='w'), size=1, act=LinearActivation(), bias_attr=ParamAttr(name='b'))
cost = regression_cost(input=y_predict, label=y)
cost = mse_cost(input=y_predict, label=y)
outputs(cost)
Some of the most fundamental usages of PaddlePaddle are demonstrated:
......
......@@ -51,7 +51,7 @@ PaddlePaddle supports some build options.
<tr><td class="left">WITH_TIMER</td><td class="left">Compile PaddlePaddle with stats timer</td></tr>
<tr><td class="left">WITH_PROFILER</td><td class="left">Compile PaddlePaddle with GPU profiler</td></tr>
<tr><td class="left">WITH_DOC</td><td class="left">Compile PaddlePaddle with documentation</td></tr>
<tr><td class="left">ON_COVERALLS</td><td class="left">Compile PaddlePaddle with code coverage</td></tr>
<tr><td class="left">WITH_COVERAGE</td><td class="left">Compile PaddlePaddle with code coverage</td></tr>
<tr><td class="left">COVERALLS_UPLOAD</td><td class="left">Package code coverage data to coveralls</td></tr>
<tr><td class="left">ON_TRAVIS</td><td class="left">Exclude special unit test on Travis CI</td></tr>
</tbody>
......
安装PaddlePaddle的Docker镜像
============================
PaddlePaddle的Docker容器使用方式
================================
PaddlePaddle项目提供官方 `Docker <https://www.docker.com/>`_ 镜像。Docker镜像是我们目前唯一官方支持的部署和运行方式
PaddlePaddle目前唯一官方支持的运行的方式是Docker容器。因为Docker能在所有主要操作系统(包括Linux,Mac OS X和Windows)上运行。 请注意,您需要更改 `Dockers设置 <https://github.com/PaddlePaddle/Paddle/issues/627>`_ 才能充分利用Mac OS X和Windows上的硬件资源
下述内容将分为如下几个类别描述。
* PaddlePaddle提供的Docker镜像版本
* 下载和运行Docker镜像
* 注意事项
PaddlePaddle发布的docker镜像使用说明
------------------------------
PaddlePaddle提供的Docker镜像版本
--------------------------------
对于每一个PaddlePaddle版本,我们都会发布两种Docker镜像:开发镜像、运行镜像。运行镜像包括纯CPU版本和GPU版本以及其对应的非AVX版本。
我们会在 `dockerhub.com <https://hub.docker.com/r/paddledev/paddle/>`_ 提供最新的docker镜像,可以在"tags"标签下找到最新的Paddle镜像版本。
1. Development image: :code:`paddlepaddle/paddle:<version>-dev`

   This image contains Paddle's development tools along with the build and runtime environment. Users can rely on the development image, instead of configuring a local environment, to do development, building, releasing, and documentation writing. Since different versions of Paddle may need different dependencies and tools, you have to take the version into account if you want to set up a development environment by yourself. The development image contains the following tools:

   - gcc/clang
   - nvcc
   - Python
   - sphinx
   - woboq
   - sshd

   Many developers work on remote servers equipped with GPUs. They can ssh into such a server and run :code:`docker exec` to enter the development image and start working (see the sketch at the end of this item). They can also start an SSHD service inside the development image, which lets them log into the image directly:

   Run the development image as an interactive container:

   .. code-block:: bash

      docker run -it --rm paddledev/paddle:<version>-dev /bin/bash

   Or run the container as a background process:

   .. code-block:: bash

      docker run -d -p 2202:22 -p 8888:8888 paddledev/paddle:<version>-dev

   Then SSH into the container with the password :code:`root`:

   .. code-block:: bash

      ssh -p 2202 root@localhost

   One advantage of the SSH approach is that we can enter the container from several terminals; for example, one terminal can run vi while another runs Python. Another advantage is that we can run the PaddlePaddle container on a remote server and connect to it from a laptop over SSH.
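   For example, entering a running development container with :code:`docker exec` might look like this (a sketch; the container name ``paddle_dev`` is our own choice):

   .. code-block:: bash

      # Start the development image in the background with an SSH daemon.
      docker run -d --name paddle_dev paddledev/paddle:<version>-dev /usr/sbin/sshd -D
      # Attach an interactive shell; repeat from other terminals as needed.
      docker exec -it paddle_dev /bin/bash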
2. Production images: there are four of them, distinguished by CPU, GPU, and no-AVX:

   - GPU/AVX: :code:`paddlepaddle/paddle:<version>-gpu`
   - GPU/no-AVX: :code:`paddlepaddle/paddle:<version>-gpu-noavx`
   - CPU/AVX: :code:`paddlepaddle/paddle:<version>`
   - CPU/no-AVX: :code:`paddlepaddle/paddle:<version>-noavx`

   Both the CPU-only and the GPU images use the AVX instruction set, but computers produced before 2008 do not support AVX. The following command checks whether a Linux machine supports AVX:

   .. code-block:: bash

      if cat /proc/cpuinfo | grep -i avx; then echo Yes; else echo No; fi

   If the output is No, choose one of the no-AVX images.

   The method above works for the GPU images as well; just remember to install the latest GPU driver on the physical machine beforehand. To make sure the GPU driver runs correctly inside the image, we recommend running the image with `nvidia-docker <https://github.com/NVIDIA/nvidia-docker>`_:

   .. code-block:: bash

      nvidia-docker run -it --rm paddledev/paddle:0.10.0rc1-gpu /bin/bash

   Note: if nvidia-docker gives you trouble, you may try the older method below, although we do not recommend it:

   .. code-block:: bash

      export CUDA_SO="$(\ls /usr/lib64/libcuda* | xargs -I{} echo '-v {}:{}') $(\ls /usr/lib64/libnvidia* | xargs -I{} echo '-v {}:{}')"
      export DEVICES=$(\ls /dev/nvidia* | xargs -I{} echo '--device {}:{}')
      docker run ${CUDA_SO} ${DEVICES} -it paddledev/paddle:<version>-gpu
3. Publishing your AI program with the production image

   Suppose you have finished an AI training program in Python, :code:`a.py`, developed with the development image on your development machine. You can then test-run it on the development machine with:

   .. code-block:: bash

      docker run -it -v $PWD:/work paddle /work/a.py

   This assumes that all of the dependencies of ``a.py`` are available in Paddle's production container. If you need more dependencies, or want to publish an image of your application, you can write a ``Dockerfile`` that starts with ``FROM paddledev/paddle:<version>`` to build and publish your own AI program image, as sketched below.
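   A minimal ``Dockerfile`` for packaging such a program might look like this (a sketch; apart from ``a.py``, every name here is our own assumption):

   .. code-block:: bash

      FROM paddledev/paddle:<version>
      # Bundle the training program from the example above into the image.
      COPY a.py /work/a.py
      # Run it by default when the container starts.
      CMD ["python", "/work/a.py"]

   Build and run it like any other image (the tag ``my-paddle-app`` is arbitrary):

   .. code-block:: bash

      docker build -t my-paddle-app .
      docker run my-paddle-app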
Notes
-----

After entering a container, running :code:`paddle version` prints PaddlePaddle's version and build information.

Performance
+++++++++++

Since Docker is a lightweight, container-based virtualization solution, CPU computing performance is not seriously affected. And because the GPU driver and devices are mapped straight into the container, GPU computing performance is not seriously affected either.

If you use a high-performance NIC, however, such as an RDMA card (RoCE 40GbE or IB 56GbE) or a high-performance 10GbE Ethernet card, we recommend training over the host's own network, i.e. with :code:`--net=host`, rather than through Docker's network bridge, as shown in the sketch below.
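A minimal sketch of such a launch (the image tag and the training command/config file are placeholders):

.. code-block:: bash

   # Use the host network stack instead of the Docker bridge.
   docker run -it --net=host paddledev/paddle:<version> paddle train --config=trainer_config.py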
Running the PaddlePaddle Book
-----------------------------

Jupyter Notebook is an open-source web application for creating and sharing interactive documents that mix code, equations, charts, and text, which readers can browse in a web page.

The PaddlePaddle Book is an interactive Jupyter Notebook made for users and developers. If you want a deeper understanding of deep learning, the PaddlePaddle Book is definitely your best choice.

We provide a Docker image that runs the PaddlePaddle Book directly:

.. code-block:: bash

   docker run -p 8888:8888 paddlepaddle/book

Then open the following address in your browser:

.. code-block:: text

   http://localhost:8888/

That's all. Enjoy your journey!

Developing PaddlePaddle in Docker Containers
--------------------------------------------

Developers can work on PaddlePaddle inside the Docker development image. This lets them work in a consistent way across platforms -- Linux, Mac OS X, and Windows.

1. Build the development image:

   .. code-block:: bash

      git clone --recursive https://github.com/PaddlePaddle/Paddle
      cd Paddle
      docker build -t paddle:dev .

   Note that by default :code:`docker build` does not import the source tree into the image and build it. To do that, build the development image first and then run:

   .. code-block:: bash

      docker run -v $PWD:/paddle -e "WITH_GPU=OFF" -e "WITH_AVX=ON" -e "TEST=OFF" paddle:dev

2. Run the development environment:

   Once :code:`paddle:dev` is built, we can develop inside a Docker container, loading the source code into the development environment by mounting local files:

   .. code-block:: bash

      docker run -d -p 2202:22 -v $PWD:/paddle paddle:dev sshd

   This starts a container with the PaddlePaddle development environment and mounts the source code at :code:`/paddle`. The :code:`docker run` command above actually starts an SSHD server listening on port 2202, so we can SSH into the development container:

   .. code-block:: bash

      ssh root@localhost -p 2202

3. Build and install PaddlePaddle inside the Docker development environment:

   Inside the container, the script :code:`paddle/scripts/docker/build.sh` builds, installs, and tests PaddlePaddle:

   .. code-block:: bash

      /paddle/paddle/scripts/docker/build.sh

   This builds PaddlePaddle in :code:`/paddle/build`. The unit tests can then be run with:

   .. code-block:: bash

      cd /paddle/build
      ctest

Documentation
-------------

Paddle's Docker development image ships an HTML version of the C++ source code, generated with the `woboq code browser <https://github.com/woboq/woboq_codebrowser>`_, so that users can browse the C++ sources conveniently.

As long as we give the PaddlePaddle container a name when starting it, we can run another Nginx Docker image to serve the HTML code:

.. code-block:: bash

   docker run -d --name paddle-cpu-doc paddle:<version>-dev
   docker run -d --volumes-from paddle-cpu-doc -p 8088:80 nginx

Then we can open http://localhost:8088/paddle/ in a browser to read the code.
......@@ -8,153 +8,255 @@ Please be aware that you will need to change `Dockers settings
<https://github.com/PaddlePaddle/Paddle/issues/627>`_ to make full use
of your hardware resource on Mac OS X and Windows.
Working With Docker
-------------------

Docker is simple as long as we understand a few basic concepts:

- *image*: A Docker image is a pack of software. It could contain one or more programs and all their dependencies. For example, the PaddlePaddle Docker image includes pre-built PaddlePaddle, Python, and many Python packages, so we can run the image directly instead of installing all this software ourselves. We can type

  .. code-block:: bash

     docker images

  to list all images in the system. We can also run

  .. code-block:: bash

     docker pull paddlepaddle/paddle:0.10.0rc2

  to download a Docker image, paddlepaddle/paddle in this example, from Dockerhub.com.
- *container*: Considering a Docker image a program, a container is a "process" that runs the image. Indeed, a container is exactly an operating system process, but with a virtualized filesystem, network port space, and other virtualized environment. We can type

  .. code-block:: bash

     docker run paddlepaddle/paddle:0.10.0rc2

  to start a container running a Docker image, paddlepaddle/paddle in this example; see the sketch below for listing containers.
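  Since a container is just a process, standard Docker commands can list and inspect containers:

  .. code-block:: bash

     docker ps        # list running containers
     docker ps -a     # also show containers that have exited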
- By default a Docker container has an isolated filesystem namespace, so we cannot see the files in the host filesystem. By using a *volume*, files mounted from the host become visible inside the container. The following command mounts the current directory into /data inside a container, and runs the container from the debian image with the command :code:`ls /data`.

  .. code-block:: bash

     docker run --rm -v $(pwd):/data debian ls /data
Usage of CPU-only and GPU Images
--------------------------------

For each version of PaddlePaddle, we release two types of Docker images: a development image and production images. The production images include a CPU-only version and a CUDA GPU version, together with their no-AVX variants. We put the Docker images on `dockerhub.com <https://hub.docker.com/r/paddledev/paddle/>`_. You can find the latest versions under the "tags" tab at dockerhub.com.

1. Production images; this image comes in multiple variants:

   - GPU/AVX: :code:`paddlepaddle/paddle:<version>-gpu`
   - GPU/no-AVX: :code:`paddlepaddle/paddle:<version>-gpu-noavx`
   - CPU/AVX: :code:`paddlepaddle/paddle:<version>`
   - CPU/no-AVX: :code:`paddlepaddle/paddle:<version>-noavx`

   Please be aware that the CPU-only and the GPU images both use the AVX instruction set, but old computers produced before 2008 do not support AVX. The following command checks if your Linux computer supports AVX:

   .. code-block:: bash

      if cat /proc/cpuinfo | grep -i avx; then echo Yes; else echo No; fi

   To run the CPU-only image as an interactive container:

   .. code-block:: bash

      docker run -it --rm paddlepaddle/paddle:0.10.0rc2 /bin/bash

   The above method works with the GPU image too -- the recommended way is using `nvidia-docker <https://github.com/NVIDIA/nvidia-docker>`_. Please install nvidia-docker first, following this `tutorial <https://github.com/NVIDIA/nvidia-docker#quick-start>`_.

   Now you can run a GPU image:

   .. code-block:: bash

      nvidia-docker run -it --rm paddlepaddle/paddle:0.10.0rc2-gpu /bin/bash
2. Development image: :code:`paddlepaddle/paddle:<version>-dev`

   This image packs the related development tools and the runtime environment. Users and developers can use this image instead of their own local computer for development, building, releasing, documentation writing, etc. Since different versions of Paddle may depend on different versions of libraries and tools, you must pay attention to the versions if you want to set up a local environment yourself. The development image contains:

   - gcc/clang
   - nvcc
   - Python
   - sphinx
   - woboq
   - sshd

   Many developers use servers with GPUs; they can ssh into the server and run :code:`docker exec` to enter the Docker container and start their work (see the sketch below). They can also start a development Docker image with an SSHD service, so they can log into the container and work there.
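   For instance, a :code:`docker exec` session against a named development container might look like this (a sketch; the container name ``paddle_dev`` is our own choice):

   .. code-block:: bash

      # Start the development image in the background with an SSH daemon.
      docker run -d --name paddle_dev paddlepaddle/paddle:<version>-dev /usr/sbin/sshd -D
      # Attach an interactive shell; run this from as many terminals as you like.
      docker exec -it paddle_dev /bin/bash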
Train Model Using Python API
----------------------------

Our official Docker image provides a runtime for PaddlePaddle programs. The typical workflow is as follows:

Create a directory as workspace:

.. code-block:: bash

   mkdir ~/workspace

Edit a PaddlePaddle Python program using your favourite editor:

.. code-block:: bash

   emacs ~/workspace/example.py

Run the program using Docker:

.. code-block:: bash

   docker run --rm -v ~/workspace:/workspace paddlepaddle/paddle:0.10.0rc2 python /workspace/example.py

Or, if you are using a GPU for training:

.. code-block:: bash

   nvidia-docker run --rm -v ~/workspace:/workspace paddlepaddle/paddle:0.10.0rc2-gpu python /workspace/example.py

The above commands start a Docker container by running :code:`python /workspace/example.py`. The container stops once :code:`python /workspace/example.py` finishes.
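If you want to keep the training output around after the run, one option is to detach the container and read its logs (a sketch; the container name ``paddle_job`` is our own choice):

.. code-block:: bash

   docker run -d --name paddle_job -v ~/workspace:/workspace paddlepaddle/paddle:0.10.0rc2 python /workspace/example.py
   docker logs -f paddle_job   # follow the program's stdout/stderr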
Another way is to tell Docker to start a :code:`/bin/bash` session and run the PaddlePaddle program interactively:

.. code-block:: bash

   docker run -it -v ~/workspace:/workspace paddlepaddle/paddle:0.10.0rc2 /bin/bash
   # now we are inside the docker container
   cd /workspace
   python example.py

Running with a GPU is identical:

.. code-block:: bash

   nvidia-docker run -it -v ~/workspace:/workspace paddlepaddle/paddle:0.10.0rc2-gpu /bin/bash
   # now we are inside the docker container
   cd /workspace
   python example.py
Develop PaddlePaddle or Train Model Using C++ API
-------------------------------------------------

We will be using the PaddlePaddle development image, since it contains all compiling tools and dependencies.

Let's clone the PaddlePaddle repo first:

.. code-block:: bash

   git clone https://github.com/PaddlePaddle/Paddle.git && cd Paddle

Mount both the workspace folder and the paddle code folder into the Docker container, so we can access them inside the container. There are two ways of using the PaddlePaddle development Docker image:

- Run an interactive bash directly:

  .. code-block:: bash

     # use nvidia-docker instead of docker if you need to use GPU
     docker run -it -v ~/workspace:/workspace -v $(pwd):/paddle paddlepaddle/paddle:0.10.0rc2-dev /bin/bash
     # now we are inside the docker container

- Or run it as a daemon container:

  .. code-block:: bash

     # use nvidia-docker instead of docker if you need to use GPU
     docker run -d -p 2202:22 -p 8888:8888 -v ~/workspace:/workspace -v $(pwd):/paddle paddlepaddle/paddle:0.10.0rc2-dev /usr/sbin/sshd -D

  and SSH into this container using the password :code:`root`:

  .. code-block:: bash

     ssh -p 2202 root@localhost

  An advantage is that we can run the PaddlePaddle container on a remote server and SSH into it from a laptop, as sketched below.
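  For example, if the daemon container runs on a remote GPU server, the login from a laptop only changes the host name (the address below is a placeholder):

  .. code-block:: bash

     ssh -p 2202 root@xxx.yyy.zzz.www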
When developing PaddlePaddle, you can edit the PaddlePaddle source code from outside the Docker container using your favourite editor. To compile PaddlePaddle, run inside the container:

.. code-block:: bash

   WITH_GPU=OFF WITH_AVX=ON WITH_TEST=ON bash /paddle/paddle/scripts/docker/build.sh

This builds everything about Paddle in :code:`/paddle/build`, and we can run the unit tests there:

.. code-block:: bash

   cd /paddle/build
   ctest

When training a model using the C++ API, we can edit the paddle program in ~/workspace outside of Docker, and build it from /workspace inside of Docker, as sketched below.
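One possible layout for such a program (everything here is an assumption, not a fixed convention) is a ``train.cpp`` plus a ``CMakeLists.txt`` in ``~/workspace``, built inside the development container:

.. code-block:: bash

   # Enter the dev container with both folders mounted, as above.
   docker run -it -v ~/workspace:/workspace -v $(pwd):/paddle paddlepaddle/paddle:0.10.0rc2-dev /bin/bash
   # Inside the container: an out-of-source CMake build of the workspace program.
   # This assumes your CMakeLists.txt locates the Paddle headers and libraries.
   cd /workspace && mkdir -p build && cd build
   cmake .. && make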
PaddlePaddle Book
-----------------

The Jupyter Notebook is an open-source web application that allows you to create and share documents that contain live code, equations, visualizations, and explanatory text in a single browser page.

The PaddlePaddle Book is an interactive Jupyter Notebook for users and developers. We already expose port 8888 for this book. If you want to dig deeper into deep learning, the PaddlePaddle Book is definitely your best choice.

We provide a packaged book image; simply issue the command:

.. code-block:: bash

   docker run -p 8888:8888 paddlepaddle/book

Then copy and paste the address into your local browser:

.. code-block:: text

   http://localhost:8888/

That's all. Enjoy your journey!
Documentation
......@@ -171,7 +273,7 @@ container:
.. code-block:: bash
docker run -d --name paddle-cpu-doc paddle:<version>
docker run -d --volumes-from paddle-cpu-doc -p 8088:80 nginx
......
......@@ -46,7 +46,6 @@ PaddlePaddle provides Ubuntu 14.04 deb packages.
with_double: OFF
with_python: ON
with_rdma: OFF
with_timer: OFF
with_predict_sdk:
......