diff --git a/.dockerignore b/.dockerignore deleted file mode 120000 index 3e4e48b0b5fe6b468434d6767749b399319f2da2..0000000000000000000000000000000000000000 --- a/.dockerignore +++ /dev/null @@ -1 +0,0 @@ -.gitignore \ No newline at end of file diff --git a/.dockerignore b/.dockerignore new file mode 100644 index 0000000000000000000000000000000000000000..2b2e74053d33cb6d2878fd3d6da48fa344172f63 --- /dev/null +++ b/.dockerignore @@ -0,0 +1,15 @@ +*.DS_Store +build/ +*.user +.vscode +.idea +.project +.cproject +.pydevproject +Makefile +.test_env/ +third_party/ +*~ +bazel-* + +!build/*.deb diff --git a/.gitignore b/.gitignore index 35bed0accdaa274f5966ca5b4b7180106325449b..9622ab78e0e0556ec2b4cc974fee93ff680d54d2 100644 --- a/.gitignore +++ b/.gitignore @@ -1,5 +1,6 @@ *.DS_Store build/ +build_doc/ *.user .vscode @@ -7,5 +8,21 @@ build/ .project .cproject .pydevproject +.settings/ Makefile .test_env/ +third_party/ + +*~ +bazel-* +third_party/ + +# clion workspace. +cmake-build-* + +# generated while compiling +python/paddle/v2/framework/core.so +CMakeFiles +cmake_install.cmake +paddle/.timestamp +python/paddlepaddle.egg-info/ diff --git a/.gitmodules b/.gitmodules deleted file mode 100644 index f635e65784af47a21df80cc92073ef14eba9a731..0000000000000000000000000000000000000000 --- a/.gitmodules +++ /dev/null @@ -1,3 +0,0 @@ -[submodule "warp-ctc"] - path = warp-ctc - url = https://github.com/baidu-research/warp-ctc.git diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml index 942669c41ff154c91e88c937739b0f604f21d545..bb8c88787d37faf9ce4d7d856a307c11f1085d98 100644 --- a/.pre-commit-config.yaml +++ b/.pre-commit-config.yaml @@ -1,22 +1,36 @@ - repo: https://github.com/Lucas-C/pre-commit-hooks.git - sha: c25201a00e6b0514370501050cf2a8538ac12270 + sha: v1.0.1 hooks: - id: remove-crlf - files: (?!.*warp-ctc)^.*$ -- repo: https://github.com/reyoung/mirrors-yapf.git - sha: v0.13.2 + files: (?!.*third_party)^.*$ | (?!.*book)^.*$ +- repo: https://github.com/PaddlePaddle/mirrors-yapf.git + sha: 0d79c0c469bab64f7229c9aca2b1186ef47f0e37 hooks: - id: yapf + files: (.*\.(py|bzl)|BUILD|.*\.BUILD|WORKSPACE)$ - repo: https://github.com/pre-commit/pre-commit-hooks - sha: 7539d8bd1a00a3c1bfd34cdb606d3a6372e83469 + sha: 5bf6c09bfa1297d3692cadd621ef95f1284e33c0 hooks: - id: check-added-large-files - id: check-merge-conflict - id: check-symlinks - id: detect-private-key - files: (?!.*warp-ctc)^.*$ + files: (?!.*third_party)^.*$ | (?!.*book)^.*$ - id: end-of-file-fixer -- repo: https://github.com/PaddlePaddle/clang-format-pre-commit-hook.git - sha: 28c0ea8a67a3e2dbbf4822ef44e85b63a0080a29 +- repo: local hooks: - - id: clang-formater + - id: clang-format + name: clang-format + description: Format files with ClangFormat. 
+      entry: clang-format -i
+      language: system
+      files: \.(c|cc|cxx|cpp|cu|h|hpp|hxx|proto)$
+- repo: https://github.com/PaddlePaddle/pre-commit-golang
+  sha: 8337620115c25ff8333f1b1a493bd031049bd7c0
+  hooks:
+    - id: go-fmt
+      types:
+        - go
+    - id: gometalinter
+      types:
+        - go
diff --git a/.travis.yml b/.travis.yml
index 6215060e336c7cff9689951c918dc7ec02b2a2fb..8c8c6699d3d9abddd65a3a224c2bceedc7d88348 100644
--- a/.travis.yml
+++ b/.travis.yml
@@ -1,58 +1,54 @@
 language: cpp
-cache: ccache
+cache:
+  directories:
+    - $HOME/.ccache
+    - $HOME/.cache/pip
+    - $TRAVIS_BUILD_DIR/build/third_party
+    - $TRAVIS_BUILD_DIR/build_android/third_party
 sudo: required
 dist: trusty
 os:
   - linux
-  - osx
 env:
-  - JOB=DOCS
-  - JOB=BUILD_AND_TEST
-matrix:
-  exclude:
-    - os: osx
-      env: JOB=DOCS # Only generate documentation in linux
-
+  - JOB=build_doc
+  - JOB=check_style
+  - JOB=build_android
 addons:
   apt:
     packages:
       - gcc-4.8
      - g++-4.8
-      - wget
      - git
      - build-essential
-      - libatlas-base-dev
      - python
      - python-pip
      - python2.7-dev
-      - m4
-      - libprotobuf-dev
-      - doxygen
-      - protobuf-compiler
-      - python-protobuf
      - python-numpy
      - python-wheel
-      - libgoogle-glog-dev
-      - libgflags-dev
-      - libgtest-dev
+      - libboost-dev
      - curl
-      - lcov
-      - graphviz
      - swig
+      - graphviz
+      - clang-format-3.8
+      - automake
+      - libtool
+      - ccache
 before_install:
+  - if [[ "$JOB" == "check_style" ]]; then sudo ln -s /usr/bin/clang-format-3.8 /usr/bin/clang-format; fi
+  # Paddle is using protobuf 3.1 currently. Protobuf 3.2 breaks compatibility,
+  # so we pin the Python protobuf version.
+  - pip install numpy wheel 'protobuf==3.1' sphinx==1.5.6 recommonmark sphinx-rtd-theme==0.1.9 virtualenv pre-commit requests==2.9.2 LinkChecker
+  - pip install rarfile nltk==3.2.2 scipy==0.19.0 recordio matplotlib Pillow
+  - curl https://glide.sh/get | bash
+  - eval "$(GIMME_GO_VERSION=1.8.3 gimme)"
+  - go get -u github.com/alecthomas/gometalinter
+  - gometalinter --install
  - |
-    if [ ${JOB} == "BUILD_AND_TEST" ]; then
-      if ! git diff --name-only $TRAVIS_COMMIT_RANGE | grep -qvE '(\.md$)|(\.rst$)|(\.jpg$)|(\.png$)'
-      then
-        echo "Only markdown docs were updated, stopping build process."
- exit - fi - fi - - if [[ "$TRAVIS_OS_NAME" == "linux" ]]; then sudo paddle/scripts/travis/before_install.linux.sh; fi - - if [[ "$TRAVIS_OS_NAME" == "osx" ]]; then paddle/scripts/travis/before_install.osx.sh; fi - - pip install wheel protobuf sphinx breathe recommonmark virtualenv numpy sphinx_rtd_theme + function timeout() { perl -e 'alarm shift; exec @ARGV' "$@"; } script: - - paddle/scripts/travis/main.sh + - | + timeout 2580 paddle/scripts/travis/${JOB}.sh # 43min timeout + RESULT=$?; if [ $RESULT -eq 0 ] || [ $RESULT -eq 142 ]; then true; else false; fi; notifications: email: on_success: change diff --git a/AUTHORS.md b/AUTHORS.md new file mode 100644 index 0000000000000000000000000000000000000000..4db4a4a8e7441b07ce2db4adff043bb99a09014b --- /dev/null +++ b/AUTHORS.md @@ -0,0 +1,48 @@ +| Github account | name | +|---|---| +| backyes | Yan-Fei Wang | +| beckett1124 | Bin Qi | +| Canpio | Jia-Yi Feng | +| chengxiaohua1105 | Xiao-Hua Cheng | +| cxwangyi, yiwangbaidu, wangkuiyi | Yi Wang | +| cxysteven | Xing-Yi Cheng | +| dzhwinter | Zhi-Hong Dong | +| emailweixu | Wei Xu | +| gangliao | Gang Liao | +| gongweibao | Wei-Bao Gong | +| Guo Sheng | Sheng Guo | +| Haichao-Zhang | Hai-Chao Zhang | +| hedaoyuan | Dao-Yuan He | +| helinwang | He-Lin Wang | +| jacquesqiao | Long-Fei Qiao | +| kuke | Yi-Bing Liu | +| lcy-seso | Ying Cao | +| lipeng-unisound | Peng Li | +| liuyuan | Yuan Liu | +| livc | Zhao Li | +| llxxxll | Yong-Feng Liu | +| luotao01 | Tao Luo | +| lzhao4ever | Liang Zhao | +| NHZlX | Zhao-Long Xing | +| pakchoi | Chuan-Jiang Song | +| pengli09 | Peng Li | +| pkuyym | Ya-Ming Yang | +| QiJune | Jun Qi | +| qingqing01 | Qing-Qing Dang | +| reyoung | Yang Yu | +| Superjom | Chun-Wei Yan | +| tianbingsz | Tian-Bing Xu | +| typhoonzero | Yi Wu | +| wanghaoshuang | Hao-Shuang Wang | +| wangyang59 | Yang Wang | +| wangzhen-nlp | Zhen Wang | +| wen-bo-yang | Wen-Bo Yang | +| wwhu | Wei-Wei Hu | +| xinghai-sun | Xing-Hai Sun | +| Xreki | Yi-Qun Liu | +| xujun05 | Jun Xu | +| xushaoyong | Shao-Yong Xu | +| Yancey1989 | Xu Yan | +| zhaopu7 | Pu Zhao | +| zhouxiao-coder | Xiao Zhou | +| Zrachel | Rui-Qing Zhang | diff --git a/CMakeLists.txt b/CMakeLists.txt index 0a44e56719baa433a5c45df2082fa86296b3da1c..b174831109372cb014741d63032fa6a470e74042 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -1,173 +1,173 @@ -cmake_minimum_required(VERSION 2.8) +# Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserve. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License + +cmake_minimum_required(VERSION 3.0) +set(CMAKE_MODULE_PATH ${CMAKE_MODULE_PATH} "${CMAKE_CURRENT_SOURCE_DIR}/cmake") +set(PROJ_ROOT ${CMAKE_CURRENT_SOURCE_DIR}) +set(PROJ_BINARY_ROOT ${CMAKE_CURRENT_BINARY_DIR}) + +include(system) + +project(paddle CXX C Go) -project(paddle CXX C) - -set(CMAKE_MODULE_PATH ${CMAKE_MODULE_PATH} "${CMAKE_SOURCE_DIR}/cmake") -set(PROJ_ROOT ${CMAKE_SOURCE_DIR}) -include(package) -find_package(SWIG 2.0) -find_package(CUDA QUIET) -find_package(Protobuf REQUIRED) - -# Check protobuf library version. 
-execute_process(COMMAND ${PROTOBUF_PROTOC_EXECUTABLE} --version - OUTPUT_VARIABLE PROTOBUF_VERSION) -string(REPLACE "libprotoc " "" PROTOBUF_VERSION ${PROTOBUF_VERSION}) - -set(PROTOBUF_3 OFF) -if (${PROTOBUF_VERSION} VERSION_GREATER "3.0.0" OR ${PROTOBUF_VERSION} VERSION_EQUAL "3.0.0") - set(PROTOBUF_3 ON) +find_package(Sphinx) +if(NOT CMAKE_CROSSCOMPILING) + find_package(CUDA QUIET) +endif(NOT CMAKE_CROSSCOMPILING) +find_package(Git REQUIRED) +find_package(Threads REQUIRED) +if(NOT ANDROID) + find_package(Boost QUIET) endif() -find_package(PythonLibs 2.7 REQUIRED) -find_package(PythonInterp 2.7 REQUIRED) -find_package(ZLIB REQUIRED) -find_package(NumPy REQUIRED) -find_package(Threads REQUIRED) -find_package(AVX QUIET) -find_package(Glog) -find_package(Gflags QUIET) -find_package(GTest) -find_package(Sphinx) -find_package(Doxygen) -include(cblas) -find_program(M4_EXECUTABLE m4) -###################### Configurations ########################### -option(WITH_DSO "Compile PaddlePaddle with dynamic linked libraries" ON) -option(WITH_GPU "Compile PaddlePaddle with gpu" ${CUDA_FOUND}) -option(WITH_DOUBLE "Compile PaddlePaddle with double precision, otherwise use single precision" OFF) -option(WITH_AVX "Compile PaddlePaddle with avx intrinsics" ${AVX_FOUND}) -option(WITH_PYTHON "Compile PaddlePaddle with python interpreter" ON) -option(WITH_STYLE_CHECK "Style Check for PaddlePaddle" ${PYTHONINTERP_FOUND}) -option(WITH_RDMA "Compile PaddlePaddle with rdma support" OFF) -option(WITH_GLOG "Compile PaddlePaddle use glog, otherwise use a log implement internally" ${LIBGLOG_FOUND}) -option(WITH_GFLAGS "Compile PaddlePaddle use gflags, otherwise use a flag implement internally" ${GFLAGS_FOUND}) -option(WITH_TIMER "Compile PaddlePaddle use timer" OFF) -option(WITH_PROFILER "Compile PaddlePaddle use gpu profiler" OFF) -option(WITH_TESTING "Compile and run unittest for PaddlePaddle" ${GTEST_FOUND}) -option(WITH_DOC "Compile PaddlePaddle with documentation" OFF) -option(WITH_SWIG_PY "Compile PaddlePaddle with py PaddlePaddle prediction api" ${SWIG_FOUND}) -option(ON_TRAVIS "Running test on travis-ci or not." OFF) -option(ON_COVERALLS "Generating code coverage data on coveralls or not." OFF) -option(COVERALLS_UPLOAD "Uploading the generated coveralls json." ON) - - -include(cpplint) -include(ccache) -if(WITH_RDMA) - include(rdma) +include(simd) + +################################ Configurations ####################################### +option(WITH_GPU "Compile PaddlePaddle with NVIDIA GPU" ${CUDA_FOUND}) +option(WITH_AVX "Compile PaddlePaddle with AVX intrinsics" ${AVX_FOUND}) +option(WITH_MKLDNN "Compile PaddlePaddle with mkl-dnn support." ${AVX_FOUND}) +option(WITH_MKLML "Compile PaddlePaddle with mklml package." 
${AVX_FOUND})
+option(WITH_DSO "Compile PaddlePaddle with dynamic linked CUDA" ON)
+option(WITH_TESTING "Compile PaddlePaddle with unit testing" ON)
+option(WITH_SWIG_PY "Compile PaddlePaddle with inference api" ON)
+option(WITH_STYLE_CHECK "Compile PaddlePaddle with style check" ON)
+option(WITH_PYTHON "Compile PaddlePaddle with python interpreter" ON)
+option(WITH_DOUBLE "Compile PaddlePaddle with double precision" OFF)
+option(WITH_RDMA "Compile PaddlePaddle with RDMA support" OFF)
+option(WITH_TIMER "Compile PaddlePaddle with stats timer" OFF)
+option(WITH_PROFILER "Compile PaddlePaddle with GPU profiler" OFF)
+option(WITH_DOC "Compile PaddlePaddle with documentation" OFF)
+option(WITH_COVERAGE "Compile PaddlePaddle with code coverage" OFF)
+option(COVERALLS_UPLOAD "Package code coverage data to coveralls" OFF)
+option(ON_TRAVIS "Exclude special unit test on Travis CI" OFF)
+option(WITH_C_API "Compile PaddlePaddle with C-API(Prediction)" OFF)
+option(WITH_GOLANG "Compile PaddlePaddle with GOLANG" OFF)
+option(GLIDE_INSTALL "Download and install go dependencies " ON)
+option(USE_NNPACK "Compile PaddlePaddle with NNPACK library" OFF)
+
+# CMAKE_BUILD_TYPE
+if(NOT CMAKE_BUILD_TYPE)
+    set(CMAKE_BUILD_TYPE "RelWithDebInfo" CACHE STRING
+      "Choose the type of build, options are: Debug Release RelWithDebInfo MinSizeRel"
+      FORCE)
 endif()
-include(util)
-include(flags)
-include(cudnn)
-include(FindPythonModule)
-include(check_packages)
-include(swig)
-include(coveralls)
-
-# Set PaddlePaddle version to Git tag name or Git commit ID.
-find_package(Git REQUIRED)
-# version.cmake will get the current PADDLE_VERSION
-include(version)
-add_definitions(-DPADDLE_VERSION=${PADDLE_VERSION})
-
-if(NOT WITH_GPU)
-    add_definitions(-DPADDLE_ONLY_CPU)
-    add_definitions(-DHPPL_STUB_FUNC)
-
-    list(APPEND CMAKE_CXX_SOURCE_FILE_EXTENSIONS cu)
-else()
-    if(${CUDA_VERSION_MAJOR} VERSION_LESS 7)
-        message(FATAL_ERROR "Paddle need CUDA >= 7.0 to compile")
-    endif()
-    if(NOT CUDNN_FOUND)
-        message(FATAL_ERROR "Paddle need cudnn to compile")
+if(ANDROID)
+    if(${CMAKE_SYSTEM_VERSION} VERSION_LESS "21")
+        message(FATAL_ERROR "Standalone toolchains with Android API levels lower than 21 are not supported")
     endif()
-    if(WITH_AVX)
-        set(CUDA_NVCC_FLAGS ${CUDA_NVCC_FLAGS} "-Xcompiler ${AVX_FLAG}")
-    else(WITH_AVX)
-        set(CUDA_NVCC_FLAGS ${CUDA_NVCC_FLAGS} "-Xcompiler ${SSE3_FLAG}")
-    endif(WITH_AVX)
-
-    # Include cuda and cudnn
-    include_directories(${CUDNN_INCLUDE_DIR})
-    include_directories(${CUDA_TOOLKIT_INCLUDE})
-endif(NOT WITH_GPU)
-
-if(WITH_DSO)
-    add_definitions(-DPADDLE_USE_DSO)
-endif(WITH_DSO)
-
-if(WITH_DOUBLE)
-    add_definitions(-DPADDLE_TYPE_DOUBLE)
-    set(ACCURACY double)
-else(WITH_DOUBLE)
-    set(ACCURACY float)
-endif(WITH_DOUBLE)
-
-if(NOT WITH_TIMER)
-    add_definitions(-DPADDLE_DISABLE_TIMER)
-endif(NOT WITH_TIMER)
-
-if(NOT WITH_PROFILER)
-    add_definitions(-DPADDLE_DISABLE_PROFILER)
-endif(NOT WITH_PROFILER)
-
-if(WITH_AVX)
-    set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} ${AVX_FLAG}")
-    set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} ${AVX_FLAG}")
-else(WITH_AVX)
-    set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} ${SSE3_FLAG}")
-    set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} ${SSE3_FLAG}")
-endif(WITH_AVX)
-
-if(WITH_PYTHON)
-    include_directories(${PYTHON_INCLUDE_DIR})
-    include_directories(${PYTHON_NUMPY_INCLUDE_DIR})
-else(WITH_PYTHON)
-    add_definitions(-DPADDLE_NO_PYTHON)
-endif(WITH_PYTHON)
-
-if(WITH_RDMA)
-    include_directories("${RDMA_INC_DIR}")
-else(WITH_RDMA)
-    add_definitions(-DPADDLE_DISABLE_RDMA)
-endif(WITH_RDMA)
-
-if(WITH_GLOG)
-    add_definitions(-DPADDLE_USE_GLOG)
-    include_directories(${LIBGLOG_INCLUDE_DIR})
+    set(WITH_GPU OFF CACHE STRING
+        "Disable GPU when cross-compiling for Android" FORCE)
+    set(WITH_AVX OFF CACHE STRING
+        "Disable AVX when cross-compiling for Android" FORCE)
+    set(WITH_PYTHON OFF CACHE STRING
+        "Disable PYTHON when cross-compiling for Android" FORCE)
+    set(WITH_RDMA OFF CACHE STRING
+        "Disable RDMA when cross-compiling for Android" FORCE)
+    set(WITH_MKLDNN OFF CACHE STRING
+        "Disable MKLDNN when cross-compiling for Android" FORCE)
+    set(WITH_MKLML OFF CACHE STRING
+        "Disable MKLML package when cross-compiling for Android" FORCE)
+endif(ANDROID)
+
+set(THIRD_PARTY_PATH "${CMAKE_BINARY_DIR}/third_party" CACHE STRING
+  "A path setting third party libraries download & build directories.")
+
+if (WITH_C_API AND WITH_PYTHON)
+  message(WARNING "It is suggested not to embed a Python interpreter in Paddle "
+    "when using the C-API. It can behave unpredictably when the Python "
+    "interpreter used at runtime differs from the one used for compiling.")
 endif()
-if(WITH_GFLAGS)
-    add_definitions(-DPADDLE_USE_GFLAGS)
-    add_definitions(-DGFLAGS_NS=${GFLAGS_NAMESPACE})
-    include_directories(${GFLAGS_INCLUDE_DIRS})
-endif()
+########################################################################################
+
+include(external/mklml)     # download mklml package
+include(external/zlib)      # download, build, install zlib
+include(external/gflags)    # download, build, install gflags
+include(external/glog)      # download, build, install glog
+include(external/gtest)     # download, build, install gtest
+include(external/protobuf)  # download, build, install protobuf
+include(external/python)    # download, build, install python
+include(external/openblas)  # download, build, install openblas
+include(external/mkldnn)    # download, build, install mkldnn
+include(external/swig)      # download, build, install swig
+include(external/warpctc)   # download, build, install warpctc
+include(external/any)       # download libn::any
+include(external/eigen)     # download eigen3
+include(external/pybind11)  # download pybind11
+
+include(cudnn)      # set cudnn libraries, must come before configure
+include(configure)  # add paddle env configuration
+include(generic)    # simplify cmake module
+include(package)    # set paddle packages
+include(cpplint)    # set paddle c++ style
+include(ccache)     # set ccache for compilation
+include(util)       # set unittest and link libs
+include(rdma)       # set rdma libraries
+include(flags)      # set paddle compile flags
+include(version)    # set PADDLE_VERSION
+include(coveralls)  # set code coverage
 
-if(WITH_TESTING)
-    enable_testing()
-    include_directories(${GTEST_INCLUDE_DIRS})
-endif()
 
-include_directories("${CBLAS_INC_DIR}")
 include_directories("${PROJ_ROOT}")
 include_directories("${PROJ_ROOT}/paddle/cuda/include")
-include_directories(${PROTOBUF_INCLUDE_DIRS})
 include_directories("${CMAKE_CURRENT_BINARY_DIR}/proto")
-if(EXISTS "${PROJ_ROOT}/paddle/internals/CMakeLists.txt")
-    set(PADDLE_WITH_INTERNAL ON)
-    include(paddle/internals/CMakeLists.txt)
-else()
-    set(PADDLE_WITH_INTERNAL OFF)
-    set(INTERNAL_PROTO_PATH "")
+include_directories("${CMAKE_CURRENT_BINARY_DIR}/go/pserver/client/c")
+include_directories(${Boost_INCLUDE_DIRS})
+
+set(EXTERNAL_LIBS
+    ${GFLAGS_LIBRARIES}
+    ${GLOG_LIBRARIES}
+    ${CBLAS_LIBRARIES}
+    ${PROTOBUF_LIBRARY}
+    ${ZLIB_LIBRARIES}
+    ${PYTHON_LIBRARIES}
+)
+
+if(WITH_GPU)
+  list(APPEND EXTERNAL_LIBS ${CUDA_LIBRARIES} ${CUDA_rt_LIBRARY})
+  if(NOT WITH_DSO)
+    list(APPEND EXTERNAL_LIBS ${CUDNN_LIBRARY} ${CUDA_CUBLAS_LIBRARIES} ${CUDA_curand_LIBRARY})
+  endif(NOT WITH_DSO)
+endif(WITH_GPU)
+
+if(WITH_MKLDNN)
+  list(APPEND EXTERNAL_LIBS ${MKLDNN_LIBRARY} ${MKLDNN_IOMP_LIB})
 endif()
+
+if(USE_NNPACK)
+  include(external/nnpack)
+  list(APPEND EXTERNAL_LIBS ${NNPACK_LIBS})
+endif(USE_NNPACK)
+
 add_subdirectory(proto)
+
+# "add_subdirectory(go)" should be placed after the following line,
+# because it depends on paddle/optimizer.
+add_subdirectory(paddle/optimizer)
+
+# "add_subdirectory(paddle)" and "add_subdirectory(python)" should be
+# placed after this block, because they depend on it.
+if(WITH_GOLANG)
+  add_subdirectory(go)
+endif(WITH_GOLANG)
+
 add_subdirectory(paddle)
-add_subdirectory(python)
+if(WITH_PYTHON)
+  add_subdirectory(python)
+endif()
 if(WITH_DOC)
   add_subdirectory(doc)
-  add_subdirectory(doc_cn)
 endif()
diff --git a/CONTRIBUTING.md b/CONTRIBUTING.md
new file mode 100644
index 0000000000000000000000000000000000000000..0d4bb973ae87bb45ef4386a63c26ed62602f2cee
--- /dev/null
+++ b/CONTRIBUTING.md
@@ -0,0 +1 @@
+./doc/howto/dev/contribute_to_paddle_en.md
diff --git a/Dockerfile b/Dockerfile
new file mode 100644
index 0000000000000000000000000000000000000000..8ac123bf9c0f24b47b741611f3b80213c61b82e9
--- /dev/null
+++ b/Dockerfile
@@ -0,0 +1,89 @@
+# An image for building paddle binaries
+# Use cuda devel base image for both cpu and gpu environment
+FROM nvidia/cuda:8.0-cudnn5-devel-ubuntu16.04
+MAINTAINER PaddlePaddle Authors
+
+ARG UBUNTU_MIRROR
+RUN /bin/bash -c 'if [[ -n ${UBUNTU_MIRROR} ]]; then sed -i 's#http://archive.ubuntu.com/ubuntu#${UBUNTU_MIRROR}#g' /etc/apt/sources.list; fi'
+
+# ENV variables
+ARG WITH_GPU
+ARG WITH_AVX
+ARG WITH_DOC
+ARG WITH_STYLE_CHECK
+
+ENV WOBOQ OFF
+ENV WITH_GPU=${WITH_GPU:-OFF}
+ENV WITH_AVX=${WITH_AVX:-ON}
+ENV WITH_DOC=${WITH_DOC:-OFF}
+ENV WITH_STYLE_CHECK=${WITH_STYLE_CHECK:-OFF}
+
+ENV HOME /root
+# Add bash enhancements
+COPY ./paddle/scripts/docker/root/ /root/
+
+RUN apt-get update && \
+    apt-get install -y \
+    git python-pip python-dev openssh-server bison \
+    wget unzip unrar tar xz-utils bzip2 gzip coreutils ntp \
+    curl sed grep graphviz libjpeg-dev zlib1g-dev \
+    python-matplotlib gcc-4.8 g++-4.8 \
+    automake locales clang-format swig doxygen cmake \
+    liblapack-dev liblapacke-dev libboost-dev \
+    clang-3.8 llvm-3.8 libclang-3.8-dev \
+    net-tools && \
+    apt-get clean -y
+
+# paddle is using numpy.flip, which was introduced in 1.12.0
+RUN pip --no-cache-dir install 'numpy>=1.12.0'
+
+# Install Go and glide
+RUN wget -qO- https://storage.googleapis.com/golang/go1.8.1.linux-amd64.tar.gz | \
+    tar -xz -C /usr/local && \
+    mkdir /root/gopath && \
+    mkdir /root/gopath/bin && \
+    mkdir /root/gopath/src
+ENV GOROOT=/usr/local/go GOPATH=/root/gopath
+# must not be on the same line as the GOROOT definition; otherwise docker build cannot find GOROOT.
+ENV PATH=${PATH}:${GOROOT}/bin:${GOPATH}/bin
+# install glide
+RUN curl -s -q https://glide.sh/get | sh
+
+# git credential to skip password typing
+RUN git config --global credential.helper store
+
+# Fix locales to en_US.UTF-8
+RUN localedef -i en_US -f UTF-8 en_US.UTF-8
+
+# FIXME: due to a temporary ipykernel dependency issue, pin the ipykernel and jupyter
+# versions until jupyter fixes this issue.
+RUN pip install --upgrade pip && \
+    pip install -U 'protobuf==3.1.0' && \
+    pip install -U wheel pillow BeautifulSoup && \
+    pip install -U docopt PyYAML sphinx && \
+    pip install -U sphinx-rtd-theme==0.1.9 recommonmark && \
+    pip install pre-commit 'requests==2.9.2' 'ipython==5.3.0' && \
+    pip install 'ipykernel==4.6.0' 'jupyter==1.0.0' && \
+    pip install rarfile
+
+# To fix https://github.com/PaddlePaddle/Paddle/issues/1954, we use
+# the solution in https://urllib3.readthedocs.io/en/latest/user-guide.html#ssl-py2
+RUN apt-get install -y libssl-dev libffi-dev
+RUN pip install certifi urllib3[secure]
+
+# Install woboq_codebrowser to /woboq
+RUN git clone https://github.com/woboq/woboq_codebrowser /woboq && \
+    (cd /woboq && \
+     cmake -DLLVM_CONFIG_EXECUTABLE=/usr/bin/llvm-config-3.8 \
+           -DCMAKE_BUILD_TYPE=Release . && \
+     make)
+
+# Configure OpenSSH server. c.f. https://docs.docker.com/engine/examples/running_ssh_service
+RUN mkdir /var/run/sshd
+RUN echo 'root:root' | chpasswd
+RUN sed -ri 's/^PermitRootLogin\s+.*/PermitRootLogin yes/' /etc/ssh/sshd_config
+RUN sed -ri 's/UsePAM yes/#UsePAM yes/g' /etc/ssh/sshd_config
+EXPOSE 22
+
+# By default, the development image does build work
+CMD ["bash", "/paddle/paddle/scripts/docker/build.sh"]
diff --git a/Dockerfile.android b/Dockerfile.android
new file mode 100644
index 0000000000000000000000000000000000000000..c0fa58c384f9ebcae60477ffce49ea4ffa929db9
--- /dev/null
+++ b/Dockerfile.android
@@ -0,0 +1,49 @@
+FROM ubuntu:16.04
+MAINTAINER PaddlePaddle Authors
+
+ARG UBUNTU_MIRROR
+RUN /bin/bash -c 'if [[ -n ${UBUNTU_MIRROR} ]]; then sed -i 's#http://archive.ubuntu.com/ubuntu#${UBUNTU_MIRROR}#g' /etc/apt/sources.list; fi'
+
+ENV HOME=/root \
+    ANDROID_NDK_HOME=/opt/android-ndk-linux \
+    ANDROID_STANDALONE_TOOLCHAIN=/opt/android-toolchain-gcc
+
+RUN apt-get update && \
+    apt-get install -y \
+    git python-dev python-pip python-numpy \
+    wget curl tar unzip gcc g++ locales clang-format-3.8 swig cmake && \
+    apt-get clean -y
+
+# Install Go and glide
+RUN wget -O go.tgz https://storage.googleapis.com/golang/go1.8.1.linux-amd64.tar.gz && \
+    tar -C /usr/local -xzf go.tgz && \
+    mkdir /root/gopath && \
+    mkdir /root/gopath/bin && \
+    mkdir /root/gopath/src && \
+    rm go.tgz
+ENV GOROOT=/usr/local/go GOPATH=/root/gopath
+# must not be on the same line as the GOROOT definition; otherwise docker build cannot find GOROOT.
+ENV PATH=${PATH}:${GOROOT}/bin:${GOPATH}/bin + +# git credential to skip password typing +RUN git config --global credential.helper store + +# Fix locales to en_US.UTF-8 +RUN localedef -i en_US -f UTF-8 en_US.UTF-8 + +RUN pip install --upgrade pip && \ + pip install -U 'protobuf==3.1.0' && \ + pip install -U wheel sphinx && \ + pip install pre-commit + +# Android NDK +RUN mkdir /opt/android-ndk-tmp && \ + cd /opt/android-ndk-tmp && \ + wget -q https://dl.google.com/android/repository/android-ndk-r14b-linux-x86_64.zip && \ + unzip -q android-ndk-r14b-linux-x86_64.zip && \ + mv android-ndk-r14b ${ANDROID_NDK_HOME} && \ + ${ANDROID_NDK_HOME}/build/tools/make-standalone-toolchain.sh --arch=arm --platform=android-21 --install-dir=${ANDROID_STANDALONE_TOOLCHAIN} && \ + rm -rf /opt/android-ndk-tmp && \ + rm -rf ${ANDROID_NDK_HOME} + +CMD ["bash", "/paddle/paddle/scripts/docker/build_android.sh"] diff --git a/README.md b/README.md index 8a8e15841586ae6a01bb93e94f6074189f556f5a..b9793c3eab5d40c28f01cc67ad607b97261b3235 100644 --- a/README.md +++ b/README.md @@ -2,8 +2,8 @@ [![Build Status](https://travis-ci.org/PaddlePaddle/Paddle.svg?branch=develop)](https://travis-ci.org/PaddlePaddle/Paddle) -[![Documentation Status](https://img.shields.io/badge/docs-latest-brightgreen.svg?style=flat)](http://www.paddlepaddle.org/) -[![Documentation Status](https://img.shields.io/badge/中文文档-最新-brightgreen.svg)](http://www.paddlepaddle.org/cn/index.html) +[![Documentation Status](https://img.shields.io/badge/docs-latest-brightgreen.svg?style=flat)](http://doc.paddlepaddle.org/develop/doc/) +[![Documentation Status](https://img.shields.io/badge/中文文档-最新-brightgreen.svg)](http://doc.paddlepaddle.org/develop/doc_cn/) [![Coverage Status](https://coveralls.io/repos/github/PaddlePaddle/Paddle/badge.svg?branch=develop)](https://coveralls.io/github/PaddlePaddle/Paddle?branch=develop) [![Release](https://img.shields.io/github/release/PaddlePaddle/Paddle.svg)](https://github.com/PaddlePaddle/Paddle/releases) [![License](https://img.shields.io/badge/license-Apache%202-blue.svg)](LICENSE) @@ -59,36 +59,37 @@ Please refer to our [release announcement](https://github.com/PaddlePaddle/Paddl the capability of PaddlePaddle to make a huge impact for your product. ## Installation -Check out the [Install Guide](http://paddlepaddle.org/doc/build/) to install from -pre-built packages (**docker image**, **deb package**) or -directly build on **Linux** and **Mac OS X** from the source code. + +It is recommended to check out the +[Docker installation guide](http://doc.paddlepaddle.org/develop/doc/getstarted/build_and_install/docker_install_en.html) +before looking into the +[build from source guide](http://doc.paddlepaddle.org/develop/doc/getstarted/build_and_install/build_from_source_en.html) ## Documentation -Both [English Docs](http://paddlepaddle.org/doc/) and [Chinese Docs](http://paddlepaddle.org/doc_cn/) are provided for our users and developers. -- [Quick Start](http://paddlepaddle.org/doc/demo/quick_start/index_en)
- You can follow the quick start tutorial to learn how use PaddlePaddle - step-by-step. +We provide [English](http://doc.paddlepaddle.org/develop/doc/) and +[Chinese](http://doc.paddlepaddle.org/doc_cn/) documentation. + +- [Deep Learning 101](http://book.paddlepaddle.org/index.html) + + You might want to start from this online interactive book that can run in Jupyter Notebook. + +- [Distributed Training](http://doc.paddlepaddle.org/develop/doc/howto/usage/cluster/cluster_train_en.html) + + You can run distributed training jobs on MPI clusters. + +- [Distributed Training on Kubernetes](http://doc.paddlepaddle.org/develop/doc/howto/usage/k8s/k8s_en.html) + + You can also run distributed training jobs on Kubernetes clusters. -- [Example and Demo](http://paddlepaddle.org/doc/demo/)
- We provide five demos, including: image classification, sentiment analysis, - sequence to sequence model, recommendation, semantic role labeling. +- [Python API](http://doc.paddlepaddle.org/develop/doc/api/index_en.html) -- [Distributed Training](http://paddlepaddle.org/doc/cluster)
- This system supports training deep learning models on multiple machines - with data parallelism. + Our new API enables much shorter programs. -- [Python API](http://paddlepaddle.org/doc/ui/)
- PaddlePaddle supports using either Python interface or C++ to build your - system. We also use SWIG to wrap C++ source code to create a user friendly - interface for Python. You can also use SWIG to create interface for your - favorite programming language. +- [How to Contribute](http://doc.paddlepaddle.org/develop/doc/howto/dev/contribute_to_paddle_en.html) -- [How to Contribute](http://paddlepaddle.org/doc/build/contribute_to_paddle.html)
- We sincerely appreciate your interest and contributions. If you would like to - contribute, please read the contribution guide. + We appreciate your contributions! -- [Source Code Documents](http://paddlepaddle.org/doc/source/)
## Ask Questions diff --git a/RELEASE.cn.md b/RELEASE.cn.md new file mode 100644 index 0000000000000000000000000000000000000000..5deaf230a8f5dd3089993f0fc79b9460fd049750 --- /dev/null +++ b/RELEASE.cn.md @@ -0,0 +1,80 @@ +# v0.10.0版本 + +我们非常高兴发布了PaddlePaddle V0.10.0版,并开发了新的[Python API](http://research.baidu.com/paddlepaddles-new-api-simplifies-deep-learning-programs/)。 + +- 旧的Python API由于难以学习和使用已经过时了。使用旧版本的API至少需要两份python文件,分别是定义数据生成器和定义网络拓扑结构的文件。用户通过运行`paddle_trainer`的C++程序来启动PaddlePaddle任务,该程序调用Python解释器来运行定义网络拓扑结构的文件,然后通过迭代加载数据生成器提供的小批量数据启动训练循环。这与Python的现代编辑方式不符,比如Jupyter Notebook。 + +- 新版的API被称为 *V2 API*,允许我们在单个.py文件中,通过编辑更短的Python程序来定义网络结构和数据。此外,该Python程序也可以在Jupyter Notebook中运行,因为PaddlePaddle可以作为共享库来被Python程序加载和使用。 + +基于新的API,我们提供了一个在线的学习文档 [Deep Learning 101](http://book.paddlepaddle.org/index.en.html) 及其[中文版本](http://book.paddlepaddle.org/)。 + +我们还致力于迭代更新新版API的在线文档,并将新版API引入分布式集群(包括MPI和Kubernetes)训练中。我们将在下一个版本中发布更多的内容。 + +## 新特点 + +* 发布新版[Python API](http://research.baidu.com/paddlepaddles-new-api-simplifies-deep-learning-programs/)。 +* 发布深度学习系列课程 [Deep Learning 101](http://book.paddlepaddle.org/index.en.html) 及其[中文版本](http://book.paddlepaddle.org/)。 +* 支持矩形输入的CNN。 +* 为seqlastin和seqfirstin提供stride pooling。 +* 在`trainer_config_helpers`中暴露`seq_concat_layer/seq_reshape_layer`。 +* 添加公共数据集包:CIFAR,MNIST,IMDB,WMT14,CONLL05,movielens,imikolov。 +* 针对Single Shot Multibox Detection增加 Prior box layer。 +* 增加光滑的L1损失。 +* 在V2 API中增加 data reader 创建器和修饰器。 +* 增加cmrnorm投影的CPU实现。 + + +## 改进 + +* 提供`paddle_trainer`的Python virtualenv支持。 +* 增加代码自动格式化的pre-commit hooks。 +* 升级protobuf到3.x版本。 +* 在Python数据生成器中提供一个检测数据类型的选项。 +* 加速GPU中average层的后向反馈计算。 +* 细化文档。 +* 使用Travis-CI检查文档中的死链接。 +* 增加解释`sparse_vector`的示例。 +* 在layer_math.py中添加ReLU。 +* 简化Quick Start示例中的数据处理流程。 +* 支持CUDNN Deconv。 +* 在v2 API中增加数据feeder。 +* 在情感分析示例的演示中增加对标准输入流中样本的预测。 +* 提供图像预处理的多进程接口。 +* 增加V1 API的基准文档。 +* 在`layer_math.py`中增加ReLU。 +* 提供公共数据集的自动下载包。 +* 将`Argument::sumCost`重新命名为`Argument::sum`,并暴露给python。 +* 为矩阵相关的表达式评估增加一个新的`TensorExpression`实现。 +* 增加延迟分配来优化批处理多表达式计算。 +* 增加抽象的类函数及其实现: + * `PadFunc` 和 `PadGradFunc`。 + * `ContextProjectionForwardFunc` 和 `ContextProjectionBackwardFunc`。 + * `CosSimBackward` 和 `CosSimBackwardFunc`。 + * `CrossMapNormalFunc` 和 `CrossMapNormalGradFunc`。 + * `MulFunc`。 +* 增加`AutoCompare`和`FunctionCompare`类,使得编写比较gpu和cpu版本函数的单元测试更容易。 +* 生成`libpaddle_test_main.a`并删除测试文件内的主函数。 +* 支持PyDataProvider2中numpy的稠密向量。 +* 清理代码库,删除一些复制粘贴的代码片段: + * 增加`SparseRowMatrix`的抽样类`RowBuffer`。 + * 清理`GradientMachine`的接口。 + * 在layer中增加`override`关键字。 + * 简化`Evaluator::create`,使用`ClassRegister`来创建`Evaluator`。 +* 下载演示的数据集时检查MD5校验。 +* 添加`paddle::Error`,用于替代Paddle中的`LOG(FATAL)`。 + + +## 错误修复 + +* 检查`recurrent_group`的layer输入类型。 +* 不要用.cu源文件运行`clang-format`。 +* 修复`LogActivation`的使用错误。 +* 修复运行`test_layerHelpers`多次的错误。 +* 修复seq2seq示例超出消息大小限制的错误。 +* 修复在GPU模式下dataprovider转换的错误。 +* 修复`GatedRecurrentLayer`中的错误。 +* 修复在测试多个模型时`BatchNorm`的错误。 +* 修复paramRelu在单元测试时崩溃的错误。 +* 修复`CpuSparseMatrix`编译时相关的警告。 +* 修复`MultiGradientMachine`在`trainer_count > batch_size`时的错误。 +* 修复`PyDataProvider2`阻止异步加载数据的错误。 diff --git a/RELEASE.md b/RELEASE.md index a8a245ab442ba0fc63d1f1fda932e7590a6fe4ca..146f7afa7dfbc152500b82fde28445ae3155c16c 100644 --- a/RELEASE.md +++ b/RELEASE.md @@ -1,3 +1,104 @@ +# Release v0.10.0 + +We are glad to release version 0.10.0. In this version, we are happy to release the new +[Python API](http://research.baidu.com/paddlepaddles-new-api-simplifies-deep-learning-programs/). + +- Our old Python API is kind of out of date. 
It's hard to learn and hard to
+  use. To write a PaddlePaddle program using the old API, we'd have to write
+  at least two Python files: one `data provider` and another one that defines
+  the network topology. Users start a PaddlePaddle job by running the
+  `paddle_trainer` C++ program, which calls the Python interpreter to run the
+  network topology configuration script and then starts the training loop,
+  which iteratively calls the data provider function to load minibatches.
+  This prevents us from writing a Python program in a modern way, e.g., in the
+  Jupyter Notebook.
+
+- The new API, which we often refer to as the *v2 API*, allows us to write
+  much shorter Python programs to define the network and the data in a single
+  .py file. Also, this program can run in Jupyter Notebook, since the entry
+  point is a Python program and PaddlePaddle runs as a shared library loaded
+  and invoked by this Python program.
+
+Based on the new API, we delivered an online interactive
+book, [Deep Learning 101](http://book.paddlepaddle.org/index.en.html)
+and [its Chinese version](http://book.paddlepaddle.org/).
+
+We also worked on updating our online documentation to describe the new API.
+This is ongoing work. We will release more documentation improvements
+in the next version.
+
+We also worked on bringing the new API to distributed model training (via MPI and
+Kubernetes). This work is ongoing. We will release more about it in the next
+version.
+
+## New Features
+
+* We released the [new Python API](http://research.baidu.com/paddlepaddles-new-api-simplifies-deep-learning-programs/).
+* Deep Learning 101 book in [English](http://book.paddlepaddle.org/index.en.html) and [Chinese](http://book.paddlepaddle.org/).
+* Support rectangle input for CNN.
+* Support stride pooling for seqlastin and seqfirstin.
+* Expose `seq_concat_layer/seq_reshape_layer` in `trainer_config_helpers`.
+* Add dataset package: CIFAR, MNIST, IMDB, WMT14, CONLL05, movielens, imikolov.
+* Add Priorbox layer for Single Shot Multibox Detection.
+* Add smooth L1 cost.
+* Add data reader creator and data reader decorator for v2 API.
+* Add the CPU implementation of cmrnorm projection.
+
+## Improvements
+
+* Support Python virtualenv for `paddle_trainer`.
+* Add pre-commit hooks, used to automatically format our code.
+* Upgrade protobuf to version 3.x.
+* Add an option to check data type in Python data provider.
+* Speed up the backward pass of the average layer on GPU.
+* Documentation refinement.
+* Check dead links in documents using Travis-CI.
+* Add an example explaining `sparse_vector`.
+* Add ReLU in layer_math.py.
+* Simplify data processing flow for Quick Start.
+* Support CUDNN Deconv.
+* Add data feeder in v2 API.
+* Support predicting samples from sys.stdin for the sentiment demo.
+* Provide a multi-process interface for image preprocessing.
+* Add benchmark document for v1 API.
+* Add ReLU in `layer_math.py`.
+* Add packages for automatically downloading public datasets.
+* Rename `Argument::sumCost` to `Argument::sum` since class `Argument` has nothing to do with cost.
+* Expose `Argument::sum` to Python.
+* Add a new `TensorExpression` implementation for matrix-related expression evaluations.
+* Add lazy assignment for optimizing the calculation of a batch of multiple expressions.
+* Add abstract class `Function` and its implementations:
+  * `PadFunc` and `PadGradFunc`.
+  * `ContextProjectionForwardFunc` and `ContextProjectionBackwardFunc`.
+  * `CosSimBackward` and `CosSimBackwardFunc`.
+  * `CrossMapNormalFunc` and `CrossMapNormalGradFunc`.
+  * `MulFunc`.
+* Add classes `AutoCompare` and `FunctionCompare`, which make it easier to write unit tests comparing the GPU and CPU versions of a function.
+* Generate `libpaddle_test_main.a` and remove the main function from each test file.
+* Support dense numpy vectors in PyDataProvider2.
+* Clean up the code base, removing some copy-and-pasted code snippets:
+  * Extract a `RowBuffer` class for `SparseRowMatrix`.
+  * Clean the interface of `GradientMachine`.
+  * Use the `override` keyword in layers.
+  * Simplify `Evaluator::create`, use `ClassRegister` to create `Evaluator`s.
+* Check the MD5 checksum when downloading demo datasets.
+* Add `paddle::Error`, which intentionally replaces `LOG(FATAL)` in Paddle.
+
+## Bug Fixes
+
+* Check layer input types for `recurrent_group`.
+* Don't run `clang-format` on .cu source files.
+* Fix bugs with `LogActivation`.
+* Fix the bug when running `test_layerHelpers` multiple times.
+* Fix the bug that the seq2seq demo exceeds the protobuf message size limit.
+* Fix a bug in the dataprovider converter in GPU mode.
+* Fix a bug in `GatedRecurrentLayer`.
+* Fix a bug in `BatchNorm` when testing more than one model.
+* Fix the broken unit test of paramRelu.
+* Fix some compile-time warnings about `CpuSparseMatrix`.
+* Fix `MultiGradientMachine` error when `trainer_count > batch_size`.
+* Fix bugs that prevented asynchronous data loading in `PyDataProvider2`.
+
 # Release v0.9.0
 
 ## New Features:
diff --git a/authors b/authors
deleted file mode 100644
index ab4d3118ff1f7e94677c89073c4ea05bf991165e..0000000000000000000000000000000000000000
--- a/authors
+++ /dev/null
@@ -1,53 +0,0 @@
-Cao, Ying
-Cheng, Yujuan
-Dang, Qingqing
-Dong, Tengfei
-Du, Dalong
-Feng, Shouqiang
-Gao, Haoyuan
-Han, Baochang
-Han, Jinchen
-Hao, Nanyu
-He, Daoyuan
-He, Zhengyan
-Hou, Jue
-Huang, Chang
-Huang, Zhiheng
-Hu, Na
-Kong, Qi
-Liao, Gang
-Li, Bo
-Li, Jiajie
-Li, Jing
-Li, Lei
-Li, Peng
-Liu, Sheng
-Liu, Yuan
-Li, Yuze
-Luo, Heng
-Luo, Tao
-Lyu, Qin
-Mao, Hongyue
-Qian, Xiaojun
-Qi, Jun
-Qin, Duohao
-Shen, Guolong
-Shi, Guangchuan
-Song, Xiang
-Wang, Jiang
-Wang, Yanfei
-Wang, Yong
-Weng, Renliang
-Xu, Tianbing
-Xu, Wei
-Xu, Xingyu
-Yan, Chong
-Yan, Chunwei
-Yang, Yi
-Yu, Yang
-Yu, Yinan
-Zhang, Jian
-Zhang, Ruiqing
-Zhang, Weide
-Zhao, Liang
-Zhou, Jie
diff --git a/benchmark/tensorflow/rnn/run_multi.sh b/benchmark/tensorflow/rnn/run_multi.sh
index f7f52e01e38d304bb3bf8185c53bd0da26014d3a..c2d7dd597e6da54cd5c4cda311fbbd18486b4647 100755
--- a/benchmark/tensorflow/rnn/run_multi.sh
+++ b/benchmark/tensorflow/rnn/run_multi.sh
@@ -25,4 +25,3 @@ test 4 2 256 512
 test 4 2 512 128
 test 4 2 512 256
 test 4 2 512 512
-
diff --git a/cmake/CMakeDetermineGoCompiler.cmake b/cmake/CMakeDetermineGoCompiler.cmake
new file mode 100644
index 0000000000000000000000000000000000000000..abf0a00c5e99e4201dede36f13200cfc9c151ad3
--- /dev/null
+++ b/cmake/CMakeDetermineGoCompiler.cmake
@@ -0,0 +1,46 @@
+if(NOT CMAKE_Go_COMPILER)
+  if(NOT $ENV{GO_COMPILER} STREQUAL "")
+    get_filename_component(CMAKE_Go_COMPILER_INIT $ENV{GO_COMPILER} PROGRAM PROGRAM_ARGS CMAKE_Go_FLAGS_ENV_INIT)
+
+    if(CMAKE_Go_FLAGS_ENV_INIT)
+      set(CMAKE_Go_COMPILER_ARG1 "${CMAKE_Go_FLAGS_ENV_INIT}" CACHE STRING "First argument to Go compiler")
+    endif()
+
+    if(NOT EXISTS ${CMAKE_Go_COMPILER_INIT})
+      message(SEND_ERROR "Could not find compiler set in environment variable GO_COMPILER:\n$ENV{GO_COMPILER}.")
+    endif()
+
+  endif()
+
+  set(Go_BIN_PATH
+    $ENV{GOPATH}
+    $ENV{GOROOT}
+    $ENV{GOROOT}/bin
+    $ENV{GO_COMPILER}
+
/usr/bin + /usr/local/bin + ) + + if(CMAKE_Go_COMPILER_INIT) + set(CMAKE_Go_COMPILER ${CMAKE_Go_COMPILER_INIT} CACHE PATH "Go Compiler") + else() + find_program(CMAKE_Go_COMPILER + NAMES go + PATHS ${Go_BIN_PATH} + ) + if(CMAKE_Go_COMPILER) + EXEC_PROGRAM(${CMAKE_Go_COMPILER} ARGS version OUTPUT_VARIABLE GOLANG_VERSION) + STRING(REGEX MATCH "go[0-9]+[.0-9]*[ /A-Za-z0-9]*" VERSION "${GOLANG_VERSION}") + message("-- The Golang compiler identification is ${VERSION}") + message("-- Check for working Golang compiler: ${CMAKE_Go_COMPILER}") + endif() + endif() + +endif() + +mark_as_advanced(CMAKE_Go_COMPILER) + +configure_file(${CMAKE_CURRENT_SOURCE_DIR}/cmake/CMakeGoCompiler.cmake.in + ${CMAKE_PLATFORM_INFO_DIR}/CMakeGoCompiler.cmake @ONLY) + +set(CMAKE_Go_COMPILER_ENV_VAR "GO_COMPILER") diff --git a/cmake/CMakeGoCompiler.cmake.in b/cmake/CMakeGoCompiler.cmake.in new file mode 100644 index 0000000000000000000000000000000000000000..a71f08e064656fbaad8cfa77aea6f216515712ef --- /dev/null +++ b/cmake/CMakeGoCompiler.cmake.in @@ -0,0 +1,8 @@ +set(CMAKE_Go_COMPILER "@CMAKE_Go_COMPILER@") +set(CMAKE_Go_COMPILER_LOADED 1) + +set(CMAKE_Go_SOURCE_FILE_EXTENSIONS go) +set(CMAKE_Go_LINKER_PREFERENCE 40) +set(CMAKE_Go_OUTPUT_EXTENSION .o) +set(CMAKE_Go_OUTPUT_EXTENSION_REPLACE 1) +set(CMAKE_Go_COMPILER_ENV_VAR "GO_COMPILER") diff --git a/cmake/CMakeGoInformation.cmake b/cmake/CMakeGoInformation.cmake new file mode 100644 index 0000000000000000000000000000000000000000..ba51ac93fcd429478f324b66bd5129d94ea2a8f4 --- /dev/null +++ b/cmake/CMakeGoInformation.cmake @@ -0,0 +1,7 @@ +if(NOT CMAKE_Go_COMPILE_OBJECT) + set(CMAKE_Go_COMPILE_OBJECT "go tool compile -l -N -o ") +endif() + +if(NOT CMAKE_Go_LINK_EXECUTABLE) + set(CMAKE_Go_LINK_EXECUTABLE "go tool link -o ") +endif() diff --git a/cmake/CMakeTestGoCompiler.cmake b/cmake/CMakeTestGoCompiler.cmake new file mode 100644 index 0000000000000000000000000000000000000000..b9891b015baced05b51e34dba562fd98a84fe14c --- /dev/null +++ b/cmake/CMakeTestGoCompiler.cmake @@ -0,0 +1 @@ +set(CMAKE_Go_COMPILER_WORKS 1 CACHE INTERNAL "") diff --git a/cmake/FindAVX.cmake b/cmake/FindAVX.cmake deleted file mode 100644 index d380c996dfa95f0caa2b9cd9daa0ac9141e51fe0..0000000000000000000000000000000000000000 --- a/cmake/FindAVX.cmake +++ /dev/null @@ -1,76 +0,0 @@ -# This file is use to check all support level of AVX on your machine -# so that PaddlePaddle can unleash the vectorization power of muticore. 
- -INCLUDE(CheckCXXSourceRuns) - -IF(CMAKE_COMPILER_IS_GNUCC OR CMAKE_COMPILER_IS_GNUCXX OR CMAKE_CXX_COMPILER_ID MATCHES "Clang") - set(MMX_FLAG "-mmmx") - set(SSE2_FLAG "-msse2") - set(SSE3_FLAG "-msse3") - SET(AVX_FLAG "-mavx") - SET(AVX2_FLAG "-mavx2") -ELSEIF(MSVC) - set(MMX_FLAG "/arch:MMX") - set(SSE2_FLAG "/arch:SSE2") - set(SSE3_FLAG "/arch:SSE3") - SET(AVX_FLAG "/arch:AVX") - SET(AVX2_FLAG "/arch:AVX2") -ENDIF() - -# Check MMX -set(CMAKE_REQUIRED_FLAGS ${MMX_FLAG}) -CHECK_CXX_SOURCE_RUNS(" -#include -int main() -{ - _mm_setzero_si64(); - return 0; -}" MMX_FOUND) - -# Check SSE2 -set(CMAKE_REQUIRED_FLAGS ${SSE2_FLAG}) -CHECK_CXX_SOURCE_RUNS(" -#include -int main() -{ - _mm_setzero_si128(); - return 0; -}" SSE2_FOUND) - -# Check SSE3 -set(CMAKE_REQUIRED_FLAGS ${SSE3_FLAG}) -CHECK_CXX_SOURCE_RUNS(" -#include -int main() -{ - __m128d a = _mm_set1_pd(6.28); - __m128d b = _mm_set1_pd(3.14); - __m128d result = _mm_addsub_pd(a, b); - result = _mm_movedup_pd(result); - return 0; -}" SSE3_FOUND) - -# Check AVX -set(CMAKE_REQUIRED_FLAGS ${AVX_FLAG}) -CHECK_CXX_SOURCE_RUNS(" -#include -int main() -{ - __m256 a = _mm256_set_ps (-1.0f, 2.0f, -3.0f, 4.0f, -1.0f, 2.0f, -3.0f, 4.0f); - __m256 b = _mm256_set_ps (1.0f, 2.0f, 3.0f, 4.0f, 1.0f, 2.0f, 3.0f, 4.0f); - __m256 result = _mm256_add_ps (a, b); - return 0; -}" AVX_FOUND) - -# Check AVX 2 -set(CMAKE_REQUIRED_FLAGS ${AVX2_FLAG}) -CHECK_CXX_SOURCE_RUNS(" -#include -int main() -{ - __m256i a = _mm256_set_epi32 (-1, 2, -3, 4, -1, 2, -3, 4); - __m256i result = _mm256_abs_epi32 (a); - return 0; -}" AVX2_FOUND) - -mark_as_advanced(MMX_FOUND SSE2_FOUND SSE3_FOUND AVX_FOUND AVX2_FOUND) diff --git a/cmake/FindPythonModule.cmake b/cmake/FindPythonModule.cmake deleted file mode 100644 index 2eb3441428e8290b665e092f6e4b40e146ea5a52..0000000000000000000000000000000000000000 --- a/cmake/FindPythonModule.cmake +++ /dev/null @@ -1,30 +0,0 @@ -# Find if a Python module is installed -# Found at http://www.cmake.org/pipermail/cmake/2011-January/041666.html -# To use do: find_python_module(PyQt4 REQUIRED) -function(find_python_module module) - string(TOUPPER ${module} module_upper) - if(NOT PY_${module_upper}) - if(ARGC GREATER 1 AND ARGV1 STREQUAL "REQUIRED") - set(${module}_FIND_REQUIRED TRUE) - else() - set(${module}_FIND_REQUIRED FALSE) - endif() - # A module's location is usually a directory, but for binary modules - # it's a .so file. 
- execute_process(COMMAND "${PYTHON_EXECUTABLE}" "-c" - "import re, ${module}; print(re.compile('/__init__.py.*').sub('',${module}.__file__))" - RESULT_VARIABLE _${module}_status - OUTPUT_VARIABLE _${module}_location - ERROR_QUIET - OUTPUT_STRIP_TRAILING_WHITESPACE) - if(NOT _${module}_status) - set(PY_${module_upper} ${_${module}_location} CACHE STRING - "Location of Python module ${module}") - endif(NOT _${module}_status) - endif(NOT PY_${module_upper}) - find_package_handle_standard_args(PY_${module} DEFAULT_MSG PY_${module_upper}) - if(NOT PY_${module_upper}_FOUND AND ${module}_FIND_REQUIRED) - message(FATAL_ERROR "python module ${module} is not found") - endif() - set(PY_${module_upper}_FOUND ${PY_${module_upper}_FOUND} PARENT_SCOPE) -endfunction(find_python_module) diff --git a/cmake/FindSphinx.cmake b/cmake/FindSphinx.cmake index 6702f45a168bf0dfc6cfca3ff8e68fbc79c92b11..f74cd4ff8c9c2c52319b18ac37264167b3718eae 100644 --- a/cmake/FindSphinx.cmake +++ b/cmake/FindSphinx.cmake @@ -72,6 +72,7 @@ function( Sphinx_add_target target_name builder conf cache source destination ) ${source} ${destination} COMMENT "Generating sphinx documentation: ${builder}" + COMMAND cd ${destination} && ln -sf ./index_*.html index.html ) set_property( @@ -143,4 +144,4 @@ function( Sphinx_add_targets target_base_name conf source base_destination ) add_dependencies( ${target_base_name}_linkcheck ${_dependencies} ) endif() -endfunction() \ No newline at end of file +endfunction() diff --git a/cmake/cblas.cmake b/cmake/cblas.cmake index 685334c6585060c0344e552c6f3fda2c7324de03..854066fd1d205c337fbdbe08997d88251095c799 100644 --- a/cmake/cblas.cmake +++ b/cmake/cblas.cmake @@ -5,7 +5,7 @@ # If any cblas implementation found, the following variable will be set. # CBLAS_PROVIDER # one of MKL, ATLAS, OPENBLAS, REFERENCE # CBLAS_INC_DIR # the include directory for cblas. -# CBLAS_LIBS # a list of libraries should be linked by paddle. +# CBLAS_LIBS # a list of libraries should be linked by paddle. # # Each library should be full path to object file. # # User should set one of MKL_ROOT, ATLAS_ROOT, OPENBLAS_ROOT, REFERENCE_CBLAS_ROOT @@ -13,34 +13,59 @@ # system paths. # +set(CBLAS_FOUND OFF) -## Find MKL First. -set(MKL_ROOT $ENV{MKL_ROOT} CACHE PATH "Folder contains MKL") +## Find MKLML First. +if(WITH_MKLML AND MKLML_INC_DIR AND MKLML_LIB) + set(CBLAS_FOUND ON) + set(CBLAS_PROVIDER MKLML) + set(CBLAS_INC_DIR ${MKLML_INC_DIR}) + set(CBLAS_LIBRARIES ${MKLML_LIB}) -find_path(MKL_INCLUDE_DIR mkl.h PATHS - ${MKL_ROOT}/include) -find_path(MKL_INCLUDE_DIR mkl_lapacke.h PATHS - ${MKL_ROOT}/include) -find_library(MKL_CORE_LIB NAMES mkl_core PATHS + add_definitions(-DPADDLE_USE_MKLML) + add_definitions(-DLAPACK_FOUND) + + message(STATUS "Found cblas and lapack in MKLML " + "(include: ${CBLAS_INC_DIR}, library: ${CBLAS_LIBRARIES})") + return() +endif() + +## Then find MKL. 
+set(INTEL_MKL_ROOT "/opt/intel/mkl" CACHE PATH "Folder contains intel mkl libs") +set(MKL_ROOT $ENV{MKL_ROOT} CACHE PATH "Folder contains env MKL") + +set(MKL_INCLUDE_SEARCH_PATHS + ${MKL_ROOT}/include + ${INTEL_MKL_ROOT}/include) +set(MKL_LIB_SEARCH_PATHS ${MKL_ROOT}/lib - ${MKL_ROOT}/lib/intel64) + ${MKL_ROOT}/lib/intel64 + ${INTEL_MKL_ROOT}/lib + ${INTEL_MKL_ROOT}/lib/intel64) + +find_path(MKL_INC_DIR mkl.h PATHS + ${MKL_INCLUDE_SEARCH_PATHS}) +find_path(MKL_LAPACK_INC_DIR mkl_lapacke.h PATHS + ${MKL_INCLUDE_SEARCH_PATHS}) +find_library(MKL_CORE_LIB NAMES mkl_core PATHS + ${MKL_LIB_SEARCH_PATHS}) find_library(MKL_SEQUENTIAL_LIB NAMES mkl_sequential PATHS - ${MKL_ROOT}/lib - ${MKL_ROOT}/lib/intel64) + ${MKL_LIB_SEARCH_PATHS}) find_library(MKL_INTEL_LP64 NAMES mkl_intel_lp64 PATHS - ${MKL_ROOT}/lib - ${MKL_ROOT}/lib/intel64) - + ${MKL_LIB_SEARCH_PATHS}) -if(MKL_INCLUDE_DIR AND MKL_CORE_LIB AND MKL_SEQUENTIAL_LIB AND MKL_INTEL_LP64) +if(MKL_LAPACK_INC_DIR AND MKL_INC_DIR AND MKL_CORE_LIB AND MKL_SEQUENTIAL_LIB AND MKL_INTEL_LP64) + set(CBLAS_FOUND ON) set(CBLAS_PROVIDER MKL) - set(CBLAS_INC_DIR ${MKL_INCLUDE_DIR}) - set(CBLAS_LIBS ${MKL_INTEL_LP64} - ${MKL_SEQUENTIAL_LIB} - ${MKL_CORE_LIB}) + set(CBLAS_INC_DIR ${MKL_INC_DIR} ${MKL_LAPACK_INC_DIR}) + set(CBLAS_LIBRARIES ${MKL_INTEL_LP64} ${MKL_SEQUENTIAL_LIB} ${MKL_CORE_LIB}) + add_definitions(-DPADDLE_USE_MKL) - message(STATUS "Found MKL (include: ${CBLAS_INC_DIR}, library: ${CBLAS_LIBS})") - return() # return file. + add_definitions(-DLAPACK_FOUND) + + message(STATUS "Found MKL (include: ${MKL_INC_DIR}, library: ${CBLAS_LIBRARIES})") + message(STATUS "Found lapack in MKL (include: ${MKL_LAPACK_INC_DIR})") + return() endif() ## Then find atlas. @@ -56,21 +81,26 @@ set(ATLAS_LIB_SEARCH_PATHS /usr/lib/atlas /usr/lib/atlas-base # special for ubuntu 14.04. 
) -find_path(ATLAS_INC_DIR NAMES cblas.h +find_path(ATLAS_INC_DIR NAMES cblas.h PATHS ${ATLAS_INCLUDE_SEARCH_PATHS}) find_path(ATLAS_CLAPACK_INC_DIR NAMES clapack.h PATHS ${ATLAS_INCLUDE_SEARCH_PATHS}) -find_library(ATLAS_CBLAS_LIB NAMES cblas libcblas.so.3 +find_library(ATLAS_CBLAS_LIB NAMES cblas libcblas.so.3 PATHS ${ATLAS_LIB_SEARCH_PATHS}) -find_library(ATLAS_LIB NAMES lapack_atlas liblapack_atlas.so.3 +find_library(ATLAS_CLAPACK_LIB NAMES lapack_atlas liblapack_atlas.so.3 PATHS ${ATLAS_LIB_SEARCH_PATHS}) -if(ATLAS_INC_DIR AND ATLAS_CBLAS_LIB AND ATLAS_LIB) +if(ATLAS_CLAPACK_INC_DIR AND ATLAS_INC_DIR AND ATLAS_CBLAS_LIB AND ATLAS_CLAPACK_LIB) + set(CBLAS_FOUND ON) set(CBLAS_PROVIDER ATLAS) set(CBLAS_INC_DIR ${ATLAS_INC_DIR} ${ATLAS_CLAPACK_INC_DIR}) - set(CBLAS_LIBS ${ATLAS_LIB} ${ATLAS_CBLAS_LIB}) - add_definitions(-DPADDLE_USE_ATLAS) - message(STATUS "Found Atlas (include: ${CBLAS_INC_DIR}, library: ${CBLAS_LIBS})") + set(CBLAS_LIBRARIES ${ATLAS_CLAPACK_LIB} ${ATLAS_CBLAS_LIB}) + + add_definitions(-DPADDLE_USE_ATLAS) + add_definitions(-DLAPACK_FOUND) + + message(STATUS "Found ATLAS (include: ${ATLAS_INC_DIR}, library: ${CBLAS_LIBRARIES})") + message(STATUS "Found lapack in ATLAS (include: ${ATLAS_CLAPACK_INC_DIR})") return() endif() @@ -95,11 +125,17 @@ find_path(OPENBLAS_LAPACKE_INC_DIR NAMES lapacke.h find_library(OPENBLAS_LIB NAMES openblas PATHS ${OPENBLAS_LIB_SEARCH_PATHS}) -if(OPENBLAS_INC_DIR AND OPENBLAS_LIB) +if(OPENBLAS_LAPACKE_INC_DIR AND OPENBLAS_INC_DIR AND OPENBLAS_LIB) + set(CBLAS_FOUND ON) set(CBLAS_PROVIDER OPENBLAS) - set(CBLAS_INC_DIR ${OPENBLAS_INC_DIR}) - set(CBLAS_LIBS ${OPENBLAS_LIB}) - message(STATUS "Found OpenBlas (include: ${CBLAS_INC_DIR}, library: ${CBLAS_LIBS})") + set(CBLAS_INC_DIR ${OPENBLAS_INC_DIR} ${OPENBLAS_LAPACKE_INC_DIR}) + set(CBLAS_LIBRARIES ${OPENBLAS_LIB}) + + add_definitions(-DPADDLE_USE_OPENBLAS) + add_definitions(-DLAPACK_FOUND) + + message(STATUS "Found OpenBLAS (include: ${OPENBLAS_INC_DIR}, library: ${CBLAS_LIBRARIES})") + message(STATUS "Found lapack in OpenBLAS (include: ${OPENBLAS_LAPACKE_INC_DIR})") return() endif() @@ -107,7 +143,7 @@ endif() ## Then find the reference-cblas. www.netlib.org/blas/ -set(REFERENCE_CBLAS_ROOT $ENV{REFERENCE_CBLAS_ROOT} CACHE PATH +set(REFERENCE_CBLAS_ROOT $ENV{REFERENCE_CBLAS_ROOT} CACHE PATH "Folder contains reference-cblas") set(REFERENCE_CBLAS_INCLUDE_SEARCH_PATHS ${REFERENCE_CBLAS_ROOT}/include @@ -128,11 +164,10 @@ find_library(REFERENCE_CBLAS_LIBRARY NAMES cblas PATHS ${REFERENCE_CBLAS_LIB_SEARCH_PATHS}) if (REFERENCE_CBLAS_INCLUDE_DIR AND REFERENCE_CBLAS_LIBRARY) + set(CBLAS_FOUND ON) set(CBLAS_PROVIDER REFERENCE) set(CBLAS_INC_DIR ${REFERENCE_CBLAS_INCLUDE_DIR}) - set(CBLAS_LIBS ${REFERENCE_CBLAS_LIBRARY}) - return() + set(CBLAS_LIBRARIES ${REFERENCE_CBLAS_LIBRARY}) + add_definitions(-DPADDLE_USE_REFERENCE_CBLAS) + message(STATUS "Found reference-cblas (include: ${CBLAS_INC_DIR}, library: ${CBLAS_LIBRARIES})") endif() - -message(FATAL_ERROR "CBlas must be set. Paddle support MKL, ATLAS, OpenBlas, reference-cblas." 
- " Try set MKL_ROOT, ATLAS_ROOT, OPENBLAS_ROOT or REFERENCE_CBLAS_ROOT.") diff --git a/cmake/ccache.cmake b/cmake/ccache.cmake index 968d41801d73c4082d2673efe415c1cdd0305b5e..900f59d4cb83bc9ce1893b2d3bd95f5a08b164bb 100644 --- a/cmake/ccache.cmake +++ b/cmake/ccache.cmake @@ -1,9 +1,9 @@ # Use ccache if found ccache program -find_program(CCACHE_FOUND ccache) +find_program(CCACHE_PATH ccache) -if(CCACHE_FOUND) +if(CCACHE_PATH) message(STATUS "Ccache is founded, use ccache to speed up compile.") - set_property(GLOBAL PROPERTY RULE_LAUNCH_COMPILE ccache) - set_property(GLOBAL PROPERTY RULE_LAUNCH_LINK ccache) -endif(CCACHE_FOUND) \ No newline at end of file + set_property(GLOBAL PROPERTY RULE_LAUNCH_COMPILE ${CCACHE_PATH}) + set_property(GLOBAL PROPERTY RULE_LAUNCH_LINK ${CCACHE_PATH}) +endif(CCACHE_PATH) diff --git a/cmake/check_packages.cmake b/cmake/check_packages.cmake deleted file mode 100644 index 3bc0c1fd18448e3c2f0799295ac77d57cdc1bee7..0000000000000000000000000000000000000000 --- a/cmake/check_packages.cmake +++ /dev/null @@ -1,45 +0,0 @@ -# Check package for each cmake option - -if(WITH_GPU) - find_package(CUDA REQUIRED) # CUDA is required when use gpu -endif() - -if(WITH_PYTHON) - find_package(PythonLibs 2.6 REQUIRED) - find_package(PythonInterp REQUIRED) - find_package(NumPy REQUIRED) -endif() - -if(WITH_STYLE_CHECK) - find_package(PythonInterp REQUIRED) -endif() - -if(WITH_GLOG) - find_package(Glog REQUIRED) -endif() - -if(WITH_GFLAGS) - find_package(Gflags REQUIRED) -endif() - -if(WITH_TESTING) - find_package(GTest REQUIRED) -endif() - -if(WITH_DOC) - find_package(Sphinx REQUIRED) - find_package(Doxygen REQUIRED) - find_python_module(recommonmark REQUIRED) - find_python_module(breathe REQUIRED) -endif() - -if(WITH_SWIG_PY) - if(NOT SWIG_FOUND) - message(FATAL_ERROR "SWIG is not found. Please install swig or disable WITH_SWIG_PY") - endif() - find_python_module(wheel REQUIRED) # package wheel -endif() - -if(NOT M4_EXECUTABLE) - message(FATAL_ERROR "Paddle need m4 to generate proto file.") -endif() diff --git a/cmake/configure.cmake b/cmake/configure.cmake new file mode 100644 index 0000000000000000000000000000000000000000..2ac098954647d37e26ac2499e0675dae39910edc --- /dev/null +++ b/cmake/configure.cmake @@ -0,0 +1,142 @@ +# Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserve. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+
+if(NOT WITH_PYTHON)
+  add_definitions(-DPADDLE_NO_PYTHON)
+endif(NOT WITH_PYTHON)
+
+if(WITH_DSO)
+  add_definitions(-DPADDLE_USE_DSO)
+endif(WITH_DSO)
+
+if(WITH_DOUBLE)
+  add_definitions(-DPADDLE_TYPE_DOUBLE)
+endif(WITH_DOUBLE)
+
+if(NOT WITH_TIMER)
+  add_definitions(-DPADDLE_DISABLE_TIMER)
+endif(NOT WITH_TIMER)
+
+if(NOT WITH_PROFILER)
+  add_definitions(-DPADDLE_DISABLE_PROFILER)
+endif(NOT WITH_PROFILER)
+
+if(NOT CMAKE_CROSSCOMPILING)
+  if(WITH_AVX AND AVX_FOUND)
+    set(SIMD_FLAG ${AVX_FLAG})
+  elseif(SSE3_FOUND)
+    set(SIMD_FLAG ${SSE3_FLAG})
+  endif()
+endif()
+
+if(NOT WITH_GOLANG)
+  add_definitions(-DPADDLE_WITHOUT_GOLANG)
+endif(NOT WITH_GOLANG)
+
+if(NOT WITH_GPU)
+  add_definitions(-DPADDLE_ONLY_CPU)
+  add_definitions(-DHPPL_STUB_FUNC)
+
+  list(APPEND CMAKE_CXX_SOURCE_FILE_EXTENSIONS cu)
+else()
+  FIND_PACKAGE(CUDA REQUIRED)
+
+  if(${CUDA_VERSION_MAJOR} VERSION_LESS 7)
+    message(FATAL_ERROR "Paddle needs CUDA >= 7.0 to compile")
+  endif()
+
+  if(NOT CUDNN_FOUND)
+    message(FATAL_ERROR "Paddle needs cudnn to compile")
+  endif()
+
+  set(CUDA_NVCC_FLAGS ${CUDA_NVCC_FLAGS} "-Xcompiler ${SIMD_FLAG}")
+
+  # Include cuda and cudnn
+  include_directories(${CUDNN_INCLUDE_DIR})
+  include_directories(${CUDA_TOOLKIT_INCLUDE})
+endif(NOT WITH_GPU)
+
+if(WITH_MKLDNN)
+  add_definitions(-DPADDLE_USE_MKLDNN)
+  if (WITH_MKLML AND MKLDNN_IOMP_DIR)
+    message(STATUS "Enable Intel OpenMP at ${MKLDNN_IOMP_DIR}")
+    set(OPENMP_FLAGS "-fopenmp")
+    set(CMAKE_C_CREATE_SHARED_LIBRARY_FORBIDDEN_FLAGS ${OPENMP_FLAGS})
+    set(CMAKE_CXX_CREATE_SHARED_LIBRARY_FORBIDDEN_FLAGS ${OPENMP_FLAGS})
+    set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} ${OPENMP_FLAGS}")
+    set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} ${OPENMP_FLAGS}")
+  else()
+    find_package(OpenMP)
+    if(OPENMP_FOUND)
+      set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} ${OpenMP_C_FLAGS}")
+      set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} ${OpenMP_CXX_FLAGS}")
+    else()
+      message(WARNING "Cannot find OpenMP. "
+        "Some performance features in MKLDNN may not be available")
+    endif()
+  endif()
+
+endif(WITH_MKLDNN)
+
+set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} ${SIMD_FLAG}")
+set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} ${SIMD_FLAG}")
+
+if(WITH_GOLANG)
+  # We need to symlink the Paddle directory into GOPATH. If we
+  # don't, and we have code that depends on Paddle, `go
+  # get ./...` will download a fresh Paddle repo from GitHub,
+  # without the changes in our current Paddle repo that we
+  # want to build.
+  set(GOPATH "${CMAKE_CURRENT_BINARY_DIR}/go")
+  file(MAKE_DIRECTORY ${GOPATH})
+  set(PADDLE_IN_GOPATH "${GOPATH}/src/github.com/PaddlePaddle/Paddle")
+  file(MAKE_DIRECTORY "${PADDLE_IN_GOPATH}")
+  set(PADDLE_GO_PATH "${CMAKE_SOURCE_DIR}/go")
+
+  add_custom_target(go_path)
+  add_custom_command(TARGET go_path
+    # Symlink Paddle directory into GOPATH
+    COMMAND mkdir -p ${PADDLE_IN_GOPATH}
+    COMMAND rm -rf ${PADDLE_IN_GOPATH}
+    COMMAND ln -sf ${CMAKE_SOURCE_DIR} ${PADDLE_IN_GOPATH}
+    # Automatically get all dependencies specified in the source code.
+    # We can't run `go get -d ./...` for every target, because
+    # multiple `go get` commands cannot run concurrently, but make needs
+    # to be able to run with multiple jobs.
+    WORKING_DIRECTORY ${CMAKE_CURRENT_SOURCE_DIR}
+  )
+
+  if (GLIDE_INSTALL)
+    if(EXISTS $ENV{GOPATH}/bin/glide)
+      set(GLIDE "$ENV{GOPATH}/bin/glide")
+    else()
+      message(FATAL_ERROR "no glide executable found: $ENV{GOPATH}/bin/glide")
+    endif()
+
+    # this command only runs when the file it depends on is missing
+    # or has changed, or the output is missing.
+ add_custom_command(OUTPUT ${CMAKE_BINARY_DIR}/glide + COMMAND env GOPATH=${GOPATH} ${GLIDE} install + COMMAND touch ${CMAKE_BINARY_DIR}/glide + DEPENDS ${PROJ_ROOT}/go/glide.lock + WORKING_DIRECTORY "${PADDLE_IN_GOPATH}/go" + ) + + # depends on the custom command which outputs + # ${CMAKE_BINARY_DIR}/glide, the custom command does not need to + # run every time this target is built. + add_custom_target(go_vendor DEPENDS ${CMAKE_BINARY_DIR}/glide go_path) + endif() + +endif(WITH_GOLANG) diff --git a/cmake/coveralls.cmake b/cmake/coveralls.cmake index 9be7643819efdde3f42e4d39b2849ecc17e0d9fb..ca1471cabb57c0795ee193493d2e60bb5bd9e1cc 100644 --- a/cmake/coveralls.cmake +++ b/cmake/coveralls.cmake @@ -61,7 +61,7 @@ function(code_coverage _COVERAGE_SRCS _COVERALLS_UPLOAD _CMAKE_SCRIPT_PATH) endif() endfunction() -if(ON_COVERALLS) +if(WITH_COVERAGE) set(CMAKE_BUILD_TYPE "Debug") set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -g -O0 -fprofile-arcs -ftest-coverage") set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} -g -O0 -fprofile-arcs -ftest-coverage") diff --git a/cmake/coverallsGcovJsons.cmake b/cmake/coverallsGcovJsons.cmake index ae3530c3a0eeb79ddbcbf4f2e99be75aa7968a2f..4641184fcf5273b884524d9b9444209ffb65e000 100644 --- a/cmake/coverallsGcovJsons.cmake +++ b/cmake/coverallsGcovJsons.cmake @@ -110,14 +110,13 @@ endmacro() # Get the coverage data. file(GLOB_RECURSE GCDA_FILES "${COV_PATH}" "*.gcda") -message("GCDA files:") +message("Process GCDA files:") +message("===============================") # Get a list of all the object directories needed by gcov # (The directories the .gcda files and .o files are found in) # and run gcov on those. foreach(GCDA ${GCDA_FILES}) - message("Process: ${GCDA}") - message("------------------------------------------------------------------------------") get_filename_component(GCDA_DIR ${GCDA} PATH) # @@ -135,7 +134,7 @@ foreach(GCDA ${GCDA_FILES}) # If -p is not specified then the file is named only "the_file.c.gcov" # execute_process( - COMMAND ${GCOV_EXECUTABLE} -p -o ${GCDA_DIR} ${GCDA} + COMMAND ${GCOV_EXECUTABLE} -p -o ${GCDA_DIR} ${GCDA} >/dev/null WORKING_DIRECTORY ${GCDA_DIR} ) endforeach() @@ -383,7 +382,6 @@ foreach(NOT_COVERED_SRC ${COVERAGE_SRCS_REMAINING}) set(GCOV_FILE_COVERAGE "${GCOV_FILE_COVERAGE}]") # Generate the final JSON for this file. - message("Generate JSON for non-gcov file: ${NOT_COVERED_SRC}...") string(CONFIGURE ${SRC_FILE_TEMPLATE} FILE_JSON) set(JSON_GCOV_FILES "${JSON_GCOV_FILES}${FILE_JSON}, ") endforeach() diff --git a/cmake/cpplint.cmake b/cmake/cpplint.cmake index 241af9a0835b2f100c8fb8b246426e631e42aef3..5184f0815faac005b3dff1015395235f4e19d65b 100644 --- a/cmake/cpplint.cmake +++ b/cmake/cpplint.cmake @@ -25,8 +25,10 @@ set(STYLE_FILTER "${STYLE_FILTER}-readability/casting") set(IGNORE_PATTERN .*ImportanceSampler.* .*cblas\\.h.* + .*\\.pb\\.txt .*LtrDataProvider.* - .*MultiDataProvider.*) + .*MultiDataProvider.* + .*pb.*) # add_style_check_target # @@ -34,29 +36,27 @@ set(IGNORE_PATTERN # # first argument: target name to attach # rest arguments: source list to check code style. -# +# # NOTE: If WITH_STYLE_CHECK is OFF, then this macro just do nothing. 
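
The add_style_check_target macro that follows is meant to be called right after a target is declared, passing the same source list; a hypothetical usage sketch (target and file names invented):

# Attach a cpplint style pass to a library target (illustrative names).
add_library(math_util matrix.cc vector.cc)
add_style_check_target(math_util matrix.cc vector.cc)
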
 macro(add_style_check_target TARGET_NAME)
   if(WITH_STYLE_CHECK)
     set(SOURCES_LIST ${ARGN})
     list(REMOVE_DUPLICATES SOURCES_LIST)
-    list(SORT SOURCES_LIST)
-    foreach(filename ${SOURCES_LIST})
-      set(LINT ON)
     foreach(pattern ${IGNORE_PATTERN})
       if(filename MATCHES ${pattern})
-        message(STATUS "DROP LINT ${filename}")
-        set(LINT OFF)
-      endif()
+        list(REMOVE_ITEM SOURCES_LIST ${filename})
+      endif()
     endforeach()
-    if(LINT MATCHES ON)
-      add_custom_command(TARGET ${TARGET_NAME}
-                         PRE_BUILD
-                         COMMAND "${PYTHON_EXECUTABLE}" "${PROJ_ROOT}/paddle/scripts/cpplint.py"
-                                 "--filter=${STYLE_FILTER}" ${filename}
-                         WORKING_DIRECTORY ${CMAKE_CURRENT_LIST_DIR})
-    endif()
     endforeach()
+
+    if(SOURCES_LIST)
+      add_custom_command(TARGET ${TARGET_NAME} POST_BUILD
+        COMMAND "${PYTHON_EXECUTABLE}" "${PROJ_ROOT}/paddle/scripts/cpplint.py"
+                "--filter=${STYLE_FILTER}"
+                ${SOURCES_LIST}
+        COMMENT "cpplint: Checking source code style"
+        WORKING_DIRECTORY ${CMAKE_CURRENT_SOURCE_DIR})
+    endif()
   endif()
 endmacro()
diff --git a/cmake/cross_compiling/android.cmake b/cmake/cross_compiling/android.cmake
new file mode 100644
index 0000000000000000000000000000000000000000..5e3e437a8da9624df35a5c754fe77be73f20361d
--- /dev/null
+++ b/cmake/cross_compiling/android.cmake
@@ -0,0 +1,203 @@
+# Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserve.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+# This is a toolchain file for cross-compiling for Android, and the
+# configuration refers to the open-source repository:
+# https://github.com/taka-no-me/android-cmake
+# Most of the variables are compatible with those used in
+# https://developer.android.com/ndk/guides/cmake.html
+# The supported variables are listed below:
+#
+# ANDROID_STANDALONE_TOOLCHAIN
+# ANDROID_ABI
+# ANDROID_NATIVE_API_LEVEL
+# ANDROID_ARM_MODE
+# ANDROID_ARM_NEON
+#
+# For CMake >= 3.7.0, all the settings will be delivered to CMake system
+# variables to let CMake do the cross-compiling configuration itself.
+# For more detail on cross-compiling settings, see
+# https://cmake.org/cmake/help/v3.7/manual/cmake-toolchains.7.html
+
+IF(NOT ANDROID)
+    return()
+ENDIF()
+
+# Check for the existence of the Android standalone toolchain.
+IF(NOT DEFINED ANDROID_STANDALONE_TOOLCHAIN)
+    SET(ANDROID_STANDALONE_TOOLCHAIN $ENV{ANDROID_STANDALONE_TOOLCHAIN}
+        CACHE PATH "Folder holds the standalone toolchain of Android NDK")
+ENDIF()
+IF(NOT ANDROID_STANDALONE_TOOLCHAIN)
+    MESSAGE(WARNING "It is recommended to set ANDROID_STANDALONE_TOOLCHAIN to "
+            "use a standalone toolchain.\n"
+            "To cross-compile for Android, you need to:\n"
+            "1. Download an Android NDK from"
+            " https://developer.android.com/ndk/downloads/index.html\n"
+            "2. Set up a standalone toolchain:\n"
+            " https://developer.android.google.cn/ndk/guides/standalone_toolchain.html?hl=zh-cn\n")
+ENDIF()
+
+IF(NOT DEFINED CMAKE_SYSTEM_VERSION AND ANDROID_NATIVE_API_LEVEL)
+    IF(ANDROID_NATIVE_API_LEVEL MATCHES "^android-[0-9]+$")
+        STRING(REPLACE "android-" "" CMAKE_SYSTEM_VERSION "${CMAKE_MATCH_0}")
+    ELSEIF(ANDROID_NATIVE_API_LEVEL MATCHES "^[0-9]+$")
+        SET(CMAKE_SYSTEM_VERSION ${ANDROID_NATIVE_API_LEVEL})
+    ENDIF()
+ENDIF()
+
+IF(NOT DEFINED ANDROID_ABI)
+    SET(ANDROID_ABI "armeabi-v7a")
+ENDIF()
+
+IF(NOT DEFINED ANDROID_ARM_MODE)
+    SET(ANDROID_ARM_MODE ON)
+ENDIF()
+IF(ANDROID_ARM_MODE)
+    SET(ANDROID_ARM_MODE_NAME "arm")
+ELSE(ANDROID_ARM_MODE)
+    SET(ANDROID_ARM_MODE_NAME "thumb")
+ENDIF(ANDROID_ARM_MODE)
+
+IF(NOT DEFINED ANDROID_ARM_NEON)
+    SET(ANDROID_ARM_NEON ON)
+ENDIF()
+
+IF("${CMAKE_VERSION}" VERSION_LESS "3.7.0")
+    IF("${CMAKE_VERSION}" VERSION_LESS "3.1.0")
+        SET(CMAKE_SYSTEM_NAME "Linux")
+    ENDIF()
+    MESSAGE(WARNING "It is recommended to use CMake >= 3.7.0 (current version: "
+            "${CMAKE_VERSION}) when cross-compiling for Android.")
+
+    IF(ANDROID_STANDALONE_TOOLCHAIN)
+        SET(CMAKE_SYSROOT "${ANDROID_STANDALONE_TOOLCHAIN}/sysroot")
+
+        IF(NOT CMAKE_SYSTEM_VERSION)
+            SET(ANDROID_STANDALONE_TOOLCHAIN_API "")
+            SET(ANDROID_API_LEVEL_H_REGEX
+                "^[\t ]*#[\t ]*define[\t ]+__ANDROID_API__[\t ]+([0-9]+)")
+            FILE(STRINGS "${ANDROID_STANDALONE_TOOLCHAIN}/sysroot/usr/include/android/api-level.h"
+                 ANDROID_API_LEVEL_H_CONTENT REGEX "${ANDROID_API_LEVEL_H_REGEX}")
+            IF(ANDROID_API_LEVEL_H_CONTENT MATCHES "${ANDROID_API_LEVEL_H_REGEX}")
+                SET(ANDROID_STANDALONE_TOOLCHAIN_API "${CMAKE_MATCH_1}")
+            ENDIF()
+            SET(CMAKE_SYSTEM_VERSION ${ANDROID_STANDALONE_TOOLCHAIN_API})
+        ENDIF()
+
+        # Toolchain
+        SET(ANDROID_TOOLCHAIN "gcc")
+        SET(ANDROID_TOOLCHAIN_ROOT ${ANDROID_STANDALONE_TOOLCHAIN})
+        IF(ANDROID_ABI MATCHES "^armeabi(-v7a)?$")
+            SET(ANDROID_TOOLCHAIN_NAME arm-linux-androideabi)
+            IF(ANDROID_ABI STREQUAL "armeabi")
+                SET(CMAKE_SYSTEM_PROCESSOR armv5te)
+            ELSEIF(ANDROID_ABI STREQUAL "armeabi-v7a")
+                SET(CMAKE_SYSTEM_PROCESSOR armv7-a)
+            ENDIF()
+        ENDIF()
+        IF(ANDROID_ABI STREQUAL "arm64-v8a")
+            SET(ANDROID_TOOLCHAIN_NAME aarch64-linux-android)
+            SET(CMAKE_SYSTEM_PROCESSOR aarch64)
+        ENDIF()
+        SET(ANDROID_TOOLCHAIN_PREFIX "${ANDROID_TOOLCHAIN_ROOT}/bin/${ANDROID_TOOLCHAIN_NAME}-")
+    ENDIF()
+
+    # C compiler
+    IF(NOT CMAKE_C_COMPILER)
+        SET(ANDROID_C_COMPILER "${ANDROID_TOOLCHAIN_PREFIX}gcc")
+    ELSE()
+        GET_FILENAME_COMPONENT(ANDROID_C_COMPILER ${CMAKE_C_COMPILER} PROGRAM)
+    ENDIF()
+    IF(NOT EXISTS ${ANDROID_C_COMPILER})
+        MESSAGE(FATAL_ERROR "Cannot find C compiler: ${ANDROID_C_COMPILER}")
+    ENDIF()
+
+    # CXX compiler
+    IF(NOT CMAKE_CXX_COMPILER)
+        SET(ANDROID_CXX_COMPILER "${ANDROID_TOOLCHAIN_PREFIX}g++")
+    ELSE()
+        GET_FILENAME_COMPONENT(ANDROID_CXX_COMPILER ${CMAKE_CXX_COMPILER} PROGRAM)
+    ENDIF()
+    IF(NOT EXISTS ${ANDROID_CXX_COMPILER})
+        MESSAGE(FATAL_ERROR "Cannot find CXX compiler: ${ANDROID_CXX_COMPILER}")
+    ENDIF()
+
+    SET(CMAKE_C_COMPILER ${ANDROID_C_COMPILER} CACHE PATH "C compiler" FORCE)
+    SET(CMAKE_CXX_COMPILER ${ANDROID_CXX_COMPILER} CACHE PATH "CXX compiler" FORCE)
+
+    # Toolchain and ABI specific flags.
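+    # Flag notes: -mfloat-abi=softfp keeps the soft-float calling convention
+    # while still permitting VFP/NEON instructions; -mfpu picks the FPU
+    # (neon vs. the vfpv3-d16 fallback); and -Wl,--fix-cortex-a8 works
+    # around a Cortex-A8 CPU erratum at link time.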
+ SET(ANDROID_COMPILER_FLAGS "-ffunction-sections -fdata-sections -finline-limit=64") + SET(ANDROID_LINKER_FLAGS "-Wl,--gc-sections") + + IF(ANDROID_ABI STREQUAL "armeabi") + LIST(APPEND ANDROID_COMPILER_FLAGS + -march=armv5te + -mtune=xscale + -msoft-float) + ENDIF() + IF(ANDROID_ABI STREQUAL "armeabi-v7a") + LIST(APPEND ANDROID_COMPILER_FLAGS + -march=armv7-a + -mfloat-abi=softfp) + IF(ANDROID_ARM_NEON) + LIST(APPEND ANDROID_COMPILER_FLAGS -mfpu=neon) + ELSE() + LIST(APPEND ANDROID_COMPILER_FLAGS -mfpu=vfpv3-d16) + ENDIF() + LIST(APPEND ANDROID_LINKER_FLAGS -Wl,--fix-cortex-a8) + ENDIF() + + IF(ANDROID_ABI MATCHES "^armeabi(-v7a)?$") + IF(ANDROID_ARM_MODE) + LIST(APPEND ANDROID_COMPILER_FLAGS -marm) + ELSE() + LIST(APPEND ANDROID_COMPILER_FLAGS -mthumb) + ENDIF() + ENDIF() + + IF(ANDROID_ABI STREQUAL "arm64-v8a") + LIST(APPEND ANDROID_COMPILER_FLAGS -march=armv8-a) + ENDIF() + + STRING(REPLACE ";" " " ANDROID_COMPILER_FLAGS "${ANDROID_COMPILER_FLAGS}") + STRING(REPLACE ";" " " ANDROID_LINKER_FLAGS "${ANDROID_LINKER_FLAGS}") + + SET(CMAKE_C_FLAGS "${ANDROID_COMPILER_FLAGS} ${CMAKE_C_FLAGS}" + CACHE STRING "C flags") + SET(CMAKE_CXX_FLAGS "${ANDROID_COMPILER_FLAGS} ${CMAKE_CXX_FLAGS}" + CACHE STRING "CXX flags") + SET(CMAKE_SHARED_LINKER_FLAGS "${ANDROID_LINKER_FLAGS} ${CMAKE_SHARED_LINKER_FLAGS}" + CACHE STRING "shared linker flags") + + SET(CMAKE_POSITION_INDEPENDENT_CODE TRUE) + SET(CMAKE_EXE_LINKER_FLAGS "-pie -fPIE ${ANDROID_LINKER_FLAGS} ${CMAKE_EXE_LINKER_FLAGS}" + CACHE STRING "executable linker flags") + + MESSAGE(STATUS "Android: Targeting API '${CMAKE_SYSTEM_VERSION}' " + "with architecture '${ANDROID_ARM_MODE_NAME}', " + "ABI '${ANDROID_ABI}', and processor '${CMAKE_SYSTEM_PROCESSOR}'") + MESSAGE(STATUS "System CMAKE_C_FLAGS: " ${CMAKE_C_FLAGS}) + MESSAGE(STATUS "System CMAKE_CXX_FLAGS: " ${CMAKE_CXX_FLAGS}) +ELSE() + IF(ANDROID_STANDALONE_TOOLCHAIN) + SET(CMAKE_ANDROID_STANDALONE_TOOLCHAIN ${ANDROID_STANDALONE_TOOLCHAIN}) + ENDIF() + SET(CMAKE_ANDROID_ARCH_ABI ${ANDROID_ABI}) + IF(ANDROID_ABI MATCHES "^armeabi(-v7a)?$") + SET(CMAKE_ANDROID_ARM_MODE ${ANDROID_ARM_MODE}) + IF(ANDROID_ABI STREQUAL "armeabi-v7a") + SET(CMAKE_ANDROID_ARM_NEON ${ANDROID_ARM_NEON}) + ENDIF() + ENDIF() +ENDIF() diff --git a/cmake/cross_compiling/host.cmake b/cmake/cross_compiling/host.cmake new file mode 100644 index 0000000000000000000000000000000000000000..14c35266ec60b439aaef30e5e4e0540c534160ae --- /dev/null +++ b/cmake/cross_compiling/host.cmake @@ -0,0 +1,49 @@ +# Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserve. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
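
The __ANDROID_API__ probing in android.cmake above is a reusable pattern: FILE(STRINGS ... REGEX) greps a single #define out of a header, and the MATCHES check captures its value. A standalone sketch, assuming a hypothetical mylib/version.h:

# Extract the number from "#define MYLIB_API 21" (all names hypothetical).
set(_api_regex "^[\t ]*#[\t ]*define[\t ]+MYLIB_API[\t ]+([0-9]+)")
file(STRINGS "${MYLIB_ROOT}/include/mylib/version.h" _api_line
     REGEX "${_api_regex}")
if(_api_line MATCHES "${_api_regex}")
    set(MYLIB_API_LEVEL "${CMAKE_MATCH_1}")  # the captured digits
endif()
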
+
+# find host C compiler
+IF(HOST_C_COMPILER)
+    SET(HOST_C_COMPILER_NAME ${HOST_C_COMPILER})
+ELSEIF(NOT $ENV{CC} STREQUAL "")
+    SET(HOST_C_COMPILER_NAME $ENV{CC})
+ELSE()
+    SET(HOST_C_COMPILER_NAME cc)
+ENDIF()
+
+GET_FILENAME_COMPONENT(HOST_C_COMPILER_PATH ${HOST_C_COMPILER_NAME} PROGRAM)
+IF(NOT HOST_C_COMPILER_PATH OR NOT EXISTS ${HOST_C_COMPILER_PATH})
+    MESSAGE(FATAL_ERROR "Cannot find host C compiler; set it explicitly:\n"
+            "\tcmake .. -DHOST_C_COMPILER=...")
+ENDIF()
+
+# find host CXX compiler
+IF(HOST_CXX_COMPILER)
+    SET(HOST_CXX_COMPILER_NAME ${HOST_CXX_COMPILER})
+ELSEIF(NOT $ENV{CXX} STREQUAL "")
+    SET(HOST_CXX_COMPILER_NAME $ENV{CXX})
+ELSE()
+    SET(HOST_CXX_COMPILER_NAME c++)
+ENDIF()
+
+GET_FILENAME_COMPONENT(HOST_CXX_COMPILER_PATH ${HOST_CXX_COMPILER_NAME} PROGRAM)
+IF(NOT HOST_CXX_COMPILER_PATH OR NOT EXISTS ${HOST_CXX_COMPILER_PATH})
+    MESSAGE(FATAL_ERROR "Cannot find host CXX compiler; set it explicitly:\n"
+            "\tcmake .. -DHOST_CXX_COMPILER=...")
+ENDIF()
+
+SET(HOST_C_COMPILER ${HOST_C_COMPILER_PATH} CACHE PATH "Host C compiler")
+SET(HOST_CXX_COMPILER ${HOST_CXX_COMPILER_PATH} CACHE PATH "Host CXX compiler")
+
+MESSAGE(STATUS "Found host C compiler: " ${HOST_C_COMPILER})
+MESSAGE(STATUS "Found host CXX compiler: " ${HOST_CXX_COMPILER})
diff --git a/cmake/cross_compiling/raspberry_pi.cmake b/cmake/cross_compiling/raspberry_pi.cmake
new file mode 100644
index 0000000000000000000000000000000000000000..817b39f6833e37c340d4ee465048480cfc3db151
--- /dev/null
+++ b/cmake/cross_compiling/raspberry_pi.cmake
@@ -0,0 +1,84 @@
+# Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserve.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+# This is a toolchain file for cross-compiling for Raspberry Pi.
+# The supported variables are listed below:
+#
+# RPI_TOOLCHAIN
+# RPI_ARM_NEON
+#
+# You can also set CMAKE_C/CXX_COMPILER yourself through CMake arguments.
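
For context, toolchain files like the two above are passed to CMake at configure time. An illustrative invocation (paths hypothetical; the exact set of flags depends on the top-level CMakeLists.txt):

# cmake .. -DCMAKE_TOOLCHAIN_FILE=../cmake/cross_compiling/raspberry_pi.cmake \
#          -DRPI=ON \
#          -DRPI_TOOLCHAIN=$HOME/tools/gcc-linaro-arm-linux-gnueabihf-raspbian-x64
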
+ +IF(NOT RPI) + return() +ENDIF() + +SET(CMAKE_SYSTEM_NAME Linux) +SET(CMAKE_SYSTEM_VERSION 1) +SET(CMAKE_SYSTEM_PROCESSOR arm) + +# check the exist of raspberry pi toolchain +IF(NOT DEFINED RPI_TOOLCHAIN) + SET(RPI_TOOLCHAIN $ENV{RPI_TOOLCHAIN} + CACHE PATH "Folder holds the toolchain of Raspberr Pi") +ENDIF() +IF(NOT RPI_TOOLCHAIN) + MESSAGE(WARNING "It is recommended to set RPI_TOOLCHAIN to use toolchain.\n" + "To cross-compile for Raspberry Pi, you need to download the tools using:\n" + " git clone https://github.com/raspberrypi/tools\n") +ENDIF() + +IF(NOT DEFINED RPI_ARM_NEON) + SET(RPI_ARM_NEON ON) +ENDIF() + +IF(RPI_TOOLCHAIN) + SET(RPI_TOOLCHAIN_ROOT ${RPI_TOOLCHAIN}) + IF(RPI_TOOLCHAIN_ROOT MATCHES "gcc-linaro-arm-linux-gnueabihf-raspbian(-x64)?$") + # gcc-linaro-arm-linux-gnueabihf-raspbian + # gcc-linaro-arm-linux-gnueabihf-raspbian-x64 + SET(RPI_TOOLCHAIN_NAME arm-linux-gnueabihf) + ENDIF() + SET(RPI_TOOLCHAIN_PREFIX "${RPI_TOOLCHAIN_ROOT}/bin/${RPI_TOOLCHAIN_NAME}-") +ENDIF() + +# C compiler +IF(NOT CMAKE_C_COMPILER) + SET(RPI_C_COMPILER "${RPI_TOOLCHAIN_PREFIX}gcc") +ELSE() + GET_FILENAME_COMPONENT(RPI_C_COMPILER ${CMAKE_C_COMPILER} PROGRAM) +ENDIF() +IF(NOT EXISTS ${RPI_C_COMPILER}) + MESSAGE(FATAL_ERROR "Cannot find C compiler: ${RPI_C_COMPILER}") +ENDIF() + +# CXX compiler +IF(NOT CMAKE_CXX_COMPILER) + SET(RPI_CXX_COMPILER "${RPI_TOOLCHAIN_PREFIX}g++") +ELSE() + GET_FILENAME_COMPONENT(RPI_CXX_COMPILER ${CMAKE_CXX_COMPILER} PROGRAM) +ENDIF() +IF(NOT EXISTS ${RPI_CXX_COMPILER}) + MESSAGE(FATAL_ERROR "Cannot find CXX compiler: ${RPI_CXX_COMPILER}") +ENDIF() + +SET(CMAKE_C_COMPILER ${RPI_C_COMPILER} CACHE PATH "C compiler" FORCE) +SET(CMAKE_CXX_COMPILER ${RPI_CXX_COMPILER} CACHE PATH "CXX compiler" FORCE) + +IF(RPI_ARM_NEON) + SET(RPI_C_FLAGS "${RPI_C_FLAGS} -mfpu=neon") +ENDIF() + +SET(CMAKE_C_FLAGS "${RPI_C_FLAGS} ${CMAKE_C_FLAGS}" CACHE STRING "C flags") +SET(CMAKE_CXX_FLAGS "${RPI_C_FLAGS} ${CMAKE_CXX_FLAGS}" CACHE STRING "CXX flags") diff --git a/cmake/cudnn.cmake b/cmake/cudnn.cmake index e5b59be19369d3ba3e852624426b95ae365e7357..69f40df51680a104c47d9335c070c570dcaff59a 100644 --- a/cmake/cudnn.cmake +++ b/cmake/cudnn.cmake @@ -1,3 +1,7 @@ +if(NOT WITH_GPU) + return() +endif() + set(CUDNN_ROOT "" CACHE PATH "CUDNN ROOT") find_path(CUDNN_INCLUDE_DIR cudnn.h PATHS ${CUDNN_ROOT} ${CUDNN_ROOT}/include @@ -7,10 +11,16 @@ find_path(CUDNN_INCLUDE_DIR cudnn.h get_filename_component(__libpath_hist ${CUDA_CUDART_LIBRARY} PATH) +set(TARGET_ARCH "x86_64") +if(NOT ${CMAKE_SYSTEM_PROCESSOR}) + set(TARGET_ARCH ${CMAKE_SYSTEM_PROCESSOR}) +endif() + list(APPEND CUDNN_CHECK_LIBRARY_DIRS ${CUDNN_ROOT} ${CUDNN_ROOT}/lib64 ${CUDNN_ROOT}/lib + ${CUDNN_ROOT}/lib/${TARGET_ARCH}-linux-gnu $ENV{CUDNN_ROOT} $ENV{CUDNN_ROOT}/lib64 $ENV{CUDNN_ROOT}/lib diff --git a/cmake/external/any.cmake b/cmake/external/any.cmake new file mode 100644 index 0000000000000000000000000000000000000000..85cce80b70a1fcf57015ac7a264e4950616b2717 --- /dev/null +++ b/cmake/external/any.cmake @@ -0,0 +1,31 @@ +INCLUDE(ExternalProject) + +SET(ANY_SOURCE_DIR ${THIRD_PARTY_PATH}/any) + +INCLUDE_DIRECTORIES(${ANY_SOURCE_DIR}/src/extern_lib_any) + +ExternalProject_Add( + extern_lib_any + ${EXTERNAL_PROJECT_LOG_ARGS} + GIT_REPOSITORY "https://github.com/PaddlePaddle/any.git" + GIT_TAG "15595d8324be9e8a9a80d9ae442fdd12bd66df5d" + PREFIX ${ANY_SOURCE_DIR} + UPDATE_COMMAND "" + CONFIGURE_COMMAND "" + BUILD_COMMAND "" + INSTALL_COMMAND "" + TEST_COMMAND "" +) + +if (${CMAKE_VERSION} VERSION_LESS "3.3.0") + set(dummyfile 
${CMAKE_CURRENT_BINARY_DIR}/lib_any_dummy.c) + file(WRITE ${dummyfile} "const char * dummy_any = \"${dummyfile}\";") + add_library(lib_any STATIC ${dummyfile}) +else() + add_library(lib_any INTERFACE) +endif() + +add_dependencies(lib_any extern_lib_any) + +add_definitions(-DANY_IMPL_ANY_CAST_MOVEABLE) +LIST(APPEND external_project_dependencies lib_any) diff --git a/cmake/external/eigen.cmake b/cmake/external/eigen.cmake new file mode 100644 index 0000000000000000000000000000000000000000..f7483f6be9169eb58f0148cd3a956a8c881e1fe3 --- /dev/null +++ b/cmake/external/eigen.cmake @@ -0,0 +1,30 @@ +INCLUDE(ExternalProject) + +SET(EIGEN_SOURCE_DIR ${THIRD_PARTY_PATH}/eigen3) + +INCLUDE_DIRECTORIES(${EIGEN_SOURCE_DIR}/src/extern_eigen3) + +ExternalProject_Add( + extern_eigen3 + ${EXTERNAL_PROJECT_LOG_ARGS} + GIT_REPOSITORY "https://github.com/RLovelett/eigen.git" + GIT_TAG "master" + PREFIX ${EIGEN_SOURCE_DIR} + UPDATE_COMMAND "" + CONFIGURE_COMMAND "" + BUILD_COMMAND "" + INSTALL_COMMAND "" + TEST_COMMAND "" +) + +if (${CMAKE_VERSION} VERSION_LESS "3.3.0") + set(dummyfile ${CMAKE_CURRENT_BINARY_DIR}/eigen3_dummy.c) + file(WRITE ${dummyfile} "const char * dummy_eigen3 = \"${dummyfile}\";") + add_library(eigen3 STATIC ${dummyfile}) +else() + add_library(eigen3 INTERFACE) +endif() + +add_dependencies(eigen3 extern_eigen3) + +LIST(APPEND external_project_dependencies eigen3) diff --git a/cmake/external/gflags.cmake b/cmake/external/gflags.cmake new file mode 100644 index 0000000000000000000000000000000000000000..16e5bef4cdb8d6513de51838e3c3c8398dbad60d --- /dev/null +++ b/cmake/external/gflags.cmake @@ -0,0 +1,58 @@ +# Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserve. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +INCLUDE(ExternalProject) + +SET(GFLAGS_SOURCES_DIR ${THIRD_PARTY_PATH}/gflags) +SET(GFLAGS_INSTALL_DIR ${THIRD_PARTY_PATH}/install/gflags) +SET(GFLAGS_INCLUDE_DIR "${GFLAGS_INSTALL_DIR}/include" CACHE PATH "gflags include directory." FORCE) +IF(WIN32) + set(GFLAGS_LIBRARIES "${GFLAGS_INSTALL_DIR}/lib/gflags.lib" CACHE FILEPATH "GFLAGS_LIBRARIES" FORCE) +ELSE(WIN32) + set(GFLAGS_LIBRARIES "${GFLAGS_INSTALL_DIR}/lib/libgflags.a" CACHE FILEPATH "GFLAGS_LIBRARIES" FORCE) +ENDIF(WIN32) + +INCLUDE_DIRECTORIES(${GFLAGS_INCLUDE_DIR}) + +ExternalProject_Add( + extern_gflags + ${EXTERNAL_PROJECT_LOG_ARGS} + # TODO(yiwang): The annoying warnings mentioned in + # https://github.com/PaddlePaddle/Paddle/issues/3277 are caused by + # gflags. I fired a PR https://github.com/gflags/gflags/pull/230 + # to fix it. Before it gets accepted by the gflags team, we use + # my personal fork, which contains above fix, temporarily. Let's + # change this back to the official Github repo once my PR is + # merged. 
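+    # Note: the fork below is fetched without a GIT_TAG, so builds track its
+    # default branch; pinning a specific commit here would make builds
+    # reproducible once the upstream PR is merged.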
+ GIT_REPOSITORY "https://github.com/wangkuiyi/gflags.git" + PREFIX ${GFLAGS_SOURCES_DIR} + UPDATE_COMMAND "" + CMAKE_ARGS -DCMAKE_CXX_COMPILER=${CMAKE_CXX_COMPILER} + CMAKE_ARGS -DCMAKE_C_COMPILER=${CMAKE_C_COMPILER} + CMAKE_ARGS -DCMAKE_CXX_FLAGS=${CMAKE_CXX_FLAGS} + CMAKE_ARGS -DCMAKE_C_FLAGS=${CMAKE_C_FLAGS} + CMAKE_ARGS -DCMAKE_INSTALL_PREFIX=${GFLAGS_INSTALL_DIR} + CMAKE_ARGS -DCMAKE_POSITION_INDEPENDENT_CODE=ON + CMAKE_ARGS -DBUILD_TESTING=OFF + CMAKE_ARGS -DCMAKE_BUILD_TYPE=Release + CMAKE_CACHE_ARGS -DCMAKE_INSTALL_PREFIX:PATH=${GFLAGS_INSTALL_DIR} + -DCMAKE_POSITION_INDEPENDENT_CODE:BOOL=ON + -DCMAKE_BUILD_TYPE:STRING=Release +) + +ADD_LIBRARY(gflags STATIC IMPORTED GLOBAL) +SET_PROPERTY(TARGET gflags PROPERTY IMPORTED_LOCATION ${GFLAGS_LIBRARIES}) +ADD_DEPENDENCIES(gflags extern_gflags) + +LIST(APPEND external_project_dependencies gflags) diff --git a/cmake/external/glog.cmake b/cmake/external/glog.cmake new file mode 100644 index 0000000000000000000000000000000000000000..8a594a825abdca6a0f989b94fa42f97d6df5e10a --- /dev/null +++ b/cmake/external/glog.cmake @@ -0,0 +1,58 @@ +# Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserve. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +INCLUDE(ExternalProject) + +SET(GLOG_SOURCES_DIR ${THIRD_PARTY_PATH}/glog) +SET(GLOG_INSTALL_DIR ${THIRD_PARTY_PATH}/install/glog) +SET(GLOG_INCLUDE_DIR "${GLOG_INSTALL_DIR}/include" CACHE PATH "glog include directory." FORCE) + +IF(WIN32) + SET(GLOG_LIBRARIES "${GLOG_INSTALL_DIR}/lib/libglog.lib" CACHE FILEPATH "glog library." FORCE) +ELSE(WIN32) + SET(GLOG_LIBRARIES "${GLOG_INSTALL_DIR}/lib/libglog.a" CACHE FILEPATH "glog library." 
FORCE) +ENDIF(WIN32) + +INCLUDE_DIRECTORIES(${GLOG_INCLUDE_DIR}) + +ExternalProject_Add( + extern_glog + ${EXTERNAL_PROJECT_LOG_ARGS} + DEPENDS gflags + GIT_REPOSITORY "https://github.com/google/glog.git" + PREFIX ${GLOG_SOURCES_DIR} + UPDATE_COMMAND "" + CMAKE_ARGS -DCMAKE_CXX_COMPILER=${CMAKE_CXX_COMPILER} + CMAKE_ARGS -DCMAKE_C_COMPILER=${CMAKE_C_COMPILER} + CMAKE_ARGS -DCMAKE_CXX_FLAGS=${CMAKE_CXX_FLAGS} + CMAKE_ARGS -DCMAKE_C_FLAGS=${CMAKE_C_FLAGS} + CMAKE_ARGS -DCMAKE_INSTALL_PREFIX=${GLOG_INSTALL_DIR} + CMAKE_ARGS -DCMAKE_INSTALL_LIBDIR=${GLOG_INSTALL_DIR}/lib + CMAKE_ARGS -DCMAKE_POSITION_INDEPENDENT_CODE=ON + CMAKE_ARGS -DWITH_GFLAGS=ON + CMAKE_ARGS -Dgflags_DIR=${GFLAGS_INSTALL_DIR}/lib/cmake/gflags + CMAKE_ARGS -DBUILD_TESTING=OFF + CMAKE_ARGS -DCMAKE_BUILD_TYPE=Release + CMAKE_CACHE_ARGS -DCMAKE_INSTALL_PREFIX:PATH=${GLOG_INSTALL_DIR} + -DCMAKE_INSTALL_LIBDIR:PATH=${GLOG_INSTALL_DIR}/lib + -DCMAKE_POSITION_INDEPENDENT_CODE:BOOL=ON + -DCMAKE_BUILD_TYPE:STRING=Release +) + +ADD_LIBRARY(glog STATIC IMPORTED GLOBAL) +SET_PROPERTY(TARGET glog PROPERTY IMPORTED_LOCATION ${GLOG_LIBRARIES}) +ADD_DEPENDENCIES(glog extern_glog gflags) +LINK_LIBRARIES(glog gflags) + +LIST(APPEND external_project_dependencies glog) diff --git a/cmake/external/gtest.cmake b/cmake/external/gtest.cmake new file mode 100644 index 0000000000000000000000000000000000000000..e3970073a1a0b946fa1db6642799719d7a9fcf4f --- /dev/null +++ b/cmake/external/gtest.cmake @@ -0,0 +1,74 @@ +# Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserve. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +IF(WITH_TESTING) + ENABLE_TESTING() + INCLUDE(ExternalProject) + + SET(GTEST_SOURCES_DIR ${THIRD_PARTY_PATH}/gtest) + SET(GTEST_INSTALL_DIR ${THIRD_PARTY_PATH}/install/gtest) + SET(GTEST_INCLUDE_DIR "${GTEST_INSTALL_DIR}/include" CACHE PATH "gtest include directory." FORCE) + + INCLUDE_DIRECTORIES(${GTEST_INCLUDE_DIR}) + + IF(WIN32) + set(GTEST_LIBRARIES + "${GTEST_INSTALL_DIR}/lib/gtest.lib" CACHE FILEPATH "gtest libraries." FORCE) + set(GTEST_MAIN_LIBRARIES + "${GTEST_INSTALL_DIR}/lib/gtest_main.lib" CACHE FILEPATH "gtest main libraries." FORCE) + ELSE(WIN32) + set(GTEST_LIBRARIES + "${GTEST_INSTALL_DIR}/lib/libgtest.a" CACHE FILEPATH "gtest libraries." FORCE) + set(GTEST_MAIN_LIBRARIES + "${GTEST_INSTALL_DIR}/lib/libgtest_main.a" CACHE FILEPATH "gtest main libraries." 
FORCE) + ENDIF(WIN32) + + IF(WITH_MKLML) + # wait for mklml downloading completed + SET(GTEST_DEPENDS ${MKLML_PROJECT}) + ENDIF() + + ExternalProject_Add( + extern_gtest + ${EXTERNAL_PROJECT_LOG_ARGS} + DEPENDS ${GTEST_DEPENDS} + GIT_REPOSITORY "https://github.com/google/googletest.git" + GIT_TAG "release-1.8.0" + PREFIX ${GTEST_SOURCES_DIR} + UPDATE_COMMAND "" + CMAKE_ARGS -DCMAKE_CXX_COMPILER=${CMAKE_CXX_COMPILER} + CMAKE_ARGS -DCMAKE_C_COMPILER=${CMAKE_C_COMPILER} + CMAKE_ARGS -DCMAKE_CXX_FLAGS=${CMAKE_CXX_FLAGS} + CMAKE_ARGS -DCMAKE_C_FLAGS=${CMAKE_C_FLAGS} + CMAKE_ARGS -DCMAKE_INSTALL_PREFIX=${GTEST_INSTALL_DIR} + CMAKE_ARGS -DCMAKE_POSITION_INDEPENDENT_CODE=ON + CMAKE_ARGS -DBUILD_GMOCK=ON + CMAKE_ARGS -Dgtest_disable_pthreads=ON + CMAKE_ARGS -Dgtest_force_shared_crt=ON + CMAKE_ARGS -DCMAKE_BUILD_TYPE=Release + CMAKE_CACHE_ARGS -DCMAKE_INSTALL_PREFIX:PATH=${GTEST_INSTALL_DIR} + -DCMAKE_POSITION_INDEPENDENT_CODE:BOOL=ON + -DCMAKE_BUILD_TYPE:STRING=Release + ) + + ADD_LIBRARY(gtest STATIC IMPORTED GLOBAL) + SET_PROPERTY(TARGET gtest PROPERTY IMPORTED_LOCATION ${GTEST_LIBRARIES}) + ADD_DEPENDENCIES(gtest extern_gtest) + + ADD_LIBRARY(gtest_main STATIC IMPORTED GLOBAL) + SET_PROPERTY(TARGET gtest_main PROPERTY IMPORTED_LOCATION ${GTEST_MAIN_LIBRARIES}) + ADD_DEPENDENCIES(gtest_main extern_gtest) + + LIST(APPEND external_project_dependencies gtest gtest_main) +ENDIF(WITH_TESTING) diff --git a/cmake/external/mkldnn.cmake b/cmake/external/mkldnn.cmake new file mode 100644 index 0000000000000000000000000000000000000000..25c6b4ef52d3f8ebff1572ae8d348be7c577c08c --- /dev/null +++ b/cmake/external/mkldnn.cmake @@ -0,0 +1,67 @@ +# Copyright (c) 2017 PaddlePaddle Authors. All Rights Reserve. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +IF(NOT ${WITH_MKLDNN}) + return() +ENDIF(NOT ${WITH_MKLDNN}) + +INCLUDE(ExternalProject) + +SET(MKLDNN_PROJECT "extern_mkldnn") +SET(MKLDNN_SOURCES_DIR ${THIRD_PARTY_PATH}/mkldnn) +SET(MKLDNN_INSTALL_DIR ${THIRD_PARTY_PATH}/install/mkldnn) +SET(MKLDNN_INC_DIR "${MKLDNN_INSTALL_DIR}/include" CACHE PATH "mkldnn include directory." FORCE) + +IF(WIN32 OR APPLE) + MESSAGE(WARNING + "Windows or Mac is not supported with MKLDNN in Paddle yet." + "Force WITH_MKLDNN=OFF") + SET(WITH_MKLDNN OFF CACHE STRING "Disable MKLDNN in Windows and MacOS" FORCE) + return() +ENDIF() + +SET(MKLDNN_LIB "${MKLDNN_INSTALL_DIR}/lib/libmkldnn.so" CACHE FILEPATH "mkldnn library." 
FORCE) +MESSAGE(STATUS "Set ${MKLDNN_INSTALL_DIR}/lib to runtime path") +SET(CMAKE_INSTALL_RPATH_USE_LINK_PATH TRUE) +SET(CMAKE_INSTALL_RPATH "${CMAKE_INSTALL_RPATH}" "${MKLDNN_INSTALL_DIR}/lib") + +INCLUDE_DIRECTORIES(${MKLDNN_INC_DIR}) + +IF(${CBLAS_PROVIDER} STREQUAL "MKLML") + SET(MKLDNN_DEPENDS ${MKLML_PROJECT}) + SET(MKLDNN_MKLROOT ${MKLML_ROOT}) + SET(MKLDNN_IOMP_LIB ${MKLML_IOMP_LIB}) + SET(MKLDNN_IOMP_DIR ${MKLML_LIB_DIR}) + MESSAGE(STATUS "Build MKLDNN with ${MKLDNN_MKLROOT}") +ENDIF() + +ExternalProject_Add( + ${MKLDNN_PROJECT} + ${EXTERNAL_PROJECT_LOG_ARGS} + DEPENDS ${MKLDNN_DEPENDS} + GIT_REPOSITORY "https://github.com/01org/mkl-dnn.git" + GIT_TAG "v0.9" + PREFIX ${MKLDNN_SOURCES_DIR} + UPDATE_COMMAND "" + CMAKE_ARGS -DCMAKE_INSTALL_PREFIX=${MKLDNN_INSTALL_DIR} + CMAKE_ARGS -DMKLROOT=${MKLDNN_MKLROOT} + CMAKE_CACHE_ARGS -DCMAKE_INSTALL_PREFIX:PATH=${MKLDNN_INSTALL_DIR} + -DMKLROOT:PATH=${MKLDNN_MKLROOT} +) + +ADD_LIBRARY(mkldnn SHARED IMPORTED GLOBAL) +SET_PROPERTY(TARGET mkldnn PROPERTY IMPORTED_LOCATION ${MKLDNN_LIB}) +ADD_DEPENDENCIES(mkldnn ${MKLDNN_PROJECT}) +MESSAGE(STATUS "Mkldnn library: ${MKLDNN_LIB}") +LIST(APPEND external_project_dependencies mkldnn) diff --git a/cmake/external/mklml.cmake b/cmake/external/mklml.cmake new file mode 100644 index 0000000000000000000000000000000000000000..e9fd3d4bedc983ae7c544cf289dc841cf22f9de4 --- /dev/null +++ b/cmake/external/mklml.cmake @@ -0,0 +1,67 @@ +# Copyright (c) 2017 PaddlePaddle Authors. All Rights Reserve. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +IF(NOT ${WITH_MKLML}) + return() +ENDIF(NOT ${WITH_MKLML}) + +IF(WIN32 OR APPLE) + MESSAGE(WARNING + "Windows or Mac is not supported with MKLML in Paddle yet." 
+ "Force WITH_MKLML=OFF") + SET(WITH_MKLML OFF CACHE STRING "Disable MKLML package in Windows and MacOS" FORCE) + return() +ENDIF() + +INCLUDE(ExternalProject) + +SET(MKLML_PROJECT "extern_mklml") +SET(MKLML_VER "mklml_lnx_2018.0.20170720") +SET(MKLML_URL "https://github.com/01org/mkl-dnn/releases/download/v0.9/${MKLML_VER}.tgz") +SET(MKLML_SOURCE_DIR "${THIRD_PARTY_PATH}/mklml") +SET(MKLML_DOWNLOAD_DIR "${MKLML_SOURCE_DIR}/src/${MKLML_PROJECT}") +SET(MKLML_DST_DIR "mklml") +SET(MKLML_INSTALL_ROOT "${THIRD_PARTY_PATH}/install") +SET(MKLML_INSTALL_DIR ${MKLML_INSTALL_ROOT}/${MKLML_DST_DIR}) +SET(MKLML_ROOT ${MKLML_INSTALL_DIR}/${MKLML_VER}) +SET(MKLML_INC_DIR ${MKLML_ROOT}/include) +SET(MKLML_LIB_DIR ${MKLML_ROOT}/lib) +SET(MKLML_LIB ${MKLML_LIB_DIR}/libmklml_intel.so) +SET(MKLML_IOMP_LIB ${MKLML_LIB_DIR}/libiomp5.so) +SET(CMAKE_INSTALL_RPATH "${CMAKE_INSTALL_RPATH}" "${MKLML_ROOT}/lib") + +INCLUDE_DIRECTORIES(${MKLML_INC_DIR}) + +FILE(WRITE ${MKLML_DOWNLOAD_DIR}/CMakeLists.txt + "PROJECT(MKLML)\n" + "cmake_minimum_required(VERSION 3.0)\n" + "install(DIRECTORY ${MKLML_VER}\n" + " DESTINATION ${MKLML_DST_DIR})\n") + +ExternalProject_Add( + ${MKLML_PROJECT} + ${EXTERNAL_PROJECT_LOG_ARGS} + PREFIX ${MKLML_SOURCE_DIR} + DOWNLOAD_DIR ${MKLML_DOWNLOAD_DIR} + DOWNLOAD_COMMAND wget --no-check-certificate -qO- ${MKLML_URL} | tar xz -C ${MKLML_DOWNLOAD_DIR} + DOWNLOAD_NO_PROGRESS 1 + UPDATE_COMMAND "" + CMAKE_ARGS -DCMAKE_INSTALL_PREFIX=${MKLML_INSTALL_ROOT} + CMAKE_CACHE_ARGS -DCMAKE_INSTALL_PREFIX:PATH=${MKLML_INSTALL_ROOT} +) + +ADD_LIBRARY(mklml SHARED IMPORTED GLOBAL) +SET_PROPERTY(TARGET mklml PROPERTY IMPORTED_LOCATION ${MKLML_LIB}) +ADD_DEPENDENCIES(mklml ${MKLML_PROJECT}) +LIST(APPEND external_project_dependencies mklml) diff --git a/cmake/external/nnpack.cmake b/cmake/external/nnpack.cmake new file mode 100644 index 0000000000000000000000000000000000000000..d42bcb0f329041462bd8b568052fbb8226d18e4e --- /dev/null +++ b/cmake/external/nnpack.cmake @@ -0,0 +1,30 @@ +# Find the NNPACK library +# NNPACK_ROOT - where to find NNPACK include and library. +# + +set(NNPACK_FOUND OFF) +set(NNPACK_ROOT $ENV{NNPACK_ROOT} CACHE PATH "Folder contains NNPACK") +find_path(NNPACK_INC_DIR nnpack.h PATHS ${NNPACK_ROOT}/include) +find_library(NNPACK_LIB NAMES nnpack PATHS ${NNPACK_ROOT}/lib) +find_library(PTHREADPOOL_LIB NAMES pthreadpool PATHS ${NNPACK_ROOT}/lib) +find_library(NNPACK_UKERNELS_LIB NAMES nnpack_ukernels PATHS ${NNPACK_ROOT}/lib) +find_library(NNPACK_CPUFEATURES_LIB NAMES cpufeatures PATHS ${NNPACK_ROOT}/lib) + +if(NNPACK_INC_DIR AND NNPACK_LIB AND PTHREADPOOL_LIB) + set(NNPACK_FOUND ON) + INCLUDE_DIRECTORIES(${NNPACK_INC_DIR}) + + set(NNPACK_LIBS) + list(APPEND NNPACK_LIBS ${NNPACK_LIB} ${PTHREADPOOL_LIB}) + if (NNPACK_UKERNELS_LIB) + list(APPEND NNPACK_LIBS ${NNPACK_UKERNELS_LIB}) + endif() + if (NNPACK_CPUFEATURES_LIB) + list(APPEND NNPACK_LIBS ${NNPACK_CPUFEATURES_LIB}) + endif() + if(NOT ANDROID) + list(APPEND NNPACK_LIBS "rt") + endif() +else() + message(FATAL_ERROR "Cannot find NNPACK in (${NNPACK_ROOT})") +endif() diff --git a/cmake/external/openblas.cmake b/cmake/external/openblas.cmake new file mode 100644 index 0000000000000000000000000000000000000000..db09232c0e69016bf18c1d981e4620e9e804ff7c --- /dev/null +++ b/cmake/external/openblas.cmake @@ -0,0 +1,82 @@ +# Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserve. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. 
+# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +INCLUDE(cblas) + +IF(NOT ${CBLAS_FOUND}) + INCLUDE(ExternalProject) + + SET(CBLAS_SOURCES_DIR ${THIRD_PARTY_PATH}/openblas) + SET(CBLAS_INSTALL_DIR ${THIRD_PARTY_PATH}/install/openblas) + SET(CBLAS_INC_DIR "${CBLAS_INSTALL_DIR}/include" CACHE PATH "openblas include directory." FORCE) + + SET(CBLAS_LIBRARIES + "${CBLAS_INSTALL_DIR}/lib/${CMAKE_STATIC_LIBRARY_PREFIX}openblas${CMAKE_STATIC_LIBRARY_SUFFIX}" + CACHE FILEPATH "openblas library." FORCE) + + SET(COMMON_ARGS CC=${CMAKE_C_COMPILER} NO_SHARED=1 NO_LAPACK=1 libs) + + IF(CMAKE_CROSSCOMPILING) + IF(ANDROID) + # arm_soft_fp_abi branch of OpenBLAS to support softfp + # https://github.com/xianyi/OpenBLAS/tree/arm_soft_fp_abi + SET(OPENBLAS_COMMIT "b5c96fcfcdc82945502a2303116a64d89985daf5") + IF(ANDROID_ABI MATCHES "^armeabi(-v7a)?$") + SET(TARGET "ARMV7") + ELSEIF(ANDROID_ABI STREQUAL "arm64-v8a") + SET(TARGET "ARMV8") + ENDIF() + SET(OPTIONAL_ARGS HOSTCC=${HOST_C_COMPILER} TARGET=${TARGET} ARM_SOFTFP_ABI=1 USE_THREAD=0) + ELSEIF(RPI) + # use hardfp + SET(OPENBLAS_COMMIT "v0.2.19") + SET(OPTIONAL_ARGS HOSTCC=${HOST_C_COMPILER} TARGET=ARMV7 USE_THREAD=0) + ENDIF() + ELSE() + SET(OPENBLAS_COMMIT "v0.2.19") + SET(OPTIONAL_ARGS "") + IF(CMAKE_SYSTEM_PROCESSOR MATCHES "^x86(_64)?$") + SET(OPTIONAL_ARGS DYNAMIC_ARCH=1 NUM_THREADS=64) + ENDIF() + ENDIF() + + ExternalProject_Add( + extern_openblas + ${EXTERNAL_PROJECT_LOG_ARGS} + GIT_REPOSITORY https://github.com/xianyi/OpenBLAS.git + GIT_TAG ${OPENBLAS_COMMIT} + PREFIX ${CBLAS_SOURCES_DIR} + INSTALL_DIR ${CBLAS_INSTALL_DIR} + BUILD_IN_SOURCE 1 + BUILD_COMMAND ${CMAKE_MAKE_PROGRAM} ${COMMON_ARGS} ${OPTIONAL_ARGS} + INSTALL_COMMAND ${CMAKE_MAKE_PROGRAM} install NO_SHARED=1 NO_LAPACK=1 PREFIX= + UPDATE_COMMAND "" + CONFIGURE_COMMAND "" + ) +ENDIF(NOT ${CBLAS_FOUND}) + +MESSAGE(STATUS "BLAS library: ${CBLAS_LIBRARIES}") +INCLUDE_DIRECTORIES(${CBLAS_INC_DIR}) + +# FIXME(gangliao): generate cblas target to track all high performance +# linear algebra libraries for cc_library(xxx SRCS xxx.c DEPS cblas) +SET(dummyfile ${CMAKE_CURRENT_BINARY_DIR}/cblas_dummy.c) +FILE(WRITE ${dummyfile} "const char * dummy = \"${dummyfile}\";") +ADD_LIBRARY(cblas STATIC ${dummyfile}) +TARGET_LINK_LIBRARIES(cblas ${CBLAS_LIBRARIES}) + +IF(NOT ${CBLAS_FOUND}) + ADD_DEPENDENCIES(cblas extern_openblas) + LIST(APPEND external_project_dependencies cblas) +ENDIF(NOT ${CBLAS_FOUND}) diff --git a/cmake/external/protobuf.cmake b/cmake/external/protobuf.cmake new file mode 100644 index 0000000000000000000000000000000000000000..e629d61585c2d2ff916187ee28d4fd089a5bd857 --- /dev/null +++ b/cmake/external/protobuf.cmake @@ -0,0 +1,233 @@ +# Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserve. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. 
+# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +INCLUDE(ExternalProject) +# Always invoke `FIND_PACKAGE(Protobuf)` for importing function protobuf_generate_cpp +FIND_PACKAGE(Protobuf QUIET) +SET(PROTOBUF_FOUND "OFF") + +if(NOT COMMAND protobuf_generate_python) # before cmake 3.4, protobuf_genrerate_python is not defined. + function(protobuf_generate_python SRCS) + # shameless copy from https://github.com/Kitware/CMake/blob/master/Modules/FindProtobuf.cmake + if(NOT ARGN) + message(SEND_ERROR "Error: PROTOBUF_GENERATE_PYTHON() called without any proto files") + return() + endif() + + if(PROTOBUF_GENERATE_CPP_APPEND_PATH) + # Create an include path for each file specified + foreach(FIL ${ARGN}) + get_filename_component(ABS_FIL ${FIL} ABSOLUTE) + get_filename_component(ABS_PATH ${ABS_FIL} PATH) + list(FIND _protobuf_include_path ${ABS_PATH} _contains_already) + if(${_contains_already} EQUAL -1) + list(APPEND _protobuf_include_path -I ${ABS_PATH}) + endif() + endforeach() + else() + set(_protobuf_include_path -I ${CMAKE_CURRENT_SOURCE_DIR}) + endif() + + if(DEFINED PROTOBUF_IMPORT_DIRS AND NOT DEFINED Protobuf_IMPORT_DIRS) + set(Protobuf_IMPORT_DIRS "${PROTOBUF_IMPORT_DIRS}") + endif() + + if(DEFINED Protobuf_IMPORT_DIRS) + foreach(DIR ${Protobuf_IMPORT_DIRS}) + get_filename_component(ABS_PATH ${DIR} ABSOLUTE) + list(FIND _protobuf_include_path ${ABS_PATH} _contains_already) + if(${_contains_already} EQUAL -1) + list(APPEND _protobuf_include_path -I ${ABS_PATH}) + endif() + endforeach() + endif() + + set(${SRCS}) + foreach(FIL ${ARGN}) + get_filename_component(ABS_FIL ${FIL} ABSOLUTE) + get_filename_component(FIL_WE ${FIL} NAME_WE) + if(NOT PROTOBUF_GENERATE_CPP_APPEND_PATH) + get_filename_component(FIL_DIR ${FIL} DIRECTORY) + if(FIL_DIR) + set(FIL_WE "${FIL_DIR}/${FIL_WE}") + endif() + endif() + + list(APPEND ${SRCS} "${CMAKE_CURRENT_BINARY_DIR}/${FIL_WE}_pb2.py") + add_custom_command( + OUTPUT "${CMAKE_CURRENT_BINARY_DIR}/${FIL_WE}_pb2.py" + COMMAND ${Protobuf_PROTOC_EXECUTABLE} --python_out ${CMAKE_CURRENT_BINARY_DIR} ${_protobuf_include_path} ${ABS_FIL} + DEPENDS ${ABS_FIL} ${Protobuf_PROTOC_EXECUTABLE} + COMMENT "Running Python protocol buffer compiler on ${FIL}" + VERBATIM ) + endforeach() + + set(${SRCS} ${${SRCS}} PARENT_SCOPE) + endfunction() +endif() + +# Print and set the protobuf library information, +# finish this cmake process and exit from this file. +macro(PROMPT_PROTOBUF_LIB) + SET(protobuf_DEPS ${ARGN}) + + MESSAGE(STATUS "Protobuf protoc executable: ${PROTOBUF_PROTOC_EXECUTABLE}") + MESSAGE(STATUS "Protobuf library: ${PROTOBUF_LIBRARY}") + MESSAGE(STATUS "Protobuf version: ${PROTOBUF_VERSION}") + INCLUDE_DIRECTORIES(${PROTOBUF_INCLUDE_DIR}) + + # Assuming that all the protobuf libraries are of the same type. 
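+    # On Linux, for example, a ${PROTOBUF_LIBRARY} path ending in .a selects
+    # STATIC and one ending in .so selects SHARED; the lite and protoc
+    # libraries are then imported with the same type.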
+ IF(${PROTOBUF_LIBRARY} MATCHES "${CMAKE_STATIC_LIBRARY_SUFFIX}$") + SET(protobuf_LIBTYPE STATIC) + ELSEIF(${PROTOBUF_LIBRARY} MATCHES "${CMAKE_SHARED_LIBRARY_SUFFIX}$") + SET(protobuf_LIBTYPE SHARED) + ELSE() + MESSAGE(FATAL_ERROR "Unknown library type: ${PROTOBUF_LIBRARY}") + ENDIF() + + ADD_LIBRARY(protobuf ${protobuf_LIBTYPE} IMPORTED GLOBAL) + SET_PROPERTY(TARGET protobuf PROPERTY IMPORTED_LOCATION ${PROTOBUF_LIBRARY}) + + ADD_LIBRARY(protobuf_lite ${protobuf_LIBTYPE} IMPORTED GLOBAL) + SET_PROPERTY(TARGET protobuf_lite PROPERTY IMPORTED_LOCATION ${PROTOBUF_LITE_LIBRARY}) + + ADD_LIBRARY(libprotoc ${protobuf_LIBTYPE} IMPORTED GLOBAL) + SET_PROPERTY(TARGET libprotoc PROPERTY IMPORTED_LOCATION ${PROTOC_LIBRARY}) + + ADD_EXECUTABLE(protoc IMPORTED GLOBAL) + SET_PROPERTY(TARGET protoc PROPERTY IMPORTED_LOCATION ${PROTOBUF_PROTOC_EXECUTABLE}) + # FIND_Protobuf.cmake uses `Protobuf_PROTOC_EXECUTABLE`. + # make `protobuf_generate_cpp` happy. + SET(Protobuf_PROTOC_EXECUTABLE ${PROTOBUF_PROTOC_EXECUTABLE}) + + FOREACH(dep ${protobuf_DEPS}) + ADD_DEPENDENCIES(protobuf ${dep}) + ADD_DEPENDENCIES(protobuf_lite ${dep}) + ADD_DEPENDENCIES(libprotoc ${dep}) + ADD_DEPENDENCIES(protoc ${dep}) + ENDFOREACH() + + LIST(APPEND external_project_dependencies protobuf) + RETURN() +endmacro() +macro(SET_PROTOBUF_VERSION) + EXEC_PROGRAM(${PROTOBUF_PROTOC_EXECUTABLE} ARGS --version OUTPUT_VARIABLE PROTOBUF_VERSION) + STRING(REGEX MATCH "[0-9]+.[0-9]+" PROTOBUF_VERSION "${PROTOBUF_VERSION}") +endmacro() + +set(PROTOBUF_ROOT "" CACHE PATH "Folder contains protobuf") +if (NOT "${PROTOBUF_ROOT}" STREQUAL "") + find_path(PROTOBUF_INCLUDE_DIR google/protobuf/message.h PATHS ${PROTOBUF_ROOT}/include) + find_library(PROTOBUF_LIBRARY protobuf PATHS ${PROTOBUF_ROOT}/lib) + find_library(PROTOBUF_LITE_LIBRARY protobuf-lite PATHS ${PROTOBUF_ROOT}/lib) + find_library(PROTOBUF_PROTOC_LIBRARY protoc PATHS ${PROTOBUF_ROOT}/lib) + find_program(PROTOBUF_PROTOC_EXECUTABLE protoc PATHS ${PROTOBUF_ROOT}/bin) + if (PROTOBUF_INCLUDE_DIR AND PROTOBUF_LIBRARY AND PROTOBUF_LITE_LIBRARY AND PROTOBUF_PROTOC_LIBRARY AND PROTOBUF_PROTOC_EXECUTABLE) + message(STATUS "Using custom protobuf library in ${PROTOBUF_ROOT}.") + SET_PROTOBUF_VERSION() + PROMPT_PROTOBUF_LIB() + else() + message(WARNING "Cannot find protobuf library in ${PROTOBUF_ROOT}.") + endif() +endif() + +FUNCTION(build_protobuf TARGET_NAME BUILD_FOR_HOST) + STRING(REPLACE "extern_" "" TARGET_DIR_NAME "${TARGET_NAME}") + SET(PROTOBUF_SOURCES_DIR ${THIRD_PARTY_PATH}/${TARGET_DIR_NAME}) + SET(PROTOBUF_INSTALL_DIR ${THIRD_PARTY_PATH}/install/${TARGET_DIR_NAME}) + + SET(${TARGET_NAME}_INCLUDE_DIR "${PROTOBUF_INSTALL_DIR}/include" PARENT_SCOPE) + SET(PROTOBUF_INCLUDE_DIR "${PROTOBUF_INSTALL_DIR}/include" PARENT_SCOPE) + SET(${TARGET_NAME}_LITE_LIBRARY + "${PROTOBUF_INSTALL_DIR}/lib/libprotobuf-lite${CMAKE_STATIC_LIBRARY_SUFFIX}" + PARENT_SCOPE) + SET(${TARGET_NAME}_LIBRARY + "${PROTOBUF_INSTALL_DIR}/lib/libprotobuf${CMAKE_STATIC_LIBRARY_SUFFIX}" + PARENT_SCOPE) + SET(${TARGET_NAME}_PROTOC_LIBRARY + "${PROTOBUF_INSTALL_DIR}/lib/libprotoc${CMAKE_STATIC_LIBRARY_SUFFIX}" + PARENT_SCOPE) + SET(${TARGET_NAME}_PROTOC_EXECUTABLE + "${PROTOBUF_INSTALL_DIR}/bin/protoc${CMAKE_EXECUTABLE_SUFFIX}" + PARENT_SCOPE) + + SET(OPTIONAL_CACHE_ARGS "") + SET(OPTIONAL_ARGS "") + IF(BUILD_FOR_HOST) + SET(OPTIONAL_ARGS "-Dprotobuf_WITH_ZLIB=OFF") + ELSE() + SET(OPTIONAL_ARGS + "-DCMAKE_CXX_COMPILER=${CMAKE_CXX_COMPILER}" + "-DCMAKE_C_COMPILER=${CMAKE_C_COMPILER}" + "-DCMAKE_CXX_FLAGS=${CMAKE_CXX_FLAGS}" + 
"-DCMAKE_C_FLAGS=${CMAKE_C_FLAGS}" + "-Dprotobuf_WITH_ZLIB=ON" + "-DZLIB_ROOT:FILEPATH=${ZLIB_ROOT}") + SET(OPTIONAL_CACHE_ARGS "-DZLIB_ROOT:STRING=${ZLIB_ROOT}") + ENDIF() + + ExternalProject_Add( + ${TARGET_NAME} + ${EXTERNAL_PROJECT_LOG_ARGS} + PREFIX ${PROTOBUF_SOURCES_DIR} + UPDATE_COMMAND "" + DEPENDS zlib + GIT_REPOSITORY "https://github.com/google/protobuf.git" + GIT_TAG "9f75c5aa851cd877fb0d93ccc31b8567a6706546" + CONFIGURE_COMMAND + ${CMAKE_COMMAND} ${PROTOBUF_SOURCES_DIR}/src/${TARGET_NAME}/cmake + ${OPTIONAL_ARGS} + -Dprotobuf_BUILD_TESTS=OFF + -DCMAKE_POSITION_INDEPENDENT_CODE=ON + -DCMAKE_BUILD_TYPE=Release + -DCMAKE_INSTALL_PREFIX=${PROTOBUF_INSTALL_DIR} + -DCMAKE_INSTALL_LIBDIR=lib + CMAKE_CACHE_ARGS + -DCMAKE_INSTALL_PREFIX:PATH=${PROTOBUF_INSTALL_DIR} + -DCMAKE_BUILD_TYPE:STRING=Release + -DCMAKE_VERBOSE_MAKEFILE:BOOL=OFF + -DCMAKE_POSITION_INDEPENDENT_CODE:BOOL=ON + ${OPTIONAL_CACHE_ARGS} + ) +ENDFUNCTION() + +SET(PROTOBUF_VERSION 3.1) +IF(CMAKE_CROSSCOMPILING) + build_protobuf(protobuf_host TRUE) + LIST(APPEND external_project_dependencies protobuf_host) + + SET(PROTOBUF_PROTOC_EXECUTABLE ${protobuf_host_PROTOC_EXECUTABLE} + CACHE FILEPATH "protobuf executable." FORCE) +ENDIF() + +IF(NOT PROTOBUF_FOUND) + build_protobuf(extern_protobuf FALSE) + + SET(PROTOBUF_INCLUDE_DIR ${extern_protobuf_INCLUDE_DIR} + CACHE PATH "protobuf include directory." FORCE) + SET(PROTOBUF_LITE_LIBRARY ${extern_protobuf_LITE_LIBRARY} + CACHE FILEPATH "protobuf lite library." FORCE) + SET(PROTOBUF_LIBRARY ${extern_protobuf_LIBRARY} + CACHE FILEPATH "protobuf library." FORCE) + SET(PROTOBUF_PROTOC_LIBRARY ${extern_protobuf_PROTOC_LIBRARY} + CACHE FILEPATH "protoc library." FORCE) + + IF(CMAKE_CROSSCOMPILING) + PROMPT_PROTOBUF_LIB(protobuf_host extern_protobuf) + ELSE() + SET(PROTOBUF_PROTOC_EXECUTABLE ${extern_protobuf_PROTOC_EXECUTABLE} + CACHE FILEPATH "protobuf executable." FORCE) + PROMPT_PROTOBUF_LIB(extern_protobuf) + ENDIF() +ENDIF(NOT PROTOBUF_FOUND) diff --git a/cmake/external/pybind11.cmake b/cmake/external/pybind11.cmake new file mode 100644 index 0000000000000000000000000000000000000000..9391c285c7544669a5b1a078b7473d7a656c1bb4 --- /dev/null +++ b/cmake/external/pybind11.cmake @@ -0,0 +1,30 @@ +INCLUDE(ExternalProject) + +SET(PYBIND_SOURCE_DIR ${THIRD_PARTY_PATH}/pybind) + +INCLUDE_DIRECTORIES(${PYBIND_SOURCE_DIR}/src/extern_pybind/include) + +ExternalProject_Add( + extern_pybind + ${EXTERNAL_PROJECT_LOG_ARGS} + GIT_REPOSITORY "https://github.com/pybind/pybind11.git" + GIT_TAG "v2.1.1" + PREFIX ${PYBIND_SOURCE_DIR} + UPDATE_COMMAND "" + CONFIGURE_COMMAND "" + BUILD_COMMAND "" + INSTALL_COMMAND "" + TEST_COMMAND "" +) + +if (${CMAKE_VERSION} VERSION_LESS "3.3.0") + set(dummyfile ${CMAKE_CURRENT_BINARY_DIR}/pybind_dummy.c) + file(WRITE ${dummyfile} "const char * dummy_any = \"${dummyfile}\";") + add_library(pybind STATIC ${dummyfile}) +else() + add_library(pybind INTERFACE) +endif() + +add_dependencies(pybind extern_pybind) + +LIST(APPEND external_project_dependencies pybind) diff --git a/cmake/external/python.cmake b/cmake/external/python.cmake new file mode 100644 index 0000000000000000000000000000000000000000..490c87d67ed79a238dd506127cd4d9855fab6626 --- /dev/null +++ b/cmake/external/python.cmake @@ -0,0 +1,44 @@ +# Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserve. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. 
+# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +INCLUDE(ExternalProject) +INCLUDE(python_module) + +FIND_PACKAGE(PythonInterp 2.7) +IF(WITH_PYTHON) + FIND_PACKAGE(PythonLibs 2.7) + # Fixme: Maybe find a static library. Get SHARED/STATIC by FIND_PACKAGE. + ADD_LIBRARY(python SHARED IMPORTED GLOBAL) + SET_PROPERTY(TARGET python PROPERTY IMPORTED_LOCATION ${PYTHON_LIBRARIES}) +ENDIF(WITH_PYTHON) + +SET(py_env "") +IF(PYTHONINTERP_FOUND) + find_python_module(pip REQUIRED) + find_python_module(numpy REQUIRED) + find_python_module(wheel REQUIRED) + find_python_module(google.protobuf REQUIRED) + FIND_PACKAGE(NumPy REQUIRED) + IF(${PY_GOOGLE.PROTOBUF_VERSION} AND ${PY_GOOGLE.PROTOBUF_VERSION} VERSION_LESS "3.0.0") + MESSAGE(FATAL_ERROR "Found Python Protobuf ${PY_GOOGLE.PROTOBUF_VERSION} < 3.0.0, " + "please use pip to upgrade protobuf. pip install -U protobuf") + ENDIF() +ENDIF(PYTHONINTERP_FOUND) + +IF(WITH_PYTHON) + INCLUDE_DIRECTORIES(${PYTHON_INCLUDE_DIR}) + INCLUDE_DIRECTORIES(${PYTHON_NUMPY_INCLUDE_DIR}) +ELSE() + SET(PYTHON_LIBRARIES "") +ENDIF() diff --git a/cmake/external/swig.cmake b/cmake/external/swig.cmake new file mode 100644 index 0000000000000000000000000000000000000000..744c766ee7b067058b2cb4aa7f7b761cbb9778d4 --- /dev/null +++ b/cmake/external/swig.cmake @@ -0,0 +1,61 @@ +# Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserve. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
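
The find_python_module checks in python.cmake above come from the python_module helper INCLUDEd at the top of that file; the underlying idea is simply to probe the interpreter. A minimal standalone sketch of that idea (variable names invented):

# Ask the interpreter for a module's version; fail the configure step if the
# module is absent.
execute_process(
    COMMAND ${PYTHON_EXECUTABLE} -c "import numpy; print(numpy.__version__)"
    RESULT_VARIABLE _numpy_status
    OUTPUT_VARIABLE _numpy_version
    OUTPUT_STRIP_TRAILING_WHITESPACE)
if(NOT _numpy_status EQUAL 0)
    message(FATAL_ERROR "numpy is required: pip install numpy")
endif()
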
+
+FIND_PACKAGE(SWIG)
+
+IF(NOT SWIG_FOUND)
+    # Build SWIG as an external project.
+    INCLUDE(ExternalProject)
+
+    SET(SWIG_SOURCES_DIR ${THIRD_PARTY_PATH}/swig)
+    SET(SWIG_INSTALL_DIR ${THIRD_PARTY_PATH}/install/swig)
+    SET(SWIG_TARGET_VERSION "3.0.2")
+    SET(SWIG_DOWNLOAD_SRC_MD5 "62f9b0d010cef36a13a010dc530d0d41")
+    SET(SWIG_DOWNLOAD_WIN_MD5 "3f18de4fc09ab9abb0d3be37c11fbc8f")
+
+    IF(WIN32)
+        # swig.exe is available as a pre-built binary on Windows:
+        ExternalProject_Add(swig
+            URL http://prdownloads.sourceforge.net/swig/swigwin-${SWIG_TARGET_VERSION}.zip
+            URL_MD5 ${SWIG_DOWNLOAD_WIN_MD5}
+            SOURCE_DIR ${SWIG_SOURCES_DIR}
+            CONFIGURE_COMMAND ""
+            BUILD_COMMAND ""
+            INSTALL_COMMAND ""
+            UPDATE_COMMAND ""
+        )
+        SET(SWIG_DIR ${SWIG_SOURCES_DIR} CACHE FILEPATH "SWIG Directory" FORCE)
+        SET(SWIG_EXECUTABLE ${SWIG_SOURCES_DIR}/swig.exe CACHE FILEPATH "SWIG Executable" FORCE)
+    ELSE(WIN32)
+        # SWIG uses Bison; find it via CMake and pass it down.
+        FIND_PACKAGE(BISON)
+
+        # From SWIG configure
+        ExternalProject_Add(swig
+            GIT_REPOSITORY https://github.com/swig/swig.git
+            GIT_TAG rel-3.0.10
+            PREFIX ${SWIG_SOURCES_DIR}
+            CONFIGURE_COMMAND cd <SOURCE_DIR> && ./autogen.sh && ./configure
+                              --prefix=${SWIG_INSTALL_DIR} --without-pcre
+            BUILD_COMMAND cd <SOURCE_DIR> && make
+            INSTALL_COMMAND cd <SOURCE_DIR> && make install
+            UPDATE_COMMAND ""
+        )
+
+        SET(SWIG_DIR ${SWIG_INSTALL_DIR}/share/swig/${SWIG_TARGET_VERSION})
+        SET(SWIG_EXECUTABLE ${SWIG_INSTALL_DIR}/bin/swig)
+    ENDIF(WIN32)
+
+    LIST(APPEND external_project_dependencies swig)
+ENDIF(NOT SWIG_FOUND)
diff --git a/cmake/external/warpctc.cmake b/cmake/external/warpctc.cmake
new file mode 100644
index 0000000000000000000000000000000000000000..2d7daed9bcd5b8d854ffae6dc1ea191d154c16fe
--- /dev/null
+++ b/cmake/external/warpctc.cmake
@@ -0,0 +1,72 @@
+# Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserve.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
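
The non-Windows SWIG branch above is the general recipe for wrapping an autotools package in ExternalProject_Add: override the configure, build, and install steps, using the <SOURCE_DIR> placeholder that ExternalProject expands to the checkout directory. A generic sketch (package name and URL hypothetical; the `cd ... && ...` chaining relies on a shell-backed generator such as Unix Makefiles, as the SWIG recipe does):

# Build a hypothetical autotools package "foo" as an external project.
ExternalProject_Add(extern_foo
    URL               https://example.org/foo-1.0.tar.gz  # hypothetical URL
    PREFIX            ${THIRD_PARTY_PATH}/foo
    CONFIGURE_COMMAND cd <SOURCE_DIR> && ./configure --prefix=<INSTALL_DIR>
    BUILD_COMMAND     cd <SOURCE_DIR> && make
    INSTALL_COMMAND   cd <SOURCE_DIR> && make install
    UPDATE_COMMAND    "")
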
+ +INCLUDE(ExternalProject) + +SET(WARPCTC_SOURCES_DIR ${THIRD_PARTY_PATH}/warpctc) +SET(WARPCTC_INSTALL_DIR ${THIRD_PARTY_PATH}/install/warpctc) +SET(WARPCTC_INCLUDE_DIR "${WARPCTC_INSTALL_DIR}/include" CACHE PATH "Warp-ctc Directory" FORCE) + +INCLUDE_DIRECTORIES(${WARPCTC_INCLUDE_DIR}) + +SET(WARPCTC_LIB_DIR "${WARPCTC_INSTALL_DIR}/lib" CACHE PATH "Warp-ctc Library Directory" FORCE) + +IF(WIN32) + SET(WARPCTC_LIBRARIES + "${WARPCTC_INSTALL_DIR}/lib/warpctc.dll" CACHE FILEPATH "Warp-ctc Library" FORCE) +ELSE(WIN32) + IF(APPLE) + SET(_warpctc_SHARED_SUFFIX dylib) + ELSE(APPLE) + SET(_warpctc_SHARED_SUFFIX so) + ENDIF(APPLE) + + SET(WARPCTC_LIBRARIES + "${WARPCTC_INSTALL_DIR}/lib/libwarpctc.${_warpctc_SHARED_SUFFIX}" CACHE FILEPATH "Warp-ctc Library" FORCE) +ENDIF(WIN32) + +IF(CMAKE_CXX_COMPILER_ID STREQUAL "Clang" OR CMAKE_CXX_COMPILER_ID STREQUAL "AppleClang" ) + SET(USE_OMP OFF) +ELSE() + SET(USE_OMP ON) +ENDIF() + +ExternalProject_Add( + extern_warpctc + ${EXTERNAL_PROJECT_LOG_ARGS} + GIT_REPOSITORY "https://github.com/gangliao/warp-ctc.git" + PREFIX ${WARPCTC_SOURCES_DIR} + UPDATE_COMMAND "" + CMAKE_ARGS -DCMAKE_CXX_COMPILER=${CMAKE_CXX_COMPILER} + CMAKE_ARGS -DCMAKE_C_COMPILER=${CMAKE_C_COMPILER} + CMAKE_ARGS -DCMAKE_CXX_FLAGS=${CMAKE_CXX_FLAGS} + CMAKE_ARGS -DCMAKE_C_FLAGS=${CMAKE_C_FLAGS} + CMAKE_ARGS -DCMAKE_INSTALL_PREFIX=${WARPCTC_INSTALL_DIR} + CMAKE_ARGS -DWITH_GPU=${WITH_GPU} + CMAKE_ARGS -DWITH_OMP=${USE_OMP} + CMAKE_ARGS -DWITH_TORCH=OFF + CMAKE_ARGS -DCMAKE_DISABLE_FIND_PACKAGE_Torch=ON + CMAKE_ARGS -DBUILD_SHARED=ON + CMAKE_ARGS -DCMAKE_POSITION_INDEPENDENT_CODE=ON + CMAKE_ARGS -DCMAKE_BUILD_TYPE=Release + CMAKE_CACHE_ARGS -DCMAKE_BUILD_TYPE:STRING=Release + -DCMAKE_POSITION_INDEPENDENT_CODE:BOOL=ON + -DCMAKE_INSTALL_PREFIX:PATH=${WARPCTC_INSTALL_DIR} +) + +ADD_LIBRARY(warpctc STATIC IMPORTED GLOBAL) +SET_PROPERTY(TARGET warpctc PROPERTY IMPORTED_LOCATION ${WARPCTC_LIBRARIES}) +ADD_DEPENDENCIES(warpctc extern_warpctc) + +LIST(APPEND external_project_dependencies warpctc) diff --git a/cmake/external/zlib.cmake b/cmake/external/zlib.cmake new file mode 100644 index 0000000000000000000000000000000000000000..45ca5542b7dc30216b45487782f849b93c5f8fca --- /dev/null +++ b/cmake/external/zlib.cmake @@ -0,0 +1,51 @@ +# Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserve. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +INCLUDE(ExternalProject) + +SET(ZLIB_SOURCES_DIR ${THIRD_PARTY_PATH}/zlib) +SET(ZLIB_INSTALL_DIR ${THIRD_PARTY_PATH}/install/zlib) +SET(ZLIB_ROOT ${ZLIB_INSTALL_DIR} CACHE FILEPATH "zlib root directory." FORCE) +SET(ZLIB_INCLUDE_DIR "${ZLIB_INSTALL_DIR}/include" CACHE PATH "zlib include directory." FORCE) + +IF(WIN32) + SET(ZLIB_LIBRARIES "${ZLIB_INSTALL_DIR}/lib/zlibstatic.lib" CACHE FILEPATH "zlib library." FORCE) +ELSE(WIN32) + SET(ZLIB_LIBRARIES "${ZLIB_INSTALL_DIR}/lib/libz.a" CACHE FILEPATH "zlib library." 
FORCE)
+ENDIF(WIN32)
+
+INCLUDE_DIRECTORIES(${ZLIB_INCLUDE_DIR})
+
+ExternalProject_Add(
+    zlib
+    ${EXTERNAL_PROJECT_LOG_ARGS}
+    GIT_REPOSITORY    "https://github.com/madler/zlib.git"
+    GIT_TAG           "v1.2.8"
+    PREFIX            ${ZLIB_SOURCES_DIR}
+    UPDATE_COMMAND    ""
+    CMAKE_ARGS        -DCMAKE_CXX_COMPILER=${CMAKE_CXX_COMPILER}
+    CMAKE_ARGS        -DCMAKE_C_COMPILER=${CMAKE_C_COMPILER}
+    CMAKE_ARGS        -DCMAKE_CXX_FLAGS=${CMAKE_CXX_FLAGS}
+    CMAKE_ARGS        -DCMAKE_C_FLAGS=${CMAKE_C_FLAGS}
+    CMAKE_ARGS        -DCMAKE_INSTALL_PREFIX=${ZLIB_INSTALL_DIR}
+    CMAKE_ARGS        -DBUILD_SHARED_LIBS=OFF
+    CMAKE_ARGS        -DCMAKE_POSITION_INDEPENDENT_CODE=ON
+    CMAKE_ARGS        -DCMAKE_MACOSX_RPATH=ON
+    CMAKE_ARGS        -DCMAKE_BUILD_TYPE=Release
+    CMAKE_CACHE_ARGS  -DCMAKE_INSTALL_PREFIX:PATH=${ZLIB_INSTALL_DIR}
+                      -DCMAKE_POSITION_INDEPENDENT_CODE:BOOL=ON
+                      -DCMAKE_BUILD_TYPE:STRING=Release
+)
+
+LIST(APPEND external_project_dependencies zlib)
diff --git a/cmake/flags.cmake b/cmake/flags.cmake
index 0983d83b73a32d0615170155759d45001cc6ff54..e26d8d9df386e65137aa83cc60a43bfeabf7a4a6 100644
--- a/cmake/flags.cmake
+++ b/cmake/flags.cmake
@@ -2,18 +2,18 @@
 include(CheckCXXCompilerFlag)
 include(CheckCCompilerFlag)
 include(CheckCXXSymbolExists)
-
-if(NOT CMAKE_BUILD_TYPE)
-  set(CMAKE_BUILD_TYPE "RelWithDebInfo" CACHE STRING
-    "Choose the type of build, options are: Debug Release RelWithDebInfo MinSizeRel"
-    FORCE)
-endif()
+include(CheckTypeSize)
 
 function(CheckCompilerCXX11Flag)
     if(CMAKE_CXX_COMPILER_ID STREQUAL "GNU")
         if(${CMAKE_CXX_COMPILER_VERSION} VERSION_LESS 4.8)
             message(FATAL_ERROR "Unsupported GCC version. GCC >= 4.8 required.")
         endif()
+        # TODO(qijun) gcc 4.9 or later versions raise SEGV due to the optimization problem.
+        # Use Debug mode instead for now.
+        if(CMAKE_CXX_COMPILER_VERSION VERSION_GREATER 4.9 OR CMAKE_CXX_COMPILER_VERSION VERSION_EQUAL 4.9)
+            set(CMAKE_BUILD_TYPE "Debug" CACHE STRING "" FORCE)
+        endif()
     elseif(CMAKE_CXX_COMPILER_ID STREQUAL "AppleClang" OR CMAKE_CXX_COMPILER_ID STREQUAL "Clang")
         # cmake >= 3.0 compiler id "AppleClang" on Mac OS X, otherwise "Clang"
         # Apple Clang is a different compiler than upstream Clang which has different version numbers.
@@ -31,7 +31,7 @@ function(CheckCompilerCXX11Flag)
 endfunction()
 
 CheckCompilerCXX11Flag()
-LIST(APPEND CMAKE_CXX_FLAGS -std=c++11)
+set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -std=c++11")
 
 # safe_set_flag
 #
@@ -89,6 +89,17 @@ if(NOT UINT64_MAX_EXISTS)
   endif()
 endif()
 
+SET(CMAKE_EXTRA_INCLUDE_FILES "pthread.h")
+CHECK_TYPE_SIZE(pthread_spinlock_t SPINLOCK_FOUND)
+CHECK_TYPE_SIZE(pthread_barrier_t BARRIER_FOUND)
+if(SPINLOCK_FOUND)
+  add_definitions(-DPADDLE_USE_PTHREAD_SPINLOCK)
+endif(SPINLOCK_FOUND)
+if(BARRIER_FOUND)
+  add_definitions(-DPADDLE_USE_PTHREAD_BARRIER)
+endif(BARRIER_FOUND)
+SET(CMAKE_EXTRA_INCLUDE_FILES "")
+
 # Common flags. The compiler flags used for C/C++ sources in both release and debug builds.
 # Do not care whether every flag is supported by gcc.
 set(COMMON_FLAGS
@@ -102,7 +113,10 @@ set(COMMON_FLAGS
     -Wno-unused-parameter
     -Wno-unused-function
     -Wno-error=literal-suffix
-    -Wno-error=unused-local-typedefs)
+    -Wno-error=sign-compare
+    -Wno-error=unused-local-typedefs
+    -Wno-error=parentheses-equality # Warnings in pybind11
+)
 
 set(GPU_COMMON_FLAGS
     -fPIC
@@ -111,9 +125,11 @@ set(GPU_COMMON_FLAGS
     -Wdelete-non-virtual-dtor
     -Wno-unused-parameter
    -Wno-unused-function
+    -Wno-error=sign-compare
    -Wno-error=literal-suffix
    -Wno-error=unused-local-typedefs
    -Wno-error=unused-function # Warnings in Numpy Header.
+    -Wno-error=array-bounds # Warnings in Eigen::array
 )
 
 if (APPLE)
@@ -142,7 +158,7 @@ set(CUDA_PROPAGATE_HOST_FLAGS OFF)
 # Release/Debug flags set by cmake, such as -O3 -g -DNDEBUG etc.
 # So, don't set these flags here.
-LIST(APPEND CUDA_NVCC_FLAGS -std=c++11)
+LIST(APPEND CUDA_NVCC_FLAGS -std=c++11 --default-stream per-thread)
 LIST(APPEND CUDA_NVCC_FLAGS --use_fast_math)
 
 if(CMAKE_BUILD_TYPE STREQUAL "Debug")
@@ -179,6 +195,7 @@ endif()
 # Modern gpu architectures: Pascal
 if (CUDA_VERSION VERSION_GREATER "8.0" OR CUDA_VERSION VERSION_EQUAL "8.0")
     list(APPEND __arch_flags " -gencode arch=compute_60,code=sm_60")
+    list(APPEND CUDA_NVCC_FLAGS --expt-relaxed-constexpr)
 endif()
 
 # Custom gpu architecture
@@ -189,3 +206,4 @@ if(CUDA_ARCH)
 endif()
 
 set(CUDA_NVCC_FLAGS ${__arch_flags} ${CUDA_NVCC_FLAGS})
+
diff --git a/cmake/generic.cmake b/cmake/generic.cmake
new file mode 100644
index 0000000000000000000000000000000000000000..957c20bcf603f2f264b4658f63ac0eec438f12b1
--- /dev/null
+++ b/cmake/generic.cmake
@@ -0,0 +1,418 @@
+# Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserve.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+#
+
+
+# generic.cmake defines CMake functions that look like Bazel's
+# building rules (https://bazel.build/).
+#
+#
+# -------------------------------------------
+#     C++          CUDA C++      Go
+# -------------------------------------------
+# cc_library     nv_library    go_library
+# cc_binary      nv_binary     go_binary
+# cc_test        nv_test       go_test
+# -------------------------------------------
+#
+# To build a static library example.a from example.cc using the system
+# compiler (like GCC):
+#
+#   cc_library(example SRCS example.cc)
+#
+# To build a static library example.a from multiple source files
+# example{1,2,3}.cc:
+#
+#   cc_library(example SRCS example1.cc example2.cc example3.cc)
+#
+# To build a shared library example.so from example.cc:
+#
+#   cc_library(example SHARED SRCS example.cc)
+#
+# To build a library using Nvidia's NVCC from .cu file(s), use the nv_
+# prefixed version:
+#
+#   nv_library(example SRCS example.cu)
+#
+# To specify that a library new_example.a depends on other libraries:
+#
+#   cc_library(new_example SRCS new_example.cc DEPS example)
+#
+# Static libraries can be composed of other static libraries:
+#
+#   cc_library(composed DEPS dependent1 dependent2 dependent3)
+#
+# To build an executable binary file from some source files and
+# dependent libraries:
+#
+#   cc_binary(example SRCS main.cc something.cc DEPS example1 example2)
+#
+# To build an executable binary file using NVCC, use the nv_ prefixed
+# version:
+#
+#   nv_binary(example SRCS main.cc something.cu DEPS example1 example2)
+#
+# To build a unit test binary, which is an executable binary with
+# GoogleTest linked:
+#
+#   cc_test(example_test SRCS example_test.cc DEPS example)
+#
+# To build a unit test binary using NVCC, use the nv_ prefixed version:
+#
+#   nv_test(example_test SRCS example_test.cu DEPS example)
+#
+# It is pretty often that executable and test binaries depend on
+# pre-defined external libraries like glog and gflags defined in
+# /cmake/external/*.cmake:
+#
+#   cc_test(example_test SRCS example_test.cc DEPS example glog gflags)
+#
+# To build a go static library using Golang, use the go_ prefixed version:
+#
+#   go_library(example STATIC)
+#
+# To build a go shared library using Golang, use the go_ prefixed version:
+#
+#   go_library(example SHARED)
+#
+
+# Include the binary directory for generated headers.
+include_directories(${CMAKE_CURRENT_BINARY_DIR})
+
+if(NOT APPLE AND NOT ANDROID)
+  find_package(Threads REQUIRED)
+  link_libraries(${CMAKE_THREAD_LIBS_INIT})
+  set(CMAKE_CXX_LINK_EXECUTABLE "${CMAKE_CXX_LINK_EXECUTABLE} -ldl -lrt")
+endif(NOT APPLE AND NOT ANDROID)
+
+function(merge_static_libs TARGET_NAME)
+  set(libs ${ARGN})
+  list(REMOVE_DUPLICATES libs)
+
+  # Get all propagated dependencies from the merged libraries
+  foreach(lib ${libs})
+    list(APPEND libs_deps ${${lib}_LIB_DEPENDS})
+  endforeach()
+  list(REMOVE_DUPLICATES libs_deps)
+
+  if(APPLE) # Use OSX's libtool to merge archives
+    # To produce a library we need at least one source file.
+    # It is created by add_custom_command below and also helps
+    # to track dependencies.
+    set(dummyfile ${CMAKE_CURRENT_BINARY_DIR}/${TARGET_NAME}_dummy.c)
+
+    # Make the generated dummy source file depend on all static input
+    # libs. If an input lib changes, the source file is touched,
+    # which causes the desired effect (relink).
+    add_custom_command(OUTPUT ${dummyfile}
+      COMMAND ${CMAKE_COMMAND} -E touch ${dummyfile}
+      DEPENDS ${libs})
+
+    # Generate dummy static lib
+    file(WRITE ${dummyfile} "const char * dummy = \"${dummyfile}\";")
+    add_library(${TARGET_NAME} STATIC ${dummyfile})
+    target_link_libraries(${TARGET_NAME} ${libs_deps})
+
+    foreach(lib ${libs})
+      # Get the file names of the libraries to be merged
+      set(libfiles ${libfiles} $<TARGET_FILE:${lib}>)
+    endforeach()
+    add_custom_command(TARGET ${TARGET_NAME} POST_BUILD
+      COMMAND rm "${CMAKE_CURRENT_BINARY_DIR}/lib${TARGET_NAME}.a"
+      COMMAND /usr/bin/libtool -static -o "${CMAKE_CURRENT_BINARY_DIR}/lib${TARGET_NAME}.a" ${libfiles})
+  else() # general UNIX: use "ar" to extract objects and re-add to a common lib
+    foreach(lib ${libs})
+      set(objlistfile ${lib}.objlist) # list of objects in the input library
+      set(objdir ${lib}.objdir)
+
+      add_custom_command(OUTPUT ${objdir}
+        COMMAND ${CMAKE_COMMAND} -E make_directory ${objdir}
+        DEPENDS ${lib})
+
+      add_custom_command(OUTPUT ${objlistfile}
+        COMMAND ${CMAKE_AR} -x "$<TARGET_FILE:${lib}>"
+        COMMAND ${CMAKE_AR} -t "$<TARGET_FILE:${lib}>" > ../${objlistfile}
+        DEPENDS ${lib} ${objdir}
+        WORKING_DIRECTORY ${objdir})
+
+      # Empty dummy source file that goes into merged library
+      set(mergebase ${lib}.mergebase.c)
+      add_custom_command(OUTPUT ${mergebase}
+        COMMAND ${CMAKE_COMMAND} -E touch ${mergebase}
+        DEPENDS ${objlistfile})
+
+      list(APPEND mergebases "${mergebase}")
+    endforeach()
+
+    add_library(${TARGET_NAME} STATIC ${mergebases})
+    target_link_libraries(${TARGET_NAME} ${libs_deps})
+
+    # Get the file name of the generated library
+    set(outlibfile "$<TARGET_FILE:${TARGET_NAME}>")
+
+    foreach(lib ${libs})
+      add_custom_command(TARGET ${TARGET_NAME} POST_BUILD
+        COMMAND ${CMAKE_AR} cr ${outlibfile} *.o
+        COMMAND ${CMAKE_RANLIB} ${outlibfile}
+        WORKING_DIRECTORY ${lib}.objdir)
+    endforeach()
+  endif()
+endfunction(merge_static_libs)
+
+function(cc_library TARGET_NAME)
+  set(options STATIC static SHARED shared)
+  set(oneValueArgs "")
+  set(multiValueArgs SRCS DEPS)
+  cmake_parse_arguments(cc_library "${options}" "${oneValueArgs}" "${multiValueArgs}" ${ARGN})
+  if (cc_library_SRCS)
+    if (cc_library_SHARED OR cc_library_shared) # build *.so
+      add_library(${TARGET_NAME} SHARED ${cc_library_SRCS})
+    else()
+      add_library(${TARGET_NAME} STATIC ${cc_library_SRCS})
+    endif()
+    if (cc_library_DEPS)
+      add_dependencies(${TARGET_NAME} ${cc_library_DEPS})
+      target_link_libraries(${TARGET_NAME} ${cc_library_DEPS})
+    endif()
+
+    # cpplint code style
+    foreach(source_file ${cc_library_SRCS})
+      string(REGEX REPLACE "\\.[^.]*$" "" source ${source_file})
+      if(EXISTS ${CMAKE_CURRENT_SOURCE_DIR}/${source}.h)
+        list(APPEND cc_library_HEADERS ${CMAKE_CURRENT_SOURCE_DIR}/${source}.h)
+      endif()
+    endforeach()
+    add_style_check_target(${TARGET_NAME} ${cc_library_SRCS} ${cc_library_HEADERS})
+
+  else(cc_library_SRCS)
+    if (cc_library_DEPS)
+      merge_static_libs(${TARGET_NAME} ${cc_library_DEPS})
+    else()
+      message(FATAL_ERROR "Please specify source file or library in cc_library.")
+    endif()
+  endif(cc_library_SRCS)
+endfunction(cc_library)
+
+function(cc_binary TARGET_NAME)
+  set(options "")
+  set(oneValueArgs "")
+  set(multiValueArgs SRCS DEPS)
+  cmake_parse_arguments(cc_binary "${options}" "${oneValueArgs}" "${multiValueArgs}" ${ARGN})
+  add_executable(${TARGET_NAME} ${cc_binary_SRCS})
+  if(cc_binary_DEPS)
+    target_link_libraries(${TARGET_NAME} ${cc_binary_DEPS})
+    add_dependencies(${TARGET_NAME} ${cc_binary_DEPS})
+  endif()
+endfunction(cc_binary)
+
+function(cc_test TARGET_NAME)
+  if(WITH_TESTING)
+    set(options "")
+    set(oneValueArgs "")
+    set(multiValueArgs SRCS DEPS)
+    cmake_parse_arguments(cc_test "${options}" "${oneValueArgs}" "${multiValueArgs}" ${ARGN})
+    add_executable(${TARGET_NAME} ${cc_test_SRCS})
+    target_link_libraries(${TARGET_NAME} ${cc_test_DEPS} gtest gtest_main)
+    add_dependencies(${TARGET_NAME} ${cc_test_DEPS} gtest gtest_main)
+    add_test(NAME ${TARGET_NAME} COMMAND ${TARGET_NAME} WORKING_DIRECTORY ${CMAKE_CURRENT_SOURCE_DIR})
+  endif()
+endfunction(cc_test)
+
+function(nv_library TARGET_NAME)
+  if (WITH_GPU)
+    set(options STATIC static SHARED shared)
+    set(oneValueArgs "")
+    set(multiValueArgs SRCS DEPS)
+    cmake_parse_arguments(nv_library "${options}" "${oneValueArgs}" "${multiValueArgs}" ${ARGN})
+    if(nv_library_SRCS)
+      if (nv_library_SHARED OR nv_library_shared) # build *.so
+        cuda_add_library(${TARGET_NAME} SHARED ${nv_library_SRCS})
+      else()
+        cuda_add_library(${TARGET_NAME} STATIC ${nv_library_SRCS})
+      endif()
+      if (nv_library_DEPS)
+        add_dependencies(${TARGET_NAME} ${nv_library_DEPS})
+        target_link_libraries(${TARGET_NAME} ${nv_library_DEPS})
+      endif()
+      # cpplint code style
+      foreach(source_file ${nv_library_SRCS})
+        string(REGEX REPLACE "\\.[^.]*$" "" source ${source_file})
+        if(EXISTS ${CMAKE_CURRENT_SOURCE_DIR}/${source}.h)
+          list(APPEND nv_library_HEADERS ${CMAKE_CURRENT_SOURCE_DIR}/${source}.h)
+        endif()
+      endforeach()
+      add_style_check_target(${TARGET_NAME} ${nv_library_SRCS} ${nv_library_HEADERS})
+    else(nv_library_SRCS)
+      if (nv_library_DEPS)
+        merge_static_libs(${TARGET_NAME} ${nv_library_DEPS})
+      else()
+        message(FATAL_ERROR "Please specify source file or library in nv_library.")
+      endif()
+    endif(nv_library_SRCS)
+  endif()
+endfunction(nv_library)
+
+function(nv_binary TARGET_NAME)
+  if (WITH_GPU)
+    set(options "")
+    set(oneValueArgs "")
+    set(multiValueArgs SRCS DEPS)
+    cmake_parse_arguments(nv_binary "${options}" "${oneValueArgs}" "${multiValueArgs}" ${ARGN})
+    cuda_add_executable(${TARGET_NAME} ${nv_binary_SRCS})
+    if(nv_binary_DEPS)
+      target_link_libraries(${TARGET_NAME} ${nv_binary_DEPS})
+      add_dependencies(${TARGET_NAME} ${nv_binary_DEPS})
+    endif()
+  endif()
+endfunction(nv_binary)
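+
+# A hypothetical end-to-end sketch of the rules above (target and file names
+# are illustrative only, not real build targets). Note that cc_test links
+# gtest/gtest_main itself, so callers list only their real dependencies:
+#
+#   cc_library(matrix SRCS matrix.cc)
+#   cc_binary(dump_matrix SRCS dump_matrix.cc DEPS matrix)
+#   cc_test(matrix_test SRCS matrix_test.cc DEPS matrix)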
+
+function(nv_test TARGET_NAME)
+  if (WITH_GPU AND WITH_TESTING)
+    set(options "")
+    set(oneValueArgs "")
+    set(multiValueArgs SRCS DEPS)
+    cmake_parse_arguments(nv_test "${options}" "${oneValueArgs}" "${multiValueArgs}" ${ARGN})
+    cuda_add_executable(${TARGET_NAME} ${nv_test_SRCS})
+    target_link_libraries(${TARGET_NAME} ${nv_test_DEPS} gtest gtest_main)
+    add_dependencies(${TARGET_NAME} ${nv_test_DEPS} gtest gtest_main)
+    add_test(${TARGET_NAME} ${TARGET_NAME})
+  endif()
+endfunction(nv_test)
+
+function(go_library TARGET_NAME)
+  set(options STATIC static SHARED shared)
+  set(oneValueArgs "")
+  set(multiValueArgs DEPS)
+  cmake_parse_arguments(go_library "${options}" "${oneValueArgs}" "${multiValueArgs}" ${ARGN})
+
+  if (go_library_SHARED OR go_library_shared)
+    set(BUILD_MODE "-buildmode=c-shared")
+    set(${TARGET_NAME}_LIB_NAME "${CMAKE_SHARED_LIBRARY_PREFIX}${TARGET_NAME}${CMAKE_SHARED_LIBRARY_SUFFIX}" CACHE STRING "output library name for target ${TARGET_NAME}")
+  else()
+    set(BUILD_MODE "-buildmode=c-archive")
+    set(${TARGET_NAME}_LIB_NAME "${CMAKE_STATIC_LIBRARY_PREFIX}${TARGET_NAME}${CMAKE_STATIC_LIBRARY_SUFFIX}" CACHE STRING "output library name for target ${TARGET_NAME}")
+  endif()
+
+  set(dummyfile ${CMAKE_CURRENT_BINARY_DIR}/${TARGET_NAME}_dummy.c)
+
+  # This custom command will always run since it depends on a file that
+  # never exists.
+  add_custom_command(
+    OUTPUT dummy_rebuild_${TARGET_NAME}
+    COMMAND cmake -E touch ${dummyfile}
+  )
+  # Create a custom target that depends on the custom command output
+  # file, so the custom command can be referenced as a dependency by
+  # `add_dependencies`.
+  add_custom_target(rebuild_${TARGET_NAME}
+    DEPENDS dummy_rebuild_${TARGET_NAME}
+  )
+
+  # Write dummy code so that `make <target_name>` works from the command line.
+  file(WRITE ${dummyfile} "const char * dummy = \"${dummyfile}\";")
+  if (go_library_SHARED OR go_library_shared)
+    add_library(${TARGET_NAME} SHARED ${dummyfile})
+  else()
+    add_library(${TARGET_NAME} STATIC ${dummyfile})
+  endif()
+  if(go_library_DEPS)
+    add_dependencies(${TARGET_NAME} ${go_library_DEPS})
+  endif(go_library_DEPS)
+
+  # The "source file" of the library is `${dummyfile}`, which never
+  # changes, so the target would never rebuild. Make the target depend
+  # on the custom command that touches the library "source file", so
+  # a rebuild will always happen.
+  add_dependencies(${TARGET_NAME} rebuild_${TARGET_NAME})
+
+  set(${TARGET_NAME}_LIB_PATH "${CMAKE_CURRENT_BINARY_DIR}/${${TARGET_NAME}_LIB_NAME}" CACHE STRING "output library path for target ${TARGET_NAME}")
+
+  file(GLOB GO_SOURCE RELATIVE "${CMAKE_CURRENT_SOURCE_DIR}" "*.go")
+  string(REPLACE "${PADDLE_GO_PATH}/" "" CMAKE_CURRENT_SOURCE_REL_DIR ${CMAKE_CURRENT_SOURCE_DIR})
+
+  add_custom_command(TARGET ${TARGET_NAME} POST_BUILD
+    COMMAND rm "${${TARGET_NAME}_LIB_PATH}"
+    # Build the Go source code
+    COMMAND GOPATH=${GOPATH} ${CMAKE_Go_COMPILER} build ${BUILD_MODE}
+    -o "${${TARGET_NAME}_LIB_PATH}"
+    "./${CMAKE_CURRENT_SOURCE_REL_DIR}/${GO_SOURCE}"
+    # must run under GOPATH
+    WORKING_DIRECTORY "${PADDLE_IN_GOPATH}/go")
+  add_dependencies(${TARGET_NAME} go_vendor)
+endfunction(go_library)
+
+function(go_binary TARGET_NAME)
+  set(options OPTIONAL)
+  set(oneValueArgs "")
+  set(multiValueArgs SRCS DEPS)
+  cmake_parse_arguments(go_binary "${options}" "${oneValueArgs}" "${multiValueArgs}" ${ARGN})
+  string(REPLACE "${PADDLE_GO_PATH}/" "" CMAKE_CURRENT_SOURCE_REL_DIR ${CMAKE_CURRENT_SOURCE_DIR})
+
+  add_custom_command(OUTPUT ${TARGET_NAME}_timestamp
+    COMMAND env GOPATH=${GOPATH} ${CMAKE_Go_COMPILER} build
+    -o "${CMAKE_CURRENT_BINARY_DIR}/${TARGET_NAME}"
+    "./${CMAKE_CURRENT_SOURCE_REL_DIR}/${go_binary_SRCS}"
+    WORKING_DIRECTORY "${PADDLE_IN_GOPATH}/go")
+  add_custom_target(${TARGET_NAME} ALL DEPENDS go_vendor ${TARGET_NAME}_timestamp ${go_binary_DEPS})
+  install(PROGRAMS ${CMAKE_CURRENT_BINARY_DIR}/${TARGET_NAME} DESTINATION bin)
+endfunction(go_binary)
+
+function(go_test TARGET_NAME)
+  set(options OPTIONAL)
+  set(oneValueArgs "")
+  set(multiValueArgs DEPS)
+  cmake_parse_arguments(go_test "${options}" "${oneValueArgs}" "${multiValueArgs}" ${ARGN})
+  string(REPLACE "${PADDLE_GO_PATH}" "" CMAKE_CURRENT_SOURCE_REL_DIR ${CMAKE_CURRENT_SOURCE_DIR})
+  add_custom_target(${TARGET_NAME} ALL DEPENDS go_vendor ${go_test_DEPS})
+  add_custom_command(TARGET ${TARGET_NAME} POST_BUILD
+    COMMAND env GOPATH=${GOPATH} ${CMAKE_Go_COMPILER} test -race
+    -c -o "${CMAKE_CURRENT_BINARY_DIR}/${TARGET_NAME}"
+    ".${CMAKE_CURRENT_SOURCE_REL_DIR}"
+    WORKING_DIRECTORY "${PADDLE_IN_GOPATH}/go")
+  add_test(NAME ${TARGET_NAME}
+    COMMAND ${CMAKE_CURRENT_BINARY_DIR}/${TARGET_NAME}
+    WORKING_DIRECTORY ${CMAKE_CURRENT_SOURCE_DIR})
+endfunction(go_test)
+
+function(proto_library TARGET_NAME)
+  set(oneValueArgs "")
+  set(multiValueArgs SRCS DEPS)
+  cmake_parse_arguments(proto_library "" "${oneValueArgs}" "${multiValueArgs}" ${ARGN})
+  set(proto_srcs)
+  set(proto_hdrs)
+  protobuf_generate_cpp(proto_srcs proto_hdrs ${proto_library_SRCS})
+  cc_library(${TARGET_NAME} SRCS ${proto_srcs} DEPS ${proto_library_DEPS} protobuf)
+endfunction()
+
+function(py_proto_compile TARGET_NAME)
+  set(oneValueArgs "")
+  set(multiValueArgs SRCS)
+  cmake_parse_arguments(py_proto_compile "" "${oneValueArgs}" "${multiValueArgs}" ${ARGN})
+  set(py_srcs)
+  protobuf_generate_python(py_srcs ${py_proto_compile_SRCS})
+  add_custom_target(${TARGET_NAME} ALL DEPENDS ${py_srcs})
+endfunction()
+
+function(py_test TARGET_NAME)
+  if(WITH_TESTING)
+    set(options STATIC static SHARED shared)
+    set(oneValueArgs "")
+    set(multiValueArgs SRCS DEPS)
+    cmake_parse_arguments(py_test "${options}" "${oneValueArgs}" "${multiValueArgs}" ${ARGN})
+    add_test(NAME ${TARGET_NAME}
+      COMMAND env PYTHONPATH=${PADDLE_PYTHON_PACKAGE_DIR}
+      python2 ${py_test_SRCS}
+      WORKING_DIRECTORY ${CMAKE_CURRENT_SOURCE_DIR})
+  endif()
+endfunction()
diff --git a/cmake/make_resource.py b/cmake/make_resource.py
new file mode 100644
index 0000000000000000000000000000000000000000..a9241b0e3e36c2e79c79e46b4f9114b7f6947341
--- /dev/null
+++ b/cmake/make_resource.py
@@ -0,0 +1,13 @@
+# Emit a C source snippet that embeds the bytes of a resource file as an
+# unsigned char array; consumed by create_resources() in cmake/util.cmake.
+import os
+import re
+import sys
+
+res = sys.argv[1]
+out = sys.argv[2]
+var = re.sub(r'[ .-]', '_', os.path.basename(res))
+
+open(out, "w").write("const unsigned char " + var + "[] = {" + ",".join([
+    "0x%02x" % ord(c) for c in open(res).read()
+]) + ",0};\n" + "const unsigned " + var + "_size = sizeof(" + var + ");\n")
diff --git a/cmake/package.cmake b/cmake/package.cmake
index 211593f358eb34cf1a5692697247511893dfeb93..ff49a2d08e8f6004320acfce266339aa301eb9c4 100644
--- a/cmake/package.cmake
+++ b/cmake/package.cmake
@@ -1,5 +1,4 @@
 set(CPACK_PACKAGE_NAME paddle)
-set(CPACK_PACKAGE_DESCRIPTION_SUMMARY "")
 set(CPACK_PACKAGE_VERSION_MAJOR ${PADDLE_MAJOR_VERSION})
 set(CPACK_PACKAGE_VERSION_MINOR ${PADDLE_MINOR_VERSION})
 set(CPACK_PACKAGE_VERSION_PATCH ${PADDLE_PATCH_VERSION})
@@ -10,8 +9,9 @@ set(CPACK_DEBIAN_PACKAGE_ARCHITECTURE amd64)
 set(CPACK_DEBIAN_PACKAGE_MAINTAINER PaddlePaddle Dev <paddle-dev@baidu.com>)
 set(CPACK_PACKAGE_DESCRIPTION_SUMMARY "Paddle")
 set(CPACK_PACKAGE_DESCRIPTION "")
-set(CPACK_DEBIAN_PACKAGE_DEPENDS "libatlas3-base, libgflags2, libgoogle-glog0, libprotobuf8, libpython2.7, libstdc++6, python-numpy, python-pip, python-pip-whl, python-protobuf")
+set(CPACK_DEBIAN_PACKAGE_DEPENDS "libpython2.7-dev, libstdc++6, python-pip, curl, libgfortran3, python-pip-whl")
 set(CPACK_DEBIAN_PACKAGE_SECTION Devel)
+set(CPACK_DEBIAN_PACKAGE_VERSION ${PADDLE_VERSION})
 set(CPACK_DEBIAN_PACKAGE_CONTROL_EXTRA "${PROJ_ROOT}/paddle/scripts/deb/postinst")
 #set(CPACK_GENERATOR "DEB")
 # Start cpack
diff --git a/cmake/python_module.cmake b/cmake/python_module.cmake
new file mode 100644
index 0000000000000000000000000000000000000000..1412b7f7f20600acf95a4a899f5e6529c3b67a35
--- /dev/null
+++ b/cmake/python_module.cmake
@@ -0,0 +1,43 @@
+# Find if a Python module is installed
+# Found at http://www.cmake.org/pipermail/cmake/2011-January/041666.html
+# To use, do: find_python_module(PyQt4 REQUIRED)
+function(find_python_module module)
+  string(TOUPPER ${module} module_upper)
+  if(NOT PY_${module_upper})
+    if(ARGC GREATER 1 AND ARGV1 STREQUAL "REQUIRED")
+      set(${module}_FIND_REQUIRED TRUE)
+    else()
+      set(${module}_FIND_REQUIRED FALSE)
+    endif()
+    # A module's location is usually a directory, but for binary modules
+    # it's a .so file.
+    execute_process(COMMAND "${PYTHON_EXECUTABLE}" "-c"
+      "import re, ${module}; print(re.compile('/__init__.py.*').sub('',${module}.__file__))"
+      RESULT_VARIABLE _${module}_status
+      OUTPUT_VARIABLE _${module}_location
+      ERROR_QUIET
+      OUTPUT_STRIP_TRAILING_WHITESPACE)
+    if(NOT _${module}_status)
+      set(PY_${module_upper} ${_${module}_location} CACHE STRING
+        "Location of Python module ${module}")
+    endif(NOT _${module}_status)
+  endif(NOT PY_${module_upper})
+  find_package_handle_standard_args(PY_${module} DEFAULT_MSG PY_${module_upper})
+  if(NOT PY_${module_upper}_FOUND AND ${module}_FIND_REQUIRED)
+    message(FATAL_ERROR "python module ${module} is not found")
+  endif()
+
+  execute_process(COMMAND "${PYTHON_EXECUTABLE}" "-c"
+    "import sys, ${module}; sys.stdout.write(${module}.__version__)"
+    OUTPUT_VARIABLE _${module}_version
+    RESULT_VARIABLE _${module}_status
+    ERROR_QUIET
+    OUTPUT_STRIP_TRAILING_WHITESPACE)
+  if(NOT _${module}_status)
+    set(PY_${module_upper}_VERSION ${_${module}_version} CACHE STRING
+      "Version of Python module ${module}")
+  endif(NOT _${module}_status)
+
+  set(PY_${module_upper}_FOUND ${PY_${module_upper}_FOUND} PARENT_SCOPE)
+  set(PY_${module_upper}_VERSION ${PY_${module_upper}_VERSION} PARENT_SCOPE)
+endfunction(find_python_module)
diff --git a/cmake/rdma.cmake b/cmake/rdma.cmake
index e9a4da79aa92a92aa7e5d21bb795ab9aaf60ab8b..b698f3bdc3ff586a72badee3e0109e29285b457f 100644
--- a/cmake/rdma.cmake
+++ b/cmake/rdma.cmake
@@ -5,72 +5,78 @@
 # svn co https://svn.baidu.com/sys/ip/trunk/rdma/thirdparty rdma/
 # we use static output in svn repositories to avoid implicit bugs from a nonstandard runtime env.
 
-set(RDMA_ROOT $ENV{RDMA_ROOT} CACHE PATH "Folder contains RDMA sock library and thirdparty library")
+if(WITH_RDMA)
+  set(RDMA_ROOT $ENV{RDMA_ROOT} CACHE PATH "Folder contains RDMA sock library and thirdparty library")
 
-function(generate_rdma_links)
-  #redirect to current DIR to isolate the pollution from system runtime environment
-  #it can benifits unified control for different gcc environment.
-  #e.g, by default gcc48 did not refer /usr/lib64 which could contain low version
-  #runtime libraries that will crash process while loading it. That redirect trick
-  #can fix it.
-  execute_process(
-    COMMAND mkdir -p librdma
-    COMMAND ln -s -f /usr/lib64/libibverbs.so.1.0.0 librdma/libibverbs.so.1
-    COMMAND ln -s -f /usr/lib64/libibverbs.so.1.0.0 librdma/libibverbs.so
-    COMMAND ln -s -f /usr/lib64/librdmacm.so.1.0.0 librdma/librdmacm.so.1
-    COMMAND ln -s -f /usr/lib64/librdmacm.so.1.0.0 librdma/librdmacm.so
-    WORKING_DIRECTORY ${CMAKE_CURRENT_BINARY_DIR}
-    )
-endfunction(generate_rdma_links)
-
-
-#check and set headers
-find_path(RDMA_INC_SXISOCK sxi_sock.h PATHS ${RDMA_ROOT}/sockrdmav1/output/include)
-find_path(RDMA_INC_XIO libxio.h PATHS ${RDMA_ROOT}/thirdparty/output/accelio)
-find_path(RDMA_INC_EVENT event2 PATHS ${RDMA_ROOT}/thirdparty/output/libevent)
-find_path(RDMA_INC_NUMA numa.h PATHS ${RDMA_ROOT}/thirdparty/output/libnuma)
-
-#check and set libs
-find_library(RDMA_LIB_SXISOCK NAMES sxisock PATHS ${RDMA_ROOT}/sockrdmav1/output)
-find_library(RDMA_LIB_XIO NAMES xio PATHS ${RDMA_ROOT}/thirdparty/output/accelio)
-find_library(RDMA_LIB_EVENT NAMES event PATHS ${RDMA_ROOT}/thirdparty/output/libevent)
-find_library(RDMA_LIB_EVENT_CORE NAMES event_core PATHS ${RDMA_ROOT}/thirdparty/output/libevent)
-find_library(RDMA_LIB_EVENT_EXTRA NAMES event_extra PATHS ${RDMA_ROOT}/thirdparty/output/libevent)
-find_library(RDMA_LIB_EVENT_PTHREADS NAMES event_pthreads PATHS ${RDMA_ROOT}/thirdparty/output/libevent)
-find_library(RDMA_LIB_NUMA NAMES numa PATHS ${RDMA_ROOT}/thirdparty/output/libnuma)
-
-if(
-  RDMA_INC_SXISOCK AND
-  RDMA_INC_XIO AND
-  RDMA_INC_EVENT AND
-  RDMA_INC_NUMA AND
-  RDMA_LIB_SXISOCK AND
-  RDMA_LIB_XIO AND
-  RDMA_LIB_EVENT AND
-  RDMA_LIB_EVENT_CORE AND
-  RDMA_LIB_EVENT_EXTRA AND
-  RDMA_LIB_EVENT_PTHREADS AND
-  RDMA_LIB_NUMA
+  function(generate_rdma_links)
+    #redirect to the current DIR to isolate pollution from the system runtime environment
+    #it benefits unified control across different gcc environments.
+    #e.g., by default gcc 4.8 does not search /usr/lib64, which could contain low-version
+    #runtime libraries that will crash the process while loading. That redirect trick
+    #can fix it.
+    execute_process(
+      COMMAND mkdir -p librdma
+      COMMAND ln -s -f /usr/lib64/libibverbs.so.1.0.0 librdma/libibverbs.so.1
+      COMMAND ln -s -f /usr/lib64/libibverbs.so.1.0.0 librdma/libibverbs.so
+      COMMAND ln -s -f /usr/lib64/librdmacm.so.1.0.0 librdma/librdmacm.so.1
+      COMMAND ln -s -f /usr/lib64/librdmacm.so.1.0.0 librdma/librdmacm.so
+      COMMAND ln -s -f /lib64/libnl.so.1.1.4 librdma/libnl.so.1
+      COMMAND ln -s -f /lib64/libnl.so.1.1.4 librdma/libnl.so
+      WORKING_DIRECTORY ${CMAKE_CURRENT_BINARY_DIR}
      )
+  endfunction(generate_rdma_links)
 
-  set(RDMA_INC_DIR
-    ${RDMA_INC_SXISOCK}
-    ${RDMA_INC_XIO}
-    ${RDMA_INC_EVENT}
-    ${RDMA_INC_NUMA})
-  set(RDMA_LIBS
-    ${RDMA_LIB_SXISOCK}
-    ${RDMA_LIB_XIO}
-    ${RDMA_LIB_EVENT}
-    ${RDMA_LIB_EVENT_CORE}
-    ${RDMA_LIB_EVENT_EXTRA}
-    ${RDMA_LIB_EVENT_PTHREADS}
-    ${RDMA_LIB_NUMA}
-    )
-  set(RDMA_LD_FLAGS "-L./librdma -libverbs -lrdmacm -Xlinker -rpath ./librdma")
-  return()
-endif()
+  #check and set headers
+  find_path(RDMA_INC_SXISOCK sxi_sock.h PATHS ${RDMA_ROOT}/sockrdmav1/output/include)
+  find_path(RDMA_INC_XIO libxio.h PATHS ${RDMA_ROOT}/thirdparty/output/accelio)
+  find_path(RDMA_INC_EVENT event2 PATHS ${RDMA_ROOT}/thirdparty/output/libevent)
+  find_path(RDMA_INC_NUMA numa.h PATHS ${RDMA_ROOT}/thirdparty/output/libnuma)
+
+  #check and set libs
+  find_library(RDMA_LIB_SXISOCK NAMES sxisock PATHS ${RDMA_ROOT}/sockrdmav1/output)
+  find_library(RDMA_LIB_XIO NAMES xio PATHS ${RDMA_ROOT}/thirdparty/output/accelio)
+  find_library(RDMA_LIB_EVENT NAMES event PATHS ${RDMA_ROOT}/thirdparty/output/libevent)
+  find_library(RDMA_LIB_EVENT_CORE NAMES event_core PATHS ${RDMA_ROOT}/thirdparty/output/libevent)
+  find_library(RDMA_LIB_EVENT_EXTRA NAMES event_extra PATHS ${RDMA_ROOT}/thirdparty/output/libevent)
+  find_library(RDMA_LIB_EVENT_PTHREADS NAMES event_pthreads PATHS ${RDMA_ROOT}/thirdparty/output/libevent)
+  find_library(RDMA_LIB_NUMA NAMES numa PATHS ${RDMA_ROOT}/thirdparty/output/libnuma)
 
-#if this module is not called, RDMA_INC_DIR RDMA_LIBS will be null, so top module always refer this variable
+  if(
+    RDMA_INC_SXISOCK AND
+    RDMA_INC_XIO AND
+    RDMA_INC_EVENT AND
+    RDMA_INC_NUMA AND
+    RDMA_LIB_SXISOCK AND
+    RDMA_LIB_XIO AND
+    RDMA_LIB_EVENT AND
+    RDMA_LIB_EVENT_CORE AND
+    RDMA_LIB_EVENT_EXTRA AND
+    RDMA_LIB_EVENT_PTHREADS AND
+    RDMA_LIB_NUMA
+    )
-message(FATAL_ERROR, "RDMA libraries are not found, try to set RDMA_ROOT or check all related libraries.")
+    set(RDMA_INC_DIR
+      ${RDMA_INC_SXISOCK}
+      ${RDMA_INC_XIO}
+      ${RDMA_INC_EVENT}
+      ${RDMA_INC_NUMA})
+    set(RDMA_LIBS
+      ${RDMA_LIB_SXISOCK}
+      ${RDMA_LIB_XIO}
+      ${RDMA_LIB_EVENT}
+      ${RDMA_LIB_EVENT_CORE}
+      ${RDMA_LIB_EVENT_EXTRA}
+      ${RDMA_LIB_EVENT_PTHREADS}
+      ${RDMA_LIB_NUMA}
+      )
+    set(RDMA_LD_FLAGS "-L./librdma -libverbs -lrdmacm -Xlinker -rpath ./librdma")
+    include_directories("${RDMA_INC_DIR}")
+  else()
+    # If this module is not called, RDMA_INC_DIR and RDMA_LIBS will be null, so the top module can always refer to these variables.
+    message(FATAL_ERROR "RDMA libraries are not found, try to set RDMA_ROOT or check all related libraries.")
+  endif()
+else(WITH_RDMA)
+  set(RDMA_LIBS "")
+  set(RDMA_LD_FLAGS "")
+  add_definitions(-DPADDLE_DISABLE_RDMA)
+endif(WITH_RDMA)
diff --git a/cmake/simd.cmake b/cmake/simd.cmake
new file mode 100644
index 0000000000000000000000000000000000000000..46035a908b588861607a25d3a21cf34b7b6fd4b8
--- /dev/null
+++ b/cmake/simd.cmake
@@ -0,0 +1,80 @@
+# This file is used to check all supported levels of SIMD/AVX on your machine,
+# so that PaddlePaddle can unleash the vectorization power of multicore.
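+#
+# A sketch of how the detection results below might be consumed; the
+# SIMD_FLAG variable and where it is applied are assumptions, while the
+# *_FOUND and *_FLAG variables are the real outputs of this file:
+#
+#   include(simd)
+#   if(AVX_FOUND)
+#     set(SIMD_FLAG ${AVX_FLAG})
+#   elseif(SSE3_FOUND)
+#     set(SIMD_FLAG ${SSE3_FLAG})
+#   endif()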
+
+INCLUDE(CheckCXXSourceRuns)
+INCLUDE(CheckCXXSourceCompiles)
+
+IF(CMAKE_COMPILER_IS_GNUCC OR CMAKE_COMPILER_IS_GNUCXX OR CMAKE_CXX_COMPILER_ID MATCHES "Clang")
+    set(MMX_FLAG "-mmmx")
+    set(SSE2_FLAG "-msse2")
+    set(SSE3_FLAG "-msse3")
+    SET(AVX_FLAG "-mavx")
+    SET(AVX2_FLAG "-mavx2")
+ELSEIF(MSVC)
+    set(MMX_FLAG "/arch:MMX")
+    set(SSE2_FLAG "/arch:SSE2")
+    set(SSE3_FLAG "/arch:SSE3")
+    SET(AVX_FLAG "/arch:AVX")
+    SET(AVX2_FLAG "/arch:AVX2")
+ENDIF()
+
+set(CMAKE_REQUIRED_FLAGS_RETAINED ${CMAKE_REQUIRED_FLAGS})
+
+# Check MMX
+set(CMAKE_REQUIRED_FLAGS ${MMX_FLAG})
+CHECK_CXX_SOURCE_RUNS("
+#include <mmintrin.h>
+int main()
+{
+    _mm_setzero_si64();
+    return 0;
+}" MMX_FOUND)
+
+# Check SSE2
+set(CMAKE_REQUIRED_FLAGS ${SSE2_FLAG})
+CHECK_CXX_SOURCE_RUNS("
+#include <emmintrin.h>
+int main()
+{
+    _mm_setzero_si128();
+    return 0;
+}" SSE2_FOUND)
+
+# Check SSE3
+set(CMAKE_REQUIRED_FLAGS ${SSE3_FLAG})
+CHECK_CXX_SOURCE_RUNS("
+#include <pmmintrin.h>
+int main()
+{
+    __m128d a = _mm_set1_pd(6.28);
+    __m128d b = _mm_set1_pd(3.14);
+    __m128d result = _mm_addsub_pd(a, b);
+    result = _mm_movedup_pd(result);
+    return 0;
+}" SSE3_FOUND)
+
+# Check AVX
+set(CMAKE_REQUIRED_FLAGS ${AVX_FLAG})
+CHECK_CXX_SOURCE_RUNS("
+#include <immintrin.h>
+int main()
+{
+    __m256 a = _mm256_set_ps (-1.0f, 2.0f, -3.0f, 4.0f, -1.0f, 2.0f, -3.0f, 4.0f);
+    __m256 b = _mm256_set_ps (1.0f, 2.0f, 3.0f, 4.0f, 1.0f, 2.0f, 3.0f, 4.0f);
+    __m256 result = _mm256_add_ps (a, b);
+    return 0;
+}" AVX_FOUND)
+
+# Check AVX 2
+set(CMAKE_REQUIRED_FLAGS ${AVX2_FLAG})
+CHECK_CXX_SOURCE_RUNS("
+#include <immintrin.h>
+int main()
+{
+    __m256i a = _mm256_set_epi32 (-1, 2, -3, 4, -1, 2, -3, 4);
+    __m256i result = _mm256_abs_epi32 (a);
+    return 0;
+}" AVX2_FOUND)
+
+set(CMAKE_REQUIRED_FLAGS ${CMAKE_REQUIRED_FLAGS_RETAINED})
+mark_as_advanced(MMX_FOUND SSE2_FOUND SSE3_FOUND AVX_FOUND AVX2_FOUND)
diff --git a/cmake/swig.cmake b/cmake/swig.cmake
deleted file mode 100644
index 97e87aa947791e2c5a88e7e554dec43bcd661664..0000000000000000000000000000000000000000
--- a/cmake/swig.cmake
+++ /dev/null
@@ -1,15 +0,0 @@
-function(generate_python_api target_name)
-    add_custom_command(OUTPUT ${PROJ_ROOT}/paddle/py_paddle/swig_paddle.py
-                              ${PROJ_ROOT}/paddle/Paddle_wrap.cxx
-                              ${PROJ_ROOT}/paddle/Paddle_wrap.h
-        COMMAND swig -python -c++ -outcurrentdir -I../ api/Paddle.swig
-                && mv ${PROJ_ROOT}/paddle/swig_paddle.py ${PROJ_ROOT}/paddle/py_paddle/swig_paddle.py
-        DEPENDS ${PROJ_ROOT}/paddle/api/Paddle.swig
-                ${PROJ_ROOT}/paddle/api/PaddleAPI.h
-        WORKING_DIRECTORY ${PROJ_ROOT}/paddle
-        COMMENT "Generate Python API from swig")
-    add_custom_target(${target_name} ALL DEPENDS
-        ${PROJ_ROOT}/paddle/Paddle_wrap.cxx
-        ${PROJ_ROOT}/paddle/Paddle_wrap.h
-        ${PROJ_ROOT}/paddle/py_paddle/swig_paddle.py)
-endfunction(generate_python_api)
diff --git a/cmake/system.cmake b/cmake/system.cmake
new file mode 100644
index 0000000000000000000000000000000000000000..adf5e2c539740076ad1808353522c7467d765e64
--- /dev/null
+++ b/cmake/system.cmake
@@ -0,0 +1,96 @@
+# Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserve.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+# Detects the OS and sets appropriate variables.
+# CMAKE_SYSTEM_NAME only gives us a coarse-grained name of the OS CMake is
+# building for, but the host system name, like centos, is necessary
+# in some cases to distinguish systems for customization.
+#
+# for instance, protobuf libs path is /lib64
+# on CentOS, but /lib on other systems.
+
+IF(WIN32)
+  SET(HOST_SYSTEM "win32")
+ELSE(WIN32)
+  IF(APPLE)
+    EXEC_PROGRAM(sw_vers ARGS -productVersion OUTPUT_VARIABLE MACOSX_VERSION)
+    STRING(REGEX MATCH "[0-9]+.[0-9]+" VERSION "${MACOSX_VERSION}")
+    SET(MACOS_VERSION ${VERSION})
+    SET(HOST_SYSTEM "macosx")
+    IF(NOT DEFINED ENV{MACOSX_DEPLOYMENT_TARGET})
+      # Set cache variable - end user may change this during ccmake or cmake-gui configure.
+      SET(CMAKE_OSX_DEPLOYMENT_TARGET ${MACOS_VERSION} CACHE STRING
+          "Minimum OS X version to target for deployment (at runtime); newer APIs weak linked. Set to empty string for default value.")
+    ENDIF()
+    set(CMAKE_EXE_LINKER_FLAGS "-framework CoreFoundation -framework Security")
+  ELSE(APPLE)
+
+    IF(EXISTS "/etc/issue")
+      FILE(READ "/etc/issue" LINUX_ISSUE)
+      IF(LINUX_ISSUE MATCHES "CentOS")
+        SET(HOST_SYSTEM "centos")
+      ELSEIF(LINUX_ISSUE MATCHES "Debian")
+        SET(HOST_SYSTEM "debian")
+      ELSEIF(LINUX_ISSUE MATCHES "Ubuntu")
+        SET(HOST_SYSTEM "ubuntu")
+      ELSEIF(LINUX_ISSUE MATCHES "Red Hat")
+        SET(HOST_SYSTEM "redhat")
+      ELSEIF(LINUX_ISSUE MATCHES "Fedora")
+        SET(HOST_SYSTEM "fedora")
+      ENDIF()
+    ENDIF(EXISTS "/etc/issue")
+
+    IF(EXISTS "/etc/redhat-release")
+      FILE(READ "/etc/redhat-release" LINUX_ISSUE)
+      IF(LINUX_ISSUE MATCHES "CentOS")
+        SET(HOST_SYSTEM "centos")
+      ENDIF()
+    ENDIF(EXISTS "/etc/redhat-release")
+
+    IF(NOT HOST_SYSTEM)
+      SET(HOST_SYSTEM ${CMAKE_SYSTEM_NAME})
+    ENDIF()
+
+  ENDIF(APPLE)
+ENDIF(WIN32)
+
+# query number of logical cores
+CMAKE_HOST_SYSTEM_INFORMATION(RESULT CPU_CORES QUERY NUMBER_OF_LOGICAL_CORES)
+
+MARK_AS_ADVANCED(HOST_SYSTEM CPU_CORES)
+
+MESSAGE(STATUS "Found Paddle host system: ${HOST_SYSTEM}")
+MESSAGE(STATUS "Found Paddle host system's CPU: ${CPU_CORES} cores")
+
+# configuration for cross-compiling
+IF(DEFINED CMAKE_SYSTEM_NAME)
+  INCLUDE(cross_compiling/host)
+  IF(${CMAKE_SYSTEM_NAME} STREQUAL "Android")
+    SET(ANDROID TRUE)
+    INCLUDE(cross_compiling/android)
+  ELSEIF(${CMAKE_SYSTEM_NAME} STREQUAL "RPi")
+    SET(RPI TRUE)
+    INCLUDE(cross_compiling/raspberry_pi)
+  ENDIF()
+ENDIF()
+
+# external dependencies log output
+SET(EXTERNAL_PROJECT_LOG_ARGS
+    LOG_DOWNLOAD  0  # Wrap download in script to log output
+    LOG_UPDATE    1  # Wrap update in script to log output
+    LOG_CONFIGURE 1  # Wrap configure in script to log output
+    LOG_BUILD     0  # Wrap build in script to log output
+    LOG_TEST      1  # Wrap test in script to log output
+    LOG_INSTALL   0  # Wrap install in script to log output
+)
diff --git a/cmake/util.cmake b/cmake/util.cmake
index 11641f6064b9db36e14293460a1f05067e373661..4a27623b7ffc0b389680baee52db440c78442f46 100644
--- a/cmake/util.cmake
+++ b/cmake/util.cmake
@@ -24,7 +24,7 @@ function(target_circle_link_libraries TARGET_NAME)
       list(APPEND libsInArgn ${arg})
     endif()
   endforeach()
-  if("${CMAKE_CXX_COMPILER_ID}" STREQUAL "Clang")
+  if("${CMAKE_CXX_COMPILER_ID}" STREQUAL "Clang" OR "${CMAKE_CXX_COMPILER_ID}" STREQUAL "AppleClang")
     list(APPEND LIBS "-undefined dynamic_lookup")
   endif()
   list(REVERSE libsInArgn)
@@ -65,38 +65,16 @@ endmacro()
 
 # link_paddle_exe
 # add paddle libraries for a paddle executable, such as trainer, pserver.
 #
-# It will handle WITH_PYTHON/WITH_GLOG etc.
+# It will handle WITH_PYTHON etc.
 function(link_paddle_exe TARGET_NAME)
   if(WITH_RDMA)
     generate_rdma_links()
   endif()
 
-  if(WITH_METRIC)
-    if(WITH_GPU)
-      set(METRIC_LIBS paddle_metric_learning paddle_dserver_lib metric metric_cpu)
-    else()
-      set(METRIC_LIBS paddle_metric_learning paddle_dserver_lib metric_cpu)
-    endif()
-  else()
-    set(METRIC_LIBS "")
-  endif()
-
-  if(PADDLE_WITH_INTERNAL)
-    set(INTERAL_LIBS paddle_internal_gserver paddle_internal_parameter)
-    target_circle_link_libraries(${TARGET_NAME}
-      ARCHIVE_START
-      paddle_internal_gserver
-      paddle_internal_owlqn
-      ARCHIVE_END
-      paddle_internal_parameter)
-  else()
-    set(INTERAL_LIBS "")
-  endif()
-
   target_circle_link_libraries(${TARGET_NAME}
     ARCHIVE_START
     paddle_gserver
-    ${METRIC_LIBS}
+    paddle_function
    ARCHIVE_END
    paddle_pserver
    paddle_trainer_lib
@@ -106,53 +84,18 @@ function(link_paddle_exe TARGET_NAME)
    paddle_parameter
    paddle_proto
    paddle_cuda
-    ${METRIC_LIBS}
-    ${PROTOBUF_LIBRARY}
+    paddle_optimizer
+    ${EXTERNAL_LIBS}
    ${CMAKE_THREAD_LIBS_INIT}
-    ${CBLAS_LIBS}
-    ${ZLIB_LIBRARIES}
-    ${INTERAL_LIBS}
-    ${CMAKE_DL_LIBS})
-
-  if(WITH_RDMA)
-    target_link_libraries(${TARGET_NAME}
-      ${RDMA_LD_FLAGS}
-      ${RDMA_LIBS})
-  endif()
-
-  if(WITH_PYTHON)
-    target_link_libraries(${TARGET_NAME}
-      ${PYTHON_LIBRARIES})
-  endif()
+    ${CMAKE_DL_LIBS}
+    ${RDMA_LD_FLAGS}
+    ${RDMA_LIBS})
 
-  if(WITH_GLOG)
-    target_link_libraries(${TARGET_NAME}
-      ${LIBGLOG_LIBRARY})
-  endif()
+  if(ANDROID)
+    target_link_libraries(${TARGET_NAME} log)
+  endif(ANDROID)
 
-  if(WITH_GFLAGS)
-    target_link_libraries(${TARGET_NAME}
-      ${GFLAGS_LIBRARIES})
-  endif()
-
-  if(WITH_GPU)
-    if(NOT WITH_DSO OR WITH_METRIC)
-      target_link_libraries(${TARGET_NAME}
-        ${CUDNN_LIBRARY}
-        ${CUDA_curand_LIBRARY})
-      CUDA_ADD_CUBLAS_TO_TARGET(${TARGET_NAME})
-    endif()
-
-    check_library_exists(rt clock_gettime "time.h" HAVE_CLOCK_GETTIME )
-    if(HAVE_CLOCK_GETTIME)
-      target_link_libraries(${TARGET_NAME} rt)
-    endif()
-  endif()
-
-  if(NOT WITH_DSO)
-    target_link_libraries(${TARGET_NAME}
-      ${WARPCTC_LIBRARY})
-  endif()
+  add_dependencies(${TARGET_NAME} ${external_project_dependencies})
 endfunction()
 
 # link_paddle_test
@@ -161,8 +104,10 @@ endfunction()
 # Rest Arguments: not used.
 function(link_paddle_test TARGET_NAME)
   link_paddle_exe(${TARGET_NAME})
-  target_link_libraries(${TARGET_NAME} ${GTEST_MAIN_LIBRARIES}
-    ${GTEST_LIBRARIES})
+  target_link_libraries(${TARGET_NAME}
+                        paddle_test_main
+                        paddle_test_util
+                        ${GTEST_LIBRARIES})
 endfunction()
 
 # add_unittest_without_exec
@@ -173,7 +118,6 @@ endfunction()
 macro(add_unittest_without_exec TARGET_NAME)
   add_executable(${TARGET_NAME} ${ARGN})
   link_paddle_test(${TARGET_NAME})
-  add_style_check_target(${TARGET_NAME} ${ARGN})
 endmacro()
 
 # add_unittest
@@ -194,17 +138,23 @@ macro(add_simple_unittest TARGET_NAME)
 endmacro()
 
 # Creates a C resources file from a given resource file
-function(create_resources res_file output)
-  # Create empty output file
-  file(WRITE ${output} "")
-  # Get short filename
-  string(REGEX MATCH "([^/]+)$" filename ${res_file})
-  # Replace filename spaces & extension separator for C compatibility
-  string(REGEX REPLACE "\\.| |-" "_" filename ${filename})
-  # Read hex data from file
-  file(READ ${res_file} filedata HEX)
-  # Convert hex data for C compatibility
-  string(REGEX REPLACE "([0-9a-f][0-9a-f])" "0x\\1," filedata ${filedata})
-  # Append data to output file
-  file(APPEND ${output} "const unsigned char ${filename}[] = {${filedata}};\nconst unsigned ${filename}_size = sizeof(${filename});\n")
+function(create_resources res_file output_file)
+  add_custom_command(
+    OUTPUT ${output_file}
+    COMMAND python ARGS ${PROJ_ROOT}/cmake/make_resource.py ${res_file} ${output_file}
+    DEPENDS ${res_file} ${PROJ_ROOT}/cmake/make_resource.py)
+endfunction()
+
+
+# Create a python unittest for each given source file,
+# which takes care of setting up the correct running environment
+function(add_python_test TEST_NAME)
+  foreach(arg ${ARGN})
+    get_filename_component(py_fn ${arg} NAME_WE)
+    set(TRG_NAME ${TEST_NAME}_${py_fn})
+    add_test(NAME ${TRG_NAME}
+      COMMAND env PYTHONPATH=${PADDLE_PYTHON_PACKAGE_DIR}
+      python2 ${arg}
+      WORKING_DIRECTORY ${CMAKE_CURRENT_SOURCE_DIR})
+  endforeach()
 endfunction()
diff --git a/cmake/version.cmake b/cmake/version.cmake
index a0518e07e88a1ff468c301523f888c7d95e15185..ac1583a24c828629c46cb9cf4e965f8da2273732 100644
--- a/cmake/version.cmake
+++ b/cmake/version.cmake
@@ -21,4 +21,5 @@ while ("${PADDLE_VERSION}" STREQUAL "")
   endif()
 endwhile()
 
+add_definitions(-DPADDLE_VERSION=${PADDLE_VERSION})
 message(STATUS "Paddle version is ${PADDLE_VERSION}")
diff --git a/demo/gan/README.md b/demo/gan/README.md
deleted file mode 100644
index fdc970a07b488c3a4146c9baa76a133a456fc9ab..0000000000000000000000000000000000000000
--- a/demo/gan/README.md
+++ /dev/null
@@ -1,13 +0,0 @@
-# Generative Adversarial Networks (GAN)
-
-This demo implements GAN training described in the original GAN paper (https://arxiv.org/abs/1406.2661) and DCGAN (https://arxiv.org/abs/1511.06434).
-
-The general training procedures are implemented in gan_trainer.py. The neural network configurations are specified in gan_conf.py (for synthetic data) and gan_conf_image.py (for image data).
-
-In order to run the model, first download the corresponding data by running the shell script in ./data.
-Then you can run the command below. The flag -d specifies the training data (cifar, mnist or uniform) and flag --useGpu specifies whether to use gpu for training (0 is cpu, 1 is gpu).
- -$python gan_trainer.py -d cifar --use_gpu 1 - -The generated images will be stored in ./cifar_samples/ -The corresponding models will be stored in ./cifar_params/ \ No newline at end of file diff --git a/demo/gan/data/download_cifar.sh b/demo/gan/data/download_cifar.sh deleted file mode 100755 index 32e73b3d8e50ec845c79e4ce93f220583f364360..0000000000000000000000000000000000000000 --- a/demo/gan/data/download_cifar.sh +++ /dev/null @@ -1,18 +0,0 @@ -# Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserved -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -set -e -wget https://www.cs.toronto.edu/~kriz/cifar-10-python.tar.gz -tar zxf cifar-10-python.tar.gz -rm cifar-10-python.tar.gz - diff --git a/demo/gan/data/get_mnist_data.sh b/demo/gan/data/get_mnist_data.sh deleted file mode 100644 index d21bf7067135f1f8be486ef0f13fc3ec94ffc4ed..0000000000000000000000000000000000000000 --- a/demo/gan/data/get_mnist_data.sh +++ /dev/null @@ -1,19 +0,0 @@ -#!/usr/bin/env sh -# This script downloads the mnist data and unzips it. -set -e -DIR="$( cd "$(dirname "$0")" ; pwd -P )" -rm -rf "$DIR/mnist_data" -mkdir "$DIR/mnist_data" -cd "$DIR/mnist_data" - -echo "Downloading..." - -for fname in train-images-idx3-ubyte train-labels-idx1-ubyte t10k-images-idx3-ubyte t10k-labels-idx1-ubyte -do - if [ ! -e $fname ]; then - wget --no-check-certificate http://yann.lecun.com/exdb/mnist/${fname}.gz - gunzip ${fname}.gz - fi -done - - diff --git a/demo/gan/gan_conf.py b/demo/gan/gan_conf.py deleted file mode 100644 index 58ba9dde58bafb90a4bd1d76f5d8138e8948dd3a..0000000000000000000000000000000000000000 --- a/demo/gan/gan_conf.py +++ /dev/null @@ -1,134 +0,0 @@ -# Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserved -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. 
-from paddle.trainer_config_helpers import * - -mode = get_config_arg("mode", str, "generator") -assert mode in set(["generator", - "discriminator", - "generator_training", - "discriminator_training"]) - -is_generator_training = mode == "generator_training" -is_discriminator_training = mode == "discriminator_training" -is_generator = mode == "generator" -is_discriminator = mode == "discriminator" - -# The network structure below follows the ref https://arxiv.org/abs/1406.2661 -# Here we used two hidden layers and batch_norm - -print('mode=%s' % mode) -# the dim of the noise (z) as the input of the generator network -noise_dim = 10 -# the dim of the hidden layer -hidden_dim = 10 -# the dim of the generated sample -sample_dim = 2 - -settings( - batch_size=128, - learning_rate=1e-4, - learning_method=AdamOptimizer(beta1=0.5) -) - -def discriminator(sample): - """ - discriminator ouputs the probablity of a sample is from generator - or real data. - The output has two dimenstional: dimension 0 is the probablity - of the sample is from generator and dimension 1 is the probabblity - of the sample is from real data. - """ - param_attr = ParamAttr(is_static=is_generator_training) - bias_attr = ParamAttr(is_static=is_generator_training, - initial_mean=1.0, - initial_std=0) - - hidden = fc_layer(input=sample, name="dis_hidden", size=hidden_dim, - bias_attr=bias_attr, - param_attr=param_attr, - act=ReluActivation()) - - hidden2 = fc_layer(input=hidden, name="dis_hidden2", size=hidden_dim, - bias_attr=bias_attr, - param_attr=param_attr, - act=LinearActivation()) - - hidden_bn = batch_norm_layer(hidden2, - act=ReluActivation(), - name="dis_hidden_bn", - bias_attr=bias_attr, - param_attr=ParamAttr(is_static=is_generator_training, - initial_mean=1.0, - initial_std=0.02), - use_global_stats=False) - - return fc_layer(input=hidden_bn, name="dis_prob", size=2, - bias_attr=bias_attr, - param_attr=param_attr, - act=SoftmaxActivation()) - -def generator(noise): - """ - generator generates a sample given noise - """ - param_attr = ParamAttr(is_static=is_discriminator_training) - bias_attr = ParamAttr(is_static=is_discriminator_training, - initial_mean=1.0, - initial_std=0) - - hidden = fc_layer(input=noise, - name="gen_layer_hidden", - size=hidden_dim, - bias_attr=bias_attr, - param_attr=param_attr, - act=ReluActivation()) - - hidden2 = fc_layer(input=hidden, name="gen_hidden2", size=hidden_dim, - bias_attr=bias_attr, - param_attr=param_attr, - act=LinearActivation()) - - hidden_bn = batch_norm_layer(hidden2, - act=ReluActivation(), - name="gen_layer_hidden_bn", - bias_attr=bias_attr, - param_attr=ParamAttr(is_static=is_discriminator_training, - initial_mean=1.0, - initial_std=0.02), - use_global_stats=False) - - return fc_layer(input=hidden_bn, - name="gen_layer1", - size=sample_dim, - bias_attr=bias_attr, - param_attr=param_attr, - act=LinearActivation()) - -if is_generator_training: - noise = data_layer(name="noise", size=noise_dim) - sample = generator(noise) - -if is_discriminator_training: - sample = data_layer(name="sample", size=sample_dim) - -if is_generator_training or is_discriminator_training: - label = data_layer(name="label", size=1) - prob = discriminator(sample) - cost = cross_entropy(input=prob, label=label) - classification_error_evaluator(input=prob, label=label, name=mode+'_error') - outputs(cost) - -if is_generator: - noise = data_layer(name="noise", size=noise_dim) - outputs(generator(noise)) diff --git a/demo/gan/gan_conf_image.py b/demo/gan/gan_conf_image.py deleted file mode 100644 index 
5c2b140537418d52760719c7b605e778790cb7a6..0000000000000000000000000000000000000000 --- a/demo/gan/gan_conf_image.py +++ /dev/null @@ -1,264 +0,0 @@ -# Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserved -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -from paddle.trainer_config_helpers import * - -mode = get_config_arg("mode", str, "generator") -dataSource = get_config_arg("data", str, "mnist") -assert mode in set(["generator", - "discriminator", - "generator_training", - "discriminator_training"]) - -is_generator_training = mode == "generator_training" -is_discriminator_training = mode == "discriminator_training" -is_generator = mode == "generator" -is_discriminator = mode == "discriminator" - -# The network structure below follows the dcgan paper -# (https://arxiv.org/abs/1511.06434) - -print('mode=%s' % mode) -# the dim of the noise (z) as the input of the generator network -noise_dim = 100 -# the number of filters in the layer in generator/discriminator that is -# closet to the image -gf_dim = 64 -df_dim = 64 -if dataSource == "mnist": - sample_dim = 28 # image dim - c_dim = 1 # image color -else: - sample_dim = 32 - c_dim = 3 -s2, s4 = int(sample_dim/2), int(sample_dim/4), -s8, s16 = int(sample_dim/8), int(sample_dim/16) - -settings( - batch_size=128, - learning_rate=2e-4, - learning_method=AdamOptimizer(beta1=0.5) -) - -def conv_bn(input, channels, imgSize, num_filters, output_x, stride, name, - param_attr, bias_attr, param_attr_bn, bn, trans=False, - act=ReluActivation()): - - """ - conv_bn is a utility function that constructs a convolution/deconv layer - with an optional batch_norm layer - - :param bn: whether to use batch_norm_layer - :type bn: bool - :param trans: whether to use conv (False) or deconv (True) - :type trans: bool - """ - - # calculate the filter_size and padding size based on the given - # imgSize and ouput size - tmp = imgSize - (output_x - 1) * stride - if tmp <= 1 or tmp > 5: - raise ValueError("conv input-output dimension does not fit") - elif tmp <= 3: - filter_size = tmp + 2 - padding = 1 - else: - filter_size = tmp - padding = 0 - - print (imgSize, output_x, stride, filter_size, padding) - - if trans: - nameApx = "_conv" - else: - nameApx = "_convt" - - if bn: - conv = img_conv_layer(input, filter_size=filter_size, - num_filters=num_filters, - name=name + nameApx, num_channels=channels, - act=LinearActivation(), groups=1, stride=stride, - padding=padding, bias_attr=bias_attr, - param_attr=param_attr, shared_biases=True, layer_attr=None, - filter_size_y=None, stride_y=None, padding_y=None, - trans=trans) - - conv_bn = batch_norm_layer(conv, - act=act, - name=name + nameApx + "_bn", - bias_attr=bias_attr, - param_attr=param_attr_bn, - use_global_stats=False) - - return conv_bn - else: - conv = img_conv_layer(input, filter_size=filter_size, - num_filters=num_filters, - name=name + nameApx, num_channels=channels, - act=act, groups=1, stride=stride, - padding=padding, bias_attr=bias_attr, - param_attr=param_attr, shared_biases=True, layer_attr=None, - 
filter_size_y=None, stride_y=None, padding_y=None, - trans=trans) - return conv - -def generator(noise): - """ - generator generates a sample given noise - """ - param_attr = ParamAttr(is_static=is_discriminator_training, - initial_mean=0.0, - initial_std=0.02) - bias_attr = ParamAttr(is_static=is_discriminator_training, - initial_mean=0.0, - initial_std=0.0) - - param_attr_bn=ParamAttr(is_static=is_discriminator_training, - initial_mean=1.0, - initial_std=0.02) - - h1 = fc_layer(input=noise, - name="gen_layer_h1", - size=s8 * s8 * gf_dim * 4, - bias_attr=bias_attr, - param_attr=param_attr, - act=LinearActivation()) - - h1_bn = batch_norm_layer(h1, - act=ReluActivation(), - name="gen_layer_h1_bn", - bias_attr=bias_attr, - param_attr=param_attr_bn, - use_global_stats=False) - - h2_bn = conv_bn(h1_bn, - channels=gf_dim*4, - output_x=s8, - num_filters=gf_dim*2, - imgSize=s4, - stride=2, - name="gen_layer_h2", - param_attr=param_attr, - bias_attr=bias_attr, - param_attr_bn=param_attr_bn, - bn=True, - trans=True) - - h3_bn = conv_bn(h2_bn, - channels=gf_dim*2, - output_x=s4, - num_filters=gf_dim, - imgSize=s2, - stride=2, - name="gen_layer_h3", - param_attr=param_attr, - bias_attr=bias_attr, - param_attr_bn=param_attr_bn, - bn=True, - trans=True) - - - return conv_bn(h3_bn, - channels=gf_dim, - output_x=s2, - num_filters=c_dim, - imgSize=sample_dim, - stride=2, - name="gen_layer_h4", - param_attr=param_attr, - bias_attr=bias_attr, - param_attr_bn=param_attr_bn, - bn=False, - trans=True, - act=TanhActivation()) - - -def discriminator(sample): - """ - discriminator ouputs the probablity of a sample is from generator - or real data. - The output has two dimenstional: dimension 0 is the probablity - of the sample is from generator and dimension 1 is the probabblity - of the sample is from real data. 
- """ - param_attr = ParamAttr(is_static=is_generator_training, - initial_mean=0.0, - initial_std=0.02) - bias_attr = ParamAttr(is_static=is_generator_training, - initial_mean=0.0, - initial_std=0.0) - - param_attr_bn=ParamAttr(is_static=is_generator_training, - initial_mean=1.0, - initial_std=0.02) - - h0 = conv_bn(sample, - channels=c_dim, - imgSize=sample_dim, - num_filters=df_dim, - output_x=s2, - stride=2, - name="dis_h0", - param_attr=param_attr, - bias_attr=bias_attr, - param_attr_bn=param_attr_bn, - bn=False) - - h1_bn = conv_bn(h0, - channels=df_dim, - imgSize=s2, - num_filters=df_dim*2, - output_x=s4, - stride=2, - name="dis_h1", - param_attr=param_attr, - bias_attr=bias_attr, - param_attr_bn=param_attr_bn, - bn=True) - - h2_bn = conv_bn(h1_bn, - channels=df_dim*2, - imgSize=s4, - num_filters=df_dim*4, - output_x=s8, - stride=2, - name="dis_h2", - param_attr=param_attr, - bias_attr=bias_attr, - param_attr_bn=param_attr_bn, - bn=True) - - return fc_layer(input=h2_bn, name="dis_prob", size=2, - bias_attr=bias_attr, - param_attr=param_attr, - act=SoftmaxActivation()) - - - -if is_generator_training: - noise = data_layer(name="noise", size=noise_dim) - sample = generator(noise) - -if is_discriminator_training: - sample = data_layer(name="sample", size=sample_dim * sample_dim*c_dim) - -if is_generator_training or is_discriminator_training: - label = data_layer(name="label", size=1) - prob = discriminator(sample) - cost = cross_entropy(input=prob, label=label) - classification_error_evaluator(input=prob, label=label, name=mode+'_error') - outputs(cost) - -if is_generator: - noise = data_layer(name="noise", size=noise_dim) - outputs(generator(noise)) diff --git a/demo/gan/gan_trainer.py b/demo/gan/gan_trainer.py deleted file mode 100644 index a8c1bd0414529f48feb23bdb850751782de52c04..0000000000000000000000000000000000000000 --- a/demo/gan/gan_trainer.py +++ /dev/null @@ -1,329 +0,0 @@ -# Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserved -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. 
-
-import argparse
-import random
-import numpy
-import cPickle
-import sys,os
-from PIL import Image
-
-from paddle.trainer.config_parser import parse_config
-from paddle.trainer.config_parser import logger
-import py_paddle.swig_paddle as api
-import matplotlib.pyplot as plt
-
-def plot2DScatter(data, outputfile):
- '''
- Plot the data as a 2D scatter plot and save to outputfile
- data needs to be two-dimensional
- '''
- x = data[:, 0]
- y = data[:, 1]
- logger.info("The mean vector is %s" % numpy.mean(data, 0))
- logger.info("The std vector is %s" % numpy.std(data, 0))
-
- heatmap, xedges, yedges = numpy.histogram2d(x, y, bins=50)
- extent = [xedges[0], xedges[-1], yedges[0], yedges[-1]]
-
- plt.clf()
- plt.scatter(x, y)
- plt.savefig(outputfile, bbox_inches='tight')
-
-def CHECK_EQ(a, b):
- assert a == b, "a=%s, b=%s" % (a, b)
-
-def copy_shared_parameters(src, dst):
- '''
- copy the parameters from src to dst
- :param src: the source of the parameters
- :type src: GradientMachine
- :param dst: the destination of the parameters
- :type dst: GradientMachine
- '''
- src_params = [src.getParameter(i)
- for i in xrange(src.getParameterSize())]
- src_params = dict([(p.getName(), p) for p in src_params])
-
-
- for i in xrange(dst.getParameterSize()):
- dst_param = dst.getParameter(i)
- src_param = src_params.get(dst_param.getName(), None)
- if src_param is None:
- continue
- src_value = src_param.getBuf(api.PARAMETER_VALUE)
- dst_value = dst_param.getBuf(api.PARAMETER_VALUE)
- CHECK_EQ(len(src_value), len(dst_value))
- dst_value.copyFrom(src_value)
- dst_param.setValueUpdated()
-
-def print_parameters(src):
- src_params = [src.getParameter(i)
- for i in xrange(src.getParameterSize())]
-
- print "***************"
- for p in src_params:
- print "Name is %s" % p.getName()
- print "value is %s \n" % p.getBuf(api.PARAMETER_VALUE).copyToNumpyArray()
-
-def load_mnist_data(imageFile):
- f = open(imageFile, "rb")
- f.read(16)
-
- # Define number of samples for train/test
- if "train" in imageFile:
- n = 60000
- else:
- n = 10000
-
- data = numpy.fromfile(f, 'ubyte', count=n*28*28).reshape((n, 28*28))
- data = data / 255.0 * 2.0 - 1.0
-
- f.close()
- return data.astype('float32')
-
-def load_cifar_data(cifar_path):
- batch_size = 10000
- data = numpy.zeros((5*batch_size, 32*32*3), dtype = "float32")
- for i in range(1, 6):
- file = cifar_path + "/data_batch_" + str(i)
- fo = open(file, 'rb')
- dict = cPickle.load(fo)
- fo.close()
- data[(i - 1)*batch_size:(i*batch_size), :] = dict["data"]
-
- data = data / 255.0 * 2.0 - 1.0
- return data
-
-# synthesize 2-D uniform data
-def load_uniform_data():
- data = numpy.random.rand(1000000, 2).astype('float32')
- return data
-
-def merge(images, size):
- if images.shape[1] == 28*28:
- h, w, c = 28, 28, 1
- else:
- h, w, c = 32, 32, 3
- img = numpy.zeros((h * size[0], w * size[1], c))
- for idx in xrange(size[0] * size[1]):
- i = idx % size[1]
- j = idx // size[1]
- img[j*h:j*h+h, i*w:i*w+w, :] = \
- ((images[idx, :].reshape((h, w, c), order="F").transpose(1, 0, 2) + 1.0) / 2.0 * 255.0)
- return img.astype('uint8')
-
-def save_images(images, path):
- merged_img = merge(images, [8, 8])
- if merged_img.shape[2] == 1:
- im = Image.fromarray(numpy.squeeze(merged_img)).convert('RGB')
- else:
- im = Image.fromarray(merged_img, mode="RGB")
- im.save(path)
-
-def get_real_samples(batch_size, data_np):
- return data_np[numpy.random.choice(data_np.shape[0], batch_size,
- replace=False),:]
-
-def get_noise(batch_size, noise_dim):
- return 
numpy.random.normal(size=(batch_size, noise_dim)).astype('float32') - -def get_fake_samples(generator_machine, batch_size, noise): - gen_inputs = api.Arguments.createArguments(1) - gen_inputs.setSlotValue(0, api.Matrix.createDenseFromNumpy(noise)) - gen_outputs = api.Arguments.createArguments(0) - generator_machine.forward(gen_inputs, gen_outputs, api.PASS_TEST) - fake_samples = gen_outputs.getSlotValue(0).copyToNumpyMat() - return fake_samples - -def get_training_loss(training_machine, inputs): - outputs = api.Arguments.createArguments(0) - training_machine.forward(inputs, outputs, api.PASS_TEST) - loss = outputs.getSlotValue(0).copyToNumpyMat() - return numpy.mean(loss) - -def prepare_discriminator_data_batch_pos(batch_size, data_np): - real_samples = get_real_samples(batch_size, data_np) - labels = numpy.ones(batch_size, dtype='int32') - inputs = api.Arguments.createArguments(2) - inputs.setSlotValue(0, api.Matrix.createDenseFromNumpy(real_samples)) - inputs.setSlotIds(1, api.IVector.createVectorFromNumpy(labels)) - return inputs - -def prepare_discriminator_data_batch_neg(generator_machine, batch_size, noise): - fake_samples = get_fake_samples(generator_machine, batch_size, noise) - labels = numpy.zeros(batch_size, dtype='int32') - inputs = api.Arguments.createArguments(2) - inputs.setSlotValue(0, api.Matrix.createDenseFromNumpy(fake_samples)) - inputs.setSlotIds(1, api.IVector.createVectorFromNumpy(labels)) - return inputs - -def prepare_generator_data_batch(batch_size, noise): - label = numpy.ones(batch_size, dtype='int32') - inputs = api.Arguments.createArguments(2) - inputs.setSlotValue(0, api.Matrix.createDenseFromNumpy(noise)) - inputs.setSlotIds(1, api.IVector.createVectorFromNumpy(label)) - return inputs - - -def find(iterable, cond): - for item in iterable: - if cond(item): - return item - return None - - -def get_layer_size(model_conf, layer_name): - layer_conf = find(model_conf.layers, lambda x: x.name == layer_name) - assert layer_conf is not None, "Cannot find '%s' layer" % layer_name - return layer_conf.size - - -def main(): - parser = argparse.ArgumentParser() - parser.add_argument("-d", "--data_source", help="mnist or cifar or uniform") - parser.add_argument("--use_gpu", default="1", - help="1 means use gpu for training") - parser.add_argument("--gpu_id", default="0", - help="the gpu_id parameter") - args = parser.parse_args() - data_source = args.data_source - use_gpu = args.use_gpu - assert data_source in ["mnist", "cifar", "uniform"] - assert use_gpu in ["0", "1"] - - if not os.path.exists("./%s_samples/" % data_source): - os.makedirs("./%s_samples/" % data_source) - - if not os.path.exists("./%s_params/" % data_source): - os.makedirs("./%s_params/" % data_source) - - api.initPaddle('--use_gpu=' + use_gpu, '--dot_period=10', '--log_period=100', - '--gpu_id=' + args.gpu_id, '--save_dir=' + "./%s_params/" % data_source) - - if data_source == "uniform": - conf = "gan_conf.py" - num_iter = 10000 - else: - conf = "gan_conf_image.py" - num_iter = 1000 - - gen_conf = parse_config(conf, "mode=generator_training,data=" + data_source) - dis_conf = parse_config(conf, "mode=discriminator_training,data=" + data_source) - generator_conf = parse_config(conf, "mode=generator,data=" + data_source) - batch_size = dis_conf.opt_config.batch_size - noise_dim = get_layer_size(gen_conf.model_config, "noise") - - if data_source == "mnist": - data_np = load_mnist_data("./data/mnist_data/train-images-idx3-ubyte") - elif data_source == "cifar": - data_np = 
load_cifar_data("./data/cifar-10-batches-py/") - else: - data_np = load_uniform_data() - - # this creates a gradient machine for discriminator - dis_training_machine = api.GradientMachine.createFromConfigProto( - dis_conf.model_config) - # this create a gradient machine for generator - gen_training_machine = api.GradientMachine.createFromConfigProto( - gen_conf.model_config) - - # generator_machine is used to generate data only, which is used for - # training discriminator - logger.info(str(generator_conf.model_config)) - generator_machine = api.GradientMachine.createFromConfigProto( - generator_conf.model_config) - - dis_trainer = api.Trainer.create( - dis_conf, dis_training_machine) - - gen_trainer = api.Trainer.create( - gen_conf, gen_training_machine) - - dis_trainer.startTrain() - gen_trainer.startTrain() - - # Sync parameters between networks (GradientMachine) at the beginning - copy_shared_parameters(gen_training_machine, dis_training_machine) - copy_shared_parameters(gen_training_machine, generator_machine) - - # constrain that either discriminator or generator can not be trained - # consecutively more than MAX_strike times - curr_train = "dis" - curr_strike = 0 - MAX_strike = 5 - - for train_pass in xrange(100): - dis_trainer.startTrainPass() - gen_trainer.startTrainPass() - for i in xrange(num_iter): - # Do forward pass in discriminator to get the dis_loss - noise = get_noise(batch_size, noise_dim) - data_batch_dis_pos = prepare_discriminator_data_batch_pos( - batch_size, data_np) - dis_loss_pos = get_training_loss(dis_training_machine, data_batch_dis_pos) - - data_batch_dis_neg = prepare_discriminator_data_batch_neg( - generator_machine, batch_size, noise) - dis_loss_neg = get_training_loss(dis_training_machine, data_batch_dis_neg) - - dis_loss = (dis_loss_pos + dis_loss_neg) / 2.0 - - # Do forward pass in generator to get the gen_loss - data_batch_gen = prepare_generator_data_batch( - batch_size, noise) - gen_loss = get_training_loss(gen_training_machine, data_batch_gen) - - if i % 100 == 0: - print "d_pos_loss is %s d_neg_loss is %s" % (dis_loss_pos, dis_loss_neg) - print "d_loss is %s g_loss is %s" % (dis_loss, gen_loss) - - # Decide which network to train based on the training history - # And the relative size of the loss - if (not (curr_train == "dis" and curr_strike == MAX_strike)) and \ - ((curr_train == "gen" and curr_strike == MAX_strike) or dis_loss > gen_loss): - if curr_train == "dis": - curr_strike += 1 - else: - curr_train = "dis" - curr_strike = 1 - dis_trainer.trainOneDataBatch(batch_size, data_batch_dis_neg) - dis_trainer.trainOneDataBatch(batch_size, data_batch_dis_pos) - copy_shared_parameters(dis_training_machine, gen_training_machine) - - else: - if curr_train == "gen": - curr_strike += 1 - else: - curr_train = "gen" - curr_strike = 1 - gen_trainer.trainOneDataBatch(batch_size, data_batch_gen) - # TODO: add API for paddle to allow true parameter sharing between different GradientMachines - # so that we do not need to copy shared parameters. 
- copy_shared_parameters(gen_training_machine, dis_training_machine) - copy_shared_parameters(gen_training_machine, generator_machine) - - dis_trainer.finishTrainPass() - gen_trainer.finishTrainPass() - # At the end of each pass, save the generated samples/images - fake_samples = get_fake_samples(generator_machine, batch_size, noise) - if data_source == "uniform": - plot2DScatter(fake_samples, "./%s_samples/train_pass%s.png" % (data_source, train_pass)) - else: - save_images(fake_samples, "./%s_samples/train_pass%s.png" % (data_source, train_pass)) - dis_trainer.finishTrain() - gen_trainer.finishTrain() - -if __name__ == '__main__': - main() diff --git a/demo/image_classification/.gitignore b/demo/image_classification/.gitignore deleted file mode 100644 index 6a05b8f6632db0977fceade8b48a89b9f7f6e6cc..0000000000000000000000000000000000000000 --- a/demo/image_classification/.gitignore +++ /dev/null @@ -1,9 +0,0 @@ -data/cifar-10-batches-py -data/cifar-out -cifar_vgg_model/* -plot.png -train.log -image_provider_copy_1.py -*pyc -train.list -test.list diff --git a/demo/image_classification/data/download_cifar.sh b/demo/image_classification/data/download_cifar.sh deleted file mode 100755 index 52e82d0d9812c88e5c85cffc0585e3425b862809..0000000000000000000000000000000000000000 --- a/demo/image_classification/data/download_cifar.sh +++ /dev/null @@ -1,20 +0,0 @@ -# Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserved -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -set -e -wget https://www.cs.toronto.edu/~kriz/cifar-10-python.tar.gz -tar zxf cifar-10-python.tar.gz -rm cifar-10-python.tar.gz -rm -rf cifar-out/* -echo Converting CIFAR data to images..... -python process_cifar.py ./cifar-10-batches-py ./cifar-out diff --git a/demo/image_classification/data/process_cifar.py b/demo/image_classification/data/process_cifar.py deleted file mode 100644 index db6666189e5b8008a6b66fb64afcdf98980e72bb..0000000000000000000000000000000000000000 --- a/demo/image_classification/data/process_cifar.py +++ /dev/null @@ -1,89 +0,0 @@ -# Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserved -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -import numpy as np -import sys -import os -import PIL.Image as Image -""" - Usage: python process_cifar input_dir output_dir -""" - - -def mkdir_not_exist(path): - """ - Make dir if the path does not exist. - path: the path to be created. 
- """ - if not os.path.exists(path): - os.mkdir(path) - - -def create_dir_structure(output_dir): - """ - Create the directory structure for the directory. - output_dir: the direcotry structure path. - """ - mkdir_not_exist(os.path.join(output_dir)) - mkdir_not_exist(os.path.join(output_dir, "train")) - mkdir_not_exist(os.path.join(output_dir, "test")) - - -def convert_batch(batch_path, label_set, label_map, output_dir, data_split): - """ - Convert CIFAR batch to the structure of Paddle format. - batch_path: the batch to be converted. - label_set: the set of labels. - output_dir: the output path. - data_split: whether it is training or testing data. - """ - data = np.load(batch_path) - for data, label, filename in zip(data['data'], data['labels'], - data['filenames']): - data = data.reshape((3, 32, 32)) - data = np.transpose(data, (1, 2, 0)) - label = label_map[label] - output_dir_this = os.path.join(output_dir, data_split, str(label)) - output_filename = os.path.join(output_dir_this, filename) - if not label in label_set: - label_set[label] = True - mkdir_not_exist(output_dir_this) - Image.fromarray(data).save(output_filename) - - -if __name__ == '__main__': - input_dir = sys.argv[1] - output_dir = sys.argv[2] - num_batch = 5 - create_dir_structure(output_dir) - label_map = { - 0: "airplane", - 1: "automobile", - 2: "bird", - 3: "cat", - 4: "deer", - 5: "dog", - 6: "frog", - 7: "horse", - 8: "ship", - 9: "truck" - } - labels = {} - for i in range(1, num_batch + 1): - convert_batch( - os.path.join(input_dir, "data_batch_%d" % i), labels, label_map, - output_dir, "train") - convert_batch( - os.path.join(input_dir, "test_batch"), {}, label_map, output_dir, - "test") diff --git a/demo/image_classification/image_provider.py b/demo/image_classification/image_provider.py deleted file mode 100644 index 87eed5eebd7680e578c822083efb8b9eab16b266..0000000000000000000000000000000000000000 --- a/demo/image_classification/image_provider.py +++ /dev/null @@ -1,86 +0,0 @@ -# Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserved -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. 
- -import io -import random - -import paddle.utils.image_util as image_util -from paddle.trainer.PyDataProvider2 import * - - -# -# {'img_size': 32, -# 'settings': , -# 'color': True, -# 'mean_img_size': 32, -# 'meta': './data/cifar-out/batches/batches.meta', -# 'num_classes': 10, -# 'file_list': ('./data/cifar-out/batches/train_batch_000',), -# 'use_jpeg': True} -def hook(settings, img_size, mean_img_size, num_classes, color, meta, use_jpeg, - is_train, **kwargs): - settings.mean_img_size = mean_img_size - settings.img_size = img_size - settings.num_classes = num_classes - settings.color = color - settings.is_train = is_train - - if settings.color: - settings.img_raw_size = settings.img_size * settings.img_size * 3 - else: - settings.img_raw_size = settings.img_size * settings.img_size - - settings.meta_path = meta - settings.use_jpeg = use_jpeg - - settings.img_mean = image_util.load_meta(settings.meta_path, - settings.mean_img_size, - settings.img_size, settings.color) - - settings.logger.info('Image size: %s', settings.img_size) - settings.logger.info('Meta path: %s', settings.meta_path) - settings.input_types = [ - dense_vector(settings.img_raw_size), # image feature - integer_value(settings.num_classes) - ] # labels - - settings.logger.info('DataProvider Initialization finished') - - -@provider(init_hook=hook, min_pool_size=0) -def processData(settings, file_list): - """ - The main function for loading data. - Load the batch, iterate all the images and labels in this batch. - file_list: the batch file list. - """ - with open(file_list, 'r') as fdata: - lines = [line.strip() for line in fdata] - random.shuffle(lines) - for file_name in lines: - with io.open(file_name.strip(), 'rb') as file: - data = cPickle.load(file) - indexes = list(range(len(data['images']))) - if settings.is_train: - random.shuffle(indexes) - for i in indexes: - if settings.use_jpeg == 1: - img = image_util.decode_jpeg(data['images'][i]) - else: - img = data['images'][i] - img_feat = image_util.preprocess_img( - img, settings.img_mean, settings.img_size, - settings.is_train, settings.color) - label = data['labels'][i] - yield img_feat.astype('float32'), int(label) diff --git a/demo/image_classification/image_util.py b/demo/image_classification/image_util.py deleted file mode 100644 index f09605394a19e09d92e555eeefb0b5646625b618..0000000000000000000000000000000000000000 --- a/demo/image_classification/image_util.py +++ /dev/null @@ -1,221 +0,0 @@ -# Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserved -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -import numpy as np -from PIL import Image -from cStringIO import StringIO - - -def resize_image(img, target_size): - """ - Resize an image so that the shorter edge has length target_size. - img: the input image to be resized. - target_size: the target resized image size. 
- """ - percent = (target_size / float(min(img.size[0], img.size[1]))) - resized_size = int(round(img.size[0] * percent)), int( - round(img.size[1] * percent)) - img = img.resize(resized_size, Image.ANTIALIAS) - return img - - -def flip(im): - """ - Return the flipped image. - Flip an image along the horizontal direction. - im: input image, (H x W x K) ndarrays - """ - if len(im.shape) == 3: - return im[:, :, ::-1] - else: - return im[:, ::-1] - - -def crop_img(im, inner_size, color=True, test=True): - """ - Return cropped image. - The size of the cropped image is inner_size * inner_size. - im: (K x H x W) ndarrays - inner_size: the cropped image size. - color: whether it is color image. - test: whether in test mode. - If False, does random cropping and flipping. - If True, crop the center of images. - """ - if color: - height, width = max(inner_size, im.shape[1]), max(inner_size, - im.shape[2]) - padded_im = np.zeros((3, height, width)) - startY = (height - im.shape[1]) / 2 - startX = (width - im.shape[2]) / 2 - endY, endX = startY + im.shape[1], startX + im.shape[2] - padded_im[:, startY:endY, startX:endX] = im - else: - im = im.astype('float32') - height, width = max(inner_size, im.shape[0]), max(inner_size, - im.shape[1]) - padded_im = np.zeros((height, width)) - startY = (height - im.shape[0]) / 2 - startX = (width - im.shape[1]) / 2 - endY, endX = startY + im.shape[0], startX + im.shape[1] - padded_im[startY:endY, startX:endX] = im - if test: - startY = (height - inner_size) / 2 - startX = (width - inner_size) / 2 - else: - startY = np.random.randint(0, height - inner_size + 1) - startX = np.random.randint(0, width - inner_size + 1) - endY, endX = startY + inner_size, startX + inner_size - if color: - pic = padded_im[:, startY:endY, startX:endX] - else: - pic = padded_im[startY:endY, startX:endX] - if (not test) and (np.random.randint(2) == 0): - pic = flip(pic) - return pic - - -def decode_jpeg(jpeg_string): - np_array = np.array(Image.open(StringIO(jpeg_string))) - if len(np_array.shape) == 3: - np_array = np.transpose(np_array, (2, 0, 1)) - return np_array - - -def preprocess_img(im, img_mean, crop_size, is_train, color=True): - """ - Does data augmentation for images. - If is_train is false, cropping the center region from the image. - If is_train is true, randomly crop a region from the image, - and randomy does flipping. - im: (K x H x W) ndarrays - """ - im = im.astype('float32') - test = not is_train - pic = crop_img(im, crop_size, color, test) - pic -= img_mean - return pic.flatten() - - -def load_meta(meta_path, mean_img_size, crop_size, color=True): - """ - Return the loaded meta file. - Load the meta image, which is the mean of the images in the dataset. - The mean image is subtracted from every input image so that the expected mean - of each input image is zero. - """ - mean = np.load(meta_path)['data_mean'] - border = (mean_img_size - crop_size) / 2 - if color: - assert (mean_img_size * mean_img_size * 3 == mean.shape[0]) - mean = mean.reshape(3, mean_img_size, mean_img_size) - mean = mean[:, border:border + crop_size, border:border + - crop_size].astype('float32') - else: - assert (mean_img_size * mean_img_size == mean.shape[0]) - mean = mean.reshape(mean_img_size, mean_img_size) - mean = mean[border:border + crop_size, border:border + - crop_size].astype('float32') - return mean - - -def load_image(img_path, is_color=True): - """ - Load image and return. - img_path: image path. - is_color: is color image or not. 
- """ - img = Image.open(img_path) - img.load() - return img - - -def oversample(img, crop_dims): - """ - image : iterable of (H x W x K) ndarrays - crop_dims: (height, width) tuple for the crops. - Returned data contains ten crops of input image, namely, - four corner patches and the center patch as well as their - horizontal reflections. - """ - # Dimensions and center. - im_shape = np.array(img[0].shape) - crop_dims = np.array(crop_dims) - im_center = im_shape[:2] / 2.0 - - # Make crop coordinates - h_indices = (0, im_shape[0] - crop_dims[0]) - w_indices = (0, im_shape[1] - crop_dims[1]) - crops_ix = np.empty((5, 4), dtype=int) - curr = 0 - for i in h_indices: - for j in w_indices: - crops_ix[curr] = (i, j, i + crop_dims[0], j + crop_dims[1]) - curr += 1 - crops_ix[4] = np.tile(im_center, (1, 2)) + np.concatenate( - [-crop_dims / 2.0, crop_dims / 2.0]) - crops_ix = np.tile(crops_ix, (2, 1)) - - # Extract crops - crops = np.empty( - (10 * len(img), crop_dims[0], crop_dims[1], im_shape[-1]), - dtype=np.float32) - ix = 0 - for im in img: - for crop in crops_ix: - crops[ix] = im[crop[0]:crop[2], crop[1]:crop[3], :] - ix += 1 - crops[ix - 5:ix] = crops[ix - 5:ix, :, ::-1, :] # flip for mirrors - return crops - - -class ImageTransformer: - def __init__(self, - transpose=None, - channel_swap=None, - mean=None, - is_color=True): - self.transpose = transpose - self.channel_swap = None - self.mean = None - self.is_color = is_color - - def set_transpose(self, order): - if self.is_color: - assert 3 == len(order) - self.transpose = order - - def set_channel_swap(self, order): - if self.is_color: - assert 3 == len(order) - self.channel_swap = order - - def set_mean(self, mean): - # mean value, may be one value per channel - if mean.ndim == 1: - mean = mean[:, np.newaxis, np.newaxis] - else: - # elementwise mean - if self.is_color: - assert len(mean.shape) == 3 - self.mean = mean - - def transformer(self, data): - if self.transpose is not None: - data = data.transpose(self.transpose) - if self.channel_swap is not None: - data = data[self.channel_swap, :, :] - if self.mean is not None: - data -= self.mean - return data diff --git a/demo/image_classification/predict.sh b/demo/image_classification/predict.sh deleted file mode 100755 index 9d5785c9a1a4dac12f7940fa708b1a79c6ec8a93..0000000000000000000000000000000000000000 --- a/demo/image_classification/predict.sh +++ /dev/null @@ -1,20 +0,0 @@ -#!/bin/bash -# Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserved -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -set -e - -model=cifar_vgg_model/pass-00299/ -image=data/cifar-out/test/airplane/seaplane_s_000978.png -use_gpu=1 -python prediction.py $model $image $use_gpu diff --git a/demo/image_classification/prediction.py b/demo/image_classification/prediction.py deleted file mode 100755 index 9a86aafcb2fa4d4354d1dd9443c1b73ddcda980b..0000000000000000000000000000000000000000 --- a/demo/image_classification/prediction.py +++ /dev/null @@ -1,159 +0,0 @@ -# Copyright (c) 2016 PaddlePaddle Authors. 
All Rights Reserved -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -import os, sys -import numpy as np -import logging -from PIL import Image -from optparse import OptionParser - -import paddle.utils.image_util as image_util - -from py_paddle import swig_paddle, DataProviderConverter -from paddle.trainer.PyDataProvider2 import dense_vector -from paddle.trainer.config_parser import parse_config - -logging.basicConfig( - format='[%(levelname)s %(asctime)s %(filename)s:%(lineno)s] %(message)s') -logging.getLogger().setLevel(logging.INFO) - - -class ImageClassifier(): - def __init__(self, - train_conf, - use_gpu=True, - model_dir=None, - resize_dim=None, - crop_dim=None, - mean_file=None, - oversample=False, - is_color=True): - """ - train_conf: network configure. - model_dir: string, directory of model. - resize_dim: int, resized image size. - crop_dim: int, crop size. - mean_file: string, image mean file. - oversample: bool, oversample means multiple crops, namely five - patches (the four corner patches and the center - patch) as well as their horizontal reflections, - ten crops in all. - """ - self.train_conf = train_conf - self.model_dir = model_dir - if model_dir is None: - self.model_dir = os.path.dirname(train_conf) - - self.resize_dim = resize_dim - self.crop_dims = [crop_dim, crop_dim] - self.oversample = oversample - self.is_color = is_color - - self.transformer = image_util.ImageTransformer(is_color=is_color) - self.transformer.set_transpose((2, 0, 1)) - - self.mean_file = mean_file - mean = np.load(self.mean_file)['data_mean'] - mean = mean.reshape(3, self.crop_dims[0], self.crop_dims[1]) - self.transformer.set_mean(mean) # mean pixel - gpu = 1 if use_gpu else 0 - conf_args = "is_test=1,use_gpu=%d,is_predict=1" % (gpu) - conf = parse_config(train_conf, conf_args) - swig_paddle.initPaddle("--use_gpu=%d" % (gpu)) - self.network = swig_paddle.GradientMachine.createFromConfigProto( - conf.model_config) - assert isinstance(self.network, swig_paddle.GradientMachine) - self.network.loadParameters(self.model_dir) - - data_size = 3 * self.crop_dims[0] * self.crop_dims[1] - slots = [dense_vector(data_size)] - self.converter = DataProviderConverter(slots) - - def get_data(self, img_path): - """ - 1. load image from img_path. - 2. resize or oversampling. - 3. transformer data: transpose, sub mean. - return K x H x W ndarray. - img_path: image path. 
- """ - image = image_util.load_image(img_path, self.is_color) - if self.oversample: - # image_util.resize_image: short side is self.resize_dim - image = image_util.resize_image(image, self.resize_dim) - image = np.array(image) - input = np.zeros( - (1, image.shape[0], image.shape[1], 3), dtype=np.float32) - input[0] = image.astype(np.float32) - input = image_util.oversample(input, self.crop_dims) - else: - image = image.resize(self.crop_dims, Image.ANTIALIAS) - input = np.zeros( - (1, self.crop_dims[0], self.crop_dims[1], 3), dtype=np.float32) - input[0] = np.array(image).astype(np.float32) - - data_in = [] - for img in input: - img = self.transformer.transformer(img).flatten() - data_in.append([img.tolist()]) - return data_in - - def forward(self, input_data): - in_arg = self.converter(input_data) - return self.network.forwardTest(in_arg) - - def forward(self, data, output_layer): - """ - input_data: py_paddle input data. - output_layer: specify the name of probability, namely the layer with - softmax activation. - return: the predicting probability of each label. - """ - input = self.converter(data) - self.network.forwardTest(input) - output = self.network.getLayerOutputs(output_layer) - # For oversampling, average predictions across crops. - # If not, the shape of output[name]: (1, class_number), - # the mean is also applicable. - return output[output_layer].mean(0) - - def predict(self, image=None, output_layer=None): - assert isinstance(image, basestring) - assert isinstance(output_layer, basestring) - data = self.get_data(image) - prob = self.forward(data, output_layer) - lab = np.argsort(-prob) - logging.info("Label of %s is: %d", image, lab[0]) - - -if __name__ == '__main__': - image_size = 32 - crop_size = 32 - multi_crop = True - config = "vgg_16_cifar.py" - output_layer = "__fc_layer_1__" - mean_path = "data/cifar-out/batches/batches.meta" - model_path = sys.argv[1] - image = sys.argv[2] - use_gpu = bool(int(sys.argv[3])) - - obj = ImageClassifier( - train_conf=config, - model_dir=model_path, - resize_dim=image_size, - crop_dim=crop_size, - mean_file=mean_path, - use_gpu=use_gpu, - oversample=multi_crop) - obj.predict(image, output_layer) diff --git a/demo/image_classification/preprocess.py b/demo/image_classification/preprocess.py deleted file mode 100755 index 2947ad239c36f9a02ed67ccf5906380cb70e37ce..0000000000000000000000000000000000000000 --- a/demo/image_classification/preprocess.py +++ /dev/null @@ -1,54 +0,0 @@ -# Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserved -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. 
-
-from paddle.utils.preprocess_img import ImageClassificationDatasetCreater
-from optparse import OptionParser
-
-
-def option_parser():
- parser = OptionParser(usage="usage: python preprocess.py "\
- "-i data_dir [options]")
- parser.add_option(
- "-i",
- "--input",
- action="store",
- dest="input",
- help="Input data directory.")
- parser.add_option(
- "-s",
- "--size",
- action="store",
- dest="size",
- help="Processed image size.")
- parser.add_option(
- "-c",
- "--color",
- action="store",
- dest="color",
- help="Whether to use color images (1 means color).")
- return parser.parse_args()
-
-
-if __name__ == '__main__':
- options, args = option_parser()
- data_dir = options.input
- processed_image_size = int(options.size)
- color = options.color == "1"
- data_creator = ImageClassificationDatasetCreater(
- data_dir, processed_image_size, color)
- data_creator.train_list_name = "train.txt"
- data_creator.test_list_name = "test.txt"
- data_creator.num_per_batch = 1000
- data_creator.overwrite = True
- data_creator.create_batches()
diff --git a/demo/image_classification/preprocess.sh b/demo/image_classification/preprocess.sh
deleted file mode 100755
index c7396c6393599ef3f2c55089eb05f2435b2b4b82..0000000000000000000000000000000000000000
--- a/demo/image_classification/preprocess.sh
+++ /dev/null
@@ -1,22 +0,0 @@
-#!/bin/bash
-# Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserved
-#
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-# http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-set -e
-
-data_dir=./data/cifar-out
-
-python preprocess.py -i $data_dir -s 32 -c 1
-
-echo "data/cifar-out/batches/train.txt" > train.list
-echo "data/cifar-out/batches/test.txt" > test.list
diff --git a/demo/image_classification/train.sh b/demo/image_classification/train.sh
deleted file mode 100755
index 6fc11caf1c75192242482c2e85f8167eb9fba4ec..0000000000000000000000000000000000000000
--- a/demo/image_classification/train.sh
+++ /dev/null
@@ -1,31 +0,0 @@
-#!/bin/bash
-# Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserved
-#
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-# http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-set -e -config=vgg_16_cifar.py -output=./cifar_vgg_model -log=train.log - -paddle train \ ---config=$config \ ---dot_period=10 \ ---log_period=100 \ ---test_all_data_in_one_period=1 \ ---use_gpu=1 \ ---trainer_count=1 \ ---num_passes=300 \ ---save_dir=$output \ -2>&1 | tee $log - -python -m paddle.utils.plotcurve -i $log > plot.png diff --git a/demo/image_classification/vgg_16_cifar.py b/demo/image_classification/vgg_16_cifar.py deleted file mode 100755 index 8ee4a64c15f885023a6e19812885b4f76bb12af9..0000000000000000000000000000000000000000 --- a/demo/image_classification/vgg_16_cifar.py +++ /dev/null @@ -1,58 +0,0 @@ -# Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserved -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -from paddle.trainer_config_helpers import * - -is_predict = get_config_arg("is_predict", bool, False) - -####################Data Configuration ################## -if not is_predict: - data_dir = 'data/cifar-out/batches/' - meta_path = data_dir + 'batches.meta' - - args = { - 'meta': meta_path, - 'mean_img_size': 32, - 'img_size': 32, - 'num_classes': 10, - 'use_jpeg': 1, - 'color': "color" - } - - define_py_data_sources2( - train_list="train.list", - test_list="train.list", - module='image_provider', - obj='processData', - args=args) - -######################Algorithm Configuration ############# -settings( - batch_size=128, - learning_rate=0.1 / 128.0, - learning_method=MomentumOptimizer(0.9), - regularization=L2Regularization(0.0005 * 128)) - -#######################Network Configuration ############# -data_size = 3 * 32 * 32 -label_size = 10 -img = data_layer(name='image', size=data_size) -# small_vgg is predefined in trainer_config_helpers.networks -predict = small_vgg(input_image=img, num_channels=3, num_classes=label_size) - -if not is_predict: - lbl = data_layer(name="label", size=label_size) - outputs(classification_cost(input=predict, label=lbl)) -else: - outputs(predict) diff --git a/demo/introduction/README.md b/demo/introduction/README.md deleted file mode 100644 index 0614a7afe645677ef0b65a17ea05f1dcfa45214f..0000000000000000000000000000000000000000 --- a/demo/introduction/README.md +++ /dev/null @@ -1,3 +0,0 @@ -This folder contains scripts used in PaddlePaddle introduction. -- use `bash train.sh` to train a simple linear regression model -- use `python evaluate_model.py` to read model parameters. You can see that `w` and `b` are very close to [2, 0.3]. diff --git a/demo/introduction/dataprovider.py b/demo/introduction/dataprovider.py deleted file mode 100644 index 03c920cc34b397643e97ad41cf06458245c7ca7b..0000000000000000000000000000000000000000 --- a/demo/introduction/dataprovider.py +++ /dev/null @@ -1,24 +0,0 @@ -# Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserved -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. 
-# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -from paddle.trainer.PyDataProvider2 import * -import random - - -# define data types of input: 2 real numbers -@provider(input_types=[dense_vector(1), dense_vector(1)], use_seq=False) -def process(settings, input_file): - for i in xrange(2000): - x = random.random() - yield [x], [2 * x + 0.3] diff --git a/demo/introduction/evaluate_model.py b/demo/introduction/evaluate_model.py deleted file mode 100755 index eeda43c5c86f3e49f758bf55b16a68387e64238c..0000000000000000000000000000000000000000 --- a/demo/introduction/evaluate_model.py +++ /dev/null @@ -1,39 +0,0 @@ -#!/usr/bin/env python -# -*- coding: UTF-8 -*- - -# Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserved -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -""" -Print model parameters in last model - -Usage: - python evaluate_model.py -""" -import numpy as np -import os - - -def load(file_name): - with open(file_name, 'rb') as f: - f.read(16) # skip header for float type. - return np.fromfile(f, dtype=np.float32) - - -def main(): - print 'w=%.6f, b=%.6f from pass 29' % (load('output/pass-00029/w'), - load('output/pass-00029/b')) - - -if __name__ == '__main__': - main() diff --git a/demo/introduction/train.sh b/demo/introduction/train.sh deleted file mode 100755 index b7bbb90ddd287e3e312a490b53924ae76fb20d2c..0000000000000000000000000000000000000000 --- a/demo/introduction/train.sh +++ /dev/null @@ -1,21 +0,0 @@ -#!/bin/bash -# Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserved -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -set -e - -paddle train \ - --config=trainer_config.py \ - --save_dir=./output \ - --num_passes=30 \ - 2>&1 |tee 'train.log' diff --git a/demo/introduction/trainer_config.py b/demo/introduction/trainer_config.py deleted file mode 100644 index 41cebcf6e146e55efb89c2ceea429fa003ff206e..0000000000000000000000000000000000000000 --- a/demo/introduction/trainer_config.py +++ /dev/null @@ -1,41 +0,0 @@ -# Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserved -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. 
-# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -from paddle.trainer_config_helpers import * - -# 1. read data. Suppose you saved above python code as dataprovider.py -data_file = 'empty.list' -with open(data_file, 'w') as f: - f.writelines(' ') -define_py_data_sources2( - train_list=data_file, - test_list=None, - module='dataprovider', - obj='process', - args={}) - -# 2. learning algorithm -settings(batch_size=12, learning_rate=1e-3, learning_method=MomentumOptimizer()) - -# 3. Network configuration -x = data_layer(name='x', size=1) -y = data_layer(name='y', size=1) -y_predict = fc_layer( - input=x, - param_attr=ParamAttr(name='w'), - size=1, - act=LinearActivation(), - bias_attr=ParamAttr(name='b')) -cost = regression_cost(input=y_predict, label=y) -outputs(cost) diff --git a/demo/mnist/.gitignore b/demo/mnist/.gitignore deleted file mode 100644 index 810910fd5ca56f0cfd7051f3392a9f7ea010d7f0..0000000000000000000000000000000000000000 --- a/demo/mnist/.gitignore +++ /dev/null @@ -1,6 +0,0 @@ -data/raw_data -data/*.list -mnist_vgg_model -plot.png -train.log -*pyc diff --git a/demo/mnist/mnist_provider.py b/demo/mnist/mnist_provider.py deleted file mode 100644 index 6df4676da3bdc2e6949cc911fa3720cb51ddc568..0000000000000000000000000000000000000000 --- a/demo/mnist/mnist_provider.py +++ /dev/null @@ -1,31 +0,0 @@ -from paddle.trainer.PyDataProvider2 import * - - -# Define a py data provider -@provider( - input_types={'pixel': dense_vector(28 * 28), - 'label': integer_value(10)}) -def process(settings, filename): # settings is not used currently. - imgf = filename + "-images-idx3-ubyte" - labelf = filename + "-labels-idx1-ubyte" - f = open(imgf, "rb") - l = open(labelf, "rb") - - f.read(16) - l.read(8) - - # Define number of samples for train/test - if "train" in filename: - n = 60000 - else: - n = 10000 - - for i in range(n): - label = ord(l.read(1)) - pixels = [] - for j in range(28 * 28): - pixels.append(float(ord(f.read(1))) / 255.0) - yield {"pixel": pixels, 'label': label} - - f.close() - l.close() diff --git a/demo/mnist/train.sh b/demo/mnist/train.sh deleted file mode 100755 index da90cd749a02976633d0f0d6e4352d8a85c7cdef..0000000000000000000000000000000000000000 --- a/demo/mnist/train.sh +++ /dev/null @@ -1,31 +0,0 @@ -#!/bin/bash -# Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserved -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. 
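The byte-at-a-time loop in mnist_provider.py above is easy to read but slow; a vectorized sketch of the same idx parsing (the 16- and 8-byte header skips match the f.read(16)/l.read(8) calls above; the data/raw_data paths follow the demo's layout and are an assumption):

    import numpy as np

    def read_mnist(prefix, n):
        # e.g. prefix = "data/raw_data/train", n = 60000 (10000 for test)
        with open(prefix + "-images-idx3-ubyte", "rb") as f:
            f.read(16)  # skip the idx image header
            pixels = np.fromfile(f, 'ubyte', count=n * 28 * 28).reshape((n, 784)) / 255.0
        with open(prefix + "-labels-idx1-ubyte", "rb") as f:
            f.read(8)   # skip the idx label header
            labels = np.fromfile(f, 'ubyte', count=n).astype('int32')
        return pixels, labels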
-set -e -config=vgg_16_mnist.py -output=./mnist_vgg_model -log=train.log - -paddle train \ ---config=$config \ ---dot_period=10 \ ---log_period=100 \ ---test_all_data_in_one_period=1 \ ---use_gpu=0 \ ---trainer_count=1 \ ---num_passes=100 \ ---save_dir=$output \ -2>&1 | tee $log - -python -m paddle.utils.plotcurve -i $log > plot.png diff --git a/demo/model_zoo/embedding/pre_DictAndModel.sh b/demo/model_zoo/embedding/pre_DictAndModel.sh deleted file mode 100755 index f97ef2610734449c88fdfca6216b1cab57472b84..0000000000000000000000000000000000000000 --- a/demo/model_zoo/embedding/pre_DictAndModel.sh +++ /dev/null @@ -1,22 +0,0 @@ -#!/bin/bash -# Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserved -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -set -e -set -x - -# download the dictionary and pretrained model -for file in baidu.dict model_32.emb model_64.emb model_128.emb model_256.emb -do - wget http://paddlepaddle.bj.bcebos.com/model_zoo/embedding/$file -done diff --git a/demo/model_zoo/resnet/classify.py b/demo/model_zoo/resnet/classify.py deleted file mode 100755 index 4631816c43ef48839df1863a0a86c3ab00924d3f..0000000000000000000000000000000000000000 --- a/demo/model_zoo/resnet/classify.py +++ /dev/null @@ -1,312 +0,0 @@ -# Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserved -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -import os -import sys -import cPickle -import logging -from PIL import Image -import numpy as np -from optparse import OptionParser - -import paddle.utils.image_util as image_util - -from py_paddle import swig_paddle, DataProviderConverter -from paddle.trainer.PyDataProvider2 import dense_vector -from paddle.trainer.config_parser import parse_config - -logging.basicConfig( - format='[%(levelname)s %(asctime)s %(filename)s:%(lineno)s] %(message)s') -logging.getLogger().setLevel(logging.INFO) - - -class ImageClassifier(): - def __init__(self, - train_conf, - model_dir=None, - resize_dim=256, - crop_dim=224, - use_gpu=True, - mean_file=None, - output_layer=None, - oversample=False, - is_color=True): - """ - train_conf: network configure. - model_dir: string, directory of model. - resize_dim: int, resized image size. - crop_dim: int, crop size. - mean_file: string, image mean file. - oversample: bool, oversample means multiple crops, namely five - patches (the four corner patches and the center - patch) as well as their horizontal reflections, - ten crops in all. 
- """ - self.train_conf = train_conf - self.model_dir = model_dir - if model_dir is None: - self.model_dir = os.path.dirname(train_conf) - - self.resize_dim = resize_dim - self.crop_dims = [crop_dim, crop_dim] - self.oversample = oversample - self.is_color = is_color - - self.output_layer = output_layer - if self.output_layer: - assert isinstance(self.output_layer, basestring) - self.output_layer = self.output_layer.split(",") - - self.transformer = image_util.ImageTransformer(is_color=is_color) - self.transformer.set_transpose((2, 0, 1)) - self.transformer.set_channel_swap((2, 1, 0)) - - self.mean_file = mean_file - if self.mean_file is not None: - mean = np.load(self.mean_file)['data_mean'] - mean = mean.reshape(3, self.crop_dims[0], self.crop_dims[1]) - self.transformer.set_mean(mean) # mean pixel - else: - # if you use three mean value, set like: - # this three mean value is calculated from ImageNet. - self.transformer.set_mean(np.array([103.939, 116.779, 123.68])) - - conf_args = "is_test=1,use_gpu=%d,is_predict=1" % (int(use_gpu)) - conf = parse_config(train_conf, conf_args) - swig_paddle.initPaddle("--use_gpu=%d" % (int(use_gpu))) - self.network = swig_paddle.GradientMachine.createFromConfigProto( - conf.model_config) - assert isinstance(self.network, swig_paddle.GradientMachine) - self.network.loadParameters(self.model_dir) - - data_size = 3 * self.crop_dims[0] * self.crop_dims[1] - slots = [dense_vector(data_size)] - self.converter = DataProviderConverter(slots) - - def get_data(self, img_path): - """ - 1. load image from img_path. - 2. resize or oversampling. - 3. transformer data: transpose, channel swap, sub mean. - return K x H x W ndarray. - - img_path: image path. - """ - image = image_util.load_image(img_path, self.is_color) - # Another way to extract oversampled features is that - # cropping and averaging from large feature map which is - # calculated by large size of image. - # This way reduces the computation. - if self.oversample: - # image_util.resize_image: short side is self.resize_dim - image = image_util.resize_image(image, self.resize_dim) - image = np.array(image) - input = np.zeros( - (1, image.shape[0], image.shape[1], 3), dtype=np.float32) - input[0] = image.astype(np.float32) - input = image_util.oversample(input, self.crop_dims) - else: - image = image.resize(self.crop_dims, Image.ANTIALIAS) - input = np.zeros( - (1, self.crop_dims[0], self.crop_dims[1], 3), dtype=np.float32) - input[0] = np.array(image).astype(np.float32) - - data_in = [] - for img in input: - img = self.transformer.transformer(img).flatten() - data_in.append([img.tolist()]) - # paddle input: [[[]],[[]],...], [[]] is one sample. - return data_in - - def forward(self, input_data): - """ - return output arguments which are the Outputs() in network configure. - - input_data: py_paddle input data. - call forward. - """ - in_arg = self.converter(input_data) - return self.network.forwardTest(in_arg) - - def forward(self, data, output_layer): - """ - return output arguments which are the Outputs() in network configure. - - input_data: py_paddle input data. - call forward. - """ - input = self.converter(data) - self.network.forwardTest(input) - output = self.network.getLayerOutputs(output_layer) - res = {} - if isinstance(output_layer, basestring): - output_layer = [output_layer] - for name in output_layer: - # For oversampling, average predictions across crops. - # If not, the shape of output[name]: (1, class_number), - # the mean is also applicable. 
- res[name] = output[name].mean(0)
-
- return res
-
- def predict(self, data_file):
- """
- Call forward and predict a label for each image.
-
- data_file: input image list.
- """
- image_files = open(data_file, 'rb').readlines()
- results = {}
- if self.output_layer is None:
- self.output_layer = ["output"]
- for line in image_files:
- image = line.split()[0]
- data = self.get_data(image)
- prob = self.forward(data, self.output_layer)
- lab = np.argsort(-prob[self.output_layer[0]])
- results[image] = lab[0]
- logging.info("Label of %s is: %d", image, lab[0])
- return results
-
- def extract(self, data_file, output_dir, batch_size=10000):
- """
- Extract and save features of the output layers, which are
- specified in Outputs() in the network config.
-
- data_file: file name of input data.
- output_dir: saved directory of extracted features.
- batch_size: sample number of one batch file.
- """
- if not os.path.exists(output_dir):
- os.mkdir(output_dir)
-
- sample_num = 0
- batch_num = 0
- image_feature = {}
- image_files = open(data_file, 'rb').readlines()
- for idx, line in enumerate(image_files):
- image = line.split()[0]
- data = self.get_data(image)
- feature = self.forward(data, self.output_layer)
- # save extracted features
- file_name = image.split("/")[-1]
- image_feature[file_name] = feature
- sample_num += 1
- if sample_num == batch_size:
- batch_name = os.path.join(output_dir, 'batch_%d' % (batch_num))
- self.save_file(image_feature, batch_name)
- logging.info('Finish batch %d', batch_num)
- batch_num += 1
- sample_num = 0
- image_feature = {}
- if idx % 1000 == 0:
- logging.info('%d/%d, %s', idx, len(image_files), file_name)
- if sample_num > 0:
- batch_name = os.path.join(output_dir, 'batch_%d' % (batch_num))
- self.save_file(image_feature, batch_name)
- logging.info('Finish batch %d', batch_num)
- logging.info('Done: make image feature batch')
-
- def save_file(self, data, file):
- of = open(file, 'wb')
- cPickle.dump(data, of, protocol=cPickle.HIGHEST_PROTOCOL)
-
-
-def option_parser():
- """
- Main entry for predicting
- """
- usage = "%prog -c config -i data_list -w model_dir [options]"
- parser = OptionParser(usage="usage: %s" % usage)
- parser.add_option(
- "-j",
- "--job",
- action="store",
- dest="job_type",
- help="job type: predict, extract\
- predict: predicting,\
- extract: extract features")
- parser.add_option(
- "-c",
- "--conf",
- action="store",
- dest="train_conf",
- help="network config")
- parser.add_option(
- "-i", "--data", action="store", dest="data_file", help="image list")
- parser.add_option(
- "-w",
- "--model",
- action="store",
- dest="model_path",
- default=None,
- help="model path")
- parser.add_option(
- "-g",
- "--use_gpu",
- action="store",
- dest="use_gpu",
- default=True,
- help="Whether to use gpu mode.")
- parser.add_option(
- "-o",
- "--output_dir",
- action="store",
- dest="output_dir",
- default="output",
- help="output path")
- parser.add_option(
- "-m",
- "--mean",
- action="store",
- dest="mean",
- default=None,
- help="mean file.")
- parser.add_option(
- "-p",
- "--multi_crop",
- action="store_true",
- dest="multi_crop",
- default=False,
- help="Whether to use multiple crops on the image.")
- parser.add_option("-l", "--output_layer", action="store",
- dest="output_layer", default=None,
- help="--job=extract, specify layers to extract "\
- "features, --job=predict, specify layer of "
- "classification probability, output in resnet.py.")
- return parser.parse_args()
-
-
-def main():
- """
- 1. parse input arguments.
- 2. 
predict or extract features according to the job type.
- """
- options, args = option_parser()
- obj = ImageClassifier(
- options.train_conf,
- options.model_path,
- use_gpu=options.use_gpu,
- mean_file=options.mean,
- output_layer=options.output_layer,
- oversample=options.multi_crop)
- if options.job_type == "predict":
- obj.predict(options.data_file)
-
- elif options.job_type == "extract":
- obj.extract(options.data_file, options.output_dir)
-
-
-if __name__ == '__main__':
- main()
diff --git a/demo/quick_start/.gitignore b/demo/quick_start/.gitignore
deleted file mode 100644
index d6bc73105b1abfdae3067b7fecd656079a56b57c..0000000000000000000000000000000000000000
--- a/demo/quick_start/.gitignore
+++ /dev/null
@@ -1,13 +0,0 @@
-*.pyc
-data/dict.txt
-data/dict_all.txt
-data/labels.list
-data/mosesdecoder-master/
-data/reviews_Electronics_5.json.gz
-data/test.list
-data/test.txt
-data/train.list
-data/train.txt
-dataprovider_copy_1.py
-train.log
-output
diff --git a/demo/quick_start/dataprovider_bow.py b/demo/quick_start/dataprovider_bow.py
deleted file mode 100644
index 8e651d77bf3fd3bbd990ef314456ec14bd77cfeb..0000000000000000000000000000000000000000
--- a/demo/quick_start/dataprovider_bow.py
+++ /dev/null
@@ -1,86 +0,0 @@
-# Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserved
-#
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-# http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-
-from paddle.trainer.PyDataProvider2 import *
-
-# id of the word not in dictionary
-UNK_IDX = 0
-
-
-# initializer is called by the framework during initialization.
-# It allows the user to describe the data types and setup the
-# necessary data structure for later use.
-# `settings` is an object. initializer needs to properly fill settings.input_types.
-# initializer can also store other data structures needed to be used at process().
-# In this example, the dictionary is stored in settings.
-# `dictionary` and `kwargs` are arguments passed from trainer_config.lr.py
-def initializer(settings, dictionary, **kwargs):
- # Put the word dictionary into settings
- settings.word_dict = dictionary
-
- # settings.input_types specifies the data types the data provider
- # generates.
- settings.input_types = [
- # The first input is a sparse_binary_vector,
- # which means each dimension of the vector is either 0 or 1. It is the
- # bag-of-words (BOW) representation of the texts.
- sparse_binary_vector(len(dictionary)),
- # The second input is an integer. It represents the category id of the
- # sample. 2 means there are two labels in the dataset.
- # (1 for positive and 0 for negative)
- integer_value(2)
- ]
-
-
-# Declaring a data provider. It has an init hook 'initializer'.
-# It will cache the generated data of the first pass in memory, so that
-# during later passes, no on-the-fly data generation will be needed.
-# `settings` is the same object used by initializer()
-# `file_name` is the name of a file listed in the train_list or test_list
-# given to define_py_data_sources2(). See trainer_config.lr.py.
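Concretely, the provider declared below turns one tab-separated line into word ids before Paddle sees it; a tiny worked example with a hypothetical four-word dictionary:

    UNK_IDX = 0
    word_dict = {'great': 1, 'phone': 2, 'love': 3, 'it': 4}  # hypothetical toy dictionary
    line = "1\tgreat phone , love it"
    label, comment = line.strip().split('\t')
    word_vector = [word_dict.get(w, UNK_IDX) for w in comment.split()]
    # word_vector == [1, 2, 0, 3, 4]: ',' is out of vocabulary, so it maps to UNK_IDX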
-@provider(init_hook=initializer, cache=CacheType.CACHE_PASS_IN_MEM) -def process(settings, file_name): - # Open the input data file. - with open(file_name, 'r') as f: - # Read each line. - for line in f: - # Each line contains the label and text of the comment, separated by \t. - label, comment = line.strip().split('\t') - - # Split the words into a list. - words = comment.split() - - # convert the words into a list of ids by looking them up in word_dict. - word_vector = [settings.word_dict.get(w, UNK_IDX) for w in words] - - # Return the features for the current comment. The first is a list - # of ids representing a 0-1 binary sparse vector of the text, - # the second is the integer id of the label. - yield word_vector, int(label) - - -def predict_initializer(settings, dictionary, **kwargs): - settings.word_dict = dictionary - settings.input_types = [sparse_binary_vector(len(dictionary))] - - -# Declaring a data provider for prediction. The difference with process -# is that label is not generated. -@provider(init_hook=predict_initializer, should_shuffle=False) -def process_predict(settings, file_name): - with open(file_name, 'r') as f: - for line in f: - comment = line.strip().split() - word_vector = [settings.word_dict.get(w, UNK_IDX) for w in comment] - yield word_vector diff --git a/demo/quick_start/dataprovider_emb.py b/demo/quick_start/dataprovider_emb.py deleted file mode 100755 index b010253a8a764ede4ff0416231ac6aa2fd8f94e3..0000000000000000000000000000000000000000 --- a/demo/quick_start/dataprovider_emb.py +++ /dev/null @@ -1,55 +0,0 @@ -# Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserved -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -from paddle.trainer.PyDataProvider2 import * - -UNK_IDX = 0 - - -def initializer(settings, dictionary, **kwargs): - settings.word_dict = dictionary - settings.input_types = [ - # Define the type of the first input as sequence of integer. 
- # The value of the integers range from 0 to len(dictrionary)-1 - integer_value_sequence(len(dictionary)), - # Define the second input for label id - integer_value(2) - ] - - -@provider(init_hook=initializer, cache=CacheType.CACHE_PASS_IN_MEM) -def process(settings, file_name): - with open(file_name, 'r') as f: - for line in f: - label, comment = line.strip().split('\t') - words = comment.split() - word_slot = [settings.word_dict.get(w, UNK_IDX) for w in words] - yield word_slot, int(label) - - -def predict_initializer(settings, dictionary, **kwargs): - settings.word_dict = dictionary - settings.input_types = [ - integer_value( - len(dictionary), seq_type=SequenceType.SEQUENCE) - ] - - -@provider(init_hook=predict_initializer, should_shuffle=False) -def process_predict(settings, file_name): - with open(file_name, 'r') as f: - for line in f: - comment = line.strip().split() - word_slot = [settings.word_dict.get(w, UNK_IDX) for w in comment] - yield word_slot diff --git a/demo/quick_start/predict.sh b/demo/quick_start/predict.sh deleted file mode 100755 index f02e5038e92790c7f1ddcd84a09c6d9a02f84ac4..0000000000000000000000000000000000000000 --- a/demo/quick_start/predict.sh +++ /dev/null @@ -1,30 +0,0 @@ -#!/bin/bash -# Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserved -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -set -e - -cfg=trainer_config.lr.py -#cfg=trainer_config.emb.py -#cfg=trainer_config.cnn.py -#cfg=trainer_config.lstm.py -model="output/pass-00003" -paddle train \ - --config=$cfg \ - --use_gpu=false \ - --job=test \ - --init_model_path=$model \ - --config_args=is_predict=1 \ - --predict_output_dir=. \ - -mv rank-00000 result.txt diff --git a/demo/quick_start/train.sh b/demo/quick_start/train.sh deleted file mode 100755 index e3595fce7519297058e1eeb66487692267ddcfcc..0000000000000000000000000000000000000000 --- a/demo/quick_start/train.sh +++ /dev/null @@ -1,33 +0,0 @@ -#!/bin/bash -# Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserved -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. 
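The two quick-start providers deleted above encode the same text differently: dataprovider_bow.py reports only which dictionary ids occur (a 0/1 vector), while dataprovider_emb.py keeps the ordered id sequence. A small illustration of the difference, using a made-up two-word dictionary:

```python
comment = "great great sound"
word_dict = {"great": 1, "sound": 2}
UNK_IDX = 0  # fallback id for out-of-vocabulary words, as in both providers

seq = [word_dict.get(w, UNK_IDX) for w in comment.split()]  # emb provider: [1, 1, 2]
bow = sorted(set(seq))                                      # bow provider: [1, 2]
# sparse_binary_vector keeps only which ids occur (each dimension is 0 or 1),
# so repetition and word order are lost; the sequence form preserves both.
print(seq, bow)
```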
-set -e - -cfg=trainer_config.lr.py -#cfg=trainer_config.emb.py -#cfg=trainer_config.cnn.py -#cfg=trainer_config.lstm.py -#cfg=trainer_config.bidi-lstm.py -#cfg=trainer_config.db-lstm.py -#cfg=trainer_config.resnet-lstm.py -paddle train \ - --config=$cfg \ - --save_dir=./output \ - --trainer_count=4 \ - --log_period=100 \ - --num_passes=15 \ - --use_gpu=false \ - --show_parameter_stats_period=100 \ - --test_all_data_in_one_period=1 \ - 2>&1 | tee 'train.log' diff --git a/demo/quick_start/trainer_config.resnet-lstm.py b/demo/quick_start/trainer_config.resnet-lstm.py deleted file mode 100644 index 5bed925d84a0a6d94da446e1a8c64061ad54ae55..0000000000000000000000000000000000000000 --- a/demo/quick_start/trainer_config.resnet-lstm.py +++ /dev/null @@ -1,94 +0,0 @@ -# edit-mode: -*- python -*- - -# Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserved -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -""" -This configuration is a demonstration of how to implement the stacked LSTM -with residual connections, i.e. an LSTM layer takes the sum of the hidden states -and inputs of the previous LSTM layer instead of only the hidden states. -This architecture is from: -Yonghui Wu, Mike Schuster, Zhifeng Chen, Quoc V. Le, Mohammad Norouzi, -Wolfgang Macherey, Maxim Krikun, Yuan Cao, Qin Gao, Klaus Macherey, -Jeff Klingner, Apurva Shah, Melvin Johnson, Xiaobing Liu, Lukasz Kaiser, -Stephan Gouws, Yoshikiyo Kato, Taku Kudo, Hideto Kazawa, Keith Stevens, -George Kurian, Nishant Patil, Wei Wang, Cliff Young, Jason Smith, Jason Riesa, -Alex Rudnick, Oriol Vinyals, Greg Corrado, Macduff Hughes, Jeffrey Dean. 2016. -Google's Neural Machine Translation System: Bridging the Gap between Human and -Machine Translation. In arXiv https://arxiv.org/pdf/1609.08144v2.pdf -Different from the architecture described in the paper, we use a stack single -direction LSTM layers as the first layer instead of bi-directional LSTM. Also, -since this is a demo code, to reduce computation time, we stacked 4 layers -instead of 8 layers. -""" - -from paddle.trainer_config_helpers import * - -dict_file = "./data/dict.txt" -word_dict = dict() -with open(dict_file, 'r') as f: - for i, line in enumerate(f): - w = line.strip().split()[0] - word_dict[w] = i - -is_predict = get_config_arg('is_predict', bool, False) -trn = 'data/train.list' if not is_predict else None -tst = 'data/test.list' if not is_predict else 'data/pred.list' -process = 'process' if not is_predict else 'process_predict' -define_py_data_sources2(train_list=trn, - test_list=tst, - module="dataprovider_emb", - obj=process, - args={"dictionary": word_dict}) - -batch_size = 128 if not is_predict else 1 -settings( - batch_size=batch_size, - learning_rate=2e-3, - learning_method=AdamOptimizer(), - regularization=L2Regularization(8e-4), - gradient_clipping_threshold=25 -) - -bias_attr = ParamAttr(initial_std=0.,l2_rate=0.) 
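The resnet-lstm configuration whose header appears above implements the residual pattern its docstring describes: each layer consumes the elementwise sum of the previous layer's input and hidden state. The loop that follows in the config wires this up with addto_layer; here is a toy numpy sketch of the same wiring (tanh stands in for simple_lstm, and nothing below is PaddlePaddle API):

```python
import numpy as np

def toy_layer(x):
    return np.tanh(x)  # stand-in for simple_lstm, just to show the wiring

def residual_stack(x, depth=4):
    prev_input, prev_hidden = x, toy_layer(x)
    for _ in range(depth - 1):
        current_input = prev_input + prev_hidden   # the addto_layer step
        prev_input, prev_hidden = current_input, toy_layer(current_input)
    return prev_hidden

print(residual_stack(np.ones((5, 8))).shape)  # (5, 8): one state per time step
```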
- -data = data_layer(name="word", size=len(word_dict)) -emb = embedding_layer(input=data, size=128) -lstm = simple_lstm(input=emb, size=128, lstm_cell_attr=ExtraAttr(drop_rate=0.1)) - -previous_input, previous_hidden_state = emb, lstm - -for i in range(3): - # The input to the current layer is the sum of the hidden state - # and input of the previous layer. - current_input = addto_layer(input=[previous_input, previous_hidden_state]) - hidden_state = simple_lstm(input=current_input, size=128, - lstm_cell_attr=ExtraAttr(drop_rate=0.1)) - previous_input, previous_hidden_state = current_input, hidden_state - -lstm = previous_hidden_state - -lstm_last = pooling_layer(input=lstm, pooling_type=MaxPooling()) -output = fc_layer(input=lstm_last, size=2, - bias_attr=bias_attr, - act=SoftmaxActivation()) - - -if is_predict: - maxid = maxid_layer(output) - outputs([maxid, output]) -else: - label = data_layer(name="label", size=2) - cls = classification_cost(input=output, label=label) - outputs(cls) diff --git a/demo/recommendation/.gitignore b/demo/recommendation/.gitignore deleted file mode 100644 index fd27ef62a87cae51f2392c0eba50a44490d029af..0000000000000000000000000000000000000000 --- a/demo/recommendation/.gitignore +++ /dev/null @@ -1,10 +0,0 @@ -log.txt -data/meta.bin -data/ml-1m -data/ratings.dat.train -data/ratings.dat.test -data/train.list -data/test.list -dataprovider_copy_1.py -*.pyc -output diff --git a/demo/recommendation/common_utils.py b/demo/recommendation/common_utils.py deleted file mode 100755 index d4fbdad1d7ac53b35d9478c65ab61c2d28845261..0000000000000000000000000000000000000000 --- a/demo/recommendation/common_utils.py +++ /dev/null @@ -1,29 +0,0 @@ -# Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserved -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. 
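After the last LSTM layer, the resnet-lstm config above max-pools over time and applies a two-way softmax classifier. A hedged numpy sketch of what those two layers compute (W and b are placeholders for the learned fc_layer parameters):

```python
import numpy as np

def classify(hidden_states, W, b):
    pooled = hidden_states.max(axis=0)   # pooling_layer with MaxPooling(): max over time
    logits = pooled @ W + b              # fc_layer of size 2
    exp = np.exp(logits - logits.max())
    return exp / exp.sum()               # SoftmaxActivation

probs = classify(np.random.randn(7, 128), np.random.randn(128, 2), np.zeros(2))
print(probs.sum())  # ~1.0
```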
-from paddle.trainer.PyDataProvider2 import * - - -def meta_to_header(meta, name): - metas = meta[name]['__meta__']['raw_meta'] - for each_meta in metas: - if each_meta['type'] == 'id': - yield integer_value(each_meta['max']) - elif each_meta['type'] == 'embedding': - is_seq = each_meta['seq'] == 'sequence' - yield integer_value( - len(each_meta['dict']), - seq_type=SequenceType.SEQUENCE - if is_seq else SequenceType.NO_SEQUENCE) - elif each_meta['type'] == 'one_hot_dense': - yield dense_vector(len(each_meta['dict'])) diff --git a/demo/recommendation/data/config.json b/demo/recommendation/data/config.json deleted file mode 100644 index f26e74ce47bb7843a571e6033f051c046b31f054..0000000000000000000000000000000000000000 --- a/demo/recommendation/data/config.json +++ /dev/null @@ -1,16 +0,0 @@ -{ - "user": { - "file": { - "name": "users.dat", - "delimiter": "::" - }, - "fields": ["id", "gender", "age", "occupation"] - }, - "movie": { - "file": { - "name": "movies.dat", - "delimiter": "::" - }, - "fields": ["id", "title", "genres"] - } -} diff --git a/demo/recommendation/data/config_generator.py b/demo/recommendation/data/config_generator.py deleted file mode 100644 index 4ca496a252dffc62ed62bb8f2a5ee1661a940580..0000000000000000000000000000000000000000 --- a/demo/recommendation/data/config_generator.py +++ /dev/null @@ -1,127 +0,0 @@ -#!/bin/env python2 -# Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserved -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -""" -config_generator.py - -Usage: - ./config_generator.py [--output_format=] - ./config_generator.py -h | --help - -Options: - -h --help Show this screen. - --output_format= Output Config format(json or yaml) [default: json]. 
-""" - -import json -import docopt -import copy - -DEFAULT_FILE = {"type": "split", "delimiter": ","} - -DEFAULT_FIELD = { - "id": { - "type": "id" - }, - "gender": { - "name": "gender", - "type": "embedding", - "dict": { - "type": "char_based" - } - }, - "age": { - "name": "age", - "type": "embedding", - "dict": { - "type": "whole_content", - "sort": True - } - }, - "occupation": { - "name": "occupation", - "type": "embedding", - "dict": { - "type": "whole_content", - "sort": "true" - } - }, - "title": { - "regex": { - "pattern": r"^(.*)\((\d+)\)$", - "group_id": 1, - "strip": True - }, - "name": "title", - "type": { - "name": "embedding", - "seq_type": "sequence", - }, - "dict": { - "type": "char_based" - } - }, - "genres": { - "type": "one_hot_dense", - "dict": { - "type": "split", - "delimiter": "|" - }, - "name": "genres" - } -} - - -def merge_dict(master_dict, slave_dict): - return dict(((k, master_dict.get(k) or slave_dict.get(k)) - for k in set(slave_dict) | set(master_dict))) - - -def main(filename, fmt): - with open(filename, 'r') as f: - conf = json.load(f) - obj = dict() - for k in conf: - val = conf[k] - file_dict = val['file'] - file_dict = merge_dict(file_dict, DEFAULT_FILE) - - fields = [] - for pos, field_key in enumerate(val['fields']): - assert isinstance(field_key, basestring) - field = copy.deepcopy(DEFAULT_FIELD[field_key]) - field['pos'] = pos - fields.append(field) - obj[k] = {"file": file_dict, "fields": fields} - meta = {"meta": obj} - # print meta - if fmt == 'json': - - def formatter(x): - import json - return json.dumps(x, indent=2) - elif fmt == 'yaml': - - def formatter(x): - import yaml - return yaml.safe_dump(x, default_flow_style=False) - else: - raise NotImplementedError("Dump format %s is not implemented" % fmt) - - print formatter(meta) - - -if __name__ == '__main__': - args = docopt.docopt(__doc__, version="0.1.0") - main(args[""], args["--output_format"]) diff --git a/demo/recommendation/data/meta_config.json b/demo/recommendation/data/meta_config.json deleted file mode 100644 index cc6a046e271dd0faaa47eeb5a5bef6d3604113fe..0000000000000000000000000000000000000000 --- a/demo/recommendation/data/meta_config.json +++ /dev/null @@ -1,81 +0,0 @@ -{ - "meta": { - "movie": { - "fields": [ - { - "type": "id", - "pos": 0 - }, - { - "regex": { - "pattern": "^(.*)\\((\\d+)\\)$", - "group_id": 1, - "strip": true - }, - "type": { - "seq_type": "sequence", - "name": "embedding" - }, - "dict": { - "type": "char_based" - }, - "name": "title", - "pos": 1 - }, - { - "type": "one_hot_dense", - "dict": { - "delimiter": "|", - "type": "split" - }, - "name": "genres", - "pos": 2 - } - ], - "file": { - "delimiter": "::", - "type": "split", - "name": "movies.dat" - } - }, - "user": { - "fields": [ - { - "type": "id", - "pos": 0 - }, - { - "type": "embedding", - "dict": { - "type": "char_based" - }, - "name": "gender", - "pos": 1 - }, - { - "type": "embedding", - "dict": { - "sort": true, - "type": "whole_content" - }, - "name": "age", - "pos": 2 - }, - { - "type": "embedding", - "dict": { - "sort": "true", - "type": "whole_content" - }, - "name": "occupation", - "pos": 3 - } - ], - "file": { - "delimiter": "::", - "type": "split", - "name": "users.dat" - } - } - } -} diff --git a/demo/recommendation/data/meta_generator.py b/demo/recommendation/data/meta_generator.py deleted file mode 100644 index 38e4679d266c331a751114cd13f0e3453016cf26..0000000000000000000000000000000000000000 --- a/demo/recommendation/data/meta_generator.py +++ /dev/null @@ -1,430 +0,0 @@ -#!/bin/env 
python2 -# Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserved -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -""" -Preprocess Movielens dataset, to get movie/user object. - -Usage: - ./preprocess.py [--config=] - ./preprocess.py -h | --help - -Options: - -h --help Show this screen. - --version Show version. - --config= Get MetaData config file [default: config.json]. -""" -import docopt -import os -import sys -import re -import collections - -try: - import cPickle as pickle -except ImportError: - import pickle - - -class UniqueIDGenerator(object): - def __init__(self): - self.pool = collections.defaultdict(self.__next_id__) - self.next_id = 0 - - def __next_id__(self): - tmp = self.next_id - self.next_id += 1 - return tmp - - def __call__(self, k): - return self.pool[k] - - def to_list(self): - ret_val = [None] * len(self.pool) - for k in self.pool.keys(): - ret_val[self.pool[k]] = k - return ret_val - - -class SortedIDGenerator(object): - def __init__(self): - self.__key_set__ = set() - self.dict = None - - def scan(self, key): - self.__key_set__.add(key) - - def finish_scan(self, compare=None, key=None, reverse=False): - self.__key_set__ = sorted( - list(self.__key_set__), cmp=compare, key=key, reverse=reverse) - self.dict = dict() - for idx, each_key in enumerate(self.__key_set__): - self.dict[each_key] = idx - - def __call__(self, key): - return self.dict[key] - - def to_list(self): - return self.__key_set__ - - -class SplitFileReader(object): - def __init__(self, work_dir, config): - assert isinstance(config, dict) - self.filename = config['name'] - self.delimiter = config.get('delimiter', ',') - self.work_dir = work_dir - - def read(self): - with open(os.path.join(self.work_dir, self.filename), 'r') as f: - for line in f: - line = line.strip() - if isinstance(self.delimiter, unicode): - self.delimiter = str(self.delimiter) - yield line.split(self.delimiter) - - @staticmethod - def create(work_dir, config): - assert isinstance(config, dict) - if config['type'] == 'split': - return SplitFileReader(work_dir, config) - - -class IFileReader(object): - READERS = [SplitFileReader] - - def read(self): - raise NotImplementedError() - - @staticmethod - def create(work_dir, config): - for reader_cls in IFileReader.READERS: - val = reader_cls.create(work_dir, config) - if val is not None: - return val - - -class IDFieldParser(object): - TYPE = 'id' - - def __init__(self, config): - self.__max_id__ = -sys.maxint - 1 - self.__min_id__ = sys.maxint - self.__id_count__ = 0 - - def scan(self, line): - idx = int(line) - self.__max_id__ = max(self.__max_id__, idx) - self.__min_id__ = min(self.__min_id__, idx) - self.__id_count__ += 1 - - def parse(self, line): - return int(line) - - def meta_field(self): - return { - "is_key": True, - 'max': self.__max_id__, - 'min': self.__min_id__, - 'count': self.__id_count__, - 'type': 'id' - } - - -class SplitEmbeddingDict(object): - def __init__(self, delimiter): - self.__id__ = UniqueIDGenerator() - self.delimiter = delimiter - - def scan(self, multi): - 
for val in multi.split(self.delimiter): - self.__id__(val) - - def parse(self, multi): - return map(self.__id__, multi.split(self.delimiter)) - - def meta_field(self): - return self.__id__.to_list() - - -class EmbeddingFieldParser(object): - TYPE = 'embedding' - - NO_SEQUENCE = "no_sequence" - SEQUENCE = "sequence" - - class CharBasedEmbeddingDict(object): - def __init__(self, is_seq=True): - self.__id__ = UniqueIDGenerator() - self.is_seq = is_seq - - def scan(self, s): - for ch in s: - self.__id__(ch) - - def parse(self, s): - return map(self.__id__, s) if self.is_seq else self.__id__(s[0]) - - def meta_field(self): - return self.__id__.to_list() - - class WholeContentDict(object): - def __init__(self, need_sort=True): - assert need_sort - self.__id__ = SortedIDGenerator() - self.__has_finished__ = False - - def scan(self, txt): - self.__id__.scan(txt) - - def meta_field(self): - if not self.__has_finished__: - self.__id__.finish_scan() - self.__has_finished__ = True - return self.__id__.to_list() - - def parse(self, txt): - return self.__id__(txt) - - def __init__(self, config): - try: - self.seq_type = config['type']['seq_type'] - except TypeError: - self.seq_type = EmbeddingFieldParser.NO_SEQUENCE - - if config['dict']['type'] == 'char_based': - self.dict = EmbeddingFieldParser.CharBasedEmbeddingDict( - self.seq_type == EmbeddingFieldParser.SEQUENCE) - elif config['dict']['type'] == 'split': - self.dict = SplitEmbeddingDict(config['dict'].get('delimiter', ',')) - elif config['dict']['type'] == 'whole_content': - self.dict = EmbeddingFieldParser.WholeContentDict(config['dict'][ - 'sort']) - else: - print config - assert False - - self.name = config['name'] - - def scan(self, s): - self.dict.scan(s) - - def meta_field(self): - return { - 'name': self.name, - 'dict': self.dict.meta_field(), - 'type': 'embedding', - 'seq': self.seq_type - } - - def parse(self, s): - return self.dict.parse(s) - - -class OneHotDenseFieldParser(object): - TYPE = 'one_hot_dense' - - def __init__(self, config): - if config['dict']['type'] == 'split': - self.dict = SplitEmbeddingDict(config['dict']['delimiter']) - self.name = config['name'] - - def scan(self, s): - self.dict.scan(s) - - def meta_field(self): - # print self.dict.meta_field() - return { - 'dict': self.dict.meta_field(), - 'name': self.name, - 'type': 'one_hot_dense' - } - - def parse(self, s): - ids = self.dict.parse(s) - retv = [0.0] * len(self.dict.meta_field()) - for idx in ids: - retv[idx] = 1.0 - # print retv - return retv - - -class FieldParserFactory(object): - PARSERS = [IDFieldParser, EmbeddingFieldParser, OneHotDenseFieldParser] - - @staticmethod - def create(config): - if isinstance(config['type'], basestring): - config_type = config['type'] - elif isinstance(config['type'], dict): - config_type = config['type']['name'] - - assert config_type is not None - - for each_parser_cls in FieldParserFactory.PARSERS: - if config_type == each_parser_cls.TYPE: - return each_parser_cls(config) - print config - - -class CompositeFieldParser(object): - def __init__(self, parser, extractor): - self.extractor = extractor - self.parser = parser - - def scan(self, *args, **kwargs): - self.parser.scan(self.extractor.extract(*args, **kwargs)) - - def parse(self, *args, **kwargs): - return self.parser.parse(self.extractor.extract(*args, **kwargs)) - - def meta_field(self): - return self.parser.meta_field() - - -class PositionContentExtractor(object): - def __init__(self, pos): - self.pos = pos - - def extract(self, line): - assert isinstance(line, list) - 
return line[self.pos] - - -class RegexPositionContentExtractor(PositionContentExtractor): - def __init__(self, pos, pattern, group_id, strip=True): - PositionContentExtractor.__init__(self, pos) - pattern = pattern.strip() - self.pattern = re.compile(pattern) - self.group_id = group_id - self.strip = strip - - def extract(self, line): - line = PositionContentExtractor.extract(self, line) - match = self.pattern.match(line) - # print line, self.pattern.pattern, match - assert match is not None - txt = match.group(self.group_id) - if self.strip: - txt.strip() - return txt - - -class ContentExtractorFactory(object): - def extract(self, line): - pass - - @staticmethod - def create(config): - if 'pos' in config: - if 'regex' not in config: - return PositionContentExtractor(config['pos']) - else: - extra_args = config['regex'] - return RegexPositionContentExtractor( - pos=config['pos'], **extra_args) - - -class MetaFile(object): - def __init__(self, work_dir): - self.work_dir = work_dir - self.obj = dict() - - def parse(self, config): - config = config['meta'] - - ret_obj = dict() - for key in config.keys(): - val = config[key] - assert 'file' in val - reader = IFileReader.create(self.work_dir, val['file']) - assert reader is not None - assert 'fields' in val and isinstance(val['fields'], list) - fields_config = val['fields'] - field_parsers = map(MetaFile.__field_config_mapper__, fields_config) - - for each_parser in field_parsers: - assert each_parser is not None - - for each_block in reader.read(): - for each_parser in field_parsers: - each_parser.scan(each_block) - - metas = map(lambda x: x.meta_field(), field_parsers) - # print metas - key_index = filter( - lambda x: x is not None, - map(lambda (idx, meta): idx if 'is_key' in meta and meta['is_key'] else None, - enumerate(metas)))[0] - - key_map = [] - for i in range(min(key_index, len(metas))): - key_map.append(i) - for i in range(key_index + 1, len(metas)): - key_map.append(i) - - obj = {'__meta__': {'raw_meta': metas, 'feature_map': key_map}} - - for each_block in reader.read(): - idx = field_parsers[key_index].parse(each_block) - val = [] - for i, each_parser in enumerate(field_parsers): - if i != key_index: - val.append(each_parser.parse(each_block)) - obj[idx] = val - ret_obj[key] = obj - self.obj = ret_obj - return ret_obj - - @staticmethod - def __field_config_mapper__(conf): - assert isinstance(conf, dict) - extrator = ContentExtractorFactory.create(conf) - field_parser = FieldParserFactory.create(conf) - assert extrator is not None - assert field_parser is not None - return CompositeFieldParser(field_parser, extrator) - - def dump(self, fp): - pickle.dump(self.obj, fp, pickle.HIGHEST_PROTOCOL) - - -def preprocess(binary_filename, dataset_dir, config, **kwargs): - assert isinstance(config, str) - with open(config, 'r') as config_file: - file_loader = None - if config.lower().endswith('.yaml'): - import yaml - file_loader = yaml - elif config.lower().endswith('.json'): - import json - file_loader = json - config = file_loader.load(config_file) - meta = MetaFile(dataset_dir) - meta.parse(config) - with open(binary_filename, 'wb') as outf: - meta.dump(outf) - - -if __name__ == '__main__': - args = docopt.docopt(__doc__, version='0.1.0') - kwargs = dict() - for key in args.keys(): - if key != '--help': - param_name = key - assert isinstance(param_name, str) - param_name = param_name.replace('<', '') - param_name = param_name.replace('>', '') - param_name = param_name.replace('--', '') - kwargs[param_name] = args[key] - 
preprocess(**kwargs) diff --git a/demo/recommendation/data/ml_data.sh b/demo/recommendation/data/ml_data.sh deleted file mode 100755 index 2268d876389e0bdf5ead405e74d278d276626f82..0000000000000000000000000000000000000000 --- a/demo/recommendation/data/ml_data.sh +++ /dev/null @@ -1,23 +0,0 @@ -#!/bin/bash -# Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserved -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -set -ex -cd "$(dirname "$0")" -# download the dataset -wget http://files.grouplens.org/datasets/movielens/ml-1m.zip -# unzip the dataset -unzip ml-1m.zip -# remove the unused zip file -rm ml-1m.zip diff --git a/demo/recommendation/data/split.py b/demo/recommendation/data/split.py deleted file mode 100644 index be6869c22f04be1db0f8e9c35c73c851e4c490b0..0000000000000000000000000000000000000000 --- a/demo/recommendation/data/split.py +++ /dev/null @@ -1,66 +0,0 @@ -#!/bin/env python2 -# Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserved -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -""" -Separate movielens 1m dataset to train/test file. - -Usage: - ./separate.py [--test_ratio=] [--delimiter=] - ./separate.py -h | --help - -Options: - -h --help Show this screen. - --version Show version. - --test_ratio= Test ratio for separate [default: 0.1]. - --delimiter= File delimiter [default: ,]. 
-""" -import docopt -import collections -import random - - -def process(test_ratio, input_file, delimiter, **kwargs): - test_ratio = float(test_ratio) - rating_dict = collections.defaultdict(list) - with open(input_file, 'r') as f: - for line in f: - user_id = int(line.split(delimiter)[0]) - rating_dict[user_id].append(line.strip()) - - with open(input_file + ".train", 'w') as train_file: - with open(input_file + ".test", 'w') as test_file: - for k in rating_dict.keys(): - lines = rating_dict[k] - assert isinstance(lines, list) - random.shuffle(lines) - test_len = int(len(lines) * test_ratio) - for line in lines[:test_len]: - print >> test_file, line - - for line in lines[test_len:]: - print >> train_file, line - - -if __name__ == '__main__': - args = docopt.docopt(__doc__, version='0.1.0') - kwargs = dict() - for key in args.keys(): - if key != '--help': - param_name = key - assert isinstance(param_name, str) - param_name = param_name.replace('<', '') - param_name = param_name.replace('>', '') - param_name = param_name.replace('--', '') - kwargs[param_name] = args[key] - process(**kwargs) diff --git a/demo/recommendation/dataprovider.py b/demo/recommendation/dataprovider.py deleted file mode 100755 index 80c62d75612e544c5197f878a83284f8e08d1a99..0000000000000000000000000000000000000000 --- a/demo/recommendation/dataprovider.py +++ /dev/null @@ -1,76 +0,0 @@ -# Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserved -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -from paddle.trainer.PyDataProvider2 import * -import common_utils # parse - - -def hook(settings, meta, **kwargs): - """ - Init hook is invoked before process data. It will set obj.slots and store - data meta. - - :param obj: global object. It will passed to process routine. - :type obj: object - :param meta: the meta file object, which passed from trainer_config. Meta - file record movie/user features. - :param kwargs: unused other arguments. - """ - del kwargs # unused kwargs - - # Header define slots that used for paddle. - # first part is movie features. - # second part is user features. - # final part is rating score. - # header is a list of [USE_SEQ_OR_NOT?, SlotType] - headers = list(common_utils.meta_to_header(meta, 'movie')) - headers.extend(list(common_utils.meta_to_header(meta, 'user'))) - headers.append(dense_vector(1)) # Score - - # slot types. - settings.input_types = headers - settings.meta = meta - - -@provider(init_hook=hook, cache=CacheType.CACHE_PASS_IN_MEM) -def process(settings, filename): - with open(filename, 'r') as f: - for line in f: - # Get a rating from file. - user_id, movie_id, score = map(int, line.split('::')[:-1]) - - # Scale score to [-5, +5] - score = float(score) * 2 - 5.0 - - # Get movie/user features by movie_id, user_id - movie_meta = settings.meta['movie'][movie_id] - user_meta = settings.meta['user'][user_id] - - outputs = [movie_id - 1] - - # Then add movie features - for each_meta in movie_meta: - outputs.append(each_meta) - - # Then add user id. 
- outputs.append(user_id - 1) - - # Then add user features. - for each_meta in user_meta: - outputs.append(each_meta) - - # Finally, add score - outputs.append([score]) - # Return data to paddle - yield outputs diff --git a/demo/recommendation/evaluate.sh b/demo/recommendation/evaluate.sh deleted file mode 100755 index 02b2857de028bc9c05d7ddd67012043b671b2764..0000000000000000000000000000000000000000 --- a/demo/recommendation/evaluate.sh +++ /dev/null @@ -1,27 +0,0 @@ -#!/bin/bash -# Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserved -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -set -e - -function get_best_pass() { - cat $1 | grep -Pzo 'Test .*\n.*pass-.*' | sed -r 'N;s/Test.* cost=([0-9]+\.[0-9]+).*\n.*pass-([0-9]+)/\1 \2/g' | sort | head -n 1 -} - -LOG=`get_best_pass log.txt` -LOG=(${LOG}) -echo 'Best pass is '${LOG[1]}, ' error is '${LOG[0]}, 'which means predict get error as '`echo ${LOG[0]} | python -c 'import math; print math.sqrt(float(raw_input()))/2'` - -evaluate_pass="output/pass-${LOG[1]}" - -echo 'evaluating from pass '$evaluate_pass diff --git a/demo/recommendation/prediction.py b/demo/recommendation/prediction.py deleted file mode 100755 index 191120188ef5dbddf4c42a1356a9fa46e16c5ca1..0000000000000000000000000000000000000000 --- a/demo/recommendation/prediction.py +++ /dev/null @@ -1,51 +0,0 @@ -#!/bin/env python2 -# Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserved -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -from py_paddle import swig_paddle, DataProviderConverter - -from common_utils import * -from paddle.trainer.config_parser import parse_config - -try: - import cPickle as pickle -except ImportError: - import pickle -import sys - -if __name__ == '__main__': - model_path = sys.argv[1] - swig_paddle.initPaddle('--use_gpu=0') - conf = parse_config("trainer_config.py", "is_predict=1") - network = swig_paddle.GradientMachine.createFromConfigProto( - conf.model_config) - assert isinstance(network, swig_paddle.GradientMachine) - network.loadParameters(model_path) - with open('./data/meta.bin', 'rb') as f: - meta = pickle.load(f) - headers = list(meta_to_header(meta, 'movie')) - headers.extend(list(meta_to_header(meta, 'user'))) - cvt = DataProviderConverter(headers) - while True: - movie_id = int(raw_input("Input movie_id: ")) - user_id = int(raw_input("Input user_id: ")) - movie_meta = meta['movie'][movie_id] # Query Data From Meta. 
- user_meta = meta['user'][user_id] - data = [movie_id - 1] - data.extend(movie_meta) - data.append(user_id - 1) - data.extend(user_meta) - print "Prediction Score is %.2f" % ( - (network.forwardTest(cvt.convert([data]))[0]['value'][0][0] + 5) - / 2) diff --git a/demo/recommendation/preprocess.sh b/demo/recommendation/preprocess.sh deleted file mode 100755 index e121e470193fa1e73c000fe612d6858e28f9261f..0000000000000000000000000000000000000000 --- a/demo/recommendation/preprocess.sh +++ /dev/null @@ -1,31 +0,0 @@ -#!/bin/bash -# Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserved -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -set -e - -cd "$(dirname "$0")" -delimiter='::' -dir=ml-1m -cd data -echo 'generate meta config file' -python config_generator.py config.json > meta_config.json -echo 'generate meta file' -python meta_generator.py $dir meta.bin --config=meta_config.json -echo 'split train/test file' -python split.py $dir/ratings.dat --delimiter=${delimiter} --test_ratio=0.1 -echo 'shuffle train file' -shuf $dir/ratings.dat.train > ratings.dat.train -cp $dir/ratings.dat.test . -echo "./data/ratings.dat.train" > train.list -echo "./data/ratings.dat.test" > test.list diff --git a/demo/recommendation/requirements.txt b/demo/recommendation/requirements.txt deleted file mode 100644 index 1ea154584a428b6a389309f1f8def502e0aadfce..0000000000000000000000000000000000000000 --- a/demo/recommendation/requirements.txt +++ /dev/null @@ -1,2 +0,0 @@ -PyYAML -docopt diff --git a/demo/recommendation/run.sh b/demo/recommendation/run.sh deleted file mode 100755 index e341d1cc7a3267bef9db916719b2e4b1981e31bc..0000000000000000000000000000000000000000 --- a/demo/recommendation/run.sh +++ /dev/null @@ -1,24 +0,0 @@ -#!/bin/bash -# Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserved -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -set -e -paddle train \ - --config=trainer_config.py \ - --save_dir=./output \ - --use_gpu=false \ - --trainer_count=4\ - --test_all_data_in_one_period=true \ - --log_period=100 \ - --dot_period=1 \ - --num_passes=50 2>&1 | tee 'log.txt' diff --git a/demo/recommendation/trainer_config.py b/demo/recommendation/trainer_config.py deleted file mode 100755 index aabcd335253faf69c940024ac8098a54da030463..0000000000000000000000000000000000000000 --- a/demo/recommendation/trainer_config.py +++ /dev/null @@ -1,101 +0,0 @@ -# Copyright (c) 2016 PaddlePaddle Authors. 
All Rights Reserved -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -from paddle.trainer_config_helpers import * - -try: - import cPickle as pickle -except ImportError: - import pickle - -is_predict = get_config_arg('is_predict', bool, False) - -META_FILE = 'data/meta.bin' - -with open(META_FILE, 'rb') as f: - # load meta file - meta = pickle.load(f) - -settings( - batch_size=1600, learning_rate=1e-3, learning_method=RMSPropOptimizer()) - - -def construct_feature(name): - """ - Construct movie/user features. - - This method read from meta data. Then convert feature to neural network due - to feature type. The map relation as follow. - - * id: embedding => fc - * embedding: - is_sequence: embedding => context_projection => fc => pool - not sequence: embedding => fc - * one_hot_dense: fc => fc - - Then gather all features vector, and use a fc layer to combined them as - return. - - :param name: 'movie' or 'user' - :type name: basestring - :return: combined feature output - :rtype: LayerOutput - """ - __meta__ = meta[name]['__meta__']['raw_meta'] - fusion = [] - for each_meta in __meta__: - type_name = each_meta['type'] - slot_name = each_meta.get('name', '%s_id' % name) - if type_name == 'id': - slot_dim = each_meta['max'] - embedding = embedding_layer( - input=data_layer( - slot_name, size=slot_dim), size=256) - fusion.append(fc_layer(input=embedding, size=256)) - elif type_name == 'embedding': - is_seq = each_meta['seq'] == 'sequence' - slot_dim = len(each_meta['dict']) - din = data_layer(slot_name, slot_dim) - embedding = embedding_layer(input=din, size=256) - if is_seq: - fusion.append( - text_conv_pool( - input=embedding, context_len=5, hidden_size=256)) - else: - fusion.append(fc_layer(input=embedding, size=256)) - elif type_name == 'one_hot_dense': - slot_dim = len(each_meta['dict']) - hidden = fc_layer(input=data_layer(slot_name, slot_dim), size=256) - fusion.append(fc_layer(input=hidden, size=256)) - - return fc_layer(name="%s_fusion" % name, input=fusion, size=256) - - -movie_feature = construct_feature("movie") -user_feature = construct_feature("user") -similarity = cos_sim(a=movie_feature, b=user_feature) -if not is_predict: - outputs( - regression_cost( - input=similarity, label=data_layer( - 'rating', size=1))) - - define_py_data_sources2( - 'data/train.list', - 'data/test.list', - module='dataprovider', - obj='process', - args={'meta': meta}) -else: - outputs(similarity) diff --git a/demo/semantic_role_labeling/.gitignore b/demo/semantic_role_labeling/.gitignore deleted file mode 100644 index cd90ca7bbe9be46f54cb656a8067c794a55d8cfc..0000000000000000000000000000000000000000 --- a/demo/semantic_role_labeling/.gitignore +++ /dev/null @@ -1,10 +0,0 @@ -*.pyc -train.log -data/feature -data/conll05st-release/ -data/src.dict -data/test.wsj.props -data/test.wsj.seq_pair -data/test.wsj.words -data/tgt.dict -output diff --git a/demo/semantic_role_labeling/data/extract_dict_feature.py b/demo/semantic_role_labeling/data/extract_dict_feature.py deleted file mode 100644 index 
123df022f508cad1d4557b845619dd18761f357e..0000000000000000000000000000000000000000 --- a/demo/semantic_role_labeling/data/extract_dict_feature.py +++ /dev/null @@ -1,82 +0,0 @@ -# Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserved -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -import sys -import os -from optparse import OptionParser - - -def extract_dict_features(pair_file, feature_file): - - with open(pair_file) as fin, open(feature_file, 'w') as feature_out: - for line in fin: - sentence, predicate, labels = line.strip().split('\t') - sentence_list = sentence.split() - labels_list = labels.split() - - verb_index = labels_list.index('B-V') - - mark = [0] * len(labels_list) - if verb_index > 0: - mark[verb_index - 1] = 1 - ctx_n1 = sentence_list[verb_index - 1] - else: - ctx_n1 = 'bos' - - if verb_index > 1: - mark[verb_index - 2] = 1 - ctx_n2 = sentence_list[verb_index - 2] - else: - ctx_n2 = 'bos' - - mark[verb_index] = 1 - ctx_0 = sentence_list[verb_index] - - if verb_index < len(labels_list) - 2: - mark[verb_index + 1] = 1 - ctx_p1 = sentence_list[verb_index + 1] - else: - ctx_p1 = 'eos' - - if verb_index < len(labels_list) - 3: - mark[verb_index + 2] = 1 - ctx_p2 = sentence_list[verb_index + 2] - else: - ctx_p2 = 'eos' - - - feature_str = sentence + '\t' \ - + predicate + '\t' \ - + ctx_n2 + '\t' \ - + ctx_n1 + '\t' \ - + ctx_0 + '\t' \ - + ctx_p1 + '\t' \ - + ctx_p2 + '\t' \ - + ' '.join([str(i) for i in mark]) + '\t' \ - + labels - - feature_out.write(feature_str + '\n') - - - -if __name__ == '__main__': - - usage = '-p pair_file -f feature_file' - parser = OptionParser(usage) - parser.add_option('-p', dest='pair_file', help='the pair file') - parser.add_option('-f', dest='feature_file', help='the feature file') - - (options, args) = parser.parse_args() - - extract_dict_features(options.pair_file, options.feature_file) diff --git a/demo/semantic_role_labeling/data/extract_pairs.py b/demo/semantic_role_labeling/data/extract_pairs.py deleted file mode 100644 index 2d0d535c53a74a9fbf9ea2521930333b7f89581b..0000000000000000000000000000000000000000 --- a/demo/semantic_role_labeling/data/extract_pairs.py +++ /dev/null @@ -1,122 +0,0 @@ -# Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserved -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -import sys -import os -from optparse import OptionParser - - -def read_labels(props_file): - ''' - a sentence maybe has more than one verb, each verb has its label sequence - label[], is a 3-dimension list. 
- the first dim is to store all sentence's label seqs, len is the sentence number - the second dim is to store all label sequences for one sentences - the third dim is to store each label for one word - ''' - labels = [] - with open(props_file) as fin: - label_seqs_for_one_sentences = [] - one_seg_in_file = [] - for line in fin: - line = line.strip() - if line == '': - for i in xrange(len(one_seg_in_file[0])): - a_kind_lable = [x[i] for x in one_seg_in_file] - label_seqs_for_one_sentences.append(a_kind_lable) - labels.append(label_seqs_for_one_sentences) - one_seg_in_file = [] - label_seqs_for_one_sentences = [] - else: - part = line.split() - one_seg_in_file.append(part) - return labels - - -def read_sentences(words_file): - sentences = [] - with open(words_file) as fin: - s = '' - for line in fin: - line = line.strip() - if line == '': - sentences.append(s) - s = '' - else: - s += line + ' ' - return sentences - - -def transform_labels(sentences, labels): - sen_lab_pair = [] - for i in xrange(len(sentences)): - if len(labels[i]) == 1: - continue - else: - verb_list = [] - for x in labels[i][0]: - if x !='-': - verb_list.append(x) - - for j in xrange(1, len(labels[i])): - label_list = labels[i][j] - current_tag = 'O' - is_in_bracket = False - label_seq = [] - verb_word = '' - for ll in label_list: - if ll == '*' and is_in_bracket == False: - label_seq.append('O') - elif ll == '*' and is_in_bracket == True: - label_seq.append('I-' + current_tag) - elif ll == '*)': - label_seq.append('I-' + current_tag) - is_in_bracket = False - elif ll.find('(') != -1 and ll.find(')') != -1: - current_tag = ll[1:ll.find('*')] - label_seq.append('B-' + current_tag) - is_in_bracket = False - elif ll.find('(') != -1 and ll.find(')') == -1: - current_tag = ll[1:ll.find('*')] - label_seq.append('B-' + current_tag) - is_in_bracket = True - else: - print 'error:', ll - sen_lab_pair.append((sentences[i], verb_list[j-1], label_seq)) - return sen_lab_pair - - -def write_file(sen_lab_pair, output_file): - with open(output_file, 'w') as fout: - for x in sen_lab_pair: - sentence = x[0] - label_seq = ' '.join(x[2]) - assert len(sentence.split()) == len(x[2]) - fout.write(sentence + '\t' + x[1]+'\t' +label_seq + '\n') - - -if __name__ == '__main__': - - usage = '-w words_file -p props_file -o output_file' - parser = OptionParser(usage) - parser.add_option('-w', dest='words_file', help='the words file') - parser.add_option('-p', dest='props_file', help='the props file') - parser.add_option('-o', dest='output_file', help='the output_file') - (options, args) = parser.parse_args() - - sentences = read_sentences(options.words_file) - labels = read_labels(options.props_file) - sen_lab_pair = transform_labels(sentences, labels) - - write_file(sen_lab_pair, options.output_file) diff --git a/demo/semantic_role_labeling/data/get_data.sh b/demo/semantic_role_labeling/data/get_data.sh deleted file mode 100644 index a0ef26a13b9a03392cb8b6207d6d21b7761e38e8..0000000000000000000000000000000000000000 --- a/demo/semantic_role_labeling/data/get_data.sh +++ /dev/null @@ -1,29 +0,0 @@ -#!/bin/bash -# Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserved -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. 
-# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -set -e -wget http://www.cs.upc.edu/~srlconll/conll05st-tests.tar.gz -wget http://paddlepaddle.bj.bcebos.com/demo/srl_dict_and_embedding/verbDict.txt -wget http://paddlepaddle.bj.bcebos.com/demo/srl_dict_and_embedding/targetDict.txt -wget http://paddlepaddle.bj.bcebos.com/demo/srl_dict_and_embedding/wordDict.txt -wget http://paddlepaddle.bj.bcebos.com/demo/srl_dict_and_embedding/emb -tar -xzvf conll05st-tests.tar.gz -rm conll05st-tests.tar.gz -cp ./conll05st-release/test.wsj/words/test.wsj.words.gz . -cp ./conll05st-release/test.wsj/props/test.wsj.props.gz . -gunzip test.wsj.words.gz -gunzip test.wsj.props.gz - -python extract_pairs.py -w test.wsj.words -p test.wsj.props -o test.wsj.seq_pair -python extract_dict_feature.py -p test.wsj.seq_pair -f feature diff --git a/demo/semantic_role_labeling/data/test.list b/demo/semantic_role_labeling/data/test.list deleted file mode 100644 index ec370e897a7811b572613150ccb6f665c3adb974..0000000000000000000000000000000000000000 --- a/demo/semantic_role_labeling/data/test.list +++ /dev/null @@ -1 +0,0 @@ -./data/feature diff --git a/demo/semantic_role_labeling/data/train.list b/demo/semantic_role_labeling/data/train.list deleted file mode 100644 index ec370e897a7811b572613150ccb6f665c3adb974..0000000000000000000000000000000000000000 --- a/demo/semantic_role_labeling/data/train.list +++ /dev/null @@ -1 +0,0 @@ -./data/feature diff --git a/demo/semantic_role_labeling/dataprovider.py b/demo/semantic_role_labeling/dataprovider.py deleted file mode 100644 index d12f10bfcb65e25972035d863997bb9d26ba86eb..0000000000000000000000000000000000000000 --- a/demo/semantic_role_labeling/dataprovider.py +++ /dev/null @@ -1,68 +0,0 @@ -# Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserved -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. 
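extract_pairs.py, deleted above, converts the bracketed CoNLL-05 props columns into BIO label sequences. A condensed sketch of that conversion, simplified from the original transform_labels but covering the same bracket cases:

```python
def props_to_bio(column):
    """Map props cells like '(A0*', '*', '*)' to BIO tags."""
    tags, current, inside = [], 'O', False
    for cell in column:
        if '(' in cell:                         # opens a span, e.g. '(A0*' or '(V*)'
            current = cell[1:cell.find('*')]
            tags.append('B-' + current)
            inside = ')' not in cell            # one-token spans close immediately
        elif cell == '*)':                      # closes the open span
            tags.append('I-' + current)
            inside = False
        else:                                   # plain '*'
            tags.append('I-' + current if inside else 'O')
    return tags

print(props_to_bio(['(A0*', '*)', '(V*)', '(A1*', '*', '*)']))
# ['B-A0', 'I-A0', 'B-V', 'B-A1', 'I-A1', 'I-A1']
```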
- -from paddle.trainer.PyDataProvider2 import * - -UNK_IDX = 0 - - -def hook(settings, word_dict, label_dict, predicate_dict, **kwargs): - settings.word_dict = word_dict - settings.label_dict = label_dict - settings.predicate_dict = predicate_dict - - #all inputs are integral and sequential type - settings.slots = [ - integer_value_sequence(len(word_dict)), - integer_value_sequence(len(word_dict)), - integer_value_sequence(len(word_dict)), - integer_value_sequence(len(word_dict)), - integer_value_sequence(len(word_dict)), - integer_value_sequence(len(word_dict)), - integer_value_sequence(len(predicate_dict)), - integer_value_sequence(2), - integer_value_sequence(len(label_dict)) - ] - - -def get_batch_size(yeild_data): - return len(yeild_data[0]) - - -@provider(init_hook=hook, should_shuffle=True, calc_batch_size=get_batch_size, - can_over_batch_size=False, cache=CacheType.CACHE_PASS_IN_MEM) -def process(settings, file_name): - with open(file_name, 'r') as fdata: - for line in fdata: - sentence, predicate, ctx_n2, ctx_n1, ctx_0, ctx_p1, ctx_p2, mark, label = \ - line.strip().split('\t') - - words = sentence.split() - sen_len = len(words) - word_slot = [settings.word_dict.get(w, UNK_IDX) for w in words] - - predicate_slot = [settings.predicate_dict.get(predicate)] * sen_len - ctx_n2_slot = [settings.word_dict.get(ctx_n2, UNK_IDX)] * sen_len - ctx_n1_slot = [settings.word_dict.get(ctx_n1, UNK_IDX)] * sen_len - ctx_0_slot = [settings.word_dict.get(ctx_0, UNK_IDX)] * sen_len - ctx_p1_slot = [settings.word_dict.get(ctx_p1, UNK_IDX)] * sen_len - ctx_p2_slot = [settings.word_dict.get(ctx_p2, UNK_IDX)] * sen_len - - marks = mark.split() - mark_slot = [int(w) for w in marks] - - label_list = label.split() - label_slot = [settings.label_dict.get(w) for w in label_list] - yield word_slot, ctx_n2_slot, ctx_n1_slot, \ - ctx_0_slot, ctx_p1_slot, ctx_p2_slot, predicate_slot, mark_slot, label_slot diff --git a/demo/semantic_role_labeling/db_lstm.py b/demo/semantic_role_labeling/db_lstm.py deleted file mode 100644 index 75946bd72e04341c189f6e88fdde98e03f4a8bfb..0000000000000000000000000000000000000000 --- a/demo/semantic_role_labeling/db_lstm.py +++ /dev/null @@ -1,214 +0,0 @@ -# Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserved -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. 
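In the semantic-role-labeling data provider just deleted, sentence-level features (the predicate and its five context words) are repeated once per token, so every slot handed to the network is a sequence of the same length. A simplified sketch of that broadcasting (the original looks the predicate up without an UNK fallback; the fallback here is a small liberty):

```python
def make_slots(words, predicate, ctx_words, word_dict, predicate_dict, unk=0):
    """ctx_words is (ctx_n2, ctx_n1, ctx_0, ctx_p1, ctx_p2), as in process()."""
    n = len(words)
    word_slot = [word_dict.get(w, unk) for w in words]
    predicate_slot = [predicate_dict.get(predicate, unk)] * n   # broadcast per token
    ctx_slots = [[word_dict.get(c, unk)] * n for c in ctx_words]
    return [word_slot, predicate_slot] + ctx_slots
```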
- -import math -import os -import sys -from paddle.trainer_config_helpers import * - -#file paths -word_dict_file = './data/wordDict.txt' -label_dict_file = './data/targetDict.txt' -predicate_file= './data/verbDict.txt' -train_list_file = './data/train.list' -test_list_file = './data/test.list' - -is_test = get_config_arg('is_test', bool, False) -is_predict = get_config_arg('is_predict', bool, False) - -if not is_predict: - #load dictionaries - word_dict = dict() - label_dict = dict() - predicate_dict = dict() - with open(word_dict_file, 'r') as f_word, \ - open(label_dict_file, 'r') as f_label, \ - open(predicate_file, 'r') as f_pre: - for i, line in enumerate(f_word): - w = line.strip() - word_dict[w] = i - - for i, line in enumerate(f_label): - w = line.strip() - label_dict[w] = i - - for i, line in enumerate(f_pre): - w = line.strip() - predicate_dict[w] = i - - - if is_test: - train_list_file = None - - #define data provider - define_py_data_sources2( - train_list=train_list_file, - test_list=test_list_file, - module='dataprovider', - obj='process', - args={'word_dict': word_dict, - 'label_dict': label_dict, - 'predicate_dict': predicate_dict }) - - word_dict_len = len(word_dict) - label_dict_len = len(label_dict) - pred_len = len(predicate_dict) - -else: - word_dict_len = get_config_arg('dict_len', int) - label_dict_len = get_config_arg('label_len', int) - pred_len = get_config_arg('pred_len', int) - -############################## Hyper-parameters ################################## -mark_dict_len = 2 -word_dim = 32 -mark_dim = 5 -hidden_dim = 512 -depth = 8 - - - -########################### Optimizer ####################################### - - -settings( - batch_size=150, - learning_method=MomentumOptimizer(momentum=0), - learning_rate=2e-2, - regularization=L2Regularization(8e-4), - is_async=False, - model_average=ModelAverage(average_window=0.5, - max_average_window=10000), - -) - - - - -####################################### network ############################## -#8 features and 1 target -word = data_layer(name='word_data', size=word_dict_len) -predicate = data_layer(name='verb_data', size=pred_len) - -ctx_n2 = data_layer(name='ctx_n2_data', size=word_dict_len) -ctx_n1 = data_layer(name='ctx_n1_data', size=word_dict_len) -ctx_0 = data_layer(name='ctx_0_data', size=word_dict_len) -ctx_p1 = data_layer(name='ctx_p1_data', size=word_dict_len) -ctx_p2 = data_layer(name='ctx_p2_data', size=word_dict_len) -mark = data_layer(name='mark_data', size=mark_dict_len) - - -if not is_predict: - target = data_layer(name='target', size=label_dict_len) - - -default_std=1/math.sqrt(hidden_dim)/3.0 - -emb_para = ParameterAttribute(name='emb', initial_std=0., learning_rate=0.) -std_0 = ParameterAttribute(initial_std=0.) 
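db_lstm.py draws most weight initializations from a normal distribution whose scale is tied to the hidden size. For the values above:

```python
import math

hidden_dim = 512
default_std = 1 / math.sqrt(hidden_dim) / 3.0  # as computed in the config
print(round(default_std, 4))  # 0.0147: weights start near zero but not degenerate
```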
-std_default = ParameterAttribute(initial_std=default_std) - -predicate_embedding = embedding_layer(size=word_dim, input=predicate, param_attr=ParameterAttribute(name='vemb',initial_std=default_std)) -mark_embedding = embedding_layer(name='word_ctx-in_embedding', size=mark_dim, input=mark, param_attr=std_0) - -word_input=[word, ctx_n2, ctx_n1, ctx_0, ctx_p1, ctx_p2] -emb_layers = [embedding_layer(size=word_dim, input=x, param_attr=emb_para) for x in word_input] -emb_layers.append(predicate_embedding) -emb_layers.append(mark_embedding) - -hidden_0 = mixed_layer( - name='hidden0', - size=hidden_dim, - bias_attr=std_default, - input=[ full_matrix_projection(input=emb, param_attr=std_default ) for emb in emb_layers ]) - - -mix_hidden_lr = 1e-3 -lstm_para_attr = ParameterAttribute(initial_std=0.0, learning_rate=1.0) -hidden_para_attr = ParameterAttribute(initial_std=default_std, learning_rate=mix_hidden_lr) - -lstm_0 = lstmemory(name='lstm0', - input=hidden_0, - act=ReluActivation(), - gate_act=SigmoidActivation(), - state_act=SigmoidActivation(), - bias_attr=std_0, - param_attr=lstm_para_attr) - -#stack L-LSTM and R-LSTM with direct edges -input_tmp = [hidden_0, lstm_0] - - -for i in range(1, depth): - - mix_hidden = mixed_layer(name='hidden'+str(i), - size=hidden_dim, - bias_attr=std_default, - input=[full_matrix_projection(input=input_tmp[0], param_attr=hidden_para_attr), - full_matrix_projection(input=input_tmp[1], param_attr=lstm_para_attr) - ] - ) - - lstm = lstmemory(name='lstm'+str(i), - input=mix_hidden, - act=ReluActivation(), - gate_act=SigmoidActivation(), - state_act=SigmoidActivation(), - reverse=((i % 2)==1), - bias_attr=std_0, - param_attr=lstm_para_attr) - - input_tmp = [mix_hidden, lstm] - -feature_out = mixed_layer(name='output', - size=label_dict_len, - bias_attr=std_default, - input=[full_matrix_projection(input=input_tmp[0], param_attr=hidden_para_attr), - full_matrix_projection(input=input_tmp[1], param_attr=lstm_para_attr) - ], - ) - - - -if not is_predict: - crf_l = crf_layer( name = 'crf', - size = label_dict_len, - input = feature_out, - label = target, - param_attr=ParameterAttribute(name='crfw',initial_std=default_std, learning_rate=mix_hidden_lr) - - ) - - - crf_dec_l = crf_decoding_layer(name = 'crf_dec_l', - size = label_dict_len, - input = feature_out, - label = target, - param_attr=ParameterAttribute(name='crfw') - ) - - - eval = sum_evaluator(input=crf_dec_l) - - outputs(crf_l) - -else: - crf_dec_l = crf_decoding_layer(name = 'crf_dec_l', - size = label_dict_len, - input = feature_out, - param_attr=ParameterAttribute(name='crfw') - ) - - outputs(crf_dec_l) - diff --git a/demo/semantic_role_labeling/predict.py b/demo/semantic_role_labeling/predict.py deleted file mode 100644 index 15145fafceb2422ee201684e85ef5d1043a7bf7d..0000000000000000000000000000000000000000 --- a/demo/semantic_role_labeling/predict.py +++ /dev/null @@ -1,195 +0,0 @@ -# Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserved -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. 
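Note: the depth loop in db_lstm.py above stacks eight LSTM layers with direct edges; each mixed layer reads both members of input_tmp, and reverse=((i % 2) == 1) flips every odd-numbered layer so forward and backward passes alternate. A schematic sketch of that wiring pattern, with strings standing in for the real layer objects:

```python
# Schematic of the stacking pattern above; purely illustrative.
depth = 8
input_tmp = ['hidden0', 'lstm0']  # lstm0 runs forward
for i in range(1, depth):
    mix_hidden = 'hidden%d(%s + %s)' % (i, input_tmp[0], input_tmp[1])
    direction = 'backward' if (i % 2) == 1 else 'forward'
    lstm = 'lstm%d[%s](hidden%d)' % (i, direction, i)
    input_tmp = [mix_hidden, lstm]
print(input_tmp)  # the top pair that feeds the CRF output layer
```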
- -import os -import numpy as np -from optparse import OptionParser -from py_paddle import swig_paddle, DataProviderConverter -from paddle.trainer.PyDataProvider2 import integer_value_sequence -from paddle.trainer.config_parser import parse_config -""" -Usage: run following command to show help message. - python predict.py -h -""" -UNK_IDX = 0 - - -class Prediction(): - def __init__(self, train_conf, dict_file, model_dir, label_file, predicate_dict_file): - """ - train_conf: trainer configure. - dict_file: word dictionary file name. - model_dir: directory of model. - """ - - self.dict = {} - self.labels = {} - self.predicate_dict={} - self.labels_reverse = {} - self.load_dict_label(dict_file, label_file, predicate_dict_file) - - len_dict = len(self.dict) - len_label = len(self.labels) - len_pred = len(self.predicate_dict) - - conf = parse_config( - train_conf, - 'dict_len=' + str(len_dict) + - ',label_len=' + str(len_label) + - ',pred_len=' + str(len_pred) + - ',is_predict=True') - self.network = swig_paddle.GradientMachine.createFromConfigProto( - conf.model_config) - self.network.loadParameters(model_dir) - - slots = [ - integer_value_sequence(len_dict), - integer_value_sequence(len_dict), - integer_value_sequence(len_dict), - integer_value_sequence(len_dict), - integer_value_sequence(len_dict), - integer_value_sequence(len_dict), - integer_value_sequence(len_pred), - integer_value_sequence(2) - ] - self.converter = DataProviderConverter(slots) - - def load_dict_label(self, dict_file, label_file, predicate_dict_file): - """ - Load dictionary from self.dict_file. - """ - for line_count, line in enumerate(open(dict_file, 'r')): - self.dict[line.strip()] = line_count - - for line_count, line in enumerate(open(label_file, 'r')): - self.labels[line.strip()] = line_count - self.labels_reverse[line_count] = line.strip() - - for line_count, line in enumerate(open(predicate_dict_file, 'r')): - self.predicate_dict[line.strip()] = line_count - def get_data(self, data_file): - """ - Get input data of paddle format. - """ - with open(data_file, 'r') as fdata: - for line in fdata: - sentence, predicate, ctx_n2, ctx_n1, ctx_0, ctx_p1, ctx_p2, mark, label = line.strip( - ).split('\t') - words = sentence.split() - sen_len = len(words) - - word_slot = [self.dict.get(w, UNK_IDX) for w in words] - predicate_slot = [self.predicate_dict.get(predicate, UNK_IDX)] * sen_len - ctx_n2_slot = [self.dict.get(ctx_n2, UNK_IDX)] * sen_len - ctx_n1_slot = [self.dict.get(ctx_n1, UNK_IDX)] * sen_len - ctx_0_slot = [self.dict.get(ctx_0, UNK_IDX)] * sen_len - ctx_p1_slot = [self.dict.get(ctx_p1, UNK_IDX)] * sen_len - ctx_p2_slot = [self.dict.get(ctx_p2, UNK_IDX)] * sen_len - - marks = mark.split() - mark_slot = [int(w) for w in marks] - - yield word_slot, ctx_n2_slot, ctx_n1_slot, \ - ctx_0_slot, ctx_p1_slot, ctx_p2_slot, predicate_slot, mark_slot - - def predict(self, data_file, output_file): - """ - data_file: file name of input data. 
- """ - input = self.converter(self.get_data(data_file)) - output = self.network.forwardTest(input) - lab = output[0]["id"].tolist() - - with open(data_file, 'r') as fin, open(output_file, 'w') as fout: - index = 0 - for line in fin: - sen = line.split('\t')[0] - len_sen = len(sen.split()) - line_labels = lab[index:index + len_sen] - index += len_sen - fout.write(sen + '\t' + ' '.join( - [self.labels_reverse[i] for i in line_labels]) + '\n') - - -def option_parser(): - usage = ("python predict.py -c config -w model_dir " - "-d word dictionary -l label_file -i input_file -p pred_dict_file") - parser = OptionParser(usage="usage: %s [options]" % usage) - parser.add_option( - "-c", - "--tconf", - action="store", - dest="train_conf", - help="network config") - parser.add_option( - "-d", - "--dict", - action="store", - dest="dict_file", - help="dictionary file") - parser.add_option( - "-l", - "--label", - action="store", - dest="label_file", - default=None, - help="label file") - parser.add_option( - "-p", - "--predict_dict_file", - action="store", - dest="predict_dict_file", - default=None, - help="predict_dict_file") - parser.add_option( - "-i", - "--data", - action="store", - dest="data_file", - help="data file to predict") - parser.add_option( - "-w", - "--model", - action="store", - dest="model_path", - default=None, - help="model path") - - parser.add_option( - "-o", - "--output_file", - action="store", - dest="output_file", - default=None, - help="output file") - return parser.parse_args() - - -def main(): - options, args = option_parser() - train_conf = options.train_conf - data_file = options.data_file - dict_file = options.dict_file - model_path = options.model_path - label_file = options.label_file - predict_dict_file = options.predict_dict_file - output_file = options.output_file - - swig_paddle.initPaddle("--use_gpu=0") - predict = Prediction(train_conf, dict_file, model_path, label_file, predict_dict_file) - predict.predict(data_file,output_file) - - -if __name__ == '__main__': - main() diff --git a/demo/semantic_role_labeling/predict.sh b/demo/semantic_role_labeling/predict.sh deleted file mode 100755 index 873aad670d16803ce321ab60baabe9fe29ea64bf..0000000000000000000000000000000000000000 --- a/demo/semantic_role_labeling/predict.sh +++ /dev/null @@ -1,43 +0,0 @@ -#!/bin/bash - -# Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserved -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. 
-set -e - -function get_best_pass() { - cat $1 | grep -Pzo 'Test .*\n.*pass-.*' | \ - sed -r 'N;s/Test.* cost=([0-9]+\.[0-9]+).*\n.*pass-([0-9]+)/\1 \2/g' | \ - sort -n | head -n 1 -} - -log=train.log -LOG=`get_best_pass $log` -LOG=(${LOG}) -best_model_path="output/pass-${LOG[1]}" - -config_file=db_lstm.py -dict_file=./data/wordDict.txt -label_file=./data/targetDict.txt -predicate_dict_file=./data/verbDict.txt -input_file=./data/feature -output_file=predict.res - -python predict.py \ - -c $config_file \ - -w $best_model_path \ - -l $label_file \ - -p $predicate_dict_file \ - -d $dict_file \ - -i $input_file \ - -o $output_file diff --git a/demo/semantic_role_labeling/test.sh b/demo/semantic_role_labeling/test.sh deleted file mode 100755 index 11d9d6a19c1b17ad1b7540ee7a03017f85dd821e..0000000000000000000000000000000000000000 --- a/demo/semantic_role_labeling/test.sh +++ /dev/null @@ -1,40 +0,0 @@ -#!/bin/bash - -# Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserved -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -set -e - -function get_best_pass() { - cat $1 | grep -Pzo 'Test .*\n.*pass-.*' | \ - sed -r 'N;s/Test.* cost=([0-9]+\.[0-9]+).*\n.*pass-([0-9]+)/\1 \2/g' |\ - sort -n | head -n 1 -} - -log=train.log -LOG=`get_best_pass $log` -LOG=(${LOG}) -evaluate_pass="output/pass-${LOG[1]}" - -echo 'evaluating from pass '$evaluate_pass -model_list=./model.list -touch $model_list | echo $evaluate_pass > $model_list - -paddle train \ - --config=./db_lstm.py \ - --model_list=$model_list \ - --job=test \ - --use_gpu=false \ - --config_args=is_test=1 \ - --test_all_data_in_one_period=1 \ -2>&1 | tee 'test.log' diff --git a/demo/semantic_role_labeling/train.sh b/demo/semantic_role_labeling/train.sh deleted file mode 100755 index 9354e72f46dc4dfc46138a04c330933d404c6cb8..0000000000000000000000000000000000000000 --- a/demo/semantic_role_labeling/train.sh +++ /dev/null @@ -1,29 +0,0 @@ -#!/bin/bash - -# Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserved -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. 
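Note: get_best_pass() in the scripts above pairs each test-cost line in train.log with the pass that produced it and keeps the cheapest pass. A hedged Python rendering of the same log scrape (the log format is assumed to match what the sed expression expects):

```python
import re

# Python sketch of get_best_pass() from predict.sh/test.sh above; it
# assumes each "Test ... cost=X" line is followed by a "pass-N" line.
def get_best_pass(log_path):
    with open(log_path) as f:
        text = f.read()
    pairs = re.findall(r'Test.* cost=(\d+\.\d+).*\n.*pass-(\d+)', text)
    cost, pass_id = min(pairs, key=lambda p: float(p[0]))
    return 'output/pass-%s' % pass_id
```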
-set -e
-paddle train \
-  --config=./db_lstm.py \
-  --use_gpu=0 \
-  --log_period=5000 \
-  --trainer_count=1 \
-  --show_parameter_stats_period=5000 \
-  --save_dir=./output \
-  --num_passes=10000 \
-  --average_test_period=10000000 \
-  --init_model_path=./data \
-  --load_missing_parameter_strategy=rand \
-  --test_all_data_in_one_period=1 \
-  2>&1 | tee 'train.log'
diff --git a/demo/sentiment/.gitignore b/demo/sentiment/.gitignore
deleted file mode 100644
index bf2a9ab1ce3c937bf06179074cd952dc53591dfd..0000000000000000000000000000000000000000
--- a/demo/sentiment/.gitignore
+++ /dev/null
@@ -1,11 +0,0 @@
-data/aclImdb
-data/imdb
-data/pre-imdb
-data/mosesdecoder-master
-logs/
-model_output
-dataprovider_copy_1.py
-model.list
-test.log
-train.log
-*.pyc
diff --git a/demo/sentiment/data/get_imdb.sh b/demo/sentiment/data/get_imdb.sh
deleted file mode 100755
index 7600af6fbb900ee845702f1297779c1f0ed9bf84..0000000000000000000000000000000000000000
--- a/demo/sentiment/data/get_imdb.sh
+++ /dev/null
@@ -1,51 +0,0 @@
-#!/bin/bash
-# Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserved
-#
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-# http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-
-set -e
-set -x
-
-DIR="$( cd "$(dirname "$0")" ; pwd -P )"
-cd $DIR
-
-#download the dataset
-echo "Downloading aclImdb..."
-#http://ai.stanford.edu/%7Eamaas/data/sentiment/
-wget http://ai.stanford.edu/%7Eamaas/data/sentiment/aclImdb_v1.tar.gz
-
-echo "Downloading mosesdecoder..."
-#https://github.com/moses-smt/mosesdecoder
-wget https://github.com/moses-smt/mosesdecoder/archive/master.zip
-
-#extract package
-echo "Unzipping..."
-tar -zxvf aclImdb_v1.tar.gz
-unzip master.zip
-
-#move train and test set to imdb_data directory
-#in order to process them when training
-mkdir -p imdb/train
-mkdir -p imdb/test
-
-cp -r aclImdb/train/pos/ imdb/train/pos
-cp -r aclImdb/train/neg/ imdb/train/neg
-
-cp -r aclImdb/test/pos/ imdb/test/pos
-cp -r aclImdb/test/neg/ imdb/test/neg
-
-#remove compressed package
-rm aclImdb_v1.tar.gz
-rm master.zip
-
-echo "Done."
diff --git a/demo/sentiment/dataprovider.py b/demo/sentiment/dataprovider.py
deleted file mode 100755
index 00f72cecacb454a0dd1184fa2098be4543007de7..0000000000000000000000000000000000000000
--- a/demo/sentiment/dataprovider.py
+++ /dev/null
@@ -1,35 +0,0 @@
-# Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserved
-#
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-# http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-from paddle.trainer.PyDataProvider2 import * - - -def hook(settings, dictionary, **kwargs): - settings.word_dict = dictionary - settings.input_types = [ - integer_value_sequence(len(settings.word_dict)), integer_value(2) - ] - settings.logger.info('dict len : %d' % (len(settings.word_dict))) - - -@provider(init_hook=hook) -def process(settings, file_name): - with open(file_name, 'r') as fdata: - for line_count, line in enumerate(fdata): - label, comment = line.strip().split('\t\t') - label = int(label) - words = comment.split() - word_slot = [ - settings.word_dict[w] for w in words if w in settings.word_dict - ] - yield word_slot, label diff --git a/demo/sentiment/predict.py b/demo/sentiment/predict.py deleted file mode 100755 index 0095c6f7272a2191ea39e042a836f7d6038032aa..0000000000000000000000000000000000000000 --- a/demo/sentiment/predict.py +++ /dev/null @@ -1,151 +0,0 @@ -# Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserved -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -import os, sys -import numpy as np -from optparse import OptionParser -from py_paddle import swig_paddle, DataProviderConverter -from paddle.trainer.PyDataProvider2 import integer_value_sequence -from paddle.trainer.config_parser import parse_config -""" -Usage: run following command to show help message. - python predict.py -h -""" - - -class SentimentPrediction(): - def __init__(self, train_conf, dict_file, model_dir=None, label_file=None): - """ - train_conf: trainer configure. - dict_file: word dictionary file name. - model_dir: directory of model. - """ - self.train_conf = train_conf - self.dict_file = dict_file - self.word_dict = {} - self.dict_dim = self.load_dict() - self.model_dir = model_dir - if model_dir is None: - self.model_dir = os.path.dirname(train_conf) - - self.label = None - if label_file is not None: - self.load_label(label_file) - - conf = parse_config(train_conf, "is_predict=1") - self.network = swig_paddle.GradientMachine.createFromConfigProto( - conf.model_config) - self.network.loadParameters(self.model_dir) - input_types = [integer_value_sequence(self.dict_dim)] - self.converter = DataProviderConverter(input_types) - - def load_dict(self): - """ - Load dictionary from self.dict_file. - """ - for line_count, line in enumerate(open(self.dict_file, 'r')): - self.word_dict[line.strip().split('\t')[0]] = line_count - return len(self.word_dict) - - def load_label(self, label_file): - """ - Load label. - """ - self.label = {} - for v in open(label_file, 'r'): - self.label[int(v.split('\t')[1])] = v.split('\t')[0] - - def get_index(self, data): - """ - transform word into integer index according to the dictionary. 
- """ - words = data.strip().split() - word_slot = [ - self.word_dict[w] for w in words if w in self.word_dict - ] - return word_slot - - def batch_predict(self, data_batch): - input = self.converter(data_batch) - output = self.network.forwardTest(input) - prob = output[0]["value"] - labs = np.argsort(-prob) - for idx, lab in enumerate(labs): - if self.label is None: - print("predicting label is %d" % (lab[0])) - else: - print("predicting label is %s" % - (self.label[lab[0]])) - -def option_parser(): - usage = "python predict.py -n config -w model_dir -d dictionary -i input_file " - parser = OptionParser(usage="usage: %s [options]" % usage) - parser.add_option( - "-n", - "--tconf", - action="store", - dest="train_conf", - help="network config") - parser.add_option( - "-d", - "--dict", - action="store", - dest="dict_file", - help="dictionary file") - parser.add_option( - "-b", - "--label", - action="store", - dest="label", - default=None, - help="dictionary file") - parser.add_option( - "-c", - "--batch_size", - type="int", - action="store", - dest="batch_size", - default=1, - help="the batch size for prediction") - parser.add_option( - "-w", - "--model", - action="store", - dest="model_path", - default=None, - help="model path") - return parser.parse_args() - - -def main(): - options, args = option_parser() - train_conf = options.train_conf - batch_size = options.batch_size - dict_file = options.dict_file - model_path = options.model_path - label = options.label - swig_paddle.initPaddle("--use_gpu=0") - predict = SentimentPrediction(train_conf, dict_file, model_path, label) - - batch = [] - for line in sys.stdin: - batch.append([predict.get_index(line)]) - if len(batch) == batch_size: - predict.batch_predict(batch) - batch=[] - if len(batch) > 0: - predict.batch_predict(batch) - -if __name__ == '__main__': - main() diff --git a/demo/sentiment/predict.sh b/demo/sentiment/predict.sh deleted file mode 100755 index c72a8e8641516543ef267fcb4b448630246d1e8d..0000000000000000000000000000000000000000 --- a/demo/sentiment/predict.sh +++ /dev/null @@ -1,27 +0,0 @@ -#!/bin/bash -# Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserved -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -set -e - -#Note the default model is pass-00002, you shold make sure the model path -#exists or change the mode path. -model=model_output/pass-00002/ -config=trainer_config.py -label=data/pre-imdb/labels.list -cat ./data/aclImdb/test/pos/10007_10.txt | python predict.py \ - --tconf=$config\ - --model=$model \ - --label=$label \ - --dict=./data/pre-imdb/dict.txt \ - --batch_size=1 diff --git a/demo/sentiment/preprocess.py b/demo/sentiment/preprocess.py deleted file mode 100755 index 29b3682b747c66574590de5ea70574981cc536bb..0000000000000000000000000000000000000000 --- a/demo/sentiment/preprocess.py +++ /dev/null @@ -1,359 +0,0 @@ -# Copyright (c) 2016 PaddlePaddle Authors. 
-#
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-# http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-
-import os
-import sys
-import random
-import operator
-import numpy as np
-from subprocess import Popen, PIPE
-from os.path import join as join_path
-from optparse import OptionParser
-
-from paddle.utils.preprocess_util import *
-"""
-Usage: run following command to show help message.
-    python preprocess.py -h
-"""
-
-
-def save_dict(dict, filename, is_reverse=True):
-    """
-    Save dictionary into file.
-    dict: input dictionary.
-    filename: output file name, string.
-    is_reverse: True, descending order by value.
-                False, ascending order by value.
-    """
-    f = open(filename, 'w')
-    for k, v in sorted(dict.items(), key=operator.itemgetter(1),\
-        reverse=is_reverse):
-        f.write('%s\t%s\n' % (k, v))
-    f.close()
-
-
-def tokenize(sentences):
-    """
-    Use tokenizer.perl to tokenize input sentences.
-    tokenizer.perl is a tool of Moses.
-    sentences: a list of input sentences.
-    return: a list of processed text.
-    """
-    dir = './data/mosesdecoder-master/scripts/tokenizer/tokenizer.perl'
-    tokenizer_cmd = [dir, '-l', 'en', '-q', '-']
-    assert isinstance(sentences, list)
-    text = "\n".join(sentences)
-    tokenizer = Popen(tokenizer_cmd, stdin=PIPE, stdout=PIPE)
-    tok_text, _ = tokenizer.communicate(text)
-    toks = tok_text.split('\n')[:-1]
-    return toks
-
-
-def read_lines(path):
-    """
-    path: String, file path.
-    return: a list of sequences.
-    """
-    seqs = []
-    with open(path, 'r') as f:
-        for line in f.readlines():
-            line = line.strip()
-            if len(line):
-                seqs.append(line)
-    return seqs
-
-
-class SentimentDataSetCreate():
-    """
-    A class to process data for sentiment analysis task.
-    """
-
-    def __init__(self,
-                 data_path,
-                 output_path,
-                 use_tokenizer=True,
-                 multi_lines=False):
-        """
-        data_path: string, training and testing dataset path
-        output_path: string, output path, store processed dataset
-        multi_lines: whether a file has multiple lines.
-                     In order to shuffle fully, it needs to read all files into
-                     memory, then shuffle them if one file has multiple lines.
- """ - self.output_path = output_path - self.data_path = data_path - - self.train_dir = 'train' - self.test_dir = 'test' - - self.train_list = "train.list" - self.test_list = "test.list" - - self.label_list = "labels.list" - self.classes_num = 0 - - self.batch_size = 50000 - self.batch_dir = 'batches' - - self.dict_file = "dict.txt" - self.dict_with_test = False - self.dict_size = 0 - self.word_count = {} - - self.tokenizer = use_okenizer - self.overwrite = False - - self.multi_lines = multi_lines - - self.train_dir = join_path(data_path, self.train_dir) - self.test_dir = join_path(data_path, self.test_dir) - self.train_list = join_path(output_path, self.train_list) - self.test_list = join_path(output_path, self.test_list) - self.label_list = join_path(output_path, self.label_list) - self.dict_file = join_path(output_path, self.dict_file) - - def data_list(self, path): - """ - create dataset from path - path: data path - return: data list - """ - label_set = get_label_set_from_dir(path) - data = [] - for lab_name in label_set.keys(): - file_paths = list_files(join_path(path, lab_name)) - for p in file_paths: - data.append({"label" : label_set[lab_name],\ - "seq_path": p}) - return data, label_set - - def create_dict(self, data): - """ - create dict for input data. - data: list, [sequence, sequnce, ...] - """ - for seq in data: - for w in seq.strip().lower().split(): - if w not in self.word_count: - self.word_count[w] = 1 - else: - self.word_count[w] += 1 - - def create_dataset(self): - """ - create file batches and dictionary of train data set. - If the self.overwrite is false and train.list already exists in - self.output_path, this function will not create and save file - batches from the data set path. - return: dictionary size, class number. - """ - out_path = self.output_path - if out_path and not os.path.exists(out_path): - os.makedirs(out_path) - - # If self.overwrite is false or self.train_list has existed, - # it will not process dataset. - if not (self.overwrite or not os.path.exists(self.train_list)): - print "%s already exists." % self.train_list - return - - # Preprocess train data. - train_data, train_lab_set = self.data_list(self.train_dir) - print "processing train set..." - file_lists = self.save_data(train_data, "train", self.batch_size, True, - True) - save_list(file_lists, self.train_list) - - # If have test data path, preprocess test data. - if os.path.exists(self.test_dir): - test_data, test_lab_set = self.data_list(self.test_dir) - assert (train_lab_set == test_lab_set) - print "processing test set..." - file_lists = self.save_data(test_data, "test", self.batch_size, - False, self.dict_with_test) - save_list(file_lists, self.test_list) - - # save labels set. - save_dict(train_lab_set, self.label_list, False) - self.classes_num = len(train_lab_set.keys()) - - # save dictionary. - save_dict(self.word_count, self.dict_file, True) - self.dict_size = len(self.word_count) - - def save_data(self, - data, - prefix="", - batch_size=50000, - is_shuffle=False, - build_dict=False): - """ - Create batches for a Dataset object. - data: the Dataset object to process. - prefix: the prefix of each batch. - batch_size: number of data in each batch. 
-        build_dict: whether to build dictionary for data
-
-        return: list of batch names
-        """
-        if is_shuffle and self.multi_lines:
-            return self.save_data_multi_lines(data, prefix, batch_size,
-                                              build_dict)
-
-        if is_shuffle:
-            random.shuffle(data)
-        num_batches = int(math.ceil(len(data) / float(batch_size)))
-        batch_names = []
-        for i in range(num_batches):
-            batch_name = join_path(self.output_path,
-                                   "%s_part_%03d" % (prefix, i))
-            begin = i * batch_size
-            end = min((i + 1) * batch_size, len(data))
-            # read a batch of data
-            label_list, data_list = self.get_data_list(begin, end, data)
-            if build_dict:
-                self.create_dict(data_list)
-            self.save_file(label_list, data_list, batch_name)
-            batch_names.append(batch_name)
-
-        return batch_names
-
-    def get_data_list(self, begin, end, data):
-        """
-        begin: int, beginning index of data.
-        end: int, ending index of data.
-        data: a list of {"seq_path": sequence path, "label": label index}
-
-        return: a list of labels and a list of sequences.
-        """
-        label_list = []
-        data_list = []
-        for j in range(begin, end):
-            seqs = read_lines(data[j]["seq_path"])
-            lab = int(data[j]["label"])
-            # File may have multiple lines.
-            for seq in seqs:
-                data_list.append(seq)
-                label_list.append(lab)
-        if self.tokenizer:
-            data_list = tokenize(data_list)
-        return label_list, data_list
-
-    def save_data_multi_lines(self,
-                              data,
-                              prefix="",
-                              batch_size=50000,
-                              build_dict=False):
-        """
-        In order to shuffle fully, there is no need to load all data if
-        each file only contains one sample; it only needs to shuffle the
-        list of file names. But if one file contains multiple lines, each
-        line is one sample, and all data must be read into memory to
-        shuffle fully. This interface is mainly for data containing
-        multiple lines per file, which consumes more memory for large data.
-
-        data: the Dataset object to process.
-        prefix: the prefix of each batch.
-        batch_size: number of data in each batch.
-        build_dict: whether to build dictionary for data
-
-        return: list of batch names
-        """
-        assert self.multi_lines
-        label_list = []
-        data_list = []
-
-        # read all data
-        label_list, data_list = self.get_data_list(0, len(data), data)
-        if build_dict:
-            self.create_dict(data_list)
-
-        length = len(label_list)
-        perm_list = np.array([i for i in xrange(length)])
-        random.shuffle(perm_list)
-
-        num_batches = int(math.ceil(length / float(batch_size)))
-        batch_names = []
-        for i in range(num_batches):
-            batch_name = join_path(self.output_path,
-                                   "%s_part_%03d" % (prefix, i))
-            begin = i * batch_size
-            end = min((i + 1) * batch_size, length)
-            sub_label = [label_list[perm_list[i]] for i in range(begin, end)]
-            sub_data = [data_list[perm_list[i]] for i in range(begin, end)]
-            self.save_file(sub_label, sub_data, batch_name)
-            batch_names.append(batch_name)
-
-        return batch_names
-
-    def save_file(self, label_list, data_list, filename):
-        """
-        Save data into file.
-        label_list: a list of int values.
-        data_list: a list of sequences.
-        filename: output file name.
- """ - f = open(filename, 'w') - print "saving file: %s" % filename - for lab, seq in zip(label_list, data_list): - f.write('%s\t\t%s\n' % (lab, seq)) - f.close() - - -def option_parser(): - parser = OptionParser(usage="usage: python preprcoess.py "\ - "-i data_dir [options]") - parser.add_option( - "-i", - "--data", - action="store", - dest="input", - help="Input data directory.") - parser.add_option( - "-o", - "--output", - action="store", - dest="output", - default=None, - help="Output directory.") - parser.add_option( - "-t", - "--tokenizer", - action="store", - dest="use_tokenizer", - default=True, - help="Whether to use tokenizer.") - parser.add_option("-m", "--multi_lines", action="store", - dest="multi_lines", default=False, - help="If input text files have multi lines and they "\ - "need to be shuffled, you should set -m True,") - return parser.parse_args() - - -def main(): - options, args = option_parser() - data_dir = options.input - output_dir = options.output - use_tokenizer = options.use_tokenizer - multi_lines = options.multi_lines - if output_dir is None: - outname = os.path.basename(options.input) - output_dir = join_path(os.path.dirname(data_dir), 'pre-' + outname) - data_creator = SentimentDataSetCreate(data_dir, output_dir, use_tokenizer, - multi_lines) - data_creator.create_dataset() - - -if __name__ == '__main__': - main() diff --git a/demo/sentiment/preprocess.sh b/demo/sentiment/preprocess.sh deleted file mode 100755 index 19ec34d4f016365d18db01ddec559d26202b19c6..0000000000000000000000000000000000000000 --- a/demo/sentiment/preprocess.sh +++ /dev/null @@ -1,22 +0,0 @@ -#!/bin/bash -# Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserved -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -set -e - -echo "Start to preprcess..." - -data_dir="./data/imdb" -python preprocess.py -i $data_dir - -echo "Done." diff --git a/demo/sentiment/sentiment_net.py b/demo/sentiment/sentiment_net.py deleted file mode 100644 index a01577ca5ae025b7bec67c6d54c7dbd931dbee74..0000000000000000000000000000000000000000 --- a/demo/sentiment/sentiment_net.py +++ /dev/null @@ -1,145 +0,0 @@ -# Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserved -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -from os.path import join as join_path - -from paddle.trainer_config_helpers import * - - -def sentiment_data(data_dir=None, - is_test=False, - is_predict=False, - train_list="train.list", - test_list="test.list", - dict_file="dict.txt"): - """ - Predefined data provider for sentiment analysis. 
-    is_test: whether this config is used for test.
-    is_predict: whether this config is used for prediction.
-    train_list: text file name, containing a list of training set.
-    test_list: text file name, containing a list of testing set.
-    dict_file: text file name, containing dictionary.
-    """
-    dict_dim = len(open(join_path(data_dir, "dict.txt")).readlines())
-    class_dim = len(open(join_path(data_dir, 'labels.list')).readlines())
-    if is_predict:
-        return dict_dim, class_dim
-
-    if data_dir is not None:
-        train_list = join_path(data_dir, train_list)
-        test_list = join_path(data_dir, test_list)
-        dict_file = join_path(data_dir, dict_file)
-
-    train_list = train_list if not is_test else None
-    word_dict = dict()
-    with open(dict_file, 'r') as f:
-        for i, line in enumerate(f):
-            word_dict[line.split('\t')[0]] = i
-
-    define_py_data_sources2(
-        train_list,
-        test_list,
-        module="dataprovider",
-        obj="process",
-        args={'dictionary': word_dict})
-
-    return dict_dim, class_dim
-
-
-def bidirectional_lstm_net(input_dim,
-                           class_dim=2,
-                           emb_dim=128,
-                           lstm_dim=128,
-                           is_predict=False):
-    data = data_layer("word", input_dim)
-    emb = embedding_layer(input=data, size=emb_dim)
-    bi_lstm = bidirectional_lstm(input=emb, size=lstm_dim)
-    dropout = dropout_layer(input=bi_lstm, dropout_rate=0.5)
-    output = fc_layer(input=dropout, size=class_dim, act=SoftmaxActivation())
-
-    if not is_predict:
-        lbl = data_layer("label", 1)
-        outputs(classification_cost(input=output, label=lbl))
-    else:
-        outputs(output)
-
-
-def stacked_lstm_net(input_dim,
-                     class_dim=2,
-                     emb_dim=128,
-                     hid_dim=512,
-                     stacked_num=3,
-                     is_predict=False):
-    """
-    A wrapper for the sentiment classification task.
-    This network uses a bi-directional recurrent network
-    consisting of three LSTM layers. The configuration follows
-    the paper at the URL below, but uses fewer layers.
-        http://www.aclweb.org/anthology/P15-1109
-
-    input_dim: here is word dictionary dimension.
-    class_dim: number of categories.
-    emb_dim: dimension of word embedding.
-    hid_dim: dimension of hidden layer.
-    stacked_num: number of stacked lstm-hidden layers.
-    is_predict: is predicting or not.
-                Some layers are not needed in the network when predicting.
-    """
-    hid_lr = 1e-3
-    assert stacked_num % 2 == 1
-
-    layer_attr = ExtraLayerAttribute(drop_rate=0.5)
-    fc_para_attr = ParameterAttribute(learning_rate=hid_lr)
-    lstm_para_attr = ParameterAttribute(initial_std=0., learning_rate=1.)
-    para_attr = [fc_para_attr, lstm_para_attr]
-    bias_attr = ParameterAttribute(initial_std=0., l2_rate=0.)
- relu = ReluActivation() - linear = LinearActivation() - - data = data_layer("word", input_dim) - emb = embedding_layer(input=data, size=emb_dim) - - fc1 = fc_layer(input=emb, size=hid_dim, act=linear, bias_attr=bias_attr) - lstm1 = lstmemory( - input=fc1, act=relu, bias_attr=bias_attr, layer_attr=layer_attr) - - inputs = [fc1, lstm1] - for i in range(2, stacked_num + 1): - fc = fc_layer( - input=inputs, - size=hid_dim, - act=linear, - param_attr=para_attr, - bias_attr=bias_attr) - lstm = lstmemory( - input=fc, - reverse=(i % 2) == 0, - act=relu, - bias_attr=bias_attr, - layer_attr=layer_attr) - inputs = [fc, lstm] - - fc_last = pooling_layer(input=inputs[0], pooling_type=MaxPooling()) - lstm_last = pooling_layer(input=inputs[1], pooling_type=MaxPooling()) - output = fc_layer( - input=[fc_last, lstm_last], - size=class_dim, - act=SoftmaxActivation(), - bias_attr=bias_attr, - param_attr=para_attr) - - if is_predict: - outputs(output) - else: - outputs(classification_cost(input=output, label=data_layer('label', 1))) diff --git a/demo/sentiment/test.sh b/demo/sentiment/test.sh deleted file mode 100755 index 8af827c3388c8df88a872bd87d121a4f9631c3ff..0000000000000000000000000000000000000000 --- a/demo/sentiment/test.sh +++ /dev/null @@ -1,39 +0,0 @@ -#!/bin/bash -# Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserved -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -set -e - -function get_best_pass() { - cat $1 | grep -Pzo 'Test .*\n.*pass-.*' | \ - sed -r 'N;s/Test.* classification_error_evaluator=([0-9]+\.[0-9]+).*\n.*pass-([0-9]+)/\1 \2/g' |\ - sort -n | head -n 1 -} - -log=train.log -LOG=`get_best_pass $log` -LOG=(${LOG}) -evaluate_pass="model_output/pass-${LOG[1]}" - -echo 'evaluating from pass '$evaluate_pass - -model_list=./model.list -touch $model_list | echo $evaluate_pass > $model_list -net_conf=trainer_config.py -paddle train --config=$net_conf \ - --model_list=$model_list \ - --job=test \ - --use_gpu=false \ - --trainer_count=4 \ - --config_args=is_test=1 \ - 2>&1 | tee 'test.log' diff --git a/demo/sentiment/train.sh b/demo/sentiment/train.sh deleted file mode 100755 index 5ce8bf4b997d962b9b61593cec0954d76c4874bc..0000000000000000000000000000000000000000 --- a/demo/sentiment/train.sh +++ /dev/null @@ -1,29 +0,0 @@ -#!/bin/bash -# Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserved -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. 
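Note: stacked_lstm_net() above interleaves fc_layer/lstmemory pairs, reversing every even-numbered LSTM, then max-pools both members of the top pair over time before the softmax. A compact, purely illustrative sketch of the direction pattern:

```python
# Direction pattern of the stacked LSTM above: with stacked_num = 3,
# lstm1 runs forward and lstm2 runs backward ((i % 2) == 0 for i = 2).
stacked_num = 3
assert stacked_num % 2 == 1  # the same constraint the network asserts
for i in range(2, stacked_num + 1):
    print('lstm%d reverse=%s' % (i, (i % 2) == 0))
# fc_last and lstm_last are then each max-pooled over time and fed
# jointly into one softmax classifier layer.
```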
-set -e - -config=trainer_config.py -output=./model_output -paddle train --config=$config \ - --save_dir=$output \ - --job=train \ - --use_gpu=false \ - --trainer_count=4 \ - --num_passes=10 \ - --log_period=10 \ - --dot_period=20 \ - --show_parameter_stats_period=100 \ - --test_all_data_in_one_period=1 \ - 2>&1 | tee 'train.log' diff --git a/demo/sentiment/trainer_config.py b/demo/sentiment/trainer_config.py deleted file mode 100644 index 2defecd178262900c03c1eda60b351dc44629d1f..0000000000000000000000000000000000000000 --- a/demo/sentiment/trainer_config.py +++ /dev/null @@ -1,39 +0,0 @@ -# Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserved -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -from sentiment_net import * -from paddle.trainer_config_helpers import * - -# whether this config is used for test -is_test = get_config_arg('is_test', bool, False) -# whether this config is used for prediction -is_predict = get_config_arg('is_predict', bool, False) - -data_dir = "./data/pre-imdb" -dict_dim, class_dim = sentiment_data(data_dir, is_test, is_predict) - -################## Algorithm Config ##################### - -settings( - batch_size=128, - learning_rate=2e-3, - learning_method=AdamOptimizer(), - average_window=0.5, - regularization=L2Regularization(8e-4), - gradient_clipping_threshold=25) - -#################### Network Config ###################### -stacked_lstm_net( - dict_dim, class_dim=class_dim, stacked_num=3, is_predict=is_predict) -# bidirectional_lstm_net(dict_dim, class_dim=class_dim, is_predict=is_predict) diff --git a/demo/seqToseq/.gitignore b/demo/seqToseq/.gitignore deleted file mode 100644 index 21cec2c2c1f3422cbb0ad133281dc1ecdd076a96..0000000000000000000000000000000000000000 --- a/demo/seqToseq/.gitignore +++ /dev/null @@ -1,17 +0,0 @@ -data/wmt14 -data/pre-wmt14 -data/wmt14_model -data/paraphrase -data/pre-paraphrase -data/paraphrase_model -translation/gen.log -translation/gen_result -translation/train.log -paraphrase/train.log -dataprovider_copy_1.py -translation/thirdparty.tgz -translation/thirdparty/train.conf -translation/thirdparty/dataprovider.py -translation/thirdparty/seqToseq_net.py -translation/thirdparty/*.dict -*.pyc diff --git a/demo/seqToseq/data/paraphrase_data.sh b/demo/seqToseq/data/paraphrase_data.sh deleted file mode 100755 index e6497c91286d44b5ef3b66c5f824e36a09728720..0000000000000000000000000000000000000000 --- a/demo/seqToseq/data/paraphrase_data.sh +++ /dev/null @@ -1,23 +0,0 @@ -#!/bin/bash -# Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserved -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
-# See the License for the specific language governing permissions and -# limitations under the License. -set -e -set -x - -# download the in-house paraphrase dataset -wget http://paddlepaddle.bj.bcebos.com/model_zoo/embedding/paraphrase.tar.gz - -# untar the dataset -tar -zxvf paraphrase.tar.gz -rm paraphrase.tar.gz diff --git a/demo/seqToseq/data/paraphrase_model.sh b/demo/seqToseq/data/paraphrase_model.sh deleted file mode 100755 index d0e7f214a38c4dad0fdf7c10ba3b76eb0ab40f06..0000000000000000000000000000000000000000 --- a/demo/seqToseq/data/paraphrase_model.sh +++ /dev/null @@ -1,37 +0,0 @@ -#!/bin/bash -# Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserved -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -set -e -set -x - -dim=32 -pretrained_dir='../../model_zoo/embedding/' -preModel=$pretrained_dir'model_'$dim'.emb' -preDict=$pretrained_dir'baidu.dict' - -usrDict_dir='pre-paraphrase/' -srcDict=$usrDict_dir'src.dict' -trgDict=$usrDict_dir'trg.dict' - -usrModel_dir='paraphrase_model/' -mkdir $usrModel_dir -srcModel=$usrModel_dir'_source_language_embedding' -trgModel=$usrModel_dir'_target_language_embedding' - -echo 'extract desired parameters based on user dictionary' -script=$pretrained_dir'extract_para.py' -python $script --preModel $preModel --preDict $preDict \ - --usrModel $srcModel --usrDict $srcDict -d $dim -python $script --preModel $preModel --preDict $preDict \ - --usrModel $trgModel --usrDict $trgDict -d $dim diff --git a/demo/seqToseq/data/wmt14_data.sh b/demo/seqToseq/data/wmt14_data.sh deleted file mode 100755 index 43f67168d2a876ba5401e0f8490a88adac9c5551..0000000000000000000000000000000000000000 --- a/demo/seqToseq/data/wmt14_data.sh +++ /dev/null @@ -1,53 +0,0 @@ -#!/bin/bash -# Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserved -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. 
-set -e -set -x -mkdir wmt14 -cd wmt14 - -# download the dataset -wget http://www-lium.univ-lemans.fr/~schwenk/cslm_joint_paper/data/bitexts.tgz -wget http://www-lium.univ-lemans.fr/~schwenk/cslm_joint_paper/data/dev+test.tgz - -# untar the dataset -tar -zxvf bitexts.tgz -tar -zxvf dev+test.tgz -gunzip bitexts.selected/* -mv bitexts.selected train -rm bitexts.tgz -rm dev+test.tgz - -# separate the dev and test dataset -mkdir test gen -mv dev/ntst1213.* test -mv dev/ntst14.* gen -rm -rf dev - -set +x -# rename the suffix, .fr->.src, .en->.trg -for dir in train test gen -do - filelist=`ls $dir` - cd $dir - for file in $filelist - do - if [ ${file##*.} = "fr" ]; then - mv $file ${file/%fr/src} - elif [ ${file##*.} = 'en' ]; then - mv $file ${file/%en/trg} - fi - done - cd .. -done diff --git a/demo/seqToseq/data/wmt14_model.sh b/demo/seqToseq/data/wmt14_model.sh deleted file mode 100755 index c4b55b90a3eb98f94e0eb3be028c6de1ef57326b..0000000000000000000000000000000000000000 --- a/demo/seqToseq/data/wmt14_model.sh +++ /dev/null @@ -1,23 +0,0 @@ -#!/bin/bash -# Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserved -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -set -e -set -x - -# download the pretrained model -wget http://paddlepaddle.bj.bcebos.com/model_zoo/wmt14_model.tar.gz - -# untar the model -tar -zxvf wmt14_model.tar.gz -rm wmt14_model.tar.gz diff --git a/demo/seqToseq/dataprovider.py b/demo/seqToseq/dataprovider.py deleted file mode 100755 index c2b49804be582d7d0bc3ef6332741be03936eb24..0000000000000000000000000000000000000000 --- a/demo/seqToseq/dataprovider.py +++ /dev/null @@ -1,94 +0,0 @@ -# Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserved -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. 
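Note: the renaming loop at the end of wmt14_data.sh above maps .fr files to .src and .en files to .trg so the rest of the pipeline can treat any language pair uniformly. The same step as a Python sketch, assuming the train/, test/ and gen/ directories already exist:

```python
import os

# Python sketch of the suffix renaming in wmt14_data.sh above:
# .fr -> .src (source language), .en -> .trg (target language).
for d in ('train', 'test', 'gen'):
    for name in os.listdir(d):
        base, ext = os.path.splitext(name)
        if ext == '.fr':
            os.rename(os.path.join(d, name), os.path.join(d, base + '.src'))
        elif ext == '.en':
            os.rename(os.path.join(d, name), os.path.join(d, base + '.trg'))
```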
-
-from paddle.trainer.PyDataProvider2 import *
-
-UNK_IDX = 2
-START = "<s>"
-END = "<e>"
-
-
-def hook(settings, src_dict_path, trg_dict_path, is_generating, file_list,
-         **kwargs):
-    # job_mode = 1: training mode
-    # job_mode = 0: generating mode
-    settings.job_mode = not is_generating
-
-    def fun(dict_path):
-        out_dict = dict()
-        with open(dict_path, "r") as fin:
-            out_dict = {
-                line.strip(): line_count
-                for line_count, line in enumerate(fin)
-            }
-        return out_dict
-
-    settings.src_dict = fun(src_dict_path)
-    settings.trg_dict = fun(trg_dict_path)
-
-    settings.logger.info("src dict len : %d" % (len(settings.src_dict)))
-
-    if settings.job_mode:
-        settings.slots = {
-            'source_language_word':
-            integer_value_sequence(len(settings.src_dict)),
-            'target_language_word':
-            integer_value_sequence(len(settings.trg_dict)),
-            'target_language_next_word':
-            integer_value_sequence(len(settings.trg_dict))
-        }
-        settings.logger.info("trg dict len : %d" % (len(settings.trg_dict)))
-    else:
-        settings.slots = {
-            'source_language_word':
-            integer_value_sequence(len(settings.src_dict)),
-            'sent_id':
-            integer_value_sequence(len(open(file_list[0], "r").readlines()))
-        }
-
-
-def _get_ids(s, dictionary):
-    words = s.strip().split()
-    return [dictionary[START]] + \
-        [dictionary.get(w, UNK_IDX) for w in words] + \
-        [dictionary[END]]
-
-
-@provider(init_hook=hook, pool_size=50000)
-def process(settings, file_name):
-    with open(file_name, 'r') as f:
-        for line_count, line in enumerate(f):
-            line_split = line.strip().split('\t')
-            if settings.job_mode and len(line_split) != 2:
-                continue
-            src_seq = line_split[0]  # one source sequence
-            src_ids = _get_ids(src_seq, settings.src_dict)
-
-            if settings.job_mode:
-                trg_seq = line_split[1]  # one target sequence
-                trg_words = trg_seq.split()
-                trg_ids = [settings.trg_dict.get(w, UNK_IDX) for w in trg_words]
-
-                # remove sequences whose length > 80 in training mode
-                if len(src_ids) > 80 or len(trg_ids) > 80:
-                    continue
-                trg_ids_next = trg_ids + [settings.trg_dict[END]]
-                trg_ids = [settings.trg_dict[START]] + trg_ids
-                yield {
-                    'source_language_word': src_ids,
-                    'target_language_word': trg_ids,
-                    'target_language_next_word': trg_ids_next
-                }
-            else:
-                yield {'source_language_word': src_ids, 'sent_id': [line_count]}
diff --git a/demo/seqToseq/paraphrase/train.conf b/demo/seqToseq/paraphrase/train.conf
deleted file mode 100644
index be79c5e771c0e864fd1776cedb3ef37c997b6df6..0000000000000000000000000000000000000000
--- a/demo/seqToseq/paraphrase/train.conf
+++ /dev/null
@@ -1,33 +0,0 @@
-#edit-mode: -*- python -*-
-# Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserved
-#
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-# http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
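Note: _get_ids() in the data provider above brackets every sentence with the start and end tokens before mapping words through the dictionary, falling back to UNK_IDX for out-of-vocabulary words. A toy run with a hypothetical four-entry dictionary:

```python
# Toy dictionary; the first three entries mirror the reserved tokens
# that preprocess.py writes at the top of src.dict/trg.dict.
src_dict = {'<s>': 0, '<e>': 1, '<unk>': 2, 'hello': 3}
UNK_IDX = 2

def _get_ids(s, dictionary):
    words = s.strip().split()
    return [dictionary['<s>']] + \
        [dictionary.get(w, UNK_IDX) for w in words] + \
        [dictionary['<e>']]

print(_get_ids('hello world', src_dict))  # [0, 3, 2, 1]
```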
-
-import sys
-sys.path.append("..")
-
-from seqToseq_net import *
-
-is_generating = False
-### Data Definition
-train_conf = seq_to_seq_data(data_dir = "./data/pre-paraphrase",
-                             is_generating = is_generating)
-
-### Algorithm Configuration
-settings(
-    learning_method = AdamOptimizer(),
-    batch_size = 50,
-    learning_rate = 5e-4)
-
-### Network Architecture
-gru_encoder_decoder(train_conf, is_generating, word_vector_dim = 32)
diff --git a/demo/seqToseq/paraphrase/train.sh b/demo/seqToseq/paraphrase/train.sh
deleted file mode 100755
index 33a42f6eff2b0414c466d5f78c89989a6a517eb9..0000000000000000000000000000000000000000
--- a/demo/seqToseq/paraphrase/train.sh
+++ /dev/null
@@ -1,29 +0,0 @@
-#!/bin/bash
-# Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserved
-#
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-# http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-set -e
-cd ..
-
-paddle train \
-  --config='paraphrase/train.conf' \
-  --save_dir='paraphrase/model' \
-  --init_model_path='data/paraphrase_model' \
-  --load_missing_parameter_strategy=rand \
-  --use_gpu=false \
-  --num_passes=16 \
-  --show_parameter_stats_period=100 \
-  --trainer_count=4 \
-  --log_period=10 \
-  --dot_period=5 \
-  2>&1 | tee 'paraphrase/train.log'
diff --git a/demo/seqToseq/preprocess.py b/demo/seqToseq/preprocess.py
deleted file mode 100755
index 03f371331a0755e5939e457f4bdfb1770b8dad88..0000000000000000000000000000000000000000
--- a/demo/seqToseq/preprocess.py
+++ /dev/null
@@ -1,219 +0,0 @@
-#!/bin/env python
-# Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserved
-#
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-# http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-"""
-Example:
-    python preprocess.py -i INPUT [-d DICTSIZE] [-m]
-
-Options:
-    -h, --help     show this help message and exit
-    -i INPUT       input original dataset path
-    -d DICTSIZE    specified word count of dictionary
-    -m --mergeDict merge source and target dictionary
-"""
-import os
-import sys
-
-import string
-from optparse import OptionParser
-from paddle.utils.preprocess_util import save_list, DatasetCreater
-
-
-class SeqToSeqDatasetCreater(DatasetCreater):
-    """
-    A class to process data for sequence to sequence application.
- """ - - def __init__(self, data_path, output_path): - """ - data_path: the path to store the train data, test data and gen data - output_path: the path to store the processed dataset - """ - DatasetCreater.__init__(self, data_path) - self.gen_dir_name = 'gen' - self.gen_list_name = 'gen.list' - self.output_path = output_path - - def concat_file(self, file_path, file1, file2, output_path, output): - """ - Concat file1 and file2 to be one output file - The i-th line of output = i-th line of file1 + '\t' + i-th line of file2 - file_path: the path to store file1 and file2 - output_path: the path to store output file - """ - file1 = os.path.join(file_path, file1) - file2 = os.path.join(file_path, file2) - output = os.path.join(output_path, output) - if not os.path.exists(output): - os.system('paste ' + file1 + ' ' + file2 + ' > ' + output) - - def cat_file(self, dir_path, suffix, output_path, output): - """ - Cat all the files in dir_path with suffix to be one output file - dir_path: the base directory to store input file - suffix: suffix of file name - output_path: the path to store output file - """ - cmd = 'cat ' - file_list = os.listdir(dir_path) - file_list.sort() - for file in file_list: - if file.endswith(suffix): - cmd += os.path.join(dir_path, file) + ' ' - output = os.path.join(output_path, output) - if not os.path.exists(output): - os.system(cmd + '> ' + output) - - def build_dict(self, file_path, dict_path, dict_size=-1): - """ - Create the dictionary for the file, Note that - 1. Valid characters include all printable characters - 2. There is distinction between uppercase and lowercase letters - 3. There is 3 special token: - : the start of a sequence - : the end of a sequence - : a word not included in dictionary - file_path: the path to store file - dict_path: the path to store dictionary - dict_size: word count of dictionary - if is -1, dictionary will contains all the words in file - """ - if not os.path.exists(dict_path): - dictory = dict() - with open(file_path, "r") as fdata: - for line in fdata: - line = line.split('\t') - for line_split in line: - words = line_split.strip().split() - for word in words: - if word not in dictory: - dictory[word] = 1 - else: - dictory[word] += 1 - output = open(dict_path, "w+") - output.write('\n\n\n') - count = 3 - for key, value in sorted( - dictory.items(), key=lambda d: d[1], reverse=True): - output.write(key + "\n") - count += 1 - if count == dict_size: - break - self.dict_size = count - - def create_dataset(self, - dict_size=-1, - mergeDict=False, - suffixes=['.src', '.trg']): - """ - Create seqToseq dataset - """ - # dataset_list and dir_list has one-to-one relationship - train_dataset = os.path.join(self.data_path, self.train_dir_name) - test_dataset = os.path.join(self.data_path, self.test_dir_name) - gen_dataset = os.path.join(self.data_path, self.gen_dir_name) - dataset_list = [train_dataset, test_dataset, gen_dataset] - - train_dir = os.path.join(self.output_path, self.train_dir_name) - test_dir = os.path.join(self.output_path, self.test_dir_name) - gen_dir = os.path.join(self.output_path, self.gen_dir_name) - dir_list = [train_dir, test_dir, gen_dir] - - # create directory - for dir in dir_list: - if not os.path.exists(dir): - os.mkdir(dir) - - # checkout dataset should be parallel corpora - suffix_len = len(suffixes[0]) - for dataset in dataset_list: - file_list = os.listdir(dataset) - if len(file_list) % 2 == 1: - raise RuntimeError("dataset should be parallel corpora") - file_list.sort() - for i in range(0, len(file_list), 
- - def create_dataset(self, - dict_size=-1, - mergeDict=False, - suffixes=['.src', '.trg']): - """ - Create the seqToseq dataset - """ - # dataset_list and dir_list have a one-to-one relationship - train_dataset = os.path.join(self.data_path, self.train_dir_name) - test_dataset = os.path.join(self.data_path, self.test_dir_name) - gen_dataset = os.path.join(self.data_path, self.gen_dir_name) - dataset_list = [train_dataset, test_dataset, gen_dataset] - - train_dir = os.path.join(self.output_path, self.train_dir_name) - test_dir = os.path.join(self.output_path, self.test_dir_name) - gen_dir = os.path.join(self.output_path, self.gen_dir_name) - dir_list = [train_dir, test_dir, gen_dir] - - # create directory - for dir in dir_list: - if not os.path.exists(dir): - os.mkdir(dir) - - # check that the dataset is parallel corpora - suffix_len = len(suffixes[0]) - for dataset in dataset_list: - file_list = os.listdir(dataset) - if len(file_list) % 2 == 1: - raise RuntimeError("dataset should be parallel corpora") - file_list.sort() - for i in range(0, len(file_list), 2): - if file_list[i][:-suffix_len] != file_list[i + 1][:-suffix_len]: - raise RuntimeError( - "source and target file name should be equal") - - # cat all the files with the same suffix in dataset - for suffix in suffixes: - for dataset in dataset_list: - outname = os.path.basename(dataset) + suffix - self.cat_file(dataset, suffix, dataset, outname) - - # concat parallel corpora and create file.list - print 'concat parallel corpora for dataset' - id = 0 - list = ['train.list', 'test.list', 'gen.list'] - for dataset in dataset_list: - outname = os.path.basename(dataset) - self.concat_file(dataset, outname + suffixes[0], - outname + suffixes[1], dir_list[id], outname) - save_list([os.path.join(dir_list[id], outname)], - os.path.join(self.output_path, list[id])) - id += 1 - - # build dictionary for train data - dict = ['src.dict', 'trg.dict'] - dict_path = [ - os.path.join(self.output_path, dict[0]), - os.path.join(self.output_path, dict[1]) - ] - if mergeDict: - outname = os.path.join(train_dir, train_dataset.split('/')[-1]) - print 'build src dictionary for train data' - self.build_dict(outname, dict_path[0], dict_size) - print 'build trg dictionary for train data' - os.system('cp ' + dict_path[0] + ' ' + dict_path[1]) - else: - outname = os.path.join(train_dataset, self.train_dir_name) - for id in range(0, 2): - suffix = suffixes[id] - print 'build ' + suffix[1:] + ' dictionary for train data' - self.build_dict(outname + suffix, dict_path[id], dict_size) - print 'dictionary size is', self.dict_size - - -def main(): - usage = "usage: \n" \ - "python %prog -i INPUT [-d DICTSIZE] [-m]" - parser = OptionParser(usage) - parser.add_option( - "-i", action="store", dest="input", help="input original dataset path") - parser.add_option( - "-d", - action="store", - dest="dictsize", - help="specified word count of dictionary") - parser.add_option( - "-m", - "--mergeDict", - action="store_true", - dest="mergeDict", - help="merge source and target dictionary") - (options, args) = parser.parse_args() - if options.input[-1] == os.path.sep: - options.input = options.input[:-1] - outname = os.path.basename(options.input) - output_path = os.path.join(os.path.dirname(options.input), 'pre-' + outname) - dictsize = int(options.dictsize) if options.dictsize else -1 - if not os.path.exists(output_path): - os.mkdir(output_path) - data_creator = SeqToSeqDatasetCreater(options.input, output_path) - data_creator.create_dataset(dictsize, options.mergeDict) - - -if __name__ == "__main__": - main() diff --git a/demo/seqToseq/seqToseq_net.py b/demo/seqToseq/seqToseq_net.py deleted file mode 100644 index e523a34d5a95120d1f0a583be8bbdbff5678d1ab..0000000000000000000000000000000000000000 --- a/demo/seqToseq/seqToseq_net.py +++ /dev/null @@ -1,191 +0,0 @@ -# edit-mode: -*- python -*- - -# Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserved -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License.
- -import sys -import os -from paddle.trainer_config_helpers import * - - -def seq_to_seq_data(data_dir, - is_generating, - dict_size=30000, - train_list='train.list', - test_list='test.list', - gen_list='gen.list', - gen_result='gen_result'): - """ - Predefined seqToseq train data provider for application - is_generating: whether this config is used for generating - dict_size: word count of dictionary - train_list: a text file containing a list of training data - test_list: a text file containing a list of testing data - gen_list: a text file containing a list of generating data - gen_result: a text file containing generating result - """ - src_lang_dict = os.path.join(data_dir, 'src.dict') - trg_lang_dict = os.path.join(data_dir, 'trg.dict') - - if is_generating: - train_list = None - test_list = os.path.join(data_dir, gen_list) - else: - train_list = os.path.join(data_dir, train_list) - test_list = os.path.join(data_dir, test_list) - - define_py_data_sources2( - train_list, - test_list, - module="dataprovider", - obj="process", - args={ - "src_dict_path": src_lang_dict, - "trg_dict_path": trg_lang_dict, - "is_generating": is_generating - }) - - return { - "src_dict_path": src_lang_dict, - "trg_dict_path": trg_lang_dict, - "gen_result": gen_result - } - - -def gru_encoder_decoder(data_conf, - is_generating, - word_vector_dim=512, - encoder_size=512, - decoder_size=512, - beam_size=3, - max_length=250): - """ - A wrapper for an attention version of GRU Encoder-Decoder network - is_generating: whether this config is used for generating - encoder_size: dimension of hidden unit in GRU Encoder network - decoder_size: dimension of hidden unit in GRU Decoder network - word_vector_dim: dimension of word vector - beam_size: expand width in beam search - max_length: a stop condition of sequence generation - """ - for k, v in data_conf.iteritems(): - globals()[k] = v - source_dict_dim = len(open(src_dict_path, "r").readlines()) - target_dict_dim = len(open(trg_dict_path, "r").readlines()) - gen_trans_file = gen_result - - src_word_id = data_layer(name='source_language_word', size=source_dict_dim) - src_embedding = embedding_layer( - input=src_word_id, - size=word_vector_dim, - param_attr=ParamAttr(name='_source_language_embedding')) - src_forward = simple_gru(input=src_embedding, size=encoder_size) - src_backward = simple_gru( - input=src_embedding, size=encoder_size, reverse=True) - encoded_vector = concat_layer(input=[src_forward, src_backward]) - - with mixed_layer(size=decoder_size) as encoded_proj: - encoded_proj += full_matrix_projection(input=encoded_vector) - - backward_first = first_seq(input=src_backward) - with mixed_layer( - size=decoder_size, - act=TanhActivation(), ) as decoder_boot: - decoder_boot += full_matrix_projection(input=backward_first) - - def gru_decoder_with_attention(enc_vec, enc_proj, current_word): - decoder_mem = memory( - name='gru_decoder', size=decoder_size, boot_layer=decoder_boot) - - context = simple_attention( - encoded_sequence=enc_vec, - encoded_proj=enc_proj, - decoder_state=decoder_mem, ) - - with mixed_layer(size=decoder_size * 3) as decoder_inputs: - decoder_inputs += full_matrix_projection(input=context) - decoder_inputs += full_matrix_projection(input=current_word) - - gru_step = gru_step_layer( - name='gru_decoder', - input=decoder_inputs, - output_mem=decoder_mem, - size=decoder_size) - - with mixed_layer( - size=target_dict_dim, bias_attr=True, - act=SoftmaxActivation()) as out: - out += full_matrix_projection(input=gru_step) - return out - - 
decoder_group_name = "decoder_group" - group_inputs = [ - StaticInput( - input=encoded_vector, is_seq=True), StaticInput( - input=encoded_proj, is_seq=True) - ] - - if not is_generating: - trg_embedding = embedding_layer( - input=data_layer( - name='target_language_word', size=target_dict_dim), - size=word_vector_dim, - param_attr=ParamAttr(name='_target_language_embedding')) - group_inputs.append(trg_embedding) - - # For a decoder equipped with an attention mechanism, in training, - # the target embedding (the ground truth) is the data input, - # while the encoded source sequence is accessed as an unbounded memory. - # Here, the StaticInput defines a read-only memory - # for the recurrent_group. - decoder = recurrent_group( - name=decoder_group_name, - step=gru_decoder_with_attention, - input=group_inputs) - - lbl = data_layer(name='target_language_next_word', size=target_dict_dim) - cost = classification_cost(input=decoder, label=lbl) - outputs(cost) - else: - # In generation, the decoder predicts the next target word based on - # the encoded source sequence and the last generated target word. - - # The encoded source sequence (encoder's output) must be specified by - # StaticInput, which is a read-only memory. - # The embedding of the last generated word is automatically fetched by - # GeneratedInput, which is initialized by a start mark, such as <s>, - # and must be included in generation. - - trg_embedding = GeneratedInput( - size=target_dict_dim, - embedding_name='_target_language_embedding', - embedding_size=word_vector_dim) - group_inputs.append(trg_embedding) - - beam_gen = beam_search( - name=decoder_group_name, - step=gru_decoder_with_attention, - input=group_inputs, - bos_id=0, - eos_id=1, - beam_size=beam_size, - max_length=max_length) - - seqtext_printer_evaluator( - input=beam_gen, - id_input=data_layer( - name="sent_id", size=1), - dict_file=trg_dict_path, - result_file=gen_trans_file) - outputs(beam_gen) diff --git a/demo/seqToseq/translation/eval_bleu.sh b/demo/seqToseq/translation/eval_bleu.sh deleted file mode 100755 index 54c2ed237e93adb3456dbe62f75626d36c2d90bc..0000000000000000000000000000000000000000 --- a/demo/seqToseq/translation/eval_bleu.sh +++ /dev/null @@ -1,42 +0,0 @@ -#!/bin/bash -# Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserved -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License.
-set -e -gen_file=$1 -beam_size=$2 - -# find the top-1 generated result -top1=$(printf '%s_top1.txt' `basename $gen_file .txt`) -if [ $beam_size -eq 1 ]; then - awk -F "\t" '{sub("<s> ","",$2);sub(" <e>","",$2);print $2}' $gen_file >$top1 -else - awk 'BEGIN{ - FS="\t"; - OFS="\t"; - read_pos = 2} { - if (NR == read_pos){ - sub("<s> ","",$3); - sub(" <e>","",$3); - print $3; - read_pos += (2 + res_num); - }}' res_num=$beam_size $gen_file >$top1 -fi - -# evaluate BLEU value -bleu_script=multi-bleu.perl -standard_res=../data/wmt14/gen/ntst14.trg -bleu_res=`perl $bleu_script $standard_res <$top1` - -echo $bleu_res -rm $top1 diff --git a/demo/seqToseq/translation/gen.conf b/demo/seqToseq/translation/gen.conf deleted file mode 100644 index e9bea4e4559ff31ad83c4474e91de7e7acc77e9f..0000000000000000000000000000000000000000 --- a/demo/seqToseq/translation/gen.conf +++ /dev/null @@ -1,36 +0,0 @@ -#edit-mode: -*- python -*- -# Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserved -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -import sys -sys.path.append("..") - -from seqToseq_net import * - -# whether this config is used for generating -is_generating = True - -### Data Definition -gen_conf = seq_to_seq_data(data_dir = "./data/pre-wmt14", - is_generating = is_generating, - gen_result = "./translation/gen_result") - -### Algorithm Configuration -settings( - learning_method = AdamOptimizer(), - batch_size = 1, - learning_rate = 0) - -### Network Architecture -gru_encoder_decoder(gen_conf, is_generating) diff --git a/demo/seqToseq/translation/gen.sh b/demo/seqToseq/translation/gen.sh deleted file mode 100755 index a700ae213473dfe7c5b77156de15775b8fe9a9f0..0000000000000000000000000000000000000000 --- a/demo/seqToseq/translation/gen.sh +++ /dev/null @@ -1,26 +0,0 @@ -#!/bin/bash -# Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserved -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -set -e -cd .. - -paddle train \ - --job=test \ - --config='translation/gen.conf' \ - --save_dir='data/wmt14_model' \ - --use_gpu=false \ - --num_passes=13 \ - --test_pass=12 \ - --trainer_count=1 \ - 2>&1 | tee 'translation/gen.log' diff --git a/demo/seqToseq/translation/moses_bleu.sh b/demo/seqToseq/translation/moses_bleu.sh deleted file mode 100755 index 2f230d7f4c736da003966fbdb277f6b8b1ec952c..0000000000000000000000000000000000000000 --- a/demo/seqToseq/translation/moses_bleu.sh +++ /dev/null @@ -1,18 +0,0 @@ -#!/bin/bash -# Copyright (c) 2016 PaddlePaddle Authors.
All Rights Reserved -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -set -e -set -x -echo "Downloading multi-bleu.perl" -wget https://raw.githubusercontent.com/moses-smt/mosesdecoder/master/scripts/generic/multi-bleu.perl --no-check-certificate diff --git a/demo/seqToseq/translation/train.conf b/demo/seqToseq/translation/train.conf deleted file mode 100644 index 72b7ccdbb95dbda8f06674079db9a3257bb31622..0000000000000000000000000000000000000000 --- a/demo/seqToseq/translation/train.conf +++ /dev/null @@ -1,36 +0,0 @@ -#edit-mode: -*- python -*- -# Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserved -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -import sys -sys.path.append("..") - -from seqToseq_net import * - -# whether this config is used for generating -is_generating = False - -### Data Definition -data_dir = "./data/pre-wmt14" -train_conf = seq_to_seq_data(data_dir = data_dir, - is_generating = is_generating) - -### Algorithm Configuration -settings( - learning_method = AdamOptimizer(), - batch_size = 50, - learning_rate = 5e-4) - -### Network Architecture -gru_encoder_decoder(train_conf, is_generating) diff --git a/demo/seqToseq/translation/train.sh b/demo/seqToseq/translation/train.sh deleted file mode 100755 index bdece693e5c407c89bc172c461bac7f9b20560d3..0000000000000000000000000000000000000000 --- a/demo/seqToseq/translation/train.sh +++ /dev/null @@ -1,27 +0,0 @@ -#!/bin/bash -# Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserved -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -set -e -cd ..
- -paddle train \ ---config='translation/train.conf' \ ---save_dir='translation/model' \ ---use_gpu=false \ ---num_passes=16 \ ---show_parameter_stats_period=100 \ ---trainer_count=4 \ ---log_period=10 \ ---dot_period=5 \ -2>&1 | tee 'translation/train.log' diff --git a/demo/sequence_tagging/linear_crf.py b/demo/sequence_tagging/linear_crf.py deleted file mode 100644 index 736b580bb87a3f2c12b369e231a10893fa95ce08..0000000000000000000000000000000000000000 --- a/demo/sequence_tagging/linear_crf.py +++ /dev/null @@ -1,82 +0,0 @@ -# Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserved -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -from paddle.trainer_config_helpers import * - -import math - -define_py_data_sources2( - train_list="data/train.list", - test_list="data/test.list", - module="dataprovider", - obj="process") - -batch_size = 1 -settings( - learning_method=MomentumOptimizer(), - batch_size=batch_size, - regularization=L2Regularization(batch_size * 1e-4), - average_window=0.5, - learning_rate=1e-1, - learning_rate_decay_a=1e-5, - learning_rate_decay_b=0.25, ) - -num_label_types = 23 - - -def get_simd_size(size): - return int(math.ceil(float(size) / 8)) * 8 - - -# Currently, in order to use sparse_update=True, -# the size has to be aligned. -num_label_types = get_simd_size(num_label_types) - -features = data_layer(name="features", size=76328) -word = data_layer(name="word", size=6778) -pos = data_layer(name="pos", size=44) -chunk = data_layer(name="chunk", size=num_label_types) - -crf_input = fc_layer( - input=features, - size=num_label_types, - act=LinearActivation(), - bias_attr=False, - param_attr=ParamAttr( - initial_std=0, sparse_update=True)) - -crf = crf_layer( - input=crf_input, - label=chunk, - param_attr=ParamAttr( - name="crfw", initial_std=0), ) - -crf_decoding = crf_decoding_layer( - size=num_label_types, - input=crf_input, - label=chunk, - param_attr=ParamAttr(name="crfw"), ) - -sum_evaluator( - name="error", - input=crf_decoding, ) - -chunk_evaluator( - name="chunk_f1", - input=[crf_decoding, chunk], - chunk_scheme="IOB", - num_chunk_types=11, ) - -inputs(word, pos, chunk, features) -outputs(crf) diff --git a/demo/sequence_tagging/rnn_crf.py b/demo/sequence_tagging/rnn_crf.py deleted file mode 100644 index ad1e7b68e78ae202575623e139ad3727b0b9d30c..0000000000000000000000000000000000000000 --- a/demo/sequence_tagging/rnn_crf.py +++ /dev/null @@ -1,120 +0,0 @@ -# Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserved -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. 
- -from paddle.trainer_config_helpers import * - -import math - -define_py_data_sources2( - train_list="data/train.list", - test_list="data/test.list", - module="dataprovider", - obj="process") - -batch_size = 16 -settings( - learning_method=MomentumOptimizer(), - batch_size=batch_size, - regularization=L2Regularization(batch_size * 1e-5), - average_window=0.5, - learning_rate=2e-3, - learning_rate_decay_a=5e-7, - learning_rate_decay_b=0.5, ) - -word_dim = 128 -hidden_dim = 128 -with_rnn = True - -initial_std = 1 / math.sqrt(hidden_dim) -param_attr = ParamAttr(initial_std=initial_std) -cpu_layer_attr = ExtraLayerAttribute(device=-1) - -default_device(0) - -num_label_types = 23 - -features = data_layer(name="features", size=76328) -word = data_layer(name="word", size=6778) -pos = data_layer(name="pos", size=44) -chunk = data_layer( - name="chunk", size=num_label_types, layer_attr=cpu_layer_attr) - -emb = embedding_layer( - input=word, size=word_dim, param_attr=ParamAttr(initial_std=0)) - -hidden1 = mixed_layer( - size=hidden_dim, - act=STanhActivation(), - bias_attr=True, - input=[ - full_matrix_projection(emb), table_projection( - pos, param_attr=param_attr) - ]) - -if with_rnn: - rnn1 = recurrent_layer( - act=ReluActivation(), - bias_attr=True, - input=hidden1, - param_attr=ParamAttr(initial_std=0), ) - -hidden2 = mixed_layer( - size=hidden_dim, - act=STanhActivation(), - bias_attr=True, - input=[full_matrix_projection(hidden1)] + - ([full_matrix_projection( - rnn1, param_attr=ParamAttr(initial_std=0))] if with_rnn else []), ) - -if with_rnn: - rnn2 = recurrent_layer( - reverse=True, - act=ReluActivation(), - bias_attr=True, - input=hidden2, - param_attr=ParamAttr(initial_std=0), ) - -crf_input = mixed_layer( - size=num_label_types, - bias_attr=False, - input=[full_matrix_projection(hidden2), ] + - ([full_matrix_projection( - rnn2, param_attr=ParamAttr(initial_std=0))] if with_rnn else []), ) - -crf = crf_layer( - input=crf_input, - label=chunk, - param_attr=ParamAttr( - name="crfw", initial_std=0), - layer_attr=cpu_layer_attr, ) - -crf_decoding = crf_decoding_layer( - size=num_label_types, - input=crf_input, - label=chunk, - param_attr=ParamAttr(name="crfw"), - layer_attr=cpu_layer_attr, ) - -sum_evaluator( - name="error", - input=crf_decoding, ) - -chunk_evaluator( - name="chunk_f1", - input=[crf_decoding, chunk], - chunk_scheme="IOB", - num_chunk_types=11, ) - -inputs(word, pos, chunk, features) -outputs(crf) diff --git a/demo/sequence_tagging/train.sh b/demo/sequence_tagging/train.sh deleted file mode 100755 index 9a706b98d8686101ba21b513644bdd791062ec26..0000000000000000000000000000000000000000 --- a/demo/sequence_tagging/train.sh +++ /dev/null @@ -1,10 +0,0 @@ -#!/bin/bash - -paddle train \ - --config rnn_crf.py \ - --parallel_nn=1 \ - --use_gpu=1 \ - --dot_period=10 \ - --log_period=1000 \ - --test_period=0 \ - --num_passes=10 diff --git a/demo/sequence_tagging/train_linear.sh b/demo/sequence_tagging/train_linear.sh deleted file mode 100755 index 597b5afea9c63a8e209b69b6a40e74556e27ac31..0000000000000000000000000000000000000000 --- a/demo/sequence_tagging/train_linear.sh +++ /dev/null @@ -1,9 +0,0 @@ -#!/bin/bash - -paddle train \ - --config linear_crf.py \ - --use_gpu=0 \ - --dot_period=100 \ - --log_period=10000 \ - --test_period=0 \ - --num_passes=10 diff --git a/doc/CMakeLists.txt b/doc/CMakeLists.txt index efcf8b0ad3d6f2f831fe71f3c09163015cc1ac96..94dd3457fb5b513441c4c8e339e1862de9092517 100644 --- a/doc/CMakeLists.txt +++ b/doc/CMakeLists.txt @@ -7,25 +7,43 @@ if(NOT DEFINED 
SPHINX_THEME_DIR) endif() # configured documentation tools and intermediate build results -set(BINARY_BUILD_DIR "${CMAKE_CURRENT_BINARY_DIR}/_build") +set(BINARY_BUILD_DIR_EN "${CMAKE_CURRENT_BINARY_DIR}/en/_build") # Sphinx cache with pickled ReST documents -set(SPHINX_CACHE_DIR "${CMAKE_CURRENT_BINARY_DIR}/_doctrees") +set(SPHINX_CACHE_DIR_EN "${CMAKE_CURRENT_BINARY_DIR}/en/_doctrees") -# HTML output directory -set(SPHINX_HTML_DIR "${CMAKE_CURRENT_BINARY_DIR}/html") +# HTML output directory +set(SPHINX_HTML_DIR_EN "${CMAKE_CURRENT_BINARY_DIR}/en/html") configure_file( - "${CMAKE_CURRENT_SOURCE_DIR}/conf.py.in" - "${BINARY_BUILD_DIR}/conf.py" + "${CMAKE_CURRENT_SOURCE_DIR}/templates/conf.py.en.in" + "${BINARY_BUILD_DIR_EN}/conf.py" @ONLY) sphinx_add_target(paddle_docs html - ${BINARY_BUILD_DIR} - ${SPHINX_CACHE_DIR} + ${BINARY_BUILD_DIR_EN} + ${SPHINX_CACHE_DIR_EN} ${CMAKE_CURRENT_SOURCE_DIR} - ${SPHINX_HTML_DIR}) + ${SPHINX_HTML_DIR_EN}) + +# configured documentation tools and intermediate build results +set(BINARY_BUILD_DIR_CN "${CMAKE_CURRENT_BINARY_DIR}/cn/_build") + +# Sphinx cache with pickled ReST documents +set(SPHINX_CACHE_DIR_CN "${CMAKE_CURRENT_BINARY_DIR}/cn/_doctrees") + +# HTML output directory +set(SPHINX_HTML_DIR_CN "${CMAKE_CURRENT_BINARY_DIR}/cn/html") + +configure_file( + "${CMAKE_CURRENT_SOURCE_DIR}/templates/conf.py.cn.in" + "${BINARY_BUILD_DIR_CN}/conf.py" + @ONLY) -add_dependencies(paddle_docs - gen_proto_py) +sphinx_add_target(paddle_docs_cn + html + ${BINARY_BUILD_DIR_CN} + ${SPHINX_CACHE_DIR_CN} + ${CMAKE_CURRENT_SOURCE_DIR} + ${SPHINX_HTML_DIR_CN}) diff --git a/doc/about/index_cn.md b/doc/about/index_cn.md new file mode 100644 index 0000000000000000000000000000000000000000..3bf030004d4de8c6f3cb773c6e78c09f40878c5f --- /dev/null +++ b/doc/about/index_cn.md @@ -0,0 +1,11 @@ +关于PaddlePaddle +================ + +PaddlePaddle是一个最早由百度科学家和工程师共同研发的并行分布式深度学习平台,兼备易用性、高效性、灵活性和可扩展性,目前已被百度内部多个产品线广泛使用。 +PaddlePaddle目前已经开放源码, 但是远未完善,我们希望能在这个基础上不断的改进、扩展和延伸。 +同时我们希望广大开发者积极提供反馈和贡献源代码,建立一个活跃的开源社区。 + +致谢 +-------- + +在此,特别感谢PaddlePaddle的[所有贡献者](https://github.com/PaddlePaddle/Paddle/graphs/contributors)。 diff --git a/doc/about/index_en.rst b/doc/about/index_en.rst index 8a372d2bc2b2c54b021ed63941482cbad8d8f719..065c430cdea802ed3c9f487cd00255b85a5598a5 100644 --- a/doc/about/index_en.rst +++ b/doc/about/index_en.rst @@ -11,4 +11,4 @@ We hope to build an active open source community both by providing feedback and Credits -------- -We owe many thanks to `all contributors and developers `_ of PaddlePaddle! +We owe many thanks to `all contributors and developers `_ of PaddlePaddle! diff --git a/doc/api/data_provider/pydataprovider2_en.rst b/doc/api/data_provider/pydataprovider2_en.rst deleted file mode 100644 index 083436e2710b4582e11741aaeaf5932d59869473..0000000000000000000000000000000000000000 --- a/doc/api/data_provider/pydataprovider2_en.rst +++ /dev/null @@ -1,247 +0,0 @@ -.. _api_pydataprovider: - -PyDataProvider2 -=============== - -We highly recommend using PyDataProvider2 to provide training or testing -data to PaddlePaddle. The user only needs to focus on how to read a single -sample from the original data file by using PyDataProvider2, leaving all of the -trivial work, including transferring data into CPU/GPU memory, shuffling, and binary -serialization, to PyDataProvider2. PyDataProvider2 uses multithreading and a -fascinating but simple cache strategy to optimize the efficiency of the data -providing process.
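Since the files pulled in by the :code:`literalinclude` directives are not part of this patch, here is a minimal hedged sketch of the style of provider the walkthrough below describes. It is illustrative only: the helper names are taken from the reference section of this document, and the file format (one "label;pixels" sample per line) is the MNIST layout described below.

.. code-block:: python

    from paddle.trainer.PyDataProvider2 import provider, dense_vector, integer_value

    # One sample per line: "<label>;<784 space-separated pixel values>"
    @provider(input_types=[dense_vector(28 * 28), integer_value(10)])
    def process(settings, filename):
        with open(filename) as f:
            for line in f:
                label, pixels = line.split(';')
                yield [float(x) for x in pixels.split()], int(label)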
- -DataProvider for the non-sequential model ----------------------------------------- - -Here we use the MNIST handwriting recognition data as an example to illustrate -how to write a simple PyDataProvider. - -MNIST is a handwriting classification data set. It contains 70,000 grayscale -images of digits. Labels of the training samples range from 0 to 9. All the -images have been size-normalized and centered into images with the same size -of 28 x 28 pixels. - -A small part of the original data is shown below as an example: - -.. literalinclude:: ../../../doc_cn/ui/data_provider/mnist_train.txt - -Each line of the data contains two parts, separated by :code:`;`. The first part is -the label of an image. The second part contains 28x28 pixel float values. - -Just write the path of the above data into train.list. It looks like this: - -.. literalinclude:: ../../../doc_cn/ui/data_provider/train.list - -The corresponding dataprovider is shown below: - -.. literalinclude:: ../../../doc_cn/ui/data_provider/mnist_provider.py - -The first line imports the PyDataProvider2 package. -The main function is the process function, which has two parameters. -The first parameter is the settings, which is not used in this example. -The second parameter is the filename, which is exactly one line of train.list. -This parameter is passed to the process function by PaddlePaddle. - -:code:`@provider` is a Python -`Decorator `_ . -It sets some properties of the DataProvider, and constructs a real PaddlePaddle -DataProvider from a very simple user-implemented Python function. It does not -matter if you are not familiar with `Decorator`_. You can keep it simple by -just taking :code:`@provider` as a fixed mark above the provider function you -implemented. - -`input_types`_ defines the data format that a DataProvider returns. -In this example, it is set to a 28x28-dimensional dense vector and an integer -scalar, whose value ranges from 0 to 9. -`input_types`_ can be set to several kinds of input formats; please refer to the -document of `input_types`_ for more details. - - -The process method is the core part of constructing a real DataProvider in -PaddlePaddle. It implements how to open the text file, how to read one sample -from the original text file, convert it into `input_types`_, and give it -back to the PaddlePaddle process at line 23. -Note that data yielded by the process function must follow the same order in which -`input_types`_ are defined. - - -With the help of PyDataProvider2, users can focus on how to generate ONE training -sample by using the keyword :code:`yield`. -:code:`yield` is a Python keyword, and a related concept is the -:code:`generator`. - -Only a few lines of code need to be added to the training configuration file; -you can take this as an example. - -.. literalinclude:: ../../../doc_cn/ui/data_provider/mnist_config.py - -Here we specify training data by :code:`train.list`, and no testing data is specified. -The method which actually provides data is :code:`process`. - -Users can also use another style to provide data, which defines the -:code:`data_layer`'s name explicitly when yielding. For example, -the :code:`dataprovider` is shown below. - -.. literalinclude:: ../../../doc_cn/ui/data_provider/mnist_provider.dict.py - :linenos: - -If the user didn't give the :code:`data_layer`'s name, PaddlePaddle will roughly use -the order of the :code:`data_layer` definitions to determine which feature maps to -which :code:`data_layer`. This order may not be correct, so TO DEFINE THE -:code:`data_layer`'s NAMES EXPLICITLY IS THE RECOMMENDED WAY TO PROVIDE DATA.
This order may be not correct, so TO DEFINE THE -:code:`data_layer`'s NAMES EXPLICITLY IS THE RECOMMANDED WAY TO PROVIDER DATA. - -Now, this simple example of using PyDataProvider is finished. -The only thing that the user should know is how to generte **one sample** from -**one data file**. -And PaddlePadle will do all of the rest things\: - -* Form a training batch -* Shuffle the training data -* Read data with multithreading -* Cache the training data (Optional) -* CPU-> GPU double buffering. - -Is this cool? - -DataProvider for the sequential model -------------------------------------- -A sequence model takes sequences as its input. A sequence is made up of several -timesteps. The so-called timestep, is not necessary to have something to do -with time. It can also be explained to that the order of data are taken into -consideration into model design and training. -For example, the sentence can be interpreted as a kind of sequence data in NLP -tasks. - -Here is an example on data proivider for English sentiment classification data. -The original input data are simple English text, labeled into positive or -negative sentiment (marked by 0 and 1 respectively). - -A small part of the original data as an example can be found in the path below: - -.. literalinclude:: ../../../doc_cn/ui/data_provider/sentimental_train.txt - -The corresponding data provider can be found in the path below: - -.. literalinclude:: ../../../doc_cn/ui/data_provider/sentimental_provider.py - -This data provider for sequential model is a little more complex than that -for MINST dataset. -A new initialization method is introduced here. -The method :code:`on_init` is configured to DataProvider by :code:`@provider`'s -:code:`init_hook` parameter, and it will be invoked once DataProvider is -initialized. The :code:`on_init` function has the following parameters: - -* The first parameter is the settings object. -* The rest parameters are passed by key word arguments. Some of them are passed - by PaddlePaddle, see reference for `init_hook`_. - The :code:`dictionary` object is a python dict object passed from the trainer - configuration file, and it maps word string to word id. - -To pass these parameters into DataProvider, the following lines should be added -into trainer configuration file. - -.. literalinclude:: ../../../doc_cn/ui/data_provider/sentimental_config.py - -The definition is basically same as MNIST example, except: -* Load dictionary in this configuration -* Pass it as a parameter to the DataProvider - -The `input_types` is configured in method :code:`on_init`. It has the same -effect to configure them by :code:`@provider`'s :code:`input_types` parameter. -However, the :code:`input_types` is set at runtime, so we can set it to -different types according to the input data. Input of the neural network is a -sequence of word id, so set :code:`seq_type` to :code:`integer_value_sequence`. - -Durning :code:`on_init`, we save :code:`dictionary` variable to -:code:`settings`, and it will be used in :code:`process`. Note the settings -parameter for the process function and for the on_init's function are a same -object. - -The basic processing logic is the same as MNIST's :code:`process` method. Each -sample in the data file is given back to PaddlePaddle process. - -Thus, the basic usage of PyDataProvider is here. -Please refer to the following section reference for details. - -Reference ---------- - -@provider -+++++++++ - -.. 
- -Reference ---------- - -@provider -+++++++++ - -.. autofunction:: paddle.trainer.PyDataProvider2.provider - -input_types -+++++++++++ - -PaddlePaddle has four data types, and three sequence types. -The four data types are: - -* :code:`dense_vector`: dense float vector. -* :code:`sparse_binary_vector`: sparse binary vector, most of the values are 0, and - the non-zero elements are fixed to 1. -* :code:`sparse_float_vector`: sparse float vector, most of the values are 0, and some - non-zero elements can be any float value. They are given by the user. -* :code:`integer`: an integer scalar, which is especially used for labels or word indices. - -The three sequence types are: - -* :code:`SequenceType.NO_SEQUENCE` means the sample is not a sequence. -* :code:`SequenceType.SEQUENCE` means the sample is a sequence. -* :code:`SequenceType.SUB_SEQUENCE` means it is a nested sequence, where each timestep of - the input sequence is also a sequence. - -Each input type has a different input format. Their formats are shown -in the table below. - -+----------------------+---------------------+-----------------------------------+------------------------------------------------+ -| | NO_SEQUENCE | SEQUENCE | SUB_SEQUENCE | -+======================+=====================+===================================+================================================+ -| dense_vector | [f, f, ...] | [[f, ...], [f, ...], ...] | [[[f, ...], ...], [[f, ...], ...],...] | -+----------------------+---------------------+-----------------------------------+------------------------------------------------+ -| sparse_binary_vector | [i, i, ...] | [[i, ...], [i, ...], ...] | [[[i, ...], ...], [[i, ...], ...],...] | -+----------------------+---------------------+-----------------------------------+------------------------------------------------+ -| sparse_float_vector | [(i,f), (i,f), ...] | [[(i,f), ...], [(i,f), ...], ...] | [[[(i,f), ...], ...], [[(i,f), ...], ...],...] | -+----------------------+---------------------+-----------------------------------+------------------------------------------------+ -| integer_value | i | [i, i, ...] | [[i, ...], [i, ...], ...] | -+----------------------+---------------------+-----------------------------------+------------------------------------------------+ - -where f represents a float value, i represents an integer value.
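To make the table concrete, here is a hedged reading of the :code:`dense_vector` row with invented values, where each literal is what one sample would look like when yielded:

.. code-block:: python

    # NO_SEQUENCE: one sample is a single (here 3-dimensional) vector
    [0.1, 0.2, 0.3]
    # SEQUENCE: one sample is a sequence of such vectors (two timesteps here)
    [[0.1, 0.2, 0.3], [0.4, 0.5, 0.6]]
    # SUB_SEQUENCE: one sample is a sequence whose timesteps are themselves sequences
    [[[0.1, 0.2, 0.3]], [[0.4, 0.5, 0.6], [0.7, 0.8, 0.9]]]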
- -init_hook -+++++++++ - -init_hook is a function that is invoked once the data provider is initialized. -Its parameters are listed as follows: - -* The first parameter is a settings object, which is the same as the :code:`settings` - in the :code:`process` method. The object contains several attributes, including: - - * :code:`settings.input_types`: the input types. Reference `input_types`_. - * :code:`settings.logger`: a logging object. - -* The rest of the parameters are keyword arguments, made up of PaddlePaddle - pre-defined parameters and user-defined parameters. - - * PaddlePaddle-defined parameters include: - - * :code:`is_train` is a bool parameter that indicates whether the DataProvider is used for - training or testing. - * :code:`file_list` is the list of all files. - - * User-defined parameters (args) can be set in the training configuration. - -Note that PaddlePaddle reserves the right to add pre-defined parameters, so please -use :code:`**kwargs` in init_hook to ensure compatibility by accepting the -parameters which your init_hook does not use. - -cache -+++++ -DataProvider provides two simple cache strategies. They are: - -* :code:`CacheType.NO_CACHE` means do not cache any data; data is then read at runtime by - the user-implemented python module every pass. -* :code:`CacheType.CACHE_PASS_IN_MEM` means the first pass reads data via the user- - implemented python module, and the remaining passes read data directly from - memory. diff --git a/doc/api/index_cn.rst b/doc/api/index_cn.rst new file mode 100644 index 0000000000000000000000000000000000000000..9be0b370ee5e301aee4a6e31b1cfa905754968e8 --- /dev/null +++ b/doc/api/index_cn.rst @@ -0,0 +1,9 @@ +API +=== + +.. toctree:: + :maxdepth: 1 + + 模型配置 <v2/model_configs.rst> + 数据访问 <v2/data.rst> + 训练与应用 <v2/run_logic.rst> diff --git a/doc/api/index_en.rst b/doc/api/index_en.rst index 6fdee9f928dd7057cec58f740bf7520af54a24fb..25c1dd00b9cbb3ab647e04cdc2b4c27c552a2332 100644 --- a/doc/api/index_en.rst +++ b/doc/api/index_en.rst @@ -1,37 +1,9 @@ API === -DataProvider API ----------------- - -.. toctree:: - :maxdepth: 1 - - data_provider/index_en.rst - data_provider/pydataprovider2_en.rst - -.. _api_trainer_config: - -Model Config API ----------------- - -.. toctree:: - :maxdepth: 1 - - trainer_config_helpers/optimizers.rst - trainer_config_helpers/data_sources.rst - trainer_config_helpers/layers.rst - trainer_config_helpers/activations.rst - trainer_config_helpers/poolings.rst - trainer_config_helpers/networks.rst - trainer_config_helpers/evaluators.rst - trainer_config_helpers/attrs.rst - - -Applications API ----------------- - .. toctree:: :maxdepth: 1 - predict/swig_py_paddle_en.rst + v2/model_configs.rst + v2/data.rst + v2/run_logic.rst diff --git a/doc/api/predict/swig_py_paddle_en.rst b/doc/api/predict/swig_py_paddle_en.rst deleted file mode 100644 index 9845cd1607b425dc0a4ddc665aab40d96fa2fbe4..0000000000000000000000000000000000000000 --- a/doc/api/predict/swig_py_paddle_en.rst +++ /dev/null @@ -1,59 +0,0 @@ -Python Prediction -================== - -PaddlePaddle offers a set of clean prediction interfaces for Python with the help of -SWIG. The main steps of predicting values in Python are: - -* Parse training configurations -* Construct GradientMachine -* Prepare data -* Predict - -Here is a sample python script that shows the typical prediction process for the -MNIST classification problem. The complete sample code can be found at -:code:`src_root/doc/ui/predict/predict_sample.py`. - -.. literalinclude:: ./predict_sample.py - :language: python - :lines: 15-18,90-100,101-104 - -The module that does most of the job is py_paddle.swig_paddle; it's -generated by SWIG and has complete documentation, and for more details you can use -python's :code:`help()` function. Let's walk through the above python script: - -* At the beginning, use :code:`swig_paddle.initPaddle()` to initialize - PaddlePaddle with command line arguments; for more about command line arguments - see `Command Line Arguments <../cmd_argument/detail_introduction.html>`_. -* Parse the configuration file that is used in training with :code:`parse_config()`. - Because the data to predict usually has no label, and the output of prediction is - normally the output layer rather than the cost layer, you should modify - the configuration file accordingly before using it in the prediction work. -* Create a neural network with - :code:`swig_paddle.GradientMachine.createFromConfigProto()`, which takes the - parsed configuration :code:`conf.model_config` as its argument. Then load the - trained parameters from the model with :code:`network.loadParameters()`. -* Create a data converter object of utility class :code:`DataProviderConverter`. - - Note: As swig_paddle can only accept C++ matrices, we offer a utility - class DataProviderConverter that can accept the same input data as - PyDataProvider2; for more information please refer to the document - of `PyDataProvider2 <../data_provider/pydataprovider2.html>`_. -* Do the prediction with :code:`forwardTest()`, which takes the converted - input data and outputs the activations of the output layer.
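The steps above, assembled into one hedged skeleton. The call names are the ones listed in this document; the config path, model directory, and input size are placeholders, and error handling is omitted.

.. code-block:: python

    from py_paddle import swig_paddle, DataProviderConverter
    from paddle.trainer.config_parser import parse_config
    from paddle.trainer.PyDataProvider2 import dense_vector

    swig_paddle.initPaddle("--use_gpu=0")  # initialize with command line arguments
    conf = parse_config("trainer_config.py", "")  # placeholder config path

    network = swig_paddle.GradientMachine.createFromConfigProto(conf.model_config)
    network.loadParameters("./output/pass-00012/")  # placeholder model directory

    # the converter turns plain Python data into the C++ matrices swig_paddle expects
    converter = DataProviderConverter([dense_vector(784)])
    batch = [[[0.0] * 784]]  # one sample, one input slot of 784 floats
    output = network.forwardTest(converter(batch))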
- -Here is a typical output: - -.. code-block:: text - - [{'id': None, 'value': array([[ 5.53018653e-09, 1.12194102e-05, 1.96644767e-09, - 1.43630644e-02, 1.51111044e-13, 9.85625684e-01, - 2.08823112e-10, 2.32777140e-08, 2.00186201e-09, - 1.15501715e-08], - [ 9.99982715e-01, 1.27787406e-10, 1.72296313e-05, - 1.49316648e-09, 1.36540484e-11, 6.93137714e-10, - 2.70634608e-08, 3.48565123e-08, 5.25639710e-09, - 4.48684503e-08]], dtype=float32)}] - -:code:`value` is the output of the output layer; each row represents the result of -the corresponding row in the input data, and each element represents the activation of -the corresponding neuron in the output layer. - diff --git a/doc/api/trainer_config_helpers/activations.rst b/doc/api/trainer_config_helpers/activations.rst deleted file mode 100644 index 269e6491e7ebe3899c3fb24fca756a393043473b..0000000000000000000000000000000000000000 --- a/doc/api/trainer_config_helpers/activations.rst +++ /dev/null @@ -1,108 +0,0 @@ -=========== -Activations -=========== - -BaseActivation -============== - -.. automodule:: paddle.trainer_config_helpers.activations - :members: BaseActivation - :noindex: - -AbsActivation -=============== - -.. automodule:: paddle.trainer_config_helpers.activations - :members: AbsActivation - :noindex: - -ExpActivation -=============== - -.. automodule:: paddle.trainer_config_helpers.activations - :members: ExpActivation - :noindex: - -IdentityActivation -================== - -.. automodule:: paddle.trainer_config_helpers.activations - :members: IdentityActivation - :noindex: - -LinearActivation -================== - -.. automodule:: paddle.trainer_config_helpers.activations - :members: LinearActivation - :noindex: - -LogActivation -================== - -.. automodule:: paddle.trainer_config_helpers.activations - :members: LogActivation - :noindex: - -SquareActivation -================ - -.. automodule:: paddle.trainer_config_helpers.activations - :members: SquareActivation - :noindex: - -SigmoidActivation -================= - -.. automodule:: paddle.trainer_config_helpers.activations - :members: SigmoidActivation - :noindex: - -SoftmaxActivation -================= - -.. automodule:: paddle.trainer_config_helpers.activations - :members: SoftmaxActivation - :noindex: - -SequenceSoftmaxActivation -========================= - -.. automodule:: paddle.trainer_config_helpers.activations - :members: SequenceSoftmaxActivation - :noindex: - -ReluActivation -============== - -.. automodule:: paddle.trainer_config_helpers.activations - :members: ReluActivation - :noindex: - -BReluActivation -=============== - -.. automodule:: paddle.trainer_config_helpers.activations - :members: BReluActivation - :noindex: - -SoftReluActivation -================== - -.. automodule:: paddle.trainer_config_helpers.activations - :members: SoftReluActivation - :noindex: - -TanhActivation -============== - -.. automodule:: paddle.trainer_config_helpers.activations - :members: TanhActivation - :noindex: - -STanhActivation -=============== - -..
automodule:: paddle.trainer_config_helpers.activations - :members: STanhActivation - :noindex: diff --git a/doc/api/trainer_config_helpers/attrs.rst b/doc/api/trainer_config_helpers/attrs.rst deleted file mode 100644 index ac63127bf7d9db6351365ab7b58f43db12347a8e..0000000000000000000000000000000000000000 --- a/doc/api/trainer_config_helpers/attrs.rst +++ /dev/null @@ -1,5 +0,0 @@ -Parameter Attributes -======================= - -.. automodule:: paddle.trainer_config_helpers.attrs - :members: diff --git a/doc/api/trainer_config_helpers/data_sources.rst b/doc/api/trainer_config_helpers/data_sources.rst deleted file mode 100644 index b9dd4dda01ae59d1260356aff50ddf298d02c87f..0000000000000000000000000000000000000000 --- a/doc/api/trainer_config_helpers/data_sources.rst +++ /dev/null @@ -1,7 +0,0 @@ -.. _api_trainer_config_helpers_data_sources: - -DataSources -=========== - -.. automodule:: paddle.trainer_config_helpers.data_sources - :members: diff --git a/doc/api/trainer_config_helpers/evaluators.rst b/doc/api/trainer_config_helpers/evaluators.rst deleted file mode 100644 index d6a79c13e2316b0fd3d53eb47960a767bcf8abdb..0000000000000000000000000000000000000000 --- a/doc/api/trainer_config_helpers/evaluators.rst +++ /dev/null @@ -1,106 +0,0 @@ -========== -Evaluators -========== - -Base -==== -.. automodule:: paddle.trainer_config_helpers.evaluators - :members: evaluator_base - :noindex: - -Classification -============== - -classification_error_evaluator ------------------------------- -.. automodule:: paddle.trainer_config_helpers.evaluators - :members: classification_error_evaluator - :noindex: - -auc_evaluator -------------- -.. automodule:: paddle.trainer_config_helpers.evaluators - :members: auc_evaluator - :noindex: - -ctc_error_evaluator -------------------- -.. automodule:: paddle.trainer_config_helpers.evaluators - :members: ctc_error_evaluator - :noindex: - -chunk_evaluator ---------------- -.. automodule:: paddle.trainer_config_helpers.evaluators - :members: chunk_evaluator - :noindex: - -precision_recall_evaluator --------------------------- -.. automodule:: paddle.trainer_config_helpers.evaluators - :members: precision_recall_evaluator - :noindex: - -Rank -==== - -pnpair_evaluator ----------------- -.. automodule:: paddle.trainer_config_helpers.evaluators - :members: pnpair_evaluator - :noindex: - -Utils -===== - -sum_evaluator -------------- -.. automodule:: paddle.trainer_config_helpers.evaluators - :members: sum_evaluator - :noindex: - -column_sum_evaluator --------------------- -.. automodule:: paddle.trainer_config_helpers.evaluators - :members: column_sum_evaluator - :noindex: - -Print -===== - -classification_error_printer_evaluator --------------------------------------- -.. automodule:: paddle.trainer_config_helpers.evaluators - :members: classification_error_printer_evaluator - :noindex: - -gradient_printer_evaluator --------------------------- -.. automodule:: paddle.trainer_config_helpers.evaluators - :members: gradient_printer_evaluator - :noindex: - -maxid_printer_evaluator ------------------------ -.. automodule:: paddle.trainer_config_helpers.evaluators - :members: maxid_printer_evaluator - :noindex: - -maxframe_printer_evaluator ---------------------------- -.. automodule:: paddle.trainer_config_helpers.evaluators - :members: maxframe_printer_evaluator - :noindex: - -seqtext_printer_evaluator -------------------------- -.. 
automodule:: paddle.trainer_config_helpers.evaluators - :members: seqtext_printer_evaluator - :noindex: - -value_printer_evaluator ------------------------ -.. automodule:: paddle.trainer_config_helpers.evaluators - :members: value_printer_evaluator - :noindex: - diff --git a/doc/api/trainer_config_helpers/layers.rst b/doc/api/trainer_config_helpers/layers.rst deleted file mode 100644 index 12a75080d0deab1ecce6b2579b059ba56abf6711..0000000000000000000000000000000000000000 --- a/doc/api/trainer_config_helpers/layers.rst +++ /dev/null @@ -1,457 +0,0 @@ -====== -Layers -====== - -Base -====== - -LayerType ---------- -.. automodule:: paddle.trainer_config_helpers.layers - :members: LayerType - :noindex: - -LayerOutput ------------ -.. automodule:: paddle.trainer_config_helpers.layers - :members: LayerOutput - :noindex: - -Data layer -=========== - -.. _api_trainer_config_helpers_layers_data_layer: - -data_layer ----------- -.. automodule:: paddle.trainer_config_helpers.layers - :members: data_layer - :noindex: - -Fully Connected Layers -====================== - -.. _api_trainer_config_helpers_layers_fc_layer: - -fc_layer --------- -.. automodule:: paddle.trainer_config_helpers.layers - :members: fc_layer - :noindex: - -selective_fc_layer ------------------- -.. automodule:: paddle.trainer_config_helpers.layers - :members: selective_fc_layer - :noindex: - -Conv Layers -=========== - -conv_operator -------------- -.. automodule:: paddle.trainer_config_helpers.layers - :members: conv_operator - :noindex: - -conv_projection ---------------- -.. automodule:: paddle.trainer_config_helpers.layers - :members: conv_projection - :noindex: - -conv_shift_layer ------------------- -.. automodule:: paddle.trainer_config_helpers.layers - :members: conv_shift_layer - :noindex: - -img_conv_layer --------------- -.. automodule:: paddle.trainer_config_helpers.layers - :members: img_conv_layer - :noindex: - -.. _api_trainer_config_helpers_layers_context_projection: - -context_projection ------------------- -.. automodule:: paddle.trainer_config_helpers.layers - :members: context_projection - :noindex: - -Image Pooling Layer -=================== - -img_pool_layer --------------- -.. automodule:: paddle.trainer_config_helpers.layers - :members: img_pool_layer - :noindex: - -spp_layer --------------- -.. automodule:: paddle.trainer_config_helpers.layers - :members: spp_layer - :noindex: - -maxout_layer ------------- -.. automodule:: paddle.trainer_config_helpers.layers - :members: maxout_layer - :noindex: - -Norm Layer -========== - -img_cmrnorm_layer ------------------ -.. automodule:: paddle.trainer_config_helpers.layers - :members: img_cmrnorm_layer - :noindex: - -batch_norm_layer ---------------------- -.. automodule:: paddle.trainer_config_helpers.layers - :members: batch_norm_layer - :noindex: - -sum_to_one_norm_layer ---------------------- -.. automodule:: paddle.trainer_config_helpers.layers - :members: sum_to_one_norm_layer - :noindex: - -Recurrent Layers -================ - -recurrent_layer ------------------ -.. automodule:: paddle.trainer_config_helpers.layers - :members: recurrent_layer - :noindex: - -lstmemory ---------- -.. automodule:: paddle.trainer_config_helpers.layers - :members: lstmemory - :noindex: - -lstm_step_layer ---------------- -.. automodule:: paddle.trainer_config_helpers.layers - :members: lstm_step_layer - :noindex: - -grumemory ---------- -.. automodule:: paddle.trainer_config_helpers.layers - :members: grumemory - :noindex: - -gru_step_layer ---------------- -.. 
automodule:: paddle.trainer_config_helpers.layers - :members: gru_step_layer - :noindex: - -Recurrent Layer Group -===================== - -memory ------- -.. automodule:: paddle.trainer_config_helpers.layers - :members: memory - :noindex: - -recurrent_group ---------------- -.. automodule:: paddle.trainer_config_helpers.layers - :members: recurrent_group - :noindex: - -beam_search ------------- -.. automodule:: paddle.trainer_config_helpers.layers - :members: beam_search - :noindex: - -get_output_layer ------------------ -.. automodule:: paddle.trainer_config_helpers.layers - :members: get_output_layer - :noindex: - -Mixed Layer -=========== - -mixed_layer ------------ -.. automodule:: paddle.trainer_config_helpers.layers - :members: mixed_layer - :noindex: - -.. _api_trainer_config_helpers_layers_embedding_layer: - -embedding_layer ---------------- -.. automodule:: paddle.trainer_config_helpers.layers - :members: embedding_layer - :noindex: - -scaling_projection ------------------- -.. automodule:: paddle.trainer_config_helpers.layers - :members: scaling_projection - :noindex: - -dotmul_projection ------------------ -.. automodule:: paddle.trainer_config_helpers.layers - :members: dotmul_projection - :noindex: - -dotmul_operator ---------------- -.. automodule:: paddle.trainer_config_helpers.layers - :members: dotmul_operator - :noindex: - -full_matrix_projection ----------------------- -.. automodule:: paddle.trainer_config_helpers.layers - :members: full_matrix_projection - :noindex: - -identity_projection -------------------- -.. automodule:: paddle.trainer_config_helpers.layers - :members: identity_projection - :noindex: - - -table_projection ----------------------- -.. automodule:: paddle.trainer_config_helpers.layers - :members: table_projection - :noindex: - -trans_full_matrix_projection ----------------------------- -.. automodule:: paddle.trainer_config_helpers.layers - :members: trans_full_matrix_projection - :noindex: - -Aggregate Layers -================ - -.. _api_trainer_config_helpers_layers_pooling_layer: - -pooling_layer -------------- -.. automodule:: paddle.trainer_config_helpers.layers - :members: pooling_layer - :noindex: - -last_seq --------- -.. automodule:: paddle.trainer_config_helpers.layers - :members: last_seq - :noindex: - -first_seq ---------- -.. automodule:: paddle.trainer_config_helpers.layers - :members: first_seq - :noindex: - -concat_layer ------------- -.. automodule:: paddle.trainer_config_helpers.layers - :members: concat_layer - :noindex: - -Reshaping Layers -================ - -block_expand_layer ------------------- -.. automodule:: paddle.trainer_config_helpers.layers - :members: block_expand_layer - :noindex: - -expand_layer ------------- -.. automodule:: paddle.trainer_config_helpers.layers - :members: expand_layer - :noindex: - -repeat_layer ------------- -.. automodule:: paddle.trainer_config_helpers.layers - :members: repeat_layer - :noindex: - -Math Layers -=========== - -addto_layer ------------ -.. automodule:: paddle.trainer_config_helpers.layers - :members: addto_layer - :noindex: - -linear_comb_layer ------------------ -.. automodule:: paddle.trainer_config_helpers.layers - :members: linear_comb_layer - :noindex: - -interpolation_layer -------------------- -.. automodule:: paddle.trainer_config_helpers.layers - :members: interpolation_layer - :noindex: - -bilinear_interp_layer ----------------------- -.. automodule:: paddle.trainer_config_helpers.layers - :members: bilinear_interp_layer - :noindex: - -power_layer ------------ -.. 
automodule:: paddle.trainer_config_helpers.layers - :members: power_layer - :noindex: - -scaling_layer -------------- -.. automodule:: paddle.trainer_config_helpers.layers - :members: scaling_layer - :noindex: - -slope_intercept_layer ----------------------- -.. automodule:: paddle.trainer_config_helpers.layers - :members: slope_intercept_layer - :noindex: - -tensor_layer ------------- -.. automodule:: paddle.trainer_config_helpers.layers - :members: tensor_layer - :noindex: - -.. _api_trainer_config_helpers_layers_cos_sim: - -cos_sim -------- -.. automodule:: paddle.trainer_config_helpers.layers - :members: cos_sim - :noindex: - -trans_layer ------------- -.. automodule:: paddle.trainer_config_helpers.layers - :members: trans_layer - :noindex: - -Sampling Layers -=============== - -maxid_layer ------------ -.. automodule:: paddle.trainer_config_helpers.layers - :members: maxid_layer - :noindex: - -sampling_id_layer ------------------ -.. automodule:: paddle.trainer_config_helpers.layers - :members: sampling_id_layer - :noindex: - -Cost Layers -=========== - -cross_entropy -------------- -.. automodule:: paddle.trainer_config_helpers.layers - :members: cross_entropy - :noindex: - -cross_entropy_with_selfnorm ---------------------------- -.. automodule:: paddle.trainer_config_helpers.layers - :members: cross_entropy_with_selfnorm - :noindex: - -multi_binary_label_cross_entropy --------------------------------- -.. automodule:: paddle.trainer_config_helpers.layers - :members: multi_binary_label_cross_entropy - :noindex: - -huber_cost ----------- -.. automodule:: paddle.trainer_config_helpers.layers - :members: huber_cost - :noindex: - -lambda_cost ------------ -.. automodule:: paddle.trainer_config_helpers.layers - :members: lambda_cost - :noindex: - -rank_cost ---------- -.. automodule:: paddle.trainer_config_helpers.layers - :members: rank_cost - :noindex: - -crf_layer ------------------ -.. automodule:: paddle.trainer_config_helpers.layers - :members: crf_layer - :noindex: - -crf_decoding_layer -------------------- -.. automodule:: paddle.trainer_config_helpers.layers - :members: crf_decoding_layer - :noindex: - -ctc_layer ------------ -.. automodule:: paddle.trainer_config_helpers.layers - :members: ctc_layer - :noindex: - -nce_layer ------------ -.. automodule:: paddle.trainer_config_helpers.layers - :members: nce_layer - :noindex: - -hsigmoid ---------- -.. automodule:: paddle.trainer_config_helpers.layers - :members: hsigmoid - :noindex: - -sum_cost ---------- -.. automodule:: paddle.trainer_config_helpers.layers - :members: sum_cost - :noindex: - -Check Layer -============ - -eos_layer ------------- -.. automodule:: paddle.trainer_config_helpers.layers - :members: eos_layer - :noindex: diff --git a/doc/api/trainer_config_helpers/networks.rst b/doc/api/trainer_config_helpers/networks.rst deleted file mode 100644 index e13c368051abe3c50036c3baab988f170df4c641..0000000000000000000000000000000000000000 --- a/doc/api/trainer_config_helpers/networks.rst +++ /dev/null @@ -1,121 +0,0 @@ -======== -Networks -======== - -The networks module contains pieces of neural network that combine multiple layers. - -NLP -=== - -sequence_conv_pool ------------------- -.. automodule:: paddle.trainer_config_helpers.networks - :members: sequence_conv_pool - :noindex: - -.. _api_trainer_config_helpers_network_text_conv_pool: - -text_conv_pool --------------- -.. 
automodule:: paddle.trainer_config_helpers.networks - :members: text_conv_pool - :noindex: - -Images -====== - -img_conv_bn_pool ----------------- -.. automodule:: paddle.trainer_config_helpers.networks - :members: img_conv_bn_pool - :noindex: - -img_conv_group --------------- -.. automodule:: paddle.trainer_config_helpers.networks - :members: img_conv_group - :noindex: - -simple_img_conv_pool --------------------- -.. automodule:: paddle.trainer_config_helpers.networks - :members: simple_img_conv_pool - :noindex: - -vgg_16_network ---------------- -.. automodule:: paddle.trainer_config_helpers.networks - :members: vgg_16_network - :noindex: - -Recurrent -========= - -LSTM ----- - -lstmemory_unit -`````````````` -.. automodule:: paddle.trainer_config_helpers.networks - :members: lstmemory_unit - :noindex: - -lstmemory_group -``````````````` -.. automodule:: paddle.trainer_config_helpers.networks - :members: lstmemory_group - :noindex: - -simple_lstm -``````````` -.. automodule:: paddle.trainer_config_helpers.networks - :members: simple_lstm - :noindex: - -bidirectional_lstm -`````````````````` -.. automodule:: paddle.trainer_config_helpers.networks - :members: bidirectional_lstm - :noindex: - -GRU ---- - -gru_unit -```````` -.. automodule:: paddle.trainer_config_helpers.networks - :members: gru_unit - :noindex: - -gru_group -````````` -.. automodule:: paddle.trainer_config_helpers.networks - :members: gru_group - :noindex: - -simple_gru -`````````` -.. automodule:: paddle.trainer_config_helpers.networks - :members: simple_gru - :noindex: - -simple_attention ----------------- -.. automodule:: paddle.trainer_config_helpers.networks - :members: simple_attention - :noindex: - -Miscs -===== - -dropout_layer --------------- -.. automodule:: paddle.trainer_config_helpers.networks - :members: dropout_layer - :noindex: - -outputs -------- -.. automodule:: paddle.trainer_config_helpers.networks - :members: outputs - :noindex: diff --git a/doc/api/trainer_config_helpers/optimizers.rst b/doc/api/trainer_config_helpers/optimizers.rst deleted file mode 100644 index 7ca4e34156e273caf66cc71e6927bfb23bb5235e..0000000000000000000000000000000000000000 --- a/doc/api/trainer_config_helpers/optimizers.rst +++ /dev/null @@ -1,57 +0,0 @@ -========== -Optimizers -========== - -BaseSGDOptimizer -================ -.. automodule:: paddle.trainer_config_helpers.optimizers - :members: BaseSGDOptimizer - :noindex: - -MomentumOptimizer -================= -.. automodule:: paddle.trainer_config_helpers.optimizers - :members: MomentumOptimizer - :noindex: - -AdamOptimizer -============= -.. automodule:: paddle.trainer_config_helpers.optimizers - :members: AdamOptimizer - :noindex: - -AdamaxOptimizer -================ -.. automodule:: paddle.trainer_config_helpers.optimizers - :members: AdamaxOptimizer - :noindex: - -AdaGradOptimizer -================ -.. automodule:: paddle.trainer_config_helpers.optimizers - :members: AdaGradOptimizer - :noindex: - -DecayedAdaGradOptimizer -======================= -.. automodule:: paddle.trainer_config_helpers.optimizers - :members: DecayedAdaGradOptimizer - :noindex: - -AdaDeltaOptimizer -================= -.. automodule:: paddle.trainer_config_helpers.optimizers - :members: AdaDeltaOptimizer - :noindex: - -RMSPropOptimizer -================ -.. automodule:: paddle.trainer_config_helpers.optimizers - :members: RMSPropOptimizer - :noindex: - -settings -======== -.. 
automodule:: paddle.trainer_config_helpers.optimizers - :members: settings - :noindex: diff --git a/doc/api/trainer_config_helpers/poolings.rst b/doc/api/trainer_config_helpers/poolings.rst deleted file mode 100644 index 66566809d26f59263597b5286c5b27e0bbc9415a..0000000000000000000000000000000000000000 --- a/doc/api/trainer_config_helpers/poolings.rst +++ /dev/null @@ -1,33 +0,0 @@ -======== -Poolings -======== - -BasePoolingType -=============== -.. automodule:: paddle.trainer_config_helpers.poolings - :members: BasePoolingType - :noindex: - -AvgPooling -========== -.. automodule:: paddle.trainer_config_helpers.poolings - :members: AvgPooling - :noindex: - -MaxPooling -========== -.. automodule:: paddle.trainer_config_helpers.poolings - :members: MaxPooling - :noindex: - -SumPooling -========== -.. automodule:: paddle.trainer_config_helpers.poolings - :members: SumPooling - :noindex: - -SquareRootNPooling -================== -.. automodule:: paddle.trainer_config_helpers.poolings - :members: SquareRootNPooling - :noindex: diff --git a/doc/api/v1/data_provider/dataprovider_cn.rst b/doc/api/v1/data_provider/dataprovider_cn.rst new file mode 100644 index 0000000000000000000000000000000000000000..d08c6b3efacbc35ae274d5b207fe91e747124e79 --- /dev/null +++ b/doc/api/v1/data_provider/dataprovider_cn.rst @@ -0,0 +1,15 @@ +.. _api_dataprovider: + +DataProvider的介绍 +================== + +DataProvider是PaddlePaddle负责提供数据的模块。其作用是将数据传入内存或显存,让神经网络可以进行训练或预测。用户可以通过简单使用Python接口 :ref:`api_pydataprovider2` ,来自定义传数据的过程。如果有更复杂的使用,或者需要更高的效率,用户也可以在C++端自定义一个 ``DataProvider`` 。 + +PaddlePaddle需要用户在网络配置(trainer_config.py)中定义使用哪种DataProvider,并且在DataProvider中实现如何访问训练文件列表(train.list)或测试文件列表(test.list)。 + +- train.list和test.list存放在本地(推荐直接存放到训练目录,以相对路径引用)。一般情况下,两者均为纯文本文件,其中每一行对应一个数据文件地址: + + - 如果数据文件存于本地磁盘,这个地址则为它的绝对路径或相对路径(相对于PaddlePaddle程序运行时的路径)。 + - 地址也可以为hdfs文件路径,或者数据库连接路径等。 + - 由于这个地址会被DataProvider使用,因此,如何解析该地址也是用户自定义DataProvider时需要考虑的地方。 +- 如果没有设置test.list,或设置为None,那么在训练过程中不会执行测试操作;否则,会根据命令行参数指定的测试方式,在训练过程中进行测试,从而防止过拟合。 diff --git a/doc/api/data_provider/index_en.rst b/doc/api/v1/data_provider/dataprovider_en.rst similarity index 100% rename from doc/api/data_provider/index_en.rst rename to doc/api/v1/data_provider/dataprovider_en.rst diff --git a/doc/api/v1/data_provider/pydataprovider2_cn.rst b/doc/api/v1/data_provider/pydataprovider2_cn.rst new file mode 100644 index 0000000000000000000000000000000000000000..8f9db31cfb9946e1d2db3872718bd92787d861f0 --- /dev/null +++ b/doc/api/v1/data_provider/pydataprovider2_cn.rst @@ -0,0 +1,229 @@ +.. _api_pydataprovider2: + +PyDataProvider2的使用 +===================== + +PyDataProvider2是PaddlePaddle使用Python提供数据的推荐接口。该接口使用多线程读取数据,并提供了简单的Cache功能;同时可以使用户只关注如何从文件中读取每一条数据,而不用关心数据如何传输,如何存储等等。 + +.. contents:: + +MNIST的使用场景 +--------------- + +我们以MNIST手写识别为例,来说明PyDataProvider2的简单使用场景。 + +样例数据 +++++++++ + +MNIST是一个包含有70,000张灰度图片的数字分类数据集。样例数据 ``mnist_train.txt`` 如下: + +.. literalinclude:: src/mnist_train.txt + +其中每行数据代表一张图片,行内使用 ``;`` 分成两部分。第一部分是图片的标签,为0-9中的一个数字;第二部分是28*28的图片像素灰度值。 对应的 ``train.list`` 即为这个数据文件的名字: + +.. literalinclude:: src/train.list + +dataprovider的使用 +++++++++++++++++++ + +.. literalinclude:: src/mnist_provider.dict.py + +- 首先,引入PaddlePaddle的PyDataProvider2包。 +- 其次,定义一个Python的 `Decorator `_ `@provider`_ 。用于将下一行的数据输入函数标记成一个PyDataProvider2,同时设置它的input_types属性。 + + - `input_types`_:设置这个PyDataProvider2返回什么样的数据。本例根据网络配置中 ``data_layer`` 的名字,显式指定返回的是一个28*28维的稠密浮点数向量和一个[0-9]的10维整数标签。 + + .. 
literalinclude:: src/mnist_config.py + :lines: 9-10 + + - 注意:如果用户不显式指定返回数据的对应关系,那么PaddlePaddle会根据layer的声明顺序,来确定对应关系。但这个关系可能不正确,所以推荐使用显式指定的方式来设置input_types。 +- 最后,实现数据输入函数(如本例的 ``process`` 函数)。 + + - 该函数的功能是:打开文本文件,读取每一行,将行中的数据转换成与input_types一致的格式,然后返回给PaddlePaddle进程。注意, + + - 返回的顺序需要和input_types中定义的顺序一致。 + - 返回时,必须使用Python关键词 ``yield`` ,相关概念是 ``generator`` 。 + - 一次yield调用,返回一条完整的样本。如果想为一个数据文件返回多条样本,只需要在函数中调用多次yield即可(本例中使用for循环进行多次调用)。 + + - 该函数具有两个参数: + + - settings:在本例中没有使用,具体可以参考 `init_hook`_ 中的说明。 + - filename:为 ``train.list`` 或 ``test.list`` 中的一行,即若干数据文件路径的某一个。 + +网络配置中的调用 +++++++++++++++++ + +在网络配置里,只需要一行代码就可以调用这个PyDataProvider2,如, + +.. literalinclude:: src/mnist_config.py + :lines: 1-7 + +训练数据是 ``train.list`` ,没有测试数据,调用的PyDataProvider2是 ``mnist_provider`` 模块中的 ``process`` 函数。 + +小结 ++++++ + +至此,简单的PyDataProvider2样例就说明完毕了。对用户来说,仅需要知道如何从 **一个文件** 中读取 **一条样本** ,就可以将数据传送给PaddlePaddle了。而PaddlePaddle则会帮用户做以下工作: + +* 将数据组合成Batch进行训练 +* 对训练数据进行Shuffle +* 多线程的数据读取 +* 缓存训练数据到内存(可选) +* CPU->GPU双缓存 + +是不是很简单呢? + +时序模型的使用场景 +------------------ +样例数据 +++++++++ + +时序模型是指数据的某一维度是一个序列形式,即包含时间步信息。所谓时间步信息,不一定和时间有关系,只是说明数据的顺序是重要的。例如,文本信息就是一个序列数据。 + +本例采用英文情感分类的数据,即将一段英文文本数据,分类成正面情绪和负面情绪两类(用0和1表示)。样例数据 ``sentimental_train.txt`` 如下: + +.. literalinclude:: src/sentimental_train.txt + +dataprovider的使用 +++++++++++++++++++ + +相对MNIST而言,这个dataprovider较复杂,主要原因是增加了初始化机制 `init_hook`_。本例的 ``on_init`` 函数就是根据该机制配置的,它会在dataprovider创建的时候执行。 + +- 其中 ``input_types`` 和在 `@provider`_ 中配置的效果一致。本例中的输入特征是词ID的序列,因此使用 ``integer_value_sequence`` 类型来设置。 +- 将 ``dictionary`` 存入settings对象,在 ``process`` 函数中使用。 dictionary是从网络配置中传入的dict对象,即一个将单词字符串映射到单词ID的字典。 + +.. literalinclude:: src/sentimental_provider.py + +网络配置中的调用 +++++++++++++++++ + +调用这个PyDataProvider2的方法,基本上和MNIST样例一致,除了 + +* 在配置中需要读取外部字典。 +* 在声明DataProvider的时候传入dictionary作为参数。 + +.. literalinclude:: src/sentimental_config.py + :emphasize-lines: 12-14 + +参考(Reference) +--------------- + +@provider ++++++++++ + +``@provider`` 是一个Python的 `Decorator`_ ,可以将某一个函数标记成一个PyDataProvider2。如果不了解 `Decorator`_ 是什么也没关系,只需知道这是一个标记属性的方法就可以了。它包含的属性参数如下: + +* input_types:数据输入格式。具体的格式说明,请参考 `input_types`_ 。 +* should_shuffle:是不是要对数据做Shuffle。训练时默认shuffle,测试时默认不shuffle。 +* min_pool_size:设置内存中最小暂存的数据条数,也是PaddlePaddle所能够保证的shuffle粒度。如果为-1,则会预先读取全部数据到内存中。 +* pool_size: 设置内存中暂存的数据条数。如果为-1(默认),则不在乎内存暂存多少条数据。如果设置,则推荐大于训练时batch size的值,并且在内存足够的情况下越大越好。 +* can_over_batch_size:是否允许暂存略微多于pool_size的数据。由于这样做可以避免很多死锁问题,一般推荐设置成True。 +* calc_batch_size:可以传入一个函数,用于自定义每条数据的batch size(默认为1)。 +* cache: 数据缓存的策略,具体请参考 `cache`_ 。 +* init_hook:初始化时调用的函数,具体请参考 `init_hook`_ 。 +* check:如果为true,会根据input_types检查数据的合法性。 +* check_fail_continue:如果为true,那么当check出数据不合法时,会扔掉这条数据,继续训练或预测。(对check=false的情况,没有作用) + +input_types ++++++++++++ + +PaddlePaddle的数据包括四种主要类型,和三种序列模式。 + +四种数据类型: + +* dense_vector:稠密的浮点数向量。 +* sparse_binary_vector:稀疏的01向量,即大部分值为0,但有值的地方必须为1。 +* sparse_float_vector:稀疏的向量,即大部分值为0,但有值的部分可以是任何浮点数。 +* integer:整数标签。 + +三种序列模式: + +* SequenceType.NO_SEQUENCE:不是一条序列 +* SequenceType.SEQUENCE:是一条时间序列 +* SequenceType.SUB_SEQUENCE: 是一条时间序列,且序列的每一个元素还是一个时间序列。 + +不同的数据类型和序列模式返回的格式不同,列表如下: + ++----------------------+---------------------+-----------------------------------+------------------------------------------------+ +| | NO_SEQUENCE | SEQUENCE | SUB_SEQUENCE | ++======================+=====================+===================================+================================================+ +| dense_vector | [f, f, ...] | [[f, ...], [f, ...], ...] | [[[f, ...], ...], [[f, ...], ...],...]
| ++----------------------+---------------------+-----------------------------------+------------------------------------------------+ +| sparse_binary_vector | [i, i, ...] | [[i, ...], [i, ...], ...] | [[[i, ...], ...], [[i, ...], ...],...] | ++----------------------+---------------------+-----------------------------------+------------------------------------------------+ +| sparse_float_vector | [(i,f), (i,f), ...] | [[(i,f), ...], [(i,f), ...], ...] | [[[(i,f), ...], ...], [[(i,f), ...], ...],...] | ++----------------------+---------------------+-----------------------------------+------------------------------------------------+ +| integer_value | i | [i, i, ...] | [[i, ...], [i, ...], ...] | ++----------------------+---------------------+-----------------------------------+------------------------------------------------+ + +其中,f代表一个浮点数,i代表一个整数。 + +注意:对sparse_binary_vector和sparse_float_vector,PaddlePaddle存的是有值位置的索引。例如, + +- 对一个5维非序列的稀疏01向量 ``[0, 1, 1, 0, 0]`` ,类型是sparse_binary_vector,返回的是 ``[1, 2]`` 。 +- 对一个5维非序列的稀疏浮点向量 ``[0, 0.5, 0.7, 0, 0]`` ,类型是sparse_float_vector,返回的是 ``[(1, 0.5), (2, 0.7)]`` 。 + +init_hook ++++++++++ + +init_hook可以传入一个函数。该函数在初始化的时候会被调用,其参数如下: + +* 第一个参数是settings对象,它和数据传入函数的第一个参数(如本例中 ``process`` 函数的 ``settings`` 参数)必须一致。该对象具有以下两个属性: + * settings.input_types:数据输入格式,具体请参考 `input_types`_ 。 + * settings.logger:一个logging对象。 +* 其他参数使用 ``kwargs`` (key word arguments)传入,包括以下两种: + * PaddlePaddle定义的参数: 1)is_train:bool型参数,表示用于训练或预测;2)file_list:所有文件列表。 + * 用户定义的参数:使用args在网络配置中设置。 + +注意:PaddlePaddle保留添加参数的权力,因此init_hook尽量使用 ``**kwargs`` 来接受不使用的函数以保证兼容性。 + +cache ++++++ + +PyDataProvider2提供了两种简单的Cache策略: + +* CacheType.NO_CACHE:不缓存任何数据,每次都会从python端读取数据 +* CacheType.CACHE_PASS_IN_MEM:第一个pass会从python端读取数据,剩下的pass会直接从内存里 + 读取数据。 + + +注意事项 +-------- + +可能的内存泄露问题 +++++++++++++++++++ + +PaddlePaddle将train.list中的每一行都传递给process函数,从而生成多个generator。当训练数据非常多时,就会生成非常多的generator。 + +虽然每个generator在没有调用的时候,是几乎不占内存的;但当调用过一次后,generator便会存下当前的上下文(Context),而这个Context可能会非常大。并且,generator至少需要调用两次才会知道是否停止。所以,即使process函数里面只有一个yield,也需要两次随机选择到相同generator的时候,才会释放该段内存。 + +.. code-block:: python + + def func(): + yield 0 + + f = func() # 创建generator + tmp = next(f) # 调用一次,返回0 + tmp = next(f) # 调用第二次的时候,才会Stop Iteration + +由于顺序调用这些generator不会出现上述问题,因此有两种解决方案: + +1. **最佳推荐**:将样本的地址放入另一个文本文件,train.list写入那个文本文件的地址。即不要将每一个样本都放入train.list。 +2. 在generator的上下文中尽量留下非常少的变量引用,例如 + +.. code-block:: python + + def real_process(fn): + # ... read from fn + return result # 当函数返回的时候,python可以解除掉内部变量的引用。 + + def process(fn): + yield real_process(fn) + +注意:这个问题是PyDataProvider读数据时候的逻辑问题,很难整体修正。 + +内存不够用的情况 +++++++++++++++++ + +PyDataProvider2会尽可能多的使用内存。因此,对于内存较小的机器,推荐使用 ``pool_size`` 变量来设置内存中暂存的数据条。具体请参考 `@provider`_ 中的说明。 + diff --git a/doc/api/v1/data_provider/pydataprovider2_en.rst b/doc/api/v1/data_provider/pydataprovider2_en.rst new file mode 100644 index 0000000000000000000000000000000000000000..e8fb6292779790765154502bff319ea10ab1e70b --- /dev/null +++ b/doc/api/v1/data_provider/pydataprovider2_en.rst @@ -0,0 +1,249 @@ +.. _api_pydataprovider2: + +PyDataProvider2 +=============== + +We highly recommand users to use PyDataProvider2 to provide training or testing +data to PaddlePaddle. The user only needs to focus on how to read a single +sample from the original data file by using PyDataProvider2, leaving all of the +trivial work, including, transfering data into cpu/gpu memory, shuffle, binary +serialization to PyDataProvider2. 
PyDataProvider2 uses multithreading and a +simple but effective cache strategy to optimize the efficiency of the data +providing process. + +DataProvider for the non-sequential model +----------------------------------------- + +Here we use the MNIST handwriting recognition data as an example to illustrate +how to write a simple PyDataProvider. + +MNIST is a handwriting classification data set. It contains 70,000 grayscale +images of digits. Labels of the training samples range from 0 to 9. All the +images have been size-normalized and centered into images with the same size +of 28 x 28 pixels. + +A small part of the original data as an example is shown as below: + +.. literalinclude:: src/mnist_train.txt + +Each line of the data contains two parts, separated by :code:`;`. The first part is +the label of an image. The second part contains 28x28 pixel float values. + +Just write the path of the above data into train.list. It looks like this: + +.. literalinclude:: src/train.list + +The corresponding dataprovider is shown as below: + +.. literalinclude:: src/mnist_provider.dict.py + +The first line imports the PyDataProvider2 package. +The main function is the process function, which has two parameters. +The first parameter is the settings, which is not used in this example. +The second parameter is the filename, which is exactly one line of train.list. +This parameter is passed to the process function by PaddlePaddle. + +:code:`@provider` is a Python +`Decorator `_ . +It sets some properties of the DataProvider, and constructs a real PaddlePaddle +DataProvider from a very simple user-implemented Python function. It does not +matter if you are not familiar with `Decorator`_. You can keep it simple by +just taking :code:`@provider` as a fixed mark above the provider function you +implemented. + +`input_types`_ defines the data format that a DataProvider returns. +In this example, it is set to a 28x28-dimensional dense vector and an integer +scalar, whose value ranges from 0 to 9. +`input_types`_ can be set to several kinds of input formats; please refer to the +document of `input_types`_ for more details. + + +The process method is the core part of constructing a real DataProvider in +PaddlePaddle. It implements how to open the text file, how to read one sample +from the original text file, convert it into `input_types`_, and give it +back to the PaddlePaddle process at line 23. +Note that data yielded by the process function must follow the same order in which +`input_types`_ are defined. + + +With the help of PyDataProvider2, users can focus on how to generate ONE training +sample by using the keyword :code:`yield`. +:code:`yield` is a Python keyword, and a related concept is the +:code:`generator`. + +Only a few lines of code need to be added to the training configuration file; +you can take this as an example. + +.. literalinclude:: src/mnist_config.py + +Here we specify training data by :code:`train.list`, and no testing data is specified. +The method which actually provides data is :code:`process`. + +Users can also use another style to provide data, which defines the +:code:`data_layer`'s name explicitly when calling :code:`yield`. For example, +the :code:`dataprovider` is shown as below. + +.. literalinclude:: src/mnist_provider.dict.py + :linenos: + +If the user didn't give the :code:`data_layer`'s name, PaddlePaddle will roughly use +the order of :code:`data_layer` definitions to determine which feature corresponds to +which :code:`data_layer`. This order may not be correct, so TO DEFINE THE
This order may be not correct, so TO DEFINE THE +:code:`data_layer`'s NAMES EXPLICITLY IS THE RECOMMANDED WAY TO PROVIDER DATA. + +Now, this simple example of using PyDataProvider is finished. +The only thing that the user should know is how to generte **one sample** from +**one data file**. +And PaddlePadle will do all of the rest things\: + +* Form a training batch +* Shuffle the training data +* Read data with multithreading +* Cache the training data (Optional) +* CPU-> GPU double buffering. + +Is this cool? + +.. _api_pydataprovider2_sequential_model: + +DataProvider for the sequential model +------------------------------------- +A sequence model takes sequences as its input. A sequence is made up of several +timesteps. The so-called timestep, is not necessary to have something to do +with time. It can also be explained to that the order of data are taken into +consideration into model design and training. +For example, the sentence can be interpreted as a kind of sequence data in NLP +tasks. + +Here is an example on data proivider for English sentiment classification data. +The original input data are simple English text, labeled into positive or +negative sentiment (marked by 0 and 1 respectively). + +A small part of the original data as an example can be found in the path below: + +.. literalinclude:: src/sentimental_train.txt + +The corresponding data provider can be found in the path below: + +.. literalinclude:: src/sentimental_provider.py + +This data provider for sequential model is a little more complex than that +for MINST dataset. +A new initialization method is introduced here. +The method :code:`on_init` is configured to DataProvider by :code:`@provider`'s +:code:`init_hook` parameter, and it will be invoked once DataProvider is +initialized. The :code:`on_init` function has the following parameters: + +* The first parameter is the settings object. +* The rest parameters are passed by key word arguments. Some of them are passed + by PaddlePaddle, see reference for `init_hook`_. + The :code:`dictionary` object is a python dict object passed from the trainer + configuration file, and it maps word string to word id. + +To pass these parameters into DataProvider, the following lines should be added +into trainer configuration file. + +.. literalinclude:: src/sentimental_config.py + +The definition is basically same as MNIST example, except: +* Load dictionary in this configuration +* Pass it as a parameter to the DataProvider + +The `input_types` is configured in method :code:`on_init`. It has the same +effect to configure them by :code:`@provider`'s :code:`input_types` parameter. +However, the :code:`input_types` is set at runtime, so we can set it to +different types according to the input data. Input of the neural network is a +sequence of word id, so set :code:`seq_type` to :code:`integer_value_sequence`. + +Durning :code:`on_init`, we save :code:`dictionary` variable to +:code:`settings`, and it will be used in :code:`process`. Note the settings +parameter for the process function and for the on_init's function are a same +object. + +The basic processing logic is the same as MNIST's :code:`process` method. Each +sample in the data file is given back to PaddlePaddle process. + +Thus, the basic usage of PyDataProvider is here. +Please refer to the following section reference for details. + +Reference +--------- + +@provider ++++++++++ + +.. 
+Reference +--------- + +@provider ++++++++++ + +.. autofunction:: paddle.trainer.PyDataProvider2.provider + +input_types ++++++++++++ + +PaddlePaddle has four data types, and three sequence types. +The four data types are: + +* :code:`dense_vector`: dense float vector. +* :code:`sparse_binary_vector`: sparse binary vector, most of the values are 0, and + the non zero elements are fixed to 1. +* :code:`sparse_float_vector`: sparse float vector, most of the values are 0, and some + non zero elements can be any float value. They are given by the user. +* :code:`integer`: an integer scalar, typically used for a label or a word index. + +The three sequence types are: + +* :code:`SequenceType.NO_SEQUENCE` means the sample is not a sequence. +* :code:`SequenceType.SEQUENCE` means the sample is a sequence. +* :code:`SequenceType.SUB_SEQUENCE` means it is a nested sequence, where each timestep of + the input sequence is also a sequence. + +Different input types have different input formats, as shown +in the table below. + ++----------------------+---------------------+-----------------------------------+------------------------------------------------+ +| | NO_SEQUENCE | SEQUENCE | SUB_SEQUENCE | ++======================+=====================+===================================+================================================+ +| dense_vector | [f, f, ...] | [[f, ...], [f, ...], ...] | [[[f, ...], ...], [[f, ...], ...],...] | ++----------------------+---------------------+-----------------------------------+------------------------------------------------+ +| sparse_binary_vector | [i, i, ...] | [[i, ...], [i, ...], ...] | [[[i, ...], ...], [[i, ...], ...],...] | ++----------------------+---------------------+-----------------------------------+------------------------------------------------+ +| sparse_float_vector | [(i,f), (i,f), ...] | [[(i,f), ...], [(i,f), ...], ...] | [[[(i,f), ...], ...], [[(i,f), ...], ...],...] | ++----------------------+---------------------+-----------------------------------+------------------------------------------------+ +| integer_value | i | [i, i, ...] | [[i, ...], [i, ...], ...] | ++----------------------+---------------------+-----------------------------------+------------------------------------------------+ + +where f represents a float value, i represents an integer value. + +init_hook ++++++++++ + +init_hook is a function that is invoked once the data provider is initialized. +Its parameters are as follows: + +* The first parameter is a settings object, which is the same as the :code:`settings` + in the :code:`process` method. The object contains several attributes, including: + + * :code:`settings.input_types`: the input types. Reference `input_types`_. + * :code:`settings.logger`: a logging object. + +* The remaining parameters are keyword arguments, made up of PaddlePaddle + pre-defined parameters and user-defined parameters. + + * PaddlePaddle-defined parameters include: + + * :code:`is_train` is a bool parameter that indicates whether the DataProvider is used for + training or testing. + * :code:`file_list` is the list of all files. + + * User-defined parameters can be set via args in the training configuration. + +Note, PaddlePaddle reserves the right to add pre-defined parameters, so please +use :code:`**kwargs` in init_hook to ensure compatibility by accepting the +parameters which your init_hook does not use.
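+
+As a minimal sketch, an :code:`init_hook` can be wired up as below, using the
+names from the sentiment example (the :code:`dictionary` argument and the
+line format are illustrative assumptions, not a fixed part of the API):
+
+.. code-block:: python
+
+    from paddle.trainer.PyDataProvider2 import provider, integer_value, \
+        integer_value_sequence
+
+    def on_init(settings, dictionary, **kwargs):
+        # settings is the same object later passed to process();
+        # stash whatever process() will need on it.
+        settings.dictionary = dictionary
+        settings.input_types = [
+            integer_value_sequence(len(dictionary)),  # word ids
+            integer_value(2),                         # 0/1 sentiment label
+        ]
+        settings.logger.info("is_train=%s" % kwargs.get('is_train'))
+
+    @provider(init_hook=on_init)
+    def process(settings, filename):
+        # assumes each line is "<label>\t<space-separated words>"
+        with open(filename) as f:
+            for line in f:
+                label, sentence = line.strip().split('\t', 1)
+                yield [settings.dictionary.get(w, 0)
+                       for w in sentence.split()], int(label)
+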
+cache ++++++ +DataProvider provides two simple cache strategies. They are: + +* :code:`CacheType.NO_CACHE` means no data is cached; data is read at runtime from + the user-implemented Python module in every pass. +* :code:`CacheType.CACHE_PASS_IN_MEM` means the first pass reads data from the + user-implemented Python module, and the remaining passes read data directly from + memory. diff --git a/doc_cn/ui/data_provider/mnist_config.py b/doc/api/v1/data_provider/src/mnist_config.py similarity index 100% rename from doc_cn/ui/data_provider/mnist_config.py rename to doc/api/v1/data_provider/src/mnist_config.py diff --git a/doc_cn/ui/data_provider/mnist_provider.dict.py b/doc/api/v1/data_provider/src/mnist_provider.dict.py similarity index 100% rename from doc_cn/ui/data_provider/mnist_provider.dict.py rename to doc/api/v1/data_provider/src/mnist_provider.dict.py diff --git a/doc_cn/ui/data_provider/mnist_train.txt b/doc/api/v1/data_provider/src/mnist_train.txt similarity index 100% rename from doc_cn/ui/data_provider/mnist_train.txt rename to doc/api/v1/data_provider/src/mnist_train.txt diff --git a/doc_cn/ui/data_provider/sentimental_config.py b/doc/api/v1/data_provider/src/sentimental_config.py similarity index 100% rename from doc_cn/ui/data_provider/sentimental_config.py rename to doc/api/v1/data_provider/src/sentimental_config.py diff --git a/doc_cn/ui/data_provider/sentimental_provider.py b/doc/api/v1/data_provider/src/sentimental_provider.py similarity index 100% rename from doc_cn/ui/data_provider/sentimental_provider.py rename to doc/api/v1/data_provider/src/sentimental_provider.py diff --git a/doc_cn/ui/data_provider/sentimental_train.txt b/doc/api/v1/data_provider/src/sentimental_train.txt similarity index 100% rename from doc_cn/ui/data_provider/sentimental_train.txt rename to doc/api/v1/data_provider/src/sentimental_train.txt diff --git a/doc_cn/ui/data_provider/train.list b/doc/api/v1/data_provider/src/train.list similarity index 100% rename from doc_cn/ui/data_provider/train.list rename to doc/api/v1/data_provider/src/train.list diff --git a/doc/api/v1/index_cn.rst b/doc/api/v1/index_cn.rst new file mode 100644 index 0000000000000000000000000000000000000000..3718cd73a2003b8ef6c406a9bd51dc68e76402dc --- /dev/null +++ b/doc/api/v1/index_cn.rst @@ -0,0 +1,37 @@ +API中文手册 +============ + +DataProvider API +---------------- + +.. toctree:: + :maxdepth: 1 + + data_provider/dataprovider_cn.rst + data_provider/pydataprovider2_cn.rst + +.. _api_trainer_config: + +Model Config API +---------------- + +.. toctree:: + :maxdepth: 1 + + trainer_config_helpers/optimizers.rst + trainer_config_helpers/data_sources.rst + trainer_config_helpers/layers.rst + trainer_config_helpers/activations.rst + trainer_config_helpers/poolings.rst + trainer_config_helpers/networks.rst + trainer_config_helpers/evaluators.rst + trainer_config_helpers/attrs.rst + + +Applications API +---------------- + +.. toctree:: + :maxdepth: 1 + + predict/swig_py_paddle_cn.rst diff --git a/doc/api/v1/index_en.rst b/doc/api/v1/index_en.rst new file mode 100644 index 0000000000000000000000000000000000000000..10c297a71d6988c002de868e804ed9ee2345fbd7 --- /dev/null +++ b/doc/api/v1/index_en.rst @@ -0,0 +1,37 @@ +API +=== + +DataProvider API +---------------- + +.. toctree:: + :maxdepth: 1 + + data_provider/dataprovider_en.rst + data_provider/pydataprovider2_en.rst + +.. _api_trainer_config: + +Model Config API +---------------- + +..
toctree:: + :maxdepth: 1 + + trainer_config_helpers/optimizers.rst + trainer_config_helpers/data_sources.rst + trainer_config_helpers/layers.rst + trainer_config_helpers/activations.rst + trainer_config_helpers/poolings.rst + trainer_config_helpers/networks.rst + trainer_config_helpers/evaluators.rst + trainer_config_helpers/attrs.rst + + +Applications API +---------------- + +.. toctree:: + :maxdepth: 1 + + predict/swig_py_paddle_en.rst diff --git a/doc/api/predict/predict_sample.py b/doc/api/v1/predict/src/predict_sample.py similarity index 100% rename from doc/api/predict/predict_sample.py rename to doc/api/v1/predict/src/predict_sample.py diff --git a/doc/api/v1/predict/swig_py_paddle_cn.rst b/doc/api/v1/predict/swig_py_paddle_cn.rst new file mode 100644 index 0000000000000000000000000000000000000000..42f333dba2e996e70572b3cda085b83e402ede8e --- /dev/null +++ b/doc/api/v1/predict/swig_py_paddle_cn.rst @@ -0,0 +1,58 @@ +.. _api_swig_py_paddle: + +基于Python的预测 +================ + +预测流程 +-------- + +PaddlePaddle使用swig对常用的预测接口进行了封装,通过编译会生成py_paddle软件包,安装该软件包就可以在python环境下实现模型预测。可以使用python的 ``help()`` 函数查询软件包相关API说明。 + +基于Python的模型预测,主要包括以下五个步骤。 + +1. 初始化PaddlePaddle环境 + + 在程序开始阶段,通过调用 ``swig_paddle.initPaddle()`` 并传入相应的命令行参数初始化PaddlePaddle。 + +2. 解析模型配置文件 + + 初始化之后,可以通过调用 ``parse_config()`` 解析训练模型时用的配置文件。注意预测数据通常不包含label, 同时预测网络通常直接输出最后一层的结果而不是像训练网络一样再接一层cost layer,所以一般需要对训练用的模型配置文件稍作相应修改才能在预测时使用。 + +3. 构造paddle.GradientMachine + + 通过调用 ``swig_paddle.GradientMachine.createFromConfigProto()`` 传入上一步解析出来的模型配置就可以创建一个 ``GradientMachine``。 + +4. 准备预测数据 + + swig_paddle中的预测接口的参数是自定义的C++数据类型,py_paddle里面提供了一个工具类 ``DataProviderConverter`` 可以用于接收和PyDataProvider2一样的输入数据并转换成预测接口所需的数据类型。 + +5. 模型预测 + + 通过调用 ``forwardTest()`` 传入预测数据,直接返回计算结果。 + + +预测Demo +-------- + +如下是一段使用mnist model来实现手写识别的预测代码。完整的代码见 ``src_root/doc/ui/predict/predict_sample.py`` 。mnist model可以通过 ``src_root/demo/mnist`` 目录下的demo训练出来。 + +.. literalinclude:: src/predict_sample.py + :language: python + :lines: 15-18,121-136 + + +Demo预测输出如下,其中value即为softmax层的输出。由于TEST_DATA包含两条预测数据,所以输出的value包含两个向量。 + +.. code-block:: text + + [{'id': None, 'value': array( + [[ 5.53018653e-09, 1.12194102e-05, 1.96644767e-09, + 1.43630644e-02, 1.51111044e-13, 9.85625684e-01, + 2.08823112e-10, 2.32777140e-08, 2.00186201e-09, + 1.15501715e-08], + [ 9.99982715e-01, 1.27787406e-10, 1.72296313e-05, + 1.49316648e-09, 1.36540484e-11, 6.93137714e-10, + 2.70634608e-08, 3.48565123e-08, 5.25639710e-09, + 4.48684503e-08]], dtype=float32)}] + + diff --git a/doc/api/v1/predict/swig_py_paddle_en.rst b/doc/api/v1/predict/swig_py_paddle_en.rst new file mode 100644 index 0000000000000000000000000000000000000000..1c628e6971fa5643e6a9ca629488049957686193 --- /dev/null +++ b/doc/api/v1/predict/swig_py_paddle_en.rst @@ -0,0 +1,59 @@ +Python Prediction +================== + +PaddlePaddle offers a set of clean prediction interfaces for Python with the help of +SWIG. The main steps of predicting values in Python are: + +* Parse training configurations +* Construct GradientMachine +* Prepare data +* Predict + +Here is a sample Python script that shows the typical prediction process for the +MNIST classification problem. The complete sample code can be found at +:code:`src_root/doc/ui/predict/predict_sample.py`. + +.. literalinclude:: src/predict_sample.py + :language: python + :lines: 15-18,90-100,101-104 + +The module that does most of the job is :code:`py_paddle.swig_paddle`; it is +generated by SWIG and fully documented, and for more details you can use +Python's :code:`help()` function.
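+
+Condensed to its essential calls, the flow looks roughly like the sketch
+below (the paths, flags, and the 784-dimensional input type are illustrative
+assumptions for the MNIST case, not fixed values):
+
+.. code-block:: python
+
+    from py_paddle import swig_paddle, DataProviderConverter
+    from paddle.trainer.config_parser import parse_config
+    from paddle.trainer.PyDataProvider2 import dense_vector
+
+    swig_paddle.initPaddle("--use_gpu=0")                   # 1. initialize
+    conf = parse_config("trainer_config.py", "")            # 2. parse the config
+    network = swig_paddle.GradientMachine.createFromConfigProto(
+        conf.model_config)                                  # 3. build the network
+    network.loadParameters("./output/model/")               # 4. load trained parameters
+    converter = DataProviderConverter([dense_vector(784)])  # 5. prepare input data
+    sample = [[[0.0] * 784]]                                # one all-zero image
+    print network.forwardTest(converter(sample))            # 6. predict
+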
Let's walk through the sample script: + +* At the beginning, use :code:`swig_paddle.initPaddle()` to initialize + PaddlePaddle with command-line arguments; for more about command-line arguments + see :ref:`cmd_detail_introduction`. +* Parse the configuration file that is used in training with :code:`parse_config()`. + Because prediction data usually has no labels, and the desired output is normally + the activation of the output layer rather than of the cost layer, you should modify + the configuration file accordingly before using it for prediction. +* Create a neural network with + :code:`swig_paddle.GradientMachine.createFromConfigProto()`, which takes the + parsed configuration :code:`conf.model_config` as its argument. Then load the + trained parameters from the model with :code:`network.loadParameters()`. +* Create a data converter object of utility class :code:`DataProviderConverter`. + - Note: As swig_paddle can only accept C++ matrices, we offer a utility + class DataProviderConverter that accepts the same input data as + PyDataProvider2; for more information please refer to the documentation + of :ref:`api_pydataprovider2`. +* Do the prediction with :code:`forwardTest()`, which takes the converted + input data and outputs the activations of the output layer. + +Here is a typical output: + +.. code-block:: text + + [{'id': None, 'value': array([[ 5.53018653e-09, 1.12194102e-05, 1.96644767e-09, + 1.43630644e-02, 1.51111044e-13, 9.85625684e-01, + 2.08823112e-10, 2.32777140e-08, 2.00186201e-09, + 1.15501715e-08], + [ 9.99982715e-01, 1.27787406e-10, 1.72296313e-05, + 1.49316648e-09, 1.36540484e-11, 6.93137714e-10, + 2.70634608e-08, 3.48565123e-08, 5.25639710e-09, + 4.48684503e-08]], dtype=float32)}] + +:code:`value` is the output of the output layer; each row represents the result of +the corresponding row in the input data, and each element represents the activation of +the corresponding neuron in the output layer. + diff --git a/doc/api/v2/config/activation.rst b/doc/api/v2/config/activation.rst new file mode 100644 index 0000000000000000000000000000000000000000..eca3ce03bcdc599edca802d8dfca48d4f28275a2 --- /dev/null +++ b/doc/api/v2/config/activation.rst @@ -0,0 +1,101 @@ +=========== +Activation +=========== + +Abs +=== + +.. automodule:: paddle.v2.activation + :members: Abs + :noindex: + +Exp +=== + +.. automodule:: paddle.v2.activation + :members: Exp + :noindex: + +Identity +======== + +.. automodule:: paddle.v2.activation + :members: Identity + :noindex: + +Linear +====== + +.. automodule:: paddle.v2.activation + :members: Linear + :noindex: + +Log +=== + +.. automodule:: paddle.v2.activation + :members: Log + :noindex: + +Square +====== + +.. automodule:: paddle.v2.activation + :members: Square + :noindex: + +Sigmoid +======= + +.. automodule:: paddle.v2.activation + :members: Sigmoid + :noindex: + +Softmax +======= + +.. automodule:: paddle.v2.activation + :members: Softmax + :noindex: + +SequenceSoftmax +=============== + +.. automodule:: paddle.v2.activation + :members: SequenceSoftmax + :noindex: + +Relu +==== + +.. automodule:: paddle.v2.activation + :members: Relu + :noindex: + +BRelu +===== + +.. automodule:: paddle.v2.activation + :members: BRelu + :noindex: + +SoftRelu +======== + +.. automodule:: paddle.v2.activation + :members: SoftRelu + :noindex: + +Tanh +==== + +.. automodule:: paddle.v2.activation + :members: Tanh + :noindex: + +STanh +===== + +..
automodule:: paddle.v2.activation + :members: STanh + :noindex: diff --git a/doc/api/v2/config/attr.rst b/doc/api/v2/config/attr.rst new file mode 100644 index 0000000000000000000000000000000000000000..a93f41b86779200d8bac651614f4d61f4895875f --- /dev/null +++ b/doc/api/v2/config/attr.rst @@ -0,0 +1,6 @@ +Parameter Attribute +=================== + +.. automodule:: paddle.v2.attr + :members: + :noindex: diff --git a/doc/api/v2/config/evaluators.rst b/doc/api/v2/config/evaluators.rst new file mode 100644 index 0000000000000000000000000000000000000000..9ac972fb193a2fb525edc507f7ba1303d2c8eabe --- /dev/null +++ b/doc/api/v2/config/evaluators.rst @@ -0,0 +1,110 @@ +.. _api_v2: + +========== +Evaluators +========== + +Classification +============== + +classification_error +-------------------- +.. automodule:: paddle.v2.evaluator + :members: classification_error + :noindex: + +auc +--- +.. automodule:: paddle.v2.evaluator + :members: auc + :noindex: + +ctc_error +--------- +.. automodule:: paddle.v2.evaluator + :members: ctc_error + :noindex: + +chunk +----- +.. automodule:: paddle.v2.evaluator + :members: chunk + :noindex: + +precision_recall +---------------- +.. automodule:: paddle.v2.evaluator + :members: precision_recall + :noindex: + +Rank +==== + +pnpair +------ +.. automodule:: paddle.v2.evaluator + :members: pnpair + :noindex: + +Utils +===== + +sum +--- +.. automodule:: paddle.v2.evaluator + :members: sum + :noindex: + +column_sum +---------- +.. automodule:: paddle.v2.evaluator + :members: column_sum + :noindex: + +Print +===== + +classification_error_printer +---------------------------- +.. automodule:: paddle.v2.evaluator + :members: classification_error_printer + :noindex: + +gradient_printer +---------------- +.. automodule:: paddle.v2.evaluator + :members: gradient_printer + :noindex: + +maxid_printer +------------- +.. automodule:: paddle.v2.evaluator + :members: maxid_printer + :noindex: + +maxframe_printer +---------------- +.. automodule:: paddle.v2.evaluator + :members: maxframe_printer + :noindex: + +seqtext_printer +--------------- +.. automodule:: paddle.v2.evaluator + :members: seqtext_printer + :noindex: + +value_printer +------------- +.. automodule:: paddle.v2.evaluator + :members: value_printer + :noindex: + +Detection +========= + +detection_map +------------- +.. automodule:: paddle.v2.evaluator + :members: detection_map + :noindex: diff --git a/doc/api/v2/config/layer.rst b/doc/api/v2/config/layer.rst new file mode 100644 index 0000000000000000000000000000000000000000..cb330ea5e1b914587a725c9b90a33053f3fbbc3d --- /dev/null +++ b/doc/api/v2/config/layer.rst @@ -0,0 +1,512 @@ +.. _api_v2.layer: + +====== +Layers +====== + +Data layer +=========== + +.. _api_v2.layer_data: + +data +---- +.. autoclass:: paddle.v2.layer.data + :noindex: + +Fully Connected Layers +====================== + +.. _api_v2.layer_fc: + +fc +-- +.. autoclass:: paddle.v2.layer.fc + :noindex: + +selective_fc +------------ +.. autoclass:: paddle.v2.layer.selective_fc + :noindex: + +Conv Layers +=========== + +conv_operator +------------- +.. autoclass:: paddle.v2.layer.conv_operator + :noindex: + +conv_projection +--------------- +.. autoclass:: paddle.v2.layer.conv_projection + :noindex: + +conv_shift +---------- +.. autoclass:: paddle.v2.layer.conv_shift + :noindex: + +img_conv +-------- +.. autoclass:: paddle.v2.layer.img_conv + :noindex: + +.. _api_v2.layer_context_projection: + +context_projection +------------------ +..
autoclass:: paddle.v2.layer.context_projection + :noindex: + +row_conv +-------- +.. autoclass:: paddle.v2.layer.row_conv + :noindex: + +Image Pooling Layer +=================== + +img_pool +-------- +.. autoclass:: paddle.v2.layer.img_pool + :noindex: + +spp +--- +.. autoclass:: paddle.v2.layer.spp + :noindex: + +maxout +------ +.. autoclass:: paddle.v2.layer.maxout + :noindex: + +Norm Layer +========== + +img_cmrnorm +----------- +.. autoclass:: paddle.v2.layer.img_cmrnorm + :noindex: + +batch_norm +---------- +.. autoclass:: paddle.v2.layer.batch_norm + :noindex: + +sum_to_one_norm +--------------- +.. autoclass:: paddle.v2.layer.sum_to_one_norm + :noindex: + +cross_channel_norm +------------------ +.. autoclass:: paddle.v2.layer.cross_channel_norm + :noindex: + +row_l2_norm +----------- +.. autoclass:: paddle.v2.layer.row_l2_norm + :noindex: + +Recurrent Layers +================ + +recurrent +--------- +.. autoclass:: paddle.v2.layer.recurrent + :noindex: + +lstmemory +--------- +.. autoclass:: paddle.v2.layer.lstmemory + :noindex: + +grumemory +--------- +.. autoclass:: paddle.v2.layer.grumemory + :noindex: + +Recurrent Layer Group +===================== + +memory +------ +.. autoclass:: paddle.v2.layer.memory + :noindex: + +recurrent_group +--------------- +.. autoclass:: paddle.v2.layer.recurrent_group + :noindex: + +lstm_step +--------- +.. autoclass:: paddle.v2.layer.lstm_step + :noindex: + +gru_step +-------- +.. autoclass:: paddle.v2.layer.gru_step + :noindex: + +beam_search +------------ +.. autoclass:: paddle.v2.layer.beam_search + :noindex: + +get_output +---------- +.. autoclass:: paddle.v2.layer.get_output + :noindex: + +Mixed Layer +=========== + +.. _api_v2.layer_mixed: + +mixed +----- +.. autoclass:: paddle.v2.layer.mixed + :noindex: + +.. _api_v2.layer_embedding: + +embedding +--------- +.. autoclass:: paddle.v2.layer.embedding + :noindex: + +scaling_projection +------------------ +.. autoclass:: paddle.v2.layer.scaling_projection + :noindex: + +dotmul_projection +----------------- +.. autoclass:: paddle.v2.layer.dotmul_projection + :noindex: + +dotmul_operator +--------------- +.. autoclass:: paddle.v2.layer.dotmul_operator + :noindex: + +full_matrix_projection +---------------------- +.. autoclass:: paddle.v2.layer.full_matrix_projection + :noindex: + +identity_projection +------------------- +.. autoclass:: paddle.v2.layer.identity_projection + :noindex: + +slice_projection +------------------- +.. autoclass:: paddle.v2.layer.slice_projection + :noindex: + +table_projection +---------------- +.. autoclass:: paddle.v2.layer.table_projection + :noindex: + +trans_full_matrix_projection +---------------------------- +.. autoclass:: paddle.v2.layer.trans_full_matrix_projection + :noindex: + +Aggregate Layers +================ + +AggregateLevel +-------------- +.. autoclass:: paddle.v2.layer.AggregateLevel + :noindex: + +.. _api_v2.layer_pooling: + +pooling +------- +.. autoclass:: paddle.v2.layer.pooling + :noindex: + +.. _api_v2.layer_last_seq: + +last_seq +-------- +.. autoclass:: paddle.v2.layer.last_seq + :noindex: + +.. _api_v2.layer_first_seq: + +first_seq +--------- +.. autoclass:: paddle.v2.layer.first_seq + :noindex: + +concat +------ +.. autoclass:: paddle.v2.layer.concat + :noindex: + +seq_concat +---------- +.. autoclass:: paddle.v2.layer.seq_concat + :noindex: + +kmax_sequence_score +------------------- +.. autoclass:: paddle.v2.layer.kmax_sequence_score + :noindex: + +sub_nested_seq +-------------- +.. 
autoclass:: paddle.v2.layer.sub_nested_seq + :noindex: + +Reshaping Layers +================ + +block_expand +------------ +.. autoclass:: paddle.v2.layer.block_expand + :noindex: + +.. _api_v2.layer_expand: + +ExpandLevel +----------- +.. autoclass:: paddle.v2.layer.ExpandLevel + :noindex: + +expand +------ +.. autoclass:: paddle.v2.layer.expand + :noindex: + +repeat +------ +.. autoclass:: paddle.v2.layer.repeat + :noindex: + +rotate +------ +.. autoclass:: paddle.v2.layer.rotate + :noindex: + +seq_reshape +----------- +.. autoclass:: paddle.v2.layer.seq_reshape + :noindex: + +Math Layers +=========== + +addto +----- +.. autoclass:: paddle.v2.layer.addto + :noindex: + +linear_comb +----------- +.. autoclass:: paddle.v2.layer.linear_comb + :noindex: + +interpolation +------------- +.. autoclass:: paddle.v2.layer.interpolation + :noindex: + +bilinear_interp +--------------- +.. autoclass:: paddle.v2.layer.bilinear_interp + :noindex: + +power +----- +.. autoclass:: paddle.v2.layer.power + :noindex: + +scaling +------- +.. autoclass:: paddle.v2.layer.scaling + :noindex: + +clip +---- +.. autoclass:: paddle.v2.layer.clip + :noindex: + +slope_intercept +--------------- +.. autoclass:: paddle.v2.layer.slope_intercept + :noindex: + +tensor +------ +.. autoclass:: paddle.v2.layer.tensor + :noindex: + +.. _api_v2.layer_cos_sim: + +cos_sim +------- +.. autoclass:: paddle.v2.layer.cos_sim + :noindex: + +trans +----- +.. autoclass:: paddle.v2.layer.trans + :noindex: + +Sampling Layers +=============== + +maxid +----- +.. autoclass:: paddle.v2.layer.max_id + :noindex: + +sampling_id +----------- +.. autoclass:: paddle.v2.layer.sampling_id + :noindex: + +multiplex +--------- +.. autoclass:: paddle.v2.layer.multiplex + :noindex: + + +Slicing and Joining Layers +========================== + +pad +---- +.. autoclass:: paddle.v2.layer.pad + :noindex: + +.. _api_v2.layer_costs: + +Cost Layers +=========== + +cross_entropy_cost +------------------ +.. autoclass:: paddle.v2.layer.cross_entropy_cost + :noindex: + +cross_entropy_with_selfnorm_cost +-------------------------------- +.. autoclass:: paddle.v2.layer.cross_entropy_with_selfnorm_cost + :noindex: + +multi_binary_label_cross_entropy_cost +------------------------------------- +.. autoclass:: paddle.v2.layer.multi_binary_label_cross_entropy_cost + :noindex: + +huber_cost +---------- +.. autoclass:: paddle.v2.layer.huber_cost + :noindex: + +lambda_cost +----------- +.. autoclass:: paddle.v2.layer.lambda_cost + :noindex: + +mse_cost +-------- +.. autoclass:: paddle.v2.layer.mse_cost + :noindex: + +rank_cost +--------- +.. autoclass:: paddle.v2.layer.rank_cost + :noindex: + +sum_cost +--------- +.. autoclass:: paddle.v2.layer.sum_cost + :noindex: + +crf +--- +.. autoclass:: paddle.v2.layer.crf + :noindex: + +crf_decoding +------------ +.. autoclass:: paddle.v2.layer.crf_decoding + :noindex: + +ctc +--- +.. autoclass:: paddle.v2.layer.ctc + :noindex: + +warp_ctc +-------- +.. autoclass:: paddle.v2.layer.warp_ctc + :noindex: + +nce +--- +.. autoclass:: paddle.v2.layer.nce + :noindex: + +hsigmoid +--------- +.. autoclass:: paddle.v2.layer.hsigmoid + :noindex: + +smooth_l1_cost +-------------- +.. autoclass:: paddle.v2.layer.smooth_l1_cost + :noindex: + +multibox_loss +-------------- +.. autoclass:: paddle.v2.layer.multibox_loss + :noindex: + +Check Layer +============ + +eos +--- +.. autoclass:: paddle.v2.layer.eos + :noindex: + +Miscs +===== + +dropout +-------------- +.. 
autoclass:: paddle.v2.layer.dropout + :noindex: + +Activation with learnable parameter +=================================== + +prelu +-------- +.. autoclass:: paddle.v2.layer.prelu + :noindex: + +gated_unit +----------- +.. autoclass:: paddle.v2.layer.gated_unit + :noindex: + +Detection output Layer +====================== + +detection_output +---------------- +.. autoclass:: paddle.v2.layer.detection_output + :noindex: diff --git a/doc/api/v2/config/networks.rst b/doc/api/v2/config/networks.rst new file mode 100644 index 0000000000000000000000000000000000000000..6e813ab1a820d068ea3e54cad6178f1cf928eadc --- /dev/null +++ b/doc/api/v2/config/networks.rst @@ -0,0 +1,127 @@ +======== +Networks +======== + +The v2.networks module contains pieces of neural network that combine multiple layers. + +NLP +=== + +sequence_conv_pool +------------------ +.. automodule:: paddle.v2.networks + :members: sequence_conv_pool + :noindex: + +.. _api_trainer_config_helpers_network_text_conv_pool: + +text_conv_pool +-------------- +.. automodule:: paddle.v2.networks + :members: text_conv_pool + :noindex: + +Images +====== + +img_conv_bn_pool +---------------- +.. automodule:: paddle.v2.networks + :members: img_conv_bn_pool + :noindex: + +img_conv_group +-------------- +.. automodule:: paddle.v2.networks + :members: img_conv_group + :noindex: + +.. _api_trainer_config_helpers_network_simple_img_conv_pool: + +simple_img_conv_pool +-------------------- +.. automodule:: paddle.v2.networks + :members: simple_img_conv_pool + :noindex: + +small_vgg +--------- +.. automodule:: paddle.v2.networks + :members: small_vgg + :noindex: + +vgg_16_network +--------------- +.. automodule:: paddle.v2.networks + :members: vgg_16_network + :noindex: + +Recurrent +========= + +LSTM +---- + +lstmemory_unit +`````````````` +.. automodule:: paddle.v2.networks + :members: lstmemory_unit + :noindex: + +lstmemory_group +``````````````` +.. automodule:: paddle.v2.networks + :members: lstmemory_group + :noindex: + +simple_lstm +``````````` +.. automodule:: paddle.v2.networks + :members: simple_lstm + :noindex: + +bidirectional_lstm +`````````````````` +.. automodule:: paddle.v2.networks + :members: bidirectional_lstm + :noindex: + +GRU +--- + +gru_unit +```````` +.. automodule:: paddle.v2.networks + :members: gru_unit + :noindex: + +gru_group +````````` +.. automodule:: paddle.v2.networks + :members: gru_group + :noindex: + +simple_gru +`````````` +.. automodule:: paddle.v2.networks + :members: simple_gru + :noindex: + +simple_gru2 +``````````` +.. automodule:: paddle.v2.networks + :members: simple_gru2 + :noindex: + +bidirectional_gru +`````````````````` +.. automodule:: paddle.v2.networks + :members: bidirectional_gru + :noindex: + +simple_attention +---------------- +.. automodule:: paddle.v2.networks + :members: simple_attention + :noindex: + diff --git a/doc/api/v2/config/optimizer.rst b/doc/api/v2/config/optimizer.rst new file mode 100644 index 0000000000000000000000000000000000000000..b32373fdef52a7aa9d64b12cda3f76cb2abf351b --- /dev/null +++ b/doc/api/v2/config/optimizer.rst @@ -0,0 +1,45 @@ +========== +Optimizer +========== + +Momentum +======== +.. automodule:: paddle.v2.optimizer + :members: Momentum + :noindex: + +Adam +==== +.. automodule:: paddle.v2.optimizer + :members: Adam + :noindex: + +Adamax +====== +.. automodule:: paddle.v2.optimizer + :members: Adamax + :noindex: + +AdaGrad +======= +.. automodule:: paddle.v2.optimizer + :members: AdaGrad + :noindex: + +DecayedAdaGrad +============== +.. 
automodule:: paddle.v2.optimizer + :members: DecayedAdaGrad + :noindex: + +AdaDelta +======== +.. automodule:: paddle.v2.optimizer + :members: AdaDelta + :noindex: + +RMSProp +======= +.. automodule:: paddle.v2.optimizer + :members: RMSProp + :noindex: diff --git a/doc/api/v2/config/pooling.rst b/doc/api/v2/config/pooling.rst new file mode 100644 index 0000000000000000000000000000000000000000..d26b365c9284632210a1532853e39feedc70758b --- /dev/null +++ b/doc/api/v2/config/pooling.rst @@ -0,0 +1,46 @@ +======= +Pooling +======= + +BasePool +======== +.. automodule:: paddle.v2.pooling + :members: BasePool + :noindex: + +Avg +=== +.. automodule:: paddle.v2.pooling + :members: Avg + :noindex: + +Max +=== +.. automodule:: paddle.v2.pooling + :members: Max + :noindex: + +Sum +=== +.. automodule:: paddle.v2.pooling + :members: Sum + :noindex: + +SquareRootN +=========== +.. automodule:: paddle.v2.pooling + :members: SquareRootN + :noindex: + +CudnnAvg +======== +.. automodule:: paddle.v2.pooling + :members: CudnnAvg + :noindex: + +CudnnMax +======== +.. automodule:: paddle.v2.pooling + :members: CudnnMax + :noindex: + diff --git a/doc/api/v2/data.rst b/doc/api/v2/data.rst new file mode 100644 index 0000000000000000000000000000000000000000..fef87c4fbdb452771ecdb361c6eeae5b32bcee14 --- /dev/null +++ b/doc/api/v2/data.rst @@ -0,0 +1,113 @@ +================================== +Data Reader Interface and DataSets +================================== + + +DataTypes +========= + +.. automodule:: paddle.v2.data_type + :members: + :noindex: + +DataFeeder +========== + +.. automodule:: paddle.v2.data_feeder + :members: + :noindex: + +Reader +====== + +.. automodule:: paddle.v2.reader + :members: + :noindex: + +.. automodule:: paddle.v2.reader.creator + :members: + :noindex: + +minibatch +========= + +.. automodule:: paddle.v2.minibatch + :members: + :noindex: + +Dataset +======= + +.. automodule:: paddle.v2.dataset + :members: + :noindex: + +mnist ++++++ + +.. automodule:: paddle.v2.dataset.mnist + :members: + :noindex: + +cifar ++++++ + +.. automodule:: paddle.v2.dataset.cifar + :members: + :noindex: + +conll05 ++++++++ + +.. automodule:: paddle.v2.dataset.conll05 + :members: get_dict,get_embedding,test + :noindex: + +imdb +++++ + +.. automodule:: paddle.v2.dataset.imdb + :members: + :noindex: + +imikolov +++++++++ + +.. automodule:: paddle.v2.dataset.imikolov + :members: + :noindex: + +movielens ++++++++++ + +.. automodule:: paddle.v2.dataset.movielens + :members: + :noindex: + +.. autoclass:: paddle.v2.dataset.movielens.MovieInfo + :noindex: + +.. autoclass:: paddle.v2.dataset.movielens.UserInfo + :noindex: + +sentiment ++++++++++ + +.. automodule:: paddle.v2.dataset.sentiment + :members: + :noindex: + +uci_housing ++++++++++++ + +.. automodule:: paddle.v2.dataset.uci_housing + :members: + :noindex: + +wmt14 ++++++ + +.. automodule:: paddle.v2.dataset.wmt14 + :members: + :noindex: + diff --git a/doc/api/v2/model_configs.rst b/doc/api/v2/model_configs.rst new file mode 100644 index 0000000000000000000000000000000000000000..992b559cbd87244612521d4c96f84f997d6c4196 --- /dev/null +++ b/doc/api/v2/model_configs.rst @@ -0,0 +1,13 @@ +Model Configuration +=================== + +.. 
toctree:: + :maxdepth: 1 + + config/activation.rst + config/layer.rst + config/evaluators.rst + config/optimizer.rst + config/pooling.rst + config/networks.rst + config/attr.rst diff --git a/doc/api/v2/run_logic.rst b/doc/api/v2/run_logic.rst new file mode 100644 index 0000000000000000000000000000000000000000..5c97651f6536d89d2b5926d4b2907a547aa86b55 --- /dev/null +++ b/doc/api/v2/run_logic.rst @@ -0,0 +1,31 @@ +====================== +Training and Inference +====================== + +Parameters +========== + +.. automodule:: paddle.v2.parameters + :members: Parameters + :noindex: + +Trainer +======= + +.. automodule:: paddle.v2.trainer + :members: SGD + :noindex: + +Event +===== + +.. automodule:: paddle.v2.event + :members: + :noindex: + +Inference +========= + +.. autofunction:: paddle.v2.infer + :noindex: + \ No newline at end of file diff --git a/doc/conf.py.in b/doc/conf.py.in deleted file mode 100644 index 01d156e887b623898df09044a800fd067ee116db..0000000000000000000000000000000000000000 --- a/doc/conf.py.in +++ /dev/null @@ -1,149 +0,0 @@ -# -*- coding: utf-8 -*- -# -# documentation build configuration file, created by -# sphinx-quickstart on Thu Jul 23 19:40:08 2015. -# -# This file is execfile()d with the current directory set to its -# containing dir. -# -# Note that not all possible configuration values are present in this -# autogenerated file. -# -# All configuration values have a default; values that are commented out -# serve to show the default. -import sys -import os, subprocess -import shlex -from recommonmark import parser, transform - -MarkdownParser = parser.CommonMarkParser -AutoStructify = transform.AutoStructify -# If extensions (or modules to document with autodoc) are in another directory, -# add these directories to sys.path here. If the directory is relative to the -# documentation root, use os.path.abspath to make it absolute, like shown here. -sys.path.insert(0, '@PROJ_ROOT@/python') - -templates_path = ["@PROJ_ROOT@/doc_theme/templates"] - -# -- General configuration ------------------------------------------------ - -# General information about the project. -project = u'PaddlePaddle' -author = u'%s developers' % project -copyright = u'2016, %s' % author -github_doc_root = '' - -# add markdown parser -MarkdownParser.github_doc_root = github_doc_root -source_parsers = { - '.md': MarkdownParser, - '.Rmd': MarkdownParser, -} -os.environ['PADDLE_BUILD_DOC'] = '1' - -# Add any Sphinx extension module names here, as strings. They can be -# extensions coming with Sphinx (named 'sphinx.ext.*') or your custom ones -extensions = [ - 'sphinx.ext.autodoc', - 'sphinx.ext.autosummary', - 'sphinx.ext.mathjax', - 'sphinx.ext.napoleon', -] - - -autodoc_member_order = 'bysource' - - -# The suffix(es) of source filenames. -# You can specify multiple suffix as a list of string: -# source_suffix = ['.rst', '.md'] -source_suffix = ['.rst', '.md', '.Rmd'] - -# The encoding of source files. -source_encoding = 'utf-8' - -# The master toctree document. -master_doc = 'index' - -# The language for content autogenerated by Sphinx. Refer to documentation -# for a list of supported languages. -# -# This is also used if you do content translation via gettext catalogs. -# Usually you set "language" from the command line for these cases. -language = None - -# There are two options for replacing |today|: either, you set today to some -# non-false value, then it is used: -#today = '' -# Else, today_fmt is used as the format for a strftime call. 
-#today_fmt = '%B %d, %Y' - -# List of patterns, relative to source directory, that match files and -# directories to ignore when looking for source files. -exclude_patterns = ['_build'] - -# The reST default role (used for this markup: `text`) to use for all -# documents. -#default_role = None - -# If true, '()' will be appended to :func: etc. cross-reference text. -#add_function_parentheses = True - -# If true, the current module name will be prepended to all description -# unit titles (such as .. function::). -#add_module_names = True - -# If true, sectionauthor and moduleauthor directives will be shown in the -# output. They are ignored by default. -#show_authors = False - -# The name of the Pygments (syntax highlighting) style to use. -pygments_style = 'sphinx' - -# A list of ignored prefixes for module index sorting. -#modindex_common_prefix = [] - -# If true, keep warnings as "system message" paragraphs in the built documents. -#keep_warnings = False - -# If true, `todo` and `todoList` produce output, else they produce nothing. -todo_include_todos = False - -# -- Options for HTML output ---------------------------------------------- - -# The theme to use for HTML and HTML Help pages. See the documentation for -# a list of builtin themes. -html_theme = 'sphinx_rtd_theme' - -# Add any paths that contain custom static files (such as style sheets) here, -# relative to this directory. They are copied after the builtin static files, -# so a file named "default.css" will overwrite the builtin "default.css". -html_static_path = ['@PROJ_ROOT@/doc_theme/static'] - -# Output file base name for HTML help builder. -htmlhelp_basename = project + 'doc' - -# -- Options for LaTeX output --------------------------------------------- -latex_elements = { -} - -# Grouping the document tree into LaTeX files. List of tuples -# (source start file, target name, title, -# author, documentclass [howto, manual, or own class]). -latex_documents = [ - (master_doc, '%s.tex' % project, project, - author, 'manual'), -] - -# Use the .. admonition:: directive for Notes sections. -# False to use the .. rubric:: directive instead. -napoleon_use_admonition_for_notes = True - -def setup(app): - # Add hook for building doxygen xml when needed - # no c++ API for now - app.add_config_value('recommonmark_config', { - 'url_resolver': lambda url: github_doc_root + url, - 'enable_eval_rst': True, - }, True) - app.add_transform(AutoStructify) diff --git a/doc/design/api.md b/doc/design/api.md new file mode 100644 index 0000000000000000000000000000000000000000..8185d2af0ea264a2e7b4e28b9ed05279e4a22014 --- /dev/null +++ b/doc/design/api.md @@ -0,0 +1,262 @@ +# PaddlePaddle Design Doc + +## Ingredients + +Our design principle is to start from the essence: how can we +allow users to express and solve their problems as neural networks? +Some essential concepts that our API has to provide include: + +1. A *topology* is an expression of *layers*. + +1. A layer could be any kind of computation, including *cost*. + +1. Some layers have parameters, some don't. Most costs don't have + parameters. + +1. In some topologies, layers share parameters. For + example, + [the network for training a ranking model](https://github.com/PaddlePaddle/Paddle/issues/1311#issuecomment-279121850). + +1. At programming time, users specify topologies and possible sharing + of parameters. PaddlePaddle can figure out and create parameters + required (and possibly shared) by one or more topologies.
+
+
+## Starting from Examples
+
+As a summary of
+[our discussion](https://github.com/PaddlePaddle/Paddle/issues/1315),
+let us present two examples here:
+
+
+### Example 1. Sharing Parameters between Layers
+
+We use
+the
+[3-branch ranking](https://github.com/PaddlePaddle/Paddle/issues/1311#issuecomment-279121850) model
+in this example. For your convenience, we copy-paste the model's
+topology as follows:
+
+```
+A -> f -\
+Q -> f --> cost
+B -> f -/
+```
+
+The following program trains the topology including the cost, and then
+uses the sub-network in the trained topology for inference:
+
+```python
+def f(x):
+    e = paddle.layer.embedding(x, parameter_name="embedding")
+    o = paddle.layer.softmax(e, parameter_name="semantic")
+    return o
+
+# Create 3 topologies (subnets); they share parameters because all
+# corresponding layers have the same parameter names.
+fA = f(paddle.layer.data(input_name="A"))
+fB = f(paddle.layer.data(input_name="B"))
+fQ = f(paddle.layer.data(input_name="Q"))
+
+topology = paddle.layer.less_than(
+    paddle.layer.cross_entropy(fA, fQ),
+    paddle.layer.cross_entropy(fB, fQ))
+
+# Derive parameters required in topology and create them in model.
+parameters = paddle.parameters.create(topology)
+
+# Estimate parameters used in topology from data.
+paddle.train(topology, parameters, reader=read_ranking_model_data)
+
+# Inference using fA (or fB or fQ, as they share their parameters).
+[testA, testB, testQ] = read_ranking_model_data()
+print "The semantic-vector of testA: ", paddle.infer(fA, parameters, testA)
+```
+
+
+### Example 2. Sharing Parameters between "Models"
+
+We use [GAN](https://github.com/PaddlePaddle/book/tree/develop/gan) in
+this example. In the following example program, `d0` and `d1`
+correspond to the two networks in the following figure:
+
+
+
+```python
+def G(x):
+    # over-simplified example as G has only one layer:
+    return paddle.layer.fc(x, parameter_name="G")
+
+def D(x):
+    # again, over-simplified:
+    return paddle.layer.fc(x, parameter_name="D")
+
+# Construct the first topology, which contains both D and G.
+# By learning this topology, we update parameters of G.
+d0 = paddle.layer.should_be_false(D(G(paddle.layer.data())))
+
+# Construct a second topology d1, which contains only D. By
+# training this topology, we update parameters of D. Note
+# that d1 shares parameters with d0.
+d1 = paddle.layer.should_be_true(D(paddle.layer.data()))
+
+# Create parameters from a list of multiple topologies (models) for
+# the chance to share parameters between these topologies.
+parameters = paddle.parameters.create([d0, d1])
+
+# Iterative training of GAN.
+for ...:
+    train(d0, parameters, reader=read_from_rng, immutable_parameters={"D"})
+    train(d1, parameters, reader=read_from_realistic_images)
+
+# Use d1 for inference:
+print "D thinks a batch of images are realistic ", infer(d1, parameters, read_mnist_images)
+```
+
+
+### Summary
+
+
+The above two programs reveal some important design concerns:
+
+1. Users describe a topology as an expression of layers. Every layer
+   has a *parameter name*. If the users don't specify it explicitly, it's automatically generated as a unique name. By
+   specifying the parameter name, users can specify the sharing of
+   parameters between layers and even between topologies.
+
+1. `paddle.parameters.create` figures out parameters required by one
+   or more topologies from the parameter names of layers.
+   It creates these
+   parameters and returns a `ParameterSet` object, which is in essence
+   a map from *parameter names* to *parameters*.
+
+1. At training and inference time, `paddle.train` and `paddle.infer`
+   require both a topology and the parameter set that holds the parameters of that topology. There are several reasons for this:
+
+   1. This prevents users from forgetting to call
+      `paddle.parameters.create`.
+   1. `paddle.train` needs to know which parameter set to update.
+   1. Users could load another (pre-trained) parameter set and use it
+      with a topology in `paddle.infer`.
+
+1. By specifying the `immutable_parameters` parameter of
+   `paddle.train`, we can forbid the update of these parameters.
+
+
+## Reader
+
+Not all programming frameworks allow users to define I/O functions.
+An example is Google MapReduce, which can only read from text,
+SSTable, and RecordIO files. Hadoop MapReduce allows users to define
+readers and writers by deriving from base classes `Reader` and
+`Writer`. The former is less flexible but also less error-prone. We
+decide to provide users the flexibility to define their own readers.
+
+
+There are some open questions here:
+
+1. **Should a reader return a Python dictionary?**
+
+1. **How to map multiple outputs from a reader to multiple data layers?**
+
+1. **How to easily compose some existing readers to read more data and
+   feed a topology with more data layers?**
+
+
+## Training
+
+The recommended way to train a model is to call `paddle.train`,
+which simply calls `paddle.trainer.Default`, a global variable of
+type `paddle.trainer.SGD`. Equivalently, we can do
+
+```python
+opt = paddle.trainer.SGD(..., paddle.updater.Adam(...))
+opt.train(topology, parameters, reader=read, ...)
+```
+
+### Updater
+
+Please be aware that a trainer can accept an updater as its data
+member, where an updater is a class derived from
+`paddle.trainer.Updater`. This is to make it easier to customize
+trainers, as discussed
+[here](https://github.com/PaddlePaddle/Paddle/issues/1319).
+
+### Event Handler
+
+`paddle.train` and `paddle.trainer.XXX.train` take an optional
+parameter `event_handler`, which should be either `None` or a function
+that handles some events:
+
+1. BeginTraining
+1. EndTraining
+1. BeginIteration
+1. EndIteration
+1. BeginPass
+1. EndPass
+
+where EndPass is sent if and only if the reader yields
+`end_pass=True`.
+
+An example follows:
+
+```python
+def event_handler(event):
+    if isinstance(event, paddle.event.EndIteration):
+        print paddle.test(...)
+
+paddle.train(topology, parameters, reader, event_handler)
+```
+
+If we are writing a PaddlePaddle program in and for IPython/Jupyter,
+we can use matplotlib in the event handler to plot a curve of
+cost/error versus iterations, as shown
+[here](https://blog.dominodatalab.com/interactive-dashboards-in-jupyter/).
+
+### Distributed Training
+
+If users want to do distributed training on a cluster, s/he should
+call `paddle.dist_train` and provide access tokens to the cluster as
+a parameter.
+
+For example, if the user has a TLS certificate that allows access to a
+Kubernetes cluster, s/he should be able to call
+
+```python
+paddle.dist_train(model,
+                  trainer=paddle.trainer.SGD(...,
+                                             paddle.updater.Adam(...)),
+                  reader=read,
+                  k8s_user="yi",
+                  k8s_token="kube_cluster_tls.pem",
+                  k8s_job="hello",
+                  num_parameter_servers=15)
+```
+
+The pseudo code of `paddle.dist_train` is as follows:
+
+```python
+def dist_train(topology, parameters, trainer, reader, ...):
+    if os.getenv("KUBERNETES_SERVICE_HOST") is None:
+        image_name = k8s_user + '/' + k8s_job
+        docker_build(image_name)
+        docker_push()
+        kube_ctrl_start_job(image_name, k8s_user, k8s_token)
+    else:
+        rank = kube_list_containers_in_job_and_return_current_containers_rank()
+        if rank == 0:
+            master()
+        elif rank < 15:
+            parameter_server()
+        else:
+            trainer.train(model, reader=read)
+```
+
+Please be aware that if a process is running on the Kubernetes
+cluster, it will have some environment variables pre-defined.
+
+If `dist_train` doesn't see these environment variables, it knows
+that it's running on the user's personal computer, and it should work as a
+*launcher*. Otherwise, it knows that it's running on the cluster and
+needs to figure out its role as either the master, a trainer, or a
+parameter server.
diff --git a/doc/design/build_system/README.md b/doc/design/build_system/README.md
new file mode 100644
index 0000000000000000000000000000000000000000..bf0e4dddc1b640ecbce489f65820aaf8a4b3b1e7
--- /dev/null
+++ b/doc/design/build_system/README.md
@@ -0,0 +1,152 @@
+A few months ago when we were trying to replace CMake with Bazel, @emailweixu suggested that we rewrite those handy Bazel functions using CMake. Now it seems that it's the right time to get this done, as we are facing problems from the porting of Majel and the development of the new parameter server using Go and C++.
+
+Here are some initial thoughts. Your comments are welcome!
+
+### Required CMake Function
+
+I think we need only the following few CMake functions to make a project description lean and clean:
+
+| C++ | CUDA C++ | Go |
+|---|---|---|
+| cc_library | nv_library | go_library |
+| cc_binary | nv_binary | go_binary |
+| cc_test | nv_test | go_test |
+
+- The `_library` functions generate .a files from source code.
+- The `_binary` functions generate executable binary files.
+- The `_test` functions generate executable unit test files. They work like `_binary` but link `-lgtest` and `-lgtest_main`.
+
+The difference between `nv_` functions and `cc_` functions is that the former use `nvcc` instead of the system-default C++ compiler.
+
+Both `nv_` and `cc_` functions enable C++11 (-std=c++11).
+
+Also,
+
+- to describe external dependencies, we need `external_library`.
+- to build shared libraries, we need `shared_library`.
+
+### An Example Project
+
+Suppose that we have the aforementioned functions defined in our `/cmake` directory. The following example `CMakeLists.txt` describes a project including the following source files:
+
+- tensor.h
+- tensor.cc
+- tensor_test.cc
+- ops.h
+- ops.cu
+- ops_test.cu
+- api.go
+- api_test.go
+
+Suppose that ops.cu depends on CUDNN.
+
+```cmake
+# cc_binary parses tensor.cc and figures out that the target also depends
+# on tensor.h.
+cc_binary(tensor
+  SRCS
+  tensor.cc)
+
+# The dependency to target tensor implies that if any of
+# tensor{.h,.cc,_test.cc} is changed, tensor_test needs to be re-built.
+cc_test(tensor_test
+  SRCS
+  tensor_test.cc
+  DEPS
+  tensor)
+
+# I don't have a clear idea what parameters external_library needs to
+# have. @gangliao as a CMake expert would have better ideas.
+external_library(cudnn
+  ....)
+
+# Suppose that ops.cu depends on external target CUDNN. Also, ops.cu
+# includes global functions that take Tensor as their parameters, so
+# ops depends on tensor. This implies that if any of tensor.{h,cc},
+# ops.{h,cu} is changed, ops needs to be re-built.
+nv_library(ops
+  SRCS
+  ops.cu
+  DEPS
+  tensor
+  cudnn)  # cudnn is defined later.
+
+nv_test(ops_test
+  SRCS
+  ops_test.cu
+  DEPS
+  ops)
+
+# Because api.go defines a Go wrapper to ops and tensor, it depends on
+# both. This implies that if any of tensor.{h,cc}, ops.{h,cu}, or
+# api.go is changed, api needs to be re-built.
+go_library(api
+  SRCS
+  api.go
+  DEPS
+  tensor # Because ops depend on tensor, this line is optional.
+  ops)
+
+go_test(api_test
+  SRCS
+  api_test.go
+  DEPS
+  api)
+
+
+# This builds libapi.so. shared_library might use CMake target
+# api_shared so as to distinguish it from the above target api.
+shared_library(api
+  DEPS
+  api)
+
+```
+
+### Implementation
+
+As the above example CMakeLists.txt executes, each function invocation adds "nodes" to a dependency graph. It also uses this graph to generate CMake commands including `add_executable`, `add_dependencies`, `target_link_libraries`, and `add_test`.
+
+### Using Package Manager For Go
+
+Building Go binaries and libraries requires satisfying their dependencies; generally
+we can do `go get ./...` to download and compile all external dependencies. The
+problems are:
+
+1. `go get` will always get the latest code from the default branch of the
+   remote repo, so changes in dependencies might break the build. This is very
+   different from what we already have in `cmake/external`, which downloads a
+   specific version or commit id of the dependency.
+1. Some locations cannot access external dependencies through the internet, as mentioned
+   in https://github.com/PaddlePaddle/Paddle/issues/2605. Package management
+   tools can package the dependencies as a "vendor" package, which can be mirrored
+   on many cloud file hosting services, so users who want to compile Paddle by themselves can
+   download this "vendor" package from a mirror site.
+
+#### Choose A Suitable Tool
+
+As mentioned by @wangkuiyi, [here](https://github.com/golang/go/wiki/PackageManagementTools) is
+a list of dozens of Go package managers. We choose the tool using the following principles:
+
+- The most "active" projects, with more stars and more pull requests or commits
+- Widely used projects
+
+After comparing all these projects, we shall choose between the most popular
+tools: Godep and Glide.
+
+Here's a brief comparison between Godep and
+Glide: https://github.com/Masterminds/glide/wiki/Go-Package-Manager-Comparison. There are
+also many complaints about using `Godep`. A new "official" package
+management tool has been started at https://github.com/golang/dep to resolve
+such problems, but it's currently at the alpha stage. So the best choice now is
+obviously Glide.
+
+#### Manage Go Packages
+
+- Dependencies: `go/glide.yaml` will store the dependencies and their versions which
+  are directly imported by Paddle. `go/glide.lock` will store all dependencies recursively,
+  with their commit ids. Builds will "lock" to these packages if we don't `glide up`
+  them.
+- Vendor package: the `go/vendor` directory will be generated when running the `cmake` command. `cmake`
+  will download the code corresponding to `go/glide.lock`.
+  If we put a vendor folder
+  under `go/`, cmake will just check the commit ids of the packages under the folder;
+  if the commit ids match, there will be no download at all.
diff --git a/doc/design/cluster_train/README.md b/doc/design/cluster_train/README.md
new file mode 100644
index 0000000000000000000000000000000000000000..74961f80050c6b2723889b51416a2e8048174b00
--- /dev/null
+++ b/doc/design/cluster_train/README.md
@@ -0,0 +1,183 @@
+# Design Doc: Distributed Training
+
+## Objective
+
+In [these slides](https://www.slideshare.net/cxwangyi/paddlepaddle-a-complete-solution-for-businesses), we explained that we'd like PaddlePaddle to run on general-purpose clusters like those managed by Kubernetes, to address demands for AI from both Internet and non-Internet industries.
+
+This poses technical challenges to PaddlePaddle:
+
+1. Support fault-recovery.
+1. Support both offline and online training.
+1. [Serverless computing](https://en.wikipedia.org/wiki/Serverless_computing) of distributed training.
+
+
+## Training Job
+
+A training job will be created once a user asks Paddle cloud to train a model. The training job is made up of different processes that collaboratively consume data and produce a trained model. There are three kinds of processes:
+
+1. the *master server process*, which dispatches tasks to
+1. one or more *trainer processes*, which run distributed training and synchronize gradients/models via
+1. one or more *parameter server processes*, each of which holds a shard of the global model and receives the uploaded gradients from every *trainer process*, so it can run the optimization functions to update its parameters.
+
+Their relation is illustrated in the following graph:
+
+
+
+By coordinating these processes, PaddlePaddle supports both Synchronous Stochastic Gradient Descent (sync SGD) and Asynchronous Stochastic Gradient Descent (async SGD) for training user-defined neural network topologies.
+
+When training with sync SGD, the parameter servers wait for all trainers to finish their gradient updates and then send the updated parameters to the trainers; training cannot proceed until the trainers have received the updated parameters. This creates a synchronization point between trainers. When training with async SGD, each trainer uploads gradients and downloads new parameters individually, without synchronizing with other trainers. Using async SGD will be faster in terms of time per pass, but the gradients are noisier since trainers are likely to have stale models.
+
+### Master Server Process
+
+The master server process will:
+
+- Partition a dataset into [tasks](#task) and dispatch tasks to trainers.
+- Keep track of training progress on the dataset with a [task queue](#task-queue). A training job iterates over the dataset pass by pass: it finishes a full pass before going into the next pass.
+
+
+#### Task
+
+A task is a data shard to be trained. The total number of tasks will be much bigger than the total number of trainers. The number of data instances inside a task will be much bigger than the mini-batch size.
+
+#### Task Queue
+
+The master server has three task queues to track training progress. As illustrated in the graph below, Job A and Job B both have one master server. Each master server process has three task queues.
+
+
+
+- The todo queue holds tasks to be dispatched. When a job starts, the master server fills in the todo queue with all tasks.
+- The pending queue holds tasks that are currently being trained by trainers.
+- The done queue holds tasks that are already trained.
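+
+To make the queue mechanics concrete, here is a minimal Python sketch of how a master might move tasks between the three queues. The class and method names are hypothetical; the actual master is planned in Go, as described in the [master server design doc](./master_server.md):
+
+```python
+import collections
+import time
+
+Task = collections.namedtuple("Task", ["index", "chunks"])
+
+class TaskQueues(object):
+    def __init__(self, tasks):
+        self.todo = list(tasks)  # tasks waiting to be dispatched
+        self.pending = {}        # task index -> (task, dispatch time)
+        self.done = []           # tasks trained in the current pass
+
+    def dispatch(self):
+        # Hand one task to a trainer and remember when it was dispatched.
+        task = self.todo.pop(0)
+        self.pending[task.index] = (task, time.time())
+        return task
+
+    def finish(self, index):
+        # A trainer reported completion: move the task to the done queue.
+        task, _ = self.pending.pop(index)
+        self.done.append(task)
+        if not self.todo and not self.pending:
+            # Every task is trained: start a new pass.
+            self.todo, self.done = self.done, []
+```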
+
+The life cycle of a single task is illustrated below:
+
+
+
+1. When a new pass of training starts, all tasks will be placed in the todo queue.
+1. The master server will dispatch a few tasks to each trainer at a time, put them in the pending queue, and wait for completion.
+1. The trainer will work on its tasks and tell the master server once a task is completed. The master server will dispatch a new task to that trainer.
+1. If a task times out, the master server will move it back to the todo queue. The timeout count will increase by one. If the timeout count is above a threshold, the task is likely to cause a trainer to crash, so it will be discarded.
+1. The master server will move completed tasks to the done queue. When the todo queue is empty, the master server will start a new pass by moving all tasks in the done queue to the todo queue and resetting the timeout counters of all tasks to zero.
+
+### Trainer Process
+
+The trainer process will:
+
+- Receive tasks from the master.
+- Work on the tasks: calculate and upload gradients to parameter servers, and update the local model by downloading new parameters from parameter servers.
+
+### Parameter Server Process
+
+Parameter server processes hold the parameters collaboratively. The parameters are partitioned across different parameter servers.
+
+The parameter server will:
+
+- Receive gradients from the trainers, update its parameters, and give the trainers the latest parameters.
+- Periodically save its parameters to a distributed file system by overriding the previous save.
+
+### Optimization Algorithms
+
+The communication pattern between the trainers and the parameter servers depends on the category of optimization algorithm:
+
+- Synchronous Stochastic Gradient Descent (sync-SGD)
+
+  The parameter servers will wait for all trainers to finish the n-th mini-batch calculation and send their gradients before broadcasting new parameters to every trainer. Every trainer will wait for the new parameters before starting the (n+1)-th mini-batch.
+
+- Asynchronous Stochastic Gradient Descent (async-SGD)
+
+  There will be no synchronization between different trainers, and the parameter server updates its parameters as soon as it receives new gradients:
+
+  - Each trainer uploads its accumulated gradient every n mini-batches.
+  - Every m mini-batches, the trainer downloads new parameters from the parameter server.
+  - n and m do not have to be equal.
+
+## Fault Tolerance
+
+The training job will pause if the master server process is dead, or if any of the parameter server processes is dead. They will be restarted by [Kubernetes](https://kubernetes.io/) and recover in a few minutes. Please refer to [fault recovery](#fault-recovery).
+
+The training job will continue to make progress if there is at least one trainer process running. The strategy depends on the type of optimization algorithm:
+
+- sync-SGD
+
+  TODO
+
+- async-SGD
+
+  Since async-SGD does not require synchronization between mini-batches, the system will by definition make progress if at least one trainer is running.
+
+## Fault Recovery
+
+PaddlePaddle uses [etcd](https://github.com/coreos/etcd) to keep track of the states of processes. Because etcd is a distributed reliable key-value store, a restarted process can recover its states from etcd. The model parameters are periodically saved into a distributed file system, so a restarted parameter server can recover its parameters from the saved file.
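+
+As an illustration, a trainer's liveness registration on etcd could look like the sketch below. It assumes the Python `etcd3` client and a reachable etcd endpoint; only the `/trainer/` key layout comes from this doc, the rest is hypothetical:
+
+```python
+import uuid
+import etcd3
+
+def register_trainer(addr, ttl=10):
+    client = etcd3.client(host="127.0.0.1", port=2379)  # endpoint is an assumption
+    # A lease makes the key disappear automatically once it is no longer
+    # refreshed, which is how the master notices a dead trainer.
+    lease = client.lease(ttl)
+    trainer_id = str(uuid.uuid4())
+    client.put("/trainer/" + trainer_id, addr, lease=lease)
+    return client, lease, trainer_id
+
+client, lease, trainer_id = register_trainer("10.0.0.5:7164")
+lease.refresh()  # call periodically while the trainer is alive
+```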
+
+Now we will introduce how each process recovers from a failure; the graph below shows how etcd is used:
+
+
+
+### Master Server Process
+
+When the master is started by Kubernetes, it executes the following steps at startup:
+
+1. Grabs a unique *master* lock in etcd, which prevents concurrent master instantiations.
+1. Recovers the task queues from etcd if they already exist; otherwise, the master will create them.
+1. Watches the trainer prefix keys `/trainer/` on etcd to find the live trainers.
+1. Starts dispatching the tasks to the trainers, and updates the task queues using an etcd transaction to ensure the lock is held during the update.
+
+When the master server process is dead for any reason, Kubernetes will restart it. It will be online again with all states recovered from etcd in a few minutes.
+
+### Trainer Process
+
+When the trainer is started by Kubernetes, it executes the following steps at startup:
+
+1. Watches the available parameter server prefix keys `/ps/` on etcd and waits until the count of parameter servers reaches the desired count.
+1. Generates a unique ID, and sets key `/trainer/` with its contact address as value. The key will be deleted when the lease expires, so the master will be aware of the trainer being online and offline.
+1. Waits for tasks from the master to start training.
+
+If the trainer's etcd lease expires, it will try to set the key `/trainer/` again so that the master server can discover the trainer again.
+
+When a trainer fails, Kubernetes will try to restart it. The recovered trainer will fetch tasks from the todo queue and go on training.
+
+### Parameter Server Process
+
+When the parameter server is started by Kubernetes, it executes the following steps at startup:
+
+1. Reads the desired total number of parameter servers from the etcd key `/ps_desired`.
+1. Searches through the etcd keys `/ps/` (`/ps/0`, `/ps/1`, ...) to find the first non-existent key whose index is smaller than the total number of parameter servers. Sets the key using a transaction to avoid concurrent writes. The parameter server's index is inferred from the key name.
+
+   The desired number of parameter servers is 3:
+
+   
+
+   The third parameter server joined:
+
+   
+
+1. The parameter server can load parameters if there are already saved parameters in the save path (inferred from its index).
+1. Now the parameter server is ready for the trainers' requests.
+
+If the parameter server's etcd lease expires, the parameter server will kill itself.
+
+
+## Parameter Server Checkpointing
+See [here](./checkpointing.md)
+
+## Storing and Dispatching Training Data
+See [here](./data_dispatch.md)
+
+
+## Dynamic Scaling
+
+### Trainer Scaling
+
+TODO
+
+### Parameter Server Scaling
+
+Not planned for v1.
+
+## Training Dataset Format
+
+TODO
+
+## User Interface
+
+TODO
diff --git a/doc/design/cluster_train/checkpointing.md b/doc/design/cluster_train/checkpointing.md
new file mode 100644
index 0000000000000000000000000000000000000000..c87ef2c7d2636208866d05456d5d44316d0bb200
--- /dev/null
+++ b/doc/design/cluster_train/checkpointing.md
@@ -0,0 +1,44 @@
+## Model Parameter Checkpointing
+Checkpointing the model parameters effectively protects against single-point failures, or simultaneous multi-point failures, of the parameter servers. A checkpoint is taken by periodically saving to disk a complete image of the model data held in each parameter server's memory, so that training can be restarted from an intermediate state. For a training job that cannot be interrupted and has no backup, fault tolerance can be achieved by periodically saving a snapshot of each parameter server's data to a ***distributed storage service***, e.g. keeping the latest snapshot every 10 minutes and deleting earlier ones. On a single-point failure, the training job can be recovered by simply restarting the failed node, or by migrating it to another node and starting it there.
+
+
+
+### The design of snapshot saving is as follows:
+
+Notes:
+
+* After a parameter server starts in the cluster, it automatically mounts the distributed storage directory and saves its snapshots under that directory.
+* ***Note: each parameter server saves its checkpoint independently. For now we do not consider having multiple parameter servers synchronously save a global checkpoint at one specific point in time, because doing so still could not guarantee the elimination of randomness.***
+
+The checkpoint saving procedure:
+
+1. When the condition "every 10 minutes" is met, the parameter server acquires the `read_lock` on the parameter memory and starts a new thread to save a checkpoint. If a checkpoint-saving thread is already running, the request is ignored. Since updating the parameters requires acquiring the `write_lock` on the parameter memory, the parameter server pauses parameter updates and waits while the snapshot is being written.
+2. The parameter server generates a UUID and writes the snapshot data into a new file, named with this UUID, in the specified directory. After the snapshot is written, it computes the MD5 sum of the file, and then writes the following JSON content into `/checkpoints/[pserver_id]` in etcd: `{"uuid": [UUID], "md5": "MD5 sum", "timestamp": xxxx}`.
+3. Delete the snapshot files in the disk directory whose names do not match the current UUID.
+4. Release the lock on the parameter memory and stop the checkpoint-saving thread.
+
+Users should take extra note here: in a real environment, a running training job may saturate the network bandwidth between the trainers and the parameter servers. If a parameter server then also needs to access the distributed storage over the network to save a snapshot, this may cause network congestion and lead to periodic stalls of the training job.
+
+### Recovering from a snapshot
+
+When a parameter server starts for the first time, or is restarted by Kubernetes after a failure at any time, it needs to roll back to the last checkpoint:
+
+ 1. Read the etcd node `/checkpoints/[pserver_id]` to obtain the file UUID of the latest checkpoint.
+ 1. Load the checkpoint snapshot file named with that UUID from disk, and load the parameters in it.
+ 1. If either of the two steps above fails, initialize the parameters with the initialization method defined by the startup arguments.
+ 1. Start serving.
+
+## TODO List
+### Speculative / Accelerated Execution (TODO)
+In a heterogeneous cluster, some trainers may run so slowly that they drag down the speed of the whole cluster (e.g. Trainer 1 in the figure). In that case the master is responsible for starting a new trainer (Accelerate Trainer 2) that works on the same block of training data. Whichever trainer finishes training the block first wins, and the slower one is killed.
+
+### Dynamic Scaling Up/Down
+For now we only consider dynamically scaling up the number of trainers, which reduces system complexity.
+
+## Terminology
+* model: all the parameters obtained from deep learning training; with this neural network one can make predictions on new data.
+* parameters: the parameters in a neural network, including the weights w and the biases b. A neural network model consists of a large number of parameters.
+* shard: one of the pieces obtained by splitting a whole into multiple parts.
+* model shard: the parameters of a neural network split into multiple pieces; each shard is stored on one of the parameter servers.
+* parameter block: multiple parameter blocks make up one model shard.
+* single-point failure: at any moment at most one server fails. Since the probability of two machines in a cluster failing at the same time is extremely low ((mean failure rate * mean time to repair)^2), tolerating two or more simultaneous failures is only considered for special online systems.
diff --git a/doc/design/cluster_train/data_dispatch.md b/doc/design/cluster_train/data_dispatch.md
new file mode 100644
index 0000000000000000000000000000000000000000..1f5d22ff5e6abcb576d16cbe7391da1967a1ab8e
--- /dev/null
+++ b/doc/design/cluster_train/data_dispatch.md
@@ -0,0 +1,160 @@
+## Storage and Dispatching of Training Data
+
+### Concepts
+
+### Workflow
+Training datasets in production environments are usually very large and are stored on distributed storage such as Hadoop HDFS, Ceph, or AWS S3. These distributed storage services usually split the data into multiple shards stored across multiple nodes, so that various data-processing tasks can be executed in the cloud, including:
+
+* data preprocessing tasks
+* Paddle training tasks
+* online model inference services
+ +
+
+
+The figure above shows the data flow of an application (face recognition) in a real production environment. Production log data is stored both as real-time streams (Kafka) and as offline data (HDFS), and multiple distributed data-processing jobs run in the cluster, such as streaming data processing (online data process) and offline batch processing (offline data process), to preprocess the data and provide it to Paddle as training data. Users can also upload labeled data to the distributed storage to supplement the training data. The models output by the deep learning training running on top of Paddle are served to the online face recognition application.
+
+### Training Data Storage
+We choose [CephFS](http://docs.ceph.com/docs/master/cephfs/) as the storage system.
+
+- Whether from the [PFSClient](../file_manager/README.md)'s point of view, or from that of a task running in a [Pod](https://kubernetes.io/docs/concepts/workloads/pods/pod/), users uniformly access their own data via `/pfs/$DATACENTER/home/$USER`.
+- Public datasets are stored under `/pfs/$DATACENTER/common`
+  - mounted read-only
+
+ +
+
+
+### File Preprocessing
+
+
+Before training starts, the dataset needs to be converted into the storage format used by PaddlePaddle distributed training, [RecordIO](https://github.com/PaddlePaddle/Paddle/issues/1947). We provide two ways to convert:
+
+1. The user converts the data locally and then uploads it.
+1. The user uploads the data, then runs the conversion program on the cluster.
+
+The generated files are named in the following format:
+
+```text
+name_prefix-aaaaa-of-bbbbb
+```
+
+"aaaaa" and "bbbbb" are both five-digit numbers. Each file is one shard of the dataset; "aaaaa" is the index of the shard, and "bbbbb" is the maximum shard index.
+
+For example, the ImageNet dataset might be split into 1000 shards, with the file names:
+```text
+imagenet-00000-of-00999
+imagenet-00001-of-00999
+...
+imagenet-00999-of-00999
+```
+
+#### Conversion Library
+
+Whether converting locally or in the cloud, we provide a Python conversion library with the interface:
+```python
+def convert(output_path, reader, num_shards, name_prefix)
+```
+
+- `output_path`: directory in which output files will be saved.
+- `reader`: a [data reader](https://github.com/PaddlePaddle/Paddle/blob/develop/doc/design/reader/README.md#data-reader-interface), from which the convert program will read data instances.
+- `num_shards`: the number of shards that the dataset will be partitioned into.
+- `name_prefix`: the name prefix of generated files.
+
+`reader` yields one data instance at a time; an instance can be a single value, or multiple values expressed as a tuple:
+
+```python
+yield 1 # a single value
+yield numpy.random.uniform(-1, 1, size=28*28) # a single value
+yield numpy.random.uniform(-1, 1, size=28*28), 0 # multiple values
+```
+
+Each value can be an integer, a float, a string, a list composed of these types, or a numpy.ndarray. Values of other types will be serialized into strings by Pickle.
+
+### Example Programs
+
+#### Using the Conversion Library
+
+The `reader` produced by the following `reader_creator` yields one data instance at a time, and each data instance contains two values: a numpy.ndarray and an integer:
+```python
+def reader_creator():
+    def reader():
+        for i in range(1000):
+            yield numpy.random.uniform(-1, 1, size=28*28), 0 # multiple values
+    return reader
+```
+
+Passing the `reader` produced by `reader_creator` into the `convert` function completes the conversion:
+```python
+convert("./", reader_creator(), 100, "random_images")
+```
+
+The call above generates 100 files in the current directory:
+```text
+random_images-00000-of-00099
+random_images-00001-of-00099
+...
+random_images-00099-of-00099
+```
+
+#### Training
+
+
+PaddlePaddle provides a dedicated [data reader creator](https://github.com/PaddlePaddle/Paddle/blob/develop/doc/design/reader/README.md#python-data-reader-design-doc) that generates a data reader for given `RecordIO` files. **The reader is used in the same way locally and in the cloud**:
+
+```python
+# ...
+reader = paddle.reader.creator.RecordIO("/pfs/datacenter_name/home/user_name/random_images-*-of-*")
+batch_reader = paddle.batch(reader, 128)
+trainer.train(batch_reader, ...)
+```
+
+The data instances yielded by the reader in the code above are exactly the same as the data instances yielded by the reader when the dataset was generated.
+
+### Uploading Training Files
+
+The following command uploads local data to the storage cluster.
+
+```bash
+paddle pfs cp filename /pfs/$DATACENTER/home/$USER/folder/
+```
+
+For example, to upload the converted random_images dataset from the example above to `/home/` in the cloud, use:
+
+```bash
+paddle pfs cp random_images-*-of-* /pfs/$DATACENTER/home/$USER/folder/
+```
+
+The configuration for `$DATACENTER` needs to be written into a config file, for example:
+
+```
+# config file
+[datacenter_1]
+username=user
+usercert=user.pem
+userkey=user-key.pem
+endpoint=datacenter1.paddlepaddle.org
+
+[datacenter_2]
+username=user
+usercert=user.pem
+userkey=user-key.pem
+endpoint=datacenter2.paddlepaddle.org
+```
+## TODO
+### File Access Permissions
+Control user permissions
+
+- Users can share their own data with others
+
+### File Access Method
+Instead of accessing data by mounting, access it remotely through an API, for example:
+
+```
+f = open('/pfs/datacenter_name/home/user_name/test1.dat')
+```
+
+
+### Support user-defined data preprocessing jobs
diff --git a/doc/design/cluster_train/master_server.md b/doc/design/cluster_train/master_server.md
new file mode 100644
index 0000000000000000000000000000000000000000..4bf3c506f101361875043f8bfd97972b8c981a22
--- /dev/null
+++ b/doc/design/cluster_train/master_server.md
@@ -0,0 +1,91 @@
+# Design Doc: Master Server
+
+For an overview of the master server's role, please refer to the [distributed training design doc](./README.md). In this design doc we will discuss the master server in more detail. The master will be implemented in [Go](https://golang.org/).
+
+## Dataset
+
+
+
+A dataset is a list of files in *RecordIO* format. A RecordIO file consists of chunks, and each chunk consists of some records.
+
+## Task Queue
+
+As mentioned in the [distributed training design doc](./README.md), a *task* is a data shard that the master server assigns to a trainer process to train on. A task consists of one or multiple *chunks* from one or multiple files. The master server maintains *task queues* to track the training progress.
+
+### Task Queue Creation
+
+1. Each trainer will make an RPC call (using Go's [rpc](https://golang.org/pkg/net/rpc/) package) to the master server, telling it the RecordIO files representing the dataset specified by the user. Since every trainer will tell the master server the same dataset, only the first RPC call will be honored.
+
+   The RPC interface is:
+   ```go
+   func (m *RPCServer) ReportDataset(Paths []string, dummy *int) error {
+   }
+   ```
+1. The master server will scan through each RecordIO file to generate the *chunk index* and know how many chunks each file has. A chunk can be referenced by the file path and the index of the chunk within the file. The chunk index is an in-memory data structure that enables fast access to each chunk, and the index of a chunk within the file is an integer starting from 0, representing the n-th chunk within the file.
+
+   The definition of the chunk is:
+   ```go
+   type Chunk struct {
+       Idx   int // index of the chunk within the file
+       Path  string
+       Index recordio.Index // chunk index
+   }
+   ```
+1. Chunks are grouped into tasks, and tasks are filled into the todo queue. The pending queue and the done queue are initialized to be empty.
+
+   The definition of the task is:
+   ```go
+   type Task struct {
+       Index  int
+       Chunks []Chunk
+   }
+   ```
+
+   The elements in the task queues are of type `TaskEntry`, which contains a timeout counter (described in [task retry logic](#task-retry-logic)) and a task:
+   ```go
+   type TaskEntry struct {
+       NumTimeout int
+       Task       Task
+   }
+   ```
+
+   The definition of the task queues is:
+   ```go
+   type TaskQueues struct {
+       Todo    []TaskEntry
+       Pending map[int]TaskEntry // map from task index to task entry
+       Done    []TaskEntry
+   }
+   ```
+
+### Task Queue Persistence
+
+The task queues need to be persisted on [etcd](https://github.com/coreos/etcd) for fault recovery. Since the task queues only change once a task is completed or timed out, which is not very frequent, we can afford to synchronize with etcd every time the task queues change.
+
+We will serialize the task queues data structure with [gob encoding](https://golang.org/pkg/encoding/gob/), compress it with gzip, and save it into etcd synchronously under the key `/task_queues`.
+
+### Task Dispatch
+
+The trainer will make an RPC call to the master to get a new task when:
+
+- the trainer first starts, or
+- the trainer finishes a task.
+
+The RPC interface is:
+```go
+func (m *RPCServer) GetTask(finished *Task, result *Task) error {
+}
+```
+Argument `finished` will be `nil` when the trainer has just started.
+
+During the RPC call the master will do the following:
+
+- Make a copy of the task queues, and update the copy to reflect the finished tasks and the new pending tasks.
+- Synchronize the copy of the task queues with etcd using a transaction conditioned on holding the master lock.
+- Replace the task queues with the copy and report the new tasks to the trainer if the transaction succeeded, or discard the copy and report the error to the trainer if it failed.
+
+### Task Retry Logic
+
+When a task is dispatched to the trainer, the master will schedule a function for execution after the timeout duration (based on the moving average of task completion time). If the task entry is still in the pending queue, its timeout counter will increase by one, and the task will be moved to the todo queue. If the timeout counter is above the threshold, the master will log the error and discard the task.
+
+Please note that a timed-out task could be completed after it has been dispatched for retry, so it is possible for a task to be processed multiple times. We do not try to prevent this from happening since it's fine to train on the same task multiple times due to the stochastic nature of the stochastic gradient descent algorithm.
diff --git a/doc/design/cluster_train/pserver_client.md b/doc/design/cluster_train/pserver_client.md
new file mode 100644
index 0000000000000000000000000000000000000000..474b8c572cd92fc87e9f7f3f2b19d12cccd158de
--- /dev/null
+++ b/doc/design/cluster_train/pserver_client.md
@@ -0,0 +1,171 @@
+# Design Doc: The Client Library of Parameter Server
+
+For an overview of the trainer's role, please refer to the [distributed training design doc](README.md). In this design doc, we will discuss the parameter server's client library, which will manage communication with parameter servers. The library will be implemented in [Go](https://golang.org/) and made available as a static or dynamic library with a C header file.
+
+## Parameter Partition
+
+Each parameter will be partitioned into parameter blocks to make the parameters evenly distributed on parameter servers. The partition is done automatically by the client library. The *sparse parameters* require slightly different treatment:
+
+### Sparse Parameter
+
+A sparse parameter is a parameter that is updated sparsely. The name is somewhat misleading: it does not have a sparse representation; it has the same representation as a dense vector.
+
+Because a sparse parameter is updated sparsely, the trainer has to partition the sparse parameter, and because the parameter servers merge all shards of a sparse parameter into the same file when saving the parameter, a special naming convention is needed:
+
+If a sparse parameter is partitioned into n shards, they should be named as:
+
+```text
+name:sparse-0
+name:sparse-1
+...
+name:sparse-n-1
+```
+
+The library is unaware of the partition, and treats each parameter shard independently. Only when saving parameters will the parameter servers merge the sparse parameters according to the naming convention.
+
+## Model Optimization Using Gradients
+
+There are two ways to perform model optimization using gradients:
+
+- On Client
+
+  The client does multiple steps of forward and backward update. In each step, the gradients are calculated and a new model is generated. After some steps, the client will calculate the difference between the newest model and the old model at step 0. The difference will be sent to the parameter servers. The parameter servers will just update parameters with the difference, without any optimization using gradients (such as Adam and L1 regularization).
+
+- On Parameter Server
+
+  The client will send accumulated gradients to the parameter servers, and the parameter servers will do the optimization using the gradients.
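+
+To illustrate the client-side variant, here is a minimal Python sketch of computing the model difference after several local steps. The function names and the plain-SGD local optimizer are assumptions for illustration only; this doc does not prescribe an implementation:
+
+```python
+import numpy as np
+
+def train_on_client(model, batches, compute_gradients, send_to_pservers,
+                    steps=100, lr=0.01):
+    # Remember the model at step 0 so we can compute the difference later.
+    old = {name: value.copy() for name, value in model.items()}
+    for _ in range(steps):
+        # One local forward/backward step; any local optimizer could be used.
+        grads = compute_gradients(model, next(batches))
+        for name, g in grads.items():
+            model[name] -= lr * g
+    # The parameter servers apply this difference directly, without any
+    # gradient-based optimization on their side.
+    diff = {name: model[name] - old[name] for name in model}
+    send_to_pservers(diff)
+```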
+
+## L1 and L2 Regularization
+
+PaddlePaddle allows L1 or L2 regularizations to be specified per parameter, so when the trainer initializes a parameter, it needs to include a parameter configuration when L1 or L2 regularization is necessary.
+
+## Parameter Initialization
+
+The parameters on parameter servers need to be initialized. To provide maximum flexibility, the trainer will initialize the parameters. Only one trainer will do the initialization; the other trainers will wait for the initialization to complete and get the parameters from the parameter servers.
+
+### Trainer Selection
+
+To select the trainer for initialization, every trainer will try to get a distributed lock; whoever owns the lock will do the initialization. As illustrated below:
+
+
+
+### Trainer Selection Process
+
+The trainer selection process is encapsulated in the C API function:
+```c
+int paddle_begin_init_params(paddle_pserver_client* client, const char* config_proto);
+```
+The selected trainer's call to `paddle_begin_init_params` will return 1, and the other trainers' calls to `paddle_begin_init_params` will return 0. `paddle_get_params` will block until initialization is completed. As illustrated below:
+
+
+
+## C Interface
+
+```c
+typedef enum {
+  PADDLE_ELEMENT_TYPE_INT32 = 0,
+  PADDLE_ELEMENT_TYPE_UINT32 = 1,
+  PADDLE_ELEMENT_TYPE_INT64 = 2,
+  PADDLE_ELEMENT_TYPE_UINT64 = 3,
+  PADDLE_ELEMENT_TYPE_FLOAT32 = 4,
+  PADDLE_ELEMENT_TYPE_FLOAT64 = 5,
+} paddle_element_type;
+
+typedef struct {
+  char* name;
+  paddle_element_type element_type;
+  unsigned char* content;
+  int content_len;
+} paddle_parameter, paddle_gradient;
+
+typedef int paddle_pserver_client;
+
+/**
+ * @brief creates a pserver client that talks to etcd for coordination.
+ */
+paddle_pserver_client paddle_new_etcd_pserver_client(char* etcd_addr);
+
+/**
+ * @brief creates a pserver client given pserver addresses.
+ *
+ * @param pserver_addrs comma-separated pserver addresses.
+ * @param selected if current pserver client is selected to initialize all parameter servers.
+ */
+paddle_pserver_client paddle_new_pserver_client(char* pserver_addrs, int selected);
+void paddle_pserver_client_release(paddle_pserver_client c);
+
+/**
+ * @brief paddle_begin_init_params begins to initialize parameters on
+ * parameter servers.
+ *
+ * paddle_begin_init_params will be called from multiple trainers;
+ * only one trainer will be selected to initialize the parameters on
+ * parameter servers. Other trainers need to get the initialized
+ * parameters from parameter servers using @paddle_get_params.
+ *
+ * @return 1 if the trainer is selected to initialize parameter
+ * servers, otherwise 0.
+ */
+int paddle_begin_init_params(paddle_pserver_client client);
+
+/**
+ * @brief paddle_init_param initializes the parameter on parameter
+ * servers.
+ *
+ * @param param the parameter to initialize.
+ * @param param_config_proto the configuration for the parameter.
+ * @param config_len the length of param_config_proto
+ * @return 0 if successful, otherwise -1. On failure, the trainer
+ * needs to restart the entire initialization process (starting from
+ * @paddle_begin_init_params). Or simply exit the program and wait for
+ * the cluster management system to restart the trainer.
+ */
+int paddle_init_param(paddle_pserver_client client, paddle_parameter param, const unsigned char* param_config_proto, int config_len);
+
+/**
+ * @brief paddle_finish_init_params tells parameter servers that the client has
+ * sent all parameters to parameter servers as initialization.
+ *
+ * @return 0 if successful, otherwise -1. On failure, the trainer
+ * needs to restart the entire initialization process (starting from
+ * @paddle_begin_init_params). Or simply exit the program and wait for
+ * the cluster management system to restart the trainer.
+ */
+int paddle_finish_init_params(paddle_pserver_client client);
+
+/**
+ * @brief paddle_send_grads sends gradients to parameter servers for
+ * updating parameters.
+ *
+ * @param grads the array of gradients to send.
+ * @param len the length of the gradient array.
+ * @return 0 if successful, otherwise -1.
+ */
+int paddle_send_grads(paddle_pserver_client client, const paddle_gradient* grads, int len);
+
+/**
+ * @brief paddle_get_params gets parameters from parameter servers.
+ *
+ * paddle_get_params will block until parameters are initialized on
+ * the parameter servers.
+ *
+ * @param dst the destination array of parameter pointers to save to.
+ * Each parameter pointer must be pre-populated with the required parameter name,
+ * and the content of each parameter must be pre-allocated to the size of the
+ * required parameter on the pserver.
+ * @param len the length of the paddle_parameter array.
+ * @return 0 if successful, otherwise -1.
+ */
+int paddle_get_params(paddle_pserver_client client, paddle_parameter** dst, int len);
+
+/**
+ * @brief paddle_save_model tells the parameter servers to save the
+ * parameters to the given path.
+ *
+ * @param path the path to save parameters.
+ * @return 0 if successful, otherwise -1.
+ */
+int paddle_save_model(paddle_pserver_client client, const char* path);
+```
diff --git a/doc/design/cluster_train/remote_parameter_updater.md b/doc/design/cluster_train/remote_parameter_updater.md
new file mode 100644
index 0000000000000000000000000000000000000000..6e8e5938455b869e0f3367794c41250340b37f77
--- /dev/null
+++ b/doc/design/cluster_train/remote_parameter_updater.md
@@ -0,0 +1,21 @@
+# Design Doc: Remote Parameter Updater for Cluster Train
+
+For an overview of distributed training, please refer to the [distributed training design doc](README.md). In this design doc, we will discuss the parameter updater that uses the parameter server C client ([The Client Library of Parameter Server Design Doc](pserver_client.md)) to manage and update parameters.
+
+## Parameter Updater
+
+The parameter updater is used by the trainer to manage and update parameters. There are mainly two kinds of parameter updaters: local and remote. Since this design is for cluster training, we will only discuss the remote parameter updater here.
+
+### Remote Parameter Updater
+
+The remote parameter updater manages parameters through remote parameter servers, using the client that communicates with the pservers ([The Client Library of Parameter Server Design Doc](pserver_client.md)).
+
+In the PaddlePaddle Python V2 API, the trainer is implemented in Python; it holds an instance of the parameter updater and calls its functions directly. In this design, we will also expose the API of RemoteParameterUpdater to Python with SWIG.
+
+#### Sparse Remote Parameter Updater
+
+Since we will only implement dense parameter management for now, the mechanism for sparse parameters will be discussed in the next stage.
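+
+Although the interface design below is still TBD, a rough sketch of the call sequence a dense remote updater might follow is shown here in Python. The wrapper names are hypothetical and merely mirror the C client functions from the pserver client design doc:
+
+```python
+class RemoteParameterUpdater(object):
+    """Sketch of a dense remote updater driving the pserver client."""
+
+    def __init__(self, client, parameters):
+        self.client = client          # wraps the C client library
+        self.parameters = parameters  # list of paddle_parameter-like objects
+
+    def init(self):
+        # Mirrors paddle_begin_init_params / paddle_init_param /
+        # paddle_finish_init_params: only the selected trainer initializes.
+        if self.client.begin_init_params():
+            for param in self.parameters:
+                self.client.init_param(param)
+            self.client.finish_init_params()
+        # Every trainer then pulls the initialized parameters.
+        self.client.get_params(self.parameters)
+
+    def update(self, gradients):
+        # Mirrors paddle_send_grads followed by paddle_get_params.
+        self.client.send_grads(gradients)
+        self.client.get_params(self.parameters)
+```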
+
+### Interface Design
+
+TBD
diff --git a/doc/design/cluster_train/save_model.md b/doc/design/cluster_train/save_model.md
new file mode 100644
index 0000000000000000000000000000000000000000..b755185c81ad617b9c85c47de0f5f65d2201c658
--- /dev/null
+++ b/doc/design/cluster_train/save_model.md
@@ -0,0 +1,111 @@
+# Design Doc: Save Model
+
+## Overview
+
+The model is the output of the training process. There are two
+ways in which a user can obtain a model:
+
+- Save model triggered by user code: user code asks PaddlePaddle to
+  save a model.
+- Convert model from the checkpoint: the model is converted from the
+  pservers' periodic checkpoints. In this way, the user can cancel a
+  job at any time, and still have a relatively fresh model (we
+  checkpoint around every 5 minutes).
+
+### Trainer Saving Model vs. Pservers Saving Model
+
+Both trainers and pservers have access to the model. So the model can
+be saved from a trainer or from pservers. We need to decide from where the model
+is saved.
+
+#### Dense Update vs. Sparse Update
+
+There are two types of model update methods: dense update and sparse
+update (when the model parameter is configured to be sparse).
+
+- Dense update
+
+  Every trainer has its own full copy of the model. Every model
+  update will update the entire model.
+
+- Sparse update
+
+  The training input is sparse, and the trainer does not have the
+  entire model. It will only download the sub-model related
+  to the input. When updating the model, only the sub-model related to
+  the training input is updated.
+
+
+#### Pservers Saving Model
+
+The benefit of letting pservers save the model is that they have the entire
+model all the time. However, since pservers are on different nodes, it
+requires a merging process to merge model shards into the same
+model. This requires the pservers to write models to a distributed
+filesystem, making the checkpoint shards visible to the merge program.
+
+#### Trainer Saving Model
+
+The benefit of letting one trainer save the model is that it does not
+require a distributed filesystem. And it reuses the same save-model
+logic as when training locally - except that when doing sparse update, the
+trainer needs to download the entire model during the saving process.
+
+#### Conclusion
+
+Given that trainer-side saving does not require a distributed filesystem,
+and is an intuitive extension of saving the model when training
+locally, we decide to let the trainer save the model when doing
+distributed training.
+
+
+### Convert Model from Checkpoint
+
+TODO
+
+
+## Timeline
+
+We first implement the trainer saving the model. Converting the latest
+snapshot to a model will be a TODO for the future.
+
+
+## Trainer Save Model
+
+### Trainer Election
+
+One trainer will be elected as the one to save the model. When using
+etcd, the trainer ID is a randomly generated UUID; the trainer will
+contact the master server requesting to save the model, and find out
+whether it is elected. When the master server is not used, unique
+trainer IDs will be given by the administrator, and the trainer whose ID
+is "0" is elected to save the model.
+
+### Model Save Path
+
+Each trainer will be given the directory to save the model. The
+elected trainer will save the model to
+`given-directory/trainerID`. Since the trainer ID is unique, this
+would prevent concurrent saves to the same file when multiple trainers
+are elected to save the model, as can happen when the split-brain problem occurs.
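+
+For instance, the path construction could be as simple as the following sketch. The helper names and the `trainer.save(path)` call are hypothetical; the doc only specifies the `given-directory/trainerID` layout:
+
+```python
+import os
+
+def model_save_path(save_dir, trainer_id):
+    # The path is unique per trainer, so even if two trainers both believe
+    # they are elected (split brain), they never write to the same file.
+    return os.path.join(save_dir, trainer_id)
+
+def maybe_save_model(trainer, save_dir, trainer_id, elected):
+    if not elected:
+        return None
+    path = model_save_path(save_dir, trainer_id)
+    trainer.save(path)  # hypothetical save call
+    return path
+```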
+
+### What Happens When Model Is Saving
+
+It takes some time to save a model, so we need to define what happens
+while the model is being saved.
+
+When doing dense update, the trainer uses the local model. The pservers
+do not need to pause model updates.
+
+When doing sparse update, the trainer needs to download the entire
+model while saving. To get the most accurate model, the model update
+needs to be paused before the download starts and resumed after the
+download finishes. Otherwise, the trainer gets a model that is
+"polluted": some part of the model is old, some part of the model is
+new.
+
+It's unclear whether the "polluted" model will be inferior, due to the
+stochastic nature of deep learning, and pausing the model update will
+add more complexity to the system. Since supporting sparse update is a
+TODO item, we defer the evaluation of whether to pause the model update
+during model saving to the future.
diff --git a/doc/design/cluster_train/src/checkpointing.png b/doc/design/cluster_train/src/checkpointing.png
new file mode 100644
index 0000000000000000000000000000000000000000..c221e8474f90f37e31416cbb19c9452207a0d14c
Binary files /dev/null and b/doc/design/cluster_train/src/checkpointing.png differ
diff --git a/doc/design/cluster_train/src/data_dispatch.png b/doc/design/cluster_train/src/data_dispatch.png
new file mode 100644
index 0000000000000000000000000000000000000000..5bdcc24d6a6d193cb014f8c38b362451fded5e54
Binary files /dev/null and b/doc/design/cluster_train/src/data_dispatch.png differ
diff --git a/doc/design/cluster_train/src/dataset.graffle b/doc/design/cluster_train/src/dataset.graffle
new file mode 100644
index 0000000000000000000000000000000000000000..c10a423ed16a23229a9ee33d11bfc82bb59646c8
Binary files /dev/null and b/doc/design/cluster_train/src/dataset.graffle differ
diff --git a/doc/design/cluster_train/src/dataset.png b/doc/design/cluster_train/src/dataset.png
new file mode 100644
index 0000000000000000000000000000000000000000..2fb7f1cce3b6dd21489392557826e95a9f207c34
Binary files /dev/null and b/doc/design/cluster_train/src/dataset.png differ
diff --git a/doc/design/cluster_train/src/file_storage.graffle b/doc/design/cluster_train/src/file_storage.graffle
new file mode 100644
index 0000000000000000000000000000000000000000..50a17e70fa255495337c529a3bf12a5c0024a5be
Binary files /dev/null and b/doc/design/cluster_train/src/file_storage.graffle differ
diff --git a/doc/design/cluster_train/src/file_storage.png b/doc/design/cluster_train/src/file_storage.png
new file mode 100644
index 0000000000000000000000000000000000000000..fccb4e3e7e738224c7f1584326bd5f351ce799aa
Binary files /dev/null and b/doc/design/cluster_train/src/file_storage.png differ
diff --git a/doc/design/cluster_train/src/init_lock.graffle b/doc/design/cluster_train/src/init_lock.graffle
new file mode 100644
index 0000000000000000000000000000000000000000..fa9149f21b1311eed48ef72ec55e556559d0fc94
Binary files /dev/null and b/doc/design/cluster_train/src/init_lock.graffle differ
diff --git a/doc/design/cluster_train/src/init_lock.png b/doc/design/cluster_train/src/init_lock.png
new file mode 100644
index 0000000000000000000000000000000000000000..92404ee6d6c0f9a7727952bae3c869ba338ecd7f
Binary files /dev/null and b/doc/design/cluster_train/src/init_lock.png differ
diff --git a/doc/design/cluster_train/src/paddle-cloud-in-data-center.png b/doc/design/cluster_train/src/paddle-cloud-in-data-center.png
new file mode 100644
index
0000000000000000000000000000000000000000..da5d1a77562480ad1d886f5f21dbd84001d3d508 Binary files /dev/null and b/doc/design/cluster_train/src/paddle-cloud-in-data-center.png differ diff --git a/doc/design/cluster_train/src/paddle-etcd.graffle b/doc/design/cluster_train/src/paddle-etcd.graffle new file mode 100644 index 0000000000000000000000000000000000000000..56681ae5bbe11849116d621b066a6317e003e4ca Binary files /dev/null and b/doc/design/cluster_train/src/paddle-etcd.graffle differ diff --git a/doc/design/cluster_train/src/paddle-etcd.png b/doc/design/cluster_train/src/paddle-etcd.png new file mode 100644 index 0000000000000000000000000000000000000000..4f9c9762b3a8c089dd5e9b2c07cb9dfc78296a21 Binary files /dev/null and b/doc/design/cluster_train/src/paddle-etcd.png differ diff --git a/doc/design/cluster_train/src/paddle-model-sharding.graffle b/doc/design/cluster_train/src/paddle-model-sharding.graffle new file mode 100644 index 0000000000000000000000000000000000000000..fba30f0ca2b47f0d202a432821d95e55aac37ec8 Binary files /dev/null and b/doc/design/cluster_train/src/paddle-model-sharding.graffle differ diff --git a/doc/design/cluster_train/src/paddle-model-sharding.png b/doc/design/cluster_train/src/paddle-model-sharding.png new file mode 100644 index 0000000000000000000000000000000000000000..8c3f6724ef46c6527e63a4cd8cb0b50fe0167124 Binary files /dev/null and b/doc/design/cluster_train/src/paddle-model-sharding.png differ diff --git a/doc/design/cluster_train/src/paddle-ps-0.png b/doc/design/cluster_train/src/paddle-ps-0.png new file mode 100644 index 0000000000000000000000000000000000000000..47ef32806f182cab003da77f1556823b3f6d1721 Binary files /dev/null and b/doc/design/cluster_train/src/paddle-ps-0.png differ diff --git a/doc/design/cluster_train/src/paddle-ps-1.png b/doc/design/cluster_train/src/paddle-ps-1.png new file mode 100644 index 0000000000000000000000000000000000000000..f3125db73096c52bac6e7c60e1675552857c0774 Binary files /dev/null and b/doc/design/cluster_train/src/paddle-ps-1.png differ diff --git a/doc/design/cluster_train/src/paddle-ps.graffle b/doc/design/cluster_train/src/paddle-ps.graffle new file mode 100644 index 0000000000000000000000000000000000000000..0e536ffdd91cd696008b4c01bad3cb53edebdc16 Binary files /dev/null and b/doc/design/cluster_train/src/paddle-ps.graffle differ diff --git a/doc/design/cluster_train/src/paddle-task-queues.graffle b/doc/design/cluster_train/src/paddle-task-queues.graffle new file mode 100644 index 0000000000000000000000000000000000000000..4263ed8bfd2ef0e55058828bf23f2fac3595e5fd Binary files /dev/null and b/doc/design/cluster_train/src/paddle-task-queues.graffle differ diff --git a/doc/design/cluster_train/src/paddle-task-queues.png b/doc/design/cluster_train/src/paddle-task-queues.png new file mode 100644 index 0000000000000000000000000000000000000000..5f980266795776752cebd0c346b85c4a75a47780 Binary files /dev/null and b/doc/design/cluster_train/src/paddle-task-queues.png differ diff --git a/doc/design/cluster_train/src/paddle-task-states.graffle b/doc/design/cluster_train/src/paddle-task-states.graffle new file mode 100644 index 0000000000000000000000000000000000000000..cf1a0b9246d9386a949d2dbb8c32fe84f72eea83 Binary files /dev/null and b/doc/design/cluster_train/src/paddle-task-states.graffle differ diff --git a/doc/design/cluster_train/src/paddle-task-states.png b/doc/design/cluster_train/src/paddle-task-states.png new file mode 100644 index 0000000000000000000000000000000000000000..4ae43cb66c071aee9eb90d875e2373b29af9c3e0 Binary 
files /dev/null and b/doc/design/cluster_train/src/paddle-task-states.png differ
diff --git a/doc/design/cluster_train/src/pserver_init.graffle b/doc/design/cluster_train/src/pserver_init.graffle
new file mode 100644
index 0000000000000000000000000000000000000000..5f3f1f52be8aa7f9049a8fcd6b7c93c8560c1676
Binary files /dev/null and b/doc/design/cluster_train/src/pserver_init.graffle differ
diff --git a/doc/design/cluster_train/src/pserver_init.png b/doc/design/cluster_train/src/pserver_init.png
new file mode 100644
index 0000000000000000000000000000000000000000..dfe491ff98dd7db1c336093c80964a260df2cd90
Binary files /dev/null and b/doc/design/cluster_train/src/pserver_init.png differ
diff --git a/doc/design/cluster_train/src/submit-job.graffle b/doc/design/cluster_train/src/submit-job.graffle
new file mode 100644
index 0000000000000000000000000000000000000000..677cdfb6d9a32168bf71729eb841fa1ca0dd31d6
Binary files /dev/null and b/doc/design/cluster_train/src/submit-job.graffle differ
diff --git a/doc/design/cluster_train/src/submit-job.png b/doc/design/cluster_train/src/submit-job.png
new file mode 100644
index 0000000000000000000000000000000000000000..3046a460a7ba708079e88a560debaa215a694680
Binary files /dev/null and b/doc/design/cluster_train/src/submit-job.png differ
diff --git a/doc/design/cluster_train/src/trainer.graffle b/doc/design/cluster_train/src/trainer.graffle
new file mode 100644
index 0000000000000000000000000000000000000000..42384a3f059966e22e22f5fa4295cc9ead5cef83
Binary files /dev/null and b/doc/design/cluster_train/src/trainer.graffle differ
diff --git a/doc/design/cluster_train/src/trainer.png b/doc/design/cluster_train/src/trainer.png
new file mode 100644
index 0000000000000000000000000000000000000000..6537d3d56589ca9f19a77a50a970e4b5275e6ce0
Binary files /dev/null and b/doc/design/cluster_train/src/trainer.png differ
diff --git a/doc/design/cluster_train/submit-job.md b/doc/design/cluster_train/submit-job.md
new file mode 100644
index 0000000000000000000000000000000000000000..8377d5489dc64bd2fdc5bb4f7bc737e7b489000d
--- /dev/null
+++ b/doc/design/cluster_train/submit-job.md
@@ -0,0 +1,127 @@
+# Submit a Distributed Training Job
+
+The user can submit a distributed training job with Python code, rather than with a command-line interface.
+
+## Runtime Environment On Kubernetes
+
+For a distributed training job, there are two Docker images, called the *runtime Docker image* and the *base Docker image*. The runtime Docker image is the Docker image that gets scheduled by Kubernetes to run during training. The base Docker image is for building the runtime Docker image.
+
+### Base Docker Image
+
+Usually, the base Docker image is the PaddlePaddle product Docker image, including the Paddle binary files and the Python package. Of course, users can specify any image name hosted on any Docker registry to which they have access.
+
+### Runtime Docker Image
+
+The trainer package which the user uploads and some Python dependencies are packaged into a runtime Docker image based on the base Docker image.
+
+- Handle Python Dependencies
+
+  You need to provide a requirements.txt file in your `trainer-package` folder.
Example:
+
+  ```txt
+  pillow
+  protobuf==3.1.0
+  ```
+  See more [details](https://pip.readthedocs.io/en/1.1/requirements.html) about requirements files. An example project looks like:
+  ```bash
+  paddle_example
+    |-quick_start
+      |-trainer.py
+      |-dataset.py
+      |-requirements.txt
+  ```
+
+## Submit Distributed Training Job With Python Code
+
+
+- `paddle.job.dist_train()` will call the Job Server API `/v1/packages` to upload the trainer package and save it on CephFS, and then call `/v1/trainer/job` to submit the PaddlePaddle distributed job.
+- `/v1/trainer/job` will start a build job that prepares the runtime Docker image. When the build job is finished, the Job Server will submit the PaddlePaddle distributed job to Kubernetes.
+- *NOTE*: For the first version, we will not prepare the runtime Docker image; instead, the package is uploaded to Paddle Cloud, and Paddle Cloud mounts the package, in a temporary folder, into the base Docker image. Custom Python dependencies will not be supported in the first version either.
+
+You can call `paddle.job.dist_train` and provide the distributed training configuration as its parameters:
+```python
+paddle.job.dist_train(
+    trainer=dist_trainer(),
+    paddle_job=PaddleJob(
+        job_name = "paddle-cloud",
+        entry_point = "python %s"%__file__,
+        trainer_package = "/example/word2vec",
+        image = "yancey1989/paddle-job",
+        trainers = 10,
+        pservers = 3,
+        trainer_cpu = 1,
+        trainer_gpu = 1,
+        trainer_mem = "10G",
+        pserver_cpu = 1,
+        pserver_mem = "2G"
+    ))
+```
+
+The parameter `trainer` of `paddle.job.dist_train` is a function; you can implement it as follows:
+```python
+def dist_trainer():
+    def trainer_creator():
+        trainer = paddle.v2.trainer.SGD(...)
+        trainer.train(...)
+    return trainer_creator
+```
+
+The pseudo code of `paddle.job.dist_train` is as follows:
+```python
+def dist_train(trainer, paddle_job):
+    # when the code is running on the cloud, RUNNING_ON_CLOUD is set to YES
+    if os.getenv("RUNNING_ON_CLOUD", "NO") == "NO":
+        # submit the paddle job
+        paddle_job.submit()
+    else:
+        # start the training
+        trainer()
+```
+### PaddleJob Parameters
+parameter | type | explanation
+--- | --- | ---
+job_name | str | the unique name of the training job
+entry_point | str | entry point for starting up the trainer process
+trainer_package | str | trainer package file path to which the user has access
+image|str|the [base image](#base-docker-image) for building the [runtime image](#runtime-docker-image)
+pservers|int| Parameter Server process count
+trainers|int| Trainer process count
+pserver_cpu|int| CPU count for each Parameter Server process
+pserver_mem|str| memory allocated for each Parameter Server process; a plain integer using one of these suffixes: E, P, T, G, M, K
+trainer_cpu|int| CPU count for each Trainer process
+trainer_mem|str| memory allocated for each Trainer process; a plain integer using one of these suffixes: E, P, T, G, M, K
+trainer_gpu|int| GPU count for each Trainer process; if you only want CPUs, do not set this parameter
+
+### Deploy Parameter Server, Trainer and Master Process
+  - Deploy the PaddlePaddle Parameter Server processes as a Kubernetes ReplicaSet.
+  - Deploy the PaddlePaddle Trainer processes as a Kubernetes Job.
+  - Deploy the PaddlePaddle Master processes as a Kubernetes ReplicaSet.
+
+## Job Server
+
+- RESTful API
+
+  The Job Server provides a RESTful HTTP API for receiving the trainer package and displaying
+  PaddlePaddle job-related information.
+  - `POST /v1/package` receives the trainer package and saves it on CephFS
+  - `POST /v1/trainer/job` submits a trainer job
+  - `GET /v1/jobs/` lists all jobs
+  - `GET /v1/jobs/` shows the status of a job
+  - `DELETE /v1/jobs/` deletes a job
+  - `GET /v1/version` shows the job server version
+
+- Build Runtime Docker Image on Kubernetes
+
+  `paddle.job.dist_train` will upload the trainer package to the Job Server, which saves it on the distributed filesystem and then starts a job that builds the runtime Docker image scheduled by Kubernetes to run during training.
+
+  There are several benefits to building the runtime Docker image on the Job Server:
+  - On Paddle Cloud, users run the trainer code in a Jupyter Notebook, which is a Kubernetes Pod. If we wanted to execute `docker build` in the Pod, we would have to mount the host's `docker.sock` into the Pod, and the user's code would connect to the host's Docker Engine directly, which is not safe.
+  - Users only need to upload the trainer package files; they do not need to install a Docker engine or a Docker registry as dependencies.
+  - If we switch to another image type, such as rkt, users do not need to care about it.
+
+- Deploy Parameter Server, Trainer and Master Processes
+
+  `POST /v1/trainer/job` receives the distributed training parameters and deploys the job as follows:
+  - Deploy the PaddlePaddle Parameter Server processes as a Kubernetes ReplicaSet.
+  - Deploy the PaddlePaddle Trainer processes as a Kubernetes Job.
+  - Deploy the PaddlePaddle Master processes as a Kubernetes ReplicaSet.
diff --git a/doc/design/file_manager/README.md b/doc/design/file_manager/README.md
new file mode 100644
index 0000000000000000000000000000000000000000..3df10d801e568834729f902aace483d033340e2d
--- /dev/null
+++ b/doc/design/file_manager/README.md
@@ -0,0 +1,87 @@
+# FileManager Design Doc
+## Goals
+This document describes the design of a system named FileManager, which makes it convenient for users to upload their own training data for distributed training.
+
+Its main features include:
+
+- Common command-line commands for managing files and directories
+- Resumable upload and download of large files
+
+## Terminology
+- PFS: short for `PaddlePaddle Cloud File System`, an abstraction of the user's file storage space, as opposed to the local filesystem. We currently build it on CephFS.
+- [CephFS](http://docs.ceph.com/docs/master/cephfs/): a POSIX-compatible filesystem.
+- Chunk: the logical unit into which a file is split for transfer.
+
+## Modules
+### Architecture
+
+
+### PFSClient
+- Features: detailed design [link](./pfs/pfsclient.md)
+  - Provides commands for users to manage their files
+  - Needs to run across platforms
+
+- Mutual authentication
+  PFSClient and the Ingress perform mutual [TLS](#tls) authentication, so a user first needs to register at `cloud.paddlepaddle.org` to apply for user space, and download the system-generated CA (certificate authority), key, and CRT (CA-signed certificate) locally, before using PFSClient.
+
+### [Ingress](https://kubernetes.io/docs/concepts/services-networking/ingress/)
+- Features:
+  Provides a layer-7 reverse proxy and load balancing based on sticky sessions.
+
+- Passing the user's identity through
+  The Ingress needs to pass PFSClient's identity on to PFSServer; see [link](http://www.integralist.co.uk/posts/clientcertauth.html#3) for how to configure this.
+
+### PFSServer
+PFSServer provides a RESTful API; it receives and handles file management requests from the PFSClient side and returns the results to it.
+
+RESTful API
+
+- /api/v1/files
+  - `GET /api/v1/files`: Get metadata of files or directories.
+  - `POST /api/v1/files`: Create files or directories.
+  - `PATCH /api/v1/files`: Update files or directories.
+  - `DELETE /api/v1/files`: Delete files or directories.
+
+- /api/v1/file/chunks
+  - `GET /api/v1/file/chunks`: Get the metadata of a file's chunks.
+
+- /api/v1/storage/files
+  - `GET /api/v1/storage/files`: Download files or directories.
+  - `POST /api/v1/storage/files`: Upload files or directories.
+
+- /api/v1/storage/file/chunks
+  - `GET /api/v1/storage/file/chunks`: Download chunks' data.
+  - `POST /api/v1/storage/file/chunks`: Upload chunks' data.
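+
+Below is a minimal client-side sketch of the API above, using Python's `requests` library. Only the endpoint paths come from the list above; the host name, the mutual-TLS setup, and the JSON/query field names are illustrative assumptions rather than part of this design.
+
+```python
+import requests
+
+# Hypothetical PFSServer address behind the Ingress; client cert/key as issued at registration.
+BASE = "https://cloud.paddlepaddle.org/api/v1"
+CERT = ("client.crt", "client.key")
+
+# Create a directory (assumed payload fields).
+requests.post(BASE + "/files",
+              json={"path": "/pfs/dc1/home/user/folder", "type": "directory"},
+              cert=CERT)
+
+# Fetch the chunk metadata of a file, then download the first chunk's data.
+meta = requests.get(BASE + "/file/chunks",
+                    params={"path": "/pfs/dc1/home/user/file"},
+                    cert=CERT).json()
+data = requests.get(BASE + "/storage/file/chunks",
+                    params={"path": "/pfs/dc1/home/user/file",
+                            "offset": 0, "chunksize": 256 * 1024},
+                    cert=CERT).content
+```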
+
+## File Transfer Optimization
+
+### Chunked file transfer
+User files can be fairly large; uploading them to the cloud or downloading them to the local machine may take a long time, and the network may be unstable during the transfer. To deal with these problems, we introduce the concept of a Chunk: a Chunk consists of a file offset, the data, the data length, and a checksum. File upload and download are both implemented as operations on Chunks. Since a Chunk is small (256KB by default), a single transfer completes quickly and is less likely to fail. After transferring the last Chunk, PFSClient needs to check whether the MD5 of the destination file matches that of the source file.
+
+A typical Chunk looks like this:
+
+```
+type Chunk struct {
+    fileOffset int64
+    checksum   uint32
+    len        uint32
+    data       []byte
+}
+```
+
+### Generating sparse files
+When the destination file does not exist, or its size differs from the source file, we can use [Fallocate](https://golang.org/pkg/syscall/#Fallocate) to generate a sparse file, and then write multiple Chunks concurrently.
+
+### Overwriting inconsistent parts
+The key to the file transfer is that PFSClient compares the checksums of the source and destination file Chunks, and only the Chunks whose checksums differ are downloaded or uploaded by PFSClient. This way, the parts that were already transferred successfully do not need to be transferred again.
+
+## User workflow
+See [link](https://github.com/PaddlePaddle/Paddle/blob/develop/doc/design/cluster_train/data_dispatch.md)
+
+## Skeleton generation
+We use [swagger](https://github.com/swagger-api/swagger-codegen) to generate the skeleton parts of PFSClient and PFSServer, so that we can put more of our effort into the logic itself.
+
+## References
+- [TLS complete guide](https://github.com/k8sp/tls/blob/master/tls.md)
+- [aws.s3](http://docs.aws.amazon.com/cli/latest/reference/s3/)
+- [linux man document](https://linux.die.net/man/)
diff --git a/doc/design/file_manager/pfs/pfsclient.md b/doc/design/file_manager/pfs/pfsclient.md
new file mode 100644
index 0000000000000000000000000000000000000000..56bc70c54bbc92b78d66e04fb495b1300cf8ebe0
--- /dev/null
+++ b/doc/design/file_manager/pfs/pfsclient.md
@@ -0,0 +1,129 @@
+# PFSClient
+
+## Description
+The `pfs` command is a command-line interface for managing your files on PaddlePaddle Cloud.
+
+## Synopsis
+```
+paddle [options] pfs [parameters]
+```
+
+## Options
+```
+--profile (string)
+  Use a specific profile from your credential file.
+
+--help (string)
+  Display more information about the command
+
+--version
+  Output version information and exit
+
+--debug
+  Show a detailed debugging log
+
+--only-show-errors (boolean)
+  Only errors and warnings are displayed. All other output is suppressed.
+```
+
+## Path Arguments
+When using a command, we need to specify path arguments. There are two path argument types: `localpath` and `pfspath`.
+
+A `pfspath` begins with `/pfs`, e.g.: `/pfs/$DATACENTER/home/$USER/folder`.
+
+[Here](https://github.com/PaddlePaddle/Paddle/blob/develop/doc/design/cluster_train/data_dispatch.md#上传训练文件) is how to configure datacenters.
+
+## Order of Path Arguments
+Commonly, if there are two path arguments, the first is the source and the second is the destination.
+
+## Subcommands
+- rm - remove files or directories
+
+```
+Synopsis:
+  rm [-r] [-v] ...
+
+Options:
+  -r
+    Remove directories and their contents recursively
+  -v
+    Cause rm to be verbose, showing files after they are removed.
+
+Examples:
+  paddle pfs rm /pfs/$DATACENTER/home/$USER/file
+  paddle pfs rm -r /pfs/$DATACENTER/home/$USER/folder
+```
+- mv - move (rename) files
+
+```
+Synopsis:
+  mv [-f | -n] [-v]
+  mv [-f | -n] [-v] ...
+  mv [-f | -n] [-v]
+  mv [-f | -n] [-v] ...
+  mv [-f | -n] [-v]
+  mv [-f | -n] [-v] ...
+
+Options:
+  -f
+    Do not prompt for confirmation before overwriting the destination path. (The -f option overrides previous -n options.)
+  -n
+    Do not overwrite an existing file. (The -n option overrides previous -f options.)
+  -v
+    Cause mv to be verbose, showing files after they are moved.
+
+Examples:
+  paddle pfs mv ./text1.txt /pfs/$DATACENTER/home/$USER/text1.txt
+```
+- cp - copy files or directories
+
+```
+Synopsis:
+  cp [-r] [-f | -n] [-v] [--preserve-links]
+  cp [-r] [-f | -n] [-v] [--preserve-links] ...
+  cp [-r] [-f | -n] [-v] [--preserve-links]
+  cp [-r] [-f | -n] [-v] [--preserve-links] ...
+  cp [-r] [-f | -n] [-v] [--preserve-links]
+  cp [-r] [-f | -n] [-v] [--preserve-links] ...
+
+Options:
+  -r
+    Copy directories recursively
+  -f
+    Do not prompt for confirmation before overwriting the destination path. (The -f option overrides previous -n options.)
+  -n
+    Do not overwrite an existing file. (The -n option overrides previous -f options.)
+  -v
+    Cause cp to be verbose, showing files after they are copied.
+  --preserve-links
+    Preserve links when copying links
+
+Examples:
+  paddle pfs cp ./file /pfs/$DATACENTER/home/$USER/file
+  paddle pfs cp /pfs/$DATACENTER/home/$USER/file ./file
+```
+- ls - list files
+
+```
+Synopsis:
+  ls [-R] ...
+
+Options:
+  -R
+    List directory(ies) recursively
+
+Examples:
+  paddle pfs ls /pfs/$DATACENTER/home/$USER/file
+  paddle pfs ls /pfs/$DATACENTER/home/$USER/folder
+```
+
+- mkdir - make directory(ies)
+Create intermediate directory(ies) as required.
+
+```
+Synopsis:
+  mkdir ...
+
+Examples:
+  paddle pfs mkdir /pfs/$DATACENTER/home/$USER/folder
+```
diff --git a/doc/design/file_manager/src/filemanager.graffle b/doc/design/file_manager/src/filemanager.graffle
new file mode 100644
index 0000000000000000000000000000000000000000..7861a33072bc1908f69d12b37c20491dd8663103
Binary files /dev/null and b/doc/design/file_manager/src/filemanager.graffle differ
diff --git a/doc/design/file_manager/src/filemanager.png b/doc/design/file_manager/src/filemanager.png
new file mode 100644
index 0000000000000000000000000000000000000000..8139a19f5722f56d3c211f3ab0d3982f751134b9
Binary files /dev/null and b/doc/design/file_manager/src/filemanager.png differ
diff --git a/doc/design/images/replica.png b/doc/design/images/replica.png
new file mode 100644
index 0000000000000000000000000000000000000000..ef59e56b01d792a059279e6bb9a29f3db6a59a41
Binary files /dev/null and b/doc/design/images/replica.png differ
diff --git a/doc/design/images/two_phase_commit.png b/doc/design/images/two_phase_commit.png
new file mode 100644
index 0000000000000000000000000000000000000000..ef6f7317bd440cc7d9fe08fcbbf2b7a542f99049
Binary files /dev/null and b/doc/design/images/two_phase_commit.png differ
diff --git a/doc/design/mkldnn/README.MD b/doc/design/mkldnn/README.MD
new file mode 100644
index 0000000000000000000000000000000000000000..e956994431fbb43438c56dcd96ad8313cf516090
--- /dev/null
+++ b/doc/design/mkldnn/README.MD
@@ -0,0 +1,110 @@
+# Intel® MKL-DNN on PaddlePaddle: Design Doc
+
+We plan to integrate the Intel Math Kernel Library for Deep Neural Networks (**MKL-DNN**\[[1](#references)\]) into PaddlePaddle, to bring out the advantages of Intel platforms and effectively improve PaddlePaddle's performance on Intel architectures.
+
+Our basic short-term goals are:
+
+- Complete MKL-DNN implementations of the commonly used layers.
+- Complete MKL-DNN implementations of the common deep neural networks VGG, GoogLeNet, and ResNet.
+
+
+## Contents
+
+- [Overview](#overview)
+- [Actions](#actions)
+  - [CMake](#cmake)
+  - [Layers](#layers)
+  - [Activations](#activations)
+  - [Unit Tests](#unit-tests)
+  - [Protobuf Messages](#protobuf-messages)
+  - [Python API](#python-api)
+  - [Demos](#demos)
+  - [Benchmarking](#benchmarking)
+  - [Others](#others)
+- [Design Concerns](#design-concerns)
+
+## Overview
+
+We will integrate MKL-DNN into PaddlePaddle as a third-party library; the overall framework is shown in Figure 1 below.
+Figure 1. PaddlePaddle on IA.
+
+## Actions
+We have divided the integration plan roughly into the following aspects.
+
+### CMake
+We will add a `WITH_MKLDNN` option to `CMakeLists.txt`; setting it to `ON` enables building the MKL-DNN functionality. OpenMP will be enabled automatically at the same time to improve MKL-DNN's performance.
+
+We will also introduce a `WITH_MKLML` option to choose whether to use the MKLML package shipped with MKL-DNN. This package can be used independently of MKL-DNN, but we recommend turning on MKLML whenever MKL-DNN is enabled, to get the best performance.
+
+Accordingly, we will create `mkldnn.cmake` and `mklml.cmake` in the `cmake/external` directory; they download the corresponding packages when PaddlePaddle is built and place them in PaddlePaddle's third party directory.
+
+**Note**: when `WITH_MKLML=ON`, this package is preferred as PaddlePaddle's CBLAS and LAPACK library, so the logic in `cmake/cblas.cmake` will be changed slightly.
+
+### Layers
+All MKL-DNN-related C++ layers will be stored in `paddle/gserver/layers`, following PaddlePaddle's directory structure, and all their file names will start with *Mkldnn*.
+
+All MKL-DNN layers will inherit from a parent class called `MkldnnLayer`, which in turn inherits from PaddlePaddle's base class `Layer`.
+
+### Activations
+Since activation functions are a concept independent of layers in PaddlePaddle, we will add a `MkldnnActivation.h` file under `paddle/gserver/activations` to define some interfaces for MKL-DNN; the implementations will still live in the `ActivationFunction.cpp` file.
+
+### Unit Tests
+We will add `test_Mkldnn.cpp` and `MkldnnTester.*` under `paddle/gserver/test` for MKL-DNN testing.
+
+For activation tests, we plan to add new test types directly to PaddlePaddle's existing test files.
+
+### Protobuf Messages
+Depending on the needs of specific layers, necessary options may be added to `proto/ModelConfig.proto`.
+
+### Python API
+For now we only consider the **v1 API**.
+
+We plan to add a `use_mkldnn` option to `python/paddle/trainer/config_parser.py`, making it convenient for users to choose MKL-DNN layers.
+
+A possible implementation:
+
+```python
+use_mkldnn = bool(int(g_command_config_args.get("use_mkldnn", 0)))
+if use_mkldnn:
+    self.layer_type = mkldnn_*  # MKL-DNN layer types are prefixed with "mkldnn_"
+```
+
+All MKL-DNN layer types will start with *mkldnn_* to distinguish them.
+
+We may also add the necessary MKL-DNN interfaces to `activations.py` and `layers.py` under the `python/paddle/trainer_config_helper` directory.
+
+### Demos
+
+We will add an `mkldnn` folder under the `v1_api_demo` directory, containing some demo scripts for MKL-DNN testing.
+
+### Benchmarking
+We will consider adding some logic to `benchmark/paddle/image/run.sh` for tests that use MKL-DNN.
+
+### Others
+1. When MKL-DNN is used, CPU buffers will be aligned to 64 bytes.
+2. Dig deeper into PaddlePaddle to look for other optimization opportunities, e.g., possibly using OpenMP to improve the performance of SGD updates.
+
+## Design Concerns
+
+To better conform to PaddlePaddle's code style\[[2](#references)\] while sacrificing as little of MKL-DNN's performance as possible\[[3](#references)\], we have identified several points that need special attention:
+
+1. Use **deviceId_**. To add as few variables or functions to the parent class Layer as possible, we decided to reuse the existing `deviceId_` variable to distinguish layer properties, defining `-2` as the device ID specific to `MkldnnLayer`.
+2. Override the parent class Layer's **init** function and set `deviceId_` to `-2`, marking the layer as one that runs in the MKL-DNN environment.
+3. Create `MkldnnMatrix` to manage the memory functions and interfaces that MKL-DNN uses, as well as the format information it needs.
+4. Create `MkldnnBase` to define classes and functions beyond those related to layers and memory, including `MkldnnStream` and `CpuEngine` used by MKL-DNN, and possibly `FPGAEngine` etc. in the future.
+5. Add two `MkldnnMatrixPtr`s to **Argument**, named `mkldnnValue` and `mkldnnGrad`, to hold the memory buffers used by `MkldnnLayer`, and add a function cvt (to be renamed to something more suitable) to handle memory conversion between the "CPU device" and the "MKL-DNN device".
+6. Add a piece of logic to the parent class `Layer`'s `getOutput` function to check `deviceId` and, when the device differs between MKL-DNN and CPU, perform an upfront conversion, i.e., call `Argument`'s cvt function to move the output to the required device.
+7. Add a `use_mkldnn` flag to the existing `FLAGS` to choose whether to use the MKL-DNN-related functionality.
+
+## References
+
+1. [Intel Math Kernel Library for Deep Neural Networks (Intel MKL-DNN)](https://github.com/01org/mkl-dnn "Intel MKL-DNN")
+2. [The original proposal](https://github.com/PaddlePaddle/Paddle/pull/3096) would have introduced **nextLayer** information. But in PaddlePaddle, neither the pre-refactoring layers nor the post-refactoring ops are supposed to know about the next layer/op.
+3. MKL-DNN's high-performance memory format differs from PaddlePaddle's native `NCHW` (the cuDNN part of PaddlePaddle also uses `NCHW`, so this problem does not arise there), so we need to introduce a conversion method and convert the format only when necessary, in order to get the best performance out of MKL-DNN.
diff --git a/doc/design/mkldnn/image/overview.png b/doc/design/mkldnn/image/overview.png
new file mode 100644
index 0000000000000000000000000000000000000000..84b455c28230703599a2529f014cfbb222138fef
Binary files /dev/null and b/doc/design/mkldnn/image/overview.png differ
diff --git a/doc/design/multi_language_interface/00.why_plain_c.md b/doc/design/multi_language_interface/00.why_plain_c.md
new file mode 100644
index 0000000000000000000000000000000000000000..a1443093342c5a3ed698fb6b52a751dfc7cb5319
--- /dev/null
+++ b/doc/design/multi_language_interface/00.why_plain_c.md
@@ -0,0 +1,118 @@
+# Implementing Paddle's Multi-Language Interface
+## Background
+
+Paddle needs a multi-language interface. This interface needs to:
+
+* Have standard, good documentation
+  * For example, Python can use [Sphinx](http://www.sphinx-doc.org/en/stable/) to generate API documentation, and golang can use [GoDoc](https://godoc.org/golang.org/x/tools/cmd/godoc). Both require the interface to be fully annotated following the established conventions.
+* Adapt to the characteristics of each language
+  * For example, Java and Python handle errors by throwing exceptions, while golang should handle errors through return values.
+
+## Basic Requirements
+
+Paddle's multi-language interface implementation covers the following aspects:
+
+* We distribute Paddle as a dynamic library. This dynamic library embeds no interpreter of any other language and uses no other dynamic libraries.
+* The dynamic library exports functions through C99-standard header files, and neither uses nor exports C++ symbols.
+* Paddle's internal structs and classes are not exported; only `void*` pointers are used as type handles.
+* We hand-write the multi-language bindings instead of using a code generator such as SWIG.
+
+
+## Rationale
+
+### Distributing Paddle as a dynamic library
+
+* Paddle's linking process is fairly complicated.
+  * If users want to link Paddle's static library (libpaddle.a) into their own programs, they have to use `--whole-archive` (for GCC) or `--force_load` (for Clang) to make sure all symbols in libpaddle.a are written into their binaries, because Paddle's source code uses the [object factory design pattern](http://stackoverflow.com/a/1310326/724872).
+* For compiled languages such as C/C++, using a static library is about as hard as using a dynamic one. But interpreted languages, such as [Python](http://stackoverflow.com/questions/19560594/how-to-import-static-library-in-python) or [Java](http://stackoverflow.com/questions/24493337/linking-static-library-with-jni), can only call Paddle's dynamic library; otherwise the static library would have to be linked into the interpreter itself.
+  * The binary an interpreted language actually runs is the interpreter itself; calling a static library means linking it into the interpreter. For Java, for instance, this means adding the static library into the JVM, which is an unusual practice for ordinary Java developers.
+
+### No interpreter of any other language embedded in the dynamic library
+
+* Paddle's current process model is that the C++ side drives a Python interpreter for model-configuration parsing and data reading.
+* Our final dynamic library will embed no Python (or any other language's) interpreter. Model-configuration parsing and data reading are delegated to the host language.
+
+One current problem of Paddle is that if the Python interpreter embedded in Paddle differs in version from the external Python being used, Paddle errors out and exits directly.
+
+### The Paddle dynamic library references no other dynamic libraries
+
+* That is, the dynamic library depends on no other files and can run on any machine.
+
+### The dynamic library exports functions through C99-standard headers, and neither uses nor exports C++ symbols
+
+* C++ compilers have no standard for [name mangling](https://en.wikipedia.org/wiki/Name_mangling#C.2B.2B); different compiler versions may generate different symbols for the same piece of C++ code. A multi-language interface reads the generated binary (the dynamic library) directly and therefore needs stable exported symbols.
+* The C language has a standard for exported symbols and, on common platforms, a standard ABI calling convention.
+* Most languages support consuming a C API.
+* We use C99 instead of C89 because C99 supports [fixed-width integer types](https://en.wikipedia.org/wiki/C_data_types#Fixed-width_integer_types) and a [Boolean type](https://en.wikipedia.org/wiki/C_data_types#Boolean_type).
+* We use C99 instead of C11 because [C11](https://en.wikipedia.org/wiki/C11_(C_standard_revision)) has no features Paddle particularly needs, and C99 is more widely used than C11.
+
+### No Paddle-internal structs or classes are exported; only `void*` pointers serve as type handles
+
+* Paddle's internal classes are written in C++ and are hard to export directly through a C interface.
+* In the C-API, `void*` represents Paddle's internal classes, and each API function checks the type itself.
+
+In the C header `paddle_matrix.h`:
+
+```C
+typedef void* paddle_matrix;
+typedef int paddle_error;
+
+extern "C"
+paddle_error paddle_matrix_get_shape(paddle_matrix matrix,
+                                     uint64_t* width,
+                                     uint64_t* height);
+```
+And the C interface is implemented in C++, in the file `paddle_matrix.cpp`:
+
+```cpp
+#include "paddle/math/matrix.h"
+extern "C"
+paddle_error paddle_matrix_get_shape(paddle_matrix matrix,
+                                     uint64_t *width,
+                                     uint64_t *height) {
+  auto m = (paddle::capi::CMatrix*)(matrix);
+  *width = m->width();
+  *height = m->height();
+  return 0;  // assuming 0 denotes success
+}
+```
+
+where the content of `paddle/capi/CMatrix.hpp` is:
+
+```cpp
+namespace paddle {
+namespace math {
+
+class CMatrix {
+  std::shared_ptr<Matrix> mat;
+};
+
+}  // namespace math
+}  // namespace paddle
+```
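+
+As a quick illustration of the point that most languages can consume a plain C ABI, here is a minimal sketch that binds the `paddle_matrix_get_shape` declaration above with Python's built-in `ctypes`. The library name follows the C-API doc referenced later; obtaining a valid `paddle_matrix` handle (e.g., via `paddle_matrix_create`) is left out because its signature is not specified here.
+
+```python
+import ctypes
+
+# Load the C-API dynamic library exported by Paddle.
+lib = ctypes.CDLL("libpaddle_capi_shared.so")
+
+# paddle_error paddle_matrix_get_shape(paddle_matrix, uint64_t*, uint64_t*)
+lib.paddle_matrix_get_shape.restype = ctypes.c_int  # paddle_error
+lib.paddle_matrix_get_shape.argtypes = [
+    ctypes.c_void_p,                  # the void* type handle
+    ctypes.POINTER(ctypes.c_uint64),  # out: width
+    ctypes.POINTER(ctypes.c_uint64),  # out: height
+]
+
+def matrix_shape(matrix_handle):
+    width, height = ctypes.c_uint64(), ctypes.c_uint64()
+    err = lib.paddle_matrix_get_shape(matrix_handle,
+                                      ctypes.byref(width), ctypes.byref(height))
+    if err != 0:  # non-constructor calls report errors via paddle_error
+        raise RuntimeError("paddle_error %d" % err)
+    return width.value, height.value
+```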
+
+### Hand-written multi-language bindings instead of a code generator like SWIG
+
+* [SWIG](http://www.swig.org/) is a code generator for multi-language interfaces. Its approach is that you write code in C/C++, and SWIG reads the C/C++ headers directly and generates binding code for various languages.
+  * For a multi-language interface, SWIG requires an interface file. This file has its own peculiar syntax with a high learning cost, and supporting an additional third-party language requires adding definitions for that language. The interface file can sometimes be very [tricky](https://github.com/PaddlePaddle/Paddle/blob/develop/paddle/api/Paddle.swig#L36) to write, which raises the learning cost for community contributions.
+  * The interfaces SWIG exposes keep the C++ interface style, which makes it hard to keep a consistent code style across languages (function naming, error handling).
+    * The function and class names SWIG exposes in the third-party language are exactly the same as in C++, and the C++ naming style does not suit other languages. With SWIG we would need to rename a large number of `SomeCppClass`es to `some_python_class` or `SomeGoTypes` in the interface file.
+    * Different languages handle errors differently. For Java or Python, the most common error handling is exceptions, while for Golang it is return values. SWIG can only expose the C++ interface as-is and cannot adapt error handling to each language.
+  * For most languages, consuming a plain C .h directly is not hard, e.g., Python's [cffi](https://cffi.readthedocs.io/en/latest/overview.html#simple-example-abi-level-in-line) or [Cython](http://cython.org/), and golang's [cgo](https://golang.org/cmd/cgo/).
+  * The languages and interpreters SWIG supports are limited. For Python, SWIG only supports the CPython interpreter, not PyPy.
+
+
+## Summary of Reasons
+
+| Conclusion | As opposed to | Reason |
+|---| --- | --- |
+| Use a dynamic library | Not a static library | Interpreted languages can only call dynamic libraries, and linking Paddle's static library is complicated |
+| Embed no other language's interpreter | Do not embed a Python interpreter | Paddle C++ currently embeds a Python interpreter, which causes bugs when two Python versions share one process |
+| Reference no other dynamic libraries | | A single Paddle dynamic library can run on any Linux system |
+| Use C99 for the interface | Not C++ | C has a standard ABI; C99 is the most widely used C standard and supports the bool type and fixed-width integers (uint64_t etc.) |
+| Use void* as class handles | Do not spell out what each class contains | Simple to implement, and it decouples the interface from implementation details |
+| Hand-write multi-language bindings | Do not use SWIG | SWIG requires binding developers to master SWIG configuration, which hinders community participation; SWIG-generated code cannot guarantee a consistent multi-language code style |
+
+
+## Implementation
+
+See [Inference implementation](01.inference_implementation.md)
diff --git a/doc/design/multi_language_interface/01.inference_implementation.md b/doc/design/multi_language_interface/01.inference_implementation.md
new file mode 100644
index 0000000000000000000000000000000000000000..9820284523246a062581f322616d196f575c9d29
--- /dev/null
+++ b/doc/design/multi_language_interface/01.inference_implementation.md
@@ -0,0 +1,131 @@
+# C-API Inference Implementation Doc
+
+This document describes the implementation details of the Paddle C-API. The Paddle C-API is the foundation of the multi-language APIs. Paddle has many APIs to expose; we implement the model-inference APIs first and use their implementation as a sample for discussion. For why a C-API is needed at all, see [Why Plain C](./00.why_plain_c.md).
+
+## Table of Contents
+  * [C-API inference implementation doc](#c-api-inference-implementation-doc)
+    * [Principles for exposed interfaces](#principles-for-exposed-interfaces)
+    * [Directory structure](#directory-structure)
+    * [Implementation](#implementation)
+      * [capi.h](#capih)
+      * [Type-specific header files](#type-specific-header-files)
+      * [capi_private.h](#capi_privateh)
+      * [Type-specific implementation files](#type-specific-implementation-files)
+      * [libpaddle_capi_shared.{so, dylib}](#libpaddle_capi_sharedso-dylib)
+      * [libpaddle_capi_whole.a](#libpaddle_capi_wholea)
+      * [examples](#examples)
+      * [Build options](#build-options)
+
+
+## Principles for Exposed Interfaces
+
+1. All interfaces are C interfaces, i.e., they use `extern "C"`.
+2. Except for functions that construct a type (`paddle_matrix_create` etc.), all functions return `paddle_error`, and calls must not throw exceptions or produce runtime errors.
+3. All type names are of the form `paddle_<type>`, and all functions related to a type are named `paddle_<type>_<function>`.
+4. If a Paddle Core concept (GradientMachine/Matrix) needs to be exposed to other languages, then
+   * Keep the exposed interface as simple as possible: expose only the concept's interface, not its implementation, i.e., expose `GradientMachine` or `Matrix` but not `RecurrentGradientMachine` or `CpuSparseMatrix`.
+   * Expose only the concept's necessary functions. `Necessary` means the minimum set of functions needed to complete a task.
+5. Do not over-wrap at the `capi` layer.
+   * If a Paddle concept must be exposed but is too fragmented, do not wrap it at the `capi` layer; instead modify Paddle Core directly so that the concept is no longer fragmented in Paddle's core.
+
+
+## Directory Structure
+
+```text
+Paddle
+`-- paddle
+    `-- capi
+        `-- examples        # The example project for C-API.
+        `-- tests           # unittests for C-API
+        `-- capi.h          # C-API header file.
+        `-- capi_private.h  # The shared header file between implementation sources.
+        `-- matrix.{h, cpp}
+        `-- gradient_machine.{h, cpp}
+        `-- ...
+```
+
+
+Paddle's C-API directory structure is shown above. All header files in this directory except `capi_private.h` will be installed under the include/paddle path, and the binaries produced by the C-API will be installed under the `lib` directory. That is, the directory structure after installation is:
+
+```text
+`-- include
+    `-- paddle
+        `-- capi.h
+        `-- matrix.h
+        `-- gradient_machine.h
+        `-- ...
+`-- lib
+    `-- libpaddle_capi_shared.{so, dylib} # on macOS, the dynamic library's file name extension is `dylib`
+    `-- libpaddle_capi_whole.a            # static library for all symbols of Paddle.
+```
+
+## Implementation
+
+The following sections describe how each kind of file is implemented.
+
+### capi.h
+
+`capi.h` is the only header users need to include when using the C-API. `capi.h` includes the type headers `matrix.h` and `gradient_machine.h`. When including other type headers, relative paths are used, i.e., `#include "matrix.h"`.
+
+### Type-specific header files
+
+Type-specific header files, e.g., `matrix.h` and `gradient_machine.h`, contain one type's definition and all of its exposed functions.
+
+These headers make no assumptions about include order; even if a user includes a type's header directly, it should not fail (although this is discouraged). If one type needs to reference another, e.g., `gradient_machine` needs `matrix`, it includes that type's header directly, i.e., `#include "matrix.h"`.
+
+### capi_private.h
+
+`capi_private.h` is the header shared among the implementation sources; it mainly contains the structs actually behind the exposed types. When users use the C-API, all Paddle types decay to `void *`, i.e., `typedef void* paddle_matrix`. But every type exposed by the C-API is backed by a struct implemented in `capi_private.h`.
+
+```cpp
+struct CMatrix {
+  int type = MatrixType;
+  std::shared_ptr<Matrix> mat;
+};
+```
+
+Usually this struct contains two members.
+
+* `type` is a type tag; each type has a different value in its type field. This way, even though every parameter the C-API accepts is a `void *`, we can still determine each argument's actual type.
+
+  ```cpp
+  void some_c_api_function(void* some_instance) {
+    int* type = (int *) some_instance;
+    switch (*type) {
+    case MatrixType:
+      CMatrix* mat = (CMatrix *) some_instance;
+      ...
+    ...
+    }
+  }
+  ```
+* The other member of the struct is a smart pointer (shared_ptr) to the Paddle Core object of this type.
+  * The reason for using a smart pointer: users can safely release a C-API instance without having to worry about whether Paddle Core is still using it.
+  * For example, a user obtains the neural network's parameter instance through the C-API. When done with the parameters, the user simply deletes them. Even if the model in Paddle Core is still using the parameters, they will not be deleted along with the handle.
+
+### Type-specific implementation files
+
+Type-specific implementation files, i.e., `matrix.cpp`, `gradient_machine.cpp`, etc., implement the C-API interfaces in C++11 and export them with `extern "C"`. The implementations perform the necessary safety checks on the input arguments and forward the C-API arguments to `Paddle Core`.
+
+### libpaddle\_capi_shared.{so, dylib}
+
+`libpaddle_capi_shared` is the dynamic library exported by the C-API. Its link parameters are similar to Paddle's other binaries (e.g., `paddle_trainer`). Users can use this dynamic library directly to pull in the Paddle C-API, via `-lpaddle_capi_shared`.
+
+### libpaddle\_capi_whole.a
+
+`libpaddle_capi_whole` is the static library exported by the C-API. It contains all of Paddle's symbols; it is produced by packing together the object files of `libpaddle_gserver.a`, `libpaddle_math.a`, `libpaddle_capi.a`, and the other static libraries. It is used via `--whole-archive -lpaddle_capi_whole --no-whole-archive`.
+
+
+### examples
+
+The samples include model-inference sample code written in `C99`. See [example/README.md](../../../paddle/capi/examples/README.md) for details.
+
+## Build Options
+
+The C-API build option is off by default. To turn it on, set the following at cmake time:
+
+```bash
+cmake ${YOUR_SOURCE_ROOT} -DWITH_C_API=ON -DWITH_PYTHON=OFF -DWITH_SWIG_PY=OFF
+```
+
+When building the C-API, it is recommended that Paddle neither embed the Python interpreter nor generate the `SWIG` interfaces; see [Why Plain C](./00.why_plain_c.md) for the reasons.
diff --git a/doc/design/parameters_in_cpp.md b/doc/design/parameters_in_cpp.md
new file mode 100644
index 0000000000000000000000000000000000000000..b6f99bc7d9d6fafacb0a4bcff806b65d9aef98cc
--- /dev/null
+++ b/doc/design/parameters_in_cpp.md
@@ -0,0 +1,41 @@
+# Design Doc: The C++ Class `Parameters`
+
+`Parameters` is a concept we designed in the Paddle V2 API. `Parameters` is a container of parameters; it makes it possible for Paddle to share parameters between topologies. We described the usage of `Parameter` in [api.md](./api.md).
+
+We used Python to implement Parameters when designing the V2 API. There are several defects in the current implementation:
+* We just use `memcpy` to share Parameters between topologies, which is very inefficient.
+* We did not implement sharing Parameters while training; we just trigger `memcpy` when training starts.
+
+It is necessary to implement Parameters on the C++ side. However, this amounts to a code refactoring of Paddle, because Paddle was designed to train only one topology, i.e., each GradientMachine contains its Parameter as a data member. In the current Paddle implementation, there are three concepts associated with `Parameters`:
+
+1. `paddle::Parameter`.
A `Parameters` is a container for `paddle::Parameter`.
+It is evident that we should use `paddle::Parameter` when developing `Parameters`.
+However, the `Parameter` class contains many functions and does not have a clear interface.
+It contains `create/store Parameter`, `serialize/deserialize`, `optimize (i.e., SGD)`, and `randomize/zero`.
+When developing `Parameters`, we only use the `create/store Parameter` functionality.
+We should extract the functionalities of `Parameter` into several classes to clean up the Paddle C++ implementation.
+
+2. `paddle::GradientMachine` and its sub-classes, e.g., `paddle::MultiGradientMachine`, `paddle::NeuralNetwork`.
+We should pass `Parameters` to `paddle::GradientMachine` when calling `forward/backward`, to avoid `memcpy` between topologies.
+Also, we should handle multi-GPU/CPU training, because `forward` and `backward` would be performed on multiple GPUs and CPUs.
+`Parameters` should dispatch the parameter value to each device and gather the parameter gradient from each device.
+
+3. `paddle::ParameterUpdater`. The ParameterUpdater is used to update parameters in Paddle.
+So `Parameters` should be used by `paddle::ParameterUpdater`, and `paddle::ParameterUpdater` should optimize `Parameters` (e.g., by SGD).
+
+
+The step-by-step approach for implementing Parameters in the Paddle C++ core is listed below. Each step should be a PR and can be merged into Paddle one by one.
+
+1. Clean up the `paddle::Parameter` interface. Extract the functionalities of `paddle::Parameter` to prepare for the implementation of Parameters.
+
+2. Implement a `Parameters` class. It just stores `paddle::Parameter`s inside. Make `GradientMachine` use `Parameters` as a class member.
+
+3. Make `Parameters` support multi-CPU and multi-GPU training, to prepare for sharing `Parameter`s between topologies.
+Because we need to share `Parameters` between topologies, it is `Parameters`' responsibility to exchange parameters between GPUs.
+`GradientMachine` should not handle how to exchange parameters, because a `GradientMachine` only trains one topology and we need to support training many topologies in Paddle, i.e., many GradientMachines could use one `Parameters` object.
+   * We should use a global function to exchange parameters between GPUs, not a member function of `Parameters`. The `MultiGradientMachine` invokes this function, taking `Parameters` as its input.
+   * The MultiGradientMachine contains many functionalities. Extracting the parameter-exchanging logic would make `MultiGradientMachine` clearer and simpler.
+
+4. Make `Parameters` an argument of the `forward/backward` functions, not a data member of `GradientMachine`. For example, `forward` could be `forward(const Parameters& params, ...)` and `backward` could be `backward(Parameters* params, ...)`. After this step, Paddle can share `Parameters` between topologies.
+
+5. `ParameterUpdater` is invoked by `GradientMachine` and `Trainer`, but it updates `Parameters`. At the end of this code refactoring, we can change `ParameterUpdater` to use `Parameters` directly, to make `ParameterUpdater`'s implementation clear.
diff --git a/doc/design/reader/README.md b/doc/design/reader/README.md
new file mode 100644
index 0000000000000000000000000000000000000000..f21f7af520df5171798326818ecb97c3bcd14a12
--- /dev/null
+++ b/doc/design/reader/README.md
@@ -0,0 +1,202 @@
+# Python Data Reader Design Doc
+
+At training and testing time, PaddlePaddle programs need to read data.
To ease users' work of writing data-reading code, we define the following:
+
+- A *reader* is a function that reads data (from a file, the network, a random number generator, etc.) and yields data items.
+- A *reader creator* is a function that returns a reader function.
+- A *reader decorator* is a function which accepts one or more readers and returns a reader.
+- A *batch reader* is a function that reads data (from a *reader*, a file, the network, a random number generator, etc.) and yields a batch of data items.
+
+and we provide a function that converts a reader into a batch reader, as well as frequently used reader creators and reader decorators.
+
+## Data Reader Interface
+
+Indeed, a *data reader* doesn't have to be a function that reads and yields data items. It can be any function with no parameters that creates an iterable (anything that can be used in `for x in iterable`):
+
+```
+iterable = data_reader()
+```
+
+Each element produced by the iterable should be a **single** entry of data, **not** a mini-batch. An entry of data could be a single item or a tuple of items. Items should be of a [supported type](http://www.paddlepaddle.org/doc/ui/data_provider/pydataprovider2.html?highlight=dense_vector#input-types) (e.g., numpy 1D array of float32, int, or list of ints).
+
+An example implementation of a single-item data reader creator:
+
+```python
+def reader_creator_random_image(width, height):
+    def reader():
+        while True:
+            yield numpy.random.uniform(-1, 1, size=width*height)
+    return reader
+```
+
+An example implementation of a multiple-item data reader creator:
+```python
+def reader_creator_random_image_and_label(width, height, label):
+    def reader():
+        while True:
+            yield numpy.random.uniform(-1, 1, size=width*height), label
+    return reader
+```
+
+## Batch Reader Interface
+
+A *batch reader* can be any function with no parameters that creates an iterable (anything that can be used in `for x in iterable`). The output of the iterable should be a batch (list) of data items. Each item inside the list must be a tuple.
+
+Here are valid outputs:
+```python
+# a mini-batch of three data items; each data item consists of three columns of data.
+[(1, 1, 1),
+(2, 2, 2),
+(3, 3, 3)]
+
+# a mini-batch of three data items; each data item is a list (a single column).
+[([1,1,1],),
+([2,2,2],),
+([3,3,3],)]
+```
+
+Please note that each item inside the list must be a tuple; below is an invalid output:
+```python
+ # wrong, [1,1,1] needs to be inside a tuple: ([1,1,1],).
+ # Otherwise it's ambiguous whether [1,1,1] means a single column of data [1, 1, 1],
+ # or three columns of data, each of which is 1.
+[[1,1,1],
+[2,2,2],
+[3,3,3]]
+```
+
+It's easy to convert a reader into a batch reader:
+```python
+mnist_train = paddle.dataset.mnist.train()
+mnist_train_batch_reader = paddle.batch(mnist_train, 128)
+```
+
+It's also easy to create a custom batch reader:
+```python
+def custom_batch_reader():
+    while True:
+        batch = []
+        for i in xrange(128):
+            batch.append((numpy.random.uniform(-1, 1, 28*28),)) # note that it's a tuple being appended.
+        yield batch
+
+mnist_random_image_batch_reader = custom_batch_reader
+```
+
+## Usage
+
+The batch reader, the mapping from item(s) read to data layer(s), the batch size, and the total number of passes are passed into `paddle.train`:
+
+```python
+# two data layers are created:
+image_layer = paddle.layer.data("image", ...)
+label_layer = paddle.layer.data("label", ...)
+
+# ...
+batch_reader = paddle.batch(paddle.dataset.mnist.train(), 128)
+paddle.train(batch_reader, {"image":0, "label":1}, 128, 10, ...)
+``` + +## Data Reader Decorator + +*Data reader decorator* takes a single or multiple data reader, returns a new data reader. It is similar to a [python decorator](https://wiki.python.org/moin/PythonDecorators), but it does not use `@` syntax. + +Since we have a strict interface for data readers (no parameter, return a single data item). Data reader can be used flexiable via data reader decorators. Following are a few examples: + +### Prefetch Data + +Since reading data may take time and training can not proceed without data. It is generally a good idea to prefetch data. + +Use `paddle.reader.buffered` to prefetch data: + +```python +buffered_reader = paddle.reader.buffered(paddle.dataset.mnist.train(), 100) +``` + +`buffered_reader` will try to buffer (prefetch) `100` data entries. + +### Compose Multiple Data Readers + +For example, we want to use a source of real images (reusing mnist dataset), and a source of random images as input for [Generative Adversarial Networks](https://arxiv.org/abs/1406.2661). + +We can do: + +```python +def reader_creator_random_image(width, height): + def reader(): + while True: + yield numpy.random.uniform(-1, 1, size=width*height) + return reader + +def reader_creator_bool(t): + def reader: + while True: + yield t + return reader + +true_reader = reader_creator_bool(True) +false_reader = reader_creator_bool(False) + +reader = paddle.reader.compose(paddle.dataset.mnist.train(), data_reader_creator_random_image(20, 20), true_reader, false_reader) +# Skipped 1 because paddle.dataset.mnist.train() produces two items per data entry. +# And we don't care second item at this time. +paddle.train(paddle.batch(reader, 128), {"true_image":0, "fake_image": 2, "true_label": 3, "false_label": 4}, ...) +``` + +### Shuffle + +Given shuffle buffer size `n`, `paddle.reader.shuffle` will return a data reader that buffers `n` data entries and shuffle them before a data entry is read. + +Example: +```python +reader = paddle.reader.shuffle(paddle.dataset.mnist.train(), 512) +``` + +## Q & A + +### Why reader return only a single entry, but not a mini batch? + +Always returning a single entry make reusing existing data readers much easier (e.g., if existing reader return not a single entry but 3 entries, training code will be more complex because it need to handle cases like batch size 2). + +We provide function `paddle.batch` to turn (single entry) reader into batch reader. + +### Why do we need batch reader, isn't train take reader and batch_size as arguments sufficient? + +In most of the case, train taking reader and batch_size as arguments would be sufficent. However sometimes user want to customize order of data entries inside a mini batch. Or even change batch size dynamically. + +### Why use a dictionary but not a list to provide mapping? + +We decided to use dictionary (`{"image":0, "label":1}`) instead of list (`["image", "label"]`) is because that user can easily resue item (e.g., using `{"image_a":0, "image_b":0, "label":1}`) or skip item (e.g., using `{"image_a":0, "label":2}`). 
+
+### How to create a custom data reader creator
+
+```python
+def image_reader_creator(image_path, label_path, n):
+    def reader():
+        f = open(image_path)
+        l = open(label_path)
+        images = numpy.fromfile(
+            f, 'ubyte', count=n * 28 * 28).reshape((n, 28 * 28)).astype('float32')
+        images = images / 255.0 * 2.0 - 1.0
+        labels = numpy.fromfile(l, 'ubyte', count=n).astype("int")
+        for i in xrange(n):
+            yield images[i, :], labels[i] # a single entry of data is created each time
+        f.close()
+        l.close()
+    return reader
+
+# image_reader_creator creates a reader
+reader = image_reader_creator("/path/to/image_file", "/path/to/label_file", 1024)
+paddle.train(paddle.batch(reader, 128), {"image":0, "label":1}, ...)
+```
+
+### How is `paddle.train` implemented
+
+An example implementation of paddle.train could be:
+
+```python
+def train(batch_reader, mapping, batch_size, total_pass):
+    for pass_idx in range(total_pass):
+        for mini_batch in batch_reader(): # this loop will never end in online learning.
+            do_forward_backward(mini_batch, mapping)
+```
diff --git a/doc/design/releasing_process.md b/doc/design/releasing_process.md
new file mode 100644
index 0000000000000000000000000000000000000000..0c10e782808ca6456347ec54cb5e921162731ede
--- /dev/null
+++ b/doc/design/releasing_process.md
@@ -0,0 +1,67 @@
+# Paddle Release Process
+
+Paddle uses the git-flow branching model for branch management, and uses the [Semantic Versioning](http://semver.org/) standard for Paddle version numbers.
+
+Each new Paddle release follows this process:
+
+1. Fork a new branch from `develop`, named `release/<version>`, e.g., `release/0.10.0`.
+2. Tag the new branch's version as `<version>rc.<patch>`. The first tag is `0.10.0rc1`, the second `0.10.0rc2`, and so on.
+3. For this version's commit, do the following:
+  * Build this version's Docker release image and publish it to DockerHub. If this fails, fix the Docker image build problem, increase the patch number by one, and go back to step 2.
+  * Build this version's Ubuntu deb package. If this fails, fix the deb build problem, increase the patch number by one, and go back to step 2.
+  * Use the regression test list as a checklist to test the functional correctness of the Docker image and the Ubuntu package.
+    * If this fails, record all the failing cases, fix all the bugs on this `release/<version>` branch, increase the patch number by one, and go back to step 2.
+  * Build this version's Python wheel package and publish it to PyPI.
+    * Since pypi.python.org currently follows the [strict naming spec PEP 513](https://www.python.org/dev/peps/pep-0513), the platform-related suffix of the wheel needs to be renamed before uploading with twine, e.g., change `linux_x86_64` to `manylinux1_x86_64`.
+    * The package names on PyPI are paddlepaddle and paddlepaddle_gpu. To upload the GPU version, change the name to "paddlepaddle_gpu" in build/python/setup.py and rebuild the wheel: `python setup.py bdist_wheel`.
+    * How to upload:
+      ```
+      cd build/python
+      pip install twine
+      twine upload dist/[package to upload]
+      ```
+4. After step 3 is done, merge the `release/<version>` branch into `master`, tag the merge commit on `master` with `<version>`, then merge `master` back into `develop`, and finally delete the `release/<version>` branch.
+5. Build the Docker release image of the `master` branch and publish it to DockerHub. Build the Ubuntu deb package and publish it on the GitHub release page.
+6. Collaborate on writing the release notes.
+
+
+Note the following:
+
+* Once a `release/<version>` branch is created, merging from `develop` into `release/<version>` is generally no longer allowed. This keeps the `release/<version>` branch feature-closed, which makes it easier for testers to verify Paddle's behavior.
+* While a `release/<version>` branch exists, bugfix branches must be merged into all three branches: `master`, `develop`, and `release/<version>`.
+
+# Paddle Branching Convention
+
+Paddle development follows the [git-flow](http://nvie.com/posts/a-successful-git-branching-model/) branching convention, with some adaptations to GitHub's features.
+
+* Paddle's main repository follows the [git-flow](http://nvie.com/posts/a-successful-git-branching-model/) branching convention, where:
+  * The `master` branch is the stable branch. Every version on `master` has passed unit tests and regression tests.
+  * The `develop` branch is the development branch. Every version on `develop` has passed unit tests, but not regression tests.
+  * A `release/<version>` branch is a temporary branch created for each release. Code on this branch is undergoing regression testing.
+
+* Other users' forked repositories do not need to strictly follow the [git-flow](http://nvie.com/posts/a-successful-git-branching-model/) convention; all branches in forked repositories are effectively feature branches.
+  * It is recommended that a fork's `develop` branch track the main repository's `develop` branch.
+  * It is recommended that developers fork their own feature branches from `develop` in their forks.
+  * When a feature branch is finished, open a `Pull Request` against Paddle's main repository for code review.
+  * During review, developers can keep pushing to their feature branch in response to comments.
+
+* Bugfix branches are also maintained in developers' own forks. Unlike feature branches, a bugfix branch needs separate `Pull Request`s against the main repository's `master`, `develop`, and any existing `release/<version>` branches.
+
+# Paddle Regression Test List
+
+This list describes the features that must be tested before each Paddle release.
+
+## All chapters of the Paddle Book
+
+For every release, the correctness of all chapters of the Paddle Book must be guaranteed first. Correctness means verifying both the current `paddle_trainer` training and pure `Python` training of the models.
+
+| | Quick Start | Recognize Digits | Image Classification | Word Embedding | Sentiment Analysis | Semantic Role Labeling | Machine Translation | Personalized Recommendation |
+| --- | --- | --- | --- | --- | --- | --- | --- | --- |
+| API.V2 + Docker + GPU | | | | | | | | |
+| API.V2 + Docker + CPU | | | | | | | | |
+| `paddle_trainer` + Docker + GPU | | | | | | | | |
+| `paddle_trainer` + Docker + CPU | | | | | | | | |
+| API.V2 + Ubuntu + GPU | | | | | | | | |
+| API.V2 + Ubuntu + CPU | | | | | | | | |
+| `paddle_trainer` + Ubuntu + GPU | | | | | | | | |
+| `paddle_trainer` + Ubuntu + CPU | | | | | | | | |
diff --git a/doc/design/scope.md b/doc/design/scope.md
new file mode 100644
index 0000000000000000000000000000000000000000..c9e0be716b606f6c7bf0373e0c6e632647e07a6f
--- /dev/null
+++ b/doc/design/scope.md
@@ -0,0 +1,124 @@
+# Design of Scope in Paddle
+
+## Overview
+
+Scope is an important concept in programming languages: it defines a program region in which a set of bindings between names and entities applies. Within a specific scope, a valid name is uniquely associated with an entity, such as a variable; in another scope, the same name may refer to another entity, or to nothing at all. Scope clearly restricts the visibility and validity of names in a program. Hence **Scope** is introduced to PaddlePaddle to manage variables in context. Different from the original abstract concept, however, Scope is an object with two important attributes:
+
+- Scope is an association of a name to a variable.
+- Variables in a parent scope can be retrieved from a local scope.
+
+A detailed explanation of these two attributes follows.
+
+
+## Scope is an association of a name to variable.
+
+Scope is an association of a name to a variable. All variables belong to a `Scope`. You need to specify a scope to run a Net, i.e., `net.Run(&scope)`. One net can run in different scopes and update different variables in each scope.
+
+
+1. Scope only contains a map from names to variables.
+
+   All parameters, data, and states in a Net should be variables and stored inside a scope. Each op should get its inputs and outputs for computation from a scope, such as data buffers, state (momentum), etc.
+
+1. A variable can only be created by a Scope, and can only be retrieved from a Scope. Users cannot create or get a variable outside a scope.
This is a constraint of our framework; it keeps the framework simple and clear.
+
+1. Scope only contains methods used to create and get variables. Scope does not contain operators and has no information about how to run them.
+   `Net` is designed to drive the computation, and a Scope only contains a map of variables. There is no computation logic inside a `Scope`; Scope just handles the lifetime management of variables.
+   - `Create` is used to create a Variable by name and add the mapping relation.
+   - `Get` is used to find a Variable by name.
+
+1. Every variable belongs to exactly one Scope.
+
+   A variable cannot belong to many scopes. If you want to use variables from a parent scope, you can use the `parent scope`.
+
+1. A Scope should destruct all the variables inside it when it is itself destructed. Users should never store a `Variable` pointer anywhere else.
+
+   Because variables can only be retrieved from a Scope, when we destroy a Scope we also need to destroy all the variables in it. If a user stores a `Variable` pointer in a private data member or in some global variable, the pointer becomes an invalid pointer once the associated `Scope` is destroyed.
+
+```cpp
+class Scope {
+ public:
+  Variable* NewVar(const std::string& name);
+  const Variable* FindVar(const std::string& name) const;
+
+ private:
+  std::unordered_map<std::string, std::unique_ptr<Variable>> vars_;
+};
+```
+
+
+## Parent scope and local scope
+
+Just like [scope](https://en.wikipedia.org/wiki/Scope_(computer_science)) in programming languages, a `Scope` in the neural network can also be a local scope. There are two attributes of a local scope:
+
+1. We can create local variables in a local scope. When the local scope is destroyed, all local variables are destroyed as well.
+2. Variables in a parent scope can be retrieved from the local scopes of that parent scope. That is, when a user gets a variable from a scope, the scope searches for the variable locally first; if it is not in the local scope, the `scope` keeps searching in its parent, until the variable is found or there is no parent left.
+
+```cpp
+class Scope {
+ public:
+  Scope(const std::shared_ptr<Scope>& scope) : parent_(scope) {}
+
+  Variable* FindVar(const std::string& name) const {
+    auto it = vars_.find(name);
+    if (it != vars_.end()) {
+      return it->second.get();
+    } else if (parent_ != nullptr) {
+      return parent_->FindVar(name);
+    } else {
+      return nullptr;
+    }
+  }
+
+ private:
+  std::shared_ptr<Scope> parent_{nullptr};
+  std::unordered_map<std::string, std::unique_ptr<Variable>> vars_;
+};
+```
+
+In the `Scope` class, there is a private data member called `parent_`, a smart pointer to its parent scope. When a user `Get`s a variable by `name`, the `name` is searched in the current scope first. If the variable cannot be found locally and the parent scope is not `nullptr`, the variable is searched in the parent scope. The default value of `parent_` is `nullptr`; a scope whose `parent_` is nullptr is a global scope.
+
+A local scope is very useful when implementing a Recurrent Neural Network. Each timestep of an RNN should be a `Net`. Each `Net` of a timestep (`StepNet` for short) should use an independent local scope, just like variables in a while loop live in a local scope in programming languages. By using a single `StepNet` and switching local scopes, we can implement an RNN easily.
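+
+To make the lookup rule concrete, here is a tiny executable model of the parent-chain search in Python. It only mirrors the C++ design above for illustration; the names `new_var`/`find_var` are ad-hoc stand-ins for `NewVar`/`FindVar` and are not part of the implementation.
+
+```python
+class Scope(object):
+    def __init__(self, parent=None):
+        self.vars = {}        # name -> variable, owned by this scope
+        self.parent = parent  # enclosing scope, or None for the global scope
+
+    def new_var(self, name, value):
+        if name in self.vars:
+            raise KeyError("name conflict in local scope: " + name)
+        self.vars[name] = value
+        return value
+
+    def find_var(self, name):
+        # Search locally first, then walk up the parent chain.
+        if name in self.vars:
+            return self.vars[name]
+        return self.parent.find_var(name) if self.parent else None
+
+global_scope = Scope()
+global_scope.new_var("W", "weights")
+step_scope = Scope(parent=global_scope)        # local scope of one RNN timestep
+assert step_scope.find_var("W") == "weights"   # retrieved through the parent
+```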
+
+# Interface Design
+
+```cpp
+class Variable {
+ private:
+  Variable() = default;
+  friend class Scope;
+};
+
+class Scope {
+ private:
+  Scope(const std::shared_ptr<Scope>& parent = nullptr);
+
+ public:
+  static std::shared_ptr<Scope> Create(const std::shared_ptr<Scope>& parent = nullptr);
+
+  // return nullptr if not found.
+  Variable* FindVar(const std::string& name) const;
+
+  // fail if a variable with the same name already exists.
+  Variable* NewVar(const std::string& name);
+
+ private:
+  std::shared_ptr<Scope> parent_;
+  std::unordered_map<std::string, std::unique_ptr<Variable>> vars_;
+};
+```
+## Only a scope can create a variable
+
+To ensure that `only a scope can create a variable`, we mark `Variable`'s constructor as a private member function and make Scope a friend class of Variable. Then only `NewVar` can construct a `Variable`.
+
+## When a scope is destroyed, all variables inside it are destroyed with it
+
+The scope holds unique pointers to all variables. A user can `FindVar` from a scope, but should not hold that pointer as a member variable, because when the scope is destroyed, all variables inside it are destroyed together.
+
+## Sharing a parent scope
+
+A local scope contains a `parent_` pointer, forming a linked list of scopes. We use a `shared_ptr` because while a local scope is in use, its parents cannot be destroyed.
+
+Also, since the parent scope is a `shared_ptr`, we can only `Create()` a scope as a shared pointer. We cannot construct a scope as a plain variable, because it could not then be passed to another scope as its `parent` pointer.
+
+## Orthogonal interface
+
+`FindVar` returns `nullptr` when `name` is not found, so it can also serve as a `Contains` method. `NewVar` returns an `Error` when there is a local name conflict. Combining `FindVar` and `NewVar`, a find-or-create operation can be implemented easily.
diff --git a/doc/design/simple_op_design.md b/doc/design/simple_op_design.md
new file mode 100644
index 0000000000000000000000000000000000000000..5e07c29c56d21728599195d420d3222213d77e7c
--- /dev/null
+++ b/doc/design/simple_op_design.md
@@ -0,0 +1,202 @@
+## Interaction between C++ and Python
+
+Users employ the API in Python to describe their own networks; however, the network construction actually happens in C++. So Protobuf is introduced to send messages between Python and C++.
+
+The interaction between Python and C++ can be simplified into two steps:
+
+1. C++ tells Python how many Ops there are, and what parameters users need to offer to initialize a new Op. Python then builds an API for each Op at compile time.
+
+2. Users invoke the APIs built by Python and provide the necessary parameters. These parameters are sent to C++ to finish the Op construction task.
+
+### Message from C++ to Python
+
+We define a Protobuf message class `OpProto` to hold the messages needed in the first step. What should an `OpProto` contain? This question is equivalent to "What messages do we need to offer in order to build a Python API which is legal, user oriented, and able to describe a whole Op?"
+
+The following messages are necessary:
+
+1. The Op's name, and a short comment.
+2. The number of input and output variables; each variable's name, type, and comment.
+3. The Op's attributes; each attribute includes a name, type, comment, **default value** and **value range**.
+
+So `OpProto` can be defined as follows:
+
+```proto
+enum AttrType {
+    INT = 1;
+    FLOAT = 2;
+    STRING = 3;
+    INTS = 4;
+    FLOATS = 5;
+    STRINGS = 6;
+};
+
+message AttrValue {
+    required AttrType type = 1;
+    optional int32 iv = 2;
+    optional float fv = 3;
+    optional string sv = 4;
+    repeated int32 ivs = 5;
+    repeated float fvs = 6;
+    repeated string svs = 7;
+};
+
+message AttrProto {
+    required string name = 1;
+    required string comment = 2;
+    required AttrType type = 3;
+};
+
+message VarProto {
+    required string name = 1;
+    required string comment = 2;
+    required bool is_tensor = 3;
+};
+
+message OpProto {
+    repeated VarProto inputs = 1;
+    repeated VarProto outputs = 2;
+    repeated AttrProto attrs = 3;
+    required string type = 4;
+    required string comment = 5;
+};
+```
+
+To generate the Python code automatically:
+
+```python
+def create_python_ops_creation_functions():
+    op_protos = paddle.framework.OpRegistry.get_all_op_proto()
+    for type_name in op_protos:
+        op_proto = op_protos[type_name]
+        def __impl__(**kwargs): # User must use keyword args in Paddle API
+            inputs = [kwargs.get(ipt.name, "") for ipt in op_proto.inputs]
+            outputs = [kwargs.get(opt.name, "") for opt in op_proto.outputs]
+            attrs = [cast_to_op_attr(attr, kwargs.get(attr.name, None)) for attr in op_proto.attrs]
+            opdesc = (inputs, outputs, type_name, attrs)
+            return paddle.framework.OpRegistry.CreateOp(opdesc)
+        __impl__.__doc__ = create_doc_string(op_proto)
+        globals()[type_name] = __impl__
+
+create_python_ops_creation_functions()
+```
+
+### Message from Python to C++
+
+To hold the messages needed in the second step above, we define a Protobuf message class `OpDesc`. It is used to hold the user-specified parameters when describing an Op.
+
+```proto
+message OpDesc {
+    required string type = 1;
+    repeated string inputs = 2;
+    repeated string outputs = 3;
+    map<string, AttrValue> attrs = 4;
+};
+```
+
+## OpProto Register
+
+Every Op has its own `OpProto`. For convenience, we need to register them and record all their messages. For each `Op` class, we define a corresponding `OpMaker` class, in whose constructor we implement the `OpProto`'s building process. `OpMaker`'s constructor is invoked by another function, `OpRegistry::RegisterOp()`.
+
+```cpp
+class OpProtoMaker {
+public:
+  OpProtoMaker(OpProto* proto): proto_(proto) {}
+protected:
+  OpProto* proto_;
+  void AddInput(const std::string& name, const std::string& desc) {...}
+  void AddAttr(const std::string& name, const std::string& desc, TypeId type) {...}
+  void AddComment(const std::string& comment) { ... }
+};
+
+class OpRegistry {
+public:
+  using OpCreator = std::function<OperatorBase* (const OpDesc&)>;
+
+  template <typename OpType, typename OpMaker>
+  static void RegisterOp(const std::string& name) {
+    gCreators_[name] = [](const OpDesc& desc) {
+      return new OpType(desc);
+    };
+    OpProto& opProto = gProtos_[name];
+    OpMaker()(&opProto);
+  }
+
+  static map<string, OpCreator> gCreators_;
+  static map<string, OpProto> gProtos_;
+};
+
+template <typename OpType, typename OpMaker>
+class OpRegister {
+  public:
+    OpRegister(std::string type) {
+      OpRegistry::RegisterOp<OpType, OpMaker>(type);
+    }
+};
+
+#define REGISTER_OP(op_class, op_maker_class, type_name) \
+  class op_class##Register { \
+   private: \
+    const static OpRegister<op_class, op_maker_class> reg; \
+  }; \
+  const OpRegister<op_class, op_maker_class> op_class##Register::reg(#type_name);
+
+class CosineOp {
+// ...
+};
+
+struct CosineOpProtoMaker : public OpProtoMaker {
+  CosineOpProtoMaker(OpProto* proto) : OpProtoMaker(proto) {
+    AddInput("input", "input of cosine op");
+    AddAttr("scale", "scale of cosine op", float).Default(1.0).LargerThan(0.0);
+    AddType("cos");
+    AddComment("This is cos op");
+  }
+}
+
+REGISTER_OP(CosineOp, CosineOpProtoMaker, cos);
+```
+
+In `REGISTER_OP(CosineOp, CosineOpProtoMaker, cos)`, we register not only the `CosineOp` but also the `CosineOpProto`. As fields of `CosineOpProto`, the default value and value range of `scale` are also registered here.
+
+## Python API
+
+Python APIs are divided into two types: the high-level API and the low-level API.
+
+### High-Level API
+
+The high-level API is called by users directly, so it should keep its style consistent with the existing V2 APIs.
+
+Here is a sample of how to define an fc layer:
+
+```python
+hd = fc_layer(input=data, size=56, with_bias=True, activation="sigmoid")
+```
+
+`hd` is the output of `fc_layer`, and it's a `variable`. It can be sent further into other layers as input.
+
+The definition of `fc_layer()`:
+
+```python
+def fc_layer(input, size, with_bias, activation):
+    attr_map = {"size": size}
+    check_attrs(attr_map)
+    w = make_variable('w')
+    if with_bias:
+        b = make_variable('b')
+    else:
+        b = None
+    fc_output = make_variable('fc_output')
+    fc_op(input, w, b, fc_output, attr_map)
+    act_output = make_variable('sigmoid_output')
+    if activation == "sigmoid":
+        sigmoid_op(fc_output, act_output)
+    else:
+        pass  # ... other activations
+    return act_output
+```
+
+### Low-Level API
+
+In the above sample, `fc_op` and `sigmoid_op` are low-level APIs. They build `OpDesc` and invoke the corresponding C++ code.
+
+*TODO*
diff --git a/doc/design/speech/README.MD b/doc/design/speech/README.MD
new file mode 100644
index 0000000000000000000000000000000000000000..7304650e628dba210488cd2dc4836318b5383b2a
--- /dev/null
+++ b/doc/design/speech/README.MD
@@ -0,0 +1,155 @@
+# DeepSpeech2 on PaddlePaddle: Design Doc
+
+We are planning to build Deep Speech 2 (DS2) \[[1](#references)\], a powerful Automatic Speech Recognition (ASR) engine, on PaddlePaddle. For the first-stage plan, we have the following short-term goals:
+
+- Release a basic distributed implementation of DS2 on PaddlePaddle.
+- Contribute a chapter on Deep Speech to the PaddlePaddle Book.
+
+Intensive system optimization and a low-latency inference library (details in \[[1](#references)\]) are not yet covered in this first-stage plan.
+
+## Table of Contents
+
+- [Tasks](#tasks)
+- [Task Dependency](#task-dependency)
+- [Design Details](#design-details)
+  - [Overview](#overview)
+  - [Row Convolution](#row-convolution)
+  - [Beam Search With CTC and LM](#beam-search-with-ctc-and-lm)
+- [Future Work](#future-work)
+- [References](#references)
+
+## Tasks
+
+We roughly break the project down into 14 tasks:
+
+1. Develop an **audio data provider**:
+   - JSON filelist generator.
+   - Audio file format transformer.
+   - Spectrogram feature extraction, power normalization, etc.
+   - Batch data reader with SortaGrad.
+   - Data augmentation (optional).
+   - Prepare (one or more) public English datasets & baselines.
+2. Create a **simplified DS2 model configuration**:
+   - With only fixed-length (by padding) audio sequences (otherwise *Task 3* is needed).
+   - With only bidirectional-GRU (otherwise *Task 4* is needed).
+   - With only a greedy decoder (otherwise *Tasks 5, 6* are needed).
+3. Develop support for **variable-shaped** dense-vector (image) batches of input data.
+   - Update `DenseScanner` in `dataprovider_converter.py`, etc.
+4. Develop a new **lookahead-row-convolution layer** (see \[[1](#references)\] for details):
+   - Lookahead convolution windows.
+   - Within-row convolution, without kernels shared across rows.
+5. Build a KenLM **language model** (5-gram) for the beam search decoder:
+   - Use the KenLM toolkit.
+   - Prepare the corpus & train the model.
+   - Create inference interfaces (for Task 6).
+6. Develop a **beam search decoder** with CTC + LM + WORDCOUNT:
+   - Beam search with CTC.
+   - Beam search with an external custom scorer (e.g., LM).
+   - Try to design a more general beam search interface.
+7. Develop a **Word Error Rate evaluator**:
+   - Update `ctc_error_evaluator` (CER) to support WER.
+8. Prepare an internal dataset for Mandarin (optional):
+   - Dataset, baseline, evaluation details.
+   - Particular data preprocessing for Mandarin.
+   - Might need cooperation with the Speech Department.
+9. Create a **standard DS2 model configuration**:
+   - With variable-length audio sequences (needs *Task 3*).
+   - With unidirectional-GRU + row-convolution (needs *Task 4*).
+   - With a CTC-LM beam search decoder (needs *Tasks 5, 6*).
+10. Make it run perfectly on **clusters**.
+11. Experiments and **benchmarking** (for accuracy, not efficiency):
+    - With a public English dataset.
+    - With an internal (Baidu) Mandarin dataset (optional).
+12. Time **profiling** and optimization.
+13. Prepare **docs**.
+14. Prepare a PaddlePaddle **Book** chapter with a simplified version.
+
+## Task Dependency
+
+Tasks are parallelizable within phases:
+
+Roadmap     | Description                                    | Parallelizable Tasks
+----------- | :--------------------------------------------- | :--------------------
+Phase I     | Simplified model & components                   | *Task 1* ~ *Task 8*
+Phase II    | Standard model & benchmarking & profiling       | *Task 9* ~ *Task 12*
+Phase III   | Documentation                                   | *Task 13* ~ *Task 14*
+
+An issue for each task will be created later. Contributions, discussions, and comments are all highly appreciated and welcome!
+
+## Design Details
+
+### Overview
+
+Traditional **ASR** (Automatic Speech Recognition) pipelines require great human effort devoted to elaborately tuning multiple hand-engineered components (e.g., audio feature design, acoustic model, pronunciation model, language model, etc.). **Deep Speech 2** (**DS2**) \[[1](#references)\], however, trains such ASR models in an end-to-end manner, replacing most intermediate modules with a single deep network architecture. By scaling up both the data and model sizes, DS2 achieves a very significant performance boost.
+
+Please read the Deep Speech 2 \[[1](#references),[2](#references)\] papers for more background knowledge.
+
+The classical DS2 network contains 15 layers (from bottom to top):
+
+- **Two** data layers (audio spectrogram, transcription text)
+- **Three** 2D convolution layers
+- **Seven** uni-directional simple-RNN layers
+- **One** lookahead row-convolution layer
+- **One** fully-connected layer
+- **One** CTC-loss layer
+
+Figure 1. Architecture of Deep Speech 2 Network.
+
+We don't have to insist on this 2-3-7-1-1-1 depth \[[2](#references)\]; similar networks with different depths might also work well. In \[[1](#references)\], the authors use a different depth (e.g. 2-2-3-1-1-1) for the final experiments.
+
+Key points about the layers:
+
+- **Data Layers**:
+  - Frame sequences of the audio **spectrogram** (with FFT).
+  - Token sequences of the **transcription** text (labels).
+  - These two types of sequences do not have the same lengths, thus a CTC-loss layer is required.
+- **2D Convolution Layers**:
+  - Not only temporal convolution, but also **frequency convolution**. Like a 2D image convolution, but with a variable dimension (i.e. the temporal dimension).
+  - With striding for only the first convolution layer.
+  - No pooling for any convolution layer.
+- **Uni-directional RNNs**:
+  - Uni-directional + row convolution: for low-latency inference.
+  - Bi-directional, without row convolution: if we don't care about inference latency.
+- **Row convolution**:
+  - For looking only a few steps ahead into the features, instead of looking into a whole sequence as bi-directional RNNs do.
+  - Not necessary with bi-directional RNNs.
+  - "**Row**" means convolutions are done within each frequency dimension (row), and no convolution kernels are shared across rows.
+- **Batch Normalization Layers**:
+  - Added to all the above layers (except for the data and loss layers).
+  - Sequence-wise normalization for RNNs: BatchNorm is only performed on the input-state projection and not on the state-state projection, for efficiency considerations.
+
+
+Required Components | PaddlePaddle Support | Need to Develop
+:------------------------------------- | :-------------------------------------- | :-----------------------
+Data Layer I (Spectrogram) | Not supported yet. | TBD (Task 3)
+Data Layer II (Transcription) | `paddle.data_type.integer_value_sequence` | -
+2D Convolution Layer | `paddle.layer.image_conv_layer` | -
+DataType Converter (vec2seq) | `paddle.layer.block_expand` | -
+Bi-/Uni-directional RNNs | `paddle.layer.recurrent_group` | -
+Row Convolution Layer | Not supported yet. | TBD (Task 4)
+CTC-loss Layer | `paddle.layer.warp_ctc` | -
+Batch Normalization Layer | `paddle.layer.batch_norm` | -
+CTC-Beam search | Not supported yet. | TBD (Task 6)
+
+### Row Convolution
+
+TODO by Assignees
+
+### Beam Search with CTC and LM
+
+TODO by Assignees
+
+## Future Work
+
+- Efficiency Improvement
+- Accuracy Improvement
+- Low-latency Inference Library
+- Large-scale benchmarking
+
+## References
+
+1. Dario Amodei, et al., [Deep Speech 2 : End-to-End Speech Recognition in English and Mandarin](http://proceedings.mlr.press/v48/amodei16.pdf). ICML 2016.
+2. Dario Amodei, et al., [Deep Speech 2 : End-to-End Speech Recognition in English and Mandarin](https://arxiv.org/abs/1512.02595). arXiv:1512.02595.
diff --git a/doc/design/speech/image/ds2_network.png b/doc/design/speech/image/ds2_network.png
new file mode 100644
index 0000000000000000000000000000000000000000..1a5b2184d47928cc2849d5a7c8ea2d8cf5337e11
Binary files /dev/null and b/doc/design/speech/image/ds2_network.png differ
diff --git a/doc/faq/index_cn.rst b/doc/faq/index_cn.rst
new file mode 100644
index 0000000000000000000000000000000000000000..138efb566e43fa71952f057829c2afbca96cadc9
--- /dev/null
+++ b/doc/faq/index_cn.rst
@@ -0,0 +1,323 @@
+####################
+FAQ
+####################
+
+.. contents::
+
+1. 如何减少内存占用
+---------------------------------
+
+神经网络的训练本身是一个非常消耗内存和显存的工作,经常会消耗数十GB的内存和数GB的显存。
+PaddlePaddle的内存占用主要分为如下几个方面\:
+
+* DataProvider缓冲池内存(只针对内存)
+* 神经元激活内存(针对内存和显存)
+* 参数内存(针对内存和显存)
+* 其他内存杂项
+
+其中,其他内存杂项是指PaddlePaddle本身所用的一些内存,包括字符串分配、临时变量等等,暂不考虑在内。
+
+减少DataProvider缓冲池内存
+++++++++++++++++++++++++++
+
+PyDataProvider使用的是异步加载,同时在内存里直接随机选取数据来做Shuffle。即
+
+.. graphviz::
+
+    digraph {
+        rankdir=LR;
+        数据文件 -> 内存池 -> PaddlePaddle训练
+    }
+
+所以,减小这个内存池即可减小内存占用,同时也可以加速开始训练前数据载入的过程。但是,这个内存池实际上决定了shuffle的粒度。所以,如果将这个内存池减小,又要保证数据是随机的,那么最好将数据文件在每次读取之前做一次shuffle。可能的代码为
+
+.. literalinclude:: src/reduce_min_pool_size.py
+
+这样做可以极大地减少内存占用,并且可能会加速训练过程,详细文档参考 :ref:`api_pydataprovider2` 。
+
+神经元激活内存
+++++++++++++++
+
+神经网络在训练的时候,会对每一个激活暂存一些数据,如神经元激活值等。
+在反向传递的时候,这些数据会被用来更新参数。这些数据使用的内存主要和两个参数有关系,
+一是batch size,另一个是每条序列(Sequence)的长度。所以,其实也是和每个mini-batch中包含
+的时间步信息成正比。
+
+所以做法可以有两种:
+
+* 减小batch size。即在网络配置中把 :code:`settings(batch_size=1000)` 设置成一个小一些的值。但是batch size本身是神经网络的超参数,减小batch size可能会对训练结果产生影响。
+* 减小序列的长度,或者直接扔掉非常长的序列。比如,一个数据集大部分序列长度是100-200,但是突然有一个10000长的序列,就很容易导致内存超限,特别是在LSTM等RNN中。
+
+参数内存
+++++++++
+
+PaddlePaddle支持非常多的优化算法(Optimizer),不同的优化算法需要使用不同大小的内存。
+例如使用 :code:`adadelta` 算法,则需要使用等于权重参数规模大约5倍的内存。举例,如果参数保存下来的模型目录文件为 :code:`100M`,那么该优化算法至少需要 :code:`500M` 的内存。
+
+可以考虑使用一些内存占用更小的优化算法,例如 :code:`momentum`。
+
+2. 如何加速PaddlePaddle的训练速度
+---------------------------------
+
+加速PaddlePaddle训练可以考虑从以下几个方面\:
+
+* 减少数据载入的耗时
+* 加速训练速度
+* 利用分布式训练驾驭更多的计算资源
+
+减少数据载入的耗时
+++++++++++++++++++
+
+使用\ :code:`pydataprovider`\ 时,可以减少缓存池的大小,同时设置内存缓存功能,即可以极大地加速数据载入流程。
+:code:`DataProvider` 缓存池的减小,和之前通过减小缓存池来减小内存占用的原理一致。
+
+.. literalinclude:: src/reduce_min_pool_size.py
+
+同时 :code:`@provider` 接口有一个 :code:`cache` 参数来控制缓存方法,将其设置成 :code:`CacheType.CACHE_PASS_IN_MEM` 的话,会将第一个 :code:`pass` (过完所有训练数据即为一个pass)生成的数据缓存在内存里,在之后的 :code:`pass` 中,不会再从 :code:`python` 端读取数据,而是直接从内存的缓存里读取数据。这也会极大减少数据读入的耗时。
+
+
+加速训练速度
+++++++++++++
+
+PaddlePaddle支持Sparse的训练,sparse训练需要训练特征是 :code:`sparse_binary_vector` 、 :code:`sparse_vector` 、或者 :code:`integer_value` 的任意一种。同时,与这个训练数据交互的Layer,需要将其Parameter设置成 sparse 更新模式,即设置 :code:`sparse_update=True`。
+
+这里以简单的 :code:`word2vec` 语言模型训练为例,具体使用方法为\:
+
+使用一个词的前两个词和后两个词,来预测这个词本身。这个任务的DataProvider为\:
+
+.. literalinclude:: src/word2vec_dataprovider.py
+
+这个任务的配置为\:
+
+.. literalinclude:: src/word2vec_config.py
+
+
+利用更多的计算资源
+++++++++++++++++++
+
+利用更多的计算资源可以分为以下几种方式来进行\:
+
+* 单机CPU训练
+
+  * 使用多线程训练。设置命令行参数 :code:`trainer_count`。
+
+* 单机GPU训练
+
+  * 使用显卡训练。设置命令行参数 :code:`use_gpu`。
+  * 使用多块显卡训练。设置命令行参数 :code:`use_gpu` 和 :code:`trainer_count` 。
+
+* 多机训练
+
+  * 请参考 :ref:`cluster_train` 。
+
+
+3. 遇到“非法指令”或者是“illegal instruction”
+--------------------------------------------
+
+PaddlePaddle使用avx SIMD指令提高cpu执行效率,因此在不支持avx指令集的机器上错误地使用了avx版本的二进制发行版,就可能导致这种错误,请选择正确的版本。
+
+4. 如何选择SGD算法的学习率
+--------------------------
+
+在采用sgd/async_sgd进行训练时,一个重要的问题是选择正确的learning_rate。如果learning_rate太大,那么训练有可能不收敛;如果learning_rate太小,那么收敛可能很慢,导致训练时间过长。
+
+通常做法是从一个比较大的learning_rate开始试,如果不收敛,那减少学习率10倍继续试验,直到训练收敛为止。那么如何判断训练不收敛呢?可以先估计出模型采用恒定输出时所能达到的最小cost是多少。
+
+如果训练过程的cost明显高于这个常数输出的cost,那么我们可以判断为训练不收敛。举一个例子,假如我们是三分类问题,采用multi-class-cross-entropy作为cost,数据中0,1,2三类的比例为 :code:`0.2, 0.5, 0.3` ,那么常数输出所能达到的最小cost是 :code:`-(0.2*log(0.2)+0.5*log(0.5)+0.3*log(0.3))=1.03` 。如果训练一个pass(或者更早)后,cost还大于这个数,那么可以认为训练不收敛,应该降低学习率。
+
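下面是一个按照上述公式计算“常数输出”基线cost的最小示例(仅为示意脚本,类别比例请替换为自己训练数据的统计值):

.. code-block:: python

    import math

    # 数据中 0、1、2 三类的比例(示意值)
    class_ratios = [0.2, 0.5, 0.3]

    # 常数输出策略在 multi-class-cross-entropy 下能达到的最小 cost
    baseline_cost = -sum(p * math.log(p) for p in class_ratios)
    print("baseline cost = %.2f" % baseline_cost)  # 输出约 1.03

若训练一个pass后cost仍明显高于该基线,即可按上文建议降低学习率。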
+5. 如何初始化参数
+-----------------
+
+默认情况下,PaddlePaddle使用均值0、标准差为 :math:`\frac{1}{\sqrt{d}}` 来初始化参数。其中 :math:`d` 为参数矩阵的宽度。这种初始化方式在一般情况下不会产生很差的结果。如果用户想要自定义初始化方式,PaddlePaddle目前提供两种参数初始化的方式\:
+
+* 高斯分布。将 :code:`param_attr` 设置成 :code:`param_attr=ParamAttr(initial_mean=0.0, initial_std=1.0)`
+* 均匀分布。将 :code:`param_attr` 设置成 :code:`param_attr=ParamAttr(initial_max=1.0, initial_min=-1.0)`
+
+比如设置一个全连接层的参数初始化方式和bias初始化方式,可以使用如下代码。
+
+.. code-block:: python
+
+    hidden = fc_layer(input=ipt, param_attr=ParamAttr(initial_max=1.0, initial_min=-1.0),
+                      bias_attr=ParamAttr(initial_mean=1.0, initial_std=0.0))
+
+上述代码将bias全部初始化为1.0,同时将参数初始化为 :code:`[-1.0, 1.0]` 的均匀分布。
+
+6. 如何共享参数
+---------------
+
+PaddlePaddle的参数使用名字 :code:`name` 作为参数的ID,相同名字的参数,会共享参数。设置参数的名字,可以使用 :code:`ParamAttr(name="YOUR_PARAM_NAME")` 来设置。更方便的设置方式,是使要共享的参数使用同样的 :code:`ParamAttr` 对象。
+
+简单的全连接网络,参数共享的配置示例为\:
+
+.. literalinclude:: ../../python/paddle/trainer_config_helpers/tests/configs/shared_fc.py
+
+这里 :code:`hidden_a` 和 :code:`hidden_b` 使用了同样的parameter和bias。并且softmax层的两个输入也使用了同样的参数 :code:`softmax_param`。
+
+7. \*-cp27mu-linux_x86_64.whl is not a supported wheel on this platform.
+------------------------------------------------------------------------
+
+出现这个问题的主要原因是:系统编译wheel包的时候,使用的 :code:`wheel` 包是最新的,而系统中的 :code:`pip` 包比较老。具体的解决方法是,更新 :code:`pip` 包并重新编译PaddlePaddle。
+更新 :code:`pip` 包的方法是\:
+
+.. code-block:: bash
+
+    pip install --upgrade pip
+
+8. python相关的单元测试都过不了
+--------------------------------
+
+如果出现以下python相关的单元测试都过不了的情况:
+
+.. code-block:: bash
+
+    24 - test_PyDataProvider (Failed)
+    26 - test_RecurrentGradientMachine (Failed)
+    27 - test_NetworkCompare (Failed)
+    28 - test_PyDataProvider2 (Failed)
+    32 - test_Prediction (Failed)
+    33 - test_Compare (Failed)
+    34 - test_Trainer (Failed)
+    35 - test_TrainerOnePass (Failed)
+    36 - test_CompareTwoNets (Failed)
+    37 - test_CompareTwoOpts (Failed)
+    38 - test_CompareSparse (Failed)
+    39 - test_recurrent_machine_generation (Failed)
+    40 - test_PyDataProviderWrapper (Failed)
+    41 - test_config_parser (Failed)
+    42 - test_swig_api (Failed)
+    43 - layers_test (Failed)
+
+并且查询PaddlePaddle单元测试的日志,提示:
+
+.. code-block:: bash
+
+    paddle package is already in your PYTHONPATH. But unittest need a clean environment.
+    Please uninstall paddle package before start unittest. Try to 'pip uninstall paddle'.
+
+解决办法是:
+
+* 卸载PaddlePaddle包 :code:`pip uninstall paddle`,清理掉老旧的PaddlePaddle安装包,使得单元测试有一个干净的环境。如果PaddlePaddle包已经在python的site-packages里面,单元测试会引用site-packages里面的python包,而不是源码目录里 :code:`/python` 目录下的python包。同时,即便设置 :code:`PYTHONPATH` 到 :code:`/python` 也没用,因为python的搜索路径会优先查找已经安装的python包。
+
+
+9. 运行Docker GPU镜像出现 "CUDA driver version is insufficient"
+----------------------------------------------------------------
+
+用户在使用PaddlePaddle GPU的Docker镜像的时候,常常出现 `Cuda Error: CUDA driver version is insufficient for CUDA runtime version`,原因在于没有把机器上CUDA相关的驱动和库映射到容器内部。
+具体的解决方法是:
+
+.. code-block:: bash
+
+    $ export CUDA_SO="$(\ls /usr/lib64/libcuda* | xargs -I{} echo '-v {}:{}') $(\ls /usr/lib64/libnvidia* | xargs -I{} echo '-v {}:{}')"
+    $ export DEVICES=$(\ls /dev/nvidia* | xargs -I{} echo '--device {}:{}')
+    $ docker run ${CUDA_SO} ${DEVICES} -it paddledev/paddlepaddle:latest-gpu
+
+更多关于Docker的安装与使用,请参考 `PaddlePaddle Docker 文档 `_ 。
+
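补充一点(仅为建议,并非唯一做法):如果宿主机上已经安装了 nvidia-docker(本文档其他章节也推荐了该工具),可以用它自动完成CUDA驱动和设备的映射,替代上面手工设置 :code:`CUDA_SO` 和 :code:`DEVICES` 的做法:

.. code-block:: bash

    nvidia-docker run -it paddledev/paddlepaddle:latest-gpu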
+10. CMake源码编译,找到的PythonLibs和PythonInterp版本不一致
+----------------------------------------------------------------
+
+这是由于目前CMake寻找Python的逻辑存在缺陷,如果系统安装了多个Python版本,CMake找到的Python库和Python解释器版本可能不一致,导致编译PaddlePaddle失败。正确的解决方法是,用户强制指定特定的Python版本,具体操作如下:
+
+.. code-block:: bash
+
+    cmake .. -DPYTHON_EXECUTABLE= -DPYTHON_LIBRARY= -DPYTHON_INCLUDE_DIR=
+
+用户需要指定本机上Python的路径:````, ````, ````
+
+11. CMake源码编译,Paddle版本号为0.0.0
+--------------------------------------
+
+如果运行 :code:`paddle version`,出现 :code:`PaddlePaddle 0.0.0`;或者运行 :code:`cmake ..`,出现
+
+.. code-block:: bash
+
+    CMake Warning at cmake/version.cmake:20 (message):
+      Cannot add paddle version from git tag
+
+那么用户需要拉取所有的远程分支到本机,命令为 :code:`git fetch upstream`,然后重新cmake即可。
+
+12. A protocol message was rejected because it was too big
+----------------------------------------------------------
+
+如果在训练NLP相关模型时,出现以下错误:
+
+.. code-block:: bash
+
+    [libprotobuf ERROR google/protobuf/io/coded_stream.cc:171] A protocol message was rejected because it was too big (more than 67108864 bytes). To increase the limit (or to disable these warnings), see CodedInputStream::SetTotalBytesLimit() in google/protobuf/io/coded_stream.h.
+    F1205 14:59:50.295174 14703 TrainerConfigHelper.cpp:59] Check failed: m->conf.ParseFromString(configProtoStr)
+
+可能的原因是:传给dataprovider的某一个args过大,一般是由于直接传递大字典导致的。错误的define_py_data_sources2用法类似:
+
+.. code-block:: python
+
+    src_dict = dict()
+    for line_count, line in enumerate(open(src_dict_path, "r")):
+        src_dict[line.strip()] = line_count
+
+    define_py_data_sources2(
+        train_list,
+        test_list,
+        module="dataprovider",
+        obj="process",
+        args={"src_dict": src_dict})
+
+解决方案是:将字典的地址作为args传给dataprovider,然后在dataprovider里面根据该地址加载字典。即define_py_data_sources2应改为:
+
+.. code-block:: python
+
+    define_py_data_sources2(
+        train_list,
+        test_list,
+        module="dataprovider",
+        obj="process",
+        args={"src_dict_path": src_dict_path})
+
+完整源码可参考 `seqToseq `_ 示例。
+
+13. 如何指定GPU设备
+-------------------
+
+例如机器上有4块GPU,编号从0开始,指定使用2、3号GPU:
+
+* 方式1:通过 `CUDA_VISIBLE_DEVICES `_ 环境变量来指定特定的GPU。
+
+.. code-block:: bash
+
+    env CUDA_VISIBLE_DEVICES=2,3 paddle train --use_gpu=true --trainer_count=2
+
+* 方式2:通过命令行参数 ``--gpu_id`` 指定。
+
+.. code-block:: bash
+
+    paddle train --use_gpu=true --trainer_count=2 --gpu_id=2
+
+
+14. 训练过程中出现 :code:`Floating point exception`,训练因此退出怎么办?
+------------------------------------------------------------------------
+
+Paddle二进制在运行时捕获了浮点数异常,只要出现浮点数异常(即训练过程中出现NaN或者Inf),就会立刻退出。浮点异常通常的原因是浮点数溢出、除零等问题。
+主要原因包括以下几个方面:
+
+* 训练过程中参数或者梯度的尺度过大,在参数累加、乘除等运算时导致了浮点数溢出。
+* 模型一直不收敛,发散到了一个数值特别大的地方。
+* 训练数据有问题,导致参数收敛到了一些奇异的情况。或者输入数据尺度过大,有些特征的取值达到数百万,这时进行矩阵乘法运算就可能导致浮点数溢出。
+
+主要的解决办法是减小学习率或者对数据进行归一化处理。
+
+15.
编译安装后执行 import paddle.v2 as paddle 报ImportError: No module named v2 +------------------------------------------------------------------------ +先查看一下是否曾经安装过paddle v1版本,有的话需要先卸载: + +pip uninstall py_paddle paddle + +然后安装paddle的python环境, 在build目录下执行 + +pip install python/dist/paddle*.whl && pip install ../paddle/dist/py_paddle*.whl diff --git a/doc_cn/faq/reduce_min_pool_size.py b/doc/faq/src/reduce_min_pool_size.py similarity index 100% rename from doc_cn/faq/reduce_min_pool_size.py rename to doc/faq/src/reduce_min_pool_size.py diff --git a/doc_cn/faq/word2vec_config.py b/doc/faq/src/word2vec_config.py similarity index 100% rename from doc_cn/faq/word2vec_config.py rename to doc/faq/src/word2vec_config.py diff --git a/doc_cn/faq/word2vec_dataprovider.py b/doc/faq/src/word2vec_dataprovider.py similarity index 100% rename from doc_cn/faq/word2vec_dataprovider.py rename to doc/faq/src/word2vec_dataprovider.py diff --git a/doc/getstarted/basic_usage/index_cn.rst b/doc/getstarted/basic_usage/index_cn.rst new file mode 100644 index 0000000000000000000000000000000000000000..428f58830e0b10c024f31238b7404c6df193eecd --- /dev/null +++ b/doc/getstarted/basic_usage/index_cn.rst @@ -0,0 +1,108 @@ +经典的线性回归任务 +================== + +PaddlePaddle是源于百度的一个深度学习平台。这份简短的介绍将向你展示如何利用PaddlePaddle来解决一个经典的线性回归问题。 + +任务简介 +-------- + +我们展示如何用PaddlePaddle解决 `单变量的线性回归 `_ 问题。线性回归的输入是一批点 `(x, y)` ,其中 `y = wx + b + ε`, 而 ε 是一个符合高斯分布的随机变量。线性回归的输出是从这批点估计出来的参数 `w` 和 `b` 。 + +一个例子是房产估值。我们假设房产的价格(y)是其大小(x)的一个线性函数,那么我们可以通过收集市场上房子的大小和价格,用来估计线性函数的参数w 和 b。 + +准备数据 +----------- + +假设变量 `x` 和 `y` 的真实关系为: `y = 2x + 0.3 + ε`,这里展示如何使用观测数据来拟合这一线性关系。首先,Python代码将随机产生2000个观测点,作为线性回归的输入。下面脚本符合PaddlePaddle期待的读取数据的Python程序的模式。 + +.. code-block:: python + + # dataprovider.py + from paddle.trainer.PyDataProvider2 import * + import random + + # 定义输入数据的类型: 2个浮点数 + @provider(input_types=[dense_vector(1), dense_vector(1)],use_seq=False) + def process(settings, input_file): + for i in xrange(2000): + x = random.random() + yield [x], [2*x+0.3] + +训练模型 +----------- + +为了还原 `y = 2x + 0.3`,我们先从一条随机的直线 `y' = wx + b` 开始,然后利用观测数据调整 `w` 和 `b` 使得 `y'` 和 `y` 的差距不断减小,最终趋于接近。这个过程就是模型的训练过程,而 `w` 和 `b` 就是模型的参数,即我们的训练目标。 + +在PaddlePaddle里,该模型的网络配置如下。 + +.. code-block:: python + + # trainer_config.py + from paddle.trainer_config_helpers import * + + # 1. 定义数据来源,调用上面的process函数获得观测数据 + data_file = 'empty.list' + with open(data_file, 'w') as f: f.writelines(' ') + define_py_data_sources2(train_list=data_file, test_list=None, + module='dataprovider', obj='process',args={}) + + # 2. 学习算法。控制如何改变模型参数 w 和 b + settings(batch_size=12, learning_rate=1e-3, learning_method=MomentumOptimizer()) + + # 3. 
神经网络配置 + x = data_layer(name='x', size=1) + y = data_layer(name='y', size=1) + # 线性计算网络层: ȳ = wx + b + ȳ = fc_layer(input=x, param_attr=ParamAttr(name='w'), size=1, act=LinearActivation(), bias_attr=ParamAttr(name='b')) + # 计算误差函数,即 ȳ 和真实 y 之间的距离 + cost = mse_cost(input= ȳ, label=y) + outputs(cost) + + +这段简短的配置展示了PaddlePaddle的基本用法: + +- 第一部分定义了数据输入。一般情况下,PaddlePaddle先从一个文件列表里获得数据文件地址,然后交给用户自定义的函数(例如上面的 `process`函数)进行读入和预处理从而得到真实输入。本文中由于输入数据是随机生成的不需要读输入文件,所以放一个空列表(`empty.list`)即可。 + +- 第二部分主要是选择学习算法,它定义了模型参数改变的规则。PaddlePaddle提供了很多优秀的学习算法,这里使用一个基于momentum的随机梯度下降(SGD)算法,该算法每批量(batch)读取12个采样数据进行随机梯度计算来更新更新。 + +- 最后一部分是神经网络的配置。由于PaddlePaddle已经实现了丰富的网络层,所以很多时候你需要做的只是定义正确的网络层并把它们连接起来。这里使用了三种网络单元: + + - **数据层**:数据层 `data_layer` 是神经网络的入口,它读入数据并将它们传输到接下来的网络层。这里数据层有两个,分别对应于变量 `x` 和 `y`。 + - **全连接层**:全连接层 `fc_layer` 是基础的计算单元,这里利用它建模变量之间的线性关系。计算单元是神经网络的核心,PaddlePaddle支持大量的计算单元和任意深度的网络连接,从而可以拟合任意的函数来学习复杂的数据关系。 + - **回归误差代价层**:回归误差代价层 `mse_cost` 是众多误差代价函数层的一种,它们在训练过程作为网络的出口,用来计算模型的误差,是模型参数优化的目标函数。 + +定义了网络结构并保存为 `trainer_config.py` 之后,运行以下训练命令: + +.. code-block:: bash + + paddle train --config=trainer_config.py --save_dir=./output --num_passes=30 + +PaddlePaddle将在观测数据集上迭代训练30轮,并将每轮的模型结果存放在 `./output` 路径下。从输出日志可以看到,随着轮数增加误差代价函数的输出在不断的减小,这意味着模型在训练数据上不断的改进,直到逼近真实解:` y = 2x + 0.3 ` + +模型检验 +----------- + +训练完成后,我们希望能够检验模型的好坏。一种常用的做法是用学习的模型对另外一组测试数据进行预测,评价预测的效果。在这个例子中,由于已经知道了真实答案,我们可以直接观察模型的参数是否符合预期来进行检验。 + +PaddlePaddle将每个模型参数作为一个numpy数组单独存为一个文件,所以可以利用如下方法读取模型的参数。 + +.. code-block:: python + + import numpy as np + import os + + def load(file_name): + with open(file_name, 'rb') as f: + f.read(16) # skip header for float type. + return np.fromfile(f, dtype=np.float32) + + print 'w=%.6f, b=%.6f' % (load('output/pass-00029/w'), load('output/pass-00029/b')) + # w=1.999743, b=0.300137 + +.. image:: ./parameters.png + :align: center + :scale: 80 % + +从图中可以看到,虽然 `w` 和 `b` 都使用随机值初始化,但在起初的几轮训练中它们都在快速逼近真实值,并且后续仍在不断改进,使得最终得到的模型几乎与真实模型一致。 + +这样,我们用PaddlePaddle解决了单变量线性回归问题, 包括数据输入、模型训练和最后的结果验证。 diff --git a/doc/getstarted/basic_usage/index_en.rst b/doc/getstarted/basic_usage/index_en.rst index dca7a6b1f4f017b302148c611122806f112564a9..6775da20c2f51000f305b095d40abd27b8fa6c0e 100644 --- a/doc/getstarted/basic_usage/index_en.rst +++ b/doc/getstarted/basic_usage/index_en.rst @@ -1,15 +1,15 @@ -Basic Usage -============= +Simple Linear Regression +======================== PaddlePaddle is a deep learning platform open-sourced by Baidu. With PaddlePaddle, you can easily train a classic neural network within a couple lines of configuration, or you can build sophisticated models that provide state-of-the-art performance on difficult learning tasks like sentiment analysis, machine translation, image caption and so on. -1. A Classic Problem ---------------------- +Problem Background +------------------ Now, to give you a hint of what using PaddlePaddle looks like, let's start with a fundamental learning problem - `simple linear regression `_: you have observed a set of two-dimensional data points of ``X`` and ``Y``, where ``X`` is an explanatory variable and ``Y`` is corresponding dependent variable, and you want to recover the underlying correlation between ``X`` and ``Y``. Linear regression can be used in many practical scenarios. For example, ``X`` can be a variable about house size, and ``Y`` a variable about house price. You can build a model that captures relationship between them by observing real estate markets. -2. 
Prepare the Data --------------------- +Prepare the Data +----------------- Suppose the true relationship can be characterized as ``Y = 2X + 0.3``, let's see how to recover this pattern only from observed data. Here is a piece of python code that feeds synthetic data to PaddlePaddle. The code is pretty self-explanatory, the only extra thing you need to add for PaddlePaddle is a definition of input data types. @@ -26,8 +26,8 @@ Suppose the true relationship can be characterized as ``Y = 2X + 0.3``, let's se x = random.random() yield [x], [2*x+0.3] -3. Train a NeuralNetwork -------------------------- +Train a NeuralNetwork +---------------------- To recover this relationship between ``X`` and ``Y``, we use a neural network with one layer of linear activation units and a square error cost layer. Don't worry if you are not familiar with these terminologies, it's just saying that we are starting from a random line ``Y' = wX + b`` , then we gradually adapt ``w`` and ``b`` to minimize the difference between ``Y'`` and ``Y``. Here is what it looks like in PaddlePaddle: @@ -49,7 +49,7 @@ To recover this relationship between ``X`` and ``Y``, we use a neural network wi x = data_layer(name='x', size=1) y = data_layer(name='y', size=1) y_predict = fc_layer(input=x, param_attr=ParamAttr(name='w'), size=1, act=LinearActivation(), bias_attr=ParamAttr(name='b')) - cost = regression_cost(input=y_predict, label=y) + cost = mse_cost(input=y_predict, label=y) outputs(cost) Some of the most fundamental usages of PaddlePaddle are demonstrated: @@ -73,8 +73,8 @@ Now that everything is ready, you can train the network with a simple command li This means that PaddlePaddle will train this network on the synthectic dataset for 30 passes, and save all the models under path ``./output``. You will see from the messages printed out during training phase that the model cost is decreasing as time goes by, which indicates we are getting a closer guess. -4. Evaluate the Model ------------------------ +Evaluate the Model +------------------- Usually, a different dataset that left out during training phase should be used to evalute the models. However, we are lucky enough to know the real answer: ``w=2, b=0.3``, thus a better option is to check out model parameters directly. @@ -99,11 +99,3 @@ In PaddlePaddle, training is just to get a collection of model parameters, which Although starts from a random guess, you can see that value of ``w`` changes quickly towards 2 and ``b`` changes quickly towards 0.3. In the end, the predicted line is almost identical with real answer. There, you have recovered the underlying pattern between ``X`` and ``Y`` only from observed data. - - -5. Where to Go from Here -------------------------- - -- `Install and Build <../build_and_install/index.html>`_ -- `Tutorials <../demo/quick_start/index_en.html>`_ -- `Example and Demo <../demo/index.html>`_ diff --git a/doc/getstarted/build_and_install/build_from_source_en.md b/doc/getstarted/build_and_install/build_from_source_en.md index 150d7fc43720314462ac5c5b72f6a93b18e6d735..c0608ede8e57b224dae4b3d510d704a8b0918b53 100644 --- a/doc/getstarted/build_and_install/build_from_source_en.md +++ b/doc/getstarted/build_and_install/build_from_source_en.md @@ -4,6 +4,8 @@ Installing from Sources * [1. Download and Setup](#download) * [2. Requirements](#requirements) * [3. Build on Ubuntu](#ubuntu) +* [4. Build on Centos](#centos) + ## Download and Setup You can download PaddlePaddle from the [github source](https://github.com/PaddlePaddle/Paddle). 
@@ -11,25 +13,23 @@ You can download PaddlePaddle from the [github source](https://github.com/Paddle
 ```bash
 git clone https://github.com/PaddlePaddle/Paddle paddle
 cd paddle
-git submodule update --init --recursive
 ```
-
 ## Requirements
 
 To compile the source code, your computer must be equipped with the following dependencies.
 
-- **Compiler**: GCC >= 4.8 or Clang >= 3.3 (AppleClang >= 5.1)
-- **CMake**: version >= 2.8
+- **Compiler**: GCC >= 4.8 or Clang >= 3.3 (AppleClang >= 5.1) and gfortran compiler
+- **CMake**: CMake >= 3.0 (at least CMake 3.4 on Mac OS X)
 - **BLAS**: MKL, OpenBlas or ATLAS
-- **Protocol Buffers**: version >= 2.4, **Note: 3.x is not supported**
-- **Python**: only python 2.7 is supported currently
+- **Python**: only support Python 2.7
+- **Go**
 
 **Note:** For CUDA 7.0 and CUDA 7.5, GCC 5.0 and up are not supported! For CUDA 8.0, GCC versions later than 5.3 are not supported!
 
 ### Options
 
-PaddlePaddle supports some build options. To enable it, first you need to install the related libraries.
+PaddlePaddle supports some build options.
 
@@ -40,14 +40,21 @@ PaddlePaddle supports some build options. To enable it, first you need to instal
-WITH_GPU | Compile with GPU mode.
-WITH_DOUBLE | Compile with double precision floating-point, default: single precision.
-WITH_GLOG | Compile with glog. If not found, default: an internal log implementation.
-WITH_GFLAGS | Compile with gflags. If not found, default: an internal flag implementation.
-WITH_TESTING | Compile with gtest for PaddlePaddle's unit testing.
-WITH_DOC | Compile to generate PaddlePaddle's docs, default: disabled (OFF).
-WITH_SWIG_PY | Compile with python predict API, default: disabled (OFF).
-WITH_STYLE_CHECK | Compile with code style check, default: enabled (ON).
+WITH_GPU | Compile PaddlePaddle with NVIDIA GPU
+WITH_AVX | Compile PaddlePaddle with AVX intrinsics
+WITH_DSO | Compile PaddlePaddle with dynamic linked CUDA
+WITH_TESTING | Compile PaddlePaddle with unit testing
+WITH_SWIG_PY | Compile PaddlePaddle with inference api
+WITH_STYLE_CHECK | Compile PaddlePaddle with style check
+WITH_PYTHON | Compile PaddlePaddle with python interpreter
+WITH_DOUBLE | Compile PaddlePaddle with double precision
+WITH_RDMA | Compile PaddlePaddle with RDMA support
+WITH_TIMER | Compile PaddlePaddle with stats timer
+WITH_PROFILER | Compile PaddlePaddle with GPU profiler
+WITH_DOC | Compile PaddlePaddle with documentation
+WITH_COVERAGE | Compile PaddlePaddle with code coverage
+COVERALLS_UPLOAD | Package code coverage data to coveralls
+ON_TRAVIS | Exclude special unit test on Travis CI
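A typical invocation simply passes the desired options to cmake. The flags below are taken from the table above; the particular values chosen are illustrative only:

```bash
# Illustrative example: enable GPU support, skip unit tests and style check.
mkdir -p build && cd build
cmake .. -DWITH_GPU=ON -DWITH_TESTING=OFF -DWITH_STYLE_CHECK=OFF
make -j `nproc`
```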
@@ -59,18 +66,16 @@ PaddlePaddle supports some build options. To enable it, first you need to instal As a simple example, consider the following: -1. **Python Dependencies(optional)** +1. **BLAS Dependencies(optional)** - To compile PaddlePaddle with python predict API, make sure swig installed and set `-DWITH_SWIG_PY=ON` as follows: + CMake will search BLAS libraries from system. If not found, OpenBLAS will be downloaded, built and installed automatically. + To utilize preinstalled BLAS, you can simply specify MKL, OpenBLAS or ATLAS via `MKL_ROOT`, `OPENBLAS_ROOT` or `ATLAS_ROOT`. ```bash - # install swig on ubuntu - sudo apt-get install swig - # install swig on Mac OS X - brew install swig - - # active swig in cmake - cmake .. -DWITH_SWIG_PY=ON + # specify MKL + cmake .. -DMKL_ROOT= + # or specify OpenBLAS + cmake .. -DOPENBLAS_ROOT= ``` 2. **Doc Dependencies(optional)** @@ -79,7 +84,7 @@ As a simple example, consider the following: ```bash pip install 'sphinx>=1.4.0' - pip install sphinx_rtd_theme breathe recommonmark + pip install sphinx_rtd_theme recommonmark # install doxygen on Ubuntu sudo apt-get install doxygen @@ -94,24 +99,33 @@ As a simple example, consider the following: ### Install Dependencies -- **CPU Dependencies** +- **Paddle Dependencies** ```bash # necessary sudo apt-get update - sudo apt-get install -y g++ make cmake swig build-essential libatlas-base-dev python python-pip libpython-dev m4 libprotobuf-dev protobuf-compiler python-protobuf python-numpy git - # optional - sudo apt-get install libgoogle-glog-dev - sudo apt-get install libgflags-dev - sudo apt-get install libgtest-dev - sudo pip install wheel - pushd /usr/src/gtest - cmake . - make - sudo cp *.a /usr/lib - popd + sudo apt-get install -y git curl gcc g++ gfortran make build-essential automake + sudo apt-get install -y python python-pip python-numpy libpython-dev bison + sudo pip install 'protobuf==3.1.0.post1' + + # Install Go + # You can follow https://golang.org/doc/install for a detailed explanation. + wget -O go.tgz https://storage.googleapis.com/golang/go1.8.1.linux-amd64.tar.gz && \ + tar -C $HOME -xzf go.tgz && \ + mkdir $HOME/gopath && \ + rm go.tgz + + # Setup environment variables + export GOROOT=$HOME/go + export GOPATH=$HOME/gopath + export PATH=$PATH:$GOROOT/bin + + # install cmake 3.4 + curl -sSL https://cmake.org/files/v3.4/cmake-3.4.1.tar.gz | tar -xz && \ + cd cmake-3.4.1 && ./bootstrap && make -j4 && sudo make install && \ + cd .. && rm -rf cmake-3.4.1 ``` - + - **GPU Dependencies (optional)** To build GPU version, you will need the following installed: @@ -144,51 +158,78 @@ As usual, the best option is to create build folder under paddle project directo ```bash mkdir build && cd build -cmake .. +``` + +Finally, you can build and install PaddlePaddle: + +```bash +# you can add build option here, such as: +cmake .. -DCMAKE_INSTALL_PREFIX= +# please use sudo make install, if you want to install PaddlePaddle into the system +make -j `nproc` && make install +# set PaddlePaddle installation path in ~/.bashrc +export PATH=/bin:$PATH +# install PaddlePaddle Python modules. +sudo pip install /opt/paddle/share/wheels/*.whl ``` +## Build on Centos 7 -CMake first check PaddlePaddle's dependencies in system default path. After installing some optional -libraries, corresponding build option will be set automatically (for instance, glog, gtest and gflags). -If still not found, you can manually set it based on CMake error information from your screen. 
+### Install Dependencies -As a simple example, consider the following: +- **CPU Dependencies** -- **Only CPU with swig** + ```bash + # necessary + sudo yum update + sudo yum install -y epel-release + sudo yum install -y make cmake3 python-devel python-pip gcc-gfortran swig git + sudo pip install wheel numpy + sudo pip install 'protobuf>=3.0.0' + ``` + +- **GPU Dependencies (optional)** - ```bash - cmake .. -DWITH_GPU=OFF -DWITH_SWIG_PY=ON - ``` -- **GPU with swig** + To build GPU version, you will need the following installed: - ```bash - cmake .. -DWITH_GPU=ON -DWITH_SWIG_PY=ON - ``` + 1. a CUDA-capable GPU + 2. A supported version of Linux with a gcc compiler and toolchain + 3. NVIDIA CUDA Toolkit (available at http://developer.nvidia.com/cuda-downloads) + 4. NVIDIA cuDNN Library (availabel at https://developer.nvidia.com/cudnn) -- **GPU with doc and swig** + The CUDA development environment relies on tight integration with the host development environment, + including the host compiler and C runtime libraries, and is therefore only supported on + distribution versions that have been qualified for this CUDA Toolkit release. + + After downloading cuDNN library, issue the following commands: + + ```bash + sudo tar -xzf cudnn-7.5-linux-x64-v5.1.tgz -C /usr/local + sudo chmod a+r /usr/local/cuda/include/cudnn.h /usr/local/cuda/lib64/libcudnn* + ``` + Then you need to set LD\_LIBRARY\_PATH, PATH environment variables in ~/.bashrc. + + ```bash + export LD_LIBRARY_PATH=/usr/local/cuda/lib64:$LD_LIBRARY_PATH + export PATH=/usr/local/cuda/bin:$PATH + ``` - ```bash - cmake .. -DWITH_GPU=ON -DWITH_DOC=ON -DWITH_SWIG_PY=ON - ``` +### Build and Install -Finally, you can build PaddlePaddle: +As usual, the best option is to create build folder under paddle project directory. + +```bash +mkdir build && cd build +``` + +Finally, you can build and install PaddlePaddle: ```bash # you can add build option here, such as: -cmake .. -DWITH_GPU=ON -DCMAKE_INSTALL_PREFIX= -DWITH_SWIG_PY=ON +cmake3 .. -DCMAKE_INSTALL_PREFIX= # please use sudo make install, if you want to install PaddlePaddle into the system make -j `nproc` && make install # set PaddlePaddle installation path in ~/.bashrc export PATH=/bin:$PATH -``` - -If you set `WITH_SWIG_PY=ON`, related python dependencies also need to be installed. -Otherwise, PaddlePaddle will automatically install python dependencies -at first time when user run paddle commands, such as `paddle version`, `paddle train`. -It may require sudo privileges: - -```bash -# you can run +# install PaddlePaddle Python modules. sudo pip install /opt/paddle/share/wheels/*.whl -# or just run -sudo paddle version ``` diff --git a/doc/getstarted/build_and_install/cmake/build_from_source_cn.rst b/doc/getstarted/build_and_install/cmake/build_from_source_cn.rst new file mode 100644 index 0000000000000000000000000000000000000000..be0c1ffa451b2901ec06621dd4d886f800b4562e --- /dev/null +++ b/doc/getstarted/build_and_install/cmake/build_from_source_cn.rst @@ -0,0 +1,43 @@ +PaddlePaddle的编译选项 +====================== + +PaddlePaddle的编译选项,包括生成CPU/GPU二进制文件、链接何种BLAS库等。用户可在调用cmake的时候设置它们,详细的cmake使用方法可以参考 `官方文档 `_ 。 + +Bool型的编译选项 +---------------- +用户可在cmake的命令行中,通过使用 ``-D`` 命令设置该类编译选项,例如 + +.. code-block:: bash + + cmake .. -DWITH_GPU=OFF + +.. csv-table:: Bool型的编译选项 + :widths: 1, 7, 2 + :file: compile_options.csv + +BLAS/CUDA/Cudnn的编译选项 +-------------------------- +BLAS ++++++ + +PaddlePaddle支持以下任意一种BLAS库:`MKL `_ ,`ATLAS `_ ,`OpenBlAS `_ 和 `REFERENCE BLAS `_ 。 + +.. 
csv-table:: BLAS路径相关的编译选项 + :widths: 1, 2, 7 + :file: cblas_settings.csv + +CUDA/Cudnn ++++++++++++ + +PaddlePaddle可以使用cudnn v2之后的任何一个版本来编译运行,但尽量请保持编译和运行使用的cudnn是同一个版本。 我们推荐使用最新版本的cudnn v5.1。 + +编译选项的设置 +++++++++++++++ + +PaddePaddle通过编译时指定路径来实现引用各种BLAS/CUDA/Cudnn库。cmake编译时,首先在系统路径(/usr/lib\:/usr/local/lib)中搜索这几个库,同时也会读取相关路径变量来进行搜索。 通过使用 ``-D`` 命令可以设置,例如 + +.. code-block:: bash + + cmake .. -DMKL_ROOT=/opt/mkl/ -DCUDNN_ROOT=/opt/cudnnv5 + +注意:这几个编译选项的设置,只在第一次cmake的时候有效。如果之后想要重新设置,推荐清理整个编译目录(``rm -rf``)后,再指定。 diff --git a/doc_cn/build_and_install/cmake/cblas_settings.csv b/doc/getstarted/build_and_install/cmake/cblas_settings.csv similarity index 100% rename from doc_cn/build_and_install/cmake/cblas_settings.csv rename to doc/getstarted/build_and_install/cmake/cblas_settings.csv diff --git a/doc/getstarted/build_and_install/cmake/compile_options.csv b/doc/getstarted/build_and_install/cmake/compile_options.csv new file mode 100644 index 0000000000000000000000000000000000000000..463b825470579d0c3736a408b1e82dd33e6f8d42 --- /dev/null +++ b/doc/getstarted/build_and_install/cmake/compile_options.csv @@ -0,0 +1,12 @@ +选项,说明,默认值 +WITH_GPU,是否支持GPU。,取决于是否寻找到CUDA工具链 +WITH_DOUBLE,是否使用双精度浮点数。,否 +WITH_DSO,是否运行时动态加载CUDA动态库,而非静态加载CUDA动态库。,是 +WITH_AVX,是否编译含有AVX指令集的PaddlePaddle二进制文件,是 +WITH_PYTHON,是否内嵌PYTHON解释器。方便今后的嵌入式移植工作。,是 +WITH_STYLE_CHECK,是否编译时进行代码风格检查,是 +WITH_RDMA,是否开启RDMA,否 +WITH_TIMER,是否开启计时功能。如果开启会导致运行略慢,打印的日志变多,但是方便调试和测Benchmark,否 +WITH_TESTING,是否开启单元测试,取决于是否寻找到GTEST +WITH_DOC,是否编译中英文文档,否 +WITH_SWIG_PY,是否编译PYTHON的SWIG接口,该接口可用于预测和定制化训练,取决于是否寻找到SWIG \ No newline at end of file diff --git a/doc/getstarted/build_and_install/docker_install_cn.rst b/doc/getstarted/build_and_install/docker_install_cn.rst new file mode 100644 index 0000000000000000000000000000000000000000..02b96bb413156786db6dc77696c5640b97c10aa4 --- /dev/null +++ b/doc/getstarted/build_and_install/docker_install_cn.rst @@ -0,0 +1,222 @@ +PaddlePaddle的Docker容器使用方式 +================================ + +PaddlePaddle目前唯一官方支持的运行的方式是Docker容器。因为Docker能在所有主要操作系统(包括Linux,Mac OS X和Windows)上运行。 请注意,您需要更改 `Dockers设置 `_ 才能充分利用Mac OS X和Windows上的硬件资源。 + +Docker使用入门 +------------------------------ + +几个基础的概念帮助理解和使用Docker: + +- *镜像*:一个Docker镜像是一个打包好的软件。它包含了这个软件本身和它所依赖的运行环境。PaddlePaddle的Docker镜像就包含了PaddlePaddle的Python库以及其依赖的多个Python库。这样我们可以直接在Docker中运行需要的程序而不需要安装后在执行。可以执行: + + .. code-block:: bash + + docker images + + 来列出当前系统中的所有镜像,同样可以执行: + + .. code-block:: bash + + docker pull paddlepaddle/paddle:0.10.0 + + 来下载Docker镜像,paddlepaddle/paddle是从官方镜像源Dockerhub.com下载的,推荐国内用户使用ocker.paddlepaddle.org/paddle下载。 + +- *容器*: 如果说一个Docker镜像就是一个程序,那容器就是这个程序运行时产生的“进程”。 + 实际上,一个容器就是一个操作系统的进程,但是是运行在独立的进程空间,文件系统以及网络之上。 + 可以执行: + + .. code-block:: bash + + docker run paddlepaddle/paddle:0.10.0 + + 来使用一个镜像启动一个容器。 + +- 默认情况下,Docker容器会运行在独立的文件系统空间之上,我们无法在Docker容器中 + 访问到主机上的文件。可以通过*挂载Volume*的方式,将主机上的文件或目录挂载到 + Docker容器中。下面的命令把当前目录挂载到了容器中的 /data 目录下,容器使用 + debian镜像,并且启动后执行 :code:`ls /data`。 + + .. code-block:: bash + + docker run --rm -v $(pwd):/data debian ls /data + +PaddlePaddle发布的Docker镜像使用说明 +------------------------------ + +我们把PaddlePaddle的编译环境打包成一个镜像,称为开发镜像,里面涵盖了 +PaddlePaddle需要的所有编译工具。把编译出来的PaddlePaddle也打包成一个镜 +像,称为生产镜像,里面涵盖了PaddlePaddle运行所需的所有环境。每次 +PaddlePaddle发布新版本的时候都会发布对应版本的生产镜像以及开发镜像。运 +行镜像包括纯CPU版本和GPU版本以及其对应的非AVX版本。我们会在 +`dockerhub.com `_ +和国内镜像`docker.paddlepaddle.org` 提供最新 +的Docker镜像,可以在"tags"标签下找到最新的Paddle镜像版本。 + +**注意:为了方便在国内的开发者下载Docker镜像,我们提供了国内的镜像服务器供大家使用。如果您在国内,请把文档里命令中的paddlepaddle/paddle替换成docker.paddlepaddle.org/paddle。** + +1. 
开发镜像::code:`paddlepaddle/paddle:0.10.0-dev` + + 这个镜像包含了Paddle相关的开发工具以及编译和运行环境。用户可以使用开发镜像代替配置本地环境,完成开发,编译,发布, + 文档编写等工作。由于不同的Paddle的版本可能需要不同的依赖和工具,所以如果需要自行配置开发环境需要考虑版本的因素。 + 开发镜像包含了以下工具: + + - gcc/clang + - nvcc + - Python + - sphinx + - woboq + - sshd + 很多开发者会使用远程的安装有GPU的服务器工作,用户可以使用ssh登录到这台服务器上并执行 :code:`docker exec`进入开发镜像并开始工作, + 也可以在开发镜像中启动一个SSHD服务,方便开发者直接登录到镜像中进行开发: + + 以交互容器方式运行开发镜像: + + .. code-block:: bash + + docker run -it --rm paddlepaddle/paddle:0.10.0-dev /bin/bash + + 或者,可以以后台进程方式运行容器: + + .. code-block:: bash + + docker run -d -p 2202:22 -p 8888:8888 paddledev/paddle:0.10.0-dev + + 然后用密码 :code:`root` SSH进入容器: + + .. code-block:: bash + + ssh -p 2202 root@localhost + + SSH方式的一个优点是我们可以从多个终端进入容器。比如,一个终端运行vi,另一个终端运行Python。另一个好处是我们可以把PaddlePaddle容器运行在远程服务器上,并在笔记本上通过SSH与其连接。 + +2. 生产镜像:根据CPU、GPU和非AVX区分了如下4个镜像: + + - GPU/AVX::code:`paddlepaddle/paddle:-gpu` + - GPU/no-AVX::code:`paddlepaddle/paddle:-gpu-noavx` + - CPU/AVX::code:`paddlepaddle/paddle:` + - CPU/no-AVX::code:`paddlepaddle/paddle:-noavx` + + 纯CPU镜像以及GPU镜像都会用到AVX指令集,但是2008年之前生产的旧电脑不支持AVX。以下指令能检查Linux电脑是否支持AVX: + + .. code-block:: bash + + if cat /proc/cpuinfo | grep -i avx; then echo Yes; else echo No; fi + + 如果输出是No,就需要选择使用no-AVX的镜像 + + **注:在0.10.0之后的版本,PaddlePaddle都可以自动判断硬件是否支持AVX,所以无需判断AVX即可使用** + + 以上方法在GPU镜像里也能用,只是请不要忘记提前在物理机上安装GPU最新驱动。 + 为了保证GPU驱动能够在镜像里面正常运行,我们推荐使用[nvidia-docker](https://github.com/NVIDIA/nvidia-docker)来运行镜像。 + + .. code-block:: bash + + nvidia-docker run -it --rm paddledev/paddle:0.10.0-gpu /bin/bash + + 注意: 如果使用nvidia-docker存在问题,你也许可以尝试更老的方法,具体如下,但是我们并不推荐这种方法。: + + .. code-block:: bash + + export CUDA_SO="$(\ls /usr/lib64/libcuda* | xargs -I{} echo '-v {}:{}') $(\ls /usr/lib64/libnvidia* | xargs -I{} echo '-v {}:{}')" + export DEVICES=$(\ls /dev/nvidia* | xargs -I{} echo '--device {}:{}') + docker run ${CUDA_SO} ${DEVICES} -it paddledev/paddle:0.10.0-gpu + +3. 运行以及发布您的AI程序 + + 假设您已经完成了一个AI训练的python程序 :code:`a.py`,这个程序是您在开发机上使用开发镜像完成开发。此时您可以运行这个命令在开发机上进行测试运行: + + .. code-block:: bash + + docker run -it -v $PWD:/work paddle /work/a.py + + 如果要使用GPU,请运行: + + .. code-block:: bash + + nvidia-docker run -it -v $PWD:/work paddle /work/a.py + + + 这里`a.py`包含的所有依赖假设都可以在Paddle的运行容器中。如果需要包含更多的依赖、或者需要发布您的应用的镜像,可以编写`Dockerfile`使用`FROM paddledev/paddle:0.10.0` + 创建和发布自己的AI程序镜像。 + +运行PaddlePaddle Book +--------------------- + +Jupyter Notebook是一个开源的web程序,大家可以通过它制作和分享带有代码、公式、图表、文字的交互式文档。用户可以通过网页浏览文档。 + +PaddlePaddle Book是为用户和开发者制作的一个交互式的Jupyter Nodebook。 +如果您想要更深入了解deep learning,PaddlePaddle Book一定是您最好的选择。 + +我们提供可以直接运行PaddlePaddle Book的Docker镜像,直接运行: + +.. code-block:: bash + + docker run -p 8888:8888 paddlepaddle/book + +然后在浏览器中输入以下网址: + +.. code-block:: text + + http://localhost:8888/ + +就这么简单,享受您的旅程! + +通过Docker容器开发PaddlePaddle +------------------------------ + +开发人员可以在Docker开发镜像中开发PaddlePaddle。这样开发人员可以以一致的方式在不同的平台上工作 - Linux,Mac OS X和Windows。 + +1. 制作PaddlePaddle开发镜像 + + PaddlePaddle每次发布新版本都会发布对应的开发镜像供开发者直接使用。这里介绍如生成造这个开发镜像。 + 生成Docker镜像的方式有两个,一个是直接把一个容器转换成镜像,另一个是创建Dockerfile并运行docker build指令按照Dockerfile生成镜像。第一个方法的好处是简单快捷,适合自己实验,可以快速迭代。第二个方法的好处是Dockerfile可以把整个生成流程描述很清楚,其他人很容易看懂镜像生成过程,持续集成系统也可以简单地复现这个过程。我们采用第二个方法。Dockerfile位于PaddlePaddle repo的根目录。生成生产镜像只需要运行: + + .. code-block:: bash + + git clone https://github.com/PaddlePaddle/Paddle.git + cd Paddle + docker build -t paddle:dev . + + docker build这个命令的-t指定了生成的镜像的名字,这里我们用paddle:dev。到此,PaddlePaddle开发镜像就被构建完毕了。 + +2. 制作PaddlePaddle生产镜像 + + 生产镜像的生成分为两步,第一步是运行: + + .. 
code-block:: bash + + docker run -v $(pwd):/paddle -e "WITH_GPU=OFF" -e "WITH_AVX=OFF" -e "WITH_TEST=ON" paddle:dev + + 以上命令会编译PaddlePaddle,生成运行程序,以及生成创建生产镜像的Dockerfile。所有生成的的文件都在build目录下。“WITH_GPU”控制生成的生产镜像是否支持GPU,“WITH_AVX”控制生成的生产镜像是否支持AVX,”WITH_TEST“控制是否生成单元测试。 + + 第二步是运行: + + .. code-block:: bash + + docker build -t paddle:prod -f build/Dockerfile ./build + + 以上命令会按照生成的Dockerfile把生成的程序拷贝到生产镜像中并做相应的配置,最终生成名为paddle:prod的生产镜像。 + +3. 运行单元测试 + + 运行以下指令: + + .. code-block:: bash + + docker run -it -v $(pwd):/paddle paddle:dev bash -c "cd /paddle/build && ctest" + +文档 +---- + +Paddle的Docker开发镜像带有一个通过 `woboq code browser +`_ 生成的HTML版本的C++源代码,便于用户浏览C++源码。 + +只要在Docker里启动PaddlePaddle的时候给它一个名字,就可以再运行另一个Nginx Docker镜像来服务HTML代码: + +.. code-block:: bash + + docker run -d --name paddle-cpu-doc paddle:0.10.0-dev + docker run -d --volumes-from paddle-cpu-doc -p 8088:80 nginx + +接着我们就能够打开浏览器在 http://localhost:8088/paddle/ 浏览代码。 diff --git a/doc/getstarted/build_and_install/docker_install_en.rst b/doc/getstarted/build_and_install/docker_install_en.rst index 1ab6fc6a728f68b16d798a577da2896481eb17d1..94860240f6a4a9bed8a865684a8a79960489280e 100644 --- a/doc/getstarted/build_and_install/docker_install_en.rst +++ b/doc/getstarted/build_and_install/docker_install_en.rst @@ -8,80 +8,244 @@ Please be aware that you will need to change `Dockers settings `_ to make full use of your hardware resource on Mac OS X and Windows. +Working With Docker +------------------- -CPU-only and GPU Images ------------------------ +Docker is simple as long as we understand a few basic concepts: -For each version of PaddlePaddle, we release 2 Docker images, a -CPU-only one and a CUDA GPU one. We do so by configuring -`dockerhub.com `_ -automatically runs the following commands: +- *image*: A Docker image is a pack of software. It could contain one or more programs and all their dependencies. For example, the PaddlePaddle's Docker image includes pre-built PaddlePaddle and Python and many Python packages. We can run a Docker image directly, other than installing all these software. We can type -.. code-block:: base + .. code-block:: bash - docker build -t paddle:cpu -f paddle/scripts/docker/Dockerfile . - docker build -t paddle:gpu -f paddle/scripts/docker/Dockerfile.gpu . + docker images + to list all images in the system. We can also run -To run the CPU-only image as an interactive container: + .. code-block:: bash + + docker pull paddlepaddle/paddle:0.10.0 -.. code-block:: bash + to download a Docker image, paddlepaddle/paddle in this example, + from Dockerhub.com. + +- *container*: considering a Docker image a program, a container is a + "process" that runs the image. Indeed, a container is exactly an + operating system process, but with a virtualized filesystem, network + port space, and other virtualized environment. We can type + + .. code-block:: bash + + docker run paddlepaddle/paddle:0.10.0 + + to start a container to run a Docker image, paddlepaddle/paddle in this example. + +- By default docker container have an isolated file system namespace, + we can not see the files in the host file system. By using *volume*, + mounted files in host will be visible inside docker container. + Following command will mount current dirctory into /data inside + docker container, run docker container from debian image with + command :code:`ls /data`. + + .. 
code-block:: bash + + docker run --rm -v $(pwd):/data debian ls /data + +Usage of CPU-only and GPU Images +---------------------------------- + +We package PaddlePaddle's compile environment into a Docker image, +called the develop image, it contains all compiling tools that +PaddlePaddle needs. We package compiled PaddlePaddle program into a +Docker image as well, called the production image, it contains all +runtime environment that running PaddlePaddle needs. For each version +of PaddlePaddle, we release both of them. Production image includes +CPU-only version and a CUDA GPU version and their no-AVX versions. + +We put the docker images on `dockerhub.com +`_. You can find the +latest versions under "tags" tab at dockerhub.com. + +** NOTE: If you are in China, you can use our Docker image registry mirror to speed up the download process. To use it, please replace all paddlepaddle/paddle in the commands to docker.paddlepaddle.org/paddle.** + + +1. development image :code:`paddlepaddle/paddle:-dev` + + This image has packed related develop tools and runtime + environment. Users and developers can use this image instead of + their own local computer to accomplish development, build, + releasing, document writing etc. While different version of paddle + may depends on different version of libraries and tools, if you + want to setup a local environment, you must pay attention to the + versions. The development image contains: + + - gcc/clang + - nvcc + - Python + - sphinx + - woboq + - sshd + + Many developers use servers with GPUs, they can use ssh to login to + the server and run :code:`docker exec` to enter the docker + container and start their work. Also they can start a development + docker image with SSHD service, so they can login to the container + and start work. + +2. Production images, this image might have multiple variants: + + - GPU/AVX::code:`paddlepaddle/paddle:-gpu` + - GPU/no-AVX::code:`paddlepaddle/paddle:-gpu-noavx` + - CPU/AVX::code:`paddlepaddle/paddle:` + - CPU/no-AVX::code:`paddlepaddle/paddle:-noavx` + + Please be aware that the CPU-only and the GPU images both use the + AVX instruction set, but old computers produced before 2008 do not + support AVX. The following command checks if your Linux computer + supports AVX: + + .. code-block:: bash + + if cat /proc/cpuinfo | grep -i avx; then echo Yes; else echo No; fi + + **NOTE:versions after 0.10.0 will automatically detect system AVX support, so manual detect is not needed in this case.** + To run the CPU-only image as an interactive container: + + .. code-block:: bash + + docker run -it --rm paddlepaddle/paddle:0.10.0 /bin/bash + + Above method work with the GPU image too -- the recommended way is + using `nvidia-docker `_. + + Please install nvidia-docker first following this `tutorial + `_. + + Now you can run a GPU image: + + .. code-block:: bash + + nvidia-docker run -it --rm paddlepaddle/paddle:0.10.0-gpu /bin/bash + + +Train Model Using Python API +---------------------------- - docker run -it --rm paddledev/paddle:cpu-latest /bin/bash +Our official docker image provides a runtime for PaddlePaddle +programs. The typical workflow will be as follows: -or, we can run it as a daemon container +Create a directory as workspace: .. code-block:: bash - docker run -d -p 2202:22 paddledev/paddle:cpu-latest + mkdir ~/workspace -and SSH to this container using password :code:`root`: +Edit a PaddlePaddle python program using your favourite editor .. 
code-block:: bash - ssh -p 2202 root@localhost + emacs ~/workspace/example.py -An advantage of using SSH is that we can connect to PaddlePaddle from -more than one terminals. For example, one terminal running vi and -another one running Python interpreter. Another advantage is that we -can run the PaddlePaddle container on a remote server and SSH to it -from a laptop. +Run the program using docker: +.. code-block:: bash + + docker run --rm -v ~/workspace:/workspace paddlepaddle/paddle:0.10.0 python /workspace/example.py -Above methods work with the GPU image too -- just please don't forget -to install CUDA driver and let Docker knows about it: +Or if you are using GPU for training: .. code-block:: bash - export CUDA_SO="$(\ls /usr/lib64/libcuda* | xargs -I{} echo '-v {}:{}') $(\ls /usr/lib64/libnvidia* | xargs -I{} echo '-v {}:{}')" - export DEVICES=$(\ls /dev/nvidia* | xargs -I{} echo '--device {}:{}') - docker run ${CUDA_SO} ${DEVICES} -it paddledev/paddle:gpu-latest + nvidia-docker run --rm -v ~/workspace:/workspace paddlepaddle/paddle:0.10.0-gpu python /workspace/example.py + +Above commands will start a docker container by running :code:`python +/workspace/example.py`. It will stop once :code:`python +/workspace/example.py` finishes. + +Another way is to tell docker to start a :code:`/bin/bash` session and +run PaddlePaddle program interactively: +.. code-block:: bash -Non-AVX Images --------------- + docker run -it -v ~/workspace:/workspace paddlepaddle/paddle:0.10.0 /bin/bash + # now we are inside docker container + cd /workspace + python example.py -Please be aware that the CPU-only and the GPU images both use the AVX -instruction set, but old computers produced before 2008 do not support -AVX. The following command checks if your Linux computer supports -AVX: +Running with GPU is identical: .. code-block:: bash - if cat /proc/cpuinfo | grep -i avx; then echo Yes; else echo No; fi + nvidia-docker run -it -v ~/workspace:/workspace paddlepaddle/paddle:0.10.0-gpu /bin/bash + # now we are inside docker container + cd /workspace + python example.py + + +Develop PaddlePaddle or Train Model Using C++ API +--------------------------------------------------- + +We will be using PaddlePaddle development image since it contains all +compiling tools and dependencies. + +1. Build PaddlePaddle develop image + + Use following command to build PaddlePaddle develop image: + + .. code-block:: bash + + git clone https://github.com/PaddlePaddle/Paddle.git && cd Paddle + docker build -t paddle:dev . + +2. Build PaddlePaddle production image + + There are two steps for building production image, the first step is to run: + + .. code-block:: bash + docker run -v $(pwd):/paddle -e "WITH_GPU=OFF" -e "WITH_AVX=OFF" -e "WITH_TEST=ON" paddle:dev -If it doesn't, we will need to build non-AVX images manually from -source code: + The above command will compile PaddlePaddle and create a Dockerfile for building production image. All the generated files are in the build directory. "WITH_GPU" controls if the generated production image supports GPU. "WITH_AVX" controls if the generated production image supports AVX. "WITH_TEST" controls if the unit test will be generated. + + The second step is to run: + + .. code-block:: bash + + docker build -t paddle:prod -f build/Dockerfile ./build + + The above command will generate the production image by copying the compiled PaddlePaddle program into the image. + +3. Run unit test + + Following command will run unit test: + + .. 
code-block:: bash + + docker run -it -v $(pwd):/paddle paddle:dev bash -c "cd /paddle/build && ctest" + +PaddlePaddle Book +------------------ + +The Jupyter Notebook is an open-source web application that allows +you to create and share documents that contain live code, equations, +visualizations and explanatory text in a single browser. + +PaddlePaddle Book is an interactive Jupyter Notebook for users and developers. +We already exposed port 8888 for this book. If you want to +dig deeper into deep learning, PaddlePaddle Book definitely is your best choice. + +We provide a packaged book image, simply issue the command: .. code-block:: bash - cd ~ - git clone github.com/PaddlePaddle/Paddle - cd Paddle - git submodule update --init --recursive - docker build --build-arg WITH_AVX=OFF -t paddle:cpu-noavx -f paddle/scripts/docker/Dockerfile . - docker build --build-arg WITH_AVX=OFF -t paddle:gpu-noavx -f paddle/scripts/docker/Dockerfile.gpu . + docker run -p 8888:8888 paddlepaddle/book + +Then, you would back and paste the address into the local browser: + +.. code-block:: text + + http://localhost:8888/ + +That's all. Enjoy your journey! Documentation @@ -93,12 +257,12 @@ generated using `woboq code browser for users to browse and understand the C++ source code. As long as we give the Paddle Docker container a name, we can run an -additional nginx Docker container to serve the volume from the Paddle +additional Nginx Docker container to serve the volume from the Paddle container: .. code-block:: bash - docker run -d --name paddle-cpu-doc paddle:cpu + docker run -d --name paddle-cpu-doc paddle: docker run -d --volumes-from paddle-cpu-doc -p 8088:80 nginx diff --git a/doc/getstarted/build_and_install/index_cn.rst b/doc/getstarted/build_and_install/index_cn.rst new file mode 100644 index 0000000000000000000000000000000000000000..a24df6c518fad84a48061ecb34ee46cb312a4995 --- /dev/null +++ b/doc/getstarted/build_and_install/index_cn.rst @@ -0,0 +1,29 @@ +安装与编译 +========== + +.. _install_steps: + +安装流程 +++++++++ + +PaddlePaddle提供数个预编译的二进制来进行安装,包括Docker镜像,ubuntu的deb安装包等。我们推荐使用Docker镜像来部署环境,同时欢迎贡献更多的安装包。 + +.. toctree:: + :maxdepth: 1 + + docker_install_cn.rst + ubuntu_install_cn.rst + + + +编译流程 +++++++++ + +.. warning:: + + 编译流程主要推荐高级用户查看,普通用户请走安装流程。 + +.. toctree:: + :maxdepth: 1 + + cmake/build_from_source_cn.rst diff --git a/doc/getstarted/build_and_install/ubuntu_install_cn.rst b/doc/getstarted/build_and_install/ubuntu_install_cn.rst new file mode 100644 index 0000000000000000000000000000000000000000..9e39ccb00f5d5655c30148900a3d76a22aacfc01 --- /dev/null +++ b/doc/getstarted/build_and_install/ubuntu_install_cn.rst @@ -0,0 +1,71 @@ +Ubuntu部署PaddlePaddle +=================================== + +PaddlePaddle提供了ubuntu 14.04 deb安装包。 + +安装 +------ + +安装包的下载地址是\: https://github.com/PaddlePaddle/Paddle/releases + +它包含四个版本\: + +* cpu版本: 支持主流x86处理器平台, 使用了avx指令集。 + +* cpu-noavx版本:支持主流x86处理器平台,没有使用avx指令集。 + +* gpu版本:支持主流x86处理器平台,支持nvidia cuda平台,使用了avx指令集。 + +* gpu-noavx版本:支持主流x86处理器平台,支持nvidia cuda平台,没有使用avx指令集。 + +下载完相关安装包后,执行: + +.. code-block:: shell + + sudo apt-get install gdebi + gdebi paddle-*-cpu.deb + +或者: + +.. code-block:: shell + + dpkg -i paddle-*-cpu.deb + apt-get install -f + + +在 :code:`dpkg -i` 的时候如果报一些依赖未找到的错误是正常的, +在 :code:`apt-get install -f` 里会继续安装 PaddlePaddle。 + +安装完成后,可以使用命令 :code:`paddle version` 查看安装后的paddle 版本: + +.. 
code-block:: shell + + PaddlePaddle 0.8.0b1, compiled with + with_avx: ON + with_gpu: OFF + with_double: OFF + with_python: ON + with_rdma: OFF + with_timer: OFF + with_predict_sdk: + + +可能遇到的问题 +-------------- + +libcudart.so/libcudnn.so找不到 +++++++++++++++++++++++++++++++ + +安装完成后,运行 :code:`paddle train` 报错\: + +.. code-block:: shell + + 0831 12:36:04.151525 1085 hl_dso_loader.cc:70] Check failed: nullptr != *dso_handle For Gpu version of PaddlePaddle, it couldn't find CUDA library: libcudart.so Please make sure you already specify its path.Note: for training data on Cpu using Gpu version of PaddlePaddle,you must specify libcudart.so via LD_LIBRARY_PATH. + +原因是未设置cuda运行时环境变量。 如果使用GPU版本的PaddlePaddle,请安装CUDA 7.5 和CUDNN 5到本地环境中,并设置: + +.. code-block:: shell + + export LD_LIBRARY_PATH=/usr/local/cuda/lib64:/usr/local/cuda/lib:$LD_LIBRARY_PATH + export PATH=/usr/local/cuda/bin:$PATH + diff --git a/doc/getstarted/concepts/src/train.py b/doc/getstarted/concepts/src/train.py new file mode 100644 index 0000000000000000000000000000000000000000..7e604f23de38543a00f305d508af0791193f78ba --- /dev/null +++ b/doc/getstarted/concepts/src/train.py @@ -0,0 +1,52 @@ +import paddle.v2 as paddle +import numpy as np + +# init paddle +paddle.init(use_gpu=False) + +# network config +x = paddle.layer.data(name='x', type=paddle.data_type.dense_vector(2)) +y_predict = paddle.layer.fc(input=x, size=1, act=paddle.activation.Linear()) +y = paddle.layer.data(name='y', type=paddle.data_type.dense_vector(1)) +cost = paddle.layer.mse_cost(input=y_predict, label=y) + +# create parameters +parameters = paddle.parameters.create(cost) +# create optimizer +optimizer = paddle.optimizer.Momentum(momentum=0) +# create trainer +trainer = paddle.trainer.SGD(cost=cost, + parameters=parameters, + update_equation=optimizer) + + +# event_handler to print training info +def event_handler(event): + if isinstance(event, paddle.event.EndIteration): + if event.batch_id % 1 == 0: + print "Pass %d, Batch %d, Cost %f" % (event.pass_id, event.batch_id, + event.cost) + + +# define training dataset reader +def train_reader(): + train_x = np.array([[1, 1], [1, 2], [3, 4], [5, 2]]) + train_y = np.array([[-2], [-3], [-7], [-7]]) + + def reader(): + for i in xrange(train_y.shape[0]): + yield train_x[i], train_y[i] + + return reader + + +# define feeding map +feeding = {'x': 0, 'y': 1} + +# training +trainer.train( + reader=paddle.batch( + train_reader(), batch_size=1), + feeding=feeding, + event_handler=event_handler, + num_passes=100) diff --git a/doc/getstarted/concepts/use_concepts_cn.rst b/doc/getstarted/concepts/use_concepts_cn.rst new file mode 100644 index 0000000000000000000000000000000000000000..f15b11bd780402a3ec1755900e8c648f5d2a7bc5 --- /dev/null +++ b/doc/getstarted/concepts/use_concepts_cn.rst @@ -0,0 +1,150 @@ +############ +基本使用概念 +############ + +PaddlePaddle是源于百度的一个深度学习平台。PaddlePaddle为深度学习研究人员提供了丰富的API,可以轻松地完成神经网络配置,模型训练等任务。 +这里将介绍PaddlePaddle的基本使用概念,并且展示了如何利用PaddlePaddle来解决一个经典的线性回归问题。 +在使用该文档之前,请参考 `安装文档 <../build_and_install/index_cn.html>`_ 完成PaddlePaddle的安装。 + + +配置网络 +============ + +加载PaddlePaddle +---------------------- + +在进行网络配置之前,首先需要加载相应的Python库,并进行初始化操作。 + +.. code-block:: bash + + import paddle.v2 as paddle + import numpy as np + paddle.init(use_gpu=False) + + +搭建神经网络 +----------------------- + +搭建神经网络就像使用积木搭建宝塔一样。在PaddlePaddle中,layer是我们的积木,而神经网络是我们要搭建的宝塔。我们使用不同的layer进行组合,来搭建神经网络。 +宝塔的底端需要坚实的基座来支撑,同样,神经网络也需要一些特定的layer作为输入接口,来完成网络的训练。 + +例如,我们可以定义如下layer来描述神经网络的输入: + +.. 
code-block:: bash + + x = paddle.layer.data(name='x', type=paddle.data_type.dense_vector(2)) + y = paddle.layer.data(name='y', type=paddle.data_type.dense_vector(1)) + +其中x表示输入数据是一个维度为2的稠密向量,y表示输入数据是一个维度为1的稠密向量。 + +PaddlePaddle支持不同类型的输入数据,主要包括四种类型,和三种序列模式。 + +四种数据类型: + +* dense_vector:稠密的浮点数向量。 +* sparse_binary_vector:稀疏的01向量,即大部分值为0,但有值的地方必须为1。 +* sparse_float_vector:稀疏的向量,即大部分值为0,但有值的部分可以是任何浮点数。 +* integer:整数标签。 + +三种序列模式: + +* SequenceType.NO_SEQUENCE:不是一条序列 +* SequenceType.SEQUENCE:是一条时间序列 +* SequenceType.SUB_SEQUENCE: 是一条时间序列,且序列的每一个元素还是一个时间序列。 + +不同的数据类型和序列模式返回的格式不同,列表如下: + ++----------------------+---------------------+-----------------------------------+------------------------------------------------+ +| | NO_SEQUENCE | SEQUENCE | SUB_SEQUENCE | ++======================+=====================+===================================+================================================+ +| dense_vector | [f, f, ...] | [[f, ...], [f, ...], ...] | [[[f, ...], ...], [[f, ...], ...],...] | ++----------------------+---------------------+-----------------------------------+------------------------------------------------+ +| sparse_binary_vector | [i, i, ...] | [[i, ...], [i, ...], ...] | [[[i, ...], ...], [[i, ...], ...],...] | ++----------------------+---------------------+-----------------------------------+------------------------------------------------+ +| sparse_float_vector | [(i,f), (i,f), ...] | [[(i,f), ...], [(i,f), ...], ...] | [[[(i,f), ...], ...], [[(i,f), ...], ...],...] | ++----------------------+---------------------+-----------------------------------+------------------------------------------------+ +| integer_value | i | [i, i, ...] | [[i, ...], [i, ...], ...] | ++----------------------+---------------------+-----------------------------------+------------------------------------------------+ + +其中,f代表一个浮点数,i代表一个整数。 + +注意:对sparse_binary_vector和sparse_float_vector,PaddlePaddle存的是有值位置的索引。例如, + +- 对一个5维非序列的稀疏01向量 ``[0, 1, 1, 0, 0]`` ,类型是sparse_binary_vector,返回的是 ``[1, 2]`` 。 +- 对一个5维非序列的稀疏浮点向量 ``[0, 0.5, 0.7, 0, 0]`` ,类型是sparse_float_vector,返回的是 ``[(1, 0.5), (2, 0.7)]`` 。 + + +在定义输入layer之后,我们可以使用其他layer进行组合。在组合时,需要指定layer的输入来源。 + +例如,我们可以定义如下的layer组合: + +.. code-block:: bash + + y_predict = paddle.layer.fc(input=x, size=1, act=paddle.activation.Linear()) + cost = paddle.layer.mse_cost(input=y_predict, label=y) + +其中,x与y为之前描述的输入层;而y_predict是接收x作为输入,接上一个全连接层;cost接收y_predict与y作为输入,接上均方误差层。 + +最后一层cost中记录了神经网络的所有拓扑结构,通过组合不同的layer,我们即可完成神经网络的搭建。 + + +训练模型 +============ + +在完成神经网络的搭建之后,我们首先需要根据神经网络结构来创建所需要优化的parameters,并创建optimizer。 +之后,我们可以创建trainer来对网络进行训练。 + +.. code-block:: bash + + parameters = paddle.parameters.create(cost) + optimizer = paddle.optimizer.Momentum(momentum=0) + trainer = paddle.trainer.SGD(cost=cost, + parameters=parameters, + update_equation=optimizer) + +其中,trainer接收三个参数,包括神经网络拓扑结构、神经网络参数以及迭代方程。 + +在搭建神经网络的过程中,我们仅仅对神经网络的输入进行了描述。而trainer需要读取训练数据进行训练,PaddlePaddle中通过reader来加载数据。 + +.. code-block:: bash + + # define training dataset reader + def train_reader(): + train_x = np.array([[1, 1], [1, 2], [3, 4], [5, 2]]) + train_y = np.array([[-2], [-3], [-7], [-7]]) + def reader(): + for i in xrange(train_y.shape[0]): + yield train_x[i], train_y[i] + return reader + +最终我们可以调用trainer的train方法启动训练: + +.. 
code-block:: python
+
+    # define feeding map
+    feeding = {'x': 0, 'y': 1}
+
+    # event_handler to print training info
+    def event_handler(event):
+        if isinstance(event, paddle.event.EndIteration):
+            if event.batch_id % 1 == 0:
+                print "Pass %d, Batch %d, Cost %f" % (
+                    event.pass_id, event.batch_id, event.cost)
+    # training
+    trainer.train(
+        reader=paddle.batch(train_reader(), batch_size=1),
+        feeding=feeding,
+        event_handler=event_handler,
+        num_passes=100)
+
+关于PaddlePaddle的更多使用方法请参考 `进阶指南 <../../howto/index_cn.html>`_。
+
+线性回归完整示例
+================
+
+下面给出在三维空间中使用线性回归拟合一条直线的例子:
+
+.. literalinclude:: src/train.py
+   :linenos:
+
+有关线性回归的实际应用,可以参考PaddlePaddle book的 `第一章节 `_。
\ No newline at end of file
diff --git a/doc/getstarted/index_cn.rst b/doc/getstarted/index_cn.rst
new file mode 100644
index 0000000000000000000000000000000000000000..aa418c657a4ba16cce61c030066f4d3e14e891cc
--- /dev/null
+++ b/doc/getstarted/index_cn.rst
@@ -0,0 +1,10 @@
+新手入门
+============
+
+.. toctree::
+   :maxdepth: 1
+
+   build_and_install/index_cn.rst
+   concepts/use_concepts_cn.rst
+
+- `深度学习入门课程 `_
diff --git a/doc/getstarted/index_en.rst b/doc/getstarted/index_en.rst
index 55d95d8015e56ddae3363d19315db0fad841caad..be3253e3d41b99a2b696e2c5ef6463ed49680d69 100644
--- a/doc/getstarted/index_en.rst
+++ b/doc/getstarted/index_en.rst
@@ -2,7 +2,8 @@ GET STARTED
 ============
 
 .. toctree::
-   :maxdepth: 2
+   :maxdepth: 1
 
    build_and_install/index_en.rst
-   basic_usage/index_en.rst
+
+- `Deep Learning 101 `_
diff --git a/doc/howto/cluster/cluster_train_en.md b/doc/howto/cluster/cluster_train_en.md
deleted file mode 100644
index 1de34a6a99440bf45af8b1fec2c7a2361865fed3..0000000000000000000000000000000000000000
--- a/doc/howto/cluster/cluster_train_en.md
+++ /dev/null
@@ -1,156 +0,0 @@
-# How to Run Distributed Training
-
-In this article, we explain how to run distributed Paddle training jobs on clusters. We will create the distributed version of the single-process training example, [recommendation](https://github.com/baidu/Paddle/tree/develop/demo/recommendation).
-
-[Scripts](https://github.com/baidu/Paddle/tree/develop/paddle/scripts/cluster_train) used in this article launch distributed jobs via SSH. They also work as a reference for users running more sophisticated cluster management systems like MPI and Kubernetes.
-
-## Prerequisite
-
-1. The aforementioned scripts use a Python library, [fabric](http://www.fabfile.org/), to run SSH commands. We can use `pip` to install fabric:
-
-   ```bash
-   pip install fabric
-   ```
-
-1. We need to install PaddlePaddle on all nodes in the cluster. To enable GPUs, we need to install CUDA in `/usr/local/cuda`; otherwise Paddle would report errors at runtime.
-
-1. Set the `ROOT_DIR` variable in [`cluster_train/conf.py`] on all nodes. For convenience, we often create a Unix user `paddle` on all nodes and set `ROOT_DIR=/home/paddle`. In this way, we can write public SSH keys into `/home/paddle/.ssh/authorized_keys` so that user `paddle` can SSH to all nodes without a password.
-
-## Prepare Job Workspace
-
-We refer to the directory where we put dependent libraries, config files, etc., as *workspace*.
-
-The ```train/test``` data should be prepared before launching the cluster job. Since train/test data may be placed in a directory different from the workspace, Paddle locates them through index files named ```train.list``` and ```test.list```, which are referenced in the model config file. So the train/test data also come with these two list files.
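-
-For orientation, a minimal ```train.list``` is just one data-file path per line; for this demo it could contain a single entry (the exact relative path is an assumption based on the workspace layout shown below):
-
-```
-./data/ratings.dat.train
-```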
-All local training demos already provide scripts to help you create these two files, and all nodes of a cluster job will handle files with the same logic under normal conditions.
-
-Generally, you can use the same model file from local training for cluster training. Keep in mind that, if synchronous SGD is used, the ```batch_size``` set in the ```setting``` function of the model file means the batch size on ```each``` node of the cluster job, not the total batch size.
-
-The following steps are based on the demo/recommendation demo in the demo directory.
-
-Just go through the demo/recommendation tutorial doc up to the ```Train``` section, after which you will have the train/test data and the model configuration file. Finally, use demo/recommendation as the workspace for cluster training.
-
-Your workspace should then look as follows:
-```
-.
-|-- common_utils.py
-|-- data
-|   |-- config.json
-|   |-- config_generator.py
-|   |-- meta.bin
-|   |-- meta_config.json
-|   |-- meta_generator.py
-|   |-- ml-1m
-|   |-- ml_data.sh
-|   |-- ratings.dat.test
-|   |-- ratings.dat.train
-|   |-- split.py
-|   |-- test.list
-|   `-- train.list
-|-- dataprovider.py
-|-- evaluate.sh
-|-- prediction.py
-|-- preprocess.sh
-|-- requirements.txt
-|-- run.sh
-`-- trainer_config.py
-```
-Not all of these files are needed for cluster training, but it is not necessary to remove the unused ones either.
-
-```trainer_config.py```
-Indicates the model config file.
-
-```train.list``` and ```test.list```
-File indexes. They store the relative or absolute paths of all train/test data on the current node.
-
-```dataprovider.py```
-Used to read train/test samples. It is the same as in local training.
-
-```data```
-All files in the data directory are referred to by train.list/test.list, which are in turn referred to by the data provider.
-
-
-## Prepare Cluster Job Configuration
-
-The options below must be carefully set in cluster_train/conf.py.
-
-```HOSTS``` hostnames or IPs of all nodes that will run the cluster job. You can also append a user and an SSH port to the hostname, such as root@192.168.100.17:9090.
-
-```ROOT_DIR``` workspace ROOT directory for placing the JOB workspace directory
-
-```PADDLE_NIC``` the NIC (Network Interface Card) interface name for the cluster communication channel, such as eth0 for Ethernet, ib0 for InfiniBand.
-
-```PADDLE_PORT``` port number for the cluster communication channel
-
-```PADDLE_PORTS_NUM``` the number of ports used for the cluster communication channel. If the number of cluster nodes is small (fewer than 5~6 nodes), it is recommended to set this to a larger value, such as 2~8, for better network performance.
-
-```PADDLE_PORTS_NUM_FOR_SPARSE``` the number of ports used for the sparse updater's cluster communication channel. If sparse remote update is used, set it like ```PADDLE_PORTS_NUM```.
-
-```LD_LIBRARY_PATH``` set an additional LD_LIBRARY_PATH for the cluster job. You can use it to set the CUDA libraries path.
-
-The default configuration is as follows:
-
-```python
-HOSTS = [
-        "root@192.168.100.17",
-        "root@192.168.100.18",
-        ]
-
-'''
-workspace configuration
-'''
-
-#root dir for workspace
-ROOT_DIR = "/home/paddle"
-
-'''
-network configuration
-'''
-#pserver nics
-PADDLE_NIC = "eth0"
-#pserver port
-PADDLE_PORT = 7164
-#pserver ports num
-PADDLE_PORTS_NUM = 2
-#pserver sparse ports num
-PADDLE_PORTS_NUM_FOR_SPARSE = 2
-
-#environments setting for all processes in cluster job
-LD_LIBRARY_PATH="/usr/local/cuda/lib64:/usr/lib64"
-```
-
-### Launching Cluster Job
-```paddle.py``` provides automated scripts to start all PaddlePaddle cluster processes on different nodes.
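-
-For example (a sketch only; the invocation shape, paths, and flag values here are placeholders), dispatching a local workspace via the ```job_dispatch_package``` option described below while forwarding trainer options might look like:
-
-```
-# dispatch the workspace to all nodes in conf.py, then launch training there
-python paddle.py \
-  --job_dispatch_package=/home/paddle/workspace \
-  --use_gpu=1 \
-  --trainer_count=4
-```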
-By default, all command line options can be set as ```paddle.py``` command options, and ```paddle.py``` will transparently and automatically pass these options on to the lower-level PaddlePaddle processes.
-
-```paddle.py``` provides two special command options for easy job launching:
-
-```job_dispatch_package``` set it to a local ```workspace``` directory; it will be dispatched to all nodes listed in conf.py. This is helpful when you frequently modify workspace files, since repeated manual multi-node workspace deployment can drive you crazy.
-```job_workspace``` set it to an already deployed workspace directory; ```paddle.py``` will skip the dispatch stage and directly launch the cluster job on all nodes. It helps to reduce heavy dispatch latency.
-
-```cluster_train/run.sh``` provides a command line sample to run a ```demo/recommendation``` cluster job. Just modify ```job_dispatch_package``` and ```job_workspace``` to point at your chosen directory, then:
-```
-sh run.sh
-```
-
-The cluster job will start in several seconds.
-
-### Kill Cluster Job
-```paddle.py``` captures the ```Ctrl + C``` SIGINT signal to automatically kill all processes launched by it. So just stop ```paddle.py``` to kill the cluster job. You should kill the job manually if the program crashed.
-
-### Check Cluster Training Result
-Check the logs in $workspace/log for details; each node has the same log structure.
-
-```paddle_trainer.INFO```
-Provides almost all internal output logs for training, the same as local training. Check runtime model convergence here.
-
-```paddle_pserver2.INFO```
-Provides the pserver running log, which can help to diagnose distributed errors.
-
-```server.log```
-Provides the stderr and stdout of the pserver process. Check this error log if training crashes.
-
-```train.log```
-Provides the stderr and stdout of the trainer process. Check this error log if training crashes.
-
-### Check Model Output
-After one pass finishes, model files are written to the ```output``` directory on node 0.
-```nodefile``` in the workspace indicates the node id of the current cluster job.
diff --git a/doc/howto/cmd_parameter/arguments_en.md b/doc/howto/cmd_parameter/arguments_en.md
deleted file mode 100644
index 013edbc9047817d7f6b82c4d5188412bd2ce41d6..0000000000000000000000000000000000000000
--- a/doc/howto/cmd_parameter/arguments_en.md
+++ /dev/null
@@ -1,409 +0,0 @@
-# Argument Outline
-
-It looks like there are a lot of arguments. However, most of them are for developers or are already set automatically in the cluster submission environment, so users do not need to care about them. Here, we divide these arguments into several classes according to the scenario they are used in. For example, the arguments in `common` can be used in all scenarios, some arguments can only be used in certain layers, some are needed by multi-machine training in a cluster, and so on.
-
-The table below outlines the arguments by class; each argument applies to a subset of the four scenarios: local train, cluster train, local test, and cluster test.
-
-| args class | arguments |
-| ---------- | --------- |
-| common | job, use_gpu, local, config, config_args, num_passes, trainer_count, version, show_layer_stat |
-| train | dot_period, test_period, saving_period, show_parameter_stats_period, init_model_path, load_missing_parameter_strategy, saving_period_by_batches, use_old_updater, enable_grad_share, grad_share_block_num, log_error_clipping, log_clipping, save_only_one, allow_inefficient_sparse_update, start_pass |
-| train/test | save_dir |
-| testing during training | test_period, average_test_period |
-| test | model_list, test_wait, test_pass, predict_output_dir, distribute_test |
-| Auc/PnpairValidation | predict_file |
-| GPU | gpu_id, parallel_nn, allow_only_one_model_on_one_gpu, cudnn_dir, cuda_dir, cudnn_conv_workspace_limit_in_mb |
-| RNN | beam_size, rnn_use_batch, prev_batch_state, diy_beam_search_prob_so |
-| metric learning | external, data_server_port |
-| PServer | start_pserver, pservers, port, port_num, ports_num_for_sparse, nics, rdma_tcp, small_messages, loadsave_parameters_in_pserver, log_period_server, pserver_num_threads, sock_send_buf_size, sock_recv_buf_size, num_gradient_servers, parameter_block_size, parameter_block_size_for_sparse |
-| Async SGD | async_count, async_lagged_ratio_min, async_lagged_ratio_default |
-| Performance Tuning | log_barrier_abstract, log_barrier_lowest_nodes, log_barrier_show_log, check_sparse_distribution_batches, check_sparse_distribution_ratio, check_sparse_distribution_unbalance_degree, check_sparse_distribution_in_pserver, show_check_sparse_distribution_log |
-| Data Provider | memory_threshold_on_load_data |
-| RandomNumber | seed, thread_local_rand_use_global_seed |
-| UnitTest | checkgrad_eps |
-| Matrix/Vector | enable_parallel_vector |
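-
-As a quick orientation (a sketch only; the config name and flag values are placeholders), a typical local training run combines several of the `common` and `train`/`train/test` arguments above:
-
-```
-# common flags: job mode, device, config file, per-machine parallelism, passes;
-# save_dir tells the trainer where to write model parameters
-paddle train \
-  --job=train \
-  --use_gpu=1 \
-  --config=trainer_config.py \
-  --trainer_count=4 \
-  --num_passes=100 \
-  --save_dir=output
-```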
-
diff --git a/doc/howto/cmd_parameter/detail_introduction_en.md b/doc/howto/cmd_parameter/detail_introduction_en.md
deleted file mode 100644
index 510396b629e398cef2ccda2f1cec474160693219..0000000000000000000000000000000000000000
--- a/doc/howto/cmd_parameter/detail_introduction_en.md
+++ /dev/null
@@ -1,336 +0,0 @@
-# Detail Description
-
-## Common
-
-* `--job`
-  - Job mode, including: **train, test, checkgrad**, where checkgrad is mainly for developers and users do not need to care about it.
-  - type: string (default: train)
-
-* `--config`
-  - Used to specify the network config file.
-  - type: string (default: null).
-
-* `--use_gpu`
-  - Whether to use the GPU for training; false means CPU mode and true means GPU mode.
-  - type: bool (default: 1).
-
-* `--local`
-  - Whether the training is in local mode or not. True when training locally or using one node in a cluster. False when using multiple machines in a cluster.
-  - type: bool (default: 1).
-
-* `--trainer_count`
-  - Defines the number of threads used in one machine. For example, trainer_count = 4 means using 4 GPUs in GPU mode and 4 threads in CPU mode. Each thread (or GPU) is assigned 1/4 of the samples in the current batch. That is to say, with a batch_size of 512 in the trainer config, each thread trains 128 samples.
-  - type: int32 (default: 1).
-
-* `--num_passes`
-  - When `--job=train`, it means training for num_passes passes. One pass means training on all samples in the dataset one time. When `--job=test`, it means testing the models from pass test_pass to pass (num_passes - 1).
-  - type: int32 (default: 100).
-
-* `--config_args`
-  - Arguments passed to the config file. Format: key1=value1,key2=value2.
-  - type: string (default: null).
-
-* `--version`
-  - Whether to print version information.
-  - type: bool (default: 0).
-
-* `--show_layer_stat`
-  - Whether to show the statistics of each layer **per batch**.
-  - type: bool (default: 0).
-
-## Train
-
-* `--log_period`
-  - Log progress every log_period batches.
-  - type: int32 (default: 100).
-
-* `--dot_period`
-  - Print '.' every dot_period batches.
-  - type: int32 (default: 1).
-
-* `--saving_period`
-  - Save parameters every saving_period passes.
-  - type: int32 (default: 1).
-
-* `--save_dir`
-  - Directory for saving model parameters. It needs to be specified, but does not need to be created in advance.
-  - type: string (default: null).
-
-* `--start_pass`
-  - Start training from this pass. It will load parameters from the previous pass.
-  - type: int32 (default: 0).
-
-* `--show_parameter_stats_period`
-  - Show parameter statistics during training every show_parameter_stats_period batches. They are not shown by default.
-  - type: int32 (default: 0).
-
-* `--save_only_one`
-  - Save the parameters of only the last pass, while the previous parameters are removed.
-  - type: bool (default: 0).
-
-* `--load_missing_parameter_strategy`
-  - Specifies the loading behavior when a model file is missing. Three operations are supported: fail/rand/zero.
-    - `fail`: the program will exit.
-    - `rand`: uniform or normal distribution according to **initial\_strategy** in the network config. The uniform range is: **[mean - std, mean + std]**, where mean and std are configured in the trainer config.
-    - `zero`: all parameters are zero.
-  - type: string (default: fail).
-
-* `--init_model_path`
-  - Path of the initialization model. If it is set, start\_pass will be ignored. It can be used to specify the model path in testing mode as well.
-  - type: string (default: null).
-
-* `--saving_period_by_batches`
-  - Save parameters every saving_period_by_batches batches within one pass.
-  - type: int32 (default: 0).
-
-* `--log_error_clipping`
-  - Whether to print an error-clipping log when **error_clipping_threshold** is set in the layer config. If true, the log is printed in backward propagation **per batch**. This clipping affects the **gradient of the output**.
-  - type: bool (default: 0).
-
-* `--log_clipping`
-  - Whether to print a clipping log when **gradient_clipping_threshold** is set in the trainer config. This clipping affects the **gradient w.r.t. (with respect to) the weight**.
-  - type: bool (default: 0).
-
-* `--use_old_updater`
-  - Whether to use the old RemoteParameterUpdater. By default the ConcurrentRemoteParameterUpdater is used. It is mainly for developers, and users usually do not need to care about it.
-  - type: bool (default: 0).
-
-* `--enable_grad_share`
-  - Threshold for enabling gradient parameter sharing in batch multi-CPU training.
-  - type: int32 (default: 100 \* 1024 \* 1024).
-
-* `--grad_share_block_num`
-  - Number of blocks of the gradient parameter that is shared in batch multi-CPU training.
-  - type: int32 (default: 64).
-
-## Test
-
-* `--test_pass`
-  - Load the parameters of this pass for testing.
-  - type: int32 (default: -1).
-
-* `--test_period`
-  - If equal to 0, test on all test data at the end of each pass. If non-zero, test on all test data every test_period batches.
-  - type: int32 (default: 0).
-
-* `--test_wait`
-  - Whether to wait for the parameters of a pass if they do not exist yet. If test_data_path is set in the cluster submission environment, one process is launched to perform testing, so we need to set test_wait=1. Note that in the cluster submission environment, this argument is set to True by default.
-  - type: bool (default: 0).
-
-* `--model_list`
-  - File that saves the model list for testing. It is set automatically in the cluster submission environment after model_path has been set.
-  - type: string (default: "", null).
-
-* `--predict_output_dir`
-  - Directory that saves the layer output. It is configured in Outputs() in the network config. By default, this argument is null, meaning nothing is saved. Specify this directory if you want to save the feature maps of some layers in testing mode. Note that layer outputs are the values after the activation function.
-  - type: string (default: "", null).
-
-* `--average_test_period`
-  - Test on the averaged parameters every `average_test_period` batches. It MUST be divisible by FLAGS_log_period. The default 0 means no testing on the averaged parameters.
-  - type: int32 (default: 0).
-
-* `--distribute_test`
-  - Testing in a distributed environment will merge the results from multiple machines.
-  - type: bool (default: 0).
-
-* `--predict_file`
-  - File name for saving predicted results. By default, this argument is null, meaning nothing is saved. Currently, this argument is only used in AucValidationLayer and PnpairValidationLayer, and the predicted results are saved every pass.
-  - type: string (default: "", null).
-
-## GPU
-
-* `--gpu_id`
-  - Which GPU core to use.
-  - type: int32 (default: 0).
-
-* `--allow_only_one_model_on_one_gpu`
-  - If true, do not allow multiple models on one GPU device.
-  - type: bool (default: 1).
-
-* `--parallel_nn`
-  - Whether to use multiple threads to calculate one neural network. If false, gpu_id specifies which GPU core to use (the device property in the trainer config is ignored). If true, the GPU core is specified in the trainer config (gpu_id is ignored).
-  - type: bool (default: 0).
-
-* `--cudnn_dir`
  - Path from which to dynamically load the NVIDIA cuDNN library, for instance, /usr/local/cuda/lib64. [Default]: LD_LIBRARY_PATH
-  - type: string (default: "", null)
-
-* `--cuda_dir`
-  - Path from which to dynamically load the NVIDIA CUDA library, for instance, /usr/local/cuda/lib64. [Default]: LD_LIBRARY_PATH
-  - type: string (default: "", null)
-
-* `--cudnn_conv_workspace_limit_in_mb`
-  - Specify the cuDNN max workspace limit in MB; 4096MB=4GB by default.
-  - type: int32 (default: 4096MB=4GB)
-
-## NLP: RNN/LSTM/GRU
-* `--rnn_use_batch`
-  - Whether to use the batch method for calculation in a simple RecurrentLayer.
-  - type: bool (default: 0).
-
-* `--prev_batch_state`
-  - Whether the current batch is a continuation of the previous batch.
-  - type: bool (default: 0).
-
-* `--beam_size`
-  - Beam search uses breadth-first search to build its search tree. At each level of the tree, it generates all successors of the states at the current level, sorting them in increasing order of heuristic cost. However, it only stores a predetermined number of best states at each level (called the beam size).
-  - type: int32 (default: 1).
-
-* `--diy_beam_search_prob_so`
-  - Specify a shared dynamic library. It can be defined outside of Paddle by the user.
-  - type: string (default: "", null).
-
-## Metric Learning
-* `--external`
-  - Whether to use an external machine for metric learning.
-  - type: bool (default: 0).
-
-* `--data_server_port`
-  - Listening port for dserver (data server); dserver is mainly used in metric learning.
-  - type: int32 (default: 21134).
-
-## DataProvider
-
-* `--memory_threshold_on_load_data`
-  - Stop loading data when memory is not sufficient.
-  - type: double (default: 1.0).
-
-## Unit Test
-
-* `--checkgrad_eps`
-  - Parameter change size for checkgrad.
-  - type: double (default: 1e-05).
-
-## Parameter Server and Distributed Communication
-
-* `--start_pserver`
-  - Whether to start the pserver (parameter server).
-  - type: bool (default: 0).
-
-* `--pservers`
-  - Comma-separated IP addresses of pservers. It is set automatically in the cluster submission environment.
-  - type: string (default: "127.0.0.1").
-
-* `--port`
-  - Listening port for the pserver.
-  - type: int32 (default: 20134).
-
-* `--ports_num`
-  - The number of ports for parameter sending, incremented from the default port number.
-  - type: int32 (default: 1).
-
-* `--trainer_id`
-  - In distributed training, each trainer must be given a unique id ranging from 0 to num_trainers-1. Trainer 0 is the master trainer. Users do not need to care about this flag.
-  - type: int32 (default: 0).
-
-* `--num_gradient_servers`
-  - Number of gradient servers. This argument is set automatically in the cluster submission environment.
-  - type: int32 (default: 1).
-
-* `--small_messages`
-  - If the message size is small, it is recommended to set this to True to enable quick ACK and no delay.
-  - type: bool (default: 0).
-
-* `--sock_send_buf_size`
-  - Restrict the socket send buffer size. It can reduce network congestion if set carefully.
-  - type: int32 (default: 1024 \* 1024 \* 40).
-
-* `--sock_recv_buf_size`
-  - Restrict the socket receive buffer size.
-  - type: int32 (default: 1024 \* 1024 \* 40).
-
-* `--parameter_block_size`
-  - Parameter block size for the pserver; a suitable value is calculated automatically if it is not set.
-  - type: int32 (default: 0).
-
-* `--parameter_block_size_for_sparse`
-  - Parameter block size for the sparse update pserver; a suitable value is calculated automatically if it is not set.
-  - type: int32 (default: 0).
-
-* `--log_period_server`
-  - Log progress every log_period_server batches on the pserver end.
-  - type: int32 (default: 500).
-
-* `--loadsave_parameters_in_pserver`
-  - Load and save parameters on the pserver. It only works when a parameter is configured with sparse_remote_update.
-  - type: bool (default: 0).
-
-* `--pserver_num_threads`
-  - Number of threads for synchronous op execution.
-  - type: bool (default: 1).
-
-* `--ports_num_for_sparse`
-  - The number of ports for parameter sending, incremented from the default (port + ports_num). It is used by sparse training.
-  - type: int32 (default: 0).
-
-* `--nics`
-  - Network device name for pservers, already set in the cluster submission environment.
-  - type: string (default: "xgbe0,xgbe1").
-
-* `--rdma_tcp`
-  - Use the rdma or tcp transport protocol, already set in the cluster submission environment.
-  - type: string (default: "tcp").
-
-## Async SGD
-* `--async_count`
-  - Defines the asynchronous training length; if 0, synchronous training is used.
-  - type: int32 (default: 0).
-
-* `--async_lagged_ratio_min`
-  - Controls the minimum value of `config_.async_lagged_grad_discard_ratio()`.
-  - type: double (default: 1.0).
-
-* `--async_lagged_ratio_default`
-  - If async_lagged_grad_discard_ratio is not set in the network config, this is used as the default value.
-  - type: double (default: 1.5).
-
-## Performance Tuning
-
-* `--log_barrier_abstract`
-  - If true, show abstract barrier performance information.
-  - type: bool (default: 1).
-
-* `--log_barrier_show_log`
-  - If true, always show the barrier abstract even with a small gap.
-  - type: bool (default: 0).
-
-* `--log_barrier_lowest_nodes`
-  - How many of the lowest nodes will be logged.
-  - type: int32 (default: 5).
-
-* `--check_sparse_distribution_in_pserver`
-  - Whether to check that the distribution of sparse parameters over all pservers is balanced.
-  - type: bool (default: 0).
-
-* `--show_check_sparse_distribution_log`
-  - Show log details for the sparse parameter distribution on the pserver.
-  - type: bool (default: 0).
-
-* `--allow_inefficient_sparse_update`
-  - Whether to allow inefficient sparse updates.
-  - type: bool (default: 0).
-
-* `--check_sparse_distribution_batches`
-  - Run the sparse parameter distribution check every so many batches.
-  - type: int32 (default: 100).
-
-* `--check_sparse_distribution_ratio`
-  - If parameters dispatched to different pservers have an unbalanced distribution for check_sparse_distribution_ratio * check_sparse_distribution_batches times, crash the program.
-  - type: double (default: 0.6).
-
-* `--check_sparse_distribution_unbalance_degree`
-  - The ratio of the maximum data size to the minimum data size across different pservers.
-  - type: double (default: 2).
-
-## Matrix/Vector/RandomNumber
-* `--enable_parallel_vector`
-  - Threshold for enabling parallel vector.
-  - type: int32 (default: 0).
-
-* `--seed`
-  - Random number seed. 0 means srand(time).
-  - type: int32 (default: 1)
-
-* `--thread_local_rand_use_global_seed`
-  - Whether to use the global seed in thread-local rand.
-  - type: bool (default: 0).
diff --git a/doc/howto/cmd_parameter/index_en.md b/doc/howto/cmd_parameter/index_en.md
deleted file mode 100644
index fb658f2aa5bc0edef7b5dcb24a582d2c4182caa7..0000000000000000000000000000000000000000
--- a/doc/howto/cmd_parameter/index_en.md
+++ /dev/null
@@ -1,8 +0,0 @@
-```eval_rst
-.. _cmd_line_index_en:
-```
-# How to Set Command-line Parameters
-
-* [Use Case](use_case_en.md)
-* [Arguments](arguments_en.md)
-* [Detailed Descriptions](detail_introduction_en.md)
diff --git a/doc/howto/cmd_parameter/use_case_en.md b/doc/howto/cmd_parameter/use_case_en.md
deleted file mode 100644
index 4d7bb33f36fe258ee24796eedc9296065923e58f..0000000000000000000000000000000000000000
--- a/doc/howto/cmd_parameter/use_case_en.md
+++ /dev/null
@@ -1,182 +0,0 @@
-# Use Case
-
-## Local Training
-
-These command line arguments are commonly used in local training experiments, such as image classification, natural language processing, etc.
-
-```
-paddle train \
-  --use_gpu=1/0 \ #1:GPU,0:CPU(default:true)
-  --config=network_config \
-  --save_dir=output \
-  --trainer_count=COUNT \ #(default:1)
-  --test_period=M \ #(default:0)
-  --num_passes=N \ #(default:100)
-  --log_period=K \ #(default:100)
-  --dot_period=1000 \ #(default:1)
-  #[--show_parameter_stats_period=100] \ #(default:0)
-  #[--saving_period_by_batches=200] \ #(default:0)
-```
-`show_parameter_stats_period` and `saving_period_by_batches` are optional depending on your task.
-
-### 1) Pass Command-line Arguments to the Network Config
-
-`config_args` is a useful parameter for passing arguments to the network config.
-
-```
---config_args=generating=1,beam_size=5,layer_num=10 \
-```
-And `get_config_arg` can be used to parse these arguments in the network config as follows:
-
-```
-generating = get_config_arg('generating', bool, False)
-beam_size = get_config_arg('beam_size', int, 3)
-layer_num = get_config_arg('layer_num', int, 8)
-```
-
-`get_config_arg`:
-
-```
-get_config_arg(name, type, default_value)
-```
-- name: the name specified in `--config_args`
-- type: the value type, e.g. bool, int, str, float.
-- default_value: the default value if not set.
-
-### 2) Use a Model to Initialize the Network
-
-Add the arguments:
-
-```
---init_model_path=model_path
---load_missing_parameter_strategy=rand
-```
-
-## Local Testing
-
-Method 1:
-
-```
-paddle train --job=test \
-             --use_gpu=1/0 \
-             --config=network_config \
-             --trainer_count=COUNT \
-             --init_model_path=model_path \
-```
-- use init\_model\_path to specify the test model.
-- can only test one model.
-
-Method 2:
-
-```
-paddle train --job=test \
-             --use_gpu=1/0 \
-             --config=network_config \
-             --trainer_count=COUNT \
-             --model_list=model.list \
-```
-- use model_list to specify the test models.
-- can test several models, where model.list looks like:
-
-```
-./alexnet_pass1
-./alexnet_pass2
-```
-
-Method 3:
-
-```
-paddle train --job=test \
-             --use_gpu=1/0 \
-             --config=network_config \
-             --trainer_count=COUNT \
-             --save_dir=model \
-             --test_pass=M \
-             --num_passes=N \
-```
-This method requires model paths saved by Paddle in the form `model/pass-%5d`. The tested models are those from the M-th pass to the (N-1)-th pass. For example: M=12 and N=14 will test `model/pass-00012` and `model/pass-00013`.
-
-## Sparse Training
-
-Sparse training is usually used to accelerate calculation when the input is high-dimensional sparse data. For example, the dictionary dimension of the input data may be 1 million, while each sample contains only several words. In Paddle, sparse matrix multiplication is used in forward propagation and sparse updating is performed on the weights after backward propagation.
-
-### 1) Local Training
-
-You need to set **sparse\_update=True** in the network config. Check the network config documentation for more details.
-
-### 2) Cluster Training
-
-Add the following argument for cluster training of a sparse model.
At the same time you need to set **sparse\_remote\_update=True** in the network config. Check the network config documentation for more details.
-
-```
---ports_num_for_sparse=1 #(default: 0)
-```
-
-## parallel_nn
-`parallel_nn` can be set to use a mix of GPUs and CPUs to compute layers. That is to say, you can deploy the network so that a GPU computes some layers and a CPU computes the others. Another use is to split layers across different GPUs, which can **reduce GPU memory usage** or **use parallel computation to accelerate some layers**.
-
-If you want to use these features, you need to specify the device ID in the network config (denoted deviceId) and add the command line argument:
-
-```
---parallel_nn=true
-```
-### Case 1: Mixed Use of GPU and CPU
-Consider the following example:
-
-```
-#command line:
-paddle train --use_gpu=true --parallel_nn=true trainer_count=COUNT
-
-default_device(0)
-
-fc1=fc_layer(...)
-fc2=fc_layer(...)
-fc3=fc_layer(...,layer_attr=ExtraAttr(device=-1))
-
-```
-- default_device(0): set the default device ID to 0. This means that except for the layers with device=-1, all layers use a GPU, and the specific GPU used for each layer depends on trainer\_count and gpu\_id (0 by default). Here, layers fc1 and fc2 are computed on a GPU.
-
-- device=-1: use the CPU for layer fc3.
-
-- trainer_count:
-  - trainer_count=1: if gpu\_id is not set, then use the first GPU to compute layers fc1 and fc2. Otherwise use the GPU with gpu\_id.
-
-  - trainer_count>1: use trainer\_count GPUs to compute one layer using data parallelism. For example, trainer\_count=2 means that GPUs 0 and 1 use data parallelism to compute layers fc1 and fc2.
-
-### Case 2: Specify Layers on Different Devices
-
-```
-#command line:
-paddle train --use_gpu=true --parallel_nn=true --trainer_count=COUNT
-
-#network:
-fc2=fc_layer(input=l1, layer_attr=ExtraAttr(device=0), ...)
-fc3=fc_layer(input=l1, layer_attr=ExtraAttr(device=1), ...)
-fc4=fc_layer(input=fc2, layer_attr=ExtraAttr(device=-1), ...)
-```
-In this case, we assume that there are 4 GPUs in one machine.
-
-- trainer_count=1:
-  - Use GPU 0 to compute layer fc2.
-  - Use GPU 1 to compute layer fc3.
-  - Use the CPU to compute layer fc4.
-
-- trainer_count=2:
-  - Use GPUs 0 and 1 to compute layer fc2.
-  - Use GPUs 2 and 3 to compute layer fc3.
-  - Use the CPU to compute fc4 in two threads.
-
-- trainer_count=4:
-  - It will fail (note, we have assumed that there are 4 GPUs in the machine), because the argument `allow_only_one_model_on_one_gpu` is true by default.
-
-**Allocation of device ID when `device!=-1`**:
-
-```
-(deviceId + gpu_id + threadId * numLogicalDevices_) % numDevices_
-
-deviceId: specified in the layer.
-gpu_id: 0 by default.
-threadId: thread ID, range: 0,1,..., trainer_count-1
-numDevices_: device (GPU) count in the machine.
-numLogicalDevices_: min(max(deviceId + 1), numDevices_)
-```
diff --git a/doc/howto/contribute_to_paddle_en.md b/doc/howto/contribute_to_paddle_en.md
deleted file mode 100644
index 1decc91d62cc25c5b3157bdc6e0835421be23252..0000000000000000000000000000000000000000
--- a/doc/howto/contribute_to_paddle_en.md
+++ /dev/null
@@ -1,131 +0,0 @@
-# How to Contribute Code
-
-We sincerely appreciate your contributions. You can use the fork and pull request
-workflow to merge your code.
-
-## Code Requirements
-- Your code must be fully documented in
-  [doxygen](http://www.stack.nl/~dimitri/doxygen/) style.
-- Make sure the compiler option WITH\_STYLE\_CHECK is on and that the compiler
-  passes the code style check.
-- All code must have unit tests.
-- Pass all unit tests.
-
-The following tutorial guides you through submitting your contribution.
-
-## [Creating a Fork](https://help.github.com/articles/fork-a-repo/)
-
-Just head over to the GitHub page and click the "Fork" button.
-It's just that simple.
-
-## Clone
-
-Paddle is currently using the [git-flow branching model](http://nvie.com/posts/a-successful-git-branching-model/).
-The **develop** branch is the main branch, while other users' branches are feature branches.
-
-Once you've created a fork, you can use your favorite git client to clone your
-repo or just head straight to the command line:
-
-```shell
-# Clone your fork to your local machine
-git clone --branch develop https://github.com/USERNAME/Paddle.git
-```
-If your repository doesn't contain a **develop** branch, just create it yourself.
-
-```shell
-git clone https://github.com/USERNAME/Paddle.git Paddle
-cd Paddle
-git checkout -b develop # create develop branch.
-git remote add upstream https://github.com/PaddlePaddle/Paddle.git # add upstream to baidu/Paddle
-git pull upstream develop # update to upstream
-git submodule update --init --recursive
-```
-
-Then you can start to develop by creating a local development branch:
-
-```shell
-git checkout -b MY_COOL_STUFF_BRANCH
-```
-
-## Commit
-
-Commit your changes with the following command lines:
-
-```shell
-# show the working tree status
-git status
-# add modified files
-git add xx
-env EDITOR=vim git commit # You can write your comments with vim/nano/emacs.
-```
-The first line of the commit message is the title. The second and later lines
-are the details, if any.
-
-## Keeping Your Fork Up to Date
-
-Before submitting your pull request, you should sync your code with the latest PaddlePaddle.
-To do this, you'll need to add a remote first:
-
-```shell
-# see the current configured remote repository
-git remote -v
-# add upstream repository
-git remote add upstream https://github.com/PaddlePaddle/Paddle.git
-# verify the new upstream
-git remote -v
-```
-
-Update your fork with the latest upstream changes:
-
-```shell
-git pull --rebase upstream develop
-```
-
-If there are no unique commits locally, git will simply perform a fast-forward.
-However, if you have been making changes (in the vast majority of cases you
-probably shouldn't be), you may have to deal with conflicts.
-
-Now, your local master branch is up-to-date with everything modified upstream.
-
-## Push to GitHub
-
-```shell
-# push to your repository on GitHub
-git push -u origin MY_COOL_STUFF_BRANCH # create remote branch MY_COOL_STUFF_BRANCH in origin.
-```
-
-## Pull Request
-
-Go to the page of your fork on GitHub, select your development branch,
-and click the **pull request button**.
-
-## Update Your Pull Request with the Latest Version
-
-During the code review, your pull request may become stale because of new commits in
-baidu/Paddle. GitHub allows automatic updates if there is no conflict. You can do this
-by clicking the "Update Branch" button on your pull request page. However, in the case
-of a conflict, you need to do the update manually. You need to do the following on
-your local repository:
-```shell
-git checkout MY_COOL_STUFF_BRANCH
-git pull upstream develop
-# You may need to resolve the conflict according to the git prompt.
-# Make and test your code.
-git push origin MY_COOL_STUFF_BRANCH
-```
-Now your pull request is updated with the latest version.
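-
-In case that pull reports conflicts, a typical resolution loop (a generic git sketch, not Paddle-specific; the file path is a placeholder) is:
-
-```shell
-git status # list the conflicted files
-# edit each conflicted file and resolve the <<<<<<< ... >>>>>>> markers, then
-git add path/to/resolved_file
-git commit # conclude the merge, and push as shown above
-```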
-
-## Revise Your Pull Request
-
-When you revise your pull request according to reviewers' comments, please use 'git commit' instead of 'git commit --amend' to commit your changes, so that the reviewers can see the difference between the new pull request and the old one.
-
-The possible commands are:
-
-```shell
-git checkout MY_COOL_STUFF_BRANCH
-git pull upstream develop # update local to the newest code base.
-# There may be some conflicts to resolve.
-# And develop your cool stuff
-env EDITOR=vim git commit # add your revision log
-git push origin MY_COOL_STUFF_BRANCH
-```
diff --git a/doc/howto/cross_compiling/cross_compiling_for_android_cn.md b/doc/howto/cross_compiling/cross_compiling_for_android_cn.md
new file mode 100644
index 0000000000000000000000000000000000000000..90dc84718c9ce1374cda6022de177afeeb60279d
--- /dev/null
+++ b/doc/howto/cross_compiling/cross_compiling_for_android_cn.md
@@ -0,0 +1,75 @@
+# 构建Android平台上的PaddlePaddle库
+
+用户可通过交叉编译的方式,在用户熟悉的开发平台(Linux,Mac OS X和Windows)上编译Android平台上适用的PaddlePaddle库。
+本文档将以Linux x86-64平台为例,介绍交叉编译Android平台上适用的PaddlePaddle库的方法和步骤。
+
+## 准备交叉编译环境
+
+从源码交叉编译PaddlePaddle,用户需要提前准备好交叉编译环境。Android平台上使用的C/C++交叉编译工具链为[Android NDK](https://developer.android.com/ndk/downloads/index.html?hl=zh-cn),用户可自行前往下载预编译好的版本,也可通过以下命令获取:
+
+```bash
+wget -q https://dl.google.com/android/repository/android-ndk-r14b-linux-x86_64.zip
+unzip -q android-ndk-r14b-linux-x86_64.zip
+```
+
+Android NDK中包含了所有Android API级别、所有架构(arm/arm64/x86/mips)需要用到的编译工具和系统库。用户可根据自己的编译目标架构、所需支持的最低Android API级别,构建[独立工具链](https://developer.android.google.cn/ndk/guides/standalone_toolchain.html?hl=zh-cn)。
+比如:
+
+```bash
+your/path/to/android-ndk-r14b-linux-x86_64/build/tools/make-standalone-toolchain.sh \
+        --arch=arm --platform=android-21 --install-dir=your/path/to/my_standalone_toolchain
+```
+
+此命令将在your/path/to/my_standalone_toolchain目录生成一套编译工具链,面向32位ARM架构,支持的最低Android API级别为21,使用的编译器为arm-linux-androideabi-gcc (GCC) 4.9。
+
+注意:**PaddlePaddle要求使用的编译工具链所支持的Android API级别不小于21**。
+
+## 配置交叉编译参数
+
+CMake系统对交叉编译提供了支持[cmake-toolchains](https://cmake.org/cmake/help/v3.0/manual/cmake-toolchains.7.html#cross-compiling)。为了简化cmake配置,PaddlePaddle为交叉编译提供了工具链配置文档[cmake/cross_compiling/android.cmake](https://github.com/PaddlePaddle/Paddle/blob/develop/cmake/cross_compiling/android.cmake),以提供一些默认的编译器和编译参数相关配置。注意,从CMake 3.7版本开始,CMake官方对Android平台的交叉编译提供了通用的支持。PaddlePaddle若检测到用户使用的CMake版本不低于3.7时,将会把用户传进来的配置参数传递给CMake系统,交由CMake系统本身来处理。有关参数配置的详细说明见[cmake-toolchains](https://cmake.org/cmake/help/v3.7/manual/cmake-toolchains.7.html#cross-compiling)。
+
+交叉编译Android版本的PaddlePaddle库时,有一些必须配置的参数:
+- `CMAKE_SYSTEM_NAME`,CMake编译的目标平台,必须设置为`Android`。在设置`CMAKE_SYSTEM_NAME=Android`后,PaddlePaddle的CMake系统才认为是在交叉编译Android系统的版本,并自动编译宿主机版protoc可执行文件、目标机版protobuf库、以及Android所需`arm_soft_fp_abi`分支的目标机版OpenBLAS库。此外,还会强制设置一些PaddlePaddle参数的值(`WITH_GPU=OFF`、`WITH_AVX=OFF`、`WITH_PYTHON=OFF`、`WITH_RDMA=OFF`)。
+- `WITH_C_API`,必须设置为`ON`。在Android平台上只支持使用C-API来预测。
+- `WITH_SWIG_PY`,必须设置为`OFF`。在Android平台上不支持通过swig调用来训练或者预测。
+
+Android平台可选配置参数:
+
+- `ANDROID_STANDALONE_TOOLCHAIN`,独立工具链所在的绝对路径,或者相对于构建目录的相对路径。PaddlePaddle的CMake系统将根据该值自动推导和设置需要使用的交叉编译器、sysroot、以及Android API级别;否则,用户需要在cmake时手动设置这些值。无默认值。
+- `ANDROID_ABI`,目标架构ABI。目前只支持`armeabi-v7a`,默认值为`armeabi-v7a`。
+- `ANDROID_NATIVE_API_LEVEL`,工具链的Android API级别。若没有显式设置,PaddlePaddle将根据`ANDROID_STANDALONE_TOOLCHAIN`的值自动推导得到。
+- `ANROID_ARM_MODE`,是否使用ARM模式。可设置`ON/OFF`,默认值为`ON`。
+- `ANDROID_ARM_NEON`,是否使用NEON指令。目前必须设置成`ON`,默认值为`ON`。
+
+其他配置参数:
+
+- `HOST_C/CXX_COMPILER`,宿主机的C/C++编译器。在编译宿主机版protoc可执行文件和目标机版OpenBLAS库时需要用到。默认设置成环境变量`CC`的值;若环境变量`CC`没有设置,则设置成`cc`编译器。
+
+一种常用的cmake配置如下:
+
+```bash
+cmake -DCMAKE_SYSTEM_NAME=Android \
+      -DANDROID_STANDALONE_TOOLCHAIN=your/path/to/my_standalone_toolchain \
+      -DANDROID_ABI=armeabi-v7a \
+      -DANDROID_ARM_NEON=ON \
+      -DANDROID_ARM_MODE=ON \
+      -DCMAKE_INSTALL_PREFIX=your/path/to/install \
+      -DWITH_C_API=ON \
+      -DWITH_SWIG_PY=OFF \
+      ..
+```
+
+用户还可根据自己的需求设置其他编译参数。比如希望最小化生成的库的大小,可以设置`CMAKE_BUILD_TYPE`为`MinSizeRel`;若希望最快的执行速度,则可设置`CMAKE_BUILD_TYPE`为`Release`。亦可以通过手动设置`CMAKE_C/CXX_FLAGS_MINSIZEREL/RELEASE`来影响PaddlePaddle的编译过程。
+
+## 编译和安装
+
+CMake配置完成后,执行以下命令,PaddlePaddle将自动下载和编译所有第三方依赖库、编译和安装PaddlePaddle预测库。
+
+```bash
+make
+make install
+```
+
+注意:如果你曾经在源码目录下编译过其他平台的PaddlePaddle库,请先使用`rm -rf`命令删除`third_party`目录和`build`目录,以确保所有的第三方依赖库和PaddlePaddle代码都是针对新的CMake配置重新编译的。
+
+执行完安装命令后,`your/path/to/install`目录中会包含`include`和`lib`目录,其中`include`中包含C-API的头文件,`lib`中包含一个Android版本的库。自此,PaddlePaddle已经安装完成,用户可将`your/path/to/install`目录下的生成文件用于深度学习相关Android App中,调用方法见C-API文档。
diff --git a/doc/howto/cross_compiling/cross_compiling_for_raspberry_cn.md b/doc/howto/cross_compiling/cross_compiling_for_raspberry_cn.md
new file mode 100644
index 0000000000000000000000000000000000000000..085b5dda1615a9af918b59870db460fcc5acdcca
--- /dev/null
+++ b/doc/howto/cross_compiling/cross_compiling_for_raspberry_cn.md
@@ -0,0 +1,65 @@
+# 构建Raspberry Pi平台上的PaddlePaddle库
+
+对于Raspberry Pi系统,用户可通过ssh等方式登录到Raspberry Pi系统上,按照[源码编译PaddlePaddle](http://www.paddlepaddle.org/doc_cn/getstarted/build_and_install/cmake/build_from_source_cn.html)相关文档所述,直接编译Raspberry Pi平台上适用的PaddlePaddle库。
+
+用户也可以在自己熟悉的开发平台上,通过交叉编译的方式来编译。这篇文档将以Linux x86-64平台为例,介绍交叉编译Raspberry Pi平台上适用的PaddlePaddle的方法和步骤。
+
+## 准备交叉编译环境
+
+从源码交叉编译PaddlePaddle,用户需要提前准备好交叉编译环境。用户可自行前往[github](https://github.com/raspberrypi/tools)下载Raspberry Pi平台使用的C/C++交叉编译工具链,也可通过以下命令获取:
+
+```bash
+git clone https://github.com/raspberrypi/tools.git
+```
+
+该github仓库中包含若干个预编译好的、针对不同平台的编译工具。若宿主机是Linux x86-64环境,则需选用`arm-bcm2708/gcc-linaro-arm-linux-gnueabihf-raspbian-x64`下的作为编译工具,所使用的编译器为arm-linux-gnueabihf-gcc 4.8.3。
+
+注意,该编译工具链需要系统glibc支持2.14以上。
+
+## 配置交叉编译参数
+
+CMake系统对交叉编译提供了支持[cmake-toolchains](https://cmake.org/cmake/help/v3.0/manual/cmake-toolchains.7.html#cross-compiling)。为了简化cmake配置,PaddlePaddle为交叉编译提供了工具链配置文档[cmake/cross_compiling/raspberry_pi.cmake](https://github.com/PaddlePaddle/Paddle/blob/develop/cmake/cross_compiling/raspberry_pi.cmake),以提供一些默认的编译器和编译参数相关配置。
+
+交叉编译Raspberry Pi版本PaddlePaddle库时,有一些必须配置的参数:
+
+- `CMAKE_SYSTEM_NAME`,CMake编译的目标平台,必须配置为`RPi`。在设置`CMAKE_SYSTEM_NAME=RPi`后,PaddlePaddle的CMake系统才认为是在交叉编译Raspberry Pi系统的版本,并自动编译宿主机版protoc可执行文件、目标机版protobuf库、以及目标机版OpenBLAS库。
+
+Raspberry Pi平台可选配置参数:
+
+- `RPI_TOOLCHAIN`,编译工具链所在的绝对路径,或者相对于构建目录的相对路径。PaddlePaddle的CMake系统将根据该值自动设置需要使用的交叉编译器;否则,用户需要在cmake时手动设置这些值。无默认值。
+- `RPI_ARM_NEON`,是否使用NEON指令。目前必须设置成`ON`,默认值为`ON`。
+
+其他配置参数:
+
+- `HOST_C/CXX_COMPILER`,宿主机的C/C++编译器。在编译宿主机版protoc可执行文件和目标机版OpenBLAS库时需要用到。默认设置成环境变量`CC`的值;若环境变量`CC`没有设置,则设置成`cc`编译器。
+
+cmake参数如下:
+
+```
+cmake -DCMAKE_SYSTEM_NAME=RPi \
+      -DRPI_TOOLCHAIN=your/path/to/arm-bcm2708/gcc-linaro-arm-linux-gnueabihf-raspbian-x64 \
+      -DRPI_ARM_NEON=ON \
+      -DCMAKE_INSTALL_PREFIX=your/path/to/install \
+      -DWITH_GPU=OFF \
+      -DWITH_C_API=ON \
+      -DWITH_PYTHON=OFF \
+      -DWITH_SWIG_PY=OFF \
+      ..
+``` + +用户还可根据自己的需求设置其他编译参数。比如希望最小化生成的库的大小,可以设置`CMAKE_BUILD_TYPE`为`MinSizeRel`;若希望最快的执行速度,则可设置`CMAKE_BUILD_TYPE`为`Release`。亦可以通过手动设置`CMAKE_C/CXX_FLAGS_MINSIZEREL/RELEASE`来影响PaddlePaddle的编译过程。 + +## 编译和安装 + +CMake配置完成后,执行以下命令,PaddlePaddle将自动下载和编译所有第三方依赖库、编译和安装PaddlePaddle。 + +```bash +make +make install +``` + +注意:如果你曾经在源码目录下编译过其他平台的PaddlePaddle库,请先使用`rm -rf`命令删除`third_party`目录和`build`目录,以确保所有的第三方依赖库和PaddlePaddle代码都是针对新的CMake配置重新编译的。 + +执行完安装命令后,由于上一步cmake配置中`WITH_C_API`设置为`ON`,`your/path/to/install`目录中会包含`include`和`lib`目录,其中`include`中包含C-API的头文件,`lib`中包含一个Raspberry Pi版本的库。 + +更多的编译配置见[源码编译PaddlePaddle](http://www.paddlepaddle.org/doc_cn/getstarted/build_and_install/cmake/build_from_source_cn.html)相关文档。 diff --git a/doc/howto/deep_model/index_en.rst b/doc/howto/deep_model/index_en.rst deleted file mode 100644 index 00a45641e6ad60a944c4334503e117cab1624896..0000000000000000000000000000000000000000 --- a/doc/howto/deep_model/index_en.rst +++ /dev/null @@ -1,7 +0,0 @@ -How to Configure Deep Models -============================ - -.. toctree:: - :maxdepth: 1 - - rnn/rnn_en.rst diff --git a/doc/howto/deep_model/rnn/hierarchical_layer_cn.rst b/doc/howto/deep_model/rnn/hierarchical_layer_cn.rst new file mode 100644 index 0000000000000000000000000000000000000000..e05173c2006ff47ecb6ca5a4fe1502de750acc59 --- /dev/null +++ b/doc/howto/deep_model/rnn/hierarchical_layer_cn.rst @@ -0,0 +1,89 @@ +########################### +支持双层序列作为输入的Layer +########################### + +.. contents:: + +概述 +==== + +在自然语言处理任务中,序列是一种常见的数据类型。一个独立的词语,可以看作是一个非序列输入,或者,我们称之为一个0层的序列;由词语构成的句子,是一个单层序列;若干个句子构成一个段落,是一个双层的序列。 + +双层序列是一个嵌套的序列,它的每一个元素,又是一个单层的序列。这是一种非常灵活的数据组织方式,帮助我们构造一些复杂的输入信息。 + +我们可以按照如下层次定义非序列,单层序列,以及双层序列。 + ++ 0层序列:一个独立的元素,类型可以是PaddlePaddle支持的任意输入数据类型 ++ 单层序列:排成一列的多个元素,每个元素是一个0层序列,元素之间的顺序是重要的输入信息 ++ 双层序列:排成一列的多个元素,每个元素是一个单层序列,称之为双层序列的一个子序列(subseq),subseq的每个元素是一个0层序列 + +在 PaddlePaddle中,下面这些Layer能够接受双层序列作为输入,完成相应的计算。 + +pooling +======== + +pooling 的使用示例如下,详细见 :ref:`api_v2.layer_pooling` 配置API。 + +.. code-block:: bash + + seq_pool = pooling(input=layer, + pooling_type=pooling.Max(), + agg_level=AggregateLevel.TO_SEQUENCE) + +- `pooling_type` 目前支持两种,分别是:pooling.Max()和pooling.Avg()。 + +- `agg_level=AggregateLevel.TO_NO_SEQUENCE` 时(默认值): + + - 作用:双层序列经过运算变成一个0层序列,或单层序列经过运算变成一个0层序列 + - 输入:一个双层序列,或一个单层序列 + - 输出:一个0层序列,即整个输入序列(单层或双层)的平均值(或最大值) + +- `agg_level=AggregateLevel.TO_SEQUENCE` 时: + + - 作用:一个双层序列经过运算变成一个单层序列 + - 输入:必须是一个双层序列 + - 输出:一个单层序列,序列的每个元素是原来双层序列每个subseq元素的平均值(或最大值) + +last_seq 和 first_seq +===================== + +last_seq 的使用示例如下( :ref:`api_v2.layer_first_seq` 类似),详细见 :ref:`api_v2.layer_last_seq` 配置API。 + +.. code-block:: bash + + last = last_seq(input=layer, + agg_level=AggregateLevel.TO_SEQUENCE) + +- `agg_level=AggregateLevel.TO_NO_SEQUENCE` 时(默认值): + + - 作用:一个双层序列经过运算变成一个0层序列,或一个单层序列经过运算变成一个0层序列 + - 输入:一个双层序列或一个单层序列 + - 输出:一个0层序列,即整个输入序列(双层或者单层)最后一个,或第一个元素。 + +- `agg_level=AggregateLevel.TO_SEQUENCE` 时: + - 作用:一个双层序列经过运算变成一个单层序列 + - 输入:必须是一个双层序列 + - 输出:一个单层序列,其中每个元素是双层序列中每个subseq最后一个(或第一个)元素。 + +expand +====== + +expand 的使用示例如下,详细见 :ref:`api_v2.layer_expand` 配置API。 + +.. 
code-block:: bash + + ex = expand(input=layer1, + expand_as=layer2, + expand_level=ExpandLevel.FROM_NO_SEQUENCE) + +- `expand_level=ExpandLevel.FROM_NO_SEQUENCE` 时(默认值): + + - 作用:一个0层序列经过运算扩展成一个单层序列,或者一个双层序列 + - 输入:layer1必须是一个0层序列,是待扩展的数据;layer2 可以是一个单层序列,或者是一个双层序列,提供扩展的长度信息 + - 输出:一个单层序列或一个双层序列,输出序列的类型(双层序列或单层序列)和序列中含有元素的数目同 layer2 一致。若输出是单层序列,单层序列的每个元素(0层序列),都是对layer1元素的拷贝;若输出是双层序列,双层序列每个subseq中每个元素(0层序列),都是对layer1元素的拷贝 + +- `expand_level=ExpandLevel.FROM_SEQUENCE` 时: + + - 作用:一个单层序列经过运算扩展成一个双层序列 + - 输入:layer1必须是一个单层序列,是待扩展的数据;layer2 必须是一个双层序列,提供扩展的长度信息 + - 输出:一个双层序列,序列中含有元素的数目同 layer2 一致。要求单层序列含有元素的数目(0层序列)和双层序列含有subseq 的数目一致。单层序列第i个元素(0层序列),被扩展为一个单层序列,构成了输出双层序列的第i个 subseq 。 diff --git a/doc/howto/deep_model/rnn/hrnn_rnn_api_compare_cn.rst b/doc/howto/deep_model/rnn/hrnn_rnn_api_compare_cn.rst new file mode 100644 index 0000000000000000000000000000000000000000..efdc44455ea4dc81a87b4d4fc8a81e78b15cb06a --- /dev/null +++ b/doc/howto/deep_model/rnn/hrnn_rnn_api_compare_cn.rst @@ -0,0 +1,231 @@ +.. _algo_hrnn_rnn_api_compare: + +##################### +单双层RNN API对比介绍 +##################### + +本文以PaddlePaddle的双层RNN单元测试为示例,用多对效果完全相同的、分别使用单双层RNN作为网络配置的模型,来讲解如何使用双层RNN。本文中所有的例子,都只是介绍双层RNN的API接口,并不是使用双层RNN解决实际的问题。如果想要了解双层RNN在具体问题中的使用,请参考\ :ref:`algo_hrnn_demo`\ 。本文中示例所使用的单元测试文件是\ `test_RecurrentGradientMachine.cpp `_\ 。 + +示例1:双层RNN,子序列间无Memory +================================ + +在双层RNN中的经典情况是将内层的每一个时间序列数据,分别进行序列操作;并且内层的序列操作之间独立无依赖,即不需要使用Memory\ 。 + +在本示例中,单层RNN和双层RNN的网络配置,都是将每一句分好词后的句子,使用LSTM作为encoder,压缩成一个向量。区别是RNN使用两层序列模型,将多句话看成一个整体同时使用encoder压缩。二者语意上完全一致。这组语义相同的示例配置如下: + +* 单层RNN\: `sequence_layer_group.conf `_ +* 双层RNN\: `sequence_nest_layer_group.conf `_ + + +读取双层序列数据 +---------------- + +首先,本示例中使用的原始数据如下\: + +- 本例中的原始数据一共有10个样本。每个样本由两部分组成,一个label(此处都为2)和一个已经分词后的句子。这个数据也被单层RNN网络直接使用。 + +.. literalinclude:: ../../../../paddle/gserver/tests/Sequence/tour_train_wdseg + :language: text + + +- 双层序列数据一共有4个样本。 每个样本间用空行分开,整体数据和原始数据完全一样。但于双层序列的LSTM来说,第一个样本同时encode两条数据成两个向量。这四条数据同时处理的句子数量为\ :code:`[2, 3, 2, 3]`\ 。 + +.. literalinclude:: ../../../../paddle/gserver/tests/Sequence/tour_train_wdseg.nest + :language: text + +其次,对于两种不同的输入数据类型,不同DataProvider对比如下(`sequenceGen.py `_)\: + +.. literalinclude:: ../../../../paddle/gserver/tests/sequenceGen.py + :language: python + :lines: 21-39 + :linenos: + +- 这是普通的单层时间序列的DataProvider代码,其说明如下: + + * DataProvider共返回两个数据,分别是words和label。即上述代码中的第19行。 + + - words是原始数据中的每一句话,所对应的词表index数组。它是integer_value_sequence类型的,即整数数组。words即为这个数据中的单层时间序列。 + - label是原始数据中对于每一句话的分类标签,它是integer_value类型的。 + +.. literalinclude:: ../../../../paddle/gserver/tests/sequenceGen.py + :language: python + :lines: 42-71 + :linenos: + +- 对于同样的数据,双层时间序列的DataProvider的代码。其说明如下: + + - DataProvider共返回两组数据,分别是sentences和labels。即在双层序列的原始数据中,每一组内的所有句子和labels + - sentences是双层时间序列的数据。由于它内部包含了每组数据中的所有句子,且每个句子表示为对应的词表索引数组,因此它是integer_value_sub_sequence 类型的,即双层时间序列。 + - labels是每组内每个句子的标签,故而是一个单层时间序列。 + + +模型配置的模型配置 +------------------------------------------ + +首先,我们看一下单层RNN的配置。代码中9-15行(高亮部分)即为单层RNN序列的使用代码。这里使用了PaddlePaddle预定义好的RNN处理函数。在这个函数中,RNN对于每一个时间步通过了一个LSTM网络。 + +.. 
literalinclude:: ../../../../paddle/gserver/tests/sequence_layer_group.conf + :language: python + :lines: 38-63 + :linenos: + :emphasize-lines: 9-15 + + +其次,我们看一下语义相同的双层RNN的网络配置\: + +* PaddlePaddle中的许多layer并不在意输入是否是时间序列,例如\ :code:`embedding_layer`\ 。在这些layer中,所有的操作都是针对每一个时间步来进行的。 + +* 在该配置的7-26行(高亮部分),将双层时间序列数据先变换成单层时间序列数据,再对每一个单层时间序列进行处理。 + + * 使用\ :code:`recurrent_group`\ 这个函数进行变换,在变换时需要将输入序列传入。由于我们想要的变换是双层时间序列=> 单层时间序列,所以我们需要将输入数据标记成\ :code:`SubsequenceInput`\ 。 + + * 在本例中,我们将原始数据的每一组,通过\ :code:`recurrent_group`\ 进行拆解,拆解成的每一句话再通过一个LSTM网络。这和单层RNN的配置是等价的。 + +* 与单层RNN的配置类似,我们只需要使用LSTM encode成的最后一个向量。所以对\ :code:`recurrent_group`\ 进行了\ :code:`last_seq`\ 操作。但和单层RNN不同,我们是对每一个子序列取最后一个元素,因此\ :code:`agg_level=AggregateLevel.TO_SEQUENCE`\ 。 + +* 至此,\ :code:`lstm_last`\ 便和单层RNN配置中的\ :code:`lstm_last`\ 具有相同的结果了。 + +.. literalinclude:: ../../../../paddle/gserver/tests/sequence_nest_layer_group.conf + :language: python + :lines: 38-64 + :linenos: + :emphasize-lines: 7-26 + +示例2:双层RNN,子序列间有Memory +================================ + +本示例意图使用单层RNN和双层RNN实现两个完全等价的全连接RNN。 + +* 对于单层RNN,输入数据为一个完整的时间序列,例如\ :code:`[4, 5, 2, 0, 9, 8, 1, 4]`\ 。 + +* 对于双层RNN,输入数据为在单层RNN数据里面,任意将一些数据组合成双层时间序列,例如\ :code:`[ [4, 5, 2], [0, 9], [8, 1, 4]]`。 + +模型配置的模型配置 +------------------ + +我们选取单双层序列配置中的不同部分,来对比分析两者语义相同的原因。 + +- 单层RNN:过了一个很简单的recurrent_group。每一个时间步,当前的输入y和上一个时间步的输出rnn_state做了一个全链接。 + +.. literalinclude:: ../../../../paddle/gserver/tests/sequence_rnn.conf + :language: python + :lines: 36-48 + +- 双层RNN,外层memory是一个元素: + + - 内层inner_step的recurrent_group和单层序列的几乎一样。除了boot_layer=outer_mem,表示将外层的outer_mem作为内层memory的初始状态。外层outer_step中,outer_mem是一个子句的最后一个向量,即整个双层group是将前一个子句的最后一个向量,作为下一个子句memory的初始状态。 + - 从输入数据上看,单双层序列的句子是一样的,只是双层序列将其又做了子序列划分。因此双层序列的配置中,必须将前一个子句的最后一个元素,作为boot_layer传给下一个子句的memory,才能保证和单层序列的配置中“每个时间步都用了上一个时间步的输出结果”一致。 + +.. literalinclude:: ../../../../paddle/gserver/tests/sequence_nest_rnn.conf + :language: python + :lines: 39-66 + +.. warning:: + PaddlePaddle目前只支持在每个时间步中,Memory的时间序列长度一致的情况。 + +示例3:双层RNN,输入不等长 +========================== + +.. role:: red + +.. raw:: html + + + +**输入不等长** 是指recurrent_group的多个输入序列,在每个时间步的子序列长度可以不相等。但序列输出时,需要指定与某一个输入的序列信息是一致的。使用\ :red:`targetInlink`\ 可以指定哪一个输入和输出序列信息一致,默认指定第一个输入。 + +示例3的配置分别为\ `单层不等长RNN `_\ 和\ `双层不等长RNN `_\ 。 + +示例3对于单层RNN和双层RNN数据完全相同。 + +* 对于单层RNN的数据一共有两个样本,他们分别是\ :code:`[1, 2, 4, 5, 2], [5, 4, 1, 3, 1]`\ 和\ :code:`[0, 2, 2, 5, 0, 1, 2], [1, 5, 4, 2, 3, 6, 1]`\ 。对于每一个单层RNN的数据,均有两组特征。 + +* 在单层数据的基础上,双层RNN数据随意加了一些隔断,例如将第一条数据转化为\ :code:`[[0, 2], [2, 5], [0, 1, 2]],[[1, 5], [4], [2, 3, 6, 1]]`\ 。 + +* 需要注意的是PaddlePaddle目前只支持子序列数目一样的多输入双层RNN。例如本例中的两个特征,均有三个子序列。每个子序列长度可以不一致,但是子序列的数目必须一样。 + + +模型配置 +-------- + +和示例2中的配置类似,示例3的配置使用了单层RNN和双层RNN,实现两个完全等价的全连接RNN。 + +* 单层RNN\: + +.. literalinclude:: ../../../../paddle/gserver/tests/sequence_rnn_multi_unequalength_inputs.py + :language: python + :lines: 42-59 + :linenos: + +* 双层RNN\ \: + +.. literalinclude:: ../../../../paddle/gserver/tests/sequence_nest_rnn_multi_unequalength_inputs.py + :language: python + :lines: 41-80 + :linenos: + +在上面代码中,单层和双层序列的使用和示例2中的示例类似,区别是同时处理了两个输入。而对于双层序列,两个输入的子序列长度也并不相同。但是,我们使用了\ :code:`targetInlink`\ 参数设置了外层\ :code:`recurrent_group`\ 的输出格式。所以外层输出的序列形状,和\ :code:`emb2`\ 的序列形状一致。 + +示例4:beam_search的生成 +======================== + +TBD + + +词汇表 +====== + +.. _glossary_memory: + +Memory +------ + +Memory是PaddlePaddle实现RNN时候使用的一个概念。RNN即时间递归神经网络,通常要求时间步之间具有一些依赖性,即当前时间步下的神经网络依赖前一个时间步神经网络中某一个神经元输出。如下图所示。 + +.. 
graphviz:: src/glossary_rnn.dot + +上图中虚线的连接,即是跨越时间步的网络连接。PaddlePaddle在实现RNN的时候,将这种跨越时间步的连接用一个特殊的神经网络单元实现。这个神经网络单元就叫Memory。Memory可以缓存上一个时刻某一个神经元的输出,然后在下一个时间步输入给另一个神经元。使用Memory的RNN实现便如下图所示。 + +.. graphviz:: src/glossary_rnn_with_memory.dot + +使用这种方式,PaddlePaddle可以比较简单的判断哪些输出是应该跨越时间步的,哪些不是。 + +.. _glossary_timestep: + +时间步 +------ + +参考时间序列。 + + +.. _glossary_sequence: + +时间序列 +-------- + +时间序列(time series)是指一系列的特征数据。这些特征数据之间的顺序是有意义的。即特征的数组,而不是特征的集合。而这每一个数组元素,或者每一个系列里的特征数据,即为一个时间步(time step)。值得注意的是,时间序列、时间步的概念,并不真正的和『时间』有关。只要一系列特征数据中的『顺序』是有意义的,即为时间序列的输入。 + +举例说明,例如文本分类中,我们通常将一句话理解成一个时间序列。比如一句话中的每一个单词,会变成词表中的位置。而这一句话就可以表示成这些位置的数组。例如 :code:`[9, 2, 3, 5, 3]` 。 + +关于时间序列(time series)的更详细准确的定义,可以参考 `维基百科页面 Time series `_ 或者 `维基百科中文页面 时间序列 `_ 。 + +另外,Paddle中经常会将时间序列成为 :code:`Sequence` 。他们在Paddle的文档和API中是一个概念。 + +.. _glossary_RNN: + +RNN +--- + +RNN 在PaddlePaddle的文档中,一般表示 :code:`Recurrent neural network`,即时间递归神经网络。详细介绍可以参考 `维基百科页面 Recurrent neural network `_ 或者 `中文维基百科页面 `_ 中关于时间递归神经网络的介绍。 + +RNN 一般在PaddlePaddle中,指对于一个时间序列输入数据,每一个时间步之间的神经网络具有一定的相关性。例如,某一个神经元的一个输入为上一个时间步网络中某一个神经元的输出。或者,从每一个时间步来看,神经网络的网络结构中具有有向环结构。 + +.. _glossary_双层RNN: + +双层RNN +------- + +双层RNN顾名思义,即RNN之间有一次嵌套关系。输入数据整体上是一个时间序列,而对于每一个内层特征数据而言,也是一个时间序列。即二维数组,或者数组的数组这个概念。 而双层RNN是可以处理这种输入数据的网络结构。 + +例如,对于段落的文本分类,即将一段话进行分类。我们将一段话看成句子的数组,每个句子又是单词的数组。这便是一种双层RNN的输入数据。而将这个段落的每一句话用lstm编码成一个向量,再对每一句话的编码向量用lstm编码成一个段落的向量。再对这个段落向量进行分类,即为这个双层RNN的网络结构。 + diff --git a/doc/howto/deep_model/rnn/index_cn.rst b/doc/howto/deep_model/rnn/index_cn.rst new file mode 100644 index 0000000000000000000000000000000000000000..9ecab5594cff47cde4700b7ce0f58013a960a16e --- /dev/null +++ b/doc/howto/deep_model/rnn/index_cn.rst @@ -0,0 +1,10 @@ +RNN相关模型 +=========== + +.. toctree:: + :maxdepth: 1 + + rnn_config_cn.rst + recurrent_group_cn.md + hierarchical_layer_cn.rst + hrnn_rnn_api_compare_cn.rst diff --git a/doc/howto/deep_model/rnn/index_en.rst b/doc/howto/deep_model/rnn/index_en.rst new file mode 100644 index 0000000000000000000000000000000000000000..7adc79873d699fdfd5a85034bcef964dd1f19132 --- /dev/null +++ b/doc/howto/deep_model/rnn/index_en.rst @@ -0,0 +1,7 @@ +RNN Models +========== + +.. 
toctree:: + :maxdepth: 1 + + rnn_config_en.rst diff --git a/doc/howto/deep_model/rnn/recurrent_group_cn.md b/doc/howto/deep_model/rnn/recurrent_group_cn.md new file mode 100644 index 0000000000000000000000000000000000000000..06dc9e089ab2b2b926fcb1bd034262f2c846f06f --- /dev/null +++ b/doc/howto/deep_model/rnn/recurrent_group_cn.md @@ -0,0 +1,96 @@ +# Recurrent Group教程 + +## 概述 + +序列数据是自然语言处理任务面对的一种主要输入数据类型。 + +一句话是由词语构成的序列,多句话进一步构成了段落。因此,段落可以看作是一个嵌套的双层的序列,这个序列的每个元素又是一个序列。 + +双层序列是PaddlePaddle支持的一种非常灵活的数据组织方式,帮助我们更好地描述段落、多轮对话等更为复杂的语言数据。基于双层序列输入,我们可以设计搭建一个灵活的、层次化的RNN,分别从词语和句子级别编码输入数据,同时也能够引入更加复杂的记忆机制,更好地完成一些复杂的语言理解任务。 + +在PaddlePaddle中,`recurrent_group`是一种任意复杂的RNN单元,用户只需定义RNN在一个时间步内完成的计算,PaddlePaddle负责完成信息和误差在时间序列上的传播。 + +更进一步,`recurrent_group`同样可以扩展到双层序列的处理上。通过两个嵌套的`recurrent_group`分别定义子句级别和词语级别上需要完成的运算,最终实现一个层次化的复杂RNN。 + +目前,在PaddlePaddle中,能够对双向序列进行处理的有`recurrent_group`和部分Layer,具体可参考文档:支持双层序列作为输入的Layer。 + +## 相关概念 + +### 基本原理 +`recurrent_group` 是PaddlePaddle支持的一种任意复杂的RNN单元。使用者只需要关注于设计RNN在一个时间步之内完成的计算,PaddlePaddle负责完成信息和梯度在时间序列上的传播。 + +PaddlePaddle中,`recurrent_group`的一个简单调用如下: + +``` python +recurrent_group(step, input, reverse) +``` +- step:一个可调用的函数,定义一个时间步之内RNN单元完成的计算 +- input:输入,必须是一个单层序列,或者一个双层序列 +- reverse:是否以逆序处理输入序列 + +使用`recurrent_group`的核心是设计step函数的计算逻辑。step函数内部可以自由组合PaddlePaddle支持的各种layer,完成任意的运算逻辑。`recurrent_group` 的输入(即input)会成为step函数的输入,由于step 函数只关注于RNN一个时间步之内的计算,在这里`recurrent_group`替我们完成了原始输入数据的拆分。 + +### 输入 +`recurrent_group`处理的输入序列主要分为以下三种类型: + +- **数据输入**:一个双层序列进入`recurrent_group`会被拆解为一个单层序列,一个单层序列进入`recurrent_group`会被拆解为非序列,然后交给step函数,这一过程对用户是完全透明的。可以有以下两种:1)通过data_layer拿到的用户输入;2)其它layer的输出。 + +- **只读Memory输入**:`StaticInput` 定义了一个只读的Memory,由`StaticInput`指定的输入不会被`recurrent_group`拆解,`recurrent_group` 循环展开的每个时间步总是能够引用所有输入,可以是一个非序列,或者一个单层序列。 + +- **序列生成任务的输入**:`GeneratedInput`只用于在序列生成任务中指定输入数据。 + +### 输入示例 + +序列生成任务大多遵循encoder-decoer架构,encoder和decoder可以是能够处理序列的任意神经网络单元,而RNN是最流行的选择。 + +给定encoder输出和当前词,decoder每次预测产生下一个最可能的词语。在这种结构中,decoder接受两个输入: + +- 要生成的目标序列:是decoder的数据输入,也是decoder循环展开的依据,`recurrent_group`会对这类输入进行拆解。 + +- encoder输出,可以是一个非序列,或者一个单层序列:是一个unbounded memory,decoder循环展开的每一个时间步会引用全部结果,不应该被拆解,这种类型的输入必须通过`StaticInput`指定。关于Unbounded Memory的更多讨论请参考论文 [Neural Turning Machine](https://arxiv.org/abs/1410.5401)。 + +在序列生成任务中,decoder RNN总是引用上一时刻预测出的词的词向量,作为当前时刻输入。`GeneratedInput`自动完成这一过程。 + +### 输出 +`step`函数必须返回一个或多个Layer的输出,这个Layer的输出会作为整个`recurrent_group` 最终的输出结果。在输出的过程中,`recurrent_group` 会将每个时间步的输出拼接,这个过程对用户也是透明的。 + +### memory +memory只能在`recurrent_group`中定义和使用。memory不能独立存在,必须指向一个PaddlePaddle定义的Layer。引用memory得到这layer上一时刻输出,因此,可以将memory理解为一个时延操作。 + +可以显示地指定一个layer的输出用于初始化memory。不指定时,memory默认初始化为0。 + +## 双层RNN介绍 +`recurrent_group`帮助我们完成对输入序列的拆分,对输出的合并,以及计算逻辑在序列上的循环展开。 + +利用这种特性,两个嵌套的`recurrent_group`能够处理双层序列,实现词语和句子两个级别的双层RNN结构。 + +- 单层(word-level)RNN:每个状态(state)对应一个词(word)。 +- 双层(sequence-level)RNN:一个双层RNN由多个单层RNN组成,每个单层RNN(即双层RNN的每个状态)对应一个子句(subseq)。 + +为了描述方便,下文以NLP任务为例,将含有子句(subseq)的段落定义为一个双层序列,将含有词语的句子定义为一个单层序列,那么0层序列即为一个词语。 + +## 双层RNN的使用 + +### 训练流程的使用方法 +使用 `recurrent_group`需要遵循以下约定: + +- **单进单出**:输入和输出都是单层序列。 + - 如果有多个输入,不同输入序列含有的词语数必须严格相等。 + - 输出一个单层序列,输出序列的词语数和输入序列一致。 + - memory:在step函数中定义 memory指向一个layer,通过引用memory得到这个layer上一个时刻输出,形成recurrent 连接。memory的is_seq参数必须为false。如果没有定义memory,每个时间步之内的运算是独立的。 + - boot_layer:memory的初始状态,默认初始状为0,memory的is_seq参数必须为false。 + +- **双进双出**:输入和输出都是双层序列。 + - 如果有多个输入序列,不同输入含有的子句(subseq)数必须严格相等,但子句含有的词语数可以不相等。 + - 输出一个双层序列,子句(subseq)数、子句的单词数和指定的一个输入序列一致,默认为第一个输入。 + - 
memory:在step函数中定义memory,指向一个layer,通过引用memory得到这个layer上一个时刻的输出,形成recurrent连接。定义在外层`recurrent_group` step函数中的memory,能够记录上一个subseq 的状态,可以是一个单层序列(只作为read-only memory),也可以是一个词语。如果没有定义memory,那么 subseq 之间的运算是独立的。 + - boot_layer:memory 的初始状态,可以是一个单层序列(只作为read-only memory)或一个向量。默认不设置,即初始状态为0。 + +- **双进单出**:目前还未支持,会报错"In hierachical RNN, all out links should be from sequences now"。 + + +### 生成流程的使用方法 +使用`beam_search`需要遵循以下约定: + +- 单层RNN:从一个word生成下一个word。 +- 双层RNN:即把单层RNN生成后的subseq给拼接成一个新的双层seq。从语义上看,也不存在一个subseq直接生成下一个subseq的情况。 diff --git a/doc/howto/deep_model/rnn/rnn_config_cn.rst b/doc/howto/deep_model/rnn/rnn_config_cn.rst new file mode 100644 index 0000000000000000000000000000000000000000..4d684cf8ad5a8082cf31fb27027119b3d3e700b6 --- /dev/null +++ b/doc/howto/deep_model/rnn/rnn_config_cn.rst @@ -0,0 +1,261 @@ +RNN配置 +======== + +本教程将指导你如何在 PaddlePaddle +中配置循环神经网络(RNN)。PaddlePaddle +对灵活和高效的循环神经网络配置提供了高度支持。在本教程中,你将了解如何: + +- 配置循环神经网络架构。 +- 使用训练好的循环神经网络模型生成序列。 + +我们将使用 vanilla 循环神经网络和 sequence to sequence +模型来指导你完成这些步骤。sequence to sequence +模型的代码可以在 `book/08.machine_translation `_ 找到。 +wmt14数据的提供文件在 `python/paddle/v2/dataset/wmt14.py `_ 。 + +配置循环神经网络架构 +-------------------- + +简单门控循环神经网络(Gated Recurrent Neural Network) +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +循环神经网络在每个时间步骤顺序地处理序列。下面列出了 LSTM 的架构的示例。 + +.. image:: ../../../tutorials/sentiment_analysis/bi_lstm.jpg + :align: center + +一般来说,循环网络从 :math:`t=1` 到 :math:`t=T` 或者反向地从 :math:`t=T` 到 :math:`t=1` 执行以下操作。 + +.. math:: + + x_{t+1} = f_x(x_t), y_t = f_y(x_t) + +其中 :math:`f_x(.)` 称为\ **单步函数**\ (即单时间步执行的函数,step +function),而 :math:`f_y(.)` 称为\ **输出函数**\ 。在 vanilla +循环神经网络中,单步函数和输出函数都非常简单。然而,PaddlePaddle +可以通过修改这两个函数来实现复杂的网络配置。我们将使用 sequence to +sequence +模型演示如何配置复杂的循环神经网络模型。在本节中,我们将使用简单的 +vanilla +循环神经网络作为使用\ ``recurrent_group``\ 配置简单循环神经网络的例子。 +注意,如果你只需要使用简单的RNN,GRU或LSTM,那么推荐使用\ ``grumemory``\ 和\ ``lstmemory``\ ,因为它们的计算效率比\ ``recurrent_group``\ 更高。 + +对于 vanilla RNN,在每个时间步,\ **单步函数**\ 为: + +.. math:: + + x_{t+1} = W_x x_t + W_i I_t + b + +其中 :math:`x_t` 是RNN状态,并且 :math:`I_t` 是输入,:math:`W_x` 和 +:math:`W_i` 分别是RNN状态和输入的变换矩阵。:math:`b` 是偏置。它的\ **输出函数**\ 只需要 :math:`x_t` 作为输出。 + +``recurrent_group``\ 是构建循环神经网络的最重要的工具。它定义了\ **单步函数**\ ,\ **输出函数**\ 和循环神经网络的输入。注意,这个函数的\ ``step``\ 参数需要实现\ ``step function``\ (单步函数)和\ ``output function``\ (输出函数): + +.. 
code:: python + + def simple_rnn(input, + size=None, + name=None, + reverse=False, + rnn_bias_attr=None, + act=None, + rnn_layer_attr=None): + def __rnn_step__(ipt): + out_mem = paddle.layer.memory(name=name, size=size) + rnn_out = paddle.layer.mixed(input = [paddle.layer.full_matrix_projection(input=ipt), + paddle.layer.full_matrix_projection(input=out_mem)], + name = name, + bias_attr = rnn_bias_attr, + act = act, + layer_attr = rnn_layer_attr, + size = size) + return rnn_out + return paddle.layer.recurrent_group(name='%s_recurrent_group' % name, + step=__rnn_step__, + reverse=reverse, + input=input) + +PaddlePaddle +使用“Memory”(记忆模块)实现单步函数。\ **Memory**\ 是在PaddlePaddle中构造循环神经网络时最重要的概念。 +Memory是在单步函数中循环使用的状态,例如 :math:`x_{t+1} = f_x(x_t)` 。 +一个Memory包含\ **输出**\ 和\ **输入**\ 。当前时间步处的Memory的输出作为下一时间步Memory的输入。Memory也可以具有\ **boot +layer(引导层)**\ ,其输出被用作Memory的初始值。 +在我们的例子中,RNN单步计算的输出被用作输出Memory。请注意,\ ``rnn_out``\ 层的名称与\ ``out_mem``\ 的名称相同。这意味着\ ``rnn_out``\ 层(即 :math:`x_{t+1}` )的输出被用作\ ``out_mem``\ Memory的\ **输出**\ 。 + +Memory也可以是序列。在这种情况下,在每个时间步中,我们有一个序列作为循环神经网络的状态。这在构造非常复杂的循环神经网络时是有用的。 +其他高级功能包括定义多个Memory,以及使用子序列来定义分级循环神经网络架构。 + +我们在函数的结尾返回\ ``rnn_out``\ 。这意味着 ``rnn_out`` +层的输出被用作循环神经网络的\ **输出**\ 函数。 + +Sequence to Sequence Model with Attention +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +我们将使用 sequence to sequence model with attention +作为例子演示如何配置复杂的循环神经网络模型。该模型的说明如下图所示。 + +.. image:: ../../../tutorials/text_generation/encoder-decoder-attention-model.png + :align: center + +在这个模型中,源序列 :math:`S = \{s_1, \dots, s_T\}` +用双向门控循环神经网络编码。双向门控循环神经网络的隐藏状态 +:math:`H_S = \{H_1, \dots, H_T\}` 被称为 +*编码向量*\ 。解码器是门控循环神经网络。当解码每一个 :math:`y_t` 时, +这个门控循环神经网络生成一系列权重 :math:`W_S^t = \{W_1^t, \dots, W_T^t\}` , +用于计算编码向量的加权和。加权和用来生成 :math:`y_t` 。 + +模型的编码器部分如下所示。它调用\ ``simple_gru``\ 来表示门控循环神经网络。如果网络架构简单,推荐使用这种方法,因为它比 +``recurrent_group`` +更快。我们已经实现了大多数常用的循环神经网络架构,可以参考 :ref:`api_trainer_config_helpers_layers` 了解更多细节。 + +我们还将编码向量投射到 ``decoder_size`` +维空间。这通过获得反向循环网络的第一个实例,并将其投射到 +``decoder_size`` 维空间完成: + +.. code:: python + + # 定义源语句的数据层 + src_word_id = paddle.layer.data( + name='source_language_word', + type=paddle.data_type.integer_value_sequence(source_dict_dim)) + # 计算每个词的词向量 + src_embedding = paddle.layer.embedding( + input=src_word_id, + size=word_vector_dim, + param_attr=paddle.attr.ParamAttr(name='_source_language_embedding')) + # 应用前向循环神经网络 + src_forward = paddle.networks.simple_gru( + input=src_embedding, size=encoder_size) + # 应用反向循环神经网络(reverse=True表示反向循环神经网络) + src_backward = paddle.networks.simple_gru( + input=src_embedding, size=encoder_size, reverse=True) + # 将循环神经网络的前向和反向部分混合在一起 + encoded_vector = paddle.layer.concat(input=[src_forward, src_backward]) + + # 投射编码向量到 decoder_size + encoded_proj = paddle.layer.mixed( + size=decoder_size, + input=paddle.layer.full_matrix_projection(encoded_vector)) + + # 计算反向RNN的第一个实例 + backward_first = paddle.layer.first_seq(input=src_backward) + + # 投射反向RNN的第一个实例到 decoder size + decoder_boot = paddle.layer.mixed( + size=decoder_size, + act=paddle.activation.Tanh(), + input=paddle.layer.full_matrix_projection(backward_first)) + +解码器使用 ``recurrent_group`` 来定义循环神经网络。单步函数和输出函数在 +``gru_decoder_with_attention`` 中定义: + +.. 
code:: python + + group_input1 = paddle.layer.StaticInput(input=encoded_vector, is_seq=True) + group_input2 = paddle.layer.StaticInput(input=encoded_proj, is_seq=True) + group_inputs = [group_input1, group_input2] + trg_embedding = paddle.layer.embedding( + input=paddle.layer.data( + name='target_language_word', + type=paddle.data_type.integer_value_sequence(target_dict_dim)), + size=word_vector_dim, + param_attr=paddle.attr.ParamAttr(name='_target_language_embedding')) + group_inputs.append(trg_embedding) + + # 对于配备有注意力机制的解码器,在训练中, + # 目标向量(ground truth)是数据输入, + # 而源序列的编码向量可以作为无边界的memory被访问 + # StaticInput 意味着不同时间步的输入都是相同的值, + # 否则它以一个序列输入,不同时间步的输入是不同的。 + # 所有输入序列应该有相同的长度。 + decoder = paddle.layer.recurrent_group( + name=decoder_group_name, + step=gru_decoder_with_attention, + input=group_inputs) + +单步函数的实现如下所示。首先,它定义解码网络的\ **Memory**\ 。然后定义 attention,门控循环单元单步函数和输出函数: + +.. code:: python + + def gru_decoder_with_attention(enc_vec, enc_proj, current_word): + # 定义解码器的Memory + # Memory的输出定义在 gru_step 内 + # 注意 gru_step 应该与它的Memory名字相同 + decoder_mem = paddle.layer.memory( + name='gru_decoder', size=decoder_size, boot_layer=decoder_boot) + # 计算 attention 加权编码向量 + context = paddle.networks.simple_attention( + encoded_sequence=enc_vec, + encoded_proj=enc_proj, + decoder_state=decoder_mem) + # 混合当前词向量和attention加权编码向量 + decoder_inputs = paddle.layer.mixed( + size=decoder_size * 3, + input=[ + paddle.layer.full_matrix_projection(input=context), + paddle.layer.full_matrix_projection(input=current_word) + ]) + # 定义门控循环单元单步函数 + gru_step = paddle.layer.gru_step( + name='gru_decoder', + input=decoder_inputs, + output_mem=decoder_mem, + size=decoder_size) + # 定义输出函数 + out = paddle.layer.mixed( + size=target_dict_dim, + bias_attr=True, + act=paddle.activation.Softmax(), + input=paddle.layer.full_matrix_projection(input=gru_step)) + return out + +生成序列 +-------- + +训练模型后,我们可以使用它来生成序列。通常的做法是使用\ **beam search** +生成序列。以下代码片段定义 beam search 算法。注意,\ ``beam_search`` +函数假设 ``step`` 的输出函数返回的是下一个时刻输出词的 softmax +归一化概率向量。我们对模型进行了以下更改。 + +- 使用 ``GeneratedInput`` 来表示 trg\_embedding。 ``GeneratedInput`` + 将上一时间步所生成的词的向量作为当前时间步的输入。 +- 使用 ``beam_search`` 函数。这个函数需要设置: + + - ``bos_id``: 开始标记。每个句子都以开始标记开头。 + - ``eos_id``: 结束标记。每个句子都以结束标记结尾。 + - ``beam_size``: beam search 算法中的beam大小。 + - ``max_length``: 生成序列的最大长度。 + +代码如下: + +.. code:: python + + group_input1 = paddle.layer.StaticInput(input=encoded_vector, is_seq=True) + group_input2 = paddle.layer.StaticInput(input=encoded_proj, is_seq=True) + group_inputs = [group_input1, group_input2] + # 在生成时,解码器基于编码源序列和最后生成的目标词预测下一目标词。 + # 编码源序列(编码器输出)必须由只读Memory的 StaticInput 指定。 + # 这里,GeneratedInput 自动获取上一个生成的词,并在最开始初始化为起始词,如 。 + trg_embedding = paddle.layer.GeneratedInput( + size=target_dict_dim, + embedding_name='_target_language_embedding', + embedding_size=word_vector_dim) + group_inputs.append(trg_embedding) + beam_gen = paddle.layer.beam_search( + name=decoder_group_name, + step=gru_decoder_with_attention, + input=group_inputs, + bos_id=0, # Beginning token. + eos_id=1, # End of sentence token. 
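+ # beam_size 与 max_length 两个参数见下:前者控制每个时间步保留的候选序列数,
+ # 后者限制生成序列的最大长度。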
+ beam_size=beam_size, + max_length=max_length) + + return beam_gen + +注意,这种生成技术只用于类似解码器的生成过程。如果你正在处理序列标注任务,请参阅 `book/06.understand_sentiment `_ 了解更多详细信息。 + +完整的配置文件在 `book/08.machine_translation/train.py `_ 。 diff --git a/doc/howto/deep_model/rnn/rnn_config_en.rst b/doc/howto/deep_model/rnn/rnn_config_en.rst new file mode 100644 index 0000000000000000000000000000000000000000..2b581290a41005c04cb1d8b6febe57f17d2416d3 --- /dev/null +++ b/doc/howto/deep_model/rnn/rnn_config_en.rst @@ -0,0 +1,235 @@ +RNN Configuration +================= + +This tutorial will guide you through configuring recurrent neural networks in PaddlePaddle. PaddlePaddle supports highly flexible and efficient recurrent neural network configurations. In this tutorial, you will learn how to: + +- configure recurrent neural network architecture. +- generate sequences with learned recurrent neural network models. + +We will use a vanilla recurrent neural network and a sequence to sequence model to guide you through these steps. The code of the sequence to sequence model can be found at `book/08.machine_translation `_ . +And the data preparation of this model can be found at `python/paddle/v2/dataset/wmt14.py `_ . + +=============================================== +Configure Recurrent Neural Network Architecture +=============================================== + +------------------------------------- +Simple Gated Recurrent Neural Network +------------------------------------- + +A recurrent neural network processes a sequence one time step at a time. An example of the architecture of LSTM is listed below. + +.. image:: ../../../tutorials/sentiment_analysis/src/bi_lstm.jpg + :align: center + +Generally speaking, a recurrent network performs the following operations from :math:`t=1` to :math:`t=T`, or reversely from :math:`t=T` to :math:`t=1`. + +.. math:: + + x_{t+1} = f_x(x_t), y_t = f_y(x_t) + + +where :math:`f_x(.)` is called the **step function**, and :math:`f_y(.)` is called the **output function**. In a vanilla recurrent neural network, both the step function and the output function are very simple. However, PaddlePaddle supports the configuration of very complex architectures by modifying these two functions. We will use the sequence to sequence model with attention as an example to demonstrate how you can configure complex recurrent neural network models. In this section, we will use a simple vanilla recurrent neural network as an example of configuring a simple recurrent neural network using :code:`recurrent_group`. Notice that if you only need to use a simple RNN, GRU, or LSTM, then :code:`grumemory` and :code:`lstmemory` are recommended because they are more computationally efficient than :code:`recurrent_group`. + +For the vanilla RNN, at each time step, the **step function** is: + +.. math:: + + x_{t+1} = W_x x_t + W_i I_t + b + +where :math:`x_t` is the RNN state, and :math:`I_t` is the input, :math:`W_x` and :math:`W_i` are transformation matrices for RNN states and inputs, respectively. :math:`b` is the bias. +Its **output function** simply takes :math:`x_t` as the output. + +:code:`recurrent_group` is the most important tool for constructing recurrent neural networks. It defines the **step function**, **output function** and the inputs of the recurrent neural network. Notice that the :code:`step` argument of this function implements both the :code:`step function` and the :code:`output function`: + +.. 
code-block:: python + + def simple_rnn(input, + size=None, + name=None, + reverse=False, + rnn_bias_attr=None, + act=None, + rnn_layer_attr=None): + def __rnn_step__(ipt): + out_mem = paddle.layer.memory(name=name, size=size) + rnn_out = paddle.layer.mixed(input = [paddle.layer.full_matrix_projection(input=ipt), + paddle.layer.full_matrix_projection(input=out_mem)], + name = name, + bias_attr = rnn_bias_attr, + act = act, + layer_attr = rnn_layer_attr, + size = size) + return rnn_out + return paddle.layer.recurrent_group(name='%s_recurrent_group' % name, + step=__rnn_step__, + reverse=reverse, + input=input) + + +PaddlePaddle uses memory to construct the step function. **Memory** is the most important concept when constructing recurrent neural networks in PaddlePaddle. A memory is a state that is used recurrently in step functions, such as :math:`x_{t+1} = f_x(x_t)`. One memory contains an **output** and an **input**. The output of the memory at the current time step is utilized as the input of the memory at the next time step. A memory can also have a **boot layer**, whose output is utilized as the initial value of the memory; a minimal boot-layer sketch is given below, after the encoder discussion. In our case, the output of the RNN step is employed as the output of the memory. Notice that the name of the layer :code:`rnn_out` is the same as the name of :code:`out_mem`. This means the output of the layer :code:`rnn_out` (:math:`x_{t+1}`) is utilized as the **output** of the :code:`out_mem` memory. + +A memory can also be a sequence. In this case, at each time step, we have a sequence as the state of the recurrent neural network. This can be useful when constructing very complex recurrent neural networks. Other advanced functions include defining multiple memories, and defining hierarchical recurrent neural network architectures using sub-sequences. + +We return :code:`rnn_out` at the end of the function. It means that the output of the layer :code:`rnn_out` is utilized as the **output** function of the recurrent neural network. + +----------------------------------------- +Sequence to Sequence Model with Attention +----------------------------------------- +We will use the sequence to sequence model with attention as an example to demonstrate how you can configure complex recurrent neural network models. An illustration of the sequence to sequence model with attention is shown in the following figure. + +.. image:: ../../../tutorials/text_generation/encoder-decoder-attention-model.png + :align: center + +In this model, the source sequence :math:`S = \{s_1, \dots, s_T\}` is encoded with a bidirectional gated recurrent neural network. The hidden states of the bidirectional gated recurrent neural network :math:`H_S = \{H_1, \dots, H_T\}` are called the *encoder vector*. The decoder is a gated recurrent neural network. When decoding each token :math:`y_t`, the gated recurrent neural network generates a set of weights :math:`W_S^t = \{W_1^t, \dots, W_T^t\}`, which are used to compute a weighted sum of the encoder vector. The weighted sum of the encoder vector is utilized to condition the generation of the token :math:`y_t`. + +The encoder part of the model is listed below. It calls :code:`simple_gru` to represent the gated recurrent neural network. This is the recommended way of using a recurrent neural network if the network architecture is simple, because it is faster than :code:`recurrent_group`. We have implemented most of the commonly used recurrent neural network architectures; you can refer to :ref:`api_trainer_config_helpers_layers` for more details. 
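+ To make the memory discussion above concrete, here is a minimal sketch of a step function whose memory is seeded by a boot layer. This is an illustrative sketch rather than part of the translation model: the names ``step_with_boot`` and ``init_state`` and the size ``hidden_size`` are hypothetical, and the sketch only reuses calls that already appear in this document (:code:`paddle.layer.memory`, :code:`paddle.layer.mixed`, :code:`paddle.layer.full_matrix_projection`):
+
+ .. code-block:: python
+
+     # Sketch: a memory initialized from a boot layer. `init_state` is assumed
+     # to be a previously defined layer whose output width is `hidden_size`.
+     def step_with_boot(ipt):
+         state = paddle.layer.memory(
+             name='state', size=hidden_size, boot_layer=init_state)
+         # The mixed layer reuses the memory's name, so its output becomes
+         # the memory's value at the next time step.
+         out = paddle.layer.mixed(
+             name='state',
+             size=hidden_size,
+             input=[paddle.layer.full_matrix_projection(input=ipt),
+                    paddle.layer.full_matrix_projection(input=state)])
+         return out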
+ +We also project the encoder vector to :code:`decoder_size` dimensional space, get the first instance of the backward recurrent network, and project it to :code:`decoder_size` dimensional space: + +.. code-block:: python + + # Define the data layer of the source sentence. + src_word_id = paddle.layer.data( + name='source_language_word', + type=paddle.data_type.integer_value_sequence(source_dict_dim)) + # Calculate the word embedding of each word. + src_embedding = paddle.layer.embedding( + input=src_word_id, + size=word_vector_dim, + param_attr=paddle.attr.ParamAttr(name='_source_language_embedding')) + # Apply forward recurrent neural network. + src_forward = paddle.networks.simple_gru( + input=src_embedding, size=encoder_size) + # Apply backward recurrent neural network. reverse=True means backward recurrent neural network. + src_backward = paddle.networks.simple_gru( + input=src_embedding, size=encoder_size, reverse=True) + # Mix the forward and backward parts of the recurrent neural network together. + encoded_vector = paddle.layer.concat(input=[src_forward, src_backward]) + + # Project encoding vector to decoder_size. + encoded_proj = paddle.layer.mixed( + size=decoder_size, + input=paddle.layer.full_matrix_projection(encoded_vector)) + + # Compute the first instance of the backward RNN. + backward_first = paddle.layer.first_seq(input=src_backward) + + # Project the first instance of backward RNN to decoder size. + decoder_boot = paddle.layer.mixed( + size=decoder_size, + act=paddle.activation.Tanh(), + input=paddle.layer.full_matrix_projection(backward_first)) + + +The decoder uses :code:`recurrent_group` to define the recurrent neural network. The step and output functions are defined in :code:`gru_decoder_with_attention`: + +.. code-block:: python + + group_input1 = paddle.layer.StaticInput(input=encoded_vector, is_seq=True) + group_input2 = paddle.layer.StaticInput(input=encoded_proj, is_seq=True) + group_inputs = [group_input1, group_input2] + trg_embedding = paddle.layer.embedding( + input=paddle.layer.data( + name='target_language_word', + type=paddle.data_type.integer_value_sequence(target_dict_dim)), + size=word_vector_dim, + param_attr=paddle.attr.ParamAttr(name='_target_language_embedding')) + group_inputs.append(trg_embedding) + + # For a decoder equipped with attention mechanism, in training, + # target embedding (the ground truth) is the data input, + # while the encoded source sequence is accessed as an unbounded memory. + # StaticInput means the same value is utilized at different time steps. + # Otherwise, it is a sequence input. Inputs at different time steps are different. + # All sequence inputs should have the same length. + decoder = paddle.layer.recurrent_group( + name=decoder_group_name, + step=gru_decoder_with_attention, + input=group_inputs) + + +The implementation of the step function is listed below. First, it defines the **memory** of the decoder network. Then it defines attention, the gated recurrent unit step function, and the output function: + +.. code-block:: python + + def gru_decoder_with_attention(enc_vec, enc_proj, current_word): + # Defines the memory of the decoder. + # The output of this memory is defined in gru_step. + # Notice that the name of gru_step should be the same as the name of this memory. + decoder_mem = paddle.layer.memory( + name='gru_decoder', size=decoder_size, boot_layer=decoder_boot) + # Compute attention weighted encoder vector. 
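+ # simple_attention scores each position of enc_vec against the current
+ # decoder state (decoder_mem) and returns their weighted sum, i.e. the
+ # context vector described in the text above.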
+ context = paddle.networks.simple_attention( + encoded_sequence=enc_vec, + encoded_proj=enc_proj, + decoder_state=decoder_mem) + # Mix the current word embedding and the attention weighted encoder vector. + decoder_inputs = paddle.layer.mixed( + size=decoder_size * 3, + input=[ + paddle.layer.full_matrix_projection(input=context), + paddle.layer.full_matrix_projection(input=current_word) + ]) + # Define the gated recurrent unit step function. + gru_step = paddle.layer.gru_step( + name='gru_decoder', + input=decoder_inputs, + output_mem=decoder_mem, + size=decoder_size) + # Defines the output function. + out = paddle.layer.mixed( + size=target_dict_dim, + bias_attr=True, + act=paddle.activation.Softmax(), + input=paddle.layer.full_matrix_projection(input=gru_step)) + return out + + +================= +Generate Sequence +================= +After training the model, we can use it to generate sequences. A common practice is to use **beam search** to generate sequences. The following code snippet defines a beam search algorithm. Notice that the :code:`beam_search` function assumes the output function of :code:`step` returns a softmax normalized probability vector of the next token. We made the following changes to the model. + +* use :code:`GeneratedInput` for trg_embedding. :code:`GeneratedInput` computes the embedding of the token generated at the last time step for the input at the current time step. +* use the :code:`beam_search` function. This function needs to set: + + - :code:`bos_id`: the start token. Every sentence starts with the start token. + - :code:`eos_id`: the end token. Every sentence ends with the end token. + - :code:`beam_size`: the beam size used in beam search. + - :code:`max_length`: the maximum length of the generated sentences. + +The code is listed below: + +.. code-block:: python + + group_input1 = paddle.layer.StaticInput(input=encoded_vector, is_seq=True) + group_input2 = paddle.layer.StaticInput(input=encoded_proj, is_seq=True) + group_inputs = [group_input1, group_input2] + # In generation, the decoder predicts the next target word based on + # the encoded source sequence and the last generated target word. + # The encoded source sequence (encoder's output) must be specified by + # StaticInput, which is a read-only memory. + # Here, GeneratedInput automatically fetches the last generated word, + # which is initialized by a start mark, such as . + trg_embedding = paddle.layer.GeneratedInput( + size=target_dict_dim, + embedding_name='_target_language_embedding', + embedding_size=word_vector_dim) + group_inputs.append(trg_embedding) + beam_gen = paddle.layer.beam_search( + name=decoder_group_name, + step=gru_decoder_with_attention, + input=group_inputs, + bos_id=0, # Beginning token. + eos_id=1, # End of sentence token. + beam_size=beam_size, + max_length=max_length) + + return beam_gen + + +Notice that this generation technique is only useful for decoder-like generation processes. If you are working on sequence tagging tasks, please refer to `book/06.understand_sentiment `_ for more details. + +The full configuration file is located at `book/08.machine_translation/train.py `_ . 
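+ As a closing usage sketch (an illustration, not part of the original configuration): generation with the network above could be driven through the v2 inference API. The names :code:`parameters` and :code:`test_batch` below are hypothetical placeholders, and the exact :code:`paddle.infer` signature should be verified against your PaddlePaddle version.
+
+ .. code-block:: python
+
+     # Sketch: run beam-search generation with the trained model.
+     beam_result = paddle.infer(
+         output_layer=beam_gen,  # the beam_search layer defined above
+         parameters=parameters,  # assumed: trained model parameters
+         input=test_batch,       # assumed: a list of source word-id sequences
+         field=['prob', 'id'])   # assumed fields: scores and generated ids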
diff --git a/doc/howto/deep_model/rnn/rnn_en.rst b/doc/howto/deep_model/rnn/rnn_en.rst deleted file mode 100644 index da29b8efadd299fe4fc74a71392cbc9a56e32be3..0000000000000000000000000000000000000000 --- a/doc/howto/deep_model/rnn/rnn_en.rst +++ /dev/null @@ -1,251 +0,0 @@ -RNN Configuration -================= - -This tutorial will guide you how to configure recurrent neural network in PaddlePaddle. PaddlePaddle supports highly flexible and efficient recurrent neural network configuration. In this tutorial, you will learn how to: - -- prepare sequence data for learning recurrent neural networks. -- configure recurrent neural network architecture. -- generate sequence with learned recurrent neural network models. - -We will use vanilla recurrent neural network, and sequence to sequence model to guide you through these steps. The code of sequence to sequence model can be found at :code:`demo/seqToseq`. - -===================== -Prepare Sequence Data -===================== - -PaddlePaddle does not need any preprocessing to sequence data, such as padding. The only thing that needs to be done is to set the type of the corresponding type to input. For example, the following code snippets defines three input. All of them are sequences, and the size of them are :code:`src_dict`, :code:`trg_dict`, and :code:`trg_dict`: - -.. code-block:: python - - settings.input_types = [ - integer_value_sequence(len(settings.src_dict)), - integer_value_sequence(len(settings.trg_dict)), - integer_value_sequence(len(settings.trg_dict))] - - -Then at the :code:`process` function, each :code:`yield` function will return three integer lists. Each integer list is treated as a sequence of integers: - -.. code-block:: python - - yield src_ids, trg_ids, trg_ids_next - - -For more details description of how to write a data provider, please refer to `PyDataProvider2 <../../ui/data_provider/index.html>`_. The full data provider file is located at :code:`demo/seqToseq/dataprovider.py`. - -=============================================== -Configure Recurrent Neural Network Architecture -=============================================== - -------------------------------------- -Simple Gated Recurrent Neural Network -------------------------------------- - -Recurrent neural network process a sequence at each time step sequentially. An example of the architecture of LSTM is listed below. - -.. image:: ../../../tutorials/sentiment_analysis/bi_lstm.jpg - :align: center - -Generally speaking, a recurrent network perform the following operations from :math:`t=1` to :math:`t=T`, or reversely from :math:`t=T` to :math:`t=1`. - -.. math:: - - x_{t+1} = f_x(x_t), y_t = f_y(x_t) - - -where :math:`f_x(.)` is called **step function**, and :math:`f_y(.)` is called **output function**. In vanilla recurrent neural network, both of the step function and output function are very simple. However, PaddlePaddle supports the configuration of very complex architectures by modifying these two functions. We will use the sequence to sequence model with attention as an example to demonstrate how you can configure complex recurrent neural network models. In this section, we will use a simple vanilla recurrent neural network as an example of configuring simple recurrent neural network using :code:`recurrent_group`. Notice that if you only need to use simple RNN, GRU, or LSTM, then :code:`grumemory` and :code:`lstmemory` is recommended because they are more computationally efficient than :code:`recurrent_group`. 
- -For vanilla RNN, at each time step, the **step function** is: - -.. math:: - - x_{t+1} = W_x x_t + W_i I_t + b - -where :math:`x_t` is the RNN state, and :math:`I_t` is the input, :math:`W_x` and :math:`W_i` are transformation matrices for RNN states and inputs, respectively. :math:`b` is the bias. -Its **output function** simply takes :math:`x_t` as the output. - -:code:`recurrent_group` is the most important tools for constructing recurrent neural networks. It defines the **step function**, **output function** and the inputs of the recurrent neural network. Notice that the :code:`step` argument of this function implements both the :code:`step function` and the :code:`output function`: - -.. code-block:: python - - def simple_rnn(input, - size=None, - name=None, - reverse=False, - rnn_bias_attr=None, - act=None, - rnn_layer_attr=None): - def __rnn_step__(ipt): - out_mem = memory(name=name, size=size) - rnn_out = mixed_layer(input = [full_matrix_projection(ipt), - full_matrix_projection(out_mem)], - name = name, - bias_attr = rnn_bias_attr, - act = act, - layer_attr = rnn_layer_attr, - size = size) - return rnn_out - return recurrent_group(name='%s_recurrent_group' % name, - step=__rnn_step__, - reverse=reverse, - input=input) - - -PaddlePaddle uses memory to construct step function. **Memory** is the most important concept when constructing recurrent neural networks in PaddlePaddle. A memory is a state that is used recurrently in step functions, such as :math:`x_{t+1} = f_x(x_t)`. One memory contains an **output** and a **input**. The output of memory at the current time step is utilized as the input of the memory at the next time step. A memory can also has a **boot layer**, whose output is utilized as the initial value of the memory. In our case, the output of the gated recurrent unit is employed as the output memory. Notice that the name of the layer :code:`rnn_out` is the same as the name of :code:`out_mem`. This means the output of the layer :code:`rnn_out` (:math:`x_{t+1}`) is utilized as the **output** of :code:`out_mem` memory. - -A memory can also be a sequence. In this case, at each time step, we have a sequence as the state of the recurrent neural network. This can be useful when constructing very complex recurrent neural network. Other advanced functions include defining multiple memories, and defining hierarchical recurrent neural network architecture using sub-sequence. - -We return :code:`rnn_out` at the end of the function. It means that the output of the layer :code:`rnn_out` is utilized as the **output** function of the gated recurrent neural network. - ------------------------------------------ -Sequence to Sequence Model with Attention ------------------------------------------ -We will use the sequence to sequence model with attention as an example to demonstrate how you can configure complex recurrent neural network models. An illustration of the sequence to sequence model with attention is shown in the following figure. - -.. image:: ../../../tutorials/text_generation/encoder-decoder-attention-model.png - :align: center - -In this model, the source sequence :math:`S = \{s_1, \dots, s_T\}` is encoded with a bidirectional gated recurrent neural networks. The hidden states of the bidirectional gated recurrent neural network :math:`H_S = \{H_1, \dots, H_T\}` is called *encoder vector* The decoder is a gated recurrent neural network. 
When decoding each token :math:`y_t`, the gated recurrent neural network generates a set of weights :math:`W_S^t = \{W_1^t, \dots, W_T^t\}`, which are used to compute a weighted sum of the encoder vector. The weighted sum of the encoder vector is utilized to condition the generation of the token :math:`y_t`. - -The encoder part of the model is listed below. It calls :code:`grumemory` to represent gated recurrent neural network. It is the recommended way of using recurrent neural network if the network architecture is simple, because it is faster than :code:`recurrent_group`. We have implemented most of the commonly used recurrent neural network architectures, you can refer to `Layers <../../ui/api/trainer_config_helpers/layers_index.html>`_ for more details. - -We also project the encoder vector to :code:`decoder_size` dimensional space, get the first instance of the backward recurrent network, and project it to :code:`decoder_size` dimensional space: - -.. code-block:: python - - # Define the data layer of the source sentence. - src_word_id = data_layer(name='source_language_word', size=source_dict_dim) - # Calculate the word embedding of each word. - src_embedding = embedding_layer( - input=src_word_id, - size=word_vector_dim, - param_attr=ParamAttr(name='_source_language_embedding')) - # Apply forward recurrent neural network. - src_forward = grumemory(input=src_embedding, size=encoder_size) - # Apply backward recurrent neural network. reverse=True means backward recurrent neural network. - src_backward = grumemory(input=src_embedding, - size=encoder_size, - reverse=True) - # Mix the forward and backward parts of the recurrent neural network together. - encoded_vector = concat_layer(input=[src_forward, src_backward]) - - # Project encoding vector to decoder_size. - encoder_proj = mixed_layer(input = [full_matrix_projection(encoded_vector)], - size = decoder_size) - - # Compute the first instance of the backward RNN. - backward_first = first_seq(input=src_backward) - - # Project the first instance of backward RNN to decoder size. - decoder_boot = mixed_layer(input=[full_matrix_projection(backward_first)], size=decoder_size, act=TanhActivation()) - - -The decoder uses :code:`recurrent_group` to define the recurrent neural network. The step and output functions are defined in :code:`gru_decoder_with_attention`: - -.. code-block:: python - - group_inputs=[StaticInput(input=encoded_vector,is_seq=True), - StaticInput(input=encoded_proj,is_seq=True)] - trg_embedding = embedding_layer( - input=data_layer(name='target_language_word', - size=target_dict_dim), - size=word_vector_dim, - param_attr=ParamAttr(name='_target_language_embedding')) - group_inputs.append(trg_embedding) - - # For decoder equipped with attention mechanism, in training, - # target embedding (the groudtruth) is the data input, - # while encoded source sequence is accessed to as an unbounded memory. - # StaticInput means the same value is utilized at different time steps. - # Otherwise, it is a sequence input. Inputs at different time steps are different. - # All sequence inputs should have the same length. - decoder = recurrent_group(name=decoder_group_name, - step=gru_decoder_with_attention, - input=group_inputs) - - -The implementation of the step function is listed as below. First, it defines the **memory** of the decoder network. Then it defines attention, gated recurrent unit step function, and the output function: - -.. 
code-block:: python - - def gru_decoder_with_attention(enc_vec, enc_proj, current_word): - # Defines the memory of the decoder. - # The output of this memory is defined in gru_step. - # Notice that the name of gru_step should be the same as the name of this memory. - decoder_mem = memory(name='gru_decoder', - size=decoder_size, - boot_layer=decoder_boot) - # Compute attention weighted encoder vector. - context = simple_attention(encoded_sequence=enc_vec, - encoded_proj=enc_proj, - decoder_state=decoder_mem) - # Mix the current word embedding and the attention weighted encoder vector. - decoder_inputs = mixed_layer(inputs = [full_matrix_projection(context), - full_matrix_projection(current_word)], - size = decoder_size * 3) - # Define Gated recurrent unit recurrent neural network step function. - gru_step = gru_step_layer(name='gru_decoder', - input=decoder_inputs, - output_mem=decoder_mem, - size=decoder_size) - # Defines the output function. - out = mixed_layer(input=[full_matrix_projection(input=gru_step)], - size=target_dict_dim, - bias_attr=True, - act=SoftmaxActivation()) - return out - - -================= -Generate Sequence -================= -After training the model, we can use it to generate sequences. A common practice is to use **beam search** to generate sequences. The following code snippets defines a beam search algorithm. Notice that :code:`beam_search` function assumes the output function of the :code:`step` returns a softmax normalized probability vector of the next token. We made the following changes to the model. - -* use :code:`GeneratedInput` for trg_embedding. :code:`GeneratedInput` computes the embedding of the generated token at the last time step for the input at the current time step. -* use :code:`beam_search` function. This function needs to set: - - - :code:`bos_id`: the start token. Every sentence starts with the start token. - - :code:`eos_id`: the end token. Every sentence ends with the end token. - - :code:`beam_size`: the beam size used in beam search. - - :code:`max_length`: the maximum length of the generated sentences. - -* use :code:`seqtext_printer_evaluator` to print text according to index matrix and dictionary. This function needs to set: - - - :code:`id_input`: the integer ID of the data, used to identify the corresponding output in the generated files. - - :code:`dict_file`: the dictionary file for converting word id to word. - - :code:`result_file`: the path of the generation result file. - -The code is listed below: - -.. code-block:: python - - group_inputs=[StaticInput(input=encoded_vector,is_seq=True), - StaticInput(input=encoded_proj,is_seq=True)] - # In generation, decoder predicts a next target word based on - # the encoded source sequence and the last generated target word. - # The encoded source sequence (encoder's output) must be specified by - # StaticInput which is a read-only memory. - # Here, GeneratedInputs automatically fetchs the last generated word, - # which is initialized by a start mark, such as . - trg_embedding = GeneratedInput( - size=target_dict_dim, - embedding_name='_target_language_embedding', - embedding_size=word_vector_dim) - group_inputs.append(trg_embedding) - beam_gen = beam_search(name=decoder_group_name, - step=gru_decoder_with_attention, - input=group_inputs, - bos_id=0, # Beginnning token. - eos_id=1, # End of sentence token. 
- beam_size=beam_size, - max_length=max_length) - - seqtext_printer_evaluator(input=beam_gen, - id_input=data_layer(name="sent_id", size=1), - dict_file=trg_dict_path, - result_file=gen_trans_file) - outputs(beam_gen) - - -Notice that this generation technique is only useful for decoder like generation process. If you are working on sequence tagging tasks, please refer to `Semantic Role Labeling Demo <../../demo/semantic_role_labeling/index.html>`_ for more details. - -The full configuration file is located at :code:`demo/seqToseq/seqToseq_net.py`. diff --git a/doc_cn/algorithm/rnn/glossary_rnn.dot b/doc/howto/deep_model/rnn/src/glossary_rnn.dot similarity index 100% rename from doc_cn/algorithm/rnn/glossary_rnn.dot rename to doc/howto/deep_model/rnn/src/glossary_rnn.dot diff --git a/doc_cn/algorithm/rnn/glossary_rnn_with_memory.dot b/doc/howto/deep_model/rnn/src/glossary_rnn_with_memory.dot similarity index 100% rename from doc_cn/algorithm/rnn/glossary_rnn_with_memory.dot rename to doc/howto/deep_model/rnn/src/glossary_rnn_with_memory.dot diff --git a/doc_cn/algorithm/rnn/simple_full_hierarchical_recurrent.dot b/doc/howto/deep_model/rnn/src/simple_full_hierarchical_recurrent.dot similarity index 100% rename from doc_cn/algorithm/rnn/simple_full_hierarchical_recurrent.dot rename to doc/howto/deep_model/rnn/src/simple_full_hierarchical_recurrent.dot diff --git a/doc_cn/algorithm/rnn/simple_full_recurrent.dot b/doc/howto/deep_model/rnn/src/simple_full_recurrent.dot similarity index 100% rename from doc_cn/algorithm/rnn/simple_full_recurrent.dot rename to doc/howto/deep_model/rnn/src/simple_full_recurrent.dot diff --git a/doc/howto/new_layer/FullyConnected.jpg b/doc/howto/dev/FullyConnected.jpg similarity index 100% rename from doc/howto/new_layer/FullyConnected.jpg rename to doc/howto/dev/FullyConnected.jpg diff --git a/doc/howto/dev/contribute_to_paddle_cn.md b/doc/howto/dev/contribute_to_paddle_cn.md new file mode 100644 index 0000000000000000000000000000000000000000..699390145226ec2b65fdf5122db187e1d30d669e --- /dev/null +++ b/doc/howto/dev/contribute_to_paddle_cn.md @@ -0,0 +1,239 @@ +# 如何贡献代码 + +我们真诚地感谢您的贡献,欢迎通过 GitHub 的 fork 和 pull request 流程来提交代码。 + +## 代码要求 +- 代码注释请遵守 [Doxygen](http://www.stack.nl/~dimitri/doxygen/) 的样式。 +- 确保编译器选项 `WITH_STYLE_CHECK` 已打开,并且编译能通过代码样式检查。 +- 所有代码必须具有单元测试。 +- 通过所有单元测试。 +- 请遵守[提交代码的一些约定](#提交代码的一些约定)。 + +以下教程将指导您提交代码。 +## [Fork](https://help.github.com/articles/fork-a-repo/) + +跳转到[PaddlePaddle](https://github.com/PaddlePaddle/Paddle) GitHub首页,然后单击 `Fork` 按钮,生成自己目录下的仓库,比如 。 + +## 克隆(Clone) + +将远程仓库 clone 到本地: + +```bash +➜ git clone https://github.com/USERNAME/Paddle +➜ cd Paddle +``` + + +## 创建本地分支 + +Paddle 目前使用[Git流分支模型](http://nvie.com/posts/a-successful-git-branching-model/)进行开发,测试,发行和维护,具体请参考 [Paddle 分支规范](https://github.com/PaddlePaddle/Paddle/blob/develop/doc/design/releasing_process.md#paddle-分支规范)。 + +所有的 feature 和 bug fix 的开发工作都应该在一个新的分支上完成,一般从 `develop` 分支上创建新分支。 + +使用 `git checkout -b` 创建并切换到新分支。 + +```bash +➜ git checkout -b my-cool-stuff +``` + +值得注意的是,在 checkout 之前,需要保持当前分支目录 clean,否则会把 untracked 的文件也带到新分支上,这可以通过 `git status` 查看。 + +## 使用 `pre-commit` 钩子 + +Paddle 开发人员使用 [pre-commit](http://pre-commit.com/) 工具来管理 Git 预提交钩子。 它可以帮助我们格式化源代码(C++,Python),在提交(commit)前自动检查一些基本事宜(如每个文件只有一个 EOL,Git 中不要添加大文件等)。 + +`pre-commit`测试是 Travis-CI 中单元测试的一部分,不满足钩子的 PR 不能被提交到 Paddle,首先安装并在当前目录运行它: + +```bash +➜ pip install pre-commit +➜ pre-commit install +``` + +Paddle 使用 `clang-format` 来调整 C/C++ 源代码格式,请确保 `clang-format` 版本在 3.8 以上。 + +## 开始开发 + +在本例中,我删除了 
README.md 中的一行,并创建了一个新文件。 + +通过 `git status` 查看当前状态,这会提示当前目录的一些变化,同时也可以通过 `git diff` 查看文件具体被修改的内容。 + +```bash +➜ git status +On branch test +Changes not staged for commit: + (use "git add ..." to update what will be committed) + (use "git checkout -- ..." to discard changes in working directory) + + modified: README.md + +Untracked files: + (use "git add ..." to include in what will be committed) + + test + +no changes added to commit (use "git add" and/or "git commit -a") +``` + +## 构建和测试 + +编译 PaddlePaddle 的源码以及生成文档需要多种开发工具。为了方便大家,我们的标准开发流程是把这些工具都装进一个Docker image,称为*开发镜像*,通常名字是 `paddle:dev`。然后所有用 `cmake && make` 的地方(比如IDE配置里)都用 `docker run paddle:dev`来代替。 + +如要build这个开发镜像,在源码目录树的根目录中运行: + +```bash +➜ docker build -t paddle:dev . +``` + +随后可以用这个开发镜像开始build PaddlePaddle的源码。比如如果要build一个不依赖GPU,但是支持AVX指令集,并且包括unit tests的PaddlePaddle,可以: + +```bash +➜ docker run -v $(pwd):/paddle -e "WITH_GPU=OFF" -e "WITH_AVX=ON" -e "WITH_TEST=ON" paddle:dev +``` + +这个过程除了编译PaddlePaddle为 `./build/libpaddle.so`,并且输出一个 `./build/paddle.deb`文件之外,还会输出一个 `build/Dockerfile`。我们只需要运行下面命令把编译好的PaddlePaddle打包成一个*生产镜像*(`paddle:prod`): + +```bash +➜ docker build -t paddle:prod -f build/Dockerfile . +``` + +如果要运行所有的单元测试,可以用如下命令: + +```bash +➜ docker run -it -v $(pwd):/paddle paddle:dev bash -c "cd /paddle/build && ctest" +``` + +关于构建和测试的更多信息,请参见[这篇文档](https://github.com/PaddlePaddle/Paddle/blob/develop/doc/getstarted/build_and_install/docker_install_cn.rst)。 + +## 提交(commit) + +接下来我们取消对 README.md 文件的改变,然后提交新添加的 test 文件。 + +```bash +➜ git checkout -- README.md +➜ git status +On branch test +Untracked files: + (use "git add ..." to include in what will be committed) + + test + +nothing added to commit but untracked files present (use "git add" to track) +➜ git add test +``` + +Git 每次提交代码,都需要写提交说明,这可以让其他人知道这次提交做了哪些改变,这可以通过`git commit` 完成。 + +```bash +➜ git commit +CRLF end-lines remover...............................(no files to check)Skipped +yapf.................................................(no files to check)Skipped +Check for added large files..............................................Passed +Check for merge conflicts................................................Passed +Check for broken symlinks................................................Passed +Detect Private Key...................................(no files to check)Skipped +Fix End of Files.....................................(no files to check)Skipped +clang-formater.......................................(no files to check)Skipped +[my-cool-stuff c703c041] add test file + 1 file changed, 0 insertions(+), 0 deletions(-) + create mode 100644 233 +``` + +## 保持本地仓库最新 + +在准备发起 Pull Request 之前,需要同步原仓库()最新的代码。 + +首先通过 `git remote` 查看当前远程仓库的名字。 + +```bash +➜ git remote +origin +➜ git remote -v +origin https://github.com/USERNAME/Paddle (fetch) +origin https://github.com/USERNAME/Paddle (push) +``` + +这里 origin 是我们 clone 的远程仓库的名字,也就是自己用户名下的 Paddle,接下来我们创建一个原始 Paddle 仓库的远程主机,命名为 upstream。 + +```bash +➜ git remote add upstream https://github.com/PaddlePaddle/Paddle +➜ git remote +origin +upstream +``` + +获取 upstream 的最新代码并更新当前分支。 + +```bash +➜ git fetch upstream +➜ git pull upstream develop +``` + +## Push 到远程仓库 + +将本地的修改推送到 GitHub 上,也就是 https://github.com/USERNAME/Paddle。 + +```bash +# 推送到远程仓库 origin 的 my-cool-stuff 分支上 +➜ git push origin my-cool-stuff +``` + +## 建立 Issue 并完成 Pull Request + +建立一个 Issue 描述问题,并记录它的编号。 + +切换到所建分支,然后点击 `New pull request`。 + +screen shot 2017-04-26 at 9 09 28 pm + +选择目标分支: + +screen shot 2017-04-26 at 9 11 52 pm + +在 PR 的描述说明中,填写 `resolve 
#Issue编号` 可以在这个 PR 被 merge 后,自动关闭对应的 Issue,具体请见 。 + +接下来等待 review,如果有需要修改的地方,参照上述步骤更新 origin 中的对应分支即可。 + +## 删除远程分支 + +在 PR 被 merge 进主仓库后,我们可以在 PR 的页面删除远程仓库的分支。 + +screen shot 2017-04-26 at 9 18 24 pm + +也可以使用 `git push origin :分支名` 删除远程分支,如: + +```bash +➜ git push origin :my-cool-stuff +``` + +## 删除本地分支 + +最后,删除本地分支。 + +```bash +# 切换到 develop 分支 +➜ git checkout develop + +# 删除 my-cool-stuff 分支 +➜ git branch -D my-cool-stuff +``` + +至此,我们就完成了一次代码贡献的过程。 + +## 提交代码的一些约定 + +为了使评审人在评审代码时更好地专注于代码本身,请您每次提交代码时,遵守以下约定: +1. 请保证 Travis-CI 中单元测试能顺利通过。如果没过,说明提交的代码存在问题,评审人一般不做评审。 +2. 提交Pull Request前: + - 请注意commit的数量: + - 原因:如果仅仅修改一个文件但提交了十几个commit,每个commit只做了少量的修改,这会给评审人带来很大困扰。评审人需要逐一查看每个commit才能知道做了哪些修改,且不排除commit之间的修改存在相互覆盖的情况。 + - 建议:每次提交时,保持尽量少的commit,可以通过`git commit --amend`补充上次的commit。对已经Push到远程仓库的多个commit,可以参考[squash commits after push](http://stackoverflow.com/questions/5667884/how-to-squash-commits-in-git-after-they-have-been-pushed)。 + - 请注意每个commit的名称:应能反映当前commit的内容,不能太随意。 +3. 如果解决了某个Issue的问题,请在该Pull Request的**第一个**评论框中加上:`fix #issue_number`,这样当该Pull Request被合并后,会自动关闭对应的Issue。关键词包括:close, closes, closed, fix, fixes, fixed, resolve, resolves, resolved,请选择合适的词汇。详细可参考[Closing issues via commit messages](https://help.github.com/articles/closing-issues-via-commit-messages)。 + +此外,在回复评审人意见时,请您遵守以下约定: +1. 评审人的每个意见都必须回复(这是开源社区的基本礼貌,别人帮了忙,应该说谢谢): + - 对评审意见同意且按其修改完的,给个简单的`Done`即可; + - 对评审意见不同意的,请给出您自己的反驳理由。 +2. 如果评审意见比较多: + - 请给出总体的修改情况。 + - 请采用[start a review](https://help.github.com/articles/reviewing-proposed-changes-in-a-pull-request/)进行回复,而非直接回复的方式。原因是每个回复都会发送一封邮件,会造成邮件灾难。 diff --git a/doc/howto/dev/contribute_to_paddle_en.md b/doc/howto/dev/contribute_to_paddle_en.md new file mode 100644 index 0000000000000000000000000000000000000000..40d1eb62d722244139cc84eb170c190d988f5626 --- /dev/null +++ b/doc/howto/dev/contribute_to_paddle_en.md @@ -0,0 +1,219 @@ +# Contribute Code + +We sincerely appreciate your contributions. You can use the fork and pull request +workflow to merge your code. + +## Code Requirements +- Your code comments must be fully documented in + [Doxygen](http://www.stack.nl/~dimitri/doxygen/) style. +- Make sure the compiler option `WITH_STYLE_CHECK` is on and the compiler + passes the code style check. +- All code must have unit tests. +- Pass all unit tests. + +The following tutorial guides you through submitting your contribution. + +## [Creating a Fork](https://help.github.com/articles/fork-a-repo/) + +Just head over to the GitHub page and click the "Fork" button. +It's just that simple. + +## Clone + +Clone the remote repository. + +```bash +➜ git clone https://github.com/USERNAME/Paddle +➜ cd Paddle +``` + +## Create a local branch + +Paddle is currently using the [Git-flow branching model](http://nvie.com/posts/a-successful-git-branching-model/). + +All feature and bug fix development work should be done on a new branch, generally created from the `develop` branch. + +```bash +➜ git checkout -b my-cool-stuff +``` + +Before the checkout, you need to keep the current branch directory clean, otherwise untracked files will be brought to the new branch, which can be inspected by `git status`. + +## Using `pre-commit` hook + +Paddle developers use the [pre-commit](http://pre-commit.com/) tool to manage git +pre-commit hooks. It can help us format source code (cpp, python) and check some +basic things before committing (only one EOL for each file, no huge files added +to git). `pre-commit` checks are part of the unit tests in Travis-CI now; a +PR that does not pass the hooks cannot be merged into Paddle. 
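+ Once it is installed as described below, a standard `pre-commit` setup also lets you run all hooks over the whole tree at any time, without waiting for `git commit`:
+
+ ```bash
+ ➜  pre-commit run --all-files
+ ```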
+ +To use [pre-commit](http://pre-commit.com/), you should install it with +`pip install pre-commit`. Currently, Paddle uses `clang-format` to format +C/C++ sources; please make sure clang-format 3.8+ is installed. + +Install and run it as follows: + +```bash +➜ pip install pre-commit +➜ pre-commit install +``` + +When you commit your code, the pre-commit hook will check whether there is +anything in the local code that is not suitable to commit. + +## Start to develop + +In this tutorial, I deleted a line in README.md and created a new file. + +We can use `git status` to inspect the changes in the current directory, and `git diff` to see the differences. + +```bash +➜ git status +On branch test +Changes not staged for commit: + (use "git add ..." to update what will be committed) + (use "git checkout -- ..." to discard changes in working directory) + + modified: README.md + +Untracked files: + (use "git add ..." to include in what will be committed) + + test + +no changes added to commit (use "git add" and/or "git commit -a") +``` +## Build and Test + +We package PaddlePaddle's compile environment into a Docker image, called the *develop image* and named `paddle:dev`; it contains all the compiling tools that PaddlePaddle needs. + +If you want to build the develop image, just run: + +```bash +➜ docker build -t paddle:dev . +``` + +Then we can use the develop image to build the PaddlePaddle source. For example: + +```bash +➜ docker run -v $(pwd):/paddle -e "WITH_GPU=OFF" -e "WITH_AVX=ON" -e "WITH_TEST=ON" paddle:dev +``` + +The above command will compile PaddlePaddle and create a Dockerfile for building the production image. All the generated files are in the build directory. "WITH_GPU" controls whether the generated production image supports GPU. "WITH_AVX" controls whether the generated production image supports AVX. "WITH_TEST" controls whether the unit tests will be generated. + +Then we can generate the production image by copying the compiled PaddlePaddle program into the image: + +```bash +➜ docker build -t paddle:prod -f build/Dockerfile . +``` + +Finally, run the unit tests: + +```bash +➜ docker run -it -v $(pwd):/paddle paddle:dev bash -c "cd /paddle/build && ctest" +``` + +For more details, you can read [this doc](https://github.com/PaddlePaddle/Paddle/blob/develop/doc/getstarted/build_and_install/docker_install_en.rst). + +## Commit + +Next we cancel the changes to the README.md file and then commit our changes with the following commands: + +```bash +➜ git checkout -- README.md +➜ git status +On branch test +Untracked files: + (use "git add ..." to include in what will be committed) + + test + +nothing added to commit but untracked files present (use "git add" to track) +➜ git add test +``` + +We should write a description of each commit via `git commit` to allow others to know +the changes in these files. 
```bash + ➜ git commit + CRLF end-lines remover...............................(no files to check)Skipped +yapf.................................................(no files to check)Skipped +Check for added large files..............................................Passed +Check for merge conflicts................................................Passed +Check for broken symlinks................................................Passed +Detect Private Key...................................(no files to check)Skipped +Fix End of Files.....................................(no files to check)Skipped +clang-formater.......................................(no files to check)Skipped +[my-cool-stuff c703c041] add test file + 1 file changed, 0 insertions(+), 0 deletions(-) + create mode 100644 233 +``` + +## Keeping Fork Up to Date + +Before creating your pull request, you should sync your code with the latest PaddlePaddle. +To do this, you'll first need to add a remote: + +```bash +➜ git remote add upstream https://github.com/PaddlePaddle/Paddle +➜ git remote +origin +upstream +``` + +Update your fork with the latest upstream changes: + +```bash +➜ git fetch upstream +➜ git pull upstream develop +``` + +Now, your local master branch is up-to-date with everything modified upstream. + +## Push to GitHub + +```bash +# push to your repository in GitHub +➜ git push origin my-cool-stuff +``` + +## Create an issue and a Pull Request + +Create an Issue to describe the problem and record its number. + +Go to the page for your fork on GitHub, select your development branch, +and click `New pull request`. + +screen shot 2017-04-26 at 9 09 28 pm + +Then select the target branch: + +screen shot 2017-04-26 at 9 11 52 pm + +We can add `resolve #Issue number` in the PR description to close the issue automatically after the PR is merged. More details in . + +Then wait for review; if modifications are needed, follow the steps above to update the corresponding branch in origin. + +## Delete origin branch + +After the PR is merged into the main repository, we can delete the remote branch on the PR page. + +screen shot 2017-04-26 at 9 18 24 pm + +Or just run: + +```bash +➜ git push origin :my-cool-stuff +``` + +## Delete local branch + +Finally, we delete the local branch: + +```bash +➜ git checkout develop + +# delete my-cool-stuff branch +➜ git branch -D my-cool-stuff +``` diff --git a/doc/howto/dev/new_layer_cn.rst b/doc/howto/dev/new_layer_cn.rst new file mode 100644 index 0000000000000000000000000000000000000000..75037e693b32f923ee7dc9dfec322495fe4ce10a --- /dev/null +++ b/doc/howto/dev/new_layer_cn.rst @@ -0,0 +1,389 @@ +================ +实现新的网络层 +================ + +这份教程展示了如何在PaddlePaddle中实现一个自定义的网络层。在这里我们使用全连接层作为例子来展示实现新网络层所需要的四个步骤。 + +1. 推导该层前向和后向传递的方程。 +2. 实现该层的C++类。 +3. 增加梯度检测的单元测试,以保证梯度的正确计算。 +4. 封装该层的Python接口。 + +推导方程 +================ + +首先我们需要推导该网络层的*前向传播*和*后向传播*的方程。前向传播给定输入,计算输出。后向传播给定输出的梯度,计算输入和参数的梯度。 + +下图是一个全连接层的示意图。在全连接层中,每个输出节点都连接到所有的输入节点上。 + +.. image:: FullyConnected.jpg + :align: center + :scale: 60 % + +一个网络层的前向传播部分把输入转化为相应的输出。 +全连接层以一个维度为 :math:`D_i` 的稠密向量作为输入,使用一个尺度为 :math:`D_i \times D_o` 的变换矩阵 :math:`W` 把 :math:`x` 映射到一个维度为 :math:`D_o` 的向量,并在乘积结果上再加上维度为 :math:`D_o` 的偏置向量 :math:`b` 。 + +.. math:: + + y = f(W^T x + b) + +其中 :math:`f(.)` 是一个非线性的*激活函数*,例如sigmoid、tanh以及ReLU。 + +变换矩阵 :math:`W` 和偏置向量 :math:`b` 是该网络层的*参数*。一个网络层的参数是在*反向传播*时被训练的。反向传播根据输出的梯度,分别计算每个参数的梯度,以及输入的梯度。优化器则用链式法则来对每个参数计算损失函数的梯度。 + +假设损失函数是 :math:`c(y)` ,那么 + +.. 
math:: + + \frac{\partial c(y)}{\partial x} = \frac{\partial c(y)}{\partial y} \frac{\partial y}{\partial x} + +假设 :math:`z = W^T x + b` ,那么 + +.. math:: + + \frac{\partial y}{\partial z} = \frac{\partial f(z)}{\partial z} + +PaddlePaddle的base layer类可以自动计算上面的导数。 + +因此,对全连接层来说,我们需要计算: + +.. math:: + + \frac{\partial z}{\partial x} = W, \frac{\partial z_j}{\partial W_{ij}} = x_i, \frac{\partial z}{\partial b} = \mathbf 1 + +其中 :math:`\mathbf 1` 是一个全1的向量, :math:`W_{ij}` 是矩阵 :math:`W` 第i行第j列的数值, :math:`z_j` 是向量 :math:`z` 的第j个值, :math:`x_i` 是向量 :math:`x` 的第i个值。 + +最后,我们使用链式法则计算 :math:`\frac{\partial c(y)}{\partial x}` 以及 :math:`\frac{\partial c(y)}{\partial W}` 。计算的细节将在下面的小节给出。 + +实现C++类 +=================== + +一个网络层的C++类需要实现初始化、前向和后向。全连接层的实现位于:code:`paddle/gserver/layers/FullyConnectedLayer.h`及:code:`paddle/gserver/layers/FullyConnectedLayer.cpp`。这里我们展示一份简化过的代码。 + +这个类需要继承 :code:`paddle::Layer` 这个基类,并且需要重写基类中的以下几个虚函数: + +- 类的构造函数和析构函数。 +- :code:`init` 函数。用于初始化参数和设置。 +- :code:`forward` 。实现网络层的前向传播。 +- :code:`backward` 。实现网络层的后向传播。 +- :code:`prefetch` 。用来从参数服务器预取参数矩阵相应的行。如果网络层不需要远程稀疏更新,则不需要重写该函数。(大多数网络层不需要支持远程稀疏更新) + + +头文件如下: + +.. code-block:: c++ + + namespace paddle { + /** + * 全连接层的每个输出都连接到上一层的所有的神经元上。 + * 它的输入与经过学习的参数做内积并加上偏置(可选)。 + * + * 配置文件接口是fc_layer。 + */ + + class FullyConnectedLayer : public Layer { + protected: + WeightList weights_; + std::unique_ptr<Weight> biases_; + + public: + explicit FullyConnectedLayer(const LayerConfig& config) + : Layer(config) {} + ~FullyConnectedLayer() {} + + bool init(const LayerMap& layerMap, const ParameterMap& parameterMap); + + Weight& getWeight(int idx) { return *weights_[idx]; } + + void prefetch(); + void forward(PassType passType); + void backward(const UpdateCallback& callback = nullptr); + }; + } // namespace paddle + +头文件中把参数定义为类的成员变量。我们使用 :code:`Weight` 类作为参数的抽象,它支持多线程更新。该类的实现细节在“实现细节”中详细介绍。 + +- :code:`weights_` 是存有一系列变换矩阵的权重。在当前的实现方式下,网络层可以有多个输入。因此,它可能有不止一个权重。每个权重对应一个输入。 +- :code:`biases_` 是存有偏置向量的权重。 + +全连接层没有网络层配置的超参数。如果一个网络层需要配置的话,通常的做法是将配置存于 :code:`LayerConfig& config` 中,并在类构建函数中把它放入一个类成员变量里。 + +下面的代码片段实现了 :code:`init` 函数。 + +- 首先,所有的 :code:`init` 函数必须先调用基类中的函数 :code:`Layer::init(layerMap, parameterMap);` 。该语句会为每个层初始化其所需要的变量和连接。 +- 之后初始化所有的权重矩阵 :math:`W` 。当前的实现方式下,网络层可以有多个输入。因此,它可能有不止一个权重。 +- 最后,初始化偏置向量。 + + +.. 
code-block:: c++ + + bool FullyConnectedLayer::init(const LayerMap& layerMap, + const ParameterMap& parameterMap) { + /* 初始化父类 */ + Layer::init(layerMap, parameterMap); + + /* 初始化权重表 */ + CHECK(inputLayers_.size() == parameters_.size()); + for (size_t i = 0; i < inputLayers_.size(); i++) { + // 获得参数尺寸 + size_t height = inputLayers_[i]->getSize(); + size_t width = getSize(); + + // 新建一个权重 + if (parameters_[i]->isSparse()) { + CHECK_LE(parameters_[i]->getSize(), width * height); + } else { + CHECK_EQ(parameters_[i]->getSize(), width * height); + } + Weight* w = new Weight(height, width, parameters_[i]); + + // 将新建的权重加入权重表 + weights_.emplace_back(w); + } + + /* 初始化biases_ */ + if (biasParameter_.get() != NULL) { + biases_ = std::unique_ptr<Weight>(new Weight(1, getSize(), biasParameter_)); + } + + return true; + } + +实现前向传播的部分有下面几个步骤。 + +- 每个层在其 :code:`forward` 函数的开头必须调用 :code:`Layer::forward(passType);` 。 +- 之后使用 :code:`reserveOutput(batchSize, size);` 为输出分配内存。由于我们支持训练数据有不同的批次大小,所以这一步是必要的。 :code:`reserveOutput` 会相应地改变输出的尺寸。为了保证效率,如果需要扩大矩阵,我们会重新分配内存;如果需要缩减矩阵,我们会继续使用现有的内存块。 +- 之后使用矩阵运算函数来计算 :math:`\sum_i W_i x_i + b`。:code:`getInput(i).value` 返回第i个输入矩阵。每个输入都是一个 :math:`batchSize \times dim` 的矩阵,每行表示一个批次中的单个输入。对于我们支持的全部矩阵操作,请参考 :code:`paddle/math/Matrix.h`和:code:`paddle/math/BaseMatrix.h` 。 +- 最终,使用 :code:`forwardActivation();` 进行激活操作。这会自动进行网络配置中声明的激活操作。 + + +.. code-block:: c++ + + void FullyConnectedLayer::forward(PassType passType) { + Layer::forward(passType); + + /* 若有必要,为output_申请内存 */ + int batchSize = getInput(0).getBatchSize(); + int size = getSize(); + + { + // 设置输出的尺寸 + reserveOutput(batchSize, size); + } + + MatrixPtr outV = getOutputValue(); + + // 对每个输入乘上变换矩阵 + for (size_t i = 0; i != inputLayers_.size(); ++i) { + auto input = getInput(i); + CHECK(input.value) << "The input of 'fc' layer must be matrix"; + i == 0 ? outV->mul(input.value, weights_[i]->getW(), 1, 0) + : outV->mul(input.value, weights_[i]->getW(), 1, 1); + } + + /* 加上偏置向量 */ + if (biases_.get() != NULL) { + outV->addBias(*(biases_->getW()), 1); + } + + /* 激活 */ { + forwardActivation(); + } + } + +实现后向传播的部分有下面几个步骤。 + +- :code:`backwardActivation()` 计算激活函数的梯度。通过 :code:`getOutputGrad()` 来获得输出的梯度,调用该函数后,梯度会就地(不使用额外空间)乘上输出的梯度。 +- 计算偏置的梯度。注意,我们使用 :code:`biases_->getWGrad()` 来得到某个特定参数的梯度矩阵。在一个参数的梯度被更新后,**必须**要调用 :code:`getParameterPtr()->incUpdate(callback);` 。这用于在多线程和多机上更新参数。 +- 最后,计算转换矩阵和输入的梯度,并对相应的参数调用 :code:`incUpdate` 。PaddlePaddle可以通过该机制判断是否已经收集齐所有的梯度,从而可以做一些与计算重叠的工作(例如,网络通信)。 + + +.. 
+
+实现后向传播的部分有下面几个步骤。
+
+- :code:`backwardActivation()` 计算激活函数的梯度。通过 :code:`getOutputGrad()` 来获得输出的梯度,调用该函数后,梯度会就地(不使用额外空间)乘上输出的梯度。
+- 计算偏置的梯度。注意,我们使用 :code:`biases_->getWGrad()` 来得到某个特定参数的梯度矩阵。在一个参数的梯度被更新后,**必须**要调用 :code:`getParameterPtr()->incUpdate(callback);` 。这用于在多线程和多机上更新参数。
+- 最后,计算变换矩阵和输入的梯度,并对相应的参数调用 :code:`incUpdate` 。PaddlePaddle可以通过该机制判断是否已经收集齐某个参数的所有梯度,从而可以做一些与计算重叠的工作(例如,网络通信)。
+
+
+.. code-block:: c++
+
+   void FullyConnectedLayer::backward(const UpdateCallback& callback) {
+     /* 对激活求导 */ {
+       backwardActivation();
+     }
+
+     if (biases_ && biases_->getWGrad()) {
+       biases_->getWGrad()->collectBias(*getOutputGrad(), 1);
+
+       biases_->getParameterPtr()->incUpdate(callback);
+     }
+
+     bool syncFlag = hl_get_sync_flag();
+
+     for (size_t i = 0; i != inputLayers_.size(); ++i) {
+       /* 计算当前层权重的梯度 */
+       if (weights_[i]->getWGrad()) {
+         MatrixPtr input_T = getInputValue(i)->getTranspose();
+         MatrixPtr oGrad = getOutputGrad();
+         {
+           weights_[i]->getWGrad()->mul(input_T, oGrad, 1, 1);
+         }
+       }
+
+       /* 计算输入层的梯度 */
+       MatrixPtr preGrad = getInputGrad(i);
+       if (NULL != preGrad) {
+         MatrixPtr weights_T = weights_[i]->getW()->getTranspose();
+         preGrad->mul(getOutputGrad(), weights_T, 1, 1);
+       }
+
+       {
+         weights_[i]->getParameterPtr()->incUpdate(callback);
+       }
+     }
+   }
+
+:code:`prefetch` 函数指出了在训练时需要从参数服务器取出的行。仅在远程稀疏训练时有效。使用远程稀疏方式训练时,完整的参数矩阵被分布在不同的参数服务器上。当网络层用一个批次做训练时,该批次的输入中仅有一个子集是非零的。因此,该层仅需要这些非零样本位置所对应的变换矩阵的那些行。 :code:`prefetch` 表明了这些行的标号。
+
+大多数层不需要远程稀疏训练函数。这种情况下不需要重写该函数。
+
+.. code-block:: c++
+
+   void FullyConnectedLayer::prefetch() {
+     for (size_t i = 0; i != inputLayers_.size(); ++i) {
+       auto* sparseParam =
+           dynamic_cast<SparsePrefetchRowCpuMatrix*>(weights_[i]->getW().get());
+       if (sparseParam) {
+         MatrixPtr input = getInputValue(i);
+         sparseParam->addRows(input);
+       }
+     }
+   }
+
+最后,使用 :code:`REGISTER_LAYER(fc, FullyConnectedLayer);` 来注册该层。 :code:`fc` 是该层的标识符, :code:`FullyConnectedLayer` 是该层的类名。
+
+.. code-block:: c++
+
+   namespace paddle {
+   REGISTER_LAYER(fc, FullyConnectedLayer);
+   }
+
+若 :code:`cpp` 文件被放在 :code:`paddle/gserver/layers` 目录下,其会自动被加入编译列表。
+
+
+写梯度检查单元测试
+===============================
+
+写梯度检查单元测试是一个验证新实现的层是否正确的相对简单的办法。梯度检查单元测试通过有限差分法来验证一个层的梯度。首先对输入做一个小的扰动 :math:`\Delta x` ,然后观察到输出的变化为 :math:`\Delta y` ,那么,梯度就可以近似为 :math:`\frac{\Delta y}{\Delta x}` 。之后,再用这个梯度和 :code:`backward` 函数得到的梯度对比,以保证梯度计算的正确性。需要注意的是梯度检查仅仅验证了梯度的计算,并不保证 :code:`forward` 和 :code:`backward` 函数的实现是正确的。你需要一些更复杂的单元测试来保证你实现的网络层是正确的。
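+
+下面给出有限差分思想的一个极简示意(与PaddlePaddle的测试框架无关,函数名、步长与容差均为本文假设):
+
+.. code-block:: c++
+
+   #include <algorithm>
+   #include <cmath>
+   #include <functional>
+
+   // 仅为示意:用中心差分近似标量函数 f 在 x 处的导数,
+   // 并与解析梯度 analytic 比较,检查相对误差是否足够小。
+   bool checkScalarGrad(const std::function<double(double)>& f, double x,
+                        double analytic, double eps = 1e-5, double tol = 1e-4) {
+     double numeric = (f(x + eps) - f(x - eps)) / (2.0 * eps);
+     double denom = std::max(1.0, std::fabs(analytic));
+     return std::fabs(numeric - analytic) / denom < tol;
+   }
+
+实际的梯度检查会对整批输入和全部参数逐个做类似的比较,这大致就是下面 :code:`testLayerGrad` 所封装的工作。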
+
+所有网络层的梯度检查单测都位于 :code:`paddle/gserver/tests/test_LayerGrad.cpp` 。我们建议你在写新网络层时把测试代码放入新的文件中。下面列出了全连接层的梯度检查单元测试。它包含以下几步:
+
++ 生成网络层配置。网络层配置包含以下几项:
+   - 偏置参数的大小。(例子中是4096)
+   - 层的类型。(例子中是fc)
+   - 层的大小。(例子中是4096)
+   - 激活的类型。(例子中是softmax)
+   - dropout的比例。(例子中是0.1)
++ 配置网络层的输入。在这个例子里,我们仅有一个输入。
+   - 输入的类型( :code:`INPUT_DATA` ),可以是以下几种:
+       - :code:`INPUT_DATA` :稠密向量。
+       - :code:`INPUT_LABEL` :整数。
+       - :code:`INPUT_DATA_TARGET` :稠密向量,但不用于计算梯度。
+       - :code:`INPUT_SEQUENCE_DATA` :含有序列信息的稠密向量。
+       - :code:`INPUT_HASSUB_SEQUENCE_DATA` :含有序列信息和子序列信息的稠密向量。
+       - :code:`INPUT_SEQUENCE_LABEL` :含有序列信息的整数。
+       - :code:`INPUT_SPARSE_NON_VALUE_DATA` :0-1稀疏数据。
+       - :code:`INPUT_SPARSE_FLOAT_VALUE_DATA` :浮点稀疏数据。
+   - 输入的名字。(例子中是 :code:`layer_0` )
+   - 输入的大小。(例子中是8192)
+   - 非零数字的个数,仅对稀疏数据有效。
+   - 稀疏数据的格式,仅对稀疏数据有效。
++ 对每个输入,都需要调用一次 :code:`config.layerConfig.add_inputs();` 。
++ 调用 :code:`testLayerGrad` 来做梯度检查。它包含以下参数。
+   - 层和输入的配置。(例子中是 :code:`config` )
+   - 网络层的类型。(例子中是 :code:`fc` )
+   - 梯度检查的输入数据的批次大小。(例子中是100)
+   - 输入是否是转置的。大多数层需要设置为 :code:`false` 。(例子中是 :code:`false` )
+   - 是否使用权重。有些层或者激活需要做归一化以保证它们的输出的和是一个常数。例如,softmax激活的输出的和总是1。在这种情况下,我们不能通过常规的梯度检查的方式来计算梯度。因此我们采用输出的加权和(非常数)来计算梯度。(例子中是 :code:`true` ,因为全连接层的激活可以是softmax)
+
+.. code-block:: c++
+
+   void testFcLayer(string format, size_t nnz) {
+     // Create layer configuration.
+     TestConfig config;
+     config.biasSize = 4096;
+     config.layerConfig.set_type("fc");
+     config.layerConfig.set_size(4096);
+     config.layerConfig.set_active_type("softmax");
+     config.layerConfig.set_drop_rate(0.1);
+     // Setup inputs.
+     config.inputDefs.push_back(
+         {INPUT_DATA, "layer_0", 8192, nnz, ParaSparse(format)});
+     config.layerConfig.add_inputs();
+     LOG(INFO) << config.inputDefs[0].sparse.sparse << " "
+               << config.inputDefs[0].sparse.format;
+     for (auto useGpu : {false, true}) {
+       testLayerGrad(config, "fc", 100, /* trans */ false, useGpu,
+                     /* weight */ true);
+     }
+   }
+
+如果你要为了测试而增加新的文件,例如 :code:`paddle/gserver/tests/testFCGrad.cpp` ,你需要把该文件加入 :code:`paddle/gserver/tests/CMakeLists.txt` 中。下面给出了一个例子。当你执行命令 :code:`make tests` 时,所有的单测都会被执行一次。注意,有些层可能需要高精度来保证梯度检查单测正确执行,你需要在配置cmake时将 :code:`WITH_DOUBLE` 设置为 :code:`ON` 。
+
+.. code-block:: bash
+
+   add_unittest_without_exec(test_FCGrad
+       test_FCGrad.cpp
+       LayerGradUtil.cpp
+       TestUtil.cpp)
+
+   add_test(NAME test_FCGrad
+       COMMAND test_FCGrad)
+
+
+实现python封装
+========================
+
+Python封装的实现使得我们可以在配置文件中使用新实现的网络层。所有的Python封装都在 :code:`python/paddle/trainer/config_parser.py` 中。全连接层Python封装的例子中包含下面几步:
+
+- 所有的Python封装都使用 :code:`@config_layer('fc')` 这样的装饰器。网络层的标识符为 :code:`fc` 。
+- 实现构造函数 :code:`__init__` 。
+   - 它首先调用基构造函数 :code:`super(FCLayer, self).__init__(name, 'fc', size, inputs=inputs, **xargs)` 。 :code:`FCLayer` 是Python封装的类名, :code:`fc` 是网络层的标识符。为了封装能够正确工作,这些名字必须要写对。
+   - 之后,计算变换矩阵的大小和格式(是否稀疏)。
+
+.. code-block:: python
+
+   @config_layer('fc')
+   class FCLayer(LayerBase):
+       def __init__(
+               self,
+               name,
+               size,
+               inputs,
+               bias=True,
+               **xargs):
+           super(FCLayer, self).__init__(name, 'fc', size, inputs=inputs, **xargs)
+           for input_index in xrange(len(self.inputs)):
+               input_layer = self.get_input_layer(input_index)
+               psize = self.config.size * input_layer.size
+               dims = [input_layer.size, self.config.size]
+               format = self.inputs[input_index].format
+               sparse = format == "csr" or format == "csc"
+               if sparse:
+                   psize = self.inputs[input_index].nnz
+               self.create_input_parameter(input_index, psize, dims, sparse, format)
+           self.create_bias_parameter(bias, self.config.size)
+
+在网络配置中,网络层的细节可以通过下面这些代码片段来指定。这个类的参数包括:
+
+- :code:`name` 是网络层实例的名字标识符。
+- :code:`type` 是网络层的类型,通过网络层的标识符来指定。
+- :code:`size` 是网络层输出的大小。
+- :code:`bias` 表明这个层的一个实例是否需要偏置。
+- :code:`inputs` 说明这个层的输入,输入是由一个list中的网络层实例的名字组成的。
+
+.. code-block:: python
+
+   Layer(
+       name = "fc1",
+       type = "fc",
+       size = 64,
+       bias = True,
+       inputs = [Input("pool3")]
+   )
+
+我们建议你为你的Python封装实现一个"助手",使得搭模型时更方便。具体可以参考 :code:`python/paddle/trainer_config_helpers/layers.py` 。
diff --git a/doc/howto/dev/new_layer_en.rst b/doc/howto/dev/new_layer_en.rst
new file mode 100644
index 0000000000000000000000000000000000000000..110a9fb38f890a766bb4480e91feb22d3b0838a5
--- /dev/null
+++ b/doc/howto/dev/new_layer_en.rst
@@ -0,0 +1,390 @@
+================
+Write New Layers
+================
+
+This tutorial will guide you through writing customized layers in PaddlePaddle. We will use the fully connected layer as an example to guide you through the following steps for writing a new layer.
+
+- Derive equations for the forward and backward part of the layer.
+- Implement the C++ class for the layer.
+- Write a gradient check unit test to make sure the gradients are correctly computed.
+- Implement the Python wrapper for the layer.
+
+Derive Equations
+================
+
+First we need to derive equations of the *forward* and *backward* part of the layer. The forward part computes the output given an input.
+The backward part computes the gradients of the input and the parameters given the gradients of the output.
+
+The illustration of a fully connected layer is shown in the following figure. In a fully connected layer, all output nodes are connected to all the input nodes.
+
+.. image:: FullyConnected.jpg
+    :align: center
+    :scale: 60 %
+
+The *forward part* of a layer transforms an input into the corresponding output.
+The fully connected layer takes a dense input vector with dimension :math:`D_i`. It uses a transformation matrix :math:`W` with size :math:`D_i \times D_o` to project :math:`x` into a :math:`D_o` dimensional vector, and adds a bias vector :math:`b` with dimension :math:`D_o` to the vector.
+
+.. math::
+
+    y = f(W^T x + b)
+
+where :math:`f(.)` is a nonlinear *activation* function, such as sigmoid, tanh, and ReLU.
+
+The transformation matrix :math:`W` and bias vector :math:`b` are the *parameters* of the layer. The *parameters* of a layer are learned during training in the *backward pass*. The backward pass computes the gradients of the output function with respect to all parameters and inputs. The optimizer can use the chain rule to compute the gradients of the loss function with respect to each parameter.
+
+Suppose our loss function is :math:`c(y)`, then
+
+.. math::
+
+    \frac{\partial c(y)}{\partial x} = \frac{\partial c(y)}{\partial y} \frac{\partial y}{\partial x}
+
+Suppose :math:`z = W^T x + b`, then
+
+.. math::
+
+    \frac{\partial y}{\partial z} = \frac{\partial f(z)}{\partial z}
+
+This derivative can be automatically computed by our base layer class.
+
+Then, for the fully connected layer, we need to compute:
+
+.. math::
+
+    \frac{\partial z}{\partial x} = W, \frac{\partial z_j}{\partial W_{ij}} = x_i, \frac{\partial z}{\partial b} = \mathbf 1
+
+where :math:`\mathbf 1` is an all-one vector, :math:`W_{ij}` is the number at the i-th row and j-th column of the matrix :math:`W`, :math:`z_j` is the j-th component of the vector :math:`z`, and :math:`x_i` is the i-th component of the vector :math:`x`.
+
+Finally we can use the chain rule to calculate :math:`\frac{\partial z}{\partial x}` and :math:`\frac{\partial z}{\partial W}`. The details of the computation will be given in the next section.
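+
+As a quick sanity check on shapes: if a batch of inputs :math:`x` is an :math:`N \times D_i` matrix and :math:`\partial c/\partial z` is :math:`N \times D_o`, then the batched versions of the gradients above are
+
+.. math::
+
+    \frac{\partial c}{\partial x} = \frac{\partial c}{\partial z} W^T, \qquad
+    \frac{\partial c}{\partial W} = x^T \frac{\partial c}{\partial z}, \qquad
+    \frac{\partial c}{\partial b} = \mathbf 1^T \frac{\partial c}{\partial z}
+
+with shapes :math:`N \times D_i`, :math:`D_i \times D_o`, and :math:`1 \times D_o` respectively. These are exactly the three products computed by the backward implementation below.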
+
+Implement C++ Class
+===================
+
+The C++ class of the layer implements the initialization, forward, and backward part of the layer. The fully connected layer is at :code:`paddle/gserver/layers/FullyConnectedLayer.h` and :code:`paddle/gserver/layers/FullyConnectedLayer.cpp`. We list a simplified version of the code below.
+
+It needs to derive from the base class :code:`paddle::Layer`, and it needs to implement the following functions:
+
+- constructor and destructor.
+- :code:`init` function. It is used to initialize the parameters and settings.
+- :code:`forward`. It implements the forward part of the layer.
+- :code:`backward`. It implements the backward part of the layer.
+- :code:`prefetch`. It is used to determine the rows of the parameter matrix to prefetch from the parameter server. You do not need to override this function if your layer does not need remote sparse update. (Most layers do not need to support remote sparse update.)
+
+
+The header file is listed below:
+
+.. code-block:: c++
+
+   namespace paddle {
+   /**
+    * A layer has full connections to all neurons in the previous layer.
+    * It computes an inner product with a set of learned weights, and
+    * (optionally) adds biases.
+    *
+    * The config file api is fc_layer.
+    */
+
+   class FullyConnectedLayer : public Layer {
+   protected:
+     WeightList weights_;
+     std::unique_ptr<Weight> biases_;
+
+   public:
+     explicit FullyConnectedLayer(const LayerConfig& config)
+         : Layer(config) {}
+     ~FullyConnectedLayer() {}
+
+     bool init(const LayerMap& layerMap, const ParameterMap& parameterMap);
+
+     Weight& getWeight(int idx) { return *weights_[idx]; }
+
+     void prefetch();
+     void forward(PassType passType);
+     void backward(const UpdateCallback& callback = nullptr);
+   };
+   }  // namespace paddle
+
+The header defines the parameters as class member variables. We use the :code:`Weight` class as an abstraction of a parameter; it supports multi-threaded updates. The details of this class show up in the implementations below.
+
+- :code:`weights_` is a list of weights for the transformation matrices. The current implementation can have more than one input; thus, it has a list of weights, one weight per input.
+- :code:`biases_` is a weight for the bias vector.
+
+The fully connected layer does not have layer configuration hyper-parameters. If a layer does have hyper-parameters, a common practice is to store them in :code:`LayerConfig& config` and copy them into class member variables in the constructor.
+
+The following code snippet implements the :code:`init` function.
+
+- First, every :code:`init` function must call the :code:`init` function of the base class, :code:`Layer::init(layerMap, parameterMap);`. This statement initializes the required variables and connections for each layer.
+- Then it initializes all the weight matrices :math:`W`. The current implementation can have more than one input; thus, it has a list of weights.
+- Finally, it initializes the bias.
+
+
+.. code-block:: c++
+
+   bool FullyConnectedLayer::init(const LayerMap& layerMap,
+                                  const ParameterMap& parameterMap) {
+     /* Initialize the basic parent class */
+     Layer::init(layerMap, parameterMap);
+
+     /* initialize the weightList */
+     CHECK(inputLayers_.size() == parameters_.size());
+     for (size_t i = 0; i < inputLayers_.size(); i++) {
+       // Obtain the parameter size
+       size_t height = inputLayers_[i]->getSize();
+       size_t width = getSize();
+
+       // create a new weight
+       if (parameters_[i]->isSparse()) {
+         CHECK_LE(parameters_[i]->getSize(), width * height);
+       } else {
+         CHECK_EQ(parameters_[i]->getSize(), width * height);
+       }
+       Weight* w = new Weight(height, width, parameters_[i]);
+
+       // append the new weight to the list
+       weights_.emplace_back(w);
+     }
+
+     /* initialize biases_ */
+     if (biasParameter_.get() != NULL) {
+       biases_ = std::unique_ptr<Weight>(new Weight(1, getSize(), biasParameter_));
+     }
+
+     return true;
+   }
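+
+For reference while reading this code, here is a minimal sketch of the :code:`Weight` interface as this tutorial uses it. It is reconstructed from usage in the code above and below, with :code:`MatrixPtr` and :code:`ParameterPtr` coming from the Paddle headers; it is not the complete class from the PaddlePaddle source.
+
+.. code-block:: c++
+
+   // A sketch only: the Weight surface this tutorial relies on,
+   // reconstructed from how the layer code uses it.
+   class Weight {
+   public:
+     // A height x width parameter matrix backed by `parameter`.
+     Weight(size_t height, size_t width, ParameterPtr parameter);
+     MatrixPtr getW();                // parameter values
+     MatrixPtr getWGrad();            // parameter gradients
+     ParameterPtr getParameterPtr();  // for incUpdate(callback) etc.
+   };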
+
+The implementation of the forward part has the following steps.
+
+- Every layer must call :code:`Layer::forward(passType);` at the beginning of its :code:`forward` function.
+- Then it allocates memory for the output using :code:`reserveOutput(batchSize, size);`. This step is necessary because we allow batches to have different sizes. :code:`reserveOutput` changes the size of the output accordingly. For the sake of efficiency, new memory is allocated when the matrix needs to grow, but the existing memory block is reused when it shrinks.
+- Then it computes :math:`\sum_i W_i x + b` using Matrix operations. :code:`getInput(i).value` retrieves the matrix of the i-th input. Each input is a :math:`batchSize \times dim` matrix, where each row represents a single input in a batch. For a complete list of supported matrix operations, please refer to :code:`paddle/math/Matrix.h` and :code:`paddle/math/BaseMatrix.h`.
+- Finally it applies the activation function using :code:`forwardActivation();`, which automatically applies the activation function specified in the network configuration.
+
+
+.. code-block:: c++
+
+   void FullyConnectedLayer::forward(PassType passType) {
+     Layer::forward(passType);
+
+     /* malloc memory for the output_ if necessary */
+     int batchSize = getInput(0).getBatchSize();
+     int size = getSize();
+
+     {
+       // Set up the size of the output.
+       reserveOutput(batchSize, size);
+     }
+
+     MatrixPtr outV = getOutputValue();
+
+     // Apply the transformation matrix to each input.
+     for (size_t i = 0; i != inputLayers_.size(); ++i) {
+       auto input = getInput(i);
+       CHECK(input.value) << "The input of 'fc' layer must be matrix";
+       i == 0 ? outV->mul(input.value, weights_[i]->getW(), 1, 0)
+              : outV->mul(input.value, weights_[i]->getW(), 1, 1);
+     }
+
+     /* add the bias-vector */
+     if (biases_.get() != NULL) {
+       outV->addBias(*(biases_->getW()), 1);
+     }
+
+     /* activation */ {
+       forwardActivation();
+     }
+   }
+
+The implementation of the backward part has the following steps.
+
+- :code:`backwardActivation()` computes the gradients of the activation. The gradients are multiplied in place onto the gradients of the output, which can be retrieved using :code:`getOutputGrad()`.
+- Compute the gradients of the bias. Notice that we can use :code:`biases_->getWGrad()` to get the gradient matrix of the corresponding parameter. After the gradient of a parameter is updated, the code **MUST** call :code:`getParameterPtr()->incUpdate(callback);`. This is used for parameter updates over multiple threads or multiple machines.
+- Then it computes the gradients of the transformation matrices and inputs, and calls :code:`incUpdate` for the corresponding parameter. This gives the framework the chance to know whether it has gathered all the gradients for a parameter, so that it can overlap some work (e.g., network communication) with computation.
+
+
+.. code-block:: c++
+
+   void FullyConnectedLayer::backward(const UpdateCallback& callback) {
+     /* Do derivation for activations. */ {
+       backwardActivation();
+     }
+
+     if (biases_ && biases_->getWGrad()) {
+       biases_->getWGrad()->collectBias(*getOutputGrad(), 1);
+
+       biases_->getParameterPtr()->incUpdate(callback);
+     }
+
+     bool syncFlag = hl_get_sync_flag();
+
+     for (size_t i = 0; i != inputLayers_.size(); ++i) {
+       /* Calculate the W-gradient for the current layer */
+       if (weights_[i]->getWGrad()) {
+         MatrixPtr input_T = getInputValue(i)->getTranspose();
+         MatrixPtr oGrad = getOutputGrad();
+         {
+           weights_[i]->getWGrad()->mul(input_T, oGrad, 1, 1);
+         }
+       }
+
+       /* Calculate the input layer's error */
+       MatrixPtr preGrad = getInputGrad(i);
+       if (NULL != preGrad) {
+         MatrixPtr weights_T = weights_[i]->getW()->getTranspose();
+         preGrad->mul(getOutputGrad(), weights_T, 1, 1);
+       }
+
+       {
+         weights_[i]->getParameterPtr()->incUpdate(callback);
+       }
+     }
+   }
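+
+A note on the :code:`mul(a, b, scaleAB, scaleT)` calls in the forward and backward code above. Based on how the code switches the last argument between 0 and 1 (the argument names are our own shorthand), the call can be read as :math:`out = scaleAB \cdot (ab) + scaleT \cdot out`: the first input overwrites the output, and later inputs accumulate into it. Below is a PaddlePaddle-independent C++ sketch of this accumulate semantics:
+
+.. code-block:: c++
+
+   #include <cstddef>
+   #include <vector>
+
+   // A sketch only: emulates out = scaleAB * ab + scaleT * out, where ab
+   // stands for the (flattened) result of some matrix product A*B.
+   void mulInto(std::vector<float>& out, const std::vector<float>& ab,
+                float scaleAB, float scaleT) {
+     for (std::size_t k = 0; k < out.size(); ++k) {
+       out[k] = scaleAB * ab[k] + scaleT * out[k];
+     }
+   }
+
+   // Usage: the first input overwrites (scaleT = 0), later inputs
+   // accumulate (scaleT = 1), yielding w0x + w1x after both calls:
+   //   mulInto(out, w0x, 1, 0);
+   //   mulInto(out, w1x, 1, 1);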
+
+The :code:`prefetch` function specifies the rows that need to be fetched from the parameter server during training. It is only useful for remote sparse training. In remote sparse training, the full parameter matrix is stored in a distributed fashion at the parameter servers. When the layer processes a batch for training, only a subset of the input positions is non-zero in this batch. Thus, this layer only needs the rows of the transformation matrix corresponding to these non-zero positions. The :code:`prefetch` function specifies the ids of these rows.
+
+Most layers do not need the remote sparse training function. You do not need to override this function in this case.
+
+.. code-block:: c++
+
+   void FullyConnectedLayer::prefetch() {
+     for (size_t i = 0; i != inputLayers_.size(); ++i) {
+       auto* sparseParam =
+           dynamic_cast<SparsePrefetchRowCpuMatrix*>(weights_[i]->getW().get());
+       if (sparseParam) {
+         MatrixPtr input = getInputValue(i);
+         sparseParam->addRows(input);
+       }
+     }
+   }
+
+Finally, you can use :code:`REGISTER_LAYER(fc, FullyConnectedLayer);` to register the layer. :code:`fc` is the identifier of the layer, and :code:`FullyConnectedLayer` is the class name of the layer.
+
+.. code-block:: c++
+
+   namespace paddle {
+   REGISTER_LAYER(fc, FullyConnectedLayer);
+   }
+
+If the :code:`cpp` file is put into :code:`paddle/gserver/layers`, it will be automatically added to the compilation list.
+
+
+Write Gradient Check Unit Test
+===============================
+
+An easy way to verify the correctness of a new layer's implementation is to write a gradient check unit test. A gradient check unit test uses the finite difference method to verify the gradient of a layer. It modifies the input with a small perturbation :math:`\Delta x` and observes the change of the output :math:`\Delta y`; the gradient can then be approximated as :math:`\frac{\Delta y}{\Delta x}`. This gradient can be compared with the gradient computed by the :code:`backward` function of the layer to ensure the correctness of the gradient computation. Notice that the gradient check only tests the correctness of the gradient computation; it does not necessarily guarantee that the :code:`forward` and :code:`backward` functions are implemented correctly. You need more sophisticated unit tests to make sure your layer is implemented correctly.
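+
+To make the idea concrete, here is a minimal, framework-independent sketch of such a check for a scalar function (the helper name, epsilon, and tolerance are our own choices):
+
+.. code-block:: c++
+
+   #include <algorithm>
+   #include <cmath>
+   #include <functional>
+
+   // A sketch only: approximate the derivative of f at x with a centered
+   // difference and compare it against the analytic gradient.
+   bool checkScalarGrad(const std::function<double(double)>& f, double x,
+                        double analytic, double eps = 1e-5, double tol = 1e-4) {
+     double numeric = (f(x + eps) - f(x - eps)) / (2.0 * eps);
+     double denom = std::max(1.0, std::fabs(analytic));
+     return std::fabs(numeric - analytic) / denom < tol;
+   }
+
+The real :code:`testLayerGrad` described below performs this kind of comparison for whole batches of inputs and for every parameter of the layer.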
+
+All the gradient check unit tests are located in :code:`paddle/gserver/tests/test_LayerGrad.cpp`. You are recommended to put your test into a new test file if you are planning to write a new layer. The gradient check unit test of the fully connected layer is listed below. It has the following steps.
+
++ Create the layer configuration. A layer configuration can include the following attributes:
+   - size of the bias parameter. (4096 in our example)
+   - type of the layer. (fc in our example)
+   - size of the layer. (4096 in our example)
+   - activation type. (softmax in our example)
+   - dropout rate. (0.1 in our example)
++ Configure the input of the layer. In our example, we have only one input.
+   - type of the input (:code:`INPUT_DATA` in our example). It can be one of the following types:
+       - :code:`INPUT_DATA`: dense vector.
+       - :code:`INPUT_LABEL`: integer.
+       - :code:`INPUT_DATA_TARGET`: dense vector, but it is not used to compute gradients.
+       - :code:`INPUT_SEQUENCE_DATA`: dense vector with sequence information.
+       - :code:`INPUT_HASSUB_SEQUENCE_DATA`: dense vector with both sequence and sub-sequence information.
+       - :code:`INPUT_SEQUENCE_LABEL`: integer with sequence information.
+       - :code:`INPUT_SPARSE_NON_VALUE_DATA`: 0-1 sparse data.
+       - :code:`INPUT_SPARSE_FLOAT_VALUE_DATA`: float sparse data.
+   - name of the input. (:code:`layer_0` in our example)
+   - size of the input. (8192 in our example)
+   - number of non-zeros, only useful for sparse inputs.
+   - format of sparse data, only useful for sparse inputs.
++ Each input needs to call :code:`config.layerConfig.add_inputs();` once.
++ Call :code:`testLayerGrad` to perform gradient checks. It has the following arguments.
+   - layer and input configurations. (:code:`config` in our example)
+   - type of the layer. (:code:`fc` in our example)
+   - batch size of the gradient check. (100 in our example)
+   - whether the input is transposed. Most layers need to set it to :code:`false`. (:code:`false` in our example)
+   - whether to use weights. Some layers or activations perform normalization so that the sum of their output is a constant. For example, the sum of the output of a softmax activation is one. In this case, we cannot correctly compute the gradients using regular gradient check techniques. A weighted sum of the output, which is not a constant, is used to compute the gradients instead. (:code:`true` in our example, because the activation of a fully connected layer can be softmax)
+
+.. code-block:: c++
+
+   void testFcLayer(string format, size_t nnz) {
+     // Create layer configuration.
+     TestConfig config;
+     config.biasSize = 4096;
+     config.layerConfig.set_type("fc");
+     config.layerConfig.set_size(4096);
+     config.layerConfig.set_active_type("softmax");
+     config.layerConfig.set_drop_rate(0.1);
+     // Setup inputs.
+     config.inputDefs.push_back(
+         {INPUT_DATA, "layer_0", 8192, nnz, ParaSparse(format)});
+     config.layerConfig.add_inputs();
+     LOG(INFO) << config.inputDefs[0].sparse.sparse << " "
+               << config.inputDefs[0].sparse.format;
+     for (auto useGpu : {false, true}) {
+       testLayerGrad(config, "fc", 100, /* trans */ false, useGpu,
+                     /* weight */ true);
+     }
+   }
+
+If you are creating a new file for the test, such as :code:`paddle/gserver/tests/testFCGrad.cpp`, you need to add the file to :code:`paddle/gserver/tests/CMakeLists.txt`. An example is given below. All the unit tests will run when you execute the command :code:`make tests`. Notice that some layers might need high accuracy for the gradient check unit tests to work well. You need to configure :code:`WITH_DOUBLE` to :code:`ON` when configuring cmake.
+
+.. code-block:: bash
+
+   add_unittest_without_exec(test_FCGrad
+       test_FCGrad.cpp
+       LayerGradUtil.cpp
+       TestUtil.cpp)
+
+   add_test(NAME test_FCGrad
+       COMMAND test_FCGrad)
+
+
+Implement Python Wrapper
+========================
+
+Implementing a Python wrapper allows us to use the added layer in configuration files. All the Python wrappers are in the file :code:`python/paddle/trainer/config_parser.py`. An example of the Python wrapper for the fully connected layer is listed below. It has the following steps:
+
+- Use :code:`@config_layer('fc')` as the decorator of the Python wrapper class. :code:`fc` is the identifier of the layer.
+- Implement the :code:`__init__` constructor function.
+   - It first calls the base constructor, :code:`super(FCLayer, self).__init__(name, 'fc', size, inputs=inputs, **xargs)`. :code:`FCLayer` is the Python wrapper class name, and :code:`fc` is the layer identifier name. They must be correct in order for the wrapper to work.
+   - Then it computes the size and format (whether sparse) of each transformation matrix, as well as the corresponding parameter size.
+
+.. code-block:: python
+
+   @config_layer('fc')
+   class FCLayer(LayerBase):
+       def __init__(
+               self,
+               name,
+               size,
+               inputs,
+               bias=True,
+               **xargs):
+           super(FCLayer, self).__init__(name, 'fc', size, inputs=inputs, **xargs)
+           for input_index in xrange(len(self.inputs)):
+               input_layer = self.get_input_layer(input_index)
+               psize = self.config.size * input_layer.size
+               dims = [input_layer.size, self.config.size]
+               format = self.inputs[input_index].format
+               sparse = format == "csr" or format == "csc"
+               if sparse:
+                   psize = self.inputs[input_index].nnz
+               self.create_input_parameter(input_index, psize, dims, sparse, format)
+           self.create_bias_parameter(bias, self.config.size)
+
+In the network configuration, the layer can be specified using the following code snippet. The arguments of this class are:
+
+- :code:`name` is the name identifier of the layer instance.
+- :code:`type` is the type of the layer, specified using the layer identifier.
+- :code:`size` is the output size of the layer.
+- :code:`bias` specifies whether this layer instance has bias.
+- :code:`inputs` specifies a list of layer instance names as inputs.
+
+.. code-block:: python
+
+   Layer(
+       name = "fc1",
+       type = "fc",
+       size = 64,
+       bias = True,
+       inputs = [Input("pool3")]
+   )
+
+You are also recommended to implement a helper for the Python wrapper, which makes it easier to write models. You can refer to :code:`python/paddle/trainer_config_helpers/layers.py` for examples.
diff --git a/doc/howto/dev/write_docs_cn.rst b/doc/howto/dev/write_docs_cn.rst
new file mode 100644
index 0000000000000000000000000000000000000000..36e5d420c986fc8d88eefee4aa221dba0a0480f2
--- /dev/null
+++ b/doc/howto/dev/write_docs_cn.rst
@@ -0,0 +1,78 @@
+##################
+如何贡献/修改文档
+##################
+
+PaddlePaddle的文档包括英文文档 ``doc`` 和中文文档 ``doc_cn`` 两个部分。文档都是通过 `cmake`_ 驱动 `sphinx`_ 编译生成,生成后的文档分别存储在编译目录的 ``doc`` 和 ``doc_cn`` 两个子目录下。
+
+
+如何构建PaddlePaddle的文档
+==========================
+
+PaddlePaddle的文档构建有直接构建和基于Docker构建两种方式,我们提供了一个构建脚本 ``build_docs.sh`` 来进行构建。
+PaddlePaddle文档需要准备的环境相对复杂,所以我们推荐基于Docker来构建PaddlePaddle的文档。
+
+
+使用Docker构建PaddlePaddle的文档
+--------------------------------
+
+使用Docker构建PaddlePaddle的文档,需要在系统里先安装好Docker工具包。Docker安装请参考 `Docker的官网 `_ 。安装好Docker之后可以使用源码目录下的脚本构建文档,即
+
+.. code-block:: bash
+
+   cd TO_YOUR_PADDLE_CLONE_PATH
+   cd paddle/scripts/tools/build_docs
+   bash build_docs.sh with_docker
+
+编译完成后,会在当前目录生成两个子目录\:
+
+* doc 英文文档目录
+* doc_cn 中文文档目录
+
+打开浏览器访问对应目录下的index.html即可访问本地文档。
+
+
+
+直接构建PaddlePaddle的文档
+--------------------------
+
+因为PaddlePaddle的v2 api文档生成过程依赖于py_paddle Python包,用户需要首先确认py_paddle包已经安装。
+
+.. code-block:: bash
+
+   python -c "import py_paddle"
+
+如果提示错误,那么用户需要在本地编译安装PaddlePaddle,请参考 `源码编译文档 `_ 。
+注意,用户在首次编译安装PaddlePaddle时,请将WITH_DOC选项关闭。在编译安装正确之后,请再次确认py_paddle包已经安装,即可进行下一步操作。
+
+如果提示正确,可以执行以下命令编译生成文档,即
+
+.. code-block:: bash
+
+   cd TO_YOUR_PADDLE_CLONE_PATH
+   cd paddle/scripts/tools/build_docs
+   bash build_docs.sh local
+
+编译完成之后,会在当前目录生成两个子目录\:
+
+* doc 英文文档目录
+* doc_cn 中文文档目录
+
+打开浏览器访问对应目录下的index.html即可访问本地文档。
+
+
+如何书写PaddlePaddle的文档
+==========================
+
+PaddlePaddle文档使用 `sphinx`_ 自动生成,用户可以参考sphinx教程进行书写。
+
+如何更新www.paddlepaddle.org文档
+================================
+
+开发者给PaddlePaddle代码增加的注释以PR的形式提交到github中,提交方式可参见 `贡献文档 `_ 。
+目前PaddlePaddle的develop分支的文档是自动触发更新的,用户可以分别查看最新的 `中文文档 `_ 和
+`英文文档 `_ 。
+
+
+
+.. _cmake: https://cmake.org/
+.. 
_sphinx: http://www.sphinx-doc.org/en/1.4.8/ diff --git a/doc/howto/index_cn.rst b/doc/howto/index_cn.rst new file mode 100644 index 0000000000000000000000000000000000000000..26449a6365843b526b3ac3111b337d2f17524c9d --- /dev/null +++ b/doc/howto/index_cn.rst @@ -0,0 +1,39 @@ +进阶指南 +======== + +使用说明 +-------- + +.. toctree:: + :maxdepth: 1 + + usage/cmd_parameter/index_cn.rst + usage/cluster/cluster_train_cn.md + usage/k8s/k8s_basis_cn.md + usage/k8s/k8s_cn.md + usage/k8s/k8s_distributed_cn.md + +开发标准 +-------- + +.. toctree:: + :maxdepth: 1 + + dev/write_docs_cn.rst + dev/contribute_to_paddle_cn.md + +模型配置 +-------- + +.. toctree:: + :maxdepth: 1 + + deep_model/rnn/index_cn.rst + +性能优化 +-------- + +.. toctree:: + :maxdepth: 1 + + optimization/gpu_profiling_cn.rst diff --git a/doc/howto/index_en.rst b/doc/howto/index_en.rst index bd64c5b1fb1226b07a07094cfd60bce6fa4e7884..1fbfcd260b912078f00ed5b720ed607db725c4e2 100644 --- a/doc/howto/index_en.rst +++ b/doc/howto/index_en.rst @@ -7,9 +7,10 @@ Usage .. toctree:: :maxdepth: 1 - cmd_parameter/index_en.md - deep_model/index_en.rst - cluster/cluster_train_en.md + usage/cmd_parameter/index_en.rst + usage/cluster/cluster_train_en.md + usage/k8s/k8s_en.md + usage/k8s/k8s_aws_en.md Development ------------ @@ -17,8 +18,16 @@ Development .. toctree:: :maxdepth: 1 - new_layer/index_en.rst - contribute_to_paddle_en.md + dev/new_layer_en.rst + dev/contribute_to_paddle_en.md + +Configuration +------------- + +.. toctree:: + :maxdepth: 1 + + deep_model/rnn/index_en.rst Optimization ------------- @@ -26,4 +35,4 @@ Optimization .. toctree:: :maxdepth: 1 - optimization/index_en.rst + optimization/gpu_profiling_en.rst diff --git a/doc/howto/new_layer/index_en.rst b/doc/howto/new_layer/index_en.rst deleted file mode 100644 index 922bda5b0d879b9041e3c0ca5d2518363a7cfa05..0000000000000000000000000000000000000000 --- a/doc/howto/new_layer/index_en.rst +++ /dev/null @@ -1,391 +0,0 @@ -======================= -How to Write New Layers -======================= - -This tutorial will guide you to write customized layers in PaddlePaddle. We will utilize fully connected layer as an example to guide you through the following steps for writing a new layer. - -- Derive equations for the forward and backward part of the layer. -- Implement C++ class for the layer. -- Write gradient check unit test to make sure the gradients are correctly computed. -- Implement Python wrapper for the layer. - -Derive Equations -================ - -First we need to derive equations of the *forward* and *backward* part of the layer. The forward part computes the output given an input. The backward part computes the gradients of the input and the parameters given the the gradients of the output. - -The illustration of a fully connected layer is shown in the following figure. In a fully connected layer, all output nodes are connected to all the input nodes. - -.. image:: FullyConnected.jpg - :align: center - :scale: 60 % - -The *forward part* of a layer transforms an input into the corresponding output. -Fully connected layer takes a dense input vector with dimension :math:`D_i`. It uses a transformation matrix :math:`W` with size :math:`D_i \times D_o` to project :math:`x` into a :math:`D_o` dimensional vector, and add a bias vector :math:`b` with dimension :math:`D_o` to the vector. - -.. math:: - - y = f(W^T x + b) - -where :math:`f(.)` is an nonlinear *activation* function, such as sigmoid, tanh, and Relu. 
- -The transformation matrix :math:`W` and bias vector :math:`b` are the *parameters* of the layer. The *parameters* of a layer are learned during training in the *backward pass*. The backward pass computes the gradients of the output function with respect to all parameters and inputs. The optimizer can use chain rule to compute the gradients of the loss function with respect to each parameter. - -Suppose our loss function is :math:`c(y)`, then - -.. math:: - - \frac{\partial c(y)}{\partial x} = \frac{\partial c(y)}{\partial y} \frac{\partial y}{\partial x} - -Suppose :math:`z = f(W^T x + b)`, then - -.. math:: - - \frac{\partial y}{\partial z} = \frac{\partial f(z)}{\partial z} - -This derivative can be automatically computed by our base layer class. - -Then, for fully connected layer, we need to compute: - -.. math:: - - \frac{\partial z}{\partial x} = W, \frac{\partial z_j}{\partial W_{ij}} = x_i, \frac{\partial z}{\partial b} = \mathbf 1 - -where :math:`\mathbf 1` is an all one vector, :math:`W_{ij}` is the number at the i-th row and j-th column of the matrix :math:`W`, :math:`z_j` is the j-th component of the vector :math:`z`, and :math:`x_i` is the i-th component of the vector :math:`x`. - -Finally we can use chain rule to calculate :math:`\frac{\partial z}{\partial x}`, and :math:`\frac{\partial z}{\partial W}`. The details of the computation will be given in the next section. - -Implement C++ Class -=================== - -The C++ class of the layer implements the initialization, forward, and backward part of the layer. The fully connected layer is at :code:`paddle/gserver/layers/FullyConnectedLayer.h` and :code:`paddle/gserver/layers/FullyConnectedLayer.cpp`. We list simplified version of the code below. - -It needs to derive the base class :code:`paddle::Layer`, and it needs to override the following functions: - -- constructor and destructor. -- :code:`init` function. It is used to initialize the parameters and settings. -- :code:`forward`. It implements the forward part of the layer. -- :code:`backward`. It implements the backward part of the layer. -- :code:`prefetch`. It is utilized to determine the rows corresponding parameter matrix to prefetch from parameter server. You do not need to override this function if your layer does not need remote sparse update. (most layers do not need to support remote sparse update) - - -The header file is listed below: - -.. code-block:: c++ - - namespace paddle { - /** - * A layer has full connections to all neurons in the previous layer. - * It computes an inner product with a set of learned weights, and - * (optionally) adds biases. - * - * The config file api is fc_layer. - */ - - class FullyConnectedLayer : public Layer { - protected: - WeightList weights_; - std::unique_ptr biases_; - - public: - explicit FullyConnectedLayer(const LayerConfig& config) - : Layer(config) {} - ~FullyConnectedLayer() {} - - bool init(const LayerMap& layerMap, const ParameterMap& parameterMap); - - Weight& getWeight(int idx) { return *weights_[idx]; } - - void prefetch(); - void forward(PassType passType); - void backward(const UpdateCallback& callback = nullptr); - }; - } // namespace paddle - -It defines the parameters as class variables. We use :code:`Weight` class as abstraction of parameters. It supports multi-thread update. The details of this class will be described in details in the implementations. - -- :code:`weights_` is a list of weights for the transformation matrices. The current implementation can have more than one inputs. 
Thus, it has a list of weights. One weight corresponds to an input. -- :code:`biases_` is a weight for the bias vector. - -The fully connected layer does not have layer configuration hyper-parameters. If there are some layer hyper-parameters, a common practice is to store it in :code:`LayerConfig& config`, and put it into a class variable in the constructor. - -The following code snippet implements the :code:`init` function. - -- First, every :code:`init` function must call the :code:`init` function of the base class :code:`Layer::init(layerMap, parameterMap);`. This statement will initialize the required variables and connections for each layer. -- The it initializes all the weights matrices :math:`W`. The current implementation can have more than one inputs. Thus, it has a list of weights. -- Finally, it initializes the bias. - - -.. code-block:: c++ - - bool FullyConnectedLayer::init(const LayerMap& layerMap, - const ParameterMap& parameterMap) { - /* Initialize the basic parent class */ - Layer::init(layerMap, parameterMap); - - /* initialize the weightList */ - CHECK(inputLayers_.size() == parameters_.size()); - for (size_t i = 0; i < inputLayers_.size(); i++) { - // Option the parameters - size_t height = inputLayers_[i]->getSize(); - size_t width = getSize(); - - // create a new weight - if (parameters_[i]->isSparse()) { - CHECK_LE(parameters_[i]->getSize(), width * height); - } else { - CHECK_EQ(parameters_[i]->getSize(), width * height); - } - Weight* w = new Weight(height, width, parameters_[i]); - - // append the new weight to the list - weights_.emplace_back(w); - } - - /* initialize biases_ */ - if (biasParameter_.get() != NULL) { - biases_ = std::unique_ptr(new Weight(1, getSize(), biasParameter_)); - } - - return true; - } - -The implementation of the forward part has the following steps. - -- Every layer must call :code:`Layer::forward(passType);` at the beginning of its :code:`forward` function. -- Then it allocates memory for the output using :code:`reserveOutput(batchSize, size);`. This step is necessary because we support the batches to have different batch sizes. :code:`reserveOutput` will change the size of the output accordingly. For the sake of efficiency, we will allocate new memory if we want to expand the matrix, but we will reuse the existing memory block if we want to shrink the matrix. -- Then it computes :math:`\sum_i W_i x + b` using Matrix operations. :code:`getInput(i).value` retrieve the matrix of the i-th input. Each input is a :math:`batchSize \times dim` matrix, where each row represents an single input in a batch. For a complete lists of supported matrix operations, please refer to :code:`paddle/math/Matrix.h` and :code:`paddle/math/BaseMatrix.h`. -- Finally it applies the activation function using :code:`forwardActivation();`. It will automatically applies the corresponding activation function specifies in the network configuration. - - -.. code-block:: c++ - - void FullyConnectedLayer::forward(PassType passType) { - Layer::forward(passType); - - /* malloc memory for the output_ if necessary */ - int batchSize = getInput(0).getBatchSize(); - int size = getSize(); - - { - // Settup the size of the output. - reserveOutput(batchSize, size); - } - - MatrixPtr outV = getOutputValue(); - - // Apply the the transformation matrix to each input. - for (size_t i = 0; i != inputLayers_.size(); ++i) { - auto input = getInput(i); - CHECK(input.value) << "The input of 'fc' layer must be matrix"; - i == 0 ? 
outV->mul(input.value, weights_[i]->getW(), 1, 0) - : outV->mul(input.value, weights_[i]->getW(), 1, 1); - } - - /* add the bias-vector */ - if (biases_.get() != NULL) { - outV->addBias(*(biases_->getW()), 1); - } - - /* activation */ { - forwardActivation(); - } - } - -The implementation of the backward part has the following steps. - -- :code:`backwardActivation()` computes the gradients of the activation. The gradients will be multiplies in place to the gradients of the output, which can be retrieved using :code:`getOutputGrad()`. -- Compute the gradients of bias. Notice that we an use :code:`biases_->getWGrad()` to get the gradient matrix of the corresponding parameter. After the gradient of one parameter is updated, it **MUST** call :code:`getParameterPtr()->incUpdate(callback);`. This is utilize for parameter update over multiple threads or multiple machines. -- Then it computes the gradients of the transformation matrices and inputs, and it calls :code:`incUpdate` for the corresponding parameter. This gives the framework the chance to know whether it has gathered all the gradient to one parameter so that it can do some overlapping work (e.g., network communication) - - -.. code-block:: c++ - - void FullyConnectedLayer::backward(const UpdateCallback& callback) { - /* Do derivation for activations.*/ { - backwardActivation(); - } - - if (biases_ && biases_->getWGrad()) { - biases_->getWGrad()->collectBias(*getOutputGrad(), 1); - - /* Increasing the number of gradient */ - biases_->getParameterPtr()->incUpdate(callback); - } - - bool syncFlag = hl_get_sync_flag(); - - for (size_t i = 0; i != inputLayers_.size(); ++i) { - /* Calculate the W-gradient for the current layer */ - if (weights_[i]->getWGrad()) { - MatrixPtr input_T = getInputValue(i)->getTranspose(); - MatrixPtr oGrad = getOutputGrad(); - { - weights_[i]->getWGrad()->mul(input_T, oGrad, 1, 1); - } - } - - - /* Calculate the input layers error */ - MatrixPtr preGrad = getInputGrad(i); - if (NULL != preGrad) { - MatrixPtr weights_T = weights_[i]->getW()->getTranspose(); - preGrad->mul(getOutputGrad(), weights_T, 1, 1); - } - - { - weights_[i]->getParameterPtr()->incUpdate(callback); - } - } - } - -The :code:`prefetch` function specifies the rows that need to be fetched from parameter server during training. It is only useful for remote sparse training. In remote sparse training, the full parameter matrix is stored distributedly at the parameter server. When the layer uses a batch for training, only a subset of locations of the input is non-zero in this batch. Thus, this layer only needs the rows of the transformation matrix corresponding to the locations of these non-zero entries. The :code:`prefetch` function specifies the ids of these rows. - -Most of the layers do not need remote sparse training function. You do not need to override this function in this case. - -.. code-block:: c++ - - void FullyConnectedLayer::prefetch() { - for (size_t i = 0; i != inputLayers_.size(); ++i) { - auto* sparseParam = - dynamic_cast(weights_[i]->getW().get()); - if (sparseParam) { - MatrixPtr input = getInputValue(i); - sparseParam->addRows(input); - } - } - } - -Finally, you can use :code:`REGISTER_LAYER(fc, FullyConnectedLayer);` to register the layer. :code:`fc` is the identifier of the layer, and :code:`FullyConnectedLayer` is the class name of the layer. - -.. 
code-block:: c++ - - namespace paddle { - REGISTER_LAYER(fc, FullyConnectedLayer); - } - -If the :code:`cpp` file is put into :code:`paddle/gserver/layers`, it will be automatically added to the compilation list. - - -Write Gradient Check Unit Test -=============================== - -An easy way to verify the correctness of new layer's implementation is to write a gradient check unit test. Gradient check unit test utilizes finite difference method to verify the gradient of a layer. It modifies the input with a small perturbation :math:`\Delta x` and observes the changes of output :math:`\Delta y`, the gradient can be computed as :math:`\frac{\Delta y}{\Delta x }`. This gradient can be compared with the gradient computed by the :code:`backward` function of the layer to ensure the correctness of the gradient computation. Notice that the gradient check only tests the correctness of the gradient computation, it does not necessarily guarantee the correctness of the implementation of the :code:`forward` and :code:`backward` function. You need to write more sophisticated unit tests to make sure your layer is implemented correctly. - -All the gradient check unit tests are located in :code:`paddle/gserver/tests/test_LayerGrad.cpp`. You are recommended to put your test into a new test file if you are planning to write a new layer. The gradient test of the gradient check unit test of the fully connected layer is listed below. It has the following steps. - -+ Create layer configuration. A layer configuration can include the following attributes: - - size of the bias parameter. (4096 in our example) - - type of the layer. (fc in our example) - - size of the layer. (4096 in our example) - - activation type. (softmax in our example) - - dropout rate. (0.1 in our example) -+ configure the input of the layer. In our example, we have only one input. - - type of the input (:code:`INPUT_DATA`) in our example. It can be one of the following types - - :code:`INPUT_DATA`: dense vector. - - :code:`INPUT_LABEL`: integer. - - :code:`INPUT_DATA_TARGET`: dense vector, but it does not used to compute gradient. - - :code:`INPUT_SEQUENCE_DATA`: dense vector with sequence information. - - :code:`INPUT_HASSUB_SEQUENCE_DATA`: dense vector with both sequence and sub-sequence information. - - :code:`INPUT_SEQUENCE_LABEL`: integer with sequence information. - - :code:`INPUT_SPARSE_NON_VALUE_DATA`: 0-1 sparse data. - - :code:`INPUT_SPARSE_FLOAT_VALUE_DATA`: float sparse data. - - name of the input. (:code:`layer_0` in our example) - - size of the input. (8192 in our example) - - number of non-zeros, only useful for sparse inputs. - - format of sparse data, only useful for sparse inputs. -+ each inputs needs to call :code:`config.layerConfig.add_inputs();` once. -+ call :code:`testLayerGrad` to perform gradient checks. It has the following arguments. - - layer and input configurations. (:code:`config` in our example) - - type of the input. (:code:`fc` in our example) - - batch size of the gradient check. (100 in our example) - - whether the input is transpose. Most layers need to set it to :code:`false`. (:code:`false` in our example) - - whether to use weights. Some layers or activations perform normalization so that the sum of their output is a constant. For example, the sum of output of a softmax activation is one. In this case, we cannot correctly compute the gradients using regular gradient check techniques. A weighted sum of the output, which is not a constant, is utilized to compute the gradients. 
(:code:`true` in our example, because the activation of a fully connected layer can be softmax) - -.. code-block:: c++ - - void testFcLayer(string format, size_t nnz) { - // Create layer configuration. - TestConfig config; - config.biasSize = 4096; - config.layerConfig.set_type("fc"); - config.layerConfig.set_size(4096); - config.layerConfig.set_active_type("sigmoid"); - config.layerConfig.set_drop_rate(0.1); - // Setup inputs. - config.inputDefs.push_back( - {INPUT_DATA, "layer_0", 8192, nnz, ParaSparse(format)}); - config.layerConfig.add_inputs(); - LOG(INFO) << config.inputDefs[0].sparse.sparse << " " - << config.inputDefs[0].sparse.format; - for (auto useGpu : {false, true}) { - testLayerGrad(config, "fc", 100, /* trans */ false, useGpu, - /* weight */ true); - } - } - -If you are creating a new file for the test, such as :code:`paddle/gserver/tests/testFCGrad.cpp`, you need to add the file to :code:`paddle/gserver/tests/CMakeLists.txt`. An example is given below. All the unit tests will run when you execute the command :code:`make tests`. Notice that some layers might need high accuracy for the gradient check unit tests to work well. You need to configure :code:`WITH_DOUBLE` to `ON` when configuring cmake. - -.. code-block:: bash - - add_unittest_without_exec(test_FCGrad - test_FCGrad.cpp - LayerGradUtil.cpp - TestUtil.cpp) - - add_test(NAME test_FCGrad - COMMAND test_FCGrad) - - -Implement Python Wrapper -======================== - -Implementing Python wrapper allows us to use the added layer in configuration files. All the Python wrappers are in file :code:`python/paddle/trainer/config_parser.py`. An example of the Python wrapper for fully connected layer is listed below. It has the following steps: - -- Use :code:`@config_layer('fc')` at the decorator for all the Python wrapper class. :code:`fc` is the identifier of the layer. -- Implements :code:`__init__` constructor function. - - It first call :code:`super(FCLayer, self).__init__(name, 'fc', size, inputs=inputs, **xargs)` base constructor function. :code:`FCLayer` is the Python wrapper class name, and :code:`fc` is the layer identifier name. They must be correct in order for the wrapper to work. - - Then it computes the size and format (whether sparse) of each transformation matrix as well as the size. - -.. code-block:: python - - @config_layer('fc') - class FCLayer(LayerBase): - def __init__( - self, - name, - size, - inputs, - bias=True, - **xargs): - super(FCLayer, self).__init__(name, 'fc', size, inputs=inputs, **xargs) - for input_index in xrange(len(self.inputs)): - input_layer = self.get_input_layer(input_index) - psize = self.config.size * input_layer.size - dims = [input_layer.size, self.config.size] - format = self.inputs[input_index].format - sparse = format == "csr" or format == "csc" - if sparse: - psize = self.inputs[input_index].nnz - self.create_input_parameter(input_index, psize, dims, sparse, format) - self.create_bias_parameter(bias, self.config.size) - -In network configuration, the layer can be specifies using the following code snippets. The arguments of this class are: - -- :code:`name` is the name identifier of the layer instance. -- :code:`type` is the type of the layer, specified using layer identifier. -- :code:`size` is the output size of the layer. -- :code:`bias` specifies whether this layer instance has bias. -- :code:`inputs` specifies a list of layer instance names as inputs. - -.. 
code-block:: python
-
-    Layer(
-        name = "fc1",
-        type = "fc",
-        size = 64,
-        bias = True,
-        inputs = [Input("pool3")]
-    )
-
-You are also recommended to implement a helper for the Python wrapper, which makes it easier to write models. You can refer to :code:`python/paddle/trainer_config_helpers/layers.py` for examples.
diff --git a/doc/howto/optimization/gpu_profiling_cn.rst b/doc/howto/optimization/gpu_profiling_cn.rst
new file mode 100644
index 0000000000000000000000000000000000000000..e2b0b0396e0034b01ed2c5081effdd3bcabf31ae
--- /dev/null
+++ b/doc/howto/optimization/gpu_profiling_cn.rst
@@ -0,0 +1,242 @@
+==================
+GPU性能分析与调优
+==================
+
+.. contents::
+
+此教程将向您分步介绍如何使用内置的定时工具、 **nvprof** 或 **nvvp** 来运行性能分析和调优。
+
+- 什么是性能分析?
+- 为什么需要性能分析?
+- 如何进行性能分析?
+- 性能分析工具介绍
+- 详细教程
+- 性能分析小技巧
+
+什么是性能分析?
+================
+在软件工程的范畴里,性能分析(Profiling)是一个动态程序分析的术语,它可以指测量一个程序的空间(内存)复杂度或时间复杂度,
+也可以指某些特定指令的使用情况,或者是函数调用的频率和耗时等。通常情况下,分析得到的信息用于协助进行程序的优化。
+
+简单来说,性能分析工具是用于给应用程序的性能做定量分析的。如果想很好地理解程序的行为,那么性能分析工具是必不可少的利器。简单的性能分析,可以告诉您某个操作到底花了多长时间;而更深入的分析,甚至能解释为什么某个操作花了很长时间。
+
+为什么需要性能分析?
+============================
+训练好一个深层神经网络通常要耗费非常长的时间,所以性能也就逐步变成了深度学习领域最重要的指标。
+而优化性能的首要任务,是需要了解哪些步骤拖慢了整体。
+如果某一块根本就不怎么耗时,那也就不需要急着优化性能啦!
+
+如何进行性能分析?
+========================
+为了达到性能最优,您可以采用下面五个步骤:
+
+- 对代码进行性能分析
+- 找到运行慢的部分
+- 找到运行慢的原因
+- 修改成更快的版本
+- 再次对代码进行性能分析
+
+通常情况下,处理器有两个关键性能限制:一个是浮点计算量,另一个是内存操作量。
+GPU则还需要高并行性,才能发挥其全部能力。这正是它们速度快的原因。
+
+性能分析工具介绍
+======================
+就通常的GPU性能分析来说,市面上已经有NVIDIA或第三方提供的众多工具。
+
+**nvprof** 是NVIDIA性能分析工具, **nvvp** 则是带GUI的NVIDIA可视化性能分析工具。
+在这个教程中,我们主要会介绍nvprof和nvvp。
+
+:code:`paddle/math/tests` 目录中的 :code:`test_GpuProfiler` 就是用于展示上述分析工具的用法。
+
+.. literalinclude:: ../../../paddle/math/tests/test_GpuProfiler.cpp
+   :language: c++
+   :lines: 137-151
+   :linenos:
+
+上述的代码片段包含了两种方法,您可以任意使用一个或两个来对感兴趣的代码段做性能分析。
+
+1. :code:`REGISTER_TIMER_INFO` 是一个内置的定时器封装,可以用来计算CPU函数或CUDA内核的时间消耗。
+
+2. :code:`REGISTER_GPU_PROFILER` 是一个封装对象,封装了 :code:`cudaProfilerStart` 和 :code:`cudaProfilerStop` 两个操作;同时其内部实现可以避免纯CPU版本PaddlePaddle在执行本语句时发生崩溃。
+
+您会在接下来的部分中获得更多的细节介绍。
+
+详细教程
+============
+
+内置定时器
+------------
+
+如果想要启用PaddlePaddle的内置定时器,您首先需要在相关代码段中加入 :code:`REGISTER_TIMER_INFO` 。
+接下来就可以使用 :code:`printStatus` 或者 :code:`printAllStatus` 函数来将信息输出到界面中。
+下面举个简单的例子:
+
+1. 加入 :code:`REGISTER_TIMER_INFO` 和 :code:`printAllStatus` 函数(如高亮部分)。
+
+   .. literalinclude:: ../../../paddle/math/tests/test_GpuProfiler.cpp
+      :language: c++
+      :lines: 137-151
+      :emphasize-lines: 8-12,14
+      :linenos:
+
+2. cmake配置中将 **WITH_TIMER** 打开,重新编译PaddlePaddle。
+
+   .. code-block:: bash
+
+      cmake .. -DWITH_TIMER=ON
+      make
+
+3. 执行您的代码,并观察结果(如高亮部分)。
+
+   .. code-block:: bash
+      :emphasize-lines: 1,12-15
+
+      > ./paddle/math/tests/test_GpuProfiler
+      I1117 11:13:42.313065 2522362816 Util.cpp:155] commandline: ./paddle/math/tests/test_GpuProfiler
+      I1117 11:13:42.845065 2522362816 Util.cpp:130] Calling runInitFunctions
+      I1117 11:13:42.845208 2522362816 Util.cpp:143] Call runInitFunctions done. 
+ [==========] Running 1 test from 1 test case. + [----------] Global test environment set-up. + [----------] 1 test from Profiler + [ RUN ] Profiler.BilinearFwdBwd + I1117 11:13:42.845310 2522362816 test_GpuProfiler.cpp:114] Enable GPU Profiler Stat: [testBilinearFwdBwd] "numSamples = 10, channels = 16, im + gSizeX = 64, imgSizeY = 64" + I1117 11:13:42.850154 2522362816 ThreadLocal.cpp:37] thread use undeterministic rand seed:20659751 + I1117 11:13:42.981501 2522362816 Stat.cpp:130] ======= StatSet: [GlobalStatInfo] status ====== + I1117 11:13:42.981539 2522362816 Stat.cpp:133] Stat=testBilinearFwdBwd total=136.141 avg=136.141 max=136.141 min=136.141 count=1 + I1117 11:13:42.981572 2522362816 Stat.cpp:141] ======= BarrierStatSet status ====== + I1117 11:13:42.981575 2522362816 Stat.cpp:154] -------------------------------------------------- + [ OK ] Profiler.BilinearFwdBwd (136 ms) + [----------] 1 test from Profiler (136 ms total) + + [----------] Global test environment tear-down + [==========] 1 test from 1 test case ran. (136 ms total) + [ PASSED ] 1 test. + +nvprof 工具 +---------------- + +要使用命令行分析工具 **nvprof**,您按如下步骤操作即可: + +1. 将 :code:`REGISTER_GPU_PROFILER` 函数加到代码中(参考强调部分)。 + + .. literalinclude:: ../../../paddle/math/tests/test_GpuProfiler.cpp + :language: c++ + :lines: 137-151 + :emphasize-lines: 6-7 + :linenos: + +2. cmake中将 **WITH_PROFILER** 配置打开,重新编译PaddlePaddle。 + + .. code-block:: bash + + cmake .. -DWITH_PROFILER=ON + make + +3. 使用 **nvprof** 来分析执行文件。 + + .. code-block:: bash + + nvprof ./paddle/math/tests/test_GpuProfiler + +然后,您就能获得如下的分析结果: + +.. code-block:: bash + + ==78544== Profiling application: ./paddle/math/tests/test_GpuProfiler + ==78544== Profiling result: + Time(%) Time Calls Avg Min Max Name + 27.60% 9.6305ms 5 1.9261ms 3.4560us 6.4035ms [CUDA memcpy HtoD] + 26.07% 9.0957ms 1 9.0957ms 9.0957ms 9.0957ms KeBilinearInterpBw + 23.78% 8.2977ms 1 8.2977ms 8.2977ms 8.2977ms KeBilinearInterpFw + 22.55% 7.8661ms 2 3.9330ms 1.5798ms 6.2863ms [CUDA memcpy DtoH] + + ==78544== API calls: + Time(%) Time Calls Avg Min Max Name + 46.85% 682.28ms 8 85.285ms 12.639us 682.03ms cudaStreamCreateWithFlags + 39.83% 580.00ms 4 145.00ms 302ns 550.27ms cudaFree + 9.82% 143.03ms 9 15.892ms 8.7090us 142.78ms cudaStreamCreate + 1.23% 17.983ms 7 2.5690ms 23.210us 6.4563ms cudaMemcpy + 1.23% 17.849ms 2 8.9247ms 8.4726ms 9.3768ms cudaStreamSynchronize + 0.66% 9.5969ms 7 1.3710ms 288.43us 2.4279ms cudaHostAlloc + 0.13% 1.9530ms 11 177.54us 7.6810us 591.06us cudaMalloc + 0.07% 1.0424ms 8 130.30us 1.6970us 453.72us cudaGetDevice + 0.04% 527.90us 40 13.197us 525ns 253.99us cudaEventCreateWithFlags + 0.03% 435.73us 348 1.2520us 124ns 42.704us cuDeviceGetAttribute + 0.03% 419.36us 1 419.36us 419.36us 419.36us cudaGetDeviceCount + 0.02% 260.75us 2 130.38us 129.32us 131.43us cudaGetDeviceProperties + 0.02% 222.32us 2 111.16us 106.94us 115.39us cudaLaunch + 0.01% 214.06us 4 53.514us 28.586us 77.655us cuDeviceGetName + 0.01% 115.45us 4 28.861us 9.8250us 44.526us cuDeviceTotalMem + 0.01% 83.988us 4 20.997us 578ns 77.760us cudaSetDevice + 0.00% 38.918us 1 38.918us 38.918us 38.918us cudaEventCreate + 0.00% 34.573us 31 1.1150us 279ns 12.784us cudaDeviceGetAttribute + 0.00% 17.767us 1 17.767us 17.767us 17.767us cudaProfilerStart + 0.00% 15.228us 2 7.6140us 3.5460us 11.682us cudaConfigureCall + 0.00% 14.536us 2 7.2680us 1.1490us 13.387us cudaGetLastError + 0.00% 8.6080us 26 331ns 173ns 783ns cudaSetupArgument + 0.00% 5.5470us 6 924ns 215ns 2.6780us cuDeviceGet + 0.00% 5.4090us 6 901ns 328ns 3.3320us 
cuDeviceGetCount + 0.00% 4.1770us 3 1.3920us 1.0630us 1.8300us cuDriverGetVersion + 0.00% 3.4650us 3 1.1550us 1.0810us 1.2680us cuInit + 0.00% 830ns 1 830ns 830ns 830ns cudaRuntimeGetVersion + + +nvvp 工具 +-------------- + +如果想使用可视化的分析器 **nvvp**,您可以导入 :code:`nvprof -o ...` 的输出,或者从工具的界面里运行您的应用。 + +**备注: nvvp 也支持CPU的性能分析** (需在nvvp界面中选上才能开启) + +.. image:: nvvp1.png + :align: center + :scale: 33% + +从内核函数的角度, **nvvp** 可以精确说明一个长耗时操作的具体原因。 +同时,如下图所示, **nvvp** 的内核block使用情况、寄存器使用情况和共享内存使用情况能让我们对GPU的整体使用有更好的理解。 + + +.. image:: nvvp2.png + :align: center + :scale: 33% + +而从应用的角度, **nvvp** 可以帮您提供一些定位性能瓶颈的建议。 +例如,下图中就展示了一些关于内存数据迁徙和计算资源利用率的建议,为您做性能调优提供了方向。 + +.. image:: nvvp3.png + :align: center + :scale: 33% + +.. image:: nvvp4.png + :align: center + :scale: 33% + +性能分析小技巧 +================== + +- 开始阶段,从 **nvprof** 和 **nvvp** 的输出信息入手是个不错的选择。 +- 接下来可以考虑下时间线的分析。 +- 如果真想挖掘内核深处的某个秘密,您最好先确认:这一块的耗时比例真的太高,值得深入分析。 +- 可能的情况下,试着让输出的分析数据和理论值对应。 + + 1) 例如,如果我知道内核花了10ms来移动1GB数据,那我会期望分析工具统计到速度是100GB/s。 + 2) 若有不一致之处,很有可能实际应用就是没有按照您的预期情况运行。 +- 了解您的硬件:如果您的GPU理论可以达到6 TFLOPs(6万亿次浮点运算每秒),而当前已经有5.5 TFLOPs了,那估计这里的潜力就没啥好挖的了…… + +性能分析是性能优化的关键一步。有的时候简简单单的改变就能在性能上产生明显的优化效果! +当然,具体情况因人而异。 + +参考资料 +=========== +Jeremy Appleyard, `GPU Profiling for Deep Learning `_, 2015 diff --git a/doc/howto/optimization/gpu_profiling_en.rst b/doc/howto/optimization/gpu_profiling_en.rst index 667bf1364e7cd4c9098caba72a127228d78ca38b..ed208ceaf7af0c5aab88fd4fcb18fa96b8c9ff38 100644 --- a/doc/howto/optimization/gpu_profiling_en.rst +++ b/doc/howto/optimization/gpu_profiling_en.rst @@ -1,5 +1,8 @@ -Profiling on PaddlePaddle -========================= +==================== +Tune GPU Performance +==================== + +.. contents:: This tutorial will guide you step-by-step through how to conduct profiling and performance tuning using built-in timer, **nvprof** and **nvvp**. @@ -49,11 +52,11 @@ For general GPU profiling, a bunch of tools are provided from both NVIDIA and th In this tutorial, we will focus on nvprof and nvvp. :code:`test_GpuProfiler` from :code:`paddle/math/tests` directory will be used to evaluate -above profilers. +above profilers. -.. literalinclude:: ../../paddle/math/tests/test_GpuProfiler.cpp +.. literalinclude:: ../../../paddle/math/tests/test_GpuProfiler.cpp :language: c++ - :lines: 111-124 + :lines: 137-151 :linenos: The above code snippet includes two methods, you can use any of them to profile the regions of interest. @@ -77,10 +80,10 @@ As a simple example, consider the following: 1. Add :code:`REGISTER_TIMER_INFO` and :code:`printAllStatus` functions (see the emphasize-lines). - .. literalinclude:: ../../paddle/math/tests/test_GpuProfiler.cpp + .. literalinclude:: ../../../paddle/math/tests/test_GpuProfiler.cpp :language: c++ - :lines: 111-124 - :emphasize-lines: 8-10,13 + :lines: 137-151 + :emphasize-lines: 8-12,14 :linenos: 2. Configure cmake with **WITH_TIMER** and recompile PaddlePaddle. @@ -90,31 +93,31 @@ As a simple example, consider the following: cmake .. -DWITH_TIMER=ON make -3. Execute your code and observe the results (see the emphasize-lines). +3. Execute your code and observe the results (see the emphasize-lines). .. code-block:: bash :emphasize-lines: 1,12-15 - > ./paddle/math/tests/test_GpuProfiler - I1117 11:13:42.313065 2522362816 Util.cpp:155] commandline: ./paddle/math/tests/test_GpuProfiler - I1117 11:13:42.845065 2522362816 Util.cpp:130] Calling runInitFunctions - I1117 11:13:42.845208 2522362816 Util.cpp:143] Call runInitFunctions done. - [==========] Running 1 test from 1 test case. 
- [----------] Global test environment set-up. - [----------] 1 test from Profiler - [ RUN ] Profiler.BilinearFwdBwd + > ./paddle/math/tests/test_GpuProfiler + I1117 11:13:42.313065 2522362816 Util.cpp:155] commandline: ./paddle/math/tests/test_GpuProfiler + I1117 11:13:42.845065 2522362816 Util.cpp:130] Calling runInitFunctions + I1117 11:13:42.845208 2522362816 Util.cpp:143] Call runInitFunctions done. + [==========] Running 1 test from 1 test case. + [----------] Global test environment set-up. + [----------] 1 test from Profiler + [ RUN ] Profiler.BilinearFwdBwd I1117 11:13:42.845310 2522362816 test_GpuProfiler.cpp:114] Enable GPU Profiler Stat: [testBilinearFwdBwd] "numSamples = 10, channels = 16, im - gSizeX = 64, imgSizeY = 64" - I1117 11:13:42.850154 2522362816 ThreadLocal.cpp:37] thread use undeterministic rand seed:20659751 - I1117 11:13:42.981501 2522362816 Stat.cpp:130] ======= StatSet: [GlobalStatInfo] status ====== - I1117 11:13:42.981539 2522362816 Stat.cpp:133] Stat=testBilinearFwdBwd total=136.141 avg=136.141 max=136.141 min=136.141 count=1 - I1117 11:13:42.981572 2522362816 Stat.cpp:141] ======= BarrierStatSet status ====== - I1117 11:13:42.981575 2522362816 Stat.cpp:154] -------------------------------------------------- - [ OK ] Profiler.BilinearFwdBwd (136 ms) - [----------] 1 test from Profiler (136 ms total) - - [----------] Global test environment tear-down - [==========] 1 test from 1 test case ran. (136 ms total) + gSizeX = 64, imgSizeY = 64" + I1117 11:13:42.850154 2522362816 ThreadLocal.cpp:37] thread use undeterministic rand seed:20659751 + I1117 11:13:42.981501 2522362816 Stat.cpp:130] ======= StatSet: [GlobalStatInfo] status ====== + I1117 11:13:42.981539 2522362816 Stat.cpp:133] Stat=testBilinearFwdBwd total=136.141 avg=136.141 max=136.141 min=136.141 count=1 + I1117 11:13:42.981572 2522362816 Stat.cpp:141] ======= BarrierStatSet status ====== + I1117 11:13:42.981575 2522362816 Stat.cpp:154] -------------------------------------------------- + [ OK ] Profiler.BilinearFwdBwd (136 ms) + [----------] 1 test from Profiler (136 ms total) + + [----------] Global test environment tear-down + [==========] 1 test from 1 test case ran. (136 ms total) [ PASSED ] 1 test. nvprof profiler @@ -124,9 +127,9 @@ To use this command line profiler **nvprof**, you can simply issue the following 1. Add :code:`REGISTER_GPU_PROFILER` function (see the emphasize-lines). - .. literalinclude:: ../../paddle/math/tests/test_GpuProfiler.cpp + .. literalinclude:: ../../../paddle/math/tests/test_GpuProfiler.cpp :language: c++ - :lines: 111-124 + :lines: 137-151 :emphasize-lines: 6-7 :linenos: @@ -147,42 +150,42 @@ Then, you can get the following profiling result: .. 
code-block:: bash - ==78544== Profiling application: ./paddle/math/tests/test_GpuProfiler - ==78544== Profiling result: - Time(%) Time Calls Avg Min Max Name - 27.60% 9.6305ms 5 1.9261ms 3.4560us 6.4035ms [CUDA memcpy HtoD] - 26.07% 9.0957ms 1 9.0957ms 9.0957ms 9.0957ms KeBilinearInterpBw - 23.78% 8.2977ms 1 8.2977ms 8.2977ms 8.2977ms KeBilinearInterpFw - 22.55% 7.8661ms 2 3.9330ms 1.5798ms 6.2863ms [CUDA memcpy DtoH] - - ==78544== API calls: - Time(%) Time Calls Avg Min Max Name - 46.85% 682.28ms 8 85.285ms 12.639us 682.03ms cudaStreamCreateWithFlags - 39.83% 580.00ms 4 145.00ms 302ns 550.27ms cudaFree - 9.82% 143.03ms 9 15.892ms 8.7090us 142.78ms cudaStreamCreate - 1.23% 17.983ms 7 2.5690ms 23.210us 6.4563ms cudaMemcpy - 1.23% 17.849ms 2 8.9247ms 8.4726ms 9.3768ms cudaStreamSynchronize - 0.66% 9.5969ms 7 1.3710ms 288.43us 2.4279ms cudaHostAlloc - 0.13% 1.9530ms 11 177.54us 7.6810us 591.06us cudaMalloc - 0.07% 1.0424ms 8 130.30us 1.6970us 453.72us cudaGetDevice - 0.04% 527.90us 40 13.197us 525ns 253.99us cudaEventCreateWithFlags - 0.03% 435.73us 348 1.2520us 124ns 42.704us cuDeviceGetAttribute - 0.03% 419.36us 1 419.36us 419.36us 419.36us cudaGetDeviceCount - 0.02% 260.75us 2 130.38us 129.32us 131.43us cudaGetDeviceProperties - 0.02% 222.32us 2 111.16us 106.94us 115.39us cudaLaunch - 0.01% 214.06us 4 53.514us 28.586us 77.655us cuDeviceGetName - 0.01% 115.45us 4 28.861us 9.8250us 44.526us cuDeviceTotalMem - 0.01% 83.988us 4 20.997us 578ns 77.760us cudaSetDevice - 0.00% 38.918us 1 38.918us 38.918us 38.918us cudaEventCreate - 0.00% 34.573us 31 1.1150us 279ns 12.784us cudaDeviceGetAttribute - 0.00% 17.767us 1 17.767us 17.767us 17.767us cudaProfilerStart - 0.00% 15.228us 2 7.6140us 3.5460us 11.682us cudaConfigureCall - 0.00% 14.536us 2 7.2680us 1.1490us 13.387us cudaGetLastError - 0.00% 8.6080us 26 331ns 173ns 783ns cudaSetupArgument - 0.00% 5.5470us 6 924ns 215ns 2.6780us cuDeviceGet - 0.00% 5.4090us 6 901ns 328ns 3.3320us cuDeviceGetCount - 0.00% 4.1770us 3 1.3920us 1.0630us 1.8300us cuDriverGetVersion - 0.00% 3.4650us 3 1.1550us 1.0810us 1.2680us cuInit + ==78544== Profiling application: ./paddle/math/tests/test_GpuProfiler + ==78544== Profiling result: + Time(%) Time Calls Avg Min Max Name + 27.60% 9.6305ms 5 1.9261ms 3.4560us 6.4035ms [CUDA memcpy HtoD] + 26.07% 9.0957ms 1 9.0957ms 9.0957ms 9.0957ms KeBilinearInterpBw + 23.78% 8.2977ms 1 8.2977ms 8.2977ms 8.2977ms KeBilinearInterpFw + 22.55% 7.8661ms 2 3.9330ms 1.5798ms 6.2863ms [CUDA memcpy DtoH] + + ==78544== API calls: + Time(%) Time Calls Avg Min Max Name + 46.85% 682.28ms 8 85.285ms 12.639us 682.03ms cudaStreamCreateWithFlags + 39.83% 580.00ms 4 145.00ms 302ns 550.27ms cudaFree + 9.82% 143.03ms 9 15.892ms 8.7090us 142.78ms cudaStreamCreate + 1.23% 17.983ms 7 2.5690ms 23.210us 6.4563ms cudaMemcpy + 1.23% 17.849ms 2 8.9247ms 8.4726ms 9.3768ms cudaStreamSynchronize + 0.66% 9.5969ms 7 1.3710ms 288.43us 2.4279ms cudaHostAlloc + 0.13% 1.9530ms 11 177.54us 7.6810us 591.06us cudaMalloc + 0.07% 1.0424ms 8 130.30us 1.6970us 453.72us cudaGetDevice + 0.04% 527.90us 40 13.197us 525ns 253.99us cudaEventCreateWithFlags + 0.03% 435.73us 348 1.2520us 124ns 42.704us cuDeviceGetAttribute + 0.03% 419.36us 1 419.36us 419.36us 419.36us cudaGetDeviceCount + 0.02% 260.75us 2 130.38us 129.32us 131.43us cudaGetDeviceProperties + 0.02% 222.32us 2 111.16us 106.94us 115.39us cudaLaunch + 0.01% 214.06us 4 53.514us 28.586us 77.655us cuDeviceGetName + 0.01% 115.45us 4 28.861us 9.8250us 44.526us cuDeviceTotalMem + 0.01% 83.988us 4 20.997us 578ns 77.760us 
cudaSetDevice + 0.00% 38.918us 1 38.918us 38.918us 38.918us cudaEventCreate + 0.00% 34.573us 31 1.1150us 279ns 12.784us cudaDeviceGetAttribute + 0.00% 17.767us 1 17.767us 17.767us 17.767us cudaProfilerStart + 0.00% 15.228us 2 7.6140us 3.5460us 11.682us cudaConfigureCall + 0.00% 14.536us 2 7.2680us 1.1490us 13.387us cudaGetLastError + 0.00% 8.6080us 26 331ns 173ns 783ns cudaSetupArgument + 0.00% 5.5470us 6 924ns 215ns 2.6780us cuDeviceGet + 0.00% 5.4090us 6 901ns 328ns 3.3320us cuDeviceGetCount + 0.00% 4.1770us 3 1.3920us 1.0630us 1.8300us cuDriverGetVersion + 0.00% 3.4650us 3 1.1550us 1.0810us 1.2680us cuInit 0.00% 830ns 1 830ns 830ns 830ns cudaRuntimeGetVersion diff --git a/doc/howto/optimization/index_en.rst b/doc/howto/optimization/index_en.rst deleted file mode 100644 index 1e2f16b5da7a7aa9e5075effea2d2a171a987e6c..0000000000000000000000000000000000000000 --- a/doc/howto/optimization/index_en.rst +++ /dev/null @@ -1,7 +0,0 @@ -How to Tune GPU Performance -=========================== - -.. toctree:: - :maxdepth: 3 - - gpu_profiling_en.rst diff --git a/doc/howto/source/api.rst b/doc/howto/source/api.rst deleted file mode 100644 index 30396c26b61827847cc5acc29cee1c3c8e7b226e..0000000000000000000000000000000000000000 --- a/doc/howto/source/api.rst +++ /dev/null @@ -1,5 +0,0 @@ -API -=== - -.. doxygenfile:: paddle/api/PaddleAPI.h -.. doxygenfile:: paddle/api/Internal.h diff --git a/doc/howto/source/cuda/index.rst b/doc/howto/source/cuda/index.rst deleted file mode 100644 index b0fed2e7f72c9a9671e56e114edfc88d72504dbe..0000000000000000000000000000000000000000 --- a/doc/howto/source/cuda/index.rst +++ /dev/null @@ -1,9 +0,0 @@ -CUDA -==== - -.. toctree:: - :maxdepth: 2 - - matrix.rst - nn.rst - utils.rst diff --git a/doc/howto/source/cuda/matrix.rst b/doc/howto/source/cuda/matrix.rst deleted file mode 100644 index b7699c83eda15d9003506f5fc57b51d52e7af823..0000000000000000000000000000000000000000 --- a/doc/howto/source/cuda/matrix.rst +++ /dev/null @@ -1,59 +0,0 @@ -Matrix -====== - -Base ----- - -hl_matrix.h -``````````` -.. doxygenfile:: paddle/cuda/include/hl_matrix.h - -hl_matrix_base.h -```````````````` -.. doxygenfile:: paddle/cuda/include/hl_matrix_base.cuh - -hl_matrix_apply.cuh -``````````````````` -.. doxygenfile:: paddle/cuda/include/hl_matrix_apply.cuh - -hl_matrix_ops.cuh -````````````````` -.. doxygenfile:: paddle/cuda/include/hl_matrix_ops.cuh - -hl_matrix_type.cuh -`````````````````` -.. doxygenfile:: paddle/cuda/include/hl_matrix_type.cuh - -hl_sse_matrix_kernel.cuh -```````````````````````` -.. doxygenfile:: paddle/cuda/include/hl_sse_matrix_kernel.cuh - -Matrix Function ---------------- - -hl_batch_transpose.h -```````````````````` -.. doxygenfile:: paddle/cuda/include/hl_batch_transpose.h - -hl_aggregate.h -`````````````` -.. doxygenfile:: paddle/cuda/include/hl_aggregate.h - -hl_top_k.h -`````````` -.. doxygenfile:: paddle/cuda/include/hl_top_k.h - -hl_table_apply.h -```````````````` -.. doxygenfile:: paddle/cuda/include/hl_table_apply.h - -Sparse Matrix -------------- - -hl_sparse.h -``````````` -.. doxygenfile:: paddle/cuda/include/hl_sparse.h - -hl_sparse.ph -```````````` -.. doxygenfile:: paddle/cuda/include/hl_sparse.ph diff --git a/doc/howto/source/cuda/nn.rst b/doc/howto/source/cuda/nn.rst deleted file mode 100644 index 5577d01e72a5b22847bda40528c46a28cacc1490..0000000000000000000000000000000000000000 --- a/doc/howto/source/cuda/nn.rst +++ /dev/null @@ -1,39 +0,0 @@ -Neural Network -============== - -Base ----- - -.. doxygenfile:: paddle/cuda/include/hl_gpu.h -.. 
doxygenfile:: paddle/cuda/include/hl_functions.h -.. doxygenfile:: paddle/cuda/include/hl_avx_functions.h -.. doxygenfile:: paddle/cuda/include/hl_gpu_functions.cuh -.. doxygenfile:: paddle/cuda/include/hl_activation_functions.h - - -CNN Related APIs ----------------- -.. doxygenfile:: paddle/cuda/include/hl_cnn.h -.. doxygenfile:: paddle/cuda/include/hl_cuda_cudnn.h -.. doxygenfile:: paddle/cuda/include/hl_cuda_cudnn.ph - -RNN Related APIs ----------------- - -.. doxygenfile:: paddle/cuda/include/hl_recurrent_apply.cuh -.. doxygenfile:: paddle/cuda/include/hl_sequence.h - -LSTM Model -`````````` - -.. doxygenfile:: paddle/cuda/include/hl_lstm.h -.. dpxygenfile:: paddle/cuda/include/hl_cpu_lstm.cuh -.. doxygenfile:: paddle/cuda/include/hl_gpu_lstm.cuh -.. doxygenfile:: paddle/cuda/include/hl_lstm_ops.cuh - -GRU Model -````````` - -.. doxygenfile:: paddle/cuda/include/hl_gru_ops.cuh -.. doxygenfile:: paddle/cuda/include/hl_cpu_gru.cuh -.. doxygenfile:: paddle/cuda/include/hl_gpu_gru.cuh diff --git a/doc/howto/source/cuda/utils.rst b/doc/howto/source/cuda/utils.rst deleted file mode 100644 index 850e8bd1c6670947e2a5f1b6f9b0d5b252117cbf..0000000000000000000000000000000000000000 --- a/doc/howto/source/cuda/utils.rst +++ /dev/null @@ -1,37 +0,0 @@ -Utils -===== - -Dynamic Link Libs ------------------ -.. doxygenfile:: paddle/cuda/include/hl_dso_loader.h - -GPU Resources -------------- - -hl_cuda.ph -`````````` -.. doxygenfile:: paddle/cuda/include/hl_cuda.ph - -hl_cuda.h -````````` -.. doxygenfile:: paddle/cuda/include/hl_cuda.h - -HPPL Base ---------- -.. doxygenfile:: paddle/cuda/include/hl_base.h - -CUBLAS Wrapper --------------- -.. doxygenfile:: paddle/cuda/include/hl_cuda_cublas.h - -Timer ------ -.. doxygenfile:: paddle/cuda/include/hl_time.h - -Thread Resource ---------------- -.. doxygenfile:: paddle/cuda/include/hl_thread.ph - -Device Function ---------------- -.. doxygenfile:: paddle/cuda/include/hl_device_functions.cuh diff --git a/doc/howto/source/gserver/activations.rst b/doc/howto/source/gserver/activations.rst deleted file mode 100644 index 55b9d3be383c07842d7066280cc0e174788db1fb..0000000000000000000000000000000000000000 --- a/doc/howto/source/gserver/activations.rst +++ /dev/null @@ -1,5 +0,0 @@ -Activations -=========== - -.. doxygenclass:: paddle::ActivationFunction - :members: diff --git a/doc/howto/source/gserver/dataproviders.rst b/doc/howto/source/gserver/dataproviders.rst deleted file mode 100644 index c30d9d6a36a6fbb664ae001274b6a7b0e721070f..0000000000000000000000000000000000000000 --- a/doc/howto/source/gserver/dataproviders.rst +++ /dev/null @@ -1,87 +0,0 @@ -============== -Data Providers -============== - -DataProviders -============= - -Base ----- -.. doxygenclass:: paddle::DataProvider - :members: - -DataProviderGroup ------------------ -.. doxygenclass:: paddle::DataProviderGroup - :members: - -MultiDataProvider ------------------ -.. doxygenclass:: paddle::MultiDataProvider - :members: - -PyDataProvider -============== - -IFieldScanner -------------- -.. doxygenclass:: paddle::IFieldScanner - :members: - -DenseScanner -------------- -.. doxygenclass:: paddle::DenseScanner - :members: - -IndexScanner -------------- -.. doxygenclass:: paddle::IndexScanner - :members: - -SparseNonValueScanner ---------------------- -.. doxygenclass:: paddle::SparseNonValueScanner - :members: - -SparseValueScanner ------------------- -.. doxygenclass:: paddle::SparseValueScanner - :members: - -SequenceScanner ---------------- -.. 
doxygenclass:: paddle::SparseValueScanner - :members: - -IPyDataProviderCache --------------------- -.. doxygenclass:: paddle::IPyDataProviderCache - :members: - -NoCacheStrategy ---------------- -.. doxygenclass:: paddle::NoCacheStrategy - :members: - -CacheOnePassInMemory --------------------- -.. doxygenclass:: paddle::CacheOnePassInMemory - :members: - -IPyDataProvider ---------------- -.. doxygenclass:: paddle::PyDataProvider2 - :members: - -ProtoDataProvider -================= - -ProtoDataProvider ----------------- -.. doxygenclass:: paddle::ProtoDataProvider - :members: - -ProtoSequenceDataProvider -------------------------- -.. doxygenclass:: paddle::ProtoSequenceDataProvider - :members: diff --git a/doc/howto/source/gserver/evaluators.rst b/doc/howto/source/gserver/evaluators.rst deleted file mode 100644 index f5361f76cd2b1c9c004221c03ea05b2c1f3a652e..0000000000000000000000000000000000000000 --- a/doc/howto/source/gserver/evaluators.rst +++ /dev/null @@ -1,103 +0,0 @@ -========== -Evaluators -========== - -Base -==== - -.. doxygenclass:: paddle::Evaluator - :members: - -Sum -=== - -SumEvaluator ------------- -.. doxygenclass:: paddle::SumEvaluator - :members: - -ColumnSumEvaluator ------------------- -.. doxygenclass:: paddle::ColumnSumEvaluator - :members: - -Classification -============== - -ClassificationErrorEvaluator ---------------------------- -.. doxygenclass:: paddle::ClassificationErrorEvaluator - :members: - -SequenceClassificationErrorEvaluator ------------------------------------- -.. doxygenclass:: paddle::SequenceClassificationErrorEvaluator - :members: - -AucEvaluator -------------- -.. doxygenclass:: paddle::AucEvaluator - :members: - -PrecisionRecallEvaluator ------------------------- -.. doxygenclass:: paddle::PrecisionRecallEvaluator - :members: - -ChunkEvaluator --------------- -.. doxygenclass:: paddle::ChunkEvaluator - :members: - -CTCEvaluator ------------- -.. doxygenclass:: paddle::CTCErrorEvaluator - :members: - - -Rank -==== - -PnpairEvaluator -------------- -.. doxygenclass:: paddle::PnpairEvaluator - :members: - -AucEvaluator -------------- -.. doxygenclass:: paddle::RankAucEvaluator - :members: - - -Printer -======= - -ValuePrinter -------------- -.. doxygenclass:: paddle::ValuePrinter - :members: - -GradientPrinter ---------------- -.. doxygenclass:: paddle::GradientPrinter - :members: - -MaxIdPrinter ------------- -.. doxygenclass:: paddle::MaxIdPrinter - :members: - -MaxFramePrinter ---------------- -.. doxygenclass:: paddle::MaxFramePrinter - :members: - -SequenceTextPrinter ------------------- -.. doxygenclass:: paddle::SequenceTextPrinter - :members: - -ClassificationErrorPrinter --------------------------- -.. doxygenclass:: paddle::ClassificationErrorPrinter - :members: diff --git a/doc/howto/source/gserver/gradientmachines.rst b/doc/howto/source/gserver/gradientmachines.rst deleted file mode 100644 index 04c8e91d0316a45ad10b0ed0513d3e8916b7c3d9..0000000000000000000000000000000000000000 --- a/doc/howto/source/gserver/gradientmachines.rst +++ /dev/null @@ -1,27 +0,0 @@ -Gradient Machines -================= - -GradientMachine ---------------- -.. doxygenclass:: paddle::GradientMachine - :members: - -GradientMachineMode -------------------- -.. doxygenclass:: paddle::IGradientMachineMode - :members: - -MultiGradientMachine --------------------- -.. doxygenclass:: paddle::MultiGradientMachine - :members: - -TrainerThread -````````````` -.. doxygenclass:: paddle::TrainerThread - :members: - -RecurrentGradientMachine ------------------------- -.. 
doxygenclass:: paddle::RecurrentGradientMachine - :members: diff --git a/doc/howto/source/gserver/index.rst b/doc/howto/source/gserver/index.rst deleted file mode 100644 index 223b00b9a9dbf1db40ce702cf0e154e5e53a8644..0000000000000000000000000000000000000000 --- a/doc/howto/source/gserver/index.rst +++ /dev/null @@ -1,12 +0,0 @@ -GServer -======= - -.. toctree:: - :maxdepth: 2 - - activations.rst - dataproviders.rst - evaluators.rst - gradientmachines.rst - layers.rst - neworks.rst diff --git a/doc/howto/source/gserver/layers.rst b/doc/howto/source/gserver/layers.rst deleted file mode 100644 index 191b2bdff26ed17437370a12036f9dbb174dae15..0000000000000000000000000000000000000000 --- a/doc/howto/source/gserver/layers.rst +++ /dev/null @@ -1,566 +0,0 @@ -====== -Layers -====== - -Base -==== - -Layer ------ -.. doxygenclass:: paddle::Layer - :members: - -Projection ----------- -.. doxygenclass:: paddle::Projection - :members: - -Operator --------- -.. doxygenclass:: paddle::Operator - :members: - -Data Layer -========== - -.. doxygenclass:: paddle::DataLayer - :members: - -Fully Connected Layers -====================== - -FullyConnectedLayer -------------------- -.. doxygenclass:: paddle::FullyConnectedLayer - :members: - -SelectiveFullyConnectedLayer ----------------------------- -.. doxygenclass:: paddle::SelectiveFullyConnectedLayer - :members: - -Conv Layers -=========== - -ConvBaseLayer -------------- -.. doxygenclass:: paddle::ConvBaseLayer - :members: - -ConvOperator ------------- -.. doxygenclass:: paddle::ConvOperator - :members: - -ConvShiftLayer --------------- -.. doxygenclass:: paddle::ConvShiftLayer - :members: - -CudnnConvLayer --------------- -.. doxygenclass:: paddle::CudnnConvLayer - :members: - -ExpandConvBaseLayer -------------------- -.. doxygenclass:: paddle::ExpandConvBaseLayer - :members: - -ExpandConvLayer ---------------- -.. doxygenclass:: paddle::ExpandConvLayer - :members: - -ContextProjection ------------------ -.. doxygenclass:: paddle::ContextProjection - :members: - -Pooling Layers -============== - -PoolLayer ---------- -.. doxygenclass:: paddle::PoolLayer - :members: - -PoolProjectionLayer -------------------- -.. doxygenclass:: paddle::PoolProjectionLayer - :members: - -CudnnPoolLayer --------------- -.. doxygenclass:: paddle::CudnnPoolLayer - :members: - -SpatialPyramidPoolLayer ------------------------ -.. doxygenclass:: paddle::SpatialPyramidPoolLayer - :members: - -MaxOutLayer ------------ -.. doxygenclass:: paddle::MaxOutLayer - :members: - -Norm Layers -=========== - -NormLayer ---------- -.. doxygenclass:: paddle::NormLayer - :members: - -CMRProjectionNormLayer ----------------------- -.. doxygenclass:: paddle::CMRProjectionNormLayer - :members: - -DataNormLayer -------------- -.. doxygenclass:: paddle::DataNormLayer - :members: - -ResponseNormLayer ------------------ -.. doxygenclass:: paddle::ResponseNormLayer - :members: - -BatchNormBaseLayer ------------------- -.. doxygenclass:: paddle::BatchNormBaseLayer - :members: - -BatchNormalizationLayer ------------------------ -.. doxygenclass:: paddle::BatchNormalizationLayer - :members: - -CudnnBatchNormLayer ------------------------ -.. doxygenclass:: paddle::CudnnBatchNormLayer - :members: - -SumToOneNormLayer ------------------ -.. doxygenclass:: paddle::SumToOneNormLayer - :members: - -Activation Layer -================ - -ParameterReluLayer ------------------- -.. doxygenclass:: paddle::ParameterReluLayer - :members: - -Recurrent Layers -================ - -RecurrentLayer --------------- -.. 
doxygenclass:: paddle::RecurrentLayer - :members: - -SequenceToBatch ---------------- -.. doxygenclass:: paddle::SequenceToBatch - :members: - -LSTM ----- -LstmLayer -````````` -.. doxygenclass:: paddle::LstmLayer - :members: - -LstmStepLayer -````````````` -.. doxygenclass:: paddle::LstmStepLayer - :members: - -LstmCompute -``````````` -.. doxygenclass:: paddle::LstmCompute - :members: - -MDLSTM ------- -MDLstmLayer -``````````` -.. doxygenclass:: paddle::MDLstmLayer - :members: - -CoordIterator -````````````` -.. doxygenclass:: paddle::CoordIterator - :members: - -GRU ---- -GatedRecurrentLayer -``````````````````` -.. doxygenclass:: paddle::GatedRecurrentLayer - :members: - -GruStepLayer -```````````` -.. doxygenclass:: paddle::GruStepLayer - :members: - -GruCompute -`````````` -.. doxygenclass:: paddle::GruCompute - :members: - -Recurrent Layer Group -===================== - -AgentLayer ----------- -.. doxygenclass:: paddle::AgentLayer - :members: - -SequenceAgentLayer ------------------- -.. doxygenclass:: paddle::SequenceAgentLayer - :members: - -GatherAgentLayer ----------------- -.. doxygenclass:: paddle::GatherAgentLayer - :members: - -SequenceGatherAgentLayer ------------------------- -.. doxygenclass:: paddle::SequenceGatherAgentLayer - :members: - -ScatterAgentLayer ------------------ -.. doxygenclass:: paddle::ScatterAgentLayer - :members: - -SequenceScatterAgentLayer -------------------------- -.. doxygenclass:: paddle::SequenceScatterAgentLayer - :members: - -GetOutputLayer --------------- -.. doxygenclass:: paddle::GetOutputLayer - :members: - -Mixed Layer -=========== -.. doxygenclass:: paddle::MixedLayer - :members: - -DotMulProjection ----------------- -.. doxygenclass:: paddle::DotMulProjection - :members: - -DotMulOperator --------------- -.. doxygenclass:: paddle::DotMulOperator - :members: - -FullMatrixProjection --------------------- -.. doxygenclass:: paddle::FullMatrixProjection - :members: - -IdentityProjection ------------------- -.. doxygenclass:: paddle::IdentityProjection - :members: - -IdentityOffsetProjection ------------------------- -.. doxygenclass:: paddle::IdentityOffsetProjection - :members: - -TableProjection ---------------- -.. doxygenclass:: paddle::TableProjection - :members: - -TransposedFullMatrixProjection ------------------------------- -.. doxygenclass:: paddle::TransposedFullMatrixProjection - :members: - -Aggregate Layers -================ - -Aggregate ---------- -AverageLayer -```````````` -.. doxygenclass:: paddle::AverageLayer - :members: - -MaxLayer -```````` -.. doxygenclass:: paddle::MaxLayer - :members: - -SequenceLastInstanceLayer -````````````````````````` -.. doxygenclass:: paddle::SequenceLastInstanceLayer - :members: - -Concat ------- -ConcatenateLayer -```````````````` -.. doxygenclass:: paddle::ConcatenateLayer - :members: - -ConcatenateLayer2 -````````````````` -.. doxygenclass:: paddle::ConcatenateLayer2 - :members: - -SequenceConcatLayer -``````````````````` -.. doxygenclass:: paddle::SequenceConcatLayer - :members: - -Subset ------- -SubSequenceLayer -```````````````` -.. doxygenclass:: paddle::SubSequenceLayer - :members: - -Reshaping Layers -================ - -BlockExpandLayer ----------------- -.. doxygenclass:: paddle::BlockExpandLayer - :members: - -ExpandLayer ------------ -.. doxygenclass:: paddle::ExpandLayer - :members: - -FeatureMapExpandLayer ---------------------- -.. doxygenclass:: paddle::FeatureMapExpandLayer - :members: - -ResizeLayer ------------ -.. 
doxygenclass:: paddle::ResizeLayer - :members: - -SequenceReshapeLayer --------------------- -.. doxygenclass:: paddle::SequenceReshapeLayer - :members: - -Math Layers -=========== - -AddtoLayer ----------- -.. doxygenclass:: paddle::AddtoLayer - :members: - -ConvexCombinationLayer ----------------------- -.. doxygenclass:: paddle::ConvexCombinationLayer - :members: - -InterpolationLayer ------------------- -.. doxygenclass:: paddle::InterpolationLayer - :members: - -MultiplexLayer --------------- -.. doxygenclass:: paddle::MultiplexLayer - :members: - -OuterProdLayer --------------- -.. doxygenclass:: paddle::OuterProdLayer - :members: - -PowerLayer ----------- -.. doxygenclass:: paddle::PowerLayer - :members: - -ScalingLayer ------------- -.. doxygenclass:: paddle::ScalingLayer - :members: - -SlopeInterceptLayer -------------------- -.. doxygenclass:: paddle::SlopeInterceptLayer - :members: - -TensorLayer ------------- -.. doxygenclass:: paddle::TensorLayer - :members: - -TransLayer ----------- -.. doxygenclass:: paddle::TransLayer - :members: - -Sampling Layers -=============== - -BilinearInterpLayer -------------------- -.. doxygenclass:: paddle::BilinearInterpLayer - :members: - -MultinomialSampler ------------------- -.. doxygenclass:: paddle::MultinomialSampler - :members: - -MaxIdLayer ----------- -.. doxygenclass:: paddle::MaxIdLayer - :members: - -SamplingIdLayer ---------------- -.. doxygenclass:: paddle::SamplingIdLayer - :members: - -Cost Layers -=========== - -CostLayer ------------ -.. doxygenclass:: paddle::CostLayer - :members: - -HuberTwoClass -````````````` -.. doxygenclass:: paddle::HuberTwoClass - :members: - -LambdaCost -``````````` -.. doxygenclass:: paddle::LambdaCost - :members: - -MultiBinaryLabelCrossEntropy -```````````````````````````` -.. doxygenclass:: paddle::MultiBinaryLabelCrossEntropy - :members: - -MultiClassCrossEntropy -``````````````````````` -.. doxygenclass:: paddle::MultiClassCrossEntropy - :members: - -MultiClassCrossEntropyWithSelfNorm -`````````````````````````````````` -.. doxygenclass:: paddle::MultiClassCrossEntropyWithSelfNorm - :members: - -RankingCost -``````````` -.. doxygenclass:: paddle::RankingCost - :members: - -SoftBinaryClassCrossEntropy -``````````````````````````` -.. doxygenclass:: paddle::SoftBinaryClassCrossEntropy - :members: - -SumOfSquaresCostLayer -````````````````````` -.. doxygenclass:: paddle::SumOfSquaresCostLayer - :members: - -SumCostLayer -````````````````````` -.. doxygenclass:: paddle::SumCostLayer - :members: - -CosSimLayer ------------ -.. doxygenclass:: paddle::CosSimLayer - :members: - -CosSimVecMatLayer ------------------ -.. doxygenclass:: paddle::CosSimVecMatLayer - :members: - -CRFDecodingLayer ----------------- -.. doxygenclass:: paddle::CRFDecodingLayer - :members: - -CRFLayer --------- -.. doxygenclass:: paddle::CRFLayer - :members: - -CTCLayer --------- -.. doxygenclass:: paddle::CTCLayer - :members: - -HierarchicalSigmoidLayer ------------------------- -.. doxygenclass:: paddle::HierarchicalSigmoidLayer - :members: - -LinearChainCRF --------------- -.. doxygenclass:: paddle::LinearChainCRF - :members: - -LinearChainCTC --------------- -.. doxygenclass:: paddle::LinearChainCTC - :members: - -NCELayer --------- -.. doxygenclass:: paddle::NCELayer - :members: - -Validation Layers ------------------ - -ValidationLayer -``````````````` -.. doxygenclass:: paddle::ValidationLayer - :members: - -AucValidation -````````````` -.. 
doxygenclass:: paddle::AucValidation - :members: - -PnpairValidation -```````````````` -.. doxygenclass:: paddle::PnpairValidation - :members: - -Check Layers -============ - -EosIdCheckLayer ---------------- -.. doxygenclass:: paddle::EosIdCheckLayer - :members: diff --git a/doc/howto/source/gserver/neworks.rst b/doc/howto/source/gserver/neworks.rst deleted file mode 100644 index 73fb60d549cc88f61d2e2d18c9ec31c37cf4fa9a..0000000000000000000000000000000000000000 --- a/doc/howto/source/gserver/neworks.rst +++ /dev/null @@ -1,12 +0,0 @@ -Networks -======== - -NeuralNetwork -------------- -.. doxygenclass:: paddle::NeuralNetwork - :members: - -ParallelNeuralNetwork ---------------------- -.. doxygenclass:: paddle::ParallelNeuralNetwork - :members: diff --git a/doc/howto/source/index.rst b/doc/howto/source/index.rst deleted file mode 100644 index 36323c888ee65147f59f28160dc26ca29235ba63..0000000000000000000000000000000000000000 --- a/doc/howto/source/index.rst +++ /dev/null @@ -1,14 +0,0 @@ -Source Code Documents -===================== - -.. toctree:: - :maxdepth: 1 - - gserver/index.rst - trainer.rst - parameter/index.rst - pserver/index.rst - api.rst - cuda/index.rst - math/index.rst - utils/index.rst diff --git a/doc/howto/source/math/functions.rst b/doc/howto/source/math/functions.rst deleted file mode 100644 index aef12e0f005226c6d40d74d0e858a11585339758..0000000000000000000000000000000000000000 --- a/doc/howto/source/math/functions.rst +++ /dev/null @@ -1,10 +0,0 @@ -Functions -========= - -MathFunctions -------------- -.. doxygenfile:: paddle/math/MathFunctions.h - -SIMDFunctions -------------- -.. doxygenfile:: paddle/math/SIMDFunctions.h diff --git a/doc/howto/source/math/index.rst b/doc/howto/source/math/index.rst deleted file mode 100644 index 2ec16f2b4450c870f9590aea4ad4ca7dc415b75d..0000000000000000000000000000000000000000 --- a/doc/howto/source/math/index.rst +++ /dev/null @@ -1,10 +0,0 @@ -Math -==== - -.. toctree:: - :maxdepth: 2 - - vector.rst - matrix.rst - functions.rst - utils.rst diff --git a/doc/howto/source/math/matrix.rst b/doc/howto/source/math/matrix.rst deleted file mode 100644 index 9bb20f618d229e1baea15e26378bf40d7c6e1783..0000000000000000000000000000000000000000 --- a/doc/howto/source/math/matrix.rst +++ /dev/null @@ -1,76 +0,0 @@ -Matrix -====== - -Base ----- - -BaseMatrix Template -``````````````````` -.. doxygenclass:: paddle::BaseMatrixT - :members: - -Matrix -`````` -.. doxygenclass:: paddle::Matrix - :members: - -MatrixOffset -```````````` -.. doxygenclass:: paddle::MatrixOffset - :members: - -CpuMatrix ---------- - -CpuMatrix -````````` -.. doxygenclass:: paddle::CpuMatrix - :members: - -SharedCpuMatrix -``````````````` -.. doxygenclass:: paddle::SharedCpuMatrix - :members: - -GpuMatrix ---------- -.. doxygenclass:: paddle::GpuMatrix - :members: - -CpuSparseMatrix ---------------- - -CpuSparseMatrix -``````````````` -.. doxygenclass:: paddle::CpuSparseMatrix - :members: - -SparseRowCpuMatrix -`````````````````` -.. doxygenclass:: paddle::SparseRowCpuMatrix - :members: - -SparseAutoGrowRowCpuMatrix -`````````````````````````` -.. doxygenclass:: paddle::SparseAutoGrowRowCpuMatrix - :members: - -SparsePrefetchRowCpuMatrix -`````````````````````````` -.. doxygenclass:: paddle::SparsePrefetchRowCpuMatrix - :members: - -SparseRowIdsCpuMatrix -````````````````````` -.. doxygenclass:: paddle::SparseRowIdsCpuMatrix - :members: - -CacheRowCpuMatrix -````````````````` -.. doxygenclass:: paddle::CacheRowCpuMatrix - :members: - -GpuSparseMatrix ---------------- -.. 
doxygenclass:: paddle::GpuSparseMatrix - :members: diff --git a/doc/howto/source/math/utils.rst b/doc/howto/source/math/utils.rst deleted file mode 100644 index 55d9961a390c205563a9ae4fbd87ac4ae90fc314..0000000000000000000000000000000000000000 --- a/doc/howto/source/math/utils.rst +++ /dev/null @@ -1,18 +0,0 @@ -Memory Manager -============== - -Memory Handle -------------- -.. doxygenfile:: paddle/math/MemoryHandle.h - -Allocator ---------- -.. doxygenfile:: paddle/math/Allocator.h - -PoolAllocator -````````````` -.. doxygenfile:: paddle/math/PoolAllocator.h - -Storage -------- -.. doxygenfile:: paddle/math/Storage.h diff --git a/doc/howto/source/math/vector.rst b/doc/howto/source/math/vector.rst deleted file mode 100644 index 07f7062abaf4f30b8967b594f4e16ab881f5414f..0000000000000000000000000000000000000000 --- a/doc/howto/source/math/vector.rst +++ /dev/null @@ -1,37 +0,0 @@ -Vector -====== - -BaseVector -`````````` -.. doxygenclass:: paddle::BaseVector - :members: - -Vector Template -``````````````` -.. doxygenclass:: paddle::VectorT - :members: - -CpuVector Template -`````````````````` -.. doxygenclass:: paddle::CpuVectorT - :members: - -GpuVector Template -`````````````````` -.. doxygenclass:: paddle::GpuVectorT - :members: - -ParallelCpuVector Template -`````````````````````````` -.. doxygenclass:: paddle::ParallelCpuVectorT - :members: - -ParallelGpuVector Template -`````````````````````````` -.. doxygenclass:: paddle::ParallelGpuVectorT - :members: - -CpuGpuVector Template -````````````````````` -.. doxygenclass:: paddle::CpuGpuVectorT - :members: diff --git a/doc/howto/source/parameter/index.rst b/doc/howto/source/parameter/index.rst deleted file mode 100644 index 3bf6948dc3478574d8d125d8461235f8827e4e42..0000000000000000000000000000000000000000 --- a/doc/howto/source/parameter/index.rst +++ /dev/null @@ -1,9 +0,0 @@ -Parameter -========= - -.. toctree:: - :maxdepth: 2 - - parameter.rst - optimizer.rst - updater.rst diff --git a/doc/howto/source/parameter/optimizer.rst b/doc/howto/source/parameter/optimizer.rst deleted file mode 100644 index b5b8b850b349d547c9e5508d3ebec3d7e00ea310..0000000000000000000000000000000000000000 --- a/doc/howto/source/parameter/optimizer.rst +++ /dev/null @@ -1,22 +0,0 @@ -Optimizer -========= - -ParameterOptimizer ------------------- -.. doxygenfile:: paddle/parameter/ParameterOptimizer.h - -Regularizer ------------ -.. doxygenfile:: paddle/parameter/Regularizer.h - -FirstOrderOptimizer -------------------- -.. doxygenfile:: paddle/parameter/FirstOrderOptimizer.h - -AverageOptimizer ----------------- -.. doxygenfile:: paddle/parameter/AverageOptimizer.h - -OptimizerWithRegularizer ------------------------- -.. doxygenfile:: paddle/parameter/OptimizerWithRegularizer.h diff --git a/doc/howto/source/parameter/parameter.rst b/doc/howto/source/parameter/parameter.rst deleted file mode 100644 index 2daa62d4e63b952cd93bba35ee32ce35ce768a0d..0000000000000000000000000000000000000000 --- a/doc/howto/source/parameter/parameter.rst +++ /dev/null @@ -1,12 +0,0 @@ -Parameter -========= - -Parameter ---------- -.. doxygenfile:: paddle/parameter/Argument.h -.. doxygenfile:: paddle/parameter/Parameter.h -.. doxygenfile:: paddle/parameter/ParallelParameter.h - -Weight ------- -.. 
doxygenfile:: paddle/parameter/Weight.h diff --git a/doc/howto/source/parameter/updater.rst b/doc/howto/source/parameter/updater.rst deleted file mode 100644 index dfa22e8e7d1d6f0713974835de93194d2cc58e6f..0000000000000000000000000000000000000000 --- a/doc/howto/source/parameter/updater.rst +++ /dev/null @@ -1,14 +0,0 @@ -Updater -======= - -Base ----- -.. doxygenfile:: paddle/parameter/ParameterUpdaterBase.h - -Hook ----- -.. doxygenfile:: paddle/parameter/ParameterUpdaterHook.h - -Functions ---------- -.. doxygenfile:: paddle/parameter/ParameterUpdateFunctions.h diff --git a/doc/howto/source/pserver/client.rst b/doc/howto/source/pserver/client.rst deleted file mode 100644 index e5bba0706a1d919104b85e23861ba490a2c828db..0000000000000000000000000000000000000000 --- a/doc/howto/source/pserver/client.rst +++ /dev/null @@ -1,12 +0,0 @@ -Client -====== - -BaseClient ----------- -.. doxygenclass:: paddle::BaseClient - :members: - -ParameterClient2 ----------------- -.. doxygenclass:: paddle::ParameterClient2 - :members: diff --git a/doc/howto/source/pserver/index.rst b/doc/howto/source/pserver/index.rst deleted file mode 100644 index 0031e9476bd063511cc2f0a8c209f35627cf44ba..0000000000000000000000000000000000000000 --- a/doc/howto/source/pserver/index.rst +++ /dev/null @@ -1,10 +0,0 @@ -PServer -======= - -.. toctree:: - :maxdepth: 2 - - client.rst - network.rst - server.rst - utils.rst diff --git a/doc/howto/source/pserver/network.rst b/doc/howto/source/pserver/network.rst deleted file mode 100644 index 7004c9d91fa9f2af11e15791ef682c108761027e..0000000000000000000000000000000000000000 --- a/doc/howto/source/pserver/network.rst +++ /dev/null @@ -1,27 +0,0 @@ -Network -======= - -SocketServer ------------- -.. doxygenclass:: paddle::SocketServer - :members: - -SocketWorker ------------- -.. doxygenclass:: paddle::SocketWorker - :members: - -SocketClient ------------- -.. doxygenclass:: paddle::SocketClient - :members: - -SocketChannel -------------- -.. doxygenclass:: paddle::SocketChannel - :members: - -MessageReader -------------- -.. doxygenclass:: paddle::MsgReader - :members: diff --git a/doc/howto/source/pserver/server.rst b/doc/howto/source/pserver/server.rst deleted file mode 100644 index 35301acf8ffe3d97e6124c37cf8fe1b43071e14e..0000000000000000000000000000000000000000 --- a/doc/howto/source/pserver/server.rst +++ /dev/null @@ -1,12 +0,0 @@ -Server -====== - -ProtoServer ------------ -.. doxygenclass:: paddle::ProtoServer - :members: - -ParameterServer2 ----------------- -.. doxygenclass:: paddle::ParameterServer2 - :members: diff --git a/doc/howto/source/trainer.rst b/doc/howto/source/trainer.rst deleted file mode 100644 index 85f1feb4fc941f94e65a6b1d037445d2367f65ec..0000000000000000000000000000000000000000 --- a/doc/howto/source/trainer.rst +++ /dev/null @@ -1,32 +0,0 @@ -Trainer -======= - -TrainerStats ------------- - -.. doxygenclass:: paddle::TrainerStats - :members: - -RemoteParameterUpdater ------------------------ - -.. doxygenclass:: paddle::RemoteParameterUpdater - :members: - -ConcurrentRemoteParameterUpdater --------------------------------- - -.. doxygenclass:: paddle::ConcurrentRemoteParameterUpdater - :members: - -SparseRemoteParameterUpdater ----------------------------- - -.. doxygenclass:: paddle::SparseRemoteParameterUpdater - :members: - -SparseRemoteParameterUpdaterComposite -------------------------------------- - -.. 
doxygenclass:: paddle::SparseRemoteParameterUpdaterComposite - :members: diff --git a/doc/howto/source/utils/customStackTrace.rst b/doc/howto/source/utils/customStackTrace.rst deleted file mode 100644 index cdc8930739eb4b4d6308ff1fbce170d2977d42e8..0000000000000000000000000000000000000000 --- a/doc/howto/source/utils/customStackTrace.rst +++ /dev/null @@ -1,4 +0,0 @@ -CustomStackTrace -================ -.. doxygenclass:: paddle::CustomStackTrace - :members: diff --git a/doc/howto/source/utils/enum.rst b/doc/howto/source/utils/enum.rst deleted file mode 100644 index e0da75afe164f9dab59b862faa7230fc57423e50..0000000000000000000000000000000000000000 --- a/doc/howto/source/utils/enum.rst +++ /dev/null @@ -1,3 +0,0 @@ -Enumeration wrapper -=================== -.. doxygennamespace:: paddle::enumeration_wrapper diff --git a/doc/howto/source/utils/index.rst b/doc/howto/source/utils/index.rst deleted file mode 100644 index 7ddc47d1726f7627852be922d2b769d0752aa799..0000000000000000000000000000000000000000 --- a/doc/howto/source/utils/index.rst +++ /dev/null @@ -1,11 +0,0 @@ -Utils -===== - -.. toctree:: - :maxdepth: 2 - - lock.rst - queue.rst - thread.rst - customStackTrace.rst - enum.rst diff --git a/doc/howto/source/utils/lock.rst b/doc/howto/source/utils/lock.rst deleted file mode 100644 index f011acb9431f0f3dc3b2ba27fcfe71fe6eb07ae9..0000000000000000000000000000000000000000 --- a/doc/howto/source/utils/lock.rst +++ /dev/null @@ -1,32 +0,0 @@ -Lock -==== - -RWLock ------- -.. doxygenclass:: paddle::RWLock - :members: - -ReadLockGuard -------------- -.. doxygenclass:: paddle::ReadLockGuard - :members: - -SpinLock --------- -.. doxygenclass:: paddle::SpinLock - :members: - -Semaphore ---------- -.. doxygenclass:: paddle::Semaphore - :members: - -ThreadBarrier -------------- -.. doxygenclass:: paddle::ThreadBarrier - :members: - -LockedCondition ---------------- -.. doxygenclass:: paddle::LockedCondition - :members: diff --git a/doc/howto/source/utils/queue.rst b/doc/howto/source/utils/queue.rst deleted file mode 100644 index 98192648e2d61e622c2337d10ba024dd676ee685..0000000000000000000000000000000000000000 --- a/doc/howto/source/utils/queue.rst +++ /dev/null @@ -1,12 +0,0 @@ -Queue -===== - -Queue ------ -.. doxygenclass:: paddle::Queue - :members: - -BlockingQueue -------------- -.. doxygenclass:: paddle::BlockingQueue - :members: diff --git a/doc/howto/source/utils/thread.rst b/doc/howto/source/utils/thread.rst deleted file mode 100644 index 23d379a9894e5fc22bc6795a480a53d768e608e6..0000000000000000000000000000000000000000 --- a/doc/howto/source/utils/thread.rst +++ /dev/null @@ -1,27 +0,0 @@ -Thread -====== - -Thread ------- -.. doxygenclass:: paddle::Thread - :members: - -ThreadWorker ------------- -.. doxygenclass:: paddle::ThreadWorker - :members: - -SyncThreadPool --------------- -.. doxygenclass:: paddle::SyncThreadPool - :members: - -MultiThreadWorker ------------------ -.. doxygenclass:: paddle::MultiThreadWorker - :members: - -AsyncThreadPool ---------------- -.. doxygenclass:: paddle::AsyncThreadPool - :members: diff --git a/doc/howto/usage/cluster/cluster_train_cn.md b/doc/howto/usage/cluster/cluster_train_cn.md new file mode 100644 index 0000000000000000000000000000000000000000..274452fbf0c595ad7b4dbeffe85ad9038f12b458 --- /dev/null +++ b/doc/howto/usage/cluster/cluster_train_cn.md @@ -0,0 +1,159 @@ +```eval_rst +.. 
_cluster_train:
+```
+
+# 运行分布式训练
+
+在本文中,我们将阐释如何在集群上运行分布式 Paddle 训练作业。我们将以[推荐系统](https://github.com/baidu/Paddle/tree/develop/demo/recommendation)为例,创建单进程训练的分布式版本。
+
+本文使用的[脚本](https://github.com/baidu/Paddle/tree/develop/paddle/scripts/cluster_train)通过 SSH 运行分布式作业。它们还可以供那些运行更复杂的集群管理系统(如 MPI 和 [Kubernetes](https://github.com/PaddlePaddle/Paddle/tree/develop/doc/howto/usage/k8s))的用户参考。
+
+## 前提条件
+
+1. 上述脚本使用 Python 库 [fabric](http://www.fabfile.org/) 来运行 SSH 命令。我们使用 `pip` 来安装 fabric:
+
+   ```bash
+   pip install fabric
+   ```
+
+2. 我们需要在集群的所有节点上安装 PaddlePaddle。如果要启用GPU,需要在 `/usr/local/cuda` 中安装 CUDA;否则 Paddle 将在运行时报错。
+
+3. 在 `cluster_train/conf.py` 中设置 `ROOT_DIR`,该 ROOT_DIR 要在所有节点上存在。为了方便起见,我们通常在所有节点上创建一个 Unix 用户 `paddle`,并设置 `ROOT_DIR=/home/paddle`。这样,我们可以将 SSH 公钥写入 `/home/paddle/.ssh/authorized_keys`,以便用户 `paddle` 可以 SSH 到所有节点而不用密码。
+
+## 准备工作空间
+
+我们将放置依赖库、配置等文件的目录视为 *工作空间(workspace)*。
+
+这些 `train/test` 数据应该在启动集群作业之前准备好。为了满足训练/测试数据放置在工作空间中不同目录的要求,PaddlePaddle 根据在模型配置文件中使用的名为 `train.list/test.list` 的索引文件引用训练/测试数据,所以训练/测试数据也包含 train.list/test.list 两个列表文件。所有本地训练 demo 已经提供了脚本来帮助您创建这两个文件,并且集群作业中的所有节点将在正常情况下处理具有相同逻辑代码的文件。
+
+通常,你可以使用本地训练中的相同模型文件进行集群训练。请记住,在模型文件的 `setting` 函数中设置的 `batch_size` 表示在集群作业**每个**节点中的 batch 大小,而不是使用同步 SGD 时的总 batch 大小。
+
+以下步骤基于 demo 目录中的 [demo/recommendation](https://github.com/PaddlePaddle/Paddle/tree/develop/demo/recommendation)。
+
+你只需完成 demo/recommendation 教程文档到 `Train` 的部分,之后你会得到训练/测试数据和模型配置文件。最后,只需使用 demo/recommendation 作为集群训练的工作空间。
+
+最后,你的工作空间应如下所示:
+```
+.
+|-- common_utils.py
+|-- data
+|   |-- config.json
+|   |-- config_generator.py
+|   |-- meta.bin
+|   |-- meta_config.json
+|   |-- meta_generator.py
+|   |-- ml-1m
+|   |-- ml_data.sh
+|   |-- ratings.dat.test
+|   |-- ratings.dat.train
+|   |-- split.py
+|   |-- test.list
+|   `-- train.list
+|-- dataprovider.py
+|-- evaluate.sh
+|-- prediction.py
+|-- preprocess.sh
+|-- requirements.txt
+|-- run.sh
+`-- trainer_config.py
+```
+虽然这些文件并非都是集群训练所必需的,但是也没有必要删除无用的文件。
+
+`trainer_config.py`
+表示模型配置文件。
+
+`train.list` 和 `test.list`
+文件索引。它存储当前节点所有训练/测试数据的相对或绝对文件路径。
+
+`dataprovider.py`
+用于读取训练/测试样本。这与本地训练相同。
+
+`data`
+数据目录中的所有文件被 train.list/test.list 引用。
+
+
+## 准备集群作业配置
+
+以下选项必须在 cluster_train/conf.py 中认真设置。
+
+`HOSTS` 所有运行集群作业的节点的主机名或 IP。你还可以将用户和 ssh 端口附加到主机名上,例如 root@192.168.100.17:9090。
+
+`ROOT_DIR` 用于放置 JOB 工作空间目录的工作空间 ROOT 目录。
+
+`PADDLE_NIC` 集群通信通道的 NIC(Network Interface Card,网络接口卡)接口名称,例如以太网的 eth0,infiniband 的 ib0。
+
+`PADDLE_PORT` 集群通信通道的端口号。
+
+`PADDLE_PORTS_NUM` 用于集群通信通道的端口数。如果集群节点数量少(少于5〜6个节点),建议将其设置得大一些,如2〜8,以获得更好的网络性能。
+
+`PADDLE_PORTS_NUM_FOR_SPARSE` 用于 sparse remote updater 集群通信通道的端口数。如果使用 sparse remote update,则可以像 `PADDLE_PORTS_NUM` 一样设置。
+
+`LD_LIBRARY_PATH` 为集群作业设置额外的 LD_LIBRARY_PATH。你可以使用它来设置 CUDA 库的路径。
+
+默认配置如下:
+
+```python
+HOSTS = [
+    "root@192.168.100.17",
+    "root@192.168.100.18",
+    ]
+
+'''
+工作空间配置
+'''
+
+#工作空间根目录
+ROOT_DIR = "/home/paddle"
+
+'''
+网络配置
+'''
+#pserver NIC
+PADDLE_NIC = "eth0"
+#pserver 端口
+PADDLE_PORT = 7164
+#pserver 端口数
+PADDLE_PORTS_NUM = 2
+#pserver sparse 端口数
+PADDLE_PORTS_NUM_FOR_SPARSE = 2
+
+#集群作业中所有进程的环境设置
+LD_LIBRARY_PATH="/usr/local/cuda/lib64:/usr/lib64"
+```
+
+### 启动集群作业
+`paddle.py` 提供了自动化脚本来启动不同节点中的所有 PaddlePaddle 集群进程。默认情况下,所有命令行选项都可以设置为 `paddle.py` 的命令选项,并且 `paddle.py` 将透明、自动地将这些选项应用到 PaddlePaddle 底层进程。
+
+`paddle.py` 为方便作业启动提供了两个独特的命令选项。
+
+`job_dispatch_package` 设为本地 `workspace` 目录,它将被分发到 conf.py 中设置的所有节点。它可以帮助频繁修改和访问工作空间文件的用户减少负担,否则频繁的多节点工作空间部署可能会很麻烦。
+`job_workspace` 设为已部署的工作空间目录,`paddle.py` 将跳过分发阶段,直接启动所有节点的集群作业。它可以帮助减少分发延迟。
+`cluster_train/run.sh` 提供了命令样例来运行 `demo/recommendation` 集群作业,只需用你定义的目录修改 `job_dispatch_package` 和 `job_workspace`,然后:
+```
+sh run.sh
+```
+
+集群作业将会在几秒后启动。
+
+### 终止集群作业
+`paddle.py` 能捕获 `Ctrl + C` 发出的 SIGINT 信号来自动终止它启动的所有进程。只需中断 `paddle.py` 任务来终止集群作业。如果程序崩溃,你也可以手动终止。
+
+### 检查集群训练结果
+详细信息请检查 $workspace/log 里的日志,每一个节点都有相同的日志结构。
+
+`paddle_trainer.INFO`
+提供几乎所有训练的内部输出日志,与本地训练相同。可以在这里检查模型训练的收敛情况。
+
+`paddle_pserver2.INFO`
+提供 pserver 运行日志,有助于诊断分布式错误。
+
+`server.log`
+提供 pserver 进程的 stderr 和 stdout。训练失败时可以检查错误日志。
+
+`train.log`
+提供训练过程的 stderr 和 stdout。训练失败时可以检查错误日志。
+
+### 检查模型输出
+运行完成后,模型文件将被写入节点 0 的 `output` 目录中。
+工作空间中的 `nodefile` 表示当前集群作业的节点 ID。
diff --git a/doc/howto/usage/cluster/cluster_train_en.md b/doc/howto/usage/cluster/cluster_train_en.md
new file mode 100644
index 0000000000000000000000000000000000000000..c60876721cbf5565d6e48c8061811aacada748cd
--- /dev/null
+++ b/doc/howto/usage/cluster/cluster_train_en.md
@@ -0,0 +1,156 @@
+# Run Distributed Training
+
+In this article, we explain how to run distributed Paddle training jobs on clusters. We will create the distributed version of the single-process training example, [recommendation](https://github.com/baidu/Paddle/tree/develop/demo/recommendation).
+
+[Scripts](https://github.com/baidu/Paddle/tree/develop/paddle/scripts/cluster_train) used in this article launch distributed jobs via SSH. They also work as a reference for users running more sophisticated cluster management systems like MPI and [Kubernetes](https://github.com/PaddlePaddle/Paddle/tree/develop/doc/howto/usage/k8s).
+
+## Prerequisites
+
+1. The aforementioned scripts use the Python library [fabric](http://www.fabfile.org/) to run SSH commands. We can use `pip` to install fabric:
+
+   ```bash
+   pip install fabric
+   ```
+
+1. We need to install PaddlePaddle on all nodes in the cluster. To enable GPUs, we need to install CUDA in `/usr/local/cuda`; otherwise Paddle will report errors at runtime.
+
+1. Set the `ROOT_DIR` variable in `cluster_train/conf.py` on all nodes. For convenience, we often create a Unix user `paddle` on all nodes and set `ROOT_DIR=/home/paddle`. In this way, we can write public SSH keys into `/home/paddle/.ssh/authorized_keys` so that user `paddle` can SSH to all nodes without a password.
+
+## Prepare Job Workspace
+
+We refer to the directory where we put dependent libraries, config files, etc., as the *workspace*.
+
+The train/test data should be prepared before launching the cluster job. Because train/test data may be placed in directories different from the workspace, PaddlePaddle locates them through index files named `train.list` and `test.list`, which are referenced in the model config file. So the train/test data should also come with these two list files. Every local training demo already provides scripts to help you create them, and all nodes of a cluster job normally process these files with the same logic.
+
+Generally, you can use the same model file from local training for cluster training. Keep in mind that the `batch_size` set in the `setting` function of the model file means the batch size on **each** node of the cluster job, not the total batch size, when synchronous SGD is used.
+
+The following steps are based on the [demo/recommendation](https://github.com/PaddlePaddle/Paddle/tree/develop/demo/recommendation) demo in the demo directory.
+
+Just go through the demo/recommendation tutorial doc up to the `Train` section; at the end you will have the train/test data and the model configuration file.
+Finally, use demo/recommendation as the workspace for cluster training.
+
+Your workspace should then look as follows:
+```
+.
+|-- common_utils.py
+|-- data
+|   |-- config.json
+|   |-- config_generator.py
+|   |-- meta.bin
+|   |-- meta_config.json
+|   |-- meta_generator.py
+|   |-- ml-1m
+|   |-- ml_data.sh
+|   |-- ratings.dat.test
+|   |-- ratings.dat.train
+|   |-- split.py
+|   |-- test.list
+|   `-- train.list
+|-- dataprovider.py
+|-- evaluate.sh
+|-- prediction.py
+|-- preprocess.sh
+|-- requirements.txt
+|-- run.sh
+`-- trainer_config.py
+```
+Not all of these files are needed for cluster training, but it is not necessary to remove the unused ones.
+
+`trainer_config.py`
+Indicates the model config file.
+
+`train.list` and `test.list`
+File indexes. They store the relative or absolute file paths of all train/test data on the current node.
+
+`dataprovider.py`
+Used to read train/test samples. It is the same as in local training.
+
+`data`
+All files in the data directory are referred to by train.list/test.list, which are in turn referred to by the data provider.
+
+
+## Prepare Cluster Job Configuration
+
+The options below must be carefully set in cluster_train/conf.py.
+
+`HOSTS` The hostnames or IP addresses of all nodes that will run the cluster job. You can also append a user and SSH port to a hostname, such as root@192.168.100.17:9090.
+
+`ROOT_DIR` The workspace ROOT directory for placing the JOB workspace directories.
+
+`PADDLE_NIC` The NIC (Network Interface Card) name for the cluster communication channel, such as eth0 for Ethernet or ib0 for InfiniBand.
+
+`PADDLE_PORT` The port number for the cluster communication channel.
+
+`PADDLE_PORTS_NUM` The number of ports used for the cluster communication channel. If the number of cluster nodes is small (fewer than 5~6 nodes), we recommend setting it larger, such as 2~8, for better network performance.
+
+`PADDLE_PORTS_NUM_FOR_SPARSE` The number of ports used for the sparse remote updater's communication channel. If sparse remote update is used, set it like `PADDLE_PORTS_NUM`.
+
+`LD_LIBRARY_PATH` Sets an additional LD_LIBRARY_PATH for the cluster job. You can use it to set the path of the CUDA libraries.
+
+The default configuration is as follows:
+
+```python
+HOSTS = [
+    "root@192.168.100.17",
+    "root@192.168.100.18",
+    ]
+
+'''
+workspace configuration
+'''
+
+#root dir for workspace
+ROOT_DIR = "/home/paddle"
+
+'''
+network configuration
+'''
+#pserver nics
+PADDLE_NIC = "eth0"
+#pserver port
+PADDLE_PORT = 7164
+#pserver ports num
+PADDLE_PORTS_NUM = 2
+#pserver sparse ports num
+PADDLE_PORTS_NUM_FOR_SPARSE = 2
+
+#environment settings for all processes in the cluster job
+LD_LIBRARY_PATH="/usr/local/cuda/lib64:/usr/lib64"
+```
+
+### Launching Cluster Job
+`paddle.py` provides an automated script to start all PaddlePaddle cluster processes on different nodes. By default, all command line options can be set as `paddle.py` command options, and `paddle.py` will transparently and automatically pass these options to the lower-level PaddlePaddle processes.
+
+`paddle.py` provides two special command options for easy job launching; a sample invocation is sketched below.
+
+`job_dispatch_package` Set it to the local `workspace` directory; it will be dispatched to all nodes set in conf.py. This helps users who frequently modify workspace files, since repeated multi-node workspace deployment can otherwise be tedious.
+`job_workspace` Set it to an already deployed workspace directory; `paddle.py` will skip the dispatch stage and directly launch the cluster job on all nodes. It helps to reduce heavy dispatch latency.
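+
+For illustration only, a direct `paddle.py` invocation using `job_dispatch_package` might look like the sketch below. The paths and flag values are hypothetical, and the exact set of pass-through options depends on your trainer configuration:
+
+```bash
+# Dispatch the local workspace to every node listed in conf.py, then launch
+# the cluster job. All paths and values below are illustrative only.
+python paddle.py \
+  --job_dispatch_package="${HOME}/workspace" \
+  --config=./trainer_config.py \
+  --use_gpu=0 \
+  --local=0 \
+  --trainer_count=4 \
+  --num_passes=10 \
+  --save_dir=./output
+```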
+
+`cluster_train/run.sh` provides a command line sample to run the `demo/recommendation` cluster job; just modify `job_dispatch_package` and `job_workspace` to your own directories, then:
+```
+sh run.sh
+```
+
+The cluster job will start in several seconds.
+
+### Kill Cluster Job
+`paddle.py` can capture the `Ctrl + C` SIGINT signal to automatically kill all processes launched by it. So just stop `paddle.py` to kill the cluster job. You should kill the job manually if the program crashed.
+
+### Check Cluster Training Result
+Check the logs in $workspace/log for details; each node has the same log structure.
+
+`paddle_trainer.INFO`
+It provides almost all internal output logs for training, the same as local training. Check runtime model convergence here.
+
+`paddle_pserver2.INFO`
+It provides the pserver running log, which can help to diagnose distributed errors.
+
+`server.log`
+It provides the stderr and stdout of the pserver process. Check the error log if training crashes.
+
+`train.log`
+It provides the stderr and stdout of the trainer process. Check the error log if training crashes.
+
+### Check Model Output
+After one pass finishes, model files will be written to the `output` directory on node 0.
+The `nodefile` in the workspace indicates the node id of the current cluster job.
diff --git a/doc/howto/usage/cmd_parameter/arguments_cn.md b/doc/howto/usage/cmd_parameter/arguments_cn.md
new file mode 100644
index 0000000000000000000000000000000000000000..f7aa525054468670f59309ddf9206af55bb77869
--- /dev/null
+++ b/doc/howto/usage/cmd_parameter/arguments_cn.md
@@ -0,0 +1,394 @@
+# 参数概述
+
+虽然Paddle看起来包含了众多参数,但是大部分参数是为开发者提供的,或者已经在集群提交环境中自动设置,因此用户并不需要关心它们。在此,根据这些参数的使用场合,我们将它们划分为不同的类别。例如,`通用`类别中的参数可用于所有场合。某些参数只可用于特定的层中,而有些参数需要在集群多机训练中使用等。
+
+| 类别 | 参数 |
+| :--- | :--- |
+| 通用 | job, use_gpu, local, config, config_args, num_passes, trainer_count, version, show_layer_stat |
+| 训练 | dot_period, test_period, saving_period, show_parameter_stats_period, init_model_path, load_missing_parameter_strategy, saving_period_by_batches, use_old_updater, enable_grad_share, grad_share_block_num, log_error_clipping, log_clipping, save_only_one, start_pass |
+| 训练/测试 | save_dir |
+| 训练过程中测试 | test_period, average_test_period |
+| 测试 | model_list, test_wait, test_pass, predict_output_dir, distribute_test |
+| Auc/正负对验证(PnpairValidation) | predict_file |
+| GPU | gpu_id, parallel_nn, allow_only_one_model_on_one_gpu, cudnn_dir, cuda_dir, cudnn_conv_workspace_limit_in_mb |
+| 递归神经网络(RNN) | beam_size, rnn_use_batch, prev_batch_state, diy_beam_search_prob_so |
+| 参数服务器(PServer) | start_pserver, pservers, port, port_num, ports_num_for_sparse, nics, rdma_tcp, small_messages, loadsave_parameters_in_pserver, log_period_server, pserver_num_threads, sock_send_buf_size, sock_recv_buf_size, num_gradient_servers, parameter_block_size, parameter_block_size_for_sparse |
+| 异步随机梯度下降(Async SGD) | async_count, async_lagged_ratio_min, async_lagged_ratio_default |
+| 性能调优(Performance Tuning) | log_barrier_abstract, log_barrier_lowest_nodes, log_barrier_show_log, check_sparse_distribution_batches, check_sparse_distribution_ratio, check_sparse_distribution_unbalance_degree, check_sparse_distribution_in_pserver, show_check_sparse_distribution_log |
+| 数据提供器(Data Provider) | memory_threshold_on_load_data |
+| 随机数 | seed, thread_local_rand_use_global_seed |
+| 单元测试 | checkgrad_eps |
+| 矩阵/向量 | enable_parallel_vector |
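+
+下面的命令行仅作示意(路径与取值均为假设),演示如何组合使用上表中的若干参数进行本地训练:
+
+```bash
+# 以 CPU 模式本地训练 30 轮,使用 4 个计算线程(取值仅为示意)
+paddle train \
+  --config=./trainer_config.py \
+  --use_gpu=false \
+  --trainer_count=4 \
+  --num_passes=30 \
+  --save_dir=./output
+```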
+
diff --git a/doc/howto/usage/cmd_parameter/arguments_en.md b/doc/howto/usage/cmd_parameter/arguments_en.md
new file mode 100644
index 0000000000000000000000000000000000000000..d1963067bda949b11ececefed3db7db1432c6223
--- /dev/null
+++ b/doc/howto/usage/cmd_parameter/arguments_en.md
@@ -0,0 +1,394 @@
+# Argument Outline
+
+It looks like there are a lot of arguments. However, most of them are for developers or are already set automatically in the cluster submission environment, so users do not need to care about them. Here, we divide these arguments into several classes according to the scenario they are used in. For example, the arguments in `common` can be used in all scenarios; some arguments can only be used in certain layers; some are only needed by multi-machine training in a cluster; etc.
+
+
+| Class | Arguments |
+| :------ | :------ |
+| common | `job`, `use_gpu`, `local`, `config`, `config_args`, `num_passes`, `trainer_count`, `version`, `show_layer_stat` |
+| train | `dot_period`, `test_period`, `saving_period`, `show_parameter_stats_period`, `init_model_path`, `load_missing_parameter_strategy`, `saving_period_by_batches`, `use_old_updater`, `enable_grad_share`, `grad_share_block_num`, `log_error_clipping`, `log_clipping`, `save_only_one`, `start_pass` |
+| train/test | `save_dir` |
+| testing during training | `test_period`, `average_test_period` |
+| test | `model_list`, `test_wait`, `test_pass`, `predict_output_dir`, `distribute_test` |
+| Auc/PnpairValidation | `predict_file` |
+| GPU | `gpu_id`, `parallel_nn`, `allow_only_one_model_on_one_gpu`, `cudnn_dir`, `cuda_dir`, `cudnn_conv_workspace_limit_in_mb` |
+| RNN | `beam_size`, `rnn_use_batch`, `prev_batch_state`, `diy_beam_search_prob_so` |
+| PServer | `start_pserver`, `pservers`, `port`, `ports_num`, `ports_num_for_sparse`, `nics`, `rdma_tcp`, `small_messages`, `loadsave_parameters_in_pserver`, `log_period_server`, `pserver_num_threads`, `sock_send_buf_size`, `sock_recv_buf_size`, `num_gradient_servers`, `parameter_block_size`, `parameter_block_size_for_sparse` |
+| Async SGD | `async_count`, `async_lagged_ratio_min`, `async_lagged_ratio_default` |
+| Performance Tuning | `log_barrier_abstract`, `log_barrier_lowest_nodes`, `log_barrier_show_log`, `check_sparse_distribution_batches`, `check_sparse_distribution_ratio`, `check_sparse_distribution_unbalance_degree`, `check_sparse_distribution_in_pserver`, `show_check_sparse_distribution_log` |
+| Data Provider | `memory_threshold_on_load_data` |
+| RandomNumber | `seed`, `thread_local_rand_use_global_seed` |
+| UnitTest | `checkgrad_eps` |
+| Matrix/Vector | `enable_parallel_vector` |
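+
+For example, a typical local training job combines arguments from the `common` and `train` classes. The command below is only a sketch: `trainer_config.py` is a hypothetical config file name, and the values should be adjusted for your task:
+
+```
+paddle train \
+  --use_gpu=0 \
+  --config=trainer_config.py \
+  --save_dir=./output \
+  --num_passes=10 \
+  --log_period=100
+```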
+ diff --git a/doc/howto/usage/cmd_parameter/detail_introduction_cn.md b/doc/howto/usage/cmd_parameter/detail_introduction_cn.md new file mode 100644 index 0000000000000000000000000000000000000000..b4625ba68cf23e5697554ba94efaf0b873f2c1de --- /dev/null +++ b/doc/howto/usage/cmd_parameter/detail_introduction_cn.md @@ -0,0 +1,323 @@ +# 细节描述 + +## 通用 + +* `--job` + - 工作模式,包括: **train, test, checkgrad**,其中checkgrad主要为开发者使用,使用者不需要关心。 + - 类型: string (默认: train) + +* `--config` + - 用于指定网络配置文件。 + - 类型: string (默认: null). + +* `--use_gpu` + - 训练过程是否使用GPU,设置为true使用GPU模式,否则使用CPU模式。 + - 类型: bool (默认: 1). + +* `--local` +  - 训练过程是否为本地模式,设置为true使用本地训练或者使用集群上的一个节点,否则使用多机训练。 + - 类型: bool (默认: 1). + +* `--trainer_count` + - 指定一台机器上使用的线程数。例如,trainer_count = 4, 意思是在GPU模式下使用4个GPU,或者在CPU模式下使用4个线程。每个线程(或GPU)分配到当前数据块样本数的四分之一。也就是说,如果在训练配置中设置batch_size为512,每个线程分配到128个样本用于训练。 + - 类型: int32 (默认: 1). + +* `--num_passes` + - 当模式为`--job=train`时, 该参数的意思是训练num_passes轮。每轮会将数据集中的所有训练样本使用一次。当模式为`--job=test`时,意思是使用第test_pass个模型到第 num_passes-1 个模型测试数据。 + - 类型: int32 (默认: 100). + +* `--config_args` + - 传递给配置文件的参数。格式: key1=value1,key2=value2. + - 类型: string (默认: null). + +* `--version` + - 是否打印版本信息。 + - 类型: bool (默认: 0). + +* `--show_layer_stat` + - 是否显示**每个批次数据**中每层的数值统计. + - 类型: bool (默认: 0). + +## 训练 + +* `--log_period` + - 每log_period个批次打印日志进度. + - 类型: int32 (默认: 100). + +* `--dot_period` + - 每dot_period个批次输出符号'.'. + - 类型: int32 (默认: 1). + +* `--saving_period` + - 每saving_period轮保存训练参数. + - 类型: int32 (默认: 1). + +* `--save_dir` + - 保存模型参数的目录,需要明确指定,但不需要提前创建。 + - 类型: string (默认: null). + +* `--start_pass` + - 从start_pass轮开始训练,会加载上一轮的参数。 + - 类型: int32 (默认: 0). + +* `--show_parameter_stats_period` + - 在训练过程中每show_parameter_stats_period个批次输出参数统计。默认不显示。 + - 类型: int32 (默认: 0). + +* `--save_only_one` + - 只保存最后一轮的参数,而之前的参数将会被删除。 + - 类型: bool (默认: 0). + +* `--load_missing_parameter_strategy` + - 当模型参数不存在时,指定加载的方式。目前支持fail/rand/zero三种操作. + - `fail`: 程序直接退出. + - `rand`: 根据网络配置中的**initial\_strategy**采用均匀分布或者高斯分布初始化。均匀分布的范围是: **[mean - std, mean + std]**, 其中mean和std是训练配置中的参数. + - `zero`: 所有参数置为零. + - 类型: string (默认: fail). + +* `--init_model_path` + - 初始化模型的路径。如果设置该参数,start\_pass将不起作用。同样也可以在测试模式中指定模型路径。 + - 类型: string (默认: null). + +* `--saving_period_by_batches` + - 在一轮中每saving_period_by_batches个批次保存一次参数。 + - 类型: int32 (默认: 0). + +* `--log_error_clipping` + - 当在网络层配置中设置**error_clipping_threshold**时,该参数指示是否打印错误截断日志。如果为true,**每批次**的反向传播将会打印日志信息。该截断会影响**输出的梯度**. + - 类型: bool (默认: 0). + +* `--log_clipping` + - 当在训练配置中设置**gradient_clipping_threshold**时,该参数指示是否打印日志截断信息。该截断会影响**权重更新的梯度**. + - 类型: bool (默认: 0). + +* `--use_old_updater` + - 是否使用旧的RemoteParameterUpdater。 默认使用ConcurrentRemoteParameterUpdater,主要为开发者使用,使用者通常无需关心. + - 类型: bool (默认: 0). + +* `--enable_grad_share` + - 启用梯度参数的阈值,在多CPU训练时共享该参数. + - 类型: int32 (默认: 100 \* 1024 \* 1024). + +* `--grad_share_block_num` + - 梯度参数的分块数目,在多CPU训练时共享该参数. + - 类型: int32 (默认: 64). + +## 测试 + +* `--test_pass` + - 加载test_pass轮的模型用于测试. + - 类型: int32 (默认: -1). + +* `--test_period` + - 如果为0,每轮结束时对所有测试数据进行测试;如果不为0,每test_period个批次对所有测试数据进行测试. + - 类型: int32 (默认: 0). + +* `--test_wait` + - 指示当指定轮的测试模型不存在时,是否需要等待该轮模型参数。如果在训练期间同时发起另外一个进程进行测试,可以使用该参数. + - 类型: bool (默认: 0). + +* `--model_list` + - 测试时指定的存储模型列表的文件. + - 类型: string (默认: "", null). + +* `--predict_output_dir` + - 保存网络层输出结果的目录。该参数在网络配置的Outputs()中指定,默认为null,意思是不保存结果。在测试阶段,如果你想要保存某些层的特征图,请指定该目录。需要注意的是,网络层的输出是经过激活函数之后的值. + - 类型: string (默认: "", null). 
+ +* `--average_test_period` + - 使用`average_test_period`个批次的参数平均值进行测试。该参数必须能被FLAGS_log_period整除,默认为0,意思是不使用平均参数执行测试. + - 类型: int32 (默认: 0). + +* `--distribute_test` + - 在分布式环境中测试,将多台机器的测试结果合并. + - 类型: bool (默认: 0). + +* `--predict_file` + - 保存预测结果的文件名。该参数默认为null,意思是不保存结果。目前该参数仅用于AucValidationLayer和PnpairValidationLayer层,每轮都会保存预测结果. + - 类型: string (默认: "", null). + +## GPU + +* `--gpu_id` + - 指示使用哪个GPU核. + - 类型: int32 (默认: 0). + +* `--allow_only_one_model_on_one_gpu` + - 如果为true,一个GPU设备上不允许配置多个模型. + - 类型: bool (默认: 1). + +* `--parallel_nn` + - 指示是否使用多线程来计算一个神经网络。如果为false,设置gpu_id指定使用哪个GPU核(训练配置中的设备属性将会无效)。如果为true,GPU核在训练配置中指定(gpu_id无效). + - 类型: bool (默认: 0). + +* `--cudnn_dir` + - 选择路径来动态加载NVIDIA CuDNN库,例如,/usr/local/cuda/lib64. [默认]: LD_LIBRARY_PATH + - 类型: string (默认: "", null) + +* `--cuda_dir` + - 选择路径来动态加载NVIDIA CUDA库,例如,/usr/local/cuda/lib64. [默认]: LD_LIBRARY_PATH + - 类型: string (默认: "", null) + +* `--cudnn_conv_workspace_limit_in_mb` + - 指定cuDNN的最大工作空间容限,单位是MB,默认为4096MB=4GB. + - 类型: int32 (默认: 4096MB=4GB) + +## 自然语言处理(NLP): RNN/LSTM/GRU +* `--rnn_use_batch` + - 指示在简单的RecurrentLayer层的计算中是否使用批处理方法. + - 类型: bool (默认: 0). + +* `--prev_batch_state` + - 标识是否为连续的batch计算. + - 类型: bool (默认: 0). + +* `--beam_size` + - 集束搜索使用广度优先搜索的方式构建查找树。在树的每一层上,都会产生当前层状态的所有继承结果,按启发式损失的大小递增排序。然而,每层上只能保存固定数目个最好的状态,该数目是提前定义好的,称之为集束大小. + - 类型: int32 (默认: 1). + +* `--diy_beam_search_prob_so` +  - 用户可以自定义beam search的方法,编译成动态库,供PaddlePaddle加载。 该参数用于指定动态库路径. + - 类型: string (默认: "", null). + +## 数据支持(DataProvider) + +* `--memory_threshold_on_load_data` + - 内存容限阈值,当超过该阈值时,停止加载数据. + - 类型: double (默认: 1.0). + +## 单元测试 + +* `--checkgrad_eps` + - 使用checkgrad模式时的参数变化大小. + - 类型: double (默认: 1e-05). + +## 参数服务器和分布式通信 + +* `--start_pserver` + - 指示是否开启参数服务器(parameter server). + - 类型: bool (默认: 0). + +* `--pservers` + - 参数服务器的IP地址,以逗号间隔. + - 类型: string (默认: "127.0.0.1"). + +* `--port` + - 参数服务器的监听端口. + - 类型: int32 (默认: 20134). + +* `--ports_num` + - 发送参数的端口号,根据默认端口号递增. + - 类型: int32 (默认: 1). + +* `--trainer_id` +  - 在分布式训练中,每个训练节点必须指定一个唯一的id号,从0到num_trainers-1。0号训练节点是主训练节点。使用者无需关心这个参数. + - 类型: int32 (默认: 0). + +* `--num_gradient_servers` + - 梯度服务器的数量,该参数在集群提交环境中自动设置. + - 类型: int32 (默认: 1). + +* `--small_messages` + - 如果消息数据太小,建议将该参数设为true,启动快速应答,无延迟. + - 类型: bool (默认: 0). + +* `--sock_send_buf_size` + - 限制套接字发送缓冲区的大小。如果仔细设置的话,可以有效减小网络的阻塞. + - 类型: int32 (默认: 1024 \* 1024 \* 40). + +* `--sock_recv_buf_size` + - 限制套接字接收缓冲区的大小. + - 类型: int32 (默认: 1024 \* 1024 \* 40). + +* `--parameter_block_size` + - 参数服务器的参数分块大小。如果未设置,将会自动计算出一个合适的值. + - 类型: int32 (默认: 0). + +* `--parameter_block_size_for_sparse` + - 参数服务器稀疏更新的参数分块大小。如果未设置,将会自动计算出一个合适的值. + - 类型: int32 (默认: 0). + +* `--log_period_server` + - 在参数服务器终端每log_period_server个批次打印日志进度. + - 类型: int32 (默认: 500). + +* `--loadsave_parameters_in_pserver` + - 在参数服务器上加载和保存参数,只有当设置了sparse_remote_update参数时才有效. + - 类型: bool (默认: 0). + +* `--pserver_num_threads` + - 同步执行操作的线程数. + - 类型: bool (默认: 1). + +* `--ports_num_for_sparse` + - 发送参数的端口号,根据默认值递增(port + ports_num),用于稀疏训练中. + - 类型: int32 (默认: 0). + +* `--nics` + - 参数服务器的网络设备名称,已经在集群提交环境中完成设置. + - 类型: string (默认: "xgbe0,xgbe1"). + +* `--rdma_tcp` + - 使用rdma还是tcp传输协议,该参数已经在集群提交环境中完成设置. + - 类型: string (默认: "tcp"). + +## 异步随机梯度下降(Async SGD) +* `--async_count` + - 定义异步训练的长度,如果为0,则使用同步训练. + - 类型: int32 (默认: 0). + +* `--async_lagged_ratio_min` + - 控制`config_.async_lagged_grad_discard_ratio()`的最小值. + - 类型: double (默认: 1.0). + +* `--async_lagged_ratio_default` + - 如果在网络配置中未设置async_lagged_grad_discard_ratio,则使用该参数作为默认值. + - 类型: double (默认: 1.5). 
+
+## 性能调优(Performance Tuning)
+
+* `--log_barrier_abstract`
+  - 如果为true,则显示阻隔性能的摘要信息.
+  - 类型: bool (默认: 1).
+
+* `--log_barrier_show_log`
+  - 如果为true,则总会显示阻隔摘要信息,即使间隔很小.
+  - 类型: bool (默认: 0).
+
+* `--log_barrier_lowest_nodes`
+  - 最少显示多少个节点.
+  - 类型: int32 (默认: 5).
+
+* `--check_sparse_distribution_in_pserver`
+  - 指示是否检查所有参数服务器上的稀疏参数的分布是均匀的.
+  - 类型: bool (默认: 0).
+
+* `--show_check_sparse_distribution_log`
+  - 指示是否显示参数服务器上的稀疏参数分布的日志细节.
+  - 类型: bool (默认: 0).
+
+* `--check_sparse_distribution_batches`
+  - 每运行多少个批次执行一次稀疏参数分布的检查.
+  - 类型: int32 (默认: 100).
+
+* `--check_sparse_distribution_ratio`
+  - 如果检查到分配在不同参数服务器上的参数的分布不均匀次数大于check_sparse_distribution_ratio * check_sparse_distribution_batches次,程序停止.
+  - 类型: double (默认: 0.6).
+
+* `--check_sparse_distribution_unbalance_degree`
+  - 不同参数服务器上数据大小的最大值与最小值的比率.
+  - 类型: double (默认: 2).
+
+## 矩阵/向量/随机数
+* `--enable_parallel_vector`
+  - 启用并行向量的阈值.
+  - 类型: int32 (默认: 0).
+
+* `--seed`
+  - 随机数的种子。设置为0时,使用srand(time)初始化种子.
+  - 类型: int32 (默认: 1)
+
+* `--thread_local_rand_use_global_seed`
+  - 是否将全局种子应用于本地线程的随机数.
+  - 类型: bool (默认: 0).
diff --git a/doc/howto/usage/cmd_parameter/detail_introduction_en.md b/doc/howto/usage/cmd_parameter/detail_introduction_en.md
new file mode 100644
index 0000000000000000000000000000000000000000..b681ebc81a355dfc1a7638a4463dff6979929a45
--- /dev/null
+++ b/doc/howto/usage/cmd_parameter/detail_introduction_en.md
@@ -0,0 +1,327 @@
+```eval_rst
+.. _cmd_detail_introduction:
+```
+
+# Detail Description
+
+## Common
+
+* `--job`
+  - Job mode, including: **train, test, checkgrad**, where checkgrad is mainly for developers; users do not need to care about it.
+  - type: string (default: train)
+
+* `--config`
+  - Used to specify the network configuration file.
+  - type: string (default: null).
+
+* `--use_gpu`
+  - Whether to use the GPU for training: false means CPU mode and true means GPU mode.
+  - type: bool (default: 1).
+
+* `--local`
+  - Whether the training is in local mode or not. True when training locally or using one node in a cluster. False when using multiple machines in a cluster.
+  - type: bool (default: 1).
+
+* `--trainer_count`
+  - Defines the number of threads used in one machine. For example, trainer_count = 4 means using 4 GPUs in GPU mode or 4 threads in CPU mode. Each thread (or GPU) is assigned 1/4 of the samples in the current batch. That is to say, with batch_size set to 512 in the trainer config, each thread trains on 128 samples.
+  - type: int32 (default: 1).
+
+* `--num_passes`
+  - When `--job=train`, it means training for num_passes passes. One pass means training on all samples in the dataset one time. When `--job=test`, it means testing data with the models from pass test_pass to pass (num_passes - 1).
+  - type: int32 (default: 100).
+
+* `--config_args`
+  - Arguments passed to the config file. Format: key1=value1,key2=value2.
+  - type: string (default: null).
+
+* `--version`
+  - Whether to print version information.
+  - type: bool (default: 0).
+
+* `--show_layer_stat`
+  - Whether to show the statistics of each layer **per batch**.
+  - type: bool (default: 0).
+
+## Train
+
+* `--log_period`
+  - Log progress every log_period batches.
+  - type: int32 (default: 100).
+
+* `--dot_period`
+  - Print '.' every dot_period batches.
+  - type: int32 (default: 1).
+
+* `--saving_period`
+  - Save parameters every saving_period passes.
+  - type: int32 (default: 1).
+
+* `--save_dir`
+  - Directory for saving model parameters. It needs to be specified, but does not need to be created in advance.
+  - type: string (default: null).
+
+* `--start_pass`
+  - Start training from this pass. It will load parameters from the previous pass.
+  - type: int32 (default: 0).
+
+* `--show_parameter_stats_period`
+  - Show parameter statistics every show_parameter_stats_period batches during training. Not shown by default.
+  - type: int32 (default: 0).
+
+* `--save_only_one`
+  - Save the parameters only for the last pass, while the previous parameters will be removed.
+  - type: bool (default: 0).
+
+* `--load_missing_parameter_strategy`
+  - Specify the loading operation when the model file is missing. Three operations are supported: fail/rand/zero.
+  - `fail`: the program will exit.
+  - `rand`: uniform or normal distribution according to **initial\_strategy** in the network config. The uniform range is: **[mean - std, mean + std]**, where mean and std are configured in the trainer config.
+  - `zero`: all parameters are zero.
+  - type: string (default: fail).
+
+* `--init_model_path`
+  - Path of the initialization model. If it is set, start\_pass will be ignored. It can be used to specify the model path in testing mode as well.
+  - type: string (default: null).
+
+* `--saving_period_by_batches`
+  - Save parameters every saving_period_by_batches batches in one pass.
+  - type: int32 (default: 0).
+
+* `--log_error_clipping`
+  - Whether to print an error clipping log when **error_clipping_threshold** is set in the layer config. If true, a log is printed in backward propagation **per batch**. This clipping affects the **gradient of the output**.
+  - type: bool (default: 0).
+
+* `--log_clipping`
+  - Whether to print a log clipping message when **gradient_clipping_threshold** is set in the trainer config. This clipping affects the **gradient w.r.t. (with respect to) the weight**.
+  - type: bool (default: 0).
+
+* `--use_old_updater`
+  - Whether to use the old RemoteParameterUpdater. By default the ConcurrentRemoteParameterUpdater is used. It is mainly for developers, and users usually do not need to care about it.
+  - type: bool (default: 0).
+
+* `--enable_grad_share`
+  - Threshold above which the gradient parameter is shared for batch multi-CPU training.
+  - type: int32 (default: 100 \* 1024 \* 1024).
+
+* `--grad_share_block_num`
+  - Block number of the gradient parameter, which is shared for batch multi-CPU training.
+  - type: int32 (default: 64).
+
+## Test
+
+* `--test_pass`
+  - Load the parameters of this pass for testing.
+  - type: int32 (default: -1).
+
+* `--test_period`
+  - If 0, test on all test data at the end of each pass; otherwise, test on all test data every test_period batches.
+  - type: int32 (default: 0).
+
+* `--test_wait`
+  - Whether to wait for the parameters of a pass if they do not exist yet. It can be used when the user launches another process to perform testing during the training process.
+  - type: bool (default: 0).
+
+* `--model_list`
+  - File that saves the model list when testing.
+  - type: string (default: "", null).
+
+* `--predict_output_dir`
+  - Directory that saves the layer output. It is configured in Outputs() in the network config. By default this argument is null, meaning nothing is saved. Specify this directory if you want to save the feature map of some layers in testing mode. Note that layer outputs are values after the activation function.
+  - type: string (default: "", null).
+
+* `--average_test_period`
+  - Test on the averaged parameters every `average_test_period` batches. It MUST be divisible by FLAGS_log_period. The default 0 means no testing on the averaged parameters.
+  - type: int32 (default: 0).
+
+* `--distribute_test`
+  - Testing in a distributed environment will merge results from multiple machines.
+  - type: bool (default: 0).
+
+* `--predict_file`
+  - File name for saving predicted results. By default this argument is null, meaning nothing is saved. Currently this argument is only used by AucValidationLayer and PnpairValidationLayer, and the predicted results are saved every pass.
+  - type: string (default: "", null).
+
+## GPU
+
+* `--gpu_id`
+  - Which GPU core to use.
+  - type: int32 (default: 0).
+
+* `--allow_only_one_model_on_one_gpu`
+  - If true, do not allow multiple models on one GPU device.
+  - type: bool (default: 1).
+
+* `--parallel_nn`
+  - Whether to use multiple threads to compute one neural network or not. If false, gpu_id specifies which GPU core to use (and the device property in the trainer config will be ignored). If true, the GPU core is specified in the trainer config (and gpu_id will be ignored).
+  - type: bool (default: 0).
+
+* `--cudnn_dir`
+  - Choose the path for dynamically loading the NVIDIA cuDNN library, for instance, /usr/local/cuda/lib64. [Default]: LD_LIBRARY_PATH
+  - type: string (default: "", null)
+
+* `--cuda_dir`
+  - Choose the path for dynamically loading the NVIDIA CUDA library, for instance, /usr/local/cuda/lib64. [Default]: LD_LIBRARY_PATH
+  - type: string (default: "", null)
+
+* `--cudnn_conv_workspace_limit_in_mb`
+  - Specify the maximum cuDNN workspace limit, in MB; 4096MB=4GB by default.
+  - type: int32 (default: 4096MB=4GB)
+
+## NLP: RNN/LSTM/GRU
+* `--rnn_use_batch`
+  - Whether to use the batch method for computation in the simple RecurrentLayer.
+  - type: bool (default: 0).
+
+* `--prev_batch_state`
+  - Whether the current batch is a continuation of the previous batch.
+  - type: bool (default: 0).
+
+* `--beam_size`
+  - Beam search uses breadth-first search to build its search tree. At each level of the tree, it generates all successors of the states at the current level, sorting them in increasing order of heuristic cost. However, it only stores a predetermined number of best states at each level (called the beam size).
+  - type: int32 (default: 1).
+
+* `--diy_beam_search_prob_so`
+  - Specify a shared dynamic library implementing a user-defined beam search method, to be loaded by PaddlePaddle.
+  - type: string (default: "", null).
+
+## DataProvider
+
+* `--memory_threshold_on_load_data`
+  - Memory threshold: stop loading data when memory is not sufficient.
+  - type: double (default: 1.0).
+
+## Unit Test
+
+* `--checkgrad_eps`
+  - The parameter perturbation size for checkgrad mode.
+  - type: double (default: 1e-05).
+
+## Parameter Server and Distributed Communication
+
+* `--start_pserver`
+  - Whether to start the pserver (parameter server).
+  - type: bool (default: 0).
+
+* `--pservers`
+  - Comma-separated IP addresses of pservers.
+  - type: string (default: "127.0.0.1").
+
+* `--port`
+  - Listening port for the pserver.
+  - type: int32 (default: 20134).
+
+* `--ports_num`
+  - The number of ports for sending parameters, incremented from the default port number.
+  - type: int32 (default: 1).
+
+* `--trainer_id`
+  - In distributed training, each trainer must be given a unique id ranging from 0 to num_trainers-1. Trainer 0 is the master trainer. Users do not need to care about this flag.
+  - type: int32 (default: 0).
+
+* `--num_gradient_servers`
+  - Number of gradient servers. This argument is set automatically in the cluster submitting environment.
+  - type: int32 (default: 1).
+
+* `--small_messages`
+  - If the message size is small, it is recommended to set this to true to enable quick ACK without delay.
+  - type: bool (default: 0).
+
+* `--sock_send_buf_size`
+  - Restrict the socket send buffer size. It can reduce network congestion if set carefully.
+  - type: int32 (default: 1024 \* 1024 \* 40).
+
+* `--sock_recv_buf_size`
+  - Restrict the socket receive buffer size.
+  - type: int32 (default: 1024 \* 1024 \* 40).
+
+* `--parameter_block_size`
+  - Parameter block size for the pserver; a suitable value will be calculated automatically if it is not set.
+  - type: int32 (default: 0).
+
+* `--parameter_block_size_for_sparse`
+  - Parameter block size for sparse-update pservers; a suitable value will be calculated automatically if it is not set.
+  - type: int32 (default: 0).
+
+* `--log_period_server`
+  - Log progress every log_period_server batches on the pserver side.
+  - type: int32 (default: 500).
+
+* `--loadsave_parameters_in_pserver`
+  - Load and save parameters in the pserver. It only works when the parameter sets sparse_remote_update.
+  - type: bool (default: 0).
+
+* `--pserver_num_threads`
+  - Number of threads for synchronous operation execution.
+  - type: bool (default: 1).
+
+* `--ports_num_for_sparse`
+  - The number of ports for sending parameters, incremented from the default (port + ports_num). It is used in sparse training.
+  - type: int32 (default: 0).
+
+* `--nics`
+  - Network device name for pservers, already set in the cluster submitting environment.
+  - type: string (default: "xgbe0,xgbe1").
+
+* `--rdma_tcp`
+  - Use the rdma or tcp transport protocol, already set in the cluster submitting environment.
+  - type: string (default: "tcp").
+
+## Async SGD
+* `--async_count`
+  - Defines the asynchronous training length; if 0, synchronous training is used.
+  - type: int32 (default: 0).
+
+* `--async_lagged_ratio_min`
+  - Controls the minimum value of `config_.async_lagged_grad_discard_ratio()`.
+  - type: double (default: 1.0).
+
+* `--async_lagged_ratio_default`
+  - If async_lagged_grad_discard_ratio is not set in the network config, use this as the default value.
+  - type: double (default: 1.5).
+
+## Performance Tuning
+
+* `--log_barrier_abstract`
+  - If true, show a summary of barrier performance information.
+  - type: bool (default: 1).
+
+* `--log_barrier_show_log`
+  - If true, always show the barrier summary, even when the gap is small.
+  - type: bool (default: 0).
+
+* `--log_barrier_lowest_nodes`
+  - How many of the slowest nodes will be logged.
+  - type: int32 (default: 5).
+
+* `--check_sparse_distribution_in_pserver`
+  - Whether to check that the distribution of sparse parameters over all pservers is balanced.
+  - type: bool (default: 0).
+
+* `--show_check_sparse_distribution_log`
+  - Show log details for the sparse parameter distribution in the pserver.
+  - type: bool (default: 0).
+
+* `--check_sparse_distribution_batches`
+  - Run the sparse parameter distribution check every this many batches.
+  - type: int32 (default: 100).
+
+* `--check_sparse_distribution_ratio`
+  - If the parameters dispatched to different pservers have an unbalanced distribution for more than check_sparse_distribution_ratio * check_sparse_distribution_batches times, the program stops.
+  - type: double (default: 0.6).
+
+* `--check_sparse_distribution_unbalance_degree`
+  - The ratio of the maximum data size to the minimum data size across pservers.
+  - type: double (default: 2).
+
+## Matrix/Vector/RandomNumber
+* `--enable_parallel_vector`
+  - Threshold for enabling parallel vectors.
+  - type: int32 (default: 0).
+
+* `--seed`
+  - Random number seed. 0 means use srand(time).
+  - type: int32 (default: 1)
+
+* `--thread_local_rand_use_global_seed`
+  - Whether to use the global seed for thread-local random number generators.
+  - type: bool (default: 0).
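+
+To illustrate how the distributed-communication flags above fit together, here is a sketch of launching a two-machine job by hand. The IP addresses, port, and config file name are hypothetical; cluster tooling (such as the `paddle.py` scripts) normally generates these commands for you:
+
+```
+# On every machine: start a parameter server.
+paddle pserver \
+  --port=7164 \
+  --ports_num=1 \
+  --ports_num_for_sparse=1 \
+  --num_gradient_servers=2 \
+  --nics=eth0
+
+# On each machine: start a trainer with a unique trainer_id
+# (0 on the first machine, 1 on the second).
+paddle train \
+  --local=0 \
+  --config=trainer_config.py \
+  --pservers=192.168.1.1,192.168.1.2 \
+  --port=7164 \
+  --ports_num=1 \
+  --num_gradient_servers=2 \
+  --trainer_id=0 \
+  --save_dir=./output
+```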
diff --git a/doc/howto/usage/cmd_parameter/index_cn.rst b/doc/howto/usage/cmd_parameter/index_cn.rst new file mode 100644 index 0000000000000000000000000000000000000000..4c8729821110b9aec99351fc0a83a1ba75a8a2bb --- /dev/null +++ b/doc/howto/usage/cmd_parameter/index_cn.rst @@ -0,0 +1,11 @@ +.. _cmd_line_index: + +设置命令行参数 +=============== + +.. toctree:: + :maxdepth: 1 + + use_case_cn.md + arguments_cn.md + detail_introduction_cn.md diff --git a/doc/howto/usage/cmd_parameter/index_en.rst b/doc/howto/usage/cmd_parameter/index_en.rst new file mode 100644 index 0000000000000000000000000000000000000000..0e3c72d27aca063f1b6f1c23e55718dba373c40a --- /dev/null +++ b/doc/howto/usage/cmd_parameter/index_en.rst @@ -0,0 +1,11 @@ +.. _cmd_line_index: + +Set Command-line Parameters +=========================== + +.. toctree:: + :maxdepth: 1 + + use_case_en.md + arguments_en.md + detail_introduction_en.md diff --git a/doc/howto/usage/cmd_parameter/use_case_cn.md b/doc/howto/usage/cmd_parameter/use_case_cn.md new file mode 100644 index 0000000000000000000000000000000000000000..db8c39d950771726346ff9c9481990abc13036cf --- /dev/null +++ b/doc/howto/usage/cmd_parameter/use_case_cn.md @@ -0,0 +1,182 @@ +# 使用案例 + +## 本地训练 + +本地训练的实验,诸如图像分类,自然语言处理等,通常都会使用下面这些命令行参数。 + +``` +paddle train \ + --use_gpu=1/0 \ #1:GPU,0:CPU(默认为1) + --config=network_config \ + --save_dir=output \ + --trainer_count=COUNT \ #(默认为1) + --test_period=M \ #(默认为0) + --num_passes=N \ #(默认为100) + --log_period=K \ #(默认为100) + --dot_period=1000 \ #(默认为1) + #[--show_parameter_stats_period=100] \ #(默认为0) + #[--saving_period_by_batches=200] \ #(默认为0) +``` +根据你的任务,可以选择是否使用参数`show_parameter_stats_period`和`saving_period_by_batches`。 + +### 1) 将命令参数传给网络配置 + +`config_args`是一个很有用的参数,用于将参数传递给网络配置。 + +``` +--config_args=generating=1,beam_size=5,layer_num=10 \ +``` +`get_config_arg`可用于在网络配置中解析这些参数,如下所示: + +``` +generating = get_config_arg('generating', bool, False) +beam_size = get_config_arg('beam_size', int, 3) +layer_num = get_config_arg('layer_num', int, 8) +``` + +`get_config_arg`: + +``` +get_config_arg(name, type, default_value) +``` +- name: `--config_args`中指定的名字 +- type: 值类型,包括bool, int, str, float等 +- default_value: 默认值 + +### 2) 使用模型初始化网络 + +增加如下参数: + +``` +--init_model_path=model_path +--load_missing_parameter_strategy=rand +``` + +## 本地测试 + +方法一: + +``` +paddle train --job=test \ + --use_gpu=1/0 \ + --config=network_config \ + --trainer_count=COUNT \ + --init_model_path=model_path \ +``` +- 使用init\_model\_path指定测试的模型 +- 只能测试单个模型 + +方法二: + +``` +paddle train --job=test \ + --use_gpu=1/0 \ + --config=network_config \ + --trainer_count=COUNT \ + --model_list=model.list \ +``` +- 使用model_list指定测试的模型列表 +- 可以测试多个模型,文件model.list如下所示: + +``` +./alexnet_pass1 +./alexnet_pass2 +``` + +方法三: + +``` +paddle train --job=test \ + --use_gpu=1/0 \ + --config=network_config \ + --trainer_count=COUNT \ + --save_dir=model \ + --test_pass=M \ + --num_passes=N \ +``` +这种方式必须使用Paddle存储的模型路径格式,如:`model/pass-%5d`。测试的模型包括从第M轮到第N-1轮存储的所有模型。例如,M=12,N=14这种写法将会测试模型`model/pass-00012`和`model/pass-00013`。 + +## 稀疏训练 + +当输入是维度很高的稀疏数据时,通常使用稀疏训练来加速计算过程。例如,输入数据的字典维数是1百万,但是每个样本仅包含几个词。在Paddle中,稀疏矩阵的乘积应用于前向传播过程,而稀疏更新在反向传播之后的权重更新时进行。 + +### 1) 本地训练 + +用户需要在网络配置中指定**sparse\_update=True**。请参照网络配置的文档了解更详细的信息。 + +### 2) 集群训练 + +在集群上训练一个稀疏模型需要加上下面的参数。同时用户需要在网络配置中指定**sparse\_remote\_update=True**。请参照网络配置的文档了解更详细的信息。 + +``` +--ports_num_for_sparse=1 #(默认为0) +``` + +## parallel_nn 
+用户可以设置`parallel_nn`来混合使用GPU和CPU计算网络层的参数。也就是说,你可以将网络配置成某些层使用GPU计算,而其他层使用CPU计算。另一种方式是将网络层划分到不同的GPU上去计算,这样可以减小GPU内存,或者采用并行计算来加速某些层的更新。
+
+如果你想使用这些特性,你需要在网络配置中指定设备的ID号(表示为deviceId),并且加上下面的命令行参数:
+
+```
+--parallel_nn=true
+```
+### 案例一:GPU和CPU混合使用
+请看下面的例子:
+
+```
+#command line:
+paddle train --use_gpu=true --parallel_nn=true --trainer_count=COUNT
+
+default_device(0)
+
+fc1=fc_layer(...)
+fc2=fc_layer(...)
+fc3=fc_layer(...,layer_attr=ExtraAttr(device=-1))
+
+```
+- default_device(0): 设置默认设备号为0。这意味着除了指定device=-1的层之外,其他所有层都会使用GPU计算,每层使用的GPU号依赖于参数trainer\_count和gpu\_id(默认为0)。在此,fc1和fc2层在GPU上计算。
+
+- device=-1: fc3层使用CPU计算。
+
+- trainer_count:
+  - trainer_count=1: 如果未设置gpu\_id,那么fc1和fc2层将会使用第1个GPU来计算。否则使用gpu\_id指定的GPU。
+
+  - trainer_count>1: 在trainer\_count个GPU上使用数据并行来计算某一层。例如,trainer\_count=2意味着0号和1号GPU将会使用数据并行来计算fc1和fc2层。
+
+### 案例二:在不同设备上指定层
+
+```
+#command line:
+paddle train --use_gpu=true --parallel_nn=true --trainer_count=COUNT
+
+#network:
+fc2=fc_layer(input=l1, layer_attr=ExtraAttr(device=0), ...)
+fc3=fc_layer(input=l1, layer_attr=ExtraAttr(device=1), ...)
+fc4=fc_layer(input=fc2, layer_attr=ExtraAttr(device=-1), ...)
+```
+在本例中,我们假设一台机器上有4个GPU。
+
+- trainer_count=1:
+  - 使用0号GPU计算fc2层。
+  - 使用1号GPU计算fc3层。
+  - 使用CPU计算fc4层。
+
+- trainer_count=2:
+  - 使用0号和1号GPU计算fc2层。
+  - 使用2号和3号GPU计算fc3层。
+  - 使用CPU两线程计算fc4层。
+
+- trainer_count=4:
+  - 运行失败(注意到我们已经假设机器上有4个GPU),因为参数`allow_only_one_model_on_one_gpu`默认设置为真。
+
+**当`device!=-1`时设备ID号的分配:**
+
+```
+(deviceId + gpu_id + threadId * numLogicalDevices_) % numDevices_
+
+deviceId: 在层中指定
+gpu_id: 默认为0
+threadId: 线程ID号,范围: 0,1,..., trainer_count-1
+numDevices_: 机器的设备(GPU)数目
+numLogicalDevices_: min(max(deviceId + 1), numDevices_)
+```
diff --git a/doc/howto/usage/cmd_parameter/use_case_en.md b/doc/howto/usage/cmd_parameter/use_case_en.md
new file mode 100644
index 0000000000000000000000000000000000000000..e287f0c4b9617cbc6504596512bf408c56dc10f9
--- /dev/null
+++ b/doc/howto/usage/cmd_parameter/use_case_en.md
@@ -0,0 +1,182 @@
+# Use Case
+
+## Local Training
+
+These command line arguments are commonly used in local training experiments, such as image classification, natural language processing, etc.
+
+```
+paddle train \
+  --use_gpu=1/0 \                        #1:GPU,0:CPU(default:1)
+  --config=network_config \
+  --save_dir=output \
+  --trainer_count=COUNT \                #(default:1)
+  --test_period=M \                      #(default:0)
+  --num_passes=N \                       #(default:100)
+  --log_period=K \                       #(default:100)
+  --dot_period=1000 \                    #(default:1)
+  #[--show_parameter_stats_period=100] \ #(default:0)
+  #[--saving_period_by_batches=200] \    #(default:0)
+```
+`show_parameter_stats_period` and `saving_period_by_batches` are optional depending on your task.
+
+### 1) Pass Command-Line Arguments to the Network Config
+
+`config_args` is a useful parameter for passing arguments to the network config.
+
+```
+--config_args=generating=1,beam_size=5,layer_num=10 \
+```
+`get_config_arg` can be used to parse these arguments in the network config as follows:
+
+```
+generating = get_config_arg('generating', bool, False)
+beam_size = get_config_arg('beam_size', int, 3)
+layer_num = get_config_arg('layer_num', int, 8)
+```
+
+`get_config_arg`:
+
+```
+get_config_arg(name, type, default_value)
+```
+- name: the name specified in `--config_args`
+- type: the value type, such as bool, int, str, float, etc.
+- default_value: the default value if not set.
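+
+Putting these pieces together, a complete invocation might look like the following sketch, where `my_network.py` is a hypothetical network config that calls `get_config_arg` as shown above:
+
+```
+paddle train \
+  --config=my_network.py \
+  --config_args=generating=1,beam_size=5,layer_num=10
+```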
+
+### 2) Use a Model to Initialize the Network
+
+Add the arguments:
+
+```
+--init_model_path=model_path
+--load_missing_parameter_strategy=rand
+```
+
+## Local Testing
+
+Method 1:
+
+```
+paddle train --job=test \
+  --use_gpu=1/0 \
+  --config=network_config \
+  --trainer_count=COUNT \
+  --init_model_path=model_path \
+```
+- Use init\_model\_path to specify the test model.
+- Only one model can be tested.
+
+Method 2:
+
+```
+paddle train --job=test \
+  --use_gpu=1/0 \
+  --config=network_config \
+  --trainer_count=COUNT \
+  --model_list=model.list \
+```
+- Use model_list to specify the list of models to test.
+- Several models can be tested, where model.list looks like:
+
+```
+./alexnet_pass1
+./alexnet_pass2
+```
+
+Method 3:
+
+```
+paddle train --job=test \
+  --use_gpu=1/0 \
+  --config=network_config \
+  --trainer_count=COUNT \
+  --save_dir=model \
+  --test_pass=M \
+  --num_passes=N \
+```
+This method must use a model path saved by Paddle in the form `model/pass-%5d`. The models tested are from the M-th pass to the (N-1)-th pass. For example, M=12 and N=14 will test `model/pass-00012` and `model/pass-00013`.
+
+## Sparse Training
+
+Sparse training is usually used to accelerate computation when the input is sparse data with high dimensionality. For example, the dictionary dimension of the input data is 1 million, but each sample contains just a few words. In Paddle, sparse matrix multiplication is used in forward propagation, and sparse updating is performed during the weight update after backward propagation.
+
+### 1) Local Training
+
+You need to set **sparse\_update=True** in the network config. Check the network config documentation for more details.
+
+### 2) Cluster Training
+
+Add the following argument for cluster training of a sparse model. At the same time you need to set **sparse\_remote\_update=True** in the network config. Check the network config documentation for more details.
+
+```
+--ports_num_for_sparse=1 #(default: 0)
+```
+
+## parallel_nn
+`parallel_nn` can be set to mix GPU and CPU computation of network layers. That is to say, you can deploy the network such that a GPU computes some layers and a CPU computes other layers. The other use is to split layers across different GPUs, which can **reduce GPU memory** or **use parallel computation to accelerate some layers**.
+
+If you want to use these features, you need to specify the device ID in the network config (denoted as deviceId) and add the command line argument:
+
+```
+--parallel_nn=true
+```
+### Case 1: Mixed Use of GPU and CPU
+Consider the following example:
+
+```
+#command line:
+paddle train --use_gpu=true --parallel_nn=true --trainer_count=COUNT
+
+default_device(0)
+
+fc1=fc_layer(...)
+fc2=fc_layer(...)
+fc3=fc_layer(...,layer_attr=ExtraAttr(device=-1))
+
+```
+- default_device(0): set the default device ID to 0. This means that except for the layers with device=-1, all layers will use a GPU, and the specific GPU used for each layer depends on trainer\_count and gpu\_id (0 by default). Here, layers fc1 and fc2 are computed on the GPU.
+
+- device=-1: use the CPU for layer fc3.
+
+- trainer_count:
+  - trainer_count=1: if gpu\_id is not set, then use the first GPU to compute layers fc1 and fc2. Otherwise use the GPU with gpu\_id.
+
+  - trainer_count>1: use trainer\_count GPUs to compute one layer using data parallelism. For example, trainer\_count=2 means that GPUs 0 and 1 will use data parallelism to compute layers fc1 and fc2.
+
+### Case 2: Specify Layers in Different Devices
+
+```
+#command line:
+paddle train --use_gpu=true --parallel_nn=true --trainer_count=COUNT
+
+#network:
+fc2=fc_layer(input=l1, layer_attr=ExtraAttr(device=0), ...)
+fc3=fc_layer(input=l1, layer_attr=ExtraAttr(device=1), ...)
+fc4=fc_layer(input=fc2, layer_attr=ExtraAttr(device=-1), ...)
+```
+In this case, we assume that there are 4 GPUs in one machine.
+
+- trainer_count=1:
+  - Use GPU 0 to compute layer fc2.
+  - Use GPU 1 to compute layer fc3.
+  - Use the CPU to compute layer fc4.
+
+- trainer_count=2:
+  - Use GPUs 0 and 1 to compute layer fc2.
+  - Use GPUs 2 and 3 to compute layer fc3.
+  - Use the CPU to compute fc4 in two threads.
+
+- trainer_count=4:
+  - It will fail (note that we have assumed there are 4 GPUs in the machine), because the argument `allow_only_one_model_on_one_gpu` is true by default.
+
+**Allocation of device ID when `device!=-1`**:
+
+```
+(deviceId + gpu_id + threadId * numLogicalDevices_) % numDevices_
+
+deviceId: specified in the layer.
+gpu_id: 0 by default.
+threadId: thread ID, range: 0,1,..., trainer_count-1
+numDevices_: device (GPU) count in the machine.
+numLogicalDevices_: min(max(deviceId + 1), numDevices_)
+```
diff --git a/doc/howto/usage/k8s/k8s_aws_en.md b/doc/howto/usage/k8s/k8s_aws_en.md
new file mode 100644
index 0000000000000000000000000000000000000000..ce72b0803818d5bf0c18753c421848cf2fc1b668
--- /dev/null
+++ b/doc/howto/usage/k8s/k8s_aws_en.md
@@ -0,0 +1,689 @@
+
+# Distributed PaddlePaddle Training on AWS with Kubernetes
+
+We will show you, step by step, how to run distributed PaddlePaddle training on an AWS cluster with Kubernetes. Let's start with the core concepts.
+
+## Distributed PaddlePaddle Training Core Concepts
+
+### Distributed Training Job
+
+A distributed training job is represented by a [Kubernetes job](https://kubernetes.io/docs/user-guide/jobs/#what-is-a-job).
+
+Each Kubernetes job is described by a job config file, which specifies information like the number of [pods](https://kubernetes.io/docs/user-guide/pods/#what-is-a-pod) in the job and environment variables.
+
+In a distributed training job, we would:
+
+1. prepare partitioned training data and the configuration file on a distributed file system (in this tutorial we use Amazon Elastic File System), and
+1. create and submit the Kubernetes job config to the Kubernetes cluster to start the training job.
+
+### Parameter Servers and Trainers
+
+There are two roles in a PaddlePaddle cluster: *parameter server (pserver)* and *trainer*. Each parameter server process maintains a shard of the global model. Each trainer has its local copy of the model, and uses its local data to update the model. During the training process, trainers send model updates to parameter servers, and parameter servers are responsible for aggregating these updates so that trainers can synchronize their local copy with the global model.
+
![Model is partitioned into two shards. Managed by two parameter servers respectively.](src/pserver_and_trainer.png)
+
+In order to communicate with the pserver, the trainer needs to know the IP address of each pserver. In Kubernetes it's better to use a service discovery mechanism (e.g., DNS hostname) rather than static IP addresses, since any pserver's pod may be killed and a new pod could be scheduled onto another node with a different IP address. However, for now we are using static IPs. This will be improved.
+
+Parameter server and trainer are packaged into the same Docker image, and they run once the pod is scheduled by the Kubernetes job.
+
+### Trainer ID
+
+Each trainer process requires a trainer ID, a zero-based index value, passed in as a command-line parameter. The trainer process thus reads the data partition indexed by this ID.
+
+### Training
+
+The entry-point of a container is a shell script. It can see some environment variables pre-defined by Kubernetes. This includes one that gives the job's identity, which can be used in a remote call to the Kubernetes apiserver that lists all pods in the job.
+
+We rank each pod by sorting them by their IPs. The rank of each pod could be the "pod ID". Because we run one trainer and one parameter server in each pod, we can use this "pod ID" as the trainer ID. A detailed workflow of the entry-point script is as follows:
+
+1. Query the apiserver to get pod information, and assign the `trainer_id` by sorting the IPs.
+1. Copy the training data from the EFS persistent volume into the container.
+1. Parse the `paddle pserver` and `paddle trainer` startup parameters from environment variables, and then start up the processes.
+1. The trainer with `trainer_id` 0 will automatically write results onto the EFS volume.
+
+
+## PaddlePaddle on AWS with Kubernetes
+
+### Choose AWS Service Region
+This tutorial requires several AWS services to work in the same region. Before we create anything in AWS, please check the following link
+https://aws.amazon.com/about-aws/global-infrastructure/regional-product-services/
+Choose a region which has the following services available: EC2, EFS, CloudFormation, KMS, VPC, S3.
+In this tutorial, we use "Oregon (us-west-2)" as the example.
+
+### Create AWS Account and IAM Account
+
+Under each AWS account, we can create multiple [IAM](http://docs.aws.amazon.com/IAM/latest/UserGuide/introduction.html) users. This allows us to grant some privileges to each IAM user and to create/operate AWS clusters as an IAM user.
+
+To sign up for an AWS account, please
+follow
+[this guide](http://docs.aws.amazon.com/lambda/latest/dg/setting-up.html).
+To create IAM users and user groups under an AWS account, please
+follow
+[this guide](http://docs.aws.amazon.com/IAM/latest/UserGuide/id_users_create.html).
+
+Please be aware that this tutorial needs the following privileges for the user in IAM:
+
+- AmazonEC2FullAccess
+- AmazonS3FullAccess
+- AmazonRoute53FullAccess
+- AmazonRoute53DomainsFullAccess
+- AmazonElasticFileSystemFullAccess
+- AmazonVPCFullAccess
+- IAMUserSSHKeys
+- IAMFullAccess
+- NetworkAdministrator
+- AWSKeyManagementServicePowerUser
+
+
+### Download kube-aws and kubectl
+
+#### kube-aws
+
+[kube-aws](https://github.com/coreos/kube-aws) is a CLI tool to automate cluster deployment to AWS.
+##### Verify kube-aws integrity
+Note: if you are using a non-official release (e.g., an RC release) of kube-aws, you can skip this step.
+Import the CoreOS Application Signing Public Key:
+
+```
+gpg2 --keyserver pgp.mit.edu --recv-key FC8A365E
+```
+
+Validate the key fingerprint:
+
+```
+gpg2 --fingerprint FC8A365E
+```
+The correct key fingerprint is `18AD 5014 C99E F7E3 BA5F 6CE9 50BD D3E0 FC8A 365E`.
+
+We can download `kube-aws` from its [release page](https://github.com/coreos/kube-aws/releases). In this tutorial, we use version 0.9.1.
+
+Validate the tarball's GPG signature:
+
+```
+PLATFORM=linux-amd64
+ # Or
+PLATFORM=darwin-amd64
+
+gpg2 --verify kube-aws-${PLATFORM}.tar.gz.sig kube-aws-${PLATFORM}.tar.gz
+```
+##### Install kube-aws
+Extract the binary:
+
+```
+tar zxvf kube-aws-${PLATFORM}.tar.gz
+```
+
+Add kube-aws to your path:
+
+```
+mv ${PLATFORM}/kube-aws /usr/local/bin
+```
+
+
+#### kubectl
+
+[kubectl](https://kubernetes.io/docs/user-guide/kubectl-overview/) is a command line interface for running commands against Kubernetes clusters.
+
+Download `kubectl` from the Kubernetes release artifact site with the `curl` tool.
+
+```
+# OS X
+curl -O https://storage.googleapis.com/kubernetes-release/release/"$(curl -s https://storage.googleapis.com/kubernetes-release/release/stable.txt)"/bin/darwin/amd64/kubectl
+
+# Linux
+curl -O https://storage.googleapis.com/kubernetes-release/release/"$(curl -s https://storage.googleapis.com/kubernetes-release/release/stable.txt)"/bin/linux/amd64/kubectl
+```
+
+Make the kubectl binary executable and move it to your PATH (e.g. `/usr/local/bin`):
+
+```
+chmod +x ./kubectl
+sudo mv ./kubectl /usr/local/bin/kubectl
+```
+
+### Configure AWS Credentials
+
+First, check out [this guide](http://docs.aws.amazon.com/cli/latest/userguide/installing.html) for installing the AWS command line interface.
+
+Then configure your AWS account information:
+
+```
+aws configure
+```
+
+
+Fill in the required fields:
+
+
+```
+AWS Access Key ID: YOUR_ACCESS_KEY_ID
+AWS Secret Access Key: YOUR_SECRET_ACCESS_KEY
+Default region name: us-west-2
+Default output format: json
+```
+
+`YOUR_ACCESS_KEY_ID` and `YOUR_SECRET_ACCESS_KEY` are the IAM key and secret from [Create AWS Account and IAM Account](#create-aws-account-and-iam-account).
+
+Verify that your credentials work by describing any instances you may already have running on your account:
+
+```
+aws ec2 describe-instances
+```
+
+### Define Cluster Parameters
+
+#### EC2 key pair
+
+The key pair that will authenticate SSH access to your EC2 instances. The public half of this key pair will be configured on each CoreOS node.
+
+Follow the [EC2 Keypair User Guide](http://docs.aws.amazon.com/AWSEC2/latest/UserGuide/ec2-key-pairs.html) to create an EC2 key pair.
+
+After creating a key pair, you will use the key pair name to configure the cluster.
+
+Key pairs are only available to EC2 instances in the same region. We are using us-west-2 in our tutorial, so make sure to create the key pair in that region (Oregon).
+
+Your browser will download a `key-name.pem` file, which is the key to access the EC2 instances. We will use it later.
+
+
+#### KMS key
+
+Amazon KMS keys are used to encrypt and decrypt cluster TLS assets. If you already have a KMS key that you would like to use, you can skip creating a new key and provide the Arn string for your existing key.
+
+You can create a KMS key with the aws command line tool:
+
+```
+aws kms --region=us-west-2 create-key --description="kube-aws assets"
+{
+    "KeyMetadata": {
+        "CreationDate": 1458235139.724,
+        "KeyState": "Enabled",
+        "Arn": "arn:aws:kms:us-west-2:aaaaaaaaaaaaa:key/xxxxxxxxxxxxxxxxxxx",
+        "AWSAccountId": "xxxxxxxxxxxxx",
+        "Enabled": true,
+        "KeyUsage": "ENCRYPT_DECRYPT",
+        "KeyId": "xxxxxxxxx",
+        "Description": "kube-aws assets"
+    }
+}
+```
+
+We will need to use the value of `Arn` later.
+
+Then let's add several inline policies to your IAM user permissions.
+
+Go to the [IAM Console](https://console.aws.amazon.com/iam/home?region=us-west-2#/home). Click the `Users` button, click the user that we just created, then click the `Add inline policy` button and select `Custom Policy`.
+
+Paste in the following inline policy:
+
+```
+{
+    "Version": "2012-10-17",
+    "Statement": [
+        {
+            "Sid": "Stmt1482205552000",
+            "Effect": "Allow",
+            "Action": [
+                "kms:Decrypt",
+                "kms:Encrypt"
+            ],
+            "Resource": [
+                "arn:aws:kms:*:AWS_ACCOUNT_ID:key/*"
+            ]
+        },
+        {
+            "Sid": "Stmt1482205746000",
+            "Effect": "Allow",
+            "Action": [
+                "cloudformation:CreateStack",
+                "cloudformation:UpdateStack",
+                "cloudformation:DeleteStack",
+                "cloudformation:DescribeStacks",
+                "cloudformation:DescribeStackResource",
+                "cloudformation:GetTemplate",
+                "cloudformation:DescribeStackEvents"
+            ],
+            "Resource": [
+                "arn:aws:cloudformation:us-west-2:AWS_ACCOUNT_ID:stack/MY_CLUSTER_NAME/*"
+            ]
+        }
+    ]
+}
+```
+`Version`: its value has to be exactly "2012-10-17".
+`AWS_ACCOUNT_ID`: you can get it from the following command:
+
+```
+aws sts get-caller-identity --output text --query Account
+```
+
+`MY_CLUSTER_NAME`: pick a MY_CLUSTER_NAME that you like; you will use it later as well.
+Please note, the stack name must satisfy the regular expression pattern `[a-zA-Z][-a-zA-Z0-9*]*`, which means no "_" in the stack name, or kube-aws will throw an error in later steps.
+
+#### External DNS name
+
+When the cluster is created, the controller will expose the TLS-secured API on a DNS name.
+
+The DNS name should have a CNAME pointing to the cluster DNS name, or an A record pointing to the cluster IP address.
+
+We will need to use the DNS name later in this tutorial. If you don't already own one, you can choose any DNS name (e.g., `paddle`) and modify `/etc/hosts` to associate the cluster IP with that DNS name on your local machine. Also add a name service (Route53) in AWS to associate the IP with `paddle` for the cluster. We will find the cluster IP in later steps.
+
+#### S3 bucket
+
+You need to create an S3 bucket before starting up the Kubernetes cluster.
+
+There are some bugs in the aws cli for creating S3 buckets, so let's use the [S3 Console](https://console.aws.amazon.com/s3/home?region=us-west-2).
+
+Click on `Create Bucket`, fill in a unique BUCKET_NAME, and make sure the region is us-west-2 (Oregon).
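+
+Optionally, you can confirm the bucket is visible from the AWS CLI configured earlier (`BUCKET_NAME` is the unique name you picked):
+
+```
+aws s3 ls s3://BUCKET_NAME
+```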
+
+
+#### Initialize Assets
+
+Create a directory on your local machine to hold the generated assets:
+
+```
+$ mkdir my-cluster
+$ cd my-cluster
+```
+
+Initialize the cluster CloudFormation stack with the KMS Arn, key pair name, and DNS name from the previous steps:
+
+```
+kube-aws init \
+--cluster-name=MY_CLUSTER_NAME \
+--external-dns-name=MY_EXTERNAL_DNS_NAME \
+--region=us-west-2 \
+--availability-zone=us-west-2a \
+--key-name=KEY_PAIR_NAME \
+--kms-key-arn="arn:aws:kms:us-west-2:xxxxxxxxxx:key/xxxxxxxxxxxxxxxxxxx"
+```
+
+`MY_CLUSTER_NAME`: the one you picked in [KMS key](#kms-key)
+
+`MY_EXTERNAL_DNS_NAME`: see [External DNS name](#external-dns-name)
+
+`KEY_PAIR_NAME`: see [EC2 key pair](#ec2-key-pair)
+
+`--kms-key-arn`: the "Arn" in [KMS key](#kms-key)
+
+Here `us-west-2a` is used for the parameter `--availability-zone`, but the supported availability zones vary among AWS accounts.
+
+Please check whether `us-west-2a` is supported by `aws ec2 --region us-west-2 describe-availability-zones`; if not, switch to another supported availability zone (e.g., `us-west-2b` or `us-west-2c`).
+
+
+There will now be a cluster.yaml file in the asset directory. This is the main configuration file for your cluster.
+
+By default `kube-aws` will only create one worker node. Let's edit `cluster.yaml` and change `workerCount` from 1 to 3.
+
+
+#### Render contents of the asset directory
+
+In the simplest case, you can have kube-aws generate both your TLS identities and the certificate authority for you.
+
+```
+kube-aws render credentials --generate-ca
+```
+
+The next command generates the default set of cluster assets in your asset directory.
+
+```
+kube-aws render stack
+```
+Assets (templates and credentials) that are used to create, update, and interact with your Kubernetes cluster will be created under your current folder.
+
+
+### Kubernetes Cluster Start Up
+
+#### Create the instances defined in the CloudFormation template
+
+Now let's create your cluster (choose any `PREFIX` for the command below):
+
+```
+kube-aws up --s3-uri s3://BUCKET_NAME/PREFIX
+```
+
+`BUCKET_NAME`: the bucket name that you used in [S3 bucket](#s3-bucket)
+
+
+#### Configure DNS
+
+You can invoke `kube-aws status` to get the cluster API endpoint after cluster creation.
+
+```
+$ kube-aws status
+Cluster Name: paddle-cluster
+Controller DNS Name: paddle-cl-ElbAPISe-EEOI3EZPR86C-531251350.us-west-2.elb.amazonaws.com
+```
+
+If you own a DNS name, you can either set an A record to any of the IPs found below, __or__ set up a CNAME pointing to the `Controller DNS Name` (`paddle-cl-ElbAPISe-EEOI3EZPR86C-531251350.us-west-2.elb.amazonaws.com`).
+
+##### Find IP address
+
+Use the command `dig` on the load balancer hostname to get the IP addresses.
+
+```
+$ dig paddle-cl-ElbAPISe-EEOI3EZPR86C-531251350.us-west-2.elb.amazonaws.com
+
+;; QUESTION SECTION:
+;paddle-cl-ElbAPISe-EEOI3EZPR86C-531251350.us-west-2.elb.amazonaws.com. IN A
+
+;; ANSWER SECTION:
+paddle-cl-ElbAPISe-EEOI3EZPR86C-531251350.us-west-2.elb.amazonaws.com. 59 IN A 54.241.164.52
+paddle-cl-ElbAPISe-EEOI3EZPR86C-531251350.us-west-2.elb.amazonaws.com. 59 IN A 54.67.102.112
+```
+
+In the above output, both IPs `54.241.164.52` and `54.67.102.112` will work.
+
+*If you own a DNS name*, set the A record to any of the above IPs. Then you can skip ahead to the step "Access the cluster".
+
+*If you do not own a DNS name*:
+##### Update local DNS association
+Edit `/etc/hosts` to associate one of the above IPs with the DNS name.
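+
+For example, assuming you chose the DNS name `paddle` and got one of the IP addresses returned by `dig` above, the entry would look like:
+
+```
+# /etc/hosts
+54.241.164.52 paddle
+```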
+##### Add Route53 private name service in VPC
+ - Open the [Route53 Console](https://console.aws.amazon.com/route53/home)
+ - Create a hosted zone with the following config
+   - Domain name: "paddle"
+   - Type: "Private hosted zone for amazon VPC"
+   - VPC ID: ``
+
+   ![route53 zone setting](src/route53_create_zone.png)
+ - Add an A record
+   - Click on the zone "paddle" just created
+   - Click the button "Create record set"
+   - Name: leave blank
+   - Type: "A"
+   - Value: ``
+
+   ![route53 create recordset](src/route53_create_recordset.png)
+ - Verify the name service
+   - Connect to any instance created by kube-aws via ssh
+   - Run the command "host paddle" and check whether the IP returned is the private IP of the kube controller
+
+#### Access the cluster
+
+Once the API server is running, you should see:
+
+```
+$ kubectl --kubeconfig=kubeconfig get nodes
+NAME                                       STATUS    AGE
+ip-10-0-0-134.us-west-2.compute.internal   Ready     6m
+ip-10-0-0-238.us-west-2.compute.internal   Ready     6m
+ip-10-0-0-50.us-west-2.compute.internal    Ready     6m
+ip-10-0-0-55.us-west-2.compute.internal    Ready     6m
+```
+
+
+### Setup Elastic File System for Cluster
+
+Training data is usually served on a distributed filesystem; we use Elastic File System (EFS) on AWS.
+
+1. Create a security group for EFS in the [security group console](https://us-west-2.console.aws.amazon.com/ec2/v2/home?region=us-west-2#SecurityGroups:sort=groupId)
+ 1. Look up the security group id for `paddle-cluster-sg-worker` (`sg-055ee37d` in the image below)
![](src/worker_security_group.png)
+ 2. Add a security group `paddle-efs` with an `ALL TCP` inbound rule whose custom source is the group id of `paddle-cluster-sg-worker`, and with the VPC of `paddle-cluster-vpc`. Make sure the availability zone is the same as the one you used in [Initialize Assets](#initialize-assets).
+
![](src/add_security_group.png)
+
+2. Create the Elastic File System in the [EFS console](https://us-west-2.console.aws.amazon.com/efs/home?region=us-west-2#/wizard/1) with the `paddle-cluster-vpc` VPC. Make sure the subnet is `paddle-cluster-Subnet0` and the security group is `paddle-efs`.
+
![](src/create_efs.png)
+
+
+### Start PaddlePaddle Training Demo on AWS
+
+#### Configure Kubernetes Volume that Points to EFS
+
+First we need to create a [PersistentVolume](https://kubernetes.io/docs/user-guide/persistent-volumes/) to provision the EFS volume.
+
+Save the following snippet as `pv.yaml`:
+```
+apiVersion: v1
+kind: PersistentVolume
+metadata:
+  name: efsvol
+spec:
+  capacity:
+    storage: 100Gi
+  accessModes:
+    - ReadWriteMany
+  nfs:
+    server: EFS_DNS_NAME
+    path: "/"
+```
+
+`EFS_DNS_NAME`: the DNS name as shown in the description of the `paddle-efs` filesystem that we created. It looks similar to `fs-2cbf7385.efs.us-west-2.amazonaws.com`.
+
+Run the following command to create the persistent volume:
+```
+kubectl --kubeconfig=kubeconfig create -f pv.yaml
+```
+
+Next let's create a [PersistentVolumeClaim](https://kubernetes.io/docs/user-guide/persistent-volumes/) to claim the persistent volume.
+
+Save the following snippet as `pvc.yaml`:
+```
+kind: PersistentVolumeClaim
+apiVersion: v1
+metadata:
+  name: efsvol
+spec:
+  accessModes:
+    - ReadWriteMany
+  resources:
+    requests:
+      storage: 50Gi
+```
+
+Run the following command to create the persistent volume claim:
+```
+kubectl --kubeconfig=kubeconfig create -f pvc.yaml
+```
+
+#### Prepare Training Data
+
+We will now launch a Kubernetes job that downloads, saves, and evenly splits the training data into 3 shards on the persistent volume that we just created.
+
+Save the following snippet as `paddle-data-job.yaml`:
+```
+apiVersion: batch/v1
+kind: Job
+metadata:
+  name: paddle-data
+spec:
+  template:
+    metadata:
+      name: pi
+    spec:
+      containers:
+      - name: paddle-data
+        image: paddledev/paddle-tutorial:k8s_data
+        imagePullPolicy: Always
+        volumeMounts:
+        - mountPath: "/efs"
+          name: efs
+        env:
+        - name: OUT_DIR
+          value: /efs/paddle-cluster-job
+        - name: SPLIT_COUNT
+          value: "3"
+      volumes:
+        - name: efs
+          persistentVolumeClaim:
+            claimName: efsvol
+      restartPolicy: Never
+```
+
+Run the following command to launch the job:
+```
+kubectl --kubeconfig=kubeconfig create -f paddle-data-job.yaml
+```
+
+The job may take about 7 minutes to finish. Use the following command to check the job status, and do not proceed until `SUCCESSFUL` for the `paddle-data` job is `1`:
+```
+$ kubectl --kubeconfig=kubeconfig get jobs
+NAME          DESIRED   SUCCESSFUL   AGE
+paddle-data   1         1            6m
+```
+
+Data preparation is done by the Docker image `paddledev/paddle-tutorial:k8s_data`; see [here](src/k8s_data/README.md) for the source code and how to build this Docker image.
+
+#### Start Training
+
+Now we are ready to start the Paddle training job.
Save the following snippet as `paddle-cluster-job.yaml`:
+```
+apiVersion: batch/v1
+kind: Job
+metadata:
+  name: paddle-cluster-job
+spec:
+  parallelism: 3
+  completions: 3
+  template:
+    metadata:
+      name: paddle-cluster-job
+    spec:
+      volumes:
+      - name: efs
+        persistentVolumeClaim:
+          claimName: efsvol
+      containers:
+      - name: trainer
+        image: paddledev/paddle-tutorial:k8s_train
+        command: ["bin/bash",  "-c", "/root/start.sh"]
+        env:
+        - name: JOB_NAME
+          value: paddle-cluster-job
+        - name: JOB_PATH
+          value: /home/jobpath
+        - name: JOB_NAMESPACE
+          value: default
+        - name: TRAIN_CONFIG_DIR
+          value: quick_start
+        - name: CONF_PADDLE_NIC
+          value: eth0
+        - name: CONF_PADDLE_PORT
+          value: "7164"
+        - name: CONF_PADDLE_PORTS_NUM
+          value: "2"
+        - name: CONF_PADDLE_PORTS_NUM_SPARSE
+          value: "2"
+        - name: CONF_PADDLE_GRADIENT_NUM
+          value: "3"
+        - name: TRAINER_COUNT
+          value: "3"
+        volumeMounts:
+        - mountPath: "/home/jobpath"
+          name: efs
+        ports:
+        - name: jobport0
+          hostPort: 7164
+          containerPort: 7164
+        - name: jobport1
+          hostPort: 7165
+          containerPort: 7165
+        - name: jobport2
+          hostPort: 7166
+          containerPort: 7166
+        - name: jobport3
+          hostPort: 7167
+          containerPort: 7167
+      restartPolicy: Never
+```
+
+`parallelism: 3, completions: 3` means this job will simultaneously start 3 PaddlePaddle pods, and this job will be finished when there are 3 finished pods.
+
+The `env` field represents the container's environment variables; we specify the PaddlePaddle parameters by environment variables.
+
+`ports` indicates that TCP ports 7164 - 7167 are exposed for communication between `pserver` and trainer. The ports start continuously from `CONF_PADDLE_PORT` (7164) up to `CONF_PADDLE_PORT + CONF_PADDLE_PORTS_NUM + CONF_PADDLE_PORTS_NUM_SPARSE - 1` (7167). We use multiple ports for dense and sparse parameter updates to improve latency.
+
+Run the following command to launch the job:
+```
+kubectl --kubeconfig=kubeconfig create -f paddle-cluster-job.yaml
+```
+
+Inspect individual pods:
+
+```
+$ kubectl --kubeconfig=kubeconfig get pods
+NAME                       READY     STATUS    RESTARTS   AGE
+paddle-cluster-job-cm469   1/1       Running   0          9m
+paddle-cluster-job-fnt03   1/1       Running   0          9m
+paddle-cluster-job-jx4xr   1/1       Running   0          9m
+```
+
+Inspect individual console output:
+```
+kubectl --kubeconfig=kubeconfig log -f POD_NAME
+```
+
+`POD_NAME`: the name of any pod (e.g., `paddle-cluster-job-cm469`).
+
+Run `kubectl --kubeconfig=kubeconfig describe job paddle-cluster-job` to check the training job status. It will complete in around 20 minutes.
+
+The details of starting `pserver` and `trainer` are hidden inside the Docker image `paddledev/paddle-tutorial:k8s_train`; see [here](src/k8s_train/README.md) for the source code and how to build the Docker image.
+
+#### Inspect Training Output
+
+Training output (model snapshots and logs) will be saved on EFS. We can SSH into a worker EC2 instance, mount EFS, and check the training output.
+
+1. SSH into a worker EC2 instance:
+```
+chmod 400 key-name.pem
+ssh -i key-name.pem core@INSTANCE_IP
+```
+
+`INSTANCE_IP`: the public IP address of an EC2 Kubernetes worker node. Go to the [EC2 console](https://us-west-2.console.aws.amazon.com/ec2/v2/home?region=us-west-2#Instances:sort=instanceId) and check the `public IP` of any `paddle-cluster-kube-aws-worker` instance.
+
+2. Mount EFS:
+```
+mkdir efs
+sudo mount -t nfs4 -o nfsvers=4.1,rsize=1048576,wsize=1048576,hard,timeo=600,retrans=2 EFS_DNS_NAME:/ efs
+```
+
+`EFS_DNS_NAME`: the DNS name as shown in the description of the `paddle-efs` filesystem that we created. It looks similar to `fs-2cbf7385.efs.us-west-2.amazonaws.com`.
+
+Now the `efs` folder will have a structure similar to:
+```
+-- paddle-cluster-job
+    |-- ...
+    |-- output
+    |   |-- node_0
+    |   |   |-- server.log
+    |   |   `-- train.log
+    |   |-- node_1
+    |   |   |-- server.log
+    |   |   `-- train.log
+    |   |-- node_2
+    |   |   |-- server.log
+    |   |   `-- train.log
+    |   |-- pass-00000
+    |   |   |-- ___fc_layer_0__.w0
+    |   |   |-- ___fc_layer_0__.wbias
+    |   |   |-- done
+    |   |   |-- path.txt
+    |   |   `-- trainer_config.lr.py
+    |   |-- pass-00001...
+```
+(Caution: the node_0, node_1, node_2 directories represent PaddlePaddle nodes and trainer_id, not Kubernetes nodes.)
+
+`server.log` contains the log for `pserver`. `train.log` contains the log for the `trainer`. Model descriptions and snapshots are stored in `pass-0000*`.
+
+### Kubernetes Cluster Tear Down
+
+#### Delete EFS
+
+Go to the [EFS Console](https://us-west-2.console.aws.amazon.com/efs/home?region=us-west-2) and delete the EFS volume that we created.
+
+#### Delete security group
+
+Go to the [Security Group Console](https://us-west-2.console.aws.amazon.com/ec2/v2/home?region=us-west-2#SecurityGroups:sort=groupId) and delete the security group `paddle-efs`.
+
+
+#### Delete S3 Bucket
+
+Go to the [S3 Console](https://console.aws.amazon.com/s3/home?region=us-west-2#) and delete the S3 bucket that we created.
+
+#### Destroy Cluster
+
+```
+kube-aws destroy
+```
+
+The command will return immediately, but it might take around 5 minutes to tear down the whole cluster.
+
+You can go to the [CloudFormation Console](https://us-west-2.console.aws.amazon.com/cloudformation/home?region=us-west-2#/stacks?filter=active) to check the destroy process.
diff --git a/doc/howto/usage/k8s/k8s_basis_cn.md b/doc/howto/usage/k8s/k8s_basis_cn.md
new file mode 100644
index 0000000000000000000000000000000000000000..4c3dc81ed38f239c1f4a83d22b49cf57b5d16a8b
--- /dev/null
+++ b/doc/howto/usage/k8s/k8s_basis_cn.md
@@ -0,0 +1,75 @@
+# Kubernetes 简介
+
+[*Kubernetes*](http://kubernetes.io/)是Google开源的容器集群管理系统,其提供应用部署、维护、扩展机制等功能,利用Kubernetes能方便地管理跨机器运行容器化的应用。Kubernetes可以在物理机或虚拟机上运行,且支持部署到[AWS](http://kubernetes.io/docs/getting-started-guides/aws),[Azure](http://kubernetes.io/docs/getting-started-guides/azure/),[GCE](http://kubernetes.io/docs/getting-started-guides/gce)等多种公有云环境。介绍分布式训练之前,需要对[Kubernetes](http://kubernetes.io/)有一个基本的认识,下面先简要介绍一下本文用到的几个Kubernetes概念。
+
+- [*Node*](http://kubernetes.io/docs/admin/node/) 表示一个Kubernetes集群中的一个工作节点,这个节点可以是物理机或者虚拟机,Kubernetes集群就是由node节点与master节点组成的。
+
+- [*Pod*](http://kubernetes.io/docs/user-guide/pods/) 是一组(一个或多个)容器,pod是Kubernetes的最小调度单元,一个pod中的所有容器会被调度到同一个node上。Pod中的容器共享NET,PID,IPC,UTS等Linux namespace。由于容器之间共享NET namespace,所以它们使用同一个IP地址,可以通过*localhost*互相通信。不同pod之间可以通过IP地址访问。
+
+- [*Job*](http://kubernetes.io/docs/user-guide/jobs/) 描述Kubernetes上运行的作业,一次作业称为一个job,通常每个job包括一个或者多个pods,job启动后会创建这些pod并开始执行一个程序,等待这个程序执行成功并返回0则成功退出,如果执行失败,也可以配置不同的重试机制。
+
+- [*Volume*](http://kubernetes.io/docs/user-guide/volumes/) 存储卷,是pod内的容器都可以访问的共享目录,也是容器与node之间共享文件的方式,因为容器内的文件都是暂时存在的,当容器因为各种原因被销毁时,其内部的文件也会随之消失。通过volume,就可以将这些文件持久化存储。Kubernetes支持多种volume,例如hostPath(宿主机目录),gcePersistentDisk,awsElasticBlockStore等。
+
+- [*Namespaces*](https://kubernetes.io/docs/user-guide/namespaces/) 命名空间,在kubernetes中创建的所有资源对象(例如上文的pod,job)等都属于一个命名空间,在同一个命名空间中,资源对象的名字是唯一的,不同空间的资源名可以重复,命名空间主要为了对象进行逻辑上的分组便于管理。本文只使用了默认命名空间。
+
+- [*PersistentVolume*](https://kubernetes.io/docs/user-guide/persistent-volumes/): 和[*PersistentVolumeClaim*](https://kubernetes.io/docs/user-guide/persistent-volumes/#persistentvolumeclaims)结合,将外部的存储服务在Kubernetes中描述成为统一的资源形式,便于存储资源管理和Pod引用。
+
+## 部署Kubernetes集群
+
+Kubernetes提供了多种集群部署的方案,本文档不再重复介绍。这里给出几种常见的部署方法:
+
+- [*minikube*](https://kubernetes.io/docs/getting-started-guides/minikube/): 
+- [*kubeadm*](http://kubernetes.io/docs/getting-started-guides/kubeadm/): 在不同操作系统,不同主机(Bare-Metal, AWS, GCE)条件下,快速部署集群。
+- [*AWS EC2*](https://kubernetes.io/docs/getting-started-guides/aws/): 在aws上快速部署集群。
+- [*Bare-Metal*](https://kubernetes.io/docs/getting-started-guides/centos/centos_manual_config/): 在物理机上手动部署。
+
+可以参考[这个表格](https://kubernetes.io/docs/getting-started-guides/#table-of-solutions)选择适合您的场景的合适方案。
+
+## 选择存储方案
+
+容器不会保留在运行时生成的数据,job或者应用程序在容器中运行时生成的数据会在容器销毁时消失。为了完成分布式机器学习训练任务,需要有一个外部的存储服务来保存训练所需数据和训练输出。
+常见的可选存储服务包括:
+
+- [*NFS*](https://github.com/kubernetes/kubernetes/tree/master/examples/volumes/nfs): 可以将磁盘上某个目录共享给网络中其他机器访问。部署和配置比较简单,可以用于小量数据的验证。不提供分布式存储,高可用,冗余等功能。NFS的部署方法可以参考[这里](http://www.tecmint.com/how-to-setup-nfs-server-in-linux/)。
+- [*GlusterFS*](http://gluster.readthedocs.io/en/latest/Quick-Start-Guide/Quickstart/): 网络分布式文件系统,可以在Kubernetes中按照[这个](https://github.com/kubernetes/kubernetes/tree/master/examples/volumes/glusterfs)例子使用。
+- [*Ceph*](http://docs.ceph.com/docs/master/): 分布式文件系统,支持rbd,POSIX API接口(ceph fs)和对象存储API,参考[这里](https://kubernetes.io/docs/user-guide/volumes/#rbd)。
+- [*MooseFS*](https://moosefs.com/documentation.html): 一个分布式的存储系统。需要先挂载到服务器Node上再通过kubernetes hostPath Volume挂载到容器中。
+
+## 配置kubectl
+
+### 安装kubectl
+```
+# OS X
+curl -LO https://storage.googleapis.com/kubernetes-release/release/$(curl -s https://storage.googleapis.com/kubernetes-release/release/stable.txt)/bin/darwin/amd64/kubectl
+
+# Linux
+curl -LO https://storage.googleapis.com/kubernetes-release/release/$(curl -s https://storage.googleapis.com/kubernetes-release/release/stable.txt)/bin/linux/amd64/kubectl
+
+# Windows
+curl -LO https://storage.googleapis.com/kubernetes-release/release/$(curl -s https://storage.googleapis.com/kubernetes-release/release/stable.txt)/bin/windows/amd64/kubectl.exe
+```
+
+### 配置kubectl访问你的kubernetes集群
+
+编辑`~/.kube/config`这个配置文件,修改`Master-IP`的地址。如果使用SSL认证,则需要配置`certificate-authority`和`users`中的用户证书。如果是使用非SSL方式访问(比如通过8080端口),也可以去掉这些证书的配置。
+```
+apiVersion: v1
+clusters:
+- cluster:
+    certificate-authority: /path/to/ca.crt
+    server: https://[Master-IP]:443
+  name: minikube
+contexts:
+- context:
+    cluster: minikube
+    user: minikube
+  name: minikube
+current-context: minikube
+kind: Config
+preferences: {}
+users:
+- name: minikube
+  user:
+    client-certificate: /path/to/apiserver.crt
+    client-key: /path/to/apiserver.key
+```
diff --git a/doc/howto/usage/k8s/k8s_cn.md b/doc/howto/usage/k8s/k8s_cn.md
new file mode 100644
index 0000000000000000000000000000000000000000..ab07cb9cd5b135ddea82b3360720537f1dc5a801
--- /dev/null
+++ b/doc/howto/usage/k8s/k8s_cn.md
@@ -0,0 +1,205 @@
+# Kubernetes单机训练
+
+在这篇文档里,我们介绍如何在 Kubernetes 集群上启动一个单机使用CPU的Paddle训练作业。在下一篇中,我们将介绍如何启动分布式训练作业。
+
+## 制作Docker镜像
+
+在一个功能齐全的Kubernetes机群里,通常我们会安装Ceph等分布式文件系统来存储训练数据。这样的话,一个分布式Paddle训练任务中的每个进程都可以从Ceph读取数据。在这个例子里,我们只演示一个单机作业,所以可以简化对环境的要求,把训练数据直接放在
+Paddle的Docker image里。为此,我们需要制作一个包含训练数据的Paddle镜像。
+
+Paddle 的 [Quick Start Tutorial](http://www.paddlepaddle.org/doc/demo/quick_start/index_en.html)
+里介绍了用Paddle源码中的脚本下载训练数据的过程。
+而 `paddledev/paddle:cpu-demo-latest` 镜像里有 Paddle 源码与demo,(请注意,默认的
+Paddle镜像 `paddledev/paddle:cpu-latest` 是不包括源码的,Paddle的各版本镜像可以参考 [Docker installation guide](http://www.paddlepaddle.org/doc/build/docker_install.html)),所以我们使用这个镜像来下载训练数据到Docker container中,然后把这个包含了训练数据的container保存为一个新的镜像。
+
+### 运行容器
+
+```
+$ docker run --name quick_start_data -it paddledev/paddle:cpu-demo-latest
+```
+
+### 下载数据
+
+进入容器`/root/paddle/demo/quick_start/data`目录,使用`get_data.sh`下载数据 + +``` +$ root@fbd1f2bb71f4:~/paddle/demo/quick_start/data# ./get_data.sh + +Downloading Amazon Electronics reviews data... +--2016-10-31 01:33:43-- http://snap.stanford.edu/data/amazon/productGraph/categoryFiles/reviews_Electronics_5.json.gz +Resolving snap.stanford.edu (snap.stanford.edu)... 171.64.75.80 +Connecting to snap.stanford.edu (snap.stanford.edu)|171.64.75.80|:80... connected. +HTTP request sent, awaiting response... 200 OK +Length: 495854086 (473M) [application/x-gzip] +Saving to: 'reviews_Electronics_5.json.gz' + + 10% [=======> ] 874,279 64.7KB/s eta 2h 13m + +``` + +### 修改启动脚本 + +下载完数据后,修改`/root/paddle/demo/quick_start/train.sh`文件,内容如下(增加了一条cd命令) +``` +set -e +cd /root/paddle/demo/quick_start +cfg=trainer_config.lr.py +#cfg=trainer_config.emb.py +#cfg=trainer_config.cnn.py +#cfg=trainer_config.lstm.py +#cfg=trainer_config.bidi-lstm.py +#cfg=trainer_config.db-lstm.py +paddle train \ + --config=$cfg \ + --save_dir=./output \ + --trainer_count=4 \ + --log_period=20 \ + --num_passes=15 \ + --use_gpu=false \ + --show_parameter_stats_period=100 \ + --test_all_data_in_one_period=1 \ + 2>&1 | tee 'train.log' +``` + +### 提交镜像 + +修改启动脚本后,退出容器,使用`docker commit`命令创建新镜像。 + +``` +$ docker commit quick_start_data mypaddle/paddle:quickstart +``` + +## 使用 Kubernetes 进行训练 + +>针对任务运行完成后容器自动退出的场景,Kubernetes有Job类型的资源来支持。下文就是用Job类型的资源来进行训练。 + +### 编写yaml文件 + +在训练时,输出结果可能会随着容器的消耗而被删除,需要在创建容器前挂载卷以便我们保存训练结果。使用我们之前构造的镜像,可以创建一个 [Kubernetes Job](http://kubernetes.io/docs/user-guide/jobs/#what-is-a-job),简单的yaml文件如下: + +``` +apiVersion: batch/v1 +kind: Job +metadata: + name: quickstart +spec: + parallelism: 1 + completions: 1 + template: + metadata: + name: quickstart + spec: + volumes: + - name: output + hostPath: + path: /home/work/paddle_output + containers: + - name: pi + image: mypaddle/paddle:quickstart + command: ["bin/bash", "-c", "/root/paddle/demo/quick_start/train.sh"] + volumeMounts: + - name: output + mountPath: /root/paddle/demo/quick_start/output + restartPolicy: Never +``` + +### 创建Paddle Job + +使用上文创建的yaml文件创建Kubernetes Job,命令为: + +``` +$ kubectl create -f paddle.yaml +``` + +查看job的详细情况: + +``` +$ kubectl get job +NAME DESIRED SUCCESSFUL AGE +quickstart 1 0 58s + +$ kubectl describe job quickstart +Name: quickstart +Namespace: default +Image(s): registry.baidu.com/public/paddle:cpu-demo-latest +Selector: controller-uid=f120da72-9f18-11e6-b363-448a5b355b84 +Parallelism: 1 +Completions: 1 +Start Time: Mon, 31 Oct 2016 11:20:16 +0800 +Labels: controller-uid=f120da72-9f18-11e6-b363-448a5b355b84,job-name=quickstart +Pods Statuses: 0 Running / 1 Succeeded / 0 Failed +Volumes: + output: + Type: HostPath (bare host directory volume) + Path: /home/work/paddle_output +Events: + FirstSeen LastSeen Count From SubobjectPath Type Reason Message + --------- -------- ----- ---- ------------- -------- ------ ------- + 1m 1m 1 {job-controller } Normal SuccessfulCreate Created pod: quickstart-fa0wx +``` + +### 查看训练结果 + +根据Job对应的Pod信息,可以查看此Pod运行的宿主机。 + +``` +kubectl describe pod quickstart-fa0wx +Name: quickstart-fa0wx +Namespace: default +Node: paddle-demo-let02/10.206.202.44 +Start Time: Mon, 31 Oct 2016 11:20:17 +0800 +Labels: controller-uid=f120da72-9f18-11e6-b363-448a5b355b84,job-name=quickstart +Status: Succeeded +IP: 10.0.0.9 +Controllers: Job/quickstart +Containers: + quickstart: + Container ID: docker://b8561f5c79193550d64fa47418a9e67ebdd71546186e840f88de5026b8097465 + Image: registry.baidu.com/public/paddle:cpu-demo-latest + Image 
ID: docker://18e457ce3d362ff5f3febf8e7f85ffec852f70f3b629add10aed84f930a68750 + Port: + Command: + bin/bash + -c + /root/paddle/demo/quick_start/train.sh + QoS Tier: + cpu: BestEffort + memory: BestEffort + State: Terminated + Reason: Completed + Exit Code: 0 + Started: Mon, 31 Oct 2016 11:20:20 +0800 + Finished: Mon, 31 Oct 2016 11:21:46 +0800 + Ready: False + Restart Count: 0 + Environment Variables: +Conditions: + Type Status + Ready False +Volumes: + output: + Type: HostPath (bare host directory volume) + Path: /home/work/paddle_output +``` + +我们还可以登录到宿主机上查看训练结果。 + +``` +[root@paddle-demo-let02 paddle_output]# ll +total 60 +drwxr-xr-x 2 root root 4096 Oct 31 11:20 pass-00000 +drwxr-xr-x 2 root root 4096 Oct 31 11:20 pass-00001 +drwxr-xr-x 2 root root 4096 Oct 31 11:21 pass-00002 +drwxr-xr-x 2 root root 4096 Oct 31 11:21 pass-00003 +drwxr-xr-x 2 root root 4096 Oct 31 11:21 pass-00004 +drwxr-xr-x 2 root root 4096 Oct 31 11:21 pass-00005 +drwxr-xr-x 2 root root 4096 Oct 31 11:21 pass-00006 +drwxr-xr-x 2 root root 4096 Oct 31 11:21 pass-00007 +drwxr-xr-x 2 root root 4096 Oct 31 11:21 pass-00008 +drwxr-xr-x 2 root root 4096 Oct 31 11:21 pass-00009 +drwxr-xr-x 2 root root 4096 Oct 31 11:21 pass-00010 +drwxr-xr-x 2 root root 4096 Oct 31 11:21 pass-00011 +drwxr-xr-x 2 root root 4096 Oct 31 11:21 pass-00012 +drwxr-xr-x 2 root root 4096 Oct 31 11:21 pass-00013 +drwxr-xr-x 2 root root 4096 Oct 31 11:21 pass-00014 +``` diff --git a/doc/howto/usage/k8s/k8s_distributed_cn.md b/doc/howto/usage/k8s/k8s_distributed_cn.md new file mode 100644 index 0000000000000000000000000000000000000000..3121b3f59df650c0a22d0bd305a6f793b202d30e --- /dev/null +++ b/doc/howto/usage/k8s/k8s_distributed_cn.md @@ -0,0 +1,315 @@ +# Kubernetes分布式训练 + +前一篇文章介绍了如何在Kubernetes集群上启动一个单机PaddlePaddle训练作业 (Job)。在这篇文章里,我们介绍如何在Kubernetes集群上进行分布式PaddlePaddle训练作业。关于PaddlePaddle的分布式训练,文章 [Cluster Training](https://github.com/baidu/Paddle/blob/develop/doc/cluster/opensource/cluster_train.md)介绍了一种通过SSH远程分发任务,进行分布式训练的方法,与此不同的是,本文将介绍在Kubernetes容器管理平台上快速构建PaddlePaddle容器集群,进行分布式训练的方案。 + +有关Kubernetes相关概念以及如何搭建和配置Kubernetes集群,可以参考[k8s_basis](./k8s_basis_cn.md)。 + +## 整体方案 + +在训练之前,用户将配置与训练数据切分好放在分布式文件系统预先分配好的目录中(不同的分布式文件系统,需要使用其制定的方式挂载后并导入数据),训练时,程序从此目录拷贝文件到容器内进行训练,将结果保存到此目录里。整体的结构图如下: + +![paddle on kubernetes结构图](src/k8s-paddle-arch.png) + +上图描述了一个3节点的分布式训练场景,在每个Pod上都通过volume方式挂载分布式文件系统的一个目录用于保存训练数据和输出结果。Kubernetes为这次训练创建了3个pod并且调度到了3个node上运行,每个pod包含一个PaddlePaddle容器。在容器创建后,会启动pserver与trainer进程,读取volume中的数据进行这次分布式训练。 + +根据前文的描述,要在已有的Kubernetes集群上进行PaddlePaddle的分布式训练,按照下面步骤即可: + +1. [制作PaddlePaddle镜像](#制作镜像) +1. [将训练文件与切分好的数据上传到共享存储](#上传训练文件) +1. [编写本次训练的YAML文件,创建一个Kubernetes job](#创建Job) +1. [训练结束后查看输出结果](#查看输出) + +下面就根据这几个步骤分别介绍。 + +### 制作镜像 + +PaddlePaddle镜像需要提供`paddle pserver`与`paddle train`进程的运行环境,用这个镜像创建的容器需要有以下两个功能: + +- 拷贝训练文件到容器内 +- 生成`paddle pserver`与`paddle train`进程的启动参数,并且启动训练 + +因为官方镜像 `paddledev/paddle:cpu-latest` 内已经包含PaddlePaddle的执行程序但是还没上述功能,所以我们可以在这个基础上,添加启动脚本,制作新镜像来完成以上的工作。参考镜像的[*Dockerfile*](https://github.com/PaddlePaddle/Paddle/blob/develop/doc/howto/usage/cluster/k8s/src/k8s_train/Dockerfile)。 + +```bash +$ cd doc/howto/usage/k8s/src/k8s_train +$ docker build -t [YOUR_REPO]/paddle:mypaddle . +``` + +然后将构建成功的镜像上传到镜像仓库。 + +```bash +docker push [YOUR_REPO]/paddle:mypaddle +``` + +注意上述命令中`[YOUR_REPO]`表示读者所使用的Docker镜像仓库地址,读者需要替换成自己使用的仓库地址。下文使用`[YOUR_REPO]/paddle:mypaddle`这个地址来表示此步骤所构建出的镜像。 + +### 准备训练数据 + +这里我们通过在Kubernetes集群上启动一个Job来下载并切割数据,也可以通过修改[k8s_train](./src/k8s_train/README.md)的内容来定制image. 
+ +在启动Job之前,需要根据不同的分布式存储来绑定一个[persistentVolumeClaim](https://kubernetes.io/docs/user-guide/persistent-volumes/),生成的数据将会存储在这个volume下. + +```yaml +apiVersion: batch/v1 +kind: Job +metadata: + name: paddle-data +spec: + template: + metadata: + name: pi + spec: + hostNetwork: true + containers: + - name: paddle-data + image: paddledev/paddle-tutorial:k8s_data + imagePullPolicy: Always + volumeMounts: + - mountPath: "/mnt" + name: nfs + env: + - name: OUT_DIR + value: /home/work/mfs/paddle-cluster-job + - name: SPLIT_COUNT + value: "3" + volumes: + - name: nfs + persistentVolumeClaim: + claimName: mfs + restartPolicy: Never +``` + +完成后volume中的文件内容大致如下: +```base +[root@paddle-kubernetes-node0 nfsdir]$ tree -d +. +`-- paddle-cluster-job + |-- 0 + | `-- data + |-- 1 + | `-- data + |-- 2 + | `-- data + |-- output + |-- quick_start +``` + +目录中paddle-cluster-job是本次训练对应的job name,本次训练要求有3个PaddlePaddle节点,在paddle-cluster-job/data目录中存放切分好的数据,文件夹0,1,2分别代表3个节点的trainer_id。recommendation文件夹内存放训练文件,output文件夹存放训练结果与日志。 + +### 创建Job + +Kubernetes可以通过YAML文件来创建相关对象,然后可以使用命令行工具创建job。 + +Job YAML文件描述了这次训练使用的Docker镜像,需要启动的节点个数以及 `paddle pserver`与 `paddle train`进程启动的必要参数,也描述了容器需要使用的存储卷挂载的情况。YAML文件中各个字段的具体含义,可以查看[Kubernetes Job API](http://kubernetes.io/docs/api-reference/batch/v1/definitions/#_v1_job)。例如,本次训练的YAML文件可以写成: + +```yaml +apiVersion: batch/v1 +kind: Job +metadata: + name: paddle-cluster-job +spec: + parallelism: 3 + completions: 3 + template: + metadata: + name: paddle-cluster-job + spec: + volumes: + - name: jobpath + hostPath: + path: /home/work/mfs + containers: + - name: trainer + image: [YOUR_REPO]/paddle:mypaddle + command: ["bin/bash", "-c", "/root/start.sh"] + env: + - name: JOB_NAME + value: paddle-cluster-job + - name: JOB_PATH + value: /home/jobpath + - name: JOB_NAMESPACE + value: default + - name: TRAIN_CONFIG_DIR + value: recommendation + - name: CONF_PADDLE_NIC + value: eth0 + - name: CONF_PADDLE_PORT + value: "7164" + - name: CONF_PADDLE_PORTS_NUM + value: "2" + - name: CONF_PADDLE_PORTS_NUM_SPARSE + value: "2" + - name: CONF_PADDLE_GRADIENT_NUM + value: "3" + volumeMounts: + - name: jobpath + mountPath: /home/jobpath + restartPolicy: Never +``` + +文件中,`metadata`下的`name`表示这个job的名字。`parallelism,completions`字段表示这个job会同时开启3个PaddlePaddle节点,成功训练且退出的pod数目为3时,这个job才算成功结束。然后申明一个存储卷`jobpath`,代表宿主机目录`/home/work/mfs`,在对容器的描述`containers`字段中,将此目录挂载为容器的`/home/jobpath`目录,这样容器的`/home/jobpath`目录就成为了共享存储,放在这个目录里的文件其实是保存到了MFS上。 + +`env`字段表示容器的环境变量,我们将`paddle`运行的一些参数通过这种方式传递到容器内。 + +环境变量 | 说明 +--- | --- +JOB_PATH | 共享存储挂在的路径 +JOB_NAME | Job的名字 +TRAIN_CONFIG_DIR | 本次训练文件所在目录,与JOB_PATH,JOB_NAME组合可以找到本次训练需要的文件路径 +CONF_PADDLE_NIC | `paddle pserver`进程需要的`--nics`参数,即网卡名 +CONF_PADDLE_PORT | `paddle paserver`的`--port`参数 +CONF_PADDLE_PORTS_NUM | 稠密更新的端口数量,即`--ports_num`参数 +CONF_PADDLE_PORTS_NUM_SPARSE | 稀疏更新的端口数量,即`--ports_num_for_sparse`参数 +CONF_PADDLE_GRADIENT_NUM | 训练节点数量,即`--num_gradient_servers参数` + +这些参数的具体描述,读者可以查看[这里](http://www.paddlepaddle.org/doc/ui/cmd_argument/detail_introduction.html#parameter-server-and-distributed-communication)。 + +编写完YAML文件后,可以使用Kubernetes的命令行工具创建job。 + +```bash +kubectl create -f job.yaml +``` + +创建成功后,Kubernetes就会创建3个pod作为PaddlePaddle节点然后拉取镜像,启动容器开始训练。 + + +### 查看输出 + +在训练过程中,可以在共享存储上查看输出的日志和模型,例如output目录下就存放了输出结果。注意node_0,node_1,node_2这几个目录表示PaddlePaddle节点与trainer_id,并不是Kubernetes中的node概念。 + +```bash +[root@paddle-kubernetes-node0 output]# tree -d +. +├── node_0 +│   ├── server.log +│   └── train.log +├── node_1 +│   ├── server.log +│   └── train.log +├── node_2 +...... 
+├── pass-00002
+│   ├── done
+│   ├── ___embedding_0__.w0
+│   ├── ___embedding_1__.w0
+......
+```
+
+我们可以通过日志查看容器训练的情况,例如:
+
+```bash
+[root@paddle-kubernetes-node0 node_0]# cat train.log
+I1116 09:10:17.123121    50 Util.cpp:155] commandline:
+ /usr/local/bin/../opt/paddle/bin/paddle_trainer
+    --nics=eth0 --port=7164
+    --ports_num=2 --comment=paddle_process_by_paddle
+    --pservers=192.168.129.66,192.168.223.143,192.168.129.71
+    --ports_num_for_sparse=2 --config=./trainer_config.py
+    --trainer_count=4 --num_passes=10 --use_gpu=0
+    --log_period=50 --dot_period=10 --saving_period=1
+    --local=0 --trainer_id=0
+    --save_dir=/home/jobpath/paddle-cluster-job/output
+I1116 09:10:17.123440    50 Util.cpp:130] Calling runInitFunctions
+I1116 09:10:17.123764    50 Util.cpp:143] Call runInitFunctions done.
+[WARNING 2016-11-16 09:10:17,227 default_decorators.py:40] please use keyword arguments in paddle config.
+[INFO 2016-11-16 09:10:17,239 networks.py:1282] The input order is [movie_id, title, genres, user_id, gender, age, occupation, rating]
+[INFO 2016-11-16 09:10:17,239 networks.py:1289] The output order is [__mse_cost_0__]
+I1116 09:10:17.392917    50 Trainer.cpp:170] trainer mode: Normal
+I1116 09:10:17.613910    50 PyDataProvider2.cpp:257] loading dataprovider dataprovider::process
+I1116 09:10:17.680917    50 PyDataProvider2.cpp:257] loading dataprovider dataprovider::process
+I1116 09:10:17.681543    50 GradientMachine.cpp:134] Initing parameters..
+I1116 09:10:18.012390    50 GradientMachine.cpp:141] Init parameters done.
+I1116 09:10:18.018641    50 ParameterClient2.cpp:122] pserver 0 192.168.129.66:7164
+I1116 09:10:18.018950    50 ParameterClient2.cpp:122] pserver 1 192.168.129.66:7165
+I1116 09:10:18.019069    50 ParameterClient2.cpp:122] pserver 2 192.168.223.143:7164
+I1116 09:10:18.019492    50 ParameterClient2.cpp:122] pserver 3 192.168.223.143:7165
+I1116 09:10:18.019716    50 ParameterClient2.cpp:122] pserver 4 192.168.129.71:7164
+I1116 09:10:18.019836    50 ParameterClient2.cpp:122] pserver 5 192.168.129.71:7165
+```
+
+
+## 一些细节的补充
+
+### 使用环境变量
+
+使用容器方式运行训练任务的Kubernetes Job,通常会使用环境变量配置Job的配置信息。`start_paddle.py`提供了一个启动脚本,将环境变量转换成paddle的命令行参数:
+```
+API = "/api/v1/namespaces/"
+JOBSELECTOR = "labelSelector=job-name="
+JOB_PATH = os.getenv("JOB_PATH") + "/" + os.getenv("JOB_NAME")
+JOB_PATH_OUTPUT = JOB_PATH + "/output"
+JOBNAME = os.getenv("JOB_NAME")
+NAMESPACE = os.getenv("JOB_NAMESPACE")
+PADDLE_NIC = os.getenv("CONF_PADDLE_NIC")
+PADDLE_PORT = os.getenv("CONF_PADDLE_PORT")
+PADDLE_PORTS_NUM = os.getenv("CONF_PADDLE_PORTS_NUM")
+PADDLE_PORTS_NUM_SPARSE = os.getenv("CONF_PADDLE_PORTS_NUM_SPARSE")
+PADDLE_SERVER_NUM = os.getenv("CONF_PADDLE_GRADIENT_NUM")
+```
+
+### Pod间通信
+`start_paddle.py`脚本开始时,会先进行参数的初始化与解析。
+
+```python
+parser = argparse.ArgumentParser(prog="start_paddle.py",
+                                 description='simple tool for k8s')
+args, train_args_list = parser.parse_known_args()
+train_args = refine_unknown_args(train_args_list)
+train_args_dict = dict(zip(train_args[:-1:2], train_args[1::2]))
+podlist = getPodList()
+```
+
+然后通过函数`getPodList()`访问Kubernetes的接口来查询此job对应的所有pod信息。当所有pod都处于running状态(即容器都已运行)时,再通过函数`getIdMap(podlist)`获取trainer_id。
+
+```python
+podlist = getPodList()
+# need to wait until all pods are running
+while not isPodAllRunning(podlist):
+    time.sleep(10)
+    podlist = getPodList()
+idMap = getIdMap(podlist)
+```
+* *注意*: `getPodList()`会获取当前namespace下的所有pod,如果已经有pod运行,可能会导致出错。这种集群节点管理方式会在将来使用[statefulsets](https://kubernetes.io/docs/concepts/abstractions/controllers/statefulsets/)代替。
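+
+As a quick, runnable illustration of the sorted-IP assignment that `getIdMap` (discussed next) performs, here is a self-contained sketch; the three pod IPs are hypothetical examples:
+
+```python
+# Hypothetical pod IPs; trainer ids follow the lexicographic order of the IP strings,
+# which is exactly how getIdMap sorts them.
+ips = ["192.168.129.71", "192.168.129.66", "192.168.223.143"]
+idMap = {ip: i for i, ip in enumerate(sorted(ips))}
+print(idMap)  # {'192.168.129.66': 0, '192.168.129.71': 1, '192.168.223.143': 2}
+```
+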
+在函数`getIdMap(podlist)`内部,我们通过读取`podlist`中每个pod的IP地址,将IP排序生成的序号作为trainer_id。
+
+```python
+def getIdMap(podlist):
+    '''
+    generate tainer_id by ip
+    '''
+    ips = []
+    for pod in podlist["items"]:
+        ips.append(pod["status"]["podIP"])
+    ips.sort()
+    idMap = {}
+    for i in range(len(ips)):
+        idMap[ips[i]] = i
+    return idMap
+```
+
+在得到`idMap`后,通过函数`startPaddle(idMap, train_args_dict)`构造`paddle pserver`与`paddle train`的启动参数并执行进程。
+
+### 启动任务
+
+在函数`startPaddle`中,最主要的工作就是解析出`paddle pserver`与`paddle train`的启动参数。例如`paddle train`参数的解析,解析环境变量得到`PADDLE_NIC`,`PADDLE_PORT`,`PADDLE_PORTS_NUM`等参数,然后通过自身的IP地址在`idMap`中获取`trainerId`。
+
+```python
+    program = 'paddle train'
+    args = " --nics=" + PADDLE_NIC
+    args += " --port=" + str(PADDLE_PORT)
+    args += " --ports_num=" + str(PADDLE_PORTS_NUM)
+    args += " --comment=" + "paddle_process_by_paddle"
+    ip_string = ""
+    for ip in idMap.keys():
+        ip_string += (ip + ",")
+    ip_string = ip_string.rstrip(",")
+    args += " --pservers=" + ip_string
+    args_ext = ""
+    for key, value in train_args_dict.items():
+        args_ext += (' --' + key + '=' + value)
+    localIP = socket.gethostbyname(socket.gethostname())
+    trainerId = idMap[localIP]
+    args += " " + args_ext + " --trainer_id=" + \
+        str(trainerId) + " --save_dir=" + JOB_PATH_OUTPUT
+```
diff --git a/doc/howto/usage/k8s/k8s_en.md b/doc/howto/usage/k8s/k8s_en.md
new file mode 100644
index 0000000000000000000000000000000000000000..0c3ab05b708e7a924577c26496b8c55126e76c62
--- /dev/null
+++ b/doc/howto/usage/k8s/k8s_en.md
@@ -0,0 +1,201 @@
+# Paddle On Kubernetes
+
+>In this article, we will introduce how to run a Paddle training job on a single-CPU machine using Kubernetes. In the next article, we will introduce how to run a Paddle training job on a distributed cluster.
+
+## Build Docker Image
+
+In a distributed Kubernetes cluster, we will use Ceph or another shared storage system to store training-related data, so that all the processes of a Paddle training job can retrieve data from it. In this example, we will only demo a training job on a single machine. In order to simplify the environment requirements, we will directly put the training data into Paddle's Docker image, so we need to create a Paddle Docker image that already includes the training data.
+
+Paddle's [Quick Start Tutorial](http://www.paddlepaddle.org/doc/demo/quick_start/index_en.html) introduces how to download and train data by using scripts from Paddle's source code.
+The `paddledev/paddle:cpu-demo-latest` image has the Paddle source code and demo. (Caution: the default Paddle image `paddledev/paddle:cpu-latest` doesn't include the source code; Paddle's other image versions are listed in the [Docker installation guide](http://www.paddlepaddle.org/doc/build/docker_install.html).) So we run this container, download the training data inside it, and then commit the whole container to be a new Docker image.
+
+### Run Docker Container
+
+```
+$ docker run --name quick_start_data -it paddledev/paddle:cpu-demo-latest
+```
+
+### Download Training Data
+
+Go into the `/root/paddle/demo/quick_start/data` directory and use `get_data.sh` to download the training data.
+Then go into the `/root/paddle/demo/quick_start` directory and use `preprocess.sh` to pre-process the training data.
+
+```
+$ root@fbd1f2bb71f4:~/paddle/demo/quick_start/data# ./get_data.sh
+
+Downloading Amazon Electronics reviews data...
+--2016-10-31 01:33:43--  http://snap.stanford.edu/data/amazon/productGraph/categoryFiles/reviews_Electronics_5.json.gz
+Resolving snap.stanford.edu (snap.stanford.edu)... 171.64.75.80
+Connecting to snap.stanford.edu (snap.stanford.edu)|171.64.75.80|:80... connected.
+HTTP request sent, awaiting response... 200 OK
+Length: 495854086 (473M) [application/x-gzip]
+Saving to: 'reviews_Electronics_5.json.gz'
+
+ 10% [=======>                                 ] 874,279     64.7KB/s  eta 2h 13m
+
+```
+
+### Modify Startup Script
+
+After downloading the data, modify the `/root/paddle/demo/quick_start/train.sh` file so that its contents are as follows (one more `cd` command is added):
+```
+set -e
+cd /root/paddle/demo/quick_start
+cfg=trainer_config.lr.py
+#cfg=trainer_config.emb.py
+#cfg=trainer_config.cnn.py
+#cfg=trainer_config.lstm.py
+#cfg=trainer_config.bidi-lstm.py
+#cfg=trainer_config.db-lstm.py
+paddle train \
+  --config=$cfg \
+  --save_dir=./output \
+  --trainer_count=4 \
+  --log_period=20 \
+  --num_passes=15 \
+  --use_gpu=false \
+  --show_parameter_stats_period=100 \
+  --test_all_data_in_one_period=1 \
+  2>&1 | tee 'train.log'
+```
+
+### Commit Docker Image
+
+```
+$ docker commit quick_start_data mypaddle/paddle:quickstart
+```
+
+## Use Kubernetes For Training
+
+>We will use a Kubernetes Job for the training process; the following steps show how to run the training with Kubernetes.
+
+### Create Yaml Files
+
+The output in the container will be lost when the job finishes (i.e., when the container stops running), so we need to mount a volume out to the local disk when creating the container, in order to preserve the training result. Using our previously created image, we can create a [Kubernetes Job](http://kubernetes.io/docs/user-guide/jobs/#what-is-a-job); the yaml contents are as follows:
+
+```
+apiVersion: batch/v1
+kind: Job
+metadata:
+  name: quickstart
+spec:
+  parallelism: 1
+  completions: 1
+  template:
+    metadata:
+      name: quickstart
+    spec:
+      volumes:
+      - name: output
+        hostPath:
+          path: /home/work/paddle_output
+      containers:
+      - name: pi
+        image: mypaddle/paddle:quickstart
+        command: ["bin/bash", "-c", "/root/paddle/demo/quick_start/train.sh"]
+        volumeMounts:
+        - name: output
+          mountPath: /root/paddle/demo/quick_start/output
+      restartPolicy: Never
+```
+
+### Start Paddle Job
+
+Use the above yaml file to start the Kubernetes job:
+
+```
+$ kubectl create -f paddle.yaml
+```
+
+Get the detailed status of the job:
+
+```
+$ kubectl get job
+NAME        DESIRED   SUCCESSFUL   AGE
+quickstart  1         0            58s
+
+$ kubectl describe job quickstart
+Name:           quickstart
+Namespace:      default
+Image(s):       registry.baidu.com/public/paddle:cpu-demo-latest
+Selector:       controller-uid=f120da72-9f18-11e6-b363-448a5b355b84
+Parallelism:    1
+Completions:    1
+Start Time:     Mon, 31 Oct 2016 11:20:16 +0800
+Labels:         controller-uid=f120da72-9f18-11e6-b363-448a5b355b84,job-name=quickstart
+Pods Statuses:  0 Running / 1 Succeeded / 0 Failed
+Volumes:
+  output:
+    Type:       HostPath (bare host directory volume)
+    Path:       /home/work/paddle_output
+Events:
+  FirstSeen    LastSeen    Count    From            SubobjectPath    Type      Reason            Message
+  ---------    --------    -----    ----            -------------    --------  ------            -------
+  1m           1m          1        {job-controller }                Normal    SuccessfulCreate  Created pod: quickstart-fa0wx
+```
+
+### Get Training Result
+
+We can use the kubectl command to take a look at the status of the related pod.
+ +``` +$ kubectl describe pod quickstart-fa0wx +Name: quickstart-fa0wx +Namespace: default +Node: paddle-demo-let02/10.206.202.44 +Start Time: Mon, 31 Oct 2016 11:20:17 +0800 +Labels: controller-uid=f120da72-9f18-11e6-b363-448a5b355b84,job-name=quickstart +Status: Succeeded +IP: 10.0.0.9 +Controllers: Job/quickstart +Containers: + quickstart: + Container ID: docker://b8561f5c79193550d64fa47418a9e67ebdd71546186e840f88de5026b8097465 + Image: registry.baidu.com/public/paddle:cpu-demo-latest + Image ID: docker://18e457ce3d362ff5f3febf8e7f85ffec852f70f3b629add10aed84f930a68750 + Port: + Command: + bin/bash + -c + /root/paddle/demo/quick_start/train.sh + QoS Tier: + cpu: BestEffort + memory: BestEffort + State: Terminated + Reason: Completed + Exit Code: 0 + Started: Mon, 31 Oct 2016 11:20:20 +0800 + Finished: Mon, 31 Oct 2016 11:21:46 +0800 + Ready: False + Restart Count: 0 + Environment Variables: +Conditions: + Type Status + Ready False +Volumes: + output: + Type: HostPath (bare host directory volume) + Path: /home/work/paddle_output +``` + +We can also ssh to Kubernetes node to take a look at the training result. + +``` +[root@paddle-demo-let02 paddle_output]# ll +total 60 +drwxr-xr-x 2 root root 4096 Oct 31 11:20 pass-00000 +drwxr-xr-x 2 root root 4096 Oct 31 11:20 pass-00001 +drwxr-xr-x 2 root root 4096 Oct 31 11:21 pass-00002 +drwxr-xr-x 2 root root 4096 Oct 31 11:21 pass-00003 +drwxr-xr-x 2 root root 4096 Oct 31 11:21 pass-00004 +drwxr-xr-x 2 root root 4096 Oct 31 11:21 pass-00005 +drwxr-xr-x 2 root root 4096 Oct 31 11:21 pass-00006 +drwxr-xr-x 2 root root 4096 Oct 31 11:21 pass-00007 +drwxr-xr-x 2 root root 4096 Oct 31 11:21 pass-00008 +drwxr-xr-x 2 root root 4096 Oct 31 11:21 pass-00009 +drwxr-xr-x 2 root root 4096 Oct 31 11:21 pass-00010 +drwxr-xr-x 2 root root 4096 Oct 31 11:21 pass-00011 +drwxr-xr-x 2 root root 4096 Oct 31 11:21 pass-00012 +drwxr-xr-x 2 root root 4096 Oct 31 11:21 pass-00013 +drwxr-xr-x 2 root root 4096 Oct 31 11:21 pass-00014 +``` diff --git a/doc_cn/cluster/k8s/Dockerfile b/doc/howto/usage/k8s/src/Dockerfile similarity index 100% rename from doc_cn/cluster/k8s/Dockerfile rename to doc/howto/usage/k8s/src/Dockerfile diff --git a/doc/howto/usage/k8s/src/add_security_group.png b/doc/howto/usage/k8s/src/add_security_group.png new file mode 100644 index 0000000000000000000000000000000000000000..bd34f46c9b0ada7027fd53e553e7d033255d25fc Binary files /dev/null and b/doc/howto/usage/k8s/src/add_security_group.png differ diff --git a/doc/howto/usage/k8s/src/create_efs.png b/doc/howto/usage/k8s/src/create_efs.png new file mode 100644 index 0000000000000000000000000000000000000000..e5f1526033d1daf401700989af1d25919bcb7675 Binary files /dev/null and b/doc/howto/usage/k8s/src/create_efs.png differ diff --git a/doc/howto/usage/k8s/src/efs_mount.png b/doc/howto/usage/k8s/src/efs_mount.png new file mode 100644 index 0000000000000000000000000000000000000000..0f9e3cab98445707e5e9baa18ddabe15cdf04576 Binary files /dev/null and b/doc/howto/usage/k8s/src/efs_mount.png differ diff --git a/doc/howto/usage/k8s/src/k8s-paddle-arch.png b/doc/howto/usage/k8s/src/k8s-paddle-arch.png new file mode 100644 index 0000000000000000000000000000000000000000..2183a232ad402b76f82a67234a5c93e13ce97ac3 Binary files /dev/null and b/doc/howto/usage/k8s/src/k8s-paddle-arch.png differ diff --git a/doc/howto/usage/k8s/src/k8s_data/Dockerfile b/doc/howto/usage/k8s/src/k8s_data/Dockerfile new file mode 100644 index 0000000000000000000000000000000000000000..6d3a12ae393aa594b8e6e9a5f726109426937284 --- /dev/null 
+++ b/doc/howto/usage/k8s/src/k8s_data/Dockerfile @@ -0,0 +1,7 @@ +FROM alpine + +RUN apk update && apk upgrade && apk add coreutils +ADD quick_start /quick_start +ADD get_data.sh /bin/ +RUN chmod +x /bin/get_data.sh +ENTRYPOINT ["/bin/get_data.sh"] diff --git a/doc/howto/usage/k8s/src/k8s_data/README.md b/doc/howto/usage/k8s/src/k8s_data/README.md new file mode 100644 index 0000000000000000000000000000000000000000..83cef7affd0ac4d3a1ca08ea5b046fa81e1bc630 --- /dev/null +++ b/doc/howto/usage/k8s/src/k8s_data/README.md @@ -0,0 +1,6 @@ +To build PaddlePaddle data preparation image in tutorial [Distributed PaddlePaddle Training on AWS with Kubernetes](../../k8s_aws_en.md), run following commands: + +``` +cp -r ../../../../../../demo/quick_start . +docker build . -t prepare-data-image-name +``` diff --git a/doc/howto/usage/k8s/src/k8s_data/get_data.sh b/doc/howto/usage/k8s/src/k8s_data/get_data.sh new file mode 100755 index 0000000000000000000000000000000000000000..d187ba5ac8d03f69dfdefd4f63610ed7921575be --- /dev/null +++ b/doc/howto/usage/k8s/src/k8s_data/get_data.sh @@ -0,0 +1,26 @@ +#!/bin/sh + +out_dir=$OUT_DIR +split_count=$SPLIT_COUNT + +set -e + +mkdir -p $out_dir +cp -r /quick_start $out_dir/ + +mkdir -p $out_dir/0/data +cd $out_dir/0/data +wget http://paddlepaddle.bj.bcebos.com/demo/quick_start_preprocessed_data/preprocessed_data.tar.gz +tar zxvf preprocessed_data.tar.gz +rm preprocessed_data.tar.gz + +split -d --number=l/$split_count -a 5 train.txt train. +mv train.00000 train.txt + +cd $out_dir +end=$(expr $split_count - 1) +for i in $(seq 1 $end); do + mkdir -p $i/data + cp -r 0/data/* $i/data + mv $i/data/train.`printf %05d $i` $i/data/train.txt +done; diff --git a/doc/howto/usage/k8s/src/k8s_train/Dockerfile b/doc/howto/usage/k8s/src/k8s_train/Dockerfile new file mode 100644 index 0000000000000000000000000000000000000000..c0fca1f9a945921e6e8899fee2db8845e66136a1 --- /dev/null +++ b/doc/howto/usage/k8s/src/k8s_train/Dockerfile @@ -0,0 +1,6 @@ +FROM paddledev/paddle:cpu-latest + +COPY start.sh /root/ +COPY start_paddle.py /root/ +RUN chmod +x /root/start.sh +CMD ["bash"," -c","/root/start.sh"] diff --git a/doc/howto/usage/k8s/src/k8s_train/README.md b/doc/howto/usage/k8s/src/k8s_train/README.md new file mode 100644 index 0000000000000000000000000000000000000000..96bf65497ffa23e90c4c9350504f86367b48daf2 --- /dev/null +++ b/doc/howto/usage/k8s/src/k8s_train/README.md @@ -0,0 +1,5 @@ +To build PaddlePaddle training image in tutorial [Distributed PaddlePaddle Training on AWS with Kubernetes](../../k8s_aws_en.md), run following command: + +``` +docker build . -t train-image-name +``` diff --git a/doc/howto/usage/k8s/src/k8s_train/start.sh b/doc/howto/usage/k8s/src/k8s_train/start.sh new file mode 100755 index 0000000000000000000000000000000000000000..12dfe1e6386885a6989d3887f21c6922f137a9ae --- /dev/null +++ b/doc/howto/usage/k8s/src/k8s_train/start.sh @@ -0,0 +1,19 @@ +#!/bin/sh + +set -eu + +jobconfig=${JOB_PATH}"/"${JOB_NAME}"/"${TRAIN_CONFIG_DIR} +cd /root +cp -rf $jobconfig/* . 
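+
+# Note: start_paddle.py (copied into /root by the Dockerfile above) turns the
+# CONF_* environment variables into `paddle pserver` / `paddle train` flags and
+# launches both processes; the flags below are forwarded to the trainer.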
+ +python /root/start_paddle.py \ + --dot_period=10 \ + --ports_num=$CONF_PADDLE_PORTS_NUM \ + --ports_num_for_sparse=$CONF_PADDLE_PORTS_NUM_SPARSE \ + --log_period=50 \ + --num_passes=10 \ + --trainer_count=$TRAINER_COUNT \ + --saving_period=1 \ + --local=0 \ + --config=trainer_config.lr.py \ + --use_gpu=0 diff --git a/doc/howto/usage/k8s/src/k8s_train/start_paddle.py b/doc/howto/usage/k8s/src/k8s_train/start_paddle.py new file mode 100755 index 0000000000000000000000000000000000000000..935c12bb67e1fe08bc135a7a2220fcd43c548482 --- /dev/null +++ b/doc/howto/usage/k8s/src/k8s_train/start_paddle.py @@ -0,0 +1,170 @@ +#!/usr/bin/python +# Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserved +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +import requests +import time +import socket +import os +import argparse + +# configuration for cluster +API = "/api/v1/namespaces/" +JOBSELECTOR = "labelSelector=job-name=" +JOB_PATH = os.getenv("JOB_PATH") + "/" + os.getenv("JOB_NAME") +JOB_PATH_OUTPUT = JOB_PATH + "/output" +JOBNAME = os.getenv("JOB_NAME") +NAMESPACE = os.getenv("JOB_NAMESPACE") +PADDLE_NIC = os.getenv("CONF_PADDLE_NIC") +PADDLE_PORT = os.getenv("CONF_PADDLE_PORT") +PADDLE_PORTS_NUM = os.getenv("CONF_PADDLE_PORTS_NUM") +PADDLE_PORTS_NUM_SPARSE = os.getenv("CONF_PADDLE_PORTS_NUM_SPARSE") +PADDLE_SERVER_NUM = os.getenv("CONF_PADDLE_GRADIENT_NUM") + +tokenpath = '/var/run/secrets/kubernetes.io/serviceaccount/token' + + +def refine_unknown_args(cmd_args): + ''' + refine unknown parameters to handle some special parameters + ''' + new_args = [] + for arg in cmd_args: + if arg.startswith("--") and arg.find("=") != -1: + equal_pos = arg.find("=") # find first = pos + arglist = list(arg) + arglist[equal_pos] = " " + arg = "".join(arglist) + arg = arg.lstrip("-") + new_args += arg.split(" ") + elif arg.startswith("--") and arg.find("=") == -1: + arg = arg.lstrip("-") + new_args.append(arg) + else: + new_args.append(arg) + return new_args + + +def isPodAllRunning(podlist): + ''' + check all pod is running + ''' + require = len(podlist["items"]) + running = 0 + for pod in podlist["items"]: + if pod["status"]["phase"] == "Running": + running += 1 + print "waiting for pods running, require:", require, "running:", running + if require == running: + return True + return False + + +def getPodList(): + ''' + get all container status of the job + ''' + apiserver = "https://" + \ + os.getenv("KUBERNETES_SERVICE_HOST") + ":" + \ + os.getenv("KUBERNETES_SERVICE_PORT_HTTPS") + + pod = API + NAMESPACE + "/pods?" 
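+    # Query the API server for the pods whose job-name label matches this job,
+    # authenticating with the mounted service-account token when available.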
+ job = JOBNAME + if os.path.isfile(tokenpath): + tokenfile = open(tokenpath, mode='r') + token = tokenfile.read() + Bearer = "Bearer " + token + headers = {"Authorization": Bearer} + return requests.get(apiserver + pod + JOBSELECTOR + job, + headers=headers, + verify=False).json() + else: + return requests.get(apiserver + pod + JOBSELECTOR + job, + verify=False).json() + + +def getIdMap(podlist): + ''' + generate tainer_id by ip + ''' + ips = [] + for pod in podlist["items"]: + ips.append(pod["status"]["podIP"]) + ips.sort() + idMap = {} + for i in range(len(ips)): + idMap[ips[i]] = i + return idMap + + +def startPaddle(idMap={}, train_args_dict=None): + ''' + start paddle pserver and trainer + ''' + program = 'paddle train' + args = " --nics=" + PADDLE_NIC + args += " --port=" + str(PADDLE_PORT) + args += " --ports_num=" + str(PADDLE_PORTS_NUM) + args += " --comment=" + "paddle_process_by_paddle" + ip_string = "" + for ip in idMap.keys(): + ip_string += (ip + ",") + ip_string = ip_string.rstrip(",") + args += " --pservers=" + ip_string + args_ext = "" + for key, value in train_args_dict.items(): + args_ext += (' --' + key + '=' + value) + localIP = socket.gethostbyname(socket.gethostname()) + trainerId = idMap[localIP] + args += " " + args_ext + " --trainer_id=" + \ + str(trainerId) + " --save_dir=" + JOB_PATH_OUTPUT + logDir = JOB_PATH_OUTPUT + "/node_" + str(trainerId) + if not os.path.exists(JOB_PATH_OUTPUT): + os.makedirs(JOB_PATH_OUTPUT) + if not os.path.exists(logDir): + os.mkdir(logDir) + copyCommand = 'cp -rf ' + JOB_PATH + \ + "/" + str(trainerId) + "/data/*" + " ./data/" + os.system(copyCommand) + startPserver = 'nohup paddle pserver' + \ + " --port=" + str(PADDLE_PORT) + \ + " --ports_num=" + str(PADDLE_PORTS_NUM) + \ + " --ports_num_for_sparse=" + str(PADDLE_PORTS_NUM_SPARSE) + \ + " --nics=" + PADDLE_NIC + \ + " --comment=" + "paddle_process_by_paddle" + \ + " --num_gradient_servers=" + str(PADDLE_SERVER_NUM) +\ + " > " + logDir + "/server.log 2>&1 &" + print startPserver + os.system(startPserver) + # wait until pservers completely start + time.sleep(20) + startTrainer = program + args + " 2>&1 | tee " + \ + logDir + "/train.log" + print startTrainer + os.system(startTrainer) + + +if __name__ == '__main__': + parser = argparse.ArgumentParser( + prog="start_paddle.py", description='simple tool for k8s') + args, train_args_list = parser.parse_known_args() + train_args = refine_unknown_args(train_args_list) + train_args_dict = dict(zip(train_args[:-1:2], train_args[1::2])) + podlist = getPodList() + # need to wait until all pods are running + while not isPodAllRunning(podlist): + time.sleep(20) + podlist = getPodList() + idMap = getIdMap(podlist) + startPaddle(idMap, train_args_dict) diff --git a/doc/howto/usage/k8s/src/managed_policy.png b/doc/howto/usage/k8s/src/managed_policy.png new file mode 100644 index 0000000000000000000000000000000000000000..c7ecda555b81d7750e9292a9ab72d2f517f76a2a Binary files /dev/null and b/doc/howto/usage/k8s/src/managed_policy.png differ diff --git a/doc/howto/usage/k8s/src/pserver_and_trainer.png b/doc/howto/usage/k8s/src/pserver_and_trainer.png new file mode 100644 index 0000000000000000000000000000000000000000..f41fe48920590333ad332bb51eb18e03dc251541 Binary files /dev/null and b/doc/howto/usage/k8s/src/pserver_and_trainer.png differ diff --git a/doc/howto/usage/k8s/src/route53_create_recordset.png b/doc/howto/usage/k8s/src/route53_create_recordset.png new file mode 100644 index 
0000000000000000000000000000000000000000..34e476c7beac30fcdde13fccc4cc8d08b4be3d35 Binary files /dev/null and b/doc/howto/usage/k8s/src/route53_create_recordset.png differ diff --git a/doc/howto/usage/k8s/src/route53_create_zone.png b/doc/howto/usage/k8s/src/route53_create_zone.png new file mode 100644 index 0000000000000000000000000000000000000000..25b7ddb831c5cba97f4b2edddd27da3234d621af Binary files /dev/null and b/doc/howto/usage/k8s/src/route53_create_zone.png differ diff --git a/doc/howto/usage/k8s/src/worker_security_group.png b/doc/howto/usage/k8s/src/worker_security_group.png new file mode 100644 index 0000000000000000000000000000000000000000..57eb0265a34ad4223b69600d2a3dd355482e0bf5 Binary files /dev/null and b/doc/howto/usage/k8s/src/worker_security_group.png differ diff --git a/doc/index.rst b/doc/index.rst deleted file mode 100644 index c107239438b038fb6a4a6123e9b61f424b60142f..0000000000000000000000000000000000000000 --- a/doc/index.rst +++ /dev/null @@ -1,11 +0,0 @@ -PaddlePaddle Documentation -========================== - -.. toctree:: - :maxdepth: 1 - - getstarted/index_en.rst - tutorials/index_en.md - howto/index_en.rst - api/index_en.rst - about/index_en.rst diff --git a/doc/index_cn.rst b/doc/index_cn.rst new file mode 100644 index 0000000000000000000000000000000000000000..9279bac7f4b2898c18979630a8d6dfcb2dba70e0 --- /dev/null +++ b/doc/index_cn.rst @@ -0,0 +1,10 @@ +PaddlePaddle 文档 +====================== + +.. toctree:: + :maxdepth: 1 + + getstarted/index_cn.rst + howto/index_cn.rst + api/index_cn.rst + faq/index_cn.rst diff --git a/doc/index_en.rst b/doc/index_en.rst new file mode 100644 index 0000000000000000000000000000000000000000..168c7667c61da677905585d6c4b5037ce80b3765 --- /dev/null +++ b/doc/index_en.rst @@ -0,0 +1,10 @@ +PaddlePaddle Documentation +========================== + +.. toctree:: + :maxdepth: 1 + + getstarted/index_en.rst + howto/index_en.rst + api/index_en.rst + about/index_en.rst diff --git a/doc/templates/conf.py.cn.in b/doc/templates/conf.py.cn.in new file mode 100644 index 0000000000000000000000000000000000000000..673948dfe7928240817b552141ec9bc2f8a672b7 --- /dev/null +++ b/doc/templates/conf.py.cn.in @@ -0,0 +1,150 @@ +# -*- coding: utf-8 -*- +# +# documentation build configuration file, created by +# sphinx-quickstart on Thu Jul 23 19:40:08 2015. +# +# This file is execfile()d with the current directory set to its +# containing dir. +# +# Note that not all possible configuration values are present in this +# autogenerated file. +# +# All configuration values have a default; values that are commented out +# serve to show the default. +import sys +import os, subprocess +sys.path.insert(0, os.path.abspath('@PROJ_ROOT@/python')) +import shlex +from recommonmark import parser, transform +import paddle +import paddle.v2 + +MarkdownParser = parser.CommonMarkParser +AutoStructify = transform.AutoStructify +# If extensions (or modules to document with autodoc) are in another directory, +# add these directories to sys.path here. If the directory is relative to the +# documentation root, use os.path.abspath to make it absolute, like shown here. +templates_path = ["@PROJ_ROOT@/doc_theme/templates"] + +# -- General configuration ------------------------------------------------ + +# General information about the project. 
+project = u'PaddlePaddle' +author = u'%s developers' % project +copyright = u'2016, %s' % author +github_doc_root = '' + +# add markdown parser +MarkdownParser.github_doc_root = github_doc_root +source_parsers = { + '.md': MarkdownParser, + '.Rmd': MarkdownParser, +} +os.environ['PADDLE_BUILD_DOC'] = '1' + +# Add any Sphinx extension module names here, as strings. They can be +# extensions coming with Sphinx (named 'sphinx.ext.*') or your custom ones +extensions = [ + 'sphinx.ext.autodoc', + 'sphinx.ext.autosummary', + 'sphinx.ext.mathjax', + 'sphinx.ext.napoleon', + 'sphinx.ext.graphviz' +] +mathjax_path="https://cdn.bootcss.com/mathjax/2.7.0/MathJax.js" +table_styling_embed_css = True + +autodoc_member_order = 'bysource' + +# The suffix(es) of source filenames. +# You can specify multiple suffix as a list of string: +# source_suffix = ['.rst', '.md'] +source_suffix = ['.rst', '.md', '.Rmd'] + +# The encoding of source files. +source_encoding = 'utf-8' + +# The master toctree document. +master_doc = 'index_cn' + +# The language for content autogenerated by Sphinx. Refer to documentation +# for a list of supported languages. +# +# This is also used if you do content translation via gettext catalogs. +# Usually you set "language" from the command line for these cases. +language = 'zh_CN' + +# There are two options for replacing |today|: either, you set today to some +# non-false value, then it is used: +#today = '' +# Else, today_fmt is used as the format for a strftime call. +#today_fmt = '%B %d, %Y' + +# List of patterns, relative to source directory, that match files and +# directories to ignore when looking for source files. +exclude_patterns = ['_build', '**/*_en*', '*_en*'] + +# The reST default role (used for this markup: `text`) to use for all +# documents. +#default_role = None + +# If true, '()' will be appended to :func: etc. cross-reference text. +#add_function_parentheses = True + +# If true, the current module name will be prepended to all description +# unit titles (such as .. function::). +#add_module_names = True + +# If true, sectionauthor and moduleauthor directives will be shown in the +# output. They are ignored by default. +#show_authors = False + +# The name of the Pygments (syntax highlighting) style to use. +pygments_style = 'sphinx' + +# A list of ignored prefixes for module index sorting. +#modindex_common_prefix = [] + +# If true, keep warnings as "system message" paragraphs in the built documents. +#keep_warnings = False + +# If true, `todo` and `todoList` produce output, else they produce nothing. +todo_include_todos = False + +# -- Options for HTML output ---------------------------------------------- + +# The theme to use for HTML and HTML Help pages. See the documentation for +# a list of builtin themes. +html_theme = 'sphinx_rtd_theme' + +# Add any paths that contain custom static files (such as style sheets) here, +# relative to this directory. They are copied after the builtin static files, +# so a file named "default.css" will overwrite the builtin "default.css". +html_static_path = ['@PROJ_ROOT@/doc_theme/static'] + +# Output file base name for HTML help builder. +htmlhelp_basename = project + 'doc' + +# -- Options for LaTeX output --------------------------------------------- +latex_elements = { +} + +# Grouping the document tree into LaTeX files. List of tuples +# (source start file, target name, title, +# author, documentclass [howto, manual, or own class]). 
+latex_documents = [ + (master_doc, '%s.tex' % project, project, + author, 'manual'), +] + +# Use the .. admonition:: directive for Notes sections. +# False to use the .. rubric:: directive instead. +napoleon_use_admonition_for_notes = True + +def setup(app): + # Add hook for building doxygen xml when needed + # no c++ API for now + app.add_config_value('recommonmark_config', { + 'url_resolver': lambda url: github_doc_root + url, + }, True) + app.add_transform(AutoStructify) diff --git a/doc/templates/conf.py.en.in b/doc/templates/conf.py.en.in new file mode 100644 index 0000000000000000000000000000000000000000..b6b50b7dcd5647b50a13703160489323ed90a1b4 --- /dev/null +++ b/doc/templates/conf.py.en.in @@ -0,0 +1,151 @@ +# -*- coding: utf-8 -*- +# +# documentation build configuration file, created by +# sphinx-quickstart on Thu Jul 23 19:40:08 2015. +# +# This file is execfile()d with the current directory set to its +# containing dir. +# +# Note that not all possible configuration values are present in this +# autogenerated file. +# +# All configuration values have a default; values that are commented out +# serve to show the default. +import sys +import os, subprocess +sys.path.insert(0, os.path.abspath('@PROJ_ROOT@/python')) +import shlex +from recommonmark import parser, transform +import paddle +import paddle.v2 + + +MarkdownParser = parser.CommonMarkParser +AutoStructify = transform.AutoStructify +# If extensions (or modules to document with autodoc) are in another directory, +# add these directories to sys.path here. If the directory is relative to the +# documentation root, use os.path.abspath to make it absolute, like shown here. +templates_path = ["@PROJ_ROOT@/doc_theme/templates"] + +# -- General configuration ------------------------------------------------ + +# General information about the project. +project = u'PaddlePaddle' +author = u'%s developers' % project +copyright = u'2016, %s' % author +github_doc_root = '' + +# add markdown parser +MarkdownParser.github_doc_root = github_doc_root +source_parsers = { + '.md': MarkdownParser, + '.Rmd': MarkdownParser, +} +os.environ['PADDLE_BUILD_DOC'] = '1' + +# Add any Sphinx extension module names here, as strings. They can be +# extensions coming with Sphinx (named 'sphinx.ext.*') or your custom ones +extensions = [ + 'sphinx.ext.autodoc', + 'sphinx.ext.autosummary', + 'sphinx.ext.mathjax', + 'sphinx.ext.napoleon', +] + + +autodoc_member_order = 'bysource' + + +# The suffix(es) of source filenames. +# You can specify multiple suffix as a list of string: +# source_suffix = ['.rst', '.md'] +source_suffix = ['.rst', '.md', '.Rmd'] + +# The encoding of source files. +source_encoding = 'utf-8' + +# The master toctree document. +master_doc = 'index_en' + +# The language for content autogenerated by Sphinx. Refer to documentation +# for a list of supported languages. +# +# This is also used if you do content translation via gettext catalogs. +# Usually you set "language" from the command line for these cases. +language = None + +# There are two options for replacing |today|: either, you set today to some +# non-false value, then it is used: +#today = '' +# Else, today_fmt is used as the format for a strftime call. +#today_fmt = '%B %d, %Y' + +# List of patterns, relative to source directory, that match files and +# directories to ignore when looking for source files. +exclude_patterns = ['_build', '**/*_cn*', '*_cn*'] + +# The reST default role (used for this markup: `text`) to use for all +# documents. 
+#default_role = None + +# If true, '()' will be appended to :func: etc. cross-reference text. +#add_function_parentheses = True + +# If true, the current module name will be prepended to all description +# unit titles (such as .. function::). +#add_module_names = True + +# If true, sectionauthor and moduleauthor directives will be shown in the +# output. They are ignored by default. +#show_authors = False + +# The name of the Pygments (syntax highlighting) style to use. +pygments_style = 'sphinx' + +# A list of ignored prefixes for module index sorting. +#modindex_common_prefix = [] + +# If true, keep warnings as "system message" paragraphs in the built documents. +#keep_warnings = False + +# If true, `todo` and `todoList` produce output, else they produce nothing. +todo_include_todos = False + +# -- Options for HTML output ---------------------------------------------- + +# The theme to use for HTML and HTML Help pages. See the documentation for +# a list of builtin themes. +html_theme = 'sphinx_rtd_theme' + +# Add any paths that contain custom static files (such as style sheets) here, +# relative to this directory. They are copied after the builtin static files, +# so a file named "default.css" will overwrite the builtin "default.css". +html_static_path = ['@PROJ_ROOT@/doc_theme/static'] + +# Output file base name for HTML help builder. +htmlhelp_basename = project + 'doc' + +# -- Options for LaTeX output --------------------------------------------- +latex_elements = { +} + +# Grouping the document tree into LaTeX files. List of tuples +# (source start file, target name, title, +# author, documentclass [howto, manual, or own class]). +latex_documents = [ + (master_doc, '%s.tex' % project, project, + author, 'manual'), +] + +# Use the .. admonition:: directive for Notes sections. +# False to use the .. rubric:: directive instead. +napoleon_use_admonition_for_notes = True + +def setup(app): + # Add hook for building doxygen xml when needed + # no c++ API for now + app.add_config_value('recommonmark_config', { + 'url_resolver': lambda url: github_doc_root + url, + 'enable_eval_rst': True, + }, True) + app.add_transform(AutoStructify) diff --git a/doc/tutorials/embedding_model/index_cn.md b/doc/tutorials/embedding_model/index_cn.md new file mode 100644 index 0000000000000000000000000000000000000000..2b4a79fbbfc0c4af74aa73c540919f5d9cf2635b --- /dev/null +++ b/doc/tutorials/embedding_model/index_cn.md @@ -0,0 +1,139 @@ +# 中文词向量模型的使用 # +---------- +本文档介绍如何在PaddlePaddle平台上,使用预训练的标准格式词向量模型。 + +在此感谢 @lipeng 提出的代码需求,并给出的相关模型格式的定义。 + +## 介绍 ### +### 中文字典 ### +我们的字典使用内部的分词工具对百度知道和百度百科的语料进行分词后产生。分词风格如下: "《红楼梦》"将被分为 "《","红楼梦","》",和 "《红楼梦》"。字典采用UTF8编码,输出有2列:词本身和词频。字典共包含 3206326个词和4个特殊标记: + - ``: 分词序列的开始 + - ``: 分词序列的结束 + - `PALCEHOLDER_JUST_IGNORE_THE_EMBEDDING`: 占位符,没有实际意义 + - ``: 未知词 + +### 中文词向量的预训练模型 ### +遵循文章 [A Neural Probabilistic Language Model](http://www.jmlr.org/papers/volume3/bengio03a/bengio03a.pdf)中介绍的方法,模型采用 n-gram 语言模型,结构如下图:6元上下文作为输入层->全连接层->softmax层 。对应于字典,我们预训练得到4种不同维度的词向量,分别为:32维、64维、128维和256维。 +
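+
+In n-gram language-model terms (following the Bengio et al. paper cited above), the network in Figure 1 below models the probability of the next word given its 6-word context (this is a notational sketch, not a formula taken from this repository):
+
+$$P(w_t \mid w_{t-6}, \ldots, w_{t-1}) = \operatorname{softmax}\big(f(e(w_{t-6}), \ldots, e(w_{t-1}))\big)$$
+
+where $e(\cdot)$ is the word-embedding lookup and $f$ is the fully-connected layer described above.
+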
![](./neural-n-gram-model.png)
+
+Figure 1. neural-n-gram-model
+ +### 下载和数据抽取 ### +运行以下的命令下载和获取我们的字典和预训练模型: + + cd $PADDLE_ROOT/demo/model_zoo/embedding + ./pre_DictAndModel.sh + +## 中文短语改写的例子 ## +以下示范如何使用预训练的中文字典和词向量进行短语改写。 + +### 数据的准备和预处理 ### +首先,运行以下的命令下载数据集。该数据集(utf8编码)包含20个训练样例,5个测试样例和2个生成式样例。 + + cd $PADDLE_ROOT/demo/seqToseq/data + ./paraphrase_data.sh + +第二步,将数据处理成规范格式,在训练数集上训练生成词向量字典(数据将保存在 `$PADDLE_SOURCE_ROOT/demo/seqToseq/data/pre-paraphrase`): + + cd $PADDLE_ROOT/demo/seqToseq/ + python preprocess.py -i data/paraphrase [--mergeDict] + +- 其中,如果使用`--mergeDict`选项,源语言短语和目标语言短语的字典将被合并(源语言和目标语言共享相同的编码字典)。本实例中,源语言和目标语言都是相同的语言,因此可以使用该选项。 + + +### 使用用户指定的词向量字典 ### +使用如下命令,从预训练模型中,根据用户指定的字典,抽取对应的词向量构成新的词表: + cd $PADDLE_ROOT/demo/model_zoo/embedding + python extract_para.py --preModel PREMODEL --preDict PREDICT --usrModel USRMODEL--usrDict USRDICT -d DIM + +- `--preModel PREMODEL`: 预训练词向量字典模型的路径 +- `--preDict PREDICT`: 预训练模型使用的字典的路径 +- `--usrModel USRMODEL`: 抽取出的新词表的保存路径 +- `--usrDict USRDICT`: 用户指定新的字典的路径,用于构成新的词表 +- `-d DIM`: 参数(词向量)的维度 + +此处,你也可以简单的运行以下的命令: + + cd $PADDLE_ROOT/demo/seqToseq/data/ + ./paraphrase_model.sh + +运行成功以后,你将会看到以下的模型结构: + + paraphrase_model + |--- _source_language_embedding + |--- _target_language_embedding + +### 在PaddlePaddle平台训练模型 ### +首先,配置模型文件,配置如下(可以参考保存在 `demo/seqToseq/paraphrase/train.conf`的配置): + + from seqToseq_net import * + is_generating = False + + ################## Data Definition ##################### + train_conf = seq_to_seq_data(data_dir = "./data/pre-paraphrase", + job_mode = job_mode) + + ############## Algorithm Configuration ################## + settings( + learning_method = AdamOptimizer(), + batch_size = 50, + learning_rate = 5e-4) + + ################# Network configure ##################### + gru_encoder_decoder(train_conf, is_generating, word_vector_dim = 32) + +这个配置与`demo/seqToseq/translation/train.conf` 基本相同 + +然后,使用以下命令进行模型训练: + + cd $PADDLE_SOURCE_ROOT/demo/seqToseq/paraphrase + ./train.sh + +其中,`train.sh` 与`demo/seqToseq/translation/train.sh` 基本相同,只有2个配置不一样: + +- `--init_model_path`: 初始化模型的路径配置为`data/paraphrase_modeldata/paraphrase_model` +- `--load_missing_parameter_strategy`:如果参数模型文件缺失,除词向量模型外的参数将使用正态分布随机初始化 + +如果用户想要了解详细的数据集的格式、模型的结构和训练过程,请查看 [Text generation Tutorial](../text_generation/index_cn.md). + +## 可选功能 ## +### 观测词向量 +PaddlePaddle 平台为想观测词向量的用户提供了将二进制词向量模型转换为文本模型的功能: + + cd $PADDLE_ROOT/demo/model_zoo/embedding + python paraconvert.py --b2t -i INPUT -o OUTPUT -d DIM + +- `-i INPUT`: 输入的(二进制)词向量模型名称 +- `-o OUTPUT`: 输出的文本模型名称 +- `-d DIM`: (词向量)参数维度 + +运行完以上命令,用户可以在输出的文本模型中看到: + + 0,4,32156096 + -0.7845433,1.1937413,-0.1704215,0.4154715,0.9566584,-0.5558153,-0.2503305, ...... + 0.0000909,0.0009465,-0.0008813,-0.0008428,0.0007879,0.0000183,0.0001984, ...... + ...... + +- 其中,第一行是`PaddlePaddle` 输出文件的格式说明,包含3个属性:: + - `PaddlePaddle`的版本号,本例中为0 + - 浮点数占用的字节数,本例中为4 + - 总计的参数个数,本例中为32,156,096 +- 其余行是(词向量)参数行(假设词向量维度为32) + - 每行打印32个参数以','分隔 + - 共有32,156,096/32 = 1,004,877行,也就是说,模型共包含1,004,877个被向量化的词 + +### 词向量模型的修正 +`PaddlePaddle` 为想修正词向量模型的用户提供了将文本词向量模型转换为二进制模型的命令: + + cd $PADDLE_ROOT/demo/model_zoo/embedding + python paraconvert.py --t2b -i INPUT -o OUTPUT + +- `-i INPUT`: 输入的文本词向量模型名称 +- `-o OUTPUT`: 输出的二进制词向量模型名称 + +请注意,输入的文本格式如下: + + -0.7845433,1.1937413,-0.1704215,0.4154715,0.9566584,-0.5558153,-0.2503305, ...... + 0.0000909,0.0009465,-0.0008813,-0.0008428,0.0007879,0.0000183,0.0001984, ...... + ...... 
+- 输入文本中没有头部(格式说明)行
+- (输入文本)每行存储一个词,以逗号','分隔
diff --git a/doc/tutorials/embedding_model/index_en.md b/doc/tutorials/embedding_model/index_en.md
index 06f3ff1f009e470cdb9687658613a76acbb79751..9525f64f9b5384c8e44690fb0887fb2293108e0a 100644
--- a/doc/tutorials/embedding_model/index_en.md
+++ b/doc/tutorials/embedding_model/index_en.md
@@ -6,9 +6,10 @@ We thank @lipeng for the pull request that defined the model schemas and pretrai
 ## Introduction ###
 ### Chinese Word Dictionary ###
-Our Chinese-word dictionary is created on Baidu ZhiDao and Baidu Baike by using in-house word segmentor. For example, the participle of "《红楼梦》" is "《","红楼梦","》",and "《红楼梦》". Our dictionary (using UTF-8 format) has has two columns: word and its frequency. The total word count is 3206325, including 3 special token:
+Our Chinese-word dictionary is created on Baidu ZhiDao and Baidu Baike by using in-house word segmentor. For example, the participle of "《红楼梦》" is "《","红楼梦","》",and "《红楼梦》". Our dictionary (using UTF-8 format) has two columns: word and its frequency. The total word count is 3206326, including 4 special tokens:
  - `<s>`: the start of a sequence
  - `<e>`: the end of a sequence
+ - `PALCEHOLDER_JUST_IGNORE_THE_EMBEDDING`: a placeholder, just ignore it and its embedding
  - `<unk>`: a word not included in dictionary
 
 ### Pretrained Chinese Word Embedding Model ###
@@ -93,7 +94,7 @@ where `train.sh` is almost the same as `demo/seqToseq/translation/train.sh`, the
 - `--init_model_path`: path of the initialization model, here is `data/paraphrase_model`
 - `--load_missing_parameter_strategy`: operations when model file is missing, here use a normal distibution to initialize the other parameters except for the embedding layer
 
-For users who want to understand the dataset format, model architecture and training procedure in detail, please refer to [Text generation Tutorial](../text_generation/text_generation.md).
+For users who want to understand the dataset format, model architecture and training procedure in detail, please refer to [Text generation Tutorial](../text_generation/index_en.md).
 
 ## Optional Function ##
 ### Embedding Parameters Observation
diff --git a/doc/tutorials/gan/gan.png b/doc/tutorials/gan/gan.png
new file mode 100644
index 0000000000000000000000000000000000000000..0eafd7cb49b545f412f8e775804bcd0b22c42454
Binary files /dev/null and b/doc/tutorials/gan/gan.png differ
diff --git a/doc/tutorials/gan/index_en.md b/doc/tutorials/gan/index_en.md
new file mode 100644
index 0000000000000000000000000000000000000000..ac9ed37b2264778869f92c0910b1cb946fb4427f
--- /dev/null
+++ b/doc/tutorials/gan/index_en.md
@@ -0,0 +1,137 @@
+# Generative Adversarial Networks (GAN)
+
+This demo implements the GAN training described in the original [GAN paper](https://arxiv.org/abs/1406.2661) and in the deep convolutional generative adversarial networks [DCGAN paper](https://arxiv.org/abs/1511.06434).
+
+The high-level structure of GAN is shown in Figure 1 below. It is composed of two major parts: a generator and a discriminator, both of which are based on neural networks. The generator takes in some kind of noise with a known distribution and transforms it into an image. The discriminator takes in an image and determines whether it is artificially generated by the generator or a real image. So the generator and the discriminator are in a competitive game, in which the generator tries to generate images that look as real as possible in order to fool the discriminator, while the discriminator tries to distinguish real images from fake ones.
+
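+Formally, this competitive game is the minimax problem introduced in the original GAN paper linked above (reproduced here for reference; the notation is not used elsewhere in this tutorial):
+
+$$\min_G \max_D \; \mathbb{E}_{x \sim p_{\text{data}}}\left[\log D(x)\right] + \mathbb{E}_{z \sim p_z}\left[\log\left(1 - D(G(z))\right)\right]$$
+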
+![](./gan.png)
+
+Figure 1. GAN Model Structure (figure credit)
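+
+Formally, this competitive game is the minimax problem from the original GAN paper (stated here for reference, since the tutorial does not spell it out), in which the discriminator $D$ maximizes and the generator $G$ minimizes the same value function:
+
+$$\min_G \max_D V(D, G) = \mathbb{E}_{x \sim p_{data}(x)}[\log D(x)] + \mathbb{E}_{z \sim p_z(z)}[\log(1 - D(G(z)))]$$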
+
+The generator and discriminator take turns being trained using SGD. The objective of the generator is to have its generated images classified as real by the discriminator, while the objective of the discriminator is to correctly classify real and fake images. When the GAN model is trained to converge to the equilibrium state, the generator transforms the given noise distribution into the distribution of real images, and the discriminator can no longer distinguish between real and fake images at all.
+
+## Implementation of GAN Model Structure
+Since the GAN model involves multiple neural networks, it requires the use of the Paddle Python API. The code walk-through below can therefore also serve as a partial introduction to the usage of the Paddle Python API.
+
+There are three networks defined in gan_conf.py, namely **generator_training**, **discriminator_training** and **generator**. The relationship to the model structure defined above is that **discriminator_training** is the discriminator, **generator** is the generator, and **generator_training** combines the generator and the discriminator, since training the generator requires the discriminator to provide the loss. This relationship is described in the following code:
+```python
+if is_generator_training:
+    noise = data_layer(name="noise", size=noise_dim)
+    sample = generator(noise)
+
+if is_discriminator_training:
+    sample = data_layer(name="sample", size=sample_dim)
+
+if is_generator_training or is_discriminator_training:
+    label = data_layer(name="label", size=1)
+    prob = discriminator(sample)
+    cost = cross_entropy(input=prob, label=label)
+    classification_error_evaluator(
+        input=prob, label=label, name=mode + '_error')
+    outputs(cost)
+
+if is_generator:
+    noise = data_layer(name="noise", size=noise_dim)
+    outputs(generator(noise))
+```
+
+In order to train the networks defined in gan_conf.py, one first needs to initialize a Paddle environment, parse the configs, create a GradientMachine from each config, and create a trainer from each GradientMachine, as done in the code chunk below:
+```python
+import py_paddle.swig_paddle as api
+# init paddle environment
+api.initPaddle('--use_gpu=' + use_gpu, '--dot_period=10',
+               '--log_period=100', '--gpu_id=' + args.gpu_id,
+               '--save_dir=' + "./%s_params/" % data_source)
+
+# Parse config
+gen_conf = parse_config(conf, "mode=generator_training,data=" + data_source)
+dis_conf = parse_config(conf, "mode=discriminator_training,data=" + data_source)
+generator_conf = parse_config(conf, "mode=generator,data=" + data_source)
+
+# Create GradientMachine
+dis_training_machine = api.GradientMachine.createFromConfigProto(
+    dis_conf.model_config)
+gen_training_machine = api.GradientMachine.createFromConfigProto(
+    gen_conf.model_config)
+generator_machine = api.GradientMachine.createFromConfigProto(
+    generator_conf.model_config)
+
+# Create trainer
+dis_trainer = api.Trainer.create(dis_conf, dis_training_machine)
+gen_trainer = api.Trainer.create(gen_conf, gen_training_machine)
+```
+
+In order to balance the strength of the generator and the discriminator, we train whichever one is currently performing worse, judged by comparing their loss values. The loss value can be calculated by a forward pass through the corresponding GradientMachine:
+```python
+def get_training_loss(training_machine, inputs):
+    outputs = api.Arguments.createArguments(0)
+    training_machine.forward(inputs, outputs, api.PASS_TEST)
+    loss = outputs.getSlotValue(0).copyToNumpyMat()
+    return numpy.mean(loss)
+```
+
+After training one network, one needs to sync the new parameters to the other networks. The code below demonstrates one example of such a use case:
+```python
+# Train the gen_training
+gen_trainer.trainOneDataBatch(batch_size, data_batch_gen)
+
+# Copy the parameters from gen_training to dis_training and generator
+copy_shared_parameters(gen_training_machine,
+                       dis_training_machine)
+copy_shared_parameters(gen_training_machine, generator_machine)
+```
+
+
+## A Toy Example
+With the infrastructure explained above, we can now walk you through a toy example of generating a two-dimensional uniform distribution from 10-dimensional Gaussian noise.
+
+The Gaussian noise is generated using the code below:
+```python
+def get_noise(batch_size, noise_dim):
+    return numpy.random.normal(size=(batch_size, noise_dim)).astype('float32')
+```
+
+The real samples (2-D uniform) are generated using the code below:
+```python
+# synthesize 2-D uniform data in gan_trainer.py:114
+def load_uniform_data():
+    data = numpy.random.rand(1000000, 2).astype('float32')
+    return data
+```
+
+The generator and discriminator networks are built from fully-connected layers and batch_norm layers, and are defined in gan_conf.py.
+
+To train the GAN model, one can use the command below. The flag `-d` specifies the training data (cifar, mnist or uniform) and the flag `--useGpu` specifies whether to use the GPU for training (0 is CPU, 1 is GPU).
+```bash
+$python gan_trainer.py -d uniform --useGpu 1
+```
+The generated samples can be found in ./uniform_samples/ and one example is shown below as Figure 2. One can see that it roughly recovers the 2D uniform distribution.
+
+![](./uniform_sample.png)
+
+Figure 2. Uniform Sample
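+
+The balancing schedule itself is easy to isolate from the Paddle-specific code. The snippet below is a self-contained toy sketch of the "train whichever network is currently worse" rule described in the implementation section above; the two losses are synthetic stand-ins that simply shrink when their network is "trained", so only the control flow mirrors gan_trainer.py:
+```python
+import numpy
+
+# Toy stand-in losses: "training" a network reduces its loss slightly.
+rng = numpy.random.RandomState(0)
+d_loss, g_loss = 1.0, 1.0
+for step in range(6):
+    if d_loss > g_loss:
+        d_loss = max(0.0, d_loss - 0.1 + 0.02 * rng.randn())  # "train" discriminator
+        trained = "discriminator"
+    else:
+        g_loss = max(0.0, g_loss - 0.1 + 0.02 * rng.randn())  # "train" generator
+        trained = "generator"
+    print("step %d: trained %s (d_loss=%.3f, g_loss=%.3f)"
+          % (step, trained, d_loss, g_loss))
+```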
+
+## MNIST Example
+### Data preparation
+To download the MNIST data, one can use the following commands:
+```bash
+$cd data/
+$./get_mnist_data.sh
+```
+
+### Model description
+Following the [DCGAN paper](https://arxiv.org/abs/1511.06434), we use convolution/convolution-transpose layers in the discriminator/generator networks to better handle images. The details of the network structures are defined in gan_conf_image.py.
+
+### Training the model
+To train the GAN model on MNIST data, one can use the following command:
+```bash
+$python gan_trainer.py -d mnist --useGpu 1
+```
+The generated sample images can be found at ./mnist_samples/ and one example is shown below as Figure 3.
+
+![](./mnist_sample.png)
+
+Figure 3. MNIST Sample
diff --git a/doc/tutorials/gan/mnist_sample.png b/doc/tutorials/gan/mnist_sample.png new file mode 100644 index 0000000000000000000000000000000000000000..f9c7bf7ddd7f148eac4fe347e9c38afaa8876760 Binary files /dev/null and b/doc/tutorials/gan/mnist_sample.png differ diff --git a/doc/tutorials/gan/uniform_sample.png b/doc/tutorials/gan/uniform_sample.png new file mode 100644 index 0000000000000000000000000000000000000000..e716c48e782019a757bed0cb443f2ed97386cbe2 Binary files /dev/null and b/doc/tutorials/gan/uniform_sample.png differ diff --git a/doc/tutorials/image_classification/index_cn.md b/doc/tutorials/image_classification/index_cn.md new file mode 100644 index 0000000000000000000000000000000000000000..87f465522a0fa21c8c03754b4be8dcb035c4de81 --- /dev/null +++ b/doc/tutorials/image_classification/index_cn.md @@ -0,0 +1,205 @@ +图像分类教程 +========== + +在本教程中,我们将使用CIFAR-10数据集训练一个卷积神经网络,并使用这个神经网络来对图片进行分类。如下图所示,卷积神经网络可以辨识图片中的主体,并给出分类结果。 +
![Image Classification](./image_classification.png)
+
+## 数据准备
+首先下载CIFAR-10数据集。下面是CIFAR-10数据集的官方网址:
+
+<https://www.cs.toronto.edu/~kriz/cifar.html>
+
+我们准备了一个脚本,可以用于从官方网站上下载CIFAR-10数据集,转为jpeg文件并存入特定的目录。使用这个脚本前请确认已经安装了pillow及相关依赖模块。可以参照下面的命令进行安装:
+
+1. 安装pillow
+
+```bash
+sudo apt-get install libjpeg-dev
+pip install pillow
+```
+
+2. 下载数据集
+
+```bash
+cd demo/image_classification/data/
+sh download_cifar.sh
+```
+
+CIFAR-10数据集包含60000张32x32的彩色图片。图片分为10类,每个类包含6000张。其中50000张图片作为训练集,10000张作为测试集。
+
+下图展示了所有的图片类别,每个类别中随机抽取了10张图片。
![Image Classification](./cifar.png)
+
+脚本运行完成后,我们应当会得到一个名为cifar-out的文件夹,其下子文件夹的结构如下:
+
+```
+train
+---airplane
+---automobile
+---bird
+---cat
+---deer
+---dog
+---frog
+---horse
+---ship
+---truck
+test
+---airplane
+---automobile
+---bird
+---cat
+---deer
+---dog
+---frog
+---horse
+---ship
+---truck
+```
+
+cifar-out下包含`train`和`test`两个文件夹,其中分别包含了CIFAR-10中的训练集和测试集。这两个文件夹下各自有10个子文件夹,每个子文件夹下存储相应分类的图片。将图片按照上述结构存储好之后,我们就可以着手对分类模型进行训练了。
+
+## 预处理
+数据下载之后,还需要进行预处理,将数据转换为Paddle的格式。我们可以通过如下命令进行预处理工作:
+
+```
+cd demo/image_classification/
+sh preprocess.sh
+```
+
+其中`preprocess.sh` 调用 `./demo/image_classification/preprocess.py` 对图片进行预处理:
+```sh
+export PYTHONPATH=$PYTHONPATH:../../
+data_dir=./data/cifar-out
+python preprocess.py -i $data_dir -s 32 -c 1
+```
+
+`./demo/image_classification/preprocess.py` 使用如下参数:
+
+- `-i` 或 `--input` 给出输入数据所在路径;
+- `-s` 或 `--size` 给出图片尺寸;
+- `-c` 或 `--color` 标示图片是彩色图或灰度图。
+
+## 模型训练
+在开始训练之前,我们需要先创建一个模型配置文件。下面我们给出了一个配置示例。**注意**,这里列出的配置和`vgg_16_cifar.py`文件稍有差别,因为该文件中的配置还可用于预测。
+
+```python
+from paddle.trainer_config_helpers import *
+data_dir='data/cifar-out/batches/'
+meta_path=data_dir+'batches.meta'
+args = {'meta':meta_path, 'mean_img_size': 32,
+        'img_size': 32, 'num_classes': 10,
+        'use_jpeg': 1, 'color': "color"}
+define_py_data_sources2(train_list=data_dir+"train.list",
+                        test_list=data_dir+'test.list',
+                        module='image_provider',
+                        obj='processData',
+                        args=args)
+settings(
+    batch_size = 128,
+    learning_rate = 0.1 / 128.0,
+    learning_method = MomentumOptimizer(0.9),
+    regularization = L2Regularization(0.0005 * 128))
+
+img = data_layer(name='image', size=3*32*32)
+lbl = data_layer(name="label", size=10)
+# small_vgg is predefined in trainer_config_helpers.network
+predict = small_vgg(input_image=img, num_channels=3)
+outputs(classification_cost(input=predict, label=lbl))
+```
+
+在第一行中我们载入用于定义网络的函数。
+```python
+from paddle.trainer_config_helpers import *
+```
+
+之后定义的`define_py_data_sources2`使用Python数据提供器,其中 `args` 将在 `image_provider.py` 中使用,该文件负责产生图片数据并传递给Paddle系统。
+ - `meta`: 训练集平均值。
+ - `mean_img_size`: 平均特征图的高度及宽度。
+ - `img_size`:输入图片的高度及宽度。
+ - `num_classes`:类别个数。
+ - `use_jpeg`:处理过程中数据存储格式。
+ - `color`:标示是否为彩色图片。
+
+`settings`用于设置训练算法。在下面的例子中,learning rate被设置为0.1除以batch size,而weight decay则为0.0005乘以batch size。
+
+```python
+settings(
+    batch_size = 128,
+    learning_rate = 0.1 / 128.0,
+    learning_method = MomentumOptimizer(0.9),
+    regularization = L2Regularization(0.0005 * 128)
+)
+```
+
+`small_vgg`定义了网络结构。这里我们使用的是一个小的VGG网络。关于VGG卷积神经网络的描述可以参考:[http://www.robots.ox.ac.uk/~vgg/research/very_deep/](http://www.robots.ox.ac.uk/~vgg/research/very_deep/)。
+```python
+# small_vgg is predefined in trainer_config_helpers.network
+predict = small_vgg(input_image=img, num_channels=3)
+```
+配置创建完毕后,可以运行脚本train.sh来训练模型。
+
+```bash
+config=vgg_16_cifar.py
+output=./cifar_vgg_model
+log=train.log
+
+paddle train \
+--config=$config \
+--dot_period=10 \
+--log_period=100 \
+--test_all_data_in_one_period=1 \
+--use_gpu=1 \
+--save_dir=$output \
+2>&1 | tee $log
+
+python -m paddle.utils.plotcurve -i $log > plot.png
+```
+- 这里我们使用的是GPU模式进行训练。如果你没有GPU环境,可以设置`use_gpu=0`。
+- `./demo/image_classification/vgg_16_cifar.py`是网络和数据配置文件。各项参数的详细说明可以在命令行参数相关文档中找到。
+- 脚本`plotcurve.py`依赖于python的`matplotlib`模块。因此如果这个脚本运行失败,也许是因为需要安装`matplotlib`。
+
+在训练完成后,训练及测试误差曲线图会被`plotcurve.py`脚本保存在 `plot.png`中。下面是一个误差曲线图的示例:
![Training and testing curves.](./plot.png)
+
+## 预测
+在训练完成后,模型及参数会被保存在路径`./cifar_vgg_model/pass-%05d`下。例如第300个pass的模型会被保存在`./cifar_vgg_model/pass-00299`。
+
+要对一张图片进行分类预测,我们可以使用`predict.sh`,该脚本将输出预测分类的标签:
+
+```
+sh predict.sh
+```
+
+predict.sh:
+```
+model=cifar_vgg_model/pass-00299/
+image=data/cifar-out/test/airplane/seaplane_s_000978.png
+use_gpu=1
+python prediction.py $model $image $use_gpu
+```
+
+## 练习
+在CUB-200数据集上使用VGG模型训练一个鸟类图片分类模型。相关的鸟类数据集可以从如下地址下载,其中包含了200种鸟类的照片(主要来自北美洲)。
+
+<http://www.vision.caltech.edu/visipedia/CUB-200.html>
+
+## 细节探究
+### 卷积神经网络
+卷积神经网络是一种使用卷积层的前向神经网络,很适合构建用于理解图片内容的模型。一个典型的神经网络如下图所示:
+
+![Convolutional Neural Network](./lenet.png)
+
+一个卷积神经网络包含如下层:
+
+- 卷积层:通过卷积操作从图片或特征图中提取特征
+- 池化层:使用max-pooling对特征图下采样
+- 全连接层:使输入层到隐藏层的神经元是全部连接的。
+
+卷积神经网络在图片分类上有着惊人的性能,这是因为它发掘出了图片的两类重要信息:局部关联性质和空间不变性质。通过交替使用卷积和池化处理,卷积神经网络能够很好地表示这两类信息。
+
+关于如何定义网络中的层,以及如何在层之间进行连接,请参考Layer文档。
diff --git a/doc/tutorials/image_classification/index_en.md b/doc/tutorials/image_classification/index_en.md
index 29cfc99702c362d1eaeeff5332f56122b8de337a..60c81a6a539944634773f38ec4c9a59709dd4afc 100644
--- a/doc/tutorials/image_classification/index_en.md
+++ b/doc/tutorials/image_classification/index_en.md
@@ -147,7 +147,7 @@ for classification. A description of VGG network can be found here [http://www.r
 # small_vgg is predined in trainer_config_helpers.network
 predict = small_vgg(input_image=img, num_channels=3)
 ```
-After writing the config, we can train the model by running the script train.sh. Notice that the following script assumes the you run the script in the `./demo/image_classification` folder. If you run the script in a different folder, you need to change the paths of the scripts and the configuration files accordingly.
+After writing the config, we can train the model by running the script train.sh.
 
 ```bash
 config=vgg_16_cifar.py
diff --git a/doc/tutorials/image_classification/src/cifar.png b/doc/tutorials/image_classification/src/cifar.png
new file mode 100644
index 0000000000000000000000000000000000000000..f54a0c58837cb3385b32dc57d02cec92666ef0f1
Binary files /dev/null and b/doc/tutorials/image_classification/src/cifar.png differ
diff --git a/doc/tutorials/image_classification/src/image_classification.png b/doc/tutorials/image_classification/src/image_classification.png
new file mode 100644
index 0000000000000000000000000000000000000000..14f255805081c1b4fab27eaf336fd389fa93ca19
Binary files /dev/null and b/doc/tutorials/image_classification/src/image_classification.png differ
diff --git a/doc/tutorials/image_classification/src/lenet.png b/doc/tutorials/image_classification/src/lenet.png
new file mode 100644
index 0000000000000000000000000000000000000000..1e6f2b32bad797f3fccb929c72a121fc935b0cbb
Binary files /dev/null and b/doc/tutorials/image_classification/src/lenet.png differ
diff --git a/doc/tutorials/image_classification/src/plot.png b/doc/tutorials/image_classification/src/plot.png
new file mode 100644
index 0000000000000000000000000000000000000000..a31f99791c670e18bb8c62b7604ec8cb0284ffb4
Binary files /dev/null and b/doc/tutorials/image_classification/src/plot.png differ
diff --git a/doc/tutorials/imagenet_model/resnet_model_cn.md b/doc/tutorials/imagenet_model/resnet_model_cn.md
new file mode 100644
index 0000000000000000000000000000000000000000..82ec9d70b345c11aba3aa86f8206eedc8072bb88
--- /dev/null
+++ b/doc/tutorials/imagenet_model/resnet_model_cn.md
@@ -0,0 +1,284 @@
+# Model Zoo - ImageNet #
+
+[ImageNet](http://www.image-net.org/) 是通用物体分类领域一个众所周知的数据库。本教程提供了一个用于ImageNet上的卷积分类网络模型。
+
+## ResNet 介绍
+
+论文 [Deep Residual Learning for Image Recognition](http://arxiv.org/abs/1512.03385) 中提出的ResNet网络结构在2015年ImageNet大规模视觉识别竞赛(ILSVRC 2015)的分类任务中赢得了第一名。他们提出残差学习的框架来简化网络的训练,所构建网络结构的深度比之前使用的网络有大幅度的提高。下图展示的是基于残差的连接方式。左图构造网络模块的方式被用于34层的网络中,而右图的瓶颈连接模块用于50层、101层和152层的网络结构中。
+
+![resnet_block](./resnet_block.jpg)
+
+图 1. ResNet 网络模块
+
+本教程中我们给出了三个ResNet模型,这些模型都是由原作者提供的模型转换过来的。我们使用PaddlePaddle在ILSVRC的验证集共50,000幅图像上测试了模型的分类错误率,其中输入图像的颜色通道顺序为**BGR**,保持宽高比缩放到短边为256,只截取中心方形的图像区域。分类错误率和模型大小由下表给出。
+
+| ResNet | Top-1 | Model Size |
+| :----: | :---: | :--------: |
+| ResNet-50  | 24.9% | 99M  |
+| ResNet-101 | 23.7% | 173M |
+| ResNet-152 | 23.2% | 234M |
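+
+上表错误率对应的测试预处理(**BGR** 通道顺序、保持宽高比缩放到短边256、中心裁剪224)可以用下面的示意代码说明。这只是一个基于PIL和numpy的假设性示例,并非教程自带的脚本;其中减去的三个通道均值即下文模型下载一节提到的 103.939, 116.779, 123.68:
+
+```python
+import numpy as np
+from PIL import Image
+
+def preprocess(path, short_side=256, crop=224):
+    # 保持宽高比,将短边缩放到 short_side
+    img = Image.open(path).convert('RGB')
+    w, h = img.size
+    scale = float(short_side) / min(w, h)
+    img = img.resize((int(round(w * scale)), int(round(h * scale))))
+    # 截取中心 crop x crop 的方形区域
+    w, h = img.size
+    left, top = (w - crop) // 2, (h - crop) // 2
+    img = img.crop((left, top, left + crop, top + crop))
+    # RGB 转为 BGR,并减去各通道均值
+    arr = np.asarray(img, dtype=np.float32)[:, :, ::-1]
+    return arr - np.array([103.939, 116.779, 123.68], dtype=np.float32)
+```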
+
+## ResNet 模型
+
+50层、101层和152层的网络配置文件可参照```demo/model_zoo/resnet/resnet.py```。你也可以通过在命令行参数中增加一个参数如```--config_args=layer_num=50```来指定网络层的数目。
+
+### 网络可视化
+
+你可以通过执行下面的命令来得到ResNet网络的结构可视化图。该脚本会生成一个dot文件,再将其转换为图片;转换需要安装graphviz。
+
+```
+cd demo/model_zoo/resnet
+./net_diagram.sh
+```
+
+### 模型下载
+
+```
+cd demo/model_zoo/resnet
+./get_model.sh
+```
+你可以执行上述命令来下载所有的模型和均值文件。如果下载成功,这些文件将会被保存在```demo/model_zoo/resnet/model```路径下。
+
+```
+mean_meta_224  resnet_101  resnet_152  resnet_50
+```
+ * resnet_50: 50层网络模型。
+ * resnet_101: 101层网络模型。
+ * resnet_152: 152层网络模型。
+ * mean\_meta\_224: 均值图像文件,图像大小为3 x 224 x 224,颜色通道顺序为**BGR**。你也可以直接使用这三个通道均值: 103.939, 116.779, 123.68。
+
+### 参数信息
+
+* **卷积层权重**
+
+  由于每个卷积层后面连接的是batch normalization层,因此该层中没有偏置(bias)参数,并且只有一个权重。
+  形状: `(Co, ky, kx, Ci)`
+  * Co: 输出特征图的通道数目
+  * ky: 滤波器核在垂直方向上的尺寸
+  * kx: 滤波器核在水平方向上的尺寸
+  * Ci: 输入特征图的通道数目
+
+  二维矩阵: (Co * ky * kx, Ci), 行优先次序存储。
+
+* **全连接层权重**
+
+  二维矩阵: (输入层尺寸, 本层尺寸), 行优先次序存储。
+
+* **Batch Normalization 层权重**
+
+本层有四个参数,实际上只有.w0和.wbias是需要学习的参数,另外两个分别是滑动均值和方差。在测试阶段它们将会被加载到模型中。下表展示了batch normalization层的参数。
+
+| 参数名 | 尺寸 | 含义 |
+| ------ | ---- | ---- |
+| _res2_1_branch1_bn.w0 | 256 | gamma, 缩放参数 |
+| _res2_1_branch1_bn.w1 | 256 | 特征图均值 |
+| _res2_1_branch1_bn.w2 | 256 | 特征图方差 |
+| _res2_1_branch1_bn.wbias | 256 | beta, 偏置参数 |
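+
+为直观说明这四个参数在测试阶段如何参与计算,下面给出一个基于numpy的示意实现(其中 eps 为假设的数值稳定项,并非从模型文件中读取):
+
+```python
+import numpy as np
+
+def batch_norm_infer(x, w0, w1, w2, wbias, eps=1e-5):
+    # x: 某通道的特征图; w0: gamma; w1: 滑动均值; w2: 滑动方差; wbias: beta
+    return w0 * (x - w1) / np.sqrt(w2 + eps) + wbias
+```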
+
+### 参数读取
+
+使用者可以使用下面的Python脚本来读取参数值:
+
+```
+import sys
+import numpy as np
+
+def load(file_name):
+    with open(file_name, 'rb') as f:
+        f.read(16) # skip header for float type.
+        return np.fromfile(f, dtype=np.float32)
+
+if __name__=='__main__':
+    weight = load(sys.argv[1])
+```
+
+或者直接使用下面的shell命令:
+
+```
+od -j 16 -f _res2_1_branch1_bn.w0
+```
+
+## 特征提取
+
+我们提供了C++和Python接口来提取特征。下面的例子使用了`demo/model_zoo/resnet/example`中的数据,详细地展示了整个特征提取的过程。
+
+### C++接口
+
+首先,在配置文件中的`define_py_data_sources2`里指定图像数据列表,具体请参照示例`demo/model_zoo/resnet/resnet.py`。
+
+```
+    train_list = 'train.list' if not is_test else None
+    # mean.meta is mean file of ImageNet dataset.
+    # mean.meta size : 3 x 224 x 224.
+    # If you use three mean value, set like:
+    # "mean_value:103.939,116.779,123.68;"
+    args={
+        'mean_meta': "model/mean_meta_224/mean.meta",
+        'image_size': 224, 'crop_size': 224,
+        'color': True, 'swap_channel': [2, 1, 0]}
+    define_py_data_sources2(train_list,
+                            'example/test.list',
+                            module="example.image_list_provider",
+                            obj="processData",
+                            args=args)
+```
+
+第二步,在`resnet.py`文件中指定要提取特征的网络层的名字。例如,
+
+```
+Outputs("res5_3_branch2c_conv", "res5_3_branch2c_bn")
+```
+
+第三步,在`extract_fea_c++.sh`文件中指定模型路径和输出的目录,然后执行下面的命令。
+
+```
+cd demo/model_zoo/resnet
+./extract_fea_c++.sh
+```
+
+如果执行成功,特征将会存到`fea_output/rank-00000`文件中,如下所示。同时你可以使用`load_feature.py`文件中的`load_feature_c`接口来加载该文件。
+
+```
+-0.115318 -0.108358 ... -0.087884;-1.27664 ... -1.11516 -2.59123;
+-0.126383 -0.116248 ... -0.00534909;-1.42593 ... -1.04501 -1.40769;
+```
+
+* 每行存储的是一个样本的特征。其中,第一行存的是图像`example/dog.jpg`的特征,第二行存的是图像`example/cat.jpg`的特征。
+* 不同层的特征由分号`;`隔开,并且它们的顺序与`Outputs()`中指定的层顺序一致。这里,左边是`res5_3_branch2c_conv`层的特征,右边是`res5_3_branch2c_bn`层特征。
+
+### Python接口
+
+示例`demo/model_zoo/resnet/classify.py`中展示了如何使用Python来提取特征。下面的例子同样使用了`./example/test.list`中的数据。执行的命令如下:
+
+```
+cd demo/model_zoo/resnet
+./extract_fea_py.sh
+```
+
+extract_fea_py.sh:
+
+```
+python classify.py \
+     --job=extract \
+     --conf=resnet.py\
+     --use_gpu=1 \
+     --mean=model/mean_meta_224/mean.meta \
+     --model=model/resnet_50 \
+     --data=./example/test.list \
+     --output_layer="res5_3_branch2c_conv,res5_3_branch2c_bn" \
+     --output_dir=features
+
+```
+* \--job=extract: 指定工作模式来提取特征。
+* \--conf=resnet.py: 网络配置文件。
+* \--use_gpu=1: 指定是否使用GPU。
+* \--model=model/resnet_50: 模型路径。
+* \--data=./example/test.list: 数据列表。
+* \--output_layer="xxx,xxx": 指定提取特征的层。
+* \--output_dir=features: 输出目录。
+
+如果运行成功,你将会看到特征存储在`features/batch_0`文件中,该文件是由cPickle产生的。你可以使用`load_feature.py`中的`load_feature_py`接口来打开该文件,它将返回如下的字典:
+
+```
+{
+'cat.jpg': {'res5_3_branch2c_conv': array([[-0.12638293, -0.116248  , -0.11883899, ..., -0.00895038, 0.01994277, -0.00534909]], dtype=float32), 'res5_3_branch2c_bn': array([[-1.42593431, -1.28918779, -1.32414699, ..., -1.45933616, -1.04501402, -1.40769434]], dtype=float32)},
+'dog.jpg': {'res5_3_branch2c_conv': array([[-0.11531784, -0.10835785, -0.08809858, ..., 0.0055237, 0.01505112, -0.08788397]], dtype=float32), 'res5_3_branch2c_bn': array([[-1.27663755, -1.18272924, -0.90937918, ..., -1.25178063, -1.11515927, -2.59122872]], dtype=float32)}
+}
+```
+
+仔细观察,这些特征值与上述使用C++接口提取的结果是一致的。
+
+## 预测
+
+`classify.py`文件也可以用于对样本进行预测。我们提供了一个示例脚本`predict.sh`,它使用50层的ResNet模型来对`example/test.list`中的数据进行预测。
+
+```
+cd demo/model_zoo/resnet
+./predict.sh
+```
+
+predict.sh调用了`classify.py`:
+
+```
+python classify.py \
+     --job=predict \
+     --conf=resnet.py\
+     --multi_crop \
+     --model=model/resnet_50 \
+     --use_gpu=1 \
+     --data=./example/test.list
+```
+* \--job=predict: 指定工作模式进行预测。
+* \--conf=resnet.py: 网络配置文件。
+* \--multi_crop: 使用10个裁剪图像块,预测概率取平均。
+* \--use_gpu=1: 指定是否使用GPU。
+* \--model=model/resnet_50: 模型路径。
+* \--data=./example/test.list: 数据列表。
+
+如果运行成功,你将会看到如下结果,其中156和282是这些图像的分类标签。
+
+```
+Label of example/dog.jpg is: 156
+Label of example/cat.jpg is: 282
+```
diff --git a/doc/tutorials/imagenet_model/resnet_model_en.md b/doc/tutorials/imagenet_model/resnet_model_en.md
index 5403ab9f17d2399fee878d0f3c512cb166aba06f..478ad06193b14ba7fe02238df621db1f7b0804d4 100644
--- a/doc/tutorials/imagenet_model/resnet_model_en.md
+++ b/doc/tutorials/imagenet_model/resnet_model_en.md
@@ -52,7 +52,7 @@ See ```demo/model_zoo/resnet/resnet.py```. This config contains network of 50, 1
 
 ### Network Visualization
 
-You can get a diagram of ResNet network by running the following commands. The script generates dot file and then converts dot file to PNG file, which uses installed draw_dot tool in our server. If you can not access the server, just install graphviz to convert dot file.
+You can get a diagram of ResNet network by running the following commands. The script generates a dot file and then converts it to a PNG file, which requires graphviz to be installed.
 
 ```
 cd demo/model_zoo/resnet
@@ -138,7 +138,7 @@ There are four parameters in this layer. In fact, only .w0 and .wbias are the le
 
 ### Parameter Observation
 
-Users who want to observe the parameters can use python to read:
+Users who want to observe the parameters can use Python to read:
 
 ```
 import sys
@@ -209,7 +209,7 @@ If successful, features are saved in `fea_output/rank-00000` as follows. And you
 
 ### Python Interface
 
-`demo/model_zoo/resnet/classify.py` is an example to show how to use python to extract features. Following example still uses data of `./example/test.list`. Command is as follows:
+`demo/model_zoo/resnet/classify.py` is an example to show how to use Python to extract features. Following example still uses data of `./example/test.list`. Command is as follows:
 
 ```
 cd demo/model_zoo/resnet
@@ -238,8 +238,6 @@ python classify.py \
 * \--output_layer="xxx,xxx": specify layers to extract features.
 * \--output_dir=features: output diretcoty.
 
-Note, since the convolution layer in these ResNet models is suitable for the cudnn implementation which only support GPU. It not support CPU mode because of compatibility issue and we will fix later.
-
 If run successfully, you will see features saved in `features/batch_0`, this file is produced with cPickle. You can use `load_feature_py` interface in `load_feature.py` to open the file, and it returns a dictionary as follows:
 
 ```
diff --git a/doc/tutorials/index_cn.md b/doc/tutorials/index_cn.md
new file mode 100644
index 0000000000000000000000000000000000000000..6a27004d58d24cc466d930322be8cdbb2f434c74
--- /dev/null
+++ b/doc/tutorials/index_cn.md
@@ -0,0 +1,13 @@
+# 完整教程
+
+* [快速入门](quick_start/index_cn.rst)
+* [个性化推荐](rec/ml_regression_cn.rst)
+* [图像分类](image_classification/index_cn.md)
+* [情感分析](sentiment_analysis/index_cn.md)
+* [语义角色标注](semantic_role_labeling/index_cn.md)
+* [机器翻译](text_generation/index_cn.md)
+
+## 常用模型
+
+* [ResNet模型](imagenet_model/resnet_model_cn.md)
+* [词向量模型](embedding_model/index_cn.md)
diff --git a/doc/tutorials/index_en.md b/doc/tutorials/index_en.md
index 97de356665d23543ddc241552c6e3c896a78db86..77331a703b6f0fdf92921ebcc476325b7327e976 100644
--- a/doc/tutorials/index_en.md
+++ b/doc/tutorials/index_en.md
@@ -1,22 +1,13 @@
 # TUTORIALS
-There are serveral examples and demos here.
+
-
-## [Quick Start](quick_start/index_en.md)
-
-## Image
+There are several examples and demos here.
+* [Quick Start](quick_start/index_en.md)
+* [MovieLens Regression](rec/ml_regression_en.rst)
 * [Image Classification](image_classification/index_en.md)
-
-## NLP
-
 * [Sentiment Analysis](sentiment_analysis/index_en.md)
-* [Text Generation](text_generation/index_en.md)
 * [Semantic Role Labeling](semantic_role_labeling/index_en.md)
-
-## Recommendation
-
-* [MovieLens Dataset](rec/ml_dataset_en.md)
-* [MovieLens Regression](rec/ml_regression_en.rst)
+* [Text Generation](text_generation/index_en.md)
+* [Image Auto-Generation](gan/index_en.md)
 
 ## Model Zoo
 * [ImageNet: ResNet](imagenet_model/resnet_model_en.md)
diff --git a/doc/tutorials/quick_start/index_cn.rst b/doc/tutorials/quick_start/index_cn.rst
new file mode 100644
index 0000000000000000000000000000000000000000..d565fcf95ef8489eb22a5a1b5a552b5336f4e371
--- /dev/null
+++ b/doc/tutorials/quick_start/index_cn.rst
@@ -0,0 +1,397 @@
+=============
+快速入门教程
+=============
+
+我们将以 `文本分类问题 `_ 为例,介绍PaddlePaddle的基本使用方法。
+
+安装
+====
+
+请参考 :ref:`install_steps` 安装PaddlePaddle。
+
+使用概述
+========
+
+**文本分类问题**:对于给定的一条文本,我们从提前给定的类别集合中选择其所属类别。
+
+比如,在购物网站上,通过查看买家对某个产品的评价反馈,评估该产品的质量。
+
+- 这个显示器很棒!(好评)
+- 用了两个月之后这个显示器屏幕碎了。(差评)
+
+使用PaddlePaddle,每一个任务流程都可以被划分为如下五个步骤。
+
+  .. image:: src/Pipeline_cn.jpg
+     :align: center
+     :scale: 80%
+
+1. 数据格式准备
+   - 本例每行保存一条样本,类别Id和文本信息用 ``Tab`` 间隔,文本中的单词用空格分隔(如果不切词,则字与字之间用空格分隔),例如:``类别Id '\t' 这 个 显 示 器 很 棒 !``
+2. 向系统传送数据
+   - PaddlePaddle可以执行用户的python脚本程序来读取各种格式的数据文件。
+   - 本例的所有字符都将转换为连续整数表示的Id传给模型。
+3. 描述网络结构和优化算法
+   - 本例由易到难展示4种不同的文本分类网络配置:逻辑回归模型,词向量模型,卷积模型,时序模型。
+   - 常用优化算法包括Momentum, RMSProp, AdaDelta, AdaGrad, Adam, Adamax等,本例采用Adam优化方法,加了L2正则和梯度截断。
+4. 训练模型
+5. 应用模型
+
+数据格式准备
+------------
+
+接下来我们将展示如何用PaddlePaddle训练一个文本分类模型,将 `Amazon电子产品评论数据 `_ 分为好评(正样本)和差评(负样本)两种类别。
+`源代码 `_ 的 ``demo/quick_start`` 目录里提供了该数据的下载脚本和预处理脚本,你只需要在命令行输入以下命令,就能够很方便地完成数据下载和相应的预处理工作。
+
+.. code-block:: bash
+
+       cd demo/quick_start
+       ./data/get_data.sh
+       ./preprocess.sh
+
+数据预处理完成之后,通过配置类似于 ``dataprovider_*.py`` 的数据读取脚本和类似于 ``trainer_config.*.py`` 的训练模型脚本,PaddlePaddle将以设置参数的方式来设置相应的数据读取脚本和训练模型脚本。接下来,我们将对这两个步骤给出详细的解释。你也可以先跳过本文的解释环节,直接进入训练模型章节,使用 ``sh train.sh`` 开始训练模型,再查看 ``train.sh`` 的内容,通过 **自底向上法** (bottom-up approach)来帮助你理解PaddlePaddle的内部运行机制。
+
+
+向系统传送数据
+==============
+
+Python脚本读取数据
+------------------
+
+`DataProvider` 是PaddlePaddle负责提供数据的模块,主要职责在于将训练数据传入内存或者显存,让模型能够得到训练更新,其包括两个函数:
+
+* initializer:PaddlePaddle会在调用读取数据的Python脚本之前,先调用initializer函数。在下面例子里,我们在initializer函数里初始化词表,并且在随后的读取数据过程中填充词表。
+* process:PaddlePaddle调用process函数来读取数据。每次读取一条数据后,process函数会用yield语句输出这条数据,从而能够被PaddlePaddle 捕获 (harvest)。
+
+``dataprovider_bow.py`` 文件给出了完整例子:
+
+.. literalinclude:: ../../../demo/quick_start/dataprovider_bow.py
+   :language: python
+   :lines: 21-70
+   :linenos:
+   :emphasize-lines: 8,33
+
+详细内容请参见 :ref:`api_dataprovider` 。
+
+配置中的数据加载定义
+--------------------
+
+在模型配置中通过 ``define_py_data_sources2`` 接口来加载数据:
+
+.. literalinclude:: ../../../demo/quick_start/trainer_config.emb.py
+   :language: python
+   :lines: 19-35
+   :linenos:
+   :emphasize-lines: 12
+
+
+以下是对上述数据加载的解释:
+
+- data/train.list,data/test.list: 指定训练数据和测试数据
+- module="dataprovider_bow": 处理数据的Python脚本文件
+- obj="process": 指定生成数据的函数
+- args={"dictionary": word_dict}: 额外的参数,这里指定词典
+
+更详细数据格式和用例请参考 :ref:`api_pydataprovider2` 。
+
+模型网络结构
+============
+
+本小节我们将介绍模型网络结构。
+
+  .. image:: src/PipelineNetwork_cn.jpg
+     :align: center
+     :scale: 80%
+
+
+我们将以最基本的逻辑回归网络作为起点,并逐渐展示更加深入的功能。更详细的网络配置链接请参考 :ref:`api_trainer_config_helpers_layers` 。
+所有配置都能在 `源代码 `_ 的 ``demo/quick_start`` 目录下找到。
+
+逻辑回归模型
+------------
+
+具体流程如下:
+
+  .. image:: src/NetLR_cn.jpg
+     :align: center
+     :scale: 80%
+
+- 获取利用 `one-hot vector `_ 表示的每个单词,维度是词典大小
+
+  .. code-block:: python
+
+     word = data_layer(name="word",  size=word_dim)
+
+- 获取该条样本类别Id,维度是类别个数。
+
+  .. code-block:: python
+
+     label = data_layer(name="label", size=label_dim)
+
+- 利用逻辑回归模型对该向量进行分类,同时会计算分类准确率
+
+  .. code-block:: python
+
+     # Define a fully connected layer with logistic activation (also called softmax activation).
+     output = fc_layer(input=word,
+                       size=label_dim,
+                       act_type=SoftmaxActivation())
+     # Define cross-entropy classification loss and error.
+     classification_cost(input=output, label=label)
+
+  - input: 除去data层,每个层都有一个或多个input,多个input以list方式输入
+  - size: 该层神经元个数
+  - act_type: 激活函数类型
+
+**效果总结**:我们将在后面介绍训练和预测流程的脚本。在此为方便对比不同网络结构,我们总结了各个网络的复杂度和效果。
+
+    =====================  ===============================  =================
+    网络名称                参数数量                          错误率
+    =====================  ===============================  =================
+    逻辑回归                252 KB                           8.652 %
+    =====================  ===============================  =================
+
+词向量模型
+----------
+
+embedding模型需要稍微改变提供数据的Python脚本,即 ``dataprovider_emb.py``,词向量模型、卷积模型、时序模型均使用该脚本。其中文本输入类型定义为整数时序类型integer_value_sequence。
+
+.. code-block:: python
+
+   def initializer(settings, dictionary, **kwargs):
+       settings.word_dict = dictionary
+       settings.input_types = [
+           # Define the type of the first input as sequence of integer.
+           # The value of the integers ranges from 0 to len(dictionary)-1
+           integer_value_sequence(len(dictionary)),
+           # Define the second input for label id
+           integer_value(2)]
+
+   @provider(init_hook=initializer)
+   def process(settings, file_name):
+       ...
+       # omitted, it is same as the data provider for LR model
+
+该模型依然使用逻辑回归分类网络的框架,只是将句子用稀疏向量表示替换为用连续向量表示,即对第三步进行替换。句子表示的计算更新为两步:
+
+.. image:: src/NetContinuous_cn.jpg
+   :align: center
+   :scale: 80%
+
+- 利用单词Id查找该单词对应的连续向量(维度为word_dim),输入N个单词,输出为N个word_dim维度向量
+
+  .. code-block:: python
+
+     emb = embedding_layer(input=word, size=word_dim)
+
+- 将该句话包含的所有单词向量求平均,得到句子的表示
+
+  .. code-block:: python
+
+     avg = pooling_layer(input=emb, pooling_type=AvgPooling())
+
+其它部分和逻辑回归网络结构一致。
+
+**效果总结:**
+
+    =====================  ===============================  ==================
+    网络名称                参数数量                          错误率
+    =====================  ===============================  ==================
+    词向量模型              15 MB                            8.484 %
+    =====================  ===============================  ==================
+
+卷积模型
+-----------
+
+卷积网络是一种特殊的从词向量表示到句子表示的方法,也就是将词向量模型进一步演化为三个新步骤。
+
+.. image:: src/NetConv_cn.jpg
+   :align: center
+   :scale: 80%
+
+文本卷积可分为三个步骤:
+
+1. 首先,从每个单词左右两端分别获取k个相邻的单词,拼接成一个新的向量;
+
+2. 其次,对该向量进行非线性变换(例如Sigmoid变换),使其转变为维度为hidden_dim的新向量;
+
+3. 最后,对整个新向量集合的每一个维度取最大值来表示最后的句子。
+
+这三个步骤可配置为:
+
+.. code-block:: python
+
+   text_conv = sequence_conv_pool(input=emb,
+                                  context_start=k,
+                                  context_len=2 * k + 1)
+
+**效果总结:**
+
+    =====================  ===============================  ========================
+    网络名称                参数数量                          错误率
+    =====================  ===============================  ========================
+    卷积模型                16 MB                            5.628 %
+    =====================  ===============================  ========================
+
+时序模型
+----------
+
+.. image:: src/NetRNN_cn.jpg
+   :align: center
+   :scale: 80%
+
+时序模型,也称为RNN模型,包括简单的 `RNN模型 `_, `GRU模型 `_ 和 `LSTM模型 `_ 等等。
+
+- GRU模型配置:
+
+  .. code-block:: python
+
+     gru = simple_gru(input=emb, size=gru_size)
+
+
+- LSTM模型配置:
+
+  .. code-block:: python
+
+     lstm = simple_lstm(input=emb, size=lstm_size)
+
+本次实验,我们采用单层LSTM模型,并使用了Dropout,**效果总结:**
+
+    =====================  ===============================  =========================
+    网络名称                参数数量                          错误率
+    =====================  ===============================  =========================
+    时序模型                16 MB                            4.812 %
+    =====================  ===============================  =========================
+
+优化算法
+=========
+
+`优化算法 `_ 包括Momentum, RMSProp, AdaDelta, AdaGrad, Adam, Adamax等,这里采用Adam优化方法,同时使用了L2正则(L2 Regularization)和梯度截断(Gradient Clipping)。
+
+.. code-block:: python
+
+   settings(batch_size=128,
+            learning_rate=2e-3,
+            learning_method=AdamOptimizer(),
+            regularization=L2Regularization(8e-4),
+            gradient_clipping_threshold=25)
+
+训练模型
+=========
+
+在数据加载和网络配置完成之后,我们就可以训练模型了。
+
+.. image:: src/PipelineTrain_cn.jpg
+   :align: center
+   :scale: 80%
+
+训练模型,我们只需要运行 ``train.sh`` 训练脚本:
+
+  .. code-block:: bash
+
+     ./train.sh
+
+``train.sh`` 中包含了训练模型的基本命令。训练时所需设置的主要参数如下:
+
+  .. code-block:: bash
+
+     paddle train \
+     --config=trainer_config.py \
+     --log_period=20 \
+     --save_dir=./output \
+     --num_passes=15 \
+     --use_gpu=false
+
+这里只简单介绍了单机训练,如何进行分布式训练,请参考 :ref:`cluster_train` 。
+
+预测
+=====
+
+当模型训练好了之后,我们就可以进行预测了。
+
+.. image:: src/PipelineTest_cn.jpg
+   :align: center
+   :scale: 80%
+
+之前配置文件中 ``test.list`` 指定的数据将会被测试,这里直接通过预测脚本 ``predict.sh`` 进行预测,更详细的说明,请参考 :ref:`api_swig_py_paddle` 。
+
+  .. code-block:: bash
+
+     model="output/pass-00003"
+     paddle train \
+     --config=trainer_config.lstm.py \
+     --use_gpu=false \
+     --job=test \
+     --init_model_path=$model \
+     --config_args=is_predict=1 \
+     --predict_output_dir=.
+
+     mv rank-00000 result.txt
+
+这里以 ``output/pass-00003`` 为例进行预测,用户可以根据训练日志,选择测试结果最好的模型来预测。
+
+预测结果以文本的形式保存在 ``result.txt`` 中,一行为一个样本,格式如下:
+
+  .. code-block:: bash
+
+     预测ID;ID为0的概率 ID为1的概率
+     预测ID;ID为0的概率 ID为1的概率
+
+总体效果总结
+==============
+
+在 ``/demo/quick_start`` 目录下,能够找到这里使用的所有数据,网络配置,训练脚本等等。
+对于Amazon-Elec测试集(25k),如下表格,展示了上述网络模型的训练效果:
+
+    =====================  ===============================  =============  ==================================
+    网络名称                参数数量                          错误率         配置文件
+    =====================  ===============================  =============  ==================================
+    逻辑回归模型            252 KB                           8.652%         trainer_config.lr.py
+    词向量模型              15 MB                            8.484%         trainer_config.emb.py
+    卷积模型                16 MB                            5.628%         trainer_config.cnn.py
+    时序模型                16 MB                            4.812%         trainer_config.lstm.py
+    =====================  ===============================  =============  ==================================
+
+
+附录
+=====
+
+命令行参数
+----------
+
+* \--config:网络配置
+* \--save_dir:模型存储路径
+* \--log_period:每隔多少batch打印一次日志
+* \--num_passes:训练轮次,一个pass表示过一遍所有训练样本
+* \--config_args:命令指定的参数会传入网络配置中。
+* \--init_model_path:指定初始化模型路径,可用在测试或训练时指定初始化模型。
+
+默认一个pass保存一次模型,也可以通过saving_period_by_batches设置每隔多少batch保存一次模型。
+可以通过show_parameter_stats_period设置打印参数信息等。
+其他参数请参考 命令行参数文档(链接待补充)。
+
+输出日志
+---------
+
+..
code-block:: bash + + TrainerInternal.cpp:160] Batch=20 samples=2560 AvgCost=0.628761 CurrentCost=0.628761 Eval: classification_error_evaluator=0.304297 CurrentEval: classification_error_evaluator=0.304297 + +模型训练会看到类似上面这样的日志信息,详细的参数解释,请参考如下表格: + + =========================================== ============================================================== + 名称 解释 + =========================================== ============================================================== + Batch=20 表示过了20个batch + samples=2560 表示过了2560个样本 + AvgCost 每个pass的第0个batch到当前batch所有样本的平均cost + CurrentCost 当前log_period个batch所有样本的平均cost + Eval: classification_error_evaluator 每个pass的第0个batch到当前batch所有样本的平均分类错误率 + CurrentEval: classification_error_evaluator 当前log_period个batch所有样本的平均分类错误率 + =========================================== ============================================================== diff --git a/doc/tutorials/quick_start/index_en.md b/doc/tutorials/quick_start/index_en.md index ec548b5393d7b210d6409328c00917aeb679a451..ca110431cf921ae0480d3fb2b17c58f90a84cc0e 100644 --- a/doc/tutorials/quick_start/index_en.md +++ b/doc/tutorials/quick_start/index_en.md @@ -12,7 +12,7 @@ This tutorial will teach the basics of deep learning (DL), including how to impl To get started, please install PaddlePaddle on your computer. Throughout this tutorial, you will learn by implementing different DL models for text classification. -To install PaddlePaddle, please follow the instructions here: Build and Install. +To install PaddlePaddle, please follow the instructions here: Build and Install. ## Overview For the first step, you will use PaddlePaddle to build a **text classification** system. For example, suppose you run an e-commence website, and you want to analyze the sentiment of user reviews to evaluate product quality. @@ -32,7 +32,7 @@ The monitor breaks down two months after purchase. the classifier should output “negative“. To build your text classification system, your code will need to perform five steps: -
![](./Pipeline_en.jpg)
+
![](./src/Pipeline_en.jpg)
- Preprocess data into a standardized format. - Provide data to the learning model. @@ -156,18 +156,18 @@ define_py_data_sources2(train_list='data/train.list', obj="process", args={"dictionary": word_dict}) ``` -You can refer to the following link for more detailed examples and data formats: PyDataProvider2. +You can refer to the following link for more detailed examples and data formats: PyDataProvider2. ## Network Architecture -You will describe four kinds of network architectures in this section. -
![](./PipelineNetwork_en.jpg)
+We will describe four kinds of network architectures in this section. +
![](./src/PipelineNetwork_en.jpg)
First, you will build a logistic regression model. Later, you will also get chance to build other more powerful network architectures. -For more detailed documentation, you could refer to: Layer documentation。All configuration files are in `demo/quick_start` directory. +For more detailed documentation, you could refer to: layer documentation. All configuration files are in `demo/quick_start` directory. ### Logistic Regression The architecture is illustrated in the following picture: -
![](./NetLR_en.png)
+
![](./src/NetLR_en.png)
- You need define the data for text features. The size of the data layer is the number of words in the dictionary. @@ -182,10 +182,10 @@ label = data_layer(name="label", size=label_dim) ``` - It uses logistic regression model to classify the vector, and it will output the classification error during training. - - Each layer has an *input* argument that specifies its input layer. Some layers can have multiple input layers. You can use a list of the input layers as input in that case. - - *size* for each layer means the number of neurons of the layer. - - *act_type* means activation function applied to the output of each neuron independently. - - Some layers can have additional special inputs. For example, `classification_cost` needs ground truth label as input to compute classification loss and error. + - Each layer has an *input* argument that specifies its input layer. Some layers can have multiple input layers. You can use a list of the input layers as input in that case. + - *size* for each layer means the number of neurons of the layer. + - *act_type* means activation function applied to the output of each neuron independently. + - Some layers can have additional special inputs. For example, `classification_cost` needs ground truth label as input to compute classification loss and error. ```python # Define a fully connected layer with logistic activation (also called softmax activation). output = fc_layer(input=word, @@ -240,7 +240,7 @@ def process(settings, file_name): ``` This model is very similar to the framework of logistic regression, but it uses word embedding vectors instead of a sparse vectors to represent words. -
![](./NetContinuous_en.png)
+
![](./src/NetContinuous_en.png)
- It can look up the dense word embedding vector in the dictionary (its words embedding vector is `word_dim`). The input is a sequence of N words, the output is N word_dim dimensional vectors. @@ -283,7 +283,7 @@ The performance is summarized in the following table: ### Convolutional Neural Network Model Convolutional neural network converts a sequence of word embeddings into a sentence representation using temporal convolutions. You will transform the fully connected layer of the word embedding model to 3 new sub-steps. -
![](./NetConv_en.png)
+
![](./src/NetConv_en.png)
Text convolution has 3 steps: @@ -295,8 +295,8 @@ Text convolution has 3 steps: # context_len means convolution kernel size. # context_start means the start of the convolution. It can be negative. In that case, zero padding is applied. text_conv = sequence_conv_pool(input=emb, - context_start=k, - context_len=2 * k + 1) + context_start=k, + context_len=2 * k + 1) ``` The performance is summarized in the following table: @@ -324,7 +324,7 @@ The performance is summarized in the following table:
### Recurrent Model -
![](./NetRNN_en.png)
+
![](./src/NetRNN_en.png)
You can use Recurrent neural network as our time sequence model, including simple RNN model, GRU model, and LSTM model。 @@ -366,7 +366,7 @@ You can use single layer LSTM model with Dropout for our text classification pro
## Optimization Algorithm -Optimization algorithms include Momentum, RMSProp, AdaDelta, AdaGrad, Adam, and Adamax. You can use Adam optimization method here, with L2 regularization and gradient clipping, because Adam has been proved to work very well for training recurrent neural network. +Optimization algorithms include Momentum, RMSProp, AdaDelta, AdaGrad, Adam, and Adamax. You can use Adam optimization method here, with L2 regularization and gradient clipping, because Adam has been proved to work very well for training recurrent neural network. ```python settings(batch_size=128, @@ -378,7 +378,7 @@ settings(batch_size=128, ## Training Model After completing data preparation and network architecture specification, you will run the training script. -
![](./PipelineTrain_en.png)
+
![](./src/PipelineTrain_en.png)
Training script: our training script is in `train.sh` file. The training arguments are listed below: @@ -391,10 +391,11 @@ paddle train \ --use_gpu=false ``` -If you want to install the remote training platform, which enables distributed training on clusters, follow the instructions here: Platform documentation. We do not provide examples on how to train on clusters. Please refer to other demos or platform training documentation for mode details on training on clusters. +We do not provide examples on how to train on clusters here. If you want to train on clusters, please follow the distributed training documentation or other demos for more details. + ## Inference You can use the trained model to perform prediction on the dataset with no labels. You can also evaluate the model on dataset with labels to obtain its test accuracy. -
![](./PipelineTest_en.png)
+
![](./src/PipelineTest_en.png)
The test script is listed below. PaddlePaddle can evaluate a model on the data with labels specified in `test.list`. @@ -406,7 +407,7 @@ paddle train \ --init_model_path=./output/pass-0000x ``` -We will give an example of performing prediction using Recurrent model on a dataset with no labels. You can refer to: Python Prediction API tutorial,or other demo for the prediction process using Python. You can also use the following script for inference or evaluation. +We will give an example of performing prediction using Recurrent model on a dataset with no labels. You can refer to Python Prediction API tutorial,or other demo for the prediction process using Python. You can also use the following script for inference or evaluation. inference script (predict.sh): @@ -508,7 +509,7 @@ The scripts of data downloading, network configurations, and training scrips are * \--config_args:Other configuration arguments. * \--init_model_path:The path of the initial model parameter. -By default, the trainer will save model every pass. You can also specify `saving_period_by_batches` to set the frequency of batch saving. You can use `show_parameter_stats_period` to print the statistics of the parameters, which are very useful for tuning parameters. Other command line arguments can be found in command line argument documentation。 +By default, the trainer will save model every pass. You can also specify `saving_period_by_batches` to set the frequency of batch saving. You can use `show_parameter_stats_period` to print the statistics of the parameters, which are very useful for tuning parameters. Other command line arguments can be found in command line argument documentation。 ### Log diff --git a/doc_cn/demo/quick_start/NetContinuous.jpg b/doc/tutorials/quick_start/src/NetContinuous_cn.jpg similarity index 100% rename from doc_cn/demo/quick_start/NetContinuous.jpg rename to doc/tutorials/quick_start/src/NetContinuous_cn.jpg diff --git a/doc/tutorials/quick_start/NetContinuous_en.png b/doc/tutorials/quick_start/src/NetContinuous_en.png similarity index 100% rename from doc/tutorials/quick_start/NetContinuous_en.png rename to doc/tutorials/quick_start/src/NetContinuous_en.png diff --git a/doc_cn/demo/quick_start/NetConv.jpg b/doc/tutorials/quick_start/src/NetConv_cn.jpg similarity index 100% rename from doc_cn/demo/quick_start/NetConv.jpg rename to doc/tutorials/quick_start/src/NetConv_cn.jpg diff --git a/doc/tutorials/quick_start/NetConv_en.png b/doc/tutorials/quick_start/src/NetConv_en.png similarity index 100% rename from doc/tutorials/quick_start/NetConv_en.png rename to doc/tutorials/quick_start/src/NetConv_en.png diff --git a/doc_cn/demo/quick_start/NetLR.jpg b/doc/tutorials/quick_start/src/NetLR_cn.jpg similarity index 100% rename from doc_cn/demo/quick_start/NetLR.jpg rename to doc/tutorials/quick_start/src/NetLR_cn.jpg diff --git a/doc/tutorials/quick_start/NetLR_en.png b/doc/tutorials/quick_start/src/NetLR_en.png similarity index 100% rename from doc/tutorials/quick_start/NetLR_en.png rename to doc/tutorials/quick_start/src/NetLR_en.png diff --git a/doc_cn/demo/quick_start/NetRNN.jpg b/doc/tutorials/quick_start/src/NetRNN_cn.jpg similarity index 100% rename from doc_cn/demo/quick_start/NetRNN.jpg rename to doc/tutorials/quick_start/src/NetRNN_cn.jpg diff --git a/doc/tutorials/quick_start/NetRNN_en.png b/doc/tutorials/quick_start/src/NetRNN_en.png similarity index 100% rename from doc/tutorials/quick_start/NetRNN_en.png rename to doc/tutorials/quick_start/src/NetRNN_en.png diff --git 
a/doc_cn/demo/quick_start/PipelineNetwork.jpg b/doc/tutorials/quick_start/src/PipelineNetwork_cn.jpg similarity index 100% rename from doc_cn/demo/quick_start/PipelineNetwork.jpg rename to doc/tutorials/quick_start/src/PipelineNetwork_cn.jpg diff --git a/doc/tutorials/quick_start/PipelineNetwork_en.jpg b/doc/tutorials/quick_start/src/PipelineNetwork_en.jpg similarity index 100% rename from doc/tutorials/quick_start/PipelineNetwork_en.jpg rename to doc/tutorials/quick_start/src/PipelineNetwork_en.jpg diff --git a/doc_cn/demo/quick_start/PipelineTest.jpg b/doc/tutorials/quick_start/src/PipelineTest_cn.jpg similarity index 100% rename from doc_cn/demo/quick_start/PipelineTest.jpg rename to doc/tutorials/quick_start/src/PipelineTest_cn.jpg diff --git a/doc/tutorials/quick_start/PipelineTest_en.png b/doc/tutorials/quick_start/src/PipelineTest_en.png similarity index 100% rename from doc/tutorials/quick_start/PipelineTest_en.png rename to doc/tutorials/quick_start/src/PipelineTest_en.png diff --git a/doc_cn/demo/quick_start/PipelineTrain.jpg b/doc/tutorials/quick_start/src/PipelineTrain_cn.jpg similarity index 100% rename from doc_cn/demo/quick_start/PipelineTrain.jpg rename to doc/tutorials/quick_start/src/PipelineTrain_cn.jpg diff --git a/doc/tutorials/quick_start/PipelineTrain_en.png b/doc/tutorials/quick_start/src/PipelineTrain_en.png similarity index 100% rename from doc/tutorials/quick_start/PipelineTrain_en.png rename to doc/tutorials/quick_start/src/PipelineTrain_en.png diff --git a/doc_cn/demo/quick_start/Pipeline.jpg b/doc/tutorials/quick_start/src/Pipeline_cn.jpg similarity index 100% rename from doc_cn/demo/quick_start/Pipeline.jpg rename to doc/tutorials/quick_start/src/Pipeline_cn.jpg diff --git a/doc/tutorials/quick_start/Pipeline_en.jpg b/doc/tutorials/quick_start/src/Pipeline_en.jpg similarity index 100% rename from doc/tutorials/quick_start/Pipeline_en.jpg rename to doc/tutorials/quick_start/src/Pipeline_en.jpg diff --git a/doc/tutorials/rec/ml_dataset_cn.md b/doc/tutorials/rec/ml_dataset_cn.md new file mode 100644 index 0000000000000000000000000000000000000000..2207a776f0774e72aba15169e59258dd04583637 --- /dev/null +++ b/doc/tutorials/rec/ml_dataset_cn.md @@ -0,0 +1,105 @@ +```eval_rst +.. 
+_demo_ml_dataset:
+
+```
+
+# MovieLens数据集
+
+[MovieLens 数据集](http://grouplens.org/datasets/movielens/)由GroupLens Research实验室搜集整理。
+该数据集包含一些用户信息、电影信息以及电影评分\[1-5\]。根据数据量规模,该数据集有很多不同的版本。
+我们用[MovieLens 百万数据集](http://files.grouplens.org/datasets/movielens/ml-1m.zip)作为示例数据集,其中包含6,000位用户对4,000部电影的1,000,000条评价。该数据集于2003年2月发布。
+
+## 数据集特征
+
+[ml-1m 数据集](http://files.grouplens.org/datasets/movielens/ml-1m.zip)中有许多的特征。这些数据文件(含有".dat"的后缀)实际上是CSV文件,分隔符为"::"。以下我们翻译数据集网站中README文件的描述:
+
+### 评分文件描述(ratings.dat)
+
+所有的评分数据都包含在"ratings.dat"文件中,遵循如下的格式:
+
+用户ID::电影ID::评分::时间戳
+
+- 用户ID范围从1到6040
+- 电影ID范围从1到3952
+- 评分被调整为5星的规模(只允许整数的星级)
+- 时间戳表示为从1970-01-01(UTC)来的秒数,与time(2)的返回值一致
+- 每位用户至少有20条评分
+
+### 用户文件描述(users.dat)
+
+所有的用户信息都包含在"users.dat"文件中,遵循如下的格式:
+
+用户ID::性别::年龄::职业::邮编
+
+所有的人口统计学信息由用户自愿提供,没有进行正确性的检查。只有含有人口统计学信息的用户才被包含在数据集中。
+
+- 性别,用"M"表示男性,"F"表示女性
+- 年龄从下列列表范围中选取:
+
+	* 1: "18岁以下"
+	* 18: "18-24岁"
+	* 25: "25-34岁"
+	* 35: "35-44岁"
+	* 45: "45-49岁"
+	* 50: "50-55岁"
+	* 56: "56+"
+
+- 职业从下面所列中选择:
+
+	* 0: "其他"或不确定
+	* 1: "学术/教育工作者"
+	* 2: "艺术家"
+	* 3: "文书工作/管理员"
+	* 4: "大学生/研究生"
+	* 5: "客户服务"
+	* 6: "医生/医疗保健"
+	* 7: "行政工作/管理人员"
+	* 8: "农民"
+	* 9: "操持家务者"
+	* 10: "高中毕业生"
+	* 11: "律师"
+	* 12: "程序员"
+	* 13: "退休人员"
+	* 14: "销售/市场"
+	* 15: "科学家"
+	* 16: "自由职业者"
+	* 17: "技术员/工程师"
+	* 18: "推销员/手工艺者"
+	* 19: "无业人士"
+	* 20: "作家"
+
+### 电影文件描述(movies.dat)
+
+所有的电影信息都包含在"movies.dat"文件中,遵循如下的格式:
+
+电影ID::电影名称::电影类型
+
+- 电影名称(包括发行时间)与IMDB网站提供的一致
+- 如果一部电影属于多种类型,则用管道符号|分隔;电影类型选自下列类型:
+
+	* 动作片
+	* 冒险片
+	* 动画片
+	* 儿童片
+	* 喜剧片
+	* 犯罪片
+	* 纪录片
+	* 戏剧
+	* 奇幻片
+	* 黑色电影
+	* 恐怖片
+	* 音乐剧
+	* 悬疑片
+	* 浪漫片
+	* 科幻片
+	* 惊险电影
+	* 战争片
+	* 西部片
+
+- 由于意外的副本记录和测试记录,有些电影ID可能与实际电影不相符合
+- 电影大部分是手工输入数据,因此可能会有一些错误和不一致发生
diff --git a/doc/tutorials/rec/ml_dataset_en.md b/doc/tutorials/rec/ml_dataset_en.md
index dc11a5e06031b62d9f86e4dd83a14b2f1a72afc3..25dea5c4afbf1ce1c1ac6195cbd245b116459e2e 100644
--- a/doc/tutorials/rec/ml_dataset_en.md
+++ b/doc/tutorials/rec/ml_dataset_en.md
@@ -1,6 +1,5 @@
 ```eval_rst
-.. _demo_ml_dataset_en:
-
+.. _demo_ml_dataset:
 ```
 
 # MovieLens Dataset
diff --git a/doc/tutorials/rec/ml_regression_cn.rst b/doc/tutorials/rec/ml_regression_cn.rst
new file mode 100644
index 0000000000000000000000000000000000000000..9278c9f603b648099f448963bc2246b8dc014ab7
--- /dev/null
+++ b/doc/tutorials/rec/ml_regression_cn.rst
@@ -0,0 +1,349 @@
+MovieLens数据集评分回归模型
+===========================
+
+这里我们在MovieLens数据集上描述一种 **余弦相似度回归** 任务。
+该示例将展示paddle如何进行词向量嵌入、处理相似度回归,以及针对文本的单词级别的卷积神经网络,并展示paddle如何处理多种类型的输入。
+需要注意的是,该网络模型只是用于demo,展示paddle如何工作,并没有对结构进行微调。
+
+
+**我们非常欢迎您用PaddlePaddle构建更好的示例,如果您有好的建议来让这个示例变得更好,希望能让我们知晓。**
+
+数据准备
+`````````
+下载并解压数据集
+'''''''''''''''''
+这里我们使用 :ref:`demo_ml_dataset` 。
+要下载和解压数据集,只需要简单地运行下面的命令即可。
+
+.. code-block:: bash
+
+    cd demo/recommendation/data
+    ./ml_data.sh
+
+:code:`demo/recommendation/data/ml-1m` 的目录结构为:
+
+.. code-block:: text
+
+    +--ml-1m
+         +--- movies.dat    # 电影特征
+         +--- ratings.dat   # 评分
+         +--- users.dat     # 用户特征
+         +--- README        # 数据集描述
+
+字段配置文件
+'''''''''''''
+**字段配置文件** 用来具体说明数据集的字段和文件格式,例如,说明每个特征文件具体字段是 **什么** 类型。
+
+ml-1m的字段配置文件在目录 :code:`demo/recommendation/data/config.json` 中。其具体说明了字段类型和文件名称:
+
+1) 用户文件中有四种类型的字段\: 编号,性别,年龄和职业;
+
+2) 文件名称为"users.dat",文件的分隔符为"::"。
+
+.. include:: ../../../demo/recommendation/data/config.json
+   :code: json
+   :literal:
+
+准备数据
+`````````
+你需要安装python的第三方库。
+**强烈推荐使用VIRTUALENV来创建一个干净的python环境。**
+
+.. code-block:: bash
+
+    pip install -r requirements.txt
+
+预处理数据一般的命令为:
+
+.. code-block:: bash
+
+    cd demo/recommendation
+    ./preprocess.sh
+
+下面介绍预处理过程具体的步骤。
+
+提取电影或用户的特征并生成python对象
+'''''''''''''''''''''''''''''''''''''
+
+在movielens 1m数据集中,电影和用户有许多的特征。
+评分文件的每一行仅仅提供电影或用户的编号来代表相应的电影或用户。
+我们首先处理电影或用户的特征文件,然后用pickle命令将特征( **Meta** )对象存储为文件。
+
+Meta配置文件
+.............
+
+**Meta配置文件** 用来具体描述 **如何** 解析数据集中的每一个字段。
+该文件可以从字段配置文件生成,或是手动编辑生成。文件的格式可以为json或yaml格式。解析器能通过文件的扩展名自动识别文件的格式。
+
+要将字段配置文件转化为meta配置文件,只需要运行:
+
+.. code-block:: bash
+
+    cd demo/recommendation/data
+    python config_generator.py config.json > meta_config.json
+
+生成的meta配置文件如下所示:
+
+.. include:: ../../../demo/recommendation/data/meta_config.json
+   :code: json
+   :literal:
+
+在meta文件中有两种特征\: 电影和用户。
+
+* 在电影文件movies.dat中
+    * 我们仅用"::"来分隔每一行
+    * pos 0 代表编号
+    * pos 1 特征:
+        * name是电影名
+        * 利用正则表达式来解析该特征
+        * 基于字母的词嵌入特征
+        * 是序列
+    * pos 2 特征:
+        * name是体裁
+        * type是one hot稠密向量
+        * dictionary由解析自动生成,每一个key由'|'分隔
+* 在用户文件users.dat中
+    * 我们仅用"::"来分隔每一行
+    * pos 0 代表编号
+    * pos 1 特征:
+        * name是性别
+        * 简单的基于字母的词嵌入
+    * pos 2 特征:
+        * name是年龄
+        * 是整词的词嵌入
+        * 嵌入编号会根据单词排序
+    * pos 3 特征:
+        * name是职业
+        * 简单的基于整词的词嵌入
+
+
+Meta文件
+''''''''
+
+有了meta配置文件之后,我们可以生成 **Meta文件** ,该文件是python的pickle对象,存储着电影或用户信息。可以运行下面的命令来生成。
+
+.. code-block:: bash
+
+    python meta_generator.py ml-1m meta.bin --config=meta_config.json
+
+meta文件 :code:`meta.bin` 的结构如下:
+
+.. code-block:: text
+
+    +--+ movie
+    |      +--+ __meta__
+    |      |       +--+ raw_meta  # 每个特征的meta配置。列表
+    |      |       |       +
+    |      |       |       |     # 编号字段,我们用编号作为key
+    |      |       |       +--+ {'count': 3883, 'max': 3952, 'is_key': True, 'type': 'id', 'min': 1}
+    |      |       |       |
+    |      |       |       |     # 电影名字段,嵌入特征字典
+    |      |       |       +--+ {'dict': [ ... ], 'type': 'embedding', 'name': 'title', 'seq': 'sequence'}
+    |      |       |       |
+    |      |       |       |     # 体裁字段,体裁字典
+    |      |       |       +--+ {'dict': [ ... ], 'type': 'one_hot_dense', 'name': 'genres'}
+    |      |       |
+    |      |       +--+ feature_map [1, 2] # a list for raw_meta index for feature field.
+    |      |                               # it means there are 2 features for each key.
+    |      |                               # * 0 offset of feature is raw_meta[1], Title.
+    |      |                               # * 1 offset of feature is raw_meta[2], Genres.
+    |      |
+    |      +--+ 1 # 电影1的特征
+    |      |    +
+    |      |    +---+ [[...], [...]] # title ids, genres dense vector
+    |      |
+    |      +--+ 2
+    |      |
+    |      +--+ ...
+    |
+    +--- user
+           +--+ __meta__
+           |       +
+           |       +--+ raw_meta
+           |       |       +
+           |       |       +--+ id field as user
+           |       |       |
+           |       |       +--+ {'dict': ['F', 'M'], 'type': 'embedding', 'name': 'gender', 'seq': 'no_sequence'}
+           |       |       |
+           |       |       +--+ {'dict': ['1', '18', '25', '35', '45', '50', '56'], 'type': 'embedding', 'name': 'age', 'seq': 'no_sequence'}
+           |       |       |
+           |       |       +--+ {'dict': [...], 'type': 'embedding', 'name': 'occupation', 'seq': 'no_sequence'}
+           |       |
+           |       +--+ feature_map [1, 2, 3]
+           |
+           +--+ 1 # 用户1的特征
+           |
+           +--+ 2
+           +--+ ...
+
+
+分割训练/测试文件
+''''''''''''''''''
+
+我们将 :code:`ml-1m/ratings.dat` 文件分割为训练和测试文件。分割文件的方法是:对于每位用户,我们将评分分成两部分。这样的话,测试文件中的每位用户在训练文件中也会含有评分信息。
+
+用 :code:`split.py` 来分离训练和测试文件。
+
+.. code-block:: bash
+
+    python split.py ml-1m/ratings.dat --delimiter="::" --test_ratio=0.1
+
+这样就会生成两个文件::code:`ml-1m/ratings.dat.train` 和 :code:`ml-1m/ratings.dat.test` 。
+将它们移动到目录 :code:`data` ,然后进行随机打乱,再为paddle的训练过程提供文件列表。
+
+.. code-block:: bash
+
+    shuf ml-1m/ratings.dat.train > ratings.dat.train
+    cp ml-1m/ratings.dat.test .
+    echo "./data/ratings.dat.train" > train.list
+    echo "./data/ratings.dat.test" > test.list
+
+
+神经网络结构配置
+`````````````````
+
+训练器配置文件
+'''''''''''''''
+
+网络结构如下图所示:
+
+.. image:: rec_regression_network.png
+   :align: center
+   :alt: rec_regression_network
+
+该示例的神经网络配置文件 :code:`trainer_config.py` 如下所示:
+
+.. literalinclude:: ../../../demo/recommendation/trainer_config.py
+   :language: python
+   :lines: 15-
+
+在文件 :code:`trainer_config.py` 中,我们仅仅是将每个特征种类映射到一个特征向量中,以下展示了如何将每个特征映射到一个向量。
+
+* :code:`id` \: 仅仅是简单的嵌入,然后添加一个全连接层。
+* :code:`embedding` \:
+    - 如果是序列,则先做嵌入,然后再做一次文本卷积网络操作,然后得到平均采样的结果。
+    - 如果不是序列,则先做嵌入,然后添加一个全连接层。
+* :code:`one_hot_dense` \:
+    - 仅仅是两个全连接层。
+
+然后我们利用多输入的 :code:`fc_layer` 全连接层将电影的每个特征结合成一个电影特征,并且对用户的特征做同样的操作,也得到一个用户特征。然后我们求这两个特征的余弦相似度。
+
+在这些网络中,我们用到了以下 :ref:`api_trainer_config` 中的接口。
+
+* 数据层, :ref:`api_trainer_config_helpers_layers_data_layer`
+* 全连接层, :ref:`api_trainer_config_helpers_layers_fc_layer`
+* 嵌入层, :ref:`api_trainer_config_helpers_layers_embedding_layer`
+* 文本投影层, :ref:`api_trainer_config_helpers_layers_context_projection`
+* 采样层, :ref:`api_trainer_config_helpers_layers_pooling_layer`
+* 余弦相似度层, :ref:`api_trainer_config_helpers_layers_cos_sim`
+* 文本卷积采样层, :ref:`api_trainer_config_helpers_network_text_conv_pool`
+* 声明Python数据源, :ref:`api_trainer_config_helpers_data_sources`
+
+数据提供脚本
+'''''''''''''
+
+.. literalinclude:: ../../../demo/recommendation/dataprovider.py
+   :language: python
+   :lines: 15-
+
+数据提供脚本仅仅是读取meta.bin和评分文件,生成训练需要的样本。
+在脚本 :code:`dataprovider.py` 中,我们需要设置:
+
+* obj.slots\: 特征的类型和维度。
+* use_seq\: :code:`dataprovider.py` 中的数据是否为序列模式。
+* process\: 返回数据的每一条样本给 :code:`paddle` 。
+
+数据提供脚本的细节文档可以参考 :ref:`api_pydataprovider2` 。
+
+训练
+````
+
+准备好数据,配置了网络,编写好数据提供脚本后,现在我们可以开始paddle训练了。
+
+代码 :code:`run.sh` 如下:
+
+.. literalinclude:: ../../../demo/recommendation/run.sh
+   :language: bash
+   :lines: 16-
+
+该脚本仅仅是开始一个paddle训练过程,将日志写入文件 :code:`log.txt` ,然后打印在屏幕上。
+
+脚本 :code:`run.sh` 中的每一行命令,请参考页面 :ref:`cmd_line_index` 。这些参数的简短介绍如下:
+
+* config\: 告诉paddle哪个文件是神经网络的配置文件。
+* save_dir\: 告诉paddle将模型保存在 :code:`./output` 中。
+* use_gpu\: 是否使用GPU,默认为不使用。
+* trainer_count\: 一台机器上面的线程数量。
+* test_all_data_in_one_period\: 每一个测试周期测试一次所有数据。否则,每个测试周期测试 :code:`batch_size` 批次的数据。
+* log_period\: 在训练了 :code:`log_period` 批次后打印日志。
+* dot_period\: 在每训练 :code:`dot_period` 个批次后打印一个 :code:`.` 。
+* num_passes\: 训练至多 :code:`num_passes` 轮。
+
+如果训练过程启动成功的话,输出应该类似如下:
+
+.. code-block:: text
+
+    I0601 08:07:22.832059 10549 TrainerInternal.cpp:157]  Batch=100 samples=160000 AvgCost=4.13494 CurrentCost=4.13494 Eval:  CurrentEval:
+
+    I0601 08:07:50.672627 10549 TrainerInternal.cpp:157]  Batch=200 samples=320000 AvgCost=3.80957 CurrentCost=3.48421 Eval:  CurrentEval:
+
+    I0601 08:08:18.877369 10549 TrainerInternal.cpp:157]  Batch=300 samples=480000 AvgCost=3.68145 CurrentCost=3.42519 Eval:  CurrentEval:
+
+    I0601 08:08:46.863963 10549 TrainerInternal.cpp:157]  Batch=400 samples=640000 AvgCost=3.6007 CurrentCost=3.35847 Eval:  CurrentEval:
+
+    I0601 08:09:15.413025 10549 TrainerInternal.cpp:157]  Batch=500 samples=800000 AvgCost=3.54811 CurrentCost=3.33773 Eval:  CurrentEval:
+    I0601 08:09:36.058670 10549 TrainerInternal.cpp:181]  Pass=0 Batch=565 samples=902826 AvgCost=3.52368 Eval:
+    I0601 08:09:46.215489 10549 Tester.cpp:101]  Test samples=97383 cost=3.32155 Eval:
+    I0601 08:09:46.215966 10549 GradientMachine.cpp:132]  Saving parameters to ./output/model/pass-00000
+    I0601 08:09:46.233397 10549 ParamUtil.cpp:99]  save dir ./output/model/pass-00000
+    I0601 08:09:46.233438 10549 Util.cpp:209]  copy trainer_config.py to ./output/model/pass-00000
+    I0601 08:09:46.233541 10549 ParamUtil.cpp:147]  fileName trainer_config.py
+
+模型被保存在 :code:`output/` 目录中。你可以在任何时候用 :code:`Ctrl-C` 来停止训练。
+
+模型评估和预测
+```````````````
+
+在训练了几个轮次以后,你可以对模型进行评估,得到最好轮次下的模型。运行下面命令即可:
+
+.. code-block:: bash
+
+    ./evaluate.sh
+
+你将看到如下的信息:
+
+.. code-block:: text
+
+    Best pass is 00009, error is 3.06949, which means predict get error as 0.875998002281
+    evaluating from pass output/pass-00009
+
+然后,你可以预测任何用户对于任何一部电影的评价,运行下面命令即可:
+
+.. code-block:: bash
+
+    python prediction.py 'output/pass-00009/'
+
+预测程序将读取用户的输入,然后输出预测分数。用户预测的命令行界面如下:
+
+.. code-block:: text
+
+    Input movie_id: 9
+    Input user_id: 4
+    Prediction Score is 2.56
+    Input movie_id: 8
+    Input user_id: 2
+    Prediction Score is 3.13
diff --git a/doc/tutorials/rec/ml_regression_en.rst b/doc/tutorials/rec/ml_regression_en.rst
index ddc00dc706535e1204b033b505ee8bd579f8dea3..993b9a516f134ff8b59e8755b721f76c8f32f0fd 100644
--- a/doc/tutorials/rec/ml_regression_en.rst
+++ b/doc/tutorials/rec/ml_regression_en.rst
@@ -16,7 +16,7 @@ Data Preparation
 ````````````````
 Download and extract dataset
 ''''''''''''''''''''''''''''
-We use :ref:`demo_ml_dataset_en` here.
+We use :ref:`demo_ml_dataset` here.
 To download and unzip the dataset, simply run the following commands.
 
 .. code-block:: bash
@@ -36,7 +36,7 @@ And the directory structure of :code:`demo/recommendation/data/ml-1m` is:
 
 Field config file
 '''''''''''''''''
-**Field config file** is used to specific the fields dataset and file format,
+**Field config file** is used to specify the fields of the dataset and the file format,
 i.e, specific **WHAT** type it is in each feature file.
 
 The field config file of ml-1m shows in :code:`demo/recommendation/data/config.json`.
@@ -188,7 +188,7 @@ Split Training/Testing files
 We split :code:`ml-1m/ratings.dat` into a training and testing file. The way to split file is for each user, we split the rating by two parts. So each user in testing file will have some rating information in training file.
 
-Use separate.py to separate the training and testing file.
+Use :code:`split.py` to split the training and testing file.
 
 .. code-block:: bash
 
@@ -217,7 +217,7 @@ The network structure shows below.
    :align: center
    :alt: rec_regression_network
 
-The demo's neural network config file "trainer_config.py" show as below.
+The demo's neural network config file :code:`trainer_config.py` is shown below.
 
 .. literalinclude:: ../../../demo/recommendation/trainer_config.py
    :language: python
@@ -239,7 +239,7 @@ Then we combine each features of movie into one movie feature by a
 get one user feature. Then we calculate the cosine similarity of these two
 features.
 
-In these network, we use several api in :ref:`api_trainer_config` . There are
+In these networks, we use several APIs in :ref:`api_trainer_config` . There are
 
 * Data Layer, :ref:`api_trainer_config_helpers_layers_data_layer`
 * Fully Connected Layer, :ref:`api_trainer_config_helpers_layers_fc_layer`
@@ -264,26 +264,26 @@ In this :code:`dataprovider.py`, we should set\:
 * use_seq\: Whether this :code:`dataprovider.py` in sequence mode or not.
 * process\: Return each sample of data to :code:`paddle`.
 
-The data provider details document see :ref:`api_pydataprovider`.
+The data provider details are documented in :ref:`api_pydataprovider2`.
 
 Train
 `````
 
 After prepare data, config network, writting data provider, now we can run paddle training.
 
-The run.sh is shown as follow:
+The :code:`run.sh` is shown as follows:
 
 .. literalinclude:: ../../../demo/recommendation/run.sh
    :language: bash
   :lines: 16-
 
-It just start a paddle training process, write the log to `log.txt`,
+It just starts a paddle training process, writes the log to :code:`log.txt`,
 then print it on screen.
-Each command line argument in :code:`run.sh`, please refer to the :ref:`cmd_line_index_en` page. The short description of these arguments is shown as follow.
+For each command line argument in :code:`run.sh`, please refer to the :ref:`cmd_line_index` page. A short description of these arguments is shown as follows.
 
 * config\: Tell paddle which file is neural network configuration.
-* save_dir\: Tell paddle save model into './output'
+* save_dir\: Tell paddle to save the model into :code:`./output`.
 * use_gpu\: Use gpu or not. Default is false.
 * trainer_count\: The compute thread in one machine.
 * test_all_data_in_one_period\: Test All Data during one test period. Otherwise,
diff --git a/doc/tutorials/semantic_role_labeling/index_cn.md b/doc/tutorials/semantic_role_labeling/index_cn.md
index c7e0a78f5071ed0d1702036f4ee0af3881096c68..f6061766c038a7bb6e4ae376685a10cd5669d2ed 100644
--- a/doc/tutorials/semantic_role_labeling/index_cn.md
+++ b/doc/tutorials/semantic_role_labeling/index_cn.md
@@ -149,7 +149,7 @@ paddle train \
 训练后,模型将保存在目录`output`中。 我们的训练曲线如下:
-![pic](./curve.jpg) +![pic](./src/curve.jpg)
### 测试 diff --git a/doc/tutorials/semantic_role_labeling/index_en.md b/doc/tutorials/semantic_role_labeling/index_en.md index f5bdf64487aa189cefcd55d633cc6638912b9e31..92d7c634832119c718711a57c16f69492d405f28 100644 --- a/doc/tutorials/semantic_role_labeling/index_en.md +++ b/doc/tutorials/semantic_role_labeling/index_en.md @@ -1,3 +1,7 @@ +```eval_rst +.. _semantic_role_labeling: +``` + # Semantic Role labeling Tutorial # Semantic role labeling (SRL) is a form of shallow semantic parsing whose goal is to discover the predicate-argument structure of each predicate in a given input sentence. SRL is useful as an intermediate step in a wide range of natural language processing tasks, such as information extraction. automatic document categorization and question answering. An instance is as following [1]: @@ -41,13 +45,13 @@ Unlike Bidirectional-LSTM that used in Sentiment Analysis demo, the DB-LSTM ado The following figure shows a temporal expanded 2-layer DB-LSTM network.
-![pic](./network_arch.png) +![pic](./src/network_arch.png)
### Features Two input features play an essential role in this pipeline: predicate (pred) and argument (argu). Two other features: predicate context (ctx-p) and region mark (mr) are also adopted. Because a single predicate word can not exactly describe the predicate information, especially when the same words appear more than one times in a sentence. With the predicate context, the ambiguity can be largely eliminated. Similarly, we use region mark mr = 1 to denote the argument position if it locates in the predicate context region, or mr = 0 if does not. These four simple features are all we need for our SRL system. Features of one sample with context size set to 1 is showed as following[2]:
-![pic](./feature.jpg) +![pic](./src/feature.jpg)
In this sample, the coresponding labelled sentence is: @@ -148,7 +152,7 @@ paddle train \ After training, the models will be saved in directory `output`. Our training curve is as following:
-![pic](./curve.jpg) +![pic](./src/curve.jpg)
### Run testing diff --git a/doc/tutorials/semantic_role_labeling/semantic_role_labeling_cn.md b/doc/tutorials/semantic_role_labeling/semantic_role_labeling_cn.md deleted file mode 100644 index f3c855a9fd72b894ab69050b08c750fe9e4aa1a2..0000000000000000000000000000000000000000 --- a/doc/tutorials/semantic_role_labeling/semantic_role_labeling_cn.md +++ /dev/null @@ -1,201 +0,0 @@ -# 语义角色标注教程 # - -语义角色标注(Semantic role labeling, SRL)是浅语义解析的一种形式,其目的是在给定的输入句子中发现每个谓词的谓词参数结构。 SRL作为很多自然语言处理任务中的中间步骤是很有用的,如信息提取、文档自动分类和问答。 实例如下 [1]: - - [ A0 他 ] [ AM-MOD 将 ][ AM-NEG 不会 ] [ V 接受] [ A1 任何东西 ] 从 [A2 那些他写的东西中 ]。 - -- V: 动词 -- A0: 接受者 -- A1: 接受的东西 -- A2: 从……接受 -- A3: 属性 -- AM-MOD: 情态动词 -- AM-NEG: 否定 - -给定动词“接受”,句子中的大部分将会扮演某些语义角色。这里,标签方案来自 Penn Proposition Bank。 - -到目前为止,大多数成功的SRL系统是建立在某种形式的解析结果之上的,其中在语法结构上使用了预先定义的特征模板。 本教程将介绍使用深度双向长短期记忆(DB-LSTM)模型[2]的端到端系统来解决SRL任务,这在很大程度上优于先前的最先进的系统。 这个系统将SRL任务视为序列标记问题。 - -## 数据描述 -相关论文[2]采用 CoNLL-2005&2012 共享任务中设置的数据进行训练和测试。根据数据许可证,演示采用 CoNLL-2005 的测试数据集,可以在网站上找到。 - -用户只需执行以下命令就可以下载并处理原始数据: - -```bash -cd data -./get_data.sh -``` -`data `目录会出现如下几个新的文件: -```bash -conll05st-release:the test data set of CoNll-2005 shared task -test.wsj.words:the Wall Street Journal data sentences -test.wsj.props: the propositional arguments -feature: the extracted features from data set -``` - -## 训练 -### DB-LSTM -请参阅情绪分析的演示以了解有关长期短期记忆单元的更多信息。 - -与在 Sentiment Analysis 演示中使用的 Bidirectional-LSTM 不同,DB-LSTM 采用另一种方法来堆叠LSTM层。首先,标准LSTM以正向处理该序列。该 LSTM 层的输入和输出作为下一个 LSTM 层的输入,并被反向处理。这两个标准 LSTM 层组成一对 LSTM。然后我们堆叠一对对的 LSTM 层后得到深度 LSTM 模型。 - -下图展示了时间扩展的2层 DB-LSTM 网络。 -
-![pic](./network_arch.png) -
- -### 特征 -两个输入特性在这个管道中起着至关重要的作用:predicate(pred)和argument(arguments)。 还采用了两个其他特征:谓词上下文(ctx-p)和区域标记(mr)。 因为单个谓词不能精确地描述谓词信息,特别是当相同的词在句子中出现多于一次时。 使用谓词上下文,可以在很大程度上消除歧义。类似地,如果它位于谓词上下文区域中,则使用区域标记 mr = 1 来表示参数位置,反之则 mr = 0。这四个简单的特征是我们的SRL系统所需要的。上下文大小设置为1的一个样本的特征如下[2]所示: -
-![pic](./feature.jpg) -
- -在这个示例中,相应的标记句子是: - -[ A1 A record date ] has [ AM-NEG n't ] been [ V set ] . - -在演示中, 我们采用上面的特征模板, 包括: `argument`, `predicate`, `ctx-p (p=-1,0,1)`, `mark` 并使用 `B/I/O` 方案来标记每个参数。这些特征和标签存储在 `feature` 文件中, 用`\t`分割。 - -### 数据提供 - -`dataprovider.py` 是一个包装数据的 Python 文件。 函数 `hook()` 定义了网络的数据槽。六个特征和标签都是索引槽。 -``` -def hook(settings, word_dict, label_dict, **kwargs): - settings.word_dict = word_dict - settings.label_dict = label_dict - #all inputs are integral and sequential type - settings.slots = [ - integer_value_sequence(len(word_dict)), - integer_value_sequence(len(predicate_dict)), - integer_value_sequence(len(word_dict)), - integer_value_sequence(len(word_dict)), - integer_value_sequence(len(word_dict)), - integer_value_sequence(len(word_dict)), - integer_value_sequence(len(word_dict)), - integer_value_sequence(2), - integer_value_sequence(len(label_dict))] -``` -相应的数据迭代器如下: -``` -@provider(init_hook=hook, should_shuffle=True, calc_batch_size=get_batch_size, - can_over_batch_size=False, cache=CacheType.CACHE_PASS_IN_MEM) -def process(settings, file_name): - with open(file_name, 'r') as fdata: - for line in fdata: - sentence, predicate, ctx_n2, ctx_n1, ctx_0, ctx_p1, ctx_p2, mark, label = \ - line.strip().split('\t') - - words = sentence.split() - sen_len = len(words) - word_slot = [settings.word_dict.get(w, UNK_IDX) for w in words] - - predicate_slot = [settings.predicate_dict.get(predicate)] * sen_len - ctx_n2_slot = [settings.word_dict.get(ctx_n2, UNK_IDX)] * sen_len - ctx_n1_slot = [settings.word_dict.get(ctx_n1, UNK_IDX)] * sen_len - ctx_0_slot = [settings.word_dict.get(ctx_0, UNK_IDX)] * sen_len - ctx_p1_slot = [settings.word_dict.get(ctx_p1, UNK_IDX)] * sen_len - ctx_p2_slot = [settings.word_dict.get(ctx_p2, UNK_IDX)] * sen_len - - marks = mark.split() - mark_slot = [int(w) for w in marks] - - label_list = label.split() - label_slot = [settings.label_dict.get(w) for w in label_list] - yield word_slot, predicate_slot, ctx_n2_slot, ctx_n1_slot, \ - ctx_0_slot, ctx_p1_slot, ctx_p2_slot, mark_slot, label_slot -``` -函数 `process` 产出有8个特征和标签的9个表。 - -### 神经网络配置 - -`db_lstm.py` 是在训练过程中加载字典并定义数据提供程序模块和网络架构的神经网络配置文件。 - -九个 `data_layer` 从数据提供程序加载实例。八个特征分别转换为嵌入,并由`mixed_layer`混合。 深度双向LSTM层提取softmax层的特征。目标函数是标签的交叉熵。 - -### 训练 -训练的脚本是 `train.sh`,用户只需执行: -```bash - ./train.sh -``` -`train.sh` 中的内容: -``` -paddle train \ - --config=./db_lstm.py \ - --use_gpu=0 \ - --log_period=5000 \ - --trainer_count=1 \ - --show_parameter_stats_period=5000 \ - --save_dir=./output \ - --num_passes=10000 \ - --average_test_period=10000000 \ - --init_model_path=./data \ - --load_missing_parameter_strategy=rand \ - --test_all_data_in_one_period=1 \ -2>&1 | tee 'train.log' -``` - -- \--config=./db_lstm.py : 网络配置文件 -- \--use_gpu=false: 使用 CPU 训练(如果已安装 PaddlePaddle GPU版本并想使用 GPU 训练可以设置为true,目前 crf_layer 不支持 GPU) -- \--log_period=500: 每20批(batch)输出日志 -- \--trainer_count=1: 设置线程数(或 GPU 数) -- \--show_parameter_stats_period=5000: 每100批显示参数统计 -- \--save_dir=./output: 模型输出路径 -- \--num_passes=10000: 设置通过数,一次通过意味着PaddlePaddle训练数据集中的所有样本一次 -- \--average_test_period=10000000: 每个 average_test_period 批次对平均参数进行测试 -- \--init_model_path=./data: 参数初始化路径 -- \--load_missing_parameter_strategy=rand: 随机初始不存在的参数 -- \--test_all_data_in_one_period=1: 在一个周期内测试所有数据 - - -训练后,模型将保存在目录`output`中。 我们的训练曲线如下: -
-![pic](./curve.jpg) -
- -### 测试 -测试脚本是 `test.sh`, 执行: -```bash - ./test.sh -``` -`tesh.sh` 的主要部分: -``` -paddle train \ - --config=./db_lstm.py \ - --model_list=$model_list \ - --job=test \ - --config_args=is_test=1 \ -``` - - - \--config=./db_lstm.py: 网络配置文件 - - \--model_list=$model_list.list: 模型列表文件 - - \--job=test: 指示测试任务 - - \--config_args=is_test=1: 指示测试任务的标记 - - \--test_all_data_in_one_period=1: 在一个周期内测试所有数据 - - -### 预测 -预测脚本是 `predict.sh`,用户只需执行: -```bash - ./predict.sh - -``` -在`predict.sh`中,用户应该提供网络配置文件,模型路径,标签文件,字典文件,特征文件。 -``` -python predict.py - -c $config_file \ - -w $best_model_path \ - -l $label_file \ - -p $predicate_dict_file \ - -d $dict_file \ - -i $input_file \ - -o $output_file -``` - -`predict.py` 是主要的可执行python脚本,其中包括函数:加载模型,加载数据,数据预测。网络模型将输出标签的概率分布。 在演示中,我们使用最大概率的标签作为结果。用户还可以根据概率分布矩阵实现集束搜索或维特比解码。 - -预测后,结果保存在 `predict.res` 中。 - -## 引用 -[1] Martha Palmer, Dan Gildea, and Paul Kingsbury. The Proposition Bank: An Annotated Corpus of Semantic Roles , Computational Linguistics, 31(1), 2005. - -[2] Zhou, Jie, and Wei Xu. "End-to-end learning of semantic role labeling using recurrent neural networks." Proceedings of the Annual Meeting of the Association for Computational Linguistics. 2015. diff --git a/doc/tutorials/semantic_role_labeling/curve.jpg b/doc/tutorials/semantic_role_labeling/src/curve.jpg similarity index 100% rename from doc/tutorials/semantic_role_labeling/curve.jpg rename to doc/tutorials/semantic_role_labeling/src/curve.jpg diff --git a/doc/tutorials/semantic_role_labeling/src/feature.jpg b/doc/tutorials/semantic_role_labeling/src/feature.jpg new file mode 100644 index 0000000000000000000000000000000000000000..0e3310e4ace5613917e7779d3198ccbb3cdc5ada Binary files /dev/null and b/doc/tutorials/semantic_role_labeling/src/feature.jpg differ diff --git a/doc/tutorials/semantic_role_labeling/src/network_arch.png b/doc/tutorials/semantic_role_labeling/src/network_arch.png new file mode 100644 index 0000000000000000000000000000000000000000..4ae7864212f2a0a38102ee7ff600527ea99fec82 Binary files /dev/null and b/doc/tutorials/semantic_role_labeling/src/network_arch.png differ diff --git a/doc/tutorials/sentiment_analysis/index_cn.md b/doc/tutorials/sentiment_analysis/index_cn.md new file mode 100644 index 0000000000000000000000000000000000000000..1323ec1a6abb2e7b5eeb2fbfff9cce5fe78a2c06 --- /dev/null +++ b/doc/tutorials/sentiment_analysis/index_cn.md @@ -0,0 +1,325 @@ +# 情感分析教程 + +情感分析有许多应用场景。 一个基本的应用场景是区分给定文本的褒贬两极性,给定的文本可以是一个文档、句子、或者是一个小的文本片段。 一个简单的例子如:把用户在购物网站、旅游网站、团购网站(亚马逊、天猫、淘宝等)上发表的评论分成正面评论和负面评论两类。 + +情感分析也常用于基于大量评论和个人博客来监控社会媒体。 例如,研究人员分析了几个关于消费者信心和政治观点的调查,结果发现它们与同时期的Twitter消息中的情绪词频率相关 [1]。 另一个例子是通过分析每日Twitter博客的文本内容来预测股票变动 [2]。 + +另一方面,抓取产品的用户评论并分析他们的情感,有助于理解用户对不同公司,不同产品,甚至不同竞争对手产品的偏好。 + +本教程将指导您完成长期短期记忆(LSTM)网络的训练过程,以分类来自[大型电影评论数据集](http://ai.stanford.edu/~amaas/data/sentiment/)(有时称为[互联网电影数据库 (IMDB)](http://ai.stanford.edu/~amaas/papers/wvSent_acl2011.pdf))的句子的情感 。 此数据集包含电影评论及其相关联的类别标签,即正面和负面。 + +## 数椐准备 + +### IMDB 数椐介绍 + +训练模型之前, 我们需要预处理数椐并构建一个字典。 首先, 你可以使用下面的脚本下载 IMDB 数椐集和[Moses](http://www.statmt.org/moses/)工具, 这是一个基于统计的机器翻译系统. 
我们提供了一个数据预处理脚本,它不仅能够处理IMDB数据,还能处理其他用户自定义的数据。 为了使用提前编写的脚本,需要将标记的训练和测试样本移动到另一个路径,这已经在`get_imdb.sh`中完成。
+
+```
+cd demo/sentiment/data
+./get_imdb.sh
+```
+如果数据获取成功,你将在目录```./demo/sentiment/data```中看到下面的文件:
+
+```
+aclImdb get_imdb.sh imdb mosesdecoder-master
+```
+
+* aclImdb: 从外部网站上下载的原始数据集。
+* imdb: 仅包含训练和测试数据集。
+* mosesdecoder-master: Moses 工具。
+
+IMDB数据集包含25,000个已标注过的高极性电影评论用于训练,25,000个用于测试。负面的评论的得分小于等于4,正面的评论的得分大于等于7,总评分10分。 运行完脚本 `./get_imdb.sh`后, 我们可以看到在目录 `aclImdb`中的数据集的结构如下:
+
+```
+imdbEr.txt imdb.vocab README test train
+```
+* train: 训练数据集。
+* test : 测试数据集。
+* imdb.vocab: 字典文件。
+* imdbEr.txt: 字典imdb.vocab中每个切分单词的预期评级。
+* README: 数据说明文档。
+
+测试集和训练集目录包含下面的文件:
+
+```
+labeledBow.feat neg pos unsup unsupBow.feat urls_neg.txt urls_pos.txt urls_unsup.txt
+```
+
+* pos: 正面评价样本,包含12,500个txt文件,每个文件是一个电影评论。
+* neg: 负面评价样本,包含12,500个txt文件,每个文件是一个电影评论。
+* unsup: 未标记的评价样本,包含50,000个txt文件。
+* urls_xx.txt: 每个评论的网址。
+* xxBow.feat: 用于统计词频的Bow模型特征。
+
+### IMDB 数据准备
+
+在这个例子中,我们只使用已经标注过的训练集和测试集,且默认在训练集上构建字典,而不使用IMDB数据集中的imdb.vocab作为字典。训练集已经做了随机打乱排序而测试集没有。 Moses 工具中的脚本`tokenizer.perl` 用于切分单词和标点符号。执行下面的命令就可以预处理数据。
+
+```
+cd demo/sentiment/
+./preprocess.sh
+```
+preprocess.sh:
+
+```
+data_dir="./data/imdb"
+python preprocess.py -i $data_dir
+```
+
+* data_dir: 输入数据所在目录。
+* preprocess.py: 预处理脚本。
+
+运行成功后目录`demo/sentiment/data/pre-imdb` 结构如下:
+
+```
+dict.txt labels.list test.list test_part_000 train.list train_part_000
+```
+* test\_part\_000 和 train\_part\_000: 所有标记的测试集和训练集, 训练集已经随机打乱。
+* train.list 和 test.list: 训练集和测试集文件列表。
+* dict.txt: 利用训练集生成的字典。
+* labels.list: neg 0, pos 1, 含义:标签0表示负面的评论,标签1表示正面的评论。
+
+### 用户自定义数据预处理
+
+如果你要执行其他用情感分析来分类文本的任务,可以按如下的结构来准备数据。 我们提供了脚本来构建字典和预处理数据,所以你只需按下面的结构来组织数据就行了。
+
+```
+dataset
+|----train
+|    |----class1
+|    |    |----text_files
+|    |----class2
+|    |    |----text_files
+|    |    ...
+|----test
+|    |----class1
+|    |    |----text_files
+|    |----class2
+|    |    |----text_files
+|    |    ...
+```
+* dataset: 一级目录。
+* train, test: 二级目录。
+* class1,class2,...: 三级目录。
+* text_files: 文本格式的实例文件。
+
+所有同目录下的文本实例文件都是同级别的。 每个文本文件包含一个或者多个实例,每一行表示一个实例。 为了充分地随机打乱训练集, 在预处理含有多行数据的文本文件时参数设置稍有不同, 执行`preprocess.sh`脚本时需要加上`-m True`参数。 tokenizer.perl 默认用来切分单词和标点符号,如果你不需要这个操作,在运行`preprocess.sh`时加上`-t False`参数即可。
+
+## 训练模型
+
+在这项任务中,我们使用了循环神经网络(RNN)的 LSTM 架构来训练情感分析模型。 引入LSTM模型主要是为了克服梯度消失的问题。 LSTM网络类似于具有隐藏层的标准循环神经网络, 但是隐藏层中的每个普通节点被一个记忆单元替换。 每个记忆单元包含四个主要的元素: 输入门, 具有自循环连接的神经元,忘记门和输出门。 更多的细节可以在文献中找到[4]。 LSTM架构的最大优点是它可以在长时间间隔内记忆信息,而没有短时记忆的损失。在每个有新单词到来的时间步内,存储在记忆单元中的历史信息会被更新,从而以合理的序列形式迭代地学习单词的表示。
+
+
![LSTM](src/lstm.png)
+
图表 1. LSTM [3]
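+
+上述LSTM记忆单元在一个时间步内的计算,可以用下面一小段NumPy示意代码来理解(仅为概念示意,并非PaddlePaddle的实际实现;其中 `x_t` 为当前输入,`h_prev`、`c_prev` 为上一时刻的隐状态和记忆单元,`W`、`U`、`b` 等参数名均为示意用途):
+
+```python
+import numpy as np
+
+def sigmoid(z):
+    return 1.0 / (1.0 + np.exp(-z))
+
+def lstm_step(x_t, h_prev, c_prev, W, U, b):
+    # 一次性计算输入门i、遗忘门f、输出门o与候选记忆g
+    z = W.dot(x_t) + U.dot(h_prev) + b
+    i, f, o, g = np.split(z, 4)
+    i, f, o = sigmoid(i), sigmoid(f), sigmoid(o)
+    g = np.tanh(g)
+    c_t = f * c_prev + i * g    # 遗忘门筛选历史信息,输入门写入新信息
+    h_t = o * np.tanh(c_t)      # 输出门决定对外输出多少记忆
+    return h_t, c_t
+```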
+
+情感分析是自然语言理解中最典型的问题之一。 它的目的是预测在一个序列中表达的情感态度。 通常,仅仅是一些关键词,如形容词和副词,在预测序列或段落的情感中起主要作用。然而有些评论的上下文非常长,例如 IMDB 的数据集。 我们之所以使用LSTM来执行这个任务,是因为其经过改进的设计并且具有门机制。 首先,它能够从词级总结出具有可变上下文长度的上下文级别的表示。 第二,它可以在句子级别利用可扩展的上下文, 而大多数方法只是利用n-gram级别的知识。第三,它直接学习段落表示,而不是组合上下文级别的信息。
+
+在本演示中,我们提供两个网络,即双向LSTM和三层堆叠LSTM。
+
+#### 双向LSTM
+
+图2是双向LSTM网络,后面连全连接层和softmax层。
+
+
![BiLSTM](src/bi_lstm.jpg)
+
图 2. Bidirectional-LSTM
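+
+双向LSTM正向和反向各处理一遍输入序列,再在每个时间步拼接两个方向的隐状态。沿用上文的 `lstm_step`(及NumPy),其思想可以用如下示意代码表达(仅为概念示意,`params_fwd`、`params_bwd` 等名称为示意用途):
+
+```python
+def bi_lstm(xs, params_fwd, params_bwd, h0, c0):
+    fwd, bwd = [], []
+    h, c = h0, c0
+    for x in xs:                      # 正向扫描序列
+        h, c = lstm_step(x, h, c, *params_fwd)
+        fwd.append(h)
+    h, c = h0, c0
+    for x in reversed(xs):            # 反向扫描序列
+        h, c = lstm_step(x, h, c, *params_bwd)
+        bwd.append(h)
+    bwd.reverse()
+    # 每个时间步拼接正反两个方向的隐状态
+    return [np.concatenate([f, b]) for f, b in zip(fwd, bwd)]
+```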
+
+#### Stacked-LSTM
+图3是三层LSTM结构。图的底部是word embedding(对文档处理后形成的单词向量)。 接下来,连接三个LSTM隐藏层,并且第二个是反向LSTM。然后提取隐藏LSTM层所有时间步的最大值向量作为整个序列的表示。 最后,使用具有softmax激活的全连接前馈层来执行分类任务。 更多内容可查看参考文献 [5]。
+
+
![StackedLSTM](src/stacked_lstm.jpg)
+
图 3. Stacked-LSTM for sentiment analysis
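+
+其中“对所有时间步取最大”的池化操作可以用如下示意代码理解(假设 `hs` 是LSTM各时间步输出堆叠成的矩阵,形状为 时间步数 x 隐层维度;仅为概念示意):
+
+```python
+import numpy as np
+
+def max_over_time(hs):
+    # hs: [T, hidden_dim],对每一维特征在全部时间步上取最大值,
+    # 得到与序列长度无关的定长表示
+    return hs.max(axis=0)
+```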
+
+**配置**
+
+进入`demo/sentiment` 目录,`trainer_config.py` 是一个配置文件的例子,其中包含算法和网络配置。第一行从`sentiment_net.py`中导入预定义的网络。
+
+trainer_config.py:
+
+```python
+from sentiment_net import *
+
+data_dir = "./data/pre-imdb"
+# whether this config is used for test
+is_test = get_config_arg('is_test', bool, False)
+# whether this config is used for prediction
+is_predict = get_config_arg('is_predict', bool, False)
+dict_dim, class_dim = sentiment_data(data_dir, is_test, is_predict)
+
+################## Algorithm Config #####################
+
+settings(
+  batch_size=128,
+  learning_rate=2e-3,
+  learning_method=AdamOptimizer(),
+  regularization=L2Regularization(8e-4),
+  gradient_clipping_threshold=25
+)
+
+#################### Network Config ######################
+stacked_lstm_net(dict_dim, class_dim=class_dim,
+                 stacked_num=3, is_predict=is_predict)
+#bidirectional_lstm_net(dict_dim, class_dim=class_dim, is_predict=is_predict)
+```
+
+* **数据定义**:
+   * get\_config\_arg(): 获取通过 `--config_args=xx` 设置的命令行参数。
+   * 定义训练数据和测试数据提供者, 这里使用了PaddlePaddle的Python接口来加载数据。想了解更多细节可以参考PyDataProvider部分的文档。
+
+* **算法配置**:
+   * 使用随机梯度下降(sgd)算法。
+   * 使用 adam 优化。
+   * 设置batch size大小为128。
+   * 设置平均sgd窗口。
+   * 设置全局学习率。
+* **网络配置**:
+   * dict_dim: 获取字典维度。
+   * class_dim: 设置类别数,IMDB有两个标签,即正面评价标签和负面评价标签。
+   * `stacked_lstm_net`: 预定义网络如图3所示,默认情况下使用此网络。
+   * `bidirectional_lstm_net`: 预定义网络,如图2所示。
+
+**训练**
+
+首先安装PaddlePaddle。 然后使用下面的脚本 `train.sh` 来开启本地的训练。
+
+```
+cd demo/sentiment/
+./train.sh
+```
+
+train.sh:
+
+```
+config=trainer_config.py
+output=./model_output
+paddle train --config=$config \
+             --save_dir=$output \
+             --job=train \
+             --use_gpu=false \
+             --trainer_count=4 \
+             --num_passes=10 \
+             --log_period=20 \
+             --dot_period=20 \
+             --show_parameter_stats_period=100 \
+             --test_all_data_in_one_period=1 \
+             2>&1 | tee 'train.log'
+```
+
+* \--config=$config: 设置网络配置。
+* \--save\_dir=$output: 设置输出路径以保存训练完成的模型。
+* \--job=train: 设置工作模式为训练。
+* \--use\_gpu=false: 使用CPU训练,如果你安装GPU版本的PaddlePaddle,并想使用GPU来训练,可设置为true。
+* \--trainer\_count=4:设置线程数(或GPU个数)。
+* \--num\_passes=10: 设置pass,PaddlePaddle中的一个pass意味着对数据集中的所有样本进行一次训练。
+* \--log\_period=20: 每20个batch打印一次日志。
+* \--show\_parameter\_stats\_period=100: 每100个batch打印一次统计信息。
+* \--test\_all_data\_in\_one\_period=1: 每次测试都测试所有数据。
+
+如果运行成功,输出日志保存在路径 `demo/sentiment/train.log`中,模型保存在目录`demo/sentiment/model_output/`中。 输出日志说明如下:
+
+```
+Batch=20 samples=2560 AvgCost=0.681644 CurrentCost=0.681644 Eval: classification_error_evaluator=0.36875  CurrentEval: classification_error_evaluator=0.36875
+...
+Pass=0 Batch=196 samples=25000 AvgCost=0.418964 Eval: classification_error_evaluator=0.1922
+Test samples=24999 cost=0.39297 Eval: classification_error_evaluator=0.149406
+```
+- Batch=xx: 表示训练了xx个Batch。
+- samples=xx: 表示训练了xx个样本。
+- AvgCost=xx: 从第0个batch到当前batch的平均损失。
+- CurrentCost=xx: 最新log_period个batch处理的当前损失。
+- Eval: classification\_error\_evaluator=xx: 表示第0个batch到当前batch的分类错误率。
+- CurrentEval: classification\_error\_evaluator: 最新log_period个batch的分类错误率。
+- Pass=0: 遍历一次所有训练数据称为一个pass。0表示第一次遍历训练集。
+
+默认情况下,我们使用`stacked_lstm_net`网络,当传递相同的样本数时,它的收敛速度比`bidirectional_lstm_net`快。如果要使用双向LSTM,只需删除最后一行中的注释并把“stacked_lstm_net”注释掉。
+
+## 测试模型
+
+测试模型是指使用训练出的模型评估已标记的验证集。
+
+```
+cd demo/sentiment
+./test.sh
+```
+
+test.sh:
+
+```bash
+function get_best_pass() {
+  cat $1 | grep -Pzo 'Test .*\n.*pass-.*' | \
+  sed -r 'N;s/Test.* error=([0-9]+\.[0-9]+).*\n.*pass-([0-9]+)/\1 \2/g' | \
+  sort | head -n 1
+}
+
+log=train.log
+LOG=`get_best_pass $log`
+LOG=(${LOG})
+evaluate_pass="model_output/pass-${LOG[1]}"
+
+echo 'evaluating from pass '$evaluate_pass
+
+model_list=./model.list
+touch $model_list | echo $evaluate_pass > $model_list
+net_conf=trainer_config.py
+paddle train --config=$net_conf \
+             --model_list=$model_list \
+             --job=test \
+             --use_gpu=false \
+             --trainer_count=4 \
+             --config_args=is_test=1 \
+             2>&1 | tee 'test.log'
+```
+
+函数`get_best_pass`依据分类错误率获得最佳模型进行测试。 在本示例中,我们默认使用IMDB的测试数据集作为验证。 与训练不同,它需要在这里指定`--job=test`和模型路径,即`--model_list=$model_list`。如果运行成功,日志将保存在`demo/sentiment/test.log`中。例如,在我们的测试中,最好的模型是`model_output/pass-00002`,分类误差是0.115645,如下:
+
+```
+Pass=0 samples=24999 AvgCost=0.280471 Eval: classification_error_evaluator=0.115645
+```
+
+## 预测
+
+`predict.py`脚本提供了一个预测接口。在使用它之前请安装PaddlePaddle的Python API。 预测IMDB的未标记评论的一个实例如下:
+
+```
+cd demo/sentiment
+./predict.sh
+```
+predict.sh:
+
+```
+#Note the default model is pass-00002, you should make sure the model path
+#exists or change the model path.
+model=model_output/pass-00002/
+config=trainer_config.py
+label=data/pre-imdb/labels.list
+cat ./data/aclImdb/test/pos/10007_10.txt | python predict.py \
+     --tconf=$config \
+     --model=$model \
+     --label=$label \
+     --dict=./data/pre-imdb/dict.txt \
+     --batch_size=1
+```
+
+* `cat ./data/aclImdb/test/pos/10007_10.txt` : 输入预测样本。
+* `predict.py` : 预测接口脚本。
+* `--tconf=$config` : 设置网络配置。
+* `--model=$model` : 设置模型路径。
+* `--label=$label` : 设置标签类别字典,这个字典是整数标签和字符串标签的一个对应。
+* `--dict=data/pre-imdb/dict.txt` : 设置字典文件。
+* `--batch_size=1` : 设置batch size。
+
+注意应该确保默认模型路径`model_output/pass-00002`存在或更改为其它模型路径。
+
+本示例的预测结果:
+
+```
+Loading parameters from model_output/pass-00002/
+./data/aclImdb/test/pos/10007_10.txt: predicting label is pos
+```
+我们真诚地感谢您的关注,并欢迎您来参与贡献。
+
+## 参考文档
+[1] Brendan O'Connor, Ramnath Balasubramanyan, Bryan R. Routledge, and Noah A. Smith. 2010. [From Tweets to Polls: Linking Text Sentiment to Public Opinion Time Series](http://homes.cs.washington.edu/~nasmith/papers/oconnor+balasubramanyan+routledge+smith.icwsm10.pdf). In ICWSM-2010.
+[2] Johan Bollen, Huina Mao, Xiaojun Zeng. 2011. [Twitter mood predicts the stock market](http://arxiv.org/abs/1010.3003), Journal of Computational Science.
+[3] Alex Graves, Marcus Liwicki, Santiago Fernandez, Roman Bertolami, Horst Bunke, and Jürgen Schmidhuber. 2009. [A novel connectionist system for unconstrained handwriting recognition. IEEE Transactions on Pattern Analysis and Machine Intelligence](http://www.cs.toronto.edu/~graves/tpami_2009.pdf), 31(5):855–868.
+[4] Zachary C. Lipton, [A Critical Review of Recurrent Neural Networks for Sequence Learning](http://arxiv.org/abs/1506.00019v1), arXiv:1506.00019.
+[5] Jie Zhou and Wei Xu; [End-to-end Learning of Semantic Role Labeling Using Recurrent Neural Networks](http://www.aclweb.org/anthology/P/P15/P15-1109.pdf); ACL-IJCNLP 2015.
diff --git a/doc/tutorials/sentiment_analysis/src/bi_lstm.jpg b/doc/tutorials/sentiment_analysis/src/bi_lstm.jpg new file mode 100644 index 0000000000000000000000000000000000000000..adec1606d64d6e35ffe7e62abfa9a09309b05c84 Binary files /dev/null and b/doc/tutorials/sentiment_analysis/src/bi_lstm.jpg differ diff --git a/doc/tutorials/sentiment_analysis/src/lstm.png b/doc/tutorials/sentiment_analysis/src/lstm.png new file mode 100644 index 0000000000000000000000000000000000000000..aaf1fc690da2ffb8418cde5ed81848ddb5263030 Binary files /dev/null and b/doc/tutorials/sentiment_analysis/src/lstm.png differ diff --git a/doc/tutorials/sentiment_analysis/src/stacked_lstm.jpg b/doc/tutorials/sentiment_analysis/src/stacked_lstm.jpg new file mode 100644 index 0000000000000000000000000000000000000000..4239055050966e0095e188a8c81d860711bce29d Binary files /dev/null and b/doc/tutorials/sentiment_analysis/src/stacked_lstm.jpg differ diff --git a/doc/tutorials/text_generation/index_cn.md b/doc/tutorials/text_generation/index_cn.md new file mode 100644 index 0000000000000000000000000000000000000000..41a87b926db399d692d677e5278e7d5a0b7b5594 --- /dev/null +++ b/doc/tutorials/text_generation/index_cn.md @@ -0,0 +1,339 @@ +# 文本生成教程 # + +在语言生成领域中,“序列到序列”(sequence to sequence)的方法已被证明是一种强大的模型。它可以被应用于进行机器翻译(machine translation)、query改写(query rewriting)、图像描述(image captioning)等等。 + +本篇教程将会指导你通过训练一个“序列到序列”的神经网络机器翻译(NMT)模型来将法语翻译成英语。 + +我们遵循 [Neural Machine Translation by Jointly Learning to Align and Translate](http://arxiv.org/abs/1409.0473) 这篇文章,其中详细说明了模型架构,以及在WMT-14数据集上得到良好表现的训练过程。本篇教程在PaddlePaddle中重现了这一良好的训练结果。 + +我们感谢@caoying的pull request,其中定义了模型架构和solver配置。 + +## 数据准备 ## +### 下载与解压缩 ### +从该链接 [http://www-lium.univ-lemans.fr/~schwenk/cslm\_joint\_paper/](http://www-lium.univ-lemans.fr/~schwenk/cslm_joint_paper/) 下载WMT-14数据集,然后解压,并将Develop和Test数据分别放入不同的文件夹。 + +- **Train data**: [bitexts (选择过后的)](http://www-lium.univ-lemans.fr/~schwenk/cslm_joint_paper/data/bitexts.tgz) +- **Develop and Test data**: [dev 与 test 数据](http://www-lium.univ-lemans.fr/~schwenk/cslm_joint_paper/data/dev+test.tgz) + +在Linux下,只需要简单地运行以下命令。否则你需要自己下载、解压、拆分到不同文件夹、并且分别重命名文件后缀。 + +```bash +cd demo/seqToseq/data +./wmt14_data.sh +``` + +我们会发现数据集 `wmt14` 中包含如下表所示的3个文件夹。 + ++++++ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
folder nameFrench-English parallel corpora filenumber of total filesize
train_dataccb2_pc30.src, ccb2_pc30.trg, etc123.55G
test_datantst1213.src, ntst1213.trg21636k
gen_datantst14.src, ntst14.trg2864k
+
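+
+解压完成后,可以用下面的命令粗略检查平行语料是否逐行对齐(示意命令,在 `demo/seqToseq/data` 目录下执行;`XXX.src` 与 `XXX.trg` 的行数应当一致):
+
+```bash
+# 逐行对应的平行语料,src 与 trg 的行数应相同
+wc -l wmt14/train_data/ccb2_pc30.src wmt14/train_data/ccb2_pc30.trg
+```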
+
+- 每个文件夹都包含法语到英语的平行语料库
+- **XXX.src** 是原始法语文件;**XXX.trg** 是目标英语文件
+- **XXX.src** 和 **XXX.trg** 的行数应该一致
+- 每行都是一个法语或者英语的句子
+- **XXX.src** 和 **XXX.trg** 中任意第i行的句子之间都有着一一对应的关系
+
+### 用户自定义数据集 ###
+
+如果你想进行诸如语义转述(Paraphrasing)等其他“序列到序列”的任务,你只需要按照如下方式组织数据,并将它们放在`demo/seqToseq/data`目录下:
+
+    dataset
+      train
+        file1.src file1.trg
+        file2.src file2.trg
+        ......
+      test
+        file1.src file1.trg
+        file2.src file2.trg
+        ......
+      gen
+        file1.src file1.trg
+        file2.src file2.trg
+        ......
+
+- 一级目录:数据集文件夹名称
+- 二级目录:train、test和gen这三个文件夹是固定的
+- 三级目录:源语言到目标语言的平行语料库文件
+  - **XXX.src** 是源语言的文件,**XXX.trg** 是目标语言的文件
+  - 文件中的每行都必须是一个句子
+  - **XXX.src** 和 **XXX.trg** 中任意第i行的句子之间都必须有着一一对应的关系
+
+## 数据预处理 ##
+### 预处理工作流程 ###
+- 将每个源语言到目标语言的平行语料库文件合并为一个文件:
+  - 合并每个 **XXX.src** 和 **XXX.trg** 文件为 **XXX**
+  - **XXX** 中的第i行 = **XXX.src** 中的第i行 + '\t' + **XXX.trg** 中的第i行
+- 创建训练数据的“源字典”和“目标字典”,每个字典都有DICTSIZE个单词,包括:
+  - 词频最高的(DICTSIZE - 3)个单词
+  - 3个特殊符号
+  - `<s>`:序列的开始
+  - `<e>`:序列的结束
+  - `<unk>`:未包含在字典中的单词
+
+### 预处理命令和结果 ###
+对数据集进行预处理的基本命令是:
+
+```bash
+cd demo/seqToseq/
+python preprocess.py -i INPUT [-d DICTSIZE] [-m]
+```
+
+- `-i INPUT`:输入的原始数据集路径
+- `-d DICTSIZE`:指定的字典单词数,如果没有设置,字典会包含输入数据集中的所有单词
+- `-m --mergeDict`:合并“源字典”和“目标字典”,使得两个字典有相同的上下文
+
+你将会看到如下消息:
+
+    concat parallel corpora for dataset
+    build source dictionary for train data
+    build target dictionary for train data
+    dictionary size is XXX
+
+然后你只需要运行以下命令:
+
+```bash
+python preprocess.py -i data/wmt14 -d 30000
+```
+
+这将花费数分钟的时间,并且将预处理好的数据集存放在`demo/seqToseq/data/pre-wmt14`目录下。目录结构如下:
+
+    train test gen train.list test.list gen.list src.dict trg.dict
+
+- **train, test, gen**:分别包含了法语到英语的平行语料库的训练数据、测试数据和生成数据。文件夹中的每个文件的每一行包含两部分,首先是法语序列,然后是对应的英语序列。
+- **train.list, test.list, gen.list**:分别为train,test,gen文件夹中的文件列表
+- **src.dict, trg.dict**:源(法语)/目标(英语)字典,每个字典包含总共30000个单词:29997个最高频单词和3个特殊符号
+
+## 模型训练 ##
+### 简介 ###
+
+神经网络机器翻译(NMT)旨在建立一个可以被联合调优以获得最佳翻译效果的单一神经网络。近期提出的NMT模型通常都属于编解码模型(encoder–decoder models)的一种。编解码模型将一个源语句编码为一个定长的向量,然后解码器通过这个向量生成一个目标语句。
+
+在这个任务中,我们使用了一个编解码模型的扩展,它同时学习对齐(align)与翻译。每当模型在翻译过程中生成了一个单词,它就会在源语句中搜索出最相关信息的位置的集合。解码器根据上下文向量预测出一个目标单词,这个向量与源语句中搜索出的位置和所有之前生成的目标单词有关。如想了解更多详细的解释,可以参考 [Neural Machine Translation by Jointly Learning to Align and Translate](http://arxiv.org/abs/1409.0473)。
+
+与基本的编解码模型相比,这个模型最大的不同是它并没有将输入语句编码为一个单独的定长向量。相反,它将输入语句编码为向量的序列,其中每个向量对应输入语句中的一个元素。然后在解码被翻译的语句时,会自适应地从这些向量中选择一个子集出来。这使得NMT模型得以解放出来,不必再将任意长度源语句中的所有信息压缩至一个定长的向量中。该模型在长语句翻译的场景下效果提升更加明显,在任意长度语句翻译的场景下都可以观察到其效果的提升。
+
![](./encoder-decoder-attention-model.png)
+
Figure 1. Encoder-Decoder-Attention-Model
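+
+注意力机制中“自适应地选择向量子集”的计算,可以用如下示意代码理解(假设 `enc` 为编码器输出的向量序列,`s` 为解码器当前状态;这里用简单的点积代替论文中的对齐模型,仅为概念示意):
+
+```python
+import numpy as np
+
+def attention_context(enc, s):
+    # enc: [T, d] 编码器输出;s: [d] 解码器当前状态
+    scores = enc.dot(s)                        # 每个源位置的对齐得分
+    weights = np.exp(scores - scores.max())
+    weights /= weights.sum()                   # softmax归一化为注意力权重
+    return weights.dot(enc)                    # 加权求和得到上下文向量
+```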
+
+### 使用PaddlePaddle训练模型 ###
+我们在训练之前需要创建一个模型配置文件,这里是一个例子`demo/seqToseq/translation/train.conf`。前三行import了定义network、job_mode和attention_mode的python函数。
+
+```python
+from seqToseq_net import *
+is_generating = False
+
+### Data Definiation
+train_conf = seq_to_seq_data(data_dir = "./data/pre-wmt14",
+                             is_generating = is_generating)
+
+### Algorithm Configuration
+settings(
+      learning_method = AdamOptimizer(),
+      batch_size = 50,
+      learning_rate = 5e-4)
+
+### Network Architecture
+gru_encoder_decoder(train_conf, is_generating)
+```
+
+1. **Data Definition**:在示例中我们定义了一个序列到序列的训练和测试数据。它返回train_conf作为配置,其输入参数如下:
+   - data_dir:训练数据和测试数据的目录
+   - is_generating:这个配置是否用来生成,这里设置为False
+2. **Algorithm Configuration**:在示例中我们使用SGD训练算法(默认)和Adam学习方法,指定batch_size为50,learning_rate为5e-4
+3. **Network Architecture**:在示例中我们使用attention版本的GRU编解码网络。它包括了一个双向的GRU作为编码器;解码器在解码翻译的过程中模拟了在源语句中的搜索。
+
+### 训练模型的命令与结果 ###
+写完模型配置之后,我们可以通过以下命令来训练模型:
+
+```bash
+cd demo/seqToseq/translation
+./train.sh
+```
+
+`train.sh` 的内容如下所示:
+
+```bash
+paddle train \
+--config='translation/train.conf' \
+--save_dir='translation/model' \
+--use_gpu=false \
+--num_passes=16 \
+--show_parameter_stats_period=100 \
+--trainer_count=4 \
+--log_period=10 \
+--dot_period=5 \
+2>&1 | tee 'translation/train.log'
+```
+- config: 设置神经网络的配置文件
+- save_dir: 设置保存模型的输出路径
+- use_gpu: 是否使用GPU训练,这里设置为使用CPU
+- num_passes: 设置passes的数量。paddle中的一个pass表示将训练数据集中的所有样本训练一次
+- show_parameter_stats_period: 这里每隔100个batch显示一次参数统计信息
+- trainer_count: 设置CPU线程数或者GPU设备数
+- log_period: 这里每隔10个batch打印一次日志
+- dot_period: 这里每隔5个batch打印一个点"."
+
+训练的损失函数默认每隔10个batch打印一次,你将会看到如下消息:
+
+    I0719 19:16:45.952062 15563 TrainerInternal.cpp:160]  Batch=10 samples=500 AvgCost=198.475 CurrentCost=198.475 Eval: classification_error_evaluator=0.737155  CurrentEval: classification_error_evaluator=0.737155
+    I0719 19:17:56.707319 15563 TrainerInternal.cpp:160]  Batch=20 samples=1000 AvgCost=157.479 CurrentCost=116.483 Eval: classification_error_evaluator=0.698392  CurrentEval: classification_error_evaluator=0.659065
+    .....
+- AvgCost:从第0个batch到当前batch的平均cost
+- CurrentCost:当前batch的cost
+- classification\_error\_evaluator(Eval):从第0个评估到当前评估中,每个单词的预测错误率
+- classification\_error\_evaluator(CurrentEval):当前评估中,每个单词的预测错误率
+
+当classification\_error\_evaluator的值低于0.35时,模型就训练成功了。
+
+## 文本生成 ##
+### 简介 ###
+
+一般而言,NMT模型以源语句的编码为条件,在给定当前目标单词的前提下预测下一个目标单词。在训练过程中,当前单词总是取真值(ground truth);而在生成过程中,当前单词是解码器上一步的输出,来自PaddlePaddle的memory。
+
+而且,我们使用集束搜索(Beam Search)来生成序列。集束搜索使用广度优先搜索来构建搜索树。对于树的每一层,生成当前层的所有后继状态,并将它们按照启发代价(heuristic cost)升序排列。但是这种方法在每层只保存预设数量的最优状态(这个数量称为beam size)。
+
+### 预训练的模型 ###
+我们在拥有50个节点的集群中训练模型,每个节点有两个6核CPU。我们在5天里训练了16个pass,其中每个pass花费了7个小时。model_dir中有16个子目录,每个里面都包含全部的模型参数,大小为202MB。我们发现pass-00012的模型有着最高的BLEU值27.77(参考文献 [BLEU: a Method for Automatic Evaluation of Machine Translation](http://www.aclweb.org/anthology/P02-1040.pdf))。要下载解压这个模型,只需在Linux下运行如下命令:
+
+```bash
+cd demo/seqToseq/data
+./wmt14_model.sh
+```
+
+### 使用PaddlePaddle生成模型 ###
+在翻译法语句子之前,我们需要创建模型配置文件。这里是一个例子`demo/seqToseq/translation/gen.conf`。前三行import了定义network、job_mode和attention_mode的python函数。
+
+```python
+from seqToseq_net import *
+is_generating = True
+
+################## Data Definiation #####################
+gen_conf = seq_to_seq_data(data_dir = "./data/pre-wmt14",
+                           is_generating = is_generating,
+                           gen_result = "./translation/gen_result")
+
+############## Algorithm Configuration ##################
+settings(
+      learning_method = AdamOptimizer(),
+      batch_size = 1,
+      learning_rate = 0)
+
+################# Network configure #####################
+gru_encoder_decoder(gen_conf, is_generating)
+```
+
+1. **Data Definition**:在示例中我们定义了一个序列到序列的生成数据。它返回gen_conf作为配置,其输入参数如下:
+   - data_dir:生成数据的目录
+   - is_generating:这个配置是否用来生成,这里设置为True
+   - gen_result:保存生成结果的文件
+2. **Algorithm Configuration**:在生成过程中我们使用SGD训练算法,并指定batch_size为1(每次生成1个序列),learning_rate为0
+3. **Network Architecture**:本质上与训练模型一样
+
+### 生成模型的命令与结果 ###
+写完模型配置之后,我们可以通过以下命令来进行从法语到英语的文本翻译:
+
+```bash
+cd demo/seqToseq/translation
+./gen.sh
+```
+
+`gen.sh` 的内容如下所示。与训练模型不同的是,这里有一些不同的参数需要指定:
+
+```bash
+paddle train \
+--job=test \
+--config='translation/gen.conf' \
+--save_dir='data/wmt14_model' \
+--use_gpu=true \
+--num_passes=13 \
+--test_pass=12 \
+--trainer_count=1 \
+2>&1 | tee 'translation/gen.log'
+```
+- job:设置任务的模式为测试
+- save_dir:存储模型的路径
+- num_passes 和 test_pass:从test_pass到(num_passes - 1)加载模型参数,这里只加载 `data/wmt14_model/pass-00012`
+
+你将会看到这样的消息:
+
+    I0706 14:48:31.178915 31441 GradientMachine.cpp:143] Loading parameters from data/wmt14_model/pass-00012
+    I0706 14:48:40.012039 31441 Tester.cpp:125]  Batch=100 samples=100 AvgCost=0
+    I0706 14:48:48.898632 31441 Tester.cpp:125]  Batch=200 samples=200 AvgCost=0
+    ...
+
+然后在`demo/seqToseq/translation/gen_result`中的生成结果如下所示:
+
+    0
+    0   -11.1314     The about the width of the seats while large controls are at stake
+    1   -11.1519     The on the width of the seats while large controls are at stake
+    2   -11.5988     The about the width of the seats while large controls are at stake .
+
+    1
+    0   -24.4149     The dispute is between the major aircraft manufacturers about the width of the tourist seats on the flights , paving the way for a confrontation during the month of the Dubai .
+    1   -26.9524     The dispute is between the major aircraft manufacturers about the width of the tourist seats on the flights , paving the way for a confrontation during the month of Dubai ' s .
+ 2 -27.9574 The dispute is between the major aircraft manufacturers about the width of the tourist seats on the flights , paving the way for a confrontation during the month of Dubai ' s Dubai . + ... + +- 这是集束搜索的结果,其中beam size是3 +- 第一行的“0”和第6行的“1”表示生成数据的序列id +- 其他六行列出了集束搜索的结果 + - 第二列是集束搜索的得分(从大到小) + - 第三列是生成的英语序列 +- 有两个特殊标识: + - ``:序列的结尾 + - ``:不包含在字典中的单词 + +### BLEU评估 ### +对机器翻译的人工评估工作很广泛但也很昂贵。一篇论文 [BLEU: a Method for Automatic Evaluation of Machine Translation](http://www.aclweb.org/anthology/P02-1040.pdf) 展示了一种方法,当需要快速或者频繁的评估时,使用自动的替补来替代经验丰富的人工评判。[Moses](http://www.statmt.org/moses/) 是一个统计学的机器翻译系统,我们使用其中的 [multi-bleu.perl](https://github.com/moses-smt/mosesdecoder/blob/master/scripts/generic/multi-bleu.perl) 来做BLEU评估。运行以下命令来下载这个脚本: + +```bash +cd demo/seqToseq/translation +./moses_bleu.sh +``` + +由于标准的翻译结果已经下载到这里`data/wmt14/gen/ntst14.trg`,我们可以运行以下命令来做BLEU评估。 + +```bash +cd demo/seqToseq/translation +./eval_bleu.sh FILE BEAMSIZE +``` + +- FILE:生成的结果文件 +- BEAMSIZE:集束搜索中的扩展广度 diff --git a/doc/tutorials/text_generation/index_en.md b/doc/tutorials/text_generation/index_en.md index d63f5cb6074c5768f9cff7937c5f0771c2619642..5d8e667c20bd1fda64a6e11a88517d52112b72fa 100644 --- a/doc/tutorials/text_generation/index_en.md +++ b/doc/tutorials/text_generation/index_en.md @@ -260,8 +260,8 @@ gru_encoder_decoder(gen_conf, is_generating) 1. **Data Definiation**: We defines an SeqToSeq gen data in our example. It returns gen_conf as the configuration, following is its input arguments: - data\_dir: directory of gen data - - is\_generating: whether this config is used for generating, here is false - - gen\_result: file to store the generation result +   - is\_generating: whether this config is used for generating, here is true +   - gen\_result: file to store the generation result 2. **Algorithm Configuration**: We use SGD traing algorithm in generation, and specify batch_size as 1 (each time generate one sequence), and learning rate as 0. 3. **Network Architecture**: Essentially the same as the training model. diff --git a/doc_cn/CMakeLists.txt b/doc_cn/CMakeLists.txt deleted file mode 100644 index 314b34525ca1d328f4e3b9814ee26deed39d89fd..0000000000000000000000000000000000000000 --- a/doc_cn/CMakeLists.txt +++ /dev/null @@ -1,31 +0,0 @@ -if(NOT DEFINED SPHINX_THEME) - set(SPHINX_THEME default) -endif() - -if(NOT DEFINED SPHINX_THEME_DIR) - set(SPHINX_THEME_DIR) -endif() - -# configured documentation tools and intermediate build results -set(BINARY_BUILD_DIR "${CMAKE_CURRENT_BINARY_DIR}/_build") - -# Sphinx cache with pickled ReST documents -set(SPHINX_CACHE_DIR "${CMAKE_CURRENT_BINARY_DIR}/_doctrees") - -# HTML output directory -set(SPHINX_HTML_DIR "${CMAKE_CURRENT_BINARY_DIR}/html") - -configure_file( - "${CMAKE_CURRENT_SOURCE_DIR}/conf.py.in" - "${BINARY_BUILD_DIR}/conf.py" - @ONLY) - -sphinx_add_target(paddle_docs_cn - html - ${BINARY_BUILD_DIR} - ${SPHINX_CACHE_DIR} - ${CMAKE_CURRENT_SOURCE_DIR} - ${SPHINX_HTML_DIR}) - -add_dependencies(paddle_docs_cn - gen_proto_py) diff --git a/doc_cn/algorithm/rnn/hierarchical-layer.rst b/doc_cn/algorithm/rnn/hierarchical-layer.rst deleted file mode 100644 index a9906b8b9c2036ae349f30d7edee770884f73f99..0000000000000000000000000000000000000000 --- a/doc_cn/algorithm/rnn/hierarchical-layer.rst +++ /dev/null @@ -1,95 +0,0 @@ -########################### -支持双层序列作为输入的Layer -########################### - -.. 
contents:: - -概述 -==== - -在自然语言处理任务中,序列是一种常见的数据类型。一个独立的词语,可以看作是一个非序列输入,或者,我们称之为一个0层的序列;由词语构成的句子,是一个单层序列;若干个句子构成一个段落,是一个双层的序列。 - -双层序列是一个嵌套的序列,它的每一个元素,又是一个单层的序列。这是一种非常灵活的数据组织方式,帮助我们构造一些复杂的输入信息。 - -我们可以按照如下层次定义非序列,单层序列,以及双层序列。 - -+ 0层序列:一个独立的元素,类型可以是PaddlePaddle支持的任意输入数据类型 -+ 单层序列:排成一列的多个元素,每个元素是一个0层序列,元素之间的顺序是重要的输入信息 -+ 双层序列:排成一列的多个元素,每个元素是一个单层序列,称之为双层序列的一个子序列(subseq),subseq的每个元素是一个0层序列 - -在 PaddlePaddle中,下面这些Layer能够接受双层序列作为输入,完成相应的计算。 - -pooling_layer -============== - -pooling_layer 的使用示例如下,详细见 `pooling_layer`_ 配置API。 - -.. code-block:: bash - - seq_pool = pooling_layer(input=layer, - pooling_type=AvgPooling(), - agg_level=AggregateLevel.EACH_SEQUENCE) - -- `pooling_type` 目前支持两种,分别是:MaxPooling()和AvgPooling()。 - -- `agg_level=AggregateLevel.TIMESTEP` 时(默认值): - - - 作用:双层序列经过运算变成一个0层序列,或单层序列经过运算变成一个0层序列 - - 输入:一个双层序列,或一个单层序列 - - 输出:一个0层序列,即整个输入序列(单层或双层)的平均值(或最大值) - -- `agg_level=AggregateLevel.EACH_SEQUENCE` 时: - - - 作用:一个双层序列经过运算变成一个单层序列 - - 输入:必须是一个双层序列 - - 输出:一个单层序列,序列的每个元素是原来双层序列每个subseq元素的平均值(或最大值) - -last_seq 和 first_seq -===================== - -last_seq 的使用示例如下( `first_seq`_ 类似),详细见 `last_seq`_ 配置API。 - -.. code-block:: bash - - last = last_seq(input=layer, - agg_level=AggregateLevel.EACH_SEQUENCE) - -- `agg_level=AggregateLevel.TIMESTEP` 时(默认值): - - - 作用:一个双层序列经过运算变成一个0层序列,或一个单层序列经过运算变成一个0层序列 - - 输入:一个双层序列或一个单层序列 - - 输出:一个0层序列,即整个输入序列(双层或者单层)最后一个,或第一个元素。 - -- `agg_level=AggregateLevel.EACH_SEQUENCE` 时: - - 作用:一个双层序列经过运算变成一个单层序列 - - 输入:必须是一个双层序列 - - 输出:一个单层序列,其中每个元素是双层序列中每个subseq最后一个(或第一个)元素。 - -expand_layer -============ - -expand_layer 的使用示例如下,详细见 `expand_layer`_ 配置API。 - -.. code-block:: bash - - expand = expand_layer(input=layer1, - expand_as=layer2, - expand_level=ExpandLevel.FROM_TIMESTEP) - -- `expand_level=ExpandLevel.FROM_TIMESTEP` 时(默认值): - - - 作用:一个0层序列经过运算扩展成一个单层序列,或者一个双层序列 - - 输入:layer1必须是一个0层序列,是待扩展的数据;layer2 可以是一个单层序列,或者是一个双层序列,提供扩展的长度信息 - - 输出:一个单层序列或一个双层序列,输出序列的类型(双层序列或单层序列)和序列中含有元素的数目同 layer2 一致。若输出是单层序列,单层序列的每个元素(0层序列),都是对layer1元素的拷贝;若输出是双层序列,双层序列每个subseq中每个元素(0层序列),都是对layer1元素的拷贝 - -- `expand_level=ExpandLevel.FROM_SEQUENCE` 时: - - - 作用:一个单层序列经过运算扩展成一个双层序列 - - 输入:layer1必须是一个单层序列,是待扩展的数据;layer2 必须是一个双层序列,提供扩展的长度信息 - - 输出:一个双层序列,序列中含有元素的数目同 layer2 一致。要求单层序列含有元素的数目(0层序列)和双层序列含有subseq 的数目一致。单层序列第i个元素(0层序列),被扩展为一个单层序列,构成了输出双层序列的第i个 subseq 。 - - -.. _pooling_layer: ../../../doc/ui/api/trainer_config_helpers/layers.html#pooling-layer -.. _last_seq: ../../../doc/ui/api/trainer_config_helpers/layers.html#last-seq -.. _first_seq: ../../../doc/ui/api/trainer_config_helpers/layers.html#first-seq -.. _expand_layer: ../../../doc/ui/api/trainer_config_helpers/layers.html#expand-layer diff --git a/doc_cn/algorithm/rnn/hrnn_demo.rst b/doc_cn/algorithm/rnn/hrnn_demo.rst deleted file mode 100644 index 96396ff105d134920396ded9ad8f00494357a37c..0000000000000000000000000000000000000000 --- a/doc_cn/algorithm/rnn/hrnn_demo.rst +++ /dev/null @@ -1,7 +0,0 @@ -.. _algo_hrnn_demo: - -################# -双层RNN的使用示例 -################# - -TBD \ No newline at end of file diff --git a/doc_cn/algorithm/rnn/hrnn_rnn_api_compare.rst b/doc_cn/algorithm/rnn/hrnn_rnn_api_compare.rst deleted file mode 100644 index 9baa0b578041ab82331a94c2a9e4d081697a5fda..0000000000000000000000000000000000000000 --- a/doc_cn/algorithm/rnn/hrnn_rnn_api_compare.rst +++ /dev/null @@ -1,230 +0,0 @@ -.. 
_algo_hrnn_rnn_api_compare: - -##################### -单双层RNN API对比介绍 -##################### - -本文以PaddlePaddle的双层RNN单元测试为示例,用多对效果完全相同的、分别使用单双层RNN作为网络配置的模型,来讲解如何使用双层RNN。本文中所有的例子,都只是介绍双层RNN的API接口,并不是使用双层RNN解决实际的问题。如果想要了解双层RNN在具体问题中的使用,请参考\ :ref:`algo_hrnn_demo`\ 。本文中示例所使用的单元测试文件是\ `test_RecurrentGradientMachine.cpp `_\ 。 - -示例1:双层RNN,子序列间无Memory -================================ - -在双层RNN中的经典情况是将内层的每一个时间序列数据,分别进行序列操作;并且内层的序列操作之间独立无依赖,即不需要使用Memory\ 。 - -在本示例中,单层RNN和双层RNN的网络配置,都是将每一句分好词后的句子,使用LSTM作为encoder,压缩成一个向量。区别是RNN使用两层序列模型,将多句话看成一个整体同时使用encoder压缩。二者语意上完全一致。这组语义相同的示例配置如下: - -* 单层RNN\: `sequence_layer_group.conf `_ -* 双层RNN\: `sequence_nest_layer_group.conf `_ - - -读取双层序列数据 ----------------- - -首先,本示例中使用的原始数据如下\: - -- 本例中的原始数据一共有10个样本。每个样本由两部分组成,一个label(此处都为2)和一个已经分词后的句子。这个数据也被单层RNN网络直接使用。 - -.. literalinclude:: ../../../paddle/gserver/tests/Sequence/tour_train_wdseg - :language: text - - -- 双层序列数据一共有4个样本。 每个样本间用空行分开,整体数据和原始数据完全一样。但于双层序列的LSTM来说,第一个样本同时encode两条数据成两个向量。这四条数据同时处理的句子数量为\ :code:`[2, 3, 2, 3]`\ 。 - -.. literalinclude:: ../../../paddle/gserver/tests/Sequence/tour_train_wdseg.nest - :language: text - -其次,对于两种不同的输入数据类型,不同DataProvider对比如下(`sequenceGen.py `_)\: - -.. literalinclude:: ../../../paddle/gserver/tests/sequenceGen.py - :language: python - :lines: 21-39 - :linenos: - -- 这是普通的单层时间序列的DataProvider代码,其说明如下: - - * DataProvider共返回两个数据,分别是words和label。即上述代码中的第19行。 - - words是原始数据中的每一句话,所对应的词表index数组。它是integer_value_sequence类型的,即整数数组。words即为这个数据中的单层时间序列。 - - label是原始数据中对于每一句话的分类标签,它是integer_value类型的。 - -.. literalinclude:: ../../../paddle/gserver/tests/sequenceGen.py - :language: python - :lines: 42-71 - :linenos: - -- 对于同样的数据,双层时间序列的DataProvider的代码。其说明如下: - - - DataProvider共返回两组数据,分别是sentences和labels。即在双层序列的原始数据中,每一组内的所有句子和labels - - sentences是双层时间序列的数据。由于它内部包含了每组数据中的所有句子,且每个句子表示为对应的词表索引数组,因此它是integer_value_sub_sequence 类型的,即双层时间序列。 - - labels是每组内每个句子的标签,故而是一个单层时间序列。 - - -模型配置的模型配置 ------------------------------------------- - -首先,我们看一下单层RNN的配置。代码中9-15行(高亮部分)即为单层RNN序列的使用代码。这里使用了PaddlePaddle预定义好的RNN处理函数。在这个函数中,RNN对于每一个时间步通过了一个LSTM网络。 - -.. literalinclude:: ../../../paddle/gserver/tests/sequence_layer_group.conf - :language: python - :lines: 38-63 - :linenos: - :emphasize-lines: 9-15 - - -其次,我们看一下语义相同的双层RNN的网络配置\: - -* PaddlePaddle中的许多layer并不在意输入是否是时间序列,例如\ :code:`embedding_layer`\ 。在这些layer中,所有的操作都是针对每一个时间步来进行的。 - -* 在该配置的7-26行(高亮部分),将双层时间序列数据先变换成单层时间序列数据,再对每一个单层时间序列进行处理。 - - * 使用\ :code:`recurrent_group`\ 这个函数进行变换,在变换时需要将输入序列传入。由于我们想要的变换是双层时间序列=> 单层时间序列,所以我们需要将输入数据标记成\ :code:`SubsequenceInput`\ 。 - - * 在本例中,我们将原始数据的每一组,通过\ :code:`recurrent_group`\ 进行拆解,拆解成的每一句话再通过一个LSTM网络。这和单层RNN的配置是等价的。 - -* 与单层RNN的配置类似,我们只需要使用LSTM encode成的最后一个向量。所以对\ :code:`recurrent_group`\ 进行了\ :code:`last_seq`\ 操作。但和单层RNN不同,我们是对每一个子序列取最后一个元素,因此\ :code:`agg_level=AggregateLevel.EACH_SEQUENCE`\ 。 - -* 至此,\ :code:`lstm_last`\ 便和单层RNN配置中的\ :code:`lstm_last`\ 具有相同的结果了。 - -.. literalinclude:: ../../../paddle/gserver/tests/sequence_nest_layer_group.conf - :language: python - :lines: 38-64 - :linenos: - :emphasize-lines: 7-26 - -示例2:双层RNN,子序列间有Memory -================================ - -本示例意图使用单层RNN和双层RNN实现两个完全等价的全连接RNN。 - -* 对于单层RNN,输入数据为一个完整的时间序列,例如\ :code:`[4, 5, 2, 0, 9, 8, 1, 4]`\ 。 - -* 对于双层RNN,输入数据为在单层RNN数据里面,任意将一些数据组合成双层时间序列,例如\ :code:`[ [4, 5, 2], [0, 9], [8, 1, 4]]`。 - -模型配置的模型配置 ------------------- - -我们选取单双层序列配置中的不同部分,来对比分析两者语义相同的原因。 - -- 单层RNN:过了一个很简单的recurrent_group。每一个时间步,当前的输入y和上一个时间步的输出rnn_state做了一个全链接。 - -.. 
literalinclude:: ../../../paddle/gserver/tests/sequence_rnn.conf - :language: python - :lines: 36-48 - -- 双层RNN,外层memory是一个元素: - - - 内层inner_step的recurrent_group和单层序列的几乎一样。除了boot_layer=outer_mem,表示将外层的outer_mem作为内层memory的初始状态。外层outer_step中,outer_mem是一个子句的最后一个向量,即整个双层group是将前一个子句的最后一个向量,作为下一个子句memory的初始状态。 - - 从输入数据上看,单双层序列的句子是一样的,只是双层序列将其又做了子序列划分。因此双层序列的配置中,必须将前一个子句的最后一个元素,作为boot_layer传给下一个子句的memory,才能保证和单层序列的配置中“每个时间步都用了上一个时间步的输出结果”一致。 - -.. literalinclude:: ../../../paddle/gserver/tests/sequence_nest_rnn.conf - :language: python - :lines: 39-66 - -.. warning:: - PaddlePaddle目前只支持在每个时间步中,Memory的时间序列长度一致的情况。 - -示例3:双层RNN,输入不等长 -========================== - -.. role:: red - -.. raw:: html - - - -**输入不等长** 是指recurrent_group的多个输入序列,在每个时间步的子序列长度可以不相等。但序列输出时,需要指定与某一个输入的序列信息是一致的。使用\ :red:`targetInlink`\ 可以指定哪一个输入和输出序列信息一致,默认指定第一个输入。 - -示例3的配置分别为\ `单层不等长RNN `_\ 和\ `双层不等长RNN `_\ 。 - -示例3对于单层RNN和双层RNN数据完全相同。 - -* 对于单层RNN的数据一共有两个样本,他们分别是\ :code:`[1, 2, 4, 5, 2], [5, 4, 1, 3, 1]`\ 和\ :code:`[0, 2, 2, 5, 0, 1, 2], [1, 5, 4, 2, 3, 6, 1]`\ 。对于每一个单层RNN的数据,均有两组特征。 - -* 在单层数据的基础上,双层RNN数据随意加了一些隔断,例如将第一条数据转化为\ :code:`[[0, 2], [2, 5], [0, 1, 2]],[[1, 5], [4], [2, 3, 6, 1]]`\ 。 - -* 需要注意的是PaddlePaddle目前只支持子序列数目一样的多输入双层RNN。例如本例中的两个特征,均有三个子序列。每个子序列长度可以不一致,但是子序列的数目必须一样。 - - -模型配置 --------- - -和示例2中的配置类似,示例3的配置使用了单层RNN和双层RNN,实现两个完全等价的全连接RNN。 - -* 单层RNN\: - -.. literalinclude:: ../../../paddle/gserver/tests/sequence_rnn_multi_unequalength_inputs.py - :language: python - :lines: 42-59 - :linenos: - -* 双层RNN\ \: - -.. literalinclude:: ../../../paddle/gserver/tests/sequence_nest_rnn_multi_unequalength_inputs.py - :language: python - :lines: 41-80 - :linenos: - -在上面代码中,单层和双层序列的使用和示例2中的示例类似,区别是同时处理了两个输入。而对于双层序列,两个输入的子序列长度也并不相同。但是,我们使用了\ :code:`targetInlink`\ 参数设置了外层\ :code:`recurrent_group`\ 的输出格式。所以外层输出的序列形状,和\ :code:`emb2`\ 的序列形状一致。 - -示例4:beam_search的生成 -======================== - -TBD - - -词汇表 -====== - -.. _glossary_memory: - -Memory ------- - -Memory是PaddlePaddle实现RNN时候使用的一个概念。RNN即时间递归神经网络,通常要求时间步之间具有一些依赖性,即当前时间步下的神经网络依赖前一个时间步神经网络中某一个神经元输出。如下图所示。 - -.. graphviz:: glossary_rnn.dot - -上图中虚线的连接,即是跨越时间步的网络连接。PaddlePaddle在实现RNN的时候,将这种跨越时间步的连接用一个特殊的神经网络单元实现。这个神经网络单元就叫Memory。Memory可以缓存上一个时刻某一个神经元的输出,然后在下一个时间步输入给另一个神经元。使用Memory的RNN实现便如下图所示。 - -.. graphviz:: glossary_rnn_with_memory.dot - -使用这种方式,PaddlePaddle可以比较简单的判断哪些输出是应该跨越时间步的,哪些不是。 - -.. _glossary_timestep: - -时间步 ------- - -参考时间序列。 - - -.. _glossary_sequence: - -时间序列 --------- - -时间序列(time series)是指一系列的特征数据。这些特征数据之间的顺序是有意义的。即特征的数组,而不是特征的集合。而这每一个数组元素,或者每一个系列里的特征数据,即为一个时间步(time step)。值得注意的是,时间序列、时间步的概念,并不真正的和『时间』有关。只要一系列特征数据中的『顺序』是有意义的,即为时间序列的输入。 - -举例说明,例如文本分类中,我们通常将一句话理解成一个时间序列。比如一句话中的每一个单词,会变成词表中的位置。而这一句话就可以表示成这些位置的数组。例如 :code:`[9, 2, 3, 5, 3]` 。 - -关于时间序列(time series)的更详细准确的定义,可以参考 `维基百科页面 Time series `_ 或者 `维基百科中文页面 时间序列 `_ 。 - -另外,Paddle中经常会将时间序列成为 :code:`Sequence` 。他们在Paddle的文档和API中是一个概念。 - -.. _glossary_RNN: - -RNN ---- - -RNN 在PaddlePaddle的文档中,一般表示 :code:`Recurrent neural network`,即时间递归神经网络。详细介绍可以参考 `维基百科页面 Recurrent neural network `_ 或者 `中文维基百科页面 `_ 中关于时间递归神经网络的介绍。 - -RNN 一般在PaddlePaddle中,指对于一个时间序列输入数据,每一个时间步之间的神经网络具有一定的相关性。例如,某一个神经元的一个输入为上一个时间步网络中某一个神经元的输出。或者,从每一个时间步来看,神经网络的网络结构中具有有向环结构。 - -.. 
_glossary_双层RNN: - -双层RNN -------- - -双层RNN顾名思义,即RNN之间有一次嵌套关系。输入数据整体上是一个时间序列,而对于每一个内层特征数据而言,也是一个时间序列。即二维数组,或者数组的数组这个概念。 而双层RNN是可以处理这种输入数据的网络结构。 - -例如,对于段落的文本分类,即将一段话进行分类。我们将一段话看成句子的数组,每个句子又是单词的数组。这便是一种双层RNN的输入数据。而将这个段落的每一句话用lstm编码成一个向量,再对每一句话的编码向量用lstm编码成一个段落的向量。再对这个段落向量进行分类,即为这个双层RNN的网络结构。 - diff --git a/doc_cn/algorithm/rnn/rnn-tutorial.md b/doc_cn/algorithm/rnn/rnn-tutorial.md deleted file mode 100644 index 9e488b0d51956e86f9fb76f450fdb438f596e239..0000000000000000000000000000000000000000 --- a/doc_cn/algorithm/rnn/rnn-tutorial.md +++ /dev/null @@ -1,96 +0,0 @@ -# Recurrent Group教程 - -## 概述 - -序列数据是自然语言处理任务面对的一种主要输入数据类型。 - -一句话是由词语构成的序列,多句话进一步构成了段落。因此,段落可以看作是一个嵌套的双层的序列,这个序列的每个元素又是一个序列。 - -双层序列是PaddlePaddle支持的一种非常灵活的数据组织方式,帮助我们更好地描述段落、多轮对话等更为复杂的语言数据。基于双层序列输入,我们可以设计搭建一个灵活的、层次化的RNN,分别从词语和句子级别编码输入数据,同时也能够引入更加复杂的记忆机制,更好地完成一些复杂的语言理解任务。 - -在PaddlePaddle中,`recurrent_group`是一种任意复杂的RNN单元,用户只需定义RNN在一个时间步内完成的计算,PaddlePaddle负责完成信息和误差在时间序列上的传播。 - -更进一步,`recurrent_group`同样可以扩展到双层序列的处理上。通过两个嵌套的`recurrent_group`分别定义子句级别和词语级别上需要完成的运算,最终实现一个层次化的复杂RNN。 - -目前,在PaddlePaddle中,能够对双向序列进行处理的有`recurrent_group`和部分Layer,具体可参考文档:支持双层序列作为输入的Layer。 - -## 相关概念 - -### 基本原理 -`recurrent_group` 是PaddlePaddle支持的一种任意复杂的RNN单元。使用者只需要关注于设计RNN在一个时间步之内完成的计算,PaddlePaddle负责完成信息和梯度在时间序列上的传播。 - -PaddlePaddle中,`recurrent_group`的一个简单调用如下: - -``` python -recurrent_group(step, input, reverse) -``` -- step:一个可调用的函数,定义一个时间步之内RNN单元完成的计算 -- input:输入,必须是一个单层序列,或者一个双层序列 -- reverse:是否以逆序处理输入序列 - -使用`recurrent_group`的核心是设计step函数的计算逻辑。step函数内部可以自由组合PaddlePaddle支持的各种layer,完成任意的运算逻辑。`recurrent_group` 的输入(即input)会成为step函数的输入,由于step 函数只关注于RNN一个时间步之内的计算,在这里`recurrent_group`替我们完成了原始输入数据的拆分。 - -### 输入 -`recurrent_group`处理的输入序列主要分为以下三种类型: - -- **数据输入**:一个双层序列进入`recurrent_group`会被拆解为一个单层序列,一个单层序列进入`recurrent_group`会被拆解为非序列,然后交给step函数,这一过程对用户是完全透明的。可以有以下两种:1)通过data_layer拿到的用户输入;2)其它layer的输出。 - -- **只读Memory输入**:`StaticInput` 定义了一个只读的Memory,由`StaticInput`指定的输入不会被`recurrent_group`拆解,`recurrent_group` 循环展开的每个时间步总是能够引用所有输入,可以是一个非序列,或者一个单层序列。 - -- **序列生成任务的输入**:`GeneratedInput`只用于在序列生成任务中指定输入数据。 - -### 输入示例 - -序列生成任务大多遵循encoder-decoer架构,encoder和decoder可以是能够处理序列的任意神经网络单元,而RNN是最流行的选择。 - -给定encoder输出和当前词,decoder每次预测产生下一个最可能的词语。在这种结构中,decoder接受两个输入: - -- 要生成的目标序列:是decoder的数据输入,也是decoder循环展开的依据,`recurrent_group`会对这类输入进行拆解。 - -- encoder输出,可以是一个非序列,或者一个单层序列:是一个unbounded memory,decoder循环展开的每一个时间步会引用全部结果,不应该被拆解,这种类型的输入必须通过`StaticInput`指定。关于Unbounded Memory的更多讨论请参考论文 [Neural Turning Machine](https://arxiv.org/abs/1410.5401)。 - -在序列生成任务中,decoder RNN总是引用上一时刻预测出的词的词向量,作为当前时刻输入。`GeneratedInput`自动完成这一过程。 - -### 输出 -`step`函数必须返回一个或多个Layer的输出,这个Layer的输出会作为整个`recurrent_group` 最终的输出结果。在输出的过程中,`recurrent_group` 会将每个时间步的输出拼接,这个过程对用户也是透明的。 - -### memory -memory只能在`recurrent_group`中定义和使用。memory不能独立存在,必须指向一个PaddlePaddle定义的Layer。引用memory得到这layer上一时刻输出,因此,可以将memory理解为一个时延操作。 - -可以显示地指定一个layer的输出用于初始化memory。不指定时,memory默认初始化为0。 - -## 双层RNN介绍 -`recurrent_group`帮助我们完成对输入序列的拆分,对输出的合并,以及计算逻辑在序列上的循环展开。 - -利用这种特性,两个嵌套的`recurrent_group`能够处理双层序列,实现词语和句子两个级别的双层RNN结构。 - -- 单层(word-level)RNN:每个状态(state)对应一个词(word)。 -- 双层(sequence-level)RNN:一个双层RNN由多个单层RNN组成,每个单层RNN(即双层RNN的每个状态)对应一个子句(subseq)。 - -为了描述方便,下文以NLP任务为例,将含有子句(subseq)的段落定义为一个双层序列,将含有词语的句子定义为一个单层序列,那么0层序列即为一个词语。 - -## 双层RNN的使用 - -### 训练流程的使用方法 -使用 `recurrent_group`需要遵循以下约定: - -- **单进单出**:输入和输出都是单层序列。 - - 如果有多个输入,不同输入序列含有的词语数必须严格相等。 - - 输出一个单层序列,输出序列的词语数和输入序列一致。 - - memory:在step函数中定义 memory指向一个layer,通过引用memory得到这个layer上一个时刻输出,形成recurrent 连接。memory的is_seq参数必须为false。如果没有定义memory,每个时间步之内的运算是独立的。 - - 
boot_layer:memory的初始状态,默认初始状为0,memory的is_seq参数必须为false。 - -- **双进双出**:输入和输出都是双层序列。 - - 如果有多个输入序列,不同输入含有的子句(subseq)数必须严格相等,但子句含有的词语数可以不相等。 - - 输出一个双层序列,子句(subseq)数、子句的单词数和指定的一个输入序列一致,默认为第一个输入。 - - memory:在step函数中定义memory,指向一个layer,通过引用memory得到这个layer上一个时刻的输出,形成recurrent连接。定义在外层`recurrent_group` step函数中的memory,能够记录上一个subseq 的状态,可以是一个单层序列(只作为read-only memory),也可以是一个词语。如果没有定义memory,那么 subseq 之间的运算是独立的。 - - boot_layer:memory 初始状态,可以是一个单层序列(只作为read-only memory)或一个向量。默认不设置,即初始状态为0。 - -- **双进单出**:目前还未支持,会报错"In hierachical RNN, all out links should be from sequences now"。 - - -### 生成流程的使用方法 -使用`beam_search`需要遵循以下约定: - -- 单层RNN:从一个word生成下一个word。 -- 双层RNN:即把单层RNN生成后的subseq给拼接成一个新的双层seq。从语义上看,也不存在一个subseq直接生成下一个subseq的情况。 diff --git a/doc_cn/build_and_install/cmake/compile_options.csv b/doc_cn/build_and_install/cmake/compile_options.csv deleted file mode 100644 index 12b45eebb2822d77447fa1bc754360605971dcab..0000000000000000000000000000000000000000 --- a/doc_cn/build_and_install/cmake/compile_options.csv +++ /dev/null @@ -1,14 +0,0 @@ -选项,说明,默认值 -WITH_GPU,是否支持GPU。,取决于是否寻找到CUDA工具链 -WITH_DOUBLE,是否使用双精度浮点数。,否 -WITH_DSO,是否运行时动态加载CUDA动态库,而非静态加载CUDA动态库。,是 -WITH_AVX,是否编译含有AVX指令集的PaddlePaddle二进制文件,是 -WITH_PYTHON,是否内嵌PYTHON解释器。方便今后的嵌入式移植工作。,是 -WITH_STYLE_CHECK,是否编译时进行代码风格检查,是 -WITH_RDMA,是否开启RDMA,否 -WITH_GLOG,是否开启GLOG。如果不开启,则会使用一个简化版的日志,同时方便今后的嵌入式移植工作。,取决于是否寻找到GLOG -WITH_GFLAGS,是否使用GFLAGS。如果不开启,则会使用一个简化版的命令行参数解析器,同时方便今后的嵌入式移植工作。,取决于是否寻找到GFLAGS -WITH_TIMER,是否开启计时功能。如果开启会导致运行略慢,打印的日志变多,但是方便调试和测Benchmark,否 -WITH_TESTING,是否开启单元测试,取决于是否寻找到GTEST -WITH_DOC,是否编译中英文文档,否 -WITH_SWIG_PY,是否编译PYTHON的SWIG接口,该接口可用于预测和定制化训练,取决于是否寻找到SWIG \ No newline at end of file diff --git a/doc_cn/build_and_install/cmake/compile_options.rst b/doc_cn/build_and_install/cmake/compile_options.rst deleted file mode 100644 index f345ead2bf851bdad7be2fb8185d16fd2a318a66..0000000000000000000000000000000000000000 --- a/doc_cn/build_and_install/cmake/compile_options.rst +++ /dev/null @@ -1,43 +0,0 @@ -PaddlePaddle的编译选项 -====================== - -PaddlePaddle的编译选项,包括生成CPU/GPU二进制文件、链接何种BLAS库等。用户可在调用cmake的时候设置它们,详细的cmake使用方法可以参考 `官方文档 `_ 。 - -Bool型的编译选项 ----------------- -用户可在cmake的命令行中,通过使用 ``-D`` 命令设置该类编译选项,例如 - -.. code-block:: bash - - cmake .. -DWITH_GPU=OFF - -.. csv-table:: Bool型的编译选项 - :widths: 1, 7, 2 - :file: compile_options.csv - -BLAS/CUDA/Cudnn的编译选项 --------------------------- -BLAS -+++++ - -PaddlePaddle支持以下任意一种BLAS库:`MKL `_ ,`ATLAS `_ ,`OpenBlAS `_ 和 `REFERENCE BLAS `_ 。 - -.. csv-table:: BLAS路径相关的编译选项 - :widths: 1, 2, 7 - :file: cblas_settings.csv - -CUDA/Cudnn -+++++++++++ - -PaddlePaddle可以使用cudnn v2之后的任何一个版本来编译运行,但尽量请保持编译和运行使用的cudnn是同一个版本。 我们推荐使用最新版本的cudnn v5.1。 - -编译选项的设置 -++++++++++++++ - -PaddePaddle通过编译时指定路径来实现引用各种BLAS/CUDA/Cudnn库。cmake编译时,首先在系统路径(/usr/lib\:/usr/local/lib)中搜索这几个库,同时也会读取相关路径变量来进行搜索。 通过使用 ``-D`` 命令可以设置,例如 - -.. code-block:: bash - - cmake .. -DMKL_ROOT=/opt/mkl/ -DCUDNN_ROOT=/opt/cudnnv5 - -注意:这几个编译选项的设置,只在第一次cmake的时候有效。如果之后想要重新设置,推荐清理整个编译目录(``rm -rf``)后,再指定。 \ No newline at end of file diff --git a/doc_cn/build_and_install/cmake/index.rst b/doc_cn/build_and_install/cmake/index.rst deleted file mode 100644 index e2a12c500177ea5b075416380796ab82e1217f60..0000000000000000000000000000000000000000 --- a/doc_cn/build_and_install/cmake/index.rst +++ /dev/null @@ -1,8 +0,0 @@ -使用cmake编译PaddlePaddle -========================= - -.. 
toctree:: - - install_deps.rst - compile_options.rst - make_and_install.rst diff --git a/doc_cn/build_and_install/cmake/install_deps.rst b/doc_cn/build_and_install/cmake/install_deps.rst deleted file mode 100644 index 7fa4665a954bd41e74145c4a1b00734c3ac41d83..0000000000000000000000000000000000000000 --- a/doc_cn/build_and_install/cmake/install_deps.rst +++ /dev/null @@ -1,4 +0,0 @@ -安装编译PaddlePaddle需要的依赖 -============================== - -参见 `安装编译依赖 <../../../doc/build/build_from_source.html#install-dependencies>`_ diff --git a/doc_cn/build_and_install/cmake/make_and_install.rst b/doc_cn/build_and_install/cmake/make_and_install.rst deleted file mode 100644 index 212b9c9352b01db5215221a6c2faafe0d679d962..0000000000000000000000000000000000000000 --- a/doc_cn/build_and_install/cmake/make_and_install.rst +++ /dev/null @@ -1,4 +0,0 @@ -make和make install -================== - -参见 `make和make install <../../../doc/build/build_from_source.html#build-and-install>`_ diff --git a/doc_cn/build_and_install/index.rst b/doc_cn/build_and_install/index.rst deleted file mode 100644 index 48163fb36e561fe5fd8f6907379687a8b5c97f68..0000000000000000000000000000000000000000 --- a/doc_cn/build_and_install/index.rst +++ /dev/null @@ -1,27 +0,0 @@ -编译与安装 -======================== - -安装 -++++ - -PaddlePaddle提供数个预编译的二进制来进行安装,包括Docker镜像,ubuntu的deb安装包等。我们推荐使用Docker镜像来部署环境,同时欢迎贡献更多的安装包。 - -.. toctree:: - :maxdepth: 1 - - install/docker_install.rst - install/ubuntu_install.rst - - - -编译 -++++ - -.. warning:: - - 编译选项主要推荐高级用户查看,普通用户请走安装流程。 - -.. toctree:: - :maxdepth: 1 - - cmake/index.rst diff --git a/doc_cn/build_and_install/install/docker_install.rst b/doc_cn/build_and_install/install/docker_install.rst deleted file mode 100644 index 40339659be406ec72da8ad89b6d5dd38d72bb5ae..0000000000000000000000000000000000000000 --- a/doc_cn/build_and_install/install/docker_install.rst +++ /dev/null @@ -1,141 +0,0 @@ -安装PaddlePaddle的Docker镜像 -============================ - -PaddlePaddle项目提供官方 `Docker `_ 镜像。Docker镜像是我们目前唯一官方支持的部署和运行方式。 - -下述内容将分为如下几个类别描述。 - -* PaddlePaddle提供的Docker镜像版本 -* 下载和运行Docker镜像 -* 注意事项 - -PaddlePaddle提供的Docker镜像版本 --------------------------------- - -我们提供了12个 `Docker image `_ ,他们的image name都是 :code:`paddle-dev/paddle` ,tag分别为 - -+-----------------+------------------+------------------------+-----------------------+ -| | normal | devel | demo | -+=================+==================+========================+=======================+ -| CPU | cpu-latest | cpu-devel-latest | cpu-demo-latest | -+-----------------+------------------+------------------------+-----------------------+ -| GPU | gpu-latest | gpu-devel-latest | gpu-demo-latest | -+-----------------+------------------+------------------------+-----------------------+ -| CPU WITHOUT AVX | cpu-noavx-latest | cpu-noavx-devel-latest | cpu-noavx-demo-latest | -+-----------------+------------------+------------------------+-----------------------+ -| GPU WITHOUT AVX | gpu-noavx-latest | gpu-noavx-devel-latest | gpu-noavx-demo-latest | -+-----------------+------------------+------------------------+-----------------------+ - -其中,横向包括三个版本,normal,devel和demo。 - -* Normal: 正常的Docker image,只包括paddle的二进制 -* Devel: 包括Paddle的二进制、编译环境和源代码 -* Demo: 包括Paddle运行demo所需要的依赖 - -纵向包括四个版本,他们是。 - -* CPU: CPU版本。需要支持AVX指令集的CPU -* GPU: GPU版本。需要支持AVX指令集的CPU -* CPU WITHOUT AVX: CPU版本,不支持AVX指令集的CPU也可以运行 -* GPU WITHOUT AVX: GPU版本,不需要AVX指令集的CPU也可以运行。 - -用户可以选择对应版本的docker image。使用如下脚本可以确定本机的CPU是否支持 :code:`AVX` 指令集\: - -.. 
code-block:: bash - - if cat /proc/cpuinfo | grep -q avx ; then echo "Support AVX"; else echo "Not support AVX"; fi - -如果输出 :code:`Support AVX`,则可以选择上表中的AVX版本PaddlePaddle。否则需要选择非AVX的PaddlePaddle。选择普通CPU版本的devel版本的image,则可以使用 :code:`paddle-dev/paddle:cpu-devel-latest` 来引用这个image。 - -PaddlePaddle提供的镜像并不包含任何命令运行,想要运行PaddlePaddle,您需要进入镜像运行PaddlePaddle -程序或者自定义一个含有启动脚本的image。具体请参考注意事项中的 :code:`使用ssh访问PaddlePaddle镜像` - -下载和运行Docker镜像 --------------------- - -为了运行PaddlePaddle的docker镜像,您需要在机器中安装好Docker。安装Docker需要您的机器 -至少具有3.10以上的linux kernel。安装方法请参考 -`Docker的官方文档 `_ 。如果您使用 -mac osx或者是windows机器,请参考 -`mac osx的安装文档 `_ 和 -`windows 的安装文档 `_ 。 - -您可以使用 :code:`docker pull` 命令预先下载镜像,也可以直接执行 -:code:`docker run` 命令运行镜像。执行方法如下: - -.. code-block:: bash - - $ docker run -it paddledev/paddle:cpu-latest - -即可启动和进入PaddlePaddle的container。如果运行GPU版本的PaddlePaddle,则需要先将 -cuda相关的Driver和设备映射进container中,脚本类似于 - -.. code-block:: bash - - $ export CUDA_SO="$(\ls /usr/lib64/libcuda* | xargs -I{} echo '-v {}:{}') $(\ls /usr/lib64/libnvidia* | xargs -I{} echo '-v {}:{}')" - $ export DEVICES=$(\ls /dev/nvidia* | xargs -I{} echo '--device {}:{}') - $ docker run ${CUDA_SO} ${DEVICES} -it paddledev/paddle:gpu-latest - -进入Docker container后,运行 :code:`paddle version` 即可打印出PaddlePaddle的版本和构建 -信息。安装完成的PaddlePaddle主体包括三个部分, :code:`paddle` 脚本, python的 -:code:`paddle` 包和 :code:`py_paddle` 包。其中\: - -* :code:`paddle` 脚本和 :code:`paddle` 的python包是PaddlePaddle的训练主要程序。使用 - :code:`paddle` 脚本可以启动PaddlePaddle的训练进程和pserver。而 :code:`paddle` 脚本 - 中的二进制使用了 :code:`paddle` 的python包来做配置文件解析等工作。 -* python包 :code:`py_paddle` 是一个swig封装的PaddlePaddle包,用来做预测和简单的定制化 - 训练。 - -注意事项 --------- - -性能问题 -++++++++ - -由于Docker是基于容器的轻量化虚拟方案,所以在CPU的运算性能上并不会有严重的影响。 -而GPU的驱动和设备全部映射到了容器内,所以GPU在运算性能上也不会有严重的影响。 - -但是如果使用了高性能的网卡,例如RDMA网卡(RoCE 40GbE 或者 IB 56GbE),或者高性能的 -以太网卡 (10GbE)。推荐使用将本地网卡,即 "--net=host" 来进行训练。而不使用docker -的网桥来进行网络通信。 - -远程访问问题和二次开发 -++++++++++++++++++++++ - -由于PaddlePaddle的Docker镜像并不包含任何预定义的运行命令。所以如果想要在后台启用ssh -远程访问,则需要进行一定的二次开发,将ssh装入系统内并开启远程访问。二次开发可以 -使用Dockerfile构建一个全新的docker image。需要参考 -`Dockerfile的文档 `_ 和 -`Dockerfile的最佳实践 `_ -两个文档。 - -简单的含有ssh的Dockerfile如下: - -.. literalinclude:: paddle_ssh.Dockerfile - -使用该Dockerfile构建出镜像,然后运行这个container即可。相关命令为\: - -.. code-block:: bash - - # cd到含有Dockerfile的路径中 - $ docker build . -t paddle_ssh - # 运行这个container,将宿主机的8022端口映射到container的22端口上 - $ docker run -d -p 8022:22 --name paddle_ssh_machine paddle_ssh - -执行如下命令即可以关闭这个container,并且删除container中的数据\: - -.. code-block:: bash - - # 关闭container - $ docker stop paddle_ssh_machine - # 删除container - $ docker rm paddle_ssh_machine - -如果想要在外部机器访问这个container,即可以使用ssh访问宿主机的8022端口。用户名为 -root,密码也是root。命令为\: - -.. 
code-block:: bash - - $ ssh -p 8022 root@YOUR_HOST_MACHINE - -至此,您就可以远程的使用PaddlePaddle啦。 diff --git a/doc_cn/build_and_install/install/paddle_ssh.Dockerfile b/doc_cn/build_and_install/install/paddle_ssh.Dockerfile deleted file mode 100644 index 7cb947bddf4593259cb69f525b44015836291605..0000000000000000000000000000000000000000 --- a/doc_cn/build_and_install/install/paddle_ssh.Dockerfile +++ /dev/null @@ -1,15 +0,0 @@ -FROM paddledev/paddle:cpu-latest - -MAINTAINER PaddlePaddle dev team - -RUN apt-get update -RUN apt-get install -y openssh-server -RUN mkdir /var/run/sshd -RUN echo 'root:root' | chpasswd - -RUN sed -ri 's/^PermitRootLogin\s+.*/PermitRootLogin yes/' /etc/ssh/sshd_config -RUN sed -ri 's/UsePAM yes/#UsePAM yes/g' /etc/ssh/sshd_config - -EXPOSE 22 - -CMD ["/usr/sbin/sshd", "-D"] diff --git a/doc_cn/build_and_install/install/paddle_version.txt b/doc_cn/build_and_install/install/paddle_version.txt deleted file mode 100644 index a80873303fd0d05d963482629000d76260185ef6..0000000000000000000000000000000000000000 --- a/doc_cn/build_and_install/install/paddle_version.txt +++ /dev/null @@ -1,11 +0,0 @@ -PaddlePaddle 0.8.0b1, compiled with - with_avx: ON - with_gpu: OFF - with_double: OFF - with_python: ON - with_rdma: OFF - with_glog: ON - with_gflags: ON - with_metric_learning: - with_timer: OFF - with_predict_sdk: diff --git a/doc_cn/build_and_install/install/ubuntu_install.rst b/doc_cn/build_and_install/install/ubuntu_install.rst deleted file mode 100644 index 4500d6e0b03be9280e3e6c25cddbf7fb389671b8..0000000000000000000000000000000000000000 --- a/doc_cn/build_and_install/install/ubuntu_install.rst +++ /dev/null @@ -1,61 +0,0 @@ -Ubuntu部署PaddlePaddle -=================================== - -PaddlePaddle提供了ubuntu 14.04 deb安装包。 - -安装 ------- - -安装包的下载地址是\: https://github.com/PaddlePaddle/Paddle/releases - -它包含四个版本\: - -* cpu版本: 支持主流x86处理器平台, 使用了avx指令集。 - -* cpu-noavx版本:支持主流x86处理器平台,没有使用avx指令集。 - -* gpu版本:支持主流x86处理器平台,支持nvidia cuda平台,使用了avx指令集。 - -* gpu-noavx版本:支持主流x86处理器平台,支持nvidia cuda平台,没有使用avx指令集。 - -下载完相关安装包后,执行: - -.. code-block:: shell - - sudo apt-get install gdebi - gdebi paddle-*-cpu.deb - -或者: - -.. code-block:: shell - - dpkg -i paddle-*-cpu.deb - apt-get install -f - - -在 :code:`dpkg -i` 的时候如果报一些依赖未找到的错误是正常的, -在 :code:`apt-get install -f` 里会继续安装 PaddlePaddle。 - -安装完成后,可以使用命令 :code:`paddle version` 查看安装后的paddle 版本: - -.. literalinclude:: paddle_version.txt - -可能遇到的问题 --------------- - -libcudart.so/libcudnn.so找不到 -++++++++++++++++++++++++++++++ - -安装完成后,运行 :code:`paddle train` 报错\: - -.. code-block:: shell - - 0831 12:36:04.151525 1085 hl_dso_loader.cc:70] Check failed: nullptr != *dso_handle For Gpu version of PaddlePaddle, it couldn't find CUDA library: libcudart.so Please make sure you already specify its path.Note: for training data on Cpu using Gpu version of PaddlePaddle,you must specify libcudart.so via LD_LIBRARY_PATH. - -原因是未设置cuda运行时环境变量。 如果使用GPU版本的PaddlePaddle,请安装CUDA 7.5 和CUDNN 5到本地环境中,并设置: - -.. 
code-block:: shell - - export LD_LIBRARY_PATH=/usr/local/cuda/lib64:/usr/local/cuda/lib:$LD_LIBRARY_PATH - export PATH=/usr/local/cuda/bin:$PATH - diff --git a/doc_cn/build_and_install/paddle_on_kubernetes.md b/doc_cn/build_and_install/paddle_on_kubernetes.md deleted file mode 100644 index f8c9f19a9fef50c03f6ffee639a580adbf29844a..0000000000000000000000000000000000000000 --- a/doc_cn/build_and_install/paddle_on_kubernetes.md +++ /dev/null @@ -1,205 +0,0 @@ -# Paddle On Kubernetes:单机训练 - -在这篇文档里,我们介绍如何在 Kubernetes 集群上启动一个单机使用CPU的Paddle训练作业。在下一篇中,我们将介绍如何启动分布式训练作业。 - -## 制作Docker镜像 - -在一个功能齐全的Kubernetes机群里,通常我们会安装Ceph等分布式文件系统来存储训练数据。这样的话,一个分布式Paddle训练任务中的每个进程都可以从Ceph读取数据。在这个例子里,我们只演示一个单机作业,所以可以简化对环境的要求,把训练数据直接放在 -Paddle的Docker image里。为此,我们需要制作一个包含训练数据的Paddle镜像。 - -Paddle 的 [Quick Start Tutorial](http://www.paddlepaddle.org/doc/demo/quick_start/index_en.html) -里介绍了用Paddle源码中的脚本下载训练数据的过程。 -而 `paddledev/paddle:cpu-demo-latest` 镜像里有 Paddle 源码与demo,( 请注意,默认的 -Paddle镜像 `paddledev/paddle:cpu-latest` 是不包括源码的, Paddle的各版本镜像可以参考 [Docker installation guide](http://www.paddlepaddle.org/doc/build/docker_install.html) ),所以我们使用这个镜像来下载训练数据到Docker container中,然后把这个包含了训练数据的container保存为一个新的镜像。 - -### 运行容器 - -``` -$ docker run --name quick_start_data -it paddledev/paddle:cpu-demo-latest -``` - -### 下载数据 - -进入容器`/root/paddle/demo/quick_start/data`目录,使用`get_data.sh`下载数据 - -``` -$ root@fbd1f2bb71f4:~/paddle/demo/quick_start/data# ./get_data.sh - -Downloading Amazon Electronics reviews data... ---2016-10-31 01:33:43-- http://snap.stanford.edu/data/amazon/productGraph/categoryFiles/reviews_Electronics_5.json.gz -Resolving snap.stanford.edu (snap.stanford.edu)... 171.64.75.80 -Connecting to snap.stanford.edu (snap.stanford.edu)|171.64.75.80|:80... connected. -HTTP request sent, awaiting response... 
200 OK -Length: 495854086 (473M) [application/x-gzip] -Saving to: 'reviews_Electronics_5.json.gz' - - 10% [=======> ] 874,279 64.7KB/s eta 2h 13m - -``` - -### 修改启动脚本 - -下载完数据后,修改`/root/paddle/demo/quick_start/train.sh`文件,内容如下(增加了一条cd命令) -``` -set -e -cd /root/paddle/demo/quick_start -cfg=trainer_config.lr.py -#cfg=trainer_config.emb.py -#cfg=trainer_config.cnn.py -#cfg=trainer_config.lstm.py -#cfg=trainer_config.bidi-lstm.py -#cfg=trainer_config.db-lstm.py -paddle train \ - --config=$cfg \ - --save_dir=./output \ - --trainer_count=4 \ - --log_period=20 \ - --num_passes=15 \ - --use_gpu=false \ - --show_parameter_stats_period=100 \ - --test_all_data_in_one_period=1 \ - 2>&1 | tee 'train.log' -``` - -### 提交镜像 - -修改启动脚本后,退出容器,使用`docker commit`命令创建新镜像。 - -``` -$ docker commit quick_start_data mypaddle/paddle:quickstart -``` - -## 使用 Kubernetes 进行训练 - ->针对任务运行完成后容器自动退出的场景,Kubernetes有Job类型的资源来支持。下文就是用Job类型的资源来进行训练。 - -### 编写yaml文件 - -在训练时,输出结果可能会随着容器的消耗而被删除,需要在创建容器前挂载卷以便我们保存训练结果。使用我们之前构造的镜像,可以创建一个 [Kubernetes Job](http://kubernetes.io/docs/user-guide/jobs/#what-is-a-job),简单的yaml文件如下: - -``` -apiVersion: batch/v1 -kind: Job -metadata: - name: quickstart -spec: - parallelism: 1 - completions: 1 - template: - metadata: - name: quickstart - spec: - volumes: - - name: output - hostPath: - path: /home/work/paddle_output - containers: - - name: pi - image: mypaddle/paddle:quickstart - command: ["bin/bash", "-c", "/root/paddle/demo/quick_start/train.sh"] - volumeMounts: - - name: output - mountPath: /root/paddle/demo/quick_start/output - restartPolicy: Never -``` - -### 创建Paddle Job - -使用上文创建的yaml文件创建Kubernetes Job,命令为: - -``` -$ kubectl create -f paddle.yaml -``` - -查看job的详细情况: - -``` -$ kubectl get job -NAME DESIRED SUCCESSFUL AGE -quickstart 1 0 58s - -$ kubectl describe job quickstart -Name: quickstart -Namespace: default -Image(s): registry.baidu.com/public/paddle:cpu-demo-latest -Selector: controller-uid=f120da72-9f18-11e6-b363-448a5b355b84 -Parallelism: 1 -Completions: 1 -Start Time: Mon, 31 Oct 2016 11:20:16 +0800 -Labels: controller-uid=f120da72-9f18-11e6-b363-448a5b355b84,job-name=quickstart -Pods Statuses: 0 Running / 1 Succeeded / 0 Failed -Volumes: - output: - Type: HostPath (bare host directory volume) - Path: /home/work/paddle_output -Events: - FirstSeen LastSeen Count From SubobjectPath Type Reason Message - --------- -------- ----- ---- ------------- -------- ------ ------- - 1m 1m 1 {job-controller } Normal SuccessfulCreate Created pod: quickstart-fa0wx -``` - -### 查看训练结果 - -根据Job对应的Pod信息,可以查看此Pod运行的宿主机。 - -``` -kubectl describe pod quickstart-fa0wx -Name: quickstart-fa0wx -Namespace: default -Node: paddle-demo-let02/10.206.202.44 -Start Time: Mon, 31 Oct 2016 11:20:17 +0800 -Labels: controller-uid=f120da72-9f18-11e6-b363-448a5b355b84,job-name=quickstart -Status: Succeeded -IP: 10.0.0.9 -Controllers: Job/quickstart -Containers: - quickstart: - Container ID: docker://b8561f5c79193550d64fa47418a9e67ebdd71546186e840f88de5026b8097465 - Image: registry.baidu.com/public/paddle:cpu-demo-latest - Image ID: docker://18e457ce3d362ff5f3febf8e7f85ffec852f70f3b629add10aed84f930a68750 - Port: - Command: - bin/bash - -c - /root/paddle/demo/quick_start/train.sh - QoS Tier: - cpu: BestEffort - memory: BestEffort - State: Terminated - Reason: Completed - Exit Code: 0 - Started: Mon, 31 Oct 2016 11:20:20 +0800 - Finished: Mon, 31 Oct 2016 11:21:46 +0800 - Ready: False - Restart Count: 0 - Environment Variables: -Conditions: - Type Status - Ready False -Volumes: - output: - Type: HostPath (bare host 
directory volume) - Path: /home/work/paddle_output -``` - -我们还可以登录到宿主机上查看训练结果。 - -``` -[root@paddle-demo-let02 paddle_output]# ll -total 60 -drwxr-xr-x 2 root root 4096 Oct 31 11:20 pass-00000 -drwxr-xr-x 2 root root 4096 Oct 31 11:20 pass-00001 -drwxr-xr-x 2 root root 4096 Oct 31 11:21 pass-00002 -drwxr-xr-x 2 root root 4096 Oct 31 11:21 pass-00003 -drwxr-xr-x 2 root root 4096 Oct 31 11:21 pass-00004 -drwxr-xr-x 2 root root 4096 Oct 31 11:21 pass-00005 -drwxr-xr-x 2 root root 4096 Oct 31 11:21 pass-00006 -drwxr-xr-x 2 root root 4096 Oct 31 11:21 pass-00007 -drwxr-xr-x 2 root root 4096 Oct 31 11:21 pass-00008 -drwxr-xr-x 2 root root 4096 Oct 31 11:21 pass-00009 -drwxr-xr-x 2 root root 4096 Oct 31 11:21 pass-00010 -drwxr-xr-x 2 root root 4096 Oct 31 11:21 pass-00011 -drwxr-xr-x 2 root root 4096 Oct 31 11:21 pass-00012 -drwxr-xr-x 2 root root 4096 Oct 31 11:21 pass-00013 -drwxr-xr-x 2 root root 4096 Oct 31 11:21 pass-00014 -``` diff --git a/doc_cn/cluster/index.rst b/doc_cn/cluster/index.rst deleted file mode 100644 index 25313a9635bbf567a1aedfac3c379802d601d283..0000000000000000000000000000000000000000 --- a/doc_cn/cluster/index.rst +++ /dev/null @@ -1,11 +0,0 @@ -集群训练 -======== - -* `集群训练 <../../doc/cluster/index.html>`_ - -.. toctree:: - :maxdepth: 2 - :glob: - - 集群训练(对内) - diff --git a/doc_cn/cluster/k8s/distributed_training_on_kubernetes.md b/doc_cn/cluster/k8s/distributed_training_on_kubernetes.md deleted file mode 100644 index d9ed431ec0566cf90f11ebaeec56560ff69e71fe..0000000000000000000000000000000000000000 --- a/doc_cn/cluster/k8s/distributed_training_on_kubernetes.md +++ /dev/null @@ -1,309 +0,0 @@ - -# PaddlePaddle on Kubernetes:分布式训练 - -前一篇文章介绍了如何在Kubernetes集群上启动一个单机PaddlePaddle训练作业 (Job)。在这篇文章里,我们介绍如何在Kubernetes集群上进行分布式PaddlePaddle训练作业。关于PaddlePaddle的分布式训练,文章 [Cluster Training](https://github.com/baidu/Paddle/blob/develop/doc/cluster/opensource/cluster_train.md)介绍了一种通过SSH远程分发任务,进行分布式训练的方法,与此不同的是,本文将介绍在Kubernetes容器管理平台上快速构建PaddlePaddle容器集群,进行分布式训练的方案。 - -## Kubernetes 基本概念 - -[*Kubernetes*](http://kubernetes.io/)是Google开源的容器集群管理系统,其提供应用部署、维护、 扩展机制等功能,利用Kubernetes能方便地管理跨机器运行容器化的应用。Kubernetes可以在物理机或虚拟机上运行,且支持部署到[AWS](http://kubernetes.io/docs/getting-started-guides/aws),[Azure](http://kubernetes.io/docs/getting-started-guides/azure/),[GCE](http://kubernetes.io/docs/getting-started-guides/gce)等多种公有云环境。介绍分布式训练之前,需要对[Kubernetes](http://kubernetes.io/)有一个基本的认识,下面先简要介绍一下本文用到的几个Kubernetes概念。 - -- [*Node*](http://kubernetes.io/docs/admin/node/) 表示一个Kubernetes集群中的一个工作节点,这个节点可以是物理机或者虚拟机,Kubernetes集群就是由node节点与master节点组成的。 - -- [*Pod*](http://kubernetes.io/docs/user-guide/pods/) 是一组(一个或多个)容器,pod是Kubernetes的最小调度单元,一个pod中的所有容器会被调度到同一个node上。Pod中的容器共享NET,PID,IPC,UTS等Linux namespace。由于容器之间共享NET namespace,所以它们使用同一个IP地址,可以通过*localhost*互相通信。不同pod之间可以通过IP地址访问。 - -- [*Job*](http://kubernetes.io/docs/user-guide/jobs/) 是Kubernetes上运行的作业,一次作业称为一个job,通常每个job包括一个或者多个pods。 - -- [*Volume*](http://kubernetes.io/docs/user-guide/volumes/) 存储卷,是pod内的容器都可以访问的共享目录,也是容器与node之间共享文件的方式,因为容器内的文件都是暂时存在的,当容器因为各种原因被销毁时,其内部的文件也会随之消失。通过volume,就可以将这些文件持久化存储。Kubernetes支持多种volume,例如hostPath(宿主机目录),gcePersistentDisk,awsElasticBlockStore等。 - -- [*Namespaces*](http://kubernetes.io/docs/user-guide/volumes/) 命名空间,在kubernetes中创建的所有资源对象(例如上文的pod,job)等都属于一个命名空间,在同一个命名空间中,资源对象的名字是唯一的,不同空间的资源名可以重复,命名空间主要为了对象进行逻辑上的分组便于管理。本文只使用了默认命名空间。 - -## 整体方案 - -### 部署Kubernetes集群 - 
-首先,我们需要拥有一个Kubernetes集群,在这个集群中所有node与pod都可以互相通信。关于Kubernetes集群搭建,可以参考[官方文档](http://kubernetes.io/docs/getting-started-guides/kubeadm/),在以后的文章中我们也会介绍AWS上搭建的方案。本文假设大家能找到几台物理机,并且可以按照官方文档在上面部署Kubernetes。在本文的环境中,Kubernetes集群中所有node都挂载了一个[MFS](http://moosefs.org/)(Moose filesystem,一种分布式文件系统)共享目录,我们通过这个目录来存放训练文件与最终输出的模型。关于MFS的安装部署,可以参考[MooseFS documentation](https://moosefs.com/documentation.html)。在训练之前,用户将配置与训练数据切分好放在MFS目录中,训练时,程序从此目录拷贝文件到容器内进行训练,将结果保存到此目录里。整体的结构图如下: - -![paddle on kubernetes结构图](k8s-paddle-arch.png) - -上图描述了一个3节点的分布式训练场景,Kubernetes集群的每个node上都挂载了一个MFS目录,这个目录可以通过volume的形式挂载到容器中。Kubernetes为这次训练创建了3个pod并且调度到了3个node上运行,每个pod包含一个PaddlePaddle容器。在容器创建后,会启动pserver与trainer进程,读取volume中的数据进行这次分布式训练。 - -### 使用 Job - -我们使用Kubernetes中的job这个概念来代表一次分布式训练。Job表示一次性作业,在作业完成后,Kubernetes会销毁job产生的容器并且释放相关资源。 - -在Kubernetes中,可以通过编写一个YAML文件,来描述这个job,在这个文件中,主要包含了一些配置信息,例如PaddlePaddle的节点个数,`paddle pserver`开放的端口个数与端口号,使用的网卡设备等,这些信息通过环境变量的形式传递给容器内的程序使用。 - -在一次分布式训练中,用户确定好本次训练需要的PaddlePaddle节点个数,将切分好的训练数据与配置文件上传到MFS共享目录中。然后编写这次训练的job YAML文件,提交给Kubernetes集群创建并开始作业。 - -### 创建PaddlePaddle节点 - -当Kubernetes master收到请求,解析完YAML文件后,会创建出多个pod(个数为PaddlePaddle节点数),Kubernetes会把这些pod调度到集群的node上运行。一个pod就代表一个PaddlePaddle节点,当pod被成功分配到一台物理/虚拟机上后,Kubernetes会启动pod内的容器,这个容器会根据YAML文件中的环境变量,启动`paddle pserver`与`paddle train`进程。 - -### 启动训练 - -在容器启动后,会通过脚本来启动这次分布式训练,我们知道`paddle train`进程启动时需要知道其他节点的IP地址以及本节点的trainer_id,由于PaddlePaddle本身不提供类似服务发现的功能,所以在本文的启动脚本中,每个节点会根据job name向Kubernetes apiserver查询这个job对应的所有pod信息(Kubernetes默认会在每个容器的环境变量中写入apiserver的地址)。 - -根据这些pod信息,就可以通过某种方式,为每个pod分配一个唯一的trainer_id。本文把所有pod的IP地址进行排序,将顺序作为每个PaddlePaddle节点的trainer_id。启动脚本的工作流程大致如下: - - 1. 查询Kubernetes apiserver获取pod信息,根据IP分配trainer_id - 1. 从MFS共享目录中拷贝训练文件到容器内 - 1. 根据环境变量,解析出`paddle pserver`与`paddle train`的启动参数,启动进程 - 1. 训练时,PaddlePaddle会自动将结果保存在trainer_id为0的节点上,将输出路径设置为MFS目录,保存输出的文件 - - -## 搭建过程 - -根据前文的描述,要在已有的Kubernetes集群上进行PaddlePaddle的分布式训练,主要分为以下几个步骤: - -1. 制作PaddlePaddle镜像 -1. 将训练文件与切分好的数据上传到共享存储 -1. 编写本次训练的YAML文件,创建一个Kubernetes job -1. 
训练结束后查看输出结果 - -下面就根据这几个步骤分别介绍。 - - -### 制作镜像 - -PaddlePaddle镜像需要提供`paddle pserver`与`paddle train`进程的运行环境,用这个镜像创建的容器需要有以下两个功能: - -- 拷贝训练文件到容器内 - -- 生成`paddle pserver`与`paddle train`进程的启动参数,并且启动训练 - -因为官方镜像 `paddledev/paddle:cpu-latest` 内已经包含PaddlePaddle的执行程序但是还没上述功能,所以我们可以在这个基础上,添加启动脚本,制作新镜像来完成以上的工作。镜像的*Dockerfile*如下: - -```Dockerfile -FROM paddledev/paddle:cpu-latest - -MAINTAINER zjsxzong89@gmail.com - -COPY start.sh /root/ -COPY start_paddle.py /root/ -CMD ["bash"," -c","/root/start.sh"] -``` - -[`start.sh`](start.sh)文件拷贝训练文件到容器内,然后执行[`start_paddle.py`](start_paddle.py)脚本启动训练,前文提到的获取其他节点IP地址,分配`trainer_id`等都在`start_paddle.py`脚本中完成。 - -`start_paddle.py`脚本开始时,会先进行参数的初始化与解析。 - -```python -parser = argparse.ArgumentParser(prog="start_paddle.py", - description='simple tool for k8s') - args, train_args_list = parser.parse_known_args() - train_args = refine_unknown_args(train_args_list) - train_args_dict = dict(zip(train_args[:-1:2], train_args[1::2])) - podlist = getPodList() -``` - -然后通过函数`getPodList()`访问Kubernetes的接口来查询此job对应的所有pod信息。当所有pod都处于running状态(容器运行都运行)时,再通过函数`getIdMap(podlist)`获取trainer_id。 - -```python - podlist = getPodList() - # need to wait until all pods are running - while not isPodAllRunning(podlist): - time.sleep(10) - podlist = getPodList() - idMap = getIdMap(podlist) -``` - -在函数`getIdMap(podlist)`内部,我们通过读取`podlist`中每个pod的IP地址,将IP排序生成的序号作为trainer_id。 - -```python -def getIdMap(podlist): - ''' - generate tainer_id by ip - ''' - ips = [] - for pod in podlist["items"]: - ips.append(pod["status"]["podIP"]) - ips.sort() - idMap = {} - for i in range(len(ips)): - idMap[ips[i]] = i - return idMap -``` - -在得到`idMap`后,通过函数`startPaddle(idMap, train_args_dict)`构造`paddle pserver`与`paddle train`的启动参数并执行进程。 - -在函数`startPaddle`中,最主要的工作就是解析出`paddle pserver`与`paddle train`的启动参数。例如`paddle train`参数的解析,解析环境变量得到`PADDLE_NIC`,`PADDLE_PORT`,`PADDLE_PORTS_NUM`等参数,然后通过自身的IP地址在`idMap`中获取`trainerId`。 - -```python - program = 'paddle train' - args = " --nics=" + PADDLE_NIC - args += " --port=" + str(PADDLE_PORT) - args += " --ports_num=" + str(PADDLE_PORTS_NUM) - args += " --comment=" + "paddle_process_by_paddle" - ip_string = "" - for ip in idMap.keys(): - ip_string += (ip + ",") - ip_string = ip_string.rstrip(",") - args += " --pservers=" + ip_string - args_ext = "" - for key, value in train_args_dict.items(): - args_ext += (' --' + key + '=' + value) - localIP = socket.gethostbyname(socket.gethostname()) - trainerId = idMap[localIP] - args += " " + args_ext + " --trainer_id=" + \ - str(trainerId) + " --save_dir=" + JOB_PATH_OUTPUT -``` - -使用 `docker build` 构建镜像: - -```bash -docker build -t your_repo/paddle:mypaddle . -``` - -然后将构建成功的镜像上传到镜像仓库。 - -```bash -docker push your_repo/paddle:mypaddle -``` - -### 上传训练文件 - -本文使用PaddlePaddle官方的[recommendation demo](http://www.paddlepaddle.org/doc/demo/index.html#recommendation)作为这次训练的内容,我们将训练文件与数据放在一个job name命名的目录中,上传到MFS共享存储。完成后MFS上的文件内容大致如下: - -```bash -[root@paddle-kubernetes-node0 mfs]# tree -d -. 
-└── paddle-cluster-job - ├── data - │   ├── 0 - │   │ - │   ├── 1 - │   │ - │   └── 2 - ├── output - └── recommendation -``` - -目录中paddle-cluster-job是本次训练对应的job name,本次训练要求有3个PaddlePaddle节点,在paddle-cluster-job/data目录中存放切分好的数据,文件夹0,1,2分别代表3个节点的trainer_id。recommendation文件夹内存放训练文件,output文件夹存放训练结果与日志。 - -### 创建Job - -Kubernetes可以通过YAML文件来创建相关对象,然后可以使用命令行工具创建job。 - -Job YAML文件描述了这次训练使用的Docker镜像,需要启动的节点个数以及 `paddle pserver`与 `paddle train`进程启动的必要参数,也描述了容器需要使用的存储卷挂载的情况。YAML文件中各个字段的具体含义,可以查看[Kubernetes Job API](http://kubernetes.io/docs/api-reference/batch/v1/definitions/#_v1_job)。例如,本次训练的YAML文件可以写成: - -```yaml -apiVersion: batch/v1 -kind: Job -metadata: - name: paddle-cluster-job -spec: - parallelism: 3 - completions: 3 - template: - metadata: - name: paddle-cluster-job - spec: - volumes: - - name: jobpath - hostPath: - path: /home/work/mfs - containers: - - name: trainer - image: your_repo/paddle:mypaddle - command: ["bin/bash", "-c", "/root/start.sh"] - env: - - name: JOB_NAME - value: paddle-cluster-job - - name: JOB_PATH - value: /home/jobpath - - name: JOB_NAMESPACE - value: default - - name: TRAIN_CONFIG_DIR - value: recommendation - - name: CONF_PADDLE_NIC - value: eth0 - - name: CONF_PADDLE_PORT - value: "7164" - - name: CONF_PADDLE_PORTS_NUM - value: "2" - - name: CONF_PADDLE_PORTS_NUM_SPARSE - value: "2" - - name: CONF_PADDLE_GRADIENT_NUM - value: "3" - volumeMounts: - - name: jobpath - mountPath: /home/jobpath - restartPolicy: Never -``` - -文件中,`metadata`下的`name`表示这个job的名字。`parallelism,completions`字段表示这个job会同时开启3个PaddlePaddle节点,成功训练且退出的pod数目为3时,这个job才算成功结束。然后申明一个存储卷`jobpath`,代表宿主机目录`/home/work/mfs`,在对容器的描述`containers`字段中,将此目录挂载为容器的`/home/jobpath`目录,这样容器的`/home/jobpath`目录就成为了共享存储,放在这个目录里的文件其实是保存到了MFS上。 - -`env`字段表示容器的环境变量,我们将`paddle`运行的一些参数通过这种方式传递到容器内。 - -`JOB_PATH`表示共享存储挂载的路径,`JOB_NAME`表示job名字,`TRAIN_CONFIG_DIR`表示本次训练文件所在目录,这三个变量组合就可以找到本次训练需要的文件路径。 - -`CONF_PADDLE_NIC`表示`paddle pserver`进程需要的`--nics`参数,即网卡名 - -`CONF_PADDLE_PORT`表示`paddle pserver`的`--port`参数,`CONF_PADDLE_PORTS_NUM`则表示稠密更新的端口数量,也就是`--ports_num`参数。 - -`CONF_PADDLE_PORTS_NUM_SPARSE`表示稀疏更新的端口数量,也就是`--ports_num_for_sparse`参数。 - -`CONF_PADDLE_GRADIENT_NUM`表示训练节点数量,即`--num_gradient_servers`参数 - -编写完YAML文件后,可以使用Kubernetes的命令行工具创建job。 - -```bash -kubectl create -f job.yaml -``` - -创建成功后,Kubernetes就会创建3个pod作为PaddlePaddle节点然后拉取镜像,启动容器开始训练。 - - -### 查看输出 - -在训练过程中,可以在共享存储上查看输出的日志和模型,例如output目录下就存放了输出结果。注意node_0,node_1,node_2这几个目录表示PaddlePaddle节点与trainer_id,并不是Kubernetes中的node概念。 - -```bash -[root@paddle-kubernetes-node0 output]# tree -d -. -├── node_0 -│   ├── server.log -│   └── train.log -├── node_1 -│   ├── server.log -│   └── train.log -├── node_2 -...... -├── pass-00002 -│   ├── done -│   ├── ___embedding_0__.w0 -│   ├── ___embedding_1__.w0 -...... -``` - -我们可以通过日志查看容器训练的情况,例如: - -```bash -[root@paddle-kubernetes-node0 node_0]# cat train.log -I1116 09:10:17.123121 50 Util.cpp:155] commandline: - /usr/local/bin/../opt/paddle/bin/paddle_trainer - --nics=eth0 --port=7164 - --ports_num=2 --comment=paddle_process_by_paddle - --pservers=192.168.129.66,192.168.223.143,192.168.129.71 - --ports_num_for_sparse=2 --config=./trainer_config.py - --trainer_count=4 --num_passes=10 --use_gpu=0 - --log_period=50 --dot_period=10 --saving_period=1 - --local=0 --trainer_id=0 - --save_dir=/home/jobpath/paddle-cluster-job/output -I1116 09:10:17.123440 50 Util.cpp:130] Calling runInitFunctions -I1116 09:10:17.123764 50 Util.cpp:143] Call runInitFunctions done. 
-[WARNING 2016-11-16 09:10:17,227 default_decorators.py:40] please use keyword arguments in paddle config. -[INFO 2016-11-16 09:10:17,239 networks.py:1282] The input order is [movie_id, title, genres, user_id, gender, age, occupation, rating] -[INFO 2016-11-16 09:10:17,239 networks.py:1289] The output order is [__regression_cost_0__] -I1116 09:10:17.392917 50 Trainer.cpp:170] trainer mode: Normal -I1116 09:10:17.613910 50 PyDataProvider2.cpp:257] loading dataprovider dataprovider::process -I1116 09:10:17.680917 50 PyDataProvider2.cpp:257] loading dataprovider dataprovider::process -I1116 09:10:17.681543 50 GradientMachine.cpp:134] Initing parameters.. -I1116 09:10:18.012390 50 GradientMachine.cpp:141] Init parameters done. -I1116 09:10:18.018641 50 ParameterClient2.cpp:122] pserver 0 192.168.129.66:7164 -I1116 09:10:18.018950 50 ParameterClient2.cpp:122] pserver 1 192.168.129.66:7165 -I1116 09:10:18.019069 50 ParameterClient2.cpp:122] pserver 2 192.168.223.143:7164 -I1116 09:10:18.019492 50 ParameterClient2.cpp:122] pserver 3 192.168.223.143:7165 -I1116 09:10:18.019716 50 ParameterClient2.cpp:122] pserver 4 192.168.129.71:7164 -I1116 09:10:18.019836 50 ParameterClient2.cpp:122] pserver 5 192.168.129.71:7165 -``` \ No newline at end of file diff --git a/doc_cn/cluster/k8s/job.yaml b/doc_cn/cluster/k8s/job.yaml deleted file mode 100644 index 1e0ac464b2ec71e98c28f090124690b01b0755ce..0000000000000000000000000000000000000000 --- a/doc_cn/cluster/k8s/job.yaml +++ /dev/null @@ -1,43 +0,0 @@ -apiVersion: batch/v1 -kind: Job -metadata: - name: paddle-cluster-job -spec: - parallelism: 3 - completions: 3 - template: - metadata: - name: paddle-cluster-job - spec: - volumes: - - name: jobpath - hostPath: - path: /home/work/paddle_output - containers: - - name: trainer - image: registry.baidu.com/public/paddle:mypaddle - command: ["bin/bash", "-c", "/root/start.sh"] - env: - - name: JOB_NAME - value: paddle-cluster-job - - name: JOB_PATH - value: /home/jobpath - - name: JOB_NAMESPACE - value: default - - name: TRAIN_CONFIG_DIR - value: recommendation - - name: CONF_PADDLE_NIC - value: eth0 - - name: CONF_PADDLE_PORT - value: "7164" - - name: CONF_PADDLE_PORTS_NUM - value: "2" - - name: CONF_PADDLE_PORTS_NUM_SPARSE - value: "2" - - name: CONF_PADDLE_GRADIENT_NUM - value: "3" - volumeMounts: - - name: jobpath - mountPath: /home/jobpath - restartPolicy: Never - \ No newline at end of file diff --git a/doc_cn/cluster/k8s/k8s-paddle-arch.png b/doc_cn/cluster/k8s/k8s-paddle-arch.png deleted file mode 100644 index a8c64550b1fa7f41de1eaa9a037c65cddc0cd30e..0000000000000000000000000000000000000000 Binary files a/doc_cn/cluster/k8s/k8s-paddle-arch.png and /dev/null differ diff --git a/doc_cn/cluster/k8s/start.sh b/doc_cn/cluster/k8s/start.sh deleted file mode 100755 index b3a1334174a20b018d35de3b01b149fc5b10d49d..0000000000000000000000000000000000000000 --- a/doc_cn/cluster/k8s/start.sh +++ /dev/null @@ -1,19 +0,0 @@ -#!/bin/sh -set -eu - -jobconfig=${JOB_PATH}"/"${JOB_NAME}"/"${TRAIN_CONFIG_DIR} -cd /root -cp -rf $jobconfig . 
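# The copy above stages this job's training config from the shared MFS volume
# (mounted under ${JOB_PATH}, organized by job name) into the container;
# the script below then launches the pserver and trainer from that directory.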
-cd $TRAIN_CONFIG_DIR - - -python /root/start_paddle.py \ - --dot_period=10 \ - --ports_num_for_sparse=$CONF_PADDLE_PORTS_NUM \ - --log_period=50 \ - --num_passes=10 \ - --trainer_count=4 \ - --saving_period=1 \ - --local=0 \ - --config=./trainer_config.py \ - --use_gpu=0 diff --git a/doc_cn/cluster/k8s/start_paddle.py b/doc_cn/cluster/k8s/start_paddle.py deleted file mode 100755 index 6a461614101aa74f3badf67e65c0d6fcb985ee9b..0000000000000000000000000000000000000000 --- a/doc_cn/cluster/k8s/start_paddle.py +++ /dev/null @@ -1,159 +0,0 @@ -#!/usr/bin/python -# Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserved -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -import requests -import time -import socket -import os -import argparse - - -# configuration for cluster -API = "/api/v1/namespaces/" -JOBSELECTOR = "labelSelector=job-name=" -JOB_PATH = os.getenv("JOB_PATH") + "/" + os.getenv("JOB_NAME") -JOB_PATH_DATA = JOB_PATH + "/data" -JOB_PATH_OUTPUT = JOB_PATH + "/output" -JOBNAME = os.getenv("JOB_NAME") -NAMESPACE = os.getenv("JOB_NAMESPACE") -PADDLE_NIC = os.getenv("CONF_PADDLE_NIC") -PADDLE_PORT = os.getenv("CONF_PADDLE_PORT") -PADDLE_PORTS_NUM = os.getenv("CONF_PADDLE_PORTS_NUM") -PADDLE_PORTS_NUM_SPARSE = os.getenv("CONF_PADDLE_PORTS_NUM_SPARSE") -PADDLE_SERVER_NUM = os.getenv("CONF_PADDLE_GRADIENT_NUM") - - -def refine_unknown_args(cmd_args): - ''' - refine unknown parameters to handle some special parameters - ''' - new_args = [] - for arg in cmd_args: - if arg.startswith("--") and arg.find("=") != -1: - equal_pos = arg.find("=") # find first = pos - arglist = list(arg) - arglist[equal_pos] = " " - arg = "".join(arglist) - arg = arg.lstrip("-") - new_args += arg.split(" ") - elif arg.startswith("--") and arg.find("=") == -1: - arg = arg.lstrip("-") - new_args.append(arg) - else: - new_args.append(arg) - return new_args - - -def isPodAllRunning(podlist): - ''' - check all pod is running - ''' - require = len(podlist["items"]) - running = 0 - for pod in podlist["items"]: - if pod["status"]["phase"] == "Running": - running += 1 - if require == running: - return True - return False - - -def getPodList(): - ''' - get all container status of the job - ''' - apiserver = "https://" + \ - os.getenv("KUBERNETES_SERVICE_HOST") + ":" + \ - os.getenv("KUBERNETES_SERVICE_PORT_HTTPS") - - pod = API + NAMESPACE + "/pods?" 
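    # KUBERNETES_SERVICE_HOST / KUBERNETES_SERVICE_PORT_HTTPS are injected
    # into every container by Kubernetes (as noted in the tutorial above),
    # so the apiserver is reachable without extra configuration. The label
    # selector `job-name=<JOB_NAME>` limits the query to this job's pods.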
- job = JOBNAME - return requests.get(apiserver + pod + JOBSELECTOR + job, - verify=False).json() - - -def getIdMap(podlist): - ''' - generate tainer_id by ip - ''' - ips = [] - for pod in podlist["items"]: - ips.append(pod["status"]["podIP"]) - ips.sort() - idMap = {} - for i in range(len(ips)): - idMap[ips[i]] = i - return idMap - - -def startPaddle(idMap={}, train_args_dict=None): - ''' - start paddle pserver and trainer - ''' - program = 'paddle train' - args = " --nics=" + PADDLE_NIC - args += " --port=" + str(PADDLE_PORT) - args += " --ports_num=" + str(PADDLE_PORTS_NUM) - args += " --comment=" + "paddle_process_by_paddle" - ip_string = "" - for ip in idMap.keys(): - ip_string += (ip + ",") - ip_string = ip_string.rstrip(",") - args += " --pservers=" + ip_string - args_ext = "" - for key, value in train_args_dict.items(): - args_ext += (' --' + key + '=' + value) - localIP = socket.gethostbyname(socket.gethostname()) - trainerId = idMap[localIP] - args += " " + args_ext + " --trainer_id=" + \ - str(trainerId) + " --save_dir=" + JOB_PATH_OUTPUT - logDir = JOB_PATH_OUTPUT + "/node_" + str(trainerId) - if not os.path.exists(JOB_PATH_OUTPUT): - os.makedirs(JOB_PATH_OUTPUT) - os.mkdir(logDir) - copyCommand = 'cp -rf ' + JOB_PATH_DATA + \ - "/" + str(trainerId) + " ./data" - os.system(copyCommand) - startPserver = 'nohup paddle pserver' + \ - " --port=" + str(PADDLE_PORT) + \ - " --ports_num=" + str(PADDLE_PORTS_NUM) + \ - " --ports_num_for_sparse=" + str(PADDLE_PORTS_NUM_SPARSE) + \ - " --nics=" + PADDLE_NIC + \ - " --comment=" + "paddle_process_by_paddle" + \ - " --num_gradient_servers=" + str(PADDLE_SERVER_NUM) +\ - " > " + logDir + "/server.log 2>&1 &" - print startPserver - os.system(startPserver) - # wait until pservers completely start - time.sleep(10) - startTrainer = program + args + " > " + \ - logDir + "/train.log 2>&1 < /dev/null" - print startTrainer - os.system(startTrainer) - - -if __name__ == '__main__': - parser = argparse.ArgumentParser(prog="start_paddle.py", - description='simple tool for k8s') - args, train_args_list = parser.parse_known_args() - train_args = refine_unknown_args(train_args_list) - train_args_dict = dict(zip(train_args[:-1:2], train_args[1::2])) - podlist = getPodList() - # need to wait until all pods are running - while not isPodAllRunning(podlist): - time.sleep(10) - podlist = getPodList() - idMap = getIdMap(podlist) - startPaddle(idMap, train_args_dict) diff --git a/doc_cn/concepts/nn.rst b/doc_cn/concepts/nn.rst deleted file mode 100644 index f4d2cf490d14761f4b9f6a308180c5e8015cbecb..0000000000000000000000000000000000000000 --- a/doc_cn/concepts/nn.rst +++ /dev/null @@ -1,3 +0,0 @@ -TBD - -目前正在书写中。敬请期待。 \ No newline at end of file diff --git a/doc_cn/concepts/program_concepts.rst b/doc_cn/concepts/program_concepts.rst deleted file mode 100644 index af5bbdac260afce0a032461ab913d05bc2f55929..0000000000000000000000000000000000000000 --- a/doc_cn/concepts/program_concepts.rst +++ /dev/null @@ -1,4 +0,0 @@ -TBD -### - -目前正在书写中。敬请期待。 \ No newline at end of file diff --git a/doc_cn/concepts/pserver_topology.dot b/doc_cn/concepts/pserver_topology.dot deleted file mode 100644 index 9ff658b8495030f322d4f553f3bf72ddf8d3a578..0000000000000000000000000000000000000000 --- a/doc_cn/concepts/pserver_topology.dot +++ /dev/null @@ -1,68 +0,0 @@ -graph pp_topology { - rankdir=BT; - subgraph cluster_node0 { - style=filled; - color=lightgrey; - node [style=filled, color=white, shape=box]; - label = "机器0" - - pserver0 [label="Parameter \n Server 0"] - trainer0 
[label="Trainer 0"] - } - subgraph cluster_node1 { - style=filled; - color=lightgrey; - node [style=filled, color=white, shape=box]; - label = "机器1" - - pserver1 [label="Parameter \n Server 1"] - trainer1 [label="Trainer 1"] - } - - subgraph cluster_node2 { - style=filled; - color=lightgrey; - node [style=filled, color=white, shape=box]; - label = "机器2" - - pserver2 [label="Parameter \n Server 2"] - trainer2 [label="Trainer 2"] - } - - subgraph cluster_node3 { - style=filled; - color=lightgrey; - node [style=filled, color=white, shape=box]; - label = "机器3" - - pserver3 [label="Parameter \n Server 3"] - trainer3 [label="Trainer 3"] - } - - data [label="数据", shape=hexagon] - - trainer0 -- pserver0 - trainer0 -- pserver1 - trainer0 -- pserver2 - trainer0 -- pserver3 - - trainer1 -- pserver0 - trainer1 -- pserver1 - trainer1 -- pserver2 - trainer1 -- pserver3 - - trainer2 -- pserver0 - trainer2 -- pserver1 - trainer2 -- pserver2 - trainer2 -- pserver3 - - trainer3 -- pserver0 - trainer3 -- pserver1 - trainer3 -- pserver2 - trainer3 -- pserver3 - - data -- trainer0 - data -- trainer1 - data -- trainer2 - data -- trainer3 -} diff --git a/doc_cn/concepts/trainer_config.py b/doc_cn/concepts/trainer_config.py deleted file mode 100644 index 3eccbd7bc11f4865130286de718d1be74e4d1722..0000000000000000000000000000000000000000 --- a/doc_cn/concepts/trainer_config.py +++ /dev/null @@ -1,29 +0,0 @@ -from paddle.trainer_config_helpers import * - -define_py_data_sources2( - train_list='train.list', - test_list='test.list', - module='provider', - obj='process') -settings( - batch_size=128, - learning_rate=1e-3, - learning_method=AdamOptimizer(), - regularization=L2Regularization(0.5)) - -img = data_layer(name='pixel', size=28 * 28) - -hidden1 = simple_img_conv_pool( - input=img, filter_size=3, num_filters=32, pool_size=3, num_channel=1) - -hidden2 = fc_layer( - input=hidden1, - size=200, - act=TanhActivation(), - layer_attr=ExtraAttr(drop_rate=0.5)) -predict = fc_layer(input=hidden2, size=10, act=SoftmaxActivation()) - -outputs( - classification_cost( - input=predict, label=data_layer( - name='label', size=10))) diff --git a/doc_cn/concepts/use_concepts.rst b/doc_cn/concepts/use_concepts.rst deleted file mode 100644 index 2d27e29fac37d54e4a31540cf75361464f51b193..0000000000000000000000000000000000000000 --- a/doc_cn/concepts/use_concepts.rst +++ /dev/null @@ -1,155 +0,0 @@ -######################### -PaddlePaddle 基本使用概念 -######################### - -PaddlePaddle是一个深度学习框架,支持单机模式和多机模式。 - -单机模式用命令 ``paddle train`` 可以启动一个trainer进程,单机训练通常只包括一个trainer进程。如果数据规模比较大,希望加速训练,可以启动分布式作业。一个分布式作业里包括若干trainer进程和若干Parameter Server(或称pserver)进程。用命令 ``paddle pserver`` 可以启动 pserver 进程,pserver进程用于协调多个trainer进程之间的通信。 - -本文首先介绍trainer进程中的一些使用概念,然后介绍pserver进程中概念。 - -.. contents:: - -系统框图 -======== - -下图描述了用户使用框图,PaddlePaddle的trainer进程里内嵌了Python解释器,trainer进程可以利用这个解释器执行Python脚本,Python脚本里定义了模型配置、训练算法、以及数据读取函数。其中,数据读取程序往往定义在一个单独Python脚本文件里,被称为数据提供器(DataProvider),通常是一个Python函数。模型配置、训练算法通常定义在另一单独Python文件中, 称为训练配置文件。下面将分别介绍这两部分。 - -.. 
graphviz:: - - digraph pp_process { - rankdir=LR; - config_file [label="用户神经网络配置"]; - subgraph cluster_pp { - style=filled; - color=lightgrey; - node [style=filled, color=white, shape=box]; - label = "PaddlePaddle C++"; - py [label="Python解释器"]; - } - data_provider [label="用户数据解析"]; - config_file -> py; - py -> data_provider [dir="back"]; - } - -数据提供器 -========== - -DataProvider是PaddlePaddle系统的数据提供器,将用户的原始数据转换成系统可以识别的数据类型。每当系统需要新的数据训练时, trainer进程会调用DataProvider函数返回数据。当所有数据读取完一轮后,DataProvider返回空数据,通知系统一轮数据读取结束,并且系统每一轮训练开始时会重置DataProvider。需要注意的是,DataProvider是被系统调用,而不是新数据驱动系统,一些随机化噪声添加都应该在DataProvider中完成。 - -在不同的应用里,训练数据的格式往往各不相同。因此,为了用户能够灵活的处理数据,我们提供了Python处理数据的接口,称为 `PyDataProvider`_ 。在 ``PyDataProvider`` 中,系统C++模块接管了shuffle、处理batch、GPU和CPU通信、双缓冲、异步读取等问题,一些情况下(如:``min_pool_size=0``)需要Python接口里处理shuffle,可以参考 `PyDataProvider`_ 的相关文档继续深入了解。 - - -训练配置文件 -============ - -训练配置文件主要包括数据源、优化算法、网络结构配置三部分。 其中数据源配置与DataProvider的关系是:DataProvider里定义数据读取函数,训练配置文件的数据源配置中指定DataProvider文件名字、生成数据函数接口,请不要混淆。 - -一个简单的训练配置文件为: - -.. literalinclude:: trainer_config.py - :linenos: - -文件开头 ``from paddle.trainer_config_helpers import *`` ,是因为PaddlePaddle配置文件与C++模块通信的最基础协议是protobuf,为了避免用户直接写复杂的protobuf string,我们为用户定以Python接口来配置网络,该Python代码可以生成protobuf包,这就是`trainer_config_helpers`_的作用。因此,在文件的开始,需要import这些函数。 这个包里面包含了模型配置需要的各个模块。 - -下面分别介绍数据源配置、优化算法配置、网络结构配置这三部分该概念。 - -数据源配置 ----------- - -使用 `PyDataProvider`_ 的函数 ``define_py_data_sources2`` 配置数据源。``define_py_data_sources2`` 里通过train_list和test_list指定是训练文件列表和测试文件列表。 如果传入字符串的话,是指一个数据列表文件。这个数据列表文件中包含的是每一个训练或者测试文件的路径。如果传入一个list的话,则会默认生成一个list文件,再传入给train.list或者test.list。 - -``module`` 和 ``obj`` 指定了DataProvider的文件名和返回数据的函数名。更详细的使用,请参考 `PyDataProvider`_ 。 - -优化算法配置 ------------- - -通过 `settings`_ 接口设置神经网络所使用的训练参数和 `优化算法`_ ,包括学习率、batch_size、优化算法、正则方法等,具体的使用方法请参考 `settings`_ 文档。 - -网络结构配置 ------------- - -神经网络配置主要包括网络连接、激活函数、损失函数、评估器。 - -- 网络连接: 主要由Layer组成,每个Layer返回的都是一个 ``LayerOutput`` 对象,Layer里面可以定义参数属性、激活类型等。 - - 为了更灵活的配置,PaddlePaddle提供了基于 Projection 或者 Operator 的配置,这两个需要与 ``mixed_layer`` 配合使用。这里简单介绍Layer、Projection、Operator的概念: - - - Layer: 神经网络的某一层,可以有可学习的参数,一般是封装了许多复杂操作的集合。 - - Projection:需要与 ``mixed_layer`` 配合使用,含可学习参数。 - - Operator: 需要与 ``mixed_layer`` 配合使用,不含可学习参数,输入全是其他Layer的输出。 - - - 这个配置文件网络由 ``data_layer`` 、 ``simple_img_conv_pool`` 、 ``fc_layer`` 组成。 - - - `data_layer`_ : 通常每个配置文件都会包括 ``data_layer`` ,定义输入数据大小。 - - `simple_img_conv_pool`_ :是一个组合层,包括了图像的卷积 (convolution)和池化(pooling)。 - - `fc_layer`_ :全连接层,激活函数为Softmax,这里也可叫分类层。 - - -- 损失函数和评估器:损失函数即为网络的优化目标,评估器可以评价模型结果。 - - PaddlePaddle包括很多损失函数和评估起,详细可以参考 `损失函数层`_ 和 `评估器`_ 。这里 ``classification_cost`` 默认使用多类交叉熵损失函数和分类错误率统计评估器。 - -- ``outputs``: 标记网络输出的函数为 ``outputs`` 。 - - 训练阶段,网络的输出为神经网络的优化目标;预测阶段,网络的输出也可通过 ``outputs`` 标记。 - - -这里对 ``mixed_layer`` 稍做详细说明, 该Layer将多个输入(Projection 或 Operator)累加求和,具体计算是通过内部的 Projection 和 Operator 完成,然后加 Bias 和 activation 操作, - -例如,和 ``fc_layer`` 同样功能的 ``mixed_layer`` 是: - -.. code-block:: python - - data = data_layer(name='data', size=200) - with mixed_layer(size=200) as out: - out += full_matrix_projection(input=data) - -PaddlePaddle 可以使用 ``mixed layer`` 配置出非常复杂的网络,甚至可以直接配置一个完整的LSTM。用户可以参考 `mixed_layer`_ 的相关文档进行配置。 - - -分布式训练 -========== - -PaddlePaddle多机采用经典的 Parameter Server 架构对多个节点的 trainer 进行同步。多机训练的经典拓扑结构如下\: - -.. graphviz:: pserver_topology.dot - -图中每个灰色方块是一台机器,在每个机器中,先使用命令 ``paddle pserver`` 启动一个pserver进程,并指定端口号,可能的参数是\: - -.. 
code-block:: bash

    paddle pserver --port=5000 --num_gradient_servers=4 --tcp_rdma='tcp' --nics='eth0'

* ``--port=5000`` : 指定 pserver 进程端口是 5000 。
* ``--num_gradient_servers=4`` : 有四个训练进程(PaddlePaddle 将 trainer 也称作 GradientServer ,因为其负责提供Gradient) 。
* ``--tcp_rdma='tcp' --nics='eth0'`` : 指定以太网类型为TCP网络,指定网络接口名字为eth0。

启动 pserver 进程之后,需要启动 trainer 训练进程,在各个机器上运行如下命令\:

.. code-block:: bash

    paddle train --port=5000 --pservers=192.168.100.101,192.168.100.102,192.168.100.103,192.168.100.104 --config=...

对于简单的多机协同训练,使用上述方式即可。另外,在高级情况下,pserver/train 通常还需要设置下面两个参数\:

* --ports_num\: 一个 pserver 进程共绑定多少个端口用来做稠密更新,默认是1。
* --ports_num_for_sparse\: 一个pserver进程共绑定多少端口用来做稀疏更新,默认是0。

之所以要手工指定端口数量,是因为Paddle的网络通信中使用了 int32 作为消息长度,比较容易在大模型下溢出。所以,在 pserver 进程中可以启动多个子线程去接受 trainer 的数据,这样单个子线程的长度就不会溢出了。但是这个值不可以调得过大,因为增加这个值,对性能尤其是内存占用有一定的开销;另外,稀疏更新的端口数如果太大的话,很容易导致某一个参数服务器没有分配到任何参数。

详细的说明可以参考 `集群训练Paddle`_ 。


.. _PyDataProvider: ../ui/data_provider/pydataprovider2.html
.. _settings: ../../doc/ui/api/trainer_config_helpers/optimizers.html#settings
.. _优化算法: ../../doc/ui/api/trainer_config_helpers/optimizers.html#optimizers
.. _trainer_config_helpers: ../../doc/ui/api/trainer_config_helpers/index.html
.. _data_layer: ../../doc/ui/api/trainer_config_helpers/layers.html#data-layer
.. _simple_img_conv_pool: ../../doc/ui/api/trainer_config_helpers/networks.html#simple-img-conv-pool
.. _fc_layer: ../../doc/ui/api/trainer_config_helpers/layers.html#fc-layer
.. _损失函数层: ../../doc/ui/api/trainer_config_helpers/layers.html#cost-layers
.. _评估器: ../../doc/ui/api/trainer_config_helpers/evaluators.html
.. _mixed_layer: ../../doc/ui/api/trainer_config_helpers/layers.html#mixed-layer
.. _集群训练Paddle: ../cluster/index.html
diff --git a/doc_cn/conf.py.in b/doc_cn/conf.py.in
deleted file mode 100644
index 4f3afb814f1e779a711e3535da1f8853aa0d97c6..0000000000000000000000000000000000000000
--- a/doc_cn/conf.py.in
+++ /dev/null
@@ -1,147 +0,0 @@
# -*- coding: utf-8 -*-
#
# documentation build configuration file, created by
# sphinx-quickstart on Thu Jul 23 19:40:08 2015.
#
# This file is execfile()d with the current directory set to its
# containing dir.
#
# Note that not all possible configuration values are present in this
# autogenerated file.
#
# All configuration values have a default; values that are commented out
# serve to show the default.
import sys
import os, subprocess
import shlex
from recommonmark import parser, transform

MarkdownParser = parser.CommonMarkParser
AutoStructify = transform.AutoStructify
# If extensions (or modules to document with autodoc) are in another directory,
# add these directories to sys.path here. If the directory is relative to the
# documentation root, use os.path.abspath to make it absolute, like shown here.
sys.path.insert(0, '@PROJ_ROOT@/python')
templates_path = ["@PROJ_ROOT@/doc_theme/templates"]

# -- General configuration ------------------------------------------------

# General information about the project.
project = u'PaddlePaddle'
author = u'%s developers' % project
copyright = u'2016, %s' % author
github_doc_root = ''

# add markdown parser
MarkdownParser.github_doc_root = github_doc_root
source_parsers = {
    '.md': MarkdownParser,
    '.Rmd': MarkdownParser,
}
os.environ['PADDLE_BUILD_DOC'] = '1'

# Add any Sphinx extension module names here, as strings.
They can be -# extensions coming with Sphinx (named 'sphinx.ext.*') or your custom ones -extensions = [ - 'sphinx.ext.autodoc', - 'sphinx.ext.autosummary', - 'sphinx.ext.mathjax', - 'sphinx.ext.napoleon', - 'sphinx.ext.graphviz' -] -table_styling_embed_css = True - -autodoc_member_order = 'bysource' - -# The suffix(es) of source filenames. -# You can specify multiple suffix as a list of string: -# source_suffix = ['.rst', '.md'] -source_suffix = ['.rst', '.md', '.Rmd'] - -# The encoding of source files. -source_encoding = 'utf-8' - -# The master toctree document. -master_doc = 'index' - -# The language for content autogenerated by Sphinx. Refer to documentation -# for a list of supported languages. -# -# This is also used if you do content translation via gettext catalogs. -# Usually you set "language" from the command line for these cases. -language = 'zh_CN' - -# There are two options for replacing |today|: either, you set today to some -# non-false value, then it is used: -#today = '' -# Else, today_fmt is used as the format for a strftime call. -#today_fmt = '%B %d, %Y' - -# List of patterns, relative to source directory, that match files and -# directories to ignore when looking for source files. -exclude_patterns = ['_build'] - -# The reST default role (used for this markup: `text`) to use for all -# documents. -#default_role = None - -# If true, '()' will be appended to :func: etc. cross-reference text. -#add_function_parentheses = True - -# If true, the current module name will be prepended to all description -# unit titles (such as .. function::). -#add_module_names = True - -# If true, sectionauthor and moduleauthor directives will be shown in the -# output. They are ignored by default. -#show_authors = False - -# The name of the Pygments (syntax highlighting) style to use. -pygments_style = 'sphinx' - -# A list of ignored prefixes for module index sorting. -#modindex_common_prefix = [] - -# If true, keep warnings as "system message" paragraphs in the built documents. -#keep_warnings = False - -# If true, `todo` and `todoList` produce output, else they produce nothing. -todo_include_todos = False - -# -- Options for HTML output ---------------------------------------------- - -# The theme to use for HTML and HTML Help pages. See the documentation for -# a list of builtin themes. -html_theme = 'sphinx_rtd_theme' - -# Add any paths that contain custom static files (such as style sheets) here, -# relative to this directory. They are copied after the builtin static files, -# so a file named "default.css" will overwrite the builtin "default.css". -html_static_path = ['@PROJ_ROOT@/doc_theme/static'] - -# Output file base name for HTML help builder. -htmlhelp_basename = project + 'doc' - -# -- Options for LaTeX output --------------------------------------------- -latex_elements = { -} - -# Grouping the document tree into LaTeX files. List of tuples -# (source start file, target name, title, -# author, documentclass [howto, manual, or own class]). -latex_documents = [ - (master_doc, '%s.tex' % project, project, - author, 'manual'), -] - -# Use the .. admonition:: directive for Notes sections. -# False to use the .. rubric:: directive instead. 
-napoleon_use_admonition_for_notes = True - -def setup(app): - # Add hook for building doxygen xml when needed - # no c++ API for now - app.add_config_value('recommonmark_config', { - 'url_resolver': lambda url: github_doc_root + url, - }, True) - app.add_transform(AutoStructify) diff --git a/doc_cn/demo/index.rst b/doc_cn/demo/index.rst deleted file mode 100644 index e15e839f93d4ac0d455e49fd8b1cde8bf60a29ac..0000000000000000000000000000000000000000 --- a/doc_cn/demo/index.rst +++ /dev/null @@ -1,26 +0,0 @@ -使用示例 -======== - -图像 -'''' - -* `图像分类 <../../doc/demo/image_classification/index.html>`_ - -自然语言处理 -'''''''''''' - -* `情感分析 `_ -* `文本生成 <../../doc/demo/text_generation/index.html>`_ -* `词性标注 <../../doc/demo/semantic_role_labeling/index.html>`_ - -推荐 -'''' - -* `MovieLens数据集 <../../doc/demo/rec/ml_dataset.html>`_ -* `MovieLens评分回归 <../../doc/demo/rec/ml_regression.html>`_ - -常用模型 -'''''''' - -* `ImageNet: ResNet <../../doc/demo/imagenet_model/resnet_model.html>`_ -* `Embedding: Chinese Word <../../doc/demo/embedding_model/index.html>`_ diff --git a/doc_cn/demo/quick_start/index.md b/doc_cn/demo/quick_start/index.md deleted file mode 100644 index 4a6e07ee1ffd94cf8f781af307b53a96a78e6b93..0000000000000000000000000000000000000000 --- a/doc_cn/demo/quick_start/index.md +++ /dev/null @@ -1,543 +0,0 @@ -# PaddlePaddle快速入门教程 - -我们以文本分类问题作为背景,介绍PaddlePaddle使用流程和常用的网络基础单元的配置方法。 - -## 安装(Install) - -首先请参考安装教程安装PaddlePaddle。 - -## 使用概述(Overview) - -**文本分类问题**:对于给定的一条文本, 我们从提前给定的类别集合中选择其所属类 -别。比如通过用户对电子商务网站评论,评估产品的质量: - -- 这个显示器很棒! (好评) -- 用了两个月之后这个显示器屏幕碎了。(差评) - -每一个任务流程都可以分为如下5个基础部分。 -
![](./Pipeline.jpg)
1. 数据格式准备
    - 每行保存一条样本,类别Id和文本信息用Tab间隔,文本中的单词用空格分隔(如果不切词,则字与字之间用空格分隔),例如:```类别Id '\t' 这 个 显 示 器 很 棒 !```
2. 数据向模型传送
    - PaddlePaddle可以读取Python写的传输数据脚本,所有字符都将转换为连续整数表示的Id传给模型
3. 网络结构(由易到难展示4种不同的网络配置)
    - 逻辑回归模型
    - 词向量模型
    - 卷积模型
    - 时序模型
    - 优化算法
4. 训练模型
5. 预测

## 数据格式准备(Data Preparation)
在本问题中,我们使用[Amazon电子产品评论数据](http://jmcauley.ucsd.edu/data/amazon/),将评论分为好评(正样本)和差评(负样本)两类。[源码](https://github.com/PaddlePaddle/Paddle)的`demo/quick_start`里提供了下载已经预处理数据的脚本(如果想从最原始的数据处理,可以使用脚本 `./demo/quick_start/data/proc_from_raw_data/get_data.sh`)。

```bash
cd demo/quick_start
./data/get_data.sh
```

## 数据向模型传送(Transfer Data to Model)

### Python数据加载脚本(Data Provider Script)

下面dataprovider_bow.py文件给出了完整例子,主要包括两部分:

* initializer: 定义文本信息、类别Id的数据类型。
* process: yield文本信息和类别Id,和initializer里定义顺序一致。

```python
from paddle.trainer.PyDataProvider2 import *

# id of the word not in dictionary
UNK_IDX = 0

# initializer is called by the framework during initialization.
# It allows the user to describe the data types and setup the
# necessary data structure for later use.
# `settings` is an object. initializer needs to properly fill settings.input_types.
# initializer can also store other data structures needed to be used at process().
# In this example, dictionary is stored in settings.
# `dictionary` and `kwargs` are arguments passed from trainer_config.lr.py
def initializer(settings, dictionary, **kwargs):
    # Put the word dictionary into settings
    settings.word_dict = dictionary

    # settings.input_types specifies what data types the data provider
    # generates.
    settings.input_types = [
        # The first input is a sparse_binary_vector,
        # which means each dimension of the vector is either 0 or 1. It is the
        # bag-of-words (BOW) representation of the texts.
        sparse_binary_vector(len(dictionary)),
        # The second input is an integer. It represents the category id of the
        # sample. 2 means there are two labels in the dataset.
        # (1 for positive and 0 for negative)
        integer_value(2)]

# Declaring a data provider. Its init_hook is the 'initializer' function above.
# It will cache the generated data of the first pass in memory, so that
# during later passes, no on-the-fly data generation will be needed.
# `settings` is the same object used by initializer()
# `file_name` is the name of a file listed in the train_list or test_list given
# to define_py_data_sources2(). See trainer_config.lr.py.
@provider(init_hook=initializer, cache=CacheType.CACHE_PASS_IN_MEM)
def process(settings, file_name):
    # Open the input data file.
    with open(file_name, 'r') as f:
        # Read each line.
        for line in f:
            # Each line contains the label and text of the comment, separated by \t.
            label, comment = line.strip().split('\t')

            # Split the words into a list.
            words = comment.split()

            # convert the words into a list of ids by looking them up in word_dict.
            word_vector = [settings.word_dict.get(w, UNK_IDX) for w in words]

            # Return the features for the current comment. The first is a list
            # of ids representing a 0-1 binary sparse vector of the text,
            # the second is the integer id of the label.
            yield word_vector, int(label)
```

### 配置中的数据加载定义(Data Provider in Configure)

在模型配置中利用`define_py_data_sources2`加载数据:

```python
from paddle.trainer_config_helpers import *

dict_file = "data/dict.txt"
word_dict = dict()
with open(dict_file, 'r') as f:
    for i, line in enumerate(f):
        w = line.strip().split()[0]
        word_dict[w] = i
# define the data sources for the model.
# We need to use different process for training and prediction.
# For training, the input data includes both word IDs and labels.
# For prediction, the input data only includes word IDs.
define_py_data_sources2(train_list='data/train.list',
                        test_list='data/test.list',
                        module="dataprovider_bow",
                        obj="process",
                        args={"dictionary": word_dict})
```

* data/train.list,data/test.list: 指定训练、测试数据
* module="dataprovider_bow": 数据处理Python文件名
* obj="process": 指定生成数据的函数
* args={"dictionary": word_dict}: 额外的参数,这里指定词典

更详细数据格式和用例请参考PyDataProvider2。

## 网络结构(Network Architecture)
本节我们将专注于网络结构的介绍。
![](./PipelineNetwork.jpg)
- -我们将以基本的逻辑回归网络作为起点,并逐渐展示更加深入的功能。更详细的网络配置 -连接请参考Layer文档。 -所有配置在[源码](https://github.com/PaddlePaddle/Paddle)`demo/quick_start`目录,首先列举逻辑回归网络。 - -### 逻辑回归模型(Logistic Regression) - -流程如下: -
![](./NetLR.jpg)
- 获取利用one-hot vector表示的每个单词,维度是词典大小

```python
word = data_layer(name="word", size=word_dim)
```

- 获取该条样本类别Id,维度是类别个数。

```python
label = data_layer(name="label", size=label_dim)
```

- 利用逻辑回归模型对该向量进行分类,同时会计算分类准确率

```python
# Define a fully connected layer with logistic activation (also called softmax activation).
output = fc_layer(input=word,
                  size=label_dim,
                  act=SoftmaxActivation())
# Define cross-entropy classification loss and error.
classification_cost(input=output, label=label)
```

  - input: 除了data层,每个层都有一个或多个input,多个input以list方式输入
  - size: 该层神经元个数
  - act: 激活函数类型

效果总结:训练和预测的流程脚本将在后面介绍。在此为方便对比不同网络结构,我们先总结各个网络的复杂度和效果。
| 网络名称 | 参数数量 | 错误率 |
| :------- | :------- | :----- |
| 逻辑回归 | 252 KB   | 8.652% |
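把上面几段配置与前文的数据加载、后文"优化算法"一节的设置串在一起,可以得到一个最小的示意性配置(仅为帮助理解的草稿,并非 demo 中的原始文件;实际使用的完整配置为 `trainer_config.lr.py`,优化设置这里做了简化):

```python
from paddle.trainer_config_helpers import *

# 读入词典(与前文"配置中的数据加载定义"一节相同)
dict_file = "data/dict.txt"
word_dict = dict()
with open(dict_file, 'r') as f:
    for i, line in enumerate(f):
        w = line.strip().split()[0]
        word_dict[w] = i

# 数据源:由 dataprovider_bow.py 的 process 函数提供
define_py_data_sources2(train_list='data/train.list',
                        test_list='data/test.list',
                        module="dataprovider_bow",
                        obj="process",
                        args={"dictionary": word_dict})

# 优化设置(取自后文"优化算法"一节,省略了正则与梯度截断)
settings(batch_size=128,
         learning_rate=2e-3,
         learning_method=AdamOptimizer())

word = data_layer(name="word", size=len(word_dict))  # 输入:词典大小的0-1稀疏向量
label = data_layer(name="label", size=2)             # 标签:两个类别
output = fc_layer(input=word, size=2, act=SoftmaxActivation())
outputs(classification_cost(input=output, label=label))
```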
### 词向量模型(Word Vector)

embedding模型需要稍微改变数据提供的脚本,即`dataprovider_emb.py`,词向量模型、卷积模型、时序模型均使用该脚本。其中文本输入类型定义为整数时序类型integer_value_sequence。

```python
def initializer(settings, dictionary, **kwargs):
    settings.word_dict = dictionary
    settings.input_types = [
        # Define the type of the first input as a sequence of integers.
        # The value of the integers ranges from 0 to len(dictionary)-1
        integer_value_sequence(len(dictionary)),
        # Define the second input for label id
        integer_value(2)]

@provider(init_hook=initializer)
def process(settings, file_name):
    ...
    # omitted, it is the same as the data provider for the LR model
    # (a sketch is given at the end of this section)
```

该模型依然使用逻辑回归分类网络的框架,只是将句子的稀疏向量表示替换为连续向量表示,即对第3步进行替换。句子表示的计算更新为2步:
![](./NetContinuous.jpg)
- 利用单词Id查找该单词对应的连续表示向量(维度为word_dim),输入N个单词,输出为N个word_dim维度向量

```python
emb = embedding_layer(input=word, size=word_dim)
```

- 将该句话包含的所有单词向量求平均,得到句子的表示

```python
avg = pooling_layer(input=emb, pooling_type=AvgPooling())
```

其它部分和逻辑回归网络结构一致。
效果总结:
| 网络名称   | 参数数量 | 错误率 |
| :--------- | :------- | :----- |
| 词向量模型 | 15 MB    | 8.484% |
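本节开头 `dataprovider_emb.py` 中被省略的 process 函数,与逻辑回归模型的 process 基本相同,区别只在于第一个输入直接是整数 Id 序列。一个示意写法如下(仅供参考,沿用前文 UNK_IDX 的定义;实际请以 demo 中的 `dataprovider_emb.py` 为准):

```python
@provider(init_hook=initializer)
def process(settings, file_name):
    with open(file_name, 'r') as f:
        for line in f:
            # 每行:类别Id '\t' 分好词的文本
            label, comment = line.strip().split('\t')
            words = comment.split()
            # 与 LR 模型不同:输出整数 Id 序列,而不是 0-1 稀疏向量
            yield [settings.word_dict.get(w, UNK_IDX) for w in words], int(label)
```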
### 卷积模型(Convolution)
卷积网络是一种特殊的从词向量表示到句子表示的方法,也就是将词向量模型的步骤3-2进一步演化,变为3个新的子步骤。
![](./NetConv.jpg)
文本卷积分为三个步骤:

1. 获取每个单词左右各k个近邻,拼接成一个新的向量表示;
2. 对该表示进行非线性变换(例如Sigmoid变换),得到维度为hidden_dim的新向量;
3. 对新向量的每一个维度,取整句话在该维度上的最大值,作为最后的句子表示向量。

这3个子步骤可配置为:

```python
text_conv = sequence_conv_pool(input=emb,
                               context_start=k,
                               context_len=2 * k + 1)
```

效果总结:
| 网络名称 | 参数数量 | 错误率 |
| :------- | :------- | :----- |
| 卷积模型 | 16 MB    | 5.628% |

### 时序模型(Time Sequence)
![](./NetRNN.jpg)
时序模型即为RNN模型,包括简单的RNN模型、GRU模型、LSTM模型等。

- GRU模型配置:

```python
gru = simple_gru(input=emb, size=gru_size)
```

- LSTM模型配置:

```python
lstm = simple_lstm(input=emb, size=lstm_size)
```

针对本问题,我们采用单层LSTM模型,并使用了Dropout(一个示意写法见本节效果表格之后),效果总结:
| 网络名称 | 参数数量 | 错误率 |
| :------- | :------- | :----- |
| 时序模型 | 16 MB    | 4.812% |
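上文提到本问题"采用单层LSTM模型,并使用了Dropout",一个示意写法如下(草稿:句子表示这里用最大值pooling,Dropout 沿用前文 `layer_attr=ExtraAttr(drop_rate=0.5)` 的形式,`lstm_size` 假设已定义;实际结构请以 demo 中的 `trainer_config.lstm.py` 为准):

```python
lstm = simple_lstm(input=emb, size=lstm_size)
# 在整个序列上做最大值pooling,得到句子表示
sent = pooling_layer(input=lstm, pooling_type=MaxPooling())
output = fc_layer(input=sent,
                  size=2,
                  act=SoftmaxActivation(),
                  layer_attr=ExtraAttr(drop_rate=0.5))
```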
## 优化算法(Optimization Algorithm)

优化算法包括Momentum、RMSProp、AdaDelta、AdaGrad、Adam、Adamax等,这里采用Adam优化方法,同时加了L2正则和梯度截断。

```python
settings(batch_size=128,
         learning_rate=2e-3,
         learning_method=AdamOptimizer(),
         regularization=L2Regularization(8e-4),
         gradient_clipping_threshold=25)
```

## 训练模型(Training Model)
在完成了数据和网络结构搭建之后,我们进入到训练部分。
![](./PipelineTrain.jpg)
- -训练脚本:我们将训练的命令行保存在了 `train.sh`文件中。训练时所需设置的主要参数如下: - -```bash -paddle train \ ---config=trainer_config.py \ ---log_period=20 \ ---save_dir=./output \ ---num_passes=15 \ ---use_gpu=false -``` -这里没有介绍多机分布式训练,可以参考分布式训练的demo学习如何进行多机训练。 - -## 预测(Prediction) -可以使用训练好的模型评估带有label的验证集,也可以预测没有label的测试集。 -
![](./PipelineTest.jpg)
测试脚本如下,将会测试配置文件中test.list指定的数据。

```bash
paddle train \
--use_gpu=false \
--job=test \
--init_model_path=./output/pass-0000x
```

可以参考Python API预测教程,或其他demo的Python预测过程。也可以通过如下方式预测。

预测脚本(`predict.sh`):

```bash
model="output/pass-00003"
paddle train \
    --config=trainer_config.lstm.py \
    --use_gpu=false \
    --job=test \
    --init_model_path=$model \
    --config_args=is_predict=1 \
    --predict_output_dir=.

mv rank-00000 result.txt
```

这里以`output/pass-00003`为例进行预测,用户可以根据训练log选择test结果最好的模型来预测。与训练网络配置不同的是:

- 无需label相关的层;
- 用outputs指定输出概率层(softmax输出);
- 指定batch_size=1;
- 数据传输无需label数据;
- 预测数据的位置通过test_list指定。

预测结果以文本的形式保存在`result.txt`中,一行为一个样本,格式如下:

```
预测ID;ID为0的概率 ID为1的概率
预测ID;ID为0的概率 ID为1的概率
```

网络配置中通过`is_predict`参数(由上面的`--config_args=is_predict=1`传入)区分训练和预测两种模式:

```python
is_predict = get_config_arg('is_predict', bool, False)
trn = 'data/train.list' if not is_predict else None
tst = 'data/test.list' if not is_predict else 'data/pred.list'
obj = 'process' if not is_predict else 'process_pre'
batch_size = 128 if not is_predict else 1
if is_predict:
    maxid = maxid_layer(output)
    outputs([maxid, output])
else:
    label = data_layer(name="label", size=2)
    cls = classification_cost(input=output, label=label)
    outputs(cls)
```

## 总体效果总结(Summary)
这些流程中的数据下载、网络配置、训练脚本在`/demo/quick_start`目录,我们在此总结上述网络结构在Amazon-Elec测试集(25k)上的效果:
| 网络名称     | 参数数量 | 错误率 | 配置文件               |
| :----------- | :------- | :----- | :--------------------- |
| 逻辑回归模型 | 252 KB   | 8.652% | trainer_config.lr.py   |
| 词向量模型   | 15 MB    | 8.484% | trainer_config.emb.py  |
| 卷积模型     | 16 MB    | 5.628% | trainer_config.cnn.py  |
| 时序模型     | 16 MB    | 4.812% | trainer_config.lstm.py |
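按照前文"预测"一节给出的格式,可以用类似下面的小脚本读取预测结果(示意,假设预测输出已按前文方式重命名为 result.txt):

```python
# 每行格式:预测ID;ID为0的概率 ID为1的概率
with open("result.txt") as f:
    for line in f:
        pred, probs = line.strip().split(';')
        prob0, prob1 = [float(p) for p in probs.split()]
        print("label=%s P(0)=%.4f P(1)=%.4f" % (pred, prob0, prob1))
```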
## 附录(Appendix)

### 命令行参数(Command Line Argument)

* \--config:网络配置
* \--save_dir:模型存储路径
* \--log_period:每隔多少batch打印一次日志
* \--num_passes:训练轮次,一个pass表示过一遍所有训练样本
* \--config_args:命令指定的参数会传入网络配置中
* \--init_model_path:指定初始化模型路径,可用在测试或训练时指定初始化模型

默认一个pass保存一次模型,也可以通过saving_period_by_batches设置每隔多少batch保存一次模型。
可以通过show_parameter_stats_period设置打印参数信息等。
其他参数请参考命令行参数文档。

### 输出日志(Log)

```
TrainerInternal.cpp:160] Batch=20 samples=2560 AvgCost=0.628761 CurrentCost=0.628761 Eval: classification_error_evaluator=0.304297 CurrentEval: classification_error_evaluator=0.304297
```

模型训练时会看到这样的日志,详细的参数解释如下面表格:
| Field | Meaning |
| --- | --- |
| Batch=20 | 20 batches have been processed |
| samples=2560 | 2560 samples have been processed |
| AvgCost | average cost over all samples from the 0th batch of the current pass to the current batch |
| CurrentCost | average cost over the samples in the latest log_period batches |
| Eval: classification_error_evaluator | average classification error over all samples from the 0th batch of the current pass to the current batch |
| CurrentEval: classification_error_evaluator | average classification error over the samples in the latest log_period batches |
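To make the fields above concrete, here is a small standalone Python snippet that extracts them from such a log line with regular expressions; it is a hypothetical helper, not part of PaddlePaddle.

```python
import re

# "key=value" pairs such as Batch=20 or AvgCost=0.628761.
PAIR = re.compile(r"(Batch|samples|AvgCost|CurrentCost)=([0-9.]+)")
# The two evaluator fields, with the CurrentEval/Eval prefix captured.
EVAL = re.compile(r"(CurrentEval|Eval): classification_error_evaluator=([0-9.]+)")

def parse_train_log_line(line):
    fields = {k: float(v) for k, v in PAIR.findall(line)}
    fields.update((k, float(v)) for k, v in EVAL.findall(line))
    return fields

line = ("TrainerInternal.cpp:160] Batch=20 samples=2560 AvgCost=0.628761 "
        "CurrentCost=0.628761 Eval: classification_error_evaluator=0.304297 "
        "CurrentEval: classification_error_evaluator=0.304297")
print(parse_train_log_line(line))
```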
diff --git a/doc_cn/demo/quick_start/index.rst b/doc_cn/demo/quick_start/index.rst deleted file mode 100644 index 0536936dc47689d3ff285b919586a10128a0c745..0000000000000000000000000000000000000000 --- a/doc_cn/demo/quick_start/index.rst +++ /dev/null @@ -1,395 +0,0 @@ -PaddlePaddle快速入门教程 -======================== - -我们将以 `文本分类问题 `_ 为例, -介绍PaddlePaddle的基本使用方法。 - -安装 -==== - -请参考 `安装教程 <../../build_and_install/index.html>`_ 安装PaddlePaddle。 - -使用概述 -======== - -**文本分类问题**:对于给定的一条文本,我们从提前给定的类别集合中选择其所属类别。 - -比如, 在购物网站上,通过查看买家对某个产品的评价反馈, 评估该产品的质量。 - -- 这个显示器很棒! (好评) -- 用了两个月之后这个显示器屏幕碎了。(差评) - -使用PaddlePaddle, 每一个任务流程都可以被划分为如下五个步骤。 - - .. image:: Pipeline.jpg - :align: center - :scale: 80% - -1. 数据格式准备 - - 本例每行保存一条样本,类别Id和文本信息用 ``Tab`` 间隔,文本中的单词用空格分隔(如果不切词,则字与字之间用空格分隔),例如:``类别Id '\t' 这 个 显 示 器 很 棒 !`` -2. 向系统传送数据 - - PaddlePaddle可以执行用户的python脚本程序来读取各种格式的数据文件。 - - 本例的所有字符都将转换为连续整数表示的Id传给模型。 -3. 描述网络结构和优化算法 - - 本例由易到难展示4种不同的文本分类网络配置:逻辑回归模型,词向量模型,卷积模型,时序模型。 - - 常用优化算法包括Momentum, RMSProp,AdaDelta,AdaGrad,Adam,Adamax等,本例采用Adam优化方法,加了L2正则和梯度截断。 -4. 训练模型 -5. 应用模型 - -数据格式准备 ------------- - -接下来我们将展示如何用PaddlePaddle训练一个文本分类模型,将 `Amazon电子产品评论数据 `_ 分为好评(正样本)和差评(负样本)两种类别。 -`源代码 `_ 的 ``demo/quick_start`` 目录里提供了该数据的下载脚本和预处理脚本,你只需要在命令行输入以下命令,就能够很方便的完成数据下载和相应的预处理工作。 - -.. code-block:: bash - - cd demo/quick_start - ./data/get_data.sh - ./preprocess.sh - -数据预处理完成之后,通过配置类似于 ``dataprovider_*.py`` 的数据读取脚本和类似于 ``trainer_config.*.py`` 的训练模型脚本,PaddlePaddle将以设置参数的方式来设置 -相应的数据读取脚本和训练模型脚本。接下来,我们将对这两个步骤给出了详细的解释,你也可以先跳过本文的解释环节,直接进入训练模型章节, 使用 ``sh train.sh`` 开始训练模型, -查看`train.sh`内容,通过 **自底向上法** (bottom-up approach)来帮助你理解PaddlePaddle的内部运行机制。 - - -向系统传送数据 -============== - -Python脚本读取数据 ------------------- - -`DataProvider <../../ui/data_provider/index.html>`_ 是PaddlePaddle负责提供数据的模块。``DataProvider`` 主要职责在于将训练数据传入内存或者显存,让模型能够得到训练更新,其包括两个函数: - -* initializer:PaddlePaddle会在调用读取数据的Python脚本之前,先调用initializer函数。在下面例子里,我们在initialzier函数里初始化词表,并且在随后的读取数据过程中填充词表。 -* process:PaddlePaddle调用process函数来读取数据。每次读取一条数据后,process函数会用yield语句输出这条数据,从而能够被PaddlePaddle 捕获 (harvest)。 - -``dataprovider_bow.py`` 文件给出了完整例子: - -.. literalinclude:: ../../../demo/quick_start/dataprovider_bow.py - :language: python - :lines: 21-70 - :linenos: - :emphasize-lines: 8,33 - - -配置中的数据加载定义 --------------------- - -在模型配置中通过 ``define_py_data_sources2`` 接口来加载数据: - -.. literalinclude:: ../../../demo/quick_start/trainer_config.emb.py - :language: python - :lines: 19-35 - :linenos: - :emphasize-lines: 12 - - -以下是对上述数据加载的解释: - -- data/train.list,data/test.list: 指定训练数据和测试数据 -- module="dataprovider_bow": 处理数据的Python脚本文件 -- obj="process": 指定生成数据的函数 -- args={"dictionary": word_dict}: 额外的参数,这里指定词典 - -更详细数据格式和用例请参考 `PyDataProvider2 <../../ui/data_provider/pydataprovider2.html>`_ 。 - -模型网络结构 -============ - -本小节我们将介绍模型网络结构。 - - .. image:: PipelineNetwork.jpg - :align: center - :scale: 80% - - -我们将以最基本的逻辑回归网络作为起点,并逐渐展示更加深入的功能。更详细的网络配置连接请参考 `Layer文档 <../../../doc/layer.html>`_ 。 -所有配置都能在 `源代码 `_ 的 ``demo/quick_start`` 目录下找到。 - -逻辑回归模型 ------------- - -具体流程如下: - - .. image:: NetLR.jpg - :align: center - :scale: 80% - -- 获取利用 `one-hot vector `_ 表示的每个单词,维度是词典大小 - - .. code-block:: python - - word = data_layer(name="word", size=word_dim) - -- 获取该条样本类别Id,维度是类别个数。 - - .. code-block:: python - - label = data_layer(name="label", size=label_dim) - -- 利用逻辑回归模型对该向量进行分类,同时会计算分类准确率 - - .. code-block:: python - - # Define a fully connected layer with logistic activation (also called softmax activation). 
- output = fc_layer(input=word, - size=label_dim, - act_type=SoftmaxActivation()) - # Define cross-entropy classification loss and error. - classification_cost(input=output, label=label) - - - - input: 除去data层,每个层都有一个或多个input,多个input以list方式输入 - - size: 该层神经元个数 - - act_type: 激活函数类型 - -**效果总结**:我们将在后面介绍训练和预测流程的脚本。在此为方便对比不同网络结构,我们总结了各个网络的复杂度和效果。 - - ===================== =============================== ================= - 网络名称 参数数量 错误率 - ===================== =============================== ================= - 逻辑回归 252 KB 8.652 % - ===================== =============================== ================= - -词向量模型 ----------- - -embedding模型需要稍微改变提供数据的Python脚本,即 ``dataprovider_emb.py``,词向量模型、 -卷积模型、时序模型均使用该脚本。其中文本输入类型定义为整数时序类型integer_value_sequence。 - -.. code-block:: python - - def initializer(settings, dictionary, **kwargs): - settings.word_dict = dictionary - settings.input_types = [ - # Define the type of the first input as sequence of integer. - # The value of the integers range from 0 to len(dictrionary)-1 - integer_value_sequence(len(dictionary)), - # Define the second input for label id - integer_value(2)] - - @provider(init_hook=initializer) - def process(settings, file_name): - ... - # omitted, it is same as the data provider for LR model - -该模型依然使用逻辑回归分类网络的框架, 只是将句子用连续向量表示替换为用稀疏向量表示, 即对第三步进行替换。句子表示的计算更新为两步: - -.. image:: NetContinuous.jpg - :align: center - :scale: 80% - -- 利用单词Id查找该单词对应的连续向量(维度为word_dim), 输入N个单词,输出为N个word_dim维度向量 - - .. code-block:: python - - emb = embedding_layer(input=word, size=word_dim) - -- 将该句话包含的所有单词向量求平均, 得到句子的表示 - - .. code-block:: python - - avg = pooling_layer(input=emb, pooling_type=AvgPooling()) - -其它部分和逻辑回归网络结构一致。 - -**效果总结:** - - ===================== =============================== ================== - 网络名称 参数数量 错误率 - ===================== =============================== ================== - 词向量模型 15 MB 8.484 % - ===================== =============================== ================== - -卷积模型 ------------ - -卷积网络是一种特殊的从词向量表示到句子表示的方法, 也就是将词向量模型进一步演化为三个新步骤。 - -.. image:: NetConv.jpg - :align: center - :scale: 80% - -文本卷积分可为三个步骤: - -1. 首先,从每个单词左右两端分别获取k个相邻的单词, 拼接成一个新的向量; - -2. 其次,对该向量进行非线性变换(例如Sigmoid变换), 使其转变为维度为hidden_dim的新向量; - -3. 最后,对整个新向量集合的每一个维度取最大值来表示最后的句子。 - -这三个步骤可配置为: - -.. code-block:: python - - text_conv = sequence_conv_pool(input=emb, - context_start=k, - context_len=2 * k + 1) - -**效果总结:** - - ===================== =============================== ======================== - 网络名称 参数数量 错误率 - ===================== =============================== ======================== - 卷积模型 16 MB 5.628 % - ===================== =============================== ======================== - -时序模型 ----------- - -.. image:: NetRNN.jpg - :align: center - :scale: 80% - -时序模型,也称为RNN模型, 包括简单的 `RNN模型 `_, `GRU模型 `_ 和 `LSTM模型 `_ 等等。 - -- GRU模型配置: - - .. code-block:: python - - gru = simple_gru(input=emb, size=gru_size) - - -- LSTM模型配置: - - .. code-block:: python - - lstm = simple_lstm(input=emb, size=lstm_size) - -本次试验,我们采用单层LSTM模型,并使用了Dropout,**效果总结:** - - ===================== =============================== ========================= - 网络名称 参数数量 错误率 - ===================== =============================== ========================= - 时序模型 16 MB 4.812 % - ===================== =============================== ========================= - -优化算法 -========= - -`优化算法 `_ 包括 -Momentum, RMSProp,AdaDelta,AdaGrad,ADAM,Adamax等,这里采用Adam优化方法,同时使用了L2正则(L2 Regularization)和梯度截断(Gradient Clipping)。 - -.. 
code-block:: python - - settings(batch_size=128, - learning_rate=2e-3, - learning_method=AdamOptimizer(), - regularization=L2Regularization(8e-4), - gradient_clipping_threshold=25) - -训练模型 -========= - -在数据加载和网络配置完成之后, 我们就可以训练模型了。 - -.. image:: PipelineTrain.jpg - :align: center - :scale: 80% - -训练模型,我们只需要运行 ``train.sh`` 训练脚本: - - .. code-block:: bash - - ./train.sh - -``train.sh``中包含了训练模型的基本命令。训练时所需设置的主要参数如下: - - .. code-block:: bash - - paddle train \ - --config=trainer_config.py \ - --log_period=20 \ - --save_dir=./output \ - --num_passes=15 \ - --use_gpu=false - -这里只简单介绍了单机训练,如何进行分布式训练,可以参考教程 `分布式训练 <../../cluster/index.html>`_ 。 - -预测 -===== - -当模型训练好了之后,我们就可以进行预测了。 - -.. image:: PipelineTest.jpg - :align: center - :scale: 80% - -之前配置文件中 ``test.list`` 指定的数据将会被测试,这里直接通过预测脚本 ``predict.sh`` 进行预测, -更详细的说明,可以参考 `Python API预测 <../../ui/predict/swig_py_paddle.html>`_ 教程。 - - .. code-block:: bash - - model="output/pass-00003" - paddle train \ - --config=trainer_config.lstm.py \ - --use_gpu=false \ - --job=test \ - --init_model_path=$model \ - --config_args=is_predict=1 \ - --predict_output_dir=. \ - - mv rank-00000 result.txt - -这里以 ``output/pass-00003`` 为例进行预测,用户可以根据训练日志,选择测试结果最好的模型来预测。 - -预测结果以文本的形式保存在 ``result.txt`` 中,一行为一个样本,格式如下: - - .. code-block:: bash - - 预测ID;ID为0的概率 ID为1的概率 - 预测ID;ID为0的概率 ID为1的概率 - -总体效果总结 -============== - -在 ``/demo/quick_start`` 目录下,能够找到这里使用的所有数据, 网络配置, 训练脚本等等。 -对于Amazon-Elec测试集(25k), 如下表格,展示了上述网络模型的训练效果: - - ===================== =============================== ============= ================================== - 网络名称 参数数量 错误率 配置文件 - ===================== =============================== ============= ================================== - 逻辑回归模型 252 KB 8.652% trainer_config.lr.py - 词向量模型 15 MB 8.484% trainer_config.emb.py - 卷积模型 16 MB 5.628% trainer_config.cnn.py - 时序模型 16 MB 4.812% trainer_config.lstm.py - ===================== =============================== ============= ================================== - - -附录 -===== - -命令行参数 ----------- - -* \--config:网络配置 -* \--save_dir:模型存储路径 -* \--log_period:每隔多少batch打印一次日志 -* \--num_passes:训练轮次,一个pass表示过一遍所有训练样本 -* \--config_args:命令指定的参数会传入网络配置中。 -* \--init_model_path:指定初始化模型路径,可用在测试或训练时指定初始化模型。 - -默认一个pass保存一次模型,也可以通过saving_period_by_batches设置每隔多少batch保存一次模型。 -可以通过show_parameter_stats_period设置打印参数信息等。 -其他参数请参考 `命令行参数文档 <../../ui/index.html#command-line-argument>`_ 。 - -输出日志 ---------- - -.. 
code-block:: bash - - TrainerInternal.cpp:160] Batch=20 samples=2560 AvgCost=0.628761 CurrentCost=0.628761 Eval: classification_error_evaluator=0.304297 CurrentEval: classification_error_evaluator=0.304297 - -模型训练会看到类似上面这样的日志信息,详细的参数解释,请参考如下表格: - - =========================================== ============================================================== - 名称 解释 - =========================================== ============================================================== - Batch=20 表示过了20个batch - samples=2560 表示过了2560个样本 - AvgCost 每个pass的第0个batch到当前batch所有样本的平均cost - CurrentCost 当前log_period个batch所有样本的平均cost - Eval: classification_error_evaluator 每个pass的第0个batch到当前batch所有样本的平均分类错误率 - CurrentEval: classification_error_evaluator 当前log_period个batch所有样本的平均分类错误率 - =========================================== ============================================================== diff --git a/doc_cn/demo/sentiment_analysis/index.rst b/doc_cn/demo/sentiment_analysis/index.rst deleted file mode 100644 index 82400b2459ebcaf89ff5e884edfe721b9ec01d7f..0000000000000000000000000000000000000000 --- a/doc_cn/demo/sentiment_analysis/index.rst +++ /dev/null @@ -1,8 +0,0 @@ -情感分析教程 -=========================== - -.. toctree:: - :maxdepth: 3 - :glob: - - Training Locally \ No newline at end of file diff --git a/doc_cn/demo/sentiment_analysis/sentiment_analysis.md b/doc_cn/demo/sentiment_analysis/sentiment_analysis.md deleted file mode 100644 index ba307e97e3010629548460e25e894d082a6ddd4e..0000000000000000000000000000000000000000 --- a/doc_cn/demo/sentiment_analysis/sentiment_analysis.md +++ /dev/null @@ -1,325 +0,0 @@ -# 情感分析教程 - -情感分析有许多应用场景。 一个基本的应用场景是区分给定文本的褒贬两极性,给定的文本可以是一个文档、句子、或者是一个小的文本片段。 一个简单的例子如:把用户在购物网站、旅游网站、团购网站(亚马逊、天猫、淘宝等)上发表的评论分成正面评论和负面评论两类。 - -情感分析也常用于基于大量评论和个人博客来监控社会媒体。 例如,研究人员分析了几个关于消费者信心和政治观点的调查,结果发现它们与同时期的Twitter消息中的情绪词频率相关 [1]。 另一个例子是通过分析每日Twitter博客的文本内容来预测股票变动 [2]。 - -另一方面,抓取产品的用户评论并分析他们的情感,有助于理解用户对不同公司,不同产品,甚至不同竞争对手产品的偏好。 - -本教程将指导您完成长期短期记忆(LSTM)网络的训练过程,以分类来自[大型电影评论数据集](http://ai.stanford.edu/~amaas/data/sentiment/)(有时称为[互联网电影数据库 (IMDB)](http://ai.stanford.edu/~amaas/papers/wvSent_acl2011.pdf))的句子的情感 。 此数据集包含电影评论及其相关联的类别标签,即正面和负面。 - -## 数椐准备 - -### IMDB 数椐介绍 - -训练模型之前, 我们需要预处理数椐并构建一个字典。 首先, 你可以使用下面的脚本下载 IMDB 数椐集和[Moses](http://www.statmt.org/moses/)工具, 这是一个基于统计的机器翻译系统. 
我们提供了一个数据预处理脚本,它不仅能够处理IMDB数据,还能处理其他用户自定义的数据。 为了使用提前编写的脚本,需要将标记的训练和测试样本移动到另一个路径,这已经在`get_imdb.sh`中完成。 - -``` -cd demo/sentiment/data -./get_imdb.sh -``` -如果数椐获取成功,你将在目录```./demo/sentiment/data```中看到下面的文件: - -``` -aclImdb get_imdb.sh imdb mosesdecoder-master -``` - -* aclImdb: 从外部网站上下载的原始数椐集。 -* imdb: 仅包含训练和测试数椐集。 -* mosesdecoder-master: Moses 工具。 - -IMDB数据集包含25,000个已标注过的高极性电影评论用于训练,25,000个用于测试。负面的评论的得分小于等于4,正面的评论的得大于等于7,总评分10分。 运行完脚本 `./get_imdb.sh`后, 我们可以看到在目录 `aclImdb`中的数椐集的结构如下: - -``` -imdbEr.txt imdb.vocab README test train -``` -* train: 训练数椐集。 -* test : 测试数椐集。 -* imdb.vocab: 字典文件。 -* imdbEr.txt: 字典imdb.vocab中每个切分单词的预期评级。 -* README: 数椐说明文档。 - -测试集和训练集目录包含下面的文件: - -``` -labeledBow.feat neg pos unsup unsupBow.feat urls_neg.txt urls_pos.txt urls_unsup.txt -``` - -* pos: 正面评价样本,包含12,500个txt文件,每个文件是一个电影评论。 -* neg: 负面评价样本,包含12,500个txt文件,每个文件是一个电影评论。 -* unsup: 未标记的评价样本,包含50,000个txt文件。 -* urls_xx.txt: 每个评论的网址。 -* xxBow.feat: 用于统计词频的Bow模型特征。 - -### IMDB 数椐准备 - -在这个例子中,我们只使用已经标注过的训练集和测试集,且默认在训练集上构建字典,而不使用IMDB数椐集中的imdb.vocab做为字典。训练集已经做了随机打乱排序而测试集没有。 Moses 工具中的脚本`tokenizer.perl` 用于切分单单词和标点符号。执行下面的命令就可以预处理数椐。 - -``` -cd demo/sentiment/ -./preprocess.sh -``` -preprocess.sh: - -``` -data_dir="./data/imdb" -python preprocess.py -i data_dir -``` - -* data_dir: 输入数椐所在目录。 -* preprocess.py: 预处理脚本。 - -运行成功后目录`demo/sentiment/data/pre-imdb` 结构如下: - -``` -dict.txt labels.list test.list test_part_000 train.list train_part_000 -``` -* test\_part\_000 and train\_part\_000: 所有标记的测试集和训练集, 训练集已经随机打乱。 -* train.list and test.list: 训练集和测试集文件列表。 -* dict.txt: 利用训练集生成的字典。 -* labels.txt: neg 0, pos 1, 含义:标签0表示负面的评论,标签1表示正面的评论。 - -### 用户自定义数椐预处理 - -如果你执行其它的用情感分析来分类文本的任务,可以按如下的结构来准备数椐. 我们提供了脚本来构建字典和预处理数椐。所以你只用按下面的结构来组织数椐就行了。 - -``` -dataset -|----train -| |----class1 -| | |----text_files -| |----class2 -| | |----text_files -| | ... -|----test -| |----class1 -| | |----text_files -| |----class2 -| | |----text_files -| | ... -``` -* dataset: 一级目录。 -* train, test: 二级目录。 -* class1,class2,...: 三级目录。 -* text_files: 文本格式的实例文件。 - -所有同目录下的文本实例文件都是同级别的。 每个文本文件包含一个或者多个实例,每一行表示一个实例。 为了充分的随机打乱训练集, 在预处理含有多行数椐的文本文件时参数设置稍有不同, 执行`preprocess.sh`脚本时需要加上`-m True`参数。 tokenizer.perl 默认用来切分单记和标点符号,如果你不需要这个操作,在运行`preprocess.sh`时加上`-t False`参数即可。 - -## 训练模型 - -在这步任务中,我们使用了循环神经网络(RNN)的 LSTM 架构来训练情感分析模型。 引入LSTM模型主要是为了克服消失梯度的问题。 LSTM网络类似于具有隐藏层的标准循环神经网络, 但是隐藏层中的每个普通节点被一个记忆单元替换。 每个记忆单元包含四个主要的元素: 输入门, 具有自循环连接的神经元,忘记门和输出门。 更多的细节可以在文献中找到[4]。 LSTM架构的最大优点是它可以在长时间间隔内记忆信息,而没有短时记忆的损失。在有新的单词来临的每一个时间步骤内,存储在记忆单元区块的历史信息被更新用来迭代的学习单词以合理的序列程现。 - -
![LSTM](../../../doc/demo/sentiment_analysis/lstm.png)

Figure 1. LSTM [3]

Sentiment analysis is one of the most typical problems in natural language understanding. Its goal is to predict the sentiment attitude expressed in a sequence. Usually only a few key words, such as adjectives and adverbs, play the main role in predicting the sentiment of a sequence or paragraph, yet some reviews, for example those in the IMDB dataset, have very long contexts. We use an LSTM for this task because of its improved design with a gating mechanism. First, it can summarize representations from the word level up to context levels of variable length. Second, it can exploit expandable context at the sentence level, whereas most other methods only use n-gram-level knowledge. Third, it learns the paragraph representation directly instead of combining context-level information.

In this demo we provide two networks: a bidirectional LSTM and a three-layer stacked LSTM.

#### Bidirectional LSTM

Figure 2 shows the bidirectional LSTM network, followed by a fully connected layer and a softmax layer.
![BiLSTM](../../../doc/demo/sentiment_analysis/bi_lstm.jpg)

Figure 2. Bidirectional-LSTM
#### Stacked-LSTM

Figure 3 shows the three-layer LSTM structure. At the bottom of the figure is the word embedding, i.e. the word vectors produced from the processed documents. Above it are three chained LSTM hidden layers, the second of which runs in reverse. The maxima over all time steps of the top hidden LSTM layer are then extracted as the representation of the whole sequence. Finally, a fully connected feed-forward layer with softmax activation performs the classification. See reference [5] for more details.
![StackedLSTM](../../../doc/demo/sentiment_analysis/stacked_lstm.jpg)

Figure 3. Stacked-LSTM for sentiment analysis
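Before walking through the real configuration below, a heavily simplified sketch of such a stack may help fix the wiring in mind. This is not the shipped `stacked_lstm_net` from `sentiment_net.py`: the reversed middle layer and dropout are omitted, and all sizes are hypothetical.

```python
# Hypothetical, simplified three-layer LSTM stack for illustration only;
# the real stacked_lstm_net also reverses the middle layer and uses dropout.
from paddle.trainer_config_helpers import *

dict_dim, class_dim, hid_dim = 100000, 2, 128   # assumed sizes

word = data_layer(name="word", size=dict_dim)
label = data_layer(name="label", size=class_dim)

emb = embedding_layer(input=word, size=hid_dim)
lstm1 = simple_lstm(input=emb, size=hid_dim)
lstm2 = simple_lstm(input=lstm1, size=hid_dim)
lstm3 = simple_lstm(input=lstm2, size=hid_dim)

# Max over all time steps of the top LSTM layer, as described above.
sent = pooling_layer(input=lstm3, pooling_type=MaxPooling())

prob = fc_layer(input=sent, size=class_dim, act=SoftmaxActivation())
outputs(classification_cost(input=prob, label=label))
```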
**Configuration**

Enter the `demo/sentiment` directory. `trainer_config.py` is an example configuration file containing the algorithm and network configuration. Its first line imports the predefined networks from `sentiment_net.py`.

trainer_config.py:

```python
from sentiment_net import *

data_dir = "./data/pre-imdb"
# whether this config is used for test
is_test = get_config_arg('is_test', bool, False)
# whether this config is used for prediction
is_predict = get_config_arg('is_predict', bool, False)
dict_dim, class_dim = sentiment_data(data_dir, is_test, is_predict)

################## Algorithm Config #####################

settings(
  batch_size=128,
  learning_rate=2e-3,
  learning_method=AdamOptimizer(),
  regularization=L2Regularization(8e-4),
  gradient_clipping_threshold=25
)

#################### Network Config ######################
stacked_lstm_net(dict_dim, class_dim=class_dim,
                 stacked_num=3, is_predict=is_predict)
#bidirectional_lstm_net(dict_dim, class_dim=class_dim, is_predict=is_predict)
```

* **Data definition**:
    * get\_config\_arg(): read command line arguments passed via `--config_args=xx`.
    * Define the training and test data providers; PaddlePaddle's Python interface is used to load the data here. See the PyDataProvider documentation for more details.
* **Algorithm configuration**:
    * Use the stochastic gradient descent (sgd) algorithm.
    * Use the Adam optimizer.
    * Set the batch size to 128.
    * Set the average sgd window.
    * Set the global learning rate.
* **Network configuration**:
    * dict_dim: the dictionary dimension.
    * class_dim: the number of classes; IMDB has two labels, positive and negative.
    * `stacked_lstm_net`: the predefined network shown in Figure 3, used by default.
    * `bidirectional_lstm_net`: the predefined network shown in Figure 2.

**Training**

First install PaddlePaddle, then use the script `train.sh` below to start local training.

```
cd demo/sentiment/
./train.sh
```

train.sh:

```
config=trainer_config.py
output=./model_output
paddle train --config=$config \
             --save_dir=$output \
             --job=train \
             --use_gpu=false \
             --trainer_count=4 \
             --num_passes=10 \
             --log_period=20 \
             --dot_period=20 \
             --show_parameter_stats_period=100 \
             --test_all_data_in_one_period=1 \
             2>&1 | tee 'train.log'
```

* \--config=$config: the network configuration.
* \--save\_dir=$output: the directory in which trained models are saved.
* \--job=train: set the job mode to training.
* \--use\_gpu=false: train on CPU; if you installed the GPU version of PaddlePaddle and want to train on GPU, set this to true.
* \--trainer\_count=4: the number of threads (or GPUs).
* \--num\_passes=10: the number of passes; in PaddlePaddle one pass trains on every sample in the dataset once.
* \--log\_period=20: print a log line every 20 batches.
* \--show\_parameter\_stats\_period=100: print parameter statistics every 100 batches.
* \--test\_all\_data\_in\_one\_period=1: test on all test data in each test period.

If the run succeeds, the output log is saved to `demo/sentiment/train.log` and the models are saved under `demo/sentiment/model_output/`. The log looks like:

```
Batch=20 samples=2560 AvgCost=0.681644 CurrentCost=0.681644 Eval: classification_error_evaluator=0.36875 CurrentEval: classification_error_evaluator=0.36875
...
Pass=0 Batch=196 samples=25000 AvgCost=0.418964 Eval: classification_error_evaluator=0.1922
Test samples=24999 cost=0.39297 Eval: classification_error_evaluator=0.149406
```

- Batch=xx: xx batches have been trained on.
- samples=xx: xx samples have been trained on.
- AvgCost=xx: average cost from the 0th batch to the current batch.
- CurrentCost=xx: average cost over the latest log_period batches.
- Eval: classification\_error\_evaluator=xx: classification error from the 0th batch to the current batch.
- CurrentEval: classification\_error\_evaluator: classification error over the latest log_period batches.
- Pass=0: going through the whole training set once is called a pass; 0 means the first pass over the training set.

By default the `stacked_lstm_net` network is used, which converges faster than `bidirectional_lstm_net` on the same number of samples. To use the bidirectional LSTM instead, uncomment the last line and comment out the `stacked_lstm_net` call.

## Testing the Model

Testing means evaluating a labeled validation set with a trained model.

```
cd demo/sentiment
./test.sh
```

test.sh:

```bash
function get_best_pass() {
  cat $1 | grep -Pzo 'Test .*\n.*pass-.*' | \
  sed -r 'N;s/Test.* error=([0-9]+\.[0-9]+).*\n.*pass-([0-9]+)/\1 \2/g' | \
  sort | head -n 1
}

log=train.log
LOG=`get_best_pass $log`
LOG=(${LOG})
evaluate_pass="model_output/pass-${LOG[1]}"

echo 'evaluating from pass '$evaluate_pass

model_list=./model.list
touch $model_list | echo $evaluate_pass > $model_list
net_conf=trainer_config.py
paddle train --config=$net_conf \
             --model_list=$model_list \
             --job=test \
             --use_gpu=false \
             --trainer_count=4 \
             --config_args=is_test=1 \
             2>&1 | tee 'test.log'
```

The function `get_best_pass` picks the model with the lowest classification error for testing. In this example the IMDB test set is used as the validation set by default. Unlike training, the job mode `--job=test` and a model path `--model_list=$model_list` must be specified here. If the run succeeds, the log is saved to `demo/sentiment/test.log`. In our test the best model was `model_output/pass-00002`, with a classification error of 0.115645:

```
Pass=0 samples=24999 AvgCost=0.280471 Eval: classification_error_evaluator=0.115645
```

## Prediction

The `predict.py` script provides a prediction interface; install PaddlePaddle's Python API before using it. An example of predicting unlabeled IMDB reviews:

```
cd demo/sentiment
./predict.sh
```

predict.sh:

```
#Note the default model is pass-00002, you should make sure the model path
#exists or change the model path.
model=model_output/pass-00002/
config=trainer_config.py
label=data/pre-imdb/labels.list
cat ./data/aclImdb/test/pos/10007_10.txt | python predict.py \
    --tconf=$config \
    --model=$model \
    --label=$label \
    --dict=./data/pre-imdb/dict.txt \
    --batch_size=1
```

* `cat ./data/aclImdb/test/pos/10007_10.txt`: the input sample for prediction.
* `predict.py`: the prediction interface script.
* `--tconf=$config`: the network configuration.
* `--model=$model`: the model path.
* `--label=$label`: the label dictionary, mapping integer labels to string labels.
* `--dict=data/pre-imdb/dict.txt`: the word dictionary file.
* `--batch_size=1`: the batch size.

Note: make sure the default model path `model_output/pass-00002` exists, or change it to another model path.

Prediction result for this example:

```
Loading parameters from model_output/pass-00002/
./data/aclImdb/test/pos/10014_7.txt: predicting label is pos
```

We sincerely thank you for your attention and welcome your contributions.

## References

[1] Brendan O'Connor, Ramnath Balasubramanyan, Bryan R. Routledge, and Noah A. Smith. 2010. [From Tweets to Polls: Linking Text Sentiment to Public Opinion Time Series](http://homes.cs.washington.edu/~nasmith/papers/oconnor+balasubramanyan+routledge+smith.icwsm10.pdf). In ICWSM-2010.
[2] Johan Bollen, Huina Mao, Xiaojun Zeng. 2011. [Twitter mood predicts the stock market](http://arxiv.org/abs/1010.3003), Journal of Computational Science.
[3] Alex Graves, Marcus Liwicki, Santiago Fernandez, Roman Bertolami, Horst Bunke, and Jürgen Schmidhuber. 2009. [A novel connectionist system for unconstrained handwriting recognition](http://www.cs.toronto.edu/~graves/tpami_2009.pdf), IEEE Transactions on Pattern Analysis and Machine Intelligence, 31(5):855-868.
[4] Zachary C. Lipton, [A Critical Review of Recurrent Neural Networks for Sequence Learning](http://arxiv.org/abs/1506.00019v1), arXiv:1506.00019.
[5] Jie Zhou and Wei Xu; [End-to-end Learning of Semantic Role Labeling Using Recurrent Neural Networks](http://www.aclweb.org/anthology/P/P15/P15-1109.pdf); ACL-IJCNLP 2015.
diff --git a/doc_cn/faq/index.rst b/doc_cn/faq/index.rst deleted file mode 100644 index df8f1308cbc4d93cfeab4d921dcbbf5155eb4cc1..0000000000000000000000000000000000000000 --- a/doc_cn/faq/index.rst +++ /dev/null @@ -1,287 +0,0 @@ -#################### -PaddlePaddle常见问题 -#################### - -.. contents:: - -1. 如何减少内存占用 ---------------------------------- - -神经网络的训练本身是一个非常消耗内存和显存的工作,经常会消耗数10GB的内存和数GB的显存。 -PaddlePaddle的内存占用主要分为如下几个方面\: - -* DataProvider缓冲池内存(只针对内存) -* 神经元激活内存(针对内存和显存) -* 参数内存 (针对内存和显存) -* 其他内存杂项 - -其中,其他内存杂项是指PaddlePaddle本身所用的一些内存,包括字符串分配,临时变量等等,暂不考虑在内。 - -减少DataProvider缓冲池内存 -++++++++++++++++++++++++++ - -PyDataProvider使用的是异步加载,同时在内存里直接随即选取数据来做Shuffle。即 - -.. graphviz:: - - digraph { - rankdir=LR; - 数据文件 -> 内存池 -> PaddlePaddle训练 - } - -所以,减小这个内存池即可减小内存占用,同时也可以加速开始训练前数据载入的过程。但是,这 -个内存池实际上决定了shuffle的粒度。所以,如果将这个内存池减小,又要保证数据是随机的, -那么最好将数据文件在每次读取之前做一次shuffle。可能的代码为 - -.. literalinclude:: reduce_min_pool_size.py - -这样做可以极大的减少内存占用,并且可能会加速训练过程,详细文档参考 `这里 -<../ui/data_provider/pydataprovider2.html#provider>`_ 。 - -神经元激活内存 -++++++++++++++ - -神经网络在训练的时候,会对每一个激活暂存一些数据,如神经元激活值等。 -在反向传递的时候,这些数据会被用来更新参数。这些数据使用的内存主要和两个参数有关系, -一是batch size,另一个是每条序列(Sequence)长度。所以,其实也是和每个mini-batch中包含 -的时间步信息成正比。 - -所以做法可以有两种: - -* 减小batch size。 即在网络配置中 :code:`settings(batch_size=1000)` 设置成一个小一些的值。但是batch size本身是神经网络的超参数,减小batch size可能会对训练结果产生影响。 -* 减小序列的长度,或者直接扔掉非常长的序列。比如,一个数据集大部分序列长度是100-200, - 但是突然有一个10000长的序列,就很容易导致内存超限,特别是在LSTM等RNN中。 - -参数内存 -++++++++ - -PaddlePaddle支持非常多的优化算法(Optimizer),不同的优化算法需要使用不同大小的内存。 -例如使用 :code:`adadelta` 算法,则需要使用等于权重参数规模大约5倍的内存。举例,如果参数保存下来的模型目录 -文件为 :code:`100M`, 那么该优化算法至少需要 :code:`500M` 的内存。 - -可以考虑使用一些优化算法,例如 :code:`momentum`。 - -2. 如何加速PaddlePaddle的训练速度 ---------------------------------- - -加速PaddlePaddle训练可以考虑从以下几个方面\: - -* 减少数据载入的耗时 -* 加速训练速度 -* 利用分布式训练驾驭更多的计算资源 - -减少数据载入的耗时 -++++++++++++++++++ - -使用 :code:`pydataprovider`时,可以减少缓存池的大小,同时设置内存缓存功能,即可以极大的加速数据载入流程。 -:code:`DataProvider` 缓存池的减小,和之前减小通过减小缓存池来减小内存占用的原理一致。 - -.. literalinclude:: reduce_min_pool_size.py - -同时 :code:`@provider` 接口有一个 :code:`cache` 参数来控制缓存方法,将其设置成 :code:`CacheType.CACHE_PASS_IN_MEM` 的话,会将第一个 :code:`pass` (过完所有训练数据即为一个pass)生成的数据缓存在内存里,在之后的 :code:`pass` 中,不会再从 :code:`python` 端读取数据,而是直接从内存的缓存里读取数据。这也会极大减少数据读入的耗时。 - - -加速训练速度 -++++++++++++ - -PaddlePaddle支持Sparse的训练,sparse训练需要训练特征是 :code:`sparse_binary_vector` 、 :code:`sparse_vector` 、或者 :code:`integer_value` 的任一一种。同时,与这个训练数据交互的Layer,需要将其Parameter设置成 sparse 更新模式,即设置 :code:`sparse_update=True` - -这里使用简单的 :code:`word2vec` 训练语言模型距离,具体使用方法为\: - -使用一个词前两个词和后两个词,来预测这个中间的词。这个任务的DataProvider为\: - -.. literalinclude:: word2vec_dataprovider.py - -这个任务的配置为\: - -.. literalinclude:: word2vec_config.py - -更多关于sparse训练的内容请参考 `sparse训练的文档 `_ - -利用更多的计算资源 -++++++++++++++++++ - -利用更多的计算资源可以分为一下几个方式来进行\: - -* 单机CPU训练 - * 使用多线程训练。设置命令行参数 :code:`trainer_count`。 - -* 单机GPU训练 - * 使用显卡训练。设置命令行参数 :code:`use_gpu`。 - * 使用多块显卡训练。设置命令行参数 :code:`use_gpu` 和 :code:`trainer_count` 。 - -* 多机训练 - * 具体的多机训练方法参考 `多机训练文档 <../ui/data_provider/pydataprovider2.html#provider>`_ 。 - - -3. 遇到“非法指令”或者是“illegal instruction” --------------------------------------------- - -PaddlePaddle使用avx SIMD指令提高cpu执行效率,因此错误的使用二进制发行版可能会导致这种错误,请选择正确的版本。 - -4. 
如何选择SGD算法的学习率 --------------------------- - -在采用sgd/async_sgd进行训练时,一个重要的问题是选择正确的learning_rate。如果learning_rate太大,那么训练有可能不收敛,如果learning_rate太小,那么收敛可能很慢,导致训练时间过长。 - -通常做法是从一个比较大的learning_rate开始试,如果不收敛,那减少学习率10倍继续试验,直到训练收敛为止。那么如何判断训练不收敛呢?可以估计出如果模型采用不变的输出最小的cost0是多少。 - -如果训练过程的的cost明显高于这个常数输出的cost,那么我们可以判断为训练不收敛。举一个例子,假如我们是三分类问题,采用multi-class-cross-entropy作为cost,数据中0,1,2三类的比例为 :code:`0.2, 0.5, 0.3` , 那么常数输出所能达到的最小cost是 :code:`-(0.2*log(0.2)+0.5*log(0.5)+0.3*log(0.3))=1.03` 。如果训练一个pass(或者更早)后,cost还大于这个数,那么可以认为训练不收敛,应该降低学习率。 - - -5. 如何初始化参数 ------------------ - -默认情况下,PaddlePaddle使用均值0,标准差为 :math:`\frac{1}{\sqrt{d}}` 来初始化参数。其中 :math:`d` 为参数矩阵的宽度。这种初始化方式在一般情况下不会产生很差的结果。如果用户想要自定义初始化方式,PaddlePaddle目前提供两种参数初始化的方式\: - -* 高斯分布。将 :code:`param_attr` 设置成 :code:`param_attr=ParamAttr(initial_mean=0.0, initial_std=1.0)` -* 均匀分布。将 :code:`param_attr` 设置成 :code:`param_attr=ParamAttr(initial_max=1.0, initial_min=-1.0)` - -比如设置一个全连接层的参数初始化方式和bias初始化方式,可以使用如下代码。 - -.. code-block:: python - - hidden = fc_layer(input=ipt, param_attr=ParamAttr(initial_max=1.0, initial_min=-1.0), - bias_attr=ParamAttr(initial_mean=1.0, initial_std=0.0)) - -上述代码将bias全部初始化为1.0, 同时将参数初始化为 :code:`[1.0, -1.0]` 的均匀分布。 - -6. 如何共享参数 ---------------- - -PaddlePaddle的参数使用名字 :code:`name` 作为参数的ID,相同名字的参数,会共享参数。设置参数的名字,可以使用 :code:`ParamAttr(name="YOUR_PARAM_NAME")` 来设置。更方便的设置方式,是使得要共享的参数使用同样的 :code:`ParamAttr` 对象。 - -简单的全连接网络,参数共享的配置示例为\: - -.. literalinclude:: ../../python/paddle/trainer_config_helpers/tests/configs/shared_fc.py - -这里 :code:`hidden_a` 和 :code:`hidden_b` 使用了同样的parameter和bias。并且softmax层的两个输入也使用了同样的参数 :code:`softmax_param`。 - -7. *-cp27mu-linux_x86_64.whl is not a supported wheel on this platform. ------------------------------------------------------------------------ - -出现这个问题的主要原因是,系统编译wheel包的时候,使用的 :code:`wheel` 包是最新的, -而系统中的 :code:`pip` 包比较老。具体的解决方法是,更新 :code:`pip` 包并重新编译PaddlePaddle。 -更新 :code:`pip` 包的方法是\: - -.. code-block:: bash - - pip install --upgrade pip - -8. python相关的单元测试都过不了 --------------------------------- - -如果出现以下python相关的单元测试都过不了的情况: - -.. code-block:: bash - - 24 - test_PyDataProvider (Failed) - 26 - test_RecurrentGradientMachine (Failed) - 27 - test_NetworkCompare (Failed) - 28 - test_PyDataProvider2 (Failed) - 32 - test_Prediction (Failed) - 33 - test_Compare (Failed) - 34 - test_Trainer (Failed) - 35 - test_TrainerOnePass (Failed) - 36 - test_CompareTwoNets (Failed) - 37 - test_CompareTwoOpts (Failed) - 38 - test_CompareSparse (Failed) - 39 - test_recurrent_machine_generation (Failed) - 40 - test_PyDataProviderWrapper (Failed) - 41 - test_config_parser (Failed) - 42 - test_swig_api (Failed) - 43 - layers_test (Failed) - -并且查询PaddlePaddle单元测试的日志,提示: - -.. code-block:: bash - - paddle package is already in your PYTHONPATH. But unittest need a clean environment. - Please uninstall paddle package before start unittest. Try to 'pip uninstall paddle'. - -解决办法是: - -* 卸载PaddlePaddle包 :code:`pip uninstall paddle`, 清理掉老旧的PaddlePaddle安装包,使得单元测试有一个干净的环境。如果PaddlePaddle包已经在python的site-packages里面,单元测试会引用site-packages里面的python包,而不是源码目录里 :code:`/python` 目录下的python包。同时,即便设置 :code:`PYTHONPATH` 到 :code:`/python` 也没用,因为python的搜索路径是优先已经安装的python包。 - - -9. 运行Docker GPU镜像出现 "CUDA driver version is insufficient" ----------------------------------------------------------------- - -用户在使用PaddlePaddle GPU的Docker镜像的时候,常常出现 `Cuda Error: CUDA driver version is insufficient for CUDA runtime version`, 原因在于没有把机器上CUDA相关的驱动和库映射到容器内部。 -具体的解决方法是: - -.. 
code-block:: bash - - $ export CUDA_SO="$(\ls usr/lib64/libcuda* | xargs -I{} echo '-v {}:{}') $(\ls /usr/lib64/libnvidia* | xargs -I{} echo '-v {}:{}')" - $ export DEVICES=$(\ls /dev/nvidia* | xargs -I{} echo '--device {}:{}') - $ docker run ${CUDA_SO} ${DEVICES} -it paddledev/paddlepaddle:latest-gpu - -更多关于Docker的安装与使用, 请参考 `PaddlePaddle Docker 文档 `_ 。 - - -10. CMake源码编译, 找到的PythonLibs和PythonInterp版本不一致 ----------------------------------------------------------- - -这是目前CMake寻找Python的逻辑存在缺陷,如果系统安装了多个Python版本,CMake找到的Python库和Python解释器版本可能有不一致现象,导致编译PaddlePaddle失败。正确的解决方法是, -用户强制指定特定的Python版本,具体操作如下: - - .. code-block:: bash - - cmake .. -DPYTHON_EXECUTABLE= -DPYTHON_LIBRARY= -DPYTHON_INCLUDE_DIR= - -用户需要指定本机上Python的路径:````, ````, ```` - -10. A protocol message was rejected because it was too big ----------------------------------------------------------- - -如果在训练NLP相关模型时,出现以下错误: - -.. code-block:: bash - - [libprotobuf ERROR google/protobuf/io/coded_stream.cc:171] A protocol message was rejected because it was too big (more than 67108864 bytes). To increase the limit (or to disable these warnings), see CodedInputStream::SetTotalBytesLimit() in google/protobuf/io/coded_stream.h. - F1205 14:59:50.295174 14703 TrainerConfigHelper.cpp:59] Check failed: m->conf.ParseFromString(configProtoStr) - -可能的原因是:传给dataprovider的某一个args过大,一般是由于直接传递大字典导致的。错误的define_py_data_sources2类似: - -.. code-block:: python - - src_dict = dict() - for line_count, line in enumerate(open(src_dict_path, "r")): - src_dict[line.strip()] = line_count - - define_py_data_sources2( - train_list, - test_list, - module="dataprovider", - obj="process", - args={"src_dict": src_dict}) - -解决方案是:将字典的地址作为args传给dataprovider,然后在dataprovider里面根据该地址加载字典。即define_py_data_sources2应改为: - -.. code-block:: python - - define_py_data_sources2( - train_list, - test_list, - module="dataprovider", - obj="process", - args={"src_dict_path": src_dict_path}) - -完整源码可参考 `seqToseq `_ 示例。 - -11. 如何指定GPU设备 -------------------- - -例如机器上有4块GPU,编号从0开始,指定使用2、3号GPU: - -* 方式1:通过 `CUDA_VISIBLE_DEVICES `_ 环境变量来指定特定的GPU。 - -.. code-block:: bash - - env CUDA_VISIBLE_DEVICES=2,3 paddle train --use_gpu=true --trainer_count=2 - -* 方式2:通过命令行参数 ``--gpu_id`` 指定。 - -.. code-block:: bash - - paddle train --use_gpu=true --trainer_count=2 --gpu_id=2 diff --git a/doc_cn/howto/build_docker_image.rst b/doc_cn/howto/build_docker_image.rst deleted file mode 100644 index 46ba07d9ad7c1e1843cd953fa5c5fe1dedf6cdf1..0000000000000000000000000000000000000000 --- a/doc_cn/howto/build_docker_image.rst +++ /dev/null @@ -1,35 +0,0 @@ -构建PaddlePaddle的Docker Image -============================== -PaddlePaddle的Docker Image构建源码放置在 ``${源码根目录}/paddle/scripts/docker/`` 目录下。该目录有三类文件: - -- Dockerfile:Docker Image的描述文件,包括构建步骤、各种参数和维护人员等。 - - - 一共维护了12个Dockerfile,Dockerfile.m4是它们的模板。 - - PaddlePaddle中所有的Image都基于ubuntu 14.04。 - -- build.sh:Docker Image的构建脚本,使用方式见下一小节。 -- generate.sh:通过Dockerfile.m4模板生成不同的Dockerfile。 - -使用脚本构建Docker Image ------------------------- - -进入源码目录,执行 ``docker build`` 命令,即可在本地编译出PaddlePaddle的镜像。简单的使用样例为 - -.. code-block:: bash - - cd ${源码根目录}/paddle/scripts/docker/ - docker build --build-arg LOWEST_DL_SPEED=50K \ - --build-arg WITH_GPU=ON \ - --tag paddle_gpu:latest . 
- -其中,``--build-arg`` 传入的配置参数包括: - -- LOWEST\_DL\_SPEED\: 在多线程下载过程中,设置下载线程的最低速度。 - - - 默认单位是Bytes,但可以传入10K、10M、或10G等这样的单位。 - - 如果小于这个速度,那么这个线程将会关闭。当所有的线程都关闭了,那么下载进程将会重启。 -- WITH\_GPU\: ON or OFF,是否开启GPU功能。注意, - - **编译** PaddlePaddle的GPU版本 **不一定** 要在具有GPU的机器上进行。 - - **运行** PaddlePaddle的GPU版本 **一定** 要在具有GPU的机器上运行。 - -注意:所有Image的构建在Docker 1.12版本测试通过, 低于1.12的版本并没有测试。原因是旧版本可能缺乏 ``--build-arg`` 参数,从而不能在运行编译命令的时候接受参数。 diff --git a/doc_cn/howto/how_to_write_docs/index.rst b/doc_cn/howto/how_to_write_docs/index.rst deleted file mode 100644 index a1f983b3405fa40f436885e40fca2ebbb4695491..0000000000000000000000000000000000000000 --- a/doc_cn/howto/how_to_write_docs/index.rst +++ /dev/null @@ -1,54 +0,0 @@ -############################### -如何贡献/修改PaddlePaddle的文档 -############################### - -PaddlePaddle的文档包括英文文档 ``doc`` 和中文文档 ``doc_cn`` 两个部分。文档都是通过 `cmake`_ 驱动 `sphinx`_ 编译生成,生成后的文档分别存储在编译目录的 ``doc`` 和 ``doc_cn`` 两个子目录下。 - - -如何构建PaddlePaddle的文档 -========================== - -PaddlePaddle的文档构建有直接构建和基于Docker构建两种方式。构建PaddlePaddle文档需要准备的环境相对较复杂,所以我们推荐使用基于Docker来构建PaddlePaddle的文档。 - - -使用Docker构建PaddlePaddle的文档 --------------------------------- - -使用Docker构建PaddlePaddle的文档,需要在系统里先安装好Docker工具包。Docker安装请参考 `Docker的官网 `_ 。安装好Docker之后可以使用源码目录下的脚本构建文档,即 - -.. code-block:: bash - - cd TO_YOUR_PADDLE_CLONE_PATH - cd paddle/scripts/tools/build_docs - bash build_docs.sh - -编译完成后,该目录下会生成如下两个子目录\: - -* doc 英文文档目录 -* doc_cn 中文文档目录 - -打开浏览器访问对应目录下的index.html即可访问本地文档。 - -.. code-block:: bash - - open doc_cn/index.html - - -直接构建PaddlePaddle的文档 --------------------------- - -TBD - -如何书写PaddlePaddle的文档 -========================== - -TBD - -如何更新www.paddlepaddle.org文档 -================================ - -TBD - - -.. _cmake: https://cmake.org/ -.. _sphinx: http://www.sphinx-doc.org/en/1.4.8/ \ No newline at end of file diff --git a/doc_cn/index.rst b/doc_cn/index.rst deleted file mode 100644 index 88a9f79fd23c97785a054af2aa9ee53f8578ef63..0000000000000000000000000000000000000000 --- a/doc_cn/index.rst +++ /dev/null @@ -1,32 +0,0 @@ -PaddlePaddle文档 -================ - -使用指南 --------- -* `介绍 `_ -* `快速入门 `_ -* `基本使用概念 `_ -* `编译与安装 `_ -* `用户接口 `_ -* `使用示例 `_ -* `模型配置 <../doc/ui/api/trainer_config_helpers/index.html>`_ -* `集群训练 `_ - -开发指南 --------- -* `新写Layer <../doc/dev/new_layer/index.html>`_ -* `如何贡献文档 `_ -* `如何构建Docker Image `_ - -算法教程 --------- - -* `Recurrent Group教程 `_ -* `单层RNN示例 <../doc/algorithm/rnn/rnn.html>`_ -* :ref:`algo_hrnn_rnn_api_compare` -* `支持双层序列作为输入的Layer `_ - -常见问题 --------- - -* `常见问题 `_ diff --git a/doc_cn/introduction/index.rst b/doc_cn/introduction/index.rst deleted file mode 100644 index c996f5f4acd07011c98c3e1086080e85ed7dd1b4..0000000000000000000000000000000000000000 --- a/doc_cn/introduction/index.rst +++ /dev/null @@ -1,114 +0,0 @@ -简介 -==== - -PaddlePaddle是源于百度的一个深度学习平台。这份简短的介绍将向你展示如何利用PaddlePaddle来解决一个经典的线性回归问题。 - -1. 一个经典的任务 ------------------ - -我们展示如何用PaddlePaddle解决 `单变量的线性回归 `_ 问题。线性回归的输入是一批点 `(x, y)` ,其中 `y = wx + b + ε`, 而 ε 是一个符合高斯分布的随机变量。线性回归的输出是从这批点估计出来的参数 `w` 和 `b` 。 - -一个例子是房产估值。我们假设房产的价格(y)是其大小(x)的一个线性函数,那么我们可以通过收集市场上房子的大小和价格,用来估计线性函数的参数w 和 b。 - -2. 准备数据 ------------ - -假设变量 `x` 和 `y` 的真实关系为: `y = 2x + 0.3 + ε`,这里展示如何使用观测数据来拟合这一线性关系。首先,Python代码将随机产生2000个观测点,作为线性回归的输入。下面脚本符合PaddlePaddle期待的读取数据的Python程序的模式。 - -.. 
code-block:: python - - # dataprovider.py - from paddle.trainer.PyDataProvider2 import * - import random - - # 定义输入数据的类型: 2个浮点数 - @provider(input_types=[dense_vector(1), dense_vector(1)],use_seq=False) - def process(settings, input_file): - for i in xrange(2000): - x = random.random() - yield [x], [2*x+0.3] - -3. 训练模型 ------------ - -为了还原 `y = 2x + 0.3`,我们先从一条随机的直线 `y' = wx + b` 开始,然后利用观测数据调整 `w` 和 `b` 使得 `y'` 和 `y` 的差距不断减小,最终趋于接近。这个过程就是模型的训练过程,而 `w` 和 `b` 就是模型的参数,即我们的训练目标。 - -在PaddlePaddle里,该模型的网络配置如下。 - -.. code-block:: python - - # trainer_config.py - from paddle.trainer_config_helpers import * - - # 1. 定义数据来源,调用上面的process函数获得观测数据 - data_file = 'empty.list' - with open(data_file, 'w') as f: f.writelines(' ') - define_py_data_sources2(train_list=data_file, test_list=None, - module='dataprovider', obj='process',args={}) - - # 2. 学习算法。控制如何改变模型参数 w 和 b - settings(batch_size=12, learning_rate=1e-3, learning_method=MomentumOptimizer()) - - # 3. 神经网络配置 - x = data_layer(name='x', size=1) - y = data_layer(name='y', size=1) - # 线性计算网络层: ȳ = wx + b - ȳ = fc_layer(input=x, param_attr=ParamAttr(name='w'), size=1, act=LinearActivation(), bias_attr=ParamAttr(name='b')) - # 计算误差函数,即 ȳ 和真实 y 之间的距离 - cost = regression_cost(input= ȳ, label=y) - outputs(cost) - -这段简短的配置展示了PaddlePaddle的基本用法: - -- 第一部分定义了数据输入。一般情况下,PaddlePaddle先从一个文件列表里获得数据文件地址,然后交给用户自定义的函数(例如上面的 `process`函数)进行读入和预处理从而得到真实输入。本文中由于输入数据是随机生成的不需要读输入文件,所以放一个空列表(`empty.list`)即可。 - -- 第二部分主要是选择学习算法,它定义了模型参数改变的规则。PaddlePaddle提供了很多优秀的学习算法,这里使用一个基于momentum的随机梯度下降(SGD)算法,该算法每批量(batch)读取12个采样数据进行随机梯度计算来更新更新。 - -- 最后一部分是神经网络的配置。由于PaddlePaddle已经实现了丰富的网络层,所以很多时候你需要做的只是定义正确的网络层并把它们连接起来。这里使用了三种网络单元: - - - **数据层**:数据层 `data_layer` 是神经网络的入口,它读入数据并将它们传输到接下来的网络层。这里数据层有两个,分别对应于变量 `x` 和 `y`。 - - **全连接层**:全连接层 `fc_layer` 是基础的计算单元,这里利用它建模变量之间的线性关系。计算单元是神经网络的核心,PaddlePaddle支持大量的计算单元和任意深度的网络连接,从而可以拟合任意的函数来学习复杂的数据关系。 - - **回归误差代价层**:回归误差代价层 `regression_cost` 是众多误差代价函数层的一种,它们在训练过程作为网络的出口,用来计算模型的误差,是模型参数优化的目标函数。 - -定义了网络结构并保存为 `trainer_config.py` 之后,运行以下训练命令: - -.. code-block:: bash - - paddle train --config=trainer_config.py --save_dir=./output --num_passes=30 - -PaddlePaddle将在观测数据集上迭代训练30轮,并将每轮的模型结果存放在 `./output` 路径下。从输出日志可以看到,随着轮数增加误差代价函数的输出在不断的减小,这意味着模型在训练数据上不断的改进,直到逼近真实解:` y = 2x + 0.3 ` - -4. 模型检验 ------------ - -训练完成后,我们希望能够检验模型的好坏。一种常用的做法是用学习的模型对另外一组测试数据进行预测,评价预测的效果。在这个例子中,由于已经知道了真实答案,我们可以直接观察模型的参数是否符合预期来进行检验。 - -PaddlePaddle将每个模型参数作为一个numpy数组单独存为一个文件,所以可以利用如下方法读取模型的参数。 - -.. code-block:: python - - import numpy as np - import os - - def load(file_name): - with open(file_name, 'rb') as f: - f.read(16) # skip header for float type. - return np.fromfile(f, dtype=np.float32) - - print 'w=%.6f, b=%.6f' % (load('output/pass-00029/w'), load('output/pass-00029/b')) - # w=1.999743, b=0.300137 - -.. image:: ./parameters.png - :align: center - :scale: 80 % - -从图中可以看到,虽然 `w` 和 `b` 都使用随机值初始化,但在起初的几轮训练中它们都在快速逼近真实值,并且后续仍在不断改进,使得最终得到的模型几乎与真实模型一致。 - -这样,我们用PaddlePaddle解决了单变量线性回归问题, 包括数据输入、模型训练和最后的结果验证。 - -5. 
推荐后续阅读 ---------------- - -- `安装/编译 <../build_and_install/index.html>`_ :PaddlePaddle的安装与编译文档。 -- `快速入门 <../demo/quick_start/index.html>`_ :使用商品评论分类任务,系统性的介绍如何一步步改进,最终得到产品级的深度模型。 -- `示例 <../demo/index.html>`_ :各种实用案例,涵盖图像、文本、推荐等多个领域。 \ No newline at end of file diff --git a/doc_cn/introduction/parameters.png b/doc_cn/introduction/parameters.png deleted file mode 100644 index 2ec67480951e21f0400bce1c34b3108dcd65c18c..0000000000000000000000000000000000000000 Binary files a/doc_cn/introduction/parameters.png and /dev/null differ diff --git a/doc_cn/ui/cmd/index.rst b/doc_cn/ui/cmd/index.rst deleted file mode 100644 index 31a8b8a79f4a87101bd6030eb4e779fd11d65811..0000000000000000000000000000000000000000 --- a/doc_cn/ui/cmd/index.rst +++ /dev/null @@ -1,20 +0,0 @@ -命令 -==== - -安装好PaddlePaddle后,在命令行直接敲击 ``paddle`` 或 ``paddle --help`` 会显示如下一些命令。 - -* ``train`` Start a paddle_trainer - 启动一个PaddlePaddle训练进程。 ``paddle train`` 可以通过命令行参数 ``-local=true`` 启动一个单机的训练进程;也可以和 ``paddle pserver`` 一起使用启动多机的分布式训练进程。 -* ``pserver`` Start a paddle_pserver_main - 在多机分布式训练下启动PaddlePaddle的parameter server进程。 -* ``version`` Print paddle version - 用于打印当前PaddlePaddle的版本和编译选项相关信息。常见的输出格式如下:1)第一行说明了PaddlePaddle的版本信息;2)第二行开始说明了一些主要的编译选项,具体意义可以参考 `编译参数选项文件 <../../build_and_install/cmake/compile_options.html>`_ 。 - - .. literalinclude:: paddle_version.txt - -* ``merge_model`` Start a paddle_merge_model - 用于将PaddlePaddle的模型参数文件和模型配置文件打包成一个文件,方便做部署分发。 -* ``dump_config`` Dump the trainer config as proto string - 用于将PaddlePaddle的模型配置文件以proto string的格式打印出来。 -* ``make_diagram`` - 使用graphviz对PaddlePaddle的模型配置文件进行绘制。 \ No newline at end of file diff --git a/doc_cn/ui/cmd/paddle_version.txt b/doc_cn/ui/cmd/paddle_version.txt deleted file mode 100644 index 33e2e4de7c24afd481eb6ca7eabed4924863d2b7..0000000000000000000000000000000000000000 --- a/doc_cn/ui/cmd/paddle_version.txt +++ /dev/null @@ -1,11 +0,0 @@ -PaddlePaddle 0.8.0b, compiled with - with_avx: ON - with_gpu: ON - with_double: OFF - with_python: ON - with_rdma: OFF - with_glog: ON - with_gflags: ON - with_metric_learning: OFF - with_timer: OFF - with_predict_sdk: OFF diff --git a/doc_cn/ui/data_provider/dataprovider.rst b/doc_cn/ui/data_provider/dataprovider.rst deleted file mode 100644 index e6796429a78801eba5e5fb776dd6fbe3413115ea..0000000000000000000000000000000000000000 --- a/doc_cn/ui/data_provider/dataprovider.rst +++ /dev/null @@ -1,13 +0,0 @@ -DataProvider的介绍 -================== - -DataProvider是PaddlePaddle负责提供数据的模块。其作用是将数据传入内存或显存,让神经网络可以进行训练或预测。用户可以通过简单使用Python接口 `PyDataProvider2 `_ ,来自定义传数据的过程。如果有更复杂的使用,或者需要更高的效率,用户也可以在C++端自定义一个 ``DataProvider`` 。 - -PaddlePaddle需要用户在网络配置(trainer_config.py)中定义使用哪种DataProvider,并且在DataProvider中实现如何访问训练文件列表(train.list)或测试文件列表(test.list)。 - -- train.list和test.list存放在本地(推荐直接存放到训练目录,以相对路径引用)。一般情况下,两者均为纯文本文件,其中每一行对应一个数据文件地址: - - - 如果数据文件存于本地磁盘,这个地址则为它的绝对路径或相对路径(相对于PaddlePaddle程序运行时的路径)。 - - 地址也可以为hdfs文件路径,或者数据库连接路径等。 - - 由于这个地址会被DataProvider使用,因此,如何解析该地址也是用户自定义DataProvider时需要考虑的地方。 -- 如果没有设置test.list,或设置为None,那么在训练过程中不会执行测试操作;否则,会根据命令行参数指定的测试方式,在训练过程中进行测试,从而防止过拟合。 diff --git a/doc_cn/ui/data_provider/pydataprovider2.rst b/doc_cn/ui/data_provider/pydataprovider2.rst deleted file mode 100644 index dce373118c5ae01c7ecf9afc15e1d9af9bf4ebe4..0000000000000000000000000000000000000000 --- a/doc_cn/ui/data_provider/pydataprovider2.rst +++ /dev/null @@ -1,227 +0,0 @@ -PyDataProvider2的使用 -===================== - -PyDataProvider2是PaddlePaddle使用Python提供数据的推荐接口。该接口使用多线程读取数据,并提供了简单的Cache功能;同时可以使用户只关注如何从文件中读取每一条数据,而不用关心数据如何传输,如何存储等等。 - -.. 
contents:: - -MNIST的使用场景 ---------------- - -我们以MNIST手写识别为例,来说明PyDataProvider2的简单使用场景。 - -样例数据 -++++++++ - -MNIST是一个包含有70,000张灰度图片的数字分类数据集。样例数据 ``mnist_train.txt`` 如下: - -.. literalinclude:: mnist_train.txt - -其中每行数据代表一张图片,行内使用 ``;`` 分成两部分。第一部分是图片的标签,为0-9中的一个数字;第二部分是28*28的图片像素灰度值。 对应的 ``train.list`` 即为这个数据文件的名字: - -.. literalinclude:: train.list - -dataprovider的使用 -++++++++++++++++++ - -.. literalinclude:: mnist_provider.dict.py - -- 首先,引入PaddlePaddle的PyDataProvider2包。 -- 其次,定义一个Python的 `Decorator `_ `@provider`_ 。用于将下一行的数据输入函数标记成一个PyDataProvider2,同时设置它的input_types属性。 - - - `input_types`_:设置这个PyDataProvider2返回什么样的数据。本例根据网络配置中 ``data_layer`` 的名字,显式指定返回的是一个28*28维的稠密浮点数向量和一个[0-9]的10维整数标签。 - - .. literalinclude:: mnist_config.py - :lines: 9-10 - - - 注意:如果用户不显示指定返回数据的对应关系,那么PaddlePaddle会根据layer的声明顺序,来确定对应关系。但这个关系可能不正确,所以推荐使用显式指定的方式来设置input_types。 -- 最后,实现数据输入函数(如本例的 ``process`` 函数)。 - - - 该函数的功能是:打开文本文件,读取每一行,将行中的数据转换成与input_types一致的格式,然后返回给PaddlePaddle进程。注意, - - - 返回的顺序需要和input_types中定义的顺序一致。 - - 返回时,必须使用Python关键词 ``yield`` ,相关概念是 ``generator`` 。 - - 一次yield调用,返回一条完整的样本。如果想为一个数据文件返回多条样本,只需要在函数中调用多次yield即可(本例中使用for循环进行多次调用)。 - - - 该函数具有两个参数: - - - settings:在本例中没有使用,具体可以参考 `init_hook`_ 中的说明。 - - filename:为 ``train.list`` 或 ``test.list`` 中的一行,即若干数据文件路径的某一个。 - -网络配置中的调用 -++++++++++++++++ - -在网络配置里,只需要一行代码就可以调用这个PyDataProvider2,如, - -.. literalinclude:: mnist_config.py - :lines: 1-7 - -训练数据是 ``train.list`` ,没有测试数据,调用的PyDataProvider2是 ``mnist_provider`` 模块中的 ``process`` 函数。 - -小结 -+++++ - -至此,简单的PyDataProvider2样例就说明完毕了。对用户来说,仅需要知道如何从 **一个文件** 中读取 **一条样本** ,就可以将数据传送给PaddlePaddle了。而PaddlePaddle则会帮用户做以下工作: - -* 将数据组合成Batch进行训练 -* 对训练数据进行Shuffle -* 多线程的数据读取 -* 缓存训练数据到内存(可选) -* CPU->GPU双缓存 - -是不是很简单呢? - -时序模型的使用场景 ------------------- -样例数据 -++++++++ - -时序模型是指数据的某一维度是一个序列形式,即包含时间步信息。所谓时间步信息,不一定和时间有关系,只是说明数据的顺序是重要的。例如,文本信息就是一个序列数据。 - -本例采用英文情感分类的数据,即将一段英文文本数据,分类成正面情绪和负面情绪两类(用0和1表示)。样例数据 ``sentimental_train.txt`` 如下: - -.. literalinclude:: sentimental_train.txt - -dataprovider的使用 -++++++++++++++++++ - -相对MNIST而言,这个dataprovider较复杂,主要原因是增加了初始化机制 `init_hook`_。本例的 ``on_init`` 函数就是根据该机制配置的,它会在dataprovider创建的时候执行。 - -- 其中 ``input_types`` 和在 `@provider`_ 中配置的效果一致。本例中的输入特征是词ID的序列,因此使用 ``integer_value_sequence`` 类型来设置。 -- 将 ``dictionary`` 存入settings对象,在 ``process`` 函数中使用。 dictionary是从网络配置中传入的dict对象,即一个将单词字符串映射到单词ID的字典。 - -.. literalinclude:: sentimental_provider.py - -网络配置中的调用 -++++++++++++++++ - -调用这个PyDataProvider2的方法,基本上和MNIST样例一致,除了 - -* 在配置中需要读取外部字典。 -* 在声明DataProvider的时候传入dictionary作为参数。 - -.. 
literalinclude:: sentimental_config.py - :emphasize-lines: 12-14 - -参考(Reference) ---------------- - -@provider -+++++++++ - -``@provider`` 是一个Python的 `Decorator`_ ,可以将某一个函数标记成一个PyDataProvider2。如果不了解 `Decorator`_ 是什么也没关系,只需知道这是一个标记属性的方法就可以了。它包含的属性参数如下: - -* input_types:数据输入格式。具体的格式说明,请参考 `input_types`_ 。 -* should_shuffle:是不是要对数据做Shuffle。训练时默认shuffle,测试时默认不shuffle。 -* min_pool_size:设置内存中最小暂存的数据条数,也是PaddlePaddle所能够保证的shuffle粒度。如果为-1,则会预先读取全部数据到内存中。 -* pool_size: 设置内存中暂存的数据条数。如果为-1(默认),则不在乎内存暂存多少条数据。如果设置,则推荐大于训练时batch size的值,并且在内存足够的情况下越大越好。 -* can_over_batch_size:是否允许暂存略微多余pool_size的数据。由于这样做可以避免很多死锁问题,一般推荐设置成True。 -* calc_batch_size:可以传入一个函数,用于自定义每条数据的batch size(默认为1)。 -* cache: 数据缓存的策略,具体请参考 `cache`_ 。 -* init_hook:初始化时调用的函数,具体请参考 `init_hook`_ 。 -* check:如果为true,会根据input_types检查数据的合法性。 -* check_fail_continue:如果为true,那么当check出数据不合法时,会扔到这条数据,继续训练或预测。(对check=false的情况,没有作用) - -input_types -+++++++++++ - -PaddlePaddle的数据包括四种主要类型,和三种序列模式。 - -四种数据类型: - -* dense_vector:稠密的浮点数向量。 -* sparse_binary_vector:稀疏的01向量,即大部分值为0,但有值的地方必须为1。 -* sparse_float_vector:稀疏的向量,即大部分值为0,但有值的部分可以是任何浮点数。 -* integer:整数标签。 - -三种序列模式: - -* SequenceType.NO_SEQUENCE:不是一条序列 -* SequenceType.SEQUENCE:是一条时间序列 -* SequenceType.SUB_SEQUENCE: 是一条时间序列,且序列的每一个元素还是一个时间序列。 - -不同的数据类型和序列模式返回的格式不同,列表如下: - -+----------------------+---------------------+-----------------------------------+------------------------------------------------+ -| | NO_SEQUENCE | SEQUENCE | SUB_SEQUENCE | -+======================+=====================+===================================+================================================+ -| dense_vector | [f, f, ...] | [[f, ...], [f, ...], ...] | [[[f, ...], ...], [[f, ...], ...],...] | -+----------------------+---------------------+-----------------------------------+------------------------------------------------+ -| sparse_binary_vector | [i, i, ...] | [[i, ...], [i, ...], ...] | [[[i, ...], ...], [[i, ...], ...],...] | -+----------------------+---------------------+-----------------------------------+------------------------------------------------+ -| sparse_float_vector | [(i,f), (i,f), ...] | [[(i,f), ...], [(i,f), ...], ...] | [[[(i,f), ...], ...], [[(i,f), ...], ...],...] | -+----------------------+---------------------+-----------------------------------+------------------------------------------------+ -| integer_value | i | [i, i, ...] | [[i, ...], [i, ...], ...] 
| -+----------------------+---------------------+-----------------------------------+------------------------------------------------+ - -其中,f代表一个浮点数,i代表一个整数。 - -注意:对sparse_binary_vector和sparse_float_vector,PaddlePaddle存的是有值位置的索引。例如, - -- 对一个5维非序列的稀疏01向量 ``[0, 1, 1, 0, 0]`` ,类型是sparse_binary_vector,返回的是 ``[1, 2]`` 。 -- 对一个5维非序列的稀疏浮点向量 ``[0, 0.5, 0.7, 0, 0]`` ,类型是sparse_float_vector,返回的是 ``[(1, 0.5), (2, 0.7)]`` 。 - -init_hook -+++++++++ - -init_hook可以传入一个函数。该函数在初始化的时候会被调用,其参数如下: - -* 第一个参数是settings对象,它和数据传入函数的第一个参数(如本例中 ``process`` 函数的 ``settings`` 参数)必须一致。该对象具有以下两个属性: - * settings.input_types:数据输入格式,具体请参考 `input_types`_ 。 - * settings.logger:一个logging对象。 -* 其他参数使用 ``kwargs`` (key word arguments)传入,包括以下两种: - * PaddlePaddle定义的参数: 1)is_train:bool型参数,表示用于训练或预测;2)file_list:所有文件列表。 - * 用户定义的参数:使用args在网络配置中设置。 - -注意:PaddlePaddle保留添加参数的权力,因此init_hook尽量使用 ``**kwargs`` 来接受不使用的函数以保证兼容性。 - -cache -+++++ - -PyDataProvider2提供了两种简单的Cache策略: - -* CacheType.NO_CACHE:不缓存任何数据,每次都会从python端读取数据 -* CacheType.CACHE_PASS_IN_MEM:第一个pass会从python端读取数据,剩下的pass会直接从内存里 - 读取数据。 - - -注意事项 --------- - -可能的内存泄露问题 -++++++++++++++++++ - -PaddlePaddle将train.list中的每一行都传递给process函数,从而生成多个generator。当训练数据非常多时,就会生成非常多的generator。 - -虽然每个generator在没有调用的时候,是几乎不占内存的;但当调用过一次后,generator便会存下当前的上下文(Context),而这个Context可能会非常大。并且,generator至少需要调用两次才会知道是否停止。所以,即使process函数里面只有一个yield,也需要两次随机选择到相同generator的时候,才会释放该段内存。 - -.. code-block:: python - - def func(): - yield 0 - - f = func() # 创建generator - tmp = next(f) # 调用一次,返回0 - tmp = next(f) # 调用第二次的时候,才会Stop Iteration - -由于顺序调用这些generator不会出现上述问题,因此有两种解决方案: - -1. **最佳推荐**:将样本的地址放入另一个文本文件,train.list写入那个文本文件的地址。即不要将每一个样本都放入train.list。 -2. 在generator的上下文中尽量留下非常少的变量引用,例如 - -.. code-block:: python - - def real_process(fn): - # ... read from fn - return result # 当函数返回的时候,python可以解除掉内部变量的引用。 - - def process(fn): - yield real_process(fn) - -注意:这个问题是PyDataProvider读数据时候的逻辑问题,很难整体修正。 - -内存不够用的情况 -++++++++++++++++ - -PyDataProvider2会尽可能多的使用内存。因此,对于内存较小的机器,推荐使用 ``pool_size`` 变量来设置内存中暂存的数据条。具体请参考 `@provider`_ 中的说明。 - diff --git a/doc_cn/ui/index.rst b/doc_cn/ui/index.rst deleted file mode 100644 index ff36c9adb690f4126cf6ee332a9f0b09648270bd..0000000000000000000000000000000000000000 --- a/doc_cn/ui/index.rst +++ /dev/null @@ -1,32 +0,0 @@ -######## -用户接口 -######## - -数据提供 -======== - -.. toctree:: - :maxdepth: 1 - - data_provider/dataprovider.rst - data_provider/pydataprovider2.rst - -命令及命令行参数 -================ - -.. toctree:: - :maxdepth: 1 - - cmd/index.rst - -* `参数用例 <../../doc/ui/cmd_argument/use_case.html>`_ -* `参数分类 <../../doc/ui/cmd_argument/argument_outline.html>`_ -* `参数描述 <../../doc/ui/cmd_argument/detail_introduction.html>`_ - -预测 -======= - -.. toctree:: - :maxdepth: 1 - - predict/swig_py_paddle.rst diff --git a/doc_cn/ui/predict/swig_py_paddle.rst b/doc_cn/ui/predict/swig_py_paddle.rst deleted file mode 100644 index 05f25345c5246687363dee1931310120b5723d0b..0000000000000000000000000000000000000000 --- a/doc_cn/ui/predict/swig_py_paddle.rst +++ /dev/null @@ -1,56 +0,0 @@ -基于Python的预测 -================ - -预测流程 --------- - -PaddlePaddle使用swig对常用的预测接口进行了封装,通过编译会生成py_paddle软件包,安装该软件包就可以在python环境下实现模型预测。可以使用python的 ``help()`` 函数查询软件包相关API说明。 - -基于Python的模型预测,主要包括以下五个步骤。 - -1. 初始化PaddlePaddle环境 - - 在程序开始阶段,通过调用 ``swig_paddle.initPaddle()`` 并传入相应的命令行参数初始化PaddlePaddle。 - -2. 解析模型配置文件 - - 初始化之后,可以通过调用 ``parse_config()`` 解析训练模型时用的配置文件。注意预测数据通常不包含label, 同时预测网络通常直接输出最后一层的结果而不是像训练网络一样再接一层cost layer,所以一般需要对训练用的模型配置文件稍作相应修改才能在预测时使用。 - -3. 
构造paddle.GradientMachine - - 通过调用 ``swig_paddle.GradientMachine.createFromConfigproto()`` 传入上一步解析出来的模型配置就可以创建一个 ``GradientMachine``。 - -4. 准备预测数据 - - swig_paddle中的预测接口的参数是自定义的C++数据类型,py_paddle里面提供了一个工具类 ``DataProviderConverter`` 可以用于接收和PyDataProvider2一样的输入数据并转换成预测接口所需的数据类型。 - -5. 模型预测 - - 通过调用 ``forwardTest()`` 传入预测数据,直接返回计算结果。 - - -预测Demo --------- - -如下是一段使用mnist model来实现手写识别的预测代码。完整的代码见 ``src_root/doc/ui/predict/predict_sample.py`` 。mnist model可以通过 ``src_root\demo\mnist`` 目录下的demo训练出来。 - -.. literalinclude:: ../../../doc/ui/predict/predict_sample.py - :language: python - :lines: 15-18,121-136 - - -Demo预测输出如下,其中value即为softmax层的输出。由于TEST_DATA包含两条预测数据,所以输出的value包含两个向量 。 - -.. code-block:: text - - [{'id': None, 'value': array( - [[ 5.53018653e-09, 1.12194102e-05, 1.96644767e-09, - 1.43630644e-02, 1.51111044e-13, 9.85625684e-01, - 2.08823112e-10, 2.32777140e-08, 2.00186201e-09, - 1.15501715e-08], - [ 9.99982715e-01, 1.27787406e-10, 1.72296313e-05, - 1.49316648e-09, 1.36540484e-11, 6.93137714e-10, - 2.70634608e-08, 3.48565123e-08, 5.25639710e-09, - 4.48684503e-08]], dtype=float32)}] - - diff --git a/doc_theme/static/css/override.css b/doc_theme/static/css/override.css index 438a87848a0176a7857177aeb672c59f35bd8d4b..09ecff688b9a2dae3d834572217922640c529c5e 100644 --- a/doc_theme/static/css/override.css +++ b/doc_theme/static/css/override.css @@ -1,3 +1,6 @@ +* { + font-family:"Roboto","Lato","proxima-nova","Helvetica Neue",Arial,sans-serif; +} body { padding-top: 80px; background-image: none !important; diff --git a/doc_theme/static/js/paddle_doc_init.js b/doc_theme/static/js/paddle_doc_init.js index 5c815a8d3a3dab9bdbce544ff3bb49be40ad8934..153ce30745a0a21097fb385f2d66f12e6c8d5be5 100644 --- a/doc_theme/static/js/paddle_doc_init.js +++ b/doc_theme/static/js/paddle_doc_init.js @@ -28,4 +28,4 @@ $(document).ready(function(){ $('.doc-menu-vertical').find('li.current').last().addClass('active'); $('.doc-menu-vertical').perfectScrollbar(); -}); \ No newline at end of file +}); diff --git a/doc_theme/templates/layout.html b/doc_theme/templates/layout.html index 034740369ed10a748856e2205d3315f51a7de62f..9fca69dc4e7f0827acfc755a97a662350214b90e 100644 --- a/doc_theme/templates/layout.html +++ b/doc_theme/templates/layout.html @@ -101,7 +101,7 @@