diff --git a/.clang_format.hook b/.clang_format.hook new file mode 100755 index 0000000000000000000000000000000000000000..1d928216867c0ba3897d71542fea44debf8d72a0 --- /dev/null +++ b/.clang_format.hook @@ -0,0 +1,15 @@ +#!/bin/bash +set -e + +readonly VERSION="3.8" + +version=$(clang-format -version) + +if ! [[ $version == *"$VERSION"* ]]; then + echo "clang-format version check failed." + echo "a version contains '$VERSION' is needed, but get '$version'" + echo "you can install the right version, and make an soft-link to '\$PATH' env" + exit -1 +fi + +clang-format $@ diff --git a/.gitignore b/.gitignore index 2b30f7938c8a1672acd0a14b7051af12c37889fb..351b8204100dfd71e94cb3efa2e946b44b9e4285 100644 --- a/.gitignore +++ b/.gitignore @@ -16,3 +16,15 @@ third_party/ *~ bazel-* third_party/ + +# clion workspace. +cmake-build-* + +# generated while compiling +python/paddle/v2/framework/core.so +paddle/pybind/pybind.h +CMakeFiles +cmake_install.cmake +paddle/.timestamp +python/paddlepaddle.egg-info/ +paddle/pybind/pybind.h diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml index 9b138576fcc695408c4cc0a03d227da7d0c6f440..83fe9af768964003130d02b7d913ad1c2102dd1d 100644 --- a/.pre-commit-config.yaml +++ b/.pre-commit-config.yaml @@ -3,8 +3,8 @@ hooks: - id: remove-crlf files: (?!.*third_party)^.*$ | (?!.*book)^.*$ -- repo: https://github.com/reyoung/mirrors-yapf.git - sha: v0.13.2 +- repo: https://github.com/PaddlePaddle/mirrors-yapf.git + sha: 0d79c0c469bab64f7229c9aca2b1186ef47f0e37 hooks: - id: yapf files: (.*\.(py|bzl)|BUILD|.*\.BUILD|WORKSPACE)$ @@ -17,7 +17,20 @@ - id: detect-private-key files: (?!.*third_party)^.*$ | (?!.*book)^.*$ - id: end-of-file-fixer -- repo: https://github.com/PaddlePaddle/clang-format-pre-commit-hook.git - sha: 28c0ea8a67a3e2dbbf4822ef44e85b63a0080a29 +- repo: local hooks: - - id: clang-formater + - id: clang-format-with-version-check + name: clang-format + description: Format files with ClangFormat. + entry: bash ./.clang_format.hook -i + language: system + files: \.(c|cc|cxx|cpp|cu|h|hpp|hxx|proto)$ +- repo: https://github.com/PaddlePaddle/pre-commit-golang + sha: 8337620115c25ff8333f1b1a493bd031049bd7c0 + hooks: + - id: go-fmt + types: + - go + - id: gometalinter + types: + - go diff --git a/.travis.yml b/.travis.yml index 865e21f046b7f3ac4bc3de09c1300a0a1d0337d4..d0e2696f100e55f320e410afd6a3038db647f76f 100644 --- a/.travis.yml +++ b/.travis.yml @@ -1,30 +1,28 @@ language: cpp cache: directories: - - $HOME/third_party - $HOME/.ccache - $HOME/.cache/pip + - $TRAVIS_BUILD_DIR/build/third_party sudo: required dist: trusty os: - linux env: - - JOB=DOCS - - JOB=BUILD_AND_TEST - - JOB=PRE_COMMIT + - JOB=build_doc + - JOB=check_style addons: apt: packages: - gcc-4.8 - g++-4.8 - - gfortran-4.8 - git - build-essential - python - python-pip - python2.7-dev - - python-numpy - python-wheel + - libboost-dev - curl - swig - graphviz @@ -33,26 +31,16 @@ addons: - libtool - ccache before_install: - - | - if [ ${JOB} == "BUILD_AND_TEST" ]; then - local change_list=`git diff --name-only $TRAVIS_COMMIT_RANGE` - if [ $? -eq 0 ]; then # if git diff return no zero, then rerun unit test. - if ! echo ${change_list} | grep -qvE '(\.md$)|(\.rst$)|(\.jpg$)|(\.png$)' - then - echo "Only markdown docs were updated, stopping build process." - exit - fi - fi - fi - - if [[ "$JOB" == "PRE_COMMIT" ]]; then sudo ln -s /usr/bin/clang-format-3.8 /usr/bin/clang-format; fi - # Paddle is using protobuf 3.1 currently. Protobuf 3.2 breaks the compatibility. So we specify the python + - if [[ "$JOB" == "check_style" ]]; then sudo ln -s /usr/bin/clang-format-3.8 /usr/bin/clang-format; fi + # Paddle is using protobuf 3.1 currently. Protobuf 3.2 breaks the compatibility. So we specify the python # protobuf version. - - pip install numpy wheel 'protobuf==3.1' sphinx recommonmark sphinx-rtd-theme==0.1.9 virtualenv pre-commit requests==2.9.2 LinkChecker + - sudo pip install -r $TRAVIS_BUILD_DIR/python/requirements.txt + - sudo pip install wheel sphinx==1.5.6 recommonmark sphinx-rtd-theme==0.1.9 virtualenv pre-commit LinkChecker - | function timeout() { perl -e 'alarm shift; exec @ARGV' "$@"; } script: - - | - timeout 2580 paddle/scripts/travis/main.sh # 43min timeout + - | + timeout 2580 paddle/scripts/travis/${JOB}.sh # 43min timeout RESULT=$?; if [ $RESULT -eq 0 ] || [ $RESULT -eq 142 ]; then true; else false; fi; notifications: email: diff --git a/AUTHORS.md b/AUTHORS.md new file mode 100644 index 0000000000000000000000000000000000000000..4db4a4a8e7441b07ce2db4adff043bb99a09014b --- /dev/null +++ b/AUTHORS.md @@ -0,0 +1,48 @@ +| Github account | name | +|---|---| +| backyes | Yan-Fei Wang | +| beckett1124 | Bin Qi | +| Canpio | Jia-Yi Feng | +| chengxiaohua1105 | Xiao-Hua Cheng | +| cxwangyi, yiwangbaidu, wangkuiyi | Yi Wang | +| cxysteven | Xing-Yi Cheng | +| dzhwinter | Zhi-Hong Dong | +| emailweixu | Wei Xu | +| gangliao | Gang Liao | +| gongweibao | Wei-Bao Gong | +| Guo Sheng | Sheng Guo | +| Haichao-Zhang | Hai-Chao Zhang | +| hedaoyuan | Dao-Yuan He | +| helinwang | He-Lin Wang | +| jacquesqiao | Long-Fei Qiao | +| kuke | Yi-Bing Liu | +| lcy-seso | Ying Cao | +| lipeng-unisound | Peng Li | +| liuyuan | Yuan Liu | +| livc | Zhao Li | +| llxxxll | Yong-Feng Liu | +| luotao01 | Tao Luo | +| lzhao4ever | Liang Zhao | +| NHZlX | Zhao-Long Xing | +| pakchoi | Chuan-Jiang Song | +| pengli09 | Peng Li | +| pkuyym | Ya-Ming Yang | +| QiJune | Jun Qi | +| qingqing01 | Qing-Qing Dang | +| reyoung | Yang Yu | +| Superjom | Chun-Wei Yan | +| tianbingsz | Tian-Bing Xu | +| typhoonzero | Yi Wu | +| wanghaoshuang | Hao-Shuang Wang | +| wangyang59 | Yang Wang | +| wangzhen-nlp | Zhen Wang | +| wen-bo-yang | Wen-Bo Yang | +| wwhu | Wei-Wei Hu | +| xinghai-sun | Xing-Hai Sun | +| Xreki | Yi-Qun Liu | +| xujun05 | Jun Xu | +| xushaoyong | Shao-Yong Xu | +| Yancey1989 | Xu Yan | +| zhaopu7 | Pu Zhao | +| zhouxiao-coder | Xiao Zhou | +| Zrachel | Rui-Qing Zhang | diff --git a/CMakeLists.txt b/CMakeLists.txt index 1a59db8c71bf3b1ea472c1ee56a1cd97de42dad8..4783095194dc9c6409dc31c95588f46c9bee7c61 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -1,29 +1,25 @@ # Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserve. -# +# # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. # You may obtain a copy of the License at -# +# # http://www.apache.org/licenses/LICENSE-2.0 -# +# # Unless required by applicable law or agreed to in writing, software # distributed under the License is distributed on an "AS IS" BASIS, # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License -set(CMAKE_MODULE_PATH ${CMAKE_MODULE_PATH} "${CMAKE_SOURCE_DIR}/cmake") -set(PROJ_ROOT ${CMAKE_SOURCE_DIR}) +cmake_minimum_required(VERSION 3.0) +set(CMAKE_MODULE_PATH ${CMAKE_MODULE_PATH} "${CMAKE_CURRENT_SOURCE_DIR}/cmake") +set(PADDLE_SOURCE_DIR ${CMAKE_CURRENT_SOURCE_DIR}) +set(PADDLE_BINARY_DIR ${CMAKE_CURRENT_BINARY_DIR}) include(system) -if(ANDROID) - cmake_minimum_required(VERSION 3.7) -else() - cmake_minimum_required(VERSION 3.0) -endif() - -project(paddle CXX C) +project(paddle CXX C Go) find_package(Sphinx) if(NOT CMAKE_CROSSCOMPILING) @@ -31,12 +27,17 @@ if(NOT CMAKE_CROSSCOMPILING) endif(NOT CMAKE_CROSSCOMPILING) find_package(Git REQUIRED) find_package(Threads REQUIRED) +if(NOT ANDROID AND NOT IOS) + find_package(Boost QUIET) +endif() include(simd) ################################ Configurations ####################################### option(WITH_GPU "Compile PaddlePaddle with NVIDIA GPU" ${CUDA_FOUND}) option(WITH_AVX "Compile PaddlePaddle with AVX intrinsics" ${AVX_FOUND}) +option(WITH_MKLDNN "Compile PaddlePaddle with mkl-dnn support." ${AVX_FOUND}) +option(WITH_MKLML "Compile PaddlePaddle with mklml package." ${AVX_FOUND}) option(WITH_DSO "Compile PaddlePaddle with dynamic linked CUDA" ON) option(WITH_TESTING "Compile PaddlePaddle with unit testing" ON) option(WITH_SWIG_PY "Compile PaddlePaddle with inference api" ON) @@ -50,6 +51,11 @@ option(WITH_DOC "Compile PaddlePaddle with documentation" OFF) option(WITH_COVERAGE "Compile PaddlePaddle with code coverage" OFF) option(COVERALLS_UPLOAD "Package code coverage data to coveralls" OFF) option(ON_TRAVIS "Exclude special unit test on Travis CI" OFF) +option(WITH_C_API "Compile PaddlePaddle with C-API(Prediction)" OFF) +option(WITH_GOLANG "Compile PaddlePaddle with GOLANG" OFF) +option(GLIDE_INSTALL "Download and install go dependencies " ON) +option(USE_NNPACK "Compile PaddlePaddle with NNPACK library" OFF) +option(USE_EIGEN_FOR_BLAS "Use matrix multiplication in Eigen" OFF) # CMAKE_BUILD_TYPE if(NOT CMAKE_BUILD_TYPE) @@ -58,25 +64,50 @@ if(NOT CMAKE_BUILD_TYPE) FORCE) endif() -if(ANDROID) - if(${CMAKE_SYSTEM_VERSION} VERSION_LESS "21") - message(FATAL_ERROR "Unsupport standalone toolchains with Android API level lower than 21") +if(ANDROID OR IOS) + if(ANDROID) + if(${CMAKE_SYSTEM_VERSION} VERSION_LESS "16") + message(FATAL_ERROR "Unsupport standalone toolchains with Android API level lower than 16") + elseif(${CMAKE_SYSTEM_VERSION} VERSION_LESS "21") + # TODO: support glog for Android api 16 ~ 19 in the future + message(WARNING "Using the unofficial git repository instead") + endif() endif() set(WITH_GPU OFF CACHE STRING - "Disable GPU when cross-compiling for Android" FORCE) + "Disable GPU when cross-compiling for Android and iOS" FORCE) set(WITH_AVX OFF CACHE STRING - "Disable AVX when cross-compiling for Android" FORCE) + "Disable AVX when cross-compiling for Android and iOS" FORCE) set(WITH_PYTHON OFF CACHE STRING - "Disable PYTHON when cross-compiling for Android" FORCE) + "Disable PYTHON when cross-compiling for Android and iOS" FORCE) set(WITH_RDMA OFF CACHE STRING - "Disable RDMA when cross-compiling for Android" FORCE) -endif(ANDROID) + "Disable RDMA when cross-compiling for Android and iOS" FORCE) + set(WITH_MKLDNN OFF CACHE STRING + "Disable MKLDNN when cross-compiling for Android and iOS" FORCE) + set(WITH_MKLML OFF CACHE STRING + "Disable MKLML package when cross-compiling for Android and iOS" FORCE) + + # Compile PaddlePaddle mobile inference library + if (NOT WITH_C_API) + set(WITH_C_API ON CACHE STRING + "Always compile the C_API when cross-compiling for Android and iOS" FORCE) + endif() + set(MOBILE_INFERENCE ON) + add_definitions(-DPADDLE_MOBILE_INFERENCE) +endif() -set(THIRD_PARTY_PATH "${PROJ_ROOT}/third_party" CACHE STRING +set(THIRD_PARTY_PATH "${CMAKE_BINARY_DIR}/third_party" CACHE STRING "A path setting third party libraries download & build directories.") + +if (WITH_C_API AND WITH_PYTHON) + message(WARNING "It is suggest not embedded a python interpreter in Paddle " + "when using C-API. It will give an unpredictable behavior when using a " + "different Python interpreter from compiling.") +endif() + ######################################################################################## +include(external/mklml) # download mklml package include(external/zlib) # download, build, install zlib include(external/gflags) # download, build, install gflags include(external/glog) # download, build, install glog @@ -84,24 +115,31 @@ include(external/gtest) # download, build, install gtest include(external/protobuf) # download, build, install protobuf include(external/python) # download, build, install python include(external/openblas) # download, build, install openblas +include(external/mkldnn) # download, build, install mkldnn include(external/swig) # download, build, install swig include(external/warpctc) # download, build, install warpctc include(external/any) # download libn::any +include(external/eigen) # download eigen3 +include(external/pybind11) # download pybind11 +include(cudnn) # set cudnn libraries, must before configure +include(configure) # add paddle env configuration +include(generic) # simplify cmake module include(package) # set paddle packages include(cpplint) # set paddle c++ style include(ccache) # set ccache for compilation include(util) # set unittest and link libs include(rdma) # set rdma libraries include(flags) # set paddle compile flags -include(cudnn) # set cudnn libraries include(version) # set PADDLE_VERSION include(coveralls) # set code coverage -include(configure) # add paddle env configuration -include_directories("${PROJ_ROOT}") -include_directories("${PROJ_ROOT}/paddle/cuda/include") + +include_directories("${PADDLE_SOURCE_DIR}") +include_directories("${PADDLE_SOURCE_DIR}/paddle/cuda/include") include_directories("${CMAKE_CURRENT_BINARY_DIR}/proto") +include_directories("${CMAKE_CURRENT_BINARY_DIR}/go/pserver/client/c") +include_directories(${Boost_INCLUDE_DIRS}) set(EXTERNAL_LIBS ${GFLAGS_LIBRARIES} @@ -113,15 +151,40 @@ set(EXTERNAL_LIBS ) if(WITH_GPU) - list(APPEND EXTERNAL_LIB ${CUDA_LIBRARIES} ${CUDA_rt_LIBRARY}) + list(APPEND EXTERNAL_LIBS ${CUDA_LIBRARIES} ${CUDA_rt_LIBRARY}) if(NOT WITH_DSO) - list(APPEND EXTERNAL_LIB ${CUDNN_LIBRARY} ${CUDA_CUBLAS_LIBRARIES} ${CUDA_curand_LIBRARY}) + list(APPEND EXTERNAL_LIBS ${CUDNN_LIBRARY} ${CUDA_CUBLAS_LIBRARIES} ${CUDA_curand_LIBRARY}) endif(NOT WITH_DSO) endif(WITH_GPU) +if(WITH_MKLDNN) + list(APPEND EXTERNAL_LIBS ${MKLDNN_LIB} ${MKLDNN_IOMP_LIB}) +endif() + +if(USE_NNPACK) + include(external/nnpack) + list(APPEND EXTERNAL_LIBS ${NNPACK_LIBS}) +endif(USE_NNPACK) + add_subdirectory(proto) + +if(NOT MOBILE_INFERENCE) + # "add_subdirectory(go)" should be placed after the following loine, + # because it depends on paddle/optimizer. + add_subdirectory(paddle/optimizer) +endif() + +# "add_subdirectory(paddle)" and "add_subdirectory(python)" should be +# placed after this block, because they depends on it. +if(WITH_GOLANG) + add_subdirectory(go) +endif(WITH_GOLANG) + +set(PADDLE_PYTHON_BUILD_DIR "${CMAKE_CURRENT_BINARY_DIR}/python/build") add_subdirectory(paddle) -add_subdirectory(python) +if(WITH_PYTHON) + add_subdirectory(python) +endif() if(WITH_DOC) add_subdirectory(doc) diff --git a/Dockerfile b/Dockerfile index f12be36ceb764a535e8a87b7071757f1ef3dada7..136db772cc6a24b8084120fa6bab666bc1eda78e 100644 --- a/Dockerfile +++ b/Dockerfile @@ -1,6 +1,6 @@ # A image for building paddle binaries # Use cuda devel base image for both cpu and gpu environment -FROM nvidia/cuda:8.0-cudnn5-devel-ubuntu14.04 +FROM nvidia/cuda:8.0-cudnn5-devel-ubuntu16.04 MAINTAINER PaddlePaddle Authors ARG UBUNTU_MIRROR @@ -10,26 +10,40 @@ RUN /bin/bash -c 'if [[ -n ${UBUNTU_MIRROR} ]]; then sed -i 's#http://archive.ub ARG WITH_GPU ARG WITH_AVX ARG WITH_DOC -ARG WITH_STYLE_CHECK ENV WOBOQ OFF -ENV WITH_GPU=${WITH_GPU:-OFF} +ENV WITH_GPU=${WITH_GPU:-ON} ENV WITH_AVX=${WITH_AVX:-ON} ENV WITH_DOC=${WITH_DOC:-OFF} -ENV WITH_STYLE_CHECK=${WITH_STYLE_CHECK:-OFF} ENV HOME /root # Add bash enhancements COPY ./paddle/scripts/docker/root/ /root/ RUN apt-get update && \ - apt-get install -y git python-pip python-dev openssh-server bison && \ - apt-get install -y wget unzip tar xz-utils bzip2 gzip coreutils && \ - apt-get install -y curl sed grep graphviz libjpeg-dev zlib1g-dev && \ - apt-get install -y python-numpy python-matplotlib gcc g++ gfortran && \ - apt-get install -y automake locales clang-format-3.8 swig doxygen && \ + apt-get install -y \ + git python-pip python-dev openssh-server bison \ + wget unzip unrar tar xz-utils bzip2 gzip coreutils ntp \ + curl sed grep graphviz libjpeg-dev zlib1g-dev \ + python-matplotlib gcc-4.8 g++-4.8 \ + automake locales clang-format swig doxygen cmake \ + liblapack-dev liblapacke-dev libboost-dev \ + clang-3.8 llvm-3.8 libclang-3.8-dev \ + net-tools && \ apt-get clean -y +# Install Go and glide +RUN wget -qO- https://storage.googleapis.com/golang/go1.8.1.linux-amd64.tar.gz | \ + tar -xz -C /usr/local && \ + mkdir /root/gopath && \ + mkdir /root/gopath/bin && \ + mkdir /root/gopath/src +ENV GOROOT=/usr/local/go GOPATH=/root/gopath +# should not be in the same line with GOROOT definition, otherwise docker build could not find GOROOT. +ENV PATH=${PATH}:${GOROOT}/bin:${GOPATH}/bin +# install glide +RUN curl -s -q https://glide.sh/get | sh + # git credential to skip password typing RUN git config --global credential.helper store @@ -39,18 +53,29 @@ RUN localedef -i en_US -f UTF-8 en_US.UTF-8 # FIXME: due to temporary ipykernel dependency issue, specify ipykernel jupyter # version util jupyter fixes this issue. RUN pip install --upgrade pip && \ - pip install -U 'protobuf==3.1.0' && \ - pip install -U wheel pillow BeautifulSoup && \ + pip install -U wheel && \ pip install -U docopt PyYAML sphinx && \ - pip install -U sphinx-rtd-theme==0.1.9 recommonmark && \ - pip install pre-commit 'requests==2.9.2' 'ipython==5.3.0' && \ - pip install 'ipykernel==4.6.0' 'jupyter==1.0.0' + pip install -U sphinx-rtd-theme==0.1.9 recommonmark + +RUN pip install pre-commit 'ipython==5.3.0' && \ + pip install 'ipykernel==4.6.0' 'jupyter==1.0.0' && \ + pip install opencv-python + +COPY ./python/requirements.txt /root/ +RUN pip install -r /root/requirements.txt + +# To fix https://github.com/PaddlePaddle/Paddle/issues/1954, we use +# the solution in https://urllib3.readthedocs.io/en/latest/user-guide.html#ssl-py2 +RUN apt-get install -y libssl-dev libffi-dev +RUN pip install certifi urllib3[secure] -RUN curl -sSL https://cmake.org/files/v3.4/cmake-3.4.1.tar.gz | tar -xz && \ - cd cmake-3.4.1 && ./bootstrap && make -j `nproc` && make install && \ - cd .. && rm -rf cmake-3.4.1 -VOLUME ["/woboq_out"] +# Install woboq_codebrowser to /woboq +RUN git clone https://github.com/woboq/woboq_codebrowser /woboq && \ + (cd /woboq \ + cmake -DLLVM_CONFIG_EXECUTABLE=/usr/bin/llvm-config-3.8 \ + -DCMAKE_BUILD_TYPE=Release . \ + make) # Configure OpenSSH server. c.f. https://docs.docker.com/engine/examples/running_ssh_service RUN mkdir /var/run/sshd diff --git a/Dockerfile.android b/Dockerfile.android new file mode 100644 index 0000000000000000000000000000000000000000..9d13a414f67be04e17b7d83403228d92bce0eda9 --- /dev/null +++ b/Dockerfile.android @@ -0,0 +1,54 @@ +FROM ubuntu:16.04 +MAINTAINER PaddlePaddle Authors + +ARG UBUNTU_MIRROR +RUN /bin/bash -c 'if [[ -n ${UBUNTU_MIRROR} ]]; then sed -i 's#http://archive.ubuntu.com/ubuntu#${UBUNTU_MIRROR}#g' /etc/apt/sources.list; fi' + +# ENV variables +ARG ANDROID_ABI +ARG ANDROID_API + +ENV ANDROID_ABI=${ANDROID_ABI:-"armeabi-v7a"} +ENV ANDROID_API=${ANDROID_API:-21} + +ENV HOME=/root \ + ANDROID_NDK_HOME=/opt/android-ndk-linux \ + ANDROID_TOOLCHAINS_DIR=/opt/toolchains + +RUN apt-get update && \ + apt-get install -y \ + git python-dev python-pip python-numpy \ + wget curl tar unzip gcc g++ locales clang-format-3.8 swig cmake && \ + apt-get clean -y + +# Install Go and glide +RUN wget -qO- go.tgz https://storage.googleapis.com/golang/go1.8.1.linux-amd64.tar.gz | \ + tar -xz -C /usr/local && \ + mkdir /root/gopath && \ + mkdir /root/gopath/bin && \ + mkdir /root/gopath/src +ENV GOROOT=/usr/local/go GOPATH=/root/gopath +# should not be in the same line with GOROOT definition, otherwise docker build could not find GOROOT. +ENV PATH=${PATH}:${GOROOT}/bin:${GOPATH}/bin + +# git credential to skip password typing +RUN git config --global credential.helper store + +# Fix locales to en_US.UTF-8 +RUN localedef -i en_US -f UTF-8 en_US.UTF-8 + +RUN pip install --upgrade pip && \ + pip install -U 'protobuf==3.1.0' && \ + pip install -U wheel sphinx && \ + pip install pre-commit + +# Android NDK +RUN mkdir -p ${ANDROID_TOOLCHAINS_DIR} && \ + mkdir -p /opt/android-ndk-tmp && \ + cd /opt/android-ndk-tmp && \ + wget -q https://dl.google.com/android/repository/android-ndk-r14b-linux-x86_64.zip && \ + unzip -q android-ndk-r14b-linux-x86_64.zip && \ + mv android-ndk-r14b ${ANDROID_NDK_HOME} && \ + rm -rf /opt/android-ndk-tmp + +CMD ["bash", "/paddle/paddle/scripts/docker/build_android.sh"] diff --git a/README.md b/README.md index bcc24b84128df282a2e3f0bc62aafe1ffe172338..db0fbd88b250cdc2a3cc77521cc1c2cea77c6e87 100644 --- a/README.md +++ b/README.md @@ -2,8 +2,8 @@ [![Build Status](https://travis-ci.org/PaddlePaddle/Paddle.svg?branch=develop)](https://travis-ci.org/PaddlePaddle/Paddle) -[![Documentation Status](https://img.shields.io/badge/docs-latest-brightgreen.svg?style=flat)](http://www.paddlepaddle.org/develop/doc/) -[![Documentation Status](https://img.shields.io/badge/中文文档-最新-brightgreen.svg)](http://www.paddlepaddle.org/doc_cn/) +[![Documentation Status](https://img.shields.io/badge/docs-latest-brightgreen.svg?style=flat)](http://doc.paddlepaddle.org/develop/doc/) +[![Documentation Status](https://img.shields.io/badge/中文文档-最新-brightgreen.svg)](http://doc.paddlepaddle.org/develop/doc_cn/) [![Coverage Status](https://coveralls.io/repos/github/PaddlePaddle/Paddle/badge.svg?branch=develop)](https://coveralls.io/github/PaddlePaddle/Paddle?branch=develop) [![Release](https://img.shields.io/github/release/PaddlePaddle/Paddle.svg)](https://github.com/PaddlePaddle/Paddle/releases) [![License](https://img.shields.io/badge/license-Apache%202-blue.svg)](LICENSE) @@ -51,45 +51,46 @@ Please refer to our [release announcement](https://github.com/PaddlePaddle/Paddl - **Connected to Products** In addition, PaddlePaddle is also designed to be easily deployable. At Baidu, - PaddlePaddle has been deployed into products or service with a vast number + PaddlePaddle has been deployed into products and services with a vast number of users, including ad click-through rate (CTR) prediction, large-scale image classification, optical character recognition(OCR), search ranking, computer virus detection, recommendation, etc. It is widely utilized in products at - Baidu and it has achieved a significant impact. We hope you can also exploit - the capability of PaddlePaddle to make a huge impact for your product. + Baidu and it has achieved a significant impact. We hope you can also explore + the capability of PaddlePaddle to make an impact on your product. ## Installation It is recommended to check out the -[Docker installation guide](http://www.paddlepaddle.org/develop/doc/getstarted/build_and_install/docker_install_en.html) +[Docker installation guide](http://doc.paddlepaddle.org/develop/doc/getstarted/build_and_install/docker_install_en.html) before looking into the -[build from source guide](http://www.paddlepaddle.org/develop/doc/getstarted/build_and_install/build_from_source_en.html) +[build from source guide](http://doc.paddlepaddle.org/develop/doc/getstarted/build_and_install/build_from_source_en.html). ## Documentation -We provide [English](http://www.paddlepaddle.org/develop/doc/) and -[Chinese](http://www.paddlepaddle.org/doc_cn/) documentation. +We provide [English](http://doc.paddlepaddle.org/develop/doc/) and +[Chinese](http://doc.paddlepaddle.org/doc_cn/) documentation. -- [Deep Learning 101](http://book.paddlepaddle.org/index.en.html) +- [Deep Learning 101](http://book.paddlepaddle.org/index.html) - You might want to start from the this online interactive book that can run in Jupyter Notebook. + You might want to start from this online interactive book that can run in a Jupyter Notebook. -- [Distributed Training](http://www.paddlepaddle.org/develop/doc/howto/usage/cluster/cluster_train_en.html) +- [Distributed Training](http://doc.paddlepaddle.org/develop/doc/howto/usage/cluster/cluster_train_en.html) You can run distributed training jobs on MPI clusters. -- [Distributed Training on Kubernetes](http://www.paddlepaddle.org/develop/doc/howto/usage/k8s/k8s_en.html) +- [Distributed Training on Kubernetes](http://doc.paddlepaddle.org/develop/doc/howto/usage/k8s/k8s_en.html) You can also run distributed training jobs on Kubernetes clusters. -- [Python API](http://www.paddlepaddle.org/develop/doc/api/index_en.html) +- [Python API](http://doc.paddlepaddle.org/develop/doc/api/index_en.html) Our new API enables much shorter programs. -- [How to Contribute](http://www.paddlepaddle.org/develop/doc/howto/dev/contribute_to_paddle_en.html) +- [How to Contribute](http://doc.paddlepaddle.org/develop/doc/howto/dev/contribute_to_paddle_en.html) We appreciate your contributions! + ## Ask Questions You are welcome to submit questions and bug reports as [Github Issues](https://github.com/PaddlePaddle/Paddle/issues). diff --git a/RELEASE.cn.md b/RELEASE.cn.md old mode 100755 new mode 100644 diff --git a/RELEASE.md b/RELEASE.md index 9a09644b681b2ae4e922d92ed29500205c2a6ca4..146f7afa7dfbc152500b82fde28445ae3155c16c 100644 --- a/RELEASE.md +++ b/RELEASE.md @@ -1,8 +1,6 @@ # Release v0.10.0 -We are glad to release version 0.10.0. In this version, we are happy to -release the -new +We are glad to release version 0.10.0. In this version, we are happy to release the new [Python API](http://research.baidu.com/paddlepaddles-new-api-simplifies-deep-learning-programs/). - Our old Python API is kind of out of date. It's hard to learn and hard to diff --git a/authors b/authors deleted file mode 100644 index daac4ec5d8173cba95df9f9b3c69c02b5256f5b2..0000000000000000000000000000000000000000 --- a/authors +++ /dev/null @@ -1,56 +0,0 @@ -Cao, Ying -Cheng, Yujuan -Dang, Qingqing -Dong, Tengfei -Du, Dalong -Feng, Shouqiang -Gao, Haoyuan -Han, Baochang -Han, Jinchen -Hao, Nanyu -He, Daoyuan -He, Zhengyan -Hou, Jue -Huang, Chang -Huang, Zhiheng -Hu, Na -Kong, Qi -Liao, Gang -Li, Bo -Li, Jiajie -Li, Jing -Li, Lei -Li, Peng -Liu, Sheng -Liu, Yuan -Li, Yuze -Luo, Heng -Luo, Tao -Lyu, Qin -Mao, Hongyue -Qian, Xiaojun -Qiao, Longfei -Qi, Jun -Qin, Duohao -Shen, Guolong -Shi, Guangchuan -Song, Xiang -Wang, Helin -Wang, Jiang -Wang, Yanfei -Wang, Yi -Wang, Yong -Weng, Renliang -Xu, Tianbing -Xu, Wei -Xu, Xingyu -Yan, Chong -Yan, Chunwei -Yang, Yi -Yu, Yang -Yu, Yinan -Zhang, Jian -Zhang, Ruiqing -Zhang, Weide -Zhao, Liang -Zhou, Jie diff --git a/benchmark/paddle/image/provider.py b/benchmark/paddle/image/provider.py index 1ac47212b5a75667e8e9d4465b33f575516e2836..4703944c8722552d56ba80a8e0663de5fb4df53d 100644 --- a/benchmark/paddle/image/provider.py +++ b/benchmark/paddle/image/provider.py @@ -22,5 +22,5 @@ def initHook(settings, height, width, color, num_class, **kwargs): def process(settings, file_list): for i in xrange(1024): img = np.random.rand(1, settings.data_size).reshape(-1, 1).flatten() - lab = random.randint(0, settings.num_class) + lab = random.randint(0, settings.num_class - 1) yield img.astype('float32'), int(lab) diff --git a/benchmark/paddle/image/run_mkldnn.sh b/benchmark/paddle/image/run_mkldnn.sh new file mode 100755 index 0000000000000000000000000000000000000000..e31fec1cd850157d90ddcab2d559d52381ecd317 --- /dev/null +++ b/benchmark/paddle/image/run_mkldnn.sh @@ -0,0 +1,51 @@ +set -e + +function train() { + unset OMP_NUM_THREADS MKL_NUM_THREADS + export OMP_DYNAMIC="FALSE" + export KMP_AFFINITY="granularity=fine,compact,0,0" + topology=$1 + bs=$2 + use_mkldnn=$3 + if [ $3 == "True" ]; then + thread=1 + log="logs/${topology}-mkldnn-${bs}.log" + elif [ $3 == "False" ]; then + thread=`nproc` + # each trainer_count use only 1 core to avoid conflict + export OMP_NUM_THREADS=1 + export MKL_NUM_THREADS=1 + log="logs/${topology}-${thread}mklml-${bs}.log" + else + echo "Wrong input $3, use True or False." + exit 0 + fi + args="batch_size=${bs}" + config="${topology}.py" + paddle train --job=time \ + --config=$config \ + --use_mkldnn=$use_mkldnn \ + --use_gpu=False \ + --trainer_count=$thread \ + --log_period=10 \ + --test_period=100 \ + --config_args=$args \ + 2>&1 | tee ${log} +} + +if [ ! -d "train.list" ]; then + echo " " > train.list +fi +if [ ! -d "logs" ]; then + mkdir logs +fi + +#========== mkldnn ==========# +train vgg 64 True +train vgg 128 True +train vgg 256 True + +#========== mklml ===========# +train vgg 64 False +train vgg 128 False +train vgg 256 False diff --git a/benchmark/paddle/image/vgg.py b/benchmark/paddle/image/vgg.py new file mode 100644 index 0000000000000000000000000000000000000000..b8429975f5c83df6996e71478fe276b246e8b77b --- /dev/null +++ b/benchmark/paddle/image/vgg.py @@ -0,0 +1,103 @@ +#!/usr/bin/env python +from paddle.trainer_config_helpers import * + +height = 224 +width = 224 +num_class = 1000 +batch_size = get_config_arg('batch_size', int, 64) +layer_num = get_config_arg('layer_num', int, 19) + +args = {'height': height, 'width': width, 'color': True, 'num_class': num_class} +define_py_data_sources2( + "train.list", None, module="provider", obj="process", args=args) + +settings( + batch_size=batch_size, + learning_rate=0.01 / batch_size, + learning_method=MomentumOptimizer(0.9), + regularization=L2Regularization(0.0005 * batch_size)) + +img = data_layer(name='image', size=height * width * 3) + + +def vgg_network(vgg_num=3): + tmp = img_conv_group( + input=img, + num_channels=3, + conv_padding=1, + conv_num_filter=[64, 64], + conv_filter_size=3, + conv_act=ReluActivation(), + pool_size=2, + pool_stride=2, + pool_type=MaxPooling()) + + tmp = img_conv_group( + input=tmp, + conv_num_filter=[128, 128], + conv_padding=1, + conv_filter_size=3, + conv_act=ReluActivation(), + pool_stride=2, + pool_type=MaxPooling(), + pool_size=2) + + channels = [] + for i in range(vgg_num): + channels.append(256) + tmp = img_conv_group( + input=tmp, + conv_num_filter=channels, + conv_padding=1, + conv_filter_size=3, + conv_act=ReluActivation(), + pool_stride=2, + pool_type=MaxPooling(), + pool_size=2) + channels = [] + for i in range(vgg_num): + channels.append(512) + tmp = img_conv_group( + input=tmp, + conv_num_filter=channels, + conv_padding=1, + conv_filter_size=3, + conv_act=ReluActivation(), + pool_stride=2, + pool_type=MaxPooling(), + pool_size=2) + tmp = img_conv_group( + input=tmp, + conv_num_filter=channels, + conv_padding=1, + conv_filter_size=3, + conv_act=ReluActivation(), + pool_stride=2, + pool_type=MaxPooling(), + pool_size=2) + + tmp = fc_layer( + input=tmp, + size=4096, + act=ReluActivation(), + layer_attr=ExtraAttr(drop_rate=0.5)) + + tmp = fc_layer( + input=tmp, + size=4096, + act=ReluActivation(), + layer_attr=ExtraAttr(drop_rate=0.5)) + + return fc_layer(input=tmp, size=num_class, act=SoftmaxActivation()) + + +if layer_num == 16: + vgg = vgg_network(3) +elif layer_num == 19: + vgg = vgg_network(4) +else: + print("Wrong layer number.") + +lab = data_layer('label', num_class) +loss = cross_entropy(input=vgg, label=lab) +outputs(loss) diff --git a/cmake/CMakeDetermineGoCompiler.cmake b/cmake/CMakeDetermineGoCompiler.cmake new file mode 100644 index 0000000000000000000000000000000000000000..abf0a00c5e99e4201dede36f13200cfc9c151ad3 --- /dev/null +++ b/cmake/CMakeDetermineGoCompiler.cmake @@ -0,0 +1,46 @@ +if(NOT CMAKE_Go_COMPILER) + if(NOT $ENV{GO_COMPILER} STREQUAL "") + get_filename_component(CMAKE_Go_COMPILER_INIT $ENV{GO_COMPILER} PROGRAM PROGRAM_ARGS CMAKE_Go_FLAGS_ENV_INIT) + + if(CMAKE_Go_FLAGS_ENV_INIT) + set(CMAKE_Go_COMPILER_ARG1 "${CMAKE_Go_FLAGS_ENV_INIT}" CACHE STRING "First argument to Go compiler") + endif() + + if(NOT EXISTS ${CMAKE_Go_COMPILER_INIT}) + message(SEND_ERROR "Could not find compiler set in environment variable GO_COMPILER:\n$ENV{GO_COMPILER}.") + endif() + + endif() + + set(Go_BIN_PATH + $ENV{GOPATH} + $ENV{GOROOT} + $ENV{GOROOT}/bin + $ENV{GO_COMPILER} + /usr/bin + /usr/local/bin + ) + + if(CMAKE_Go_COMPILER_INIT) + set(CMAKE_Go_COMPILER ${CMAKE_Go_COMPILER_INIT} CACHE PATH "Go Compiler") + else() + find_program(CMAKE_Go_COMPILER + NAMES go + PATHS ${Go_BIN_PATH} + ) + if(CMAKE_Go_COMPILER) + EXEC_PROGRAM(${CMAKE_Go_COMPILER} ARGS version OUTPUT_VARIABLE GOLANG_VERSION) + STRING(REGEX MATCH "go[0-9]+[.0-9]*[ /A-Za-z0-9]*" VERSION "${GOLANG_VERSION}") + message("-- The Golang compiler identification is ${VERSION}") + message("-- Check for working Golang compiler: ${CMAKE_Go_COMPILER}") + endif() + endif() + +endif() + +mark_as_advanced(CMAKE_Go_COMPILER) + +configure_file(${CMAKE_CURRENT_SOURCE_DIR}/cmake/CMakeGoCompiler.cmake.in + ${CMAKE_PLATFORM_INFO_DIR}/CMakeGoCompiler.cmake @ONLY) + +set(CMAKE_Go_COMPILER_ENV_VAR "GO_COMPILER") diff --git a/cmake/CMakeGoCompiler.cmake.in b/cmake/CMakeGoCompiler.cmake.in new file mode 100644 index 0000000000000000000000000000000000000000..a71f08e064656fbaad8cfa77aea6f216515712ef --- /dev/null +++ b/cmake/CMakeGoCompiler.cmake.in @@ -0,0 +1,8 @@ +set(CMAKE_Go_COMPILER "@CMAKE_Go_COMPILER@") +set(CMAKE_Go_COMPILER_LOADED 1) + +set(CMAKE_Go_SOURCE_FILE_EXTENSIONS go) +set(CMAKE_Go_LINKER_PREFERENCE 40) +set(CMAKE_Go_OUTPUT_EXTENSION .o) +set(CMAKE_Go_OUTPUT_EXTENSION_REPLACE 1) +set(CMAKE_Go_COMPILER_ENV_VAR "GO_COMPILER") diff --git a/cmake/CMakeGoInformation.cmake b/cmake/CMakeGoInformation.cmake new file mode 100644 index 0000000000000000000000000000000000000000..ba51ac93fcd429478f324b66bd5129d94ea2a8f4 --- /dev/null +++ b/cmake/CMakeGoInformation.cmake @@ -0,0 +1,7 @@ +if(NOT CMAKE_Go_COMPILE_OBJECT) + set(CMAKE_Go_COMPILE_OBJECT "go tool compile -l -N -o ") +endif() + +if(NOT CMAKE_Go_LINK_EXECUTABLE) + set(CMAKE_Go_LINK_EXECUTABLE "go tool link -o ") +endif() diff --git a/cmake/CMakeTestGoCompiler.cmake b/cmake/CMakeTestGoCompiler.cmake new file mode 100644 index 0000000000000000000000000000000000000000..b9891b015baced05b51e34dba562fd98a84fe14c --- /dev/null +++ b/cmake/CMakeTestGoCompiler.cmake @@ -0,0 +1 @@ +set(CMAKE_Go_COMPILER_WORKS 1 CACHE INTERNAL "") diff --git a/cmake/cblas.cmake b/cmake/cblas.cmake index b8bf1bb07a1f779354b2c10071264bf41d279f6c..8fdc382f0c1c453a01dba884a3dad216e1c3092c 100644 --- a/cmake/cblas.cmake +++ b/cmake/cblas.cmake @@ -5,7 +5,7 @@ # If any cblas implementation found, the following variable will be set. # CBLAS_PROVIDER # one of MKL, ATLAS, OPENBLAS, REFERENCE # CBLAS_INC_DIR # the include directory for cblas. -# CBLAS_LIBS # a list of libraries should be linked by paddle. +# CBLAS_LIBS # a list of libraries should be linked by paddle. # # Each library should be full path to object file. # # User should set one of MKL_ROOT, ATLAS_ROOT, OPENBLAS_ROOT, REFERENCE_CBLAS_ROOT @@ -15,39 +15,57 @@ set(CBLAS_FOUND OFF) -## Find MKL First. -set(INTEL_ROOT "/opt/intel" CACHE PATH "Folder contains intel libs") -set(MKL_ROOT ${INTEL_ROOT}/mkl CACHE PATH "Folder contains MKL") +## Find MKLML First. +if(WITH_MKLML AND MKLML_INC_DIR AND MKLML_LIB) + set(CBLAS_FOUND ON) + set(CBLAS_PROVIDER MKLML) + set(CBLAS_INC_DIR ${MKLML_INC_DIR}) + set(CBLAS_LIBRARIES ${MKLML_LIB}) + + add_definitions(-DPADDLE_USE_MKLML) + add_definitions(-DLAPACK_FOUND) + + message(STATUS "Found cblas and lapack in MKLML " + "(include: ${CBLAS_INC_DIR}, library: ${CBLAS_LIBRARIES})") + return() +endif() + +## Then find MKL. +set(INTEL_MKL_ROOT "/opt/intel/mkl" CACHE PATH "Folder contains intel mkl libs") +set(MKL_ROOT $ENV{MKL_ROOT} CACHE PATH "Folder contains env MKL") + +set(MKL_INCLUDE_SEARCH_PATHS + ${MKL_ROOT}/include + ${INTEL_MKL_ROOT}/include) +set(MKL_LIB_SEARCH_PATHS + ${MKL_ROOT}/lib + ${MKL_ROOT}/lib/intel64 + ${INTEL_MKL_ROOT}/lib + ${INTEL_MKL_ROOT}/lib/intel64) find_path(MKL_INC_DIR mkl.h PATHS - ${MKL_ROOT}/include) + ${MKL_INCLUDE_SEARCH_PATHS}) find_path(MKL_LAPACK_INC_DIR mkl_lapacke.h PATHS - ${MKL_ROOT}/include) + ${MKL_INCLUDE_SEARCH_PATHS}) find_library(MKL_CORE_LIB NAMES mkl_core PATHS - ${MKL_ROOT}/lib - ${MKL_ROOT}/lib/intel64) + ${MKL_LIB_SEARCH_PATHS}) find_library(MKL_SEQUENTIAL_LIB NAMES mkl_sequential PATHS - ${MKL_ROOT}/lib - ${MKL_ROOT}/lib/intel64) + ${MKL_LIB_SEARCH_PATHS}) find_library(MKL_INTEL_LP64 NAMES mkl_intel_lp64 PATHS - ${MKL_ROOT}/lib - ${MKL_ROOT}/lib/intel64) + ${MKL_LIB_SEARCH_PATHS}) - -if(MKL_INC_DIR AND MKL_CORE_LIB AND MKL_SEQUENTIAL_LIB AND MKL_INTEL_LP64) +if(MKL_LAPACK_INC_DIR AND MKL_INC_DIR AND MKL_CORE_LIB AND MKL_SEQUENTIAL_LIB AND MKL_INTEL_LP64) + set(CBLAS_FOUND ON) set(CBLAS_PROVIDER MKL) - set(CBLAS_INC_DIR ${MKL_INC_DIR}) - set(CBLAS_LIBRARIES ${MKL_INTEL_LP64} - ${MKL_SEQUENTIAL_LIB} - ${MKL_CORE_LIB}) + set(CBLAS_INC_DIR ${MKL_INC_DIR} ${MKL_LAPACK_INC_DIR}) + set(CBLAS_LIBRARIES ${MKL_INTEL_LP64} ${MKL_SEQUENTIAL_LIB} ${MKL_CORE_LIB}) + add_definitions(-DPADDLE_USE_MKL) - message(STATUS "Found MKL (include: ${CBLAS_INC_DIR}, library: ${CBLAS_LIBRARIES})") - set(CBLAS_FOUND ON) - if(${MKL_LAPACK_INC_DIR}) - add_definitions(-DPADDLE_USE_LAPACK) - message(STATUS "Found lapack in MKL (include: ${MKL_LAPACK_INC_DIR})") - endif() - return() # return file. + add_definitions(-DLAPACK_FOUND) + + message(STATUS "Found MKL (include: ${MKL_INC_DIR}, library: ${CBLAS_LIBRARIES})") + message(STATUS "Found lapack in MKL (include: ${MKL_LAPACK_INC_DIR})") + return() endif() ## Then find atlas. @@ -63,26 +81,26 @@ set(ATLAS_LIB_SEARCH_PATHS /usr/lib/atlas /usr/lib/atlas-base # special for ubuntu 14.04. ) -find_path(ATLAS_INC_DIR NAMES cblas.h +find_path(ATLAS_INC_DIR NAMES cblas.h PATHS ${ATLAS_INCLUDE_SEARCH_PATHS}) find_path(ATLAS_CLAPACK_INC_DIR NAMES clapack.h PATHS ${ATLAS_INCLUDE_SEARCH_PATHS}) -find_library(ATLAS_CBLAS_LIB NAMES cblas libcblas.so.3 +find_library(ATLAS_CBLAS_LIB NAMES cblas libcblas.so.3 PATHS ${ATLAS_LIB_SEARCH_PATHS}) -find_library(ATLAS_LIB NAMES lapack_atlas liblapack_atlas.so.3 +find_library(ATLAS_CLAPACK_LIB NAMES lapack_atlas liblapack_atlas.so.3 PATHS ${ATLAS_LIB_SEARCH_PATHS}) -if(ATLAS_INC_DIR AND ATLAS_CBLAS_LIB AND ATLAS_LIB AND NOT CBLAS_FOUND) - set(CBLAS_PROVIDER ATLAS) - set(CBLAS_INC_DIR ${ATLAS_INC_DIR}) - set(CBLAS_LIBRARIES ${ATLAS_LIB} ${ATLAS_CBLAS_LIB}) - add_definitions(-DPADDLE_USE_ATLAS) - message(STATUS "Found ATLAS (include: ${CBLAS_INC_DIR}, library: ${CBLAS_LIBRARIES})") +if(ATLAS_CLAPACK_INC_DIR AND ATLAS_INC_DIR AND ATLAS_CBLAS_LIB AND ATLAS_CLAPACK_LIB) set(CBLAS_FOUND ON) - if(ATLAS_CLAPACK_INC_DIR) - add_definitions(-DPADDLE_USE_LAPACK) - message(STATUS "Found lapack in ATLAS (include: ${ATLAS_CLAPACK_INC_DIR})") - endif() + set(CBLAS_PROVIDER ATLAS) + set(CBLAS_INC_DIR ${ATLAS_INC_DIR} ${ATLAS_CLAPACK_INC_DIR}) + set(CBLAS_LIBRARIES ${ATLAS_CLAPACK_LIB} ${ATLAS_CBLAS_LIB}) + + add_definitions(-DPADDLE_USE_ATLAS) + add_definitions(-DLAPACK_FOUND) + + message(STATUS "Found ATLAS (include: ${ATLAS_INC_DIR}, library: ${CBLAS_LIBRARIES})") + message(STATUS "Found lapack in ATLAS (include: ${ATLAS_CLAPACK_INC_DIR})") return() endif() @@ -107,16 +125,17 @@ find_path(OPENBLAS_LAPACKE_INC_DIR NAMES lapacke.h find_library(OPENBLAS_LIB NAMES openblas PATHS ${OPENBLAS_LIB_SEARCH_PATHS}) -if(OPENBLAS_INC_DIR AND OPENBLAS_LIB) +if(OPENBLAS_LAPACKE_INC_DIR AND OPENBLAS_INC_DIR AND OPENBLAS_LIB) + set(CBLAS_FOUND ON) set(CBLAS_PROVIDER OPENBLAS) - set(CBLAS_INC_DIR ${OPENBLAS_INC_DIR}) + set(CBLAS_INC_DIR ${OPENBLAS_INC_DIR} ${OPENBLAS_LAPACKE_INC_DIR}) set(CBLAS_LIBRARIES ${OPENBLAS_LIB}) - message(STATUS "Found OpenBLAS (include: ${CBLAS_INC_DIR}, library: ${CBLAS_LIBRARIES})") - set(CBLAS_FOUND ON) - if(OPENBLAS_LAPACKE_INC_DIR) - add_definitions(-DPADDLE_USE_LAPACK) - message(STATUS "Found lapack in OpenBLAS (include: ${OPENBLAS_LAPACKE_INC_DIR})") - endif() + + add_definitions(-DPADDLE_USE_OPENBLAS) + add_definitions(-DLAPACK_FOUND) + + message(STATUS "Found OpenBLAS (include: ${OPENBLAS_INC_DIR}, library: ${CBLAS_LIBRARIES})") + message(STATUS "Found lapack in OpenBLAS (include: ${OPENBLAS_LAPACKE_INC_DIR})") return() endif() @@ -124,7 +143,7 @@ endif() ## Then find the reference-cblas. www.netlib.org/blas/ -set(REFERENCE_CBLAS_ROOT $ENV{REFERENCE_CBLAS_ROOT} CACHE PATH +set(REFERENCE_CBLAS_ROOT $ENV{REFERENCE_CBLAS_ROOT} CACHE PATH "Folder contains reference-cblas") set(REFERENCE_CBLAS_INCLUDE_SEARCH_PATHS ${REFERENCE_CBLAS_ROOT}/include @@ -145,9 +164,17 @@ find_library(REFERENCE_CBLAS_LIBRARY NAMES cblas PATHS ${REFERENCE_CBLAS_LIB_SEARCH_PATHS}) if (REFERENCE_CBLAS_INCLUDE_DIR AND REFERENCE_CBLAS_LIBRARY) + set(CBLAS_FOUND ON) set(CBLAS_PROVIDER REFERENCE) set(CBLAS_INC_DIR ${REFERENCE_CBLAS_INCLUDE_DIR}) set(CBLAS_LIBRARIES ${REFERENCE_CBLAS_LIBRARY}) - message(STATUS "Found reference-cblas (include: ${CBLAS_INC_DIR}, library: ${CBLAS_LIBS})") + add_definitions(-DPADDLE_USE_REFERENCE_CBLAS) + message(STATUS "Found reference-cblas (include: ${CBLAS_INC_DIR}, library: ${CBLAS_LIBRARIES})") +endif() + +if(IOS_USE_VECLIB_FOR_BLAS AND VECLIB_FOUND) set(CBLAS_FOUND ON) + set(CBLAS_PROVIDER vecLib) + set(CBLAS_INC_DIR ${VECLIB_INC_DIR}) + add_definitions(-DPADDLE_USE_VECLIB) endif() diff --git a/cmake/configure.cmake b/cmake/configure.cmake index 5e507e78f74eee885922f502f35e3c15fafb622d..db8f5ab0456792f903093b9cf20e2541f00add5c 100644 --- a/cmake/configure.cmake +++ b/cmake/configure.cmake @@ -1,11 +1,11 @@ # Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserve. -# +# # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. # You may obtain a copy of the License at -# +# # http://www.apache.org/licenses/LICENSE-2.0 -# +# # Unless required by applicable law or agreed to in writing, software # distributed under the License is distributed on an "AS IS" BASIS, # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. @@ -24,10 +24,18 @@ if(WITH_DOUBLE) add_definitions(-DPADDLE_TYPE_DOUBLE) endif(WITH_DOUBLE) +if(WITH_TESTING) + add_definitions(-DPADDLE_WITH_TESTING) +endif(WITH_TESTING) + if(NOT WITH_TIMER) add_definitions(-DPADDLE_DISABLE_TIMER) endif(NOT WITH_TIMER) +if(USE_EIGEN_FOR_BLAS) + add_definitions(-DPADDLE_USE_EIGEN_FOR_BLAS) +endif(USE_EIGEN_FOR_BLAS) + if(NOT WITH_PROFILER) add_definitions(-DPADDLE_DISABLE_PROFILER) endif(NOT WITH_PROFILER) @@ -40,12 +48,17 @@ if(NOT CMAKE_CROSSCOMPILING) endif() endif() +if(NOT WITH_GOLANG) + add_definitions(-DPADDLE_WITHOUT_GOLANG) +endif(NOT WITH_GOLANG) + if(NOT WITH_GPU) - add_definitions(-DPADDLE_ONLY_CPU) add_definitions(-DHPPL_STUB_FUNC) list(APPEND CMAKE_CXX_SOURCE_FILE_EXTENSIONS cu) else() + add_definitions(-DPADDLE_WITH_CUDA) + FIND_PACKAGE(CUDA REQUIRED) if(${CUDA_VERSION_MAJOR} VERSION_LESS 7) @@ -63,5 +76,76 @@ else() include_directories(${CUDA_TOOLKIT_INCLUDE}) endif(NOT WITH_GPU) +if(WITH_MKLDNN) + add_definitions(-DPADDLE_USE_MKLDNN) + if (WITH_MKLML AND MKLDNN_IOMP_DIR) + message(STATUS "Enable Intel OpenMP at ${MKLDNN_IOMP_DIR}") + set(OPENMP_FLAGS "-fopenmp") + set(CMAKE_C_CREATE_SHARED_LIBRARY_FORBIDDEN_FLAGS ${OPENMP_FLAGS}) + set(CMAKE_CXX_CREATE_SHARED_LIBRARY_FORBIDDEN_FLAGS ${OPENMP_FLAGS}) + set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} ${OPENMP_FLAGS}") + set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} ${OPENMP_FLAGS}") + else() + find_package(OpenMP) + if(OPENMP_FOUND) + set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} ${OpenMP_C_FLAGS}") + set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} ${OpenMP_CXX_FLAGS}") + else() + message(WARNING "Can not find OpenMP." + "Some performance features in MKLDNN may not be available") + endif() + endif() + +endif(WITH_MKLDNN) + set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} ${SIMD_FLAG}") set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} ${SIMD_FLAG}") + +if(WITH_GOLANG) + # we need to symlink Paddle directory into GOPATH. If we + # don't do it and we have code that depends on Paddle, go + # get ./... will download a new Paddle repo from Github, + # without the changes in our current Paddle repo that we + # want to build. + set(GOPATH "${CMAKE_CURRENT_BINARY_DIR}/go") + file(MAKE_DIRECTORY ${GOPATH}) + set(PADDLE_IN_GOPATH "${GOPATH}/src/github.com/PaddlePaddle/Paddle") + file(MAKE_DIRECTORY "${PADDLE_IN_GOPATH}") + set(PADDLE_GO_PATH "${CMAKE_SOURCE_DIR}/go") + + add_custom_target(go_path) + add_custom_command(TARGET go_path + # Symlink Paddle directory into GOPATH + COMMAND mkdir -p ${PADDLE_IN_GOPATH} + COMMAND rm -rf ${PADDLE_IN_GOPATH} + COMMAND ln -sf ${CMAKE_SOURCE_DIR} ${PADDLE_IN_GOPATH} + # Automatically get all dependencies specified in the source code + # We can't run `go get -d ./...` for every target, because + # multiple `go get` can not run concurrently, but make need to be + # able to run with multiple jobs. + WORKING_DIRECTORY ${CMAKE_CURRENT_SOURCE_DIR} + ) + + if (GLIDE_INSTALL) + if(EXISTS $ENV{GOPATH}/bin/glide) + set(GLIDE "$ENV{GOPATH}/bin/glide") + else() + message(FATAL_ERROR "no glide executeble found: $ENV{GOPATH}/bin/glide") + endif() + + # this command will only run when the file it depends is missing + # or has changed, or the output is missing. + add_custom_command(OUTPUT ${CMAKE_BINARY_DIR}/glide + COMMAND env GOPATH=${GOPATH} ${GLIDE} install + COMMAND touch ${CMAKE_BINARY_DIR}/glide + DEPENDS ${PADDLE_SOURCE_DIR}/go/glide.lock + WORKING_DIRECTORY "${PADDLE_IN_GOPATH}/go" + ) + + # depends on the custom command which outputs + # ${CMAKE_BINARY_DIR}/glide, the custom command does not need to + # run every time this target is built. + add_custom_target(go_vendor DEPENDS ${CMAKE_BINARY_DIR}/glide go_path) + endif() + +endif(WITH_GOLANG) diff --git a/cmake/cpplint.cmake b/cmake/cpplint.cmake index 38c636b30edc0af1c07255814e8bc2b1ad9514da..4823dc3e91390002aefac70f7931b4197db05789 100644 --- a/cmake/cpplint.cmake +++ b/cmake/cpplint.cmake @@ -25,8 +25,10 @@ set(STYLE_FILTER "${STYLE_FILTER}-readability/casting") set(IGNORE_PATTERN .*ImportanceSampler.* .*cblas\\.h.* - .*LtrDataProvider.* - .*MultiDataProvider.*) + .*\\.pb\\.txt + .*MultiDataProvider.* + .*pb.* + .*pybind.h) # add_style_check_target # @@ -34,29 +36,27 @@ set(IGNORE_PATTERN # # first argument: target name to attach # rest arguments: source list to check code style. -# +# # NOTE: If WITH_STYLE_CHECK is OFF, then this macro just do nothing. macro(add_style_check_target TARGET_NAME) if(WITH_STYLE_CHECK) set(SOURCES_LIST ${ARGN}) list(REMOVE_DUPLICATES SOURCES_LIST) - list(SORT SOURCES_LIST) - foreach(filename ${SOURCES_LIST}) - set(LINT ON) foreach(pattern ${IGNORE_PATTERN}) if(filename MATCHES ${pattern}) - message(STATUS "DROP LINT ${filename}") - set(LINT OFF) - endif() + list(REMOVE_ITEM SOURCES_LIST ${filename}) + endif() endforeach() - if(LINT MATCHES ON) - add_custom_command(TARGET ${TARGET_NAME} - PRE_BUILD - COMMAND env ${py_env} "${PYTHON_EXECUTABLE}" "${PROJ_ROOT}/paddle/scripts/cpplint.py" - "--filter=${STYLE_FILTER}" ${filename} - WORKING_DIRECTORY ${CMAKE_CURRENT_LIST_DIR}) - endif() endforeach() + + if(SOURCES_LIST) + add_custom_command(TARGET ${TARGET_NAME} POST_BUILD + COMMAND "${PYTHON_EXECUTABLE}" "${PADDLE_SOURCE_DIR}/paddle/scripts/cpplint.py" + "--filter=${STYLE_FILTER}" + ${SOURCES_LIST} + COMMENT "cpplint: Checking source code style" + WORKING_DIRECTORY ${CMAKE_CURRENT_SOURCE_DIR}) + endif() endif() endmacro() diff --git a/cmake/cross_compiling/android.cmake b/cmake/cross_compiling/android.cmake new file mode 100644 index 0000000000000000000000000000000000000000..84219cfa5587f5b765b2e8f35180797d7053169f --- /dev/null +++ b/cmake/cross_compiling/android.cmake @@ -0,0 +1,236 @@ +# Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserve. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +# This is a toolchain file for cross-compiling for Android, and the +# configuration refers to the open-source resposity: +# https://github.com/taka-no-me/android-cmake +# Most of the variables are compatible with that used in +# https://developer.android.com/ndk/guides/cmake.html +# The supported variables are listed belows: +# +# ANDROID_STANDALONE_TOOLCHAIN +# ANDROID_TOOLCHAIN +# ANDROID_ABI +# ANDROID_NATIVE_API_LEVEL +# ANDROID_ARM_MODE +# ANDROID_ARM_NEON +# +# For CMake >= 3.7.0, all the settings will be delivered to CMake system +# variables to let CMake do the cross-compiling configurations itself. +# More detail of cross-compiling settings +# https://cmake.org/cmake/help/v3.7/manual/cmake-toolchains.7.html + +IF(NOT ANDROID) + return() +ENDIF() + +# check the exist of android standalone toolchain +IF(NOT DEFINED ANDROID_STANDALONE_TOOLCHAIN) + SET(ANDROID_STANDALONE_TOOLCHAIN $ENV{ANDROID_STANDALONE_TOOLCHAIN} + CACHE PATH "Folder holds the standalone toolchain of Android NDK") +ENDIF() +IF(NOT ANDROID_STANDALONE_TOOLCHAIN) + MESSAGE(WARNING "It is recommended to set ANDROID_STANDALONE_TOOLCHAIN to " + "use a standalone toolchain.\n" + "To cross-compile for Android, you need to:\n" + "1. Download an Android NDK from" + " https://developer.android.com/ndk/downloads/index.html\n" + "2. Setup a standalone toolchain" + "https://developer.android.google.cn/ndk/guides/standalone_toolchain.html?hl=zh-cn\n") +ENDIF() + +IF(NOT DEFINED CMAKE_SYSTEM_VERSION AND ANDROID_NATIVE_API_LEVEL) + IF(ANDROID_NATIVE_API_LEVEL MATCHES "^android-[0-9]+$") + STRING(REPLACE "android-" "" CMAKE_SYSTEM_VERSION "${CMAKE_MATCH_0}") + ELSEIF(ANDROID_NATIVE_API_LEVEL MATCHES "^[0-9]+$") + SET(CMAKE_SYSTEM_VERSION ${ANDROID_NATIVE_API_LEVEL}) + ENDIF() +ENDIF() + +IF(NOT DEFINED ANDROID_TOOLCHAIN) + SET(ANDROID_TOOLCHAIN clang) +ENDIF() + +IF(NOT DEFINED ANDROID_ABI) + SET(ANDROID_ABI "armeabi-v7a") +ENDIF() + +IF(NOT DEFINED ANDROID_ARM_MODE) + SET(ANDROID_ARM_MODE ON) +ENDIF() +IF(ANDROID_ARM_MODE) + SET(ANDROID_ARM_MODE_NAME "arm") +ELSE(ANDROID_ARM_MODE) + SET(ANDROID_ARM_MODE_NAME "thumb") +ENDIF(ANDROID_ARM_MODE) + +IF(NOT DEFINED ANDROID_ARM_NEON) + SET(ANDROID_ARM_NEON ON) +ENDIF() + +IF("${CMAKE_VERSION}" VERSION_LESS "3.7.0") + IF("${CMAKE_VERSION}" VERSION_LESS "3.1.0") + SET(CMAKE_SYSTEM_NAME "Linux") + ENDIF() + MESSAGE(WARNING "It is recommended to use CMake >= 3.7.0 (current version: " + "${CMAKE_VERSION}), when cross-compiling for Android.") + + IF(ANDROID_STANDALONE_TOOLCHAIN) + # Use standalone toolchain + SET(CMAKE_SYSROOT "${ANDROID_STANDALONE_TOOLCHAIN}/sysroot") + + IF(NOT CMAKE_SYSTEM_VERSION) + SET(ANDROID_STANDALONE_TOOLCHAIN_API "") + SET(ANDROID_API_LEVEL_H_REGEX "^[\t ]*#[\t ]*define[\t ]+__ANDROID_API__[\t ]+([0-9]+)") + FILE(STRINGS "${ANDROID_STANDALONE_TOOLCHAIN}/sysroot/usr/include/android/api-level.h" + ANDROID_API_LEVEL_H_CONTENT REGEX "${ANDROID_API_LEVEL_H_REGEX}") + IF(ANDROID_API_LEVEL_H_CONTENT MATCHES "${ANDROID_API_LEVEL_H_REGEX}") + SET(ANDROID_STANDALONE_TOOLCHAIN_API "${CMAKE_MATCH_1}") + ENDIF() + SET(CMAKE_SYSTEM_VERSION ${ANDROID_STANDALONE_TOOLCHAIN_API}) + ENDIF() + + # Toolchain + SET(ANDROID_TOOLCHAIN_ROOT ${ANDROID_STANDALONE_TOOLCHAIN}) + ELSE(ANDROID_NDK) + # TODO: use android ndk + ENDIF() + + IF(ANDROID_ABI MATCHES "^armeabi(-v7a)?$") + SET(ANDROID_TOOLCHAIN_NAME arm-linux-androideabi) + IF(ANDROID_ABI STREQUAL "armeabi") + SET(CMAKE_SYSTEM_PROCESSOR armv5te) + SET(ANDROID_CLANG_TRIPLE armv5te-none-linux-androideabi) + ELSEIF(ANDROID_ABI STREQUAL "armeabi-v7a") + SET(CMAKE_SYSTEM_PROCESSOR armv7-a) + SET(ANDROID_CLANG_TRIPLE armv7-none-linux-androideabi) + ENDIF() + ELSEIF(ANDROID_ABI STREQUAL "arm64-v8a") + SET(ANDROID_TOOLCHAIN_NAME aarch64-linux-android) + SET(CMAKE_SYSTEM_PROCESSOR aarch64) + SET(ANDROID_CLANG_TRIPLE aarch64-none-linux-android) + ELSE() + MESSAGE(FATAL_ERROR "Invalid Android ABI: ${ANDROID_ABI}.") + ENDIF() + SET(ANDROID_TOOLCHAIN_PREFIX "${ANDROID_TOOLCHAIN_ROOT}/bin/${ANDROID_TOOLCHAIN_NAME}-") + + IF(ANDROID_TOOLCHAIN STREQUAL clang) + SET(ANDROID_C_COMPILER_NAME clang) + SET(ANDROID_CXX_COMPILER_NAME clang++) + SET(CMAKE_C_COMPILER_TARGET ${ANDROID_CLANG_TRIPLE}) + SET(CMAKE_CXX_COMPILER_TARGET ${ANDROID_CLANG_TRIPLE}) + ELSEIF(ANDROID_TOOLCHAIN STREQUAL gcc) + SET(ANDROID_C_COMPILER_NAME gcc) + SET(ANDROID_CXX_COMPILER_NAME g++) + ELSE() + MESSAGE(FATAL_ERROR "Invalid Android toolchain: ${ANDROID_TOOLCHAIN}") + ENDIF() + + # C compiler + IF(NOT CMAKE_C_COMPILER) + SET(ANDROID_C_COMPILER "${ANDROID_TOOLCHAIN_PREFIX}${ANDROID_C_COMPILER_NAME}") + ELSE() + GET_FILENAME_COMPONENT(ANDROID_C_COMPILER ${CMAKE_C_COMPILER} PROGRAM) + ENDIF() + IF(NOT EXISTS ${ANDROID_C_COMPILER}) + MESSAGE(FATAL_ERROR "Cannot find C compiler: ${ANDROID_C_COMPILER}") + ENDIF() + + # CXX compiler + IF(NOT CMAKE_CXX_COMPILER) + SET(ANDROID_CXX_COMPILER "${ANDROID_TOOLCHAIN_PREFIX}${ANDROID_CXX_COMPILER_NAME}") + ELSE() + GET_FILENAME_COMPONENT(ANDROID_CXX_COMPILER ${CMAKE_CXX_COMPILER} PROGRAM) + ENDIF() + IF(NOT EXISTS ${ANDROID_CXX_COMPILER}) + MESSAGE(FATAL_ERROR "Cannot find CXX compiler: ${ANDROID_CXX_COMPILER}") + ENDIF() + + SET(CMAKE_C_COMPILER ${ANDROID_C_COMPILER} CACHE PATH "C compiler" FORCE) + SET(CMAKE_CXX_COMPILER ${ANDROID_CXX_COMPILER} CACHE PATH "CXX compiler" FORCE) + + # Toolchain and ABI specific flags. + SET(ANDROID_COMPILER_FLAGS "-ffunction-sections -fdata-sections") + SET(ANDROID_LINKER_FLAGS "-Wl,--gc-sections") + + IF(ANDROID_ABI STREQUAL "armeabi") + LIST(APPEND ANDROID_COMPILER_FLAGS + -march=armv5te + -mtune=xscale + -msoft-float) + ELSEIF(ANDROID_ABI STREQUAL "armeabi-v7a") + LIST(APPEND ANDROID_COMPILER_FLAGS + -march=armv7-a + -mfloat-abi=softfp) + IF(ANDROID_ARM_NEON) + LIST(APPEND ANDROID_COMPILER_FLAGS -mfpu=neon) + ELSE() + LIST(APPEND ANDROID_COMPILER_FLAGS -mfpu=vfpv3-d16) + ENDIF() + LIST(APPEND ANDROID_LINKER_FLAGS -Wl,--fix-cortex-a8) + ELSEIF(ANDROID_ABI STREQUAL "arm64-v8a") + LIST(APPEND ANDROID_COMPILER_FLAGS -march=armv8-a) + ENDIF() + + IF(ANDROID_ABI MATCHES "^armeabi(-v7a)?$") + IF(ANDROID_ARM_MODE) + LIST(APPEND ANDROID_COMPILER_FLAGS -marm) + ELSE() + LIST(APPEND ANDROID_COMPILER_FLAGS -mthumb) + ENDIF() + IF(ANDROID_TOOLCHAIN STREQUAL clang) + # Disable integrated-as for better compatibility. + LIST(APPEND ANDROID_COMPILER_FLAGS -fno-integrated-as) + ENDIF() + ENDIF() + + IF(ANDROID_TOOLCHAIN STREQUAL clang) + # CMake automatically forwards all compiler flags to the linker, + # and clang doesn't like having -Wa flags being used for linking. + # To prevent CMake from doing this would require meddling with + # the CMAKE__COMPILE_OBJECT rules, which would get quite messy. + LIST(APPEND ANDROID_LINKER_FLAGS -Qunused-arguments) + ENDIF() + + STRING(REPLACE ";" " " ANDROID_COMPILER_FLAGS "${ANDROID_COMPILER_FLAGS}") + STRING(REPLACE ";" " " ANDROID_LINKER_FLAGS "${ANDROID_LINKER_FLAGS}") + + SET(CMAKE_C_FLAGS "${ANDROID_COMPILER_FLAGS} ${CMAKE_C_FLAGS}" + CACHE STRING "C flags") + SET(CMAKE_CXX_FLAGS "${ANDROID_COMPILER_FLAGS} ${CMAKE_CXX_FLAGS}" + CACHE STRING "CXX flags") + SET(CMAKE_SHARED_LINKER_FLAGS "${ANDROID_LINKER_FLAGS} ${CMAKE_SHARED_LINKER_FLAGS}" + CACHE STRING "shared linker flags") + + SET(CMAKE_POSITION_INDEPENDENT_CODE TRUE) + SET(CMAKE_EXE_LINKER_FLAGS "-pie -fPIE ${ANDROID_LINKER_FLAGS} ${CMAKE_EXE_LINKER_FLAGS}" + CACHE STRING "executable linker flags") + + MESSAGE(STATUS "Android: Targeting API '${CMAKE_SYSTEM_VERSION}' " + "with architecture '${ANDROID_ARM_MODE_NAME}', " + "ABI '${ANDROID_ABI}', and processor '${CMAKE_SYSTEM_PROCESSOR}'") + MESSAGE(STATUS "System CMAKE_C_FLAGS: " ${CMAKE_C_FLAGS}) + MESSAGE(STATUS "System CMAKE_CXX_FLAGS: " ${CMAKE_CXX_FLAGS}) +ELSE() + IF(ANDROID_STANDALONE_TOOLCHAIN) + SET(CMAKE_ANDROID_STANDALONE_TOOLCHAIN ${ANDROID_STANDALONE_TOOLCHAIN}) + ENDIF() + SET(CMAKE_ANDROID_ARCH_ABI ${ANDROID_ABI}) + IF(ANDROID_ABI MATCHES "^armeabi(-v7a)?$") + SET(CMAKE_ANDROID_ARM_MODE ${ANDROID_ARM_MODE}) + IF(ANDROID_ABI STREQUAL "armeabi-v7a") + SET(CMAKE_ANDROID_ARM_NEON ${ANDROID_ARM_NEON}) + ENDIF() + ENDIF() +ENDIF() diff --git a/cmake/cross_compiling/host.cmake b/cmake/cross_compiling/host.cmake new file mode 100644 index 0000000000000000000000000000000000000000..14c35266ec60b439aaef30e5e4e0540c534160ae --- /dev/null +++ b/cmake/cross_compiling/host.cmake @@ -0,0 +1,49 @@ +# Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserve. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +# find host C compiler +IF(HOST_C_COMPILER) + SET(HOST_C_COMPILER_NAME ${HOST_C_COMPILER}) +ELSEIF(NOT $ENV{CC} STREQUAL "") + SET(HOST_C_COMPILER_NAME $ENV{CC}) +ELSE() + SET(HOST_C_COMPILER_NAME cc) +ENDIF() + +GET_FILENAME_COMPONENT(HOST_C_COMPILER_PATH ${HOST_C_COMPILER_NAME} PROGRAM) +IF(NOT HOST_C_COMPILER_PATH OR NOT EXISTS ${HOST_C_COMPILER_PATH}) + MESSAGE(FATAL_ERROR "Cannot find host C compiler, set host C compiler:\n" + "\tcmake .. -DHOST_C_COMPILER=...") +ENDIF() + +# find host CXX compiler +IF(HOST_CXX_COMPILER) + SET(HOST_CXX_COMPILER_NAME ${HOST_CXX_COMPILER}) +ELSEIF(NOT $ENV{CXX} STREQUAL "") + SET(HOST_CXX_COMPILER_NAME $ENV{CXX}) +ELSE() + SET(HOST_CXX_COMPILER_NAME c++) +ENDIF() + +GET_FILENAME_COMPONENT(HOST_CXX_COMPILER_PATH ${HOST_CXX_COMPILER_NAME} PROGRAM) +IF(NOT HOST_CXX_COMPILER_PATH OR NOT EXISTS ${HOST_CXX_COMPILER_PATH}) + MESSAGE(FATAL_ERROR "Cannot find host CXX compiler, set host CXX compiler:\n" + "\tcmake .. -DHOST_CXX_COMPILER=...") +ENDIF() + +SET(HOST_C_COMPILER ${HOST_C_COMPILER_PATH} CACHE PATH "Host C compiler") +SET(HOST_CXX_COMPILER ${HOST_CXX_COMPILER_PATH} CACHE PATH "Host CXX compiler") + +MESSAGE(STATUS "Found host C compiler: " ${HOST_C_COMPILER}) +MESSAGE(STATUS "Found host CXX compiler: " ${HOST_CXX_COMPILER}) diff --git a/cmake/cross_compiling/ios.cmake b/cmake/cross_compiling/ios.cmake new file mode 100644 index 0000000000000000000000000000000000000000..0b38943952f7fb9052368fe95eb31dd7592d8a47 --- /dev/null +++ b/cmake/cross_compiling/ios.cmake @@ -0,0 +1,350 @@ +# Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserve. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +# This is a toolchain file for cross-compiling for iOS, and the +# configuration largely refers to public toolchain file: +# https://raw.githubusercontent.com/leetal/ios-cmake/master/ios.toolchain.cmake +# and +# https://github.com/cristeab/ios-cmake +# +# Supports options: +# IOS_PLATFORM = OS (default) or SIMULATOR +# This decides if SDKS will be selected from the iPhoneOS.platform or iPhoneSimulator.platform folders +# OS - the default, used to build for iPhone and iPad physical devices, which have an arm arch. +# SIMULATOR - used to build for the Simulator platforms, which have an x86 arch. +# IOS_ARCH +# The archectures wanted to support, such "arm64", "armv7;arm64" +# IOS_DEPLOYMENT_TARGET +# The minimum iOS deployment version, such as "7.0" +# IOS_ENABLE_BITCODE = ON (default) or OFF +# IOS_USE_VECLIB_FOR_BLAS = OFF (default) or ON +# IOS_DEVELOPER_ROOT = automatic(default) or /path/to/platform/Developer folder +# By default this location is automatcially chosen based on the IOS_PLATFORM value above. +# If set manually, it will override the default location and force the user of a particular Developer Platform +# IOS_SDK_ROOT = automatic(default) or /path/to/platform/Developer/SDKs/SDK folder +# By default this location is automatcially chosen based on the IOS_DEVELOPER_ROOT value. +# In this case it will always be the most up-to-date SDK found in the IOS_DEVELOPER_ROOT path. +# If set manually, this will force the use of a specific SDK version + +# Macros: +# set_xcode_property (TARGET XCODE_PROPERTY XCODE_VALUE) +# A convenience macro for setting xcode specific properties on targets +# example: set_xcode_property (myioslib IPHONEOS_DEPLOYMENT_TARGET "3.1") +# find_host_package (PROGRAM ARGS) +# A macro used to find executable programs on the host system, not within the iOS environment. +# Thanks to the android-cmake project for providing the command + +if(NOT IOS) + return() +endif() + +set(CMAKE_SYSTEM_NAME Darwin) + +# Get the Xcode version being used. +execute_process(COMMAND xcodebuild -version + OUTPUT_VARIABLE XCODE_VERSION + RESULT_VARIABLE XCODE_VERSION_RESULT + ERROR_QUIET OUTPUT_STRIP_TRAILING_WHITESPACE) +if(NOT ${XCODE_VERSION_RESULT}) + string(REGEX MATCH "Xcode [0-9\\.]+" XCODE_VERSION "${XCODE_VERSION}") + string(REGEX REPLACE "Xcode ([0-9\\.]+)" "\\1" XCODE_VERSION "${XCODE_VERSION}") + message(STATUS "Building with Xcode version: ${XCODE_VERSION}") +else() + message(FATAL_ERROR "Cannot execute xcodebuild, please check whether xcode is installed.") +endif() + +# Required as of cmake 2.8.10 +set(CMAKE_OSX_DEPLOYMENT_TARGET "" CACHE STRING "Force unset of the deployment target for iOS" FORCE) + +# Setup iOS platform unless specified manually with IOS_PLATFORM +if(NOT DEFINED IOS_PLATFORM) + set(IOS_PLATFORM "OS") +endif() +set(IOS_PLATFORM ${IOS_PLATFORM} CACHE STRING "Type of iOS Platform") + +# Set the architecture for iOS +if(NOT DEFINED IOS_ARCH) + if(IOS_PLATFORM STREQUAL "OS") + # FIXME(liuyiqun): support "armv7;armv7s;arm64" future + set(IOS_ARCH "arm64") + elseif(IOS_PLATFORM STREQUAL "SIMULATOR") + set(IOS_ARCH "i386;x86_64") + elseif(IOS_PLATFORM STREQUAL "WATCHOS") + set(IOS_ARCH armv7k) + endif() +endif() +set(CMAKE_OSX_ARCHITECTURES ${IOS_ARCH} CACHE string "Build architecture for iOS") + +# Specify minimum iOS deployment version +if(NOT DEFINED IOS_DEPLOYMENT_TARGET) + set(IOS_DEPLOYMENT_TARGET "7.0") +endif() +set(IOS_DEPLOYMENT_TARGET ${IOS_DEPLOYMENT_TARGET} CACHE STRING "Minimum iOS version") + +# Whether to enable bitcode +if(NOT DEFINED IOS_ENABLE_BITCODE) + set(IOS_ENABLE_BITCODE ON) +endif() +set(IOS_ENABLE_BITCODE ${IOS_ENABLE_BITCODE} CACHE BOOL "Whether to enable bitcode") + +if(NOT DEFINED IOS_USE_VECLIB_FOR_BLAS) + set(IOS_USE_VECLIB_FOR_BLAS OFF) +endif() +set(IOS_USE_VECLIB_FOR_BLAS ${IOS_UES_VECLIB_FOR_BLAS} CACHE BOOL "Whether to use veclib") + +# Check the platform selection and setup for developer root +if(${IOS_PLATFORM} STREQUAL "OS") + set(IOS_PLATFORM_LOCATION "iPhoneOS.platform") + set(XCODE_IOS_PLATFORM iphoneos) + + # This causes the installers to properly locate the output libraries + set(CMAKE_XCODE_EFFECTIVE_PLATFORMS "-iphoneos") +elseif(${IOS_PLATFORM} STREQUAL "SIMULATOR") + set(IOS_PLATFORM_LOCATION "iPhoneSimulator.platform") + set(XCODE_IOS_PLATFORM iphonesimulator) + + # This causes the installers to properly locate the output libraries + set(CMAKE_XCODE_EFFECTIVE_PLATFORMS "-iphonesimulator") +elseif(${IOS_PLATFORM} STREQUAL "WATCHOS") + set(IOS_PLATFORM_LOCATION "WatchOS.platform") + set(XCODE_IOS_PLATFORM watchos) + + # This causes the installers to properly locate the output libraries + set(CMAKE_XCODE_EFFECTIVE_PLATFORMS "-watchos") +else(${IOS_PLATFORM} STREQUAL "OS") + message(FATAL_ERROR "Unsupported IOS_PLATFORM value selected. Please set to\n" + "\t OS, SIMULATOR, or WATCHOS.") +endif() + +# Check iOS developer toolchain +if(NOT DEFINED IOS_DEVELOPER_ROOT) + # Setup iOS developer location + execute_process(COMMAND xcode-select -print-path + OUTPUT_VARIABLE XCODE_DEVELOPER_DIR + RESULT_VARIABLE XCODE_DEVELOPER_DIR_RESULT + ERROR_QUIET OUTPUT_STRIP_TRAILING_WHITESPACE) + # Xcode 4.3 changed the installation location, choose the most recent one available + if(${XCODE_VERSION} VERSION_LESS "4.3.0") + set(IOS_DEVELOPER_ROOT "/Developer/Platforms/${IOS_PLATFORM_LOCATION}/Developer") + else() + set(IOS_DEVELOPER_ROOT "${XCODE_DEVELOPER_DIR}/Platforms/${IOS_PLATFORM_LOCATION}/Developer") + endif() +endif() +if(EXISTS ${IOS_DEVELOPER_ROOT}) + set(IOS_DEVELOPER_ROOT ${IOS_DEVELOPER_ROOT} CACHE PATH "Location of iOS Platform") +else() + message(FATAL_ERROR "Invalid IOS_DEVELOPER_ROOT: ${IOS_DEVELOPER_ROOT} does not exist.") +endif() + +# Check iOS SDK +if(NOT DEFINED IOS_SDK_ROOT) + # Find and use the most recent iOS sdk + file(GLOB IOS_SDK_LISTS "${IOS_DEVELOPER_ROOT}/SDKs/*") + if(IOS_SDK_LISTS) + list(SORT IOS_SDK_LISTS) + list(REVERSE IOS_SDK_LISTS) + list(GET IOS_SDK_LISTS 0 IOS_SDK_ROOT) + else(IOS_SDK_LISTS) + message(FATAL_ERROR "No iOS SDK's found in default search path ${IOS_DEVELOPER_ROOT}." + " Please manually set IOS_SDK_ROOT or install the iOS SDK.") + endif(IOS_SDK_LISTS) +endif() +if(EXISTS ${IOS_SDK_ROOT}) + set(IOS_SDK_ROOT ${IOS_SDK_ROOT} CACHE PATH "Location of the selected iOS SDK") + message(STATUS "iOS toolchain: ${IOS_SDK_ROOT}") +else() + message(FATAL_ERROR "Invalid IOS_SDK_ROOT: ${IOS_SDK_ROOT} does not exist.") +endif() + +# Set the sysroot default to the most recent SDK +set(CMAKE_OSX_SYSROOT ${IOS_SDK_ROOT} CACHE PATH "Sysroot used for iOS support") + +# Get version of iOS SDK +execute_process(COMMAND xcodebuild -sdk ${CMAKE_OSX_SYSROOT} -version SDKVersion + OUTPUT_VARIABLE IOS_SDK_VERSION + RESULT_VARIABLE IOS_SDK_VERSION_RESULT + ERROR_QUIET OUTPUT_STRIP_TRAILING_WHITESPACE) +if(${IOS_SDK_VERSION_RESULT}) + string(REGEX MATCH "(([0-9]+)\\.)+([0-9]+)" IOS_SDK_VERSION "${IOS_SDK_ROOT}") +endif() +if(NOT IOS_SDK_VERSION) + message(WARNING "Cannot get SDK's version.") + set(IOS_SDK_VERSION 1) +endif() +set(CMAKE_SYSTEM_VERSION ${IOS_SDK_VERSION}) + +# Find the C & C++ compilers for the specified SDK. +if(NOT CMAKE_C_COMPILER) + # Default to use clang + execute_process(COMMAND xcrun -sdk ${CMAKE_OSX_SYSROOT} -find clang + OUTPUT_VARIABLE IOS_C_COMPILER + RESULT_VARIABLE IOS_C_COMPILER_RESULT + ERROR_QUIET OUTPUT_STRIP_TRAILING_WHITESPACE) + if(${IOS_C_COMPILER_RESULT}) + get_filename_component(IOS_C_COMPILER clang PROGRAM) + endif() +else(NOT CMAKE_C_COMPILER) + # User can set it in cmake command + get_filename_component(IOS_C_COMPILER ${CMAKE_C_COMPILER} PROGRAM) +endif(NOT CMAKE_C_COMPILER) +if(NOT EXISTS ${IOS_C_COMPILER}) + message(FATAL_ERROR "Cannot find C compiler: ${IOS_C_COMPILER}") +endif() + +if(NOT CMAKE_CXX_COMPILER) + # Default to use clang++ + execute_process(COMMAND xcrun -sdk ${CMAKE_OSX_SYSROOT} -find clang++ + OUTPUT_VARIABLE IOS_CXX_COMPILER + RESULT_VARIABLE IOS_CXX_COMPILER_RESULT + ERROR_QUIET OUTPUT_STRIP_TRAILING_WHITESPACE) + if(${IOS_CXX_COMPILER_RESULT}) + get_filename_component(IOS_CXX_COMPILER clang++ PROGRAM) + endif() +else(NOT CMAKE_CXX_COMPILER) + # User can set it in cmake command + get_filename_component(IOS_CXX_COMPILER ${CMAKE_CXX_COMPILER} PROGRAM) +endif(NOT CMAKE_CXX_COMPILER) +if(NOT EXISTS ${IOS_CXX_COMPILER}) + message(FATAL_ERROR "Cannot find CXX compiler: ${IOS_CXX_COMPILER}") +endif() + +set(CMAKE_C_COMPILER ${IOS_C_COMPILER} CACHE PATH "C compiler" FORCE) +set(CMAKE_CXX_COMPILER ${IOS_CXX_COMPILER} CACHE PATH "CXX compiler" FORCE) + +set(CMAKE_C_OSX_COMPATIBILITY_VERSION_FLAG "-compatibility_version ") +set(CMAKE_C_OSX_CURRENT_VERSION_FLAG "-current_version ") +set(CMAKE_CXX_OSX_COMPATIBILITY_VERSION_FLAG "${CMAKE_C_OSX_COMPATIBILITY_VERSION_FLAG}") +set(CMAKE_CXX_OSX_CURRENT_VERSION_FLAG "${CMAKE_C_OSX_CURRENT_VERSION_FLAG}") + +# Set iOS specific C/C++ flags +if(IOS_PLATFORM STREQUAL "OS") + if(XCODE_VERSION VERSION_LESS "7.0") + set(XCODE_IOS_PLATFORM_VERSION_FLAGS "-mios-version-min=${IOS_DEPLOYMENT_TARGET}") + else() + # Xcode 7.0+ uses flags we can build directly from XCODE_IOS_PLATFORM. + set(XCODE_IOS_PLATFORM_VERSION_FLAGS "-m${XCODE_IOS_PLATFORM}-version-min=${IOS_DEPLOYMENT_TARGET}") + endif() +else() + set(XCODE_IOS_FLATFORM_VERSION_FLAGS "-mios-simulator-version-min=${IOS_DEPLOYMENT_TARGET}") +endif() + +if(IOS_ENABLE_BITCODE) + set(XCODE_IOS_BITCODE_FLAGS "${IOS_COMPILER_FLAGS} -fembed-bitcode") +else() + set(XCODE_IOS_BITCODE_FLAGS "") +endif() + +set(IOS_COMPILER_FLAGS "${XCODE_IOS_PLATFORM_VERSION_FLAGS} ${XCODE_IOS_BITCODE_FLAGS}") + +# Hidden visibilty is required for cxx on iOS +set(CMAKE_C_FLAGS "${IOS_COMPILER_FLAGS} ${CMAKE_C_FLAGS}" CACHE STRING "C flags") +set(CMAKE_CXX_FLAGS "${IOS_COMPILER_FLAGS} -fvisibility-inlines-hidden ${CMAKE_CXX_FLAGS}" CACHE STRING "CXX flags") + +set(IOS_LINK_FLAGS "${XCODE_IOS_PLATFORM_VERSION_FLAGS} -Wl,-search_paths_first") + +if(IOS_USE_VECLIB_FOR_BLAS) + # Find vecLib for iOS + set(VECLIB_SEARCH_DIRS + ${IOS_SDK_ROOT}/System/Library/Frameworks/Accelerate.framework/Versions/Current/Frameworks + ${IOS_SDK_ROOT}/System/Library/Frameworks/Accelerate.framework/Frameworks + ) + find_path(VECLIB_INC_DIR vecLib.h PATHS ${VECLIB_SEARCH_DIRS}/vecLib.framework/Headers) + + include(FindPackageHandleStandardArgs) + find_package_handle_standard_args(vecLib DEFAULT_MSG VECLIB_INC_DIR) + + if(VECLIB_FOUND) + if(VECLIB_INC_DIR MATCHES "^/System/Library/Frameworks/vecLib.framework.*") + set(IOS_LINK_FLAGS ${IOS_LINK_FLAGS} -lcblas "-framework vecLib") + message(STATUS "Found standalone vecLib.framework") + else() + set(IOS_LINK_FLAGS ${IOS_LINK_FLAGS} -lcblas "-framework Accelerate") + message(STATUS "Found vecLib as part of Accelerate.framework") + endif() + + endif() +endif() + +set(CMAKE_C_LINK_FLAGS "${IOS_LINK_FLAGS} ${CMAKE_C_LINK_FLAGS}") +set(CMAKE_CXX_LINK_FLAGS "${IOS_LINK_FLAGS} ${CMAKE_CXX_LINK_FLAGS}") + +set(CMAKE_PLATFORM_HAS_INSTALLNAME 1) +if(NOT IOS_ENABLE_BITCODE) + set(CMAKE_SHARED_LIBRARY_CREATE_C_FLAGS "-dynamiclib -headerpad_max_install_names") + set(CMAKE_SHARED_MODULE_CREATE_C_FLAGS "-bundle -headerpad_max_install_names") +else() + set(CMAKE_SHARED_LIBRARY_CREATE_C_FLAGS "-dynamiclib") + set(CMAKE_SHARED_MODULE_CREATE_C_FLAGS "-bundle") +endif() +set(CMAKE_SHARED_MODULE_LOADER_C_FLAG "-Wl,-bundle_loader,") +set(CMAKE_SHARED_MODULE_LOADER_CXX_FLAG "-Wl,-bundle_loader,") +set(CMAKE_FIND_LIBRARY_SUFFIXES ".dylib" ".so" ".a") + +# hack: if a new cmake (which uses CMAKE_INSTALL_NAME_TOOL) runs on an old build tree +# (where install_name_tool was hardcoded) and where CMAKE_INSTALL_NAME_TOOL isn't in the cache +# and still cmake didn't fail in CMakeFindBinUtils.cmake (because it isn't rerun) +# hardcode CMAKE_INSTALL_NAME_TOOL here to install_name_tool, so it behaves as it did before, Alex +if(NOT DEFINED CMAKE_INSTALL_NAME_TOOL) + find_program(CMAKE_INSTALL_NAME_TOOL install_name_tool) +endif() + +# Set the find root to the iOS developer roots and to user defined paths +set(CMAKE_FIND_ROOT_PATH ${IOS_DEVELOPER_ROOT} ${IOS_SDK_ROOT} ${CMAKE_PREFIX_PATH} + CACHE string "iOS find search path root") + +# default to searching for frameworks first +set(CMAKE_FIND_FRAMEWORK FIRST) + +# set up the default search directories for frameworks +set(CMAKE_SYSTEM_FRAMEWORK_PATH + ${IOS_SDK_ROOT}/System/Library/Frameworks + ${IOS_SDK_ROOT}/System/Library/PrivateFrameworks + ${IOS_SDK_ROOT}/Developer/Library/Frameworks + ) + +# only search the iOS sdks, not the remainder of the host filesystem +set(CMAKE_FIND_ROOT_PATH_MODE_PROGRAM NEVER) +set(CMAKE_FIND_ROOT_PATH_MODE_LIBRARY ONLY) +set(CMAKE_FIND_ROOT_PATH_MODE_INCLUDE ONLY) + +message(STATUS "iOS: Targeting iOS '${CMAKE_SYSTEM_VERSION}', " + "building for '${IOS_PLATFORM}' platform, with architecture '${CMAKE_OSX_ARCHITECTURES}'") +message(STATUS "System CMAKE_C_FLAGS: ${CMAKE_C_FLAGS}") +message(STATUS "System CMAKE_CXX_FLAGS: ${CMAKE_CXX_FLAGS}") + +# Used in ExternalProject command +string(REPLACE ";" "\\$" EXTERNAL_IOS_ARCHITECTURES "${CMAKE_OSX_ARCHITECTURES}") +set(EXTERNAL_OPTIONAL_ARGS + -DCMAKE_OSX_SYSROOT=${CMAKE_OSX_SYSROOT} + -DCMAKE_OSX_ARCHITECTURES=${EXTERNAL_IOS_ARCHITECTURES}) + +# This little macro lets you set any XCode specific property +macro(set_xcode_property TARGET XCODE_PROPERTY XCODE_VALUE) + set_property (TARGET ${TARGET} PROPERTY XCODE_ATTRIBUTE_${XCODE_PROPERTY} ${XCODE_VALUE}) +endmacro(set_xcode_property) + +# This macro lets you find executable programs on the host system +macro(find_host_package) + set(CMAKE_FIND_ROOT_PATH_MODE_PROGRAM NEVER) + set(CMAKE_FIND_ROOT_PATH_MODE_LIBRARY NEVER) + set(CMAKE_FIND_ROOT_PATH_MODE_INCLUDE NEVER) + set(IOS FALSE) + + find_package(${ARGN}) + + set(IOS TRUE) + set(CMAKE_FIND_ROOT_PATH_MODE_PROGRAM ONLY) + set(CMAKE_FIND_ROOT_PATH_MODE_LIBRARY ONLY) + set(CMAKE_FIND_ROOT_PATH_MODE_INCLUDE ONLY) +endmacro(find_host_package) diff --git a/cmake/cross_compiling/raspberry_pi.cmake b/cmake/cross_compiling/raspberry_pi.cmake new file mode 100644 index 0000000000000000000000000000000000000000..817b39f6833e37c340d4ee465048480cfc3db151 --- /dev/null +++ b/cmake/cross_compiling/raspberry_pi.cmake @@ -0,0 +1,84 @@ +# Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserve. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +# This is a toolchain file for cross-compiling for Raspberry Pi. +# The supported variables are listed belows: +# +# RPI_TOOLCHAIN +# RPI_ARM_NEON +# +# Also you can set CMAKE_C/CXX_COMPILER yourself, through cmake arguments. + +IF(NOT RPI) + return() +ENDIF() + +SET(CMAKE_SYSTEM_NAME Linux) +SET(CMAKE_SYSTEM_VERSION 1) +SET(CMAKE_SYSTEM_PROCESSOR arm) + +# check the exist of raspberry pi toolchain +IF(NOT DEFINED RPI_TOOLCHAIN) + SET(RPI_TOOLCHAIN $ENV{RPI_TOOLCHAIN} + CACHE PATH "Folder holds the toolchain of Raspberr Pi") +ENDIF() +IF(NOT RPI_TOOLCHAIN) + MESSAGE(WARNING "It is recommended to set RPI_TOOLCHAIN to use toolchain.\n" + "To cross-compile for Raspberry Pi, you need to download the tools using:\n" + " git clone https://github.com/raspberrypi/tools\n") +ENDIF() + +IF(NOT DEFINED RPI_ARM_NEON) + SET(RPI_ARM_NEON ON) +ENDIF() + +IF(RPI_TOOLCHAIN) + SET(RPI_TOOLCHAIN_ROOT ${RPI_TOOLCHAIN}) + IF(RPI_TOOLCHAIN_ROOT MATCHES "gcc-linaro-arm-linux-gnueabihf-raspbian(-x64)?$") + # gcc-linaro-arm-linux-gnueabihf-raspbian + # gcc-linaro-arm-linux-gnueabihf-raspbian-x64 + SET(RPI_TOOLCHAIN_NAME arm-linux-gnueabihf) + ENDIF() + SET(RPI_TOOLCHAIN_PREFIX "${RPI_TOOLCHAIN_ROOT}/bin/${RPI_TOOLCHAIN_NAME}-") +ENDIF() + +# C compiler +IF(NOT CMAKE_C_COMPILER) + SET(RPI_C_COMPILER "${RPI_TOOLCHAIN_PREFIX}gcc") +ELSE() + GET_FILENAME_COMPONENT(RPI_C_COMPILER ${CMAKE_C_COMPILER} PROGRAM) +ENDIF() +IF(NOT EXISTS ${RPI_C_COMPILER}) + MESSAGE(FATAL_ERROR "Cannot find C compiler: ${RPI_C_COMPILER}") +ENDIF() + +# CXX compiler +IF(NOT CMAKE_CXX_COMPILER) + SET(RPI_CXX_COMPILER "${RPI_TOOLCHAIN_PREFIX}g++") +ELSE() + GET_FILENAME_COMPONENT(RPI_CXX_COMPILER ${CMAKE_CXX_COMPILER} PROGRAM) +ENDIF() +IF(NOT EXISTS ${RPI_CXX_COMPILER}) + MESSAGE(FATAL_ERROR "Cannot find CXX compiler: ${RPI_CXX_COMPILER}") +ENDIF() + +SET(CMAKE_C_COMPILER ${RPI_C_COMPILER} CACHE PATH "C compiler" FORCE) +SET(CMAKE_CXX_COMPILER ${RPI_CXX_COMPILER} CACHE PATH "CXX compiler" FORCE) + +IF(RPI_ARM_NEON) + SET(RPI_C_FLAGS "${RPI_C_FLAGS} -mfpu=neon") +ENDIF() + +SET(CMAKE_C_FLAGS "${RPI_C_FLAGS} ${CMAKE_C_FLAGS}" CACHE STRING "C flags") +SET(CMAKE_CXX_FLAGS "${RPI_C_FLAGS} ${CMAKE_CXX_FLAGS}" CACHE STRING "CXX flags") diff --git a/cmake/cudnn.cmake b/cmake/cudnn.cmake index af9be86961833dcd62371227165d411a3b61d79e..2c84061ff572de4687b4d496f8ded6deee8d1011 100644 --- a/cmake/cudnn.cmake +++ b/cmake/cudnn.cmake @@ -2,7 +2,7 @@ if(NOT WITH_GPU) return() endif() -set(CUDNN_ROOT "" CACHE PATH "CUDNN ROOT") +set(CUDNN_ROOT "/usr" CACHE PATH "CUDNN ROOT") find_path(CUDNN_INCLUDE_DIR cudnn.h PATHS ${CUDNN_ROOT} ${CUDNN_ROOT}/include $ENV{CUDNN_ROOT} $ENV{CUDNN_ROOT}/include ${CUDA_TOOLKIT_INCLUDE} @@ -11,11 +11,16 @@ find_path(CUDNN_INCLUDE_DIR cudnn.h get_filename_component(__libpath_hist ${CUDA_CUDART_LIBRARY} PATH) +set(TARGET_ARCH "x86_64") +if(NOT ${CMAKE_SYSTEM_PROCESSOR}) + set(TARGET_ARCH ${CMAKE_SYSTEM_PROCESSOR}) +endif() + list(APPEND CUDNN_CHECK_LIBRARY_DIRS ${CUDNN_ROOT} ${CUDNN_ROOT}/lib64 ${CUDNN_ROOT}/lib - ${CUDNN_ROOT}/lib/x86_64-linux-gnu + ${CUDNN_ROOT}/lib/${TARGET_ARCH}-linux-gnu $ENV{CUDNN_ROOT} $ENV{CUDNN_ROOT}/lib64 $ENV{CUDNN_ROOT}/lib diff --git a/cmake/external/any.cmake b/cmake/external/any.cmake index 8116f235d535917c03deb646ff4ec083a0cdadc7..85cce80b70a1fcf57015ac7a264e4950616b2717 100644 --- a/cmake/external/any.cmake +++ b/cmake/external/any.cmake @@ -2,13 +2,13 @@ INCLUDE(ExternalProject) SET(ANY_SOURCE_DIR ${THIRD_PARTY_PATH}/any) -INCLUDE_DIRECTORIES(${ANY_SOURCE_DIR}/src/linb_any) +INCLUDE_DIRECTORIES(${ANY_SOURCE_DIR}/src/extern_lib_any) ExternalProject_Add( - linb_any + extern_lib_any ${EXTERNAL_PROJECT_LOG_ARGS} - GIT_REPOSITORY "https://github.com/thelink2012/any.git" - GIT_TAG "8fef1e93710a0edf8d7658999e284a1142c4c020" + GIT_REPOSITORY "https://github.com/PaddlePaddle/any.git" + GIT_TAG "15595d8324be9e8a9a80d9ae442fdd12bd66df5d" PREFIX ${ANY_SOURCE_DIR} UPDATE_COMMAND "" CONFIGURE_COMMAND "" @@ -17,4 +17,15 @@ ExternalProject_Add( TEST_COMMAND "" ) +if (${CMAKE_VERSION} VERSION_LESS "3.3.0") + set(dummyfile ${CMAKE_CURRENT_BINARY_DIR}/lib_any_dummy.c) + file(WRITE ${dummyfile} "const char * dummy_any = \"${dummyfile}\";") + add_library(lib_any STATIC ${dummyfile}) +else() + add_library(lib_any INTERFACE) +endif() + +add_dependencies(lib_any extern_lib_any) + add_definitions(-DANY_IMPL_ANY_CAST_MOVEABLE) +LIST(APPEND external_project_dependencies lib_any) diff --git a/cmake/external/eigen.cmake b/cmake/external/eigen.cmake new file mode 100644 index 0000000000000000000000000000000000000000..f7483f6be9169eb58f0148cd3a956a8c881e1fe3 --- /dev/null +++ b/cmake/external/eigen.cmake @@ -0,0 +1,30 @@ +INCLUDE(ExternalProject) + +SET(EIGEN_SOURCE_DIR ${THIRD_PARTY_PATH}/eigen3) + +INCLUDE_DIRECTORIES(${EIGEN_SOURCE_DIR}/src/extern_eigen3) + +ExternalProject_Add( + extern_eigen3 + ${EXTERNAL_PROJECT_LOG_ARGS} + GIT_REPOSITORY "https://github.com/RLovelett/eigen.git" + GIT_TAG "master" + PREFIX ${EIGEN_SOURCE_DIR} + UPDATE_COMMAND "" + CONFIGURE_COMMAND "" + BUILD_COMMAND "" + INSTALL_COMMAND "" + TEST_COMMAND "" +) + +if (${CMAKE_VERSION} VERSION_LESS "3.3.0") + set(dummyfile ${CMAKE_CURRENT_BINARY_DIR}/eigen3_dummy.c) + file(WRITE ${dummyfile} "const char * dummy_eigen3 = \"${dummyfile}\";") + add_library(eigen3 STATIC ${dummyfile}) +else() + add_library(eigen3 INTERFACE) +endif() + +add_dependencies(eigen3 extern_eigen3) + +LIST(APPEND external_project_dependencies eigen3) diff --git a/cmake/external/gflags.cmake b/cmake/external/gflags.cmake index 0afb3ab9af48046af01f03838eefa0bd2fcb2821..957f8271e4841836956b0c3f2cf3d8c88a31192a 100644 --- a/cmake/external/gflags.cmake +++ b/cmake/external/gflags.cmake @@ -1,11 +1,11 @@ # Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserve. -# +# # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. # You may obtain a copy of the License at -# +# # http://www.apache.org/licenses/LICENSE-2.0 -# +# # Unless required by applicable law or agreed to in writing, software # distributed under the License is distributed on an "AS IS" BASIS, # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. @@ -18,30 +18,51 @@ SET(GFLAGS_SOURCES_DIR ${THIRD_PARTY_PATH}/gflags) SET(GFLAGS_INSTALL_DIR ${THIRD_PARTY_PATH}/install/gflags) SET(GFLAGS_INCLUDE_DIR "${GFLAGS_INSTALL_DIR}/include" CACHE PATH "gflags include directory." FORCE) IF(WIN32) - set(GFLAGS_LIBRARIES "${GFLAGS_INSTALL_DIR}/lib/gflags.lib" CACHE FILEPATH "GFLAGS_LIBRARIES" FORCE) + set(GFLAGS_LIBRARIES "${GFLAGS_INSTALL_DIR}/lib/gflags.lib" CACHE FILEPATH "GFLAGS_LIBRARIES" FORCE) ELSE(WIN32) - set(GFLAGS_LIBRARIES "${GFLAGS_INSTALL_DIR}/lib/libgflags.a" CACHE FILEPATH "GFLAGS_LIBRARIES" FORCE) + set(GFLAGS_LIBRARIES "${GFLAGS_INSTALL_DIR}/lib/libgflags.a" CACHE FILEPATH "GFLAGS_LIBRARIES" FORCE) ENDIF(WIN32) INCLUDE_DIRECTORIES(${GFLAGS_INCLUDE_DIR}) ExternalProject_Add( - gflags + extern_gflags ${EXTERNAL_PROJECT_LOG_ARGS} - GIT_REPOSITORY "https://github.com/gflags/gflags.git" + # TODO(yiwang): The annoying warnings mentioned in + # https://github.com/PaddlePaddle/Paddle/issues/3277 are caused by + # gflags. I fired a PR https://github.com/gflags/gflags/pull/230 + # to fix it. Before it gets accepted by the gflags team, we use + # my personal fork, which contains above fix, temporarily. Let's + # change this back to the official Github repo once my PR is + # merged. + GIT_REPOSITORY "https://github.com/wangkuiyi/gflags.git" PREFIX ${GFLAGS_SOURCES_DIR} UPDATE_COMMAND "" CMAKE_ARGS -DCMAKE_CXX_COMPILER=${CMAKE_CXX_COMPILER} - CMAKE_ARGS -DCMAKE_C_COMPILER=${CMAKE_C_COMPILER} - CMAKE_ARGS -DCMAKE_CXX_FLAGS=${CMAKE_CXX_FLAGS} - CMAKE_ARGS -DCMAKE_C_FLAGS=${CMAKE_C_FLAGS} - CMAKE_ARGS -DCMAKE_INSTALL_PREFIX=${GFLAGS_INSTALL_DIR} - CMAKE_ARGS -DCMAKE_POSITION_INDEPENDENT_CODE=ON - CMAKE_ARGS -DBUILD_TESTING=OFF - CMAKE_ARGS -DCMAKE_BUILD_TYPE=Release + -DCMAKE_C_COMPILER=${CMAKE_C_COMPILER} + -DCMAKE_CXX_FLAGS=${CMAKE_CXX_FLAGS} + -DCMAKE_C_FLAGS=${CMAKE_C_FLAGS} + -DCMAKE_INSTALL_PREFIX=${GFLAGS_INSTALL_DIR} + -DCMAKE_POSITION_INDEPENDENT_CODE=ON + -DBUILD_TESTING=OFF + -DCMAKE_BUILD_TYPE=Release + ${EXTERNAL_OPTIONAL_ARGS} CMAKE_CACHE_ARGS -DCMAKE_INSTALL_PREFIX:PATH=${GFLAGS_INSTALL_DIR} -DCMAKE_POSITION_INDEPENDENT_CODE:BOOL=ON -DCMAKE_BUILD_TYPE:STRING=Release ) +ADD_LIBRARY(gflags STATIC IMPORTED GLOBAL) +SET_PROPERTY(TARGET gflags PROPERTY IMPORTED_LOCATION ${GFLAGS_LIBRARIES}) +ADD_DEPENDENCIES(gflags extern_gflags) + LIST(APPEND external_project_dependencies gflags) + +IF(WITH_C_API) + INSTALL(DIRECTORY ${GFLAGS_INCLUDE_DIR} DESTINATION third_party/gflags) + IF(ANDROID) + INSTALL(FILES ${GFLAGS_LIBRARIES} DESTINATION third_party/gflags/lib/${ANDROID_ABI}) + ELSE() + INSTALL(FILES ${GFLAGS_LIBRARIES} DESTINATION third_party/gflags/lib) + ENDIF() +ENDIF() diff --git a/cmake/external/glog.cmake b/cmake/external/glog.cmake index 4a9e2ecc6bbe74c5856a55fb0c982777d7ac25b7..b3fef738ccc0b5886bb0a32501bb7b7adade0ff1 100644 --- a/cmake/external/glog.cmake +++ b/cmake/external/glog.cmake @@ -19,33 +19,50 @@ SET(GLOG_INSTALL_DIR ${THIRD_PARTY_PATH}/install/glog) SET(GLOG_INCLUDE_DIR "${GLOG_INSTALL_DIR}/include" CACHE PATH "glog include directory." FORCE) IF(WIN32) - SET(GLOG_LIBRARIES "${GLOG_INSTALL_DIR}/lib/libglog.lib" CACHE FILEPATH "glog library." FORCE) + SET(GLOG_LIBRARIES "${GLOG_INSTALL_DIR}/lib/libglog.lib" CACHE FILEPATH "glog library." FORCE) ELSE(WIN32) - SET(GLOG_LIBRARIES "${GLOG_INSTALL_DIR}/lib/libglog.a" CACHE FILEPATH "glog library." FORCE) + SET(GLOG_LIBRARIES "${GLOG_INSTALL_DIR}/lib/libglog.a" CACHE FILEPATH "glog library." FORCE) ENDIF(WIN32) INCLUDE_DIRECTORIES(${GLOG_INCLUDE_DIR}) ExternalProject_Add( - glog + extern_glog ${EXTERNAL_PROJECT_LOG_ARGS} DEPENDS gflags GIT_REPOSITORY "https://github.com/google/glog.git" PREFIX ${GLOG_SOURCES_DIR} UPDATE_COMMAND "" CMAKE_ARGS -DCMAKE_CXX_COMPILER=${CMAKE_CXX_COMPILER} - CMAKE_ARGS -DCMAKE_C_COMPILER=${CMAKE_C_COMPILER} - CMAKE_ARGS -DCMAKE_CXX_FLAGS=${CMAKE_CXX_FLAGS} - CMAKE_ARGS -DCMAKE_C_FLAGS=${CMAKE_C_FLAGS} - CMAKE_ARGS -DCMAKE_INSTALL_PREFIX=${GLOG_INSTALL_DIR} - CMAKE_ARGS -DCMAKE_POSITION_INDEPENDENT_CODE=ON - CMAKE_ARGS -DWITH_GFLAGS=ON - CMAKE_ARGS -Dgflags_DIR=${GFLAGS_INSTALL_DIR}/lib/cmake/gflags - CMAKE_ARGS -DBUILD_TESTING=OFF - CMAKE_ARGS -DCMAKE_BUILD_TYPE=Release + -DCMAKE_C_COMPILER=${CMAKE_C_COMPILER} + -DCMAKE_CXX_FLAGS=${CMAKE_CXX_FLAGS} + -DCMAKE_C_FLAGS=${CMAKE_C_FLAGS} + -DCMAKE_INSTALL_PREFIX=${GLOG_INSTALL_DIR} + -DCMAKE_INSTALL_LIBDIR=${GLOG_INSTALL_DIR}/lib + -DCMAKE_POSITION_INDEPENDENT_CODE=ON + -DWITH_GFLAGS=ON + -Dgflags_DIR=${GFLAGS_INSTALL_DIR}/lib/cmake/gflags + -DBUILD_TESTING=OFF + -DCMAKE_BUILD_TYPE=Release + ${EXTERNAL_OPTIONAL_ARGS} CMAKE_CACHE_ARGS -DCMAKE_INSTALL_PREFIX:PATH=${GLOG_INSTALL_DIR} + -DCMAKE_INSTALL_LIBDIR:PATH=${GLOG_INSTALL_DIR}/lib -DCMAKE_POSITION_INDEPENDENT_CODE:BOOL=ON -DCMAKE_BUILD_TYPE:STRING=Release ) +ADD_LIBRARY(glog STATIC IMPORTED GLOBAL) +SET_PROPERTY(TARGET glog PROPERTY IMPORTED_LOCATION ${GLOG_LIBRARIES}) +ADD_DEPENDENCIES(glog extern_glog gflags) +LINK_LIBRARIES(glog gflags) + LIST(APPEND external_project_dependencies glog) + +IF(WITH_C_API) + INSTALL(DIRECTORY ${GLOG_INCLUDE_DIR} DESTINATION third_party/glog) + IF(ANDROID) + INSTALL(FILES ${GLOG_LIBRARIES} DESTINATION third_party/glog/lib/${ANDROID_ABI}) + ELSE() + INSTALL(FILES ${GLOG_LIBRARIES} DESTINATION third_party/glog/lib) + ENDIF() +ENDIF() diff --git a/cmake/external/gtest.cmake b/cmake/external/gtest.cmake index 49c7d71443cda700a14af6be65ff6658eec7229f..6a2a79b7631b32e8a099797de509af64533bbb95 100644 --- a/cmake/external/gtest.cmake +++ b/cmake/external/gtest.cmake @@ -1,11 +1,11 @@ # Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserve. -# +# # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. # You may obtain a copy of the License at -# +# # http://www.apache.org/licenses/LICENSE-2.0 -# +# # Unless required by applicable law or agreed to in writing, software # distributed under the License is distributed on an "AS IS" BASIS, # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. @@ -34,26 +34,42 @@ IF(WITH_TESTING) "${GTEST_INSTALL_DIR}/lib/libgtest_main.a" CACHE FILEPATH "gtest main libraries." FORCE) ENDIF(WIN32) + IF(WITH_MKLML) + # wait for mklml downloading completed + SET(GTEST_DEPENDS ${MKLML_PROJECT}) + ENDIF() + ExternalProject_Add( - gtest + extern_gtest ${EXTERNAL_PROJECT_LOG_ARGS} + DEPENDS ${GTEST_DEPENDS} GIT_REPOSITORY "https://github.com/google/googletest.git" GIT_TAG "release-1.8.0" PREFIX ${GTEST_SOURCES_DIR} UPDATE_COMMAND "" CMAKE_ARGS -DCMAKE_CXX_COMPILER=${CMAKE_CXX_COMPILER} - CMAKE_ARGS -DCMAKE_C_COMPILER=${CMAKE_C_COMPILER} - CMAKE_ARGS -DCMAKE_CXX_FLAGS=${CMAKE_CXX_FLAGS} - CMAKE_ARGS -DCMAKE_C_FLAGS=${CMAKE_C_FLAGS} - CMAKE_ARGS -DCMAKE_INSTALL_PREFIX=${GTEST_INSTALL_DIR} - CMAKE_ARGS -DCMAKE_POSITION_INDEPENDENT_CODE=ON - CMAKE_ARGS -DBUILD_GMOCK=ON - CMAKE_ARGS -Dgtest_disable_pthreads=ON - CMAKE_ARGS -Dgtest_force_shared_crt=ON - CMAKE_ARGS -DCMAKE_BUILD_TYPE=Release + -DCMAKE_C_COMPILER=${CMAKE_C_COMPILER} + -DCMAKE_CXX_FLAGS=${CMAKE_CXX_FLAGS} + -DCMAKE_C_FLAGS=${CMAKE_C_FLAGS} + -DCMAKE_INSTALL_PREFIX=${GTEST_INSTALL_DIR} + -DCMAKE_POSITION_INDEPENDENT_CODE=ON + -DBUILD_GMOCK=ON + -Dgtest_disable_pthreads=ON + -Dgtest_force_shared_crt=ON + -DCMAKE_BUILD_TYPE=Release + ${EXTERNAL_OPTIONAL_ARGS} CMAKE_CACHE_ARGS -DCMAKE_INSTALL_PREFIX:PATH=${GTEST_INSTALL_DIR} -DCMAKE_POSITION_INDEPENDENT_CODE:BOOL=ON -DCMAKE_BUILD_TYPE:STRING=Release ) - LIST(APPEND external_project_dependencies gtest) + + ADD_LIBRARY(gtest STATIC IMPORTED GLOBAL) + SET_PROPERTY(TARGET gtest PROPERTY IMPORTED_LOCATION ${GTEST_LIBRARIES}) + ADD_DEPENDENCIES(gtest extern_gtest) + + ADD_LIBRARY(gtest_main STATIC IMPORTED GLOBAL) + SET_PROPERTY(TARGET gtest_main PROPERTY IMPORTED_LOCATION ${GTEST_MAIN_LIBRARIES}) + ADD_DEPENDENCIES(gtest_main extern_gtest) + + LIST(APPEND external_project_dependencies gtest gtest_main) ENDIF(WITH_TESTING) diff --git a/cmake/external/mkldnn.cmake b/cmake/external/mkldnn.cmake new file mode 100644 index 0000000000000000000000000000000000000000..9686df00219001769d074ee815d9cc8db0258496 --- /dev/null +++ b/cmake/external/mkldnn.cmake @@ -0,0 +1,67 @@ +# Copyright (c) 2017 PaddlePaddle Authors. All Rights Reserve. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +IF(NOT ${WITH_MKLDNN}) + return() +ENDIF(NOT ${WITH_MKLDNN}) + +INCLUDE(ExternalProject) + +SET(MKLDNN_PROJECT "extern_mkldnn") +SET(MKLDNN_SOURCES_DIR ${THIRD_PARTY_PATH}/mkldnn) +SET(MKLDNN_INSTALL_DIR ${THIRD_PARTY_PATH}/install/mkldnn) +SET(MKLDNN_INC_DIR "${MKLDNN_INSTALL_DIR}/include" CACHE PATH "mkldnn include directory." FORCE) + +IF(WIN32 OR APPLE) + MESSAGE(WARNING + "Windows or Mac is not supported with MKLDNN in Paddle yet." + "Force WITH_MKLDNN=OFF") + SET(WITH_MKLDNN OFF CACHE STRING "Disable MKLDNN in Windows and MacOS" FORCE) + return() +ENDIF() + +SET(MKLDNN_LIB "${MKLDNN_INSTALL_DIR}/lib/libmkldnn.so" CACHE FILEPATH "mkldnn library." FORCE) +MESSAGE(STATUS "Set ${MKLDNN_INSTALL_DIR}/lib to runtime path") +SET(CMAKE_INSTALL_RPATH_USE_LINK_PATH TRUE) +SET(CMAKE_INSTALL_RPATH "${CMAKE_INSTALL_RPATH}" "${MKLDNN_INSTALL_DIR}/lib") + +INCLUDE_DIRECTORIES(${MKLDNN_INC_DIR}) + +IF(${CBLAS_PROVIDER} STREQUAL "MKLML") + SET(MKLDNN_DEPENDS ${MKLML_PROJECT}) + SET(MKLDNN_MKLROOT ${MKLML_ROOT}) + SET(MKLDNN_IOMP_LIB ${MKLML_IOMP_LIB}) + SET(MKLDNN_IOMP_DIR ${MKLML_LIB_DIR}) + MESSAGE(STATUS "Build MKLDNN with ${MKLDNN_MKLROOT}") +ENDIF() + +ExternalProject_Add( + ${MKLDNN_PROJECT} + ${EXTERNAL_PROJECT_LOG_ARGS} + DEPENDS ${MKLDNN_DEPENDS} + GIT_REPOSITORY "https://github.com/01org/mkl-dnn.git" + GIT_TAG "v0.10" + PREFIX ${MKLDNN_SOURCES_DIR} + UPDATE_COMMAND "" + CMAKE_ARGS -DCMAKE_INSTALL_PREFIX=${MKLDNN_INSTALL_DIR} + CMAKE_ARGS -DMKLROOT=${MKLDNN_MKLROOT} + CMAKE_CACHE_ARGS -DCMAKE_INSTALL_PREFIX:PATH=${MKLDNN_INSTALL_DIR} + -DMKLROOT:PATH=${MKLDNN_MKLROOT} +) + +ADD_LIBRARY(mkldnn SHARED IMPORTED GLOBAL) +SET_PROPERTY(TARGET mkldnn PROPERTY IMPORTED_LOCATION ${MKLDNN_LIB}) +ADD_DEPENDENCIES(mkldnn ${MKLDNN_PROJECT}) +MESSAGE(STATUS "Mkldnn library: ${MKLDNN_LIB}") +LIST(APPEND external_project_dependencies mkldnn) diff --git a/cmake/external/mklml.cmake b/cmake/external/mklml.cmake new file mode 100644 index 0000000000000000000000000000000000000000..74f3279831357c21038df133df0f5a432a6dfd20 --- /dev/null +++ b/cmake/external/mklml.cmake @@ -0,0 +1,68 @@ +# Copyright (c) 2017 PaddlePaddle Authors. All Rights Reserve. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +IF(NOT ${WITH_MKLML}) + return() +ENDIF(NOT ${WITH_MKLML}) + +IF(WIN32 OR APPLE) + MESSAGE(WARNING + "Windows or Mac is not supported with MKLML in Paddle yet." + "Force WITH_MKLML=OFF") + SET(WITH_MKLML OFF CACHE STRING "Disable MKLML package in Windows and MacOS" FORCE) + return() +ENDIF() + +INCLUDE(ExternalProject) + +SET(MKLML_PROJECT "extern_mklml") +SET(MKLML_VER "mklml_lnx_2018.0.20170720") +SET(MKLML_URL "https://github.com/01org/mkl-dnn/releases/download/v0.10/${MKLML_VER}.tgz") +SET(MKLML_SOURCE_DIR "${THIRD_PARTY_PATH}/mklml") +SET(MKLML_DOWNLOAD_DIR "${MKLML_SOURCE_DIR}/src/${MKLML_PROJECT}") +SET(MKLML_DST_DIR "mklml") +SET(MKLML_INSTALL_ROOT "${THIRD_PARTY_PATH}/install") +SET(MKLML_INSTALL_DIR ${MKLML_INSTALL_ROOT}/${MKLML_DST_DIR}) +SET(MKLML_ROOT ${MKLML_INSTALL_DIR}/${MKLML_VER}) +SET(MKLML_INC_DIR ${MKLML_ROOT}/include) +SET(MKLML_LIB_DIR ${MKLML_ROOT}/lib) +SET(MKLML_LIB ${MKLML_LIB_DIR}/libmklml_intel.so) +SET(MKLML_IOMP_LIB ${MKLML_LIB_DIR}/libiomp5.so) +SET(CMAKE_INSTALL_RPATH "${CMAKE_INSTALL_RPATH}" "${MKLML_ROOT}/lib") + +INCLUDE_DIRECTORIES(${MKLML_INC_DIR}) + +FILE(WRITE ${MKLML_DOWNLOAD_DIR}/CMakeLists.txt + "PROJECT(MKLML)\n" + "cmake_minimum_required(VERSION 3.0)\n" + "install(DIRECTORY ${MKLML_VER}\n" + " DESTINATION ${MKLML_DST_DIR})\n") + +ExternalProject_Add( + ${MKLML_PROJECT} + ${EXTERNAL_PROJECT_LOG_ARGS} + PREFIX ${MKLML_SOURCE_DIR} + DOWNLOAD_DIR ${MKLML_DOWNLOAD_DIR} + DOWNLOAD_COMMAND wget --no-check-certificate ${MKLML_URL} -c -q -O ${MKLML_VER}.tgz + && tar zxf ${MKLML_VER}.tgz + DOWNLOAD_NO_PROGRESS 1 + UPDATE_COMMAND "" + CMAKE_ARGS -DCMAKE_INSTALL_PREFIX=${MKLML_INSTALL_ROOT} + CMAKE_CACHE_ARGS -DCMAKE_INSTALL_PREFIX:PATH=${MKLML_INSTALL_ROOT} +) + +ADD_LIBRARY(mklml SHARED IMPORTED GLOBAL) +SET_PROPERTY(TARGET mklml PROPERTY IMPORTED_LOCATION ${MKLML_LIB}) +ADD_DEPENDENCIES(mklml ${MKLML_PROJECT}) +LIST(APPEND external_project_dependencies mklml) diff --git a/cmake/external/nnpack.cmake b/cmake/external/nnpack.cmake new file mode 100644 index 0000000000000000000000000000000000000000..d42bcb0f329041462bd8b568052fbb8226d18e4e --- /dev/null +++ b/cmake/external/nnpack.cmake @@ -0,0 +1,30 @@ +# Find the NNPACK library +# NNPACK_ROOT - where to find NNPACK include and library. +# + +set(NNPACK_FOUND OFF) +set(NNPACK_ROOT $ENV{NNPACK_ROOT} CACHE PATH "Folder contains NNPACK") +find_path(NNPACK_INC_DIR nnpack.h PATHS ${NNPACK_ROOT}/include) +find_library(NNPACK_LIB NAMES nnpack PATHS ${NNPACK_ROOT}/lib) +find_library(PTHREADPOOL_LIB NAMES pthreadpool PATHS ${NNPACK_ROOT}/lib) +find_library(NNPACK_UKERNELS_LIB NAMES nnpack_ukernels PATHS ${NNPACK_ROOT}/lib) +find_library(NNPACK_CPUFEATURES_LIB NAMES cpufeatures PATHS ${NNPACK_ROOT}/lib) + +if(NNPACK_INC_DIR AND NNPACK_LIB AND PTHREADPOOL_LIB) + set(NNPACK_FOUND ON) + INCLUDE_DIRECTORIES(${NNPACK_INC_DIR}) + + set(NNPACK_LIBS) + list(APPEND NNPACK_LIBS ${NNPACK_LIB} ${PTHREADPOOL_LIB}) + if (NNPACK_UKERNELS_LIB) + list(APPEND NNPACK_LIBS ${NNPACK_UKERNELS_LIB}) + endif() + if (NNPACK_CPUFEATURES_LIB) + list(APPEND NNPACK_LIBS ${NNPACK_CPUFEATURES_LIB}) + endif() + if(NOT ANDROID) + list(APPEND NNPACK_LIBS "rt") + endif() +else() + message(FATAL_ERROR "Cannot find NNPACK in (${NNPACK_ROOT})") +endif() diff --git a/cmake/external/openblas.cmake b/cmake/external/openblas.cmake index 92ea23c7633e974fd09251f967965364b1928307..143b57a954e4e6b2bf273535ebdf0fa8e3dab768 100644 --- a/cmake/external/openblas.cmake +++ b/cmake/external/openblas.cmake @@ -12,6 +12,10 @@ # See the License for the specific language governing permissions and # limitations under the License. +IF(USE_EIGEN_FOR_BLAS) + return() +ENDIF(USE_EIGEN_FOR_BLAS) + INCLUDE(cblas) IF(NOT ${CBLAS_FOUND}) @@ -21,65 +25,108 @@ IF(NOT ${CBLAS_FOUND}) SET(CBLAS_INSTALL_DIR ${THIRD_PARTY_PATH}/install/openblas) SET(CBLAS_INC_DIR "${CBLAS_INSTALL_DIR}/include" CACHE PATH "openblas include directory." FORCE) - IF(WIN32) - SET(CBLAS_LIBRARIES "${CBLAS_INSTALL_DIR}/lib/openblas.lib" CACHE FILEPATH "openblas library." FORCE) - ELSE(WIN32) - SET(CBLAS_LIBRARIES "${CBLAS_INSTALL_DIR}/lib/libopenblas.a" CACHE FILEPATH "openblas library" FORCE) - ENDIF(WIN32) + SET(CBLAS_LIBRARIES + "${CBLAS_INSTALL_DIR}/lib/${CMAKE_STATIC_LIBRARY_PREFIX}openblas${CMAKE_STATIC_LIBRARY_SUFFIX}" + CACHE FILEPATH "openblas library." FORCE) - IF(CMAKE_COMPILER_IS_GNUCC) - ENABLE_LANGUAGE(Fortran) - if (NOT CMAKE_Fortran_COMPILER_VERSION) - # cmake < 3.4 cannot get CMAKE_Fortran_COMPILER_VERSION directly. - execute_process(COMMAND ${CMAKE_Fortran_COMPILER} -dumpversion - OUTPUT_VARIABLE CMAKE_Fortran_COMPILER_VERSION) - endif() - string(REGEX MATCHALL "[0-9]+" Fortran_VERSION ${CMAKE_Fortran_COMPILER_VERSION}) - list(GET Fortran_VERSION 0 Fortran_MAJOR) - list(GET Fortran_VERSION 1 Fortran_MINOR) - find_library(GFORTRAN_LIBRARY NAMES gfortran PATHS - /lib - /usr/lib - /usr/lib/gcc/x86_64-linux-gnu/${Fortran_MAJOR}.${Fortran_MINOR}/ - /usr/lib/gcc/x86_64-linux-gnu/${Fortran_MAJOR}/) - if (NOT GFORTRAN_LIBRARY) - message(FATAL_ERROR "Cannot found gfortran library which it is used by openblas") - endif() - find_package(Threads REQUIRED) - LIST(APPEND CBLAS_LIBRARIES ${GFORTRAN_LIBRARY} ${CMAKE_THREAD_LIBS_INIT}) - ENDIF(CMAKE_COMPILER_IS_GNUCC) + SET(OPENBLAS_CC "${CMAKE_C_COMPILER}") - IF(NOT CMAKE_Fortran_COMPILER) - MESSAGE(FATAL_ERROR "To build lapack in libopenblas, " - "you need to set gfortran compiler: cmake .. -DCMAKE_Fortran_COMPILER=...") - ENDIF(NOT CMAKE_Fortran_COMPILER) + IF(CMAKE_CROSSCOMPILING) + SET(OPTIONAL_ARGS HOSTCC=${HOST_C_COMPILER}) + GET_FILENAME_COMPONENT(CROSS_SUFFIX ${CMAKE_C_COMPILER} DIRECTORY) + SET(CROSS_SUFFIX ${CROSS_SUFFIX}/) + IF(ANDROID) + # arm_soft_fp_abi branch of OpenBLAS to support softfp + # https://github.com/xianyi/OpenBLAS/tree/arm_soft_fp_abi + SET(OPENBLAS_COMMIT "b5c96fcfcdc82945502a2303116a64d89985daf5") + IF(ANDROID_ABI MATCHES "^armeabi(-v7a)?$") + SET(OPTIONAL_ARGS ${OPTIONAL_ARGS} TARGET=ARMV7 ARM_SOFTFP_ABI=1 USE_THREAD=0) + ELSEIF(ANDROID_ABI STREQUAL "arm64-v8a") + SET(OPTIONAL_ARGS ${OPTIONAL_ARGS} TARGET=ARMV8 BINARY=64 USE_THREAD=0) + ENDIF() + ELSEIF(IOS) + # FIXME(liuyiqun): support multiple architectures + SET(OPENBLAS_COMMIT "b5c96fcfcdc82945502a2303116a64d89985daf5") + SET(OPENBLAS_CC "${OPENBLAS_CC} ${CMAKE_C_FLAGS} -isysroot ${CMAKE_OSX_SYSROOT}") + IF(CMAKE_OSX_ARCHITECTURES MATCHES "armv7") + SET(OPENBLAS_CC "${OPENBLAS_CC} -arch armv7") + SET(OPTIONAL_ARGS ${OPTIONAL_ARGS} TARGET=ARMV7 ARM_SOFTFP_ABI=1 USE_THREAD=0) + ELSEIF(CMAKE_OSX_ARCHITECTURES MATCHES "arm64") + SET(OPENBLAS_CC "${OPENBLAS_CC} -arch arm64") + SET(OPTIONAL_ARGS ${OPTIONAL_ARGS} TARGET=ARMV8 BINARY=64 USE_THREAD=0 CROSS_SUFFIX=${CROSS_SUFFIX}) + ENDIF() + ELSEIF(RPI) + # use hardfp + SET(OPENBLAS_COMMIT "v0.2.20") + SET(OPTIONAL_ARGS ${OPTIONAL_ARGS} TARGET=ARMV7 USE_THREAD=0) + ENDIF() + ELSE() + IF(APPLE) + SET(OPENBLAS_CC "${CMAKE_C_COMPILER} -isysroot ${CMAKE_OSX_SYSROOT}") + ENDIF() + SET(OPENBLAS_COMMIT "v0.2.20") + SET(OPTIONAL_ARGS "") + IF(CMAKE_SYSTEM_PROCESSOR MATCHES "^x86(_64)?$") + SET(OPTIONAL_ARGS DYNAMIC_ARCH=1 NUM_THREADS=64) + ENDIF() + ENDIF() - ADD_DEFINITIONS(-DPADDLE_USE_LAPACK) + SET(COMMON_ARGS CC=${OPENBLAS_CC} NO_SHARED=1 NO_LAPACK=1 libs) ExternalProject_Add( - openblas + extern_openblas ${EXTERNAL_PROJECT_LOG_ARGS} GIT_REPOSITORY https://github.com/xianyi/OpenBLAS.git - GIT_TAG v0.2.19 + GIT_TAG ${OPENBLAS_COMMIT} PREFIX ${CBLAS_SOURCES_DIR} INSTALL_DIR ${CBLAS_INSTALL_DIR} BUILD_IN_SOURCE 1 - BUILD_COMMAND ${CMAKE_MAKE_PROGRAM} FC=${CMAKE_Fortran_COMPILER} CC=${CMAKE_C_COMPILER} HOSTCC=${CMAKE_C_COMPILER} DYNAMIC_ARCH=1 NO_SHARED=1 libs netlib - INSTALL_COMMAND ${CMAKE_MAKE_PROGRAM} install NO_SHARED=1 PREFIX= + BUILD_COMMAND ${CMAKE_MAKE_PROGRAM} ${COMMON_ARGS} ${OPTIONAL_ARGS} + INSTALL_COMMAND ${CMAKE_MAKE_PROGRAM} install NO_SHARED=1 NO_LAPACK=1 PREFIX= UPDATE_COMMAND "" CONFIGURE_COMMAND "" ) - ExternalProject_Add_Step( - openblas lapacke_install - COMMAND ${CMAKE_COMMAND} -E copy "${CBLAS_SOURCES_DIR}/src/openblas/lapack-netlib/LAPACKE/include/lapacke_mangling_with_flags.h" "${CBLAS_INSTALL_DIR}/include/lapacke_mangling.h" - COMMAND ${CMAKE_COMMAND} -E copy "${CBLAS_SOURCES_DIR}/src/openblas/lapack-netlib/LAPACKE/include/lapacke.h" "${CBLAS_INSTALL_DIR}/include/lapacke.h" - COMMAND ${CMAKE_COMMAND} -E copy "${CBLAS_SOURCES_DIR}/src/openblas/lapack-netlib/LAPACKE/include/lapacke_config.h" "${CBLAS_INSTALL_DIR}/include/lapacke_config.h" - COMMAND ${CMAKE_COMMAND} -E copy "${CBLAS_SOURCES_DIR}/src/openblas/lapack-netlib/LAPACKE/include/lapacke_utils.h" "${CBLAS_INSTALL_DIR}/include/lapacke_utils.h" - DEPENDEES install - ) - - LIST(APPEND external_project_dependencies openblas) + IF(WITH_C_API) + INSTALL(DIRECTORY ${CBLAS_INC_DIR} DESTINATION third_party/openblas) + # Because libopenblas.a is a symbolic link of another library, thus need to + # install the whole directory. + IF(ANDROID) + SET(TMP_INSTALL_DIR third_party/openblas/lib/${ANDROID_ABI}) + ELSE() + SET(TMP_INSTALL_DIR third_party/openblas/lib) + ENDIF() + INSTALL(CODE "execute_process( + COMMAND ${CMAKE_COMMAND} -E copy_directory ${CBLAS_INSTALL_DIR}/lib + destination ${CMAKE_INSTALL_PREFIX}/${TMP_INSTALL_DIR} + )" + ) + INSTALL(CODE "MESSAGE(STATUS \"Installing: \" + \"${CBLAS_INSTALL_DIR}/lib -> ${CMAKE_INSTALL_PREFIX}/${TMP_INSTALL_DIR}\" + )" + ) + ENDIF() ENDIF(NOT ${CBLAS_FOUND}) +MESSAGE(STATUS "BLAS library: ${CBLAS_LIBRARIES}") INCLUDE_DIRECTORIES(${CBLAS_INC_DIR}) + +# FIXME(gangliao): generate cblas target to track all high performance +# linear algebra libraries for cc_library(xxx SRCS xxx.c DEPS cblas) +SET(dummyfile ${CMAKE_CURRENT_BINARY_DIR}/cblas_dummy.c) +FILE(WRITE ${dummyfile} "const char * dummy = \"${dummyfile}\";") +IF(${CBLAS_PROVIDER} MATCHES MKL) + ADD_LIBRARY(cblas SHARED ${dummyfile}) +ELSE() + ADD_LIBRARY(cblas STATIC ${dummyfile}) +ENDIF() +TARGET_LINK_LIBRARIES(cblas ${CBLAS_LIBRARIES}) + +IF(NOT ${CBLAS_FOUND}) + ADD_DEPENDENCIES(cblas extern_openblas) + LIST(APPEND external_project_dependencies cblas) +ELSE() + IF("${CBLAS_PROVIDER}" STREQUAL "MKLML") + ADD_DEPENDENCIES(cblas mklml) + ENDIF() +ENDIF(NOT ${CBLAS_FOUND}) diff --git a/cmake/external/protobuf.cmake b/cmake/external/protobuf.cmake index 2df042d226af8308d00f7870e7d2de0eacfdf07e..7cf7ba85cca4c248dcc74e078124c0b3815ee380 100644 --- a/cmake/external/protobuf.cmake +++ b/cmake/external/protobuf.cmake @@ -1,11 +1,11 @@ # Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserve. -# +# # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. # You may obtain a copy of the License at -# +# # http://www.apache.org/licenses/LICENSE-2.0 -# +# # Unless required by applicable law or agreed to in writing, software # distributed under the License is distributed on an "AS IS" BASIS, # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. @@ -13,43 +13,173 @@ # limitations under the License. INCLUDE(ExternalProject) +# Always invoke `FIND_PACKAGE(Protobuf)` for importing function protobuf_generate_cpp +FIND_PACKAGE(Protobuf QUIET) +SET(PROTOBUF_FOUND "OFF") -set(PROTOBUF_VERSION 3.1) -FIND_PACKAGE(Protobuf ${PROTOBUF_VERSION}) +if(NOT COMMAND protobuf_generate_python) # before cmake 3.4, protobuf_genrerate_python is not defined. + function(protobuf_generate_python SRCS) + # shameless copy from https://github.com/Kitware/CMake/blob/master/Modules/FindProtobuf.cmake + if(NOT ARGN) + message(SEND_ERROR "Error: PROTOBUF_GENERATE_PYTHON() called without any proto files") + return() + endif() -IF(PROTOBUF_FOUND) + if(PROTOBUF_GENERATE_CPP_APPEND_PATH) + # Create an include path for each file specified + foreach(FIL ${ARGN}) + get_filename_component(ABS_FIL ${FIL} ABSOLUTE) + get_filename_component(ABS_PATH ${ABS_FIL} PATH) + list(FIND _protobuf_include_path ${ABS_PATH} _contains_already) + if(${_contains_already} EQUAL -1) + list(APPEND _protobuf_include_path -I ${ABS_PATH}) + endif() + endforeach() + else() + set(_protobuf_include_path -I ${CMAKE_CURRENT_SOURCE_DIR}) + endif() + + if(DEFINED PROTOBUF_IMPORT_DIRS AND NOT DEFINED Protobuf_IMPORT_DIRS) + set(Protobuf_IMPORT_DIRS "${PROTOBUF_IMPORT_DIRS}") + endif() + + if(DEFINED Protobuf_IMPORT_DIRS) + foreach(DIR ${Protobuf_IMPORT_DIRS}) + get_filename_component(ABS_PATH ${DIR} ABSOLUTE) + list(FIND _protobuf_include_path ${ABS_PATH} _contains_already) + if(${_contains_already} EQUAL -1) + list(APPEND _protobuf_include_path -I ${ABS_PATH}) + endif() + endforeach() + endif() + + set(${SRCS}) + foreach(FIL ${ARGN}) + get_filename_component(ABS_FIL ${FIL} ABSOLUTE) + get_filename_component(FIL_WE ${FIL} NAME_WE) + if(NOT PROTOBUF_GENERATE_CPP_APPEND_PATH) + get_filename_component(FIL_DIR ${FIL} DIRECTORY) + if(FIL_DIR) + set(FIL_WE "${FIL_DIR}/${FIL_WE}") + endif() + endif() + + list(APPEND ${SRCS} "${CMAKE_CURRENT_BINARY_DIR}/${FIL_WE}_pb2.py") + add_custom_command( + OUTPUT "${CMAKE_CURRENT_BINARY_DIR}/${FIL_WE}_pb2.py" + COMMAND ${Protobuf_PROTOC_EXECUTABLE} --python_out ${CMAKE_CURRENT_BINARY_DIR} ${_protobuf_include_path} ${ABS_FIL} + DEPENDS ${ABS_FIL} ${Protobuf_PROTOC_EXECUTABLE} + COMMENT "Running Python protocol buffer compiler on ${FIL}" + VERBATIM ) + endforeach() + + set(${SRCS} ${${SRCS}} PARENT_SCOPE) + endfunction() +endif() + +# Print and set the protobuf library information, +# finish this cmake process and exit from this file. +macro(PROMPT_PROTOBUF_LIB) + SET(protobuf_DEPS ${ARGN}) + + MESSAGE(STATUS "Protobuf protoc executable: ${PROTOBUF_PROTOC_EXECUTABLE}") + MESSAGE(STATUS "Protobuf library: ${PROTOBUF_LIBRARY}") + MESSAGE(STATUS "Protobuf version: ${PROTOBUF_VERSION}") + INCLUDE_DIRECTORIES(${PROTOBUF_INCLUDE_DIR}) + + # Assuming that all the protobuf libraries are of the same type. + IF(${PROTOBUF_LIBRARY} MATCHES "${CMAKE_STATIC_LIBRARY_SUFFIX}$") + SET(protobuf_LIBTYPE STATIC) + ELSEIF(${PROTOBUF_LIBRARY} MATCHES "${CMAKE_SHARED_LIBRARY_SUFFIX}$") + SET(protobuf_LIBTYPE SHARED) + ELSE() + MESSAGE(FATAL_ERROR "Unknown library type: ${PROTOBUF_LIBRARY}") + ENDIF() + + ADD_LIBRARY(protobuf ${protobuf_LIBTYPE} IMPORTED GLOBAL) + SET_PROPERTY(TARGET protobuf PROPERTY IMPORTED_LOCATION ${PROTOBUF_LIBRARY}) + + ADD_LIBRARY(protobuf_lite ${protobuf_LIBTYPE} IMPORTED GLOBAL) + SET_PROPERTY(TARGET protobuf_lite PROPERTY IMPORTED_LOCATION ${PROTOBUF_LITE_LIBRARY}) + + ADD_LIBRARY(libprotoc ${protobuf_LIBTYPE} IMPORTED GLOBAL) + SET_PROPERTY(TARGET libprotoc PROPERTY IMPORTED_LOCATION ${PROTOC_LIBRARY}) + + ADD_EXECUTABLE(protoc IMPORTED GLOBAL) + SET_PROPERTY(TARGET protoc PROPERTY IMPORTED_LOCATION ${PROTOBUF_PROTOC_EXECUTABLE}) + # FIND_Protobuf.cmake uses `Protobuf_PROTOC_EXECUTABLE`. + # make `protobuf_generate_cpp` happy. + SET(Protobuf_PROTOC_EXECUTABLE ${PROTOBUF_PROTOC_EXECUTABLE}) + + FOREACH(dep ${protobuf_DEPS}) + ADD_DEPENDENCIES(protobuf ${dep}) + ADD_DEPENDENCIES(protobuf_lite ${dep}) + ADD_DEPENDENCIES(libprotoc ${dep}) + ADD_DEPENDENCIES(protoc ${dep}) + ENDFOREACH() + + LIST(APPEND external_project_dependencies protobuf) + RETURN() +endmacro() +macro(SET_PROTOBUF_VERSION) EXEC_PROGRAM(${PROTOBUF_PROTOC_EXECUTABLE} ARGS --version OUTPUT_VARIABLE PROTOBUF_VERSION) STRING(REGEX MATCH "[0-9]+.[0-9]+" PROTOBUF_VERSION "${PROTOBUF_VERSION}") - IF (${PROTOBUF_VERSION} VERSION_LESS "3.1.0") - SET(PROTOBUF_FOUND OFF) - ENDIF() -ENDIF(PROTOBUF_FOUND) +endmacro() -IF(NOT PROTOBUF_FOUND) - SET(PROTOBUF_SOURCES_DIR ${THIRD_PARTY_PATH}/protobuf) - SET(PROTOBUF_INSTALL_DIR ${THIRD_PARTY_PATH}/install/protobuf) - SET(PROTOBUF_INCLUDE_DIR "${PROTOBUF_INSTALL_DIR}/include" CACHE PATH "protobuf include directory." FORCE) - - IF(WIN32) - SET(PROTOBUF_LITE_LIBRARY - "${PROTOBUF_INSTALL_DIR}/lib/libprotobuf-lite.lib" CACHE FILEPATH "protobuf lite library." FORCE) - SET(PROTOBUF_LIBRARY - "${PROTOBUF_INSTALL_DIR}/lib/libprotobuf.lib" CACHE FILEPATH "protobuf library." FORCE) - SET(PROTOBUF_PROTOC_LIBRARY - "${PROTOBUF_INSTALL_DIR}/lib/libprotoc.lib" CACHE FILEPATH "protoc library." FORCE) - SET(PROTOBUF_PROTOC_EXECUTABLE "${PROTOBUF_INSTALL_DIR}/bin/protoc.exe" CACHE FILEPATH "protobuf executable." FORCE) - ELSE(WIN32) - SET(PROTOBUF_LITE_LIBRARY - "${PROTOBUF_INSTALL_DIR}/lib/libprotobuf-lite.a" CACHE FILEPATH "protobuf lite library." FORCE) - SET(PROTOBUF_LIBRARY - "${PROTOBUF_INSTALL_DIR}/lib/libprotobuf.a" CACHE FILEPATH "protobuf library." FORCE) - SET(PROTOBUF_PROTOC_LIBRARY - "${PROTOBUF_INSTALL_DIR}/lib/libprotoc.a" CACHE FILEPATH "protoc library." FORCE) - SET(PROTOBUF_PROTOC_EXECUTABLE "${PROTOBUF_INSTALL_DIR}/bin/protoc" CACHE FILEPATH "protobuf executable." FORCE) - ENDIF(WIN32) +set(PROTOBUF_ROOT "" CACHE PATH "Folder contains protobuf") +if (NOT "${PROTOBUF_ROOT}" STREQUAL "") + find_path(PROTOBUF_INCLUDE_DIR google/protobuf/message.h PATHS ${PROTOBUF_ROOT}/include) + find_library(PROTOBUF_LIBRARY protobuf PATHS ${PROTOBUF_ROOT}/lib) + find_library(PROTOBUF_LITE_LIBRARY protobuf-lite PATHS ${PROTOBUF_ROOT}/lib) + find_library(PROTOBUF_PROTOC_LIBRARY protoc PATHS ${PROTOBUF_ROOT}/lib) + find_program(PROTOBUF_PROTOC_EXECUTABLE protoc PATHS ${PROTOBUF_ROOT}/bin) + if (PROTOBUF_INCLUDE_DIR AND PROTOBUF_LIBRARY AND PROTOBUF_LITE_LIBRARY AND PROTOBUF_PROTOC_LIBRARY AND PROTOBUF_PROTOC_EXECUTABLE) + message(STATUS "Using custom protobuf library in ${PROTOBUF_ROOT}.") + SET_PROTOBUF_VERSION() + PROMPT_PROTOBUF_LIB() + else() + message(WARNING "Cannot find protobuf library in ${PROTOBUF_ROOT}.") + endif() +endif() + +FUNCTION(build_protobuf TARGET_NAME BUILD_FOR_HOST) + STRING(REPLACE "extern_" "" TARGET_DIR_NAME "${TARGET_NAME}") + SET(PROTOBUF_SOURCES_DIR ${THIRD_PARTY_PATH}/${TARGET_DIR_NAME}) + SET(PROTOBUF_INSTALL_DIR ${THIRD_PARTY_PATH}/install/${TARGET_DIR_NAME}) + + SET(${TARGET_NAME}_INCLUDE_DIR "${PROTOBUF_INSTALL_DIR}/include" PARENT_SCOPE) + SET(PROTOBUF_INCLUDE_DIR "${PROTOBUF_INSTALL_DIR}/include" PARENT_SCOPE) + SET(${TARGET_NAME}_LITE_LIBRARY + "${PROTOBUF_INSTALL_DIR}/lib/libprotobuf-lite${CMAKE_STATIC_LIBRARY_SUFFIX}" + PARENT_SCOPE) + SET(${TARGET_NAME}_LIBRARY + "${PROTOBUF_INSTALL_DIR}/lib/libprotobuf${CMAKE_STATIC_LIBRARY_SUFFIX}" + PARENT_SCOPE) + SET(${TARGET_NAME}_PROTOC_LIBRARY + "${PROTOBUF_INSTALL_DIR}/lib/libprotoc${CMAKE_STATIC_LIBRARY_SUFFIX}" + PARENT_SCOPE) + SET(${TARGET_NAME}_PROTOC_EXECUTABLE + "${PROTOBUF_INSTALL_DIR}/bin/protoc${CMAKE_EXECUTABLE_SUFFIX}" + PARENT_SCOPE) + + SET(OPTIONAL_CACHE_ARGS "") + SET(OPTIONAL_ARGS "") + IF(BUILD_FOR_HOST) + SET(OPTIONAL_ARGS "-Dprotobuf_WITH_ZLIB=OFF") + ELSE() + SET(OPTIONAL_ARGS + "-DCMAKE_CXX_COMPILER=${CMAKE_CXX_COMPILER}" + "-DCMAKE_C_COMPILER=${CMAKE_C_COMPILER}" + "-DCMAKE_CXX_FLAGS=${CMAKE_CXX_FLAGS}" + "-DCMAKE_C_FLAGS=${CMAKE_C_FLAGS}" + "-Dprotobuf_WITH_ZLIB=ON" + "-DZLIB_ROOT:FILEPATH=${ZLIB_ROOT}" + ${EXTERNAL_OPTIONAL_ARGS}) + SET(OPTIONAL_CACHE_ARGS "-DZLIB_ROOT:STRING=${ZLIB_ROOT}") + ENDIF() ExternalProject_Add( - protobuf + ${TARGET_NAME} ${EXTERNAL_PROJECT_LOG_ARGS} PREFIX ${PROTOBUF_SOURCES_DIR} UPDATE_COMMAND "" @@ -57,11 +187,9 @@ IF(NOT PROTOBUF_FOUND) GIT_REPOSITORY "https://github.com/google/protobuf.git" GIT_TAG "9f75c5aa851cd877fb0d93ccc31b8567a6706546" CONFIGURE_COMMAND - ${CMAKE_COMMAND} ${PROTOBUF_SOURCES_DIR}/src/protobuf/cmake + ${CMAKE_COMMAND} ${PROTOBUF_SOURCES_DIR}/src/${TARGET_NAME}/cmake + ${OPTIONAL_ARGS} -Dprotobuf_BUILD_TESTS=OFF - -DZLIB_ROOT:FILEPATH=${ZLIB_ROOT} - -DCMAKE_CXX_COMPILER=${CMAKE_CXX_COMPILER} - -DCMAKE_C_COMPILER=${CMAKE_C_COMPILER} -DCMAKE_POSITION_INDEPENDENT_CODE=ON -DCMAKE_BUILD_TYPE=Release -DCMAKE_INSTALL_PREFIX=${PROTOBUF_INSTALL_DIR} @@ -71,10 +199,45 @@ IF(NOT PROTOBUF_FOUND) -DCMAKE_BUILD_TYPE:STRING=Release -DCMAKE_VERBOSE_MAKEFILE:BOOL=OFF -DCMAKE_POSITION_INDEPENDENT_CODE:BOOL=ON - -DZLIB_ROOT:STRING=${ZLIB_ROOT} + ${OPTIONAL_CACHE_ARGS} ) +ENDFUNCTION() - LIST(APPEND external_project_dependencies protobuf) -ENDIF(NOT PROTOBUF_FOUND) +SET(PROTOBUF_VERSION 3.1) +IF(CMAKE_CROSSCOMPILING) + build_protobuf(protobuf_host TRUE) + LIST(APPEND external_project_dependencies protobuf_host) -INCLUDE_DIRECTORIES(${PROTOBUF_INCLUDE_DIR}) + SET(PROTOBUF_PROTOC_EXECUTABLE ${protobuf_host_PROTOC_EXECUTABLE} + CACHE FILEPATH "protobuf executable." FORCE) +ENDIF() + +IF(NOT PROTOBUF_FOUND) + build_protobuf(extern_protobuf FALSE) + + SET(PROTOBUF_INCLUDE_DIR ${extern_protobuf_INCLUDE_DIR} + CACHE PATH "protobuf include directory." FORCE) + SET(PROTOBUF_LITE_LIBRARY ${extern_protobuf_LITE_LIBRARY} + CACHE FILEPATH "protobuf lite library." FORCE) + SET(PROTOBUF_LIBRARY ${extern_protobuf_LIBRARY} + CACHE FILEPATH "protobuf library." FORCE) + SET(PROTOBUF_PROTOC_LIBRARY ${extern_protobuf_PROTOC_LIBRARY} + CACHE FILEPATH "protoc library." FORCE) + + IF(WITH_C_API) + INSTALL(DIRECTORY ${PROTOBUF_INCLUDE_DIR} DESTINATION third_party/protobuf) + IF(ANDROID) + INSTALL(FILES ${PROTOBUF_LIBRARY} DESTINATION third_party/protobuf/lib/${ANDROID_ABI}) + ELSE() + INSTALL(FILES ${PROTOBUF_LIBRARY} DESTINATION third_party/protobuf/lib) + ENDIF() + ENDIF() + + IF(CMAKE_CROSSCOMPILING) + PROMPT_PROTOBUF_LIB(protobuf_host extern_protobuf) + ELSE() + SET(PROTOBUF_PROTOC_EXECUTABLE ${extern_protobuf_PROTOC_EXECUTABLE} + CACHE FILEPATH "protobuf executable." FORCE) + PROMPT_PROTOBUF_LIB(extern_protobuf) + ENDIF() +ENDIF(NOT PROTOBUF_FOUND) diff --git a/cmake/external/pybind11.cmake b/cmake/external/pybind11.cmake new file mode 100644 index 0000000000000000000000000000000000000000..9391c285c7544669a5b1a078b7473d7a656c1bb4 --- /dev/null +++ b/cmake/external/pybind11.cmake @@ -0,0 +1,30 @@ +INCLUDE(ExternalProject) + +SET(PYBIND_SOURCE_DIR ${THIRD_PARTY_PATH}/pybind) + +INCLUDE_DIRECTORIES(${PYBIND_SOURCE_DIR}/src/extern_pybind/include) + +ExternalProject_Add( + extern_pybind + ${EXTERNAL_PROJECT_LOG_ARGS} + GIT_REPOSITORY "https://github.com/pybind/pybind11.git" + GIT_TAG "v2.1.1" + PREFIX ${PYBIND_SOURCE_DIR} + UPDATE_COMMAND "" + CONFIGURE_COMMAND "" + BUILD_COMMAND "" + INSTALL_COMMAND "" + TEST_COMMAND "" +) + +if (${CMAKE_VERSION} VERSION_LESS "3.3.0") + set(dummyfile ${CMAKE_CURRENT_BINARY_DIR}/pybind_dummy.c) + file(WRITE ${dummyfile} "const char * dummy_any = \"${dummyfile}\";") + add_library(pybind STATIC ${dummyfile}) +else() + add_library(pybind INTERFACE) +endif() + +add_dependencies(pybind extern_pybind) + +LIST(APPEND external_project_dependencies pybind) diff --git a/cmake/external/python.cmake b/cmake/external/python.cmake index 9fd3afd0998b38c18b4490e6fb1c6fe0222ed142..46c68cce324f565ec9985ef1a280d6d933f88f1f 100644 --- a/cmake/external/python.cmake +++ b/cmake/external/python.cmake @@ -1,26 +1,31 @@ # Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserve. -# +# # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. # You may obtain a copy of the License at -# +# # http://www.apache.org/licenses/LICENSE-2.0 -# +# # Unless required by applicable law or agreed to in writing, software # distributed under the License is distributed on an "AS IS" BASIS, # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. -INCLUDE(ExternalProject) +IF(NOT WITH_PYTHON) + return() +ENDIF() + INCLUDE(python_module) FIND_PACKAGE(PythonInterp 2.7) FIND_PACKAGE(PythonLibs 2.7) +# Fixme: Maybe find a static library. Get SHARED/STATIC by FIND_PACKAGE. +ADD_LIBRARY(python SHARED IMPORTED GLOBAL) +SET_PROPERTY(TARGET python PROPERTY IMPORTED_LOCATION ${PYTHON_LIBRARIES}) SET(py_env "") - -IF(PYTHONLIBS_FOUND AND PYTHONINTERP_FOUND) +IF(PYTHONINTERP_FOUND) find_python_module(pip REQUIRED) find_python_module(numpy REQUIRED) find_python_module(wheel REQUIRED) @@ -30,198 +35,7 @@ IF(PYTHONLIBS_FOUND AND PYTHONINTERP_FOUND) MESSAGE(FATAL_ERROR "Found Python Protobuf ${PY_GOOGLE.PROTOBUF_VERSION} < 3.0.0, " "please use pip to upgrade protobuf. pip install -U protobuf") ENDIF() -ELSE(PYTHONLIBS_FOUND AND PYTHONINTERP_FOUND) - MESSAGE(FATAL_ERROR "Please install python 2.7 before building PaddlePaddle.") - ##################################### PYTHON ######################################## - SET(PYTHON_SOURCES_DIR ${THIRD_PARTY_PATH}/python) - SET(PYTHON_INSTALL_DIR ${THIRD_PARTY_PATH}/install/python) - SET(_python_DIR ${PYTHON_INSTALL_DIR}) - - IF(UNIX) - SET(PYTHON_FOUND ON) - SET(PYTHON_INCLUDE_DIR "${PYTHON_INSTALL_DIR}/include/python2.7" CACHE PATH "Python include dir" FORCE) - SET(PYTHON_LIBRARIES "${PYTHON_INSTALL_DIR}/lib/libpython2.7.a" CACHE FILEPATH "Python library" FORCE) - SET(PYTHON_EXECUTABLE ${PYTHON_INSTALL_DIR}/bin/python CACHE FILEPATH "Python executable" FORCE) - SET(PY_SITE_PACKAGES_PATH "${PYTHON_INSTALL_DIR}/lib/python2.7/site-packages" CACHE PATH "Python site-packages path" FORCE) - ELSEIF(WIN32) - SET(PYTHON_FOUND ON) - SET(PYTHON_INCLUDE_DIR "${PYTHON_INSTALL_DIR}/include" CACHE PATH "Python include dir" FORCE) - SET(PYTHON_LIBRARIES "${PYTHON_INSTALL_DIR}/libs/python27.lib" CACHE FILEPATH "Python library" FORCE) - SET(PYTHON_EXECUTABLE "${PYTHON_INSTALL_DIR}/bin/python.exe" CACHE FILEPATH "Python executable" FORCE) - SET(PY_SITE_PACKAGES_PATH "${PYTHON_INSTALL_DIR}/Lib/site-packages" CACHE PATH "Python site-packages path" FORCE) - ELSE() - MESSAGE(FATAL_ERROR "Unknown system !") - ENDIF() - - IF(APPLE) - LIST(APPEND EXTERNAL_PROJECT_OPTIONAL_CMAKE_ARGS - -DCMAKE_BUILD_WITH_INSTALL_RPATH:BOOL=ON - ) - ENDIF() - - SET(EXTERNAL_PROJECT_OPTIONAL_CMAKE_CACHE_ARGS) - - # Force Python build to "Release". - IF(CMAKE_CONFIGURATION_TYPES) - SET(SAVED_CMAKE_CFG_INTDIR ${CMAKE_CFG_INTDIR}) - SET(CMAKE_CFG_INTDIR "Release") - ELSE() - LIST(APPEND EXTERNAL_PROJECT_OPTIONAL_CMAKE_CACHE_ARGS - -DCMAKE_BUILD_TYPE:STRING=Release - ) - ENDIF() - - ExternalProject_Add(python - ${EXTERNAL_PROJECT_LOG_ARGS} - GIT_REPOSITORY "https://github.com/python-cmake-buildsystem/python-cmake-buildsystem.git" - PREFIX ${PYTHON_SOURCES_DIR} - UPDATE_COMMAND "" - CMAKE_ARGS -DPYTHON_VERSION=2.7.12 - CMAKE_ARGS -DCMAKE_CXX_COMPILER=${CMAKE_CXX_COMPILER} - CMAKE_ARGS -DCMAKE_C_COMPILER=${CMAKE_C_COMPILER} - CMAKE_CACHE_ARGS - -DCMAKE_INSTALL_PREFIX:PATH=${PYTHON_INSTALL_DIR} - -DBUILD_LIBPYTHON_SHARED:BOOL=OFF - -DUSE_SYSTEM_LIBRARIES:BOOL=OFF - -DZLIB_ROOT:FILEPATH=${ZLIB_ROOT} - -DZLIB_INCLUDE_DIR:PATH=${ZLIB_INCLUDE_DIR} - -DZLIB_LIBRARY:FILEPATH=${ZLIB_LIBRARIES} - -DDOWNLOAD_SOURCES:BOOL=ON - -DINSTALL_WINDOWS_TRADITIONAL:BOOL=OFF - ${EXTERNAL_PROJECT_OPTIONAL_CMAKE_CACHE_ARGS} - ${EXTERNAL_PROJECT_OPTIONAL_CMAKE_ARGS} - DEPENDS zlib - ) - - SET(py_env - PATH=${PYTHON_INSTALL_DIR}/bin - PYTHONHOME=${PYTHON_INSTALL_DIR} - PYTHONPATH=${PYTHON_INSTALL_DIR}/lib:${PYTHON_INSTALL_DIR}/lib/python2.7:${PY_SITE_PACKAGES_PATH}) - #################################################################################### +ENDIF(PYTHONINTERP_FOUND) - ##################################### SETUPTOOLS ################################### - SET(SETUPTOOLS_SOURCES_DIR ${PYTHON_SOURCES_DIR}/setuptools) - ExternalProject_Add(setuptools - ${EXTERNAL_PROJECT_LOG_ARGS} - PREFIX ${SETUPTOOLS_SOURCES_DIR} - URL "https://pypi.python.org/packages/source/s/setuptools/setuptools-18.3.2.tar.gz" - BUILD_IN_SOURCE 1 - PATCH_COMMAND "" - UPDATE_COMMAND "" - CONFIGURE_COMMAND "" - INSTALL_COMMAND "" - BUILD_COMMAND env ${py_env} ${PYTHON_EXECUTABLE} setup.py install - DEPENDS python zlib - ) - ##################################################################################### - - ##################################### SIX ########################################### - SET(SIX_SOURCES_DIR ${PYTHON_SOURCES_DIR}/six) - ExternalProject_Add(six - ${EXTERNAL_PROJECT_LOG_ARGS} - PREFIX ${SIX_SOURCES_DIR} - URL https://pypi.python.org/packages/source/s/six/six-1.10.0.tar.gz - BUILD_IN_SOURCE 1 - PATCH_COMMAND "" - UPDATE_COMMAND "" - CONFIGURE_COMMAND "" - INSTALL_COMMAND "" - BUILD_COMMAND env ${py_env} ${PYTHON_EXECUTABLE} setup.py install - DEPENDS python setuptools - ) - ##################################################################################### - - ##################################### CYTHON ######################################## - SET(CYTHON_SOURCES_DIR ${PYTHON_SOURCES_DIR}/cython) - ExternalProject_Add(cython - ${EXTERNAL_PROJECT_LOG_ARGS} - PREFIX ${CYTHON_SOURCES_DIR} - URL https://github.com/cython/cython/archive/0.25.2.tar.gz - GIT_TAG 0.25.2 - BUILD_IN_SOURCE 1 - CONFIGURE_COMMAND "" - PATCH_COMMAND "" - UPDATE_COMMAND "" - INSTALL_COMMAND "" - BUILD_COMMAND env ${py_env} ${PYTHON_EXECUTABLE} setup.py install - DEPENDS python - ) - #################################################################################### - - ##################################### NUMPY ######################################## - SET(NUMPY_SOURCES_DIR ${PYTHON_SOURCES_DIR}/numpy) - SET(NUMPY_TAG_VERSION "v1.11.3") - SET(NUMPY_VERSION "1.11.3") - - SET(EGG_NAME "") - SET(PYTHON_NUMPY_INCLUDE_DIR "") - IF(WIN32) - SET(EGG_NAME "numpy-${NUMPY_VERSION}-py2.7-${HOST_SYSTEM}.egg") - ELSE(WIN32) - IF(APPLE) - SET(EGG_NAME "numpy-${NUMPY_VERSION}-py2.7-${HOST_SYSTEM}-${MACOS_VERSION}") - ELSE(APPLE) - SET(EGG_NAME "numpy-${NUMPY_VERSION}-py2.7-linux") - SET(EGG_NAME "numpy-${NUMPY_VERSION}-py2.7-linux") - ENDIF(APPLE) - - FOREACH(suffix x86_64 intel fat64 fat32 universal) - LIST(APPEND PYTHON_NUMPY_INCLUDE_DIR ${PY_SITE_PACKAGES_PATH}/${EGG_NAME}-${suffix}.egg/numpy/core/include) - ENDFOREACH() - ENDIF(WIN32) - - ExternalProject_Add(numpy - ${EXTERNAL_PROJECT_LOG_ARGS} - GIT_REPOSITORY https://github.com/numpy/numpy.git - GIT_TAG ${NUMPY_TAG_VERSION} - CONFIGURE_COMMAND "" - UPDATE_COMMAND "" - PREFIX ${NUMPY_SOURCES_DIR} - BUILD_COMMAND env ${py_env} ${PYTHON_EXECUTABLE} setup.py build - INSTALL_COMMAND env ${py_env} ${PYTHON_EXECUTABLE} setup.py install - BUILD_IN_SOURCE 1 - DEPENDS python setuptools cython - ) - #################################################################################### - - ##################################### WHEEL ######################################## - SET(WHEEL_SOURCES_DIR ${PYTHON_SOURCES_DIR}/wheel) - ExternalProject_Add(wheel - ${EXTERNAL_PROJECT_LOG_ARGS} - URL https://pypi.python.org/packages/source/w/wheel/wheel-0.29.0.tar.gz - PREFIX ${WHEEL_SOURCES_DIR} - CONFIGURE_COMMAND "" - UPDATE_COMMAND "" - BUILD_COMMAND "" - INSTALL_COMMAND env ${py_env} ${PYTHON_EXECUTABLE} setup.py install - BUILD_IN_SOURCE 1 - DEPENDS python setuptools - ) - #################################################################################### - - ################################### PROTOBUF ####################################### - SET(PY_PROTOBUF_SOURCES_DIR ${PYTHON_SOURCES_DIR}/protobuf) - ExternalProject_Add(python-protobuf - ${EXTERNAL_PROJECT_LOG_ARGS} - URL https://pypi.python.org/packages/e0/b0/0a1b364fe8a7d177b4b7d4dca5b798500dc57a7273b93cca73931b305a6a/protobuf-3.1.0.post1.tar.gz - URL_MD5 38b5fb160c768d2f8444d0c6d637ff91 - PREFIX ${PY_PROTOBUF_SOURCES_DIR} - BUILD_IN_SOURCE 1 - PATCH_COMMAND "" - CONFIGURE_COMMAND "" - BUILD_COMMAND env ${py_env} ${PYTHON_EXECUTABLE} setup.py build - INSTALL_COMMAND env ${py_env} ${PYTHON_EXECUTABLE} setup.py install - DEPENDS python setuptools six - ) - #################################################################################### - - LIST(APPEND external_project_dependencies python setuptools six cython wheel python-protobuf numpy) - -ENDIF(PYTHONLIBS_FOUND AND PYTHONINTERP_FOUND) - -IF(WITH_PYTHON) - INCLUDE_DIRECTORIES(${PYTHON_INCLUDE_DIR}) - INCLUDE_DIRECTORIES(${PYTHON_NUMPY_INCLUDE_DIR}) -ELSE() - SET(PYTHON_LIBRARIES "") -ENDIF() +INCLUDE_DIRECTORIES(${PYTHON_INCLUDE_DIR}) +INCLUDE_DIRECTORIES(${PYTHON_NUMPY_INCLUDE_DIR}) diff --git a/cmake/external/swig.cmake b/cmake/external/swig.cmake index 744c766ee7b067058b2cb4aa7f7b761cbb9778d4..ce088ae7eaa3355f2f9761e8c421da0d7ef89fa7 100644 --- a/cmake/external/swig.cmake +++ b/cmake/external/swig.cmake @@ -12,6 +12,10 @@ # See the License for the specific language governing permissions and # limitations under the License. +IF(NOT WITH_SWIG_PY) + return() +ENDIF() + FIND_PACKAGE(SWIG) IF(NOT SWIG_FOUND) diff --git a/cmake/external/warpctc.cmake b/cmake/external/warpctc.cmake index 293070c3cfcc1196001f64469f3254289b0de792..bb258c7b5581fc22b44f4fe15c119f8081f4767e 100644 --- a/cmake/external/warpctc.cmake +++ b/cmake/external/warpctc.cmake @@ -1,11 +1,11 @@ # Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserve. -# +# # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. # You may obtain a copy of the License at -# +# # http://www.apache.org/licenses/LICENSE-2.0 -# +# # Unless required by applicable law or agreed to in writing, software # distributed under the License is distributed on an "AS IS" BASIS, # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. @@ -16,25 +16,14 @@ INCLUDE(ExternalProject) SET(WARPCTC_SOURCES_DIR ${THIRD_PARTY_PATH}/warpctc) SET(WARPCTC_INSTALL_DIR ${THIRD_PARTY_PATH}/install/warpctc) -SET(WARPCTC_INCLUDE_DIR "${WARPCTC_INSTALL_DIR}/include" CACHE PATH "Warp-ctc Directory" FORCE) -INCLUDE_DIRECTORIES(${WARPCTC_INCLUDE_DIR}) - -SET(WARPCTC_LIB_DIR "${WARPCTC_INSTALL_DIR}/lib" CACHE PATH "Warp-ctc Library Directory" FORCE) - -IF(WIN32) - SET(WARPCTC_LIBRARIES - "${WARPCTC_INSTALL_DIR}/lib/warpctc.dll" CACHE FILEPATH "Warp-ctc Library" FORCE) -ELSE(WIN32) - IF(APPLE) - SET(_warpctc_SHARED_SUFFIX dylib) - ELSE(APPLE) - SET(_warpctc_SHARED_SUFFIX so) - ENDIF(APPLE) - - SET(WARPCTC_LIBRARIES - "${WARPCTC_INSTALL_DIR}/lib/libwarpctc.${_warpctc_SHARED_SUFFIX}" CACHE FILEPATH "Warp-ctc Library" FORCE) -ENDIF(WIN32) +SET(WARPCTC_INCLUDE_DIR "${WARPCTC_INSTALL_DIR}/include" + CACHE PATH "Warp-ctc Directory" FORCE) +# Used in unit test test_WarpCTCLayer +SET(WARPCTC_LIB_DIR "${WARPCTC_INSTALL_DIR}/lib" + CACHE PATH "Warp-ctc Library Directory" FORCE) +SET(WARPCTC_LIBRARIES "${WARPCTC_INSTALL_DIR}/lib/libwarpctc${CMAKE_SHARED_LIBRARY_SUFFIX}" + CACHE FILEPATH "Warp-ctc Library" FORCE) IF(CMAKE_CXX_COMPILER_ID STREQUAL "Clang" OR CMAKE_CXX_COMPILER_ID STREQUAL "AppleClang" ) SET(USE_OMP OFF) @@ -43,26 +32,34 @@ ELSE() ENDIF() ExternalProject_Add( - warpctc + extern_warpctc ${EXTERNAL_PROJECT_LOG_ARGS} GIT_REPOSITORY "https://github.com/gangliao/warp-ctc.git" PREFIX ${WARPCTC_SOURCES_DIR} UPDATE_COMMAND "" CMAKE_ARGS -DCMAKE_CXX_COMPILER=${CMAKE_CXX_COMPILER} - CMAKE_ARGS -DCMAKE_C_COMPILER=${CMAKE_C_COMPILER} - CMAKE_ARGS -DCMAKE_CXX_FLAGS=${CMAKE_CXX_FLAGS} - CMAKE_ARGS -DCMAKE_C_FLAGS=${CMAKE_C_FLAGS} - CMAKE_ARGS -DCMAKE_INSTALL_PREFIX=${WARPCTC_INSTALL_DIR} - CMAKE_ARGS -DWITH_GPU=${WITH_GPU} - CMAKE_ARGS -DWITH_OMP=${USE_OMP} - CMAKE_ARGS -DWITH_TORCH=OFF - CMAKE_ARGS -DCMAKE_DISABLE_FIND_PACKAGE_Torch=ON - CMAKE_ARGS -DBUILD_SHARED=ON - CMAKE_ARGS -DCMAKE_POSITION_INDEPENDENT_CODE=ON - CMAKE_ARGS -DCMAKE_BUILD_TYPE=Release + -DCMAKE_C_COMPILER=${CMAKE_C_COMPILER} + -DCMAKE_CXX_FLAGS=${CMAKE_CXX_FLAGS} + -DCMAKE_C_FLAGS=${CMAKE_C_FLAGS} + -DCMAKE_INSTALL_PREFIX=${WARPCTC_INSTALL_DIR} + -DWITH_GPU=${WITH_GPU} + -DWITH_OMP=${USE_OMP} + -DWITH_TORCH=OFF + -DCMAKE_DISABLE_FIND_PACKAGE_Torch=ON + -DBUILD_SHARED=ON + -DCMAKE_POSITION_INDEPENDENT_CODE=ON + -DCMAKE_BUILD_TYPE=Release + ${EXTERNAL_OPTIONAL_ARGS} CMAKE_CACHE_ARGS -DCMAKE_BUILD_TYPE:STRING=Release -DCMAKE_POSITION_INDEPENDENT_CODE:BOOL=ON -DCMAKE_INSTALL_PREFIX:PATH=${WARPCTC_INSTALL_DIR} ) +MESSAGE(STATUS "warp-ctc library: ${WARPCTC_LIBRARIES}") +INCLUDE_DIRECTORIES(${WARPCTC_INCLUDE_DIR}) + +ADD_LIBRARY(warpctc STATIC IMPORTED GLOBAL) +SET_PROPERTY(TARGET warpctc PROPERTY IMPORTED_LOCATION ${WARPCTC_LIBRARIES}) +ADD_DEPENDENCIES(warpctc extern_warpctc) + LIST(APPEND external_project_dependencies warpctc) diff --git a/cmake/external/zlib.cmake b/cmake/external/zlib.cmake index 45ca5542b7dc30216b45487782f849b93c5f8fca..c496a52b780364f3014f8fa3dfbc944a7aa7430e 100644 --- a/cmake/external/zlib.cmake +++ b/cmake/external/zlib.cmake @@ -34,18 +34,28 @@ ExternalProject_Add( GIT_TAG "v1.2.8" PREFIX ${ZLIB_SOURCES_DIR} UPDATE_COMMAND "" - CMAKE_ARGS -DCMAKE_CXX_COMPILER=${CMAKE_CXX_COMPILER} CMAKE_ARGS -DCMAKE_C_COMPILER=${CMAKE_C_COMPILER} - CMAKE_ARGS -DCMAKE_CXX_FLAGS=${CMAKE_CXX_FLAGS} - CMAKE_ARGS -DCMAKE_C_FLAGS=${CMAKE_C_FLAGS} - CMAKE_ARGS -DCMAKE_INSTALL_PREFIX=${ZLIB_INSTALL_DIR} - CMAKE_ARGS -DBUILD_SHARED_LIBS=OFF - CMAKE_ARGS -DCMAKE_POSITION_INDEPENDENT_CODE=ON - CMAKE_ARGS -DCMAKE_MACOSX_RPATH=ON - CMAKE_ARGS -DCMAKE_BUILD_TYPE=Release + -DCMAKE_CXX_COMPILER=${CMAKE_CXX_COMPILER} + -DCMAKE_C_FLAGS=${CMAKE_C_FLAGS} + -DCMAKE_CXX_FLAGS=${CMAKE_CXX_FLAGS} + -DCMAKE_INSTALL_PREFIX=${ZLIB_INSTALL_DIR} + -DBUILD_SHARED_LIBS=OFF + -DCMAKE_POSITION_INDEPENDENT_CODE=ON + -DCMAKE_MACOSX_RPATH=ON + -DCMAKE_BUILD_TYPE=Release + ${EXTERNAL_OPTIONAL_ARGS} CMAKE_CACHE_ARGS -DCMAKE_INSTALL_PREFIX:PATH=${ZLIB_INSTALL_DIR} -DCMAKE_POSITION_INDEPENDENT_CODE:BOOL=ON -DCMAKE_BUILD_TYPE:STRING=Release ) LIST(APPEND external_project_dependencies zlib) + +IF(WITH_C_API) + INSTALL(DIRECTORY ${ZLIB_INCLUDE_DIR} DESTINATION third_party/zlib) + IF(ANDROID) + INSTALL(FILES ${ZLIB_LIBRARIES} DESTINATION third_party/zlib/lib/${ANDROID_ABI}) + ELSE() + INSTALL(FILES ${ZLIB_LIBRARIES} DESTINATION third_party/zlib/lib) + ENDIF() +ENDIF() diff --git a/cmake/flags.cmake b/cmake/flags.cmake index 7eb92efcb00fa18461e61e0508b485c13ef23a1f..4593ae6180b6d7deb61d897eb634b17ac0bb1683 100644 --- a/cmake/flags.cmake +++ b/cmake/flags.cmake @@ -109,7 +109,9 @@ set(COMMON_FLAGS -Wno-unused-function -Wno-error=literal-suffix -Wno-error=sign-compare - -Wno-error=unused-local-typedefs) + -Wno-error=unused-local-typedefs + -Wno-error=parentheses-equality # Warnings in pybind11 +) set(GPU_COMMON_FLAGS -fPIC @@ -122,11 +124,14 @@ set(GPU_COMMON_FLAGS -Wno-error=literal-suffix -Wno-error=unused-local-typedefs -Wno-error=unused-function # Warnings in Numpy Header. + -Wno-error=array-bounds # Warnings in Eigen::array ) if (APPLE) - # On Mac OS X build fat binaries with x86_64 architectures by default. - set (CMAKE_OSX_ARCHITECTURES "x86_64" CACHE STRING "Build architectures for OSX" FORCE) + if(NOT CMAKE_CROSSCOMPILING) + # On Mac OS X build fat binaries with x86_64 architectures by default. + set (CMAKE_OSX_ARCHITECTURES "x86_64" CACHE STRING "Build architectures for OSX" FORCE) + endif() else() set(GPU_COMMON_FLAGS -Wall @@ -187,6 +192,7 @@ endif() # Modern gpu architectures: Pascal if (CUDA_VERSION VERSION_GREATER "8.0" OR CUDA_VERSION VERSION_EQUAL "8.0") list(APPEND __arch_flags " -gencode arch=compute_60,code=sm_60") + list(APPEND CUDA_NVCC_FLAGS --expt-relaxed-constexpr) endif() # Custom gpu architecture @@ -197,3 +203,4 @@ if(CUDA_ARCH) endif() set(CUDA_NVCC_FLAGS ${__arch_flags} ${CUDA_NVCC_FLAGS}) + diff --git a/cmake/generic.cmake b/cmake/generic.cmake new file mode 100644 index 0000000000000000000000000000000000000000..ff9868fc4e0d970b11e4763d2e0c8581f4f85907 --- /dev/null +++ b/cmake/generic.cmake @@ -0,0 +1,422 @@ +# Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserve. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# + + +# generic.cmake defines CMakes functions that look like Bazel's +# building rules (https://bazel.build/). +# +# +# ------------------------------------------- +# C++ CUDA C++ Go +# ------------------------------------------- +# cc_library nv_library go_library +# cc_binary nv_binary go_binary +# cc_test nv_test go_test +# ------------------------------------------- +# +# To build a static library example.a from example.cc using the system +# compiler (like GCC): +# +# cc_library(example SRCS example.cc) +# +# To build a static library example.a from multiple source files +# example{1,2,3}.cc: +# +# cc_library(example SRCS example1.cc example2.cc example3.cc) +# +# To build a shared library example.so from example.cc: +# +# cc_library(example SHARED SRCS example.cc) +# +# To build a library using Nvidia's NVCC from .cu file(s), use the nv_ +# prefixed version: +# +# nv_library(example SRCS example.cu) +# +# To specify that a library new_example.a depends on other libraies: +# +# cc_library(new_example SRCS new_example.cc DEPS example) +# +# Static libraries can be composed of other static libraries: +# +# cc_library(composed DEPS dependent1 dependent2 dependent3) +# +# To build an executable binary file from some source files and +# dependent libraries: +# +# cc_binary(example SRCS main.cc something.cc DEPS example1 example2) +# +# To build an executable binary file using NVCC, use the nv_ prefixed +# version: +# +# nv_binary(example SRCS main.cc something.cu DEPS example1 example2) +# +# To build a unit test binary, which is an executable binary with +# GoogleTest linked: +# +# cc_test(example_test SRCS example_test.cc DEPS example) +# +# To build a unit test binary using NVCC, use the nv_ prefixed version: +# +# nv_test(example_test SRCS example_test.cu DEPS example) +# +# It is pretty often that executable and test binaries depend on +# pre-defined external libaries like glog and gflags defined in +# /cmake/external/*.cmake: +# +# cc_test(example_test SRCS example_test.cc DEPS example glog gflags) +# +# To build a go static library using Golang, use the go_ prefixed version: +# +# go_library(example STATIC) +# +# To build a go shared library using Golang, use the go_ prefixed version: +# +# go_library(example SHARED) +# + +# including binary directory for generated headers. +include_directories(${CMAKE_CURRENT_BINARY_DIR}) + +if(NOT APPLE AND NOT ANDROID) + find_package(Threads REQUIRED) + link_libraries(${CMAKE_THREAD_LIBS_INIT}) + set(CMAKE_CXX_LINK_EXECUTABLE "${CMAKE_CXX_LINK_EXECUTABLE} -ldl -lrt") +endif(NOT APPLE AND NOT ANDROID) + +function(merge_static_libs TARGET_NAME) + set(libs ${ARGN}) + list(REMOVE_DUPLICATES libs) + + # Get all propagation dependencies from the merged libraries + foreach(lib ${libs}) + list(APPEND libs_deps ${${lib}_LIB_DEPENDS}) + endforeach() + list(REMOVE_DUPLICATES libs_deps) + + # To produce a library we need at least one source file. + # It is created by add_custom_command below and will helps + # also help to track dependencies. + set(target_SRCS ${CMAKE_CURRENT_BINARY_DIR}/${TARGET_NAME}_dummy.c) + + if(APPLE) # Use OSX's libtool to merge archives + # Make the generated dummy source file depended on all static input + # libs. If input lib changes,the source file is touched + # which causes the desired effect (relink). + add_custom_command(OUTPUT ${target_SRCS} + COMMAND ${CMAKE_COMMAND} -E touch ${target_SRCS} + DEPENDS ${libs}) + + # Generate dummy staic lib + file(WRITE ${target_SRCS} "const char *dummy = \"${target_SRCS}\";") + add_library(${TARGET_NAME} STATIC ${target_SRCS}) + target_link_libraries(${TARGET_NAME} ${libs_deps}) + + foreach(lib ${libs}) + # Get the file names of the libraries to be merged + set(libfiles ${libfiles} $) + endforeach() + add_custom_command(TARGET ${TARGET_NAME} POST_BUILD + COMMAND rm "${CMAKE_CURRENT_BINARY_DIR}/lib${TARGET_NAME}.a" + COMMAND /usr/bin/libtool -static -o "${CMAKE_CURRENT_BINARY_DIR}/lib${TARGET_NAME}.a" ${libfiles} + ) + else() # general UNIX: use "ar" to extract objects and re-add to a common lib + set(target_DIR ${CMAKE_CURRENT_BINARY_DIR}/${TARGET_NAME}.dir) + + foreach(lib ${libs}) + set(objlistfile ${target_DIR}/${lib}.objlist) # list of objects in the input library + set(objdir ${target_DIR}/${lib}.objdir) + + add_custom_command(OUTPUT ${objdir} + COMMAND ${CMAKE_COMMAND} -E make_directory ${objdir} + DEPENDS ${lib}) + + add_custom_command(OUTPUT ${objlistfile} + COMMAND ${CMAKE_AR} -x "$" + COMMAND ${CMAKE_AR} -t "$" > ${objlistfile} + DEPENDS ${lib} ${objdir} + WORKING_DIRECTORY ${objdir}) + + list(APPEND target_OBJS "${objlistfile}") + endforeach() + + # Make the generated dummy source file depended on all static input + # libs. If input lib changes,the source file is touched + # which causes the desired effect (relink). + add_custom_command(OUTPUT ${target_SRCS} + COMMAND ${CMAKE_COMMAND} -E touch ${target_SRCS} + DEPENDS ${libs} ${target_OBJS}) + + # Generate dummy staic lib + file(WRITE ${target_SRCS} "const char *dummy = \"${target_SRCS}\";") + add_library(${TARGET_NAME} STATIC ${target_SRCS}) + target_link_libraries(${TARGET_NAME} ${libs_deps}) + + # Get the file name of the generated library + set(target_LIBNAME "$") + + add_custom_command(TARGET ${TARGET_NAME} POST_BUILD + COMMAND ${CMAKE_AR} crs ${target_LIBNAME} `find ${target_DIR} -name '*.o'` + COMMAND ${CMAKE_RANLIB} ${target_LIBNAME} + WORKING_DIRECTORY ${target_DIR}) + endif() +endfunction(merge_static_libs) + +function(cc_library TARGET_NAME) + set(options STATIC static SHARED shared) + set(oneValueArgs "") + set(multiValueArgs SRCS DEPS) + cmake_parse_arguments(cc_library "${options}" "${oneValueArgs}" "${multiValueArgs}" ${ARGN}) + if (cc_library_SRCS) + if (cc_library_SHARED OR cc_library_shared) # build *.so + add_library(${TARGET_NAME} SHARED ${cc_library_SRCS}) + else() + add_library(${TARGET_NAME} STATIC ${cc_library_SRCS}) + endif() + if (cc_library_DEPS) + add_dependencies(${TARGET_NAME} ${cc_library_DEPS}) + target_link_libraries(${TARGET_NAME} ${cc_library_DEPS}) + endif() + + # cpplint code style + foreach(source_file ${cc_library_SRCS}) + string(REGEX REPLACE "\\.[^.]*$" "" source ${source_file}) + if(EXISTS ${CMAKE_CURRENT_SOURCE_DIR}/${source}.h) + list(APPEND cc_library_HEADERS ${CMAKE_CURRENT_SOURCE_DIR}/${source}.h) + endif() + endforeach() + add_style_check_target(${TARGET_NAME} ${cc_library_SRCS} ${cc_library_HEADERS}) + + else(cc_library_SRCS) + if(cc_library_DEPS) + merge_static_libs(${TARGET_NAME} ${cc_library_DEPS}) + else() + message(FATAL "Please specify source file or library in cc_library.") + endif() + endif(cc_library_SRCS) +endfunction(cc_library) + +function(cc_binary TARGET_NAME) + set(options "") + set(oneValueArgs "") + set(multiValueArgs SRCS DEPS) + cmake_parse_arguments(cc_binary "${options}" "${oneValueArgs}" "${multiValueArgs}" ${ARGN}) + add_executable(${TARGET_NAME} ${cc_binary_SRCS}) + if(cc_binary_DEPS) + target_link_libraries(${TARGET_NAME} ${cc_binary_DEPS}) + add_dependencies(${TARGET_NAME} ${cc_binary_DEPS}) + endif() +endfunction(cc_binary) + +function(cc_test TARGET_NAME) + if(WITH_TESTING) + set(options "") + set(oneValueArgs "") + set(multiValueArgs SRCS DEPS) + cmake_parse_arguments(cc_test "${options}" "${oneValueArgs}" "${multiValueArgs}" ${ARGN}) + add_executable(${TARGET_NAME} ${cc_test_SRCS}) + target_link_libraries(${TARGET_NAME} ${cc_test_DEPS} gtest gtest_main) + add_dependencies(${TARGET_NAME} ${cc_test_DEPS} gtest gtest_main) + add_test(NAME ${TARGET_NAME} COMMAND ${TARGET_NAME} WORKING_DIRECTORY ${CMAKE_CURRENT_SOURCE_DIR}) + endif() +endfunction(cc_test) + +function(nv_library TARGET_NAME) + if (WITH_GPU) + set(options STATIC static SHARED shared) + set(oneValueArgs "") + set(multiValueArgs SRCS DEPS) + cmake_parse_arguments(nv_library "${options}" "${oneValueArgs}" "${multiValueArgs}" ${ARGN}) + if(nv_library_SRCS) + if (nv_library_SHARED OR nv_library_shared) # build *.so + cuda_add_library(${TARGET_NAME} SHARED ${nv_library_SRCS}) + else() + cuda_add_library(${TARGET_NAME} STATIC ${nv_library_SRCS}) + endif() + if (nv_library_DEPS) + add_dependencies(${TARGET_NAME} ${nv_library_DEPS}) + target_link_libraries(${TARGET_NAME} ${nv_library_DEPS}) + endif() + # cpplint code style + foreach(source_file ${nv_library_SRCS}) + string(REGEX REPLACE "\\.[^.]*$" "" source ${source_file}) + if(EXISTS ${CMAKE_CURRENT_SOURCE_DIR}/${source}.h) + list(APPEND nv_library_HEADERS ${CMAKE_CURRENT_SOURCE_DIR}/${source}.h) + endif() + endforeach() + add_style_check_target(${TARGET_NAME} ${nv_library_SRCS} ${nv_library_HEADERS}) + else(nv_library_SRCS) + if (nv_library_DEPS) + merge_static_libs(${TARGET_NAME} ${nv_library_DEPS}) + else() + message(FATAL "Please specify source file or library in nv_library.") + endif() + endif(nv_library_SRCS) + endif() +endfunction(nv_library) + +function(nv_binary TARGET_NAME) + if (WITH_GPU) + set(options "") + set(oneValueArgs "") + set(multiValueArgs SRCS DEPS) + cmake_parse_arguments(nv_binary "${options}" "${oneValueArgs}" "${multiValueArgs}" ${ARGN}) + cuda_add_executable(${TARGET_NAME} ${nv_binary_SRCS}) + if(nv_binary_DEPS) + target_link_libraries(${TARGET_NAME} ${nv_binary_DEPS}) + add_dependencies(${TARGET_NAME} ${nv_binary_DEPS}) + endif() + endif() +endfunction(nv_binary) + +function(nv_test TARGET_NAME) + if (WITH_GPU AND WITH_TESTING) + set(options "") + set(oneValueArgs "") + set(multiValueArgs SRCS DEPS) + cmake_parse_arguments(nv_test "${options}" "${oneValueArgs}" "${multiValueArgs}" ${ARGN}) + cuda_add_executable(${TARGET_NAME} ${nv_test_SRCS}) + target_link_libraries(${TARGET_NAME} ${nv_test_DEPS} gtest gtest_main) + add_dependencies(${TARGET_NAME} ${nv_test_DEPS} gtest gtest_main) + add_test(${TARGET_NAME} ${TARGET_NAME}) + endif() +endfunction(nv_test) + +function(go_library TARGET_NAME) + set(options STATIC static SHARED shared) + set(oneValueArgs "") + set(multiValueArgs DEPS) + cmake_parse_arguments(go_library "${options}" "${oneValueArgs}" "${multiValueArgs}" ${ARGN}) + + if (go_library_SHARED OR go_library_shared) + set(BUILD_MODE "-buildmode=c-shared") + set(${TARGET_NAME}_LIB_NAME "${CMAKE_SHARED_LIBRARY_PREFIX}${TARGET_NAME}${CMAKE_SHARED_LIBRARY_SUFFIX}" CACHE STRING "output library name for target ${TARGET_NAME}") + else() + set(BUILD_MODE "-buildmode=c-archive") + set(${TARGET_NAME}_LIB_NAME "${CMAKE_STATIC_LIBRARY_PREFIX}${TARGET_NAME}${CMAKE_STATIC_LIBRARY_SUFFIX}" CACHE STRING "output library name for target ${TARGET_NAME}") + endif() + + set(dummyfile ${CMAKE_CURRENT_BINARY_DIR}/${TARGET_NAME}_dummy.c) + + # This custom command will always run since it depends on a not + # existing file. + add_custom_command( + OUTPUT dummy_rebulid_${TARGET_NAME} + COMMAND cmake -E touch ${dummyfile} + ) + # Create a custom target that depends on the custom command output + # file, so the custom command can be referenced as a dependency by + # `add_dependencies`. + add_custom_target(rebuild_${TARGET_NAME} + DEPENDS dummy_rebulid_${TARGET_NAME} + ) + + # Add dummy code to support `make target_name` under Terminal Command + file(WRITE ${dummyfile} "const char * dummy = \"${dummyfile}\";") + if (go_library_SHARED OR go_library_shared) + add_library(${TARGET_NAME} SHARED ${dummyfile}) + else() + add_library(${TARGET_NAME} STATIC ${dummyfile}) + endif() + if(go_library_DEPS) + add_dependencies(${TARGET_NAME} ${go_library_DEPS}) + endif(go_library_DEPS) + + # The "source file" of the library is `${dummyfile}` which never + # change, so the target will never rebuild. Make the target depends + # on the custom command that touches the library "source file", so + # rebuild will always happen. + add_dependencies(${TARGET_NAME} rebuild_${TARGET_NAME}) + + set(${TARGET_NAME}_LIB_PATH "${CMAKE_CURRENT_BINARY_DIR}/${${TARGET_NAME}_LIB_NAME}" CACHE STRING "output library path for target ${TARGET_NAME}") + + file(GLOB GO_SOURCE RELATIVE "${CMAKE_CURRENT_SOURCE_DIR}" "*.go") + string(REPLACE "${PADDLE_GO_PATH}/" "" CMAKE_CURRENT_SOURCE_REL_DIR ${CMAKE_CURRENT_SOURCE_DIR}) + + add_custom_command(TARGET ${TARGET_NAME} POST_BUILD + COMMAND rm "${${TARGET_NAME}_LIB_PATH}" + # Golang build source code + COMMAND GOPATH=${GOPATH} ${CMAKE_Go_COMPILER} build ${BUILD_MODE} + -o "${${TARGET_NAME}_LIB_PATH}" + "./${CMAKE_CURRENT_SOURCE_REL_DIR}/${GO_SOURCE}" + # must run under GOPATH + WORKING_DIRECTORY "${PADDLE_IN_GOPATH}/go") + add_dependencies(${TARGET_NAME} go_vendor) +endfunction(go_library) + +function(go_binary TARGET_NAME) + set(options OPTIONAL) + set(oneValueArgs "") + set(multiValueArgs SRCS DEPS) + cmake_parse_arguments(go_binary "${options}" "${oneValueArgs}" "${multiValueArgs}" ${ARGN}) + string(REPLACE "${PADDLE_GO_PATH}/" "" CMAKE_CURRENT_SOURCE_REL_DIR ${CMAKE_CURRENT_SOURCE_DIR}) + + add_custom_command(OUTPUT ${TARGET_NAME}_timestamp + COMMAND env GOPATH=${GOPATH} ${CMAKE_Go_COMPILER} build + -o "${CMAKE_CURRENT_BINARY_DIR}/${TARGET_NAME}" + "./${CMAKE_CURRENT_SOURCE_REL_DIR}/${go_binary_SRCS}" + WORKING_DIRECTORY "${PADDLE_IN_GOPATH}/go") + add_custom_target(${TARGET_NAME} ALL DEPENDS go_vendor ${TARGET_NAME}_timestamp ${go_binary_DEPS}) + install(PROGRAMS ${CMAKE_CURRENT_BINARY_DIR}/${TARGET_NAME} DESTINATION bin) +endfunction(go_binary) + +function(go_test TARGET_NAME) + set(options OPTIONAL) + set(oneValueArgs "") + set(multiValueArgs DEPS) + cmake_parse_arguments(go_test "${options}" "${oneValueArgs}" "${multiValueArgs}" ${ARGN}) + string(REPLACE "${PADDLE_GO_PATH}" "" CMAKE_CURRENT_SOURCE_REL_DIR ${CMAKE_CURRENT_SOURCE_DIR}) + add_custom_target(${TARGET_NAME} ALL DEPENDS go_vendor ${go_test_DEPS}) + add_custom_command(TARGET ${TARGET_NAME} POST_BUILD + COMMAND env GOPATH=${GOPATH} ${CMAKE_Go_COMPILER} test -race + -c -o "${CMAKE_CURRENT_BINARY_DIR}/${TARGET_NAME}" + ".${CMAKE_CURRENT_SOURCE_REL_DIR}" + WORKING_DIRECTORY "${PADDLE_IN_GOPATH}/go") + add_test(NAME ${TARGET_NAME} + COMMAND ${CMAKE_CURRENT_BINARY_DIR}/${TARGET_NAME} + WORKING_DIRECTORY ${CMAKE_CURRENT_SOURCE_DIR}) +endfunction(go_test) + +function(proto_library TARGET_NAME) + set(oneValueArgs "") + set(multiValueArgs SRCS DEPS) + cmake_parse_arguments(proto_library "${options}" "${oneValueArgs}" "${multiValueArgs}" ${ARGN}) + set(proto_srcs) + set(proto_hdrs) + protobuf_generate_cpp(proto_srcs proto_hdrs ${proto_library_SRCS}) + cc_library(${TARGET_NAME} SRCS ${proto_srcs} DEPS ${proto_library_DEPS} protobuf) +endfunction() + +function(py_proto_compile TARGET_NAME) + set(oneValueArgs "") + set(multiValueArgs SRCS) + cmake_parse_arguments(py_proto_compile "${options}" "${oneValueArgs}" "${multiValueArgs}" ${ARGN}) + set(py_srcs) + protobuf_generate_python(py_srcs ${py_proto_compile_SRCS}) + add_custom_target(${TARGET_NAME} ALL DEPENDS ${py_srcs}) +endfunction() + +function(py_test TARGET_NAME) + if(WITH_TESTING) + set(options STATIC static SHARED shared) + set(oneValueArgs "") + set(multiValueArgs SRCS DEPS) + cmake_parse_arguments(py_test "${options}" "${oneValueArgs}" "${multiValueArgs}" ${ARGN}) + add_test(NAME ${TARGET_NAME} + COMMAND env PYTHONPATH=${PADDLE_PYTHON_BUILD_DIR}/lib-python + python2 ${py_test_SRCS} + WORKING_DIRECTORY ${CMAKE_CURRENT_SOURCE_DIR}) + endif() +endfunction() diff --git a/cmake/make_resource.py b/cmake/make_resource.py new file mode 100644 index 0000000000000000000000000000000000000000..a9241b0e3e36c2e79c79e46b4f9114b7f6947341 --- /dev/null +++ b/cmake/make_resource.py @@ -0,0 +1,11 @@ +import os +import re +import sys + +res = sys.argv[1] +out = sys.argv[2] +var = re.sub(r'[ .-]', '_', os.path.basename(res)) + +open(out, "w").write("const unsigned char " + var + "[] = {" + ",".join([ + "0x%02x" % ord(c) for c in open(res).read() +]) + ",0};\n" + "const unsigned " + var + "_size = sizeof(" + var + ");\n") diff --git a/cmake/package.cmake b/cmake/package.cmake index ff49a2d08e8f6004320acfce266339aa301eb9c4..79e02147f3f7cc19c1bf45d8a1d208a9a32416ff 100644 --- a/cmake/package.cmake +++ b/cmake/package.cmake @@ -12,7 +12,7 @@ set(CPACK_PACKAGE_DESCRIPTION "") set(CPACK_DEBIAN_PACKAGE_DEPENDS "libpython2.7-dev, libstdc++6, python-pip, curl, libgfortran3, python-pip-whl") set(CPACK_DEBIAN_PACKAGE_SECTION Devel) set(CPACK_DEBIAN_PACKAGE_VERSION ${PADDLE_VERSION}) -set(CPACK_DEBIAN_PACKAGE_CONTROL_EXTRA "${PROJ_ROOT}/paddle/scripts/deb/postinst") +set(CPACK_DEBIAN_PACKAGE_CONTROL_EXTRA "${PADDLE_SOURCE_DIR}/paddle/scripts/deb/postinst") #set(CPACK_GENERATOR "DEB") # Start cpack include (CMakePackageConfigHelpers) diff --git a/cmake/rdma.cmake b/cmake/rdma.cmake index 9ff1a77cac74fb1bdfe470a78d225ed1767bb1b5..b698f3bdc3ff586a72badee3e0109e29285b457f 100644 --- a/cmake/rdma.cmake +++ b/cmake/rdma.cmake @@ -10,7 +10,7 @@ if(WITH_RDMA) function(generate_rdma_links) #redirect to current DIR to isolate the pollution from system runtime environment - #it can benifits unified control for different gcc environment. + #it can benifits unified control for different gcc environment. #e.g, by default gcc48 did not refer /usr/lib64 which could contain low version #runtime libraries that will crash process while loading it. That redirect trick #can fix it. @@ -19,7 +19,9 @@ if(WITH_RDMA) COMMAND ln -s -f /usr/lib64/libibverbs.so.1.0.0 librdma/libibverbs.so.1 COMMAND ln -s -f /usr/lib64/libibverbs.so.1.0.0 librdma/libibverbs.so COMMAND ln -s -f /usr/lib64/librdmacm.so.1.0.0 librdma/librdmacm.so.1 - COMMAND ln -s -f /usr/lib64/librdmacm.so.1.0.0 librdma/librdmacm.so + COMMAND ln -s -f /usr/lib64/librdmacm.so.1.0.0 librdma/librdmacm.so + COMMAND ln -s -f /lib64/libnl.so.1.1.4 librdma/libnl.so.1 + COMMAND ln -s -f /lib64/libnl.so.1.1.4 librdma/libnl.so WORKING_DIRECTORY ${CMAKE_CURRENT_BINARY_DIR} ) endfunction(generate_rdma_links) @@ -44,7 +46,7 @@ if(WITH_RDMA) RDMA_INC_XIO AND RDMA_INC_EVENT AND RDMA_INC_NUMA AND - RDMA_LIB_SXISOCK AND + RDMA_LIB_SXISOCK AND RDMA_LIB_XIO AND RDMA_LIB_EVENT AND RDMA_LIB_EVENT_CORE AND @@ -53,19 +55,19 @@ if(WITH_RDMA) RDMA_LIB_NUMA ) - set(RDMA_INC_DIR - ${RDMA_INC_SXISOCK} + set(RDMA_INC_DIR + ${RDMA_INC_SXISOCK} ${RDMA_INC_XIO} ${RDMA_INC_EVENT} ${RDMA_INC_NUMA}) - set(RDMA_LIBS - ${RDMA_LIB_SXISOCK} - ${RDMA_LIB_XIO} - ${RDMA_LIB_EVENT} - ${RDMA_LIB_EVENT_CORE} - ${RDMA_LIB_EVENT_EXTRA} - ${RDMA_LIB_EVENT_PTHREADS} - ${RDMA_LIB_NUMA} + set(RDMA_LIBS + ${RDMA_LIB_SXISOCK} + ${RDMA_LIB_XIO} + ${RDMA_LIB_EVENT} + ${RDMA_LIB_EVENT_CORE} + ${RDMA_LIB_EVENT_EXTRA} + ${RDMA_LIB_EVENT_PTHREADS} + ${RDMA_LIB_NUMA} ) set(RDMA_LD_FLAGS "-L./librdma -libverbs -lrdmacm -Xlinker -rpath ./librdma") include_directories("${RDMA_INC_DIR}") diff --git a/cmake/system.cmake b/cmake/system.cmake index 3ca06665ab2385e34302a6bcce7ada549ea1e247..396bd1a0797edea0522bb1f02349373563b7726a 100644 --- a/cmake/system.cmake +++ b/cmake/system.cmake @@ -13,9 +13,9 @@ # limitations under the License. # Detects the OS and sets appropriate variables. -# CMAKE_SYSTEM_NAME only give us a coarse-grained name, -# but the name like centos is necessary in some scenes -# to distinguish system for customization. +# CMAKE_SYSTEM_NAME only give us a coarse-grained name of the OS CMake is +# building for, but the host processor name like centos is necessary +# in some scenes to distinguish system for customization. # # for instance, protobuf libs path is /lib64 # on CentOS, but /lib on other systems. @@ -24,10 +24,15 @@ IF(WIN32) SET(HOST_SYSTEM "win32") ELSE(WIN32) IF(APPLE) - EXEC_PROGRAM (sw_vers ARGS -productVersion OUTPUT_VARIABLE MACOSX_VERSION) - STRING(REGEX MATCH "[0-9]+.[0-9]+" VERSION "${MACOSX_VERSION}") - SET(MACOS_VERSION ${VERSION}) SET(HOST_SYSTEM "macosx") + EXEC_PROGRAM(sw_vers ARGS -productVersion OUTPUT_VARIABLE HOST_SYSTEM_VERSION) + STRING(REGEX MATCH "[0-9]+.[0-9]+" MACOS_VERSION "${HOST_SYSTEM_VERSION}") + IF(NOT DEFINED $ENV{MACOSX_DEPLOYMENT_TARGET}) + # Set cache variable - end user may change this during ccmake or cmake-gui configure. + SET(CMAKE_OSX_DEPLOYMENT_TARGET ${MACOS_VERSION} CACHE STRING + "Minimum OS X version to target for deployment (at runtime); newer APIs weak linked. Set to empty string for default value.") + ENDIF() + set(CMAKE_EXE_LINKER_FLAGS "-framework CoreFoundation -framework Security") ELSE(APPLE) IF(EXISTS "/etc/issue") @@ -43,6 +48,8 @@ ELSE(WIN32) ELSEIF(LINUX_ISSUE MATCHES "Fedora") SET(HOST_SYSTEM "fedora") ENDIF() + + STRING(REGEX MATCH "(([0-9]+)\\.)+([0-9]+)" HOST_SYSTEM_VERSION "${LINUX_ISSUE}") ENDIF(EXISTS "/etc/issue") IF(EXISTS "/etc/redhat-release") @@ -64,12 +71,21 @@ CMAKE_HOST_SYSTEM_INFORMATION(RESULT CPU_CORES QUERY NUMBER_OF_LOGICAL_CORES) MARK_AS_ADVANCED(HOST_SYSTEM CPU_CORES) -MESSAGE(STATUS "Found Paddle host system: ${HOST_SYSTEM}") +MESSAGE(STATUS "Found Paddle host system: ${HOST_SYSTEM}, version: ${HOST_SYSTEM_VERSION}") MESSAGE(STATUS "Found Paddle host system's CPU: ${CPU_CORES} cores") +# configuration for cross-compiling IF(DEFINED CMAKE_SYSTEM_NAME) + INCLUDE(cross_compiling/host) IF(${CMAKE_SYSTEM_NAME} STREQUAL "Android") SET(ANDROID TRUE) + INCLUDE(cross_compiling/android) + ELSEIF(${CMAKE_SYSTEM_NAME} STREQUAL "RPi") + SET(RPI TRUE) + INCLUDE(cross_compiling/raspberry_pi) + ELSEIF(${CMAKE_SYSTEM_NAME} STREQUAL "iOS") + SET(IOS TRUE) + INCLUDE(cross_compiling/ios) ENDIF() ENDIF() diff --git a/cmake/util.cmake b/cmake/util.cmake index 099a85809d93e01772bf8c5c329fd9055ee4f054..117ab7f49cdf4a568cd203b2b17767643d0b2d50 100644 --- a/cmake/util.cmake +++ b/cmake/util.cmake @@ -25,7 +25,9 @@ function(target_circle_link_libraries TARGET_NAME) endif() endforeach() if("${CMAKE_CXX_COMPILER_ID}" STREQUAL "Clang" OR "${CMAKE_CXX_COMPILER_ID}" STREQUAL "AppleClang") - list(APPEND LIBS "-undefined dynamic_lookup") + if(NOT IOS_ENABLE_BITCODE) + list(APPEND LIBS "-undefined dynamic_lookup") + endif() endif() list(REVERSE libsInArgn) target_link_libraries(${TARGET_NAME} @@ -71,29 +73,52 @@ function(link_paddle_exe TARGET_NAME) generate_rdma_links() endif() - target_circle_link_libraries(${TARGET_NAME} - ARCHIVE_START - paddle_gserver - paddle_function - ARCHIVE_END - paddle_pserver - paddle_trainer_lib - paddle_network - paddle_math - paddle_utils - paddle_parameter - paddle_proto - paddle_cuda - ${EXTERNAL_LIBS} - ${CMAKE_THREAD_LIBS_INIT} - ${CMAKE_DL_LIBS} - ${RDMA_LD_FLAGS} - ${RDMA_LIBS}) + if(MOBILE_INFERENCE) + target_circle_link_libraries(${TARGET_NAME} + ARCHIVE_START + paddle_gserver + paddle_function + ARCHIVE_END + paddle_math + paddle_utils + paddle_parameter + paddle_proto + paddle_cuda + ${EXTERNAL_LIBS} + ${CMAKE_THREAD_LIBS_INIT} + ${CMAKE_DL_LIBS} + ${RDMA_LD_FLAGS} + ${RDMA_LIBS}) + else() + target_circle_link_libraries(${TARGET_NAME} + ARCHIVE_START + paddle_gserver + paddle_function + ARCHIVE_END + paddle_pserver + paddle_trainer_lib + paddle_network + paddle_math + paddle_utils + paddle_parameter + paddle_proto + paddle_cuda + paddle_optimizer + ${EXTERNAL_LIBS} + ${CMAKE_THREAD_LIBS_INIT} + ${CMAKE_DL_LIBS} + ${RDMA_LD_FLAGS} + ${RDMA_LIBS}) + endif() if(ANDROID) target_link_libraries(${TARGET_NAME} log) endif(ANDROID) + if(WITH_MKLDNN AND WITH_MKLML AND MKLDNN_IOMP_DIR) + target_link_libraries(${TARGET_NAME} "-L${MKLDNN_IOMP_DIR} -liomp5 -Wl,--as-needed") + endif() + add_dependencies(${TARGET_NAME} ${external_project_dependencies}) endfunction() @@ -117,7 +142,6 @@ endfunction() macro(add_unittest_without_exec TARGET_NAME) add_executable(${TARGET_NAME} ${ARGN}) link_paddle_test(${TARGET_NAME}) - add_style_check_target(${TARGET_NAME} ${ARGN}) endmacro() # add_unittest @@ -138,17 +162,23 @@ macro(add_simple_unittest TARGET_NAME) endmacro() # Creates C resources file from files in given resource file -function(create_resources res_file output) - # Create empty output file - file(WRITE ${output} "") - # Get short filename - string(REGEX MATCH "([^/]+)$" filename ${res_file}) - # Replace filename spaces & extension separator for C compatibility - string(REGEX REPLACE "\\.| |-" "_" filename ${filename}) - # Read hex data from file - file(READ ${res_file} filedata HEX) - # Convert hex data for C compatibility - string(REGEX REPLACE "([0-9a-f][0-9a-f])" "0x\\1," filedata ${filedata}) - # Append data to output file - file(APPEND ${output} "const unsigned char ${filename}[] = {${filedata}0};\nconst unsigned ${filename}_size = sizeof(${filename});\n") +function(create_resources res_file output_file) + add_custom_command( + OUTPUT ${output_file} + COMMAND python ARGS ${PADDLE_SOURCE_DIR}/cmake/make_resource.py ${res_file} ${output_file} + DEPENDS ${res_file} ${PADDLE_SOURCE_DIR}/cmake/make_resource.py) +endfunction() + + +# Create a python unittest using run_python_tests.sh, +# which takes care of making correct running environment +function(add_python_test TEST_NAME) + foreach(arg ${ARGN}) + get_filename_component(py_fn ${arg} NAME_WE) + set(TRG_NAME ${TEST_NAME}_${py_fn}) + add_test(NAME ${TRG_NAME} + COMMAND env PYTHONPATH=${PADDLE_PYTHON_PACKAGE_DIR} + python2 ${arg} + WORKING_DIRECTORY ${CMAKE_CURRENT_SOURCE_DIR}) + endforeach() endfunction() diff --git a/cmake/version.cmake b/cmake/version.cmake index ac1583a24c828629c46cb9cf4e965f8da2273732..cde650128a068faf32f4abfff5cdfdeb656d8577 100644 --- a/cmake/version.cmake +++ b/cmake/version.cmake @@ -4,7 +4,7 @@ set(tmp_version "HEAD") while ("${PADDLE_VERSION}" STREQUAL "") execute_process( COMMAND ${GIT_EXECUTABLE} describe --tags --abbrev=0 ${tmp_version} - WORKING_DIRECTORY ${PROJ_ROOT} + WORKING_DIRECTORY ${PADDLE_SOURCE_DIR} OUTPUT_VARIABLE GIT_TAG_NAME RESULT_VARIABLE GIT_RESULT ERROR_QUIET OUTPUT_STRIP_TRAILING_WHITESPACE) diff --git a/demo/image_classification/.gitignore b/demo/image_classification/.gitignore deleted file mode 100644 index 6a05b8f6632db0977fceade8b48a89b9f7f6e6cc..0000000000000000000000000000000000000000 --- a/demo/image_classification/.gitignore +++ /dev/null @@ -1,9 +0,0 @@ -data/cifar-10-batches-py -data/cifar-out -cifar_vgg_model/* -plot.png -train.log -image_provider_copy_1.py -*pyc -train.list -test.list diff --git a/demo/image_classification/api_v2_resnet.py b/demo/image_classification/api_v2_resnet.py deleted file mode 100644 index 19d20540780becf504973a23b50445d4b65dc2ef..0000000000000000000000000000000000000000 --- a/demo/image_classification/api_v2_resnet.py +++ /dev/null @@ -1,74 +0,0 @@ -# Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserved -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -import paddle.v2 as paddle - -__all__ = ['resnet_cifar10'] - - -def conv_bn_layer(input, - ch_out, - filter_size, - stride, - padding, - active_type=paddle.activation.Relu(), - ch_in=None): - tmp = paddle.layer.img_conv( - input=input, - filter_size=filter_size, - num_channels=ch_in, - num_filters=ch_out, - stride=stride, - padding=padding, - act=paddle.activation.Linear(), - bias_attr=False) - return paddle.layer.batch_norm(input=tmp, act=active_type) - - -def shortcut(ipt, n_in, n_out, stride): - if n_in != n_out: - return conv_bn_layer(ipt, n_out, 1, stride, 0, - paddle.activation.Linear()) - else: - return ipt - - -def basicblock(ipt, ch_out, stride): - ch_in = ch_out * 2 - tmp = conv_bn_layer(ipt, ch_out, 3, stride, 1) - tmp = conv_bn_layer(tmp, ch_out, 3, 1, 1, paddle.activation.Linear()) - short = shortcut(ipt, ch_in, ch_out, stride) - return paddle.layer.addto(input=[tmp, short], act=paddle.activation.Relu()) - - -def layer_warp(block_func, ipt, features, count, stride): - tmp = block_func(ipt, features, stride) - for i in range(1, count): - tmp = block_func(tmp, features, 1) - return tmp - - -def resnet_cifar10(ipt, depth=32): - # depth should be one of 20, 32, 44, 56, 110, 1202 - assert (depth - 2) % 6 == 0 - n = (depth - 2) / 6 - nStages = {16, 64, 128} - conv1 = conv_bn_layer( - ipt, ch_in=3, ch_out=16, filter_size=3, stride=1, padding=1) - res1 = layer_warp(basicblock, conv1, 16, n, 1) - res2 = layer_warp(basicblock, res1, 32, n, 2) - res3 = layer_warp(basicblock, res2, 64, n, 2) - pool = paddle.layer.img_pool( - input=res3, pool_size=8, stride=1, pool_type=paddle.pooling.Avg()) - return pool diff --git a/demo/image_classification/api_v2_train.py b/demo/image_classification/api_v2_train.py deleted file mode 100644 index 53cffa6fb4e8b2e19725f4f44bf7b9ffffb25232..0000000000000000000000000000000000000000 --- a/demo/image_classification/api_v2_train.py +++ /dev/null @@ -1,92 +0,0 @@ -# Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserved -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License - -import sys - -import paddle.v2 as paddle - -from api_v2_vgg import vgg_bn_drop - - -def main(): - datadim = 3 * 32 * 32 - classdim = 10 - - # PaddlePaddle init - paddle.init(use_gpu=False, trainer_count=1) - - image = paddle.layer.data( - name="image", type=paddle.data_type.dense_vector(datadim)) - - # Add neural network config - # option 1. resnet - # net = resnet_cifar10(image, depth=32) - # option 2. vgg - net = vgg_bn_drop(image) - - out = paddle.layer.fc(input=net, - size=classdim, - act=paddle.activation.Softmax()) - - lbl = paddle.layer.data( - name="label", type=paddle.data_type.integer_value(classdim)) - cost = paddle.layer.classification_cost(input=out, label=lbl) - - # Create parameters - parameters = paddle.parameters.create(cost) - - # Create optimizer - momentum_optimizer = paddle.optimizer.Momentum( - momentum=0.9, - regularization=paddle.optimizer.L2Regularization(rate=0.0002 * 128), - learning_rate=0.1 / 128.0, - learning_rate_decay_a=0.1, - learning_rate_decay_b=50000 * 100, - learning_rate_schedule='discexp', - batch_size=128) - - # End batch and end pass event handler - def event_handler(event): - if isinstance(event, paddle.event.EndIteration): - if event.batch_id % 100 == 0: - print "\nPass %d, Batch %d, Cost %f, %s" % ( - event.pass_id, event.batch_id, event.cost, event.metrics) - else: - sys.stdout.write('.') - sys.stdout.flush() - if isinstance(event, paddle.event.EndPass): - result = trainer.test( - reader=paddle.batch( - paddle.dataset.cifar.test10(), batch_size=128), - feeding={'image': 0, - 'label': 1}) - print "\nTest with Pass %d, %s" % (event.pass_id, result.metrics) - - # Create trainer - trainer = paddle.trainer.SGD(cost=cost, - parameters=parameters, - update_equation=momentum_optimizer) - trainer.train( - reader=paddle.batch( - paddle.reader.shuffle( - paddle.dataset.cifar.train10(), buf_size=50000), - batch_size=128), - num_passes=5, - event_handler=event_handler, - feeding={'image': 0, - 'label': 1}) - - -if __name__ == '__main__': - main() diff --git a/demo/image_classification/api_v2_vgg.py b/demo/image_classification/api_v2_vgg.py deleted file mode 100644 index 1e0e6b93adde30425f17aa9cd07542275f4fec37..0000000000000000000000000000000000000000 --- a/demo/image_classification/api_v2_vgg.py +++ /dev/null @@ -1,47 +0,0 @@ -# Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserved -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -import paddle.v2 as paddle - -__all__ = ['vgg_bn_drop'] - - -def vgg_bn_drop(input): - def conv_block(ipt, num_filter, groups, dropouts, num_channels=None): - return paddle.networks.img_conv_group( - input=ipt, - num_channels=num_channels, - pool_size=2, - pool_stride=2, - conv_num_filter=[num_filter] * groups, - conv_filter_size=3, - conv_act=paddle.activation.Relu(), - conv_with_batchnorm=True, - conv_batchnorm_drop_rate=dropouts, - pool_type=paddle.pooling.Max()) - - conv1 = conv_block(input, 64, 2, [0.3, 0], 3) - conv2 = conv_block(conv1, 128, 2, [0.4, 0]) - conv3 = conv_block(conv2, 256, 3, [0.4, 0.4, 0]) - conv4 = conv_block(conv3, 512, 3, [0.4, 0.4, 0]) - conv5 = conv_block(conv4, 512, 3, [0.4, 0.4, 0]) - - drop = paddle.layer.dropout(input=conv5, dropout_rate=0.5) - fc1 = paddle.layer.fc(input=drop, size=512, act=paddle.activation.Linear()) - bn = paddle.layer.batch_norm( - input=fc1, - act=paddle.activation.Relu(), - layer_attr=paddle.attr.Extra(drop_rate=0.5)) - fc2 = paddle.layer.fc(input=bn, size=512, act=paddle.activation.Linear()) - return fc2 diff --git a/demo/image_classification/data/download_cifar.sh b/demo/image_classification/data/download_cifar.sh deleted file mode 100755 index 532178d627fe19ab8ea79ecae73e5328b5294bea..0000000000000000000000000000000000000000 --- a/demo/image_classification/data/download_cifar.sh +++ /dev/null @@ -1,21 +0,0 @@ -#!/bin/bash -# Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserved -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -set -e -wget https://www.cs.toronto.edu/~kriz/cifar-10-python.tar.gz -tar zxf cifar-10-python.tar.gz -rm cifar-10-python.tar.gz -rm -rf cifar-out/* -echo Converting CIFAR data to images..... -python process_cifar.py ./cifar-10-batches-py ./cifar-out diff --git a/demo/image_classification/data/process_cifar.py b/demo/image_classification/data/process_cifar.py deleted file mode 100644 index db6666189e5b8008a6b66fb64afcdf98980e72bb..0000000000000000000000000000000000000000 --- a/demo/image_classification/data/process_cifar.py +++ /dev/null @@ -1,89 +0,0 @@ -# Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserved -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -import numpy as np -import sys -import os -import PIL.Image as Image -""" - Usage: python process_cifar input_dir output_dir -""" - - -def mkdir_not_exist(path): - """ - Make dir if the path does not exist. - path: the path to be created. - """ - if not os.path.exists(path): - os.mkdir(path) - - -def create_dir_structure(output_dir): - """ - Create the directory structure for the directory. - output_dir: the direcotry structure path. - """ - mkdir_not_exist(os.path.join(output_dir)) - mkdir_not_exist(os.path.join(output_dir, "train")) - mkdir_not_exist(os.path.join(output_dir, "test")) - - -def convert_batch(batch_path, label_set, label_map, output_dir, data_split): - """ - Convert CIFAR batch to the structure of Paddle format. - batch_path: the batch to be converted. - label_set: the set of labels. - output_dir: the output path. - data_split: whether it is training or testing data. - """ - data = np.load(batch_path) - for data, label, filename in zip(data['data'], data['labels'], - data['filenames']): - data = data.reshape((3, 32, 32)) - data = np.transpose(data, (1, 2, 0)) - label = label_map[label] - output_dir_this = os.path.join(output_dir, data_split, str(label)) - output_filename = os.path.join(output_dir_this, filename) - if not label in label_set: - label_set[label] = True - mkdir_not_exist(output_dir_this) - Image.fromarray(data).save(output_filename) - - -if __name__ == '__main__': - input_dir = sys.argv[1] - output_dir = sys.argv[2] - num_batch = 5 - create_dir_structure(output_dir) - label_map = { - 0: "airplane", - 1: "automobile", - 2: "bird", - 3: "cat", - 4: "deer", - 5: "dog", - 6: "frog", - 7: "horse", - 8: "ship", - 9: "truck" - } - labels = {} - for i in range(1, num_batch + 1): - convert_batch( - os.path.join(input_dir, "data_batch_%d" % i), labels, label_map, - output_dir, "train") - convert_batch( - os.path.join(input_dir, "test_batch"), {}, label_map, output_dir, - "test") diff --git a/demo/image_classification/image_provider.py b/demo/image_classification/image_provider.py deleted file mode 100644 index 6a315ff094c1af5f8250d8a22ff96740dddd9808..0000000000000000000000000000000000000000 --- a/demo/image_classification/image_provider.py +++ /dev/null @@ -1,89 +0,0 @@ -# Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserved -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -import io -import random - -import paddle.utils.image_util as image_util -from paddle.trainer.PyDataProvider2 import * - - -# -# {'img_size': 32, -# 'settings': a global object, -# 'color': True, -# 'mean_img_size': 32, -# 'meta': './data/cifar-out/batches/batches.meta', -# 'num_classes': 10, -# 'file_list': ('./data/cifar-out/batches/train_batch_000',), -# 'use_jpeg': True} -def hook(settings, img_size, mean_img_size, num_classes, color, meta, use_jpeg, - is_train, **kwargs): - settings.mean_img_size = mean_img_size - settings.img_size = img_size - settings.num_classes = num_classes - settings.color = color - settings.is_train = is_train - - if settings.color: - settings.img_raw_size = settings.img_size * settings.img_size * 3 - else: - settings.img_raw_size = settings.img_size * settings.img_size - - settings.meta_path = meta - settings.use_jpeg = use_jpeg - - settings.img_mean = image_util.load_meta(settings.meta_path, - settings.mean_img_size, - settings.img_size, settings.color) - - settings.logger.info('Image size: %s', settings.img_size) - settings.logger.info('Meta path: %s', settings.meta_path) - settings.input_types = { - 'image': dense_vector(settings.img_raw_size), - 'label': integer_value(settings.num_classes) - } - - settings.logger.info('DataProvider Initialization finished') - - -@provider(init_hook=hook, min_pool_size=0) -def processData(settings, file_list): - """ - The main function for loading data. - Load the batch, iterate all the images and labels in this batch. - file_list: the batch file list. - """ - with open(file_list, 'r') as fdata: - lines = [line.strip() for line in fdata] - random.shuffle(lines) - for file_name in lines: - with io.open(file_name.strip(), 'rb') as file: - data = cPickle.load(file) - indexes = list(range(len(data['images']))) - if settings.is_train: - random.shuffle(indexes) - for i in indexes: - if settings.use_jpeg == 1: - img = image_util.decode_jpeg(data['images'][i]) - else: - img = data['images'][i] - img_feat = image_util.preprocess_img( - img, settings.img_mean, settings.img_size, - settings.is_train, settings.color) - label = data['labels'][i] - yield { - 'image': img_feat.astype('float32'), - 'label': int(label) - } diff --git a/demo/image_classification/image_util.py b/demo/image_classification/image_util.py deleted file mode 100644 index f09605394a19e09d92e555eeefb0b5646625b618..0000000000000000000000000000000000000000 --- a/demo/image_classification/image_util.py +++ /dev/null @@ -1,221 +0,0 @@ -# Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserved -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -import numpy as np -from PIL import Image -from cStringIO import StringIO - - -def resize_image(img, target_size): - """ - Resize an image so that the shorter edge has length target_size. - img: the input image to be resized. - target_size: the target resized image size. - """ - percent = (target_size / float(min(img.size[0], img.size[1]))) - resized_size = int(round(img.size[0] * percent)), int( - round(img.size[1] * percent)) - img = img.resize(resized_size, Image.ANTIALIAS) - return img - - -def flip(im): - """ - Return the flipped image. - Flip an image along the horizontal direction. - im: input image, (H x W x K) ndarrays - """ - if len(im.shape) == 3: - return im[:, :, ::-1] - else: - return im[:, ::-1] - - -def crop_img(im, inner_size, color=True, test=True): - """ - Return cropped image. - The size of the cropped image is inner_size * inner_size. - im: (K x H x W) ndarrays - inner_size: the cropped image size. - color: whether it is color image. - test: whether in test mode. - If False, does random cropping and flipping. - If True, crop the center of images. - """ - if color: - height, width = max(inner_size, im.shape[1]), max(inner_size, - im.shape[2]) - padded_im = np.zeros((3, height, width)) - startY = (height - im.shape[1]) / 2 - startX = (width - im.shape[2]) / 2 - endY, endX = startY + im.shape[1], startX + im.shape[2] - padded_im[:, startY:endY, startX:endX] = im - else: - im = im.astype('float32') - height, width = max(inner_size, im.shape[0]), max(inner_size, - im.shape[1]) - padded_im = np.zeros((height, width)) - startY = (height - im.shape[0]) / 2 - startX = (width - im.shape[1]) / 2 - endY, endX = startY + im.shape[0], startX + im.shape[1] - padded_im[startY:endY, startX:endX] = im - if test: - startY = (height - inner_size) / 2 - startX = (width - inner_size) / 2 - else: - startY = np.random.randint(0, height - inner_size + 1) - startX = np.random.randint(0, width - inner_size + 1) - endY, endX = startY + inner_size, startX + inner_size - if color: - pic = padded_im[:, startY:endY, startX:endX] - else: - pic = padded_im[startY:endY, startX:endX] - if (not test) and (np.random.randint(2) == 0): - pic = flip(pic) - return pic - - -def decode_jpeg(jpeg_string): - np_array = np.array(Image.open(StringIO(jpeg_string))) - if len(np_array.shape) == 3: - np_array = np.transpose(np_array, (2, 0, 1)) - return np_array - - -def preprocess_img(im, img_mean, crop_size, is_train, color=True): - """ - Does data augmentation for images. - If is_train is false, cropping the center region from the image. - If is_train is true, randomly crop a region from the image, - and randomy does flipping. - im: (K x H x W) ndarrays - """ - im = im.astype('float32') - test = not is_train - pic = crop_img(im, crop_size, color, test) - pic -= img_mean - return pic.flatten() - - -def load_meta(meta_path, mean_img_size, crop_size, color=True): - """ - Return the loaded meta file. - Load the meta image, which is the mean of the images in the dataset. - The mean image is subtracted from every input image so that the expected mean - of each input image is zero. - """ - mean = np.load(meta_path)['data_mean'] - border = (mean_img_size - crop_size) / 2 - if color: - assert (mean_img_size * mean_img_size * 3 == mean.shape[0]) - mean = mean.reshape(3, mean_img_size, mean_img_size) - mean = mean[:, border:border + crop_size, border:border + - crop_size].astype('float32') - else: - assert (mean_img_size * mean_img_size == mean.shape[0]) - mean = mean.reshape(mean_img_size, mean_img_size) - mean = mean[border:border + crop_size, border:border + - crop_size].astype('float32') - return mean - - -def load_image(img_path, is_color=True): - """ - Load image and return. - img_path: image path. - is_color: is color image or not. - """ - img = Image.open(img_path) - img.load() - return img - - -def oversample(img, crop_dims): - """ - image : iterable of (H x W x K) ndarrays - crop_dims: (height, width) tuple for the crops. - Returned data contains ten crops of input image, namely, - four corner patches and the center patch as well as their - horizontal reflections. - """ - # Dimensions and center. - im_shape = np.array(img[0].shape) - crop_dims = np.array(crop_dims) - im_center = im_shape[:2] / 2.0 - - # Make crop coordinates - h_indices = (0, im_shape[0] - crop_dims[0]) - w_indices = (0, im_shape[1] - crop_dims[1]) - crops_ix = np.empty((5, 4), dtype=int) - curr = 0 - for i in h_indices: - for j in w_indices: - crops_ix[curr] = (i, j, i + crop_dims[0], j + crop_dims[1]) - curr += 1 - crops_ix[4] = np.tile(im_center, (1, 2)) + np.concatenate( - [-crop_dims / 2.0, crop_dims / 2.0]) - crops_ix = np.tile(crops_ix, (2, 1)) - - # Extract crops - crops = np.empty( - (10 * len(img), crop_dims[0], crop_dims[1], im_shape[-1]), - dtype=np.float32) - ix = 0 - for im in img: - for crop in crops_ix: - crops[ix] = im[crop[0]:crop[2], crop[1]:crop[3], :] - ix += 1 - crops[ix - 5:ix] = crops[ix - 5:ix, :, ::-1, :] # flip for mirrors - return crops - - -class ImageTransformer: - def __init__(self, - transpose=None, - channel_swap=None, - mean=None, - is_color=True): - self.transpose = transpose - self.channel_swap = None - self.mean = None - self.is_color = is_color - - def set_transpose(self, order): - if self.is_color: - assert 3 == len(order) - self.transpose = order - - def set_channel_swap(self, order): - if self.is_color: - assert 3 == len(order) - self.channel_swap = order - - def set_mean(self, mean): - # mean value, may be one value per channel - if mean.ndim == 1: - mean = mean[:, np.newaxis, np.newaxis] - else: - # elementwise mean - if self.is_color: - assert len(mean.shape) == 3 - self.mean = mean - - def transformer(self, data): - if self.transpose is not None: - data = data.transpose(self.transpose) - if self.channel_swap is not None: - data = data[self.channel_swap, :, :] - if self.mean is not None: - data -= self.mean - return data diff --git a/demo/image_classification/predict.sh b/demo/image_classification/predict.sh deleted file mode 100755 index 9d5785c9a1a4dac12f7940fa708b1a79c6ec8a93..0000000000000000000000000000000000000000 --- a/demo/image_classification/predict.sh +++ /dev/null @@ -1,20 +0,0 @@ -#!/bin/bash -# Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserved -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -set -e - -model=cifar_vgg_model/pass-00299/ -image=data/cifar-out/test/airplane/seaplane_s_000978.png -use_gpu=1 -python prediction.py $model $image $use_gpu diff --git a/demo/image_classification/prediction.py b/demo/image_classification/prediction.py deleted file mode 100755 index 49c0ff600c40e0222093ff0a8a2f7e8e38ccba29..0000000000000000000000000000000000000000 --- a/demo/image_classification/prediction.py +++ /dev/null @@ -1,159 +0,0 @@ -# Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserved -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -import os, sys -import numpy as np -import logging -from PIL import Image -from optparse import OptionParser - -import paddle.utils.image_util as image_util - -from py_paddle import swig_paddle, DataProviderConverter -from paddle.trainer.PyDataProvider2 import dense_vector -from paddle.trainer.config_parser import parse_config - -logging.basicConfig( - format='[%(levelname)s %(asctime)s %(filename)s:%(lineno)s] %(message)s') -logging.getLogger().setLevel(logging.INFO) - - -class ImageClassifier(): - def __init__(self, - train_conf, - use_gpu=True, - model_dir=None, - resize_dim=None, - crop_dim=None, - mean_file=None, - oversample=False, - is_color=True): - """ - train_conf: network configure. - model_dir: string, directory of model. - resize_dim: int, resized image size. - crop_dim: int, crop size. - mean_file: string, image mean file. - oversample: bool, oversample means multiple crops, namely five - patches (the four corner patches and the center - patch) as well as their horizontal reflections, - ten crops in all. - """ - self.train_conf = train_conf - self.model_dir = model_dir - if model_dir is None: - self.model_dir = os.path.dirname(train_conf) - - self.resize_dim = resize_dim - self.crop_dims = [crop_dim, crop_dim] - self.oversample = oversample - self.is_color = is_color - - self.transformer = image_util.ImageTransformer(is_color=is_color) - self.transformer.set_transpose((2, 0, 1)) - - self.mean_file = mean_file - mean = np.load(self.mean_file)['data_mean'] - mean = mean.reshape(3, self.crop_dims[0], self.crop_dims[1]) - self.transformer.set_mean(mean) # mean pixel - gpu = 1 if use_gpu else 0 - conf_args = "is_test=1,use_gpu=%d,is_predict=1" % (gpu) - conf = parse_config(train_conf, conf_args) - swig_paddle.initPaddle("--use_gpu=%d" % (gpu)) - self.network = swig_paddle.GradientMachine.createFromConfigProto( - conf.model_config) - assert isinstance(self.network, swig_paddle.GradientMachine) - self.network.loadParameters(self.model_dir) - - data_size = 3 * self.crop_dims[0] * self.crop_dims[1] - slots = [dense_vector(data_size)] - self.converter = DataProviderConverter(slots) - - def get_data(self, img_path): - """ - 1. load image from img_path. - 2. resize or oversampling. - 3. transformer data: transpose, sub mean. - return K x H x W ndarray. - img_path: image path. - """ - image = image_util.load_image(img_path, self.is_color) - if self.oversample: - # image_util.resize_image: short side is self.resize_dim - image = image_util.resize_image(image, self.resize_dim) - image = np.array(image) - input = np.zeros( - (1, image.shape[0], image.shape[1], 3), dtype=np.float32) - input[0] = image.astype(np.float32) - input = image_util.oversample(input, self.crop_dims) - else: - image = image.resize(self.crop_dims, Image.ANTIALIAS) - input = np.zeros( - (1, self.crop_dims[0], self.crop_dims[1], 3), dtype=np.float32) - input[0] = np.array(image).astype(np.float32) - - data_in = [] - for img in input: - img = self.transformer.transformer(img).flatten() - data_in.append([img.tolist()]) - return data_in - - def forward(self, input_data): - in_arg = self.converter(input_data) - return self.network.forwardTest(in_arg) - - def forward(self, data, output_layer): - """ - input_data: py_paddle input data. - output_layer: specify the name of probability, namely the layer with - softmax activation. - return: the predicting probability of each label. - """ - input = self.converter(data) - self.network.forwardTest(input) - output = self.network.getLayerOutputs(output_layer) - # For oversampling, average predictions across crops. - # If not, the shape of output[name]: (1, class_number), - # the mean is also applicable. - return output[output_layer]['value'].mean(0) - - def predict(self, image=None, output_layer=None): - assert isinstance(image, basestring) - assert isinstance(output_layer, basestring) - data = self.get_data(image) - prob = self.forward(data, output_layer) - lab = np.argsort(-prob) - logging.info("Label of %s is: %d", image, lab[0]) - - -if __name__ == '__main__': - image_size = 32 - crop_size = 32 - multi_crop = True - config = "vgg_16_cifar.py" - output_layer = "__fc_layer_1__" - mean_path = "data/cifar-out/batches/batches.meta" - model_path = sys.argv[1] - image = sys.argv[2] - use_gpu = bool(int(sys.argv[3])) - - obj = ImageClassifier( - train_conf=config, - model_dir=model_path, - resize_dim=image_size, - crop_dim=crop_size, - mean_file=mean_path, - use_gpu=use_gpu, - oversample=multi_crop) - obj.predict(image, output_layer) diff --git a/demo/image_classification/preprocess.py b/demo/image_classification/preprocess.py deleted file mode 100755 index 2947ad239c36f9a02ed67ccf5906380cb70e37ce..0000000000000000000000000000000000000000 --- a/demo/image_classification/preprocess.py +++ /dev/null @@ -1,54 +0,0 @@ -# Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserved -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -from paddle.utils.preprocess_img import ImageClassificationDatasetCreater -from optparse import OptionParser - - -def option_parser(): - parser = OptionParser(usage="usage: python preprcoess.py "\ - "-i data_dir [options]") - parser.add_option( - "-i", - "--input", - action="store", - dest="input", - help="Input data directory.") - parser.add_option( - "-s", - "--size", - action="store", - dest="size", - help="Processed image size.") - parser.add_option( - "-c", - "--color", - action="store", - dest="color", - help="whether to use color images.") - return parser.parse_args() - - -if __name__ == '__main__': - options, args = option_parser() - data_dir = options.input - processed_image_size = int(options.size) - color = options.color == "1" - data_creator = ImageClassificationDatasetCreater( - data_dir, processed_image_size, color) - data_creator.train_list_name = "train.txt" - data_creator.test_list_name = "test.txt" - data_creator.num_per_batch = 1000 - data_creator.overwrite = True - data_creator.create_batches() diff --git a/demo/image_classification/preprocess.sh b/demo/image_classification/preprocess.sh deleted file mode 100755 index c7396c6393599ef3f2c55089eb05f2435b2b4b82..0000000000000000000000000000000000000000 --- a/demo/image_classification/preprocess.sh +++ /dev/null @@ -1,22 +0,0 @@ -#!/bin/bash -# Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserved -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -set -e - -data_dir=./data/cifar-out - -python preprocess.py -i $data_dir -s 32 -c 1 - -echo "data/cifar-out/batches/train.txt" > train.list -echo "data/cifar-out/batches/test.txt" > test.list diff --git a/demo/image_classification/train.sh b/demo/image_classification/train.sh deleted file mode 100755 index e45bd47ad5925c6674d628a70a7ad7c4d5d5c173..0000000000000000000000000000000000000000 --- a/demo/image_classification/train.sh +++ /dev/null @@ -1,32 +0,0 @@ -#!/bin/bash -# Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserved -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -set -e -config=vgg_16_cifar.py -output=./cifar_vgg_model -log=train.log - -paddle train \ ---config=$config \ ---dot_period=10 \ ---log_period=100 \ ---test_all_data_in_one_period=1 \ ---use_gpu=1 \ ---trainer_count=1 \ ---num_passes=300 \ ---save_dir=$output \ -2>&1 | tee $log -paddle usage -l $log -e $? -n "image_classification_train" >/dev/null 2>&1 - -python -m paddle.utils.plotcurve -i $log > plot.png diff --git a/demo/image_classification/vgg_16_cifar.py b/demo/image_classification/vgg_16_cifar.py deleted file mode 100755 index 8ee4a64c15f885023a6e19812885b4f76bb12af9..0000000000000000000000000000000000000000 --- a/demo/image_classification/vgg_16_cifar.py +++ /dev/null @@ -1,58 +0,0 @@ -# Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserved -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -from paddle.trainer_config_helpers import * - -is_predict = get_config_arg("is_predict", bool, False) - -####################Data Configuration ################## -if not is_predict: - data_dir = 'data/cifar-out/batches/' - meta_path = data_dir + 'batches.meta' - - args = { - 'meta': meta_path, - 'mean_img_size': 32, - 'img_size': 32, - 'num_classes': 10, - 'use_jpeg': 1, - 'color': "color" - } - - define_py_data_sources2( - train_list="train.list", - test_list="train.list", - module='image_provider', - obj='processData', - args=args) - -######################Algorithm Configuration ############# -settings( - batch_size=128, - learning_rate=0.1 / 128.0, - learning_method=MomentumOptimizer(0.9), - regularization=L2Regularization(0.0005 * 128)) - -#######################Network Configuration ############# -data_size = 3 * 32 * 32 -label_size = 10 -img = data_layer(name='image', size=data_size) -# small_vgg is predefined in trainer_config_helpers.networks -predict = small_vgg(input_image=img, num_channels=3, num_classes=label_size) - -if not is_predict: - lbl = data_layer(name="label", size=label_size) - outputs(classification_cost(input=predict, label=lbl)) -else: - outputs(predict) diff --git a/demo/introduction/.gitignore b/demo/introduction/.gitignore deleted file mode 100644 index c54f3f9480ce4ceefda98f77a812ec2d6cd4a5e3..0000000000000000000000000000000000000000 --- a/demo/introduction/.gitignore +++ /dev/null @@ -1,5 +0,0 @@ -dataprovider.pyc -empty.list -train.log -output -train.list diff --git a/demo/introduction/README.md b/demo/introduction/README.md deleted file mode 100644 index 0614a7afe645677ef0b65a17ea05f1dcfa45214f..0000000000000000000000000000000000000000 --- a/demo/introduction/README.md +++ /dev/null @@ -1,3 +0,0 @@ -This folder contains scripts used in PaddlePaddle introduction. -- use `bash train.sh` to train a simple linear regression model -- use `python evaluate_model.py` to read model parameters. You can see that `w` and `b` are very close to [2, 0.3]. diff --git a/demo/introduction/api_train_v2.py b/demo/introduction/api_train_v2.py deleted file mode 100644 index 1ba971b3688ce3dec078998df2c0b183a4e449f8..0000000000000000000000000000000000000000 --- a/demo/introduction/api_train_v2.py +++ /dev/null @@ -1,58 +0,0 @@ -import paddle.v2 as paddle -import paddle.v2.dataset.uci_housing as uci_housing - - -def main(): - # init - paddle.init(use_gpu=False, trainer_count=1) - - # network config - x = paddle.layer.data(name='x', type=paddle.data_type.dense_vector(13)) - y_predict = paddle.layer.fc(input=x, - param_attr=paddle.attr.Param(name='w'), - size=1, - act=paddle.activation.Linear(), - bias_attr=paddle.attr.Param(name='b')) - y = paddle.layer.data(name='y', type=paddle.data_type.dense_vector(1)) - cost = paddle.layer.mse_cost(input=y_predict, label=y) - - # create parameters - parameters = paddle.parameters.create(cost) - - # create optimizer - optimizer = paddle.optimizer.Momentum(momentum=0) - - trainer = paddle.trainer.SGD(cost=cost, - parameters=parameters, - update_equation=optimizer) - - # event_handler to print training and testing info - def event_handler(event): - if isinstance(event, paddle.event.EndIteration): - if event.batch_id % 100 == 0: - print "Pass %d, Batch %d, Cost %f" % ( - event.pass_id, event.batch_id, event.cost) - - if isinstance(event, paddle.event.EndPass): - if (event.pass_id + 1) % 10 == 0: - result = trainer.test( - reader=paddle.batch( - uci_housing.test(), batch_size=2), - feeding={'x': 0, - 'y': 1}) - print "Test %d, %.2f" % (event.pass_id, result.cost) - - # training - trainer.train( - reader=paddle.batch( - paddle.reader.shuffle( - uci_housing.train(), buf_size=500), - batch_size=2), - feeding={'x': 0, - 'y': 1}, - event_handler=event_handler, - num_passes=30) - - -if __name__ == '__main__': - main() diff --git a/demo/introduction/dataprovider.py b/demo/introduction/dataprovider.py deleted file mode 100644 index 5b48aad0408800676ae7da16eba2dcbb2124f25f..0000000000000000000000000000000000000000 --- a/demo/introduction/dataprovider.py +++ /dev/null @@ -1,26 +0,0 @@ -# Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserved -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -from paddle.trainer.PyDataProvider2 import * -import random - - -# define data types of input: 2 real numbers -@provider( - input_types={'x': dense_vector(1), - 'y': dense_vector(1)}, use_seq=False) -def process(settings, input_file): - for i in xrange(2000): - x = random.random() - yield {'x': [x], 'y': [2 * x + 0.3]} diff --git a/demo/introduction/evaluate_model.py b/demo/introduction/evaluate_model.py deleted file mode 100755 index eeda43c5c86f3e49f758bf55b16a68387e64238c..0000000000000000000000000000000000000000 --- a/demo/introduction/evaluate_model.py +++ /dev/null @@ -1,39 +0,0 @@ -#!/usr/bin/env python -# -*- coding: UTF-8 -*- - -# Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserved -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -""" -Print model parameters in last model - -Usage: - python evaluate_model.py -""" -import numpy as np -import os - - -def load(file_name): - with open(file_name, 'rb') as f: - f.read(16) # skip header for float type. - return np.fromfile(f, dtype=np.float32) - - -def main(): - print 'w=%.6f, b=%.6f from pass 29' % (load('output/pass-00029/w'), - load('output/pass-00029/b')) - - -if __name__ == '__main__': - main() diff --git a/demo/introduction/train.sh b/demo/introduction/train.sh deleted file mode 100755 index 2ce6446d7c943ffc9bea8da43d153539f6f9f15f..0000000000000000000000000000000000000000 --- a/demo/introduction/train.sh +++ /dev/null @@ -1,22 +0,0 @@ -#!/bin/bash -# Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserved -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -set -e - -paddle train \ - --config=trainer_config.py \ - --save_dir=./output \ - --num_passes=30 \ - 2>&1 |tee 'train.log' -paddle usage -l "train.log" -e $? -n "introduction" >/dev/null 2>&1 diff --git a/demo/introduction/trainer_config.py b/demo/introduction/trainer_config.py deleted file mode 100644 index 651dfaa4b7b4873810a0b393655541a62d1a311b..0000000000000000000000000000000000000000 --- a/demo/introduction/trainer_config.py +++ /dev/null @@ -1,38 +0,0 @@ -# Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserved -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -from paddle.trainer_config_helpers import * - -# 1. read data. Suppose you saved above python code as dataprovider.py -define_py_data_sources2( - train_list=['no_matter.txt'], - test_list=None, - module='dataprovider', - obj='process', - args={}) - -# 2. learning algorithm -settings(batch_size=12, learning_rate=1e-3, learning_method=MomentumOptimizer()) - -# 3. Network configuration -x = data_layer(name='x', size=1) -y = data_layer(name='y', size=1) -y_predict = fc_layer( - input=x, - param_attr=ParamAttr(name='w'), - size=1, - act=LinearActivation(), - bias_attr=ParamAttr(name='b')) -cost = mse_cost(input=y_predict, label=y) -outputs(cost) diff --git a/demo/mnist/api_train_v2.py b/demo/mnist/api_train_v2.py deleted file mode 100644 index 6b95a88042a13a280bcb80f753b3887fcef37296..0000000000000000000000000000000000000000 --- a/demo/mnist/api_train_v2.py +++ /dev/null @@ -1,137 +0,0 @@ -import paddle.v2 as paddle -import gzip - - -def softmax_regression(img): - predict = paddle.layer.fc(input=img, - size=10, - act=paddle.activation.Softmax()) - return predict - - -def multilayer_perceptron(img): - # The first fully-connected layer - hidden1 = paddle.layer.fc(input=img, size=128, act=paddle.activation.Relu()) - # The second fully-connected layer and the according activation function - hidden2 = paddle.layer.fc(input=hidden1, - size=64, - act=paddle.activation.Relu()) - # The thrid fully-connected layer, note that the hidden size should be 10, - # which is the number of unique digits - predict = paddle.layer.fc(input=hidden2, - size=10, - act=paddle.activation.Softmax()) - return predict - - -def convolutional_neural_network(img): - # first conv layer - conv_pool_1 = paddle.networks.simple_img_conv_pool( - input=img, - filter_size=5, - num_filters=20, - num_channel=1, - pool_size=2, - pool_stride=2, - act=paddle.activation.Tanh()) - # second conv layer - conv_pool_2 = paddle.networks.simple_img_conv_pool( - input=conv_pool_1, - filter_size=5, - num_filters=50, - num_channel=20, - pool_size=2, - pool_stride=2, - act=paddle.activation.Tanh()) - # The first fully-connected layer - fc1 = paddle.layer.fc(input=conv_pool_2, - size=128, - act=paddle.activation.Tanh()) - # The softmax layer, note that the hidden size should be 10, - # which is the number of unique digits - predict = paddle.layer.fc(input=fc1, - size=10, - act=paddle.activation.Softmax()) - return predict - - -def main(): - paddle.init(use_gpu=False, trainer_count=1) - - # define network topology - images = paddle.layer.data( - name='pixel', type=paddle.data_type.dense_vector(784)) - label = paddle.layer.data( - name='label', type=paddle.data_type.integer_value(10)) - - # Here we can build the prediction network in different ways. Please - # choose one by uncomment corresponding line. - predict = softmax_regression(images) - #predict = multilayer_perceptron(images) - #predict = convolutional_neural_network(images) - - cost = paddle.layer.classification_cost(input=predict, label=label) - - try: - with gzip.open('params.tar.gz', 'r') as f: - parameters = paddle.parameters.Parameters.from_tar(f) - except IOError: - parameters = paddle.parameters.create(cost) - - optimizer = paddle.optimizer.Momentum( - learning_rate=0.1 / 128.0, - momentum=0.9, - regularization=paddle.optimizer.L2Regularization(rate=0.0005 * 128)) - - trainer = paddle.trainer.SGD(cost=cost, - parameters=parameters, - update_equation=optimizer) - - lists = [] - - def event_handler(event): - if isinstance(event, paddle.event.EndIteration): - if event.batch_id % 1000 == 0: - print "Pass %d, Batch %d, Cost %f, %s" % ( - event.pass_id, event.batch_id, event.cost, event.metrics) - - with gzip.open('params.tar.gz', 'w') as f: - parameters.to_tar(f) - - elif isinstance(event, paddle.event.EndPass): - result = trainer.test(reader=paddle.batch( - paddle.dataset.mnist.test(), batch_size=128)) - print "Test with Pass %d, Cost %f, %s\n" % ( - event.pass_id, result.cost, result.metrics) - lists.append((event.pass_id, result.cost, - result.metrics['classification_error_evaluator'])) - - trainer.train( - reader=paddle.batch( - paddle.reader.shuffle( - paddle.dataset.mnist.train(), buf_size=8192), - batch_size=128), - event_handler=event_handler, - num_passes=100) - - # find the best pass - best = sorted(lists, key=lambda list: float(list[1]))[0] - print 'Best pass is %s, testing Avgcost is %s' % (best[0], best[1]) - print 'The classification accuracy is %.2f%%' % (100 - float(best[2]) * 100) - - test_creator = paddle.dataset.mnist.test() - test_data = [] - for item in test_creator(): - test_data.append((item[0], )) - if len(test_data) == 100: - break - - # output is a softmax layer. It returns probabilities. - # Shape should be (100, 10) - probs = paddle.infer( - output_layer=predict, parameters=parameters, input=test_data) - print probs.shape - - -if __name__ == '__main__': - main() diff --git a/demo/recommendation/.gitignore b/demo/recommendation/.gitignore deleted file mode 100644 index fd27ef62a87cae51f2392c0eba50a44490d029af..0000000000000000000000000000000000000000 --- a/demo/recommendation/.gitignore +++ /dev/null @@ -1,10 +0,0 @@ -log.txt -data/meta.bin -data/ml-1m -data/ratings.dat.train -data/ratings.dat.test -data/train.list -data/test.list -dataprovider_copy_1.py -*.pyc -output diff --git a/demo/recommendation/api_train_v2.py b/demo/recommendation/api_train_v2.py deleted file mode 100644 index f6a061799e3ac50236a68beedaf700dd6c698a05..0000000000000000000000000000000000000000 --- a/demo/recommendation/api_train_v2.py +++ /dev/null @@ -1,125 +0,0 @@ -import paddle.v2 as paddle -import cPickle -import copy - - -def main(): - paddle.init(use_gpu=False) - movie_title_dict = paddle.dataset.movielens.get_movie_title_dict() - uid = paddle.layer.data( - name='user_id', - type=paddle.data_type.integer_value( - paddle.dataset.movielens.max_user_id() + 1)) - usr_emb = paddle.layer.embedding(input=uid, size=32) - - usr_gender_id = paddle.layer.data( - name='gender_id', type=paddle.data_type.integer_value(2)) - usr_gender_emb = paddle.layer.embedding(input=usr_gender_id, size=16) - - usr_age_id = paddle.layer.data( - name='age_id', - type=paddle.data_type.integer_value( - len(paddle.dataset.movielens.age_table))) - usr_age_emb = paddle.layer.embedding(input=usr_age_id, size=16) - - usr_job_id = paddle.layer.data( - name='job_id', - type=paddle.data_type.integer_value(paddle.dataset.movielens.max_job_id( - ) + 1)) - - usr_job_emb = paddle.layer.embedding(input=usr_job_id, size=16) - - usr_combined_features = paddle.layer.fc( - input=[usr_emb, usr_gender_emb, usr_age_emb, usr_job_emb], - size=200, - act=paddle.activation.Tanh()) - - mov_id = paddle.layer.data( - name='movie_id', - type=paddle.data_type.integer_value( - paddle.dataset.movielens.max_movie_id() + 1)) - mov_emb = paddle.layer.embedding(input=mov_id, size=32) - - mov_categories = paddle.layer.data( - name='category_id', - type=paddle.data_type.sparse_binary_vector( - len(paddle.dataset.movielens.movie_categories()))) - - mov_categories_hidden = paddle.layer.fc(input=mov_categories, size=32) - - mov_title_id = paddle.layer.data( - name='movie_title', - type=paddle.data_type.integer_value_sequence(len(movie_title_dict))) - mov_title_emb = paddle.layer.embedding(input=mov_title_id, size=32) - mov_title_conv = paddle.networks.sequence_conv_pool( - input=mov_title_emb, hidden_size=32, context_len=3) - - mov_combined_features = paddle.layer.fc( - input=[mov_emb, mov_categories_hidden, mov_title_conv], - size=200, - act=paddle.activation.Tanh()) - - inference = paddle.layer.cos_sim( - a=usr_combined_features, b=mov_combined_features, size=1, scale=5) - cost = paddle.layer.mse_cost( - input=inference, - label=paddle.layer.data( - name='score', type=paddle.data_type.dense_vector(1))) - - parameters = paddle.parameters.create(cost) - - trainer = paddle.trainer.SGD(cost=cost, - parameters=parameters, - update_equation=paddle.optimizer.Adam( - learning_rate=1e-4)) - feeding = { - 'user_id': 0, - 'gender_id': 1, - 'age_id': 2, - 'job_id': 3, - 'movie_id': 4, - 'category_id': 5, - 'movie_title': 6, - 'score': 7 - } - - def event_handler(event): - if isinstance(event, paddle.event.EndIteration): - if event.batch_id % 100 == 0: - print "Pass %d Batch %d Cost %.2f" % ( - event.pass_id, event.batch_id, event.cost) - - trainer.train( - reader=paddle.batch( - paddle.reader.shuffle( - paddle.dataset.movielens.train(), buf_size=8192), - batch_size=256), - event_handler=event_handler, - feeding=feeding, - num_passes=1) - - user_id = 234 - movie_id = 345 - - user = paddle.dataset.movielens.user_info()[user_id] - movie = paddle.dataset.movielens.movie_info()[movie_id] - - feature = user.value() + movie.value() - - def reader(): - yield feature - - infer_dict = copy.copy(feeding) - del infer_dict['score'] - - prediction = paddle.infer( - output=inference, - parameters=parameters, - reader=paddle.batch( - reader, batch_size=32), - feeding=infer_dict) - print(prediction + 5) / 2 - - -if __name__ == '__main__': - main() diff --git a/demo/recommendation/common_utils.py b/demo/recommendation/common_utils.py deleted file mode 100755 index c20c65286621d701ad58409b539bbe9c813d453a..0000000000000000000000000000000000000000 --- a/demo/recommendation/common_utils.py +++ /dev/null @@ -1,30 +0,0 @@ -# Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserved -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -from paddle.trainer.PyDataProvider2 import * - - -def meta_to_header(meta, name): - metas = meta[name]['__meta__']['raw_meta'] - for each_meta in metas: - slot_name = each_meta.get('name', '%s_id' % name) - if each_meta['type'] == 'id': - yield slot_name, integer_value(each_meta['max']) - elif each_meta['type'] == 'embedding': - is_seq = each_meta['seq'] == 'sequence' - yield slot_name, integer_value( - len(each_meta['dict']), - seq_type=SequenceType.SEQUENCE - if is_seq else SequenceType.NO_SEQUENCE) - elif each_meta['type'] == 'one_hot_dense': - yield slot_name, dense_vector(len(each_meta['dict'])) diff --git a/demo/recommendation/data/config.json b/demo/recommendation/data/config.json deleted file mode 100644 index f26e74ce47bb7843a571e6033f051c046b31f054..0000000000000000000000000000000000000000 --- a/demo/recommendation/data/config.json +++ /dev/null @@ -1,16 +0,0 @@ -{ - "user": { - "file": { - "name": "users.dat", - "delimiter": "::" - }, - "fields": ["id", "gender", "age", "occupation"] - }, - "movie": { - "file": { - "name": "movies.dat", - "delimiter": "::" - }, - "fields": ["id", "title", "genres"] - } -} diff --git a/demo/recommendation/data/config_generator.py b/demo/recommendation/data/config_generator.py deleted file mode 100644 index 4ca496a252dffc62ed62bb8f2a5ee1661a940580..0000000000000000000000000000000000000000 --- a/demo/recommendation/data/config_generator.py +++ /dev/null @@ -1,127 +0,0 @@ -#!/bin/env python2 -# Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserved -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -""" -config_generator.py - -Usage: - ./config_generator.py [--output_format=] - ./config_generator.py -h | --help - -Options: - -h --help Show this screen. - --output_format= Output Config format(json or yaml) [default: json]. -""" - -import json -import docopt -import copy - -DEFAULT_FILE = {"type": "split", "delimiter": ","} - -DEFAULT_FIELD = { - "id": { - "type": "id" - }, - "gender": { - "name": "gender", - "type": "embedding", - "dict": { - "type": "char_based" - } - }, - "age": { - "name": "age", - "type": "embedding", - "dict": { - "type": "whole_content", - "sort": True - } - }, - "occupation": { - "name": "occupation", - "type": "embedding", - "dict": { - "type": "whole_content", - "sort": "true" - } - }, - "title": { - "regex": { - "pattern": r"^(.*)\((\d+)\)$", - "group_id": 1, - "strip": True - }, - "name": "title", - "type": { - "name": "embedding", - "seq_type": "sequence", - }, - "dict": { - "type": "char_based" - } - }, - "genres": { - "type": "one_hot_dense", - "dict": { - "type": "split", - "delimiter": "|" - }, - "name": "genres" - } -} - - -def merge_dict(master_dict, slave_dict): - return dict(((k, master_dict.get(k) or slave_dict.get(k)) - for k in set(slave_dict) | set(master_dict))) - - -def main(filename, fmt): - with open(filename, 'r') as f: - conf = json.load(f) - obj = dict() - for k in conf: - val = conf[k] - file_dict = val['file'] - file_dict = merge_dict(file_dict, DEFAULT_FILE) - - fields = [] - for pos, field_key in enumerate(val['fields']): - assert isinstance(field_key, basestring) - field = copy.deepcopy(DEFAULT_FIELD[field_key]) - field['pos'] = pos - fields.append(field) - obj[k] = {"file": file_dict, "fields": fields} - meta = {"meta": obj} - # print meta - if fmt == 'json': - - def formatter(x): - import json - return json.dumps(x, indent=2) - elif fmt == 'yaml': - - def formatter(x): - import yaml - return yaml.safe_dump(x, default_flow_style=False) - else: - raise NotImplementedError("Dump format %s is not implemented" % fmt) - - print formatter(meta) - - -if __name__ == '__main__': - args = docopt.docopt(__doc__, version="0.1.0") - main(args[""], args["--output_format"]) diff --git a/demo/recommendation/data/meta_config.json b/demo/recommendation/data/meta_config.json deleted file mode 100644 index cc6a046e271dd0faaa47eeb5a5bef6d3604113fe..0000000000000000000000000000000000000000 --- a/demo/recommendation/data/meta_config.json +++ /dev/null @@ -1,81 +0,0 @@ -{ - "meta": { - "movie": { - "fields": [ - { - "type": "id", - "pos": 0 - }, - { - "regex": { - "pattern": "^(.*)\\((\\d+)\\)$", - "group_id": 1, - "strip": true - }, - "type": { - "seq_type": "sequence", - "name": "embedding" - }, - "dict": { - "type": "char_based" - }, - "name": "title", - "pos": 1 - }, - { - "type": "one_hot_dense", - "dict": { - "delimiter": "|", - "type": "split" - }, - "name": "genres", - "pos": 2 - } - ], - "file": { - "delimiter": "::", - "type": "split", - "name": "movies.dat" - } - }, - "user": { - "fields": [ - { - "type": "id", - "pos": 0 - }, - { - "type": "embedding", - "dict": { - "type": "char_based" - }, - "name": "gender", - "pos": 1 - }, - { - "type": "embedding", - "dict": { - "sort": true, - "type": "whole_content" - }, - "name": "age", - "pos": 2 - }, - { - "type": "embedding", - "dict": { - "sort": "true", - "type": "whole_content" - }, - "name": "occupation", - "pos": 3 - } - ], - "file": { - "delimiter": "::", - "type": "split", - "name": "users.dat" - } - } - } -} diff --git a/demo/recommendation/data/meta_generator.py b/demo/recommendation/data/meta_generator.py deleted file mode 100644 index 38e4679d266c331a751114cd13f0e3453016cf26..0000000000000000000000000000000000000000 --- a/demo/recommendation/data/meta_generator.py +++ /dev/null @@ -1,430 +0,0 @@ -#!/bin/env python2 -# Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserved -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -""" -Preprocess Movielens dataset, to get movie/user object. - -Usage: - ./preprocess.py [--config=] - ./preprocess.py -h | --help - -Options: - -h --help Show this screen. - --version Show version. - --config= Get MetaData config file [default: config.json]. -""" -import docopt -import os -import sys -import re -import collections - -try: - import cPickle as pickle -except ImportError: - import pickle - - -class UniqueIDGenerator(object): - def __init__(self): - self.pool = collections.defaultdict(self.__next_id__) - self.next_id = 0 - - def __next_id__(self): - tmp = self.next_id - self.next_id += 1 - return tmp - - def __call__(self, k): - return self.pool[k] - - def to_list(self): - ret_val = [None] * len(self.pool) - for k in self.pool.keys(): - ret_val[self.pool[k]] = k - return ret_val - - -class SortedIDGenerator(object): - def __init__(self): - self.__key_set__ = set() - self.dict = None - - def scan(self, key): - self.__key_set__.add(key) - - def finish_scan(self, compare=None, key=None, reverse=False): - self.__key_set__ = sorted( - list(self.__key_set__), cmp=compare, key=key, reverse=reverse) - self.dict = dict() - for idx, each_key in enumerate(self.__key_set__): - self.dict[each_key] = idx - - def __call__(self, key): - return self.dict[key] - - def to_list(self): - return self.__key_set__ - - -class SplitFileReader(object): - def __init__(self, work_dir, config): - assert isinstance(config, dict) - self.filename = config['name'] - self.delimiter = config.get('delimiter', ',') - self.work_dir = work_dir - - def read(self): - with open(os.path.join(self.work_dir, self.filename), 'r') as f: - for line in f: - line = line.strip() - if isinstance(self.delimiter, unicode): - self.delimiter = str(self.delimiter) - yield line.split(self.delimiter) - - @staticmethod - def create(work_dir, config): - assert isinstance(config, dict) - if config['type'] == 'split': - return SplitFileReader(work_dir, config) - - -class IFileReader(object): - READERS = [SplitFileReader] - - def read(self): - raise NotImplementedError() - - @staticmethod - def create(work_dir, config): - for reader_cls in IFileReader.READERS: - val = reader_cls.create(work_dir, config) - if val is not None: - return val - - -class IDFieldParser(object): - TYPE = 'id' - - def __init__(self, config): - self.__max_id__ = -sys.maxint - 1 - self.__min_id__ = sys.maxint - self.__id_count__ = 0 - - def scan(self, line): - idx = int(line) - self.__max_id__ = max(self.__max_id__, idx) - self.__min_id__ = min(self.__min_id__, idx) - self.__id_count__ += 1 - - def parse(self, line): - return int(line) - - def meta_field(self): - return { - "is_key": True, - 'max': self.__max_id__, - 'min': self.__min_id__, - 'count': self.__id_count__, - 'type': 'id' - } - - -class SplitEmbeddingDict(object): - def __init__(self, delimiter): - self.__id__ = UniqueIDGenerator() - self.delimiter = delimiter - - def scan(self, multi): - for val in multi.split(self.delimiter): - self.__id__(val) - - def parse(self, multi): - return map(self.__id__, multi.split(self.delimiter)) - - def meta_field(self): - return self.__id__.to_list() - - -class EmbeddingFieldParser(object): - TYPE = 'embedding' - - NO_SEQUENCE = "no_sequence" - SEQUENCE = "sequence" - - class CharBasedEmbeddingDict(object): - def __init__(self, is_seq=True): - self.__id__ = UniqueIDGenerator() - self.is_seq = is_seq - - def scan(self, s): - for ch in s: - self.__id__(ch) - - def parse(self, s): - return map(self.__id__, s) if self.is_seq else self.__id__(s[0]) - - def meta_field(self): - return self.__id__.to_list() - - class WholeContentDict(object): - def __init__(self, need_sort=True): - assert need_sort - self.__id__ = SortedIDGenerator() - self.__has_finished__ = False - - def scan(self, txt): - self.__id__.scan(txt) - - def meta_field(self): - if not self.__has_finished__: - self.__id__.finish_scan() - self.__has_finished__ = True - return self.__id__.to_list() - - def parse(self, txt): - return self.__id__(txt) - - def __init__(self, config): - try: - self.seq_type = config['type']['seq_type'] - except TypeError: - self.seq_type = EmbeddingFieldParser.NO_SEQUENCE - - if config['dict']['type'] == 'char_based': - self.dict = EmbeddingFieldParser.CharBasedEmbeddingDict( - self.seq_type == EmbeddingFieldParser.SEQUENCE) - elif config['dict']['type'] == 'split': - self.dict = SplitEmbeddingDict(config['dict'].get('delimiter', ',')) - elif config['dict']['type'] == 'whole_content': - self.dict = EmbeddingFieldParser.WholeContentDict(config['dict'][ - 'sort']) - else: - print config - assert False - - self.name = config['name'] - - def scan(self, s): - self.dict.scan(s) - - def meta_field(self): - return { - 'name': self.name, - 'dict': self.dict.meta_field(), - 'type': 'embedding', - 'seq': self.seq_type - } - - def parse(self, s): - return self.dict.parse(s) - - -class OneHotDenseFieldParser(object): - TYPE = 'one_hot_dense' - - def __init__(self, config): - if config['dict']['type'] == 'split': - self.dict = SplitEmbeddingDict(config['dict']['delimiter']) - self.name = config['name'] - - def scan(self, s): - self.dict.scan(s) - - def meta_field(self): - # print self.dict.meta_field() - return { - 'dict': self.dict.meta_field(), - 'name': self.name, - 'type': 'one_hot_dense' - } - - def parse(self, s): - ids = self.dict.parse(s) - retv = [0.0] * len(self.dict.meta_field()) - for idx in ids: - retv[idx] = 1.0 - # print retv - return retv - - -class FieldParserFactory(object): - PARSERS = [IDFieldParser, EmbeddingFieldParser, OneHotDenseFieldParser] - - @staticmethod - def create(config): - if isinstance(config['type'], basestring): - config_type = config['type'] - elif isinstance(config['type'], dict): - config_type = config['type']['name'] - - assert config_type is not None - - for each_parser_cls in FieldParserFactory.PARSERS: - if config_type == each_parser_cls.TYPE: - return each_parser_cls(config) - print config - - -class CompositeFieldParser(object): - def __init__(self, parser, extractor): - self.extractor = extractor - self.parser = parser - - def scan(self, *args, **kwargs): - self.parser.scan(self.extractor.extract(*args, **kwargs)) - - def parse(self, *args, **kwargs): - return self.parser.parse(self.extractor.extract(*args, **kwargs)) - - def meta_field(self): - return self.parser.meta_field() - - -class PositionContentExtractor(object): - def __init__(self, pos): - self.pos = pos - - def extract(self, line): - assert isinstance(line, list) - return line[self.pos] - - -class RegexPositionContentExtractor(PositionContentExtractor): - def __init__(self, pos, pattern, group_id, strip=True): - PositionContentExtractor.__init__(self, pos) - pattern = pattern.strip() - self.pattern = re.compile(pattern) - self.group_id = group_id - self.strip = strip - - def extract(self, line): - line = PositionContentExtractor.extract(self, line) - match = self.pattern.match(line) - # print line, self.pattern.pattern, match - assert match is not None - txt = match.group(self.group_id) - if self.strip: - txt.strip() - return txt - - -class ContentExtractorFactory(object): - def extract(self, line): - pass - - @staticmethod - def create(config): - if 'pos' in config: - if 'regex' not in config: - return PositionContentExtractor(config['pos']) - else: - extra_args = config['regex'] - return RegexPositionContentExtractor( - pos=config['pos'], **extra_args) - - -class MetaFile(object): - def __init__(self, work_dir): - self.work_dir = work_dir - self.obj = dict() - - def parse(self, config): - config = config['meta'] - - ret_obj = dict() - for key in config.keys(): - val = config[key] - assert 'file' in val - reader = IFileReader.create(self.work_dir, val['file']) - assert reader is not None - assert 'fields' in val and isinstance(val['fields'], list) - fields_config = val['fields'] - field_parsers = map(MetaFile.__field_config_mapper__, fields_config) - - for each_parser in field_parsers: - assert each_parser is not None - - for each_block in reader.read(): - for each_parser in field_parsers: - each_parser.scan(each_block) - - metas = map(lambda x: x.meta_field(), field_parsers) - # print metas - key_index = filter( - lambda x: x is not None, - map(lambda (idx, meta): idx if 'is_key' in meta and meta['is_key'] else None, - enumerate(metas)))[0] - - key_map = [] - for i in range(min(key_index, len(metas))): - key_map.append(i) - for i in range(key_index + 1, len(metas)): - key_map.append(i) - - obj = {'__meta__': {'raw_meta': metas, 'feature_map': key_map}} - - for each_block in reader.read(): - idx = field_parsers[key_index].parse(each_block) - val = [] - for i, each_parser in enumerate(field_parsers): - if i != key_index: - val.append(each_parser.parse(each_block)) - obj[idx] = val - ret_obj[key] = obj - self.obj = ret_obj - return ret_obj - - @staticmethod - def __field_config_mapper__(conf): - assert isinstance(conf, dict) - extrator = ContentExtractorFactory.create(conf) - field_parser = FieldParserFactory.create(conf) - assert extrator is not None - assert field_parser is not None - return CompositeFieldParser(field_parser, extrator) - - def dump(self, fp): - pickle.dump(self.obj, fp, pickle.HIGHEST_PROTOCOL) - - -def preprocess(binary_filename, dataset_dir, config, **kwargs): - assert isinstance(config, str) - with open(config, 'r') as config_file: - file_loader = None - if config.lower().endswith('.yaml'): - import yaml - file_loader = yaml - elif config.lower().endswith('.json'): - import json - file_loader = json - config = file_loader.load(config_file) - meta = MetaFile(dataset_dir) - meta.parse(config) - with open(binary_filename, 'wb') as outf: - meta.dump(outf) - - -if __name__ == '__main__': - args = docopt.docopt(__doc__, version='0.1.0') - kwargs = dict() - for key in args.keys(): - if key != '--help': - param_name = key - assert isinstance(param_name, str) - param_name = param_name.replace('<', '') - param_name = param_name.replace('>', '') - param_name = param_name.replace('--', '') - kwargs[param_name] = args[key] - preprocess(**kwargs) diff --git a/demo/recommendation/data/ml_data.sh b/demo/recommendation/data/ml_data.sh deleted file mode 100755 index 2268d876389e0bdf5ead405e74d278d276626f82..0000000000000000000000000000000000000000 --- a/demo/recommendation/data/ml_data.sh +++ /dev/null @@ -1,23 +0,0 @@ -#!/bin/bash -# Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserved -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -set -ex -cd "$(dirname "$0")" -# download the dataset -wget http://files.grouplens.org/datasets/movielens/ml-1m.zip -# unzip the dataset -unzip ml-1m.zip -# remove the unused zip file -rm ml-1m.zip diff --git a/demo/recommendation/data/split.py b/demo/recommendation/data/split.py deleted file mode 100644 index be6869c22f04be1db0f8e9c35c73c851e4c490b0..0000000000000000000000000000000000000000 --- a/demo/recommendation/data/split.py +++ /dev/null @@ -1,66 +0,0 @@ -#!/bin/env python2 -# Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserved -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -""" -Separate movielens 1m dataset to train/test file. - -Usage: - ./separate.py [--test_ratio=] [--delimiter=] - ./separate.py -h | --help - -Options: - -h --help Show this screen. - --version Show version. - --test_ratio= Test ratio for separate [default: 0.1]. - --delimiter= File delimiter [default: ,]. -""" -import docopt -import collections -import random - - -def process(test_ratio, input_file, delimiter, **kwargs): - test_ratio = float(test_ratio) - rating_dict = collections.defaultdict(list) - with open(input_file, 'r') as f: - for line in f: - user_id = int(line.split(delimiter)[0]) - rating_dict[user_id].append(line.strip()) - - with open(input_file + ".train", 'w') as train_file: - with open(input_file + ".test", 'w') as test_file: - for k in rating_dict.keys(): - lines = rating_dict[k] - assert isinstance(lines, list) - random.shuffle(lines) - test_len = int(len(lines) * test_ratio) - for line in lines[:test_len]: - print >> test_file, line - - for line in lines[test_len:]: - print >> train_file, line - - -if __name__ == '__main__': - args = docopt.docopt(__doc__, version='0.1.0') - kwargs = dict() - for key in args.keys(): - if key != '--help': - param_name = key - assert isinstance(param_name, str) - param_name = param_name.replace('<', '') - param_name = param_name.replace('>', '') - param_name = param_name.replace('--', '') - kwargs[param_name] = args[key] - process(**kwargs) diff --git a/demo/recommendation/dataprovider.py b/demo/recommendation/dataprovider.py deleted file mode 100755 index c4ff96d80e81926049c9a71d6d9d991c0b568c25..0000000000000000000000000000000000000000 --- a/demo/recommendation/dataprovider.py +++ /dev/null @@ -1,88 +0,0 @@ -# Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserved -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -from paddle.trainer.PyDataProvider2 import * -import common_utils # parse - - -def __list_to_map__(lst): - ret_val = dict() - for each in lst: - k, v = each - ret_val[k] = v - return ret_val - - -def hook(settings, meta, **kwargs): - """ - Init hook is invoked before process data. It will set obj.slots and store - data meta. - - :param obj: global object. It will passed to process routine. - :type obj: object - :param meta: the meta file object, which passed from trainer_config. Meta - file record movie/user features. - :param kwargs: unused other arguments. - """ - del kwargs # unused kwargs - - # Header define slots that used for paddle. - # first part is movie features. - # second part is user features. - # final part is rating score. - # header is a list of [USE_SEQ_OR_NOT?, SlotType] - movie_headers = list(common_utils.meta_to_header(meta, 'movie')) - settings.movie_names = [h[0] for h in movie_headers] - headers = movie_headers - user_headers = list(common_utils.meta_to_header(meta, 'user')) - settings.user_names = [h[0] for h in user_headers] - headers.extend(user_headers) - headers.append(("rating", dense_vector(1))) # Score - - # slot types. - settings.input_types = __list_to_map__(headers) - settings.meta = meta - - -@provider(init_hook=hook, cache=CacheType.CACHE_PASS_IN_MEM) -def process(settings, filename): - with open(filename, 'r') as f: - for line in f: - # Get a rating from file. - user_id, movie_id, score = map(int, line.split('::')[:-1]) - - # Scale score to [-5, +5] - score = float(score) * 2 - 5.0 - - # Get movie/user features by movie_id, user_id - movie_meta = settings.meta['movie'][movie_id] - user_meta = settings.meta['user'][user_id] - - outputs = [('movie_id', movie_id - 1)] - - # Then add movie features - for i, each_meta in enumerate(movie_meta): - outputs.append((settings.movie_names[i + 1], each_meta)) - - # Then add user id. - outputs.append(('user_id', user_id - 1)) - - # Then add user features. - for i, each_meta in enumerate(user_meta): - outputs.append((settings.user_names[i + 1], each_meta)) - - # Finally, add score - outputs.append(('rating', [score])) - # Return data to paddle - yield __list_to_map__(outputs) diff --git a/demo/recommendation/evaluate.py b/demo/recommendation/evaluate.py deleted file mode 100755 index 3afa7a1e9db5fefb1bbf5aaa174b8168afae4058..0000000000000000000000000000000000000000 --- a/demo/recommendation/evaluate.py +++ /dev/null @@ -1,37 +0,0 @@ -#!/usr/bin/python -# Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserved -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -import sys -import re -import math - - -def get_best_pass(log_filename): - with open(log_filename, 'r') as f: - text = f.read() - pattern = re.compile('Test.*? cost=([0-9]+\.[0-9]+).*?pass-([0-9]+)', - re.S) - results = re.findall(pattern, text) - sorted_results = sorted(results, key=lambda result: float(result[0])) - return sorted_results[0] - - -log_filename = sys.argv[1] -log = get_best_pass(log_filename) -predict_error = math.sqrt(float(log[0])) / 2 -print 'Best pass is %s, error is %s, which means predict get error as %f' % ( - log[1], log[0], predict_error) - -evaluate_pass = "output/pass-%s" % log[1] -print "evaluating from pass %s" % evaluate_pass diff --git a/demo/recommendation/evaluate.sh b/demo/recommendation/evaluate.sh deleted file mode 100755 index 02b2857de028bc9c05d7ddd67012043b671b2764..0000000000000000000000000000000000000000 --- a/demo/recommendation/evaluate.sh +++ /dev/null @@ -1,27 +0,0 @@ -#!/bin/bash -# Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserved -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -set -e - -function get_best_pass() { - cat $1 | grep -Pzo 'Test .*\n.*pass-.*' | sed -r 'N;s/Test.* cost=([0-9]+\.[0-9]+).*\n.*pass-([0-9]+)/\1 \2/g' | sort | head -n 1 -} - -LOG=`get_best_pass log.txt` -LOG=(${LOG}) -echo 'Best pass is '${LOG[1]}, ' error is '${LOG[0]}, 'which means predict get error as '`echo ${LOG[0]} | python -c 'import math; print math.sqrt(float(raw_input()))/2'` - -evaluate_pass="output/pass-${LOG[1]}" - -echo 'evaluating from pass '$evaluate_pass diff --git a/demo/recommendation/prediction.py b/demo/recommendation/prediction.py deleted file mode 100755 index 8ad993eab3a9f637cfff752bfedbbc62eaf3c8d5..0000000000000000000000000000000000000000 --- a/demo/recommendation/prediction.py +++ /dev/null @@ -1,51 +0,0 @@ -#!/bin/env python2 -# Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserved -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -from py_paddle import swig_paddle, DataProviderConverter - -from common_utils import * -from paddle.trainer.config_parser import parse_config - -try: - import cPickle as pickle -except ImportError: - import pickle -import sys - -if __name__ == '__main__': - model_path = sys.argv[1] - swig_paddle.initPaddle('--use_gpu=0') - conf = parse_config("trainer_config.py", "is_predict=1") - network = swig_paddle.GradientMachine.createFromConfigProto( - conf.model_config) - assert isinstance(network, swig_paddle.GradientMachine) - network.loadParameters(model_path) - with open('./data/meta.bin', 'rb') as f: - meta = pickle.load(f) - headers = [h[1] for h in meta_to_header(meta, 'movie')] - headers.extend([h[1] for h in meta_to_header(meta, 'user')]) - cvt = DataProviderConverter(headers) - while True: - movie_id = int(raw_input("Input movie_id: ")) - user_id = int(raw_input("Input user_id: ")) - movie_meta = meta['movie'][movie_id] # Query Data From Meta. - user_meta = meta['user'][user_id] - data = [movie_id - 1] - data.extend(movie_meta) - data.append(user_id - 1) - data.extend(user_meta) - print "Prediction Score is %.2f" % ( - (network.forwardTest(cvt.convert([data]))[0]['value'][0][0] + 5) - / 2) diff --git a/demo/recommendation/preprocess.sh b/demo/recommendation/preprocess.sh deleted file mode 100755 index eeb81ce3cb47e65c0aeb303e7571024ba82dad65..0000000000000000000000000000000000000000 --- a/demo/recommendation/preprocess.sh +++ /dev/null @@ -1,40 +0,0 @@ -#!/bin/bash -# Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserved -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -set -e - -UNAME_STR=`uname` - -if [[ ${UNAME_STR} == 'Linux' ]]; then - SHUF_PROG='shuf' -else - SHUF_PROG='gshuf' -fi - - -cd "$(dirname "$0")" -delimiter='::' -dir=ml-1m -cd data -echo 'generate meta config file' -python config_generator.py config.json > meta_config.json -echo 'generate meta file' -python meta_generator.py $dir meta.bin --config=meta_config.json -echo 'split train/test file' -python split.py $dir/ratings.dat --delimiter=${delimiter} --test_ratio=0.1 -echo 'shuffle train file' -${SHUF_PROG} $dir/ratings.dat.train > ratings.dat.train -cp $dir/ratings.dat.test . -echo "./data/ratings.dat.train" > train.list -echo "./data/ratings.dat.test" > test.list diff --git a/demo/recommendation/requirements.txt b/demo/recommendation/requirements.txt deleted file mode 100644 index 1ea154584a428b6a389309f1f8def502e0aadfce..0000000000000000000000000000000000000000 --- a/demo/recommendation/requirements.txt +++ /dev/null @@ -1,2 +0,0 @@ -PyYAML -docopt diff --git a/demo/recommendation/run.sh b/demo/recommendation/run.sh deleted file mode 100755 index 22aef556082ba429e9ca7c6dd3ec72699b9dbcf4..0000000000000000000000000000000000000000 --- a/demo/recommendation/run.sh +++ /dev/null @@ -1,25 +0,0 @@ -#!/bin/bash -# Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserved -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -set -e -paddle train \ - --config=trainer_config.py \ - --save_dir=./output \ - --use_gpu=false \ - --trainer_count=4\ - --test_all_data_in_one_period=true \ - --log_period=100 \ - --dot_period=1 \ - --num_passes=50 2>&1 | tee 'log.txt' -paddle usage -l log.txt -e $? -n "recommendation" >/dev/null 2>&1 diff --git a/demo/recommendation/trainer_config.py b/demo/recommendation/trainer_config.py deleted file mode 100755 index 25f529d7d7c430f179107fb189ade34760ab309d..0000000000000000000000000000000000000000 --- a/demo/recommendation/trainer_config.py +++ /dev/null @@ -1,98 +0,0 @@ -# Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserved -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -from paddle.trainer_config_helpers import * - -try: - import cPickle as pickle -except ImportError: - import pickle - -is_predict = get_config_arg('is_predict', bool, False) - -META_FILE = 'data/meta.bin' - -with open(META_FILE, 'rb') as f: - # load meta file - meta = pickle.load(f) - -settings( - batch_size=1600, learning_rate=1e-3, learning_method=RMSPropOptimizer()) - - -def construct_feature(name): - """ - Construct movie/user features. - - This method read from meta data. Then convert feature to neural network due - to feature type. The map relation as follow. - - * id: embedding => fc - * embedding: - is_sequence: embedding => context_projection => fc => pool - not sequence: embedding => fc - * one_hot_dense: fc => fc - - Then gather all features vector, and use a fc layer to combined them as - return. - - :param name: 'movie' or 'user' - :type name: basestring - :return: combined feature output - :rtype: LayerOutput - """ - __meta__ = meta[name]['__meta__']['raw_meta'] - fusion = [] - for each_meta in __meta__: - type_name = each_meta['type'] - slot_name = each_meta.get('name', '%s_id' % name) - if type_name == 'id': - slot_dim = each_meta['max'] - embedding = embedding_layer( - input=data_layer( - slot_name, size=slot_dim), size=256) - fusion.append(fc_layer(input=embedding, size=256)) - elif type_name == 'embedding': - is_seq = each_meta['seq'] == 'sequence' - slot_dim = len(each_meta['dict']) - din = data_layer(slot_name, slot_dim) - embedding = embedding_layer(input=din, size=256) - if is_seq: - fusion.append( - text_conv_pool( - input=embedding, context_len=5, hidden_size=256)) - else: - fusion.append(fc_layer(input=embedding, size=256)) - elif type_name == 'one_hot_dense': - slot_dim = len(each_meta['dict']) - hidden = fc_layer(input=data_layer(slot_name, slot_dim), size=256) - fusion.append(fc_layer(input=hidden, size=256)) - - return fc_layer(name="%s_fusion" % name, input=fusion, size=256) - - -movie_feature = construct_feature("movie") -user_feature = construct_feature("user") -similarity = cos_sim(a=movie_feature, b=user_feature) -if not is_predict: - outputs(mse_cost(input=similarity, label=data_layer('rating', size=1))) - - define_py_data_sources2( - 'data/train.list', - 'data/test.list', - module='dataprovider', - obj='process', - args={'meta': meta}) -else: - outputs(similarity) diff --git a/demo/semantic_role_labeling/.gitignore b/demo/semantic_role_labeling/.gitignore deleted file mode 100644 index 65c9b674c7d1dad53b7d1c6ee1dcbdb72553888d..0000000000000000000000000000000000000000 --- a/demo/semantic_role_labeling/.gitignore +++ /dev/null @@ -1,14 +0,0 @@ -*.pyc -train.log -data/feature -data/conll05st-release/ -data/src.dict -data/test.wsj.props -data/test.wsj.seq_pair -data/test.wsj.words -data/tgt.dict -output -data/emb -data/targetDict.txt -data/verbDict.txt -data/wordDict.txt diff --git a/demo/semantic_role_labeling/api_train_v2.py b/demo/semantic_role_labeling/api_train_v2.py deleted file mode 100644 index 036cad4b0a32357bb42580ef577a1eba558be8fe..0000000000000000000000000000000000000000 --- a/demo/semantic_role_labeling/api_train_v2.py +++ /dev/null @@ -1,190 +0,0 @@ -import sys -import math -import numpy as np -import paddle.v2 as paddle -import paddle.v2.dataset.conll05 as conll05 - - -def db_lstm(): - word_dict, verb_dict, label_dict = conll05.get_dict() - word_dict_len = len(word_dict) - label_dict_len = len(label_dict) - pred_len = len(verb_dict) - - mark_dict_len = 2 - word_dim = 32 - mark_dim = 5 - hidden_dim = 512 - depth = 8 - - #8 features - def d_type(size): - return paddle.data_type.integer_value_sequence(size) - - word = paddle.layer.data(name='word_data', type=d_type(word_dict_len)) - predicate = paddle.layer.data(name='verb_data', type=d_type(pred_len)) - - ctx_n2 = paddle.layer.data(name='ctx_n2_data', type=d_type(word_dict_len)) - ctx_n1 = paddle.layer.data(name='ctx_n1_data', type=d_type(word_dict_len)) - ctx_0 = paddle.layer.data(name='ctx_0_data', type=d_type(word_dict_len)) - ctx_p1 = paddle.layer.data(name='ctx_p1_data', type=d_type(word_dict_len)) - ctx_p2 = paddle.layer.data(name='ctx_p2_data', type=d_type(word_dict_len)) - mark = paddle.layer.data(name='mark_data', type=d_type(mark_dict_len)) - - target = paddle.layer.data(name='target', type=d_type(label_dict_len)) - - default_std = 1 / math.sqrt(hidden_dim) / 3.0 - - emb_para = paddle.attr.Param(name='emb', initial_std=0., learning_rate=0.) - std_0 = paddle.attr.Param(initial_std=0.) - std_default = paddle.attr.Param(initial_std=default_std) - - predicate_embedding = paddle.layer.embedding( - size=word_dim, - input=predicate, - param_attr=paddle.attr.Param( - name='vemb', initial_std=default_std)) - mark_embedding = paddle.layer.embedding( - size=mark_dim, input=mark, param_attr=std_0) - - word_input = [word, ctx_n2, ctx_n1, ctx_0, ctx_p1, ctx_p2] - emb_layers = [ - paddle.layer.embedding( - size=word_dim, input=x, param_attr=emb_para) for x in word_input - ] - emb_layers.append(predicate_embedding) - emb_layers.append(mark_embedding) - - hidden_0 = paddle.layer.mixed( - size=hidden_dim, - bias_attr=std_default, - input=[ - paddle.layer.full_matrix_projection( - input=emb, param_attr=std_default) for emb in emb_layers - ]) - - mix_hidden_lr = 1e-3 - lstm_para_attr = paddle.attr.Param(initial_std=0.0, learning_rate=1.0) - hidden_para_attr = paddle.attr.Param( - initial_std=default_std, learning_rate=mix_hidden_lr) - - lstm_0 = paddle.layer.lstmemory( - input=hidden_0, - act=paddle.activation.Relu(), - gate_act=paddle.activation.Sigmoid(), - state_act=paddle.activation.Sigmoid(), - bias_attr=std_0, - param_attr=lstm_para_attr) - - #stack L-LSTM and R-LSTM with direct edges - input_tmp = [hidden_0, lstm_0] - - for i in range(1, depth): - mix_hidden = paddle.layer.mixed( - size=hidden_dim, - bias_attr=std_default, - input=[ - paddle.layer.full_matrix_projection( - input=input_tmp[0], param_attr=hidden_para_attr), - paddle.layer.full_matrix_projection( - input=input_tmp[1], param_attr=lstm_para_attr) - ]) - - lstm = paddle.layer.lstmemory( - input=mix_hidden, - act=paddle.activation.Relu(), - gate_act=paddle.activation.Sigmoid(), - state_act=paddle.activation.Sigmoid(), - reverse=((i % 2) == 1), - bias_attr=std_0, - param_attr=lstm_para_attr) - - input_tmp = [mix_hidden, lstm] - - feature_out = paddle.layer.mixed( - size=label_dict_len, - bias_attr=std_default, - input=[ - paddle.layer.full_matrix_projection( - input=input_tmp[0], param_attr=hidden_para_attr), - paddle.layer.full_matrix_projection( - input=input_tmp[1], param_attr=lstm_para_attr) - ], ) - - crf_cost = paddle.layer.crf(size=label_dict_len, - input=feature_out, - label=target, - param_attr=paddle.attr.Param( - name='crfw', - initial_std=default_std, - learning_rate=mix_hidden_lr)) - - crf_dec = paddle.layer.crf_decoding( - name='crf_dec_l', - size=label_dict_len, - input=feature_out, - label=target, - param_attr=paddle.attr.Param(name='crfw')) - - return crf_cost, crf_dec - - -def load_parameter(file_name, h, w): - with open(file_name, 'rb') as f: - f.read(16) # skip header. - return np.fromfile(f, dtype=np.float32).reshape(h, w) - - -def main(): - paddle.init(use_gpu=False, trainer_count=1) - - # define network topology - crf_cost, crf_dec = db_lstm() - - # create parameters - parameters = paddle.parameters.create([crf_cost, crf_dec]) - - # create optimizer - optimizer = paddle.optimizer.Momentum( - momentum=0, - learning_rate=2e-2, - regularization=paddle.optimizer.L2Regularization(rate=8e-4), - model_average=paddle.optimizer.ModelAverage( - average_window=0.5, max_average_window=10000), ) - - def event_handler(event): - if isinstance(event, paddle.event.EndIteration): - if event.batch_id % 100 == 0: - print "Pass %d, Batch %d, Cost %f, %s" % ( - event.pass_id, event.batch_id, event.cost, event.metrics) - - trainer = paddle.trainer.SGD(cost=crf_cost, - parameters=parameters, - update_equation=optimizer) - parameters.set('emb', load_parameter(conll05.get_embedding(), 44068, 32)) - - trn_reader = paddle.batch( - paddle.reader.shuffle( - conll05.test(), buf_size=8192), batch_size=10) - - feeding = { - 'word_data': 0, - 'ctx_n2_data': 1, - 'ctx_n1_data': 2, - 'ctx_0_data': 3, - 'ctx_p1_data': 4, - 'ctx_p2_data': 5, - 'verb_data': 6, - 'mark_data': 7, - 'target': 8 - } - - trainer.train( - reader=trn_reader, - event_handler=event_handler, - num_passes=10000, - feeding=feeding) - - -if __name__ == '__main__': - main() diff --git a/demo/semantic_role_labeling/data/extract_dict_feature.py b/demo/semantic_role_labeling/data/extract_dict_feature.py deleted file mode 100644 index da44111976a0dec68345fc139d0aa459ca9211c2..0000000000000000000000000000000000000000 --- a/demo/semantic_role_labeling/data/extract_dict_feature.py +++ /dev/null @@ -1,81 +0,0 @@ -# Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserved -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -import sys -import os -from optparse import OptionParser - - -def extract_dict_features(pair_file, feature_file): - - with open(pair_file) as fin, open(feature_file, 'w') as feature_out: - for line in fin: - sentence, predicate, labels = line.strip().split('\t') - sentence_list = sentence.split() - labels_list = labels.split() - - verb_index = labels_list.index('B-V') - - mark = [0] * len(labels_list) - if verb_index > 0: - mark[verb_index - 1] = 1 - ctx_n1 = sentence_list[verb_index - 1] - else: - ctx_n1 = 'bos' - - if verb_index > 1: - mark[verb_index - 2] = 1 - ctx_n2 = sentence_list[verb_index - 2] - else: - ctx_n2 = 'bos' - - mark[verb_index] = 1 - ctx_0 = sentence_list[verb_index] - - if verb_index < len(labels_list) - 1: - mark[verb_index + 1] = 1 - ctx_p1 = sentence_list[verb_index + 1] - else: - ctx_p1 = 'eos' - - if verb_index < len(labels_list) - 2: - mark[verb_index + 2] = 1 - ctx_p2 = sentence_list[verb_index + 2] - else: - ctx_p2 = 'eos' - - - feature_str = sentence + '\t' \ - + predicate + '\t' \ - + ctx_n2 + '\t' \ - + ctx_n1 + '\t' \ - + ctx_0 + '\t' \ - + ctx_p1 + '\t' \ - + ctx_p2 + '\t' \ - + ' '.join([str(i) for i in mark]) + '\t' \ - + labels - - feature_out.write(feature_str + '\n') - - -if __name__ == '__main__': - - usage = '-p pair_file -f feature_file' - parser = OptionParser(usage) - parser.add_option('-p', dest='pair_file', help='the pair file') - parser.add_option('-f', dest='feature_file', help='the feature file') - - (options, args) = parser.parse_args() - - extract_dict_features(options.pair_file, options.feature_file) diff --git a/demo/semantic_role_labeling/data/extract_pairs.py b/demo/semantic_role_labeling/data/extract_pairs.py deleted file mode 100644 index 94a8488c16734eb1882d54f7ec36f4b9308c09d4..0000000000000000000000000000000000000000 --- a/demo/semantic_role_labeling/data/extract_pairs.py +++ /dev/null @@ -1,122 +0,0 @@ -# Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserved -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -import sys -import os -from optparse import OptionParser - - -def read_labels(props_file): - ''' - a sentence maybe has more than one verb, each verb has its label sequence - label[], is a 3-dimension list. - the first dim is to store all sentence's label seqs, len is the sentence number - the second dim is to store all label sequences for one sentences - the third dim is to store each label for one word - ''' - labels = [] - with open(props_file) as fin: - label_seqs_for_one_sentences = [] - one_seg_in_file = [] - for line in fin: - line = line.strip() - if line == '': - for i in xrange(len(one_seg_in_file[0])): - a_kind_lable = [x[i] for x in one_seg_in_file] - label_seqs_for_one_sentences.append(a_kind_lable) - labels.append(label_seqs_for_one_sentences) - one_seg_in_file = [] - label_seqs_for_one_sentences = [] - else: - part = line.split() - one_seg_in_file.append(part) - return labels - - -def read_sentences(words_file): - sentences = [] - with open(words_file) as fin: - s = '' - for line in fin: - line = line.strip() - if line == '': - sentences.append(s) - s = '' - else: - s += line + ' ' - return sentences - - -def transform_labels(sentences, labels): - sen_lab_pair = [] - for i in xrange(len(sentences)): - if len(labels[i]) == 1: - continue - else: - verb_list = [] - for x in labels[i][0]: - if x != '-': - verb_list.append(x) - - for j in xrange(1, len(labels[i])): - label_list = labels[i][j] - current_tag = 'O' - is_in_bracket = False - label_seq = [] - verb_word = '' - for ll in label_list: - if ll == '*' and is_in_bracket == False: - label_seq.append('O') - elif ll == '*' and is_in_bracket == True: - label_seq.append('I-' + current_tag) - elif ll == '*)': - label_seq.append('I-' + current_tag) - is_in_bracket = False - elif ll.find('(') != -1 and ll.find(')') != -1: - current_tag = ll[1:ll.find('*')] - label_seq.append('B-' + current_tag) - is_in_bracket = False - elif ll.find('(') != -1 and ll.find(')') == -1: - current_tag = ll[1:ll.find('*')] - label_seq.append('B-' + current_tag) - is_in_bracket = True - else: - print 'error:', ll - sen_lab_pair.append((sentences[i], verb_list[j - 1], label_seq)) - return sen_lab_pair - - -def write_file(sen_lab_pair, output_file): - with open(output_file, 'w') as fout: - for x in sen_lab_pair: - sentence = x[0] - label_seq = ' '.join(x[2]) - assert len(sentence.split()) == len(x[2]) - fout.write(sentence + '\t' + x[1] + '\t' + label_seq + '\n') - - -if __name__ == '__main__': - - usage = '-w words_file -p props_file -o output_file' - parser = OptionParser(usage) - parser.add_option('-w', dest='words_file', help='the words file') - parser.add_option('-p', dest='props_file', help='the props file') - parser.add_option('-o', dest='output_file', help='the output_file') - (options, args) = parser.parse_args() - - sentences = read_sentences(options.words_file) - labels = read_labels(options.props_file) - sen_lab_pair = transform_labels(sentences, labels) - - write_file(sen_lab_pair, options.output_file) diff --git a/demo/semantic_role_labeling/data/get_data.sh b/demo/semantic_role_labeling/data/get_data.sh deleted file mode 100755 index a0ef26a13b9a03392cb8b6207d6d21b7761e38e8..0000000000000000000000000000000000000000 --- a/demo/semantic_role_labeling/data/get_data.sh +++ /dev/null @@ -1,29 +0,0 @@ -#!/bin/bash -# Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserved -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -set -e -wget http://www.cs.upc.edu/~srlconll/conll05st-tests.tar.gz -wget http://paddlepaddle.bj.bcebos.com/demo/srl_dict_and_embedding/verbDict.txt -wget http://paddlepaddle.bj.bcebos.com/demo/srl_dict_and_embedding/targetDict.txt -wget http://paddlepaddle.bj.bcebos.com/demo/srl_dict_and_embedding/wordDict.txt -wget http://paddlepaddle.bj.bcebos.com/demo/srl_dict_and_embedding/emb -tar -xzvf conll05st-tests.tar.gz -rm conll05st-tests.tar.gz -cp ./conll05st-release/test.wsj/words/test.wsj.words.gz . -cp ./conll05st-release/test.wsj/props/test.wsj.props.gz . -gunzip test.wsj.words.gz -gunzip test.wsj.props.gz - -python extract_pairs.py -w test.wsj.words -p test.wsj.props -o test.wsj.seq_pair -python extract_dict_feature.py -p test.wsj.seq_pair -f feature diff --git a/demo/semantic_role_labeling/data/test.list b/demo/semantic_role_labeling/data/test.list deleted file mode 100644 index ec370e897a7811b572613150ccb6f665c3adb974..0000000000000000000000000000000000000000 --- a/demo/semantic_role_labeling/data/test.list +++ /dev/null @@ -1 +0,0 @@ -./data/feature diff --git a/demo/semantic_role_labeling/data/train.list b/demo/semantic_role_labeling/data/train.list deleted file mode 100644 index ec370e897a7811b572613150ccb6f665c3adb974..0000000000000000000000000000000000000000 --- a/demo/semantic_role_labeling/data/train.list +++ /dev/null @@ -1 +0,0 @@ -./data/feature diff --git a/demo/semantic_role_labeling/dataprovider.py b/demo/semantic_role_labeling/dataprovider.py deleted file mode 100644 index 360c57ea6283ca43986610abf1831742bfc0c3ef..0000000000000000000000000000000000000000 --- a/demo/semantic_role_labeling/dataprovider.py +++ /dev/null @@ -1,71 +0,0 @@ -# Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserved -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -from paddle.trainer.PyDataProvider2 import * - -UNK_IDX = 0 - - -def hook(settings, word_dict, label_dict, predicate_dict, **kwargs): - settings.word_dict = word_dict - settings.label_dict = label_dict - settings.predicate_dict = predicate_dict - - #all inputs are integral and sequential type - settings.slots = [ - integer_value_sequence(len(word_dict)), - integer_value_sequence(len(word_dict)), - integer_value_sequence(len(word_dict)), - integer_value_sequence(len(word_dict)), - integer_value_sequence(len(word_dict)), - integer_value_sequence(len(word_dict)), - integer_value_sequence(len(predicate_dict)), integer_value_sequence(2), - integer_value_sequence(len(label_dict)) - ] - - -def get_batch_size(yeild_data): - return len(yeild_data[0]) - - -@provider( - init_hook=hook, - should_shuffle=True, - calc_batch_size=get_batch_size, - can_over_batch_size=True, - cache=CacheType.CACHE_PASS_IN_MEM) -def process(settings, file_name): - with open(file_name, 'r') as fdata: - for line in fdata: - sentence, predicate, ctx_n2, ctx_n1, ctx_0, ctx_p1, ctx_p2, mark, label = \ - line.strip().split('\t') - - words = sentence.split() - sen_len = len(words) - word_slot = [settings.word_dict.get(w, UNK_IDX) for w in words] - - predicate_slot = [settings.predicate_dict.get(predicate)] * sen_len - ctx_n2_slot = [settings.word_dict.get(ctx_n2, UNK_IDX)] * sen_len - ctx_n1_slot = [settings.word_dict.get(ctx_n1, UNK_IDX)] * sen_len - ctx_0_slot = [settings.word_dict.get(ctx_0, UNK_IDX)] * sen_len - ctx_p1_slot = [settings.word_dict.get(ctx_p1, UNK_IDX)] * sen_len - ctx_p2_slot = [settings.word_dict.get(ctx_p2, UNK_IDX)] * sen_len - - marks = mark.split() - mark_slot = [int(w) for w in marks] - - label_list = label.split() - label_slot = [settings.label_dict.get(w) for w in label_list] - yield word_slot, ctx_n2_slot, ctx_n1_slot, \ - ctx_0_slot, ctx_p1_slot, ctx_p2_slot, predicate_slot, mark_slot, label_slot diff --git a/demo/semantic_role_labeling/db_lstm.py b/demo/semantic_role_labeling/db_lstm.py deleted file mode 100644 index 04e2a559b19bd4b9aec0242eb43edf6ab1e7624e..0000000000000000000000000000000000000000 --- a/demo/semantic_role_labeling/db_lstm.py +++ /dev/null @@ -1,218 +0,0 @@ -# Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserved -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -import math -import os -import sys -from paddle.trainer_config_helpers import * - -#file paths -word_dict_file = './data/wordDict.txt' -label_dict_file = './data/targetDict.txt' -predicate_file = './data/verbDict.txt' -train_list_file = './data/train.list' -test_list_file = './data/test.list' - -is_test = get_config_arg('is_test', bool, False) -is_predict = get_config_arg('is_predict', bool, False) - -if not is_predict: - #load dictionaries - word_dict = dict() - label_dict = dict() - predicate_dict = dict() - with open(word_dict_file, 'r') as f_word, \ - open(label_dict_file, 'r') as f_label, \ - open(predicate_file, 'r') as f_pre: - for i, line in enumerate(f_word): - w = line.strip() - word_dict[w] = i - - for i, line in enumerate(f_label): - w = line.strip() - label_dict[w] = i - - for i, line in enumerate(f_pre): - w = line.strip() - predicate_dict[w] = i - - if is_test: - train_list_file = None - - #define data provider - define_py_data_sources2( - train_list=train_list_file, - test_list=test_list_file, - module='dataprovider', - obj='process', - args={ - 'word_dict': word_dict, - 'label_dict': label_dict, - 'predicate_dict': predicate_dict - }) - - word_dict_len = len(word_dict) - label_dict_len = len(label_dict) - pred_len = len(predicate_dict) - -else: - word_dict_len = get_config_arg('dict_len', int) - label_dict_len = get_config_arg('label_len', int) - pred_len = get_config_arg('pred_len', int) - -############################## Hyper-parameters ################################## -mark_dict_len = 2 -word_dim = 32 -mark_dim = 5 -hidden_dim = 512 -depth = 8 - -########################### Optimizer ####################################### - -settings( - batch_size=150, - learning_method=MomentumOptimizer(momentum=0), - learning_rate=2e-2, - regularization=L2Regularization(8e-4), - is_async=False, - model_average=ModelAverage( - average_window=0.5, max_average_window=10000), ) - -####################################### network ############################## -#8 features and 1 target -word = data_layer(name='word_data', size=word_dict_len) -predicate = data_layer(name='verb_data', size=pred_len) - -ctx_n2 = data_layer(name='ctx_n2_data', size=word_dict_len) -ctx_n1 = data_layer(name='ctx_n1_data', size=word_dict_len) -ctx_0 = data_layer(name='ctx_0_data', size=word_dict_len) -ctx_p1 = data_layer(name='ctx_p1_data', size=word_dict_len) -ctx_p2 = data_layer(name='ctx_p2_data', size=word_dict_len) -mark = data_layer(name='mark_data', size=mark_dict_len) - -if not is_predict: - target = data_layer(name='target', size=label_dict_len) - -default_std = 1 / math.sqrt(hidden_dim) / 3.0 - -emb_para = ParameterAttribute(name='emb', initial_std=0., learning_rate=0.) -std_0 = ParameterAttribute(initial_std=0.) -std_default = ParameterAttribute(initial_std=default_std) - -predicate_embedding = embedding_layer( - size=word_dim, - input=predicate, - param_attr=ParameterAttribute( - name='vemb', initial_std=default_std)) -mark_embedding = embedding_layer( - name='word_ctx-in_embedding', size=mark_dim, input=mark, param_attr=std_0) - -word_input = [word, ctx_n2, ctx_n1, ctx_0, ctx_p1, ctx_p2] -emb_layers = [ - embedding_layer( - size=word_dim, input=x, param_attr=emb_para) for x in word_input -] -emb_layers.append(predicate_embedding) -emb_layers.append(mark_embedding) - -hidden_0 = mixed_layer( - name='hidden0', - size=hidden_dim, - bias_attr=std_default, - input=[ - full_matrix_projection( - input=emb, param_attr=std_default) for emb in emb_layers - ]) - -mix_hidden_lr = 1e-3 -lstm_para_attr = ParameterAttribute(initial_std=0.0, learning_rate=1.0) -hidden_para_attr = ParameterAttribute( - initial_std=default_std, learning_rate=mix_hidden_lr) - -lstm_0 = lstmemory( - name='lstm0', - input=hidden_0, - act=ReluActivation(), - gate_act=SigmoidActivation(), - state_act=SigmoidActivation(), - bias_attr=std_0, - param_attr=lstm_para_attr) - -#stack L-LSTM and R-LSTM with direct edges -input_tmp = [hidden_0, lstm_0] - -for i in range(1, depth): - - mix_hidden = mixed_layer( - name='hidden' + str(i), - size=hidden_dim, - bias_attr=std_default, - input=[ - full_matrix_projection( - input=input_tmp[0], param_attr=hidden_para_attr), - full_matrix_projection( - input=input_tmp[1], param_attr=lstm_para_attr) - ]) - - lstm = lstmemory( - name='lstm' + str(i), - input=mix_hidden, - act=ReluActivation(), - gate_act=SigmoidActivation(), - state_act=SigmoidActivation(), - reverse=((i % 2) == 1), - bias_attr=std_0, - param_attr=lstm_para_attr) - - input_tmp = [mix_hidden, lstm] - -feature_out = mixed_layer( - name='output', - size=label_dict_len, - bias_attr=std_default, - input=[ - full_matrix_projection( - input=input_tmp[0], param_attr=hidden_para_attr), - full_matrix_projection( - input=input_tmp[1], param_attr=lstm_para_attr) - ], ) - -if not is_predict: - crf_l = crf_layer( - name='crf', - size=label_dict_len, - input=feature_out, - label=target, - param_attr=ParameterAttribute( - name='crfw', initial_std=default_std, learning_rate=mix_hidden_lr)) - - crf_dec_l = crf_decoding_layer( - name='crf_dec_l', - size=label_dict_len, - input=feature_out, - label=target, - param_attr=ParameterAttribute(name='crfw')) - - eval = sum_evaluator(input=crf_dec_l) - - outputs(crf_l) - -else: - crf_dec_l = crf_decoding_layer( - name='crf_dec_l', - size=label_dict_len, - input=feature_out, - param_attr=ParameterAttribute(name='crfw')) - - outputs(crf_dec_l) diff --git a/demo/semantic_role_labeling/predict.py b/demo/semantic_role_labeling/predict.py deleted file mode 100644 index 372fd090b6e8f08f5bb34697772c2e4976810595..0000000000000000000000000000000000000000 --- a/demo/semantic_role_labeling/predict.py +++ /dev/null @@ -1,193 +0,0 @@ -# Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserved -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -import os -import numpy as np -from optparse import OptionParser -from py_paddle import swig_paddle, DataProviderConverter -from paddle.trainer.PyDataProvider2 import integer_value_sequence -from paddle.trainer.config_parser import parse_config -""" -Usage: run following command to show help message. - python predict.py -h -""" -UNK_IDX = 0 - - -class Prediction(): - def __init__(self, train_conf, dict_file, model_dir, label_file, - predicate_dict_file): - """ - train_conf: trainer configure. - dict_file: word dictionary file name. - model_dir: directory of model. - """ - - self.dict = {} - self.labels = {} - self.predicate_dict = {} - self.labels_reverse = {} - self.load_dict_label(dict_file, label_file, predicate_dict_file) - - len_dict = len(self.dict) - len_label = len(self.labels) - len_pred = len(self.predicate_dict) - - conf = parse_config( - train_conf, 'dict_len=' + str(len_dict) + ',label_len=' + - str(len_label) + ',pred_len=' + str(len_pred) + ',is_predict=True') - self.network = swig_paddle.GradientMachine.createFromConfigProto( - conf.model_config) - self.network.loadParameters(model_dir) - - slots = [ - integer_value_sequence(len_dict), integer_value_sequence(len_dict), - integer_value_sequence(len_dict), integer_value_sequence(len_dict), - integer_value_sequence(len_dict), integer_value_sequence(len_dict), - integer_value_sequence(len_pred), integer_value_sequence(2) - ] - self.converter = DataProviderConverter(slots) - - def load_dict_label(self, dict_file, label_file, predicate_dict_file): - """ - Load dictionary from self.dict_file. - """ - for line_count, line in enumerate(open(dict_file, 'r')): - self.dict[line.strip()] = line_count - - for line_count, line in enumerate(open(label_file, 'r')): - self.labels[line.strip()] = line_count - self.labels_reverse[line_count] = line.strip() - - for line_count, line in enumerate(open(predicate_dict_file, 'r')): - self.predicate_dict[line.strip()] = line_count - - def get_data(self, data_file): - """ - Get input data of paddle format. - """ - with open(data_file, 'r') as fdata: - for line in fdata: - sentence, predicate, ctx_n2, ctx_n1, ctx_0, ctx_p1, ctx_p2, mark, label = line.strip( - ).split('\t') - words = sentence.split() - sen_len = len(words) - - word_slot = [self.dict.get(w, UNK_IDX) for w in words] - predicate_slot = [self.predicate_dict.get(predicate, UNK_IDX) - ] * sen_len - ctx_n2_slot = [self.dict.get(ctx_n2, UNK_IDX)] * sen_len - ctx_n1_slot = [self.dict.get(ctx_n1, UNK_IDX)] * sen_len - ctx_0_slot = [self.dict.get(ctx_0, UNK_IDX)] * sen_len - ctx_p1_slot = [self.dict.get(ctx_p1, UNK_IDX)] * sen_len - ctx_p2_slot = [self.dict.get(ctx_p2, UNK_IDX)] * sen_len - - marks = mark.split() - mark_slot = [int(w) for w in marks] - - yield word_slot, ctx_n2_slot, ctx_n1_slot, \ - ctx_0_slot, ctx_p1_slot, ctx_p2_slot, predicate_slot, mark_slot - - def predict(self, data_file, output_file): - """ - data_file: file name of input data. - """ - input = self.converter(self.get_data(data_file)) - output = self.network.forwardTest(input) - lab = output[0]["id"].tolist() - - with open(data_file, 'r') as fin, open(output_file, 'w') as fout: - index = 0 - for line in fin: - sen = line.split('\t')[0] - len_sen = len(sen.split()) - line_labels = lab[index:index + len_sen] - index += len_sen - fout.write(sen + '\t' + ' '.join( - [self.labels_reverse[i] for i in line_labels]) + '\n') - - -def option_parser(): - usage = ( - "python predict.py -c config -w model_dir " - "-d word dictionary -l label_file -i input_file -p pred_dict_file") - parser = OptionParser(usage="usage: %s [options]" % usage) - parser.add_option( - "-c", - "--tconf", - action="store", - dest="train_conf", - help="network config") - parser.add_option( - "-d", - "--dict", - action="store", - dest="dict_file", - help="dictionary file") - parser.add_option( - "-l", - "--label", - action="store", - dest="label_file", - default=None, - help="label file") - parser.add_option( - "-p", - "--predict_dict_file", - action="store", - dest="predict_dict_file", - default=None, - help="predict_dict_file") - parser.add_option( - "-i", - "--data", - action="store", - dest="data_file", - help="data file to predict") - parser.add_option( - "-w", - "--model", - action="store", - dest="model_path", - default=None, - help="model path") - - parser.add_option( - "-o", - "--output_file", - action="store", - dest="output_file", - default=None, - help="output file") - return parser.parse_args() - - -def main(): - options, args = option_parser() - train_conf = options.train_conf - data_file = options.data_file - dict_file = options.dict_file - model_path = options.model_path - label_file = options.label_file - predict_dict_file = options.predict_dict_file - output_file = options.output_file - - swig_paddle.initPaddle("--use_gpu=0") - predict = Prediction(train_conf, dict_file, model_path, label_file, - predict_dict_file) - predict.predict(data_file, output_file) - - -if __name__ == '__main__': - main() diff --git a/demo/semantic_role_labeling/predict.sh b/demo/semantic_role_labeling/predict.sh deleted file mode 100755 index 873aad670d16803ce321ab60baabe9fe29ea64bf..0000000000000000000000000000000000000000 --- a/demo/semantic_role_labeling/predict.sh +++ /dev/null @@ -1,43 +0,0 @@ -#!/bin/bash - -# Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserved -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -set -e - -function get_best_pass() { - cat $1 | grep -Pzo 'Test .*\n.*pass-.*' | \ - sed -r 'N;s/Test.* cost=([0-9]+\.[0-9]+).*\n.*pass-([0-9]+)/\1 \2/g' | \ - sort -n | head -n 1 -} - -log=train.log -LOG=`get_best_pass $log` -LOG=(${LOG}) -best_model_path="output/pass-${LOG[1]}" - -config_file=db_lstm.py -dict_file=./data/wordDict.txt -label_file=./data/targetDict.txt -predicate_dict_file=./data/verbDict.txt -input_file=./data/feature -output_file=predict.res - -python predict.py \ - -c $config_file \ - -w $best_model_path \ - -l $label_file \ - -p $predicate_dict_file \ - -d $dict_file \ - -i $input_file \ - -o $output_file diff --git a/demo/semantic_role_labeling/test.sh b/demo/semantic_role_labeling/test.sh deleted file mode 100755 index 095bbff2ea42627a13d8ebab436f5a05abc09743..0000000000000000000000000000000000000000 --- a/demo/semantic_role_labeling/test.sh +++ /dev/null @@ -1,41 +0,0 @@ -#!/bin/bash - -# Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserved -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -set -e - -function get_best_pass() { - cat $1 | grep -Pzo 'Test .*\n.*pass-.*' | \ - sed -r 'N;s/Test.* cost=([0-9]+\.[0-9]+).*\n.*pass-([0-9]+)/\1 \2/g' |\ - sort -n | head -n 1 -} - -log=train.log -LOG=`get_best_pass $log` -LOG=(${LOG}) -evaluate_pass="output/pass-${LOG[1]}" - -echo 'evaluating from pass '$evaluate_pass -model_list=./model.list -touch $model_list | echo $evaluate_pass > $model_list - -paddle train \ - --config=./db_lstm.py \ - --model_list=$model_list \ - --job=test \ - --use_gpu=false \ - --config_args=is_test=1 \ - --test_all_data_in_one_period=1 \ -2>&1 | tee 'test.log' -paddle usage -l test.log -e $? -n "semantic_role_labeling_test" >/dev/null 2>&1 diff --git a/demo/semantic_role_labeling/train.sh b/demo/semantic_role_labeling/train.sh deleted file mode 100755 index eee14010d7b04a1b824f39090fa82fc532085e0d..0000000000000000000000000000000000000000 --- a/demo/semantic_role_labeling/train.sh +++ /dev/null @@ -1,30 +0,0 @@ -#!/bin/bash - -# Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserved -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -set -e -paddle train \ - --config=./db_lstm.py \ - --use_gpu=0 \ - --log_period=5000 \ - --trainer_count=1 \ - --show_parameter_stats_period=5000 \ - --save_dir=./output \ - --num_passes=10000 \ - --average_test_period=10000000 \ - --init_model_path=./data \ - --load_missing_parameter_strategy=rand \ - --test_all_data_in_one_period=1 \ - 2>&1 | tee 'train.log' -paddle usage -l train.log -e $? -n "semantic_role_labeling_train" >/dev/null 2>&1 diff --git a/demo/sentiment/.gitignore b/demo/sentiment/.gitignore deleted file mode 100644 index bf2a9ab1ce3c937bf06179074cd952dc53591dfd..0000000000000000000000000000000000000000 --- a/demo/sentiment/.gitignore +++ /dev/null @@ -1,11 +0,0 @@ -data/aclImdb -data/imdb -data/pre-imdb -data/mosesdecoder-master -logs/ -model_output -dataprovider_copy_1.py -model.list -test.log -train.log -*.pyc diff --git a/demo/sentiment/data/get_imdb.sh b/demo/sentiment/data/get_imdb.sh deleted file mode 100755 index 7600af6fbb900ee845702f1297779c1f0ed9bf84..0000000000000000000000000000000000000000 --- a/demo/sentiment/data/get_imdb.sh +++ /dev/null @@ -1,51 +0,0 @@ -#!/bin/bash -# Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserved -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -set -e -set -x - -DIR="$( cd "$(dirname "$0")" ; pwd -P )" -cd $DIR - -#download the dataset -echo "Downloading aclImdb..." -#http://ai.stanford.edu/%7Eamaas/data/sentiment/ -wget http://ai.stanford.edu/%7Eamaas/data/sentiment/aclImdb_v1.tar.gz - -echo "Downloading mosesdecoder..." -#https://github.com/moses-smt/mosesdecoder -wget https://github.com/moses-smt/mosesdecoder/archive/master.zip - -#extract package -echo "Unzipping..." -tar -zxvf aclImdb_v1.tar.gz -unzip master.zip - -#move train and test set to imdb_data directory -#in order to process when traing -mkdir -p imdb/train -mkdir -p imdb/test - -cp -r aclImdb/train/pos/ imdb/train/pos -cp -r aclImdb/train/neg/ imdb/train/neg - -cp -r aclImdb/test/pos/ imdb/test/pos -cp -r aclImdb/test/neg/ imdb/test/neg - -#remove compressed package -rm aclImdb_v1.tar.gz -rm master.zip - -echo "Done." diff --git a/demo/sentiment/dataprovider.py b/demo/sentiment/dataprovider.py deleted file mode 100755 index 4b7f5d0e504aef3884a04cbed8c16503a4079772..0000000000000000000000000000000000000000 --- a/demo/sentiment/dataprovider.py +++ /dev/null @@ -1,37 +0,0 @@ -# Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserved -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -from paddle.trainer.PyDataProvider2 import * - - -def hook(settings, dictionary, **kwargs): - settings.word_dict = dictionary - settings.input_types = [ - integer_value_sequence(len(settings.word_dict)), integer_value(2) - ] - settings.logger.info('dict len : %d' % (len(settings.word_dict))) - - -@provider(init_hook=hook) -def process(settings, file_name): - with open(file_name, 'r') as fdata: - for line_count, line in enumerate(fdata): - label, comment = line.strip().split('\t\t') - label = int(label) - words = comment.split() - word_slot = [ - settings.word_dict[w] for w in words if w in settings.word_dict - ] - if not word_slot: - continue - yield word_slot, label diff --git a/demo/sentiment/predict.py b/demo/sentiment/predict.py deleted file mode 100755 index 64c78e0d6b9297e7a321a4f070517593b0bfe332..0000000000000000000000000000000000000000 --- a/demo/sentiment/predict.py +++ /dev/null @@ -1,154 +0,0 @@ -# Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserved -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -import os, sys -import numpy as np -from optparse import OptionParser -from py_paddle import swig_paddle, DataProviderConverter -from paddle.trainer.PyDataProvider2 import integer_value_sequence -from paddle.trainer.config_parser import parse_config -""" -Usage: run following command to show help message. - python predict.py -h -""" - - -class SentimentPrediction(): - def __init__(self, train_conf, dict_file, model_dir=None, label_file=None): - """ - train_conf: trainer configure. - dict_file: word dictionary file name. - model_dir: directory of model. - """ - self.train_conf = train_conf - self.dict_file = dict_file - self.word_dict = {} - self.dict_dim = self.load_dict() - self.model_dir = model_dir - if model_dir is None: - self.model_dir = os.path.dirname(train_conf) - - self.label = None - if label_file is not None: - self.load_label(label_file) - - conf = parse_config(train_conf, "is_predict=1") - self.network = swig_paddle.GradientMachine.createFromConfigProto( - conf.model_config) - self.network.loadParameters(self.model_dir) - input_types = [integer_value_sequence(self.dict_dim)] - self.converter = DataProviderConverter(input_types) - - def load_dict(self): - """ - Load dictionary from self.dict_file. - """ - for line_count, line in enumerate(open(self.dict_file, 'r')): - self.word_dict[line.strip().split('\t')[0]] = line_count - return len(self.word_dict) - - def load_label(self, label_file): - """ - Load label. - """ - self.label = {} - for v in open(label_file, 'r'): - self.label[int(v.split('\t')[1])] = v.split('\t')[0] - - def get_index(self, data): - """ - transform word into integer index according to the dictionary. - """ - words = data.strip().split() - word_slot = [self.word_dict[w] for w in words if w in self.word_dict] - return word_slot - - def batch_predict(self, data_batch): - input = self.converter(data_batch) - output = self.network.forwardTest(input) - prob = output[0]["value"] - labs = np.argsort(-prob) - for idx, lab in enumerate(labs): - if self.label is None: - print("predicting label is %d" % (lab[0])) - else: - print("predicting label is %s" % (self.label[lab[0]])) - - -def option_parser(): - usage = "python predict.py -n config -w model_dir -d dictionary -i input_file " - parser = OptionParser(usage="usage: %s [options]" % usage) - parser.add_option( - "-n", - "--tconf", - action="store", - dest="train_conf", - help="network config") - parser.add_option( - "-d", - "--dict", - action="store", - dest="dict_file", - help="dictionary file") - parser.add_option( - "-b", - "--label", - action="store", - dest="label", - default=None, - help="dictionary file") - parser.add_option( - "-c", - "--batch_size", - type="int", - action="store", - dest="batch_size", - default=1, - help="the batch size for prediction") - parser.add_option( - "-w", - "--model", - action="store", - dest="model_path", - default=None, - help="model path") - return parser.parse_args() - - -def main(): - options, args = option_parser() - train_conf = options.train_conf - batch_size = options.batch_size - dict_file = options.dict_file - model_path = options.model_path - label = options.label - swig_paddle.initPaddle("--use_gpu=0") - predict = SentimentPrediction(train_conf, dict_file, model_path, label) - - batch = [] - for line in sys.stdin: - words = predict.get_index(line) - if words: - batch.append([words]) - else: - print('All the words in [%s] are not in the dictionary.' % line) - if len(batch) == batch_size: - predict.batch_predict(batch) - batch = [] - if len(batch) > 0: - predict.batch_predict(batch) - - -if __name__ == '__main__': - main() diff --git a/demo/sentiment/predict.sh b/demo/sentiment/predict.sh deleted file mode 100755 index c72a8e8641516543ef267fcb4b448630246d1e8d..0000000000000000000000000000000000000000 --- a/demo/sentiment/predict.sh +++ /dev/null @@ -1,27 +0,0 @@ -#!/bin/bash -# Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserved -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -set -e - -#Note the default model is pass-00002, you shold make sure the model path -#exists or change the mode path. -model=model_output/pass-00002/ -config=trainer_config.py -label=data/pre-imdb/labels.list -cat ./data/aclImdb/test/pos/10007_10.txt | python predict.py \ - --tconf=$config\ - --model=$model \ - --label=$label \ - --dict=./data/pre-imdb/dict.txt \ - --batch_size=1 diff --git a/demo/sentiment/preprocess.py b/demo/sentiment/preprocess.py deleted file mode 100755 index 29b3682b747c66574590de5ea70574981cc536bb..0000000000000000000000000000000000000000 --- a/demo/sentiment/preprocess.py +++ /dev/null @@ -1,359 +0,0 @@ -# Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserved -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -import os -import sys -import random -import operator -import numpy as np -from subprocess import Popen, PIPE -from os.path import join as join_path -from optparse import OptionParser - -from paddle.utils.preprocess_util import * -""" -Usage: run following command to show help message. - python preprocess.py -h -""" - - -def save_dict(dict, filename, is_reverse=True): - """ - Save dictionary into file. - dict: input dictionary. - filename: output file name, string. - is_reverse: True, descending order by value. - False, ascending order by value. - """ - f = open(filename, 'w') - for k, v in sorted(dict.items(), key=operator.itemgetter(1),\ - reverse=is_reverse): - f.write('%s\t%s\n' % (k, v)) - f.close() - - -def tokenize(sentences): - """ - Use tokenizer.perl to tokenize input sentences. - tokenizer.perl is tool of Moses. - sentences : a list of input sentences. - return: a list of processed text. - """ - dir = './data/mosesdecoder-master/scripts/tokenizer/tokenizer.perl' - tokenizer_cmd = [dir, '-l', 'en', '-q', '-'] - assert isinstance(sentences, list) - text = "\n".join(sentences) - tokenizer = Popen(tokenizer_cmd, stdin=PIPE, stdout=PIPE) - tok_text, _ = tokenizer.communicate(text) - toks = tok_text.split('\n')[:-1] - return toks - - -def read_lines(path): - """ - path: String, file path. - return a list of sequence. - """ - seqs = [] - with open(path, 'r') as f: - for line in f.readlines(): - line = line.strip() - if len(line): - seqs.append(line) - return seqs - - -class SentimentDataSetCreate(): - """ - A class to process data for sentiment analysis task. - """ - - def __init__(self, - data_path, - output_path, - use_okenizer=True, - multi_lines=False): - """ - data_path: string, traing and testing dataset path - output_path: string, output path, store processed dataset - multi_lines: whether a file has multi lines. - In order to shuffle fully, it needs to read all files into - memory, then shuffle them if one file has multi lines. - """ - self.output_path = output_path - self.data_path = data_path - - self.train_dir = 'train' - self.test_dir = 'test' - - self.train_list = "train.list" - self.test_list = "test.list" - - self.label_list = "labels.list" - self.classes_num = 0 - - self.batch_size = 50000 - self.batch_dir = 'batches' - - self.dict_file = "dict.txt" - self.dict_with_test = False - self.dict_size = 0 - self.word_count = {} - - self.tokenizer = use_okenizer - self.overwrite = False - - self.multi_lines = multi_lines - - self.train_dir = join_path(data_path, self.train_dir) - self.test_dir = join_path(data_path, self.test_dir) - self.train_list = join_path(output_path, self.train_list) - self.test_list = join_path(output_path, self.test_list) - self.label_list = join_path(output_path, self.label_list) - self.dict_file = join_path(output_path, self.dict_file) - - def data_list(self, path): - """ - create dataset from path - path: data path - return: data list - """ - label_set = get_label_set_from_dir(path) - data = [] - for lab_name in label_set.keys(): - file_paths = list_files(join_path(path, lab_name)) - for p in file_paths: - data.append({"label" : label_set[lab_name],\ - "seq_path": p}) - return data, label_set - - def create_dict(self, data): - """ - create dict for input data. - data: list, [sequence, sequnce, ...] - """ - for seq in data: - for w in seq.strip().lower().split(): - if w not in self.word_count: - self.word_count[w] = 1 - else: - self.word_count[w] += 1 - - def create_dataset(self): - """ - create file batches and dictionary of train data set. - If the self.overwrite is false and train.list already exists in - self.output_path, this function will not create and save file - batches from the data set path. - return: dictionary size, class number. - """ - out_path = self.output_path - if out_path and not os.path.exists(out_path): - os.makedirs(out_path) - - # If self.overwrite is false or self.train_list has existed, - # it will not process dataset. - if not (self.overwrite or not os.path.exists(self.train_list)): - print "%s already exists." % self.train_list - return - - # Preprocess train data. - train_data, train_lab_set = self.data_list(self.train_dir) - print "processing train set..." - file_lists = self.save_data(train_data, "train", self.batch_size, True, - True) - save_list(file_lists, self.train_list) - - # If have test data path, preprocess test data. - if os.path.exists(self.test_dir): - test_data, test_lab_set = self.data_list(self.test_dir) - assert (train_lab_set == test_lab_set) - print "processing test set..." - file_lists = self.save_data(test_data, "test", self.batch_size, - False, self.dict_with_test) - save_list(file_lists, self.test_list) - - # save labels set. - save_dict(train_lab_set, self.label_list, False) - self.classes_num = len(train_lab_set.keys()) - - # save dictionary. - save_dict(self.word_count, self.dict_file, True) - self.dict_size = len(self.word_count) - - def save_data(self, - data, - prefix="", - batch_size=50000, - is_shuffle=False, - build_dict=False): - """ - Create batches for a Dataset object. - data: the Dataset object to process. - prefix: the prefix of each batch. - batch_size: number of data in each batch. - build_dict: whether to build dictionary for data - - return: list of batch names - """ - if is_shuffle and self.multi_lines: - return self.save_data_multi_lines(data, prefix, batch_size, - build_dict) - - if is_shuffle: - random.shuffle(data) - num_batches = int(math.ceil(len(data) / float(batch_size))) - batch_names = [] - for i in range(num_batches): - batch_name = join_path(self.output_path, - "%s_part_%03d" % (prefix, i)) - begin = i * batch_size - end = min((i + 1) * batch_size, len(data)) - # read a batch of data - label_list, data_list = self.get_data_list(begin, end, data) - if build_dict: - self.create_dict(data_list) - self.save_file(label_list, data_list, batch_name) - batch_names.append(batch_name) - - return batch_names - - def get_data_list(self, begin, end, data): - """ - begin: int, begining index of data. - end: int, ending index of data. - data: a list of {"seq_path": seqquence path, "label": label index} - - return a list of label and a list of sequence. - """ - label_list = [] - data_list = [] - for j in range(begin, end): - seqs = read_lines(data[j]["seq_path"]) - lab = int(data[j]["label"]) - #File may have multiple lines. - for seq in seqs: - data_list.append(seq) - label_list.append(lab) - if self.tokenizer: - data_list = tokenize(data_list) - return label_list, data_list - - def save_data_multi_lines(self, - data, - prefix="", - batch_size=50000, - build_dict=False): - """ - In order to shuffle fully, there is no need to load all data if - each file only contains one sample, it only needs to shuffle list - of file name. But one file contains multi lines, each line is one - sample. It needs to read all data into memory to shuffle fully. - This interface is mainly for data containning multi lines in each - file, which consumes more memory if there is a great mount of data. - - data: the Dataset object to process. - prefix: the prefix of each batch. - batch_size: number of data in each batch. - build_dict: whether to build dictionary for data - - return: list of batch names - """ - assert self.multi_lines - label_list = [] - data_list = [] - - # read all data - label_list, data_list = self.get_data_list(0, len(data), data) - if build_dict: - self.create_dict(data_list) - - length = len(label_list) - perm_list = np.array([i for i in xrange(length)]) - random.shuffle(perm_list) - - num_batches = int(math.ceil(length / float(batch_size))) - batch_names = [] - for i in range(num_batches): - batch_name = join_path(self.output_path, - "%s_part_%03d" % (prefix, i)) - begin = i * batch_size - end = min((i + 1) * batch_size, length) - sub_label = [label_list[perm_list[i]] for i in range(begin, end)] - sub_data = [data_list[perm_list[i]] for i in range(begin, end)] - self.save_file(sub_label, sub_data, batch_name) - batch_names.append(batch_name) - - return batch_names - - def save_file(self, label_list, data_list, filename): - """ - Save data into file. - label_list: a list of int value. - data_list: a list of sequnece. - filename: output file name. - """ - f = open(filename, 'w') - print "saving file: %s" % filename - for lab, seq in zip(label_list, data_list): - f.write('%s\t\t%s\n' % (lab, seq)) - f.close() - - -def option_parser(): - parser = OptionParser(usage="usage: python preprcoess.py "\ - "-i data_dir [options]") - parser.add_option( - "-i", - "--data", - action="store", - dest="input", - help="Input data directory.") - parser.add_option( - "-o", - "--output", - action="store", - dest="output", - default=None, - help="Output directory.") - parser.add_option( - "-t", - "--tokenizer", - action="store", - dest="use_tokenizer", - default=True, - help="Whether to use tokenizer.") - parser.add_option("-m", "--multi_lines", action="store", - dest="multi_lines", default=False, - help="If input text files have multi lines and they "\ - "need to be shuffled, you should set -m True,") - return parser.parse_args() - - -def main(): - options, args = option_parser() - data_dir = options.input - output_dir = options.output - use_tokenizer = options.use_tokenizer - multi_lines = options.multi_lines - if output_dir is None: - outname = os.path.basename(options.input) - output_dir = join_path(os.path.dirname(data_dir), 'pre-' + outname) - data_creator = SentimentDataSetCreate(data_dir, output_dir, use_tokenizer, - multi_lines) - data_creator.create_dataset() - - -if __name__ == '__main__': - main() diff --git a/demo/sentiment/preprocess.sh b/demo/sentiment/preprocess.sh deleted file mode 100755 index 19ec34d4f016365d18db01ddec559d26202b19c6..0000000000000000000000000000000000000000 --- a/demo/sentiment/preprocess.sh +++ /dev/null @@ -1,22 +0,0 @@ -#!/bin/bash -# Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserved -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -set -e - -echo "Start to preprcess..." - -data_dir="./data/imdb" -python preprocess.py -i $data_dir - -echo "Done." diff --git a/demo/sentiment/sentiment_net.py b/demo/sentiment/sentiment_net.py deleted file mode 100644 index a01577ca5ae025b7bec67c6d54c7dbd931dbee74..0000000000000000000000000000000000000000 --- a/demo/sentiment/sentiment_net.py +++ /dev/null @@ -1,145 +0,0 @@ -# Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserved -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -from os.path import join as join_path - -from paddle.trainer_config_helpers import * - - -def sentiment_data(data_dir=None, - is_test=False, - is_predict=False, - train_list="train.list", - test_list="test.list", - dict_file="dict.txt"): - """ - Predefined data provider for sentiment analysis. - is_test: whether this config is used for test. - is_predict: whether this config is used for prediction. - train_list: text file name, containing a list of training set. - test_list: text file name, containing a list of testing set. - dict_file: text file name, containing dictionary. - """ - dict_dim = len(open(join_path(data_dir, "dict.txt")).readlines()) - class_dim = len(open(join_path(data_dir, 'labels.list')).readlines()) - if is_predict: - return dict_dim, class_dim - - if data_dir is not None: - train_list = join_path(data_dir, train_list) - test_list = join_path(data_dir, test_list) - dict_file = join_path(data_dir, dict_file) - - train_list = train_list if not is_test else None - word_dict = dict() - with open(dict_file, 'r') as f: - for i, line in enumerate(open(dict_file, 'r')): - word_dict[line.split('\t')[0]] = i - - define_py_data_sources2( - train_list, - test_list, - module="dataprovider", - obj="process", - args={'dictionary': word_dict}) - - return dict_dim, class_dim - - -def bidirectional_lstm_net(input_dim, - class_dim=2, - emb_dim=128, - lstm_dim=128, - is_predict=False): - data = data_layer("word", input_dim) - emb = embedding_layer(input=data, size=emb_dim) - bi_lstm = bidirectional_lstm(input=emb, size=lstm_dim) - dropout = dropout_layer(input=bi_lstm, dropout_rate=0.5) - output = fc_layer(input=dropout, size=class_dim, act=SoftmaxActivation()) - - if not is_predict: - lbl = data_layer("label", 1) - outputs(classification_cost(input=output, label=lbl)) - else: - outputs(output) - - -def stacked_lstm_net(input_dim, - class_dim=2, - emb_dim=128, - hid_dim=512, - stacked_num=3, - is_predict=False): - """ - A Wrapper for sentiment classification task. - This network uses bi-directional recurrent network, - consisting three LSTM layers. This configure is referred to - the paper as following url, but use fewer layrs. - http://www.aclweb.org/anthology/P15-1109 - - input_dim: here is word dictionary dimension. - class_dim: number of categories. - emb_dim: dimension of word embedding. - hid_dim: dimension of hidden layer. - stacked_num: number of stacked lstm-hidden layer. - is_predict: is predicting or not. - Some layers is not needed in network when predicting. - """ - hid_lr = 1e-3 - assert stacked_num % 2 == 1 - - layer_attr = ExtraLayerAttribute(drop_rate=0.5) - fc_para_attr = ParameterAttribute(learning_rate=hid_lr) - lstm_para_attr = ParameterAttribute(initial_std=0., learning_rate=1.) - para_attr = [fc_para_attr, lstm_para_attr] - bias_attr = ParameterAttribute(initial_std=0., l2_rate=0.) - relu = ReluActivation() - linear = LinearActivation() - - data = data_layer("word", input_dim) - emb = embedding_layer(input=data, size=emb_dim) - - fc1 = fc_layer(input=emb, size=hid_dim, act=linear, bias_attr=bias_attr) - lstm1 = lstmemory( - input=fc1, act=relu, bias_attr=bias_attr, layer_attr=layer_attr) - - inputs = [fc1, lstm1] - for i in range(2, stacked_num + 1): - fc = fc_layer( - input=inputs, - size=hid_dim, - act=linear, - param_attr=para_attr, - bias_attr=bias_attr) - lstm = lstmemory( - input=fc, - reverse=(i % 2) == 0, - act=relu, - bias_attr=bias_attr, - layer_attr=layer_attr) - inputs = [fc, lstm] - - fc_last = pooling_layer(input=inputs[0], pooling_type=MaxPooling()) - lstm_last = pooling_layer(input=inputs[1], pooling_type=MaxPooling()) - output = fc_layer( - input=[fc_last, lstm_last], - size=class_dim, - act=SoftmaxActivation(), - bias_attr=bias_attr, - param_attr=para_attr) - - if is_predict: - outputs(output) - else: - outputs(classification_cost(input=output, label=data_layer('label', 1))) diff --git a/demo/sentiment/test.sh b/demo/sentiment/test.sh deleted file mode 100755 index 85c4f3ccfc3ede23fcf701769b9701ecbf57c789..0000000000000000000000000000000000000000 --- a/demo/sentiment/test.sh +++ /dev/null @@ -1,40 +0,0 @@ -#!/bin/bash -# Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserved -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -set -e - -function get_best_pass() { - cat $1 | grep -Pzo 'Test .*\n.*pass-.*' | \ - sed -r 'N;s/Test.* classification_error_evaluator=([0-9]+\.[0-9]+).*\n.*pass-([0-9]+)/\1 \2/g' |\ - sort -n | head -n 1 -} - -log=train.log -LOG=`get_best_pass $log` -LOG=(${LOG}) -evaluate_pass="model_output/pass-${LOG[1]}" - -echo 'evaluating from pass '$evaluate_pass - -model_list=./model.list -touch $model_list | echo $evaluate_pass > $model_list -net_conf=trainer_config.py -paddle train --config=$net_conf \ - --model_list=$model_list \ - --job=test \ - --use_gpu=false \ - --trainer_count=4 \ - --config_args=is_test=1 \ - 2>&1 | tee 'test.log' -paddle usage -l test.log -e $? -n "sentiment_test" >/dev/null 2>&1 diff --git a/demo/sentiment/train.sh b/demo/sentiment/train.sh deleted file mode 100755 index 14620f733bf03444e5ba3b3b792dfbed6146ecde..0000000000000000000000000000000000000000 --- a/demo/sentiment/train.sh +++ /dev/null @@ -1,30 +0,0 @@ -#!/bin/bash -# Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserved -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -set -e - -config=trainer_config.py -output=./model_output -paddle train --config=$config \ - --save_dir=$output \ - --job=train \ - --use_gpu=false \ - --trainer_count=4 \ - --num_passes=10 \ - --log_period=10 \ - --dot_period=20 \ - --show_parameter_stats_period=100 \ - --test_all_data_in_one_period=1 \ - 2>&1 | tee 'train.log' -paddle usage -l train.log -e $? -n "sentiment_train" >/dev/null 2>&1 diff --git a/demo/sentiment/train_v2.py b/demo/sentiment/train_v2.py deleted file mode 100644 index 1c856556bd0cb32f60eba322469b3621c37e1349..0000000000000000000000000000000000000000 --- a/demo/sentiment/train_v2.py +++ /dev/null @@ -1,159 +0,0 @@ -# Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserved -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -import sys -import paddle.v2 as paddle - - -def convolution_net(input_dim, class_dim=2, emb_dim=128, hid_dim=128): - data = paddle.layer.data("word", - paddle.data_type.integer_value_sequence(input_dim)) - emb = paddle.layer.embedding(input=data, size=emb_dim) - conv_3 = paddle.networks.sequence_conv_pool( - input=emb, context_len=3, hidden_size=hid_dim) - conv_4 = paddle.networks.sequence_conv_pool( - input=emb, context_len=4, hidden_size=hid_dim) - output = paddle.layer.fc(input=[conv_3, conv_4], - size=class_dim, - act=paddle.activation.Softmax()) - lbl = paddle.layer.data("label", paddle.data_type.integer_value(2)) - cost = paddle.layer.classification_cost(input=output, label=lbl) - return cost - - -def stacked_lstm_net(input_dim, - class_dim=2, - emb_dim=128, - hid_dim=512, - stacked_num=3): - """ - A Wrapper for sentiment classification task. - This network uses bi-directional recurrent network, - consisting three LSTM layers. This configure is referred to - the paper as following url, but use fewer layrs. - http://www.aclweb.org/anthology/P15-1109 - - input_dim: here is word dictionary dimension. - class_dim: number of categories. - emb_dim: dimension of word embedding. - hid_dim: dimension of hidden layer. - stacked_num: number of stacked lstm-hidden layer. - """ - assert stacked_num % 2 == 1 - - layer_attr = paddle.attr.Extra(drop_rate=0.5) - fc_para_attr = paddle.attr.Param(learning_rate=1e-3) - lstm_para_attr = paddle.attr.Param(initial_std=0., learning_rate=1.) - para_attr = [fc_para_attr, lstm_para_attr] - bias_attr = paddle.attr.Param(initial_std=0., l2_rate=0.) - relu = paddle.activation.Relu() - linear = paddle.activation.Linear() - - data = paddle.layer.data("word", - paddle.data_type.integer_value_sequence(input_dim)) - emb = paddle.layer.embedding(input=data, size=emb_dim) - - fc1 = paddle.layer.fc(input=emb, - size=hid_dim, - act=linear, - bias_attr=bias_attr) - lstm1 = paddle.layer.lstmemory( - input=fc1, act=relu, bias_attr=bias_attr, layer_attr=layer_attr) - - inputs = [fc1, lstm1] - for i in range(2, stacked_num + 1): - fc = paddle.layer.fc(input=inputs, - size=hid_dim, - act=linear, - param_attr=para_attr, - bias_attr=bias_attr) - lstm = paddle.layer.lstmemory( - input=fc, - reverse=(i % 2) == 0, - act=relu, - bias_attr=bias_attr, - layer_attr=layer_attr) - inputs = [fc, lstm] - - fc_last = paddle.layer.pooling( - input=inputs[0], pooling_type=paddle.pooling.Max()) - lstm_last = paddle.layer.pooling( - input=inputs[1], pooling_type=paddle.pooling.Max()) - output = paddle.layer.fc(input=[fc_last, lstm_last], - size=class_dim, - act=paddle.activation.Softmax(), - bias_attr=bias_attr, - param_attr=para_attr) - - lbl = paddle.layer.data("label", paddle.data_type.integer_value(2)) - cost = paddle.layer.classification_cost(input=output, label=lbl) - return cost - - -if __name__ == '__main__': - # init - paddle.init(use_gpu=False) - - #data - print 'load dictionary...' - word_dict = paddle.dataset.imdb.word_dict() - dict_dim = len(word_dict) - class_dim = 2 - train_reader = paddle.batch( - paddle.reader.shuffle( - lambda: paddle.dataset.imdb.train(word_dict), buf_size=1000), - batch_size=100) - test_reader = paddle.batch( - lambda: paddle.dataset.imdb.test(word_dict), batch_size=100) - - feeding = {'word': 0, 'label': 1} - - # network config - # Please choose the way to build the network - # by uncommenting the corresponding line. - cost = convolution_net(dict_dim, class_dim=class_dim) - # cost = stacked_lstm_net(dict_dim, class_dim=class_dim, stacked_num=3) - - # create parameters - parameters = paddle.parameters.create(cost) - - # create optimizer - adam_optimizer = paddle.optimizer.Adam( - learning_rate=2e-3, - regularization=paddle.optimizer.L2Regularization(rate=8e-4), - model_average=paddle.optimizer.ModelAverage(average_window=0.5)) - - # End batch and end pass event handler - def event_handler(event): - if isinstance(event, paddle.event.EndIteration): - if event.batch_id % 100 == 0: - print "\nPass %d, Batch %d, Cost %f, %s" % ( - event.pass_id, event.batch_id, event.cost, event.metrics) - else: - sys.stdout.write('.') - sys.stdout.flush() - if isinstance(event, paddle.event.EndPass): - result = trainer.test(reader=test_reader, feeding=feeding) - print "\nTest with Pass %d, %s" % (event.pass_id, result.metrics) - - # create trainer - trainer = paddle.trainer.SGD(cost=cost, - parameters=parameters, - update_equation=adam_optimizer) - - trainer.train( - reader=train_reader, - event_handler=event_handler, - feeding=feeding, - num_passes=2) diff --git a/demo/sentiment/trainer_config.py b/demo/sentiment/trainer_config.py deleted file mode 100644 index f1cadaa728ac58107e15f77b5994d31da088caf7..0000000000000000000000000000000000000000 --- a/demo/sentiment/trainer_config.py +++ /dev/null @@ -1,39 +0,0 @@ -# Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserved -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -from sentiment_net import * -from paddle.trainer_config_helpers import * - -# whether this config is used for test -is_test = get_config_arg('is_test', bool, False) -# whether this config is used for prediction -is_predict = get_config_arg('is_predict', bool, False) - -data_dir = "./data/pre-imdb" -dict_dim, class_dim = sentiment_data(data_dir, is_test, is_predict) - -################## Algorithm Config ##################### - -settings( - batch_size=128, - learning_rate=2e-3, - learning_method=AdamOptimizer(), - model_average=ModelAverage(0.5), - regularization=L2Regularization(8e-4), - gradient_clipping_threshold=25) - -#################### Network Config ###################### -stacked_lstm_net( - dict_dim, class_dim=class_dim, stacked_num=3, is_predict=is_predict) -# bidirectional_lstm_net(dict_dim, class_dim=class_dim, is_predict=is_predict) diff --git a/demo/seqToseq/.gitignore b/demo/seqToseq/.gitignore deleted file mode 100644 index 21cec2c2c1f3422cbb0ad133281dc1ecdd076a96..0000000000000000000000000000000000000000 --- a/demo/seqToseq/.gitignore +++ /dev/null @@ -1,17 +0,0 @@ -data/wmt14 -data/pre-wmt14 -data/wmt14_model -data/paraphrase -data/pre-paraphrase -data/paraphrase_model -translation/gen.log -translation/gen_result -translation/train.log -paraphrase/train.log -dataprovider_copy_1.py -translation/thirdparty.tgz -translation/thirdparty/train.conf -translation/thirdparty/dataprovider.py -translation/thirdparty/seqToseq_net.py -translation/thirdparty/*.dict -*.pyc diff --git a/demo/seqToseq/api_train_v2.py b/demo/seqToseq/api_train_v2.py deleted file mode 100644 index 3072c375123a2713c655b09fb28001960c9ab64d..0000000000000000000000000000000000000000 --- a/demo/seqToseq/api_train_v2.py +++ /dev/null @@ -1,214 +0,0 @@ -import sys - -import paddle.v2 as paddle - - -def seqToseq_net(source_dict_dim, target_dict_dim, is_generating=False): - ### Network Architecture - word_vector_dim = 512 # dimension of word vector - decoder_size = 512 # dimension of hidden unit in GRU Decoder network - encoder_size = 512 # dimension of hidden unit in GRU Encoder network - - beam_size = 3 - max_length = 250 - - #### Encoder - src_word_id = paddle.layer.data( - name='source_language_word', - type=paddle.data_type.integer_value_sequence(source_dict_dim)) - src_embedding = paddle.layer.embedding( - input=src_word_id, - size=word_vector_dim, - param_attr=paddle.attr.ParamAttr(name='_source_language_embedding')) - src_forward = paddle.networks.simple_gru( - input=src_embedding, size=encoder_size) - src_backward = paddle.networks.simple_gru( - input=src_embedding, size=encoder_size, reverse=True) - encoded_vector = paddle.layer.concat(input=[src_forward, src_backward]) - - #### Decoder - with paddle.layer.mixed(size=decoder_size) as encoded_proj: - encoded_proj += paddle.layer.full_matrix_projection( - input=encoded_vector) - - backward_first = paddle.layer.first_seq(input=src_backward) - - with paddle.layer.mixed( - size=decoder_size, act=paddle.activation.Tanh()) as decoder_boot: - decoder_boot += paddle.layer.full_matrix_projection( - input=backward_first) - - def gru_decoder_with_attention(enc_vec, enc_proj, current_word): - - decoder_mem = paddle.layer.memory( - name='gru_decoder', size=decoder_size, boot_layer=decoder_boot) - - context = paddle.networks.simple_attention( - encoded_sequence=enc_vec, - encoded_proj=enc_proj, - decoder_state=decoder_mem) - - with paddle.layer.mixed(size=decoder_size * 3) as decoder_inputs: - decoder_inputs += paddle.layer.full_matrix_projection(input=context) - decoder_inputs += paddle.layer.full_matrix_projection( - input=current_word) - - gru_step = paddle.layer.gru_step( - name='gru_decoder', - input=decoder_inputs, - output_mem=decoder_mem, - size=decoder_size) - - with paddle.layer.mixed( - size=target_dict_dim, - bias_attr=True, - act=paddle.activation.Softmax()) as out: - out += paddle.layer.full_matrix_projection(input=gru_step) - return out - - decoder_group_name = "decoder_group" - group_input1 = paddle.layer.StaticInputV2(input=encoded_vector, is_seq=True) - group_input2 = paddle.layer.StaticInputV2(input=encoded_proj, is_seq=True) - group_inputs = [group_input1, group_input2] - - if not is_generating: - trg_embedding = paddle.layer.embedding( - input=paddle.layer.data( - name='target_language_word', - type=paddle.data_type.integer_value_sequence(target_dict_dim)), - size=word_vector_dim, - param_attr=paddle.attr.ParamAttr(name='_target_language_embedding')) - group_inputs.append(trg_embedding) - - # For decoder equipped with attention mechanism, in training, - # target embeding (the groudtruth) is the data input, - # while encoded source sequence is accessed to as an unbounded memory. - # Here, the StaticInput defines a read-only memory - # for the recurrent_group. - decoder = paddle.layer.recurrent_group( - name=decoder_group_name, - step=gru_decoder_with_attention, - input=group_inputs) - - lbl = paddle.layer.data( - name='target_language_next_word', - type=paddle.data_type.integer_value_sequence(target_dict_dim)) - cost = paddle.layer.classification_cost(input=decoder, label=lbl) - - return cost - else: - # In generation, the decoder predicts a next target word based on - # the encoded source sequence and the last generated target word. - - # The encoded source sequence (encoder's output) must be specified by - # StaticInput, which is a read-only memory. - # Embedding of the last generated word is automatically gotten by - # GeneratedInputs, which is initialized by a start mark, such as , - # and must be included in generation. - - trg_embedding = paddle.layer.GeneratedInputV2( - size=target_dict_dim, - embedding_name='_target_language_embedding', - embedding_size=word_vector_dim) - group_inputs.append(trg_embedding) - - beam_gen = paddle.layer.beam_search( - name=decoder_group_name, - step=gru_decoder_with_attention, - input=group_inputs, - bos_id=0, - eos_id=1, - beam_size=beam_size, - max_length=max_length) - - return beam_gen - - -def main(): - paddle.init(use_gpu=False, trainer_count=1) - is_generating = False - - # source and target dict dim. - dict_size = 30000 - source_dict_dim = target_dict_dim = dict_size - - # train the network - if not is_generating: - cost = seqToseq_net(source_dict_dim, target_dict_dim) - parameters = paddle.parameters.create(cost) - - # define optimize method and trainer - optimizer = paddle.optimizer.Adam( - learning_rate=5e-5, - regularization=paddle.optimizer.L2Regularization(rate=8e-4)) - trainer = paddle.trainer.SGD(cost=cost, - parameters=parameters, - update_equation=optimizer) - # define data reader - wmt14_reader = paddle.batch( - paddle.reader.shuffle( - paddle.dataset.wmt14.train(dict_size), buf_size=8192), - batch_size=5) - - # define event_handler callback - def event_handler(event): - if isinstance(event, paddle.event.EndIteration): - if event.batch_id % 10 == 0: - print "\nPass %d, Batch %d, Cost %f, %s" % ( - event.pass_id, event.batch_id, event.cost, - event.metrics) - else: - sys.stdout.write('.') - sys.stdout.flush() - - # start to train - trainer.train( - reader=wmt14_reader, event_handler=event_handler, num_passes=2) - - # generate a english sequence to french - else: - # use the first 3 samples for generation - gen_creator = paddle.dataset.wmt14.gen(dict_size) - gen_data = [] - gen_num = 3 - for item in gen_creator(): - gen_data.append((item[0], )) - if len(gen_data) == gen_num: - break - - beam_gen = seqToseq_net(source_dict_dim, target_dict_dim, is_generating) - # get the pretrained model, whose bleu = 26.92 - parameters = paddle.dataset.wmt14.model() - # prob is the prediction probabilities, and id is the prediction word. - beam_result = paddle.infer( - output_layer=beam_gen, - parameters=parameters, - input=gen_data, - field=['prob', 'id']) - - # get the dictionary - src_dict, trg_dict = paddle.dataset.wmt14.get_dict(dict_size) - - # the delimited element of generated sequences is -1, - # the first element of each generated sequence is the sequence length - seq_list = [] - seq = [] - for w in beam_result[1]: - if w != -1: - seq.append(w) - else: - seq_list.append(' '.join([trg_dict.get(w) for w in seq[1:]])) - seq = [] - - prob = beam_result[0] - beam_size = 3 - for i in xrange(gen_num): - print "\n*******************************************************\n" - print "src:", ' '.join( - [src_dict.get(w) for w in gen_data[i][0]]), "\n" - for j in xrange(beam_size): - print "prob = %f:" % (prob[i][j]), seq_list[i * beam_size + j] - - -if __name__ == '__main__': - main() diff --git a/demo/seqToseq/data/paraphrase_data.sh b/demo/seqToseq/data/paraphrase_data.sh deleted file mode 100755 index e6497c91286d44b5ef3b66c5f824e36a09728720..0000000000000000000000000000000000000000 --- a/demo/seqToseq/data/paraphrase_data.sh +++ /dev/null @@ -1,23 +0,0 @@ -#!/bin/bash -# Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserved -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -set -e -set -x - -# download the in-house paraphrase dataset -wget http://paddlepaddle.bj.bcebos.com/model_zoo/embedding/paraphrase.tar.gz - -# untar the dataset -tar -zxvf paraphrase.tar.gz -rm paraphrase.tar.gz diff --git a/demo/seqToseq/data/paraphrase_model.sh b/demo/seqToseq/data/paraphrase_model.sh deleted file mode 100755 index d0e7f214a38c4dad0fdf7c10ba3b76eb0ab40f06..0000000000000000000000000000000000000000 --- a/demo/seqToseq/data/paraphrase_model.sh +++ /dev/null @@ -1,37 +0,0 @@ -#!/bin/bash -# Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserved -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -set -e -set -x - -dim=32 -pretrained_dir='../../model_zoo/embedding/' -preModel=$pretrained_dir'model_'$dim'.emb' -preDict=$pretrained_dir'baidu.dict' - -usrDict_dir='pre-paraphrase/' -srcDict=$usrDict_dir'src.dict' -trgDict=$usrDict_dir'trg.dict' - -usrModel_dir='paraphrase_model/' -mkdir $usrModel_dir -srcModel=$usrModel_dir'_source_language_embedding' -trgModel=$usrModel_dir'_target_language_embedding' - -echo 'extract desired parameters based on user dictionary' -script=$pretrained_dir'extract_para.py' -python $script --preModel $preModel --preDict $preDict \ - --usrModel $srcModel --usrDict $srcDict -d $dim -python $script --preModel $preModel --preDict $preDict \ - --usrModel $trgModel --usrDict $trgDict -d $dim diff --git a/demo/seqToseq/data/wmt14_data.sh b/demo/seqToseq/data/wmt14_data.sh deleted file mode 100755 index 43f67168d2a876ba5401e0f8490a88adac9c5551..0000000000000000000000000000000000000000 --- a/demo/seqToseq/data/wmt14_data.sh +++ /dev/null @@ -1,53 +0,0 @@ -#!/bin/bash -# Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserved -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -set -e -set -x -mkdir wmt14 -cd wmt14 - -# download the dataset -wget http://www-lium.univ-lemans.fr/~schwenk/cslm_joint_paper/data/bitexts.tgz -wget http://www-lium.univ-lemans.fr/~schwenk/cslm_joint_paper/data/dev+test.tgz - -# untar the dataset -tar -zxvf bitexts.tgz -tar -zxvf dev+test.tgz -gunzip bitexts.selected/* -mv bitexts.selected train -rm bitexts.tgz -rm dev+test.tgz - -# separate the dev and test dataset -mkdir test gen -mv dev/ntst1213.* test -mv dev/ntst14.* gen -rm -rf dev - -set +x -# rename the suffix, .fr->.src, .en->.trg -for dir in train test gen -do - filelist=`ls $dir` - cd $dir - for file in $filelist - do - if [ ${file##*.} = "fr" ]; then - mv $file ${file/%fr/src} - elif [ ${file##*.} = 'en' ]; then - mv $file ${file/%en/trg} - fi - done - cd .. -done diff --git a/demo/seqToseq/data/wmt14_model.sh b/demo/seqToseq/data/wmt14_model.sh deleted file mode 100755 index c4b55b90a3eb98f94e0eb3be028c6de1ef57326b..0000000000000000000000000000000000000000 --- a/demo/seqToseq/data/wmt14_model.sh +++ /dev/null @@ -1,23 +0,0 @@ -#!/bin/bash -# Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserved -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -set -e -set -x - -# download the pretrained model -wget http://paddlepaddle.bj.bcebos.com/model_zoo/wmt14_model.tar.gz - -# untar the model -tar -zxvf wmt14_model.tar.gz -rm wmt14_model.tar.gz diff --git a/demo/seqToseq/dataprovider.py b/demo/seqToseq/dataprovider.py deleted file mode 100755 index c2b49804be582d7d0bc3ef6332741be03936eb24..0000000000000000000000000000000000000000 --- a/demo/seqToseq/dataprovider.py +++ /dev/null @@ -1,94 +0,0 @@ -# Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserved -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -from paddle.trainer.PyDataProvider2 import * - -UNK_IDX = 2 -START = "" -END = "" - - -def hook(settings, src_dict_path, trg_dict_path, is_generating, file_list, - **kwargs): - # job_mode = 1: training mode - # job_mode = 0: generating mode - settings.job_mode = not is_generating - - def fun(dict_path): - out_dict = dict() - with open(dict_path, "r") as fin: - out_dict = { - line.strip(): line_count - for line_count, line in enumerate(fin) - } - return out_dict - - settings.src_dict = fun(src_dict_path) - settings.trg_dict = fun(trg_dict_path) - - settings.logger.info("src dict len : %d" % (len(settings.src_dict))) - - if settings.job_mode: - settings.slots = { - 'source_language_word': - integer_value_sequence(len(settings.src_dict)), - 'target_language_word': - integer_value_sequence(len(settings.trg_dict)), - 'target_language_next_word': - integer_value_sequence(len(settings.trg_dict)) - } - settings.logger.info("trg dict len : %d" % (len(settings.trg_dict))) - else: - settings.slots = { - 'source_language_word': - integer_value_sequence(len(settings.src_dict)), - 'sent_id': - integer_value_sequence(len(open(file_list[0], "r").readlines())) - } - - -def _get_ids(s, dictionary): - words = s.strip().split() - return [dictionary[START]] + \ - [dictionary.get(w, UNK_IDX) for w in words] + \ - [dictionary[END]] - - -@provider(init_hook=hook, pool_size=50000) -def process(settings, file_name): - with open(file_name, 'r') as f: - for line_count, line in enumerate(f): - line_split = line.strip().split('\t') - if settings.job_mode and len(line_split) != 2: - continue - src_seq = line_split[0] # one source sequence - src_ids = _get_ids(src_seq, settings.src_dict) - - if settings.job_mode: - trg_seq = line_split[1] # one target sequence - trg_words = trg_seq.split() - trg_ids = [settings.trg_dict.get(w, UNK_IDX) for w in trg_words] - - # remove sequence whose length > 80 in training mode - if len(src_ids) > 80 or len(trg_ids) > 80: - continue - trg_ids_next = trg_ids + [settings.trg_dict[END]] - trg_ids = [settings.trg_dict[START]] + trg_ids - yield { - 'source_language_word': src_ids, - 'target_language_word': trg_ids, - 'target_language_next_word': trg_ids_next - } - else: - yield {'source_language_word': src_ids, 'sent_id': [line_count]} diff --git a/demo/seqToseq/paraphrase/train.conf b/demo/seqToseq/paraphrase/train.conf deleted file mode 100644 index be79c5e771c0e864fd1776cedb3ef37c997b6df6..0000000000000000000000000000000000000000 --- a/demo/seqToseq/paraphrase/train.conf +++ /dev/null @@ -1,33 +0,0 @@ -#edit-mode: -*- python -*- -# Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserved -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -import sys -sys.path.append("..") - -from seqToseq_net import * - -is_generating = False -### Data Definiation -train_conf = seq_to_seq_data(data_dir = "./data/pre-paraphrase", - is_generating = is_generating) - -### Algorithm Configuration -settings( - learning_method = AdamOptimizer(), - batch_size = 50, - learning_rate = 5e-4) - -### Network Architecture -gru_encoder_decoder(train_conf, is_generating, word_vector_dim = 32) diff --git a/demo/seqToseq/paraphrase/train.sh b/demo/seqToseq/paraphrase/train.sh deleted file mode 100755 index 9bb6dbdb1d4c5e35bfb31855e0331f0250a69a20..0000000000000000000000000000000000000000 --- a/demo/seqToseq/paraphrase/train.sh +++ /dev/null @@ -1,30 +0,0 @@ -#!/bin/bash -# Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserved -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -set -e -cd .. - -paddle train \ - --config='paraphrase/train.conf' \ - --save_dir='paraphrase/model' \ - --init_model_path='data/paraphrase_model' \ - --load_missing_parameter_strategy=rand \ - --use_gpu=false \ - --num_passes=16 \ - --show_parameter_stats_period=100 \ - --trainer_count=4 \ - --log_period=10 \ - --dot_period=5 \ - 2>&1 | tee 'paraphrase/train.log' -paddle usage -l 'paraphrase/train.log' -e $? -n "seqToseq_paraphrase_train" >/dev/null 2>&1 diff --git a/demo/seqToseq/preprocess.py b/demo/seqToseq/preprocess.py deleted file mode 100755 index 03f371331a0755e5939e457f4bdfb1770b8dad88..0000000000000000000000000000000000000000 --- a/demo/seqToseq/preprocess.py +++ /dev/null @@ -1,219 +0,0 @@ -#!/bin/env python -# Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserved -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -""" -Example: - python preprocess.py -i INPUT [-d DICTSIZE] [-m] - -Options: - -h, --help show this help message and exit - -i INPUT input original dataset path - -d DICTSIZE specified word count of dictionary - -m --mergeDict merge source and target dictionary -""" -import os -import sys - -import string -from optparse import OptionParser -from paddle.utils.preprocess_util import save_list, DatasetCreater - - -class SeqToSeqDatasetCreater(DatasetCreater): - """ - A class to process data for sequence to sequence application. - """ - - def __init__(self, data_path, output_path): - """ - data_path: the path to store the train data, test data and gen data - output_path: the path to store the processed dataset - """ - DatasetCreater.__init__(self, data_path) - self.gen_dir_name = 'gen' - self.gen_list_name = 'gen.list' - self.output_path = output_path - - def concat_file(self, file_path, file1, file2, output_path, output): - """ - Concat file1 and file2 to be one output file - The i-th line of output = i-th line of file1 + '\t' + i-th line of file2 - file_path: the path to store file1 and file2 - output_path: the path to store output file - """ - file1 = os.path.join(file_path, file1) - file2 = os.path.join(file_path, file2) - output = os.path.join(output_path, output) - if not os.path.exists(output): - os.system('paste ' + file1 + ' ' + file2 + ' > ' + output) - - def cat_file(self, dir_path, suffix, output_path, output): - """ - Cat all the files in dir_path with suffix to be one output file - dir_path: the base directory to store input file - suffix: suffix of file name - output_path: the path to store output file - """ - cmd = 'cat ' - file_list = os.listdir(dir_path) - file_list.sort() - for file in file_list: - if file.endswith(suffix): - cmd += os.path.join(dir_path, file) + ' ' - output = os.path.join(output_path, output) - if not os.path.exists(output): - os.system(cmd + '> ' + output) - - def build_dict(self, file_path, dict_path, dict_size=-1): - """ - Create the dictionary for the file, Note that - 1. Valid characters include all printable characters - 2. There is distinction between uppercase and lowercase letters - 3. There is 3 special token: - : the start of a sequence - : the end of a sequence - : a word not included in dictionary - file_path: the path to store file - dict_path: the path to store dictionary - dict_size: word count of dictionary - if is -1, dictionary will contains all the words in file - """ - if not os.path.exists(dict_path): - dictory = dict() - with open(file_path, "r") as fdata: - for line in fdata: - line = line.split('\t') - for line_split in line: - words = line_split.strip().split() - for word in words: - if word not in dictory: - dictory[word] = 1 - else: - dictory[word] += 1 - output = open(dict_path, "w+") - output.write('\n\n\n') - count = 3 - for key, value in sorted( - dictory.items(), key=lambda d: d[1], reverse=True): - output.write(key + "\n") - count += 1 - if count == dict_size: - break - self.dict_size = count - - def create_dataset(self, - dict_size=-1, - mergeDict=False, - suffixes=['.src', '.trg']): - """ - Create seqToseq dataset - """ - # dataset_list and dir_list has one-to-one relationship - train_dataset = os.path.join(self.data_path, self.train_dir_name) - test_dataset = os.path.join(self.data_path, self.test_dir_name) - gen_dataset = os.path.join(self.data_path, self.gen_dir_name) - dataset_list = [train_dataset, test_dataset, gen_dataset] - - train_dir = os.path.join(self.output_path, self.train_dir_name) - test_dir = os.path.join(self.output_path, self.test_dir_name) - gen_dir = os.path.join(self.output_path, self.gen_dir_name) - dir_list = [train_dir, test_dir, gen_dir] - - # create directory - for dir in dir_list: - if not os.path.exists(dir): - os.mkdir(dir) - - # checkout dataset should be parallel corpora - suffix_len = len(suffixes[0]) - for dataset in dataset_list: - file_list = os.listdir(dataset) - if len(file_list) % 2 == 1: - raise RuntimeError("dataset should be parallel corpora") - file_list.sort() - for i in range(0, len(file_list), 2): - if file_list[i][:-suffix_len] != file_list[i + 1][:-suffix_len]: - raise RuntimeError( - "source and target file name should be equal") - - # cat all the files with the same suffix in dataset - for suffix in suffixes: - for dataset in dataset_list: - outname = os.path.basename(dataset) + suffix - self.cat_file(dataset, suffix, dataset, outname) - - # concat parallel corpora and create file.list - print 'concat parallel corpora for dataset' - id = 0 - list = ['train.list', 'test.list', 'gen.list'] - for dataset in dataset_list: - outname = os.path.basename(dataset) - self.concat_file(dataset, outname + suffixes[0], - outname + suffixes[1], dir_list[id], outname) - save_list([os.path.join(dir_list[id], outname)], - os.path.join(self.output_path, list[id])) - id += 1 - - # build dictionary for train data - dict = ['src.dict', 'trg.dict'] - dict_path = [ - os.path.join(self.output_path, dict[0]), - os.path.join(self.output_path, dict[1]) - ] - if mergeDict: - outname = os.path.join(train_dir, train_dataset.split('/')[-1]) - print 'build src dictionary for train data' - self.build_dict(outname, dict_path[0], dict_size) - print 'build trg dictionary for train data' - os.system('cp ' + dict_path[0] + ' ' + dict_path[1]) - else: - outname = os.path.join(train_dataset, self.train_dir_name) - for id in range(0, 2): - suffix = suffixes[id] - print 'build ' + suffix[1:] + ' dictionary for train data' - self.build_dict(outname + suffix, dict_path[id], dict_size) - print 'dictionary size is', self.dict_size - - -def main(): - usage = "usage: \n" \ - "python %prog -i INPUT [-d DICTSIZE] [-m]" - parser = OptionParser(usage) - parser.add_option( - "-i", action="store", dest="input", help="input original dataset path") - parser.add_option( - "-d", - action="store", - dest="dictsize", - help="specified word count of dictionary") - parser.add_option( - "-m", - "--mergeDict", - action="store_true", - dest="mergeDict", - help="merge source and target dictionary") - (options, args) = parser.parse_args() - if options.input[-1] == os.path.sep: - options.input = options.input[:-1] - outname = os.path.basename(options.input) - output_path = os.path.join(os.path.dirname(options.input), 'pre-' + outname) - dictsize = int(options.dictsize) if options.dictsize else -1 - if not os.path.exists(output_path): - os.mkdir(output_path) - data_creator = SeqToSeqDatasetCreater(options.input, output_path) - data_creator.create_dataset(dictsize, options.mergeDict) - - -if __name__ == "__main__": - main() diff --git a/demo/seqToseq/seqToseq_net.py b/demo/seqToseq/seqToseq_net.py deleted file mode 100644 index 3d1f86ec3b7eda4fceaf3a1e406e3d0a1a4a2f60..0000000000000000000000000000000000000000 --- a/demo/seqToseq/seqToseq_net.py +++ /dev/null @@ -1,204 +0,0 @@ -# edit-mode: -*- python -*- - -# Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserved -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -import sys -import os -from paddle.trainer_config_helpers import * - - -def seq_to_seq_data(data_dir, - is_generating, - dict_size=30000, - train_list='train.list', - test_list='test.list', - gen_list='gen.list', - gen_result='gen_result'): - """ - Predefined seqToseq train data provider for application - is_generating: whether this config is used for generating - dict_size: word count of dictionary - train_list: a text file containing a list of training data - test_list: a text file containing a list of testing data - gen_list: a text file containing a list of generating data - gen_result: a text file containing generating result - """ - src_lang_dict = os.path.join(data_dir, 'src.dict') - trg_lang_dict = os.path.join(data_dir, 'trg.dict') - - if is_generating: - train_list = None - test_list = os.path.join(data_dir, gen_list) - else: - train_list = os.path.join(data_dir, train_list) - test_list = os.path.join(data_dir, test_list) - - define_py_data_sources2( - train_list, - test_list, - module="dataprovider", - obj="process", - args={ - "src_dict_path": src_lang_dict, - "trg_dict_path": trg_lang_dict, - "is_generating": is_generating - }) - - return { - "src_dict_path": src_lang_dict, - "trg_dict_path": trg_lang_dict, - "gen_result": gen_result - } - - -def gru_encoder_decoder(data_conf, - is_generating, - word_vector_dim=512, - encoder_size=512, - decoder_size=512, - beam_size=3, - max_length=250, - error_clipping=50): - """ - A wrapper for an attention version of GRU Encoder-Decoder network - is_generating: whether this config is used for generating - encoder_size: dimension of hidden unit in GRU Encoder network - decoder_size: dimension of hidden unit in GRU Decoder network - word_vector_dim: dimension of word vector - beam_size: expand width in beam search - max_length: a stop condition of sequence generation - """ - for k, v in data_conf.iteritems(): - globals()[k] = v - source_dict_dim = len(open(src_dict_path, "r").readlines()) - target_dict_dim = len(open(trg_dict_path, "r").readlines()) - gen_trans_file = gen_result - - src_word_id = data_layer(name='source_language_word', size=source_dict_dim) - src_embedding = embedding_layer( - input=src_word_id, - size=word_vector_dim, - param_attr=ParamAttr(name='_source_language_embedding')) - src_forward = simple_gru( - input=src_embedding, - size=encoder_size, - naive=True, - gru_layer_attr=ExtraLayerAttribute( - error_clipping_threshold=error_clipping)) - src_backward = simple_gru( - input=src_embedding, - size=encoder_size, - reverse=True, - naive=True, - gru_layer_attr=ExtraLayerAttribute( - error_clipping_threshold=error_clipping)) - encoded_vector = concat_layer(input=[src_forward, src_backward]) - - with mixed_layer(size=decoder_size) as encoded_proj: - encoded_proj += full_matrix_projection(input=encoded_vector) - - backward_first = first_seq(input=src_backward) - with mixed_layer( - size=decoder_size, - act=TanhActivation(), ) as decoder_boot: - decoder_boot += full_matrix_projection(input=backward_first) - - def gru_decoder_with_attention(enc_vec, enc_proj, current_word): - decoder_mem = memory( - name='gru_decoder', size=decoder_size, boot_layer=decoder_boot) - - context = simple_attention( - encoded_sequence=enc_vec, - encoded_proj=enc_proj, - decoder_state=decoder_mem, ) - - with mixed_layer(size=decoder_size * 3) as decoder_inputs: - decoder_inputs += full_matrix_projection(input=context) - decoder_inputs += full_matrix_projection(input=current_word) - - gru_step = gru_step_naive_layer( - name='gru_decoder', - input=decoder_inputs, - output_mem=decoder_mem, - size=decoder_size, - layer_attr=ExtraLayerAttribute( - error_clipping_threshold=error_clipping)) - - with mixed_layer( - size=target_dict_dim, bias_attr=True, - act=SoftmaxActivation()) as out: - out += full_matrix_projection(input=gru_step) - return out - - decoder_group_name = "decoder_group" - group_inputs = [ - StaticInput( - input=encoded_vector, is_seq=True), StaticInput( - input=encoded_proj, is_seq=True) - ] - - if not is_generating: - trg_embedding = embedding_layer( - input=data_layer( - name='target_language_word', size=target_dict_dim), - size=word_vector_dim, - param_attr=ParamAttr(name='_target_language_embedding')) - group_inputs.append(trg_embedding) - - # For decoder equipped with attention mechanism, in training, - # target embeding (the groudtruth) is the data input, - # while encoded source sequence is accessed to as an unbounded memory. - # Here, the StaticInput defines a read-only memory - # for the recurrent_group. - decoder = recurrent_group( - name=decoder_group_name, - step=gru_decoder_with_attention, - input=group_inputs) - - lbl = data_layer(name='target_language_next_word', size=target_dict_dim) - cost = classification_cost(input=decoder, label=lbl) - outputs(cost) - else: - # In generation, the decoder predicts a next target word based on - # the encoded source sequence and the last generated target word. - - # The encoded source sequence (encoder's output) must be specified by - # StaticInput, which is a read-only memory. - # Embedding of the last generated word is automatically gotten by - # GeneratedInputs, which is initialized by a start mark, such as , - # and must be included in generation. - - trg_embedding = GeneratedInput( - size=target_dict_dim, - embedding_name='_target_language_embedding', - embedding_size=word_vector_dim) - group_inputs.append(trg_embedding) - - beam_gen = beam_search( - name=decoder_group_name, - step=gru_decoder_with_attention, - input=group_inputs, - bos_id=0, - eos_id=1, - beam_size=beam_size, - max_length=max_length) - - seqtext_printer_evaluator( - input=beam_gen, - id_input=data_layer( - name="sent_id", size=1), - dict_file=trg_dict_path, - result_file=gen_trans_file) - outputs(beam_gen) diff --git a/demo/seqToseq/translation/eval_bleu.sh b/demo/seqToseq/translation/eval_bleu.sh deleted file mode 100755 index 54c2ed237e93adb3456dbe62f75626d36c2d90bc..0000000000000000000000000000000000000000 --- a/demo/seqToseq/translation/eval_bleu.sh +++ /dev/null @@ -1,42 +0,0 @@ -#!/bin/bash -# Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserved -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -set -e -gen_file=$1 -beam_size=$2 - -# find top1 generating result -top1=$(printf '%s_top1.txt' `basename $gen_file .txt`) -if [ $beam_size -eq 1 ]; then - awk -F "\t" '{sub(" ","",$2);sub(" ","",$2);print $2}' $gen_file >$top1 -else - awk 'BEGIN{ - FS="\t"; - OFS="\t"; - read_pos = 2} { - if (NR == read_pos){ - sub(" ","",$3); - sub(" ","",$3); - print $3; - read_pos += (2 + res_num); - }}' res_num=$beam_size $gen_file >$top1 -fi - -# evalute bleu value -bleu_script=multi-bleu.perl -standard_res=../data/wmt14/gen/ntst14.trg -bleu_res=`perl $bleu_script $standard_res <$top1` - -echo $bleu_res -rm $top1 diff --git a/demo/seqToseq/translation/gen.conf b/demo/seqToseq/translation/gen.conf deleted file mode 100644 index e9bea4e4559ff31ad83c4474e91de7e7acc77e9f..0000000000000000000000000000000000000000 --- a/demo/seqToseq/translation/gen.conf +++ /dev/null @@ -1,36 +0,0 @@ -#edit-mode: -*- python -*- -# Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserved -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -import sys -sys.path.append("..") - -from seqToseq_net import * - -# whether this config is used for generating -is_generating = True - -### Data Definiation -gen_conf = seq_to_seq_data(data_dir = "./data/pre-wmt14", - is_generating = is_generating, - gen_result = "./translation/gen_result") - -### Algorithm Configuration -settings( - learning_method = AdamOptimizer(), - batch_size = 1, - learning_rate = 0) - -### Network Architecture -gru_encoder_decoder(gen_conf, is_generating) diff --git a/demo/seqToseq/translation/gen.sh b/demo/seqToseq/translation/gen.sh deleted file mode 100755 index 64b78f5e9654e7b206740f92e224e0164108c9f1..0000000000000000000000000000000000000000 --- a/demo/seqToseq/translation/gen.sh +++ /dev/null @@ -1,27 +0,0 @@ -#!/bin/bash -# Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserved -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -set -e -cd .. - -paddle train \ - --job=test \ - --config='translation/gen.conf' \ - --save_dir='data/wmt14_model' \ - --use_gpu=false \ - --num_passes=13 \ - --test_pass=12 \ - --trainer_count=1 \ - 2>&1 | tee 'translation/gen.log' -paddle usage -l 'translation/gen.log' -e $? -n "seqToseq_translation_gen" >/dev/null 2>&1 diff --git a/demo/seqToseq/translation/moses_bleu.sh b/demo/seqToseq/translation/moses_bleu.sh deleted file mode 100755 index 2f230d7f4c736da003966fbdb277f6b8b1ec952c..0000000000000000000000000000000000000000 --- a/demo/seqToseq/translation/moses_bleu.sh +++ /dev/null @@ -1,18 +0,0 @@ -#!/bin/bash -# Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserved -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -set -e -set -x -echo "Downloading multi-bleu.perl" -wget https://raw.githubusercontent.com/moses-smt/mosesdecoder/master/scripts/generic/multi-bleu.perl --no-check-certificate diff --git a/demo/seqToseq/translation/train.conf b/demo/seqToseq/translation/train.conf deleted file mode 100644 index 72b7ccdbb95dbda8f06674079db9a3257bb31622..0000000000000000000000000000000000000000 --- a/demo/seqToseq/translation/train.conf +++ /dev/null @@ -1,36 +0,0 @@ -#edit-mode: -*- python -*- -# Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserved -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -import sys -sys.path.append("..") - -from seqToseq_net import * - -# whether this config is used for generating -is_generating = False - -### Data Definiation -data_dir = "./data/pre-wmt14" -train_conf = seq_to_seq_data(data_dir = data_dir, - is_generating = is_generating) - -### Algorithm Configuration -settings( - learning_method = AdamOptimizer(), - batch_size = 50, - learning_rate = 5e-4) - -### Network Architecture -gru_encoder_decoder(train_conf, is_generating) diff --git a/demo/seqToseq/translation/train.sh b/demo/seqToseq/translation/train.sh deleted file mode 100755 index b0ec9854b118cbb9ed39d6bed0cdd845403926a4..0000000000000000000000000000000000000000 --- a/demo/seqToseq/translation/train.sh +++ /dev/null @@ -1,28 +0,0 @@ -#!/bin/bash -# Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserved -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -set -e -cd .. - -paddle train \ ---config='translation/train.conf' \ ---save_dir='translation/model' \ ---use_gpu=false \ ---num_passes=16 \ ---show_parameter_stats_period=100 \ ---trainer_count=4 \ ---log_period=10 \ ---dot_period=5 \ -2>&1 | tee 'translation/train.log' -paddle usage -l 'translation/train.log' -e $? -n "seqToseq_translation_train" >/dev/null 2>&1 diff --git a/demo/word2vec/train_v2.py b/demo/word2vec/train_v2.py deleted file mode 100644 index 7d952b446f9db432062fc3305a6b65b0ad66dd47..0000000000000000000000000000000000000000 --- a/demo/word2vec/train_v2.py +++ /dev/null @@ -1,80 +0,0 @@ -import math - -import paddle.v2 as paddle - -dictsize = 1953 -embsize = 32 -hiddensize = 256 -N = 5 - - -def wordemb(inlayer): - wordemb = paddle.layer.table_projection( - input=inlayer, - size=embsize, - param_attr=paddle.attr.Param( - name="_proj", - initial_std=0.001, - learning_rate=1, - l2_rate=0, )) - return wordemb - - -def main(): - paddle.init(use_gpu=False, trainer_count=1) - word_dict = paddle.dataset.imikolov.build_dict() - dict_size = len(word_dict) - firstword = paddle.layer.data( - name="firstw", type=paddle.data_type.integer_value(dict_size)) - secondword = paddle.layer.data( - name="secondw", type=paddle.data_type.integer_value(dict_size)) - thirdword = paddle.layer.data( - name="thirdw", type=paddle.data_type.integer_value(dict_size)) - fourthword = paddle.layer.data( - name="fourthw", type=paddle.data_type.integer_value(dict_size)) - nextword = paddle.layer.data( - name="fifthw", type=paddle.data_type.integer_value(dict_size)) - - Efirst = wordemb(firstword) - Esecond = wordemb(secondword) - Ethird = wordemb(thirdword) - Efourth = wordemb(fourthword) - - contextemb = paddle.layer.concat(input=[Efirst, Esecond, Ethird, Efourth]) - hidden1 = paddle.layer.fc(input=contextemb, - size=hiddensize, - act=paddle.activation.Sigmoid(), - layer_attr=paddle.attr.Extra(drop_rate=0.5), - bias_attr=paddle.attr.Param(learning_rate=2), - param_attr=paddle.attr.Param( - initial_std=1. / math.sqrt(embsize * 8), - learning_rate=1)) - predictword = paddle.layer.fc(input=hidden1, - size=dict_size, - bias_attr=paddle.attr.Param(learning_rate=2), - act=paddle.activation.Softmax()) - - def event_handler(event): - if isinstance(event, paddle.event.EndIteration): - if event.batch_id % 100 == 0: - result = trainer.test( - paddle.batch( - paddle.dataset.imikolov.test(word_dict, N), 32)) - print "Pass %d, Batch %d, Cost %f, %s, Testing metrics %s" % ( - event.pass_id, event.batch_id, event.cost, event.metrics, - result.metrics) - - cost = paddle.layer.classification_cost(input=predictword, label=nextword) - parameters = paddle.parameters.create(cost) - adam_optimizer = paddle.optimizer.Adam( - learning_rate=3e-3, - regularization=paddle.optimizer.L2Regularization(8e-4)) - trainer = paddle.trainer.SGD(cost, parameters, adam_optimizer) - trainer.train( - paddle.batch(paddle.dataset.imikolov.train(word_dict, N), 32), - num_passes=30, - event_handler=event_handler) - - -if __name__ == '__main__': - main() diff --git a/doc/CMakeLists.txt b/doc/CMakeLists.txt index 6fa42fd0c71e78cc2fa6b0fe2cb970baf4ac89ed..94dd3457fb5b513441c4c8e339e1862de9092517 100644 --- a/doc/CMakeLists.txt +++ b/doc/CMakeLists.txt @@ -27,10 +27,6 @@ sphinx_add_target(paddle_docs ${CMAKE_CURRENT_SOURCE_DIR} ${SPHINX_HTML_DIR_EN}) -add_dependencies(paddle_docs - gen_proto_py) - - # configured documentation tools and intermediate build results set(BINARY_BUILD_DIR_CN "${CMAKE_CURRENT_BINARY_DIR}/cn/_build") @@ -51,6 +47,3 @@ sphinx_add_target(paddle_docs_cn ${SPHINX_CACHE_DIR_CN} ${CMAKE_CURRENT_SOURCE_DIR} ${SPHINX_HTML_DIR_CN}) - -add_dependencies(paddle_docs_cn - gen_proto_py) diff --git a/doc/about/index_cn.md b/doc/about/index_cn.md deleted file mode 100644 index 3bf030004d4de8c6f3cb773c6e78c09f40878c5f..0000000000000000000000000000000000000000 --- a/doc/about/index_cn.md +++ /dev/null @@ -1,11 +0,0 @@ -关于PaddlePaddle -================ - -PaddlePaddle是一个最早由百度科学家和工程师共同研发的并行分布式深度学习平台,兼备易用性、高效性、灵活性和可扩展性,目前已被百度内部多个产品线广泛使用。 -PaddlePaddle目前已经开放源码, 但是远未完善,我们希望能在这个基础上不断的改进、扩展和延伸。 -同时我们希望广大开发者积极提供反馈和贡献源代码,建立一个活跃的开源社区。 - -致谢 --------- - -在此,特别感谢PaddlePaddle的[所有贡献者](https://github.com/PaddlePaddle/Paddle/graphs/contributors)。 diff --git a/doc/about/index_en.rst b/doc/about/index_en.rst deleted file mode 100644 index 065c430cdea802ed3c9f487cd00255b85a5598a5..0000000000000000000000000000000000000000 --- a/doc/about/index_en.rst +++ /dev/null @@ -1,14 +0,0 @@ -ABOUT -======= - -PaddlPaddle is an easy-to-use, efficient, flexible and scalable deep learning platform, -which is originally developed by Baidu scientists and engineers for the purpose of applying deep learning to many products at Baidu. - -PaddlePaddle is now open source but far from complete, which is intended to be built upon, improved, scaled, and extended. -We hope to build an active open source community both by providing feedback and by actively contributing to the source code. - - -Credits --------- - -We owe many thanks to `all contributors and developers `_ of PaddlePaddle! diff --git a/doc/api/v1/index_cn.rst b/doc/api/v1/index_cn.rst index 3718cd73a2003b8ef6c406a9bd51dc68e76402dc..cf146dc088e3905a751ff55c26fd82ef0ba02c89 100644 --- a/doc/api/v1/index_cn.rst +++ b/doc/api/v1/index_cn.rst @@ -21,7 +21,7 @@ Model Config API trainer_config_helpers/optimizers.rst trainer_config_helpers/data_sources.rst trainer_config_helpers/layers.rst - trainer_config_helpers/activations.rst + trainer_config_helpers/activations.rst trainer_config_helpers/poolings.rst trainer_config_helpers/networks.rst trainer_config_helpers/evaluators.rst diff --git a/doc/api/v1/trainer_config_helpers/activations.rst b/doc/api/v1/trainer_config_helpers/activations.rst deleted file mode 100644 index 269e6491e7ebe3899c3fb24fca756a393043473b..0000000000000000000000000000000000000000 --- a/doc/api/v1/trainer_config_helpers/activations.rst +++ /dev/null @@ -1,108 +0,0 @@ -=========== -Activations -=========== - -BaseActivation -============== - -.. automodule:: paddle.trainer_config_helpers.activations - :members: BaseActivation - :noindex: - -AbsActivation -=============== - -.. automodule:: paddle.trainer_config_helpers.activations - :members: AbsActivation - :noindex: - -ExpActivation -=============== - -.. automodule:: paddle.trainer_config_helpers.activations - :members: ExpActivation - :noindex: - -IdentityActivation -================== - -.. automodule:: paddle.trainer_config_helpers.activations - :members: IdentityActivation - :noindex: - -LinearActivation -================== - -.. automodule:: paddle.trainer_config_helpers.activations - :members: LinearActivation - :noindex: - -LogActivation -================== - -.. automodule:: paddle.trainer_config_helpers.activations - :members: LogActivation - :noindex: - -SquareActivation -================ - -.. automodule:: paddle.trainer_config_helpers.activations - :members: SquareActivation - :noindex: - -SigmoidActivation -================= - -.. automodule:: paddle.trainer_config_helpers.activations - :members: SigmoidActivation - :noindex: - -SoftmaxActivation -================= - -.. automodule:: paddle.trainer_config_helpers.activations - :members: SoftmaxActivation - :noindex: - -SequenceSoftmaxActivation -========================= - -.. automodule:: paddle.trainer_config_helpers.activations - :members: SequenceSoftmaxActivation - :noindex: - -ReluActivation -============== - -.. automodule:: paddle.trainer_config_helpers.activations - :members: ReluActivation - :noindex: - -BReluActivation -=============== - -.. automodule:: paddle.trainer_config_helpers.activations - :members: BReluActivation - :noindex: - -SoftReluActivation -================== - -.. automodule:: paddle.trainer_config_helpers.activations - :members: SoftReluActivation - :noindex: - -TanhActivation -============== - -.. automodule:: paddle.trainer_config_helpers.activations - :members: TanhActivation - :noindex: - -STanhActivation -=============== - -.. automodule:: paddle.trainer_config_helpers.activations - :members: STanhActivation - :noindex: diff --git a/doc/api/v1/trainer_config_helpers/attrs.rst b/doc/api/v1/trainer_config_helpers/attrs.rst deleted file mode 100644 index ac63127bf7d9db6351365ab7b58f43db12347a8e..0000000000000000000000000000000000000000 --- a/doc/api/v1/trainer_config_helpers/attrs.rst +++ /dev/null @@ -1,5 +0,0 @@ -Parameter Attributes -======================= - -.. automodule:: paddle.trainer_config_helpers.attrs - :members: diff --git a/doc/api/v1/trainer_config_helpers/data_sources.rst b/doc/api/v1/trainer_config_helpers/data_sources.rst deleted file mode 100644 index b9dd4dda01ae59d1260356aff50ddf298d02c87f..0000000000000000000000000000000000000000 --- a/doc/api/v1/trainer_config_helpers/data_sources.rst +++ /dev/null @@ -1,7 +0,0 @@ -.. _api_trainer_config_helpers_data_sources: - -DataSources -=========== - -.. automodule:: paddle.trainer_config_helpers.data_sources - :members: diff --git a/doc/api/v1/trainer_config_helpers/evaluators.rst b/doc/api/v1/trainer_config_helpers/evaluators.rst deleted file mode 100644 index 11dc735164284d6ed1d661fab1e7690d263b3a7c..0000000000000000000000000000000000000000 --- a/doc/api/v1/trainer_config_helpers/evaluators.rst +++ /dev/null @@ -1,108 +0,0 @@ -.. _api_trainer_config_helpers_evaluators: - -========== -Evaluators -========== - -Base -==== -.. automodule:: paddle.trainer_config_helpers.evaluators - :members: evaluator_base - :noindex: - -Classification -============== - -classification_error_evaluator ------------------------------- -.. automodule:: paddle.trainer_config_helpers.evaluators - :members: classification_error_evaluator - :noindex: - -auc_evaluator -------------- -.. automodule:: paddle.trainer_config_helpers.evaluators - :members: auc_evaluator - :noindex: - -ctc_error_evaluator -------------------- -.. automodule:: paddle.trainer_config_helpers.evaluators - :members: ctc_error_evaluator - :noindex: - -chunk_evaluator ---------------- -.. automodule:: paddle.trainer_config_helpers.evaluators - :members: chunk_evaluator - :noindex: - -precision_recall_evaluator --------------------------- -.. automodule:: paddle.trainer_config_helpers.evaluators - :members: precision_recall_evaluator - :noindex: - -Rank -==== - -pnpair_evaluator ----------------- -.. automodule:: paddle.trainer_config_helpers.evaluators - :members: pnpair_evaluator - :noindex: - -Utils -===== - -sum_evaluator -------------- -.. automodule:: paddle.trainer_config_helpers.evaluators - :members: sum_evaluator - :noindex: - -column_sum_evaluator --------------------- -.. automodule:: paddle.trainer_config_helpers.evaluators - :members: column_sum_evaluator - :noindex: - -Print -===== - -classification_error_printer_evaluator --------------------------------------- -.. automodule:: paddle.trainer_config_helpers.evaluators - :members: classification_error_printer_evaluator - :noindex: - -gradient_printer_evaluator --------------------------- -.. automodule:: paddle.trainer_config_helpers.evaluators - :members: gradient_printer_evaluator - :noindex: - -maxid_printer_evaluator ------------------------ -.. automodule:: paddle.trainer_config_helpers.evaluators - :members: maxid_printer_evaluator - :noindex: - -maxframe_printer_evaluator ---------------------------- -.. automodule:: paddle.trainer_config_helpers.evaluators - :members: maxframe_printer_evaluator - :noindex: - -seqtext_printer_evaluator -------------------------- -.. automodule:: paddle.trainer_config_helpers.evaluators - :members: seqtext_printer_evaluator - :noindex: - -value_printer_evaluator ------------------------ -.. automodule:: paddle.trainer_config_helpers.evaluators - :members: value_printer_evaluator - :noindex: - diff --git a/doc/api/v1/trainer_config_helpers/layers.rst b/doc/api/v1/trainer_config_helpers/layers.rst deleted file mode 100644 index 24389c2d8574dfda4bec9298776aa6b1aee51535..0000000000000000000000000000000000000000 --- a/doc/api/v1/trainer_config_helpers/layers.rst +++ /dev/null @@ -1,508 +0,0 @@ -.. _api_trainer_config_helpers_layers: - -====== -Layers -====== - -Base -====== - -LayerType ---------- -.. automodule:: paddle.trainer_config_helpers.layers - :members: LayerType - :noindex: - -LayerOutput ------------ -.. automodule:: paddle.trainer_config_helpers.layers - :members: LayerOutput - :noindex: - -Data layer -=========== - -.. _api_trainer_config_helpers_layers_data_layer: - -data_layer ----------- -.. automodule:: paddle.trainer_config_helpers.layers - :members: data_layer - :noindex: - -Fully Connected Layers -====================== - -.. _api_trainer_config_helpers_layers_fc_layer: - -fc_layer --------- -.. automodule:: paddle.trainer_config_helpers.layers - :members: fc_layer - :noindex: - -selective_fc_layer ------------------- -.. automodule:: paddle.trainer_config_helpers.layers - :members: selective_fc_layer - :noindex: - -Conv Layers -=========== - -conv_operator -------------- -.. automodule:: paddle.trainer_config_helpers.layers - :members: conv_operator - :noindex: - -conv_projection ---------------- -.. automodule:: paddle.trainer_config_helpers.layers - :members: conv_projection - :noindex: - -conv_shift_layer ------------------- -.. automodule:: paddle.trainer_config_helpers.layers - :members: conv_shift_layer - :noindex: - -img_conv_layer --------------- -.. automodule:: paddle.trainer_config_helpers.layers - :members: img_conv_layer - :noindex: - -.. _api_trainer_config_helpers_layers_context_projection: - -context_projection ------------------- -.. automodule:: paddle.trainer_config_helpers.layers - :members: context_projection - :noindex: - -Image Pooling Layer -=================== - -img_pool_layer --------------- -.. automodule:: paddle.trainer_config_helpers.layers - :members: img_pool_layer - :noindex: - -spp_layer --------------- -.. automodule:: paddle.trainer_config_helpers.layers - :members: spp_layer - :noindex: - -maxout_layer ------------- -.. automodule:: paddle.trainer_config_helpers.layers - :members: maxout_layer - :noindex: - -Norm Layer -========== - -img_cmrnorm_layer ------------------ -.. automodule:: paddle.trainer_config_helpers.layers - :members: img_cmrnorm_layer - :noindex: - -batch_norm_layer ---------------------- -.. automodule:: paddle.trainer_config_helpers.layers - :members: batch_norm_layer - :noindex: - -sum_to_one_norm_layer ---------------------- -.. automodule:: paddle.trainer_config_helpers.layers - :members: sum_to_one_norm_layer - :noindex: - -Recurrent Layers -================ - -recurrent_layer ------------------ -.. automodule:: paddle.trainer_config_helpers.layers - :members: recurrent_layer - :noindex: - -lstmemory ---------- -.. automodule:: paddle.trainer_config_helpers.layers - :members: lstmemory - :noindex: - -grumemory ---------- -.. automodule:: paddle.trainer_config_helpers.layers - :members: grumemory - :noindex: - -Recurrent Layer Group -===================== - -memory ------- -.. automodule:: paddle.trainer_config_helpers.layers - :members: memory - :noindex: - -recurrent_group ---------------- -.. automodule:: paddle.trainer_config_helpers.layers - :members: recurrent_group - :noindex: - -lstm_step_layer ---------------- -.. automodule:: paddle.trainer_config_helpers.layers - :members: lstm_step_layer - :noindex: - -gru_step_layer ---------------- -.. automodule:: paddle.trainer_config_helpers.layers - :members: gru_step_layer - :noindex: - -beam_search ------------- -.. automodule:: paddle.trainer_config_helpers.layers - :members: beam_search - :noindex: - -get_output_layer ------------------ -.. automodule:: paddle.trainer_config_helpers.layers - :members: get_output_layer - :noindex: - -Mixed Layer -=========== - -.. _api_trainer_config_helpers_layers_mixed_layer: - -mixed_layer ------------ -.. automodule:: paddle.trainer_config_helpers.layers - :members: mixed_layer - :noindex: - -.. _api_trainer_config_helpers_layers_embedding_layer: - -embedding_layer ---------------- -.. automodule:: paddle.trainer_config_helpers.layers - :members: embedding_layer - :noindex: - -scaling_projection ------------------- -.. automodule:: paddle.trainer_config_helpers.layers - :members: scaling_projection - :noindex: - -dotmul_projection ------------------ -.. automodule:: paddle.trainer_config_helpers.layers - :members: dotmul_projection - :noindex: - -dotmul_operator ---------------- -.. automodule:: paddle.trainer_config_helpers.layers - :members: dotmul_operator - :noindex: - -full_matrix_projection ----------------------- -.. automodule:: paddle.trainer_config_helpers.layers - :members: full_matrix_projection - :noindex: - -identity_projection -------------------- -.. automodule:: paddle.trainer_config_helpers.layers - :members: identity_projection - :noindex: - - -table_projection ----------------------- -.. automodule:: paddle.trainer_config_helpers.layers - :members: table_projection - :noindex: - -trans_full_matrix_projection ----------------------------- -.. automodule:: paddle.trainer_config_helpers.layers - :members: trans_full_matrix_projection - :noindex: - -Aggregate Layers -================ - -.. _api_trainer_config_helpers_layers_pooling_layer: - -pooling_layer -------------- -.. automodule:: paddle.trainer_config_helpers.layers - :members: pooling_layer - :noindex: - -.. _api_trainer_config_helpers_layers_last_seq: - -last_seq --------- -.. automodule:: paddle.trainer_config_helpers.layers - :members: last_seq - :noindex: - -.. _api_trainer_config_helpers_layers_first_seq: - -first_seq ---------- -.. automodule:: paddle.trainer_config_helpers.layers - :members: first_seq - :noindex: - -concat_layer ------------- -.. automodule:: paddle.trainer_config_helpers.layers - :members: concat_layer - :noindex: - -seq_concat_layer ----------------- -.. automodule:: paddle.trainer_config_helpers.layers - :members: seq_concat_layer - :noindex: - -Reshaping Layers -================ - -block_expand_layer ------------------- -.. automodule:: paddle.trainer_config_helpers.layers - :members: block_expand_layer - :noindex: - -.. _api_trainer_config_helpers_layers_expand_layer: - -expand_layer ------------- -.. automodule:: paddle.trainer_config_helpers.layers - :members: expand_layer - :noindex: - -repeat_layer ------------- -.. automodule:: paddle.trainer_config_helpers.layers - :members: repeat_layer - :noindex: - -rotate_layer ------------- -.. automodule:: paddle.trainer_config_helpers.layers - :members: rotate_layer - :noindex: - -seq_reshape_layer ------------------ -.. automodule:: paddle.trainer_config_helpers.layers - :members: seq_reshape_layer - :noindex: - -Math Layers -=========== - -addto_layer ------------ -.. automodule:: paddle.trainer_config_helpers.layers - :members: addto_layer - :noindex: - -linear_comb_layer ------------------ -.. automodule:: paddle.trainer_config_helpers.layers - :members: linear_comb_layer - :noindex: - -interpolation_layer -------------------- -.. automodule:: paddle.trainer_config_helpers.layers - :members: interpolation_layer - :noindex: - -bilinear_interp_layer ----------------------- -.. automodule:: paddle.trainer_config_helpers.layers - :members: bilinear_interp_layer - :noindex: - -power_layer ------------ -.. automodule:: paddle.trainer_config_helpers.layers - :members: power_layer - :noindex: - -scaling_layer -------------- -.. automodule:: paddle.trainer_config_helpers.layers - :members: scaling_layer - :noindex: - -slope_intercept_layer ----------------------- -.. automodule:: paddle.trainer_config_helpers.layers - :members: slope_intercept_layer - :noindex: - -tensor_layer ------------- -.. automodule:: paddle.trainer_config_helpers.layers - :members: tensor_layer - :noindex: - -.. _api_trainer_config_helpers_layers_cos_sim: - -cos_sim -------- -.. automodule:: paddle.trainer_config_helpers.layers - :members: cos_sim - :noindex: - -trans_layer ------------- -.. automodule:: paddle.trainer_config_helpers.layers - :members: trans_layer - :noindex: - -Sampling Layers -=============== - -maxid_layer ------------ -.. automodule:: paddle.trainer_config_helpers.layers - :members: maxid_layer - :noindex: - -sampling_id_layer ------------------ -.. automodule:: paddle.trainer_config_helpers.layers - :members: sampling_id_layer - :noindex: - -Slicing and Joining Layers -========================== - -pad_layer ------------ -.. automodule:: paddle.trainer_config_helpers.layers - :members: pad_layer - :noindex: - -.. _api_trainer_config_helpers_layers_cost_layers: - -Cost Layers -=========== - -cross_entropy -------------- -.. automodule:: paddle.trainer_config_helpers.layers - :members: cross_entropy - :noindex: - -cross_entropy_with_selfnorm ---------------------------- -.. automodule:: paddle.trainer_config_helpers.layers - :members: cross_entropy_with_selfnorm - :noindex: - -multi_binary_label_cross_entropy --------------------------------- -.. automodule:: paddle.trainer_config_helpers.layers - :members: multi_binary_label_cross_entropy - :noindex: - -mse_cost ---------- -.. automodule:: paddle.trainer_config_helpers.layers - :members: mse_cost - :noindex: - -huber_cost ----------- -.. automodule:: paddle.trainer_config_helpers.layers - :members: huber_cost - :noindex: - -lambda_cost ------------ -.. automodule:: paddle.trainer_config_helpers.layers - :members: lambda_cost - :noindex: - -rank_cost ---------- -.. automodule:: paddle.trainer_config_helpers.layers - :members: rank_cost - :noindex: - -sum_cost ---------- -.. automodule:: paddle.trainer_config_helpers.layers - :members: sum_cost - :noindex: - -crf_layer ------------------ -.. automodule:: paddle.trainer_config_helpers.layers - :members: crf_layer - :noindex: - -crf_decoding_layer -------------------- -.. automodule:: paddle.trainer_config_helpers.layers - :members: crf_decoding_layer - :noindex: - -ctc_layer ------------ -.. automodule:: paddle.trainer_config_helpers.layers - :members: ctc_layer - :noindex: - -warp_ctc_layer --------------- -.. automodule:: paddle.trainer_config_helpers.layers - :members: warp_ctc_layer - :noindex: - -nce_layer ------------ -.. automodule:: paddle.trainer_config_helpers.layers - :members: nce_layer - :noindex: - -hsigmoid ---------- -.. automodule:: paddle.trainer_config_helpers.layers - :members: hsigmoid - :noindex: - -Check Layer -============ - -eos_layer ------------- -.. automodule:: paddle.trainer_config_helpers.layers - :members: eos_layer - :noindex: diff --git a/doc/api/v1/trainer_config_helpers/networks.rst b/doc/api/v1/trainer_config_helpers/networks.rst deleted file mode 100644 index edb53acbf0c31532aa34bda044066fed72eaa426..0000000000000000000000000000000000000000 --- a/doc/api/v1/trainer_config_helpers/networks.rst +++ /dev/null @@ -1,123 +0,0 @@ -======== -Networks -======== - -The networks module contains pieces of neural network that combine multiple layers. - -NLP -=== - -sequence_conv_pool ------------------- -.. automodule:: paddle.trainer_config_helpers.networks - :members: sequence_conv_pool - :noindex: - -.. _api_trainer_config_helpers_network_text_conv_pool: - -text_conv_pool --------------- -.. automodule:: paddle.trainer_config_helpers.networks - :members: text_conv_pool - :noindex: - -Images -====== - -img_conv_bn_pool ----------------- -.. automodule:: paddle.trainer_config_helpers.networks - :members: img_conv_bn_pool - :noindex: - -img_conv_group --------------- -.. automodule:: paddle.trainer_config_helpers.networks - :members: img_conv_group - :noindex: - -.. _api_trainer_config_helpers_network_simple_img_conv_pool: - -simple_img_conv_pool --------------------- -.. automodule:: paddle.trainer_config_helpers.networks - :members: simple_img_conv_pool - :noindex: - -vgg_16_network ---------------- -.. automodule:: paddle.trainer_config_helpers.networks - :members: vgg_16_network - :noindex: - -Recurrent -========= - -LSTM ----- - -lstmemory_unit -`````````````` -.. automodule:: paddle.trainer_config_helpers.networks - :members: lstmemory_unit - :noindex: - -lstmemory_group -``````````````` -.. automodule:: paddle.trainer_config_helpers.networks - :members: lstmemory_group - :noindex: - -simple_lstm -``````````` -.. automodule:: paddle.trainer_config_helpers.networks - :members: simple_lstm - :noindex: - -bidirectional_lstm -`````````````````` -.. automodule:: paddle.trainer_config_helpers.networks - :members: bidirectional_lstm - :noindex: - -GRU ---- - -gru_unit -```````` -.. automodule:: paddle.trainer_config_helpers.networks - :members: gru_unit - :noindex: - -gru_group -````````` -.. automodule:: paddle.trainer_config_helpers.networks - :members: gru_group - :noindex: - -simple_gru -`````````` -.. automodule:: paddle.trainer_config_helpers.networks - :members: simple_gru - :noindex: - -simple_attention ----------------- -.. automodule:: paddle.trainer_config_helpers.networks - :members: simple_attention - :noindex: - -Miscs -===== - -dropout_layer --------------- -.. automodule:: paddle.trainer_config_helpers.networks - :members: dropout_layer - :noindex: - -outputs -------- -.. automodule:: paddle.trainer_config_helpers.networks - :members: outputs - :noindex: diff --git a/doc/api/v1/trainer_config_helpers/optimizers.rst b/doc/api/v1/trainer_config_helpers/optimizers.rst deleted file mode 100644 index d2f4958c92b8e3b7426945f1af07112ab4071136..0000000000000000000000000000000000000000 --- a/doc/api/v1/trainer_config_helpers/optimizers.rst +++ /dev/null @@ -1,61 +0,0 @@ -.. _api_trainer_config_helpers_optimizers: - -========== -Optimizers -========== - -BaseSGDOptimizer -================ -.. automodule:: paddle.trainer_config_helpers.optimizers - :members: BaseSGDOptimizer - :noindex: - -MomentumOptimizer -================= -.. automodule:: paddle.trainer_config_helpers.optimizers - :members: MomentumOptimizer - :noindex: - -AdamOptimizer -============= -.. automodule:: paddle.trainer_config_helpers.optimizers - :members: AdamOptimizer - :noindex: - -AdamaxOptimizer -================ -.. automodule:: paddle.trainer_config_helpers.optimizers - :members: AdamaxOptimizer - :noindex: - -AdaGradOptimizer -================ -.. automodule:: paddle.trainer_config_helpers.optimizers - :members: AdaGradOptimizer - :noindex: - -DecayedAdaGradOptimizer -======================= -.. automodule:: paddle.trainer_config_helpers.optimizers - :members: DecayedAdaGradOptimizer - :noindex: - -AdaDeltaOptimizer -================= -.. automodule:: paddle.trainer_config_helpers.optimizers - :members: AdaDeltaOptimizer - :noindex: - -RMSPropOptimizer -================ -.. automodule:: paddle.trainer_config_helpers.optimizers - :members: RMSPropOptimizer - :noindex: - -.. _api_trainer_config_helpers_optimizers_settings: - -settings -======== -.. automodule:: paddle.trainer_config_helpers.optimizers - :members: settings - :noindex: diff --git a/doc/api/v1/trainer_config_helpers/poolings.rst b/doc/api/v1/trainer_config_helpers/poolings.rst deleted file mode 100644 index 66566809d26f59263597b5286c5b27e0bbc9415a..0000000000000000000000000000000000000000 --- a/doc/api/v1/trainer_config_helpers/poolings.rst +++ /dev/null @@ -1,33 +0,0 @@ -======== -Poolings -======== - -BasePoolingType -=============== -.. automodule:: paddle.trainer_config_helpers.poolings - :members: BasePoolingType - :noindex: - -AvgPooling -========== -.. automodule:: paddle.trainer_config_helpers.poolings - :members: AvgPooling - :noindex: - -MaxPooling -========== -.. automodule:: paddle.trainer_config_helpers.poolings - :members: MaxPooling - :noindex: - -SumPooling -========== -.. automodule:: paddle.trainer_config_helpers.poolings - :members: SumPooling - :noindex: - -SquareRootNPooling -================== -.. automodule:: paddle.trainer_config_helpers.poolings - :members: SquareRootNPooling - :noindex: diff --git a/doc/api/v2/config/evaluators.rst b/doc/api/v2/config/evaluators.rst new file mode 100644 index 0000000000000000000000000000000000000000..9ac972fb193a2fb525edc507f7ba1303d2c8eabe --- /dev/null +++ b/doc/api/v2/config/evaluators.rst @@ -0,0 +1,110 @@ +.. _api_v2: + +========== +Evaluators +========== + +Classification +============== + +classification_error +-------------------- +.. automodule:: paddle.v2.evaluator + :members: classification_error + :noindex: + +auc +--- +.. automodule:: paddle.v2.evaluator + :members: auc + :noindex: + +ctc_error +--------- +.. automodule:: paddle.v2.evaluator + :members: ctc_error + :noindex: + +chunk +----- +.. automodule:: paddle.v2.evaluator + :members: chunk + :noindex: + +precision_recall +---------------- +.. automodule:: paddle.v2.evaluator + :members: precision_recall + :noindex: + +Rank +==== + +pnpair +------ +.. automodule:: paddle.v2.evaluator + :members: pnpair + :noindex: + +Utils +===== + +sum +--- +.. automodule:: paddle.v2.evaluator + :members: sum + :noindex: + +column_sum +---------- +.. automodule:: paddle.v2.evaluator + :members: column_sum + :noindex: + +Print +===== + +classification_error_printer +---------------------------- +.. automodule:: paddle.v2.evaluator + :members: classification_error_printer + :noindex: + +gradient_printer +---------------- +.. automodule:: paddle.v2.evaluator + :members: gradient_printer + :noindex: + +maxid_printer +------------- +.. automodule:: paddle.v2.evaluator + :members: maxid_printer + :noindex: + +maxframe_printer +---------------- +.. automodule:: paddle.v2.evaluator + :members: maxframe_printer + :noindex: + +seqtext_printer +--------------- +.. automodule:: paddle.v2.evaluator + :members: seqtext_printer + :noindex: + +value_printer +------------- +.. automodule:: paddle.v2.evaluator + :members: value_printer + :noindex: + +Detection +===== + +detection_map +------------- +.. automodule:: paddle.v2.evaluator + :members: detection_map + :noindex: diff --git a/doc/api/v2/config/layer.rst b/doc/api/v2/config/layer.rst index 2a02baf17ba0d1119a8d222024616ef8ae33f8d5..d4e9d53e5c0955912a594fe8cd9cd41a4080a2d2 100644 --- a/doc/api/v2/config/layer.rst +++ b/doc/api/v2/config/layer.rst @@ -59,6 +59,11 @@ context_projection .. autoclass:: paddle.v2.layer.context_projection :noindex: +row_conv +-------- +.. autoclass:: paddle.v2.layer.row_conv + :noindex: + Image Pooling Layer =================== @@ -99,6 +104,11 @@ cross_channel_norm ------------------ .. autoclass:: paddle.v2.layer.cross_channel_norm :noindex: + +row_l2_norm +----------- +.. autoclass:: paddle.v2.layer.row_l2_norm + :noindex: Recurrent Layers ================ @@ -130,7 +140,7 @@ recurrent_group --------------- .. autoclass:: paddle.v2.layer.recurrent_group :noindex: - + lstm_step --------- .. autoclass:: paddle.v2.layer.lstm_step @@ -145,12 +155,12 @@ beam_search ------------ .. autoclass:: paddle.v2.layer.beam_search :noindex: - + get_output ---------- .. autoclass:: paddle.v2.layer.get_output :noindex: - + Mixed Layer =========== @@ -193,6 +203,10 @@ identity_projection .. autoclass:: paddle.v2.layer.identity_projection :noindex: +slice_projection +------------------- +.. autoclass:: paddle.v2.layer.slice_projection + :noindex: table_projection ---------------- @@ -203,10 +217,15 @@ trans_full_matrix_projection ---------------------------- .. autoclass:: paddle.v2.layer.trans_full_matrix_projection :noindex: - + Aggregate Layers ================ +AggregateLevel +-------------- +.. autoclass:: paddle.v2.layer.AggregateLevel + :noindex: + .. _api_v2.layer_pooling: pooling @@ -238,6 +257,21 @@ seq_concat .. autoclass:: paddle.v2.layer.seq_concat :noindex: +seq_slice +--------- +.. autoclass:: paddle.v2.layer.seq_slice + :noindex: + +kmax_sequence_score +------------------- +.. autoclass:: paddle.v2.layer.kmax_sequence_score + :noindex: + +sub_nested_seq +-------------- +.. autoclass:: paddle.v2.layer.sub_nested_seq + :noindex: + Reshaping Layers ================ @@ -248,6 +282,11 @@ block_expand .. _api_v2.layer_expand: +ExpandLevel +----------- +.. autoclass:: paddle.v2.layer.ExpandLevel + :noindex: + expand ------ .. autoclass:: paddle.v2.layer.expand @@ -301,6 +340,16 @@ scaling .. autoclass:: paddle.v2.layer.scaling :noindex: +clip +---- +.. autoclass:: paddle.v2.layer.clip + :noindex: + +resize +------ +.. autoclass:: paddle.v2.layer.resize + :noindex: + slope_intercept --------------- .. autoclass:: paddle.v2.layer.slope_intercept @@ -323,6 +372,11 @@ trans .. autoclass:: paddle.v2.layer.trans :noindex: +scale_shift +----------- +.. autoclass:: paddle.v2.layer.scale_shift + :noindex: + Sampling Layers =============== @@ -336,6 +390,12 @@ sampling_id .. autoclass:: paddle.v2.layer.sampling_id :noindex: +multiplex +--------- +.. autoclass:: paddle.v2.layer.multiplex + :noindex: + + Slicing and Joining Layers ========================== @@ -364,9 +424,14 @@ multi_binary_label_cross_entropy_cost .. autoclass:: paddle.v2.layer.multi_binary_label_cross_entropy_cost :noindex: -huber_cost ----------- -.. autoclass:: paddle.v2.layer.huber_cost +huber_regression_cost +------------------------- +.. autoclass:: paddle.v2.layer.huber_regression_cost + :noindex: + +huber_classification_cost +------------------------- +.. autoclass:: paddle.v2.layer.huber_classification_cost :noindex: lambda_cost @@ -374,9 +439,9 @@ lambda_cost .. autoclass:: paddle.v2.layer.lambda_cost :noindex: -mse_cost +square_error_cost -------- -.. autoclass:: paddle.v2.layer.mse_cost +.. autoclass:: paddle.v2.layer.square_error_cost :noindex: rank_cost @@ -419,10 +484,49 @@ hsigmoid .. autoclass:: paddle.v2.layer.hsigmoid :noindex: -Check Layer +smooth_l1_cost +-------------- +.. autoclass:: paddle.v2.layer.smooth_l1_cost + :noindex: + +multibox_loss +-------------- +.. autoclass:: paddle.v2.layer.multibox_loss + :noindex: + +Check Layer ============ eos --- .. autoclass:: paddle.v2.layer.eos :noindex: + +Miscs +===== + +dropout +-------------- +.. autoclass:: paddle.v2.layer.dropout + :noindex: + +Activation with learnable parameter +=================================== + +prelu +-------- +.. autoclass:: paddle.v2.layer.prelu + :noindex: + +gated_unit +----------- +.. autoclass:: paddle.v2.layer.gated_unit + :noindex: + +Detection output Layer +====================== + +detection_output +---------------- +.. autoclass:: paddle.v2.layer.detection_output + :noindex: diff --git a/doc/api/v2/config/networks.rst b/doc/api/v2/config/networks.rst index 6f209bc95bec7279051118bb857a96515e0371a9..6e813ab1a820d068ea3e54cad6178f1cf928eadc 100644 --- a/doc/api/v2/config/networks.rst +++ b/doc/api/v2/config/networks.rst @@ -44,6 +44,12 @@ simple_img_conv_pool :members: simple_img_conv_pool :noindex: +small_vgg +--------- +.. automodule:: paddle.v2.networks + :members: small_vgg + :noindex: + vgg_16_network --------------- .. automodule:: paddle.v2.networks @@ -101,17 +107,21 @@ simple_gru :members: simple_gru :noindex: -simple_attention ----------------- +simple_gru2 +``````````` .. automodule:: paddle.v2.networks - :members: simple_attention + :members: simple_gru2 :noindex: -Miscs -===== +bidirectional_gru +`````````````````` +.. automodule:: paddle.v2.networks + :members: bidirectional_gru + :noindex: -dropout_layer --------------- +simple_attention +---------------- .. automodule:: paddle.v2.networks - :members: dropout_layer + :members: simple_attention :noindex: + diff --git a/doc/api/v2/model_configs.rst b/doc/api/v2/model_configs.rst index a5fae7e29e56d9e236d489353a5c8967d1954641..992b559cbd87244612521d4c96f84f997d6c4196 100644 --- a/doc/api/v2/model_configs.rst +++ b/doc/api/v2/model_configs.rst @@ -6,6 +6,7 @@ Model Configuration config/activation.rst config/layer.rst + config/evaluators.rst config/optimizer.rst config/pooling.rst config/networks.rst diff --git a/doc/design/api.md b/doc/design/api.md index 8185d2af0ea264a2e7b4e28b9ed05279e4a22014..e6a4638d9100d9b07c3ee6b92b530a17eae1c162 100644 --- a/doc/design/api.md +++ b/doc/design/api.md @@ -3,7 +3,7 @@ ## Ingredients As our design principle is starting from the essence: how could we -allow users to express and solve their problems at neural networks. +allow users to express and solve their problems as neural networks. Some essential concepts that our API have to provide include: 1. A *topology* is an expression of *layers*. @@ -233,7 +233,7 @@ paddle.dist_train(model, num_parameter_servers=15) ``` -The pseudo code if `paddle.dist_train` is as follows: +The pseudo code of `paddle.dist_train` is as follows: ```python def dist_train(topology, parameters, trainer, reader, ...): diff --git a/doc/design/auto_gradient_check.md b/doc/design/auto_gradient_check.md new file mode 100644 index 0000000000000000000000000000000000000000..f9991541bc51c6e13ffce4e9cec60f73dc800121 --- /dev/null +++ b/doc/design/auto_gradient_check.md @@ -0,0 +1,146 @@ +## Auto Gradient Checker Design + +## Backgraound: +- Generally, it is easy to check whether the forward computation of an Operator is correct or not. However, backpropagation is a notoriously difficult algorithm to debug and get right: + 1. you should get the right backpropagation formula according to the forward computation. + 2. you should implement it right in CPP. + 3. it's difficult to prepare test data. + +- Auto gradient checking gets a numerical gradient by forward Operator and use it as a reference of the backward Operator's result. It has several advantages: + 1. numerical gradient checker only need forward operator. + 2. user only need to prepare the input data for forward Operator. + +## Mathematical Theory +The following two document from Stanford has a detailed explanation of how to get numerical gradient and why it's useful. + +- [Gradient checking and advanced optimization(en)](http://deeplearning.stanford.edu/wiki/index.php/Gradient_checking_and_advanced_optimization) +- [Gradient checking and advanced optimization(cn)](http://ufldl.stanford.edu/wiki/index.php/%E6%A2%AF%E5%BA%A6%E6%A3%80%E9%AA%8C%E4%B8%8E%E9%AB%98%E7%BA%A7%E4%BC%98%E5%8C%96) + + +## Numeric Gradient Implementation +### Python Interface +```python +def get_numerical_gradient(op, + input_values, + output_name, + input_to_check, + delta=0.005, + local_scope=None): + """ + Get Numeric Gradient for an operator's input. + + :param op: C++ operator instance, could be an network + :param input_values: The input variables. Should be an dictionary, whose key is + variable name, and value is numpy array. + :param output_name: The final output variable name. + :param input_to_check: The input variable with respect to which to compute the gradient. + :param delta: The perturbation value for numeric gradient method. The + smaller delta is, the more accurate result will get. But if that delta is + too small, it will suffer from numerical stability problem. + :param local_scope: The local scope used for get_numeric_gradient. + :return: The gradient array in numpy format. + """ +``` + +### Explaination: + +- Why need `output_name` + - An Operator may have multiple Output, one can get independent gradient from each Output. So caller should specify the name of the output variable. + +- Why need `input_to_check` + - One operator may have multiple inputs. Gradient Op can calculate the gradient of these inputs at the same time. But Numeric Gradient needs to calculate them one by one. So `get_numeric_gradient` is designed to calculate the gradient for one input. If you need to compute multiple inputs, you can call `get_numeric_gradient` multiple times. + + +### Core Algorithm Implementation + + +```python + # we only compute gradient of one element a time. + # we use a for loop to compute the gradient of each element. + for i in xrange(tensor_size): + # get one input element by its index i. + origin = tensor_to_check.get_float_element(i) + + # add delta to it, run op and then get the new value of the result tensor. + x_pos = origin + delta + tensor_to_check.set_float_element(i, x_pos) + y_pos = get_output() + + # plus delta to this element, run op and get the new value of the result tensor. + x_neg = origin - delta + tensor_to_check.set_float_element(i, x_neg) + y_neg = get_output() + + # restore old value + tensor_to_check.set_float_element(i, origin) + + # compute the gradient of this element and store it into a numpy array. + gradient_flat[i] = (y_pos - y_neg) / delta / 2 + + # reshape the gradient result to the shape of the source tensor. + return gradient_flat.reshape(tensor_to_check.get_dims()) +``` + +## Auto Graident Checker Framework + +Each Operator Kernel has three kinds of Gradient: + +1. Numerical gradient +2. CPU kernel gradient +3. GPU kernel gradient (if supported) + +The numerical gradient only relies on forward Operator. So we use the numerical gradient as the reference value. And the gradient checking is performed in the following three steps: + +1. calculate the numerical gradient +2. calculate CPU kernel gradient with the backward Operator and compare it with the numerical gradient +3. calculate GPU kernel gradient with the backward Operator and compare it with the numeric gradient (if supported) + +#### Python Interface + +```python + def check_grad(self, + forward_op, + input_vars, + inputs_to_check, + output_name, + no_grad_set=None, + only_cpu=False, + max_relative_error=0.005): + """ + :param forward_op: used to create backward_op + :param input_vars: numpy value of input variable. The following + computation will use these variables. + :param inputs_to_check: the input variable with respect to which to compute the gradient. + :param output_name: The final output variable name. + :param max_relative_error: The relative tolerance parameter. + :param no_grad_set: used when create backward ops + :param only_cpu: only compute and check gradient on cpu kernel. + :return: + """ +``` + +### How to check if two numpy array is close enough? +if `abs_numerical_grad` is nearly zero, then use abs error for numerical_grad + +```python +numerical_grad = ... +operator_grad = numpy.array(scope.find_var(grad_var_name(name)).get_tensor()) + +abs_numerical_grad = numpy.abs(numerical_grad) +# if abs_numerical_grad is nearly zero, then use abs error for numeric_grad, not relative +# error. +abs_numerical_grad[abs_numerical_grad < 1e-3] = 1 + +diff_mat = numpy.abs(abs_numerical_grad - operator_grad) / abs_numerical_grad +max_diff = numpy.max(diff_mat) +``` + + +#### Notes: +The Input data for auto gradient checker should be reasonable to avoid numerical stability problem. + + +#### Refs: + +- [Gradient checking and advanced optimization(en)](http://deeplearning.stanford.edu/wiki/index.php/Gradient_checking_and_advanced_optimization) +- [Gradient checking and advanced optimization(cn)](http://ufldl.stanford.edu/wiki/index.php/%E6%A2%AF%E5%BA%A6%E6%A3%80%E9%AA%8C%E4%B8%8E%E9%AB%98%E7%BA%A7%E4%BC%98%E5%8C%96) diff --git a/doc/design/block.md b/doc/design/block.md new file mode 100644 index 0000000000000000000000000000000000000000..9c812732d6ead76eb3aa2d1b617449c96807f21a --- /dev/null +++ b/doc/design/block.md @@ -0,0 +1,336 @@ +# Design Doc: Block and Scope + +## The Representation of Computation + +Both deep learning systems and programming languages help users describe computation procedures. These systems use various representations of computation: + +- Caffe, Torch, and Paddle: sequences of layers. +- TensorFlow, Caffe2, Mxnet: graph of operators. +- PaddlePaddle: nested blocks, like C++ and Java programs. + +## Block in Programming Languages and Deep Learning + +In programming languages, a block is a pair of curly braces that includes local variables definitions and a sequence of instructions or operators. + +Blocks work with control flow structures like `if`, `else`, and `for`, which have equivalents in deep learning: + +| programming languages | PaddlePaddle | +|-----------------------|-----------------------| +| for, while loop | RNN, WhileOp | +| if, if-else, switch | IfElseOp, SwitchOp | +| sequential execution | a sequence of layers | + +A key difference is that a C++ program describes a one pass computation, whereas a deep learning program describes both the forward and backward passes. + +## Stack Frames and the Scope Hierarchy + +The existence of the backward pass makes the execution of a block of PaddlePaddle different from traditional programs: + +| programming languages | PaddlePaddle | +|-----------------------|---------------------------------| +| stack | scope hierarchy | +| stack frame | scope | +| push at entering block| push at entering block | +| pop at leaving block | destroy when minibatch completes| + +1. In traditional programs: + + - When the execution enters the left curly brace of a block, the runtime pushes a frame into the stack, where it realizes local variables. + - After the execution leaves the right curly brace, the runtime pops the frame. + - The maximum number of frames in the stack is the maximum depth of nested blocks. + +1. In PaddlePaddle + + - When the execution enters a block, PaddlePaddle adds a new scope, where it realizes variables. + - PaddlePaddle doesn't pop a scope after the execution of the block because variables therein are used by the backward pass. So it has a stack forest known as a *scope hierarchy*. + - The height of the highest tree is the maximum depth of nested blocks. + - After the processing of a minibatch, PaddlePaddle destroys the scope hierarchy. + +## Use Blocks in C++ and PaddlePaddle Programs + +Let us consolidate the discussion by presenting some examples. + +### Blocks with `if-else` and `IfElseOp` + +The following C++ programs shows how blocks are used with the `if-else` structure: + +```c++ +namespace pd = paddle; + +int x = 10; +int y = 1; +int z = 10; +bool cond = false; +int o1, o2; +if (cond) { + int z = x + y; + o1 = z; + o2 = pd::layer::softmax(z); +} else { + int d = pd::layer::fc(z); + o1 = d; + o2 = d+1; +} + +``` + +An equivalent PaddlePaddle program from the design doc of the [IfElseOp operator](./if_else_op.md) is as follows: + +```python +import paddle as pd + +x = minibatch([10, 20, 30]) # shape=[None, 1] +y = var(1) # shape=[1], value=1 +z = minibatch([10, 20, 30]) # shape=[None, 1] +cond = larger_than(x, 15) # [false, true, true] + +ie = pd.ifelse() +with ie.true_block(): + d = pd.layer.add_scalar(x, y) + ie.output(d, pd.layer.softmax(d)) +with ie.false_block(): + d = pd.layer.fc(z) + ie.output(d, d+1) +o1, o2 = ie(cond) +``` + +In both examples, the left branch computes `x+y` and `softmax(x+y)`, the right branch computes `fc(x)` and `x+1` . + +The difference is that variables in the C++ program contain scalar values, whereas those in the PaddlePaddle programs are mini-batches of instances. + + +### Blocks with `for` and `RNNOp` + +The following RNN model in PaddlePaddle from the [RNN design doc](./rnn.md) : + +```python +x = sequence([10, 20, 30]) # shape=[None, 1] +m = var(0) # shape=[1] +W = var(0.314, param=true) # shape=[1] +U = var(0.375, param=true) # shape=[1] + +rnn = pd.rnn() +with rnn.step(): + h = rnn.memory(init = m) + h_prev = rnn.previous_memory(h) + a = layer.fc(W, x) + b = layer.fc(U, h_prev) + s = pd.add(a, b) + act = pd.sigmoid(s) + rnn.update_memory(h, act) + rnn.output(a, b) +o1, o2 = rnn() +``` +has its equivalent C++ program as follows + +```c++ +int* x = {10, 20, 30}; +int* m = {0}; +int* W = {0.314}; +int* U = {0.375}; + +int mem[sizeof(x) / sizeof(x[0]) + 1]; +int o1[sizeof(x) / sizeof(x[0]) + 1]; +int o2[sizeof(x) / sizeof(x[0]) + 1]; +for (int i = 1; i <= sizeof(x)/sizeof(x[0]); ++i) { + int x = x[i-1]; + if (i == 1) mem[0] = m; + int a = W * x; + int b = Y * mem[i-1]; + int s = fc_out + hidden_out; + int act = sigmoid(sum); + mem[i] = act; + o1[i] = act; + o2[i] = hidden_out; +} +``` + +## Compilation and Execution + +Like TensorFlow, a PaddlePaddle program is written in Python. The first part describes a neural network as a protobuf message, and the rest executes the message for training or inference. + +The generation of this protobuf message is similar to how a compiler generates a binary executable file. The execution of the message is similar to how the OS executes the binary file. + +## The "Binary Executable File Format" + +The definition of the protobuf message is as follows: + +```protobuf +message BlockDesc { + repeated VarDesc vars = 1; + repeated OpDesc ops = 2; +} +``` + +The step net in above RNN example would look like + +``` +BlockDesc { + vars = { + VarDesc {...} // x + VarDesc {...} // h + VarDesc {...} // fc_out + VarDesc {...} // hidden_out + VarDesc {...} // sum + VarDesc {...} // act + } + ops = { + OpDesc {...} // matmul + OpDesc {...} // add_two + OpDesc {...} // sigmoid + } +}; +``` + +Also, the RNN operator in above example is serialized into a protobuf message of type `OpDesc` and would look like: + +``` +OpDesc { + inputs = {0} // the index of x in vars of BlockDesc above + outputs = {5, 3} // indices of act and hidden_out in vars of BlockDesc above + attrs { + "memories" : {1} // the index of h + "step_net" : + } +}; +``` + +This `OpDesc` value is in the `ops` field of the `BlockDesc` value representing the global block. + + +## The Compilation of Blocks + +During the generation of the Protobuf message, the Block should store VarDesc (the Protobuf message which describes Variable) and OpDesc (the Protobuf message which describes Operator). + +VarDesc in a block should have its name scope to avoid local variables affect parent block's name scope. +Child block's name scopes should inherit the parent's so that OpDesc in child block can reference a VarDesc that stored in parent block. For example: + +```python +a = pd.Variable(shape=[20, 20]) +b = pd.fc(a, params=["fc.w", "fc.b"]) + +rnn = pd.create_rnn() +with rnn.stepnet(): + x = a.as_step_input() + # reuse fc's parameter + fc_without_b = pd.get_variable("fc.w") + rnn.output(fc_without_b) + +out = rnn() +``` +The method `pd.get_variable` can help retrieve a Variable by the name. The Variable may be stored in a parent block, but might be retrieved in a child block, so block should have a variable scope that supports inheritance. + +In compiler design, the symbol table is a data structure created and maintained by compilers to store information about the occurrence of various entities such as variable names, function names, classes, etc. + +To store the definition of variables and operators, we define a C++ class `SymbolTable`, like the one used in compilers. + +`SymbolTable` can do the following: + +- store the definitions (some names and attributes) of variables and operators, +- verify if a variable was declared, +- make it possible to implement type checking (offer Protobuf message pointers to `InferShape` handlers). + + +```c++ +// Information in SymbolTable is enough to trace the dependency graph. So maybe +// the Eval() interface takes a SymbolTable is enough. +class SymbolTable { + public: + SymbolTable(SymbolTable* parent) : parent_(parent) {} + + OpDesc* NewOp(const string& name=""); + + // TODO determine whether name is generated by python or C++. + // Currently assume that a unique name will be generated by C++ if the + // argument name is left default. + VarDesc* NewVar(const string& name=""); + + // find a VarDesc by name, if recursive is true, find parent's SymbolTable + // recursively. + // this interface is introduced to support InferShape, find protobuf messages + // of variables and operators, pass pointers into InferShape. + // + // NOTE maybe some C++ classes such as VarDescBuilder and OpDescBuilder should + // be proposed and embedded into pybind to enable python operation on C++ pointers. + VarDesc* FindVar(const string& name, bool recursive=true); + + OpDesc* FindOp(const string& name); + + BlockDesc Compile() const; + + private: + SymbolTable* parent_; + + map ops_; + map vars_; +}; +``` + +After all the description of variables and operators is added into SymbolTable, +the block has enough information to run. + +The `Block` class takes a `BlockDesc` as input, and provides `Run` and `InferShape` functions. + + +```c++ +namespace { + +class Block : OperatorBase { +public: + Block(const BlockDesc& desc) desc_(desc) {} + + void InferShape(const framework::Scope& scope) const override { + if (!symbols_ready_) { + CreateVariables(scope); + CreateOperators(); + } + // should run InferShape first. + for (auto& op : runtime_table_.ops()) { + op->InferShape(scope); + } + } + + void Run(const framework::Scope& scope, + const platform::DeviceContext& dev_ctx) const override { + PADDLE_ENFORCE(symbols_ready_, "operators and variables should be created first."); + for (auto& op : runtime_table_.ops()) { + op->Run(scope, dev_ctx); + } + } + + void CreateVariables(const framework::Scope& scope); + void CreateOperators(); + + // some other necessary interfaces of NetOp are listed below + // ... + +private: + BlockDesc desc_; + bool symbols_ready_{false}; +}; +``` + +## The Execution of Blocks + +Block inherits from OperatorBase, which has a Run method. +Block's Run method will run its operators sequentially. + +There is another important interface called `Eval`, which takes some arguments called targets and generates a minimal graph which treats targets as the end points and creates a new Block. After `Run`, `Eval` will get the latest value and return the targets. + +The definition of Eval is as follows: + +```c++ +// clean a block description by targets using the corresponding dependency graph. +// return a new BlockDesc with minimal number of operators. +// NOTE: The return type is not a Block but the block's description so that this can be distributed +// to a cluster. +BlockDesc Prune(const BlockDesc& desc, vector targets); + +void Block::Eval(const vector& targets, + const framework::Scope& scope, + const platform::DeviceContext& dev_ctx) { + BlockDesc min_desc = Prune(desc_, targets); + Block min_block(min_desc); + min_block.Run(scope, dev_ctx); +} +``` diff --git a/doc/design/build_system/README.md b/doc/design/build_system/README.md new file mode 100644 index 0000000000000000000000000000000000000000..bf0e4dddc1b640ecbce489f65820aaf8a4b3b1e7 --- /dev/null +++ b/doc/design/build_system/README.md @@ -0,0 +1,152 @@ +A few months ago when we were trying to replace CMake with Bazel, @emailweixu suggested that we rewrite those handy Bazel functions using CMake. Now it seems that it's the right time to get this done, as we are facing problems from the porting of Majel and the development of new the parameter server using Go and C++. + +Here are some initial thoughts. Your comments are welcome! + +### Required CMake Function + +I think we need only the following few CMake functions to make a project description mean and clean: + +| C++ | CUDA C++ | Go | +|---|---|---| +| cc_library | nv_library | go_library | +| cc_binary | nv_binary | go_binary | +| cc_test | nv_test | go_test | + +- The `_library` functions generate .a files from source code. +- The `_binary` functions generate executable binary files. +- The `_test` functions generate executable unit test files. They work like `_binary` but links `-lgtest` and `-lgtest_main`. + +The difference between `nv_` functions and `cc_` functions is that the former use `nvcc` instead of the system-default C++ compiler. + +Both `nv_` and `cc_` functions enables C++11 (-std=c++11). + +Also, + +- to describe external dependencies, we need `external_library`. +- to build shared libraries, we need `shared_library`. + +### An Example Project + +Suppose that we have aforementioned functions defined in our `/cmake` directory. The following example `CMakeLists.txt` describes a project including the following source files: + +- tensor.h +- tensor.cc +- tensor_test.cc +- ops.h +- ops.cu +- ops_test.cu +- api.go +- api_test.go + +Suppose that ops.cu depends on CUDNN. + +```cmake +# cc_binary parses tensor.cc and figures out that target also depend +# on tensor.h. +cc_binary(tensor + SRCS + tensor.cc) + +# The dependency to target tensor implies that if any of +# tensor{.h,.cc,_test.cc} is changed, tensor_test need to be re-built. +cc_test(tensor_test + SRCS + tensor_test.cc + DEPS + tensor) + +# I don't have a clear idea what parameters external_library need to +# have. @gangliao as a CMake expert would have better ideas. +external_library(cudnn + ....) + +# Suppose that ops.cu depends on external target CUDNN. Also, ops.cu +# include global functions that take Tensor as their parameters, so +# ops depend on tensor. This implies that if any of tensor.{h.cc}, +# ops.{h,cu} is changed, ops need to be re-built. +nv_library(ops + SRCS + ops.cu + DEPS + tensor + cudnn) # cudnn is defined later. + +nv_test(ops_test + SRCS + ops_test.cu + DEPS + ops) + +# Because api.go defines a GO wrapper to ops and tensor, it depends on +# both. This implies that if any of tensor.{h,cc}, ops.{h,cu}, or +# api.go is changed, api need to be re-built. +go_library(api + SRCS + api.go + DEPS + tensor # Because ops depend on tensor, this line is optional. + ops) + +go_test(api_test + SRCS + api_test.go + DEPS + api) + + +# This builds libapi.so. shared_library might use CMake target +# api_shared so to distinguish it from above target api. +shared_library(api + DEPS + api) + +``` + +### Implementation + +As above example CMakeLists.txt executes, each function invocation adds "nodes" to a dependency graph. It also use this graph to generate CMake commands including `add_executable`, `add_dependencies`, `target_link_libraries`, and `add_test`. + +### Using Package Manager For Go + +Building Go binaries and libraries need to satisfy their dependencies, generally +we can do `go get ./...` to download and compile all external dependencies. The +problems are: + +1. `go get` will always get the latest code from the default branch of the + remote repo, so changes of dependents might break the build. This is very + different with what we already have in `cmake/external` which download a + specific version or commit id of the dependency. +1. Some locations can not access external dependencies through the internet, as mentioned + in https://github.com/PaddlePaddle/Paddle/issues/2605. Using package management + tools can package the dependencies as a "vendor" package, which can be mirrored + at many cloud file hosting, so users what to compile paddle by themselves can + download this "vendor" package from a mirror site. + +#### Choose A Suitable Tool + +As mentioned by @wangkuiyi, [Here](https://github.com/golang/go/wiki/PackageManagementTools) +list dozens of Go package managers. We choose the tool using following principles: + +- Most "active" projects with more stars, more pull requests or commits +- Widely used project + +After comparing all these projects, we shall choose between the most popular +tools: Godep and Glide. + +Here's a brief comparison between Godep and Glide +: https://github.com/Masterminds/glide/wiki/Go-Package-Manager-Comparison. There are +also many complaints about using `Godep`. There's also a new "official" pakcage +management tool has been started at: https://github.com/golang/dep to resolve +such problems, but it's currently at Alpha stage. So the best choice now is +glide obviously. + +#### Manage Go Packages + +- Dependencies: `go/glide.yaml` will store the dependencies and their versions which + is directly imported by paddle. `go/glide.lock` will store all dependencies recursively + with their commit id. Builds will "lock" to these packages if we don't `glide up` + them +- Vendor package: `go/vendor` directory will generated when running `cmake` command. `cmake` + will download the code corresponding to `go/glide.lock`. If we put a vendor folder + under `go/`, cmake will just check the commit id to the packages under the folder, + if commit id matches, there will be no download at all. diff --git a/doc/design/cluster_train/README.md b/doc/design/cluster_train/README.md new file mode 100644 index 0000000000000000000000000000000000000000..177a5f5d54bd924fab34795219ce1f7b270c8e25 --- /dev/null +++ b/doc/design/cluster_train/README.md @@ -0,0 +1,182 @@ +# Design Doc: Distributed Training + +## Objective + +In [this slides](https://www.slideshare.net/cxwangyi/paddlepaddle-a-complete-solution-for-businesses), we explained that we'd like PaddlePaddle running on general-purpose clusters like those managed by Kubernetes, so to address demands for AI from both Internet and non-Internet industries. + +This poses technical challenges to PaddlePaddle: + +1. Support fault-recovery. +1. Support both offline and online training. +1. [Serverless computing](https://en.wikipedia.org/wiki/Serverless_computing) of distributed training. + + +## Training Job + +A training job will be created once user asks Paddle cloud to train a model. The training job is made up of different processes that collaboratively consume data and produce a trained model. There are three kinds of processes: + +1. the *master server process*, which dispatches tasks to +1. one or more *trainer processes*, which run distributed training and synchronize gradients/models via +1. one or more *parameter server processes*, where each holds a shard of the global model, and receive the uploaded gradients from every *trainer process*, so they can run the optimize functions to update their parameters. + +Their relation is illustrated in the following graph: + + + +By coordinating these processes, PaddlePaddle supports use both Synchronize Stochastic Gradient Descent (sync SGD) and Asynchronous Stochastic Gradient Descent (async SGD) to train user-defined neural network topologies. + +When training with sync SGD, parameter servers wait for all trainers to finish gradients update and then send the updated parameters to trainers, training can not proceed until the trainer received the updated parameters. This creates a synchronization point between trainers. When training with async SGD, each trainer upload gradient and download new parameters individually, without the synchronization with other trainers. Using asyc SGD will be faster in terms of time per pass, but have more noise in gradient since trainers are likely to have a stale model. + +### Master Server Process + +The master server process will: + +- Partition a dataset into [tasks](#task) and dispatch tasks to trainers. +- Keep track of training progress on the dataset with [task queue](#task-queue). A training job will iterate on the dataset for a full pass until it goes into next pass. + + +#### Task + +A task is a data shard to be trained. The total number of tasks will be much bigger than the total number of trainers. The number of data instances inside a task will be much bigger than the mini-batch size. + +#### Task Queue + +The master server has three task queues to track training progress. As illustrated in the graph below, Job A and Job B both have one master server. Each master server process has three task queues. + + + +- The todo queue holds tasks to be dispatched. When a job starts, the master server fills in the todo queue with all tasks. +- The pending queue holds tasks that are currently training by trainers. +- the done queue holds tasks that are already trained. + +The life cycle of a single task is illustrated below: + + + +1. When a new pass of training starts, all tasks will be placed in the todo queue. +1. Upon trainer requests for new task, the master server will dispatch a task from todo queue to it, put the task in the pending queue and wait for completion. +1. The trainer will work on its task and tell the master server once the task is completed and ask for new task. The master server will dispatch a new task to that trainer. +1. If a task fails for any reason in trainer, or takes longer than a specific period of time, the master server will move the task back to the todo queue. The timeout count for that task will increase by one. If the timeout count is above a threshold, the task is likely to cause a trainer to crash, then it will be discarded. +1. The master server will move completed task to the done queue. When the todo queue is empty, the master server will start a new pass by moving all tasks in the done queue to todo queue and reset the timeout counter of all tasks to zero. + +### Trainer Process + +The trainer process will: + +- Request tasks from the master. +- Work on the tasks +- Upload gradient to parameter servers, and update local model by downloading new parameters from parameter servers. + +### Parameter Server Process + +Parameter server processes hold the parameters collaboratively. The parameters are partitioned on different parameter servers. + +The parameter server will: + +- Receive gradient from the trainers, update its parameters, and give the trainers the latest parameters. +- Periodically save its parameters to distributed file system by overriding the previous save. + +### Optimization Algorithms + +The communication pattern between the trainers and the parameter servers depends on the category of optimization algorithm: + +- Synchronous Stochastic Gradient Descent (sync-SGD) + + Parameter server will wait for all trainer finish n-th mini-batch calculation and send their gradients before broadcasting new parameters to every trainer. Every trainer will wait for the new parameters before starting n+1-th mini-batch. + +- Asynchronous Stochastic Gradient Descent (async-SGD) + + There will no synchronization between different trainers, and parameter server updates its parameter as soon as it receives new gradient: + + - Each trainer uploads its accumulated gradient every n mini-batches. + - Every m mini-batches, the trainer downloads new parameters from parameter server. + - n and m do not have to be equal. + +## Fault Tolerant + +The training job will pause if the master server processes is dead, or any of the parameter server process is dead. They will be started by [Kubernetes](https://kubernetes.io/) and recover in few minutes. Please refer to [fault recovery](#fault-recovery). + +The training job will continue to make progress if there is at least one training process running. The strategy depends on the type of optimization algorithm: + +- sync-SGD + + TODO + +- async-SGD + + Since async-SGD does not require synchronization between mini-batches, the system will by definition make process if at least one trainer is running. + +## Fault Recovery + +PaddlePaddle uses [etcd](https://github.com/coreos/etcd) to keep track of the states of processes. Because etcd is a distributed reliable key-value store, the restarted process can recover its states from etcd. The model parameters are periodically saved into distributed file system, so a restarted parameter server can recover its parameters from the saved file. + +Now we will introduce how each process recovers from a failure, the graph below shows how etcd is used: + + + +### Master Server Process + +When the master is started by the Kubernetes, it executes the following steps at startup: + +1. Grabs a unique *master* lock in etcd, which prevents concurrent master instantiations. +1. Recovers the task queues from etcd if they already exist, otherwise, the master will create them. +1. Write its ip address to */master/addr* so that trainers can discover it. +1. Listens to trainers' request of task, dispatch one upon request, and updates task queue using an etcd transaction to ensure lock is held during the update. + +When the master server process is dead for any reason, Kubernetes will restart it. It will be online again with all states recovered from etcd in few minutes. + +### Trainer Process + +When the trainer is started by the Kubernetes, it executes the following steps at startup: + +1. Watches the available parameter server prefix keys `/ps/` on etcd and waits until the count of parameter servers reaches the desired count */ps_desired*. +1. Finds and watches */master/addr* to get master's address. +1. Requests for tasks from the master to start training. + +When a trainer fails, Kuberentes would try to restart it. The recovered trainer would fetch tasks from master and go on training. + +### Parameter Server Process + +When the parameter server is started by Kubernetes, it executes the following steps at startup: + +1. Read desired total number of parameter servers from etcd `/ps_desired` +1. Search through etcd keys `/ps/` (`/ps/0`, `/ps/1`, ...) to find the first non-existant key whose index is smaller than the total number of parameter servers. Set the key using a transaction to avoid concurrent writes. The parameter server's index is inferred from the key name. + + The desired number of parameter servers is 3: + + + + The third parameter server joined: + + + +1. The parameter server can load parameters if there are already saved parameters in the save path (inferred from its index). +1. Now the parameter server is ready for the trainers' requests. + +If the parameter server's etcd lease expires, the parameter server will kill itself. + + +## Parameter Server Checkpointing +See [here](./checkpointing.md) + +## Store and dispatching trainning data +See [here](./data_dispatch.md) + + +## Dynamic Scaling + +### Trainer Scaling + +TODO + +### Parameter Server Scaling + +Not planned for v1. + +## Training Dataset Format + +TODO + +## User Interface + +TODO diff --git a/doc/design/cluster_train/checkpointing.md b/doc/design/cluster_train/checkpointing.md new file mode 100644 index 0000000000000000000000000000000000000000..c87ef2c7d2636208866d05456d5d44316d0bb200 --- /dev/null +++ b/doc/design/cluster_train/checkpointing.md @@ -0,0 +1,44 @@ +## 模型参数检查点(Checkpointing) +模型数据检查点的实现,可以有效的避免parameter server的单点或多点同时故障。模型参数检查点通过定期向磁盘上保存一份存储在parameter server内存中的模型数据的完整镜像,来保证训练过程可以从中间状态重新启动。在一个不可中断并缺少备份的训练任务中,可以通过阶段性的保存每个parameter server的数据快照(snapshot)到 ***分布式存储服务*** 达到容灾的目的,比如每隔10分钟最新的快照,并删除更早的快照。在出现单点故障时,只需要恢复这台节点,或者将这台节点迁移到另一个节点并启动即可恢复训练任务。 + + + +### 快照保存的设计如下: + +说明: + +* parameter server在集群中启动后,自动挂载分布式存储目录,并把快照保存到这个目录下。 +* ***注:每个parameter server的检查点各自独立保存,暂时不考虑多个parameter server同步的保存一个特定时间点的全局检查点,因为这样做也没法保证消除随机性。*** + +检查点保存程序流程: + +1. 如果满足条件"每隔10分钟"时,parameter server会获取parameters内存的`read_lock`,启动一个新的线程开始保存检查点。如果已经正在执行保存检查点的线程,则忽略。由于对parameters的更新需要获取parameters内存的`write_lock`,所以在写入快照的过程中,parameter server会暂停参数更新并等待。 +2. parameter server生成一个UUID,向指定的目录中一个新的文件(文件名为此UUID)写入快照数据。在快照写入完成后,计算这个文件的MD5 sum。然后在etcd的`/checkpoints/[pserver_id]`中写入json内容:`{"uuid": [UUID], "md5", "MD5 sum", "timestamp": xxxx}`。 +3. 删除磁盘目录中不是当前uuid的快照文件。 +4. 释放对paramters内存的锁定,停止保存检查点的线程。 + +这里需要用户额外注意,在您的实际环境中,训练任务的运行可能会占满trainer和parameter server之间的网络带宽,如果parameter server此时还需要通过网络访问分布式存储以保存快照,可能会造成网络拥塞,而出现阶段性的运行停滞。 + +### 从快照恢复 + +在parameter server第一次启动或任意时间parameter server故障后被Kubernetes重新启动,则需要回滚到上一个检查点: + + 1. 从etcd中读取节点:`/checkpoints/[pserver_id]`获取最新的检查点的文件uuid + 1. 从磁盘文件中加载uuid文件名的检查点快照文件,并加载其中的参数 + 1. 如果上面两步出现错误,则使用启动参数定义的初始化方法初始化参数 + 1. 开始提供服务 + +## TODO List +### 推测执行/加速执行(TODO) +在异构集群中,如果存在某些trainer执行速度过慢会影响整体集群的速度(如图中Trainer 1),此时master将负责启动一个新的Trainer(Accelerate Trainer 2),使用同样的训练数据block。哪个trainer先完成block的训练,则把另一个慢速的kill掉。 + +### 动态扩容/缩容 +目前只考虑动态扩容trainer数量,可以减小系统复杂性。 + +## 术语 +* model: 指深度学习训练之后得到的所有参数,使用这个神经网络可以完成对新数据的预测 +* parameters: 神经网络中的参数,包括权重w和偏置b。一个神经网络的模型由大量的参数组成 +* shard: 分片,通常指将一个整体拆分成多份的其中的一份。 +* model shard: 将一个神经网络参数拆分成多份,每个shard分别存储在其中一台parameter server之上 +* parameter block: 多个parameter block构成一个model shard +* 单点故障: 任意时刻只可能同时有一台服务器故障。由于集群中同时存在两台机器故障的概率极低((平均故障率*平均故障修复时间)^2)只对特殊在线系统考虑两台以上同时故障的容灾。 diff --git a/doc/design/cluster_train/data_dispatch.md b/doc/design/cluster_train/data_dispatch.md new file mode 100644 index 0000000000000000000000000000000000000000..1f5d22ff5e6abcb576d16cbe7391da1967a1ab8e --- /dev/null +++ b/doc/design/cluster_train/data_dispatch.md @@ -0,0 +1,160 @@ +## 训练数据的存储和分发 + +### 概念解释 + +### 流程介绍 +生产环境中的训练数据集通常体积很大,并被存储在诸如Hadoop HDFS,Ceph,AWS S3之类的分布式存储之上。这些分布式存储服务通常会把数据切割成多个分片分布式的存储在多个节点之上。这样就可以在云端执行多种数据类计算任务,包括: + +* 数据预处理任务 +* Paddle训练任务 +* 在线模型预测服务 +
+ +
+ +在上图中显示了在一个实际生产环境中的应用(人脸识别)的数据流图。生产环境的日志数据会通过实时流的方式(Kafka)和离线数据的方式(HDFS)存储,并在集群中运行多个分布式数据处理任务,比如流式数据处理(online data process),离线批处理(offline data process)完成数据的预处理,提供给paddle作为训练数据。用户也可以上传labeled data到分布式存储补充训练数据。在paddle之上运行的深度学习训练输出的模型会提供给在线人脸识别的应用使用。 + +### 训练数据存储 +我们选择[CephFS](http://docs.ceph.com/docs/master/cephfs/)作为存储系统。 + +- 无论是从[PFSClient](../file_manager/README.md)的角度,还是从[Pod](https://kubernetes.io/docs/concepts/workloads/pods/pod/)中运行任务的角度,统一用`/pfs/$DATACENTER/home/$USER`来访问用户自己的数据。 +- `/pfs/$DATACENTER/common`下存放公共数据集合 + - 做只读挂载 + +
+ +
+ +### 文件预处理 + + +在开始训练之前, 数据集需要预先被转换成PaddlePaddle分布式训练使用的存储格[RecordIO](https://github.com/PaddlePaddle/Paddle/issues/1947)。我们提供两个转换方式: + +1. 用户在本地转换好再上传 +1. 用户上传数据后,在机群上运行转换程序 + +转换生成的文件名会是以下格式: + +```text +name_prefix-aaaaa-of-bbbbb +``` + +"aaaaa"和"bbbbb"都是五位的数字,每一个文件是数据集的一个shard,"aaaaa"代表shard的index,"bbbbb"代表这个shard的最大index。 + +比如ImageNet这个数据集可能被分成1000个shard,它们的文件名是: +```text +imagenet-00000-of-00999 +imagenet-00001-of-00999 +... +imagenet-00999-of-00999 +``` + +#### 转换库 + +无论是在本地或是云端转换,我们都提供Python的转换库,接口是: +```python +def convert(output_path, reader, num_shards, name_prefix) +``` + +- `output_path`: directory in which output files will be saved. +- `reader`: a [data reader](https://github.com/PaddlePaddle/Paddle/blob/develop/doc/design/reader/README.md#data-reader-interface), from which the convert program will read data instances. +- `num_shards`: the number of shards that the dataset will be partitioned into. +- `name_prefix`: the name prefix of generated files. + +`reader`每次输出一个data instance,这个instance可以是单个值,或者用tuple表示的多个值: + +```python +yield 1 # 单个值 +yield numpy.random.uniform(-1, 1, size=28*28) # 单个值 +yield numpy.random.uniform(-1, 1, size=28*28), 0 # 多个值 +``` + +每个值的类型可以是整形、浮点型数据、字符串,或者由它们组成的list,以及numpy.ndarray。如果是其它类型,会被Pickle序列化成字符串。 + +### 示例程序 + +#### 使用转换库 + +以下`reader_creator`生成的`reader`每次输出一个data instance,每个data instance包涵两个值:numpy.ndarray类型的值和整型的值: +```python +def reader_creator(): + def reader(): + for i in range(1000): + yield numpy.random.uniform(-1, 1, size=28*28), 0 # 多个值 + return reader +``` + +把`reader_creator`生成的`reader`传入`convert`函数即可完成转换: +```python +convert("./", reader_creator(), 100, random_images) +``` + +以上命令会在当前目录下生成100个文件: +```text +random_images-00000-of-00099 +random_images-00001-of-00099 +... +random_images-00099-of-00099 +``` + +#### 进行训练 + + +PaddlePaddle提供专用的[data reader creator](https://github.com/PaddlePaddle/Paddle/blob/develop/doc/design/reader/README.md#python-data-reader-design-doc),生成给定`RecordIO`文件对应的data reader。**无论在本地还是在云端,reader的使用方式都是一致的**: + +```python +# ... +reader = paddle.reader.creator.RecordIO("/pfs/datacenter_name/home/user_name/random_images-*-of-*") +batch_reader = paddle.batch(paddle.dataset.mnist.train(), 128) +trainer.train(batch_reader, ...) +``` + +以上代码的reader输出的data instance与生成数据集时,reader输出的data instance是一模一样的。 + +### 上传训练文件 + +使用下面命令,可以把本地的数据上传到存储集群中。 + +```bash +paddle pfs cp filename /pfs/$DATACENTER/home/$USER/folder/ +``` + +比如,把之前示例中转换完毕的random_images数据集上传到云端的`/home/`可以用以下指令: + +```bash +paddle pfs cp random_images-*-of-* /pfs/$DATACENTER/home/$USER/folder/ +``` + +需要`$DATACENTER`的配置写到配置文件中,例如 + +``` +# config file +[datacenter_1] +username=user +usercert=user.pem +userkey=user-key.pem +endpoint=datacenter1.paddlepaddle.org + +[datacenter_2] +username=user +usercert=user.pem +userkey=user-key.pem +endpoint=datacenter2.paddlepaddle.org +``` +## TODO +### 文件访问的权限 +控制用户权限 + +- 用户可以把自己的数据分享给别人 + +### 文件访问方式 +不用mount的方式来访问数据,而是直接用API的接口远程访问 + +例如: + +``` +f = open('/pfs/datacenter_name/home/user_name/test1.dat') +``` + + +### 支持用户自定义的数据预处理job diff --git a/doc/design/cluster_train/large_model_dist_train.md b/doc/design/cluster_train/large_model_dist_train.md new file mode 100644 index 0000000000000000000000000000000000000000..0c4b5bc24c854b7062d509249bea9c50d42bd5f1 --- /dev/null +++ b/doc/design/cluster_train/large_model_dist_train.md @@ -0,0 +1,101 @@ +# Alalysis of large model distributed training in Paddle + +***NOTE: This is only some note for how we implemeted this scheme in V1, not a new design.*** + +## What is it + +We often encounter cases that the embedding layer parameters(sparse) are so large that we can not store it in the trainer's memory when training. So we need to put them to several servers, and fetch them row by row instead of fetch all of the parameters. + +## How to use + +Specify command-line argument like `--loadsave_parameters_in_pserver=true --ports_num_for_sparse=1 --use_old_updater=1` when starting the paddle trainer. And also add something like `--ports_num_for_sparse=1 --pserver_num_threads=5` when starting pserver processes. + +Accrodingly, configure your embedding layers like: + +```python +SPARSE_REMOTE=True + +w1 = data_layer(name="w1", size=dict_size) +emb1 = embedding_layer(input=w1, size=32, param_attr=ParameterAttribute(sparse_update=SPARSE_REMOTE)) +w2 = data_layer(name="w2", size=dict_size) +emb2 = embedding_layer(input=w2, size=32, param_attr=ParameterAttribute(sparse_update=SPARSE_REMOTE)) +... +``` + +## Implementation details + +```c++ +enum MatType { + MAT_NORMAL, + MAT_NORMAL_SHARED, + MAT_VALUE_SHARED, + MAT_SPARSE_ROW_IDS, + MAT_SPARSE_ROW_AUTO_GROW, + MAT_CACHE_ROW, + MAT_SPARSE_ROW, + MAT_SPARSE_ROW_PREFETCH, + MAT_SPARSE_ROW_PREFETCH_FULL_SIZE, +}; +``` + +`MAT_SPARSE_ROW_PREFETCH` is what we use when configured to fetch only row of matrix when training. + +In `trainer_internal.cpp:L93 trainOneBatch`: + +```c++ + if (config_->getOptConfig().use_sparse_remote_updater()) { + REGISTER_TIMER("prefetch"); + gradientMachine_->prefetch(inArgs); + parameterUpdater_->getParametersRemote(); + } +``` + +When doing actual network forward and backward, at the beginning of each batch, the trainer will try to download one row of data from pserver. + +In `trainer/RemoteParameterUpdater.cpp`: `parameterUpdater_->getParametersRemote();`: + +```c++ +if (fullSize) { + ... +} else { +getParams = [&] { + parameterClient_->getParameterSparse( + /* recvParameterType= */ PARAMETER_VALUE, sendBackParameterType); +}; +applyL1 = [](Parameter& para, real decayRate) { + para.getMat(PARAMETER_VALUE)->applyL1(/*lr=*/1.0f, decayRate); +}; +} +``` + +Calling `parameterClient_->getParameterSparse` will do remote call to pserver's `getParameterSparse`: + +```c++ +void ParameterServer2::getParameterSparse(const SendParameterRequest& request, + std::vector& inputBuffers, + SendParameterResponse* response, + std::vector* outputBuffers) { + (void)inputBuffers; + auto& buffer = *readWriteBuffer_; + size_t numReals = 0; + for (const auto& block : request.blocks()) { + numReals += getParameterConfig(block).dims(1); + } + buffer.resize(numReals); + + VLOG(3) << "pserver: getParameterSparse, numReals=" << numReals; + + ReadLockGuard guard(parameterMutex_); + size_t offset = 0; + for (const auto& block : request.blocks()) { + size_t width = getParameterConfig(block).dims(1); + Buffer buf = {buffer.data() + offset, width}; + int type = request.send_back_parameter_type(); + sendBackParameterSparse(block, type, response, &buf, width, outputBuffers); + offset += width; + } +} +``` + +`getParameterConfig(block).dims(1)` returns the width of the current "parameter block"(a shard of parameter object), +then `getParameterSparse` remote call returns only one row of data to the client. diff --git a/doc/design/cluster_train/master_server.md b/doc/design/cluster_train/master_server.md new file mode 100644 index 0000000000000000000000000000000000000000..4bf3c506f101361875043f8bfd97972b8c981a22 --- /dev/null +++ b/doc/design/cluster_train/master_server.md @@ -0,0 +1,91 @@ +# Design Doc: Master Server + +For an overview of master server's role, please refer to [distributed training design doc](./README.md). In this design doc we will discuss the master server in more details. The master will be implemented in [Go](https://golang.org/). + +## Dataset + + + +A dataset is a list of files in *RecordIO* format. A RecordIO file consists of chunks, whereas each chunk consists some records. + +## Task Queue + +As mentioned in [distributed training design doc](./README.md), a *task* is a data shard that the master server assigns to the trainer process to train on. A task consists of one or multiple *chunks* from one or multiple files. The master server maintains *task queues* to track the training progress. + +### Task Queue Creation + +1. Each trainer will make an RPC call (using Go's [rpc](https://golang.org/pkg/net/rpc/) package) to the master server, telling it the RecordIO files representing the dataset specified by the user. Since every trainer will tell the master server the same dataset, only the first RPC call will be honored. + + The RPC interface is: + ```go + func (m *RPCServer) ReportDataset(Paths []string, dummy *int) error { + } + ``` +1. The master server will scan through each RecordIO file to generate the *chunk index* and know how many chunks does each file have. A chunk can be referenced by the file path and the index of the chunk within the file. The chunk index is in memory data structure that enables fast access to each chunk, and the index of the chunk with the file is an integer start from 0, representing the n-th chunk within the file. + + The definition of the chunk is: + ```go + type Chunk struct { + Idx int // index of the chunk within the file + Path string + Index recordio.Index // chunk index + } + ``` +1. Chunks are grouped into tasks, and tasks are filled into the todo queue. The pending queue and the done queue are initialized with no element. + + The definition of the task is: + ```go + type Task struct { + Index int + Chunks []Chunk + } + ``` + + The elements in the tasks queues is of type `TaskEntry`, containing a timeout counter (described in [task retry logic](#task-retry-logic)), and a task: + ```go + type TaskEntry struct { + NumTimeout int + Task Task + } + ``` + + The definition of task queues is: + ```go + type TaskQueues struct { + Todo []TaskEntry + Pending map[int]TaskEntry // map from task index to task entry + Done []TaskEntry + } + ``` + +### Task Queue Persistence + +The task queues need to be persisted on [etcd](https://github.com/coreos/etcd) for fault recovery. Since the task queues only change once a task is completed or timed out, which is not very frequent, we can afford to synchronize with etcd every time the task queues change. + +We will serialize the task queues data structure with [gob encoding](https://golang.org/pkg/encoding/gob/), compress with gzip, and save into etcd synchronously under key `/task_queues`. + +### Task Dispatch + +The trainer will make an RPC call to master to get a new task when: + +- the trainer first started, or +- the trainer finishes a task. + +The RPC interface is: +```go +func (m *RPCServer) GetTask(finished *Task, result *Task) error { +} +``` +Argument `finished` will be `nil` when the trainer is just started. + +During the RPC call the master will do the following: + +- Make a copy of the task queues, and update the copy reflecting the finished tasks and the new pending tasks. +- Synchronize the copy of task queues with etcd using a transaction conditioned on holding the master lock. +- Replace the task queues with the copy and report to the trainer with the new tasks if succeeded, or discard the copy and report the error to the trainer if failed. + +### Task Retry Logic + +When a task is dispatched to the trainer, the master will schedule a function for execution after the timeout duration (based on the moving average of task completion time). If the task entry in still in the pending queue, its timeout counter will increase by one, and the task will be moved to todo queue. If the timeout counter is above the threshold, the master will log the error and discard the task. + +Please note that since a timed out task could be completed after it has been dispatched for retry, so it is possible for a task to be processed multiple times. We do not try to prevent it from happening since it's fine to train on the same task multiple times due to the stochastic nature of the stochastic gradient decent algorithm. diff --git a/doc/design/cluster_train/pserver_client.md b/doc/design/cluster_train/pserver_client.md new file mode 100644 index 0000000000000000000000000000000000000000..474b8c572cd92fc87e9f7f3f2b19d12cccd158de --- /dev/null +++ b/doc/design/cluster_train/pserver_client.md @@ -0,0 +1,171 @@ +# Design Doc: The Client Library of Parameter Server + +For an overview of trainer's role, please refer to [distributed training design doc](README.md). In this design doc, we will discuss the parameter server's client library, which will manage communication with parameter servers. The library will be implemented in [Go](https://golang.org/) and made available as a static or dynamic library with a C header file. + +## Parameter Partition + +Each parameter will be partitioned into parameter blocks to make the parameters evenly distributed on parameter servers. The partition is done automatically by the client library. The *sparse parameter* require a little different treatment: + +### Sparse Parameter + +The sparse parameter is a parameter that is updated sparsely. The name is somewhat misleading, it does not have a sparse representation, it has the same representation as a dense vector. + +Because a sparse parameter is updated sparsely, the trainer will have to partition the sparse parameter. Because the parameter server will merge all sparse parameter shard into the same file when saving the parameter. It needs special naming convention: + +If a sparse parameter is partitioned into n shards, they should be named as: + +```text +name:sparse-0 +name:sparse-1 +... +name:sparse-n-1 +``` + +The library is unaware of the partition, and treat each parameter independently. Only when saving parameters, the parameter servers will merge the sparse parameters according to the naming convention. + +## Model Optimization Using Gradients + +There are two ways to perform model optimization using gradients: + +- On Client + + The client does multiple steps of forward and backward update. In each step, the gradients are calculated and a new model is generated. After some steps, the client will calculate the difference between the newest model and the old model at step 0. The difference will be updated to parameter servers. Parameter servers will just update parameters using the difference without any optimization using gradients (such as Adam and L1 regularization). + +- On Parameter Server + + The client will send accumulated gradients to parameter servers, the parameter server will do the optimization using gradients. + +## L1 and L2 Regularization + +PaddlePaddle allows L1 or L2 regularizations to be specified per parameter, so when the trainer initializes the parameter it needs include a parameter configuration when L1 or L2 regularization is necessary. + +## Parameter Initialization + +The parameters on parameter servers need to be initialized. To provide maximum flexibility, the trainer will initialize the parameters. Only one trainer will do the initialization, the other trainers will wait for the completion of initialization and get the parameters from the parameter servers. + +### Trainer Selection + +To select the trainer for initialization, every trainer will try to get a distributed lock, whoever owns the lock will do the initialization. As illustrated below: + + + +### Trainer Selection Process + +The trainer select process is encapsulated in the C API function: +```c +int paddle_begin_init_params(paddle_pserver_client* client, const char* config_proto); +``` +The selected trainer's call to `paddle_begin_init_params` will return with 1, and the other trainers' call to `paddle_begin_init_params` will return 0. `paddle_get_params` will be blocked until initialization is completed. As illustrated below: + + + +## C Interface + +```c +typedef enum { + PADDLE_ELEMENT_TYPE_INT32 = 0, + PADDLE_ELEMENT_TYPE_UINT32 = 1, + PADDLE_ELEMENT_TYPE_INT64 = 2, + PADDLE_ELEMENT_TYPE_UINT64 = 3, + PADDLE_ELEMENT_TYPE_FLOAT32 = 4, + PADDLE_ELEMENT_TYPE_FLOAT64 = 5, +} paddle_element_type; + +typedef struct { + char* name; + paddle_element_type element_type; + unsigned char* content; + int content_len; +} paddle_parameter, paddle_gradient; + +typedef int paddle_pserver_client; + +/** + * @brief creates a pserver client that talks to etcd for coordination. + */ +paddle_pserver_client paddle_new_etcd_pserver_client(char* etcd_addr); + +/** + * @brief creates a pserver client given pserver addresses. + * + * @param pserver_addrs comma-separated pserver addresses. + * @param selected if current pserver client is selected to initialize all parameter servers. + */ +paddle_pserver_client paddle_new_pserver_client(char* pserver_addrs, int selected); +void paddle_pserver_client_release(paddle_pserver_client c); + +/** + * @brief paddle_begin_init_params begins to initialize parameters on + * parameter servers. + * + * paddle_begin_init_params will be called from multiple trainers, + * only one trainer will be selected to initialize the parameters on + * parameter servers. Other trainers need to get the initialized + * parameters from parameter servers using @paddle_get_params. + * + * @return 1 if the trainer is selected to initialize parameter + * servers, otherwise 0. + */ +int paddle_begin_init_params(paddle_pserver_client client); + +/** + * @brief paddle_init_param initializes the parameter on parameter + * servers. + * + * @param param the parameter to initialize. + * @param param_config_proto the configuration for the parameter. + * @param config_len the length of param_config_proto + * @return 0 if successful, otherwise -1. On failure, the trainer + * needs to restart the entire initialization process (starting from + * @paddle_begin_init_param). Or simply exit the program and wait for + * the cluster management system to restart the trainer. + */ +int paddle_init_param(paddle_pserver_client client, paddle_parameter param, const unsigned char* param_config_proto, int config_len); + +/** + * @brief paddle_finish_init_params tells parameter servers client has + * sent all parameters to parameter servers as initialization. + * + * @return 0 if successful, otherwise -1. On failure, the trainer + * needs to restart the entire initialization process (starting from + * @paddle_begin_init_param). Or simply exit the program and wait for + * the cluster management system to restart the trainer. + */ +int paddle_finish_init_params(paddle_pserver_client client); + +/** + * @brief paddle_send_grads sends gradients to parameter servers for + * updating parameters. + * + * @param grads the array of gradients to send. + * @param len the length of the gradient array. + * @param learning_rate the learning rate for the gradients. + * @return 0 if successful, otherwise -1. + */ +int paddle_send_grads(paddle_pserver_client client, const paddle_gradient* grads, int len); + +/** + * @brief paddle_get_params gets parameters from parameter servers. + * + * paddle_get_params will block until parameters are initialized on + * the parameter servers. + * + * @param dst the destination array of parameter pointers to save to. + * The parameter pointer must be pre-popullated with required parameter name, + * and the content of parameter must be pre-allocated of the size of required + * parameter on pserver. + * @param len the length of the names array and the paddle_parameter + * array. + * @return 0 if successful, otherwise -1. + */ +int paddle_get_params(paddle_pserver_client client, paddle_parameter** dst, int len); + +/** + * @brief paddle_save_model indicates parameters to save the parameter + * to the given path + * + * @param path the path to save parameters. + * @return 0 if successful, otherwise -1. + */ +int paddle_save_model(paddle_pserver_client client, const char* path); +``` diff --git a/doc/design/cluster_train/remote_parameter_updater.md b/doc/design/cluster_train/remote_parameter_updater.md new file mode 100644 index 0000000000000000000000000000000000000000..6e8e5938455b869e0f3367794c41250340b37f77 --- /dev/null +++ b/doc/design/cluster_train/remote_parameter_updater.md @@ -0,0 +1,21 @@ +# Design Doc: Remote Parameter Updater for Cluster Train + +For an overview of distribute training, please refer to [distributed training design doc](README.md). In this design doc, we will discuss the parameter updater that will use parameter server cclient [The Client Library of Parameter Server Design Doc](pserver_client.md) to manage and update parameters. + +## Parameter Updater + +Parameter Updater is used by trainer to manage and update parameter, there are mainly two kind of parameter updater: local and remote, since this design is for cluster train, we will only discuss remote parameter updater here. + +### Remote Parameter Updater + +Remote Parameter Updater manage parameters through remote parameter server with the client that communicate with pserver([The Client Library of Parameter Server Design Doc](pserver_client.md)) + +In PaddlePaddle Python V2 API, trainer is implemented in python, and the trainer will hold a instance of parameter updater and call it's functions directly. In this design, we will also expose the api of RemoteParameterUpdater to python with swig. + +#### Sparse Remote Parameter Updater + +Since we will only implement dense parameter management new, the mechanism for sparse parameter will be discussed in next stage. + +### Interface Design + +TBD diff --git a/doc/design/cluster_train/save_model.md b/doc/design/cluster_train/save_model.md new file mode 100644 index 0000000000000000000000000000000000000000..b755185c81ad617b9c85c47de0f5f65d2201c658 --- /dev/null +++ b/doc/design/cluster_train/save_model.md @@ -0,0 +1,111 @@ +# Design Doc: Save Model + +## Overview + +The model is the output of the training process. There are two +ways from which user can obtain a model: + +- Save model triggered by user code: user code asks PaddlePaddle to + save a model. +- Convert model from the checkpoint: model being converted from + pservers' periodic checkpoint. In this way, the user can cancel a + job at any time, and still have a relatively fresh model (we + checkpoint around every 5 minutes). + +### Trainer Saving Model vs. Pservers Saving Model + +Both trainers and pservers have access to the model. So the model can +be saved from a trainer or pservers. We need to decide where the model +is saved from. + +#### Dense Update vs. Sparse Update + +There are two types of model update methods: dense update and sparse +update (when the model parameter is configured to be sparse). + +- Dense update + + Every trainer has it's own full copy of the model. Every model + update will update the entire model. + +- Sparse update + + The training input is sparse, and the trainer does not have the + entire model. It will only download the sub-model necessary related + to the input. When updating the model, only the sub-model related to + the training input is updated. + + +#### Pservers Saving Model + +The benefit of letting pservers save model is they have the entire +model all the time. However, since pservers are on different nodes, it +requires a merging process to merge model shards into the same +model. Thus requires the pservers to write models to a distributed +filesystem, making the checkpoint shards visible to the merge program. + +#### Trainer Saving Model + +The benefit of letting one trainer to save the model is it does not +require a distributed filesystem. And it's reusing the same save model +logic when training locally - except when doing sparse update, the +trainer needs to download the entire model during the saving process. + +#### Conclusion + +Given trainer saving model does not require a distributed filesystem, +and is an intuitive extension to trainer saving model when training +locally, we decide to let the trainer save the model when doing +distributed training. + + +### Convert Model from Checkpoint + +TODO + + +## Timeline + +We first implement trainer save the model. Converting the latest +snapshot to a model will be a TODO for future. + + +## Trainer Save Model + +### Trainer Election + +One trainer will be elected as the one to save the model. When using +etcd, trainer ID is a randomly generated UUID, the trainer will +contact the master server requesting to save the model, and find out +if itself is elected. When the master server is not used, unique +trainer IDs will be given by the administrator, the trainer whose ID +is "0" is elected to save the model. + +### Model Save Path + +Each trainer will be given the directory to save the model. The +elected trainer will save the model to +`given-directory/trainerID`. Since the trainer ID is unique, this +would prevent concurrent save to the same file when multiple trainers +are elected to save the model when split-brain problem happens. + +### What Happens When Model Is Saving + +It takes some time to save model, we need to define what will happen +when save model is taking place. + +When doing dense update, the trainer uses the local model. Pservers +does not need to pause model update. + +When doing sparse update. The trainer needs to download the entire +model while saving. To get the most accurate model, the model update +needs to be paused before the download starts and resumed after the +download finishes. Otherwise, the trainer gets a model that is +"polluted": some part of the model is old, some part of the model is +new. + +It's unclear that the "polluted" model will be inferior due to the +stochastic nature of deep learning, and pausing the model update will +add more complexity to the system. Since supporting sparse update is a +TODO item. We defer the evaluation of pause the model update or not +during saving model to the future. diff --git a/doc/design/cluster_train/src/checkpointing.png b/doc/design/cluster_train/src/checkpointing.png new file mode 100644 index 0000000000000000000000000000000000000000..c221e8474f90f37e31416cbb19c9452207a0d14c Binary files /dev/null and b/doc/design/cluster_train/src/checkpointing.png differ diff --git a/doc/design/cluster_train/src/data_dispatch.png b/doc/design/cluster_train/src/data_dispatch.png new file mode 100644 index 0000000000000000000000000000000000000000..5bdcc24d6a6d193cb014f8c38b362451fded5e54 Binary files /dev/null and b/doc/design/cluster_train/src/data_dispatch.png differ diff --git a/doc/design/cluster_train/src/dataset.graffle b/doc/design/cluster_train/src/dataset.graffle new file mode 100644 index 0000000000000000000000000000000000000000..c10a423ed16a23229a9ee33d11bfc82bb59646c8 Binary files /dev/null and b/doc/design/cluster_train/src/dataset.graffle differ diff --git a/doc/design/cluster_train/src/dataset.png b/doc/design/cluster_train/src/dataset.png new file mode 100644 index 0000000000000000000000000000000000000000..2fb7f1cce3b6dd21489392557826e95a9f207c34 Binary files /dev/null and b/doc/design/cluster_train/src/dataset.png differ diff --git a/doc/design/cluster_train/src/file_storage.graffle b/doc/design/cluster_train/src/file_storage.graffle new file mode 100644 index 0000000000000000000000000000000000000000..50a17e70fa255495337c529a3bf12a5c0024a5be Binary files /dev/null and b/doc/design/cluster_train/src/file_storage.graffle differ diff --git a/doc/design/cluster_train/src/file_storage.png b/doc/design/cluster_train/src/file_storage.png new file mode 100644 index 0000000000000000000000000000000000000000..fccb4e3e7e738224c7f1584326bd5f351ce799aa Binary files /dev/null and b/doc/design/cluster_train/src/file_storage.png differ diff --git a/doc/design/cluster_train/src/init_lock.graffle b/doc/design/cluster_train/src/init_lock.graffle new file mode 100644 index 0000000000000000000000000000000000000000..fa9149f21b1311eed48ef72ec55e556559d0fc94 Binary files /dev/null and b/doc/design/cluster_train/src/init_lock.graffle differ diff --git a/doc/design/cluster_train/src/init_lock.png b/doc/design/cluster_train/src/init_lock.png new file mode 100644 index 0000000000000000000000000000000000000000..92404ee6d6c0f9a7727952bae3c869ba338ecd7f Binary files /dev/null and b/doc/design/cluster_train/src/init_lock.png differ diff --git a/doc/design/cluster_train/src/paddle-cloud-in-data-center.png b/doc/design/cluster_train/src/paddle-cloud-in-data-center.png new file mode 100644 index 0000000000000000000000000000000000000000..da5d1a77562480ad1d886f5f21dbd84001d3d508 Binary files /dev/null and b/doc/design/cluster_train/src/paddle-cloud-in-data-center.png differ diff --git a/doc/design/cluster_train/src/paddle-etcd.graffle b/doc/design/cluster_train/src/paddle-etcd.graffle new file mode 100644 index 0000000000000000000000000000000000000000..f973dc9b9dbf72e9bc31e2d32822916cd281f8d9 Binary files /dev/null and b/doc/design/cluster_train/src/paddle-etcd.graffle differ diff --git a/doc/design/cluster_train/src/paddle-etcd.png b/doc/design/cluster_train/src/paddle-etcd.png new file mode 100644 index 0000000000000000000000000000000000000000..57981ceb4b94f0f7d6dfa63f3d28c0402bf9cc31 Binary files /dev/null and b/doc/design/cluster_train/src/paddle-etcd.png differ diff --git a/doc/design/dist/src/paddle-model-sharding.graffle b/doc/design/cluster_train/src/paddle-model-sharding.graffle similarity index 100% rename from doc/design/dist/src/paddle-model-sharding.graffle rename to doc/design/cluster_train/src/paddle-model-sharding.graffle diff --git a/doc/design/dist/src/paddle-model-sharding.png b/doc/design/cluster_train/src/paddle-model-sharding.png similarity index 100% rename from doc/design/dist/src/paddle-model-sharding.png rename to doc/design/cluster_train/src/paddle-model-sharding.png diff --git a/doc/design/dist/src/paddle-ps-0.png b/doc/design/cluster_train/src/paddle-ps-0.png similarity index 100% rename from doc/design/dist/src/paddle-ps-0.png rename to doc/design/cluster_train/src/paddle-ps-0.png diff --git a/doc/design/dist/src/paddle-ps-1.png b/doc/design/cluster_train/src/paddle-ps-1.png similarity index 100% rename from doc/design/dist/src/paddle-ps-1.png rename to doc/design/cluster_train/src/paddle-ps-1.png diff --git a/doc/design/dist/src/paddle-ps.graffle b/doc/design/cluster_train/src/paddle-ps.graffle similarity index 100% rename from doc/design/dist/src/paddle-ps.graffle rename to doc/design/cluster_train/src/paddle-ps.graffle diff --git a/doc/design/dist/src/paddle-task-queues.graffle b/doc/design/cluster_train/src/paddle-task-queues.graffle similarity index 100% rename from doc/design/dist/src/paddle-task-queues.graffle rename to doc/design/cluster_train/src/paddle-task-queues.graffle diff --git a/doc/design/dist/src/paddle-task-queues.png b/doc/design/cluster_train/src/paddle-task-queues.png similarity index 100% rename from doc/design/dist/src/paddle-task-queues.png rename to doc/design/cluster_train/src/paddle-task-queues.png diff --git a/doc/design/dist/src/paddle-task-states.graffle b/doc/design/cluster_train/src/paddle-task-states.graffle similarity index 100% rename from doc/design/dist/src/paddle-task-states.graffle rename to doc/design/cluster_train/src/paddle-task-states.graffle diff --git a/doc/design/dist/src/paddle-task-states.png b/doc/design/cluster_train/src/paddle-task-states.png similarity index 100% rename from doc/design/dist/src/paddle-task-states.png rename to doc/design/cluster_train/src/paddle-task-states.png diff --git a/doc/design/cluster_train/src/pserver_init.graffle b/doc/design/cluster_train/src/pserver_init.graffle new file mode 100644 index 0000000000000000000000000000000000000000..5f3f1f52be8aa7f9049a8fcd6b7c93c8560c1676 Binary files /dev/null and b/doc/design/cluster_train/src/pserver_init.graffle differ diff --git a/doc/design/cluster_train/src/pserver_init.png b/doc/design/cluster_train/src/pserver_init.png new file mode 100644 index 0000000000000000000000000000000000000000..dfe491ff98dd7db1c336093c80964a260df2cd90 Binary files /dev/null and b/doc/design/cluster_train/src/pserver_init.png differ diff --git a/doc/design/cluster_train/src/submit-job.graffle b/doc/design/cluster_train/src/submit-job.graffle new file mode 100644 index 0000000000000000000000000000000000000000..677cdfb6d9a32168bf71729eb841fa1ca0dd31d6 Binary files /dev/null and b/doc/design/cluster_train/src/submit-job.graffle differ diff --git a/doc/design/cluster_train/src/submit-job.png b/doc/design/cluster_train/src/submit-job.png new file mode 100644 index 0000000000000000000000000000000000000000..3046a460a7ba708079e88a560debaa215a694680 Binary files /dev/null and b/doc/design/cluster_train/src/submit-job.png differ diff --git a/doc/design/cluster_train/src/trainer.graffle b/doc/design/cluster_train/src/trainer.graffle new file mode 100644 index 0000000000000000000000000000000000000000..42384a3f059966e22e22f5fa4295cc9ead5cef83 Binary files /dev/null and b/doc/design/cluster_train/src/trainer.graffle differ diff --git a/doc/design/cluster_train/src/trainer.png b/doc/design/cluster_train/src/trainer.png new file mode 100644 index 0000000000000000000000000000000000000000..6537d3d56589ca9f19a77a50a970e4b5275e6ce0 Binary files /dev/null and b/doc/design/cluster_train/src/trainer.png differ diff --git a/doc/design/cluster_train/submit-job.md b/doc/design/cluster_train/submit-job.md new file mode 100644 index 0000000000000000000000000000000000000000..8377d5489dc64bd2fdc5bb4f7bc737e7b489000d --- /dev/null +++ b/doc/design/cluster_train/submit-job.md @@ -0,0 +1,127 @@ +# Submit a Distributed Training Job + +The user can submit a distributed training job with Python code, rather than with a command-line interface. + +## Runtime Environment On Kubernetes + +For a distributed training job, there is two Docker image called *runtime Docker image* and *base Docker image*. The runtime Docker image is the Docker image that gets scheduled by Kubernetes to run during training. The base Docker image is for building the runtime Docker image. + +### Base Docker Image + +Usually, the base Docker image is PaddlePaddle product Docker image including paddle binary files and python package. And of course, users can specify any image name hosted on any docker registry which users have the access right. + +### Runtime Docker Image + +The trainer package which user upload and some Python dependencies are packaged into a runtime Docker image based on base Docker image. + +- Handle Python Dependencies + + You need to provide requirements.txt file in your `trainer-package` folder. Example: + + ```txt + pillow + protobuf==3.1.0 + ``` + More [details](https://pip.readthedocs.io/en/1.1/requirements.html) about requirements, an example project looks like: + ```bash + paddle_example + |-quick_start + |-trainer.py + |-dataset.py + |-requirements.txt + ``` + +## Submit Distributed Training Job With Python Code + + +- `paddle.job.dist_train()` will call the Job Server API `/v1/packages` to upload the trainer package and save them on CephFS, and then call `/v1/trainer/job` to submit the PaddlePaddle distributed job. +- `/v1/trainer/job` will start a building job for preparing the runtime Docker image. When the building job is finished, Job Server will submit the PaddlePaddle distributed job to Kubernetes. +- *NOTE*: For the first version, we will not prepare the runtime Docker image, instead, the package is uploaded to Paddle Cloud, and Paddle Cloud will mount the package in a temporary folder into the base Docker image. We will not support custom Python dependencies in the first version as well. + +You can call `paddle.job.dist_train` and provide distributed training configuration as the parameters: +```python +paddle.job.dist_train( + trainer=dist_trainer(), + paddle_job=PaddleJob( + job_name = "paddle-cloud", + entry_point = "python %s"%__file__, + trainer_package = "/example/word2vec", + image = "yancey1989/paddle-job", + trainers = 10, + pservers = 3, + trainer_cpu = 1, + trainer_gpu = 1, + trainer_mem = "10G", + pserver_cpu = 1, + pserver_mem = "2G" + )) +``` + +The parameter `trainer` of `paddle.job.dist_train` is a function and you can implement it as follows: +```python +def dist_trainer(): + def trainer_creator(): + trainer = paddle.v2.trainer.SGD(...) + trainer.train(...) + return trainer_creator +``` + +The pseudo code of `paddle.job.dist_train` is as follows: +```python +def dist_train(trainer, paddle_job): + # if the code is running on cloud, set PADDLE_ON_CLOUD=YES + if os.getenv("RUNNING_ON_CLOUD", "NO") == "NO": + #submit the paddle job + paddle_job.submit() + else: + #start the training + trainer() +``` +### PaddleJob Parameters +parameter | type | explanation + --- | --- | --- +job_name | str | the unique name for the training job +entry_point | str | entry point for startup trainer process +trainer_package | str | trainer package file path which user have the access right +image|str|the [base image](#base-docker-image) for building the [runtime image](#runtime-docker-image) +pservers|int| Parameter Server process count +trainers|int| Trainer process count +pserver_cpu|int| CPU count for each Parameter Server process +pserver_mem|str| memory allocated for each Parameter Server process, a plain integer using one of these suffixes: E, P, T, G, M, K +trainer_cpu|int| CPU count for each Trainer process +trainer_mem|str| memory allocated for each Trainer process, a plain integer using one of these suffixes: E, P, T, G, M, K +trainer_gpu|int| GPU count for each Trainer process, if you only want CPU, do not set this parameter + +### Deploy Parameter Server, Trainer and Master Process + - Deploy PaddlePaddle Parameter Server processes, it's a Kubernetes ReplicaSet. + - Deploy PaddlePaddle Trainer processes, it's a Kubernetes Job. + - Deploy PaddlePaddle Master processes, it's a Kubernetes ReplicaSet. + +## Job Server + +- RESTful API + + Job server provides RESTful HTTP API for receiving the trainer package and displaying + PaddlePaddle job related informations. + - `POST /v1/package` receive the trainer package and save them on CephFS + - `POST /v1/trainer/job` submit a trainer job + - `GET /v1/jobs/` list all jobs + - `GET /v1/jobs/` the status of a job + - `DELETE /v1/jobs/` delete a job + - `GET /v1/version` job server version + +- Build Runtime Docker Image on Kubernetes + + `paddle.job.dist_train` will upload the trainer package to Job Server, save them on the distributed filesystem, and then start up a job for building the runtime Docker image that gets scheduled by Kubernetes to run during training. + + There are some benefits for building runtime Docker image on JobServer: + - On Paddle Cloud, users will run the trainer code in a Jupyter Notebook which is a Kubernetes Pod, if we want to execute `docker build` in the Pod, we should mount the host's `docker.sock` to the Pod, user's code will connect the host's Docker Engine directly, it's not safe. + - Users only need to upload the training package files, does not need to install docker engine, docker registry as dependencies. + - If we want to change another image type, such as RKT, users do not need to care about it. + +- Deploy Parameter Server, Trainer and Master Processes + + `POST /v1/trainer/job` receives the distributed training parameters, and deploy the job as follows: + - Deploy PaddlePaddle Parameter Server processes, it's a Kubernetes ReplicaSet. + - Deploy PaddlePaddle Trainer processes, it's a Kubernetes Job. + - Deploy PaddlePaddle Master processes, it's a Kubernetes ReplicaSet. diff --git a/doc/design/dcgan.png b/doc/design/dcgan.png new file mode 100644 index 0000000000000000000000000000000000000000..15e8e290a111ff43900934341365cb4360d87d28 Binary files /dev/null and b/doc/design/dcgan.png differ diff --git a/doc/design/dist/README.md b/doc/design/dist/README.md deleted file mode 100644 index 1788208bcabca30f66cb1c80e80f6b824c0d9579..0000000000000000000000000000000000000000 --- a/doc/design/dist/README.md +++ /dev/null @@ -1,172 +0,0 @@ -# Design Doc: Distributed Training - -## Objective - -In [this slides](https://www.slideshare.net/cxwangyi/paddlepaddle-a-complete-solution-for-businesses), we explained that we'd like PaddlePaddle running on general-purpose clusters like those managed by Kubernetes, so to address demands for AI from both Internet and non-Internet industries. - -This poses technical challenges to PaddlePaddle: - -1. Support fault-recovery. -1. Support both offline and online training. -1. [Serverless computing](https://en.wikipedia.org/wiki/Serverless_computing) of distributed training. - - -## Training Job - -A training job will be created once user asks Paddle cloud to train a model. The training job is made up of different processes that collaboratively consume data and produce a trained model. There are three kinds of processes: - -1. the *master process*, which dispatches tasks to -1. one or more *trainer processes*, which run distributed training and synchronize gradients/models via -1. one or more *parameter server processes*, where each holds a shard of the global model. - -Their relation is illustrated in the following graph: - - - -### Master Process - -The master process will: - -- Partition a dataset into [tasks](#task) and dispatch tasks to trainers. -- Keep track of training progress on the dataset with [task queue](#task-queue). A training job will iterate on the dataset for a full pass until it goes into next pass. - - -#### Task - -A task is a data shard to be trained. The total number of tasks will be much bigger than the total number of trainers. The number of data instances inside a task will be much bigger than the mini-batch size. - -#### Task Queue - -The master process has three task queues to track training progress. As illustrated in the graph below, Job A and Job B both have one master process. Each master process has three task queues. - - - -- The todo queue holds tasks to be dispatched. When a job starts, the master process fills in the todo queue with all tasks. -- The pending queue holds tasks that are currently training by trainers. -- the done queue holds tasks that are already trained. - -The life cycle of a single task is illustrated below: - - - -1. When a new pass of training starts, all tasks will be placed in the todo queue. -1. The master process will dispatch few tasks to each trainer at a time, puts them in the pending queue and waits for completion. -1. The trainer will work on its tasks and tell the master process once a task is completed. The master process will dispatch a new task to that trainer. -1. If a task timeout. the master process will move it back to the todo queue. The timeout count will increase by one. If the timeout count is above a threshold, the task is likely to cause a trainer to crash, so it will be discarded. -1. The master process will move completed task to the done queue. When the todo queue is empty, the master process will start a new pass by moving all tasks in the done queue to todo queue and reset the timeout counter of all tasks to zero. - -### Trainer Process - -The trainer process will: - -- Receive tasks from the master. -- Work on the tasks: calculate and upload gradient to parameter servers, and update local model by downloading new parameters from parameter servers. - -### Parameter Server Process - -Parameter server processes hold the parameters collaboratively. The parameters are partitioned on different parameter servers. - -The parameter server will: - -- Receive gradient from the trainers, update its parameters, and give the trainers the latest parameters. -- Periodically save its parameters to distributed file system by overriding the previous save. - -### Optimization Algorithms - -The communication pattern between the trainers and the parameter servers depends on the category of optimization algorithm: - -- Synchronous Stochastic Gradient Descent (sync-SGD) - - Parameter server will wait for all trainer finish n-th mini-batch calculation and send their gradients before broadcasting new parameters to every trainer. Every trainer will wait for the new parameters before starting n+1-th mini-batch. - -- Asynchronous Stochastic Gradient Descent (async-SGD) - - There will no synchronization between different trainers, and parameter server updates its parameter as soon as it receives new gradient: - - - Each trainer uploads its accumulated gradient every n mini-batches. - - Every m mini-batches, the trainer downloads new parameters from parameter server. - - n and m do not have to be equal. - -## Fault Tolerant - -The training job will pause if the master processes is dead, or any of the parameter server process is dead. They will be started by [Kubernetes](https://kubernetes.io/) and recover in few minutes. Please refer to [fault recovery](#fault-recovery). - -The training job will continue to make progress if there is at least one training process running. The strategy depends on the type of optimization algorithm: - -- sync-SGD - - TODO - -- async-SGD - - Since async-SGD does not require synchronization between mini-batches, the system will by definition make process if at least one trainer is running. - -## Fault Recovery - -PaddlePaddle uses [etcd](https://github.com/coreos/etcd) to keep track of the states of processes. Because etcd is a distributed reliable key-value store, the restarted process can recover its states from etcd. The model parameters are periodically saved into distributed file system, so a restarted parameter server can recover its parameters from the saved file. - -Now we will introduce how each process recovers from a failure, the graph below shows how etcd is used: - - - -### Master Process - -When the master is started by the Kubernetes, it executes the following steps at startup: - -1. Grabs a unique *master* lock in etcd, which prevents concurrent master instantiations. -1. Recovers the task queues from etcd if they already exist, otherwise, the master will create them. -1. Watches the trainer prefix keys `/trainer/` on etcd to find the live trainers. -1. Starts dispatching the tasks to the trainers, and updates task queue using an etcd transaction to ensure lock is held during the update. - -The master process will kill itself if its etcd lease expires. - -When the master process is dead for any reason, Kubernetes will restart it. It will be online again with all states recovered from etcd in few minutes. - -### Trainer Process - -When the trainer is started by the Kubernetes, it executes the following steps at startup: - -1. Watches the available parameter server prefix keys `/ps/` on etcd and waits until the count of parameter servers reaches the desired count. -1. Generates a unique ID, and sets key `/trainer/` with its contact address as value. The key will be deleted when the lease expires, so the master will be aware of the trainer being online and offline. -1. Waits for tasks from the master to start training. - -If trainer's etcd lease expires, it will try set key `/trainer/` again so that the master process can discover the trainer again. - -### Parameter Server Process - -When the parameter server is started by Kubernetes, it executes the following steps at startup: - -1. Read desired total number of parameter servers from etcd `/ps_desired` -1. Search through etcd keys `/ps/` (`/ps/0`, `/ps/1`, ...) to find the first non-existant key whose index is smaller than the total number of parameter servers. Set the key using a transaction to avoid concurrent writes. The parameter server's index is inferred from the key name. - - The desired number of parameter servers is 3: - - - - The third parameter server joined: - - - -1. The parameter server can load parameters if there are already saved parameters in the save path (inferred from its index). -1. Now the parameter server is ready for the trainers' requests. - -If the parameter server's etcd lease expires, the parameter server will kill itself. - - -## Dynamic Scaling - -### Trainer Scaling - -TODO - -### Parameter Server Scaling - -Not planned for v1. - -## Training Dataset Format - -TODO - -## User Interface - -TODO diff --git a/doc/design/dist/src/paddle-etcd.graffle b/doc/design/dist/src/paddle-etcd.graffle deleted file mode 100644 index 56681ae5bbe11849116d621b066a6317e003e4ca..0000000000000000000000000000000000000000 Binary files a/doc/design/dist/src/paddle-etcd.graffle and /dev/null differ diff --git a/doc/design/dist/src/paddle-etcd.png b/doc/design/dist/src/paddle-etcd.png deleted file mode 100644 index 4f9c9762b3a8c089dd5e9b2c07cb9dfc78296a21..0000000000000000000000000000000000000000 Binary files a/doc/design/dist/src/paddle-etcd.png and /dev/null differ diff --git a/doc/design/file_manager/README.md b/doc/design/file_manager/README.md new file mode 100644 index 0000000000000000000000000000000000000000..3df10d801e568834729f902aace483d033340e2d --- /dev/null +++ b/doc/design/file_manager/README.md @@ -0,0 +1,87 @@ +# FileManager设计文档 +## 目标 +在本文档中,我们设计说明了名为FileManager系统,方便用户上传自己的训练数据以进行分布式训练 + +主要功能包括: + +- 提供常用的命令行管理命令管理文件和目录 +- 支持大文件的断点上传、下载 + +## 名词解释 +- PFS:是`Paddlepaddle cloud File System`的缩写,是对用户文件存储空间的抽象,与之相对的是local filesystem。目前我们用CephFS来搭建。 +- [CephFS](http://docs.ceph.com/docs/master/cephfs/):一个POSIX兼容的文件系统。 +- Chunk:逻辑划上文件分块的单位。 + +## 模块 +### 架构图 + + +### PFSClient +- 功能: 详细设计[link](./pfs/pfsclient.md) + - 提供用户管理文件的命令 + - 需要可以跨平台执行 + +- 双向验证 + PFSClient需要和Ingress之间做双向验证[tls](#tls),所以用户需要首先在`cloud.paddlepaddle.org`上注册一下,申请用户空间,并且把系统生成的CA(certificate authority)、Key、CRT(CA signed certificate)下载到本地,然后才能使用PFSClient。 + +### [Ingress](https://kubernetes.io/docs/concepts/services-networking/ingress/) +- 功能: + 提供七层协议的反向代理、基于粘性会话的负载均衡功能。 + +- 透传用户身份的办法 + Ingress需要把PFSClient的身份信息传给PFSServer,配置的方法参考[link](http://www.integralist.co.uk/posts/clientcertauth.html#3) + +### PFSServer +PFSServer提供RESTful API接口,接收处理PFSClient端的文件管理请求,并且把结果返回PFSClient端。 + +RESTful API + +- /api/v1/files + - `GET /api/v1/files`: Get metadata of files or directories. + - `POST /api/v1/files`: Create files or directories. + - `PATCH /api/v1/files`: Update files or directories. + - `DELETE /api/v1/files`: Delete files or directories. + +- /api/v1/file/chunks + - `GET /api/v1/storage/file/chunks`: Get chunks's metadata of a file. + +- /api/v1/storage/files + - `GET /api/v1/storage/files`: Download files or directories. + - `POST /api/v1/storage/files`: Upload files or directories. + +- /api/v1/storage/file/chunks + - `GET /api/v1/storage/file/chunks`: Download chunks's data. + - `POST /api/v1/storage/file/chunks`: Upload chunks's data. + +## 文件传输优化 + +### 分块文件传输 +用户文件可能是比较大的,上传到Cloud或者下载到本地的时间可能比较长,而且在传输的过程中也可能出现网络不稳定的情况。为了应对以上的问题,我们提出了Chunk的概念,一个Chunk由所在的文件偏移、数据、数据长度及校验值组成。文件的上传和下载都是通过对Chunk的操作来实现的。由于Chunk比较小(默认256K),完成一个传输动作完成的时间也比较短,不容易出错。PFSClient需要在传输完毕最后一个Chunk的时候检查destination文件的MD5值是否和source文件一致。 + +一个典型的Chunk如下所示: + +``` +type Chunk struct { + fileOffset int64 + checksum uint32 + len uint32 + data []byte +} +``` + +### 生成sparse文件 +当destination文件不存在或者大小和source文件不一致时,可以用[Fallocate](https://Go.org/pkg/syscall/#Fallocate)生成sparse文件,然后就可以并发写入多个Chunk。 + +### 覆盖不一致的部分 +文件传输的的关键在于需要PFSClient端对比source和destination的文件Chunks的checksum是否保持一致,不一致的由PFSClient下载或者传输Chunk完成。这样已经传输成功的部分就不用重新传输了。 + +## 用户使用流程 +参考[link](https://github.com/PaddlePaddle/Paddle/blob/develop/doc/design/cluster_train/data_dispatch.md) + +## 框架生成 +用[swagger](https://github.com/swagger-api/swagger-codegen)生成PFSClient和PFSServer的框架部分,以便我们可以把更多的精力放到逻辑本身上。 + +## 参考文档 +- [TLS complete guide](https://github.com/k8sp/tls/blob/master/tls.md) +- [aws.s3](http://docs.aws.amazon.com/cli/latest/reference/s3/) +- [linux man document](https://linux.die.net/man/) diff --git a/doc/design/file_manager/pfs/pfsclient.md b/doc/design/file_manager/pfs/pfsclient.md new file mode 100644 index 0000000000000000000000000000000000000000..56bc70c54bbc92b78d66e04fb495b1300cf8ebe0 --- /dev/null +++ b/doc/design/file_manager/pfs/pfsclient.md @@ -0,0 +1,129 @@ +# PFSClient + +## Description +The `pfs` command is a Command Line Interface to manage your files on PaddlePaddle Cloud + +## Synopsis +``` +paddle [options] pfs [parameters] +``` + +## Options +``` +--profile (string) + Use a specific profile from your credential file. + +--help (string) + Display more information about command + +--version + Output version information and exit + +--debug + Show detailed debugging log + +--only-show-errors (boolean) + Only errors and warnings are displayed. All other output is suppressed. +``` + +## Path Arguments +When using a command, we need to specify path arguments. There are two path argument type: `localpath` and `pfspath`. + +A `pfspath` begin with `/pfs`, eg: `/pfs/$DATACENTER/home/$USER/folder`. + +[Here](https://github.com/PaddlePaddle/Paddle/blob/develop/doc/design/cluster_train/data_dispatch.md#上传训练文件) is how to config datacenters. + +## order of Path Arguments +Commonly, if there are two path arguments, the first is the source, and the second is the destination. + +## Subcommonds +- rm - remove files or directories + +``` +Synopsis: + rm [-r] [-v] ... + +Options: + -r + Remove directories and their contents recursively + -v + Cause rm to be verbose, showing files after they are removed. + +Examples: + paddle pfs rm /pfs/$DATACENTER/home/$USER/file + paddle pfs rm -r /pfs/$DATACENTER/home/$USER/folder +``` +- mv - move (rename) files + +``` +Synopsis: + mv [-f | -n] [-v] + mv [-f | -n] [-v] ... + mv [-f | -n] [-v] + mv [-f | -n] [-v] ... + mv [-f | -n] [-v] + mv [-f | -n] [-v] ... + +Options: + -f + Do not prompt for confirmation before overwriting the destination path. (The -f option overrides previous -n options.) + -n + Do not overwrite an existing file. (The -n option overrides previous -f options.) + -v + Cause mv to be verbose, showing files after they are moved. + +Examples: + paddle pfs mv ./text1.txt /pfs/$DATACENTER/home/$USER/text1.txt +``` +- cp - copy files or directories + +``` +Synopsis: + cp [-r] [-f | -n] [-v] [--preserve--links] + cp [-r] [-f | -n] [-v] [--preserve--links] ... + cp [-r] [-f | -n] [-v] [--preserve--links] + cp [-r] [-f | -n] [-v] [--preserve--links] ... + cp [-r] [-f | -n] [-v] [--preserve--links] + cp [-r] [-f | -n] [-v] [--preserve--links] ... + +Options: + -r + Copy directories recursively + -f + Do not prompt for confirmation before overwriting the destination path. (The -f option overrides previous -n options.) + -n + Do not overwrite an existing file. (The -n option overrides previous -f options.) + -v + Cause cp to be verbose, showing files after they are copied. + --preserve--links + Reserve links when copy links + +Examples: + paddle pfs cp ./file /pfs/$DATACENTER/home/$USER/file + paddle pfs cp /pfs/$DATACENTER/home/$USER/file ./file +``` +- ls- list files + +``` +Synopsis: + ls [-r] ... + +Options: + -R + List directory(ies) recursively + +Examples: + paddle pfs ls /pfs/$DATACENTER/home/$USER/file + paddle pfs ls /pfs/$DATACENTER/home/$USER/folder +``` + +- mkdir - mkdir directory(ies) +Create intermediate directory(ies) as required. + +``` +Synopsis: + mkdir ... + +Examples: + paddle pfs mkdir /pfs/$DATACENTER/home/$USER/folder +``` diff --git a/doc/design/file_manager/src/filemanager.graffle b/doc/design/file_manager/src/filemanager.graffle new file mode 100644 index 0000000000000000000000000000000000000000..7861a33072bc1908f69d12b37c20491dd8663103 Binary files /dev/null and b/doc/design/file_manager/src/filemanager.graffle differ diff --git a/doc/design/file_manager/src/filemanager.png b/doc/design/file_manager/src/filemanager.png new file mode 100644 index 0000000000000000000000000000000000000000..8139a19f5722f56d3c211f3ab0d3982f751134b9 Binary files /dev/null and b/doc/design/file_manager/src/filemanager.png differ diff --git a/doc/design/functions_operators_layers.md b/doc/design/functions_operators_layers.md new file mode 100644 index 0000000000000000000000000000000000000000..984b59f4c6971dfb6f46dfe342f2751f392c0e88 --- /dev/null +++ b/doc/design/functions_operators_layers.md @@ -0,0 +1,100 @@ +# Design Doc: Functions, Operators, and Layers + +In a DL system, we can compose one or more fine grained operators into a coarse grained one. For example, the FC layer can be composed of a multiplication operator and an add operator. + +Historically, some fine grained operations are known as operators, and some coarse level ones are known as layers. But we need a well-defined separation. + +In general, operators are those very fine grained operations, e.g., mul and add. In the implementation, we can write them as C++ functions: + +```c++ +template T add(T x, T y) { return x + y; } +template T mul(T x, T y) { return x * y; } +``` + +Then we can wrap them into operators which are C++ classes and can be created from Python bindings by name. A C macro can do this. For example, the following macro invocation + +```c++ +#define MAKE_FUNCTION_OPERATOR(mul); +``` + +generates + +```c++ +template class mulOp : public OperatorBase {...}; +REGISTER_OP(mulOp, "mul"); +``` + +so that in Python we can create operator mul by: + +```python +X1 = Var() +X2 = Var() +Y = Var() +paddle.cpp.create_operator("mul", input=[X1, X2], output=Y) +``` + +Also, at the same time, we can compose a coarse level C++ operator class by composing functions `mul` and `add`: + +```c++ +template +class FCOp : public OperatorBase { + public: + void Run(...) { + add(mul(Input("X"), Input("W")), Input("b"); + } +}; +REGISTER_OP(FCOp, "fc"); +``` + +We need to support such composition in Python as well. To do so, we need a higher level Python wrapping of operator creation than `paddle.cpp.create_operator`. This higher level operator API should be compatible with the layer API. + +Let's explain using an example. Suppose that we are going to compose the FC using mul and add in Python, we'd like to have Python functions `mul` and `add` defined in module `operator`: + +```python +def operator.mul(X1, X2): + O = Var() + paddle.cpp.create_operator("mul", input={X1, Y1}, output=O) + return O + +def operator.add(X1, X2): + O = Var() + paddle.cpp.create_operator("add", input={X1, X2}, output=O) + return O +``` + +Above code snippets are automatically generated. Given them, users can define + +```python +def layer.fc(X): + W = Var() + b = Var() + return operator.add(operator.mul(X, W), b) +``` + +If we don't have `operator.mul` and `operator.add`, the definiton of `layer.fc` would be complicated: + +```python +def layer.fc(X): + W = Var() + b = Var() + O1 = Var() + paddle.cpp.create_operator("mul", input=[X, W], output=O1) + O2 = Var() + paddle.cpp.create_operator("add", input=[O1, b], output=O2) + return O2 +``` + +We'd like to have Python bindings to operators in package `paddle.operator`, and Python compositions of operators in package `paddle.layer`. So we have the following concepts in above illustrative example: + + +| C++ functions/functors | mul | add | | | +|------------------------|--------------|--------------|-------------|----------| +| C++ operator class | mulOp | addOp | FCOp | | +| Python binding | operator.mul | operator.add | operator.fc | | +| Python function | | | | layer.fc | + + +This is how we differentiate layer and operators in PaddlePaddle: + +- those defined in C++ and have a lightweighted Python wrapper in module `operators` are operators; whereas +- those who don't have C++ implementations but a Python implementation that compose C++ operators are known as layers. diff --git a/doc/design/gan_api.md b/doc/design/gan_api.md new file mode 100644 index 0000000000000000000000000000000000000000..fb41df8615f73d9fd4c32995eab265833eac1a55 --- /dev/null +++ b/doc/design/gan_api.md @@ -0,0 +1,253 @@ +# Design for GAN + +GAN (General Adversarial Net [https://arxiv.org/abs/1406.2661]) is an important model for unsupervised learning and widely used in many areas. + +It applies several important concepts in machine learning system design, including building and running subgraphs, dependency tracing, different optimizers in one executor and so forth. + +In our GAN design, we wrap it as a user-friendly easily customized python API to design different models. We take the conditional DC-GAN (Unsupervised Representation Learning with Deep Convolutional Generative Adversarial Networks [https://arxiv.org/abs/1511.06434]) as an example due to its good performance on image generation. + +

+
+Figure 1. The overall running logic of GAN. The black solid arrows indicate the forward pass; the green dashed arrows indicate the backward pass of generator training; the red dashed arrows indicate the backward pass of the discriminator training. The BP pass of the green (red) arrow should only update the parameters in the green (red) boxes. The diamonds indicate the data providers. d\_loss and g\_loss marked in red and green are the two targets we would like to run. +

+ +The operators, layers and functions required/optional to build a GAN demo is summarized in https://github.com/PaddlePaddle/Paddle/issues/4563. + +

+
+Figure 2. Photo borrowed from the original DC-GAN paper. +

+ +## The Conditional-GAN might be a class. +This design we adopt the popular open source design in https://github.com/carpedm20/DCGAN-tensorflow and https://github.com/rajathkmp/DCGAN. It contains following data structure: + +- DCGAN(object): which contains everything required to build a GAN model. It provides following member functions methods as API: + +- __init__(...): Initialize hyper-parameters (like conv dimension and so forth), and declare model parameters of discriminator and generator as well. + +- generator(z, y=None): Generate a fake image from input noise z. If the label y is provided, the conditional GAN model will be chosen. +Returns a generated image. + +- discriminator(image): +Given an image, decide if it is from a real source or a fake one. +Returns a 0/1 binary label. + +- build_model(self): +build the whole GAN model, define training loss for both generator and discrimator. + +## Discussion on Engine Functions required to build GAN +- Trace the tensor and variable dependency in the engine executor. (Very critical, otherwise GAN can'be be trained correctly) +- Different optimizers responsible for optimizing different loss. + +To be more detailed, we introduce our design of DCGAN as following: + +### Class member Function: Initializer +- Set up hyper-parameters, including condtional dimension, noise dimension, batch size and so forth. +- Declare and define all the model variables. All the discriminator parameters are included in the list self.theta_D and all the generator parameters are included in the list self.theta_G. +```python +class DCGAN(object): + def __init__(self, y_dim=None): + + # hyper parameters + self.y_dim = y_dim # conditional gan or not + self.batch_size = 100 + self.z_dim = z_dim # input noise dimension + + # define parameters of discriminators + self.D_W0 = pd.Variable(shape=[3,3, 1, 128], data=pd.gaussian_normal_randomizer()) + self.D_b0 = pd.Variable(np.zeros(128)) # variable also support initialization using a numpy data + self.D_W1 = pd.Variable(shape=[784, 128], data=pd.gaussian_normal_randomizer()) + self.D_b1 = pd.Variable(np.zeros(128)) # variable also support initialization using a numpy data + self.D_W2 = pd.Varialble(np.random.rand(128, 1)) + self.D_b2 = pd.Variable(np.zeros(128)) + self.theta_D = [self.D_W0, self.D_b0, self.D_W1, self.D_b1, self.D_W2, self.D_b2] + + # define parameters of generators + self.G_W0 = pd.Variable(shape=[784, 128], data=pd.gaussian_normal_randomizer()) + self.G_b0 = pd.Variable(np.zeros(128)) # variable also support initialization using a numpy data + self.G_W1 = pd.Variable(shape=[784, 128], data=pd.gaussian_normal_randomizer()) + self.G_b1 = pd.Variable(np.zeros(128)) # variable also support initialization using a numpy data + self.G_W2 = pd.Varialble(np.random.rand(128, 1)) + self.G_b2 = pd.Variable(np.zeros(128)) + self.theta_G = [self.G_W0, self.G_b0, self.G_W1, self.G_b1, self.G_W2, self.G_b2] +``` + +### Class member Function: Generator +- Given a noisy input z, returns a fake image. +- Concatenation, batch-norm, FC operations required; +- Deconv layer required, which is missing now... +```python +class DCGAN(object): + def generator(self, z, y = None): + # input z: the random noise + # input y: input data label (optional) + # output G_im: generated fake images + + if not self.y_dim: + z = pd.layer.concat(1, [z, y]) + + G_h0 = pd.layer.fc(z, self.G_w0, self.G_b0) + G_h0_bn = pd.layer.batch_norm(G_h0) + G_h0_relu = pd.layer.relu(G_h0_bn) + + G_h1 = pd.layer.deconv(G_h0_relu, self.G_w1, self.G_b1) + G_h1_bn = pd.layer.batch_norm(G_h1) + G_h1_relu = pd.layer.relu(G_h1_bn) + + G_h2 = pd.layer.deconv(G_h1_relu, self.G_W2, self.G_b2)) + G_im = pd.layer.tanh(G_im) + return G_im +``` + +### Class member function: Discriminator +- Given a noisy input z, returns a fake image. +- Concatenation, Convolution, batch-norm, FC, Leaky-ReLU operations required; +```python +class DCGAN(object): + def discriminator(self, image): + # input image: either generated images or real ones + # output D_h2: binary logit of the label + + D_h0 = pd.layer.conv2d(image, w=self.D_w0, b=self.D_b0) + D_h0_bn = pd.layer.batchnorm(h0) + D_h0_relu = pd.layer.lrelu(h0_bn) + + D_h1 = pd.layer.conv2d(D_h0_relu, w=self.D_w1, b=self.D_b1) + D_h1_bn = pd.layer.batchnorm(D_h1) + D_h1_relu = pd.layer.lrelu(D_h1_bn) + + D_h2 = pd.layer.fc(D_h1_relu, w=self.D_w2, b=self.D_b2) + return D_h2 +``` + +### Class member function: Build the model +- Define data readers as placeholders to hold the data; +- Build generator and discriminators; +- Define two training losses for discriminator and generator, respectively. +If we have execution dependency engine to back-trace all tensors, the module building our GAN model will be like this: +```python +class DCGAN(object): + def build_model(self): + if self.y_dim: + self.y = pd.data(pd.float32, [self.batch_size, self.y_dim]) + self.images = pd.data(pd.float32, [self.batch_size, self.im_size, self.im_size]) + self.faked_images = pd.data(pd.float32, [self.batch_size, self.im_size, self.im_size]) + self.z = pd.data(tf.float32, [None, self.z_size]) + + # step 1: generate images by generator, classify real/fake images with discriminator + if self.y_dim: # if conditional GAN, includes label + self.G = self.generator(self.z, self.y) + self.D_t = self.discriminator(self.images) + # generated fake images + self.sampled = self.sampler(self.z, self.y) + self.D_f = self.discriminator(self.G) + else: # original version of GAN + self.G = self.generator(self.z) + self.D_t = self.discriminator(self.images) + # generate fake images + self.sampled = self.sampler(self.z) + self.D_f = self.discriminator(self.images) + + # step 2: define the two losses + self.d_loss_real = pd.reduce_mean(pd.cross_entropy(self.D_t, np.ones(self.batch_size)) + self.d_loss_fake = pd.reduce_mean(pd.cross_entropy(self.D_f, np.zeros(self.batch_size)) + self.d_loss = self.d_loss_real + self.d_loss_fake + + self.g_loss = pd.reduce_mean(pd.cross_entropy(self.D_f, np.ones(self.batch_szie)) +``` + +If we do not have dependency engine but blocks, the module building our GAN model will be like this: +```python +class DCGAN(object): + def build_model(self, default_block): + # input data in the default block + if self.y_dim: + self.y = pd.data(pd.float32, [self.batch_size, self.y_dim]) + self.images = pd.data(pd.float32, [self.batch_size, self.im_size, self.im_size]) + # self.faked_images = pd.data(pd.float32, [self.batch_size, self.im_size, self.im_size]) + self.z = pd.data(tf.float32, [None, self.z_size]) + + # step 1: generate images by generator, classify real/fake images with discriminator + with pd.default_block().g_block(): + if self.y_dim: # if conditional GAN, includes label + self.G = self.generator(self.z, self.y) + self.D_g = self.discriminator(self.G, self.y) + else: # original version of GAN + self.G = self.generator(self.z) + self.D_g = self.discriminator(self.G, self.y) + self.g_loss = pd.reduce_mean(pd.cross_entropy(self.D_g, np.ones(self.batch_szie)) + + with pd.default_block().d_block(): + if self.y_dim: # if conditional GAN, includes label + self.D_t = self.discriminator(self.images, self.y) + self.D_f = self.discriminator(self.G, self.y) + else: # original version of GAN + self.D_t = self.discriminator(self.images) + self.D_f = self.discriminator(self.G) + + # step 2: define the two losses + self.d_loss_real = pd.reduce_mean(pd.cross_entropy(self.D_t, np.ones(self.batch_size)) + self.d_loss_fake = pd.reduce_mean(pd.cross_entropy(self.D_f, np.zeros(self.batch_size)) + self.d_loss = self.d_loss_real + self.d_loss_fake +``` +Some small confusion and problems with this design: +- D\_g and D\_f are actually the same thing, but has to be written twice; i.e., if we want to run two sub-graphs conceptually, the same codes have to be written twice if they are shared by the graph. +- Requires ability to create a block anytime, rather than in if-else or rnn only; + +## Main function for the demo: +Generally, the user of GAN just need to the following things: +- Define an object as DCGAN class; +- Build the DCGAN model; +- Specify two optimizers for two different losses with respect to different parameters. +```python +# pd for short, should be more concise. +from paddle.v2 as pd +import numpy as np +import logging + +if __name__ == "__main__": + # dcgan class in the default graph/block + # if we use dependency engine as tensorflow + # the codes, will be slightly different like: + # dcgan = DCGAN() + # dcgan.build_model() + with pd.block() as def_block: + dcgan = DCGAN() + dcgan.build_model(def_block) + + # load mnist data + data_X, data_y = self.load_mnist() + + # Two subgraphs required!!! + with pd.block().d_block(): + d_optim = pd.train.Adam(lr = .001, beta= .1) + d_step = d_optim.minimize(dcgan.d_loss, dcgan.theta_D) + with pd.block.g_block(): + g_optim = pd.train.Adam(lr = .001, beta= .1) + g_step = pd.minimize(dcgan.g_loss, dcgan.theta_G) + + # executor + sess = pd.executor() + + # training + for epoch in xrange(10000): + for batch_id in range(N / batch_size): + idx = ... + # sample a batch + batch_im, batch_label = data_X[idx:idx+batch_size], data_y[idx:idx+batch_size] + # sample z + batch_z = np.random.uniform(-1., 1., [batch_size, z_dim]) + + if batch_id % 2 == 0: + sess.run(d_step, + feed_dict = {dcgan.images: batch_im, + dcgan.y: batch_label, + dcgan.z: batch_z}) + else: + sess.run(g_step, + feed_dict = {dcgan.z: batch_z}) +``` + +# More thinking about dependency engine v.s. block design: +- What if we just want to run an intermediate result? Do we need to run the whole block/graph? +- Should we call eval() to get the fake images in the first stage? And then train the discriminator in the second stage? diff --git a/doc/design/graph.md b/doc/design/graph.md new file mode 100644 index 0000000000000000000000000000000000000000..7519a65df835a39fe14f6ef45530afff170191ff --- /dev/null +++ b/doc/design/graph.md @@ -0,0 +1,70 @@ +# Design Doc: Computations as a Graph + +A primary goal of the refactorization of PaddlePaddle is a more flexible representation of deep learning computation, in particular, a graph of operators and variables, instead of sequences of layers as before. + +This document explains that the construction of a graph as three steps: + +- construct the forward part +- construct the backward part +- construct the optimization part + +## The Construction of a Graph + +Let us take the problem of image classification as a simple example. The application program that trains the model looks like: + +```python +x = layer.data("images") +l = layer.data("label") +y = layer.fc(x) +cost = layer.mse(y, l) +optimize(cost) +train(cost, reader=mnist.train()) +``` + +### Forward Part + +The first four lines of above program build the forward part of the graph. + +![](images/graph_construction_example_forward_only.png) + +In particular, the first line `x = layer.data("images")` creates variable x and a Feed operator that copies a column from the minibatch to x. `y = layer.fc(x)` creates not only the FC operator and output variable y, but also two parameters, W and b, and the initialization operators. + +Initialization operators are kind of "run-once" operators -- the `Run` method increments a class data member counter so to run at most once. By doing so, a parameter wouldn't be initialized repeatedly, say, in every minibatch. + +In this example, all operators are created as `OpDesc` protobuf messages, and all variables are `VarDesc`. These protobuf messages are saved in a `BlockDesc` protobuf message. + +### Backward Part + +The fifth line `optimize(cost)` calls two functions, `ConstructBackwardGraph` and `ConstructOptimizationGraph`. + +`ConstructBackwardGraph` traverses the forward graph in the `BlockDesc` protobuf message and builds the backward part. + +![](images/graph_construction_example_forward_backward.png) + +According to the chain rule of gradient computation, `ConstructBackwardGraph` would + +1. create a gradient operator G for each operator F, +1. make all inputs, outputs, and outputs' gradient of F as inputs of G, +1. create gradients for all inputs of F, except for those who don't have gradients, like x and l, and +1. make all these gradients as outputs of G. + +### Optimization Part + +For each parameter, like W and b created by `layer.fc`, marked as double circles in above graphs, `ConstructOptimizationGraph` creates an optimization operator to apply its gradient. Here results in the complete graph: + +![](images/graph_construction_example_all.png) + +## Block and Graph + +The word block and graph are interchangable in the desgin of PaddlePaddle. A [Block](https://github.com/PaddlePaddle/Paddle/pull/3708) is a metaphore of the code and local variables in a pair of curly braces in programming languages, where operators are like statements or instructions. A graph of operators and variables is a representation of the block. + +A Block keeps operators in an array `BlockDesc::ops` + +```protobuf +message BlockDesc { + repeated OpDesc ops = 1; + repeated VarDesc vars = 2; +} +``` + +in the order that they appear in user programs, like the Python program at the beginning of this article. We can imagine that in `ops`, we have some forward operators, followed by some gradient operators, and then some optimization operators. diff --git a/doc/design/if_else_op.md b/doc/design/if_else_op.md new file mode 100644 index 0000000000000000000000000000000000000000..26d140f06db4ecefa86be015eaa731ffddc6910c --- /dev/null +++ b/doc/design/if_else_op.md @@ -0,0 +1,51 @@ +# The `IfElse` Operator + +PaddlePaddle's `IfElse` operator differs from TensorFlow's: + +- the TensorFlow version takes a scalar boolean value as the condition so that the whole mini-batch goes to either the true or the false branch, whereas +- the PaddlePaddle version takes a vector of boolean value as the condition, and instances corresponding to true values go to the true branch, those corresponding to false values go to the false branch. + +## Example + +The following PaddlePaddle program shows the usage of the IfElse operator: + +```python +import paddle as pd + +x = minibatch([10, 20, 30]) # shape=[None, 1] +y = var(1) # shape=[1], value=1 +z = minibatch([10, 20, 30]) # shape=[None, 1] +cond = larger_than(x, 15) # [false, true, true] + +ie = pd.ifelse() +with ie.true_block(): + d = pd.layer.add(x, y) + ie.output(d, pd.layer.softmax(d)) +with ie.false_block(): + d = pd.layer.fc(z) + ie.output(d, d+1) +o1, o2 = ie(cond) +``` + +A challenge to implement the `IfElse` operator is to infer those variables to be split, or, say, to identify the variable of the mini-batch or those derived from the mini-batch. + +An equivalent C++ program is as follows: + +```c++ +namespace pd = paddle; + +int x = 10; +int y = 1; +int z = 10; +bool cond = false; +int o1, o2; +if (cond) { + int d = x + y; + o1 = z; + o2 = pd::layer::softmax(z); +} else { + int d = pd::layer::fc(z); + o1 = d; + o2 = d+1; +} +``` diff --git a/doc/design/images/graph_construction_example.bash b/doc/design/images/graph_construction_example.bash new file mode 100755 index 0000000000000000000000000000000000000000..35e6997abd17588e17a82d448918fc1b3bd7220e --- /dev/null +++ b/doc/design/images/graph_construction_example.bash @@ -0,0 +1,11 @@ +cat ./graph_construction_example.dot | \ + sed 's/color=red/color=red, style=invis/g' | \ + sed 's/color=green/color=green, style=invis/g' | \ + dot -Tpng > graph_construction_example_forward_only.png + +cat ./graph_construction_example.dot | \ + sed 's/color=green/color=green, style=invis/g' | \ + dot -Tpng > graph_construction_example_forward_backward.png + +cat ./graph_construction_example.dot | \ + dot -Tpng > graph_construction_example_all.png diff --git a/doc/design/images/graph_construction_example.dot b/doc/design/images/graph_construction_example.dot new file mode 100644 index 0000000000000000000000000000000000000000..8d1b673abf6b78c851676fa379dc850c4818f0e5 --- /dev/null +++ b/doc/design/images/graph_construction_example.dot @@ -0,0 +1,69 @@ +digraph ImageClassificationGraph { + ///////// The forward part ///////// + FeedX [label="Feed", color=blue, shape=box]; + FeedY [label="Feed", color=blue, shape=box]; + InitW [label="Init", color=blue, shape=diamond]; + Initb [label="Init", color=blue, shape=diamond]; + FC [label="FC", color=blue, shape=box]; + MSE [label="MSE", color=blue, shape=box]; + + x [label="x", color=blue, shape=oval]; + l [label="l", color=blue, shape=oval]; + y [label="y", color=blue, shape=oval]; + W [label="W", color=blue, shape=doublecircle]; + b [label="b", color=blue, shape=doublecircle]; + cost [label="cost", color=blue, shape=oval]; + + FeedX -> x -> FC -> y -> MSE -> cost [color=blue]; + FeedY -> l [color=blue]; + InitW -> W [color=blue]; + Initb -> b [color=blue]; + W -> FC [color=blue]; + b -> FC [color=blue]; + l -> MSE [color=blue]; + + ////////// The backward part ///////// + MSE_Grad [label="MSE_grad", color=red, shape=box]; + FC_Grad [label="FC_grad", color=red, shape=box]; + + d_cost [label="d cost", color=red, shape=oval]; + d_y [label="d y", color=red, shape=oval]; + d_b [label="d b", color=red, shape=oval]; + d_W [label="d W", color=red, shape=oval]; + + cost -> MSE_Grad [color=red]; + d_cost -> MSE_Grad [color=red]; + x -> MSE_Grad [color=red]; + l -> MSE_Grad [color=red]; + y -> MSE_Grad -> d_y [color=red]; + + x -> FC_Grad [color=red]; + y -> FC_Grad [color=red]; + d_y -> FC_Grad [color=red]; + W -> FC_Grad -> d_W [color=red]; + b -> FC_Grad -> d_b [color=red]; + + ////////// The optimizaiton part ////////// + + OPT_W [label="SGD", color=green, shape=box]; + OPT_b [label="SGD", color=green, shape=box]; + + W -> OPT_W [color=green]; + b -> OPT_b [color=green]; + d_W -> OPT_W -> W [color=green]; + d_b -> OPT_b -> b [color=green]; + + ////////// Groupings ////////// + + subgraph clusterMSE { + style=invis; + MSE; + MSE_Grad; + } + + subgraph clusterFC { + style=invis; + FC; + FC_Grad; + } +} diff --git a/doc/design/images/graph_construction_example_all.png b/doc/design/images/graph_construction_example_all.png new file mode 100644 index 0000000000000000000000000000000000000000..181187503472d15779b87284105841168b3945c4 Binary files /dev/null and b/doc/design/images/graph_construction_example_all.png differ diff --git a/doc/design/images/graph_construction_example_forward_backward.png b/doc/design/images/graph_construction_example_forward_backward.png new file mode 100644 index 0000000000000000000000000000000000000000..3049a9315fd616464dec54e33064cb75598ca536 Binary files /dev/null and b/doc/design/images/graph_construction_example_forward_backward.png differ diff --git a/doc/design/images/graph_construction_example_forward_only.png b/doc/design/images/graph_construction_example_forward_only.png new file mode 100644 index 0000000000000000000000000000000000000000..25d19088cbf0b5f68cf734f2ff21eba8af4a2860 Binary files /dev/null and b/doc/design/images/graph_construction_example_forward_only.png differ diff --git a/doc/design/images/replica.png b/doc/design/images/replica.png new file mode 100644 index 0000000000000000000000000000000000000000..ef59e56b01d792a059279e6bb9a29f3db6a59a41 Binary files /dev/null and b/doc/design/images/replica.png differ diff --git a/doc/design/images/two_phase_commit.png b/doc/design/images/two_phase_commit.png new file mode 100644 index 0000000000000000000000000000000000000000..ef6f7317bd440cc7d9fe08fcbbf2b7a542f99049 Binary files /dev/null and b/doc/design/images/two_phase_commit.png differ diff --git a/doc/design/mkldnn/README.MD b/doc/design/mkldnn/README.MD new file mode 100644 index 0000000000000000000000000000000000000000..fe8da907d9d45a2164031430ac5b7a3d5523967a --- /dev/null +++ b/doc/design/mkldnn/README.MD @@ -0,0 +1,111 @@ +# Intel® MKL-DNN on PaddlePaddle: Design Doc + +我们计划将Intel深度神经网络数学库(**MKL-DNN**\[[1](#references)\])集成到PaddlePaddle,充分展现英特尔平台的优势,有效提升PaddlePaddle在英特尔架构上的性能。 + +我们短期内的基本目标是: + +- 完成常用layer的MKL-DNN实现。 +- 完成常见深度神经网络VGG,GoogLeNet 和 ResNet的MKL-DNN实现。 + + +## Contents + +- [Overview](#overview) +- [Actions](#actions) + - [CMake](#cmake) + - [Layers](#layers) + - [Activations](#activations) + - [Unit Tests](#unit-tests) + - [Protobuf Messages](#protobuf-messages) + - [Python API](#python-api) + - [Demos](#demos) + - [Benchmarking](#benchmarking) + - [Others](#others) +- [Design Concerns](#design-concerns) + +## Overview + +我们会把MKL-DNN作为第三方库集成进PaddlePaddle,整体框架图 +
+
+Figure 1. PaddlePaddle on IA. +
+ +## Actions +我们把集成方案大致分为了如下几个方面。 + +### CMake +我们会在`CMakeLists.txt`中会添加`WITH_MKLDNN`的选项,当设置这个值为`ON`的时候会启用编译MKL-DNN功能。同时会自动开启OpenMP用于提高MKL-DNN的性能。 + +同时,我们会引入`WITH_MKLML`选项,用于选择是否使用MKL-DNN自带的MKLML安装包。这个安装包可以独立于MKL-DNN使用,但是建议在开启MKL-DNN的同时也打开MKLML的开关,这样才能发挥最好的性能。 + +所以,我们会在`cmake/external`目录新建`mkldnn.cmake`和`mklml.cmake`文件,它们会在编译PaddlePaddle的时候下载对应的软件包,并放到PaddlePaddle的third party目录中。 + +**备注**:当`WITH_MKLML=ON`的时候,会优先使用这个包作为PaddlePaddle的CBLAS和LAPACK库,所以会稍微改动`cmake/cblas.cmake`中的逻辑。 + +### Layers +所有MKL-DNN相关的C++ layers,都会按照PaddlePaddle的目录结构存放在 +`paddle/gserver/layers`中,并且文件名都会一以*Mkldnn*开头。 + +所有MKL-DNN的layers都会继承于一个叫做`MkldnnLayer`的父类,该父类继承于PaddlePaddle的基类`Layer`。 + +### Activations +由于在PaddlePaddle中,激活函数是独立于layer概念的,所以会在`paddle/gserver/activations`目录下添加一个`MkldnnActivation.h`文件定义一些用于MKL-DNN的接口,实现方法还是会在`ActivationFunction.cpp`文件。 + +### Unit Tests +会在`paddle/gserver/test`目录下添加`test_Mkldnn.cpp`和`MkldnnTester.*`用于MKL-DNN的测试。 + +Activation的测试,计划在PaddlePaddle原有的测试文件上直接添加新的测试type。 + +### Protobuf Messages +根据具体layer的需求可能会在`proto/ModelConfig.proto`里面添加必要的选项。 + +### Python API +目前只考虑**v1 API**。 + +计划在`python/paddle/trainer/config_parser.py`里面添加`use_mkldnn`这个选择,方便用户选择使用MKL-DNN的layers。 + +具体实现方式比如: + +```python +use_mkldnn = bool(int(g_command_config_args.get("use_mkldnn", 0))) +if use_mkldnn + self.layer_type = mkldnn_* +``` + +所有MKL-DNN的layer type会以*mkldnn_*开头,以示区分。 + +并且可能在`python/paddle/trainer_config_helper`目录下的`activations.py `和`layers.py`里面添加必要的MKL-DNN的接口。 + +### Demos + +会在`v1_api_demo`目录下添加一个`mkldnn`的文件夹,里面放入一些用于MKL-DNN测试的demo脚本。 + +### Benchmarking +会考虑添加部分逻辑在`benchmark/paddle/image/run.sh`,添加使用MKL-DNN的测试。 + +### Others +1. 如果在使用MKL-DNN的情况下,会把CPU的Buffer对齐为64。 +2. 深入PaddlePaddle,寻找有没有其他可以优化的可能,进一步优化。比如可能会用OpenMP改进SGD的更新性能。 + +## Design Concerns + +为了更好的符合PaddlePaddle的代码风格\[[2](#references)\],同时又尽可能少的牺牲MKL-DNN的性能\[[3](#references)\]。 + +我们总结出一些特别需要注意的点: + +1. 使用**deviceId_**。为了尽可能少的在父类Layer中添加变量或者函数,我们决定使用已有的`deviceId_`变量来区分layer的属性,定义`-2`为`MkldnnLayer`特有的设备ID。 +2. 重写父类Layer的**init**函数,修改`deviceId_`为`-2`,代表这个layer是用于跑在MKL-DNN的环境下。 +3. 创建`MkldnnMatrix`,用于管理MKL-DNN会用到的相关memory函数、接口以及会用的到格式信息。 +4. 创建`MkldnnBase`,定义一些除了layer和memory相关的类和函数。包括MKL-DNN会用到`MkldnnStream`和`CpuEngine`,和未来可能还会用到`FPGAEngine`等。 +5. 在**Argument**里添加两个`MkldnnMatrixPtr`,取名为`mkldnnValue`和`mkldnnGrad`,用于存放`MkldnnLayer`会用到的memory buffer。 并且添加函数cvt(会修改为一个更加合适的函数名),用于处理"CPU device"和"MKL-DNN device"之间memory的相互转化。 +6. 在父类`Layer`中的`getOutput`函数中添加一段逻辑,用于判断`deviceId`,并针对device在MKL-DNN和CPU之间不统一的情况,做一个前期转换。 也就是调用`Argument`的cvt函数把output统一到需要的device上。 +7. 在原来的`FLAGS`中添加一个`use_mkldnn`的flag,用于选择是否使用MKL-DNN的相关功能。 +8. 关于MKLDNN参数的保存。由于MKLDNN参数的格式与PaddlePaddle原有的格式存在不一样的情况,所以需要在保存参数时同时保存该格式信息。目前准备扩展[Header](https://github.com/PaddlePaddle/Paddle/blob/develop/paddle/parameter/Parameter.h#L247)里面的`int32_t version`。这个值不管是在v1还是在v2里面,一直保存的是0,所以可以充分利用这个信息,定义一个枚举处理所有MKLDNN的参数格式,从而`MKLDNNLayer`就可以从输入的参数中获取需要的格式信息。 + +## References + +1. [Intel Math Kernel Library for Deep Neural Networks (Intel MKL-DNN)](https://github.com/01org/mkl-dnn "Intel MKL-DNN") +2. [原来的方案](https://github.com/PaddlePaddle/Paddle/pull/3096)会引入**nextLayer**的信息。但是在PaddlePaddle中,无论是重构前的layer还是重构后的op,都不会想要知道next layer/op的信息。 +3. MKL-DNN的高性能格式与PaddlePaddle原有的`NCHW`不同(PaddlePaddle中的CUDNN部分使用的也是`NCHW`,所以不存在这个问题),所以需要引入一个转换方法,并且只需要在必要的时候转换这种格式,才能更好的发挥MKL-DNN的性能。 + diff --git a/doc/design/mkldnn/image/overview.png b/doc/design/mkldnn/image/overview.png new file mode 100644 index 0000000000000000000000000000000000000000..84b455c28230703599a2529f014cfbb222138fef Binary files /dev/null and b/doc/design/mkldnn/image/overview.png differ diff --git a/doc/design/multi_language_interface/00.why_plain_c.md b/doc/design/multi_language_interface/00.why_plain_c.md new file mode 100644 index 0000000000000000000000000000000000000000..a1443093342c5a3ed698fb6b52a751dfc7cb5319 --- /dev/null +++ b/doc/design/multi_language_interface/00.why_plain_c.md @@ -0,0 +1,118 @@ +# Paddle多语言接口实现 +## 背景 + +Paddle需要一个多语言接口,这个接口需要做到: + +* 有标准的,良好的文档 + * 例如Python可以使用[Sphinx](http://www.sphinx-doc.org/en/stable/)生成API文档,golang可以使用[GoDoc](https://godoc.org/golang.org/x/tools/cmd/godoc)生成文档。这都需要这个接口按照约定俗成的规则来注释完备。 +* 不同语言的接口适应不同语言的特性 + * 例如Java与Python的错误处理是直接扔出来Exception,而对于golang错误处理应该使用返回值。 + +## 基本要求 + +Paddle的多语言接口实现包括一下几个方面: + +* 我们使用动态库来分发Paddle。在这个动态库中不嵌入任何其他语言的解释器,也不使用其他动态库。 +* 这个动态库使用C99标准的头文件导出一些函数,不使用/导出C++符号。 +* 不导出Paddle内部的结构体、类,仅仅使用`void*`指针作为类型的句柄(handler)。 +* 不使用SWIG这种代码生成器,而是手写多语言绑定。 + + +## 原因 + +### 使用动态库来分发Paddle + +* Paddle的链接方式比较复杂 + * 如果用户要把Paddle的静态库(libpaddle.a)链接到自己的程序里,得使用 `--whole-archive` (for GCC) 或者 `--force_load` (for Clang) 参数,来确保把 libpaddle.a 里所有的符号都写入自己的程序的二进制文件里。这是因为 Paddle 的源码里使用了[object factory design pattern](http://stackoverflow.com/a/1310326/724872)。 +* 编译型语言,例如C/C++使用静态库和动态库难度差不多。但是解释性语言,例如[Python](http://stackoverflow.com/questions/19560594/how-to-import-static-library-in-python)或者[Java](http://stackoverflow.com/questions/24493337/linking-static-library-with-jni),只能调用Paddle的动态库,否则得把Paddle静态库链接到解释器里。 + * 解释性语言实际运行的二进制是解释器本身,如果调用静态库只能将静态库与解释器链接。例如对于Java来说,便是将静态库加入JVM中。这对于通常的Java的开发者来说,是不常见的做法。 + +### 动态库中不嵌入任何其他语言的解释器 + +* 目前Paddle的进程模型是C++内部驱动Python解释器进行模型配置解析和数据读取 +* 我们最终的动态库中不嵌入Python或者其他任何语言的解释器。模型配置解析,数据读取均交由其他语言完成 + +现阶段Paddle有一个问题是,Paddle内嵌的Python解释器和外部使用的Python如果版本不同,会直接报错退出。 + +### Paddle动态库中,不引用其他动态库 + +* 即这个动态库是不依赖于其他任何文件的,可以在任何机器上执行的。 + +### 这个动态库使用C99标准的头文件导出一些函数,不使用/导出C++符号 + +* 由于C++编译器没有[名字修饰](https://en.wikipedia.org/wiki/Name_mangling#C.2B.2B)的规范,不同版本的编译器之间,对于同一段C++代码生成的符号可能不一致。而多语言接口需要直接读取生成的二进制(动态库),需要有稳定的导出符号。 +* C语言是有导出符号的标准的,并且在常见的平台上,都是ABI调用标准的。 +* 大多数语言都支持使用C语言API +* 使用C99而不使用C89,是因为C99支持[Fixed-width integer types](https://en.wikipedia.org/wiki/C_data_types#Fixed-width_integer_types)和[Boolean type](https://en.wikipedia.org/wiki/C_data_types#Boolean_type)。 +* 使用C99而不使用C11的原因是,[C11](https://en.wikipedia.org/wiki/C11_(C_standard_revision))并没有Paddle特别需要的特性,且C99相对于C11使用更加广泛。 + +### 不导出Paddle内部的结构体、类,仅仅使用`void*`指针作为类型的句柄(handler) + +* Paddle内部的类为C++书写,直接导出到C的接口比较困难。 +* 在C-API中使用`void*`来表示Paddle内部类。再在每一个API中自己检查类型。 + +在C的头文件 `paddle_matrix.h` 中: + +```C +typedef void* paddle_matrix; +typedef int paddle_error; + +extern "C" +paddle_error paddle_matrix_get_shape(paddle_matrix matrix, + uint64_t* width, + uint64_t* height); +``` +而在CPP里面实现这个C的接口,文件 `paddle_matrix.cpp` + +```cpp +#include "paddle/math/matrix.h" +extern "C" +paddle_error paddle_matrix_shape(paddle_matrix matrix, + uint64_t *width, + uint64_t *height) { + auto m = (paddle::capi::CMatrix*)(matrix); + *width = m->width(); + *height = m->height(); +} +``` + +其中`paddle/capi/CMatrix.hpp`文件内容为: + +```cpp +namespace paddle { +namespace math { + +class CMatrix { + std::shared_ptr mat; +}; + +} // namespace math +} // namespace paddle +``` + +### 不使用SWIG这种代码生成器,而是手写多语言绑定 + +* [SWIG](http://www.swig.org/)是一个多语言接口的代码生成器。他的目标是使用C/C++写代码,SWIG直接读取C/C++的头文件,生成各种语言的绑定代码。 + * 对于多语言接口,SWIG需要写一个interface文件。这个文件具有独特的语法,学习成本高。且增加一个第三方语言,就需要对这个第三方语言增加一些定义。有的时候,interface文件的写法非常[tricky](https://github.com/PaddlePaddle/Paddle/blob/develop/paddle/api/Paddle.swig#L36)。社区贡献代码学习成本高。 + * SWIG暴露的接口保留了C++的接口样式,很难保证多语言代码风格的一致性。(函数命名,错误处理) + * 因为SWIG在第三方语言中暴露的函数名,类名和C++中完全一致。C++的命名风格并不能适应其他第三方语言。如果使用SWIG我们需要将在interface文件里,将大量的`SomeCppClass`重命名成`some_python_class`,或者`SomeGoTypes`。 + * 对于不同语言,错误处理的方式也不尽相同。例如对于Java或者Python,最常见的错误处理方式是Exception,而对于Golang,错误处理方式是返回值。而SWIG只能简单的暴露C++接口,无法做到对于各种语言错误处理方式的适配。 + * 对于大多数语言,直接使用C语言的.h并不困难。例如Python的[cffi](https://cffi.readthedocs.io/en/latest/overview.html#simple-example-abi-level-in-line)或者[Cython](http://cython.org/), golang的[cgo](https://golang.org/cmd/cgo/)。 + * SWIG支持的语言或者解释器有局限。例如对于Python,使用SWIG只支持CPython解释器,而不支持PyPy解释器。 + + +## 原因列表 + +| 结论 | 对比 | 原因 | +|---| --- | --- | +| 使用动态库 | 不使用静态库 | 解释型语言只能调用动态库,Paddle静态库链接复杂 | +| 不嵌入其他语言解释器 | 不嵌入Python解释器 | Paddle C++目前嵌入Python解释器,会导致不同版本Python在一个进程里的bug | +| 不引用其他动态库 | | Paddle一个动态库可以在任何Linux系统上运行 | +| 使用C99做接口 | 不使用C++做接口 | C有标准的ABI,C99是目前C最广泛的使用标准,且C99支持bool类型和定长整数(uint64_t等)类型 | +| 使用void*作为类句柄 | 不显示的写每个类具体包含什么| 实现简单,并且让接口脱离实现细节 | +| 手写多语言绑定 | 不使用SWIG | 使用SWIG需要多语言绑定的开发人员熟练掌握SWIG配置,社区参与困难。SWIG生成的代码不能保证多语言代码风格的一致性 | + + +## 实现 + +参考[Inference implementation](01.inference_implementation.md) diff --git a/doc/design/multi_language_interface/01.inference_implementation.md b/doc/design/multi_language_interface/01.inference_implementation.md new file mode 100644 index 0000000000000000000000000000000000000000..9820284523246a062581f322616d196f575c9d29 --- /dev/null +++ b/doc/design/multi_language_interface/01.inference_implementation.md @@ -0,0 +1,131 @@ +# C-API 模型推断实现文档 + +本文档描述Paddle C-API的实现细节。Paddle C-API是多语言API的基础部分。Paddle需要暴露的API很多。先实现模型推断的API,通过模型推断API的实现作为一个样例,来进行讨论。至于为什么需要C-API,请参考[Why Plain C](./00.why_plain_c.md)。 + +## Table of Contents + * [C-API 模型推断实现文档](#c-api-模型推断实现文档) + * [暴露接口原则](#暴露接口原则) + * [目录结构](#目录结构) + * [实现方式](#实现方式) + * [capi.h](#capih) + * [具体某种类型的头文件](#具体某种类型的头文件) + * [capi_private.h](#capi_privateh) + * [具体某种类型的实现文件](#具体某种类型的实现文件) + * [libpaddle_capi_shared.{so, dylib}](#libpaddle_capi_sharedso-dylib) + * [libpaddle_capi_whole.a](#libpaddle_capi_wholea) + * [examples](#examples) + * [编译选项](#编译选项) + + +## 暴露接口原则 + +1. 所有的接口均为C接口。即使用`extern "C"` +2. 除构造某种类型的函数(`paddle_matrix_create`等),其他函数均返回`paddle_error`。且调用时不能抛出异常或出现运行时错误。 +3. 所有类型名为`paddle_类型名`,所有与类型相关的函数,函数名为`paddle_类型名_函数名` +4. 如果某一个Paddle Core概念(GradientMachine/Matrix)需要被暴露到其他语言,那么 + * 为了暴露的接口尽量简单。只暴露概念的接口,而不暴露概念的实现。即暴露`GradientMachine`或者`Matrix`但不暴露`RecurrentGradientMachine`和`CpuSparseMatrix`。 + * 暴露这个概念必要函数。`必要`是指,即完成某一个任务的最少函数。 +5. 不在`capi`接口层做过多封装。 + * 如果某一个Paddle概念必须要暴露,但是又过于琐碎。不在`capi`这一层进行封装,而是直接修改Paddle Core。让Paddle核心中,这一概念不再琐碎。 + + +## 目录结构 + +```text +Paddle + `-- paddle + `-- capi + `-- examples # The example project for C-API. + `-- tests # unittests for C-API + `-- capi.h # C-API header file. + `-- capi_private.h # The shared header file between implementation sources. + `-- matrix.{h, cpp} + `-- gradient_machine.{h, cpp} + `-- ... +``` + + +Paddle的C-API目录结构如上图表所示。这个目录中除了`capi_private.h`之外的所有头文件,均会被安装到include/paddle路径下。C-API生成的二进制文件会被安装到`lib`目录下。即,安装后的目录结构为 + +```text +`-- include + `-- paddle + `-- capi.h + `-- matrix.h + `-- gradient_machine.h + `-- ... +`-- lib + `-- libpaddle_capi_shared.{so, dylib} # In mac, dynamic libary's file name extention is `dylib` + `-- libpaddle_capi_whole.a # static library for all symbols of Paddle. +``` + +## 实现方式 + +下面分别介绍某一类文件的实现方式。 + +### capi.h + +`capi.h`是用户使用C-API时所唯一需要引入的头文件。在`capi.h`中,引入了类型的头文件,`matrix.h`, `gradient_machine.h`。在引入其他类型的头文件时,使用相对路径的引用方式。即`#include "matrix.h"` + +### 具体某种类型的头文件 + +具体某种类型的头文件,即例如`matrix.h`,`gradient_machine.h`等。在这些头文件中,包含了某种类型的类型定义和暴露的全部函数。 + +这个头文件不假设其他文件的引用顺序,即使用户直接引用某种类型的头文件,也不应该报错(虽然不鼓励这样)。如果某一个类型需要引用另一个类型,例如`gradient_machine`需要引用`matrix`,则直接引入另一种类型的头文件,即`#include "matrix.h"`。 + +### capi_private.h + +`capi_prviate.h`是各个实现中共享的头文件,他主要包含了实际暴露的类型结构。在用户使用C-API时,Paddle的类型全部退化成`void *`,即`typedef paddle_matrix void*`。但,对于每种C-API暴露的类型,均是在`capi_private.h`中实现的结构体。 + +```cpp +struct CMatrix { + int type = MatrixType; + std::shared_ptr mat; +}; +``` + +通常,这个结构体包含两个项目。 + +* `type`是一个类型的标志。对于每种类型,type字段均不尽相同。这样,即使C-API接受的类型全是`void *`,我们也可以确定每一个参数的类型。 + + ```cpp + void some_c_api_function(void* some_instance) { + int* type = (int *) some_instance; + switch (*type) { + case MatrixType: + CMatrix* mat = (CMatrix *) some_instance; + ... + ... + } + } + ``` +* 这个结构体中的另一个项目是,Paddle Core中这一类型接口的智能指针(shared_ptr)。 + * 使用智能指针的原因是: 用户可以安全的释放某个C-API的实例,而不必在意Paddle Core是否还在使用这个实例。 + * 例如,用户通过C-API获得了神经网络的参数实例。当用户使用完这个参数后,直接删除这个参数即可。即便Paddle Core中的模型还在使用这个参数,这个参数也不会一并删除。 + +### 具体某种类型的实现文件 + +具体某种类型的实现文件,即`matrix.cpp`, `gradient_machine.cpp`等文件。在这些文件中,使用C++ 11实现了C-API的接口,并且使用`extern "C"`导出这些接口。在实现过程中,对输入参数的安全性进行了必要的判断,并将C-API接口的参数转发给`Paddle Core`。 + +### libpaddle\_capi_shared.{so, dylib} + +`libpaddle_capi_shared`是C-API导出的动态库。这个动态库的连接参数与Paddle的其他二进制(例如`paddle_trainer`)类似。用户可以直接使用这个动态库来引入Paddle C-API。具体使用方法为`-lpaddle_capi_shared`。 + +### libpaddle\_capi_whole.a + +`libpaddle_capi_whole`是C-API导出的静态库。这个静态库包含了Paddle的全部符号。他是将`libpaddle_gserver.a`, `libpaddle_math.a`, `libpaddle_capi.a`等全部静态库中的目标文件全部打包后产生的文件。具体使用方法为`--whole-archive -lpaddle_capi_whole --no-whole-archive`。 + + +### examples + +在样例中,使用`C99`开发了模型预测的样例代码。具体请参考[example/README.md](../../../paddle/capi/examples/README.md)。 + +## 编译选项 + +C-API的编译选项默认关闭,打开这个编译选项,需要在cmake的时候,设置 + +```bash +cmake ${YOUR_SOURCE_ROOT} -DWITH_C_API=ON -DWITH_PYTHON=OFF -DWITH_SWIG_PY=OFF +``` + +编译C-API的时候推荐Paddle不嵌入Python解释器,也不生成`SWIG`接口,具体原因参考[Why Plain C](./00.why_plain_c.md)。 diff --git a/doc/design/multi_language_interface/why_plain_c.md b/doc/design/multi_language_interface/why_plain_c.md deleted file mode 100644 index a3f41ca7b93de8a55d927c88812802ef12246182..0000000000000000000000000000000000000000 --- a/doc/design/multi_language_interface/why_plain_c.md +++ /dev/null @@ -1,118 +0,0 @@ -# Paddle多语言接口实现 -## 背景 - -Paddle需要一个多语言接口,这个接口需要做到: - -* 有标准的,良好的文档 - * 例如Python可以使用[Sphinx](http://www.sphinx-doc.org/en/stable/)生成API文档,golang可以使用[GoDoc](https://godoc.org/golang.org/x/tools/cmd/godoc)生成文档。这都需要这个接口按照约定俗成的规则来注释完备。 -* 不同语言的接口适应不同语言的特性 - * 例如Java与Python的错误处理是直接扔出来Exception,而对于golang错误处理应该使用返回值。 - -## 基本要求 - -Paddle的多语言接口实现包括一下几个方面: - -* 我们使用动态库来分发Paddle。在这个动态库中不嵌入任何其他语言的解释器,也不使用其他动态库。 -* 这个动态库使用C99标准的头文件导出一些函数,不使用/导出C++符号。 -* 不导出Paddle内部的结构体、类,仅仅使用`void*`指针作为类型的句柄(handler)。 -* 不使用SWIG这种代码生成器,而是手写多语言绑定。 - - -## 原因 - -### 使用动态库来分发Paddle - -* Paddle的链接方式比较复杂 - * 如果用户要把Paddle的静态库(libpaddle.a)链接到自己的程序里,得使用 `--whole-archive` (for GCC) 或者 `--force_load` (for Clang) 参数,来确保把 libpaddle.a 里所有的符号都写入自己的程序的二进制文件里。这是因为 Paddle 的源码里使用了[object factory design pattern](http://stackoverflow.com/a/1310326/724872)。 -* 编译型语言,例如C/C++使用静态库和动态库难度差不多。但是解释性语言,例如[Python](http://stackoverflow.com/questions/19560594/how-to-import-static-library-in-python)或者[Java](http://stackoverflow.com/questions/24493337/linking-static-library-with-jni),只能调用Paddle的动态库,否则得把Paddle静态库链接到解释器里。 - * 解释性语言实际运行的二进制是解释器本身,如果调用静态库只能将静态库与解释器链接。例如对于Java来说,便是将静态库加入JVM中。这对于通常的Java的开发者来说,是不常见的做法。 - -### 动态库中不嵌入任何其他语言的解释器 - -* 目前Paddle的进程模型是C++内部驱动Python解释器进行模型配置解析和数据读取 -* 我们最终的动态库中不嵌入Python或者其他任何语言的解释器。模型配置解析,数据读取均交由其他语言完成 - -现阶段Paddle有一个问题是,Paddle内嵌的Python解释器和外部使用的Python如果版本不同,会直接报错退出。 - -### Paddle动态库中,不引用其他动态库 - -* 即这个动态库是不依赖于其他任何文件的,可以在任何机器上执行的。 - -### 这个动态库使用C99标准的头文件导出一些函数,不使用/导出C++符号 - -* 由于C++编译器没有[名字修饰](https://en.wikipedia.org/wiki/Name_mangling#C.2B.2B)的规范,不同版本的编译器之间,对于同一段C++代码生成的符号可能不一致。而多语言接口需要直接读取生成的二进制(动态库),需要有稳定的导出符号。 -* C语言是有导出符号的标准的,并且在常见的平台上,都是ABI调用标准的。 -* 大多数语言都支持使用C语言API -* 使用C99而不使用C89,是因为C99支持[Fixed-width integer types](https://en.wikipedia.org/wiki/C_data_types#Fixed-width_integer_types)和[Boolean type](https://en.wikipedia.org/wiki/C_data_types#Boolean_type)。 -* 使用C99而不使用C11的原因是,[C11](https://en.wikipedia.org/wiki/C11_(C_standard_revision))并没有Paddle特别需要的特性,且C99相对于C11使用更加广泛。 - -### 不导出Paddle内部的结构体、类,仅仅使用`void*`指针作为类型的句柄(handler) - -* Paddle内部的类为C++书写,直接导出到C的接口比较困难。 -* 在C-API中使用`void*`来表示Paddle内部类。再在每一个API中自己检查类型。 - -在C的头文件 `paddle_matrix.h` 中: - -```C -typedef void* paddle_matrix; -typedef int paddle_error; - -extern "C" -paddle_error paddle_matrix_shape(paddle_matrix matrix, - uint64_t* width, - uint64_t* height); -``` -而在CPP里面实现这个C的接口,文件 `paddle_matrix.cpp` - -```cpp -#include "paddle/math/matrix.hpp" -extern "C" -paddle_error paddle_matrix_shape(paddle_matrix matrix, - uint64_t *width, - uint64_t *height) { - auto m = (paddle::math::matrix*)(matrix); - *width = m->width(); - *height = m->height(); -} -``` - -其中`paddle/math/matrix.hpp`文件内容为: - -```cpp -namespace paddle { -namespace math { - -class Matrix { - //... -}; - -} // namespace math -} // namespace paddle -``` - -### 不使用SWIG这种代码生成器,而是手写多语言绑定 - -* [SWIG](http://www.swig.org/)是一个多语言接口的代码生成器。他的目标是使用C/C++写代码,SWIG直接读取C/C++的头文件,生成各种语言的绑定代码。 - * 对于多语言接口,SWIG需要写一个interface文件。这个文件具有独特的语法,学习成本高。且增加一个第三方语言,就需要对这个第三方语言增加一些定义。有的时候,interface文件的写法非常[tricky](https://github.com/PaddlePaddle/Paddle/blob/develop/paddle/api/Paddle.swig#L36)。社区贡献代码学习成本高。 - * SWIG暴露的接口保留了C++的接口样式,很难保证多语言代码风格的一致性。(函数命名,错误处理) - * 因为SWIG在第三方语言中暴露的函数名,类名和C++中完全一致。C++的命名风格并不能适应其他第三方语言。如果使用SWIG我们需要将在interface文件里,将大量的`SomeCppClass`重命名成`some_python_class`,或者`SomeGoTypes`。 - * 对于不同语言,错误处理的方式也不尽相同。例如对于Java或者Python,最常见的错误处理方式是Exception,而对于Golang,错误处理方式是返回值。而SWIG只能简单的暴露C++接口,无法做到对于各种语言错误处理方式的适配。 - * 对于大多数语言,直接使用C语言的.h并不困难。例如Python的[cffi](https://cffi.readthedocs.io/en/latest/overview.html#simple-example-abi-level-in-line)或者[Cython](http://cython.org/), golang的[cgo](https://golang.org/cmd/cgo/)。 - * SWIG支持的语言或者解释器有局限。例如对于Python,使用SWIG只支持CPython解释器,而不支持PyPy解释器。 - - -## 原因列表 - -| 结论 | 对比 | 原因 | -|---| --- | --- | -| 使用动态库 | 不使用静态库 | 解释型语言只能调用动态库,Paddle静态库链接复杂 | -| 不嵌入其他语言解释器 | 不嵌入Python解释器 | Paddle C++目前嵌入Python解释器,会导致不同版本Python在一个进程里的bug | -| 不引用其他动态库 | | Paddle一个动态库可以在任何Linux系统上运行 | -| 使用C99做接口 | 不使用C++做接口 | C有标准的ABI,C99是目前C最广泛的使用标准,且C99支持bool类型和定长整数(uint64_t等)类型 | -| 使用void*作为类句柄 | 不显示的写每个类具体包含什么| 实现简单,并且让接口脱离实现细节 | -| 手写多语言绑定 | 不使用SWIG | 使用SWIG需要多语言绑定的开发人员熟练掌握SWIG配置,社区参与困难。SWIG生成的代码不能保证多语言代码风格的一致性 | - - -## 简单实现 - -TBD diff --git a/doc/design/ops/images/2_level_rnn.dot b/doc/design/ops/images/2_level_rnn.dot new file mode 100644 index 0000000000000000000000000000000000000000..a498e882a3d85a33d44dbad7474fa2a340e33976 --- /dev/null +++ b/doc/design/ops/images/2_level_rnn.dot @@ -0,0 +1,56 @@ +digraph G { + + rnn [label="1-th level RNN" shape=box] + + subgraph cluster0 { + label = "time step 0" + + sent0 [label="sentence"] + sent1 [label="sentence"] + + rnn1 [label="2-th level RNN" shape=box] + + sent0 -> rnn1 + sent1 -> rnn1 + } + + subgraph cluster1 { + label = "time step 1" + + sent2 [label="sentence"] + sent3 [label="sentence"] + + rnn2 [label="2-th level RNN" shape=box] + + sent2 -> rnn2 + sent3 -> rnn2 + } + + subgraph cluster2 { + label = "time step 2" + + sent4 [label="sentence"] + sent5 [label="sentence"] + + rnn3 [label="2-th level RNN" shape=box] + + sent4 -> rnn3 + sent5 -> rnn3 + } + + + para0 [label="paragraph info 0"] + para1 [label="paragraph info 1"] + para2 [label="paragraph info 2"] + + rnn1 -> para0 + rnn2 -> para1 + rnn3 -> para2 + + para0 -> rnn + para1 -> rnn + para2 -> rnn + + chapter [label="chapter info"] + rnn -> chapter +} diff --git a/doc/design/ops/images/2_level_rnn.png b/doc/design/ops/images/2_level_rnn.png new file mode 100644 index 0000000000000000000000000000000000000000..0537a75beb175c0c284717421f7aa908da2a5038 Binary files /dev/null and b/doc/design/ops/images/2_level_rnn.png differ diff --git a/doc/design/ops/images/rnn.dot b/doc/design/ops/images/rnn.dot new file mode 100644 index 0000000000000000000000000000000000000000..c1141cd9c981bb3cbf50d8bf7a6ed210280d79a5 --- /dev/null +++ b/doc/design/ops/images/rnn.dot @@ -0,0 +1,87 @@ +digraph G { + label = "simple RNN implementation" + + ranksep=2; + + //graph [nodesep=1, ranksep=1]; + + node[nodesep=1] + + subgraph cluster0 { + label = "global scope" + rankdir = TB + W + boot_memory + input + output + } + + subgraph cluster1 { + label = "step-scope 0" + rankdir = TB + memory0[label="memory"] + prememory0[label="pre-memory"] + step_input0[label="step input"] + step_output0[label="step output"] + } + + subgraph cluster2 { + label = "step-scope 1" + rankdir = TB + memory1[label="memory"] + prememory1[label="pre-memory"] + step_input1[label="step input"] + step_output1[label="step output"] + } + + subgraph cluster3 { + label = "step-scope 2" + rankdir = TB + memory2[label="memory"] + prememory2[label="pre-memory"] + step_input2[label="step input"] + step_output2[label="step output"] + } + + stepnet [shape=box] + stepnet0 [shape=box, style=dashed] + stepnet1 [shape=box, style=dashed] + stepnet2 [shape=box, style=dashed] + + + edge[color=blue] + boot_memory -> prememory0 [label="init" color="blue"] + memory0 -> prememory1 [label="copy/reference" color="blue"] + memory1 -> prememory2 [label="copy/reference" color="blue"] + + edge[color=black] + W -> stepnet0[constraint=false, style=dashed] + W -> stepnet1[constraint=false, style=dashed] + W -> stepnet2[constraint=false, style=dashed] + + memory0 -> stepnet0[style=dashed] + prememory0 -> stepnet0 -> step_output0[style=dashed] + + memory1 -> stepnet1[style=dashed] + prememory1 -> stepnet1 -> step_output1[style=dashed] + + memory2 -> stepnet2[style=dashed] + prememory2 -> stepnet2 -> step_output2[style=dashed] + + input -> step_input0 + input -> step_input1 + input -> step_input2 + + step_input0 -> stepnet0 [style=dashed] + step_input1 -> stepnet1[style=dashed] + step_input2 -> stepnet2[style=dashed] + + step_output0 -> output + step_output1 -> output + step_output2 -> output + + stepnet0 -> stepnet[style=dashed] + stepnet1 -> stepnet[style=dashed] + stepnet2 -> stepnet[style=dashed] + +} diff --git a/doc/design/ops/images/rnn.jpg b/doc/design/ops/images/rnn.jpg new file mode 100644 index 0000000000000000000000000000000000000000..9867e404cf959df0dce6ded5222b466c788fb840 Binary files /dev/null and b/doc/design/ops/images/rnn.jpg differ diff --git a/doc/design/ops/images/rnn.png b/doc/design/ops/images/rnn.png new file mode 100644 index 0000000000000000000000000000000000000000..e139e373fe8396782044cfd936fdde624f8c66fe Binary files /dev/null and b/doc/design/ops/images/rnn.png differ diff --git a/doc/design/ops/images/rnn_2level_data.dot b/doc/design/ops/images/rnn_2level_data.dot new file mode 100644 index 0000000000000000000000000000000000000000..1d85ae2617a915ad0ad8288d848b607cc37ad297 --- /dev/null +++ b/doc/design/ops/images/rnn_2level_data.dot @@ -0,0 +1,75 @@ +digraph G { + chapter [label="chapter"] + + subgraph cluster0 { + label = "paragraph 0" + + top_rnn0[label="top rnn step 0" shape=box] + + p0 [label="paragraph 0"] + p1 [label="paragraph 1"] + } + + subgraph cluster1{ + label = "paragraph 1" + + top_rnn1[label="top rnn step 1" shape=box] + + p2 [label="paragraph 0"] + p3 [label="paragraph 1"] + } + + subgraph cluster_p0 { + label = "sentence 0" + + low_rnn0 [label="low rnn step 0" shape=box] + s00 [label="sentence 0"] + s01 [label="sentence 1"] + + low_rnn0 -> s00 + low_rnn0 -> s01 + } + + subgraph cluster_p1 { + label = "sentence 1" + low_rnn1 [label="low rnn step 1" shape=box] + s10 [label="sentence 0"] + s11 [label="sentence 1"] + low_rnn1 -> s10 + low_rnn1 -> s11 + } + + subgraph cluster_p2 { + label = "sentence 1" + low_rnn2 [label="low rnn step 0" shape=box] + s20 [label="sentence 0"] + s21 [label="sentence 1"] + low_rnn2 -> s20 + low_rnn2 -> s21 + } + + subgraph cluster_p3 { + label = "sentence 1" + low_rnn3 [label="low rnn step 1" shape=box] + s30 [label="sentence 0"] + s31 [label="sentence 1"] + low_rnn3 -> s30 + low_rnn3 -> s31 + } + + + chapter -> top_rnn0 + chapter -> top_rnn1 + + top_rnn0 -> p0 + top_rnn0 -> p1 + top_rnn1 -> p2 + top_rnn1 -> p3 + + + p0 -> low_rnn0 + p1 -> low_rnn1 + p2 -> low_rnn2 + p3 -> low_rnn3 + +} diff --git a/doc/design/ops/images/rnn_2level_data.png b/doc/design/ops/images/rnn_2level_data.png new file mode 100644 index 0000000000000000000000000000000000000000..4be81b2430717a6a506342a09fc26899568574c6 Binary files /dev/null and b/doc/design/ops/images/rnn_2level_data.png differ diff --git a/doc/design/ops/rnn.md b/doc/design/ops/rnn.md new file mode 100644 index 0000000000000000000000000000000000000000..a78eea7d45e9e9553d153170aa31da55ec6e8289 --- /dev/null +++ b/doc/design/ops/rnn.md @@ -0,0 +1,153 @@ +# RNNOp design + +This document is about an RNN operator which requires that instances in a mini-batch have the same length. We will have a more flexible RNN operator. + +## RNN Algorithm Implementation + +

+ +

+ +The above diagram shows an RNN unrolled into a full network. + +There are several important concepts: + +- *step-net*: the sub-graph to run at each step, +- *memory*, $h_t$, the state of the current step, +- *ex-memory*, $h_{t-1}$, the state of the previous step, +- *initial memory value*, the ex-memory of the first step. + +### Step-scope + +There could be local variables defined in step-nets. PaddlePaddle runtime realizes these variables in *step-scopes* -- scopes created for each step. + +

+
+Figure 2 the RNN's data flow +

+ +Please be aware that all steps run the same step-net. Each step + +1. creates the step-scope, +2. realizes local variables, including step-outputs, in the step-scope, and +3. runs the step-net, which could use these variables. + +The RNN operator will compose its output from step outputs in step scopes. + +### Memory and Ex-memory + +Let's give more details about memory and ex-memory via a simply example: + +$$ +h_t = U h_{t-1} + W x_t +$$, + +where $h_t$ and $h_{t-1}$ are the memory and ex-memory of step $t$'s respectively. + +In the implementation, we can make an ex-memory variable either "refers to" the memory variable of the previous step, +or copy the value of the previous memory value to the current ex-memory variable. + +### Usage in Python + +For more information on Block, please refer to the [design doc](https://github.com/PaddlePaddle/Paddle/blob/develop/doc/design/block.md). + +We can define an RNN's step-net using Block: + +```python +import paddle as pd + +X = some_op() # x is some operator's output, and is a LoDTensor +a = some_op() + +# declare parameters +W = pd.Variable(shape=[20, 30]) +U = pd.Variable(shape=[20, 30]) + +rnn = pd.create_rnn_op(output_num=1) +with rnn.stepnet(): + x = rnn.add_input(X) + # declare a memory (rnn's step) + h = rnn.add_memory(init=a) + # h.pre_state() means previous memory of rnn + new_state = pd.add_two( pd.matmul(W, x) + pd.matmul(U, h.pre_state())) + # update current memory + h.update(new_state) + # indicate that h variables in all step scopes should be merged + rnn.add_outputs(h) + +out = rnn() +``` + +Python API functions in above example: + +- `rnn.add_input` indicates the parameter is a variable that will be segmented into step-inputs. +- `rnn.add_memory` creates a variable used as the memory. +- `rnn.add_outputs` mark the variables that will be concatenated across steps into the RNN output. + +### Nested RNN and LoDTensor + +An RNN whose step-net includes other RNN operators is known as an *nested RNN*. + +For example, we could have a 2-level RNN, where the top level corresponds to paragraphs, and the lower level corresponds to sentences. + +The following figure illustrates the feeding of text into the lower level, one sentence each step, and the feeding of step outputs to the top level. The final top level output is about the whole text. + +

+ +

+ +```python +import paddle as pd + +W = pd.Variable(shape=[20, 30]) +U = pd.Variable(shape=[20, 30]) + +W0 = pd.Variable(shape=[20, 30]) +U0 = pd.Variable(shape=[20, 30]) + +# a is output of some op +a = some_op() + +# chapter_data is a set of 128-dim word vectors +# the first level of LoD is sentence +# the second level of LoD is chapter +chapter_data = pd.Variable(shape=[None, 128], type=pd.lod_tensor, level=2) + +def lower_level_rnn(paragraph): + ''' + x: the input + ''' + rnn = pd.create_rnn_op(output_num=1) + with rnn.stepnet(): + sentence = rnn.add_input(paragraph, level=0) + h = rnn.add_memory(shape=[20, 30]) + h.update( + pd.matmul(W, sentence) + pd.matmul(U, h.pre_state())) + # get the last state as sentence's info + rnn.add_outputs(h) + return rnn + +top_level_rnn = pd.create_rnn_op(output_num=1) +with top_level_rnn.stepnet(): + paragraph_data = rnn.add_input(chapter_data, level=1) + low_rnn = lower_level_rnn(paragraph_data) + paragraph_out = low_rnn() + + h = rnn.add_memory(init=a) + h.update( + pd.matmul(W0, paragraph_data) + pd.matmul(U0, h.pre_state())) + top_level_rnn.add_outputs(h) + +# just output the last step +chapter_out = top_level_rnn(output_all_steps=False) +``` + +in above example, the construction of the `top_level_rnn` calls `lower_level_rnn`. The input is a LoD Tensor. The top level RNN segments input text data into paragraphs, and the lower level RNN segments each paragraph into sentences. + +By default, the `RNNOp` will concatenate the outputs from all the time steps, +if the `output_all_steps` set to False, it will only output the final time step. + + +

+ +

diff --git a/doc/design/optimizer.md b/doc/design/optimizer.md new file mode 100644 index 0000000000000000000000000000000000000000..17440fae5028cfac5d58fc079ca2096d0be3a0f9 --- /dev/null +++ b/doc/design/optimizer.md @@ -0,0 +1,105 @@ +## Optimizer Design + +### The Problem + +A PaddlePaddle program, or a block, is a sequence of operators operating variables. A training program needs to do three kinds of works: + +1. the forward pass, which computes intermediate results and the cost(s), +1. the backward pass, which derives gradients from intermediate results and costs, and +1. the optimization pass, which update model parameters to optimize the cost(s). + +These works rely on three kinds of operators: + +1. forward operators, +1. gradient operators, and +1. optimization operators. + +It's true that users should be able to create all these operators manually by calling some low-level API, but it would be much more convenient if they could only describe the forward pass and let PaddlePaddle create the backward and optimization operators automatically. + +In this design, we propose a high-level API that automatically derives the optimisation pass and operators from the forward pass. + + +### High-level Python API to describe the training process + +1. User write code to describe the network: + + ```python + images = layer.data("images") + labels = layer.data("labels") + w1 = pd.var("w1") + b1 = pd.var("b1") + hidden = layer.fc(images, w=w1, b=b1) + cost = layer.mse(hidden, labels) + ``` + + The above code snippet will create forward operators in [Block](https://github.com/PaddlePaddle/Paddle/blob/develop/doc/design/block.md). + + +2. Users create a certain kind of Optimizer with some argument. + + ```python + optimizer = AdagradOptimizer(learing_rate=0.001) + ``` + +3. Users use the optimizer to `minimize` a certain `cost` through updating parameters in parameter_list. + + ```python + opt_op_list = optimizer.minimize(cost, parameter_list=[w1, b1]) + ``` + The above code snippet will create gradient and optimization operators in Block. The return value of `minimize()` is list of optimization operators that will be run by session. + +4. Users use Session/Executor to run this opt_op_list as target to do training. + + ```python + sess.run(target= opt_op_list, ...) + ``` + +#### Optimizer Python interface: + +```python +class Optimizer(object): + """Optimizer Base class. + + """ + + def __init__(self): + pass + + def create_backward_pass(self, loss, parameter_list=None): + """ + create and add gradient Operators in BlockDesc to Compute gradients of `loss` + for parameters in parameter_list + + Args: + loss: an variable generated by cost function. + parameter_list: parameters that need to compute gradient and update to optimize the lost. + + Returns: + list of (parameters, gradients) pair. + """ + return None + + def create_optimization_pass(self, parameters_and_grads): + """Add optimization operators to update gradients to variables. + + Args: + parameters_and_grads: a list of (variable, gradient) pair to update. + + Returns: + optmization_op_list: a list of optimization operator that will update parameter using gradient. + """ + return None + + def minimize(self, loss, parameter_list): + """Add operations to minimize `loss` by updating `parameter_list`. + + This method combines interface `create_backward_pass()` and + `create_optimization_pass()` into one. + """ + params_grads = self.create_backward_pass(loss, parameter_list) + update_ops = self.create_optimization_pass(params_grads) + return update_ops + +``` + +Users can inherit the Optimizer above to create their own Optimizer with some special logic, such as AdagradOptimizer. diff --git a/doc/design/parameters_in_cpp.md b/doc/design/parameters_in_cpp.md new file mode 100644 index 0000000000000000000000000000000000000000..a7ac3f17c44ca94a669a8f1e283b291bceb42317 --- /dev/null +++ b/doc/design/parameters_in_cpp.md @@ -0,0 +1,41 @@ +# Design Doc: The C++ Class `Parameters` + +`Parameters` is a concept we designed in PaddlePaddle V2 API. `Parameters` is a container of parameters, which makes PaddlePaddle capable of sharing parameter between topologies. We described usages of `Parameter` in [api.md](./api.md). + +We used Python to implement Parameters when designing V2 API before. There are several defects for the current implementation: +* We just use `memcpy` to share Parameters between topologies, but this is very inefficient. +* We did not support sharing Parameters while training. We just trigger `memcpy` when start training. + +It is necessary that we implement Parameters in CPP side. However, it could result a code refactoring for PaddlePaddle, because PaddlePaddle was designed for training only one topology before, i.e., each GradientMachine contains its Parameter as a data member. In current PaddlePaddle implementation, there are three concepts associated with `Parameters`: + +1. `paddle::Parameter`. A `Parameters` is a container for `paddle::Parameter`. +It is evident that we should use `paddle::Parameter` when developing `Parameters`. +However, the `Parameter` class contains many functions and does not have a clear interface. +It contains `create/store Parameter`, `serialize/deserialize`, `optimize(i.e SGD)`, `randomize/zero`. +When we developing `Parameters`, we only use `create/store Parameter` functionality. +We should extract functionalities of Parameter into many classes to clean PaddlePaddle CPP implementation. + +2. `paddle::GradientMachine` and its sub-classes, e.g., `paddle::MultiGradientMachine`, `paddle::NeuralNetwork`. +We should pass `Parameters` to `paddle::GradientMachine` when `forward/backward` to avoid `memcpy` between topologies. +Also, we should handle multi-GPU/CPU training, because `forward` and `backward` would perform on multi-GPUs and multi-CPUs. +`Parameters` should dispatch the parameter value to each device, and gather the parameter gradient from each device. + +3. `paddle::ParameterUpdater`. The ParameterUpdater is used to update parameters in Paddle. +So `Parameters` should be used by `paddle::ParameterUpdater`, and `paddle::ParameterUpdater` should optimize `Parameters` (by SGD). + + +The step by step approach for implementation Parameters in PaddlePaddle C++ core is listed below. Each step should be a PR and could be merged into PaddlePaddle one by one. + +1. Clean `paddle::Parameter` interface. Extract the functionalities of `paddle::Parameter` to prepare for the implementation of Parameters. + +2. Implementation a `Parameters` class. It just stores the `paddle::Parameter` inside. Make `GradientMachine` uses `Parameters` as a class member. + +3. Make `Parameters` support Multi-CPU and Multi-GPU training to prepare for sharing `Parameter` between topologies. +Because we need share `Parameters` between topologies, it is `Parameters`'s response to exchange Parameters between GPUs. +`GradientMachine` should not handle how to exchange Parameters because `GradientMachine` only used to train one topology and we need to support train many topologies in Paddle, i.e., there could be many GradientMachines use one `Parameters`. + * We should use a global function to exchange Parameters between GPUs, not a member function in `Parameters`. The `MultiGradientMachine` invoke this function, which uses `Parameters` as this function inputs. + * The MultiGradientMachine contains many functionalities. Extracting the Parameters exchanging logic could make MultiGradientMachine clearer and simpler. + +4. Make `Parameters` as an argument for `forward/backward` function, not a data member for `GradientMachine`. For example, `forward` could be `forward(const Parameters& params, ...)` and `backward` could be `backward(Parameters* params, ...)`. After this step, Paddle could share `Parameters` between topologies. + +5. `ParameterUpdater` is invoked by `GradientMachine` and `Trainer`, but it updates `Parameters`. In the end of this code refactoring, we could change `ParameterUpdater` directly uses `Parameters` to make `ParameterUpdater`'s implementation clear. diff --git a/doc/design/program.md b/doc/design/program.md new file mode 100644 index 0000000000000000000000000000000000000000..bd2456787c4e336d357a65255a8274a7c9e465cc --- /dev/null +++ b/doc/design/program.md @@ -0,0 +1,139 @@ +# Design Doc: PaddlePaddle Programs + +## Compile and Execution + +A PaddlePaddle program consists of two parts -- the first generates a `ProgramDesc` protobuf message that describes the program, and the second runs this message using a C++ class `Executor`. + +A simple example PaddlePaddle program can be found in [graph.md](./graph.md): + +```python +x = layer.data("images") +l = layer.data("label") +y = layer.fc(x) +cost = layer.mse(y, l) +optimize(cost) +train(cost, reader=mnist.train()) +``` + +The first five lines of the following PaddlePaddle program generates, or, compiles, the `ProgramDesc` message. The last line runs it. + +## Programs and Blocks + +The basic structure of a PaddlePaddle program is some nested blocks, as a C++ or Java program. + +- program: some nested blocks +- [block](./block.md): + - some local variable definitions, and + - a sequence of operators + +The concept of block comes from usual programs. For example, the following C++ program has three blocks: + +```c++ +int main() { // block 0 + int i = 0; + if (i < 10) { // block 1 + for (int j = 0; j < 10; j++) { // block 2 + } + } + return 0; +} +``` + +The following PaddlePaddle program has three blocks: + +```python +import paddle as pd // block 0 + +x = minibatch([10, 20, 30]) # shape=[None, 1] +y = var(1) # shape=[1], value=1 +z = minibatch([10, 20, 30]) # shape=[None, 1] +cond = larger_than(x, 15) # [false, true, true] + +ie = pd.ifelse() +with ie.true_block(): // block 1 + d = pd.layer.add_scalar(x, y) + ie.output(d, pd.layer.softmax(d)) +with ie.false_block(): // block 2 + d = pd.layer.fc(z) + ie.output(d, d+1) +o1, o2 = ie(cond) +``` + +## `BlockDesc` and `ProgramDesc` + +All protobuf messages are defined in `framework.proto`. + +`BlockDesc` is straight-forward -- it includes local variable definitions, `vars`, and a sequence of operators, `ops`. + +```protobuf +message BlockDesc { + required int32 parent = 1; + repeated VarDesc vars = 2; + repeated OpDesc ops = 3; +} +``` + +The parent ID indicates the parent block so that operators in a block can refer to variables defined locally and also those defined in their ancestor blocks. + +All hierarchical blocks in a program are flattened and stored in an array. The block ID is the index of the block in this array. + +```protobuf +message ProgramDesc { + repeated BlockDesc blocks = 1; +} +``` + + +### Global Block + +The global block is the first one in the above array. + +## Operators that Use Blocks + +In the above example, the operator `IfElseOp` has two blocks -- the true branch and the false branch. + +The definition of `OpDesc` shows that an operator could have some attributes: + +```protobuf +message OpDesc { + AttrDesc attrs = 1; + ... +} +``` + +and an attribute could be of type block, which is, in fact, a block ID as described above: + +``` +message AttrDesc { + required string name = 1; + + enum AttrType { + INT = 1, + STRING = 2, + ... + BLOCK = ... + } + required AttrType type = 2; + + optional int32 block = 10; // when type == BLOCK + ... +} +``` + +## InferShape + +With this design, the InferShape function should take the following parameters: + +```c++ +void InferShape(int current_block, + int current_operator, + ProgramDesc* program // might change VarDesc values. + ) { + ... +} +``` + +where + +- `current_block` indices into `ProgramDesc::blocks`, +- `current_operator` indices into `BlockDesc::ops`. diff --git a/doc/design/python_api.md b/doc/design/python_api.md new file mode 100644 index 0000000000000000000000000000000000000000..56ae1d925a96622b5576013f38e33e5f92cbbb90 --- /dev/null +++ b/doc/design/python_api.md @@ -0,0 +1,220 @@ +# Design Doc: Python API + +Due to the refactorization of the PaddlePaddle core, we need Python classes to construct corresponding protobuf messages that describe a DL program. + +| Python classes | Protobuf messages | +| --- | --- | +| Program | ProgramDesc | +| Block | BlockDesc | +| Operator | OpDesc | +| Variable | VarDesc | + +Please be aware that these Python classes need to maintain some construction-time information, which are not part of the protobuf messages. + +## Core Concepts + +### Program + +A `ProgramDesc` describes a [DL program](https://github.com/PaddlePaddle/Paddle/blob/develop/doc/design/program.md), which is composed of an array of `BlockDesc`s. The `BlockDesc`s in a `ProgramDesc` can have a tree-like hierarchical structure. However, the `ProgramDesc` onlys stores a flattened array of `BlockDesc`s. A `BlockDesc` refers to its parent block by its index in the array. For example, operators in the step block of an RNN operator need to be able to access variables in its ancestor blocks. + +Whenever we create a block, we need to set its parent block to the current block, hence the Python class `Program` needs to maintain a data member `current_block`. + +```python +class Program(objects): + def __init__(self): + self.desc = core.NewProgram() # a C++ ProgramDesc pointer. + self.blocks = vector() + self.blocks.append(Block(self, -1)) # the global block + self.current_block = 0 # initialized to the global block + + def global_block(): + return self.blocks[0] + + def current_block(): + return self.get_block(self.current_block) + + def rollback(): + self.current_block = self.current_block().parent_idx + + def create_block(): + new_block_idx = len(self.block) + self.blocks.append(Block(self, self.current_block)) + self.current_block = new_block_idx + return current_block() +``` + +`Program` is an accessor to the protobuf message `ProgramDesc`, which is created in C++ space, because the InferShape function is in C++, which manipulates `VarDesc` messages, which are in turn members of `BlockDesc`, which is a member of `ProgramDesc`. + +`Program` creates the first block as the global block in its constructor. All parameters and their initializer operators are in the global block. + +### Block + +A [Block](https://github.com/PaddlePaddle/Paddle/blob/develop/doc/design/block.md) includes + +1. a map from variable names to an instance of the Python `Variable` class, and +1. a list of `Operator` instances. + +```python +class Block(objects): + def __init__(self, program, parent_idx): + self.desc = core.NewBlock(program.desc) + self.program = program + self.vars = map() + self.ops = vector() + self.parent_idx = parent_idx + + def create_var(self, ...): + return Variable(self, ...) + + def _create_global_var(self, ...): + program.global_block().create_var(...) + + def create_parameter(self, name, ...): + # Parameter is a subclass of variable. See Parameter section for details. + self.vars[name] = Parameter(self._create_global_var(...), ...) + return self.vars[name] + + def append_operator(self, ...): + self.ops.append(Operator(self, ...)) + + def prepend_operator(self, ...): # Parameter's ctor prepands initialize operators. + self.ops.prepend(Operator(self, ...)) +``` + +`create_parameter` is necessary because parameters are global variables, defined in the global block, but can be created in some sub-blocks. For example, an FC layer in the step block of an RNN operator. + +`prepend_operator` is necessary because the constructor of `Parameter` needs to create the initialize (or load) operator of the parameter, and would like to put it in the *preamble* of the global block. + +### Operator + +The `Operator` class fills in the `OpDesc` message and calls the C++ function `InferShape` to infer the output shapes from the input shapes. + +```python +class Operator(object): + def __init__(self, + block, # Block + type, # string + inputs, # dict + outputs,# dict + attrs # dict + ): + self.desc = core.NewOpDesc(block.desc, type, inputs, outputs, attrs) + core.infer_shape(self.desc, inputs, outputs) + + def type(self): + return self.desc.type() +``` + +`Operator` creates the `OpDesc` message in C++ space, so that it can call the `InferShape` function, which is in C++. + +### Variable + +Operators take Variables as its inputs and outputs. + +```python +class Variable(object): + def __init__(self, + block=None, # Block + name=None, # string + shape, # tuple + dtype="float32", # string + lod_level=None # int + ): + if name is None: + name = unique_name_generator() + self.name = name + self.block = block + self.desc = core.NewVarDesc(block.desc, name, shape, lod_level) + self.writer = None +``` + +Please be aware of `self.writer`, that tracks operator who creates the variable. It possible that there are more than one operators who write a variable, but in Python space, each write to a variable is represented by a Variable class. This is guaranteed by the fact that **`core.NewVarDesc` must NOT create a new `VarDesc` message if its name already exists in the specified block**. + +### Parameter + +A parameter is a global variable with an initializer (or load) operator. + +```python +class Parameter(Variable): + def __init__(self, + block=None, # Block + name=None, # string + shape, # tuple + dtype="float32", # string + lod_level=None # int + trainable, # bool + initialize_op_attrs, + optimize_op_attrs): + super(Parameter, self).__init__(block, name, shape, dtype, lod_level) + self.trainable = trainable + self.optimize_op_attrs = optimize_op_attrs + block.prepend(Operator(block, # Block + initialize_op_attrs['type'], # string + None, # no inputs + self, # output is the parameter + initialize_op_attrs) +``` + +When users create a parameter, they can call + +```python +program.create_parameter( + ..., + init_attr={ + type: "uniform_random", + min: -1.0, + max: 1.0, + }) +) +``` + +In above example, `init_attr.type` names an initialize operator. It can also name the load operator + +```python +init_attr={ + type: "load", + filename: "something.numpy", +} +``` + +`optimize_op_attrs` is not in the `VarDesc` message, but kept in the Python instance, as it will be used in the Python space when creating the optimize operator's `OpDesc`, and will be in the `OpDesc` message. + +## Layer Functions + +A layer is a Python function that creates some operators and variables. Layers simplify the work of application programmers. + +### Data Layer + +```python +def data_layer(name, type, column_name): + block = the_current_program.glolal_block() + var = block.create_global_var( + name=name, + shape=[None] + type.dims(), + dtype=type.dtype) + block.prepend_operator(block, + type="Feed", + inputs = None, + outputs = [var], + {column_name: column_name}) + return var +``` + +The input to the feed operator is a special variable in the global scope, which is the output of [Python readers](https://github.com/PaddlePaddle/Paddle/blob/develop/doc/design/reader/README.md). + +### FC Layer + +```python +def fc_layer(input, size, ...): + block = program.current_block() + w = block.create_parameter(...) + b = block.create_parameter(...) + out = block.create_var() + op = block.append_operator("FC", X=input, W=w, b=b, out=out) + out.writer = op + return out +``` + +## Optimizer + +[Optimizer Design Doc](./optimizer.md) diff --git a/doc/design/reader/README.md b/doc/design/reader/README.md index f21f7af520df5171798326818ecb97c3bcd14a12..320dccec3ddc7bfe6042f4e65b2518ea7b1ad24a 100644 --- a/doc/design/reader/README.md +++ b/doc/design/reader/README.md @@ -52,7 +52,7 @@ Here are valid outputs: # a mini batch of three data items, each data item is a list (single column). [([1,1,1],), ([2,2,2],), -([3,3,3],), +([3,3,3],)] ``` Please note that each item inside the list must be a tuple, below is an invalid output: diff --git a/doc/design/refactor/distributed_architecture.md b/doc/design/refactor/distributed_architecture.md new file mode 100644 index 0000000000000000000000000000000000000000..ac7e98ccf1aadbb973a4801fde842375cf63448c --- /dev/null +++ b/doc/design/refactor/distributed_architecture.md @@ -0,0 +1,222 @@ +# Design Doc: Distributed Training Architecture + +## Abstract + +PaddlePaddle v0.10.0 uses the "trainer-parameter server" +architecture. We run multiple replicated instances of trainers (runs +the same code written by the user) and parameter servers for +distributed training. This architecture served us well, but has some +limitations: + +1. Need to write special code to handle tasks which should only be run + by a single trainer. E.g., initializing model and saving model. + +2. Model parallelism is hard: need to write if-else branches conditioned + on the trainer ID to partition model onto each trainer, and manually + write the inter-model-shard communication code. + +3. The user can not directly specify the parameter update rule: need + to modify the parameter server C++ code and compile a new + binary. This adds complication for researchers: A lot of extra + effort is required. Besides, the training job submission program + may not allow running arbitrary binaries. + +This design doc discusses PaddlePaddle's new distributed training +architecture that addresses the above limitations. + +## Analysis + +We will assume the user writes the trainer program by Python, the same +analysis holds if the trainer program is written in C++. + +### Limitation 1 + +If we look at the Python code that the user writes, there are two +kinds of functionalities: + +- The training logic such as load / save model and print log. +- The neural network definition such as the definition of the data + layer, the fully connected layer, the cost function and the + optimizer. + +When we training with PaddlePaddle v0.10.0 distributedly, multiple +replicated Python instances are running on different nodes: both the +training logic and the neural network computation is replicated. + +The tasks that should only run once all belong to the training logic, +if we only replicate the neural network computation, but do **not** +replicate the training logic, the limitation could be solved. + +### Limitation 2 + +Model parallelism means running a single model on multiple nodes by +partitioning the model onto different nodes and managing the +inter-model-shard communications. + +PaddlePaddle should be able to modify the nerual network computation +definition to support model parallelism automatically. However, the +computation is only specified in Python code, and PaddlePaddle can not +modify Python code. + +Just like compiler uses a intermediate representation (IR) so that +programmer does not need to manually optimize their code in most of +the cases - the compiler will optimize the IR: + + + +We can have our own IR too: PaddlePaddle can support model parallel by +converting the IR so the user no longer need to manually do it in +Python: + + + +The IR for PaddlePaddle after refactor is called `Block`, it specifies +the computation dependency graph and the variables used in the +computation. + +### Limitation 3 + +The user can not directly specify the parameter update rule for the +parameter server because the parameter server does not use the same +computation definition as the trainer. Instead, the update rule is +baked in the parameter server. The user can not specify the update +rule in the same way of specifying the trainer computation. + +This could be fixed by making the parameter server run the same +computation definition as the trainer. For a detailed explanation, +please +see +[Design Doc: Operation Graph Based Parameter Server](./dist_train.md) + +## Distributed Training Architecture + +The new distributed training architecture can address the above +limitations. Below is the illustration: + + + +The architecture includes major components: *PaddlePaddle Python*, +*PaddlePaddle converter* and *PaddlePaddle runtime*: + +### PaddlePaddle Python + +PaddlePaddle Python is the Python library that user's Python trainer +invoke to build the neural network topology, start training, etc. + +```Python +paddle.init() +input = paddle.op.recordIO("/home/data/mnist.recordio") # file stored on the cluster +img, label = input[0], input[1] +hidden = paddle.layer.fc(input=img, size=200, act=paddle.activation.Tanh()) +prediction = paddle.layer.fc(input=img, size=10, act=paddle.activation.Softmax()) +cost = paddle.layer.classification_cost(input=prediction, label=label) +optimizer = paddle.optimizer.SGD(cost, learning_rate=0.01) +session = paddle.session.NewRemote(num_trainer=3, num_ps=2, GPU_per_trainer=1) +for i in range(1000): + _, cost_val = session.eval(targets=[cost, optimizer]) + print cost_val +``` + +The code above is a typical Python trainer code, the neural network +topology is built using helper functions such as +`paddle.layer.fc`. The training is done by calling `session.eval` +iteratively. + +#### session.eval + +As shown in the graph, `session.eval` sends the IR and the evaluation +inputs/targets to the PaddlePaddle cluster for evaluation. The +targets can be any variable in the computation graph. When the target +is the `optimizer` variable, the neural network will be optimized +once. When the target is the `cost` variable, `session.eval` returns +the cost value. + +The Python `session` is a wrapper of the C++ `Session` class. For more +information about `Session`, please +see [Design Doc: Session](./session.md). + +### PaddlePaddle Converter + +PaddlePaddle converter automatically converts the IR in the request +(IR and evaluation inputs/targets) from PaddlePaddle Python to new +partitioned IRs and dispatch the new IRs and evaluation inputs/targets +to different PaddlePaddle runtimes. Below are the steps: + +1. Add `feed` OP that feeds the eval inputs, and `fetch` OP that + fetches the eval targets to the IR. + +1. Extract a new computation (sub)graph with `feed` and `fetch` OP as + the boundary. The runtime does not need to run the OP that is not + dependent by the `fetch` OP. + +1. Optimizes the computation graph. + +1. Place the OPs in the graph onto different devices on different + PaddlePaddle runtime according to a placement algorithm and device + constraint specified by the user. + +1. Partition the graph according to runtime boundaries and add `send` / + `recv` OP pair on the runtime boundaries. + +1. Dispatch the partitioned graph to different PaddlePaddle runtimes. + +1. PaddlePaddle runtimes with the `fetch` OP reports evaluation + results back to the converter, the convert reports the evaluation + results back to the PaddlePaddle Python. + +The output IRs will be cached to optimize the conversion latency. + + +#### Placement Algorithm + +Our first implementation will only support "trainer-parameter server" +placement: the parameters, initializers, and optimizers are placed on +the PaddlePaddle runtimes with the parameter server role. And +everything else will be placed on the PaddlePaddle runtimes with the +trainer role. This has the same functionality of our +"trainer-parameter server" architecture of PaddlePaddle v0.10.0, but +is more general and flexible. + +In the future, we will implement the general placement algorithm, +which makes placements according to the input IR, and a model of +device computation time and device communication time. Model +parallelism requires the general placement algorithm. + + +### PaddlePaddle Runtime + +The PaddlePaddle runtime owns multiple devices (e.g., CPUs, GPUs) and +runs the IR. The runtime does not need to do OP placement since it's +already done by the converter. + + +### Local Training Architecture + +The local training architecture will be the same as the distributed +training architecture, the differences are everything runs locally, +and there is just one PaddlePaddle runtime: + + + + +### Training Data + +In PaddlePaddle v0.10.0, training data is typically read +with [data reader](../reader/README.md) from Python. This approach is +no longer efficient when training distributedly since the Python +process no longer runs on the same node with the trainer processes, +the Python reader will need to read from the distributed filesystem +(assuming it has the access) and send to the trainers, doubling the +network traffic. + +When doing distributed training, the user can still use Python data +reader: the training data are sent with `session.eval`. However should +be used for debugging purpose only. The users are encouraged to use +the read data OPs. + + +## References: + +[1] [TensorFlow: Large-Scale Machine Learning on Heterogeneous Distributed Systems](https://static.googleusercontent.com/media/research.google.com/en//pubs/archive/45166.pdf) + +[2] [TensorFlow: A System for Large-Scale Machine Learning](https://www.usenix.org/system/files/conference/osdi16/osdi16-abadi.pdf) diff --git a/doc/design/refactor/parameter_server.md b/doc/design/refactor/parameter_server.md new file mode 100644 index 0000000000000000000000000000000000000000..fa3c5d7990213cf2b0d236e66e592dd2699da876 --- /dev/null +++ b/doc/design/refactor/parameter_server.md @@ -0,0 +1,106 @@ +# Design Doc: Operation Graph Based Parameter Server + +## Abstract + +We propose an approach to implement the parameter server. In this +approach, there is no fundamental difference between the trainer and +the parameter server: they both run subgraphs, but subgraphs of +different purposes. + +## Background + +The previous implementations of the parameter server does not run a +subgraph. parameter initialization, optimizer computation, network +communication and checkpointing are implemented twice on both the +trainer and the parameter server. + +It would be great if we can write code once and use them on both the +trainer and the parameter server: reduces code duplication and +improves extensibility. Given that after the current refactor, we are +representing everything as a computing graph on the +trainer. Representing everything as a computing graph on the parameter +server becomes a natural extension. + +## Design + +### Graph Converter + +The *graph converter* converts the user-defined operation (OP) graph +into subgraphs to be scheduled on different nodes with the following +steps: + +1. OP placement: the OPs will be placed on different nodes according + to heuristic that minimizes estimated total computation + time. Currently we will use a simple heuristic that puts parameter + varable on parameter server workers and everything else on trainer + workers. + +1. Add communication OPs to enable the communication between nodes. + +We will need these OPs: *Send*, *Recv*, *Enqueue*, *Dequeue*. + +Below is an example of converting the user defined graph to the +subgraphs for the trainer and the parameter server: + + + +After converting: + + + +1. The parameter variable W and it's optimizer subgraph are placed on the parameter server. +1. Operators are added to the subgraphs. + - *Send* sends data to the connected *Recv* operator. The + scheduler on the receive node will only schedule *Recv* operator + to run when the *Send* operator has ran (the *Send* OP will mark + the *Recv* OP runnable automatically). + - *Enueue* enqueues the input variable, it can block until space + become available in the queue. + - *Dequeue* outputs configurable numbers of tensors from the + queue. It will block until the queue have the required number of + tensors. + + +### Benefits + +- Model parallelism become easier to implement: it's an extension to + the trainer - parameter server approach. we already have the + communication OPs, but need to extend the graph converter's + placement functionality. + +- User-defined optimizer is easier to add - user can now express it as + a subgraph. + +- No more duplication logic inside the trainer and the parameter + server mentioned in the background section. + +### Challenges + +- It might be hard for the graph converter to cut a general graph + (without any hint for which subgraph is the optimizer). We may need + to label which subgraph inside the OP graph is the optimizer. + +- It's important to balance the parameter shards of on multiple + parameter server. If a single parameter is very big (some + word-embedding, fully connected, softmax layer), we need to + automatically partition the single parameter onto different + parameter servers when possible (only element-wise optimizer depends + on the parameter variable). + +### Discussion + +- In the "Aync SGD" figure, the "W" variable on the parameter server + could be read and wrote concurrently, what is our locking strategy? + E.g., each variable have a lock cpp method to be invoked by every + OP, or, have a lock OP. + +- Can the Enqueue OP be implemented under our current tensor design + (puts the input tensor into the queue tensor)? + +- *Dequeue* OP will have variable numbers of output (depends on the + `min_count` attribute), does our current design support it? (similar + question for the *Add* OP) + + +### References: +[1] [TensorFlow: Large-Scale Machine Learning on Heterogeneous Distributed Systems](https://static.googleusercontent.com/media/research.google.com/en//pubs/archive/45166.pdf) diff --git a/doc/design/refactor/session.md b/doc/design/refactor/session.md new file mode 100644 index 0000000000000000000000000000000000000000..1d9a26683c14f54e3b5fe41675cd03b5620646b8 --- /dev/null +++ b/doc/design/refactor/session.md @@ -0,0 +1,180 @@ +# Design Doc: Session + +## Abstract + +The *session* object encapsulates the environment in which the +computation graph is executed. + +We will have the *local* session and *remote* session, they offer the +same [interface](#interface). The local session encapsulates the local +runtime environment and the remote session encapsulates the cluster +runtime environment. + +The local runtime environment contains: + +1. computation devices (i.e., CPU, GPU) handles, and +1. the [scope](../scope.md) which holds all variables. + +The remote runtime environment contains: + +1. computation devices (i.e., CPU and GPU on node 0, 1) in a cluster, + and +1. the distributed [scope](../scope.md) in a cluster which holds all + variables. + +The user can create a remote session on Paddle Cloud and evaluate the +computation graph with it. In this way, the user can control the +remote computation resource in a cluster from his local computer. + + +## Background + +The current design has an implicit global session in which +`paddle.eval()` is executed. The pain point is: + +Since the user is not able to explicitly switch between runtime +environments, the user cannot run a topology in two independent +environments. + +For example, in reinforcement learning, the user may want to have a +stale model for inference and a fresh model for training, and only +replace the stale model with the fresh model periodically. + +Furthermore, we have no concept that encapsulates a remote environment +that executes a computation graph. + +We need the session object to address above issues. + + +## Session + +A session is an object that owns the runtime environment. All +computations are executed through `session.eval()`. + + +### Interface + +```python +eval( + targets, + feed_dict=None, +) +``` + +Evaluates the target Operations or Variables in `targets`. + +- *targets*: the evaluation targets. Can be a single Operation or + Variable, or a list with the Operations or Variables as + elements. The value returned by `eval()` has the same shape as the + `target` argument. + + The PaddlePaddle program is represented by + the [ProgramDesc](../design/program.md), `eval()` will infer the + ProgramDesc from the given targets and run the PaddlePaddle + program. Please + see + [this graph](./distributed_architecture.md#local-training-architecture) for + the detailed illustration for the local session + and + [this graph](./distributed_architecture.md#distributed-training-architecture) for + the detailed illustration for the remote session. + +- *feed_dict*: a dictionary that contains the tensors which override + the edges of the computation graph. + + feed_dict not only can provide the input data, it can override any + OP's input as well: + + ```python + a = pd.constant(2.0, name="a") + b = pd.variable(name="b") + c = pd.mul(a,b) + sess.eval(targets=c, feed_dict={"b":3.0}) # returns 6.0 + ``` + +```python +close() +``` + +Closes the session and releases the scope that the session owns. + + +### Create a Local Session + +```python +session( + devices=None +) +``` + +Creates a new session. One session owns one global scope, so creating +multiple sessions will create different scopes. + +- *devices*: a single `string` or a list of `string` of device names, + the corresponding devices will be the computation devices for + `eval()`. If not specified, all available devices (e.g., all GPUs) + will be used. The user doesn't need to specify the CPU device since + it will be always used. Multiple sessions can use the same device. + + +#### Example + +```Python +a = paddle.constant(1.0) +b = paddle.constant(2.0) +c = a + b +sess = paddle.session(devices=["gpu:0", "gpu:1", "fpga:0"]) +sess.eval(c) +sess.close() +``` + +### Create a Remote Session + +```python +create_cloud_job( + name, + num_trainer, + mem_per_trainer, + gpu_per_trainer, + cpu_per_trainer, + num_ps, + mem_per_ps, + cpu_per_ps, +) +``` + +Creates a Paddle Cloud job. Fails if the job name exists. + +```python +get_cloud_job( + name +) +``` + +Gets a Paddle Cloud job. + +```python +remote_session( + job +) +``` + +- *job*: the Paddle Cloud job. + +#### Example + +```Python +reader = paddle.reader.recordio("/pfs/home/peter/mnist-train-*") # data stored on Paddle Cloud +image = reader.column(0) +label = reader.column(1) +fc1 = paddle.op.fc(image, size=256, act="sigmoid") +fc2 = paddle.op.fc(fc1, size=10, act="softmax") +cost = paddle.op.cross_entropy(fc2, label) +opt = paddle.optimizer.sgd(cost) + +job = paddle.create_cloud_job("test", 3, "1G", 1, 1, 2, "1G", 1) +sess = paddle.remote_ession(job) +for i in range(1000): + sess.eval(opt) +sess.close() +``` diff --git a/doc/design/refactor/src/compiler.graffle b/doc/design/refactor/src/compiler.graffle new file mode 100644 index 0000000000000000000000000000000000000000..8cc678fea3c820103e7ce81f7a5d625d6c1d92de Binary files /dev/null and b/doc/design/refactor/src/compiler.graffle differ diff --git a/doc/design/refactor/src/compiler.png b/doc/design/refactor/src/compiler.png new file mode 100644 index 0000000000000000000000000000000000000000..65d34f841afce9756def07dd8ecb9ca44e658bfe Binary files /dev/null and b/doc/design/refactor/src/compiler.png differ diff --git a/doc/design/refactor/src/dist-graph.graffle b/doc/design/refactor/src/dist-graph.graffle new file mode 100644 index 0000000000000000000000000000000000000000..941399c6ced8d5f65b6c595522b770c88259df4b Binary files /dev/null and b/doc/design/refactor/src/dist-graph.graffle differ diff --git a/doc/design/refactor/src/dist-graph.png b/doc/design/refactor/src/dist-graph.png new file mode 100644 index 0000000000000000000000000000000000000000..3546b09f1c2ee3e4f60f519d5e47f823f08051a7 Binary files /dev/null and b/doc/design/refactor/src/dist-graph.png differ diff --git a/doc/design/refactor/src/distributed_architecture.graffle b/doc/design/refactor/src/distributed_architecture.graffle new file mode 100644 index 0000000000000000000000000000000000000000..f8496e57326c38de7468eb452a7713291d57653c Binary files /dev/null and b/doc/design/refactor/src/distributed_architecture.graffle differ diff --git a/doc/design/refactor/src/distributed_architecture.png b/doc/design/refactor/src/distributed_architecture.png new file mode 100644 index 0000000000000000000000000000000000000000..410c4510c6aab301dec95e6427fe80ac24e105fe Binary files /dev/null and b/doc/design/refactor/src/distributed_architecture.png differ diff --git a/doc/design/refactor/src/local-graph.graffle b/doc/design/refactor/src/local-graph.graffle new file mode 100644 index 0000000000000000000000000000000000000000..19e509bd9af3c1e9a3f5e0f16ddd281457a339c5 Binary files /dev/null and b/doc/design/refactor/src/local-graph.graffle differ diff --git a/doc/design/refactor/src/local-graph.png b/doc/design/refactor/src/local-graph.png new file mode 100644 index 0000000000000000000000000000000000000000..ada51200f793a9bb18911e7d63cfdb3244b967d7 Binary files /dev/null and b/doc/design/refactor/src/local-graph.png differ diff --git a/doc/design/refactor/src/local_architecture.graffle b/doc/design/refactor/src/local_architecture.graffle new file mode 100644 index 0000000000000000000000000000000000000000..cc7783c45381f25ded0b898649322c81418ad317 Binary files /dev/null and b/doc/design/refactor/src/local_architecture.graffle differ diff --git a/doc/design/refactor/src/local_architecture.png b/doc/design/refactor/src/local_architecture.png new file mode 100644 index 0000000000000000000000000000000000000000..4b999538b7825c805292ee28b5e3256d5543bd09 Binary files /dev/null and b/doc/design/refactor/src/local_architecture.png differ diff --git a/doc/design/refactor/src/paddle-compile.graffle b/doc/design/refactor/src/paddle-compile.graffle new file mode 100644 index 0000000000000000000000000000000000000000..a6348cc3dbcaca923c6e794681b2edb85cb9f8f6 Binary files /dev/null and b/doc/design/refactor/src/paddle-compile.graffle differ diff --git a/doc/design/refactor/src/paddle-compile.png b/doc/design/refactor/src/paddle-compile.png new file mode 100644 index 0000000000000000000000000000000000000000..e0f13d551ac41afaec627a57dea79356464bf0bf Binary files /dev/null and b/doc/design/refactor/src/paddle-compile.png differ diff --git a/doc/design/refactorization.md b/doc/design/refactorization.md new file mode 100644 index 0000000000000000000000000000000000000000..ec51aa1a0ec667175ff7215dcd359023e296769f --- /dev/null +++ b/doc/design/refactorization.md @@ -0,0 +1,254 @@ +# Design Doc: Refactorization Overview + +The goals of refactoring include: + +1. Making it easy for external contributors to write new elementary computation operations. +1. Making the codebase clean and readable. +1. Designing a new computation representation -- a computation graph of operators and variables. +1. Implementing auto-scalability and auto fault recoverable distributed computing with the help of computation graphs. + +## Computation Graphs + +1. PaddlePaddle represents the computation, training and inference of Deep Learning models, by computation graphs. + + 1. Please refer to [computation graphs](https://github.com/PaddlePaddle/Paddle/blob/develop/doc/design/graph.md) for a concrete example. + +1. Users write Python programs to describe the graphs and run them (locally or remotely). + +1. A graph is composed of *variables* and *operators*. + +1. The description of graphs must be serializable/deserializable, so that: + + 1. It can be sent to the cloud for distributed execution, and + 1. It can be sent to clients for mobile or enterprise deployment. + +1. The Python program does two things + + 1. *Compilation* runs a Python program to generate a protobuf message representation of the graph and send it to + 1. the C++ library `libpaddle.so` for local execution, + 1. the master process of a distributed training job for training, or + 1. the server process of a Kubernetes serving job for distributed serving. + 1. *Execution* executes the graph by constructing instances of class [`Variable`](https://github.com/PaddlePaddle/Paddle/blob/develop/paddle/framework/variable.h#L24) and [`OperatorBase`](https://github.com/PaddlePaddle/Paddle/blob/develop/paddle/framework/operator.h#L70), according to the protobuf message. + +## Description and Realization of Computation Graph + +At compile time, the Python program generates a protobuf message representation of the graph, or a description of the graph. + +At runtime, the C++ program realizes the graph and runs it. + +| | Representation (protobuf messages) | Realization (C++ class objects) | +|---|---|---| +|Data|[VarDesc](https://github.com/PaddlePaddle/Paddle/blob/develop/paddle/framework/framework.proto#L107)|[Variable](https://github.com/PaddlePaddle/Paddle/blob/develop/paddle/framework/variable.h#L24)| +|Operation|[OpDesc](https://github.com/PaddlePaddle/Paddle/blob/develop/paddle/framework/framework.proto#L35)|[Operator](https://github.com/PaddlePaddle/Paddle/blob/develop/paddle/framework/operator.h#L64)| +|Block|BlockDesc|Block| + +The word *graph* is interchangeable with *block* in this document. A graph consists of computation steps and local variables similar to a C++/Java program block, or a pair of parentheses(`{` and `}`). + +## Compilation and Execution + +1. Run a Python program to describe the graph. In particular, the Python application program does the following: + + 1. Create `VarDesc` to represent local/intermediate variables, + 1. Create operators and set attributes, + 1. Validate attribute values, + 1. Infer the type and the shape of variables, + 1. Plan memory-reuse for variables, + 1. Generate the backward graph + 1. Add optimization operators to the computation graph. + 1. Optionally, split the graph for distributed training. + +1. The invocation of `train` or [`infer`](https://github.com/PaddlePaddle/Paddle/blob/develop/python/paddle/v2/inference.py#L108) methods in the Python program does the following: + + 1. Create a new Scope instance in the [scope hierarchy](https://github.com/PaddlePaddle/Paddle/blob/develop/doc/design/scope.md) for each run of a block, + 1. realize local variables defined in the BlockDesc message in the new scope, + 1. a scope is similar to the stack frame in programming languages, + + 1. Create an instance of class `Block`, in which, + 1. realize operators in the BlockDesc message, + + 1. Run the Block by calling + 1. `Block::Eval(vector* targets)` for forward and backward computations, or + 1. `Block::Eval(vector* targets)` for optimization. + + +## Intermediate Representation (IR) + +```text +Compile Time -> IR -> Runtime +``` + +### Benefits of IR + +- Optimization + ```text + Compile Time -> IR -> Optimized IR -> Runtime + ``` +- Automatically send partitioned IR to different nodes. + - Automatic Data Parallelism + ```text + Compile Time + |-> Single GPU IR + |-> [trainer-IR-0, trainer-IR-1, pserver-IR] + |-> Node-0 (runs trainer-IR-0) + |-> Node-1 (runs trainer-IR-1) + |-> Node-2 (runs pserver-IR) + ``` + - Automatic Model Parallelism (planned for future) + +--- + +# Operator/OpWithKernel/OpKernel + +![class_diagram](http://api.paddlepaddle.org/graphviz?dot=https://gist.githubusercontent.com/reyoung/53df507f6749762675dff3e7ce53372f/raw/49caf1fb70820fb4a6c217634317c9306f361f36/op_op_with_kern_class_diagram.dot) + +--- + +# Operator +![class_diagram](http://api.paddlepaddle.org/graphviz?dot=https://gist.githubusercontent.com/reyoung/53df507f6749762675dff3e7ce53372f/raw/dd598e8f1976f5759f58af5e5ef94738a6b2e661/op.dot) + +* `Operator` is the fundamental building block of the user interface. + * Operator stores input/output variable names and attributes. + * The `InferShape` interface is used to infer the shape of the output variables based on the shapes of the input variables. + * Use `Run` to compute the `output` variables from the `input` variables. + +--- + +# OpWithKernel/Kernel + +![class_diagram](http://api.paddlepaddle.org/graphviz?dot=https://gist.githubusercontent.com/reyoung/53df507f6749762675dff3e7ce53372f/raw/9d7f4eba185cf41c8e2fbfb40ae21890dbddcd39/op_with_kernel.dot) + +* `OpWithKernel` inherits `Operator`. +* `OpWithKernel` contains a Kernel map. + * `OpWithKernel::Run` get device's kernel, and invoke `OpKernel::Compute`. + * `OpKernelKey` is the map key. Only device place now, but may be data type later. + +--- + +# Why separate Kernel and Operator + +* Separate GPU and CPU code. + * Make Paddle capable of running without GPU. +* Make one operator (which is a user interface) and create many implementations. + * For example, same multiplication op can have different implementations kernels such as FP16 kernel, FP32 kernel, MKL, eigen kernel. +--- + +# Libraries for Kernel development + +* `Eigen::Tensor` contains basic math and element-wise functions. + * Note that `Eigen::Tensor` has broadcast implementation. + * Limit the number of `tensor.device(dev) = ` in your code. +* `thrust::transform` and `std::transform`. + * `thrust` has the same API as C++ standard library. Using `transform`, one can quickly implement customized element-wise kernels. + * `thrust`, in addition, supports more complex APIs, like `scan`, `reduce`, `reduce_by_key`. +* Hand-writing `GPUKernel` and `CPU` code + * Do not write in header (`.h`) files. CPU Kernel should be in cpp source (`.cc`) and GPU kernels should be in cuda (`.cu`) files. (GCC cannot compile GPU code.) +--- +# Operator Registration + +## Why is registration necessary? +We need a method to build mappings between Op type names and Op classes. + +## How is registration implemented? +Maintaining a map, whose key is the type name and the value is the corresponding Op constructor. + +--- +# The Registry Map + +### `OpInfoMap` + +`op_type(string)` -> `OpInfo` + +`OpInfo`: + +- **`creator`**: The Op constructor. +- **`grad_op_type`**: The type of the gradient Op. +- **`proto`**: The Op's Protobuf, including inputs, outputs and required attributes. +- **`checker`**: Used to check attributes. + +--- +# Related Concepts + +### Op_Maker +It's constructor takes `proto` and `checker`. They are completed during Op_Maker's construction. ([ScaleOpMaker](https://github.com/PaddlePaddle/Paddle/blob/develop/paddle/operators/scale_op.cc#L37)) + +### Register Macros +```cpp +REGISTER_OP(op_type, op_class, op_maker_class, grad_op_type, grad_op_class) +REGISTER_OP_WITHOUT_GRADIENT(op_type, op_class, op_maker_class) +``` + +### USE Macros +Make sure the registration process is executed and linked. + +--- +# Registration Process +1. Write an Op class and its gradient Op class, if required. +2. Write an Op maker class. In the constructor of this class, describe the inputs, outputs and attributes of the operator. +3. Invoke the macro `REGISTER_OP`. This macro will + 1. Call maker class to complete `proto` and `checker` + 2. Using the completed `proto` and `checker`, it will add a new key-value pair to the `OpInfoMap` + +4. Invoke the `USE` macro in which the Op is used to make sure that it is linked. + +--- +# Backward Module (1/2) +### Create Backward Operator +- Mapping from forward Op to backward Op +![backward](https://gist.githubusercontent.com/dzhwinter/a6fbd4623ee76c459f7f94591fd1abf0/raw/61026ab6e518e66bde66a889bc42557a1fccff33/backward.png) + +--- +# Backward Module (2/2) +### Build Backward Network +- **Input**: a graph of forward operators +- **Output**: a graph of backward operators +- **Corner cases in construction** + - Shared Variables => insert an `Add` operator to combine gradients + - No Gradient => insert a `fill_zero_grad` operator + - Recursive NetOp => call `Backward` recursively + - RNN Op => recursively call `Backward` on stepnet + - RNN Op => recursively call `Backward` on stepnet + + +--- +# Scope, Variable, Tensor + +* `Tensor` is an n-dimension array with type. + * Only dims and data pointers are stored in `Tensor`. + * All operations on `Tensor` are written in `Operator` or global functions. + * Variable length Tensor design [LoDTensor](https://github.com/PaddlePaddle/Paddle/blob/develop/paddle/framework/lod_tensor.md) +* `Variable` instances are the inputs and the outputs of an operator, not just `Tensor`. + * `step_scopes` in RNN is a variable and not a tensor. +* `Scope` is where variables are stored. + * map + * `Scope` has a hierarchical structure. The local scope can get variables from its parent scope. + +--- +# Block (in design) +## the difference between original RNNOp and Block +- As an operator is more intuitive than `RNNOp`, +- Offers a new interface `Eval(targets)` to deduce the minimal block to `Run`, +- Fits the compile-time/ runtime separation design paradigm. + - During the compilation, `SymbolTable` stores `VarDesc`s and `OpDesc`s and serialize to a `BlockDesc` + - When graph executes, a Block with `BlockDesc` is passed. It then creates `Op` and `Var` instances and then invokes `Run`. + +--- +# Milestone +- Take Paddle/books as the main line, the requirement of the models motivates framework refactoring, +- Model migration + - Framework development gives **priority support** to model migration, for example, + - the MNIST demo needs a Python interface, + - the RNN models require the framework to support `LoDTensor`. + - Determine some timelines, + - Frequently used Ops need to be migrated first, + - Different models can be migrated in parallel. +- Improve the framework at the same time +- Accept imperfection, concentrate on solving the specific problem at the right price. + +--- +# Control the migration quality +- Compare the performance of migrated models with old ones. +- Follow the google C++ style guide. +- Build the automatic workflow of generating Python/C++ documentations. + - The documentation of layers and ops should be written inside the code. + - Take the documentation quality into account when submitting pull requests. + - Preview the documentations, read and improve them from a user's perspective. diff --git a/doc/design/register_grad_op.md b/doc/design/register_grad_op.md new file mode 100644 index 0000000000000000000000000000000000000000..3cf8a59446d244bb3a388b87b14273d9096c839a --- /dev/null +++ b/doc/design/register_grad_op.md @@ -0,0 +1,90 @@ +# Design Doc: Gradient Operators Registration + + +## The Problem Posed + +In our current operator registration mechanism, for each operator, the programmer should register a *gradient operator creator* function, which takes a C++ operator instance, and returns the corresponding gradient instance. + +However, as we decided to separate the *compilation* and *execution* of DL models, we need to reshape the creator to take a protobuf `OpDesc` message, and returns a corresponding message. + +More than that, the new registration mechanism need to support the fact that an operators' gradient computation might be a composition of operators. + +## Current Implementation + +OpInfos store in a association map which key is the operator type. The `grad_op_type` indicate associated gradient operator type. Operator can create gradient operator by `OpInfo::creator_` of gradient. The pseudo code is + +```cpp +struct OpInfo { + std::function creator_; + std::string grad_op_type_; + ... +}; + +map OpInfoMap; + +OperatorBase* CreateGradientOperator(const OperatorBase& op) { + return OpInfoMap.at(op.Type()).creator_(...); +} +``` + +## Proposed Solution + +The mapping relationship between an operator and its gradient operators is a function. The interface of that function is: + +```cpp +// (OpDesc) --> vector +std::function(const OpDescBind&)>; +``` + +The function takes an `OpDescBind` of the forward operator and returns one or many gradient operator descriptions. `OpDescBind` is a C++ wrapper for protobuf message `OpDesc` to manipulate `OpDesc` fast. + +The `GradOpDescMaker` will be registered in `OpInfo`, to replace `grad_op_type_` field. The `OpInfo` should be + +```cpp +struct OpInfo { + std::function>(const OpDescBind&)> grad_op_maker_; + ... +}; +``` + +The `grad_op_maker_ ` is `nullptr` if the operator does not have associated gradient operators. + +We propose a base class called `GradOpDescMakerBase` to let operator developers generate `Gradient Operators` easily. The public interface of that class is + +```cpp +class GradOpDescMakerBase { +public: + GradOpDescMakerBase(const OpDescBind& ); + virtual std::vector> operator()()const = 0; +}; +``` + +We can convert `GradOpDescMakerBase` to `std::function>(const OpDescBind&)>` by + +```cpp +using GradOpMaker = ...; +std::function(const OpDescBind&)> func; +func = [] (const OpDescBind& fwd_op) { + GradOpMaker maker(fwd_op); + return maker(); +}; +``` + +We can write many helper functions since the `GradOpDescMakerBase` is a class now. The basic helper functions get the variables of `Input`, `Output`, `InputGradient` and `OutputGradient` in the forwarding operator. + +We should chagne register macros at the same time. In the current solution, there is no difference between forwarding operators and backward operators. So `REGISTER_OP` just register one operator. If the `REGISTER_OPERATOR ` contains `OpProtoAndCheckerMaker` and `GradOpDescMaker`, we just list them in the same macro. It can be done by a macro contains `__VA_ARGS__`. + +The user interface should be + +```cpp +vector MinusOpGradMaker(OpDesc) {...} +REGISTER_OPERATOR(minus, MinusOp, MinusOpProtoAndCheckerMaker, SumOpGradMaker); +// Developers can still manually implement gradient operator. +REGISTER_OPERATOR(minus_grad, MinusGradOp); +``` + +The interface of current `REGISTER_OP` macro could not be changed. In `REGISTER_OP`, it will invoke `REGISTER_OPERATOR` two times and generate GradOpDescMaker inside. + +```cpp +REGISTER_OP(minus, MinusOp, MinusOpProtoAndCheckerMaker, minus_grad, MinusGradOp); +``` diff --git a/doc/design/releasing_process.md b/doc/design/releasing_process.md new file mode 100644 index 0000000000000000000000000000000000000000..62ff8f3229bbbb5bc82e4da29259baffc30c2c87 --- /dev/null +++ b/doc/design/releasing_process.md @@ -0,0 +1,67 @@ +# PaddlePaddle发行规范 + +PaddlePaddle使用git-flow branching model做分支管理,使用[Semantic Versioning](http://semver.org/)标准表示PaddlePaddle版本号。 + +PaddlePaddle每次发新的版本,遵循以下流程: + +1. 从`develop`分支派生出新的分支,分支名为`release/版本号`。例如,`release/0.10.0` +2. 将新分支的版本打上tag,tag为`版本号rc.Patch号`。第一个tag为`0.10.0rc1`,第二个为`0.10.0rc2`,依次类推。 +3. 对这个版本的提交,做如下几个操作: + * 编译这个版本的Docker发行镜像,发布到dockerhub。如果失败,修复Docker编译镜像问题,Patch号加一,返回第二步 + * 编译这个版本的Ubuntu Deb包。如果失败,修复Ubuntu Deb包编译问题,Patch号加一,返回第二步。 + * 使用Regression Test List作为检查列表,测试Docker镜像/ubuntu安装包的功能正确性 + * 如果失败,记录下所有失败的例子,在这个`release/版本号`分支中,修复所有bug后,Patch号加一,返回第二步 + * 编译这个版本的python wheel包,并发布到pypi。 + * 由于pypi.python.org目前遵循[严格的命名规范PEP 513](https://www.python.org/dev/peps/pep-0513),在使用twine上传之前,需要重命名wheel包中platform相关的后缀,比如将`linux_x86_64`修改成`manylinux1_x86_64`。 + * pypi上的package名称为paddlepaddle和paddlepaddle_gpu,如果要上传GPU版本的包,需要修改build/python/setup.py中,name: "paddlepaddle_gpu"并重新打包wheel包:`python setup.py bdist_wheel`。 + * 上传方法: + ``` + cd build/python + pip install twine + twine upload dist/[package to upload] + ``` +4. 第三步完成后,将`release/版本号`分支合入master分支,并删除`release/版本号`分支。将master分支的合入commit打上tag,tag为`版本号`。同时再将`master`分支合入`develop`分支。最后删除`release/版本号`分支。 +5. 编译master分支的Docker发行镜像,发布到dockerhub。编译ubuntu的deb包,发布到github release页面 +6. 协同完成Release Note的书写 + + +需要注意的是: + +* `release/版本号`分支一旦建立,一般不允许再从`develop`分支合入`release/版本号`。这样保证`release/版本号`分支功能的封闭,方便测试人员测试PaddlePaddle的行为。 +* 在`release/版本号`分支存在的时候,如果有bugfix的行为,需要将bugfix的分支同时merge到`master`, `develop`和`release/版本号`这三个分支。 + +# PaddlePaddle 分支规范 + +PaddlePaddle开发过程使用[git-flow](http://nvie.com/posts/a-successful-git-branching-model/)分支规范,并适应github的特性做了一些区别。 + +* PaddlePaddle的主版本库遵循[git-flow](http://nvie.com/posts/a-successful-git-branching-model/)分支规范。其中: + * `master`分支为稳定(stable branch)版本分支。每一个`master`分支的版本都是经过单元测试和回归测试的版本。 + * `develop`分支为开发(develop branch)版本分支。每一个`develop`分支的版本都经过单元测试,但并没有经过回归测试。 + * `release/版本号`分支为每一次Release时建立的临时分支。在这个阶段的代码正在经历回归测试。 + +* 其他用户的fork版本库并不需要严格遵守[git-flow](http://nvie.com/posts/a-successful-git-branching-model/)分支规范,但所有fork的版本库的所有分支都相当于特性分支。 + * 建议,开发者fork的版本库使用`develop`分支同步主版本库的`develop`分支 + * 建议,开发者fork的版本库中,再基于`develop`版本fork出自己的功能分支。 + * 当功能分支开发完毕后,向PaddlePaddle的主版本库提交`Pull Reuqest`,进而进行代码评审。 + * 在评审过程中,开发者修改自己的代码,可以继续在自己的功能分支提交代码。 + +* BugFix分支也是在开发者自己的fork版本库维护,与功能分支不同的是,BugFix分支需要分别给主版本库的`master`、`develop`与可能有的`release/版本号`分支,同时提起`Pull Request`。 + +# PaddlePaddle回归测试列表 + +本列表说明PaddlePaddle发版之前需要测试的功能点。 + +## PaddlePaddle Book中所有章节 + +PaddlePaddle每次发版本首先要保证PaddlePaddle Book中所有章节功能的正确性。功能的正确性包括验证PaddlePaddle目前的`paddle_trainer`训练和纯使用`Python`训练模型正确性。 + +| | 新手入门章节 | 识别数字 | 图像分类 | 词向量 | 情感分析 | 语意角色标注 | 机器翻译 | 个性化推荐 | +| --- | --- | --- | --- | --- | --- | --- | --- | --- | +| API.V2 + Docker + GPU | | | | | | | | | +| API.V2 + Docker + CPU | | | | | | | | | +| `paddle_trainer` + Docker + GPU | | | | | | | | | +| `paddle_trainer` + Docker + CPU | | | | | | | | | +| API.V2 + Ubuntu + GPU | | | | | | | | | +| API.V2 + Ubuntu + CPU | | | | | | | | | +| `paddle_trainer` + Ubuntu + GPU | | | | | | | | | +| `paddle_trainer` + Ubuntu + CPU | | | | | | | | | diff --git a/doc/design/scope.md b/doc/design/scope.md new file mode 100644 index 0000000000000000000000000000000000000000..b1f9bb4378eb5ec6926f1e53f7c1f4fd5674064c --- /dev/null +++ b/doc/design/scope.md @@ -0,0 +1,124 @@ +# Design of Scope in Paddle + +## Overview + +Scope is an important concept in programming languages, which defines a program region that a set of bindings between names and entities applies. In a specific scope, a valid name is uniquely associated with an entity, such as a variable. And in another scope, this name may refer to other entity or nothing at all. It clearly restricts the visibility and validity of names in a program. Hence **Scope** is introduced to PaddlePaddle to manage variables in context. But different from the original abstract concept, Scope now becomes an object with two important attributes: + +- Scope is an association of a name to variable. +- Variables in a parent scope can be retrieved from local scope. + +A detailed explanation of these two attributes goes as following. + + +## Scope is an association of a name to variable. + +Scope is an association of a name to variable. All variables belong to `Scope`. You need to specify a scope to run a Net, i.e., `net.Run(&scope)`. One net can run in different scopes and update different variable in the scope. + + +1. Scope only contains a map of a name to variable. + + All parameters, data, states in a Net should be variables and stored inside a scope. Each op should get inputs and outputs to do computation from a scope, such as data buffer, state (momentum) etc. + +1. Variable can only be created by Scope and a variable can only be got from Scope. User cannot create or get a variable outside a scope. This is a constraints of our framework, and will keep our framework simple and clear. + +1. Scope only contains methods that are used to Create and Get Variables. Scope do not contain Operators and have no information to run them. + `Net` is designed to drive the computation and Scope only contains a map of variables. There is no computation logic inside a `Scope`. Scope just handles the lifetime management of variables. + - `Create` is used to create a Variable by its name and add the mapping relation. + - `Get` is used to find a Variable by name. + +1. Every variable only belongs to one certain Scope. + + Variable can not belong to many scopes. If you want to use variables from parent scope, you can use `parent scope`. + +1. Scope should destruct all Variables inside it when itself is destructed. User can never store `Variable` pointer somewhere else. + + Because Variable can only be got from Scope. When destroying Scope, we also need to destroy all the Variables in it. If user store `Variable` pointer to private data member or some global variable, the pointer will be an invalid pointer when associated `Scope` is destroyed. + +```cpp +class Scope { + public: + Variable* NewVar(const std::string& name); + const Variable* FindVar(const std::string& name) const; + + private: + std::unordered_map> vars_; +}; +``` + + +## Parent scope and local scope + +Just like [scope](https://en.wikipedia.org/wiki/Scope_(computer_science)) in programming languages, `Scope` in the neural network can also be a local scope. There are two attributes about local scope. + +1. We can create local variables in a local scope. When that local scope is destroyed, all local variables should also be destroyed. +2. Variables in a parent scope can be retrieved from local scopes of that parent scope, i.e., when user get a variable from a scope, it will try to search this variable in current scope. If there is no such variable in the local scope, `scope` will keep searching from its parent, until the variable is found or there is no parent. + +```cpp +class Scope { + public: + Scope(const std::shared_ptr& scope): parent_(scope) {} + + Variable* FindVar(const std::string& name) const { + auto it = vars_.find(name); + if (it != vars_.end()) { + return it->second.get(); + } else if (parent_ != nullptr) { + return parent_->FindVar(name); + } else { + return nullptr; + } + } + + private: + std::shared_ptr parent_ {nullptr}; +}; +``` + +In `Scope` class, there is a private data member called `parent_`. `parent_` is a smart pointer to its parent scope. When user `Get` a variable by its `name`, the `name` will be searched inside the current scope. If the variable cannot be found locally and parent scope is not a `nullptr`, the variable will be searched inside that parent scope. `parent_` pointer's default value is `nullptr`. It means that the scope is a global scope when `parent_` is nullptr. + +A local scope is very useful when we implement Recurrent Neural Network. Each timestep of an RNN should be a `Net`. Each `Net` of timestep (`StepNet` for short) should use an independent local scope. Just like variables in a while loop is inside a local scope in programming languages. By using a single `StepNet` and changing local scope, we can implement an RNN easily. + +# Interface Design + +```cpp +class Variable { + private: + Variable() = default; + friend class Scope; +}; + +class Scope { + private: + Scope(const std::shared_ptr& parent = nullptr); + + public: + static std::shared_ptr Create(const std::shared_ptr& parent = nullptr); + + // return nullptr if not found. + Variable* FindVar(const std::string& name) const; + + // return if already contains same name variable. + Variable* NewVar(const std::string& name); + + private: + std::shared_ptr parent_; + std::unordered_map> vars_; +}; +``` +## Only scope can create a variable + +To ensure `only scope can create a variable`, we should mark `Variable`'s constructor as a private member function, and Scope is a friend class of Variable. And then only `NewVar` can construct `Variable`. + +## When scope destroyed, all variables inside this scope should be destroyed together + +The scope hold unique pointers for all variables. User can `FindVar` from scope, but he should not hold this pointer as a member variable. Because when scope is destroyed, all variables inside this scope will be destroyed together. + +## Sharing a parent scope + +Local scope contains a `parent_` pointer. It is a linked-list for scopes. Using a `shared_ptr` because when a local scope is using, its parents cannot be destroyed. + +Also, as the parent scope is a `shared_ptr`, we can only `Create()` a scope shared pointer. We cannot construct a scope variable, because it cannot be passed to other scope as `parent` pointer. + +## Orthogonal interface + +`FindVar` will return `nullptr` when `name` is not found. It can be used as `Contains` method. `NewVar` will return an `Error` when there is a name conflict locally. Combine `FindVar` and `NewVar`, we can implement `NewVar` easily. diff --git a/doc/design/selected_rows.md b/doc/design/selected_rows.md new file mode 100644 index 0000000000000000000000000000000000000000..9e6f3b20cbcdc55e481fbe7bf5fa555d8b3c3d45 --- /dev/null +++ b/doc/design/selected_rows.md @@ -0,0 +1,74 @@ +# Design Doc: Selected Rows + +`SelectedRows` is a kind of sparse tensor data type, which is designed to support `embedding` operators. The gradient of embedding table is a sparse tensor. Only a few rows are non-zero values in that tensor. It is straightforward to represent the sparse tensor by the following sparse tensor data structure: + +```cpp +class SelectedRows { + private: + vector rows_; + Tensor value_; + int height_; +}; +``` + +The field `height_` shows the first dimension of `SelectedRows`. The `rows` are the indices of which rows of `SelectedRows` are non-zeros. The `value_` field is an N-dim tensor and shape is `[rows.size() /* NUM_ROWS */, ...]`, which supplies values for each row. The dimension of `SelectedRows` satisfies `[height_] + value_.shape[1:]`. + +Suppose that a SelectedRows-typed variable `x` has many rows, but only two of them have values -- row 73 is `[1, 2]` and row 84 is `[3, 4]`, the `SelectedRows` representation would be: + +``` +x = SelectedRow { + rows = [73, 84], + value = [[1, 2], [3,4]] +} +``` + + +## SelectedRows in Protobuf + +`SelectedRows` is a kind of `Variable`. `VarDesc` in protobuf should describe the `SelectedRows` information. Only the tensor dimension of a `SelectedRows` will be described in compile-time since the `rows_` and `value_` are related to training data. +So we use `TensorDesc` to unify `data_type` and `dims`. A LodTensorDesc contains a `TensorDesc` and `lod_level`. The description of `SelectedRows` is a Tensor description. + +```proto +message TensorDesc { + required DataType data_type = 1; + repeated int64 dims = 2; // [UNK, 640, 480] is saved as [-1, 640, 480] +} + +message LodTensorDesc { + required TensorDesc tensor = 1; + optional int lod_level = 2; +} + +message VarDesc { + required string name = 1; + enum VarType { + LOD_TENSOR = 0; + SELECTED_ROWS = 1; + } + required VarType type = 2; + optional LodTensorDesc lod_desc = 3; + optional TensorDesc selected_rows_desc = 4; + optional bool persistable = 5 [ default = false ]; +} +``` + +## InferShape for Selected Rows + +Just like `LoD` information, `InferShape` method will inference output tensor type as well. The operator should decide whether its output is a `SelectedRows` or `Dense` tensor. + +For example, the gradient operator of `TableLookup` will always generate `SelectedRows`. Its `InferShape` method should be like following + +```cpp +void TableLookupGrad::InferShape(context) { + ... + context.SetDataType("Embedding.Grad", kSelectedRows); +} +``` + + +## Sparse Operators + +There are several operators should be written to support `SelectedRows`. They are: + +1. Operators which generates `SelectedRows` gradient. e.g. Gradient of `TableLookupOp`. +2. Optimize operators which support `SelectedRows` gradient. e.g. `SGD` or `AdaGrad` for `SelectedRows`. However, there should be only one `SGD` operator. `OpWithKernel::Run` should select a suitable kernel for both `dense` tensor or `SelectedRows`. diff --git a/doc/design/simple_op_design.md b/doc/design/simple_op_design.md new file mode 100644 index 0000000000000000000000000000000000000000..c7aeed7f9b4637e1c29d530f37b42d12500af82f --- /dev/null +++ b/doc/design/simple_op_design.md @@ -0,0 +1,202 @@ +## Interaction between C++ and Python + +Users employ API in Python to describe their own network, however, the network construction actually happens in C++. so Protobuf is introduced to send the message between Python and C++. + +The Interaction between Python and C++ can be simplified as two steps: + +1. C++ tells Python how many Ops there are, and what parameter do users need to offer to initialize a new Op. Python then builds API for each Op at compile time. + +2. Users invoke APIs built by Python and provide necessary parameters. These parameters will be sent to C++ for finishing the Op construction task. + +### Message from C++ to Python + +We define a Protobuf message class `OpProto` to hold message needed in the first step. What should an `OpProto` contain? This question is equivalent to “What message do we need to offer, to build a Python API which is legal and user oriented and can use to describe a whole Op.” + +Following message are necessary: + +1. Op's name, and its simple comment. +2. Input and output variable number; each variable's name, type, and comment. +3. Op's attributes; each attribute includes name, type, comment, **default value** and **value range**. + +So `OpProto` can be defined as follows: + +```proto +enum AttrType { + INT = 1; + FLOAT = 2; + STRING = 3; + INTS = 4; + FLOATS = 5; + STRINGS = 6; +}; + +message AttrValue { + AttrType type = 1; + optional int iv = 2; + optional float fv = 3; + optional string sv = 4; + repeated int ivs = 5; + repeated float fvs = 6; + repeated string svs = 7; +}; + +message AttrProto { + required string name = 1; + required string comment = 2; + required AttrType type = 3; +}; + +message VarProto { + required string name = 1; + required string comment = 2; + required bool is_tensor = 3; +}; + +message OpProto { + repeated VarProto inputs = 1; + repeated VarProto outputs = 2; + repeated AttrProto attrs = 3; + required string type = 4; + required string comment = 5; +}; +``` + +To generate Python code automatically: + +```python +def create_python_ops_creatation_functions(): + op_protos = paddle.framework.OpRegistry.get_all_op_proto() + for type_name in op_protos: + op_proto = op_protos[type_name] + def __impl__(**kwargs): # User must use key word args in Paddle API + inputs = [kwargs.get(ipt.name, "") for ipt in op_proto.inputs] + outputs = [kwargs.get(opt.name, "") for opt in op_proto.outputs] + attrs = [cast_to_op_attr(attr, kwargs.get(attr.name, None)) for attr in op_proto.attrs] + opdesc = (input, outputs, type_name, attrs) + return paddle.framework.OpRegistry.CreateOp(opdesc) + __impl__.__doc__ = create_doc_string(op_proto) + globals()[type_name] = __impl__ + +create_python_ops_creatation_functions() +``` + +### Message from Python to C++ + +To hold message needed in the above second step, we define Protobuf message class `OpDesc`. It is used to hold user-specified parameters in Op describing. + +```proto +message OpDesc { + required string type = 1; + repeated string inputs = 2; + repeated string outputs = 3; + map attrs = 4; +}; +``` + +## OpProto Register + +Every Op has its own `OpProto`. For using convenience, we need to register them and record all their messages. For each `Op` class, we define a corresponding `OpMaker` class, in whose constructor we implement the `OpProto`'s building process. `OpMaker`'s constructor will be invoked by another function `OpRegistry::RegisterOp()`. + +```cpp +class OpProtoMaker { +public: + OpProtoMaker(OpProto* proto): proto_(proto) {} +protected: + OpProto* proto_; + void AddInput(const std::string& name, const std::string& desc) {...} + void AddAttr(const std::string& name, const std::string& desc, TypeId type) {...} + void AddComment(const std::string& comment) { ... } +}; + +class OpRegistry { +public: + using OpCreator = std::function; + + template + static void RegisterOp(const std::string& name) { + gCreators_[name] = [](const OpDesc& desc) { + return new OpType(desc); + }; + OpProto& opProto = gProtos_[name]; + OpMaker()(&opProto); + } + + static map gCreators_; + static map gProtos_; +}; + +template +class OpRegister { + public: + OpRegister(std::string type) { + OpRegistry::RegisterOp(type); + } +}; + +#define REGISTER_OP(op_class, op_maker_class, type_name) \ + class op_class##Register { \ + private: \ + const static OpRegister<#op_class, #op_maker_class> reg; \ + }; \ + const Register op_class##Register::reg(#type_name); + +class CosineOp { +// ... +} + +struct CosineOpProtoMaker : public OpProtoMaker { + CosineOpProtoMaker(OpProto* proto) : OpProtoMaker(proto) { + AddInput("input", "input of cosine op"); + AddAttr("scale", "scale of cosine op", float).Default(1.0).GreaterThan(0.0); + AddType("cos"); + AddComment("This is cos op"); + } +} + +REGISTER_OP(CosineOp, CosineOpProtoMaker, cos); +``` + +In `REGISTER_OP(CosineOp, CosineOpProtoMaker, cos)`, we register not only `CosineOp` but also `CosineOpProto`. As fields of `CosineOpProto`, the default value and value range of `scale` are also registered here. + +## Python API + +Python APIs are divided into two types, high-level API and low-level API. + +### High-Level API + +High-level API is called by users directly, so it should keep its style consistent with existing V2 APIs. + +Here is a sample about how a define a fc layer: + +```python +hd = fc_layer(input=data, size=56, with_bias=True, activation="sigmoid"); +``` + +`hd` is the output of `fc_layer` and it's a `variable`. It can be further sent into other layers as input. + +The definition of `fc_layer()`: + +```python +def fc_layer(input, size, with_bias, activation): + attr_map = {"size":size} + check_attrs(attr_map) + w = make_variable('w') + if with_bias: + b = make_variable('b') + else: + b = None + fc_output = make_variable('fc_output'); + fc_op(input, w, b, fc_output, attr_map) + act_output = make_variable('sigmod_output'); + if activation == "sigmod": + sigmod_op(fc_output, act_output); + elif: + # ... + return act_output; +``` + +### Low Leval API + +In above sample, `fc_op` and `sigmod_op` are low-level API. They build `OpDesc` and invoke corresponding C++ code. + +*TODO* diff --git a/doc/design/speech/README.MD b/doc/design/speech/README.MD new file mode 100644 index 0000000000000000000000000000000000000000..7304650e628dba210488cd2dc4836318b5383b2a --- /dev/null +++ b/doc/design/speech/README.MD @@ -0,0 +1,155 @@ +# DeepSpeech2 on PaddlePaddle: Design Doc + +We are planning to build Deep Speech 2 (DS2) \[[1](#references)\], a powerful Automatic Speech Recognition (ASR) engine, on PaddlePaddle. For the first-stage plan, we have the following short-term goals: + +- Release a basic distributed implementation of DS2 on PaddlePaddle. +- Contribute a chapter of Deep Speech to PaddlePaddle Book. + +Intensive system optimization and low-latency inference library (details in \[[1](#references)\]) are not yet covered in this first-stage plan. + +## Table of Contents + +- [Tasks](#tasks) +- [Task Dependency](#task-dependency) +- [Design Details](#design-details) + - [Overview](#overview) + - [Row Convolution](#row-convolution) + - [Beam Search With CTC and LM](#beam-search-with-ctc-and-lm) +- [Future Work](#future-work) +- [References](#references) + +## Tasks + +We roughly break down the project into 14 tasks: + +1. Develop an **audio data provider**: + - Json filelist generator. + - Audio file format transformer. + - Spectrogram feature extraction, power normalization etc. + - Batch data reader with SortaGrad. + - Data augmentation (optional). + - Prepare (one or more) public English data sets & baseline. +2. Create a **simplified DS2 model configuration**: + - With only fixed-length (by padding) audio sequences (otherwise need *Task 3*). + - With only bidirectional-GRU (otherwise need *Task 4*). + - With only greedy decoder (otherwise need *Task 5, 6*). +3. Develop to support **variable-shaped** dense-vector (image) batches of input data. + - Update `DenseScanner` in `dataprovider_converter.py`, etc. +4. Develop a new **lookahead-row-convolution layer** (See \[[1](#references)\] for details): + - Lookahead convolution windows. + - Within-row convolution, without kernels shared across rows. +5. Build KenLM **language model** (5-gram) for beam search decoder: + - Use KenLM toolkit. + - Prepare the corpus & train the model. + - Create infererence interfaces (for Task 6). +6. Develop a **beam search decoder** with CTC + LM + WORDCOUNT: + - Beam search with CTC. + - Beam search with external custom scorer (e.g. LM). + - Try to design a more general beam search interface. +7. Develop a **Word Error Rate evaluator**: + - update `ctc_error_evaluator`(CER) to support WER. +8. Prepare internal dataset for Mandarin (optional): + - Dataset, baseline, evaluation details. + - Particular data preprocessing for Mandarin. + - Might need cooperating with the Speech Department. +9. Create **standard DS2 model configuration**: + - With variable-length audio sequences (need *Task 3*). + - With unidirectional-GRU + row-convolution (need *Task 4*). + - With CTC-LM beam search decoder (need *Task 5, 6*). +10. Make it run perfectly on **clusters**. +11. Experiments and **benchmarking** (for accuracy, not efficiency): + - With public English dataset. + - With internal (Baidu) Mandarin dataset (optional). +12. Time **profiling** and optimization. +13. Prepare **docs**. +14. Prepare PaddlePaddle **Book** chapter with a simplified version. + +## Task Dependency + +Tasks parallelizable within phases: + +Roadmap | Description | Parallelizable Tasks +----------- | :------------------------------------ | :-------------------- +Phase I | Simplified model & components | *Task 1* ~ *Task 8* +Phase II | Standard model & benchmarking & profiling | *Task 9* ~ *Task 12* +Phase III | Documentations | *Task13* ~ *Task14* + +Issue for each task will be created later. Contributions, discussions and comments are all highly appreciated and welcomed! + +## Design Details + +### Overview + +Traditional **ASR** (Automatic Speech Recognition) pipelines require great human efforts devoted to elaborately tuning multiple hand-engineered components (e.g. audio feature design, accoustic model, pronuncation model and language model etc.). **Deep Speech 2** (**DS2**) \[[1](#references)\], however, trains such ASR models in an end-to-end manner, replacing most intermediate modules with only a single deep network architecture. With scaling up both the data and model sizes, DS2 achieves a very significant performance boost. + +Please read Deep Speech 2 \[[1](#references),[2](#references)\] paper for more background knowledge. + +The classical DS2 network contains 15 layers (from bottom to top): + +- **Two** data layers (audio spectrogram, transcription text) +- **Three** 2D convolution layers +- **Seven** uni-directional simple-RNN layers +- **One** lookahead row convolution layers +- **One** fully-connected layers +- **One** CTC-loss layer + +
+
+Figure 1. Archetecture of Deep Speech 2 Network. +
+ +We don't have to persist on this 2-3-7-1-1-1 depth \[[2](#references)\]. Similar networks with different depths might also work well. As in \[[1](#references)\], authors use a different depth (e.g. 2-2-3-1-1-1) for final experiments. + +Key ingredients about the layers: + +- **Data Layers**: + - Frame sequences data of audio **spectrogram** (with FFT). + - Token sequences data of **transcription** text (labels). + - These two type of sequences do not have the same lengthes, thus a CTC-loss layer is required. +- **2D Convolution Layers**: + - Not only temporal convolution, but also **frequency convolution**. Like a 2D image convolution, but with a variable dimension (i.e. temporal dimension). + - With striding for only the first convlution layer. + - No pooling for all convolution layers. +- **Uni-directional RNNs** + - Uni-directional + row convolution: for low-latency inference. + - Bi-direcitional + without row convolution: if we don't care about the inference latency. +- **Row convolution**: + - For looking only a few steps ahead into the feature, instead of looking into a whole sequence in bi-directional RNNs. + - Not nessesary if with bi-direcitional RNNs. + - "**Row**" means convolutions are done within each frequency dimension (row), and no convolution kernels shared across. +- **Batch Normalization Layers**: + - Added to all above layers (except for data and loss layer). + - Sequence-wise normalization for RNNs: BatchNorm only performed on input-state projection and not state-state projection, for efficiency consideration. + + +Required Components | PaddlePaddle Support | Need to Develop +:------------------------------------- | :-------------------------------------- | :----------------------- +Data Layer I (Spectrogram) | Not supported yet. | TBD (Task 3) +Data Layer II (Transcription) | `paddle.data_type.integer_value_sequence` | - +2D Convolution Layer | `paddle.layer.image_conv_layer` | - +DataType Converter (vec2seq) | `paddle.layer.block_expand` | - +Bi-/Uni-directional RNNs | `paddle.layer.recurrent_group` | - +Row Convolution Layer | Not supported yet. | TBD (Task 4) +CTC-loss Layer | `paddle.layer.warp_ctc` | - +Batch Normalization Layer | `paddle.layer.batch_norm` | - +CTC-Beam search | Not supported yet. | TBD (Task 6) + +### Row Convolution + +TODO by Assignees + +### Beam Search with CTC and LM + +TODO by Assignees + +## Future Work + +- Efficiency Improvement +- Accuracy Improvement +- Low-latency Inference Library +- Large-scale benchmarking + +## References + +1. Dario Amodei, etc., [Deep Speech 2 : End-to-End Speech Recognition in English and Mandarin](http://proceedings.mlr.press/v48/amodei16.pdf). ICML 2016. +2. Dario Amodei, etc., [Deep Speech 2 : End-to-End Speech Recognition in English and Mandarin](https://arxiv.org/abs/1512.02595). arXiv:1512.02595. diff --git a/doc/design/speech/image/ds2_network.png b/doc/design/speech/image/ds2_network.png new file mode 100644 index 0000000000000000000000000000000000000000..1a5b2184d47928cc2849d5a7c8ea2d8cf5337e11 Binary files /dev/null and b/doc/design/speech/image/ds2_network.png differ diff --git a/doc/design/tensor_array.md b/doc/design/tensor_array.md new file mode 100644 index 0000000000000000000000000000000000000000..8378e97bf7cfaae54c36b1b92e202b16e4fe1e28 --- /dev/null +++ b/doc/design/tensor_array.md @@ -0,0 +1,271 @@ +# Design for TensorArray +This design doc presents the necessity of a new C++ class `TensorArray`. +In addition to the very simple C++ implementation + +```c++ +class TensorArray { + public: + explicit TensorArray(const LoDTensor&); + explicit TensorArray(size_t size); + + private: + vector values_; +}; +``` + +We also need to expose it to PaddlePaddle's Python API, +because users would want to use it with our very flexible operators `WhileLoop`. +An example for a RNN based on dynamic operators is + +```python +input = pd.data(...) +num_steps = Var(12) + +TensorArray states(size=num_steps) +TensorArray step_inputs(unstack_from=input) +TensorArray step_outputs(size=num_steps) + +W = Tensor(...) +U = Tensor(...) +default_state = some_op() + +step = Var(1) + +wloop = paddle.create_whileloop(loop_vars=[step]) +with wloop.frame(): + wloop.break_if(pd.equal(step, num_steps) + pre_state = states.read(step-1, default_state) + step_input = step_inputs.read(step) + state = pd.sigmoid(pd.matmul(U, pre_state) + pd.matmul(W, step_input)) + states.write(step, state) + step_outputs.write(step, state) # output state + step.update(state+1) + +output = step_outputs.stack() +``` + +## Background +Steps are one of the core concepts of RNN. In each time step of RNN, there should be several input segments, states, and output segments; all these components act like arrays, for example, call `states[step_id]` will get the state in `step_id`th time step. + +An RNN can be implemented with the following pseudocode + +```c++ +Array states; +Array input_segments; +Array output_segments; +Parameter W, U; + +step = 1 +seq_len = 12 +while_loop { + if (step == seq_len) break; + states[step] = sigmoid(W * states[step-1] + U * input_segments[step]); + output_segments[step] = states[step] // take state as output + step++; +} +``` +According to the [RNN roadmap](https://github.com/PaddlePaddle/Paddle/issues/4561), there are several different RNNs that PaddlePaddle will eventually support. + +Currently, the basic RNN implementation supported by PaddlePaddle is the `recurrent_op` which takes tensors as input and splits them into `input_segments`. + + +Since a tensor cannot store variable-length sequences directly, PaddlePaddle implements the tensor with level of details (`LoDTensor` for short). +Segmenting the `LoDTensor` is much more complicated than splitting a tensor, that makes it necessary to refactor the `recurrent_op` with `LoDTensor` segmenting support. + +As the next step in RNN support, `dynamic_recurrent_op` should be introduced to handle inputs with variable-length sequences. + +The implementation is similar to `recurrent_op`. +The key difference is the way **the original input `LoDTensors` and outupts are split to get the `input_segments` and the `output_segments`.** + + +Though it can't be built over `recurrent_op` or `dynamic_recurrent_op` directly, +the logic behind splitting a tensor or a LoD tensor into `input_segments` remains the same. + +## Why `TensorArray` +The logic behind splitting the inputs to segments, states and outputs is similar and can be shared in a seperate module. + +The array of `states`, `input_segments` and `output_segments` would be exposed to users when writing a dynamic RNN model similar to the above pseudo codes. + +So there should be an array-like container, which can store the segments of a tensor or LoD tensor. + +**This container can store an array of tensors and provides several methods to split a tensor or a LoD tensor** . +This is where the notion of `TensorArray` comes from. + +## Introduce TensorArray to uniform all the three RNNs +TensorArray as a new concept is borrowed from TensorFlow, +it is meant to be used with dynamic iteration primitives such as `while_loop` and `map_fn`. + +This concept can be used to support our new design of dynamic operations, and help to refactor some existing variant-sentence-related layers, +such as `recurrent_op`, `RecurrentGradientMachine`. + +In [our design for dynamic RNN](https://github.com/PaddlePaddle/Paddle/pull/4401), +`TensorArray` is used to segment inputs and store states in all time steps. +By providing some methods similar to a C++ array, +the definition of some state-based dynamic models such as RNN can be more natural and highly flexible. + +## Dynamic-operations on TensorArray + +`TensorArray` will be used directly when defining dynamic models, so some operators listed below should be implemented + +```python +# several helper operators for TensorArray +def tensor_array_stack(ta, tensor): + ''' + get a tensor array `ta`, return a packed `tensor`. + ''' + pass + +def tensor_array_unstack(tensor, ta): + ''' + get a `tensor`, unstack it and get a tensor array `ta`. + ''' + pass + +def tensor_array_write(ta, index, tensor, data_shared): + ''' + get a `tensor` and a scalar tensor `index`, write `tensor` into index-th + value of the tensor array `ta`. + `data_shared` is an attribute that specifies whether to copy or reference the tensors. + ''' + pass + +def tensor_array_read(ta, index, tensor): + ''' + get a tensor array `ta`, a scalar tensor `index`, read the index-th value of + `ta` and return as the `tensor`. + ''' + pass + +def tensor_array_size(ta, tensor): + ''' + get a tensor array `ta`, return the size of `ta` and return as the scalar `tensor`. + ''' + pass +``` + +It is trivial for users to use so many low-level operators, so some helper methods should be proposed in python wrapper to make `TensorArray` easier to use, +for example + +```python +class TensorArray: + def __init__(self, name): + self.name = name + self.desc = TensorArrayDesc() + + def stack(self, name=None): + ''' + Pack the values in a `TensorArray` into a tensor with rank one higher + than each tensor in `values`. + `stack` can be used to split tensor into time steps for RNN or whileloop. + + @name: str + the name of the variable to output. + ''' + tensor = NewVar(name) + tensor_array_stack(self.name, tensor) + return tensor + + def unstack(self, input): + ''' + Unpacks the given dimension of a rank-`R` tensor into rank-`(R-1)` tensors. + `unstack` can be used to concatenate all the time steps for RNN or whileloop. + + @input: str + the name of input tensor + ''' + tensor_array_unstack(tensor, self.name) + + def write(self, index, value, data_shared=True): + ''' + Write value into index of the TensorArray. + If `data_shared` is set to True, than the index-th value in TensorArray will + be shared with the tensor passed in. + + @index: str + name of a scalar tensor + @value: str + name of a tensor + @data_shared: bool + ''' + tensor_array_write(self.name, index, value, data_shared) + + def read(self, index, output): + ''' + Read the value at location `index` in the `TensorArray`. + + @index: str + name of a scalar tensor + @output: + name of a output variable + ''' + tensor_array_read(self.name, index, output) + + + def size(self, output): + ''' + Return the number of values. + + @output: str + name of a scalar tensor + ''' + tensor_array_size(self.name, output) +``` + +## LoDTensor-related Supports +The `RecurrentGradientMachine` in Paddle serves as a flexible RNN layer; it takes varience-length sequences as input, and output sequences too. + +Since each step of RNN can only take a tensor-represented batch of data as input, +some preprocess should be taken on the inputs such as sorting the sentences by their length in descending order and cut each word and pack to new batches. + +Such cut-like operations can be embedded into `TensorArray` as general methods called `unpack` and `pack`, +these two operations are similar to `stack` and `unstack` except that they operate on variable-length sequences formated as a LoD tensor rather than a tensor. + +Some definitions are like + +```python +def unpack(level): + ''' + Split LodTensor in some `level` and generate batches, if set `sort_by_length`, + will sort by length. + + Returns: + - a new `TensorArray`, whose values are LodTensors and represents batches + of data. + - an int32 Tensor, which stores the map from the new batch's indices to + original LoDTensor + ''' + pass + +def pack(level, indices_map): + ''' + Recover the original LoD-arranged LoDTensor with the values in a `TensorArray` + and `level` and `indices_map`. + ''' + pass +``` + +With these two methods, a varience-length sentence supported RNN can be implemented like + +```c++ +// input is the varient-length data +LodTensor sentence_input(xxx); +TensorArray ta; +Tensor indice_map; +Tensor boot_state = xxx; // to initialize rnn's first state +TensorArray::unpack(input, 1/*level*/, true/*sort_by_length*/, &ta, &indice_map); +TessorArray step_outputs; +TensorArray states; + +for (int step = 0; step = ta.size(); step++) { + auto state = states.read(step); + // rnnstep is a function which acts like a step of RNN + auto step_input = ta.read(step); + auto step_output = rnnstep(step_input, state); + step_outputs.write(step_output, true/*data_shared*/); +} + +// rnn_output is the final output of an rnn +LoDTensor rnn_output = ta.pack(ta, indice_map); +``` +the code above shows that by embedding the LoDTensor-related preprocess operations into `TensorArray`, +the implementation of a RNN that supports varient-length sentences is far more concise than `RecurrentGradientMachine` because the latter mixes all the codes together, hard to read and extend. diff --git a/doc/design/test.dot b/doc/design/test.dot new file mode 100644 index 0000000000000000000000000000000000000000..62c69b8fc8010a26a54a6ee8ef1488aad94d747a --- /dev/null +++ b/doc/design/test.dot @@ -0,0 +1,35 @@ + +digraph Test { + z -> generator -> G_img; + G_img -> discriminator -> D_f -> d_loss_f; + label0 -> d_loss_f -> d_loss; + + img -> discriminator -> D_t -> d_loss_t; + label1 -> d_loss_t -> d_loss; + + d_loss -> d_loss_t[color=red, style=dashed]; + d_loss -> d_loss_f[color=red, style=dashed]; + d_loss_t -> D_t[color=red, style=dashed]; + d_loss_f -> D_f[color=red, style=dashed]; + D_t -> discriminator[color=red, style=dashed]; + D_f -> discriminator[color=red, style=dashed]; + + D_f -> g_loss; + label2 -> g_loss; + + g_loss -> D_f[color=green, style=dashed]; + D_f -> discriminator[color=green, style=dashed]; + discriminator -> G_img[color=green, style=dashed]; + G_img -> generator[color=green, style=dashed]; + + discriminator [color=red, shape=box]; + generator [color=green, shape=box]; + z [shape=diamond]; + img [shape=diamond]; + label0 [shape=diamond]; + label1 [shape=diamond]; + label2 [shape=diamond]; + + d_loss [color=red]; + g_loss [color=green]; +} diff --git a/doc/design/test.dot.png b/doc/design/test.dot.png new file mode 100644 index 0000000000000000000000000000000000000000..4e121a40b9f7b2232d7cdda315bad15926446f55 Binary files /dev/null and b/doc/design/test.dot.png differ diff --git a/doc/design/var_desc.md b/doc/design/var_desc.md new file mode 100644 index 0000000000000000000000000000000000000000..bfbbdd0578ebc69ea4b49ade9b041573a9e9ad55 --- /dev/null +++ b/doc/design/var_desc.md @@ -0,0 +1,124 @@ +## Background +PaddlePaddle divides the description of neural network computation graph into two stages: compile time and runtime. + +PaddlePaddle use proto message to describe compile time graph because + +1. Computation graph should be able to be saved to a file. +1. In distributed training, the graph will be serialized and send to multiple workers. + +The computation graph is constructed by Data Node and Operation Node. The concept to represent them is in the table below. + +| |compile time|runtime| +|---|---|---| +|Data|VarDesc(proto)|Variable(cpp)| +|Operation|OpDesc(proto)|Operator(cpp)| + + +## Definition of VarDesc + +A VarDesc should have a name and value, in PaddlePaddle, the value will always be a tensor. Since we use LoDTensor most of the time. We add a LoDTesnorDesc to represent it. + +```proto +message VarDesc { + required string name = 1; + optional LoDTensorDesc lod_tensor = 2; +} +``` + +## Definition of LodTensorDesc + +```proto +enum DataType { + BOOL = 0; + INT16 = 1; + INT32 = 2; + INT64 = 3; + FP16 = 4; + FP32 = 5; + FP64 = 6; +} + +message LoDTensorDesc { + required DataType data_type = 1; + repeated int32 dims = 2; // [UNK, 640, 480] is saved as [-1, 640, 480] + optional int32 lod_level = 3 [default=0]; +} +``` + +## Definition of Variable in Python + +In Python API, layer will take Variable as Input, and return Variable as Output. There should be a class `Variable` in python to help create and manage Variable. + +```python +image = Variable(dims=[-1, 640, 480]) +# fc1 and fc2 are both Variable +fc1 = layer.fc(input=image, output_size=10) +fc2 = layer.fc(input=fc1, output_size=20) +``` +### what should class `Variable` Have +1. `name`.a name of string type is used to mark the value of the Variable. +1. `initializer`. Since our Tensor does not have value. we will always use some Operator to fullfill it when run. So we should have a initialize method to help add the init operator. +1. `operator`. Variable should record which operator produce itself. The reaon is: + - we use pd.eval(targets=[var1, var2]) to run the related ops to get the value of var1 and var2. var.op is used to trace the dependency of the current variable. + +In PaddlePaddle, we use Block to describe Computation Graph, so in the code we will use Block but not Graph. + +```python +import VarDesc +import LoDTensorDesc +import framework + +def AddInitialOperator(variable, initializer): + # add an initialize Operator to block to init this Variable + +class Variable(object): + def __init__(self, name, dims, type, initializer): + self._block = get_default_block() + self._name = name + self.op = None + + tensor_desc = LoDTensorDesc(data_type=type, dims=dims) + _var_desc = VarDesc(name=name, lod_tensor=tensor_desc) + self._var = framework.CreateVar(_var_desc) + self._block.add_var(self) + + # add initial op according to initializer + if initializer is not None: + AddInitialOperator(self, initializer) + + def dims(self): + return self._var.dims() + + def data_type(self): + return self._var.data_type() + + def to_proto(self): + pass +``` + +Then we can use this Variable to create a fc layer in Python. + +```python +import paddle as pd + +def flatten_size(X, num_flatten_dims): + prod = 1 # of last num_flatten_dims + for i in xrange(num_flatten_dims): + prod = prod * X.dims[-i-1] + return prod + +def layer.fc(X, output_size, num_flatten_dims): + W = Variable(pd.random_uniform(), type=FP32, dims=[flatten_size(X, num_flatten_dims), output_size]) + b = Variable(pd.random_uniform(), type=FP32, dims=[output_size]) + out = Variable(type=FP32) + y = operator.fc(X, W, b, output=out) # fc will put fc op input into out + pd.InferShape(y) + return out + +x = Variable(dims=[-1, 640, 480]) +y = layer.fc(x, output_size=100) +z = layer.fc(y, output_size=200) + +paddle.eval(targets=[z], ...) +print(z) +``` diff --git a/doc/faq/build_and_install/index_cn.rst b/doc/faq/build_and_install/index_cn.rst new file mode 100644 index 0000000000000000000000000000000000000000..f1677e216f31d79b53ac29a0afbf6fbb886a0dcd --- /dev/null +++ b/doc/faq/build_and_install/index_cn.rst @@ -0,0 +1,111 @@ +################### +编译安装与单元测试 +################### + +.. contents:: + +1. 运行Docker GPU镜像出现 "CUDA driver version is insufficient" +---------------------------------------------------------------- + +用户在使用PaddlePaddle GPU的Docker镜像的时候,常常出现 `Cuda Error: CUDA driver version is insufficient for CUDA runtime version`, 原因在于没有把机器上CUDA相关的驱动和库映射到容器内部。 +具体的解决方法是: + +.. code-block:: bash + + $ export CUDA_SO="$(\ls usr/lib64/libcuda* | xargs -I{} echo '-v {}:{}') $(\ls /usr/lib64/libnvidia* | xargs -I{} echo '-v {}:{}')" + $ export DEVICES=$(\ls /dev/nvidia* | xargs -I{} echo '--device {}:{}') + $ docker run ${CUDA_SO} ${DEVICES} -it paddledev/paddlepaddle:latest-gpu + +更多关于Docker的安装与使用, 请参考 `PaddlePaddle Docker 文档 `_ 。 + + +2. CMake源码编译, 找到的PythonLibs和PythonInterp版本不一致 +---------------------------------------------------------------- + +这是目前CMake寻找Python的逻辑存在缺陷,如果系统安装了多个Python版本,CMake找到的Python库和Python解释器版本可能有不一致现象,导致编译PaddlePaddle失败。正确的解决方法是, +用户强制指定特定的Python版本,具体操作如下: + + .. code-block:: bash + + cmake .. -DPYTHON_EXECUTABLE= -DPYTHON_LIBRARY= -DPYTHON_INCLUDE_DIR= + +用户需要指定本机上Python的路径:````, ````, ```` + +3. CMake源码编译,Paddle版本号为0.0.0 +-------------------------------------- + +如果运行 :code:`paddle version`, 出现 :code:`PaddlePaddle 0.0.0`;或者运行 :code:`cmake ..`,出现 + +.. code-block:: bash + + CMake Warning at cmake/version.cmake:20 (message): + Cannot add paddle version from git tag + +那么用户需要拉取所有的远程分支到本机,命令为 :code:`git fetch upstream`,然后重新cmake即可。 + +4. paddlepaddle\*.whl is not a supported wheel on this platform. +------------------------------------------------------------------------ + +出现这个问题的主要原因是,没有找到和当前系统匹配的paddlepaddle安装包。最新的paddlepaddle python安装包支持Linux x86_64和MacOS 10.12操作系统,并安装了python 2.7和pip 9.0.1。 + +更新 :code:`pip` 包的方法是\: + +.. code-block:: bash + + pip install --upgrade pip + +如果还不行,可以执行 :code:`python -c "import pip; print(pip.pep425tags.get_supported())"` 获取当前系统支持的python包的后缀, +并对比是否和正在安装的后缀一致。 + +如果系统支持的是 :code:`linux_x86_64` 而安装包是 :code:`manylinux1_x86_64` ,需要升级pip版本到最新; +如果系统支持 :code:`manylinux1_x86_64` 而安装包(本地)是 :code:`linux_x86_64` ,可以重命名这个whl包为 :code:`manylinux1_x86_64` 再安装。 + +5. 编译安装后执行 import paddle.v2 as paddle 报ImportError: No module named v2 +------------------------------------------------------------------------------------------ +先查看一下是否曾经安装过paddle v1版本,有的话需要先卸载: + +pip uninstall py_paddle paddle + +然后安装paddle的python环境, 在build目录下执行 + +pip install python/dist/paddle*.whl && pip install ../paddle/dist/py_paddle*.whl + +6. 遇到“非法指令”或者是“illegal instruction” +-------------------------------------------- + +PaddlePaddle使用avx SIMD指令提高cpu执行效率,因此错误的使用二进制发行版可能会导致这种错误,请选择正确的版本。 + +7. python相关的单元测试都过不了 +-------------------------------- + +如果出现以下python相关的单元测试都过不了的情况: + +.. code-block:: bash + + 24 - test_PyDataProvider (Failed) + 26 - test_RecurrentGradientMachine (Failed) + 27 - test_NetworkCompare (Failed) + 28 - test_PyDataProvider2 (Failed) + 32 - test_Prediction (Failed) + 33 - test_Compare (Failed) + 34 - test_Trainer (Failed) + 35 - test_TrainerOnePass (Failed) + 36 - test_CompareTwoNets (Failed) + 37 - test_CompareTwoOpts (Failed) + 38 - test_CompareSparse (Failed) + 39 - test_recurrent_machine_generation (Failed) + 40 - test_PyDataProviderWrapper (Failed) + 41 - test_config_parser (Failed) + 42 - test_swig_api (Failed) + 43 - layers_test (Failed) + +并且查询PaddlePaddle单元测试的日志,提示: + +.. code-block:: bash + + paddle package is already in your PYTHONPATH. But unittest need a clean environment. + Please uninstall paddle package before start unittest. Try to 'pip uninstall paddle'. + +解决办法是: + +* 卸载PaddlePaddle包 :code:`pip uninstall paddle`, 清理掉老旧的PaddlePaddle安装包,使得单元测试有一个干净的环境。如果PaddlePaddle包已经在python的site-packages里面,单元测试会引用site-packages里面的python包,而不是源码目录里 :code:`/python` 目录下的python包。同时,即便设置 :code:`PYTHONPATH` 到 :code:`/python` 也没用,因为python的搜索路径是优先已经安装的python包。 diff --git a/doc/faq/cluster/index_cn.rst b/doc/faq/cluster/index_cn.rst new file mode 100644 index 0000000000000000000000000000000000000000..e59c1e1a54a0c876d1e6e89f88030de59fb9fc1a --- /dev/null +++ b/doc/faq/cluster/index_cn.rst @@ -0,0 +1,17 @@ +############### +集群训练与预测 +############### + +.. contents:: + +1. 集群多节点训练,日志中保存均为网络通信类错误 +------------------------------------------------ + +集群多节点训练,日志报错为网络通信类错误,比如 :code:`Connection reset by peer` 等。 +此类报错通常是由于某一个节点的错误导致这个节点的训练进程退出,从而引发其他节点无法连接导致,可以参考下面的步骤排查: + +* 从 :code:`train.log` , :code:`server.log` 找到最早报错的地方,查看是否是其他错误引发的报错(比如FPE,内存不足,磁盘空间不足等)。 + +* 如果发现最早的报错就是网络通信的问题,很有可能是非独占方式执行导致的端口冲突,可以联系OP,看当前MPI集群是否支持resource=full参数提交,如果支持增加此参数提交,并更换job 端口。 + +* 如果当前MPI集群并不支持任务独占模式,可以联系OP是否可以更换集群或升级当前集群。 diff --git a/doc/faq/index_cn.rst b/doc/faq/index_cn.rst index df5e172252277a881480cd2816eb901b711abe6b..9929767cac212237b3e2c3a547ba9a3c9d5f0979 100644 --- a/doc/faq/index_cn.rst +++ b/doc/faq/index_cn.rst @@ -1,301 +1,11 @@ -#################### FAQ -#################### +==== -.. contents:: +.. toctree:: + :maxdepth: 1 -1. 如何减少内存占用 ---------------------------------- - -神经网络的训练本身是一个非常消耗内存和显存的工作,经常会消耗数10GB的内存和数GB的显存。 -PaddlePaddle的内存占用主要分为如下几个方面\: - -* DataProvider缓冲池内存(只针对内存) -* 神经元激活内存(针对内存和显存) -* 参数内存 (针对内存和显存) -* 其他内存杂项 - -其中,其他内存杂项是指PaddlePaddle本身所用的一些内存,包括字符串分配,临时变量等等,暂不考虑在内。 - -减少DataProvider缓冲池内存 -++++++++++++++++++++++++++ - -PyDataProvider使用的是异步加载,同时在内存里直接随即选取数据来做Shuffle。即 - -.. graphviz:: - - digraph { - rankdir=LR; - 数据文件 -> 内存池 -> PaddlePaddle训练 - } - -所以,减小这个内存池即可减小内存占用,同时也可以加速开始训练前数据载入的过程。但是,这 -个内存池实际上决定了shuffle的粒度。所以,如果将这个内存池减小,又要保证数据是随机的, -那么最好将数据文件在每次读取之前做一次shuffle。可能的代码为 - -.. literalinclude:: src/reduce_min_pool_size.py - -这样做可以极大的减少内存占用,并且可能会加速训练过程,详细文档参考 :ref:`api_pydataprovider2` 。 - -神经元激活内存 -++++++++++++++ - -神经网络在训练的时候,会对每一个激活暂存一些数据,如神经元激活值等。 -在反向传递的时候,这些数据会被用来更新参数。这些数据使用的内存主要和两个参数有关系, -一是batch size,另一个是每条序列(Sequence)长度。所以,其实也是和每个mini-batch中包含 -的时间步信息成正比。 - -所以做法可以有两种: - -* 减小batch size。 即在网络配置中 :code:`settings(batch_size=1000)` 设置成一个小一些的值。但是batch size本身是神经网络的超参数,减小batch size可能会对训练结果产生影响。 -* 减小序列的长度,或者直接扔掉非常长的序列。比如,一个数据集大部分序列长度是100-200, - 但是突然有一个10000长的序列,就很容易导致内存超限,特别是在LSTM等RNN中。 - -参数内存 -++++++++ - -PaddlePaddle支持非常多的优化算法(Optimizer),不同的优化算法需要使用不同大小的内存。 -例如使用 :code:`adadelta` 算法,则需要使用等于权重参数规模大约5倍的内存。举例,如果参数保存下来的模型目录 -文件为 :code:`100M`, 那么该优化算法至少需要 :code:`500M` 的内存。 - -可以考虑使用一些优化算法,例如 :code:`momentum`。 - -2. 如何加速PaddlePaddle的训练速度 ---------------------------------- - -加速PaddlePaddle训练可以考虑从以下几个方面\: - -* 减少数据载入的耗时 -* 加速训练速度 -* 利用分布式训练驾驭更多的计算资源 - -减少数据载入的耗时 -++++++++++++++++++ - -使用\ :code:`pydataprovider`\ 时,可以减少缓存池的大小,同时设置内存缓存功能,即可以极大的加速数据载入流程。 -:code:`DataProvider` 缓存池的减小,和之前减小通过减小缓存池来减小内存占用的原理一致。 - -.. literalinclude:: src/reduce_min_pool_size.py - -同时 :code:`@provider` 接口有一个 :code:`cache` 参数来控制缓存方法,将其设置成 :code:`CacheType.CACHE_PASS_IN_MEM` 的话,会将第一个 :code:`pass` (过完所有训练数据即为一个pass)生成的数据缓存在内存里,在之后的 :code:`pass` 中,不会再从 :code:`python` 端读取数据,而是直接从内存的缓存里读取数据。这也会极大减少数据读入的耗时。 - - -加速训练速度 -++++++++++++ - -PaddlePaddle支持Sparse的训练,sparse训练需要训练特征是 :code:`sparse_binary_vector` 、 :code:`sparse_vector` 、或者 :code:`integer_value` 的任一一种。同时,与这个训练数据交互的Layer,需要将其Parameter设置成 sparse 更新模式,即设置 :code:`sparse_update=True` - -这里使用简单的 :code:`word2vec` 训练语言模型距离,具体使用方法为\: - -使用一个词前两个词和后两个词,来预测这个中间的词。这个任务的DataProvider为\: - -.. literalinclude:: src/word2vec_dataprovider.py - -这个任务的配置为\: - -.. literalinclude:: src/word2vec_config.py - - -利用更多的计算资源 -++++++++++++++++++ - -利用更多的计算资源可以分为一下几个方式来进行\: - -* 单机CPU训练 - - * 使用多线程训练。设置命令行参数 :code:`trainer_count`。 - -* 单机GPU训练 - - * 使用显卡训练。设置命令行参数 :code:`use_gpu`。 - * 使用多块显卡训练。设置命令行参数 :code:`use_gpu` 和 :code:`trainer_count` 。 - -* 多机训练 - - * 请参考 :ref:`cluster_train` 。 - - -3. 遇到“非法指令”或者是“illegal instruction” --------------------------------------------- - -PaddlePaddle使用avx SIMD指令提高cpu执行效率,因此错误的使用二进制发行版可能会导致这种错误,请选择正确的版本。 - -4. 如何选择SGD算法的学习率 --------------------------- - -在采用sgd/async_sgd进行训练时,一个重要的问题是选择正确的learning_rate。如果learning_rate太大,那么训练有可能不收敛,如果learning_rate太小,那么收敛可能很慢,导致训练时间过长。 - -通常做法是从一个比较大的learning_rate开始试,如果不收敛,那减少学习率10倍继续试验,直到训练收敛为止。那么如何判断训练不收敛呢?可以估计出如果模型采用不变的输出最小的cost0是多少。 - -如果训练过程的的cost明显高于这个常数输出的cost,那么我们可以判断为训练不收敛。举一个例子,假如我们是三分类问题,采用multi-class-cross-entropy作为cost,数据中0,1,2三类的比例为 :code:`0.2, 0.5, 0.3` , 那么常数输出所能达到的最小cost是 :code:`-(0.2*log(0.2)+0.5*log(0.5)+0.3*log(0.3))=1.03` 。如果训练一个pass(或者更早)后,cost还大于这个数,那么可以认为训练不收敛,应该降低学习率。 - - -5. 如何初始化参数 ------------------ - -默认情况下,PaddlePaddle使用均值0,标准差为 :math:`\frac{1}{\sqrt{d}}` 来初始化参数。其中 :math:`d` 为参数矩阵的宽度。这种初始化方式在一般情况下不会产生很差的结果。如果用户想要自定义初始化方式,PaddlePaddle目前提供两种参数初始化的方式\: - -* 高斯分布。将 :code:`param_attr` 设置成 :code:`param_attr=ParamAttr(initial_mean=0.0, initial_std=1.0)` -* 均匀分布。将 :code:`param_attr` 设置成 :code:`param_attr=ParamAttr(initial_max=1.0, initial_min=-1.0)` - -比如设置一个全连接层的参数初始化方式和bias初始化方式,可以使用如下代码。 - -.. code-block:: python - - hidden = fc_layer(input=ipt, param_attr=ParamAttr(initial_max=1.0, initial_min=-1.0), - bias_attr=ParamAttr(initial_mean=1.0, initial_std=0.0)) - -上述代码将bias全部初始化为1.0, 同时将参数初始化为 :code:`[1.0, -1.0]` 的均匀分布。 - -6. 如何共享参数 ---------------- - -PaddlePaddle的参数使用名字 :code:`name` 作为参数的ID,相同名字的参数,会共享参数。设置参数的名字,可以使用 :code:`ParamAttr(name="YOUR_PARAM_NAME")` 来设置。更方便的设置方式,是使得要共享的参数使用同样的 :code:`ParamAttr` 对象。 - -简单的全连接网络,参数共享的配置示例为\: - -.. literalinclude:: ../../python/paddle/trainer_config_helpers/tests/configs/shared_fc.py - -这里 :code:`hidden_a` 和 :code:`hidden_b` 使用了同样的parameter和bias。并且softmax层的两个输入也使用了同样的参数 :code:`softmax_param`。 - -7. \*-cp27mu-linux_x86_64.whl is not a supported wheel on this platform. ------------------------------------------------------------------------- - -出现这个问题的主要原因是,系统编译wheel包的时候,使用的 :code:`wheel` 包是最新的, -而系统中的 :code:`pip` 包比较老。具体的解决方法是,更新 :code:`pip` 包并重新编译PaddlePaddle。 -更新 :code:`pip` 包的方法是\: - -.. code-block:: bash - - pip install --upgrade pip - -8. python相关的单元测试都过不了 --------------------------------- - -如果出现以下python相关的单元测试都过不了的情况: - -.. code-block:: bash - - 24 - test_PyDataProvider (Failed) - 26 - test_RecurrentGradientMachine (Failed) - 27 - test_NetworkCompare (Failed) - 28 - test_PyDataProvider2 (Failed) - 32 - test_Prediction (Failed) - 33 - test_Compare (Failed) - 34 - test_Trainer (Failed) - 35 - test_TrainerOnePass (Failed) - 36 - test_CompareTwoNets (Failed) - 37 - test_CompareTwoOpts (Failed) - 38 - test_CompareSparse (Failed) - 39 - test_recurrent_machine_generation (Failed) - 40 - test_PyDataProviderWrapper (Failed) - 41 - test_config_parser (Failed) - 42 - test_swig_api (Failed) - 43 - layers_test (Failed) - -并且查询PaddlePaddle单元测试的日志,提示: - -.. code-block:: bash - - paddle package is already in your PYTHONPATH. But unittest need a clean environment. - Please uninstall paddle package before start unittest. Try to 'pip uninstall paddle'. - -解决办法是: - -* 卸载PaddlePaddle包 :code:`pip uninstall paddle`, 清理掉老旧的PaddlePaddle安装包,使得单元测试有一个干净的环境。如果PaddlePaddle包已经在python的site-packages里面,单元测试会引用site-packages里面的python包,而不是源码目录里 :code:`/python` 目录下的python包。同时,即便设置 :code:`PYTHONPATH` 到 :code:`/python` 也没用,因为python的搜索路径是优先已经安装的python包。 - - -9. 运行Docker GPU镜像出现 "CUDA driver version is insufficient" ----------------------------------------------------------------- - -用户在使用PaddlePaddle GPU的Docker镜像的时候,常常出现 `Cuda Error: CUDA driver version is insufficient for CUDA runtime version`, 原因在于没有把机器上CUDA相关的驱动和库映射到容器内部。 -具体的解决方法是: - -.. code-block:: bash - - $ export CUDA_SO="$(\ls usr/lib64/libcuda* | xargs -I{} echo '-v {}:{}') $(\ls /usr/lib64/libnvidia* | xargs -I{} echo '-v {}:{}')" - $ export DEVICES=$(\ls /dev/nvidia* | xargs -I{} echo '--device {}:{}') - $ docker run ${CUDA_SO} ${DEVICES} -it paddledev/paddlepaddle:latest-gpu - -更多关于Docker的安装与使用, 请参考 `PaddlePaddle Docker 文档 `_ 。 - - -10. CMake源码编译, 找到的PythonLibs和PythonInterp版本不一致 ----------------------------------------------------------------- - -这是目前CMake寻找Python的逻辑存在缺陷,如果系统安装了多个Python版本,CMake找到的Python库和Python解释器版本可能有不一致现象,导致编译PaddlePaddle失败。正确的解决方法是, -用户强制指定特定的Python版本,具体操作如下: - - .. code-block:: bash - - cmake .. -DPYTHON_EXECUTABLE= -DPYTHON_LIBRARY= -DPYTHON_INCLUDE_DIR= - -用户需要指定本机上Python的路径:````, ````, ```` - -10. A protocol message was rejected because it was too big ----------------------------------------------------------- - -如果在训练NLP相关模型时,出现以下错误: - -.. code-block:: bash - - [libprotobuf ERROR google/protobuf/io/coded_stream.cc:171] A protocol message was rejected because it was too big (more than 67108864 bytes). To increase the limit (or to disable these warnings), see CodedInputStream::SetTotalBytesLimit() in google/protobuf/io/coded_stream.h. - F1205 14:59:50.295174 14703 TrainerConfigHelper.cpp:59] Check failed: m->conf.ParseFromString(configProtoStr) - -可能的原因是:传给dataprovider的某一个args过大,一般是由于直接传递大字典导致的。错误的define_py_data_sources2类似: - -.. code-block:: python - - src_dict = dict() - for line_count, line in enumerate(open(src_dict_path, "r")): - src_dict[line.strip()] = line_count - - define_py_data_sources2( - train_list, - test_list, - module="dataprovider", - obj="process", - args={"src_dict": src_dict}) - -解决方案是:将字典的地址作为args传给dataprovider,然后在dataprovider里面根据该地址加载字典。即define_py_data_sources2应改为: - -.. code-block:: python - - define_py_data_sources2( - train_list, - test_list, - module="dataprovider", - obj="process", - args={"src_dict_path": src_dict_path}) - -完整源码可参考 `seqToseq `_ 示例。 - -11. 如何指定GPU设备 -------------------- - -例如机器上有4块GPU,编号从0开始,指定使用2、3号GPU: - -* 方式1:通过 `CUDA_VISIBLE_DEVICES `_ 环境变量来指定特定的GPU。 - -.. code-block:: bash - - env CUDA_VISIBLE_DEVICES=2,3 paddle train --use_gpu=true --trainer_count=2 - -* 方式2:通过命令行参数 ``--gpu_id`` 指定。 - -.. code-block:: bash - - paddle train --use_gpu=true --trainer_count=2 --gpu_id=2 - - -12. 训练过程中出现 :code:`Floating point exception`, 训练因此退出怎么办? ------------------------------------------------------------------------- - -Paddle二进制在运行时捕获了浮点数异常,只要出现浮点数异常(即训练过程中出现NaN或者Inf),立刻退出。浮点异常通常的原因是浮点数溢出、除零等问题。 -主要原因包括两个方面: - -* 训练过程中参数或者训练过程中的梯度尺度过大,导致参数累加,乘除等时候,导致了浮点数溢出。 -* 模型一直不收敛,发散到了一个数值特别大的地方。 -* 训练数据有问题,导致参数收敛到了一些奇异的情况。或者输入数据尺度过大,有些特征的取值达到数百万,这时进行矩阵乘法运算就可能导致浮点数溢出。 - -主要的解决办法是减小学习律或者对数据进行归一化处理。 + build_and_install/index_cn.rst + model/index_cn.rst + parameter/index_cn.rst + local/index_cn.rst + cluster/index_cn.rst diff --git a/doc/faq/local/index_cn.rst b/doc/faq/local/index_cn.rst new file mode 100644 index 0000000000000000000000000000000000000000..75c4ba028e497e29e9030a86514348726d9c0a80 --- /dev/null +++ b/doc/faq/local/index_cn.rst @@ -0,0 +1,213 @@ +############### +本地训练与预测 +############### + +.. contents:: + +1. 如何减少内存占用 +------------------- + +神经网络的训练本身是一个非常消耗内存和显存的工作,经常会消耗数10GB的内存和数GB的显存。 +PaddlePaddle的内存占用主要分为如下几个方面\: + +* DataProvider缓冲池内存(只针对内存) +* 神经元激活内存(针对内存和显存) +* 参数内存 (针对内存和显存) +* 其他内存杂项 + +其中,其他内存杂项是指PaddlePaddle本身所用的一些内存,包括字符串分配,临时变量等等,暂不考虑在内。 + +减少DataProvider缓冲池内存 +++++++++++++++++++++++++++ + +PyDataProvider使用的是异步加载,同时在内存里直接随即选取数据来做Shuffle。即 + +.. graphviz:: + + digraph { + rankdir=LR; + 数据文件 -> 内存池 -> PaddlePaddle训练 + } + +所以,减小这个内存池即可减小内存占用,同时也可以加速开始训练前数据载入的过程。但是,这 +个内存池实际上决定了shuffle的粒度。所以,如果将这个内存池减小,又要保证数据是随机的, +那么最好将数据文件在每次读取之前做一次shuffle。可能的代码为 + +.. literalinclude:: src/reduce_min_pool_size.py + +这样做可以极大的减少内存占用,并且可能会加速训练过程,详细文档参考 :ref:`api_pydataprovider2` 。 + +神经元激活内存 +++++++++++++++ + +神经网络在训练的时候,会对每一个激活暂存一些数据,如神经元激活值等。 +在反向传递的时候,这些数据会被用来更新参数。这些数据使用的内存主要和两个参数有关系, +一是batch size,另一个是每条序列(Sequence)长度。所以,其实也是和每个mini-batch中包含 +的时间步信息成正比。 + +所以做法可以有两种: + +* 减小batch size。 即在网络配置中 :code:`settings(batch_size=1000)` 设置成一个小一些的值。但是batch size本身是神经网络的超参数,减小batch size可能会对训练结果产生影响。 +* 减小序列的长度,或者直接扔掉非常长的序列。比如,一个数据集大部分序列长度是100-200, + 但是突然有一个10000长的序列,就很容易导致内存超限,特别是在LSTM等RNN中。 + +参数内存 +++++++++ + +PaddlePaddle支持非常多的优化算法(Optimizer),不同的优化算法需要使用不同大小的内存。 +例如使用 :code:`adadelta` 算法,则需要使用等于权重参数规模大约5倍的内存。举例,如果参数保存下来的模型目录 +文件为 :code:`100M`, 那么该优化算法至少需要 :code:`500M` 的内存。 + +可以考虑使用一些优化算法,例如 :code:`momentum`。 + +2. 如何加速训练速度 +------------------- + +加速PaddlePaddle训练可以考虑从以下几个方面\: + +* 减少数据载入的耗时 +* 加速训练速度 +* 利用分布式训练驾驭更多的计算资源 + +减少数据载入的耗时 +++++++++++++++++++ + +使用\ :code:`pydataprovider`\ 时,可以减少缓存池的大小,同时设置内存缓存功能,即可以极大的加速数据载入流程。 +:code:`DataProvider` 缓存池的减小,和之前减小通过减小缓存池来减小内存占用的原理一致。 + +.. literalinclude:: src/reduce_min_pool_size.py + +同时 :code:`@provider` 接口有一个 :code:`cache` 参数来控制缓存方法,将其设置成 :code:`CacheType.CACHE_PASS_IN_MEM` 的话,会将第一个 :code:`pass` (过完所有训练数据即为一个pass)生成的数据缓存在内存里,在之后的 :code:`pass` 中,不会再从 :code:`python` 端读取数据,而是直接从内存的缓存里读取数据。这也会极大减少数据读入的耗时。 + + +加速训练速度 +++++++++++++ + +PaddlePaddle支持Sparse的训练,sparse训练需要训练特征是 :code:`sparse_binary_vector` 、 :code:`sparse_vector` 、或者 :code:`integer_value` 的任一一种。同时,与这个训练数据交互的Layer,需要将其Parameter设置成 sparse 更新模式,即设置 :code:`sparse_update=True` + +这里使用简单的 :code:`word2vec` 训练语言模型距离,具体使用方法为\: + +使用一个词前两个词和后两个词,来预测这个中间的词。这个任务的DataProvider为\: + +.. literalinclude:: src/word2vec_dataprovider.py + +这个任务的配置为\: + +.. literalinclude:: src/word2vec_config.py + + +利用更多的计算资源 +++++++++++++++++++ + +利用更多的计算资源可以分为一下几个方式来进行\: + +* 单机CPU训练 + + * 使用多线程训练。设置命令行参数 :code:`trainer_count`。 + +* 单机GPU训练 + + * 使用显卡训练。设置命令行参数 :code:`use_gpu`。 + * 使用多块显卡训练。设置命令行参数 :code:`use_gpu` 和 :code:`trainer_count` 。 + +* 多机训练 + + * 请参考 :ref:`cluster_train` 。 + +3. 如何指定GPU设备 +------------------ + +例如机器上有4块GPU,编号从0开始,指定使用2、3号GPU: + +* 方式1:通过 `CUDA_VISIBLE_DEVICES `_ 环境变量来指定特定的GPU。 + +.. code-block:: bash + + env CUDA_VISIBLE_DEVICES=2,3 paddle train --use_gpu=true --trainer_count=2 + +* 方式2:通过命令行参数 ``--gpu_id`` 指定。 + +.. code-block:: bash + + paddle train --use_gpu=true --trainer_count=2 --gpu_id=2 + + +4. 训练过程中出现 :code:`Floating point exception`, 训练因此退出怎么办? +------------------------------------------------------------------------ + +Paddle二进制在运行时捕获了浮点数异常,只要出现浮点数异常(即训练过程中出现NaN或者Inf),立刻退出。浮点异常通常的原因是浮点数溢出、除零等问题。 +主要原因包括两个方面: + +* 训练过程中参数或者训练过程中的梯度尺度过大,导致参数累加,乘除等时候,导致了浮点数溢出。 +* 模型一直不收敛,发散到了一个数值特别大的地方。 +* 训练数据有问题,导致参数收敛到了一些奇异的情况。或者输入数据尺度过大,有些特征的取值达到数百万,这时进行矩阵乘法运算就可能导致浮点数溢出。 + +这里有两种有效的解决方法: + +1. 设置 :code:`gradient_clipping_threshold` 参数,示例代码如下: + +.. code-block:: python + +optimizer = paddle.optimizer.RMSProp( + learning_rate=1e-3, + gradient_clipping_threshold=10.0, + regularization=paddle.optimizer.L2Regularization(rate=8e-4)) + +具体可以参考 `nmt_without_attention `_ 示例。 + +2. 设置 :code:`error_clipping_threshold` 参数,示例代码如下: + +.. code-block:: python + +decoder_inputs = paddle.layer.fc( + act=paddle.activation.Linear(), + size=decoder_size * 3, + bias_attr=False, + input=[context, current_word], + layer_attr=paddle.attr.ExtraLayerAttribute( + error_clipping_threshold=100.0)) + +完整代码可以参考示例 `machine translation `_ 。 + +两种方法的区别: + +1. 两者都是对梯度的截断,但截断时机不同,前者在 :code:`optimzier` 更新网络参数时应用;后者在激活函数反向计算时被调用; +2. 截断对象不同:前者截断可学习参数的梯度,后者截断回传给前层的梯度; + +除此之外,还可以通过减小学习律或者对数据进行归一化处理来解决这类问题。 + +5. 如何调用 infer 接口输出多个layer的预测结果 +----------------------------------------------- + +* 将需要输出的层作为 :code:`paddle.inference.Inference()` 接口的 :code:`output_layer` 参数输入,代码如下: + +.. code-block:: python + + inferer = paddle.inference.Inference(output_layer=[layer1, layer2], parameters=parameters) + +* 指定要输出的字段进行输出。以输出 :code:`value` 字段为例,代码如下: + +.. code-block:: python + + out = inferer.infer(input=data_batch, field=["value"]) + +需要注意的是: + +* 如果指定了2个layer作为输出层,实际上需要的输出结果是两个矩阵; +* 假设第一个layer的输出A是一个 N1 * M1 的矩阵,第二个 Layer 的输出B是一个 N2 * M2 的矩阵; +* paddle.v2 默认会将A和B 横向拼接,当N1 和 N2 大小不一样时,会报如下的错误: + +.. code-block:: python + + ValueError: all the input array dimensions except for the concatenation axis must match exactly + +多个层的输出矩阵的高度不一致导致拼接失败,这种情况常常发生在: + +* 同时输出序列层和非序列层; +* 多个输出层处理多个不同长度的序列; + +此时可以在调用infer接口时通过设置 :code:`flatten_result=False` , 跳过“拼接”步骤,来解决上面的问题。这时,infer接口的返回值是一个python list: + +* list 中元素的个数等于网络中输出层的个数; +* list 中每个元素是一个layer的输出结果矩阵,类型是numpy的ndarray; +* 每一个layer输出矩阵的高度,在非序列输入时:等于样本数;序列输入时等于:输入序列中元素的总数;宽度等于配置中layer的size; diff --git a/doc/faq/src/reduce_min_pool_size.py b/doc/faq/local/src/reduce_min_pool_size.py similarity index 100% rename from doc/faq/src/reduce_min_pool_size.py rename to doc/faq/local/src/reduce_min_pool_size.py diff --git a/doc/faq/src/word2vec_config.py b/doc/faq/local/src/word2vec_config.py similarity index 100% rename from doc/faq/src/word2vec_config.py rename to doc/faq/local/src/word2vec_config.py diff --git a/doc/faq/src/word2vec_dataprovider.py b/doc/faq/local/src/word2vec_dataprovider.py similarity index 100% rename from doc/faq/src/word2vec_dataprovider.py rename to doc/faq/local/src/word2vec_dataprovider.py diff --git a/doc/faq/model/index_cn.rst b/doc/faq/model/index_cn.rst new file mode 100644 index 0000000000000000000000000000000000000000..b47bbe05bdb39d1ade9434a7e54bf6ca88a91cc9 --- /dev/null +++ b/doc/faq/model/index_cn.rst @@ -0,0 +1,69 @@ +######### +模型配置 +######### + +.. contents:: + +1. 出现 :code:`Duplicated layer name` 错误怎么办 +-------------------------------------------------- + +出现该错误的原因一般是用户对不同layer的参数 :code:`name` 设置了相同的取值。遇到该错误时,先找出参数 :code:`name` 取值相同的layer,然后将这些layer的参数 :code:`name` 设置为不同的值。 + +2. :code:`paddle.layer.memory` 的参数 :code:`name` 如何使用 +------------------------------------------------------------- + +* :code:`paddle.layer.memory` 用于获取特定layer上一时间步的输出,该layer是通过参数 :code:`name` 指定,即,:code:`paddle.layer.memory` 会关联参数 :code:`name` 取值相同的layer,并将该layer上一时间步的输出作为自身当前时间步的输出。 + +* PaddlePaddle的所有layer都有唯一的name,用户通过参数 :code:`name` 设定,当用户没有显式设定时,PaddlePaddle会自动设定。而 :code:`paddle.layer.memory` 不是真正的layer,其name由参数 :code:`memory_name` 设定,当用户没有显式设定时,PaddlePaddle会自动设定。:code:`paddle.layer.memory` 的参数 :code:`name` 用于指定其要关联的layer,需要用户显式设定。 + +3. 两种使用 drop_out 的方法有何区别 +------------------------------------ + +* 在PaddlePaddle中使用dropout有两种方式 + + * 在相应layer的 :code:`layer_atter` 设置 :code:`drop_rate`,以 :code:`paddle.layer.fc` 为例,代码如下: + + .. code-block:: python + + fc = paddle.layer.fc(input=input, layer_attr=paddle.attr.ExtraLayerAttribute(drop_rate=0.5)) + + * 使用 :code:`paddle.layer.dropout`,以 :code:`paddle.layer.fc` 为例,代码如下: + + .. code-block:: python + + fc = paddle.layer.fc(input=input) + drop_fc = paddle.layer.dropout(input=fc, dropout_rate=0.5) + +* :code:`paddle.layer.dropout` 实际上使用了 :code:`paddle.layer.add_to`,并在该layer里采用第一种方式设置 :code:`drop_rate` 来使用dropout的。这种方式对内存消耗较大。 + +* PaddlePaddle在激活函数里实现dropout,而不是在layer里实现。 + +* :code:`paddle.layer.lstmemory`、:code:`paddle.layer.grumemory`、:code:`paddle.layer.recurrent` 不是通过一般的方式来实现对输出的激活,所以不能采用第一种方式在这几个layer里设置 :code:`drop_rate` 来使用dropout。若要对这几个layer使用dropout,可采用第二种方式,即使用 :code:`paddle.layer.dropout`。 + +4. 不同的 recurrent layer 的区别 +---------------------------------- +以LSTM为例,在PaddlePaddle中包含以下 recurrent layer: + +* :code:`paddle.layer.lstmemory` +* :code:`paddle.networks.simple_lstm` +* :code:`paddle.networks.lstmemory_group` +* :code:`paddle.networks.bidirectional_lstm` + +按照具体实现方式可以归纳为2类: + +1. 由 recurrent_group 实现的 recurrent layer: + + * 用户在使用这一类recurrent layer时,可以访问由recurrent unit在一个时间步内计算得到的中间值(例如:hidden states, memory cells等); + * 上述的 :code:`paddle.networks.lstmemory_group` 是这一类的 recurrent layer ; + +2. 将recurrent layer作为一个整体来实现: + + * 用户在使用这一类recurrent layer,只能访问它们的输出值; + * 上述的 :code:`paddle.networks.lstmemory_group` 、 :code:`paddle.networks.simple_lstm` 和 :code:`paddle.networks.bidirectional_lstm` 属于这一类的实现; + +将recurrent layer作为一个整体来实现, 能够针对CPU和GPU的计算做更多优化, 所以相比于recurrent group的实现方式, 第二类 recurrent layer 计算效率更高。 在实际应用中,如果用户不需要访问LSTM的中间变量,而只需要获得recurrent layer计算的输出,我们建议使用第二类实现。 + +此外,关于LSTM, PaddlePaddle中还包含 :code:`paddle.networks.lstmemory_unit` 这一计算单元: + + * 不同于上述介绍的recurrent layer , :code:`paddle.networks.lstmemory_unit` 定义了LSTM单元在一个时间步内的计算过程,它并不是一个完整的recurrent layer,也不能接收序列数据作为输入; + * :code:`paddle.networks.lstmemory_unit` 只能在recurrent_group中作为step function使用; diff --git a/doc/faq/parameter/index_cn.rst b/doc/faq/parameter/index_cn.rst new file mode 100644 index 0000000000000000000000000000000000000000..c721b623183cc7d8d17e2c9fb1635ea07b8970cc --- /dev/null +++ b/doc/faq/parameter/index_cn.rst @@ -0,0 +1,201 @@ +######### +参数设置 +######### + +.. contents:: + +1. 如何选择SGD算法的学习率 +-------------------------- + +在采用sgd/async_sgd进行训练时,一个重要的问题是选择正确的learning_rate。如果learning_rate太大,那么训练有可能不收敛,如果learning_rate太小,那么收敛可能很慢,导致训练时间过长。 + +通常做法是从一个比较大的learning_rate开始试,如果不收敛,那减少学习率10倍继续试验,直到训练收敛为止。那么如何判断训练不收敛呢?可以估计出如果模型采用不变的输出最小的cost0是多少。 + +如果训练过程的的cost明显高于这个常数输出的cost,那么我们可以判断为训练不收敛。举一个例子,假如我们是三分类问题,采用multi-class-cross-entropy作为cost,数据中0,1,2三类的比例为 :code:`0.2, 0.5, 0.3` , 那么常数输出所能达到的最小cost是 :code:`-(0.2*log(0.2)+0.5*log(0.5)+0.3*log(0.3))=1.03` 。如果训练一个pass(或者更早)后,cost还大于这个数,那么可以认为训练不收敛,应该降低学习率。 + +2. 如何设置学习率退火(learning rate annealing) +------------------------------------------------ + +在相应的优化算法里设置learning_rate_schedule及相关参数,以使用Adam算法为例,代码如下: + +.. code-block:: python + + optimizer = paddle.optimizer.Adam( + learning_rate=1e-3, + learning_rate_decay_a=0.5, + learning_rate_decay_b=0.75, + learning_rate_schedule="poly",) + +PaddlePaddle目前支持8种learning_rate_schedule,这8种learning_rate_schedule及其对应学习率计算方式如下: + +* "constant" + + lr = learning_rate + +* "poly" + + lr = learning_rate * pow(1 + learning_rate_decay_a * num_samples_processed, -learning_rate_decay_b) + + 其中,num_samples_processed为已训练样本数,下同。 + +* "caffe_poly" + + lr = learning_rate * pow(1.0 - num_samples_processed / learning_rate_decay_a, learning_rate_decay_b) + +* "exp" + + lr = learning_rate * pow(learning_rate_decay_a, num_samples_processed / learning_rate_decay_b) + +* "discexp" + + lr = learning_rate * pow(learning_rate_decay_a, floor(num_samples_processed / learning_rate_decay_b)) + +* "linear" + + lr = max(learning_rate - learning_rate_decay_a * num_samples_processed, learning_rate_decay_b) + +* "manual" + + 这是一种按已训练样本数分段取值的学习率退火方法。使用该learning_rate_schedule时,用户通过参数 :code:`learning_rate_args` 设置学习率衰减因子分段函数,当前的学习率为所设置 :code:`learning_rate` 与当前的衰减因子的乘积。以使用Adam算法为例,代码如下: + + .. code-block:: python + + optimizer = paddle.optimizer.Adam( + learning_rate=1e-3, + learning_rate_schedule="manual", + learning_rate_args="1000:1.0,2000:0.9,3000:0.8",) + + 在该示例中,当已训练样本数小于等于1000时,学习率为 :code:`1e-3 * 1.0`;当已训练样本数大于1000小于等于2000时,学习率为 :code:`1e-3 * 0.9`;当已训练样本数大于2000时,学习率为 :code:`1e-3 * 0.8`。 + +* "pass_manual" + + 这是一种按已训练pass数分段取值的学习率退火方法。使用该learning_rate_schedule时,用户通过参数 :code:`learning_rate_args` 设置学习率衰减因子分段函数,当前的学习率为所设置 :code:`learning_rate` 与当前的衰减因子的乘积。以使用Adam算法为例,代码如下: + + .. code-block:: python + + optimizer = paddle.optimizer.Adam( + learning_rate=1e-3, + learning_rate_schedule="manual", + learning_rate_args="1:1.0,2:0.9,3:0.8",) + + 在该示例中,当已训练pass数小于等于1时,学习率为 :code:`1e-3 * 1.0`;当已训练pass数大于1小于等于2时,学习率为 :code:`1e-3 * 0.9`;当已训练pass数大于2时,学习率为 :code:`1e-3 * 0.8`。 + +3. 如何初始化参数 +----------------- + +默认情况下,PaddlePaddle使用均值0,标准差为 :math:`\frac{1}{\sqrt{d}}` 来初始化参数。其中 :math:`d` 为参数矩阵的宽度。这种初始化方式在一般情况下不会产生很差的结果。如果用户想要自定义初始化方式,PaddlePaddle目前提供两种参数初始化的方式\: + +* 高斯分布。将 :code:`param_attr` 设置成 :code:`param_attr=ParamAttr(initial_mean=0.0, initial_std=1.0)` +* 均匀分布。将 :code:`param_attr` 设置成 :code:`param_attr=ParamAttr(initial_max=1.0, initial_min=-1.0)` + +比如设置一个全连接层的参数初始化方式和bias初始化方式,可以使用如下代码。 + +.. code-block:: python + + hidden = fc_layer(input=ipt, param_attr=ParamAttr(initial_max=1.0, initial_min=-1.0), + bias_attr=ParamAttr(initial_mean=1.0, initial_std=0.0)) + +上述代码将bias全部初始化为1.0, 同时将参数初始化为 :code:`[1.0, -1.0]` 的均匀分布。 + +4. 如何共享参数 +--------------- + +PaddlePaddle的参数使用名字 :code:`name` 作为参数的ID,相同名字的参数,会共享参数。设置参数的名字,可以使用 :code:`ParamAttr(name="YOUR_PARAM_NAME")` 来设置。更方便的设置方式,是使得要共享的参数使用同样的 :code:`ParamAttr` 对象。 + +简单的全连接网络,参数共享的配置示例为\: + +.. literalinclude:: ../../python/paddle/trainer_config_helpers/tests/configs/shared_fc.py + +这里 :code:`hidden_a` 和 :code:`hidden_b` 使用了同样的parameter和bias。并且softmax层的两个输入也使用了同样的参数 :code:`softmax_param`。 + +5. 如何加载预训练参数 +------------------------ + +* 对加载预训练参数的层,设置其参数属性 :code:`is_static=True`,使该层的参数在训练过程中保持不变。以embedding层为例,代码如下: + +.. code-block:: python + + emb_para = paddle.attr.Param(name='emb', is_static=True) + paddle.layer.embedding(size=word_dim, input=x, param_attr=emb_para) + + +* 从模型文件将预训练参数载入 :code:`numpy.array`,在创建parameters后,使用 :code:`parameters.set()` 加载预训练参数。PaddlePaddle保存的模型参数文件前16字节为头信息,用户将参数载入 :code:`numpy.array` 时须从第17字节开始。以embedding层为例,代码如下: + +.. code-block:: python + + def load_parameter(file_name, h, w): + with open(file_name, 'rb') as f: + f.read(16) # skip header. + return np.fromfile(f, dtype=np.float32).reshape(h, w) + + parameters = paddle.parameters.create(my_cost) + parameters.set('emb', load_parameter(emb_param_file, 30000, 256)) + +6. 存储的参数格式是什么,如何和明文进行相互转化 +-------------------------------------------------- + +PaddlePaddle保存的模型参数文件内容由16字节头信息和网络参数两部分组成。头信息中,1~4字节表示PaddlePaddle版本信息,请直接填充0;5~8字节表示每个参数占用的字节数,当保存的网络参数为float类型时为4,double类型时为8;9~16字节表示保存的参数总个数。 + +将PaddlePaddle保存的模型参数还原回明文时,可以使用相应数据类型的 :code:`numpy.array` 加载具体网络参数,此时可以跳过PaddlePaddle模型参数文件的头信息。若在PaddlePaddle编译时,未指定按照double精度编译,默认情况下按照float精度计算,保存的参数也是float类型。这时在使用 :code:`numpy.array` 时,一般设置 :code:`dtype=float32` 。示例如下: + +.. code-block:: python + + def read_parameter(fname, width): + s = open(fname).read() + # skip header + vec = np.fromstring(s[16:], dtype=np.float32) + # width is the size of the corresponding layer + np.savetxt(fname + ".csv", vec.reshape(width, -1), + fmt="%.6f", delimiter=",") + + +将明文参数转化为PaddlePaddle可加载的模型参数时,首先构造头信息,再写入网络参数。下面的代码将随机生成的矩阵转化为可以被PaddlePaddle加载的模型参数。 + +.. code-block:: python + + def gen_rand_param(param_file, width, height, need_trans): + np.random.seed() + header = struct.pack("iil", 0, 4, height * width) + param = np.float32(np.random.rand(height, width)) + with open(param_file, "w") as fparam: + fparam.write(header + param.tostring()) + +7. A protocol message was rejected because it was too big +------------------------------------------------------------ + +如果在训练NLP相关模型时,出现以下错误: + +.. code-block:: bash + + [libprotobuf ERROR google/protobuf/io/coded_stream.cc:171] A protocol message was rejected because it was too big (more than 67108864 bytes). To increase the limit (or to disable these warnings), see CodedInputStream::SetTotalBytesLimit() in google/protobuf/io/coded_stream.h. + F1205 14:59:50.295174 14703 TrainerConfigHelper.cpp:59] Check failed: m->conf.ParseFromString(configProtoStr) + +可能的原因是:传给dataprovider的某一个args过大,一般是由于直接传递大字典导致的。错误的define_py_data_sources2类似: + +.. code-block:: python + + src_dict = dict() + for line_count, line in enumerate(open(src_dict_path, "r")): + src_dict[line.strip()] = line_count + + define_py_data_sources2( + train_list, + test_list, + module="dataprovider", + obj="process", + args={"src_dict": src_dict}) + +解决方案是:将字典的地址作为args传给dataprovider,然后在dataprovider里面根据该地址加载字典。即define_py_data_sources2应改为: + +.. code-block:: python + + define_py_data_sources2( + train_list, + test_list, + module="dataprovider", + obj="process", + args={"src_dict_path": src_dict_path}) + +完整源码可参考 `seqToseq `_ 示例。 + + diff --git a/doc/getstarted/basic_usage/index_cn.rst b/doc/getstarted/basic_usage/index_cn.rst index 428f58830e0b10c024f31238b7404c6df193eecd..b473944fc7fb89d3e0a0b330933f2226734bb5bd 100644 --- a/doc/getstarted/basic_usage/index_cn.rst +++ b/doc/getstarted/basic_usage/index_cn.rst @@ -55,7 +55,7 @@ PaddlePaddle是源于百度的一个深度学习平台。这份简短的介绍 # 线性计算网络层: ȳ = wx + b ȳ = fc_layer(input=x, param_attr=ParamAttr(name='w'), size=1, act=LinearActivation(), bias_attr=ParamAttr(name='b')) # 计算误差函数,即 ȳ 和真实 y 之间的距离 - cost = mse_cost(input= ȳ, label=y) + cost = square_error_cost(input= ȳ, label=y) outputs(cost) @@ -69,7 +69,7 @@ PaddlePaddle是源于百度的一个深度学习平台。这份简短的介绍 - **数据层**:数据层 `data_layer` 是神经网络的入口,它读入数据并将它们传输到接下来的网络层。这里数据层有两个,分别对应于变量 `x` 和 `y`。 - **全连接层**:全连接层 `fc_layer` 是基础的计算单元,这里利用它建模变量之间的线性关系。计算单元是神经网络的核心,PaddlePaddle支持大量的计算单元和任意深度的网络连接,从而可以拟合任意的函数来学习复杂的数据关系。 - - **回归误差代价层**:回归误差代价层 `mse_cost` 是众多误差代价函数层的一种,它们在训练过程作为网络的出口,用来计算模型的误差,是模型参数优化的目标函数。 + - **回归误差代价层**:回归误差代价层 `square_error_cost` 是众多误差代价函数层的一种,它们在训练过程作为网络的出口,用来计算模型的误差,是模型参数优化的目标函数。 定义了网络结构并保存为 `trainer_config.py` 之后,运行以下训练命令: diff --git a/doc/getstarted/basic_usage/index_en.rst b/doc/getstarted/basic_usage/index_en.rst index 6775da20c2f51000f305b095d40abd27b8fa6c0e..2cc438ebbe0f97345d25354b93b4ebbd43502415 100644 --- a/doc/getstarted/basic_usage/index_en.rst +++ b/doc/getstarted/basic_usage/index_en.rst @@ -49,7 +49,7 @@ To recover this relationship between ``X`` and ``Y``, we use a neural network wi x = data_layer(name='x', size=1) y = data_layer(name='y', size=1) y_predict = fc_layer(input=x, param_attr=ParamAttr(name='w'), size=1, act=LinearActivation(), bias_attr=ParamAttr(name='b')) - cost = mse_cost(input=y_predict, label=y) + cost = square_error_cost(input=y_predict, label=y) outputs(cost) Some of the most fundamental usages of PaddlePaddle are demonstrated: diff --git a/doc/getstarted/build_and_install/build_from_source_en.md b/doc/getstarted/build_and_install/build_from_source_en.md index 69f4501f370dcc9d603ec54a63d68568d66e832e..2f1461489495618718d5abaeab9cbeda9b93700f 100644 --- a/doc/getstarted/build_and_install/build_from_source_en.md +++ b/doc/getstarted/build_and_install/build_from_source_en.md @@ -22,6 +22,7 @@ To compile the source code, your computer must be equipped with the following de - **CMake**: CMake >= 3.0 (at least CMake 3.4 on Mac OS X) - **BLAS**: MKL, OpenBlas or ATLAS - **Python**: only support Python 2.7 +- **Go** **Note:** For CUDA 7.0 and CUDA 7.5, GCC 5.0 and up are not supported! For CUDA 8.0, GCC versions later than 5.3 are not supported! @@ -67,7 +68,7 @@ As a simple example, consider the following: 1. **BLAS Dependencies(optional)** - CMake will search BLAS libraries from system. If not found, OpenBLAS will be downloaded, built and installed automatically. + CMake will search BLAS libraries from the system. If not found, OpenBLAS will be downloaded, built and installed automatically. To utilize preinstalled BLAS, you can simply specify MKL, OpenBLAS or ATLAS via `MKL_ROOT`, `OPENBLAS_ROOT` or `ATLAS_ROOT`. ```bash @@ -107,6 +108,18 @@ As a simple example, consider the following: sudo apt-get install -y python python-pip python-numpy libpython-dev bison sudo pip install 'protobuf==3.1.0.post1' + # Install Go + # You can follow https://golang.org/doc/install for a detailed explanation. + wget -O go.tgz https://storage.googleapis.com/golang/go1.8.1.linux-amd64.tar.gz && \ + tar -C $HOME -xzf go.tgz && \ + mkdir $HOME/gopath && \ + rm go.tgz + + # Setup environment variables + export GOROOT=$HOME/go + export GOPATH=$HOME/gopath + export PATH=$PATH:$GOROOT/bin + # install cmake 3.4 curl -sSL https://cmake.org/files/v3.4/cmake-3.4.1.tar.gz | tar -xz && \ cd cmake-3.4.1 && ./bootstrap && make -j4 && sudo make install && \ @@ -118,9 +131,9 @@ As a simple example, consider the following: To build GPU version, you will need the following installed: 1. a CUDA-capable GPU - 2. A supported version of Linux with a gcc compiler and toolchain + 2. A supported version of Linux with a GCC compiler and toolchain 3. NVIDIA CUDA Toolkit (available at http://developer.nvidia.com/cuda-downloads) - 4. NVIDIA cuDNN Library (availabel at https://developer.nvidia.com/cudnn) + 4. NVIDIA cuDNN Library (available at https://developer.nvidia.com/cudnn) The CUDA development environment relies on tight integration with the host development environment, including the host compiler and C runtime libraries, and is therefore only supported on @@ -159,6 +172,7 @@ export PATH=/bin:$PATH # install PaddlePaddle Python modules. sudo pip install /opt/paddle/share/wheels/*.whl ``` + ## Build on Centos 7 ### Install Dependencies @@ -179,9 +193,9 @@ sudo pip install /opt/paddle/share/wheels/*.whl To build GPU version, you will need the following installed: 1. a CUDA-capable GPU - 2. A supported version of Linux with a gcc compiler and toolchain + 2. A supported version of Linux with a GCC compiler and toolchain 3. NVIDIA CUDA Toolkit (available at http://developer.nvidia.com/cuda-downloads) - 4. NVIDIA cuDNN Library (availabel at https://developer.nvidia.com/cudnn) + 4. NVIDIA cuDNN Library (available at https://developer.nvidia.com/cudnn) The CUDA development environment relies on tight integration with the host development environment, including the host compiler and C runtime libraries, and is therefore only supported on @@ -209,7 +223,7 @@ mkdir build && cd build ``` Finally, you can build and install PaddlePaddle: - + ```bash # you can add build option here, such as: cmake3 .. -DCMAKE_INSTALL_PREFIX= diff --git a/doc/getstarted/build_and_install/docker_install_cn.rst b/doc/getstarted/build_and_install/docker_install_cn.rst index da2d4234658b6ea4730346e721437cc1633c4362..30b144d849bec367cd0197b6082889e011193a9a 100644 --- a/doc/getstarted/build_and_install/docker_install_cn.rst +++ b/doc/getstarted/build_and_install/docker_install_cn.rst @@ -3,6 +3,43 @@ PaddlePaddle的Docker容器使用方式 PaddlePaddle目前唯一官方支持的运行的方式是Docker容器。因为Docker能在所有主要操作系统(包括Linux,Mac OS X和Windows)上运行。 请注意,您需要更改 `Dockers设置 `_ 才能充分利用Mac OS X和Windows上的硬件资源。 +Docker使用入门 +------------------------------ + +几个基础的概念帮助理解和使用Docker: + +- *镜像*:一个Docker镜像是一个打包好的软件。它包含了这个软件本身和它所依赖的运行环境。PaddlePaddle的Docker镜像就包含了PaddlePaddle的Python库以及其依赖的多个Python库。这样我们可以直接在Docker中运行需要的程序而不需要安装后在执行。可以执行: + + .. code-block:: bash + + docker images + + 来列出当前系统中的所有镜像,同样可以执行: + + .. code-block:: bash + + docker pull paddlepaddle/paddle:0.10.0 + + 来下载Docker镜像,paddlepaddle/paddle是从官方镜像源Dockerhub.com下载的,推荐国内用户使用docker.paddlepaddle.org/paddle下载。 + +- *容器*: 如果说一个Docker镜像就是一个程序,那容器就是这个程序运行时产生的“进程”。 + 实际上,一个容器就是一个操作系统的进程,但是是运行在独立的进程空间,文件系统以及网络之上。 + 可以执行: + + .. code-block:: bash + + docker run paddlepaddle/paddle:0.10.0 + + 来使用一个镜像启动一个容器。 + +- 默认情况下,Docker容器会运行在独立的文件系统空间之上,我们无法在Docker容器中 + 访问到主机上的文件。可以通过*挂载Volume*的方式,将主机上的文件或目录挂载到 + Docker容器中。下面的命令把当前目录挂载到了容器中的 /data 目录下,容器使用 + debian镜像,并且启动后执行 :code:`ls /data`。 + + .. code-block:: bash + + docker run --rm -v $(pwd):/data debian ls /data PaddlePaddle发布的Docker镜像使用说明 ------------------------------ @@ -12,13 +49,13 @@ PaddlePaddle需要的所有编译工具。把编译出来的PaddlePaddle也打 像,称为生产镜像,里面涵盖了PaddlePaddle运行所需的所有环境。每次 PaddlePaddle发布新版本的时候都会发布对应版本的生产镜像以及开发镜像。运 行镜像包括纯CPU版本和GPU版本以及其对应的非AVX版本。我们会在 -`dockerhub.com `_ 提供最新 -的Docker镜像,可以在"tags"标签下找到最新的Paddle镜像版本。为了方便在国 -内的开发者下载Docker镜像,我们提供了国内的镜像服务器供大家使用。如果您 -在国内,请把文档里命令中的paddlepaddle/paddle替换成 -docker.paddlepaddle.org/paddle。 +`dockerhub.com `_ +和国内镜像`docker.paddlepaddle.org` 提供最新 +的Docker镜像,可以在"tags"标签下找到最新的Paddle镜像版本。 -1. 开发镜像::code:`paddlepaddle/paddle:-dev` +**注意:为了方便在国内的开发者下载Docker镜像,我们提供了国内的镜像服务器供大家使用。如果您在国内,请把文档里命令中的paddlepaddle/paddle替换成docker.paddlepaddle.org/paddle。** + +1. 开发镜像::code:`paddlepaddle/paddle:0.10.0-dev` 这个镜像包含了Paddle相关的开发工具以及编译和运行环境。用户可以使用开发镜像代替配置本地环境,完成开发,编译,发布, 文档编写等工作。由于不同的Paddle的版本可能需要不同的依赖和工具,所以如果需要自行配置开发环境需要考虑版本的因素。 @@ -37,13 +74,13 @@ docker.paddlepaddle.org/paddle。 .. code-block:: bash - docker run -it --rm paddlepaddle/paddle:-dev /bin/bash + docker run -it --rm -v $(pwd):/paddle paddlepaddle/paddle:0.10.0-dev /bin/bash 或者,可以以后台进程方式运行容器: .. code-block:: bash - docker run -d -p 2202:22 -p 8888:8888 paddledev/paddle:-dev + docker run -d -p 2202:22 -p 8888:8888 -v $(pwd):/paddle paddlepaddle/paddle:0.10.0-dev /usr/sbin/sshd -D 然后用密码 :code:`root` SSH进入容器: @@ -68,12 +105,14 @@ docker.paddlepaddle.org/paddle。 如果输出是No,就需要选择使用no-AVX的镜像 + **注:在0.10.0之后的版本,PaddlePaddle都可以自动判断硬件是否支持AVX,所以无需判断AVX即可使用** + 以上方法在GPU镜像里也能用,只是请不要忘记提前在物理机上安装GPU最新驱动。 为了保证GPU驱动能够在镜像里面正常运行,我们推荐使用[nvidia-docker](https://github.com/NVIDIA/nvidia-docker)来运行镜像。 .. code-block:: bash - nvidia-docker run -it --rm paddledev/paddle:0.10.0rc1-gpu /bin/bash + nvidia-docker run -it --rm paddledev/paddle:0.10.0-gpu /bin/bash 注意: 如果使用nvidia-docker存在问题,你也许可以尝试更老的方法,具体如下,但是我们并不推荐这种方法。: @@ -81,7 +120,7 @@ docker.paddlepaddle.org/paddle。 export CUDA_SO="$(\ls /usr/lib64/libcuda* | xargs -I{} echo '-v {}:{}') $(\ls /usr/lib64/libnvidia* | xargs -I{} echo '-v {}:{}')" export DEVICES=$(\ls /dev/nvidia* | xargs -I{} echo '--device {}:{}') - docker run ${CUDA_SO} ${DEVICES} -it paddledev/paddle:-gpu + docker run ${CUDA_SO} ${DEVICES} -it paddledev/paddle:0.10.0-gpu 3. 运行以及发布您的AI程序 @@ -98,7 +137,7 @@ docker.paddlepaddle.org/paddle。 nvidia-docker run -it -v $PWD:/work paddle /work/a.py - 这里`a.py`包含的所有依赖假设都可以在Paddle的运行容器中。如果需要包含更多的依赖、或者需要发布您的应用的镜像,可以编写`Dockerfile`使用`FROM paddledev/paddle:` + 这里`a.py`包含的所有依赖假设都可以在Paddle的运行容器中。如果需要包含更多的依赖、或者需要发布您的应用的镜像,可以编写`Dockerfile`使用`FROM paddledev/paddle:0.10.0` 创建和发布自己的AI程序镜像。 运行PaddlePaddle Book @@ -177,7 +216,7 @@ Paddle的Docker开发镜像带有一个通过 `woboq code browser .. code-block:: bash - docker run -d --name paddle-cpu-doc paddle:-dev + docker run -d --name paddle-cpu-doc paddle:0.10.0-dev docker run -d --volumes-from paddle-cpu-doc -p 8088:80 nginx 接着我们就能够打开浏览器在 http://localhost:8088/paddle/ 浏览代码。 diff --git a/doc/getstarted/build_and_install/docker_install_en.rst b/doc/getstarted/build_and_install/docker_install_en.rst index 03df497506099d2fb758bd0ab437d2c082f2b537..94860240f6a4a9bed8a865684a8a79960489280e 100644 --- a/doc/getstarted/build_and_install/docker_install_en.rst +++ b/doc/getstarted/build_and_install/docker_install_en.rst @@ -23,7 +23,7 @@ Docker is simple as long as we understand a few basic concepts: .. code-block:: bash - docker pull paddlepaddle/paddle:0.10.0rc2 + docker pull paddlepaddle/paddle:0.10.0 to download a Docker image, paddlepaddle/paddle in this example, from Dockerhub.com. @@ -35,7 +35,7 @@ Docker is simple as long as we understand a few basic concepts: .. code-block:: bash - docker run paddlepaddle/paddle:0.10.0rc2 + docker run paddlepaddle/paddle:0.10.0 to start a container to run a Docker image, paddlepaddle/paddle in this example. @@ -62,13 +62,36 @@ of PaddlePaddle, we release both of them. Production image includes CPU-only version and a CUDA GPU version and their no-AVX versions. We put the docker images on `dockerhub.com -`_. You can find the -latest versions under "tags" tab at dockerhub.com. If you are in -China, you can use our Docker image registry mirror to speed up the -download process. To use it, please replace all paddlepaddle/paddle in -the commands to docker.paddlepaddle.org/paddle. +`_. You can find the +latest versions under "tags" tab at dockerhub.com. -1. Production images, this image might have multiple variants: +** NOTE: If you are in China, you can use our Docker image registry mirror to speed up the download process. To use it, please replace all paddlepaddle/paddle in the commands to docker.paddlepaddle.org/paddle.** + + +1. development image :code:`paddlepaddle/paddle:-dev` + + This image has packed related develop tools and runtime + environment. Users and developers can use this image instead of + their own local computer to accomplish development, build, + releasing, document writing etc. While different version of paddle + may depends on different version of libraries and tools, if you + want to setup a local environment, you must pay attention to the + versions. The development image contains: + + - gcc/clang + - nvcc + - Python + - sphinx + - woboq + - sshd + + Many developers use servers with GPUs, they can use ssh to login to + the server and run :code:`docker exec` to enter the docker + container and start their work. Also they can start a development + docker image with SSHD service, so they can login to the container + and start work. + +2. Production images, this image might have multiple variants: - GPU/AVX::code:`paddlepaddle/paddle:-gpu` - GPU/no-AVX::code:`paddlepaddle/paddle:-gpu-noavx` @@ -84,12 +107,12 @@ the commands to docker.paddlepaddle.org/paddle. if cat /proc/cpuinfo | grep -i avx; then echo Yes; else echo No; fi - + **NOTE:versions after 0.10.0 will automatically detect system AVX support, so manual detect is not needed in this case.** To run the CPU-only image as an interactive container: .. code-block:: bash - docker run -it --rm paddlepaddle/paddle:0.10.0rc2 /bin/bash + docker run -it --rm paddlepaddle/paddle:0.10.0 /bin/bash Above method work with the GPU image too -- the recommended way is using `nvidia-docker `_. @@ -101,30 +124,7 @@ the commands to docker.paddlepaddle.org/paddle. .. code-block:: bash - nvidia-docker run -it --rm paddlepaddle/paddle:0.10.0rc2-gpu /bin/bash - -2. development image :code:`paddlepaddle/paddle:-dev` - - This image has packed related develop tools and runtime - environment. Users and developers can use this image instead of - their own local computer to accomplish development, build, - releasing, document writing etc. While different version of paddle - may depends on different version of libraries and tools, if you - want to setup a local environment, you must pay attention to the - versions. The development image contains: - - - gcc/clang - - nvcc - - Python - - sphinx - - woboq - - sshd - - Many developers use servers with GPUs, they can use ssh to login to - the server and run :code:`docker exec` to enter the docker - container and start their work. Also they can start a development - docker image with SSHD service, so they can login to the container - and start work. + nvidia-docker run -it --rm paddlepaddle/paddle:0.10.0-gpu /bin/bash Train Model Using Python API @@ -149,13 +149,13 @@ Run the program using docker: .. code-block:: bash - docker run --rm -v ~/workspace:/workspace paddlepaddle/paddle:0.10.0rc2 python /workspace/example.py + docker run --rm -v ~/workspace:/workspace paddlepaddle/paddle:0.10.0 python /workspace/example.py Or if you are using GPU for training: .. code-block:: bash - nvidia-docker run --rm -v ~/workspace:/workspace paddlepaddle/paddle:0.10.0rc2-gpu python /workspace/example.py + nvidia-docker run --rm -v ~/workspace:/workspace paddlepaddle/paddle:0.10.0-gpu python /workspace/example.py Above commands will start a docker container by running :code:`python /workspace/example.py`. It will stop once :code:`python @@ -166,7 +166,7 @@ run PaddlePaddle program interactively: .. code-block:: bash - docker run -it -v ~/workspace:/workspace paddlepaddle/paddle:0.10.0rc2 /bin/bash + docker run -it -v ~/workspace:/workspace paddlepaddle/paddle:0.10.0 /bin/bash # now we are inside docker container cd /workspace python example.py @@ -175,7 +175,7 @@ Running with GPU is identical: .. code-block:: bash - nvidia-docker run -it -v ~/workspace:/workspace paddlepaddle/paddle:0.10.0rc2-gpu /bin/bash + nvidia-docker run -it -v ~/workspace:/workspace paddlepaddle/paddle:0.10.0-gpu /bin/bash # now we are inside docker container cd /workspace python example.py diff --git a/doc/getstarted/build_and_install/index_cn.rst b/doc/getstarted/build_and_install/index_cn.rst index a24df6c518fad84a48061ecb34ee46cb312a4995..dd9923697ab85825557aa89a08870bece7c76673 100644 --- a/doc/getstarted/build_and_install/index_cn.rst +++ b/doc/getstarted/build_and_install/index_cn.rst @@ -6,14 +6,12 @@ 安装流程 ++++++++ -PaddlePaddle提供数个预编译的二进制来进行安装,包括Docker镜像,ubuntu的deb安装包等。我们推荐使用Docker镜像来部署环境,同时欢迎贡献更多的安装包。 +PaddlePaddle提供Docker镜像来部署环境。 .. toctree:: :maxdepth: 1 docker_install_cn.rst - ubuntu_install_cn.rst - 编译流程 diff --git a/doc/getstarted/build_and_install/index_en.rst b/doc/getstarted/build_and_install/index_en.rst index 1bfd4f75c0b9b82d61d28a30f03181f7be159f24..8a53588e0439df8f4d5fd529b7a20262c67d4e58 100644 --- a/doc/getstarted/build_and_install/index_en.rst +++ b/doc/getstarted/build_and_install/index_en.rst @@ -8,14 +8,13 @@ Install PaddlePaddle :maxdepth: 1 docker_install_en.rst - ubuntu_install_en.rst Build from Source ----------------- .. warning:: - Please use :code:`deb` package or :code:`docker` image to install paddle. The building guide is used for hacking or contributing PaddlePaddle source code. + Please use :code:`docker` image to install paddle. The building guide is used for hacking or contributing PaddlePaddle source code. .. toctree:: :maxdepth: 1 diff --git a/doc/getstarted/build_and_install/ubuntu_install_cn.rst b/doc/getstarted/build_and_install/ubuntu_install_cn.rst deleted file mode 100644 index 9e39ccb00f5d5655c30148900a3d76a22aacfc01..0000000000000000000000000000000000000000 --- a/doc/getstarted/build_and_install/ubuntu_install_cn.rst +++ /dev/null @@ -1,71 +0,0 @@ -Ubuntu部署PaddlePaddle -=================================== - -PaddlePaddle提供了ubuntu 14.04 deb安装包。 - -安装 ------- - -安装包的下载地址是\: https://github.com/PaddlePaddle/Paddle/releases - -它包含四个版本\: - -* cpu版本: 支持主流x86处理器平台, 使用了avx指令集。 - -* cpu-noavx版本:支持主流x86处理器平台,没有使用avx指令集。 - -* gpu版本:支持主流x86处理器平台,支持nvidia cuda平台,使用了avx指令集。 - -* gpu-noavx版本:支持主流x86处理器平台,支持nvidia cuda平台,没有使用avx指令集。 - -下载完相关安装包后,执行: - -.. code-block:: shell - - sudo apt-get install gdebi - gdebi paddle-*-cpu.deb - -或者: - -.. code-block:: shell - - dpkg -i paddle-*-cpu.deb - apt-get install -f - - -在 :code:`dpkg -i` 的时候如果报一些依赖未找到的错误是正常的, -在 :code:`apt-get install -f` 里会继续安装 PaddlePaddle。 - -安装完成后,可以使用命令 :code:`paddle version` 查看安装后的paddle 版本: - -.. code-block:: shell - - PaddlePaddle 0.8.0b1, compiled with - with_avx: ON - with_gpu: OFF - with_double: OFF - with_python: ON - with_rdma: OFF - with_timer: OFF - with_predict_sdk: - - -可能遇到的问题 --------------- - -libcudart.so/libcudnn.so找不到 -++++++++++++++++++++++++++++++ - -安装完成后,运行 :code:`paddle train` 报错\: - -.. code-block:: shell - - 0831 12:36:04.151525 1085 hl_dso_loader.cc:70] Check failed: nullptr != *dso_handle For Gpu version of PaddlePaddle, it couldn't find CUDA library: libcudart.so Please make sure you already specify its path.Note: for training data on Cpu using Gpu version of PaddlePaddle,you must specify libcudart.so via LD_LIBRARY_PATH. - -原因是未设置cuda运行时环境变量。 如果使用GPU版本的PaddlePaddle,请安装CUDA 7.5 和CUDNN 5到本地环境中,并设置: - -.. code-block:: shell - - export LD_LIBRARY_PATH=/usr/local/cuda/lib64:/usr/local/cuda/lib:$LD_LIBRARY_PATH - export PATH=/usr/local/cuda/bin:$PATH - diff --git a/doc/getstarted/build_and_install/ubuntu_install_en.rst b/doc/getstarted/build_and_install/ubuntu_install_en.rst deleted file mode 100644 index ea8042085bf458be96e71017d229d88ad867695b..0000000000000000000000000000000000000000 --- a/doc/getstarted/build_and_install/ubuntu_install_en.rst +++ /dev/null @@ -1,25 +0,0 @@ -Debian Package installation guide -================================= - -PaddlePaddle supports :code:`deb` pacakge. The installation of this :code:`deb` package is tested in ubuntu 14.04, but it should be support other debian based linux, too. - -There are four versions of debian package, :code:`cpu`, :code:`gpu`, :code:`cpu-noavx`, :code:`gpu-noavx`. And :code:`noavx` version is used to support CPU which does not contain :code:`AVX` instructions. The download url of :code:`deb` package is \: https://github.com/baidu/Paddle/releases/ - - -After downloading PaddlePaddle deb packages, you can use :code:`gdebi` install. - -.. code-block:: bash - - gdebi paddle-*.deb - -If :code:`gdebi` is not installed, you can use :code:`sudo apt-get install gdebi` to install it. - -Or you can use following commands to install PaddlePaddle. - -.. code-block:: bash - - dpkg -i paddle-*.deb - apt-get install -f - -And if you use GPU version deb package, you need to install CUDA toolkit and cuDNN, and set related environment variables(such as LD_LIBRARY_PATH) first. It is normal when `dpkg -i` get errors. `apt-get install -f` will continue install paddle, and install dependences. - diff --git a/doc/getstarted/concepts/src/train.py b/doc/getstarted/concepts/src/train.py index 679d0a931a7d650108ea89a04080a55d2976f72e..8aceb23406a476f08639cc6223cdf730b728a705 100644 --- a/doc/getstarted/concepts/src/train.py +++ b/doc/getstarted/concepts/src/train.py @@ -8,7 +8,7 @@ paddle.init(use_gpu=False) x = paddle.layer.data(name='x', type=paddle.data_type.dense_vector(2)) y_predict = paddle.layer.fc(input=x, size=1, act=paddle.activation.Linear()) y = paddle.layer.data(name='y', type=paddle.data_type.dense_vector(1)) -cost = paddle.layer.mse_cost(input=y_predict, label=y) +cost = paddle.layer.square_error_cost(input=y_predict, label=y) # create parameters parameters = paddle.parameters.create(cost) @@ -31,7 +31,7 @@ def event_handler(event): # define training dataset reader def train_reader(): train_x = np.array([[1, 1], [1, 2], [3, 4], [5, 2]]) - train_y = np.array([-2, -3, -7, -7]) + train_y = np.array([[-2], [-3], [-7], [-7]]) def reader(): for i in xrange(train_y.shape[0]): diff --git a/doc/getstarted/concepts/use_concepts_cn.rst b/doc/getstarted/concepts/use_concepts_cn.rst index e63ca11102c8ce457afcc3c262fa5f159361c01d..c243083794bb3c4659242de99b3b2715af9d7c24 100644 --- a/doc/getstarted/concepts/use_concepts_cn.rst +++ b/doc/getstarted/concepts/use_concepts_cn.rst @@ -81,9 +81,9 @@ PaddlePaddle支持不同类型的输入数据,主要包括四种类型,和 .. code-block:: bash y_predict = paddle.layer.fc(input=x, size=1, act=paddle.activation.Linear()) - cost = paddle.layer.mse_cost(input=y_predict, label=y) + cost = paddle.layer.square_error_cost(input=y_predict, label=y) -其中,x与y为之前描述的输入层;而y_predict是接收x作为输入,接上一个全连接层;cost接收y_predict与y作为输入,接上均方误差层。 +其中,x与y为之前描述的输入层;而y_predict是接收x作为输入,接上一个全连接层;cost接收y_predict与y作为输入,接上平方误差层。 最后一层cost中记录了神经网络的所有拓扑结构,通过组合不同的layer,我们即可完成神经网络的搭建。 @@ -111,7 +111,7 @@ PaddlePaddle支持不同类型的输入数据,主要包括四种类型,和 # define training dataset reader def train_reader(): train_x = np.array([[1, 1], [1, 2], [3, 4], [5, 2]]) - train_y = np.array([-2, -3, -7, -7]) + train_y = np.array([[-2], [-3], [-7], [-7]]) def reader(): for i in xrange(train_y.shape[0]): yield train_x[i], train_y[i] @@ -147,4 +147,4 @@ PaddlePaddle支持不同类型的输入数据,主要包括四种类型,和 .. literalinclude:: src/train.py :linenos: -有关线性回归的实际应用,可以参考PaddlePaddle book的 `第一章节 `_。 \ No newline at end of file +有关线性回归的实际应用,可以参考PaddlePaddle book的 `第一章节 `_。 diff --git a/doc/getstarted/index_cn.rst b/doc/getstarted/index_cn.rst index 0cb27f802c40ef123fdc9c6799aad3b2a5f554c0..aa418c657a4ba16cce61c030066f4d3e14e891cc 100644 --- a/doc/getstarted/index_cn.rst +++ b/doc/getstarted/index_cn.rst @@ -7,4 +7,4 @@ build_and_install/index_cn.rst concepts/use_concepts_cn.rst -- `深度学习入门课程 `_ +- `深度学习入门课程 `_ diff --git a/doc/getstarted/index_en.rst b/doc/getstarted/index_en.rst index 9f771e93e8b63eb98e31ec12667bd1aa007af20e..be3253e3d41b99a2b696e2c5ef6463ed49680d69 100644 --- a/doc/getstarted/index_en.rst +++ b/doc/getstarted/index_en.rst @@ -6,4 +6,4 @@ GET STARTED build_and_install/index_en.rst -- `Deep Learning 101 `_ +- `Deep Learning 101 `_ diff --git a/doc/howto/cross_compiling/cross_compiling_for_android_cn.md b/doc/howto/cross_compiling/cross_compiling_for_android_cn.md new file mode 100644 index 0000000000000000000000000000000000000000..90dc84718c9ce1374cda6022de177afeeb60279d --- /dev/null +++ b/doc/howto/cross_compiling/cross_compiling_for_android_cn.md @@ -0,0 +1,75 @@ +# 构建Android平台上的PaddlePaddle库 + +用户可通过交叉编译的方式,在用户熟悉的开发平台(Linux,Mac OS X和Windows)上编译Android平台上适用的PaddlePaddle库。 +本文档将以Linux x86-64平台为例,介绍交叉编译Android平台上适用的PaddlePaddle库的方法和步骤。 + +## 准备交叉编译环境 + +从源码交叉编译PaddlePaddle,用户需要提前准备好交叉编译环境。Android平台上使用的C/C++交叉编译工具链为[Android NDK](https://developer.android.com/ndk/downloads/index.html?hl=zh-cn),用户可自行前往下载预编译好的版本,也可通过以下命令获取: + +```bash +wget -q https://dl.google.com/android/repository/android-ndk-r14b-linux-x86_64.zip +unzip -q android-ndk-r14b-linux-x86_64.zip +``` + +Android NDK中包含了所有Android API级别、所有架构(arm/arm64/x86/mips)需要用到的编译工具和系统库。用户可根据自己的编译目标架构、所需支持的最低Android API级别,构建[独立工具链](https://developer.android.google.cn/ndk/guides/standalone_toolchain.html?hl=zh-cn)。 +比如: + +```bash +your/path/to/android-ndk-r14b-linux-x86_64/build/tools/make-standalone-toolchain.sh \ + --arch=arm --platform=android-21 --install-dir=your/path/to/my_standalone_toolchain +``` + +此命令将在your/path/to/my_standalone_toolchain目录生成一套编译工具链,面向架构为32位ARM架构,支持的最小的Android API级别为21,使用的编译器为arm-linux-androideabi-gcc (GCC) 4.9。 + +注意:**PaddlePaddle要求使用的编译工具链所支持的Andoid API级别不小于21**。 + +## 配置交叉编译参数 + +CMake系统对交叉编译提供了支持[cmake-toolchains](https://cmake.org/cmake/help/v3.0/manual/cmake-toolchains.7.html#cross-compiling)。为了简化cmake配置,PaddlePaddle为交叉编译提供了工具链配置文档[cmake/cross_compiling/android.cmake](https://github.com/PaddlePaddle/Paddle/blob/develop/cmake/cross_compiling/android.cmake),以提供一些默认的编译器和编译参数相关配置。注意,从CMake 3.7版本开始,CMake官方对Android平台的交叉编译提供了通用的支持。PaddlePaddle若检测到用户使用的CMake版本不低于3.7时,将会将用户传进来的配置参数传递CMake系统,交由CMake系统本身来处理。有关参数配置的详细说明见[cmake-toolchains](https://cmake.org/cmake/help/v3.7/manual/cmake-toolchains.7.html#cross-compiling)。 + +交叉编译Android版本的PaddlePaddle库时,有一些必须配置的参数: +- `CMAKE_SYSTEM_NAME`,CMake编译的目标平台,必须设置为`Android`。在设置`CMAKE_SYSTEM_NAME=Android`后,PaddlePaddle的CMake系统才认为是在交叉编译Android系统的版本,并自动编译宿主机版protoc可执行文件、目标机版protobuf库、以及Android所需`arm_soft_fp_abi`分支的目标机版OpenBLAS库。此外,还会强制设置一些PaddlePaddle参数的值(`WITH_GPU=OFF`、`WITH_AVX=OFF`、`WITH_PYTHON=OFF`、`WITH_RDMA=OFF`)。 +- `WITH_C_API`,必须设置为`ON`。在Android平台上只支持使用C-API来预测。 +- `WITH_SWIG_PY`,必须设置为`OFF`。在Android平台上不支持通过swig调用来训练或者预测。 + +Android平台可选配置参数: + +- `ANDROID_STANDALONE_TOOLCHAIN`,独立工具链所在的绝对路径,或者相对于构建目录的相对路径。PaddlePaddle的CMake系统将根据该值自动推导和设置需要使用的交叉编译器、sysroot、以及Android API级别;否则,用户需要在cmake时手动设置这些值。无默认值。 +- `ANDROID_ABI`,目标架构ABI。目前只支持`armeabi-v7a`,默认值为`armeabi-v7a`。 +- `ANDROID_NATIVE_API_LEVEL`,工具链的Android API级别。若没有显式设置,PaddlePaddle将根据`ANDROID_STANDALONE_TOOLCHAIN`的值自动推导得到。 +- `ANROID_ARM_MODE`,是否使用ARM模式。可设置`ON/OFF`,默认值为`ON`。 +- `ANDROID_ARM_NEON`,是否使用NEON指令。目前必须设置成`ON`,默认值为`ON`。 + +其他配置参数: + +- `HOST_C/CXX_COMPILER`,宿主机的C/C++编译器。在编译宿主机版protoc可执行文件和目标机版OpenBLAS库时需要用到。默认设置成环境变量`CC`的值;若环境变量`CC`没有设置,则设置成`cc`编译器。 + +一种常用的cmake配置如下: + +```bash +cmake -DCMAKE_SYSTEM_NAME=Android \ + -DANDROID_STANDALONE_TOOLCHAIN=your/path/to/my_standalone_toolchain \ + -DANDROID_ABI=armeabi-v7a \ + -DANDROID_ARM_NEON=ON \ + -DANDROID_ARM_MODE=ON \ + -DCMAKE_INSTALL_PREFIX=your/path/to/install \ + -DWITH_C_API=ON \ + -DWITH_SWIG_PY=OFF \ + .. +``` + +用户还可根据自己的需求设置其他编译参数。比如希望最小化生成的库的大小,可以设置`CMAKE_BUILD_TYPE`为`MinSizeRel`;若希望最快的执行速度,则可设置`CMAKE_BUILD_TYPE`为`Release`。亦可以通过手动设置`CMAKE_C/CXX_FLAGS_MINSIZEREL/RELEASE`来影响PaddlePaddle的编译过程。 + +## 编译和安装 + +CMake配置完成后,执行以下命令,PaddlePaddle将自动下载和编译所有第三方依赖库、编译和安装PaddlePaddle预测库。 + +```bash +make +make install +``` + +注意:如果你曾经在源码目录下编译过其他平台的PaddlePaddle库,请先使用`rm -rf`命令删除`third_party`目录和`build`目录,以确保所有的第三方依赖库和PaddlePaddle代码都是针对新的CMake配置重新编译的。 + +执行完安装命令后,`your/path/to/install`目录中会包含`include`和`lib`目录,其中`include`中包含C-API的头文件,`lib`中包含一个Android版本的库。自此,PaddlePaddle的已经安装完成,用户可将`your/path/to/install`目录下的生成文件用于深度学习相关Android App中,调用方法见C-API文档。 diff --git a/doc/howto/cross_compiling/cross_compiling_for_raspberry_cn.md b/doc/howto/cross_compiling/cross_compiling_for_raspberry_cn.md new file mode 100644 index 0000000000000000000000000000000000000000..085b5dda1615a9af918b59870db460fcc5acdcca --- /dev/null +++ b/doc/howto/cross_compiling/cross_compiling_for_raspberry_cn.md @@ -0,0 +1,65 @@ +# 构建Raspberry Pi平台上的PaddlePaddle库 + +对于Rasspberry Pi系统,用户可通过ssh等方式登录到Raspberry Pi系统上,按照[源码编译PaddlePaddle](http://www.paddlepaddle.org/doc_cn/getstarted/build_and_install/cmake/build_from_source_cn.html)相关文档所述,直接编译Raspberry Pi平台上适用的PaddlePaddle库。 + +用户也可以在自己熟悉的开发平台上,通过交叉编译的方式来编译。这篇文档将以Linux x86-64平台为例,介绍交叉编译Raspberry Pi平台上适用的PaddlePaddle的方法和步骤。 + +## 准备交叉编译环境 + +从源码交叉编译PaddlePaddle,用户需要提前准备好交叉编译环境。用户可自行前往[github](https://github.com/raspberrypi/tools)下载Raspberry Pi平台使用的C/C++交叉编译工具链,也可通过以下命令获取: + +```bash +git clone https://github.com/raspberrypi/tools.git +``` + +该github仓库中包含若干个预编译好的、针对不同平台的编译工具。宿主机是Linux x86-64环境,则需选用`arm-bcm2708/gcc-linaro-arm-linux-gnueabihf-raspbian-x64`下的作为编译工具,所使用的编译器为arm-linux-gnueabihf-gcc 4.8.3。 + +注意,该编译工具链需要系统glibc支持2.14以上。 + +## 配置交叉编译参数 + +CMake系统对交叉编译提供了支持[cmake-toolchains](https://cmake.org/cmake/help/v3.0/manual/cmake-toolchains.7.html#cross-compiling)。为了简化cmake配置,PaddlePaddle为交叉编译提供了工具链配置文档[cmake/cross_compiling/raspberry_pi.cmake](https://github.com/PaddlePaddle/Paddle/blob/develop/cmake/cross_compiling/raspberry_pi.cmake),以提供一些默认的编译器和编译参数相关配置。 + +交叉编译Raspberry Pi版本PaddlePaddle库时,有一些必须配置的参数: + +- `CMAKE_SYSTEM_NAME`,CMake编译的目标平台,必须配置为`RPi`。在设置`CMAKE_SYSTEM_NAME=RPi`后,PaddlePaddle的CMake系统才认为在是在交叉编译Raspberry Pi系统的版本,并自动编译宿主机版protoc可执行文件、目标机版protobuf库、以及目标机版OpenBLAS库。 + +Raspberry Pi平台可选配置参数: + +- `RPI_TOOLCHAIN`,编译工具链所在的绝对路径,或者相对于构建目录的相对路径。PaddlePaddle的CMake系统将根据该值自动设置需要使用的交叉编译器;否则,用户需要在cmake时手动设置这些值。无默认值。 +- `RPI_ARM_NEON`,是否使用NEON指令。目前必须设置成`ON`,默认值为`ON`。 + +其他配置参数: + +- `HOST_C/CXX_COMPILER`,宿主机的C/C++编译器。在编译宿主机版protoc可执行文件和目标机版OpenBLAS库时需要用到。默认设置成环境变量`CC`的值;若环境变量`CC`没有设置,则设置成`cc`编译器。 + +cmake参数如下; + +``` +cmake -DCMAKE_SYSTEM_NAME=RPi \ + -DRPI_TOOLCHAIN=your/path/to/arm-bcm2708/gcc-linaro-arm-linux-gnueabihf-raspbian-x64 \ + -DRPI_ARM_NEON=ON \ + -DCMAKE_INSTALL_PREFIX=your/path/to/install \ + -DWITH_GPU=OFF \ + -DWITH_C_API=ON \ + -DWITH_PYTHON=OFF \ + -DWITH_SWIG_PY=OFF \ + .. +``` + +用户还可根据自己的需求设置其他编译参数。比如希望最小化生成的库的大小,可以设置`CMAKE_BUILD_TYPE`为`MinSizeRel`;若希望最快的执行速度,则可设置`CMAKE_BUILD_TYPE`为`Release`。亦可以通过手动设置`CMAKE_C/CXX_FLAGS_MINSIZEREL/RELEASE`来影响PaddlePaddle的编译过程。 + +## 编译和安装 + +CMake配置完成后,执行以下命令,PaddlePaddle将自动下载和编译所有第三方依赖库、编译和安装PaddlePaddle。 + +```bash +make +make install +``` + +注意:如果你曾经在源码目录下编译过其他平台的PaddlePaddle库,请先使用`rm -rf`命令删除`third_party`目录和`build`目录,以确保所有的第三方依赖库和PaddlePaddle代码都是针对新的CMake配置重新编译的。 + +执行完安装命令后,由于上一步cmake配置中`WITH_C_API`设置为`ON`,`your/path/to/install`目录中会包含`include`和`lib`目录,其中`include`中包含C-API的头文件,`lib`中包含一个Raspberry Pi版本的库。 + +更多的编译配置见[源码编译PaddlePaddle](http://www.paddlepaddle.org/doc_cn/getstarted/build_and_install/cmake/build_from_source_cn.html)相关文档。 diff --git a/doc/howto/deep_model/rnn/hierarchical_layer_cn.rst b/doc/howto/deep_model/rnn/hierarchical_layer_cn.rst index 79048e92482851af6c2dd7d055868ebcaa7a298b..e05173c2006ff47ecb6ca5a4fe1502de750acc59 100644 --- a/doc/howto/deep_model/rnn/hierarchical_layer_cn.rst +++ b/doc/howto/deep_model/rnn/hierarchical_layer_cn.rst @@ -28,17 +28,17 @@ pooling 的使用示例如下,详细见 :ref:`api_v2.layer_pooling` 配置API seq_pool = pooling(input=layer, pooling_type=pooling.Max(), - agg_level=AggregateLevel.EACH_SEQUENCE) + agg_level=AggregateLevel.TO_SEQUENCE) - `pooling_type` 目前支持两种,分别是:pooling.Max()和pooling.Avg()。 -- `agg_level=AggregateLevel.EACH_TIMESTEP` 时(默认值): +- `agg_level=AggregateLevel.TO_NO_SEQUENCE` 时(默认值): - 作用:双层序列经过运算变成一个0层序列,或单层序列经过运算变成一个0层序列 - 输入:一个双层序列,或一个单层序列 - 输出:一个0层序列,即整个输入序列(单层或双层)的平均值(或最大值) -- `agg_level=AggregateLevel.EACH_SEQUENCE` 时: +- `agg_level=AggregateLevel.TO_SEQUENCE` 时: - 作用:一个双层序列经过运算变成一个单层序列 - 输入:必须是一个双层序列 @@ -52,15 +52,15 @@ last_seq 的使用示例如下( :ref:`api_v2.layer_first_seq` 类似),详 .. code-block:: bash last = last_seq(input=layer, - agg_level=AggregateLevel.EACH_SEQUENCE) + agg_level=AggregateLevel.TO_SEQUENCE) -- `agg_level=AggregateLevel.EACH_TIMESTEP` 时(默认值): +- `agg_level=AggregateLevel.TO_NO_SEQUENCE` 时(默认值): - 作用:一个双层序列经过运算变成一个0层序列,或一个单层序列经过运算变成一个0层序列 - 输入:一个双层序列或一个单层序列 - 输出:一个0层序列,即整个输入序列(双层或者单层)最后一个,或第一个元素。 -- `agg_level=AggregateLevel.EACH_SEQUENCE` 时: +- `agg_level=AggregateLevel.TO_SEQUENCE` 时: - 作用:一个双层序列经过运算变成一个单层序列 - 输入:必须是一个双层序列 - 输出:一个单层序列,其中每个元素是双层序列中每个subseq最后一个(或第一个)元素。 @@ -74,9 +74,9 @@ expand 的使用示例如下,详细见 :ref:`api_v2.layer_expand` 配置API。 ex = expand(input=layer1, expand_as=layer2, - expand_level=ExpandLevel.FROM_TIMESTEP) + expand_level=ExpandLevel.FROM_NO_SEQUENCE) -- `expand_level=ExpandLevel.FROM_TIMESTEP` 时(默认值): +- `expand_level=ExpandLevel.FROM_NO_SEQUENCE` 时(默认值): - 作用:一个0层序列经过运算扩展成一个单层序列,或者一个双层序列 - 输入:layer1必须是一个0层序列,是待扩展的数据;layer2 可以是一个单层序列,或者是一个双层序列,提供扩展的长度信息 diff --git a/doc/howto/deep_model/rnn/hrnn_rnn_api_compare_cn.rst b/doc/howto/deep_model/rnn/hrnn_rnn_api_compare_cn.rst index 96e52b910a22576fd75c9d4e1bef6e2cf74bc84f..efdc44455ea4dc81a87b4d4fc8a81e78b15cb06a 100644 --- a/doc/howto/deep_model/rnn/hrnn_rnn_api_compare_cn.rst +++ b/doc/howto/deep_model/rnn/hrnn_rnn_api_compare_cn.rst @@ -81,7 +81,7 @@ * 在本例中,我们将原始数据的每一组,通过\ :code:`recurrent_group`\ 进行拆解,拆解成的每一句话再通过一个LSTM网络。这和单层RNN的配置是等价的。 -* 与单层RNN的配置类似,我们只需要使用LSTM encode成的最后一个向量。所以对\ :code:`recurrent_group`\ 进行了\ :code:`last_seq`\ 操作。但和单层RNN不同,我们是对每一个子序列取最后一个元素,因此\ :code:`agg_level=AggregateLevel.EACH_SEQUENCE`\ 。 +* 与单层RNN的配置类似,我们只需要使用LSTM encode成的最后一个向量。所以对\ :code:`recurrent_group`\ 进行了\ :code:`last_seq`\ 操作。但和单层RNN不同,我们是对每一个子序列取最后一个元素,因此\ :code:`agg_level=AggregateLevel.TO_SEQUENCE`\ 。 * 至此,\ :code:`lstm_last`\ 便和单层RNN配置中的\ :code:`lstm_last`\ 具有相同的结果了。 diff --git a/doc/howto/deep_model/rnn/index_cn.rst b/doc/howto/deep_model/rnn/index_cn.rst index 9e805ca85191b793c8798a239927a318c70b96f5..9ecab5594cff47cde4700b7ce0f58013a960a16e 100644 --- a/doc/howto/deep_model/rnn/index_cn.rst +++ b/doc/howto/deep_model/rnn/index_cn.rst @@ -4,6 +4,7 @@ RNN相关模型 .. toctree:: :maxdepth: 1 + rnn_config_cn.rst recurrent_group_cn.md hierarchical_layer_cn.rst hrnn_rnn_api_compare_cn.rst diff --git a/doc/howto/deep_model/rnn/index_en.rst b/doc/howto/deep_model/rnn/index_en.rst index 13a153b05c578e0af82ee29db5ea27fd4b6d6f59..7adc79873d699fdfd5a85034bcef964dd1f19132 100644 --- a/doc/howto/deep_model/rnn/index_en.rst +++ b/doc/howto/deep_model/rnn/index_en.rst @@ -1,2 +1,7 @@ RNN Models ========== + +.. toctree:: + :maxdepth: 1 + + rnn_config_en.rst diff --git a/doc/howto/deep_model/rnn/rnn_config_cn.rst b/doc/howto/deep_model/rnn/rnn_config_cn.rst index ac2bd0775f4ab2e0a0c37462e2c23001123b152b..4d684cf8ad5a8082cf31fb27027119b3d3e700b6 100644 --- a/doc/howto/deep_model/rnn/rnn_config_cn.rst +++ b/doc/howto/deep_model/rnn/rnn_config_cn.rst @@ -5,36 +5,13 @@ RNN配置 中配置循环神经网络(RNN)。PaddlePaddle 高度支持灵活和高效的循环神经网络配置。 在本教程中,您将了解如何: -- 准备用来学习循环神经网络的序列数据。 - 配置循环神经网络架构。 - 使用学习完成的循环神经网络模型生成序列。 我们将使用 vanilla 循环神经网络和 sequence to sequence 模型来指导你完成这些步骤。sequence to sequence -模型的代码可以在\ ``demo / seqToseq``\ 找到。 - -准备序列数据 ------------- - -PaddlePaddle -不需要对序列数据进行任何预处理,例如填充。唯一需要做的是将相应类型设置为输入。例如,以下代码段定义了三个输入。 -它们都是序列,它们的大小是\ ``src_dict``\ ,\ ``trg_dict``\ 和\ ``trg_dict``\ : - -.. code:: python - - settings.input_types = [ - integer_value_sequence(len(settings.src_dict)), - integer_value_sequence(len(settings.trg_dict)), - integer_value_sequence(len(settings.trg_dict))] - -在\ ``process``\ 函数中,每个\ ``yield``\ 函数将返回三个整数列表。每个整数列表被视为一个整数序列: - -.. code:: python - - yield src_ids, trg_ids, trg_ids_next - -有关如何编写数据提供程序的更多细节描述,请参考 :ref:`api_pydataprovider2` 。完整的数据提供文件在 -``demo/seqToseq/dataprovider.py``\ 。 +模型的代码可以在 `book/08.machine_translation `_ 找到。 +wmt14数据的提供文件在 `python/paddle/v2/dataset/wmt14.py `_ 。 配置循环神经网络架构 -------------------- @@ -85,19 +62,19 @@ vanilla act=None, rnn_layer_attr=None): def __rnn_step__(ipt): - out_mem = memory(name=name, size=size) - rnn_out = mixed_layer(input = [full_matrix_projection(ipt), - full_matrix_projection(out_mem)], - name = name, - bias_attr = rnn_bias_attr, - act = act, - layer_attr = rnn_layer_attr, - size = size) + out_mem = paddle.layer.memory(name=name, size=size) + rnn_out = paddle.layer.mixed(input = [paddle.layer.full_matrix_projection(input=ipt), + paddle.layer.full_matrix_projection(input=out_mem)], + name = name, + bias_attr = rnn_bias_attr, + act = act, + layer_attr = rnn_layer_attr, + size = size) return rnn_out - return recurrent_group(name='%s_recurrent_group' % name, - step=__rnn_step__, - reverse=reverse, - input=input) + return paddle.layer.recurrent_group(name='%s_recurrent_group' % name, + step=__rnn_step__, + reverse=reverse, + input=input) PaddlePaddle 使用“Memory”(记忆模块)实现单步函数。\ **Memory**\ 是在PaddlePaddle中构造循环神经网络时最重要的概念。 @@ -140,43 +117,52 @@ Sequence to Sequence Model with Attention .. code:: python # 定义源语句的数据层 - src_word_id = data_layer(name='source_language_word', size=source_dict_dim) + src_word_id = paddle.layer.data( + name='source_language_word', + type=paddle.data_type.integer_value_sequence(source_dict_dim)) # 计算每个词的词向量 - src_embedding = embedding_layer( + src_embedding = paddle.layer.embedding( input=src_word_id, size=word_vector_dim, - param_attr=ParamAttr(name='_source_language_embedding')) + param_attr=paddle.attr.ParamAttr(name='_source_language_embedding')) # 应用前向循环神经网络 - src_forward = grumemory(input=src_embedding, size=encoder_size) + src_forward = paddle.networks.simple_gru( + input=src_embedding, size=encoder_size) # 应用反向递归神经网络(reverse=True表示反向循环神经网络) - src_backward = grumemory(input=src_embedding, - size=encoder_size, - reverse=True) + src_backward = paddle.networks.simple_gru( + input=src_embedding, size=encoder_size, reverse=True) # 将循环神经网络的前向和反向部分混合在一起 - encoded_vector = concat_layer(input=[src_forward, src_backward]) + encoded_vector = paddle.layer.concat(input=[src_forward, src_backward]) # 投射编码向量到 decoder_size - encoder_proj = mixed_layer(input = [full_matrix_projection(encoded_vector)], - size = decoder_size) + encoded_proj = paddle.layer.mixed( + size=decoder_size, + input=paddle.layer.full_matrix_projection(encoded_vector)) # 计算反向RNN的第一个实例 - backward_first = first_seq(input=src_backward) + backward_first = paddle.layer.first_seq(input=src_backward) # 投射反向RNN的第一个实例到 decoder size - decoder_boot = mixed_layer(input=[full_matrix_projection(backward_first)], size=decoder_size, act=TanhActivation()) + decoder_boot = paddle.layer.mixed( + size=decoder_size, + act=paddle.activation.Tanh(), + input=paddle.layer.full_matrix_projection(backward_first)) 解码器使用 ``recurrent_group`` 来定义循环神经网络。单步函数和输出函数在 ``gru_decoder_with_attention`` 中定义: .. code:: python - group_inputs=[StaticInput(input=encoded_vector,is_seq=True), - StaticInput(input=encoded_proj,is_seq=True)] - trg_embedding = embedding_layer( - input=data_layer(name='target_language_word', - size=target_dict_dim), - size=word_vector_dim, - param_attr=ParamAttr(name='_target_language_embedding')) + group_input1 = paddle.layer.StaticInput(input=encoded_vector, is_seq=True) + group_input2 = paddle.layer.StaticInput(input=encoded_proj, is_seq=True) + group_inputs = [group_input1, group_input2] + trg_embedding = paddle.layer.embedding( + input=paddle.layer.data( + name='target_language_word', + type=paddle.data_type.integer_value_sequence(target_dict_dim)), + size=word_vector_dim, + param_attr=paddle.attr.ParamAttr(name='_target_language_embedding')) + group_inputs.append(trg_embedding) group_inputs.append(trg_embedding) # 对于配备有注意力机制的解码器,在训练中, @@ -185,9 +171,10 @@ Sequence to Sequence Model with Attention # StaticInput 意味着不同时间步的输入都是相同的值, # 否则它以一个序列输入,不同时间步的输入是不同的。 # 所有输入序列应该有相同的长度。 - decoder = recurrent_group(name=decoder_group_name, - step=gru_decoder_with_attention, - input=group_inputs) + decoder = paddle.layer.recurrent_group( + name=decoder_group_name, + step=gru_decoder_with_attention, + input=group_inputs) 单步函数的实现如下所示。首先,它定义解码网络的\ **Memory**\ 。然后定义 attention,门控循环单元单步函数和输出函数: @@ -198,27 +185,32 @@ attention,门控循环单元单步函数和输出函数: # 定义解码器的Memory # Memory的输出定义在 gru_step 内 # 注意 gru_step 应该与它的Memory名字相同 - decoder_mem = memory(name='gru_decoder', - size=decoder_size, - boot_layer=decoder_boot) + decoder_mem = paddle.layer.memory( + name='gru_decoder', size=decoder_size, boot_layer=decoder_boot) # 计算 attention 加权编码向量 - context = simple_attention(encoded_sequence=enc_vec, - encoded_proj=enc_proj, - decoder_state=decoder_mem) + context = paddle.networks.simple_attention( + encoded_sequence=enc_vec, + encoded_proj=enc_proj, + decoder_state=decoder_mem) # 混合当前词向量和attention加权编码向量 - decoder_inputs = mixed_layer(inputs = [full_matrix_projection(context), - full_matrix_projection(current_word)], - size = decoder_size * 3) + decoder_inputs = paddle.layer.mixed( + size=decoder_size * 3, + input=[ + paddle.layer.full_matrix_projection(input=context), + paddle.layer.full_matrix_projection(input=current_word) + ]) # 定义门控循环单元循环神经网络单步函数 - gru_step = gru_step_layer(name='gru_decoder', - input=decoder_inputs, - output_mem=decoder_mem, - size=decoder_size) + gru_step = paddle.layer.gru_step( + name='gru_decoder', + input=decoder_inputs, + output_mem=decoder_mem, + size=decoder_size) # 定义输出函数 - out = mixed_layer(input=[full_matrix_projection(input=gru_step)], - size=target_dict_dim, - bias_attr=True, - act=SoftmaxActivation()) + out = paddle.layer.mixed( + size=target_dict_dim, + bias_attr=True, + act=paddle.activation.Softmax(), + input=paddle.layer.full_matrix_projection(input=gru_step)) return out 生成序列 @@ -238,41 +230,32 @@ attention,门控循环单元单步函数和输出函数: - ``beam_size``: beam search 算法中的beam大小。 - ``max_length``: 生成序列的最大长度。 -- 使用 ``seqtext_printer_evaluator`` - 根据索引矩阵和字典打印文本。这个函数需要设置: - - - ``id_input``: 数据的整数ID,用于标识生成的文件中的相应输出。 - - ``dict_file``: 用于将词ID转换为词的字典文件。 - - ``result_file``: 生成结果文件的路径。 - 代码如下: .. code:: python - group_inputs=[StaticInput(input=encoded_vector,is_seq=True), - StaticInput(input=encoded_proj,is_seq=True)] + group_input1 = paddle.layer.StaticInput(input=encoded_vector, is_seq=True) + group_input2 = paddle.layer.StaticInput(input=encoded_proj, is_seq=True) + group_inputs = [group_input1, group_input2] # 在生成时,解码器基于编码源序列和最后生成的目标词预测下一目标词。 # 编码源序列(编码器输出)必须由只读Memory的 StaticInput 指定。 # 这里, GeneratedInputs 自动获取上一个生成的词,并在最开始初始化为起始词,如 。 - trg_embedding = GeneratedInput( - size=target_dict_dim, - embedding_name='_target_language_embedding', - embedding_size=word_vector_dim) + trg_embedding = paddle.layer.GeneratedInput( + size=target_dict_dim, + embedding_name='_target_language_embedding', + embedding_size=word_vector_dim) group_inputs.append(trg_embedding) - beam_gen = beam_search(name=decoder_group_name, - step=gru_decoder_with_attention, - input=group_inputs, - bos_id=0, # Beginnning token. - eos_id=1, # End of sentence token. - beam_size=beam_size, - max_length=max_length) - - seqtext_printer_evaluator(input=beam_gen, - id_input=data_layer(name="sent_id", size=1), - dict_file=trg_dict_path, - result_file=gen_trans_file) - outputs(beam_gen) - -注意,这种生成技术只用于类似解码器的生成过程。如果你正在处理序列标记任务,请参阅 :ref:`semantic_role_labeling` 了解更多详细信息。 - -完整的配置文件在\ ``demo/seqToseq/seqToseq_net.py``\ 。 + beam_gen = paddle.layer.beam_search( + name=decoder_group_name, + step=gru_decoder_with_attention, + input=group_inputs, + bos_id=0, # Beginnning token. + eos_id=1, # End of sentence token. + beam_size=beam_size, + max_length=max_length) + + return beam_gen + +注意,这种生成技术只用于类似解码器的生成过程。如果你正在处理序列标记任务,请参阅 `book/06.understand_sentiment `_ 了解更多详细信息。 + +完整的配置文件在 `book/08.machine_translation/train.py `_ 。 diff --git a/doc/howto/deep_model/rnn/rnn_config_en.rst b/doc/howto/deep_model/rnn/rnn_config_en.rst index 73f5d5371fcd3ce95253cad47b0d8e738284441c..2b581290a41005c04cb1d8b6febe57f17d2416d3 100644 --- a/doc/howto/deep_model/rnn/rnn_config_en.rst +++ b/doc/howto/deep_model/rnn/rnn_config_en.rst @@ -3,34 +3,11 @@ RNN Configuration This tutorial will guide you how to configure recurrent neural network in PaddlePaddle. PaddlePaddle supports highly flexible and efficient recurrent neural network configuration. In this tutorial, you will learn how to: -- prepare sequence data for learning recurrent neural networks. - configure recurrent neural network architecture. - generate sequence with learned recurrent neural network models. -We will use vanilla recurrent neural network, and sequence to sequence model to guide you through these steps. The code of sequence to sequence model can be found at :code:`demo/seqToseq`. - -===================== -Prepare Sequence Data -===================== - -PaddlePaddle does not need any preprocessing to sequence data, such as padding. The only thing that needs to be done is to set the type of the corresponding type to input. For example, the following code snippets defines three input. All of them are sequences, and the size of them are :code:`src_dict`, :code:`trg_dict`, and :code:`trg_dict`: - -.. code-block:: python - - settings.input_types = [ - integer_value_sequence(len(settings.src_dict)), - integer_value_sequence(len(settings.trg_dict)), - integer_value_sequence(len(settings.trg_dict))] - - -Then at the :code:`process` function, each :code:`yield` function will return three integer lists. Each integer list is treated as a sequence of integers: - -.. code-block:: python - - yield src_ids, trg_ids, trg_ids_next - - -For more details description of how to write a data provider, please refer to :ref:`api_pydataprovider2` . The full data provider file is located at :code:`demo/seqToseq/dataprovider.py`. +We will use vanilla recurrent neural network, and sequence to sequence model to guide you through these steps. The code of sequence to sequence model can be found at `book/08.machine_translation `_ . +And the data preparation of this model can be found at `python/paddle/v2/dataset/wmt14.py `_ =============================================== Configure Recurrent Neural Network Architecture @@ -75,19 +52,19 @@ Its **output function** simply takes :math:`x_t` as the output. act=None, rnn_layer_attr=None): def __rnn_step__(ipt): - out_mem = memory(name=name, size=size) - rnn_out = mixed_layer(input = [full_matrix_projection(ipt), - full_matrix_projection(out_mem)], - name = name, - bias_attr = rnn_bias_attr, - act = act, - layer_attr = rnn_layer_attr, - size = size) + out_mem = paddle.layer.memory(name=name, size=size) + rnn_out = paddle.layer.mixed(input = [paddle.layer.full_matrix_projection(input=ipt), + paddle.layer.full_matrix_projection(input=out_mem)], + name = name, + bias_attr = rnn_bias_attr, + act = act, + layer_attr = rnn_layer_attr, + size = size) return rnn_out - return recurrent_group(name='%s_recurrent_group' % name, - step=__rnn_step__, - reverse=reverse, - input=input) + return paddle.layer.recurrent_group(name='%s_recurrent_group' % name, + step=__rnn_step__, + reverse=reverse, + input=input) PaddlePaddle uses memory to construct step function. **Memory** is the most important concept when constructing recurrent neural networks in PaddlePaddle. A memory is a state that is used recurrently in step functions, such as :math:`x_{t+1} = f_x(x_t)`. One memory contains an **output** and a **input**. The output of memory at the current time step is utilized as the input of the memory at the next time step. A memory can also has a **boot layer**, whose output is utilized as the initial value of the memory. In our case, the output of the gated recurrent unit is employed as the output memory. Notice that the name of the layer :code:`rnn_out` is the same as the name of :code:`out_mem`. This means the output of the layer :code:`rnn_out` (:math:`x_{t+1}`) is utilized as the **output** of :code:`out_mem` memory. @@ -113,43 +90,52 @@ We also project the encoder vector to :code:`decoder_size` dimensional space, ge .. code-block:: python # Define the data layer of the source sentence. - src_word_id = data_layer(name='source_language_word', size=source_dict_dim) + src_word_id = paddle.layer.data( + name='source_language_word', + type=paddle.data_type.integer_value_sequence(source_dict_dim)) # Calculate the word embedding of each word. - src_embedding = embedding_layer( + src_embedding = paddle.layer.embedding( input=src_word_id, size=word_vector_dim, - param_attr=ParamAttr(name='_source_language_embedding')) + param_attr=paddle.attr.ParamAttr(name='_source_language_embedding')) # Apply forward recurrent neural network. - src_forward = grumemory(input=src_embedding, size=encoder_size) + src_forward = paddle.networks.simple_gru( + input=src_embedding, size=encoder_size) # Apply backward recurrent neural network. reverse=True means backward recurrent neural network. - src_backward = grumemory(input=src_embedding, - size=encoder_size, - reverse=True) + src_backward = paddle.networks.simple_gru( + input=src_embedding, size=encoder_size, reverse=True) # Mix the forward and backward parts of the recurrent neural network together. - encoded_vector = concat_layer(input=[src_forward, src_backward]) + encoded_vector = paddle.layer.concat(input=[src_forward, src_backward]) # Project encoding vector to decoder_size. - encoder_proj = mixed_layer(input = [full_matrix_projection(encoded_vector)], - size = decoder_size) + encoded_proj = paddle.layer.mixed( + size=decoder_size, + input=paddle.layer.full_matrix_projection(encoded_vector)) # Compute the first instance of the backward RNN. - backward_first = first_seq(input=src_backward) + backward_first = paddle.layer.first_seq(input=src_backward) # Project the first instance of backward RNN to decoder size. - decoder_boot = mixed_layer(input=[full_matrix_projection(backward_first)], size=decoder_size, act=TanhActivation()) + decoder_boot = paddle.layer.mixed( + size=decoder_size, + act=paddle.activation.Tanh(), + input=paddle.layer.full_matrix_projection(backward_first)) The decoder uses :code:`recurrent_group` to define the recurrent neural network. The step and output functions are defined in :code:`gru_decoder_with_attention`: .. code-block:: python - group_inputs=[StaticInput(input=encoded_vector,is_seq=True), - StaticInput(input=encoded_proj,is_seq=True)] - trg_embedding = embedding_layer( - input=data_layer(name='target_language_word', - size=target_dict_dim), - size=word_vector_dim, - param_attr=ParamAttr(name='_target_language_embedding')) + group_input1 = paddle.layer.StaticInput(input=encoded_vector, is_seq=True) + group_input2 = paddle.layer.StaticInput(input=encoded_proj, is_seq=True) + group_inputs = [group_input1, group_input2] + trg_embedding = paddle.layer.embedding( + input=paddle.layer.data( + name='target_language_word', + type=paddle.data_type.integer_value_sequence(target_dict_dim)), + size=word_vector_dim, + param_attr=paddle.attr.ParamAttr(name='_target_language_embedding')) + group_inputs.append(trg_embedding) group_inputs.append(trg_embedding) # For decoder equipped with attention mechanism, in training, @@ -158,9 +144,10 @@ The decoder uses :code:`recurrent_group` to define the recurrent neural network. # StaticInput means the same value is utilized at different time steps. # Otherwise, it is a sequence input. Inputs at different time steps are different. # All sequence inputs should have the same length. - decoder = recurrent_group(name=decoder_group_name, - step=gru_decoder_with_attention, - input=group_inputs) + decoder = paddle.layer.recurrent_group( + name=decoder_group_name, + step=gru_decoder_with_attention, + input=group_inputs) The implementation of the step function is listed as below. First, it defines the **memory** of the decoder network. Then it defines attention, gated recurrent unit step function, and the output function: @@ -171,27 +158,32 @@ The implementation of the step function is listed as below. First, it defines th # Defines the memory of the decoder. # The output of this memory is defined in gru_step. # Notice that the name of gru_step should be the same as the name of this memory. - decoder_mem = memory(name='gru_decoder', - size=decoder_size, - boot_layer=decoder_boot) + decoder_mem = paddle.layer.memory( + name='gru_decoder', size=decoder_size, boot_layer=decoder_boot) # Compute attention weighted encoder vector. - context = simple_attention(encoded_sequence=enc_vec, - encoded_proj=enc_proj, - decoder_state=decoder_mem) + context = paddle.networks.simple_attention( + encoded_sequence=enc_vec, + encoded_proj=enc_proj, + decoder_state=decoder_mem) # Mix the current word embedding and the attention weighted encoder vector. - decoder_inputs = mixed_layer(inputs = [full_matrix_projection(context), - full_matrix_projection(current_word)], - size = decoder_size * 3) + decoder_inputs = paddle.layer.mixed( + size=decoder_size * 3, + input=[ + paddle.layer.full_matrix_projection(input=context), + paddle.layer.full_matrix_projection(input=current_word) + ]) # Define Gated recurrent unit recurrent neural network step function. - gru_step = gru_step_layer(name='gru_decoder', - input=decoder_inputs, - output_mem=decoder_mem, - size=decoder_size) + gru_step = paddle.layer.gru_step( + name='gru_decoder', + input=decoder_inputs, + output_mem=decoder_mem, + size=decoder_size) # Defines the output function. - out = mixed_layer(input=[full_matrix_projection(input=gru_step)], - size=target_dict_dim, - bias_attr=True, - act=SoftmaxActivation()) + out = paddle.layer.mixed( + size=target_dict_dim, + bias_attr=True, + act=paddle.activation.Softmax(), + input=paddle.layer.full_matrix_projection(input=gru_step)) return out @@ -207,45 +199,37 @@ After training the model, we can use it to generate sequences. A common practice - :code:`eos_id`: the end token. Every sentence ends with the end token. - :code:`beam_size`: the beam size used in beam search. - :code:`max_length`: the maximum length of the generated sentences. - -* use :code:`seqtext_printer_evaluator` to print text according to index matrix and dictionary. This function needs to set: - - - :code:`id_input`: the integer ID of the data, used to identify the corresponding output in the generated files. - - :code:`dict_file`: the dictionary file for converting word id to word. - - :code:`result_file`: the path of the generation result file. The code is listed below: .. code-block:: python - group_inputs=[StaticInput(input=encoded_vector,is_seq=True), - StaticInput(input=encoded_proj,is_seq=True)] + group_input1 = paddle.layer.StaticInput(input=encoded_vector, is_seq=True) + group_input2 = paddle.layer.StaticInput(input=encoded_proj, is_seq=True) + group_inputs = [group_input1, group_input2] # In generation, decoder predicts a next target word based on # the encoded source sequence and the last generated target word. # The encoded source sequence (encoder's output) must be specified by # StaticInput which is a read-only memory. # Here, GeneratedInputs automatically fetchs the last generated word, # which is initialized by a start mark, such as . - trg_embedding = GeneratedInput( - size=target_dict_dim, - embedding_name='_target_language_embedding', - embedding_size=word_vector_dim) + trg_embedding = paddle.layer.GeneratedInput( + size=target_dict_dim, + embedding_name='_target_language_embedding', + embedding_size=word_vector_dim) group_inputs.append(trg_embedding) - beam_gen = beam_search(name=decoder_group_name, - step=gru_decoder_with_attention, - input=group_inputs, - bos_id=0, # Beginnning token. - eos_id=1, # End of sentence token. - beam_size=beam_size, - max_length=max_length) + beam_gen = paddle.layer.beam_search( + name=decoder_group_name, + step=gru_decoder_with_attention, + input=group_inputs, + bos_id=0, # Beginnning token. + eos_id=1, # End of sentence token. + beam_size=beam_size, + max_length=max_length) - seqtext_printer_evaluator(input=beam_gen, - id_input=data_layer(name="sent_id", size=1), - dict_file=trg_dict_path, - result_file=gen_trans_file) - outputs(beam_gen) + return beam_gen -Notice that this generation technique is only useful for decoder like generation process. If you are working on sequence tagging tasks, please refer to :ref:`semantic_role_labeling` for more details. +Notice that this generation technique is only useful for decoder like generation process. If you are working on sequence tagging tasks, please refer to `book/06.understand_sentiment `_ for more details. -The full configuration file is located at :code:`demo/seqToseq/seqToseq_net.py`. +The full configuration file is located at `book/08.machine_translation/train.py `_ . diff --git a/doc/howto/dev/build_cn.md b/doc/howto/dev/build_cn.md new file mode 100644 index 0000000000000000000000000000000000000000..0b911f7b7509da4a147c65954acb7e7c38f489da --- /dev/null +++ b/doc/howto/dev/build_cn.md @@ -0,0 +1,124 @@ +# 编译PaddlePaddle和运行单元测试 + +## 需要的软硬件 + +为了开发PaddlePaddle,我们需要 + +1. 一台电脑,可以装的是 Linux, BSD, Windows 或者 MacOS 操作系统,以及 +1. Docker。 + +不需要依赖其他任何软件了。即便是 Python 和 GCC 都不需要,因为我们会把所有编译工具都安装进一个 Docker image 里。 + +## 总体流程 + +1. 获取源码 + + ```bash + git clone https://github.com/paddlepaddle/paddle + ``` + +2. 安装开发工具到 Docker image 里 + + ```bash + cd paddle; docker build -t paddle:dev . + ``` + + 请注意这个命令结尾处的 `.`;它表示 `docker build` 应该读取当前目录下的 [`Dockerfile`文件](https://github.com/PaddlePaddle/Paddle/blob/develop/Dockerfile),按照其内容创建一个名为 `paddle:dev` 的 Docker image,并且把各种开发工具安装进去。 + +3. 编译 + + 以下命令启动一个 Docker container 来执行 `paddle:dev` 这个 Docker image,同时把当前目录(源码树根目录)映射为 container 里的 `/paddle` 目录,并且运行 `Dockerfile` 描述的默认入口程序 [`build.sh`](https://github.com/PaddlePaddle/Paddle/blob/develop/paddle/scripts/docker/build.sh)。这个脚本调用 `cmake` 和 `make` 来编译 `/paddle` 里的源码,结果输出到 `/paddle/build`,也就是本地的源码树根目录里的 `build` 子目录。 + + ```bash + docker run --rm -v $PWD:/paddle paddle:dev + ``` + + 上述命令编译出一个 CUDA-enabled 版本。如果我们只需要编译一个只支持 CPU 的版本,可以用 + + ```bash + docker run --rm -e WITH_GPU=OFF -v $PWD:/paddle paddle:dev + ``` + +4. 运行单元测试 + + 用本机的第一个 GPU 来运行包括 GPU 单元测试在内的所有单元测试: + + ```bash + NV_GPU=0 nvidia-docker run --rm -v $PWD:/paddle paddle:dev bash -c "cd /paddle/build; ctest" + ``` + + 如果编译的时候我们用了 `WITH_GPU=OFF` 选项,那么编译过程只会产生 CPU-based 单元测试,那么我们也就不需要 nvidia-docker 来运行单元测试了。我们只需要: + + ```bash + docker run --rm -v $PWD:/paddle paddle:dev bash -c "cd /paddle/build; ctest" + ``` + + 有时候我们只想运行一个特定的单元测试,比如 `memory_test`,我们可以 + + ```bash + nvidia-docker run --rm -v $PWD:/paddle paddle:dev bash -c "cd /paddle/build; ctest -V -R memory_test" + ``` + +5. 清理 + + 有时候我们会希望清理掉已经下载的第三方依赖以及已经编译的二进制文件。此时只需要: + + ```bash + rm -rf build + ``` + +## 为什么要 Docker 呀? + +- 什么是 Docker? + + 如果您没有听说 Docker,可以把它想象为一个类似 virtualenv 的系统,但是虚拟的不仅仅是 Python 的运行环境。 + +- Docker 还是虚拟机? + + 有人用虚拟机来类比 Docker。需要强调的是:Docker 不会虚拟任何硬件,Docker container 里运行的编译工具实际上都是在本机的 CPU 和操作系统上直接运行的,性能和把编译工具安装在本机运行一样。 + +- 为什么用 Docker? + + 把工具和配置都安装在一个 Docker image 里可以标准化编译环境。这样如果遇到问题,其他人可以复现问题以便帮助。 + + 另外,对于习惯使用Windows和MacOS的开发者来说,使用Docker就不用配置交叉编译环境了。 + +- 我可以选择不用Docker吗? + + 当然可以。大家可以用把开发工具安装进入 Docker image 一样的方式,把这些工具安装到本机。这篇文档介绍基于 Docker 的开发流程,是因为这个流程比其他方法都更简便。 + +- 学习 Docker 有多难? + + 理解 Docker 并不难,大概花十分钟看一下[这篇文章](https://zhuanlan.zhihu.com/p/19902938)。这可以帮您省掉花一小时安装和配置各种开发工具,以及切换机器时需要新安装的辛苦。别忘了 PaddlePaddle 更新可能导致需要新的开发工具。更别提简化问题复现带来的好处了。 + +- 我可以用 IDE 吗? + + 当然可以,因为源码就在本机上。IDE 默认调用 make 之类的程序来编译源码,我们只需要配置 IDE 来调用 Docker 命令编译源码即可。 + + 很多 PaddlePaddle 开发者使用 Emacs。他们在自己的 `~/.emacs` 配置文件里加两行 + + ```emacs + (global-set-key "\C-cc" 'compile) + (setq compile-command + "docker run --rm -it -v $(git rev-parse --show-toplevel):/paddle paddle:dev") + ``` + + 就可以按 `Ctrl-C` 和 `c` 键来启动编译了。 + +- 可以并行编译吗? + + 是的。我们的 Docker image 运行一个 [Bash 脚本](https://github.com/PaddlePaddle/Paddle/blob/develop/paddle/scripts/docker/build.sh)。这个脚本调用 `make -j$(nproc)` 来启动和 CPU 核一样多的进程来并行编译。 + +## 可能碰到的问题 + +- Docker 需要 sudo + + 如果用自己的电脑开发,自然也就有管理员权限(sudo)了。如果用公用的电脑开发,需要请管理员安装和配置好 Docker。此外,PaddlePaddle 项目在努力开始支持其他不需要 sudo 的集装箱技术,比如 rkt。 + +- 在 Windows/MacOS 上编译很慢 + + Docker 在 Windows 和 MacOS 都可以运行。不过实际上是运行在一个 Linux 虚拟机上。可能需要注意给这个虚拟机多分配一些 CPU 和内存,以保证编译高效。具体做法请参考[这个issue](https://github.com/PaddlePaddle/Paddle/issues/627)。 + +- 磁盘不够 + + 本文中的例子里,`docker run` 命令里都用了 `--rm` 参数,这样保证运行结束之后的 containers 不会保留在磁盘上。可以用 `docker ps -a` 命令看到停止后但是没有删除的 containers。`docker build` 命令有时候会产生一些中间结果,是没有名字的 images,也会占用磁盘。可以参考[这篇文章](https://zaiste.net/posts/removing_docker_containers/)来清理这些内容。 diff --git a/doc/howto/dev/build_en.md b/doc/howto/dev/build_en.md new file mode 100644 index 0000000000000000000000000000000000000000..d0048e3714a5861a503736879d6c0870e5906c95 --- /dev/null +++ b/doc/howto/dev/build_en.md @@ -0,0 +1,124 @@ +# Build PaddlePaddle from Source Code and Run Unit Test + +## What Developers Need + +To contribute to PaddlePaddle, you need + +1. A computer -- Linux, BSD, Windows, MacOS, and +1. Docker. + +Nothing else. Not even Python and GCC, because you can install all build tools into a Docker image. We run all the tools by running this image. + +## General Process + +1. Retrieve source code. + + ```bash + git clone https://github.com/paddlepaddle/paddle + ``` + +2. Install build tools into a Docker image. + + ```bash + cd paddle; docker build -t paddle:dev . + ``` + + Please be aware of the `.` at the end of the command, which refers to the [`./Dockerfile` file](https://github.com/PaddlePaddle/Paddle/blob/develop/Dockerfile). `docker build` follows instructions in this file to create a Docker image named `paddle:dev`, and installs building tools into it. + +3. Build from source. + + This following command starts a Docker container that executes the Docker image `paddle:dev`, mapping the current directory to `/paddle/` in the container, and runs the default entry-point [`build.sh`](https://github.com/PaddlePaddle/Paddle/blob/develop/paddle/scripts/docker/build.sh) as specified in the Dockefile. `build.sh` invokes `cmake` and `make` to build PaddlePaddle source code, which had been mapped to `/paddle`, and writes outputs to `/paddle/build`, which maps to `build` in the current source directory on the computer. + + ```bash + docker run -v $PWD:/paddle paddle:dev + ``` + + Above command builds a CUDA-enabled version. If we want to build a CPU-only version, we can type + + ```bash + docker run -e WITH_GPU=OFF -v $PWD:/paddle paddle:dev + ``` + +4. Run unit tests. + + To run all unit tests using the first GPU of a node: + + ```bash + NV_GPU=0 nvidia-docker run -v $PWD:/paddle paddle:dev bash -c "cd /paddle/build; ctest" + ``` + + If we used `WITH_GPU=OFF` at build time, it generates only CPU-based unit tests, and we don't need nvidia-docker to run them. We can just run + + ```bash + docker run -v $PWD:/paddle paddle:dev bash -c "cd /paddle/build; ctest" + ``` + + Sometimes we want to run a specific unit test, say `memory_test`, we can run + + ```bash + nvidia-docker run -v $PWD:/paddle paddle:dev bash -c "cd /paddle/build; ctest -V -R memory_test" + ``` + +5. Clean Build. + + Sometimes, we might want to clean all thirt-party dependents and built binaries. To do so, just + + ```bash + rm -rf build + ``` + +## Docker, Or Not? + +- What is Docker? + + If you haven't heard of it, consider it something like Python's virtualenv. + +- Docker or virtual machine? + + Some people compare Docker with VMs, but Docker doesn't virtualize any hardware nor running a guest OS, which means there is no compromise on the performance. + +- Why Docker? + + Using a Docker image of build tools standardizes the building environment, which makes it easier for others to reproduce your problems and to help. + + Also, some build tools don't run on Windows or Mac or BSD, but Docker runs almost everywhere, so developers can use whatever computer they want. + +- Can I choose not to use Docker? + + Sure, you don't have to install build tools into a Docker image; instead, you can install them in your local computer. This document exists because Docker would make the development way easier. + +- How difficult is it to learn Docker? + + It takes you ten minutes to read [an introductory article](https://docs.docker.com/get-started) and saves you more than one hour to install all required build tools, configure them, especially when new versions of PaddlePaddle require some new tools. Not even to mention the time saved when other people trying to reproduce the issue you have. + +- Can I use my favorite IDE? + + Yes, of course. The source code resides on your local computer, and you can edit it using whatever editor you like. + + Many PaddlePaddle developers are using Emacs. They add the following few lines into their `~/.emacs` configure file: + + ```emacs + (global-set-key "\C-cc" 'compile) + (setq compile-command + "docker run --rm -it -v $(git rev-parse --show-toplevel):/paddle paddle:dev") + ``` + + so they could type `Ctrl-C` and `c` to build PaddlePaddle from source. + +- Does Docker do parallel building? + + Our building Docker image runs a [Bash script](https://github.com/PaddlePaddle/Paddle/blob/develop/paddle/scripts/docker/build.sh), which calls `make -j$(nproc)` to starts as many processes as the number of your CPU cores. + +## Some Gotchas + +- Docker requires sudo + + An owner of a computer has the administrative privilege, a.k.a., sudo, and Docker requires this privilege to work properly. If you use a shared computer for development, please ask the administrator to install and configure Docker. We will do our best to support rkt, another container technology that doesn't require sudo. + +- Docker on Windows/MacOS builds slowly + + On Windows and MacOS, Docker containers run in a Linux VM. You might want to give this VM some more memory and CPUs so to make the building efficient. Please refer to [this issue](https://github.com/PaddlePaddle/Paddle/issues/627) for details. + +- Not enough disk space + + Examples in this article uses option `--rm` with the `docker run` command. This option ensures that stopped containers do not exist on hard disks. We can use `docker ps -a` to list all containers, including stopped. Sometimes `docker build` generates some intermediate dangling images, which also take disk space. To clean them, please refer to [this article](https://zaiste.net/posts/removing_docker_containers/). diff --git a/doc/howto/dev/contribute_to_paddle_cn.md b/doc/howto/dev/contribute_to_paddle_cn.md index 775938612e8d213b92e2eb69dae805838dc5ae96..699390145226ec2b65fdf5122db187e1d30d669e 100644 --- a/doc/howto/dev/contribute_to_paddle_cn.md +++ b/doc/howto/dev/contribute_to_paddle_cn.md @@ -7,6 +7,7 @@ - 确保编译器选项 `WITH_STYLE_CHECK` 已打开,并且编译能通过代码样式检查。 - 所有代码必须具有单元测试。 - 通过所有单元测试。 +- 请遵守[提交代码的一些约定](#提交代码的一些约定)。 以下教程将指导您提交代码。 ## [Fork](https://help.github.com/articles/fork-a-repo/) @@ -83,7 +84,7 @@ no changes added to commit (use "git add" and/or "git commit -a") ➜ docker build -t paddle:dev . ``` -随后可以用这个开发镜像开build PaddlePaddle的源码。比如如果要build一个不依赖GPU,但是支持AVX指令集,并且包括unit tests的PaddlePaddle,可以: +随后可以用这个开发镜像开始build PaddlePaddle的源码。比如如果要build一个不依赖GPU,但是支持AVX指令集,并且包括unit tests的PaddlePaddle,可以: ```bash ➜ docker run -v $(pwd):/paddle -e "WITH_GPU=OFF" -e "WITH_AVX=ON" -e "WITH_TEST=ON" paddle:dev @@ -217,3 +218,22 @@ upstream ``` 至此,我们就完成了一次代码贡献的过程。 + +## 提交代码的一些约定 + +为了使评审人在评审代码时更好地专注于代码本身,请您每次提交代码时,遵守以下约定: +1. 请保证Travis-CI 中单元测试能顺利通过。如果没过,说明提交的代码存在问题,评审人一般不做评审。 +2. 提交PUll Request前: + - 请注意commit的数量: + - 原因:如果仅仅修改一个文件但提交了十几个commit,每个commit只做了少量的修改,这会给评审人带来很大困扰。评审人需要逐一查看每个commit才能知道做了哪些修改,且不排除commit之间的修改存在相互覆盖的情况。 + - 建议:每次提交时,保持尽量少的commit,可以通过`git commit --amend`补充上次的commit。对已经Push到远程仓库的多个commit,可以参考[squash commits after push](http://stackoverflow.com/questions/5667884/how-to-squash-commits-in-git-after-they-have-been-pushed)。 + - 请注意每个commit的名称:应能反映当前commit的内容,不能太随意。 +3. 如果解决了某个Issue的问题,请在该PUll Request的**第一个**评论框中加上:`fix #issue_number`,这样当该PUll Request被合并后,会自动关闭对应的Issue。关键词包括:close, closes, closed, fix, fixes, fixed, resolve, resolves, resolved,请选择合适的词汇。详细可参考[Closing issues via commit messages](https://help.github.com/articles/closing-issues-via-commit-messages)。 + +此外,在回复评审人意见时,请您遵守以下约定: +1. 评审人的每个意见都必须回复(这是开源社区的基本礼貌,别人帮了忙,应该说谢谢): + - 对评审意见同意且按其修改完的,给个简单的`Done`即可; + - 对评审意见不同意的,请给出您自己的反驳理由。 +2. 如果评审意见比较多: + - 请给出总体的修改情况。 + - 请采用[start a review](https://help.github.com/articles/reviewing-proposed-changes-in-a-pull-request/)进行回复,而非直接回复的方式。原因是每个回复都会发送一封邮件,会造成邮件灾难。 diff --git a/doc/howto/dev/contribute_to_paddle_en.md b/doc/howto/dev/contribute_to_paddle_en.md index 9b0d3e83c0dc264650eda73e6801c60a75439b4a..40d1eb62d722244139cc84eb170c190d988f5626 100644 --- a/doc/howto/dev/contribute_to_paddle_en.md +++ b/doc/howto/dev/contribute_to_paddle_en.md @@ -4,9 +4,9 @@ We sincerely appreciate your contributions. You can use fork and pull request workflow to merge your code. ## Code Requirements -- Your code must be fully documented by - [doxygen](http://www.stack.nl/~dimitri/doxygen/) style. -- Make sure the compiler option WITH\_STYLE\_CHECK is on and the compiler +- Your code comments must be fully documented by + [Doxygen](http://www.stack.nl/~dimitri/doxygen/) style. +- Make sure the compiler option `WITH_STYLE_CHECK` is on and the compiler passes the code style check. - All code must have unit test. - Pass all unit tests. @@ -20,32 +20,25 @@ It's just that simple. ## Clone -Paddle is currently using [git-flow branching model](http://nvie.com/posts/a-successful-git-branching-model/). -The **develop** is the main branch, and other user's branches are feature branches. +Clone remote repository. -Once you've created a fork, you can use your favorite git client to clone your -repo or just head straight to the command line: - -```shell -# Clone your fork to your local machine -git clone --branch develop https://github.com/USERNAME/Paddle.git -``` -If your repository doesn't contain **develop** branch, just create it by your own. - -```shell -git clone https://github.com/USERNAME/Paddle.git Paddle -cd Paddle -git checkout -b develop # create develop branch. -git remote add upstream https://github.com/PaddlePaddle/Paddle.git # add upstream to baidu/Paddle -git pull upstream develop # update to upstream +```bash +➜ git clone https://github.com/USERNAME/Paddle +➜ cd Paddle ``` -Then you can start to develop by making a local developement branch +## Create a local branch + +Paddle is currently using [Git-flow branching model](http://nvie.com/posts/a-successful-git-branching-model/). -```shell -git checkout -b MY_COOL_STUFF_BRANCH +All feature and bug fix development work should be done on a new branch, generally create new branch from `develop` branch . + +```bash +➜ git checkout -b my-cool-stuff ``` +Before the checkout, you need to keep the current branch directory clean, otherwise the untracked file will be brought to the new branch, which can be inspected by `git status`. + ## Using `pre-commit` hook Paddle developers use [pre-commit](http://pre-commit.com/) tool to manage git @@ -58,89 +51,169 @@ To use [pre-commit](http://pre-commit.com/), you should install it by `pip install pre-commit`, and currently, Paddle uses `clang-format` to format c/cpp sources. Please make sure clang-format 3.8+ installed. -Then just run `pre-commit install` in your Paddle clone directory. When you -commit your code, the pre-commit hook will check the local code if there is +Install and run it as follow: + +```bash +➜ pip install pre-commit +➜ pre-commit install +``` + +When you commit your code, the pre-commit hook will check the local code if there is anything not suitable to commit, and so on. +## Start to develop + +In this tutorial, I delete a line in README.md and created a new file. + +We can use `git status` to inspect the changes of current directory, `git diff` to see difference. + +```bash +➜ git status +On branch test +Changes not staged for commit: + (use "git add ..." to update what will be committed) + (use "git checkout -- ..." to discard changes in working directory) + + modified: README.md + +Untracked files: + (use "git add ..." to include in what will be committed) + + test + +no changes added to commit (use "git add" and/or "git commit -a") +``` +## Build and Test + +We package PaddlePaddle's compile environment into a Docker image, called the develop image named `paddle:dev`, it contains all compiling tools that PaddlePaddle needs. + +If you want to build the develop image, just run: + +```bash +➜ docker build -t paddle:dev . +``` + +Then we can use the develop image to build PaddlePaddle source. For example: + +```bash +➜ docker run -v $(pwd):/paddle -e "WITH_GPU=OFF" -e "WITH_AVX=ON" -e "WITH_TEST=ON" paddle:dev +``` + +The above command will compile PaddlePaddle and create a Dockerfile for building production image. All the generated files are in the build directory. "WITH_GPU" controls if the generated production image supports GPU. "WITH_AVX" controls if the generated production image supports AVX. "WITH_TEST" controls if the unit test will be generated. + +Then we can generate the production image by copying the compiled PaddlePaddle program into the image by + +```bash +➜ docker build -t paddle:prod -f build/Dockerfile . +``` + +Run unit test finally: + +```bash +➜ docker run -it -v $(pwd):/paddle paddle:dev bash -c "cd /paddle/build && ctest" +``` + +For more details, you can read [this doc](https://github.com/PaddlePaddle/Paddle/blob/develop/doc/getstarted/build_and_install/docker_install_en.rst). + ## Commit -Commit your changes by following command lines: +Next we cancel the changes to the README.md file and then commit our changes by following command lines: + +```bash +➜ git checkout -- README.md +➜ git status +On branch test +Untracked files: + (use "git add ..." to include in what will be committed) + + test + +nothing added to commit but untracked files present (use "git add" to track) +➜ git add test +``` -```shell -# show the working tree status -git status -# add modified files -git add xx -env EDITOR=vim git commit # You can write your comments by vim/nano/emacs. +We should write a description of each commit by `git commit` to allow others to know +the changes in these files. + +```bash +➜ git commit +CRLF end-lines remover...............................(no files to check)Skipped +yapf.................................................(no files to check)Skipped +Check for added large files..............................................Passed +Check for merge conflicts................................................Passed +Check for broken symlinks................................................Passed +Detect Private Key...................................(no files to check)Skipped +Fix End of Files.....................................(no files to check)Skipped +clang-formater.......................................(no files to check)Skipped +[my-cool-stuff c703c041] add test file + 1 file changed, 0 insertions(+), 0 deletions(-) + create mode 100644 233 ``` -The first line of commit infomation is the title. The second and later lines -are the details if any. ## Keeping Fork Up to Date Before pull your request, you should sync your code from the latest PaddlePaddle. To do this, you'll need to add a remote at first: -```shell -# see the current configured remote repository -git remote -v -# add upstream repository -git remote add upstream https://github.com/PaddlePaddle/Paddle.git -# verify the new upstream -git remote -v +```bash +➜ git remote add upstream https://github.com/PaddlePaddle/Paddle +➜ git remote +origin +upstream ``` Update your fork with the latest upstream changes: -```shell -git pull --rebase upstream develop +```bash +➜ git fetch upstream +➜ git pull upstream develop ``` -If there are no unique commits locally, git will simply perform a fast-forward. -However, if you have been making changes (in the vast majority of cases you -probably shouldn't be), you may have to deal with conflicts. - Now, your local master branch is up-to-date with everything modified upstream. ## Push to GitHub -```shell +```bash # push to your repository in Github -git push -u origin MY_COOL_STUFF_BRANCH # create remote branch MY_COOL_STUFF_BRANCH to origin. +➜ git push origin my-cool-stuff ``` -## Pull Request +## Create an issue and a Pull Request + +Create an Issue to describe the problem and record its number. Go to the page for your fork on GitHub, select your development branch, -and click the **pull request button**. - -## Update your pull request with the lastest version - -During the code review, your pull request may become stale because new commits in -baidu/Paddle. GitHub allows autmotic update if there is no conflict. You can do this -by clicking the "Update Branch" button in your pull request page. However, in the case -of conflict, you need to do the update manually. You need to do the following on -your local repository: -```shell -git checkout MY_COOL_STUFF_BRANCH -git pull upstream develop -# You may need to resolve the conflict according to the git prompt. -# Make and test your code. -git push origin MY_COOL_STUFF_BRANCH +and click the `New pull request`. + +screen shot 2017-04-26 at 9 09 28 pm + +Then select the target branch: + +screen shot 2017-04-26 at 9 11 52 pm + +We can add `resolve #Issue number` in PR description to close the issue automatically after the PR is merge. More details in . + +Then wait for review, if there need to modify, refer to the above steps to update the corresponding origin branch. + +## Delete origin branch + +After the PR is merge into the main repository, we can delete the remote branch on the PR page. + +screen shot 2017-04-26 at 9 18 24 pm + +Or just run: + +```bash +➜ git push origin :my-cool-stuff ``` -Now your Pull Request is updated with the latest version. -## Revise your pull request +## Delete local branch -When you revise your pull request according to reviewer's comments, please use 'git commit' instead of 'git commit --amend' to commit your changes so that the reviewers can see the difference between the new pull requrest and the old pull request. +Finally, we delete local branch: -The possible commands are +```bash +➜ git checkout develop -```shell -git checkout MY_COOL_STUFF_BRANCH -git pull upstream develop # update local to newest code base. -# May be some conflicts will occured. -# And develop your cool stuff -env EDITOR=vim git commit # add your revise log -git push origin MY_COOL_STUFF_BRANCH +# delete my-cool-stuff branch +➜ git branch -D my-cool-stuff ``` diff --git a/doc/howto/dev/new_layer_cn.rst b/doc/howto/dev/new_layer_cn.rst index 9489a921c70ad6ee5709f46445554f5d9640162c..75037e693b32f923ee7dc9dfec322495fe4ce10a 100644 --- a/doc/howto/dev/new_layer_cn.rst +++ b/doc/howto/dev/new_layer_cn.rst @@ -37,7 +37,7 @@ \frac{\partial c(y)}{\partial x} = \frac{\partial c(y)}{\partial y} \frac{\partial y}{\partial x} -假设 :math:`z = f(W^T x + b)` ,那么 +假设 :math:`z = W^T x + b` ,那么 .. math:: diff --git a/doc/howto/dev/new_layer_en.rst b/doc/howto/dev/new_layer_en.rst index 46481f5ead33dc6a26507e021fd9ae0f8316e940..110a9fb38f890a766bb4480e91feb22d3b0838a5 100644 --- a/doc/howto/dev/new_layer_en.rst +++ b/doc/howto/dev/new_layer_en.rst @@ -29,7 +29,7 @@ Fully connected layer takes a dense input vector with dimension :math:`D_i`. It where :math:`f(.)` is an nonlinear *activation* function, such as sigmoid, tanh, and Relu. -The transformation matrix :math:`W` and bias vector :math:`b` are the *parameters* of the layer. The *parameters* of a layer are learned during training in the *backward pass*. The backward pass computes the gradients of the output function with respect to all parameters and inputs. The optimizer can use chain rule to compute the gradients of the loss function with respect to each parameter. +The transformation matrix :math:`W` and bias vector :math:`b` are the *parameters* of the layer. The *parameters* of a layer are learned during training in the *backward pass*. The backward pass computes the gradients of the output function with respect to all parameters and inputs. The optimizer can use chain rule to compute the gradients of the loss function with respect to each parameter. Suppose our loss function is :math:`c(y)`, then @@ -37,7 +37,7 @@ Suppose our loss function is :math:`c(y)`, then \frac{\partial c(y)}{\partial x} = \frac{\partial c(y)}{\partial y} \frac{\partial y}{\partial x} -Suppose :math:`z = f(W^T x + b)`, then +Suppose :math:`z = W^T x + b`, then .. math:: @@ -48,7 +48,7 @@ This derivative can be automatically computed by our base layer class. Then, for fully connected layer, we need to compute: .. math:: - + \frac{\partial z}{\partial x} = W, \frac{\partial z_j}{\partial W_{ij}} = x_i, \frac{\partial z}{\partial b} = \mathbf 1 where :math:`\mathbf 1` is an all one vector, :math:`W_{ij}` is the number at the i-th row and j-th column of the matrix :math:`W`, :math:`z_j` is the j-th component of the vector :math:`z`, and :math:`x_i` is the i-th component of the vector :math:`x`. @@ -322,7 +322,7 @@ All the gradient check unit tests are located in :code:`paddle/gserver/tests/tes /* weight */ true); } } - + If you are creating a new file for the test, such as :code:`paddle/gserver/tests/testFCGrad.cpp`, you need to add the file to :code:`paddle/gserver/tests/CMakeLists.txt`. An example is given below. All the unit tests will run when you execute the command :code:`make tests`. Notice that some layers might need high accuracy for the gradient check unit tests to work well. You need to configure :code:`WITH_DOUBLE` to `ON` when configuring cmake. .. code-block:: bash diff --git a/doc/howto/dev/new_op_cn.md b/doc/howto/dev/new_op_cn.md new file mode 100644 index 0000000000000000000000000000000000000000..c823d7e9fcd63dd7719ac1403952b03c2d2f03c0 --- /dev/null +++ b/doc/howto/dev/new_op_cn.md @@ -0,0 +1,332 @@ +# 如何写新的Operator + + - [概念简介](#概念简介) + - [实现C++类](#实现C++类) + - [定义ProtoMaker类](#定义ProtoMaker类) + - [定义Operator类](#定义Operator类) + - [定义OpKernel类](#定义OpKernel类) + - [注册Operator](#注册Operator) + - [编译](#编译) + - [绑定Python](#绑定Python) + - [实现单元测试](#实现单元测试) + - [前向Operator单测](#前向Operator单测) + - [反向Operator单测](#反向Operator单测) + - [编译和执行](#编译和执行) + + +## 概念简介 + +简单介绍需要用到基类,详细介绍请参考设计文档。 + +- `framework::OperatorBase`: Operator(简写,Op)基类。 +- `framework::OpKernel`: Op计算函数的基类,称作Kernel。 +- `framework::OperatorWithKernel`:继承自OperatorBase,Op有计算函数,称作有Kernel。 +- `class OpProtoAndCheckerMaker`:描述该Op的输入、输出、属性、注释,主要用于Python API接口生成 + +依据是否包含kernel,可以将Op分为两种:包含Kernel的Op和不包含kernel的Op,前者Op的定义继承自`OperatorBase`,后者继承自`OperatorWithKernel`。本教程主要介绍带Kernel的Op如何写,简单总结Op需要包含的内容如下: + + + 内容 | 定义位置 +-------------- | :---------------------- +OpProtoMake定义 | `.cc`文件,Backward Op不需要定义OpProtoMake +Op定义 | `.cc`文件 +Kernel实现 | CPU、GPU共享Kernel实现在`.h`文件中,否则,CPU 实现在`.cc`文件中,GPU 实现在`.cu`文件中。 +注册Op | Op注册实现在`.cc`文件;Kernel注册CPU实现在`.cc`文件中,GPU实现在`.cu`文件中 + + +实现新的op都添加至目录[paddle/operators](https://github.com/PaddlePaddle/Paddle/tree/develop/paddle/operators)下,文件命名以`*_op.h`(如有) 、 `*_op.cc` 、`*_op.cu`(如有)结尾。**系统会根据文件名自动构建op和其对应的Python扩展。** + + +下面以矩阵乘操作,即[MulOp](https://github.com/PaddlePaddle/Paddle/blob/develop/paddle/operators/mul_op.cc)为例来介绍如何写带Kernel的Operator。 + + +## 实现C++类 + + +### 1. 定义ProtoMaker类 + +矩阵乘法的公式:$Out = X * Y$, 可见该计算由两个输入,一个输出组成。 + +首先定义`ProtoMaker`来描述该Op的输入、输出,并添加注释: + +```cpp +class MulOpMaker : public framework::OpProtoAndCheckerMaker { + public: + MulOpMaker(framework::OpProto *proto, framework::OpAttrChecker *op_checker) + : OpProtoAndCheckerMaker(proto, op_checker) { + AddInput("X", "(Tensor), 2D tensor of size (M x K)"); + AddInput("Y", "(Tensor), 2D tensor of size (K x N)"); + AddOutput("Out", "(Tensor), 2D tensor of size (M x N)"); + AddComment(R"DOC( +Two Element Mul Operator. +The equation is: Out = X * Y +)DOC"); + } +}; +``` + +[`MulOpMaker`](https://github.com/PaddlePaddle/Paddle/blob/develop/paddle/operators/mul_op.cc#L43)继承自`framework::OpProtoAndCheckerMaker`,构造函数含有2个参数: + + - `framework::OpProto` : 前者存储Op的输入输出和参数属性,将用于Python API接口的生成。 + - `framework::OpAttrChecker` :后者用于检查参数属性的合法性。 + +构造函数里通过`AddInput`添加输入参数,通过`AddOutput`添加输出参数,通过`AddComment`添加Op的注释。这些函数会将对应内容添加到`OpProto`中。 + +上面的代码在`MulOp`中添加两个输入`X`和`Y`,添加了一个输出`Out`,并解释了各自含义,命名请遵守[命名规范](https://github.com/PaddlePaddle/Paddle/blob/develop/paddle/operators/name_convention.md)。 + + +再以[`ScaleOp`](https://github.com/PaddlePaddle/Paddle/blob/develop/paddle/operators/scale_op.cc#L37)为例: + +```cpp +template +class ScaleOpMaker : public framework::OpProtoAndCheckerMaker { + public: + ScaleOpMaker(framework::OpProto *proto, framework::OpAttrChecker *op_checker) + : OpProtoAndCheckerMaker(proto, op_checker) { + AddInput("X", "The input tensor of scale operator.").NotInGradient(); + AddOutput("Out", "The output tensor of scale operator.").NotInGradient(); + AddComment(R"DOC(Scale operator +The equation is: Out = scale*X +)DOC"); + AddAttr("scale", "scale of scale operator.").SetDefault(1.0); + } +}; +``` + +这个例子有两处不同: + +- `AddInput("X","...").NotInGradient()` : 表示`X`这个输入不参与`ScaleOp`对应的梯度Op计算之中,如果Op的某个输入不参与反向梯度的计算,请显示地调用`.NotInGradient()`进行设置。 + +- `AddAttr("scale", "...").SetDefault(1.0);` : 增加`scale`系数,作为参数属性,并且设置默认值为1.0。 + + +### 2. 定义Operator类 + +下面的点实现了MulOp的定义: + +```cpp +class MulOp : public framework::OperatorWithKernel { + public: + using framework::OperatorWithKernel::OperatorWithKernel; + + protected: + void InferShape(const framework::InferShapeContext &ctx) const override { + auto dim0 = ctx.Input("X")->dims(); + auto dim1 = ctx.Input("Y")->dims(); + PADDLE_ENFORCE_EQ(dim0.size(), 2, + "input X(%s) should be a tensor with 2 dims, a matrix", + ctx.op_.Input("X")); + PADDLE_ENFORCE_EQ(dim1.size(), 2, + "input Y(%s) should be a tensor with 2 dims, a matrix", + ctx.op_.Input("Y")); + PADDLE_ENFORCE_EQ( + dim0[1], dim1[0], + "First matrix's width must be equal with second matrix's height."); + ctx.Output("Out")->Resize({dim0[0], dim1[1]}); + } +}; +``` + +[`MulOp`](https://github.com/PaddlePaddle/Paddle/blob/develop/paddle/operators/mul_op.cc#L22)继承自`OperatorWithKernel`。`public`成员: + +```cpp +using framework::OperatorWithKernel::OperatorWithKernel; +``` + +这句表示使用基类`OperatorWithKernel`的构造函数,也可写成: + +```cpp +MulOp(const std::string &type, const framework::VariableNameMap &inputs, + const framework::VariableNameMap &outputs, + const framework::AttributeMap &attrs) + : OperatorWithKernel(type, inputs, outputs, attrs) {} +``` + +还需要重写`InferShape`接口。`InferShape`为const函数,不能修改Op的成员变量,参数为`const framework::InferShapeContext &ctx`,通过该参数可获取到输入输出以及属性。它的功能是: + + - 1). 做检查, 尽早报错:检查输入数据维度、类型等是否合法。 + - 2). 设置输出Tensor的形状。 + +通常`OpProtoMaker`和`Op`类的定义写在`.cc`文件中,和下面将要介绍的注册函数一起放在`.cc`中 + +### 3. 定义OpKernel类 + +`MulKernel`继承自`framework::OpKernel`,带有下面两个模板参数: + +- `typename Place`: 表示设备类型,不同设备(CPU、GPU)共享同一个Kernel时,需加该模板参数,不共享则不加,一个不共享的例子是[`OnehotCrossEntropyOpKernel`](https://github.com/PaddlePaddle/Paddle/blob/develop/paddle/operators/cross_entropy_op.h#L43)。 + +- `typename T` : 表示数据类型,如`float`, `double`等。 + +需要为`MulKernel`类重写`Compute`接口。 +- `Compute`接受一个输入参数:`const framework::ExecutionContext& context`。 +- 与`InferShapeContext`相比,`ExecutionContext`增加了设备类型,同样可获取到输入输出和属性参数。 +- `Compute`函数里实现`OpKernel`的具体计算逻辑。 + +下面是 `MulKernel` `Compute`的实现: + + ```cpp + template + class MulKernel : public framework::OpKernel { + public: + void Compute(const framework::ExecutionContext& context) const override { + auto* X = context.Input("X"); + auto* Y = context.Input("Y"); + auto* Z = context.Output("Out"); + Z->mutable_data(context.GetPlace()); + auto* device_context = + const_cast(context.device_context_); + math::matmul(*X, false, *Y, false, 1, Z, 0, device_context); + } + }; + ``` + +需要注意:**不同设备(CPU、GPU)共享一个Op定义,是否则共享同一个`OpKernel`,取决于`Compute`调用的函数是否支持不同设备。** + +`MulOp`的CPU、GPU实现共享同一个`Kernel`。`OpKernel`不共享的例子可以参考:[`OnehotCrossEntropyOpKernel`](https://github.com/PaddlePaddle/Paddle/blob/develop/paddle/operators/cross_entropy_op.h#L43)。 + +为了使`OpKernel`的计算过程书写更加简单,并且CPU、GPU的代码可以复用,我们通常借助 Eigen unsupported Tensor模块来实现`Compute`接口。关于在PaddlePaddle中如何使用Eigen库,请参考[使用文档](https://github.com/PaddlePaddle/Paddle/blob/develop/doc/howto/dev/use_eigen_cn.md)。 + + +到此,前向Op实现完成。接下来,需要在`.cc`文件中注册该op和kernel。 +反向Op类的定义,反向OpKernel的定义与前向Op类似,这里不再赘述。**但需注意反向Op没有`ProtoMaker`**。 + +### 4. 注册Operator + +- 在`.cc`文件中注册前向、反向Op类,注册CPU Kernel。 + + ```cpp + namespace ops = paddle::operators; + REGISTER_OP(mul, ops::MulOp, ops::MulOpMaker, mul_grad, ops::MulOpGrad); + REGISTER_OP_CPU_KERNEL(mul, ops::MulKernel); + REGISTER_OP_CPU_KERNEL(mul_grad, + ops::MulGradKernel); + ``` + + 在上面的代码中: + + - `REGISTER_OP` : 注册`ops::MulOp`类,类型名为`mul`,该类的`ProtoMaker`为`ops::MulOpMaker`,注册`ops::MulOpGrad`,类型名为`mul_grad`。 + - `REGISTER_OP_WITHOUT_GRADIENT` : 用于注册没有反向的Op。 + - `REGISTER_OP_CPU_KERNEL` :注册`ops::MulKernel`类,并特化模板参数为`paddle::platform::CPUPlace`和`float`类型,同理,注册`ops::MulGradKernel`类。 + + +- 在 `.cu`文件中注册GPU Kernel。 + - 请注意,如果GPU Kernel的实现基于Eigen unsupported模块,那么在 `.cu`的开始请加上宏定义 `#define EIGEN_USE_GPU`,代码示例如下: + + ```cpp + // if use Eigen unsupported module before include head files + #define EIGEN_USE_GPU + + namespace ops = paddle::operators; + REGISTER_OP_GPU_KERNEL(mul, ops::MulKernel); + REGISTER_OP_GPU_KERNEL(mul_grad, + ops::MulGradKernel); + ``` + +### 5. 编译 + +运行下面命令可以进行编译: + +``` +make mul_op +``` + +## 绑定Python + +系统会对新增的op自动绑定Python,并链接到生成的lib库中。 + +## 实现单元测试 + +单测包括对比前向Op不同设备(CPU、GPU)的实现、对比反向OP不同设备(CPU、GPU)的实现、反向Op的梯度测试。下面介绍介绍[`MulOp`的单元测试](https://github.com/PaddlePaddle/Paddle/blob/develop/python/paddle/v2/framework/tests/test_mul_op.py)。 + +### 前向Operator单元测试 + +前向Op单元测试继承自`unittest.TestCase`,并定义元类`__metaclass__ = OpTestMeta`。各项更加具体的单元测试在`OpTestMeta`里完成。测试前向Operator,需要: + +1. 在`setUp`函数定义输入、输出,以及相关的属性参数。 +2. 生成随机的输入数据。 +3. 在Python脚本中实现与前向operator相同的计算逻辑,得到输出值,与operator前向计算的输出进行对比。 + + + ```python + import unittest + import numpy as np + from gradient_checker import GradientChecker, create_op + from op_test_util import OpTestMeta + + class TestMulOp(unittest.TestCase): + __metaclass__ = OpTestMeta + + def setUp(self): + self.type = "mul" + self.inputs = { + 'X': np.random.random((32, 84)).astype("float32"), + 'Y': np.random.random((84, 100)).astype("float32") + } + self.outputs = {'Out': np.dot(self.inputs['X'], self.inputs['Y'])} + ``` + +上面的代码首先导入依赖的包,下面是对`setUp`函数中操作的重要变量的详细解释: + +- `self.type = "mul" ` : 定义类型,与operator注册时注册的类型一致。 +- `self.inputs` : 定义输入,类型为`numpy.array`,并初始化。 +- `self.outputs` : 定义输出,并在Python脚本中完成与operator同样的计算逻辑,返回Python端的计算结果。 + + +### 反向Operator单元测试 + +反向Op单元测试继承自`GradientChecker`,而`GradientChecker`继承自`unittest.TestCase`,因此,**反向单元测试函数需要以`test_`开头**。 + +```python +class TestMulGradOp(GradientChecker): + def setUp(self): + self.op = create_op("mul") + self.inputs = { + 'X': np.random.random((32, 84)).astype("float32"), + 'Y': np.random.random((84, 100)).astype("float32") + } + + def test_check_grad_normal(self): + # mul op will enlarge the relative error + self.check_grad(['X', 'Y'], 'Out', max_relative_error=0.5) + + def test_check_grad_ingore_x(self): + self.check_grad( + ['Y'], 'Out', max_relative_error=0.5, no_grad_set=set("X")) + + def test_check_grad_ingore_y(self): + self.check_grad( + ['X'], 'Out', max_relative_error=0.5, no_grad_set=set('Y')) +``` + +下面解释代码中一些关键的地方: + +- 调用`create_op("mul")`创建反向Op对应的前向Op。 +- `test_check_grad_normal`中调用`check_grad`使用数值法检测梯度正确性和稳定性。 + - 第一个参数`["X", "Y"]` : 指定对输入变量`X`、`Y`做梯度检测。 + - 第二个参数`"Out"` : 指定前向网络最终的输出目标变量`Out`。 + - 第三个参数`max_relative_error`:指定检测梯度时能容忍的最大错误值。 +- `test_check_grad_ingore_x`和`test_check_grad_ingore_y`分支用来测试只需要计算一个输入梯度的情况。 + + +### 编译和执行单元测试 + +`python/paddle/v2/framework/tests` 目录下新增的 `test_*.py` 单元测试会被自动加入工程进行编译。 + +请注意,**不同于Op的编译测试,运行单元测试测时需要编译整个工程**,并且编译时需要打开`WITH_TESTING`, 即`cmake paddle_dir -DWITH_TESTING=ON`。编译成功后,执行下面的命令来运行单元测试: + +```bash +make test ARGS="-R test_mul_op -V" +``` + +或者: + +```bash +ctest -R test_mul_op +``` + +## 注意事项 + +- 为每个Op创建单独的`*_op.h`(如有)、`*_op.cc`和`*_op.cu`(如有)。不允许一个文件中包含多个Op,这将会导致编译出错。 +- 注册Op时的类型名,需要和该Op的名字一样。即不允许在`A_op.cc`里面,注册`REGISTER_OP(B, ...)`等,这将会导致单元测试出错。 +- 如果Op没有实现GPU Kernel,请不要创建空的`*_op.cu`,这将会导致单元测试出错。 +- 如果多个Op依赖一些共用的函数,可以创建非`*_op.*`格式的文件来存放,如`gather.h`文件。 diff --git a/doc/howto/dev/new_op_en.md b/doc/howto/dev/new_op_en.md new file mode 100644 index 0000000000000000000000000000000000000000..1e88e1f5b4df710f1b69f0305d8d8a2921c4249a --- /dev/null +++ b/doc/howto/dev/new_op_en.md @@ -0,0 +1,342 @@ +# How to write a new operator + + - [Background](#background) + - [Implementing C++ Types](#implementing-c++-types) + - [Defining ProtoMaker](#defining-protoMaker) + - [Defining Operator](#defining-operator) + - [Registering Operator](#registering-operator) + - [Compilation](#compilation) + - [Python Binding](#python-binding) + - [Unit Tests](#unit-tests) + - [Testing Forward Operators](#testing-forward-operators) + - [Testing Backward Operators](#testing-backward-operators) + - [Compiling and Running](#compiling-and-running) + - [Remarks](#remarks) +## Background + +Here are the base types needed. For details, please refer to the design docs. + +- `framework::OperatorBase`: Operator (Op)base class. +- `framework::OpKernel`: Base class for Op computation. +- `framework::OperatorWithKernel`: Inherited from OperatorBase, describing an operator with computation. +- `class OpProtoAndCheckerMaker`: Describes an Operator's input, output, attributes and description, mainly used to interface with Python API. + +An operator can be differentiated by whether in has kernel methods. An operator with kernel inherits from `OperatorWithKernel` while the ones without inherit from `OperatorBase`. This tutorial focuses on implementing operators with kernels. In short, an operator includes the following information: + + + Information | Where is it defined +-------------- | :---------------------- +OpProtoMake definition | `.cc`files, Backward Op does not need an OpProtoMake interface. +Op definition | `.cc` files +Kernel implementation | The kernel methods shared between CPU and GPU are defined in `.h` files. CPU-specific kernels live in `.cc` files, while GPU-specific kernels are implemented in `.cu`files. +Registering the Op | Ops are registered in `.cc` files; For Kernel registration, `.cc` files contain the CPU implementation, while `.cu` files contain the GPU implementation. + + +New Operator implementations are added to the list [paddle/operators](https://github.com/PaddlePaddle/Paddle/tree/develop/paddle/operators), with file names in the format `*_op.h` (if applicable), `*_op.cc`, `*_op.cu` (if applicable).** The system will use the naming scheme to automatically build operators and their corresponding Python extensions. ** + + +Let's take matrix multiplication operator, [MulOp](https://github.com/PaddlePaddle/Paddle/blob/develop/paddle/operators/mul_op.cc), as an example to introduce the writing of an Operator with Kernel. + + +## Implementing C++ Types + + +### 1. Defining Class ProtoMaker + +Matrix Multiplication can be written as $Out = X * Y$, meaning that the operation consists of two inputs and pne output. + +First, define `ProtoMaker` to describe the Operator's input, output, and additional comments: + +```cpp +class MulOpMaker : public framework::OpProtoAndCheckerMaker { + public: + MulOpMaker(framework::OpProto *proto, framework::OpAttrChecker *op_checker) + : OpProtoAndCheckerMaker(proto, op_checker) { + AddInput("X", "(Tensor), 2D tensor of size (M x K)"); + AddInput("Y", "(Tensor), 2D tensor of size (K x N)"); + AddOutput("Out", "(Tensor), 2D tensor of size (M x N)"); + AddComment(R"DOC( +Two Element Mul Operator. +The equation is: Out = X * Y +)DOC"); + } +}; +``` + +[`MulOpMaker`](https://github.com/PaddlePaddle/Paddle/blob/develop/paddle/operators/mul_op.cc#L43)is inherited from`framework::OpProtoAndCheckerMaker`, consisting of 2 variables in the constructor: + + - `framework::OpProto` stores Operator input and variable attribute, used for generating Python API interfaces. + - `framework::OpAttrChecker` is used to validate variable attributes. + +The constructor utilizes `AddInput`, `AddOutput`, and `AddComment`, so that the corresponding information will be added to `OpProto`. + +The code above adds two inputs `X` and `Y` to `MulOp`, an output `Out`, and their corresponding descriptions, in accordance to Paddle's [naming convention](https://github.com/PaddlePaddle/Paddle/blob/develop/paddle/operators/name_convention.md). + + +An additional example [`ScaleOp`](https://github.com/PaddlePaddle/Paddle/blob/develop/paddle/operators/scale_op.cc#L37) is implemented as follows: + +```cpp +template +class ScaleOpMaker : public framework::OpProtoAndCheckerMaker { + public: + ScaleOpMaker(framework::OpProto *proto, framework::OpAttrChecker *op_checker) + : OpProtoAndCheckerMaker(proto, op_checker) { + AddInput("X", "The input tensor of scale operator.").NotInGradient(); + AddOutput("Out", "The output tensor of scale operator.").NotInGradient(); + AddComment(R"DOC(Scale operator +The equation is: Out = scale*X +)DOC"); + AddAttr("scale", "scale of scale operator.").SetDefault(1.0); + } +}; +``` + +There are two changes in this example: + +- `AddInput("X","...").NotInGradient()` expresses that input `X` is not involved in `ScaleOp`'s corresponding computation. If an input to an operator is not participating in back-propagation, please explicitly set `.NotInGradient()`. + +- `AddAttr("scale", "...").SetDefault(1.0);` adds `scale`constant as an attribute, and sets the default value to 1.0. + + +### 2. Defining Operator + +The following code defines the interface for MulOp: + +```cpp +class MulOp : public framework::OperatorWithKernel { + public: + using framework::OperatorWithKernel::OperatorWithKernel; + + protected: + void InferShape(const framework::InferShapeContext &ctx) const override { + auto dim0 = ctx.Input("X")->dims(); + auto dim1 = ctx.Input("Y")->dims(); + PADDLE_ENFORCE_EQ(dim0.size(), 2, + "input X(%s) should be a tensor with 2 dims, a matrix", + ctx.op_.Input("X")); + PADDLE_ENFORCE_EQ(dim1.size(), 2, + "input Y(%s) should be a tensor with 2 dims, a matrix", + ctx.op_.Input("Y")); + PADDLE_ENFORCE_EQ( + dim0[1], dim1[0], + "First matrix's width must be equal with second matrix's height."); + ctx.Output("Out")->Resize({dim0[0], dim1[1]}); + } +}; +``` + +[`MulOp`](https://github.com/PaddlePaddle/Paddle/blob/develop/paddle/operators/mul_op.cc#L22) is inherited from `OperatorWithKernel`. Its `public` member + +```cpp +using framework::OperatorWithKernel::OperatorWithKernel; +``` + +expresses an operator constructor using base class `OperatorWithKernel`, alternatively written as + +```cpp +MulOp(const std::string &type, const framework::VariableNameMap &inputs, + const framework::VariableNameMap &outputs, + const framework::AttributeMap &attrs) + : OperatorWithKernel(type, inputs, outputs, attrs) {} +``` + +`InferShape` interface needs to be re-written.`InferShape` is a constant method and cannot modify Op's member variables, its constant member `const framework::InferShapeContext &ctx` can be used to extract input, output, and attributes. It functions to + + - 1). validate and error out early: it checks input data dimensions and types. + - 2). configures the tensor shape in the output. + +Usually `OpProtoMaker` and `Op`'s type definitions are written in `.cc` files, which also include the registration methods introduced later. + +### 3. Defining OpKernel + +`MulKernel` inherits `framework::OpKernel`, which includes the following templates: + +- `typename Place` denotes device type. When different devices, namely the CPU and the GPU, share the same kernel, this template needs to be added. If they don't share kernels, this must not be added. An example of a non-sharing kernel is [`OnehotCrossEntropyOpKernel`](https://github.com/PaddlePaddle/Paddle/blob/develop/paddle/operators/cross_entropy_op.h#L43). + +- `typename T` denotes data type, such as `float` or `double`. + +`MulKernel` types need to rewrite the interface for `Compute`. +- `Compute` takes one input variable `const framework::ExecutionContext& context`. +- Compared with `InferShapeContext`, `ExecutionContext` includes device types, and can similarly extract input, output, and attribute variables. +- `Compute` implements the computation logics of an `OpKernel`. + +`MulKernel`'s implementation of `Compute` is as follows: + + ```cpp + template + class MulKernel : public framework::OpKernel { + public: + void Compute(const framework::ExecutionContext& context) const override { + auto* X = context.Input("X"); + auto* Y = context.Input("Y"); + auto* Z = context.Output("Out"); + Z->mutable_data(context.GetPlace()); + auto* device_context = + const_cast(context.device_context_); + math::matmul(*X, false, *Y, false, 1, Z, 0, device_context); + } + }; + ``` + +Note that **different devices (CPU, GPU)share an Op definition; whether or not they share the same `OpKernel` depends on whether `Compute` calls functions that support both devices.** + +`MulOp`'s CPU and GPU share the same `Kernel`. A non-sharing `OpKernel` example can be seen in [`OnehotCrossEntropyOpKernel`](https://github.com/PaddlePaddle/Paddle/blob/develop/paddle/operators/cross_entropy_op.h#L43). + +To ease the writing of `OpKernel` compute, and for reusing code cross-device, [`Eigen-unsupported Tensor`](https://bitbucket.org/eigen/eigen/src/default/unsupported/Eigen/CXX11/src/Tensor/README.md?fileviewer=file-view-default) module is used to implement `Compute` interface. To learn about how the Eigen library is used in PaddlePaddle, please see [usage document](https://github.com/PaddlePaddle/Paddle/blob/develop/doc/howto/dev/use_eigen_cn.md). + + +This concludes the forward implementation of an operator. Next its operation and kernel need to be registered in a `.cc` file. + +The definition of its corresponding backward operator, if applicable, is similar to that of an forward operator. **Note that a backward operator does not include a `ProtoMaker`**. + +### 4. Registering Operator + +- In `.cc` files, register forward and backward operator classes and the CPU kernel. + + ```cpp + namespace ops = paddle::operators; + REGISTER_OP(mul, ops::MulOp, ops::MulOpMaker, mul_grad, ops::MulOpGrad); + REGISTER_OP_CPU_KERNEL(mul, ops::MulKernel); + REGISTER_OP_CPU_KERNEL(mul_grad, + ops::MulGradKernel); + ``` + + In that code block, + + - `REGISTER_OP` registers the `ops::MulOp` class, type named `mul`, its type `ProtoMaker` is `ops::MulOpMaker`, registering `ops::MulOpGrad` as `mul_grad`. + - `REGISTER_OP_WITHOUT_GRADIENT` registers an operator without gradient. + - `REGISTER_OP_CPU_KERNEL` registers `ops::MulKernel` class and specialized template types `paddle::platform::CPUPlace` and `float`, which also registers `ops::MulGradKernel`. + + +- Registering GPU Kernel in `.cu` files + - Note that if GPU Kernel is implemented using the `Eigen unsupported` module, then on top of `.cu`, a macro definition `#define EIGEN_USE_GPU` is needed, such as + + ```cpp + // if use Eigen unsupported module before include head files + #define EIGEN_USE_GPU + + namespace ops = paddle::operators; + REGISTER_OP_GPU_KERNEL(mul, ops::MulKernel); + REGISTER_OP_GPU_KERNEL(mul_grad, + ops::MulGradKernel); + ``` + +### 5. Compilation + +Run the following commands to compile. + +``` +make mul_op +``` + +## Python Binding + +The system will automatically bind to Python and link it to a generated library. + +## Unit Tests + +Unit tests for an operator include + +1. comparing a forward operator's implementations on different devices, + +2. comparing a backward operator's implementation on different devices, and + +3. a scaling test for the backward operator. + +Here, we introduce the [unit tests for `MulOp`](https://github.com/PaddlePaddle/Paddle/blob/develop/python/paddle/v2/framework/tests/test_mul_op.py). + +### Testing Forward Operators + +A forward operator unit test inherits `unittest.TestCase` and defines metaclass `__metaclass__ = OpTestMeta`. More concrete tests are performed in `OpTestMeta`. Testing a forward operator requires the following: + +1. Defining input, output and relevant attributes in `setUp` method. + +2. Generating random input data. + +3. Implementing the same computation logic in a Python script: + + ```python + import unittest + import numpy as np + from gradient_checker import GradientChecker, create_op + from op_test_util import OpTestMeta + + class TestMulOp(unittest.TestCase): + __metaclass__ = OpTestMeta + + def setUp(self): + self.type = "mul" + self.inputs = { + 'X': np.random.random((32, 84)).astype("float32"), + 'Y': np.random.random((84, 100)).astype("float32") + } + self.outputs = {'Out': np.dot(self.inputs['X'], self.inputs['Y'])} + ``` +Get its output, and compare it with the forward operator's own output. + +The code above first loads required packages. In addition, we have + +- `self.type = "mul" ` defines the type that is identical to what the operator's registered type. +- `self.inputs` defines input, with type `numpy.array` and initializes it. +- `self.outputs` defines output and completes the same operator computation in the Python script, and returns its result from the Python script. + +### Testing Backward Operators + +A backward operator unit test inherits `GradientChecker`, which inherits `unittest.TestCase`. As a result, **a backward operator unit test needs to be have the prefix `test_`**. + +```python +class TestMulGradOp(GradientChecker): + def setUp(self): + self.op = create_op("mul") + self.inputs = { + 'X': np.random.random((32, 84)).astype("float32"), + 'Y': np.random.random((84, 100)).astype("float32") + } + + def test_check_grad_normal(self): + # mul op will enlarge the relative error + self.check_grad(['X', 'Y'], 'Out', max_relative_error=0.5) + + def test_check_grad_ingore_x(self): + self.check_grad( + ['Y'], 'Out', max_relative_error=0.5, no_grad_set=set("X")) + + def test_check_grad_ingore_y(self): + self.check_grad( + ['X'], 'Out', max_relative_error=0.5, no_grad_set=set('Y')) +``` + +Some key points in the code above include: + +- `create_op("mul")` creates the backward operator's corresponding forward operator. +- `test_normal` calls `check_grad` to validate scaling tests' correctness and stability through numeric methods. + - The first variable `["X", "Y"]` appoints `X` and `Y` to be scale tested. + - The second variable `"Out"` points to the network's final output target `Out`. + - The third variable `max_relative_error` points to the maximum relative tolerance error during scaling tests. +- `test_check_grad_ingore_x` and `test_check_grad_ingore_y`branches test the cases where there is only one scaling input. + +### Compiling and Running + + +Any new unit testing file of the format `test_*.py` added to the director `python/paddle/v2/framework/tests` is automatically added to the project to compile. + +Note that **unlike the compile test for Ops, running unit tests requires compiling the entire project** and requires compiling with flag `WITH_TESTING` on i.e. `cmake paddle_dir -DWITH_TESTING=ON`. + +After successfully compiling the project, run the following command to run unit tests: + +```bash +make test ARGS="-R test_mul_op -V" +``` + +Or, + +```bash +ctest -R test_mul_op +``` + +## Remarks + +- Every `*_op.h` (if applicable), `*_op.cc`, and `*_op.cu` (if applicable) must be created for a unique Op. Compiling will fail if multiple operators are included per file. +- The type with which an operator is registered needs to be identical to the Op's name. Registering `REGISTER_OP(B, ...)` in `A_op.cc` will cause unit testing failures. +- If the operator does not implement a GPU kernel, please refrain from creating an empty `*_op.cu` file, or else unit tests will fail. +- If multiple operators rely on some shared methods, a file NOT named `*_op.*` can be created to store them, such as `gather.h`. diff --git a/doc/howto/dev/use_eigen_cn.md b/doc/howto/dev/use_eigen_cn.md new file mode 100644 index 0000000000000000000000000000000000000000..1367323b71277984834d9d4f0d9bea0f69478479 --- /dev/null +++ b/doc/howto/dev/use_eigen_cn.md @@ -0,0 +1,146 @@ +## 在Paddle中如何使用Eigen + +神经网络本质上是一个计算图,计算需要的数据存放在`Tensor`中,而计算过程是由`Operartor`来描述的。在执行时,`Operator`调用对应`OpKernel`中的`Compute`接口,实现对`Tensor`的操作。 + + +### Eigen Tensor模块 + +Eigen Tensor模块对element-wise计算提供了强大的支持,并且书写一份代码,可以同时在CPU、GPU执行。但Eigen Tensor是一个正在开发中的模块,因此可能测试不够完备,文档较少。 + +关于Eigen Tensor模块的详细介绍请参考[文档1](https://github.com/RLovelett/eigen/blob/master/unsupported/Eigen/CXX11/src/Tensor/README.md) 和[文档2](https://bitbucket.org/eigen/eigen/src/default/unsupported/Eigen/CXX11/src/Tensor/README.md) + + +### paddle::framework::Tensor + +Paddle Tensor定义在framework目录下,其主要接口如下: + +```cpp +class Tensor { + public: + /*! Return a pointer to mutable memory block. */ + template + inline T* data(); + + /** + * @brief Return a pointer to mutable memory block. + * @note If not exist, then allocation. + */ + template + inline T* mutable_data(platform::Place place); + + /** + * @brief Return a pointer to mutable memory block. + * + * @param[in] dims The dimensions of the memory block. + * @param[in] place The place of the memory block. + * + * @note If not exist, then allocation. + */ + template + inline T* mutable_data(DDim dims, platform::Place place); + + /*! Resize the dimensions of the memory block. */ + inline Tensor& Resize(const DDim& dims); + + /*! Return the dimensions of the memory block. */ + inline const DDim& dims() const; + + private: + /*! holds the memory block if allocated. */ + std::shared_ptr holder_; + + /*! points to dimensions of memory block. */ + DDim dim_; +}; +``` + +`Placeholder`的作用是延迟分配内存,即我们可以先定义一个Tensor,然后使用Resize接口设置Tensor的大小,最后再调用mutable_data接口分配实际的内存。 + +```cpp +paddle::framework::Tensor t; +paddle::platform::CPUPlace place; +// set size first +t.Resize({2, 3}); +// allocate memory on CPU later +t.mutable_data(place); +``` + +### paddle::framework::Tensor使用样例 +下面以AddOp为例说明Tensor的使用过程: + +- InferShape + +在运行神经网络计算图时,我们先调用每个`Operator`的`InferShape`接口,根据输入Tensor的大小来设置输出Tensor的大小,`Resize`接口会被调用。 + +```cpp +void InferShape(const framework::InferShapeContext &ctx) const override { + PADDLE_ENFORCE_EQ(ctx.Input("X")->dims(), + ctx.Input("Y")->dims(), + "Two input of Add Op's dimension must be same."); + ctx.Output("Out")->Resize(ctx.Input("X")->dims()); +} +``` + + +- Run + +`Operator`的`Run`接口最终会调用对应`OpKernel`的`Compute`接口,在这时真正的分配内存,`mutable_data`接口会被调用。 + +```cpp +void Compute(const framework::ExecutionContext& context) const override { + auto* input0 = context.Input("X"); + auto* input1 = context.Input("Y"); + auto* output = context.Output("Out"); + + output->mutable_data(context.GetPlace()); + + auto x = EigenVector::Flatten(*input0); + auto y = EigenVector::Flatten(*input1); + auto z = EigenVector::Flatten(*output); + + auto place = context.GetEigenDevice(); + + z.device(place) = x + y; +} +``` + + +### paddle::framework::Tensor到EigenTensor的转换 + +如上一小节所示,在具体的计算中,我们需要先把输入Tensor和输出Tensor转换为Eigen支持的格式。我们在[eigen.h](https://github.com/PaddlePaddle/Paddle/blob/develop/paddle/framework/eigen.h)中提供了一些全局函数用来实现paddle::framework::Tensor到EigenTensor/EigenMatrix/EigenVector/EigenScalar的转换。 + +以EigenTensor为例,做一个介绍 + +```cpp +Tensor t; +float* p = t.mutable_data(make_ddim({1, 2, 3}), platform::CPUPlace()); +for (int i = 0; i < 1 * 2 * 3; i++) { + p[i] = static_cast(i); +} + +EigenTensor::Type et = EigenTensor::From(t); +``` + +From是EigenTensor模板提供的一个接口,可以实现从paddle::framework::Tensor到对EigenTensor的转换。由于Tensor的rank是模板参数,因此在转换时需要显示的指定。 + +在Eigen中,不同rank的Tensor是不同类型,Vector是rank为1的Tensor。需要额外注意的是,EigenVector::From方法是把paddle中的一维Tensor转为Eigen的一维Tensor,在这里用EigenVector来表示;而EigenVector::Flatten方法是把paddle中的一个Tensor进行reshape操作,压扁成为Eigen的一维Tensor,类型仍然为EigenVector。 + +更多的转换方法请参考eigen_test.cc中的[单元测试](https://github.com/PaddlePaddle/Paddle/blob/develop/paddle/framework/eigen_test.cc)。 + + + +### 实现计算 + +当需要完成计算时,我们需要等式左边的EigenTensor调用device接口。在这里需要注意的是,这里的EigenTensor之间的运算只是改变了原有Tensor中的数据,而不会改变原有Tensor的shape信息。 + +```cpp +auto x = EigenVector::Flatten(*input0); +auto y = EigenVector::Flatten(*input1); +auto z = EigenVector::Flatten(*output); +auto place = context.GetEigenDevice(); +z.device(place) = x + y; +``` + +在这段代码中,input0/input1/output可以是任意维度的Tensor。我们调用了EigenVector的Flatten接口,把任意维度的Tensor转为了一维的EigenVector。而在计算结束之后,input0/input1/output的原有shape信息不变。如果想改变原有Tensor的shape信息,可以调用Resize接口进行改变。 + +由于Eigen Tensor模块的文档较少,我们可以参考TensorFlow的[kernels](https://github.com/tensorflow/tensorflow/tree/master/tensorflow/core/kernels)模块下的相关`OpKernel`的计算代码。 diff --git a/doc/howto/dev/use_eigen_en.md b/doc/howto/dev/use_eigen_en.md new file mode 100644 index 0000000000000000000000000000000000000000..e169106e12f5d62696f1f0e7163562793b32c18c --- /dev/null +++ b/doc/howto/dev/use_eigen_en.md @@ -0,0 +1,146 @@ +## How to use Eigen in Paddle + +Essentially, a neural network is a compute graph. T data needed for the computation is stored in `Tensor`s and its computation procedure is described by `Operator`s. An `Operator` calls the `Compute` interface in its corresponding `OpKernel` and operates on the `Tensor`. + + +### Eigen Tensor Module + +The Eigen Tensor module supports powerful element-wise computation. In addition, a piece of code written using it can be run on both the CPU and the GPU. + +Note that Eigen Tensor is still being actively developed, so its tests are not completely covered and its documentation may be sparse. + +For details on Eigen Tensor module, please see [doc 1](https://github.com/RLovelett/eigen/blob/master/unsupported/Eigen/CXX11/src/Tensor/README.md) and [doc 2](https://bitbucket.org/eigen/eigen/src/default/unsupported/Eigen/CXX11/src/Tensor/README.md). + + +### paddle::framework::Tensor + +Paddle Tensor's is defined in the framework directory with the following interface: + +```cpp +class Tensor { + public: + /*! Return a pointer to mutable memory block. */ + template + inline T* data(); + + /** + * @brief Return a pointer to mutable memory block. + * @note If not exist, then allocation. + */ + template + inline T* mutable_data(platform::Place place); + + /** + * @brief Return a pointer to mutable memory block. + * + * @param[in] dims The dimensions of the memory block. + * @param[in] place The place of the memory block. + * + * @note If not exist, then allocation. + */ + template + inline T* mutable_data(DDim dims, platform::Place place); + + /*! Resize the dimensions of the memory block. */ + inline Tensor& Resize(const DDim& dims); + + /*! Return the dimensions of the memory block. */ + inline const DDim& dims() const; + + private: + /*! holds the memory block if allocated. */ + std::shared_ptr holder_; + + /*! points to dimensions of memory block. */ + DDim dim_; +}; +``` + +`Placeholder` is used to delay memory allocation; that is, we can first define a tensor, using `Resize` to configure its shape, and then call `mutuable_data` to allocate the actual memory. + +```cpp +paddle::framework::Tensor t; +paddle::platform::CPUPlace place; +// set size first +t.Resize({2, 3}); +// allocate memory on CPU later +t.mutable_data(place); +``` + +### paddle::framework::Tensor Usage +`AddOp` demonstrates Tensor's usage. + +- InferShape + +When computing a neural network's compute graph, first call every `Operator`'s `InferShape` method, and use `Resize` to configure the size of the output tensor. + +```cpp +void InferShape(const framework::InferShapeContext &ctx) const override { + PADDLE_ENFORCE_EQ(ctx.Input("X")->dims(), + ctx.Input("Y")->dims(), + "Two input of Add Op's dimension must be same."); + ctx.Output("Out")->Resize(ctx.Input("X")->dims()); +} +``` + + +- Run + +```cpp +void Compute(const framework::ExecutionContext& context) const override { + auto* input0 = context.Input("X"); + auto* input1 = context.Input("Y"); + auto* output = context.Output("Out"); + + output->mutable_data(context.GetPlace()); + + auto x = EigenVector::Flatten(*input0); + auto y = EigenVector::Flatten(*input1); + auto z = EigenVector::Flatten(*output); + + auto place = context.GetEigenDevice(); + + z.device(place) = x + y; +} +``` + + +### paddle::framework::Tensor到EigenTensor的转换 + +As shown above, in actual computation, we need to transform the input and output `Tensor`s into formats Eigen supports. We show some functions in [eigen.h](https://github.com/PaddlePaddle/Paddle/blob/develop/paddle/framework/eigen.h) to implement the transformation from `paddle::framework::Tensor`to `EigenTensor/EigenMatrix/EigenVector/EigenScalar`. + +Using EigenTensor as an example: + +```cpp +Tensor t; +float* p = t.mutable_data(make_ddim({1, 2, 3}), platform::CPUPlace()); +for (int i = 0; i < 1 * 2 * 3; i++) { + p[i] = static_cast(i); +} + +EigenTensor::Type et = EigenTensor::From(t); +``` + +`From` is an interfacing method provided by the EigenTensor template, which implements the transformation from a `paddle::framework::Tensor` object to an EigenTensor. Since `rank` is a template parameter, it needs to be explicitly specified at the time of the transformation. + +In Eigen, tensors with different ranks are different types, with `Vector` bring a rank-1 instance. Note that `EigenVector::From` uses a transformation from an 1-dimensional Paddle tensor to a 1-dimensional Eigen tensor while `EigenVector::Flatten` reshapes a paddle tensor and flattens it into a 1-dimensional Eigen tensor. Both resulting tensors are still typed EigenVector. + +For more transformations, see the [unit tests](https://github.com/PaddlePaddle/Paddle/blob/develop/paddle/framework/eigen_test.cc) in the `eigen_test.cc` file. + + + +### Implementing Computation + +While computing, the device interface is needed from the EigenTensors on the left hand side of the assignments. Note that the computation between EigenTensors only changes the data originally inthe Tensor and does not change all the shape information associated with the Tensor. + +```cpp +auto x = EigenVector::Flatten(*input0); +auto y = EigenVector::Flatten(*input1); +auto z = EigenVector::Flatten(*output); +auto place = context.GetEigenDevice(); +z.device(place) = x + y; +``` + +In this code segment, input0/input1/output can be Tensors of arbitrary dimension. We are calling Flatten from EigenVector, transforming a tensor of any dimension into a 1-dimensional EigenVector. After completing computation, input0/input1/output will retain the same shape information, and they can be resized using the `Resize` interface. + +Because the Eigen Tensor module is under-documented, please refer to `OpKernel`'s computation code in TensorFlow's [kernel module documentation](https://github.com/tensorflow/tensorflow/tree/master/tensorflow/core/kernels). diff --git a/doc/howto/dev/write_docs_cn.rst b/doc/howto/dev/write_docs_cn.rst index 5051a892304fdc8b0f1a19a7d4560d5ee007c47d..731a63f945c29ba78538b3d71289b234e569354d 100644 --- a/doc/howto/dev/write_docs_cn.rst +++ b/doc/howto/dev/write_docs_cn.rst @@ -5,50 +5,60 @@ PaddlePaddle的文档包括英文文档 ``doc`` 和中文文档 ``doc_cn`` 两个部分。文档都是通过 `cmake`_ 驱动 `sphinx`_ 编译生成,生成后的文档分别存储在编译目录的 ``doc`` 和 ``doc_cn`` 两个子目录下。 -如何构建PaddlePaddle的文档 -========================== +如何构建文档 +============ -PaddlePaddle的文档构建有直接构建和基于Docker构建两种方式。构建PaddlePaddle文档需要准备的环境相对较复杂,所以我们推荐使用基于Docker来构建PaddlePaddle的文档。 +PaddlePaddle的文档构建有两种方式。 - -使用Docker构建PaddlePaddle的文档 --------------------------------- +使用Docker构建 +-------------- 使用Docker构建PaddlePaddle的文档,需要在系统里先安装好Docker工具包。Docker安装请参考 `Docker的官网 `_ 。安装好Docker之后可以使用源码目录下的脚本构建文档,即 -.. code-block:: bash +.. code-block:: bash - cd TO_YOUR_PADDLE_CLONE_PATH - cd paddle/scripts/tools/build_docs - bash build_docs.sh + cd TO_YOUR_PADDLE_CLONE_PATH + cd paddle/scripts/tools/build_docs + sh build_docs.sh -编译完成后,该目录下会生成如下两个子目录\: +编译完成之后,会在当前目录生成两个子目录\: doc(英文文档目录)和 doc_cn(中文文档目录)。 +打开浏览器访问对应目录下的index.html即可访问本地文档。 -* doc 英文文档目录 -* doc_cn 中文文档目录 +直接构建 +-------- -打开浏览器访问对应目录下的index.html即可访问本地文档。 +如果提示正确,可以执行以下命令编译生成文档,即 -.. code-block:: bash +.. code-block:: bash - open doc_cn/index.html + cd TO_YOUR_PADDLE_CLONE_PATH + mkdir -p build + cd build + cmake .. -DCMAKE_BUILD_TYPE=Debug -DWITH_GPU=OFF -DWITH_MKLDNN=OFF -DWITH_MKLML=OFF -DWITH_DOC=ON + make gen_proto_py + make paddle_docs paddle_docs_cn + +编译完成之后,会在当前目录生成两个子目录\: doc(英文文档目录)和 doc_cn(中文文档目录)。 +打开浏览器访问对应目录下的index.html即可访问本地文档。 -直接构建PaddlePaddle的文档 --------------------------- +如何书写文档 +============ -TBD +PaddlePaddle文档使用 `sphinx`_ 自动生成,用户可以参考sphinx教程进行书写。 -如何书写PaddlePaddle的文档 -========================== +如何更新文档主题 +================ -TBD +PaddlePaddle文档主题在 `TO_YOUR_PADDLE_CLONE_PATH/doc_theme` 文件夹下,包含所有和前端网页设计相关的文件。 -如何更新www.paddlepaddle.org文档 -================================ +如何更新doc.paddlepaddle.org +============================ -TBD +更新的文档以PR的形式提交到github中,提交方式参见 `贡献文档 `_ 。 +目前PaddlePaddle的develop分支的文档是自动触发更新的,用户可以分别查看最新的 `中文文档 `_ 和 +`英文文档 `_ 。 -.. _cmake: https://cmake.org/ -.. _sphinx: http://www.sphinx-doc.org/en/1.4.8/ +.. _cmake: https://cmake.org/ +.. _sphinx: http://www.sphinx-doc.org/en/1.4.8/ diff --git a/doc/howto/index_cn.rst b/doc/howto/index_cn.rst index 26449a6365843b526b3ac3111b337d2f17524c9d..0608aa30968b0e8474eef330e4d2cc63c9def97d 100644 --- a/doc/howto/index_cn.rst +++ b/doc/howto/index_cn.rst @@ -19,6 +19,7 @@ .. toctree:: :maxdepth: 1 + dev/build_cn.rst dev/write_docs_cn.rst dev/contribute_to_paddle_cn.md diff --git a/doc/howto/index_en.rst b/doc/howto/index_en.rst index 1fbfcd260b912078f00ed5b720ed607db725c4e2..1b6034be4edffd2cbc822018b733b9a3836ea84a 100644 --- a/doc/howto/index_en.rst +++ b/doc/howto/index_en.rst @@ -18,6 +18,7 @@ Development .. toctree:: :maxdepth: 1 + dev/build_en.rst dev/new_layer_en.rst dev/contribute_to_paddle_en.md diff --git a/doc/howto/usage/k8s/k8s_distributed_cn.md b/doc/howto/usage/k8s/k8s_distributed_cn.md index 3121b3f59df650c0a22d0bd305a6f793b202d30e..a9bebf09558b06993119803458977abedbbfbdd0 100644 --- a/doc/howto/usage/k8s/k8s_distributed_cn.md +++ b/doc/howto/usage/k8s/k8s_distributed_cn.md @@ -213,7 +213,7 @@ I1116 09:10:17.123440 50 Util.cpp:130] Calling runInitFunctions I1116 09:10:17.123764 50 Util.cpp:143] Call runInitFunctions done. [WARNING 2016-11-16 09:10:17,227 default_decorators.py:40] please use keyword arguments in paddle config. [INFO 2016-11-16 09:10:17,239 networks.py:1282] The input order is [movie_id, title, genres, user_id, gender, age, occupation, rating] -[INFO 2016-11-16 09:10:17,239 networks.py:1289] The output order is [__mse_cost_0__] +[INFO 2016-11-16 09:10:17,239 networks.py:1289] The output order is [__square_error_cost_0__] I1116 09:10:17.392917 50 Trainer.cpp:170] trainer mode: Normal I1116 09:10:17.613910 50 PyDataProvider2.cpp:257] loading dataprovider dataprovider::process I1116 09:10:17.680917 50 PyDataProvider2.cpp:257] loading dataprovider dataprovider::process diff --git a/doc/index_en.rst b/doc/index_en.rst index 168c7667c61da677905585d6c4b5037ce80b3765..64684b8b9b27e245c6b32ea28809d3bbce22fab9 100644 --- a/doc/index_en.rst +++ b/doc/index_en.rst @@ -7,4 +7,3 @@ PaddlePaddle Documentation getstarted/index_en.rst howto/index_en.rst api/index_en.rst - about/index_en.rst diff --git a/doc/survey/cluster_bootstrapping_tools.md b/doc/survey/cluster_bootstrapping_tools.md new file mode 100644 index 0000000000000000000000000000000000000000..1cd9962700bb49866f1ed6987abc28b27888a23f --- /dev/null +++ b/doc/survey/cluster_bootstrapping_tools.md @@ -0,0 +1,71 @@ +# Cluster bootstrapping tool survey +## Abstract +In order to bring up a cluster from bare metal machine to a fully functional kubernetes cluster for Paddlepaddle to run, we need to utilize some tools. Here we are going to compare [Sextant](https://github.com/k8sp/sextant) and [Tectonic installer](https://github.com/coreos/tectonic-installer) + +## Basic assumptions +Here are some basic assumptions before we move on to details +1. You are an administrator of a bare metal machine cluster, which means: + * you have full control to each of the machines. + * you have full control to the network which machines are connected to. +2. Machines can be booted from network with PEX or iPXE +3. You understand the [general procedure to bring up a cluster](#appendix-general-procedure-to-bring-up-a-cluster) + +if your cluster is able to mark above items with checkmarks, then keep reading. + +## Comparing Sextant and Tectonic installer +### Sextant +Sextant is an end2end solution to bring up a bare metal cluster to a fully functional k8s cluster, it integrates DHCP, name service, PEX, cloud-config-service, docker registry services altogether. + +#### Pros +1. End2End: basically all admin need to do is to config the cluster.yaml and power on the cluster. +2. Offline cluster configuration: Sextant has 2 phases during working with it, config time and deploy time. when admin is configuring, it requires admin's machine has internet connectivity, which will download some images, etc. But in deploy time, it's completely OK to go offline since all dependencies are ready during config time. +3. docker registry integrated. +4. GPU machine took care of. + +### Cons +1. k8s API server is not deployed with high availability in considering by default. +2. No grouping support. +3. No API interface, a one-off service. + + +### Tectonic installer +First of all, Tectonic is not free, it requires coreos.com account as a step of installation, and free user can only create less than 10 nodes. + +Tectonic is a suite of software which wraps around k8s and providing more utility regarding dev ops, ie, +Tectonic installer as it's named, it installs Tectonic to a bare metal cluster which means it's not totally an equivalent of Sextant. At the "booting a cluster" part, it mostly utilizes [Matchbox](https://github.com/coreos/matchbox), which is a general cluster bootstrapper. + +Matchbox's Approach is similar to Sexstant. + +### Pros +1. supports grouping machines. +2. supports running provisioning service in rtk. (not a big deal though). +3. supports http/gRPC API interface. +4. supports multi-template. + +### Cons +1. Not an e2e solution to bring up a cluster, need a lot of extra work and other software. +2. [Not fully supporting](https://github.com/coreos/matchbox/issues/550) centOS deployment yet. + +## Conclusion +Sextant is a better solution overall for paddle cloud deploying to a bare metal cluster. It would be great if Sextant can also 1) deploy k8s api server with high availability by default; 2) not designed as a one-off service. + + + +## Appendix: General procedure to bring up a cluster +It's physically impossible for a cluster admin to manually install OS and applications into cluster nodes one by one, here is what an admin would do in cloud industry: +1. setup a bootstrap machine with static IP in the cluster, which has following services: + * DHCP: assigns ip address for rest of the nodes. + * name service: to map node name to a IP + * PXE related services: the booting related info will be delivered to newly booted machines as their IP is assigned via DHCP service, PXE service will provide further booting and installing info and image with TFTP and http protocol. + * cluster config service: this is for providing cluster node with OS config via http + * optional docker registry: a built-in docker registry makes the whole cluster independent from connecting internet, and speeds up software distribution. +2. New node powers on, it will + * broadcast the request for an IP address + * DHCP server assigns the IP address, and deliver the PXE booting related info to the node. + * cluster node will request config files with booting info delivered with DHCP via the TFTP service, and in most of the cases, the config file will point to a http service for the booting image. + * Since PXE is configured with initrd, it will utilize the cloud config service and do further installations like coreOS or K8s installations. + * then restart the node. + +For further understanding, following 2 links from Matchbox are some good readings: +* [Machine lifecycle](https://github.com/coreos/matchbox/blob/master/Documentation/machine-lifecycle.md) +* [PXE booting](https://github.com/coreos/matchbox/blob/master/Documentation/network-booting.md) diff --git a/doc/templates/conf.py.cn.in b/doc/templates/conf.py.cn.in index 95cad835b11816f4d2e256c2abd662a545a5bad2..41b35b5b233abd737db07aaeb6c6dd4bf6d42b08 100644 --- a/doc/templates/conf.py.cn.in +++ b/doc/templates/conf.py.cn.in @@ -13,22 +13,18 @@ # serve to show the default. import sys import os, subprocess +sys.path.insert(0, os.path.abspath('@PADDLE_SOURCE_DIR@/python')) import shlex from recommonmark import parser, transform -try: - import py_paddle - import paddle - import paddle.v2 -except ImportError: - print("Must install paddle python package before generating documentation") - sys.exit(1) +import paddle +import paddle.v2 MarkdownParser = parser.CommonMarkParser AutoStructify = transform.AutoStructify # If extensions (or modules to document with autodoc) are in another directory, # add these directories to sys.path here. If the directory is relative to the # documentation root, use os.path.abspath to make it absolute, like shown here. -templates_path = ["@PROJ_ROOT@/doc_theme/templates"] +templates_path = ["@PADDLE_SOURCE_DIR@/doc_theme/templates"] # -- General configuration ------------------------------------------------ @@ -124,7 +120,7 @@ html_theme = 'sphinx_rtd_theme' # Add any paths that contain custom static files (such as style sheets) here, # relative to this directory. They are copied after the builtin static files, # so a file named "default.css" will overwrite the builtin "default.css". -html_static_path = ['@PROJ_ROOT@/doc_theme/static'] +html_static_path = ['@PADDLE_SOURCE_DIR@/doc_theme/static'] # Output file base name for HTML help builder. htmlhelp_basename = project + 'doc' diff --git a/doc/templates/conf.py.en.in b/doc/templates/conf.py.en.in index b477f0120c4fa0544012080b7cfb8572d3c44b04..5822c2481dd61da2084b0de76f6f65aa4e32e033 100644 --- a/doc/templates/conf.py.en.in +++ b/doc/templates/conf.py.en.in @@ -13,15 +13,11 @@ # serve to show the default. import sys import os, subprocess +sys.path.insert(0, os.path.abspath('@PADDLE_SOURCE_DIR@/python')) import shlex from recommonmark import parser, transform -try: - import py_paddle - import paddle - import paddle.v2 -except ImportError: - print("Must install paddle python package before generating documentation") - sys.exit(1) +import paddle +import paddle.v2 MarkdownParser = parser.CommonMarkParser @@ -29,7 +25,7 @@ AutoStructify = transform.AutoStructify # If extensions (or modules to document with autodoc) are in another directory, # add these directories to sys.path here. If the directory is relative to the # documentation root, use os.path.abspath to make it absolute, like shown here. -templates_path = ["@PROJ_ROOT@/doc_theme/templates"] +templates_path = ["@PADDLE_SOURCE_DIR@/doc_theme/templates"] # -- General configuration ------------------------------------------------ @@ -124,7 +120,7 @@ html_theme = 'sphinx_rtd_theme' # Add any paths that contain custom static files (such as style sheets) here, # relative to this directory. They are copied after the builtin static files, # so a file named "default.css" will overwrite the builtin "default.css". -html_static_path = ['@PROJ_ROOT@/doc_theme/static'] +html_static_path = ['@PADDLE_SOURCE_DIR@/doc_theme/static'] # Output file base name for HTML help builder. htmlhelp_basename = project + 'doc' diff --git a/doc_theme/templates/layout.html b/doc_theme/templates/layout.html index 65e61c5f298e19adc6330c378779a6edf418752e..9fca69dc4e7f0827acfc755a97a662350214b90e 100644 --- a/doc_theme/templates/layout.html +++ b/doc_theme/templates/layout.html @@ -101,7 +101,7 @@