Commit 82530e24 authored by Xi Chen

Merge branch 'develop' of https://github.com/PaddlePaddle/Paddle into aws-benchmark

...@@ -25,12 +25,3 @@ third_party/ ...@@ -25,12 +25,3 @@ third_party/
# clion workspace. # clion workspace.
cmake-build-* cmake-build-*
# generated while compiling
paddle/pybind/pybind.h
CMakeFiles
cmake_install.cmake
paddle/.timestamp
python/paddlepaddle.egg-info/
paddle/fluid/pybind/pybind.h
python/paddle/version.py
...@@ -28,7 +28,7 @@ INCLUDE(ExternalProject) ...@@ -28,7 +28,7 @@ INCLUDE(ExternalProject)
SET(MKLML_PROJECT "extern_mklml") SET(MKLML_PROJECT "extern_mklml")
SET(MKLML_VER "mklml_lnx_2018.0.1.20171007") SET(MKLML_VER "mklml_lnx_2018.0.1.20171007")
SET(MKLML_URL "https://github.com/01org/mkl-dnn/releases/download/v0.11/${MKLML_VER}.tgz") SET(MKLML_URL "http://paddlepaddledeps.bj.bcebos.com/${MKLML_VER}.tgz")
SET(MKLML_SOURCE_DIR "${THIRD_PARTY_PATH}/mklml") SET(MKLML_SOURCE_DIR "${THIRD_PARTY_PATH}/mklml")
SET(MKLML_DOWNLOAD_DIR "${MKLML_SOURCE_DIR}/src/${MKLML_PROJECT}") SET(MKLML_DOWNLOAD_DIR "${MKLML_SOURCE_DIR}/src/${MKLML_PROJECT}")
SET(MKLML_DST_DIR "mklml") SET(MKLML_DST_DIR "mklml")
......
...@@ -54,5 +54,7 @@ add_library(snappystream STATIC IMPORTED GLOBAL) ...@@ -54,5 +54,7 @@ add_library(snappystream STATIC IMPORTED GLOBAL)
set_property(TARGET snappystream PROPERTY IMPORTED_LOCATION set_property(TARGET snappystream PROPERTY IMPORTED_LOCATION
"${SNAPPYSTREAM_INSTALL_DIR}/lib/libsnappystream.a") "${SNAPPYSTREAM_INSTALL_DIR}/lib/libsnappystream.a")
include_directories(${SNAPPYSTREAM_INCLUDE_DIR}) include_directories(${SNAPPYSTREAM_INCLUDE_DIR}) # For snappysteam to include its own headers.
include_directories(${THIRD_PARTY_PATH}/install) # For Paddle to include snappy stream headers.
add_dependencies(snappystream extern_snappystream) add_dependencies(snappystream extern_snappystream)
...@@ -62,7 +62,8 @@ ExternalProject_Add( ...@@ -62,7 +62,8 @@ ExternalProject_Add(
) )
MESSAGE(STATUS "warp-ctc library: ${WARPCTC_LIBRARIES}") MESSAGE(STATUS "warp-ctc library: ${WARPCTC_LIBRARIES}")
INCLUDE_DIRECTORIES(${WARPCTC_INCLUDE_DIR}) INCLUDE_DIRECTORIES(${WARPCTC_INCLUDE_DIR}) # For warpctc code to include its headers.
INCLUDE_DIRECTORIES(${THIRD_PARTY_PATH}/install) # For Paddle code to include warpctc headers.
ADD_LIBRARY(warpctc SHARED IMPORTED GLOBAL) ADD_LIBRARY(warpctc SHARED IMPORTED GLOBAL)
SET_PROPERTY(TARGET warpctc PROPERTY IMPORTED_LOCATION ${WARPCTC_LIBRARIES}) SET_PROPERTY(TARGET warpctc PROPERTY IMPORTED_LOCATION ${WARPCTC_LIBRARIES})
......
...@@ -25,7 +25,8 @@ ELSE(WIN32) ...@@ -25,7 +25,8 @@ ELSE(WIN32)
SET(ZLIB_LIBRARIES "${ZLIB_INSTALL_DIR}/lib/libz.a" CACHE FILEPATH "zlib library." FORCE) SET(ZLIB_LIBRARIES "${ZLIB_INSTALL_DIR}/lib/libz.a" CACHE FILEPATH "zlib library." FORCE)
ENDIF(WIN32) ENDIF(WIN32)
INCLUDE_DIRECTORIES(${ZLIB_INCLUDE_DIR}) INCLUDE_DIRECTORIES(${ZLIB_INCLUDE_DIR}) # For zlib code to include its own headers.
INCLUDE_DIRECTORIES(${THIRD_PARTY_PATH}/install) # For Paddle code to include zlib.h.
ExternalProject_Add( ExternalProject_Add(
extern_zlib extern_zlib
......
...@@ -244,14 +244,14 @@ function(cc_test TARGET_NAME) ...@@ -244,14 +244,14 @@ function(cc_test TARGET_NAME)
cmake_parse_arguments(cc_test "${options}" "${oneValueArgs}" "${multiValueArgs}" ${ARGN}) cmake_parse_arguments(cc_test "${options}" "${oneValueArgs}" "${multiValueArgs}" ${ARGN})
add_executable(${TARGET_NAME} ${cc_test_SRCS}) add_executable(${TARGET_NAME} ${cc_test_SRCS})
# Support linking flags: --whole-archive (Linux) / -force_load (MacOS) # Support linking flags: --whole-archive (Linux) / -force_load (MacOS)
target_circle_link_libraries(${TARGET_NAME} ${cc_test_DEPS} paddle_gtest_main paddle_memory gtest gflags glog) target_circle_link_libraries(${TARGET_NAME} ${cc_test_DEPS} paddle_gtest_main memory gtest gflags glog)
if("${cc_test_DEPS}" MATCHES "ARCHIVE_START") if("${cc_test_DEPS}" MATCHES "ARCHIVE_START")
list(REMOVE_ITEM cc_test_DEPS ARCHIVE_START ARCHIVE_END) list(REMOVE_ITEM cc_test_DEPS ARCHIVE_START ARCHIVE_END)
endif() endif()
add_dependencies(${TARGET_NAME} ${cc_test_DEPS} paddle_gtest_main paddle_memory gtest gflags glog) add_dependencies(${TARGET_NAME} ${cc_test_DEPS} paddle_gtest_main memory gtest gflags glog)
add_test(NAME ${TARGET_NAME} add_test(NAME ${TARGET_NAME}
COMMAND ${TARGET_NAME} ${cc_test_ARGS} COMMAND ${TARGET_NAME} ${cc_test_ARGS}
WORKING_DIRECTORY ${CMAKE_CURRENT_SOURCE_DIR}) WORKING_DIRECTORY ${CMAKE_CURRENT_BINARY_DIR})
endif() endif()
endfunction(cc_test) endfunction(cc_test)
...@@ -311,8 +311,8 @@ function(nv_test TARGET_NAME) ...@@ -311,8 +311,8 @@ function(nv_test TARGET_NAME)
set(multiValueArgs SRCS DEPS) set(multiValueArgs SRCS DEPS)
cmake_parse_arguments(nv_test "${options}" "${oneValueArgs}" "${multiValueArgs}" ${ARGN}) cmake_parse_arguments(nv_test "${options}" "${oneValueArgs}" "${multiValueArgs}" ${ARGN})
cuda_add_executable(${TARGET_NAME} ${nv_test_SRCS}) cuda_add_executable(${TARGET_NAME} ${nv_test_SRCS})
target_link_libraries(${TARGET_NAME} ${nv_test_DEPS} paddle_gtest_main paddle_memory gtest gflags glog) target_link_libraries(${TARGET_NAME} ${nv_test_DEPS} paddle_gtest_main memory gtest gflags glog)
add_dependencies(${TARGET_NAME} ${nv_test_DEPS} paddle_gtest_main paddle_memory gtest gflags glog) add_dependencies(${TARGET_NAME} ${nv_test_DEPS} paddle_gtest_main memory gtest gflags glog)
add_test(${TARGET_NAME} ${TARGET_NAME}) add_test(${TARGET_NAME} ${TARGET_NAME})
endif() endif()
endfunction(nv_test) endfunction(nv_test)
...@@ -387,8 +387,8 @@ function(hip_test TARGET_NAME) ...@@ -387,8 +387,8 @@ function(hip_test TARGET_NAME)
endif() endif()
add_executable(${TARGET_NAME} ${_cmake_options} ${_generated_files} ${_sources}) add_executable(${TARGET_NAME} ${_cmake_options} ${_generated_files} ${_sources})
set_target_properties(${TARGET_NAME} PROPERTIES LINKER_LANGUAGE HIP) set_target_properties(${TARGET_NAME} PROPERTIES LINKER_LANGUAGE HIP)
target_link_libraries(${TARGET_NAME} ${hip_test_DEPS} paddle_gtest_main paddle_memory gtest gflags) target_link_libraries(${TARGET_NAME} ${hip_test_DEPS} paddle_gtest_main memory gtest gflags)
add_dependencies(${TARGET_NAME} ${hip_test_DEPS} paddle_gtest_main paddle_memory gtest gflags) add_dependencies(${TARGET_NAME} ${hip_test_DEPS} paddle_gtest_main memory gtest gflags)
add_test(${TARGET_NAME} ${TARGET_NAME}) add_test(${TARGET_NAME} ${TARGET_NAME})
endif() endif()
endfunction(hip_test) endfunction(hip_test)
...@@ -561,9 +561,9 @@ function(py_test TARGET_NAME) ...@@ -561,9 +561,9 @@ function(py_test TARGET_NAME)
set(multiValueArgs SRCS DEPS ARGS ENVS) set(multiValueArgs SRCS DEPS ARGS ENVS)
cmake_parse_arguments(py_test "${options}" "${oneValueArgs}" "${multiValueArgs}" ${ARGN}) cmake_parse_arguments(py_test "${options}" "${oneValueArgs}" "${multiValueArgs}" ${ARGN})
add_test(NAME ${TARGET_NAME} add_test(NAME ${TARGET_NAME}
COMMAND env PYTHONPATH=${PADDLE_PYTHON_BUILD_DIR}/lib-python ${py_test_ENVS} COMMAND env PYTHONPATH=${PADDLE_BINARY_DIR}/python ${py_test_ENVS}
${PYTHON_EXECUTABLE} -u ${py_test_SRCS} ${py_test_ARGS} ${PYTHON_EXECUTABLE} -u ${py_test_SRCS} ${py_test_ARGS}
WORKING_DIRECTORY ${CMAKE_CURRENT_SOURCE_DIR}) WORKING_DIRECTORY ${CMAKE_CURRENT_BINARY_DIR})
endif() endif()
endfunction() endfunction()
......
# FileManager Design Document
## Goals
This document describes the design of a system called FileManager, which makes it easy for users to upload their own training data for distributed training.
Its main features include:
- Common command-line commands for managing files and directories
- Resumable upload and download of large files
## Terminology
- PFS: short for `Paddlepaddle cloud File System`, an abstraction of the user's file storage space, as opposed to the local filesystem. It is currently built on top of CephFS.
- [CephFS](http://docs.ceph.com/docs/master/cephfs/): a POSIX-compatible file system.
- Chunk: the logical unit into which a file is split for transfer.
## Modules
### Architecture Diagram
<img src=./src/filemanager.png width=900>
### PFSClient
- Function: detailed design [link](./pfs/pfsclient.md)
  - Provides commands for users to manage their files
  - Must be able to run cross-platform
- Mutual authentication
  PFSClient and the Ingress must authenticate each other mutually<sup>[tls](#tls)</sup>, so a user first has to register on `cloud.paddlepaddle.org`, apply for user space, and download the system-generated CA (certificate authority), Key, and CRT (CA-signed certificate) to the local machine before PFSClient can be used.
### [Ingress](https://kubernetes.io/docs/concepts/services-networking/ingress/)
- Function:
  Provides a Layer-7 reverse proxy and sticky-session based load balancing.
- Passing the user's identity through
  Ingress needs to forward the PFSClient's identity information to PFSServer; see [link](http://www.integralist.co.uk/posts/clientcertauth.html#3) for how to configure this.
### PFSServer
PFSServer exposes a RESTful API. It receives and handles file-management requests from PFSClient and returns the results to the client.
RESTful API (an illustrative request example follows the list):
- /api/v1/files
  - `GET /api/v1/files`: Get metadata of files or directories.
  - `POST /api/v1/files`: Create files or directories.
  - `PATCH /api/v1/files`: Update files or directories.
  - `DELETE /api/v1/files`: Delete files or directories.
- /api/v1/file/chunks
  - `GET /api/v1/file/chunks`: Get the chunk metadata of a file.
- /api/v1/storage/files
  - `GET /api/v1/storage/files`: Download files or directories.
  - `POST /api/v1/storage/files`: Upload files or directories.
- /api/v1/storage/file/chunks
  - `GET /api/v1/storage/file/chunks`: Download chunk data.
  - `POST /api/v1/storage/file/chunks`: Upload chunk data.
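Purely as an illustration of how a client might call the metadata endpoint with the mutual-TLS credentials described above (the host name, query parameter, and response format are assumptions, not part of this design):

```python
import requests

# Hypothetical call: list metadata for a directory in the user's PFS space.
resp = requests.get(
    "https://cloud.paddlepaddle.org/api/v1/files",
    params={"path": "/pfs/mydatacenter/home/alice/folder"},
    cert=("user.crt", "user.key"),  # client certificate/key for mutual TLS
    verify="ca.pem",                # CA certificate downloaded at registration
)
print(resp.json())
```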
## File Transfer Optimizations
### Chunked File Transfer
User files can be large, so uploading them to the cloud or downloading them to the local machine can take a long time, and the network may be unstable during the transfer. To deal with these problems we introduce the concept of a Chunk: a Chunk consists of its offset within the file, the data, the data length, and a checksum. File upload and download are implemented as operations on Chunks. Because a Chunk is small (256 KB by default), each transfer finishes quickly and is unlikely to fail. After the last Chunk has been transferred, PFSClient checks whether the MD5 of the destination file matches that of the source file.
A typical Chunk looks like this:
```
type Chunk struct {
	fileOffset int64  // offset of this chunk within the file
	checksum   uint32 // checksum of the chunk data
	len        uint32 // length of the data in bytes
	data       []byte // the chunk data itself
}
```
### Creating Sparse Files
When the destination file does not exist, or its size differs from that of the source file, we can use [Fallocate](https://Go.org/pkg/syscall/#Fallocate) to create a sparse file, and then multiple Chunks can be written concurrently.
### Overwriting Only the Inconsistent Parts
The key to the file transfer is that PFSClient compares the checksums of the source and destination Chunks and only downloads or uploads the Chunks whose checksums differ, so the parts that have already been transferred successfully do not need to be transferred again. A sketch of this comparison is given below.
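As a rough Python sketch of this chunk-level comparison (illustrative only; the function names, the choice of MD5 as the per-chunk checksum, and the 256 KB constant are assumptions consistent with the description above, not the actual PFSClient code):

```python
import hashlib
import os

CHUNK_SIZE = 256 * 1024  # default chunk size described above: 256 KB


def chunk_checksums(path, chunk_size=CHUNK_SIZE):
    """Return {offset: checksum} for every chunk of a file."""
    checksums = {}
    with open(path, "rb") as f:
        offset = 0
        while True:
            data = f.read(chunk_size)
            if not data:
                break
            checksums[offset] = hashlib.md5(data).hexdigest()
            offset += len(data)
    return checksums


def chunks_to_transfer(src_path, dst_path, chunk_size=CHUNK_SIZE):
    """Offsets of chunks whose checksums differ between source and destination."""
    src = chunk_checksums(src_path, chunk_size)
    dst = chunk_checksums(dst_path, chunk_size) if os.path.exists(dst_path) else {}
    return sorted(off for off, csum in src.items() if dst.get(off) != csum)
```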
## User Workflow
See [link](https://github.com/PaddlePaddle/Paddle/blob/develop/doc/design/cluster_train/data_dispatch.md)
## Scaffolding Generation
We use [swagger](https://github.com/swagger-api/swagger-codegen) to generate the scaffolding of PFSClient and PFSServer, so that we can focus more of our effort on the logic itself.
## References
- <a name=tls></a>[TLS complete guide](https://github.com/k8sp/tls/blob/master/tls.md)
- [aws.s3](http://docs.aws.amazon.com/cli/latest/reference/s3/)
- [linux man document](https://linux.die.net/man/)
# PFSClient
## Description
The `pfs` command is a Command Line Interface for managing your files on PaddlePaddle Cloud.
## Synopsis
```
paddle [options] pfs <subcommand> [parameters]
```
## Options
```
--profile (string)
Use a specific profile from your credential file.
--help
Display more information about the command
--version
Output version information and exit
--debug
Show detailed debugging log
--only-show-errors (boolean)
Only errors and warnings are displayed. All other output is suppressed.
```
## Path Arguments
When using a command, we need to specify path arguments. There are two path argument types: `localpath` and `pfspath`.
A `pfspath` begins with `/pfs`, e.g. `/pfs/$DATACENTER/home/$USER/folder`.
[Here](https://github.com/PaddlePaddle/Paddle/blob/develop/doc/design/cluster_train/data_dispatch.md#上传训练文件) is how to configure datacenters.
## Order of Path Arguments
Commonly, if there are two path arguments, the first is the source, and the second is the destination.
## Subcommands
- rm - remove files or directories
```
Synopsis:
rm [-r] [-v] <PFSPath> ...
Options:
-r
Remove directories and their contents recursively
-v
Cause rm to be verbose, showing files after they are removed.
Examples:
paddle pfs rm /pfs/$DATACENTER/home/$USER/file
paddle pfs rm -r /pfs/$DATACENTER/home/$USER/folder
```
- mv - move (rename) files
```
Synopsis:
mv [-f | -n] [-v] <LocalPath> <PFSPath>
mv [-f | -n] [-v] <LocalPath> ... <PFSPath>
mv [-f | -n] [-v] <PFSPath> <LocalPath>
mv [-f | -n] [-v] <PFSPath> ... <LocalPath>
mv [-f | -n] [-v] <PFSPath> <PFSPath>
mv [-f | -n] [-v] <PFSPath> ... <PFSPath>
Options:
-f
Do not prompt for confirmation before overwriting the destination path. (The -f option overrides previous -n options.)
-n
Do not overwrite an existing file. (The -n option overrides previous -f options.)
-v
Cause mv to be verbose, showing files after they are moved.
Examples:
paddle pfs mv ./text1.txt /pfs/$DATACENTER/home/$USER/text1.txt
```
- cp - copy files or directories
```
Synopsis:
cp [-r] [-f | -n] [-v] [--preserve--links] <LocalPath> <PFSPath>
cp [-r] [-f | -n] [-v] [--preserve--links] <LocalPath> ... <PFSPath>
cp [-r] [-f | -n] [-v] [--preserve--links] <PFSPath> <LocalPath>
cp [-r] [-f | -n] [-v] [--preserve--links] <PFSPath> ... <LocalPath>
cp [-r] [-f | -n] [-v] [--preserve--links] <PFSPath> <PFSPath>
cp [-r] [-f | -n] [-v] [--preserve--links] <PFSPath> ... <PFSPath>
Options:
-r
Copy directories recursively
-f
Do not prompt for confirmation before overwriting the destination path. (The -f option overrides previous -n options.)
-n
Do not overwrite an existing file. (The -n option overrides previous -f options.)
-v
Cause cp to be verbose, showing files after they are copied.
--preserve--links
Preserve links when copying links
Examples:
paddle pfs cp ./file /pfs/$DATACENTER/home/$USER/file
paddle pfs cp /pfs/$DATACENTER/home/$USER/file ./file
```
- ls - list files
```
Synopsis:
ls [-r] <PFSPath> ...
Options:
-r
List directory(ies) recursively
Examples:
paddle pfs ls /pfs/$DATACENTER/home/$USER/file
paddle pfs ls /pfs/$DATACENTER/home/$USER/folder
```
- mkdir - make directory(ies)
Create intermediate directory(ies) as required.
```
Synopsis:
mkdir <PFSPath> ...
Examples:
paddle pfs mkdir /pfs/$DATACENTER/home/$USER/folder
```
...@@ -27,7 +27,7 @@ sphinx_add_target(paddle_fluid_docs ...@@ -27,7 +27,7 @@ sphinx_add_target(paddle_fluid_docs
${CMAKE_CURRENT_SOURCE_DIR} ${CMAKE_CURRENT_SOURCE_DIR}
${SPHINX_HTML_DIR_EN}) ${SPHINX_HTML_DIR_EN})
add_dependencies(paddle_fluid_docs gen_proto_py) add_dependencies(paddle_fluid_docs gen_proto_py paddle_python)
# configured documentation tools and intermediate build results # configured documentation tools and intermediate build results
set(BINARY_BUILD_DIR_CN "${CMAKE_CURRENT_BINARY_DIR}/cn/_build") set(BINARY_BUILD_DIR_CN "${CMAKE_CURRENT_BINARY_DIR}/cn/_build")
...@@ -50,6 +50,6 @@ sphinx_add_target(paddle_fluid_docs_cn ...@@ -50,6 +50,6 @@ sphinx_add_target(paddle_fluid_docs_cn
${CMAKE_CURRENT_SOURCE_DIR} ${CMAKE_CURRENT_SOURCE_DIR}
${SPHINX_HTML_DIR_CN}) ${SPHINX_HTML_DIR_CN})
add_dependencies(paddle_fluid_docs_cn gen_proto_py) add_dependencies(paddle_fluid_docs_cn gen_proto_py paddle_python)
add_subdirectory(api) add_subdirectory(api)
...@@ -19,4 +19,4 @@ sphinx_add_target(paddle_fluid_apis ...@@ -19,4 +19,4 @@ sphinx_add_target(paddle_fluid_apis
${CMAKE_CURRENT_SOURCE_DIR} ${CMAKE_CURRENT_SOURCE_DIR}
${SPHINX_HTML_DIR_EN}) ${SPHINX_HTML_DIR_EN})
add_dependencies(paddle_fluid_apis gen_proto_py framework_py_proto copy_paddle_pybind) add_dependencies(paddle_fluid_apis gen_proto_py framework_py_proto copy_paddle_pybind paddle_python)
...@@ -5,9 +5,11 @@ In a large scale machine learning setup where the size of the training data is h
Polyak and Juditsky (1992) showed that the test performance of a simple average of parameters obtained by Stochastic Gradient Descent (SGD) is as good as that of parameter values that are obtained by training the model over and over again, over the training dataset.
Hence, to accelerate the speed of Stochastic Gradient Descent, Averaged Stochastic Gradient Descent (ASGD) was proposed in Polyak and Juditsky (1992). For ASGD, the running average of parameters obtained by SGD is used as the estimator for <img src="https://raw.githubusercontent.com/PaddlePaddle/Paddle/develop/doc/fluid/images/theta_star.gif"/><br/> . The averaging is done as follows:
<p align="center">
<img src="https://raw.githubusercontent.com/PaddlePaddle/Paddle/develop/doc/fluid/images/asgd.gif"><br />
</p>
We propose averaging for any optimizer similar to how ASGD performs it, as mentioned above.
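A minimal sketch of this running-average idea in plain NumPy (illustrative only; the update rule below is the generic incremental mean of SGD iterates, not Paddle's actual averaging window or API):

```python
import numpy as np


def sgd_with_averaging(param, grad_fn, lr=0.01, num_steps=1000):
    """Run plain SGD while maintaining a running average of the iterates.

    Following Polyak & Juditsky (1992), the averaged parameter (not the last
    iterate) is what gets used at test time.
    """
    avg_param = np.zeros_like(param)
    for k in range(1, num_steps + 1):
        param = param - lr * grad_fn(param)    # ordinary SGD step
        avg_param += (param - avg_param) / k   # incremental running average
    return avg_param


# Example: minimize f(w) = ||w||^2, whose gradient is 2w.
w_avg = sgd_with_averaging(np.array([5.0, -3.0]), lambda w: 2 * w)
```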
......
...@@ -2,7 +2,7 @@
## Introduction
A Channel is a data structure that allows for synchronous interprocess
communication via message passing. It is a fundamental component of CSP
(communicating sequential processes), and allows for users to pass data
between threads without having to worry about synchronization.
...@@ -18,7 +18,7 @@ Creates a new channel that takes in variables of a specific dtype.
- **fluid.make_channel(dtype, capacity=0)**
  - **dtype**: The data type of variables being sent/received through channel
  - **capacity**: The capacity of the channel. A capacity of 0 represents
    an unbuffered channel. Capacity > 0 represents a buffered channel
```
...@@ -40,8 +40,8 @@ fluid.channel_close(ch)
### Send data to a channel
Sends a variable to a channel. Currently, variables of dtype `LoDTensor`,
`LoDRankTable`, `LoDTensorArray`, `SelectedRows`, `ReaderHolder`, and
`ChannelHolder` are supported.
By default, the data of the Variable is moved from the sender to the receiver,
...@@ -52,7 +52,7 @@ however the user can optionally copy the data before performing the send.
- **variable**: The variable to send to the channel
- **is_copy**: If set to True, channel_send will perform a variable assign
  to copy the source variable to a new variable to be sent.
```
ch = fluid.make_channel(dtype=core.VarDesc.VarType.LOD_TENSOR)
var = fill_constant(shape=[1],dtype=core.VarDesc.VarType.INT32, value=100)
...@@ -68,7 +68,7 @@ receiving variable.
- **channel**: The channel to receive the variable from
- **return_variable**: The destination variable used to store the data of the
  variable received from the channel
```
ch = fluid.make_channel(dtype=core.VarDesc.VarType.LOD_TENSOR)
var = fill_constant(shape=[1],dtype=core.VarDesc.VarType.INT32, value=-1)
...@@ -84,9 +84,9 @@ internal queues, locks, and conditional variables.
### QueueMessage
QueueMessage encapsulates the state of the channel send/receive operation to be
put in the **sendq/recvq**. It contains a condition variable used to lock the
thread (when there are no available sends/receives). In addition, it contains
a callback function to notify a thread when the QueueMessage is being
processed by the channel.
### Queues
...@@ -108,21 +108,21 @@ channel_recv operation will put a new QueueMessage on the recvq and block the
current thread under two conditions (a sketch of this blocking behavior follows the list):
1. The channel is buffered and there is no data on the buff_
2. The channel is unbuffered and does not have a sender
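To make these blocking conditions concrete, here is a minimal Python sketch of a channel-like object built from a buffer, a sender queue, and a condition variable (a toy mirror of the described design, not Paddle's actual C++ implementation):

```python
import threading
from collections import deque


class _Sender:
    """A sender parked on the send queue, waiting for a receiver."""
    def __init__(self, value):
        self.value = value


class ToyChannel:
    """Toy channel: capacity == 0 gives unbuffered (rendezvous) semantics."""

    def __init__(self, capacity=0):
        self.capacity = capacity
        self.buff = deque()   # plays the role of buff_ for buffered channels
        self.sendq = deque()  # senders waiting for a receiver
        self.cond = threading.Condition()

    def send(self, value):
        with self.cond:
            if self.capacity > 0:
                while len(self.buff) >= self.capacity:  # buffer full: block
                    self.cond.wait()
                self.buff.append(value)
            else:
                waiter = _Sender(value)
                self.sendq.append(waiter)
                self.cond.notify_all()
                while waiter in self.sendq:             # block until received
                    self.cond.wait()
            self.cond.notify_all()

    def recv(self):
        with self.cond:
            # Condition 1: buffered channel and no data in the buffer.
            # Condition 2: unbuffered channel and no waiting sender.
            while not self.buff and not self.sendq:
                self.cond.wait()
            value = self.buff.popleft() if self.buff else self.sendq.popleft().value
            self.cond.notify_all()
            return value
```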
### State diagram
#### Channel Send
<p align="center">
<img src="https://raw.githubusercontent.com/PaddlePaddle/Paddle/develop/doc/fluid/images/channel_send.png"/><br/>
</p>
#### Channel Receive
<p align="center">
<img src="https://raw.githubusercontent.com/PaddlePaddle/Paddle/develop/doc/fluid/images/channel_recv.png"/><br/>
</p>
## Limitations and Considerations
### Variable Copy
...@@ -135,5 +135,5 @@ be sent before it is sent.
Please note that this is achieved by adding an **assign** operator and creating
a temporary variable that is sent in place of the original variable. Please
note that the **assign** operator has limited support for only certain variable
datatypes.
...@@ -23,21 +23,25 @@ The following table compares concepts in Fluid and Go
<td>user-defined functions </td>
<td>
<a href="https://github.com/PaddlePaddle/Paddle/tree/develop/python/paddle/fluid">layers</a></td>
<td></td>
</tr>
<tr>
<td>control-flow and built-in functions </td>
<td>
<a href="https://github.com/PaddlePaddle/Paddle/tree/develop/paddle/operators">intrinsics/operators</a></td>
<td></td>
</tr>
<tr>
<td>goroutines, channels </td>
<td>
<a href="https://github.com/PaddlePaddle/Paddle/tree/develop/paddle/framework/thread_pool.h">class ThreadPool</a></td>
<td></td>
</tr>
<tr>
<td>runtime </td>
<td>
<a href="https://github.com/PaddlePaddle/Paddle/blob/develop/paddle/framework/executor.h">class Executor</a></td>
<td></td>
</tr>
</tbody>
</table>
......
...@@ -2,13 +2,13 @@
## Introduction
In golang, the [**select**](https://golang.org/ref/spec#Select_statements)
statement lets a goroutine wait on multiple communication operations at the
same time. The **select** blocks until one of its cases can run, then
executes the case. If multiple cases are ready to run, then one case is
chosen at random to be executed.
With the introduction of CSP for Paddle, we mimic this behavior by
creating a ***select_op***.
## How to use it
...@@ -17,11 +17,11 @@ The **select_op** is available as a c++ operator. However most users
will prefer to use the much simpler Python API.
- **fluid.Select()**: Creates a select operator and adds it to the current
  block within the main program. Also creates a sub block and adds it to the
  main program. This sub block is used to hold all variables and operators
  used by the case statements.
Within the select block, users can add cases by
calling **select.case** or **select.default** method.
- **fluid.Select.case(channel_action, channel, result_variable)**: Represents
...@@ -37,13 +37,13 @@ execute.
```
ch1 = fluid.make_channel(dtype=core.VarDesc.VarType.LOD_TENSOR)
quit_ch = fluid.make_channel(dtype=core.VarDesc.VarType.LOD_TENSOR)
x = fill_constant(shape=[1], dtype=core.VarDesc.VarType.INT32, value=0)
y = fill_constant(shape=[1], dtype=core.VarDesc.VarType.INT32, value=1)
while_cond = fill_constant(shape=[1], dtype=core.VarDesc.VarType.BOOL, value=True)
while_op = While(cond=while_cond)
with while_op.block():
    with fluid.Select() as select:
        with select.case(fluid.channel_send, channel, x):
...@@ -99,17 +99,17 @@ blocks {
}
}
// Create "select" operator.
// inputs:
//   X: All input variables used by operators within the select block
//   case_to_execute: Variable filled in by select_op when it determines
//     which case to execute.
//
// outputs:
//   Out: All output variables referenced by operators within select block.
//
// attrs:
//   sub_block: The block id containing the select "cases"
//   cases: Serialized list of all cases in the select op.
//     Each case is serialized as: '<index>,<type>,<channel>,<value>'
//     where type is 0 for default, 1 for send, and 2 for receive.
//     No channel and values are needed for default cases.
...@@ -150,7 +150,7 @@ into **X**. It will also create a temp variable called **case_to_execute**. Th
filled in by the select_op after it has completed processing the case statements.
If there are no available cases to execute (i.e. all cases are blocked on channel operations, and
there is no default statement), then the select_op will block the current thread. The thread will
unblock once there is a channel operation affecting one of the case statements, at which point, the
**select_op** will set the **case_to_execute** variable to the index of the case to execute.
...@@ -247,17 +247,17 @@ blocks {
```
Cases are represented by a **conditional_block operator**, whose condition is set as the output of
equal(**case_to_execute**, **case_index**). Since each case index is unique in this sub-block,
only one case will be executed.
### select_op flow
<p align="center">
<img src="https://raw.githubusercontent.com/PaddlePaddle/Paddle/develop/doc/fluid/images/select_op_workflow.png"/><br/>
</p>
The select algorithm is inspired by golang's select routine. Please refer to
http://www.tapirgames.com/blog/golang-concurrent-select-implementation for more information.
## Backward Pass
......
...@@ -40,11 +40,11 @@ computation is only specified in Python code which sits outside of PaddlePaddle,
Similar to how a compiler uses an intermediate representation (IR) so that the programmer does not need to manually optimize their code for most of the cases, we can have an intermediate representation in PaddlePaddle as well. The compiler optimizes the IR as follows:
<img src="https://raw.githubusercontent.com/PaddlePaddle/Paddle/develop/doc/fluid/images/compiler.png"/>
PaddlePaddle can support model parallelism by converting the IR so that the user no longer needs to manually perform the computation and operations in the Python component:
<img src="https://raw.githubusercontent.com/PaddlePaddle/Paddle/develop/doc/fluid/images/paddle-compile.png"/>
The IR for PaddlePaddle after refactoring is called a `Block`; it specifies the computation dependency graph and the variables used in the computation.
...@@ -60,7 +60,7 @@ For a detailed explanation, refer to this document -
The revamped distributed training architecture can address the above discussed limitations. Below is the illustration of how it does so:
<img src="https://raw.githubusercontent.com/PaddlePaddle/Paddle/develop/doc/fluid/images/distributed_architecture.png"/>
The major components are: *Python API*, *Distribute Transpiler* and *Remote Executor*.
...@@ -152,7 +152,7 @@ for data in train_reader():
The `JobDesc` object describes the distributed job resource specification to run on
the cluster environment.
<img src="https://raw.githubusercontent.com/PaddlePaddle/Paddle/develop/doc/fluid/images/remote_executor.png" width="500" align="center" />
`RemoteExecutor.run` sends the `ProgramDesc` and
[TrainingJob](https://github.com/PaddlePaddle/cloud/blob/unreleased-tpr/doc/autoscale/README.md#training-job-resource)
...@@ -171,7 +171,7 @@ In the future, a more general placement algorithm should be implemented, which m
The local training architecture will be the same as the distributed training architecture; the difference is that everything runs locally, and there is just one PaddlePaddle runtime:
<img src="https://raw.githubusercontent.com/PaddlePaddle/Paddle/develop/doc/fluid/images/local_architecture.png"/>
### Training Data
......
...@@ -8,11 +8,11 @@ Op graph to a multi-CPU Op graph, and run `ParallelDo` Op to run the graph.
## Transpiler
<img src="https://raw.githubusercontent.com/PaddlePaddle/Paddle/develop/doc/fluid/images/single-thread@3x.png" width="300">
After conversion:
<img src="https://raw.githubusercontent.com/PaddlePaddle/Paddle/develop/doc/fluid/images/multi-threads@3x.png" width="1000">
## Implement
......
...@@ -41,11 +41,11 @@ We will need these OPs: *Send*, *Recv*, *Enqueue*, *Dequeue*.
Below is an example of converting the user defined graph to the
subgraphs for the trainer and the parameter server:
<img src="https://raw.githubusercontent.com/PaddlePaddle/Paddle/develop/doc/fluid/images/local-graph.png" width="300"/>
After converting:
<img src="https://raw.githubusercontent.com/PaddlePaddle/Paddle/develop/doc/fluid/images/dist-graph.png" width="700"/>
1. The parameter variable W and its optimizer program are placed on the parameter server.
1. Operators are added to the program.
...@@ -69,8 +69,7 @@ In Fluid, we introduce [SelectedRows](../selected_rows.md) to represent a list o
non-zero gradient data. So when we do parameter optimization both locally and remotely,
we only need to send those non-zero rows to the optimizer operators:
<img src="https://raw.githubusercontent.com/PaddlePaddle/Paddle/develop/doc/fluid/images/sparse_update.png" width="700" />
### Benefits
- Model parallelism becomes easier to implement: it is an extension to
......
...@@ -5,7 +5,7 @@ This document describes the RNN (Recurrent Neural Network) operator and how it i
## RNN Algorithm Implementation
<p align="center">
<img src="https://raw.githubusercontent.com/PaddlePaddle/Paddle/develop/doc/fluid/images/rnn.jpg"/>
</p>
The above diagram shows an RNN unrolled into a full network.
...@@ -22,7 +22,7 @@ There are several important concepts here:
There could be local variables defined in each step-net. PaddlePaddle runtime realizes these variables in *step-scopes* which are created for each step.
<p align="center">
<img src="https://github.com/PaddlePaddle/Paddle/tree/develop/doc/fluid/images/rnn.png"/><br/>
Figure 2 illustrates the RNN's data flow
</p>
...@@ -93,7 +93,7 @@ For example, we could have a 2-level RNN, where the top level corresponds to par
The following figure illustrates feeding text into the lower level, one sentence at a step, and feeding the step outputs to the top level. The final top level output is about the whole text.
<p align="center">
<img src="https://github.com/PaddlePaddle/Paddle/tree/develop/doc/fluid/images/2_level_rnn.png"/>
</p>
```python
...@@ -149,5 +149,5 @@ If the `output_all_steps` is set to False, it will only output the final time st
<p align="center">
<img src="https://github.com/PaddlePaddle/Paddle/tree/develop/doc/fluid/images/rnn_2level_data.png"/>
</p>
...@@ -2,7 +2,7 @@
## What is batch normalization
Batch normalization is a frequently-used method in deep network training. It adjusts the mean and variance of a layer's output, and makes the data distribution easier for the next layer's training.
The principle of batch normalization can be summarized into a simple function:
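The function referred to here is the standard batch normalization transform; written out (the standard formulation, supplied for reference rather than quoted from this document):

```
\mu_B = \frac{1}{m}\sum_{i=1}^{m} x_i,\qquad
\sigma_B^2 = \frac{1}{m}\sum_{i=1}^{m} (x_i-\mu_B)^2,\qquad
\hat{x}_i = \frac{x_i-\mu_B}{\sqrt{\sigma_B^2+\epsilon}},\qquad
y_i = \gamma\,\hat{x}_i + \beta
```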
...@@ -66,7 +66,7 @@ As most C++ operators do, `batch_norm_op` is defined by inputs, outputs, attribu
The following graph shows the training computational process of `batch_norm_op`:
<img src="https://raw.githubusercontent.com/PaddlePaddle/Paddle/develop/doc/fluid/images/batch_norm_op_kernel.png" width="800"/>
cuDNN provides APIs to finish the whole series of computation; we can use them in our GPU kernel.
...@@ -74,13 +74,13 @@ cudnn provides APIs to finish the whole series of computation, we can use them i
`batch_norm_op` is wrapped as a layer in Python:
```python
def batch_norm_layer(net,
                     input,
                     output,
                     scale,
                     bias,
                     use_global_est = False,
                     epsilon = 1e-6,
                     momentum = 0.99):
    mean_cache = scope.new_var(name = 'estimated_mean', trainable = False)
...@@ -119,15 +119,15 @@ for pass_id in range(PASS_NUM):
    if pass_id % 100 == 0:
        net.infer(test_image)  # run inferencing model
        # ...
```
`is_infer` is an attribute. Once an operator is created, its attributes can not be changed. This suggests that we should maintain two `batch_norm_op`s in the model: one whose `is_infer` is `True` (we call it `infer_batch_norm_op`) and one whose `is_infer` is `False` (we call it `train_batch_norm_op`). They share all parameters and variables, but are placed in two different branches. That is to say, if a network contains a `batch_norm_op`, it will fork into two branches, one going through `train_batch_norm_op` and the other going through `infer_batch_norm_op`:
<div align=center>
<img src="https://raw.githubusercontent.com/PaddlePaddle/Paddle/develop/doc/fluid/images/batch_norm_fork.png" width="500"/>
</div>
Just like what is shown in the above graph, the net forks before `batch_norm_op` and will never merge again. All the operators after `batch_norm_op` will be duplicated.
When the net runs in training mode, the end of the left branch will be set as the running target, so the dependency tracking process will ignore the right branch automatically. When the net runs in inferencing mode, the process is reversed.
......
...@@ -6,23 +6,23 @@ A central problem in machine learning is how to design an algorithm that will pe
### Parameter Norm Penalties
Most common regularization approaches in deep learning are based on limiting the capacity of the models by adding a parameter norm penalty to the objective function `J`. This is given as follows:
<img src="https://raw.githubusercontent.com/PaddlePaddle/Paddle/develop/doc/fluid/images/loss_equation.png" align="center"/><br/>
The parameter `alpha` is a hyperparameter that weights the relative contribution of the norm penalty term, `omega`, relative to the standard objective function `J`.
The most commonly used norm penalties are the L2 norm penalty and the L1 norm penalty. These are given as follows:
##### L2 Regularization:
<img src="https://raw.githubusercontent.com/PaddlePaddle/Paddle/develop/doc/fluid/images/l2_regularization.png" align="center"/><br/>
##### L1 Regularization
<img src="https://raw.githubusercontent.com/PaddlePaddle/Paddle/develop/doc/fluid/images/l1_regularization.png" align="center"/><br/>
A much more detailed mathematical background of regularization can be found [here](http://www.deeplearningbook.org/contents/regularization.html).
## Regularization Survey
A detailed survey of regularization in various deep learning frameworks can be found [here](https://github.com/PaddlePaddle/Paddle/wiki/Regularization-Survey).
## Proposal for Regularization in PaddlePaddle
...@@ -32,41 +32,35 @@ In the new design, we propose to create new operations for regularization. For n
- L2_regularization_op
- L1_regularization_op
These ops can be like any other ops with their own CPU/GPU implementations either using Eigen or separate CPU and GPU kernels. As the initial implementation, we can implement their kernels using Eigen following the abstraction pattern implemented for [Activation Ops](https://github.com/PaddlePaddle/Paddle/blob/develop/paddle/operators/accuracy_op.h). This abstraction pattern can make it very easy to implement new regularization schemes other than L1 and L2 norm penalties.
The idea of building ops for regularization is in sync with the refactored Paddle philosophy of using operators to represent any computation unit. The way these ops will be added to the computation graph will be decided by the [layer functions](https://github.com/PaddlePaddle/Paddle/blob/develop/doc/design/python_api.md#layer-function) in the Python API.
### Computation Graph
Below is an example of a really simple feed forward neural network.
<img src="https://raw.githubusercontent.com/PaddlePaddle/Paddle/develop/doc/fluid/images/feed_forward.png" align="center"/><br/>
The Python API will modify this computation graph to add regularization operators. The modified computation graph will look as follows:
<img src="https://raw.githubusercontent.com/PaddlePaddle/Paddle/develop/doc/fluid/images/feed_forward_regularized.png" align="center"/><br/>
### Python API implementation for Regularization
Using the low level ops, `L2_regularization_op` and `L1_regularization_op`, any user can add regularization to their computation graphs. However, this will require a lot of lines of code and we should design Python APIs that support regularization. An example of such an API can be seen in [Keras](https://keras.io/regularizers/). As per the PaddlePaddle [Python API design](https://github.com/PaddlePaddle/Paddle/blob/develop/doc/design/python_api.md), the layer functions are responsible for creating operators, operator parameters and variables. Since regularization is a property of parameters, it makes sense to create these in the layer functions.
#### Creation of Regularization ops
There are two possibilities for creating the regularization ops:
1. We create these ops immediately while building the computation graph.
2. We add these ops in a lazy manner, just before the backward, similar to the way the optimization ops are added.
The proposal is to add these ops in a lazy manner just before the backward pass.
#### Storage of Regularization attributes
Since we want to create the regularization ops in a lazy manner, the regularization attributes (type of regularization and weight of regularization penalty) can be stored as attributes of the [`Parameter`](https://github.com/PaddlePaddle/Paddle/blob/develop/python/paddle/v2/framework/framework.py#L421) class. This is because regularization is a property of the parameters and storing regularization properties with Parameters also allows for shared parameters. A rough sketch of this proposal is given below.
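A minimal Python sketch of this idea (illustrative only; names such as `regularizer`, `reg_weight`, and `append_regularization_ops`, and the shape of the `block.append_op` call, are assumptions rather than the actual Paddle API):

```python
class Parameter:
    """A parameter that carries its own regularization attributes."""

    def __init__(self, name, value, regularizer=None, reg_weight=0.0):
        self.name = name
        self.value = value
        self.regularizer = regularizer  # e.g. "L2" or "L1", or None
        self.reg_weight = reg_weight    # the alpha weighting the penalty term


def append_regularization_ops(block, parameters):
    """Lazily add regularization ops just before the backward pass is built."""
    for param in parameters:
        if param.regularizer is None:
            continue
        op_type = ("L2_regularization_op" if param.regularizer == "L2"
                   else "L1_regularization_op")
        # Each op reads the parameter and emits a penalty gradient that is
        # later added to the parameter's gradient.
        block.append_op(
            type=op_type,
            inputs={"X": param.name},
            outputs={"Out": param.name + "@REG_GRAD"},
            attrs={"weight": param.reg_weight})
```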
#### High-level API
In the PaddlePaddle Python API, users will primarily rely on [layer functions](https://github.com/PaddlePaddle/Paddle/blob/develop/doc/design/python_api.md#layer-function) to create neural network layers. Hence, we also need to provide regularization functionality in layer functions. The design of these APIs can be postponed for now. A good reference for these APIs can be found in [Keras](https://keras.io/regularizers/) and also by looking at Tensorflow in [`tf.contrib.layers`](https://www.tensorflow.org/api_guides/python/contrib.layers).
...@@ -116,7 +116,7 @@ The classical DS2 network contains 15 layers (from bottom to top):
- **One** CTC-loss layer
<div align="center">
<img src="https://raw.githubusercontent.com/PaddlePaddle/Paddle/develop/doc/fluid/images/ds2_network.png" width=350><br/>
Figure 1. Architecture of Deep Speech 2 Network.
</div>
...@@ -142,7 +142,7 @@ Key ingredients about the layers:
- **Batch Normalization Layers**:
  - Added to all above layers (except for data and loss layer).
  - Sequence-wise normalization for RNNs: BatchNorm only performed on input-state projection and not state-state projection, for efficiency consideration.
<table>
<thead>
<tr>
...@@ -208,7 +208,7 @@ TODO by Assignees
### Beam Search with CTC and LM
<div align="center">
<img src="https://raw.githubusercontent.com/PaddlePaddle/Paddle/develop/doc/fluid/images/beam_search.png" width=600><br/>
Figure 2. Algorithm for CTC Beam Search Decoder.
</div>
......
...@@ -199,7 +199,7 @@ Packing the `selected_generation_scores` will get a `LoDTensor`, and each tail i
## LoD and shape changes during decoding
<p align="center">
<img src="https://raw.githubusercontent.com/PaddlePaddle/Paddle/develop/doc/fluid/images/LOD-and-shape-changes-during-decoding.jpg"/>
</p>
According to the image above, the only phase that changes the LoD is beam search.
......
# Design for GAN
GAN (Generative Adversarial Net [https://arxiv.org/abs/1406.2661]) is an important model for unsupervised learning and is widely used in many areas.
It applies several important concepts in machine learning system design, including building and running subgraphs, dependency tracing, different optimizers in one executor, and so forth.
In our GAN design, we wrap it as a user-friendly, easily customized Python API to design different models. We take the conditional DC-GAN (Unsupervised Representation Learning with Deep Convolutional Generative Adversarial Networks [https://arxiv.org/abs/1511.06434]) as an example due to its good performance on image generation.
<p align="center">
<img src="https://raw.githubusercontent.com/PaddlePaddle/Paddle/develop/doc/fluid/images/test.dot.png" width = "35%" align="center"/><br/>
Figure 1. The overall running logic of GAN. The black solid arrows indicate the forward pass; the green dashed arrows indicate the backward pass of generator training; the red dashed arrows indicate the backward pass of discriminator training. The BP pass of the green (red) arrows should only update the parameters in the green (red) boxes. The diamonds indicate the data providers. d\_loss and g\_loss marked in red and green are the two targets we would like to run.
</p>
The operators, layers and functions required/optional to build a GAN demo are summarized in https://github.com/PaddlePaddle/Paddle/issues/4563.
<p align="center">
<img src="https://raw.githubusercontent.com/PaddlePaddle/Paddle/develop/doc/fluid/images/dcgan.png" width = "90%" align="center"/><br/>
Figure 2. Photo borrowed from the original DC-GAN paper.
</p>
## The Conditional-GAN might be a class.
In this design, we adopt the popular open-source design in https://github.com/carpedm20/DCGAN-tensorflow and https://github.com/rajathkmp/DCGAN. It contains the following data structure:
- DCGAN(object): contains everything required to build a GAN model. It provides the following member functions as its API:
...@@ -29,7 +29,7 @@ This design we adopt the popular open source design in https://github.com/carped
Returns a generated image.
- discriminator(image):
Given an image, decide if it is from a real source or a fake one.
Returns a 0/1 binary label.
- build_model(self):
...@@ -47,7 +47,7 @@ To be more detailed, we introduce our design of DCGAN as following:
```python
class DCGAN(object):
    def __init__(self, y_dim=None):
        # hyper parameters
        self.y_dim = y_dim  # conditional gan or not
        self.batch_size = 100
...@@ -82,18 +82,18 @@ class DCGAN(object):
        # input z: the random noise
        # input y: input data label (optional)
        # output G_im: generated fake images
        if self.y_dim:  # conditional GAN: concatenate the label y with the noise
            z = pd.layer.concat(1, [z, y])
        G_h0 = pd.layer.fc(z, self.G_w0, self.G_b0)
        G_h0_bn = pd.layer.batch_norm(G_h0)
        G_h0_relu = pd.layer.relu(G_h0_bn)
        G_h1 = pd.layer.deconv(G_h0_relu, self.G_w1, self.G_b1)
        G_h1_bn = pd.layer.batch_norm(G_h1)
        G_h1_relu = pd.layer.relu(G_h1_bn)
        G_h2 = pd.layer.deconv(G_h1_relu, self.G_W2, self.G_b2)
        G_im = pd.layer.tanh(G_h2)
        return G_im
...@@ -111,11 +111,11 @@ class DCGAN(object):
        D_h0 = pd.layer.conv2d(image, w=self.D_w0, b=self.D_b0)
        D_h0_bn = pd.layer.batch_norm(D_h0)
        D_h0_relu = pd.layer.lrelu(D_h0_bn)
        D_h1 = pd.layer.conv2d(D_h0_relu, w=self.D_w1, b=self.D_b1)
        D_h1_bn = pd.layer.batch_norm(D_h1)
        D_h1_relu = pd.layer.lrelu(D_h1_bn)
        D_h2 = pd.layer.fc(D_h1_relu, w=self.D_w2, b=self.D_b2)
        return D_h2
```
...@@ -123,7 +123,7 @@ class DCGAN(object):
### Class member function: Build the model
- Define data readers as placeholders to hold the data;
- Build the generator and discriminator;
- Define two training losses for the discriminator and generator, respectively.
If we have an execution dependency engine to back-trace all tensors, the module that builds our GAN model will look like this:
```python
class DCGAN(object):
...@@ -133,7 +133,7 @@ class DCGAN(object):
        self.images = pd.data(pd.float32, [self.batch_size, self.im_size, self.im_size])
        self.faked_images = pd.data(pd.float32, [self.batch_size, self.im_size, self.im_size])
        self.z = pd.data(pd.float32, [None, self.z_size])
        # step 1: generate images by generator, classify real/fake images with discriminator
        if self.y_dim: # if conditional GAN, includes label
            self.G = self.generator(self.z, self.y)
...@@ -147,12 +147,12 @@ class DCGAN(object):
            # generate fake images
            self.sampled = self.sampler(self.z)
            self.D_f = self.discriminator(self.G)
        # step 2: define the two losses
        self.d_loss_real = pd.reduce_mean(pd.cross_entropy(self.D_t, np.ones(self.batch_size)))
        self.d_loss_fake = pd.reduce_mean(pd.cross_entropy(self.D_f, np.zeros(self.batch_size)))
        self.d_loss = self.d_loss_real + self.d_loss_fake
        self.g_loss = pd.reduce_mean(pd.cross_entropy(self.D_f, np.ones(self.batch_size)))
```
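These two losses follow the standard GAN objective. As a rough numpy illustration (assuming sigmoid discriminator outputs; this is not PaddlePaddle code), `d_loss` pushes scores on real images toward 1 and scores on generated images toward 0, while `g_loss` pushes the scores of generated images toward 1:

```python
import numpy as np

def cross_entropy(pred, label, eps=1e-8):
    # element-wise binary cross entropy on sigmoid outputs
    return -(label * np.log(pred + eps) + (1.0 - label) * np.log(1.0 - pred + eps))

batch_size = 100
D_t = np.random.uniform(0.5, 1.0, batch_size)   # discriminator scores on real images
D_f = np.random.uniform(0.0, 0.5, batch_size)   # discriminator scores on generated images

d_loss_real = np.mean(cross_entropy(D_t, np.ones(batch_size)))    # real should score 1
d_loss_fake = np.mean(cross_entropy(D_f, np.zeros(batch_size)))   # fake should score 0
d_loss = d_loss_real + d_loss_fake
g_loss = np.mean(cross_entropy(D_f, np.ones(batch_size)))         # generator wants fakes scored 1
```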
...@@ -176,7 +176,7 @@ class DCGAN(object):
                self.G = self.generator(self.z)
                self.D_g = self.discriminator(self.G, self.y)
            self.g_loss = pd.reduce_mean(pd.cross_entropy(self.D_g, np.ones(self.batch_size)))
        with pd.default_block().d_block():
            if self.y_dim: # if conditional GAN, includes label
                self.D_t = self.discriminator(self.images, self.y)
...@@ -217,7 +217,7 @@ if __name__ == "__main__":
    # load mnist data
    data_X, data_y = self.load_mnist()
    # Two subgraphs required!!!
    with pd.block().d_block():
        d_optim = pd.train.Adam(lr = .001, beta= .1)
...@@ -228,7 +228,7 @@ if __name__ == "__main__":
    # executor
    sess = pd.executor()
    # training
    for epoch in xrange(10000):
        for batch_id in range(N / batch_size):
...@@ -239,7 +239,7 @@ if __name__ == "__main__":
            batch_z = np.random.uniform(-1., 1., [batch_size, z_dim])
            if batch_id % 2 == 0:
                sess.run(d_step,
                         feed_dict = {dcgan.images: batch_im,
                                      dcgan.y: batch_label,
                                      dcgan.z: batch_z})
......
...@@ -9,5 +9,5 @@
use_eigen_cn.md
name_convention.md
support_new_device.md
releasing_process_cn.md
op_markdown_format.md
...@@ -9,5 +9,5 @@ Development
use_eigen_en.md
name_convention.md
support_new_device.md
releasing_process_en.md
op_markdown_format.md
...@@ -10,19 +10,10 @@ Each time PaddlePaddle releases a new version, it follows this process:
* Use the Regression Test List as a checklist to verify that this release is correct.
* If it fails, record all failing cases, fix all the bugs on this `release/版本号` branch, increase the Patch number by one, and go back to step 2.
* Update the version information in `python/setup.py.in` and set the `istaged` field to `True`.
* Publish the Python wheel packages of this version to pypi.
* Update the Docker images (see the detailed steps later in this document).
1. After step 3 is done, merge the `release/版本号` branch into master, tag the merge commit on master with `版本号`, and then merge `master` back into `develop`.
1. Collaborate on writing the Release Note.
Note that:
...@@ -31,13 +22,18 @@ Each time PaddlePaddle releases a new version, it follows this process:
## Publishing wheel packages to pypi
1. Use [PaddlePaddle CI](https://paddleci.ngrok.io/project.html?projectId=Manylinux1&tab=projectOverview)
to run the automated binary builds. Referring to the figure below, choose the versions to release (usually one CPU version and one GPU version) and click the "..." button to the right of "run";
this opens the selection box below. In the second tab (Changes), choose the branch to release (0.11.0 here), then click the "Run Build" button.
<img src="https://raw.githubusercontent.com/PaddlePaddle/Paddle/develop/doc/fluid/images/ci_build_whl.png">
1. After the build finishes, the three generated binaries can be found in the "Artifacts" drop-down on this page, corresponding to the CAPI, `cp27m`, and `cp27mu` builds.
1. Since pypi.python.org follows [the strict naming rules of PEP 513](https://www.python.org/dev/peps/pep-0513), the platform-related suffix of the wheel package must be renamed before uploading with twine, e.g. change `linux_x86_64` to `manylinux1_x86_64`.
1. Upload:
```
cd build/python
pip install twine
twine upload dist/[package to upload]
```
* Note: the CI environment uses the Docker images from https://github.com/PaddlePaddle/buildtools as the build environment in order to support more Linux
distributions. If you need to build manually, you can also use these images. They can also be downloaded from https://hub.docker.com/r/paddlepaddle/paddle_manylinux_devel/tags/.
...@@ -48,10 +44,20 @@ Each time PaddlePaddle releases a new version, it follows this process:
After the PaddlePaddle CI above finishes building the wheels, it automatically pushes the Docker images to DockerHub, so publishing the Docker images only requires tagging the automatically pushed images
with the tag of the corresponding version:
```
docker pull [image]:latest
docker tag [image]:latest [image]:[version]
docker push [image]:[version]
```
The image tags that need to be updated include:
* `[version]`: CPU version
* `[version]-openblas`: openblas version
* `[version]-gpu`: GPU version (CUDA 8.0, cuDNN 5)
* `[version]-gpu-[cudaver]-[cudnnver]`: images for different CUDA and cuDNN versions
Afterwards, visit https://hub.docker.com/r/paddlepaddle/paddle/tags/ to check whether the release succeeded.
## PaddlePaddle branching conventions
...@@ -76,7 +82,7 @@ The PaddlePaddle development process uses [git-flow](http://nvie.com/posts/a-successful-git-
### All chapters of the PaddlePaddle Book
Every PaddlePaddle release must first guarantee that all chapters of the PaddlePaddle Book work correctly. Correctness includes verifying model training both with the current `paddle_trainer` and purely with `Python` (V2 and Fluid).
<table>
<thead>
......
# PaddlePaddle Releasing Process
PaddlePaddle manages its branches using the "git-flow" branching model and uses [Semantic Versioning](http://semver.org/) for its version numbers.
Each time we release a new PaddlePaddle version, we should follow the steps below:
1. Fork a new branch from `develop` named `release/[version]`, e.g. `release/0.10.0`.
1. Push a new tag on the release branch; the tag name should look like `[version]rc.patch`. The
first tag should be `0.10.0rc1`, the second `0.10.0rc2`, and so on.
1. After that, we should do:
* Run all regression tests on the Regression Test List (see PaddlePaddle TeamCity CI) to confirm
that this release has no major bugs.
* If a regression test fails, we must fix those bugs and create a new `release/[version]`
branch from the previous release branch.
* Modify `python/setup.py.in`, change the version number, and set `ISTAGED` to `True`.
* Publish the PaddlePaddle release wheel packages to pypi (see the instructions below for details).
* Update the Docker images (see the instructions below for details).
1. After the above steps, merge the `release/[version]` branch into master, push a tag on the master commit,
then merge `master` into `develop`.
1. Update the Release Note.
***NOTE:***
* Do ***NOT*** merge commits from the develop branch into release branches; keep each release branch containing
features only for its own release, so that we can test that version.
* If we want to fix bugs on a release branch, we must merge the fix into master, develop, and the release branch.
## Publish Wheel Packages to pypi
1. Use our [CI tool](https://paddleci.ngrok.io/project.html?projectId=Manylinux1&tab=projectOverview)
to build all wheel packages needed for publishing. As shown in the following picture, choose a build
version, click the "..." button on the right side of the "Run" button, switch to the second tab in the
pop-up box, choose the current release branch, and click the "Run Build" button. You may repeat this
step to start builds for different versions.
<img src="https://raw.githubusercontent.com/PaddlePaddle/Paddle/develop/doc/fluid/images/ci_build_whl.png">
1. After the build succeeds, download the outputs under "Artifacts", including CAPI, `cp27m`, and `cp27mu`.
1. Since pypi.python.org follows [PEP 513](https://www.python.org/dev/peps/pep-0513), before we
upload the package using `twine`, we need to rename the package from `linux_x86_64` to
`manylinux1_x86_64` (a short sketch of this rename follows the upload commands below).
1. Start the upload:
```
cd build/python
pip install twine
twine upload dist/[package to upload]
```
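Referring back to the renaming step above, a minimal Python sketch of that rename might look like the following; the file names and the `dist/` location are illustrative assumptions, not fixed paths:

```python
import glob
import os

# Illustrative only: rename the platform tag in already-built wheel file names
# from linux_x86_64 to manylinux1_x86_64 before uploading with twine.
for whl in glob.glob("dist/*-linux_x86_64.whl"):
    os.rename(whl, whl.replace("linux_x86_64", "manylinux1_x86_64"))
```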
* NOTE: We use a special Docker image to build our releases in order to support more Linux distributions; you can
download it from https://hub.docker.com/r/paddlepaddle/paddle_manylinux_devel/tags/, or build it using the
scripts under `tools/manylinux1`.
* pypi does not allow overwriting an already uploaded wheel package version, even if you delete the
old version. You must change the version number before uploading a new one.
## Publish Docker Images
Our CI tool pushes the latest images to DockerHub, so we only need to push a version tag like:
```
docker pull [image]:latest
docker tag [image]:latest [image]:[version]
docker push [image]:[version]
```
Tags that need to be updated are:
* `[version]`: CPU only version image
* `[version]-openblas`: openblas version image
* `[version]-gpu`: GPU version (using CUDA 8.0, cuDNN 5)
* `[version]-gpu-[cudaver]-[cudnnver]`: tags for different CUDA and cuDNN versions
You can then check the latest pushed tags at https://hub.docker.com/r/paddlepaddle/paddle/tags/.
## Branching Model
We use [git-flow](http://nvie.com/posts/a-successful-git-branching-model/) as our branching model,
with some modifications:
* `master` branch is the stable branch. Each version on the master branch is tested and guaranteed.
* `develop` branch is for development. Each commit on the develop branch has passed CI unit tests, but no
regression tests are run.
* `release/[version]` branch is used to publish each release. The latest release branches receive
bug fixes only for that version, but no feature updates.
* Developer forks are not required to follow the
[git-flow](http://nvie.com/posts/a-successful-git-branching-model/)
branching model; each fork behaves like a feature branch.
* Advice: use the fork's develop branch to sync up with the main repo's develop branch.
* Advice: start new work on a branch created from the fork's develop branch.
* Use that branch on the developer's fork to create pull requests and start reviews.
* Developers can push new commits to that branch while the pull request is open.
* Bug fixes are also started from a developer's forked repo, and bug-fix branches can be merged into
`master`, `develop`, and `release` branches.
## PaddlePaddle Regression Test List
### All Chapters of PaddlePaddle Book
We need to guarantee that all chapters of the PaddlePaddle Book run correctly, including
V1 (`paddle_trainer`) training, V2 training, and Fluid training.
<table>
<thead>
<tr>
<th></th>
<th>Linear Regression</th>
<th>Recognize Digits</th>
<th>Image Classification</th>
<th>Word2Vec</th>
<th>Personalized Recommendation</th>
<th>Sentiment Analysis</th>
<th>Semantic Role Labeling</th>
<th>Machine Translation</th>
</tr>
</thead>
<tbody>
<tr>
<td>API.V2 + Docker + GPU </td>
<td> </td>
<td> </td>
<td> </td>
<td> </td>
<td> </td>
<td> </td>
<td> </td>
<td> </td>
</tr>
<tr>
<td> API.V2 + Docker + CPU </td>
<td> </td>
<td> </td>
<td> </td>
<td> </td>
<td> </td>
<td> </td>
<td> </td>
<td> </td>
</tr>
<tr>
<td>`paddle_trainer` + Docker + GPU </td>
<td> </td>
<td> </td>
<td> </td>
<td> </td>
<td> </td>
<td> </td>
<td> </td>
<td> </td>
</tr>
<tr>
<td>`paddle_trainer` + Docker + CPU </td>
<td> </td>
<td> </td>
<td> </td>
<td> </td>
<td> </td>
<td> </td>
<td> </td>
<td> </td>
</tr>
<tr>
<td> API.V2 + Ubuntu + GPU</td>
<td> </td>
<td> </td>
<td> </td>
<td> </td>
<td> </td>
<td> </td>
<td> </td>
<td> </td>
</tr>
<tr>
<td>API.V2 + Ubuntu + CPU </td>
<td> </td>
<td> </td>
<td> </td>
<td> </td>
<td> </td>
<td> </td>
<td> </td>
<td> </td>
</tr>
<tr>
<td> `paddle_trainer` + Ubuntu + GPU</td>
<td> </td>
<td> </td>
<td> </td>
<td> </td>
<td> </td>
<td> </td>
<td> </td>
<td> </td>
</tr>
<tr>
<td> `paddle_trainer` + Ubuntu + CPU</td>
<td> </td>
<td> </td>
<td> </td>
<td> </td>
<td> </td>
<td> </td>
<td> </td>
<td> </td>
</tr>
</tbody>
</table>
...@@ -23,7 +23,7 @@ But how to record the time for the mixed C++ and CUDA program? There many C++ A
The overall flow is shown in the following figure.
<img src="https://raw.githubusercontent.com/PaddlePaddle/Paddle/develop/doc/fluid/images/profiler.png" align="center"/><br/>
### Event
...@@ -36,10 +36,10 @@ enum EventKind {
kPopRange};
```
- kMark: only a marker without a time range.
- kPushRange: mark the starting event for a time range.
- kPopRange: mark the ending event for a time range.
For the CPU code, the events only need to record the current time. For the CUDA code, the [event management functions of CUDA](http://docs.nvidia.com/cuda/cuda-runtime-api/group__CUDART__EVENT.html#group__CUDART__EVENT) are used. For many pieces of code, an event list is used to record each piece.
```c++
class Event {
...@@ -66,11 +66,11 @@ struct EventList {
};
```
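The push/pop pairing can be illustrated with a small Python analogue; this is only a sketch of the idea, not the C++ interface above: each `PushRange` event is matched by a `PopRange` event, and the elapsed time between them is attributed to the named code region.

```python
import time

events = []  # flat list of (kind, name, timestamp), playing the role of an event list

def push_range(name):
    events.append(("PushRange", name, time.time()))

def pop_range(name):
    events.append(("PopRange", name, time.time()))

push_range("fc_op")
time.sleep(0.01)              # the code region being timed
pop_range("fc_op")

elapsed = events[-1][2] - events[0][2]
print("fc_op took %.3f ms" % (elapsed * 1000.0))
```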
As mentioned above, there is no need to record the timeline when the profiler is disabled. So there is a global state to enable or disable the profiler.
```c++
enum ProfilerState {
kDisabled,
kCPU,
kCUDA
};
......
digraph G {
rnn [label="1st level RNN" shape=box]
subgraph cluster0 {
label = "time step 0"
sent0 [label="sentence"]
sent1 [label="sentence"]
rnn1 [label="2nd level RNN" shape=box]
sent0 -> rnn1
sent1 -> rnn1
}
subgraph cluster1 {
label = "time step 1"
sent2 [label="sentence"]
sent3 [label="sentence"]
rnn2 [label="2nd level RNN" shape=box]
sent2 -> rnn2
sent3 -> rnn2
}
subgraph cluster2 {
label = "time step 2"
sent4 [label="sentence"]
sent5 [label="sentence"]
rnn3 [label="2nd level RNN" shape=box]
sent4 -> rnn3
sent5 -> rnn3
}
para0 [label="paragraph info 0"]
para1 [label="paragraph info 1"]
para2 [label="paragraph info 2"]
rnn1 -> para0
rnn2 -> para1
rnn3 -> para2
para0 -> rnn
para1 -> rnn
para2 -> rnn
chapter [label="chapter info"]
rnn -> chapter
}
digraph ImageBatchNormForkGragh {
subgraph cluster_before {
Prev [label="...", shape=plaintext];
Rnn [label="rnn_op", shape=box];
BatchNorm [label="batch_norm_op", shape=box];
Fc [label="fc_op", shape=box];
After [label="...", shape=plaintext];
Prev -> Rnn -> BatchNorm -> Fc -> After;
label="original";
}
subgraph cluster_after {
Prev2 [label="...", shape=plaintext];
Rnn2 [label="rnn_op", shape=box];
BatchNorm2_1 [label="train_batch_norm_op", shape=box];
BatchNorm2_2 [label="infer_batch_norm_op", shape=box];
Fc2_1 [label="fc_op", shape=box];
Fc2_2 [label="fc_op", shape=box];
After2_1 [label="...", shape=plaintext];
After2_2 [label="...", shape=plaintext];
Prev2 -> Rnn2 -> BatchNorm2_1 -> Fc2_1 -> After2_1;
Rnn2 -> BatchNorm2_2 ->Fc2_2 ->After2_2
label="forked";
}
}
cat ./graph_construction_example.dot | \
sed 's/color=red/color=red, style=invis/g' | \
sed 's/color=green/color=green, style=invis/g' | \
dot -Tpng > graph_construction_example_forward_only.png
cat ./graph_construction_example.dot | \
sed 's/color=green/color=green, style=invis/g' | \
dot -Tpng > graph_construction_example_forward_backward.png
cat ./graph_construction_example.dot | \
dot -Tpng > graph_construction_example_all.png
digraph ImageClassificationGraph {
///////// The forward part /////////
FeedX [label="Feed", color=blue, shape=box];
FeedY [label="Feed", color=blue, shape=box];
InitW [label="Init", color=blue, shape=diamond];
Initb [label="Init", color=blue, shape=diamond];
FC [label="FC", color=blue, shape=box];
MSE [label="MSE", color=blue, shape=box];
x [label="x", color=blue, shape=oval];
l [label="l", color=blue, shape=oval];
y [label="y", color=blue, shape=oval];
W [label="W", color=blue, shape=doublecircle];
b [label="b", color=blue, shape=doublecircle];
cost [label="cost", color=blue, shape=oval];
FeedX -> x -> FC -> y -> MSE -> cost [color=blue];
FeedY -> l [color=blue];
InitW -> W [color=blue];
Initb -> b [color=blue];
W -> FC [color=blue];
b -> FC [color=blue];
l -> MSE [color=blue];
////////// The backward part /////////
MSE_Grad [label="MSE_grad", color=red, shape=box];
FC_Grad [label="FC_grad", color=red, shape=box];
d_cost [label="d cost", color=red, shape=oval];
d_y [label="d y", color=red, shape=oval];
d_b [label="d b", color=red, shape=oval];
d_W [label="d W", color=red, shape=oval];
cost -> MSE_Grad [color=red];
d_cost -> MSE_Grad [color=red];
l -> MSE_Grad [color=red];
y -> MSE_Grad -> d_y [color=red];
x -> FC_Grad [color=red];
y -> FC_Grad [color=red];
d_y -> FC_Grad [color=red];
W -> FC_Grad -> d_W [color=red];
b -> FC_Grad -> d_b [color=red];
  ////////// The optimization part //////////
OPT_W [label="SGD", color=green, shape=box];
OPT_b [label="SGD", color=green, shape=box];
W -> OPT_W [color=green];
b -> OPT_b [color=green];
d_W -> OPT_W -> W [color=green];
d_b -> OPT_b -> b [color=green];
////////// Groupings //////////
subgraph clusterMSE {
style=invis;
MSE;
MSE_Grad;
}
subgraph clusterFC {
style=invis;
FC;
FC_Grad;
}
}
digraph G {
label = "simple RNN implementation"
ranksep=2;
//graph [nodesep=1, ranksep=1];
node[nodesep=1]
subgraph cluster0 {
label = "global scope"
rankdir = TB
W
boot_memory
input
output
}
subgraph cluster1 {
label = "step-scope 0"
rankdir = TB
memory0[label="memory"]
prememory0[label="pre-memory"]
step_input0[label="step input"]
step_output0[label="step output"]
}
subgraph cluster2 {
label = "step-scope 1"
rankdir = TB
memory1[label="memory"]
prememory1[label="pre-memory"]
step_input1[label="step input"]
step_output1[label="step output"]
}
subgraph cluster3 {
label = "step-scope 2"
rankdir = TB
memory2[label="memory"]
prememory2[label="pre-memory"]
step_input2[label="step input"]
step_output2[label="step output"]
}
stepnet [shape=box]
stepnet0 [shape=box, style=dashed]
stepnet1 [shape=box, style=dashed]
stepnet2 [shape=box, style=dashed]
edge[color=blue]
boot_memory -> prememory0 [label="init" color="blue"]
memory0 -> prememory1 [label="copy/reference" color="blue"]
memory1 -> prememory2 [label="copy/reference" color="blue"]
edge[color=black]
W -> stepnet0[constraint=false, style=dashed]
W -> stepnet1[constraint=false, style=dashed]
W -> stepnet2[constraint=false, style=dashed]
memory0 -> stepnet0[style=dashed]
prememory0 -> stepnet0 -> step_output0[style=dashed]
memory1 -> stepnet1[style=dashed]
prememory1 -> stepnet1 -> step_output1[style=dashed]
memory2 -> stepnet2[style=dashed]
prememory2 -> stepnet2 -> step_output2[style=dashed]
input -> step_input0
input -> step_input1
input -> step_input2
step_input0 -> stepnet0 [style=dashed]
step_input1 -> stepnet1[style=dashed]
step_input2 -> stepnet2[style=dashed]
step_output0 -> output
step_output1 -> output
step_output2 -> output
stepnet0 -> stepnet[style=dashed]
stepnet1 -> stepnet[style=dashed]
stepnet2 -> stepnet[style=dashed]
}
digraph G {
chapter [label="chapter"]
subgraph cluster0 {
label = "paragraph 0"
top_rnn0[label="top rnn step 0" shape=box]
p0 [label="paragraph 0"]
p1 [label="paragraph 1"]
}
subgraph cluster1{
label = "paragraph 1"
top_rnn1[label="top rnn step 1" shape=box]
p2 [label="paragraph 0"]
p3 [label="paragraph 1"]
}
subgraph cluster_p0 {
label = "sentence 0"
low_rnn0 [label="low rnn step 0" shape=box]
s00 [label="sentence 0"]
s01 [label="sentence 1"]
low_rnn0 -> s00
low_rnn0 -> s01
}
subgraph cluster_p1 {
label = "sentence 1"
low_rnn1 [label="low rnn step 1" shape=box]
s10 [label="sentence 0"]
s11 [label="sentence 1"]
low_rnn1 -> s10
low_rnn1 -> s11
}
subgraph cluster_p2 {
label = "sentence 1"
low_rnn2 [label="low rnn step 0" shape=box]
s20 [label="sentence 0"]
s21 [label="sentence 1"]
low_rnn2 -> s20
low_rnn2 -> s21
}
subgraph cluster_p3 {
label = "sentence 1"
low_rnn3 [label="low rnn step 1" shape=box]
s30 [label="sentence 0"]
s31 [label="sentence 1"]
low_rnn3 -> s30
low_rnn3 -> s31
}
chapter -> top_rnn0
chapter -> top_rnn1
top_rnn0 -> p0
top_rnn0 -> p1
top_rnn1 -> p2
top_rnn1 -> p3
p0 -> low_rnn0
p1 -> low_rnn1
p2 -> low_rnn2
p3 -> low_rnn3
}
digraph Test {
z -> generator -> G_img;
G_img -> discriminator -> D_f -> d_loss_f;
label0 -> d_loss_f -> d_loss;
img -> discriminator -> D_t -> d_loss_t;
label1 -> d_loss_t -> d_loss;
d_loss -> d_loss_t[color=red, style=dashed];
d_loss -> d_loss_f[color=red, style=dashed];
d_loss_t -> D_t[color=red, style=dashed];
d_loss_f -> D_f[color=red, style=dashed];
D_t -> discriminator[color=red, style=dashed];
D_f -> discriminator[color=red, style=dashed];
D_f -> g_loss;
label2 -> g_loss;
g_loss -> D_f[color=green, style=dashed];
D_f -> discriminator[color=green, style=dashed];
discriminator -> G_img[color=green, style=dashed];
G_img -> generator[color=green, style=dashed];
discriminator [color=red, shape=box];
generator [color=green, shape=box];
z [shape=diamond];
img [shape=diamond];
label0 [shape=diamond];
label1 [shape=diamond];
label2 [shape=diamond];
d_loss [color=red];
g_loss [color=green];
}
...@@ -13,7 +13,7 @@
# serve to show the default.
import sys
import os, subprocess
sys.path.insert(0, os.path.abspath('@PADDLE_BINARY_DIR@/python'))
import shlex
from recommonmark import parser, transform
import paddle
......
...@@ -13,7 +13,7 @@
# serve to show the default.
import sys
import os, subprocess
sys.path.insert(0, os.path.abspath('@PADDLE_BINARY_DIR@/python'))
import shlex
from recommonmark import parser, transform
import paddle
......
...@@ -27,7 +27,7 @@ sphinx_add_target(paddle_v2_docs
${CMAKE_CURRENT_SOURCE_DIR}
${SPHINX_HTML_DIR_EN})
add_dependencies(paddle_v2_docs gen_proto_py paddle_python)
# configured documentation tools and intermediate build results
set(BINARY_BUILD_DIR_CN "${CMAKE_CURRENT_BINARY_DIR}/cn/_build")
...@@ -50,6 +50,6 @@ sphinx_add_target(paddle_v2_docs_cn
${CMAKE_CURRENT_SOURCE_DIR}
${SPHINX_HTML_DIR_CN})
add_dependencies(paddle_v2_docs_cn gen_proto_py paddle_python)
add_subdirectory(api)
...@@ -19,4 +19,4 @@ sphinx_add_target(paddle_v2_apis
${CMAKE_CURRENT_SOURCE_DIR}
${SPHINX_HTML_DIR_EN})
add_dependencies(paddle_v2_apis gen_proto_py framework_py_proto copy_paddle_pybind paddle_python)