Commit c1d5aaa1 authored by yi.wu

Merge branch 'develop' of https://github.com/PaddlePaddle/Paddle into thinnerdocker

# Use ccache if the ccache program is found
find_program(CCACHE_PATH ccache)
if(CCACHE_PATH)
    message(STATUS "Ccache found, using ccache to speed up compilation.")
    set_property(GLOBAL PROPERTY RULE_LAUNCH_COMPILE ${CCACHE_PATH})
    set_property(GLOBAL PROPERTY RULE_LAUNCH_LINK ${CCACHE_PATH})
endif(CCACHE_PATH)
\ No newline at end of file
@@ -16,6 +16,14 @@ INCLUDE(ExternalProject)
FIND_PACKAGE(Protobuf 3.1)
IF(PROTOBUF_FOUND)
EXEC_PROGRAM(${PROTOBUF_PROTOC_EXECUTABLE} ARGS --version OUTPUT_VARIABLE PROTOBUF_VERSION)
STRING(REGEX MATCH "[0-9]+.[0-9]+" PROTOBUF_VERSION "${PROTOBUF_VERSION}")
IF (${PROTOBUF_VERSION} VERSION_LESS "3.1.0")
SET(PROTOBUF_FOUND OFF)
ENDIF()
ENDIF(PROTOBUF_FOUND)
IF(NOT PROTOBUF_FOUND)
    SET(PROTOBUF_SOURCES_DIR ${THIRD_PARTY_PATH}/protobuf)
    SET(PROTOBUF_INSTALL_DIR ${THIRD_PARTY_PATH}/install/protobuf)
......
@@ -71,21 +71,10 @@ function(link_paddle_exe TARGET_NAME)
        generate_rdma_links()
    endif()
if(WITH_METRIC)
if(WITH_GPU)
set(METRIC_LIBS paddle_metric_learning paddle_dserver_lib metric metric_cpu)
else()
set(METRIC_LIBS paddle_metric_learning paddle_dserver_lib metric_cpu)
endif()
else()
set(METRIC_LIBS "")
endif()
    target_circle_link_libraries(${TARGET_NAME}
        ARCHIVE_START
        paddle_gserver
        paddle_function
        ${METRIC_LIBS}
        ARCHIVE_END
        paddle_pserver
        paddle_trainer_lib
@@ -95,7 +84,6 @@ function(link_paddle_exe TARGET_NAME)
        paddle_parameter
        paddle_proto
        paddle_cuda
        ${METRIC_LIBS}
        ${EXTERNAL_LIBS}
        ${CMAKE_THREAD_LIBS_INIT}
        ${CMAKE_DL_LIBS}
......
@@ -286,3 +286,16 @@ PaddlePaddle uses the name :code:`name` as a parameter's ID; parameters with the same name
.. code-block:: bash

    paddle train --use_gpu=true --trainer_count=2 --gpu_id=2

12. Training exits with :code:`Floating point exception`. What should I do?
--------------------------------------------------------------------------------

Paddle binaries catch floating-point exceptions at runtime and exit immediately whenever one occurs (that is, whenever a NaN or Inf appears during training). Floating-point exceptions are usually caused by overflow, division by zero, and similar problems. The main causes are:

* The parameters or the gradients grow too large during training, so accumulation, multiplication, or division of parameters overflows.
* The model never converges and diverges to extremely large values.
* The training data is flawed, driving the parameters into degenerate states; or the input data has an excessive scale, with some features reaching values in the millions, so matrix multiplication can overflow.

The main remedies are to reduce the learning rate or to normalize the data.
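As a sketch of the normalization remedy (standard feature scaling, not a PaddlePaddle-specific API), each input feature :math:`j` can be standardized using its training-set statistics:

.. math::

    x'_{ij} = \frac{x_{ij} - \mu_j}{\sigma_j}

where :math:`\mu_j` and :math:`\sigma_j` are the mean and standard deviation of feature :math:`j`. This keeps feature magnitudes bounded, which prevents overflow in the subsequent matrix multiplications.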
@@ -4,118 +4,137 @@ Using PaddlePaddle via Docker containers

The only officially supported way to run PaddlePaddle is in a Docker container, because Docker runs on all major operating systems (including Linux, Mac OS X, and Windows). Please note that you need to change the `Docker settings <https://github.com/PaddlePaddle/Paddle/issues/627>`_ to make full use of your hardware resources on Mac OS X and Windows.

Usage of the CPU-only and GPU Docker images
-------------------------------------------

For each version of PaddlePaddle, we release two Docker images: a CPU-only one and a GPU one.
We configure `dockerhub.com <https://hub.docker.com/r/paddledev/paddle/>`_ to automatically generate the latest images:
`paddledev/paddle:0.10.0rc1-cpu` and `paddledev/paddle:0.10.0rc1-gpu`.

To run the CPU-only image as an interactive container:

.. code-block:: bash

    docker run -it --rm paddledev/paddle:0.10.0rc1-cpu /bin/bash

Alternatively, the container can run as a background daemon:

.. code-block:: bash

    docker run -d -p 2202:22 -p 8888:8888 paddledev/paddle:0.10.0rc1-cpu

Then SSH into the container with password :code:`root`:

.. code-block:: bash

    ssh -p 2202 root@localhost

One advantage of SSH is that we can connect to the container from several terminals at once; for example, one terminal running vi and another running Python. Another advantage is that we can run the PaddlePaddle container on a remote server and SSH into it from a laptop.

The method above also works with the GPU image; just do not forget to install the CUDA driver and tell Docker about it:

.. code-block:: bash

    export CUDA_SO="$(\ls /usr/lib64/libcuda* | xargs -I{} echo '-v {}:{}') $(\ls /usr/lib64/libnvidia* | xargs -I{} echo '-v {}:{}')"
    export DEVICES=$(\ls /dev/nvidia* | xargs -I{} echo '--device {}:{}')
    docker run ${CUDA_SO} ${DEVICES} -it paddledev/paddle:0.10.0rc1-gpu

Running the PaddlePaddle Book
-----------------------------

Jupyter Notebook is an open-source web application for creating and sharing interactive documents containing live code, equations, visualizations, and explanatory text, all viewable in a browser.
The PaddlePaddle Book is an interactive Jupyter Notebook for users and developers.
If you want a deeper understanding of deep learning, the PaddlePaddle Book is definitely your best choice.

Once inside the container, simply run:

.. code-block:: bash

    jupyter notebook

Then open the following address in your browser:

.. code-block:: text

    http://localhost:8888/

That's all. Enjoy your journey!

Non-AVX images
--------------

Both the CPU-only image and the GPU image use the AVX instruction set, but older computers produced before 2008 do not support AVX. The following command checks whether a Linux machine supports AVX:

.. code-block:: bash

    if cat /proc/cpuinfo | grep -i avx; then echo Yes; else echo No; fi

If the output is No, we need to build non-AVX images manually:

.. code-block:: bash

    cd ~
    git clone https://github.com/PaddlePaddle/Paddle.git
    cd Paddle
    docker build --build-arg WITH_AVX=OFF -t paddle:cpu-noavx -f paddle/scripts/docker/Dockerfile .
    docker build --build-arg WITH_AVX=OFF -t paddle:gpu-noavx -f paddle/scripts/docker/Dockerfile.gpu .

Developing PaddlePaddle inside a Docker container
-------------------------------------------------

Developers can develop PaddlePaddle inside Docker, which lets them work in a consistent way across platforms - Linux, Mac OS X, and Windows.

1. Build the development environment as a Docker image

   .. code-block:: bash

      git clone --recursive https://github.com/PaddlePaddle/Paddle
      cd Paddle
      docker build -t paddle:dev -f paddle/scripts/docker/Dockerfile .

   Note that by default :code:`docker build` does not copy the source code into the image and compile it. To do so, pass a build argument:

   .. code-block:: bash

      docker build -t paddle:dev -f paddle/scripts/docker/Dockerfile --build-arg BUILD_AND_INSTALL=ON .

2. Run the development environment

   Once :code:`paddle:dev` is built, we can develop inside a Docker container, loading the source code into the development environment by mounting a local directory:

   .. code-block:: bash

      docker run -d -p 2202:22 -v $PWD:/paddle paddle:dev

   The command above starts a container with the PaddlePaddle development environment and mounts the source code at :code:`/paddle`.
   Note that the default entry point of :code:`paddle:dev` is :code:`sshd`, so the :code:`docker run` command above actually starts an SSHD server listening on port 2202. This lets us SSH into the development container:

   .. code-block:: bash

      ssh root@localhost -p 2202

3. Build and install PaddlePaddle inside the Docker development environment

   Inside the container, the script :code:`paddle/scripts/docker/build.sh` compiles, installs, and tests PaddlePaddle:

   .. code-block:: bash

      /paddle/paddle/scripts/docker/build.sh

   This builds PaddlePaddle in :code:`/paddle/build`. Unit tests can then be run with:

   .. code-block:: bash

      cd /paddle/build
      ctest
Documentation
@@ -128,7 +147,7 @@ The Paddle Docker image comes with one generated via `woboq code browser
.. code-block:: bash

    docker run -d --name paddle-cpu-doc paddle:0.10.0rc1-cpu
    docker run -d --volumes-from paddle-cpu-doc -p 8088:80 nginx

Then we can open a browser at http://localhost:8088/paddle/ to browse the code.
@@ -9,6 +9,100 @@ Please be aware that you will need to change `Docker settings
of your hardware resource on Mac OS X and Windows.
Usage of CPU-only and GPU Images
----------------------------------
For each version of PaddlePaddle, we release two Docker images, a
CPU-only one and a CUDA GPU one. We do so by configuring
`dockerhub.com <https://hub.docker.com/r/paddledev/paddle/>`_ to
automatically generate the latest Docker images `paddledev/paddle:0.10.0rc1-cpu`
and `paddledev/paddle:0.10.0rc1-gpu`.
To run the CPU-only image as an interactive container:
.. code-block:: bash
docker run -it --rm paddledev/paddle:0.10.0rc1-cpu /bin/bash
or, we can run it as a daemon container
.. code-block:: bash
docker run -d -p 2202:22 -p 8888:8888 paddledev/paddle:0.10.0rc1-cpu
and SSH to this container using password :code:`root`:
.. code-block:: bash
ssh -p 2202 root@localhost
An advantage of using SSH is that we can connect to PaddlePaddle from
more than one terminal. For example, one terminal can run vi and
another the Python interpreter. Another advantage is that we
can run the PaddlePaddle container on a remote server and SSH to it
from a laptop.

The methods above work with the GPU image too -- just please don't forget
to install the CUDA driver and let Docker know about it:
.. code-block:: bash
export CUDA_SO="$(\ls /usr/lib64/libcuda* | xargs -I{} echo '-v {}:{}') $(\ls /usr/lib64/libnvidia* | xargs -I{} echo '-v {}:{}')"
export DEVICES=$(\ls /dev/nvidia* | xargs -I{} echo '--device {}:{}')
docker run ${CUDA_SO} ${DEVICES} -it paddledev/paddle:0.10.0rc1-gpu
PaddlePaddle Book
------------------
The Jupyter Notebook is an open-source web application that allows
you to create and share documents that contain live code, equations,
visualizations and explanatory text, all viewable in a browser.
PaddlePaddle Book is an interactive Jupyter Notebook for users and developers.
We already exposed port 8888 for this book. If you want to
dig deeper into deep learning, the PaddlePaddle Book is definitely your best choice.
Once you are inside the container, simply issue the command:
.. code-block:: bash
jupyter notebook
Then, copy and paste the address into your local browser:
.. code-block:: text
http://localhost:8888/
That's all. Enjoy your journey!
Non-AVX Images
--------------
Please be aware that the CPU-only and the GPU images both use the AVX
instruction set, but old computers produced before 2008 do not support
AVX. The following command checks if your Linux computer supports
AVX:
.. code-block:: bash
if cat /proc/cpuinfo | grep -i avx; then echo Yes; else echo No; fi
If it doesn't, we will need to build non-AVX images manually from
source code:
.. code-block:: bash
cd ~
git clone https://github.com/PaddlePaddle/Paddle.git
cd Paddle
docker build --build-arg WITH_AVX=OFF -t paddle:cpu-noavx -f paddle/scripts/docker/Dockerfile .
docker build --build-arg WITH_AVX=OFF -t paddle:gpu-noavx -f paddle/scripts/docker/Dockerfile.gpu .
Development Using Docker
------------------------

@@ -82,103 +176,6 @@ Windows -- in a consistent way.

    cd /paddle/build
    ctest
4. Run PaddlePaddle Book under Docker Container
The Jupyter Notebook is an open-source web application that allows
you to create and share documents that contain live code, equations,
visualizations and explanatory text, all viewable in a browser.
PaddlePaddle Book is an interactive Jupyter Notebook for users and developers.
We already exposed port 8888 for this book. If you want to
dig deeper into deep learning, PaddlePaddle Book definitely is your best choice.
Once you are inside the container, simply issue the command:
.. code-block:: bash
jupyter notebook
Then, copy and paste the address into your local browser:
.. code-block:: text
http://localhost:8888/
That's all. Enjoy your journey!
CPU-only and GPU Images
-----------------------
For each version of PaddlePaddle, we release 2 Docker images, a
CPU-only one and a CUDA GPU one. We do so by configuring
`dockerhub.com <https://hub.docker.com/r/paddledev/paddle/>`_ to
automatically run the following commands:
.. code-block:: bash
docker build -t paddle:cpu -f paddle/scripts/docker/Dockerfile .
docker build -t paddle:gpu -f paddle/scripts/docker/Dockerfile.gpu .
To run the CPU-only image as an interactive container:
.. code-block:: bash
docker run -it --rm paddledev/paddle:cpu-latest /bin/bash
or, we can run it as a daemon container
.. code-block:: bash
docker run -d -p 2202:22 paddledev/paddle:cpu-latest
and SSH to this container using password :code:`root`:
.. code-block:: bash
ssh -p 2202 root@localhost
An advantage of using SSH is that we can connect to PaddlePaddle from
more than one terminal. For example, one terminal can run vi and
another the Python interpreter. Another advantage is that we
can run the PaddlePaddle container on a remote server and SSH to it
from a laptop.

The methods above work with the GPU image too -- just please don't forget
to install the CUDA driver and let Docker know about it:
.. code-block:: bash
export CUDA_SO="$(\ls /usr/lib64/libcuda* | xargs -I{} echo '-v {}:{}') $(\ls /usr/lib64/libnvidia* | xargs -I{} echo '-v {}:{}')"
export DEVICES=$(\ls /dev/nvidia* | xargs -I{} echo '--device {}:{}')
docker run ${CUDA_SO} ${DEVICES} -it paddledev/paddle:gpu-latest
Non-AVX Images
--------------
Please be aware that the CPU-only and the GPU images both use the AVX
instruction set, but old computers produced before 2008 do not support
AVX. The following command checks if your Linux computer supports
AVX:
.. code-block:: bash
if cat /proc/cpuinfo | grep -i avx; then echo Yes; else echo No; fi
If it doesn't, we will need to build non-AVX images manually from
source code:
.. code-block:: bash
cd ~
git clone https://github.com/PaddlePaddle/Paddle.git
cd Paddle
docker build --build-arg WITH_AVX=OFF -t paddle:cpu-noavx -f paddle/scripts/docker/Dockerfile .
docker build --build-arg WITH_AVX=OFF -t paddle:gpu-noavx -f paddle/scripts/docker/Dockerfile.gpu .
Documentation
-------------

@@ -194,7 +191,7 @@ container:

.. code-block:: bash

    docker run -d --name paddle-cpu-doc paddle:0.10.0rc1-cpu
    docker run -d --volumes-from paddle-cpu-doc -p 8088:80 nginx
......
@@ -228,16 +228,6 @@
<td class="left"></td><td class="left"></td><td class="left"></td><td class="left"></td>
</tr>
<tr>
<td class="left" rowspan = "2">度量学习(metric learning)</td><td class="left">external</td>
<td class="left"></td><td class="left"></td><td class="left"></td><td class="left"></td>
</tr>
<tr>
<td class="left">data_server_port</td>
<td class="left"></td><td class="left"></td><td class="left"></td><td class="left"></td>
</tr>
<tr>
<td class="left" rowspan = "16">parameter server (PServer)</td><td class="left">start_pserver</td>
<td class="left"></td><td class="left"></td><td class="left"></td><td class="left"></td>
......
@@ -228,16 +228,6 @@ It looks like there are a lot of arguments. However, most of them are for develo
<td class="left"></td><td class="left"></td><td class="left"></td><td class="left"></td>
</tr>
<tr>
<td class="left" rowspan = "2">metric learning</td><td class="left">external</td>
<td class="left"></td><td class="left"></td><td class="left"></td><td class="left"></td>
</tr>
<tr>
<td class="left">data_server_port</td>
<td class="left"></td><td class="left"></td><td class="left"></td><td class="left"></td>
</tr>
<tr>
<td class="left" rowspan = "16">PServer</td><td class="left">start_pserver</td>
<td class="left"></td><td class="left"></td><td class="left"></td><td class="left"></td>
......
@@ -180,15 +180,6 @@
  - Users can define their own beam search method, compile it into a dynamic library, and have PaddlePaddle load it. This flag specifies the path of that dynamic library.
  - type: string (default: "", null).

## Metric Learning

* `--external`
  - Whether to use an external machine for metric learning.
  - type: bool (default: 0).

* `--data_server_port`
  - Listening port of the data server, mainly used in metric learning.
  - type: int32 (default: 21134).

## DataProvider

* `--memory_threshold_on_load_data`
......
@@ -184,15 +184,6 @@
  - Specify a shared dynamic library. It can be defined outside of Paddle by the user.
  - type: string (default: "", null).
## Metric Learning
* `--external`
- Whether to use external machine for metric learning.
- type: bool (default: 0).
* `--data_server_port`
- Listening port for dserver (data server), dserver is mainly used in metric learning.
- type: int32 (default: 21134).
## DataProvider

* `--memory_threshold_on_load_data`
......
@@ -24,9 +24,6 @@ limitations under the License. */
DEFINE_bool(allow_only_one_model_on_one_gpu,
            true,
            "If true, do not allow multiple models on one GPU device");
#ifdef PADDLE_METRIC_LEARNING
DECLARE_bool(external);
#endif
namespace paddle {

@@ -45,11 +42,7 @@ MultiGradientMachine::MultiGradientMachine(const ModelConfig& config,
      trainerBarrier_(FLAGS_trainer_count),
      allBarrier_(FLAGS_trainer_count + 1),
      inArgsCopied_(false) {
#ifdef PADDLE_METRIC_LEARNING
isPassGrad_ = FLAGS_external;
#else
  isPassGrad_ = false;
#endif
  numThreads_ = FLAGS_trainer_count;
  if (useGpu) {
    //! TODO(yuyang18): When useGpu=false && paddle is not compiled with gpu,
......
@@ -24,7 +24,7 @@ bool CRFDecodingLayer::init(const LayerMap& layerMap,
    return false;
  }
  crf_.reset(new LinearChainCRF(
      numClasses_, parameter_->getBuf(PARAMETER_VALUE)->getData()));
  return true;
}
......
@@ -42,6 +42,7 @@ bool CRFLayer::init(const LayerMap& layerMap,
  CHECK_EQ(parameters_[0]->getSize(), numClasses_ * (numClasses_ + 2));
  parameter_ = parameters_[0];
  weight_.reset(new Weight(numClasses_ + 2, numClasses_, parameter_));

  // We don't need sequenceStartPositions because each sample of output_ is
  // for the cost of one sequence.
@@ -69,11 +70,7 @@ void CRFLayer::forward(PassType passType) {
  for (size_t i = 0; i < numSequences; ++i) {
    if (i >= crfs_.size()) {
      crfs_.emplace_back(numClasses_, weight_->getW()->getData());
    }
    output_.value->getData()[i] =
        crfs_[i].forward(output.value->getData() + numClasses_ * starts[i],
@@ -93,22 +90,25 @@ void CRFLayer::backward(const UpdateCallback& callback) {
  const int* starts = label.sequenceStartPositions->getData(false);
  int numSequences = label.sequenceStartPositions->getSize() - 1;

  bool needWGrad = weight_->getWGrad() ? true : false;
  for (int i = 0; i < numSequences; ++i) {
    crfs_[i].backward(output.value->getData() + numClasses_ * starts[i],
                      label.ids->getData() + starts[i],
                      starts[i + 1] - starts[i],
                      needWGrad);
    real instanceWeight = weightLayer_
                              ? getInputValue(*weightLayer_)->getElement(i, 0)
                              : real(1.0f);
    instanceWeight *= coeff_;

    MatrixPtr grad = output.grad->subRowMatrix(starts[i], starts[i + 1]);
    grad->add(*crfs_[i].getXGrad(), real(1.0f), instanceWeight);
    if (needWGrad) {
      weight_->getWGrad()->add(
          *crfs_[i].getWGrad(), real(1.0f), instanceWeight);
    }
  }

  parameter_->incUpdate(callback);
}
......
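In effect, the rewritten backward pass scales each sequence's contribution by a single factor. Writing $\lambda$ for `coeff_`, $w_i$ for the optional per-sequence weight (1 when no weight layer is given), and $g^{(x)}_i$, $g^{(W)}_i$ for the per-sequence gradients returned by `getXGrad()` and `getWGrad()`, the accumulation above amounts to:

$$\frac{\partial L}{\partial x_i} \mathrel{+}= \lambda\, w_i\, g^{(x)}_i, \qquad \frac{\partial L}{\partial W} \mathrel{+}= \lambda\, w_i\, g^{(W)}_i$$

This replaces the old scheme of writing the gradient into `output.grad` directly and rescaling it afterwards with `mulScalar`.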
@@ -39,6 +39,7 @@ protected:
  ParameterPtr parameter_;
  std::vector<LinearChainCRF> crfs_;
  LayerPtr weightLayer_;  // weight for each sequence
  std::unique_ptr<Weight> weight_;  // parameters
  real coeff_;  // weight for the layer
};
......
@@ -381,8 +381,7 @@ void Layer::backwardActivation() {
void Layer::forwardDropOut() {
  auto& outV = getOutputValue();

  if (passType_ == PASS_TRAIN) {
    // new dropOutMask_ if dropOutMask_ is null ptr
    Matrix::resizeOrCreate(dropOutMask_,
                           outV->getHeight(),
......
@@ -17,18 +17,12 @@ limitations under the License. */
namespace paddle {

LinearChainCRF::LinearChainCRF(int numClasses, real* para)
    : numClasses_(numClasses) {
  a_ = Matrix::create(para, 1, numClasses_);
  b_ = Matrix::create(para + numClasses_, 1, numClasses_);
  w_ = Matrix::create(para + 2 * numClasses_, numClasses_, numClasses_);

  ones_ = Matrix::create(1, numClasses_);
  ones_->one();
@@ -107,19 +101,24 @@ real LinearChainCRF::forward(real* x, int* s, int length) {
  return -ll;
}

void LinearChainCRF::backward(real* x, int* s, int length, bool needWGrad) {
  MatrixPtr matX = Matrix::create(x, length, numClasses_);
  Matrix::resizeOrCreate(matGrad_, length, numClasses_);
  Matrix::resizeOrCreate(beta_, length, numClasses_);
  real* b = b_->getData();
  if (needWGrad) {
    Matrix::resizeOrCreate(matWGrad_, numClasses_ + 2, numClasses_);
    matWGrad_->zeroMem();
    da_ = matWGrad_->subRowMatrix(0, 1);
    db_ = matWGrad_->subRowMatrix(1, 2);
    dw_ = matWGrad_->subRowMatrix(2, numClasses_ + 2);
  }
  real* alpha = alpha_->getData();
  real* beta = beta_->getData();
  real* expW = expW_->getData();
  real* expX = expX_->getData();
  real* grad = matGrad_->getData();

  for (int i = 0; i < numClasses_; ++i) {
    beta[(length - 1) * numClasses_ + i] = exp(b[i]);
@@ -140,23 +139,21 @@ void LinearChainCRF::backward(real* x, int* s, int length, bool needWGrad) {
    normalizeL1(beta + k * numClasses_, numClasses_);
  }

  matGrad_->dotMul(*alpha_, *beta_);
  matGrad_->rowNormalizeL1(*matGrad_);
  for (int k = 0; k < length; ++k) {
    grad[k * numClasses_ + s[k]] -= (real)1;
  }

  if (needWGrad) {
    da_->add(*matGrad_->subMatrix(/* startRow= */ 0, /* numRows= */ 1));
    db_->add(*matGrad_->subMatrix(/* startRow= */ length - 1, 1));

    beta_->dotMul(*beta_, *expX_);
    beta_->rowNormalizeL1(*beta_);

    real* dw = dw_->getData();
    for (int k = 1; k < length; ++k) {
      real sum = 0;
      for (int i = 0; i < numClasses_; ++i) {
        for (int j = 0; j < numClasses_; ++j) {
@@ -174,6 +171,7 @@ void LinearChainCRF::backward(real* x, int* s, int length, bool needWGrad) {
      }
      dw[s[k - 1] * numClasses_ + s[k]] -= (real)1;
    }
  }
}

void LinearChainCRF::decode(real* x, int* s, int length) {
......
@@ -21,7 +21,7 @@ namespace paddle {
class LinearChainCRF {
public:
  /**
   * The size of para must be \f$(numClasses + 2) * numClasses\f$.
   * The first numClasses values of para are for starting weights (\f$a\f$).
   * The next numClasses values of para are for ending weights (\f$b\f$),
   * The remaining values are for transition weights (\f$w\f$).
@@ -34,7 +34,7 @@ public:
   * all possible
   * sequences is \f$1\f$, and \f$x\f$ is the input feature to the CRF.
   */
  LinearChainCRF(int numClasses, real* para);

  /**
   * Calculate the negative log likelihood of s given x.
@@ -45,29 +45,45 @@ public:

  /**
   * Calculate the gradient with respect to x, a, b, and w.
   * backward() can only be called after a corresponding call to forward() with
   * the same x, s and length.
   * The gradient with respect to a, b, and w will not be calculated if
   * needWGrad is false.
   * @note Please call getWGrad() and getXGrad() to get the gradient with
   * respect to (a, b, w) and x respectively.
   */
  void backward(real* x, int* s, int length, bool needWGrad);

  /**
   * Find the most probable sequence given x. The result will be stored in s.
   */
  void decode(real* x, int* s, int length);
/*
* Return the gradient with respect to (a, b, w). It can only be called after
* a corresponding call to backward().
*/
MatrixPtr getWGrad() { return matWGrad_; }
/*
* Return the gradient with respect to x. It can only be called after a
* corresponding call to backward().
*/
MatrixPtr getXGrad() { return matGrad_; }
protected:
  int numClasses_;
  MatrixPtr a_;
  MatrixPtr b_;
  MatrixPtr w_;
  MatrixPtr matWGrad_;
  MatrixPtr da_;
  MatrixPtr db_;
  MatrixPtr dw_;
  MatrixPtr ones_;

  MatrixPtr expX_;
  MatrixPtr matGrad_;
  MatrixPtr alpha_;
  MatrixPtr beta_;
  MatrixPtr maxX_;
......
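A minimal sketch of how the reworked interface fits together. The buffer setup below is illustrative (sizes and data are placeholders), but the constructor and the forward/backward/getXGrad/getWGrad calls follow the declarations above:

```cpp
#include <vector>
#include "paddle/gserver/layers/LinearChainCRF.h"

using namespace paddle;  // NOLINT

// Hedged usage sketch of the new LinearChainCRF API (illustrative only).
void crfGradientSketch() {
  const int numClasses = 4;
  const int length = 10;
  std::vector<real> para(numClasses * (numClasses + 2));  // [a; b; w]
  std::vector<real> x(length * numClasses);               // input features
  std::vector<int> s(length, 0);                          // a label sequence

  LinearChainCRF crf(numClasses, para.data());          // no grad pointer anymore
  real cost = crf.forward(x.data(), s.data(), length);  // negative log likelihood
  crf.backward(x.data(), s.data(), length, /* needWGrad= */ true);

  MatrixPtr dx = crf.getXGrad();  // gradient w.r.t. x: length x numClasses
  MatrixPtr dw = crf.getWGrad();  // gradient w.r.t. (a, b, w): (numClasses + 2) x numClasses
  (void)cost; (void)dx; (void)dw;
}
```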
@@ -18,6 +18,14 @@ add_unittest_without_exec(test_LayerGrad
add_test(NAME test_LayerGrad
    COMMAND test_LayerGrad)
################ test_CRFLayerGrad ####################
add_unittest_without_exec(test_CRFLayerGrad
test_CRFLayerGrad.cpp
LayerGradUtil.cpp)
add_test(NAME test_CRFLayerGrad
COMMAND test_CRFLayerGrad)
add_unittest_without_exec(test_ActivationGrad
    test_ActivationGrad.cpp
    LayerGradUtil.cpp)
......
/* Copyright (c) 2016 Baidu, Inc. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
#include <gtest/gtest.h>
#include "ModelConfig.pb.h"
#include "paddle/gserver/layers/DataLayer.h"
#include "paddle/gserver/layers/LinearChainCRF.h"
#include "paddle/trainer/Trainer.h"
#include "LayerGradUtil.h"
#include "paddle/testing/TestUtil.h"
using namespace paddle; // NOLINT
DECLARE_int32(gpu_id);
DECLARE_bool(thread_local_rand_use_global_seed);
static inline bool getNextSequence(std::vector<int>& seq, int numClasses) {
for (auto& v : seq) {
if (++v < numClasses) {
return true;
}
v = 0;
}
return false;
}
// log(exp(x) + exp(y))
static inline real logSum(real x, real y) {
real maxValue = std::max(x, y);
if (std::isinf(maxValue)) {
return -std::numeric_limits<real>::infinity();
} else {
return maxValue + log(exp(x - maxValue) + exp(y - maxValue));
}
}
static inline std::vector<int> genRandLabels(int numClasses, int length) {
std::vector<int> labels(length);
for (int i = 0; i < length; ++i) {
labels[i] = rand() % numClasses; // NOLINT
}
return labels;
}
TEST(CRFLayer, cost) {
const int numClasses = 4;
CpuVector para(numClasses * (numClasses + 2));
real* a = para.getData();
real* b = para.getData() + numClasses;
real* w = para.getData() + 2 * numClasses;
LinearChainCRF crf(4, para.getData());
for (int length : {1, 2, 3, 10}) {
for (int tries = 0; tries < 10; ++tries) {
CpuMatrix x(length, numClasses);
x.randomizeUniform();
para.randnorm(0, 2);
std::vector<int> goldenLabels = genRandLabels(numClasses, length);
real cost = crf.forward(x.getData(), goldenLabels.data(), length);
real logZ = -std::numeric_limits<real>::infinity();
real logNominator = -std::numeric_limits<real>::infinity();
std::vector<int> testResult(length, 0);
do {
real score = a[testResult.front()];
score += x.getElement(0, testResult.front());
for (int k = 1; k < length; ++k) {
score += x.getElement(k, testResult[k]) +
w[numClasses * testResult[k - 1] + testResult[k]];
}
score += b[testResult.back()];
logZ = logSum(logZ, score);
if (goldenLabels == testResult) {
logNominator = score;
}
} while (getNextSequence(testResult, numClasses));
real trueCost = -logNominator + logZ;
real diff = fabs(trueCost - cost);
diff /= fabs(cost) < fabs(trueCost) ? fabs(cost) : fabs(trueCost);
VLOG(1) << "cost=" << cost << " trueCost=" << trueCost << " diff=" << diff
<< std::endl;
if (typeid(real) == typeid(double)) { // NOLINT
EXPECT_LE(diff, 1e-10);
} else {
EXPECT_LE(diff, 5e-3);
}
}
}
}
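// In formula form, the brute-force check above computes, for the gold
// sequence s and every candidate sequence s' of length L,
//   score(s') = a[s'_1] + b[s'_L] + sum_k x[k][s'_k] + sum_{k>1} w[s'_{k-1}][s'_k]
//   trueCost  = -score(s) + log(sum over s' of exp(score(s')))
// and checks that it matches the negative log likelihood returned by
// forward(), with logSum() accumulating the partition term stably.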
inline real epsilon() { return typeid(real) == typeid(double) ? 1e-10 : 0.06; }
TestConfig initTestConfig(size_t numClasses, bool withWeight) {
TestConfig config;
config.layerConfig.set_type("crf");
config.layerConfig.set_size(numClasses);
config.biasSize = 0;
config.inputDefs.push_back({INPUT_SEQUENCE_DATA,
"layer_0",
numClasses,
numClasses * (numClasses + 2)});
config.layerConfig.add_inputs();
config.inputDefs.push_back(
{INPUT_SEQUENCE_LABEL, "layer_label", numClasses, 0});
config.layerConfig.add_inputs();
if (withWeight) {
config.inputDefs.push_back({INPUT_DENSE_DIM_DATA, "layer_weight", 1, 0});
config.layerConfig.add_inputs();
}
return config;
}
TEST(Layer, CRFLayer) {
size_t numClasses = 10;
for (int tries = 0; tries < 5; ++tries) {
TestConfig config = initTestConfig(numClasses, /* withWeight= */ false);
for (int length : {1, 3, 100}) {
// GPU is not supported yet
testLayerGrad(config,
"crf",
length,
/* trans= */ false,
/* useGpu= */ false,
/* useWeight= */ false,
epsilon());
}
}
}
TEST(Layer, CRFLayerUseWeight) {
size_t numClasses = 10;
for (int tries = 0; tries < 5; ++tries) {
TestConfig config = initTestConfig(numClasses, /* withWeight= */ true);
for (int length : {1, 3, 100}) {
// GPU is not supported yet
testLayerGrad(config,
"crf",
length,
/* trans= */ false,
/* useGpu= */ false,
/* useWeight= */ false,
epsilon());
}
}
}
int main(int argc, char** argv) {
initMain(argc, argv);
hl_start();
hl_init(FLAGS_gpu_id);
FLAGS_thread_local_rand_use_global_seed = true;
srand(1);
testing::InitGoogleTest(&argc, argv);
return RUN_ALL_TESTS();
}
@@ -276,27 +276,6 @@ TEST(Layer, AddtoLayer) {
  }
}
TEST(Layer, CRFLayer) {
TestConfig config;
config.layerConfig.set_type("crf");
config.layerConfig.set_size(10);
config.biasSize = 0;
config.inputDefs.push_back({INPUT_SEQUENCE_DATA, "layer_0", 10, 120});
config.inputDefs.push_back({INPUT_SEQUENCE_LABEL, "layer_1", 10, 0});
config.layerConfig.add_inputs();
config.layerConfig.add_inputs();
// Not support GPU now
testLayerGrad(config,
"crf",
100,
/* trans */ false,
/* useGpu */ false,
false /*useWeight*/,
0.03 /*epsilon*/);
}
TEST(Layer, CTCLayer) {
  TestConfig config;
  config.layerConfig.set_type("ctc");
......
@@ -36,7 +36,7 @@ TEST(LinearChainCRF, decoding) {
  real* a = para.getData();
  real* b = para.getData() + numClasses;
  real* w = para.getData() + 2 * numClasses;
  LinearChainCRF crf(4, para.getData());
  for (int length : {1, 2, 3, 10}) {
    for (int tries = 0; tries < 10; ++tries) {
      CpuMatrix x(length, numClasses);
......
@@ -30,9 +30,6 @@ namespace paddle {
 * the first solution arms with sendThreads_/recvThreads_ and sendJobQueue_/
 * recvJobQueue_. the second solution use some shared thread pool to manage
 * connections.
* In addition to pserver, metric learning also uses network to exchange
* features within multi-machines, so this class just abstracts some basic
* threads and queue buffer creation for them
 */
class BaseClient {
protected:
......
@@ -367,11 +367,8 @@ void ParameterServer2::addGradient(const SendParameterRequest& request,
                                   std::vector<Buffer>* outputBuffers) {
  VLOG(1) << "pserver: addGradient";

  // forwardbackward delta from all trainers
  // indicate the fluctuation caused by forwardbackward.
#ifndef PADDLE_METRIC_LEARNING
// @TODO(yanfei):
// add support tuning forwardbackward balance for metric learning
  if (!numPassFinishClients_) {
    REGISTER_BARRIER_DELTA_SERVER_SET(
        *statSet_,
@@ -381,7 +378,6 @@ void ParameterServer2::addGradient(const SendParameterRequest& request,
        request.forwardbackward_time(),
        isSparseServer_ ? "_sparseUpdater" : "_denseUpdater");
  }
#endif
  {
    /// approximately pure network overhead
......
@@ -18,6 +18,7 @@ ENV WITH_GPU=OFF
ENV WITH_AVX=${WITH_AVX:-ON}
ENV WITH_DOC=${WITH_DOC:-OFF}
ENV WITH_STYLE_CHECK=${WITH_STYLE_CHECK:-OFF}
ENV DOCKER_BUILD=TRUE
ENV HOME /root
......
@@ -18,6 +18,7 @@ ENV WITH_GPU=ON
ENV WITH_AVX=${WITH_AVX:-ON}
ENV WITH_DOC=${WITH_DOC:-OFF}
ENV WITH_STYLE_CHECK=${WITH_STYLE_CHECK:-OFF}
ENV DOCKER_BUILD=TRUE
ENV HOME /root
......
Because we only support building on Ubuntu, users on other operating systems such as CoreOS, CentOS, Mac OS X, or Windows have to develop inside Docker, so we need to be able to build locally modified code there.

We probably need two Docker images:

1. Development image: contains no source code, but does contain the development environment with all tools pre-installed; that is, Dockerfile.dev needs neither COPY nor RUN git clone. Although this image is independent of the source code, different versions of the source depend on different third-party libraries, so the image tag should still include the git branch/tag name, e.g. `paddlepaddle/paddle:dev-0.10.0rc1`, where 0.10.0rc1 is a branch name and rc means release candidate. Once formally released, it becomes a tag on the master branch, named 0.10.0.

1. Production image: contains neither the build environment nor the source code, only the built libpaddle.so and the necessary Python packages; it is the image used to run applications on a Kubernetes cluster, e.g. `paddlepaddle/paddle:0.10.0rc1`.

The process of producing 2. from 1. is as follows:

1. Develop on the local machine (host). Suppose the source code is at `~/work/paddle`.

1. Build our source code with the dev image:

   ```bash
   docker run -it -p 2022:22 -v $PWD:/paddle paddlepaddle/paddle:dev-0.10.0rc1 /paddle/build.sh
   ```

   Note that the `-v` flag maps the contents of the source directory on the host into the `/paddle` directory inside the container, so `/paddle/build.sh` inside the container is the `build.sh` in the source directory. The command above invokes the local source tree's build.sh to build the local source; the result ends up in `/paddle/build` inside the container, i.e. in the `build` subdirectory of the local source directory.

1. We want the `build.sh` script above to generate a Dockerfile in the `build` subdirectory, so that we can run:

   ```bash
   docker build -t paddle ./build
   ```

   to produce our production image.

1. With this production image, we may want to docker push it under our own account on dockerhub.com and then use it to launch local or remote (Kubernetes) jobs:

   ```bash
   docker tag paddle yiwang/paddle:did-some-change
   docker push
   paddlectl run yiwang/paddle:did-some-change /paddle/demo/mnist/train.py
   ```

   Here paddlectl would be a script of our own that calls kubectl to launch a job on a Kubernetes cluster.

Background from earlier discussions:

["PR 1599"](https://github.com/PaddlePaddle/Paddle/pull/1599)
["PR 1598"](https://github.com/PaddlePaddle/Paddle/pull/1598)
@@ -57,6 +57,12 @@ if [[ ${BUILD_AND_INSTALL:-OFF} == 'ON' ]]; then
    pip install /usr/local/opt/paddle/share/wheels/py_paddle*linux*.whl
    pip install /usr/local/opt/paddle/share/wheels/paddle*.whl
    paddle version
if [[ ${DOCKER_BUILD:-FALSE} == 'TRUE' ]]; then
# reduce docker image size
rm -rf /paddle/build
rm -rf /usr/local/opt/paddle/share/wheels/
fi
fi

trap : 0
@@ -30,10 +30,6 @@ limitations under the License. */
#include "TrainerConfigHelper.h"
#include "TrainerInternal.h"
#ifdef PADDLE_METRIC_LEARNING
#include "paddle/internals/metric_learning/MetricTrainer.h"
#endif
DECLARE_int32(num_passes);

namespace paddle {

@@ -201,12 +197,8 @@ protected:
  // parameter util
  std::unique_ptr<ParameterUtil> paramUtil_;
#ifdef PADDLE_METRIC_LEARNING
MetricTrainer trainerInternal_;
#else
  // trainer Internal
  TrainerInternal trainerInternal_;
#endif
};

}  // namespace paddle
@@ -30,7 +30,6 @@ DEFINE_bool(parallel_nn,
DEFINE_int32(trainer_count, 1, "Defined how many trainers to train");
DEFINE_int32(gpu_id, 0, "Which gpu core to use");
DEFINE_int32(port, 20134, "Listening port for pserver");
DEFINE_int32(data_server_port, 21134, "Listening port for dserver");
DEFINE_int32(ports_num,
             1,
             "Number of ports for sending dense parameter,"
......
@@ -19,7 +19,6 @@ limitations under the License. */
DECLARE_bool(parallel_nn);
DECLARE_int32(async_count);
DECLARE_int32(port);
DECLARE_int32(data_server_port);
DECLARE_bool(use_gpu);
DECLARE_int32(gpu_id);
DECLARE_int32(trainer_count);
......
@@ -23,11 +23,6 @@ enum PassType {
  PASS_TEST,    // Test pass
  PASS_GC,      // Gradient Check pass
  PASS_METRIC,  // pass for generate template output with no drop rate.
// pass for metric learning training with metric learning error, only used
// when we are doing KNN evaluation.
PASS_METRIC_TRAIN,
PASS_METRIC_TRAIN_WITH_NOERROR, // Pass for metric learning training
// with no evaluation.
};

enum ParameterType {
......
@@ -2301,14 +2301,9 @@ def Generator(

@config_layer('expand')
class ExpandLayer(LayerBase):
    def __init__(self, name, inputs, trans_type='non-seq', bias=False, **xargs):
        super(ExpandLayer, self).__init__(
            name, 'expand', 0, inputs=inputs, **xargs)
        config_assert(
            len(self.inputs) == 2, 'ExpandLayer takes 2 and only 2 inputs')
        self.config.trans_type = trans_type
@@ -2339,11 +2334,10 @@ class MaxLayer(LayerBase):
                 inputs,
                 trans_type='non-seq',
                 active_type='linear',
                 bias=False,
                 output_max_index=None,
                 **xargs):
        super(MaxLayer, self).__init__(name, 'max', 0, inputs=inputs, **xargs)
        config_assert(len(self.inputs) == 1, 'MaxLayer must have 1 input')
        self.config.trans_type = trans_type
        self.config.active_type = active_type
@@ -2390,15 +2384,15 @@ class SequenceLastInstanceLayer(LayerBase):
                 inputs,
                 active_type='linear',
                 trans_type='non-seq',
                 bias=False,
                 **xargs):
        super(SequenceLastInstanceLayer, self).__init__(
            name,
            'seqlastins',
            0,
            inputs=inputs,
            active_type=active_type,
            **xargs)
        config_assert(
            len(inputs) == 1, 'SequenceLastInstanceLayer must have 1 input')
        self.config.trans_type = trans_type
@@ -2410,39 +2404,29 @@ class SequenceLastInstanceLayer(LayerBase):

@config_layer('seqfirstins')
class SequenceFirstInstanceLayer(SequenceLastInstanceLayer):
    def __init__(self,
                 name,
                 inputs,
                 active_type='linear',
                 trans_type='non-seq',
                 bias=False,
                 **xargs):
        super(SequenceFirstInstanceLayer, self).__init__(
            name, inputs=inputs, active_type=active_type, bias=bias, **xargs)
        self.config.trans_type = trans_type
        self.config.select_first = True
@config_layer('seqconcat')
class SequenceConcatLayer(LayerBase):
    def __init__(self, name, inputs, active_type='linear', bias=False, **xargs):
        super(SequenceConcatLayer, self).__init__(
            name,
            'seqconcat',
            0,
            inputs=inputs,
            active_type=active_type,
            **xargs)
        config_assert(
            len(inputs) == 2, 'SequenceConcatLayer must have 2 inputs')
        for input_index in xrange(len(self.inputs)):
@@ -2458,15 +2442,15 @@ class SequenceReshapeLayer(LayerBase):
                 size,
                 inputs,
                 active_type='linear',
                 bias=False,
                 **xargs):
        super(SequenceReshapeLayer, self).__init__(
            name,
            'seqreshape',
            size,
            inputs=inputs,
            active_type=active_type,
            **xargs)
        config_assert(
            len(inputs) == 1, 'SequenceReshapeLayer must have 1 inputs')
        self.set_layer_size(size)
@@ -2475,19 +2459,9 @@ class SequenceReshapeLayer(LayerBase):

@config_layer('subseq')
class SubSequenceLayer(LayerBase):
    def __init__(self, name, inputs, active_type='linear', bias=False, **xargs):
        super(SubSequenceLayer, self).__init__(
            name, 'subseq', 0, inputs=inputs, active_type=active_type, **xargs)
        config_assert(len(inputs) == 3, 'SubSequenceLayer must have 3 inputs')
        input_layer0 = self.get_input_layer(0)
        size = input_layer0.size
@@ -2644,15 +2618,10 @@ class AverageLayer(LayerBase):
                 average_strategy='average',
                 trans_type='non-seq',
                 active_type='linear',
                 bias=False,
                 **xargs):
        super(AverageLayer, self).__init__(
            name, 'average', 0, inputs=inputs, active_type=active_type, **xargs)
        self.config.average_strategy = average_strategy
        self.config.trans_type = trans_type
        config_assert(len(inputs) == 1, 'AverageLayer must have 1 input')
@@ -2676,9 +2645,9 @@ class CosSimLayer(LayerBase):

@config_layer('tensor')
class TensorLayer(LayerBase):
    def __init__(self, name, size, inputs, bias=True, **xargs):
        super(TensorLayer, self).__init__(
            name, 'tensor', size, inputs=inputs, **xargs)
        config_assert(len(self.inputs) == 2, 'TensorLayer must have 2 inputs')
        config_assert(size > 0, 'size must be positive')
        config_assert(inputs[1].parameter_name == None,
@@ -3029,7 +2998,7 @@ class CRFLayer(LayerBase):
        super(CRFLayer, self).__init__(name, 'crf', size, inputs, device=device)
        config_assert(2 <= len(self.inputs) <= 3,
                      'CRFLayer must have 2 or 3 inputs')
        self.create_input_parameter(0, size * (size + 2), [size + 2, size])
        self.config.coeff = coeff
@@ -3051,7 +3020,7 @@ class CRFDecodingLayer(LayerBase):
        config_assert(
            len(self.inputs) <= 2,
            'CRFDecodingLayer cannot have more than 2 inputs')
        self.create_input_parameter(0, size * (size + 2), [size + 2, size])

@config_layer('ctc')
......
@@ -239,9 +239,9 @@ parameters {
  name: "___crf_layer_0__.w0"
  size: 24
  initial_mean: 0.0
  initial_std: 0.408248290464
  dims: 6
  dims: 4
  initial_strategy: 0
  initial_smart: true
}
......
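The new initial_std is consistent with smart initialization drawing its scale from the first parameter dimension, which this commit flips from 4 to 6 (an inference from the numbers, not stated in the diff):

$$\frac{1}{\sqrt{4}} = 0.5, \qquad \frac{1}{\sqrt{6}} \approx 0.408248290464$$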