diff --git a/cmake/ccache.cmake b/cmake/ccache.cmake index 968d41801d73c4082d2673efe415c1cdd0305b5e..900f59d4cb83bc9ce1893b2d3bd95f5a08b164bb 100644 --- a/cmake/ccache.cmake +++ b/cmake/ccache.cmake @@ -1,9 +1,9 @@ # Use ccache if found ccache program -find_program(CCACHE_FOUND ccache) +find_program(CCACHE_PATH ccache) -if(CCACHE_FOUND) +if(CCACHE_PATH) message(STATUS "Ccache is founded, use ccache to speed up compile.") - set_property(GLOBAL PROPERTY RULE_LAUNCH_COMPILE ccache) - set_property(GLOBAL PROPERTY RULE_LAUNCH_LINK ccache) -endif(CCACHE_FOUND) \ No newline at end of file + set_property(GLOBAL PROPERTY RULE_LAUNCH_COMPILE ${CCACHE_PATH}) + set_property(GLOBAL PROPERTY RULE_LAUNCH_LINK ${CCACHE_PATH}) +endif(CCACHE_PATH) diff --git a/cmake/external/protobuf.cmake b/cmake/external/protobuf.cmake index 1575d8e9f5613e972df672b1daae145595676e8b..446a7532c55bd3ca66662efe70db93551580b8cc 100644 --- a/cmake/external/protobuf.cmake +++ b/cmake/external/protobuf.cmake @@ -16,6 +16,14 @@ INCLUDE(ExternalProject) FIND_PACKAGE(Protobuf 3.1) +IF(PROTOBUF_FOUND) + EXEC_PROGRAM(${PROTOBUF_PROTOC_EXECUTABLE} ARGS --version OUTPUT_VARIABLE PROTOBUF_VERSION) + STRING(REGEX MATCH "[0-9]+.[0-9]+" PROTOBUF_VERSION "${PROTOBUF_VERSION}") + IF (${PROTOBUF_VERSION} VERSION_LESS "3.1.0") + SET(PROTOBUF_FOUND OFF) + ENDIF() +ENDIF(PROTOBUF_FOUND) + IF(NOT PROTOBUF_FOUND) SET(PROTOBUF_SOURCES_DIR ${THIRD_PARTY_PATH}/protobuf) SET(PROTOBUF_INSTALL_DIR ${THIRD_PARTY_PATH}/install/protobuf) diff --git a/cmake/util.cmake b/cmake/util.cmake index 24ad5c815ca20d9b6b317b1be4d2dc93a9e06fba..3640e4651fdd8b491f63875a7ea886afcadf978a 100644 --- a/cmake/util.cmake +++ b/cmake/util.cmake @@ -71,21 +71,10 @@ function(link_paddle_exe TARGET_NAME) generate_rdma_links() endif() - if(WITH_METRIC) - if(WITH_GPU) - set(METRIC_LIBS paddle_metric_learning paddle_dserver_lib metric metric_cpu) - else() - set(METRIC_LIBS paddle_metric_learning paddle_dserver_lib metric_cpu) - endif() - else() - set(METRIC_LIBS "") - endif() - target_circle_link_libraries(${TARGET_NAME} ARCHIVE_START paddle_gserver paddle_function - ${METRIC_LIBS} ARCHIVE_END paddle_pserver paddle_trainer_lib @@ -95,7 +84,6 @@ function(link_paddle_exe TARGET_NAME) paddle_parameter paddle_proto paddle_cuda - ${METRIC_LIBS} ${EXTERNAL_LIBS} ${CMAKE_THREAD_LIBS_INIT} ${CMAKE_DL_LIBS} diff --git a/doc/faq/index_cn.rst b/doc/faq/index_cn.rst index 6d5367177da2af6276698f94f86664a5b506dca2..df5e172252277a881480cd2816eb901b711abe6b 100644 --- a/doc/faq/index_cn.rst +++ b/doc/faq/index_cn.rst @@ -286,3 +286,16 @@ PaddlePaddle的参数使用名字 :code:`name` 作为参数的ID,相同名字 .. code-block:: bash paddle train --use_gpu=true --trainer_count=2 --gpu_id=2 + + +12. 训练过程中出现 :code:`Floating point exception`, 训练因此退出怎么办? +------------------------------------------------------------------------ + +Paddle二进制在运行时捕获了浮点数异常,只要出现浮点数异常(即训练过程中出现NaN或者Inf),立刻退出。浮点异常通常的原因是浮点数溢出、除零等问题。 +主要原因包括两个方面: + +* 训练过程中参数或者训练过程中的梯度尺度过大,导致参数累加,乘除等时候,导致了浮点数溢出。 +* 模型一直不收敛,发散到了一个数值特别大的地方。 +* 训练数据有问题,导致参数收敛到了一些奇异的情况。或者输入数据尺度过大,有些特征的取值达到数百万,这时进行矩阵乘法运算就可能导致浮点数溢出。 + +主要的解决办法是减小学习律或者对数据进行归一化处理。 diff --git a/doc/getstarted/build_and_install/docker_install_cn.rst b/doc/getstarted/build_and_install/docker_install_cn.rst index cf7dddd073ceb465781fcb235595788a294f5d96..af889ec9d1b4f43f8e4a266b21822f773ab62ec2 100644 --- a/doc/getstarted/build_and_install/docker_install_cn.rst +++ b/doc/getstarted/build_and_install/docker_install_cn.rst @@ -4,138 +4,137 @@ PaddlePaddle的Docker容器使用方式 PaddlePaddle目前唯一官方支持的运行的方式是Docker容器。因为Docker能在所有主要操作系统(包括Linux,Mac OS X和Windows)上运行。 请注意,您需要更改 `Dockers设置 `_ 才能充分利用Mac OS X和Windows上的硬件资源。 -通过Docker容器开发PaddlePaddle +纯CPU和GPU的docker镜像使用说明 ------------------------------ -开发人员可以在Docker中开发PaddlePaddle。这样开发人员可以以一致的方式在不同的平台上工作 - Linux,Mac OS X和Windows。 +对于每一个PaddlePaddle版本,我们都会发布两个Docker镜像:纯CPU的和GPU的。 +我们通过设置 `dockerhub.com `_ 自动生成最新的docker镜像: +`paddledev/paddle:0.10.0rc1-cpu` 和 `paddledev/paddle:0.10.0rc1-gpu`。 -1. 将开发环境构建为Docker镜像 - - .. code-block:: bash +以交互容器方式运行纯CPU的镜像: - git clone --recursive https://github.com/PaddlePaddle/Paddle - cd Paddle - docker build -t paddle:dev -f paddle/scripts/docker/Dockerfile . +.. code-block:: bash + docker run -it --rm paddledev/paddle:0.10.0rc1-cpu /bin/bash - 请注意,默认情况下,:code:`docker build` 不会将源码导入到镜像中并编译它。如果我们想这样做,需要设置一个参数: +或者,可以以后台进程方式运行容器: - .. code-block:: bash +.. code-block:: bash - docker build -t paddle:dev -f paddle/scripts/docker/Dockerfile --build-arg BUILD_AND_INSTALL=ON . + docker run -d -p 2202:22 -p 8888:8888 paddledev/paddle:0.10.0rc1-cpu +然后用密码 :code:`root` SSH进入容器: -2. 运行开发环境 +.. code-block:: bash - 当我们编译好了 :code:`paddle:dev`, 我们可以在docker容器里做开发,源代码可以通过挂载本地文件来被载入Docker的开发环境里面: - - .. code-block:: bash + ssh -p 2202 root@localhost - docker run -d -p 2202:22 -v $PWD:/paddle paddle:dev +SSH方式的一个优点是我们可以从多个终端进入容器。比如,一个终端运行vi,另一个终端运行Python。另一个好处是我们可以把PaddlePaddle容器运行在远程服务器上,并在笔记本上通过SSH与其连接。 - 以上代码会启动一个带有PaddlePaddle开发环境的docker容器,源代码会被挂载到 :code:`/paddle` 。 - 请注意, :code:`paddle:dev` 的默认入口是 :code:`sshd` 。以上的 :code:`docker run` 命令其实会启动一个在2202端口监听的SSHD服务器。这样,我们就能SSH进入我们的开发容器了: - - .. code-block:: bash +以上方法在GPU镜像里也能用-只是请不要忘记按装CUDA驱动,以及告诉Docker: - ssh root@localhost -p 2202 +.. code-block:: bash -3. 在Docker开发环境中编译与安装PaddlPaddle代码 + export CUDA_SO="$(\ls /usr/lib64/libcuda* | xargs -I{} echo '-v {}:{}') $(\ls /usr/lib64/libnvidia* | xargs -I{} echo '-v {}:{}')" + export DEVICES=$(\ls /dev/nvidia* | xargs -I{} echo '--device {}:{}') + docker run ${CUDA_SO} ${DEVICES} -it paddledev/paddle:0.10.0rc1-gpu - 当在容器里面的时候,可以用脚本 :code:`paddle/scripts/docker/build.sh` 来编译、安装与测试PaddlePaddle: - - .. code-block:: bash - - /paddle/paddle/scripts/docker/build.sh - 以上指令会在 :code:`/paddle/build` 中编译PaddlePaddle。通过以下指令可以运行单元测试: - - .. code-block:: bash +运行PaddlePaddle书籍 +--------------------- - cd /paddle/build - ctest +Jupyter Notebook是一个开源的web程序,大家可以通过它制作和分享带有代码、公式、图表、文字的交互式文档。用户可以通过网页浏览文档。 -4. 在Docker容器中运行PaddlePaddle书籍 +PaddlePaddle书籍是为用户和开发者制作的一个交互式的Jupyter Nodebook。 +如果您想要更深入了解deep learning,PaddlePaddle书籍一定是您最好的选择。 - Jupyter Notebook是一个开源的web程序,大家可以通过它制作和分享带有代码、公式、图表、文字的交互式文档。用户可以通过网页浏览文档。 +当您进入容器内之后,只用运行以下命令: - PaddlePaddle书籍是为用户和开发者制作的一个交互式的Jupyter Nodebook。 - 如果您想要更深入了解deep learning,PaddlePaddle书籍一定是您最好的选择。 - - 当您进入容器内之后,只用运行以下命令: +.. code-block:: bash + + jupyter notebook - .. code-block:: bash - - jupyter notebook +然后在浏览器中输入以下网址: + +.. code-block:: text - 然后在浏览器中输入以下网址: - - .. code-block:: text + http://localhost:8888/ - http://localhost:8888/ +就这么简单,享受您的旅程! - 就这么简单,享受您的旅程! -纯CPU和GPU的docker镜像 ----------------------- +非AVX镜像 +--------- -对于每一个PaddlePaddle版本,我们都会发布两个Docker镜像:纯CPU的和GPU的。我们通过设置 `dockerhub.com `_ 自动运行以下两个命令: +纯CPU镜像以及GPU镜像都会用到AVX指令集,但是2008年之前生产的旧电脑不支持AVX。以下指令能检查Linux电脑是否支持AVX: .. code-block:: bash - docker build -t paddle:cpu -f paddle/scripts/docker/Dockerfile --build-arg BUILD_AND_INSTALL=ON . - docker build -t paddle:gpu -f paddle/scripts/docker/Dockerfile.gpu --build-arg BUILD_AND_INSTALL=ON . + if cat /proc/cpuinfo | grep -i avx; then echo Yes; else echo No; fi -以交互容器方式运行纯CPU的镜像: +如果输出是No,我们就需要手动编译一个非AVX版本的镜像: .. code-block:: bash - docker run -it --rm paddledev/paddle:0.10.0rc1-cpu /bin/bash + cd ~ + git clone https://github.com/PaddlePaddle/Paddle.git + cd Paddle + docker build --build-arg WITH_AVX=OFF -t paddle:cpu-noavx -f paddle/scripts/docker/Dockerfile . + docker build --build-arg WITH_AVX=OFF -t paddle:gpu-noavx -f paddle/scripts/docker/Dockerfile.gpu . -或者,可以以后台进程方式运行容器: -.. code-block:: bash +通过Docker容器开发PaddlePaddle +------------------------------ - docker run -d -p 2202:22 paddledev/paddle:0.10.0rc1-cpu +开发人员可以在Docker中开发PaddlePaddle。这样开发人员可以以一致的方式在不同的平台上工作 - Linux,Mac OS X和Windows。 -然后用密码 :code:`root` SSH进入容器: +1. 将开发环境构建为Docker镜像 + + .. code-block:: bash -.. code-block:: bash + git clone --recursive https://github.com/PaddlePaddle/Paddle + cd Paddle + docker build -t paddle:dev -f paddle/scripts/docker/Dockerfile . - ssh -p 2202 root@localhost -SSH方式的一个优点是我们可以从多个终端进入容器。比如,一个终端运行vi,另一个终端运行Python。另一个好处是我们可以把PaddlePaddle容器运行在远程服务器上,并在笔记本上通过SSH与其连接。 + 请注意,默认情况下,:code:`docker build` 不会将源码导入到镜像中并编译它。如果我们想这样做,需要设置一个参数: + .. code-block:: bash -以上方法在GPU镜像里也能用-只是请不要忘记按装CUDA驱动,以及告诉Docker: + docker build -t paddle:dev -f paddle/scripts/docker/Dockerfile --build-arg BUILD_AND_INSTALL=ON . -.. code-block:: bash - export CUDA_SO="$(\ls /usr/lib64/libcuda* | xargs -I{} echo '-v {}:{}') $(\ls /usr/lib64/libnvidia* | xargs -I{} echo '-v {}:{}')" - export DEVICES=$(\ls /dev/nvidia* | xargs -I{} echo '--device {}:{}') - docker run ${CUDA_SO} ${DEVICES} -it paddledev/paddle:0.10.0rc1-gpu +2. 运行开发环境 + 当我们编译好了 :code:`paddle:dev`, 我们可以在docker容器里做开发,源代码可以通过挂载本地文件来被载入Docker的开发环境里面: + + .. code-block:: bash -非AVX镜像 ---------- + docker run -d -p 2202:22 -v $PWD:/paddle paddle:dev -纯CPU镜像以及GPU镜像都会用到AVX指令集,但是2008年之前生产的旧电脑不支持AVX。以下指令能检查Linux电脑是否支持AVX: + 以上代码会启动一个带有PaddlePaddle开发环境的docker容器,源代码会被挂载到 :code:`/paddle` 。 + 请注意, :code:`paddle:dev` 的默认入口是 :code:`sshd` 。以上的 :code:`docker run` 命令其实会启动一个在2202端口监听的SSHD服务器。这样,我们就能SSH进入我们的开发容器了: + + .. code-block:: bash -.. code-block:: bash + ssh root@localhost -p 2202 - if cat /proc/cpuinfo | grep -i avx; then echo Yes; else echo No; fi +3. 在Docker开发环境中编译与安装PaddlPaddle代码 -如果输出是No,我们就需要手动编译一个非AVX版本的镜像: + 当在容器里面的时候,可以用脚本 :code:`paddle/scripts/docker/build.sh` 来编译、安装与测试PaddlePaddle: + + .. code-block:: bash + + /paddle/paddle/scripts/docker/build.sh -.. code-block:: bash + 以上指令会在 :code:`/paddle/build` 中编译PaddlePaddle。通过以下指令可以运行单元测试: + + .. code-block:: bash - cd ~ - git clone https://github.com/PaddlePaddle/Paddle.git - cd Paddle - docker build --build-arg WITH_AVX=OFF -t paddle:cpu-noavx -f paddle/scripts/docker/Dockerfile . - docker build --build-arg WITH_AVX=OFF -t paddle:gpu-noavx -f paddle/scripts/docker/Dockerfile.gpu . + cd /paddle/build + ctest 文档 diff --git a/doc/getstarted/build_and_install/docker_install_en.rst b/doc/getstarted/build_and_install/docker_install_en.rst index a4f62b283563adffc725f20efae306240f30bf9d..606746597acc0da00588b7eb05935f6c05c169f2 100644 --- a/doc/getstarted/build_and_install/docker_install_en.rst +++ b/doc/getstarted/build_and_install/docker_install_en.rst @@ -9,6 +9,100 @@ Please be aware that you will need to change `Dockers settings of your hardware resource on Mac OS X and Windows. +Usage of CPU-only and GPU Images +---------------------------------- + +For each version of PaddlePaddle, we release 2 Docker images, a +CPU-only one and a CUDA GPU one. We do so by configuring +`dockerhub.com `_ +automatically generate the latest docker images `paddledev/paddle:0.10.0rc1-cpu` +and `paddledev/paddle:0.10.0rc1-gpu`. + +To run the CPU-only image as an interactive container: + +.. code-block:: bash + + docker run -it --rm paddledev/paddle:0.10.0rc1-cpu /bin/bash + +or, we can run it as a daemon container + +.. code-block:: bash + + docker run -d -p 2202:22 -p 8888:8888 paddledev/paddle:0.10.0rc1-cpu + +and SSH to this container using password :code:`root`: + +.. code-block:: bash + + ssh -p 2202 root@localhost + +An advantage of using SSH is that we can connect to PaddlePaddle from +more than one terminals. For example, one terminal running vi and +another one running Python interpreter. Another advantage is that we +can run the PaddlePaddle container on a remote server and SSH to it +from a laptop. + +Above methods work with the GPU image too -- just please don't forget +to install CUDA driver and let Docker knows about it: + +.. code-block:: bash + + export CUDA_SO="$(\ls /usr/lib64/libcuda* | xargs -I{} echo '-v {}:{}') $(\ls /usr/lib64/libnvidia* | xargs -I{} echo '-v {}:{}')" + export DEVICES=$(\ls /dev/nvidia* | xargs -I{} echo '--device {}:{}') + docker run ${CUDA_SO} ${DEVICES} -it paddledev/paddle:0.10.0rc1-gpu + + +PaddlePaddle Book +------------------ + +The Jupyter Notebook is an open-source web application that allows +you to create and share documents that contain live code, equations, +visualizations and explanatory text in a single browser. + +PaddlePaddle Book is an interactive Jupyter Notebook for users and developers. +We already exposed port 8888 for this book. If you want to +dig deeper into deep learning, PaddlePaddle Book definitely is your best choice. + +Once you are inside the container, simply issue the command: + +.. code-block:: bash + + jupyter notebook + +Then, you would back and paste the address into the local browser: + +.. code-block:: text + + http://localhost:8888/ + +That's all. Enjoy your journey! + + +Non-AVX Images +-------------- + +Please be aware that the CPU-only and the GPU images both use the AVX +instruction set, but old computers produced before 2008 do not support +AVX. The following command checks if your Linux computer supports +AVX: + +.. code-block:: bash + + if cat /proc/cpuinfo | grep -i avx; then echo Yes; else echo No; fi + + +If it doesn't, we will need to build non-AVX images manually from +source code: + +.. code-block:: bash + + cd ~ + git clone https://github.com/PaddlePaddle/Paddle.git + cd Paddle + docker build --build-arg WITH_AVX=OFF -t paddle:cpu-noavx -f paddle/scripts/docker/Dockerfile . + docker build --build-arg WITH_AVX=OFF -t paddle:gpu-noavx -f paddle/scripts/docker/Dockerfile.gpu . + + Development Using Docker ------------------------ @@ -82,103 +176,6 @@ Windows -- in a consistent way. cd /paddle/build ctest -4. Run PaddlePaddle Book under Docker Container - - The Jupyter Notebook is an open-source web application that allows - you to create and share documents that contain live code, equations, - visualizations and explanatory text in a single browser. - - PaddlePaddle Book is an interactive Jupyter Notebook for users and developers. - We already exposed port 8888 for this book. If you want to - dig deeper into deep learning, PaddlePaddle Book definitely is your best choice. - - Once you are inside the container, simply issue the command: - - .. code-block:: bash - - jupyter notebook - - Then, you would back and paste the address into the local browser: - - .. code-block:: text - - http://localhost:8888/ - - That's all. Enjoy your journey! - -CPU-only and GPU Images ------------------------ - -For each version of PaddlePaddle, we release 2 Docker images, a -CPU-only one and a CUDA GPU one. We do so by configuring -`dockerhub.com `_ -automatically runs the following commands: - -.. code-block:: bash - - docker build -t paddle:cpu -f paddle/scripts/docker/Dockerfile --build-arg BUILD_AND_INSTALL=ON . - docker build -t paddle:gpu -f paddle/scripts/docker/Dockerfile.gpu --build-arg BUILD_AND_INSTALL=ON . - - -To run the CPU-only image as an interactive container: - -.. code-block:: bash - - docker run -it --rm paddledev/paddle:0.10.0rc1-cpu /bin/bash - -or, we can run it as a daemon container - -.. code-block:: bash - - docker run -d -p 2202:22 paddledev/paddle:0.10.0rc1-cpu - -and SSH to this container using password :code:`root`: - -.. code-block:: bash - - ssh -p 2202 root@localhost - -An advantage of using SSH is that we can connect to PaddlePaddle from -more than one terminals. For example, one terminal running vi and -another one running Python interpreter. Another advantage is that we -can run the PaddlePaddle container on a remote server and SSH to it -from a laptop. - - -Above methods work with the GPU image too -- just please don't forget -to install CUDA driver and let Docker knows about it: - -.. code-block:: bash - - export CUDA_SO="$(\ls /usr/lib64/libcuda* | xargs -I{} echo '-v {}:{}') $(\ls /usr/lib64/libnvidia* | xargs -I{} echo '-v {}:{}')" - export DEVICES=$(\ls /dev/nvidia* | xargs -I{} echo '--device {}:{}') - docker run ${CUDA_SO} ${DEVICES} -it paddledev/paddle:0.10.0rc1-gpu - - -Non-AVX Images --------------- - -Please be aware that the CPU-only and the GPU images both use the AVX -instruction set, but old computers produced before 2008 do not support -AVX. The following command checks if your Linux computer supports -AVX: - -.. code-block:: bash - - if cat /proc/cpuinfo | grep -i avx; then echo Yes; else echo No; fi - - -If it doesn't, we will need to build non-AVX images manually from -source code: - -.. code-block:: bash - - cd ~ - git clone https://github.com/PaddlePaddle/Paddle.git - cd Paddle - docker build --build-arg WITH_AVX=OFF -t paddle:cpu-noavx -f paddle/scripts/docker/Dockerfile . - docker build --build-arg WITH_AVX=OFF -t paddle:gpu-noavx -f paddle/scripts/docker/Dockerfile.gpu . - Documentation ------------- diff --git a/doc/howto/usage/cmd_parameter/arguments_cn.md b/doc/howto/usage/cmd_parameter/arguments_cn.md index 2e2a2fcc54a09f4f41e4ebbc317e1409591ddd9c..f7aa525054468670f59309ddf9206af55bb77869 100644 --- a/doc/howto/usage/cmd_parameter/arguments_cn.md +++ b/doc/howto/usage/cmd_parameter/arguments_cn.md @@ -228,16 +228,6 @@ √√ - -度量学习(metric learning)external -√√√√ - - - -data_server_port -√√ - - 参数服务器(PServer)start_pserver √√ diff --git a/doc/howto/usage/cmd_parameter/arguments_en.md b/doc/howto/usage/cmd_parameter/arguments_en.md index e5546f0ddc78a9f8bdc306a19c2fe9a415463e5a..d1963067bda949b11ececefed3db7db1432c6223 100644 --- a/doc/howto/usage/cmd_parameter/arguments_en.md +++ b/doc/howto/usage/cmd_parameter/arguments_en.md @@ -228,16 +228,6 @@ It looks like there are a lot of arguments. However, most of them are for develo √√ - -metric learningexternal -√√√√ - - - -data_server_port -√√ - - PServerstart_pserver √√ diff --git a/doc/howto/usage/cmd_parameter/detail_introduction_cn.md b/doc/howto/usage/cmd_parameter/detail_introduction_cn.md index 3b573a324d541b024600a254d5266e517db229c5..b4625ba68cf23e5697554ba94efaf0b873f2c1de 100644 --- a/doc/howto/usage/cmd_parameter/detail_introduction_cn.md +++ b/doc/howto/usage/cmd_parameter/detail_introduction_cn.md @@ -180,15 +180,6 @@  - 用户可以自定义beam search的方法,编译成动态库,供PaddlePaddle加载。 该参数用于指定动态库路径. - 类型: string (默认: "", null). -## 度量学习(Metric Learning) -* `--external` - - 指示是否使用外部机器进行度量学习. - - 类型: bool (默认: 0). - -* `--data_server_port` - - 数据服务器(data server)的监听端口,主要用在度量学习中. - - 类型: int32 (默认: 21134). - ## 数据支持(DataProvider) * `--memory_threshold_on_load_data` diff --git a/doc/howto/usage/cmd_parameter/detail_introduction_en.md b/doc/howto/usage/cmd_parameter/detail_introduction_en.md index 33b7ec0d51a96ee126197e7aa819fdae0d3dc353..b681ebc81a355dfc1a7638a4463dff6979929a45 100644 --- a/doc/howto/usage/cmd_parameter/detail_introduction_en.md +++ b/doc/howto/usage/cmd_parameter/detail_introduction_en.md @@ -184,15 +184,6 @@ - Specify shared dynamic library. It can be defined out of paddle by user. - type: string (default: "", null). -## Metric Learning -* `--external` - - Whether to use external machine for metric learning. - - type: bool (default: 0). - -* `--data_server_port` - - Listening port for dserver (data server), dserver is mainly used in metric learning. - - type: int32 (default: 21134). - ## DataProvider * `--memory_threshold_on_load_data` diff --git a/paddle/gserver/gradientmachines/MultiGradientMachine.cpp b/paddle/gserver/gradientmachines/MultiGradientMachine.cpp index 4654d0206413ec198da62af12e294cd5b442e735..6ae60102b3e431727c0954e8b8073bfe0534f8ee 100644 --- a/paddle/gserver/gradientmachines/MultiGradientMachine.cpp +++ b/paddle/gserver/gradientmachines/MultiGradientMachine.cpp @@ -24,9 +24,6 @@ limitations under the License. */ DEFINE_bool(allow_only_one_model_on_one_gpu, true, "If true, do not allow multiple models on one GPU device"); -#ifdef PADDLE_METRIC_LEARNING -DECLARE_bool(external); -#endif namespace paddle { @@ -45,11 +42,7 @@ MultiGradientMachine::MultiGradientMachine(const ModelConfig& config, trainerBarrier_(FLAGS_trainer_count), allBarrier_(FLAGS_trainer_count + 1), inArgsCopied_(false) { -#ifdef PADDLE_METRIC_LEARNING - isPassGrad_ = FLAGS_external; -#else isPassGrad_ = false; -#endif numThreads_ = FLAGS_trainer_count; if (useGpu) { //! TODO(yuyang18): When useGpu=false && paddle is not compiled with gpu, diff --git a/paddle/gserver/layers/CRFDecodingLayer.cpp b/paddle/gserver/layers/CRFDecodingLayer.cpp index fdb46aba68e924480a6595b02c04ff4d1edd914d..191176ce985a8e12e33562f0cab73da6bbe667e6 100644 --- a/paddle/gserver/layers/CRFDecodingLayer.cpp +++ b/paddle/gserver/layers/CRFDecodingLayer.cpp @@ -24,7 +24,7 @@ bool CRFDecodingLayer::init(const LayerMap& layerMap, return false; } crf_.reset(new LinearChainCRF( - numClasses_, parameter_->getBuf(PARAMETER_VALUE)->getData(), nullptr)); + numClasses_, parameter_->getBuf(PARAMETER_VALUE)->getData())); return true; } diff --git a/paddle/gserver/layers/CRFLayer.cpp b/paddle/gserver/layers/CRFLayer.cpp index 02b7aaf17e89d889ca0030f9de2b5d7431a28fd3..0b544420097e9150f8489731b6379dea633e992c 100644 --- a/paddle/gserver/layers/CRFLayer.cpp +++ b/paddle/gserver/layers/CRFLayer.cpp @@ -42,6 +42,7 @@ bool CRFLayer::init(const LayerMap& layerMap, CHECK_EQ(parameters_[0]->getSize(), numClasses_ * (numClasses_ + 2)); parameter_ = parameters_[0]; + weight_.reset(new Weight(numClasses_ + 2, numClasses_, parameter_)); // We don't need sequenceStartPositions because each sample of output_ is // for the cost of one sequence. @@ -69,11 +70,7 @@ void CRFLayer::forward(PassType passType) { for (size_t i = 0; i < numSequences; ++i) { if (i >= crfs_.size()) { - crfs_.emplace_back(numClasses_, - parameter_->getBuf(PARAMETER_VALUE)->getData(), - parameter_->getBuf(PARAMETER_GRADIENT) - ? parameter_->getBuf(PARAMETER_GRADIENT)->getData() - : nullptr); + crfs_.emplace_back(numClasses_, weight_->getW()->getData()); } output_.value->getData()[i] = crfs_[i].forward(output.value->getData() + numClasses_ * starts[i], @@ -93,22 +90,25 @@ void CRFLayer::backward(const UpdateCallback& callback) { const int* starts = label.sequenceStartPositions->getData(false); int numSequences = label.sequenceStartPositions->getSize() - 1; + bool needWGrad = weight_->getWGrad() ? true : false; for (int i = 0; i < numSequences; ++i) { crfs_[i].backward(output.value->getData() + numClasses_ * starts[i], - output.grad->getData() + numClasses_ * starts[i], label.ids->getData() + starts[i], - starts[i + 1] - starts[i]); - if (weightLayer_) { - real weight = getInputValue(*weightLayer_)->getElement(i, 0); - MatrixPtr grad = output.grad->subRowMatrix(starts[i], starts[i + 1]); - grad->mulScalar(weight); + starts[i + 1] - starts[i], + needWGrad); + real instanceWeight = weightLayer_ + ? getInputValue(*weightLayer_)->getElement(i, 0) + : real(1.0f); + instanceWeight *= coeff_; + + MatrixPtr grad = output.grad->subRowMatrix(starts[i], starts[i + 1]); + grad->add(*crfs_[i].getXGrad(), real(1.0f), instanceWeight); + if (needWGrad) { + weight_->getWGrad()->add( + *crfs_[i].getWGrad(), real(1.0f), instanceWeight); } } - if (coeff_ != real(1.0f)) { - output.grad->mulScalar(coeff_); - } - parameter_->incUpdate(callback); } diff --git a/paddle/gserver/layers/CRFLayer.h b/paddle/gserver/layers/CRFLayer.h index de36a85083b6b293fd2d8522ec279a38cc4f8be3..00ec13cede97401b4c8a308df6fac27e47692146 100644 --- a/paddle/gserver/layers/CRFLayer.h +++ b/paddle/gserver/layers/CRFLayer.h @@ -38,8 +38,9 @@ protected: size_t numClasses_; ParameterPtr parameter_; std::vector crfs_; - LayerPtr weightLayer_; // weight for each sequence - real coeff_; // weight for the layer + LayerPtr weightLayer_; // weight for each sequence + std::unique_ptr weight_; // parameters + real coeff_; // weight for the layer }; } // namespace paddle diff --git a/paddle/gserver/layers/Layer.cpp b/paddle/gserver/layers/Layer.cpp index f76d41ad3e8a3b1730f9d50c0773ee4f61ddb541..125aaf947f3c9d976b117667d1d1b7700a029cc6 100644 --- a/paddle/gserver/layers/Layer.cpp +++ b/paddle/gserver/layers/Layer.cpp @@ -381,8 +381,7 @@ void Layer::backwardActivation() { void Layer::forwardDropOut() { auto& outV = getOutputValue(); - if (passType_ == PASS_TRAIN || passType_ == PASS_METRIC_TRAIN || - passType_ == PASS_METRIC_TRAIN_WITH_NOERROR) { + if (passType_ == PASS_TRAIN) { // new dropOutMask_ if dropOutMask_ is null ptr Matrix::resizeOrCreate(dropOutMask_, outV->getHeight(), diff --git a/paddle/gserver/layers/LinearChainCRF.cpp b/paddle/gserver/layers/LinearChainCRF.cpp index b7f748f3bb8a419429956724131e81dfdbd274c6..dc3dc156792bdf32c3b948a292597d0e9eca5d8b 100644 --- a/paddle/gserver/layers/LinearChainCRF.cpp +++ b/paddle/gserver/layers/LinearChainCRF.cpp @@ -17,18 +17,12 @@ limitations under the License. */ namespace paddle { -LinearChainCRF::LinearChainCRF(int numClasses, real* para, real* grad) +LinearChainCRF::LinearChainCRF(int numClasses, real* para) : numClasses_(numClasses) { a_ = Matrix::create(para, 1, numClasses_); b_ = Matrix::create(para + numClasses_, 1, numClasses_); w_ = Matrix::create(para + 2 * numClasses_, numClasses_, numClasses_); - if (grad) { - da_ = Matrix::create(grad, 1, numClasses_); - db_ = Matrix::create(grad + numClasses_, 1, numClasses_); - dw_ = Matrix::create(grad + 2 * numClasses_, numClasses_, numClasses_); - } - ones_ = Matrix::create(1, numClasses_); ones_->one(); @@ -107,19 +101,24 @@ real LinearChainCRF::forward(real* x, int* s, int length) { return -ll; } -void LinearChainCRF::backward(real* x, real* dx, int* s, int length) { +void LinearChainCRF::backward(real* x, int* s, int length, bool needWGrad) { MatrixPtr matX = Matrix::create(x, length, numClasses_); - MatrixPtr matDX = Matrix::create(dx, length, numClasses_); - MatrixPtr matGrad = Matrix::create(length, numClasses_); + Matrix::resizeOrCreate(matGrad_, length, numClasses_); Matrix::resizeOrCreate(beta_, length, numClasses_); real* b = b_->getData(); - real* dw = dw_ ? dw_->getData() : nullptr; + if (needWGrad) { + Matrix::resizeOrCreate(matWGrad_, numClasses_ + 2, numClasses_); + matWGrad_->zeroMem(); + da_ = matWGrad_->subRowMatrix(0, 1); + db_ = matWGrad_->subRowMatrix(1, 2); + dw_ = matWGrad_->subRowMatrix(2, numClasses_ + 2); + } real* alpha = alpha_->getData(); real* beta = beta_->getData(); real* expW = expW_->getData(); real* expX = expX_->getData(); - real* grad = matGrad->getData(); + real* grad = matGrad_->getData(); for (int i = 0; i < numClasses_; ++i) { beta[(length - 1) * numClasses_ + i] = exp(b[i]); @@ -140,39 +139,38 @@ void LinearChainCRF::backward(real* x, real* dx, int* s, int length) { normalizeL1(beta + k * numClasses_, numClasses_); } - matGrad->dotMul(*alpha_, *beta_); - matGrad->rowNormalizeL1(*matGrad); + matGrad_->dotMul(*alpha_, *beta_); + matGrad_->rowNormalizeL1(*matGrad_); for (int k = 0; k < length; ++k) { grad[k * numClasses_ + s[k]] -= (real)1; } - matDX->add(*matGrad); - if (da_) { - da_->add(*matGrad->subMatrix(/* startRow= */ 0, /* numRows= */ 1)); - } - if (db_) { - db_->add(*matGrad->subMatrix(/* startRow= */ length - 1, 1)); - } - beta_->dotMul(*beta_, *expX_); - beta_->rowNormalizeL1(*beta_); + if (needWGrad) { + da_->add(*matGrad_->subMatrix(/* startRow= */ 0, /* numRows= */ 1)); + db_->add(*matGrad_->subMatrix(/* startRow= */ length - 1, 1)); - for (int k = 1; dw && k < length; ++k) { - real sum = 0; - for (int i = 0; i < numClasses_; ++i) { - for (int j = 0; j < numClasses_; ++j) { - sum += expW[i * numClasses_ + j] * alpha[(k - 1) * numClasses_ + i] * - beta[k * numClasses_ + j]; + beta_->dotMul(*beta_, *expX_); + beta_->rowNormalizeL1(*beta_); + + real* dw = dw_->getData(); + for (int k = 1; k < length; ++k) { + real sum = 0; + for (int i = 0; i < numClasses_; ++i) { + for (int j = 0; j < numClasses_; ++j) { + sum += expW[i * numClasses_ + j] * alpha[(k - 1) * numClasses_ + i] * + beta[k * numClasses_ + j]; + } } - } - sum = 1 / sum; - for (int i = 0; i < numClasses_; ++i) { - for (int j = 0; j < numClasses_; ++j) { - dw[i * numClasses_ + j] += sum * expW[i * numClasses_ + j] * - alpha[(k - 1) * numClasses_ + i] * - beta[k * numClasses_ + j]; + sum = 1 / sum; + for (int i = 0; i < numClasses_; ++i) { + for (int j = 0; j < numClasses_; ++j) { + dw[i * numClasses_ + j] += sum * expW[i * numClasses_ + j] * + alpha[(k - 1) * numClasses_ + i] * + beta[k * numClasses_ + j]; + } } + dw[s[k - 1] * numClasses_ + s[k]] -= (real)1; } - dw[s[k - 1] * numClasses_ + s[k]] -= (real)1; } } diff --git a/paddle/gserver/layers/LinearChainCRF.h b/paddle/gserver/layers/LinearChainCRF.h index a905bf803dd5443ef8d4ad7702720a50a5220a9a..8daf1e14a6fa98bef41f4f32bff439df8302adfd 100644 --- a/paddle/gserver/layers/LinearChainCRF.h +++ b/paddle/gserver/layers/LinearChainCRF.h @@ -21,7 +21,7 @@ namespace paddle { class LinearChainCRF { public: /** - * The size of para and grad must be \f$(numClasses + 2) * numClasses\f$. + * The size of para must be \f$(numClasses + 2) * numClasses\f$. * The first numClasses values of para are for starting weights (\f$a\f$). * The next numClasses values of para are for ending weights (\f$b\f$), * The remaning values are for transition weights (\f$w\f$). @@ -34,7 +34,7 @@ public: * all possible * sequences is \f$1\f$, and \f$x\f$ is the input feature to the CRF. */ - LinearChainCRF(int numClasses, real* para, real* grad); + LinearChainCRF(int numClasses, real* para); /** * Calculate the negative log likelihood of s given x. @@ -45,29 +45,45 @@ public: /** * Calculate the gradient with respect to x, a, b, and w. - * The gradient of x will be stored in dx. * backward() can only be called after a corresponding call to forward() with * the same x, s and length. - * @note The gradient is added to dx and grad (provided at constructor). + * The gradient with respect to a, b, and w will not be calculated if + * needWGrad is false. + * @note Please call getWGrad() and getXGrad() to get the gradient with + * respect to (a, b, w) and x respectively. */ - void backward(real* x, real* dx, int* s, int length); + void backward(real* x, int* s, int length, bool needWGrad); /** * Find the most probable sequence given x. The result will be stored in s. */ void decode(real* x, int* s, int length); + /* + * Return the gradient with respect to (a, b, w). It can only be called after + * a corresponding call to backward(). + */ + MatrixPtr getWGrad() { return matWGrad_; } + + /* + * Return the gradient with respect to x. It can only be called after a + * corresponding call to backward(). + */ + MatrixPtr getXGrad() { return matGrad_; } + protected: int numClasses_; MatrixPtr a_; MatrixPtr b_; MatrixPtr w_; + MatrixPtr matWGrad_; MatrixPtr da_; MatrixPtr db_; MatrixPtr dw_; MatrixPtr ones_; MatrixPtr expX_; + MatrixPtr matGrad_; MatrixPtr alpha_; MatrixPtr beta_; MatrixPtr maxX_; diff --git a/paddle/gserver/tests/CMakeLists.txt b/paddle/gserver/tests/CMakeLists.txt index 0caa5e1e11e6d42fadfa87149814c4b77b3b6271..3c4128b5b8a0ea420bd3027b9a36e5f75087c3cb 100644 --- a/paddle/gserver/tests/CMakeLists.txt +++ b/paddle/gserver/tests/CMakeLists.txt @@ -18,6 +18,14 @@ add_unittest_without_exec(test_LayerGrad add_test(NAME test_LayerGrad COMMAND test_LayerGrad) +################ test_CRFLayerGrad #################### +add_unittest_without_exec(test_CRFLayerGrad + test_CRFLayerGrad.cpp + LayerGradUtil.cpp) +add_test(NAME test_CRFLayerGrad + COMMAND test_CRFLayerGrad) + + add_unittest_without_exec(test_ActivationGrad test_ActivationGrad.cpp LayerGradUtil.cpp) diff --git a/paddle/gserver/tests/test_CRFLayerGrad.cpp b/paddle/gserver/tests/test_CRFLayerGrad.cpp new file mode 100644 index 0000000000000000000000000000000000000000..df14449291e9ec08f45718de07bbb101f6dbea58 --- /dev/null +++ b/paddle/gserver/tests/test_CRFLayerGrad.cpp @@ -0,0 +1,174 @@ +/* Copyright (c) 2016 Baidu, Inc. All Rights Reserve. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. */ + +#include +#include "ModelConfig.pb.h" +#include "paddle/gserver/layers/DataLayer.h" +#include "paddle/gserver/layers/LinearChainCRF.h" +#include "paddle/trainer/Trainer.h" + +#include "LayerGradUtil.h" +#include "paddle/testing/TestUtil.h" + +using namespace paddle; // NOLINT + +DECLARE_int32(gpu_id); +DECLARE_bool(thread_local_rand_use_global_seed); + +static inline bool getNextSequence(std::vector& seq, int numClasses) { + for (auto& v : seq) { + if (++v < numClasses) { + return true; + } + v = 0; + } + return false; +} + +// log(exp(x) + exp(y)) +static inline real logSum(real x, real y) { + real maxValue = std::max(x, y); + if (std::isinf(maxValue)) { + return -std::numeric_limits::infinity(); + } else { + return maxValue + log(exp(x - maxValue) + exp(y - maxValue)); + } +} + +static inline std::vector genRandLabels(int numClasses, int length) { + std::vector labels(length); + for (int i = 0; i < length; ++i) { + labels[i] = rand() % numClasses; // NOLINT + } + return labels; +} + +TEST(CRFLayer, cost) { + const int numClasses = 4; + CpuVector para(numClasses * (numClasses + 2)); + real* a = para.getData(); + real* b = para.getData() + numClasses; + real* w = para.getData() + 2 * numClasses; + LinearChainCRF crf(4, para.getData()); + for (int length : {1, 2, 3, 10}) { + for (int tries = 0; tries < 10; ++tries) { + CpuMatrix x(length, numClasses); + x.randomizeUniform(); + para.randnorm(0, 2); + + std::vector goldenLabels = genRandLabels(numClasses, length); + + real cost = crf.forward(x.getData(), goldenLabels.data(), length); + + real logZ = -std::numeric_limits::infinity(); + real logNominator = -std::numeric_limits::infinity(); + std::vector testResult(length, 0); + do { + real score = a[testResult.front()]; + score += x.getElement(0, testResult.front()); + for (int k = 1; k < length; ++k) { + score += x.getElement(k, testResult[k]) + + w[numClasses * testResult[k - 1] + testResult[k]]; + } + score += b[testResult.back()]; + logZ = logSum(logZ, score); + + if (goldenLabels == testResult) { + logNominator = score; + } + } while (getNextSequence(testResult, numClasses)); + + real trueCost = -logNominator + logZ; + + real diff = fabs(trueCost - cost); + diff /= fabs(cost) < fabs(trueCost) ? fabs(cost) : fabs(trueCost); + VLOG(1) << "cost=" << cost << " trueCost=" << trueCost << " diff=" << diff + << std::endl; + if (typeid(real) == typeid(double)) { // NOLINT + EXPECT_LE(diff, 1e-10); + } else { + EXPECT_LE(diff, 5e-3); + } + } + } +} + +inline real epsilon() { return typeid(real) == typeid(double) ? 1e-10 : 0.06; } + +TestConfig initTestConfig(size_t numClasses, bool withWeight) { + TestConfig config; + config.layerConfig.set_type("crf"); + config.layerConfig.set_size(numClasses); + config.biasSize = 0; + + config.inputDefs.push_back({INPUT_SEQUENCE_DATA, + "layer_0", + numClasses, + numClasses * (numClasses + 2)}); + config.layerConfig.add_inputs(); + config.inputDefs.push_back( + {INPUT_SEQUENCE_LABEL, "layer_label", numClasses, 0}); + config.layerConfig.add_inputs(); + + if (withWeight) { + config.inputDefs.push_back({INPUT_DENSE_DIM_DATA, "layer_weight", 1, 0}); + config.layerConfig.add_inputs(); + } + + return config; +} + +TEST(Layer, CRFLayer) { + size_t numClasses = 10; + for (int tries = 0; tries < 5; ++tries) { + TestConfig config = initTestConfig(numClasses, /* withWeight= */ false); + for (int length : {1, 3, 100}) { + // Not support GPU now + testLayerGrad(config, + "crf", + length, + /* trans= */ false, + /* useGpu= */ false, + /* useWeight= */ false, + epsilon()); + } + } +} + +TEST(Layer, CRFLayerUseWeight) { + size_t numClasses = 10; + for (int tries = 0; tries < 5; ++tries) { + TestConfig config = initTestConfig(numClasses, /* withWeight= */ true); + for (int length : {1, 3, 100}) { + // Not support GPU now + testLayerGrad(config, + "crf", + length, + /* trans= */ false, + /* useGpu= */ false, + /* useWeight= */ false, + epsilon()); + } + } +} + +int main(int argc, char** argv) { + initMain(argc, argv); + hl_start(); + hl_init(FLAGS_gpu_id); + FLAGS_thread_local_rand_use_global_seed = true; + srand(1); + testing::InitGoogleTest(&argc, argv); + return RUN_ALL_TESTS(); +} diff --git a/paddle/gserver/tests/test_LayerGrad.cpp b/paddle/gserver/tests/test_LayerGrad.cpp index 14d9db52470b2828186eca04d303135910489266..ceb69359c992128635c199e56805d3f603ca4271 100644 --- a/paddle/gserver/tests/test_LayerGrad.cpp +++ b/paddle/gserver/tests/test_LayerGrad.cpp @@ -276,27 +276,6 @@ TEST(Layer, AddtoLayer) { } } -TEST(Layer, CRFLayer) { - TestConfig config; - config.layerConfig.set_type("crf"); - config.layerConfig.set_size(10); - config.biasSize = 0; - - config.inputDefs.push_back({INPUT_SEQUENCE_DATA, "layer_0", 10, 120}); - config.inputDefs.push_back({INPUT_SEQUENCE_LABEL, "layer_1", 10, 0}); - config.layerConfig.add_inputs(); - config.layerConfig.add_inputs(); - - // Not support GPU now - testLayerGrad(config, - "crf", - 100, - /* trans */ false, - /* useGpu */ false, - false /*useWeight*/, - 0.03 /*epsilon*/); -} - TEST(Layer, CTCLayer) { TestConfig config; config.layerConfig.set_type("ctc"); diff --git a/paddle/gserver/tests/test_LinearChainCRF.cpp b/paddle/gserver/tests/test_LinearChainCRF.cpp index f046cb0b289c9ce22b98f3200bf0a3f7d48d77f5..b37277054c58a5f71cc4649fc6c062ca8dc1d4c9 100644 --- a/paddle/gserver/tests/test_LinearChainCRF.cpp +++ b/paddle/gserver/tests/test_LinearChainCRF.cpp @@ -36,7 +36,7 @@ TEST(LinearChainCRF, decoding) { real* a = para.getData(); real* b = para.getData() + numClasses; real* w = para.getData() + 2 * numClasses; - LinearChainCRF crf(4, para.getData(), nullptr); + LinearChainCRF crf(4, para.getData()); for (int length : {1, 2, 3, 10}) { for (int tries = 0; tries < 10; ++tries) { CpuMatrix x(length, numClasses); diff --git a/paddle/pserver/BaseClient.h b/paddle/pserver/BaseClient.h index 11d7a147bf749ba2de0772b5efd5f73ab0ccdb1a..667bc451d16aa1436ac5d74dd96edbd70556edd0 100644 --- a/paddle/pserver/BaseClient.h +++ b/paddle/pserver/BaseClient.h @@ -30,9 +30,6 @@ namespace paddle { * the first solution arms with sendThreads_/recvThreads_ and sendJobQueue_/ * recvJobQueue_. the second solution use some shared thread pool to manage * connections. - * In addition to pserver, metric learning also uses network to exchange - * features within multi-machines, so this class just abstracts some basic - * threads and queue buffer creation for them */ class BaseClient { protected: diff --git a/paddle/pserver/ParameterServer2.cpp b/paddle/pserver/ParameterServer2.cpp index 856fa0ad1ab30e3fc554ac96dd3bed71b1548579..877cbb86ec112739a5c7eeee969ca48ef491ee87 100644 --- a/paddle/pserver/ParameterServer2.cpp +++ b/paddle/pserver/ParameterServer2.cpp @@ -367,11 +367,8 @@ void ParameterServer2::addGradient(const SendParameterRequest& request, std::vector* outputBuffers) { VLOG(1) << "pserver: addGradient"; -/// forwardbackward delta from all trainers -/// indicate the fluctuation caused by forwardbackward. -#ifndef PADDLE_METRIC_LEARNING - // @TODO(yanfei): - // add support tuning forwardbackward balance for metric learning + // forwardbackward delta from all trainers + // indicate the fluctuation caused by forwardbackward. if (!numPassFinishClients_) { REGISTER_BARRIER_DELTA_SERVER_SET( *statSet_, @@ -381,7 +378,6 @@ void ParameterServer2::addGradient(const SendParameterRequest& request, request.forwardbackward_time(), isSparseServer_ ? "_sparseUpdater" : "_denseUpdater"); } -#endif { /// approximately pure network overhead diff --git a/paddle/trainer/Trainer.h b/paddle/trainer/Trainer.h index c8ee4726c24c335ceda22ea3a20049b01d11c149..fac589d1d711affcd008f90edf87d865c8362f69 100644 --- a/paddle/trainer/Trainer.h +++ b/paddle/trainer/Trainer.h @@ -30,10 +30,6 @@ limitations under the License. */ #include "TrainerConfigHelper.h" #include "TrainerInternal.h" -#ifdef PADDLE_METRIC_LEARNING -#include "paddle/internals/metric_learning/MetricTrainer.h" -#endif - DECLARE_int32(num_passes); namespace paddle { @@ -201,12 +197,8 @@ protected: // parameter util std::unique_ptr paramUtil_; -#ifdef PADDLE_METRIC_LEARNING - MetricTrainer trainerInternal_; -#else // trainer Internal TrainerInternal trainerInternal_; -#endif }; } // namespace paddle diff --git a/paddle/utils/Flags.cpp b/paddle/utils/Flags.cpp index e8f31bc811ac30d83e8203b784ee1f93a8d35d90..320f671ed97dbadc4fa1b4b52d5611cf9239e7dd 100644 --- a/paddle/utils/Flags.cpp +++ b/paddle/utils/Flags.cpp @@ -30,7 +30,6 @@ DEFINE_bool(parallel_nn, DEFINE_int32(trainer_count, 1, "Defined how many trainers to train"); DEFINE_int32(gpu_id, 0, "Which gpu core to use"); DEFINE_int32(port, 20134, "Listening port for pserver"); -DEFINE_int32(data_server_port, 21134, "Listening port for dserver"); DEFINE_int32(ports_num, 1, "Number of ports for sending dense parameter," diff --git a/paddle/utils/Flags.h b/paddle/utils/Flags.h index 3e72f8356d883b353127ccae80f2881320d20b2b..dc4faef8331ed47b9ce3e952389b6469cd9fda2e 100644 --- a/paddle/utils/Flags.h +++ b/paddle/utils/Flags.h @@ -19,7 +19,6 @@ limitations under the License. */ DECLARE_bool(parallel_nn); DECLARE_int32(async_count); DECLARE_int32(port); -DECLARE_int32(data_server_port); DECLARE_bool(use_gpu); DECLARE_int32(gpu_id); DECLARE_int32(trainer_count); diff --git a/paddle/utils/GlobalConstants.h b/paddle/utils/GlobalConstants.h index 707346f2c76e59b50722f4f8805ebe56c3cf861b..0ec1c28dfbb2a7db9fa84c9eb2bc4dad806b78e9 100644 --- a/paddle/utils/GlobalConstants.h +++ b/paddle/utils/GlobalConstants.h @@ -23,11 +23,6 @@ enum PassType { PASS_TEST, // Test pass PASS_GC, // Gradient Check pass PASS_METRIC, // pass for generate template output with no drop rate. - // pass for metric learning training with metric learning error, only used - // when we are doing KNN evaluation. - PASS_METRIC_TRAIN, - PASS_METRIC_TRAIN_WITH_NOERROR, // Pass for metric learning training - // with no evaluation. }; enum ParameterType { diff --git a/python/paddle/trainer/config_parser.py b/python/paddle/trainer/config_parser.py index da937152ee0ce788309690c7b718943bb21b5a76..e257aa568facb1555944dba7e76c5d8bce7f1c7d 100644 --- a/python/paddle/trainer/config_parser.py +++ b/python/paddle/trainer/config_parser.py @@ -2301,14 +2301,9 @@ def Generator( @config_layer('expand') class ExpandLayer(LayerBase): - def __init__(self, - name, - inputs, - trans_type='non-seq', - device=None, - bias=False): + def __init__(self, name, inputs, trans_type='non-seq', bias=False, **xargs): super(ExpandLayer, self).__init__( - name, 'expand', 0, inputs=inputs, device=device) + name, 'expand', 0, inputs=inputs, **xargs) config_assert( len(self.inputs) == 2, 'ExpandLayer takes 2 and only 2 inputs') self.config.trans_type = trans_type @@ -2339,11 +2334,10 @@ class MaxLayer(LayerBase): inputs, trans_type='non-seq', active_type='linear', - device=None, bias=False, - output_max_index=None): - super(MaxLayer, self).__init__( - name, 'max', 0, inputs=inputs, device=device) + output_max_index=None, + **xargs): + super(MaxLayer, self).__init__(name, 'max', 0, inputs=inputs, **xargs) config_assert(len(self.inputs) == 1, 'MaxLayer must have 1 input') self.config.trans_type = trans_type self.config.active_type = active_type @@ -2390,15 +2384,15 @@ class SequenceLastInstanceLayer(LayerBase): inputs, active_type='linear', trans_type='non-seq', - device=None, - bias=False): + bias=False, + **xargs): super(SequenceLastInstanceLayer, self).__init__( name, 'seqlastins', 0, inputs=inputs, - device=device, - active_type=active_type) + active_type=active_type, + **xargs) config_assert( len(inputs) == 1, 'SequenceLastInstanceLayer must have 1 input') self.config.trans_type = trans_type @@ -2410,39 +2404,29 @@ class SequenceLastInstanceLayer(LayerBase): @config_layer('seqfirstins') class SequenceFirstInstanceLayer(SequenceLastInstanceLayer): - def __init__( - self, - name, - inputs, - active_type='linear', - trans_type='non-seq', - device=None, - bias=False, ): + def __init__(self, + name, + inputs, + active_type='linear', + trans_type='non-seq', + bias=False, + **xargs): super(SequenceFirstInstanceLayer, self).__init__( - name, - inputs=inputs, - active_type=active_type, - device=device, - bias=bias) + name, inputs=inputs, active_type=active_type, bias=bias, **xargs) self.config.trans_type = trans_type self.config.select_first = True @config_layer('seqconcat') class SequenceConcatLayer(LayerBase): - def __init__(self, - name, - inputs, - active_type='linear', - device=None, - bias=False): + def __init__(self, name, inputs, active_type='linear', bias=False, **xargs): super(SequenceConcatLayer, self).__init__( name, 'seqconcat', 0, inputs=inputs, - device=device, - active_type=active_type) + active_type=active_type, + **xargs) config_assert( len(inputs) == 2, 'SequenceConcatLayer must have 2 inputs') for input_index in xrange(len(self.inputs)): @@ -2458,15 +2442,15 @@ class SequenceReshapeLayer(LayerBase): size, inputs, active_type='linear', - device=None, - bias=False): + bias=False, + **xargs): super(SequenceReshapeLayer, self).__init__( name, 'seqreshape', size, inputs=inputs, - device=device, - active_type=active_type) + active_type=active_type, + **xargs) config_assert( len(inputs) == 1, 'SequenceReshapeLayer must have 1 inputs') self.set_layer_size(size) @@ -2475,19 +2459,9 @@ class SequenceReshapeLayer(LayerBase): @config_layer('subseq') class SubSequenceLayer(LayerBase): - def __init__(self, - name, - inputs, - active_type='linear', - device=None, - bias=False): + def __init__(self, name, inputs, active_type='linear', bias=False, **xargs): super(SubSequenceLayer, self).__init__( - name, - 'subseq', - 0, - inputs=inputs, - device=device, - active_type=active_type) + name, 'subseq', 0, inputs=inputs, active_type=active_type, **xargs) config_assert(len(inputs) == 3, 'SubSequenceLayer must have 3 inputs') input_layer0 = self.get_input_layer(0) size = input_layer0.size @@ -2644,15 +2618,10 @@ class AverageLayer(LayerBase): average_strategy='average', trans_type='non-seq', active_type='linear', - device=None, - bias=False): + bias=False, + **xargs): super(AverageLayer, self).__init__( - name, - 'average', - 0, - inputs=inputs, - device=device, - active_type=active_type) + name, 'average', 0, inputs=inputs, active_type=active_type, **xargs) self.config.average_strategy = average_strategy self.config.trans_type = trans_type config_assert(len(inputs) == 1, 'AverageLayer must have 1 input') @@ -2676,9 +2645,9 @@ class CosSimLayer(LayerBase): @config_layer('tensor') class TensorLayer(LayerBase): - def __init__(self, name, size, inputs, device=None, bias=True, **xargs): + def __init__(self, name, size, inputs, bias=True, **xargs): super(TensorLayer, self).__init__( - name, 'tensor', size, inputs=inputs, device=device, **xargs) + name, 'tensor', size, inputs=inputs, **xargs) config_assert(len(self.inputs) == 2, 'TensorLayer must have 2 inputs') config_assert(size > 0, 'size must be positive') config_assert(inputs[1].parameter_name == None, @@ -3029,7 +2998,7 @@ class CRFLayer(LayerBase): super(CRFLayer, self).__init__(name, 'crf', size, inputs, device=device) config_assert(2 <= len(self.inputs) <= 3, 'CRFLayer must have 2 or 3 inputs') - self.create_input_parameter(0, size * (size + 2), [size, size + 2]) + self.create_input_parameter(0, size * (size + 2), [size + 2, size]) self.config.coeff = coeff @@ -3051,7 +3020,7 @@ class CRFDecodingLayer(LayerBase): config_assert( len(self.inputs) <= 2, 'CRFDecodingLayer cannot have more than 2 inputs') - self.create_input_parameter(0, size * (size + 2), [size, size + 2]) + self.create_input_parameter(0, size * (size + 2), [size + 2, size]) @config_layer('ctc') diff --git a/python/paddle/trainer_config_helpers/tests/configs/protostr/test_cost_layers.protostr b/python/paddle/trainer_config_helpers/tests/configs/protostr/test_cost_layers.protostr index 10e59e21bc7a48bc53fb535f86f053c91f57c1df..05fd1c99d2db6e9faa3b3884ec9baf051791f9fe 100644 --- a/python/paddle/trainer_config_helpers/tests/configs/protostr/test_cost_layers.protostr +++ b/python/paddle/trainer_config_helpers/tests/configs/protostr/test_cost_layers.protostr @@ -239,9 +239,9 @@ parameters { name: "___crf_layer_0__.w0" size: 24 initial_mean: 0.0 - initial_std: 0.5 - dims: 4 + initial_std: 0.408248290464 dims: 6 + dims: 4 initial_strategy: 0 initial_smart: true }