提交 34ff7522 编写于 作者: C caoying03

Merge branch 'develop' into add_sequence_slice_layer

......@@ -38,7 +38,7 @@ before_install:
# Paddle is using protobuf 3.1 currently. Protobuf 3.2 breaks the compatibility. So we specify the python
# protobuf version.
- pip install numpy wheel 'protobuf==3.1' sphinx==1.5.6 recommonmark sphinx-rtd-theme==0.1.9 virtualenv pre-commit requests==2.9.2 LinkChecker
- pip install rarfile
- pip install rarfile nltk==3.2.2 scipy==0.19.0 recordio matplotlib Pillow
- curl https://glide.sh/get | bash
- eval "$(GIMME_GO_VERSION=1.8.3 gimme)"
- go get -u github.com/alecthomas/gometalinter
......
......@@ -38,17 +38,16 @@ RUN apt-get update && \
RUN pip --no-cache-dir install 'numpy>=1.12.0'
# Install Go and glide
RUN wget -O go.tgz https://storage.googleapis.com/golang/go1.8.1.linux-amd64.tar.gz && \
tar -C /usr/local -xzf go.tgz && \
RUN wget -qO- https://storage.googleapis.com/golang/go1.8.1.linux-amd64.tar.gz | \
tar -xz -C /usr/local && \
mkdir /root/gopath && \
mkdir /root/gopath/bin && \
mkdir /root/gopath/src && \
rm go.tgz
mkdir /root/gopath/src
ENV GOROOT=/usr/local/go GOPATH=/root/gopath
# should not be in the same line with GOROOT definition, otherwise docker build could not find GOROOT.
ENV PATH=${PATH}:${GOROOT}/bin:${GOPATH}/bin
# install glide
RUN curl -q https://glide.sh/get | sh
RUN curl -s -q https://glide.sh/get | sh
# git credential to skip password typing
RUN git config --global credential.helper store
......
......@@ -42,29 +42,21 @@ macro(add_style_check_target TARGET_NAME)
if(WITH_STYLE_CHECK)
set(SOURCES_LIST ${ARGN})
list(REMOVE_DUPLICATES SOURCES_LIST)
list(SORT SOURCES_LIST)
foreach(filename ${SOURCES_LIST})
set(LINT ON)
foreach(pattern ${IGNORE_PATTERN})
if(filename MATCHES ${pattern})
message(STATUS "DROP LINT ${filename}")
set(LINT OFF)
list(REMOVE_ITEM SOURCES_LIST ${filename})
endif()
endforeach()
if(LINT MATCHES ON)
# cpplint code style
get_filename_component(base_filename ${filename} NAME)
set(CUR_GEN ${CMAKE_CURRENT_BINARY_DIR}/${base_filename}.cpplint)
add_custom_command(OUTPUT ${CUR_GEN} PRE_BUILD
COMMAND "${PYTHON_EXECUTABLE}" "${PROJ_ROOT}/paddle/scripts/cpplint.py"
"--filter=${STYLE_FILTER}"
"--write-success=${CUR_GEN}" ${filename}
DEPENDS ${filename} ${PROJ_ROOT}/paddle/scripts/cpplint.py
WORKING_DIRECTORY ${CMAKE_CURRENT_SOURCE_DIR})
add_custom_target(${base_filename}.cpplint DEPENDS ${CUR_GEN})
add_dependencies(${TARGET_NAME} ${base_filename}.cpplint)
endif()
endforeach()
if(SOURCES_LIST)
add_custom_command(TARGET ${TARGET_NAME} POST_BUILD
COMMAND "${PYTHON_EXECUTABLE}" "${PROJ_ROOT}/paddle/scripts/cpplint.py"
"--filter=${STYLE_FILTER}"
${SOURCES_LIST}
COMMENT "cpplint: Checking source code style"
WORKING_DIRECTORY ${CMAKE_CURRENT_SOURCE_DIR})
endif()
endif()
endmacro()
......@@ -8,7 +8,7 @@ ExternalProject_Add(
extern_lib_any
${EXTERNAL_PROJECT_LOG_ARGS}
GIT_REPOSITORY "https://github.com/PaddlePaddle/any.git"
GIT_TAG "8fef1e93710a0edf8d7658999e284a1142c4c020"
GIT_TAG "15595d8324be9e8a9a80d9ae442fdd12bd66df5d"
PREFIX ${ANY_SOURCE_DIR}
UPDATE_COMMAND ""
CONFIGURE_COMMAND ""
......
......@@ -17,7 +17,7 @@ IF(NOT ${WITH_MKLML})
ENDIF(NOT ${WITH_MKLML})
IF(WIN32 OR APPLE)
MESSAGE(WARNING
MESSAGE(WARNING
"Windows or Mac is not supported with MKLML in Paddle yet."
"Force WITH_MKLML=OFF")
SET(WITH_MKLML OFF CACHE STRING "Disable MKLML package in Windows and MacOS" FORCE)
......@@ -43,22 +43,21 @@ SET(CMAKE_INSTALL_RPATH "${CMAKE_INSTALL_RPATH}" "${MKLML_ROOT}/lib")
INCLUDE_DIRECTORIES(${MKLML_INC_DIR})
SET(mklml_cmakefile ${MKLML_DOWNLOAD_DIR}/CMakeLists.txt)
FILE(WRITE ${mklml_cmakefile} "PROJECT(MKLML)\n"
"cmake_minimum_required(VERSION 3.0)\n"
"install(DIRECTORY ${MKLML_VER}\n"
" DESTINATION ${MKLML_DST_DIR})\n")
FILE(WRITE ${MKLML_DOWNLOAD_DIR}/CMakeLists.txt
"PROJECT(MKLML)\n"
"cmake_minimum_required(VERSION 3.0)\n"
"install(DIRECTORY ${MKLML_VER}\n"
" DESTINATION ${MKLML_DST_DIR})\n")
ExternalProject_Add(
${MKLML_PROJECT}
${EXTERNAL_PROJECT_LOG_ARGS}
PREFIX ${MKLML_SOURCE_DIR}
DOWNLOAD_DIR ${MKLML_DOWNLOAD_DIR}
DOWNLOAD_COMMAND wget --no-check-certificate -O ${MKLML_DOWNLOAD_DIR}/${MKLML_VER}.tgz ${MKLML_URL}
&& tar -xzf ${MKLML_DOWNLOAD_DIR}/${MKLML_VER}.tgz
DOWNLOAD_COMMAND wget --no-check-certificate -qO- ${MKLML_URL} | tar xz -C ${MKLML_DOWNLOAD_DIR}
DOWNLOAD_NO_PROGRESS 1
UPDATE_COMMAND ""
CMAKE_ARGS -DCMAKE_INSTALL_PREFIX=${MKLML_INSTALL_ROOT}
CMAKE_ARGS -DCMAKE_INSTALL_PREFIX=${MKLML_INSTALL_ROOT}
CMAKE_CACHE_ARGS -DCMAKE_INSTALL_PREFIX:PATH=${MKLML_INSTALL_ROOT}
)
......
......@@ -115,7 +115,7 @@ set(COMMON_FLAGS
-Wno-error=literal-suffix
-Wno-error=sign-compare
-Wno-error=unused-local-typedefs
-Wno-error=parentheses-equality # Warnings in Pybind11
-Wno-error=parentheses-equality # Warnings in pybind11
)
set(GPU_COMMON_FLAGS
......@@ -195,6 +195,7 @@ endif()
# Modern gpu architectures: Pascal
if (CUDA_VERSION VERSION_GREATER "8.0" OR CUDA_VERSION VERSION_EQUAL "8.0")
list(APPEND __arch_flags " -gencode arch=compute_60,code=sm_60")
list(APPEND CUDA_NVCC_FLAGS --expt-relaxed-constexpr)
endif()
# Custom gpu architecture
......
......@@ -262,6 +262,16 @@ seq_slice
.. autoclass:: paddle.v2.layer.seq_slice
:noindex:
kmax_sequence_score
-------------------
.. autoclass:: paddle.v2.layer.kmax_sequence_score
:noindex:
sub_nested_seq
--------------
.. autoclass:: paddle.v2.layer.sub_nested_seq
:noindex:
Reshaping Layers
================
......
# Intel® MKL-DNN on PaddlePaddle: Design Doc
我们计划将Intel深度神经网络数学库(**MKL-DNN**\[[1](#references)\])集成到PaddlePaddle,充分展现英特尔平台的优势,有效提升PaddlePaddle在英特尔架构上的性能。
我们短期内的基本目标是:
- 完成常用layer的MKL-DNN实现。
- 完成常见深度神经网络VGG,GoogLeNet 和 ResNet的MKL-DNN实现。
## Contents
- [Overview](#overview)
- [Actions](#actions)
- [CMake](#cmake)
- [Layers](#layers)
- [Activations](#activations)
- [Unit Tests](#unit-tests)
- [Protobuf Messages](#protobuf-messages)
- [Python API](#python-api)
- [Demos](#demos)
- [Benchmarking](#benchmarking)
- [Others](#others)
- [Design Concerns](#design-concerns)
## Overview
我们会把MKL-DNN作为第三方库集成进PaddlePaddle,整体框架图
<div align="center">
<img src="image/overview.png" width=350><br/>
Figure 1. PaddlePaddle on IA.
</div>
## Actions
我们把集成方案大致分为了如下几个方面。
### CMake
我们会在`CMakeLists.txt`中会添加`WITH_MKLDNN`的选项,当设置这个值为`ON`的时候会启用编译MKL-DNN功能。同时会自动开启OpenMP用于提高MKL-DNN的性能。
同时,我们会引入`WITH_MKLML`选项,用于选择是否使用MKL-DNN自带的MKLML安装包。这个安装包可以独立于MKL-DNN使用,但是建议在开启MKL-DNN的同时也打开MKLML的开关,这样才能发挥最好的性能。
所以,我们会在`cmake/external`目录新建`mkldnn.cmake``mklml.cmake`文件,它们会在编译PaddlePaddle的时候下载对应的软件包,并放到PaddlePaddle的third party目录中。
**备注**:当`WITH_MKLML=ON`的时候,会优先使用这个包作为PaddlePaddle的CBLAS和LAPACK库,所以会稍微改动`cmake/cblas.cmake`中的逻辑。
### Layers
所有MKL-DNN相关的C++ layers,都会按照PaddlePaddle的目录结构存放在
`paddle/gserver/layers`中,并且文件名都会一以*Mkldnn*开头。
所有MKL-DNN的layers都会继承于一个叫做`MkldnnLayer`的父类,该父类继承于PaddlePaddle的基类`Layer`
### Activations
由于在PaddlePaddle中,激活函数是独立于layer概念的,所以会在`paddle/gserver/activations`目录下添加一个`MkldnnActivation.h`文件定义一些用于MKL-DNN的接口,实现方法还是会在`ActivationFunction.cpp`文件。
### Unit Tests
会在`paddle/gserver/test`目录下添加`test_Mkldnn.cpp``MkldnnTester.*`用于MKL-DNN的测试。
Activation的测试,计划在PaddlePaddle原有的测试文件上直接添加新的测试type。
### Protobuf Messages
根据具体layer的需求可能会在`proto/ModelConfig.proto`里面添加必要的选项。
### Python API
目前只考虑**v1 API**
计划在`python/paddle/trainer/config_parser.py`里面添加`use_mkldnn`这个选择,方便用户选择使用MKL-DNN的layers。
具体实现方式比如:
```python
use_mkldnn = bool(int(g_command_config_args.get("use_mkldnn", 0)))
if use_mkldnn
self.layer_type = mkldnn_*
```
所有MKL-DNN的layer type会以*mkldnn_*开头,以示区分。
并且可能在`python/paddle/trainer_config_helper`目录下的`activations.py ``layers.py`里面添加必要的MKL-DNN的接口。
### Demos
会在`v1_api_demo`目录下添加一个`mkldnn`的文件夹,里面放入一些用于MKL-DNN测试的demo脚本。
### Benchmarking
会考虑添加部分逻辑在`benchmark/paddle/image/run.sh`,添加使用MKL-DNN的测试。
### Others
1. 如果在使用MKL-DNN的情况下,会把CPU的Buffer对齐为64。
2. 深入PaddlePaddle,寻找有没有其他可以优化的可能,进一步优化。比如可能会用OpenMP改进SGD的更新性能。
## Design Concerns
为了更好的符合PaddlePaddle的代码风格\[[2](#references)\],同时又尽可能少的牺牲MKL-DNN的性能\[[3](#references)\]
我们总结出一些特别需要注意的点:
1. 使用**deviceId_**。为了尽可能少的在父类Layer中添加变量或者函数,我们决定使用已有的`deviceId_`变量来区分layer的属性,定义`-2``MkldnnLayer`特有的设备ID。
2. 重写父类Layer的**init**函数,修改`deviceId_``-2`,代表这个layer是用于跑在MKL-DNN的环境下。
3. 创建`MkldnnMatrix`,用于管理MKL-DNN会用到的相关memory函数、接口以及会用的到格式信息。
4. 创建`MkldnnBase`,定义一些除了layer和memory相关的类和函数。包括MKL-DNN会用到`MkldnnStream``CpuEngine`,和未来可能还会用到`FPGAEngine`等。
5.**Argument**里添加两个`MkldnnMatrixPtr`,取名为`mkldnnValue``mkldnnGrad`,用于存放`MkldnnLayer`会用到的memory buffer。 并且添加函数cvt(会修改为一个更加合适的函数名),用于处理"CPU device"和"MKL-DNN device"之间memory的相互转化。
6. 在父类`Layer`中的`getOutput`函数中添加一段逻辑,用于判断`deviceId`,并针对device在MKL-DNN和CPU之间不统一的情况,做一个前期转换。 也就是调用`Argument`的cvt函数把output统一到需要的device上。
7. 在原来的`FLAGS`中添加一个`use_mkldnn`的flag,用于选择是否使用MKL-DNN的相关功能。
## References
1. [Intel Math Kernel Library for Deep Neural Networks (Intel MKL-DNN)](https://github.com/01org/mkl-dnn "Intel MKL-DNN")
2. [原来的方案](https://github.com/PaddlePaddle/Paddle/pull/3096)会引入**nextLayer**的信息。但是在PaddlePaddle中,无论是重构前的layer还是重构后的op,都不会想要知道next layer/op的信息。
3. MKL-DNN的高性能格式与PaddlePaddle原有的`NCHW`不同(PaddlePaddle中的CUDNN部分使用的也是`NCHW`,所以不存在这个问题),所以需要引入一个转换方法,并且只需要在必要的时候转换这种格式,才能更好的发挥MKL-DNN的性能。
......@@ -11,6 +11,15 @@ Paddle每次发新的版本,遵循以下流程:
* 编译这个版本的Ubuntu Deb包。如果失败,修复Ubuntu Deb包编译问题,Patch号加一,返回第二步。
* 使用Regression Test List作为检查列表,测试Docker镜像/ubuntu安装包的功能正确性
* 如果失败,记录下所有失败的例子,在这个`release/版本号`分支中,修复所有bug后,Patch号加一,返回第二步
* 编译这个版本的python wheel包,并发布到pypi。
* 由于pypi.python.org目前遵循[严格的命名规范PEP 513](https://www.python.org/dev/peps/pep-0513),在使用twine上传之前,需要重命名wheel包中platform相关的后缀,比如将`linux_x86_64`修改成`manylinux1_x86_64`
* pypi上的package名称为paddlepaddle和paddlepaddle_gpu,如果要上传GPU版本的包,需要修改build/python/setup.py中,name: "paddlepaddle_gpu"并重新打包wheel包:`python setup.py bdist_wheel`
* 上传方法:
```
cd build/python
pip install twine
twine upload dist/[package to upload]
```
4. 第三步完成后,将`release/版本号`分支合入master分支,并删除`release/版本号`分支。将master分支的合入commit打上tag,tag为`版本号`。同时再将`master`分支合入`develop`分支。最后删除`release/版本号`分支。
5. 编译master分支的Docker发行镜像,发布到dockerhub。编译ubuntu的deb包,发布到github release页面
6. 协同完成Release Note的书写
......
......@@ -3,6 +3,43 @@ PaddlePaddle的Docker容器使用方式
PaddlePaddle目前唯一官方支持的运行的方式是Docker容器。因为Docker能在所有主要操作系统(包括Linux,Mac OS X和Windows)上运行。 请注意,您需要更改 `Dockers设置 <https://github.com/PaddlePaddle/Paddle/issues/627>`_ 才能充分利用Mac OS X和Windows上的硬件资源。
Docker使用入门
------------------------------
几个基础的概念帮助理解和使用Docker:
- *镜像*:一个Docker镜像是一个打包好的软件。它包含了这个软件本身和它所依赖的运行环境。PaddlePaddle的Docker镜像就包含了PaddlePaddle的Python库以及其依赖的多个Python库。这样我们可以直接在Docker中运行需要的程序而不需要安装后在执行。可以执行:
.. code-block:: bash
docker images
来列出当前系统中的所有镜像,同样可以执行:
.. code-block:: bash
docker pull paddlepaddle/paddle:0.10.0
来下载Docker镜像,paddlepaddle/paddle是从官方镜像源Dockerhub.com下载的,推荐国内用户使用ocker.paddlepaddle.org/paddle下载。
- *容器*: 如果说一个Docker镜像就是一个程序,那容器就是这个程序运行时产生的“进程”。
实际上,一个容器就是一个操作系统的进程,但是是运行在独立的进程空间,文件系统以及网络之上。
可以执行:
.. code-block:: bash
docker run paddlepaddle/paddle:0.10.0
来使用一个镜像启动一个容器。
- 默认情况下,Docker容器会运行在独立的文件系统空间之上,我们无法在Docker容器中
访问到主机上的文件。可以通过*挂载Volume*的方式,将主机上的文件或目录挂载到
Docker容器中。下面的命令把当前目录挂载到了容器中的 /data 目录下,容器使用
debian镜像,并且启动后执行 :code:`ls /data`。
.. code-block:: bash
docker run --rm -v $(pwd):/data debian ls /data
PaddlePaddle发布的Docker镜像使用说明
------------------------------
......@@ -12,11 +49,11 @@ PaddlePaddle需要的所有编译工具。把编译出来的PaddlePaddle也打
像,称为生产镜像,里面涵盖了PaddlePaddle运行所需的所有环境。每次
PaddlePaddle发布新版本的时候都会发布对应版本的生产镜像以及开发镜像。运
行镜像包括纯CPU版本和GPU版本以及其对应的非AVX版本。我们会在
`dockerhub.com <https://hub.docker.com/r/paddlepaddle/paddle/tags/>`_ 提供最新
的Docker镜像,可以在"tags"标签下找到最新的Paddle镜像版本。为了方便在国
内的开发者下载Docker镜像,我们提供了国内的镜像服务器供大家使用。如果您
在国内,请把文档里命令中的paddlepaddle/paddle替换成
docker.paddlepaddle.org/paddle。
`dockerhub.com <https://hub.docker.com/r/paddlepaddle/paddle/tags/>`_
和国内镜像`docker.paddlepaddle.org` 提供最新
的Docker镜像,可以在"tags"标签下找到最新的Paddle镜像版本。
**注意:为了方便在国内的开发者下载Docker镜像,我们提供了国内的镜像服务器供大家使用。如果您在国内,请把文档里命令中的paddlepaddle/paddle替换成docker.paddlepaddle.org/paddle。**
1. 开发镜像::code:`paddlepaddle/paddle:0.10.0-dev`
......@@ -68,6 +105,8 @@ docker.paddlepaddle.org/paddle。
如果输出是No,就需要选择使用no-AVX的镜像
**注:在0.10.0之后的版本,PaddlePaddle都可以自动判断硬件是否支持AVX,所以无需判断AVX即可使用**
以上方法在GPU镜像里也能用,只是请不要忘记提前在物理机上安装GPU最新驱动。
为了保证GPU驱动能够在镜像里面正常运行,我们推荐使用[nvidia-docker](https://github.com/NVIDIA/nvidia-docker)来运行镜像。
......
......@@ -63,12 +63,35 @@ CPU-only version and a CUDA GPU version and their no-AVX versions.
We put the docker images on `dockerhub.com
<https://hub.docker.com/r/paddlepaddle/paddle/tags/>`_. You can find the
latest versions under "tags" tab at dockerhub.com. If you are in
China, you can use our Docker image registry mirror to speed up the
download process. To use it, please replace all paddlepaddle/paddle in
the commands to docker.paddlepaddle.org/paddle.
latest versions under "tags" tab at dockerhub.com.
1. Production images, this image might have multiple variants:
** NOTE: If you are in China, you can use our Docker image registry mirror to speed up the download process. To use it, please replace all paddlepaddle/paddle in the commands to docker.paddlepaddle.org/paddle.**
1. development image :code:`paddlepaddle/paddle:<version>-dev`
This image has packed related develop tools and runtime
environment. Users and developers can use this image instead of
their own local computer to accomplish development, build,
releasing, document writing etc. While different version of paddle
may depends on different version of libraries and tools, if you
want to setup a local environment, you must pay attention to the
versions. The development image contains:
- gcc/clang
- nvcc
- Python
- sphinx
- woboq
- sshd
Many developers use servers with GPUs, they can use ssh to login to
the server and run :code:`docker exec` to enter the docker
container and start their work. Also they can start a development
docker image with SSHD service, so they can login to the container
and start work.
2. Production images, this image might have multiple variants:
- GPU/AVX::code:`paddlepaddle/paddle:<version>-gpu`
- GPU/no-AVX::code:`paddlepaddle/paddle:<version>-gpu-noavx`
......@@ -84,7 +107,7 @@ the commands to docker.paddlepaddle.org/paddle.
if cat /proc/cpuinfo | grep -i avx; then echo Yes; else echo No; fi
**NOTE:versions after 0.10.0 will automatically detect system AVX support, so manual detect is not needed in this case.**
To run the CPU-only image as an interactive container:
.. code-block:: bash
......@@ -103,29 +126,6 @@ the commands to docker.paddlepaddle.org/paddle.
nvidia-docker run -it --rm paddlepaddle/paddle:0.10.0-gpu /bin/bash
2. development image :code:`paddlepaddle/paddle:<version>-dev`
This image has packed related develop tools and runtime
environment. Users and developers can use this image instead of
their own local computer to accomplish development, build,
releasing, document writing etc. While different version of paddle
may depends on different version of libraries and tools, if you
want to setup a local environment, you must pay attention to the
versions. The development image contains:
- gcc/clang
- nvcc
- Python
- sphinx
- woboq
- sshd
Many developers use servers with GPUs, they can use ssh to login to
the server and run :code:`docker exec` to enter the docker
container and start their work. Also they can start a development
docker image with SSHD service, so they can login to the container
and start work.
Train Model Using Python API
----------------------------
......
......@@ -13,15 +13,11 @@
# serve to show the default.
import sys
import os, subprocess
sys.path.insert(0, os.path.abspath('@PROJ_ROOT@/python'))
import shlex
from recommonmark import parser, transform
try:
import py_paddle
import paddle
import paddle.v2
except ImportError:
print("Must install paddle python package before generating documentation")
sys.exit(1)
import paddle
import paddle.v2
MarkdownParser = parser.CommonMarkParser
AutoStructify = transform.AutoStructify
......
......@@ -13,15 +13,11 @@
# serve to show the default.
import sys
import os, subprocess
sys.path.insert(0, os.path.abspath('@PROJ_ROOT@/python'))
import shlex
from recommonmark import parser, transform
try:
import py_paddle
import paddle
import paddle.v2
except ImportError:
print("Must install paddle python package before generating documentation")
sys.exit(1)
import paddle
import paddle.v2
MarkdownParser = parser.CommonMarkParser
......
......@@ -32,7 +32,7 @@ import (
func main() {
port := flag.Int("port", 0, "port of the pserver")
index := flag.Int("index", -1, "index of this pserver, should be larger or equal than 0")
index := flag.Int("index", -1, "index of the pserver, set to -1 if use etcd for auto pserver index registry")
etcdEndpoint := flag.String("etcd-endpoint", "http://127.0.0.1:2379",
"comma separated endpoint string for pserver to connect to etcd")
dialTimeout := flag.Duration("dial-timeout", 5*time.Second, "dial timeout")
......@@ -60,12 +60,12 @@ func main() {
idx, err = e.Register(*port)
candy.Must(err)
cp, err = pserver.NewCheckpointFromFile(*checkpointPath, idx, e)
cp, err = pserver.LoadCheckpoint(e, idx)
if err != nil {
if err == pserver.ErrCheckpointNotFound {
log.Infof("Could not find the pserver checkpoint.")
} else {
log.Errorf("Fetch checkpoint failed, %s", err)
panic(err)
}
}
}
......
hash: 2a1c0eca5c07a130e3d224f9821f96cfa37a39bf6bce141c855bbc57ef569f1c
updated: 2017-07-29T07:34:48.722757905+08:00
hash: 1b9b07408ca7fac27a374dc2ccd2433e4bff090484008a037df967284949a582
updated: 2017-08-03T21:46:51.744995189Z
imports:
- name: github.com/beorn7/perks
version: 4c0e84591b9aa9e6dcfdf3e020114cd81f89d5f9
......@@ -145,6 +145,8 @@ imports:
version: a1dba9ce8baed984a2495b658c82687f8157b98f
subpackages:
- xfs
- name: github.com/satori/go.uuid
version: 879c5887cd475cd7864858769793b2ceb0d44feb
- name: github.com/sirupsen/logrus
version: a3f95b5c423586578a4e099b11a46c2479628cac
- name: github.com/topicai/candy
......
......@@ -14,11 +14,13 @@ import:
version: ^1.0.0
- package: github.com/topicai/candy
- package: golang.org/x/crypto
vcs: git
repo: https://github.com/golang/crypto.git
- package: golang.org/x/sys
vcs: git
- package: golang.org/x/sys
repo: https://github.com/golang/sys.git
- package: golang.org/x/text
vcs: git
- package: golang.org/x/text
repo: https://github.com/golang/text.git
vcs: git
- package: github.com/satori/go.uuid
version: v1.1.0
......@@ -77,11 +77,12 @@ type taskEntry struct {
NumFailure int
}
type taskQueues struct {
type masterState struct {
Todo []taskEntry
Pending map[int]taskEntry // map from task ID to task entry
Done []taskEntry
Failed []taskEntry
CurPass int
}
// Service is the master server service.
......@@ -94,11 +95,11 @@ type Service struct {
ready chan struct{}
initDone bool
mu sync.Mutex
taskQueues taskQueues
currPass int
jobTasks []taskEntry
mu sync.Mutex
// State to be persisted to snapshot.
state masterState
// The trainer that is currently saving model. This state is
// transient, does not need to be persisted to snapshot.
savingTrainer string
}
......@@ -141,8 +142,8 @@ func NewService(store Store, chunksPerTask int, timeoutDur time.Duration, failur
s.chunksPerTask = chunksPerTask
s.timeoutDur = timeoutDur
s.failureMax = failureMax
s.taskQueues = taskQueues{}
s.taskQueues.Pending = make(map[int]taskEntry)
s.state = masterState{}
s.state.Pending = make(map[int]taskEntry)
s.ready = make(chan struct{})
s.store = store
recovered, err := s.recover()
......@@ -180,7 +181,7 @@ func (s *Service) recover() (bool, error) {
}
dec := gob.NewDecoder(gr)
var tqs taskQueues
var tqs masterState
err = dec.Decode(&tqs)
if err != nil {
return false, err
......@@ -193,7 +194,12 @@ func (s *Service) recover() (bool, error) {
log.Errorln(err)
}
s.taskQueues = tqs
s.state = tqs
log.WithFields(s.logFields()).Infof("Master recovered from snapshot, scheduling pending task timeout check.")
for _, t := range s.state.Pending {
time.AfterFunc(s.timeoutDur, s.checkTimeoutFunc(t.Task.Meta.ID, t.Task.Meta.Epoch))
}
return true, nil
}
......@@ -208,7 +214,7 @@ func (s *Service) snapshot() error {
var buf bytes.Buffer
gw := gzip.NewWriter(&buf)
enc := gob.NewEncoder(gw)
err := enc.Encode(s.taskQueues)
err := enc.Encode(s.state)
if err != nil {
return err
}
......@@ -290,8 +296,7 @@ func (s *Service) SetDataset(globPaths []string, _ *int) error {
return err
}
s.jobTasks = partition(chunks, s.chunksPerTask)
s.taskQueues.Todo = s.jobTasks
s.state.Todo = partition(chunks, s.chunksPerTask)
err = s.snapshot()
if err != nil {
......@@ -319,17 +324,17 @@ func (s *Service) processFailedTask(t taskEntry, epoch int) {
}
}()
delete(s.taskQueues.Pending, t.Task.Meta.ID)
delete(s.state.Pending, t.Task.Meta.ID)
t.NumFailure++
if t.NumFailure > s.failureMax {
log.Warningf("Task %v failed %d times, discard.", t.Task, t.NumFailure)
s.taskQueues.Failed = append(s.taskQueues.Failed, t)
s.state.Failed = append(s.state.Failed, t)
return
}
log.Warningf("Task %v failed %d times, re-dispatch.", t.Task, t.NumFailure)
s.taskQueues.Todo = append(s.taskQueues.Todo, t)
s.state.Todo = append(s.state.Todo, t)
return
}
......@@ -338,7 +343,7 @@ func (s *Service) checkTimeoutFunc(taskID int, epoch int) func() {
s.mu.Lock()
defer s.mu.Unlock()
t, ok := s.taskQueues.Pending[taskID]
t, ok := s.state.Pending[taskID]
if !ok {
return
}
......@@ -350,10 +355,11 @@ func (s *Service) checkTimeoutFunc(taskID int, epoch int) func() {
// must be called with lock held.
func (s *Service) logFields() log.Fields {
return log.Fields{
"todoLen": len(s.taskQueues.Todo),
"pendingLen": len(s.taskQueues.Pending),
"doneLen": len(s.taskQueues.Done),
"failedLen": len(s.taskQueues.Failed),
"todoLen": len(s.state.Todo),
"pendingLen": len(s.state.Pending),
"doneLen": len(s.state.Done),
"failedLen": len(s.state.Failed),
"curPass": s.state.CurPass,
}
}
......@@ -366,17 +372,17 @@ func (s *Service) GetTask(passID int, task *Task) error {
s.mu.Lock()
defer s.mu.Unlock()
if passID < s.currPass {
if passID < s.state.CurPass {
return ErrPassBefore
}
if passID > s.currPass {
if passID > s.state.CurPass {
// Client may get run to pass after master when one client faster than the
// other
return ErrPassAfter
}
if len(s.taskQueues.Todo) == 0 {
if len(s.taskQueues.Done) == 0 && len(s.taskQueues.Pending) == 0 {
if len(s.state.Todo) == 0 {
if len(s.state.Done) == 0 && len(s.state.Pending) == 0 {
log.WithFields(s.logFields()).Warningln("All tasks failed, may start next pass")
return ErrAllTaskFailed
}
......@@ -384,10 +390,10 @@ func (s *Service) GetTask(passID int, task *Task) error {
return ErrNoMoreAvailable
}
t := s.taskQueues.Todo[0]
t := s.state.Todo[0]
t.Task.Meta.Epoch++
s.taskQueues.Todo = s.taskQueues.Todo[1:]
s.taskQueues.Pending[t.Task.Meta.ID] = t
s.state.Todo = s.state.Todo[1:]
s.state.Pending[t.Task.Meta.ID] = t
err := s.snapshot()
if err != nil {
return err
......@@ -409,7 +415,7 @@ func (s *Service) TaskFinished(taskID int, dummy *int) error {
s.mu.Lock()
defer s.mu.Unlock()
t, ok := s.taskQueues.Pending[taskID]
t, ok := s.state.Pending[taskID]
if !ok {
log.WithFields(s.logFields()).Warningln("Pending task #%d not found.", taskID)
return nil
......@@ -417,18 +423,18 @@ func (s *Service) TaskFinished(taskID int, dummy *int) error {
// task finished, reset timeout
t.NumFailure = 0
s.taskQueues.Done = append(s.taskQueues.Done, t)
delete(s.taskQueues.Pending, taskID)
s.state.Done = append(s.state.Done, t)
delete(s.state.Pending, taskID)
log.WithFields(s.logFields()).Infof("Task #%d finished.", taskID)
if len(s.taskQueues.Todo) == 0 && len(s.taskQueues.Pending) == 0 {
if len(s.state.Todo) == 0 && len(s.state.Pending) == 0 {
// increase master side pass count if all tasks finished
s.currPass++
s.taskQueues.Todo = s.jobTasks
s.taskQueues.Done = []taskEntry{}
s.state.CurPass++
s.state.Todo = append(s.state.Done, s.state.Failed...)
s.state.Done = []taskEntry{}
// TODO(typhoonzero): deal with failed tasks
s.taskQueues.Failed = []taskEntry{}
log.WithFields(s.logFields()).Warningf("all task finished, add new pass data, newpass: %d.", s.currPass)
s.state.Failed = []taskEntry{}
log.WithFields(s.logFields()).Warningf("all task finished, add new pass data, newpass: %d.", s.state.CurPass)
}
err := s.snapshot()
......@@ -447,7 +453,7 @@ func (s *Service) TaskFailed(meta TaskMeta, dummy *int) error {
s.mu.Lock()
defer s.mu.Unlock()
t, ok := s.taskQueues.Pending[meta.ID]
t, ok := s.state.Pending[meta.ID]
if !ok {
log.WithFields(s.logFields()).Warningln("TaskFailed:Pending task #%v not found.", t.Task.Meta)
return nil
......
......@@ -59,7 +59,7 @@ func initClient() [numPserver]int {
go func(l net.Listener) {
var cp pserver.Checkpoint
s, err := pserver.NewService(0, 1, "", nil, cp)
s, err := pserver.NewService(0, time.Hour, "", nil, cp)
if err != nil {
panic(err)
}
......
......@@ -103,7 +103,7 @@ func (p *EtcdClient) List() []Server {
time.Sleep(p.timeout)
continue
}
log.Infof("got value (%s) for key: %s", psAddr, psKey)
log.Debugf("got value (%s) for key: %s", psAddr, psKey)
servers[i].Index = i
servers[i].Addr = psAddr
}
......
......@@ -206,6 +206,7 @@ func (e *EtcdClient) GetKey(key string, timeout time.Duration) ([]byte, error) {
if err != nil {
return []byte{}, err
}
kvs := resp.Kvs
if len(kvs) == 0 {
return []byte{}, nil
......@@ -215,9 +216,14 @@ func (e *EtcdClient) GetKey(key string, timeout time.Duration) ([]byte, error) {
}
// PutKey put into etcd with value by key specified
func (e *EtcdClient) PutKey(key string, value []byte, timeout time.Duration) error {
func (e *EtcdClient) PutKey(key string, value []byte, timeout time.Duration, withLease bool) error {
ctx, cancel := context.WithTimeout(context.Background(), timeout)
_, err := e.client.Put(ctx, key, string(value), clientv3.WithLease(e.sess.Lease()))
var err error
if withLease {
_, err = e.client.Put(ctx, key, string(value), clientv3.WithLease(e.sess.Lease()))
} else {
_, err = e.client.Put(ctx, key, string(value))
}
cancel()
return err
}
......
......@@ -32,6 +32,7 @@ type optimizer struct {
opt *C.struct_paddle_optimizer
elementType ElementType
contentLen int
config []byte
}
func cArrayToSlice(p unsafe.Pointer, len int) []byte {
......@@ -70,6 +71,7 @@ func newOptimizer(paramWithConfigs ParameterWithConfig, State []byte) *optimizer
cstate = unsafe.Pointer(&s[0])
}
o.config = c
o.opt = C.paddle_create_optimizer((*C.uchar)(&c[0]), C.int(len(c)),
C.paddle_element_type(p.ElementType), cbuffer, C.int(paramBufferSize), (*C.char)(cstate), C.int(len(s)))
return o
......
......@@ -25,11 +25,13 @@ import (
"fmt"
"io/ioutil"
"os"
"path/filepath"
"path"
"strconv"
"sync"
"time"
uuid "github.com/satori/go.uuid"
log "github.com/sirupsen/logrus"
)
......@@ -42,9 +44,9 @@ var ErrCheckpointNotFound = errors.New("checkpoint not found")
// RPC error message.
const (
AlreadyInitialized = "pserver already initialized"
Uninitialized = "pserver not fully initialized"
CheckpointMD5Failed = "checkpoint file MD5 validation failed"
AlreadyInitialized = "pserver already initialized"
Uninitialized = "pserver not fully initialized"
WrongChecksum = "checkpoint file checksum validation failed"
)
// Supported element types.
......@@ -73,11 +75,12 @@ type ParameterWithConfig struct {
// checkpointMeta saves checkpoint metadata
type checkpointMeta struct {
UUID string `json:"uuid"`
Path string `json:"path"`
MD5 string `json:"md5"`
Timestamp int64 `json:"timestamp"`
}
// Checkpoint is the pserver shard persist in file
// Checkpoint is the pserver shard persist in file.
type Checkpoint []parameterCheckpoint
// Gradient is the gradient of the parameter.
......@@ -90,50 +93,58 @@ type Service struct {
checkpointInterval time.Duration
checkpointPath string
client *EtcdClient
mu sync.Mutex
optMap map[string]*optimizer
mu sync.Mutex
optMap map[string]*optimizer
}
// parameterCheckpoint saves parameter checkpoint
// parameterCheckpoint saves parameter checkpoint.
type parameterCheckpoint struct {
ParameterWithConfig
State []byte
}
// NewCheckpointFromFile loads parameters and state from checkpoint file
func NewCheckpointFromFile(cpPath string, idx int, e *EtcdClient) (Checkpoint, error) {
v, err := e.GetKey(PsPath+string(idx), 3*time.Second)
func loadMeta(e *EtcdClient, idx int) (meta checkpointMeta, err error) {
v, err := e.GetKey(PsCheckpoint+strconv.Itoa(idx), 3*time.Second)
if err != nil {
return nil, err
return
}
if len(v) == 0 {
return nil, ErrCheckpointNotFound
err = ErrCheckpointNotFound
return
}
var cpMeta checkpointMeta
if err = json.Unmarshal(v, &cpMeta); err != nil {
return nil, err
if err = json.Unmarshal(v, &meta); err != nil {
return
}
fn := filepath.Join(cpPath, cpMeta.UUID)
if _, err = os.Stat(fn); os.IsNotExist(err) {
return
}
// LoadCheckpoint loads checkpoint from file.
func LoadCheckpoint(e *EtcdClient, idx int) (Checkpoint, error) {
cpMeta, err := loadMeta(e, idx)
if err != nil {
return nil, err
}
content, err := ioutil.ReadFile(fn)
content, err := ioutil.ReadFile(cpMeta.Path)
if err != nil {
return nil, err
}
// TODO(helin): change MD5 to CRC since CRC is better for file
// checksum in our use case (emphasize speed over security).
h := md5.New()
md5 := hex.EncodeToString(h.Sum(content))
if md5 != cpMeta.MD5 {
return nil, errors.New(CheckpointMD5Failed)
return nil, errors.New(WrongChecksum)
}
dec := gob.NewDecoder(bytes.NewReader(content))
cp := Checkpoint{}
if err = dec.Decode(cp); err != nil {
var cp Checkpoint
if err = dec.Decode(&cp); err != nil {
return nil, err
}
return cp, nil
......@@ -193,6 +204,15 @@ func (s *Service) FinishInitParams(_ int, _ *int) error {
}
close(s.initialized)
go func() {
t := time.Tick(s.checkpointInterval)
for range t {
err := s.checkpoint()
if err != nil {
log.Errorln(err)
}
}
}()
return nil
}
......@@ -240,23 +260,36 @@ func (s *Service) GetParam(name string, parameter *Parameter) error {
return nil
}
// pserver save checkpoint
func (s *Service) doCheckpoint() (err error) {
<-s.initialized
s.mu.Lock()
defer s.mu.Unlock()
func traceTime(start time.Time, name string) {
elapsed := time.Since(start)
log.Infof("%s took %v", name, elapsed)
}
// checkpoint saves checkpoint to disk.
//
// checkpoint should be only called after the parameters are
// initialized.
func (s *Service) checkpoint() (err error) {
log.Infoln("Begin save checkpoint.")
defer traceTime(time.Now(), "save checkpoint")
s.mu.Lock()
cp := make([]parameterCheckpoint, len(s.optMap))
index := 0
// TODO(helin): write checkpoint incrementally to reduce memory
// footprint during checkpoint.
for name, opt := range s.optMap {
var pc parameterCheckpoint
pc.Param.Name = name
pc.Param.ElementType = opt.elementType
pc.Param.Content = opt.GetWeights()
pc.Config = opt.config
pc.State = opt.GetStates()
cp[index] = pc
index++
}
s.mu.Unlock()
var buf bytes.Buffer
encoder := gob.NewEncoder(&buf)
err = encoder.Encode(cp)
......@@ -264,32 +297,9 @@ func (s *Service) doCheckpoint() (err error) {
return
}
cpMeta := checkpointMeta{}
cpMeta.UUID = s.checkpointPath + strconv.Itoa(s.idx)
cpMeta.Timestamp = time.Now().UnixNano()
h := md5.New()
cpMeta.MD5 = hex.EncodeToString(h.Sum(buf.Bytes()))
cpMetajson, err := json.Marshal(cpMeta)
if err != nil {
return
}
err = s.client.PutKey(filepath.Join(PsCheckpoint, strconv.Itoa(s.idx)), cpMetajson, 3*time.Second)
if err != nil {
return
}
if _, err = os.Stat(cpMeta.UUID); os.IsNotExist(err) {
log.Info("checkpoint does not exists.")
} else {
err = os.Remove(cpMeta.UUID)
if err != nil {
log.Infof("Removing checkpoint %s failed", cpMeta.UUID)
} else {
log.Infof("checkpoint %s already exsits, removing ", cpMeta.UUID)
}
}
f, err := os.Create(cpMeta.UUID)
id := uuid.NewV4().String()
p := path.Join(s.checkpointPath, id)
f, err := os.Create(p)
if err != nil {
return
}
......@@ -317,5 +327,43 @@ func (s *Service) doCheckpoint() (err error) {
return
}
oldMeta, err := loadMeta(s.client, s.idx)
if err == ErrCheckpointNotFound {
log.Infoln("Do not have existing checkpoint.")
err = nil
}
if err != nil {
return
}
h := md5.New()
md5 := hex.EncodeToString(h.Sum(buf.Bytes()))
cpMeta := checkpointMeta{
UUID: id,
Timestamp: time.Now().UnixNano(),
MD5: md5,
Path: p,
}
json, err := json.Marshal(cpMeta)
if err != nil {
return
}
err = s.client.PutKey(PsCheckpoint+strconv.Itoa(s.idx), json, 3*time.Second, false)
if err != nil {
return
}
if oldMeta.Path != "" {
rmErr := os.Remove(oldMeta.Path)
if rmErr != nil {
// log error, but still treat checkpoint as
// successful.
log.Errorln(rmErr)
}
}
return
}
......@@ -30,7 +30,7 @@ const (
func TestServiceFull(t *testing.T) {
var cp pserver.Checkpoint
s, err := pserver.NewService(0, 1, "", nil, cp)
s, err := pserver.NewService(0, time.Hour, "", nil, cp)
if err != nil {
t.Error(err)
}
......@@ -102,7 +102,7 @@ func TestServiceFull(t *testing.T) {
func TestMultipleInit(t *testing.T) {
var cp pserver.Checkpoint
s, err := pserver.NewService(0, 1, "", nil, cp)
s, err := pserver.NewService(0, time.Hour, "", nil, cp)
if err != nil {
t.Fatal(err)
}
......@@ -119,7 +119,7 @@ func TestMultipleInit(t *testing.T) {
func TestUninitialized(t *testing.T) {
var cp pserver.Checkpoint
s, err := pserver.NewService(0, 1, "", nil, cp)
s, err := pserver.NewService(0, time.Hour, "", nil, cp)
err = s.SendGrad(pserver.Gradient{}, nil)
if err.Error() != pserver.Uninitialized {
t.Fatal(err)
......@@ -128,7 +128,7 @@ func TestUninitialized(t *testing.T) {
func TestBlockUntilInitialized(t *testing.T) {
var cp pserver.Checkpoint
s, err := pserver.NewService(0, 1, "", nil, cp)
s, err := pserver.NewService(0, time.Hour, "", nil, cp)
if err != nil {
t.Error(err)
}
......
......@@ -22,7 +22,5 @@ if(WITH_C_API)
endif()
if(WITH_SWIG_PY)
configure_file(${CMAKE_CURRENT_SOURCE_DIR}/setup.py.in
${CMAKE_CURRENT_SOURCE_DIR}/setup.py)
add_subdirectory(api)
endif()
......@@ -82,9 +82,7 @@ SWIG_LINK_LIBRARIES(swig_paddle
add_custom_command(OUTPUT ${PROJ_ROOT}/paddle/py_paddle/_swig_paddle.so
COMMAND cp ${CMAKE_CURRENT_BINARY_DIR}/swig_paddle.py ${PROJ_ROOT}/paddle/py_paddle
COMMAND cp ${CMAKE_CURRENT_BINARY_DIR}/_swig_paddle.so ${PROJ_ROOT}/paddle/py_paddle
COMMAND env ${py_env} ${PYTHON_EXECUTABLE} setup.py bdist_wheel
COMMAND ${CMAKE_COMMAND} -E touch dist/.timestamp
COMMAND rm -rf py_paddle.egg-info build
COMMAND ${CMAKE_COMMAND} -E touch .timestamp
WORKING_DIRECTORY ${PROJ_ROOT}/paddle
DEPENDS _swig_paddle
)
......@@ -92,10 +90,6 @@ add_custom_command(OUTPUT ${PROJ_ROOT}/paddle/py_paddle/_swig_paddle.so
# TODO(yuyang18) : make wheel name calculated by cmake
add_custom_target(python_api_wheel ALL DEPENDS ${PROJ_ROOT}/paddle/py_paddle/_swig_paddle.so)
install(DIRECTORY ${CMAKE_SOURCE_DIR}/paddle/dist/
DESTINATION opt/paddle/share/wheels
)
if(WITH_TESTING)
IF(NOT PY_PIP_FOUND)
SET(PIP_SOURCES_DIR ${PYTHON_SOURCES_DIR}/pip)
......@@ -108,7 +102,7 @@ if(WITH_TESTING)
BUILD_COMMAND ""
INSTALL_COMMAND env ${py_env} ${PYTHON_EXECUTABLE} setup.py install
BUILD_IN_SOURCE 1
DEPENDS python setuptools python_api_wheel
#DEPENDS python setuptools python_api_wheel
)
ENDIF()
add_subdirectory(test)
......
......@@ -39,6 +39,7 @@ set(CUDA_CU_SOURCES
src/hl_cuda_lstm.cu
src/hl_top_k.cu
src/hl_batch_transpose.cu
src/hl_batch_norm.cu
src/hl_cuda_sequence.cu
src/hl_table_apply.cu)
......
/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserve.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
#ifndef HL_BATCH_NORM_H_
#define HL_BATCH_NORM_H_
#include "hl_base.h"
/**
* @brief batch norm inferece.
*
* @param[in] input input data.
* @param[out] output output data.
* @param[in] scale batch normalization scale parameter (in original
* paper scale is referred to as gamma).
* @param[in] bias batch normalization bias parameter (in original
* paper scale is referred to as beta).
* @param[in] estimatedMean
* @param[in] estimatedVar The moving mean and variance
* accumulated during the training phase are passed
* as inputs here.
* @param[in] epsilon Epsilon value used in the batch
* normalization formula.
*/
extern void hl_batch_norm_cuda_inference(const real* input,
real* output,
const real* scale,
const real* bias,
const real* estimatedMean,
const real* estimatedVar,
const double epsilon,
size_t batchSize,
size_t channel,
size_t height,
size_t width);
#endif // HL_BATCH_NORM_H_
/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserve.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
#include "hl_batch_norm.h"
__global__ void batchNormInference(real* output,
const real* input,
const real* scale,
const real* bias,
const real* estimatedMean,
const real* estimatedVar,
const double epsilon,
size_t batchSize,
size_t channel,
size_t height,
size_t width) {
const int tid = threadIdx.x;
const int num = channel * height * width;
const int batch = blockIdx.x;
for (int i = tid; i < num; i += blockDim.x) {
const int c = i / (height * width);
const int id = batch * num + i;
real val = input[id] - estimatedMean[c];
val /= sqrt(estimatedVar[c] + epsilon);
val *= scale[c];
val += bias[c];
output[id] = val;
}
}
void hl_batch_norm_cuda_inference(const real* input,
real* output,
const real* scale,
const real* bias,
const real* estimatedMean,
const real* estimatedVar,
const double epsilon,
size_t batchSize,
size_t channel,
size_t height,
size_t width) {
batchNormInference<<<batchSize, 256, 0, STREAM_DEFAULT>>>(output,
input,
scale,
bias,
estimatedMean,
estimatedVar,
epsilon,
batchSize,
channel,
height,
width);
CHECK_SYNC("hl_batch_norm_cuda_inference failed!");
}
......@@ -1023,14 +1023,6 @@ void hl_batch_norm_forward_inference(hl_tensor_descriptor inputDesc,
real beta = 1.0f;
cudnnBatchNormMode_t mode = CUDNN_BATCHNORM_SPATIAL;
int batch_size = ((cudnn_tensor_descriptor)inputDesc)->batch_size;
if (batch_size > 1024 && g_cudnn_lib_version < 6000) {
LOG(INFO) << " To process current batch data with size " << batch_size
<< " (>1024), cudnnBatchNorm requires cuDNN version >= 6000."
<< " If there is an error complaining CUDNN_STATUS_NOT_SUPPORTED,"
<< " just recompile PaddlePaddle with cuDNN >= 6000, replacing"
<< " current version " << g_cudnn_lib_version;
}
CHECK_CUDNN(
dynload::cudnnBatchNormalizationForwardInference(t_resource.cudnn_handle,
mode,
......
......@@ -35,6 +35,8 @@ add_dependencies(framework_py_proto framework_py_proto_init)
cc_library(backward SRCS backward.cc DEPS net_op)
cc_test(backward_test SRCS backward_test.cc DEPS backward)
if(WITH_PYTHON)
cc_library(paddle_pybind SHARED
SRCS pybind.cc
DEPS pybind python backward
......@@ -43,4 +45,6 @@ cc_library(paddle_pybind SHARED
add_op
mean_op
cross_entropy_op
fill_zeros_like_op
recurrent_op)
endif(WITH_PYTHON)
......@@ -260,6 +260,12 @@ class OpRegistry {
return CreateOp(op_desc.type(), inputs, outputs, attrs);
}
static bool SupportGPU(const std::string& op_type) {
OperatorWithKernel::OpKernelKey key;
key.place_ = platform::GPUPlace();
return OperatorWithKernel::AllOpKernels().at(op_type).count(key) != 0;
}
static std::shared_ptr<OperatorBase> CreateGradOp(const OperatorBase& op) {
PADDLE_ENFORCE(!op.IsNetOp(),
"Use framework::Backward to get backward ops");
......
......@@ -34,8 +34,8 @@ ExecutionContext::GetEigenDevice<platform::GPUPlace, Eigen::GpuDevice>() const {
#endif
const std::string& OperatorBase::Input(const std::string& name) const {
PADDLE_ENFORCE(in_out_idxs_ != nullptr,
"Input Output Indices could not be nullptr");
PADDLE_ENFORCE_NOT_NULL(in_out_idxs_,
"Input Output Indices could not be nullptr");
auto it = in_out_idxs_->find(name);
PADDLE_ENFORCE(it != in_out_idxs_->end(), "no key [%s] in in_out_idxs_",
name);
......@@ -49,7 +49,7 @@ const std::string& OperatorBase::Input(const std::string& name) const {
}
std::vector<std::string> OperatorBase::Inputs(const std::string& name) const {
PADDLE_ENFORCE(in_out_idxs_ != nullptr, "IO Idx could not be nullptr");
PADDLE_ENFORCE_NOT_NULL(in_out_idxs_, "IO Idx could not be nullptr");
auto input_format = GetAttr<std::vector<int>>("input_format");
auto offset = in_out_idxs_->at(name);
PADDLE_ENFORCE(input_format.at(static_cast<size_t>(offset) + 1) <=
......@@ -62,7 +62,7 @@ std::vector<std::string> OperatorBase::Inputs(const std::string& name) const {
}
const std::string& OperatorBase::Output(const std::string& name) const {
PADDLE_ENFORCE(in_out_idxs_ != nullptr, "InOut Indice could not be nullptr");
PADDLE_ENFORCE_NOT_NULL(in_out_idxs_, "InOut Indice could not be nullptr");
auto it = in_out_idxs_->find(name);
PADDLE_ENFORCE(it != in_out_idxs_->end(), "no key [%s] in in_out_idxs_",
name);
......@@ -76,7 +76,7 @@ const std::string& OperatorBase::Output(const std::string& name) const {
}
std::vector<std::string> OperatorBase::Outputs(const std::string& name) const {
PADDLE_ENFORCE(in_out_idxs_ != nullptr, "InOut Indice could not be nullptr");
PADDLE_ENFORCE_NOT_NULL(in_out_idxs_, "InOut Indice could not be nullptr");
auto output_format = GetAttr<std::vector<int>>("output_format");
auto offset = in_out_idxs_->at(name);
PADDLE_ENFORCE(output_format.at(static_cast<size_t>(offset) + 1) <=
......
......@@ -167,28 +167,32 @@ class OperatorContext {
template <typename T>
const T* Input(const size_t index) const {
auto var = InputVar(index);
PADDLE_ENFORCE(var != nullptr, "Input(%d) should not be nullptr", index);
PADDLE_ENFORCE_NOT_NULL(var, "Input(%d) should not be nullptr", index);
return &var->Get<T>();
}
template <typename T>
T* Output(const size_t index) const {
auto var = OutputVar(index);
PADDLE_ENFORCE(var != nullptr, "Output(%d) should not be nullptr", index);
PADDLE_ENFORCE_NOT_NULL(
var,
"Output(%d) not be nullptr, which means variable [%s] does not "
"exist in scope",
index, op_.outputs_[index]);
return var->GetMutable<T>();
}
template <typename T>
const T* Input(const std::string& name) const {
auto var = InputVar(name);
PADDLE_ENFORCE(var != nullptr, "Input(%s) should not be nullptr", name);
PADDLE_ENFORCE_NOT_NULL(var, "Input(%s) should not be nullptr", name);
return &var->Get<T>();
}
template <typename T>
T* Output(const std::string& name) const {
auto var = OutputVar(name);
PADDLE_ENFORCE(var != nullptr, "Output(%s) should not be nullptr", name);
PADDLE_ENFORCE_NOT_NULL(var, "Output(%s) should not be nullptr", name);
return var->GetMutable<T>();
}
......@@ -200,9 +204,9 @@ class OperatorContext {
std::transform(names.begin(), names.end(), std::back_inserter(res),
[&](const std::string& sub_name) {
auto var = scope_.FindVar(sub_name);
PADDLE_ENFORCE(var != nullptr,
"MultiInput(%s:%s) should not be nullptr",
name, sub_name);
PADDLE_ENFORCE_NOT_NULL(
var, "MultiInput(%s:%s) should not be nullptr", name,
sub_name);
return &var->Get<T>();
});
return res;
......@@ -216,9 +220,9 @@ class OperatorContext {
std::transform(names.begin(), names.end(), std::back_inserter(res),
[&](const std::string& sub_name) {
auto var = scope_.FindVar(sub_name);
PADDLE_ENFORCE(var != nullptr,
"MultiOutput(%s:%s) should not be nullptr",
name, sub_name);
PADDLE_ENFORCE_NOT_NULL(
var, "MultiOutput(%s:%s) should not be nullptr", name,
sub_name);
return var->GetMutable<T>();
});
return res;
......
......@@ -32,7 +32,7 @@ limitations under the License. */
namespace py = pybind11;
USE_OP(add_two);
USE_OP(onehot_cross_entropy);
USE_OP_CPU(onehot_cross_entropy);
USE_OP_WITHOUT_KERNEL(fc);
USE_OP(sgd);
USE_OP(mul);
......@@ -40,6 +40,7 @@ USE_OP(mean);
USE_OP(sigmoid);
USE_OP(softmax);
USE_OP(rowwise_add);
USE_OP(fill_zeros_like);
USE_OP_WITHOUT_KERNEL(recurrent_op);
namespace paddle {
namespace framework {
......@@ -200,6 +201,8 @@ All parameter, weight, gradient are variables in Paddle.
return OpRegistry::CreateOp(desc);
});
operator_base.def_static("support_gpu", &OpRegistry::SupportGPU);
operator_base.def("backward",
[](const OperatorBase &forwardOp,
const std::unordered_set<std::string> &no_grad_vars) {
......
......@@ -127,8 +127,8 @@ class Tensor {
memory::PODDeleter<T, Place>(place)),
place_(place),
size_(size) {
PADDLE_ENFORCE(ptr_ != nullptr, "Insufficient %s memory to allocation.",
is_cpu_place(place_) ? "CPU" : "GPU");
PADDLE_ENFORCE_NOT_NULL(ptr_, "Insufficient %s memory to allocation.",
(is_cpu_place(place_) ? "CPU" : "GPU"));
}
virtual size_t size() const { return size_; }
......
......@@ -14,17 +14,18 @@ limitations under the License. */
#pragma once
#include "paddle/memory/memcpy.h"
#include "paddle/platform/enforce.h"
namespace paddle {
namespace framework {
template <typename T>
inline void Tensor::check_memory_size() const {
PADDLE_ENFORCE(holder_ != nullptr,
"Tenosr holds no memory. Call Tensor::mutable_data first.");
PADDLE_ENFORCE(holder_->size() >= product(dims_) * sizeof(T) + offset_,
"Tensor's dims_ is out of bound. Call Tensor::mutable_data "
"first to re-allocate memory.");
PADDLE_ENFORCE_NOT_NULL(
holder_, "Tenosr holds no memory. Call Tensor::mutable_data first.");
PADDLE_ENFORCE_GE(holder_->size(), product(dims_) * sizeof(T) + offset_,
"Tensor's dims_ is out of bound. Call Tensor::mutable_data "
"first to re-allocate memory.");
}
template <typename T>
......@@ -51,9 +52,9 @@ inline T* Tensor::mutable_data(DDim dims, platform::Place place) {
template <typename T>
inline T* Tensor::mutable_data(platform::Place place) {
static_assert(std::is_pod<T>::value, "T must be POD");
PADDLE_ENFORCE(product(dims_) > 0,
"Tensor's numel must be larger than zero to call "
"Tensor::mutable_data. Call Tensor::set_dim first.");
PADDLE_ENFORCE_GT(product(dims_), 0,
"Tensor's numel must be larger than zero to call "
"Tensor::mutable_data. Call Tensor::set_dim first.");
/* some versions of boost::variant don't have operator!= */
size_t size = product(dims_) * sizeof(T);
if (holder_ == nullptr || !(holder_->place() == place) ||
......@@ -120,11 +121,11 @@ inline void Tensor::CopyFrom(const Tensor& src,
template <typename T>
inline Tensor Tensor::Slice(const int& begin_idx, const int& end_idx) const {
check_memory_size<T>();
PADDLE_ENFORCE(begin_idx >= 0, "Slice begin index is less than zero.");
PADDLE_ENFORCE(end_idx <= dims_[0], "Slice end index is out of bound.");
PADDLE_ENFORCE(begin_idx < end_idx,
"Begin index must be less than end index.");
PADDLE_ENFORCE(dims_[0] != 1, "Can not slice a tensor with dims_[0] = 1.");
PADDLE_ENFORCE_GE(begin_idx, 0, "Slice begin index is less than zero.");
PADDLE_ENFORCE_LE(end_idx, dims_[0], "Slice end index is out of bound.");
PADDLE_ENFORCE_LT(begin_idx, end_idx,
"Begin index must be less than end index.");
PADDLE_ENFORCE_NE(dims_[0], 1, "Can not slice a tensor with dims_[0] = 1.");
int base = product(dims_) / dims_[0];
Tensor dst;
dst.holder_ = holder_;
......
......@@ -36,7 +36,8 @@ TEST(Tensor, DataAssert) {
} catch (paddle::platform::EnforceNotMet err) {
caught = true;
std::string msg =
"Tenosr holds no memory. Call Tensor::mutable_data first.";
"holder_ should not be null\nTenosr holds no memory. Call "
"Tensor::mutable_data first.";
const char* what = err.what();
for (size_t i = 0; i < msg.length(); ++i) {
ASSERT_EQ(what[i], msg[i]);
......@@ -111,7 +112,8 @@ TEST(Tensor, ShareDataWith) {
} catch (paddle::platform::EnforceNotMet err) {
caught = true;
std::string msg =
"Tenosr holds no memory. Call Tensor::mutable_data first.";
"holder_ should not be null\nTenosr holds no memory. Call "
"Tensor::mutable_data first.";
const char* what = err.what();
for (size_t i = 0; i < msg.length(); ++i) {
ASSERT_EQ(what[i], msg[i]);
......
......@@ -14,6 +14,7 @@ limitations under the License. */
#include "CudnnBatchNormLayer.h"
#include "Layer.h"
#include "paddle/cuda/include/hl_batch_norm.h"
#include "paddle/utils/Stat.h"
namespace paddle {
......@@ -79,16 +80,33 @@ void CudnnBatchNormLayer::forward(PassType passType) {
savedInvVar);
} else {
// used movingMean and movingVar in testing
hl_batch_norm_forward_inference(ioDesc_,
input,
ioDesc_,
output,
bnParamDesc_,
gamma,
beta,
movingMean,
movingVar,
EPS);
if (batchSize <= 1024) {
hl_batch_norm_forward_inference(ioDesc_,
input,
ioDesc_,
output,
bnParamDesc_,
gamma,
beta,
movingMean,
movingVar,
EPS);
} else {
// There is a limitation in cudnn library.
// When the batch size is larger than 1024 in cuDNN v5.1,
// the cudnnBatchNormalizationForwardInference will fail.
hl_batch_norm_cuda_inference(input,
output,
gamma,
beta,
movingMean,
movingVar,
EPS,
batchSize,
channels_,
imageH_,
imageW_);
}
}
/* activation */ {
......
/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserve.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
#include "Layer.h"
namespace paddle {
class KmaxSeqScoreLayer : public Layer {
private:
MatrixPtr scores_;
size_t beamSize_;
void kmaxScorePerSeq(const real* score,
real* sortedRes,
const ICpuGpuVectorPtr seqStartPos);
public:
explicit KmaxSeqScoreLayer(const LayerConfig& config) : Layer(config) {}
bool init(const LayerMap& layerMap,
const ParameterMap& parameterMap) override;
void forward(PassType passType) override;
void backward(const UpdateCallback& callback = nullptr) override;
};
REGISTER_LAYER(kmax_seq_score, KmaxSeqScoreLayer);
bool KmaxSeqScoreLayer::init(const LayerMap& layerMap,
const ParameterMap& parameterMap) {
bool ret = Layer::init(layerMap, parameterMap);
CHECK_EQ(1U, inputLayers_.size());
beamSize_ = config_.beam_size();
CHECK_GE(beamSize_, 1U);
setNeedSequenceInfo(false);
setNeedGradient(false);
return ret;
}
void KmaxSeqScoreLayer::kmaxScorePerSeq(const real* scores,
real* sortedIds,
const ICpuGpuVectorPtr seqStartPos) {
int* starts = seqStartPos->getMutableData(false);
std::vector<real> indices;
for (size_t i = 0; i < seqStartPos->getSize() - 1; ++i) {
int seqLen = starts[i + 1] - starts[i];
int k = std::min(static_cast<int>(beamSize_), seqLen);
indices.resize(seqLen, 0);
std::iota(begin(indices), end(indices), 0.);
std::vector<real> tmpScore(scores + starts[i], scores + starts[i + 1]);
std::partial_sort(
begin(indices),
begin(indices) + k,
end(indices),
[&](size_t a, size_t b) { return tmpScore[a] > tmpScore[b]; });
memcpy(sortedIds + (i * beamSize_), indices.data(), k * sizeof(real));
}
}
void KmaxSeqScoreLayer::forward(PassType passType) {
Layer::forward(passType);
const Argument& input = getInput(0);
const MatrixPtr inputScore = getInputValue(0);
CHECK(input.hasSeq() || input.hasSubseq())
<< "input of " << getName()
<< " must be a sequence or a nested sequence.";
CHECK_EQ(input.value->getWidth(), 1UL)
<< "input of " << getName()
<< " is score over a sequence or a nested sequence, so its width "
<< " must be 1.";
if (useGpu_) {
// this Layer runs only in CPU, if the model is runing on GPU,
// then copy the input to this layer from GPU to CPU.
Matrix::resizeOrCreate(scores_,
inputScore->getHeight(),
1,
false /* trans */,
false /* useGpu */);
scores_->copyFrom(*inputScore);
} else {
scores_ = inputScore;
}
Matrix::resizeOrCreate(
output_.value,
input.hasSubseq() ? input.getNumSubSequences() : input.getNumSequences(),
beamSize_,
false,
false);
output_.value->one();
output_.value->mulScalar(-1.);
kmaxScorePerSeq(scores_->getData(),
output_.value->getData(),
input.hasSubseq() ? input.subSequenceStartPositions
: input.sequenceStartPositions);
}
void KmaxSeqScoreLayer::backward(const UpdateCallback& callback) {}
} // namespace paddle
/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserve.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
#include "Layer.h"
#include "paddle/math/Matrix.h"
#include "paddle/math/Vector.h"
#include "paddle/utils/Logging.h"
#include "paddle/utils/Stat.h"
namespace paddle {
class SubNestedSequenceLayer : public Layer {
public:
explicit SubNestedSequenceLayer(const LayerConfig& config) : Layer(config) {}
bool init(const LayerMap& layerMap,
const ParameterMap& parameterMap) override;
void forward(PassType passType) override;
void backward(const UpdateCallback& callback = nullptr) override;
private:
/*
* This functions generates the indices of rows in a batch according to the
* indices of selected sub-sequence in each sequence.
*
* Examples:
* selectedIndices:
* [
* [0, 1, -1],
* [0, 1, 2],
* [0, -1, -1],
* [0, 2, 3],
* ]
* inputSeqInfo:
* [
* [0,3,4],
* [4,5,7,10,15],
* [15,20],
* [20,22,23,25,28]
* ]
*
* ths output is saved to private member rowIndice_;
* [0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,
* 16,17,18,19,20,21,22,23,24,25,26,27]
*/
void calSelectedCols(const MatrixPtr selectedIndices,
const std::vector<std::vector<int>>& inputSeqInfo);
// if the second input of this layer is on GPU memory, copy it to CPU memory.
MatrixPtr selIdsCpu_;
// reorganized sequenceStartPositions and subSequenceStartPositions
// into a 2d vector to facilitate the sequence selection process.
std::vector<std::vector<int>> inputSeqInfoVec_;
// the final selected row indices in a batch,
// rowIdx_ and selectedRows_ actually share a same memory.
IVectorPtr rowIndice_;
std::vector<int> selectedRows_;
};
REGISTER_LAYER(sub_nested_seq, SubNestedSequenceLayer);
bool SubNestedSequenceLayer::init(const LayerMap& layerMap,
const ParameterMap& parameterMap) {
/* Initialize the basic parent class */
Layer::init(layerMap, parameterMap);
CHECK_EQ(2U, inputLayers_.size());
setNeedSequenceInfo(false);
return true;
}
void SubNestedSequenceLayer::calSelectedCols(
const MatrixPtr selectedIndices,
const std::vector<std::vector<int>>& inputSeqInfo) {
selectedRows_.clear();
std::vector<int> outSeqStartInfo(1, 0);
std::vector<int> outSubSeqStartInfo(1, 0);
size_t seqNum = selectedIndices->getHeight();
size_t beamSize = selectedIndices->getWidth();
for (size_t i = 0; i < seqNum; ++i) {
for (size_t j = 0; j < beamSize; ++j) {
if (selectedIndices->getElement(i, j) == -1.) break;
int selSubSeqIdx = selectedIndices->getElement(i, j);
CHECK_GT(inputSeqInfoVec_[i].size() - 1, selSubSeqIdx);
size_t subSeqLen = inputSeqInfoVec_[i][selSubSeqIdx + 1] -
inputSeqInfoVec_[i][selSubSeqIdx];
for (size_t k = 0; k < subSeqLen; ++k)
selectedRows_.push_back(inputSeqInfoVec_[i][selSubSeqIdx] + k);
outSubSeqStartInfo.push_back(outSubSeqStartInfo.back() + subSeqLen);
}
outSeqStartInfo.push_back(outSubSeqStartInfo.back());
}
if (useGpu_) {
rowIndice_ = IVector::create(selectedRows_.size(), useGpu_);
rowIndice_->copyFrom(selectedRows_.data(), selectedRows_.size());
} else {
rowIndice_ =
IVector::create(selectedRows_.data(), selectedRows_.size(), useGpu_);
}
// create the sequence information for the output.
ICpuGpuVector::resizeOrCreate(
output_.sequenceStartPositions, outSeqStartInfo.size(), false);
output_.sequenceStartPositions->copyFrom(
outSeqStartInfo.data(), outSeqStartInfo.size(), false);
ICpuGpuVector::resizeOrCreate(
output_.subSequenceStartPositions, outSubSeqStartInfo.size(), false);
output_.subSequenceStartPositions->copyFrom(
outSubSeqStartInfo.data(), outSubSeqStartInfo.size(), false);
}
void SubNestedSequenceLayer::forward(PassType passType) {
Layer::forward(passType);
const Argument& inputSeq = getInput(0);
CHECK(inputSeq.hasSubseq()) << "The first input of SubNestSequence layer "
<< "must be a nested sequence.";
const MatrixPtr selectedIndices = getInputValue(1);
CHECK_EQ(inputSeq.getNumSequences(), selectedIndices->getHeight());
if (dynamic_cast<GpuMatrix*>(selectedIndices.get())) {
/*
* Currently, the second input for this layer is generated by
* kmax_sequence_score_layer whose output is always stored on CPU,
* or a data_layer which canbe on GPU.
*
* If the second input is on GPU, copy it to CPU memory, because this
* input always uses very few memory, and operations related to it are
* all logic control, not computations.
*/
Matrix::resizeOrCreate(selIdsCpu_,
selectedIndices->getHeight(),
selectedIndices->getWidth(),
false /* trans */,
false /* useGpu */);
selIdsCpu_->copyFrom(*selectedIndices);
} else {
selIdsCpu_ = selectedIndices;
}
Argument::reorganizeSeqInfo(inputSeq.sequenceStartPositions,
inputSeq.subSequenceStartPositions,
inputSeqInfoVec_);
calSelectedCols(selIdsCpu_, inputSeqInfoVec_);
resetOutput(selectedRows_.size(), getSize());
getOutputValue()->selectRows(*getInputValue(0), *rowIndice_);
}
void SubNestedSequenceLayer::backward(const UpdateCallback& callback) {
MatrixPtr inputSeqGrad = getInputGrad(0);
MatrixPtr outputGrad = getOutputGrad();
if (inputSeqGrad) outputGrad->addToRows(*inputSeqGrad, *rowIndice_);
}
} // namespace paddle
# gserver pacakge unittests
file(GLOB_RECURSE GSERVER_HEADER RELATIVE "${CMAKE_CURRENT_SOURCE_DIR}" "*.h")
file(GLOB_RECURSE GSERVER_SOURCES RELATIVE "${CMAKE_CURRENT_SOURCE_DIR}" "*.cpp")
add_style_check_target(paddle_gserver ${GSERVER_SOURCES})
add_style_check_target(paddle_gserver ${GSERVER_HEADER})
################### test_ProtoDataProvider ############
add_unittest_without_exec(test_ProtoDataProvider
test_ProtoDataProvider.cpp)
......@@ -77,6 +72,16 @@ add_unittest_without_exec(test_BatchNorm
add_test(NAME test_BatchNorm
COMMAND test_BatchNorm)
################# test_KmaxSeqScore #######################
add_unittest_without_exec(test_KmaxSeqScore
test_KmaxSeqScore.cpp
LayerGradUtil.cpp)
add_test(NAME test_KmaxSeqScore
COMMAND test_KmaxSeqScore)
################## test_Evaluator #######################
add_unittest(test_Evaluator
test_Evaluator.cpp)
......
......@@ -21,6 +21,8 @@ limitations under the License. */
#include "paddle/utils/GlobalConstants.h"
#include "LayerGradUtil.h"
#include "paddle/cuda/include/hl_batch_norm.h"
#include "paddle/math/tests/TensorCheck.h"
#include "paddle/testing/TestUtil.h"
using namespace paddle; // NOLINT
......@@ -117,6 +119,74 @@ TEST(Layer, batchNorm) {
CHECK_EQ(static_cast<int>(convLayer->getOutputValue()->getWidth()), 576);
}
#ifndef PADDLE_ONLY_CPU
void batchNormInference(int n, int c, int h, int w) {
MatrixPtr input = std::make_shared<GpuMatrix>(n, c * h * w);
MatrixPtr cudnnOut = std::make_shared<GpuMatrix>(n, c * h * w);
MatrixPtr cudaOut = std::make_shared<GpuMatrix>(n, c * h * w);
MatrixPtr cudnnCheck = std::make_shared<CpuMatrix>(n, c * h * w);
MatrixPtr cudaCheck = std::make_shared<CpuMatrix>(n, c * h * w);
input->randomizeUniform();
cudnnOut->zeroMem();
cudaOut->zeroMem();
MatrixPtr scale = std::make_shared<GpuMatrix>(1, c);
scale->randomizeUniform();
MatrixPtr bias = std::make_shared<GpuMatrix>(1, c);
bias->randomizeUniform();
MatrixPtr movingMean = std::make_shared<GpuMatrix>(1, c);
movingMean->randomizeUniform();
MatrixPtr movingVar = std::make_shared<GpuMatrix>(1, c);
movingVar->randomizeUniform();
movingVar->clip(0.01, 50);
hl_tensor_descriptor ioDesc;
hl_tensor_descriptor bnDesc;
hl_create_tensor_descriptor(&ioDesc);
hl_create_tensor_descriptor(&bnDesc);
hl_tensor_reshape(ioDesc, n, c, h, w);
hl_tensor_reshape(bnDesc, 1, c, 1, 1);
double EPS = 1E-5;
hl_batch_norm_forward_inference(ioDesc,
input->getData(),
ioDesc,
cudnnOut->getData(),
bnDesc,
scale->getData(),
bias->getData(),
movingMean->getData(),
movingVar->getData(),
EPS);
hl_batch_norm_cuda_inference(input->getData(),
cudaOut->getData(),
scale->getData(),
bias->getData(),
movingMean->getData(),
movingVar->getData(),
EPS,
n,
c,
h,
w);
cudnnCheck->copyFrom(*cudnnOut);
cudaCheck->copyFrom(*cudaOut);
autotest::TensorCheckErr(*cudnnCheck, *cudaCheck);
hl_destroy_tensor_descriptor(ioDesc);
hl_destroy_tensor_descriptor(bnDesc);
}
TEST(BatchNorm, Inference) {
batchNormInference(33, 267, 1, 1);
batchNormInference(19, 105, 4, 4);
}
#endif
int main(int argc, char** argv) {
testing::InitGoogleTest(&argc, argv);
initMain(argc, argv);
......
/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserve.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
#include <gtest/gtest.h>
#include <algorithm>
#include <string>
#include <vector>
#include "ModelConfig.pb.h"
#include "paddle/gserver/layers/DataLayer.h"
#include "paddle/trainer/Trainer.h"
#include "paddle/utils/GlobalConstants.h"
#include "LayerGradUtil.h"
#include "paddle/testing/TestUtil.h"
using namespace paddle; // NOLINT
using namespace std; // NOLINT
DECLARE_bool(use_gpu);
DECLARE_int32(gpu_id);
DECLARE_bool(thread_local_rand_use_global_seed);
vector<int> randSampling(int range, int n) {
CHECK_GE(range, n);
vector<int> num(range);
iota(begin(num), end(num), 0);
if (range == n) return num;
random_shuffle(begin(num), end(num));
num.resize(n);
return num;
}
void genRandomSeqInfo(vector<int>& seqStartPosition,
vector<int>& subSeqStartPosition) {
const int maxSeqNum = 100;
// generate random start position information
int seqNum = 1 + (rand() % maxSeqNum);
seqStartPosition.resize(seqNum + 1, 0);
subSeqStartPosition.resize(1, 0);
for (int i = 0; i < seqNum; ++i) {
int subSeqLen = 1 + (rand() % maxSeqNum);
for (int j = 0; j < subSeqLen; ++j)
subSeqStartPosition.push_back(subSeqStartPosition.back() + subSeqLen);
seqStartPosition[i + 1] = subSeqStartPosition.back();
}
}
void genRandomGroundTruth(real* values,
vector<vector<int>>& groundTruth,
vector<int>& startPos,
size_t beamSize) {
groundTruth.resize(startPos.size() - 1, vector<int>(beamSize, -1));
for (size_t i = 0; i < startPos.size() - 1; ++i) {
int seqLen = startPos[i + 1] - startPos[i];
vector<int> pos =
randSampling(seqLen, min(static_cast<int>(beamSize), seqLen));
for (size_t j = 0; j < pos.size(); ++j) {
groundTruth[i][j] = pos[j];
values[startPos[i] + pos[j]] = 1.;
}
}
}
void checkLayerOut(vector<vector<int>> groundTruth,
real* layerOut,
size_t beamSize) {
for (size_t i = 0; i < groundTruth.size(); ++i) {
int begPos = i * beamSize;
vector<real> tmp(layerOut + begPos, layerOut + begPos + beamSize);
sort(begin(tmp), end(tmp));
sort(begin(groundTruth[i]), end(groundTruth[i]));
for (size_t j = 0; j < beamSize; ++j) CHECK_EQ(tmp[j], groundTruth[i][j]);
}
}
TEST(Layer, kmaxSeqScoreLayer) {
const size_t maxBeamSize = 100;
int beamSize = 1 + (rand() % maxBeamSize);
vector<int> seqStartPosition;
vector<int> subSeqStartPosition;
genRandomSeqInfo(seqStartPosition, subSeqStartPosition);
MatrixPtr inValue =
Matrix::create(subSeqStartPosition.back(), 1, false, false);
for (auto hasSubseq : {false, true}) {
vector<vector<int>> groundTruth;
inValue->randomizeUniform();
genRandomGroundTruth(inValue->getData(),
groundTruth,
hasSubseq ? subSeqStartPosition : seqStartPosition,
beamSize);
for (auto useGpu : {false, true}) {
TestConfig config;
config.layerConfig.set_type("kmax_seq_score");
config.layerConfig.set_beam_size(beamSize);
if (hasSubseq) {
config.inputDefs.push_back({INPUT_SELF_DEFINE_DATA,
"scores",
inValue,
seqStartPosition,
subSeqStartPosition});
} else {
config.inputDefs.push_back(
{INPUT_SELF_DEFINE_DATA, "scores", inValue, seqStartPosition});
}
config.layerConfig.add_inputs();
// data layer initialize
std::vector<DataLayerPtr> dataLayers;
LayerMap layerMap;
vector<Argument> datas;
initDataLayer(
config,
&dataLayers,
&datas,
&layerMap,
"kmax_seq_score",
100 /* actually this parameter is unused in self-defined input*/,
false,
useGpu);
// test layer initialize
std::vector<ParameterPtr> parameters;
LayerPtr kmaxSeqScoreLayer;
FLAGS_use_gpu = useGpu;
initTestLayer(config, &layerMap, &parameters, &kmaxSeqScoreLayer);
kmaxSeqScoreLayer->forward(PASS_TRAIN);
const MatrixPtr outValue = kmaxSeqScoreLayer->getOutputValue();
CHECK_EQ(outValue->getHeight(),
hasSubseq ? subSeqStartPosition.size() - 1
: seqStartPosition.size() - 1);
CHECK_EQ(outValue->getWidth(), beamSize);
checkLayerOut(groundTruth, outValue->getData(), beamSize);
}
}
}
int main(int argc, char** argv) {
testing::InitGoogleTest(&argc, argv);
initMain(argc, argv);
FLAGS_thread_local_rand_use_global_seed = true;
srand((size_t)(time(NULL)));
return RUN_ALL_TESTS();
}
......@@ -1899,6 +1899,84 @@ TEST(Layer, CropLayer) {
}
}
vector<real> randSampling(real range, int n) {
CHECK_GE(range, n);
vector<real> num(range);
iota(begin(num), end(num), 0.);
if (range == n) return num;
random_shuffle(begin(num), end(num));
num.resize(n);
sort(begin(num), end(num));
return num;
}
TEST(Layer, SubNestedSequenceLayer) {
// layer size is not crutial for this layer,
// so use a small layer size in unittest
const int layerSize = 4;
const int maxSeqNum = 50;
const int maxSeqLen = 50;
const int maxBeamSize = 32;
srand((size_t)(time(NULL)));
int beamSize = 1 + (rand() % maxBeamSize);
TestConfig config;
config.layerConfig.set_type("sub_nested_seq");
config.layerConfig.set_name("sub_nested_seq_layer");
config.layerConfig.set_size(layerSize);
int seqNum = 1 + (rand() % maxSeqNum);
// sequence information for the first input, it is a nested sequence
vector<int> seqStartPos(seqNum + 1, 0);
vector<int> subSeqStartPos(1, 0);
// selected indices
MatrixPtr selectedIndices = Matrix::create(seqNum, beamSize, false, false);
selectedIndices->one();
selectedIndices->mulScalar(-1.);
real* indicesData = selectedIndices->getData();
for (int i = 0; i < seqNum; ++i) {
int subSeqNum = 1 + (rand() % maxSeqNum);
for (int j = 0; j < subSeqNum; ++j) {
subSeqStartPos.push_back(subSeqStartPos.back() +
(1 + (rand() % maxSeqLen)));
}
vector<real> selSeqs =
randSampling(static_cast<real>(subSeqNum), min(beamSize, subSeqNum));
memcpy(indicesData + (i * beamSize),
selSeqs.data(),
selSeqs.size() * sizeof(real));
seqStartPos[i + 1] = subSeqStartPos.back();
}
MatrixPtr seqInputPtr =
Matrix::create(seqStartPos.back(), layerSize, false, false);
seqInputPtr->randomizeUniform();
config.inputDefs.push_back({INPUT_SELF_DEFINE_DATA,
"nested_seq_input",
seqInputPtr,
seqStartPos,
subSeqStartPos});
config.layerConfig.add_inputs();
config.inputDefs.push_back(
{INPUT_SELF_DEFINE_DATA, "selected_indices", selectedIndices});
config.layerConfig.add_inputs();
for (auto useGpu : {false, true}) {
testLayerGrad(config,
"sub_nested_seq",
/* batchSize */ seqNum,
/* trans */ false,
/* useGpu*/ useGpu,
/* useWeight */ false);
}
}
TEST(Layer, ClipLayer) {
const size_t batchSize = 128;
const size_t size = 512;
......
......@@ -20,10 +20,9 @@ namespace operators {
class AddOp : public OperatorWithKernel {
protected:
void InferShape(const InferShapeContext &ctx) const override {
PADDLE_ENFORCE(ctx.InputSize() == 2, "Input size of AddOp must be two");
PADDLE_ENFORCE(ctx.OutputSize() == 1, "Output size of AddOp must be one");
PADDLE_ENFORCE(ctx.InputVar(0) != nullptr && ctx.InputVar(1) != nullptr,
"Inputs of AddOp must all be set");
PADDLE_ENFORCE_EQ(ctx.InputSize(), 2);
PADDLE_ENFORCE_EQ(ctx.OutputSize(), 1);
PADDLE_ENFORCE_NOT_NULL(ctx.InputVar(0), "Inputs of AddOp must all be set");
PADDLE_ENFORCE(ctx.OutputVar(0) != nullptr,
"Outputs of AddOp must all be set");
PADDLE_ENFORCE(ctx.Input<Tensor>(0)->dims() == ctx.Input<Tensor>(1)->dims(),
......
......@@ -20,18 +20,19 @@ namespace operators {
class OnehotCrossEntropyOp : public OperatorWithKernel {
protected:
void InferShape(const InferShapeContext &ctx) const override {
PADDLE_ENFORCE(ctx.InputSize() == 2,
"Input size of OnehotCrossEntropyOp must be two");
PADDLE_ENFORCE(ctx.OutputSize() == 1,
"Output size of OnehotCrossEntropyOp must be one");
PADDLE_ENFORCE(ctx.InputVar(0) != nullptr && ctx.InputVar(1) != nullptr,
"Inputs of OnehotCrossEntropyOp must all be set");
PADDLE_ENFORCE(ctx.OutputVar(0) != nullptr,
"Outputs of OnehotCrossEntropyOp must all be set");
PADDLE_ENFORCE(ctx.Input<Tensor>(0)->dims().size() == 2,
"X's dimension must be 2.");
PADDLE_ENFORCE(ctx.Output<Tensor>(0)->dims().size() == 1,
"label's dimension must be 1.");
PADDLE_ENFORCE_EQ(ctx.InputSize(), 2,
"Input size of OnehotCrossEntropyOp must be two");
PADDLE_ENFORCE_EQ(ctx.OutputSize(), 1,
"Output size of OnehotCrossEntropyOp must be one");
PADDLE_ENFORCE_NOT_NULL(ctx.InputVar(0),
"0-th input of OnehotCrossEntropyOp should be set");
PADDLE_ENFORCE_NOT_NULL(ctx.InputVar(1),
"1-th input of OnehotCrossEntropyOp should be set");
PADDLE_ENFORCE_NOT_NULL(ctx.OutputVar(0),
"Outputs of OnehotCrossEntropyOp must all be set");
PADDLE_ENFORCE_EQ(ctx.Input<Tensor>(0)->dims().size(), 2);
PADDLE_ENFORCE_EQ(ctx.Output<Tensor>(0)->dims().size(), 1,
"label's dimension must be 1.");
ctx.Output<Tensor>(0)->Resize({ctx.Input<Tensor>(0)->dims()[0]});
}
};
......
......@@ -14,6 +14,3 @@
#define EIGEN_USE_GPU
#include "paddle/operators/cross_entropy_op.h"
REGISTER_OP_GPU_KERNEL(onehot_cross_entropy,
ops::OnehotCrossEntropyOpKernel<ops::GPUPlace, float>);
......@@ -18,7 +18,24 @@ limitations under the License. */
namespace paddle {
namespace operators {
static const float kCrossEntropyLogThreshold{1e-20};
template <typename T>
T tolerable_value(T x) {
static_assert(std::is_floating_point<T>::value,
"tolerable_value works only on float, "
"double and double double.");
const T kApproInf = 1e20;
if (x == INFINITY) {
return kApproInf;
}
if (x == -INFINITY) {
return -kApproInf;
}
return x;
}
template <typename Place, typename T>
class OnehotCrossEntropyOpKernel : public OpKernel {
......@@ -36,10 +53,9 @@ class OnehotCrossEntropyOpKernel : public OpKernel {
int batch_size = X->dims()[0];
int class_num = X->dims()[1];
// Y[i] = -log(X[i][j])
for (int i = 0; i < batch_size; ++i) {
Ydata[i] = -std::log(std::max(Xdata[i * class_num + label_data[i]],
kCrossEntropyLogThreshold));
int index = i * class_num + label_data[i];
Ydata[i] = -tolerable_value(std::log(Xdata[index]));
}
}
};
......@@ -62,9 +78,8 @@ class OnehotCrossEntropyGradientOpKernel : public OpKernel {
const int class_num = X->dims()[1];
for (int i = 0; i < batch_size; ++i) {
dXdata[i * class_num + label_data[i]] =
-dYdata[i] / std::max(Xdata[i * class_num + label_data[i]],
kCrossEntropyLogThreshold);
int index = i * class_num + label_data[i];
dXdata[index] = -tolerable_value(dYdata[i] / Xdata[index]);
}
}
};
......
......@@ -13,8 +13,6 @@ See the License for the specific language governing permissions and
limitations under the License. */
#include "paddle/operators/fill_zeros_like_op.h"
#include "paddle/framework/op_registry.h"
#include "paddle/framework/tensor.h"
namespace paddle {
namespace operators {
......@@ -22,14 +20,14 @@ namespace operators {
class FillZerosLikeOp : public framework::OperatorWithKernel {
protected:
void InferShape(const framework::InferShapeContext &ctx) const override {
PADDLE_ENFORCE(ctx.InputSize() == 1UL,
"Input size of FillZerosLikeOp must be one.");
PADDLE_ENFORCE(ctx.OutputSize() == 1UL,
"Output size of AddOp must be one.");
PADDLE_ENFORCE(ctx.InputVar(0) != nullptr,
"Input of FillZerosLikeOp must be set.");
PADDLE_ENFORCE(ctx.OutputVar(0) != nullptr,
"Output of FillZerosLikeOp must be set.");
PADDLE_ENFORCE_EQ(ctx.InputSize(), 1UL,
"Input size of FillZerosLikeOp must be one.");
PADDLE_ENFORCE_EQ(ctx.OutputSize(), 1UL,
"Output size of AddOp must be one.");
PADDLE_ENFORCE_NOT_NULL(ctx.InputVar(0),
"Input of FillZerosLikeOp must be set.");
PADDLE_ENFORCE_NOT_NULL(ctx.OutputVar(0),
"Output of FillZerosLikeOp must be set.");
ctx.Output<framework::Tensor>(0)->Resize(
ctx.Input<framework::Tensor>(0)->dims());
}
......
......@@ -12,6 +12,7 @@
See the License for the specific language governing permissions and
limitations under the License. */
#define EIGEN_USE_GPU
#include "paddle/framework/op_registry.h"
#include "paddle/operators/fill_zeros_like_op.h"
......
......@@ -13,9 +13,7 @@ See the License for the specific language governing permissions and
limitations under the License. */
#pragma once
#include "glog/logging.h"
#include "paddle/framework/eigen.h"
#include "paddle/framework/operator.h"
#include "paddle/operators/type_alias.h"
namespace paddle {
namespace operators {
......@@ -26,7 +24,8 @@ class FillZerosLikeKernel : public framework::OpKernel {
void Compute(const framework::ExecutionContext& context) const override {
auto* output = context.Output<framework::Tensor>(0);
output->mutable_data<T>(context.GetPlace());
framework::EigenVector<T>::Flatten(*output).setZero();
auto t = framework::EigenVector<T>::Flatten(*output);
t.device(context.GetEigenDevice<Place>()) = t.constant(T(0));
}
};
......
......@@ -20,10 +20,10 @@ namespace operators {
class MeanOp : public OperatorWithKernel {
protected:
void InferShape(const InferShapeContext &ctx) const override {
PADDLE_ENFORCE(ctx.InputSize() == 1, "Input size of AddOp must be one");
PADDLE_ENFORCE(ctx.OutputSize() == 1, "Output size of AddOp must be one");
PADDLE_ENFORCE(ctx.InputVar(0) != nullptr && ctx.OutputVar(0) != nullptr,
"Input/Output of MeanOp must be initialized.");
PADDLE_ENFORCE_EQ(ctx.InputSize(), 1, "Input size of AddOp must be one");
PADDLE_ENFORCE_EQ(ctx.OutputSize(), 1, "Output size of AddOp must be one");
PADDLE_ENFORCE_NOT_NULL(ctx.InputVar(0), "input should be set");
PADDLE_ENFORCE_NOT_NULL(ctx.OutputVar(0), "output should be set");
ctx.Output<Tensor>(0)->Resize(framework::make_ddim({1}));
}
};
......
......@@ -23,12 +23,16 @@ class MulOp : public OperatorWithKernel {
PADDLE_ENFORCE(ctx.InputSize() == 2, "The mul op must take two inputs");
auto dim0 = ctx.Input<Tensor>(0)->dims();
auto dim1 = ctx.Input<Tensor>(1)->dims();
PADDLE_ENFORCE(dim0.size() == 2 && dim1.size() == 2,
"The input of mul op must be matrix");
PADDLE_ENFORCE(
dim0[1] == dim1[0],
PADDLE_ENFORCE_EQ(dim0.size(), 2,
"input X(%s) should be a tensor with 2 dims, a matrix",
ctx.op_.Input("X"));
PADDLE_ENFORCE_EQ(dim1.size(), 2,
"input Y(%s) should be a tensor with 2 dims, a matrix",
ctx.op_.Input("Y"));
PADDLE_ENFORCE_EQ(
dim0[1], dim1[0],
"First matrix's width must be equal with second matrix's height.");
PADDLE_ENFORCE(ctx.OutputSize() == 1, "The mul op must take one output");
PADDLE_ENFORCE_EQ(ctx.OutputSize(), 1, "The mul op takes only one output");
ctx.Output<Tensor>(0)->Resize({dim0[0], dim1[1]});
}
};
......
......@@ -70,15 +70,15 @@ class NetOp : public framework::OperatorBase {
*/
void AddOp(const std::shared_ptr<OperatorBase>& op) {
PADDLE_ENFORCE(!add_op_done_, "Cannot AddOp when this network is sealed");
PADDLE_ENFORCE(op != nullptr, "Cannot Insert Null op");
PADDLE_ENFORCE_NOT_NULL(op, "Cannot Insert Null op");
ops_.push_back(op);
}
void InsertOp(size_t pos, const std::shared_ptr<OperatorBase>& op) {
PADDLE_ENFORCE(!add_op_done_,
"Cannot InsertOp when this network is sealed");
PADDLE_ENFORCE(op != nullptr, "Cannot Insert Null op");
PADDLE_ENFORCE(pos <= ops_.size(), "Out of range");
PADDLE_ENFORCE_NOT_NULL(op, "Cannot Insert Null op");
PADDLE_ENFORCE_LE(pos, ops_.size(), "Out of range");
ops_.insert(ops_.begin() + pos, op);
}
......
......@@ -36,6 +36,7 @@ void RecurrentAlgorithm::InferShape(const Scope& scope) const {
InitMemories(step_scopes[0], true /*infer_shape_mode*/);
Variable* net = scope.FindVar(arg_->step_net);
PADDLE_ENFORCE(net != nullptr, "failed to get step net");
for (size_t i = 0; i < seq_len_; i++) {
if (i > 0) {
rnn::LinkMemories(step_scopes, arg_->memories, i, -1,
......@@ -56,6 +57,7 @@ void RecurrentAlgorithm::Run(const Scope& scope,
Variable* net = scope.FindVar(arg_->step_net);
for (size_t step_id = 0; step_id < seq_len_; step_id++) {
// create output alias variables
if (step_id > 0) {
rnn::LinkMemories(step_scopes, arg_->memories, step_id, -1,
false /*infer_shape_mode*/);
......@@ -67,22 +69,31 @@ void RecurrentAlgorithm::Run(const Scope& scope,
}
void RecurrentAlgorithm::CreateScopes(const Scope& scope) const {
// TODO(xxx) Only two scopes are needed for inference, this case will be
// TODO(superjom) Only two scopes are needed for inference, this case will be
// supported later.
auto step_scopes =
scope.FindVar(arg_->step_scopes)->GetMutable<std::vector<Scope*>>();
auto step_scopes_var = scope.FindVar(arg_->step_scopes);
PADDLE_ENFORCE(step_scopes_var != nullptr, "");
auto step_scopes = step_scopes_var->GetMutable<std::vector<Scope*>>();
// Now all variables in scope must be created outside of op.
auto net_var = scope.FindVar(arg_->step_net);
PADDLE_ENFORCE(net_var != nullptr, "no stepnet called %s in scope",
arg_->step_net);
auto net_op = net_var->GetMutable<NetOp>();
PADDLE_ENFORCE(!net_op->outputs_.empty(), "net_op has no outputs");
if (seq_len_ > step_scopes->size()) {
for (size_t i = step_scopes->size(); i < seq_len_; ++i) {
auto& step_scope = scope.NewScope();
// Now all variables in scope must be created outside of op.
auto net_op = scope.FindVar(arg_->step_net)->GetMutable<NetOp>();
// create step net's temp inputs
for (auto& input : net_op->inputs_) {
// the weight are located in parent scope
if (!step_scope.FindVar(input)) step_scope.NewVar(input);
if (!step_scope.FindVar(input))
step_scope.NewVar(input)->GetMutable<Tensor>();
}
for (auto& output : net_op->outputs_) {
// create stepnet's outputs
for (const auto& output : net_op->outputs_) {
step_scope.NewVar(output);
}
step_scopes->emplace_back(&step_scope);
......@@ -100,6 +111,7 @@ void RecurrentAlgorithm::InitMemories(Scope* step_scope,
Tensor* boot_mem = step_scope->FindVar(attr.boot_var)->GetMutable<Tensor>();
if (infer_shape_mode) {
pre_mem->Resize(boot_mem->dims());
PADDLE_ENFORCE_EQ(pre_mem->dims().size(), 2);
} else {
pre_mem->ShareDataWith<float>(*boot_mem);
}
......
......@@ -53,11 +53,13 @@ void ConcatOutputs(const std::vector<Scope*>& step_scopes,
PADDLE_ENFORCE(output_var != nullptr, "output link [%s] is not in scope.",
outlinks[i].external);
Tensor* output = output_var->GetMutable<Tensor>();
if (infer_shape_mode) {
fmw::DDim step_dims = step_scopes[0]
->FindVar(outlinks[i].internal)
->GetMutable<Tensor>()
->dims();
auto step_scope_var = step_scopes[0]->FindVar(outlinks[i].internal);
PADDLE_ENFORCE(step_scope_var != nullptr, "%s not in scope",
outlinks[i].internal);
fmw::DDim step_dims =
step_scope_var->template GetMutable<Tensor>()->dims();
std::vector<int> dims_vec = vectorize(step_dims);
dims_vec.insert(dims_vec.begin(), seq_len);
output->Resize(fmw::make_ddim(dims_vec));
......@@ -79,14 +81,15 @@ void LinkMemories(const std::vector<Scope*>& scopes,
const std::vector<rnn::MemoryAttr>& memories,
const size_t step_id, const int offset,
bool infer_shape_mode) {
PADDLE_ENFORCE(step_id < scopes.size(),
"step [%d] is out of range of step scopes' size [%d]", step_id,
scopes.size());
PADDLE_ENFORCE(static_cast<int>(step_id) + offset >= 0,
"offset [%d] must be large than -[%d]", offset, step_id);
PADDLE_ENFORCE(step_id + offset < scopes.size(),
"offset [%d] is out of range, it must be less than (%d - %d)",
offset, scopes.size(), step_id);
PADDLE_ENFORCE_LT(step_id, scopes.size(),
"step [%d] is out of range of step scopes' size [%d]",
step_id, scopes.size());
PADDLE_ENFORCE_GE(static_cast<int>(step_id) + offset, 0,
"offset [%d] must be large than -[%d]", offset, step_id);
PADDLE_ENFORCE_LT(
step_id + offset, scopes.size(),
"offset [%d] is out of range, it must be less than (%d - %d)", offset,
scopes.size(), step_id);
auto scope = scopes[step_id];
auto linked_scope = scopes[step_id + offset];
for (auto& attr : memories) {
......
......@@ -20,11 +20,11 @@ namespace operators {
class SGDOp : public OperatorWithKernel {
protected:
void InferShape(const InferShapeContext &ctx) const override {
PADDLE_ENFORCE(ctx.InputSize() == 2, "Input size of SGDOp must be two");
PADDLE_ENFORCE(ctx.OutputSize() == 1, "Output size of SGDOp must be one");
PADDLE_ENFORCE(ctx.InputVar(0) != nullptr, "inputs[0] mast be set");
PADDLE_ENFORCE(ctx.InputVar(1) != nullptr, "inputs[1] mast be set");
PADDLE_ENFORCE(ctx.OutputVar(0) != nullptr, "outputs[0] mast be set");
PADDLE_ENFORCE_EQ(ctx.InputSize(), 2, "Input size of SGDOp must be two");
PADDLE_ENFORCE_EQ(ctx.OutputSize(), 1, "Output size of SGDOp must be one");
PADDLE_ENFORCE_NOT_NULL(ctx.InputVar(0), "inputs[0] mast be set");
PADDLE_ENFORCE_NOT_NULL(ctx.InputVar(1), "inputs[1] mast be set");
PADDLE_ENFORCE_NOT_NULL(ctx.OutputVar(0), "outputs[0] mast be set");
PADDLE_ENFORCE(ctx.Input<Tensor>(0)->dims() == ctx.Input<Tensor>(1)->dims(),
"Two input of SGD Op's dimension must be same.");
ctx.Output<Tensor>(0)->Resize(ctx.Input<Tensor>(0)->dims());
......
......@@ -37,10 +37,8 @@ class SigmoidOpMaker : public OpProtoAndCheckerMaker {
class SigmoidOpGrad : public OperatorWithKernel {
protected:
void InferShape(const InferShapeContext &ctx) const override {}
std::string DebugString() const override {
LOG(INFO) << "SigmoidGrad";
return "";
void InferShape(const InferShapeContext &ctx) const override {
ctx.Output<Tensor>(0)->Resize(ctx.Input<Tensor>(0)->dims());
}
};
......@@ -51,3 +49,5 @@ REGISTER_OP(sigmoid, ops::SigmoidOp, ops::SigmoidOpMaker);
REGISTER_GRADIENT_OP(sigmoid, sigmoid_grad, ops::SigmoidOpGrad);
REGISTER_OP_CPU_KERNEL(sigmoid, ops::SigmoidKernel<ops::CPUPlace, float>);
REGISTER_OP_CPU_KERNEL(sigmoid_grad,
ops::SigmoidGradKernel<ops::CPUPlace, float>);
......@@ -16,3 +16,5 @@
#include "paddle/operators/sigmoid_op.h"
REGISTER_OP_GPU_KERNEL(sigmoid, ops::SigmoidKernel<ops::GPUPlace, float>);
REGISTER_OP_GPU_KERNEL(sigmoid_grad,
ops::SigmoidGradKernel<ops::GPUPlace, float>);
......@@ -27,6 +27,7 @@ class SigmoidKernel : public OpKernel {
auto output = context.Output<Tensor>(0);
output->mutable_data<T>(context.GetPlace());
// The clipping is used in Paddle's raw implenmention
auto X = EigenVector<T>::Flatten(*input);
auto Y = EigenVector<T>::Flatten(*output);
auto place = context.GetEigenDevice<Place>();
......@@ -34,5 +35,23 @@ class SigmoidKernel : public OpKernel {
Y.device(place) = 1.0 / (1.0 + (-1.0 * X).exp());
}
};
template <typename Place, typename T>
class SigmoidGradKernel : public OpKernel {
public:
void Compute(const ExecutionContext& context) const override {
auto Y_t = context.Input<Tensor>("Y");
auto dY_t = context.Input<Tensor>(framework::GradVarName("Y"));
auto dX_t = context.Output<Tensor>(framework::GradVarName("X"));
dX_t->mutable_data<T>(context.GetPlace());
auto dX = EigenVector<T>::Flatten(*dX_t);
auto Y = EigenVector<T>::Flatten(*Y_t);
auto dY = EigenVector<T>::Flatten(*dY_t);
dX.device(context.GetEigenDevice<Place>()) = dY * Y * (1. - Y);
}
};
} // namespace operators
} // namespace paddle
......@@ -20,12 +20,12 @@ namespace operators {
class SoftmaxOp : public OperatorWithKernel {
protected:
void InferShape(const InferShapeContext &ctx) const override {
PADDLE_ENFORCE(ctx.InputSize() == 1UL,
"Only one input is need for softmax");
PADDLE_ENFORCE(ctx.Input<Tensor>("X")->dims().size() == 2UL,
"The input of softmax op must be matrix");
PADDLE_ENFORCE(ctx.OutputSize() == 1UL,
"Only one output is need for softmax");
PADDLE_ENFORCE_EQ(ctx.InputSize(), 1UL,
"Only one input is need for softmax");
PADDLE_ENFORCE_EQ(ctx.Input<Tensor>("X")->dims().size(), 2UL,
"The input of softmax op must be matrix");
PADDLE_ENFORCE_EQ(ctx.OutputSize(), 1UL,
"Only one output is need for softmax");
ctx.Output<Tensor>("Y")->Resize(ctx.Input<Tensor>("X")->dims());
}
};
......@@ -43,13 +43,13 @@ class SoftmaxOpMaker : public OpProtoAndCheckerMaker {
class SoftmaxOpGrad : public OperatorWithKernel {
protected:
void InferShape(const InferShapeContext &ctx) const override {
PADDLE_ENFORCE(ctx.InputSize() == 3UL,
"Input of SoftmaxOpGrad should be 3, X, Y, YG");
PADDLE_ENFORCE(ctx.OutputSize() == 1UL,
"Output of SoftmaxOpGrad should be 1");
PADDLE_ENFORCE(ctx.InputVar("Y") != nullptr, "Input(Y) should not be null");
PADDLE_ENFORCE(ctx.InputVar(framework::GradVarName("Y")) != nullptr,
"Input(Y@GRAD) should not be null");
PADDLE_ENFORCE_EQ(ctx.InputSize(), 3UL,
"Input of SoftmaxOpGrad should be 3, X, Y, YG");
PADDLE_ENFORCE_EQ(ctx.OutputSize(), 1UL,
"Output of SoftmaxOpGrad should be 1");
PADDLE_ENFORCE_NOT_NULL(ctx.InputVar("Y"), "Input(Y) should not be null");
PADDLE_ENFORCE_NOT_NULL(ctx.InputVar(framework::GradVarName("Y")),
"Input(Y@GRAD) should not be null");
PADDLE_ENFORCE(ctx.Input<Tensor>("Y")->dims() ==
ctx.Input<Tensor>(framework::GradVarName("Y"))->dims(),
"the shape of Input(0) and Input(1) should be the same");
......
......@@ -666,4 +666,24 @@ void Argument::subArgFrom(const Argument& input,
}
}
void Argument::reorganizeSeqInfo(
const ICpuGpuVectorPtr seqStartPos,
const ICpuGpuVectorPtr subSeqStartPos,
std::vector<std::vector<int>>& reorganizedSeqInfo) {
int* seqStarts = seqStartPos->getMutableData(false);
int* subSeqStarts = subSeqStartPos->getMutableData(false);
int seqNum = seqStartPos->getSize() - 1;
reorganizedSeqInfo.resize(seqNum, std::vector<int>());
int seqIdx = 0;
for (size_t i = 0; i < subSeqStartPos->getSize(); ++i) {
reorganizedSeqInfo[seqIdx].push_back(subSeqStarts[i]);
if (subSeqStarts[i] == seqStarts[seqIdx + 1]) {
seqIdx++;
if (seqIdx == seqNum) return;
reorganizedSeqInfo[seqIdx].push_back(subSeqStarts[i]);
}
}
}
} // namespace paddle
......@@ -317,6 +317,30 @@ struct Argument {
*/
void printValueString(std::ostream& stream,
const std::string& prefix = "") const;
/**
* @brief reorganizeSeqInfo will reorganize sequenceStartPositions and
* subSequenceStartPositions into a 2 dimensional arrary: reorganizedSeqInfo.
*
* @param seqStartPos: sequenceStartPositions of an Argument.
* @param subSeqStartPos: subSequenceStartPositions of an Argument.
* @param the reorganized sequence start position information.
*
* Examples:
* seqStartPos: [0, 4, 15, 20, 28]
* subSeqStartPos: [0, 3, 4, 5, 7, 10, 15, 20, 22, 23, 25, 28]
* reorganizedSeqInfo:
* [
* [0,3,4],
* [4,5,7,10,15],
* [15,20],
* [20,22,23,25,28]
* ]
*/
static void reorganizeSeqInfo(
const ICpuGpuVectorPtr seqStartPos,
const ICpuGpuVectorPtr subSeqStartPos,
std::vector<std::vector<int>>& reorganizedSeqInfo);
};
} // namespace paddle
......@@ -187,25 +187,16 @@ inline void throw_on_error(T e) {
__PADDLE_BINARY_COMPARE(__VAL0, __VAL1, <, >=, __VA_ARGS__)
#define PADDLE_ENFORCE_LE(__VAL0, __VAL1, ...) \
__PADDLE_BINARY_COMPARE(__VAL0, __VAL1, <=, >, __VA_ARGS__)
// if two values have different data types, choose a compatible type for them.
template <typename T1, typename T2>
struct CompatibleType {
static const bool t1_to_t2 = std::is_convertible<T1, T2>::value;
typedef typename std::conditional<t1_to_t2, T2, T1>::type type;
};
#define PADDLE_ENFORCE_NOT_NULL(__VAL, ...) \
PADDLE_ENFORCE(nullptr != (__VAL), #__VAL " should not be null\n%s", \
paddle::string::Sprintf("" __VA_ARGS__));
#define __PADDLE_BINARY_COMPARE(__VAL0, __VAL1, __CMP, __INV_CMP, ...) \
PADDLE_ENFORCE(__COMPATIBLE_TYPE(__VAL0, __VAL1, __VAL0) \
__CMP __COMPATIBLE_TYPE(__VAL0, __VAL1, __VAL1), \
PADDLE_ENFORCE(__VAL0 __CMP __VAL1, \
"enforce %s " #__CMP " %s failed, %s " #__INV_CMP " %s\n%s", \
#__VAL0, #__VAL1, std::to_string(__VAL0), \
std::to_string(__VAL1), \
paddle::string::Sprintf("" __VA_ARGS__));
#define __COMPATIBLE_TYPE(__VAL0, __VAL1, __VAL) \
typename paddle::platform::CompatibleType<decltype(__VAL0), \
decltype(__VAL1)>::type(__VAL)
} // namespace platform
} // namespace paddle
......@@ -9,8 +9,10 @@ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
#include "paddle/platform/enforce.h"
#include <memory>
#include "gtest/gtest.h"
#include "paddle/platform/enforce.h"
TEST(ENFORCE, OK) {
PADDLE_ENFORCE(true, "Enforce is ok %d now %f", 123, 0.345);
......@@ -196,3 +198,27 @@ TEST(ENFORCE_LT, FAIL) {
ASSERT_TRUE(in_catch);
}
TEST(ENFORCE_NOT_NULL, OK) {
int* a = new int;
PADDLE_ENFORCE_NOT_NULL(a);
delete a;
}
TEST(ENFORCE_NOT_NULL, FAIL) {
bool in_catch = false;
int* a{nullptr};
try {
PADDLE_ENFORCE_NOT_NULL(a);
} catch (paddle::platform::EnforceNotMet error) {
in_catch = true;
const std::string msg = "a should not be null";
const char* what = error.what();
for (size_t i = 0; i < msg.length(); ++i) {
ASSERT_EQ(what[i], msg[i]);
}
}
ASSERT_TRUE(in_catch);
}
......@@ -6,4 +6,5 @@ cc_library(paddle_pybind SHARED
add_op
mean_op
cross_entropy_op
recurrent_op)
recurrent_op
fill_zeros_like_op)
configure_file(submit_local.sh.in
submit_local.sh
paddle
@ONLY)
install(FILES ${CMAKE_CURRENT_BINARY_DIR}/submit_local.sh DESTINATION bin
install(FILES ${CMAKE_CURRENT_BINARY_DIR}/paddle DESTINATION bin
PERMISSIONS OWNER_EXECUTE OWNER_WRITE OWNER_READ
GROUP_EXECUTE GROUP_READ WORLD_EXECUTE WORLD_READ
RENAME paddle)
GROUP_EXECUTE GROUP_READ WORLD_EXECUTE WORLD_READ)
configure_file(tools/usage_stat/usage.sh
usage.sh
paddle_usage
@ONLY)
install(FILES ${CMAKE_CURRENT_BINARY_DIR}/usage.sh DESTINATION opt/paddle/bin
install(FILES ${CMAKE_CURRENT_BINARY_DIR}/paddle_usage DESTINATION opt/paddle/bin
PERMISSIONS OWNER_EXECUTE OWNER_WRITE OWNER_READ
GROUP_EXECUTE GROUP_READ WORLD_EXECUTE WORLD_READ
RENAME paddle_usage)
GROUP_EXECUTE GROUP_READ WORLD_EXECUTE WORLD_READ)
......@@ -33,6 +33,9 @@ Configuring cmake in /paddle/build ...
-DWITH_AVX=${WITH_AVX:-OFF}
-DWITH_GOLANG=${WITH_GOLANG:-OFF}
-DWITH_SWIG_PY=ON
-DWITH_C_API=${WITH_C_API:-OFF}
-DWITH_PYTHON=${WITH_PYTHON:-ON}
-DWITH_SWIG_PY=${WITH_SWIG_PY:-ON}
-DCUDNN_ROOT=/usr/
-DWITH_STYLE_CHECK=${WITH_STYLE_CHECK:-OFF}
-DWITH_TESTING=${WITH_TESTING:-OFF}
......@@ -49,7 +52,9 @@ cmake .. \
-DWITH_GPU=${WITH_GPU:-OFF} \
-DWITH_AVX=${WITH_AVX:-OFF} \
-DWITH_GOLANG=${WITH_GOLANG:-OFF} \
-DWITH_SWIG_PY=ON \
-DWITH_SWIG_PY=${WITH_SWIG_PY:-ON} \
-DWITH_C_API=${WITH_C_API:-OFF} \
-DWITH_PYTHON=${WITH_PYTHON:-ON} \
-DCUDNN_ROOT=/usr/ \
-DWITH_STYLE_CHECK=${WITH_STYLE_CHECK:-OFF} \
-DWITH_TESTING=${WITH_TESTING:-OFF} \
......
......@@ -20,4 +20,4 @@ cmake -DCMAKE_SYSTEM_NAME=Android \
-DWITH_SWIG_PY=OFF \
..
make -j `nproc`
make install
make install -j `nproc`
文件模式从 100644 更改为 100755
......@@ -5,15 +5,9 @@ set -e
mkdir -p $TRAVIS_BUILD_DIR/build
cd $TRAVIS_BUILD_DIR/build
# Compile paddle binaries first
cmake .. -DCMAKE_BUILD_TYPE=Debug -DWITH_GPU=OFF -DWITH_DOC=OFF -DWITH_MKLDNN=OFF -DWITH_MKLML=OFF -DWITH_GOLANG=ON -DWITH_STYLE_CHECK=OFF
mkdir output
make -j `nproc`
find .. -name '*whl' | xargs pip install # install all wheels.
rm -rf *
# Compile Documentation only.
cmake .. -DCMAKE_BUILD_TYPE=Debug -DWITH_GPU=OFF -DWITH_MKLDNN=OFF -DWITH_MKLML=OFF -DWITH_DOC=ON
make -j `nproc` gen_proto_py
make -j `nproc` paddle_docs paddle_docs_cn
# check websites for broken links
......@@ -35,6 +29,7 @@ TARGET_BRANCH="gh-pages"
SOURCE_BRANCH="master"
# Clone the repo to output directory
mkdir output
git clone $REPO output
cd output
......
# Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserved
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
from setuptools import setup, Extension
setup(name="py_paddle",
version="${PADDLE_VERSION}",
packages=['py_paddle'],
include_package_data=True,
package_data={'py_paddle':['*.py','_swig_paddle.so']},
install_requires = [
'nltk>=3.2.2',
# We use `numpy.flip` in `test_image.py`.
# `numpy.flip` is introduced in `1.12.0`
'numpy>=1.12.0', # The numpy is required.
'protobuf==${PROTOBUF_VERSION}' # The paddle protobuf version
],
url='http://www.paddlepaddle.org/',
license='Apache 2.0',
)
from paddle.trainer_config_helpers import *
settings(batch_size=128, learning_method=AdaGradOptimizer(), learning_rate=1e-4)
settings(batch_size=17, learning_method=AdaGradOptimizer(), learning_rate=1e-4)
file_list = 'trainer/tests/fake_file_list.list'
......@@ -12,7 +12,7 @@ define_py_data_sources2(
embedding = embedding_layer(
input=data_layer(
name="word_ids", size=65536),
name="word_ids", size=8191),
size=128,
param_attr=ParamAttr(sparse_update=True))
prediction = fc_layer(input=embedding, size=10, act=SoftmaxActivation())
......
......@@ -7,15 +7,15 @@ def init_hook(settings, is_train, **kwargs):
@provider(
input_types={'word_ids': integer_value(65536),
input_types={'word_ids': integer_value(8191),
'label': integer_value(10)},
min_pool_size=0,
init_hook=init_hook)
def process(settings, filename):
if settings.is_train:
data_size = 2**20
else:
data_size = 2**10
else:
data_size = 2**5
for _ in xrange(data_size):
yield random.randint(0, 65535), random.randint(0, 9)
yield random.randint(0, 8190), random.randint(0, 9)
......@@ -100,25 +100,25 @@ TEST(average_window, gpu) {
}
TEST(average_window, gpu2) {
FLAGS_num_passes = 100;
FLAGS_num_passes = 20;
trainerOnePassTest(configFile1, true, false, 2, 0.01);
FLAGS_num_passes = 1;
}
TEST(average_window, gpu4) {
FLAGS_num_passes = 100;
FLAGS_num_passes = 20;
trainerOnePassTest(configFile1, true, false, 4, 0.01);
FLAGS_num_passes = 1;
}
TEST(average_window_cpu, gpu2) {
FLAGS_num_passes = 100;
FLAGS_num_passes = 20;
trainerOnePassTest(configFile1, true, false, 2, 0.01, true);
FLAGS_num_passes = 1;
}
TEST(average_window_cpu, gpu4) {
FLAGS_num_passes = 100;
FLAGS_num_passes = 20;
trainerOnePassTest(configFile1, true, false, 4, 0.01, true);
FLAGS_num_passes = 1;
}
......
......@@ -17,7 +17,7 @@ foreach(filename ${proto_filenames})
COMMAND ${PROTOBUF_PROTOC_EXECUTABLE}
ARGS "--python_out=${PROJ_ROOT}/python/paddle/proto"
"-I" ${CMAKE_CURRENT_SOURCE_DIR} ${ABS_FIL}
DEPENDS ${ABS_FIL} ${external_project_dependencies})
DEPENDS ${ABS_FIL} protoc)
endforeach()
add_custom_target(gen_proto_py ALL DEPENDS ${PROTO_GEN_PY})
......@@ -39,7 +39,7 @@ add_custom_command(OUTPUT ${OUTPUT_DIR}/.timestamp
DEPENDS gen_proto_py copy_paddle_pybind framework_py_proto ${PY_FILES} ${external_project_dependencies} ${COPY_PADDLE_MASTER})
add_custom_target(paddle_python ALL DEPENDS
${OUTPUT_DIR}/.timestamp)
${OUTPUT_DIR}/.timestamp paddle_pserver_main paddle_trainer paddle_merge_model python_api_wheel)
set(PADDLE_PYTHON_PACKAGE_DIR ${CMAKE_CURRENT_BINARY_DIR}/dist/)
......
......@@ -2686,6 +2686,7 @@ class SeqSliceLayer(LayerBase):
super(SeqSliceLayer, self).__init__(
name, 'seq_slice', 0, inputs=inputs, **xargs)
input_layer0 = self.get_input_layer(0)
size = input_layer0.size
self.set_layer_size(size)
......@@ -2702,6 +2703,31 @@ class SeqSliceLayer(LayerBase):
self.config.select_first = False
@config_layer('sub_nested_seq')
class SubNestedSequenceLayer(LayerBase):
def __init__(self, name, inputs, selected_indices, bias=False, **xargs):
if isinstance(inputs, list):
assert len(inputs) == 1, ('the first input of sub_nested_seq '
'layer is a single nested sequence.')
inputs = inputs[0]
if isinstance(selected_indices, list):
assert len(selected_indices) == 1, (
'the second input of '
'sub_nested_seq layer is a single layer which is a '
'set of selected indices.')
selected_indices = selected_indices[0]
super(SubNestedSequenceLayer, self).__init__(
name,
'sub_nested_seq',
0,
inputs=[inputs, selected_indices],
**xargs)
input_layer0 = self.get_input_layer(0)
size = input_layer0.size
self.set_layer_size(size)
@config_layer('out_prod')
class OuterProdLayer(LayerBase):
def __init__(self, name, inputs, device=None):
......@@ -3268,6 +3294,16 @@ class CTCLayer(LayerBase):
config_assert(len(self.inputs) == 2, 'CTCLayer must have 2 inputs')
@config_layer('kmax_seq_score')
class KmaxSeqScoreLayer(LayerBase):
def __init__(self, name, inputs, beam_size, **xargs):
super(KmaxSeqScoreLayer, self).__init__(
name, 'kmax_seq_score', 0, inputs=inputs, **xargs)
config_assert(
len(self.inputs) == 1, 'KmaxSeqScoreLayer has only one input.')
self.config.beam_size = beam_size
@config_layer('warp_ctc')
class WarpCTCLayer(LayerBase):
def __init__(self,
......
......@@ -129,9 +129,11 @@ __all__ = [
'prelu_layer',
'gated_unit_layer',
'crop_layer',
'sub_nested_seq_layer',
'clip_layer',
'slice_projection',
'seq_slice_layer',
'kmax_sequence_score_layer',
]
......@@ -225,9 +227,12 @@ class LayerType(object):
PRELU = 'prelu'
CROP_LAYER = 'crop'
SUB_NESTED_SEQ = 'sub_nested_seq'
CLIP_LAYER = 'clip'
SEQ_SLICE = 'seq_slice'
KMAX_SEQ_SCORE = 'kmax_seq_score'
@staticmethod
def is_layer_type(type_name):
"""
......@@ -6090,6 +6095,53 @@ def crop_layer(input, offset, axis=2, shape=None, name=None, layer_attr=None):
size=l.config.size)
@wrap_name_default()
@layer_support()
def sub_nested_seq_layer(input, selected_indices, name=None):
"""
The sub_nested_seq_layer accepts two inputs: the first one is a nested
sequence; the second one is a set of selceted indices in the nested sequence.
Then sub_nest_seq_layer trims the first nested sequence input according
to the selected indices to form a new output. This layer is useful in
beam training.
The example usage is:
.. code-block:: python
sub_nest_seq = sub_nested_seq_layer(input=[data, selected_indices])
:param input: A nested sequence.
:type input: LayerOutput
:param selected_indices: a set of sequence indices in the nested sequence.
:type input: LayerOutput
:param name: name of this layer.
:type name: basestring
:return: LayerOutput object.
:rtype: LayerOutput
"""
assert isinstance(input, LayerOutput), (
'The first input of '
'sub_nested_seq_layer must be a Paddle layer.')
assert isinstance(selected_indices, LayerOutput), (
'The second input of '
'sub_nested_seq_layer must be a Paddle layer.')
l = Layer(
inputs=input.name,
selected_indices=selected_indices.name,
name=name,
type=LayerType.SUB_NESTED_SEQ)
return LayerOutput(
name=name,
layer_type=LayerType.SUB_NESTED_SEQ,
parents=input,
size=l.config.size)
@wrap_name_default("clip")
def clip_layer(input, min, max, name=None):
"""
......@@ -6111,7 +6163,8 @@ def clip_layer(input, min, max, name=None):
:type min: double
:param max: The upper threshold for clipping.
:type max: double
:return: LayerOutput
:return: LayerOutput object.
:rtype: LayerOutput
"""
Layer(
name=name,
......@@ -6187,3 +6240,40 @@ def seq_slice_layer(input, starts, ends, name=None):
ends=ends.name if ends is not None else None)
return LayerOutput(
name, LayerType.SEQ_SLICE, parents=[input], size=input.size)
@layer_support()
def kmax_sequence_score_layer(input, name=None, beam_size=1):
"""
This layer accepts one input which are scores over a sequence or a nested
sequence, and returns indices of beam_size sequences with highest scores.
.. code-block:: python
kmax_indices = kmax_sequence_score_layer(input=input_layer, beam_size)
:param name: The Layer Name.
:type name: basestring
:param input: The input layer. It stores scores over a sequence or a nested
sequence and its size must be 1.
:type input: LayerOutput.
:param beam_size: squence indices with top beam_size scores are returned.
:type beam_size: double
:return: LayerOutput object.
:rtype: LayerOutput
"""
assert isinstance(input, LayerOutput), ("kmax_sequence_score_layer "
"accepts only one input.")
assert input.size == 1, (
"input of kmax_sequence_score_layer is a score"
"over a sequence or a nested sequence, so its width must be 1.")
Layer(
name=name,
type=LayerType.KMAX_SEQ_SCORE,
inputs=[input.name],
beam_size=beam_size)
return LayerOutput(
name, LayerType.KMAX_SEQ_SCORE, parents=[input], size=input.size)
......@@ -8,6 +8,6 @@ test_spp_layer test_bilinear_interp test_maxout test_bi_grumemory math_ops
test_seq_concat_reshape test_pad test_smooth_l1 test_multiplex_layer
test_prelu_layer test_row_conv test_detection_output_layer test_multibox_loss_layer
test_recursive_topology test_gated_unit_layer test_clip_layer test_row_l2_norm_layer
test_seq_slice_layer)
test_seq_slice_layer test_kmax_seq_socre_layer test_seq_select_layers)
export whole_configs=(test_split_datasource)
type: "nn"
layers {
name: "input"
type: "data"
size: 300
active_type: ""
}
layers {
name: "data"
type: "data"
size: 128
active_type: ""
}
layers {
name: "__fc_layer_0__"
type: "fc"
size: 1
active_type: "exponential"
inputs {
input_layer_name: "data"
input_parameter_name: "___fc_layer_0__.w0"
}
bias_parameter_name: "___fc_layer_0__.wbias"
}
layers {
name: "__kmax_sequence_score_layer_0__"
type: "kmax_seq_score"
active_type: ""
inputs {
input_layer_name: "__fc_layer_0__"
}
beam_size: 5
}
parameters {
name: "___fc_layer_0__.w0"
size: 128
initial_mean: 0.0
initial_std: 0.0883883476483
dims: 128
dims: 1
initial_strategy: 0
initial_smart: true
}
parameters {
name: "___fc_layer_0__.wbias"
size: 1
initial_mean: 0.0
initial_std: 0.0
dims: 1
dims: 1
initial_strategy: 0
initial_smart: false
}
input_layer_names: "data"
output_layer_names: "__kmax_sequence_score_layer_0__"
sub_models {
name: "root"
layer_names: "input"
layer_names: "data"
layer_names: "__fc_layer_0__"
layer_names: "__kmax_sequence_score_layer_0__"
input_layer_names: "data"
output_layer_names: "__kmax_sequence_score_layer_0__"
is_recurrent_layer_group: false
}
type: "nn"
layers {
name: "input_seq"
type: "data"
size: 300
active_type: ""
}
layers {
name: "input"
type: "data"
size: 5
active_type: ""
}
layers {
name: "__sub_nested_seq_layer_0__"
type: "sub_nested_seq"
size: 300
active_type: ""
inputs {
input_layer_name: "input_seq"
}
inputs {
input_layer_name: "input"
}
}
input_layer_names: "input_seq"
output_layer_names: "__sub_nested_seq_layer_0__"
sub_models {
name: "root"
layer_names: "input_seq"
layer_names: "input"
layer_names: "__sub_nested_seq_layer_0__"
input_layer_names: "input_seq"
output_layer_names: "__sub_nested_seq_layer_0__"
is_recurrent_layer_group: false
}
#!/usr/bin/env python
#coding=utf-8
from paddle.trainer_config_helpers import *
data = data_layer(name='input', size=300)
data = data_layer(name="data", size=128)
scores = fc_layer(input=data, size=1, act=ExpActivation())
kmax_seq_id = kmax_sequence_score_layer(input=scores, beam_size=5)
outputs(kmax_seq_id)
#!/usr/bin/env python
#coding=utf-8
from paddle.trainer_config_helpers import *
beam_size = 5
data = data_layer(name='input_seq', size=300)
selected_ids = data_layer(name='input', size=beam_size)
sub_nest_seq = sub_nested_seq_layer(input=data, selected_indices=selected_ids)
outputs(sub_nest_seq)
import paddle.v2.framework.core as core
from paddle.v2.framework.create_op_creation_methods import op_creations
from default_scope_funcs import new_var, find_var, get_cur_scope
__all__ = ['Network'] # Only expose Network
class NetworkFunctor(object):
"""
Network Op Creation Function. Used internally in this module.
It convert string input to Variable. If it is not created before, just
create in scope.
It is a functor object. means the instances are callable.
:param func: The op creation function which generated in Python.
:param net: The Network instance.
"""
def __init__(self, func, net):
self.func = func
self.net = net
def __call__(self, *args, **kwargs):
if len(args) != 0:
raise ValueError("Paddle must use keyword argument")
inputs = self.func.all_input_args
for ipt in inputs:
if ipt in kwargs:
var = kwargs[ipt]
if isinstance(var, basestring):
tmp = new_var(var)
self.net.var_names[tmp] = var
var = tmp
if not isinstance(var, core.Variable):
raise TypeError(
"Input of op creation must be string or variable")
kwargs[ipt] = self.net.var_names[var]
notemp_outputs = self.func.all_not_temp_output_args
for name in notemp_outputs:
if name not in kwargs:
kwargs[
name] = self.func.__name__ + "@OUT@%d" % core.unique_integer(
)
outputs = self.func.all_output_args
for opt in outputs:
if opt in kwargs:
var = kwargs[opt]
if isinstance(var, basestring):
tmp = new_var(var)
self.net.var_names[tmp] = var
var = tmp
if not isinstance(var, core.Variable):
raise TypeError(
"Output of op creation must be string or variable")
kwargs[opt] = self.net.var_names[var]
op = self.func(**kwargs)
self.net.net.add_op(op)
lst = [find_var(kwargs[opt]) for opt in notemp_outputs]
if len(lst) == 1:
return lst[0]
elif len(lst) == 0:
return None
else:
return lst
class Network(object):
"""
The network concept. It avoid user to manually create operator, create
variable, and combine them into a Net. Just use Network.xxx can create the
operator, create variables in default scope, and add them into `self.net`.
For example:
.. code-block: python
net = Network()
out = net.add_two(X="a", Y="b")
fc_out = net.fc(X="out", W="fc.w")
net.run(...)
"""
def __init__(self):
self.net = core.Net.create()
funcs = (func_name for func_name in dir(op_creations)
if not func_name.startswith("__"))
self.var_names = dict()
# TODO(yuyang18): This code can work, but do not generate a good
# docstring, try to give a better way generate function in runtime
# later.
for func_name in funcs:
func = getattr(op_creations, func_name)
impl = NetworkFunctor(func, self)
setattr(self, func_name, impl.__call__)
self.__complete_add_op__ = False
def infer_shape(self):
self.complete_add_op()
self.net.infer_shape(get_cur_scope())
def run(self, device_context):
self.complete_add_op()
self.net.run(get_cur_scope(), device_context)
def __str__(self):
return str(self.net)
def complete_add_op(self):
if not self.__complete_add_op__:
self.net.complete_add_op()
self.__complete_add_op__ = True
if __name__ == '__main__':
net = Network()
out = net.add_two(X="a", Y="b")
fc_out = net.fc(X=out, W="fc.w", b="fc.b", activation="softmax")
net.complete_add_op()
print net
......@@ -2,7 +2,6 @@ import paddle.v2.framework.core as core
import paddle.v2.framework.proto.op_proto_pb2 as op_proto_pb2
import paddle.v2.framework.proto.op_desc_pb2 as op_desc_pb2
import paddle.v2.framework.proto.attribute_pb2 as attribute_pb2
import cStringIO
def get_all_op_protos():
......@@ -146,64 +145,14 @@ class OpDescCreationMethod(object):
return False
def get_docstring_from_op_proto(op_proto):
"""
Generate docstring from a OpProto
:param op_proto: a OpProto instance.
:type op_proto: op_proto_pb2.OpProto
:return: docstring
"""
if not isinstance(op_proto, op_proto_pb2.OpProto):
raise TypeError("Input must be OpProto")
f = cStringIO.StringIO()
f.write(op_proto.comment)
f.write("\n")
def __append_param__(name, comment, type):
# Maybe replace the following line with template engine is better.
f.write(":param ")
f.write(name)
f.write(": ")
f.write(comment)
f.write("\n")
f.write(":type ")
f.write(name)
f.write(": ")
f.write(type)
f.write("\n")
for ipt in op_proto.inputs:
__append_param__(ipt.name, ipt.comment, "list | basestr"
if ipt.multiple else "basestr")
temp_var_prefix = \
"This is a temporary variable. It does not have to set by user. "
for opt in op_proto.outputs:
__append_param__(opt.name, opt.comment if not opt.temporary else
temp_var_prefix + opt.comment, "list | basestr"
if opt.multiple else "basestr")
for attr in op_proto.attrs:
attr_type = None
if attr.type == attribute_pb2.INT:
attr_type = "int"
elif attr.type == attribute_pb2.FLOAT:
attr_type = "float"
elif attr.type == attribute_pb2.STRING:
attr_type = "basestr"
elif attr.type == attribute_pb2.INTS:
attr_type = "list of int"
elif attr.type == attribute_pb2.FLOATS:
attr_type = "list of float"
elif attr.type == attribute_pb2.STRINGS:
attr_type = "list of basestr"
if attr_type is None:
raise RuntimeError("Not supported attribute type " + attr.type)
__append_param__(attr.name, attr.comment, attr_type)
return f.getvalue()
class OpInfo(object):
def __init__(self, name, method, inputs, outputs, attrs, no_temp_outputs):
self.name = name
self.method = method
self.inputs = inputs
self.outputs = outputs
self.attrs = attrs
self.no_temp_outputs = no_temp_outputs
def create_op_creation_method(op_proto):
......@@ -216,38 +165,57 @@ def create_op_creation_method(op_proto):
opdesc = method(*args, **kwargs)
return core.Operator.create(opdesc.SerializeToString())
__impl__.__doc__ = get_docstring_from_op_proto(op_proto)
__impl__.all_input_args = [var.name for var in op_proto.inputs]
__impl__.all_output_args = [var.name for var in op_proto.outputs]
__impl__.all_attr_args = [attr.name for attr in op_proto.attrs]
__impl__.all_not_temp_output_args = [
var.name for var in op_proto.outputs if not var.temporary
]
return OpInfo(
method=__impl__,
name=op_proto.type,
inputs=[var.name for var in op_proto.inputs],
outputs=[var.name for var in op_proto.outputs],
attrs=[attr.name for attr in op_proto.attrs],
no_temp_outputs=[
var.name for var in op_proto.outputs if not var.temporary
])
return __impl__
class OperatorFactory(object):
def __init__(self):
self.op_methods = dict()
for op_proto in get_all_op_protos():
method = create_op_creation_method(op_proto)
self.op_methods[method.name] = method
class OpCreationsHolder(object):
"""
A object will holds all op creation methods.
Use `op_creations.xxx_op` to access them.
"""
pass
def __call__(self, *args, **kwargs):
if 'type' in kwargs:
if len(args) != 0:
raise ValueError("All Paddle argument should be key-word "
"argument except type")
t = kwargs.pop('type')
else:
if len(args) != 1:
raise ValueError("All Paddle argument should be key-word "
"argument except type")
t = args[0]
return self.get_op_info(t).method(**kwargs)
op_creations = OpCreationsHolder()
def types(self):
return self.op_methods.keys()
def get_op_info(self, t):
if t not in self.op_methods:
raise ValueError("operator %s is not registered", t)
return self.op_methods.get(t)
def __bootstrap__():
"""
Bootstrap function for this module. It will dynamic create all op creation
methods in runtime.
"""
for op_proto in get_all_op_protos():
func = create_op_creation_method(op_proto)
func.__name__ = str(op_proto.type)
setattr(op_creations, func.__name__, func)
def get_op_input_names(self, type):
return self.get_op_info(type).inputs
def get_op_output_names(self, type):
return self.get_op_info(type).outputs
def get_op_attr_names(self, type):
return self.get_op_info(type).attrs
def get_op_no_temp_output_names(self, type):
return self.get_op_info(type).no_temp_outputs
__bootstrap__()
Operator = OperatorFactory() # Default global factory
......@@ -6,7 +6,6 @@ py_test(test_scope SRCS test_scope.py)
py_test(test_tensor SRCS test_tensor.py)
py_test(test_mul_op SRCS test_mul_op.py)
py_test(test_network SRCS test_network.py)
py_test(test_mean_op SRCS test_mean_op.py)
py_test(test_protobuf SRCS test_protobuf.py)
......@@ -14,10 +13,11 @@ py_test(test_protobuf SRCS test_protobuf.py)
py_test(test_add_two_op SRCS test_add_two_op.py)
py_test(test_sigmoid_op SRCS test_sigmoid_op.py)
py_test(test_softmax_op SRCS test_softmax_op.py)
py_test(test_fill_zeros_like_op SRCS test_fill_zeros_like_op.py)
py_test(gradient_checker SRCS gradient_checker.py)
py_test(test_rowwise_add_op SRCS test_rowwise_add_op.py)
py_test(test_default_scope_funcs SRCS test_default_scope_funcs.py)
py_test(test_op_creation_methods SRCS test_op_creation_methods.py)
py_test(test_operator SRCS test_operator.py)
import paddle.v2.framework.core as core
from paddle.v2.framework.create_op_creation_methods import op_creations
from paddle.v2.framework.op import Operator
import numpy
import unittest
......@@ -80,7 +80,7 @@ if __name__ == '__main__':
class GetNumericGradientTest(unittest.TestCase):
def test_add_op(self):
add_op = op_creations.add_two(X="X", Y="Y", Out="Z")
add_op = Operator('add_two', X="X", Y="Y", Out="Z")
x = numpy.random.random((10, 1)).astype("float32")
y = numpy.random.random((10, 1)).astype("float32")
......
import paddle.v2.framework.core as core
import unittest
import numpy
import paddle.v2.framework.create_op_creation_methods as creation
from paddle.v2.framework.op import Operator
class OpTestMeta(type):
......@@ -21,18 +21,14 @@ class OpTestMeta(type):
obj = super(OpTestMeta, cls).__new__(cls, name, bases, attrs)
def test_all(self):
func = getattr(creation.op_creations, self.type, None)
self.assertIsNotNone(func)
scope = core.Scope()
kwargs = dict()
places = []
places.append(core.CPUPlace())
if core.is_compile_gpu():
places = [core.CPUPlace()]
if core.is_compile_gpu() and core.Operator.support_gpu(self.type):
places.append(core.GPUPlace(0))
for place in places:
for in_name in func.all_input_args:
for in_name in Operator.get_op_input_names(self.type):
if hasattr(self, "inputs") and in_name in self.inputs:
kwargs[in_name] = in_name
var = scope.new_var(in_name).get_tensor()
......@@ -42,7 +38,7 @@ class OpTestMeta(type):
else:
kwargs[in_name] = "@EMPTY@"
for out_name in func.all_output_args:
for out_name in Operator.get_op_output_names(self.type):
if not hasattr(self, "outputs"):
raise ValueError(
"The test op must set self.outputs dict.")
......@@ -52,21 +48,23 @@ class OpTestMeta(type):
kwargs[out_name] = out_name
scope.new_var(out_name).get_tensor()
for attr_name in func.all_attr_args:
for attr_name in Operator.get_op_attr_names(self.type):
if hasattr(self, "attrs") and attr_name in self.attrs:
kwargs[attr_name] = self.attrs[attr_name]
op = func(**kwargs)
op = Operator(self.type, **kwargs)
op.infer_shape(scope)
ctx = core.DeviceContext.create(place)
op.run(scope, ctx)
for out_name in func.all_output_args:
for out_name in Operator.get_op_output_names(self.type):
actual = numpy.array(scope.find_var(out_name).get_tensor())
expect = self.outputs[out_name]
numpy.isclose(actual, expect)
self.assertTrue(
numpy.allclose(actual, expect),
"output name: " + out_name + "has diff")
obj.test_all = test_all
return obj
......@@ -2,7 +2,7 @@ import unittest
import numpy
import paddle.v2.framework.core as core
import paddle.v2.framework.create_op_creation_methods as creation
from paddle.v2.framework.op import Operator
from op_test_util import OpTestMeta
......@@ -21,7 +21,7 @@ class TestAddOp(unittest.TestCase):
class TestAddGradOp(unittest.TestCase):
def test_add_grad(self):
op = creation.op_creations.add_two(X="X", Y="Y", Out="Out")
op = Operator('add_two', X="X", Y="Y", Out="Out")
backward_op = core.Operator.backward(op, set())
self.assertEqual(backward_op.type(), "add_two_grad")
expected = '''Op(add_two_grad), inputs:(X, Y, Out, Out@GRAD), outputs:(X@GRAD, Y@GRAD).'''
......
import paddle.v2.framework.core as core
import unittest
import numpy
import paddle.v2.framework.create_op_creation_methods as creation
from paddle.v2.framework.op import Operator
class TestFc(unittest.TestCase):
......@@ -24,7 +24,7 @@ class TestFc(unittest.TestCase):
# Set a real numpy array here.
# x_tensor.set(numpy.array([]))
op = creation.op_creations.fc(X="X", Y="Y", W="W")
op = Operator("fc", X="X", Y="Y", W="W")
for out in op.outputs():
if scope.find_var(out) is None:
......
import unittest
from op_test_util import OpTestMeta
import numpy
class TestFillZerosLikeOp(unittest.TestCase):
__metaclass__ = OpTestMeta
def setUp(self):
self.type = "fill_zeros_like"
self.inputs = {'Src': numpy.random.random((219, 232)).astype("float32")}
self.outputs = {'Dst': numpy.zeros_like(self.inputs['Src'])}
if __name__ == '__main__':
unittest.main()
import paddle.v2.framework.core as core
from paddle.v2.framework.create_op_creation_methods import op_creations
from paddle.v2.framework.op import Operator
import unittest
class TestNet(unittest.TestCase):
def test_net_all(self):
net = core.Net.create()
op1 = op_creations.add_two(X="X", Y="Y", Out="Out")
op1 = Operator("add_two", X="X", Y="Y", Out="Out")
net.add_op(op1)
net2 = core.Net.create()
net2.add_op(op_creations.fc(X="X", W="w", Y="fc.out"))
net2.add_op(Operator("fc", X="X", W="w", Y="fc.out"))
net2.complete_add_op(True)
net.add_op(net2)
net.complete_add_op(True)
......
from paddle.v2.framework.network import Network
import paddle.v2.framework.core as core
import unittest
class TestNet(unittest.TestCase):
def test_net_all(self):
net = Network()
out = net.add_two(X="X", Y="Y")
fc_out = net.fc(X=out, W="w")
net.complete_add_op()
self.assertTrue(isinstance(fc_out, core.Variable))
self.assertEqual(
'''Op(plain_net), inputs:(@EMPTY@, X, Y, w), outputs:(@TEMP@fc@0, add_two@OUT@0, fc@OUT@1).
Op(add_two), inputs:(X, Y), outputs:(add_two@OUT@0).
Op(fc), inputs:(add_two@OUT@0, w, @EMPTY@), outputs:(fc@OUT@1, @TEMP@fc@0).
Op(mul), inputs:(add_two@OUT@0, w), outputs:(@TEMP@fc@0).
Op(sigmoid), inputs:(@TEMP@fc@0), outputs:(fc@OUT@1).
''', str(net))
net2 = Network()
tmp = net2.add_two(X="X", Y="Y")
self.assertTrue(isinstance(tmp, core.Variable))
net2.complete_add_op()
self.assertEqual(
'''Op(plain_net), inputs:(X, Y), outputs:(add_two@OUT@2).
Op(add_two), inputs:(X, Y), outputs:(add_two@OUT@2).
''', str(net2))
if __name__ == '__main__':
unittest.main()
import unittest
import paddle.v2.framework.create_op_creation_methods as creation
import paddle.v2.framework.op as op
import paddle.v2.framework.core as core
import paddle.v2.framework.proto.op_proto_pb2 as op_proto_pb2
import paddle.v2.framework.proto.op_desc_pb2 as op_desc_pb2
......@@ -8,7 +8,7 @@ import paddle.v2.framework.proto.attribute_pb2 as attribute_pb2
class TestGetAllProtos(unittest.TestCase):
def test_all(self):
all_protos = creation.get_all_op_protos()
all_protos = op.get_all_op_protos()
self.assertNotEqual(0, len(all_protos))
for each in all_protos:
......@@ -17,25 +17,25 @@ class TestGetAllProtos(unittest.TestCase):
class TestOpDescCreationMethod(unittest.TestCase):
def test_plain_input_output(self):
op = op_proto_pb2.OpProto()
op.type = "test"
ipt = op.inputs.add()
op_proto = op_proto_pb2.OpProto()
op_proto.type = "test"
ipt = op_proto.inputs.add()
ipt.name = "X"
ipt.comment = "not matter"
ipt = op.inputs.add()
ipt = op_proto.inputs.add()
ipt.name = "Y"
ipt.comment = "not matter"
opt = op.outputs.add()
opt = op_proto.outputs.add()
opt.name = "Z"
opt.comment = "not matter"
op.comment = "not matter"
op_proto.comment = "not matter"
self.assertTrue(op.IsInitialized())
self.assertTrue(op_proto.IsInitialized())
method = creation.OpDescCreationMethod(op)
method = op.OpDescCreationMethod(op_proto)
output = method(X="a", Y="b", Z="c")
expected = op_desc_pb2.OpDesc()
......@@ -45,29 +45,29 @@ class TestOpDescCreationMethod(unittest.TestCase):
self.assertEqual(expected, output)
def test_multiple_input_plain_output(self):
op = op_proto_pb2.OpProto()
op.type = "fc"
ipt = op.inputs.add()
op_proto = op_proto_pb2.OpProto()
op_proto.type = "fc"
ipt = op_proto.inputs.add()
ipt.name = "X"
ipt.comment = ""
ipt.multiple = True
ipt = op.inputs.add()
ipt = op_proto.inputs.add()
ipt.name = "W"
ipt.comment = ""
ipt.multiple = True
ipt = op.inputs.add()
ipt = op_proto.inputs.add()
ipt.name = "b"
ipt.comment = ""
out = op.outputs.add()
out = op_proto.outputs.add()
out.name = "Y"
out.comment = ""
op.comment = ""
self.assertTrue(op.IsInitialized())
method = creation.OpDescCreationMethod(op)
op_proto.comment = ""
self.assertTrue(op_proto.IsInitialized())
method = op.OpDescCreationMethod(op_proto)
generated1 = method(X="x", W="w", b="b", Y="y")
expected1 = op_desc_pb2.OpDesc()
......@@ -93,14 +93,14 @@ class TestOpDescCreationMethod(unittest.TestCase):
self.assertEqual(expected2, generated2)
def test_attrs(self):
op = op_proto_pb2.OpProto()
op.type = "test"
ipt = op.inputs.add()
op_proto = op_proto_pb2.OpProto()
op_proto.type = "test"
ipt = op_proto.inputs.add()
ipt.name = 'X'
ipt.comment = ""
def __add_attr__(name, type):
attr = op.attrs.add()
attr = op_proto.attrs.add()
attr.name = name
attr.comment = ""
attr.type = type
......@@ -112,10 +112,10 @@ class TestOpDescCreationMethod(unittest.TestCase):
__add_attr__("floats_attr", attribute_pb2.FLOATS)
__add_attr__("strings_attr", attribute_pb2.STRINGS)
op.comment = ""
self.assertTrue(op.IsInitialized())
op_proto.comment = ""
self.assertTrue(op_proto.IsInitialized())
method = creation.OpDescCreationMethod(op)
method = op.OpDescCreationMethod(op_proto)
generated = method(
X="a",
......@@ -162,23 +162,23 @@ class TestOpDescCreationMethod(unittest.TestCase):
self.assertEqual(expected, generated)
def test_input_temporary_output(self):
op = op_proto_pb2.OpProto()
op.type = "test"
out = op.outputs.add()
op_proto = op_proto_pb2.OpProto()
op_proto.type = "test"
out = op_proto.outputs.add()
out.name = "OUT"
out.comment = ""
out = op.outputs.add()
out = op_proto.outputs.add()
out.name = "TMP"
out.comment = ""
out.temporary = True
out = op.outputs.add()
out = op_proto.outputs.add()
out.name = "OUT2"
out.comment = ""
op.comment = ""
op_proto.comment = ""
method = creation.OpDescCreationMethod(op)
method = op.OpDescCreationMethod(op_proto)
generated = method(OUT="a", OUT2="b")
desc = op_desc_pb2.OpDesc()
desc.outputs.extend(["a", core.var_names.temp(), "b"])
......@@ -190,60 +190,9 @@ class TestOpDescCreationMethod(unittest.TestCase):
self.assertEqual(generated, desc)
class TestOpCreationDocStr(unittest.TestCase):
def test_all(self):
op = op_proto_pb2.OpProto()
op.type = "test"
op.comment = """Test Op.
This op is used for unit test, not a real op.
"""
a = op.inputs.add()
a.name = "a"
a.comment = "Input a for test op"
a.multiple = True
b = op.inputs.add()
b.name = "b"
b.comment = "Input b for test op"
self.assertTrue(op.IsInitialized())
o1 = op.outputs.add()
o1.name = "output"
o1.comment = "The output of test op"
o2 = op.outputs.add()
o2.name = "temp output"
o2.comment = "The temporary output of test op"
o2.temporary = True
test_str = op.attrs.add()
test_str.name = "str_attr"
test_str.type = attribute_pb2.STRING
test_str.comment = "A string attribute for test op"
actual = creation.get_docstring_from_op_proto(op)
expected_docstring = '''Test Op.
This op is used for unit test, not a real op.
:param a: Input a for test op
:type a: list | basestr
:param b: Input b for test op
:type b: basestr
:param output: The output of test op
:type output: basestr
:param temp output: This is a temporary variable. It does not have to set by user. The temporary output of test op
:type temp output: basestr
:param str_attr: A string attribute for test op
:type str_attr: basestr
'''
self.assertEqual(expected_docstring, actual)
class TestOpCreations(unittest.TestCase):
def test_all(self):
add_op = creation.op_creations.add_two(X="a", Y="b", Out="z")
add_op = op.Operator("add_two", X="a", Y="b", Out="z")
self.assertIsNotNone(add_op)
# Invoke C++ DebugString()
self.assertEqual('Op(add_two), inputs:(a, b), outputs:(z).',
......
import logging
import paddle.v2.framework.core as core
import unittest
import numpy as np
......@@ -7,10 +8,9 @@ ops = creation.op_creations
def create_tensor(scope, name, shape):
tensor = scope.create_var(name).get_tensor()
tensor = scope.new_var(name).get_tensor()
tensor.set_dims(shape)
tensor.alloc_float()
tensor.set(np.random.random(shape))
tensor.set(np.random.random(shape), core.CPUPlace())
return tensor
......@@ -31,40 +31,36 @@ class TestRNN(unittest.TestCase):
- h
'''
input_dim = 30
batch_size = 50
weight_dim = 15
sent_len = 11
def init(self):
input_dim = 30
batch_size = 50
weight_dim = 15
self.scope = core.Scope(None)
# create vars
create_tensor(self.scope, "x", [batch_size, input_dim])
create_tensor(self.scope, "W", [input_dim, weight_dim])
create_tensor(self.scope, "U", [weight_dim, weight_dim])
create_tensor(self.scope, "h_boot", [batch_size, weight_dim])
x_alias = "x@alias"
y_alias = "y@alias"
memory = "h@alias"
prememory = "h@pre"
output = "rnn_out"
output_alias = "rnn_out@alias"
# create step net
stepnet_var = self.scope.create_var("stepnet")
stepnet = stepnet_var.get_net()
# stepnet = core.Net.create()
x_fc_op = ops.fc(X=x_alias, W="W", Y="Wx")
h_fc_op = ops.fc(X=prememory, W="U", Y="Uh")
sum_op = ops.add_two(X="Wx", Y="Uh", Out="sum")
sig_op = ops.sigmoid(X="sum", Y=memory)
stepnet.add_op(x_fc_op)
stepnet.add_op(h_fc_op)
stepnet.add_op(sum_op)
stepnet.add_op(sig_op)
stepnet.complete_add_op(True)
self.scope = core.Scope()
self.create_global_variables()
self.create_step_net()
rnn_op = self.create_rnn_op()
ctx = core.DeviceContext.create(core.CPUPlace())
print 'infer_shape'
rnn_op.infer_shape(self.scope)
rnn_op.run(self.scope, ctx)
def create_global_variables(self):
# create inlink
create_tensor(self.scope, "x",
[self.sent_len, self.batch_size, self.input_dim])
create_tensor(self.scope, "W", [self.input_dim, self.input_dim])
create_tensor(self.scope, "U", [self.input_dim, self.input_dim])
create_tensor(self.scope, "h_boot", [self.batch_size, self.input_dim])
self.scope.new_var("step_scopes")
self.scope.new_var("h@alias")
self.scope.new_var("h")
def create_rnn_op(self):
# create RNNOp
rnnop = ops.recurrent_op(
# inputs
......@@ -72,17 +68,27 @@ class TestRNN(unittest.TestCase):
boot_memories=["h_boot"],
step_net="stepnet",
# outputs
outlinks=[output],
outlinks=["h"],
step_scopes="step_scopes",
# attributes
inlink_alias=["x@alias"],
outlink_alias=[output_alias],
pre_memories=[prememory],
memories=[memory])
outlink_alias=["h@alias"],
pre_memories=["h@pre"],
memories=["h@alias"])
return rnnop
def create_step_net(self):
var = self.scope.new_var("stepnet")
stepnet = var.get_net()
ctx = core.DeviceContext.cpu_context()
rnnop.infer_shape(self.scope)
rnnop.run(self.scope, ctx)
x_fc_op = ops.fc(X="x@alias", W="W", Y="Wx")
h_fc_op = ops.fc(X="h@pre", W="U", Y="Uh")
sum_op = ops.add_two(X="Wx", Y="Uh", Out="sum")
sig_op = ops.sigmoid(X="sum", Y="h@alias")
for op in [x_fc_op, h_fc_op, sum_op, sig_op]:
stepnet.add_op(op)
stepnet.complete_add_op(True)
def test_recurrent(self):
self.init()
......
......@@ -12,5 +12,8 @@ class TestSigmoidOp(unittest.TestCase):
self.outputs = {'Y': 1 / (1 + np.exp(-self.inputs['X']))}
#class TestSigmoidGradOp(unittest.TestCase):
#TODO(qingqing) add unit test
if __name__ == '__main__':
unittest.main()
......@@ -2,7 +2,7 @@ import unittest
import numpy as np
import paddle.v2.framework.core as core
import paddle.v2.framework.create_op_creation_methods as creation
from paddle.v2.framework.op import Operator
from op_test_util import OpTestMeta
......@@ -27,7 +27,7 @@ class TestSoftmaxOp(unittest.TestCase):
class TestSoftmaxGradOp(unittest.TestCase):
def test_softmax_grad(self):
op = creation.op_creations.softmax(X="X", Y="Y")
op = Operator('softmax', X="X", Y="Y")
backward_op = core.Operator.backward(op, set())
self.assertEqual(backward_op.type(), "softmax_grad")
expected = '''Op(softmax_grad), inputs:(X, Y, Y@GRAD), outputs:(X@GRAD).'''
......
from setuptools import setup
from setuptools import setup, Distribution
class BinaryDistribution(Distribution):
def has_ext_modules(foo):
return True
packages=['paddle',
'paddle.proto',
......@@ -11,7 +15,8 @@ packages=['paddle',
'paddle.v2.master',
'paddle.v2.plot',
'paddle.v2.framework',
'paddle.v2.framework.proto']
'paddle.v2.framework.proto',
'py_paddle']
setup_requires=["requests",
"numpy>=1.12",
......@@ -21,23 +26,33 @@ setup_requires=["requests",
"rarfile",
"scipy>=0.19.0",
"Pillow",
"nltk"]
"nltk>=3.2.2"]
if '${CMAKE_SYSTEM_PROCESSOR}' not in ['arm', 'armv7-a', 'aarch64']:
setup_requires+=["opencv-python"]
setup(name='paddle',
setup(name='paddlepaddle',
version='${PADDLE_VERSION}',
description='Parallel Distributed Deep Learning',
install_requires=setup_requires,
packages=packages,
package_data={'paddle.v2.master': ['libpaddle_master.so'],
'paddle.v2.framework': ['core.so']
package_data={
'paddle.v2.master': ['libpaddle_master.so'],
'paddle.v2.framework': ['core.so'],
'py_paddle':['*.py','_swig_paddle.so']
},
package_dir={
'': '${CMAKE_CURRENT_SOURCE_DIR}',
# The paddle.v2.framework.proto will be generated while compiling.
# So that package points to other directory.
'paddle.v2.framework.proto': '${PROJ_BINARY_ROOT}/paddle/framework'
'paddle.v2.framework.proto': '${PROJ_BINARY_ROOT}/paddle/framework',
'py_paddle': '${PROJ_ROOT}/paddle/py_paddle'
},
scripts=['${PROJ_BINARY_ROOT}/paddle/scripts/paddle'],
distclass=BinaryDistribution,
data_files=[('/usr/local/opt/paddle/bin',
['${PROJ_BINARY_ROOT}/paddle/scripts/paddle_usage',
'${PROJ_BINARY_ROOT}/paddle/trainer/paddle_trainer',
'${PROJ_BINARY_ROOT}/paddle/trainer/paddle_merge_model',
'${PROJ_BINARY_ROOT}/paddle/pserver/paddle_pserver_main'])]
)
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册