提交 a821fec1 编写于 作者: Q qijun

merge baidu/develop

...@@ -24,4 +24,5 @@ cmake-build-* ...@@ -24,4 +24,5 @@ cmake-build-*
python/paddle/v2/framework/core.so python/paddle/v2/framework/core.so
CMakeFiles CMakeFiles
cmake_install.cmake cmake_install.cmake
paddle/.timestamp
python/paddlepaddle.egg-info/
...@@ -38,7 +38,7 @@ before_install: ...@@ -38,7 +38,7 @@ before_install:
# Paddle is using protobuf 3.1 currently. Protobuf 3.2 breaks the compatibility. So we specify the python # Paddle is using protobuf 3.1 currently. Protobuf 3.2 breaks the compatibility. So we specify the python
# protobuf version. # protobuf version.
- pip install numpy wheel 'protobuf==3.1' sphinx==1.5.6 recommonmark sphinx-rtd-theme==0.1.9 virtualenv pre-commit requests==2.9.2 LinkChecker - pip install numpy wheel 'protobuf==3.1' sphinx==1.5.6 recommonmark sphinx-rtd-theme==0.1.9 virtualenv pre-commit requests==2.9.2 LinkChecker
- pip install rarfile - pip install rarfile nltk==3.2.2 scipy==0.19.0 recordio matplotlib Pillow
- curl https://glide.sh/get | bash - curl https://glide.sh/get | bash
- eval "$(GIMME_GO_VERSION=1.8.3 gimme)" - eval "$(GIMME_GO_VERSION=1.8.3 gimme)"
- go get -u github.com/alecthomas/gometalinter - go get -u github.com/alecthomas/gometalinter
......
...@@ -28,7 +28,7 @@ RUN apt-get update && \ ...@@ -28,7 +28,7 @@ RUN apt-get update && \
wget unzip unrar tar xz-utils bzip2 gzip coreutils ntp \ wget unzip unrar tar xz-utils bzip2 gzip coreutils ntp \
curl sed grep graphviz libjpeg-dev zlib1g-dev \ curl sed grep graphviz libjpeg-dev zlib1g-dev \
python-matplotlib gcc-4.8 g++-4.8 \ python-matplotlib gcc-4.8 g++-4.8 \
automake locales clang-format-3.8 swig doxygen cmake \ automake locales clang-format swig doxygen cmake \
liblapack-dev liblapacke-dev libboost-dev \ liblapack-dev liblapacke-dev libboost-dev \
clang-3.8 llvm-3.8 libclang-3.8-dev \ clang-3.8 llvm-3.8 libclang-3.8-dev \
net-tools && \ net-tools && \
...@@ -38,17 +38,16 @@ RUN apt-get update && \ ...@@ -38,17 +38,16 @@ RUN apt-get update && \
RUN pip --no-cache-dir install 'numpy>=1.12.0' RUN pip --no-cache-dir install 'numpy>=1.12.0'
# Install Go and glide # Install Go and glide
RUN wget -O go.tgz https://storage.googleapis.com/golang/go1.8.1.linux-amd64.tar.gz && \ RUN wget -qO- https://storage.googleapis.com/golang/go1.8.1.linux-amd64.tar.gz | \
tar -C /usr/local -xzf go.tgz && \ tar -xz -C /usr/local && \
mkdir /root/gopath && \ mkdir /root/gopath && \
mkdir /root/gopath/bin && \ mkdir /root/gopath/bin && \
mkdir /root/gopath/src && \ mkdir /root/gopath/src
rm go.tgz
ENV GOROOT=/usr/local/go GOPATH=/root/gopath ENV GOROOT=/usr/local/go GOPATH=/root/gopath
# should not be in the same line with GOROOT definition, otherwise docker build could not find GOROOT. # should not be in the same line with GOROOT definition, otherwise docker build could not find GOROOT.
ENV PATH=${PATH}:${GOROOT}/bin:${GOPATH}/bin ENV PATH=${PATH}:${GOROOT}/bin:${GOPATH}/bin
# install glide # install glide
RUN curl -q https://glide.sh/get | sh RUN curl -s -q https://glide.sh/get | sh
# git credential to skip password typing # git credential to skip password typing
RUN git config --global credential.helper store RUN git config --global credential.helper store
......
...@@ -8,7 +8,7 @@ ExternalProject_Add( ...@@ -8,7 +8,7 @@ ExternalProject_Add(
extern_lib_any extern_lib_any
${EXTERNAL_PROJECT_LOG_ARGS} ${EXTERNAL_PROJECT_LOG_ARGS}
GIT_REPOSITORY "https://github.com/PaddlePaddle/any.git" GIT_REPOSITORY "https://github.com/PaddlePaddle/any.git"
GIT_TAG "8fef1e93710a0edf8d7658999e284a1142c4c020" GIT_TAG "15595d8324be9e8a9a80d9ae442fdd12bd66df5d"
PREFIX ${ANY_SOURCE_DIR} PREFIX ${ANY_SOURCE_DIR}
UPDATE_COMMAND "" UPDATE_COMMAND ""
CONFIGURE_COMMAND "" CONFIGURE_COMMAND ""
......
...@@ -17,7 +17,7 @@ IF(NOT ${WITH_MKLML}) ...@@ -17,7 +17,7 @@ IF(NOT ${WITH_MKLML})
ENDIF(NOT ${WITH_MKLML}) ENDIF(NOT ${WITH_MKLML})
IF(WIN32 OR APPLE) IF(WIN32 OR APPLE)
MESSAGE(WARNING MESSAGE(WARNING
"Windows or Mac is not supported with MKLML in Paddle yet." "Windows or Mac is not supported with MKLML in Paddle yet."
"Force WITH_MKLML=OFF") "Force WITH_MKLML=OFF")
SET(WITH_MKLML OFF CACHE STRING "Disable MKLML package in Windows and MacOS" FORCE) SET(WITH_MKLML OFF CACHE STRING "Disable MKLML package in Windows and MacOS" FORCE)
...@@ -43,22 +43,21 @@ SET(CMAKE_INSTALL_RPATH "${CMAKE_INSTALL_RPATH}" "${MKLML_ROOT}/lib") ...@@ -43,22 +43,21 @@ SET(CMAKE_INSTALL_RPATH "${CMAKE_INSTALL_RPATH}" "${MKLML_ROOT}/lib")
INCLUDE_DIRECTORIES(${MKLML_INC_DIR}) INCLUDE_DIRECTORIES(${MKLML_INC_DIR})
SET(mklml_cmakefile ${MKLML_DOWNLOAD_DIR}/CMakeLists.txt) FILE(WRITE ${MKLML_DOWNLOAD_DIR}/CMakeLists.txt
FILE(WRITE ${mklml_cmakefile} "PROJECT(MKLML)\n" "PROJECT(MKLML)\n"
"cmake_minimum_required(VERSION 3.0)\n" "cmake_minimum_required(VERSION 3.0)\n"
"install(DIRECTORY ${MKLML_VER}\n" "install(DIRECTORY ${MKLML_VER}\n"
" DESTINATION ${MKLML_DST_DIR})\n") " DESTINATION ${MKLML_DST_DIR})\n")
ExternalProject_Add( ExternalProject_Add(
${MKLML_PROJECT} ${MKLML_PROJECT}
${EXTERNAL_PROJECT_LOG_ARGS} ${EXTERNAL_PROJECT_LOG_ARGS}
PREFIX ${MKLML_SOURCE_DIR} PREFIX ${MKLML_SOURCE_DIR}
DOWNLOAD_DIR ${MKLML_DOWNLOAD_DIR} DOWNLOAD_DIR ${MKLML_DOWNLOAD_DIR}
DOWNLOAD_COMMAND wget --no-check-certificate -O ${MKLML_DOWNLOAD_DIR}/${MKLML_VER}.tgz ${MKLML_URL} DOWNLOAD_COMMAND wget --no-check-certificate -qO- ${MKLML_URL} | tar xz -C ${MKLML_DOWNLOAD_DIR}
&& tar -xzf ${MKLML_DOWNLOAD_DIR}/${MKLML_VER}.tgz
DOWNLOAD_NO_PROGRESS 1 DOWNLOAD_NO_PROGRESS 1
UPDATE_COMMAND "" UPDATE_COMMAND ""
CMAKE_ARGS -DCMAKE_INSTALL_PREFIX=${MKLML_INSTALL_ROOT} CMAKE_ARGS -DCMAKE_INSTALL_PREFIX=${MKLML_INSTALL_ROOT}
CMAKE_CACHE_ARGS -DCMAKE_INSTALL_PREFIX:PATH=${MKLML_INSTALL_ROOT} CMAKE_CACHE_ARGS -DCMAKE_INSTALL_PREFIX:PATH=${MKLML_INSTALL_ROOT}
) )
......
...@@ -257,6 +257,16 @@ seq_concat ...@@ -257,6 +257,16 @@ seq_concat
.. autoclass:: paddle.v2.layer.seq_concat .. autoclass:: paddle.v2.layer.seq_concat
:noindex: :noindex:
kmax_sequence_score
-------------------
.. autoclass:: paddle.v2.layer.kmax_sequence_score
:noindex:
sub_nested_seq
--------------
.. autoclass:: paddle.v2.layer.sub_nested_seq
:noindex:
Reshaping Layers Reshaping Layers
================ ================
......
...@@ -11,6 +11,15 @@ Paddle每次发新的版本,遵循以下流程: ...@@ -11,6 +11,15 @@ Paddle每次发新的版本,遵循以下流程:
* 编译这个版本的Ubuntu Deb包。如果失败,修复Ubuntu Deb包编译问题,Patch号加一,返回第二步。 * 编译这个版本的Ubuntu Deb包。如果失败,修复Ubuntu Deb包编译问题,Patch号加一,返回第二步。
* 使用Regression Test List作为检查列表,测试Docker镜像/ubuntu安装包的功能正确性 * 使用Regression Test List作为检查列表,测试Docker镜像/ubuntu安装包的功能正确性
* 如果失败,记录下所有失败的例子,在这个`release/版本号`分支中,修复所有bug后,Patch号加一,返回第二步 * 如果失败,记录下所有失败的例子,在这个`release/版本号`分支中,修复所有bug后,Patch号加一,返回第二步
* 编译这个版本的python wheel包,并发布到pypi。
* 由于pypi.python.org目前遵循[严格的命名规范PEP 513](https://www.python.org/dev/peps/pep-0513),在使用twine上传之前,需要重命名wheel包中platform相关的后缀,比如将`linux_x86_64`修改成`manylinux1_x86_64`
* pypi上的package名称为paddlepaddle和paddlepaddle_gpu。如果要上传GPU版本的包,需要将build/python/setup.py中的name修改为"paddlepaddle_gpu",并重新打包wheel包:`python setup.py bdist_wheel`
* 上传方法:
```
cd build/python
pip install twine
twine upload dist/[package to upload]
```
4. 第三步完成后,将`release/版本号`分支合入master分支,并删除`release/版本号`分支。将master分支的合入commit打上tag,tag为`版本号`。同时再将`master`分支合入`develop`分支。最后删除`release/版本号`分支。 4. 第三步完成后,将`release/版本号`分支合入master分支,并删除`release/版本号`分支。将master分支的合入commit打上tag,tag为`版本号`。同时再将`master`分支合入`develop`分支。最后删除`release/版本号`分支。
5. 编译master分支的Docker发行镜像,发布到dockerhub。编译ubuntu的deb包,发布到github release页面 5. 编译master分支的Docker发行镜像,发布到dockerhub。编译ubuntu的deb包,发布到github release页面
6. 协同完成Release Note的书写 6. 协同完成Release Note的书写
......
...@@ -3,6 +3,43 @@ PaddlePaddle的Docker容器使用方式 ...@@ -3,6 +3,43 @@ PaddlePaddle的Docker容器使用方式
PaddlePaddle目前唯一官方支持的运行的方式是Docker容器。因为Docker能在所有主要操作系统(包括Linux,Mac OS X和Windows)上运行。 请注意,您需要更改 `Dockers设置 <https://github.com/PaddlePaddle/Paddle/issues/627>`_ 才能充分利用Mac OS X和Windows上的硬件资源。 PaddlePaddle目前唯一官方支持的运行的方式是Docker容器。因为Docker能在所有主要操作系统(包括Linux,Mac OS X和Windows)上运行。 请注意,您需要更改 `Dockers设置 <https://github.com/PaddlePaddle/Paddle/issues/627>`_ 才能充分利用Mac OS X和Windows上的硬件资源。
Docker使用入门
------------------------------
几个基础的概念帮助理解和使用Docker:
- *镜像*:一个Docker镜像是一个打包好的软件。它包含了这个软件本身和它所依赖的运行环境。PaddlePaddle的Docker镜像就包含了PaddlePaddle的Python库以及其依赖的多个Python库。这样我们可以直接在Docker中运行需要的程序而不需要安装后再执行。可以执行:
.. code-block:: bash
docker images
来列出当前系统中的所有镜像,同样可以执行:
.. code-block:: bash
docker pull paddlepaddle/paddle:0.10.0
来下载Docker镜像,paddlepaddle/paddle是从官方镜像源Dockerhub.com下载的,推荐国内用户使用docker.paddlepaddle.org/paddle下载。
- *容器*: 如果说一个Docker镜像就是一个程序,那容器就是这个程序运行时产生的“进程”。
实际上,一个容器就是一个操作系统的进程,但是是运行在独立的进程空间,文件系统以及网络之上。
可以执行:
.. code-block:: bash
docker run paddlepaddle/paddle:0.10.0
来使用一个镜像启动一个容器。
- 默认情况下,Docker容器会运行在独立的文件系统空间之上,我们无法在Docker容器中
访问到主机上的文件。可以通过\ *挂载Volume*\ 的方式,将主机上的文件或目录挂载到
Docker容器中。下面的命令把当前目录挂载到了容器中的 /data 目录下,容器使用
debian镜像,并且启动后执行 :code:`ls /data`。
.. code-block:: bash
docker run --rm -v $(pwd):/data debian ls /data
PaddlePaddle发布的Docker镜像使用说明 PaddlePaddle发布的Docker镜像使用说明
------------------------------ ------------------------------
...@@ -12,11 +49,11 @@ PaddlePaddle需要的所有编译工具。把编译出来的PaddlePaddle也打 ...@@ -12,11 +49,11 @@ PaddlePaddle需要的所有编译工具。把编译出来的PaddlePaddle也打
像,称为生产镜像,里面涵盖了PaddlePaddle运行所需的所有环境。每次 像,称为生产镜像,里面涵盖了PaddlePaddle运行所需的所有环境。每次
PaddlePaddle发布新版本的时候都会发布对应版本的生产镜像以及开发镜像。运 PaddlePaddle发布新版本的时候都会发布对应版本的生产镜像以及开发镜像。运
行镜像包括纯CPU版本和GPU版本以及其对应的非AVX版本。我们会在 行镜像包括纯CPU版本和GPU版本以及其对应的非AVX版本。我们会在
`dockerhub.com <https://hub.docker.com/r/paddlepaddle/paddle/tags/>`_ 提供最新 `dockerhub.com <https://hub.docker.com/r/paddlepaddle/paddle/tags/>`_
的Docker镜像,可以在"tags"标签下找到最新的Paddle镜像版本。为了方便在国 和国内镜像`docker.paddlepaddle.org` 提供最新
内的开发者下载Docker镜像,我们提供了国内的镜像服务器供大家使用。如果您 的Docker镜像,可以在"tags"标签下找到最新的Paddle镜像版本。
在国内,请把文档里命令中的paddlepaddle/paddle替换成
docker.paddlepaddle.org/paddle。 **注意:为了方便在国内的开发者下载Docker镜像,我们提供了国内的镜像服务器供大家使用。如果您在国内,请把文档里命令中的paddlepaddle/paddle替换成docker.paddlepaddle.org/paddle。**
1. 开发镜像::code:`paddlepaddle/paddle:0.10.0-dev` 1. 开发镜像::code:`paddlepaddle/paddle:0.10.0-dev`
...@@ -68,6 +105,8 @@ docker.paddlepaddle.org/paddle。 ...@@ -68,6 +105,8 @@ docker.paddlepaddle.org/paddle。
如果输出是No,就需要选择使用no-AVX的镜像 如果输出是No,就需要选择使用no-AVX的镜像
**注:在0.10.0之后的版本,PaddlePaddle都可以自动判断硬件是否支持AVX,所以无需判断AVX即可使用**
以上方法在GPU镜像里也能用,只是请不要忘记提前在物理机上安装GPU最新驱动。 以上方法在GPU镜像里也能用,只是请不要忘记提前在物理机上安装GPU最新驱动。
为了保证GPU驱动能够在镜像里面正常运行,我们推荐使用[nvidia-docker](https://github.com/NVIDIA/nvidia-docker)来运行镜像。 为了保证GPU驱动能够在镜像里面正常运行,我们推荐使用[nvidia-docker](https://github.com/NVIDIA/nvidia-docker)来运行镜像。
......
...@@ -63,12 +63,35 @@ CPU-only version and a CUDA GPU version and their no-AVX versions. ...@@ -63,12 +63,35 @@ CPU-only version and a CUDA GPU version and their no-AVX versions.
We put the docker images on `dockerhub.com We put the docker images on `dockerhub.com
<https://hub.docker.com/r/paddlepaddle/paddle/tags/>`_. You can find the <https://hub.docker.com/r/paddlepaddle/paddle/tags/>`_. You can find the
latest versions under "tags" tab at dockerhub.com. If you are in latest versions under "tags" tab at dockerhub.com.
China, you can use our Docker image registry mirror to speed up the
download process. To use it, please replace all paddlepaddle/paddle in
the commands to docker.paddlepaddle.org/paddle.
1. Production images, this image might have multiple variants: **NOTE: If you are in China, you can use our Docker image registry mirror to speed up the download process. To use it, please replace all paddlepaddle/paddle in the commands with docker.paddlepaddle.org/paddle.**
1. development image :code:`paddlepaddle/paddle:<version>-dev`
This image packs the related development tools and runtime
environment. Users and developers can use this image instead of
their own local computer to accomplish development, building,
releasing, document writing, etc. Since different versions of Paddle
may depend on different versions of libraries and tools, if you
want to set up a local environment, you must pay attention to the
versions. The development image contains:
- gcc/clang
- nvcc
- Python
- sphinx
- woboq
- sshd
Many developers use servers with GPUs; they can use ssh to log in to
the server and run :code:`docker exec` to enter the docker
container and start their work. They can also start a development
docker image with the SSHD service, so that they can log in to the
container and start work.
2. Production images, this image might have multiple variants:
- GPU/AVX::code:`paddlepaddle/paddle:<version>-gpu` - GPU/AVX::code:`paddlepaddle/paddle:<version>-gpu`
- GPU/no-AVX::code:`paddlepaddle/paddle:<version>-gpu-noavx` - GPU/no-AVX::code:`paddlepaddle/paddle:<version>-gpu-noavx`
...@@ -84,7 +107,7 @@ the commands to docker.paddlepaddle.org/paddle. ...@@ -84,7 +107,7 @@ the commands to docker.paddlepaddle.org/paddle.
if cat /proc/cpuinfo | grep -i avx; then echo Yes; else echo No; fi if cat /proc/cpuinfo | grep -i avx; then echo Yes; else echo No; fi
**NOTE: versions after 0.10.0 will automatically detect system AVX support, so manual detection is not needed in this case.**
To run the CPU-only image as an interactive container: To run the CPU-only image as an interactive container:
.. code-block:: bash .. code-block:: bash
...@@ -103,29 +126,6 @@ the commands to docker.paddlepaddle.org/paddle. ...@@ -103,29 +126,6 @@ the commands to docker.paddlepaddle.org/paddle.
nvidia-docker run -it --rm paddlepaddle/paddle:0.10.0-gpu /bin/bash nvidia-docker run -it --rm paddlepaddle/paddle:0.10.0-gpu /bin/bash
2. development image :code:`paddlepaddle/paddle:<version>-dev`
This image packs the related development tools and runtime
environment. Users and developers can use this image instead of
their own local computer to accomplish development, building,
releasing, document writing, etc. Since different versions of Paddle
may depend on different versions of libraries and tools, if you
want to set up a local environment, you must pay attention to the
versions. The development image contains:
- gcc/clang
- nvcc
- Python
- sphinx
- woboq
- sshd
Many developers use servers with GPUs; they can use ssh to log in to
the server and run :code:`docker exec` to enter the docker
container and start their work. They can also start a development
docker image with the SSHD service, so that they can log in to the
container and start work.
Train Model Using Python API Train Model Using Python API
---------------------------- ----------------------------
......
...@@ -13,15 +13,11 @@ ...@@ -13,15 +13,11 @@
# serve to show the default. # serve to show the default.
import sys import sys
import os, subprocess import os, subprocess
sys.path.insert(0, os.path.abspath('@PROJ_ROOT@/python'))
import shlex import shlex
from recommonmark import parser, transform from recommonmark import parser, transform
try: import paddle
import py_paddle import paddle.v2
import paddle
import paddle.v2
except ImportError:
print("Must install paddle python package before generating documentation")
sys.exit(1)
MarkdownParser = parser.CommonMarkParser MarkdownParser = parser.CommonMarkParser
AutoStructify = transform.AutoStructify AutoStructify = transform.AutoStructify
......
...@@ -13,15 +13,11 @@ ...@@ -13,15 +13,11 @@
# serve to show the default. # serve to show the default.
import sys import sys
import os, subprocess import os, subprocess
sys.path.insert(0, os.path.abspath('@PROJ_ROOT@/python'))
import shlex import shlex
from recommonmark import parser, transform from recommonmark import parser, transform
try: import paddle
import py_paddle import paddle.v2
import paddle
import paddle.v2
except ImportError:
print("Must install paddle python package before generating documentation")
sys.exit(1)
MarkdownParser = parser.CommonMarkParser MarkdownParser = parser.CommonMarkParser
......
...@@ -32,7 +32,7 @@ import ( ...@@ -32,7 +32,7 @@ import (
func main() { func main() {
port := flag.Int("port", 0, "port of the pserver") port := flag.Int("port", 0, "port of the pserver")
index := flag.Int("index", -1, "index of this pserver, should be larger or equal than 0") index := flag.Int("index", -1, "index of the pserver, set to -1 if use etcd for auto pserver index registry")
etcdEndpoint := flag.String("etcd-endpoint", "http://127.0.0.1:2379", etcdEndpoint := flag.String("etcd-endpoint", "http://127.0.0.1:2379",
"comma separated endpoint string for pserver to connect to etcd") "comma separated endpoint string for pserver to connect to etcd")
dialTimeout := flag.Duration("dial-timeout", 5*time.Second, "dial timeout") dialTimeout := flag.Duration("dial-timeout", 5*time.Second, "dial timeout")
...@@ -60,12 +60,12 @@ func main() { ...@@ -60,12 +60,12 @@ func main() {
idx, err = e.Register(*port) idx, err = e.Register(*port)
candy.Must(err) candy.Must(err)
cp, err = pserver.NewCheckpointFromFile(*checkpointPath, idx, e) cp, err = pserver.LoadCheckpoint(e, idx)
if err != nil { if err != nil {
if err == pserver.ErrCheckpointNotFound { if err == pserver.ErrCheckpointNotFound {
log.Infof("Could not find the pserver checkpoint.") log.Infof("Could not find the pserver checkpoint.")
} else { } else {
log.Errorf("Fetch checkpoint failed, %s", err) panic(err)
} }
} }
} }
......
hash: 2a1c0eca5c07a130e3d224f9821f96cfa37a39bf6bce141c855bbc57ef569f1c hash: 1b9b07408ca7fac27a374dc2ccd2433e4bff090484008a037df967284949a582
updated: 2017-07-29T07:34:48.722757905+08:00 updated: 2017-08-07T23:37:48.867469328Z
imports: imports:
- name: github.com/beorn7/perks - name: github.com/beorn7/perks
version: 4c0e84591b9aa9e6dcfdf3e020114cd81f89d5f9 version: 4c0e84591b9aa9e6dcfdf3e020114cd81f89d5f9
...@@ -10,7 +10,7 @@ imports: ...@@ -10,7 +10,7 @@ imports:
- name: github.com/cockroachdb/cmux - name: github.com/cockroachdb/cmux
version: 112f0506e7743d64a6eb8fedbcff13d9979bbf92 version: 112f0506e7743d64a6eb8fedbcff13d9979bbf92
- name: github.com/coreos/etcd - name: github.com/coreos/etcd
version: c31bec0f29facff13f7c3e3d948e55dd6689ed42 version: d0d1a87aa96ae14914751d42264262cb69eda170
subpackages: subpackages:
- alarm - alarm
- auth - auth
...@@ -24,6 +24,7 @@ imports: ...@@ -24,6 +24,7 @@ imports:
- error - error
- etcdserver - etcdserver
- etcdserver/api - etcdserver/api
- etcdserver/api/etcdhttp
- etcdserver/api/v2http - etcdserver/api/v2http
- etcdserver/api/v2http/httptypes - etcdserver/api/v2http/httptypes
- etcdserver/api/v3client - etcdserver/api/v3client
...@@ -145,6 +146,8 @@ imports: ...@@ -145,6 +146,8 @@ imports:
version: a1dba9ce8baed984a2495b658c82687f8157b98f version: a1dba9ce8baed984a2495b658c82687f8157b98f
subpackages: subpackages:
- xfs - xfs
- name: github.com/satori/go.uuid
version: 879c5887cd475cd7864858769793b2ceb0d44feb
- name: github.com/sirupsen/logrus - name: github.com/sirupsen/logrus
version: a3f95b5c423586578a4e099b11a46c2479628cac version: a3f95b5c423586578a4e099b11a46c2479628cac
- name: github.com/topicai/candy - name: github.com/topicai/candy
...@@ -208,11 +211,6 @@ testImports: ...@@ -208,11 +211,6 @@ testImports:
version: 04cdfd42973bb9c8589fd6a731800cf222fde1a9 version: 04cdfd42973bb9c8589fd6a731800cf222fde1a9
subpackages: subpackages:
- spew - spew
- name: github.com/docker/docker
version: b6d164e6c46d8115b146e4c3ac93784e9ef8b49e
subpackages:
- pkg/ioutils
- pkg/longpath
- name: github.com/pmezard/go-difflib - name: github.com/pmezard/go-difflib
version: d8ed2627bdf02c080bf22230dbb337003b7aba2d version: d8ed2627bdf02c080bf22230dbb337003b7aba2d
subpackages: subpackages:
......
...@@ -14,11 +14,13 @@ import: ...@@ -14,11 +14,13 @@ import:
version: ^1.0.0 version: ^1.0.0
- package: github.com/topicai/candy - package: github.com/topicai/candy
- package: golang.org/x/crypto - package: golang.org/x/crypto
vcs: git
repo: https://github.com/golang/crypto.git repo: https://github.com/golang/crypto.git
- package: golang.org/x/sys
vcs: git vcs: git
- package: golang.org/x/sys
repo: https://github.com/golang/sys.git repo: https://github.com/golang/sys.git
- package: golang.org/x/text
vcs: git vcs: git
- package: golang.org/x/text
repo: https://github.com/golang/text.git repo: https://github.com/golang/text.git
vcs: git
- package: github.com/satori/go.uuid
version: v1.1.0
...@@ -77,11 +77,12 @@ type taskEntry struct { ...@@ -77,11 +77,12 @@ type taskEntry struct {
NumFailure int NumFailure int
} }
type taskQueues struct { type masterState struct {
Todo []taskEntry Todo []taskEntry
Pending map[int]taskEntry // map from task ID to task entry Pending map[int]taskEntry // map from task ID to task entry
Done []taskEntry Done []taskEntry
Failed []taskEntry Failed []taskEntry
CurPass int
} }
// Service is the master server service. // Service is the master server service.
...@@ -94,11 +95,11 @@ type Service struct { ...@@ -94,11 +95,11 @@ type Service struct {
ready chan struct{} ready chan struct{}
initDone bool initDone bool
mu sync.Mutex mu sync.Mutex
taskQueues taskQueues // State to be persisted to snapshot.
currPass int state masterState
jobTasks []taskEntry // The trainer that is currently saving model. This state is
// transient, does not need to be persisted to snapshot.
savingTrainer string savingTrainer string
} }
...@@ -141,8 +142,8 @@ func NewService(store Store, chunksPerTask int, timeoutDur time.Duration, failur ...@@ -141,8 +142,8 @@ func NewService(store Store, chunksPerTask int, timeoutDur time.Duration, failur
s.chunksPerTask = chunksPerTask s.chunksPerTask = chunksPerTask
s.timeoutDur = timeoutDur s.timeoutDur = timeoutDur
s.failureMax = failureMax s.failureMax = failureMax
s.taskQueues = taskQueues{} s.state = masterState{}
s.taskQueues.Pending = make(map[int]taskEntry) s.state.Pending = make(map[int]taskEntry)
s.ready = make(chan struct{}) s.ready = make(chan struct{})
s.store = store s.store = store
recovered, err := s.recover() recovered, err := s.recover()
...@@ -180,7 +181,7 @@ func (s *Service) recover() (bool, error) { ...@@ -180,7 +181,7 @@ func (s *Service) recover() (bool, error) {
} }
dec := gob.NewDecoder(gr) dec := gob.NewDecoder(gr)
var tqs taskQueues var tqs masterState
err = dec.Decode(&tqs) err = dec.Decode(&tqs)
if err != nil { if err != nil {
return false, err return false, err
...@@ -193,7 +194,12 @@ func (s *Service) recover() (bool, error) { ...@@ -193,7 +194,12 @@ func (s *Service) recover() (bool, error) {
log.Errorln(err) log.Errorln(err)
} }
s.taskQueues = tqs s.state = tqs
log.WithFields(s.logFields()).Infof("Master recovered from snapshot, scheduling pending task timeout check.")
for _, t := range s.state.Pending {
time.AfterFunc(s.timeoutDur, s.checkTimeoutFunc(t.Task.Meta.ID, t.Task.Meta.Epoch))
}
return true, nil return true, nil
} }
...@@ -208,7 +214,7 @@ func (s *Service) snapshot() error { ...@@ -208,7 +214,7 @@ func (s *Service) snapshot() error {
var buf bytes.Buffer var buf bytes.Buffer
gw := gzip.NewWriter(&buf) gw := gzip.NewWriter(&buf)
enc := gob.NewEncoder(gw) enc := gob.NewEncoder(gw)
err := enc.Encode(s.taskQueues) err := enc.Encode(s.state)
if err != nil { if err != nil {
return err return err
} }
...@@ -290,8 +296,7 @@ func (s *Service) SetDataset(globPaths []string, _ *int) error { ...@@ -290,8 +296,7 @@ func (s *Service) SetDataset(globPaths []string, _ *int) error {
return err return err
} }
s.jobTasks = partition(chunks, s.chunksPerTask) s.state.Todo = partition(chunks, s.chunksPerTask)
s.taskQueues.Todo = s.jobTasks
err = s.snapshot() err = s.snapshot()
if err != nil { if err != nil {
...@@ -319,17 +324,17 @@ func (s *Service) processFailedTask(t taskEntry, epoch int) { ...@@ -319,17 +324,17 @@ func (s *Service) processFailedTask(t taskEntry, epoch int) {
} }
}() }()
delete(s.taskQueues.Pending, t.Task.Meta.ID) delete(s.state.Pending, t.Task.Meta.ID)
t.NumFailure++ t.NumFailure++
if t.NumFailure > s.failureMax { if t.NumFailure > s.failureMax {
log.Warningf("Task %v failed %d times, discard.", t.Task, t.NumFailure) log.Warningf("Task %v failed %d times, discard.", t.Task, t.NumFailure)
s.taskQueues.Failed = append(s.taskQueues.Failed, t) s.state.Failed = append(s.state.Failed, t)
return return
} }
log.Warningf("Task %v failed %d times, re-dispatch.", t.Task, t.NumFailure) log.Warningf("Task %v failed %d times, re-dispatch.", t.Task, t.NumFailure)
s.taskQueues.Todo = append(s.taskQueues.Todo, t) s.state.Todo = append(s.state.Todo, t)
return return
} }
...@@ -338,7 +343,7 @@ func (s *Service) checkTimeoutFunc(taskID int, epoch int) func() { ...@@ -338,7 +343,7 @@ func (s *Service) checkTimeoutFunc(taskID int, epoch int) func() {
s.mu.Lock() s.mu.Lock()
defer s.mu.Unlock() defer s.mu.Unlock()
t, ok := s.taskQueues.Pending[taskID] t, ok := s.state.Pending[taskID]
if !ok { if !ok {
return return
} }
...@@ -350,10 +355,11 @@ func (s *Service) checkTimeoutFunc(taskID int, epoch int) func() { ...@@ -350,10 +355,11 @@ func (s *Service) checkTimeoutFunc(taskID int, epoch int) func() {
// must be called with lock held. // must be called with lock held.
func (s *Service) logFields() log.Fields { func (s *Service) logFields() log.Fields {
return log.Fields{ return log.Fields{
"todoLen": len(s.taskQueues.Todo), "todoLen": len(s.state.Todo),
"pendingLen": len(s.taskQueues.Pending), "pendingLen": len(s.state.Pending),
"doneLen": len(s.taskQueues.Done), "doneLen": len(s.state.Done),
"failedLen": len(s.taskQueues.Failed), "failedLen": len(s.state.Failed),
"curPass": s.state.CurPass,
} }
} }
...@@ -366,17 +372,17 @@ func (s *Service) GetTask(passID int, task *Task) error { ...@@ -366,17 +372,17 @@ func (s *Service) GetTask(passID int, task *Task) error {
s.mu.Lock() s.mu.Lock()
defer s.mu.Unlock() defer s.mu.Unlock()
if passID < s.currPass { if passID < s.state.CurPass {
return ErrPassBefore return ErrPassBefore
} }
if passID > s.currPass { if passID > s.state.CurPass {
// Client may get run to pass after master when one client faster than the // Client may get run to pass after master when one client faster than the
// other // other
return ErrPassAfter return ErrPassAfter
} }
if len(s.taskQueues.Todo) == 0 { if len(s.state.Todo) == 0 {
if len(s.taskQueues.Done) == 0 && len(s.taskQueues.Pending) == 0 { if len(s.state.Done) == 0 && len(s.state.Pending) == 0 {
log.WithFields(s.logFields()).Warningln("All tasks failed, may start next pass") log.WithFields(s.logFields()).Warningln("All tasks failed, may start next pass")
return ErrAllTaskFailed return ErrAllTaskFailed
} }
...@@ -384,10 +390,10 @@ func (s *Service) GetTask(passID int, task *Task) error { ...@@ -384,10 +390,10 @@ func (s *Service) GetTask(passID int, task *Task) error {
return ErrNoMoreAvailable return ErrNoMoreAvailable
} }
t := s.taskQueues.Todo[0] t := s.state.Todo[0]
t.Task.Meta.Epoch++ t.Task.Meta.Epoch++
s.taskQueues.Todo = s.taskQueues.Todo[1:] s.state.Todo = s.state.Todo[1:]
s.taskQueues.Pending[t.Task.Meta.ID] = t s.state.Pending[t.Task.Meta.ID] = t
err := s.snapshot() err := s.snapshot()
if err != nil { if err != nil {
return err return err
...@@ -409,7 +415,7 @@ func (s *Service) TaskFinished(taskID int, dummy *int) error { ...@@ -409,7 +415,7 @@ func (s *Service) TaskFinished(taskID int, dummy *int) error {
s.mu.Lock() s.mu.Lock()
defer s.mu.Unlock() defer s.mu.Unlock()
t, ok := s.taskQueues.Pending[taskID] t, ok := s.state.Pending[taskID]
if !ok { if !ok {
log.WithFields(s.logFields()).Warningln("Pending task #%d not found.", taskID) log.WithFields(s.logFields()).Warningln("Pending task #%d not found.", taskID)
return nil return nil
...@@ -417,18 +423,18 @@ func (s *Service) TaskFinished(taskID int, dummy *int) error { ...@@ -417,18 +423,18 @@ func (s *Service) TaskFinished(taskID int, dummy *int) error {
// task finished, reset timeout // task finished, reset timeout
t.NumFailure = 0 t.NumFailure = 0
s.taskQueues.Done = append(s.taskQueues.Done, t) s.state.Done = append(s.state.Done, t)
delete(s.taskQueues.Pending, taskID) delete(s.state.Pending, taskID)
log.WithFields(s.logFields()).Infof("Task #%d finished.", taskID) log.WithFields(s.logFields()).Infof("Task #%d finished.", taskID)
if len(s.taskQueues.Todo) == 0 && len(s.taskQueues.Pending) == 0 { if len(s.state.Todo) == 0 && len(s.state.Pending) == 0 {
// increase master side pass count if all tasks finished // increase master side pass count if all tasks finished
s.currPass++ s.state.CurPass++
s.taskQueues.Todo = s.jobTasks s.state.Todo = append(s.state.Done, s.state.Failed...)
s.taskQueues.Done = []taskEntry{} s.state.Done = []taskEntry{}
// TODO(typhoonzero): deal with failed tasks // TODO(typhoonzero): deal with failed tasks
s.taskQueues.Failed = []taskEntry{} s.state.Failed = []taskEntry{}
log.WithFields(s.logFields()).Warningf("all task finished, add new pass data, newpass: %d.", s.currPass) log.WithFields(s.logFields()).Warningf("all task finished, add new pass data, newpass: %d.", s.state.CurPass)
} }
err := s.snapshot() err := s.snapshot()
...@@ -447,7 +453,7 @@ func (s *Service) TaskFailed(meta TaskMeta, dummy *int) error { ...@@ -447,7 +453,7 @@ func (s *Service) TaskFailed(meta TaskMeta, dummy *int) error {
s.mu.Lock() s.mu.Lock()
defer s.mu.Unlock() defer s.mu.Unlock()
t, ok := s.taskQueues.Pending[meta.ID] t, ok := s.state.Pending[meta.ID]
if !ok { if !ok {
log.WithFields(s.logFields()).Warningln("TaskFailed:Pending task #%v not found.", t.Task.Meta) log.WithFields(s.logFields()).Warningln("TaskFailed:Pending task #%v not found.", t.Task.Meta)
return nil return nil
......
package master_test package master_test
import ( import (
"io/ioutil"
"net/url"
"os" "os"
"strings"
"testing" "testing"
"time" "time"
"github.com/PaddlePaddle/Paddle/go/master" "github.com/PaddlePaddle/Paddle/go/master"
"github.com/coreos/etcd/clientv3" "github.com/coreos/etcd/clientv3"
"github.com/coreos/etcd/embed" "github.com/coreos/etcd/embed"
"github.com/docker/docker/pkg/ioutils"
"github.com/stretchr/testify/assert" "github.com/stretchr/testify/assert"
) )
func TestNewServiceWithEtcd(t *testing.T) { func TestNewServiceWithEtcd(t *testing.T) {
// setup an embed etcd server // setup an embed etcd server
etcdDir, err := ioutils.TempDir("", "") etcdDir, err := ioutil.TempDir("", "")
if err != nil { if err != nil {
t.Fatal(err) t.Fatal(err)
} }
cfg := embed.NewConfig() cfg := embed.NewConfig()
lpurl, _ := url.Parse("http://localhost:0")
lcurl, _ := url.Parse("http://localhost:0")
cfg.LPUrls = []url.URL{*lpurl}
cfg.LCUrls = []url.URL{*lcurl}
cfg.Dir = etcdDir cfg.Dir = etcdDir
e, err := embed.StartEtcd(cfg) e, err := embed.StartEtcd(cfg)
if err != nil { if err != nil {
...@@ -30,15 +36,13 @@ func TestNewServiceWithEtcd(t *testing.T) { ...@@ -30,15 +36,13 @@ func TestNewServiceWithEtcd(t *testing.T) {
t.Fatal(err) t.Fatal(err)
} }
}() }()
select {
case <-e.Server.ReadyNotify():
t.Log("Server is ready!")
case <-time.After(60 * time.Second):
e.Server.Stop() // trigger a shutdown
t.Fatal("Server took too long to start!")
}
ep := []string{"127.0.0.1:2379"} <-e.Server.ReadyNotify()
port := strings.Split(e.Clients[0].Addr().String(), ":")[1]
endpoint := "127.0.0.1:" + port
ep := []string{endpoint}
masterAddr := "127.0.0.1:3306" masterAddr := "127.0.0.1:3306"
store, err := master.NewEtcdClient(ep, masterAddr, master.DefaultLockPath, master.DefaultAddrPath, master.DefaultStatePath, 30) store, err := master.NewEtcdClient(ep, masterAddr, master.DefaultLockPath, master.DefaultAddrPath, master.DefaultStatePath, 30)
if err != nil { if err != nil {
......
...@@ -90,8 +90,12 @@ func cArrayToSlice(p unsafe.Pointer, len int) []byte { ...@@ -90,8 +90,12 @@ func cArrayToSlice(p unsafe.Pointer, len int) []byte {
type selector bool type selector bool
func (s selector) Select() bool { func (s selector) Select() (bool, error) {
return bool(s) return bool(s), nil
}
func (s selector) Done() error {
return nil
} }
type lister []client.Server type lister []client.Server
...@@ -114,11 +118,10 @@ func paddle_new_pserver_client(addrs *C.char, selected int) C.paddle_pserver_cli ...@@ -114,11 +118,10 @@ func paddle_new_pserver_client(addrs *C.char, selected int) C.paddle_pserver_cli
} }
//export paddle_new_etcd_pserver_client //export paddle_new_etcd_pserver_client
func paddle_new_etcd_pserver_client(etcdEndpoints *C.char, selected int) C.paddle_pserver_client { func paddle_new_etcd_pserver_client(etcdEndpoints *C.char) C.paddle_pserver_client {
// TODO(Longfei: use etcd lock to decide which trainer to initialize the parameters)
addr := C.GoString(etcdEndpoints) addr := C.GoString(etcdEndpoints)
etcdClient := client.NewEtcd(addr) etcdClient := client.NewEtcd(addr)
c := client.NewClient(etcdClient, etcdClient.Desired(), selector(selected != 0)) c := client.NewClient(etcdClient, etcdClient.Desired(), etcdClient)
return add(c) return add(c)
} }
...@@ -136,7 +139,12 @@ func paddle_pserver_client_release(client C.paddle_pserver_client) { ...@@ -136,7 +139,12 @@ func paddle_pserver_client_release(client C.paddle_pserver_client) {
//export paddle_begin_init_params //export paddle_begin_init_params
func paddle_begin_init_params(client C.paddle_pserver_client) C.int { func paddle_begin_init_params(client C.paddle_pserver_client) C.int {
c := get(client) c := get(client)
if selected := c.BeginInitParams(); selected { selected, err := c.BeginInitParams()
if err != nil {
panic(err)
}
if selected {
return 1 return 1
} }
return 0 return 0
......
...@@ -27,9 +27,13 @@ import ( ...@@ -27,9 +27,13 @@ import (
// TODO(helin): add RPC call retry logic // TODO(helin): add RPC call retry logic
// Selector selects if the client should initialize parameter servers. // Selector selects if the client should initialize parameters and
// reports the initialization process done.
type Selector interface { type Selector interface {
Select() bool // Select selects if the client should initialize parameter servers.
Select() (bool, error)
// Done indicates the initialization process is done.
Done() error
} }
// Server is the identification of a parameter Server. // Server is the identification of a parameter Server.
...@@ -115,7 +119,7 @@ func (c *Client) monitorPservers(l Lister, pserverNum int) { ...@@ -115,7 +119,7 @@ func (c *Client) monitorPservers(l Lister, pserverNum int) {
// servers. Other trainers will be blocked until the initialization is // servers. Other trainers will be blocked until the initialization is
// done, and they need to get the initialized parameters from // done, and they need to get the initialized parameters from
// parameter servers using GetParams. // parameter servers using GetParams.
func (c *Client) BeginInitParams() bool { func (c *Client) BeginInitParams() (bool, error) {
return c.sel.Select() return c.sel.Select()
} }
......
...@@ -59,7 +59,7 @@ func initClient() [numPserver]int { ...@@ -59,7 +59,7 @@ func initClient() [numPserver]int {
go func(l net.Listener) { go func(l net.Listener) {
var cp pserver.Checkpoint var cp pserver.Checkpoint
s, err := pserver.NewService(0, 1, "", nil, cp) s, err := pserver.NewService(0, time.Hour, "", nil, cp)
if err != nil { if err != nil {
panic(err) panic(err)
} }
...@@ -124,8 +124,12 @@ func initEtcdClient() { ...@@ -124,8 +124,12 @@ func initEtcdClient() {
type selector bool type selector bool
func (s selector) Select() bool { func (s selector) Select() (bool, error) {
return bool(s) return bool(s), nil
}
func (s selector) Done() error {
return nil
} }
type lister []client.Server type lister []client.Server
...@@ -135,7 +139,11 @@ func (l lister) List() []client.Server { ...@@ -135,7 +139,11 @@ func (l lister) List() []client.Server {
} }
func testClient(t *testing.T, c *client.Client) { func testClient(t *testing.T, c *client.Client) {
selected := c.BeginInitParams() selected, err := c.BeginInitParams()
if err != nil {
t.Fatal(err)
}
if !selected { if !selected {
t.Fatal("should be selected.") t.Fatal("should be selected.")
} }
......
...@@ -16,53 +16,60 @@ package client ...@@ -16,53 +16,60 @@ package client
import ( import (
"context" "context"
"errors"
"fmt"
"strconv" "strconv"
"strings" "strings"
"time" "time"
"github.com/PaddlePaddle/Paddle/go/pserver" "github.com/PaddlePaddle/Paddle/go/pserver"
"github.com/coreos/etcd/clientv3" "github.com/coreos/etcd/clientv3"
"github.com/coreos/etcd/clientv3/concurrency"
log "github.com/sirupsen/logrus" log "github.com/sirupsen/logrus"
) )
const ( const (
defaultEtcdTimeout time.Duration = 5 * time.Second defaultEtcdTimeout time.Duration = 5 * time.Second
initLockPath = "/init_ps/lock"
initDonePath = "/init_ps/done"
initDoneVal = "1"
) )
// EtcdClient is used by pserver client that is a part of trainer process. // Etcd is used by pserver client that is a part of trainer process.
// TODO: // TODO:
// 1. add watcher to watch the change state of pservers) // 1. add watcher to watch the change state of pservers.
// 1. add etcd lock) type Etcd struct {
type EtcdClient struct {
client *clientv3.Client client *clientv3.Client
timeout time.Duration timeout time.Duration
endpoints []string endpoints []string
lock *concurrency.Mutex
} }
// Desired read ps desired number from etcd. // Desired read ps desired number from etcd.
func (p *EtcdClient) Desired() int { func (e *Etcd) Desired() int {
var psDesired int var psDesired int
for { for {
ctx, cancel := context.WithTimeout(context.Background(), p.timeout) ctx, cancel := context.WithTimeout(context.Background(), e.timeout)
resp, err := p.client.Get(ctx, pserver.PsDesired) resp, err := e.client.Get(ctx, pserver.PsDesired)
cancel() cancel()
if err != nil { if err != nil {
log.Errorf("Get ps dresire number failed! recnnectiong..., %v", err) log.Errorf("Get ps dresire number failed! recnnectiong..., %v", err)
time.Sleep(p.timeout) time.Sleep(e.timeout)
continue continue
} }
kvs := resp.Kvs kvs := resp.Kvs
if len(kvs) == 0 { if len(kvs) == 0 {
log.Infoln("Waiting for ps desired registered ...") log.Infoln("Waiting for ps desired registered ...")
time.Sleep(p.timeout) time.Sleep(e.timeout)
continue continue
} }
psDesired, err = strconv.Atoi(string(resp.Kvs[0].Value)) psDesired, err = strconv.Atoi(string(resp.Kvs[0].Value))
if err != nil { if err != nil {
log.Errorf("psDesired %d invalid %v", psDesired, err) log.Errorf("psDesired %d invalid %v", psDesired, err)
time.Sleep(p.timeout) time.Sleep(e.timeout)
continue continue
} }
...@@ -73,26 +80,26 @@ func (p *EtcdClient) Desired() int { ...@@ -73,26 +80,26 @@ func (p *EtcdClient) Desired() int {
} }
// List return the pserver list read from etcd. // List return the pserver list read from etcd.
func (p *EtcdClient) List() []Server { func (e *Etcd) List() []Server {
psDesired := p.Desired() psDesired := e.Desired()
servers := make([]Server, psDesired) servers := make([]Server, psDesired)
for { for {
for i := 0; i < psDesired; i++ { for i := 0; i < psDesired; i++ {
ctx, cancel := context.WithTimeout(context.Background(), p.timeout) ctx, cancel := context.WithTimeout(context.Background(), e.timeout)
psKey := pserver.PsPath + strconv.Itoa(i) psKey := pserver.PsPath + strconv.Itoa(i)
log.Debugf("checking %s", psKey) log.Debugf("checking %s", psKey)
resp, err := p.client.Get(ctx, psKey) resp, err := e.client.Get(ctx, psKey)
cancel() cancel()
if err != nil { if err != nil {
log.Infof("Get psKey= %s error, %v", psKey, err) log.Infof("Get psKey= %s error, %v", psKey, err)
time.Sleep(p.timeout) time.Sleep(e.timeout)
continue continue
} }
kvs := resp.Kvs kvs := resp.Kvs
if len(kvs) == 0 { if len(kvs) == 0 {
log.Infof("Waiting for ps addr registered ...") log.Infof("Waiting for ps addr registered ...")
time.Sleep(p.timeout) time.Sleep(e.timeout)
continue continue
} }
...@@ -100,10 +107,10 @@ func (p *EtcdClient) List() []Server { ...@@ -100,10 +107,10 @@ func (p *EtcdClient) List() []Server {
// TODO(Longfei) check the ps address // TODO(Longfei) check the ps address
if psAddr == "" { if psAddr == "" {
log.Infof("Get psKey = %s, psAddr is empty", psKey) log.Infof("Get psKey = %s, psAddr is empty", psKey)
time.Sleep(p.timeout) time.Sleep(e.timeout)
continue continue
} }
log.Infof("got value (%s) for key: %s", psAddr, psKey) log.Debugf("got value (%s) for key: %s", psAddr, psKey)
servers[i].Index = i servers[i].Index = i
servers[i].Addr = psAddr servers[i].Addr = psAddr
} }
...@@ -113,7 +120,7 @@ func (p *EtcdClient) List() []Server { ...@@ -113,7 +120,7 @@ func (p *EtcdClient) List() []Server {
} }
// NewEtcd create a etcd client to return the state of pserver on etcd. // NewEtcd create a etcd client to return the state of pserver on etcd.
func NewEtcd(endpoints string) *EtcdClient { func NewEtcd(endpoints string) *Etcd {
ep := strings.Split(endpoints, ",") ep := strings.Split(endpoints, ",")
var cli *clientv3.Client var cli *clientv3.Client
var err error var err error
...@@ -130,10 +137,118 @@ func NewEtcd(endpoints string) *EtcdClient { ...@@ -130,10 +137,118 @@ func NewEtcd(endpoints string) *EtcdClient {
break break
} }
log.Infof("Connected to etcd: %s\n", endpoints) log.Infof("Connected to etcd: %s\n", endpoints)
client := &EtcdClient{ client := &Etcd{
client: cli, client: cli,
timeout: defaultEtcdTimeout, timeout: defaultEtcdTimeout,
endpoints: ep, endpoints: ep,
} }
return client return client
} }
// Select indicates if the current trainer is selected to initialize
// the pserver parameters.
//
// It blocks until the init lock at initLockPath is acquired and then
// reads initDonePath while holding the lock:
//   - key absent: this trainer is selected. The lock is stored in
//     e.lock and stays held until Done or Close releases it;
//     (true, nil) is returned.
//   - key equals initDoneVal: initialization already happened. The
//     lock is released and (false, nil) is returned.
//   - any other value, or any etcd failure: (false, err) is returned.
//
// On every path that does not select this trainer the lock is given
// up and the session is closed, so a failure here cannot keep the
// other trainers blocked on the lock.
func (e *Etcd) Select() (bool, error) {
	sess, err := concurrency.NewSession(e.client, concurrency.WithTTL(5))
	if err != nil {
		return false, err
	}

	lock := concurrency.NewMutex(sess, initLockPath)

	// release gives up the lock (when it was acquired) and closes
	// the session. Failures are only logged: the caller already has
	// a more relevant result to report.
	release := func(locked bool) {
		if locked {
			ctx, cancel := context.WithTimeout(context.Background(), e.timeout)
			if uErr := lock.Unlock(ctx); uErr != nil {
				log.Errorln(uErr)
			}
			cancel()
		}
		if cErr := sess.Close(); cErr != nil {
			log.Errorln(cErr)
		}
	}

	log.Infof("Trying to acquire lock at %s.", initLockPath)
	// Do not use timeout context here, since we don't know how
	// long does it take for other trainers to initialize the
	// parameters.
	err = lock.Lock(context.Background())
	if err != nil {
		release(false)
		return false, err
	}
	log.Infof("Successfully acquired lock at %s.", initLockPath)

	// Read the done flag only if we still own the lock.
	get := clientv3.OpGet(initDonePath)
	ctx, cancel := context.WithTimeout(context.Background(), e.timeout)
	tresp, err := e.client.Txn(ctx).If(lock.IsOwner()).Then(get).Commit()
	cancel()
	if err != nil {
		release(true)
		return false, err
	}

	if !tresp.Succeeded {
		release(true)
		return false, errors.New("no longer the owner of the lock")
	}

	resp := tresp.Responses[0].GetResponseRange()
	if len(resp.Kvs) == 0 {
		// Key value not set, select current trainer. The lock is
		// intentionally kept until Done or Close.
		e.lock = lock
		log.Infoln("Trainer selected.")
		return true, nil
	}

	val := string(resp.Kvs[0].Value)
	if val == initDoneVal {
		log.Infoln("Initialization is already done.")
		release(true)
		return false, nil
	}

	release(true)
	// Report the value as text; formatting the raw []byte with %v
	// would print its numeric bytes.
	return false, fmt.Errorf("key %s have unexpected value: %v", initDonePath, val)
}
// Done marks the parameter initialization as finished and gives up
// the init lock held by this client.
//
// The done flag (initDoneVal at initDonePath) is written in a
// transaction guarded by lock ownership. A failure to release the
// lock afterwards is only logged: the flag is already stored, so Done
// still returns nil and e.lock is left set, which lets Close attempt
// the unlock again.
func (e *Etcd) Done() error {
	if e.lock == nil {
		return errors.New("lock is nil, Done called unexpectedly")
	}

	// Publish the done flag, but only while we still own the lock.
	markDone := clientv3.OpPut(initDonePath, initDoneVal)
	putCtx, putCancel := context.WithTimeout(context.Background(), e.timeout)
	txnResp, txnErr := e.client.Txn(putCtx).
		If(e.lock.IsOwner()).
		Then(markDone).
		Commit()
	putCancel()
	if txnErr != nil {
		return txnErr
	}
	if !txnResp.Succeeded {
		return errors.New("no longer the owner of the lock")
	}

	// Release the lock; forget it only when the release worked.
	unlockCtx, unlockCancel := context.WithTimeout(context.Background(), e.timeout)
	unlockErr := e.lock.Unlock(unlockCtx)
	unlockCancel()
	if unlockErr == nil {
		e.lock = nil
		return nil
	}

	log.Errorln(unlockErr)
	return nil
}
// Close releases the init lock if this client still holds it and
// then closes the underlying etcd client.
//
// The etcd client is closed even when the unlock fails. When both
// steps fail, the unlock error is returned and the close error is
// logged; otherwise whichever error occurred (if any) is returned.
func (e *Etcd) Close() error {
	var unlockErr error
	if held := e.lock; held != nil {
		ctx, cancel := context.WithTimeout(context.Background(), e.timeout)
		unlockErr = held.Unlock(ctx)
		cancel()
		if unlockErr == nil {
			e.lock = nil
		}
	}

	closeErr := e.client.Close()
	switch {
	case closeErr == nil:
		return unlockErr
	case unlockErr == nil:
		return closeErr
	default:
		// Both failed: surface the unlock error, log the other.
		log.Errorln(closeErr)
		return unlockErr
	}
}
package client_test
import (
"io/ioutil"
"net/url"
"os"
"strings"
"sync"
"testing"
"github.com/PaddlePaddle/Paddle/go/pserver/client"
"github.com/coreos/etcd/embed"
)
// TestSelector starts an embedded etcd server and checks that, out of
// several clients racing on Select, exactly one is selected to
// initialize the parameters, and that a client arriving after
// initialization is done is not selected.
func TestSelector(t *testing.T) {
	etcdDir, err := ioutil.TempDir("", "")
	if err != nil {
		t.Fatal(err)
	}

	cfg := embed.NewConfig()
	// Port 0 is requested for both peer and client URLs; the actual
	// client port is read back from the listener below.
	lpurl, _ := url.Parse("http://localhost:0")
	lcurl, _ := url.Parse("http://localhost:0")
	cfg.LPUrls = []url.URL{*lpurl}
	cfg.LCUrls = []url.URL{*lcurl}
	cfg.Dir = etcdDir
	e, err := embed.StartEtcd(cfg)
	if err != nil {
		t.Fatal(err)
	}

	defer func() {
		e.Close()
		if err := os.RemoveAll(etcdDir); err != nil {
			t.Error(err)
		}
	}()

	<-e.Server.ReadyNotify()

	port := strings.Split(e.Clients[0].Addr().String(), ":")[1]
	endpoint := "127.0.0.1:" + port

	var mu sync.Mutex
	selectedCount := 0
	var wg sync.WaitGroup
	// selectAndDone runs on its own goroutine, so it must report
	// failures with t.Error: t.Fatal (FailNow) may only be called
	// from the goroutine running the test function.
	selectAndDone := func(c *client.Etcd) {
		defer wg.Done()

		selected, err := c.Select()
		if err != nil {
			t.Error(err)
			return
		}
		if !selected {
			return
		}

		mu.Lock()
		selectedCount++
		mu.Unlock()

		if err := c.Done(); err != nil {
			t.Error(err)
		}
	}

	c0 := client.NewEtcd(endpoint)
	c1 := client.NewEtcd(endpoint)
	c2 := client.NewEtcd(endpoint)
	c3 := client.NewEtcd(endpoint)
	wg.Add(3)
	go selectAndDone(c0)
	go selectAndDone(c1)
	go selectAndDone(c2)
	wg.Wait()

	// simulate trainer crashed and restarted after the
	// initialization process.
	wg.Add(1)
	go selectAndDone(c3)
	wg.Wait()

	mu.Lock()
	count := selectedCount
	mu.Unlock()
	if count != 1 {
		t.Error("selected count wrong:", count)
	}

	// Close every client even if an earlier one fails to close.
	for _, c := range []*client.Etcd{c0, c1, c2, c3} {
		if err := c.Close(); err != nil {
			t.Error(err)
		}
	}
}
...@@ -206,6 +206,7 @@ func (e *EtcdClient) GetKey(key string, timeout time.Duration) ([]byte, error) { ...@@ -206,6 +206,7 @@ func (e *EtcdClient) GetKey(key string, timeout time.Duration) ([]byte, error) {
if err != nil { if err != nil {
return []byte{}, err return []byte{}, err
} }
kvs := resp.Kvs kvs := resp.Kvs
if len(kvs) == 0 { if len(kvs) == 0 {
return []byte{}, nil return []byte{}, nil
...@@ -215,9 +216,14 @@ func (e *EtcdClient) GetKey(key string, timeout time.Duration) ([]byte, error) { ...@@ -215,9 +216,14 @@ func (e *EtcdClient) GetKey(key string, timeout time.Duration) ([]byte, error) {
} }
// PutKey put into etcd with value by key specified // PutKey put into etcd with value by key specified
func (e *EtcdClient) PutKey(key string, value []byte, timeout time.Duration) error { func (e *EtcdClient) PutKey(key string, value []byte, timeout time.Duration, withLease bool) error {
ctx, cancel := context.WithTimeout(context.Background(), timeout) ctx, cancel := context.WithTimeout(context.Background(), timeout)
_, err := e.client.Put(ctx, key, string(value), clientv3.WithLease(e.sess.Lease())) var err error
if withLease {
_, err = e.client.Put(ctx, key, string(value), clientv3.WithLease(e.sess.Lease()))
} else {
_, err = e.client.Put(ctx, key, string(value))
}
cancel() cancel()
return err return err
} }
......
...@@ -32,6 +32,7 @@ type optimizer struct { ...@@ -32,6 +32,7 @@ type optimizer struct {
opt *C.struct_paddle_optimizer opt *C.struct_paddle_optimizer
elementType ElementType elementType ElementType
contentLen int contentLen int
config []byte
} }
func cArrayToSlice(p unsafe.Pointer, len int) []byte { func cArrayToSlice(p unsafe.Pointer, len int) []byte {
...@@ -70,6 +71,7 @@ func newOptimizer(paramWithConfigs ParameterWithConfig, State []byte) *optimizer ...@@ -70,6 +71,7 @@ func newOptimizer(paramWithConfigs ParameterWithConfig, State []byte) *optimizer
cstate = unsafe.Pointer(&s[0]) cstate = unsafe.Pointer(&s[0])
} }
o.config = c
o.opt = C.paddle_create_optimizer((*C.uchar)(&c[0]), C.int(len(c)), o.opt = C.paddle_create_optimizer((*C.uchar)(&c[0]), C.int(len(c)),
C.paddle_element_type(p.ElementType), cbuffer, C.int(paramBufferSize), (*C.char)(cstate), C.int(len(s))) C.paddle_element_type(p.ElementType), cbuffer, C.int(paramBufferSize), (*C.char)(cstate), C.int(len(s)))
return o return o
......
...@@ -25,11 +25,13 @@ import ( ...@@ -25,11 +25,13 @@ import (
"fmt" "fmt"
"io/ioutil" "io/ioutil"
"os" "os"
"path/filepath" "path"
"strconv" "strconv"
"sync" "sync"
"time" "time"
uuid "github.com/satori/go.uuid"
log "github.com/sirupsen/logrus" log "github.com/sirupsen/logrus"
) )
...@@ -42,9 +44,9 @@ var ErrCheckpointNotFound = errors.New("checkpoint not found") ...@@ -42,9 +44,9 @@ var ErrCheckpointNotFound = errors.New("checkpoint not found")
// RPC error message. // RPC error message.
const ( const (
AlreadyInitialized = "pserver already initialized" AlreadyInitialized = "pserver already initialized"
Uninitialized = "pserver not fully initialized" Uninitialized = "pserver not fully initialized"
CheckpointMD5Failed = "checkpoint file MD5 validation failed" WrongChecksum = "checkpoint file checksum validation failed"
) )
// Supported element types. // Supported element types.
...@@ -73,11 +75,12 @@ type ParameterWithConfig struct { ...@@ -73,11 +75,12 @@ type ParameterWithConfig struct {
// checkpointMeta saves checkpoint metadata // checkpointMeta saves checkpoint metadata
type checkpointMeta struct { type checkpointMeta struct {
UUID string `json:"uuid"` UUID string `json:"uuid"`
Path string `json:"path"`
MD5 string `json:"md5"` MD5 string `json:"md5"`
Timestamp int64 `json:"timestamp"` Timestamp int64 `json:"timestamp"`
} }
// Checkpoint is the pserver shard persist in file // Checkpoint is the pserver shard persist in file.
type Checkpoint []parameterCheckpoint type Checkpoint []parameterCheckpoint
// Gradient is the gradient of the parameter. // Gradient is the gradient of the parameter.
...@@ -90,50 +93,58 @@ type Service struct { ...@@ -90,50 +93,58 @@ type Service struct {
checkpointInterval time.Duration checkpointInterval time.Duration
checkpointPath string checkpointPath string
client *EtcdClient client *EtcdClient
mu sync.Mutex
optMap map[string]*optimizer mu sync.Mutex
optMap map[string]*optimizer
} }
// parameterCheckpoint saves parameter checkpoint // parameterCheckpoint saves parameter checkpoint.
type parameterCheckpoint struct { type parameterCheckpoint struct {
ParameterWithConfig ParameterWithConfig
State []byte State []byte
} }
// NewCheckpointFromFile loads parameters and state from checkpoint file func loadMeta(e *EtcdClient, idx int) (meta checkpointMeta, err error) {
func NewCheckpointFromFile(cpPath string, idx int, e *EtcdClient) (Checkpoint, error) { v, err := e.GetKey(PsCheckpoint+strconv.Itoa(idx), 3*time.Second)
v, err := e.GetKey(PsPath+string(idx), 3*time.Second)
if err != nil { if err != nil {
return nil, err return
} }
if len(v) == 0 { if len(v) == 0 {
return nil, ErrCheckpointNotFound err = ErrCheckpointNotFound
return
} }
var cpMeta checkpointMeta if err = json.Unmarshal(v, &meta); err != nil {
if err = json.Unmarshal(v, &cpMeta); err != nil { return
return nil, err
} }
fn := filepath.Join(cpPath, cpMeta.UUID) return
if _, err = os.Stat(fn); os.IsNotExist(err) { }
// LoadCheckpoint loads checkpoint from file.
func LoadCheckpoint(e *EtcdClient, idx int) (Checkpoint, error) {
cpMeta, err := loadMeta(e, idx)
if err != nil {
return nil, err return nil, err
} }
content, err := ioutil.ReadFile(fn)
content, err := ioutil.ReadFile(cpMeta.Path)
if err != nil { if err != nil {
return nil, err return nil, err
} }
// TODO(helin): change MD5 to CRC since CRC is better for file
// checksum in our use case (emphasize speed over security).
h := md5.New() h := md5.New()
md5 := hex.EncodeToString(h.Sum(content)) md5 := hex.EncodeToString(h.Sum(content))
if md5 != cpMeta.MD5 { if md5 != cpMeta.MD5 {
return nil, errors.New(CheckpointMD5Failed) return nil, errors.New(WrongChecksum)
} }
dec := gob.NewDecoder(bytes.NewReader(content)) dec := gob.NewDecoder(bytes.NewReader(content))
cp := Checkpoint{} var cp Checkpoint
if err = dec.Decode(cp); err != nil { if err = dec.Decode(&cp); err != nil {
return nil, err return nil, err
} }
return cp, nil return cp, nil
...@@ -193,6 +204,15 @@ func (s *Service) FinishInitParams(_ int, _ *int) error { ...@@ -193,6 +204,15 @@ func (s *Service) FinishInitParams(_ int, _ *int) error {
} }
close(s.initialized) close(s.initialized)
go func() {
t := time.Tick(s.checkpointInterval)
for range t {
err := s.checkpoint()
if err != nil {
log.Errorln(err)
}
}
}()
return nil return nil
} }
...@@ -240,23 +260,36 @@ func (s *Service) GetParam(name string, parameter *Parameter) error { ...@@ -240,23 +260,36 @@ func (s *Service) GetParam(name string, parameter *Parameter) error {
return nil return nil
} }
// pserver save checkpoint func traceTime(start time.Time, name string) {
func (s *Service) doCheckpoint() (err error) { elapsed := time.Since(start)
<-s.initialized log.Infof("%s took %v", name, elapsed)
s.mu.Lock() }
defer s.mu.Unlock()
// checkpoint saves checkpoint to disk.
//
// checkpoint should be only called after the parameters are
// initialized.
func (s *Service) checkpoint() (err error) {
log.Infoln("Begin save checkpoint.")
defer traceTime(time.Now(), "save checkpoint")
s.mu.Lock()
cp := make([]parameterCheckpoint, len(s.optMap)) cp := make([]parameterCheckpoint, len(s.optMap))
index := 0 index := 0
// TODO(helin): write checkpoint incrementally to reduce memory
// footprint during checkpoint.
for name, opt := range s.optMap { for name, opt := range s.optMap {
var pc parameterCheckpoint var pc parameterCheckpoint
pc.Param.Name = name pc.Param.Name = name
pc.Param.ElementType = opt.elementType pc.Param.ElementType = opt.elementType
pc.Param.Content = opt.GetWeights() pc.Param.Content = opt.GetWeights()
pc.Config = opt.config
pc.State = opt.GetStates() pc.State = opt.GetStates()
cp[index] = pc cp[index] = pc
index++ index++
} }
s.mu.Unlock()
var buf bytes.Buffer var buf bytes.Buffer
encoder := gob.NewEncoder(&buf) encoder := gob.NewEncoder(&buf)
err = encoder.Encode(cp) err = encoder.Encode(cp)
...@@ -264,32 +297,9 @@ func (s *Service) doCheckpoint() (err error) { ...@@ -264,32 +297,9 @@ func (s *Service) doCheckpoint() (err error) {
return return
} }
cpMeta := checkpointMeta{} id := uuid.NewV4().String()
cpMeta.UUID = s.checkpointPath + strconv.Itoa(s.idx) p := path.Join(s.checkpointPath, id)
cpMeta.Timestamp = time.Now().UnixNano() f, err := os.Create(p)
h := md5.New()
cpMeta.MD5 = hex.EncodeToString(h.Sum(buf.Bytes()))
cpMetajson, err := json.Marshal(cpMeta)
if err != nil {
return
}
err = s.client.PutKey(filepath.Join(PsCheckpoint, strconv.Itoa(s.idx)), cpMetajson, 3*time.Second)
if err != nil {
return
}
if _, err = os.Stat(cpMeta.UUID); os.IsNotExist(err) {
log.Info("checkpoint does not exists.")
} else {
err = os.Remove(cpMeta.UUID)
if err != nil {
log.Infof("Removing checkpoint %s failed", cpMeta.UUID)
} else {
log.Infof("checkpoint %s already exsits, removing ", cpMeta.UUID)
}
}
f, err := os.Create(cpMeta.UUID)
if err != nil { if err != nil {
return return
} }
...@@ -317,5 +327,43 @@ func (s *Service) doCheckpoint() (err error) { ...@@ -317,5 +327,43 @@ func (s *Service) doCheckpoint() (err error) {
return return
} }
oldMeta, err := loadMeta(s.client, s.idx)
if err == ErrCheckpointNotFound {
log.Infoln("Do not have existing checkpoint.")
err = nil
}
if err != nil {
return
}
h := md5.New()
md5 := hex.EncodeToString(h.Sum(buf.Bytes()))
cpMeta := checkpointMeta{
UUID: id,
Timestamp: time.Now().UnixNano(),
MD5: md5,
Path: p,
}
json, err := json.Marshal(cpMeta)
if err != nil {
return
}
err = s.client.PutKey(PsCheckpoint+strconv.Itoa(s.idx), json, 3*time.Second, false)
if err != nil {
return
}
if oldMeta.Path != "" {
rmErr := os.Remove(oldMeta.Path)
if rmErr != nil {
// log error, but still treat checkpoint as
// successful.
log.Errorln(rmErr)
}
}
return return
} }
...@@ -30,7 +30,7 @@ const ( ...@@ -30,7 +30,7 @@ const (
func TestServiceFull(t *testing.T) { func TestServiceFull(t *testing.T) {
var cp pserver.Checkpoint var cp pserver.Checkpoint
s, err := pserver.NewService(0, 1, "", nil, cp) s, err := pserver.NewService(0, time.Hour, "", nil, cp)
if err != nil { if err != nil {
t.Error(err) t.Error(err)
} }
...@@ -102,7 +102,7 @@ func TestServiceFull(t *testing.T) { ...@@ -102,7 +102,7 @@ func TestServiceFull(t *testing.T) {
func TestMultipleInit(t *testing.T) { func TestMultipleInit(t *testing.T) {
var cp pserver.Checkpoint var cp pserver.Checkpoint
s, err := pserver.NewService(0, 1, "", nil, cp) s, err := pserver.NewService(0, time.Hour, "", nil, cp)
if err != nil { if err != nil {
t.Fatal(err) t.Fatal(err)
} }
...@@ -119,7 +119,7 @@ func TestMultipleInit(t *testing.T) { ...@@ -119,7 +119,7 @@ func TestMultipleInit(t *testing.T) {
func TestUninitialized(t *testing.T) { func TestUninitialized(t *testing.T) {
var cp pserver.Checkpoint var cp pserver.Checkpoint
s, err := pserver.NewService(0, 1, "", nil, cp) s, err := pserver.NewService(0, time.Hour, "", nil, cp)
err = s.SendGrad(pserver.Gradient{}, nil) err = s.SendGrad(pserver.Gradient{}, nil)
if err.Error() != pserver.Uninitialized { if err.Error() != pserver.Uninitialized {
t.Fatal(err) t.Fatal(err)
...@@ -128,7 +128,7 @@ func TestUninitialized(t *testing.T) { ...@@ -128,7 +128,7 @@ func TestUninitialized(t *testing.T) {
func TestBlockUntilInitialized(t *testing.T) { func TestBlockUntilInitialized(t *testing.T) {
var cp pserver.Checkpoint var cp pserver.Checkpoint
s, err := pserver.NewService(0, 1, "", nil, cp) s, err := pserver.NewService(0, time.Hour, "", nil, cp)
if err != nil { if err != nil {
t.Error(err) t.Error(err)
} }
......
...@@ -22,7 +22,5 @@ if(WITH_C_API) ...@@ -22,7 +22,5 @@ if(WITH_C_API)
endif() endif()
if(WITH_SWIG_PY) if(WITH_SWIG_PY)
configure_file(${CMAKE_CURRENT_SOURCE_DIR}/setup.py.in
${CMAKE_CURRENT_SOURCE_DIR}/setup.py)
add_subdirectory(api) add_subdirectory(api)
endif() endif()
...@@ -82,9 +82,7 @@ SWIG_LINK_LIBRARIES(swig_paddle ...@@ -82,9 +82,7 @@ SWIG_LINK_LIBRARIES(swig_paddle
add_custom_command(OUTPUT ${PROJ_ROOT}/paddle/py_paddle/_swig_paddle.so add_custom_command(OUTPUT ${PROJ_ROOT}/paddle/py_paddle/_swig_paddle.so
COMMAND cp ${CMAKE_CURRENT_BINARY_DIR}/swig_paddle.py ${PROJ_ROOT}/paddle/py_paddle COMMAND cp ${CMAKE_CURRENT_BINARY_DIR}/swig_paddle.py ${PROJ_ROOT}/paddle/py_paddle
COMMAND cp ${CMAKE_CURRENT_BINARY_DIR}/_swig_paddle.so ${PROJ_ROOT}/paddle/py_paddle COMMAND cp ${CMAKE_CURRENT_BINARY_DIR}/_swig_paddle.so ${PROJ_ROOT}/paddle/py_paddle
COMMAND env ${py_env} ${PYTHON_EXECUTABLE} setup.py bdist_wheel COMMAND ${CMAKE_COMMAND} -E touch .timestamp
COMMAND ${CMAKE_COMMAND} -E touch dist/.timestamp
COMMAND rm -rf py_paddle.egg-info build
WORKING_DIRECTORY ${PROJ_ROOT}/paddle WORKING_DIRECTORY ${PROJ_ROOT}/paddle
DEPENDS _swig_paddle DEPENDS _swig_paddle
) )
...@@ -92,10 +90,6 @@ add_custom_command(OUTPUT ${PROJ_ROOT}/paddle/py_paddle/_swig_paddle.so ...@@ -92,10 +90,6 @@ add_custom_command(OUTPUT ${PROJ_ROOT}/paddle/py_paddle/_swig_paddle.so
# TODO(yuyang18) : make wheel name calculated by cmake # TODO(yuyang18) : make wheel name calculated by cmake
add_custom_target(python_api_wheel ALL DEPENDS ${PROJ_ROOT}/paddle/py_paddle/_swig_paddle.so) add_custom_target(python_api_wheel ALL DEPENDS ${PROJ_ROOT}/paddle/py_paddle/_swig_paddle.so)
install(DIRECTORY ${CMAKE_SOURCE_DIR}/paddle/dist/
DESTINATION opt/paddle/share/wheels
)
if(WITH_TESTING) if(WITH_TESTING)
IF(NOT PY_PIP_FOUND) IF(NOT PY_PIP_FOUND)
SET(PIP_SOURCES_DIR ${PYTHON_SOURCES_DIR}/pip) SET(PIP_SOURCES_DIR ${PYTHON_SOURCES_DIR}/pip)
...@@ -108,7 +102,7 @@ if(WITH_TESTING) ...@@ -108,7 +102,7 @@ if(WITH_TESTING)
BUILD_COMMAND "" BUILD_COMMAND ""
INSTALL_COMMAND env ${py_env} ${PYTHON_EXECUTABLE} setup.py install INSTALL_COMMAND env ${py_env} ${PYTHON_EXECUTABLE} setup.py install
BUILD_IN_SOURCE 1 BUILD_IN_SOURCE 1
DEPENDS python setuptools python_api_wheel #DEPENDS python setuptools python_api_wheel
) )
ENDIF() ENDIF()
add_subdirectory(test) add_subdirectory(test)
......
...@@ -39,6 +39,7 @@ set(CUDA_CU_SOURCES ...@@ -39,6 +39,7 @@ set(CUDA_CU_SOURCES
src/hl_cuda_lstm.cu src/hl_cuda_lstm.cu
src/hl_top_k.cu src/hl_top_k.cu
src/hl_batch_transpose.cu src/hl_batch_transpose.cu
src/hl_batch_norm.cu
src/hl_cuda_sequence.cu src/hl_cuda_sequence.cu
src/hl_table_apply.cu) src/hl_table_apply.cu)
......
/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserve.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
#ifndef HL_BATCH_NORM_H_
#define HL_BATCH_NORM_H_
#include "hl_base.h"
/**
 * @brief batch norm inference.
 *
 * Each output element is computed as
 *   (input - estimatedMean[c]) / sqrt(estimatedVar[c] + epsilon)
 *     * scale[c] + bias[c]
 * where c is the channel the element belongs to.
 *
 * @param[in]  input          input data. The CUDA implementation indexes
 *                            it as [batchSize][channel][height][width].
 * @param[out] output         output data, indexed the same way as input.
 * @param[in]  scale          batch normalization scale parameter (in original
 *                            paper scale is referred to as gamma), indexed
 *                            by channel.
 * @param[in]  bias           batch normalization bias parameter (in original
 *                            paper bias is referred to as beta), indexed
 *                            by channel.
 * @param[in]  estimatedMean
 * @param[in]  estimatedVar   The moving mean and variance
 *                            accumulated during the training phase are passed
 *                            as inputs here, indexed by channel.
 * @param[in]  epsilon        Epsilon value used in the batch
 *                            normalization formula.
 * @param[in]  batchSize      number of samples in the batch.
 * @param[in]  channel        number of channels per sample.
 * @param[in]  height         height of each channel plane.
 * @param[in]  width          width of each channel plane.
 */
extern void hl_batch_norm_cuda_inference(const real* input,
real* output,
const real* scale,
const real* bias,
const real* estimatedMean,
const real* estimatedVar,
const double epsilon,
size_t batchSize,
size_t channel,
size_t height,
size_t width);
#endif // HL_BATCH_NORM_H_
/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserve.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
#include "hl_batch_norm.h"
/**
 * Batch normalization inference kernel.
 *
 * Block blockIdx.x handles sample blockIdx.x of the batch; the threads
 * of the block stride over that sample's channel * height * width
 * elements. For an element in channel c:
 *   output = (input - estimatedMean[c]) / sqrt(estimatedVar[c] + epsilon)
 *            * scale[c] + bias[c]
 *
 * All index arithmetic is done in size_t so that tensors with 2^31 or
 * more elements do not overflow a 32-bit int.
 *
 * batchSize is not read by the kernel; the number of samples is given
 * by the launch grid.
 */
__global__ void batchNormInference(real* output,
                                   const real* input,
                                   const real* scale,
                                   const real* bias,
                                   const real* estimatedMean,
                                   const real* estimatedVar,
                                   const double epsilon,
                                   size_t batchSize,
                                   size_t channel,
                                   size_t height,
                                   size_t width) {
  // Loop-invariant extents, computed once per thread.
  const size_t spatial = height * width;
  const size_t num = channel * spatial;
  // Offset of this block's sample in the flat input/output buffers.
  const size_t base = static_cast<size_t>(blockIdx.x) * num;

  for (size_t i = threadIdx.x; i < num; i += blockDim.x) {
    const size_t c = i / spatial;
    const size_t id = base + i;
    real val = input[id] - estimatedMean[c];
    val /= sqrt(estimatedVar[c] + epsilon);
    val *= scale[c];
    val += bias[c];
    output[id] = val;
  }
}
/**
 * Host entry point for batch normalization inference on the GPU.
 *
 * Launches batchNormInference on STREAM_DEFAULT with one block per
 * sample (batchSize blocks) and 256 threads per block, then checks the
 * launch with CHECK_SYNC.
 */
void hl_batch_norm_cuda_inference(const real* input,
                                  real* output,
                                  const real* scale,
                                  const real* bias,
                                  const real* estimatedMean,
                                  const real* estimatedVar,
                                  const double epsilon,
                                  size_t batchSize,
                                  size_t channel,
                                  size_t height,
                                  size_t width) {
  // Launch configuration: one block per sample in the batch.
  const dim3 grid(batchSize);
  const dim3 threads(256);

  batchNormInference<<<grid, threads, 0, STREAM_DEFAULT>>>(
      output, input, scale, bias, estimatedMean, estimatedVar, epsilon,
      batchSize, channel, height, width);

  CHECK_SYNC("hl_batch_norm_cuda_inference failed!");
}
...@@ -1023,14 +1023,6 @@ void hl_batch_norm_forward_inference(hl_tensor_descriptor inputDesc, ...@@ -1023,14 +1023,6 @@ void hl_batch_norm_forward_inference(hl_tensor_descriptor inputDesc,
real beta = 1.0f; real beta = 1.0f;
cudnnBatchNormMode_t mode = CUDNN_BATCHNORM_SPATIAL; cudnnBatchNormMode_t mode = CUDNN_BATCHNORM_SPATIAL;
int batch_size = ((cudnn_tensor_descriptor)inputDesc)->batch_size;
if (batch_size > 1024 && g_cudnn_lib_version < 6000) {
LOG(INFO) << " To process current batch data with size " << batch_size
<< " (>1024), cudnnBatchNorm requires cuDNN version >= 6000."
<< " If there is an error complaining CUDNN_STATUS_NOT_SUPPORTED,"
<< " just recompile PaddlePaddle with cuDNN >= 6000, replacing"
<< " current version " << g_cudnn_lib_version;
}
CHECK_CUDNN( CHECK_CUDNN(
dynload::cudnnBatchNormalizationForwardInference(t_resource.cudnn_handle, dynload::cudnnBatchNormalizationForwardInference(t_resource.cudnn_handle,
mode, mode,
......
...@@ -7,6 +7,9 @@ cc_library(tensor SRCS tensor.cc DEPS ddim place paddle_memory device_context) ...@@ -7,6 +7,9 @@ cc_library(tensor SRCS tensor.cc DEPS ddim place paddle_memory device_context)
cc_test(tensor_test SRCS tensor_test.cc DEPS tensor) cc_test(tensor_test SRCS tensor_test.cc DEPS tensor)
cc_test(eigen_test SRCS eigen_test.cc DEPS tensor) cc_test(eigen_test SRCS eigen_test.cc DEPS tensor)
cc_library(lod_tensor SRCS lod_tensor.cc details/lod_tensor.cc DEPS ddim place tensor)
cc_test(lod_tensor_test SRCS lod_tensor_test.cc DEPS lod_tensor)
cc_test(variable_test SRCS variable_test.cc) cc_test(variable_test SRCS variable_test.cc)
cc_library(scope SRCS scope.cc) cc_library(scope SRCS scope.cc)
...@@ -35,12 +38,17 @@ add_dependencies(framework_py_proto framework_py_proto_init) ...@@ -35,12 +38,17 @@ add_dependencies(framework_py_proto framework_py_proto_init)
cc_library(backward SRCS backward.cc DEPS net_op) cc_library(backward SRCS backward.cc DEPS net_op)
cc_test(backward_test SRCS backward_test.cc DEPS backward) cc_test(backward_test SRCS backward_test.cc DEPS backward)
if(WITH_PYTHON)
cc_library(paddle_pybind SHARED cc_library(paddle_pybind SHARED
SRCS pybind.cc SRCS pybind.cc
DEPS pybind python backward DEPS pybind python backward
fc_op fc_op
sgd_op sgd_op
add_op add_op
mean_op mean_op
cross_entropy_op cross_entropy_op
recurrent_op) recurrent_op
uniform_random_op
fill_zeros_like_op)
endif(WITH_PYTHON)
...@@ -13,6 +13,7 @@ ...@@ -13,6 +13,7 @@
limitations under the License. */ limitations under the License. */
#include "paddle/framework/backward.h" #include "paddle/framework/backward.h"
#include <list> #include <list>
#include "paddle/framework/op_registry.h" #include "paddle/framework/op_registry.h"
#include "paddle/operators/net_op.h" #include "paddle/operators/net_op.h"
......
...@@ -17,16 +17,21 @@ ...@@ -17,16 +17,21 @@
#include <gtest/gtest.h> #include <gtest/gtest.h>
#include "paddle/framework/op_registry.h" #include "paddle/framework/op_registry.h"
#include "paddle/operators/net_op.h" #include "paddle/operators/net_op.h"
#include "paddle/operators/type_alias.h"
namespace paddle { namespace paddle {
namespace framework { namespace framework {
using OperatorBase = framework::OperatorBase;
using OpProtoAndCheckerMaker = framework::OpProtoAndCheckerMaker;
using OpProto = framework::OpProto;
using OpAttrChecker = framework::OpAttrChecker;
using Scope = framework::Scope;
using DeviceContext = platform::DeviceContext;
class EmptyOp : public OperatorBase { class EmptyOp : public OperatorBase {
public: public:
void InferShape(const Scope &scope) const override {} void InferShape(const Scope &scope) const override {}
void Run(const Scope &scope, void Run(const Scope &scope, const DeviceContext &dev_ctx) const override {}
const platform::DeviceContext &dev_ctx) const override {}
}; };
class RowWiseAddOpMaker : public OpProtoAndCheckerMaker { class RowWiseAddOpMaker : public OpProtoAndCheckerMaker {
...@@ -71,7 +76,7 @@ class NoGradOpMaker : public OpProtoAndCheckerMaker { ...@@ -71,7 +76,7 @@ class NoGradOpMaker : public OpProtoAndCheckerMaker {
} }
}; };
class FcOp : public ops::NetOp { class FcOp : public operators::NetOp {
public: public:
void Init() override { void Init() override {
AddOp(OpRegistry::CreateOp("mul", {Input("X"), Input("W")}, AddOp(OpRegistry::CreateOp("mul", {Input("X"), Input("W")},
...@@ -143,6 +148,7 @@ class AddOpMaker : public OpProtoAndCheckerMaker { ...@@ -143,6 +148,7 @@ class AddOpMaker : public OpProtoAndCheckerMaker {
} // namespace paddle } // namespace paddle
namespace f = paddle::framework; namespace f = paddle::framework;
namespace ops = paddle::operators;
using EnforceNotMet = paddle::platform::EnforceNotMet; using EnforceNotMet = paddle::platform::EnforceNotMet;
REGISTER_OP(rowwise_add, f::EmptyOp, f::RowWiseAddOpMaker); REGISTER_OP(rowwise_add, f::EmptyOp, f::RowWiseAddOpMaker);
REGISTER_GRADIENT_OP(rowwise_add, rowwise_add_grad, f::EmptyOp); REGISTER_GRADIENT_OP(rowwise_add, rowwise_add_grad, f::EmptyOp);
......
/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserve.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
#include "paddle/framework/lod_tensor.h"
#include <algorithm>
#include <memory>
namespace paddle {
namespace framework {
namespace details {
using LOD = LODTensor::LOD;
std::shared_ptr<LOD> SliceLOD(const LOD &lod, size_t level_begin,
size_t level_end) {
auto new_lod = std::make_shared<LOD>();
new_lod->reserve(level_end - level_begin);
for (size_t i = level_begin; i < level_end; i++) {
new_lod->emplace_back(lod[i]);
}
return new_lod;
}
// Slices elements [elem_begin, elem_end) of `level`, plus the matching offset
// ranges of every level after it, into a newly allocated LOD.
//
// The range is located by value: the offsets stored at `elem_begin` and
// `elem_end` of `level` are searched for in each following level, and the
// offsets between them (both ends included) are kept.
//
// When `tensor_shared` is false the kept offsets are rebased so that each
// level starts at 0; when it is true they are left unchanged.
std::shared_ptr<LOD> SliceLOD(const LOD &lod, size_t level, size_t elem_begin,
                              size_t elem_end, bool tensor_shared) {
  // at() validates `level` before anything is sized from it.
  const auto &top_level = lod.at(level);
  PADDLE_ENFORCE(elem_begin <= elem_end,
                 "element begin [%d] should not exceed element end [%d]",
                 elem_begin, elem_end);
  PADDLE_ENFORCE(elem_end < top_level.size(),
                 "element end [%d] out of range [%d]", elem_end,
                 top_level.size());
  const size_t start = top_level[elem_begin];
  const size_t end = top_level[elem_end];

  auto new_lod = std::make_shared<LOD>();
  new_lod->reserve(lod.size() - level);
  for (auto it = lod.begin() + level; it != lod.end(); ++it) {
    auto it_begin = std::find(it->begin(), it->end(), start);
    PADDLE_ENFORCE(it_begin != it->end(), "error in parsing lod info");
    auto it_end = std::find(it_begin, it->end(), end);
    PADDLE_ENFORCE(it_end != it->end(), "error in parsing lod info");
    // Keep the closing offset too: it marks the end of the last element.
    new_lod->emplace_back(it_begin, it_end + 1);
    if (!tensor_shared) {
      // Reset offsets if the tensor is copied and sliced. Offsets are size_t,
      // so the lambda must not narrow them to int.
      auto &sliced = new_lod->back();
      std::transform(sliced.begin(), sliced.end(), sliced.begin(),
                     [start](size_t v) { return v - start; });
      PADDLE_ENFORCE(sliced.front() == 0, "error in slice LOD");
    }
  }
  return new_lod;
}
} // namespace details
} // namespace framework
} // namespace paddle
/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserve.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
#pragma once
#include <memory>
namespace paddle {
namespace framework {
namespace details {
/*
* Slice levels from LOD.
*
* Copies the levels in [level_begin, level_end) into a newly allocated LOD and
* returns it; the input LOD is not modified.
*
* @lod: LOD to slice.
* @level_begin: index of the first level to copy.
* @level_end: index one past the last level to copy.
*/
std::shared_ptr<LODTensor::LOD> SliceLOD(const LODTensor::LOD &lod,
size_t level_begin, size_t level_end);
/*
* Slice elements from a level of LOD.
*
* The result starts at `level` and also contains the matching offset range of
* every level after it. It is returned as a newly allocated LOD.
*
* @lod: LOD to slice.
* @level: which level to slice.
* @elem_begin: element's index to begin slice.
* @elem_end: element's index to end slice (one past the last element kept).
* @tensor_shared: if false, the kept offsets are rebased so that each level
* starts at 0; if true, the offsets are left unchanged.
*/
std::shared_ptr<LODTensor::LOD> SliceLOD(const LODTensor::LOD &lod,
size_t level, size_t elem_begin,
size_t elem_end, bool tensor_shared);
} // namespace details
} // namespace framework
} // namespace paddle
/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserve.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
#include "paddle/framework/lod_tensor.h"
#include <glog/logging.h>
namespace paddle {
namespace framework {
// Returns a LODTensor holding levels [level_begin, level_end) of this LOD and
// sharing this object's tensor.
LODTensor LODTensor::SliceShared(size_t level_begin, size_t level_end) const {
  PADDLE_ENFORCE(HasLOD(), "has no LOD info, can't be sliced.");
  // Only the LOD info is narrowed: every level covers the whole tensor_, so
  // the tensor itself is passed through untouched.
  return LODTensor(tensor_,
                   details::SliceLOD(*lod_start_pos_, level_begin, level_end));
}
// Returns a LODTensor holding elements [elem_begin, elem_end) of `level` (and
// the matching ranges of the levels after it) while sharing this object's
// tensor.
LODTensor LODTensor::SliceShared(size_t level, size_t elem_begin,
                                 size_t elem_end) const {
  PADDLE_ENFORCE(HasLOD(), "has no LOD info, can't be sliced.");
  PADDLE_ENFORCE(level < NumLevels(), "level [%d] out of range [%d]", level,
                 NumLevels());
  const size_t num_elems = NumElements(level);
  PADDLE_ENFORCE(elem_begin < num_elems,
                 "element begin [%d] out of range [%d]", elem_begin,
                 num_elems);
  PADDLE_ENFORCE(elem_end < num_elems + 1,
                 "element end [%d] out of range [%d]", elem_end, num_elems);
  // The offsets are kept as-is (tensor_shared == true), so they still index
  // into the original tensor_, which can therefore be reused directly.
  const bool tensor_shared = true;
  std::shared_ptr<LOD> sliced_lod = details::SliceLOD(
      *lod_start_pos_, level, elem_begin, elem_end, tensor_shared);
  return LODTensor(tensor_, sliced_lod);
}
} // namespace framework
} // namespace paddle
/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserve.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
#pragma once
#include <memory>
#if (!PADDLE_ONLY_CPU)
#include <thrust/device_vector.h>
#include <thrust/host_vector.h>
#endif
#include "paddle/framework/ddim.h"
#include "paddle/framework/tensor.h"
#include "paddle/platform/enforce.h"
namespace paddle {
namespace framework {
/*
 * LODTensor (Level of details Tensor)
 * see https://en.wikipedia.org/wiki/Level_of_details for reference.
 *
 * Holds a shared pointer to a Tensor and a shared pointer to the LOD (a list
 * of offset vectors, one per level) that describes it. Either pointer may be
 * empty: a default-constructed LODTensor has neither.
 */
class LODTensor {
 public:
// Level save offsets of each unit.
#ifdef PADDLE_ONLY_CPU
  using Level = std::vector<size_t>;
#else
  using Level = thrust::device_vector<size_t>;
#endif
  // LOD stores offsets of each level of units, the largest units level first,
  // then the smaller units level. Each Level stores the offsets of units in
  // Tensor.
  typedef std::vector<Level> LOD;

  LODTensor() {}

  LODTensor(const std::shared_ptr<Tensor> &tensor,
            const std::shared_ptr<LOD> &lod) {
    Reset(tensor, lod);
  }

  /*
   * Replace both the tensor and the LOD; the pointers are shared, not copied.
   */
  void Reset(const std::shared_ptr<Tensor> &tensor,
             const std::shared_ptr<LOD> &lod) {
    tensor_ = tensor;
    lod_start_pos_ = lod;
  }

  /*
   * Get a element from LOD, i.e. the offset stored at index `elem` of
   * `level`. `elem` must be smaller than NumElements(level).
   */
  size_t lod_element(size_t level, size_t elem) const {
    PADDLE_ENFORCE(level < NumLevels(), "level [%d] out of range [%d]", level,
                   NumLevels());
    PADDLE_ENFORCE(elem < NumElements(level),
                   "element [%d] out of range [%d]", elem,
                   NumElements(level));
    return (*lod_start_pos_)[level][elem];
  }

  /*
   * Number of LODTensor's levels, each level has units of data, for example,
   * in the sentence's view, article, paragraph, sentence are 3 levels.
   * Returns 0 when there is no LOD info.
   */
  size_t NumLevels() const {
    return lod_start_pos_ ? lod_start_pos_->size() : 0UL;
  }

  /*
   * Number of elements in a level.
   */
  size_t NumElements(size_t level = 0) const {
    PADDLE_ENFORCE(level < NumLevels(), "level [%d] out of range [%d]", level,
                   NumLevels());
    // the last offset is the end of last element
    return lod_start_pos_->at(level).size() - 1;
  }

  /*
   * Slice of levels[level_begin:level_end], with tensor copied.
   */
  template <typename T>
  LODTensor SliceCopied(size_t level_begin, size_t level_end,
                        const platform::Place &dst_place) const;

  /*
   * Slice of levels[level_begin:level_end], with tensor shared.
   */
  LODTensor SliceShared(size_t level_begin, size_t level_end) const;

  /*
   * Slice of elements of a level, [elem_begin: elem_end], with tensor copied.
   * @note: low performance in slice lod_start_pos_.
   */
  template <typename T>
  LODTensor SliceCopied(size_t level, size_t elem_begin, size_t elem_end,
                        const platform::Place &dst_place) const;

  /*
   * Slice of elements of a level, [elem_begin: elem_end], with tensor shared.
   * @note: low performance in slice lod_start_pos_.
   */
  LODTensor SliceShared(size_t level, size_t elem_begin, size_t elem_end) const;

  /*
   * Copy other's lod_start_pos_, to share LOD info.
   * @note: the LOD info should not be changed.
   */
  void ShareLOD(const LODTensor &other) {
    lod_start_pos_ = other.lod_start_pos_;
  }

  /*
   * Copy other's lod_start_pos_'s content, free to mutate.
   * If `other` has no LOD info, this object ends up without LOD info as well
   * (instead of dereferencing an empty pointer).
   */
  void CopyLOD(const LODTensor &other) {
    if (other.lod_start_pos_) {
      lod_start_pos_ = std::make_shared<LOD>(*other.lod_start_pos_);
    } else {
      lod_start_pos_.reset();
    }
  }

  /*
   * Determine whether LODTensor has a valid LOD info.
   */
  bool HasLOD() const { return bool(lod_start_pos_); }

  // Raw pointer to the LOD; null when there is no LOD info.
  LOD *lod() const { return lod_start_pos_.get(); }

  // The shared tensor pointer itself (may be empty).
  std::shared_ptr<Tensor> &tensor() { return tensor_; }

  // Raw pointer to the tensor; null when no tensor is set.
  Tensor *raw_tensor() { return tensor_.get(); }

 private:
  std::shared_ptr<LOD> lod_start_pos_;
  std::shared_ptr<Tensor> tensor_;
};
} // namespace framework
} // namespace paddle
#include "paddle/framework/lod_tensor_impl.h"
/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserve.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
#pragma once
#include "paddle/framework/details/lod_tensor.h"
namespace paddle {
namespace framework {
// Returns a LODTensor holding levels [level_begin, level_end) of this LOD
// together with a copy of the whole tensor placed on `dst_place`.
template <typename T>
LODTensor LODTensor::SliceCopied(size_t level_begin, size_t level_end,
                                 const platform::Place &dst_place) const {
  PADDLE_ENFORCE(HasLOD(), "has no LOD info, can't be sliced.");
  std::shared_ptr<LOD> sliced_lod =
      details::SliceLOD(*lod_start_pos_, level_begin, level_end);
  // Slicing levels does not narrow the data, so the full tensor is copied.
  std::shared_ptr<Tensor> copied_tensor = std::make_shared<Tensor>();
  copied_tensor->CopyFrom<T>(*tensor_, dst_place);
  return LODTensor(copied_tensor, sliced_lod);
}
// Returns a LODTensor holding elements [elem_begin, elem_end) of `level` (and
// the matching ranges of the levels after it), with the corresponding part of
// the tensor copied to `dst_place`. The returned LOD is rebased so that its
// offsets start at 0.
template <typename T>
LODTensor LODTensor::SliceCopied(size_t level, size_t elem_begin,
                                 size_t elem_end,
                                 const platform::Place &dst_place) const {
  PADDLE_ENFORCE(HasLOD(), "has no LOD info, can't be sliced.");
  PADDLE_ENFORCE(level < NumLevels(), "level [%d] out of range [%d]", level,
                 NumLevels());
  PADDLE_ENFORCE(elem_begin < NumElements(level),
                 "element begin [%d] out of range [%d]", elem_begin,
                 NumElements(level));
  PADDLE_ENFORCE(elem_end < NumElements(level) + 1,
                 "element end [%d] out of range [%d]", elem_end,
                 NumElements(level));

  auto new_lod = details::SliceLOD(*lod_start_pos_, level, elem_begin, elem_end,
                                   false /*tensor_shared*/);

  // new_lod has been rebased to start at 0, so it can no longer say where the
  // slice lives in the source tensor. Take the range from the original
  // offsets instead; otherwise every slice would be copied from the head of
  // the tensor regardless of elem_begin.
  const auto &src_level = lod_start_pos_->at(level);
  const size_t start_idx = src_level[elem_begin];
  // NOTE(review): the "- 1" is kept from the previous implementation, which
  // treated the closing offset as "the next element's start". Confirm against
  // Tensor::Slice whether its end index is inclusive or exclusive.
  const size_t end_idx = src_level[elem_end] - 1;

  auto sliced_tensor = tensor_->Slice<T>(start_idx, end_idx);
  auto new_tensor = std::make_shared<Tensor>();
  new_tensor->CopyFrom<T>(sliced_tensor, dst_place);
  return LODTensor(new_tensor, new_lod);
}
} // namespace framework
} // namespace paddle
/*
Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserve.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/
#include "paddle/framework/lod_tensor.h"
#include <glog/logging.h>
#include <gtest/gtest.h>
#include <memory>
namespace paddle {
namespace framework {
// Test fixture: builds a LODTensor with a 3-level LOD over a 20 x 128 float
// tensor allocated on the CPU place. SetUp runs before every TEST_F below.
class LODTensorTester : public ::testing::Test {
public:
virtual void SetUp() override {
lod_tensor.reset(new LODTensor);
// tensor's batch_size: 20
// 3 levels
// 0 10 20
// 0 5 10 15 20
// 0 2 5 7 10 12 15 17 20
auto lod = std::make_shared<LODTensor::LOD>();
lod->push_back(std::vector<size_t>{0, 10, 20});
lod->push_back(std::vector<size_t>{0, 5, 10, 15, 20});
lod->push_back(std::vector<size_t>{0, 2, 5, 7, 10, 12, 15, 17, 20});
auto tensor = std::make_shared<Tensor>();
tensor->Resize({20 /*batch size*/, 128 /*dim*/});
// malloc memory
tensor->mutable_data<float>(place);
lod_tensor->Reset(tensor, lod);
}
protected:
// The LODTensor under test, rebuilt by SetUp.
std::unique_ptr<LODTensor> lod_tensor;
// Place used to allocate the tensor and as the destination of copies.
platform::CPUPlace place;
};
// The fixture LOD is built from three offset vectors, so it has three levels.
TEST_F(LODTensorTester, NumLevels) {
  const size_t level_count = lod_tensor->NumLevels();
  ASSERT_EQ(level_count, 3UL);
}
// Each level holds one more offset than it has elements: 3, 5 and 9 offsets
// give 2, 4 and 8 elements.
TEST_F(LODTensorTester, NumElements) {
  const size_t expected_counts[] = {2UL, 4UL, 8UL};
  for (size_t level = 0; level < 3UL; ++level) {
    ASSERT_EQ(lod_tensor->NumElements(level), expected_counts[level]);
  }
}
// Slicing whole levels with a shared tensor: the result must expose exactly
// the requested levels and point at the same tensor as the source.
TEST_F(LODTensorTester, SliceShared_Level) {
  // Windows of 1 level first, then windows of 2 levels, at every valid start.
  for (size_t width = 1UL; width <= 2UL; ++width) {
    for (size_t first = 0; first + width <= 3UL; ++first) {
      auto sliced = lod_tensor->SliceShared(first, first + width);
      ASSERT_EQ(sliced.NumLevels(), width);
      for (size_t k = 0; k < width; ++k) {
        ASSERT_EQ(sliced.NumElements(k), lod_tensor->NumElements(first + k));
      }
      ASSERT_EQ(sliced.tensor(), lod_tensor->tensor());
    }
  }
}
// Slicing whole levels with a copied tensor: the result must expose exactly
// the requested levels.
TEST_F(LODTensorTester, SliceCopied_Level) {
  // Windows of 1 level first, then windows of 2 levels, at every valid start.
  for (size_t width = 1UL; width <= 2UL; ++width) {
    for (size_t first = 0; first + width <= 3UL; ++first) {
      auto sliced =
          lod_tensor->SliceCopied<float>(first, first + width, place);
      ASSERT_EQ(sliced.NumLevels(), width);
      for (size_t k = 0; k < width; ++k) {
        ASSERT_EQ(sliced.NumElements(k), lod_tensor->NumElements(first + k));
      }
      // TODO(superjom) add tensor comparation here.
    }
  }
}
// Slicing elements with a shared tensor: the result starts at the sliced
// level and keeps pointing at the source tensor.
TEST_F(LODTensorTester, SliceShared_Element) {
  const size_t elem_begin = 0;
  const size_t elem_end = 2;

  // Elements [0, 2) of level 0 cover the whole LOD: all three levels remain.
  size_t level = 0;
  auto sliced = lod_tensor->SliceShared(level, elem_begin, elem_end);
  ASSERT_EQ(sliced.NumLevels(), 3UL);
  ASSERT_EQ(sliced.NumElements(0), 2UL);
  ASSERT_EQ(sliced.NumElements(1), 4UL);
  ASSERT_EQ(sliced.NumElements(2), 8UL);
  ASSERT_EQ(sliced.raw_tensor(), lod_tensor->raw_tensor());

  // Elements [0, 2) of level 1: two levels remain.
  level = 1;
  sliced = lod_tensor->SliceShared(level, elem_begin, elem_end);
  ASSERT_EQ(sliced.NumLevels(), 2UL);
  ASSERT_EQ(sliced.NumElements(0), 2UL);
  ASSERT_EQ(sliced.NumElements(1), 4UL);
  ASSERT_EQ(sliced.raw_tensor(), lod_tensor->raw_tensor());
}
// Slicing elements with a copied tensor: the result owns a different tensor
// and its offsets are rebased to start at 0.
TEST_F(LODTensorTester, SliceCopied_Element) {
  // Elements [0, 2) of level 0 cover the whole LOD: all three levels remain.
  size_t level = 0;
  auto sliced = lod_tensor->SliceCopied<float>(level, 0, 2, place);
  ASSERT_EQ(sliced.NumLevels(), 3UL);
  ASSERT_EQ(sliced.NumElements(0), 2UL);
  ASSERT_EQ(sliced.NumElements(1), 4UL);
  ASSERT_EQ(sliced.NumElements(2), 8UL);
  ASSERT_NE(sliced.raw_tensor(), lod_tensor->raw_tensor());

  // Elements [0, 2) of level 1: two levels remain.
  level = 1;
  sliced = lod_tensor->SliceCopied<float>(level, 0, 2, place);
  ASSERT_EQ(sliced.NumLevels(), 2UL);
  ASSERT_EQ(sliced.NumElements(0), 2UL);
  ASSERT_EQ(sliced.NumElements(1), 4UL);
  ASSERT_NE(sliced.raw_tensor(), lod_tensor->raw_tensor());

  // Elements [1, 3) of level 1. After rebasing, the LOD is
  // 0 5 10
  // 0 2 5 7 10
  level = 1;
  sliced = lod_tensor->SliceCopied<float>(level, 1, 3, place);
  ASSERT_EQ(sliced.NumLevels(), 2UL);
  ASSERT_EQ(sliced.NumElements(0), 2UL);
  ASSERT_EQ(sliced.NumElements(1), 4UL);

  ASSERT_EQ(sliced.lod_element(0, 0), 0UL);
  ASSERT_EQ(sliced.lod_element(0, 1), 5UL);

  const size_t expected_inner[] = {0UL, 2UL, 5UL, 7UL};
  for (size_t elem = 0; elem < 4UL; ++elem) {
    ASSERT_EQ(sliced.lod_element(1, elem), expected_inner[elem]);
  }
  // TODO(superjom) compare the content of these tensors
}
// ShareLOD must make both tensors point at the very same LOD object.
TEST_F(LODTensorTester, ShareLOD) {
  LODTensor sharer;
  sharer.ShareLOD(*lod_tensor);
  auto *shared_lod = sharer.lod();
  auto *source_lod = lod_tensor->lod();
  ASSERT_EQ(shared_lod, source_lod);
}
// CopyLOD must allocate a separate LOD object rather than share the source's.
TEST_F(LODTensorTester, CopyLOD) {
  LODTensor copier;
  copier.CopyLOD(*lod_tensor);
  auto *copied_lod = copier.lod();
  auto *source_lod = lod_tensor->lod();
  ASSERT_NE(copied_lod, source_lod);
}
} // namespace framework
} // namespace paddle
...@@ -34,8 +34,8 @@ ExecutionContext::GetEigenDevice<platform::GPUPlace, Eigen::GpuDevice>() const { ...@@ -34,8 +34,8 @@ ExecutionContext::GetEigenDevice<platform::GPUPlace, Eigen::GpuDevice>() const {
#endif #endif
const std::string& OperatorBase::Input(const std::string& name) const { const std::string& OperatorBase::Input(const std::string& name) const {
PADDLE_ENFORCE(in_out_idxs_ != nullptr, PADDLE_ENFORCE_NOT_NULL(in_out_idxs_,
"Input Output Indices could not be nullptr"); "Input Output Indices could not be nullptr");
auto it = in_out_idxs_->find(name); auto it = in_out_idxs_->find(name);
PADDLE_ENFORCE(it != in_out_idxs_->end(), "no key [%s] in in_out_idxs_", PADDLE_ENFORCE(it != in_out_idxs_->end(), "no key [%s] in in_out_idxs_",
name); name);
...@@ -49,7 +49,7 @@ const std::string& OperatorBase::Input(const std::string& name) const { ...@@ -49,7 +49,7 @@ const std::string& OperatorBase::Input(const std::string& name) const {
} }
std::vector<std::string> OperatorBase::Inputs(const std::string& name) const { std::vector<std::string> OperatorBase::Inputs(const std::string& name) const {
PADDLE_ENFORCE(in_out_idxs_ != nullptr, "IO Idx could not be nullptr"); PADDLE_ENFORCE_NOT_NULL(in_out_idxs_, "IO Idx could not be nullptr");
auto input_format = GetAttr<std::vector<int>>("input_format"); auto input_format = GetAttr<std::vector<int>>("input_format");
auto offset = in_out_idxs_->at(name); auto offset = in_out_idxs_->at(name);
PADDLE_ENFORCE(input_format.at(static_cast<size_t>(offset) + 1) <= PADDLE_ENFORCE(input_format.at(static_cast<size_t>(offset) + 1) <=
...@@ -62,7 +62,7 @@ std::vector<std::string> OperatorBase::Inputs(const std::string& name) const { ...@@ -62,7 +62,7 @@ std::vector<std::string> OperatorBase::Inputs(const std::string& name) const {
} }
const std::string& OperatorBase::Output(const std::string& name) const { const std::string& OperatorBase::Output(const std::string& name) const {
PADDLE_ENFORCE(in_out_idxs_ != nullptr, "InOut Indice could not be nullptr"); PADDLE_ENFORCE_NOT_NULL(in_out_idxs_, "InOut Indice could not be nullptr");
auto it = in_out_idxs_->find(name); auto it = in_out_idxs_->find(name);
PADDLE_ENFORCE(it != in_out_idxs_->end(), "no key [%s] in in_out_idxs_", PADDLE_ENFORCE(it != in_out_idxs_->end(), "no key [%s] in in_out_idxs_",
name); name);
...@@ -76,7 +76,7 @@ const std::string& OperatorBase::Output(const std::string& name) const { ...@@ -76,7 +76,7 @@ const std::string& OperatorBase::Output(const std::string& name) const {
} }
std::vector<std::string> OperatorBase::Outputs(const std::string& name) const { std::vector<std::string> OperatorBase::Outputs(const std::string& name) const {
PADDLE_ENFORCE(in_out_idxs_ != nullptr, "InOut Indice could not be nullptr"); PADDLE_ENFORCE_NOT_NULL(in_out_idxs_, "InOut Indice could not be nullptr");
auto output_format = GetAttr<std::vector<int>>("output_format"); auto output_format = GetAttr<std::vector<int>>("output_format");
auto offset = in_out_idxs_->at(name); auto offset = in_out_idxs_->at(name);
PADDLE_ENFORCE(output_format.at(static_cast<size_t>(offset) + 1) <= PADDLE_ENFORCE(output_format.at(static_cast<size_t>(offset) + 1) <=
......
...@@ -88,6 +88,8 @@ class OperatorBase { ...@@ -88,6 +88,8 @@ class OperatorBase {
virtual bool IsNetOp() const { return false; } virtual bool IsNetOp() const { return false; }
virtual bool SupportGPU() const { return false; }
/// rename inputs outputs name /// rename inputs outputs name
void Rename(const std::string& old_name, const std::string& new_name); void Rename(const std::string& old_name, const std::string& new_name);
...@@ -118,10 +120,10 @@ class OperatorBase { ...@@ -118,10 +120,10 @@ class OperatorBase {
std::shared_ptr<std::unordered_map<std::string, int>> in_out_idxs_; std::shared_ptr<std::unordered_map<std::string, int>> in_out_idxs_;
}; };
class OperatorContext { class InferShapeContext {
public: public:
OperatorContext(const OperatorBase* op, const Scope& scope) InferShapeContext(const OperatorBase& op, const Scope& scope)
: op_(*op), scope_(scope) {} : op_(op), scope_(scope) {}
size_t InputSize() const { return op_.inputs_.size(); } size_t InputSize() const { return op_.inputs_.size(); }
...@@ -167,15 +169,15 @@ class OperatorContext { ...@@ -167,15 +169,15 @@ class OperatorContext {
template <typename T> template <typename T>
const T* Input(const size_t index) const { const T* Input(const size_t index) const {
auto var = InputVar(index); auto var = InputVar(index);
PADDLE_ENFORCE(var != nullptr, "Input(%d) should not be nullptr", index); PADDLE_ENFORCE_NOT_NULL(var, "Input(%d) should not be nullptr", index);
return &var->Get<T>(); return &var->Get<T>();
} }
template <typename T> template <typename T>
T* Output(const size_t index) const { T* Output(const size_t index) const {
auto var = OutputVar(index); auto var = OutputVar(index);
PADDLE_ENFORCE( PADDLE_ENFORCE_NOT_NULL(
var != nullptr, var,
"Output(%d) not be nullptr, which means variable [%s] does not " "Output(%d) not be nullptr, which means variable [%s] does not "
"exist in scope", "exist in scope",
index, op_.outputs_[index]); index, op_.outputs_[index]);
...@@ -185,14 +187,14 @@ class OperatorContext { ...@@ -185,14 +187,14 @@ class OperatorContext {
template <typename T> template <typename T>
const T* Input(const std::string& name) const { const T* Input(const std::string& name) const {
auto var = InputVar(name); auto var = InputVar(name);
PADDLE_ENFORCE(var != nullptr, "Input(%s) should not be nullptr", name); PADDLE_ENFORCE_NOT_NULL(var, "Input(%s) should not be nullptr", name);
return &var->Get<T>(); return &var->Get<T>();
} }
template <typename T> template <typename T>
T* Output(const std::string& name) const { T* Output(const std::string& name) const {
auto var = OutputVar(name); auto var = OutputVar(name);
PADDLE_ENFORCE(var != nullptr, "Output(%s) should not be nullptr", name); PADDLE_ENFORCE_NOT_NULL(var, "Output(%s) should not be nullptr", name);
return var->GetMutable<T>(); return var->GetMutable<T>();
} }
...@@ -204,9 +206,9 @@ class OperatorContext { ...@@ -204,9 +206,9 @@ class OperatorContext {
std::transform(names.begin(), names.end(), std::back_inserter(res), std::transform(names.begin(), names.end(), std::back_inserter(res),
[&](const std::string& sub_name) { [&](const std::string& sub_name) {
auto var = scope_.FindVar(sub_name); auto var = scope_.FindVar(sub_name);
PADDLE_ENFORCE(var != nullptr, PADDLE_ENFORCE_NOT_NULL(
"MultiInput(%s:%s) should not be nullptr", var, "MultiInput(%s:%s) should not be nullptr", name,
name, sub_name); sub_name);
return &var->Get<T>(); return &var->Get<T>();
}); });
return res; return res;
...@@ -220,9 +222,9 @@ class OperatorContext { ...@@ -220,9 +222,9 @@ class OperatorContext {
std::transform(names.begin(), names.end(), std::back_inserter(res), std::transform(names.begin(), names.end(), std::back_inserter(res),
[&](const std::string& sub_name) { [&](const std::string& sub_name) {
auto var = scope_.FindVar(sub_name); auto var = scope_.FindVar(sub_name);
PADDLE_ENFORCE(var != nullptr, PADDLE_ENFORCE_NOT_NULL(
"MultiOutput(%s:%s) should not be nullptr", var, "MultiOutput(%s:%s) should not be nullptr", name,
name, sub_name); sub_name);
return var->GetMutable<T>(); return var->GetMutable<T>();
}); });
return res; return res;
...@@ -232,12 +234,6 @@ class OperatorContext { ...@@ -232,12 +234,6 @@ class OperatorContext {
const Scope& scope_; const Scope& scope_;
}; };
class InferShapeContext : public OperatorContext {
public:
InferShapeContext(const OperatorBase* op, const Scope& scope)
: OperatorContext(op, scope) {}
};
template <typename T> template <typename T>
struct EigenDeviceConverter; struct EigenDeviceConverter;
...@@ -253,11 +249,11 @@ struct EigenDeviceConverter<platform::GPUPlace> { ...@@ -253,11 +249,11 @@ struct EigenDeviceConverter<platform::GPUPlace> {
}; };
#endif #endif
class ExecutionContext : public OperatorContext { class ExecutionContext : public InferShapeContext {
public: public:
ExecutionContext(const OperatorBase* op, const Scope& scope, ExecutionContext(const OperatorBase& op, const Scope& scope,
const platform::DeviceContext* device_context) const platform::DeviceContext* device_context)
: OperatorContext(op, scope), device_context_(device_context) {} : InferShapeContext(op, scope), device_context_(device_context) {}
template <typename PlaceType, template <typename PlaceType,
typename DeviceType = typename DeviceType =
...@@ -312,14 +308,14 @@ class OperatorWithKernel : public OperatorBase { ...@@ -312,14 +308,14 @@ class OperatorWithKernel : public OperatorBase {
using OpKernelMap = using OpKernelMap =
std::unordered_map<OpKernelKey, std::unique_ptr<OpKernel>, OpKernelHash>; std::unordered_map<OpKernelKey, std::unique_ptr<OpKernel>, OpKernelHash>;
void InferShape(const Scope& scope) const { void InferShape(const Scope& scope) const override {
InferShape(InferShapeContext(this, scope)); InferShape(InferShapeContext(*this, scope));
} }
void Run(const Scope& scope, void Run(const Scope& scope,
const platform::DeviceContext& dev_ctx) const final { const platform::DeviceContext& dev_ctx) const final {
auto& opKernel = AllOpKernels().at(type_).at(OpKernelKey(dev_ctx)); auto& opKernel = AllOpKernels().at(type_).at(OpKernelKey(dev_ctx));
opKernel->Compute(ExecutionContext(this, scope, &dev_ctx)); opKernel->Compute(ExecutionContext(*this, scope, &dev_ctx));
} }
static std::unordered_map<std::string /* op_type */, OpKernelMap>& static std::unordered_map<std::string /* op_type */, OpKernelMap>&
...@@ -328,6 +324,12 @@ class OperatorWithKernel : public OperatorBase { ...@@ -328,6 +324,12 @@ class OperatorWithKernel : public OperatorBase {
return g_all_op_kernels; return g_all_op_kernels;
} }
bool SupportGPU() const override {
OperatorWithKernel::OpKernelKey key;
key.place_ = platform::GPUPlace();
return OperatorWithKernel::AllOpKernels().at(type_).count(key) != 0;
}
protected: protected:
virtual void InferShape(const InferShapeContext& ctx) const = 0; virtual void InferShape(const InferShapeContext& ctx) const = 0;
}; };
......
...@@ -18,11 +18,8 @@ limitations under the License. */ ...@@ -18,11 +18,8 @@ limitations under the License. */
#include "paddle/framework/backward.h" #include "paddle/framework/backward.h"
#include "paddle/framework/op_registry.h" #include "paddle/framework/op_registry.h"
#include "paddle/framework/operator.h"
#include "paddle/framework/scope.h"
#include "paddle/framework/tensor_py.h" #include "paddle/framework/tensor_py.h"
#include "paddle/operators/net_op.h" #include "paddle/operators/net_op.h"
#include "paddle/operators/type_alias.h"
#include "paddle/platform/enforce.h" #include "paddle/platform/enforce.h"
#include "paddle/platform/place.h" #include "paddle/platform/place.h"
#include "pybind11/numpy.h" #include "pybind11/numpy.h"
...@@ -32,7 +29,7 @@ limitations under the License. */ ...@@ -32,7 +29,7 @@ limitations under the License. */
namespace py = pybind11; namespace py = pybind11;
USE_OP(add_two); USE_OP(add_two);
USE_OP(onehot_cross_entropy); USE_OP_CPU(onehot_cross_entropy);
USE_OP_WITHOUT_KERNEL(fc); USE_OP_WITHOUT_KERNEL(fc);
USE_OP(sgd); USE_OP(sgd);
USE_OP(mul); USE_OP(mul);
...@@ -40,9 +37,14 @@ USE_OP(mean); ...@@ -40,9 +37,14 @@ USE_OP(mean);
USE_OP(sigmoid); USE_OP(sigmoid);
USE_OP(softmax); USE_OP(softmax);
USE_OP(rowwise_add); USE_OP(rowwise_add);
USE_OP(fill_zeros_like);
USE_OP_WITHOUT_KERNEL(recurrent_op); USE_OP_WITHOUT_KERNEL(recurrent_op);
USE_OP(uniform_random);
namespace paddle { namespace paddle {
namespace framework { namespace framework {
using Tensor = framework::Tensor;
template <typename ClassType> template <typename ClassType>
void ExposeOperator(ClassType &m) { void ExposeOperator(ClassType &m) {
m.def("infer_shape", &ClassType::type::InferShape) m.def("infer_shape", &ClassType::type::InferShape)
...@@ -55,6 +57,26 @@ void ExposeOperator(ClassType &m) { ...@@ -55,6 +57,26 @@ void ExposeOperator(ClassType &m) {
[](const typename ClassType::type &op) -> std::vector<std::string> { [](const typename ClassType::type &op) -> std::vector<std::string> {
return op.outputs_; return op.outputs_;
}) })
.def("inputs",
[](const typename ClassType::type &op) -> std::vector<std::string> {
return op.inputs_;
})
.def("support_gpu", &ClassType::type::SupportGPU)
.def("temp_outputs",
[](const typename ClassType::type &op) -> std::vector<std::string> {
auto iter = op.attrs_.find("temporary_index");
std::vector<std::string> ret;
if (iter == op.attrs_.end()) {
return ret;
} else {
auto tmp_idx = boost::get<std::vector<int>>(iter->second);
ret.reserve(tmp_idx.size());
for (auto &index : tmp_idx) {
ret.push_back(op.outputs_.at(index));
}
return ret;
}
})
.def("__str__", &ClassType::type::DebugString); .def("__str__", &ClassType::type::DebugString);
} }
...@@ -128,8 +150,8 @@ All parameter, weight, gradient are variables in Paddle. ...@@ -128,8 +150,8 @@ All parameter, weight, gradient are variables in Paddle.
[](Variable &self) -> Tensor * { return self.GetMutable<Tensor>(); }, [](Variable &self) -> Tensor * { return self.GetMutable<Tensor>(); },
py::return_value_policy::reference) py::return_value_policy::reference)
.def("get_net", .def("get_net",
[](Variable &self) -> ops::NetOp * { [](Variable &self) -> operators::NetOp * {
return self.GetMutable<ops::NetOp>(); return self.GetMutable<operators::NetOp>();
}, },
py::return_value_policy::reference); py::return_value_policy::reference);
...@@ -208,23 +230,24 @@ All parameter, weight, gradient are variables in Paddle. ...@@ -208,23 +230,24 @@ All parameter, weight, gradient are variables in Paddle.
ExposeOperator(operator_base); ExposeOperator(operator_base);
py::class_<ops::NetOp, std::shared_ptr<ops::NetOp>> net(m, "Net"); py::class_<operators::NetOp, std::shared_ptr<operators::NetOp>> net(m, "Net");
net.def_static("create", net.def_static("create",
[]() -> std::shared_ptr<ops::NetOp> { []() -> std::shared_ptr<operators::NetOp> {
auto retv = std::make_shared<ops::NetOp>(); auto retv = std::make_shared<operators::NetOp>();
retv->type_ = "plain_net"; retv->type_ = "plain_net";
return retv; return retv;
}) })
.def("add_op", &ops::NetOp::AddOp) .def("add_op", &operators::NetOp::AddOp)
.def( .def("add_op",
"add_op", [](operators::NetOp &self,
[](ops::NetOp &self, const std::shared_ptr<ops::NetOp> &net) -> void { const std::shared_ptr<operators::NetOp> &net) -> void {
self.AddOp(std::static_pointer_cast<OperatorBase>(net)); self.AddOp(std::static_pointer_cast<OperatorBase>(net));
}) })
.def("complete_add_op", &ops::NetOp::CompleteAddOp) .def("complete_add_op", &operators::NetOp::CompleteAddOp)
.def("complete_add_op", .def("complete_add_op", [](std::shared_ptr<operators::NetOp> &self) {
[](std::shared_ptr<ops::NetOp> &self) { self->CompleteAddOp(); }); self->CompleteAddOp();
});
ExposeOperator(net); ExposeOperator(net);
......
...@@ -18,6 +18,8 @@ limitations under the License. */ ...@@ -18,6 +18,8 @@ limitations under the License. */
#include <cstring> #include <cstring>
#include <memory> #include <memory>
#include <typeindex> #include <typeindex>
#include <vector>
#include "paddle/framework/ddim.h" #include "paddle/framework/ddim.h"
#include "paddle/memory/memory.h" #include "paddle/memory/memory.h"
#include "paddle/platform/device_context.h" #include "paddle/platform/device_context.h"
...@@ -129,8 +131,8 @@ class Tensor { ...@@ -129,8 +131,8 @@ class Tensor {
memory::PODDeleter<T, Place>(place)), memory::PODDeleter<T, Place>(place)),
place_(place), place_(place),
size_(size) { size_(size) {
PADDLE_ENFORCE(ptr_ != nullptr, "Insufficient %s memory to allocation.", PADDLE_ENFORCE_NOT_NULL(ptr_, "Insufficient %s memory to allocation.",
is_cpu_place(place_) ? "CPU" : "GPU"); (is_cpu_place(place_) ? "CPU" : "GPU"));
} }
virtual size_t size() const { return size_; } virtual size_t size() const { return size_; }
......
...@@ -14,17 +14,18 @@ limitations under the License. */ ...@@ -14,17 +14,18 @@ limitations under the License. */
#pragma once #pragma once
#include "paddle/memory/memcpy.h" #include "paddle/memory/memcpy.h"
#include "paddle/platform/enforce.h"
namespace paddle { namespace paddle {
namespace framework { namespace framework {
template <typename T> template <typename T>
inline void Tensor::check_memory_size() const { inline void Tensor::check_memory_size() const {
PADDLE_ENFORCE(holder_ != nullptr, PADDLE_ENFORCE_NOT_NULL(
"Tenosr holds no memory. Call Tensor::mutable_data first."); holder_, "Tenosr holds no memory. Call Tensor::mutable_data first.");
PADDLE_ENFORCE(holder_->size() >= product(dims_) * sizeof(T) + offset_, PADDLE_ENFORCE_GE(holder_->size(), product(dims_) * sizeof(T) + offset_,
"Tensor's dims_ is out of bound. Call Tensor::mutable_data " "Tensor's dims_ is out of bound. Call Tensor::mutable_data "
"first to re-allocate memory."); "first to re-allocate memory.");
} }
template <typename T> template <typename T>
...@@ -51,9 +52,9 @@ inline T* Tensor::mutable_data(DDim dims, platform::Place place) { ...@@ -51,9 +52,9 @@ inline T* Tensor::mutable_data(DDim dims, platform::Place place) {
template <typename T> template <typename T>
inline T* Tensor::mutable_data(platform::Place place) { inline T* Tensor::mutable_data(platform::Place place) {
static_assert(std::is_pod<T>::value, "T must be POD"); static_assert(std::is_pod<T>::value, "T must be POD");
PADDLE_ENFORCE(product(dims_) > 0, PADDLE_ENFORCE_GT(product(dims_), 0,
"Tensor's numel must be larger than zero to call " "Tensor's numel must be larger than zero to call "
"Tensor::mutable_data. Call Tensor::set_dim first."); "Tensor::mutable_data. Call Tensor::set_dim first.");
/* some versions of boost::variant don't have operator!= */ /* some versions of boost::variant don't have operator!= */
size_t size = product(dims_) * sizeof(T); size_t size = product(dims_) * sizeof(T);
if (holder_ == nullptr || !(holder_->place() == place) || if (holder_ == nullptr || !(holder_->place() == place) ||
...@@ -120,11 +121,11 @@ inline void Tensor::CopyFrom(const Tensor& src, ...@@ -120,11 +121,11 @@ inline void Tensor::CopyFrom(const Tensor& src,
template <typename T> template <typename T>
inline Tensor Tensor::Slice(const int& begin_idx, const int& end_idx) const { inline Tensor Tensor::Slice(const int& begin_idx, const int& end_idx) const {
check_memory_size<T>(); check_memory_size<T>();
PADDLE_ENFORCE(begin_idx >= 0, "Slice begin index is less than zero."); PADDLE_ENFORCE_GE(begin_idx, 0, "Slice begin index is less than zero.");
PADDLE_ENFORCE(end_idx <= dims_[0], "Slice end index is out of bound."); PADDLE_ENFORCE_LE(end_idx, dims_[0], "Slice end index is out of bound.");
PADDLE_ENFORCE(begin_idx < end_idx, PADDLE_ENFORCE_LT(begin_idx, end_idx,
"Begin index must be less than end index."); "Begin index must be less than end index.");
PADDLE_ENFORCE(dims_[0] != 1, "Can not slice a tensor with dims_[0] = 1."); PADDLE_ENFORCE_NE(dims_[0], 1, "Can not slice a tensor with dims_[0] = 1.");
int base = product(dims_) / dims_[0]; int base = product(dims_) / dims_[0];
Tensor dst; Tensor dst;
dst.holder_ = holder_; dst.holder_ = holder_;
......
...@@ -19,7 +19,7 @@ TEST(Tensor, Dims) { ...@@ -19,7 +19,7 @@ TEST(Tensor, Dims) {
using namespace paddle::framework; using namespace paddle::framework;
using namespace paddle::platform; using namespace paddle::platform;
Tensor tt; Tensor tt;
tt.Resize(make_ddim({2, 3, 4})); tt.Resize({2, 3, 4});
DDim dims = tt.dims(); DDim dims = tt.dims();
ASSERT_EQ(arity(dims), 3); ASSERT_EQ(arity(dims), 3);
for (int i = 0; i < 3; ++i) { for (int i = 0; i < 3; ++i) {
...@@ -36,7 +36,8 @@ TEST(Tensor, DataAssert) { ...@@ -36,7 +36,8 @@ TEST(Tensor, DataAssert) {
} catch (paddle::platform::EnforceNotMet err) { } catch (paddle::platform::EnforceNotMet err) {
caught = true; caught = true;
std::string msg = std::string msg =
"Tenosr holds no memory. Call Tensor::mutable_data first."; "holder_ should not be null\nTenosr holds no memory. Call "
"Tensor::mutable_data first.";
const char* what = err.what(); const char* what = err.what();
for (size_t i = 0; i < msg.length(); ++i) { for (size_t i = 0; i < msg.length(); ++i) {
ASSERT_EQ(what[i], msg[i]); ASSERT_EQ(what[i], msg[i]);
...@@ -111,7 +112,8 @@ TEST(Tensor, ShareDataWith) { ...@@ -111,7 +112,8 @@ TEST(Tensor, ShareDataWith) {
} catch (paddle::platform::EnforceNotMet err) { } catch (paddle::platform::EnforceNotMet err) {
caught = true; caught = true;
std::string msg = std::string msg =
"Tenosr holds no memory. Call Tensor::mutable_data first."; "holder_ should not be null\nTenosr holds no memory. Call "
"Tensor::mutable_data first.";
const char* what = err.what(); const char* what = err.what();
for (size_t i = 0; i < msg.length(); ++i) { for (size_t i = 0; i < msg.length(); ++i) {
ASSERT_EQ(what[i], msg[i]); ASSERT_EQ(what[i], msg[i]);
......
...@@ -93,8 +93,8 @@ TEST(Arguments, Matrix) { ...@@ -93,8 +93,8 @@ TEST(Arguments, Matrix) {
MatrixPtr matrix = Matrix::create(100, 200); MatrixPtr matrix = Matrix::create(100, 200);
CheckBufferArg check = [=](const BufferArg& arg) { CheckBufferArg check = [=](const BufferArg& arg) {
EXPECT_EQ(arg.shape().ndims(), 2U); EXPECT_EQ(arg.shape().ndims(), 2U);
EXPECT_EQ(arg.shape()[0], 100); EXPECT_EQ(arg.shape()[0], 100U);
EXPECT_EQ(arg.shape()[1], 200); EXPECT_EQ(arg.shape()[1], 200U);
EXPECT_EQ(arg.data(), matrix->getData()); EXPECT_EQ(arg.data(), matrix->getData());
EXPECT_EQ(arg.matrix<DEVICE_TYPE_CPU>().getHeight(), matrix->getHeight()); EXPECT_EQ(arg.matrix<DEVICE_TYPE_CPU>().getHeight(), matrix->getHeight());
...@@ -112,8 +112,8 @@ TEST(Arguments, Matrix) { ...@@ -112,8 +112,8 @@ TEST(Arguments, Matrix) {
TEST(Arguments, Vector) { TEST(Arguments, Vector) {
VectorPtr vector = Vector::create(100, false); VectorPtr vector = Vector::create(100, false);
CheckBufferArg check = [=](const BufferArg& arg) { CheckBufferArg check = [=](const BufferArg& arg) {
EXPECT_EQ(arg.shape().ndims(), 1); EXPECT_EQ(arg.shape().ndims(), 1U);
EXPECT_EQ(arg.shape()[0], 100); EXPECT_EQ(arg.shape()[0], 100U);
EXPECT_EQ(arg.data(), vector->getData()); EXPECT_EQ(arg.data(), vector->getData());
CpuVector inVector = arg.vector<real, DEVICE_TYPE_CPU>(); CpuVector inVector = arg.vector<real, DEVICE_TYPE_CPU>();
...@@ -131,9 +131,9 @@ TEST(Arguments, Vector) { ...@@ -131,9 +131,9 @@ TEST(Arguments, Vector) {
TEST(Arguments, CpuSparseMatrix) { TEST(Arguments, CpuSparseMatrix) {
CpuSparseMatrix sparse(200, 300, 50); CpuSparseMatrix sparse(200, 300, 50);
CheckBufferArg check = [=](const BufferArg& arg) { CheckBufferArg check = [=](const BufferArg& arg) {
EXPECT_EQ(arg.shape().ndims(), 2); EXPECT_EQ(arg.shape().ndims(), 2U);
EXPECT_EQ(arg.shape()[0], 200); EXPECT_EQ(arg.shape()[0], 200U);
EXPECT_EQ(arg.shape()[1], 300); EXPECT_EQ(arg.shape()[1], 300U);
EXPECT_EQ(arg.data(), sparse.getData()); EXPECT_EQ(arg.data(), sparse.getData());
// CHECK_EQ(arg.sparse().nnz(), 50); // CHECK_EQ(arg.sparse().nnz(), 50);
// CHECK_EQ(arg.sparse().dataFormat(), SPARSE_CSR_FORMAT); // CHECK_EQ(arg.sparse().dataFormat(), SPARSE_CSR_FORMAT);
...@@ -152,10 +152,10 @@ TEST(Arguments, CpuSparseMatrix) { ...@@ -152,10 +152,10 @@ TEST(Arguments, CpuSparseMatrix) {
TEST(Arguments, BufferArg) { TEST(Arguments, BufferArg) {
BufferArg arg(nullptr, VALUE_TYPE_FLOAT, {1, 2, 3}); BufferArg arg(nullptr, VALUE_TYPE_FLOAT, {1, 2, 3});
CheckBufferArg check = [=](const BufferArg& arg) { CheckBufferArg check = [=](const BufferArg& arg) {
EXPECT_EQ(arg.shape().ndims(), 3); EXPECT_EQ(arg.shape().ndims(), 3U);
EXPECT_EQ(arg.shape()[0], 1); EXPECT_EQ(arg.shape()[0], 1U);
EXPECT_EQ(arg.shape()[1], 2); EXPECT_EQ(arg.shape()[1], 2U);
EXPECT_EQ(arg.shape()[2], 3); EXPECT_EQ(arg.shape()[2], 3U);
}; };
BufferArgs argments; BufferArgs argments;
......
...@@ -44,7 +44,7 @@ TEST(TensorShape, GetAndSet) { ...@@ -44,7 +44,7 @@ TEST(TensorShape, GetAndSet) {
EXPECT_EQ(t.ndims(), 3U); EXPECT_EQ(t.ndims(), 3U);
EXPECT_EQ(t.getElements(), 6U); EXPECT_EQ(t.getElements(), 6U);
EXPECT_EQ(t[1], 2); EXPECT_EQ(t[1], 2U);
t.setDim(1, 100); t.setDim(1, 100);
EXPECT_EQ(t.getElements(), 300U); EXPECT_EQ(t.getElements(), 300U);
EXPECT_EQ(t[1], 100U); EXPECT_EQ(t[1], 100U);
......
...@@ -14,6 +14,7 @@ limitations under the License. */ ...@@ -14,6 +14,7 @@ limitations under the License. */
#include "CudnnBatchNormLayer.h" #include "CudnnBatchNormLayer.h"
#include "Layer.h" #include "Layer.h"
#include "paddle/cuda/include/hl_batch_norm.h"
#include "paddle/utils/Stat.h" #include "paddle/utils/Stat.h"
namespace paddle { namespace paddle {
...@@ -79,16 +80,33 @@ void CudnnBatchNormLayer::forward(PassType passType) { ...@@ -79,16 +80,33 @@ void CudnnBatchNormLayer::forward(PassType passType) {
savedInvVar); savedInvVar);
} else { } else {
// used movingMean and movingVar in testing // used movingMean and movingVar in testing
hl_batch_norm_forward_inference(ioDesc_, if (batchSize <= 1024) {
input, hl_batch_norm_forward_inference(ioDesc_,
ioDesc_, input,
output, ioDesc_,
bnParamDesc_, output,
gamma, bnParamDesc_,
beta, gamma,
movingMean, beta,
movingVar, movingMean,
EPS); movingVar,
EPS);
} else {
// There is a limitation in cudnn library.
// When the batch size is larger than 1024 in cuDNN v5.1,
// the cudnnBatchNormalizationForwardInference will fail.
hl_batch_norm_cuda_inference(input,
output,
gamma,
beta,
movingMean,
movingVar,
EPS,
batchSize,
channels_,
imageH_,
imageW_);
}
} }
/* activation */ { /* activation */ {
......
/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserve.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
#include "Layer.h"
namespace paddle {
/*
 * Layer that, for each sequence (or each sub-sequence of a nested sequence)
 * of scalar scores, outputs the indices of the beamSize_ highest scores.
 * Output rows are pre-filled with -1, so a sequence shorter than beamSize_
 * leaves trailing -1 entries. The layer produces no gradient.
 */
class KmaxSeqScoreLayer : public Layer {
private:
  // CPU-side scores: the input value itself in CPU mode, a copy in GPU mode.
  MatrixPtr scores_;
  // Number of indices kept per sequence; read from the layer config in init().
  size_t beamSize_;

  /*
   * Writes, for every sequence delimited by seqStartPos, the indices of its
   * top-k scores (k = min(beamSize_, sequence length)) into the matching row
   * of sortedIds, ordered by descending score.
   *
   * scores      one score per input row.
   * sortedIds   output buffer with beamSize_ entries per sequence.
   * seqStartPos start offsets of the sequences inside scores.
   */
  void kmaxScorePerSeq(const real* scores,
                       real* sortedIds,
                       const ICpuGpuVectorPtr seqStartPos);

public:
  explicit KmaxSeqScoreLayer(const LayerConfig& config) : Layer(config) {}

  bool init(const LayerMap& layerMap,
            const ParameterMap& parameterMap) override;

  void forward(PassType passType) override;
  void backward(const UpdateCallback& callback = nullptr) override;
};

REGISTER_LAYER(kmax_seq_score, KmaxSeqScoreLayer);
// Initializes the base layer, validates that there is exactly one input and
// a beam size of at least 1, and turns off sequence-info propagation and
// gradient computation for this layer. Returns the base-class init result.
bool KmaxSeqScoreLayer::init(const LayerMap& layerMap,
                             const ParameterMap& parameterMap) {
  const bool baseInitOk = Layer::init(layerMap, parameterMap);
  CHECK_EQ(1U, inputLayers_.size());

  beamSize_ = config_.beam_size();
  CHECK_GE(beamSize_, 1U);

  setNeedSequenceInfo(false);
  setNeedGradient(false);
  return baseInitOk;
}
void KmaxSeqScoreLayer::kmaxScorePerSeq(const real* scores,
real* sortedIds,
const ICpuGpuVectorPtr seqStartPos) {
int* starts = seqStartPos->getMutableData(false);
std::vector<real> indices;
for (size_t i = 0; i < seqStartPos->getSize() - 1; ++i) {
int seqLen = starts[i + 1] - starts[i];
int k = std::min(static_cast<int>(beamSize_), seqLen);
indices.resize(seqLen, 0);
std::iota(begin(indices), end(indices), 0.);
std::vector<real> tmpScore(scores + starts[i], scores + starts[i + 1]);
std::partial_sort(
begin(indices),
begin(indices) + k,
end(indices),
[&](size_t a, size_t b) { return tmpScore[a] > tmpScore[b]; });
memcpy(sortedIds + (i * beamSize_), indices.data(), k * sizeof(real));
}
}
// Forward pass: validates that the input is a (nested) sequence of width-1
// scores, makes the scores available on the CPU, and fills one output row
// per sequence (or per sub-sequence for nested input) with the indices of
// its top-beamSize_ scores. Unused slots stay at -1.
void KmaxSeqScoreLayer::forward(PassType passType) {
  Layer::forward(passType);

  const Argument& input = getInput(0);
  const MatrixPtr inputScore = getInputValue(0);

  CHECK(input.hasSeq() || input.hasSubseq())
      << "input of " << getName()
      << " must be a sequence or a nested sequence.";
  CHECK_EQ(input.value->getWidth(), 1UL)
      << "input of " << getName()
      << " is score over a sequence or a nested sequence, so its width "
      << " must be 1.";

  if (!useGpu_) {
    scores_ = inputScore;
  } else {
    // This layer only runs on the CPU; when the model runs on GPU, copy the
    // input scores from GPU to a CPU-side matrix first.
    Matrix::resizeOrCreate(scores_,
                           inputScore->getHeight(),
                           1,
                           false /* trans */,
                           false /* useGpu */);
    scores_->copyFrom(*inputScore);
  }

  const bool nested = input.hasSubseq();
  const auto rowNum =
      nested ? input.getNumSubSequences() : input.getNumSequences();
  Matrix::resizeOrCreate(output_.value, rowNum, beamSize_, false, false);

  // Pre-fill the whole output with -1 (ones scaled by -1).
  output_.value->one();
  output_.value->mulScalar(-1.);

  const auto& startPos =
      nested ? input.subSequenceStartPositions : input.sequenceStartPositions;
  kmaxScorePerSeq(scores_->getData(), output_.value->getData(), startPos);
}
// Intentionally empty: init() calls setNeedGradient(false), so this layer has
// nothing to propagate backwards.
void KmaxSeqScoreLayer::backward(const UpdateCallback& callback) {}
} // namespace paddle
/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserve.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
#include "Layer.h"
#include "paddle/math/Matrix.h"
#include "paddle/math/Vector.h"
#include "paddle/utils/Logging.h"
#include "paddle/utils/Stat.h"
namespace paddle {
// Layer with two inputs: a nested sequence and a matrix of selected
// sub-sequence indices (one row per sequence, -1 terminating a row). Its output
// is the rows of the nested-sequence input that belong to the selected
// sub-sequences.
class SubNestedSequenceLayer : public Layer {
public:
explicit SubNestedSequenceLayer(const LayerConfig& config) : Layer(config) {}
bool init(const LayerMap& layerMap,
const ParameterMap& parameterMap) override;
void forward(PassType passType) override;
void backward(const UpdateCallback& callback = nullptr) override;
private:
/*
* This function generates the indices of the rows in a batch that belong to
* the selected sub-sequences of each sequence.
*
* Examples:
* selectedIndices (-1 ends the selection of a sequence):
* [
* [0, 1, -1],
* [0, 1, 2],
* [0, -1, -1],
* [0, 2, 3],
* ]
* inputSeqInfo (start offsets of the sub-sequences of each sequence):
* [
* [0,3,4],
* [4,5,7,10,15],
* [15,20],
* [20,22,23,25,28]
* ]
*
* The output is saved to the private member rowIndice_:
* [0,1,2,3,4,5,6,7,8,9,15,16,17,18,19,20,21,23,24,25,26,27]
* (rows 10-14 and row 22 belong to sub-sequences that were not selected).
*/
void calSelectedCols(const MatrixPtr selectedIndices,
const std::vector<std::vector<int>>& inputSeqInfo);
// CPU-side view of the second input: the input itself when it is not a
// GpuMatrix, otherwise a CPU copy of it.
MatrixPtr selIdsCpu_;
// sequenceStartPositions and subSequenceStartPositions reorganized into a
// 2-D vector (one inner vector of sub-sequence offsets per sequence) to
// simplify the selection process.
std::vector<std::vector<int>> inputSeqInfoVec_;
// The final selected row indices in a batch. In CPU mode rowIndice_ is created
// over selectedRows_'s buffer (presumably sharing its memory); in GPU mode it
// holds a copy of selectedRows_.
IVectorPtr rowIndice_;
std::vector<int> selectedRows_;
};
REGISTER_LAYER(sub_nested_seq, SubNestedSequenceLayer);
// Initializes the base layer, checks that exactly two inputs are connected
// (the nested sequence and the selected indices) and disables automatic
// sequence-info propagation, since forward() builds the output sequence
// information itself.
//
// Returns the result of Layer::init so that a base-class initialization
// failure is reported to the caller instead of being masked by `true`.
bool SubNestedSequenceLayer::init(const LayerMap& layerMap,
                                  const ParameterMap& parameterMap) {
  /* Initialize the basic parent class */
  const bool baseInitOk = Layer::init(layerMap, parameterMap);
  CHECK_EQ(2U, inputLayers_.size());
  setNeedSequenceInfo(false);
  return baseInitOk;
}
void SubNestedSequenceLayer::calSelectedCols(
const MatrixPtr selectedIndices,
const std::vector<std::vector<int>>& inputSeqInfo) {
selectedRows_.clear();
std::vector<int> outSeqStartInfo(1, 0);
std::vector<int> outSubSeqStartInfo(1, 0);
size_t seqNum = selectedIndices->getHeight();
size_t beamSize = selectedIndices->getWidth();
for (size_t i = 0; i < seqNum; ++i) {
for (size_t j = 0; j < beamSize; ++j) {
if (selectedIndices->getElement(i, j) == -1.) break;
size_t selSubSeqIdx = selectedIndices->getElement(i, j);
CHECK_GT(inputSeqInfoVec_[i].size() - 1, selSubSeqIdx);
size_t subSeqLen = inputSeqInfoVec_[i][selSubSeqIdx + 1] -
inputSeqInfoVec_[i][selSubSeqIdx];
for (size_t k = 0; k < subSeqLen; ++k)
selectedRows_.push_back(inputSeqInfoVec_[i][selSubSeqIdx] + k);
outSubSeqStartInfo.push_back(outSubSeqStartInfo.back() + subSeqLen);
}
outSeqStartInfo.push_back(outSubSeqStartInfo.back());
}
if (useGpu_) {
rowIndice_ = IVector::create(selectedRows_.size(), useGpu_);
rowIndice_->copyFrom(selectedRows_.data(), selectedRows_.size());
} else {
rowIndice_ =
IVector::create(selectedRows_.data(), selectedRows_.size(), useGpu_);
}
// create the sequence information for the output.
ICpuGpuVector::resizeOrCreate(
output_.sequenceStartPositions, outSeqStartInfo.size(), false);
output_.sequenceStartPositions->copyFrom(
outSeqStartInfo.data(), outSeqStartInfo.size(), false);
ICpuGpuVector::resizeOrCreate(
output_.subSequenceStartPositions, outSubSeqStartInfo.size(), false);
output_.subSequenceStartPositions->copyFrom(
outSubSeqStartInfo.data(), outSubSeqStartInfo.size(), false);
}
// Forward pass: checks that input 0 is a nested sequence with one row of
// selected indices per sequence in input 1, makes the indices available on
// the CPU, computes the selected row indices, and copies those rows of
// input 0 into the output.
void SubNestedSequenceLayer::forward(PassType passType) {
  Layer::forward(passType);

  const Argument& inputSeq = getInput(0);
  CHECK(inputSeq.hasSubseq()) << "The first input of SubNestSequence layer "
                              << "must be a nested sequence.";

  const MatrixPtr selectedIndices = getInputValue(1);
  CHECK_EQ(size_t(inputSeq.getNumSequences()), selectedIndices->getHeight());

  const bool indicesOnGpu =
      dynamic_cast<GpuMatrix*>(selectedIndices.get()) != nullptr;
  if (!indicesOnGpu) {
    selIdsCpu_ = selectedIndices;
  } else {
    /*
     * Currently, the second input for this layer is generated by
     * kmax_sequence_score_layer, whose output is always stored on CPU,
     * or by a data_layer, which can be on GPU.
     *
     * If the second input is on GPU, copy it to CPU memory: it uses very
     * little memory, and everything done with it is control logic rather
     * than computation.
     */
    Matrix::resizeOrCreate(selIdsCpu_,
                           selectedIndices->getHeight(),
                           selectedIndices->getWidth(),
                           false /* trans */,
                           false /* useGpu */);
    selIdsCpu_->copyFrom(*selectedIndices);
  }

  Argument::reorganizeSeqInfo(inputSeq.sequenceStartPositions,
                              inputSeq.subSequenceStartPositions,
                              inputSeqInfoVec_);
  calSelectedCols(selIdsCpu_, inputSeqInfoVec_);

  resetOutput(selectedRows_.size(), getSize());
  getOutputValue()->selectRows(*getInputValue(0), *rowIndice_);
}
// Backward pass: when the first input has a gradient buffer, hands the output
// gradient, the input gradient and the selected row indices to addToRows.
// NOTE(review): addToRows presumably accumulates each output-gradient row into
// the input-gradient row named by rowIndice_ -- confirm against Matrix.
void SubNestedSequenceLayer::backward(const UpdateCallback& callback) {
  MatrixPtr inputSeqGrad = getInputGrad(0);
  MatrixPtr outputGrad = getOutputGrad();

  // Nothing to do when the input does not require a gradient.
  if (!inputSeqGrad) return;

  outputGrad->addToRows(*inputSeqGrad, *rowIndice_);
}
} // namespace paddle
...@@ -66,6 +66,16 @@ add_unittest_without_exec(test_BatchNorm ...@@ -66,6 +66,16 @@ add_unittest_without_exec(test_BatchNorm
add_test(NAME test_BatchNorm add_test(NAME test_BatchNorm
COMMAND test_BatchNorm) COMMAND test_BatchNorm)
################# test_KmaxSeqScore #######################
# Build the test_KmaxSeqScore executable from its own source plus the shared
# LayerGradUtil helpers.
# NOTE(review): add_unittest_without_exec presumably builds the binary without
# registering it as a test -- confirm in the project's CMake helper functions.
add_unittest_without_exec(test_KmaxSeqScore
test_KmaxSeqScore.cpp
LayerGradUtil.cpp)
# Register the executable with CTest explicitly.
add_test(NAME test_KmaxSeqScore
COMMAND test_KmaxSeqScore)
################## test_Evaluator ####################### ################## test_Evaluator #######################
add_unittest(test_Evaluator add_unittest(test_Evaluator
test_Evaluator.cpp) test_Evaluator.cpp)
......
...@@ -21,6 +21,8 @@ limitations under the License. */ ...@@ -21,6 +21,8 @@ limitations under the License. */
#include "paddle/utils/GlobalConstants.h" #include "paddle/utils/GlobalConstants.h"
#include "LayerGradUtil.h" #include "LayerGradUtil.h"
#include "paddle/cuda/include/hl_batch_norm.h"
#include "paddle/math/tests/TensorCheck.h"
#include "paddle/testing/TestUtil.h" #include "paddle/testing/TestUtil.h"
using namespace paddle; // NOLINT using namespace paddle; // NOLINT
...@@ -117,6 +119,74 @@ TEST(Layer, batchNorm) { ...@@ -117,6 +119,74 @@ TEST(Layer, batchNorm) {
CHECK_EQ(static_cast<int>(convLayer->getOutputValue()->getWidth()), 576); CHECK_EQ(static_cast<int>(convLayer->getOutputValue()->getWidth()), 576);
} }
#ifndef PADDLE_ONLY_CPU
// Runs batch-norm inference on the same random GPU input through both
// hl_batch_norm_forward_inference (descriptor-based) and
// hl_batch_norm_cuda_inference, then compares the two results on the CPU.
//
// n, c, h, w: batch size, channels, height and width; the data matrices are
// n x (c * h * w) and the per-channel parameters are 1 x c.
void batchNormInference(int n, int c, int h, int w) {
MatrixPtr input = std::make_shared<GpuMatrix>(n, c * h * w);
MatrixPtr cudnnOut = std::make_shared<GpuMatrix>(n, c * h * w);
MatrixPtr cudaOut = std::make_shared<GpuMatrix>(n, c * h * w);
// CPU-side buffers used only to compare the two GPU results.
MatrixPtr cudnnCheck = std::make_shared<CpuMatrix>(n, c * h * w);
MatrixPtr cudaCheck = std::make_shared<CpuMatrix>(n, c * h * w);
input->randomizeUniform();
cudnnOut->zeroMem();
cudaOut->zeroMem();
// Per-channel scale, bias and moving statistics, all randomly filled.
MatrixPtr scale = std::make_shared<GpuMatrix>(1, c);
scale->randomizeUniform();
MatrixPtr bias = std::make_shared<GpuMatrix>(1, c);
bias->randomizeUniform();
MatrixPtr movingMean = std::make_shared<GpuMatrix>(1, c);
movingMean->randomizeUniform();
MatrixPtr movingVar = std::make_shared<GpuMatrix>(1, c);
movingVar->randomizeUniform();
// NOTE(review): presumably clamps the variance to [0.01, 50] so it stays away
// from zero -- confirm against Matrix::clip.
movingVar->clip(0.01, 50);
// Tensor descriptors for the n x c x h x w data and the 1 x c x 1 x 1
// per-channel parameters.
hl_tensor_descriptor ioDesc;
hl_tensor_descriptor bnDesc;
hl_create_tensor_descriptor(&ioDesc);
hl_create_tensor_descriptor(&bnDesc);
hl_tensor_reshape(ioDesc, n, c, h, w);
hl_tensor_reshape(bnDesc, 1, c, 1, 1);
double EPS = 1E-5;
// Descriptor-based implementation; result goes to cudnnOut.
hl_batch_norm_forward_inference(ioDesc,
input->getData(),
ioDesc,
cudnnOut->getData(),
bnDesc,
scale->getData(),
bias->getData(),
movingMean->getData(),
movingVar->getData(),
EPS);
// Implementation taking explicit dimensions; result goes to cudaOut.
hl_batch_norm_cuda_inference(input->getData(),
cudaOut->getData(),
scale->getData(),
bias->getData(),
movingMean->getData(),
movingVar->getData(),
EPS,
n,
c,
h,
w);
// Bring both results to the CPU and compare them.
cudnnCheck->copyFrom(*cudnnOut);
cudaCheck->copyFrom(*cudaOut);
autotest::TensorCheckErr(*cudnnCheck, *cudaCheck);
hl_destroy_tensor_descriptor(ioDesc);
hl_destroy_tensor_descriptor(bnDesc);
}
// Compares the two batch-norm inference implementations on two shapes: one
// with a 1x1 image and one with a 4x4 image.
TEST(BatchNorm, Inference) {
batchNormInference(33, 267, 1, 1);
batchNormInference(19, 105, 4, 4);
}
#endif
int main(int argc, char** argv) { int main(int argc, char** argv) {
testing::InitGoogleTest(&argc, argv); testing::InitGoogleTest(&argc, argv);
initMain(argc, argv); initMain(argc, argv);
......
/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserve.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
#include <gtest/gtest.h>
#include <algorithm>
#include <string>
#include <vector>
#include "ModelConfig.pb.h"
#include "paddle/gserver/layers/DataLayer.h"
#include "paddle/trainer/Trainer.h"
#include "paddle/utils/GlobalConstants.h"
#include "LayerGradUtil.h"
#include "paddle/testing/TestUtil.h"
using namespace paddle; // NOLINT
using namespace std; // NOLINT
DECLARE_bool(use_gpu);
DECLARE_int32(gpu_id);
DECLARE_bool(thread_local_rand_use_global_seed);
// Returns n distinct integers drawn from [0, range). When n equals range the
// values come back in ascending order; otherwise they are the first n entries
// of a random shuffle. Aborts (CHECK) if range < n.
vector<int> randSampling(int range, int n) {
  CHECK_GE(range, n);

  vector<int> candidates(range);
  iota(begin(candidates), end(candidates), 0);

  if (range != n) {
    random_shuffle(begin(candidates), end(candidates));
    candidates.resize(n);
  }
  return candidates;
}
// Generates random start positions for a nested sequence.
//
// seqStartPosition     [out] receives seqNum + 1 offsets, starting at 0, where
//                      seqNum is drawn from [1, 100].
// subSeqStartPosition  [out] receives the sub-sequence offsets, starting at 0.
//                      For each sequence a value L in [1, 100] is drawn and
//                      the sequence is split into L sub-sequences of length L.
//
// Both vectors are fully overwritten: assign() is used instead of resize() so
// that contents left over from a previous call cannot leak into the result.
void genRandomSeqInfo(std::vector<int>& seqStartPosition,
                      std::vector<int>& subSeqStartPosition) {
  const int maxSeqNum = 100;

  // generate random start position information
  const int seqNum = 1 + (rand() % maxSeqNum);
  seqStartPosition.assign(seqNum + 1, 0);
  subSeqStartPosition.assign(1, 0);

  for (int i = 0; i < seqNum; ++i) {
    // Used both as the number of sub-sequences and as the length of each.
    const int subSeqLen = 1 + (rand() % maxSeqNum);
    for (int j = 0; j < subSeqLen; ++j)
      subSeqStartPosition.push_back(subSeqStartPosition.back() + subSeqLen);
    seqStartPosition[i + 1] = subSeqStartPosition.back();
  }
}
// Picks, for every sequence delimited by startPos, up to beamSize random
// positions, records them in groundTruth (unused slots stay -1) and sets the
// score at each picked position to 1.
void genRandomGroundTruth(real* values,
                          vector<vector<int>>& groundTruth,
                          vector<int>& startPos,
                          size_t beamSize) {
  const size_t seqNum = startPos.size() - 1;
  groundTruth.resize(seqNum, vector<int>(beamSize, -1));

  for (size_t seqIdx = 0; seqIdx < seqNum; ++seqIdx) {
    const int seqBegin = startPos[seqIdx];
    const int seqLen = startPos[seqIdx + 1] - seqBegin;
    const vector<int> picked =
        randSampling(seqLen, min(static_cast<int>(beamSize), seqLen));

    size_t slot = 0;
    for (int offset : picked) {
      groundTruth[seqIdx][slot++] = offset;
      values[seqBegin + offset] = 1.;
    }
  }
}
// Verifies that every row of layerOut (beamSize entries per row) holds the
// same set of values as the matching row of groundTruth, ignoring order.
// groundTruth is taken by value because its rows are sorted in place.
void checkLayerOut(vector<vector<int>> groundTruth,
                   real* layerOut,
                   size_t beamSize) {
  size_t row = 0;
  for (auto& expected : groundTruth) {
    int begPos = row * beamSize;
    vector<real> actual(layerOut + begPos, layerOut + begPos + beamSize);

    sort(begin(actual), end(actual));
    sort(begin(expected), end(expected));

    for (size_t j = 0; j < beamSize; ++j) CHECK_EQ(actual[j], expected[j]);
    ++row;
  }
}
// End-to-end check of the kmax_seq_score layer: builds random (nested)
// sequence scores in which a known set of positions per sequence is set to 1,
// runs the layer on CPU and GPU, and checks that its output lists exactly
// those positions.
TEST(Layer, kmaxSeqScoreLayer) {
const size_t maxBeamSize = 100;
size_t beamSize = 1 + (rand() % maxBeamSize);
vector<int> seqStartPosition;
vector<int> subSeqStartPosition;
genRandomSeqInfo(seqStartPosition, subSeqStartPosition);
// One score per row; the last sub-sequence offset is the total row count.
MatrixPtr inValue =
Matrix::create(subSeqStartPosition.back(), 1, false, false);
// Exercise both a plain sequence and a nested sequence input.
for (auto hasSubseq : {false, true}) {
vector<vector<int>> groundTruth;
inValue->randomizeUniform();
// Overwrites the scores at the chosen positions with 1 and records them.
genRandomGroundTruth(inValue->getData(),
groundTruth,
hasSubseq ? subSeqStartPosition : seqStartPosition,
beamSize);
for (auto useGpu : {false, true}) {
TestConfig config;
config.layerConfig.set_type("kmax_seq_score");
config.layerConfig.set_beam_size(beamSize);
if (hasSubseq) {
config.inputDefs.push_back({INPUT_SELF_DEFINE_DATA,
"scores",
inValue,
seqStartPosition,
subSeqStartPosition});
} else {
config.inputDefs.push_back(
{INPUT_SELF_DEFINE_DATA, "scores", inValue, seqStartPosition});
}
config.layerConfig.add_inputs();
// Initialize the data layers that feed the layer under test.
std::vector<DataLayerPtr> dataLayers;
LayerMap layerMap;
vector<Argument> datas;
initDataLayer(
config,
&dataLayers,
&datas,
&layerMap,
"kmax_seq_score",
100 /* actually this parameter is unused in self-defined input*/,
false,
useGpu);
// Initialize the layer under test; FLAGS_use_gpu is set before creating it.
std::vector<ParameterPtr> parameters;
LayerPtr kmaxSeqScoreLayer;
FLAGS_use_gpu = useGpu;
initTestLayer(config, &layerMap, &parameters, &kmaxSeqScoreLayer);
kmaxSeqScoreLayer->forward(PASS_TRAIN);
// The output has one row per (sub-)sequence and beamSize columns.
const MatrixPtr outValue = kmaxSeqScoreLayer->getOutputValue();
CHECK_EQ(outValue->getHeight(),
hasSubseq ? subSeqStartPosition.size() - 1
: seqStartPosition.size() - 1);
CHECK_EQ(outValue->getWidth(), beamSize);
checkLayerOut(groundTruth, outValue->getData(), beamSize);
}
}
}
// Test entry point: initializes GoogleTest and the Paddle runtime, enables
// FLAGS_thread_local_rand_use_global_seed, seeds rand() from the current
// time, and runs all registered tests.
int main(int argc, char** argv) {
  testing::InitGoogleTest(&argc, argv);
  initMain(argc, argv);

  FLAGS_thread_local_rand_use_global_seed = true;

  const auto now = time(NULL);
  srand((size_t)(now));

  return RUN_ALL_TESTS();
}
...@@ -1899,6 +1899,84 @@ TEST(Layer, CropLayer) { ...@@ -1899,6 +1899,84 @@ TEST(Layer, CropLayer) {
} }
} }
// Returns n distinct values drawn from {0, 1, ..., range - 1}, in ascending
// order, stored as `real`. When n equals range every value is returned.
// Aborts (CHECK) if range < n.
vector<real> randSampling(real range, int n) {
  CHECK_GE(range, n);

  vector<real> candidates(range);
  iota(begin(candidates), end(candidates), 0.);

  if (range != n) {
    random_shuffle(begin(candidates), end(candidates));
    candidates.resize(n);
    sort(begin(candidates), end(candidates));
  }
  return candidates;
}
// Gradient check for the sub_nested_seq layer: builds a random nested sequence
// and a random selection of sub-sequences per sequence, then runs
// testLayerGrad on CPU and GPU.
TEST(Layer, SubNestedSequenceLayer) {
// layer size is not crucial for this layer,
// so use a small layer size in the unit test
const int layerSize = 4;
const int maxSeqNum = 50;
const int maxSeqLen = 50;
const int maxBeamSize = 32;
srand((size_t)(time(NULL)));
int beamSize = 1 + (rand() % maxBeamSize);
TestConfig config;
config.layerConfig.set_type("sub_nested_seq");
config.layerConfig.set_name("sub_nested_seq_layer");
config.layerConfig.set_size(layerSize);
int seqNum = 1 + (rand() % maxSeqNum);
// sequence information for the first input, it is a nested sequence
vector<int> seqStartPos(seqNum + 1, 0);
vector<int> subSeqStartPos(1, 0);
// selected indices: one row per sequence, pre-filled with -1 (ones scaled by
// -1) so that unused slots mark the end of a row's selection
MatrixPtr selectedIndices = Matrix::create(seqNum, beamSize, false, false);
selectedIndices->one();
selectedIndices->mulScalar(-1.);
real* indicesData = selectedIndices->getData();
for (int i = 0; i < seqNum; ++i) {
// random number of sub-sequences, each with a random length
int subSeqNum = 1 + (rand() % maxSeqNum);
for (int j = 0; j < subSeqNum; ++j) {
subSeqStartPos.push_back(subSeqStartPos.back() +
(1 + (rand() % maxSeqLen)));
}
// choose up to beamSize distinct sub-sequences and store them at the start
// of row i
vector<real> selSeqs =
randSampling(static_cast<real>(subSeqNum), min(beamSize, subSeqNum));
memcpy(indicesData + (i * beamSize),
selSeqs.data(),
selSeqs.size() * sizeof(real));
seqStartPos[i + 1] = subSeqStartPos.back();
}
// first input: the nested sequence itself, one row per time step
MatrixPtr seqInputPtr =
Matrix::create(seqStartPos.back(), layerSize, false, false);
seqInputPtr->randomizeUniform();
config.inputDefs.push_back({INPUT_SELF_DEFINE_DATA,
"nested_seq_input",
seqInputPtr,
seqStartPos,
subSeqStartPos});
config.layerConfig.add_inputs();
// second input: the selected sub-sequence indices
config.inputDefs.push_back(
{INPUT_SELF_DEFINE_DATA, "selected_indices", selectedIndices});
config.layerConfig.add_inputs();
for (auto useGpu : {false, true}) {
testLayerGrad(config,
"sub_nested_seq",
/* batchSize */ seqNum,
/* trans */ false,
/* useGpu*/ useGpu,
/* useWeight */ false);
}
}
TEST(Layer, ClipLayer) { TEST(Layer, ClipLayer) {
const size_t batchSize = 128; const size_t batchSize = 128;
const size_t size = 512; const size_t size = 512;
......
...@@ -61,6 +61,7 @@ op_library(cross_entropy_op SRCS cross_entropy_op.cc cross_entropy_op.cu) ...@@ -61,6 +61,7 @@ op_library(cross_entropy_op SRCS cross_entropy_op.cc cross_entropy_op.cu)
op_library(fill_zeros_like_op SRCS fill_zeros_like_op.cc fill_zeros_like_op.cu) op_library(fill_zeros_like_op SRCS fill_zeros_like_op.cc fill_zeros_like_op.cu)
op_library(sgd_op SRCS sgd_op.cc sgd_op.cu) op_library(sgd_op SRCS sgd_op.cc sgd_op.cu)
cc_test(sgd_op_test SRCS sgd_op_test.cc DEPS sgd_op)
op_library(fc_op op_library(fc_op
SRCS fc_op.cc SRCS fc_op.cc
...@@ -68,3 +69,5 @@ op_library(fc_op ...@@ -68,3 +69,5 @@ op_library(fc_op
op_library(recurrent_op SRCS recurrent_op.cc rnn/recurrent_op_utils.cc op_library(recurrent_op SRCS recurrent_op.cc rnn/recurrent_op_utils.cc
DEPS op_desc tensor op_registry operator net_op) DEPS op_desc tensor op_registry operator net_op)
cc_test(recurrent_op_test SRCS recurrent_op_test.cc DEPS recurrent_op gtest mul_op add_op) cc_test(recurrent_op_test SRCS recurrent_op_test.cc DEPS recurrent_op gtest mul_op add_op)
op_library(uniform_random_op
SRCS uniform_random_op.cc uniform_random_op.cu)
...@@ -17,13 +17,12 @@ limitations under the License. */ ...@@ -17,13 +17,12 @@ limitations under the License. */
namespace paddle { namespace paddle {
namespace operators { namespace operators {
class AddOp : public OperatorWithKernel { class AddOp : public framework::OperatorWithKernel {
protected: protected:
void InferShape(const InferShapeContext &ctx) const override { void InferShape(const framework::InferShapeContext &ctx) const override {
PADDLE_ENFORCE_EQ(ctx.InputSize(), 2); PADDLE_ENFORCE_EQ(ctx.InputSize(), 2);
PADDLE_ENFORCE_EQ(ctx.OutputSize(), 1); PADDLE_ENFORCE_EQ(ctx.OutputSize(), 1);
PADDLE_ENFORCE(ctx.InputVar(0) != nullptr && ctx.InputVar(1) != nullptr, PADDLE_ENFORCE_NOT_NULL(ctx.InputVar(0), "Inputs of AddOp must all be set");
"Inputs of AddOp must all be set");
PADDLE_ENFORCE(ctx.OutputVar(0) != nullptr, PADDLE_ENFORCE(ctx.OutputVar(0) != nullptr,
"Outputs of AddOp must all be set"); "Outputs of AddOp must all be set");
PADDLE_ENFORCE(ctx.Input<Tensor>(0)->dims() == ctx.Input<Tensor>(1)->dims(), PADDLE_ENFORCE(ctx.Input<Tensor>(0)->dims() == ctx.Input<Tensor>(1)->dims(),
...@@ -32,9 +31,9 @@ class AddOp : public OperatorWithKernel { ...@@ -32,9 +31,9 @@ class AddOp : public OperatorWithKernel {
} }
}; };
class AddOpMaker : public OpProtoAndCheckerMaker { class AddOpMaker : public framework::OpProtoAndCheckerMaker {
public: public:
AddOpMaker(OpProto *proto, OpAttrChecker *op_checker) AddOpMaker(framework::OpProto *proto, framework::OpAttrChecker *op_checker)
: OpProtoAndCheckerMaker(proto, op_checker) { : OpProtoAndCheckerMaker(proto, op_checker) {
AddInput("X", "The first input of add op"); AddInput("X", "The first input of add op");
AddInput("Y", "The second input of add op"); AddInput("Y", "The second input of add op");
...@@ -47,14 +46,17 @@ The equation is: Out = X + Y ...@@ -47,14 +46,17 @@ The equation is: Out = X + Y
} }
}; };
class AddOpGrad : public OperatorWithKernel { class AddOpGrad : public framework::OperatorWithKernel {
protected: protected:
void InferShape(const InferShapeContext &ctx) const override {} void InferShape(const framework::InferShapeContext &ctx) const override {}
}; };
} // namespace operators } // namespace operators
} // namespace paddle } // namespace paddle
namespace ops = paddle::operators;
REGISTER_OP(add_two, ops::AddOp, ops::AddOpMaker); REGISTER_OP(add_two, ops::AddOp, ops::AddOpMaker);
REGISTER_GRADIENT_OP(add_two, add_two_grad, ops::AddOpGrad); REGISTER_GRADIENT_OP(add_two, add_two_grad, ops::AddOpGrad);
REGISTER_OP_CPU_KERNEL(add_two, ops::AddKernel<ops::CPUPlace, float>);
REGISTER_OP_CPU_KERNEL(add_two,
ops::AddKernel<paddle::platform::CPUPlace, float>);
...@@ -16,4 +16,6 @@ ...@@ -16,4 +16,6 @@
#include "paddle/framework/op_registry.h" #include "paddle/framework/op_registry.h"
#include "paddle/operators/add_op.h" #include "paddle/operators/add_op.h"
REGISTER_OP_GPU_KERNEL(add_two, ops::AddKernel<ops::GPUPlace, float>); namespace ops = paddle::operators;
REGISTER_OP_GPU_KERNEL(add_two,
ops::AddKernel<paddle::platform::GPUPlace, float>);
...@@ -13,15 +13,21 @@ See the License for the specific language governing permissions and ...@@ -13,15 +13,21 @@ See the License for the specific language governing permissions and
limitations under the License. */ limitations under the License. */
#pragma once #pragma once
#include "paddle/operators/type_alias.h" #include "paddle/framework/eigen.h"
#include "paddle/framework/op_registry.h"
namespace paddle { namespace paddle {
namespace operators { namespace operators {
using Tensor = framework::Tensor;
template <typename T, int MajorType = Eigen::RowMajor,
typename IndexType = Eigen::DenseIndex>
using EigenVector = framework::EigenVector<T, MajorType, IndexType>;
template <typename Place, typename T> template <typename Place, typename T>
class AddKernel : public OpKernel { class AddKernel : public framework::OpKernel {
public: public:
void Compute(const ExecutionContext& context) const override { void Compute(const framework::ExecutionContext& context) const override {
auto input0 = context.Input<Tensor>(0); auto input0 = context.Input<Tensor>(0);
auto input1 = context.Input<Tensor>(1); auto input1 = context.Input<Tensor>(1);
auto output = context.Output<Tensor>(0); auto output = context.Output<Tensor>(0);
......
...@@ -14,9 +14,9 @@ limitations under the License. */ ...@@ -14,9 +14,9 @@ limitations under the License. */
#include <gtest/gtest.h> #include <gtest/gtest.h>
#define private public #define private public
#include <paddle/framework/op_registry.h> #include "paddle/framework/op_registry.h"
USE_OP(add_two); USE_OP(add_two);
// USE_OP(add_two_grad);
TEST(AddOp, GetOpProto) { TEST(AddOp, GetOpProto) {
auto& protos = paddle::framework::OpRegistry::protos(); auto& protos = paddle::framework::OpRegistry::protos();
......
...@@ -17,28 +17,29 @@ limitations under the License. */ ...@@ -17,28 +17,29 @@ limitations under the License. */
namespace paddle { namespace paddle {
namespace operators { namespace operators {
class OnehotCrossEntropyOp : public OperatorWithKernel { class OnehotCrossEntropyOp : public framework::OperatorWithKernel {
protected: protected:
void InferShape(const InferShapeContext &ctx) const override { void InferShape(const framework::InferShapeContext &ctx) const override {
PADDLE_ENFORCE(ctx.InputSize() == 2, PADDLE_ENFORCE_EQ(ctx.InputSize(), 2,
"Input size of OnehotCrossEntropyOp must be two"); "Input size of OnehotCrossEntropyOp must be two");
PADDLE_ENFORCE(ctx.OutputSize() == 1, PADDLE_ENFORCE_EQ(ctx.OutputSize(), 1,
"Output size of OnehotCrossEntropyOp must be one"); "Output size of OnehotCrossEntropyOp must be one");
PADDLE_ENFORCE(ctx.InputVar(0) != nullptr && ctx.InputVar(1) != nullptr, PADDLE_ENFORCE_NOT_NULL(ctx.InputVar(0),
"Inputs of OnehotCrossEntropyOp must all be set"); "0-th input of OnehotCrossEntropyOp should be set");
PADDLE_ENFORCE(ctx.OutputVar(0) != nullptr, PADDLE_ENFORCE_NOT_NULL(ctx.InputVar(1),
"Outputs of OnehotCrossEntropyOp must all be set"); "1-th input of OnehotCrossEntropyOp should be set");
PADDLE_ENFORCE(ctx.Input<Tensor>(0)->dims().size() == 2, PADDLE_ENFORCE_NOT_NULL(ctx.OutputVar(0),
"X's dimension must be 2."); "Outputs of OnehotCrossEntropyOp must all be set");
PADDLE_ENFORCE(ctx.Output<Tensor>(0)->dims().size() == 1, PADDLE_ENFORCE_EQ(ctx.Input<Tensor>(0)->dims().size(), 2);
"label's dimension must be 1."); PADDLE_ENFORCE_EQ(ctx.Output<Tensor>(0)->dims().size(), 1,
"label's dimension must be 1.");
ctx.Output<Tensor>(0)->Resize({ctx.Input<Tensor>(0)->dims()[0]}); ctx.Output<Tensor>(0)->Resize({ctx.Input<Tensor>(0)->dims()[0]});
} }
}; };
class OnehotCrossEntropyGradientOp : public OperatorWithKernel { class OnehotCrossEntropyGradientOp : public framework::OperatorWithKernel {
protected: protected:
void InferShape(const InferShapeContext &ctx) const override { void InferShape(const framework::InferShapeContext &ctx) const override {
auto X_grad = ctx.Output<Tensor>(framework::GradVarName("X")); auto X_grad = ctx.Output<Tensor>(framework::GradVarName("X"));
auto X = ctx.Input<Tensor>("X"); auto X = ctx.Input<Tensor>("X");
...@@ -47,9 +48,10 @@ class OnehotCrossEntropyGradientOp : public OperatorWithKernel { ...@@ -47,9 +48,10 @@ class OnehotCrossEntropyGradientOp : public OperatorWithKernel {
} }
}; };
class OnehotCrossEntropyOpMaker : public OpProtoAndCheckerMaker { class OnehotCrossEntropyOpMaker : public framework::OpProtoAndCheckerMaker {
public: public:
OnehotCrossEntropyOpMaker(OpProto *proto, OpAttrChecker *op_checker) OnehotCrossEntropyOpMaker(framework::OpProto *proto,
framework::OpAttrChecker *op_checker)
: OpProtoAndCheckerMaker(proto, op_checker) { : OpProtoAndCheckerMaker(proto, op_checker) {
AddInput("X", "The first input of OnehotCrossEntropyOp"); AddInput("X", "The first input of OnehotCrossEntropyOp");
AddInput("label", "The second input of OnehotCrossEntropyOp"); AddInput("label", "The second input of OnehotCrossEntropyOp");
...@@ -65,11 +67,14 @@ OnehotCrossEntropy Operator. ...@@ -65,11 +67,14 @@ OnehotCrossEntropy Operator.
} // namespace operators } // namespace operators
} // namespace paddle } // namespace paddle
namespace ops = paddle::operators;
REGISTER_OP(onehot_cross_entropy, ops::OnehotCrossEntropyOp, REGISTER_OP(onehot_cross_entropy, ops::OnehotCrossEntropyOp,
ops::OnehotCrossEntropyOpMaker); ops::OnehotCrossEntropyOpMaker);
REGISTER_OP_CPU_KERNEL(onehot_cross_entropy, REGISTER_OP_CPU_KERNEL(
ops::OnehotCrossEntropyOpKernel<ops::CPUPlace, float>); onehot_cross_entropy,
ops::OnehotCrossEntropyOpKernel<paddle::platform::CPUPlace, float>);
REGISTER_GRADIENT_OP(onehot_cross_entropy, onehot_cross_entropy_grad,
ops::OnehotCrossEntropyGradientOp);
REGISTER_OP_CPU_KERNEL( REGISTER_OP_CPU_KERNEL(
onehot_cross_entropy_grad, onehot_cross_entropy_grad,
ops::OnehotCrossEntropyGradientOpKernel<ops::CPUPlace, float>); ops::OnehotCrossEntropyGradientOpKernel<paddle::platform::CPUPlace, float>);
...@@ -15,5 +15,7 @@ ...@@ -15,5 +15,7 @@
#define EIGEN_USE_GPU #define EIGEN_USE_GPU
#include "paddle/operators/cross_entropy_op.h" #include "paddle/operators/cross_entropy_op.h"
REGISTER_OP_GPU_KERNEL(onehot_cross_entropy, namespace ops = paddle::operators;
ops::OnehotCrossEntropyOpKernel<ops::GPUPlace, float>); REGISTER_OP_GPU_KERNEL(
onehot_cross_entropy,
ops::OnehotCrossEntropyOpKernel<paddle::platform::GPUPlace, float>);
...@@ -13,17 +13,36 @@ See the License for the specific language governing permissions and ...@@ -13,17 +13,36 @@ See the License for the specific language governing permissions and
limitations under the License. */ limitations under the License. */
#pragma once #pragma once
#include "paddle/operators/type_alias.h" #include "paddle/framework/op_registry.h"
namespace paddle { namespace paddle {
namespace operators { namespace operators {
static const float kCrossEntropyLogThreshold{1e-20}; using Tensor = framework::Tensor;
/**
 * @brief Replace an infinite value by a large finite stand-in.
 *
 * @tparam T a floating-point type (float, double or long double); any other
 *           type is rejected at compile time.
 * @param x  value to sanitize.
 * @return   1e20 if x is +infinity, -1e20 if x is -infinity, otherwise x
 *           unchanged. NaN compares unequal to both infinities and is
 *           therefore returned as-is.
 */
template <typename T>
T tolerable_value(T x) {
  // The third standard floating-point type is `long double`; the diagnostic
  // previously named a non-existent "double double".
  static_assert(std::is_floating_point<T>::value,
                "tolerable_value works only on float, "
                "double and long double.");

  // Finite substitute used in place of +/- infinity.
  const T kApproInf = 1e20;

  if (x == INFINITY) {
    return kApproInf;
  }
  if (x == -INFINITY) {
    return -kApproInf;
  }
  return x;
}
template <typename Place, typename T> template <typename Place, typename T>
class OnehotCrossEntropyOpKernel : public OpKernel { class OnehotCrossEntropyOpKernel : public framework::OpKernel {
public: public:
void Compute(const ExecutionContext& ctx) const override { void Compute(const framework::ExecutionContext& ctx) const override {
auto X = ctx.Input<Tensor>("X"); auto X = ctx.Input<Tensor>("X");
const T* Xdata = X->data<T>(); const T* Xdata = X->data<T>();
const int* label_data = ctx.Input<Tensor>(1)->data<int>(); const int* label_data = ctx.Input<Tensor>(1)->data<int>();
...@@ -36,18 +55,17 @@ class OnehotCrossEntropyOpKernel : public OpKernel { ...@@ -36,18 +55,17 @@ class OnehotCrossEntropyOpKernel : public OpKernel {
int batch_size = X->dims()[0]; int batch_size = X->dims()[0];
int class_num = X->dims()[1]; int class_num = X->dims()[1];
// Y[i] = -log(X[i][j])
for (int i = 0; i < batch_size; ++i) { for (int i = 0; i < batch_size; ++i) {
Ydata[i] = -std::log(std::max(Xdata[i * class_num + label_data[i]], int index = i * class_num + label_data[i];
kCrossEntropyLogThreshold)); Ydata[i] = -tolerable_value(std::log(Xdata[index]));
} }
} }
}; };
template <typename Place, typename T> template <typename Place, typename T>
class OnehotCrossEntropyGradientOpKernel : public OpKernel { class OnehotCrossEntropyGradientOpKernel : public framework::OpKernel {
public: public:
void Compute(const ExecutionContext& ctx) const override { void Compute(const framework::ExecutionContext& ctx) const override {
auto X = ctx.Input<Tensor>("X"); auto X = ctx.Input<Tensor>("X");
auto dX = ctx.Output<Tensor>(framework::GradVarName("X")); auto dX = ctx.Output<Tensor>(framework::GradVarName("X"));
auto dY = ctx.Input<Tensor>(framework::GradVarName("Y")); auto dY = ctx.Input<Tensor>(framework::GradVarName("Y"));
...@@ -62,9 +80,8 @@ class OnehotCrossEntropyGradientOpKernel : public OpKernel { ...@@ -62,9 +80,8 @@ class OnehotCrossEntropyGradientOpKernel : public OpKernel {
const int class_num = X->dims()[1]; const int class_num = X->dims()[1];
for (int i = 0; i < batch_size; ++i) { for (int i = 0; i < batch_size; ++i) {
dXdata[i * class_num + label_data[i]] = int index = i * class_num + label_data[i];
-dYdata[i] / std::max(Xdata[i * class_num + label_data[i]], dXdata[index] = -tolerable_value(dYdata[i] / Xdata[index]);
kCrossEntropyLogThreshold);
} }
} }
}; };
......
...@@ -12,11 +12,16 @@ ...@@ -12,11 +12,16 @@
See the License for the specific language governing permissions and See the License for the specific language governing permissions and
limitations under the License. */ limitations under the License. */
#include "type_alias.h" #include "paddle/operators/net_op.h"
#include "paddle/framework/eigen.h"
#include "paddle/framework/op_registry.h"
namespace paddle { namespace paddle {
namespace operators { namespace operators {
using OpRegistry = framework::OpRegistry;
class FullyConnectedOp : public NetOp { class FullyConnectedOp : public NetOp {
public: public:
void Init() override { void Init() override {
...@@ -39,9 +44,10 @@ class FullyConnectedOp : public NetOp { ...@@ -39,9 +44,10 @@ class FullyConnectedOp : public NetOp {
} }
}; };
class FullyConnectedOpMaker : public OpProtoAndCheckerMaker { class FullyConnectedOpMaker : public framework::OpProtoAndCheckerMaker {
public: public:
FullyConnectedOpMaker(OpProto *proto, OpAttrChecker *op_checker) FullyConnectedOpMaker(framework::OpProto *proto,
framework::OpAttrChecker *op_checker)
: OpProtoAndCheckerMaker(proto, op_checker) { : OpProtoAndCheckerMaker(proto, op_checker) {
AddInput("X", "the input of fc operator"); AddInput("X", "the input of fc operator");
AddInput("W", "the weight of fc operator"); AddInput("W", "the weight of fc operator");
...@@ -66,4 +72,5 @@ USE_OP(rowwise_add); ...@@ -66,4 +72,5 @@ USE_OP(rowwise_add);
USE_OP(sigmoid); USE_OP(sigmoid);
USE_OP(softmax); USE_OP(softmax);
namespace ops = paddle::operators;
REGISTER_OP(fc, ops::FullyConnectedOp, ops::FullyConnectedOpMaker); REGISTER_OP(fc, ops::FullyConnectedOp, ops::FullyConnectedOpMaker);
...@@ -13,8 +13,6 @@ See the License for the specific language governing permissions and ...@@ -13,8 +13,6 @@ See the License for the specific language governing permissions and
limitations under the License. */ limitations under the License. */
#include "paddle/operators/fill_zeros_like_op.h" #include "paddle/operators/fill_zeros_like_op.h"
#include "paddle/framework/op_registry.h"
#include "paddle/framework/tensor.h"
namespace paddle { namespace paddle {
namespace operators { namespace operators {
...@@ -22,14 +20,14 @@ namespace operators { ...@@ -22,14 +20,14 @@ namespace operators {
class FillZerosLikeOp : public framework::OperatorWithKernel { class FillZerosLikeOp : public framework::OperatorWithKernel {
protected: protected:
void InferShape(const framework::InferShapeContext &ctx) const override { void InferShape(const framework::InferShapeContext &ctx) const override {
PADDLE_ENFORCE(ctx.InputSize() == 1UL, PADDLE_ENFORCE_EQ(ctx.InputSize(), 1UL,
"Input size of FillZerosLikeOp must be one."); "Input size of FillZerosLikeOp must be one.");
PADDLE_ENFORCE(ctx.OutputSize() == 1UL, PADDLE_ENFORCE_EQ(ctx.OutputSize(), 1UL,
"Output size of AddOp must be one."); "Output size of AddOp must be one.");
PADDLE_ENFORCE(ctx.InputVar(0) != nullptr, PADDLE_ENFORCE_NOT_NULL(ctx.InputVar(0),
"Input of FillZerosLikeOp must be set."); "Input of FillZerosLikeOp must be set.");
PADDLE_ENFORCE(ctx.OutputVar(0) != nullptr, PADDLE_ENFORCE_NOT_NULL(ctx.OutputVar(0),
"Output of FillZerosLikeOp must be set."); "Output of FillZerosLikeOp must be set.");
ctx.Output<framework::Tensor>(0)->Resize( ctx.Output<framework::Tensor>(0)->Resize(
ctx.Input<framework::Tensor>(0)->dims()); ctx.Input<framework::Tensor>(0)->dims());
} }
...@@ -52,8 +50,8 @@ The output will have the same size with input. ...@@ -52,8 +50,8 @@ The output will have the same size with input.
} // namespace operators } // namespace operators
} // namespace paddle } // namespace paddle
REGISTER_OP(fill_zeros_like, paddle::operators::FillZerosLikeOp, namespace ops = paddle::operators;
paddle::operators::FillZerosLikeOpMaker); REGISTER_OP(fill_zeros_like, ops::FillZerosLikeOp, ops::FillZerosLikeOpMaker);
REGISTER_OP_CPU_KERNEL( REGISTER_OP_CPU_KERNEL(
fill_zeros_like, fill_zeros_like,
paddle::operators::FillZerosLikeKernel<paddle::platform::CPUPlace, float>); ops::FillZerosLikeKernel<paddle::platform::CPUPlace, float>);
...@@ -12,9 +12,11 @@ ...@@ -12,9 +12,11 @@
See the License for the specific language governing permissions and See the License for the specific language governing permissions and
limitations under the License. */ limitations under the License. */
#define EIGEN_USE_GPU
#include "paddle/framework/op_registry.h" #include "paddle/framework/op_registry.h"
#include "paddle/operators/fill_zeros_like_op.h" #include "paddle/operators/fill_zeros_like_op.h"
namespace ops = paddle::operators;
REGISTER_OP_GPU_KERNEL( REGISTER_OP_GPU_KERNEL(
fill_zeros_like, fill_zeros_like,
paddle::operators::FillZerosLikeKernel<paddle::platform::GPUPlace, float>); ops::FillZerosLikeKernel<paddle::platform::GPUPlace, float>);
...@@ -13,9 +13,8 @@ See the License for the specific language governing permissions and ...@@ -13,9 +13,8 @@ See the License for the specific language governing permissions and
limitations under the License. */ limitations under the License. */
#pragma once #pragma once
#include "glog/logging.h"
#include "paddle/framework/eigen.h" #include "paddle/framework/eigen.h"
#include "paddle/framework/operator.h" #include "paddle/framework/op_registry.h"
namespace paddle { namespace paddle {
namespace operators { namespace operators {
...@@ -26,7 +25,8 @@ class FillZerosLikeKernel : public framework::OpKernel { ...@@ -26,7 +25,8 @@ class FillZerosLikeKernel : public framework::OpKernel {
void Compute(const framework::ExecutionContext& context) const override { void Compute(const framework::ExecutionContext& context) const override {
auto* output = context.Output<framework::Tensor>(0); auto* output = context.Output<framework::Tensor>(0);
output->mutable_data<T>(context.GetPlace()); output->mutable_data<T>(context.GetPlace());
framework::EigenVector<T>::Flatten(*output).setZero(); auto t = framework::EigenVector<T>::Flatten(*output);
t.device(context.GetEigenDevice<Place>()) = t.constant(T(0));
} }
}; };
......
...@@ -17,20 +17,20 @@ limitations under the License. */ ...@@ -17,20 +17,20 @@ limitations under the License. */
namespace paddle { namespace paddle {
namespace operators { namespace operators {
class MeanOp : public OperatorWithKernel { class MeanOp : public framework::OperatorWithKernel {
protected: protected:
void InferShape(const InferShapeContext &ctx) const override { void InferShape(const framework::InferShapeContext &ctx) const override {
PADDLE_ENFORCE(ctx.InputSize() == 1, "Input size of AddOp must be one"); PADDLE_ENFORCE_EQ(ctx.InputSize(), 1, "Input size of AddOp must be one");
PADDLE_ENFORCE(ctx.OutputSize() == 1, "Output size of AddOp must be one"); PADDLE_ENFORCE_EQ(ctx.OutputSize(), 1, "Output size of AddOp must be one");
PADDLE_ENFORCE(ctx.InputVar(0) != nullptr && ctx.OutputVar(0) != nullptr, PADDLE_ENFORCE_NOT_NULL(ctx.InputVar(0), "input should be set");
"Input/Output of MeanOp must be initialized."); PADDLE_ENFORCE_NOT_NULL(ctx.OutputVar(0), "output should be set");
ctx.Output<Tensor>(0)->Resize(framework::make_ddim({1})); ctx.Output<Tensor>(0)->Resize(framework::make_ddim({1}));
} }
}; };
class MeanOpMaker : public OpProtoAndCheckerMaker { class MeanOpMaker : public framework::OpProtoAndCheckerMaker {
public: public:
MeanOpMaker(OpProto *proto, OpAttrChecker *op_checker) MeanOpMaker(framework::OpProto *proto, framework::OpAttrChecker *op_checker)
: OpProtoAndCheckerMaker(proto, op_checker) { : OpProtoAndCheckerMaker(proto, op_checker) {
AddInput("X", "The input of mean op"); AddInput("X", "The input of mean op");
AddOutput("Out", "The output of mean op").IgnoreGradient(); AddOutput("Out", "The output of mean op").IgnoreGradient();
...@@ -38,9 +38,9 @@ class MeanOpMaker : public OpProtoAndCheckerMaker { ...@@ -38,9 +38,9 @@ class MeanOpMaker : public OpProtoAndCheckerMaker {
} }
}; };
class MeanGradOp : public OperatorWithKernel { class MeanGradOp : public framework::OperatorWithKernel {
protected: protected:
void InferShape(const InferShapeContext &ctx) const override { void InferShape(const framework::InferShapeContext &ctx) const override {
ctx.Output<Tensor>("X" + framework::kGradVarSuffix) ctx.Output<Tensor>("X" + framework::kGradVarSuffix)
->Resize(ctx.Input<Tensor>("X")->dims()); ->Resize(ctx.Input<Tensor>("X")->dims());
} }
...@@ -49,7 +49,10 @@ class MeanGradOp : public OperatorWithKernel { ...@@ -49,7 +49,10 @@ class MeanGradOp : public OperatorWithKernel {
} // namespace operators } // namespace operators
} // namespace paddle } // namespace paddle
namespace ops = paddle::operators;
REGISTER_OP(mean, ops::MeanOp, ops::MeanOpMaker); REGISTER_OP(mean, ops::MeanOp, ops::MeanOpMaker);
REGISTER_OP_CPU_KERNEL(mean, ops::MeanKernel<ops::CPUPlace, float>); REGISTER_OP_CPU_KERNEL(mean,
ops::MeanKernel<paddle::platform::CPUPlace, float>);
REGISTER_GRADIENT_OP(mean, mean_grad, ops::MeanGradOp); REGISTER_GRADIENT_OP(mean, mean_grad, ops::MeanGradOp);
REGISTER_OP_CPU_KERNEL(mean_grad, ops::MeanGradKernel<ops::CPUPlace, float>); REGISTER_OP_CPU_KERNEL(mean_grad,
ops::MeanGradKernel<paddle::platform::CPUPlace, float>);
...@@ -16,5 +16,8 @@ ...@@ -16,5 +16,8 @@
#include "paddle/operators/mean_op.h" #include "paddle/operators/mean_op.h"
REGISTER_OP_GPU_KERNEL(mean, ops::MeanKernel<ops::GPUPlace, float>); namespace ops = paddle::operators;
REGISTER_OP_GPU_KERNEL(mean_grad, ops::MeanGradKernel<ops::GPUPlace, float>); REGISTER_OP_GPU_KERNEL(mean,
ops::MeanKernel<paddle::platform::GPUPlace, float>);
REGISTER_OP_GPU_KERNEL(mean_grad,
ops::MeanGradKernel<paddle::platform::GPUPlace, float>);
...@@ -13,15 +13,24 @@ See the License for the specific language governing permissions and ...@@ -13,15 +13,24 @@ See the License for the specific language governing permissions and
limitations under the License. */ limitations under the License. */
#pragma once #pragma once
#include "paddle/operators/type_alias.h" #include "paddle/framework/eigen.h"
#include "paddle/framework/op_registry.h"
namespace paddle { namespace paddle {
namespace operators { namespace operators {
using Tensor = framework::Tensor;
template <typename T, int MajorType = Eigen::RowMajor,
typename IndexType = Eigen::DenseIndex>
using EigenScalar = framework::EigenScalar<T, MajorType, IndexType>;
template <typename T, int MajorType = Eigen::RowMajor,
typename IndexType = Eigen::DenseIndex>
using EigenVector = framework::EigenVector<T, MajorType, IndexType>;
template <typename Place, typename T> template <typename Place, typename T>
class MeanKernel : public OpKernel { class MeanKernel : public framework::OpKernel {
public: public:
void Compute(const ExecutionContext& context) const override { void Compute(const framework::ExecutionContext& context) const override {
auto input = context.Input<Tensor>(0); auto input = context.Input<Tensor>(0);
auto output = context.Output<Tensor>(0); auto output = context.Output<Tensor>(0);
...@@ -36,9 +45,9 @@ class MeanKernel : public OpKernel { ...@@ -36,9 +45,9 @@ class MeanKernel : public OpKernel {
}; };
template <typename Place, typename T> template <typename Place, typename T>
class MeanGradKernel : public OpKernel { class MeanGradKernel : public framework::OpKernel {
public: public:
void Compute(const ExecutionContext& context) const override { void Compute(const framework::ExecutionContext& context) const override {
auto OG = context.Input<Tensor>("Out" + framework::kGradVarSuffix); auto OG = context.Input<Tensor>("Out" + framework::kGradVarSuffix);
PADDLE_ENFORCE(framework::product(OG->dims()) == 1, PADDLE_ENFORCE(framework::product(OG->dims()) == 1,
"Mean Gradient should be scalar"); "Mean Gradient should be scalar");
......
...@@ -18,9 +18,9 @@ ...@@ -18,9 +18,9 @@
namespace paddle { namespace paddle {
namespace operators { namespace operators {
class MulOp : public OperatorWithKernel { class MulOp : public framework::OperatorWithKernel {
protected: protected:
void InferShape(const InferShapeContext &ctx) const override { void InferShape(const framework::InferShapeContext &ctx) const override {
PADDLE_ENFORCE(ctx.InputSize() == 2, "The mul op must take two inputs"); PADDLE_ENFORCE(ctx.InputSize() == 2, "The mul op must take two inputs");
auto dim0 = ctx.Input<Tensor>(0)->dims(); auto dim0 = ctx.Input<Tensor>(0)->dims();
auto dim1 = ctx.Input<Tensor>(1)->dims(); auto dim1 = ctx.Input<Tensor>(1)->dims();
...@@ -38,9 +38,9 @@ class MulOp : public OperatorWithKernel { ...@@ -38,9 +38,9 @@ class MulOp : public OperatorWithKernel {
} }
}; };
class MulOpMaker : public OpProtoAndCheckerMaker { class MulOpMaker : public framework::OpProtoAndCheckerMaker {
public: public:
MulOpMaker(OpProto *proto, OpAttrChecker *op_checker) MulOpMaker(framework::OpProto *proto, framework::OpAttrChecker *op_checker)
: OpProtoAndCheckerMaker(proto, op_checker) { : OpProtoAndCheckerMaker(proto, op_checker) {
AddInput("X", "The first input of mul op"); AddInput("X", "The first input of mul op");
AddInput("Y", "The second input of mul op"); AddInput("Y", "The second input of mul op");
...@@ -53,9 +53,9 @@ The equation is: Out = X * Y ...@@ -53,9 +53,9 @@ The equation is: Out = X * Y
} }
}; };
class MulOpGrad : public OperatorWithKernel { class MulOpGrad : public framework::OperatorWithKernel {
protected: protected:
void InferShape(const InferShapeContext &ctx) const override {} void InferShape(const framework::InferShapeContext &ctx) const override {}
std::string DebugString() const override { std::string DebugString() const override {
LOG(INFO) << "MulGrad"; LOG(INFO) << "MulGrad";
return ""; return "";
...@@ -65,7 +65,8 @@ class MulOpGrad : public OperatorWithKernel { ...@@ -65,7 +65,8 @@ class MulOpGrad : public OperatorWithKernel {
} // namespace operators } // namespace operators
} // namespace paddle } // namespace paddle
namespace ops = paddle::operators;
REGISTER_OP(mul, ops::MulOp, ops::MulOpMaker); REGISTER_OP(mul, ops::MulOp, ops::MulOpMaker);
REGISTER_GRADIENT_OP(mul, mul_grad, ops::MulOpGrad); REGISTER_GRADIENT_OP(mul, mul_grad, ops::MulOpGrad);
REGISTER_OP_CPU_KERNEL(mul, ops::MulKernel<ops::CPUPlace, float>); REGISTER_OP_CPU_KERNEL(mul, ops::MulKernel<paddle::platform::CPUPlace, float>);
...@@ -15,5 +15,4 @@ ...@@ -15,5 +15,4 @@
#define EIGEN_USE_GPU #define EIGEN_USE_GPU
#include "paddle/operators/mul_op.h" #include "paddle/operators/mul_op.h"
REGISTER_OP_GPU_KERNEL(mul, ops::MulKernel<paddle::platform::GPUPlace, float>);
REGISTER_OP_GPU_KERNEL(mul, ops::MulKernel<ops::GPUPlace, float>);
...@@ -15,18 +15,28 @@ ...@@ -15,18 +15,28 @@
#pragma once #pragma once
#include "paddle/operators/math/math_function.h" #include "paddle/operators/math/math_function.h"
#include "paddle/operators/type_alias.h"
#include "paddle/framework/eigen.h"
#include "paddle/framework/op_registry.h"
namespace paddle { namespace paddle {
namespace operators { namespace operators {
using Tensor = framework::Tensor;
template <typename T, int MajorType = Eigen::RowMajor,
typename IndexType = Eigen::DenseIndex>
using EigenMatrix = framework::EigenMatrix<T, MajorType, IndexType>;
template <typename Place, typename T> template <typename Place, typename T>
class MulKernel : public OpKernel { class MulKernel : public framework::OpKernel {
public: public:
void Compute(const ExecutionContext& context) const override { void Compute(const framework::ExecutionContext& context) const override {
auto* input0 = context.Input<Tensor>("X"); Eigen::array<Eigen::IndexPair<Eigen::DenseIndex>, 1> dim_pair = {
auto* input1 = context.Input<Tensor>("Y"); {Eigen::IndexPair<Eigen::DenseIndex>(1, 0)}};
auto* output = context.Output<Tensor>(0);
auto input0 = context.Input<Tensor>("X");
auto input1 = context.Input<Tensor>("Y");
auto output = context.Output<Tensor>(0);
output->mutable_data<T>(context.GetPlace()); output->mutable_data<T>(context.GetPlace());
......
...@@ -15,7 +15,6 @@ ...@@ -15,7 +15,6 @@
*/ */
#include "paddle/operators/net_op.h" #include "paddle/operators/net_op.h"
#include "paddle/framework/op_registry.h"
namespace paddle { namespace paddle {
namespace operators { namespace operators {
......
...@@ -14,13 +14,7 @@ limitations under the License. */ ...@@ -14,13 +14,7 @@ limitations under the License. */
#pragma once #pragma once
#include "paddle/framework/op_desc.pb.h"
#include "paddle/framework/op_proto.pb.h"
#include "paddle/framework/op_registry.h" #include "paddle/framework/op_registry.h"
#include "paddle/framework/operator.h"
#include "paddle/framework/scope.h"
#include "paddle/operators/type_alias.h"
#include "paddle/platform/device_context.h"
namespace paddle { namespace paddle {
namespace operators { namespace operators {
...@@ -65,20 +59,29 @@ class NetOp : public framework::OperatorBase { ...@@ -65,20 +59,29 @@ class NetOp : public framework::OperatorBase {
} }
} }
bool SupportGPU() const override {
for (auto& op : ops_) {
if (!op->SupportGPU()) {
return false;
}
}
return true;
}
/** /**
* @brief Add an operator by ptr * @brief Add an operator by ptr
*/ */
void AddOp(const std::shared_ptr<OperatorBase>& op) { void AddOp(const std::shared_ptr<OperatorBase>& op) {
PADDLE_ENFORCE(!add_op_done_, "Cannot AddOp when this network is sealed"); PADDLE_ENFORCE(!add_op_done_, "Cannot AddOp when this network is sealed");
PADDLE_ENFORCE(op != nullptr, "Cannot Insert Null op"); PADDLE_ENFORCE_NOT_NULL(op, "Cannot Insert Null op");
ops_.push_back(op); ops_.push_back(op);
} }
void InsertOp(size_t pos, const std::shared_ptr<OperatorBase>& op) { void InsertOp(size_t pos, const std::shared_ptr<OperatorBase>& op) {
PADDLE_ENFORCE(!add_op_done_, PADDLE_ENFORCE(!add_op_done_,
"Cannot InsertOp when this network is sealed"); "Cannot InsertOp when this network is sealed");
PADDLE_ENFORCE(op != nullptr, "Cannot Insert Null op"); PADDLE_ENFORCE_NOT_NULL(op, "Cannot Insert Null op");
PADDLE_ENFORCE(pos <= ops_.size(), "Out of range"); PADDLE_ENFORCE_LE(pos, ops_.size(), "Out of range");
ops_.insert(ops_.begin() + pos, op); ops_.insert(ops_.begin() + pos, op);
} }
......
...@@ -2,31 +2,27 @@ ...@@ -2,31 +2,27 @@
#include <gtest/gtest.h> #include <gtest/gtest.h>
#include "paddle/framework/op_registry.h"
#include "paddle/framework/operator.h"
namespace paddle { namespace paddle {
namespace operators { namespace operators {
using Scope = framework::Scope;
using DeviceContext = platform::DeviceContext;
static int infer_shape_cnt = 0; static int infer_shape_cnt = 0;
static int run_cnt = 0; static int run_cnt = 0;
class TestOp : public OperatorBase { class TestOp : public framework::OperatorBase {
public: public:
void InferShape(const framework::Scope& scope) const override { void InferShape(const Scope& scope) const override { ++infer_shape_cnt; }
++infer_shape_cnt; void Run(const Scope& scope,
} const platform::DeviceContext& dev_ctx) const override {
void Run(const framework::Scope& scope,
const paddle::platform::DeviceContext& dev_ctx) const override {
++run_cnt; ++run_cnt;
} }
}; };
class EmptyOp : public OperatorBase { class EmptyOp : public framework::OperatorBase {
public: public:
void InferShape(const Scope& scope) const override {} void InferShape(const Scope& scope) const override {}
void Run(const Scope& scope, void Run(const Scope& scope, const DeviceContext& dev_ctx) const override {}
const platform::DeviceContext& dev_ctx) const override {}
}; };
template <typename T> template <typename T>
...@@ -72,7 +68,7 @@ TEST(OpKernel, all) { ...@@ -72,7 +68,7 @@ TEST(OpKernel, all) {
net->Run(scope, dev_ctx); net->Run(scope, dev_ctx);
ASSERT_EQ(2, infer_shape_cnt); ASSERT_EQ(2, infer_shape_cnt);
ASSERT_EQ(2, run_cnt); ASSERT_EQ(2, run_cnt);
ASSERT_THROW(net->AddOp(op2), paddle::platform::EnforceNotMet); ASSERT_THROW(net->AddOp(op2), platform::EnforceNotMet);
} }
TEST(NetOp, insert_op) { TEST(NetOp, insert_op) {
......
...@@ -14,17 +14,19 @@ ...@@ -14,17 +14,19 @@
#include "paddle/operators/recurrent_op.h" #include "paddle/operators/recurrent_op.h"
#include <glog/logging.h>
#include <cstring> #include <cstring>
#include <sstream> #include <sstream>
#include "paddle/framework/op_registry.h" #include "paddle/framework/op_registry.h"
#include "paddle/operators/net_op.h" #include "paddle/operators/net_op.h"
#include "paddle/platform/enforce.h"
namespace paddle { namespace paddle {
namespace operators { namespace operators {
using Scope = framework::Scope;
using Variable = framework::Variable;
using Tensor = framework::Tensor;
void RecurrentAlgorithm::InferShape(const Scope& scope) const { void RecurrentAlgorithm::InferShape(const Scope& scope) const {
seq_len_ = scope.FindVar((arg_->inlinks[0]).external) seq_len_ = scope.FindVar((arg_->inlinks[0]).external)
->GetMutable<Tensor>() ->GetMutable<Tensor>()
...@@ -135,10 +137,11 @@ void RecurrentOp::Init() { ...@@ -135,10 +137,11 @@ void RecurrentOp::Init() {
alg_.Init(std::move(arg)); alg_.Init(std::move(arg));
} }
class RecurrentAlgorithmProtoAndCheckerMaker : public OpProtoAndCheckerMaker { class RecurrentAlgorithmProtoAndCheckerMaker
: public framework::OpProtoAndCheckerMaker {
public: public:
RecurrentAlgorithmProtoAndCheckerMaker(OpProto* proto, RecurrentAlgorithmProtoAndCheckerMaker(framework::OpProto* proto,
OpAttrChecker* op_checker) framework::OpAttrChecker* op_checker)
: OpProtoAndCheckerMaker(proto, op_checker) { : OpProtoAndCheckerMaker(proto, op_checker) {
const auto& name = RecurrentOp::kArgName; const auto& name = RecurrentOp::kArgName;
// inputs and outputs stored in proto // inputs and outputs stored in proto
......
...@@ -27,6 +27,10 @@ namespace operators { ...@@ -27,6 +27,10 @@ namespace operators {
using framework::make_ddim; using framework::make_ddim;
using framework::DDim; using framework::DDim;
using framework::Tensor;
using framework::Variable;
using framework::Scope;
using framework::OpRegistry;
class RecurrentOpTest : public ::testing::Test { class RecurrentOpTest : public ::testing::Test {
protected: protected:
...@@ -164,7 +168,7 @@ class RecurrentOpTest : public ::testing::Test { ...@@ -164,7 +168,7 @@ class RecurrentOpTest : public ::testing::Test {
// father scope // father scope
Scope scope_; Scope scope_;
std::shared_ptr<OperatorBase> rnn_op_; std::shared_ptr<framework::OperatorBase> rnn_op_;
}; };
TEST_F(RecurrentOpTest, Run) { TEST_F(RecurrentOpTest, Run) {
......
...@@ -18,7 +18,9 @@ namespace paddle { ...@@ -18,7 +18,9 @@ namespace paddle {
namespace operators { namespace operators {
namespace rnn { namespace rnn {
namespace fmw = paddle::framework; namespace f = paddle::framework;
using Tensor = framework::Tensor;
void SegmentInputs(const std::vector<Scope*>& step_scopes, void SegmentInputs(const std::vector<Scope*>& step_scopes,
const std::vector<Link>& inlinks, const size_t seq_len, const std::vector<Link>& inlinks, const size_t seq_len,
...@@ -30,10 +32,10 @@ void SegmentInputs(const std::vector<Scope*>& step_scopes, ...@@ -30,10 +32,10 @@ void SegmentInputs(const std::vector<Scope*>& step_scopes,
inlinks[i].external); inlinks[i].external);
Tensor* input = input_var->GetMutable<Tensor>(); Tensor* input = input_var->GetMutable<Tensor>();
fmw::DDim dims = input->dims(); f::DDim dims = input->dims();
PADDLE_ENFORCE(static_cast<size_t>(dims[0]) == seq_len, PADDLE_ENFORCE(static_cast<size_t>(dims[0]) == seq_len,
"all the inlinks must have same length"); "all the inlinks must have same length");
fmw::DDim step_dims = slice_ddim(dims, 1, dims.size()); f::DDim step_dims = slice_ddim(dims, 1, dims.size());
for (size_t j = 0; j < seq_len; j++) { for (size_t j = 0; j < seq_len; j++) {
Tensor* step_input = Tensor* step_input =
step_scopes[j]->NewVar(inlinks[i].internal)->GetMutable<Tensor>(); step_scopes[j]->NewVar(inlinks[i].internal)->GetMutable<Tensor>();
...@@ -58,11 +60,10 @@ void ConcatOutputs(const std::vector<Scope*>& step_scopes, ...@@ -58,11 +60,10 @@ void ConcatOutputs(const std::vector<Scope*>& step_scopes,
auto step_scope_var = step_scopes[0]->FindVar(outlinks[i].internal); auto step_scope_var = step_scopes[0]->FindVar(outlinks[i].internal);
PADDLE_ENFORCE(step_scope_var != nullptr, "%s not in scope", PADDLE_ENFORCE(step_scope_var != nullptr, "%s not in scope",
outlinks[i].internal); outlinks[i].internal);
fmw::DDim step_dims = f::DDim step_dims = step_scope_var->template GetMutable<Tensor>()->dims();
step_scope_var->template GetMutable<Tensor>()->dims();
std::vector<int> dims_vec = vectorize(step_dims); std::vector<int> dims_vec = vectorize(step_dims);
dims_vec.insert(dims_vec.begin(), seq_len); dims_vec.insert(dims_vec.begin(), seq_len);
output->Resize(fmw::make_ddim(dims_vec)); output->Resize(f::make_ddim(dims_vec));
} else { } else {
output->mutable_data<float>(platform::CPUPlace()); output->mutable_data<float>(platform::CPUPlace());
for (size_t j = 0; j < seq_len; j++) { for (size_t j = 0; j < seq_len; j++) {
...@@ -104,7 +105,7 @@ void LinkMemories(const std::vector<Scope*>& scopes, ...@@ -104,7 +105,7 @@ void LinkMemories(const std::vector<Scope*>& scopes,
} }
void InitArgument(const ArgumentName& name, Argument* arg, void InitArgument(const ArgumentName& name, Argument* arg,
const OperatorBase& op) { const framework::OperatorBase& op) {
arg->step_net = op.Input(name.step_net); arg->step_net = op.Input(name.step_net);
arg->step_scopes = op.Output(name.step_scopes); arg->step_scopes = op.Output(name.step_scopes);
......
...@@ -17,12 +17,13 @@ ...@@ -17,12 +17,13 @@
#include <string> #include <string>
#include "paddle/framework/operator.h" #include "paddle/framework/operator.h"
#include "paddle/operators/type_alias.h"
namespace paddle { namespace paddle {
namespace operators { namespace operators {
namespace rnn { namespace rnn {
using Scope = framework::Scope;
/** /**
* Memory of a RNN (same as the role of `Momory` in PaddlePaddle). * Memory of a RNN (same as the role of `Momory` in PaddlePaddle).
* *
...@@ -86,7 +87,7 @@ void LinkMemories(const std::vector<Scope*>& step_scopes, ...@@ -86,7 +87,7 @@ void LinkMemories(const std::vector<Scope*>& step_scopes,
const int offset, bool infer_shape_mode); const int offset, bool infer_shape_mode);
void InitArgument(const ArgumentName& name, Argument* arg, void InitArgument(const ArgumentName& name, Argument* arg,
const OperatorBase& op); const framework::OperatorBase& op);
} // namespace rnn } // namespace rnn
} // namespace operators } // namespace operators
......
...@@ -13,12 +13,13 @@ ...@@ -13,12 +13,13 @@
limitations under the License. */ limitations under the License. */
#include "paddle/operators/rowwise_add_op.h" #include "paddle/operators/rowwise_add_op.h"
namespace paddle { namespace paddle {
namespace operators { namespace operators {
class RowWiseAddOp : public OperatorWithKernel { class RowWiseAddOp : public framework::OperatorWithKernel {
protected: protected:
void InferShape(const InferShapeContext &ctx) const override { void InferShape(const framework::InferShapeContext &ctx) const override {
PADDLE_ENFORCE(ctx.InputSize() == 2UL, PADDLE_ENFORCE(ctx.InputSize() == 2UL,
"Two inputs is needed by rowwise add"); "Two inputs is needed by rowwise add");
auto dim0 = ctx.Input<Tensor>(0)->dims(); auto dim0 = ctx.Input<Tensor>(0)->dims();
...@@ -32,9 +33,10 @@ class RowWiseAddOp : public OperatorWithKernel { ...@@ -32,9 +33,10 @@ class RowWiseAddOp : public OperatorWithKernel {
} }
}; };
class RowWiseAddOpMaker : public OpProtoAndCheckerMaker { class RowWiseAddOpMaker : public framework::OpProtoAndCheckerMaker {
public: public:
RowWiseAddOpMaker(OpProto *proto, OpAttrChecker *op_checker) RowWiseAddOpMaker(framework::OpProto *proto,
framework::OpAttrChecker *op_checker)
: OpProtoAndCheckerMaker(proto, op_checker) { : OpProtoAndCheckerMaker(proto, op_checker) {
AddInput("X", "The left input of row-wise add op, must be matrix"); AddInput("X", "The left input of row-wise add op, must be matrix");
AddInput("b", "The right input of row-wise add op, must be vector"); AddInput("b", "The right input of row-wise add op, must be vector");
...@@ -50,6 +52,7 @@ for i in xrange(X.shape[0]): ...@@ -50,6 +52,7 @@ for i in xrange(X.shape[0]):
} // namespace operators } // namespace operators
} // namespace paddle } // namespace paddle
namespace ops = paddle::operators;
REGISTER_OP(rowwise_add, ops::RowWiseAddOp, ops::RowWiseAddOpMaker); REGISTER_OP(rowwise_add, ops::RowWiseAddOp, ops::RowWiseAddOpMaker);
REGISTER_OP_CPU_KERNEL(rowwise_add, REGISTER_OP_CPU_KERNEL(
ops::RowWiseAddKernel<ops::CPUPlace, float>); rowwise_add, ops::RowWiseAddKernel<paddle::platform::CPUPlace, float>);
...@@ -15,5 +15,6 @@ ...@@ -15,5 +15,6 @@
#define EIGEN_USE_GPU #define EIGEN_USE_GPU
#include "paddle/operators/rowwise_add_op.h" #include "paddle/operators/rowwise_add_op.h"
REGISTER_OP_GPU_KERNEL(rowwise_add, namespace ops = paddle::operators;
ops::RowWiseAddKernel<ops::GPUPlace, float>); REGISTER_OP_GPU_KERNEL(
rowwise_add, ops::RowWiseAddKernel<paddle::platform::GPUPlace, float>);
...@@ -13,15 +13,24 @@ ...@@ -13,15 +13,24 @@
limitations under the License. */ limitations under the License. */
#pragma once #pragma once
#include "paddle/operators/type_alias.h" #include "paddle/framework/eigen.h"
#include "paddle/framework/op_registry.h"
namespace paddle { namespace paddle {
namespace operators { namespace operators {
using Tensor = framework::Tensor;
template <typename T, int MajorType = Eigen::RowMajor,
typename IndexType = Eigen::DenseIndex>
using EigenVector = framework::EigenVector<T, MajorType, IndexType>;
template <typename T, int MajorType = Eigen::RowMajor,
typename IndexType = Eigen::DenseIndex>
using EigenMatrix = framework::EigenMatrix<T, MajorType, IndexType>;
template <typename Place, typename T> template <typename Place, typename T>
class RowWiseAddKernel : public OpKernel { class RowWiseAddKernel : public framework::OpKernel {
public: public:
void Compute(const ExecutionContext& context) const override { void Compute(const framework::ExecutionContext& context) const override {
auto out = context.Output<Tensor>(0); auto out = context.Output<Tensor>(0);
out->mutable_data<T>(context.GetPlace()); out->mutable_data<T>(context.GetPlace());
......
...@@ -17,23 +17,23 @@ limitations under the License. */ ...@@ -17,23 +17,23 @@ limitations under the License. */
namespace paddle { namespace paddle {
namespace operators { namespace operators {
class SGDOp : public OperatorWithKernel { class SGDOp : public framework::OperatorWithKernel {
protected: protected:
void InferShape(const InferShapeContext &ctx) const override { void InferShape(const framework::InferShapeContext &ctx) const override {
PADDLE_ENFORCE(ctx.InputSize() == 2, "Input size of SGDOp must be two"); PADDLE_ENFORCE_EQ(ctx.InputSize(), 2, "Input size of SGDOp must be two");
PADDLE_ENFORCE(ctx.OutputSize() == 1, "Output size of SGDOp must be one"); PADDLE_ENFORCE_EQ(ctx.OutputSize(), 1, "Output size of SGDOp must be one");
PADDLE_ENFORCE(ctx.InputVar(0) != nullptr, "inputs[0] mast be set"); PADDLE_ENFORCE_NOT_NULL(ctx.InputVar(0), "inputs[0] mast be set");
PADDLE_ENFORCE(ctx.InputVar(1) != nullptr, "inputs[1] mast be set"); PADDLE_ENFORCE_NOT_NULL(ctx.InputVar(1), "inputs[1] mast be set");
PADDLE_ENFORCE(ctx.OutputVar(0) != nullptr, "outputs[0] mast be set"); PADDLE_ENFORCE_NOT_NULL(ctx.OutputVar(0), "outputs[0] mast be set");
PADDLE_ENFORCE(ctx.Input<Tensor>(0)->dims() == ctx.Input<Tensor>(1)->dims(), PADDLE_ENFORCE(ctx.Input<Tensor>(0)->dims() == ctx.Input<Tensor>(1)->dims(),
"Two input of SGD Op's dimension must be same."); "Two input of SGD Op's dimension must be same.");
ctx.Output<Tensor>(0)->Resize(ctx.Input<Tensor>(0)->dims()); ctx.Output<Tensor>(0)->Resize(ctx.Input<Tensor>(0)->dims());
} }
}; };
class SGDOpMaker : public OpProtoAndCheckerMaker { class SGDOpMaker : public framework::OpProtoAndCheckerMaker {
public: public:
SGDOpMaker(OpProto *proto, OpAttrChecker *op_checker) SGDOpMaker(framework::OpProto *proto, framework::OpAttrChecker *op_checker)
: OpProtoAndCheckerMaker(proto, op_checker) { : OpProtoAndCheckerMaker(proto, op_checker) {
AddInput("param", "input parameter"); AddInput("param", "input parameter");
AddInput("grad", "input gradient"); AddInput("grad", "input gradient");
...@@ -51,5 +51,7 @@ param_out = param - learning_rate * grad; ...@@ -51,5 +51,7 @@ param_out = param - learning_rate * grad;
} // namespace operators } // namespace operators
} // namespace paddle } // namespace paddle
namespace ops = paddle::operators;
REGISTER_OP(sgd, ops::SGDOp, ops::SGDOpMaker); REGISTER_OP(sgd, ops::SGDOp, ops::SGDOpMaker);
REGISTER_OP_CPU_KERNEL(sgd, ops::SGDOpKernel<ops::CPUPlace, float>); REGISTER_OP_CPU_KERNEL(sgd,
ops::SGDOpKernel<paddle::platform::CPUPlace, float>);
...@@ -15,4 +15,6 @@ ...@@ -15,4 +15,6 @@
#define EIGEN_USE_GPU #define EIGEN_USE_GPU
#include "paddle/operators/sgd_op.h" #include "paddle/operators/sgd_op.h"
REGISTER_OP_GPU_KERNEL(sgd, ops::SGDOpKernel<ops::GPUPlace, float>); namespace ops = paddle::operators;
REGISTER_OP_GPU_KERNEL(sgd,
ops::SGDOpKernel<paddle::platform::GPUPlace, float>);
...@@ -13,15 +13,21 @@ See the License for the specific language governing permissions and ...@@ -13,15 +13,21 @@ See the License for the specific language governing permissions and
limitations under the License. */ limitations under the License. */
#pragma once #pragma once
#include "paddle/operators/type_alias.h" #include "paddle/framework/eigen.h"
#include "paddle/framework/op_registry.h"
namespace paddle { namespace paddle {
namespace operators { namespace operators {
using Tensor = framework::Tensor;
template <typename T, int MajorType = Eigen::RowMajor,
typename IndexType = Eigen::DenseIndex>
using EigenVector = framework::EigenVector<T, MajorType, IndexType>;
template <typename Place, typename T> template <typename Place, typename T>
class SGDOpKernel : public OpKernel { class SGDOpKernel : public framework::OpKernel {
public: public:
void Compute(const ExecutionContext& ctx) const override { void Compute(const framework::ExecutionContext& ctx) const override {
auto param = ctx.Input<Tensor>("param"); auto param = ctx.Input<Tensor>("param");
auto grad = ctx.Input<Tensor>("grad"); auto grad = ctx.Input<Tensor>("grad");
auto param_out = ctx.Output<Tensor>(0); auto param_out = ctx.Output<Tensor>(0);
......
...@@ -13,21 +13,23 @@ ...@@ -13,21 +13,23 @@
limitations under the License. */ limitations under the License. */
#include "paddle/operators/sigmoid_op.h" #include "paddle/operators/sigmoid_op.h"
namespace paddle { namespace paddle {
namespace operators { namespace operators {
class SigmoidOp : public OperatorWithKernel { class SigmoidOp : public framework::OperatorWithKernel {
protected: protected:
void InferShape(const InferShapeContext &ctx) const override { void InferShape(const framework::InferShapeContext &ctx) const override {
PADDLE_ENFORCE(ctx.InputSize() == 1, "Sigmoid Op only have one input"); PADDLE_ENFORCE(ctx.InputSize() == 1, "Sigmoid Op only have one input");
PADDLE_ENFORCE(ctx.OutputSize() == 1, "Sigmoid Op only have one output"); PADDLE_ENFORCE(ctx.OutputSize() == 1, "Sigmoid Op only have one output");
ctx.Output<Tensor>(0)->Resize(ctx.Input<Tensor>(0)->dims()); ctx.Output<Tensor>(0)->Resize(ctx.Input<Tensor>(0)->dims());
} }
}; };
class SigmoidOpMaker : public OpProtoAndCheckerMaker { class SigmoidOpMaker : public framework::OpProtoAndCheckerMaker {
public: public:
SigmoidOpMaker(OpProto *proto, OpAttrChecker *op_checker) SigmoidOpMaker(framework::OpProto *proto,
framework::OpAttrChecker *op_checker)
: OpProtoAndCheckerMaker(proto, op_checker) { : OpProtoAndCheckerMaker(proto, op_checker) {
AddInput("X", "sigmoid input"); AddInput("X", "sigmoid input");
AddOutput("Y", "sigmoid output"); AddOutput("Y", "sigmoid output");
...@@ -35,9 +37,9 @@ class SigmoidOpMaker : public OpProtoAndCheckerMaker { ...@@ -35,9 +37,9 @@ class SigmoidOpMaker : public OpProtoAndCheckerMaker {
} }
}; };
class SigmoidOpGrad : public OperatorWithKernel { class SigmoidOpGrad : public framework::OperatorWithKernel {
protected: protected:
void InferShape(const InferShapeContext &ctx) const override { void InferShape(const framework::InferShapeContext &ctx) const override {
ctx.Output<Tensor>(0)->Resize(ctx.Input<Tensor>(0)->dims()); ctx.Output<Tensor>(0)->Resize(ctx.Input<Tensor>(0)->dims());
} }
}; };
...@@ -45,9 +47,11 @@ class SigmoidOpGrad : public OperatorWithKernel { ...@@ -45,9 +47,11 @@ class SigmoidOpGrad : public OperatorWithKernel {
} // namespace operators } // namespace operators
} // namespace paddle } // namespace paddle
namespace ops = paddle::operators;
REGISTER_OP(sigmoid, ops::SigmoidOp, ops::SigmoidOpMaker); REGISTER_OP(sigmoid, ops::SigmoidOp, ops::SigmoidOpMaker);
REGISTER_GRADIENT_OP(sigmoid, sigmoid_grad, ops::SigmoidOpGrad); REGISTER_GRADIENT_OP(sigmoid, sigmoid_grad, ops::SigmoidOpGrad);
REGISTER_OP_CPU_KERNEL(sigmoid, ops::SigmoidKernel<ops::CPUPlace, float>); REGISTER_OP_CPU_KERNEL(sigmoid,
REGISTER_OP_CPU_KERNEL(sigmoid_grad, ops::SigmoidKernel<paddle::platform::CPUPlace, float>);
ops::SigmoidGradKernel<ops::CPUPlace, float>); REGISTER_OP_CPU_KERNEL(
sigmoid_grad, ops::SigmoidGradKernel<paddle::platform::CPUPlace, float>);
...@@ -15,6 +15,9 @@ ...@@ -15,6 +15,9 @@
#define EIGEN_USE_GPU #define EIGEN_USE_GPU
#include "paddle/operators/sigmoid_op.h" #include "paddle/operators/sigmoid_op.h"
REGISTER_OP_GPU_KERNEL(sigmoid, ops::SigmoidKernel<ops::GPUPlace, float>); namespace ops = paddle::operators;
REGISTER_OP_GPU_KERNEL(sigmoid_grad,
ops::SigmoidGradKernel<ops::GPUPlace, float>); REGISTER_OP_GPU_KERNEL(sigmoid,
ops::SigmoidKernel<paddle::platform::GPUPlace, float>);
REGISTER_OP_GPU_KERNEL(
sigmoid_grad, ops::SigmoidGradKernel<paddle::platform::GPUPlace, float>);
...@@ -13,16 +13,21 @@ ...@@ -13,16 +13,21 @@
limitations under the License. */ limitations under the License. */
#pragma once #pragma once
#include "paddle/framework/eigen.h"
#include "paddle/operators/type_alias.h" #include "paddle/framework/op_registry.h"
namespace paddle { namespace paddle {
namespace operators { namespace operators {
using Tensor = framework::Tensor;
template <typename T, int MajorType = Eigen::RowMajor,
typename IndexType = Eigen::DenseIndex>
using EigenVector = framework::EigenVector<T, MajorType, IndexType>;
template <typename Place, typename T> template <typename Place, typename T>
class SigmoidKernel : public OpKernel { class SigmoidKernel : public framework::OpKernel {
public: public:
void Compute(const ExecutionContext& context) const override { void Compute(const framework::ExecutionContext& context) const override {
auto input = context.Input<Tensor>(0); auto input = context.Input<Tensor>(0);
auto output = context.Output<Tensor>(0); auto output = context.Output<Tensor>(0);
output->mutable_data<T>(context.GetPlace()); output->mutable_data<T>(context.GetPlace());
...@@ -37,9 +42,9 @@ class SigmoidKernel : public OpKernel { ...@@ -37,9 +42,9 @@ class SigmoidKernel : public OpKernel {
}; };
template <typename Place, typename T> template <typename Place, typename T>
class SigmoidGradKernel : public OpKernel { class SigmoidGradKernel : public framework::OpKernel {
public: public:
void Compute(const ExecutionContext& context) const override { void Compute(const framework::ExecutionContext& context) const override {
auto Y_t = context.Input<Tensor>("Y"); auto Y_t = context.Input<Tensor>("Y");
auto dY_t = context.Input<Tensor>(framework::GradVarName("Y")); auto dY_t = context.Input<Tensor>(framework::GradVarName("Y"));
auto dX_t = context.Output<Tensor>(framework::GradVarName("X")); auto dX_t = context.Output<Tensor>(framework::GradVarName("X"));
......
...@@ -17,22 +17,23 @@ limitations under the License. */ ...@@ -17,22 +17,23 @@ limitations under the License. */
namespace paddle { namespace paddle {
namespace operators { namespace operators {
class SoftmaxOp : public OperatorWithKernel { class SoftmaxOp : public framework::OperatorWithKernel {
protected: protected:
void InferShape(const InferShapeContext &ctx) const override { void InferShape(const framework::InferShapeContext &ctx) const override {
PADDLE_ENFORCE(ctx.InputSize() == 1UL, PADDLE_ENFORCE_EQ(ctx.InputSize(), 1UL,
"Only one input is need for softmax"); "Only one input is need for softmax");
PADDLE_ENFORCE(ctx.Input<Tensor>("X")->dims().size() == 2UL, PADDLE_ENFORCE_EQ(ctx.Input<Tensor>("X")->dims().size(), 2UL,
"The input of softmax op must be matrix"); "The input of softmax op must be matrix");
PADDLE_ENFORCE(ctx.OutputSize() == 1UL, PADDLE_ENFORCE_EQ(ctx.OutputSize(), 1UL,
"Only one output is need for softmax"); "Only one output is need for softmax");
ctx.Output<Tensor>("Y")->Resize(ctx.Input<Tensor>("X")->dims()); ctx.Output<Tensor>("Y")->Resize(ctx.Input<Tensor>("X")->dims());
} }
}; };
class SoftmaxOpMaker : public OpProtoAndCheckerMaker { class SoftmaxOpMaker : public framework::OpProtoAndCheckerMaker {
public: public:
SoftmaxOpMaker(OpProto *proto, OpAttrChecker *op_checker) SoftmaxOpMaker(framework::OpProto *proto,
framework::OpAttrChecker *op_checker)
: OpProtoAndCheckerMaker(proto, op_checker) { : OpProtoAndCheckerMaker(proto, op_checker) {
AddInput("X", "input of softmax"); AddInput("X", "input of softmax");
AddOutput("Y", "output of softmax"); AddOutput("Y", "output of softmax");
...@@ -40,16 +41,16 @@ class SoftmaxOpMaker : public OpProtoAndCheckerMaker { ...@@ -40,16 +41,16 @@ class SoftmaxOpMaker : public OpProtoAndCheckerMaker {
} }
}; };
class SoftmaxOpGrad : public OperatorWithKernel { class SoftmaxOpGrad : public framework::OperatorWithKernel {
protected: protected:
void InferShape(const InferShapeContext &ctx) const override { void InferShape(const framework::InferShapeContext &ctx) const override {
PADDLE_ENFORCE(ctx.InputSize() == 3UL, PADDLE_ENFORCE_EQ(ctx.InputSize(), 3UL,
"Input of SoftmaxOpGrad should be 3, X, Y, YG"); "Input of SoftmaxOpGrad should be 3, X, Y, YG");
PADDLE_ENFORCE(ctx.OutputSize() == 1UL, PADDLE_ENFORCE_EQ(ctx.OutputSize(), 1UL,
"Output of SoftmaxOpGrad should be 1"); "Output of SoftmaxOpGrad should be 1");
PADDLE_ENFORCE(ctx.InputVar("Y") != nullptr, "Input(Y) should not be null"); PADDLE_ENFORCE_NOT_NULL(ctx.InputVar("Y"), "Input(Y) should not be null");
PADDLE_ENFORCE(ctx.InputVar(framework::GradVarName("Y")) != nullptr, PADDLE_ENFORCE_NOT_NULL(ctx.InputVar(framework::GradVarName("Y")),
"Input(Y@GRAD) should not be null"); "Input(Y@GRAD) should not be null");
PADDLE_ENFORCE(ctx.Input<Tensor>("Y")->dims() == PADDLE_ENFORCE(ctx.Input<Tensor>("Y")->dims() ==
ctx.Input<Tensor>(framework::GradVarName("Y"))->dims(), ctx.Input<Tensor>(framework::GradVarName("Y"))->dims(),
"the shape of Input(0) and Input(1) should be the same"); "the shape of Input(0) and Input(1) should be the same");
...@@ -61,8 +62,11 @@ class SoftmaxOpGrad : public OperatorWithKernel { ...@@ -61,8 +62,11 @@ class SoftmaxOpGrad : public OperatorWithKernel {
} // namespace operators } // namespace operators
} // namespace paddle } // namespace paddle
namespace ops = paddle::operators;
REGISTER_OP(softmax, ops::SoftmaxOp, ops::SoftmaxOpMaker); REGISTER_OP(softmax, ops::SoftmaxOp, ops::SoftmaxOpMaker);
REGISTER_OP_CPU_KERNEL(softmax, ops::SoftmaxKernel<ops::CPUPlace, float>); REGISTER_OP_CPU_KERNEL(softmax,
ops::SoftmaxKernel<paddle::platform::CPUPlace, float>);
REGISTER_GRADIENT_OP(softmax, softmax_grad, ops::SoftmaxOpGrad); REGISTER_GRADIENT_OP(softmax, softmax_grad, ops::SoftmaxOpGrad);
REGISTER_OP_CPU_KERNEL(softmax_grad, REGISTER_OP_CPU_KERNEL(
ops::SoftmaxGradKernel<ops::CPUPlace, float>); softmax_grad, ops::SoftmaxGradKernel<paddle::platform::CPUPlace, float>);
/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserve. /* Copyright (c) 2016 PaddlePaddle Authors All Rights Reserve.
Licensed under the Apache License, Version 2.0 (the "License"); Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License. you may not use this file except in compliance with the License.
...@@ -13,9 +13,11 @@ ...@@ -13,9 +13,11 @@
limitations under the License. */ limitations under the License. */
#define EIGEN_USE_GPU #define EIGEN_USE_GPU
#include "paddle/framework/op_registry.h"
#include "paddle/operators/softmax_op.h" #include "paddle/operators/softmax_op.h"
REGISTER_OP_GPU_KERNEL(softmax, ops::SoftmaxKernel<ops::GPUPlace, float>); namespace ops = paddle::operators;
REGISTER_OP_GPU_KERNEL(softmax_grad,
ops::SoftmaxGradKernel<ops::GPUPlace, float>); REGISTER_OP_GPU_KERNEL(softmax,
ops::SoftmaxKernel<paddle::platform::GPUPlace, float>);
REGISTER_OP_GPU_KERNEL(
softmax_grad, ops::SoftmaxGradKernel<paddle::platform::GPUPlace, float>);
...@@ -13,19 +13,21 @@ See the License for the specific language governing permissions and ...@@ -13,19 +13,21 @@ See the License for the specific language governing permissions and
limitations under the License. */ limitations under the License. */
#pragma once #pragma once
#include "paddle/framework/eigen.h"
#include "paddle/framework/ddim.h" #include "paddle/framework/op_registry.h"
#include "paddle/framework/operator.h"
#include "paddle/framework/tensor.h"
#include "paddle/operators/type_alias.h"
namespace paddle { namespace paddle {
namespace operators { namespace operators {
using Tensor = framework::Tensor;
template <typename T, int MajorType = Eigen::RowMajor,
typename IndexType = Eigen::DenseIndex>
using EigenMatrix = framework::EigenMatrix<T, MajorType, IndexType>;
template <typename Place, typename T> template <typename Place, typename T>
class SoftmaxKernel : public OpKernel { class SoftmaxKernel : public framework::OpKernel {
public: public:
void Compute(const ExecutionContext& context) const override { void Compute(const framework::ExecutionContext& context) const override {
auto input = context.Input<Tensor>("X"); auto input = context.Input<Tensor>("X");
auto output = context.Output<Tensor>("Y"); auto output = context.Output<Tensor>("Y");
output->mutable_data<T>(context.GetPlace()); output->mutable_data<T>(context.GetPlace());
...@@ -62,9 +64,9 @@ class SoftmaxKernel : public OpKernel { ...@@ -62,9 +64,9 @@ class SoftmaxKernel : public OpKernel {
}; };
template <typename Place, typename T> template <typename Place, typename T>
class SoftmaxGradKernel : public OpKernel { class SoftmaxGradKernel : public framework::OpKernel {
public: public:
void Compute(const ExecutionContext& context) const override { void Compute(const framework::ExecutionContext& context) const override {
std::shared_ptr<Tensor> scale_ = std::make_shared<Tensor>(); std::shared_ptr<Tensor> scale_ = std::make_shared<Tensor>();
auto Y = context.Input<Tensor>("Y"); auto Y = context.Input<Tensor>("Y");
......
/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserve.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
#include <random>
#include <type_traits>
#include "paddle/framework/op_registry.h"
#include "paddle/framework/operator.h"
namespace paddle {
namespace operators {
// It seems that Eigen::Tensor::random in GPU will SEGFAULT.
// Use std::random and thrust::random (thrust is a standard library in CUDA)
// to implement uniform random.
//
// CPU kernel of the uniform_random operator. Compute() fills output 0 with
// values drawn from std::uniform_real_distribution<T>, whose bounds are the
// "min" and "max" float attributes of the operator.
//
// The "seed" int attribute selects the random stream: a value of 0 asks for a
// seed from std::random_device, any other value seeds the engine directly.
template <typename T>
class CPUUniformRandomKernel : public framework::OpKernel {
 public:
  void Compute(const framework::ExecutionContext& context) const override {
    auto* tensor = context.Output<framework::Tensor>(0);
    T* data = tensor->mutable_data<T>(context.GetPlace());

    // A seed of 0 is the "pick one for me" sentinel.
    unsigned int seed =
        static_cast<unsigned int>(context.op_.GetAttr<int>("seed"));
    if (seed == 0) {
      seed = std::random_device()();
    }
    std::minstd_rand engine;
    engine.seed(seed);

    std::uniform_real_distribution<T> dist(
        static_cast<T>(context.op_.GetAttr<float>("min")),
        static_cast<T>(context.op_.GetAttr<float>("max")));

    // The element count is loop-invariant, so compute it once instead of
    // re-evaluating product(dims) in every loop condition check.
    const auto numel = framework::product(tensor->dims());
    for (ssize_t i = 0; i < numel; ++i) {
      data[i] = dist(engine);
    }
  }
};
// Operator definition for uniform_random.
//
// InferShape() enforces that the "min" attribute is strictly less than the
// "max" attribute, then resizes output 0 to the shape given by the "dims"
// attribute.
class UniformRandomOp : public framework::OperatorWithKernel {
 protected:
  void InferShape(const framework::InferShapeContext& ctx) const override {
    PADDLE_ENFORCE(GetAttr<float>("min") < GetAttr<float>("max"),
                   "uniform_random's min must be less than max");
    auto* tensor = ctx.Output<framework::Tensor>(0);
    auto dims = GetAttr<std::vector<int>>("dims");
    tensor->Resize(framework::make_ddim(dims));
  }
};
// Declares the proto of the uniform_random operator: one output ("Out") and
// the attributes "dims", "min", "max" and "seed".
class UniformRandomOpMaker : public framework::OpProtoAndCheckerMaker {
 public:
  UniformRandomOpMaker(framework::OpProto* proto,
                       framework::OpAttrChecker* op_checker)
      : framework::OpProtoAndCheckerMaker(proto, op_checker) {
    AddOutput("Out", "The output tensor of uniform random op");
    AddComment(R"DOC(Uniform random operator.
Used to initialize tensor with uniform random generator.
)DOC");
    // "dims" has no default, unlike the three attributes below.
    AddAttr<std::vector<int>>("dims", "the dimension of random tensor");
    AddAttr<float>("min", "Minimum value of uniform random").SetDefault(-1.0f);
    AddAttr<float>("max", "Maximum value of uniform random").SetDefault(1.0f);
    AddAttr<int>("seed",
                 "Random seed of uniform random. "
                 "0 means generate a seed by system")
        .SetDefault(0);
  }
};
} // namespace operators
} // namespace paddle
// Register the "uniform_random" operator with its operator class and its
// proto maker.
REGISTER_OP(uniform_random, paddle::operators::UniformRandomOp,
paddle::operators::UniformRandomOpMaker);
// Register the float instantiation of the CPU kernel for "uniform_random".
REGISTER_OP_CPU_KERNEL(uniform_random,
paddle::operators::CPUUniformRandomKernel<float>);
...@@ -12,44 +12,59 @@ ...@@ -12,44 +12,59 @@
See the License for the specific language governing permissions and See the License for the specific language governing permissions and
limitations under the License. */ limitations under the License. */
#pragma once #include <thrust/device_ptr.h>
#include <thrust/iterator/counting_iterator.h>
#include "paddle/framework/eigen.h" #include <thrust/random.h>
#include <thrust/transform.h>
#include "paddle/framework/op_registry.h" #include "paddle/framework/op_registry.h"
#include "paddle/operators/net_op.h" #include "paddle/framework/operator.h"
namespace paddle { namespace paddle {
namespace operators { namespace operators {
using OpKernel = framework::OpKernel; template <typename T>
using OperatorBase = framework::OperatorBase; struct UniformGenerator {
using InferShapeContext = framework::InferShapeContext; T min_, max_;
using ExecutionContext = framework::ExecutionContext; unsigned int seed_;
using Variable = framework::Variable;
template <typename T, int MajorType = Eigen::RowMajor, __host__ __device__ UniformGenerator(T min, T max, int seed)
typename IndexType = Eigen::DenseIndex> : min_(min), max_(max), seed_(seed) {}
using EigenScalar = framework::EigenScalar<T, MajorType, IndexType>;
template <typename T, int MajorType = Eigen::RowMajor, __host__ __device__ T operator()(const unsigned int n) const {
typename IndexType = Eigen::DenseIndex> thrust::minstd_rand rng;
using EigenVector = framework::EigenVector<T, MajorType, IndexType>; rng.seed(seed_);
template <typename T, int MajorType = Eigen::RowMajor, thrust::uniform_real_distribution<T> dist(min_, max_);
typename IndexType = Eigen::DenseIndex> rng.discard(n);
using EigenMatrix = framework::EigenMatrix<T, MajorType, IndexType>; return dist(rng);
template <typename T, size_t D, int MajorType = Eigen::RowMajor, }
typename IndexType = Eigen::DenseIndex> };
using EigenTensor = framework::EigenTensor<T, D, MajorType, IndexType>;
using Tensor = framework::Tensor; // It seems that Eigen::Tensor::random in GPU will SEGFAULT.
using Scope = framework::Scope; // Use std::random and thrust::random(thrust is a std library in CUDA) to
using OperatorWithKernel = framework::OperatorWithKernel; // implement uniform random.
using OperatorBase = framework::OperatorBase; template <typename T>
using OpProtoAndCheckerMaker = framework::OpProtoAndCheckerMaker; class GPUUniformRandomKernel : public framework::OpKernel {
using OpProto = framework::OpProto; public:
using OpAttrChecker = framework::OpAttrChecker; void Compute(const framework::ExecutionContext& context) const override {
using CPUPlace = platform::CPUPlace; auto* tensor = context.Output<framework::Tensor>(0);
using GPUPlace = platform::GPUPlace; T* data = tensor->mutable_data<T>(context.GetPlace());
using OpRegistry = framework::OpRegistry; unsigned int seed =
static_cast<unsigned int>(context.op_.GetAttr<int>("seed"));
if (seed == 0) {
seed = std::random_device()();
}
T min = static_cast<T>(context.op_.GetAttr<float>("min"));
T max = static_cast<T>(context.op_.GetAttr<float>("max"));
thrust::counting_iterator<unsigned int> index_sequence_begin(0);
ssize_t N = framework::product(tensor->dims());
thrust::transform(index_sequence_begin, index_sequence_begin + N,
thrust::device_ptr<T>(data),
UniformGenerator<T>(min, max, seed));
}
};
} // namespace operators } // namespace operators
} // namespace paddle } // namespace paddle
namespace ops = paddle::operators; REGISTER_OP_GPU_KERNEL(uniform_random,
paddle::operators::GPUUniformRandomKernel<float>);
...@@ -666,4 +666,24 @@ void Argument::subArgFrom(const Argument& input, ...@@ -666,4 +666,24 @@ void Argument::subArgFrom(const Argument& input,
} }
} }
// Groups the sub-sequence start positions by the outer sequence they fall in.
//
// seqStartPos holds the start positions of the outer sequences and
// subSeqStartPos those of the nested sub-sequences. On return,
// reorganizedSeqInfo has one row per outer sequence; each row lists the
// sub-sequence boundaries inside that sequence, including both its first and
// last position (so a boundary shared by two sequences appears in both rows).
// Any previous content of reorganizedSeqInfo is discarded.
void Argument::reorganizeSeqInfo(
    const ICpuGpuVectorPtr seqStartPos,
    const ICpuGpuVectorPtr subSeqStartPos,
    std::vector<std::vector<int>>& reorganizedSeqInfo) {
  int* seqStarts = seqStartPos->getMutableData(false);
  int* subSeqStarts = subSeqStartPos->getMutableData(false);

  // Drop stale rows first: resize() alone keeps existing rows, and the
  // push_back calls below would then append to leftovers from an earlier call.
  reorganizedSeqInfo.clear();

  int seqNum = seqStartPos->getSize() - 1;
  // With fewer than two start positions there is no sequence to fill, and
  // indexing row 0 below would be out of range.
  if (seqNum <= 0) return;
  reorganizedSeqInfo.resize(seqNum, std::vector<int>());

  int seqIdx = 0;
  for (size_t i = 0; i < subSeqStartPos->getSize(); ++i) {
    reorganizedSeqInfo[seqIdx].push_back(subSeqStarts[i]);
    if (subSeqStarts[i] == seqStarts[seqIdx + 1]) {
      // This position closes the current sequence and opens the next one.
      seqIdx++;
      if (seqIdx == seqNum) return;
      reorganizedSeqInfo[seqIdx].push_back(subSeqStarts[i]);
    }
  }
}
} // namespace paddle } // namespace paddle
...@@ -317,6 +317,30 @@ struct Argument { ...@@ -317,6 +317,30 @@ struct Argument {
*/ */
void printValueString(std::ostream& stream, void printValueString(std::ostream& stream,
const std::string& prefix = "") const; const std::string& prefix = "") const;
/**
 * @brief reorganizeSeqInfo reorganizes sequenceStartPositions and
 * subSequenceStartPositions into a 2-dimensional array: reorganizedSeqInfo.
 * Row i of the result lists the sub-sequence start positions that fall inside
 * the i-th sequence, including the sequence's own start and end positions.
 *
 * @param seqStartPos: sequenceStartPositions of an Argument.
 * @param subSeqStartPos: subSequenceStartPositions of an Argument.
 * @param reorganizedSeqInfo: output, the reorganized sequence start position
 * information.
 *
 * Examples:
 * seqStartPos: [0, 4, 15, 20, 28]
 * subSeqStartPos: [0, 3, 4, 5, 7, 10, 15, 20, 22, 23, 25, 28]
 * reorganizedSeqInfo:
 * [
 * [0,3,4],
 * [4,5,7,10,15],
 * [15,20],
 * [20,22,23,25,28]
 * ]
 */
static void reorganizeSeqInfo(
const ICpuGpuVectorPtr seqStartPos,
const ICpuGpuVectorPtr subSeqStartPos,
std::vector<std::vector<int>>& reorganizedSeqInfo);
}; };
} // namespace paddle } // namespace paddle
...@@ -8,7 +8,7 @@ cc_test(place_test SRCS place_test.cc DEPS place glog gflags) ...@@ -8,7 +8,7 @@ cc_test(place_test SRCS place_test.cc DEPS place glog gflags)
add_subdirectory(dynload) add_subdirectory(dynload)
cc_test(enforce_test SRCS enforce_test.cc) cc_test(enforce_test SRCS enforce_test.cc DEPS stringpiece)
IF(WITH_GPU) IF(WITH_GPU)
set(GPU_CTX_DEPS dynload_cuda dynamic_loader) set(GPU_CTX_DEPS dynload_cuda dynamic_loader)
......
...@@ -187,25 +187,16 @@ inline void throw_on_error(T e) { ...@@ -187,25 +187,16 @@ inline void throw_on_error(T e) {
__PADDLE_BINARY_COMPARE(__VAL0, __VAL1, <, >=, __VA_ARGS__) __PADDLE_BINARY_COMPARE(__VAL0, __VAL1, <, >=, __VA_ARGS__)
#define PADDLE_ENFORCE_LE(__VAL0, __VAL1, ...) \ #define PADDLE_ENFORCE_LE(__VAL0, __VAL1, ...) \
__PADDLE_BINARY_COMPARE(__VAL0, __VAL1, <=, >, __VA_ARGS__) __PADDLE_BINARY_COMPARE(__VAL0, __VAL1, <=, >, __VA_ARGS__)
#define PADDLE_ENFORCE_NOT_NULL(__VAL, ...) \
// if two values have different data types, choose a compatible type for them. PADDLE_ENFORCE(nullptr != (__VAL), #__VAL " should not be null\n%s", \
template <typename T1, typename T2> paddle::string::Sprintf("" __VA_ARGS__));
struct CompatibleType {
static const bool t1_to_t2 = std::is_convertible<T1, T2>::value;
typedef typename std::conditional<t1_to_t2, T2, T1>::type type;
};
#define __PADDLE_BINARY_COMPARE(__VAL0, __VAL1, __CMP, __INV_CMP, ...) \ #define __PADDLE_BINARY_COMPARE(__VAL0, __VAL1, __CMP, __INV_CMP, ...) \
PADDLE_ENFORCE(__COMPATIBLE_TYPE(__VAL0, __VAL1, __VAL0) \ PADDLE_ENFORCE(__VAL0 __CMP __VAL1, \
__CMP __COMPATIBLE_TYPE(__VAL0, __VAL1, __VAL1), \
"enforce %s " #__CMP " %s failed, %s " #__INV_CMP " %s\n%s", \ "enforce %s " #__CMP " %s failed, %s " #__INV_CMP " %s\n%s", \
#__VAL0, #__VAL1, std::to_string(__VAL0), \ #__VAL0, #__VAL1, std::to_string(__VAL0), \
std::to_string(__VAL1), \ std::to_string(__VAL1), \
paddle::string::Sprintf("" __VA_ARGS__)); paddle::string::Sprintf("" __VA_ARGS__));
#define __COMPATIBLE_TYPE(__VAL0, __VAL1, __VAL) \
typename paddle::platform::CompatibleType<decltype(__VAL0), \
decltype(__VAL1)>::type(__VAL)
} // namespace platform } // namespace platform
} // namespace paddle } // namespace paddle
...@@ -9,8 +9,14 @@ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. ...@@ -9,8 +9,14 @@ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and See the License for the specific language governing permissions and
limitations under the License. */ limitations under the License. */
#include "paddle/platform/enforce.h" #include <memory>
#include "gtest/gtest.h" #include "gtest/gtest.h"
#include "paddle/platform/enforce.h"
#include "paddle/string/piece.h"
using StringPiece = paddle::string::Piece;
using paddle::string::HasPrefix;
TEST(ENFORCE, OK) { TEST(ENFORCE, OK) {
PADDLE_ENFORCE(true, "Enforce is ok %d now %f", 123, 0.345); PADDLE_ENFORCE(true, "Enforce is ok %d now %f", 123, 0.345);
...@@ -20,19 +26,15 @@ TEST(ENFORCE, OK) { ...@@ -20,19 +26,15 @@ TEST(ENFORCE, OK) {
} }
TEST(ENFORCE, FAILED) { TEST(ENFORCE, FAILED) {
bool in_catch = false; bool caught_exception = false;
try { try {
PADDLE_ENFORCE(false, "Enforce is not ok %d at all", 123); PADDLE_ENFORCE(false, "Enforce is not ok %d at all", 123);
} catch (paddle::platform::EnforceNotMet error) { } catch (paddle::platform::EnforceNotMet error) {
// your error handling code here caught_exception = true;
in_catch = true; EXPECT_TRUE(
std::string msg = "Enforce is not ok 123 at all"; HasPrefix(StringPiece(error.what()), "Enforce is not ok 123 at all"));
const char* what = error.what();
for (size_t i = 0; i < msg.length(); ++i) {
ASSERT_EQ(what[i], msg[i]);
}
} }
ASSERT_TRUE(in_catch); EXPECT_TRUE(caught_exception);
} }
TEST(ENFORCE, NO_ARG_OK) { TEST(ENFORCE, NO_ARG_OK) {
...@@ -45,41 +47,27 @@ TEST(ENFORCE, NO_ARG_OK) { ...@@ -45,41 +47,27 @@ TEST(ENFORCE, NO_ARG_OK) {
TEST(ENFORCE_EQ, NO_EXTRA_MSG_FAIL) { TEST(ENFORCE_EQ, NO_EXTRA_MSG_FAIL) {
int a = 2; int a = 2;
bool in_catch = false; bool caught_exception = false;
try { try {
PADDLE_ENFORCE_EQ(a, 1 + 3); PADDLE_ENFORCE_EQ(a, 1 + 3);
} catch (paddle::platform::EnforceNotMet error) { } catch (paddle::platform::EnforceNotMet error) {
in_catch = true; caught_exception = true;
const std::string msg = "enforce a == 1 + 3 failed, 2 != 4"; HasPrefix(StringPiece(error.what()), "enforce a == 1 + 3 failed, 2 != 4");
const char* what = error.what();
for (size_t i = 0; i < msg.length(); ++i) {
ASSERT_EQ(what[i], msg[i]);
}
} }
EXPECT_TRUE(caught_exception);
ASSERT_TRUE(in_catch);
} }
TEST(ENFORCE_EQ, EXTRA_MSG_FAIL) { TEST(ENFORCE_EQ, EXTRA_MSG_FAIL) {
int a = 2; int a = 2;
bool in_catch = false; bool caught_exception = false;
try { try {
PADDLE_ENFORCE_EQ(a, 1 + 3, "%s size not match", "their"); PADDLE_ENFORCE_EQ(a, 1 + 3, "%s size not match", "their");
} catch (paddle::platform::EnforceNotMet error) { } catch (paddle::platform::EnforceNotMet error) {
in_catch = true; caught_exception = true;
const std::string msg = HasPrefix(StringPiece(error.what()),
"enforce a == 1 + 3 failed, 2 != 4\ntheir size not match"; "enforce a == 1 + 3 failed, 2 != 4\ntheir size not match");
const char* what = error.what();
for (size_t i = 0; i < msg.length(); ++i) {
ASSERT_EQ(what[i], msg[i]);
}
} }
EXPECT_TRUE(caught_exception);
ASSERT_TRUE(in_catch);
} }
TEST(ENFORCE_NE, OK) { TEST(ENFORCE_NE, OK) {
...@@ -87,42 +75,32 @@ TEST(ENFORCE_NE, OK) { ...@@ -87,42 +75,32 @@ TEST(ENFORCE_NE, OK) {
PADDLE_ENFORCE_NE(1.0, 2UL); PADDLE_ENFORCE_NE(1.0, 2UL);
} }
TEST(ENFORCE_NE, FAIL) { TEST(ENFORCE_NE, FAIL) {
bool in_catch = false; bool caught_exception = false;
try { try {
// 2UL here to check data type compatible // 2UL here to check data type compatible
PADDLE_ENFORCE_NE(1.0, 1UL); PADDLE_ENFORCE_NE(1.0, 1UL);
} catch (paddle::platform::EnforceNotMet error) { } catch (paddle::platform::EnforceNotMet error) {
in_catch = true; caught_exception = true;
const std::string msg = "enforce 1.0 != 1UL failed, 1.000000 == 1"; EXPECT_TRUE(HasPrefix(StringPiece(error.what()),
const char* what = error.what(); "enforce 1.0 != 1UL failed, 1.000000 == 1"))
for (size_t i = 0; i < msg.length(); ++i) { << error.what() << " does not have expected prefix";
ASSERT_EQ(what[i], msg[i]);
}
} }
EXPECT_TRUE(caught_exception);
ASSERT_TRUE(in_catch);
} }
TEST(ENFORCE_GT, OK) { PADDLE_ENFORCE_GT(2, 1); } TEST(ENFORCE_GT, OK) { PADDLE_ENFORCE_GT(2, 1); }
TEST(ENFORCE_GT, FAIL) { TEST(ENFORCE_GT, FAIL) {
bool in_catch = false; bool caught_exception = false;
try { try {
// 2UL here to check data type compatible
PADDLE_ENFORCE_GT(1, 2UL); PADDLE_ENFORCE_GT(1, 2UL);
} catch (paddle::platform::EnforceNotMet error) { } catch (paddle::platform::EnforceNotMet error) {
in_catch = true; caught_exception = true;
const std::string msg = "enforce 1 > 2UL failed, 1 <= 2"; EXPECT_TRUE(
const char* what = error.what(); HasPrefix(StringPiece(error.what()), "enforce 1 > 2UL failed, 1 <= 2"));
for (size_t i = 0; i < msg.length(); ++i) {
ASSERT_EQ(what[i], msg[i]);
}
} }
EXPECT_TRUE(caught_exception);
ASSERT_TRUE(in_catch);
} }
TEST(ENFORCE_GE, OK) { TEST(ENFORCE_GE, OK) {
...@@ -132,21 +110,16 @@ TEST(ENFORCE_GE, OK) { ...@@ -132,21 +110,16 @@ TEST(ENFORCE_GE, OK) {
PADDLE_ENFORCE_GE(3.21, 2UL); PADDLE_ENFORCE_GE(3.21, 2UL);
} }
TEST(ENFORCE_GE, FAIL) { TEST(ENFORCE_GE, FAIL) {
bool in_catch = false; bool caught_exception = false;
try { try {
PADDLE_ENFORCE_GE(1, 2UL); PADDLE_ENFORCE_GE(1, 2UL);
} catch (paddle::platform::EnforceNotMet error) { } catch (paddle::platform::EnforceNotMet error) {
in_catch = true; caught_exception = true;
const std::string msg = "enforce 1 >= 2UL failed, 1 < 2"; EXPECT_TRUE(
const char* what = error.what(); HasPrefix(StringPiece(error.what()), "enforce 1 >= 2UL failed, 1 < 2"));
for (size_t i = 0; i < msg.length(); ++i) {
ASSERT_EQ(what[i], msg[i]);
}
} }
EXPECT_TRUE(caught_exception);
ASSERT_TRUE(in_catch);
} }
TEST(ENFORCE_LE, OK) { TEST(ENFORCE_LE, OK) {
...@@ -157,21 +130,16 @@ TEST(ENFORCE_LE, OK) { ...@@ -157,21 +130,16 @@ TEST(ENFORCE_LE, OK) {
PADDLE_ENFORCE_LE(2UL, 3.2); PADDLE_ENFORCE_LE(2UL, 3.2);
} }
TEST(ENFORCE_LE, FAIL) { TEST(ENFORCE_LE, FAIL) {
bool in_catch = false; bool caught_exception = false;
try { try {
PADDLE_ENFORCE_GT(1, 2UL); PADDLE_ENFORCE_GT(1, 2UL);
} catch (paddle::platform::EnforceNotMet error) { } catch (paddle::platform::EnforceNotMet error) {
in_catch = true; caught_exception = true;
const std::string msg = "enforce 1 > 2UL failed, 1 <= 2"; EXPECT_TRUE(
const char* what = error.what(); HasPrefix(StringPiece(error.what()), "enforce 1 > 2UL failed, 1 <= 2"));
for (size_t i = 0; i < msg.length(); ++i) {
ASSERT_EQ(what[i], msg[i]);
}
} }
EXPECT_TRUE(caught_exception);
ASSERT_TRUE(in_catch);
} }
TEST(ENFORCE_LT, OK) { TEST(ENFORCE_LT, OK) {
...@@ -180,19 +148,31 @@ TEST(ENFORCE_LT, OK) { ...@@ -180,19 +148,31 @@ TEST(ENFORCE_LT, OK) {
PADDLE_ENFORCE_LT(2UL, 3); PADDLE_ENFORCE_LT(2UL, 3);
} }
TEST(ENFORCE_LT, FAIL) { TEST(ENFORCE_LT, FAIL) {
bool in_catch = false; bool caught_exception = false;
try { try {
PADDLE_ENFORCE_LT(1UL, 0.12); PADDLE_ENFORCE_LT(1UL, 0.12);
} catch (paddle::platform::EnforceNotMet error) { } catch (paddle::platform::EnforceNotMet error) {
in_catch = true; caught_exception = true;
const std::string msg = "enforce 1UL < 0.12 failed, 1 >= 0.12"; EXPECT_TRUE(HasPrefix(StringPiece(error.what()),
const char* what = error.what(); "enforce 1UL < 0.12 failed, 1 >= 0.12"));
for (size_t i = 0; i < msg.length(); ++i) {
ASSERT_EQ(what[i], msg[i]);
}
} }
EXPECT_TRUE(caught_exception);
}
TEST(ENFORCE_NOT_NULL, OK) {
int* a = new int;
PADDLE_ENFORCE_NOT_NULL(a);
delete a;
}
TEST(ENFORCE_NOT_NULL, FAIL) {
bool caught_exception = false;
try {
int* a = nullptr;
PADDLE_ENFORCE_NOT_NULL(a);
ASSERT_TRUE(in_catch); } catch (paddle::platform::EnforceNotMet error) {
caught_exception = true;
EXPECT_TRUE(HasPrefix(StringPiece(error.what()), "a should not be null"));
}
EXPECT_TRUE(caught_exception);
} }
# Declares the paddle_pybind shared library, built from pybind.cc, with the
# pybind, python and backward targets plus the listed operator targets as
# dependencies.
cc_library(
  paddle_pybind
  SHARED
  SRCS
    pybind.cc
  DEPS
    pybind
    python
    backward
    fc_op
    sgd_op
    add_op
    mean_op
    cross_entropy_op
    recurrent_op
)
此差异已折叠。
此差异已折叠。
文件模式从 100644 更改为 100755
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册