提交 8804b242 编写于 作者: D dongzhihong

Merge remote-tracking branch 'origin/develop' into random_op

...@@ -38,17 +38,16 @@ RUN apt-get update && \ ...@@ -38,17 +38,16 @@ RUN apt-get update && \
RUN pip --no-cache-dir install 'numpy>=1.12.0' RUN pip --no-cache-dir install 'numpy>=1.12.0'
# Install Go and glide # Install Go and glide
RUN wget -O go.tgz https://storage.googleapis.com/golang/go1.8.1.linux-amd64.tar.gz && \ RUN wget -qO- https://storage.googleapis.com/golang/go1.8.1.linux-amd64.tar.gz | \
tar -C /usr/local -xzf go.tgz && \ tar -xz -C /usr/local && \
mkdir /root/gopath && \ mkdir /root/gopath && \
mkdir /root/gopath/bin && \ mkdir /root/gopath/bin && \
mkdir /root/gopath/src && \ mkdir /root/gopath/src
rm go.tgz
ENV GOROOT=/usr/local/go GOPATH=/root/gopath ENV GOROOT=/usr/local/go GOPATH=/root/gopath
# should not be in the same line with GOROOT definition, otherwise docker build could not find GOROOT. # should not be in the same line with GOROOT definition, otherwise docker build could not find GOROOT.
ENV PATH=${PATH}:${GOROOT}/bin:${GOPATH}/bin ENV PATH=${PATH}:${GOROOT}/bin:${GOPATH}/bin
# install glide # install glide
RUN curl -q https://glide.sh/get | sh RUN curl -s -q https://glide.sh/get | sh
# git credential to skip password typing # git credential to skip password typing
RUN git config --global credential.helper store RUN git config --global credential.helper store
......
...@@ -8,7 +8,7 @@ ExternalProject_Add( ...@@ -8,7 +8,7 @@ ExternalProject_Add(
extern_lib_any extern_lib_any
${EXTERNAL_PROJECT_LOG_ARGS} ${EXTERNAL_PROJECT_LOG_ARGS}
GIT_REPOSITORY "https://github.com/PaddlePaddle/any.git" GIT_REPOSITORY "https://github.com/PaddlePaddle/any.git"
GIT_TAG "8fef1e93710a0edf8d7658999e284a1142c4c020" GIT_TAG "15595d8324be9e8a9a80d9ae442fdd12bd66df5d"
PREFIX ${ANY_SOURCE_DIR} PREFIX ${ANY_SOURCE_DIR}
UPDATE_COMMAND "" UPDATE_COMMAND ""
CONFIGURE_COMMAND "" CONFIGURE_COMMAND ""
......
...@@ -17,7 +17,7 @@ IF(NOT ${WITH_MKLML}) ...@@ -17,7 +17,7 @@ IF(NOT ${WITH_MKLML})
ENDIF(NOT ${WITH_MKLML}) ENDIF(NOT ${WITH_MKLML})
IF(WIN32 OR APPLE) IF(WIN32 OR APPLE)
MESSAGE(WARNING MESSAGE(WARNING
"Windows or Mac is not supported with MKLML in Paddle yet." "Windows or Mac is not supported with MKLML in Paddle yet."
"Force WITH_MKLML=OFF") "Force WITH_MKLML=OFF")
SET(WITH_MKLML OFF CACHE STRING "Disable MKLML package in Windows and MacOS" FORCE) SET(WITH_MKLML OFF CACHE STRING "Disable MKLML package in Windows and MacOS" FORCE)
...@@ -43,22 +43,21 @@ SET(CMAKE_INSTALL_RPATH "${CMAKE_INSTALL_RPATH}" "${MKLML_ROOT}/lib") ...@@ -43,22 +43,21 @@ SET(CMAKE_INSTALL_RPATH "${CMAKE_INSTALL_RPATH}" "${MKLML_ROOT}/lib")
INCLUDE_DIRECTORIES(${MKLML_INC_DIR}) INCLUDE_DIRECTORIES(${MKLML_INC_DIR})
SET(mklml_cmakefile ${MKLML_DOWNLOAD_DIR}/CMakeLists.txt) FILE(WRITE ${MKLML_DOWNLOAD_DIR}/CMakeLists.txt
FILE(WRITE ${mklml_cmakefile} "PROJECT(MKLML)\n" "PROJECT(MKLML)\n"
"cmake_minimum_required(VERSION 3.0)\n" "cmake_minimum_required(VERSION 3.0)\n"
"install(DIRECTORY ${MKLML_VER}\n" "install(DIRECTORY ${MKLML_VER}\n"
" DESTINATION ${MKLML_DST_DIR})\n") " DESTINATION ${MKLML_DST_DIR})\n")
ExternalProject_Add( ExternalProject_Add(
${MKLML_PROJECT} ${MKLML_PROJECT}
${EXTERNAL_PROJECT_LOG_ARGS} ${EXTERNAL_PROJECT_LOG_ARGS}
PREFIX ${MKLML_SOURCE_DIR} PREFIX ${MKLML_SOURCE_DIR}
DOWNLOAD_DIR ${MKLML_DOWNLOAD_DIR} DOWNLOAD_DIR ${MKLML_DOWNLOAD_DIR}
DOWNLOAD_COMMAND wget --no-check-certificate -O ${MKLML_DOWNLOAD_DIR}/${MKLML_VER}.tgz ${MKLML_URL} DOWNLOAD_COMMAND wget --no-check-certificate -qO- ${MKLML_URL} | tar xz -C ${MKLML_DOWNLOAD_DIR}
&& tar -xzf ${MKLML_DOWNLOAD_DIR}/${MKLML_VER}.tgz
DOWNLOAD_NO_PROGRESS 1 DOWNLOAD_NO_PROGRESS 1
UPDATE_COMMAND "" UPDATE_COMMAND ""
CMAKE_ARGS -DCMAKE_INSTALL_PREFIX=${MKLML_INSTALL_ROOT} CMAKE_ARGS -DCMAKE_INSTALL_PREFIX=${MKLML_INSTALL_ROOT}
CMAKE_CACHE_ARGS -DCMAKE_INSTALL_PREFIX:PATH=${MKLML_INSTALL_ROOT} CMAKE_CACHE_ARGS -DCMAKE_INSTALL_PREFIX:PATH=${MKLML_INSTALL_ROOT}
) )
......
...@@ -257,6 +257,11 @@ seq_concat ...@@ -257,6 +257,11 @@ seq_concat
.. autoclass:: paddle.v2.layer.seq_concat .. autoclass:: paddle.v2.layer.seq_concat
:noindex: :noindex:
sub_nested_seq
--------------
.. autoclass:: paddle.v2.layer.sub_nested_seq
:noindex:
Reshaping Layers Reshaping Layers
================ ================
......
...@@ -11,6 +11,15 @@ Paddle每次发新的版本,遵循以下流程: ...@@ -11,6 +11,15 @@ Paddle每次发新的版本,遵循以下流程:
* 编译这个版本的Ubuntu Deb包。如果失败,修复Ubuntu Deb包编译问题,Patch号加一,返回第二步。 * 编译这个版本的Ubuntu Deb包。如果失败,修复Ubuntu Deb包编译问题,Patch号加一,返回第二步。
* 使用Regression Test List作为检查列表,测试Docker镜像/ubuntu安装包的功能正确性 * 使用Regression Test List作为检查列表,测试Docker镜像/ubuntu安装包的功能正确性
* 如果失败,记录下所有失败的例子,在这个`release/版本号`分支中,修复所有bug后,Patch号加一,返回第二步 * 如果失败,记录下所有失败的例子,在这个`release/版本号`分支中,修复所有bug后,Patch号加一,返回第二步
* 编译这个版本的python wheel包,并发布到pypi。
* 由于pypi.python.org目前遵循[严格的命名规范PEP 513](https://www.python.org/dev/peps/pep-0513),在使用twine上传之前,需要重命名wheel包中platform相关的后缀,比如将`linux_x86_64`修改成`manylinux1_x86_64`
* pypi上的package名称为paddlepaddle和paddlepaddle_gpu,如果要上传GPU版本的包,需要修改build/python/setup.py中,name: "paddlepaddle_gpu"并重新打包wheel包:`python setup.py bdist_wheel`
* 上传方法:
```
cd build/python
pip install twine
twine upload dist/[package to upload]
```
4. 第三步完成后,将`release/版本号`分支合入master分支,并删除`release/版本号`分支。将master分支的合入commit打上tag,tag为`版本号`。同时再将`master`分支合入`develop`分支。最后删除`release/版本号`分支。 4. 第三步完成后,将`release/版本号`分支合入master分支,并删除`release/版本号`分支。将master分支的合入commit打上tag,tag为`版本号`。同时再将`master`分支合入`develop`分支。最后删除`release/版本号`分支。
5. 编译master分支的Docker发行镜像,发布到dockerhub。编译ubuntu的deb包,发布到github release页面 5. 编译master分支的Docker发行镜像,发布到dockerhub。编译ubuntu的deb包,发布到github release页面
6. 协同完成Release Note的书写 6. 协同完成Release Note的书写
......
...@@ -3,6 +3,43 @@ PaddlePaddle的Docker容器使用方式 ...@@ -3,6 +3,43 @@ PaddlePaddle的Docker容器使用方式
PaddlePaddle目前唯一官方支持的运行的方式是Docker容器。因为Docker能在所有主要操作系统(包括Linux,Mac OS X和Windows)上运行。 请注意,您需要更改 `Dockers设置 <https://github.com/PaddlePaddle/Paddle/issues/627>`_ 才能充分利用Mac OS X和Windows上的硬件资源。 PaddlePaddle目前唯一官方支持的运行的方式是Docker容器。因为Docker能在所有主要操作系统(包括Linux,Mac OS X和Windows)上运行。 请注意,您需要更改 `Dockers设置 <https://github.com/PaddlePaddle/Paddle/issues/627>`_ 才能充分利用Mac OS X和Windows上的硬件资源。
Docker使用入门
------------------------------
几个基础的概念帮助理解和使用Docker:
- *镜像*:一个Docker镜像是一个打包好的软件。它包含了这个软件本身和它所依赖的运行环境。PaddlePaddle的Docker镜像就包含了PaddlePaddle的Python库以及其依赖的多个Python库。这样我们可以直接在Docker中运行需要的程序而不需要安装后再执行。可以执行:
.. code-block:: bash
docker images
来列出当前系统中的所有镜像,同样可以执行:
.. code-block:: bash
docker pull paddlepaddle/paddle:0.10.0
来下载Docker镜像,paddlepaddle/paddle是从官方镜像源Dockerhub.com下载的,推荐国内用户使用docker.paddlepaddle.org/paddle下载。
- *容器*: 如果说一个Docker镜像就是一个程序,那容器就是这个程序运行时产生的“进程”。
实际上,一个容器就是一个操作系统的进程,但是是运行在独立的进程空间,文件系统以及网络之上。
可以执行:
.. code-block:: bash
docker run paddlepaddle/paddle:0.10.0
来使用一个镜像启动一个容器。
- 默认情况下,Docker容器会运行在独立的文件系统空间之上,我们无法在Docker容器中
访问到主机上的文件。可以通过*挂载Volume*的方式,将主机上的文件或目录挂载到
Docker容器中。下面的命令把当前目录挂载到了容器中的 /data 目录下,容器使用
debian镜像,并且启动后执行 :code:`ls /data`。
.. code-block:: bash
docker run --rm -v $(pwd):/data debian ls /data
PaddlePaddle发布的Docker镜像使用说明 PaddlePaddle发布的Docker镜像使用说明
------------------------------ ------------------------------
...@@ -12,11 +49,11 @@ PaddlePaddle需要的所有编译工具。把编译出来的PaddlePaddle也打 ...@@ -12,11 +49,11 @@ PaddlePaddle需要的所有编译工具。把编译出来的PaddlePaddle也打
像,称为生产镜像,里面涵盖了PaddlePaddle运行所需的所有环境。每次 像,称为生产镜像,里面涵盖了PaddlePaddle运行所需的所有环境。每次
PaddlePaddle发布新版本的时候都会发布对应版本的生产镜像以及开发镜像。运 PaddlePaddle发布新版本的时候都会发布对应版本的生产镜像以及开发镜像。运
行镜像包括纯CPU版本和GPU版本以及其对应的非AVX版本。我们会在 行镜像包括纯CPU版本和GPU版本以及其对应的非AVX版本。我们会在
`dockerhub.com <https://hub.docker.com/r/paddlepaddle/paddle/tags/>`_ 提供最新 `dockerhub.com <https://hub.docker.com/r/paddlepaddle/paddle/tags/>`_
的Docker镜像,可以在"tags"标签下找到最新的Paddle镜像版本。为了方便在国 和国内镜像`docker.paddlepaddle.org` 提供最新
内的开发者下载Docker镜像,我们提供了国内的镜像服务器供大家使用。如果您 的Docker镜像,可以在"tags"标签下找到最新的Paddle镜像版本。
在国内,请把文档里命令中的paddlepaddle/paddle替换成
docker.paddlepaddle.org/paddle。 **注意:为了方便在国内的开发者下载Docker镜像,我们提供了国内的镜像服务器供大家使用。如果您在国内,请把文档里命令中的paddlepaddle/paddle替换成docker.paddlepaddle.org/paddle。**
1. 开发镜像::code:`paddlepaddle/paddle:0.10.0-dev` 1. 开发镜像::code:`paddlepaddle/paddle:0.10.0-dev`
...@@ -68,6 +105,8 @@ docker.paddlepaddle.org/paddle。 ...@@ -68,6 +105,8 @@ docker.paddlepaddle.org/paddle。
如果输出是No,就需要选择使用no-AVX的镜像 如果输出是No,就需要选择使用no-AVX的镜像
**注:在0.10.0之后的版本,PaddlePaddle都可以自动判断硬件是否支持AVX,所以无需判断AVX即可使用**
以上方法在GPU镜像里也能用,只是请不要忘记提前在物理机上安装GPU最新驱动。 以上方法在GPU镜像里也能用,只是请不要忘记提前在物理机上安装GPU最新驱动。
为了保证GPU驱动能够在镜像里面正常运行,我们推荐使用[nvidia-docker](https://github.com/NVIDIA/nvidia-docker)来运行镜像。 为了保证GPU驱动能够在镜像里面正常运行,我们推荐使用[nvidia-docker](https://github.com/NVIDIA/nvidia-docker)来运行镜像。
......
...@@ -63,12 +63,35 @@ CPU-only version and a CUDA GPU version and their no-AVX versions. ...@@ -63,12 +63,35 @@ CPU-only version and a CUDA GPU version and their no-AVX versions.
We put the docker images on `dockerhub.com We put the docker images on `dockerhub.com
<https://hub.docker.com/r/paddlepaddle/paddle/tags/>`_. You can find the <https://hub.docker.com/r/paddlepaddle/paddle/tags/>`_. You can find the
latest versions under "tags" tab at dockerhub.com. If you are in latest versions under "tags" tab at dockerhub.com.
China, you can use our Docker image registry mirror to speed up the
download process. To use it, please replace all paddlepaddle/paddle in
the commands to docker.paddlepaddle.org/paddle.
1. Production images, this image might have multiple variants: **NOTE: If you are in China, you can use our Docker image registry mirror to speed up the download process. To use it, please replace all paddlepaddle/paddle in the commands with docker.paddlepaddle.org/paddle.**
1. development image :code:`paddlepaddle/paddle:<version>-dev`
This image has packed related develop tools and runtime
environment. Users and developers can use this image instead of
their own local computer to accomplish development, build,
releasing, document writing etc. Because different versions of paddle
may depend on different versions of libraries and tools, if you
want to set up a local environment, you must pay attention to the
versions. The development image contains:
- gcc/clang
- nvcc
- Python
- sphinx
- woboq
- sshd
Many developers use servers with GPUs, they can use ssh to login to
the server and run :code:`docker exec` to enter the docker
container and start their work. Also they can start a development
docker image with SSHD service, so they can login to the container
and start work.
2. Production images, this image might have multiple variants:
- GPU/AVX::code:`paddlepaddle/paddle:<version>-gpu` - GPU/AVX::code:`paddlepaddle/paddle:<version>-gpu`
- GPU/no-AVX::code:`paddlepaddle/paddle:<version>-gpu-noavx` - GPU/no-AVX::code:`paddlepaddle/paddle:<version>-gpu-noavx`
...@@ -84,7 +107,7 @@ the commands to docker.paddlepaddle.org/paddle. ...@@ -84,7 +107,7 @@ the commands to docker.paddlepaddle.org/paddle.
if cat /proc/cpuinfo | grep -i avx; then echo Yes; else echo No; fi if cat /proc/cpuinfo | grep -i avx; then echo Yes; else echo No; fi
**NOTE: versions after 0.10.0 automatically detect system AVX support, so manual detection is not needed in this case.**
To run the CPU-only image as an interactive container: To run the CPU-only image as an interactive container:
.. code-block:: bash .. code-block:: bash
...@@ -103,29 +126,6 @@ the commands to docker.paddlepaddle.org/paddle. ...@@ -103,29 +126,6 @@ the commands to docker.paddlepaddle.org/paddle.
nvidia-docker run -it --rm paddlepaddle/paddle:0.10.0-gpu /bin/bash nvidia-docker run -it --rm paddlepaddle/paddle:0.10.0-gpu /bin/bash
2. development image :code:`paddlepaddle/paddle:<version>-dev`
This image has packed related develop tools and runtime
environment. Users and developers can use this image instead of
their own local computer to accomplish development, build,
releasing, document writing etc. While different version of paddle
may depends on different version of libraries and tools, if you
want to setup a local environment, you must pay attention to the
versions. The development image contains:
- gcc/clang
- nvcc
- Python
- sphinx
- woboq
- sshd
Many developers use servers with GPUs, they can use ssh to login to
the server and run :code:`docker exec` to enter the docker
container and start their work. Also they can start a development
docker image with SSHD service, so they can login to the container
and start work.
Train Model Using Python API Train Model Using Python API
---------------------------- ----------------------------
......
...@@ -32,7 +32,7 @@ import ( ...@@ -32,7 +32,7 @@ import (
func main() { func main() {
port := flag.Int("port", 0, "port of the pserver") port := flag.Int("port", 0, "port of the pserver")
index := flag.Int("index", -1, "index of this pserver, should be larger or equal than 0") index := flag.Int("index", -1, "index of the pserver, set to -1 if use etcd for auto pserver index registry")
etcdEndpoint := flag.String("etcd-endpoint", "http://127.0.0.1:2379", etcdEndpoint := flag.String("etcd-endpoint", "http://127.0.0.1:2379",
"comma separated endpoint string for pserver to connect to etcd") "comma separated endpoint string for pserver to connect to etcd")
dialTimeout := flag.Duration("dial-timeout", 5*time.Second, "dial timeout") dialTimeout := flag.Duration("dial-timeout", 5*time.Second, "dial timeout")
...@@ -60,12 +60,12 @@ func main() { ...@@ -60,12 +60,12 @@ func main() {
idx, err = e.Register(*port) idx, err = e.Register(*port)
candy.Must(err) candy.Must(err)
cp, err = pserver.NewCheckpointFromFile(*checkpointPath, idx, e) cp, err = pserver.LoadCheckpoint(e, idx)
if err != nil { if err != nil {
if err == pserver.ErrCheckpointNotFound { if err == pserver.ErrCheckpointNotFound {
log.Infof("Could not find the pserver checkpoint.") log.Infof("Could not find the pserver checkpoint.")
} else { } else {
log.Errorf("Fetch checkpoint failed, %s", err) panic(err)
} }
} }
} }
......
hash: 2a1c0eca5c07a130e3d224f9821f96cfa37a39bf6bce141c855bbc57ef569f1c hash: 1b9b07408ca7fac27a374dc2ccd2433e4bff090484008a037df967284949a582
updated: 2017-07-29T07:34:48.722757905+08:00 updated: 2017-08-03T21:46:51.744995189Z
imports: imports:
- name: github.com/beorn7/perks - name: github.com/beorn7/perks
version: 4c0e84591b9aa9e6dcfdf3e020114cd81f89d5f9 version: 4c0e84591b9aa9e6dcfdf3e020114cd81f89d5f9
...@@ -145,6 +145,8 @@ imports: ...@@ -145,6 +145,8 @@ imports:
version: a1dba9ce8baed984a2495b658c82687f8157b98f version: a1dba9ce8baed984a2495b658c82687f8157b98f
subpackages: subpackages:
- xfs - xfs
- name: github.com/satori/go.uuid
version: 879c5887cd475cd7864858769793b2ceb0d44feb
- name: github.com/sirupsen/logrus - name: github.com/sirupsen/logrus
version: a3f95b5c423586578a4e099b11a46c2479628cac version: a3f95b5c423586578a4e099b11a46c2479628cac
- name: github.com/topicai/candy - name: github.com/topicai/candy
......
...@@ -14,11 +14,13 @@ import: ...@@ -14,11 +14,13 @@ import:
version: ^1.0.0 version: ^1.0.0
- package: github.com/topicai/candy - package: github.com/topicai/candy
- package: golang.org/x/crypto - package: golang.org/x/crypto
vcs: git
repo: https://github.com/golang/crypto.git repo: https://github.com/golang/crypto.git
- package: golang.org/x/sys
vcs: git vcs: git
- package: golang.org/x/sys
repo: https://github.com/golang/sys.git repo: https://github.com/golang/sys.git
- package: golang.org/x/text
vcs: git vcs: git
- package: golang.org/x/text
repo: https://github.com/golang/text.git repo: https://github.com/golang/text.git
vcs: git
- package: github.com/satori/go.uuid
version: v1.1.0
...@@ -77,11 +77,12 @@ type taskEntry struct { ...@@ -77,11 +77,12 @@ type taskEntry struct {
NumFailure int NumFailure int
} }
type taskQueues struct { type masterState struct {
Todo []taskEntry Todo []taskEntry
Pending map[int]taskEntry // map from task ID to task entry Pending map[int]taskEntry // map from task ID to task entry
Done []taskEntry Done []taskEntry
Failed []taskEntry Failed []taskEntry
CurPass int
} }
// Service is the master server service. // Service is the master server service.
...@@ -94,11 +95,11 @@ type Service struct { ...@@ -94,11 +95,11 @@ type Service struct {
ready chan struct{} ready chan struct{}
initDone bool initDone bool
mu sync.Mutex mu sync.Mutex
taskQueues taskQueues // State to be persisted to snapshot.
currPass int state masterState
jobTasks []taskEntry // The trainer that is currently saving model. This state is
// transient, does not need to be persisted to snapshot.
savingTrainer string savingTrainer string
} }
...@@ -141,8 +142,8 @@ func NewService(store Store, chunksPerTask int, timeoutDur time.Duration, failur ...@@ -141,8 +142,8 @@ func NewService(store Store, chunksPerTask int, timeoutDur time.Duration, failur
s.chunksPerTask = chunksPerTask s.chunksPerTask = chunksPerTask
s.timeoutDur = timeoutDur s.timeoutDur = timeoutDur
s.failureMax = failureMax s.failureMax = failureMax
s.taskQueues = taskQueues{} s.state = masterState{}
s.taskQueues.Pending = make(map[int]taskEntry) s.state.Pending = make(map[int]taskEntry)
s.ready = make(chan struct{}) s.ready = make(chan struct{})
s.store = store s.store = store
recovered, err := s.recover() recovered, err := s.recover()
...@@ -180,7 +181,7 @@ func (s *Service) recover() (bool, error) { ...@@ -180,7 +181,7 @@ func (s *Service) recover() (bool, error) {
} }
dec := gob.NewDecoder(gr) dec := gob.NewDecoder(gr)
var tqs taskQueues var tqs masterState
err = dec.Decode(&tqs) err = dec.Decode(&tqs)
if err != nil { if err != nil {
return false, err return false, err
...@@ -193,7 +194,12 @@ func (s *Service) recover() (bool, error) { ...@@ -193,7 +194,12 @@ func (s *Service) recover() (bool, error) {
log.Errorln(err) log.Errorln(err)
} }
s.taskQueues = tqs s.state = tqs
log.WithFields(s.logFields()).Infof("Master recovered from snapshot, scheduling pending task timeout check.")
for _, t := range s.state.Pending {
time.AfterFunc(s.timeoutDur, s.checkTimeoutFunc(t.Task.Meta.ID, t.Task.Meta.Epoch))
}
return true, nil return true, nil
} }
...@@ -208,7 +214,7 @@ func (s *Service) snapshot() error { ...@@ -208,7 +214,7 @@ func (s *Service) snapshot() error {
var buf bytes.Buffer var buf bytes.Buffer
gw := gzip.NewWriter(&buf) gw := gzip.NewWriter(&buf)
enc := gob.NewEncoder(gw) enc := gob.NewEncoder(gw)
err := enc.Encode(s.taskQueues) err := enc.Encode(s.state)
if err != nil { if err != nil {
return err return err
} }
...@@ -290,8 +296,7 @@ func (s *Service) SetDataset(globPaths []string, _ *int) error { ...@@ -290,8 +296,7 @@ func (s *Service) SetDataset(globPaths []string, _ *int) error {
return err return err
} }
s.jobTasks = partition(chunks, s.chunksPerTask) s.state.Todo = partition(chunks, s.chunksPerTask)
s.taskQueues.Todo = s.jobTasks
err = s.snapshot() err = s.snapshot()
if err != nil { if err != nil {
...@@ -319,17 +324,17 @@ func (s *Service) processFailedTask(t taskEntry, epoch int) { ...@@ -319,17 +324,17 @@ func (s *Service) processFailedTask(t taskEntry, epoch int) {
} }
}() }()
delete(s.taskQueues.Pending, t.Task.Meta.ID) delete(s.state.Pending, t.Task.Meta.ID)
t.NumFailure++ t.NumFailure++
if t.NumFailure > s.failureMax { if t.NumFailure > s.failureMax {
log.Warningf("Task %v failed %d times, discard.", t.Task, t.NumFailure) log.Warningf("Task %v failed %d times, discard.", t.Task, t.NumFailure)
s.taskQueues.Failed = append(s.taskQueues.Failed, t) s.state.Failed = append(s.state.Failed, t)
return return
} }
log.Warningf("Task %v failed %d times, re-dispatch.", t.Task, t.NumFailure) log.Warningf("Task %v failed %d times, re-dispatch.", t.Task, t.NumFailure)
s.taskQueues.Todo = append(s.taskQueues.Todo, t) s.state.Todo = append(s.state.Todo, t)
return return
} }
...@@ -338,7 +343,7 @@ func (s *Service) checkTimeoutFunc(taskID int, epoch int) func() { ...@@ -338,7 +343,7 @@ func (s *Service) checkTimeoutFunc(taskID int, epoch int) func() {
s.mu.Lock() s.mu.Lock()
defer s.mu.Unlock() defer s.mu.Unlock()
t, ok := s.taskQueues.Pending[taskID] t, ok := s.state.Pending[taskID]
if !ok { if !ok {
return return
} }
...@@ -350,10 +355,11 @@ func (s *Service) checkTimeoutFunc(taskID int, epoch int) func() { ...@@ -350,10 +355,11 @@ func (s *Service) checkTimeoutFunc(taskID int, epoch int) func() {
// must be called with lock held. // must be called with lock held.
func (s *Service) logFields() log.Fields { func (s *Service) logFields() log.Fields {
return log.Fields{ return log.Fields{
"todoLen": len(s.taskQueues.Todo), "todoLen": len(s.state.Todo),
"pendingLen": len(s.taskQueues.Pending), "pendingLen": len(s.state.Pending),
"doneLen": len(s.taskQueues.Done), "doneLen": len(s.state.Done),
"failedLen": len(s.taskQueues.Failed), "failedLen": len(s.state.Failed),
"curPass": s.state.CurPass,
} }
} }
...@@ -366,17 +372,17 @@ func (s *Service) GetTask(passID int, task *Task) error { ...@@ -366,17 +372,17 @@ func (s *Service) GetTask(passID int, task *Task) error {
s.mu.Lock() s.mu.Lock()
defer s.mu.Unlock() defer s.mu.Unlock()
if passID < s.currPass { if passID < s.state.CurPass {
return ErrPassBefore return ErrPassBefore
} }
if passID > s.currPass { if passID > s.state.CurPass {
// Client may get run to pass after master when one client faster than the // Client may get run to pass after master when one client faster than the
// other // other
return ErrPassAfter return ErrPassAfter
} }
if len(s.taskQueues.Todo) == 0 { if len(s.state.Todo) == 0 {
if len(s.taskQueues.Done) == 0 && len(s.taskQueues.Pending) == 0 { if len(s.state.Done) == 0 && len(s.state.Pending) == 0 {
log.WithFields(s.logFields()).Warningln("All tasks failed, may start next pass") log.WithFields(s.logFields()).Warningln("All tasks failed, may start next pass")
return ErrAllTaskFailed return ErrAllTaskFailed
} }
...@@ -384,10 +390,10 @@ func (s *Service) GetTask(passID int, task *Task) error { ...@@ -384,10 +390,10 @@ func (s *Service) GetTask(passID int, task *Task) error {
return ErrNoMoreAvailable return ErrNoMoreAvailable
} }
t := s.taskQueues.Todo[0] t := s.state.Todo[0]
t.Task.Meta.Epoch++ t.Task.Meta.Epoch++
s.taskQueues.Todo = s.taskQueues.Todo[1:] s.state.Todo = s.state.Todo[1:]
s.taskQueues.Pending[t.Task.Meta.ID] = t s.state.Pending[t.Task.Meta.ID] = t
err := s.snapshot() err := s.snapshot()
if err != nil { if err != nil {
return err return err
...@@ -409,7 +415,7 @@ func (s *Service) TaskFinished(taskID int, dummy *int) error { ...@@ -409,7 +415,7 @@ func (s *Service) TaskFinished(taskID int, dummy *int) error {
s.mu.Lock() s.mu.Lock()
defer s.mu.Unlock() defer s.mu.Unlock()
t, ok := s.taskQueues.Pending[taskID] t, ok := s.state.Pending[taskID]
if !ok { if !ok {
log.WithFields(s.logFields()).Warningln("Pending task #%d not found.", taskID) log.WithFields(s.logFields()).Warningln("Pending task #%d not found.", taskID)
return nil return nil
...@@ -417,18 +423,18 @@ func (s *Service) TaskFinished(taskID int, dummy *int) error { ...@@ -417,18 +423,18 @@ func (s *Service) TaskFinished(taskID int, dummy *int) error {
// task finished, reset timeout // task finished, reset timeout
t.NumFailure = 0 t.NumFailure = 0
s.taskQueues.Done = append(s.taskQueues.Done, t) s.state.Done = append(s.state.Done, t)
delete(s.taskQueues.Pending, taskID) delete(s.state.Pending, taskID)
log.WithFields(s.logFields()).Infof("Task #%d finished.", taskID) log.WithFields(s.logFields()).Infof("Task #%d finished.", taskID)
if len(s.taskQueues.Todo) == 0 && len(s.taskQueues.Pending) == 0 { if len(s.state.Todo) == 0 && len(s.state.Pending) == 0 {
// increase master side pass count if all tasks finished // increase master side pass count if all tasks finished
s.currPass++ s.state.CurPass++
s.taskQueues.Todo = s.jobTasks s.state.Todo = append(s.state.Done, s.state.Failed...)
s.taskQueues.Done = []taskEntry{} s.state.Done = []taskEntry{}
// TODO(typhoonzero): deal with failed tasks // TODO(typhoonzero): deal with failed tasks
s.taskQueues.Failed = []taskEntry{} s.state.Failed = []taskEntry{}
log.WithFields(s.logFields()).Warningf("all task finished, add new pass data, newpass: %d.", s.currPass) log.WithFields(s.logFields()).Warningf("all task finished, add new pass data, newpass: %d.", s.state.CurPass)
} }
err := s.snapshot() err := s.snapshot()
...@@ -447,7 +453,7 @@ func (s *Service) TaskFailed(meta TaskMeta, dummy *int) error { ...@@ -447,7 +453,7 @@ func (s *Service) TaskFailed(meta TaskMeta, dummy *int) error {
s.mu.Lock() s.mu.Lock()
defer s.mu.Unlock() defer s.mu.Unlock()
t, ok := s.taskQueues.Pending[meta.ID] t, ok := s.state.Pending[meta.ID]
if !ok { if !ok {
log.WithFields(s.logFields()).Warningln("TaskFailed:Pending task #%v not found.", t.Task.Meta) log.WithFields(s.logFields()).Warningln("TaskFailed:Pending task #%v not found.", t.Task.Meta)
return nil return nil
......
...@@ -59,7 +59,7 @@ func initClient() [numPserver]int { ...@@ -59,7 +59,7 @@ func initClient() [numPserver]int {
go func(l net.Listener) { go func(l net.Listener) {
var cp pserver.Checkpoint var cp pserver.Checkpoint
s, err := pserver.NewService(0, 1, "", nil, cp) s, err := pserver.NewService(0, time.Hour, "", nil, cp)
if err != nil { if err != nil {
panic(err) panic(err)
} }
......
...@@ -103,7 +103,7 @@ func (p *EtcdClient) List() []Server { ...@@ -103,7 +103,7 @@ func (p *EtcdClient) List() []Server {
time.Sleep(p.timeout) time.Sleep(p.timeout)
continue continue
} }
log.Infof("got value (%s) for key: %s", psAddr, psKey) log.Debugf("got value (%s) for key: %s", psAddr, psKey)
servers[i].Index = i servers[i].Index = i
servers[i].Addr = psAddr servers[i].Addr = psAddr
} }
......
...@@ -206,6 +206,7 @@ func (e *EtcdClient) GetKey(key string, timeout time.Duration) ([]byte, error) { ...@@ -206,6 +206,7 @@ func (e *EtcdClient) GetKey(key string, timeout time.Duration) ([]byte, error) {
if err != nil { if err != nil {
return []byte{}, err return []byte{}, err
} }
kvs := resp.Kvs kvs := resp.Kvs
if len(kvs) == 0 { if len(kvs) == 0 {
return []byte{}, nil return []byte{}, nil
...@@ -215,9 +216,14 @@ func (e *EtcdClient) GetKey(key string, timeout time.Duration) ([]byte, error) { ...@@ -215,9 +216,14 @@ func (e *EtcdClient) GetKey(key string, timeout time.Duration) ([]byte, error) {
} }
// PutKey put into etcd with value by key specified // PutKey put into etcd with value by key specified
func (e *EtcdClient) PutKey(key string, value []byte, timeout time.Duration) error { func (e *EtcdClient) PutKey(key string, value []byte, timeout time.Duration, withLease bool) error {
ctx, cancel := context.WithTimeout(context.Background(), timeout) ctx, cancel := context.WithTimeout(context.Background(), timeout)
_, err := e.client.Put(ctx, key, string(value), clientv3.WithLease(e.sess.Lease())) var err error
if withLease {
_, err = e.client.Put(ctx, key, string(value), clientv3.WithLease(e.sess.Lease()))
} else {
_, err = e.client.Put(ctx, key, string(value))
}
cancel() cancel()
return err return err
} }
......
...@@ -32,6 +32,7 @@ type optimizer struct { ...@@ -32,6 +32,7 @@ type optimizer struct {
opt *C.struct_paddle_optimizer opt *C.struct_paddle_optimizer
elementType ElementType elementType ElementType
contentLen int contentLen int
config []byte
} }
func cArrayToSlice(p unsafe.Pointer, len int) []byte { func cArrayToSlice(p unsafe.Pointer, len int) []byte {
...@@ -70,6 +71,7 @@ func newOptimizer(paramWithConfigs ParameterWithConfig, State []byte) *optimizer ...@@ -70,6 +71,7 @@ func newOptimizer(paramWithConfigs ParameterWithConfig, State []byte) *optimizer
cstate = unsafe.Pointer(&s[0]) cstate = unsafe.Pointer(&s[0])
} }
o.config = c
o.opt = C.paddle_create_optimizer((*C.uchar)(&c[0]), C.int(len(c)), o.opt = C.paddle_create_optimizer((*C.uchar)(&c[0]), C.int(len(c)),
C.paddle_element_type(p.ElementType), cbuffer, C.int(paramBufferSize), (*C.char)(cstate), C.int(len(s))) C.paddle_element_type(p.ElementType), cbuffer, C.int(paramBufferSize), (*C.char)(cstate), C.int(len(s)))
return o return o
......
...@@ -25,11 +25,13 @@ import ( ...@@ -25,11 +25,13 @@ import (
"fmt" "fmt"
"io/ioutil" "io/ioutil"
"os" "os"
"path/filepath" "path"
"strconv" "strconv"
"sync" "sync"
"time" "time"
uuid "github.com/satori/go.uuid"
log "github.com/sirupsen/logrus" log "github.com/sirupsen/logrus"
) )
...@@ -42,9 +44,9 @@ var ErrCheckpointNotFound = errors.New("checkpoint not found") ...@@ -42,9 +44,9 @@ var ErrCheckpointNotFound = errors.New("checkpoint not found")
// RPC error message. // RPC error message.
const ( const (
AlreadyInitialized = "pserver already initialized" AlreadyInitialized = "pserver already initialized"
Uninitialized = "pserver not fully initialized" Uninitialized = "pserver not fully initialized"
CheckpointMD5Failed = "checkpoint file MD5 validation failed" WrongChecksum = "checkpoint file checksum validation failed"
) )
// Supported element types. // Supported element types.
...@@ -73,11 +75,12 @@ type ParameterWithConfig struct { ...@@ -73,11 +75,12 @@ type ParameterWithConfig struct {
// checkpointMeta saves checkpoint metadata // checkpointMeta saves checkpoint metadata
type checkpointMeta struct { type checkpointMeta struct {
UUID string `json:"uuid"` UUID string `json:"uuid"`
Path string `json:"path"`
MD5 string `json:"md5"` MD5 string `json:"md5"`
Timestamp int64 `json:"timestamp"` Timestamp int64 `json:"timestamp"`
} }
// Checkpoint is the pserver shard persist in file // Checkpoint is the pserver shard persist in file.
type Checkpoint []parameterCheckpoint type Checkpoint []parameterCheckpoint
// Gradient is the gradient of the parameter. // Gradient is the gradient of the parameter.
...@@ -90,50 +93,58 @@ type Service struct { ...@@ -90,50 +93,58 @@ type Service struct {
checkpointInterval time.Duration checkpointInterval time.Duration
checkpointPath string checkpointPath string
client *EtcdClient client *EtcdClient
mu sync.Mutex
optMap map[string]*optimizer mu sync.Mutex
optMap map[string]*optimizer
} }
// parameterCheckpoint saves parameter checkpoint // parameterCheckpoint saves parameter checkpoint.
type parameterCheckpoint struct { type parameterCheckpoint struct {
ParameterWithConfig ParameterWithConfig
State []byte State []byte
} }
// NewCheckpointFromFile loads parameters and state from checkpoint file func loadMeta(e *EtcdClient, idx int) (meta checkpointMeta, err error) {
func NewCheckpointFromFile(cpPath string, idx int, e *EtcdClient) (Checkpoint, error) { v, err := e.GetKey(PsCheckpoint+strconv.Itoa(idx), 3*time.Second)
v, err := e.GetKey(PsPath+string(idx), 3*time.Second)
if err != nil { if err != nil {
return nil, err return
} }
if len(v) == 0 { if len(v) == 0 {
return nil, ErrCheckpointNotFound err = ErrCheckpointNotFound
return
} }
var cpMeta checkpointMeta if err = json.Unmarshal(v, &meta); err != nil {
if err = json.Unmarshal(v, &cpMeta); err != nil { return
return nil, err
} }
fn := filepath.Join(cpPath, cpMeta.UUID) return
if _, err = os.Stat(fn); os.IsNotExist(err) { }
// LoadCheckpoint loads checkpoint from file.
func LoadCheckpoint(e *EtcdClient, idx int) (Checkpoint, error) {
cpMeta, err := loadMeta(e, idx)
if err != nil {
return nil, err return nil, err
} }
content, err := ioutil.ReadFile(fn)
content, err := ioutil.ReadFile(cpMeta.Path)
if err != nil { if err != nil {
return nil, err return nil, err
} }
// TODO(helin): change MD5 to CRC since CRC is better for file
// checksum in our use case (emphasize speed over security).
h := md5.New() h := md5.New()
md5 := hex.EncodeToString(h.Sum(content)) md5 := hex.EncodeToString(h.Sum(content))
if md5 != cpMeta.MD5 { if md5 != cpMeta.MD5 {
return nil, errors.New(CheckpointMD5Failed) return nil, errors.New(WrongChecksum)
} }
dec := gob.NewDecoder(bytes.NewReader(content)) dec := gob.NewDecoder(bytes.NewReader(content))
cp := Checkpoint{} var cp Checkpoint
if err = dec.Decode(cp); err != nil { if err = dec.Decode(&cp); err != nil {
return nil, err return nil, err
} }
return cp, nil return cp, nil
...@@ -193,6 +204,15 @@ func (s *Service) FinishInitParams(_ int, _ *int) error { ...@@ -193,6 +204,15 @@ func (s *Service) FinishInitParams(_ int, _ *int) error {
} }
close(s.initialized) close(s.initialized)
go func() {
t := time.Tick(s.checkpointInterval)
for range t {
err := s.checkpoint()
if err != nil {
log.Errorln(err)
}
}
}()
return nil return nil
} }
...@@ -240,23 +260,36 @@ func (s *Service) GetParam(name string, parameter *Parameter) error { ...@@ -240,23 +260,36 @@ func (s *Service) GetParam(name string, parameter *Parameter) error {
return nil return nil
} }
// pserver save checkpoint func traceTime(start time.Time, name string) {
func (s *Service) doCheckpoint() (err error) { elapsed := time.Since(start)
<-s.initialized log.Infof("%s took %v", name, elapsed)
s.mu.Lock() }
defer s.mu.Unlock()
// checkpoint saves checkpoint to disk.
//
// checkpoint should be only called after the parameters are
// initialized.
func (s *Service) checkpoint() (err error) {
log.Infoln("Begin save checkpoint.")
defer traceTime(time.Now(), "save checkpoint")
s.mu.Lock()
cp := make([]parameterCheckpoint, len(s.optMap)) cp := make([]parameterCheckpoint, len(s.optMap))
index := 0 index := 0
// TODO(helin): write checkpoint incrementally to reduce memory
// footprint during checkpoint.
for name, opt := range s.optMap { for name, opt := range s.optMap {
var pc parameterCheckpoint var pc parameterCheckpoint
pc.Param.Name = name pc.Param.Name = name
pc.Param.ElementType = opt.elementType pc.Param.ElementType = opt.elementType
pc.Param.Content = opt.GetWeights() pc.Param.Content = opt.GetWeights()
pc.Config = opt.config
pc.State = opt.GetStates() pc.State = opt.GetStates()
cp[index] = pc cp[index] = pc
index++ index++
} }
s.mu.Unlock()
var buf bytes.Buffer var buf bytes.Buffer
encoder := gob.NewEncoder(&buf) encoder := gob.NewEncoder(&buf)
err = encoder.Encode(cp) err = encoder.Encode(cp)
...@@ -264,32 +297,9 @@ func (s *Service) doCheckpoint() (err error) { ...@@ -264,32 +297,9 @@ func (s *Service) doCheckpoint() (err error) {
return return
} }
cpMeta := checkpointMeta{} id := uuid.NewV4().String()
cpMeta.UUID = s.checkpointPath + strconv.Itoa(s.idx) p := path.Join(s.checkpointPath, id)
cpMeta.Timestamp = time.Now().UnixNano() f, err := os.Create(p)
h := md5.New()
cpMeta.MD5 = hex.EncodeToString(h.Sum(buf.Bytes()))
cpMetajson, err := json.Marshal(cpMeta)
if err != nil {
return
}
err = s.client.PutKey(filepath.Join(PsCheckpoint, strconv.Itoa(s.idx)), cpMetajson, 3*time.Second)
if err != nil {
return
}
if _, err = os.Stat(cpMeta.UUID); os.IsNotExist(err) {
log.Info("checkpoint does not exists.")
} else {
err = os.Remove(cpMeta.UUID)
if err != nil {
log.Infof("Removing checkpoint %s failed", cpMeta.UUID)
} else {
log.Infof("checkpoint %s already exsits, removing ", cpMeta.UUID)
}
}
f, err := os.Create(cpMeta.UUID)
if err != nil { if err != nil {
return return
} }
...@@ -317,5 +327,43 @@ func (s *Service) doCheckpoint() (err error) { ...@@ -317,5 +327,43 @@ func (s *Service) doCheckpoint() (err error) {
return return
} }
oldMeta, err := loadMeta(s.client, s.idx)
if err == ErrCheckpointNotFound {
log.Infoln("Do not have existing checkpoint.")
err = nil
}
if err != nil {
return
}
h := md5.New()
md5 := hex.EncodeToString(h.Sum(buf.Bytes()))
cpMeta := checkpointMeta{
UUID: id,
Timestamp: time.Now().UnixNano(),
MD5: md5,
Path: p,
}
json, err := json.Marshal(cpMeta)
if err != nil {
return
}
err = s.client.PutKey(PsCheckpoint+strconv.Itoa(s.idx), json, 3*time.Second, false)
if err != nil {
return
}
if oldMeta.Path != "" {
rmErr := os.Remove(oldMeta.Path)
if rmErr != nil {
// log error, but still treat checkpoint as
// successful.
log.Errorln(rmErr)
}
}
return return
} }
...@@ -30,7 +30,7 @@ const ( ...@@ -30,7 +30,7 @@ const (
func TestServiceFull(t *testing.T) { func TestServiceFull(t *testing.T) {
var cp pserver.Checkpoint var cp pserver.Checkpoint
s, err := pserver.NewService(0, 1, "", nil, cp) s, err := pserver.NewService(0, time.Hour, "", nil, cp)
if err != nil { if err != nil {
t.Error(err) t.Error(err)
} }
...@@ -102,7 +102,7 @@ func TestServiceFull(t *testing.T) { ...@@ -102,7 +102,7 @@ func TestServiceFull(t *testing.T) {
func TestMultipleInit(t *testing.T) { func TestMultipleInit(t *testing.T) {
var cp pserver.Checkpoint var cp pserver.Checkpoint
s, err := pserver.NewService(0, 1, "", nil, cp) s, err := pserver.NewService(0, time.Hour, "", nil, cp)
if err != nil { if err != nil {
t.Fatal(err) t.Fatal(err)
} }
...@@ -119,7 +119,7 @@ func TestMultipleInit(t *testing.T) { ...@@ -119,7 +119,7 @@ func TestMultipleInit(t *testing.T) {
func TestUninitialized(t *testing.T) { func TestUninitialized(t *testing.T) {
var cp pserver.Checkpoint var cp pserver.Checkpoint
s, err := pserver.NewService(0, 1, "", nil, cp) s, err := pserver.NewService(0, time.Hour, "", nil, cp)
err = s.SendGrad(pserver.Gradient{}, nil) err = s.SendGrad(pserver.Gradient{}, nil)
if err.Error() != pserver.Uninitialized { if err.Error() != pserver.Uninitialized {
t.Fatal(err) t.Fatal(err)
...@@ -128,7 +128,7 @@ func TestUninitialized(t *testing.T) { ...@@ -128,7 +128,7 @@ func TestUninitialized(t *testing.T) {
func TestBlockUntilInitialized(t *testing.T) { func TestBlockUntilInitialized(t *testing.T) {
var cp pserver.Checkpoint var cp pserver.Checkpoint
s, err := pserver.NewService(0, 1, "", nil, cp) s, err := pserver.NewService(0, time.Hour, "", nil, cp)
if err != nil { if err != nil {
t.Error(err) t.Error(err)
} }
......
...@@ -22,7 +22,5 @@ if(WITH_C_API) ...@@ -22,7 +22,5 @@ if(WITH_C_API)
endif() endif()
if(WITH_SWIG_PY) if(WITH_SWIG_PY)
configure_file(${CMAKE_CURRENT_SOURCE_DIR}/setup.py.in
${CMAKE_CURRENT_SOURCE_DIR}/setup.py)
add_subdirectory(api) add_subdirectory(api)
endif() endif()
...@@ -82,9 +82,7 @@ SWIG_LINK_LIBRARIES(swig_paddle ...@@ -82,9 +82,7 @@ SWIG_LINK_LIBRARIES(swig_paddle
add_custom_command(OUTPUT ${PROJ_ROOT}/paddle/py_paddle/_swig_paddle.so add_custom_command(OUTPUT ${PROJ_ROOT}/paddle/py_paddle/_swig_paddle.so
COMMAND cp ${CMAKE_CURRENT_BINARY_DIR}/swig_paddle.py ${PROJ_ROOT}/paddle/py_paddle COMMAND cp ${CMAKE_CURRENT_BINARY_DIR}/swig_paddle.py ${PROJ_ROOT}/paddle/py_paddle
COMMAND cp ${CMAKE_CURRENT_BINARY_DIR}/_swig_paddle.so ${PROJ_ROOT}/paddle/py_paddle COMMAND cp ${CMAKE_CURRENT_BINARY_DIR}/_swig_paddle.so ${PROJ_ROOT}/paddle/py_paddle
COMMAND env ${py_env} ${PYTHON_EXECUTABLE} setup.py bdist_wheel COMMAND ${CMAKE_COMMAND} -E touch .timestamp
COMMAND ${CMAKE_COMMAND} -E touch dist/.timestamp
COMMAND rm -rf py_paddle.egg-info build
WORKING_DIRECTORY ${PROJ_ROOT}/paddle WORKING_DIRECTORY ${PROJ_ROOT}/paddle
DEPENDS _swig_paddle DEPENDS _swig_paddle
) )
...@@ -92,10 +90,6 @@ add_custom_command(OUTPUT ${PROJ_ROOT}/paddle/py_paddle/_swig_paddle.so ...@@ -92,10 +90,6 @@ add_custom_command(OUTPUT ${PROJ_ROOT}/paddle/py_paddle/_swig_paddle.so
# TODO(yuyang18) : make wheel name calculated by cmake # TODO(yuyang18) : make wheel name calculated by cmake
add_custom_target(python_api_wheel ALL DEPENDS ${PROJ_ROOT}/paddle/py_paddle/_swig_paddle.so) add_custom_target(python_api_wheel ALL DEPENDS ${PROJ_ROOT}/paddle/py_paddle/_swig_paddle.so)
install(DIRECTORY ${CMAKE_SOURCE_DIR}/paddle/dist/
DESTINATION opt/paddle/share/wheels
)
if(WITH_TESTING) if(WITH_TESTING)
IF(NOT PY_PIP_FOUND) IF(NOT PY_PIP_FOUND)
SET(PIP_SOURCES_DIR ${PYTHON_SOURCES_DIR}/pip) SET(PIP_SOURCES_DIR ${PYTHON_SOURCES_DIR}/pip)
...@@ -108,7 +102,7 @@ if(WITH_TESTING) ...@@ -108,7 +102,7 @@ if(WITH_TESTING)
BUILD_COMMAND "" BUILD_COMMAND ""
INSTALL_COMMAND env ${py_env} ${PYTHON_EXECUTABLE} setup.py install INSTALL_COMMAND env ${py_env} ${PYTHON_EXECUTABLE} setup.py install
BUILD_IN_SOURCE 1 BUILD_IN_SOURCE 1
DEPENDS python setuptools python_api_wheel #DEPENDS python setuptools python_api_wheel
) )
ENDIF() ENDIF()
add_subdirectory(test) add_subdirectory(test)
......
...@@ -39,6 +39,7 @@ set(CUDA_CU_SOURCES ...@@ -39,6 +39,7 @@ set(CUDA_CU_SOURCES
src/hl_cuda_lstm.cu src/hl_cuda_lstm.cu
src/hl_top_k.cu src/hl_top_k.cu
src/hl_batch_transpose.cu src/hl_batch_transpose.cu
src/hl_batch_norm.cu
src/hl_cuda_sequence.cu src/hl_cuda_sequence.cu
src/hl_table_apply.cu) src/hl_table_apply.cu)
......
/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserve.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
#ifndef HL_BATCH_NORM_H_
#define HL_BATCH_NORM_H_
#include "hl_base.h"
/**
 * @brief   Batch normalization inference implemented as a plain CUDA kernel.
 *
 * For every element the implementation computes
 *   output = (input - estimatedMean[c]) / sqrt(estimatedVar[c] + epsilon)
 *            * scale[c] + bias[c]
 * where c is the channel the element belongs to.
 *
 * @param[in]   input           input data, addressed as batchSize consecutive
 *                              samples of channel * height * width elements.
 * @param[out]  output          output data, addressed the same way as input.
 * @param[in]   scale           batch normalization scale parameter (in the
 *                              original paper scale is referred to as gamma),
 *                              indexed by channel.
 * @param[in]   bias            batch normalization bias parameter (in the
 *                              original paper bias is referred to as beta),
 *                              indexed by channel.
 * @param[in]   estimatedMean   moving mean accumulated during the training
 *                              phase, indexed by channel.
 * @param[in]   estimatedVar    moving variance accumulated during the
 *                              training phase, indexed by channel.
 * @param[in]   epsilon         epsilon value added to the variance in the
 *                              batch normalization formula.
 * @param[in]   batchSize       number of samples; used as the number of
 *                              thread blocks launched.
 * @param[in]   channel         number of channels per sample.
 * @param[in]   height          height of each channel plane.
 * @param[in]   width           width of each channel plane.
 */
extern void hl_batch_norm_cuda_inference(const real* input,
real* output,
const real* scale,
const real* bias,
const real* estimatedMean,
const real* estimatedVar,
const double epsilon,
size_t batchSize,
size_t channel,
size_t height,
size_t width);
#endif // HL_BATCH_NORM_H_
/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserve.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
#include "hl_batch_norm.h"
/**
 * Batch normalization inference kernel.
 *
 * The sample processed by a block is selected by blockIdx.x; the threads of
 * that block walk over the sample's channel * height * width elements in
 * steps of blockDim.x. For each element:
 *   output = (input - estimatedMean[c]) / sqrt(estimatedVar[c] + epsilon)
 *            * scale[c] + bias[c]
 * with c being the element's channel.
 *
 * All offsets are computed in size_t: a flat element index of
 * batch * channel * height * width does not fit into a 32-bit int for large
 * batches, and would otherwise wrap around and address the wrong memory.
 *
 * batchSize is not read inside the kernel; it is kept so the parameter list
 * mirrors the host-side wrapper.
 */
__global__ void batchNormInference(real* output,
                                   const real* input,
                                   const real* scale,
                                   const real* bias,
                                   const real* estimatedMean,
                                   const real* estimatedVar,
                                   const double epsilon,
                                   size_t batchSize,
                                   size_t channel,
                                   size_t height,
                                   size_t width) {
  // Number of elements in one channel plane and in one whole sample.
  const size_t planeSize = height * width;
  const size_t num = channel * planeSize;
  // Offset of the first element of the sample handled by this block.
  const size_t sampleOffset = static_cast<size_t>(blockIdx.x) * num;

  for (size_t i = threadIdx.x; i < num; i += blockDim.x) {
    const size_t c = i / planeSize;
    const size_t id = sampleOffset + i;
    real val = input[id] - estimatedMean[c];
    val /= sqrt(estimatedVar[c] + epsilon);
    val *= scale[c];
    val += bias[c];
    output[id] = val;
  }
}
// Host-side entry point: launches batchNormInference on STREAM_DEFAULT with
// batchSize thread blocks of 256 threads each, then checks for launch errors.
void hl_batch_norm_cuda_inference(const real* input,
                                  real* output,
                                  const real* scale,
                                  const real* bias,
                                  const real* estimatedMean,
                                  const real* estimatedVar,
                                  const double epsilon,
                                  size_t batchSize,
                                  size_t channel,
                                  size_t height,
                                  size_t width) {
  // Launch geometry: as many blocks as batchSize, 256 threads per block,
  // no dynamic shared memory.
  const dim3 blocks(batchSize);
  const dim3 threads(256);

  batchNormInference<<<blocks, threads, 0, STREAM_DEFAULT>>>(
      output, input, scale, bias, estimatedMean, estimatedVar, epsilon,
      batchSize, channel, height, width);

  CHECK_SYNC("hl_batch_norm_cuda_inference failed!");
}
...@@ -1023,14 +1023,6 @@ void hl_batch_norm_forward_inference(hl_tensor_descriptor inputDesc, ...@@ -1023,14 +1023,6 @@ void hl_batch_norm_forward_inference(hl_tensor_descriptor inputDesc,
real beta = 1.0f; real beta = 1.0f;
cudnnBatchNormMode_t mode = CUDNN_BATCHNORM_SPATIAL; cudnnBatchNormMode_t mode = CUDNN_BATCHNORM_SPATIAL;
int batch_size = ((cudnn_tensor_descriptor)inputDesc)->batch_size;
if (batch_size > 1024 && g_cudnn_lib_version < 6000) {
LOG(INFO) << " To process current batch data with size " << batch_size
<< " (>1024), cudnnBatchNorm requires cuDNN version >= 6000."
<< " If there is an error complaining CUDNN_STATUS_NOT_SUPPORTED,"
<< " just recompile PaddlePaddle with cuDNN >= 6000, replacing"
<< " current version " << g_cudnn_lib_version;
}
CHECK_CUDNN( CHECK_CUDNN(
dynload::cudnnBatchNormalizationForwardInference(t_resource.cudnn_handle, dynload::cudnnBatchNormalizationForwardInference(t_resource.cudnn_handle,
mode, mode,
......
...@@ -35,6 +35,8 @@ add_dependencies(framework_py_proto framework_py_proto_init) ...@@ -35,6 +35,8 @@ add_dependencies(framework_py_proto framework_py_proto_init)
cc_library(backward SRCS backward.cc DEPS net_op) cc_library(backward SRCS backward.cc DEPS net_op)
cc_test(backward_test SRCS backward_test.cc DEPS backward) cc_test(backward_test SRCS backward_test.cc DEPS backward)
if(WITH_PYTHON)
cc_library(paddle_pybind SHARED cc_library(paddle_pybind SHARED
SRCS pybind.cc SRCS pybind.cc
DEPS pybind python backward DEPS pybind python backward
...@@ -44,4 +46,6 @@ cc_library(paddle_pybind SHARED ...@@ -44,4 +46,6 @@ cc_library(paddle_pybind SHARED
mean_op mean_op
cross_entropy_op cross_entropy_op
gaussian_random_op gaussian_random_op
fill_zeros_like_op
recurrent_op) recurrent_op)
endif(WITH_PYTHON)
...@@ -260,6 +260,12 @@ class OpRegistry { ...@@ -260,6 +260,12 @@ class OpRegistry {
return CreateOp(op_desc.type(), inputs, outputs, attrs); return CreateOp(op_desc.type(), inputs, outputs, attrs);
} }
static bool SupportGPU(const std::string& op_type) {
OperatorWithKernel::OpKernelKey key;
key.place_ = platform::GPUPlace();
return OperatorWithKernel::AllOpKernels().at(op_type).count(key) != 0;
}
static std::shared_ptr<OperatorBase> CreateGradOp(const OperatorBase& op) { static std::shared_ptr<OperatorBase> CreateGradOp(const OperatorBase& op) {
PADDLE_ENFORCE(!op.IsNetOp(), PADDLE_ENFORCE(!op.IsNetOp(),
"Use framework::Backward to get backward ops"); "Use framework::Backward to get backward ops");
......
...@@ -34,8 +34,8 @@ ExecutionContext::GetEigenDevice<platform::GPUPlace, Eigen::GpuDevice>() const { ...@@ -34,8 +34,8 @@ ExecutionContext::GetEigenDevice<platform::GPUPlace, Eigen::GpuDevice>() const {
#endif #endif
const std::string& OperatorBase::Input(const std::string& name) const { const std::string& OperatorBase::Input(const std::string& name) const {
PADDLE_ENFORCE(in_out_idxs_ != nullptr, PADDLE_ENFORCE_NOT_NULL(in_out_idxs_,
"Input Output Indices could not be nullptr"); "Input Output Indices could not be nullptr");
auto it = in_out_idxs_->find(name); auto it = in_out_idxs_->find(name);
PADDLE_ENFORCE(it != in_out_idxs_->end(), "no key [%s] in in_out_idxs_", PADDLE_ENFORCE(it != in_out_idxs_->end(), "no key [%s] in in_out_idxs_",
name); name);
...@@ -49,7 +49,7 @@ const std::string& OperatorBase::Input(const std::string& name) const { ...@@ -49,7 +49,7 @@ const std::string& OperatorBase::Input(const std::string& name) const {
} }
std::vector<std::string> OperatorBase::Inputs(const std::string& name) const { std::vector<std::string> OperatorBase::Inputs(const std::string& name) const {
PADDLE_ENFORCE(in_out_idxs_ != nullptr, "IO Idx could not be nullptr"); PADDLE_ENFORCE_NOT_NULL(in_out_idxs_, "IO Idx could not be nullptr");
auto input_format = GetAttr<std::vector<int>>("input_format"); auto input_format = GetAttr<std::vector<int>>("input_format");
auto offset = in_out_idxs_->at(name); auto offset = in_out_idxs_->at(name);
PADDLE_ENFORCE(input_format.at(static_cast<size_t>(offset) + 1) <= PADDLE_ENFORCE(input_format.at(static_cast<size_t>(offset) + 1) <=
...@@ -62,7 +62,7 @@ std::vector<std::string> OperatorBase::Inputs(const std::string& name) const { ...@@ -62,7 +62,7 @@ std::vector<std::string> OperatorBase::Inputs(const std::string& name) const {
} }
const std::string& OperatorBase::Output(const std::string& name) const { const std::string& OperatorBase::Output(const std::string& name) const {
PADDLE_ENFORCE(in_out_idxs_ != nullptr, "InOut Indice could not be nullptr"); PADDLE_ENFORCE_NOT_NULL(in_out_idxs_, "InOut Indice could not be nullptr");
auto it = in_out_idxs_->find(name); auto it = in_out_idxs_->find(name);
PADDLE_ENFORCE(it != in_out_idxs_->end(), "no key [%s] in in_out_idxs_", PADDLE_ENFORCE(it != in_out_idxs_->end(), "no key [%s] in in_out_idxs_",
name); name);
...@@ -76,7 +76,7 @@ const std::string& OperatorBase::Output(const std::string& name) const { ...@@ -76,7 +76,7 @@ const std::string& OperatorBase::Output(const std::string& name) const {
} }
std::vector<std::string> OperatorBase::Outputs(const std::string& name) const { std::vector<std::string> OperatorBase::Outputs(const std::string& name) const {
PADDLE_ENFORCE(in_out_idxs_ != nullptr, "InOut Indice could not be nullptr"); PADDLE_ENFORCE_NOT_NULL(in_out_idxs_, "InOut Indice could not be nullptr");
auto output_format = GetAttr<std::vector<int>>("output_format"); auto output_format = GetAttr<std::vector<int>>("output_format");
auto offset = in_out_idxs_->at(name); auto offset = in_out_idxs_->at(name);
PADDLE_ENFORCE(output_format.at(static_cast<size_t>(offset) + 1) <= PADDLE_ENFORCE(output_format.at(static_cast<size_t>(offset) + 1) <=
......
...@@ -167,15 +167,15 @@ class OperatorContext { ...@@ -167,15 +167,15 @@ class OperatorContext {
template <typename T> template <typename T>
const T* Input(const size_t index) const { const T* Input(const size_t index) const {
auto var = InputVar(index); auto var = InputVar(index);
PADDLE_ENFORCE(var != nullptr, "Input(%d) should not be nullptr", index); PADDLE_ENFORCE_NOT_NULL(var, "Input(%d) should not be nullptr", index);
return &var->Get<T>(); return &var->Get<T>();
} }
template <typename T> template <typename T>
T* Output(const size_t index) const { T* Output(const size_t index) const {
auto var = OutputVar(index); auto var = OutputVar(index);
PADDLE_ENFORCE( PADDLE_ENFORCE_NOT_NULL(
var != nullptr, var,
"Output(%d) not be nullptr, which means variable [%s] does not " "Output(%d) not be nullptr, which means variable [%s] does not "
"exist in scope", "exist in scope",
index, op_.outputs_[index]); index, op_.outputs_[index]);
...@@ -185,14 +185,14 @@ class OperatorContext { ...@@ -185,14 +185,14 @@ class OperatorContext {
template <typename T> template <typename T>
const T* Input(const std::string& name) const { const T* Input(const std::string& name) const {
auto var = InputVar(name); auto var = InputVar(name);
PADDLE_ENFORCE(var != nullptr, "Input(%s) should not be nullptr", name); PADDLE_ENFORCE_NOT_NULL(var, "Input(%s) should not be nullptr", name);
return &var->Get<T>(); return &var->Get<T>();
} }
template <typename T> template <typename T>
T* Output(const std::string& name) const { T* Output(const std::string& name) const {
auto var = OutputVar(name); auto var = OutputVar(name);
PADDLE_ENFORCE(var != nullptr, "Output(%s) should not be nullptr", name); PADDLE_ENFORCE_NOT_NULL(var, "Output(%s) should not be nullptr", name);
return var->GetMutable<T>(); return var->GetMutable<T>();
} }
...@@ -204,9 +204,9 @@ class OperatorContext { ...@@ -204,9 +204,9 @@ class OperatorContext {
std::transform(names.begin(), names.end(), std::back_inserter(res), std::transform(names.begin(), names.end(), std::back_inserter(res),
[&](const std::string& sub_name) { [&](const std::string& sub_name) {
auto var = scope_.FindVar(sub_name); auto var = scope_.FindVar(sub_name);
PADDLE_ENFORCE(var != nullptr, PADDLE_ENFORCE_NOT_NULL(
"MultiInput(%s:%s) should not be nullptr", var, "MultiInput(%s:%s) should not be nullptr", name,
name, sub_name); sub_name);
return &var->Get<T>(); return &var->Get<T>();
}); });
return res; return res;
...@@ -220,9 +220,9 @@ class OperatorContext { ...@@ -220,9 +220,9 @@ class OperatorContext {
std::transform(names.begin(), names.end(), std::back_inserter(res), std::transform(names.begin(), names.end(), std::back_inserter(res),
[&](const std::string& sub_name) { [&](const std::string& sub_name) {
auto var = scope_.FindVar(sub_name); auto var = scope_.FindVar(sub_name);
PADDLE_ENFORCE(var != nullptr, PADDLE_ENFORCE_NOT_NULL(
"MultiOutput(%s:%s) should not be nullptr", var, "MultiOutput(%s:%s) should not be nullptr", name,
name, sub_name); sub_name);
return var->GetMutable<T>(); return var->GetMutable<T>();
}); });
return res; return res;
......
...@@ -32,7 +32,7 @@ limitations under the License. */ ...@@ -32,7 +32,7 @@ limitations under the License. */
namespace py = pybind11; namespace py = pybind11;
USE_OP(add_two); USE_OP(add_two);
USE_OP(onehot_cross_entropy); USE_OP_CPU(onehot_cross_entropy);
USE_OP_WITHOUT_KERNEL(fc); USE_OP_WITHOUT_KERNEL(fc);
USE_OP(sgd); USE_OP(sgd);
USE_OP(mul); USE_OP(mul);
...@@ -40,6 +40,7 @@ USE_OP(mean); ...@@ -40,6 +40,7 @@ USE_OP(mean);
USE_OP(sigmoid); USE_OP(sigmoid);
USE_OP(softmax); USE_OP(softmax);
USE_OP(rowwise_add); USE_OP(rowwise_add);
USE_OP(fill_zeros_like);
USE_OP_WITHOUT_KERNEL(recurrent_op); USE_OP_WITHOUT_KERNEL(recurrent_op);
USE_OP(gaussian_random); USE_OP(gaussian_random);
namespace paddle { namespace paddle {
...@@ -201,6 +202,8 @@ All parameter, weight, gradient are variables in Paddle. ...@@ -201,6 +202,8 @@ All parameter, weight, gradient are variables in Paddle.
return OpRegistry::CreateOp(desc); return OpRegistry::CreateOp(desc);
}); });
operator_base.def_static("support_gpu", &OpRegistry::SupportGPU);
operator_base.def("backward", operator_base.def("backward",
[](const OperatorBase &forwardOp, [](const OperatorBase &forwardOp,
const std::unordered_set<std::string> &no_grad_vars) { const std::unordered_set<std::string> &no_grad_vars) {
......
...@@ -127,8 +127,8 @@ class Tensor { ...@@ -127,8 +127,8 @@ class Tensor {
memory::PODDeleter<T, Place>(place)), memory::PODDeleter<T, Place>(place)),
place_(place), place_(place),
size_(size) { size_(size) {
PADDLE_ENFORCE(ptr_ != nullptr, "Insufficient %s memory to allocation.", PADDLE_ENFORCE_NOT_NULL(ptr_, "Insufficient %s memory to allocation.",
is_cpu_place(place_) ? "CPU" : "GPU"); (is_cpu_place(place_) ? "CPU" : "GPU"));
} }
virtual size_t size() const { return size_; } virtual size_t size() const { return size_; }
......
...@@ -14,17 +14,18 @@ limitations under the License. */ ...@@ -14,17 +14,18 @@ limitations under the License. */
#pragma once #pragma once
#include "paddle/memory/memcpy.h" #include "paddle/memory/memcpy.h"
#include "paddle/platform/enforce.h"
namespace paddle { namespace paddle {
namespace framework { namespace framework {
template <typename T> template <typename T>
inline void Tensor::check_memory_size() const { inline void Tensor::check_memory_size() const {
PADDLE_ENFORCE(holder_ != nullptr, PADDLE_ENFORCE_NOT_NULL(
"Tenosr holds no memory. Call Tensor::mutable_data first."); holder_, "Tenosr holds no memory. Call Tensor::mutable_data first.");
PADDLE_ENFORCE(holder_->size() >= product(dims_) * sizeof(T) + offset_, PADDLE_ENFORCE_GE(holder_->size(), product(dims_) * sizeof(T) + offset_,
"Tensor's dims_ is out of bound. Call Tensor::mutable_data " "Tensor's dims_ is out of bound. Call Tensor::mutable_data "
"first to re-allocate memory."); "first to re-allocate memory.");
} }
template <typename T> template <typename T>
...@@ -51,9 +52,9 @@ inline T* Tensor::mutable_data(DDim dims, platform::Place place) { ...@@ -51,9 +52,9 @@ inline T* Tensor::mutable_data(DDim dims, platform::Place place) {
template <typename T> template <typename T>
inline T* Tensor::mutable_data(platform::Place place) { inline T* Tensor::mutable_data(platform::Place place) {
static_assert(std::is_pod<T>::value, "T must be POD"); static_assert(std::is_pod<T>::value, "T must be POD");
PADDLE_ENFORCE(product(dims_) > 0, PADDLE_ENFORCE_GT(product(dims_), 0,
"Tensor's numel must be larger than zero to call " "Tensor's numel must be larger than zero to call "
"Tensor::mutable_data. Call Tensor::set_dim first."); "Tensor::mutable_data. Call Tensor::set_dim first.");
/* some versions of boost::variant don't have operator!= */ /* some versions of boost::variant don't have operator!= */
size_t size = product(dims_) * sizeof(T); size_t size = product(dims_) * sizeof(T);
if (holder_ == nullptr || !(holder_->place() == place) || if (holder_ == nullptr || !(holder_->place() == place) ||
...@@ -120,11 +121,11 @@ inline void Tensor::CopyFrom(const Tensor& src, ...@@ -120,11 +121,11 @@ inline void Tensor::CopyFrom(const Tensor& src,
template <typename T> template <typename T>
inline Tensor Tensor::Slice(const int& begin_idx, const int& end_idx) const { inline Tensor Tensor::Slice(const int& begin_idx, const int& end_idx) const {
check_memory_size<T>(); check_memory_size<T>();
PADDLE_ENFORCE(begin_idx >= 0, "Slice begin index is less than zero."); PADDLE_ENFORCE_GE(begin_idx, 0, "Slice begin index is less than zero.");
PADDLE_ENFORCE(end_idx <= dims_[0], "Slice end index is out of bound."); PADDLE_ENFORCE_LE(end_idx, dims_[0], "Slice end index is out of bound.");
PADDLE_ENFORCE(begin_idx < end_idx, PADDLE_ENFORCE_LT(begin_idx, end_idx,
"Begin index must be less than end index."); "Begin index must be less than end index.");
PADDLE_ENFORCE(dims_[0] != 1, "Can not slice a tensor with dims_[0] = 1."); PADDLE_ENFORCE_NE(dims_[0], 1, "Can not slice a tensor with dims_[0] = 1.");
int base = product(dims_) / dims_[0]; int base = product(dims_) / dims_[0];
Tensor dst; Tensor dst;
dst.holder_ = holder_; dst.holder_ = holder_;
......
...@@ -36,7 +36,8 @@ TEST(Tensor, DataAssert) { ...@@ -36,7 +36,8 @@ TEST(Tensor, DataAssert) {
} catch (paddle::platform::EnforceNotMet err) { } catch (paddle::platform::EnforceNotMet err) {
caught = true; caught = true;
std::string msg = std::string msg =
"Tenosr holds no memory. Call Tensor::mutable_data first."; "holder_ should not be null\nTenosr holds no memory. Call "
"Tensor::mutable_data first.";
const char* what = err.what(); const char* what = err.what();
for (size_t i = 0; i < msg.length(); ++i) { for (size_t i = 0; i < msg.length(); ++i) {
ASSERT_EQ(what[i], msg[i]); ASSERT_EQ(what[i], msg[i]);
...@@ -111,7 +112,8 @@ TEST(Tensor, ShareDataWith) { ...@@ -111,7 +112,8 @@ TEST(Tensor, ShareDataWith) {
} catch (paddle::platform::EnforceNotMet err) { } catch (paddle::platform::EnforceNotMet err) {
caught = true; caught = true;
std::string msg = std::string msg =
"Tenosr holds no memory. Call Tensor::mutable_data first."; "holder_ should not be null\nTenosr holds no memory. Call "
"Tensor::mutable_data first.";
const char* what = err.what(); const char* what = err.what();
for (size_t i = 0; i < msg.length(); ++i) { for (size_t i = 0; i < msg.length(); ++i) {
ASSERT_EQ(what[i], msg[i]); ASSERT_EQ(what[i], msg[i]);
......
...@@ -14,6 +14,7 @@ limitations under the License. */ ...@@ -14,6 +14,7 @@ limitations under the License. */
#include "CudnnBatchNormLayer.h" #include "CudnnBatchNormLayer.h"
#include "Layer.h" #include "Layer.h"
#include "paddle/cuda/include/hl_batch_norm.h"
#include "paddle/utils/Stat.h" #include "paddle/utils/Stat.h"
namespace paddle { namespace paddle {
...@@ -79,16 +80,33 @@ void CudnnBatchNormLayer::forward(PassType passType) { ...@@ -79,16 +80,33 @@ void CudnnBatchNormLayer::forward(PassType passType) {
savedInvVar); savedInvVar);
} else { } else {
// used movingMean and movingVar in testing // used movingMean and movingVar in testing
hl_batch_norm_forward_inference(ioDesc_, if (batchSize <= 1024) {
input, hl_batch_norm_forward_inference(ioDesc_,
ioDesc_, input,
output, ioDesc_,
bnParamDesc_, output,
gamma, bnParamDesc_,
beta, gamma,
movingMean, beta,
movingVar, movingMean,
EPS); movingVar,
EPS);
} else {
// There is a limitation in cudnn library.
// When the batch size is larger than 1024 in cuDNN v5.1,
// the cudnnBatchNormalizationForwardInference will fail.
hl_batch_norm_cuda_inference(input,
output,
gamma,
beta,
movingMean,
movingVar,
EPS,
batchSize,
channels_,
imageH_,
imageW_);
}
} }
/* activation */ { /* activation */ {
......
/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserve.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
#include "Layer.h"
#include "paddle/math/Matrix.h"
#include "paddle/math/Vector.h"
#include "paddle/utils/Logging.h"
#include "paddle/utils/Stat.h"
namespace paddle {
/*
 * Layer that selects sub-sequences out of a nested sequence.
 *
 * It takes exactly two inputs: the first must be a nested sequence, the
 * second is a matrix with one row per sequence holding the indices of the
 * sub-sequences to keep. The output is made of the rows of the first input
 * that belong to the selected sub-sequences.
 */
class SubNestedSequenceLayer : public Layer {
public:
explicit SubNestedSequenceLayer(const LayerConfig& config) : Layer(config) {}
bool init(const LayerMap& layerMap,
const ParameterMap& parameterMap) override;
void forward(PassType passType) override;
void backward(const UpdateCallback& callback = nullptr) override;
private:
/*
 * This function generates the indices of rows in a batch according to the
 * indices of the selected sub-sequences in each sequence. Within a row of
 * selectedIndices, the first -1 ends the selection for that sequence.
 *
 * Examples:
 * selectedIndices:
 *   [
 *     [0, 1, -1],
 *     [0, 1, 2],
 *     [0, -1, -1],
 *     [0, 2, 3],
 *   ]
 * inputSeqInfo:
 *   [
 *     [0,3,4],
 *     [4,5,7,10,15],
 *     [15,20],
 *     [20,22,23,25,28]
 *   ]
 *
 * the output is saved to the private members selectedRows_ / rowIndice_:
 *   [0,1,2,3,4,5,6,7,8,9,15,16,17,18,19,20,21,23,24,25,26,27]
 *
 * As a side effect the sequence and sub-sequence start positions of the
 * layer output are rebuilt for the selected rows.
 */
void calSelectedCols(const MatrixPtr selectedIndices,
const std::vector<std::vector<int>>& inputSeqInfo);
// CPU view of the second input: if the second input of this layer is on
// GPU memory it is copied into this matrix, otherwise this simply refers
// to the input matrix.
MatrixPtr selIdsCpu_;
// reorganized sequenceStartPositions and subSequenceStartPositions
// into a 2d vector to facilitate the sequence selection process.
std::vector<std::vector<int>> inputSeqInfoVec_;
// the final selected row indices in a batch.
// On CPU, rowIndice_ is created directly from selectedRows_'s buffer (the
// two are meant to share the same memory); on GPU, rowIndice_ holds a copy
// of selectedRows_.
IVectorPtr rowIndice_;
std::vector<int> selectedRows_;
};
REGISTER_LAYER(sub_nested_seq, SubNestedSequenceLayer);
// Initializes the layer: runs the base-class initialization, verifies that
// exactly two inputs are connected and turns off sequence-info propagation
// (the layer builds the output sequence information itself in forward).
//
// Returns false if the base-class initialization fails, true otherwise.
bool SubNestedSequenceLayer::init(const LayerMap& layerMap,
                                  const ParameterMap& parameterMap) {
  /* Initialize the basic parent class */
  // Propagate a failed base initialization instead of silently ignoring it.
  if (!Layer::init(layerMap, parameterMap)) return false;

  CHECK_EQ(2U, inputLayers_.size());
  setNeedSequenceInfo(false);
  return true;
}
void SubNestedSequenceLayer::calSelectedCols(
const MatrixPtr selectedIndices,
const std::vector<std::vector<int>>& inputSeqInfo) {
selectedRows_.clear();
std::vector<int> outSeqStartInfo(1, 0);
std::vector<int> outSubSeqStartInfo(1, 0);
size_t seqNum = selectedIndices->getHeight();
size_t beamSize = selectedIndices->getWidth();
for (size_t i = 0; i < seqNum; ++i) {
for (size_t j = 0; j < beamSize; ++j) {
if (selectedIndices->getElement(i, j) == -1.) break;
int selSubSeqIdx = selectedIndices->getElement(i, j);
CHECK_GT(inputSeqInfoVec_[i].size() - 1, selSubSeqIdx);
size_t subSeqLen = inputSeqInfoVec_[i][selSubSeqIdx + 1] -
inputSeqInfoVec_[i][selSubSeqIdx];
for (size_t k = 0; k < subSeqLen; ++k)
selectedRows_.push_back(inputSeqInfoVec_[i][selSubSeqIdx] + k);
outSubSeqStartInfo.push_back(outSubSeqStartInfo.back() + subSeqLen);
}
outSeqStartInfo.push_back(outSubSeqStartInfo.back());
}
if (useGpu_) {
rowIndice_ = IVector::create(selectedRows_.size(), useGpu_);
rowIndice_->copyFrom(selectedRows_.data(), selectedRows_.size());
} else {
rowIndice_ =
IVector::create(selectedRows_.data(), selectedRows_.size(), useGpu_);
}
// create the sequence information for the output.
ICpuGpuVector::resizeOrCreate(
output_.sequenceStartPositions, outSeqStartInfo.size(), false);
output_.sequenceStartPositions->copyFrom(
outSeqStartInfo.data(), outSeqStartInfo.size(), false);
ICpuGpuVector::resizeOrCreate(
output_.subSequenceStartPositions, outSubSeqStartInfo.size(), false);
output_.subSequenceStartPositions->copyFrom(
outSubSeqStartInfo.data(), outSubSeqStartInfo.size(), false);
}
// Forward pass: validates the two inputs, makes the selected indices
// available on the CPU, computes the rows to select and gathers them from
// the first input into the layer output.
void SubNestedSequenceLayer::forward(PassType passType) {
  Layer::forward(passType);

  const Argument& nestedSeq = getInput(0);
  CHECK(nestedSeq.hasSubseq()) << "The first input of SubNestSequence layer "
                               << "must be a nested sequence.";

  const MatrixPtr selIds = getInputValue(1);
  CHECK_EQ(nestedSeq.getNumSequences(), selIds->getHeight());

  /*
   * Currently, the second input for this layer is generated by
   * kmax_sequence_score_layer whose output is always stored on CPU,
   * or a data_layer which can be on GPU.
   *
   * If the second input is on GPU, copy it to CPU memory, because this
   * input always uses very few memory, and operations related to it are
   * all logic control, not computations.
   */
  const bool idsOnGpu = dynamic_cast<GpuMatrix*>(selIds.get()) != nullptr;
  if (!idsOnGpu) {
    selIdsCpu_ = selIds;
  } else {
    Matrix::resizeOrCreate(selIdsCpu_,
                           selIds->getHeight(),
                           selIds->getWidth(),
                           false /* trans */,
                           false /* useGpu */);
    selIdsCpu_->copyFrom(*selIds);
  }

  // Rebuild the per-sequence sub-sequence boundaries, then derive the rows.
  Argument::reorganizeSeqInfo(nestedSeq.sequenceStartPositions,
                              nestedSeq.subSequenceStartPositions,
                              inputSeqInfoVec_);
  calSelectedCols(selIdsCpu_, inputSeqInfoVec_);

  // One output row per selected input row.
  resetOutput(selectedRows_.size(), getSize());
  getOutputValue()->selectRows(*getInputValue(0), *rowIndice_);
}
void SubNestedSequenceLayer::backward(const UpdateCallback& callback) {
MatrixPtr inputSeqGrad = getInputGrad(0);
MatrixPtr outputGrad = getOutputGrad();
if (inputSeqGrad) outputGrad->addToRows(*inputSeqGrad, *rowIndice_);
}
} // namespace paddle
...@@ -21,6 +21,8 @@ limitations under the License. */ ...@@ -21,6 +21,8 @@ limitations under the License. */
#include "paddle/utils/GlobalConstants.h" #include "paddle/utils/GlobalConstants.h"
#include "LayerGradUtil.h" #include "LayerGradUtil.h"
#include "paddle/cuda/include/hl_batch_norm.h"
#include "paddle/math/tests/TensorCheck.h"
#include "paddle/testing/TestUtil.h" #include "paddle/testing/TestUtil.h"
using namespace paddle; // NOLINT using namespace paddle; // NOLINT
...@@ -117,6 +119,74 @@ TEST(Layer, batchNorm) { ...@@ -117,6 +119,74 @@ TEST(Layer, batchNorm) {
CHECK_EQ(static_cast<int>(convLayer->getOutputValue()->getWidth()), 576); CHECK_EQ(static_cast<int>(convLayer->getOutputValue()->getWidth()), 576);
} }
#ifndef PADDLE_ONLY_CPU
/**
 * Runs batch-norm inference on the same random GPU input through both
 * hl_batch_norm_forward_inference (tensor-descriptor based, result kept in
 * cudnnOut) and hl_batch_norm_cuda_inference (result kept in cudaOut), then
 * checks on CPU that the two results agree.
 *
 * @param n  batch size (matrix height).
 * @param c  number of channels; scale/bias/mean/variance are 1 x c.
 * @param h  feature-map height.
 * @param w  feature-map width.
 */
void batchNormInference(int n, int c, int h, int w) {
  const int featureSize = c * h * w;

  // Shared input and one output buffer per implementation, all on GPU.
  MatrixPtr input = std::make_shared<GpuMatrix>(n, featureSize);
  MatrixPtr cudnnOut = std::make_shared<GpuMatrix>(n, featureSize);
  MatrixPtr cudaOut = std::make_shared<GpuMatrix>(n, featureSize);
  input->randomizeUniform();
  cudnnOut->zeroMem();
  cudaOut->zeroMem();

  // Per-channel parameters.
  MatrixPtr scale = std::make_shared<GpuMatrix>(1, c);
  scale->randomizeUniform();
  MatrixPtr bias = std::make_shared<GpuMatrix>(1, c);
  bias->randomizeUniform();
  MatrixPtr movingMean = std::make_shared<GpuMatrix>(1, c);
  movingMean->randomizeUniform();
  MatrixPtr movingVar = std::make_shared<GpuMatrix>(1, c);
  movingVar->randomizeUniform();
  // Keep the variance within [0.01, 50], i.e. strictly positive.
  movingVar->clip(0.01, 50);

  // Descriptors for the n x c x h x w data and the 1 x c x 1 x 1 parameters.
  hl_tensor_descriptor ioDesc;
  hl_tensor_descriptor bnDesc;
  hl_create_tensor_descriptor(&ioDesc);
  hl_create_tensor_descriptor(&bnDesc);
  hl_tensor_reshape(ioDesc, n, c, h, w);
  hl_tensor_reshape(bnDesc, 1, c, 1, 1);

  const double epsilon = 1E-5;
  hl_batch_norm_forward_inference(ioDesc,
                                  input->getData(),
                                  ioDesc,
                                  cudnnOut->getData(),
                                  bnDesc,
                                  scale->getData(),
                                  bias->getData(),
                                  movingMean->getData(),
                                  movingVar->getData(),
                                  epsilon);
  hl_batch_norm_cuda_inference(input->getData(),
                               cudaOut->getData(),
                               scale->getData(),
                               bias->getData(),
                               movingMean->getData(),
                               movingVar->getData(),
                               epsilon,
                               n,
                               c,
                               h,
                               w);

  // Bring both results to CPU memory and compare them.
  MatrixPtr cudnnCheck = std::make_shared<CpuMatrix>(n, featureSize);
  MatrixPtr cudaCheck = std::make_shared<CpuMatrix>(n, featureSize);
  cudnnCheck->copyFrom(*cudnnOut);
  cudaCheck->copyFrom(*cudaOut);
  autotest::TensorCheckErr(*cudnnCheck, *cudaCheck);

  hl_destroy_tensor_descriptor(ioDesc);
  hl_destroy_tensor_descriptor(bnDesc);
}
// Compares the two batch-norm inference implementations on two shapes:
// one with a 1 x 1 feature map and one with a 4 x 4 feature map.
TEST(BatchNorm, Inference) {
  // Each row is {n, c, h, w}.
  const int shapes[][4] = {{33, 267, 1, 1}, {19, 105, 4, 4}};
  for (const auto& shape : shapes) {
    batchNormInference(shape[0], shape[1], shape[2], shape[3]);
  }
}
#endif
int main(int argc, char** argv) { int main(int argc, char** argv) {
testing::InitGoogleTest(&argc, argv); testing::InitGoogleTest(&argc, argv);
initMain(argc, argv); initMain(argc, argv);
......
...@@ -1899,6 +1899,84 @@ TEST(Layer, CropLayer) { ...@@ -1899,6 +1899,84 @@ TEST(Layer, CropLayer) {
} }
} }
/**
 * Draws n distinct values from 0, 1, ..., range - 1 and returns them in
 * ascending order.
 *
 * @param range  number of candidate values; passed as real but used as the
 *               element count of the candidate vector. Must be >= n.
 * @param n      number of values to draw.
 * @return       the n sampled values, sorted; when range == n this is simply
 *               the full sequence 0 .. range - 1.
 */
vector<real> randSampling(real range, int n) {
  CHECK_GE(range, n);

  vector<real> candidates(range);
  iota(begin(candidates), end(candidates), 0.);

  if (range != n) {
    // Shuffle, keep the first n, then restore ascending order.
    random_shuffle(begin(candidates), end(candidates));
    candidates.resize(n);
    sort(begin(candidates), end(candidates));
  }
  return candidates;
}
// Gradient check of the sub_nested_seq layer on a randomly generated nested
// sequence input and randomly selected sub-sequence indices.
TEST(Layer, SubNestedSequenceLayer) {
// layer size is not crucial for this layer,
// so use a small layer size in unittest
const int layerSize = 4;
const int maxSeqNum = 50;
const int maxSeqLen = 50;
const int maxBeamSize = 32;
// Seeded from the wall clock, so each run uses a different configuration.
srand((size_t)(time(NULL)));
// Number of index slots per sequence, in [1, maxBeamSize].
int beamSize = 1 + (rand() % maxBeamSize);
TestConfig config;
config.layerConfig.set_type("sub_nested_seq");
config.layerConfig.set_name("sub_nested_seq_layer");
config.layerConfig.set_size(layerSize);
// Number of outer sequences, in [1, maxSeqNum].
int seqNum = 1 + (rand() % maxSeqNum);
// sequence information for the first input, it is a nested sequence
vector<int> seqStartPos(seqNum + 1, 0);
vector<int> subSeqStartPos(1, 0);
// selected indices
// A seqNum x beamSize CPU matrix initialised to -1 everywhere; slots that are
// not overwritten below keep -1 (presumably meaning "no selection").
MatrixPtr selectedIndices = Matrix::create(seqNum, beamSize, false, false);
selectedIndices->one();
selectedIndices->mulScalar(-1.);
real* indicesData = selectedIndices->getData();
for (int i = 0; i < seqNum; ++i) {
// Sequence i gets 1..maxSeqNum sub-sequences, each of length 1..maxSeqLen.
int subSeqNum = 1 + (rand() % maxSeqNum);
for (int j = 0; j < subSeqNum; ++j) {
subSeqStartPos.push_back(subSeqStartPos.back() +
(1 + (rand() % maxSeqLen)));
}
// Pick min(beamSize, subSeqNum) distinct sub-sequence indices, sorted, and
// write them into the leading slots of row i.
vector<real> selSeqs =
randSampling(static_cast<real>(subSeqNum), min(beamSize, subSeqNum));
memcpy(indicesData + (i * beamSize),
selSeqs.data(),
selSeqs.size() * sizeof(real));
// The end of sequence i is the end of its last sub-sequence.
seqStartPos[i + 1] = subSeqStartPos.back();
}
// Random input values: one row per time step of the nested sequence.
MatrixPtr seqInputPtr =
Matrix::create(seqStartPos.back(), layerSize, false, false);
seqInputPtr->randomizeUniform();
config.inputDefs.push_back({INPUT_SELF_DEFINE_DATA,
"nested_seq_input",
seqInputPtr,
seqStartPos,
subSeqStartPos});
config.layerConfig.add_inputs();
config.inputDefs.push_back(
{INPUT_SELF_DEFINE_DATA, "selected_indices", selectedIndices});
config.layerConfig.add_inputs();
// Run the gradient check once on CPU and once on GPU.
for (auto useGpu : {false, true}) {
testLayerGrad(config,
"sub_nested_seq",
/* batchSize */ seqNum,
/* trans */ false,
/* useGpu*/ useGpu,
/* useWeight */ false);
}
}
TEST(Layer, ClipLayer) { TEST(Layer, ClipLayer) {
const size_t batchSize = 128; const size_t batchSize = 128;
const size_t size = 512; const size_t size = 512;
......
...@@ -22,8 +22,7 @@ class AddOp : public OperatorWithKernel { ...@@ -22,8 +22,7 @@ class AddOp : public OperatorWithKernel {
void InferShape(const InferShapeContext &ctx) const override { void InferShape(const InferShapeContext &ctx) const override {
PADDLE_ENFORCE_EQ(ctx.InputSize(), 2); PADDLE_ENFORCE_EQ(ctx.InputSize(), 2);
PADDLE_ENFORCE_EQ(ctx.OutputSize(), 1); PADDLE_ENFORCE_EQ(ctx.OutputSize(), 1);
PADDLE_ENFORCE(ctx.InputVar(0) != nullptr && ctx.InputVar(1) != nullptr, PADDLE_ENFORCE_NOT_NULL(ctx.InputVar(0), "Inputs of AddOp must all be set");
"Inputs of AddOp must all be set");
PADDLE_ENFORCE(ctx.OutputVar(0) != nullptr, PADDLE_ENFORCE(ctx.OutputVar(0) != nullptr,
"Outputs of AddOp must all be set"); "Outputs of AddOp must all be set");
PADDLE_ENFORCE(ctx.Input<Tensor>(0)->dims() == ctx.Input<Tensor>(1)->dims(), PADDLE_ENFORCE(ctx.Input<Tensor>(0)->dims() == ctx.Input<Tensor>(1)->dims(),
......
...@@ -20,18 +20,19 @@ namespace operators { ...@@ -20,18 +20,19 @@ namespace operators {
class OnehotCrossEntropyOp : public OperatorWithKernel { class OnehotCrossEntropyOp : public OperatorWithKernel {
protected: protected:
void InferShape(const InferShapeContext &ctx) const override { void InferShape(const InferShapeContext &ctx) const override {
PADDLE_ENFORCE(ctx.InputSize() == 2, PADDLE_ENFORCE_EQ(ctx.InputSize(), 2,
"Input size of OnehotCrossEntropyOp must be two"); "Input size of OnehotCrossEntropyOp must be two");
PADDLE_ENFORCE(ctx.OutputSize() == 1, PADDLE_ENFORCE_EQ(ctx.OutputSize(), 1,
"Output size of OnehotCrossEntropyOp must be one"); "Output size of OnehotCrossEntropyOp must be one");
PADDLE_ENFORCE(ctx.InputVar(0) != nullptr && ctx.InputVar(1) != nullptr, PADDLE_ENFORCE_NOT_NULL(ctx.InputVar(0),
"Inputs of OnehotCrossEntropyOp must all be set"); "0-th input of OnehotCrossEntropyOp should be set");
PADDLE_ENFORCE(ctx.OutputVar(0) != nullptr, PADDLE_ENFORCE_NOT_NULL(ctx.InputVar(1),
"Outputs of OnehotCrossEntropyOp must all be set"); "1-th input of OnehotCrossEntropyOp should be set");
PADDLE_ENFORCE(ctx.Input<Tensor>(0)->dims().size() == 2, PADDLE_ENFORCE_NOT_NULL(ctx.OutputVar(0),
"X's dimension must be 2."); "Outputs of OnehotCrossEntropyOp must all be set");
PADDLE_ENFORCE(ctx.Output<Tensor>(0)->dims().size() == 1, PADDLE_ENFORCE_EQ(ctx.Input<Tensor>(0)->dims().size(), 2);
"label's dimension must be 1."); PADDLE_ENFORCE_EQ(ctx.Output<Tensor>(0)->dims().size(), 1,
"label's dimension must be 1.");
ctx.Output<Tensor>(0)->Resize({ctx.Input<Tensor>(0)->dims()[0]}); ctx.Output<Tensor>(0)->Resize({ctx.Input<Tensor>(0)->dims()[0]});
} }
}; };
......
...@@ -14,6 +14,3 @@ ...@@ -14,6 +14,3 @@
#define EIGEN_USE_GPU #define EIGEN_USE_GPU
#include "paddle/operators/cross_entropy_op.h" #include "paddle/operators/cross_entropy_op.h"
REGISTER_OP_GPU_KERNEL(onehot_cross_entropy,
ops::OnehotCrossEntropyOpKernel<ops::GPUPlace, float>);
...@@ -18,7 +18,24 @@ limitations under the License. */ ...@@ -18,7 +18,24 @@ limitations under the License. */
namespace paddle { namespace paddle {
namespace operators { namespace operators {
static const float kCrossEntropyLogThreshold{1e-20}; template <typename T>
/**
 * Replaces an infinite value by a large finite stand-in.
 *
 * @param x  a floating-point value.
 * @return   1e20 for +infinity, -1e20 for -infinity, and x itself otherwise
 *           (NaN and finite values pass through unchanged).
 */
T tolerable_value(T x) {
  // std::is_floating_point is true for float, double and long double.
  static_assert(std::is_floating_point<T>::value,
                "tolerable_value works only on float, "
                "double and long double.");

  // Finite approximation of infinity used in place of +/-INFINITY.
  const T kApproInf = 1e20;

  if (x == INFINITY) {
    return kApproInf;
  }
  if (x == -INFINITY) {
    return -kApproInf;
  }
  return x;
}
template <typename Place, typename T> template <typename Place, typename T>
class OnehotCrossEntropyOpKernel : public OpKernel { class OnehotCrossEntropyOpKernel : public OpKernel {
...@@ -36,10 +53,9 @@ class OnehotCrossEntropyOpKernel : public OpKernel { ...@@ -36,10 +53,9 @@ class OnehotCrossEntropyOpKernel : public OpKernel {
int batch_size = X->dims()[0]; int batch_size = X->dims()[0];
int class_num = X->dims()[1]; int class_num = X->dims()[1];
// Y[i] = -log(X[i][j])
for (int i = 0; i < batch_size; ++i) { for (int i = 0; i < batch_size; ++i) {
Ydata[i] = -std::log(std::max(Xdata[i * class_num + label_data[i]], int index = i * class_num + label_data[i];
kCrossEntropyLogThreshold)); Ydata[i] = -tolerable_value(std::log(Xdata[index]));
} }
} }
}; };
...@@ -62,9 +78,8 @@ class OnehotCrossEntropyGradientOpKernel : public OpKernel { ...@@ -62,9 +78,8 @@ class OnehotCrossEntropyGradientOpKernel : public OpKernel {
const int class_num = X->dims()[1]; const int class_num = X->dims()[1];
for (int i = 0; i < batch_size; ++i) { for (int i = 0; i < batch_size; ++i) {
dXdata[i * class_num + label_data[i]] = int index = i * class_num + label_data[i];
-dYdata[i] / std::max(Xdata[i * class_num + label_data[i]], dXdata[index] = -tolerable_value(dYdata[i] / Xdata[index]);
kCrossEntropyLogThreshold);
} }
} }
}; };
......
...@@ -13,8 +13,6 @@ See the License for the specific language governing permissions and ...@@ -13,8 +13,6 @@ See the License for the specific language governing permissions and
limitations under the License. */ limitations under the License. */
#include "paddle/operators/fill_zeros_like_op.h" #include "paddle/operators/fill_zeros_like_op.h"
#include "paddle/framework/op_registry.h"
#include "paddle/framework/tensor.h"
namespace paddle { namespace paddle {
namespace operators { namespace operators {
...@@ -22,14 +20,14 @@ namespace operators { ...@@ -22,14 +20,14 @@ namespace operators {
class FillZerosLikeOp : public framework::OperatorWithKernel { class FillZerosLikeOp : public framework::OperatorWithKernel {
protected: protected:
void InferShape(const framework::InferShapeContext &ctx) const override { void InferShape(const framework::InferShapeContext &ctx) const override {
PADDLE_ENFORCE(ctx.InputSize() == 1UL, PADDLE_ENFORCE_EQ(ctx.InputSize(), 1UL,
"Input size of FillZerosLikeOp must be one."); "Input size of FillZerosLikeOp must be one.");
PADDLE_ENFORCE(ctx.OutputSize() == 1UL, PADDLE_ENFORCE_EQ(ctx.OutputSize(), 1UL,
"Output size of AddOp must be one."); "Output size of AddOp must be one.");
PADDLE_ENFORCE(ctx.InputVar(0) != nullptr, PADDLE_ENFORCE_NOT_NULL(ctx.InputVar(0),
"Input of FillZerosLikeOp must be set."); "Input of FillZerosLikeOp must be set.");
PADDLE_ENFORCE(ctx.OutputVar(0) != nullptr, PADDLE_ENFORCE_NOT_NULL(ctx.OutputVar(0),
"Output of FillZerosLikeOp must be set."); "Output of FillZerosLikeOp must be set.");
ctx.Output<framework::Tensor>(0)->Resize( ctx.Output<framework::Tensor>(0)->Resize(
ctx.Input<framework::Tensor>(0)->dims()); ctx.Input<framework::Tensor>(0)->dims());
} }
......
...@@ -12,6 +12,7 @@ ...@@ -12,6 +12,7 @@
See the License for the specific language governing permissions and See the License for the specific language governing permissions and
limitations under the License. */ limitations under the License. */
#define EIGEN_USE_GPU
#include "paddle/framework/op_registry.h" #include "paddle/framework/op_registry.h"
#include "paddle/operators/fill_zeros_like_op.h" #include "paddle/operators/fill_zeros_like_op.h"
......
...@@ -13,9 +13,7 @@ See the License for the specific language governing permissions and ...@@ -13,9 +13,7 @@ See the License for the specific language governing permissions and
limitations under the License. */ limitations under the License. */
#pragma once #pragma once
#include "glog/logging.h" #include "paddle/operators/type_alias.h"
#include "paddle/framework/eigen.h"
#include "paddle/framework/operator.h"
namespace paddle { namespace paddle {
namespace operators { namespace operators {
...@@ -26,7 +24,8 @@ class FillZerosLikeKernel : public framework::OpKernel { ...@@ -26,7 +24,8 @@ class FillZerosLikeKernel : public framework::OpKernel {
void Compute(const framework::ExecutionContext& context) const override { void Compute(const framework::ExecutionContext& context) const override {
auto* output = context.Output<framework::Tensor>(0); auto* output = context.Output<framework::Tensor>(0);
output->mutable_data<T>(context.GetPlace()); output->mutable_data<T>(context.GetPlace());
framework::EigenVector<T>::Flatten(*output).setZero(); auto t = framework::EigenVector<T>::Flatten(*output);
t.device(context.GetEigenDevice<Place>()) = t.constant(T(0));
} }
}; };
......
...@@ -20,10 +20,10 @@ namespace operators { ...@@ -20,10 +20,10 @@ namespace operators {
class MeanOp : public OperatorWithKernel { class MeanOp : public OperatorWithKernel {
protected: protected:
void InferShape(const InferShapeContext &ctx) const override { void InferShape(const InferShapeContext &ctx) const override {
PADDLE_ENFORCE(ctx.InputSize() == 1, "Input size of AddOp must be one"); PADDLE_ENFORCE_EQ(ctx.InputSize(), 1, "Input size of AddOp must be one");
PADDLE_ENFORCE(ctx.OutputSize() == 1, "Output size of AddOp must be one"); PADDLE_ENFORCE_EQ(ctx.OutputSize(), 1, "Output size of AddOp must be one");
PADDLE_ENFORCE(ctx.InputVar(0) != nullptr && ctx.OutputVar(0) != nullptr, PADDLE_ENFORCE_NOT_NULL(ctx.InputVar(0), "input should be set");
"Input/Output of MeanOp must be initialized."); PADDLE_ENFORCE_NOT_NULL(ctx.OutputVar(0), "output should be set");
ctx.Output<Tensor>(0)->Resize(framework::make_ddim({1})); ctx.Output<Tensor>(0)->Resize(framework::make_ddim({1}));
} }
}; };
......
...@@ -70,15 +70,15 @@ class NetOp : public framework::OperatorBase { ...@@ -70,15 +70,15 @@ class NetOp : public framework::OperatorBase {
*/ */
void AddOp(const std::shared_ptr<OperatorBase>& op) { void AddOp(const std::shared_ptr<OperatorBase>& op) {
PADDLE_ENFORCE(!add_op_done_, "Cannot AddOp when this network is sealed"); PADDLE_ENFORCE(!add_op_done_, "Cannot AddOp when this network is sealed");
PADDLE_ENFORCE(op != nullptr, "Cannot Insert Null op"); PADDLE_ENFORCE_NOT_NULL(op, "Cannot Insert Null op");
ops_.push_back(op); ops_.push_back(op);
} }
void InsertOp(size_t pos, const std::shared_ptr<OperatorBase>& op) { void InsertOp(size_t pos, const std::shared_ptr<OperatorBase>& op) {
PADDLE_ENFORCE(!add_op_done_, PADDLE_ENFORCE(!add_op_done_,
"Cannot InsertOp when this network is sealed"); "Cannot InsertOp when this network is sealed");
PADDLE_ENFORCE(op != nullptr, "Cannot Insert Null op"); PADDLE_ENFORCE_NOT_NULL(op, "Cannot Insert Null op");
PADDLE_ENFORCE(pos <= ops_.size(), "Out of range"); PADDLE_ENFORCE_LE(pos, ops_.size(), "Out of range");
ops_.insert(ops_.begin() + pos, op); ops_.insert(ops_.begin() + pos, op);
} }
......
...@@ -20,11 +20,11 @@ namespace operators { ...@@ -20,11 +20,11 @@ namespace operators {
class SGDOp : public OperatorWithKernel { class SGDOp : public OperatorWithKernel {
protected: protected:
void InferShape(const InferShapeContext &ctx) const override { void InferShape(const InferShapeContext &ctx) const override {
PADDLE_ENFORCE(ctx.InputSize() == 2, "Input size of SGDOp must be two"); PADDLE_ENFORCE_EQ(ctx.InputSize(), 2, "Input size of SGDOp must be two");
PADDLE_ENFORCE(ctx.OutputSize() == 1, "Output size of SGDOp must be one"); PADDLE_ENFORCE_EQ(ctx.OutputSize(), 1, "Output size of SGDOp must be one");
PADDLE_ENFORCE(ctx.InputVar(0) != nullptr, "inputs[0] mast be set"); PADDLE_ENFORCE_NOT_NULL(ctx.InputVar(0), "inputs[0] mast be set");
PADDLE_ENFORCE(ctx.InputVar(1) != nullptr, "inputs[1] mast be set"); PADDLE_ENFORCE_NOT_NULL(ctx.InputVar(1), "inputs[1] mast be set");
PADDLE_ENFORCE(ctx.OutputVar(0) != nullptr, "outputs[0] mast be set"); PADDLE_ENFORCE_NOT_NULL(ctx.OutputVar(0), "outputs[0] mast be set");
PADDLE_ENFORCE(ctx.Input<Tensor>(0)->dims() == ctx.Input<Tensor>(1)->dims(), PADDLE_ENFORCE(ctx.Input<Tensor>(0)->dims() == ctx.Input<Tensor>(1)->dims(),
"Two input of SGD Op's dimension must be same."); "Two input of SGD Op's dimension must be same.");
ctx.Output<Tensor>(0)->Resize(ctx.Input<Tensor>(0)->dims()); ctx.Output<Tensor>(0)->Resize(ctx.Input<Tensor>(0)->dims());
......
...@@ -20,12 +20,12 @@ namespace operators { ...@@ -20,12 +20,12 @@ namespace operators {
class SoftmaxOp : public OperatorWithKernel { class SoftmaxOp : public OperatorWithKernel {
protected: protected:
void InferShape(const InferShapeContext &ctx) const override { void InferShape(const InferShapeContext &ctx) const override {
PADDLE_ENFORCE(ctx.InputSize() == 1UL, PADDLE_ENFORCE_EQ(ctx.InputSize(), 1UL,
"Only one input is need for softmax"); "Only one input is need for softmax");
PADDLE_ENFORCE(ctx.Input<Tensor>("X")->dims().size() == 2UL, PADDLE_ENFORCE_EQ(ctx.Input<Tensor>("X")->dims().size(), 2UL,
"The input of softmax op must be matrix"); "The input of softmax op must be matrix");
PADDLE_ENFORCE(ctx.OutputSize() == 1UL, PADDLE_ENFORCE_EQ(ctx.OutputSize(), 1UL,
"Only one output is need for softmax"); "Only one output is need for softmax");
ctx.Output<Tensor>("Y")->Resize(ctx.Input<Tensor>("X")->dims()); ctx.Output<Tensor>("Y")->Resize(ctx.Input<Tensor>("X")->dims());
} }
}; };
...@@ -43,13 +43,13 @@ class SoftmaxOpMaker : public OpProtoAndCheckerMaker { ...@@ -43,13 +43,13 @@ class SoftmaxOpMaker : public OpProtoAndCheckerMaker {
class SoftmaxOpGrad : public OperatorWithKernel { class SoftmaxOpGrad : public OperatorWithKernel {
protected: protected:
void InferShape(const InferShapeContext &ctx) const override { void InferShape(const InferShapeContext &ctx) const override {
PADDLE_ENFORCE(ctx.InputSize() == 3UL, PADDLE_ENFORCE_EQ(ctx.InputSize(), 3UL,
"Input of SoftmaxOpGrad should be 3, X, Y, YG"); "Input of SoftmaxOpGrad should be 3, X, Y, YG");
PADDLE_ENFORCE(ctx.OutputSize() == 1UL, PADDLE_ENFORCE_EQ(ctx.OutputSize(), 1UL,
"Output of SoftmaxOpGrad should be 1"); "Output of SoftmaxOpGrad should be 1");
PADDLE_ENFORCE(ctx.InputVar("Y") != nullptr, "Input(Y) should not be null"); PADDLE_ENFORCE_NOT_NULL(ctx.InputVar("Y"), "Input(Y) should not be null");
PADDLE_ENFORCE(ctx.InputVar(framework::GradVarName("Y")) != nullptr, PADDLE_ENFORCE_NOT_NULL(ctx.InputVar(framework::GradVarName("Y")),
"Input(Y@GRAD) should not be null"); "Input(Y@GRAD) should not be null");
PADDLE_ENFORCE(ctx.Input<Tensor>("Y")->dims() == PADDLE_ENFORCE(ctx.Input<Tensor>("Y")->dims() ==
ctx.Input<Tensor>(framework::GradVarName("Y"))->dims(), ctx.Input<Tensor>(framework::GradVarName("Y"))->dims(),
"the shape of Input(0) and Input(1) should be the same"); "the shape of Input(0) and Input(1) should be the same");
......
...@@ -666,4 +666,24 @@ void Argument::subArgFrom(const Argument& input, ...@@ -666,4 +666,24 @@ void Argument::subArgFrom(const Argument& input,
} }
} }
/**
 * Splits the flat sub-sequence start positions into one vector per outer
 * sequence.
 *
 * After the call, reorganizedSeqInfo[i] holds the boundaries of every
 * sub-sequence of sequence i, from the start of the sequence up to and
 * including its end position. A boundary shared by two neighbouring
 * sequences therefore appears in both vectors.
 *
 * @param seqStartPos         start positions of the outer sequences.
 * @param subSeqStartPos      start positions of the sub-sequences.
 * @param reorganizedSeqInfo  output; any previous content is discarded.
 */
void Argument::reorganizeSeqInfo(
    const ICpuGpuVectorPtr seqStartPos,
    const ICpuGpuVectorPtr subSeqStartPos,
    std::vector<std::vector<int>>& reorganizedSeqInfo) {
  // Callers may pass the same vector on every call. resize() alone would keep
  // the old inner vectors and the push_back calls below would append to them,
  // so start from an empty result.
  reorganizedSeqInfo.clear();

  const int seqNum = static_cast<int>(seqStartPos->getSize()) - 1;
  // Fewer than two start positions means there is no complete sequence.
  if (seqNum <= 0) return;

  int* seqStarts = seqStartPos->getMutableData(false);
  int* subSeqStarts = subSeqStartPos->getMutableData(false);
  reorganizedSeqInfo.resize(seqNum);

  const size_t subSeqPosCount = subSeqStartPos->getSize();
  int seqIdx = 0;
  for (size_t i = 0; i < subSeqPosCount; ++i) {
    reorganizedSeqInfo[seqIdx].push_back(subSeqStarts[i]);
    if (subSeqStarts[i] == seqStarts[seqIdx + 1]) {
      // Reached the end of the current sequence. The same position is also
      // the start of the next sequence, if there is one.
      seqIdx++;
      if (seqIdx == seqNum) return;
      reorganizedSeqInfo[seqIdx].push_back(subSeqStarts[i]);
    }
  }
}
} // namespace paddle } // namespace paddle
...@@ -317,6 +317,30 @@ struct Argument { ...@@ -317,6 +317,30 @@ struct Argument {
*/ */
void printValueString(std::ostream& stream, void printValueString(std::ostream& stream,
const std::string& prefix = "") const; const std::string& prefix = "") const;
/**
* @brief reorganizeSeqInfo will reorganize sequenceStartPositions and
* subSequenceStartPositions into a 2 dimensional array: reorganizedSeqInfo.
* Each inner array holds the sub-sequence boundaries of one sequence,
* from the start of that sequence up to and including its end.
*
* @param seqStartPos: sequenceStartPositions of an Argument.
* @param subSeqStartPos: subSequenceStartPositions of an Argument.
* @param reorganizedSeqInfo: the reorganized sequence start position
* information (output).
*
* Examples:
* seqStartPos: [0, 4, 15, 20, 28]
* subSeqStartPos: [0, 3, 4, 5, 7, 10, 15, 20, 22, 23, 25, 28]
* reorganizedSeqInfo:
* [
* [0,3,4],
* [4,5,7,10,15],
* [15,20],
* [20,22,23,25,28]
* ]
*/
static void reorganizeSeqInfo(
const ICpuGpuVectorPtr seqStartPos,
const ICpuGpuVectorPtr subSeqStartPos,
std::vector<std::vector<int>>& reorganizedSeqInfo);
}; };
} // namespace paddle } // namespace paddle
...@@ -187,25 +187,16 @@ inline void throw_on_error(T e) { ...@@ -187,25 +187,16 @@ inline void throw_on_error(T e) {
__PADDLE_BINARY_COMPARE(__VAL0, __VAL1, <, >=, __VA_ARGS__) __PADDLE_BINARY_COMPARE(__VAL0, __VAL1, <, >=, __VA_ARGS__)
#define PADDLE_ENFORCE_LE(__VAL0, __VAL1, ...) \ #define PADDLE_ENFORCE_LE(__VAL0, __VAL1, ...) \
__PADDLE_BINARY_COMPARE(__VAL0, __VAL1, <=, >, __VA_ARGS__) __PADDLE_BINARY_COMPARE(__VAL0, __VAL1, <=, >, __VA_ARGS__)
#define PADDLE_ENFORCE_NOT_NULL(__VAL, ...) \
// if two values have different data types, choose a compatible type for them. PADDLE_ENFORCE(nullptr != (__VAL), #__VAL " should not be null\n%s", \
template <typename T1, typename T2> paddle::string::Sprintf("" __VA_ARGS__));
struct CompatibleType {
static const bool t1_to_t2 = std::is_convertible<T1, T2>::value;
typedef typename std::conditional<t1_to_t2, T2, T1>::type type;
};
#define __PADDLE_BINARY_COMPARE(__VAL0, __VAL1, __CMP, __INV_CMP, ...) \ #define __PADDLE_BINARY_COMPARE(__VAL0, __VAL1, __CMP, __INV_CMP, ...) \
PADDLE_ENFORCE(__COMPATIBLE_TYPE(__VAL0, __VAL1, __VAL0) \ PADDLE_ENFORCE(__VAL0 __CMP __VAL1, \
__CMP __COMPATIBLE_TYPE(__VAL0, __VAL1, __VAL1), \
"enforce %s " #__CMP " %s failed, %s " #__INV_CMP " %s\n%s", \ "enforce %s " #__CMP " %s failed, %s " #__INV_CMP " %s\n%s", \
#__VAL0, #__VAL1, std::to_string(__VAL0), \ #__VAL0, #__VAL1, std::to_string(__VAL0), \
std::to_string(__VAL1), \ std::to_string(__VAL1), \
paddle::string::Sprintf("" __VA_ARGS__)); paddle::string::Sprintf("" __VA_ARGS__));
#define __COMPATIBLE_TYPE(__VAL0, __VAL1, __VAL) \
typename paddle::platform::CompatibleType<decltype(__VAL0), \
decltype(__VAL1)>::type(__VAL)
} // namespace platform } // namespace platform
} // namespace paddle } // namespace paddle
...@@ -9,8 +9,10 @@ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. ...@@ -9,8 +9,10 @@ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and See the License for the specific language governing permissions and
limitations under the License. */ limitations under the License. */
#include "paddle/platform/enforce.h" #include <memory>
#include "gtest/gtest.h" #include "gtest/gtest.h"
#include "paddle/platform/enforce.h"
TEST(ENFORCE, OK) { TEST(ENFORCE, OK) {
PADDLE_ENFORCE(true, "Enforce is ok %d now %f", 123, 0.345); PADDLE_ENFORCE(true, "Enforce is ok %d now %f", 123, 0.345);
...@@ -196,3 +198,27 @@ TEST(ENFORCE_LT, FAIL) { ...@@ -196,3 +198,27 @@ TEST(ENFORCE_LT, FAIL) {
ASSERT_TRUE(in_catch); ASSERT_TRUE(in_catch);
} }
// PADDLE_ENFORCE_NOT_NULL must accept a non-null pointer without throwing.
TEST(ENFORCE_NOT_NULL, OK) {
  // Owned by a smart pointer so the allocation is released even if the
  // enforce unexpectedly throws.
  std::unique_ptr<int> a(new int);
  PADDLE_ENFORCE_NOT_NULL(a.get());
}
// PADDLE_ENFORCE_NOT_NULL must throw EnforceNotMet for a null pointer, and
// the message must start with "<expression> should not be null".
TEST(ENFORCE_NOT_NULL, FAIL) {
  bool in_catch = false;
  int* a{nullptr};
  try {
    PADDLE_ENFORCE_NOT_NULL(a);
  } catch (const paddle::platform::EnforceNotMet& error) {
    // Caught by const reference: no copy of the exception object is made.
    in_catch = true;
    const std::string msg = "a should not be null";
    const std::string what = error.what();
    // Only the leading part of the message is fixed; substr() clamps to the
    // actual length, so a shorter message fails the comparison safely.
    ASSERT_EQ(what.substr(0, msg.length()), msg);
  }
  ASSERT_TRUE(in_catch);
}
...@@ -7,4 +7,5 @@ cc_library(paddle_pybind SHARED ...@@ -7,4 +7,5 @@ cc_library(paddle_pybind SHARED
mean_op mean_op
guassian_random_op guassian_random_op
cross_entropy_op cross_entropy_op
recurrent_op) recurrent_op
fill_zeros_like_op)
configure_file(submit_local.sh.in configure_file(submit_local.sh.in
submit_local.sh paddle
@ONLY) @ONLY)
install(FILES ${CMAKE_CURRENT_BINARY_DIR}/submit_local.sh DESTINATION bin install(FILES ${CMAKE_CURRENT_BINARY_DIR}/paddle DESTINATION bin
PERMISSIONS OWNER_EXECUTE OWNER_WRITE OWNER_READ PERMISSIONS OWNER_EXECUTE OWNER_WRITE OWNER_READ
GROUP_EXECUTE GROUP_READ WORLD_EXECUTE WORLD_READ GROUP_EXECUTE GROUP_READ WORLD_EXECUTE WORLD_READ)
RENAME paddle)
configure_file(tools/usage_stat/usage.sh configure_file(tools/usage_stat/usage.sh
usage.sh paddle_usage
@ONLY) @ONLY)
install(FILES ${CMAKE_CURRENT_BINARY_DIR}/usage.sh DESTINATION opt/paddle/bin install(FILES ${CMAKE_CURRENT_BINARY_DIR}/paddle_usage DESTINATION opt/paddle/bin
PERMISSIONS OWNER_EXECUTE OWNER_WRITE OWNER_READ PERMISSIONS OWNER_EXECUTE OWNER_WRITE OWNER_READ
GROUP_EXECUTE GROUP_READ WORLD_EXECUTE WORLD_READ GROUP_EXECUTE GROUP_READ WORLD_EXECUTE WORLD_READ)
RENAME paddle_usage)
...@@ -33,6 +33,9 @@ Configuring cmake in /paddle/build ... ...@@ -33,6 +33,9 @@ Configuring cmake in /paddle/build ...
-DWITH_AVX=${WITH_AVX:-OFF} -DWITH_AVX=${WITH_AVX:-OFF}
-DWITH_GOLANG=${WITH_GOLANG:-OFF} -DWITH_GOLANG=${WITH_GOLANG:-OFF}
-DWITH_SWIG_PY=ON -DWITH_SWIG_PY=ON
-DWITH_C_API=${WITH_C_API:-OFF}
-DWITH_PYTHON=${WITH_PYTHON:-ON}
-DWITH_SWIG_PY=${WITH_SWIG_PY:-ON}
-DCUDNN_ROOT=/usr/ -DCUDNN_ROOT=/usr/
-DWITH_STYLE_CHECK=${WITH_STYLE_CHECK:-OFF} -DWITH_STYLE_CHECK=${WITH_STYLE_CHECK:-OFF}
-DWITH_TESTING=${WITH_TESTING:-OFF} -DWITH_TESTING=${WITH_TESTING:-OFF}
...@@ -49,7 +52,9 @@ cmake .. \ ...@@ -49,7 +52,9 @@ cmake .. \
-DWITH_GPU=${WITH_GPU:-OFF} \ -DWITH_GPU=${WITH_GPU:-OFF} \
-DWITH_AVX=${WITH_AVX:-OFF} \ -DWITH_AVX=${WITH_AVX:-OFF} \
-DWITH_GOLANG=${WITH_GOLANG:-OFF} \ -DWITH_GOLANG=${WITH_GOLANG:-OFF} \
-DWITH_SWIG_PY=ON \ -DWITH_SWIG_PY=${WITH_SWIG_PY:-ON} \
-DWITH_C_API=${WITH_C_API:-OFF} \
-DWITH_PYTHON=${WITH_PYTHON:-ON} \
-DCUDNN_ROOT=/usr/ \ -DCUDNN_ROOT=/usr/ \
-DWITH_STYLE_CHECK=${WITH_STYLE_CHECK:-OFF} \ -DWITH_STYLE_CHECK=${WITH_STYLE_CHECK:-OFF} \
-DWITH_TESTING=${WITH_TESTING:-OFF} \ -DWITH_TESTING=${WITH_TESTING:-OFF} \
......
...@@ -20,4 +20,4 @@ cmake -DCMAKE_SYSTEM_NAME=Android \ ...@@ -20,4 +20,4 @@ cmake -DCMAKE_SYSTEM_NAME=Android \
-DWITH_SWIG_PY=OFF \ -DWITH_SWIG_PY=OFF \
.. ..
make -j `nproc` make -j `nproc`
make install make install -j `nproc`
文件模式从 100644 更改为 100755
# Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserved
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
from setuptools import setup, Extension
# Package definition for py_paddle. The ${...} placeholders are not valid
# version strings as written; presumably they are substituted by the build
# system before this file is run -- TODO confirm.
setup(name="py_paddle",
version="${PADDLE_VERSION}",
packages=['py_paddle'],
include_package_data=True,
# Ship the Python sources together with the SWIG extension module.
package_data={'py_paddle':['*.py','_swig_paddle.so']},
install_requires = [
'nltk>=3.2.2',
# We use `numpy.flip` in `test_image.py`.
# `numpy.flip` is introduced in `1.12.0`
'numpy>=1.12.0', # numpy is required.
'protobuf==${PROTOBUF_VERSION}' # Pinned to the protobuf version used by paddle
],
url='http://www.paddlepaddle.org/',
license='Apache 2.0',
)
from paddle.trainer_config_helpers import * from paddle.trainer_config_helpers import *
settings(batch_size=128, learning_method=AdaGradOptimizer(), learning_rate=1e-4) settings(batch_size=17, learning_method=AdaGradOptimizer(), learning_rate=1e-4)
file_list = 'trainer/tests/fake_file_list.list' file_list = 'trainer/tests/fake_file_list.list'
...@@ -12,7 +12,7 @@ define_py_data_sources2( ...@@ -12,7 +12,7 @@ define_py_data_sources2(
embedding = embedding_layer( embedding = embedding_layer(
input=data_layer( input=data_layer(
name="word_ids", size=65536), name="word_ids", size=8191),
size=128, size=128,
param_attr=ParamAttr(sparse_update=True)) param_attr=ParamAttr(sparse_update=True))
prediction = fc_layer(input=embedding, size=10, act=SoftmaxActivation()) prediction = fc_layer(input=embedding, size=10, act=SoftmaxActivation())
......
...@@ -7,15 +7,15 @@ def init_hook(settings, is_train, **kwargs): ...@@ -7,15 +7,15 @@ def init_hook(settings, is_train, **kwargs):
@provider( @provider(
input_types={'word_ids': integer_value(65536), input_types={'word_ids': integer_value(8191),
'label': integer_value(10)}, 'label': integer_value(10)},
min_pool_size=0, min_pool_size=0,
init_hook=init_hook) init_hook=init_hook)
def process(settings, filename): def process(settings, filename):
if settings.is_train: if settings.is_train:
data_size = 2**20
else:
data_size = 2**10 data_size = 2**10
else:
data_size = 2**5
for _ in xrange(data_size): for _ in xrange(data_size):
yield random.randint(0, 65535), random.randint(0, 9) yield random.randint(0, 8190), random.randint(0, 9)
...@@ -100,25 +100,25 @@ TEST(average_window, gpu) { ...@@ -100,25 +100,25 @@ TEST(average_window, gpu) {
} }
TEST(average_window, gpu2) { TEST(average_window, gpu2) {
FLAGS_num_passes = 100; FLAGS_num_passes = 20;
trainerOnePassTest(configFile1, true, false, 2, 0.01); trainerOnePassTest(configFile1, true, false, 2, 0.01);
FLAGS_num_passes = 1; FLAGS_num_passes = 1;
} }
TEST(average_window, gpu4) { TEST(average_window, gpu4) {
FLAGS_num_passes = 100; FLAGS_num_passes = 20;
trainerOnePassTest(configFile1, true, false, 4, 0.01); trainerOnePassTest(configFile1, true, false, 4, 0.01);
FLAGS_num_passes = 1; FLAGS_num_passes = 1;
} }
TEST(average_window_cpu, gpu2) { TEST(average_window_cpu, gpu2) {
FLAGS_num_passes = 100; FLAGS_num_passes = 20;
trainerOnePassTest(configFile1, true, false, 2, 0.01, true); trainerOnePassTest(configFile1, true, false, 2, 0.01, true);
FLAGS_num_passes = 1; FLAGS_num_passes = 1;
} }
TEST(average_window_cpu, gpu4) { TEST(average_window_cpu, gpu4) {
FLAGS_num_passes = 100; FLAGS_num_passes = 20;
trainerOnePassTest(configFile1, true, false, 4, 0.01, true); trainerOnePassTest(configFile1, true, false, 4, 0.01, true);
FLAGS_num_passes = 1; FLAGS_num_passes = 1;
} }
......
...@@ -39,7 +39,7 @@ add_custom_command(OUTPUT ${OUTPUT_DIR}/.timestamp ...@@ -39,7 +39,7 @@ add_custom_command(OUTPUT ${OUTPUT_DIR}/.timestamp
DEPENDS gen_proto_py copy_paddle_pybind framework_py_proto ${PY_FILES} ${external_project_dependencies} ${COPY_PADDLE_MASTER}) DEPENDS gen_proto_py copy_paddle_pybind framework_py_proto ${PY_FILES} ${external_project_dependencies} ${COPY_PADDLE_MASTER})
add_custom_target(paddle_python ALL DEPENDS add_custom_target(paddle_python ALL DEPENDS
${OUTPUT_DIR}/.timestamp) ${OUTPUT_DIR}/.timestamp paddle_pserver_main paddle_trainer paddle_merge_model python_api_wheel)
set(PADDLE_PYTHON_PACKAGE_DIR ${CMAKE_CURRENT_BINARY_DIR}/dist/) set(PADDLE_PYTHON_PACKAGE_DIR ${CMAKE_CURRENT_BINARY_DIR}/dist/)
......
...@@ -2657,6 +2657,31 @@ class SubSequenceLayer(LayerBase): ...@@ -2657,6 +2657,31 @@ class SubSequenceLayer(LayerBase):
self.create_bias_parameter(bias, size) self.create_bias_parameter(bias, size)
@config_layer('sub_nested_seq')
class SubNestedSequenceLayer(LayerBase):
    """Config for the 'sub_nested_seq' layer.

    The layer takes exactly two inputs: a nested sequence and a layer holding
    the selected indices. Its size is taken from the first input.
    """

    def __init__(self, name, inputs, selected_indices, bias=False, **xargs):
        # Both inputs may be passed wrapped in a one-element list; unwrap
        # them so that exactly two inputs reach the base class.
        # `bias` is accepted but not used by this constructor.
        if isinstance(inputs, list):
            assert len(inputs) == 1, ('the first input of sub_nested_seq '
                                      'layer is a single nested sequence.')
            inputs = inputs[0]

        if isinstance(selected_indices, list):
            assert len(selected_indices) == 1, (
                'the second input of '
                'sub_nested_seq layer is a single layer which is a '
                'set of selected indices.')
            selected_indices = selected_indices[0]

        both_inputs = [inputs, selected_indices]
        super(SubNestedSequenceLayer, self).__init__(
            name, 'sub_nested_seq', 0, inputs=both_inputs, **xargs)

        # The layer is created with size 0 above; the real size is that of
        # the nested-sequence input.
        self.set_layer_size(self.get_input_layer(0).size)
@config_layer('out_prod') @config_layer('out_prod')
class OuterProdLayer(LayerBase): class OuterProdLayer(LayerBase):
def __init__(self, name, inputs, device=None): def __init__(self, name, inputs, device=None):
......
...@@ -129,6 +129,7 @@ __all__ = [ ...@@ -129,6 +129,7 @@ __all__ = [
'prelu_layer', 'prelu_layer',
'gated_unit_layer', 'gated_unit_layer',
'crop_layer', 'crop_layer',
'sub_nested_seq_layer',
'clip_layer', 'clip_layer',
'slice_projection', 'slice_projection',
] ]
...@@ -224,6 +225,7 @@ class LayerType(object): ...@@ -224,6 +225,7 @@ class LayerType(object):
PRELU = 'prelu' PRELU = 'prelu'
CROP_LAYER = 'crop' CROP_LAYER = 'crop'
SUB_NESTED_SEQ = 'sub_nested_seq'
CLIP_LAYER = 'clip' CLIP_LAYER = 'clip'
@staticmethod @staticmethod
...@@ -6088,6 +6090,53 @@ def crop_layer(input, offset, axis=2, shape=None, name=None, layer_attr=None): ...@@ -6088,6 +6090,53 @@ def crop_layer(input, offset, axis=2, shape=None, name=None, layer_attr=None):
size=l.config.size) size=l.config.size)
@wrap_name_default()
@layer_support()
def sub_nested_seq_layer(input, selected_indices, name=None):
    """
    The sub_nested_seq_layer accepts two inputs: the first one is a nested
    sequence; the second one is a set of selected indices in the nested
    sequence. Then sub_nested_seq_layer trims the first nested sequence input
    according to the selected indices to form a new output. This layer is
    useful in beam training.

    The example usage is:

    .. code-block:: python

        sub_nest_seq = sub_nested_seq_layer(
            input=data, selected_indices=selected_ids)

    :param input: A nested sequence.
    :type input: LayerOutput
    :param selected_indices: a set of sequence indices in the nested sequence.
    :type selected_indices: LayerOutput
    :param name: name of this layer.
    :type name: basestring
    :return: LayerOutput object.
    :rtype: LayerOutput
    """
    # Both inputs must be single layers; lists are rejected here.
    assert isinstance(input, LayerOutput), (
        'The first input of '
        'sub_nested_seq_layer must be a Paddle layer.')
    assert isinstance(selected_indices, LayerOutput), (
        'The second input of '
        'sub_nested_seq_layer must be a Paddle layer.')

    l = Layer(
        inputs=input.name,
        selected_indices=selected_indices.name,
        name=name,
        type=LayerType.SUB_NESTED_SEQ)
    # The output size is read back from the generated layer config.
    return LayerOutput(
        name=name,
        layer_type=LayerType.SUB_NESTED_SEQ,
        parents=input,
        size=l.config.size)
@wrap_name_default("clip") @wrap_name_default("clip")
def clip_layer(input, min, max, name=None): def clip_layer(input, min, max, name=None):
""" """
......
...@@ -7,6 +7,7 @@ test_rnn_group shared_fc shared_lstm shared_gru test_cost_layers_with_weight ...@@ -7,6 +7,7 @@ test_rnn_group shared_fc shared_lstm shared_gru test_cost_layers_with_weight
test_spp_layer test_bilinear_interp test_maxout test_bi_grumemory math_ops test_spp_layer test_bilinear_interp test_maxout test_bi_grumemory math_ops
test_seq_concat_reshape test_pad test_smooth_l1 test_multiplex_layer test_seq_concat_reshape test_pad test_smooth_l1 test_multiplex_layer
test_prelu_layer test_row_conv test_detection_output_layer test_multibox_loss_layer test_prelu_layer test_row_conv test_detection_output_layer test_multibox_loss_layer
test_recursive_topology test_gated_unit_layer test_clip_layer test_row_l2_norm_layer) test_recursive_topology test_gated_unit_layer test_clip_layer test_row_l2_norm_layer
test_seq_select_layers)
export whole_configs=(test_split_datasource) export whole_configs=(test_split_datasource)
type: "nn"
layers {
name: "input_seq"
type: "data"
size: 300
active_type: ""
}
layers {
name: "input"
type: "data"
size: 5
active_type: ""
}
layers {
name: "__sub_nested_seq_layer_0__"
type: "sub_nested_seq"
size: 300
active_type: ""
inputs {
input_layer_name: "input_seq"
}
inputs {
input_layer_name: "input"
}
}
input_layer_names: "input_seq"
output_layer_names: "__sub_nested_seq_layer_0__"
sub_models {
name: "root"
layer_names: "input_seq"
layer_names: "input"
layer_names: "__sub_nested_seq_layer_0__"
input_layer_names: "input_seq"
output_layer_names: "__sub_nested_seq_layer_0__"
is_recurrent_layer_group: false
}
#!/usr/bin/env python
#coding=utf-8
from paddle.trainer_config_helpers import *
# Size of the data layer that carries the selected indices.
beam_size = 5

# First input: the nested sequence to be trimmed.
nested_seq = data_layer(name='input_seq', size=300)
# Second input: the indices to keep.
chosen_ids = data_layer(name='input', size=beam_size)

outputs(sub_nested_seq_layer(input=nested_seq, selected_indices=chosen_ids))
import paddle.v2.framework.core as core
from paddle.v2.framework.create_op_creation_methods import op_creations
from default_scope_funcs import new_var, find_var, get_cur_scope
__all__ = ['Network'] # Only expose Network
class NetworkFunctor(object):
    """
    Callable wrapper around one generated op creation function. Used
    internally in this module.

    String arguments are turned into variables through `new_var`, and every
    input/output keyword is rewritten to the variable name recorded in
    `net.var_names` before the op is created and appended to the network.

    :param func: The op creation function generated in Python.
    :param net: The Network instance the created op is added to.
    """

    def __init__(self, func, net):
        self.func = func
        self.net = net

    def _bind_names(self, kwargs, arg_names, type_error_message):
        # For each listed argument present in kwargs: create a variable for
        # a string value, require a core.Variable, then store its recorded
        # name back into kwargs.
        for arg in arg_names:
            if arg not in kwargs:
                continue
            value = kwargs[arg]
            if isinstance(value, basestring):
                created = new_var(value)
                self.net.var_names[created] = value
                value = created

            if not isinstance(value, core.Variable):
                raise TypeError(type_error_message)

            kwargs[arg] = self.net.var_names[value]

    def __call__(self, *args, **kwargs):
        if args:
            raise ValueError("Paddle must use keyword argument")

        creator = self.func
        self._bind_names(kwargs, creator.all_input_args,
                         "Input of op creation must be string or variable")

        # Non-temporary outputs the caller did not name get a generated,
        # unique name.
        visible_outputs = creator.all_not_temp_output_args
        for out_name in visible_outputs:
            if out_name in kwargs:
                continue
            kwargs[out_name] = (
                creator.__name__ + "@OUT@%d" % core.unique_integer())

        self._bind_names(kwargs, creator.all_output_args,
                         "Output of op creation must be string or variable")

        self.net.net.add_op(creator(**kwargs))

        # Return None, a single variable, or a list, depending on how many
        # non-temporary outputs the op has.
        results = [find_var(kwargs[out_name]) for out_name in visible_outputs]
        if not results:
            return None
        if len(results) == 1:
            return results[0]
        return results
class Network(object):
    """
    The network concept. It saves the user from manually creating operators
    and variables and combining them into a Net: calling `Network.xxx`
    creates the operator, creates its variables in the default scope, and
    adds the operator to `self.net`.

    For example:

    .. code-block:: python

        net = Network()
        out = net.add_two(X="a", Y="b")
        fc_out = net.fc(X="out", W="fc.w")
        net.run(...)
    """

    def __init__(self):
        self.net = core.Net.create()
        self.var_names = dict()

        # TODO(yuyang18): This code can work, but do not generate a good
        #                 docstring, try to give a better way generate
        #                 function in runtime later.
        for attr_name in dir(op_creations):
            if attr_name.startswith("__"):
                continue
            functor = NetworkFunctor(getattr(op_creations, attr_name), self)
            setattr(self, attr_name, functor.__call__)

        self.__complete_add_op__ = False

    def infer_shape(self):
        """Finalize the net if needed, then infer shapes in the current scope."""
        self.complete_add_op()
        self.net.infer_shape(get_cur_scope())

    def run(self, device_context):
        """Finalize the net if needed, then run it in the current scope."""
        self.complete_add_op()
        self.net.run(get_cur_scope(), device_context)

    def __str__(self):
        return str(self.net)

    def complete_add_op(self):
        """Call `complete_add_op` on the underlying net at most once."""
        if self.__complete_add_op__:
            return
        self.net.complete_add_op()
        self.__complete_add_op__ = True
if __name__ == '__main__':
    # Small manual demo: build add_two -> fc and print the resulting net.
    demo_net = Network()
    summed = demo_net.add_two(X="a", Y="b")
    fc_result = demo_net.fc(
        X=summed, W="fc.w", b="fc.b", activation="softmax")
    demo_net.complete_add_op()
    print(demo_net)
...@@ -2,7 +2,6 @@ import paddle.v2.framework.core as core ...@@ -2,7 +2,6 @@ import paddle.v2.framework.core as core
import paddle.v2.framework.proto.op_proto_pb2 as op_proto_pb2 import paddle.v2.framework.proto.op_proto_pb2 as op_proto_pb2
import paddle.v2.framework.proto.op_desc_pb2 as op_desc_pb2 import paddle.v2.framework.proto.op_desc_pb2 as op_desc_pb2
import paddle.v2.framework.proto.attribute_pb2 as attribute_pb2 import paddle.v2.framework.proto.attribute_pb2 as attribute_pb2
import cStringIO
def get_all_op_protos(): def get_all_op_protos():
...@@ -146,64 +145,14 @@ class OpDescCreationMethod(object): ...@@ -146,64 +145,14 @@ class OpDescCreationMethod(object):
return False return False
def get_docstring_from_op_proto(op_proto): class OpInfo(object):
""" def __init__(self, name, method, inputs, outputs, attrs, no_temp_outputs):
Generate docstring from a OpProto self.name = name
:param op_proto: a OpProto instance. self.method = method
:type op_proto: op_proto_pb2.OpProto self.inputs = inputs
:return: docstring self.outputs = outputs
""" self.attrs = attrs
if not isinstance(op_proto, op_proto_pb2.OpProto): self.no_temp_outputs = no_temp_outputs
raise TypeError("Input must be OpProto")
f = cStringIO.StringIO()
f.write(op_proto.comment)
f.write("\n")
def __append_param__(name, comment, type):
# Maybe replace the following line with template engine is better.
f.write(":param ")
f.write(name)
f.write(": ")
f.write(comment)
f.write("\n")
f.write(":type ")
f.write(name)
f.write(": ")
f.write(type)
f.write("\n")
for ipt in op_proto.inputs:
__append_param__(ipt.name, ipt.comment, "list | basestr"
if ipt.multiple else "basestr")
temp_var_prefix = \
"This is a temporary variable. It does not have to set by user. "
for opt in op_proto.outputs:
__append_param__(opt.name, opt.comment if not opt.temporary else
temp_var_prefix + opt.comment, "list | basestr"
if opt.multiple else "basestr")
for attr in op_proto.attrs:
attr_type = None
if attr.type == attribute_pb2.INT:
attr_type = "int"
elif attr.type == attribute_pb2.FLOAT:
attr_type = "float"
elif attr.type == attribute_pb2.STRING:
attr_type = "basestr"
elif attr.type == attribute_pb2.INTS:
attr_type = "list of int"
elif attr.type == attribute_pb2.FLOATS:
attr_type = "list of float"
elif attr.type == attribute_pb2.STRINGS:
attr_type = "list of basestr"
if attr_type is None:
raise RuntimeError("Not supported attribute type " + attr.type)
__append_param__(attr.name, attr.comment, attr_type)
return f.getvalue()
def create_op_creation_method(op_proto): def create_op_creation_method(op_proto):
...@@ -216,38 +165,57 @@ def create_op_creation_method(op_proto): ...@@ -216,38 +165,57 @@ def create_op_creation_method(op_proto):
opdesc = method(*args, **kwargs) opdesc = method(*args, **kwargs)
return core.Operator.create(opdesc.SerializeToString()) return core.Operator.create(opdesc.SerializeToString())
__impl__.__doc__ = get_docstring_from_op_proto(op_proto) return OpInfo(
__impl__.all_input_args = [var.name for var in op_proto.inputs] method=__impl__,
__impl__.all_output_args = [var.name for var in op_proto.outputs] name=op_proto.type,
__impl__.all_attr_args = [attr.name for attr in op_proto.attrs] inputs=[var.name for var in op_proto.inputs],
__impl__.all_not_temp_output_args = [ outputs=[var.name for var in op_proto.outputs],
var.name for var in op_proto.outputs if not var.temporary attrs=[attr.name for attr in op_proto.attrs],
] no_temp_outputs=[
var.name for var in op_proto.outputs if not var.temporary
])
return __impl__
class OperatorFactory(object):
def __init__(self):
self.op_methods = dict()
for op_proto in get_all_op_protos():
method = create_op_creation_method(op_proto)
self.op_methods[method.name] = method
class OpCreationsHolder(object): def __call__(self, *args, **kwargs):
""" if 'type' in kwargs:
A object will holds all op creation methods. if len(args) != 0:
raise ValueError("All Paddle argument should be key-word "
Use `op_creations.xxx_op` to access them. "argument except type")
""" t = kwargs.pop('type')
pass else:
if len(args) != 1:
raise ValueError("All Paddle argument should be key-word "
"argument except type")
t = args[0]
return self.get_op_info(t).method(**kwargs)
op_creations = OpCreationsHolder() def types(self):
return self.op_methods.keys()
def get_op_info(self, t):
    """
    Look up the entry stored in `self.op_methods` for operator type `t`.

    :param t: operator type name used as the key in `self.op_methods`.
    :return: the value registered under `t`.
    :raises ValueError: if `t` is not a key of `self.op_methods`.
    """
    if t not in self.op_methods:
        # Interpolate the type into the message; passing it as a second
        # exception argument would leave the "%s" placeholder unformatted.
        raise ValueError("operator %s is not registered" % (t, ))
    return self.op_methods[t]
def __bootstrap__(): def get_op_input_names(self, type):
""" return self.get_op_info(type).inputs
Bootstrap function for this module. It will dynamic create all op creation
methods in runtime. def get_op_output_names(self, type):
""" return self.get_op_info(type).outputs
for op_proto in get_all_op_protos():
func = create_op_creation_method(op_proto) def get_op_attr_names(self, type):
func.__name__ = str(op_proto.type) return self.get_op_info(type).attrs
setattr(op_creations, func.__name__, func)
def get_op_no_temp_output_names(self, type):
return self.get_op_info(type).no_temp_outputs
__bootstrap__() Operator = OperatorFactory() # Default global factory
...@@ -6,7 +6,6 @@ py_test(test_scope SRCS test_scope.py) ...@@ -6,7 +6,6 @@ py_test(test_scope SRCS test_scope.py)
py_test(test_tensor SRCS test_tensor.py) py_test(test_tensor SRCS test_tensor.py)
py_test(test_mul_op SRCS test_mul_op.py) py_test(test_mul_op SRCS test_mul_op.py)
py_test(test_network SRCS test_network.py)
py_test(test_mean_op SRCS test_mean_op.py) py_test(test_mean_op SRCS test_mean_op.py)
py_test(test_protobuf SRCS test_protobuf.py) py_test(test_protobuf SRCS test_protobuf.py)
...@@ -14,6 +13,7 @@ py_test(test_protobuf SRCS test_protobuf.py) ...@@ -14,6 +13,7 @@ py_test(test_protobuf SRCS test_protobuf.py)
py_test(test_add_two_op SRCS test_add_two_op.py) py_test(test_add_two_op SRCS test_add_two_op.py)
py_test(test_sigmoid_op SRCS test_sigmoid_op.py) py_test(test_sigmoid_op SRCS test_sigmoid_op.py)
py_test(test_softmax_op SRCS test_softmax_op.py) py_test(test_softmax_op SRCS test_softmax_op.py)
py_test(test_fill_zeros_like_op SRCS test_fill_zeros_like_op.py)
py_test(gradient_checker SRCS gradient_checker.py) py_test(gradient_checker SRCS gradient_checker.py)
...@@ -22,4 +22,6 @@ py_test(test_rowwise_add_op SRCS test_rowwise_add_op.py) ...@@ -22,4 +22,6 @@ py_test(test_rowwise_add_op SRCS test_rowwise_add_op.py)
py_test(test_default_scope_funcs SRCS test_default_scope_funcs.py) py_test(test_default_scope_funcs SRCS test_default_scope_funcs.py)
py_test(test_op_creation_methods SRCS test_op_creation_methods.py) py_test(test_op_creation_methods SRCS test_op_creation_methods.py)
py_test(test_operator SRCS test_operator.py)
py_test(test_gaussian_random_op SRCS test_gaussian_random_op.py) py_test(test_gaussian_random_op SRCS test_gaussian_random_op.py)
import paddle.v2.framework.core as core import paddle.v2.framework.core as core
from paddle.v2.framework.create_op_creation_methods import op_creations from paddle.v2.framework.op import Operator
import numpy import numpy
import unittest import unittest
...@@ -80,7 +80,7 @@ if __name__ == '__main__': ...@@ -80,7 +80,7 @@ if __name__ == '__main__':
class GetNumericGradientTest(unittest.TestCase): class GetNumericGradientTest(unittest.TestCase):
def test_add_op(self): def test_add_op(self):
add_op = op_creations.add_two(X="X", Y="Y", Out="Z") add_op = Operator('add_two', X="X", Y="Y", Out="Z")
x = numpy.random.random((10, 1)).astype("float32") x = numpy.random.random((10, 1)).astype("float32")
y = numpy.random.random((10, 1)).astype("float32") y = numpy.random.random((10, 1)).astype("float32")
......
import paddle.v2.framework.core as core import paddle.v2.framework.core as core
import unittest import unittest
import numpy import numpy
import paddle.v2.framework.create_op_creation_methods as creation from paddle.v2.framework.op import Operator
class OpTestMeta(type): class OpTestMeta(type):
...@@ -21,18 +21,14 @@ class OpTestMeta(type): ...@@ -21,18 +21,14 @@ class OpTestMeta(type):
obj = super(OpTestMeta, cls).__new__(cls, name, bases, attrs) obj = super(OpTestMeta, cls).__new__(cls, name, bases, attrs)
def test_all(self): def test_all(self):
func = getattr(creation.op_creations, self.type, None)
self.assertIsNotNone(func)
scope = core.Scope() scope = core.Scope()
kwargs = dict() kwargs = dict()
places = [] places = [core.CPUPlace()]
places.append(core.CPUPlace()) if core.is_compile_gpu() and core.Operator.support_gpu(self.type):
if core.is_compile_gpu():
places.append(core.GPUPlace(0)) places.append(core.GPUPlace(0))
for place in places: for place in places:
for in_name in func.all_input_args: for in_name in Operator.get_op_input_names(self.type):
if hasattr(self, "inputs") and in_name in self.inputs: if hasattr(self, "inputs") and in_name in self.inputs:
kwargs[in_name] = in_name kwargs[in_name] = in_name
var = scope.new_var(in_name).get_tensor() var = scope.new_var(in_name).get_tensor()
...@@ -42,7 +38,7 @@ class OpTestMeta(type): ...@@ -42,7 +38,7 @@ class OpTestMeta(type):
else: else:
kwargs[in_name] = "@EMPTY@" kwargs[in_name] = "@EMPTY@"
for out_name in func.all_output_args: for out_name in Operator.get_op_output_names(self.type):
if not hasattr(self, "outputs"): if not hasattr(self, "outputs"):
raise ValueError( raise ValueError(
"The test op must set self.outputs dict.") "The test op must set self.outputs dict.")
...@@ -52,21 +48,23 @@ class OpTestMeta(type): ...@@ -52,21 +48,23 @@ class OpTestMeta(type):
kwargs[out_name] = out_name kwargs[out_name] = out_name
scope.new_var(out_name).get_tensor() scope.new_var(out_name).get_tensor()
for attr_name in func.all_attr_args: for attr_name in Operator.get_op_attr_names(self.type):
if hasattr(self, "attrs") and attr_name in self.attrs: if hasattr(self, "attrs") and attr_name in self.attrs:
kwargs[attr_name] = self.attrs[attr_name] kwargs[attr_name] = self.attrs[attr_name]
op = func(**kwargs) op = Operator(self.type, **kwargs)
op.infer_shape(scope) op.infer_shape(scope)
ctx = core.DeviceContext.create(place) ctx = core.DeviceContext.create(place)
op.run(scope, ctx) op.run(scope, ctx)
for out_name in func.all_output_args: for out_name in Operator.get_op_output_names(self.type):
actual = numpy.array(scope.find_var(out_name).get_tensor()) actual = numpy.array(scope.find_var(out_name).get_tensor())
expect = self.outputs[out_name] expect = self.outputs[out_name]
numpy.isclose(actual, expect) self.assertTrue(
numpy.allclose(actual, expect),
"output name: " + out_name + "has diff")
obj.test_all = test_all obj.test_all = test_all
return obj return obj
...@@ -2,7 +2,7 @@ import unittest ...@@ -2,7 +2,7 @@ import unittest
import numpy import numpy
import paddle.v2.framework.core as core import paddle.v2.framework.core as core
import paddle.v2.framework.create_op_creation_methods as creation from paddle.v2.framework.op import Operator
from op_test_util import OpTestMeta from op_test_util import OpTestMeta
...@@ -21,7 +21,7 @@ class TestAddOp(unittest.TestCase): ...@@ -21,7 +21,7 @@ class TestAddOp(unittest.TestCase):
class TestAddGradOp(unittest.TestCase): class TestAddGradOp(unittest.TestCase):
def test_add_grad(self): def test_add_grad(self):
op = creation.op_creations.add_two(X="X", Y="Y", Out="Out") op = Operator('add_two', X="X", Y="Y", Out="Out")
backward_op = core.Operator.backward(op, set()) backward_op = core.Operator.backward(op, set())
self.assertEqual(backward_op.type(), "add_two_grad") self.assertEqual(backward_op.type(), "add_two_grad")
expected = '''Op(add_two_grad), inputs:(X, Y, Out, Out@GRAD), outputs:(X@GRAD, Y@GRAD).''' expected = '''Op(add_two_grad), inputs:(X, Y, Out, Out@GRAD), outputs:(X@GRAD, Y@GRAD).'''
......
import paddle.v2.framework.core as core import paddle.v2.framework.core as core
import unittest import unittest
import numpy import numpy
import paddle.v2.framework.create_op_creation_methods as creation from paddle.v2.framework.op import Operator
class TestFc(unittest.TestCase): class TestFc(unittest.TestCase):
...@@ -24,7 +24,7 @@ class TestFc(unittest.TestCase): ...@@ -24,7 +24,7 @@ class TestFc(unittest.TestCase):
# Set a real numpy array here. # Set a real numpy array here.
# x_tensor.set(numpy.array([])) # x_tensor.set(numpy.array([]))
op = creation.op_creations.fc(X="X", Y="Y", W="W") op = Operator("fc", X="X", Y="Y", W="W")
for out in op.outputs(): for out in op.outputs():
if scope.find_var(out) is None: if scope.find_var(out) is None:
......
import unittest
from op_test_util import OpTestMeta
import numpy
class TestFillZerosLikeOp(unittest.TestCase):
    """Test case description for the 'fill_zeros_like' operator."""
    __metaclass__ = OpTestMeta

    def setUp(self):
        self.type = "fill_zeros_like"
        # Random float32 source; the expected output is zeros of the same
        # shape and dtype.
        src = numpy.random.random((219, 232)).astype("float32")
        self.inputs = {'Src': src}
        self.outputs = {'Dst': numpy.zeros_like(src)}
if __name__ == '__main__':
unittest.main()
import paddle.v2.framework.core as core import paddle.v2.framework.core as core
from paddle.v2.framework.create_op_creation_methods import op_creations from paddle.v2.framework.op import Operator
import unittest import unittest
class TestNet(unittest.TestCase): class TestNet(unittest.TestCase):
def test_net_all(self): def test_net_all(self):
net = core.Net.create() net = core.Net.create()
op1 = op_creations.add_two(X="X", Y="Y", Out="Out") op1 = Operator("add_two", X="X", Y="Y", Out="Out")
net.add_op(op1) net.add_op(op1)
net2 = core.Net.create() net2 = core.Net.create()
net2.add_op(op_creations.fc(X="X", W="w", Y="fc.out")) net2.add_op(Operator("fc", X="X", W="w", Y="fc.out"))
net2.complete_add_op(True) net2.complete_add_op(True)
net.add_op(net2) net.add_op(net2)
net.complete_add_op(True) net.complete_add_op(True)
......
from paddle.v2.framework.network import Network
import paddle.v2.framework.core as core
import unittest
class TestNet(unittest.TestCase):
def test_net_all(self):
net = Network()
out = net.add_two(X="X", Y="Y")
fc_out = net.fc(X=out, W="w")
net.complete_add_op()
self.assertTrue(isinstance(fc_out, core.Variable))
self.assertEqual(
'''Op(plain_net), inputs:(@EMPTY@, X, Y, w), outputs:(@TEMP@fc@0, add_two@OUT@0, fc@OUT@1).
Op(add_two), inputs:(X, Y), outputs:(add_two@OUT@0).
Op(fc), inputs:(add_two@OUT@0, w, @EMPTY@), outputs:(fc@OUT@1, @TEMP@fc@0).
Op(mul), inputs:(add_two@OUT@0, w), outputs:(@TEMP@fc@0).
Op(sigmoid), inputs:(@TEMP@fc@0), outputs:(fc@OUT@1).
''', str(net))
net2 = Network()
tmp = net2.add_two(X="X", Y="Y")
self.assertTrue(isinstance(tmp, core.Variable))
net2.complete_add_op()
self.assertEqual(
'''Op(plain_net), inputs:(X, Y), outputs:(add_two@OUT@2).
Op(add_two), inputs:(X, Y), outputs:(add_two@OUT@2).
''', str(net2))
if __name__ == '__main__':
unittest.main()
import unittest import unittest
import paddle.v2.framework.create_op_creation_methods as creation import paddle.v2.framework.op as op
import paddle.v2.framework.core as core import paddle.v2.framework.core as core
import paddle.v2.framework.proto.op_proto_pb2 as op_proto_pb2 import paddle.v2.framework.proto.op_proto_pb2 as op_proto_pb2
import paddle.v2.framework.proto.op_desc_pb2 as op_desc_pb2 import paddle.v2.framework.proto.op_desc_pb2 as op_desc_pb2
...@@ -8,7 +8,7 @@ import paddle.v2.framework.proto.attribute_pb2 as attribute_pb2 ...@@ -8,7 +8,7 @@ import paddle.v2.framework.proto.attribute_pb2 as attribute_pb2
class TestGetAllProtos(unittest.TestCase): class TestGetAllProtos(unittest.TestCase):
def test_all(self): def test_all(self):
all_protos = creation.get_all_op_protos() all_protos = op.get_all_op_protos()
self.assertNotEqual(0, len(all_protos)) self.assertNotEqual(0, len(all_protos))
for each in all_protos: for each in all_protos:
...@@ -17,25 +17,25 @@ class TestGetAllProtos(unittest.TestCase): ...@@ -17,25 +17,25 @@ class TestGetAllProtos(unittest.TestCase):
class TestOpDescCreationMethod(unittest.TestCase): class TestOpDescCreationMethod(unittest.TestCase):
def test_plain_input_output(self): def test_plain_input_output(self):
op = op_proto_pb2.OpProto() op_proto = op_proto_pb2.OpProto()
op.type = "test" op_proto.type = "test"
ipt = op.inputs.add() ipt = op_proto.inputs.add()
ipt.name = "X" ipt.name = "X"
ipt.comment = "not matter" ipt.comment = "not matter"
ipt = op.inputs.add() ipt = op_proto.inputs.add()
ipt.name = "Y" ipt.name = "Y"
ipt.comment = "not matter" ipt.comment = "not matter"
opt = op.outputs.add() opt = op_proto.outputs.add()
opt.name = "Z" opt.name = "Z"
opt.comment = "not matter" opt.comment = "not matter"
op.comment = "not matter" op_proto.comment = "not matter"
self.assertTrue(op.IsInitialized()) self.assertTrue(op_proto.IsInitialized())
method = creation.OpDescCreationMethod(op) method = op.OpDescCreationMethod(op_proto)
output = method(X="a", Y="b", Z="c") output = method(X="a", Y="b", Z="c")
expected = op_desc_pb2.OpDesc() expected = op_desc_pb2.OpDesc()
...@@ -45,29 +45,29 @@ class TestOpDescCreationMethod(unittest.TestCase): ...@@ -45,29 +45,29 @@ class TestOpDescCreationMethod(unittest.TestCase):
self.assertEqual(expected, output) self.assertEqual(expected, output)
def test_multiple_input_plain_output(self): def test_multiple_input_plain_output(self):
op = op_proto_pb2.OpProto() op_proto = op_proto_pb2.OpProto()
op.type = "fc" op_proto.type = "fc"
ipt = op.inputs.add() ipt = op_proto.inputs.add()
ipt.name = "X" ipt.name = "X"
ipt.comment = "" ipt.comment = ""
ipt.multiple = True ipt.multiple = True
ipt = op.inputs.add() ipt = op_proto.inputs.add()
ipt.name = "W" ipt.name = "W"
ipt.comment = "" ipt.comment = ""
ipt.multiple = True ipt.multiple = True
ipt = op.inputs.add() ipt = op_proto.inputs.add()
ipt.name = "b" ipt.name = "b"
ipt.comment = "" ipt.comment = ""
out = op.outputs.add() out = op_proto.outputs.add()
out.name = "Y" out.name = "Y"
out.comment = "" out.comment = ""
op.comment = "" op_proto.comment = ""
self.assertTrue(op.IsInitialized()) self.assertTrue(op_proto.IsInitialized())
method = creation.OpDescCreationMethod(op) method = op.OpDescCreationMethod(op_proto)
generated1 = method(X="x", W="w", b="b", Y="y") generated1 = method(X="x", W="w", b="b", Y="y")
expected1 = op_desc_pb2.OpDesc() expected1 = op_desc_pb2.OpDesc()
...@@ -93,14 +93,14 @@ class TestOpDescCreationMethod(unittest.TestCase): ...@@ -93,14 +93,14 @@ class TestOpDescCreationMethod(unittest.TestCase):
self.assertEqual(expected2, generated2) self.assertEqual(expected2, generated2)
def test_attrs(self): def test_attrs(self):
op = op_proto_pb2.OpProto() op_proto = op_proto_pb2.OpProto()
op.type = "test" op_proto.type = "test"
ipt = op.inputs.add() ipt = op_proto.inputs.add()
ipt.name = 'X' ipt.name = 'X'
ipt.comment = "" ipt.comment = ""
def __add_attr__(name, type): def __add_attr__(name, type):
attr = op.attrs.add() attr = op_proto.attrs.add()
attr.name = name attr.name = name
attr.comment = "" attr.comment = ""
attr.type = type attr.type = type
...@@ -112,10 +112,10 @@ class TestOpDescCreationMethod(unittest.TestCase): ...@@ -112,10 +112,10 @@ class TestOpDescCreationMethod(unittest.TestCase):
__add_attr__("floats_attr", attribute_pb2.FLOATS) __add_attr__("floats_attr", attribute_pb2.FLOATS)
__add_attr__("strings_attr", attribute_pb2.STRINGS) __add_attr__("strings_attr", attribute_pb2.STRINGS)
op.comment = "" op_proto.comment = ""
self.assertTrue(op.IsInitialized()) self.assertTrue(op_proto.IsInitialized())
method = creation.OpDescCreationMethod(op) method = op.OpDescCreationMethod(op_proto)
generated = method( generated = method(
X="a", X="a",
...@@ -162,23 +162,23 @@ class TestOpDescCreationMethod(unittest.TestCase): ...@@ -162,23 +162,23 @@ class TestOpDescCreationMethod(unittest.TestCase):
self.assertEqual(expected, generated) self.assertEqual(expected, generated)
def test_input_temporary_output(self): def test_input_temporary_output(self):
op = op_proto_pb2.OpProto() op_proto = op_proto_pb2.OpProto()
op.type = "test" op_proto.type = "test"
out = op.outputs.add() out = op_proto.outputs.add()
out.name = "OUT" out.name = "OUT"
out.comment = "" out.comment = ""
out = op.outputs.add() out = op_proto.outputs.add()
out.name = "TMP" out.name = "TMP"
out.comment = "" out.comment = ""
out.temporary = True out.temporary = True
out = op.outputs.add() out = op_proto.outputs.add()
out.name = "OUT2" out.name = "OUT2"
out.comment = "" out.comment = ""
op.comment = "" op_proto.comment = ""
method = creation.OpDescCreationMethod(op) method = op.OpDescCreationMethod(op_proto)
generated = method(OUT="a", OUT2="b") generated = method(OUT="a", OUT2="b")
desc = op_desc_pb2.OpDesc() desc = op_desc_pb2.OpDesc()
desc.outputs.extend(["a", core.var_names.temp(), "b"]) desc.outputs.extend(["a", core.var_names.temp(), "b"])
...@@ -190,60 +190,9 @@ class TestOpDescCreationMethod(unittest.TestCase): ...@@ -190,60 +190,9 @@ class TestOpDescCreationMethod(unittest.TestCase):
self.assertEqual(generated, desc) self.assertEqual(generated, desc)
class TestOpCreationDocStr(unittest.TestCase):
def test_all(self):
op = op_proto_pb2.OpProto()
op.type = "test"
op.comment = """Test Op.
This op is used for unit test, not a real op.
"""
a = op.inputs.add()
a.name = "a"
a.comment = "Input a for test op"
a.multiple = True
b = op.inputs.add()
b.name = "b"
b.comment = "Input b for test op"
self.assertTrue(op.IsInitialized())
o1 = op.outputs.add()
o1.name = "output"
o1.comment = "The output of test op"
o2 = op.outputs.add()
o2.name = "temp output"
o2.comment = "The temporary output of test op"
o2.temporary = True
test_str = op.attrs.add()
test_str.name = "str_attr"
test_str.type = attribute_pb2.STRING
test_str.comment = "A string attribute for test op"
actual = creation.get_docstring_from_op_proto(op)
expected_docstring = '''Test Op.
This op is used for unit test, not a real op.
:param a: Input a for test op
:type a: list | basestr
:param b: Input b for test op
:type b: basestr
:param output: The output of test op
:type output: basestr
:param temp output: This is a temporary variable. It does not have to set by user. The temporary output of test op
:type temp output: basestr
:param str_attr: A string attribute for test op
:type str_attr: basestr
'''
self.assertEqual(expected_docstring, actual)
class TestOpCreations(unittest.TestCase): class TestOpCreations(unittest.TestCase):
def test_all(self): def test_all(self):
add_op = creation.op_creations.add_two(X="a", Y="b", Out="z") add_op = op.Operator("add_two", X="a", Y="b", Out="z")
self.assertIsNotNone(add_op) self.assertIsNotNone(add_op)
# Invoke C++ DebugString() # Invoke C++ DebugString()
self.assertEqual('Op(add_two), inputs:(a, b), outputs:(z).', self.assertEqual('Op(add_two), inputs:(a, b), outputs:(z).',
......
...@@ -2,7 +2,7 @@ import unittest ...@@ -2,7 +2,7 @@ import unittest
import numpy as np import numpy as np
import paddle.v2.framework.core as core import paddle.v2.framework.core as core
import paddle.v2.framework.create_op_creation_methods as creation from paddle.v2.framework.op import Operator
from op_test_util import OpTestMeta from op_test_util import OpTestMeta
...@@ -27,7 +27,7 @@ class TestSoftmaxOp(unittest.TestCase): ...@@ -27,7 +27,7 @@ class TestSoftmaxOp(unittest.TestCase):
class TestSoftmaxGradOp(unittest.TestCase): class TestSoftmaxGradOp(unittest.TestCase):
def test_softmax_grad(self): def test_softmax_grad(self):
op = creation.op_creations.softmax(X="X", Y="Y") op = Operator('softmax', X="X", Y="Y")
backward_op = core.Operator.backward(op, set()) backward_op = core.Operator.backward(op, set())
self.assertEqual(backward_op.type(), "softmax_grad") self.assertEqual(backward_op.type(), "softmax_grad")
expected = '''Op(softmax_grad), inputs:(X, Y, Y@GRAD), outputs:(X@GRAD).''' expected = '''Op(softmax_grad), inputs:(X, Y, Y@GRAD), outputs:(X@GRAD).'''
......
from setuptools import setup from setuptools import setup, Distribution
class BinaryDistribution(Distribution):
    """Distribution that always reports having extension modules."""

    # NOTE(review): presumably this makes the built package platform-specific
    # because it ships prebuilt .so files -- confirm against the wheel output.
    def has_ext_modules(self):
        return True
packages=['paddle', packages=['paddle',
'paddle.proto', 'paddle.proto',
...@@ -11,7 +15,8 @@ packages=['paddle', ...@@ -11,7 +15,8 @@ packages=['paddle',
'paddle.v2.master', 'paddle.v2.master',
'paddle.v2.plot', 'paddle.v2.plot',
'paddle.v2.framework', 'paddle.v2.framework',
'paddle.v2.framework.proto'] 'paddle.v2.framework.proto',
'py_paddle']
setup_requires=["requests", setup_requires=["requests",
"numpy>=1.12", "numpy>=1.12",
...@@ -21,23 +26,33 @@ setup_requires=["requests", ...@@ -21,23 +26,33 @@ setup_requires=["requests",
"rarfile", "rarfile",
"scipy>=0.19.0", "scipy>=0.19.0",
"Pillow", "Pillow",
"nltk"] "nltk>=3.2.2"]
if '${CMAKE_SYSTEM_PROCESSOR}' not in ['arm', 'armv7-a', 'aarch64']: if '${CMAKE_SYSTEM_PROCESSOR}' not in ['arm', 'armv7-a', 'aarch64']:
setup_requires+=["opencv-python"] setup_requires+=["opencv-python"]
setup(name='paddle', setup(name='paddlepaddle',
version='${PADDLE_VERSION}', version='${PADDLE_VERSION}',
description='Parallel Distributed Deep Learning', description='Parallel Distributed Deep Learning',
install_requires=setup_requires, install_requires=setup_requires,
packages=packages, packages=packages,
package_data={'paddle.v2.master': ['libpaddle_master.so'], package_data={
'paddle.v2.framework': ['core.so'] 'paddle.v2.master': ['libpaddle_master.so'],
'paddle.v2.framework': ['core.so'],
'py_paddle':['*.py','_swig_paddle.so']
}, },
package_dir={ package_dir={
'': '${CMAKE_CURRENT_SOURCE_DIR}', '': '${CMAKE_CURRENT_SOURCE_DIR}',
# The paddle.v2.framework.proto will be generated while compiling. # The paddle.v2.framework.proto will be generated while compiling.
# So that package points to other directory. # So that package points to other directory.
'paddle.v2.framework.proto': '${PROJ_BINARY_ROOT}/paddle/framework' 'paddle.v2.framework.proto': '${PROJ_BINARY_ROOT}/paddle/framework',
'py_paddle': '${PROJ_ROOT}/paddle/py_paddle'
}, },
scripts=['${PROJ_BINARY_ROOT}/paddle/scripts/paddle'],
distclass=BinaryDistribution,
data_files=[('/usr/local/opt/paddle/bin',
['${PROJ_BINARY_ROOT}/paddle/scripts/paddle_usage',
'${PROJ_BINARY_ROOT}/paddle/trainer/paddle_trainer',
'${PROJ_BINARY_ROOT}/paddle/trainer/paddle_merge_model',
'${PROJ_BINARY_ROOT}/paddle/pserver/paddle_pserver_main'])]
) )
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册