提交 748cb5af 编写于 作者: 王益

Merge branch 'develop' of https://github.com/paddlepaddle/paddle into dockerfile-dev

......@@ -25,9 +25,9 @@ addons:
packages:
- gcc-4.8
- g++-4.8
- gfortran-4.8
- git
- build-essential
- libatlas-base-dev
- python
- python-pip
- python2.7-dev
......
......@@ -16,7 +16,7 @@
set(CBLAS_FOUND OFF)
## Find MKL First.
set(MKL_ROOT $ENV{MKL_ROOT} CACHE PATH "Folder contains MKL")
set(MKL_ROOT $ENV{MKLROOT} CACHE PATH "Folder contains MKL")
find_path(MKL_INCLUDE_DIR mkl.h PATHS
${MKL_ROOT}/include)
......
# Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserve.
#
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#
# http://www.apache.org/licenses/LICENSE-2.0
#
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
......@@ -29,12 +29,14 @@ INCLUDE_DIRECTORIES(${GLOG_INCLUDE_DIR})
ExternalProject_Add(
glog
${EXTERNAL_PROJECT_LOG_ARGS}
DEPENDS gflags
GIT_REPOSITORY "https://github.com/google/glog.git"
PREFIX ${GLOG_SOURCES_DIR}
UPDATE_COMMAND ""
CMAKE_ARGS -DCMAKE_INSTALL_PREFIX=${GLOG_INSTALL_DIR}
CMAKE_ARGS -DCMAKE_POSITION_INDEPENDENT_CODE=ON
CMAKE_ARGS -DWITH_GFLAGS=OFF
CMAKE_ARGS -DWITH_GFLAGS=ON
CMAKE_ARGS -Dgflags_DIR=${GFLAGS_INSTALL_DIR}/lib/cmake/gflags
CMAKE_ARGS -DBUILD_TESTING=OFF
)
......
......@@ -15,7 +15,6 @@
INCLUDE(cblas)
IF(NOT ${CBLAS_FOUND})
MESSAGE(FATAL_ERROR "Please install OpenBlas, MKL or ATLAS.")
INCLUDE(ExternalProject)
SET(CBLAS_SOURCES_DIR ${THIRD_PARTY_PATH}/openblas)
......@@ -28,20 +27,40 @@ IF(NOT ${CBLAS_FOUND})
SET(CBLAS_LIBRARIES "${CBLAS_INSTALL_DIR}/lib/libopenblas.a" CACHE FILEPATH "openblas library" FORCE)
ENDIF(WIN32)
IF(CMAKE_COMPILER_IS_GNUCC)
ENABLE_LANGUAGE(Fortran)
LIST(APPEND CBLAS_LIBRARIES gfortran pthread)
ENDIF(CMAKE_COMPILER_IS_GNUCC)
IF(NOT CMAKE_Fortran_COMPILER)
MESSAGE(FATAL_ERROR "To build lapack in libopenblas, "
"you need to set gfortran compiler: cmake .. -DCMAKE_Fortran_COMPILER=...")
ENDIF(NOT CMAKE_Fortran_COMPILER)
ExternalProject_Add(
openblas
${EXTERNAL_PROJECT_LOG_ARGS}
URL "https://github.com/xianyi/OpenBLAS/archive/v0.2.19.tar.gz"
GIT_REPOSITORY https://github.com/xianyi/OpenBLAS.git
GIT_TAG v0.2.19
PREFIX ${CBLAS_SOURCES_DIR}
INSTALL_DIR ${CBLAS_INSTALL_DIR}
BUILD_IN_SOURCE 1
CONFIGURE_COMMAND ""
BUILD_COMMAND make CC=${CMAKE_C_COMPILER} FC=${CMAKE_Fortran_COMPILER}
INSTALL_COMMAND make install PREFIX=<INSTALL_DIR>
BUILD_COMMAND ${CMAKE_MAKE_PROGRAM} FC=${CMAKE_Fortran_COMPILER} CC=${CMAKE_C_COMPILER} HOSTCC=${CMAKE_C_COMPILER} NO_SHARED=1 libs netlib
INSTALL_COMMAND ${CMAKE_MAKE_PROGRAM} install NO_SHARED=1 PREFIX=<INSTALL_DIR>
UPDATE_COMMAND ""
CONFIGURE_COMMAND ""
)
ExternalProject_Add_Step(
openblas lapacke_install
COMMAND ${CMAKE_COMMAND} -E copy "${CBLAS_SOURCES_DIR}/src/openblas/lapack-netlib/LAPACKE/include/lapacke_mangling_with_flags.h" "${CBLAS_INSTALL_DIR}/include/lapacke_mangling.h"
COMMAND ${CMAKE_COMMAND} -E copy "${CBLAS_SOURCES_DIR}/src/openblas/lapack-netlib/LAPACKE/include/lapacke.h" "${CBLAS_INSTALL_DIR}/include/lapacke.h"
COMMAND ${CMAKE_COMMAND} -E copy "${CBLAS_SOURCES_DIR}/src/openblas/lapack-netlib/LAPACKE/include/lapacke_config.h" "${CBLAS_INSTALL_DIR}/include/lapacke_config.h"
COMMAND ${CMAKE_COMMAND} -E copy "${CBLAS_SOURCES_DIR}/src/openblas/lapack-netlib/LAPACKE/include/lapacke_utils.h" "${CBLAS_INSTALL_DIR}/include/lapacke_utils.h"
DEPENDEES install
)
LIST(APPEND external_project_dependencies openblas)
ENDIF()
ENDIF(NOT ${CBLAS_FOUND})
INCLUDE_DIRECTORIES(${CBLAS_INC_DIR})
......@@ -29,17 +29,12 @@ IF(WIN32)
"${PROTOBUF_INSTALL_DIR}/lib/libprotoc.lib" CACHE FILEPATH "protoc library." FORCE)
SET(PROTOBUF_PROTOC_EXECUTABLE "${PROTOBUF_INSTALL_DIR}/bin/protoc.exe" CACHE FILEPATH "protobuf executable." FORCE)
ELSE(WIN32)
IF(${HOST_SYSTEM} STREQUAL "centos")
SET(LIB "lib64")
ELSE()
SET(LIB "lib")
ENDIF()
SET(PROTOBUF_LITE_LIBRARY
"${PROTOBUF_INSTALL_DIR}/${LIB}/libprotobuf-lite.a" CACHE FILEPATH "protobuf lite library." FORCE)
"${PROTOBUF_INSTALL_DIR}/lib/libprotobuf-lite.a" CACHE FILEPATH "protobuf lite library." FORCE)
SET(PROTOBUF_LIBRARY
"${PROTOBUF_INSTALL_DIR}/${LIB}/libprotobuf.a" CACHE FILEPATH "protobuf library." FORCE)
"${PROTOBUF_INSTALL_DIR}/lib/libprotobuf.a" CACHE FILEPATH "protobuf library." FORCE)
SET(PROTOBUF_PROTOC_LIBRARY
"${PROTOBUF_INSTALL_DIR}/${LIB}/libprotoc.a" CACHE FILEPATH "protoc library." FORCE)
"${PROTOBUF_INSTALL_DIR}/lib/libprotoc.a" CACHE FILEPATH "protoc library." FORCE)
SET(PROTOBUF_PROTOC_EXECUTABLE "${PROTOBUF_INSTALL_DIR}/bin/protoc" CACHE FILEPATH "protobuf executable." FORCE)
ENDIF(WIN32)
......@@ -54,9 +49,11 @@ ExternalProject_Add(
CONFIGURE_COMMAND
${CMAKE_COMMAND} ${PROTOBUF_SOURCES_DIR}/src/protobuf/cmake
-Dprotobuf_BUILD_TESTS=OFF
-DZLIB_ROOT:FILEPATH=${ZLIB_ROOT}
-DCMAKE_POSITION_INDEPENDENT_CODE=ON
-DCMAKE_BUILD_TYPE=Release
-DCMAKE_INSTALL_PREFIX=${PROTOBUF_INSTALL_DIR}
-DCMAKE_INSTALL_LIBDIR=lib
)
LIST(APPEND external_project_dependencies protobuf)
......@@ -26,11 +26,12 @@ IF(PYTHONLIBS_FOUND AND PYTHONINTERP_FOUND)
find_python_module(wheel REQUIRED)
find_python_module(google.protobuf REQUIRED)
FIND_PACKAGE(NumPy REQUIRED)
IF(${PY_GOOGLE.PROTOBUF_VERSION} VERSION_LESS "3.0.0")
IF(${PY_GOOGLE.PROTOBUF_VERSION} AND ${PY_GOOGLE.PROTOBUF_VERSION} VERSION_LESS "3.0.0")
MESSAGE(FATAL_ERROR "Found Python Protobuf ${PY_GOOGLE.PROTOBUF_VERSION} < 3.0.0, "
"please use pip to upgrade protobuf.")
ENDIF(${PY_GOOGLE.PROTOBUF_VERSION} VERSION_LESS "3.0.0")
"please use pip to upgrade protobuf. pip install -U protobuf")
ENDIF()
ELSE(PYTHONLIBS_FOUND AND PYTHONINTERP_FOUND)
MESSAGE(FATAL_ERROR "Please install python 2.7 before building PaddlePaddle.")
##################################### PYTHON ########################################
SET(PYTHON_SOURCES_DIR ${THIRD_PARTY_PATH}/python)
SET(PYTHON_INSTALL_DIR ${THIRD_PARTY_PATH}/install/python)
......
......@@ -38,14 +38,6 @@ IF(NOT SWIG_FOUND)
SET(SWIG_DIR ${SWIG_SOURCES_DIR} CACHE FILEPATH "SWIG Directory" FORCE)
SET(SWIG_EXECUTABLE ${SWIG_SOURCES_DIR}/swig.exe CACHE FILEPATH "SWIG Executable" FORCE)
ELSE(WIN32)
# From PCRE configure
ExternalProject_Add(pcre
${EXTERNAL_PROJECT_LOG_ARGS}
GIT_REPOSITORY https://github.com/svn2github/pcre.git
PREFIX ${SWIG_SOURCES_DIR}/pcre
CMAKE_ARGS -DCMAKE_INSTALL_PREFIX:PATH=${SWIG_INSTALL_DIR}/pcre
)
# swig uses bison find it by cmake and pass it down
FIND_PACKAGE(BISON)
......@@ -54,16 +46,11 @@ IF(NOT SWIG_FOUND)
GIT_REPOSITORY https://github.com/swig/swig.git
GIT_TAG rel-3.0.10
PREFIX ${SWIG_SOURCES_DIR}
CONFIGURE_COMMAND cd ${SWIG_SOURCES_DIR}/src/swig && ./autogen.sh
CONFIGURE_COMMAND cd ${SWIG_SOURCES_DIR}/src/swig &&
env "PCRE_LIBS=${SWIG_INSTALL_DIR}/pcre/lib/libpcre.a ${SWIG_INSTALL_DIR}/pcre/lib/libpcrecpp.a ${SWIG_INSTALL_DIR}/pcre/lib/libpcreposix.a"
./configure
--prefix=${SWIG_INSTALL_DIR}
--with-pcre-prefix=${SWIG_INSTALL_DIR}/pcre
BUILD_COMMAND cd ${SWIG_SOURCES_DIR}/src/swig && make
INSTALL_COMMAND cd ${SWIG_SOURCES_DIR}/src/swig && make install
UPDATE_COMMAND ""
DEPENDS pcre
CONFIGURE_COMMAND cd <SOURCE_DIR> && ./autogen.sh && ./configure
--prefix=${SWIG_INSTALL_DIR} --without-pcre
BUILD_COMMAND cd <SOURCE_DIR> && make
INSTALL_COMMAND cd <SOURCE_DIR> && make install
UPDATE_COMMAND ""
)
SET(SWIG_DIR ${SWIG_INSTALL_DIR}/share/swig/${SWIG_TARGET_VERSION})
......
......@@ -54,6 +54,7 @@ ExternalProject_Add(
CMAKE_ARGS -DWITH_GPU=${WITH_GPU}
CMAKE_ARGS -DWITH_OMP=${USE_OMP}
CMAKE_ARGS -DWITH_TORCH=OFF
CMAKE_ARGS -DCMAKE_DISABLE_FIND_PACKAGE_Torch=TRUE
CMAKE_ARGS -DBUILD_SHARED=ON
)
......
......@@ -96,6 +96,7 @@ set(COMMON_FLAGS
-Wno-unused-parameter
-Wno-unused-function
-Wno-error=literal-suffix
-Wno-error=sign-compare
-Wno-error=unused-local-typedefs)
set(GPU_COMMON_FLAGS
......@@ -105,6 +106,7 @@ set(GPU_COMMON_FLAGS
-Wdelete-non-virtual-dtor
-Wno-unused-parameter
-Wno-unused-function
-Wno-error=sign-compare
-Wno-error=literal-suffix
-Wno-error=unused-local-typedefs
-Wno-error=unused-function # Warnings in Numpy Header.
......
......@@ -12,6 +12,14 @@
# See the License for the specific language governing permissions and
# limitations under the License.
# Detects the OS and sets appropriate variables.
# CMAKE_SYSTEM_NAME only give us a coarse-grained name,
# but the name like centos is necessary in some scenes
# to distinguish system for customization.
#
# for instance, protobuf libs path is <install_dir>/lib64
# on CentOS, but <install_dir>/lib on other systems.
IF(WIN32)
SET(HOST_SYSTEM "win32")
ELSE(WIN32)
......@@ -21,6 +29,7 @@ ELSE(WIN32)
SET(MACOS_VERSION ${VERSION})
SET(HOST_SYSTEM "macosx")
ELSE(APPLE)
IF(EXISTS "/etc/issue")
FILE(READ "/etc/issue" LINUX_ISSUE)
IF(LINUX_ISSUE MATCHES "CentOS")
......@@ -29,8 +38,24 @@ ELSE(WIN32)
SET(HOST_SYSTEM "debian")
ELSEIF(LINUX_ISSUE MATCHES "Ubuntu")
SET(HOST_SYSTEM "ubuntu")
ELSEIF(LINUX_ISSUE MATCHES "Red Hat")
SET(HOST_SYSTEM "redhat")
ELSEIF(LINUX_ISSUE MATCHES "Fedora")
SET(HOST_SYSTEM "fedora")
ENDIF()
ENDIF(EXISTS "/etc/issue")
IF(EXISTS "/etc/redhat-release")
FILE(READ "/etc/redhat-release" LINUX_ISSUE)
IF(LINUX_ISSUE MATCHES "CentOS")
SET(HOST_SYSTEM "centos")
ENDIF()
ENDIF(EXISTS "/etc/redhat-release")
IF(NOT HOST_SYSTEM)
SET(HOST_SYSTEM ${CMAKE_SYSTEM_NAME})
ENDIF()
ENDIF(APPLE)
ENDIF(WIN32)
......@@ -47,7 +72,7 @@ SET(EXTERNAL_PROJECT_LOG_ARGS
LOG_DOWNLOAD 0 # Wrap download in script to log output
LOG_UPDATE 1 # Wrap update in script to log output
LOG_CONFIGURE 1 # Wrap configure in script to log output
LOG_BUILD 1 # Wrap build in script to log output
LOG_BUILD 0 # Wrap build in script to log output
LOG_TEST 1 # Wrap test in script to log output
LOG_INSTALL 1 # Wrap install in script to log output
LOG_INSTALL 0 # Wrap install in script to log output
)
......@@ -25,6 +25,6 @@ paddle train \
--config_args=is_predict=1 \
--predict_output_dir=.
python gen_result.py > result.txt
python gen_result.py > result.csv
rm -rf rank-00000
......@@ -382,6 +382,15 @@ sampling_id_layer
:members: sampling_id_layer
:noindex:
Slicing and Joining Layers
==========================
pad_layer
-----------
.. automodule:: paddle.trainer_config_helpers.layers
:members: pad_layer
:noindex:
.. _api_trainer_config_helpers_layers_cost_layers:
Cost Layers
......
......@@ -4,6 +4,8 @@ Installing from Sources
* [1. Download and Setup](#download)
* [2. Requirements](#requirements)
* [3. Build on Ubuntu](#ubuntu)
* [4. Build on Centos](#centos)
## <span id="download">Download and Setup</span>
You can download PaddlePaddle from the [github source](https://github.com/PaddlePaddle/Paddle).
......@@ -16,9 +18,10 @@ cd paddle
To compile the source code, your computer must be equipped with the following dependencies.
- **Compiler**: GCC >= 4.8 or Clang >= 3.3 (AppleClang >= 5.1)
- **CMake**: version >= 3.0 (at least CMake 3.4 on Mac OS X)
- **Compiler**: GCC >= 4.8 or Clang >= 3.3 (AppleClang >= 5.1) and gfortran compiler
- **CMake**: CMake >= 3.0 (at least CMake 3.4 on Mac OS X)
- **BLAS**: MKL, OpenBlas or ATLAS
- **Python**: only support Python 2.7
**Note:** For CUDA 7.0 and CUDA 7.5, GCC 5.0 and up are not supported!
For CUDA 8.0, GCC versions later than 5.3 are not supported!
......@@ -64,7 +67,8 @@ As a simple example, consider the following:
1. **BLAS Dependencies(optional)**
Paddle will find BLAS from system's default path. But you can specify MKL, OpenBLAS or ATLAS via `MKL_ROOT`, `OPENBLAS_ROOT` or `ATLAS_ROOT`.
CMake will search BLAS libraries from system. If not found, OpenBLAS will be downloaded, built and installed automatically.
To utilize preinstalled BLAS, you can simply specify MKL, OpenBLAS or ATLAS via `MKL_ROOT`, `OPENBLAS_ROOT` or `ATLAS_ROOT`.
```bash
# specify MKL
......@@ -94,12 +98,78 @@ As a simple example, consider the following:
### Install Dependencies
- **CPU Dependencies**
- **Paddle Dependencies**
```bash
# necessary
sudo apt-get update
sudo apt-get install -y g++ make cmake build-essential libatlas-base-dev python python-pip libpython-dev git
sudo apt-get install -y git curl gcc g++ gfortran make build-essential automake
sudo apt-get install -y python python-pip python-numpy libpython-dev bison
sudo pip install 'protobuf==3.1.0.post1'
# install cmake 3.4
curl -sSL https://cmake.org/files/v3.4/cmake-3.4.1.tar.gz | tar -xz && \
cd cmake-3.4.1 && ./bootstrap && make -j4 && sudo make install && \
cd .. && rm -rf cmake-3.4.1
```
- **GPU Dependencies (optional)**
To build GPU version, you will need the following installed:
1. a CUDA-capable GPU
2. A supported version of Linux with a gcc compiler and toolchain
3. NVIDIA CUDA Toolkit (available at http://developer.nvidia.com/cuda-downloads)
4. NVIDIA cuDNN Library (availabel at https://developer.nvidia.com/cudnn)
The CUDA development environment relies on tight integration with the host development environment,
including the host compiler and C runtime libraries, and is therefore only supported on
distribution versions that have been qualified for this CUDA Toolkit release.
After downloading cuDNN library, issue the following commands:
```bash
sudo tar -xzf cudnn-7.5-linux-x64-v5.1.tgz -C /usr/local
sudo chmod a+r /usr/local/cuda/include/cudnn.h /usr/local/cuda/lib64/libcudnn*
```
Then you need to set LD\_LIBRARY\_PATH, PATH environment variables in ~/.bashrc.
```bash
export LD_LIBRARY_PATH=/usr/local/cuda/lib64:$LD_LIBRARY_PATH
export PATH=/usr/local/cuda/bin:$PATH
```
### Build and Install
As usual, the best option is to create build folder under paddle project directory.
```bash
mkdir build && cd build
```
Finally, you can build and install PaddlePaddle:
```bash
# you can add build option here, such as:
cmake .. -DCMAKE_INSTALL_PREFIX=<path to install>
# please use sudo make install, if you want to install PaddlePaddle into the system
make -j `nproc` && make install
# set PaddlePaddle installation path in ~/.bashrc
export PATH=<path to install>/bin:$PATH
# install PaddlePaddle Python modules.
sudo pip install <path to install>/opt/paddle/share/wheels/*.whl
```
## <span id="centos">Build on Centos 7</span>
### Install Dependencies
- **CPU Dependencies**
```bash
# necessary
sudo yum update
sudo yum install -y epel-release
sudo yum install -y make cmake3 python-devel python-pip gcc-gfortran swig git
sudo pip install wheel numpy
sudo pip install 'protobuf>=3.0.0'
```
......@@ -142,7 +212,7 @@ Finally, you can build and install PaddlePaddle:
```bash
# you can add build option here, such as:
cmake .. -DCMAKE_INSTALL_PREFIX=<path to install>
cmake3 .. -DCMAKE_INSTALL_PREFIX=<path to install>
# please use sudo make install, if you want to install PaddlePaddle into the system
make -j `nproc` && make install
# set PaddlePaddle installation path in ~/.bashrc
......
......@@ -32,7 +32,7 @@ pooling_layer 的使用示例如下,详细见 :ref:`api_trainer_config_helpers
- `pooling_type` 目前支持两种,分别是:MaxPooling()和AvgPooling()。
- `agg_level=AggregateLevel.TIMESTEP` 时(默认值):
- `agg_level=AggregateLevel.EACH_TIMESTEP` 时(默认值):
- 作用:双层序列经过运算变成一个0层序列,或单层序列经过运算变成一个0层序列
- 输入:一个双层序列,或一个单层序列
......@@ -54,7 +54,7 @@ last_seq 的使用示例如下( :ref:`api_trainer_config_helpers_layers_first_
last = last_seq(input=layer,
agg_level=AggregateLevel.EACH_SEQUENCE)
- `agg_level=AggregateLevel.TIMESTEP` 时(默认值):
- `agg_level=AggregateLevel.EACH_TIMESTEP` 时(默认值):
- 作用:一个双层序列经过运算变成一个0层序列,或一个单层序列经过运算变成一个0层序列
- 输入:一个双层序列或一个单层序列
......
此差异已折叠。
......@@ -20,23 +20,27 @@ limitations under the License. */
namespace paddle {
const SequenceArg& BufferArg::sequence() const {
// CHECK_EQ(bufferType_, TENSOR_SEQUENCE_DATA);
CHECK_EQ(bufferType_, TENSOR_SEQUENCE_DATA);
return dynamic_cast<const SequenceArg&>(*this);
}
const SparseMatrixArg& BufferArg::sparse() const {
// CHECK_EQ(bufferType_, TENSOR_SPARSE);
CHECK_EQ(bufferType_, TENSOR_SPARSE);
return dynamic_cast<const SparseMatrixArg&>(*this);
}
SparseMatrixArg::SparseMatrixArg(const CpuSparseMatrix& sparse, ArgType argType)
: BufferArg(sparse, argType),
row_(reinterpret_cast<void*>(sparse.getRows()), VALUE_TYPE_INT32),
col_(reinterpret_cast<void*>(sparse.getCols()), VALUE_TYPE_INT32) {}
col_(reinterpret_cast<void*>(sparse.getCols()), VALUE_TYPE_INT32) {
bufferType_ = TENSOR_SPARSE;
}
SparseMatrixArg::SparseMatrixArg(const GpuSparseMatrix& sparse, ArgType argType)
: BufferArg(sparse, argType),
row_(reinterpret_cast<void*>(sparse.getRows()), VALUE_TYPE_INT32),
col_(reinterpret_cast<void*>(sparse.getCols()), VALUE_TYPE_INT32) {}
col_(reinterpret_cast<void*>(sparse.getCols()), VALUE_TYPE_INT32) {
bufferType_ = TENSOR_SPARSE;
}
} // namespace paddle
......@@ -23,10 +23,11 @@ limitations under the License. */
namespace paddle {
enum BufferType {
TENSOR_NORMAL = 0,
TENSOR_SEQUENCE_ID = 1,
TENSOR_SEQUENCE_DATA = 2,
TENSOR_SPARSE = 3
TENSOR_UNKNOWN = 0,
TENSOR_NORMAL = 1,
TENSOR_SEQUENCE_ID = 2,
TENSOR_SEQUENCE_DATA = 3,
TENSOR_SPARSE = 4
};
enum SparseDataType {
......@@ -39,7 +40,6 @@ enum SparseDataFormat { SPARSE_CSR_FORMAT = 0, SPARSE_CSC_FORMAT = 1 };
class BufferArg;
class SequenceArg;
class SparseMatrixArg;
typedef std::shared_ptr<BufferArg> BufferArgPtr;
/**
* \brief BufferArg used as the argument type of Function.
......@@ -50,6 +50,11 @@ typedef std::shared_ptr<BufferArg> BufferArgPtr;
* 3. SequenceArg for a Buffer of sequence data.
* 4. SparseMatrixArg for a Buffer of sparse matrix.
*
* Buffer shape
* For most buffers, the first dimension `shape()[0]` represents
* the size of the mini-batch.
*
* Buffer argType
* There is an ArgType property for the BufferArg used as Function Output.
* Whether the result of the Function calculation is assigned to the
* output Buffer or added to the output Buffer is determined by the
......@@ -71,6 +76,14 @@ public:
ArgType getArgType() const { return argType_; }
public:
BufferArg(ValueType valueType,
const TensorShape& shape,
ArgType argType = UNSPECIFIED)
: buf_(nullptr),
valueType_(valueType),
shape_(shape),
argType_(argType) {}
BufferArg(void* buf,
ValueType valueType,
const TensorShape& shape,
......@@ -86,6 +99,7 @@ public:
valueType_(DataType<real>::value),
shape_(2),
argType_(argType) {
bufferType_ = TENSOR_NORMAL;
shape_.setDim(0, matrix.getHeight());
shape_.setDim(1, matrix.getWidth());
}
......@@ -98,6 +112,7 @@ public:
valueType_(DataType<real>::value),
shape_(shape),
argType_(argType) {
bufferType_ = TENSOR_NORMAL;
CHECK_EQ(matrix.getElementCnt(), shape.getElements());
}
......@@ -107,6 +122,7 @@ public:
valueType_(DataType<real>::value),
shape_(1),
argType_(argType) {
bufferType_ = TENSOR_NORMAL;
shape_.setDim(0, vector.getSize());
}
......@@ -116,6 +132,7 @@ public:
valueType_(VALUE_TYPE_INT32),
shape_(1),
argType_(argType) {
bufferType_ = TENSOR_NORMAL;
shape_.setDim(0, vector.getSize());
}
......@@ -150,6 +167,8 @@ public:
ValueType valueType() const { return valueType_; }
BufferType bufferType() const { return bufferType_; }
const TensorShape& shape() const { return shape_; }
bool isSparse() const { return (TENSOR_SPARSE == bufferType_); }
bool isSequenceArg() const { return TENSOR_SEQUENCE_DATA == bufferType_; }
const SequenceArg& sequence() const;
const SparseMatrixArg& sparse() const;
......@@ -158,8 +177,8 @@ protected:
void* buf_;
ValueType valueType_;
TensorShape shape_;
BufferType bufferType_;
ArgType argType_ = UNSPECIFIED;
BufferType bufferType_{TENSOR_UNKNOWN};
ArgType argType_{UNSPECIFIED};
// leading dimensions. The size is dims_.size()
// Dims lds_;
};
......@@ -170,15 +189,24 @@ protected:
// if a < b then value_.buf_[a] < value_.buf_[b]
class SequenceIdArg : public BufferArg {
public:
SequenceIdArg(const TensorShape& shape, ArgType argType = UNSPECIFIED)
: BufferArg(VALUE_TYPE_INT32, shape, argType) {
CHECK_EQ(shape_.ndims(), (size_t)1);
CHECK_GT(shape_[0], 1);
numSeqs_ = shape_[0] - 1;
}
SequenceIdArg(void* buf,
const TensorShape& shape,
ArgType argType = UNSPECIFIED)
: BufferArg(buf, VALUE_TYPE_INT32, shape, argType) {
bufferType_ = TENSOR_SEQUENCE_ID;
CHECK_EQ(shape_.ndims(), (size_t)1);
numSeqs_ = shape_[0] - 1;
}
SequenceIdArg(const IVector& vector) : BufferArg(vector) {
bufferType_ = TENSOR_SEQUENCE_ID;
numSeqs_ = shape_[0] - 1;
}
......@@ -190,26 +218,41 @@ private:
size_t numSeqs_;
};
// sequence data
// sequences data
// For mini-batch calculate,
// one batch can contain more than one sequence of data.
// SequenceArg can be used to represent sequences that contain multiple
// unequal lengths.
class SequenceArg : public BufferArg {
public:
SequenceArg(ValueType valueType,
const TensorShape& shape,
ArgType argType = UNSPECIFIED)
: BufferArg(valueType, shape, argType), startPositions_(TensorShape()) {}
SequenceArg(void* buf,
ValueType valueType,
const TensorShape& shape,
const SequenceIdArg& startPositions,
ArgType argType = UNSPECIFIED)
: BufferArg(buf, valueType, shape, argType),
startPositions_(startPositions) {}
startPositions_(startPositions) {
bufferType_ = TENSOR_SEQUENCE_DATA;
}
SequenceArg(const Matrix& matrix,
const IVector& vector,
ArgType argType = UNSPECIFIED)
: BufferArg(matrix, argType), startPositions_(vector) {}
: BufferArg(matrix, argType), startPositions_(vector) {
bufferType_ = TENSOR_SEQUENCE_DATA;
}
~SequenceArg() {}
void* getIdBuf() const { return startPositions_.data(); }
size_t numSeqs() const { return startPositions_.numSeqs(); }
SequenceIdArg& getSequenceId() { return startPositions_; }
const SequenceIdArg& getSequenceId() const { return startPositions_; }
private:
SequenceIdArg startPositions_;
......@@ -235,6 +278,7 @@ public:
nnz_(nnz),
format_(format),
type_(type) {
bufferType_ = TENSOR_SPARSE;
CHECK((valueType == VALUE_TYPE_FLOAT) || (valueType == VALUE_TYPE_DOUBLE));
CHECK_EQ(shape_.ndims(), (size_t)2);
CHECK_EQ(row_.shape().ndims(), (size_t)1);
......
......@@ -14,9 +14,7 @@ limitations under the License. */
#include "BufferArg.h"
#include <gtest/gtest.h>
#include "Function.h"
#include "paddle/math/MemoryHandle.h"
#include "paddle/math/SparseMatrix.h"
namespace paddle {
......@@ -37,55 +35,4 @@ TEST(BufferTest, SequenceIdArg) {
EXPECT_EQ(buffer.numSeqs(), 9);
}
TEST(BufferTest, asArgument) {
MatrixPtr matrix = Matrix::create(100, 200);
VectorPtr vector = Vector::create(100, false);
CpuSparseMatrix sparse(200, 300, 50);
// prepare arguments
BufferArgs argments;
argments.addArg(*matrix);
argments.addArg(*vector);
argments.addArg(sparse);
// function
auto function = [=](const BufferArgs& inputs) {
EXPECT_EQ(inputs.size(), 3);
// check inputs[0]
EXPECT_EQ(inputs[0].shape().ndims(), 2);
EXPECT_EQ(inputs[0].shape()[0], 100);
EXPECT_EQ(inputs[0].shape()[1], 200);
EXPECT_EQ(inputs[0].data(), matrix->getData());
EXPECT_EQ(inputs[0].matrix<DEVICE_TYPE_CPU>().getHeight(),
matrix->getHeight());
EXPECT_EQ(inputs[0].matrix<DEVICE_TYPE_CPU>().getWidth(),
matrix->getWidth());
EXPECT_EQ(inputs[0].matrix<DEVICE_TYPE_CPU>().getData(), matrix->getData());
// check inputs[1]
EXPECT_EQ(inputs[1].shape().ndims(), 1);
EXPECT_EQ(inputs[1].shape()[0], 100);
EXPECT_EQ(inputs[1].data(), vector->getData());
CpuVector inVector = inputs[1].vector<real, DEVICE_TYPE_CPU>();
EXPECT_EQ(inVector.getSize(), vector->getSize());
EXPECT_EQ(inVector.getData(), vector->getData());
// check inputs[2]
EXPECT_EQ(inputs[2].shape().ndims(), 2);
EXPECT_EQ(inputs[2].shape()[0], 200);
EXPECT_EQ(inputs[2].shape()[1], 300);
EXPECT_EQ(inputs[2].data(), sparse.getData());
// CHECK_EQ(inputs[2].sparse().nnz(), 50);
// CHECK_EQ(inputs[2].sparse().dataFormat(), SPARSE_CSR_FORMAT);
// CHECK_EQ(inputs[2].sparse().dataType(), SPARSE_FLOAT_VALUE);
EXPECT_EQ(inputs[2].sparse().getRowBuf(), sparse.getRows());
EXPECT_EQ(inputs[2].sparse().getColBuf(), sparse.getCols());
};
// call function
function(argments);
}
} // namespace paddle
......@@ -19,12 +19,13 @@ if(WITH_TESTING)
# TODO:
# file(GLOB test_files . *OpTest.cpp)
# add_executable(${test_bin} EXCLUDE_FROM_ALL ${test_files})
# add_simple_unittest(CrossMapNormalOpTest)
add_simple_unittest(CrossMapNormalOpTest)
add_simple_unittest(TensorShapeTest)
add_simple_unittest(TensorTypeTest)
add_simple_unittest(BufferArgTest)
add_simple_unittest(FunctionTest)
# add_simple_unittest(ContextProjectionOpTest)
add_simple_unittest(ContextProjectionOpTest)
add_simple_unittest(PadOpTest)
endif()
endif()
......
......@@ -17,7 +17,10 @@ limitations under the License. */
#include "paddle/math/Vector.h"
namespace paddle {
/**
* Context Projection Forward with CPU Matrix Device.
*
*/
template <>
void ContextProjectionForward<DEVICE_TYPE_CPU>(CpuMatrix& out_mat,
const CpuMatrix& input_mat,
......@@ -70,10 +73,30 @@ void ContextProjectionForward<DEVICE_TYPE_CPU>(CpuMatrix& out_mat,
}
/**
* \param inputs[0] input value.
* \param inputs[1] input weight.
* \param inputs[2] input sequence.
* \param outputs[0] output value.
* Paddle Function for Context Projection Forward.
* Calculate the output layer value sequence after context projection.
*
* What is Context Projection for a sequence?
* For example, assumed input (x) has 4 words and the dimension of each word
* representation is 2. If we use zero to pad instead of learned weight to pad,
* and the context_lenth is 3, the output (y) is:
*
* @code
* x = [a1, a2;
* b1, b2;
* c1, c2;
* d1, d2]
* y = [0, 0, a1, a2, b1, b2;
* a1, a2, b1, b2, c1, c2;
* b1, b2, c1, c2, d1, d2;
* c1, c2, d1, d2, 0, 0]
* @endcode
*
* \param outputs[0].matrix output layer value, n * (d * l)
* \param outputs[0].vector start position sequence, n * 1
* \param inputs[0].matrix input layer value, n * d
* \param inputs[0].vector start position sequence, n * 1
* \param inputs[1].matrix input layer weight, pad * d
*/
template <DeviceType Device>
class ContextProjectionForwardFunc : public FunctionBase {
......@@ -85,28 +108,37 @@ public:
}
void calc(const BufferArgs& inputs, const BufferArgs& outputs) override {
CHECK_EQ((size_t)3, inputs.size());
CHECK(1 == inputs.size() || 2 == inputs.size());
CHECK_EQ((size_t)1, outputs.size());
CHECK(inputs[0].isSequenceArg() && outputs[0].isSequenceArg())
<< "SequenceArg required here";
const auto val_seqs = dynamic_cast<const SequenceArg&>(inputs[0]);
auto out_seq = dynamic_cast<const SequenceArg&>(outputs[0]);
CHECK(outputs[0].data() && inputs[0].data() && inputs[2].data());
CHECK_EQ(outputs[0].shape().ndims(), (size_t)2);
CHECK_EQ(inputs[0].shape().ndims(), (size_t)2);
CHECK_EQ(inputs[1].shape().ndims(), (size_t)2);
CHECK_EQ(inputs[2].shape().ndims(), (size_t)1);
CHECK(out_seq.data() && val_seqs.data() && val_seqs.getSequenceId().data());
CHECK_EQ(out_seq.shape().ndims(), (size_t)2);
CHECK_EQ(val_seqs.shape().ndims(), (size_t)2);
CHECK_EQ(val_seqs.getSequenceId().shape().ndims(), (size_t)1);
if (2 == inputs.size()) {
CHECK_EQ(inputs[1].shape().ndims(), (size_t)2);
}
/// dim of output = dim of input * context_length
CHECK_EQ(outputs[0].shape()[1], inputs[0].shape()[1] * context_length_);
/// dim of input == dim of weight
CHECK_EQ(inputs[0].shape()[1], inputs[1].shape()[1]);
CHECK_EQ(out_seq.shape()[1], val_seqs.shape()[1] * context_length_);
/// input and output has the same batch_size
CHECK_EQ(inputs[0].shape()[0], outputs[0].shape()[0]);
CHECK_EQ(val_seqs.shape()[0], out_seq.shape()[0]);
/// dim of input == dim of weight
if (2 == inputs.size()) {
CHECK_EQ(val_seqs.shape()[1], inputs[1].shape()[1]);
}
CHECK_EQ(outputs[0].getArgType(), ADD_TO);
auto out_mat = outputs[0].matrix<Device>();
auto in_mat = inputs[0].matrix<Device>();
auto w_mat = !inputs[1].data()
? typename Tensor<real, Device>::Matrix(nullptr, 0, 0)
: inputs[1].matrix<Device>();
auto seq_vec = inputs[2].vector<int, Device>();
CHECK_EQ(out_seq.getArgType(), ADD_TO);
auto out_mat = out_seq.matrix<Device>();
const auto in_mat = val_seqs.matrix<Device>();
const auto w_mat =
(2 == inputs.size())
? inputs[1].matrix<Device>()
: typename Tensor<real, Device>::Matrix(nullptr, 0, 0);
const auto seq_vec = val_seqs.getSequenceId().vector<int, Device>();
ContextProjectionForward<Device>(out_mat,
in_mat,
w_mat,
......@@ -122,8 +154,12 @@ private:
size_t begin_pad_;
};
/**
* Context Projection Backward with CPU Matrix Device.
*
*/
template <>
void ContextProjectionBackward<DEVICE_TYPE_CPU>(CpuMatrix& out_grad_mat,
void ContextProjectionBackward<DEVICE_TYPE_CPU>(const CpuMatrix& out_grad_mat,
CpuMatrix& in_grad_mat,
CpuMatrix& w_grad_mat,
const CpuIVector& seq_vec,
......@@ -146,7 +182,8 @@ void ContextProjectionBackward<DEVICE_TYPE_CPU>(CpuMatrix& out_grad_mat,
int64_t pad_size =
std::min(starts[i] - begin, starts[i + 1] - starts[i]);
if (is_padding && w_grad_mat) {
MatrixPtr mat = out_grad_mat.subMatrix(starts[i], pad_size);
MatrixPtr mat = const_cast<CpuMatrix&>(out_grad_mat)
.subMatrix(starts[i], pad_size);
MatrixPtr sub = w_grad_mat.subMatrix(j, pad_size);
sub->addAtOffset(*mat, j * input_dim);
}
......@@ -157,8 +194,8 @@ void ContextProjectionBackward<DEVICE_TYPE_CPU>(CpuMatrix& out_grad_mat,
int64_t pad_size =
std::min(end - starts[i + 1], starts[i + 1] - starts[i]);
if (is_padding && w_grad_mat) {
MatrixPtr mat =
out_grad_mat.subMatrix(starts[i + 1] - pad_size, pad_size);
MatrixPtr mat = const_cast<CpuMatrix&>(out_grad_mat)
.subMatrix(starts[i + 1] - pad_size, pad_size);
MatrixPtr sub = w_grad_mat.subMatrix(
begin_pad + context_start + j - pad_size, pad_size);
sub->addAtOffset(*mat, j * input_dim);
......@@ -169,17 +206,22 @@ void ContextProjectionBackward<DEVICE_TYPE_CPU>(CpuMatrix& out_grad_mat,
if (end <= begin) continue;
if (!in_grad_mat) continue;
MatrixPtr src = in_grad_mat.subMatrix(begin, end - begin);
MatrixPtr dst = out_grad_mat.subMatrix(dst_begin, dst_end - dst_begin);
MatrixPtr dst = const_cast<CpuMatrix&>(out_grad_mat)
.subMatrix(dst_begin, dst_end - dst_begin);
src->addAtOffset(*dst, j * input_dim);
}
}
}
/**
* \param inputs[0] input grad.
* \param inputs[1] weight grad.
* \param inputs[2] input sequence.
* \param outputs[0] output value.
* Context Projection Backward Function.
* Update the weight gradient and input layer gradient with backprop
*
* \param inputs[0].matrix output layer grad, n * (d * l)
* \param inputs[0].vector start position sequence, n * 1
* \param outputs[0].matrix input layer grad, n * d
* \param outputs[0].vector start position sequence, n * 1
* \param outputs[1] weight grad, pad * d
*/
template <DeviceType Device>
class ContextProjectionBackwardFunc : public FunctionBase {
......@@ -193,32 +235,36 @@ public:
}
void calc(const BufferArgs& inputs, const BufferArgs& outputs) override {
CHECK_EQ((size_t)3, inputs.size());
CHECK_EQ((size_t)1, outputs.size());
CHECK_EQ((size_t)1, inputs.size());
CHECK_EQ((size_t)2, outputs.size());
CHECK(inputs[0].isSequenceArg() && outputs[0].isSequenceArg())
<< "SequenceArg required here";
const auto in_seq = dynamic_cast<const SequenceArg&>(inputs[0]);
auto out_seq = dynamic_cast<const SequenceArg&>(outputs[0]);
CHECK(in_seq.data() && in_seq.getSequenceId().data());
CHECK_EQ(in_seq.shape().ndims(), (size_t)2);
CHECK_EQ(in_seq.getSequenceId().shape().ndims(), (size_t)1);
CHECK_EQ(out_seq.shape().ndims(), (size_t)2);
CHECK_EQ(out_seq.getSequenceId().shape().ndims(), (size_t)1);
CHECK_EQ(outputs[1].shape().ndims(), (size_t)2);
CHECK(outputs[0].data() && inputs[2].data());
CHECK_EQ(outputs[0].shape().ndims(), (size_t)2);
CHECK_EQ(inputs[0].shape().ndims(), (size_t)2);
CHECK_EQ(inputs[1].shape().ndims(), (size_t)2);
CHECK_EQ(inputs[2].shape().ndims(), (size_t)1);
/// dim of input grad == dim of weight
CHECK_EQ(out_seq.shape()[1], outputs[1].shape()[1]);
/// input and output grad has the same batch_size
CHECK_EQ(out_seq.shape()[0], in_seq.shape()[0]);
/// dim of output grad = dim of input grad * context_length
CHECK_EQ(in_seq.shape()[1], out_seq.shape()[1] * context_length_);
CHECK_EQ(out_seq.getArgType(), ADD_TO);
CHECK_EQ(outputs[1].getArgType(), ADD_TO);
/// dim of input == dim of weight
CHECK_EQ(inputs[0].shape()[1], inputs[1].shape()[1]);
/// input and output has the same batch_size
CHECK_EQ(inputs[0].shape()[0], outputs[0].shape()[0]);
/// dim of output = dim of input * context_length
CHECK_EQ(outputs[0].shape()[1], inputs[0].shape()[1] * context_length_);
CHECK_EQ(outputs[0].getArgType(), ADD_TO);
auto out_grad_mat = outputs[0].matrix<Device>();
const auto seq_vec = in_seq.getSequenceId().vector<int, Device>();
const auto out_grad_mat = in_seq.matrix<Device>();
auto in_grad_mat =
!inputs[0].data() ? typename Tensor<real, Device>::Matrix(nullptr, 0, 0)
: inputs[0].matrix<Device>();
auto w_grad_mat = !inputs[1].data()
!out_seq.data() ? typename Tensor<real, Device>::Matrix(nullptr, 0, 0)
: out_seq.matrix<Device>();
auto w_grad_mat = !outputs[1].data()
? typename Tensor<real, Device>::Matrix(nullptr, 0, 0)
: inputs[1].matrix<Device>();
auto seq_vec = inputs[2].vector<int, Device>();
: outputs[1].matrix<Device>();
ContextProjectionBackward<Device>(out_grad_mat,
in_grad_mat,
w_grad_mat,
......@@ -238,11 +284,16 @@ private:
size_t total_pad_;
};
#if 0
/**
* \param inputs[0] input grad.
* \param inputs[1] input sequence.
* \param outputs[0] output grad.
* Context Projection Backward Data Function
* Update input layer grad
* input: sequence of output layer grad
* output: sequence of input layer grad
*
* \param outputs[0].matrix input layer grad, n * d
* \param outputs[0].vector start position sequence, n * 1
* \param inputs[0].matrix output layer grad, n * (d * l)
* \param inputs[0].vector start positon sequence, n * 1
*/
template <DeviceType Device>
class ContextProjectionBackwardDataFunc : public FunctionBase {
......@@ -252,32 +303,30 @@ public:
context_start_ = config.get<int>("context_start");
}
void calc(const Arguments& inputs,
const Arguments& outputs,
const Arguments& inouts) override {
CHECK_EQ(2, static_cast<int>(inputs.size()));
void calc(const BufferArgs& inputs, const BufferArgs& outputs) override {
CHECK_EQ(1, static_cast<int>(inputs.size()));
CHECK_EQ(1, static_cast<int>(outputs.size()));
CHECK_EQ(0, static_cast<int>(inouts.size()));
CHECK(inputs[0].getData() && outputs[0].getData() && inputs[1].getData());
CHECK_EQ(static_cast<int>(outputs[0].dims_.size()), 2);
CHECK_EQ(static_cast<int>(inputs[0].dims_.size()), 2);
CHECK_EQ(static_cast<int>(inputs[1].dims_.size()), 1);
CHECK_EQ(outputs[0].dims_[1], inputs[0].dims_[1] * context_length_);
CHECK(inputs[0].isSequenceArg() && outputs[0].isSequenceArg())
<< "SequenceArg required here";
const auto in_seq = dynamic_cast<const SequenceArg&>(inputs[0]);
const auto out_seq = dynamic_cast<const SequenceArg&>(outputs[0]);
CHECK(in_seq.data() && out_seq.data() && in_seq.getSequenceId().data());
CHECK_EQ(static_cast<int>(out_seq.shape().ndims()), 2);
CHECK_EQ(static_cast<int>(in_seq.shape().ndims()), 2);
CHECK_EQ(static_cast<int>(in_seq.getSequenceId().shape().ndims()), 1);
/// output layer grad dim == input layer grad dim * context_length_
CHECK_EQ(in_seq.shape().ndims(), out_seq.shape().ndims() * context_length_);
/// input and output has the same batch_size
CHECK_EQ(inputs[0].dims_[0], outputs[0].dims_[0]);
CHECK_EQ(in_seq.shape()[0], out_seq.shape()[0]);
CHECK_EQ(outputs[0].getArgType(), ASSIGN_TO);
auto out_grad_mat = std::make_shared<typename MatrixT<Device>::type>(
outputs[0].getData(), outputs[0].dims_[0], outputs[0].dims_[1]);
const auto in_grad_mat = std::make_shared<typename MatrixT<Device>::type>(
inputs[0].getData(), inputs[0].dims_[0], inputs[0].dims_[1]);
typename SequenceT<Device>::type seq_vec(
inputs[1].dims_[0], reinterpret_cast<int*>(inputs[1].getData()));
const auto out_grad_mat = in_seq.matrix<Device>();
const auto seq_vec = in_seq.getSequenceId().vector<int, Device>();
auto in_grad_mat = out_seq.matrix<Device>();
ContextProjectionBackwardData<Device>(out_grad_mat.get(),
in_grad_mat.get(),
seq_vec,
context_length_,
context_start_);
ContextProjectionBackwardData<Device>(
out_grad_mat, in_grad_mat, seq_vec, context_length_, context_start_);
}
private:
......@@ -286,9 +335,14 @@ private:
};
/**
* \param inputs[0] weight grad.
* \param inputs[1] input sequence.
* \param outputs[0] output grad.
* Context Projection Backward Weight Function
* Update weight grad by backprop
* input: sequence of output layer grad
* output: weight grad
*
* \param outputs[0] weight grad, pad * d
* \param inputs[0].matrix output layer grad, n * (d * l)
* \param inputs[0].vecotr start positon sequence, n * 1
*/
template <DeviceType Device>
class ContextProjectionBackwardWeightFunc : public FunctionBase {
......@@ -300,28 +354,25 @@ public:
total_pad_ = config.get<size_t>("total_pad");
}
void calc(const Arguments& inputs,
const Arguments& outputs,
const Arguments& inouts) override {
CHECK_EQ(2, static_cast<int>(inputs.size()));
void calc(const BufferArgs& inputs, const BufferArgs& outputs) override {
CHECK_EQ(1, static_cast<int>(inputs.size()));
CHECK_EQ(1, static_cast<int>(outputs.size()));
CHECK_EQ(0, static_cast<int>(inouts.size()));
CHECK(inputs[0].getData() && outputs[0].getData() && inputs[1].getData());
CHECK_EQ(static_cast<int>(outputs[0].dims_.size()), 2);
CHECK_EQ(static_cast<int>(inputs[0].dims_.size()), 2);
CHECK_EQ(static_cast<int>(inputs[1].dims_.size()), 1);
CHECK_EQ(outputs[0].dims_[1], inputs[0].dims_[1] * context_length_);
auto out_grad_mat = std::make_shared<typename MatrixT<Device>::type>(
outputs[0].getData(), outputs[0].dims_[0], outputs[0].dims_[1]);
auto w_grad_mat = std::make_shared<typename MatrixT<Device>::type>(
inputs[0].getData(), inputs[0].dims_[0], inputs[0].dims_[1]);
typename SequenceT<Device>::type seq_vec(
inputs[1].dims_[0], reinterpret_cast<int*>(inputs[1].getData()));
CHECK(inputs[0].isSequenceArg()) << "SequenceArg required here";
const auto in_seq = dynamic_cast<const SequenceArg&>(inputs[0]);
CHECK(in_seq.data() && in_seq.getSequenceId().data() && outputs[0].data());
CHECK_EQ(static_cast<int>(outputs[0].shape().ndims()), 2);
CHECK_EQ(static_cast<int>(in_seq.shape().ndims()), 2);
CHECK_EQ(static_cast<int>(in_seq.getSequenceId().shape().ndims()), 1);
CHECK_EQ(in_seq.shape()[0], outputs[0].shape()[0]);
/// output layer grad dim == weight dim * context_length_
CHECK_EQ(in_seq.shape()[1], outputs[0].shape()[1] * context_length_);
CHECK_EQ(outputs[0].getArgType(), ADD_TO);
ContextProjectionBackwardWeight<Device>(out_grad_mat.get(),
w_grad_mat.get(),
const auto seq_vec = in_seq.getSequenceId().vector<int, Device>();
const auto out_grad_mat = in_seq.matrix<Device>();
auto w_grad_mat = outputs[0].matrix<Device>();
ContextProjectionBackwardWeight<Device>(out_grad_mat,
w_grad_mat,
seq_vec,
context_length_,
context_start_,
......@@ -335,7 +386,6 @@ private:
size_t begin_pad_;
size_t total_pad_;
};
#endif
REGISTER_TYPED_FUNC(ContextProjectionForward,
CPU,
......@@ -350,7 +400,6 @@ REGISTER_TYPED_FUNC(ContextProjectionForward,
REGISTER_TYPED_FUNC(ContextProjectionBackward,
GPU,
ContextProjectionBackwardFunc);
#if 0
REGISTER_TYPED_FUNC(ContextProjectionBackwardData,
GPU,
ContextProjectionBackwardDataFunc);
......@@ -358,5 +407,4 @@ REGISTER_TYPED_FUNC(ContextProjectionBackwardWeight,
GPU,
ContextProjectionBackwardWeightFunc);
#endif
#endif
} // namespace paddle
......@@ -21,14 +21,14 @@ namespace paddle {
/**
* \brief Context Projection Forward.
*
* \param[out] outputs output data.
* \param[in] input input data.
* \param[in] weight input weight.
* \param[in] sequence input data.
* \param[in] context_length consecutive rows for concatenation.
* \param[in] context_start context start position.
* \param[in] begin_pad begining pad position.
* \param[in] is_padding whether padding 0 or not.
* \param[in/out] outputs output data.
* \param[in] input input data.
* \param[in] weight input weight.
* \param[in] sequence input data.
* \param[in] context_length consecutive rows for concatenation.
* \param[in] context_start context start position.
* \param[in] begin_pad begining pad position.
* \param[in] is_padding whether padding 0 or not.
*
*/
template <DeviceType DType>
......@@ -56,7 +56,7 @@ void ContextProjectionForward(
*/
template <DeviceType DType>
void ContextProjectionBackward(
typename Tensor<real, DType>::Matrix& out_grad,
const typename Tensor<real, DType>::Matrix& out_grad,
typename Tensor<real, DType>::Matrix& in_grad,
typename Tensor<real, DType>::Matrix& w_grad,
const typename Tensor<int, DType>::Vector& seq_vec,
......@@ -68,7 +68,7 @@ void ContextProjectionBackward(
template <DeviceType DType>
void ContextProjectionBackwardData(
typename Tensor<real, DType>::Matrix& out_grad,
const typename Tensor<real, DType>::Matrix& out_grad,
typename Tensor<real, DType>::Matrix& in_grad,
const typename Tensor<int, DType>::Vector& sequence,
size_t context_length,
......@@ -76,7 +76,7 @@ void ContextProjectionBackwardData(
template <DeviceType DType>
void ContextProjectionBackwardWeight(
typename Tensor<real, DType>::Matrix& out_grad,
const typename Tensor<real, DType>::Matrix& out_grad,
typename Tensor<real, DType>::Matrix& w_grad,
const typename Tensor<int, DType>::Vector& seq_vec,
size_t context_length,
......
......@@ -138,10 +138,10 @@ void ContextProjectionForward<DEVICE_TYPE_GPU>(GpuMatrix& output,
begin_pad);
}
__global__ void KeContextProjectionBackwardData(real* out_grad,
__global__ void KeContextProjectionBackwardData(const real* out_grad,
const int* sequence,
real* in_grad,
int input_dim,
size_t input_dim,
int context_length,
int context_start) {
int idx = threadIdx.x;
......@@ -152,7 +152,8 @@ __global__ void KeContextProjectionBackwardData(real* out_grad,
real value = 0;
int instances = seq_end - seq_start + context_length - 1;
out_grad += seq_start * input_dim * context_length;
auto out = const_cast<real*>(out_grad);
out += seq_start * input_dim * context_length;
in_grad += seq_start * input_dim;
for (int k = 0; k <= input_dim / block_size; k++) {
if (idx < input_dim) {
......@@ -169,7 +170,7 @@ __global__ void KeContextProjectionBackwardData(real* out_grad,
int outx = (i - context_length) < 0 ? i : (context_length - 1);
int outy = (i - context_length) < 0 ? 0 : (i - (context_length - 1));
real* output_r =
out_grad + outy * input_dim * context_length + outx * input_dim;
out + outy * input_dim * context_length + outx * input_dim;
for (int j = outy; j < seq_end - seq_start; j++) {
value += output_r[idx];
if (j - outy == outx) break;
......@@ -194,7 +195,7 @@ __global__ void KeContextProjectionBackwardData(real* out_grad,
* @param[in] context_start context start.
*
*/
void hl_context_projection_backward_data(real* out_grad,
void hl_context_projection_backward_data(const real* out_grad,
const int* sequence,
real* input_grad,
size_t num_sequences,
......@@ -216,7 +217,7 @@ void hl_context_projection_backward_data(real* out_grad,
}
template <>
void ContextProjectionBackwardData<DEVICE_TYPE_GPU>(GpuMatrix& out_grad,
void ContextProjectionBackwardData<DEVICE_TYPE_GPU>(const GpuMatrix& out_grad,
GpuMatrix& in_grad,
const GpuIVector& sequence,
size_t context_length,
......@@ -231,7 +232,7 @@ void ContextProjectionBackwardData<DEVICE_TYPE_GPU>(GpuMatrix& out_grad,
}
template<int THREADS_X, int THREADS_Y>
__global__ void KeContextProjectionBackwardWeight(real* out_grad,
__global__ void KeContextProjectionBackwardWeight(const real* out_grad,
const int* sequence,
real* w_grad,
int num_sequences,
......@@ -254,7 +255,8 @@ __global__ void KeContextProjectionBackwardWeight(real* out_grad,
for (int seqId = idy; seqId < num_sequences; seqId += THREADS_Y) {
int seq_start = sequence[seqId];
int seq_end = sequence[seqId+1];
output_r = out_grad + seq_start * w_dim * context_length;
output_r = const_cast<real*>(out_grad)
+ seq_start * w_dim * context_length;
if (context_start < 0) {
if (padId + context_start < 0) {
......@@ -318,7 +320,7 @@ __global__ void KeContextProjectionBackwardWeight(real* out_grad,
* beginning.
*
*/
void hl_context_projection_backward_weight(real* out_grad,
void hl_context_projection_backward_weight(const real* out_grad,
const int* sequence,
real* w_grad,
size_t num_sequences,
......@@ -346,7 +348,7 @@ void hl_context_projection_backward_weight(real* out_grad,
template <>
void ContextProjectionBackwardWeight<DEVICE_TYPE_GPU>(
GpuMatrix& out_grad,
const GpuMatrix& out_grad,
GpuMatrix& w_grad,
const GpuIVector& seq_vec,
size_t context_length,
......@@ -365,7 +367,7 @@ void ContextProjectionBackwardWeight<DEVICE_TYPE_GPU>(
}
template <>
void ContextProjectionBackward<DEVICE_TYPE_GPU>(GpuMatrix& out_grad,
void ContextProjectionBackward<DEVICE_TYPE_GPU>(const GpuMatrix& out_grad,
GpuMatrix& in_grad,
GpuMatrix& w_grad,
const GpuIVector& sequence,
......
......@@ -56,22 +56,25 @@ void testMatrixProjectionForward(int context_start,
cpu_out.randomizeUniform();
gpu_out.copyFrom(cpu_out);
compare.getCpuFunction()->calc(
{Tensor(cpu_in.getData(), Dims{batch_size, input_dim}),
Tensor(cpu_weight ? cpu_weight->getData() : nullptr,
Dims{pad, input_dim}),
Tensor(reinterpret_cast<real*>(cpu_seq->getData()),
Dims{cpu_seq->getSize()})},
{Tensor(cpu_out.getData(), Dims{batch_size, input_dim * context_length})},
{});
compare.getGpuFunction()->calc(
{Tensor(gpu_in.getData(), Dims{batch_size, input_dim}),
Tensor(gpu_weight ? gpu_weight->getData() : nullptr,
Dims{pad, input_dim}),
Tensor(reinterpret_cast<real*>(gpu_seq->getData()),
Dims{gpu_seq->getSize()})},
{Tensor(gpu_out.getData(), Dims{batch_size, input_dim * context_length})},
{});
BufferArgs cpu_inputs;
BufferArgs cpu_outputs;
cpu_inputs.addArg(cpu_in, *cpu_seq);
if (cpu_weight) {
cpu_inputs.addArg(*cpu_weight, *cpu_seq);
}
cpu_outputs.addArg(cpu_out, *cpu_seq, ADD_TO);
compare.getCpuFunction()->calc(cpu_inputs, cpu_outputs);
BufferArgs gpu_inputs;
BufferArgs gpu_outputs;
gpu_inputs.addArg(gpu_in, *gpu_seq);
if (gpu_weight) {
gpu_inputs.addArg(*gpu_weight, *gpu_seq);
}
gpu_outputs.addArg(gpu_out, *gpu_seq, ADD_TO);
compare.getGpuFunction()->calc(gpu_inputs, gpu_outputs);
autotest::TensorCheckEqual(cpu_out, gpu_out);
}
......@@ -117,25 +120,23 @@ void testMatrixProjectionBackward(int context_start,
gpu_w_grad->copyFrom(*cpu_w_grad);
}
compare.getCpuFunction()->calc(
{Tensor(cpu_in_grad.getData(), Dims{batch_size, input_dim}),
Tensor(cpu_w_grad ? cpu_w_grad->getData() : nullptr,
Dims{pad, input_dim}),
Tensor(reinterpret_cast<real*>(cpu_seq->getData()),
Dims{cpu_seq->getSize()})},
{Tensor(cpu_out_grad.getData(),
Dims{batch_size, input_dim * context_length})},
{});
compare.getGpuFunction()->calc(
{Tensor(gpu_in_grad.getData(), Dims{batch_size, input_dim}),
Tensor(gpu_w_grad ? gpu_w_grad->getData() : nullptr,
Dims{pad, input_dim}),
Tensor(reinterpret_cast<real*>(gpu_seq->getData()),
Dims{gpu_seq->getSize()})},
{Tensor(gpu_out_grad.getData(),
Dims{batch_size, input_dim * context_length})},
{});
BufferArgs cpu_inputs;
BufferArgs cpu_outputs;
cpu_inputs.addArg(cpu_out_grad, *cpu_seq);
cpu_outputs.addArg(cpu_in_grad, *cpu_seq, ADD_TO);
cpu_outputs.addArg(
cpu_w_grad ? *cpu_w_grad : CpuMatrix(nullptr, 0, input_dim), ADD_TO);
compare.getCpuFunction()->calc(cpu_inputs, cpu_outputs);
BufferArgs gpu_inputs;
BufferArgs gpu_outputs;
gpu_inputs.addArg(gpu_out_grad, *gpu_seq);
gpu_outputs.addArg(gpu_in_grad, *gpu_seq, ADD_TO);
gpu_outputs.addArg(
gpu_w_grad ? *gpu_w_grad : GpuMatrix(nullptr, 0, input_dim), ADD_TO);
compare.getGpuFunction()->calc(gpu_inputs, gpu_outputs);
autotest::TensorCheckErr(cpu_in_grad, gpu_in_grad);
if (is_padding) {
......
......@@ -112,11 +112,51 @@ void CrossMapNormalGrad<DEVICE_TYPE_CPU>(real* inputsGrad,
}
/**
* \brief {o_0, o_1} = calc(i_0)
* \brief Normalization with across maps.
*
* \param inputs[0] input value.
* \param outputs[0] output value.
* \param outputs[1] denoms.
* This Function comes from the paper
* "ImageNet Classification with Deep Convolutional Neural Networks".
*
* The original formula is:
*
* Input(i, x, y)
* Output(i, x, y) = ----------------------------------------------
* -- upper
* (k + alpha * > (Input(j, x, y))^2) ^ (beta)
* -- j = lower
*
* upper is `min(C, c + N/2)`
* lower if `max(0, c - N/2)`
*
* Function implementation:
*
* inputs and outpus is NCHW format, while input.shape.ndims() is equal 4.
* And the meaning of each dimension(0-3) is respectively batch size,
* feature maps, rows and columns.
*
* Input and Output in the above formula is for each map(i) of one image, and
* Input(i, x, y), Output(i, x, y) represents an element in an image.
*
* C is the number of feature maps of one image, and N is a hyper-parameters
* is configured when Function is initialized. The sum in the denominator
* is the sum of the same position in the neighboring maps.
*
* In the implementation of Function, k is equal to 1,
* so Function has no argument for k.
*
* Function Arguments:
*
* \param size_ represent N
* \param scale_ represent alpha
* \param pow_ represent beta
* \param inputs[0] represent Input
* \param outputs[0] represent Output
* \param outputs[1] represent The denominator in the formula(except beta)
*
* Note:
* Save output[1] is to simplify the backward calculation.
* TODO, if only consider the forward calculation, we can optimize to
* remove the output[1].
*/
template <DeviceType Device>
class CrossMapNormalFunc : public FunctionBase {
......@@ -161,13 +201,36 @@ private:
};
/**
* \brief {o_0} = calc(i_0, i_1, i_2, i_3)
* \brief Backward calculation for normalization with across maps.
*
* Function implementation:
*
* The implementation of this Function is derived from the
* CrossMapNormalFunc implementation.
*
* InputGrad = OutputGrad * denoms ^ (-beta)
* -- upper
* + > (OutputGrad * OutputValue * (-2 * alpha * beta) / denoms) * InputValue
* -- lower
*
* \param inputs[0] input value.
* \param inputs[1] output value.
* \param inputs[2] output grad.
* \param inputs[3] denoms.
* \param outputs[0] input grad.
* The data of inputs/outputs format is the same as the forward interface
* and is NCHW.
*
* The upper and lower is the same as forward. The logic of the sum
* is also the same as forward.
*
* Function Arguments:
*
* \param size_ represent N
* \param scale_ represent alpha
* \param pow_ represent beta
* \param inputs[0] represent InputValue, inputs[0] of CrossMapNormalFunc
* \param inputs[1] represent OutputValue, outputs[0] of CrossMapNormalFunc
* \param inputs[2] represent OutputGrad
* \param inputs[3] represent denoms, outputs[1] of CrossMapNormalFunc
* This is the intermediate result that is
* preserved in the forward calculation.
* \param outputs[0] represent InputGrad
*/
template <DeviceType Device>
class CrossMapNormalGradFunc : public FunctionBase {
......@@ -188,8 +251,13 @@ public:
CHECK(inputs[0].shape() == inputs[3].shape());
CHECK(inputs[0].shape() == outputs[0].shape());
// TODO(hedaoyuan): need support ASSIGN_TO mode.
CHECK_EQ(outputs[0].getArgType(), ADD_TO);
if (outputs[0].getArgType() != ADD_TO) {
// Currently, some algorithm implementations are ASSIGN_TO mode,
// if need to support the ADD_TO calculation, need to clear the output.
typename Tensor<real, Device>::Vector tmp(
outputs[0].shape().getElements(), outputs[0].data<real>());
tmp.zero();
}
size_t samples = inputs[0].shape()[0];
size_t channels = inputs[0].shape()[1];
......
......@@ -27,15 +27,19 @@ TEST(CrossMapNormal, real) {
<< " imgSizeH=" << imgSizeH << " imgSizeW=" << imgSizeW
<< " size=" << size;
FunctionCompare compare("CrossMapNormal",
FuncConfig()
.set("size", size)
.set("scale", (real)1.5)
.set("pow", (real)0.5));
Dims dims{numSamples, channels, imgSizeH, imgSizeW};
compare.cmpWithArg({Tensor(nullptr, dims)},
{Tensor(nullptr, dims), Tensor(nullptr, dims)},
{});
// init Test object
FunctionCompare test("CrossMapNormal",
FuncConfig()
.set("size", size)
.set("scale", (real)1.5)
.set("pow", (real)0.5));
// prepare input arguments
TensorShape shape{numSamples, channels, imgSizeH, imgSizeW};
test.addInputs(BufferArg(VALUE_TYPE_FLOAT, shape));
test.addOutputs(BufferArg(VALUE_TYPE_FLOAT, shape));
test.addOutputs(BufferArg(VALUE_TYPE_FLOAT, shape));
// run Function
test.run();
}
}
}
......@@ -53,18 +57,19 @@ TEST(CrossMapNormalGrad, real) {
<< " imgSizeH=" << imgSizeH << " imgSizeW=" << imgSizeW
<< " size=" << size;
FunctionCompare compare("CrossMapNormalGrad",
FuncConfig()
.set("size", size)
.set("scale", (real)1.5)
.set("pow", (real)0.5));
Dims dims{numSamples, channels, imgSizeH, imgSizeW};
compare.cmpWithArg({Tensor(nullptr, dims),
Tensor(nullptr, dims),
Tensor(nullptr, dims),
Tensor(nullptr, dims)},
{Tensor(nullptr, dims)},
{});
FunctionCompare test("CrossMapNormalGrad",
FuncConfig()
.set("size", size)
.set("scale", (real)1.5)
.set("pow", (real)0.5));
TensorShape shape{numSamples, channels, imgSizeH, imgSizeW};
test.addInputs(BufferArg(VALUE_TYPE_FLOAT, shape));
test.addInputs(BufferArg(VALUE_TYPE_FLOAT, shape));
test.addInputs(BufferArg(VALUE_TYPE_FLOAT, shape));
test.addInputs(BufferArg(VALUE_TYPE_FLOAT, shape));
test.addOutputs(BufferArg(VALUE_TYPE_FLOAT, shape));
// run Function
test.run();
}
}
}
......
......@@ -79,15 +79,25 @@ FuncConfig& FuncConfig::set<bool>(const std::string& key, bool v) {
void BufferArgs::addArg(const Matrix& arg,
const TensorShape& shape,
ArgType argType) {
args_.push_back(std::make_shared<BufferArg>(arg, shape, argType));
_args_.push_back(new BufferArg(arg, shape, argType));
addArg(*_args_.back());
}
void BufferArgs::addArg(const CpuSparseMatrix& arg, ArgType argType) {
args_.push_back(std::make_shared<SparseMatrixArg>(arg, argType));
_args_.push_back(new SparseMatrixArg(arg, argType));
addArg(*_args_.back());
}
void BufferArgs::addArg(const GpuSparseMatrix& arg, ArgType argType) {
args_.push_back(std::make_shared<SparseMatrixArg>(arg, argType));
_args_.push_back(new SparseMatrixArg(arg, argType));
addArg(*_args_.back());
}
void BufferArgs::addArg(const Matrix& matrix,
const IVector& vector,
ArgType argType) {
_args_.push_back(new SequenceArg(matrix, vector, argType));
addArg(*_args_.back());
}
ClassRegistrar<FunctionBase> FunctionBase::funcRegistrar_;
......
......@@ -50,19 +50,44 @@ protected:
* Argument type for Function::calc().
* A BufferArgs contains a set of BufferArg,
* because Function can have multiple inputs and outputs.
*
* addArg() with Matix object used to adapt Layer Argument.
* Will create a BufferArg object in addArg(),
* and free in destructor of BufferArgs.
*
* addArg() with BufferArg object, just save BufferArg object address,
* and the caller needs to guarantee the validity of the BufferArg object
* in the BufferArgs life time.
*/
class BufferArgs {
public:
BufferArgs() {}
~BufferArgs() {
for (auto arg : _args_) {
delete arg;
}
}
size_t size() const { return args_.size(); }
// add argument into BufferArgs
// Tensor can be Matrix, Vector, IVector.
// For inputs, do not need argType.
// For outputs, the argType needs to be specified as ASSIGN_TO or ADD_TO.
template <typename Tensor>
void addArg(const Tensor& arg, ArgType argType = UNSPECIFIED) {
args_.push_back(std::make_shared<BufferArg>(arg, argType));
void addArg(const Matrix& arg, ArgType argType = UNSPECIFIED) {
_args_.push_back(new BufferArg(arg, argType));
addArg(*_args_.back());
}
void addArg(const Vector& arg, ArgType argType = UNSPECIFIED) {
_args_.push_back(new BufferArg(arg, argType));
addArg(*_args_.back());
}
void addArg(const IVector& arg, ArgType argType = UNSPECIFIED) {
_args_.push_back(new BufferArg(arg, argType));
addArg(*_args_.back());
}
// Add arg into BufferArgs and reshape the arg.
......@@ -77,20 +102,37 @@ public:
void addArg(const CpuSparseMatrix& arg, ArgType argType = UNSPECIFIED);
void addArg(const GpuSparseMatrix& arg, ArgType argType = UNSPECIFIED);
void addArg(const Matrix& matrix,
const IVector& vector,
ArgType argType = UNSPECIFIED);
// get argument
const BufferArg& operator[](size_t num) const {
CHECK_LT(num, args_.size());
return *args_[num];
}
void addArg(BufferArg& arg) { args_.push_back(&arg); }
void addArg(SequenceIdArg& arg) { args_.push_back(&arg); }
void addArg(SequenceArg& arg) { args_.push_back(&arg); }
void addArg(SparseMatrixArg& arg) { args_.push_back(&arg); }
private:
std::vector<BufferArgPtr> args_;
std::vector<BufferArg*> args_;
// The BufferArg object is constructed and freed by BufferArgs.
std::vector<BufferArg*> _args_;
};
/**
* \brief Base class for Function.
* The basic Function implementation requires override init and calc interfaces.
*
* The caller needs to ensure the validity of the arguments
* during Function execution.
*
* Function inputs are readonly, Function outputs have two modes: ASSIGN_TO
* and ADD_TO.
* If output.getArgType() == ASSIGN_TO, this is assign mode, and the calculation
......
......@@ -14,6 +14,7 @@ limitations under the License. */
#include "Function.h"
#include <gtest/gtest.h>
#include "paddle/math/SparseMatrix.h"
namespace paddle {
......@@ -56,4 +57,110 @@ TEST(Function, BufferArgs) {
Function<DEVICE_TYPE_GPU>(gpuArgments);
}
/**
* Some tests case are used to check the consistency between the BufferArg type
* argument received by Function and the original type argument.
*
* Use Case:
* TEST() {
* Matrix matrix(...);
* CheckBufferArg lambda = [=](const BufferArg& arg) {
* // check matrix and arg are equivalent
* EXPECT_EQ(matrix, arg);
* }
*
* BufferArgs argments{matrix...};
* std::vector<CheckBufferArg> checkFunc{lambda...};
* testBufferArgs(argments, checkFunc);
* }
*/
typedef std::function<void(const BufferArg&)> CheckBufferArg;
void testBufferArgs(const BufferArgs& inputs,
const std::vector<CheckBufferArg>& check) {
EXPECT_EQ(inputs.size(), check.size());
for (size_t i = 0; i < inputs.size(); i++) {
check[i](inputs[i]);
}
}
void testBufferArgs(const BufferArgs& inputs, const CheckBufferArg& check) {
EXPECT_EQ(inputs.size(), 1);
check(inputs[0]);
}
TEST(Arguments, Matrix) {
MatrixPtr matrix = Matrix::create(100, 200);
CheckBufferArg check = [=](const BufferArg& arg) {
EXPECT_EQ(arg.shape().ndims(), 2);
EXPECT_EQ(arg.shape()[0], 100);
EXPECT_EQ(arg.shape()[1], 200);
EXPECT_EQ(arg.data(), matrix->getData());
EXPECT_EQ(arg.matrix<DEVICE_TYPE_CPU>().getHeight(), matrix->getHeight());
EXPECT_EQ(arg.matrix<DEVICE_TYPE_CPU>().getWidth(), matrix->getWidth());
EXPECT_EQ(arg.matrix<DEVICE_TYPE_CPU>().getData(), matrix->getData());
};
BufferArgs argments;
argments.addArg(*matrix);
std::vector<CheckBufferArg> checkFunc;
checkFunc.push_back(check);
testBufferArgs(argments, checkFunc);
}
TEST(Arguments, Vector) {
VectorPtr vector = Vector::create(100, false);
CheckBufferArg check = [=](const BufferArg& arg) {
EXPECT_EQ(arg.shape().ndims(), 1);
EXPECT_EQ(arg.shape()[0], 100);
EXPECT_EQ(arg.data(), vector->getData());
CpuVector inVector = arg.vector<real, DEVICE_TYPE_CPU>();
EXPECT_EQ(inVector.getSize(), vector->getSize());
EXPECT_EQ(inVector.getData(), vector->getData());
};
BufferArgs argments;
argments.addArg(*vector);
std::vector<CheckBufferArg> checkFunc;
checkFunc.push_back(check);
testBufferArgs(argments, checkFunc);
}
TEST(Arguments, CpuSparseMatrix) {
CpuSparseMatrix sparse(200, 300, 50);
CheckBufferArg check = [=](const BufferArg& arg) {
EXPECT_EQ(arg.shape().ndims(), 2);
EXPECT_EQ(arg.shape()[0], 200);
EXPECT_EQ(arg.shape()[1], 300);
EXPECT_EQ(arg.data(), sparse.getData());
// CHECK_EQ(arg.sparse().nnz(), 50);
// CHECK_EQ(arg.sparse().dataFormat(), SPARSE_CSR_FORMAT);
// CHECK_EQ(arg.sparse().dataType(), SPARSE_FLOAT_VALUE);
EXPECT_EQ(arg.sparse().getRowBuf(), sparse.getRows());
EXPECT_EQ(arg.sparse().getColBuf(), sparse.getCols());
};
BufferArgs argments;
argments.addArg(sparse);
std::vector<CheckBufferArg> checkFunc;
checkFunc.push_back(check);
testBufferArgs(argments, checkFunc);
}
TEST(Arguments, BufferArg) {
BufferArg arg(nullptr, VALUE_TYPE_FLOAT, {1, 2, 3});
CheckBufferArg check = [=](const BufferArg& arg) {
EXPECT_EQ(arg.shape().ndims(), 3);
EXPECT_EQ(arg.shape()[0], 1);
EXPECT_EQ(arg.shape()[1], 2);
EXPECT_EQ(arg.shape()[2], 3);
};
BufferArgs argments;
argments.addArg(arg);
testBufferArgs(argments, check);
}
} // namespace paddle
......@@ -15,95 +15,186 @@ limitations under the License. */
#include "Function.h"
#include "paddle/math/Vector.h"
#include "paddle/math/tests/TensorCheck.h"
#include "paddle/testing/TestUtil.h"
namespace paddle {
typedef std::shared_ptr<BufferArg> BufferArgPtr;
/**
* \brief A class for comparing CPU and GPU implementations of Function.
*
*
* Use case:
* // Initializes a test object, the corresponding cpu and gpu Function
* // are constructed according to FunctionName and FuncConfig.
* FunctionCompare test(FunctionName, FuncConfig);
* // Prepare inputs and outputs arguments.
* // Here the input and output can not contain real data,
* // only contains the argument type and shape.
* test.addInputs(input1);
* test.addInputs(input2);
* test.addOutputs(output1);
* test.addOutputs(output2);
* // Run.
* // Will according to the type and shape of arguments(inputs_/outputs_),
* // automatic initialization cpu and gpu function required arguments
* // (cpuInputs_/cpuOutputs_/gpuInputs_/gpuOutputs_).
* // Call the CPU and GPU Function calculation results.
* // Compares CPU and GPU calculation results for consistency.
* test.run();
*/
class FunctionCompare {
public:
FunctionCompare(const std::string& name, const FuncConfig& config)
: cpu(FunctionBase::funcRegistrar_.createByType(name + "-CPU")),
gpu(FunctionBase::funcRegistrar_.createByType(name + "-GPU")) {
cpu->init(config);
gpu->init(config);
: cpuFunc_(FunctionBase::funcRegistrar_.createByType(name + "-CPU")),
gpuFunc_(FunctionBase::funcRegistrar_.createByType(name + "-GPU")) {
cpuFunc_->init(config);
gpuFunc_->init(config);
}
~FunctionCompare() {}
// input need only contains shape, do not contains data.
void addInputs(const BufferArg& input) {
size_t size =
input.shape().getElements() * sizeOfValuType(input.valueType());
cpuMemory_.emplace_back(std::make_shared<CpuMemoryHandle>(size));
gpuMemory_.emplace_back(std::make_shared<GpuMemoryHandle>(size));
cpuInputs_.emplace_back(std::make_shared<BufferArg>(
cpuMemory_.back()->getBuf(), input.valueType(), input.shape()));
gpuInputs_.emplace_back(std::make_shared<BufferArg>(
gpuMemory_.back()->getBuf(), input.valueType(), input.shape()));
}
// output need only contains shape, do not contains data.
void addOutputs(const BufferArg& output) {
size_t size =
output.shape().getElements() * sizeOfValuType(output.valueType());
cpuMemory_.emplace_back(std::make_shared<CpuMemoryHandle>(size));
gpuMemory_.emplace_back(std::make_shared<GpuMemoryHandle>(size));
cpuOutputs_.emplace_back(
std::make_shared<BufferArg>(cpuMemory_.back()->getBuf(),
output.valueType(),
output.shape(),
ASSIGN_TO));
gpuOutputs_.emplace_back(
std::make_shared<BufferArg>(gpuMemory_.back()->getBuf(),
output.valueType(),
output.shape(),
ASSIGN_TO));
}
void cmpWithArg(const Arguments& inputs,
const Arguments& outputs,
const Arguments& inouts) {
// init cpu and gpu arguments
auto initArgs = [=](
Arguments& cpuArgs, Arguments& gpuArgs, const Arguments& inArgs) {
for (const auto arg : inArgs) {
size_t size = sizeof(real);
for (const auto dim : arg.dims_) {
size *= dim;
}
if (arg.getData()) {
// todo(tianbing), waste unnecessary mem here
cpuMemory.emplace_back(std::make_shared<CpuMemoryHandle>(size));
gpuMemory.emplace_back(std::make_shared<GpuMemoryHandle>(size));
cpuArgs.emplace_back(Tensor((real*)arg.getData(), arg.dims_));
gpuArgs.emplace_back(Tensor((real*)arg.getData(), arg.dims_));
// already init outside
} else {
cpuMemory.emplace_back(std::make_shared<CpuMemoryHandle>(size));
gpuMemory.emplace_back(std::make_shared<GpuMemoryHandle>(size));
cpuArgs.emplace_back(
Tensor((real*)cpuMemory.back()->getBuf(), arg.dims_));
gpuArgs.emplace_back(
Tensor((real*)gpuMemory.back()->getBuf(), arg.dims_));
// will use an api to refactor this code.
CpuVector cpuVector(size / sizeof(real),
(real*)cpuArgs.back().getData());
GpuVector gpuVector(size / sizeof(real),
(real*)gpuArgs.back().getData());
cpuVector.uniform(0.001, 1);
gpuVector.copyFrom(cpuVector);
}
void addInputs(const SequenceArg& input) {
size_t batchSize = input.shape()[0];
size_t numSeqs = batchSize / 10 + 1;
size_t sizeId = (numSeqs + 1) * sizeOfValuType(VALUE_TYPE_INT32);
cpuMemory_.emplace_back(std::make_shared<CpuMemoryHandle>(sizeId));
gpuMemory_.emplace_back(std::make_shared<GpuMemoryHandle>(sizeId));
TensorShape seqsId({numSeqs + 1});
// void* cpuBuffer = cpuMemory_.back()->getBuf();
// void* gpuBuffer = gpuMemory_.back()->getBuf();
size_t size =
input.shape().getElements() * sizeOfValuType(input.valueType());
cpuMemory_.emplace_back(std::make_shared<CpuMemoryHandle>(size));
gpuMemory_.emplace_back(std::make_shared<GpuMemoryHandle>(size));
// TODO: need be implemented.
}
void run() {
// prepare cpu/gpu arguments
initInputs();
// function calculate
auto callFunction = [](FunctionBase* function,
std::vector<BufferArgPtr>& inputs,
std::vector<BufferArgPtr>& outputs) {
BufferArgs inArgs;
BufferArgs outArgs;
for (auto arg : inputs) {
inArgs.addArg(*arg);
}
for (auto arg : outputs) {
outArgs.addArg(*arg);
}
function->calc(inArgs, outArgs);
};
initArgs(cpuInputs, gpuInputs, inputs);
initArgs(cpuOutputs, gpuOutputs, outputs);
initArgs(cpuInouts, gpuInouts, inouts);
// function calculate
cpu->calc(cpuInputs, cpuOutputs, cpuInouts);
gpu->calc(gpuInputs, gpuOutputs, gpuInouts);
callFunction(cpuFunc_.get(), cpuInputs_, cpuOutputs_);
callFunction(gpuFunc_.get(), gpuInputs_, gpuOutputs_);
// check outputs and inouts
auto checkArgs = [=](const Arguments& cpuArgs, const Arguments& gpuArgs) {
for (size_t i = 0; i < cpuArgs.size(); i++) {
auto cpu = cpuArgs[i];
auto gpu = gpuArgs[i];
size_t size = 1;
for (auto dim : cpu.dims_) {
size *= dim;
}
CpuVector cpuVector(size, (real*)cpu.getData());
GpuVector gpuVector(size, (real*)gpu.getData());
autotest::TensorCheckErr(cpuVector, gpuVector);
}
};
checkArgs(cpuOutputs, gpuOutputs);
checkArgs(cpuInouts, gpuInouts);
compareOutputs();
}
std::shared_ptr<FunctionBase> getCpuFunction() const { return cpuFunc_; }
std::shared_ptr<FunctionBase> getGpuFunction() const { return gpuFunc_; }
protected:
void initInputs() {
for (size_t i = 0; i < cpuInputs_.size(); i++) {
initArg(*cpuInputs_[i]);
// TODO: Need a BufferCopy used to copy from one BufferArg to another.
CpuVector cpuVector(cpuInputs_[i]->shape().getElements(),
(real*)cpuInputs_[i]->data());
GpuVector gpuVector(gpuInputs_[i]->shape().getElements(),
(real*)gpuInputs_[i]->data());
gpuVector.copyFrom(cpuVector);
}
}
void compareOutputs() {
for (size_t i = 0; i < cpuOutputs_.size(); i++) {
// TODO, Need a BufferCheck used to compare the two buffers.
auto cpu = cpuOutputs_[i];
auto gpu = gpuOutputs_[i];
CpuVector cpuVector(cpu->shape().getElements(), (real*)cpu->data());
GpuVector gpuVector(cpu->shape().getElements(), (real*)gpu->data());
autotest::TensorCheckErr(cpuVector, gpuVector);
}
}
std::shared_ptr<FunctionBase> getCpuFunction() const { return cpu; }
// only init cpu argument, gpu argument copy from cpu argument.
void initArg(BufferArg& arg) {
CpuVector vector(arg.shape().getElements(), (real*)arg.data());
vector.uniform(0.001, 1);
}
std::shared_ptr<FunctionBase> getGpuFunction() const { return gpu; }
void initArg(SequenceIdArg& arg, size_t batchSize) {
size_t numSeqs = arg.numSeqs();
int* buf = reinterpret_cast<int*>(arg.data());
int pos = 0;
size_t maxLen = 2 * batchSize / numSeqs;
for (int i = 0; i < (int)numSeqs; ++i) {
int len = uniformRandom(
std::min<int64_t>(maxLen, batchSize - pos - numSeqs + i)) +
1;
buf[i] = pos;
pos += len;
VLOG(1) << " len=" << len;
}
buf[numSeqs] = batchSize;
}
protected:
std::shared_ptr<FunctionBase> cpu;
std::shared_ptr<FunctionBase> gpu;
std::vector<CpuMemHandlePtr> cpuMemory;
std::vector<GpuMemHandlePtr> gpuMemory;
Arguments cpuInputs;
Arguments cpuOutputs;
Arguments cpuInouts;
Arguments gpuInputs;
Arguments gpuOutputs;
Arguments gpuInouts;
std::shared_ptr<FunctionBase> cpuFunc_;
std::shared_ptr<FunctionBase> gpuFunc_;
std::vector<CpuMemHandlePtr> cpuMemory_;
std::vector<GpuMemHandlePtr> gpuMemory_;
std::vector<BufferArgPtr> cpuInputs_;
std::vector<BufferArgPtr> cpuOutputs_;
std::vector<BufferArgPtr> gpuInputs_;
std::vector<BufferArgPtr> gpuOutputs_;
};
} // namespace paddle
/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserve.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
#include "PadOp.h"
#include "paddle/math/Vector.h"
namespace paddle {
template <>
void Pad<DEVICE_TYPE_CPU>(real* outputs,
const real* inputs,
const int num,
const int inC,
const int inH,
const int inW,
const PadConf& pad) {
int cstart = pad.channelStart, cend = pad.channelEnd;
int hstart = pad.heightStart, hend = pad.heightEnd;
int wstart = pad.widthStart, wend = pad.widthEnd;
int outC = inC + cstart + cend;
int outH = inH + hstart + hend;
int outW = inW + wstart + wend;
for (int i = 0; i < num; i++) {
for (int c = 0; c < inC; c++) {
for (int h = 0; h < inH; h++) {
int inoff = ((i * inC + c) * inH + h) * inW;
int outoff =
((i * outC + c + cstart) * outH + h + hstart) * outW + wstart;
memcpy(outputs + outoff, inputs + inoff, inW * sizeof(real));
}
}
}
}
template <>
void PadGrad<DEVICE_TYPE_CPU>(real* inGrad,
const real* outGrad,
const int num,
const int inC,
const int inH,
const int inW,
const PadConf& pad) {
int cstart = pad.channelStart, cend = pad.channelEnd;
int hstart = pad.heightStart, hend = pad.heightEnd;
int wstart = pad.widthStart, wend = pad.widthEnd;
int outC = inC + cstart + cend;
int outH = inH + hstart + hend;
int outW = inW + wstart + wend;
for (int i = 0; i < num; i++) {
for (int c = 0; c < inC; c++) {
for (int h = 0; h < inH; h++) {
int inoff = ((i * inC + c) * inH + h) * inW;
int outoff =
((i * outC + c + cstart) * outH + h + hstart) * outW + wstart;
CpuVector inG = CpuVector(inW, inGrad + inoff);
CpuVector outG = CpuVector(inW, const_cast<real*>(outGrad + outoff));
inG += outG;
}
}
}
}
/**
* \brief Padding zeros to input according to the specify dimension.
* The struct pad_ contains the padding size in each dimension.
* The input and output is a 4D tensor. In PadFunc, we only
* pad zeros to the 2nd to 4th dimension.
*
* Argument in this Function:
* \param pad_ A struct object contains the padding size in each dimension.
* It has six integers. The channelStart and channelEnd indicate
* how many zeros to add before and after the input in channel
* dimension. And the heightStart and heightEnd indicate padding
* in height dimension. The widthStart and widthEnd indicate the
* padding in width dimension.
* \param inputs A 4D tensor, only one input.
* \param outputs A 4D tensor, the output value after padding.
*
* For example,
* Input(2,2,2,3) = [
* [ [[1,2,3], [3,4,5]],
* [[2,3,5], [1,6,7]] ],
* [ [[4,3,1], [1,8,7]],
* [[3,8,9], [2,3,5]] ]
* ] # the shape is (1,2,2,3)
*
* pad_: if channelStart = channelEnd = 1, others are 0.
* Output(2,4,2,3) = [
* [ [[0,0,0], [0,0,0]],
* [[1,2,3], [3,4,5]],
* [[2,3,5], [1,6,7]],
* [[0,0,0], [0,0,0]] ],
* [ [[0,0,0], [0,0,0]],
* [[4,3,1], [1,8,7]],
* [[3,8,9], [2,3,5]],
* [[0,0,0], [0,0,0]] ]
* ] # the shape is (2,4,2,3)
*
* pad_: if widthStart = 1, widthEnd = 2, others are 0.
* Output(2,2,2,6) = [
* [ [[0,1,2,3,0,0], [0,3,4,5,0,0]],
* [[0,2,3,5,0,0], [0,1,6,7,0,0]] ],
* [ [[0,4,3,1,0,0], [0,1,8,7,0,0]],
* [[0,3,8,9,0,0], [0,2,3,5,0,0]] ],
* ] # the shape is (2,2,2,6)
*
* pad_: if heightStart = 1, heightEnd = 1, others are 0.
* Output(2,2,4,3) = [
* [ [[0,0,0], [1,2,3], [3,4,5], [0,0,0]],
* [[0,0,0], [2,3,5], [1,6,7], [0,0,0]] ],
* [ [[0,0,0], [4,3,1], [1,8,7], [0,0,0]],
* [[0,0,0], [3,8,9], [2,3,5], [0,0,0]] ],
* ] # the shape is (2,2,4,3)
*/
template <DeviceType Device>
class PadFunc : public FunctionBase {
public:
void init(const FuncConfig& config) override {
pad_.channelStart = config.get<int>("cstart");
pad_.channelEnd = config.get<int>("cend");
pad_.heightStart = config.get<int>("hstart");
pad_.heightEnd = config.get<int>("hend");
pad_.widthStart = config.get<int>("wstart");
pad_.widthEnd = config.get<int>("wend");
}
void calc(const BufferArgs& inputs, const BufferArgs& outputs) override {
CHECK_EQ(1UL, inputs.size());
CHECK_EQ(1UL, outputs.size());
CHECK_EQ(outputs[0].getArgType(), ASSIGN_TO);
size_t num = inputs[0].shape()[0];
size_t inC = inputs[0].shape()[1];
size_t inH = inputs[0].shape()[2];
size_t inW = inputs[0].shape()[3];
typename Tensor<real, Device>::Vector vec(outputs[0].shape().getElements(),
outputs[0].data<real>());
vec.zero();
Pad<Device>(outputs[0].data<real>(),
inputs[0].data<real>(),
num,
inC,
inH,
inW,
pad_);
}
private:
PadConf pad_;
};
/**
* \brief The backward propagation of padding Function. Remove the elements
* in the padding positions of forward.
*
* Argument in this Function:
* \param pad_ The same meaning as it in PadFunc.
* \param inputs The gradient with respect to the output value of PadFunc.
* \param outputs The gradient with respect to the input value of PadFunc.
*/
template <DeviceType Device>
class PadGradFunc : public FunctionBase {
public:
void init(const FuncConfig& config) override {
pad_.channelStart = config.get<int>("cstart");
pad_.channelEnd = config.get<int>("cend");
pad_.heightStart = config.get<int>("hstart");
pad_.heightEnd = config.get<int>("hend");
pad_.widthStart = config.get<int>("wstart");
pad_.widthEnd = config.get<int>("wend");
}
void calc(const BufferArgs& inputs, const BufferArgs& outputs) override {
CHECK_EQ(1UL, inputs.size());
CHECK_EQ(1UL, outputs.size());
size_t num = outputs[0].shape()[0];
size_t inC = outputs[0].shape()[1];
size_t inH = outputs[0].shape()[2];
size_t inW = outputs[0].shape()[3];
if (outputs[0].getArgType() != ADD_TO) {
// for unit test
typename Tensor<real, Device>::Vector tmp(
outputs[0].shape().getElements(), outputs[0].data<real>());
tmp.zero();
}
PadGrad<Device>(outputs[0].data<real>(),
inputs[0].data<real>(),
num,
inC,
inH,
inW,
pad_);
}
private:
PadConf pad_;
};
REGISTER_TYPED_FUNC(Pad, CPU, PadFunc);
REGISTER_TYPED_FUNC(PadGrad, CPU, PadGradFunc);
#ifndef PADDLE_ONLY_CPU
REGISTER_TYPED_FUNC(Pad, GPU, PadFunc);
REGISTER_TYPED_FUNC(PadGrad, GPU, PadGradFunc);
#endif
} // namespace paddle
/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserve.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
#pragma once
#include "Function.h"
namespace paddle {
struct PadConf {
/// how many values to add before the data along channel dimension.
int channelStart;
/// how many values to add after the data along channel dimension.
int channelEnd;
/// how many values to add before the data along height dimension.
int heightStart;
/// how many values to add after the data along height dimension.
int heightEnd;
/// how many values to add before the data along width dimension.
int widthStart;
/// how many values to add after the data along width dimension.
int widthEnd;
};
/**
* \brief This funtion pads zeros to inputs according to the specify dimension.
* The input and output is a 4D tensor. Padding zeros from the 2nd to
* the 4th dimenstion according argument of pad.
*
* \param[out] outputs save results.
* \param[in] inputs input data.
* \param[in] num batch size of input data.
* \param[in] inC channel number of input data.
* \param[in] inH height of input data.
* \param[in] inH with of input data.
* \param[in] pad the padding config, contains the size along the
* specify dimension.
*/
template <DeviceType Device>
void Pad(real* outputs,
const real* inputs,
const int num,
const int inC,
const int inH,
const int inW,
const PadConf& pad);
/**
* \brief Padding operation backward.
*
* \param[out] inGrad gradients of previous layer.
* \param[in] outGrad output gradients.
* \param[in] num batch size of input data.
* \param[in] inC channel number of input data.
* \param[in] inH height of input data.
* \param[in] inH with of input data.
* \param[in] pad the padding config, contains the size along the
* specify dimension.
*/
template <DeviceType Device>
void PadGrad(real* inGrad,
const real* outGrad,
const int num,
const int inC,
const int inH,
const int inW,
const PadConf& pad);
} // namespace paddle
/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserve.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
#include "hl_base.h"
#include "PadOp.h"
namespace paddle {
__global__ void KePad(real* outputs, const real* inputs,
int inC, int inH, int inW,
int padc, int padh, int padw,
int outC, int outH, int outW, int nthreads) {
const int idx = threadIdx.x + blockIdx.x * blockDim.x;
if (idx < nthreads) {
const int w = idx % inW;
const int h = (idx / inW) % inH;
const int c = (idx / inW / inH) % inC;
const int n = idx / inW / inH / inC;
const int off = ((n * outC + c + padc) * outH + h + padh) * outW + padw + w;
outputs[off] = inputs[idx];
}
}
template <>
void Pad<DEVICE_TYPE_GPU>(real* outputs,
const real* inputs,
const int num,
const int inC,
const int inH,
const int inW,
const PadConf& pad) {
size_t nth = num * inC * inH * inW;
int blockSize = 1024;
int gridSize = (nth + 1024 - 1) / 1024;
int cstart = pad.channelStart, cend = pad.channelEnd;
int hstart = pad.heightStart, hend = pad.heightEnd;
int wstart = pad.widthStart, wend = pad.widthEnd;
int outC = inC + cstart + cend;
int outH = inH + hstart + hend;
int outW = inW + wstart + wend;
KePad<<<gridSize, blockSize, 0, STREAM_DEFAULT>>>
(outputs, inputs, inC, inH, inW, cstart, hstart, wstart,
outC, outH, outW, nth);
CHECK_SYNC("Pad");
}
__global__ void KePadDiff(real* inGrad, const real* outGrad,
int inC, int inH, int inW,
int padc, int padh, int padw,
int outC, int outH, int outW, int nthreads) {
const int idx = threadIdx.x + blockIdx.x * blockDim.x;
if (idx < nthreads) {
const int w = idx % inW;
const int h = (idx / inW) % inH;
const int c = (idx / inW / inH) % inC;
const int n = idx / inW / inH / inC;
const int off = ((n * outC + c + padc) * outH + h + padh) * outW + padw + w;
inGrad[idx] += outGrad[off];
}
}
template <>
void PadGrad<DEVICE_TYPE_GPU>(real* inGrad,
const real* outGrad,
const int num,
const int inC,
const int inH,
const int inW,
const PadConf& pad) {
int nth = num * inC * inH * inW;
int blockSize = 1024;
int gridSize = (nth + 1024 - 1) / 1024;
int cstart = pad.channelStart, cend = pad.channelEnd;
int hstart = pad.heightStart, hend = pad.heightEnd;
int wstart = pad.widthStart, wend = pad.widthEnd;
int outC = inC + cstart + cend;
int outH = inH + hstart + hend;
int outW = inW + wstart + wend;
KePadDiff <<<gridSize, blockSize, 0, STREAM_DEFAULT>>>
(inGrad, outGrad, inC, inH, inW, cstart, hstart, wstart,
outC, outH, outW, nth);
CHECK_SYNC("PadGrad");
}
} // namespace paddle
/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserve.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
#include <gtest/gtest.h>
#include "FunctionTest.h"
namespace paddle {
TEST(Pad, real) {
for (size_t numSamples : {5, 32}) {
for (size_t channels : {1, 5, 32}) {
for (size_t imgSizeH : {5, 33, 100}) {
for (size_t imgSizeW : {5, 32, 96}) {
VLOG(3) << " numSamples=" << numSamples << " channels=" << channels
<< " imgSizeH=" << imgSizeH << " imgSizeW=" << imgSizeW;
FunctionCompare compare("Pad",
FuncConfig()
.set("cstart", 2)
.set("cend", 3)
.set("hstart", 1)
.set("hend", 2)
.set("wstart", 3)
.set("wend", 2));
TensorShape inDims{numSamples, channels, imgSizeH, imgSizeW};
TensorShape outDims{
numSamples, channels + 5, imgSizeH + 3, imgSizeW + 5};
compare.addInputs(BufferArg(VALUE_TYPE_FLOAT, inDims));
compare.addOutputs(BufferArg(VALUE_TYPE_FLOAT, outDims, ASSIGN_TO));
compare.run();
}
}
}
}
}
TEST(PadGrad, real) {
for (size_t numSamples : {5, 32}) {
for (size_t channels : {1, 5, 32}) {
for (size_t imgSizeH : {5, 33, 100}) {
for (size_t imgSizeW : {5, 32, 96}) {
VLOG(3) << " numSamples=" << numSamples << " channels=" << channels
<< " imgSizeH=" << imgSizeH << " imgSizeW=" << imgSizeW;
FunctionCompare compare("PadGrad",
FuncConfig()
.set("cstart", 2)
.set("cend", 3)
.set("hstart", 1)
.set("hend", 2)
.set("wstart", 3)
.set("wend", 2));
TensorShape inDims{numSamples, channels, imgSizeH, imgSizeW};
TensorShape outDims{
numSamples, channels + 5, imgSizeH + 3, imgSizeW + 5};
compare.addInputs(BufferArg(VALUE_TYPE_FLOAT, outDims));
compare.addOutputs(BufferArg(VALUE_TYPE_FLOAT, inDims, ASSIGN_TO));
compare.run();
}
}
}
}
}
} // namespace paddle
......@@ -55,6 +55,15 @@ public:
numElements();
}
void reshape(std::initializer_list<size_t> dims) {
ndims_ = dims.size();
if (ndims_ > kMinDims) {
dims_.resize(ndims_);
}
dims_.assign(dims);
numElements();
}
// number of dimensions of the tensor
size_t ndims() const { return ndims_; }
......@@ -82,7 +91,7 @@ private:
// init dims_
void initDims(size_t ndims) {
size_t count = ndims < 4 ? 4 : ndims;
size_t count = ndims < kMinDims ? kMinDims : ndims;
dims_.assign(count, 1);
}
......@@ -92,6 +101,7 @@ private:
// number of elements
size_t nelements_;
std::vector<size_t> dims_;
static const size_t kMinDims = 4;
};
} // namespace paddle
......@@ -69,8 +69,14 @@ static ClassRegistrar<ActivationFunction> gActivationRegistrar;
class IdentityActivation : public ActivationFunction {
public:
static const std::string name;
void forward(Argument& act) { (void)act; }
void backward(Argument& act) { (void)act; }
Error __must_check forward(Argument& act) {
(void)act;
return Error();
}
Error __must_check backward(Argument& act) {
(void)act;
return Error();
}
const std::string& getName() const { return name; }
};
const std::string IdentityActivation::name = "";
......@@ -86,8 +92,14 @@ static InitFunction __reg_activation__identity([] {
* \f]
*/
BEGIN_DEFINE_ACTIVATION(sigmoid)
void forward(Argument& act) { act.value->sigmoid(*act.value); }
void backward(Argument& act) { act.grad->sigmoidDerivative(*act.value); }
Error __must_check forward(Argument& act) {
act.value->sigmoid(*act.value);
return Error();
}
Error __must_check backward(Argument& act) {
act.grad->sigmoidDerivative(*act.value);
return Error();
}
END_DEFINE_ACTIVATION(sigmoid)
/**
......@@ -103,9 +115,12 @@ MatrixPtr sftMaxDot_;
MatrixPtr one_;
public:
void forward(Argument& act) { act.value->softmax(*act.value); }
Error __must_check forward(Argument& act) {
act.value->softmax(*act.value);
return Error();
}
void backward(Argument& act) {
Error __must_check backward(Argument& act) {
MatrixPtr outputV = act.value;
MatrixPtr outputG = act.grad;
......@@ -137,6 +152,7 @@ void backward(Argument& act) {
act.grad->softmaxDerivative(*act.value, *sftMaxSum_);
}
return Error();
}
END_DEFINE_ACTIVATION(softmax)
......@@ -151,8 +167,11 @@ ACTIVATION_CLASS_NAME(softmax) softmax_;
Argument argument_;
public:
void forward(Argument& act) {
CHECK_EQ(act.value->getWidth(), 1UL);
Error __must_check forward(Argument& act) {
if (act.value->getWidth() != 1UL) {
return Error(
"Input width for each timestep of sequence softmax should be 1");
}
if (!argument_.value) {
argument_.value = Matrix::create(nullptr,
......@@ -169,10 +188,14 @@ void forward(Argument& act) {
auto starts = act.sequenceStartPositions->getVector(useGpu(act.deviceId));
act.value->sequenceSoftmax(*act.value, *starts);
return Error();
}
void backward(Argument& act) {
CHECK_EQ(act.grad->getWidth(), 1UL);
Error __must_check backward(Argument& act) {
if (act.value->getWidth() != 1UL) {
return Error(
"Input width for each timestep of sequence softmax should be 1");
}
size_t numSequences = act.getNumSequences();
const int* starts = act.sequenceStartPositions->getData(false);
......@@ -184,8 +207,10 @@ void backward(Argument& act) {
argument_.value->setData(act.value->getData() + offset, 1UL, size);
argument_.grad->setData(act.grad->getData() + offset, 1UL, size);
softmax_.backward(argument_);
Error status = softmax_.backward(argument_);
if (!status) return status;
}
return Error();
}
END_DEFINE_ACTIVATION(sequence_softmax)
......@@ -200,9 +225,15 @@ END_DEFINE_ACTIVATION(sequence_softmax)
* 0 otherwise.
*/
BEGIN_DEFINE_ACTIVATION(relu)
void forward(Argument& act) { act.value->relu(*act.value); }
Error __must_check forward(Argument& act) {
act.value->relu(*act.value);
return Error();
}
void backward(Argument& act) { act.grad->reluDerivative(*act.value); }
Error __must_check backward(Argument& act) {
act.grad->reluDerivative(*act.value);
return Error();
}
END_DEFINE_ACTIVATION(relu)
/**
......@@ -219,9 +250,15 @@ END_DEFINE_ACTIVATION(relu)
* TODO(yuyang18): Remove magic number 24 or make it configuable.
*/
BEGIN_DEFINE_ACTIVATION(brelu)
void forward(Argument& act) { act.value->brelu(*act.value); }
Error __must_check forward(Argument& act) {
act.value->brelu(*act.value);
return Error();
}
void backward(Argument& act) { act.grad->breluDerivative(*act.value); }
Error __must_check backward(Argument& act) {
act.grad->breluDerivative(*act.value);
return Error();
}
END_DEFINE_ACTIVATION(brelu)
/**
......@@ -231,9 +268,15 @@ END_DEFINE_ACTIVATION(brelu)
* \f]
*/
BEGIN_DEFINE_ACTIVATION(tanh)
void forward(Argument& act) { act.value->tanh(*act.value); }
Error __must_check forward(Argument& act) {
act.value->tanh(*act.value);
return Error();
}
void backward(Argument& act) { act.grad->tanhDerivative(*act.value); }
Error __must_check backward(Argument& act) {
act.grad->tanhDerivative(*act.value);
return Error();
}
END_DEFINE_ACTIVATION(tanh)
/**
......@@ -248,10 +291,14 @@ real a, b;
public:
ACTIVATION_CLASS_NAME(stanh)() : a(1.7159), b(2. / 3.) {}
void forward(Argument& act) { act.value->scaledTanh(*act.value, a, b); }
Error __must_check forward(Argument& act) {
act.value->scaledTanh(*act.value, a, b);
return Error();
}
void backward(Argument& act) {
Error __must_check backward(Argument& act) {
act.grad->scaledTanhDerivative(*act.value, a, b);
return Error();
}
END_DEFINE_ACTIVATION(stanh)
......@@ -262,9 +309,15 @@ END_DEFINE_ACTIVATION(stanh)
* \f]
*/
BEGIN_DEFINE_ACTIVATION(softrelu)
void forward(Argument& act) { act.value->softrelu(*act.value); }
Error __must_check forward(Argument& act) {
act.value->softrelu(*act.value);
return Error();
}
void backward(Argument& act) { act.grad->softreluDerivative(*act.value); }
Error __must_check backward(Argument& act) {
act.grad->softreluDerivative(*act.value);
return Error();
}
END_DEFINE_ACTIVATION(softrelu)
/**
......@@ -280,7 +333,7 @@ END_DEFINE_ACTIVATION(softrelu)
* 0 if z=0
*/
BEGIN_DEFINE_ACTIVATION(abs)
void forward(Argument& act) {
Error __must_check forward(Argument& act) {
SetDevice device(act.deviceId);
Matrix::resizeOrCreate(act.in,
act.value->getHeight(),
......@@ -290,9 +343,13 @@ void forward(Argument& act) {
act.in->copyFrom(*act.value);
act.value->abs2(*act.value);
return Error();
}
void backward(Argument& act) { act.grad->absDerivative(*act.in); }
Error __must_check backward(Argument& act) {
act.grad->absDerivative(*act.in);
return Error();
}
END_DEFINE_ACTIVATION(abs)
/**
......@@ -302,7 +359,7 @@ END_DEFINE_ACTIVATION(abs)
* \f]
*/
BEGIN_DEFINE_ACTIVATION(square)
void forward(Argument& act) {
Error __must_check forward(Argument& act) {
SetDevice device(act.deviceId);
Matrix::resizeOrCreate(act.in,
act.value->getHeight(),
......@@ -312,9 +369,13 @@ void forward(Argument& act) {
act.in->copyFrom(*act.value);
act.value->square2(*act.value);
return Error();
}
void backward(Argument& act) { act.grad->squareDerivative(*act.in); }
Error __must_check backward(Argument& act) {
act.grad->squareDerivative(*act.in);
return Error();
}
END_DEFINE_ACTIVATION(square)
/**
......@@ -324,9 +385,15 @@ END_DEFINE_ACTIVATION(square)
* \f]
*/
BEGIN_DEFINE_ACTIVATION(exponential)
void forward(Argument& act) { act.value->exp2(*act.value); }
Error __must_check forward(Argument& act) {
act.value->exp2(*act.value);
return Error();
}
void backward(Argument& act) { act.grad->expDerivative(*act.value); }
Error __must_check backward(Argument& act) {
act.grad->expDerivative(*act.value);
return Error();
}
END_DEFINE_ACTIVATION(exponential)
/**
......@@ -336,7 +403,7 @@ END_DEFINE_ACTIVATION(exponential)
* \f]
*/
BEGIN_DEFINE_ACTIVATION(log)
void forward(Argument& act) {
Error __must_check forward(Argument& act) {
SetDevice device(act.deviceId);
Matrix::resizeOrCreate(act.in,
act.value->getHeight(),
......@@ -346,9 +413,13 @@ void forward(Argument& act) {
act.in->copyFrom(*act.value);
act.value->log2(*act.value);
return Error();
}
void backward(Argument& act) { act.grad->dotDiv(*act.grad, *act.in); }
Error __must_check backward(Argument& act) {
act.grad->dotDiv(*act.grad, *act.in);
return Error();
}
END_DEFINE_ACTIVATION(log)
ActivationFunction* ActivationFunction::create(const std::string& type) {
......
......@@ -15,6 +15,7 @@ limitations under the License. */
#pragma once
#include <string>
#include <vector>
#include "paddle/utils/Error.h"
namespace paddle {
......@@ -48,7 +49,7 @@ public:
*
* Usually, act is Layer::output_
*/
virtual void forward(Argument& act) = 0;
virtual Error __must_check forward(Argument& act) = 0;
/**
* @brief Backward propagaion
......@@ -57,7 +58,7 @@ public:
* - Before calling backward(), act.grad = dE / dy, where E is the error/cost
* - After backward() returns, act.grad = dE / dx = (dE/dy) * (dy/dx)
*/
virtual void backward(Argument& act) = 0;
virtual Error __must_check backward(Argument& act) = 0;
virtual const std::string& getName() const = 0;
};
......
......@@ -118,16 +118,15 @@ void ContextProjection::forward() {
/// first use state_, otherwise use weight_(padding false === w nullptr)
auto w_ptr =
state_ ? state_.get() : is_padding ? weight_->getW().get() : nullptr;
auto start_pos = in_->sequenceStartPositions;
const auto start_pos = in_->sequenceStartPositions->getVector(useGpu_);
BufferArgs inputs;
BufferArgs outputs;
inputs.addArg(*in_->value);
inputs.addArg(CpuMatrix(w_ptr ? w_ptr->getData() : nullptr,
w_ptr ? w_ptr->getHeight() : 0,
input_dim));
inputs.addArg(*in_->sequenceStartPositions->getVector(useGpu_));
outputs.addArg(*out_->value, ADD_TO);
inputs.addArg(*in_->value, *start_pos);
if (w_ptr) {
inputs.addArg(CpuMatrix(w_ptr->getData(), w_ptr->getHeight(), input_dim),
*start_pos);
}
outputs.addArg(*out_->value, *start_pos, ADD_TO);
forward_[0]->calc(inputs, outputs);
if (state_ && config_.context_start() < 0) {
......@@ -166,13 +165,16 @@ void ContextProjection::backward(const UpdateCallback& callback) {
BufferArgs inputs;
BufferArgs outputs;
inputs.addArg(CpuMatrix(
in_->grad ? in_->grad->getData() : nullptr, batch_size, input_dim));
inputs.addArg(CpuMatrix(w_ptr ? w_ptr->getData() : nullptr,
w_ptr ? w_ptr->getHeight() : 0,
input_dim));
inputs.addArg(*in_->sequenceStartPositions->getVector(useGpu_));
outputs.addArg(*out_->grad, ADD_TO);
inputs.addArg(*out_->grad, *in_->sequenceStartPositions->getVector(useGpu_));
outputs.addArg(
CpuMatrix(
in_->grad ? in_->grad->getData() : nullptr, batch_size, input_dim),
*in_->sequenceStartPositions->getVector(useGpu_),
ADD_TO);
outputs.addArg(CpuMatrix(w_ptr ? w_ptr->getData() : nullptr,
w_ptr ? w_ptr->getHeight() : 0,
input_dim),
ADD_TO);
backward_[0]->calc(inputs, outputs);
if (config_.trainable_padding()) {
......
......@@ -314,13 +314,13 @@ void GatedRecurrentLayer::forwardBatch(int batchSize,
batchValue_->resizeOrCreate(*output_.value);
batchValue_->copy(*inputValue, *gate_.value, /* seq2batch */ true);
if (bias_ && bias_->getWGrad()) {
if (bias_) {
gate_.value->addBias(*(bias_->getW()), 1);
}
{
int numBatch = batchValue_->getNumBatch();
int batchSize = 0;
int curBatchSize = 0;
AsyncGpuBlock asyncGpuBlock;
for (int n = 0; n < numBatch; n++) {
MatrixPtr outputValueTmp = batchValue_->getBatchValue(n);
......@@ -330,16 +330,17 @@ void GatedRecurrentLayer::forwardBatch(int batchSize,
gruValue.resetOutputValue =
(batchValue_->getBatchValue(*resetOutput_.value, n))->getData();
batchSize = outputValueTmp->getHeight();
curBatchSize = outputValueTmp->getHeight();
gruValue.prevOutValue =
(n == 0 ? nullptr
: (batchValue_->getBatchValue(n - 1, batchSize))->getData());
(n == 0
? nullptr
: (batchValue_->getBatchValue(n - 1, curBatchSize))->getData());
{
if (useGpu_) {
GruCompute::forward<1>(gruValue, getSize(), batchSize);
GruCompute::forward<1>(gruValue, getSize(), curBatchSize);
} else {
GruCompute::forward<0>(gruValue, getSize(), batchSize);
GruCompute::forward<0>(gruValue, getSize(), curBatchSize);
}
}
}
......
......@@ -15,6 +15,7 @@ limitations under the License. */
#include "paddle/utils/Util.h"
#include "paddle/math/SparseMatrix.h"
#include "paddle/utils/Error.h"
#include "paddle/utils/Logging.h"
#include "AddtoLayer.h"
......@@ -334,7 +335,8 @@ void Layer::showOutputStats() {
void Layer::forwardActivation() {
/* activation */
activation_->forward(output_);
auto status = activation_->forward(output_);
status.check();
/* dropout */
if (config_.drop_rate() > 0) {
......@@ -372,7 +374,8 @@ void Layer::backwardActivation() {
oGrad->dotMul(*oGrad, *dropOutMask_);
}
activation_->backward(output_);
auto status = activation_->backward(output_);
status.check();
}
void Layer::forwardDropOut() {
......
......@@ -506,9 +506,12 @@ void MDLstmLayer::forwardGate2OutputSequence(int start,
*frameState_[start + preOffsetV[i]].value, *checkFgOneDim, 1.0, 1.0);
}
}
activationGate_->forward(frameInputGate_[idxCurr]);
activationGate_->forward(frameForgetGate_[idxCurr]);
activation_->forward(frameInputNode_[idxCurr]);
auto status = activationGate_->forward(frameInputGate_[idxCurr]);
status.check();
status = activationGate_->forward(frameForgetGate_[idxCurr]);
status.check();
status = activation_->forward(frameInputNode_[idxCurr]);
status.check();
frameState_[idxCurr].value->zeroMem();
for (int i = 0; i < numDims_; i++) {
......@@ -530,10 +533,12 @@ void MDLstmLayer::forwardGate2OutputSequence(int start,
frameOutputGate_[idxCurr].value->addDotMul(
*frameState_[idxCurr].value, *checkOg_, 1.0, 1.0);
activationGate_->forward(frameOutputGate_[idxCurr]);
status = activationGate_->forward(frameOutputGate_[idxCurr]);
status.check();
framePreOutput_[idxCurr].value->copyFrom(*(frameState_[idxCurr].value));
activationState_->forward(framePreOutput_[idxCurr]);
status = activationState_->forward(framePreOutput_[idxCurr]);
status.check();
frameOutput_[idxCurr].value->dotMul(*framePreOutput_[idxCurr].value,
*frameOutputGate_[idxCurr].value);
......@@ -640,12 +645,12 @@ void MDLstmLayer::backwardGate2OutputSequence(int start,
framePreOutput_[idxCurr].grad->dotMul(*frameOutput_[idxCurr].grad,
*frameOutputGate_[idxCurr].value);
activationState_->backward(framePreOutput_[idxCurr]);
activationState_->backward(framePreOutput_[idxCurr]).check();
frameState_[idxCurr].grad->copyFrom(*(framePreOutput_[idxCurr].grad));
frameOutputGate_[idxCurr].grad->dotMul(*frameOutput_[idxCurr].grad,
*framePreOutput_[idxCurr].value);
activationGate_->backward(frameOutputGate_[idxCurr]);
activationGate_->backward(frameOutputGate_[idxCurr]).check();
frameState_[idxCurr].grad->addDotMul(
*frameOutputGate_[idxCurr].grad, *checkOg_, 1.0, 1.0);
......@@ -702,9 +707,9 @@ void MDLstmLayer::backwardGate2OutputSequence(int start,
}
}
activationGate_->backward(frameInputGate_[idxCurr]);
activationGate_->backward(frameForgetGate_[idxCurr]);
activation_->backward(frameInputNode_[idxCurr]);
activationGate_->backward(frameInputGate_[idxCurr]).check();
activationGate_->backward(frameForgetGate_[idxCurr]).check();
activation_->backward(frameInputNode_[idxCurr]).check();
if (bias_->getWGrad()) {
for (int i = 0; i < numDims_; i++) {
......
......@@ -193,7 +193,8 @@ public:
forwardOneInput(l);
}
activation_->forward(sampleOut_);
auto status = activation_->forward(sampleOut_);
status.check();
forwardCost();
}
......@@ -207,7 +208,8 @@ public:
backwardCost();
activation_->backward(sampleOut_);
auto status = activation_->backward(sampleOut_);
status.check();
if (biases_->getWGrad()) {
backwardBias(callback);
......
/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserve.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
#include "PadLayer.h"
#include "paddle/utils/Stat.h"
namespace paddle {
REGISTER_LAYER(pad, PadLayer);
bool PadLayer::init(const LayerMap& layerMap,
const ParameterMap& parameterMap) {
/* Initialize the basic parent class */
Layer::init(layerMap, parameterMap);
auto& pad_conf = config_.inputs(0).pad_conf();
auto& img_conf = pad_conf.image_conf();
CHECK_EQ(config_.inputs_size(), 1);
inDims_ = TensorShape(
{0,
img_conf.channels(),
img_conf.has_img_size_y() ? img_conf.img_size_y() : img_conf.img_size(),
img_conf.img_size()});
CHECK_EQ(2, pad_conf.pad_c_size());
CHECK_EQ(2, pad_conf.pad_h_size());
CHECK_EQ(2, pad_conf.pad_w_size());
padc_.push_back(pad_conf.pad_c(0));
padc_.push_back(pad_conf.pad_c(1));
padh_.push_back(pad_conf.pad_h(0));
padh_.push_back(pad_conf.pad_h(1));
padw_.push_back(pad_conf.pad_w(0));
padw_.push_back(pad_conf.pad_w(1));
outDims_ = TensorShape(4);
setOutDims(0);
createFunction(forward_,
"Pad",
FuncConfig()
.set("cstart", padc_[0])
.set("cend", padc_[1])
.set("hstart", padh_[0])
.set("hend", padh_[1])
.set("wstart", padw_[0])
.set("wend", padw_[1]));
createFunction(backward_,
"PadGrad",
FuncConfig()
.set("cstart", padc_[0])
.set("cend", padc_[1])
.set("hstart", padh_[0])
.set("hend", padh_[1])
.set("wstart", padw_[0])
.set("wend", padw_[1]));
return true;
}
void PadLayer::setOutDims(const size_t batchSize) {
outDims_.reshape({batchSize,
inDims_[1] + padc_[0] + padc_[1],
inDims_[2] + padh_[0] + padh_[1],
inDims_[3] + padw_[0] + padw_[1]});
}
void PadLayer::setTensorDim(const size_t batchSize) {
CHECK_EQ(static_cast<int>(inputLayers_.size()), 1);
inDims_.setDim(0, batchSize);
int h = inputLayers_[0]->getOutput().getFrameHeight();
if (h != 0) inDims_.setDim(2, h);
int w = inputLayers_[0]->getOutput().getFrameWidth();
if (w != 0) inDims_.setDim(3, w);
setOutDims(batchSize);
}
void PadLayer::forward(PassType passType) {
Layer::forward(passType);
MatrixPtr input = inputLayers_[0]->getOutputValue();
size_t batchSize = input->getHeight();
setTensorDim(batchSize);
int size = outDims_[1] * outDims_[2] * outDims_[3];
resetOutput(batchSize, size);
MatrixPtr outV = getOutputValue();
REGISTER_TIMER_INFO("PadForward", getName().c_str());
BufferArgs inputs;
BufferArgs outputs;
inputs.addArg(*getInputValue(0), inDims_);
outputs.addArg(*getOutputValue(), outDims_, ASSIGN_TO);
forward_[0]->calc(inputs, outputs);
}
void PadLayer::backward(const UpdateCallback& callback) {
(void)callback;
REGISTER_TIMER_INFO("PadBackward", getName().c_str());
BufferArgs inputs;
BufferArgs outputs;
inputs.addArg(*getOutputGrad(), outDims_);
outputs.addArg(*getInputGrad(0), inDims_, ADD_TO);
backward_[0]->calc(inputs, outputs);
}
} // namespace paddle
/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserve.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
#pragma once
#include "Layer.h"
namespace paddle {
/**
* \brief This layer pads zeros to inputs according to the specify dimension.
* The input and output is a 4D tensor. Padding zeros from the 2nd to
* the 4th dimenstion according padc_, padh_ and padw_.
*/
class PadLayer : public Layer {
public:
explicit PadLayer(const LayerConfig& config) : Layer(config) {}
~PadLayer() {}
bool init(const LayerMap& layerMap, const ParameterMap& parameterMap);
void forward(PassType passType);
void backward(const UpdateCallback& callback = nullptr);
protected:
void setOutDims(const size_t batchSize);
void setTensorDim(const size_t batchSize);
std::vector<int> padc_;
std::vector<int> padh_;
std::vector<int> padw_;
TensorShape inDims_;
TensorShape outDims_;
};
} // namespace paddle
......@@ -217,21 +217,22 @@ void RecurrentLayer::forwardOneSequence(int start, int length) {
if (prevOutput_) {
frameOutput_[start].value->mul(*prevOutput_, *weight_->getW(), 1, 1);
}
activation_->forward(frameOutput_[start]);
activation_->forward(frameOutput_[start]).check();
for (int i = 1; i < length; ++i) {
frameOutput_[start + i].value->mul(
*frameOutput_[start + i - 1].value, *weight_->getW(), 1, 1);
activation_->forward(frameOutput_[start + i]);
activation_->forward(frameOutput_[start + i]).check();
}
if (prevOutput_) {
prevOutput_->assign(*frameOutput_[start + length - 1].value);
}
} else {
activation_->forward(frameOutput_[start + length - 1]);
activation_->forward(frameOutput_[start + length - 1]).check();
for (int i = length - 2; i >= 0; --i) {
frameOutput_[start + i].value->mul(
*frameOutput_[start + i + 1].value, *weight_->getW(), 1, 1);
activation_->forward(frameOutput_[start + i]);
activation_->forward(frameOutput_[start + i]).check();
}
}
}
......@@ -280,11 +281,11 @@ void RecurrentLayer::backwardOneSequence(int start, int length) {
MatrixPtr weightT = weight_->getW()->getTranspose();
if (!reversed_) {
for (int i = length - 1; i > 0; --i) {
activation_->backward(frameOutput_[start + i]);
activation_->backward(frameOutput_[start + i]).check();
frameOutput_[start + i - 1].grad->mul(
*frameOutput_[start + i].grad, *weightT, 1, 1);
}
activation_->backward(frameOutput_[start]);
activation_->backward(frameOutput_[start]).check();
if (weight_->getWGrad()) {
weight_->getWGrad()->mul(
*output_.value->subMatrix(start, length - 1)->getTranspose(),
......@@ -294,11 +295,11 @@ void RecurrentLayer::backwardOneSequence(int start, int length) {
}
} else {
for (int i = 0; i < length - 1; ++i) {
activation_->backward(frameOutput_[start + i]);
activation_->backward(frameOutput_[start + i]).check();
frameOutput_[start + i + 1].grad->mul(
*frameOutput_[start + i].grad, *weightT, 1, 1);
}
activation_->backward(frameOutput_[start + length - 1]);
activation_->backward(frameOutput_[start + length - 1]).check();
if (weight_->getWGrad()) {
weight_->getWGrad()->mul(
*output_.value->subMatrix(start + 1, length - 1)->getTranspose(),
......@@ -333,7 +334,7 @@ void RecurrentLayer::forwardBatch(int batchSize,
}
Argument arg;
arg.value = batch2;
activation_->forward(arg);
activation_->forward(arg).check();
}
}
batchValue_->copyBackSeq(*output_.value);
......@@ -363,7 +364,7 @@ void RecurrentLayer::backwardBatch(int batchSize,
Argument arg;
arg.value = batch1;
arg.grad = batch2;
activation_->backward(arg);
activation_->backward(arg).check();
if (n != 0) {
batch1 = batchGrad_->getBatchValue(n - 1, batch2->getHeight());
......
......@@ -192,7 +192,8 @@ void SelectiveFullyConnectedLayer::forward(PassType passType) {
nnz,
/*trans=*/false,
/*useGpu=*/useGpu_);
activation_->forward(arg);
//! TODO(yuyang18): Why we cannot invoke forwardActivation here?
activation_->forward(arg).check();
} else /* train and test in train, not generating */ {
// during training, this layer output value is *Matrix*, which is input of
// eg. multi-class-cross-entropy
......
......@@ -310,7 +310,11 @@ TEST(Layer, CTCLayer) {
config.layerConfig.add_inputs();
for (auto useGpu : {false, true}) {
testLayerGrad(config, "ctc", 100, /* trans */ false, /* useGpu */ useGpu);
testLayerGrad(config,
"ctc",
100,
/* trans */ false, /* useGpu */
useGpu);
}
}
......@@ -587,7 +591,11 @@ TEST(Layer, hsigmoidLayer) {
config.layerConfig.add_inputs();
// Not support GPU now
testLayerGrad(config, "hsigmoid", 100, /* trans */ false, /* useGpu */ false);
testLayerGrad(config,
"hsigmoid",
100,
/* trans */ false, /* useGpu */
false);
}
TEST(Layer, multi_cross) {
......@@ -1022,8 +1030,12 @@ void testNormLayer(const string& normType, bool trans, bool useGpu) {
}
TEST(Layer, NormLayer) {
testNormLayer("cmrnorm-projection", /* trans= */ false, /* useGpu= */ true);
testNormLayer("cmrnorm-projection", /* trans= */ false, /* useGpu= */ false);
testNormLayer("cmrnorm-projection",
/* trans= */ false, /* useGpu= */
true);
testNormLayer("cmrnorm-projection",
/* trans= */ false, /* useGpu= */
false);
}
void setPoolConfig(TestConfig* config,
......@@ -1563,6 +1575,35 @@ TEST(Layer, MultiplexLayer) {
}
}
TEST(Layer, PadLayer) {
TestConfig config;
config.biasSize = 0;
config.layerConfig.set_type("pad");
int c = 4;
int h = 31;
int w = 36;
size_t size = c * h * w;
config.inputDefs.push_back({INPUT_DATA, "layer_0", size, 0});
LayerInputConfig* input = config.layerConfig.add_inputs();
PadConfig* pad = input->mutable_pad_conf();
ImageConfig* image = pad->mutable_image_conf();
image->set_channels(c);
image->set_img_size(h);
image->set_img_size_y(w);
pad->add_pad_c(1);
pad->add_pad_c(2);
pad->add_pad_h(2);
pad->add_pad_h(3);
pad->add_pad_w(3);
pad->add_pad_w(5);
for (auto useGpu : {false, true}) {
testLayerGrad(config, "pad", 10, false, useGpu);
}
}
int main(int argc, char** argv) {
testing::InitGoogleTest(&argc, argv);
initMain(argc, argv);
......
......@@ -148,11 +148,11 @@ LayerPtr createCTCLayer(string name,
ActivationFunction* softmaxActivation = ActivationFunction::create("softmax");
softmaxActivation->forward(dataLayer->getOutput());
softmaxActivation->forward(dataLayer->getOutput()).check();
layer->forward(PASS_GC);
layer->backward();
softmaxActivation->backward(dataLayer->getOutput());
softmaxActivation->backward(dataLayer->getOutput()).check();
return layer;
}
......
/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserve.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
#pragma once
#include <vector>
#include "MemoryHandle.h"
#include "paddle/utils/Util.h"
namespace paddle {
/**
* @brief The RowBuffer class
* Represent the SparseRow Matrix Data.
*
* If not set memory handler, then the data could be auto growth.
*/
class RowBuffer {
public:
/**
* @brief RowBuffer create a auto-growth row buffer. The row length is width.
* @param width the length of each row, a.k.a matrix width.
*/
explicit RowBuffer(size_t width) : width_(width) {}
/**
* @brief RowBuffer create a row buffer, which cannot be auto-growth.
* @param mem the pre-allocated memory.
* @param width the length of each row, a.k.a matrix width.
*/
RowBuffer(const CpuMemHandlePtr& mem, size_t width)
: preallocatedBuf_(mem), width_(width) {}
/**
* @brief resize resize the buffer with rowCount
* @param rowCnt number of row. matrix height.
*/
inline void resize(int rowCnt) {
if (preallocatedBuf_) {
CHECK(preallocatedBuf_->getSize() >= rowCnt * width_ * sizeof(real));
} else {
rowStore_.resize(rowCnt * width_);
}
}
/**
* @brief get a row buffer with row index.
* @param row the index of row.
* @return row buffer.
*/
inline real* get(int row) const {
if (preallocatedBuf_) {
CHECK_LE((row + 1) * width_ * sizeof(real), preallocatedBuf_->getSize());
return reinterpret_cast<real*>(preallocatedBuf_->getBuf()) + row * width_;
} else {
CHECK_LE((row + 1) * width_, rowStore_.size());
return const_cast<real*>(rowStore_.data() + row * width_);
}
}
/**
* @brief get a row buffer with row index. If row index is larger than local
* buffer, the size of local buffer will grow.
* @param row the index of row.
* @return row buffer.
*/
inline real* getWithAutoGrowth(int row) {
if (preallocatedBuf_) {
return get(row);
} else {
if ((rowStore_.size() <= row * width_)) {
rowStore_.resize((row + 1) * width_);
}
return rowStore_.data() + row * width_;
}
}
/**
* @return raw data buffer.
*/
inline real* data() {
if (preallocatedBuf_) {
return reinterpret_cast<real*>(preallocatedBuf_->getBuf());
} else {
return rowStore_.data();
}
}
/**
* @brief clear local buffer. It only affect auto-growth buffer.
*/
inline void clear() { rowStore_.clear(); }
/**
* @brief get current number of rows.
* @return number of rows.
*/
inline size_t getRowCount() const {
if (preallocatedBuf_) {
return preallocatedBuf_->getSize() / sizeof(real) / width_;
} else {
return rowStore_.size() / width_;
}
}
/**
* @brief get is this buffer can automatically grow or not.
* @return ture if can automacitally grow.
*/
inline bool isAutoGrowth() const { return !preallocatedBuf_; }
/**
* @brief return the width of matrix. a.k.a length of row.
* @return width of matrix
*/
inline size_t getWidth() const { return width_; }
private:
//! TODO(yuyang18): Add resize method to CpuMemHandlePtr, then we can get rid
//! of std::vector here.
CpuMemHandlePtr preallocatedBuf_;
std::vector<real, AlignedAllocator<real, 32>> rowStore_;
size_t width_;
};
} // namespace paddle
......@@ -18,6 +18,7 @@ limitations under the License. */
#include <string.h>
#include <algorithm>
#include "Matrix.h"
#include "RowBuffer.h"
#include "paddle/utils/Util.h"
DECLARE_bool(allow_inefficient_sparse_update);
......@@ -45,12 +46,9 @@ public:
IndexDictPtr indexDictHandle = nullptr,
bool trans = false)
: CpuMatrix(nullptr, height, width, trans),
storeMat_(dataHandle,
dataHandle ? dataHandle->getSize() / sizeof(real) / width : 0,
width,
trans),
indexDictHandle_(indexDictHandle) {
init(height, width);
buf_.reset(new RowBuffer(dataHandle, width));
}
virtual ~SparseRowCpuMatrix() {}
......@@ -71,25 +69,16 @@ public:
*
* @param row row id in local storage
*/
real* getLocalRow(size_t row) {
if (storeMat_.getData()) return storeMat_.rowBuf(row);
if (rowStore_.size() <= row * width_) {
rowStore_.resize((row + 1) * width_);
}
return rowStore_.data() + row * width_;
}
real* getLocalRow(size_t row) { return buf_->getWithAutoGrowth(row); }
/**
* reserve the storage for rows according to current size of indexDictHandle.
* reserve the storage for rows according to current size of
* indexDictHandle.
*
* This is only used when SparseRowCpuMatrix is constructed with
* indexDictHandle.
*/
void reserveStore() {
if (!storeMat_.getData() && !localIndices_->empty()) {
rowStore_.resize(localIndices_->size() * width_);
}
}
void reserveStore() { buf_->resize(localIndices_->size()); }
// row is the row id in the original matrix
virtual real* getRowBuf(size_t row) { return getRow(row); }
......@@ -117,7 +106,8 @@ public:
*
* If L1 decay set use L1, else if L2 set use L2, otherwise no decay atall.
*
* t0 is a int vector used by L1/L2 decay, size = height of parameter matrix,
* t0 is a int vector used by L1/L2 decay, size = height of parameter
* matrix,
* store the time that each weight row last updated.
*
* Time is batchId, currentTime is current batchId.
......@@ -176,8 +166,7 @@ public:
protected:
template <typename Func>
void apply(Func f) {
real* data = storeMat_.getData() ? storeMat_.getData() : rowStore_.data();
f(data, localIndices_->size() * width_);
f(buf_->data(), localIndices_->size() * width_);
}
void init(size_t height, size_t width);
......@@ -188,25 +177,24 @@ protected:
globalIndices_[id] = kUnusedId_;
}
localIndices_->clear();
rowStore_.clear();
buf_->clear();
}
inline void checkStoreSize() {
if (storeMat_.getData()) {
CHECK_LE(localIndices_->size(), storeMat_.getHeight());
} else if (!FLAGS_allow_inefficient_sparse_update) {
if (localIndices_->size() > 0.5 * height_) {
if (buf_->isAutoGrowth()) {
if (buf_->getRowCount() > 0.5 * height_) {
LOG(WARNING)
<< "There are more than 0.5*height (" << localIndices_->size()
<< ") rows are used for sparse "
<< "update, which is not efficient. Considering not use "
<< "sparse_update or set --allow_inefficient_sparse_update=true";
}
} else {
CHECK_LE(localIndices_->size(), buf_->getRowCount());
}
}
CpuMatrix storeMat_;
std::vector<real, AlignedAllocator<real, 32>> rowStore_;
std::unique_ptr<RowBuffer> buf_;
IndexDictPtr indexDictHandle_;
std::vector<unsigned int>* localIndices_; // =&indexDictHandle_->localIndices
unsigned int* globalIndices_; // =indexDictHandle_->globalIndices.data();
......
......@@ -4,6 +4,7 @@ add_simple_unittest(test_ExecViaCpu)
add_simple_unittest(test_SIMDFunctions)
add_simple_unittest(test_TrainingAlgorithm)
add_simple_unittest(test_SparseMatrix)
add_simple_unittest(test_RowBuffer)
# TODO(yuyang18): Refactor TestUtil.cpp. Remove this cross module reference.
add_unittest(test_matrixCompare
......
/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserve.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
#include <gtest/gtest.h>
#include "paddle/math/RowBuffer.h"
TEST(RowBuffer, testAutoGrow) {
paddle::RowBuffer buf(128);
ASSERT_EQ(128, buf.getWidth());
ASSERT_TRUE(buf.isAutoGrowth());
buf.resize(2);
ASSERT_EQ(2, buf.getRowCount());
for (size_t i = 0; i < buf.getWidth() * 2; ++i) {
buf.data()[i] = i;
}
for (size_t i = 0; i < buf.getRowCount(); ++i) {
for (size_t j = 0; j < buf.getWidth(); ++j) {
ASSERT_NEAR(i * buf.getWidth() + j, buf.get(i)[j], 1e-5);
}
}
auto data = buf.getWithAutoGrowth(2);
for (size_t i = 0; i < buf.getWidth(); ++i) {
data[i] = i;
}
ASSERT_EQ(3, buf.getRowCount());
for (size_t i = 0; i < buf.getRowCount() - 1; ++i) {
for (size_t j = 0; j < buf.getWidth(); ++j) {
ASSERT_NEAR(i * buf.getWidth() + j, buf.get(i)[j], 1e-5);
}
}
for (size_t i = 0; i < buf.getWidth(); ++i) {
ASSERT_NEAR(i, buf.get(2)[i], 1e-5);
}
}
TEST(RowBuffer, testWithMemBuf) {
paddle::CpuMemHandlePtr mem =
std::make_shared<paddle::CpuMemoryHandle>(128 * 2 * sizeof(real));
paddle::RowBuffer buf(mem, 128);
ASSERT_TRUE(!buf.isAutoGrowth());
ASSERT_EQ(2, buf.getRowCount());
for (size_t i = 0; i < buf.getWidth() * 2; ++i) {
buf.data()[i] = i;
}
for (size_t i = 0; i < buf.getRowCount(); ++i) {
for (size_t j = 0; j < buf.getWidth(); ++j) {
ASSERT_NEAR(i * buf.getWidth() + j, buf.getWithAutoGrowth(i)[j], 1e-5);
}
}
ASSERT_DEATH_IF_SUPPORTED(buf.getWithAutoGrowth(3), ".*");
}
......@@ -55,7 +55,7 @@ public:
// between startBatch() and finishBatch(), update() will be called
// by the trainer multiple times, each time for updating one Parameter
// with its gradient in PARAMETER_GRADIENT
virtual void update(Parameter* para) {
void update(Parameter* para) {
SetDevice setDevice(para->getDeviceId());
para->updateHook();
this->updateImpl(para);
......
......@@ -12,14 +12,13 @@ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
#include "paddle/utils/Util.h"
#include <gtest/gtest.h>
#include <memory>
#include "ParameterService.pb.h"
#include "paddle/math/Vector.h"
#include "paddle/pserver/ProtoServer.h"
#include "paddle/utils/Stat.h"
#include "paddle/utils/Util.h"
DEFINE_string(server_addr, "127.0.0.1", "Server address");
DEFINE_int64(dim, 50000000, "Data size");
......@@ -162,18 +161,9 @@ TEST(ProtoServer, extended) {
int main(int argc, char** argv) {
paddle::initMain(argc, argv);
testing::InitGoogleTest(&argc, argv);
MyServer* server;
if (FLAGS_rdma_tcp == "rdma") {
server = new MyServer(FLAGS_port, 0);
} else {
server = new MyServer(FLAGS_port);
}
server->start();
MyServer server(FLAGS_port, FLAGS_rdma_tcp == "rdma" ? 0 : -1);
server.start();
usleep(10000);
int ret = RUN_ALL_TESTS();
exit(ret);
return RUN_ALL_TESTS();
}
......@@ -34,6 +34,10 @@ class IScanner(object):
class DenseScanner(IScanner):
"""
:type __mat__: numpy.ndarray
"""
def __init__(self, input_type, pos):
IScanner.__init__(self, input_type, pos)
self.__mat__ = None
......@@ -47,6 +51,8 @@ class DenseScanner(IScanner):
def finish_scan(self, argument):
assert isinstance(argument, swig_paddle.Arguments)
assert isinstance(self.input_type, dp2.InputType)
if self.__mat__.dtype != numpy.float32:
self.__mat__ = self.__mat__.astype(numpy.float32)
m = swig_paddle.Matrix.createDenseFromNumpy(self.__mat__, True, False)
argument.setSlotValue(self.pos, m)
......
......@@ -4,28 +4,32 @@ MAINTAINER PaddlePaddle Authors <paddle-dev@baidu.com>
ARG DEBIAN_FRONTEND=noninteractive
ARG UBUNTU_MIRROR
RUN /bin/bash -c 'if [[ -n ${UBUNTU_MIRROR} ]]; then sed -i 's#http://archive.ubuntu.com#${UBUNTU_MIRROR}#g' /etc/apt/sources.list; fi'
RUN apt-get update \
&& apt-get install -y cmake libprotobuf-dev protobuf-compiler git \
libgoogle-glog-dev libgflags-dev libgtest-dev \
libatlas-dev libatlas3-base g++ m4 python-pip \
python-protobuf python-numpy python-dev swig openssh-server \
wget unzip python-matplotlib tar xz-utils bzip2 gzip coreutils \
sed grep graphviz libjpeg-dev zlib1g-dev doxygen \
clang-3.8 llvm-3.8 libclang-3.8-dev \
&& apt-get clean -y
RUN cd /usr/src/gtest && cmake . && make && cp *.a /usr/lib
RUN pip install -U BeautifulSoup docopt PyYAML pillow \
sphinx sphinx_rtd_theme recommonmark jupyter
RUN apt-get update && \
apt-get install -y git python-pip python-dev openssh-server bison && \
apt-get install -y wget unzip tar xz-utils bzip2 gzip coreutils && \
apt-get install -y curl sed grep graphviz libjpeg-dev zlib1g-dev && \
apt-get install -y python-numpy python-matplotlib gcc g++ gfortran && \
apt-get install -y automake clang-3.8 llvm-3.8 libclang-3.8-dev && \
apt-get clean -y
RUN pip install --upgrade pip && \
pip install 'protobuf==3.1.0.post1' && \
pip install -U wheel pillow BeautifulSoup && \
pip install -U docopt PyYAML sphinx && \
pip install -U sphinx_rtd_theme recommonmark jupyter
RUN curl -sSL https://cmake.org/files/v3.4/cmake-3.4.1.tar.gz | tar -xz && \
cd cmake-3.4.1 && ./bootstrap && make -j4 && make install && \
cd .. && rm -rf cmake-3.4.1
ARG WITH_AVX
ARG WITH_DOC
ARG WITH_SWIG_PY
ARG WITH_STYLE_CHECK
ENV WITH_GPU=OFF
ENV WITH_AVX=${WITH_AVX:-ON}
ENV WITH_DOC=${WITH_DOC:-ON}
ENV WITH_SWIG_PY=${WITH_SWIG_PY:-ON}
ENV WITH_STYLE_CHECK=${WITH_STYLE_CHECK:-OFF}
RUN mkdir /paddle
......
......@@ -4,28 +4,32 @@ MAINTAINER PaddlePaddle Authors <paddle-dev@baidu.com>
ARG DEBIAN_FRONTEND=noninteractive
ARG UBUNTU_MIRROR
RUN /bin/bash -c 'if [[ -n ${UBUNTU_MIRROR} ]]; then sed -i 's#http://archive.ubuntu.com#${UBUNTU_MIRROR}#g' /etc/apt/sources.list; fi'
RUN apt-get update \
&& apt-get install -y cmake libprotobuf-dev protobuf-compiler git \
libgoogle-glog-dev libgflags-dev libgtest-dev \
libatlas-dev libatlas3-base g++ m4 python-pip \
python-protobuf python-numpy python-dev swig openssh-server \
wget unzip python-matplotlib tar xz-utils bzip2 gzip coreutils \
sed grep graphviz libjpeg-dev zlib1g-dev doxygen \
clang-3.8 llvm-3.8 libclang-3.8-dev \
&& apt-get clean -y
RUN cd /usr/src/gtest && cmake . && make && cp *.a /usr/lib
RUN pip install -U BeautifulSoup docopt PyYAML pillow \
sphinx sphinx_rtd_theme recommonmark jupyter
RUN apt-get update && \
apt-get install -y git python-pip python-dev openssh-server bison && \
apt-get install -y wget unzip tar xz-utils bzip2 gzip coreutils && \
apt-get install -y curl sed grep graphviz libjpeg-dev zlib1g-dev && \
apt-get install -y python-numpy python-matplotlib gcc g++ gfortran && \
apt-get install -y automake clang-3.8 llvm-3.8 libclang-3.8-dev && \
apt-get clean -y
RUN pip install --upgrade pip && \
pip install 'protobuf==3.1.0.post1' && \
pip install -U wheel pillow BeautifulSoup && \
pip install -U docopt PyYAML sphinx && \
pip install -U sphinx_rtd_theme recommonmark jupyter
RUN curl -sSL https://cmake.org/files/v3.4/cmake-3.4.1.tar.gz | tar -xz && \
cd cmake-3.4.1 && ./bootstrap && make -j4 && make install && \
cd .. && rm -rf cmake-3.4.1
ARG WITH_AVX
ARG WITH_DOC
ARG WITH_SWIG_PY
ARG WITH_STYLE_CHECK
ENV WITH_GPU=ON
ENV WITH_AVX=${WITH_AVX:-ON}
ENV WITH_DOC=${WITH_DOC:-ON}
ENV WITH_SWIG_PY=${WITH_SWIG_PY:-ON}
ENV WITH_STYLE_CHECK=${WITH_STYLE_CHECK:-OFF}
RUN mkdir /paddle
......
#!/bin/bash
brew update
brew tap homebrew/science
brew install python
sudo pip install --upgrade protobuf
brew install swig openblas md5sha1sum protobuf
brew install openblas swig md5sha1sum
......@@ -6,7 +6,7 @@ if [[ "$TRAVIS_OS_NAME" == "linux" ]]; then
export PYTHONPATH=/opt/python/2.7.12/lib/python2.7/site-packages
export PYTHONHOME=/opt/python/2.7.12
export PATH=/opt/python/2.7.12/bin:${PATH}
cmake .. -DON_TRAVIS=ON -DON_COVERALLS=ON -DCOVERALLS_UPLOAD=ON ${EXTRA_CMAKE_OPTS}
cmake .. -DCMAKE_Fortran_COMPILER=/usr/bin/gfortran-4.8 -DON_TRAVIS=ON -DON_COVERALLS=ON -DCOVERALLS_UPLOAD=ON ${EXTRA_CMAKE_OPTS}
NRPOC=`nproc`
make -j $NPROC
make coveralls
......
......@@ -4,7 +4,7 @@
source ./common.sh
# Compile Documentation only.
cmake .. -DCMAKE_BUILD_TYPE=Debug -DWITH_GPU=OFF -DWITH_DOC=ON ${EXTRA_CMAKE_OPTS}
cmake .. -DCMAKE_BUILD_TYPE=Debug -DCMAKE_Fortran_COMPILER=/usr/bin/gfortran-4.8 -DWITH_GPU=OFF -DWITH_DOC=ON ${EXTRA_CMAKE_OPTS}
make paddle_docs paddle_docs_cn
# check websites for broken links
......
......@@ -184,7 +184,6 @@ protected:
* @param para
*/
virtual void updateImpl(Parameter* para) {}
virtual void update(Parameter* para) {}
};
/**
......
/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
#pragma once
/**
* This header defines some useful attribute by each compiler. It is the
* abstract layer of compilers.
*/
#ifdef __GNUC__
#define GCC_VERSION \
(__GNUC__ * 10000 + __GNUC_MINOR__ * 100 + __GNUC_PATCHLEVEL__)
#else
#define GCC_VERSION
#endif
/**
* __must_check macro. It make the function's return value must be used,
* otherwise it will raise a compile warning. And also Paddle treat all compile
* warnings as errors.
*/
#if GCC_VERSION >= 30400
#define __must_check __attribute__((warn_unused_result))
#else
#define __must_check
#endif
/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserve.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
#pragma once
#include <glog/logging.h>
#include <stdarg.h>
#include <stdio.h>
#include <memory>
#include <string>
#include "Compiler.h"
namespace paddle {
/**
* Error is Paddle error code. It only contain a std::string as error message.
*
*
* There are two styles to return error in Paddle.
*
* 1. Return Error
* When method return a status, the return must use `__must_check` attribute.
* Example as below.
* @code{cpp}
* Error __must_check foo();
*
* Error __must_check bar() {
* // do something.
* Status s = foo(); // invoke other method return status.
* if (!s) return s;
* // do something else.
* return Status();
* }
* @endcode{cpp}
*
* 2. Return by parameter.
* It is another way to return an error, by using a pointer parameter.
* Example as below.
*
* @code{cpp}
* Error bar();
*
* int foo(Error* error) {
* // Do something.
* Error s = bar();
* if (!s) {
* *error = s;
* return 0;
* }
* // Do something else.
* if (someInternalErrorHappend) {
* *error = Error("Some dimension is too large, %d", dimension);
* return 0;
* }
* // End of method.
* return someValue;
* }
*
* Error foobar() {
* Error s;
* // do something.
* foo(&s);
* if (!s) return s;
* }
* @endcode{cpp}
*
*
* Currently there is a helper method 'check' in status, because Paddle always
* use log(FATAL) or CHECK to make program exit before. When we clean all
* log(FATAL) and CHECK in Paddle, 'check' method will be removed.
*/
class Error {
public:
/**
* Construct a no-error value.
*/
Error() {}
/**
* @brief Create an Error use printf syntax.
*/
explicit Error(const char* fmt, ...) {
va_list ap;
va_start(ap, fmt);
constexpr size_t kBufferSize = 1024;
char buffer[kBufferSize];
vsnprintf(buffer, kBufferSize, fmt, ap);
this->msg_.reset(new std::string(buffer));
va_end(ap);
}
/**
* @brief msg will return the error message. If no error, return nullptr.
*/
const char* msg() const {
if (msg_) {
return msg_->c_str();
} else {
return nullptr;
}
}
/**
* @brief operator bool, return True if there is no error.
*/
operator bool() const { return msg_ == nullptr; }
/**
* @brief check this status by glog.
* @note It is a temp method used during cleaning Paddle code. It will be
* removed later.
*/
void check() const { CHECK(*this) << msg(); }
private:
std::shared_ptr<std::string> msg_;
};
} // namespace paddle
......@@ -144,20 +144,20 @@ void runInitFunctions() {
}
void initMain(int argc, char** argv) {
initializeLogging(argc, argv);
installLayerStackTracer();
std::string line;
for (int i = 0; i < argc; ++i) {
line += argv[i];
line += ' ';
}
LOG(INFO) << "commandline: " << line;
#ifndef GFLAGS_GFLAGS_H_
namespace gflags = google;
#endif
gflags::ParseCommandLineFlags(&argc, &argv, true);
initializeLogging(argc, argv);
LOG(INFO) << "commandline: " << line;
CHECK_EQ(argc, 1) << "Unknown commandline argument: " << argv[1];
installProfilerSwitch();
......
......@@ -4,6 +4,7 @@ add_simple_unittest(test_CustomStackTrace)
add_simple_unittest(test_ThreadBarrier)
add_simple_unittest(test_SpinLock)
add_simple_unittest(test_SIMDFlags)
add_simple_unittest(test_Error)
add_executable(
test_CustomStackTracePrint
......
/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserve.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
#include "paddle/utils/Error.h"
#include <gtest/gtest.h>
TEST(Error, testAll) {
paddle::Error error;
ASSERT_TRUE(error);
error = paddle::Error("I'm the error");
ASSERT_FALSE(error);
ASSERT_STREQ("I'm the error", error.msg());
error = paddle::Error("error2");
ASSERT_FALSE(error);
ASSERT_STREQ("error2", error.msg());
int i = 3;
auto error3 = paddle::Error("error%d", i);
ASSERT_FALSE(error3);
ASSERT_STREQ("error3", error3.msg());
}
......@@ -255,6 +255,13 @@ message PriorBoxConfig {
repeated float variance = 4;
}
message PadConfig {
required ImageConfig image_conf = 1;
repeated uint32 pad_c = 2;
repeated uint32 pad_h = 3;
repeated uint32 pad_w = 4;
}
message LayerInputConfig {
required string input_layer_name = 1;
optional string input_parameter_name = 2;
......@@ -271,6 +278,7 @@ message LayerInputConfig {
optional MaxOutConfig maxout_conf = 11;
optional SppConfig spp_conf = 12;
optional PriorBoxConfig priorbox_conf = 13;
optional PadConfig pad_conf = 14;
}
message LayerConfig {
......
......@@ -4,11 +4,13 @@ set(OUTPUT_DIR
file(GLOB TRAINER_PY_FILES . ./paddle/trainer/*.py)
file(GLOB HELPERS_PY_FILES . ./paddle/trainer_config_helpers/*.py)
file(GLOB UTILS_PY_FILES . ./paddle/utils/*.py)
file(GLOB V2_PY_FILES . ./paddle/v2/*.py)
set(PY_FILES paddle/__init__.py
${TRAINER_PY_FILES}
${HELPERS_PY_FILES}
${UTILS_PY_FILES})
${UTILS_PY_FILES}
${V2_PY_FILES})
configure_file(${CMAKE_CURRENT_SOURCE_DIR}/setup.py.in
${CMAKE_CURRENT_BINARY_DIR}/setup.py)
......
......@@ -493,6 +493,7 @@ class Input(Cfg):
block_expand=None,
maxout=None,
spp=None,
pad=None,
format=None,
nnz=None,
is_static=None,
......@@ -844,6 +845,12 @@ class SpatialPyramidPool(Cfg):
self.add_keys(locals())
@config_class
class Pad(Cfg):
def __init__(self, channels, pad_c, pad_h, pad_w):
self.add_keys(locals())
@config_class
class Norm(Cfg):
def __init__(self,
......@@ -1102,7 +1109,7 @@ def parse_bilinear(bilinear, input_layer_name, bilinear_conf):
bilinear_conf.out_size_y = bilinear.out_size_y
def parse_pool(pool, input_layer_name, pool_conf):
def parse_pool(pool, input_layer_name, pool_conf, ceil_mode):
pool_conf.pool_type = pool.pool_type
config_assert(pool.pool_type in [
'max-projection', 'avg-projection', 'cudnn-max-pool', 'cudnn-avg-pool'
......@@ -1127,10 +1134,10 @@ def parse_pool(pool, input_layer_name, pool_conf):
pool_conf.padding_y = default(pool.padding_y, pool_conf.padding)
pool_conf.output_x = cnn_output_size(pool_conf.img_size, pool_conf.size_x,
pool_conf.padding, pool_conf.stride,
False)
not ceil_mode)
pool_conf.output_y = cnn_output_size(pool_conf.img_size_y, pool_conf.size_y,
pool_conf.padding_y,
pool_conf.stride_y, False)
pool_conf.stride_y, not ceil_mode)
def parse_spp(spp, input_layer_name, spp_conf):
......@@ -1803,9 +1810,8 @@ class ConvTransLayer(ConvTransLayerBase):
@config_layer('norm')
class NormLayer(LayerBase):
def __init__(self, name, inputs, device=None):
super(NormLayer, self).__init__(
name, 'norm', 0, inputs=inputs, device=device)
def __init__(self, name, inputs, **xargs):
super(NormLayer, self).__init__(name, 'norm', 0, inputs=inputs, **xargs)
for input_index in xrange(len(self.inputs)):
input_layer = self.get_input_layer(input_index)
norm_conf = self.config.inputs[input_index].norm_conf
......@@ -1817,23 +1823,22 @@ class NormLayer(LayerBase):
@config_layer('pool')
class PoolLayer(LayerBase):
def __init__(self, name, inputs, device=None):
super(PoolLayer, self).__init__(
name, 'pool', 0, inputs=inputs, device=device)
def __init__(self, name, inputs, ceil_mode=True, **xargs):
super(PoolLayer, self).__init__(name, 'pool', 0, inputs=inputs, **xargs)
for input_index in xrange(len(self.inputs)):
input_layer = self.get_input_layer(input_index)
pool_conf = self.config.inputs[input_index].pool_conf
parse_pool(self.inputs[input_index].pool, input_layer.name,
pool_conf)
pool_conf, ceil_mode)
self.set_cnn_layer(name, pool_conf.output_y, pool_conf.output_x,
pool_conf.channels)
@config_layer('spp')
class SpatialPyramidPoolLayer(LayerBase):
def __init__(self, name, inputs, device=None):
def __init__(self, name, inputs, **xargs):
super(SpatialPyramidPoolLayer, self).__init__(
name, 'spp', 0, inputs=inputs, device=device)
name, 'spp', 0, inputs=inputs, **xargs)
for input_index in xrange(len(self.inputs)):
input_layer = self.get_input_layer(input_index)
spp_conf = self.config.inputs[input_index].spp_conf
......@@ -1842,6 +1847,25 @@ class SpatialPyramidPoolLayer(LayerBase):
self.set_cnn_layer(name, 1, output_x, spp_conf.image_conf.channels)
@config_layer('pad')
class PadLayer(LayerBase):
def __init__(self, name, inputs, **xargs):
super(PadLayer, self).__init__(name, 'pad', 0, inputs=inputs, **xargs)
pad = self.inputs[0].pad
self.config.inputs[0].pad_conf.pad_c.extend(pad.pad_c)
self.config.inputs[0].pad_conf.pad_h.extend(pad.pad_h)
self.config.inputs[0].pad_conf.pad_w.extend(pad.pad_w)
input_layer = self.get_input_layer(0)
image_conf = self.config.inputs[0].pad_conf.image_conf
parse_image(pad, input_layer.name, image_conf)
out_ch = pad.channels + pad.pad_c[0] + pad.pad_c[1]
out_h = image_conf.img_size_y + pad.pad_h[0] + pad.pad_h[1]
out_w = image_conf.img_size + pad.pad_w[0] + pad.pad_w[1]
self.set_cnn_layer(name, out_h, out_w, out_ch)
self.config.size = out_ch * out_h * out_w
@config_layer('batch_norm')
class BatchNormLayer(LayerBase):
layer_type = 'batch_norm'
......@@ -1851,7 +1875,6 @@ class BatchNormLayer(LayerBase):
inputs,
active_type="linear",
bias=True,
device=None,
use_global_stats=True,
moving_average_fraction=0.9,
batch_norm_type=None,
......@@ -1893,7 +1916,6 @@ class BatchNormLayer(LayerBase):
0,
active_type=active_type,
inputs=inputs,
device=device,
**xargs)
if use_global_stats is not None:
......@@ -1927,9 +1949,9 @@ class BatchNormLayer(LayerBase):
@config_layer('trans')
class TransLayer(LayerBase):
def __init__(self, name, inputs, device=None):
def __init__(self, name, inputs, **xargs):
super(TransLayer, self).__init__(
name, 'trans', 0, inputs=inputs, device=device)
name, 'trans', 0, inputs=inputs, **xargs)
config_assert(
len(self.inputs) == 1,
'TransLayer must have one and only one input')
......@@ -1938,9 +1960,9 @@ class TransLayer(LayerBase):
@config_layer('resize')
class ResizeLayer(LayerBase):
def __init__(self, name, size, inputs, device=None):
def __init__(self, name, size, inputs, **xargs):
super(ResizeLayer, self).__init__(
name, 'resize', size=size, inputs=inputs, device=device)
name, 'resize', size=size, inputs=inputs, **xargs)
config_assert(
len(self.inputs) == 1,
'ResizeLayer must have one and only one input')
......@@ -1948,9 +1970,9 @@ class ResizeLayer(LayerBase):
@config_layer('blockexpand')
class BlockExpandLayer(LayerBase):
def __init__(self, name, inputs, device=None):
def __init__(self, name, inputs, **xargs):
super(BlockExpandLayer, self).__init__(
name, 'blockexpand', 0, inputs=inputs, device=device)
name, 'blockexpand', 0, inputs=inputs, **xargs)
for input_index in xrange(len(self.inputs)):
input_layer = self.get_input_layer(input_index)
parse_block_expand(
......@@ -2628,7 +2650,7 @@ class AverageLayer(LayerBase):
@config_layer('cos')
class CosSimLayer(LayerBase):
def __init__(self, name, inputs, cos_scale=5, device=None):
def __init__(self, name, inputs, cos_scale=1, device=None):
super(CosSimLayer, self).__init__(
name, 'cos', 1, inputs=inputs, device=device)
config_assert(len(self.inputs) == 2, 'CosSimLayer must have 2 inputs')
......
......@@ -108,6 +108,7 @@ __all__ = [
'print_layer',
'priorbox_layer',
'spp_layer',
'pad_layer',
]
......@@ -170,6 +171,7 @@ class LayerType(object):
BLOCK_EXPAND = "blockexpand"
MAXOUT = "maxout"
SPP_LAYER = "spp"
PAD_LAYER = "pad"
PRINT_LAYER = "print"
PRIORBOX_LAYER = "priorbox"
......@@ -1673,7 +1675,7 @@ def trans_layer(input, name=None, layer_attr=None):
@wrap_name_default()
@layer_support()
def cos_sim(a, b, scale=5, size=1, name=None, layer_attr=None):
def cos_sim(a, b, scale=1, size=1, name=None, layer_attr=None):
"""
Cosine Similarity Layer. The cosine similarity equation is here.
......@@ -1979,7 +1981,8 @@ def img_pool_layer(input,
layer_attr=None,
pool_size_y=None,
stride_y=None,
padding_y=None):
padding_y=None,
ceil_mode=True):
"""
Image pooling Layer.
......@@ -2010,6 +2013,23 @@ def img_pool_layer(input,
:type stride_y: int|None
:param layer_attr: Extra Layer attribute.
:type layer_attr: ExtraLayerAttribute
:param ceil_mode: Wether to use ceil mode to calculate output height and with.
Defalut is True. If set false, Otherwise use floor.
- ceil_mode=True:
.. math::
w = 1 + int(ceil(input_width + 2 * padding - pool_size) / float(stride))
h = 1 + int(ceil(input_height + 2 * padding_y - pool_size_y) / float(stride_y))
- ceil_mode=False:
.. math::
w = 1 + int(floor(input_width + 2 * padding - pool_size) / float(stride))
h = 1 + int(floor(input_height + 2 * padding_y - pool_size_y) / float(stride_y))
:type ceil_mode: bool
:return: LayerOutput object.
:rtype: LayerOutput
"""
......@@ -2047,6 +2067,7 @@ def img_pool_layer(input,
stride_y=stride_y,
padding_y=padding_y))
],
ceil_mode=ceil_mode,
**ExtraLayerAttribute.to_kwargs(layer_attr))
return LayerOutput(
name,
......@@ -3488,9 +3509,6 @@ def conv_projection(input,
groups=1,
param_attr=None):
"""
ConvProjection with a layer as input.
It performs element-wise multiplication with weight.
Different from img_conv_layer and conv_op, conv_projection is an Projection,
which can be used in mixed_layer and conat_layer. It use cudnn to implement
conv and only support GPU mode.
......@@ -3499,7 +3517,7 @@ def conv_projection(input,
.. code-block:: python
proj = conv_projection(img=input1,
proj = conv_projection(input=input1,
filter_size=3,
num_filters=64,
num_channels=64)
......@@ -3582,6 +3600,109 @@ def conv_projection(input,
return proj
@wrap_name_default("pad")
@layer_support()
def pad_layer(input,
pad_c=None,
pad_h=None,
pad_w=None,
name=None,
layer_attr=None):
"""
This operation pads zeros to the input data according to pad_c,pad_h
and pad_w. pad_c, pad_h, pad_w specifies the which dimension and size
of padding. And the input data shape is NCHW.
For example, pad_c=[2,3] means padding 2 zeros before the
input data and 3 zeros after the input data in channel dimension.
pad_h means padding zeros in height dimension. pad_w means padding zeros
in width dimension.
For example,
.. code-block::
input(2,2,2,3) = [
[ [[1,2,3], [3,4,5]],
[[2,3,5], [1,6,7]] ],
[ [[4,3,1], [1,8,7]],
[[3,8,9], [2,3,5]] ]
]
pad_c=[1,1], pad_h=[0,0], pad_w=[0,0]
output(2,4,2,3) = [
[ [[0,0,0], [0,0,0]],
[[1,2,3], [3,4,5]],
[[2,3,5], [1,6,7]],
[[0,0,0], [0,0,0]] ],
[ [[0,0,0], [0,0,0]],
[[4,3,1], [1,8,7]],
[[3,8,9], [2,3,5]],
[[0,0,0], [0,0,0]] ]
]
The simply usage is:
.. code-block:: python
pad = pad_layer(input=ipt,
pad_c=[4,4],
pad_h=[0,0],
pad_w=[2,2])
:param input: layer's input.
:type input: LayerOutput
:param pad_c: padding size in channel dimension.
:type pad_c: list|None
:param pad_h: padding size in height dimension.
:type pad_h: list|None
:param pad_w: padding size in width dimension.
:type pad_w: list|None
:param layer_attr: Extra Layer Attribute.
:type layer_attr: ExtraLayerAttribute
:param name: layer name.
:type name: basestring
:return: LayerOutput object.
:rtype: LayerOutput
"""
if pad_c is not None:
assert isinstance(pad_c, collections.Sequence) and len(pad_c) == 2
else:
pad_c = [0, 0]
if pad_h is not None:
assert isinstance(pad_h, collections.Sequence) and len(pad_h) == 2
else:
pad_h = [0, 0]
if pad_w is not None:
assert isinstance(pad_w, collections.Sequence) and len(pad_w) == 2
else:
pad_w = [0, 0]
assert input.num_filters is not None
in_ch = input.num_filters
out_ch = in_ch + pad_c[0] + pad_c[1]
l = Layer(
name=name,
type=LayerType.PAD_LAYER,
inputs=Input(
input.name,
pad=Pad(
channels=in_ch,
pad_c=pad_c,
pad_h=pad_h,
pad_w=pad_w, )),
**ExtraLayerAttribute.to_kwargs(layer_attr))
return LayerOutput(
name,
layer_type=LayerType.PAD_LAYER,
parents=[input],
num_filters=out_ch,
size=l.config.size)
@wrap_name_default()
@layer_support()
def conv_shift_layer(a, b, name=None, layer_attr=None):
......
......@@ -9,17 +9,10 @@ add_test(NAME test_reset_hook
${PYTHON_EXECUTABLE} ${PROJ_ROOT}/python/paddle/trainer_config_helpers/tests/test_reset_hook.py
WORKING_DIRECTORY ${PROJ_ROOT}/python/paddle)
if (PROTOBUF_3)
add_paddle_exe(protobuf_equal
ProtobufEqualMain.cpp)
add_test(NAME test_layerHelpers
COMMAND
${PROJ_ROOT}/python/paddle/trainer_config_helpers/tests/configs/run_tests.sh ${PYTHON_EXECUTABLE}
${CMAKE_CURRENT_BINARY_DIR}/protobuf_equal
)
else()
add_test(NAME test_layerHelpers
COMMAND
${PROJ_ROOT}/python/paddle/trainer_config_helpers/tests/configs/run_tests.sh ${PYTHON_EXECUTABLE}
)
endif()
add_paddle_exe(protobuf_equal
ProtobufEqualMain.cpp)
add_test(NAME test_layerHelpers
COMMAND
${PROJ_ROOT}/python/paddle/trainer_config_helpers/tests/configs/run_tests.sh ${PYTHON_EXECUTABLE}
${CMAKE_CURRENT_BINARY_DIR}/protobuf_equal
)
......@@ -79,7 +79,7 @@ layers {
inputs {
input_layer_name: "b"
}
cos_scale: 5
cos_scale: 1
}
layers {
name: "__cos_sim_1__"
......@@ -92,7 +92,7 @@ layers {
inputs {
input_layer_name: "c"
}
cos_scale: 5
cos_scale: 1
}
layers {
name: "__sum_to_one_norm_layer_0__"
......
......@@ -2,16 +2,18 @@
cd `dirname $0`
set -e
PYTHON_EXEC=$1
COMPARE_PROTO_UTIL=$2
protostr=`dirname $0`/protostr
files=`ls $protostr | grep -v "unittest"`
./generate_protostr.sh $1
./generate_protostr.sh ${PYTHON_EXEC}
. ./file_list.sh
if [ -z $1 ]; then
if [ -z ${COMPARE_PROTO_UTIL} ]; then
for file in $files
do
base_protostr=$protostr/$file
......@@ -22,20 +24,20 @@ if [ -z $1 ]; then
else
for file in ${configs[*]}
do
if ! $1 $protostr/$file.protostr $protostr/$file.protostr.unittest; then
if ! ${COMPARE_PROTO_UTIL} $protostr/$file.protostr $protostr/$file.protostr.unittest; then
diff $protostr/$file.protostr $protostr/$file.protostr.unittest -u
fi
if ! $1 $protostr/$file.protostr $protostr/$file.protostr.non_file_config.unittest; then
if ! ${COMPARE_PROTO_UTIL} $protostr/$file.protostr $protostr/$file.protostr.non_file_config.unittest; then
diff $protostr/$file.protostr $protostr/$file.protostr.non_file_config.unittest -u
fi
done
for file in ${whole_configs[*]}
do
if ! $1 $protostr/$file.protostr $protostr/$file.protostr.unittest --whole; then
if ! ${COMPARE_PROTO_UTIL} $protostr/$file.protostr $protostr/$file.protostr.unittest --whole; then
diff $protostr/$file.protostr $protostr/$file.protostr.unittest -u
fi
if ! $1 $protostr/$file.protostr $protostr/$file.protostr.non_file_config.unittest --whole; then
if ! ${COMPARE_PROTO_UTIL} $protostr/$file.protostr $protostr/$file.protostr.non_file_config.unittest --whole; then
diff $protostr/$file.protostr $protostr/$file.protostr.non_file_config.unittest -u
fi
done
......
from paddle.trainer_config_helpers import *
settings(batch_size=1000, learning_rate=1e-5)
data = data_layer(name='data', size=2304, height=48, width=42)
conv = img_conv_layer(
input=data,
filter_size=3,
num_channels=1,
num_filters=16,
padding=1,
act=LinearActivation(),
bias_attr=True)
pool = img_pool_layer(
input=conv, num_channels=8, pool_size=2, stride=2, pool_type=MaxPooling())
pad = pad_layer(input=pool, pad_c=[2, 3], pad_h=[1, 2], pad_w=[3, 1])
outputs(pad)
# Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserved
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
......@@ -4,7 +4,8 @@ packages=['paddle',
'paddle.proto',
'paddle.trainer',
'paddle.trainer_config_helpers',
'paddle.utils']
'paddle.utils',
'paddle.v2']
setup(name='paddle',
version='${PADDLE_VERSION}',
......
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册