提交 8d47499e 编写于 作者: D dangqingqing

update code

...@@ -25,9 +25,9 @@ addons: ...@@ -25,9 +25,9 @@ addons:
packages: packages:
- gcc-4.8 - gcc-4.8
- g++-4.8 - g++-4.8
- gfortran-4.8
- git - git
- build-essential - build-essential
- libatlas-base-dev
- python - python
- python-pip - python-pip
- python2.7-dev - python2.7-dev
......
...@@ -16,7 +16,7 @@ ...@@ -16,7 +16,7 @@
set(CBLAS_FOUND OFF) set(CBLAS_FOUND OFF)
## Find MKL First. ## Find MKL First.
set(MKL_ROOT $ENV{MKL_ROOT} CACHE PATH "Folder contains MKL") set(MKL_ROOT $ENV{MKLROOT} CACHE PATH "Folder contains MKL")
find_path(MKL_INCLUDE_DIR mkl.h PATHS find_path(MKL_INCLUDE_DIR mkl.h PATHS
${MKL_ROOT}/include) ${MKL_ROOT}/include)
......
...@@ -15,7 +15,6 @@ ...@@ -15,7 +15,6 @@
INCLUDE(cblas) INCLUDE(cblas)
IF(NOT ${CBLAS_FOUND}) IF(NOT ${CBLAS_FOUND})
MESSAGE(FATAL_ERROR "Please install OpenBlas, MKL or ATLAS.")
INCLUDE(ExternalProject) INCLUDE(ExternalProject)
SET(CBLAS_SOURCES_DIR ${THIRD_PARTY_PATH}/openblas) SET(CBLAS_SOURCES_DIR ${THIRD_PARTY_PATH}/openblas)
...@@ -28,20 +27,40 @@ IF(NOT ${CBLAS_FOUND}) ...@@ -28,20 +27,40 @@ IF(NOT ${CBLAS_FOUND})
SET(CBLAS_LIBRARIES "${CBLAS_INSTALL_DIR}/lib/libopenblas.a" CACHE FILEPATH "openblas library" FORCE) SET(CBLAS_LIBRARIES "${CBLAS_INSTALL_DIR}/lib/libopenblas.a" CACHE FILEPATH "openblas library" FORCE)
ENDIF(WIN32) ENDIF(WIN32)
IF(CMAKE_COMPILER_IS_GNUCC)
ENABLE_LANGUAGE(Fortran)
LIST(APPEND CBLAS_LIBRARIES gfortran pthread)
ENDIF(CMAKE_COMPILER_IS_GNUCC)
IF(NOT CMAKE_Fortran_COMPILER)
MESSAGE(FATAL_ERROR "To build lapack in libopenblas, "
"you need to set gfortran compiler: cmake .. -DCMAKE_Fortran_COMPILER=...")
ENDIF(NOT CMAKE_Fortran_COMPILER)
ExternalProject_Add( ExternalProject_Add(
openblas openblas
${EXTERNAL_PROJECT_LOG_ARGS} ${EXTERNAL_PROJECT_LOG_ARGS}
URL "https://github.com/xianyi/OpenBLAS/archive/v0.2.19.tar.gz" GIT_REPOSITORY https://github.com/xianyi/OpenBLAS.git
GIT_TAG v0.2.19
PREFIX ${CBLAS_SOURCES_DIR} PREFIX ${CBLAS_SOURCES_DIR}
INSTALL_DIR ${CBLAS_INSTALL_DIR} INSTALL_DIR ${CBLAS_INSTALL_DIR}
BUILD_IN_SOURCE 1 BUILD_IN_SOURCE 1
CONFIGURE_COMMAND "" BUILD_COMMAND ${CMAKE_MAKE_PROGRAM} FC=${CMAKE_Fortran_COMPILER} CC=${CMAKE_C_COMPILER} HOSTCC=${CMAKE_C_COMPILER} NO_SHARED=1 libs netlib
BUILD_COMMAND make CC=${CMAKE_C_COMPILER} FC=${CMAKE_Fortran_COMPILER} INSTALL_COMMAND ${CMAKE_MAKE_PROGRAM} install NO_SHARED=1 PREFIX=<INSTALL_DIR>
INSTALL_COMMAND make install PREFIX=<INSTALL_DIR>
UPDATE_COMMAND "" UPDATE_COMMAND ""
CONFIGURE_COMMAND ""
)
ExternalProject_Add_Step(
openblas lapacke_install
COMMAND ${CMAKE_COMMAND} -E copy "${CBLAS_SOURCES_DIR}/src/openblas/lapack-netlib/LAPACKE/include/lapacke_mangling_with_flags.h" "${CBLAS_INSTALL_DIR}/include/lapacke_mangling.h"
COMMAND ${CMAKE_COMMAND} -E copy "${CBLAS_SOURCES_DIR}/src/openblas/lapack-netlib/LAPACKE/include/lapacke.h" "${CBLAS_INSTALL_DIR}/include/lapacke.h"
COMMAND ${CMAKE_COMMAND} -E copy "${CBLAS_SOURCES_DIR}/src/openblas/lapack-netlib/LAPACKE/include/lapacke_config.h" "${CBLAS_INSTALL_DIR}/include/lapacke_config.h"
COMMAND ${CMAKE_COMMAND} -E copy "${CBLAS_SOURCES_DIR}/src/openblas/lapack-netlib/LAPACKE/include/lapacke_utils.h" "${CBLAS_INSTALL_DIR}/include/lapacke_utils.h"
DEPENDEES install
) )
LIST(APPEND external_project_dependencies openblas) LIST(APPEND external_project_dependencies openblas)
ENDIF() ENDIF(NOT ${CBLAS_FOUND})
INCLUDE_DIRECTORIES(${CBLAS_INC_DIR}) INCLUDE_DIRECTORIES(${CBLAS_INC_DIR})
...@@ -54,6 +54,7 @@ ExternalProject_Add( ...@@ -54,6 +54,7 @@ ExternalProject_Add(
CONFIGURE_COMMAND CONFIGURE_COMMAND
${CMAKE_COMMAND} ${PROTOBUF_SOURCES_DIR}/src/protobuf/cmake ${CMAKE_COMMAND} ${PROTOBUF_SOURCES_DIR}/src/protobuf/cmake
-Dprotobuf_BUILD_TESTS=OFF -Dprotobuf_BUILD_TESTS=OFF
-DZLIB_ROOT:FILEPATH=${ZLIB_ROOT}
-DCMAKE_POSITION_INDEPENDENT_CODE=ON -DCMAKE_POSITION_INDEPENDENT_CODE=ON
-DCMAKE_BUILD_TYPE=Release -DCMAKE_BUILD_TYPE=Release
-DCMAKE_INSTALL_PREFIX=${PROTOBUF_INSTALL_DIR} -DCMAKE_INSTALL_PREFIX=${PROTOBUF_INSTALL_DIR}
......
...@@ -31,6 +31,7 @@ IF(PYTHONLIBS_FOUND AND PYTHONINTERP_FOUND) ...@@ -31,6 +31,7 @@ IF(PYTHONLIBS_FOUND AND PYTHONINTERP_FOUND)
"please use pip to upgrade protobuf.") "please use pip to upgrade protobuf.")
ENDIF(${PY_GOOGLE.PROTOBUF_VERSION} VERSION_LESS "3.0.0") ENDIF(${PY_GOOGLE.PROTOBUF_VERSION} VERSION_LESS "3.0.0")
ELSE(PYTHONLIBS_FOUND AND PYTHONINTERP_FOUND) ELSE(PYTHONLIBS_FOUND AND PYTHONINTERP_FOUND)
MESSAGE(FATAL_ERROR "Please install python 2.7 before building PaddlePaddle.")
##################################### PYTHON ######################################## ##################################### PYTHON ########################################
SET(PYTHON_SOURCES_DIR ${THIRD_PARTY_PATH}/python) SET(PYTHON_SOURCES_DIR ${THIRD_PARTY_PATH}/python)
SET(PYTHON_INSTALL_DIR ${THIRD_PARTY_PATH}/install/python) SET(PYTHON_INSTALL_DIR ${THIRD_PARTY_PATH}/install/python)
......
...@@ -96,6 +96,7 @@ set(COMMON_FLAGS ...@@ -96,6 +96,7 @@ set(COMMON_FLAGS
-Wno-unused-parameter -Wno-unused-parameter
-Wno-unused-function -Wno-unused-function
-Wno-error=literal-suffix -Wno-error=literal-suffix
-Wno-error=sign-compare
-Wno-error=unused-local-typedefs) -Wno-error=unused-local-typedefs)
set(GPU_COMMON_FLAGS set(GPU_COMMON_FLAGS
...@@ -105,6 +106,7 @@ set(GPU_COMMON_FLAGS ...@@ -105,6 +106,7 @@ set(GPU_COMMON_FLAGS
-Wdelete-non-virtual-dtor -Wdelete-non-virtual-dtor
-Wno-unused-parameter -Wno-unused-parameter
-Wno-unused-function -Wno-unused-function
-Wno-error=sign-compare
-Wno-error=literal-suffix -Wno-error=literal-suffix
-Wno-error=unused-local-typedefs -Wno-error=unused-local-typedefs
-Wno-error=unused-function # Warnings in Numpy Header. -Wno-error=unused-function # Warnings in Numpy Header.
......
...@@ -21,6 +21,7 @@ ELSE(WIN32) ...@@ -21,6 +21,7 @@ ELSE(WIN32)
SET(MACOS_VERSION ${VERSION}) SET(MACOS_VERSION ${VERSION})
SET(HOST_SYSTEM "macosx") SET(HOST_SYSTEM "macosx")
ELSE(APPLE) ELSE(APPLE)
IF(EXISTS "/etc/issue") IF(EXISTS "/etc/issue")
FILE(READ "/etc/issue" LINUX_ISSUE) FILE(READ "/etc/issue" LINUX_ISSUE)
IF(LINUX_ISSUE MATCHES "CentOS") IF(LINUX_ISSUE MATCHES "CentOS")
...@@ -31,6 +32,14 @@ ELSE(WIN32) ...@@ -31,6 +32,14 @@ ELSE(WIN32)
SET(HOST_SYSTEM "ubuntu") SET(HOST_SYSTEM "ubuntu")
ENDIF() ENDIF()
ENDIF(EXISTS "/etc/issue") ENDIF(EXISTS "/etc/issue")
IF(EXISTS "/etc/redhat-release")
FILE(READ "/etc/redhat-release" LINUX_ISSUE)
IF(LINUX_ISSUE MATCHES "CentOS")
SET(HOST_SYSTEM "centos")
ENDIF()
ENDIF(EXISTS "/etc/redhat-release")
ENDIF(APPLE) ENDIF(APPLE)
ENDIF(WIN32) ENDIF(WIN32)
...@@ -47,7 +56,7 @@ SET(EXTERNAL_PROJECT_LOG_ARGS ...@@ -47,7 +56,7 @@ SET(EXTERNAL_PROJECT_LOG_ARGS
LOG_DOWNLOAD 0 # Wrap download in script to log output LOG_DOWNLOAD 0 # Wrap download in script to log output
LOG_UPDATE 1 # Wrap update in script to log output LOG_UPDATE 1 # Wrap update in script to log output
LOG_CONFIGURE 1 # Wrap configure in script to log output LOG_CONFIGURE 1 # Wrap configure in script to log output
LOG_BUILD 1 # Wrap build in script to log output LOG_BUILD 0 # Wrap build in script to log output
LOG_TEST 1 # Wrap test in script to log output LOG_TEST 1 # Wrap test in script to log output
LOG_INSTALL 1 # Wrap install in script to log output LOG_INSTALL 0 # Wrap install in script to log output
) )
...@@ -4,6 +4,8 @@ Installing from Sources ...@@ -4,6 +4,8 @@ Installing from Sources
* [1. Download and Setup](#download) * [1. Download and Setup](#download)
* [2. Requirements](#requirements) * [2. Requirements](#requirements)
* [3. Build on Ubuntu](#ubuntu) * [3. Build on Ubuntu](#ubuntu)
* [4. Build on Centos](#centos)
## <span id="download">Download and Setup</span> ## <span id="download">Download and Setup</span>
You can download PaddlePaddle from the [github source](https://github.com/PaddlePaddle/Paddle). You can download PaddlePaddle from the [github source](https://github.com/PaddlePaddle/Paddle).
...@@ -64,7 +66,8 @@ As a simple example, consider the following: ...@@ -64,7 +66,8 @@ As a simple example, consider the following:
1. **BLAS Dependencies(optional)** 1. **BLAS Dependencies(optional)**
Paddle will find BLAS from system's default path. But you can specify MKL, OpenBLAS or ATLAS via `MKL_ROOT`, `OPENBLAS_ROOT` or `ATLAS_ROOT`. CMake will search BLAS libraries from system. If not found, OpenBLAS will be downloaded, built and installed automatically.
To utilize preinstalled BLAS, you can simply specify MKL, OpenBLAS or ATLAS via `MKL_ROOT`, `OPENBLAS_ROOT` or `ATLAS_ROOT`.
```bash ```bash
# specify MKL # specify MKL
...@@ -99,7 +102,7 @@ As a simple example, consider the following: ...@@ -99,7 +102,7 @@ As a simple example, consider the following:
```bash ```bash
# necessary # necessary
sudo apt-get update sudo apt-get update
sudo apt-get install -y g++ make cmake build-essential libatlas-base-dev python python-pip libpython-dev git sudo apt-get install -y g++ make cmake build-essential python python-pip libpython-dev git
sudo pip install wheel numpy sudo pip install wheel numpy
sudo pip install 'protobuf>=3.0.0' sudo pip install 'protobuf>=3.0.0'
``` ```
...@@ -150,3 +153,64 @@ export PATH=<path to install>/bin:$PATH ...@@ -150,3 +153,64 @@ export PATH=<path to install>/bin:$PATH
# install PaddlePaddle Python modules. # install PaddlePaddle Python modules.
sudo pip install <path to install>/opt/paddle/share/wheels/*.whl sudo pip install <path to install>/opt/paddle/share/wheels/*.whl
``` ```
## <span id="centos">Build on Centos 7</span>
### Install Dependencies
- **CPU Dependencies**
```bash
# necessary
sudo yum update
sudo yum install -y epel-release
sudo yum install -y make cmake3 python-devel python-pip gcc-gfortran swig git
sudo pip install wheel numpy
sudo pip install 'protobuf>=3.0.0'
```
- **GPU Dependencies (optional)**
To build GPU version, you will need the following installed:
1. a CUDA-capable GPU
2. A supported version of Linux with a gcc compiler and toolchain
3. NVIDIA CUDA Toolkit (available at http://developer.nvidia.com/cuda-downloads)
4. NVIDIA cuDNN Library (available at https://developer.nvidia.com/cudnn)
The CUDA development environment relies on tight integration with the host development environment,
including the host compiler and C runtime libraries, and is therefore only supported on
distribution versions that have been qualified for this CUDA Toolkit release.
After downloading cuDNN library, issue the following commands:
```bash
sudo tar -xzf cudnn-7.5-linux-x64-v5.1.tgz -C /usr/local
sudo chmod a+r /usr/local/cuda/include/cudnn.h /usr/local/cuda/lib64/libcudnn*
```
Then you need to set the LD\_LIBRARY\_PATH and PATH environment variables in ~/.bashrc.
```bash
export LD_LIBRARY_PATH=/usr/local/cuda/lib64:$LD_LIBRARY_PATH
export PATH=/usr/local/cuda/bin:$PATH
```
### Build and Install
As usual, the best option is to create a build folder under the paddle project directory.
```bash
mkdir build && cd build
```
Finally, you can build and install PaddlePaddle:
```bash
# you can add build option here, such as:
cmake3 .. -DCMAKE_INSTALL_PREFIX=<path to install>
# please use sudo make install, if you want to install PaddlePaddle into the system
make -j `nproc` && make install
# set PaddlePaddle installation path in ~/.bashrc
export PATH=<path to install>/bin:$PATH
# install PaddlePaddle Python modules.
sudo pip install <path to install>/opt/paddle/share/wheels/*.whl
```
...@@ -32,7 +32,7 @@ pooling_layer 的使用示例如下,详细见 :ref:`api_trainer_config_helpers ...@@ -32,7 +32,7 @@ pooling_layer 的使用示例如下,详细见 :ref:`api_trainer_config_helpers
- `pooling_type` 目前支持两种,分别是:MaxPooling()和AvgPooling()。 - `pooling_type` 目前支持两种,分别是:MaxPooling()和AvgPooling()。
- `agg_level=AggregateLevel.TIMESTEP` 时(默认值): - `agg_level=AggregateLevel.EACH_TIMESTEP` 时(默认值):
- 作用:双层序列经过运算变成一个0层序列,或单层序列经过运算变成一个0层序列 - 作用:双层序列经过运算变成一个0层序列,或单层序列经过运算变成一个0层序列
- 输入:一个双层序列,或一个单层序列 - 输入:一个双层序列,或一个单层序列
...@@ -54,7 +54,7 @@ last_seq 的使用示例如下( :ref:`api_trainer_config_helpers_layers_first_ ...@@ -54,7 +54,7 @@ last_seq 的使用示例如下( :ref:`api_trainer_config_helpers_layers_first_
last = last_seq(input=layer, last = last_seq(input=layer,
agg_level=AggregateLevel.EACH_SEQUENCE) agg_level=AggregateLevel.EACH_SEQUENCE)
- `agg_level=AggregateLevel.TIMESTEP` 时(默认值): - `agg_level=AggregateLevel.EACH_TIMESTEP` 时(默认值):
- 作用:一个双层序列经过运算变成一个0层序列,或一个单层序列经过运算变成一个0层序列 - 作用:一个双层序列经过运算变成一个0层序列,或一个单层序列经过运算变成一个0层序列
- 输入:一个双层序列或一个单层序列 - 输入:一个双层序列或一个单层序列
......
此差异已折叠。
...@@ -20,23 +20,27 @@ limitations under the License. */ ...@@ -20,23 +20,27 @@ limitations under the License. */
namespace paddle { namespace paddle {
const SequenceArg& BufferArg::sequence() const { const SequenceArg& BufferArg::sequence() const {
// CHECK_EQ(bufferType_, TENSOR_SEQUENCE_DATA); CHECK_EQ(bufferType_, TENSOR_SEQUENCE_DATA);
return dynamic_cast<const SequenceArg&>(*this); return dynamic_cast<const SequenceArg&>(*this);
} }
const SparseMatrixArg& BufferArg::sparse() const { const SparseMatrixArg& BufferArg::sparse() const {
// CHECK_EQ(bufferType_, TENSOR_SPARSE); CHECK_EQ(bufferType_, TENSOR_SPARSE);
return dynamic_cast<const SparseMatrixArg&>(*this); return dynamic_cast<const SparseMatrixArg&>(*this);
} }
SparseMatrixArg::SparseMatrixArg(const CpuSparseMatrix& sparse, ArgType argType) SparseMatrixArg::SparseMatrixArg(const CpuSparseMatrix& sparse, ArgType argType)
: BufferArg(sparse, argType), : BufferArg(sparse, argType),
row_(reinterpret_cast<void*>(sparse.getRows()), VALUE_TYPE_INT32), row_(reinterpret_cast<void*>(sparse.getRows()), VALUE_TYPE_INT32),
col_(reinterpret_cast<void*>(sparse.getCols()), VALUE_TYPE_INT32) {} col_(reinterpret_cast<void*>(sparse.getCols()), VALUE_TYPE_INT32) {
bufferType_ = TENSOR_SPARSE;
}
SparseMatrixArg::SparseMatrixArg(const GpuSparseMatrix& sparse, ArgType argType) SparseMatrixArg::SparseMatrixArg(const GpuSparseMatrix& sparse, ArgType argType)
: BufferArg(sparse, argType), : BufferArg(sparse, argType),
row_(reinterpret_cast<void*>(sparse.getRows()), VALUE_TYPE_INT32), row_(reinterpret_cast<void*>(sparse.getRows()), VALUE_TYPE_INT32),
col_(reinterpret_cast<void*>(sparse.getCols()), VALUE_TYPE_INT32) {} col_(reinterpret_cast<void*>(sparse.getCols()), VALUE_TYPE_INT32) {
bufferType_ = TENSOR_SPARSE;
}
} // namespace paddle } // namespace paddle
...@@ -23,10 +23,11 @@ limitations under the License. */ ...@@ -23,10 +23,11 @@ limitations under the License. */
namespace paddle { namespace paddle {
enum BufferType { enum BufferType {
TENSOR_NORMAL = 0, TENSOR_UNKNOWN = 0,
TENSOR_SEQUENCE_ID = 1, TENSOR_NORMAL = 1,
TENSOR_SEQUENCE_DATA = 2, TENSOR_SEQUENCE_ID = 2,
TENSOR_SPARSE = 3 TENSOR_SEQUENCE_DATA = 3,
TENSOR_SPARSE = 4
}; };
enum SparseDataType { enum SparseDataType {
...@@ -39,7 +40,6 @@ enum SparseDataFormat { SPARSE_CSR_FORMAT = 0, SPARSE_CSC_FORMAT = 1 }; ...@@ -39,7 +40,6 @@ enum SparseDataFormat { SPARSE_CSR_FORMAT = 0, SPARSE_CSC_FORMAT = 1 };
class BufferArg; class BufferArg;
class SequenceArg; class SequenceArg;
class SparseMatrixArg; class SparseMatrixArg;
typedef std::shared_ptr<BufferArg> BufferArgPtr;
/** /**
* \brief BufferArg used as the argument type of Function. * \brief BufferArg used as the argument type of Function.
...@@ -50,6 +50,11 @@ typedef std::shared_ptr<BufferArg> BufferArgPtr; ...@@ -50,6 +50,11 @@ typedef std::shared_ptr<BufferArg> BufferArgPtr;
* 3. SequenceArg for a Buffer of sequence data. * 3. SequenceArg for a Buffer of sequence data.
* 4. SparseMatrixArg for a Buffer of sparse matrix. * 4. SparseMatrixArg for a Buffer of sparse matrix.
* *
* Buffer shape
* For most buffers, the first dimension `shape()[0]` represents
* the size of the mini-batch.
*
* Buffer argType
* There is an ArgType property for the BufferArg used as Function Output. * There is an ArgType property for the BufferArg used as Function Output.
* Whether the result of the Function calculation is assigned to the * Whether the result of the Function calculation is assigned to the
* output Buffer or added to the output Buffer is determined by the * output Buffer or added to the output Buffer is determined by the
...@@ -71,6 +76,14 @@ public: ...@@ -71,6 +76,14 @@ public:
ArgType getArgType() const { return argType_; } ArgType getArgType() const { return argType_; }
public: public:
BufferArg(ValueType valueType,
const TensorShape& shape,
ArgType argType = UNSPECIFIED)
: buf_(nullptr),
valueType_(valueType),
shape_(shape),
argType_(argType) {}
BufferArg(void* buf, BufferArg(void* buf,
ValueType valueType, ValueType valueType,
const TensorShape& shape, const TensorShape& shape,
...@@ -86,6 +99,7 @@ public: ...@@ -86,6 +99,7 @@ public:
valueType_(DataType<real>::value), valueType_(DataType<real>::value),
shape_(2), shape_(2),
argType_(argType) { argType_(argType) {
bufferType_ = TENSOR_NORMAL;
shape_.setDim(0, matrix.getHeight()); shape_.setDim(0, matrix.getHeight());
shape_.setDim(1, matrix.getWidth()); shape_.setDim(1, matrix.getWidth());
} }
...@@ -98,6 +112,7 @@ public: ...@@ -98,6 +112,7 @@ public:
valueType_(DataType<real>::value), valueType_(DataType<real>::value),
shape_(shape), shape_(shape),
argType_(argType) { argType_(argType) {
bufferType_ = TENSOR_NORMAL;
CHECK_EQ(matrix.getElementCnt(), shape.getElements()); CHECK_EQ(matrix.getElementCnt(), shape.getElements());
} }
...@@ -107,6 +122,7 @@ public: ...@@ -107,6 +122,7 @@ public:
valueType_(DataType<real>::value), valueType_(DataType<real>::value),
shape_(1), shape_(1),
argType_(argType) { argType_(argType) {
bufferType_ = TENSOR_NORMAL;
shape_.setDim(0, vector.getSize()); shape_.setDim(0, vector.getSize());
} }
...@@ -116,6 +132,7 @@ public: ...@@ -116,6 +132,7 @@ public:
valueType_(VALUE_TYPE_INT32), valueType_(VALUE_TYPE_INT32),
shape_(1), shape_(1),
argType_(argType) { argType_(argType) {
bufferType_ = TENSOR_NORMAL;
shape_.setDim(0, vector.getSize()); shape_.setDim(0, vector.getSize());
} }
...@@ -150,6 +167,8 @@ public: ...@@ -150,6 +167,8 @@ public:
ValueType valueType() const { return valueType_; } ValueType valueType() const { return valueType_; }
BufferType bufferType() const { return bufferType_; } BufferType bufferType() const { return bufferType_; }
const TensorShape& shape() const { return shape_; } const TensorShape& shape() const { return shape_; }
bool isSparse() const { return (TENSOR_SPARSE == bufferType_); }
bool isSequenceArg() const { return TENSOR_SEQUENCE_DATA == bufferType_; }
const SequenceArg& sequence() const; const SequenceArg& sequence() const;
const SparseMatrixArg& sparse() const; const SparseMatrixArg& sparse() const;
...@@ -158,8 +177,8 @@ protected: ...@@ -158,8 +177,8 @@ protected:
void* buf_; void* buf_;
ValueType valueType_; ValueType valueType_;
TensorShape shape_; TensorShape shape_;
BufferType bufferType_; BufferType bufferType_{TENSOR_UNKNOWN};
ArgType argType_ = UNSPECIFIED; ArgType argType_{UNSPECIFIED};
// leading dimensions. The size is dims_.size() // leading dimensions. The size is dims_.size()
// Dims lds_; // Dims lds_;
}; };
...@@ -170,15 +189,24 @@ protected: ...@@ -170,15 +189,24 @@ protected:
// if a < b then value_.buf_[a] < value_.buf_[b] // if a < b then value_.buf_[a] < value_.buf_[b]
class SequenceIdArg : public BufferArg { class SequenceIdArg : public BufferArg {
public: public:
SequenceIdArg(const TensorShape& shape, ArgType argType = UNSPECIFIED)
: BufferArg(VALUE_TYPE_INT32, shape, argType) {
CHECK_EQ(shape_.ndims(), (size_t)1);
CHECK_GT(shape_[0], 1);
numSeqs_ = shape_[0] - 1;
}
SequenceIdArg(void* buf, SequenceIdArg(void* buf,
const TensorShape& shape, const TensorShape& shape,
ArgType argType = UNSPECIFIED) ArgType argType = UNSPECIFIED)
: BufferArg(buf, VALUE_TYPE_INT32, shape, argType) { : BufferArg(buf, VALUE_TYPE_INT32, shape, argType) {
bufferType_ = TENSOR_SEQUENCE_ID;
CHECK_EQ(shape_.ndims(), (size_t)1); CHECK_EQ(shape_.ndims(), (size_t)1);
numSeqs_ = shape_[0] - 1; numSeqs_ = shape_[0] - 1;
} }
SequenceIdArg(const IVector& vector) : BufferArg(vector) { SequenceIdArg(const IVector& vector) : BufferArg(vector) {
bufferType_ = TENSOR_SEQUENCE_ID;
numSeqs_ = shape_[0] - 1; numSeqs_ = shape_[0] - 1;
} }
...@@ -190,26 +218,41 @@ private: ...@@ -190,26 +218,41 @@ private:
size_t numSeqs_; size_t numSeqs_;
}; };
// sequence data // sequences data
// In mini-batch computation,
// one batch can contain more than one sequence of data.
// SequenceArg can be used to represent multiple sequences
// of unequal lengths.
class SequenceArg : public BufferArg { class SequenceArg : public BufferArg {
public: public:
SequenceArg(ValueType valueType,
const TensorShape& shape,
ArgType argType = UNSPECIFIED)
: BufferArg(valueType, shape, argType), startPositions_(TensorShape()) {}
SequenceArg(void* buf, SequenceArg(void* buf,
ValueType valueType, ValueType valueType,
const TensorShape& shape, const TensorShape& shape,
const SequenceIdArg& startPositions, const SequenceIdArg& startPositions,
ArgType argType = UNSPECIFIED) ArgType argType = UNSPECIFIED)
: BufferArg(buf, valueType, shape, argType), : BufferArg(buf, valueType, shape, argType),
startPositions_(startPositions) {} startPositions_(startPositions) {
bufferType_ = TENSOR_SEQUENCE_DATA;
}
SequenceArg(const Matrix& matrix, SequenceArg(const Matrix& matrix,
const IVector& vector, const IVector& vector,
ArgType argType = UNSPECIFIED) ArgType argType = UNSPECIFIED)
: BufferArg(matrix, argType), startPositions_(vector) {} : BufferArg(matrix, argType), startPositions_(vector) {
bufferType_ = TENSOR_SEQUENCE_DATA;
}
~SequenceArg() {} ~SequenceArg() {}
void* getIdBuf() const { return startPositions_.data(); } void* getIdBuf() const { return startPositions_.data(); }
size_t numSeqs() const { return startPositions_.numSeqs(); } size_t numSeqs() const { return startPositions_.numSeqs(); }
SequenceIdArg& getSequenceId() { return startPositions_; }
const SequenceIdArg& getSequenceId() const { return startPositions_; }
private: private:
SequenceIdArg startPositions_; SequenceIdArg startPositions_;
...@@ -235,6 +278,7 @@ public: ...@@ -235,6 +278,7 @@ public:
nnz_(nnz), nnz_(nnz),
format_(format), format_(format),
type_(type) { type_(type) {
bufferType_ = TENSOR_SPARSE;
CHECK((valueType == VALUE_TYPE_FLOAT) || (valueType == VALUE_TYPE_DOUBLE)); CHECK((valueType == VALUE_TYPE_FLOAT) || (valueType == VALUE_TYPE_DOUBLE));
CHECK_EQ(shape_.ndims(), (size_t)2); CHECK_EQ(shape_.ndims(), (size_t)2);
CHECK_EQ(row_.shape().ndims(), (size_t)1); CHECK_EQ(row_.shape().ndims(), (size_t)1);
......
...@@ -14,9 +14,7 @@ limitations under the License. */ ...@@ -14,9 +14,7 @@ limitations under the License. */
#include "BufferArg.h" #include "BufferArg.h"
#include <gtest/gtest.h> #include <gtest/gtest.h>
#include "Function.h"
#include "paddle/math/MemoryHandle.h" #include "paddle/math/MemoryHandle.h"
#include "paddle/math/SparseMatrix.h"
namespace paddle { namespace paddle {
...@@ -37,55 +35,4 @@ TEST(BufferTest, SequenceIdArg) { ...@@ -37,55 +35,4 @@ TEST(BufferTest, SequenceIdArg) {
EXPECT_EQ(buffer.numSeqs(), 9); EXPECT_EQ(buffer.numSeqs(), 9);
} }
TEST(BufferTest, asArgument) {
MatrixPtr matrix = Matrix::create(100, 200);
VectorPtr vector = Vector::create(100, false);
CpuSparseMatrix sparse(200, 300, 50);
// prepare arguments
BufferArgs argments;
argments.addArg(*matrix);
argments.addArg(*vector);
argments.addArg(sparse);
// function
auto function = [=](const BufferArgs& inputs) {
EXPECT_EQ(inputs.size(), 3);
// check inputs[0]
EXPECT_EQ(inputs[0].shape().ndims(), 2);
EXPECT_EQ(inputs[0].shape()[0], 100);
EXPECT_EQ(inputs[0].shape()[1], 200);
EXPECT_EQ(inputs[0].data(), matrix->getData());
EXPECT_EQ(inputs[0].matrix<DEVICE_TYPE_CPU>().getHeight(),
matrix->getHeight());
EXPECT_EQ(inputs[0].matrix<DEVICE_TYPE_CPU>().getWidth(),
matrix->getWidth());
EXPECT_EQ(inputs[0].matrix<DEVICE_TYPE_CPU>().getData(), matrix->getData());
// check inputs[1]
EXPECT_EQ(inputs[1].shape().ndims(), 1);
EXPECT_EQ(inputs[1].shape()[0], 100);
EXPECT_EQ(inputs[1].data(), vector->getData());
CpuVector inVector = inputs[1].vector<real, DEVICE_TYPE_CPU>();
EXPECT_EQ(inVector.getSize(), vector->getSize());
EXPECT_EQ(inVector.getData(), vector->getData());
// check inputs[2]
EXPECT_EQ(inputs[2].shape().ndims(), 2);
EXPECT_EQ(inputs[2].shape()[0], 200);
EXPECT_EQ(inputs[2].shape()[1], 300);
EXPECT_EQ(inputs[2].data(), sparse.getData());
// CHECK_EQ(inputs[2].sparse().nnz(), 50);
// CHECK_EQ(inputs[2].sparse().dataFormat(), SPARSE_CSR_FORMAT);
// CHECK_EQ(inputs[2].sparse().dataType(), SPARSE_FLOAT_VALUE);
EXPECT_EQ(inputs[2].sparse().getRowBuf(), sparse.getRows());
EXPECT_EQ(inputs[2].sparse().getColBuf(), sparse.getCols());
};
// call function
function(argments);
}
} // namespace paddle } // namespace paddle
...@@ -19,13 +19,13 @@ if(WITH_TESTING) ...@@ -19,13 +19,13 @@ if(WITH_TESTING)
# TODO: # TODO:
# file(GLOB test_files . *OpTest.cpp) # file(GLOB test_files . *OpTest.cpp)
# add_executable(${test_bin} EXCLUDE_FROM_ALL ${test_files}) # add_executable(${test_bin} EXCLUDE_FROM_ALL ${test_files})
# add_simple_unittest(CrossMapNormalOpTest) add_simple_unittest(CrossMapNormalOpTest)
add_simple_unittest(TensorShapeTest) add_simple_unittest(TensorShapeTest)
add_simple_unittest(TensorTypeTest) add_simple_unittest(TensorTypeTest)
add_simple_unittest(BufferArgTest) add_simple_unittest(BufferArgTest)
add_simple_unittest(FunctionTest) add_simple_unittest(FunctionTest)
add_simple_unittest(ContextProjectionOpTest)
add_simple_unittest(PadOpTest) add_simple_unittest(PadOpTest)
# add_simple_unittest(ContextProjectionOpTest)
endif() endif()
endif() endif()
......
...@@ -17,7 +17,10 @@ limitations under the License. */ ...@@ -17,7 +17,10 @@ limitations under the License. */
#include "paddle/math/Vector.h" #include "paddle/math/Vector.h"
namespace paddle { namespace paddle {
/**
* Context Projection Forward with CPU Matrix Device.
*
*/
template <> template <>
void ContextProjectionForward<DEVICE_TYPE_CPU>(CpuMatrix& out_mat, void ContextProjectionForward<DEVICE_TYPE_CPU>(CpuMatrix& out_mat,
const CpuMatrix& input_mat, const CpuMatrix& input_mat,
...@@ -70,10 +73,30 @@ void ContextProjectionForward<DEVICE_TYPE_CPU>(CpuMatrix& out_mat, ...@@ -70,10 +73,30 @@ void ContextProjectionForward<DEVICE_TYPE_CPU>(CpuMatrix& out_mat,
} }
/** /**
* \param inputs[0] input value. * Paddle Function for Context Projection Forward.
* \param inputs[1] input weight. * Calculate the output layer value sequence after context projection.
* \param inputs[2] input sequence. *
* \param outputs[0] output value. * What is Context Projection for a sequence?
* For example, assume the input (x) has 4 words and the dimension of each word
* representation is 2. If we use zero to pad instead of learned weight to pad,
* and the context_length is 3, the output (y) is:
*
* @code
* x = [a1, a2;
* b1, b2;
* c1, c2;
* d1, d2]
* y = [0, 0, a1, a2, b1, b2;
* a1, a2, b1, b2, c1, c2;
* b1, b2, c1, c2, d1, d2;
* c1, c2, d1, d2, 0, 0]
* @endcode
*
* \param outputs[0].matrix output layer value, n * (d * l)
* \param outputs[0].vector start position sequence, n * 1
* \param inputs[0].matrix input layer value, n * d
* \param inputs[0].vector start position sequence, n * 1
* \param inputs[1].matrix input layer weight, pad * d
*/ */
template <DeviceType Device> template <DeviceType Device>
class ContextProjectionForwardFunc : public FunctionBase { class ContextProjectionForwardFunc : public FunctionBase {
...@@ -85,28 +108,37 @@ public: ...@@ -85,28 +108,37 @@ public:
} }
void calc(const BufferArgs& inputs, const BufferArgs& outputs) override { void calc(const BufferArgs& inputs, const BufferArgs& outputs) override {
CHECK_EQ((size_t)3, inputs.size()); CHECK(1 == inputs.size() || 2 == inputs.size());
CHECK_EQ((size_t)1, outputs.size()); CHECK_EQ((size_t)1, outputs.size());
CHECK(inputs[0].isSequenceArg() && outputs[0].isSequenceArg())
<< "SequenceArg required here";
const auto val_seqs = dynamic_cast<const SequenceArg&>(inputs[0]);
auto out_seq = dynamic_cast<const SequenceArg&>(outputs[0]);
CHECK(outputs[0].data() && inputs[0].data() && inputs[2].data()); CHECK(out_seq.data() && val_seqs.data() && val_seqs.getSequenceId().data());
CHECK_EQ(outputs[0].shape().ndims(), (size_t)2); CHECK_EQ(out_seq.shape().ndims(), (size_t)2);
CHECK_EQ(inputs[0].shape().ndims(), (size_t)2); CHECK_EQ(val_seqs.shape().ndims(), (size_t)2);
CHECK_EQ(inputs[1].shape().ndims(), (size_t)2); CHECK_EQ(val_seqs.getSequenceId().shape().ndims(), (size_t)1);
CHECK_EQ(inputs[2].shape().ndims(), (size_t)1); if (2 == inputs.size()) {
CHECK_EQ(inputs[1].shape().ndims(), (size_t)2);
}
/// dim of output = dim of input * context_length /// dim of output = dim of input * context_length
CHECK_EQ(outputs[0].shape()[1], inputs[0].shape()[1] * context_length_); CHECK_EQ(out_seq.shape()[1], val_seqs.shape()[1] * context_length_);
/// dim of input == dim of weight
CHECK_EQ(inputs[0].shape()[1], inputs[1].shape()[1]);
/// input and output has the same batch_size /// input and output has the same batch_size
CHECK_EQ(inputs[0].shape()[0], outputs[0].shape()[0]); CHECK_EQ(val_seqs.shape()[0], out_seq.shape()[0]);
/// dim of input == dim of weight
if (2 == inputs.size()) {
CHECK_EQ(val_seqs.shape()[1], inputs[1].shape()[1]);
}
CHECK_EQ(outputs[0].getArgType(), ADD_TO); CHECK_EQ(out_seq.getArgType(), ADD_TO);
auto out_mat = outputs[0].matrix<Device>(); auto out_mat = out_seq.matrix<Device>();
auto in_mat = inputs[0].matrix<Device>(); const auto in_mat = val_seqs.matrix<Device>();
auto w_mat = !inputs[1].data() const auto w_mat =
? typename Tensor<real, Device>::Matrix(nullptr, 0, 0) (2 == inputs.size())
: inputs[1].matrix<Device>(); ? inputs[1].matrix<Device>()
auto seq_vec = inputs[2].vector<int, Device>(); : typename Tensor<real, Device>::Matrix(nullptr, 0, 0);
const auto seq_vec = val_seqs.getSequenceId().vector<int, Device>();
ContextProjectionForward<Device>(out_mat, ContextProjectionForward<Device>(out_mat,
in_mat, in_mat,
w_mat, w_mat,
...@@ -122,8 +154,12 @@ private: ...@@ -122,8 +154,12 @@ private:
size_t begin_pad_; size_t begin_pad_;
}; };
/**
* Context Projection Backward with CPU Matrix Device.
*
*/
template <> template <>
void ContextProjectionBackward<DEVICE_TYPE_CPU>(CpuMatrix& out_grad_mat, void ContextProjectionBackward<DEVICE_TYPE_CPU>(const CpuMatrix& out_grad_mat,
CpuMatrix& in_grad_mat, CpuMatrix& in_grad_mat,
CpuMatrix& w_grad_mat, CpuMatrix& w_grad_mat,
const CpuIVector& seq_vec, const CpuIVector& seq_vec,
...@@ -146,7 +182,8 @@ void ContextProjectionBackward<DEVICE_TYPE_CPU>(CpuMatrix& out_grad_mat, ...@@ -146,7 +182,8 @@ void ContextProjectionBackward<DEVICE_TYPE_CPU>(CpuMatrix& out_grad_mat,
int64_t pad_size = int64_t pad_size =
std::min(starts[i] - begin, starts[i + 1] - starts[i]); std::min(starts[i] - begin, starts[i + 1] - starts[i]);
if (is_padding && w_grad_mat) { if (is_padding && w_grad_mat) {
MatrixPtr mat = out_grad_mat.subMatrix(starts[i], pad_size); MatrixPtr mat = const_cast<CpuMatrix&>(out_grad_mat)
.subMatrix(starts[i], pad_size);
MatrixPtr sub = w_grad_mat.subMatrix(j, pad_size); MatrixPtr sub = w_grad_mat.subMatrix(j, pad_size);
sub->addAtOffset(*mat, j * input_dim); sub->addAtOffset(*mat, j * input_dim);
} }
...@@ -157,8 +194,8 @@ void ContextProjectionBackward<DEVICE_TYPE_CPU>(CpuMatrix& out_grad_mat, ...@@ -157,8 +194,8 @@ void ContextProjectionBackward<DEVICE_TYPE_CPU>(CpuMatrix& out_grad_mat,
int64_t pad_size = int64_t pad_size =
std::min(end - starts[i + 1], starts[i + 1] - starts[i]); std::min(end - starts[i + 1], starts[i + 1] - starts[i]);
if (is_padding && w_grad_mat) { if (is_padding && w_grad_mat) {
MatrixPtr mat = MatrixPtr mat = const_cast<CpuMatrix&>(out_grad_mat)
out_grad_mat.subMatrix(starts[i + 1] - pad_size, pad_size); .subMatrix(starts[i + 1] - pad_size, pad_size);
MatrixPtr sub = w_grad_mat.subMatrix( MatrixPtr sub = w_grad_mat.subMatrix(
begin_pad + context_start + j - pad_size, pad_size); begin_pad + context_start + j - pad_size, pad_size);
sub->addAtOffset(*mat, j * input_dim); sub->addAtOffset(*mat, j * input_dim);
...@@ -169,17 +206,22 @@ void ContextProjectionBackward<DEVICE_TYPE_CPU>(CpuMatrix& out_grad_mat, ...@@ -169,17 +206,22 @@ void ContextProjectionBackward<DEVICE_TYPE_CPU>(CpuMatrix& out_grad_mat,
if (end <= begin) continue; if (end <= begin) continue;
if (!in_grad_mat) continue; if (!in_grad_mat) continue;
MatrixPtr src = in_grad_mat.subMatrix(begin, end - begin); MatrixPtr src = in_grad_mat.subMatrix(begin, end - begin);
MatrixPtr dst = out_grad_mat.subMatrix(dst_begin, dst_end - dst_begin); MatrixPtr dst = const_cast<CpuMatrix&>(out_grad_mat)
.subMatrix(dst_begin, dst_end - dst_begin);
src->addAtOffset(*dst, j * input_dim); src->addAtOffset(*dst, j * input_dim);
} }
} }
} }
/** /**
* \param inputs[0] input grad. * Context Projection Backward Function.
* \param inputs[1] weight grad. * Update the weight gradient and input layer gradient with backprop
* \param inputs[2] input sequence. *
* \param outputs[0] output value. * \param inputs[0].matrix output layer grad, n * (d * l)
* \param inputs[0].vector start position sequence, n * 1
* \param outputs[0].matrix input layer grad, n * d
* \param outputs[0].vector start position sequence, n * 1
* \param outputs[1] weight grad, pad * d
*/ */
template <DeviceType Device> template <DeviceType Device>
class ContextProjectionBackwardFunc : public FunctionBase { class ContextProjectionBackwardFunc : public FunctionBase {
...@@ -193,32 +235,36 @@ public: ...@@ -193,32 +235,36 @@ public:
} }
void calc(const BufferArgs& inputs, const BufferArgs& outputs) override { void calc(const BufferArgs& inputs, const BufferArgs& outputs) override {
CHECK_EQ((size_t)3, inputs.size()); CHECK_EQ((size_t)1, inputs.size());
CHECK_EQ((size_t)1, outputs.size()); CHECK_EQ((size_t)2, outputs.size());
CHECK(inputs[0].isSequenceArg() && outputs[0].isSequenceArg())
<< "SequenceArg required here";
const auto in_seq = dynamic_cast<const SequenceArg&>(inputs[0]);
auto out_seq = dynamic_cast<const SequenceArg&>(outputs[0]);
CHECK(in_seq.data() && in_seq.getSequenceId().data());
CHECK_EQ(in_seq.shape().ndims(), (size_t)2);
CHECK_EQ(in_seq.getSequenceId().shape().ndims(), (size_t)1);
CHECK_EQ(out_seq.shape().ndims(), (size_t)2);
CHECK_EQ(out_seq.getSequenceId().shape().ndims(), (size_t)1);
CHECK_EQ(outputs[1].shape().ndims(), (size_t)2);
CHECK(outputs[0].data() && inputs[2].data()); /// dim of input grad == dim of weight
CHECK_EQ(outputs[0].shape().ndims(), (size_t)2); CHECK_EQ(out_seq.shape()[1], outputs[1].shape()[1]);
CHECK_EQ(inputs[0].shape().ndims(), (size_t)2); /// input and output grad has the same batch_size
CHECK_EQ(inputs[1].shape().ndims(), (size_t)2); CHECK_EQ(out_seq.shape()[0], in_seq.shape()[0]);
CHECK_EQ(inputs[2].shape().ndims(), (size_t)1); /// dim of output grad = dim of input grad * context_length
CHECK_EQ(in_seq.shape()[1], out_seq.shape()[1] * context_length_);
CHECK_EQ(out_seq.getArgType(), ADD_TO);
CHECK_EQ(outputs[1].getArgType(), ADD_TO);
/// dim of input == dim of weight const auto seq_vec = in_seq.getSequenceId().vector<int, Device>();
CHECK_EQ(inputs[0].shape()[1], inputs[1].shape()[1]); const auto out_grad_mat = in_seq.matrix<Device>();
/// input and output has the same batch_size
CHECK_EQ(inputs[0].shape()[0], outputs[0].shape()[0]);
/// dim of output = dim of input * context_length
CHECK_EQ(outputs[0].shape()[1], inputs[0].shape()[1] * context_length_);
CHECK_EQ(outputs[0].getArgType(), ADD_TO);
auto out_grad_mat = outputs[0].matrix<Device>();
auto in_grad_mat = auto in_grad_mat =
!inputs[0].data() ? typename Tensor<real, Device>::Matrix(nullptr, 0, 0) !out_seq.data() ? typename Tensor<real, Device>::Matrix(nullptr, 0, 0)
: inputs[0].matrix<Device>(); : out_seq.matrix<Device>();
auto w_grad_mat = !inputs[1].data() auto w_grad_mat = !outputs[1].data()
? typename Tensor<real, Device>::Matrix(nullptr, 0, 0) ? typename Tensor<real, Device>::Matrix(nullptr, 0, 0)
: inputs[1].matrix<Device>(); : outputs[1].matrix<Device>();
auto seq_vec = inputs[2].vector<int, Device>();
ContextProjectionBackward<Device>(out_grad_mat, ContextProjectionBackward<Device>(out_grad_mat,
in_grad_mat, in_grad_mat,
w_grad_mat, w_grad_mat,
...@@ -238,11 +284,16 @@ private: ...@@ -238,11 +284,16 @@ private:
size_t total_pad_; size_t total_pad_;
}; };
#if 0
/** /**
* \param inputs[0] input grad. * Context Projection Backward Data Function
* \param inputs[1] input sequence. * Update input layer grad
* \param outputs[0] output grad. * input: sequence of output layer grad
* output: sequence of input layer grad
*
* \param outputs[0].matrix input layer grad, n * d
* \param outputs[0].vector start position sequence, n * 1
* \param inputs[0].matrix output layer grad, n * (d * l)
* \param inputs[0].vector start position sequence, n * 1
*/ */
template <DeviceType Device> template <DeviceType Device>
class ContextProjectionBackwardDataFunc : public FunctionBase { class ContextProjectionBackwardDataFunc : public FunctionBase {
...@@ -252,32 +303,30 @@ public: ...@@ -252,32 +303,30 @@ public:
context_start_ = config.get<int>("context_start"); context_start_ = config.get<int>("context_start");
} }
void calc(const Arguments& inputs, void calc(const BufferArgs& inputs, const BufferArgs& outputs) override {
const Arguments& outputs, CHECK_EQ(1, static_cast<int>(inputs.size()));
const Arguments& inouts) override {
CHECK_EQ(2, static_cast<int>(inputs.size()));
CHECK_EQ(1, static_cast<int>(outputs.size())); CHECK_EQ(1, static_cast<int>(outputs.size()));
CHECK_EQ(0, static_cast<int>(inouts.size())); CHECK(inputs[0].isSequenceArg() && outputs[0].isSequenceArg())
CHECK(inputs[0].getData() && outputs[0].getData() && inputs[1].getData()); << "SequenceArg required here";
CHECK_EQ(static_cast<int>(outputs[0].dims_.size()), 2); const auto in_seq = dynamic_cast<const SequenceArg&>(inputs[0]);
CHECK_EQ(static_cast<int>(inputs[0].dims_.size()), 2); const auto out_seq = dynamic_cast<const SequenceArg&>(outputs[0]);
CHECK_EQ(static_cast<int>(inputs[1].dims_.size()), 1);
CHECK_EQ(outputs[0].dims_[1], inputs[0].dims_[1] * context_length_); CHECK(in_seq.data() && out_seq.data() && in_seq.getSequenceId().data());
CHECK_EQ(static_cast<int>(out_seq.shape().ndims()), 2);
CHECK_EQ(static_cast<int>(in_seq.shape().ndims()), 2);
CHECK_EQ(static_cast<int>(in_seq.getSequenceId().shape().ndims()), 1);
/// output layer grad dim == input layer grad dim * context_length_
CHECK_EQ(in_seq.shape().ndims(), out_seq.shape().ndims() * context_length_);
/// input and output has the same batch_size /// input and output has the same batch_size
CHECK_EQ(inputs[0].dims_[0], outputs[0].dims_[0]); CHECK_EQ(in_seq.shape()[0], out_seq.shape()[0]);
CHECK_EQ(outputs[0].getArgType(), ASSIGN_TO);
auto out_grad_mat = std::make_shared<typename MatrixT<Device>::type>( const auto out_grad_mat = in_seq.matrix<Device>();
outputs[0].getData(), outputs[0].dims_[0], outputs[0].dims_[1]); const auto seq_vec = in_seq.getSequenceId().vector<int, Device>();
const auto in_grad_mat = std::make_shared<typename MatrixT<Device>::type>( auto in_grad_mat = out_seq.matrix<Device>();
inputs[0].getData(), inputs[0].dims_[0], inputs[0].dims_[1]);
typename SequenceT<Device>::type seq_vec(
inputs[1].dims_[0], reinterpret_cast<int*>(inputs[1].getData()));
ContextProjectionBackwardData<Device>(out_grad_mat.get(), ContextProjectionBackwardData<Device>(
in_grad_mat.get(), out_grad_mat, in_grad_mat, seq_vec, context_length_, context_start_);
seq_vec,
context_length_,
context_start_);
} }
private: private:
...@@ -286,9 +335,14 @@ private: ...@@ -286,9 +335,14 @@ private:
}; };
/** /**
* \param inputs[0] weight grad. * Context Projection Backward Weight Function
* \param inputs[1] input sequence. * Update weight grad by backprop
* \param outputs[0] output grad. * input: sequence of output layer grad
* output: weight grad
*
* \param outputs[0] weight grad, pad * d
* \param inputs[0].matrix output layer grad, n * (d * l)
* \param inputs[0].vector start position sequence, n * 1
*/ */
template <DeviceType Device> template <DeviceType Device>
class ContextProjectionBackwardWeightFunc : public FunctionBase { class ContextProjectionBackwardWeightFunc : public FunctionBase {
...@@ -300,28 +354,25 @@ public: ...@@ -300,28 +354,25 @@ public:
total_pad_ = config.get<size_t>("total_pad"); total_pad_ = config.get<size_t>("total_pad");
} }
void calc(const Arguments& inputs, void calc(const BufferArgs& inputs, const BufferArgs& outputs) override {
const Arguments& outputs, CHECK_EQ(1, static_cast<int>(inputs.size()));
const Arguments& inouts) override {
CHECK_EQ(2, static_cast<int>(inputs.size()));
CHECK_EQ(1, static_cast<int>(outputs.size())); CHECK_EQ(1, static_cast<int>(outputs.size()));
CHECK_EQ(0, static_cast<int>(inouts.size())); CHECK(inputs[0].isSequenceArg()) << "SequenceArg required here";
const auto in_seq = dynamic_cast<const SequenceArg&>(inputs[0]);
CHECK(inputs[0].getData() && outputs[0].getData() && inputs[1].getData()); CHECK(in_seq.data() && in_seq.getSequenceId().data() && outputs[0].data());
CHECK_EQ(static_cast<int>(outputs[0].dims_.size()), 2); CHECK_EQ(static_cast<int>(outputs[0].shape().ndims()), 2);
CHECK_EQ(static_cast<int>(inputs[0].dims_.size()), 2); CHECK_EQ(static_cast<int>(in_seq.shape().ndims()), 2);
CHECK_EQ(static_cast<int>(inputs[1].dims_.size()), 1); CHECK_EQ(static_cast<int>(in_seq.getSequenceId().shape().ndims()), 1);
CHECK_EQ(outputs[0].dims_[1], inputs[0].dims_[1] * context_length_); CHECK_EQ(in_seq.shape()[0], outputs[0].shape()[0]);
/// output layer grad dim == weight dim * context_length_
auto out_grad_mat = std::make_shared<typename MatrixT<Device>::type>( CHECK_EQ(in_seq.shape()[1], outputs[0].shape()[1] * context_length_);
outputs[0].getData(), outputs[0].dims_[0], outputs[0].dims_[1]); CHECK_EQ(outputs[0].getArgType(), ADD_TO);
auto w_grad_mat = std::make_shared<typename MatrixT<Device>::type>(
inputs[0].getData(), inputs[0].dims_[0], inputs[0].dims_[1]);
typename SequenceT<Device>::type seq_vec(
inputs[1].dims_[0], reinterpret_cast<int*>(inputs[1].getData()));
ContextProjectionBackwardWeight<Device>(out_grad_mat.get(), const auto seq_vec = in_seq.getSequenceId().vector<int, Device>();
w_grad_mat.get(), const auto out_grad_mat = in_seq.matrix<Device>();
auto w_grad_mat = outputs[0].matrix<Device>();
ContextProjectionBackwardWeight<Device>(out_grad_mat,
w_grad_mat,
seq_vec, seq_vec,
context_length_, context_length_,
context_start_, context_start_,
...@@ -335,7 +386,6 @@ private: ...@@ -335,7 +386,6 @@ private:
size_t begin_pad_; size_t begin_pad_;
size_t total_pad_; size_t total_pad_;
}; };
#endif
REGISTER_TYPED_FUNC(ContextProjectionForward, REGISTER_TYPED_FUNC(ContextProjectionForward,
CPU, CPU,
...@@ -350,7 +400,6 @@ REGISTER_TYPED_FUNC(ContextProjectionForward, ...@@ -350,7 +400,6 @@ REGISTER_TYPED_FUNC(ContextProjectionForward,
REGISTER_TYPED_FUNC(ContextProjectionBackward, REGISTER_TYPED_FUNC(ContextProjectionBackward,
GPU, GPU,
ContextProjectionBackwardFunc); ContextProjectionBackwardFunc);
#if 0
REGISTER_TYPED_FUNC(ContextProjectionBackwardData, REGISTER_TYPED_FUNC(ContextProjectionBackwardData,
GPU, GPU,
ContextProjectionBackwardDataFunc); ContextProjectionBackwardDataFunc);
...@@ -358,5 +407,4 @@ REGISTER_TYPED_FUNC(ContextProjectionBackwardWeight, ...@@ -358,5 +407,4 @@ REGISTER_TYPED_FUNC(ContextProjectionBackwardWeight,
GPU, GPU,
ContextProjectionBackwardWeightFunc); ContextProjectionBackwardWeightFunc);
#endif #endif
#endif
} // namespace paddle } // namespace paddle
...@@ -21,14 +21,14 @@ namespace paddle { ...@@ -21,14 +21,14 @@ namespace paddle {
/** /**
* \brief Context Projection Forward. * \brief Context Projection Forward.
* *
* \param[out] outputs output data. * \param[in/out] outputs output data.
* \param[in] input input data. * \param[in] input input data.
* \param[in] weight input weight. * \param[in] weight input weight.
* \param[in] sequence input data. * \param[in] sequence input data.
* \param[in] context_length consecutive rows for concatenation. * \param[in] context_length consecutive rows for concatenation.
* \param[in] context_start context start position. * \param[in] context_start context start position.
* \param[in] begin_pad beginning pad position. * \param[in] begin_pad beginning pad position.
* \param[in] is_padding whether padding 0 or not. * \param[in] is_padding whether padding 0 or not.
* *
*/ */
template <DeviceType DType> template <DeviceType DType>
...@@ -56,7 +56,7 @@ void ContextProjectionForward( ...@@ -56,7 +56,7 @@ void ContextProjectionForward(
*/ */
template <DeviceType DType> template <DeviceType DType>
void ContextProjectionBackward( void ContextProjectionBackward(
typename Tensor<real, DType>::Matrix& out_grad, const typename Tensor<real, DType>::Matrix& out_grad,
typename Tensor<real, DType>::Matrix& in_grad, typename Tensor<real, DType>::Matrix& in_grad,
typename Tensor<real, DType>::Matrix& w_grad, typename Tensor<real, DType>::Matrix& w_grad,
const typename Tensor<int, DType>::Vector& seq_vec, const typename Tensor<int, DType>::Vector& seq_vec,
...@@ -68,7 +68,7 @@ void ContextProjectionBackward( ...@@ -68,7 +68,7 @@ void ContextProjectionBackward(
template <DeviceType DType> template <DeviceType DType>
void ContextProjectionBackwardData( void ContextProjectionBackwardData(
typename Tensor<real, DType>::Matrix& out_grad, const typename Tensor<real, DType>::Matrix& out_grad,
typename Tensor<real, DType>::Matrix& in_grad, typename Tensor<real, DType>::Matrix& in_grad,
const typename Tensor<int, DType>::Vector& sequence, const typename Tensor<int, DType>::Vector& sequence,
size_t context_length, size_t context_length,
...@@ -76,7 +76,7 @@ void ContextProjectionBackwardData( ...@@ -76,7 +76,7 @@ void ContextProjectionBackwardData(
template <DeviceType DType> template <DeviceType DType>
void ContextProjectionBackwardWeight( void ContextProjectionBackwardWeight(
typename Tensor<real, DType>::Matrix& out_grad, const typename Tensor<real, DType>::Matrix& out_grad,
typename Tensor<real, DType>::Matrix& w_grad, typename Tensor<real, DType>::Matrix& w_grad,
const typename Tensor<int, DType>::Vector& seq_vec, const typename Tensor<int, DType>::Vector& seq_vec,
size_t context_length, size_t context_length,
......
...@@ -138,10 +138,10 @@ void ContextProjectionForward<DEVICE_TYPE_GPU>(GpuMatrix& output, ...@@ -138,10 +138,10 @@ void ContextProjectionForward<DEVICE_TYPE_GPU>(GpuMatrix& output,
begin_pad); begin_pad);
} }
__global__ void KeContextProjectionBackwardData(real* out_grad, __global__ void KeContextProjectionBackwardData(const real* out_grad,
const int* sequence, const int* sequence,
real* in_grad, real* in_grad,
int input_dim, size_t input_dim,
int context_length, int context_length,
int context_start) { int context_start) {
int idx = threadIdx.x; int idx = threadIdx.x;
...@@ -152,7 +152,8 @@ __global__ void KeContextProjectionBackwardData(real* out_grad, ...@@ -152,7 +152,8 @@ __global__ void KeContextProjectionBackwardData(real* out_grad,
real value = 0; real value = 0;
int instances = seq_end - seq_start + context_length - 1; int instances = seq_end - seq_start + context_length - 1;
out_grad += seq_start * input_dim * context_length; auto out = const_cast<real*>(out_grad);
out += seq_start * input_dim * context_length;
in_grad += seq_start * input_dim; in_grad += seq_start * input_dim;
for (int k = 0; k <= input_dim / block_size; k++) { for (int k = 0; k <= input_dim / block_size; k++) {
if (idx < input_dim) { if (idx < input_dim) {
...@@ -169,7 +170,7 @@ __global__ void KeContextProjectionBackwardData(real* out_grad, ...@@ -169,7 +170,7 @@ __global__ void KeContextProjectionBackwardData(real* out_grad,
int outx = (i - context_length) < 0 ? i : (context_length - 1); int outx = (i - context_length) < 0 ? i : (context_length - 1);
int outy = (i - context_length) < 0 ? 0 : (i - (context_length - 1)); int outy = (i - context_length) < 0 ? 0 : (i - (context_length - 1));
real* output_r = real* output_r =
out_grad + outy * input_dim * context_length + outx * input_dim; out + outy * input_dim * context_length + outx * input_dim;
for (int j = outy; j < seq_end - seq_start; j++) { for (int j = outy; j < seq_end - seq_start; j++) {
value += output_r[idx]; value += output_r[idx];
if (j - outy == outx) break; if (j - outy == outx) break;
...@@ -194,7 +195,7 @@ __global__ void KeContextProjectionBackwardData(real* out_grad, ...@@ -194,7 +195,7 @@ __global__ void KeContextProjectionBackwardData(real* out_grad,
* @param[in] context_start context start. * @param[in] context_start context start.
* *
*/ */
void hl_context_projection_backward_data(real* out_grad, void hl_context_projection_backward_data(const real* out_grad,
const int* sequence, const int* sequence,
real* input_grad, real* input_grad,
size_t num_sequences, size_t num_sequences,
...@@ -216,7 +217,7 @@ void hl_context_projection_backward_data(real* out_grad, ...@@ -216,7 +217,7 @@ void hl_context_projection_backward_data(real* out_grad,
} }
template <> template <>
void ContextProjectionBackwardData<DEVICE_TYPE_GPU>(GpuMatrix& out_grad, void ContextProjectionBackwardData<DEVICE_TYPE_GPU>(const GpuMatrix& out_grad,
GpuMatrix& in_grad, GpuMatrix& in_grad,
const GpuIVector& sequence, const GpuIVector& sequence,
size_t context_length, size_t context_length,
...@@ -231,7 +232,7 @@ void ContextProjectionBackwardData<DEVICE_TYPE_GPU>(GpuMatrix& out_grad, ...@@ -231,7 +232,7 @@ void ContextProjectionBackwardData<DEVICE_TYPE_GPU>(GpuMatrix& out_grad,
} }
template<int THREADS_X, int THREADS_Y> template<int THREADS_X, int THREADS_Y>
__global__ void KeContextProjectionBackwardWeight(real* out_grad, __global__ void KeContextProjectionBackwardWeight(const real* out_grad,
const int* sequence, const int* sequence,
real* w_grad, real* w_grad,
int num_sequences, int num_sequences,
...@@ -254,7 +255,8 @@ __global__ void KeContextProjectionBackwardWeight(real* out_grad, ...@@ -254,7 +255,8 @@ __global__ void KeContextProjectionBackwardWeight(real* out_grad,
for (int seqId = idy; seqId < num_sequences; seqId += THREADS_Y) { for (int seqId = idy; seqId < num_sequences; seqId += THREADS_Y) {
int seq_start = sequence[seqId]; int seq_start = sequence[seqId];
int seq_end = sequence[seqId+1]; int seq_end = sequence[seqId+1];
output_r = out_grad + seq_start * w_dim * context_length; output_r = const_cast<real*>(out_grad)
+ seq_start * w_dim * context_length;
if (context_start < 0) { if (context_start < 0) {
if (padId + context_start < 0) { if (padId + context_start < 0) {
...@@ -318,7 +320,7 @@ __global__ void KeContextProjectionBackwardWeight(real* out_grad, ...@@ -318,7 +320,7 @@ __global__ void KeContextProjectionBackwardWeight(real* out_grad,
* beginning. * beginning.
* *
*/ */
void hl_context_projection_backward_weight(real* out_grad, void hl_context_projection_backward_weight(const real* out_grad,
const int* sequence, const int* sequence,
real* w_grad, real* w_grad,
size_t num_sequences, size_t num_sequences,
...@@ -346,7 +348,7 @@ void hl_context_projection_backward_weight(real* out_grad, ...@@ -346,7 +348,7 @@ void hl_context_projection_backward_weight(real* out_grad,
template <> template <>
void ContextProjectionBackwardWeight<DEVICE_TYPE_GPU>( void ContextProjectionBackwardWeight<DEVICE_TYPE_GPU>(
GpuMatrix& out_grad, const GpuMatrix& out_grad,
GpuMatrix& w_grad, GpuMatrix& w_grad,
const GpuIVector& seq_vec, const GpuIVector& seq_vec,
size_t context_length, size_t context_length,
...@@ -365,7 +367,7 @@ void ContextProjectionBackwardWeight<DEVICE_TYPE_GPU>( ...@@ -365,7 +367,7 @@ void ContextProjectionBackwardWeight<DEVICE_TYPE_GPU>(
} }
template <> template <>
void ContextProjectionBackward<DEVICE_TYPE_GPU>(GpuMatrix& out_grad, void ContextProjectionBackward<DEVICE_TYPE_GPU>(const GpuMatrix& out_grad,
GpuMatrix& in_grad, GpuMatrix& in_grad,
GpuMatrix& w_grad, GpuMatrix& w_grad,
const GpuIVector& sequence, const GpuIVector& sequence,
......
...@@ -56,22 +56,25 @@ void testMatrixProjectionForward(int context_start, ...@@ -56,22 +56,25 @@ void testMatrixProjectionForward(int context_start,
cpu_out.randomizeUniform(); cpu_out.randomizeUniform();
gpu_out.copyFrom(cpu_out); gpu_out.copyFrom(cpu_out);
compare.getCpuFunction()->calc( BufferArgs cpu_inputs;
{Tensor(cpu_in.getData(), Dims{batch_size, input_dim}), BufferArgs cpu_outputs;
Tensor(cpu_weight ? cpu_weight->getData() : nullptr, cpu_inputs.addArg(cpu_in, *cpu_seq);
Dims{pad, input_dim}), if (cpu_weight) {
Tensor(reinterpret_cast<real*>(cpu_seq->getData()), cpu_inputs.addArg(*cpu_weight, *cpu_seq);
Dims{cpu_seq->getSize()})}, }
{Tensor(cpu_out.getData(), Dims{batch_size, input_dim * context_length})}, cpu_outputs.addArg(cpu_out, *cpu_seq, ADD_TO);
{});
compare.getGpuFunction()->calc( compare.getCpuFunction()->calc(cpu_inputs, cpu_outputs);
{Tensor(gpu_in.getData(), Dims{batch_size, input_dim}),
Tensor(gpu_weight ? gpu_weight->getData() : nullptr, BufferArgs gpu_inputs;
Dims{pad, input_dim}), BufferArgs gpu_outputs;
Tensor(reinterpret_cast<real*>(gpu_seq->getData()), gpu_inputs.addArg(gpu_in, *gpu_seq);
Dims{gpu_seq->getSize()})}, if (gpu_weight) {
{Tensor(gpu_out.getData(), Dims{batch_size, input_dim * context_length})}, gpu_inputs.addArg(*gpu_weight, *gpu_seq);
{}); }
gpu_outputs.addArg(gpu_out, *gpu_seq, ADD_TO);
compare.getGpuFunction()->calc(gpu_inputs, gpu_outputs);
autotest::TensorCheckEqual(cpu_out, gpu_out); autotest::TensorCheckEqual(cpu_out, gpu_out);
} }
...@@ -117,25 +120,23 @@ void testMatrixProjectionBackward(int context_start, ...@@ -117,25 +120,23 @@ void testMatrixProjectionBackward(int context_start,
gpu_w_grad->copyFrom(*cpu_w_grad); gpu_w_grad->copyFrom(*cpu_w_grad);
} }
compare.getCpuFunction()->calc( BufferArgs cpu_inputs;
{Tensor(cpu_in_grad.getData(), Dims{batch_size, input_dim}), BufferArgs cpu_outputs;
Tensor(cpu_w_grad ? cpu_w_grad->getData() : nullptr, cpu_inputs.addArg(cpu_out_grad, *cpu_seq);
Dims{pad, input_dim}), cpu_outputs.addArg(cpu_in_grad, *cpu_seq, ADD_TO);
Tensor(reinterpret_cast<real*>(cpu_seq->getData()), cpu_outputs.addArg(
Dims{cpu_seq->getSize()})}, cpu_w_grad ? *cpu_w_grad : CpuMatrix(nullptr, 0, input_dim), ADD_TO);
{Tensor(cpu_out_grad.getData(),
Dims{batch_size, input_dim * context_length})}, compare.getCpuFunction()->calc(cpu_inputs, cpu_outputs);
{});
BufferArgs gpu_inputs;
compare.getGpuFunction()->calc( BufferArgs gpu_outputs;
{Tensor(gpu_in_grad.getData(), Dims{batch_size, input_dim}), gpu_inputs.addArg(gpu_out_grad, *gpu_seq);
Tensor(gpu_w_grad ? gpu_w_grad->getData() : nullptr, gpu_outputs.addArg(gpu_in_grad, *gpu_seq, ADD_TO);
Dims{pad, input_dim}), gpu_outputs.addArg(
Tensor(reinterpret_cast<real*>(gpu_seq->getData()), gpu_w_grad ? *gpu_w_grad : GpuMatrix(nullptr, 0, input_dim), ADD_TO);
Dims{gpu_seq->getSize()})},
{Tensor(gpu_out_grad.getData(), compare.getGpuFunction()->calc(gpu_inputs, gpu_outputs);
Dims{batch_size, input_dim * context_length})},
{});
autotest::TensorCheckErr(cpu_in_grad, gpu_in_grad); autotest::TensorCheckErr(cpu_in_grad, gpu_in_grad);
if (is_padding) { if (is_padding) {
......
...@@ -188,8 +188,13 @@ public: ...@@ -188,8 +188,13 @@ public:
CHECK(inputs[0].shape() == inputs[3].shape()); CHECK(inputs[0].shape() == inputs[3].shape());
CHECK(inputs[0].shape() == outputs[0].shape()); CHECK(inputs[0].shape() == outputs[0].shape());
// TODO(hedaoyuan): need support ASSIGN_TO mode. if (outputs[0].getArgType() != ADD_TO) {
CHECK_EQ(outputs[0].getArgType(), ADD_TO); // Currently, some algorithm implementations are ASSIGN_TO mode,
// if need to support the ADD_TO calculation, need to clear the output.
typename Tensor<real, Device>::Vector tmp(
outputs[0].shape().getElements(), outputs[0].data<real>());
tmp.zero();
}
size_t samples = inputs[0].shape()[0]; size_t samples = inputs[0].shape()[0];
size_t channels = inputs[0].shape()[1]; size_t channels = inputs[0].shape()[1];
......
...@@ -27,15 +27,19 @@ TEST(CrossMapNormal, real) { ...@@ -27,15 +27,19 @@ TEST(CrossMapNormal, real) {
<< " imgSizeH=" << imgSizeH << " imgSizeW=" << imgSizeW << " imgSizeH=" << imgSizeH << " imgSizeW=" << imgSizeW
<< " size=" << size; << " size=" << size;
FunctionCompare compare("CrossMapNormal", // init Test object
FuncConfig() FunctionCompare test("CrossMapNormal",
.set("size", size) FuncConfig()
.set("scale", (real)1.5) .set("size", size)
.set("pow", (real)0.5)); .set("scale", (real)1.5)
Dims dims{numSamples, channels, imgSizeH, imgSizeW}; .set("pow", (real)0.5));
compare.cmpWithArg({Tensor(nullptr, dims)}, // prepare input arguments
{Tensor(nullptr, dims), Tensor(nullptr, dims)}, TensorShape shape{numSamples, channels, imgSizeH, imgSizeW};
{}); test.addInputs(BufferArg(VALUE_TYPE_FLOAT, shape));
test.addOutputs(BufferArg(VALUE_TYPE_FLOAT, shape));
test.addOutputs(BufferArg(VALUE_TYPE_FLOAT, shape));
// run Function
test.run();
} }
} }
} }
...@@ -53,18 +57,19 @@ TEST(CrossMapNormalGrad, real) { ...@@ -53,18 +57,19 @@ TEST(CrossMapNormalGrad, real) {
<< " imgSizeH=" << imgSizeH << " imgSizeW=" << imgSizeW << " imgSizeH=" << imgSizeH << " imgSizeW=" << imgSizeW
<< " size=" << size; << " size=" << size;
FunctionCompare compare("CrossMapNormalGrad", FunctionCompare test("CrossMapNormalGrad",
FuncConfig() FuncConfig()
.set("size", size) .set("size", size)
.set("scale", (real)1.5) .set("scale", (real)1.5)
.set("pow", (real)0.5)); .set("pow", (real)0.5));
Dims dims{numSamples, channels, imgSizeH, imgSizeW}; TensorShape shape{numSamples, channels, imgSizeH, imgSizeW};
compare.cmpWithArg({Tensor(nullptr, dims), test.addInputs(BufferArg(VALUE_TYPE_FLOAT, shape));
Tensor(nullptr, dims), test.addInputs(BufferArg(VALUE_TYPE_FLOAT, shape));
Tensor(nullptr, dims), test.addInputs(BufferArg(VALUE_TYPE_FLOAT, shape));
Tensor(nullptr, dims)}, test.addInputs(BufferArg(VALUE_TYPE_FLOAT, shape));
{Tensor(nullptr, dims)}, test.addOutputs(BufferArg(VALUE_TYPE_FLOAT, shape));
{}); // run Function
test.run();
} }
} }
} }
......
...@@ -79,15 +79,25 @@ FuncConfig& FuncConfig::set<bool>(const std::string& key, bool v) { ...@@ -79,15 +79,25 @@ FuncConfig& FuncConfig::set<bool>(const std::string& key, bool v) {
void BufferArgs::addArg(const Matrix& arg, void BufferArgs::addArg(const Matrix& arg,
const TensorShape& shape, const TensorShape& shape,
ArgType argType) { ArgType argType) {
args_.push_back(std::make_shared<BufferArg>(arg, shape, argType)); _args_.push_back(new BufferArg(arg, shape, argType));
addArg(*_args_.back());
} }
void BufferArgs::addArg(const CpuSparseMatrix& arg, ArgType argType) { void BufferArgs::addArg(const CpuSparseMatrix& arg, ArgType argType) {
args_.push_back(std::make_shared<SparseMatrixArg>(arg, argType)); _args_.push_back(new SparseMatrixArg(arg, argType));
addArg(*_args_.back());
} }
void BufferArgs::addArg(const GpuSparseMatrix& arg, ArgType argType) { void BufferArgs::addArg(const GpuSparseMatrix& arg, ArgType argType) {
args_.push_back(std::make_shared<SparseMatrixArg>(arg, argType)); _args_.push_back(new SparseMatrixArg(arg, argType));
addArg(*_args_.back());
}
void BufferArgs::addArg(const Matrix& matrix,
const IVector& vector,
ArgType argType) {
_args_.push_back(new SequenceArg(matrix, vector, argType));
addArg(*_args_.back());
} }
ClassRegistrar<FunctionBase> FunctionBase::funcRegistrar_; ClassRegistrar<FunctionBase> FunctionBase::funcRegistrar_;
......
...@@ -50,19 +50,44 @@ protected: ...@@ -50,19 +50,44 @@ protected:
* Argument type for Function::calc(). * Argument type for Function::calc().
* A BufferArgs contains a set of BufferArg, * A BufferArgs contains a set of BufferArg,
* because Function can have multiple inputs and outputs. * because Function can have multiple inputs and outputs.
*
* addArg() with Matrix object used to adapt Layer Argument.
* Will create a BufferArg object in addArg(),
* and free in destructor of BufferArgs.
*
* addArg() with BufferArg object, just save BufferArg object address,
* and the caller needs to guarantee the validity of the BufferArg object
* in the BufferArgs life time.
*/ */
class BufferArgs { class BufferArgs {
public: public:
BufferArgs() {} BufferArgs() {}
~BufferArgs() {
for (auto arg : _args_) {
delete arg;
}
}
size_t size() const { return args_.size(); } size_t size() const { return args_.size(); }
// add argument into BufferArgs // add argument into BufferArgs
// Tensor can be Matrix, Vector, IVector. // Tensor can be Matrix, Vector, IVector.
// For inputs, do not need argType. // For inputs, do not need argType.
// For outputs, the argType needs to be specified as ASSIGN_TO or ADD_TO. // For outputs, the argType needs to be specified as ASSIGN_TO or ADD_TO.
template <typename Tensor> void addArg(const Matrix& arg, ArgType argType = UNSPECIFIED) {
void addArg(const Tensor& arg, ArgType argType = UNSPECIFIED) { _args_.push_back(new BufferArg(arg, argType));
args_.push_back(std::make_shared<BufferArg>(arg, argType)); addArg(*_args_.back());
}
void addArg(const Vector& arg, ArgType argType = UNSPECIFIED) {
_args_.push_back(new BufferArg(arg, argType));
addArg(*_args_.back());
}
void addArg(const IVector& arg, ArgType argType = UNSPECIFIED) {
_args_.push_back(new BufferArg(arg, argType));
addArg(*_args_.back());
} }
// Add arg into BufferArgs and reshape the arg. // Add arg into BufferArgs and reshape the arg.
...@@ -77,20 +102,37 @@ public: ...@@ -77,20 +102,37 @@ public:
void addArg(const CpuSparseMatrix& arg, ArgType argType = UNSPECIFIED); void addArg(const CpuSparseMatrix& arg, ArgType argType = UNSPECIFIED);
void addArg(const GpuSparseMatrix& arg, ArgType argType = UNSPECIFIED); void addArg(const GpuSparseMatrix& arg, ArgType argType = UNSPECIFIED);
void addArg(const Matrix& matrix,
const IVector& vector,
ArgType argType = UNSPECIFIED);
// get argument // get argument
const BufferArg& operator[](size_t num) const { const BufferArg& operator[](size_t num) const {
CHECK_LT(num, args_.size()); CHECK_LT(num, args_.size());
return *args_[num]; return *args_[num];
} }
void addArg(BufferArg& arg) { args_.push_back(&arg); }
void addArg(SequenceIdArg& arg) { args_.push_back(&arg); }
void addArg(SequenceArg& arg) { args_.push_back(&arg); }
void addArg(SparseMatrixArg& arg) { args_.push_back(&arg); }
private: private:
std::vector<BufferArgPtr> args_; std::vector<BufferArg*> args_;
// The BufferArg object is constructed and freed by BufferArgs.
std::vector<BufferArg*> _args_;
}; };
/** /**
* \brief Base class for Function. * \brief Base class for Function.
* The basic Function implementation requires override init and calc interfaces. * The basic Function implementation requires override init and calc interfaces.
* *
* The caller needs to ensure the validity of the arguments
* during Function execution.
*
* Function inputs are readonly, Function outputs have two modes: ASSIGN_TO * Function inputs are readonly, Function outputs have two modes: ASSIGN_TO
* and ADD_TO. * and ADD_TO.
* If output.getArgType() == ASSIGN_TO, this is assign mode, and the calculation * If output.getArgType() == ASSIGN_TO, this is assign mode, and the calculation
......
...@@ -14,6 +14,7 @@ limitations under the License. */ ...@@ -14,6 +14,7 @@ limitations under the License. */
#include "Function.h" #include "Function.h"
#include <gtest/gtest.h> #include <gtest/gtest.h>
#include "paddle/math/SparseMatrix.h"
namespace paddle { namespace paddle {
...@@ -56,4 +57,110 @@ TEST(Function, BufferArgs) { ...@@ -56,4 +57,110 @@ TEST(Function, BufferArgs) {
Function<DEVICE_TYPE_GPU>(gpuArgments); Function<DEVICE_TYPE_GPU>(gpuArgments);
} }
/**
* Some test cases are used to check the consistency between the BufferArg type
* argument received by Function and the original type argument.
*
* Use Case:
* TEST() {
* Matrix matrix(...);
* CheckBufferArg lambda = [=](const BufferArg& arg) {
* // check matrix and arg are equivalent
* EXPECT_EQ(matrix, arg);
* }
*
* BufferArgs argments{matrix...};
* std::vector<CheckBufferArg> checkFunc{lambda...};
* testBufferArgs(argments, checkFunc);
* }
*/
typedef std::function<void(const BufferArg&)> CheckBufferArg;
void testBufferArgs(const BufferArgs& inputs,
const std::vector<CheckBufferArg>& check) {
EXPECT_EQ(inputs.size(), check.size());
for (size_t i = 0; i < inputs.size(); i++) {
check[i](inputs[i]);
}
}
void testBufferArgs(const BufferArgs& inputs, const CheckBufferArg& check) {
EXPECT_EQ(inputs.size(), 1);
check(inputs[0]);
}
TEST(Arguments, Matrix) {
MatrixPtr matrix = Matrix::create(100, 200);
CheckBufferArg check = [=](const BufferArg& arg) {
EXPECT_EQ(arg.shape().ndims(), 2);
EXPECT_EQ(arg.shape()[0], 100);
EXPECT_EQ(arg.shape()[1], 200);
EXPECT_EQ(arg.data(), matrix->getData());
EXPECT_EQ(arg.matrix<DEVICE_TYPE_CPU>().getHeight(), matrix->getHeight());
EXPECT_EQ(arg.matrix<DEVICE_TYPE_CPU>().getWidth(), matrix->getWidth());
EXPECT_EQ(arg.matrix<DEVICE_TYPE_CPU>().getData(), matrix->getData());
};
BufferArgs argments;
argments.addArg(*matrix);
std::vector<CheckBufferArg> checkFunc;
checkFunc.push_back(check);
testBufferArgs(argments, checkFunc);
}
TEST(Arguments, Vector) {
VectorPtr vector = Vector::create(100, false);
CheckBufferArg check = [=](const BufferArg& arg) {
EXPECT_EQ(arg.shape().ndims(), 1);
EXPECT_EQ(arg.shape()[0], 100);
EXPECT_EQ(arg.data(), vector->getData());
CpuVector inVector = arg.vector<real, DEVICE_TYPE_CPU>();
EXPECT_EQ(inVector.getSize(), vector->getSize());
EXPECT_EQ(inVector.getData(), vector->getData());
};
BufferArgs argments;
argments.addArg(*vector);
std::vector<CheckBufferArg> checkFunc;
checkFunc.push_back(check);
testBufferArgs(argments, checkFunc);
}
// Adds a CpuSparseMatrix constructed as (200, 300, 50) to a BufferArgs and
// verifies that the resulting BufferArg reports a 200x300 shape, the same
// data pointer, and the same row/column index buffers.
TEST(Arguments, CpuSparseMatrix) {
  CpuSparseMatrix sparse(200, 300, 50);
  CheckBufferArg check = [=](const BufferArg& arg) {
    // Shape accessors return size_t; unsigned literals keep the comparison
    // free of signed/unsigned mismatch.
    EXPECT_EQ(arg.shape().ndims(), 2UL);
    EXPECT_EQ(arg.shape()[0], 200UL);
    EXPECT_EQ(arg.shape()[1], 300UL);
    EXPECT_EQ(arg.data(), sparse.getData());
    // TODO: the nnz / format / value-type checks below are disabled in the
    // original test; re-enable them once they are expected to pass.
    // CHECK_EQ(arg.sparse().nnz(), 50);
    // CHECK_EQ(arg.sparse().dataFormat(), SPARSE_CSR_FORMAT);
    // CHECK_EQ(arg.sparse().dataType(), SPARSE_FLOAT_VALUE);
    EXPECT_EQ(arg.sparse().getRowBuf(), sparse.getRows());
    EXPECT_EQ(arg.sparse().getColBuf(), sparse.getCols());
  };

  BufferArgs arguments;
  arguments.addArg(sparse);
  std::vector<CheckBufferArg> checkFunc;
  checkFunc.push_back(check);
  testBufferArgs(arguments, checkFunc);
}
// Adds a BufferArg built directly from a null buffer, VALUE_TYPE_FLOAT and
// the shape {1, 2, 3} to a BufferArgs, and verifies that the stored argument
// reports that same three-dimensional shape.
TEST(Arguments, BufferArg) {
  BufferArg arg(nullptr, VALUE_TYPE_FLOAT, {1, 2, 3});
  // The callback parameter is named `actual` so it does not shadow the
  // local `arg` above.
  CheckBufferArg check = [=](const BufferArg& actual) {
    // Shape accessors return size_t; unsigned literals keep the comparison
    // free of signed/unsigned mismatch.
    EXPECT_EQ(actual.shape().ndims(), 3UL);
    EXPECT_EQ(actual.shape()[0], 1UL);
    EXPECT_EQ(actual.shape()[1], 2UL);
    EXPECT_EQ(actual.shape()[2], 3UL);
  };

  BufferArgs arguments;
  arguments.addArg(arg);
  testBufferArgs(arguments, check);
}
} // namespace paddle } // namespace paddle
...@@ -15,95 +15,186 @@ limitations under the License. */ ...@@ -15,95 +15,186 @@ limitations under the License. */
#include "Function.h" #include "Function.h"
#include "paddle/math/Vector.h" #include "paddle/math/Vector.h"
#include "paddle/math/tests/TensorCheck.h" #include "paddle/math/tests/TensorCheck.h"
#include "paddle/testing/TestUtil.h"
namespace paddle { namespace paddle {
typedef std::shared_ptr<BufferArg> BufferArgPtr;
/**
* \brief A class for comparing CPU and GPU implementations of Function.
*
*
* Use case:
* // Initializes a test object, the corresponding cpu and gpu Function
* // are constructed according to FunctionName and FuncConfig.
* FunctionCompare test(FunctionName, FuncConfig);
* // Prepare inputs and outputs arguments.
* // Here the input and output can not contain real data,
* // only contains the argument type and shape.
* test.addInputs(input1);
* test.addInputs(input2);
* test.addOutputs(output1);
* test.addOutputs(output2);
* // Run.
* // Will according to the type and shape of arguments(inputs_/outputs_),
* // automatic initialization cpu and gpu function required arguments
* // (cpuInputs_/cpuOutputs_/gpuInputs_/gpuOutputs_).
* // Call the CPU and GPU Function calculation results.
* // Compares CPU and GPU calculation results for consistency.
* test.run();
*/
class FunctionCompare { class FunctionCompare {
public: public:
FunctionCompare(const std::string& name, const FuncConfig& config) FunctionCompare(const std::string& name, const FuncConfig& config)
: cpu(FunctionBase::funcRegistrar_.createByType(name + "-CPU")), : cpuFunc_(FunctionBase::funcRegistrar_.createByType(name + "-CPU")),
gpu(FunctionBase::funcRegistrar_.createByType(name + "-GPU")) { gpuFunc_(FunctionBase::funcRegistrar_.createByType(name + "-GPU")) {
cpu->init(config); cpuFunc_->init(config);
gpu->init(config); gpuFunc_->init(config);
}
~FunctionCompare() {}
// input need only contains shape, do not contains data.
void addInputs(const BufferArg& input) {
size_t size =
input.shape().getElements() * sizeOfValuType(input.valueType());
cpuMemory_.emplace_back(std::make_shared<CpuMemoryHandle>(size));
gpuMemory_.emplace_back(std::make_shared<GpuMemoryHandle>(size));
cpuInputs_.emplace_back(std::make_shared<BufferArg>(
cpuMemory_.back()->getBuf(), input.valueType(), input.shape()));
gpuInputs_.emplace_back(std::make_shared<BufferArg>(
gpuMemory_.back()->getBuf(), input.valueType(), input.shape()));
}
// output need only contains shape, do not contains data.
void addOutputs(const BufferArg& output) {
size_t size =
output.shape().getElements() * sizeOfValuType(output.valueType());
cpuMemory_.emplace_back(std::make_shared<CpuMemoryHandle>(size));
gpuMemory_.emplace_back(std::make_shared<GpuMemoryHandle>(size));
cpuOutputs_.emplace_back(
std::make_shared<BufferArg>(cpuMemory_.back()->getBuf(),
output.valueType(),
output.shape(),
ASSIGN_TO));
gpuOutputs_.emplace_back(
std::make_shared<BufferArg>(gpuMemory_.back()->getBuf(),
output.valueType(),
output.shape(),
ASSIGN_TO));
} }
void cmpWithArg(const Arguments& inputs, void addInputs(const SequenceArg& input) {
const Arguments& outputs, size_t batchSize = input.shape()[0];
const Arguments& inouts) { size_t numSeqs = batchSize / 10 + 1;
// init cpu and gpu arguments
auto initArgs = [=]( size_t sizeId = (numSeqs + 1) * sizeOfValuType(VALUE_TYPE_INT32);
Arguments& cpuArgs, Arguments& gpuArgs, const Arguments& inArgs) { cpuMemory_.emplace_back(std::make_shared<CpuMemoryHandle>(sizeId));
for (const auto arg : inArgs) { gpuMemory_.emplace_back(std::make_shared<GpuMemoryHandle>(sizeId));
size_t size = sizeof(real);
for (const auto dim : arg.dims_) { TensorShape seqsId({numSeqs + 1});
size *= dim; // void* cpuBuffer = cpuMemory_.back()->getBuf();
} // void* gpuBuffer = gpuMemory_.back()->getBuf();
if (arg.getData()) {
// todo(tianbing), waste unnecessary mem here size_t size =
cpuMemory.emplace_back(std::make_shared<CpuMemoryHandle>(size)); input.shape().getElements() * sizeOfValuType(input.valueType());
gpuMemory.emplace_back(std::make_shared<GpuMemoryHandle>(size)); cpuMemory_.emplace_back(std::make_shared<CpuMemoryHandle>(size));
cpuArgs.emplace_back(Tensor((real*)arg.getData(), arg.dims_)); gpuMemory_.emplace_back(std::make_shared<GpuMemoryHandle>(size));
gpuArgs.emplace_back(Tensor((real*)arg.getData(), arg.dims_));
// already init outside // TODO: need be implemented.
} else { }
cpuMemory.emplace_back(std::make_shared<CpuMemoryHandle>(size));
gpuMemory.emplace_back(std::make_shared<GpuMemoryHandle>(size)); void run() {
cpuArgs.emplace_back( // prepare cpu/gpu arguments
Tensor((real*)cpuMemory.back()->getBuf(), arg.dims_)); initInputs();
gpuArgs.emplace_back(
Tensor((real*)gpuMemory.back()->getBuf(), arg.dims_)); // function calculate
// will use an api to refactor this code. auto callFunction = [](FunctionBase* function,
CpuVector cpuVector(size / sizeof(real), std::vector<BufferArgPtr>& inputs,
(real*)cpuArgs.back().getData()); std::vector<BufferArgPtr>& outputs) {
GpuVector gpuVector(size / sizeof(real), BufferArgs inArgs;
(real*)gpuArgs.back().getData()); BufferArgs outArgs;
cpuVector.uniform(0.001, 1); for (auto arg : inputs) {
gpuVector.copyFrom(cpuVector); inArgs.addArg(*arg);
}
} }
for (auto arg : outputs) {
outArgs.addArg(*arg);
}
function->calc(inArgs, outArgs);
}; };
initArgs(cpuInputs, gpuInputs, inputs);
initArgs(cpuOutputs, gpuOutputs, outputs);
initArgs(cpuInouts, gpuInouts, inouts);
// function calculate callFunction(cpuFunc_.get(), cpuInputs_, cpuOutputs_);
cpu->calc(cpuInputs, cpuOutputs, cpuInouts); callFunction(gpuFunc_.get(), gpuInputs_, gpuOutputs_);
gpu->calc(gpuInputs, gpuOutputs, gpuInouts);
// check outputs and inouts // check outputs and inouts
auto checkArgs = [=](const Arguments& cpuArgs, const Arguments& gpuArgs) { compareOutputs();
for (size_t i = 0; i < cpuArgs.size(); i++) { }
auto cpu = cpuArgs[i];
auto gpu = gpuArgs[i]; std::shared_ptr<FunctionBase> getCpuFunction() const { return cpuFunc_; }
size_t size = 1;
for (auto dim : cpu.dims_) { std::shared_ptr<FunctionBase> getGpuFunction() const { return gpuFunc_; }
size *= dim;
} protected:
CpuVector cpuVector(size, (real*)cpu.getData()); void initInputs() {
GpuVector gpuVector(size, (real*)gpu.getData()); for (size_t i = 0; i < cpuInputs_.size(); i++) {
initArg(*cpuInputs_[i]);
autotest::TensorCheckErr(cpuVector, gpuVector);
} // TODO: Need a BufferCopy used to copy from one BufferArg to another.
}; CpuVector cpuVector(cpuInputs_[i]->shape().getElements(),
checkArgs(cpuOutputs, gpuOutputs); (real*)cpuInputs_[i]->data());
checkArgs(cpuInouts, gpuInouts); GpuVector gpuVector(gpuInputs_[i]->shape().getElements(),
(real*)gpuInputs_[i]->data());
gpuVector.copyFrom(cpuVector);
}
}
void compareOutputs() {
for (size_t i = 0; i < cpuOutputs_.size(); i++) {
// TODO, Need a BufferCheck used to compare the two buffers.
auto cpu = cpuOutputs_[i];
auto gpu = gpuOutputs_[i];
CpuVector cpuVector(cpu->shape().getElements(), (real*)cpu->data());
GpuVector gpuVector(cpu->shape().getElements(), (real*)gpu->data());
autotest::TensorCheckErr(cpuVector, gpuVector);
}
} }
std::shared_ptr<FunctionBase> getCpuFunction() const { return cpu; } // only init cpu argument, gpu argument copy from cpu argument.
void initArg(BufferArg& arg) {
CpuVector vector(arg.shape().getElements(), (real*)arg.data());
vector.uniform(0.001, 1);
}
std::shared_ptr<FunctionBase> getGpuFunction() const { return gpu; } void initArg(SequenceIdArg& arg, size_t batchSize) {
size_t numSeqs = arg.numSeqs();
int* buf = reinterpret_cast<int*>(arg.data());
int pos = 0;
size_t maxLen = 2 * batchSize / numSeqs;
for (int i = 0; i < (int)numSeqs; ++i) {
int len = uniformRandom(
std::min<int64_t>(maxLen, batchSize - pos - numSeqs + i)) +
1;
buf[i] = pos;
pos += len;
VLOG(1) << " len=" << len;
}
buf[numSeqs] = batchSize;
}
protected: protected:
std::shared_ptr<FunctionBase> cpu; std::shared_ptr<FunctionBase> cpuFunc_;
std::shared_ptr<FunctionBase> gpu; std::shared_ptr<FunctionBase> gpuFunc_;
std::vector<CpuMemHandlePtr> cpuMemory; std::vector<CpuMemHandlePtr> cpuMemory_;
std::vector<GpuMemHandlePtr> gpuMemory; std::vector<GpuMemHandlePtr> gpuMemory_;
Arguments cpuInputs; std::vector<BufferArgPtr> cpuInputs_;
Arguments cpuOutputs; std::vector<BufferArgPtr> cpuOutputs_;
Arguments cpuInouts; std::vector<BufferArgPtr> gpuInputs_;
Arguments gpuInputs; std::vector<BufferArgPtr> gpuOutputs_;
Arguments gpuOutputs;
Arguments gpuInouts;
}; };
} // namespace paddle } // namespace paddle
...@@ -89,20 +89,21 @@ public: ...@@ -89,20 +89,21 @@ public:
* \param inputs[0] input value. * \param inputs[0] input value.
* \param outputs[0] output value. * \param outputs[0] output value.
*/ */
void calc(const Arguments& inputs, void calc(const BufferArgs& inputs, const BufferArgs& outputs) override {
const Arguments& outputs,
const Arguments& inouts) override {
CHECK_EQ(1UL, inputs.size()); CHECK_EQ(1UL, inputs.size());
CHECK_EQ(1UL, outputs.size()); CHECK_EQ(1UL, outputs.size());
CHECK_EQ(0UL, inouts.size()); CHECK_EQ(outputs[0].getArgType(), ASSIGN_TO);
size_t num = inputs[0].dims_[0]; size_t num = inputs[0].shape()[0];
size_t inC = inputs[0].dims_[1]; size_t inC = inputs[0].shape()[1];
size_t inH = inputs[0].dims_[2]; size_t inH = inputs[0].shape()[2];
size_t inW = inputs[0].dims_[3]; size_t inW = inputs[0].shape()[3];
typename Tensor<real, Device>::Vector vec(outputs[0].shape().getElements(),
Pad<Device>(outputs[0].getData(), outputs[0].data<real>());
inputs[0].getData(), vec.zero();
Pad<Device>(outputs[0].data<real>(),
inputs[0].data<real>(),
num, num,
inC, inC,
inH, inH,
...@@ -140,21 +141,25 @@ public: ...@@ -140,21 +141,25 @@ public:
* \param inputs[0] output grad. * \param inputs[0] output grad.
* \param inouts[0] input grad. * \param inouts[0] input grad.
*/ */
void calc(const Arguments& inputs, void calc(const BufferArgs& inputs, const BufferArgs& outputs) override {
const Arguments& outputs,
const Arguments& inouts) override {
CHECK_EQ(1UL, inputs.size()); CHECK_EQ(1UL, inputs.size());
CHECK_EQ(0UL, outputs.size()); CHECK_EQ(1UL, outputs.size());
CHECK_EQ(1UL, inouts.size());
size_t n = inouts[0].dims_[0]; size_t num = outputs[0].shape()[0];
size_t inC = inouts[0].dims_[1]; size_t inC = outputs[0].shape()[1];
size_t inH = inouts[0].dims_[2]; size_t inH = outputs[0].shape()[2];
size_t inW = inouts[0].dims_[3]; size_t inW = outputs[0].shape()[3];
if (outputs[0].getArgType() != ADD_TO) {
// for unit test
typename Tensor<real, Device>::Vector tmp(
outputs[0].shape().getElements(), outputs[0].data<real>());
tmp.zero();
}
PadGrad<Device>(inouts[0].getData(), PadGrad<Device>(outputs[0].data<real>(),
inputs[0].getData(), inputs[0].data<real>(),
n, num,
inC, inC,
inH, inH,
inW, inW,
......
...@@ -33,10 +33,12 @@ TEST(Pad, real) { ...@@ -33,10 +33,12 @@ TEST(Pad, real) {
.set("padh1", 2) .set("padh1", 2)
.set("padw0", 3) .set("padw0", 3)
.set("padw1", 2)); .set("padw1", 2));
Dims inDims{numSamples, channels, imgSizeH, imgSizeW}; TensorShape inDims{numSamples, channels, imgSizeH, imgSizeW};
Dims outDims{numSamples, channels + 5, imgSizeH + 3, imgSizeW + 5}; TensorShape outDims{
compare.cmpWithArg( numSamples, channels + 5, imgSizeH + 3, imgSizeW + 5};
{Tensor(nullptr, inDims)}, {Tensor(nullptr, outDims)}, {}); compare.addInputs(BufferArg(VALUE_TYPE_FLOAT, inDims));
compare.addOutputs(BufferArg(VALUE_TYPE_FLOAT, outDims, ASSIGN_TO));
compare.run();
} }
} }
} }
...@@ -50,7 +52,6 @@ TEST(PadGrad, real) { ...@@ -50,7 +52,6 @@ TEST(PadGrad, real) {
for (size_t imgSizeW : {5, 32, 96}) { for (size_t imgSizeW : {5, 32, 96}) {
VLOG(3) << " numSamples=" << numSamples << " channels=" << channels VLOG(3) << " numSamples=" << numSamples << " channels=" << channels
<< " imgSizeH=" << imgSizeH << " imgSizeW=" << imgSizeW; << " imgSizeH=" << imgSizeH << " imgSizeW=" << imgSizeW;
FunctionCompare compare("PadGrad", FunctionCompare compare("PadGrad",
FuncConfig() FuncConfig()
.set("padc0", 2) .set("padc0", 2)
...@@ -59,10 +60,12 @@ TEST(PadGrad, real) { ...@@ -59,10 +60,12 @@ TEST(PadGrad, real) {
.set("padh1", 2) .set("padh1", 2)
.set("padw0", 3) .set("padw0", 3)
.set("padw1", 2)); .set("padw1", 2));
Dims inDims{numSamples, channels, imgSizeH, imgSizeW}; TensorShape inDims{numSamples, channels, imgSizeH, imgSizeW};
Dims outDims{numSamples, channels + 5, imgSizeH + 3, imgSizeW + 5}; TensorShape outDims{
compare.cmpWithArg( numSamples, channels + 5, imgSizeH + 3, imgSizeW + 5};
{Tensor(nullptr, outDims)}, {}, {Tensor(nullptr, inDims)}); compare.addInputs(BufferArg(VALUE_TYPE_FLOAT, outDims));
compare.addOutputs(BufferArg(VALUE_TYPE_FLOAT, inDims, ASSIGN_TO));
compare.run();
} }
} }
} }
......
...@@ -55,6 +55,15 @@ public: ...@@ -55,6 +55,15 @@ public:
numElements(); numElements();
} }
void reshape(std::initializer_list<size_t> dims) {
ndims_ = dims.size();
if (ndims_ > kMinDims) {
dims_.resize(ndims_);
}
dims_.assign(dims);
numElements();
}
// number of dimensions of the tensor // number of dimensions of the tensor
size_t ndims() const { return ndims_; } size_t ndims() const { return ndims_; }
...@@ -82,7 +91,7 @@ private: ...@@ -82,7 +91,7 @@ private:
// init dims_ // init dims_
void initDims(size_t ndims) { void initDims(size_t ndims) {
size_t count = ndims < 4 ? 4 : ndims; size_t count = ndims < kMinDims ? kMinDims : ndims;
dims_.assign(count, 1); dims_.assign(count, 1);
} }
...@@ -92,6 +101,7 @@ private: ...@@ -92,6 +101,7 @@ private:
// number of elements // number of elements
size_t nelements_; size_t nelements_;
std::vector<size_t> dims_; std::vector<size_t> dims_;
static const size_t kMinDims = 4;
}; };
} // namespace paddle } // namespace paddle
...@@ -118,16 +118,15 @@ void ContextProjection::forward() { ...@@ -118,16 +118,15 @@ void ContextProjection::forward() {
/// first use state_, otherwise use weight_(padding false === w nullptr) /// first use state_, otherwise use weight_(padding false === w nullptr)
auto w_ptr = auto w_ptr =
state_ ? state_.get() : is_padding ? weight_->getW().get() : nullptr; state_ ? state_.get() : is_padding ? weight_->getW().get() : nullptr;
auto start_pos = in_->sequenceStartPositions; const auto start_pos = in_->sequenceStartPositions->getVector(useGpu_);
BufferArgs inputs; BufferArgs inputs;
BufferArgs outputs; BufferArgs outputs;
inputs.addArg(*in_->value); inputs.addArg(*in_->value, *start_pos);
inputs.addArg(CpuMatrix(w_ptr ? w_ptr->getData() : nullptr, if (w_ptr) {
w_ptr ? w_ptr->getHeight() : 0, inputs.addArg(CpuMatrix(w_ptr->getData(), w_ptr->getHeight(), input_dim),
input_dim)); *start_pos);
inputs.addArg(*in_->sequenceStartPositions->getVector(useGpu_)); }
outputs.addArg(*out_->value, ADD_TO); outputs.addArg(*out_->value, *start_pos, ADD_TO);
forward_[0]->calc(inputs, outputs); forward_[0]->calc(inputs, outputs);
if (state_ && config_.context_start() < 0) { if (state_ && config_.context_start() < 0) {
...@@ -166,13 +165,16 @@ void ContextProjection::backward(const UpdateCallback& callback) { ...@@ -166,13 +165,16 @@ void ContextProjection::backward(const UpdateCallback& callback) {
BufferArgs inputs; BufferArgs inputs;
BufferArgs outputs; BufferArgs outputs;
inputs.addArg(CpuMatrix( inputs.addArg(*out_->grad, *in_->sequenceStartPositions->getVector(useGpu_));
in_->grad ? in_->grad->getData() : nullptr, batch_size, input_dim)); outputs.addArg(
inputs.addArg(CpuMatrix(w_ptr ? w_ptr->getData() : nullptr, CpuMatrix(
w_ptr ? w_ptr->getHeight() : 0, in_->grad ? in_->grad->getData() : nullptr, batch_size, input_dim),
input_dim)); *in_->sequenceStartPositions->getVector(useGpu_),
inputs.addArg(*in_->sequenceStartPositions->getVector(useGpu_)); ADD_TO);
outputs.addArg(*out_->grad, ADD_TO); outputs.addArg(CpuMatrix(w_ptr ? w_ptr->getData() : nullptr,
w_ptr ? w_ptr->getHeight() : 0,
input_dim),
ADD_TO);
backward_[0]->calc(inputs, outputs); backward_[0]->calc(inputs, outputs);
if (config_.trainable_padding()) { if (config_.trainable_padding()) {
......
...@@ -27,11 +27,11 @@ bool PadLayer::init(const LayerMap& layerMap, ...@@ -27,11 +27,11 @@ bool PadLayer::init(const LayerMap& layerMap,
auto& pad_conf = config_.inputs(0).pad_conf(); auto& pad_conf = config_.inputs(0).pad_conf();
auto& img_conf = pad_conf.image_conf(); auto& img_conf = pad_conf.image_conf();
CHECK_EQ(config_.inputs_size(), 1); CHECK_EQ(config_.inputs_size(), 1);
inDims_.push_back(0); inDims_ = TensorShape(
inDims_.push_back(img_conf.channels()); {0,
inDims_.push_back(img_conf.has_img_size_y() ? img_conf.img_size_y() img_conf.channels(),
: img_conf.img_size()); img_conf.has_img_size_y() ? img_conf.img_size_y() : img_conf.img_size(),
inDims_.push_back(img_conf.img_size()); img_conf.img_size()});
CHECK_EQ(2, pad_conf.pad_c_size()); CHECK_EQ(2, pad_conf.pad_c_size());
CHECK_EQ(2, pad_conf.pad_h_size()); CHECK_EQ(2, pad_conf.pad_h_size());
...@@ -43,7 +43,7 @@ bool PadLayer::init(const LayerMap& layerMap, ...@@ -43,7 +43,7 @@ bool PadLayer::init(const LayerMap& layerMap,
padw_.push_back(pad_conf.pad_w(0)); padw_.push_back(pad_conf.pad_w(0));
padw_.push_back(pad_conf.pad_w(1)); padw_.push_back(pad_conf.pad_w(1));
outDims_.resize(4); outDims_ = TensorShape(4);
setOutDims(0); setOutDims(0);
createFunction(forward_, createFunction(forward_,
...@@ -68,20 +68,20 @@ bool PadLayer::init(const LayerMap& layerMap, ...@@ -68,20 +68,20 @@ bool PadLayer::init(const LayerMap& layerMap,
return true; return true;
} }
void PadLayer::setOutDims(int batchSize) { void PadLayer::setOutDims(const size_t batchSize) {
outDims_[0] = batchSize; outDims_.reshape({batchSize,
outDims_[1] = inDims_[1] + padc_[0] + padc_[1]; inDims_[1] + padc_[0] + padc_[1],
outDims_[2] = inDims_[2] + padh_[0] + padh_[1]; inDims_[2] + padh_[0] + padh_[1],
outDims_[3] = inDims_[3] + padw_[0] + padw_[1]; inDims_[3] + padw_[0] + padw_[1]});
} }
void PadLayer::setTensorDim(int batchSize) { void PadLayer::setTensorDim(const size_t batchSize) {
CHECK_EQ(static_cast<int>(inputLayers_.size()), 1); CHECK_EQ(static_cast<int>(inputLayers_.size()), 1);
inDims_[0] = batchSize; inDims_.setDim(0, batchSize);
int h = inputLayers_[0]->getOutput().getFrameHeight(); int h = inputLayers_[0]->getOutput().getFrameHeight();
if (h != 0) inDims_[2]; if (h != 0) inDims_.setDim(2, h);
int w = inputLayers_[0]->getOutput().getFrameWidth(); int w = inputLayers_[0]->getOutput().getFrameWidth();
if (w != 0) inDims_[3]; if (w != 0) inDims_.setDim(3, w);
setOutDims(batchSize); setOutDims(batchSize);
} }
...@@ -94,22 +94,22 @@ void PadLayer::forward(PassType passType) { ...@@ -94,22 +94,22 @@ void PadLayer::forward(PassType passType) {
resetOutput(batchSize, size); resetOutput(batchSize, size);
MatrixPtr outV = getOutputValue(); MatrixPtr outV = getOutputValue();
REGISTER_TIMER_INFO("PadForward", getName().c_str()); REGISTER_TIMER_INFO("PadForward", getName().c_str());
forward_[0]->calc({Tensor(input->getData(), inDims_)},
{Tensor(outV->getData(), outDims_)}, BufferArgs inputs;
{}); BufferArgs outputs;
inputs.addArg(*getInputValue(0), inDims_);
outputs.addArg(*getOutputValue(), outDims_, ASSIGN_TO);
forward_[0]->calc(inputs, outputs);
} }
void PadLayer::backward(const UpdateCallback& callback) { void PadLayer::backward(const UpdateCallback& callback) {
(void)callback; (void)callback;
MatrixPtr preGrad = inputLayers_[0]->getOutputGrad();
if (NULL == preGrad) {
return;
}
MatrixPtr outGrad = getOutputGrad();
REGISTER_TIMER_INFO("PadBackward", getName().c_str()); REGISTER_TIMER_INFO("PadBackward", getName().c_str());
backward_[0]->calc({Tensor(outGrad->getData(), outDims_)},
{}, BufferArgs inputs;
{Tensor(preGrad->getData(), inDims_)}); BufferArgs outputs;
inputs.addArg(*getOutputGrad(), outDims_);
outputs.addArg(*getInputGrad(0), inDims_, ADD_TO);
backward_[0]->calc(inputs, outputs);
} }
} // namespace paddle } // namespace paddle
...@@ -33,13 +33,13 @@ public: ...@@ -33,13 +33,13 @@ public:
void backward(const UpdateCallback& callback = nullptr); void backward(const UpdateCallback& callback = nullptr);
protected: protected:
void setOutDims(int batchSize); void setOutDims(const size_t batchSize);
void setTensorDim(int batchSize); void setTensorDim(const size_t batchSize);
std::vector<int> padc_; std::vector<int> padc_;
std::vector<int> padh_; std::vector<int> padh_;
std::vector<int> padw_; std::vector<int> padw_;
Dims inDims_; TensorShape inDims_;
Dims outDims_; TensorShape outDims_;
}; };
} // namespace paddle } // namespace paddle
...@@ -34,6 +34,10 @@ class IScanner(object): ...@@ -34,6 +34,10 @@ class IScanner(object):
class DenseScanner(IScanner): class DenseScanner(IScanner):
"""
:type __mat__: numpy.ndarray
"""
def __init__(self, input_type, pos): def __init__(self, input_type, pos):
IScanner.__init__(self, input_type, pos) IScanner.__init__(self, input_type, pos)
self.__mat__ = None self.__mat__ = None
...@@ -47,6 +51,8 @@ class DenseScanner(IScanner): ...@@ -47,6 +51,8 @@ class DenseScanner(IScanner):
def finish_scan(self, argument): def finish_scan(self, argument):
assert isinstance(argument, swig_paddle.Arguments) assert isinstance(argument, swig_paddle.Arguments)
assert isinstance(self.input_type, dp2.InputType) assert isinstance(self.input_type, dp2.InputType)
if self.__mat__.dtype != numpy.float32:
self.__mat__ = self.__mat__.astype(numpy.float32)
m = swig_paddle.Matrix.createDenseFromNumpy(self.__mat__, True, False) m = swig_paddle.Matrix.createDenseFromNumpy(self.__mat__, True, False)
argument.setSlotValue(self.pos, m) argument.setSlotValue(self.pos, m)
......
#!/bin/bash #!/bin/bash
brew update brew update
brew tap homebrew/science brew tap homebrew/science
brew install python brew install openblas swig md5sha1sum
sudo pip install --upgrade protobuf
brew install swig openblas md5sha1sum protobuf
...@@ -6,7 +6,7 @@ if [[ "$TRAVIS_OS_NAME" == "linux" ]]; then ...@@ -6,7 +6,7 @@ if [[ "$TRAVIS_OS_NAME" == "linux" ]]; then
export PYTHONPATH=/opt/python/2.7.12/lib/python2.7/site-packages export PYTHONPATH=/opt/python/2.7.12/lib/python2.7/site-packages
export PYTHONHOME=/opt/python/2.7.12 export PYTHONHOME=/opt/python/2.7.12
export PATH=/opt/python/2.7.12/bin:${PATH} export PATH=/opt/python/2.7.12/bin:${PATH}
cmake .. -DON_TRAVIS=ON -DON_COVERALLS=ON -DCOVERALLS_UPLOAD=ON ${EXTRA_CMAKE_OPTS} cmake .. -DCMAKE_Fortran_COMPILER=/usr/bin/gfortran-4.8 -DON_TRAVIS=ON -DON_COVERALLS=ON -DCOVERALLS_UPLOAD=ON ${EXTRA_CMAKE_OPTS}
NRPOC=`nproc` NRPOC=`nproc`
make -j $NPROC make -j $NPROC
make coveralls make coveralls
......
...@@ -4,7 +4,7 @@ ...@@ -4,7 +4,7 @@
source ./common.sh source ./common.sh
# Compile Documentation only. # Compile Documentation only.
cmake .. -DCMAKE_BUILD_TYPE=Debug -DWITH_GPU=OFF -DWITH_DOC=ON ${EXTRA_CMAKE_OPTS} cmake .. -DCMAKE_BUILD_TYPE=Debug -DCMAKE_Fortran_COMPILER=/usr/bin/gfortran-4.8 -DWITH_GPU=OFF -DWITH_DOC=ON ${EXTRA_CMAKE_OPTS}
make paddle_docs paddle_docs_cn make paddle_docs paddle_docs_cn
# check websites for broken links # check websites for broken links
......
...@@ -2650,7 +2650,7 @@ class AverageLayer(LayerBase): ...@@ -2650,7 +2650,7 @@ class AverageLayer(LayerBase):
@config_layer('cos') @config_layer('cos')
class CosSimLayer(LayerBase): class CosSimLayer(LayerBase):
def __init__(self, name, inputs, cos_scale=5, device=None): def __init__(self, name, inputs, cos_scale=1, device=None):
super(CosSimLayer, self).__init__( super(CosSimLayer, self).__init__(
name, 'cos', 1, inputs=inputs, device=device) name, 'cos', 1, inputs=inputs, device=device)
config_assert(len(self.inputs) == 2, 'CosSimLayer must have 2 inputs') config_assert(len(self.inputs) == 2, 'CosSimLayer must have 2 inputs')
......
...@@ -1674,7 +1674,7 @@ def trans_layer(input, name=None, layer_attr=None): ...@@ -1674,7 +1674,7 @@ def trans_layer(input, name=None, layer_attr=None):
@wrap_name_default() @wrap_name_default()
@layer_support() @layer_support()
def cos_sim(a, b, scale=5, size=1, name=None, layer_attr=None): def cos_sim(a, b, scale=1, size=1, name=None, layer_attr=None):
""" """
Cosine Similarity Layer. The cosine similarity equation is here. Cosine Similarity Layer. The cosine similarity equation is here.
......
...@@ -9,17 +9,10 @@ add_test(NAME test_reset_hook ...@@ -9,17 +9,10 @@ add_test(NAME test_reset_hook
${PYTHON_EXECUTABLE} ${PROJ_ROOT}/python/paddle/trainer_config_helpers/tests/test_reset_hook.py ${PYTHON_EXECUTABLE} ${PROJ_ROOT}/python/paddle/trainer_config_helpers/tests/test_reset_hook.py
WORKING_DIRECTORY ${PROJ_ROOT}/python/paddle) WORKING_DIRECTORY ${PROJ_ROOT}/python/paddle)
if (PROTOBUF_3) add_paddle_exe(protobuf_equal
add_paddle_exe(protobuf_equal ProtobufEqualMain.cpp)
ProtobufEqualMain.cpp) add_test(NAME test_layerHelpers
add_test(NAME test_layerHelpers COMMAND
COMMAND ${PROJ_ROOT}/python/paddle/trainer_config_helpers/tests/configs/run_tests.sh ${PYTHON_EXECUTABLE}
${PROJ_ROOT}/python/paddle/trainer_config_helpers/tests/configs/run_tests.sh ${PYTHON_EXECUTABLE} ${CMAKE_CURRENT_BINARY_DIR}/protobuf_equal
${CMAKE_CURRENT_BINARY_DIR}/protobuf_equal )
)
else()
add_test(NAME test_layerHelpers
COMMAND
${PROJ_ROOT}/python/paddle/trainer_config_helpers/tests/configs/run_tests.sh ${PYTHON_EXECUTABLE}
)
endif()
...@@ -79,7 +79,7 @@ layers { ...@@ -79,7 +79,7 @@ layers {
inputs { inputs {
input_layer_name: "b" input_layer_name: "b"
} }
cos_scale: 5 cos_scale: 1
} }
layers { layers {
name: "__cos_sim_1__" name: "__cos_sim_1__"
...@@ -92,7 +92,7 @@ layers { ...@@ -92,7 +92,7 @@ layers {
inputs { inputs {
input_layer_name: "c" input_layer_name: "c"
} }
cos_scale: 5 cos_scale: 1
} }
layers { layers {
name: "__sum_to_one_norm_layer_0__" name: "__sum_to_one_norm_layer_0__"
......
...@@ -2,16 +2,18 @@ ...@@ -2,16 +2,18 @@
cd `dirname $0` cd `dirname $0`
set -e set -e
PYTHON_EXEC=$1
COMPARE_PROTO_UTIL=$2
protostr=`dirname $0`/protostr protostr=`dirname $0`/protostr
files=`ls $protostr | grep -v "unittest"` files=`ls $protostr | grep -v "unittest"`
./generate_protostr.sh $1 ./generate_protostr.sh ${PYTHON_EXEC}
. ./file_list.sh . ./file_list.sh
if [ -z $1 ]; then if [ -z ${COMPARE_PROTO_UTIL} ]; then
for file in $files for file in $files
do do
base_protostr=$protostr/$file base_protostr=$protostr/$file
...@@ -22,20 +24,20 @@ if [ -z $1 ]; then ...@@ -22,20 +24,20 @@ if [ -z $1 ]; then
else else
for file in ${configs[*]} for file in ${configs[*]}
do do
if ! $1 $protostr/$file.protostr $protostr/$file.protostr.unittest; then if ! ${COMPARE_PROTO_UTIL} $protostr/$file.protostr $protostr/$file.protostr.unittest; then
diff $protostr/$file.protostr $protostr/$file.protostr.unittest -u diff $protostr/$file.protostr $protostr/$file.protostr.unittest -u
fi fi
if ! $1 $protostr/$file.protostr $protostr/$file.protostr.non_file_config.unittest; then if ! ${COMPARE_PROTO_UTIL} $protostr/$file.protostr $protostr/$file.protostr.non_file_config.unittest; then
diff $protostr/$file.protostr $protostr/$file.protostr.non_file_config.unittest -u diff $protostr/$file.protostr $protostr/$file.protostr.non_file_config.unittest -u
fi fi
done done
for file in ${whole_configs[*]} for file in ${whole_configs[*]}
do do
if ! $1 $protostr/$file.protostr $protostr/$file.protostr.unittest --whole; then if ! ${COMPARE_PROTO_UTIL} $protostr/$file.protostr $protostr/$file.protostr.unittest --whole; then
diff $protostr/$file.protostr $protostr/$file.protostr.unittest -u diff $protostr/$file.protostr $protostr/$file.protostr.unittest -u
fi fi
if ! $1 $protostr/$file.protostr $protostr/$file.protostr.non_file_config.unittest --whole; then if ! ${COMPARE_PROTO_UTIL} $protostr/$file.protostr $protostr/$file.protostr.non_file_config.unittest --whole; then
diff $protostr/$file.protostr $protostr/$file.protostr.non_file_config.unittest -u diff $protostr/$file.protostr $protostr/$file.protostr.non_file_config.unittest -u
fi fi
done done
......
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册