机器未来 / Paddle (forked from PaddlePaddle / Paddle)
Commit 8d47499e
Authored Jan 18, 2017 by dangqingqing
Commit message: update code
Parents: 47e88f4a, f174c00e
Showing 40 changed files, with 1,043 additions and 583 deletions (+1043 −583).
- .travis.yml (+1 −1)
- cmake/cblas.cmake (+1 −1)
- cmake/external/openblas.cmake (+25 −6)
- cmake/external/protobuf.cmake (+1 −0)
- cmake/external/python.cmake (+1 −0)
- cmake/flags.cmake (+2 −0)
- cmake/system.cmake (+11 −2)
- doc/getstarted/build_and_install/build_from_source_en.md (+66 −2)
- doc/howto/deep_model/rnn/hierarchical_layer_cn.rst (+2 −2)
- doc/howto/usage/k8s/k8s_aws_en.md (+167 −128)
- doc/howto/usage/k8s/src/pserver_and_trainer.png (+0 −0)
- paddle/function/BufferArg.cpp (+8 −4)
- paddle/function/BufferArg.h (+54 −10)
- paddle/function/BufferArgTest.cpp (+0 −53)
- paddle/function/CMakeLists.txt (+2 −2)
- paddle/function/ContextProjectionOp.cpp (+153 −105)
- paddle/function/ContextProjectionOp.h (+11 −11)
- paddle/function/ContextProjectionOpGpu.cu (+13 −11)
- paddle/function/ContextProjectionOpTest.cpp (+36 −35)
- paddle/function/CrossMapNormalOp.cpp (+7 −2)
- paddle/function/CrossMapNormalOpTest.cpp (+26 −21)
- paddle/function/Function.cpp (+13 −3)
- paddle/function/Function.h (+46 −4)
- paddle/function/FunctionTest.cpp (+107 −0)
- paddle/function/FunctionTest.h (+162 −71)
- paddle/function/PadOp.cpp (+29 −24)
- paddle/function/PadOpTest.cpp (+12 −9)
- paddle/function/TensorShape.h (+11 −1)
- paddle/gserver/layers/ContextProjection.cpp (+17 −15)
- paddle/gserver/layers/PadLayer.cpp (+27 −27)
- paddle/gserver/layers/PadLayer.h (+4 −4)
- paddle/py_paddle/dataprovider_converter.py (+6 −0)
- paddle/scripts/travis/before_install.osx.sh (+1 −3)
- paddle/scripts/travis/build_and_test.sh (+1 −1)
- paddle/scripts/travis/docs.sh (+1 −1)
- python/paddle/trainer/config_parser.py (+1 −1)
- python/paddle/trainer_config_helpers/layers.py (+1 −1)
- python/paddle/trainer_config_helpers/tests/CMakeLists.txt (+7 −14)
- python/paddle/trainer_config_helpers/tests/configs/protostr/test_ntm_layers.protostr (+2 −2)
- python/paddle/trainer_config_helpers/tests/configs/run_tests.sh (+8 −6)
.travis.yml (+1 −1)
```diff
@@ -25,9 +25,9 @@ addons:
     packages:
       - gcc-4.8
       - g++-4.8
+      - gfortran-4.8
       - git
       - build-essential
-      - libatlas-base-dev
       - python
       - python-pip
       - python2.7-dev
```
cmake/cblas.cmake (+1 −1)
```diff
@@ -16,7 +16,7 @@
 set(CBLAS_FOUND OFF)

 ## Find MKL First.
-set(MKL_ROOT $ENV{MKL_ROOT} CACHE PATH "Folder contains MKL")
+set(MKL_ROOT $ENV{MKLROOT} CACHE PATH "Folder contains MKL")

 find_path(MKL_INCLUDE_DIR mkl.h PATHS ${MKL_ROOT}/include)
```
cmake/external/openblas.cmake (+25 −6)
```diff
@@ -15,7 +15,6 @@
 INCLUDE(cblas)

 IF(NOT ${CBLAS_FOUND})
-    MESSAGE(FATAL_ERROR "Please install OpenBlas, MKL or ATLAS.")
     INCLUDE(ExternalProject)

     SET(CBLAS_SOURCES_DIR ${THIRD_PARTY_PATH}/openblas)
@@ -28,20 +27,40 @@ IF(NOT ${CBLAS_FOUND})
         SET(CBLAS_LIBRARIES "${CBLAS_INSTALL_DIR}/lib/libopenblas.a" CACHE FILEPATH "openblas library" FORCE)
     ENDIF(WIN32)

+    IF(CMAKE_COMPILER_IS_GNUCC)
+        ENABLE_LANGUAGE(Fortran)
+        LIST(APPEND CBLAS_LIBRARIES gfortran pthread)
+    ENDIF(CMAKE_COMPILER_IS_GNUCC)
+
+    IF(NOT CMAKE_Fortran_COMPILER)
+        MESSAGE(FATAL_ERROR "To build lapack in libopenblas, "
+            "you need to set gfortran compiler: cmake .. -DCMAKE_Fortran_COMPILER=...")
+    ENDIF(NOT CMAKE_Fortran_COMPILER)
+
     ExternalProject_Add(
         openblas
         ${EXTERNAL_PROJECT_LOG_ARGS}
-        URL               "https://github.com/xianyi/OpenBLAS/archive/v0.2.19.tar.gz"
+        GIT_REPOSITORY    https://github.com/xianyi/OpenBLAS.git
+        GIT_TAG           v0.2.19
         PREFIX            ${CBLAS_SOURCES_DIR}
         INSTALL_DIR       ${CBLAS_INSTALL_DIR}
         BUILD_IN_SOURCE   1
-        CONFIGURE_COMMAND ""
-        BUILD_COMMAND     make CC=${CMAKE_C_COMPILER} FC=${CMAKE_Fortran_COMPILER}
-        INSTALL_COMMAND   make install PREFIX=<INSTALL_DIR>
+        BUILD_COMMAND     ${CMAKE_MAKE_PROGRAM} FC=${CMAKE_Fortran_COMPILER} CC=${CMAKE_C_COMPILER} HOSTCC=${CMAKE_C_COMPILER} NO_SHARED=1 libs netlib
+        INSTALL_COMMAND   ${CMAKE_MAKE_PROGRAM} install NO_SHARED=1 PREFIX=<INSTALL_DIR>
         UPDATE_COMMAND    ""
+        CONFIGURE_COMMAND ""
     )
+    ExternalProject_Add_Step(
+        openblas lapacke_install
+        COMMAND ${CMAKE_COMMAND} -E copy "${CBLAS_SOURCES_DIR}/src/openblas/lapack-netlib/LAPACKE/include/lapacke_mangling_with_flags.h" "${CBLAS_INSTALL_DIR}/include/lapacke_mangling.h"
+        COMMAND ${CMAKE_COMMAND} -E copy "${CBLAS_SOURCES_DIR}/src/openblas/lapack-netlib/LAPACKE/include/lapacke.h" "${CBLAS_INSTALL_DIR}/include/lapacke.h"
+        COMMAND ${CMAKE_COMMAND} -E copy "${CBLAS_SOURCES_DIR}/src/openblas/lapack-netlib/LAPACKE/include/lapacke_config.h" "${CBLAS_INSTALL_DIR}/include/lapacke_config.h"
+        COMMAND ${CMAKE_COMMAND} -E copy "${CBLAS_SOURCES_DIR}/src/openblas/lapack-netlib/LAPACKE/include/lapacke_utils.h" "${CBLAS_INSTALL_DIR}/include/lapacke_utils.h"
+        DEPENDEES install
+    )
     LIST(APPEND external_project_dependencies openblas)
 ENDIF()
 ENDIF(NOT ${CBLAS_FOUND})

 INCLUDE_DIRECTORIES(${CBLAS_INC_DIR})
```
cmake/external/protobuf.cmake (+1 −0)
```
@@ -54,6 +54,7 @@ ExternalProject_Add(
    CONFIGURE_COMMAND
    ${CMAKE_COMMAND} ${PROTOBUF_SOURCES_DIR}/src/protobuf/cmake
        -Dprotobuf_BUILD_TESTS=OFF
        -DZLIB_ROOT:FILEPATH=${ZLIB_ROOT}
        -DCMAKE_POSITION_INDEPENDENT_CODE=ON
        -DCMAKE_BUILD_TYPE=Release
        -DCMAKE_INSTALL_PREFIX=${PROTOBUF_INSTALL_DIR}
```
cmake/external/python.cmake (+1 −0)
```
@@ -31,6 +31,7 @@ IF(PYTHONLIBS_FOUND AND PYTHONINTERP_FOUND)
            "please use pip to upgrade protobuf.")
    ENDIF(${PY_GOOGLE.PROTOBUF_VERSION} VERSION_LESS "3.0.0")
ELSE(PYTHONLIBS_FOUND AND PYTHONINTERP_FOUND)
    MESSAGE(FATAL_ERROR "Please install python 2.7 before building PaddlePaddle.")

##################################### PYTHON ########################################
SET(PYTHON_SOURCES_DIR ${THIRD_PARTY_PATH}/python)
SET(PYTHON_INSTALL_DIR ${THIRD_PARTY_PATH}/install/python)
```
cmake/flags.cmake (+2 −0)
```diff
@@ -96,6 +96,7 @@ set(COMMON_FLAGS
     -Wno-unused-parameter
     -Wno-unused-function
     -Wno-error=literal-suffix
+    -Wno-error=sign-compare
     -Wno-error=unused-local-typedefs)

 set(GPU_COMMON_FLAGS
@@ -105,6 +106,7 @@ set(GPU_COMMON_FLAGS
     -Wdelete-non-virtual-dtor
     -Wno-unused-parameter
     -Wno-unused-function
+    -Wno-error=sign-compare
     -Wno-error=literal-suffix
     -Wno-error=unused-local-typedefs
     -Wno-error=unused-function
     # Warnings in Numpy Header.
```
cmake/system.cmake (+11 −2)
```diff
@@ -21,6 +21,7 @@ ELSE(WIN32)
     SET(MACOS_VERSION ${VERSION})
     SET(HOST_SYSTEM "macosx")
 ELSE(APPLE)
     IF(EXISTS "/etc/issue")
         FILE(READ "/etc/issue" LINUX_ISSUE)
         IF(LINUX_ISSUE MATCHES "CentOS")
@@ -31,6 +32,14 @@ ELSE(WIN32)
             SET(HOST_SYSTEM "ubuntu")
         ENDIF()
     ENDIF(EXISTS "/etc/issue")
+
+    IF(EXISTS "/etc/redhat-release")
+        FILE(READ "/etc/redhat-release" LINUX_ISSUE)
+        IF(LINUX_ISSUE MATCHES "CentOS")
+            SET(HOST_SYSTEM "centos")
+        ENDIF()
+    ENDIF(EXISTS "/etc/redhat-release")
 ENDIF(APPLE)
 ENDIF(WIN32)
@@ -47,7 +56,7 @@ SET(EXTERNAL_PROJECT_LOG_ARGS
     LOG_DOWNLOAD    0     # Wrap download in script to log output
     LOG_UPDATE      1     # Wrap update in script to log output
     LOG_CONFIGURE   1     # Wrap configure in script to log output
-    LOG_BUILD       1     # Wrap build in script to log output
+    LOG_BUILD       0     # Wrap build in script to log output
     LOG_TEST        1     # Wrap test in script to log output
-    LOG_INSTALL     1     # Wrap install in script to log output
+    LOG_INSTALL     0     # Wrap install in script to log output
)
```
doc/getstarted/build_and_install/build_from_source_en.md (+66 −2)
````diff
@@ -4,6 +4,8 @@ Installing from Sources
 * [1. Download and Setup](#download)
 * [2. Requirements](#requirements)
 * [3. Build on Ubuntu](#ubuntu)
+* [4. Build on Centos](#centos)

 ## <span id="download">Download and Setup</span>
 You can download PaddlePaddle from the [github source](https://github.com/PaddlePaddle/Paddle).
@@ -64,7 +66,8 @@ As a simple example, consider the following:
 1. **BLAS Dependencies(optional)**

-   Paddle will find BLAS from system's default path. But you can specify MKL, OpenBLAS or ATLAS via `MKL_ROOT`, `OPENBLAS_ROOT` or `ATLAS_ROOT`.
+   CMake will search BLAS libraries from system. If not found, OpenBLAS will be downloaded, built and installed automatically.
+   To utilize preinstalled BLAS, you can simply specify MKL, OpenBLAS or ATLAS via `MKL_ROOT`, `OPENBLAS_ROOT` or `ATLAS_ROOT`.

   ```bash
   # specify MKL
@@ -99,7 +102,7 @@ As a simple example, consider the following:
   ```bash
   # necessary
   sudo apt-get update
-  sudo apt-get install -y g++ make cmake build-essential libatlas-base-dev python python-pip libpython-dev git
+  sudo apt-get install -y g++ make cmake build-essential python python-pip libpython-dev git
   sudo pip install wheel numpy
   sudo pip install 'protobuf>=3.0.0'
   ```
@@ -150,3 +153,64 @@ export PATH=<path to install>/bin:$PATH
 # install PaddlePaddle Python modules.
 sudo pip install <path to install>/opt/paddle/share/wheels/*.whl
 ```
+
+## <span id="centos">Build on Centos 7</span>
+
+### Install Dependencies
+
+- **CPU Dependencies**
+
+  ```bash
+  # necessary
+  sudo yum update
+  sudo yum install -y epel-release
+  sudo yum install -y make cmake3 python-devel python-pip gcc-gfortran swig git
+  sudo pip install wheel numpy
+  sudo pip install 'protobuf>=3.0.0'
+  ```
+
+- **GPU Dependencies (optional)**
+
+  To build GPU version, you will need the following installed:
+
+      1. a CUDA-capable GPU
+      2. A supported version of Linux with a gcc compiler and toolchain
+      3. NVIDIA CUDA Toolkit (available at http://developer.nvidia.com/cuda-downloads)
+      4. NVIDIA cuDNN Library (availabel at https://developer.nvidia.com/cudnn)
+
+  The CUDA development environment relies on tight integration with the host development environment,
+  including the host compiler and C runtime libraries, and is therefore only supported on
+  distribution versions that have been qualified for this CUDA Toolkit release.
+
+  After downloading cuDNN library, issue the following commands:
+
+  ```bash
+  sudo tar -xzf cudnn-7.5-linux-x64-v5.1.tgz -C /usr/local
+  sudo chmod a+r /usr/local/cuda/include/cudnn.h /usr/local/cuda/lib64/libcudnn*
+  ```
+
+  Then you need to set LD\_LIBRARY\_PATH, PATH environment variables in ~/.bashrc.
+
+  ```bash
+  export LD_LIBRARY_PATH=/usr/local/cuda/lib64:$LD_LIBRARY_PATH
+  export PATH=/usr/local/cuda/bin:$PATH
+  ```
+
+### Build and Install
+
+As usual, the best option is to create build folder under paddle project directory.
+
+```bash
+mkdir build && cd build
+```
+
+Finally, you can build and install PaddlePaddle:
+
+```bash
+# you can add build option here, such as:
+cmake3 .. -DCMAKE_INSTALL_PREFIX=<path to install>
+# please use sudo make install, if you want to install PaddlePaddle into the system
+make -j `nproc` && make install
+# set PaddlePaddle installation path in ~/.bashrc
+export PATH=<path to install>/bin:$PATH
+# install PaddlePaddle Python modules.
+sudo pip install <path to install>/opt/paddle/share/wheels/*.whl
+```
````
doc/howto/deep_model/rnn/hierarchical_layer_cn.rst (+2 −2)
```diff
@@ -32,7 +32,7 @@ pooling_layer 的使用示例如下,详细见 :ref:`api_trainer_config_helpers
 - `pooling_type` 目前支持两种,分别是:MaxPooling()和AvgPooling()。

-- `agg_level=AggregateLevel.TIMESTEP` 时(默认值):
+- `agg_level=AggregateLevel.EACH_TIMESTEP` 时(默认值):

   - 作用:双层序列经过运算变成一个0层序列,或单层序列经过运算变成一个0层序列
   - 输入:一个双层序列,或一个单层序列
@@ -54,7 +54,7 @@ last_seq 的使用示例如下( :ref:`api_trainer_config_helpers_layers_first_
   last = last_seq(input=layer,
                   agg_level=AggregateLevel.EACH_SEQUENCE)

-- `agg_level=AggregateLevel.TIMESTEP` 时(默认值):
+- `agg_level=AggregateLevel.EACH_TIMESTEP` 时(默认值):

   - 作用:一个双层序列经过运算变成一个0层序列,或一个单层序列经过运算变成一个0层序列
   - 输入:一个双层序列或一个单层序列
```
doc/howto/usage/k8s/k8s_aws_en.md (+167 −128)
Diff collapsed on the original page; content not shown here.
doc/howto/usage/k8s/src/pserver_and_trainer.png (+0 −0)
New binary file (mode 0 → 100644, 70.0 KB); image content not shown.
paddle/function/BufferArg.cpp (+8 −4)
```diff
@@ -20,23 +20,27 @@ limitations under the License. */
 namespace paddle {

 const SequenceArg& BufferArg::sequence() const {
-  // CHECK_EQ(bufferType_, TENSOR_SEQUENCE_DATA);
+  CHECK_EQ(bufferType_, TENSOR_SEQUENCE_DATA);
   return dynamic_cast<const SequenceArg&>(*this);
 }

 const SparseMatrixArg& BufferArg::sparse() const {
-  // CHECK_EQ(bufferType_, TENSOR_SPARSE);
+  CHECK_EQ(bufferType_, TENSOR_SPARSE);
   return dynamic_cast<const SparseMatrixArg&>(*this);
 }

 SparseMatrixArg::SparseMatrixArg(const CpuSparseMatrix& sparse, ArgType argType)
     : BufferArg(sparse, argType),
       row_(reinterpret_cast<void*>(sparse.getRows()), VALUE_TYPE_INT32),
-      col_(reinterpret_cast<void*>(sparse.getCols()), VALUE_TYPE_INT32) {}
+      col_(reinterpret_cast<void*>(sparse.getCols()), VALUE_TYPE_INT32) {
+  bufferType_ = TENSOR_SPARSE;
+}

 SparseMatrixArg::SparseMatrixArg(const GpuSparseMatrix& sparse, ArgType argType)
     : BufferArg(sparse, argType),
       row_(reinterpret_cast<void*>(sparse.getRows()), VALUE_TYPE_INT32),
-      col_(reinterpret_cast<void*>(sparse.getCols()), VALUE_TYPE_INT32) {}
+      col_(reinterpret_cast<void*>(sparse.getCols()), VALUE_TYPE_INT32) {
+  bufferType_ = TENSOR_SPARSE;
+}

 }  // namespace paddle
```
paddle/function/BufferArg.h (+54 −10)
```diff
@@ -23,10 +23,11 @@ limitations under the License. */
 namespace paddle {

 enum BufferType {
-  TENSOR_NORMAL = 0,
-  TENSOR_SEQUENCE_ID = 1,
-  TENSOR_SEQUENCE_DATA = 2,
-  TENSOR_SPARSE = 3
+  TENSOR_UNKNOWN = 0,
+  TENSOR_NORMAL = 1,
+  TENSOR_SEQUENCE_ID = 2,
+  TENSOR_SEQUENCE_DATA = 3,
+  TENSOR_SPARSE = 4
 };

 enum SparseDataType {
@@ -39,7 +40,6 @@ enum SparseDataFormat { SPARSE_CSR_FORMAT = 0, SPARSE_CSC_FORMAT = 1 };
 class BufferArg;
 class SequenceArg;
 class SparseMatrixArg;
-typedef std::shared_ptr<BufferArg> BufferArgPtr;

 /**
  * \brief BufferArg used as the argument type of Function.
@@ -50,6 +50,11 @@ typedef std::shared_ptr<BufferArg> BufferArgPtr;
  * 3. SequenceArg for a Buffer of sequence data.
  * 4. SparseMatrixArg for a Buffer of sparse matrix.
  *
+ * Buffer shape
+ * For most buffers, the first dimension `shape()[0]` represents
+ * the size of the mini-batch.
+ *
  * Buffer argType
  * There is an ArgType property for the BufferArg used as Function Output.
  * Whether the result of the Function calculation is assigned to the
  * output Buffer or added to the output Buffer is determined by the
@@ -71,6 +76,14 @@ public:
   ArgType getArgType() const { return argType_; }

+public:
+  BufferArg(ValueType valueType,
+            const TensorShape& shape,
+            ArgType argType = UNSPECIFIED)
+      : buf_(nullptr), valueType_(valueType), shape_(shape), argType_(argType) {}
+
   BufferArg(void* buf,
             ValueType valueType,
             const TensorShape& shape,
@@ -86,6 +99,7 @@ public:
         valueType_(DataType<real>::value),
         shape_(2),
         argType_(argType) {
+    bufferType_ = TENSOR_NORMAL;
     shape_.setDim(0, matrix.getHeight());
     shape_.setDim(1, matrix.getWidth());
   }
@@ -98,6 +112,7 @@ public:
         valueType_(DataType<real>::value),
         shape_(shape),
         argType_(argType) {
+    bufferType_ = TENSOR_NORMAL;
     CHECK_EQ(matrix.getElementCnt(), shape.getElements());
   }
@@ -107,6 +122,7 @@ public:
         valueType_(DataType<real>::value),
         shape_(1),
         argType_(argType) {
+    bufferType_ = TENSOR_NORMAL;
     shape_.setDim(0, vector.getSize());
   }
@@ -116,6 +132,7 @@ public:
         valueType_(VALUE_TYPE_INT32),
         shape_(1),
         argType_(argType) {
+    bufferType_ = TENSOR_NORMAL;
     shape_.setDim(0, vector.getSize());
   }
@@ -150,6 +167,8 @@ public:
   ValueType valueType() const { return valueType_; }
   BufferType bufferType() const { return bufferType_; }
   const TensorShape& shape() const { return shape_; }
+  bool isSparse() const { return (TENSOR_SPARSE == bufferType_); }
+  bool isSequenceArg() const { return TENSOR_SEQUENCE_DATA == bufferType_; }

   const SequenceArg& sequence() const;
   const SparseMatrixArg& sparse() const;
@@ -158,8 +177,8 @@ protected:
   void* buf_;
   ValueType valueType_;
   TensorShape shape_;
-  BufferType bufferType_;
-  ArgType argType_ = UNSPECIFIED;
+  BufferType bufferType_{TENSOR_UNKNOWN};
+  ArgType argType_{UNSPECIFIED};
   // leading dimensions. The size is dims_.size()
   // Dims lds_;
 };
@@ -170,15 +189,24 @@ protected:
 // if a < b then value_.buf_[a] < value_.buf_[b]
 class SequenceIdArg : public BufferArg {
 public:
+  SequenceIdArg(const TensorShape& shape, ArgType argType = UNSPECIFIED)
+      : BufferArg(VALUE_TYPE_INT32, shape, argType) {
+    CHECK_EQ(shape_.ndims(), (size_t)1);
+    CHECK_GT(shape_[0], 1);
+    numSeqs_ = shape_[0] - 1;
+  }
+
   SequenceIdArg(void* buf, const TensorShape& shape, ArgType argType = UNSPECIFIED)
       : BufferArg(buf, VALUE_TYPE_INT32, shape, argType) {
+    bufferType_ = TENSOR_SEQUENCE_ID;
     CHECK_EQ(shape_.ndims(), (size_t)1);
     numSeqs_ = shape_[0] - 1;
   }

   SequenceIdArg(const IVector& vector) : BufferArg(vector) {
+    bufferType_ = TENSOR_SEQUENCE_ID;
     numSeqs_ = shape_[0] - 1;
   }
@@ -190,26 +218,41 @@ private:
   size_t numSeqs_;
 };

-// sequence data
+// sequences data
+// For mini-batch calculate,
+// one batch can contain more than one sequence of data.
+// SequenceArg can be used to represent sequences that contain multiple
+// unequal lengths.
 class SequenceArg : public BufferArg {
 public:
+  SequenceArg(ValueType valueType,
+              const TensorShape& shape,
+              ArgType argType = UNSPECIFIED)
+      : BufferArg(valueType, shape, argType), startPositions_(TensorShape()) {}
+
   SequenceArg(void* buf,
               ValueType valueType,
               const TensorShape& shape,
               const SequenceIdArg& startPositions,
               ArgType argType = UNSPECIFIED)
       : BufferArg(buf, valueType, shape, argType),
-        startPositions_(startPositions) {}
+        startPositions_(startPositions) {
+    bufferType_ = TENSOR_SEQUENCE_DATA;
+  }

   SequenceArg(const Matrix& matrix,
               const IVector& vector,
               ArgType argType = UNSPECIFIED)
-      : BufferArg(matrix, argType), startPositions_(vector) {}
+      : BufferArg(matrix, argType), startPositions_(vector) {
+    bufferType_ = TENSOR_SEQUENCE_DATA;
+  }

   ~SequenceArg() {}

   void* getIdBuf() const { return startPositions_.data(); }
   size_t numSeqs() const { return startPositions_.numSeqs(); }
+  SequenceIdArg& getSequenceId() { return startPositions_; }
+  const SequenceIdArg& getSequenceId() const { return startPositions_; }

 private:
   SequenceIdArg startPositions_;
@@ -235,6 +278,7 @@ public:
         nnz_(nnz),
         format_(format),
         type_(type) {
+    bufferType_ = TENSOR_SPARSE;
     CHECK((valueType == VALUE_TYPE_FLOAT) || (valueType == VALUE_TYPE_DOUBLE));
     CHECK_EQ(shape_.ndims(), (size_t)2);
     CHECK_EQ(row_.shape().ndims(), (size_t)1);
```
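The SequenceIdArg constructors above derive `numSeqs_ = shape_[0] - 1` from a buffer of cumulative start positions. The following is a minimal standalone sketch (plain C++ with hypothetical data, not Paddle's API) of that convention: a vector of n+1 offsets describes n variable-length sequences packed into one mini-batch.

```cpp
#include <cstdio>
#include <vector>

int main() {
  // Start positions for 3 sequences packed into a 9-row batch:
  // rows [0,3), [3,7), [7,9). numSeqs = positions.size() - 1.
  std::vector<int> startPositions = {0, 3, 7, 9};
  size_t numSeqs = startPositions.size() - 1;
  std::printf("numSeqs = %zu\n", numSeqs);
  for (size_t i = 0; i < numSeqs; ++i) {
    std::printf("sequence %zu: rows [%d, %d), length %d\n", i,
                startPositions[i], startPositions[i + 1],
                startPositions[i + 1] - startPositions[i]);
  }
  return 0;
}
```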
paddle/function/BufferArgTest.cpp (+0 −53)
```diff
@@ -14,9 +14,7 @@ limitations under the License. */
 #include "BufferArg.h"
 #include <gtest/gtest.h>
-#include "Function.h"
-#include "paddle/math/MemoryHandle.h"
 #include "paddle/math/SparseMatrix.h"

 namespace paddle {
@@ -37,55 +35,4 @@ TEST(BufferTest, SequenceIdArg) {
   EXPECT_EQ(buffer.numSeqs(), 9);
 }

-TEST(BufferTest, asArgument) {
-  MatrixPtr matrix = Matrix::create(100, 200);
-  VectorPtr vector = Vector::create(100, false);
-  CpuSparseMatrix sparse(200, 300, 50);
-
-  // prepare arguments
-  BufferArgs argments;
-  argments.addArg(*matrix);
-  argments.addArg(*vector);
-  argments.addArg(sparse);
-
-  // function
-  auto function = [=](const BufferArgs& inputs) {
-    EXPECT_EQ(inputs.size(), 3);
-
-    // check inputs[0]
-    EXPECT_EQ(inputs[0].shape().ndims(), 2);
-    EXPECT_EQ(inputs[0].shape()[0], 100);
-    EXPECT_EQ(inputs[0].shape()[1], 200);
-    EXPECT_EQ(inputs[0].data(), matrix->getData());
-    EXPECT_EQ(inputs[0].matrix<DEVICE_TYPE_CPU>().getHeight(), matrix->getHeight());
-    EXPECT_EQ(inputs[0].matrix<DEVICE_TYPE_CPU>().getWidth(), matrix->getWidth());
-    EXPECT_EQ(inputs[0].matrix<DEVICE_TYPE_CPU>().getData(), matrix->getData());
-
-    // check inputs[1]
-    EXPECT_EQ(inputs[1].shape().ndims(), 1);
-    EXPECT_EQ(inputs[1].shape()[0], 100);
-    EXPECT_EQ(inputs[1].data(), vector->getData());
-    CpuVector inVector = inputs[1].vector<real, DEVICE_TYPE_CPU>();
-    EXPECT_EQ(inVector.getSize(), vector->getSize());
-    EXPECT_EQ(inVector.getData(), vector->getData());
-
-    // check inputs[2]
-    EXPECT_EQ(inputs[2].shape().ndims(), 2);
-    EXPECT_EQ(inputs[2].shape()[0], 200);
-    EXPECT_EQ(inputs[2].shape()[1], 300);
-    EXPECT_EQ(inputs[2].data(), sparse.getData());
-    // CHECK_EQ(inputs[2].sparse().nnz(), 50);
-    // CHECK_EQ(inputs[2].sparse().dataFormat(), SPARSE_CSR_FORMAT);
-    // CHECK_EQ(inputs[2].sparse().dataType(), SPARSE_FLOAT_VALUE);
-    EXPECT_EQ(inputs[2].sparse().getRowBuf(), sparse.getRows());
-    EXPECT_EQ(inputs[2].sparse().getColBuf(), sparse.getCols());
-  };
-
-  // call function
-  function(argments);
-}

 }  // namespace paddle
```
paddle/function/CMakeLists.txt (+2 −2)
```diff
@@ -19,13 +19,13 @@ if(WITH_TESTING)
     # TODO:
     # file(GLOB test_files . *OpTest.cpp)
     # add_executable(${test_bin} EXCLUDE_FROM_ALL ${test_files})
-    # add_simple_unittest(CrossMapNormalOpTest)
+    add_simple_unittest(CrossMapNormalOpTest)
     add_simple_unittest(TensorShapeTest)
     add_simple_unittest(TensorTypeTest)
     add_simple_unittest(BufferArgTest)
     add_simple_unittest(FunctionTest)
+    add_simple_unittest(ContextProjectionOpTest)
     add_simple_unittest(PadOpTest)
-    # add_simple_unittest(ContextProjectionOpTest)
   endif()
 endif()
```
paddle/function/ContextProjectionOp.cpp (+153 −105)
```diff
@@ -17,7 +17,10 @@ limitations under the License. */
 #include "paddle/math/Vector.h"

 namespace paddle {
+/**
+ * Context Projection Forward with CPU Matrix Device.
+ *
+ */
 template <>
 void ContextProjectionForward<DEVICE_TYPE_CPU>(CpuMatrix& out_mat,
                                                const CpuMatrix& input_mat,
@@ -70,10 +73,30 @@ void ContextProjectionForward<DEVICE_TYPE_CPU>(CpuMatrix& out_mat,
 }

 /**
- * \param inputs[0] input value.
- * \param inputs[1] input weight.
- * \param inputs[2] input sequence.
- * \param outputs[0] output value.
+ * Paddle Function for Context Projection Forward.
+ * Calculate the output layer value sequence after context projection.
+ *
+ * What is Context Projection for a sequence?
+ * For example, assumed input (x) has 4 words and the dimension of each word
+ * representation is 2. If we use zero to pad instead of learned weight to pad,
+ * and the context_lenth is 3, the output (y) is:
+ *
+ * @code
+ *  x = [a1, a2;
+ *       b1, b2;
+ *       c1, c2;
+ *       d1, d2]
+ *  y = [0,  0,  a1, a2, b1, b2;
+ *       a1, a2, b1, b2, c1, c2;
+ *       b1, b2, c1, c2, d1, d2;
+ *       c1, c2, d1, d2, 0,  0]
+ * @endcode
+ *
+ * \param outputs[0].matrix   output layer value, n * (d * l)
+ * \param outputs[0].vector   start position sequence, n * 1
+ * \param inputs[0].matrix    input layer value, n * d
+ * \param inputs[0].vector    start position sequence, n * 1
+ * \param inputs[1].matrix    input layer weight, pad * d
 */
 template <DeviceType Device>
 class ContextProjectionForwardFunc : public FunctionBase {
@@ -85,28 +108,37 @@ public:
   }

   void calc(const BufferArgs& inputs, const BufferArgs& outputs) override {
-    CHECK_EQ((size_t)3, inputs.size());
+    CHECK(1 == inputs.size() || 2 == inputs.size());
     CHECK_EQ((size_t)1, outputs.size());
+    CHECK(inputs[0].isSequenceArg() && outputs[0].isSequenceArg())
+        << "SequenceArg required here";
+    const auto val_seqs = dynamic_cast<const SequenceArg&>(inputs[0]);
+    auto out_seq = dynamic_cast<const SequenceArg&>(outputs[0]);

-    CHECK(outputs[0].data() && inputs[0].data() && inputs[2].data());
-    CHECK_EQ(outputs[0].shape().ndims(), (size_t)2);
-    CHECK_EQ(inputs[0].shape().ndims(), (size_t)2);
-    CHECK_EQ(inputs[1].shape().ndims(), (size_t)2);
-    CHECK_EQ(inputs[2].shape().ndims(), (size_t)1);
+    CHECK(out_seq.data() && val_seqs.data() && val_seqs.getSequenceId().data());
+    CHECK_EQ(out_seq.shape().ndims(), (size_t)2);
+    CHECK_EQ(val_seqs.shape().ndims(), (size_t)2);
+    CHECK_EQ(val_seqs.getSequenceId().shape().ndims(), (size_t)1);
+    if (2 == inputs.size()) {
+      CHECK_EQ(inputs[1].shape().ndims(), (size_t)2);
+    }
     /// dim of output = dim of input * context_length
-    CHECK_EQ(outputs[0].shape()[1], inputs[0].shape()[1] * context_length_);
-    /// dim of input == dim of weight
-    CHECK_EQ(inputs[0].shape()[1], inputs[1].shape()[1]);
+    CHECK_EQ(out_seq.shape()[1], val_seqs.shape()[1] * context_length_);
     /// input and output has the same batch_size
-    CHECK_EQ(inputs[0].shape()[0], outputs[0].shape()[0]);
+    CHECK_EQ(val_seqs.shape()[0], out_seq.shape()[0]);
+    /// dim of input == dim of weight
+    if (2 == inputs.size()) {
+      CHECK_EQ(val_seqs.shape()[1], inputs[1].shape()[1]);
+    }

-    CHECK_EQ(outputs[0].getArgType(), ADD_TO);
-    auto out_mat = outputs[0].matrix<Device>();
-    auto in_mat = inputs[0].matrix<Device>();
-    auto w_mat = !inputs[1].data()
-                     ? typename Tensor<real, Device>::Matrix(nullptr, 0, 0)
-                     : inputs[1].matrix<Device>();
-    auto seq_vec = inputs[2].vector<int, Device>();
+    CHECK_EQ(out_seq.getArgType(), ADD_TO);
+    auto out_mat = out_seq.matrix<Device>();
+    const auto in_mat = val_seqs.matrix<Device>();
+    const auto w_mat =
+        (2 == inputs.size())
+            ? inputs[1].matrix<Device>()
+            : typename Tensor<real, Device>::Matrix(nullptr, 0, 0);
+    const auto seq_vec = val_seqs.getSequenceId().vector<int, Device>();
     ContextProjectionForward<Device>(out_mat,
                                      in_mat,
                                      w_mat,
@@ -122,8 +154,12 @@ private:
   size_t begin_pad_;
 };

+/**
+ * Context Projection Backward with CPU Matrix Device.
+ *
+ */
 template <>
-void ContextProjectionBackward<DEVICE_TYPE_CPU>(CpuMatrix& out_grad_mat,
+void ContextProjectionBackward<DEVICE_TYPE_CPU>(const CpuMatrix& out_grad_mat,
                                                 CpuMatrix& in_grad_mat,
                                                 CpuMatrix& w_grad_mat,
                                                 const CpuIVector& seq_vec,
@@ -146,7 +182,8 @@ void ContextProjectionBackward<DEVICE_TYPE_CPU>(CpuMatrix& out_grad_mat,
         int64_t pad_size = std::min(starts[i] - begin, starts[i + 1] - starts[i]);
         if (is_padding && w_grad_mat) {
-          MatrixPtr mat = out_grad_mat.subMatrix(starts[i], pad_size);
+          MatrixPtr mat = const_cast<CpuMatrix&>(out_grad_mat)
+                              .subMatrix(starts[i], pad_size);
           MatrixPtr sub = w_grad_mat.subMatrix(j, pad_size);
           sub->addAtOffset(*mat, j * input_dim);
         }
@@ -157,8 +194,8 @@ void ContextProjectionBackward<DEVICE_TYPE_CPU>(CpuMatrix& out_grad_mat,
         int64_t pad_size = std::min(end - starts[i + 1], starts[i + 1] - starts[i]);
         if (is_padding && w_grad_mat) {
-          MatrixPtr mat = out_grad_mat.subMatrix(starts[i + 1] - pad_size, pad_size);
+          MatrixPtr mat = const_cast<CpuMatrix&>(out_grad_mat)
+                              .subMatrix(starts[i + 1] - pad_size, pad_size);
           MatrixPtr sub = w_grad_mat.subMatrix(begin_pad + context_start + j - pad_size, pad_size);
           sub->addAtOffset(*mat, j * input_dim);
@@ -169,17 +206,22 @@ void ContextProjectionBackward<DEVICE_TYPE_CPU>(CpuMatrix& out_grad_mat,
       if (end <= begin) continue;
       if (!in_grad_mat) continue;
       MatrixPtr src = in_grad_mat.subMatrix(begin, end - begin);
-      MatrixPtr dst = out_grad_mat.subMatrix(dst_begin, dst_end - dst_begin);
+      MatrixPtr dst = const_cast<CpuMatrix&>(out_grad_mat)
+                          .subMatrix(dst_begin, dst_end - dst_begin);
       src->addAtOffset(*dst, j * input_dim);
     }
   }
 }

 /**
- * \param inputs[0] input grad.
- * \param inputs[1] weight grad.
- * \param inputs[2] input sequence.
- * \param outputs[0] output value.
+ * Context Projection Backward Function.
+ * Update the weight gradient and input layer gradient with backprop
+ *
+ * \param inputs[0].matrix   output layer grad, n * (d * l)
+ * \param inputs[0].vector   start position sequence, n * 1
+ * \param outputs[0].matrix  input layer grad, n * d
+ * \param outputs[0].vector  start position sequence, n * 1
+ * \param outputs[1]         weight grad, pad * d
 */
 template <DeviceType Device>
 class ContextProjectionBackwardFunc : public FunctionBase {
@@ -193,32 +235,36 @@ public:
   }

   void calc(const BufferArgs& inputs, const BufferArgs& outputs) override {
-    CHECK_EQ((size_t)3, inputs.size());
-    CHECK_EQ((size_t)1, outputs.size());
-    CHECK(outputs[0].data() && inputs[2].data());
-    CHECK_EQ(outputs[0].shape().ndims(), (size_t)2);
-    CHECK_EQ(inputs[0].shape().ndims(), (size_t)2);
-    CHECK_EQ(inputs[1].shape().ndims(), (size_t)2);
-    CHECK_EQ(inputs[2].shape().ndims(), (size_t)1);
-    /// dim of input == dim of weight
-    CHECK_EQ(inputs[0].shape()[1], inputs[1].shape()[1]);
-    /// input and output has the same batch_size
-    CHECK_EQ(inputs[0].shape()[0], outputs[0].shape()[0]);
-    /// dim of output = dim of input * context_length
-    CHECK_EQ(outputs[0].shape()[1], inputs[0].shape()[1] * context_length_);
-    CHECK_EQ(outputs[0].getArgType(), ADD_TO);
+    CHECK_EQ((size_t)1, inputs.size());
+    CHECK_EQ((size_t)2, outputs.size());
+    CHECK(inputs[0].isSequenceArg() && outputs[0].isSequenceArg())
+        << "SequenceArg required here";
+    const auto in_seq = dynamic_cast<const SequenceArg&>(inputs[0]);
+    auto out_seq = dynamic_cast<const SequenceArg&>(outputs[0]);
+    CHECK(in_seq.data() && in_seq.getSequenceId().data());
+    CHECK_EQ(in_seq.shape().ndims(), (size_t)2);
+    CHECK_EQ(in_seq.getSequenceId().shape().ndims(), (size_t)1);
+    CHECK_EQ(out_seq.shape().ndims(), (size_t)2);
+    CHECK_EQ(out_seq.getSequenceId().shape().ndims(), (size_t)1);
+    CHECK_EQ(outputs[1].shape().ndims(), (size_t)2);
+    /// dim of input grad == dim of weight
+    CHECK_EQ(out_seq.shape()[1], outputs[1].shape()[1]);
+    /// input and output grad has the same batch_size
+    CHECK_EQ(out_seq.shape()[0], in_seq.shape()[0]);
+    /// dim of output grad = dim of input grad * context_length
+    CHECK_EQ(in_seq.shape()[1], out_seq.shape()[1] * context_length_);
+    CHECK_EQ(out_seq.getArgType(), ADD_TO);
+    CHECK_EQ(outputs[1].getArgType(), ADD_TO);

-    auto out_grad_mat = outputs[0].matrix<Device>();
+    const auto seq_vec = in_seq.getSequenceId().vector<int, Device>();
+    const auto out_grad_mat = in_seq.matrix<Device>();
     auto in_grad_mat =
-        !inputs[0].data() ? typename Tensor<real, Device>::Matrix(nullptr, 0, 0)
-                          : inputs[0].matrix<Device>();
-    auto w_grad_mat = !inputs[1].data()
+        !out_seq.data() ? typename Tensor<real, Device>::Matrix(nullptr, 0, 0)
+                        : out_seq.matrix<Device>();
+    auto w_grad_mat = !outputs[1].data()
                           ? typename Tensor<real, Device>::Matrix(nullptr, 0, 0)
-                          : inputs[1].matrix<Device>();
-    auto seq_vec = inputs[2].vector<int, Device>();
+                          : outputs[1].matrix<Device>();
     ContextProjectionBackward<Device>(out_grad_mat,
                                       in_grad_mat,
                                       w_grad_mat,
@@ -238,11 +284,16 @@ private:
   size_t total_pad_;
 };

 #if 0
 /**
- * \param inputs[0] input grad.
- * \param inputs[1] input sequence.
- * \param outputs[0] output grad.
+ * Context Projection Backward Data Function
+ * Update input layer grad
+ * input:  sequence of output layer grad
+ * output: sequence of input layer grad
+ *
+ * \param outputs[0].matrix  input layer grad, n * d
+ * \param outputs[0].vector  start position sequence, n * 1
+ * \param inputs[0].matrix   output layer grad, n * (d * l)
+ * \param inputs[0].vector   start positon sequence, n * 1
 */
 template <DeviceType Device>
 class ContextProjectionBackwardDataFunc : public FunctionBase {
@@ -252,32 +303,30 @@ public:
     context_start_ = config.get<int>("context_start");
   }

-  void calc(const Arguments& inputs,
-            const Arguments& outputs,
-            const Arguments& inouts) override {
-    CHECK_EQ(2, static_cast<int>(inputs.size()));
+  void calc(const BufferArgs& inputs, const BufferArgs& outputs) override {
+    CHECK_EQ(1, static_cast<int>(inputs.size()));
     CHECK_EQ(1, static_cast<int>(outputs.size()));
-    CHECK_EQ(0, static_cast<int>(inouts.size()));
-    CHECK(inputs[0].getData() && outputs[0].getData() && inputs[1].getData());
-    CHECK_EQ(static_cast<int>(outputs[0].dims_.size()), 2);
-    CHECK_EQ(static_cast<int>(inputs[0].dims_.size()), 2);
-    CHECK_EQ(static_cast<int>(inputs[1].dims_.size()), 1);
-    CHECK_EQ(outputs[0].dims_[1], inputs[0].dims_[1] * context_length_);
+    CHECK(inputs[0].isSequenceArg() && outputs[0].isSequenceArg())
+        << "SequenceArg required here";
+    const auto in_seq = dynamic_cast<const SequenceArg&>(inputs[0]);
+    const auto out_seq = dynamic_cast<const SequenceArg&>(outputs[0]);
+
+    CHECK(in_seq.data() && out_seq.data() && in_seq.getSequenceId().data());
+    CHECK_EQ(static_cast<int>(out_seq.shape().ndims()), 2);
+    CHECK_EQ(static_cast<int>(in_seq.shape().ndims()), 2);
+    CHECK_EQ(static_cast<int>(in_seq.getSequenceId().shape().ndims()), 1);
+    /// output layer grad dim == input layer grad dim * context_length_
+    CHECK_EQ(in_seq.shape().ndims(), out_seq.shape().ndims() * context_length_);
     /// input and output has the same batch_size
-    CHECK_EQ(inputs[0].dims_[0], outputs[0].dims_[0]);
+    CHECK_EQ(in_seq.shape()[0], out_seq.shape()[0]);
+    CHECK_EQ(outputs[0].getArgType(), ASSIGN_TO);

-    auto out_grad_mat = std::make_shared<typename MatrixT<Device>::type>(
-        outputs[0].getData(), outputs[0].dims_[0], outputs[0].dims_[1]);
-    const auto in_grad_mat = std::make_shared<typename MatrixT<Device>::type>(
-        inputs[0].getData(), inputs[0].dims_[0], inputs[0].dims_[1]);
-    typename SequenceT<Device>::type seq_vec(
-        inputs[1].dims_[0], reinterpret_cast<int*>(inputs[1].getData()));
+    const auto out_grad_mat = in_seq.matrix<Device>();
+    const auto seq_vec = in_seq.getSequenceId().vector<int, Device>();
+    auto in_grad_mat = out_seq.matrix<Device>();

-    ContextProjectionBackwardData<Device>(out_grad_mat.get(),
-                                          in_grad_mat.get(),
-                                          seq_vec,
-                                          context_length_,
-                                          context_start_);
+    ContextProjectionBackwardData<Device>(
+        out_grad_mat, in_grad_mat, seq_vec, context_length_, context_start_);
   }

 private:
@@ -286,9 +335,14 @@ private:
 };

 /**
- * \param inputs[0] weight grad.
- * \param inputs[1] input sequence.
- * \param outputs[0] output grad.
+ * Context Projection Backward Weight Function
+ * Update weight grad by backprop
+ * input:  sequence of output layer grad
+ * output: weight grad
+ *
+ * \param outputs[0]         weight grad, pad * d
+ * \param inputs[0].matrix   output layer grad, n * (d * l)
+ * \param inputs[0].vecotr   start positon sequence, n * 1
 */
 template <DeviceType Device>
 class ContextProjectionBackwardWeightFunc : public FunctionBase {
@@ -300,28 +354,25 @@ public:
     total_pad_ = config.get<size_t>("total_pad");
   }

-  void calc(const Arguments& inputs,
-            const Arguments& outputs,
-            const Arguments& inouts) override {
-    CHECK_EQ(2, static_cast<int>(inputs.size()));
+  void calc(const BufferArgs& inputs, const BufferArgs& outputs) override {
+    CHECK_EQ(1, static_cast<int>(inputs.size()));
     CHECK_EQ(1, static_cast<int>(outputs.size()));
-    CHECK_EQ(0, static_cast<int>(inouts.size()));
-    CHECK(inputs[0].getData() && outputs[0].getData() && inputs[1].getData());
-    CHECK_EQ(static_cast<int>(outputs[0].dims_.size()), 2);
-    CHECK_EQ(static_cast<int>(inputs[0].dims_.size()), 2);
-    CHECK_EQ(static_cast<int>(inputs[1].dims_.size()), 1);
-    CHECK_EQ(outputs[0].dims_[1], inputs[0].dims_[1] * context_length_);
-
-    auto out_grad_mat = std::make_shared<typename MatrixT<Device>::type>(
-        outputs[0].getData(), outputs[0].dims_[0], outputs[0].dims_[1]);
-    auto w_grad_mat = std::make_shared<typename MatrixT<Device>::type>(
-        inputs[0].getData(), inputs[0].dims_[0], inputs[0].dims_[1]);
-    typename SequenceT<Device>::type seq_vec(
-        inputs[1].dims_[0], reinterpret_cast<int*>(inputs[1].getData()));
+    CHECK(inputs[0].isSequenceArg()) << "SequenceArg required here";
+    const auto in_seq = dynamic_cast<const SequenceArg&>(inputs[0]);
+    CHECK(in_seq.data() && in_seq.getSequenceId().data() && outputs[0].data());
+    CHECK_EQ(static_cast<int>(outputs[0].shape().ndims()), 2);
+    CHECK_EQ(static_cast<int>(in_seq.shape().ndims()), 2);
+    CHECK_EQ(static_cast<int>(in_seq.getSequenceId().shape().ndims()), 1);
+    CHECK_EQ(in_seq.shape()[0], outputs[0].shape()[0]);
+    /// output layer grad dim == weight dim * context_length_
+    CHECK_EQ(in_seq.shape()[1], outputs[0].shape()[1] * context_length_);
+    CHECK_EQ(outputs[0].getArgType(), ADD_TO);

-    ContextProjectionBackwardWeight<Device>(out_grad_mat.get(),
-                                            w_grad_mat.get(),
+    const auto seq_vec = in_seq.getSequenceId().vector<int, Device>();
+    const auto out_grad_mat = in_seq.matrix<Device>();
+    auto w_grad_mat = outputs[0].matrix<Device>();
+    ContextProjectionBackwardWeight<Device>(out_grad_mat,
+                                            w_grad_mat,
                                             seq_vec,
                                             context_length_,
                                             context_start_,
@@ -335,7 +386,6 @@ private:
   size_t begin_pad_;
   size_t total_pad_;
 };
-#endif

 REGISTER_TYPED_FUNC(ContextProjectionForward,
                     CPU,
@@ -350,7 +400,6 @@ REGISTER_TYPED_FUNC(ContextProjectionForward,
 REGISTER_TYPED_FUNC(ContextProjectionBackward,
                     GPU,
                     ContextProjectionBackwardFunc);
-#if 0
 REGISTER_TYPED_FUNC(ContextProjectionBackwardData,
                     GPU,
                     ContextProjectionBackwardDataFunc);
@@ -358,5 +407,4 @@ REGISTER_TYPED_FUNC(ContextProjectionBackwardWeight,
                     GPU,
                     ContextProjectionBackwardWeightFunc);
 #endif
-#endif
 }  // namespace paddle
```
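The doc comment in the forward Function above defines context projection only by the x → y example. The following is a minimal standalone sketch (plain C++, not Paddle's API; it assumes zero padding and `context_start = -1`, which is what the 4-word example implies, with `context_length = 3` and `input_dim = 2`) that reproduces that mapping:

```cpp
#include <cstdio>
#include <vector>

int main() {
  const int n = 4, dim = 2, context_length = 3, context_start = -1;
  // Rows stand in for [a1 a2; b1 b2; c1 c2; d1 d2] from the example above.
  std::vector<std::vector<float>> x = {{1, 2}, {3, 4}, {5, 6}, {7, 8}};
  std::vector<std::vector<float>> y(n, std::vector<float>(dim * context_length, 0));

  for (int i = 0; i < n; ++i) {
    for (int c = 0; c < context_length; ++c) {
      int src = i + context_start + c;     // input row feeding context slot c
      if (src < 0 || src >= n) continue;   // outside the sequence: keep zero padding
      for (int d = 0; d < dim; ++d) y[i][c * dim + d] = x[src][d];
    }
  }
  for (const auto& row : y) {
    for (float v : row) std::printf("%g ", v);
    std::printf("\n");
  }
  return 0;
}
```

Running it prints the same 4 x 6 matrix as the `@code` block, with zeros where the context window falls outside the sequence.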
paddle/function/ContextProjectionOp.h (+11 −11)
```diff
@@ -21,7 +21,7 @@ namespace paddle {
 /**
  * \brief Context Projection Forward.
  *
- * \param[out]    outputs   output data.
+ * \param[in/out] outputs   output data.
  * \param[in]     input     input data.
  * \param[in]     weight    input weight.
  * \param[in]     sequence  input data.
@@ -56,7 +56,7 @@ void ContextProjectionForward(
 */
 template <DeviceType DType>
 void ContextProjectionBackward(
-    typename Tensor<real, DType>::Matrix& out_grad,
+    const typename Tensor<real, DType>::Matrix& out_grad,
     typename Tensor<real, DType>::Matrix& in_grad,
     typename Tensor<real, DType>::Matrix& w_grad,
     const typename Tensor<int, DType>::Vector& seq_vec,
@@ -68,7 +68,7 @@ void ContextProjectionBackward(
 template <DeviceType DType>
 void ContextProjectionBackwardData(
-    typename Tensor<real, DType>::Matrix& out_grad,
+    const typename Tensor<real, DType>::Matrix& out_grad,
     typename Tensor<real, DType>::Matrix& in_grad,
     const typename Tensor<int, DType>::Vector& sequence,
     size_t context_length,
@@ -76,7 +76,7 @@ void ContextProjectionBackwardData(
 template <DeviceType DType>
 void ContextProjectionBackwardWeight(
-    typename Tensor<real, DType>::Matrix& out_grad,
+    const typename Tensor<real, DType>::Matrix& out_grad,
     typename Tensor<real, DType>::Matrix& w_grad,
     const typename Tensor<int, DType>::Vector& seq_vec,
     size_t context_length,
```
paddle/function/ContextProjectionOpGpu.cu (+13 −11)
```diff
@@ -138,10 +138,10 @@ void ContextProjectionForward<DEVICE_TYPE_GPU>(GpuMatrix& output,
       begin_pad);
 }

-__global__ void KeContextProjectionBackwardData(real* out_grad,
+__global__ void KeContextProjectionBackwardData(const real* out_grad,
                                                 const int* sequence,
                                                 real* in_grad,
-                                                int input_dim,
+                                                size_t input_dim,
                                                 int context_length,
                                                 int context_start) {
   int idx = threadIdx.x;
@@ -152,7 +152,8 @@ __global__ void KeContextProjectionBackwardData(real* out_grad,
     real value = 0;

     int instances = seq_end - seq_start + context_length - 1;
-    out_grad += seq_start * input_dim * context_length;
+    auto out = const_cast<real*>(out_grad);
+    out += seq_start * input_dim * context_length;
     in_grad += seq_start * input_dim;
     for (int k = 0; k <= input_dim / block_size; k++) {
       if (idx < input_dim) {
@@ -169,7 +170,7 @@ __global__ void KeContextProjectionBackwardData(real* out_grad,
         int outx = (i - context_length) < 0 ? i : (context_length - 1);
         int outy = (i - context_length) < 0 ? 0 : (i - (context_length - 1));
         real* output_r =
-            out_grad + outy * input_dim * context_length + outx * input_dim;
+            out + outy * input_dim * context_length + outx * input_dim;
         for (int j = outy; j < seq_end - seq_start; j++) {
           value += output_r[idx];
           if (j - outy == outx) break;
@@ -194,7 +195,7 @@ __global__ void KeContextProjectionBackwardData(real* out_grad,
 * @param[in] context_start context start.
 *
 */
-void hl_context_projection_backward_data(real* out_grad,
+void hl_context_projection_backward_data(const real* out_grad,
                                          const int* sequence,
                                          real* input_grad,
                                          size_t num_sequences,
@@ -216,7 +217,7 @@ void hl_context_projection_backward_data(real* out_grad,
 }

 template <>
-void ContextProjectionBackwardData<DEVICE_TYPE_GPU>(GpuMatrix& out_grad,
+void ContextProjectionBackwardData<DEVICE_TYPE_GPU>(const GpuMatrix& out_grad,
                                                     GpuMatrix& in_grad,
                                                     const GpuIVector& sequence,
                                                     size_t context_length,
@@ -231,7 +232,7 @@ void ContextProjectionBackwardData<DEVICE_TYPE_GPU>(GpuMatrix& out_grad,
 }

 template <int THREADS_X, int THREADS_Y>
-__global__ void KeContextProjectionBackwardWeight(real* out_grad,
+__global__ void KeContextProjectionBackwardWeight(const real* out_grad,
                                                   const int* sequence,
                                                   real* w_grad,
                                                   int num_sequences,
@@ -254,7 +255,8 @@ __global__ void KeContextProjectionBackwardWeight(real* out_grad,
   for (int seqId = idy; seqId < num_sequences; seqId += THREADS_Y) {
     int seq_start = sequence[seqId];
     int seq_end = sequence[seqId + 1];
-    output_r = out_grad + seq_start * w_dim * context_length;
+    output_r =
+        const_cast<real*>(out_grad) + seq_start * w_dim * context_length;

     if (context_start < 0) {
       if (padId + context_start < 0) {
@@ -318,7 +320,7 @@ __global__ void KeContextProjectionBackwardWeight(real* out_grad,
 * beginning.
 *
 */
-void hl_context_projection_backward_weight(real* out_grad,
+void hl_context_projection_backward_weight(const real* out_grad,
                                            const int* sequence,
                                            real* w_grad,
                                            size_t num_sequences,
@@ -346,7 +348,7 @@ void hl_context_projection_backward_weight(real* out_grad,
 template <>
 void ContextProjectionBackwardWeight<DEVICE_TYPE_GPU>(
-    GpuMatrix& out_grad,
+    const GpuMatrix& out_grad,
     GpuMatrix& w_grad,
     const GpuIVector& seq_vec,
     size_t context_length,
@@ -365,7 +367,7 @@ void ContextProjectionBackwardWeight<DEVICE_TYPE_GPU>(
 template <>
-void ContextProjectionBackward<DEVICE_TYPE_GPU>(GpuMatrix& out_grad,
+void ContextProjectionBackward<DEVICE_TYPE_GPU>(const GpuMatrix& out_grad,
                                                 GpuMatrix& in_grad,
                                                 GpuMatrix& w_grad,
                                                 const GpuIVector& sequence,
```
paddle/function/ContextProjectionOpTest.cpp (+36 −35)
```diff
@@ -56,22 +56,25 @@ void testMatrixProjectionForward(int context_start,
   cpu_out.randomizeUniform();
   gpu_out.copyFrom(cpu_out);

-  compare.getCpuFunction()->calc(
-      {Tensor(cpu_in.getData(), Dims{batch_size, input_dim}),
-       Tensor(cpu_weight ? cpu_weight->getData() : nullptr, Dims{pad, input_dim}),
-       Tensor(reinterpret_cast<real*>(cpu_seq->getData()), Dims{cpu_seq->getSize()})},
-      {Tensor(cpu_out.getData(), Dims{batch_size, input_dim * context_length})},
-      {});
-  compare.getGpuFunction()->calc(
-      {Tensor(gpu_in.getData(), Dims{batch_size, input_dim}),
-       Tensor(gpu_weight ? gpu_weight->getData() : nullptr, Dims{pad, input_dim}),
-       Tensor(reinterpret_cast<real*>(gpu_seq->getData()), Dims{gpu_seq->getSize()})},
-      {Tensor(gpu_out.getData(), Dims{batch_size, input_dim * context_length})},
-      {});
+  BufferArgs cpu_inputs;
+  BufferArgs cpu_outputs;
+  cpu_inputs.addArg(cpu_in, *cpu_seq);
+  if (cpu_weight) {
+    cpu_inputs.addArg(*cpu_weight, *cpu_seq);
+  }
+  cpu_outputs.addArg(cpu_out, *cpu_seq, ADD_TO);
+
+  compare.getCpuFunction()->calc(cpu_inputs, cpu_outputs);
+
+  BufferArgs gpu_inputs;
+  BufferArgs gpu_outputs;
+  gpu_inputs.addArg(gpu_in, *gpu_seq);
+  if (gpu_weight) {
+    gpu_inputs.addArg(*gpu_weight, *gpu_seq);
+  }
+  gpu_outputs.addArg(gpu_out, *gpu_seq, ADD_TO);
+
+  compare.getGpuFunction()->calc(gpu_inputs, gpu_outputs);

   autotest::TensorCheckEqual(cpu_out, gpu_out);
 }
@@ -117,25 +120,23 @@ void testMatrixProjectionBackward(int context_start,
     gpu_w_grad->copyFrom(*cpu_w_grad);
   }

-  compare.getCpuFunction()->calc(
-      {Tensor(cpu_in_grad.getData(), Dims{batch_size, input_dim}),
-       Tensor(cpu_w_grad ? cpu_w_grad->getData() : nullptr, Dims{pad, input_dim}),
-       Tensor(reinterpret_cast<real*>(cpu_seq->getData()), Dims{cpu_seq->getSize()})},
-      {Tensor(cpu_out_grad.getData(), Dims{batch_size, input_dim * context_length})},
-      {});
-  compare.getGpuFunction()->calc(
-      {Tensor(gpu_in_grad.getData(), Dims{batch_size, input_dim}),
-       Tensor(gpu_w_grad ? gpu_w_grad->getData() : nullptr, Dims{pad, input_dim}),
-       Tensor(reinterpret_cast<real*>(gpu_seq->getData()), Dims{gpu_seq->getSize()})},
-      {Tensor(gpu_out_grad.getData(), Dims{batch_size, input_dim * context_length})},
-      {});
+  BufferArgs cpu_inputs;
+  BufferArgs cpu_outputs;
+  cpu_inputs.addArg(cpu_out_grad, *cpu_seq);
+  cpu_outputs.addArg(cpu_in_grad, *cpu_seq, ADD_TO);
+  cpu_outputs.addArg(
+      cpu_w_grad ? *cpu_w_grad : CpuMatrix(nullptr, 0, input_dim), ADD_TO);
+
+  compare.getCpuFunction()->calc(cpu_inputs, cpu_outputs);
+
+  BufferArgs gpu_inputs;
+  BufferArgs gpu_outputs;
+  gpu_inputs.addArg(gpu_out_grad, *gpu_seq);
+  gpu_outputs.addArg(gpu_in_grad, *gpu_seq, ADD_TO);
+  gpu_outputs.addArg(
+      gpu_w_grad ? *gpu_w_grad : GpuMatrix(nullptr, 0, input_dim), ADD_TO);
+
+  compare.getGpuFunction()->calc(gpu_inputs, gpu_outputs);

   autotest::TensorCheckErr(cpu_in_grad, gpu_in_grad);
   if (is_padding) {
```
paddle/function/CrossMapNormalOp.cpp (+7 −2)
```diff
@@ -188,8 +188,13 @@ public:
     CHECK(inputs[0].shape() == inputs[3].shape());
     CHECK(inputs[0].shape() == outputs[0].shape());

-    // TODO(hedaoyuan): need support ASSIGN_TO mode.
-    CHECK_EQ(outputs[0].getArgType(), ADD_TO);
+    if (outputs[0].getArgType() != ADD_TO) {
+      // Currently, some algorithm implementations are ASSIGN_TO mode,
+      // if need to support the ADD_TO calculation, need to clear the output.
+      typename Tensor<real, Device>::Vector tmp(
+          outputs[0].shape().getElements(), outputs[0].data<real>());
+      tmp.zero();
+    }

     size_t samples = inputs[0].shape()[0];
     size_t channels = inputs[0].shape()[1];
```
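The branch above supports an ASSIGN_TO request on top of an accumulating kernel by zeroing the output first. A minimal standalone sketch of that idea (plain C++, not Paddle's API; the kernel and names are illustrative):

```cpp
#include <algorithm>
#include <cstdio>
#include <vector>

enum ArgType { ASSIGN_TO, ADD_TO };

// Stand-in for an algorithm that can only accumulate into its output.
void accumulateKernel(const std::vector<float>& in, std::vector<float>& out) {
  for (size_t i = 0; i < in.size(); ++i) out[i] += 2.0f * in[i];
}

void calc(const std::vector<float>& in, std::vector<float>& out, ArgType type) {
  if (type != ADD_TO) {
    // ASSIGN_TO requested: clear the output so the accumulation below
    // behaves like a plain assignment.
    std::fill(out.begin(), out.end(), 0.0f);
  }
  accumulateKernel(in, out);
}

int main() {
  std::vector<float> in = {1, 2, 3};
  std::vector<float> out = {10, 10, 10};
  calc(in, out, ASSIGN_TO);  // out becomes {2, 4, 6}
  calc(in, out, ADD_TO);     // out becomes {4, 8, 12}
  for (float v : out) std::printf("%g ", v);
  std::printf("\n");
  return 0;
}
```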
paddle/function/CrossMapNormalOpTest.cpp (+26 −21)
```diff
@@ -27,15 +27,19 @@ TEST(CrossMapNormal, real) {
             << " imgSizeH=" << imgSizeH << " imgSizeW=" << imgSizeW
             << " size=" << size;

-          FunctionCompare compare("CrossMapNormal",
+          // init Test object
+          FunctionCompare test("CrossMapNormal",
                                   FuncConfig()
                                       .set("size", size)
                                       .set("scale", (real)1.5)
                                       .set("pow", (real)0.5));
-          Dims dims{numSamples, channels, imgSizeH, imgSizeW};
-          compare.cmpWithArg({Tensor(nullptr, dims)},
-                             {Tensor(nullptr, dims), Tensor(nullptr, dims)},
-                             {});
+          // prepare input arguments
+          TensorShape shape{numSamples, channels, imgSizeH, imgSizeW};
+          test.addInputs(BufferArg(VALUE_TYPE_FLOAT, shape));
+          test.addOutputs(BufferArg(VALUE_TYPE_FLOAT, shape));
+          test.addOutputs(BufferArg(VALUE_TYPE_FLOAT, shape));
+          // run Function
+          test.run();
         }
       }
     }
@@ -53,18 +57,19 @@ TEST(CrossMapNormalGrad, real) {
             << " imgSizeH=" << imgSizeH << " imgSizeW=" << imgSizeW
             << " size=" << size;

-          FunctionCompare compare("CrossMapNormalGrad",
+          FunctionCompare test("CrossMapNormalGrad",
                                   FuncConfig()
                                       .set("size", size)
                                       .set("scale", (real)1.5)
                                       .set("pow", (real)0.5));
-          Dims dims{numSamples, channels, imgSizeH, imgSizeW};
-          compare.cmpWithArg({Tensor(nullptr, dims),
-                              Tensor(nullptr, dims),
-                              Tensor(nullptr, dims),
-                              Tensor(nullptr, dims)},
-                             {Tensor(nullptr, dims)},
-                             {});
+          TensorShape shape{numSamples, channels, imgSizeH, imgSizeW};
+          test.addInputs(BufferArg(VALUE_TYPE_FLOAT, shape));
+          test.addInputs(BufferArg(VALUE_TYPE_FLOAT, shape));
+          test.addInputs(BufferArg(VALUE_TYPE_FLOAT, shape));
+          test.addInputs(BufferArg(VALUE_TYPE_FLOAT, shape));
+          test.addOutputs(BufferArg(VALUE_TYPE_FLOAT, shape));
+          // run Function
+          test.run();
         }
       }
     }
```
paddle/function/Function.cpp (+13 −3)
```diff
@@ -79,15 +79,25 @@ FuncConfig& FuncConfig::set<bool>(const std::string& key, bool v) {
 void BufferArgs::addArg(const Matrix& arg,
                         const TensorShape& shape,
                         ArgType argType) {
-  args_.push_back(std::make_shared<BufferArg>(arg, shape, argType));
+  _args_.push_back(new BufferArg(arg, shape, argType));
+  addArg(*_args_.back());
 }

 void BufferArgs::addArg(const CpuSparseMatrix& arg, ArgType argType) {
-  args_.push_back(std::make_shared<SparseMatrixArg>(arg, argType));
+  _args_.push_back(new SparseMatrixArg(arg, argType));
+  addArg(*_args_.back());
 }

 void BufferArgs::addArg(const GpuSparseMatrix& arg, ArgType argType) {
-  args_.push_back(std::make_shared<SparseMatrixArg>(arg, argType));
+  _args_.push_back(new SparseMatrixArg(arg, argType));
+  addArg(*_args_.back());
 }

+void BufferArgs::addArg(const Matrix& matrix,
+                        const IVector& vector,
+                        ArgType argType) {
+  _args_.push_back(new SequenceArg(matrix, vector, argType));
+  addArg(*_args_.back());
+}
+
 ClassRegistrar<FunctionBase> FunctionBase::funcRegistrar_;
```
paddle/function/Function.h (+46 −4)
```diff
@@ -50,19 +50,44 @@ protected:
  * Argument type for Function::calc().
  * A BufferArgs contains a set of BufferArg,
  * because Function can have multiple inputs and outputs.
+ *
+ * addArg() with Matix object used to adapt Layer Argument.
+ * Will create a BufferArg object in addArg(),
+ * and free in destructor of BufferArgs.
+ *
+ * addArg() with BufferArg object, just save BufferArg object address,
+ * and the caller needs to guarantee the validity of the BufferArg object
+ * in the BufferArgs life time.
 */
 class BufferArgs {
 public:
   BufferArgs() {}
+
+  ~BufferArgs() {
+    for (auto arg : _args_) {
+      delete arg;
+    }
+  }
+
   size_t size() const { return args_.size(); }

   // add argument into BufferArgs
   // Tensor can be Matrix, Vector, IVector.
   // For inputs, do not need argType.
   // For outputs, the argType needs to be specified as ASSIGN_TO or ADD_TO.
-  template <typename Tensor>
-  void addArg(const Tensor& arg, ArgType argType = UNSPECIFIED) {
-    args_.push_back(std::make_shared<BufferArg>(arg, argType));
+  void addArg(const Matrix& arg, ArgType argType = UNSPECIFIED) {
+    _args_.push_back(new BufferArg(arg, argType));
+    addArg(*_args_.back());
+  }
+
+  void addArg(const Vector& arg, ArgType argType = UNSPECIFIED) {
+    _args_.push_back(new BufferArg(arg, argType));
+    addArg(*_args_.back());
+  }
+
+  void addArg(const IVector& arg, ArgType argType = UNSPECIFIED) {
+    _args_.push_back(new BufferArg(arg, argType));
+    addArg(*_args_.back());
   }

   // Add arg into BufferArgs and reshape the arg.
@@ -77,20 +102,37 @@ public:
   void addArg(const CpuSparseMatrix& arg, ArgType argType = UNSPECIFIED);
   void addArg(const GpuSparseMatrix& arg, ArgType argType = UNSPECIFIED);

+  void addArg(const Matrix& matrix,
+              const IVector& vector,
+              ArgType argType = UNSPECIFIED);
+
   // get argument
   const BufferArg& operator[](size_t num) const {
     CHECK_LT(num, args_.size());
     return *args_[num];
   }

+  void addArg(BufferArg& arg) { args_.push_back(&arg); }
+
+  void addArg(SequenceIdArg& arg) { args_.push_back(&arg); }
+
+  void addArg(SequenceArg& arg) { args_.push_back(&arg); }
+
+  void addArg(SparseMatrixArg& arg) { args_.push_back(&arg); }
+
 private:
-  std::vector<BufferArgPtr> args_;
+  std::vector<BufferArg*> args_;
+  // The BufferArg object is constructed and freed by BufferArgs.
+  std::vector<BufferArg*> _args_;
 };

 /**
  * \brief Base class for Function.
  * The basic Function implementation requires override init and calc interfaces.
  *
+ * The caller needs to ensure the validity of the arguments
+ * during Function execution.
+ *
  * Function inputs are readonly, Function outputs have two modes: ASSIGN_TO
  * and ADD_TO.
  * If output.getArgType() == ASSIGN_TO, this is assign mode, and the calculation
```
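The new BufferArgs comment above describes two ownership rules: adapter overloads (Matrix, Vector, IVector, sparse, sequence) create a wrapper that BufferArgs owns and frees, while the BufferArg-reference overloads only store the caller's object address. A minimal standalone sketch of that pattern (simplified stand-in types, not Paddle's actual classes):

```cpp
#include <cstddef>
#include <vector>

struct BufferArg {};  // stand-in: wraps an external buffer, does not own it
struct Matrix {};     // stand-in for a layer-side tensor type

class BufferArgs {
public:
  ~BufferArgs() {
    for (auto* arg : owned_) delete arg;  // wrappers created here are freed here
  }

  // Adapter overload: builds a wrapper from a Matrix and takes ownership of it.
  void addArg(const Matrix& /*m*/) {
    owned_.push_back(new BufferArg());  // lifetime managed by BufferArgs
    addArg(*owned_.back());
  }

  // Reference overload: only stores the address; the caller must keep `arg`
  // alive for as long as this BufferArgs is used.
  void addArg(BufferArg& arg) { args_.push_back(&arg); }

  size_t size() const { return args_.size(); }

private:
  std::vector<BufferArg*> args_;   // non-owning view handed to Function::calc()
  std::vector<BufferArg*> owned_;  // wrappers constructed by adapter overloads
};
```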
paddle/function/FunctionTest.cpp
浏览文件 @
8d47499e
...
...
@@ -14,6 +14,7 @@ limitations under the License. */
#include "Function.h"
#include <gtest/gtest.h>
#include "paddle/math/SparseMatrix.h"
namespace
paddle
{
...
...
@@ -56,4 +57,110 @@ TEST(Function, BufferArgs) {
Function
<
DEVICE_TYPE_GPU
>
(
gpuArgments
);
}
/**
* Some tests case are used to check the consistency between the BufferArg type
* argument received by Function and the original type argument.
*
* Use Case:
* TEST() {
* Matrix matrix(...);
* CheckBufferArg lambda = [=](const BufferArg& arg) {
* // check matrix and arg are equivalent
* EXPECT_EQ(matrix, arg);
* }
*
* BufferArgs argments{matrix...};
* std::vector<CheckBufferArg> checkFunc{lambda...};
* testBufferArgs(argments, checkFunc);
* }
*/
typedef
std
::
function
<
void
(
const
BufferArg
&
)
>
CheckBufferArg
;
void
testBufferArgs
(
const
BufferArgs
&
inputs
,
const
std
::
vector
<
CheckBufferArg
>&
check
)
{
EXPECT_EQ
(
inputs
.
size
(),
check
.
size
());
for
(
size_t
i
=
0
;
i
<
inputs
.
size
();
i
++
)
{
check
[
i
](
inputs
[
i
]);
}
}
void
testBufferArgs
(
const
BufferArgs
&
inputs
,
const
CheckBufferArg
&
check
)
{
EXPECT_EQ
(
inputs
.
size
(),
1
);
check
(
inputs
[
0
]);
}
TEST
(
Arguments
,
Matrix
)
{
MatrixPtr
matrix
=
Matrix
::
create
(
100
,
200
);
CheckBufferArg
check
=
[
=
](
const
BufferArg
&
arg
)
{
EXPECT_EQ
(
arg
.
shape
().
ndims
(),
2
);
EXPECT_EQ
(
arg
.
shape
()[
0
],
100
);
EXPECT_EQ
(
arg
.
shape
()[
1
],
200
);
EXPECT_EQ
(
arg
.
data
(),
matrix
->
getData
());
EXPECT_EQ
(
arg
.
matrix
<
DEVICE_TYPE_CPU
>
().
getHeight
(),
matrix
->
getHeight
());
EXPECT_EQ
(
arg
.
matrix
<
DEVICE_TYPE_CPU
>
().
getWidth
(),
matrix
->
getWidth
());
EXPECT_EQ
(
arg
.
matrix
<
DEVICE_TYPE_CPU
>
().
getData
(),
matrix
->
getData
());
};
BufferArgs
argments
;
argments
.
addArg
(
*
matrix
);
std
::
vector
<
CheckBufferArg
>
checkFunc
;
checkFunc
.
push_back
(
check
);
testBufferArgs
(
argments
,
checkFunc
);
}
TEST(Arguments, Vector) {
  VectorPtr vector = Vector::create(100, false);
  CheckBufferArg check = [=](const BufferArg& arg) {
    EXPECT_EQ(arg.shape().ndims(), 1);
    EXPECT_EQ(arg.shape()[0], 100);
    EXPECT_EQ(arg.data(), vector->getData());

    CpuVector inVector = arg.vector<real, DEVICE_TYPE_CPU>();
    EXPECT_EQ(inVector.getSize(), vector->getSize());
    EXPECT_EQ(inVector.getData(), vector->getData());
  };

  BufferArgs argments;
  argments.addArg(*vector);
  std::vector<CheckBufferArg> checkFunc;
  checkFunc.push_back(check);
  testBufferArgs(argments, checkFunc);
}
TEST(Arguments, CpuSparseMatrix) {
  CpuSparseMatrix sparse(200, 300, 50);
  CheckBufferArg check = [=](const BufferArg& arg) {
    EXPECT_EQ(arg.shape().ndims(), 2);
    EXPECT_EQ(arg.shape()[0], 200);
    EXPECT_EQ(arg.shape()[1], 300);
    EXPECT_EQ(arg.data(), sparse.getData());
    // CHECK_EQ(arg.sparse().nnz(), 50);
    // CHECK_EQ(arg.sparse().dataFormat(), SPARSE_CSR_FORMAT);
    // CHECK_EQ(arg.sparse().dataType(), SPARSE_FLOAT_VALUE);
    EXPECT_EQ(arg.sparse().getRowBuf(), sparse.getRows());
    EXPECT_EQ(arg.sparse().getColBuf(), sparse.getCols());
  };

  BufferArgs argments;
  argments.addArg(sparse);
  std::vector<CheckBufferArg> checkFunc;
  checkFunc.push_back(check);
  testBufferArgs(argments, checkFunc);
}
TEST(Arguments, BufferArg) {
  BufferArg arg(nullptr, VALUE_TYPE_FLOAT, {1, 2, 3});
  CheckBufferArg check = [=](const BufferArg& arg) {
    EXPECT_EQ(arg.shape().ndims(), 3);
    EXPECT_EQ(arg.shape()[0], 1);
    EXPECT_EQ(arg.shape()[1], 2);
    EXPECT_EQ(arg.shape()[2], 3);
  };

  BufferArgs argments;
  argments.addArg(arg);
  testBufferArgs(argments, check);
}

}  // namespace paddle
paddle/function/FunctionTest.h (view file @ 8d47499e)
...
...
@@ -15,95 +15,186 @@ limitations under the License. */
#include "Function.h"
#include "paddle/math/Vector.h"
#include "paddle/math/tests/TensorCheck.h"
#include "paddle/testing/TestUtil.h"

namespace paddle {

typedef std::shared_ptr<BufferArg> BufferArgPtr;
/**
 * \brief A class for comparing the CPU and GPU implementations of a Function.
 *
 * Use case:
 *  // Initialize a test object; the corresponding CPU and GPU Functions
 *  // are constructed according to FunctionName and FuncConfig.
 *  FunctionCompare test(FunctionName, FuncConfig);
 *  // Prepare the input and output arguments.
 *  // Here the inputs and outputs do not contain real data;
 *  // they only carry the argument type and shape.
 *  test.addInputs(input1);
 *  test.addInputs(input2);
 *  test.addOutputs(output1);
 *  test.addOutputs(output2);
 *  // Run.
 *  // According to the type and shape of the arguments (inputs_/outputs_),
 *  // the buffers required by the CPU and GPU functions
 *  // (cpuInputs_/cpuOutputs_/gpuInputs_/gpuOutputs_) are initialized
 *  // automatically, both Functions are invoked, and their results are
 *  // compared for consistency.
 *  test.run();
 */
 class FunctionCompare {
 public:
   FunctionCompare(const std::string& name, const FuncConfig& config)
-      : cpu(FunctionBase::funcRegistrar_.createByType(name + "-CPU")),
-        gpu(FunctionBase::funcRegistrar_.createByType(name + "-GPU")) {
-    cpu->init(config);
-    gpu->init(config);
+      : cpuFunc_(FunctionBase::funcRegistrar_.createByType(name + "-CPU")),
+        gpuFunc_(FunctionBase::funcRegistrar_.createByType(name + "-GPU")) {
+    cpuFunc_->init(config);
+    gpuFunc_->init(config);
   }

+  ~FunctionCompare() {}
+
-  void cmpWithArg(const Arguments& inputs,
-                  const Arguments& outputs,
-                  const Arguments& inouts) {
-    // init cpu and gpu arguments
-    auto initArgs = [=](
-        Arguments& cpuArgs, Arguments& gpuArgs, const Arguments& inArgs) {
-      for (const auto arg : inArgs) {
-        size_t size = sizeof(real);
-        for (const auto dim : arg.dims_) {
-          size *= dim;
-        }
+  // input need only contains shape, do not contains data.
+  void addInputs(const BufferArg& input) {
+    size_t size =
+        input.shape().getElements() * sizeOfValuType(input.valueType());
+    cpuMemory_.emplace_back(std::make_shared<CpuMemoryHandle>(size));
+    gpuMemory_.emplace_back(std::make_shared<GpuMemoryHandle>(size));
+
+    cpuInputs_.emplace_back(std::make_shared<BufferArg>(
+        cpuMemory_.back()->getBuf(), input.valueType(), input.shape()));
+    gpuInputs_.emplace_back(std::make_shared<BufferArg>(
+        gpuMemory_.back()->getBuf(), input.valueType(), input.shape()));
+  }
-        if (arg.getData()) {
-          // todo(tianbing), waste unnecessary mem here
-          cpuMemory.emplace_back(std::make_shared<CpuMemoryHandle>(size));
-          gpuMemory.emplace_back(std::make_shared<GpuMemoryHandle>(size));
-          cpuArgs.emplace_back(Tensor((real*)arg.getData(), arg.dims_));
-          gpuArgs.emplace_back(Tensor((real*)arg.getData(), arg.dims_));
-          // already init outside
-        } else {
-          cpuMemory.emplace_back(std::make_shared<CpuMemoryHandle>(size));
-          gpuMemory.emplace_back(std::make_shared<GpuMemoryHandle>(size));
-          cpuArgs.emplace_back(
-              Tensor((real*)cpuMemory.back()->getBuf(), arg.dims_));
-          gpuArgs.emplace_back(
-              Tensor((real*)gpuMemory.back()->getBuf(), arg.dims_));
-          // will use an api to refactor this code.
-          CpuVector cpuVector(size / sizeof(real),
-                              (real*)cpuArgs.back().getData());
-          GpuVector gpuVector(size / sizeof(real),
-                              (real*)gpuArgs.back().getData());
-          cpuVector.uniform(0.001, 1);
-          gpuVector.copyFrom(cpuVector);
-        }
-      }
-    };
+
+  // output need only contains shape, do not contains data.
+  void addOutputs(const BufferArg& output) {
+    size_t size =
+        output.shape().getElements() * sizeOfValuType(output.valueType());
+    cpuMemory_.emplace_back(std::make_shared<CpuMemoryHandle>(size));
+    gpuMemory_.emplace_back(std::make_shared<GpuMemoryHandle>(size));
+
+    cpuOutputs_.emplace_back(
+        std::make_shared<BufferArg>(cpuMemory_.back()->getBuf(),
+                                    output.valueType(),
+                                    output.shape(),
+                                    ASSIGN_TO));
+    gpuOutputs_.emplace_back(
+        std::make_shared<BufferArg>(gpuMemory_.back()->getBuf(),
+                                    output.valueType(),
+                                    output.shape(),
+                                    ASSIGN_TO));
+  }
+
+  void addInputs(const SequenceArg& input) {
+    size_t batchSize = input.shape()[0];
+    size_t numSeqs = batchSize / 10 + 1;
+    size_t sizeId = (numSeqs + 1) * sizeOfValuType(VALUE_TYPE_INT32);
+    cpuMemory_.emplace_back(std::make_shared<CpuMemoryHandle>(sizeId));
+    gpuMemory_.emplace_back(std::make_shared<GpuMemoryHandle>(sizeId));
+
+    TensorShape seqsId({numSeqs + 1});
+    // void* cpuBuffer = cpuMemory_.back()->getBuf();
+    // void* gpuBuffer = gpuMemory_.back()->getBuf();
+
+    size_t size =
+        input.shape().getElements() * sizeOfValuType(input.valueType());
+    cpuMemory_.emplace_back(std::make_shared<CpuMemoryHandle>(size));
+    gpuMemory_.emplace_back(std::make_shared<GpuMemoryHandle>(size));
+
+    // TODO: need be implemented.
+  }
-    initArgs(cpuInputs, gpuInputs, inputs);
-    initArgs(cpuOutputs, gpuOutputs, outputs);
-    initArgs(cpuInouts, gpuInouts, inouts);
+
+  void run() {
+    // prepare cpu/gpu arguments
+    initInputs();

     // function calculate
-    cpu->calc(cpuInputs, cpuOutputs, cpuInouts);
-    gpu->calc(gpuInputs, gpuOutputs, gpuInouts);
+    auto callFunction = [](FunctionBase* function,
+                           std::vector<BufferArgPtr>& inputs,
+                           std::vector<BufferArgPtr>& outputs) {
+      BufferArgs inArgs;
+      BufferArgs outArgs;
+      for (auto arg : inputs) {
+        inArgs.addArg(*arg);
+      }
+      for (auto arg : outputs) {
+        outArgs.addArg(*arg);
+      }
+      function->calc(inArgs, outArgs);
+    };
+
+    callFunction(cpuFunc_.get(), cpuInputs_, cpuOutputs_);
+    callFunction(gpuFunc_.get(), gpuInputs_, gpuOutputs_);

     // check outputs and inouts
-    auto checkArgs = [=](const Arguments& cpuArgs, const Arguments& gpuArgs) {
-      for (size_t i = 0; i < cpuArgs.size(); i++) {
-        auto cpu = cpuArgs[i];
-        auto gpu = gpuArgs[i];
-        size_t size = 1;
-        for (auto dim : cpu.dims_) {
-          size *= dim;
-        }
-        CpuVector cpuVector(size, (real*)cpu.getData());
-        GpuVector gpuVector(size, (real*)gpu.getData());
+    compareOutputs();
+  }

+  std::shared_ptr<FunctionBase> getCpuFunction() const { return cpuFunc_; }
+
+  std::shared_ptr<FunctionBase> getGpuFunction() const { return gpuFunc_; }
+
+protected:
+  void initInputs() {
+    for (size_t i = 0; i < cpuInputs_.size(); i++) {
+      initArg(*cpuInputs_[i]);
+
+      // TODO: Need a BufferCopy used to copy from one BufferArg to another.
+      CpuVector cpuVector(cpuInputs_[i]->shape().getElements(),
+                          (real*)cpuInputs_[i]->data());
+      GpuVector gpuVector(gpuInputs_[i]->shape().getElements(),
+                          (real*)gpuInputs_[i]->data());
+
+      gpuVector.copyFrom(cpuVector);
+    }
+  }
+
+  void compareOutputs() {
+    for (size_t i = 0; i < cpuOutputs_.size(); i++) {
+      // TODO, Need a BufferCheck used to compare the two buffers.
+      auto cpu = cpuOutputs_[i];
+      auto gpu = gpuOutputs_[i];
+      CpuVector cpuVector(cpu->shape().getElements(), (real*)cpu->data());
+      GpuVector gpuVector(cpu->shape().getElements(), (real*)gpu->data());

-        autotest::TensorCheckErr(cpuVector, gpuVector);
-      }
-    };
-    checkArgs(cpuOutputs, gpuOutputs);
-    checkArgs(cpuInouts, gpuInouts);
-  }
+      autotest::TensorCheckErr(cpuVector, gpuVector);
+    }
+  }

-  std::shared_ptr<FunctionBase> getCpuFunction() const { return cpu; }
+  // only init cpu argument, gpu argument copy from cpu argument.
+  void initArg(BufferArg& arg) {
+    CpuVector vector(arg.shape().getElements(), (real*)arg.data());
+    vector.uniform(0.001, 1);
+  }

-  std::shared_ptr<FunctionBase> getGpuFunction() const { return gpu; }
+  void initArg(SequenceIdArg& arg, size_t batchSize) {
+    size_t numSeqs = arg.numSeqs();
+    int* buf = reinterpret_cast<int*>(arg.data());
+    int pos = 0;
+    size_t maxLen = 2 * batchSize / numSeqs;
+    for (int i = 0; i < (int)numSeqs; ++i) {
+      int len = uniformRandom(
+                    std::min<int64_t>(maxLen, batchSize - pos - numSeqs + i)) +
+                1;
+      buf[i] = pos;
+      pos += len;
+      VLOG(1) << " len=" << len;
+    }
+
+    buf[numSeqs] = batchSize;
+  }

 protected:
-  std::shared_ptr<FunctionBase> cpu;
-  std::shared_ptr<FunctionBase> gpu;
-  std::vector<CpuMemHandlePtr> cpuMemory;
-  std::vector<GpuMemHandlePtr> gpuMemory;
-  Arguments cpuInputs;
-  Arguments cpuOutputs;
-  Arguments cpuInouts;
-  Arguments gpuInputs;
-  Arguments gpuOutputs;
-  Arguments gpuInouts;
+  std::shared_ptr<FunctionBase> cpuFunc_;
+  std::shared_ptr<FunctionBase> gpuFunc_;
+  std::vector<CpuMemHandlePtr> cpuMemory_;
+  std::vector<GpuMemHandlePtr> gpuMemory_;
+  std::vector<BufferArgPtr> cpuInputs_;
+  std::vector<BufferArgPtr> cpuOutputs_;
+  std::vector<BufferArgPtr> gpuInputs_;
+  std::vector<BufferArgPtr> gpuOutputs_;
 };

}  // namespace paddle
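For reference, a hedged sketch of how a unit test is expected to drive the refactored FunctionCompare; the function name and shape below are placeholders, and the real usage appears in PadOpTest.cpp further down.

// Illustrative only: the intended FunctionCompare workflow after this change.
TEST(SomeFunc, CpuGpuConsistency) {
  FunctionCompare compare("SomeFunc" /* placeholder name */, FuncConfig());
  TensorShape shape{16, 8, 32, 32};                       // placeholder shape
  compare.addInputs(BufferArg(VALUE_TYPE_FLOAT, shape));
  compare.addOutputs(BufferArg(VALUE_TYPE_FLOAT, shape, ASSIGN_TO));
  // run() fills the CPU inputs with uniform random data, copies them to the
  // GPU buffers, calls both registered kernels, and compares the outputs.
  compare.run();
}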
paddle/function/PadOp.cpp (view file @ 8d47499e)
...
...
@@ -89,20 +89,21 @@ public:
  * \param inputs[0] input value.
  * \param outputs[0] output value.
  */
-  void calc(const Arguments& inputs,
-            const Arguments& outputs,
-            const Arguments& inouts) override {
+  void calc(const BufferArgs& inputs, const BufferArgs& outputs) override {
     CHECK_EQ(1UL, inputs.size());
     CHECK_EQ(1UL, outputs.size());
-    CHECK_EQ(0UL, inouts.size());
-    size_t num = inputs[0].dims_[0];
-    size_t inC = inputs[0].dims_[1];
-    size_t inH = inputs[0].dims_[2];
-    size_t inW = inputs[0].dims_[3];
-    Pad<Device>(outputs[0].getData(),
-                inputs[0].getData(),
+    CHECK_EQ(outputs[0].getArgType(), ASSIGN_TO);
+
+    size_t num = inputs[0].shape()[0];
+    size_t inC = inputs[0].shape()[1];
+    size_t inH = inputs[0].shape()[2];
+    size_t inW = inputs[0].shape()[3];
+    typename Tensor<real, Device>::Vector vec(outputs[0].shape().getElements(),
+                                              outputs[0].data<real>());
+    vec.zero();
+
+    Pad<Device>(outputs[0].data<real>(),
+                inputs[0].data<real>(),
                 num,
                 inC,
                 inH,
...
...
@@ -140,21 +141,25 @@ public:
  * \param inputs[0] output grad.
  * \param inouts[0] input grad.
  */
-  void calc(const Arguments& inputs,
-            const Arguments& outputs,
-            const Arguments& inouts) override {
+  void calc(const BufferArgs& inputs, const BufferArgs& outputs) override {
     CHECK_EQ(1UL, inputs.size());
-    CHECK_EQ(0UL, outputs.size());
-    CHECK_EQ(1UL, inouts.size());
+    CHECK_EQ(1UL, outputs.size());

-    size_t n = inouts[0].dims_[0];
-    size_t inC = inouts[0].dims_[1];
-    size_t inH = inouts[0].dims_[2];
-    size_t inW = inouts[0].dims_[3];
+    size_t num = outputs[0].shape()[0];
+    size_t inC = outputs[0].shape()[1];
+    size_t inH = outputs[0].shape()[2];
+    size_t inW = outputs[0].shape()[3];

-    PadGrad<Device>(inouts[0].getData(),
-                    inputs[0].getData(),
-                    n,
+    if (outputs[0].getArgType() != ADD_TO) {
+      // for unit test
+      typename Tensor<real, Device>::Vector tmp(
+          outputs[0].shape().getElements(), outputs[0].data<real>());
+      tmp.zero();
+    }
+
+    PadGrad<Device>(outputs[0].data<real>(),
+                    inputs[0].data<real>(),
+                    num,
                     inC,
                     inH,
                     inW,
...
...
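To make the shape bookkeeping above concrete: with per-side pads padc0/padc1, padh0/padh1 and padw0/padw1, an NCHW input of shape [num, inC, inH, inW] is padded to [num, inC + padc0 + padc1, inH + padh0 + padh1, inW + padw0 + padw1]. Below is a small hedged check of that arithmetic; the numbers are examples chosen to match the FuncConfig values used by the unit tests that follow.

// Example numbers only (matching the pads set in PadOpTest below).
size_t num = 10, inC = 4, inH = 31, inW = 32;       // illustrative input shape
size_t padc0 = 2, padc1 = 3;                        // channel pads (sum 5)
size_t padh0 = 1, padh1 = 2;                        // height pads  (sum 3)
size_t padw0 = 3, padw1 = 2;                        // width pads   (sum 5)

TensorShape outShape{num,
                     inC + padc0 + padc1,           // 9
                     inH + padh0 + padh1,           // 34
                     inW + padw0 + padw1};          // 37
CHECK_EQ(outShape.getElements(), num * 9 * 34 * 37);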
paddle/function/PadOpTest.cpp (view file @ 8d47499e)
...
...
@@ -33,10 +33,12 @@ TEST(Pad, real) {
                                     .set("padh1", 2)
                                     .set("padw0", 3)
                                     .set("padw1", 2));
-        Dims inDims{numSamples, channels, imgSizeH, imgSizeW};
-        Dims outDims{numSamples, channels + 5, imgSizeH + 3, imgSizeW + 5};
-        compare.cmpWithArg(
-            {Tensor(nullptr, inDims)}, {Tensor(nullptr, outDims)}, {});
+        TensorShape inDims{numSamples, channels, imgSizeH, imgSizeW};
+        TensorShape outDims{numSamples, channels + 5, imgSizeH + 3, imgSizeW + 5};
+        compare.addInputs(BufferArg(VALUE_TYPE_FLOAT, inDims));
+        compare.addOutputs(BufferArg(VALUE_TYPE_FLOAT, outDims, ASSIGN_TO));
+        compare.run();
       }
     }
   }
...
...
@@ -50,7 +52,6 @@ TEST(PadGrad, real) {
       for (size_t imgSizeW : {5, 32, 96}) {
        VLOG(3) << " numSamples=" << numSamples << " channels=" << channels
                << " imgSizeH=" << imgSizeH << " imgSizeW=" << imgSizeW;
        FunctionCompare compare("PadGrad",
                                FuncConfig()
                                    .set("padc0", 2)
...
...
@@ -59,10 +60,12 @@ TEST(PadGrad, real) {
                                    .set("padh1", 2)
                                    .set("padw0", 3)
                                    .set("padw1", 2));
-        Dims inDims{numSamples, channels, imgSizeH, imgSizeW};
-        Dims outDims{numSamples, channels + 5, imgSizeH + 3, imgSizeW + 5};
-        compare.cmpWithArg(
-            {Tensor(nullptr, outDims)}, {}, {Tensor(nullptr, inDims)});
+        TensorShape inDims{numSamples, channels, imgSizeH, imgSizeW};
+        TensorShape outDims{numSamples, channels + 5, imgSizeH + 3, imgSizeW + 5};
+        compare.addInputs(BufferArg(VALUE_TYPE_FLOAT, outDims));
+        compare.addOutputs(BufferArg(VALUE_TYPE_FLOAT, inDims, ASSIGN_TO));
+        compare.run();
       }
     }
   }
...
...
paddle/function/TensorShape.h (view file @ 8d47499e)
...
...
@@ -55,6 +55,15 @@ public:
     numElements();
   }

+  void reshape(std::initializer_list<size_t> dims) {
+    ndims_ = dims.size();
+    if (ndims_ > kMinDims) {
+      dims_.resize(ndims_);
+    }
+    dims_.assign(dims);
+    numElements();
+  }
+
   // number of dimensions of the tensor
   size_t ndims() const { return ndims_; }
...
...
@@ -82,7 +91,7 @@ private:
   // init dims_
   void initDims(size_t ndims) {
-    size_t count = ndims < 4 ? 4 : ndims;
+    size_t count = ndims < kMinDims ? kMinDims : ndims;
     dims_.assign(count, 1);
   }
...
...
@@ -92,6 +101,7 @@ private:
   // number of elements
   size_t nelements_;
   std::vector<size_t> dims_;
+  static const size_t kMinDims = 4;
 };

}  // namespace paddle
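A short hedged sketch of the TensorShape behaviour this hunk depends on: dims_ always keeps at least kMinDims (4) entries, and reshape() only grows that storage when more dimensions are requested. The values are arbitrary examples.

// Sketch: exercising the TensorShape interface shown above.
TensorShape shape{10, 4, 31, 32};                    // 4-D, NCHW-style
CHECK_EQ(shape.ndims(), 4UL);
CHECK_EQ(shape.getElements(), 10UL * 4 * 31 * 32);

shape.reshape({10, 9, 34, 37});                      // same rank, new extents
CHECK_EQ(shape[2], 34UL);

shape.reshape({2, 3, 4, 5, 6});                      // rank > kMinDims: dims_ grows
CHECK_EQ(shape.ndims(), 5UL);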
paddle/gserver/layers/ContextProjection.cpp (view file @ 8d47499e)
...
...
@@ -118,16 +118,15 @@ void ContextProjection::forward() {
   /// first use state_, otherwise use weight_(padding false === w nullptr)
   auto w_ptr =
       state_ ? state_.get() : is_padding ? weight_->getW().get() : nullptr;
-  auto start_pos = in_->sequenceStartPositions;
+  const auto start_pos = in_->sequenceStartPositions->getVector(useGpu_);

   BufferArgs inputs;
   BufferArgs outputs;
-  inputs.addArg(*in_->value);
-  inputs.addArg(CpuMatrix(w_ptr ? w_ptr->getData() : nullptr,
-                          w_ptr ? w_ptr->getHeight() : 0,
-                          input_dim));
-  inputs.addArg(*in_->sequenceStartPositions->getVector(useGpu_));
-  outputs.addArg(*out_->value, ADD_TO);
+  inputs.addArg(*in_->value, *start_pos);
+  if (w_ptr) {
+    inputs.addArg(CpuMatrix(w_ptr->getData(), w_ptr->getHeight(), input_dim),
+                  *start_pos);
+  }
+  outputs.addArg(*out_->value, *start_pos, ADD_TO);
   forward_[0]->calc(inputs, outputs);

   if (state_ && config_.context_start() < 0) {
...
...
@@ -166,13 +165,16 @@ void ContextProjection::backward(const UpdateCallback& callback) {
   BufferArgs inputs;
   BufferArgs outputs;
-  inputs.addArg(CpuMatrix(
-      in_->grad ? in_->grad->getData() : nullptr, batch_size, input_dim));
-  inputs.addArg(CpuMatrix(w_ptr ? w_ptr->getData() : nullptr,
-                          w_ptr ? w_ptr->getHeight() : 0,
-                          input_dim));
-  inputs.addArg(*in_->sequenceStartPositions->getVector(useGpu_));
-  outputs.addArg(*out_->grad, ADD_TO);
+  inputs.addArg(*out_->grad, *in_->sequenceStartPositions->getVector(useGpu_));
+  outputs.addArg(
+      CpuMatrix(
+          in_->grad ? in_->grad->getData() : nullptr, batch_size, input_dim),
+      *in_->sequenceStartPositions->getVector(useGpu_),
+      ADD_TO);
+  outputs.addArg(CpuMatrix(w_ptr ? w_ptr->getData() : nullptr,
+                           w_ptr ? w_ptr->getHeight() : 0,
+                           input_dim),
+                 ADD_TO);
   backward_[0]->calc(inputs, outputs);

   if (config_.trainable_padding()) {
...
...
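The addArg overloads used here pair a data matrix with the sequence start positions, so the Function sees SequenceArg-style inputs and outputs rather than plain buffers. A hedged sketch of that pattern with placeholder names and sizes (not code from the commit):

// Illustrative only: pairing values with sequence start positions.
size_t batchSize = 32, dim = 16, numSeqs = 4;                  // placeholders
MatrixPtr value = Matrix::create(batchSize, dim, false, false);
MatrixPtr grad = Matrix::create(batchSize, dim, false, false);
IVectorPtr startPos = IVector::create(numSeqs + 1, /* useGpu */ false);

BufferArgs inputs;
BufferArgs outputs;
inputs.addArg(*value, *startPos);            // matrix plus sequence offsets
outputs.addArg(*grad, *startPos, ADD_TO);    // gradient output is accumulated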
paddle/gserver/layers/PadLayer.cpp (view file @ 8d47499e)
...
...
@@ -27,11 +27,11 @@ bool PadLayer::init(const LayerMap& layerMap,
   auto& pad_conf = config_.inputs(0).pad_conf();
   auto& img_conf = pad_conf.image_conf();
   CHECK_EQ(config_.inputs_size(), 1);
-  inDims_.push_back(0);
-  inDims_.push_back(img_conf.channels());
-  inDims_.push_back(img_conf.has_img_size_y() ? img_conf.img_size_y()
-                                              : img_conf.img_size());
-  inDims_.push_back(img_conf.img_size());
+  inDims_ = TensorShape({0,
+                         img_conf.channels(),
+                         img_conf.has_img_size_y() ? img_conf.img_size_y()
+                                                   : img_conf.img_size(),
+                         img_conf.img_size()});

   CHECK_EQ(2, pad_conf.pad_c_size());
   CHECK_EQ(2, pad_conf.pad_h_size());
...
...
@@ -43,7 +43,7 @@ bool PadLayer::init(const LayerMap& layerMap,
   padw_.push_back(pad_conf.pad_w(0));
   padw_.push_back(pad_conf.pad_w(1));

-  outDims_.resize(4);
+  outDims_ = TensorShape(4);
   setOutDims(0);

   createFunction(forward_,
...
...
@@ -68,20 +68,20 @@ bool PadLayer::init(const LayerMap& layerMap,
   return true;
 }

-void PadLayer::setOutDims(int batchSize) {
-  outDims_[0] = batchSize;
-  outDims_[1] = inDims_[1] + padc_[0] + padc_[1];
-  outDims_[2] = inDims_[2] + padh_[0] + padh_[1];
-  outDims_[3] = inDims_[3] + padw_[0] + padw_[1];
+void PadLayer::setOutDims(const size_t batchSize) {
+  outDims_.reshape({batchSize,
+                    inDims_[1] + padc_[0] + padc_[1],
+                    inDims_[2] + padh_[0] + padh_[1],
+                    inDims_[3] + padw_[0] + padw_[1]});
 }

-void PadLayer::setTensorDim(int batchSize) {
+void PadLayer::setTensorDim(const size_t batchSize) {
   CHECK_EQ(static_cast<int>(inputLayers_.size()), 1);
-  inDims_[0] = batchSize;
+  inDims_.setDim(0, batchSize);
   int h = inputLayers_[0]->getOutput().getFrameHeight();
-  if (h != 0) inDims_[2] = h;
+  if (h != 0) inDims_.setDim(2, h);
   int w = inputLayers_[0]->getOutput().getFrameWidth();
-  if (w != 0) inDims_[3] = w;
+  if (w != 0) inDims_.setDim(3, w);
   setOutDims(batchSize);
 }
...
...
@@ -94,22 +94,22 @@ void PadLayer::forward(PassType passType) {
   resetOutput(batchSize, size);
   MatrixPtr outV = getOutputValue();
   REGISTER_TIMER_INFO("PadForward", getName().c_str());
-  forward_[0]->calc({Tensor(input->getData(), inDims_)},
-                    {Tensor(outV->getData(), outDims_)},
-                    {});
+
+  BufferArgs inputs;
+  BufferArgs outputs;
+  inputs.addArg(*getInputValue(0), inDims_);
+  outputs.addArg(*getOutputValue(), outDims_, ASSIGN_TO);
+  forward_[0]->calc(inputs, outputs);
 }

 void PadLayer::backward(const UpdateCallback& callback) {
   (void)callback;
-  MatrixPtr preGrad = inputLayers_[0]->getOutputGrad();
-  if (NULL == preGrad) {
-    return;
-  }
-  MatrixPtr outGrad = getOutputGrad();
   REGISTER_TIMER_INFO("PadBackward", getName().c_str());
-  backward_[0]->calc({Tensor(outGrad->getData(), outDims_)},
-                     {},
-                     {Tensor(preGrad->getData(), inDims_)});
+
+  BufferArgs inputs;
+  BufferArgs outputs;
+  inputs.addArg(*getOutputGrad(), outDims_);
+  outputs.addArg(*getInputGrad(0), inDims_, ADD_TO);
+  backward_[0]->calc(inputs, outputs);
 }
}  // namespace paddle
paddle/gserver/layers/PadLayer.h (view file @ 8d47499e)
...
...
@@ -33,13 +33,13 @@ public:
   void backward(const UpdateCallback& callback = nullptr);

 protected:
-  void setOutDims(int batchSize);
-  void setTensorDim(int batchSize);
+  void setOutDims(const size_t batchSize);
+  void setTensorDim(const size_t batchSize);

   std::vector<int> padc_;
   std::vector<int> padh_;
   std::vector<int> padw_;
-  Dims inDims_;
-  Dims outDims_;
+  TensorShape inDims_;
+  TensorShape outDims_;
 };

}  // namespace paddle
paddle/py_paddle/dataprovider_converter.py (view file @ 8d47499e)
...
...
@@ -34,6 +34,10 @@ class IScanner(object):
 class DenseScanner(IScanner):
+    """
+    :type __mat__: numpy.ndarray
+    """
+
     def __init__(self, input_type, pos):
         IScanner.__init__(self, input_type, pos)
         self.__mat__ = None
...
...
@@ -47,6 +51,8 @@ class DenseScanner(IScanner):
     def finish_scan(self, argument):
         assert isinstance(argument, swig_paddle.Arguments)
         assert isinstance(self.input_type, dp2.InputType)
+        if self.__mat__.dtype != numpy.float32:
+            self.__mat__ = self.__mat__.astype(numpy.float32)
         m = swig_paddle.Matrix.createDenseFromNumpy(self.__mat__, True, False)
         argument.setSlotValue(self.pos, m)
...
...
paddle/scripts/travis/before_install.osx.sh (view file @ 8d47499e)
#!/bin/bash
brew update
brew tap homebrew/science
-brew install python
-sudo pip install --upgrade protobuf
-brew install swig openblas md5sha1sum protobuf
+brew install openblas swig md5sha1sum
paddle/scripts/travis/build_and_test.sh (view file @ 8d47499e)
...
...
@@ -6,7 +6,7 @@ if [[ "$TRAVIS_OS_NAME" == "linux" ]]; then
   export PYTHONPATH=/opt/python/2.7.12/lib/python2.7/site-packages
   export PYTHONHOME=/opt/python/2.7.12
   export PATH=/opt/python/2.7.12/bin:${PATH}
-  cmake .. -DON_TRAVIS=ON -DON_COVERALLS=ON -DCOVERALLS_UPLOAD=ON ${EXTRA_CMAKE_OPTS}
+  cmake .. -DCMAKE_Fortran_COMPILER=/usr/bin/gfortran-4.8 -DON_TRAVIS=ON -DON_COVERALLS=ON -DCOVERALLS_UPLOAD=ON ${EXTRA_CMAKE_OPTS}
   NRPOC=`nproc`
   make -j $NPROC
   make coveralls
...
...
paddle/scripts/travis/docs.sh (view file @ 8d47499e)
...
...
@@ -4,7 +4,7 @@
 source ./common.sh

 # Compile Documentation only.
-cmake .. -DCMAKE_BUILD_TYPE=Debug -DWITH_GPU=OFF -DWITH_DOC=ON ${EXTRA_CMAKE_OPTS}
+cmake .. -DCMAKE_BUILD_TYPE=Debug -DCMAKE_Fortran_COMPILER=/usr/bin/gfortran-4.8 -DWITH_GPU=OFF -DWITH_DOC=ON ${EXTRA_CMAKE_OPTS}
 make paddle_docs paddle_docs_cn

 # check websites for broken links
...
...
python/paddle/trainer/config_parser.py (view file @ 8d47499e)
...
...
@@ -2650,7 +2650,7 @@ class AverageLayer(LayerBase):
 @config_layer('cos')
 class CosSimLayer(LayerBase):
-    def __init__(self, name, inputs, cos_scale=5, device=None):
+    def __init__(self, name, inputs, cos_scale=1, device=None):
         super(CosSimLayer, self).__init__(
             name, 'cos', 1, inputs=inputs, device=device)
         config_assert(len(self.inputs) == 2, 'CosSimLayer must have 2 inputs')
...
...
python/paddle/trainer_config_helpers/layers.py (view file @ 8d47499e)
...
...
@@ -1674,7 +1674,7 @@ def trans_layer(input, name=None, layer_attr=None):
 @wrap_name_default()
 @layer_support()
-def cos_sim(a, b, scale=5, size=1, name=None, layer_attr=None):
+def cos_sim(a, b, scale=1, size=1, name=None, layer_attr=None):
     """
     Cosine Similarity Layer. The cosine similarity equation is here.
...
...
python/paddle/trainer_config_helpers/tests/CMakeLists.txt (view file @ 8d47499e)
...
...
@@ -9,17 +9,10 @@ add_test(NAME test_reset_hook
         ${PYTHON_EXECUTABLE}
           ${PROJ_ROOT}/python/paddle/trainer_config_helpers/tests/test_reset_hook.py
         WORKING_DIRECTORY ${PROJ_ROOT}/python/paddle)

-if (PROTOBUF_3)
-  add_paddle_exe(protobuf_equal
+add_paddle_exe(protobuf_equal
     ProtobufEqualMain.cpp)
-  add_test(NAME test_layerHelpers
+add_test(NAME test_layerHelpers
         COMMAND ${PROJ_ROOT}/python/paddle/trainer_config_helpers/tests/configs/run_tests.sh ${PYTHON_EXECUTABLE}
         ${CMAKE_CURRENT_BINARY_DIR}/protobuf_equal
-  )
-else()
-  add_test(NAME test_layerHelpers
-        COMMAND ${PROJ_ROOT}/python/paddle/trainer_config_helpers/tests/configs/run_tests.sh ${PYTHON_EXECUTABLE}
-  )
-endif()
+)
python/paddle/trainer_config_helpers/tests/configs/protostr/test_ntm_layers.protostr (view file @ 8d47499e)
...
...
@@ -79,7 +79,7 @@ layers {
   inputs {
     input_layer_name: "b"
   }
-  cos_scale: 5
+  cos_scale: 1
 }
 layers {
   name: "__cos_sim_1__"
...
...
@@ -92,7 +92,7 @@ layers {
   inputs {
     input_layer_name: "c"
   }
-  cos_scale: 5
+  cos_scale: 1
 }
 layers {
   name: "__sum_to_one_norm_layer_0__"
...
...
python/paddle/trainer_config_helpers/tests/configs/run_tests.sh (view file @ 8d47499e)
...
...
@@ -2,16 +2,18 @@
 cd `dirname $0`
 set -e

+PYTHON_EXEC=$1
+COMPARE_PROTO_UTIL=$2

 protostr=`dirname $0`/protostr
 files=`ls $protostr | grep -v "unittest"`

-./generate_protostr.sh $1
+./generate_protostr.sh ${PYTHON_EXEC}

 . ./file_list.sh

-if [ -z $1 ]; then
+if [ -z ${COMPARE_PROTO_UTIL} ]; then
     for file in $files
     do
         base_protostr=$protostr/$file
...
...
@@ -22,20 +24,20 @@ if [ -z $1 ]; then
 else
     for file in ${configs[*]}
     do
-        if ! $1 $protostr/$file.protostr $protostr/$file.protostr.unittest; then
+        if ! ${COMPARE_PROTO_UTIL} $protostr/$file.protostr $protostr/$file.protostr.unittest; then
             diff $protostr/$file.protostr $protostr/$file.protostr.unittest -u
         fi
-        if ! $1 $protostr/$file.protostr $protostr/$file.protostr.non_file_config.unittest; then
+        if ! ${COMPARE_PROTO_UTIL} $protostr/$file.protostr $protostr/$file.protostr.non_file_config.unittest; then
             diff $protostr/$file.protostr $protostr/$file.protostr.non_file_config.unittest -u
         fi
     done

     for file in ${whole_configs[*]}
     do
-        if ! $1 $protostr/$file.protostr $protostr/$file.protostr.unittest --whole; then
+        if ! ${COMPARE_PROTO_UTIL} $protostr/$file.protostr $protostr/$file.protostr.unittest --whole; then
             diff $protostr/$file.protostr $protostr/$file.protostr.unittest -u
         fi
-        if ! $1 $protostr/$file.protostr $protostr/$file.protostr.non_file_config.unittest --whole; then
+        if ! ${COMPARE_PROTO_UTIL} $protostr/$file.protostr $protostr/$file.protostr.non_file_config.unittest --whole; then
             diff $protostr/$file.protostr $protostr/$file.protostr.non_file_config.unittest -u
         fi
     done
...
...