提交 c52bb850 编写于 作者: Y Yang Yang

Merge branch 'develop' of https://github.com/PaddlePaddle/Paddle into executor-design

...@@ -31,6 +31,3 @@ ...@@ -31,6 +31,3 @@
- id: go-fmt - id: go-fmt
types: types:
- go - go
- id: gometalinter
types:
- go
...@@ -105,6 +105,12 @@ if (WITH_C_API AND WITH_PYTHON) ...@@ -105,6 +105,12 @@ if (WITH_C_API AND WITH_PYTHON)
"different Python interpreter from compiling.") "different Python interpreter from compiling.")
endif() endif()
# Build type forwarded to the third-party ExternalProject builds as
# CMAKE_BUILD_TYPE: Release by default, MinSizeRel for MOBILE_INFERENCE builds.
set(THIRD_PARTY_BUILD_TYPE Release)
if(MOBILE_INFERENCE)
  set(THIRD_PARTY_BUILD_TYPE MinSizeRel)
endif()
######################################################################################## ########################################################################################
include(external/mklml) # download mklml package include(external/mklml) # download mklml package
......
...@@ -24,6 +24,10 @@ if(WITH_DOUBLE) ...@@ -24,6 +24,10 @@ if(WITH_DOUBLE)
add_definitions(-DPADDLE_TYPE_DOUBLE) add_definitions(-DPADDLE_TYPE_DOUBLE)
endif(WITH_DOUBLE) endif(WITH_DOUBLE)
# Define the PADDLE_WITH_TESTING preprocessor macro when WITH_TESTING is on.
if(WITH_TESTING)
add_definitions(-DPADDLE_WITH_TESTING)
endif(WITH_TESTING)
if(NOT WITH_TIMER) if(NOT WITH_TIMER)
add_definitions(-DPADDLE_DISABLE_TIMER) add_definitions(-DPADDLE_DISABLE_TIMER)
endif(NOT WITH_TIMER) endif(NOT WITH_TIMER)
......
...@@ -8,7 +8,7 @@ ExternalProject_Add( ...@@ -8,7 +8,7 @@ ExternalProject_Add(
extern_eigen3 extern_eigen3
${EXTERNAL_PROJECT_LOG_ARGS} ${EXTERNAL_PROJECT_LOG_ARGS}
GIT_REPOSITORY "https://github.com/RLovelett/eigen.git" GIT_REPOSITORY "https://github.com/RLovelett/eigen.git"
GIT_TAG "master" GIT_TAG 4e79cb69b9425f5f8c3a84be4350d4ab75b5fd9d
PREFIX ${EIGEN_SOURCE_DIR} PREFIX ${EIGEN_SOURCE_DIR}
UPDATE_COMMAND "" UPDATE_COMMAND ""
CONFIGURE_COMMAND "" CONFIGURE_COMMAND ""
......
...@@ -36,6 +36,7 @@ ExternalProject_Add( ...@@ -36,6 +36,7 @@ ExternalProject_Add(
# change this back to the official Github repo once my PR is # change this back to the official Github repo once my PR is
# merged. # merged.
GIT_REPOSITORY "https://github.com/wangkuiyi/gflags.git" GIT_REPOSITORY "https://github.com/wangkuiyi/gflags.git"
GIT_TAG 986964c07427ecb9cdb5bd73f73ebbd40e54dadb
PREFIX ${GFLAGS_SOURCES_DIR} PREFIX ${GFLAGS_SOURCES_DIR}
UPDATE_COMMAND "" UPDATE_COMMAND ""
CMAKE_ARGS -DCMAKE_CXX_COMPILER=${CMAKE_CXX_COMPILER} CMAKE_ARGS -DCMAKE_CXX_COMPILER=${CMAKE_CXX_COMPILER}
...@@ -45,11 +46,11 @@ ExternalProject_Add( ...@@ -45,11 +46,11 @@ ExternalProject_Add(
-DCMAKE_INSTALL_PREFIX=${GFLAGS_INSTALL_DIR} -DCMAKE_INSTALL_PREFIX=${GFLAGS_INSTALL_DIR}
-DCMAKE_POSITION_INDEPENDENT_CODE=ON -DCMAKE_POSITION_INDEPENDENT_CODE=ON
-DBUILD_TESTING=OFF -DBUILD_TESTING=OFF
-DCMAKE_BUILD_TYPE=Release -DCMAKE_BUILD_TYPE=${THIRD_PARTY_BUILD_TYPE}
${EXTERNAL_OPTIONAL_ARGS} ${EXTERNAL_OPTIONAL_ARGS}
CMAKE_CACHE_ARGS -DCMAKE_INSTALL_PREFIX:PATH=${GFLAGS_INSTALL_DIR} CMAKE_CACHE_ARGS -DCMAKE_INSTALL_PREFIX:PATH=${GFLAGS_INSTALL_DIR}
-DCMAKE_POSITION_INDEPENDENT_CODE:BOOL=ON -DCMAKE_POSITION_INDEPENDENT_CODE:BOOL=ON
-DCMAKE_BUILD_TYPE:STRING=Release -DCMAKE_BUILD_TYPE:STRING=${THIRD_PARTY_BUILD_TYPE}
) )
ADD_LIBRARY(gflags STATIC IMPORTED GLOBAL) ADD_LIBRARY(gflags STATIC IMPORTED GLOBAL)
......
...@@ -31,6 +31,7 @@ ExternalProject_Add( ...@@ -31,6 +31,7 @@ ExternalProject_Add(
${EXTERNAL_PROJECT_LOG_ARGS} ${EXTERNAL_PROJECT_LOG_ARGS}
DEPENDS gflags DEPENDS gflags
GIT_REPOSITORY "https://github.com/google/glog.git" GIT_REPOSITORY "https://github.com/google/glog.git"
GIT_TAG v0.3.5
PREFIX ${GLOG_SOURCES_DIR} PREFIX ${GLOG_SOURCES_DIR}
UPDATE_COMMAND "" UPDATE_COMMAND ""
CMAKE_ARGS -DCMAKE_CXX_COMPILER=${CMAKE_CXX_COMPILER} CMAKE_ARGS -DCMAKE_CXX_COMPILER=${CMAKE_CXX_COMPILER}
...@@ -43,12 +44,12 @@ ExternalProject_Add( ...@@ -43,12 +44,12 @@ ExternalProject_Add(
-DWITH_GFLAGS=ON -DWITH_GFLAGS=ON
-Dgflags_DIR=${GFLAGS_INSTALL_DIR}/lib/cmake/gflags -Dgflags_DIR=${GFLAGS_INSTALL_DIR}/lib/cmake/gflags
-DBUILD_TESTING=OFF -DBUILD_TESTING=OFF
-DCMAKE_BUILD_TYPE=Release -DCMAKE_BUILD_TYPE=${THIRD_PARTY_BUILD_TYPE}
${EXTERNAL_OPTIONAL_ARGS} ${EXTERNAL_OPTIONAL_ARGS}
CMAKE_CACHE_ARGS -DCMAKE_INSTALL_PREFIX:PATH=${GLOG_INSTALL_DIR} CMAKE_CACHE_ARGS -DCMAKE_INSTALL_PREFIX:PATH=${GLOG_INSTALL_DIR}
-DCMAKE_INSTALL_LIBDIR:PATH=${GLOG_INSTALL_DIR}/lib -DCMAKE_INSTALL_LIBDIR:PATH=${GLOG_INSTALL_DIR}/lib
-DCMAKE_POSITION_INDEPENDENT_CODE:BOOL=ON -DCMAKE_POSITION_INDEPENDENT_CODE:BOOL=ON
-DCMAKE_BUILD_TYPE:STRING=Release -DCMAKE_BUILD_TYPE:STRING=${THIRD_PARTY_BUILD_TYPE}
) )
ADD_LIBRARY(glog STATIC IMPORTED GLOBAL) ADD_LIBRARY(glog STATIC IMPORTED GLOBAL)
......
...@@ -56,11 +56,11 @@ IF(WITH_TESTING) ...@@ -56,11 +56,11 @@ IF(WITH_TESTING)
-DBUILD_GMOCK=ON -DBUILD_GMOCK=ON
-Dgtest_disable_pthreads=ON -Dgtest_disable_pthreads=ON
-Dgtest_force_shared_crt=ON -Dgtest_force_shared_crt=ON
-DCMAKE_BUILD_TYPE=Release -DCMAKE_BUILD_TYPE=${THIRD_PARTY_BUILD_TYPE}
${EXTERNAL_OPTIONAL_ARGS} ${EXTERNAL_OPTIONAL_ARGS}
CMAKE_CACHE_ARGS -DCMAKE_INSTALL_PREFIX:PATH=${GTEST_INSTALL_DIR} CMAKE_CACHE_ARGS -DCMAKE_INSTALL_PREFIX:PATH=${GTEST_INSTALL_DIR}
-DCMAKE_POSITION_INDEPENDENT_CODE:BOOL=ON -DCMAKE_POSITION_INDEPENDENT_CODE:BOOL=ON
-DCMAKE_BUILD_TYPE:STRING=Release -DCMAKE_BUILD_TYPE:STRING=${THIRD_PARTY_BUILD_TYPE}
) )
ADD_LIBRARY(gtest STATIC IMPORTED GLOBAL) ADD_LIBRARY(gtest STATIC IMPORTED GLOBAL)
......
...@@ -191,12 +191,12 @@ FUNCTION(build_protobuf TARGET_NAME BUILD_FOR_HOST) ...@@ -191,12 +191,12 @@ FUNCTION(build_protobuf TARGET_NAME BUILD_FOR_HOST)
${OPTIONAL_ARGS} ${OPTIONAL_ARGS}
-Dprotobuf_BUILD_TESTS=OFF -Dprotobuf_BUILD_TESTS=OFF
-DCMAKE_POSITION_INDEPENDENT_CODE=ON -DCMAKE_POSITION_INDEPENDENT_CODE=ON
-DCMAKE_BUILD_TYPE=Release -DCMAKE_BUILD_TYPE=${THIRD_PARTY_BUILD_TYPE}
-DCMAKE_INSTALL_PREFIX=${PROTOBUF_INSTALL_DIR} -DCMAKE_INSTALL_PREFIX=${PROTOBUF_INSTALL_DIR}
-DCMAKE_INSTALL_LIBDIR=lib -DCMAKE_INSTALL_LIBDIR=lib
CMAKE_CACHE_ARGS CMAKE_CACHE_ARGS
-DCMAKE_INSTALL_PREFIX:PATH=${PROTOBUF_INSTALL_DIR} -DCMAKE_INSTALL_PREFIX:PATH=${PROTOBUF_INSTALL_DIR}
-DCMAKE_BUILD_TYPE:STRING=Release -DCMAKE_BUILD_TYPE:STRING=${THIRD_PARTY_BUILD_TYPE}
-DCMAKE_VERBOSE_MAKEFILE:BOOL=OFF -DCMAKE_VERBOSE_MAKEFILE:BOOL=OFF
-DCMAKE_POSITION_INDEPENDENT_CODE:BOOL=ON -DCMAKE_POSITION_INDEPENDENT_CODE:BOOL=ON
${OPTIONAL_CACHE_ARGS} ${OPTIONAL_CACHE_ARGS}
......
...@@ -35,6 +35,7 @@ ExternalProject_Add( ...@@ -35,6 +35,7 @@ ExternalProject_Add(
extern_warpctc extern_warpctc
${EXTERNAL_PROJECT_LOG_ARGS} ${EXTERNAL_PROJECT_LOG_ARGS}
GIT_REPOSITORY "https://github.com/gangliao/warp-ctc.git" GIT_REPOSITORY "https://github.com/gangliao/warp-ctc.git"
GIT_TAG b63a0644654a3e0ed624c85a1767bc8193aead09
PREFIX ${WARPCTC_SOURCES_DIR} PREFIX ${WARPCTC_SOURCES_DIR}
UPDATE_COMMAND "" UPDATE_COMMAND ""
CMAKE_ARGS -DCMAKE_CXX_COMPILER=${CMAKE_CXX_COMPILER} CMAKE_ARGS -DCMAKE_CXX_COMPILER=${CMAKE_CXX_COMPILER}
...@@ -48,9 +49,9 @@ ExternalProject_Add( ...@@ -48,9 +49,9 @@ ExternalProject_Add(
-DCMAKE_DISABLE_FIND_PACKAGE_Torch=ON -DCMAKE_DISABLE_FIND_PACKAGE_Torch=ON
-DBUILD_SHARED=ON -DBUILD_SHARED=ON
-DCMAKE_POSITION_INDEPENDENT_CODE=ON -DCMAKE_POSITION_INDEPENDENT_CODE=ON
-DCMAKE_BUILD_TYPE=Release -DCMAKE_BUILD_TYPE=${THIRD_PARTY_BUILD_TYPE}
${EXTERNAL_OPTIONAL_ARGS} ${EXTERNAL_OPTIONAL_ARGS}
CMAKE_CACHE_ARGS -DCMAKE_BUILD_TYPE:STRING=Release CMAKE_CACHE_ARGS -DCMAKE_BUILD_TYPE:STRING=${THIRD_PARTY_BUILD_TYPE}
-DCMAKE_POSITION_INDEPENDENT_CODE:BOOL=ON -DCMAKE_POSITION_INDEPENDENT_CODE:BOOL=ON
-DCMAKE_INSTALL_PREFIX:PATH=${WARPCTC_INSTALL_DIR} -DCMAKE_INSTALL_PREFIX:PATH=${WARPCTC_INSTALL_DIR}
) )
......
...@@ -42,11 +42,11 @@ ExternalProject_Add( ...@@ -42,11 +42,11 @@ ExternalProject_Add(
-DBUILD_SHARED_LIBS=OFF -DBUILD_SHARED_LIBS=OFF
-DCMAKE_POSITION_INDEPENDENT_CODE=ON -DCMAKE_POSITION_INDEPENDENT_CODE=ON
-DCMAKE_MACOSX_RPATH=ON -DCMAKE_MACOSX_RPATH=ON
-DCMAKE_BUILD_TYPE=Release -DCMAKE_BUILD_TYPE=${THIRD_PARTY_BUILD_TYPE}
${EXTERNAL_OPTIONAL_ARGS} ${EXTERNAL_OPTIONAL_ARGS}
CMAKE_CACHE_ARGS -DCMAKE_INSTALL_PREFIX:PATH=${ZLIB_INSTALL_DIR} CMAKE_CACHE_ARGS -DCMAKE_INSTALL_PREFIX:PATH=${ZLIB_INSTALL_DIR}
-DCMAKE_POSITION_INDEPENDENT_CODE:BOOL=ON -DCMAKE_POSITION_INDEPENDENT_CODE:BOOL=ON
-DCMAKE_BUILD_TYPE:STRING=Release -DCMAKE_BUILD_TYPE:STRING=${THIRD_PARTY_BUILD_TYPE}
) )
LIST(APPEND external_project_dependencies zlib) LIST(APPEND external_project_dependencies zlib)
......
...@@ -389,13 +389,60 @@ function(go_test TARGET_NAME) ...@@ -389,13 +389,60 @@ function(go_test TARGET_NAME)
WORKING_DIRECTORY ${CMAKE_CURRENT_SOURCE_DIR}) WORKING_DIRECTORY ${CMAKE_CURRENT_SOURCE_DIR})
endfunction(go_test) endfunction(go_test)
# paddle_protobuf_generate_cpp(<proto_srcs> <proto_hdrs> <proto_files>...)
#
# Variant of the standard 'protobuf_generate_cpp()' with protobuf-lite support.
#
#   <proto_srcs>   name of the caller's variable that receives the list of
#                  generated *.pb.cc files
#   <proto_hdrs>   name of the caller's variable that receives the list of
#                  generated *.pb.h files
#   <proto_files>  one or more .proto files to compile
#
# For every .proto file a custom command is registered that runs
# ${PROTOBUF_PROTOC_EXECUTABLE} and writes <name>.pb.cc / <name>.pb.h into
# ${CMAKE_CURRENT_BINARY_DIR}. When MOBILE_INFERENCE is set, the --cpp_out
# directory is prefixed with "lite:".
function(paddle_protobuf_generate_cpp SRCS HDRS)
  if(NOT ARGN)
    message(SEND_ERROR "Error: paddle_protobuf_generate_cpp() called without any proto files")
    return()
  endif()

  # The --cpp_out argument is the same for every file, so build it once.
  set(_cpp_out_arg "${CMAKE_CURRENT_BINARY_DIR}")
  if(MOBILE_INFERENCE)
    set(_cpp_out_arg "lite:${CMAKE_CURRENT_BINARY_DIR}")
  endif()

  # Function-local accumulators; copied to the caller's variables at the end.
  set(_generated_cc "")
  set(_generated_h "")

  foreach(FIL ${ARGN})
    get_filename_component(_proto_abs ${FIL} ABSOLUTE)
    get_filename_component(_proto_stem ${FIL} NAME_WE)

    set(_out_cc "${CMAKE_CURRENT_BINARY_DIR}/${_proto_stem}.pb.cc")
    set(_out_h "${CMAKE_CURRENT_BINARY_DIR}/${_proto_stem}.pb.h")
    list(APPEND _generated_cc "${_out_cc}")
    list(APPEND _generated_h "${_out_h}")

    # Depends on the 'protoc' target so the compiler exists before it is run.
    add_custom_command(
      OUTPUT "${_out_cc}" "${_out_h}"
      COMMAND ${CMAKE_COMMAND} -E make_directory "${CMAKE_CURRENT_BINARY_DIR}"
      COMMAND ${PROTOBUF_PROTOC_EXECUTABLE}
              -I${CMAKE_CURRENT_SOURCE_DIR}
              --cpp_out "${_cpp_out_arg}"
              ${_proto_abs}
      DEPENDS ${_proto_abs} protoc
      COMMENT "Running C++ protocol buffer compiler on ${FIL}"
      VERBATIM)
  endforeach()

  set_source_files_properties(${_generated_cc} ${_generated_h} PROPERTIES GENERATED TRUE)

  # Hand the result lists back to the caller.
  set(${SRCS} ${_generated_cc} PARENT_SCOPE)
  set(${HDRS} ${_generated_h} PARENT_SCOPE)
endfunction()
function(proto_library TARGET_NAME) function(proto_library TARGET_NAME)
set(oneValueArgs "") set(oneValueArgs "")
set(multiValueArgs SRCS DEPS) set(multiValueArgs SRCS DEPS)
cmake_parse_arguments(proto_library "${options}" "${oneValueArgs}" "${multiValueArgs}" ${ARGN}) cmake_parse_arguments(proto_library "${options}" "${oneValueArgs}" "${multiValueArgs}" ${ARGN})
set(proto_srcs) set(proto_srcs)
set(proto_hdrs) set(proto_hdrs)
protobuf_generate_cpp(proto_srcs proto_hdrs ${proto_library_SRCS}) paddle_protobuf_generate_cpp(proto_srcs proto_hdrs ${proto_library_SRCS})
cc_library(${TARGET_NAME} SRCS ${proto_srcs} DEPS ${proto_library_DEPS} protobuf) cc_library(${TARGET_NAME} SRCS ${proto_srcs} DEPS ${proto_library_DEPS} protobuf)
endfunction() endfunction()
......
...@@ -5,12 +5,12 @@ ...@@ -5,12 +5,12 @@
Both deep learning systems and programming languages help users describe computation procedures. These systems use various representations of computation: Both deep learning systems and programming languages help users describe computation procedures. These systems use various representations of computation:
- Caffe, Torch, and Paddle: sequences of layers. - Caffe, Torch, and Paddle: sequences of layers.
- TensorFlow, Caffe2, Mxnet: graphs of operators. - TensorFlow, Caffe2, Mxnet: graph of operators.
- PaddlePaddle: nested blocks, like C++ and Java programs. - PaddlePaddle: nested blocks, like C++ and Java programs.
## Block in Programming Languages and Deep Learning ## Block in Programming Languages and Deep Learning
In programming languages, a block is a pair of curly braces that includes local variables definitions and a sequence of instructions, or operators. In programming languages, a block is a pair of curly braces that includes local variables definitions and a sequence of instructions or operators.
Blocks work with control flow structures like `if`, `else`, and `for`, which have equivalents in deep learning: Blocks work with control flow structures like `if`, `else`, and `for`, which have equivalents in deep learning:
...@@ -24,14 +24,14 @@ A key difference is that a C++ program describes a one pass computation, whereas ...@@ -24,14 +24,14 @@ A key difference is that a C++ program describes a one pass computation, whereas
## Stack Frames and the Scope Hierarchy ## Stack Frames and the Scope Hierarchy
The existence of the backward makes the execution of a block of traditional programs and PaddlePaddle different to each other: The existence of the backward pass makes the execution of a block of PaddlePaddle different from traditional programs:
| programming languages | PaddlePaddle | | programming languages | PaddlePaddle |
|-----------------------|-------------------------------| |-----------------------|---------------------------------|
| stack | scope hierarchy | | stack | scope hierarchy |
| stack frame | scope | | stack frame | scope |
| push at entering block| push at entering block | | push at entering block| push at entering block |
| pop at leaving block | destroy at minibatch completes| | pop at leaving block | destroy when minibatch completes|
1. In traditional programs: 1. In traditional programs:
...@@ -42,9 +42,9 @@ The existence of the backward makes the execution of a block of traditional prog ...@@ -42,9 +42,9 @@ The existence of the backward makes the execution of a block of traditional prog
1. In PaddlePaddle 1. In PaddlePaddle
- When the execution enters a block, PaddlePaddle adds a new scope, where it realizes variables. - When the execution enters a block, PaddlePaddle adds a new scope, where it realizes variables.
- PaddlePaddle doesn't pop a scope after the execution of the block because variables therein are to be used by the backward pass. So it has a stack forest known as a *scope hierarchy*. - PaddlePaddle doesn't pop a scope after the execution of the block because variables therein are used by the backward pass. So it has a stack forest known as a *scope hierarchy*.
- The height of the highest tree is the maximum depth of nested blocks. - The height of the highest tree is the maximum depth of nested blocks.
- After the process of a minibatch, PaddlePaddle destroys the scope hierarchy. - After the processing of a minibatch, PaddlePaddle destroys the scope hierarchy.
## Use Blocks in C++ and PaddlePaddle Programs ## Use Blocks in C++ and PaddlePaddle Programs
...@@ -94,14 +94,14 @@ with ie.false_block(): ...@@ -94,14 +94,14 @@ with ie.false_block():
o1, o2 = ie(cond) o1, o2 = ie(cond)
``` ```
In both examples, the left branch computes `x+y` and `softmax(x+y)`, the right branch computes `x+1` and `fc(x)`. In both examples, the left branch computes `x+y` and `softmax(x+y)`, the right branch computes `fc(x)` and `x+1` .
A difference is that variables in the C++ program contain scalar values, whereas those in the PaddlePaddle programs are mini-batches of instances. The `ie.input(true, 0)` invocation returns instances in the 0-th input, `x`, that corresponds to true values in `cond` as the local variable `x`, where `ie.input(false, 0)` returns instances corresponding to false values. The difference is that variables in the C++ program contain scalar values, whereas those in the PaddlePaddle programs are mini-batches of instances.
### Blocks with `for` and `RNNOp` ### Blocks with `for` and `RNNOp`
The following RNN model from the [RNN design doc](./rnn.md) The following RNN model in PaddlePaddle from the [RNN design doc](./rnn.md) :
```python ```python
x = sequence([10, 20, 30]) # shape=[None, 1] x = sequence([10, 20, 30]) # shape=[None, 1]
...@@ -112,9 +112,9 @@ U = var(0.375, param=true) # shape=[1] ...@@ -112,9 +112,9 @@ U = var(0.375, param=true) # shape=[1]
rnn = pd.rnn() rnn = pd.rnn()
with rnn.step(): with rnn.step():
h = rnn.memory(init = m) h = rnn.memory(init = m)
hh = rnn.previous_memory(h) h_prev = rnn.previous_memory(h)
a = layer.fc(W, x) a = layer.fc(W, x)
b = layer.fc(U, hh) b = layer.fc(U, h_prev)
s = pd.add(a, b) s = pd.add(a, b)
act = pd.sigmoid(s) act = pd.sigmoid(s)
rnn.update_memory(h, act) rnn.update_memory(h, act)
...@@ -147,9 +147,9 @@ for (int i = 1; i <= sizeof(x)/sizeof(x[0]); ++i) { ...@@ -147,9 +147,9 @@ for (int i = 1; i <= sizeof(x)/sizeof(x[0]); ++i) {
## Compilation and Execution ## Compilation and Execution
Like TensorFlow programs, a PaddlePaddle program is written in Python. The first part describes a neural network as a protobuf message, and the rest part executes the message for training or inference. Like TensorFlow, a PaddlePaddle program is written in Python. The first part describes a neural network as a protobuf message, and the rest executes the message for training or inference.
The generation of this protobuf message is like what a compiler generates a binary executable file. The execution of the message that the OS executes the binary file. The generation of this protobuf message is similar to how a compiler generates a binary executable file. The execution of the message is similar to how the OS executes the binary file.
## The "Binary Executable File Format" ## The "Binary Executable File Format"
...@@ -186,8 +186,8 @@ Also, the RNN operator in above example is serialized into a protobuf message of ...@@ -186,8 +186,8 @@ Also, the RNN operator in above example is serialized into a protobuf message of
``` ```
OpDesc { OpDesc {
inputs = {0} // the index of x inputs = {0} // the index of x in vars of BlockDesc above
outputs = {5, 3} // indices of act and hidden_out outputs = {5, 3} // indices of act and hidden_out in vars of BlockDesc above
attrs { attrs {
"memories" : {1} // the index of h "memories" : {1} // the index of h
"step_net" : <above step net> "step_net" : <above step net>
...@@ -203,14 +203,14 @@ This `OpDesc` value is in the `ops` field of the `BlockDesc` value representing ...@@ -203,14 +203,14 @@ This `OpDesc` value is in the `ops` field of the `BlockDesc` value representing
During the generation of the Protobuf message, the Block should store VarDesc (the Protobuf message which describes Variable) and OpDesc (the Protobuf message which describes Operator). During the generation of the Protobuf message, the Block should store VarDesc (the Protobuf message which describes Variable) and OpDesc (the Protobuf message which describes Operator).
VarDesc in a block should have its name scope to avoid local variables affect parent block's name scope. VarDesc in a block should have its name scope to avoid local variables affect parent block's name scope.
Child block's name scopes should inherit the parent's so that OpDesc in child block can reference a VarDesc that stored in parent block. For example Child block's name scopes should inherit the parent's so that OpDesc in child block can reference a VarDesc that stored in parent block. For example:
```python ```python
a = pd.Varaible(shape=[20, 20]) a = pd.Variable(shape=[20, 20])
b = pd.fc(a, params=["fc.w", "fc.b"]) b = pd.fc(a, params=["fc.w", "fc.b"])
rnn = pd.create_rnn() rnn = pd.create_rnn()
with rnn.stepnet() with rnn.stepnet():
x = a.as_step_input() x = a.as_step_input()
# reuse fc's parameter # reuse fc's parameter
fc_without_b = pd.get_variable("fc.w") fc_without_b = pd.get_variable("fc.w")
...@@ -218,17 +218,17 @@ with rnn.stepnet() ...@@ -218,17 +218,17 @@ with rnn.stepnet()
out = rnn() out = rnn()
``` ```
the method `pd.get_variable` can help retrieve a Variable by a name, a Variable may store in a parent block, but might be retrieved in a child block, so block should have a variable scope that supports inheritance. The method `pd.get_variable` can help retrieve a Variable by the name. The Variable may be stored in a parent block, but might be retrieved in a child block, so block should have a variable scope that supports inheritance.
In compiler design, the symbol table is a data structure created and maintained by compilers to store information about the occurrence of various entities such as variable names, function names, classes, etc. In compiler design, the symbol table is a data structure created and maintained by compilers to store information about the occurrence of various entities such as variable names, function names, classes, etc.
To store the definition of variables and operators, we define a C++ class `SymbolTable`, like the one used in compilers. To store the definition of variables and operators, we define a C++ class `SymbolTable`, like the one used in compilers.
`SymbolTable` can do the following stuff: `SymbolTable` can do the following:
- store the definitions (some names and attributes) of variables and operators, - store the definitions (some names and attributes) of variables and operators,
- to verify if a variable was declared, - verify if a variable was declared,
- to make it possible to implement type checking (offer Protobuf message pointers to `InferShape` handlers). - make it possible to implement type checking (offer Protobuf message pointers to `InferShape` handlers).
```c++ ```c++
...@@ -240,19 +240,18 @@ class SymbolTable { ...@@ -240,19 +240,18 @@ class SymbolTable {
OpDesc* NewOp(const string& name=""); OpDesc* NewOp(const string& name="");
// TODO determine whether name is generated by python or C++ // TODO determine whether name is generated by python or C++.
// currently assume that a unique name will be generated by C++ if the // Currently assume that a unique name will be generated by C++ if the
// argument name left default. // argument name is left default.
VarDesc* NewVar(const string& name=""); VarDesc* Var(const string& name="");
// find a VarDesc by name, if recursive true, find parent's SymbolTable // find a VarDesc by name, if recursive is true, find parent's SymbolTable
// recursively. // recursively.
// this interface is introduced to support InferShape, find protobuf messages // this interface is introduced to support InferShape, find protobuf messages
// of variables and operators, pass pointers into InferShape. // of variables and operators, pass pointers into InferShape.
// operator
// //
// NOTE maybe some C++ classes such as VarDescBuilder and OpDescBuilder should // NOTE maybe some C++ classes such as VarDescBuilder and OpDescBuilder should
// be proposed and embedded into pybind to enable python operate on C++ pointers. // be proposed and embedded into pybind to enable python operation on C++ pointers.
VarDesc* FindVar(const string& name, bool recursive=true); VarDesc* FindVar(const string& name, bool recursive=true);
OpDesc* FindOp(const string& name); OpDesc* FindOp(const string& name);
...@@ -270,7 +269,7 @@ class SymbolTable { ...@@ -270,7 +269,7 @@ class SymbolTable {
After all the description of variables and operators is added into SymbolTable, After all the description of variables and operators is added into SymbolTable,
the block has enough information to run. the block has enough information to run.
The `Block` class takes a `BlockDesc` as input, and provide `Run` and `InferShape` functions. The `Block` class takes a `BlockDesc` as input, and provides `Run` and `InferShape` functions.
```c++ ```c++
...@@ -302,7 +301,7 @@ public: ...@@ -302,7 +301,7 @@ public:
void CreateVariables(const framework::Scope& scope); void CreateVariables(const framework::Scope& scope);
void CreateOperators(); void CreateOperators();
// some other necessary interfaces of NetOp are list below // some other necessary interfaces of NetOp are listed below
// ... // ...
private: private:
...@@ -316,15 +315,14 @@ private: ...@@ -316,15 +315,14 @@ private:
Block inherits from OperatorBase, which has a Run method. Block inherits from OperatorBase, which has a Run method.
Block's Run method will run its operators sequentially. Block's Run method will run its operators sequentially.
There is another important interface called `Eval`, which take some arguments called targets, and generate a minimal graph which takes targets as the end points and creates a new Block, There is another important interface called `Eval`, which takes some arguments called targets and generates a minimal graph which treats targets as the end points and creates a new Block. After `Run`, `Eval` will get the latest value and return the targets.
after `Run`, `Eval` will get the latest value and return the targets.
The definition of Eval is as follows: The definition of Eval is as follows:
```c++ ```c++
// clean a block description by targets using the corresponding dependency graph. // clean a block description by targets using the corresponding dependency graph.
// return a new BlockDesc with minimal number of operators. // return a new BlockDesc with minimal number of operators.
// NOTE not return a Block but the block's description so that this can be distributed // NOTE: The return type is not a Block but the block's description so that this can be distributed
// to a cluster. // to a cluster.
BlockDesc Prune(const BlockDesc& desc, vector<string> targets); BlockDesc Prune(const BlockDesc& desc, vector<string> targets);
......
# Design for GAN
GAN (Generative Adversarial Net [https://arxiv.org/abs/1406.2661]) is an important model for unsupervised learning and is widely used in many areas.
It applies several important concepts in machine learning system design, including building and running subgraphs, dependency tracing, different optimizers in one executor and so forth.
In our GAN design, we wrap it as a user-friendly, easily customizable Python API for designing different models. We take the conditional DC-GAN (Unsupervised Representation Learning with Deep Convolutional Generative Adversarial Networks [https://arxiv.org/abs/1511.06434]) as an example due to its good performance on image generation.
<p align="center">
<img src="./test.dot.png" width = "35%" align="center"/><br/>
Figure 1. The overall running logic of GAN. The black solid arrows indicate the forward pass; the green dashed arrows indicate the backward pass of generator training; the red dashed arrows indicate the backward pass of the discriminator training. The BP pass of the green (red) arrow should only update the parameters in the green (red) boxes. The diamonds indicate the data providers. d\_loss and g\_loss marked in red and green are the two targets we would like to run.
</p>
The operators, layers and functions required/optional to build a GAN demo are summarized in https://github.com/PaddlePaddle/Paddle/issues/4563.
<p align="center">
<img src="./dcgan.png" width = "90%" align="center"/><br/>
Figure 2. Photo borrowed from the original DC-GAN paper.
</p>
## The Conditional-GAN might be a class.
In this design, we adopt the popular open source designs in https://github.com/carpedm20/DCGAN-tensorflow and https://github.com/rajathkmp/DCGAN. It contains the following data structure:
- DCGAN(object): which contains everything required to build a GAN model. It provides the following member functions as its API:
- __init__(...): Initialize hyper-parameters (like conv dimension and so forth), and declare model parameters of discriminator and generator as well.
- generator(z, y=None): Generate a fake image from input noise z. If the label y is provided, the conditional GAN model will be chosen.
Returns a generated image.
- discriminator(image):
Given an image, decide if it is from a real source or a fake one.
Returns a 0/1 binary label.
- build_model(self):
Build the whole GAN model and define the training losses for both the generator and the discriminator.
## Discussion on Engine Functions required to build GAN
- Trace the tensor and variable dependency in the engine executor. (Very critical; otherwise GAN cannot be trained correctly.)
- Different optimizers responsible for optimizing different losses.
To be more detailed, we introduce our design of DCGAN as follows:
### Class member Function: Initializer
- Set up hyper-parameters, including conditional dimension, noise dimension, batch size and so forth.
- Declare and define all the model variables. All the discriminator parameters are included in the list self.theta_D and all the generator parameters are included in the list self.theta_G.
```python
class DCGAN(object):
    """Holds the hyper-parameters and all model parameters of a DC-GAN."""

    def __init__(self, y_dim=None, z_dim=100):
        """Set up hyper-parameters and declare the model parameters.

        Args:
            y_dim: dimension of the conditioning label; ``None`` (the default)
                selects the plain, unconditional GAN.
            z_dim: dimension of the input noise. The default of 100 is only a
                placeholder for this sketch; pass the value your model needs.
        """
        # hyper parameters
        self.y_dim = y_dim  # conditional gan or not
        self.batch_size = 100
        self.z_dim = z_dim  # input noise dimension

        # define parameters of discriminators
        self.D_W0 = pd.Variable(shape=[3, 3, 1, 128], data=pd.gaussian_normal_randomizer())
        self.D_b0 = pd.Variable(np.zeros(128))  # variable also support initialization using a numpy data
        self.D_W1 = pd.Variable(shape=[784, 128], data=pd.gaussian_normal_randomizer())
        self.D_b1 = pd.Variable(np.zeros(128))  # variable also support initialization using a numpy data
        self.D_W2 = pd.Variable(np.random.rand(128, 1))
        self.D_b2 = pd.Variable(np.zeros(128))
        # every discriminator parameter, in one list
        self.theta_D = [self.D_W0, self.D_b0, self.D_W1, self.D_b1, self.D_W2, self.D_b2]

        # define parameters of generators
        self.G_W0 = pd.Variable(shape=[784, 128], data=pd.gaussian_normal_randomizer())
        self.G_b0 = pd.Variable(np.zeros(128))  # variable also support initialization using a numpy data
        self.G_W1 = pd.Variable(shape=[784, 128], data=pd.gaussian_normal_randomizer())
        self.G_b1 = pd.Variable(np.zeros(128))  # variable also support initialization using a numpy data
        self.G_W2 = pd.Variable(np.random.rand(128, 1))
        self.G_b2 = pd.Variable(np.zeros(128))
        # every generator parameter, in one list
        self.theta_G = [self.G_W0, self.G_b0, self.G_W1, self.G_b1, self.G_W2, self.G_b2]
```
### Class member Function: Generator
- Given a noise input z, returns a fake image.
- Concatenation, batch-norm, FC operations required;
- Deconv layer required, which is missing now...
```python
class DCGAN(object):
    def generator(self, z, y=None):
        """Generate fake images from noise.

        Args:
            z: the random noise.
            y: input data label (optional); used only by the conditional GAN.

        Returns:
            G_im: the generated fake images.
        """
        # Conditional GAN: append the label to the noise along axis 1.
        if self.y_dim:
            z = pd.layer.concat(1, [z, y])

        G_h0 = pd.layer.fc(z, self.G_W0, self.G_b0)
        G_h0_bn = pd.layer.batch_norm(G_h0)
        G_h0_relu = pd.layer.relu(G_h0_bn)

        G_h1 = pd.layer.deconv(G_h0_relu, self.G_W1, self.G_b1)
        G_h1_bn = pd.layer.batch_norm(G_h1)
        G_h1_relu = pd.layer.relu(G_h1_bn)

        G_h2 = pd.layer.deconv(G_h1_relu, self.G_W2, self.G_b2)
        # tanh is applied to the output of the last deconv layer
        G_im = pd.layer.tanh(G_h2)
        return G_im
```
### Class member function: Discriminator
- Given an image (either real or generated), returns a logit indicating whether the image is real or fake.
- Concatenation, Convolution, batch-norm, FC, Leaky-ReLU operations required;
```python
class DCGAN(object):
    def discriminator(self, image):
        """Score an image as real or fake.

        Args:
            image: either generated images or real ones.

        Returns:
            D_h2: binary logit of the label.
        """
        D_h0 = pd.layer.conv2d(image, w=self.D_W0, b=self.D_b0)
        D_h0_bn = pd.layer.batchnorm(D_h0)
        D_h0_relu = pd.layer.lrelu(D_h0_bn)

        D_h1 = pd.layer.conv2d(D_h0_relu, w=self.D_W1, b=self.D_b1)
        D_h1_bn = pd.layer.batchnorm(D_h1)
        D_h1_relu = pd.layer.lrelu(D_h1_bn)

        D_h2 = pd.layer.fc(D_h1_relu, w=self.D_W2, b=self.D_b2)
        return D_h2
```
### Class member function: Build the model
- Define data readers as placeholders to hold the data;
- Build generator and discriminators;
- Define two training losses for discriminator and generator, respectively.
If we have an execution dependency engine to back-trace all tensors, the module building our GAN model will be like this:
```python
class DCGAN(object):
    def build_model(self):
        """Build the whole GAN graph and define both training losses.

        Sets self.d_loss (discriminator loss) and self.g_loss (generator loss).
        """
        # NOTE(review): self.im_size and self.sampler are not defined in the
        # snippets of this document -- confirm where they come from.
        if self.y_dim:
            self.y = pd.data(pd.float32, [self.batch_size, self.y_dim])
        self.images = pd.data(pd.float32, [self.batch_size, self.im_size, self.im_size])
        self.faked_images = pd.data(pd.float32, [self.batch_size, self.im_size, self.im_size])
        self.z = pd.data(pd.float32, [None, self.z_dim])

        # step 1: generate images by generator, classify real/fake images with discriminator
        if self.y_dim:  # if conditional GAN, includes label
            self.G = self.generator(self.z, self.y)
            self.D_t = self.discriminator(self.images)
            # generated fake images
            self.sampled = self.sampler(self.z, self.y)
            self.D_f = self.discriminator(self.G)
        else:  # original version of GAN
            self.G = self.generator(self.z)
            self.D_t = self.discriminator(self.images)
            # generate fake images
            self.sampled = self.sampler(self.z)
            # D_f scores the generated images, not the real ones
            self.D_f = self.discriminator(self.G)

        # step 2: define the two losses
        self.d_loss_real = pd.reduce_mean(pd.cross_entropy(self.D_t, np.ones(self.batch_size)))
        self.d_loss_fake = pd.reduce_mean(pd.cross_entropy(self.D_f, np.zeros(self.batch_size)))
        self.d_loss = self.d_loss_real + self.d_loss_fake

        self.g_loss = pd.reduce_mean(pd.cross_entropy(self.D_f, np.ones(self.batch_size)))
```
If we do not have a dependency engine but only blocks, the module building our GAN model will be like this:
```python
class DCGAN(object):
def build_model(self, default_block):
# input data in the default block
if self.y_dim:
self.y = pd.data(pd.float32, [self.batch_size, self.y_dim])
self.images = pd.data(pd.float32, [self.batch_size, self.im_size, self.im_size])
# self.faked_images = pd.data(pd.float32, [self.batch_size, self.im_size, self.im_size])
self.z = pd.data(tf.float32, [None, self.z_size])
# step 1: generate images by generator, classify real/fake images with discriminator
with pd.default_block().g_block():
if self.y_dim: # if conditional GAN, includes label
self.G = self.generator(self.z, self.y)
self.D_g = self.discriminator(self.G, self.y)
else: # original version of GAN
self.G = self.generator(self.z)
self.D_g = self.discriminator(self.G, self.y)
self.g_loss = pd.reduce_mean(pd.cross_entropy(self.D_g, np.ones(self.batch_size)))
with pd.default_block().d_block():
if self.y_dim: # if conditional GAN, includes label
self.D_t = self.discriminator(self.images, self.y)
self.D_f = self.discriminator(self.G, self.y)
else: # original version of GAN
self.D_t = self.discriminator(self.images)
self.D_f = self.discriminator(self.G)
# step 2: define the two losses
self.d_loss_real = pd.reduce_mean(pd.cross_entropy(self.D_t, np.ones(self.batch_size)))
self.d_loss_fake = pd.reduce_mean(pd.cross_entropy(self.D_f, np.zeros(self.batch_size)))
self.d_loss = self.d_loss_real + self.d_loss_fake
```
Some small confusion and problems with this design:
- D\_g and D\_f are actually the same thing, but have to be written twice; i.e., if we want to run two sub-graphs conceptually, the same code has to be written twice if it is shared by the graph.
- Requires the ability to create a block anytime, rather than in if-else or rnn only;
## Main function for the demo:
Generally, the user of GAN just needs to do the following things:
- Define an object as DCGAN class;
- Build the DCGAN model;
- Specify two optimizers for two different losses with respect to different parameters.
```python
# pd for short, should be more concise.
import paddle.v2 as pd
import numpy as np
import logging
if __name__ == "__main__":
# dcgan class in the default graph/block
# if we use dependency engine as tensorflow
# the codes, will be slightly different like:
# dcgan = DCGAN()
# dcgan.build_model()
with pd.block() as def_block:
dcgan = DCGAN()
dcgan.build_model(def_block)
# load mnist data
data_X, data_y = self.load_mnist()
# Two subgraphs required!!!
with pd.block().d_block():
d_optim = pd.train.Adam(lr = .001, beta= .1)
d_step = d_optim.minimize(dcgan.d_loss, dcgan.theta_D)
with pd.block().g_block():
g_optim = pd.train.Adam(lr = .001, beta= .1)
g_step = g_optim.minimize(dcgan.g_loss, dcgan.theta_G)
# executor
sess = pd.executor()
# training
for epoch in xrange(10000):
for batch_id in range(N / batch_size):
idx = ...
# sample a batch
batch_im, batch_label = data_X[idx:idx+batch_size], data_y[idx:idx+batch_size]
# sample z
batch_z = np.random.uniform(-1., 1., [batch_size, z_dim])
if batch_id % 2 == 0:
sess.run(d_step,
feed_dict = {dcgan.images: batch_im,
dcgan.y: batch_label,
dcgan.z: batch_z})
else:
sess.run(g_step,
feed_dict = {dcgan.z: batch_z})
```
# More thinking about dependency engine v.s. block design:
- What if we just want to run an intermediate result? Do we need to run the whole block/graph?
- Should we call eval() to get the fake images in the first stage? And then train the discriminator in the second stage?
...@@ -33,7 +33,6 @@ digraph ImageClassificationGraph { ...@@ -33,7 +33,6 @@ digraph ImageClassificationGraph {
cost -> MSE_Grad [color=red]; cost -> MSE_Grad [color=red];
d_cost -> MSE_Grad [color=red]; d_cost -> MSE_Grad [color=red];
x -> MSE_Grad [color=red];
l -> MSE_Grad [color=red]; l -> MSE_Grad [color=red];
y -> MSE_Grad -> d_y [color=red]; y -> MSE_Grad -> d_y [color=red];
......
## Optimizer Design
### The Problem
A PaddlePaddle program, or a block, is a sequence of operators operating on variables. A training program needs to do three kinds of work:
1. the forward pass, which computes intermediate results and the cost(s),
1. the backward pass, which derives gradients from intermediate results and costs, and
1. the optimization pass, which updates model parameters to optimize the cost(s).
This work relies on three kinds of operators:
1. forward operators,
1. gradient operators, and
1. optimization operators.
It's true that users should be able to create all these operators manually by calling some low-level API, but it would be much more convenient if they could only describe the forward pass and let PaddlePaddle create the backward and optimization operators automatically.
In this design, we propose a high-level API that automatically derives the optimization pass and operators from the forward pass.
### High-level Python API to describe the training process
1. Users write code to describe the network:
```python
images = layer.data("images")
labels = layer.data("labels")
w1 = pd.var("w1")
b1 = pd.var("b1")
hidden = layer.fc(images, w=w1, b=b1)
cost = layer.mse(hidden, labels)
```
The above code snippet will create forward operators in [Block](https://github.com/PaddlePaddle/Paddle/blob/develop/doc/design/block.md).
2. Users create a certain kind of Optimizer with some arguments.
```python
optimizer = AdagradOptimizer(learning_rate=0.001)
```
3. Users use the optimizer to `minimize` a certain `cost` through updating parameters in parameter_list.
```python
opt_op_list = optimizer.minimize(cost, parameter_list=[w1, b1])
```
The above code snippet will create gradient and optimization operators in Block. The return value of `minimize()` is a list of optimization operators that will be run by the session.
4. Users use Session/Executor to run this opt_op_list as target to do training.
```python
sess.run(target= opt_op_list, ...)
```
#### Optimizer Python interface:
```python
class Optimizer(object):
"""Optimizer Base class.
"""
def __init__(self):
pass
def create_backward_pass(self, loss, parameter_list=None):
"""
create and add gradient Operators in BlockDesc to Compute gradients of `loss`
for parameters in parameter_list
Args:
loss: a variable generated by the cost function.
parameter_list: parameters whose gradients need to be computed and which are updated to optimize the loss.
Returns:
list of (parameters, gradients) pair.
"""
return None
def create_optimization_pass(self, parameters_and_grads):
"""Add optimization operators to update gradients to variables.
Args:
parameters_and_grads: a list of (variable, gradient) pair to update.
Returns:
optimization_op_list: a list of optimization operators that will update the parameters using the gradients.
"""
return None
def minimize(self, loss, parameter_list):
"""Add operations to minimize `loss` by updating `parameter_list`.
This method combines interface `create_backward_pass()` and
`create_optimization_pass()` into one.
"""
params_grads = self.create_backward_pass(loss, parameter_list)
update_ops = self.create_optimization_pass(params_grads)
return update_ops
```
Users can inherit the Optimizer above to create their own Optimizer with some special logic, such as AdagradOptimizer.
...@@ -22,7 +22,7 @@ Whenever we create a block, we need to set its parent block to the current block ...@@ -22,7 +22,7 @@ Whenever we create a block, we need to set its parent block to the current block
```python ```python
class Program(objects): class Program(objects):
def __init__(self): def __init__(self):
self.proto = core.NewProgram() # a C++ ProgramDesc pointer. self.desc = core.NewProgram() # a C++ ProgramDesc pointer.
self.blocks = vector<Block>() self.blocks = vector<Block>()
self.blocks.append(Block(self, -1)) # the global block self.blocks.append(Block(self, -1)) # the global block
self.current_block = 0 # initialized to the global block self.current_block = 0 # initialized to the global block
...@@ -57,7 +57,7 @@ A [Block](https://github.com/PaddlePaddle/Paddle/blob/develop/doc/design/block.m ...@@ -57,7 +57,7 @@ A [Block](https://github.com/PaddlePaddle/Paddle/blob/develop/doc/design/block.m
```python ```python
class Block(objects): class Block(objects):
def __init__(self, program, parent_idx): def __init__(self, program, parent_idx):
self.proto = core.NewBlock(program.proto) self.desc = core.NewBlock(program.desc)
self.program = program self.program = program
self.vars = map<string, Variable>() self.vars = map<string, Variable>()
self.ops = vector<Operator>() self.ops = vector<Operator>()
...@@ -98,11 +98,11 @@ class Operator(object): ...@@ -98,11 +98,11 @@ class Operator(object):
outputs,# dict<stirng, Variable> outputs,# dict<stirng, Variable>
attrs # dict<string, Any> attrs # dict<string, Any>
): ):
self.proto = core.NewOpDesc(block.proto, type, inputs, outputs, attrs) self.desc = core.NewOpDesc(block.desc, type, inputs, outputs, attrs)
core.infer_shape(self.proto, inputs, outputs) core.infer_shape(self.desc, inputs, outputs)
def type(self): def type(self):
return self.proto.type() return self.desc.type()
``` ```
`Operator` creates the `OpDesc` message in C++ space, so that it can call the `InferShape` function, which is in C++. `Operator` creates the `OpDesc` message in C++ space, so that it can call the `InferShape` function, which is in C++.
...@@ -124,7 +124,7 @@ class Variable(object): ...@@ -124,7 +124,7 @@ class Variable(object):
name = unique_name_generator() name = unique_name_generator()
self.name = name self.name = name
self.block = block self.block = block
self.proto = core.NewVarDesc(block.proto, name, shape, lod_level) self.desc = core.NewVarDesc(block.desc, name, shape, lod_level)
self.writer = None self.writer = None
``` ```
...@@ -214,3 +214,7 @@ def fc_layer(input, size, ...): ...@@ -214,3 +214,7 @@ def fc_layer(input, size, ...):
out.writer = op out.writer = op
return out return out
``` ```
## Optimizer
[Optimizer Design Doc](./optimizer.md)
...@@ -17,22 +17,22 @@ The goals of refactoring include: ...@@ -17,22 +17,22 @@ The goals of refactoring include:
1. A graph is composed of *variables* and *operators*. 1. A graph is composed of *variables* and *operators*.
1. The description of graphs must be capable of being serialized/deserialized, so that: 1. The description of graphs must be serializable/deserializable, so that:
1. It can to be sent to the cloud for distributed execution, and 1. It can be sent to the cloud for distributed execution, and
1. It can be sent to clients for mobile or enterprise deployment. 1. It can be sent to clients for mobile or enterprise deployment.
1. The Python program does the following steps 1. The Python program does two things
1. *compilation*: run a Python program to generate a protobuf message representation of the graph and send it to 1. *Compilation* runs a Python program to generate a protobuf message representation of the graph and send it to
1. the C++ library `libpaddle.so` for local execution, 1. the C++ library `libpaddle.so` for local execution,
1. the master process of a distributed training job for training, or 1. the master process of a distributed training job for training, or
1. the server process of a Kubernetes serving job for distributed serving. 1. the server process of a Kubernetes serving job for distributed serving.
1. *execution*: execute the graph by constructing instances of class [`Variable`](https://github.com/PaddlePaddle/Paddle/blob/develop/paddle/framework/variable.h#L24) and [`OperatorBase`](https://github.com/PaddlePaddle/Paddle/blob/develop/paddle/framework/operator.h#L70), according to the protobuf message. 1. *Execution* executes the graph by constructing instances of class [`Variable`](https://github.com/PaddlePaddle/Paddle/blob/develop/paddle/framework/variable.h#L24) and [`OperatorBase`](https://github.com/PaddlePaddle/Paddle/blob/develop/paddle/framework/operator.h#L70), according to the protobuf message.
## Description and Realization of Computation Graph ## Description and Realization of Computation Graph
At compile time, the Python program generates a protobuf message representation of the graph, or the description of the graph. At compile time, the Python program generates a protobuf message representation of the graph, or a description of the graph.
At runtime, the C++ program realizes the graph and runs it. At runtime, the C++ program realizes the graph and runs it.
...@@ -42,11 +42,11 @@ At runtime, the C++ program realizes the graph and runs it. ...@@ -42,11 +42,11 @@ At runtime, the C++ program realizes the graph and runs it.
|Operation|[OpDesc](https://github.com/PaddlePaddle/Paddle/blob/develop/paddle/framework/framework.proto#L35)|[Operator](https://github.com/PaddlePaddle/Paddle/blob/develop/paddle/framework/operator.h#L64)| |Operation|[OpDesc](https://github.com/PaddlePaddle/Paddle/blob/develop/paddle/framework/framework.proto#L35)|[Operator](https://github.com/PaddlePaddle/Paddle/blob/develop/paddle/framework/operator.h#L64)|
|Block|BlockDesc|Block| |Block|BlockDesc|Block|
The word *graph* is interchangeable with *block* in this document. A graph represents computation steps and local variables similar to a C++/Java program block, or a pair of parentheses(`{` and `}`). The word *graph* is interchangeable with *block* in this document. A graph consists of computation steps and local variables similar to a C++/Java program block, or a pair of parentheses(`{` and `}`).
## Compilation and Execution ## Compilation and Execution
1. Run an application Python program to describe the graph. In particular, the Python application program does the following: 1. Run a Python program to describe the graph. In particular, the Python application program does the following:
1. Create `VarDesc` to represent local/intermediate variables, 1. Create `VarDesc` to represent local/intermediate variables,
1. Create operators and set attributes, 1. Create operators and set attributes,
...@@ -54,10 +54,10 @@ The word *graph* is interchangeable with *block* in this document. A graph repr ...@@ -54,10 +54,10 @@ The word *graph* is interchangeable with *block* in this document. A graph repr
1. Infer the type and the shape of variables, 1. Infer the type and the shape of variables,
1. Plan memory-reuse for variables, 1. Plan memory-reuse for variables,
1. Generate the backward graph 1. Generate the backward graph
1. Optimize the computation graph. 1. Add optimization operators to the computation graph.
1. Potentially, split the graph for distributed training. 1. Optionally, split the graph for distributed training.
1. The invocation of `train` or [`infer`](https://github.com/PaddlePaddle/Paddle/blob/develop/python/paddle/v2/inference.py#L108) methods in the application Python program does the following: 1. The invocation of `train` or [`infer`](https://github.com/PaddlePaddle/Paddle/blob/develop/python/paddle/v2/inference.py#L108) methods in the Python program does the following:
1. Create a new Scope instance in the [scope hierarchy](https://github.com/PaddlePaddle/Paddle/blob/develop/doc/design/scope.md) for each run of a block, 1. Create a new Scope instance in the [scope hierarchy](https://github.com/PaddlePaddle/Paddle/blob/develop/doc/design/scope.md) for each run of a block,
1. realize local variables defined in the BlockDesc message in the new scope, 1. realize local variables defined in the BlockDesc message in the new scope,
...@@ -107,8 +107,8 @@ Compile Time -> IR -> Runtime ...@@ -107,8 +107,8 @@ Compile Time -> IR -> Runtime
![class_diagram](http://api.paddlepaddle.org/graphviz?dot=https://gist.githubusercontent.com/reyoung/53df507f6749762675dff3e7ce53372f/raw/dd598e8f1976f5759f58af5e5ef94738a6b2e661/op.dot) ![class_diagram](http://api.paddlepaddle.org/graphviz?dot=https://gist.githubusercontent.com/reyoung/53df507f6749762675dff3e7ce53372f/raw/dd598e8f1976f5759f58af5e5ef94738a6b2e661/op.dot)
* `Operator` is the fundamental building block of the user interface. * `Operator` is the fundamental building block of the user interface.
* Operator stores input/output variable names, and attributes. * Operator stores input/output variable names and attributes.
* The `InferShape` interface is used to infer the shape of the output variable shapes based on the shapes of the input variables. * The `InferShape` interface is used to infer the shape of the output variables based on the shapes of the input variables.
* Use `Run` to compute the `output` variables from the `input` variables. * Use `Run` to compute the `output` variables from the `input` variables.
--- ---
...@@ -139,7 +139,7 @@ Compile Time -> IR -> Runtime ...@@ -139,7 +139,7 @@ Compile Time -> IR -> Runtime
* Limit the number of `tensor.device(dev) = ` in your code. * Limit the number of `tensor.device(dev) = ` in your code.
* `thrust::transform` and `std::transform`. * `thrust::transform` and `std::transform`.
* `thrust` has the same API as C++ standard library. Using `transform`, one can quickly implement customized element-wise kernels. * `thrust` has the same API as C++ standard library. Using `transform`, one can quickly implement customized element-wise kernels.
* `thrust` also has more complex APIs, like `scan`, `reduce`, `reduce_by_key`. * `thrust`, in addition, supports more complex APIs, like `scan`, `reduce`, `reduce_by_key`.
* Hand-writing `GPUKernel` and `CPU` code * Hand-writing `GPUKernel` and `CPU` code
* Do not write in header (`.h`) files. CPU Kernel should be in cpp source (`.cc`) and GPU kernels should be in cuda (`.cu`) files. (GCC cannot compile GPU code.) * Do not write in header (`.h`) files. CPU Kernel should be in cpp source (`.cc`) and GPU kernels should be in cuda (`.cu`) files. (GCC cannot compile GPU code.)
--- ---
...@@ -185,10 +185,10 @@ Make sure the registration process is executed and linked. ...@@ -185,10 +185,10 @@ Make sure the registration process is executed and linked.
1. Write an Op class and its gradient Op class, if required. 1. Write an Op class and its gradient Op class, if required.
2. Write an Op maker class. In the constructor of this class, describe the inputs, outputs and attributes of the operator. 2. Write an Op maker class. In the constructor of this class, describe the inputs, outputs and attributes of the operator.
3. Invoke the macro `REGISTER_OP`. This macro will 3. Invoke the macro `REGISTER_OP`. This macro will
1. Call maker class to complete the `proto` and the `checker` 1. Call maker class to complete `proto` and `checker`
2. Using the completed `proto` and `checker`, it will add a new key-value pair to the `OpInfoMap` 2. Using the completed `proto` and `checker`, it will add a new key-value pair to the `OpInfoMap`
4. Invoke the `USE` macro in which the Op is used, to make sure that it is linked. 4. Invoke the `USE` macro in which the Op is used to make sure that it is linked.
--- ---
# Backward Module (1/2) # Backward Module (1/2)
...@@ -199,13 +199,14 @@ Make sure the registration process is executed and linked. ...@@ -199,13 +199,14 @@ Make sure the registration process is executed and linked.
--- ---
# Backward Module (2/2) # Backward Module (2/2)
### Build Backward Network ### Build Backward Network
- **Input**: graph of forward operators - **Input**: a graph of forward operators
- **Output**: graph of backward operators - **Output**: a graph of backward operators
- **Corner cases in construction** - **Corner cases in construction**
- Shared Variables => insert an `Add` operator to combine gradients - Shared Variables => insert an `Add` operator to combine gradients
- No Gradient => insert a `fill_zero_grad` operator - No Gradient => insert a `fill_zero_grad` operator
- Recursive NetOp => call `Backward` recursively - Recursive NetOp => call `Backward` recursively
- RNN Op => recursively call `Backward` on stepnet - RNN Op => recursively call `Backward` on stepnet
- RNN Op => recursively call `Backward` on stepnet
--- ---
...@@ -215,10 +216,10 @@ Make sure the registration process is executed and linked. ...@@ -215,10 +216,10 @@ Make sure the registration process is executed and linked.
* Only dims and data pointers are stored in `Tensor`. * Only dims and data pointers are stored in `Tensor`.
* All operations on `Tensor` are written in `Operator` or global functions. * All operations on `Tensor` are written in `Operator` or global functions.
* Variable length Tensor design [LoDTensor](https://github.com/PaddlePaddle/Paddle/blob/develop/paddle/framework/lod_tensor.md) * Variable length Tensor design [LoDTensor](https://github.com/PaddlePaddle/Paddle/blob/develop/paddle/framework/lod_tensor.md)
* `Variable` instances are the inputs and the outputs of an operator. Not just `Tensor`. * `Variable` instances are the inputs and the outputs of an operator, not just `Tensor`.
* `step_scopes` in RNN is a variable and not a tensor. * `step_scopes` in RNN is a variable and not a tensor.
* `Scope` is where variables are stores. * `Scope` is where variables are stored.
* map<string `variable_name`, Variable> * map<string `var name`, Variable>
* `Scope` has a hierarchical structure. The local scope can get variables from its parent scope. * `Scope` has a hierarchical structure. The local scope can get variables from its parent scope.
--- ---
...@@ -246,7 +247,7 @@ Make sure the registration process is executed and linked. ...@@ -246,7 +247,7 @@ Make sure the registration process is executed and linked.
--- ---
# Control the migration quality # Control the migration quality
- Compare the performance of migrated models with old ones. - Compare the performance of migrated models with old ones.
- Follow the google C++ style - Follow the google C++ style guide.
- Build the automatic workflow of generating Python/C++ documentations. - Build the automatic workflow of generating Python/C++ documentations.
- The documentation of layers and ops should be written inside the code. - The documentation of layers and ops should be written inside the code.
- Take the documentation quality into account when submitting pull requests. - Take the documentation quality into account when submitting pull requests.
......
...@@ -3,15 +3,17 @@ ...@@ -3,15 +3,17 @@
## The Problem Posed ## The Problem Posed
In our current operator registration mechanism, for each operator, the programmer should register a *gradient operator creator* function, which takes a C++ operator instance, and returns the corresponding gradient instance. Currently, for each C++ operator class definition, there registers a *gradient operator creator* function, which takes a C++ operator instance and returns the corresponding gradient operator instance.
However, as we decided to separate the *compilation* and *execution* of DL models, we need to reshape the creator to take a protobuf `OpDesc` message, and returns a corresponding message. However, we noticed two problems with the current design:
More than that, the new registration mechanism need to support the fact that an operators' gradient computation might be a composition of operators. 1. As we decided to separate the *compilation* and *execution* phases, we need to change the creator to take an `OpDesc` protobuf message in a `ProgramDesc` and inserts corresponding `OpDesc` messages into the `ProgramDesc` message.
## Current Implementation 1. Some operator's gradient computation requires more than one gradient operator. For example, the gradient of *minus* consists of two operators -- an identity operator and a scale operator. So we need to make the registration mechanism to support the mapping from an operator to a set of operators for gradient computation.
OpInfos store in a association map which key is the operator type. The `grad_op_type` indicate associated gradient operator type. Operator can create gradient operator by `OpInfo::creator_` of gradient. The pseudo code is ## The Current Implementation
The C++ class `OpInfos` is stored in an association map whose key is the operator type. The `grad_op_type` indicates the associated gradient operator type. An operator can create its gradient operator by the `OpInfo::creator_` of the gradient. The pseudo code is
```cpp ```cpp
struct OpInfo { struct OpInfo {
......
...@@ -37,7 +37,7 @@ Scope is an association of a name to variable. All variables belong to `Scope`. ...@@ -37,7 +37,7 @@ Scope is an association of a name to variable. All variables belong to `Scope`.
```cpp ```cpp
class Scope { class Scope {
public: public:
Variable* NewVar(const std::string& name); Variable* Var(const std::string& name);
const Variable* FindVar(const std::string& name) const; const Variable* FindVar(const std::string& name) const;
private: private:
...@@ -98,7 +98,7 @@ class Scope { ...@@ -98,7 +98,7 @@ class Scope {
Variable* FindVar(const std::string& name) const; Variable* FindVar(const std::string& name) const;
// return if already contains same name variable. // return if already contains same name variable.
Variable* NewVar(const std::string& name); Variable* Var(const std::string& name);
private: private:
std::shared_ptr<Scope> parent_; std::shared_ptr<Scope> parent_;
...@@ -107,7 +107,7 @@ class Scope { ...@@ -107,7 +107,7 @@ class Scope {
``` ```
## Only scope can create a variable ## Only scope can create a variable
To ensure `only scope can create a variable`, we should mark `Variable`'s constructor as a private member function, and Scope is a friend class of Variable. And then only `NewVar` can construct `Variable`. To ensure `only scope can create a variable`, we should mark `Variable`'s constructor as a private member function, and Scope is a friend class of Variable. And then only `Var` can construct `Variable`.
## When scope destroyed, all variables inside this scope should be destroyed together ## When scope destroyed, all variables inside this scope should be destroyed together
...@@ -121,4 +121,4 @@ Also, as the parent scope is a `shared_ptr`, we can only `Create()` a scope shar ...@@ -121,4 +121,4 @@ Also, as the parent scope is a `shared_ptr`, we can only `Create()` a scope shar
## Orthogonal interface ## Orthogonal interface
`FindVar` will return `nullptr` when `name` is not found. It can be used as `Contains` method. `NewVar` will return an `Error` when there is a name conflict locally. Combine `FindVar` and `NewVar`, we can implement `NewVar` easily. `FindVar` will return `nullptr` when `name` is not found. It can be used as `Contains` method. `Var` will return an `Error` when there is a name conflict locally. Combine `FindVar` and `Var`, we can implement `Var` easily.
# Design Doc: Selected Rows
`SelectedRows` is a kind of sparse tensor data type, which is designed to support `embedding` operators. The gradient of embedding table is a sparse tensor. Only a few rows are non-zero values in that tensor. It is straightforward to represent the sparse tensor by the following sparse tensor data structure:
```cpp
class SelectedRows {
private:
vector<int> rows_;
Tensor value_;
int height_;
};
```
The field `height_` is the first dimension of `SelectedRows`. The `rows_` field holds the indices of the non-zero rows of `SelectedRows`. The `value_` field is an N-dim tensor with shape `[rows_.size() /* NUM_ROWS */, ...]`, which supplies the values for each of those rows. The dimension of `SelectedRows` satisfies `[height_] + value_.shape[1:]`.
Suppose that a SelectedRows-typed variable `x` has many rows, but only two of them have values -- row 73 is `[1, 2]` and row 84 is `[3, 4]`, the `SelectedRows` representation would be:
```
x = SelectedRows {
rows = [73, 84],
value = [[1, 2], [3,4]]
}
```
## SelectedRows in Protobuf
`SelectedRows` is a kind of `Variable`. `VarDesc` in protobuf should describe the `SelectedRows` information. Only the tensor dimension of a `SelectedRows` will be described in compile-time since the `rows_` and `value_` are related to training data.
So we use `TensorDesc` to unify `data_type` and `dims`. A LodTensorDesc contains a `TensorDesc` and `lod_level`. The description of `SelectedRows` is a Tensor description.
```proto
message TensorDesc {
required DataType data_type = 1;
repeated int64 dims = 2; // [UNK, 640, 480] is saved as [-1, 640, 480]
}
message LodTensorDesc {
required TensorDesc tensor = 1;
optional int lod_level = 2;
}
message VarDesc {
required string name = 1;
enum VarType {
LOD_TENSOR = 0;
SELECTED_ROWS = 1;
}
required VarType type = 2;
optional LodTensorDesc lod_desc = 3;
optional TensorDesc selected_rows_desc = 4;
optional bool persistable = 5 [ default = false ];
}
```
## InferShape for Selected Rows
Just like `LoD` information, the `InferShape` method will infer the output tensor type as well. The operator should decide whether its output is a `SelectedRows` or `Dense` tensor.
For example, the gradient operator of `TableLookup` will always generate `SelectedRows`. Its `InferShape` method should look like the following:
```cpp
void TableLookupGrad::InferShape(context) {
...
context.SetDataType("Embedding.Grad", kSelectedRows);
}
```
## Sparse Operators
Several operators need to be written to support `SelectedRows`. They are:
1. Operators which generate `SelectedRows` gradients, e.g. the gradient of `TableLookupOp`.
2. Optimize operators which support `SelectedRows` gradients, e.g. `SGD` or `AdaGrad` for `SelectedRows`. However, there should be only one `SGD` operator. `OpWithKernel::Run` should select a suitable kernel for either a `dense` tensor or `SelectedRows`.
...@@ -161,7 +161,7 @@ class TensorArray: ...@@ -161,7 +161,7 @@ class TensorArray:
@name: str @name: str
the name of the variable to output. the name of the variable to output.
''' '''
tensor = NewVar(name) tensor = Var(name)
tensor_array_stack(self.name, tensor) tensor_array_stack(self.name, tensor)
return tensor return tensor
......
digraph Test {
z -> generator -> G_img;
G_img -> discriminator -> D_f -> d_loss_f;
label0 -> d_loss_f -> d_loss;
img -> discriminator -> D_t -> d_loss_t;
label1 -> d_loss_t -> d_loss;
d_loss -> d_loss_t[color=red, style=dashed];
d_loss -> d_loss_f[color=red, style=dashed];
d_loss_t -> D_t[color=red, style=dashed];
d_loss_f -> D_f[color=red, style=dashed];
D_t -> discriminator[color=red, style=dashed];
D_f -> discriminator[color=red, style=dashed];
D_f -> g_loss;
label2 -> g_loss;
g_loss -> D_f[color=green, style=dashed];
D_f -> discriminator[color=green, style=dashed];
discriminator -> G_img[color=green, style=dashed];
G_img -> generator[color=green, style=dashed];
discriminator [color=red, shape=box];
generator [color=green, shape=box];
z [shape=diamond];
img [shape=diamond];
label0 [shape=diamond];
label1 [shape=diamond];
label2 [shape=diamond];
d_loss [color=red];
g_loss [color=green];
}
...@@ -16,16 +16,23 @@ The computation graph is constructed by Data Node and Operation Node. The concep ...@@ -16,16 +16,23 @@ The computation graph is constructed by Data Node and Operation Node. The concep
## Definition of VarDesc ## Definition of VarDesc
A VarDesc should have a name and value, in PaddlePaddle, the value will always be a tensor. Since we use LoDTensor most of the time. We add a LoDTesnorDesc to represent it. A VarDesc should have a name, and value. There are two kinds of variable type in compile time, they are `LoDTensor` and `SelectedRows`.
```proto ```proto
message VarDesc { message VarDesc {
required string name = 1; required string name = 1;
optional LoDTensorDesc lod_tensor = 2; enum VarType {
LOD_TENSOR = 0;
SELECTED_ROWS = 1;
}
required VarType type = 2;
optional LoDTensorDesc lod_desc = 3;
optional TensorDesc selected_rows_desc = 4;
optional bool persistable = 5 [ default = false ];
} }
``` ```
## Definition of LodTensorDesc ## Definition of TensorDesc
```proto ```proto
enum DataType { enum DataType {
...@@ -38,87 +45,25 @@ enum DataType { ...@@ -38,87 +45,25 @@ enum DataType {
FP64 = 6; FP64 = 6;
} }
message LoDTensorDesc { message TensorDesc {
required DataType data_type = 1; required DataType data_type = 1;
repeated int32 dims = 2; // [UNK, 640, 480] is saved as [-1, 640, 480] repeated int64 dims = 2; // [UNK, 640, 480] is saved as [-1, 640, 480]
optional int32 lod_level = 3 [default=0];
} }
``` ```
## Definition of Variable in Python A TensorDesc describes `SelectedRows` and `LoDTensor`. For details of `SelectedRows`, please reference [`SelectedRows`](./selected_rows.md).
In Python API, layer will take Variable as Input, and return Variable as Output. There should be a class `Variable` in python to help create and manage Variable.
```python
image = Variable(dims=[-1, 640, 480])
# fc1 and fc2 are both Variable
fc1 = layer.fc(input=image, output_size=10)
fc2 = layer.fc(input=fc1, output_size=20)
```
### what should class `Variable` Have
1. `name`. A name of string type is used to mark the value of the Variable.
1. `initializer`. Since our Tensor does not have a value, we will always use some Operator to fill it at run time. So we should have an initialize method to help add the init operator.
1. `operator`. Variable should record which operator produces it. The reason is:
- we use pd.eval(targets=[var1, var2]) to run the related ops to get the value of var1 and var2. var.op is used to trace the dependency of the current variable.
In PaddlePaddle, we use Block to describe Computation Graph, so in the code we will use Block but not Graph.
```python
import VarDesc
import LoDTensorDesc
import framework
def AddInitialOperator(variable, initializer):
# add an initialize Operator to block to init this Variable
class Variable(object):
def __init__(self, name, dims, type, initializer):
self._block = get_default_block()
self._name = name
self.op = None
tensor_desc = LoDTensorDesc(data_type=type, dims=dims)
_var_desc = VarDesc(name=name, lod_tensor=tensor_desc)
self._var = framework.CreateVar(_var_desc)
self._block.add_var(self)
# add initial op according to initializer ## Definition of LodTensorDesc
if initializer is not None:
AddInitialOperator(self, initializer)
def dims(self):
return self._var.dims()
def data_type(self):
return self._var.data_type()
def to_proto(self): ```proto
pass message LoDTensorDesc {
required TensorDesc tensor = 1;
optional int lod_level = 2;
}
``` ```
Then we can use this Variable to create a fc layer in Python. A LoDTensorDesc contains a tensor and a lod_level.
```python ## Definition of Variable in Python
import paddle as pd
def flatten_size(X, num_flatten_dims):
prod = 1 # of last num_flatten_dims
for i in xrange(num_flatten_dims):
prod = prod * X.dims[-i-1]
return prod
def layer.fc(X, output_size, num_flatten_dims):
W = Variable(pd.random_uniform(), type=FP32, dims=[flatten_size(X, num_flatten_dims), output_size])
b = Variable(pd.random_uniform(), type=FP32, dims=[output_size])
out = Variable(type=FP32)
y = operator.fc(X, W, b, output=out) # fc will put fc op input into out
pd.InferShape(y)
return out
x = Variable(dims=[-1, 640, 480])
y = layer.fc(x, output_size=100)
z = layer.fc(y, output_size=200)
paddle.eval(targets=[z], ...) For Variable in Python, please reference [`Python API`](./python_api.md).
print(z)
```
...@@ -21,7 +21,7 @@ wmt14数据的提供文件在 `python/paddle/v2/dataset/wmt14.py <https://github ...@@ -21,7 +21,7 @@ wmt14数据的提供文件在 `python/paddle/v2/dataset/wmt14.py <https://github
循环神经网络在每个时间步骤顺序地处理序列。下面列出了 LSTM 的架构的示例。 循环神经网络在每个时间步骤顺序地处理序列。下面列出了 LSTM 的架构的示例。
.. image:: ../../../tutorials/sentiment_analysis/bi_lstm.jpg .. image:: src/bi_lstm.jpg
:align: center :align: center
一般来说,循环网络从 :math:`t=1` 到 :math:`t=T` 或者反向地从 :math:`t=T` 到 :math:`t=1` 执行以下操作。 一般来说,循环网络从 :math:`t=1` 到 :math:`t=T` 或者反向地从 :math:`t=T` 到 :math:`t=1` 执行以下操作。
...@@ -96,7 +96,7 @@ Sequence to Sequence Model with Attention ...@@ -96,7 +96,7 @@ Sequence to Sequence Model with Attention
我们将使用 sequence to sequence model with attention 我们将使用 sequence to sequence model with attention
作为例子演示如何配置复杂的循环神经网络模型。该模型的说明如下图所示。 作为例子演示如何配置复杂的循环神经网络模型。该模型的说明如下图所示。
.. image:: ../../../tutorials/text_generation/encoder-decoder-attention-model.png .. image:: src/encoder-decoder-attention-model.png
:align: center :align: center
在这个模型中,源序列 :math:`S = \{s_1, \dots, s_T\}` 在这个模型中,源序列 :math:`S = \{s_1, \dots, s_T\}`
......
...@@ -19,7 +19,7 @@ Simple Gated Recurrent Neural Network ...@@ -19,7 +19,7 @@ Simple Gated Recurrent Neural Network
Recurrent neural network process a sequence at each time step sequentially. An example of the architecture of LSTM is listed below. Recurrent neural network process a sequence at each time step sequentially. An example of the architecture of LSTM is listed below.
.. image:: ../../../tutorials/sentiment_analysis/src/bi_lstm.jpg .. image:: src/bi_lstm.jpg
:align: center :align: center
Generally speaking, a recurrent network perform the following operations from :math:`t=1` to :math:`t=T`, or reversely from :math:`t=T` to :math:`t=1`. Generally speaking, a recurrent network perform the following operations from :math:`t=1` to :math:`t=T`, or reversely from :math:`t=T` to :math:`t=1`.
...@@ -78,7 +78,7 @@ Sequence to Sequence Model with Attention ...@@ -78,7 +78,7 @@ Sequence to Sequence Model with Attention
----------------------------------------- -----------------------------------------
We will use the sequence to sequence model with attention as an example to demonstrate how you can configure complex recurrent neural network models. An illustration of the sequence to sequence model with attention is shown in the following figure. We will use the sequence to sequence model with attention as an example to demonstrate how you can configure complex recurrent neural network models. An illustration of the sequence to sequence model with attention is shown in the following figure.
.. image:: ../../../tutorials/text_generation/encoder-decoder-attention-model.png .. image:: src/encoder-decoder-attention-model.png
:align: center :align: center
In this model, the source sequence :math:`S = \{s_1, \dots, s_T\}` is encoded with a bidirectional gated recurrent neural networks. The hidden states of the bidirectional gated recurrent neural network :math:`H_S = \{H_1, \dots, H_T\}` is called *encoder vector* The decoder is a gated recurrent neural network. When decoding each token :math:`y_t`, the gated recurrent neural network generates a set of weights :math:`W_S^t = \{W_1^t, \dots, W_T^t\}`, which are used to compute a weighted sum of the encoder vector. The weighted sum of the encoder vector is utilized to condition the generation of the token :math:`y_t`. In this model, the source sequence :math:`S = \{s_1, \dots, s_T\}` is encoded with a bidirectional gated recurrent neural networks. The hidden states of the bidirectional gated recurrent neural network :math:`H_S = \{H_1, \dots, H_T\}` is called *encoder vector* The decoder is a gated recurrent neural network. When decoding each token :math:`y_t`, the gated recurrent neural network generates a set of weights :math:`W_S^t = \{W_1^t, \dots, W_T^t\}`, which are used to compute a weighted sum of the encoder vector. The weighted sum of the encoder vector is utilized to condition the generation of the token :math:`y_t`.
......
图像分类教程
==========
在本教程中,我们将使用CIFAR-10数据集训练一个卷积神经网络,并使用这个神经网络来对图片进行分类。如下图所示,卷积神经网络可以辨识图片中的主体,并给出分类结果。
<center>![Image Classification](./image_classification.png)</center>
## 数据准备
首先下载CIFAR-10数据集。下面是CIFAR-10数据集的官方网址:
<https://www.cs.toronto.edu/~kriz/cifar.html>
我们准备了一个脚本,可以用于从官方网站上下载CIFAR-10数据集,转为jpeg文件并存入特定的目录。使用这个脚本前请确认已经安装了pillow及相关依赖模块。可以参照下面的命令进行安装:
1. 安装pillow
```bash
sudo apt-get install libjpeg-dev
pip install pillow
```
2. 下载数据集
```bash
cd demo/image_classification/data/
sh download_cifar.sh
```
CIFAR-10数据集包含60000张32x32的彩色图片。图片分为10类,每个类包含6000张。其中50000张图片作为训练集,10000张作为测试集。
下图展示了所有的图片类别,每个类别中随机抽取了10张图片。
<center>![Image Classification](./cifar.png)</center>
脚本运行完成后,我们应当会得到一个名为cifar-out的文件夹,其下子文件夹的结构如下
```
train
---airplane
---automobile
---bird
---cat
---deer
---dog
---frog
---horse
---ship
---truck
test
---airplane
---automobile
---bird
---cat
---deer
---dog
---frog
---horse
---ship
---truck
```
cifar-out下包含`train`和`test`两个文件夹,其中分别包含了CIFAR-10中的训练集和测试集。这两个文件夹下各自有10个子文件夹,每个子文件夹下存储相应分类的图片。将图片按照上述结构存储好之后,我们就可以着手对分类模型进行训练了。
## 预处理
数据下载之后,还需要进行预处理,将数据转换为Paddle的格式。我们可以通过如下命令进行预处理工作:
```
cd demo/image_classification/
sh preprocess.sh
```
其中`preprocess.sh` 调用 `./demo/image_classification/preprocess.py` 对图片进行预处理
```sh
export PYTHONPATH=$PYTHONPATH:../../
data_dir=./data/cifar-out
python preprocess.py -i $data_dir -s 32 -c 1
```
`./demo/image_classification/preprocess.py` 使用如下参数:
- `-i` 或 `--input` 给出输入数据所在路径;
- `-s` 或 `--size` 给出图片尺寸;
- `-c` 或 `--color` 标示图片是彩色图或灰度图
## 模型训练
在开始训练之前,我们需要先创建一个模型配置文件。下面我们给出了一个配置示例。**注意**,这里列出的配置和`vgg_16_cifar.py`文件稍有差别,因为该文件也可适用于预测。
```python
from paddle.trainer_config_helpers import *
data_dir='data/cifar-out/batches/'
meta_path=data_dir+'batches.meta'
args = {'meta':meta_path, 'mean_img_size': 32,
'img_size': 32, 'num_classes': 10,
'use_jpeg': 1, 'color': "color"}
define_py_data_sources2(train_list=data_dir+"train.list",
test_list=data_dir+'test.list',
module='image_provider',
obj='processData',
args=args)
settings(
batch_size = 128,
learning_rate = 0.1 / 128.0,
learning_method = MomentumOptimizer(0.9),
regularization = L2Regularization(0.0005 * 128))
img = data_layer(name='image', size=3*32*32)
lbl = data_layer(name="label", size=10)
# small_vgg is predefined in trainer_config_helpers.network
predict = small_vgg(input_image=img, num_channels=3)
outputs(classification_cost(input=predict, label=lbl))
```
在第一行中我们载入用于定义网络的函数。
```python
from paddle.trainer_config_helpers import *
```
之后定义的`define_py_data_sources2`使用Python数据提供器,其中 `args`将在`image_provider.py`进行使用,该文件负责产生图片数据并传递给Paddle系统
- `meta`: 训练集平均值。
- `mean_img_size`: 平均特征图的高度及宽度。
- `img_size`:输入图片的高度及宽度。
- `num_classes`:类别个数。
- `use_jpeg`:处理过程中数据存储格式。
- `color`:标示是否为彩色图片。
`settings`用于设置训练算法。在下面的例子中,learning rate被设置为0.1除以batch size,而weight decay则为0.0005乘以batch size。
```python
settings(
batch_size = 128,
learning_rate = 0.1 / 128.0,
learning_method = MomentumOptimizer(0.9),
regularization = L2Regularization(0.0005 * 128)
)
```
`small_vgg`定义了网络结构。这里我们使用的是一个小的VGG网络。关于VGG卷积神经网络的描述可以参考:[http://www.robots.ox.ac.uk/~vgg/research/very_deep/](http://www.robots.ox.ac.uk/~vgg/research/very_deep/)
```python
# small_vgg is predefined in trainer_config_helpers.network
predict = small_vgg(input_image=img, num_channels=3)
```
配置创建完毕后,可以运行脚本train.sh来训练模型。
```bash
config=vgg_16_cifar.py
output=./cifar_vgg_model
log=train.log
paddle train \
--config=$config \
--dot_period=10 \
--log_period=100 \
--test_all_data_in_one_period=1 \
--use_gpu=1 \
--save_dir=$output \
2>&1 | tee $log
python -m paddle.utils.plotcurve -i $log > plot.png
```
- 这里我们使用的是GPU模式进行训练。如果你没有GPU环境,可以设置`use_gpu=0`
- `./demo/image_classification/vgg_16_cifar.py`是网络和数据配置文件。各项参数的详细说明可以在命令行参数相关文档中找到。
- 脚本`plotcurve.py`依赖于python的`matplotlib`模块。因此如果这个脚本运行失败,也许是因为需要安装`matplotlib`
在训练完成后,训练及测试误差曲线图会被`plotcurve.py`脚本保存在 `plot.png`中。下面是一个误差曲线图的示例:
<center>![Training and testing curves.](./plot.png)</center>
## 预测
在训练完成后,模型及参数会被保存在路径`./cifar_vgg_model/pass-%05d`下。例如第300个pass的模型会被保存在`./cifar_vgg_model/pass-00299`
要对一张图片进行分类预测,我们可以使用`predict.sh`,该脚本将输出预测分类的标签:
```
sh predict.sh
```
predict.sh:
```
model=cifar_vgg_model/pass-00299/
image=data/cifar-out/test/airplane/seaplane_s_000978.png
use_gpu=1
python prediction.py $model $image $use_gpu
```
## 练习
在CUB-200数据集上使用VGG模型训练一个鸟类图片分类模型。相关的鸟类数据集可以从如下地址下载,其中包含了200种鸟类的照片(主要来自北美洲)。
<http://www.vision.caltech.edu/visipedia/CUB-200.html>
## 细节探究
### 卷积神经网络
卷积神经网络是一种使用卷积层的前向神经网络,很适合构建用于理解图片内容的模型。一个典型的神经网络如下图所示:
![Convolutional Neural Network](./lenet.png)
一个卷积神经网络包含如下层:
- 卷积层:通过卷积操作从图片或特征图中提取特征
- 池化层:使用max-pooling对特征图下采样
- 全连接层:使输入层到隐藏层的神经元是全部连接的。
卷积神经网络在图片分类上有着惊人的性能,这是因为它发掘出了图片的两类重要信息:局部关联性质和空间不变性质。通过交替使用卷积和池化处理, 卷积神经网络能够很好的表示这两类信息。
关于如何定义网络中的层,以及如何在层之间进行连接,请参考Layer文档。
Image Classification Tutorial
==============================
This tutorial will guide you through training a convolutional neural network to classify objects using the CIFAR-10 image classification dataset.
As shown in the following figure, the convolutional neural network can recognize the main object in images, and output the classification result.
<center>![Image Classification](./image_classification.png)</center>
## Data Preparation
First, download CIFAR-10 dataset. CIFAR-10 dataset can be downloaded from its official website.
<https://www.cs.toronto.edu/~kriz/cifar.html>
We have prepared a script to download and process the CIFAR-10 dataset. The script will download the CIFAR-10 dataset from the official website.
It will convert it to jpeg images and organize them into a directory with the required structure for the tutorial. Make sure that you have installed pillow and its dependencies.
Consider the following commands:
1. install pillow and its dependencies
```bash
sudo apt-get install libjpeg-dev
pip install pillow
```
2. download and prepare the data
```bash
cd demo/image_classification/data/
sh download_cifar.sh
```
The CIFAR-10 dataset consists of 60000 32x32 color images in 10 classes, with 6000 images per class. There are 50000 training images and 10000 test images.
Here are the classes in the dataset, as well as 10 random images from each:
<center>![Image Classification](./cifar.png)</center>
After downloading and converting, we should find a directory (cifar-out) containing the dataset in the following format:
```
train
---airplane
---automobile
---bird
---cat
---deer
---dog
---frog
---horse
---ship
---truck
test
---airplane
---automobile
---bird
---cat
---deer
---dog
---frog
---horse
---ship
---truck
```
It has two directories:`train` and `test`. These two directories contain training data and testing data of CIFAR-10, respectively. Each of these two folders contains 10 sub-folders, ranging from `airplane` to `truck`. Each sub-folder contains images with the corresponding label. After the images are organized into this structure, we are ready to train an image classification model.
## Preprocess
After the data has been downloaded, it needs to be pre-processed into the Paddle format. We can run the following command for preprocessing.
```
cd demo/image_classification/
sh preprocess.sh
```
`preprocess.sh` calls `./demo/image_classification/preprocess.py` to preprocess image data.
```sh
export PYTHONPATH=$PYTHONPATH:../../
data_dir=./data/cifar-out
python preprocess.py -i $data_dir -s 32 -c 1
```
`./demo/image_classification/preprocess.py` has the following arguments
- `-i` or `--input` specifies the input data directory.
- `-s` or `--size` specifies the processed size of images.
- `-c` or `--color` specifies whether images are color images or gray images.
## Model Training
We need to create a model config file before training the model. An example of the config file (vgg_16_cifar.py) is listed below. **Note**, it is slightly different from the `vgg_16_cifar.py` which also applies to the prediction.
```python
from paddle.trainer_config_helpers import *
data_dir='data/cifar-out/batches/'
meta_path=data_dir+'batches.meta'
args = {'meta':meta_path, 'mean_img_size': 32,
'img_size': 32, 'num_classes': 10,
'use_jpeg': 1, 'color': "color"}
define_py_data_sources2(train_list=data_dir+"train.list",
test_list=data_dir+'test.list',
module='image_provider',
obj='processData',
args=args)
settings(
batch_size = 128,
learning_rate = 0.1 / 128.0,
learning_method = MomentumOptimizer(0.9),
regularization = L2Regularization(0.0005 * 128))
img = data_layer(name='image', size=3*32*32)
lbl = data_layer(name="label", size=10)
# small_vgg is predefined in trainer_config_helpers.network
predict = small_vgg(input_image=img, num_channels=3)
outputs(classification_cost(input=predict, label=lbl))
```
The first line imports python functions for defining networks.
```python
from paddle.trainer_config_helpers import *
```
Then call `define_py_data_sources2`, which uses the Python data provider
interface. The arguments in `args` are used in `image_provider.py`, which
yields image data and transfers them to Paddle.
- `meta`: the mean value of training set.
- `mean_img_size`: the size of mean feature map.
- `img_size`: the height and width of input image.
- `num_classes`: the number of classes.
- `use_jpeg`: the data storage type when preprocessing.
- `color`: specify color image.
`settings` specifies the training algorithm. In the following example,
it specifies learning rate as 0.1, but divided by batch size, and the weight decay
is 0.0005 and multiplied by batch size.
```python
settings(
batch_size = 128,
learning_rate = 0.1 / 128.0,
learning_method = MomentumOptimizer(0.9),
regularization = L2Regularization(0.0005 * 128)
)
```
The `small_vgg` specifies the network. We use a small version of VGG convolutional network as our network
for classification. A description of VGG network can be found here [http://www.robots.ox.ac.uk/~vgg/research/very_deep/](http://www.robots.ox.ac.uk/~vgg/research/very_deep/).
```python
# small_vgg is predefined in trainer_config_helpers.network
predict = small_vgg(input_image=img, num_channels=3)
```
After writing the config, we can train the model by running the script train.sh.
```bash
config=vgg_16_cifar.py
output=./cifar_vgg_model
log=train.log
paddle train \
--config=$config \
--dot_period=10 \
--log_period=100 \
--test_all_data_in_one_period=1 \
--use_gpu=1 \
--save_dir=$output \
2>&1 | tee $log
python -m paddle.utils.plotcurve -i $log > plot.png
```
- Here we use GPU mode to train. If you have no gpu environment, just set `use_gpu=0`.
- `./demo/image_classification/vgg_16_cifar.py` is the network and data configuration file. The meaning of the other flags can be found in the documentation of the command line flags.
- The script `plotcurve.py` requires the python module of `matplotlib`, so if it fails, maybe you need to install `matplotlib`.
After training finishes, the training and testing error curves will be saved to `plot.png` using `plotcurve.py` script. An example of the plot is shown below:
<center>![Training and testing curves.](./plot.png)</center>
## Prediction
After we train the model, the model file as well as the model parameters are stored in path `./cifar_vgg_model/pass-%05d`. For example, the model of the 300-th pass is stored at `./cifar_vgg_model/pass-00299`.
To make a prediction for an image, one can run `predict.sh` as follows. The script will output the label of the classification.
```
sh predict.sh
```
predict.sh:
```
model=cifar_vgg_model/pass-00299/
image=data/cifar-out/test/airplane/seaplane_s_000978.png
use_gpu=1
python prediction.py $model $image $use_gpu
```
## Exercise
Train an image classification model for birds using the VGG model and the CUB-200 dataset. The birds dataset can be downloaded here. It contains an image dataset with photos of 200 bird species (mostly North American).
<http://www.vision.caltech.edu/visipedia/CUB-200.html>
## Delve into Details
### Convolutional Neural Network
A Convolutional Neural Network is a feedforward neural network that uses convolution layers. It is very suitable for building neural networks that process and understand images. A standard convolutional neural network is shown below:
![Convolutional Neural Network](./lenet.png)
Convolutional Neural Network contains the following layers:
- Convolutional layer: It uses convolution operation to extract features from an image or a feature map.
- Pooling layer: It uses max-pooling to downsample feature maps.
- Fully Connected layer: It uses fully connected connections to transform features.
Convolutional Neural Network achieves amazing performance for image classification because it exploits two important characteristics of images: *local correlation* and *spatial invariance*. By iteratively applying convolution and max-pooling operations, convolutional neural network can well represent these two characteristics of images.
For more details of how to define layers and their connections, please refer to the documentation of layers.
# 完整教程
* [快速入门](quick_start/index_cn.rst)
* [个性化推荐](rec/ml_regression_cn.rst)
* [图像分类](image_classification/index_cn.md)
* [情感分析](sentiment_analysis/index_cn.md)
* [语义角色标注](semantic_role_labeling/index_cn.md)
* [机器翻译](text_generation/index_cn.md)
## 常用模型
* [ResNet模型](imagenet_model/resnet_model_cn.md)
* [词向量模型](embedding_model/index_cn.md)
# TUTORIALS
There are several examples and demos here.
* [Quick Start](quick_start/index_en.md)
* [MovieLens Regression](rec/ml_regression_en.rst)
* [Image Classification](image_classification/index_en.md)
* [Sentiment Analysis](sentiment_analysis/index_en.md)
* [Semantic Role Labeling](semantic_role_labeling/index_en.md)
* [Text Generation](text_generation/index_en.md)
* [Image Auto-Generation](gan/index_en.md)
## Model Zoo
* [ImageNet: ResNet](imagenet_model/resnet_model_en.md)
* [Embedding: Chinese Word](embedding_model/index_en.md)
```eval_rst
.. _demo_ml_dataset:
```
# MovieLens数据集
[MovieLens 数据集](http://grouplens.org/datasets/movielens/)由GroupLens Research实验室搜集整理。
该数据集包含一些用户信息、电影信息以及电影评分\[1-5\]。根据数据量规模,该数据集有很多不同的版本。
我们用[MovieLens 百万数据集](http://files.grouplens.org/datasets/movielens/ml-1m.zip)作为示例数据
集,其中包含6,000位用户对4,000部电影的1,000,000条评价。该数据集于2003年2月发布。
## 数据集特征
[ml-1m 数据集](http://files.grouplens.org/datasets/movielens/ml-1m.zip)中有许多的特征。在
[ml-1m 数据集](http://files.grouplens.org/datasets/movielens/ml-1m.zip)中的这些数据文件(含有".dat"的后缀)实际上是CSV文件,
分隔符为"::"。以下我们翻译数据集网站中README文件的描述:
### 评分文件描述(ratings.dat)
所有的评分数据都包含在"ratings.dat"文件中,遵循如下的格式:
用户ID::电影ID::评分::时间戳
- 用户ID范围从1到6040
- 电影ID范围从1到3952
- 评分被调整为5星的规模(只允许整数的星级)
- 时间戳表示为从1970-01-01(UTC)来的秒数,与time(2)的返回值一致
- 每位用户至少有20条评分
### 用户文件描述(users.dat)
所有的用户信息都包含在"users.dat"文件中,遵循如下的格式:
用户ID::性别::年龄::职业::邮编
所有的人口统计学信息由用户自愿提供,没有进行正确性的检查。只有含有人
口统计学信息的用户才被包含在数据集中。
- 性别,用"M"表示男性,"F"表示女性
- 年龄从下列列表范围中选取:
* 1: "18岁以下"
* 18: "18-24岁"
* 25: "25-34岁"
* 35: "35-44岁"
* 45: "45-49岁"
* 50: "50-55岁"
* 56: "56+"
- 职业从下面所列中选择:
* 0: "其他"或不确定
* 1: "学术/教育工作者"
* 2: "艺术家"
* 3: "文书工作/管理员"
* 4: "大学生/研究生"
* 5: "客户服务"
* 6: "医生/医疗保健"
* 7: "行政工作/管理人员"
* 8: "农民"
* 9: "操持家务者"
* 10: "高中毕业生"
* 11: "律师"
* 12: "程序员"
* 13: "退休人员"
* 14: "销售/市场"
* 15: "科学家"
* 16: "自由职业者"
* 17: "技术员/工程师"
* 18: "推销员/手工艺者"
* 19: "无业人士"
* 20: "作家"
### 电影文件描述(movies.dat)
所有的电影信息都包含在"movies.dat"文件中,遵循如下的格式:
电影ID::电影名称::电影类型
- 电影名称(包括发行时间)与IMDB网站提供的一致
- 电影类型如符合多种用管道符号|分割,选自下列类型:
* 动作片
* 冒险片
* 动画片
* 儿童片
* 喜剧片
* 犯罪片
* 纪录片
* 戏剧
* 奇幻片
* 黑色电影
* 恐怖片
* 音乐剧
* 悬疑片
* 浪漫片
* 科幻片
* 惊险电影
* 战争片
* 西部片
- 由于意外的副本记录和测试记录,有些电影ID可能与实际电影不相符合
- 电影大部分是手工输入数据,因此可能会有一些错误和不一致发生
```eval_rst
.. _demo_ml_dataset:
```
# MovieLens Dataset
The [MovieLens Dataset](http://grouplens.org/datasets/movielens/) was collected by GroupLens Research.
The data set contains some user information, movie information, and many movie ratings from \[1-5\].
The dataset has many versions, depending on the size of the set.
We use [MovieLens 1M Dataset](http://files.grouplens.org/datasets/movielens/ml-1m.zip) as a demo dataset, which contains
1 million ratings from 6000 users on 4000 movies. Released 2/2003.
## Dataset Features
In [ml-1m Dataset](http://files.grouplens.org/datasets/movielens/ml-1m.zip), there are many features in this dataset.
The data files (which have ".dat" extension) in [ml-1m Dataset](http://files.grouplens.org/datasets/movielens/ml-1m.zip)
are basically CSV files whose delimiter is "::". We quote the description from the README here.
### RATINGS FILE DESCRIPTION(ratings.dat)
All ratings are contained in the file "ratings.dat" and are in the
following format:
UserID::MovieID::Rating::Timestamp
- UserIDs range between 1 and 6040
- MovieIDs range between 1 and 3952
- Ratings are made on a 5-star scale (whole-star ratings only)
- Timestamp is represented in seconds since the epoch as returned by time(2)
- Each user has at least 20 ratings
### USERS FILE DESCRIPTION(users.dat)
User information is in the file "users.dat" and is in the following
format:
UserID::Gender::Age::Occupation::Zip-code
All demographic information is provided voluntarily by the users and is
not checked for accuracy. Only users who have provided some demographic
information are included in this data set.
- Gender is denoted by a "M" for male and "F" for female
- Age is chosen from the following ranges:
* 1: "Under 18"
* 18: "18-24"
* 25: "25-34"
* 35: "35-44"
* 45: "45-49"
* 50: "50-55"
* 56: "56+"
- Occupation is chosen from the following choices:
* 0: "other" or not specified
* 1: "academic/educator"
* 2: "artist"
* 3: "clerical/admin"
* 4: "college/grad student"
* 5: "customer service"
* 6: "doctor/health care"
* 7: "executive/managerial"
* 8: "farmer"
* 9: "homemaker"
* 10: "K-12 student"
* 11: "lawyer"
* 12: "programmer"
* 13: "retired"
* 14: "sales/marketing"
* 15: "scientist"
* 16: "self-employed"
* 17: "technician/engineer"
* 18: "tradesman/craftsman"
* 19: "unemployed"
* 20: "writer"
### MOVIES FILE DESCRIPTION(movies.dat)
Movie information is in the file "movies.dat" and is in the following
format:
MovieID::Title::Genres
- Titles are identical to titles provided by the IMDB (including
year of release)
- Genres are pipe-separated and are selected from the following genres:
* Action
* Adventure
* Animation
* Children's
* Comedy
* Crime
* Documentary
* Drama
* Fantasy
* Film-Noir
* Horror
* Musical
* Mystery
* Romance
* Sci-Fi
* Thriller
* War
* Western
- Some MovieIDs do not correspond to a movie due to accidental duplicate
entries and/or test entries
- Movies are mostly entered by hand, so errors and inconsistencies may exist
MovieLens数据集评分回归模型
===========================
这里我们在MovieLens数据集上描述一种 **余弦相似度回归** 任务。
该示例将展示paddle如何进行词向量嵌入,处理相似度回归,针对文本
的单词级别的卷积神经网络,以及paddle如何处理多种类型的输入。
需要注意的是,该模型网络只是用于进行demo展示paddle如何工作,而
没有进行结构的微调。
**我们非常欢迎您用PADDLEPADDLE构建更好的示例,如果您有好的建议来
让这个示例变得更好,希望能让我们知晓。**
数据准备
`````````
下载并解压数据集
'''''''''''''''''
这里我们使用 :ref:`demo_ml_dataset` 。
要下载和解压数据集,只需要简单的运行下面的命令即可。
.. code-block:: bash
cd demo/recommendation/data
./ml_data.sh
:code:`demo/recommendation/data/ml-1m` 的目录结构为:
.. code-block:: text
+--ml-1m
+--- movies.dat # 电影特征
+--- ratings.dat # 评分
+--- users.dat # 用户特征
+--- README # 数据集描述
字段配置文件
'''''''''''''
**字段配置文件** 用来具体说明数据集的字段和文件格式,
例如,说明每个特征文件具体字段是 **什么** 类型。
ml-1m的字段配置文件在目录 :code:`demo/recommendation/data/config.json` 中。
其具体说明了字段类型和文件名称:
1) 用户文件中有四种类型的字段\: 编号,性别,年龄和职业;
2) 文件名称为"users.dat",文件的分隔符为"::"。
.. include:: ../../../demo/recommendation/data/config.json
:code: json
:literal:
准备数据
`````````
你需要安装python的第三方库。
**强烈推荐使用VIRTUALENV来创造一个干净的python环境。**
.. code-block:: bash
pip install -r requirements.txt
预处理数据一般的命令为:
.. code-block:: bash
cd demo/recommendation
./preprocess.sh
下面介绍预处理过程具体的步骤。
提取电影或用户的特征并生成python对象
'''''''''''''''''''''''''''''''''''''
在movielens 1m数据集中,电影和用户有许多的特征。
评分文件的每一行仅仅提供电影或用户的编号来代表相应的电影或用户。
我们首先处理电影或用户的特征文件,然后用pickle命令将特征( **Meta** )对象存储为文件。
Meta配置文件
.............
**Meta配置文件** 用来具体描述 **如何** 解析数据集中的每一个字段。
该文件可以从字段配置文件生成,或是手动编辑生成。文件的格式可以
为json或yaml格式。解析器能通过文件的扩展名自动识别文件的格式。
要将字段配置文件转化为meta配置文件,只需要运行:
.. code-block:: bash
cd demo/recommendation/data
python config_generator.py config.json > meta_config.json
生成的meta配置文件如下所示:
.. include:: ../../../demo/recommendation/data/meta_config.json
:code: json
:literal:
在meta文件中有两种特征\: 电影和用户。
* 在电影文件movies.dat中
* 我们仅用"::"来分隔每一行
* pos 0 代表编号
* pos 1 特征:
* name是电影名
* 利用正则表达式来解析该特征
* 基于字母的词嵌入特征
* 是序列
* pos 2 特征:
* name是体裁
* type是one hot稠密向量
* dictionary由解析自动生成,每一个key由'|'分隔
* 在用户文件users.dat中
* 我们仅用"::"来分隔每一行
* pos 0 代表编号
* pos 1 特征:
* name是性别
* 简单的基于字母的词嵌入
* pos 2 特征:
* name是年龄
* 是整个的词嵌入
* 嵌入编号会根据单词排序
* pos 3 特征:
* name是职业
* 简单的整个词嵌入
Meta文件
''''''''
有了meta配置文件之后,我们可以生成 **Meta文件** ,该文件是python的pickle对象,
存储着电影或用户信息。可以运行下面的命令来生成。
.. code-block:: bash
python meta_generator.py ml-1m meta.bin --config=meta_config.json
meta文件 :code:`meta.bin` 的结构如下:
.. code-block:: text
+--+ movie
| +--+ __meta__
| | +--+ raw_meta # 每个特征的meta配置。列表
| | | +
| | | | # 编号字段,我们用编号作为key
| | | +--+ {'count': 3883, 'max': 3952, 'is_key': True, 'type': 'id', 'min': 1}
| | | |
| | | | # 电影名字段,嵌入特征字典
| | | +--+ {'dict': [ ... ], 'type': 'embedding', 'name': 'title', 'seq': 'sequence'}
| | | |
| | | | # 体裁字段,体裁字典
| | | +--+ {'dict': [ ... ], 'type': 'one_hot_dense', 'name': 'genres'}
| | |
| | +--+ feature_map [1, 2] # a list for raw_meta index for feature field.
| | # it means there are 2 features for each key.
| | # * 0 offset of feature is raw_meta[1], Title.
| | # * 1 offset of feature is raw_meta[2], Genres.
| |
| +--+ 1 # 电影1的特征
| | +
| | +---+ [[...], [...]] # title ids, genres dense vector
| |
| +--+ 2
| |
| +--+ ...
|
+--- user
+--+ __meta__
| +
| +--+ raw_meta
| | +
| | +--+ id field as user
| | |
| | +--+ {'dict': ['F', 'M'], 'type': 'embedding', 'name': 'gender', 'seq': 'no_sequence'}
| | |
| | +--+ {'dict': ['1', '18', '25', '35', '45', '50', '56'], 'type': 'embedding', 'name': 'age', 'seq': 'no_sequence'}
| | |
| | +--+ {'dict': [...], 'type': 'embedding', 'name': 'occupation', 'seq': 'no_sequence'}
| |
| +--+ feature_map [1, 2, 3]
|
+--+ 1 # 用户1的特征
|
+--+ 2
+--+ ...
分割训练/测试文件
''''''''''''''''''
我们将 :code:`ml-1m/ratings.dat` 文件分割为训练和测试文件。分割文件的方法是:对于每位用户,我们将评分分成两部分。
这样的话每位用户在测试文件中将与训练文件含有同样的信息。
用 :code:`split.py` 来分离训练和测试文件。
.. code-block:: bash
python split.py ml-1m/ratings.dat --delimiter="::" --test_ratio=0.1
这样就会生成两个文件: :code:`ml-1m/ratings.dat.train` 和 :code:`ml-1m/ratings.dat.test` 。
将他们移动到目录 :code:`data` ,然后进行随机打乱,再为paddle的训练过程提供文件列表。
.. code-block:: bash
shuf ml-1m/ratings.dat.train > ratings.dat.train
cp ml-1m/ratings.dat.test .
echo "./data/ratings.dat.train" > train.list
echo "./data/ratings.dat.test" > test.list
神经网络结构配置
`````````````````
训练器配置文件
'''''''''''''''
网络结构如下图所示:
.. image:: rec_regression_network.png
:align: center
:alt: rec_regression_network
该示例的神经网络配置文件 :code:`trainer_config.py` 如下所示:
.. literalinclude:: ../../../demo/recommendation/trainer_config.py
:language: python
:lines: 15-
在文件 :code:`trainer_config.py` 中,我们仅仅是将每个特征种类映射到一个特征向量中,以下
展示了如何将每个特征映射到一个向量。
* :code:`id` \: 仅仅是简单的嵌入,然后添加一个全连接层。
* :code:`embedding` \:
- 如果是序列,则先做嵌入,然后再做一次文本卷积网络操作,
然后得到平均采样的结果。
- 如果不是序列,则先做嵌入,然后添加一个全连接层。
* :code:`one_hot_dense` \:
- 仅仅是两个全连接层。
然后我们利用多输入的 :code:`fc_layer` 全连接层将电影的每个特征结合成一个电影特征,
并且对用户的特征做同样的操作,也得到一个用户特征。然后我们求这两个特征的余弦相似度。
在这些网络中,我们用以下的一些 :ref:`api_trainer_config` 中的接口。
* 数据层, :ref:`api_trainer_config_helpers_layers_data_layer`
* 全连接层, :ref:`api_trainer_config_helpers_layers_fc_layer`
* 嵌入层, :ref:`api_trainer_config_helpers_layers_embedding_layer`
* 文本投影层, :ref:`api_trainer_config_helpers_layers_context_projection`
* 采样层, :ref:`api_trainer_config_helpers_layers_pooling_layer`
* 余弦相似度层, :ref:`api_trainer_config_helpers_layers_cos_sim`
* 文本卷积采样层, :ref:`api_trainer_config_helpers_network_text_conv_pool`
* 声明Python数据源, :ref:`api_trainer_config_helpers_data_sources`
数据提供脚本
'''''''''''''
.. literalinclude:: ../../../demo/recommendation/dataprovider.py
:language: python
:lines: 15-
数据提供脚本仅仅是读取meta.bin和评分文件,生成训练需要的样本。
在脚本 :code:`dataprovider.py` 中,我们需要设置:
* obj.slots\: 特征的类型和维度。
* use_seq\: :code:`dataprovider.py` 中的数据是否为序列模式。
* process\: 返回数据的每一条样本给 :code:`paddle` 。
数据提供脚本的细节文档可以参考 :ref:`api_pydataprovider2` 。
训练
````
准备好数据,配置了网络,编写好数据提供脚本后,现在我们可以开始paddle训练了。
代码 :code:`run.sh` 如下:
.. literalinclude:: ../../../demo/recommendation/run.sh
:language: bash
:lines: 16-
该脚本仅仅是开始一个paddle训练过程,将日志写入文件 :code:`log.txt` ,然后
打印在屏幕上。
脚本 :code:`run.sh` 中的每一行命令,请参考页面 :ref:`cmd_line_index` 。
这些参数的简短介绍如下:
* config\: 告诉paddle哪个文件是神经网络的配置文件。
* save_dir\: 告诉paddle将模型保存在 :code:`./output` 中。
* use_gpu\: 是否使用GPU,默认为不使用。
* trainer_count\: 一台机器上面的线程数量。
* test_all_data_in_one_period\: 每一个测试周期测试一次所有数据。否则,
每个测试周期测试 :code:`batch_size` 批次的数据。
* log_period\: 在训练了 :code:`log_period` 批次后打印日志。
* dot_period\: 在每训练 :code:`dot_period` 个批次后打印一个 :code:`.` 。
* num_passes\: 训练至多 :code:`num_passes` 轮。
如果训练过程启动成功的话,输出应该类似如下:
.. code-block:: text
I0601 08:07:22.832059 10549 TrainerInternal.cpp:157] Batch=100 samples=160000 AvgCost=4.13494 CurrentCost=4.13494 Eval: CurrentEval:
I0601 08:07:50.672627 10549 TrainerInternal.cpp:157] Batch=200 samples=320000 AvgCost=3.80957 CurrentCost=3.48421 Eval: CurrentEval:
I0601 08:08:18.877369 10549 TrainerInternal.cpp:157] Batch=300 samples=480000 AvgCost=3.68145 CurrentCost=3.42519 Eval: CurrentEval:
I0601 08:08:46.863963 10549 TrainerInternal.cpp:157] Batch=400 samples=640000 AvgCost=3.6007 CurrentCost=3.35847 Eval: CurrentEval:
I0601 08:09:15.413025 10549 TrainerInternal.cpp:157] Batch=500 samples=800000 AvgCost=3.54811 CurrentCost=3.33773 Eval: CurrentEval:
I0601 08:09:36.058670 10549 TrainerInternal.cpp:181] Pass=0 Batch=565 samples=902826 AvgCost=3.52368 Eval:
I0601 08:09:46.215489 10549 Tester.cpp:101] Test samples=97383 cost=3.32155 Eval:
I0601 08:09:46.215966 10549 GradientMachine.cpp:132] Saving parameters to ./output/model/pass-00000
I0601 08:09:46.233397 10549 ParamUtil.cpp:99] save dir ./output/model/pass-00000
I0601 08:09:46.233438 10549 Util.cpp:209] copy trainer_config.py to ./output/model/pass-00000
I0601 08:09:46.233541 10549 ParamUtil.cpp:147] fileName trainer_config.py
模型被保存在 :code:`output/` 目录中。你可以在任何时候用 :code:`Ctrl-C` 来停止训练。
模型评估和预测
```````````````
在训练了几个轮次以后,你可以对模型进行评估,得到最好轮次下的模型。运行下面命令即可:
.. code-block:: bash
./evaluate.sh
你将看到如下的信息:
.. code-block:: text
Best pass is 00009, error is 3.06949, which means predict get error as 0.875998002281
evaluating from pass output/pass-00009
然后,你可以预测任何用户对于任何一部电影的评价,运行下面命令即可:
.. code-block:: bash
python prediction.py 'output/pass-00009/'
预测程序将读取用户的输入,然后输出预测分数。用户预测的命令行界面如下:
.. code-block:: text
Input movie_id: 9
Input user_id: 4
Prediction Score is 2.56
Input movie_id: 8
Input user_id: 2
Prediction Score is 3.13
Regression MovieLens Rating
============================
Here we demonstrate a **Cosine Similarity Regression** job in movie lens dataset.
This demo will show how paddle does (word) embedding job,
handles the similarity regression,
the character-level convolutional networks for text, and how does paddle handle
multiple types of inputs.
Note that the model structure is not fine-tuned and just a demo to show how paddle works.
YOU ARE WELCOME TO BUILD A BETTER DEMO
BY USING PADDLEPADDLE, AND LET US KNOW TO MAKE THIS DEMO BETTER.
Data Preparation
````````````````
Download and extract dataset
''''''''''''''''''''''''''''
We use :ref:`demo_ml_dataset` here.
To download and unzip the dataset, simply run the following commands.
.. code-block:: bash
cd demo/recommendation/data
./ml_data.sh
And the directory structure of :code:`demo/recommendation/data/ml-1m` is:
.. code-block:: text
+--ml-1m
+--- movies.dat # movie features
+--- ratings.dat # ratings
+--- users.dat # user features
+--- README # dataset description
Field config file
'''''''''''''''''
**Field config file** is used to specify the fields of the dataset and the file format,
i.e., it specifies **WHAT** type each field is in each feature file.
The field config file of ml-1m shows in :code:`demo/recommendation/data/config.json`.
It specifies the field types and file names: 1) there are four types of field for user file\: id, gender, age and occupation;
2) the filename is "users.dat", and the delimiter of file is "::".
.. include:: ../../../demo/recommendation/data/config.json
:code: json
:literal:
Preprocess Data
```````````````
You need to install python 3rd party libraries.
IT IS HIGHLY RECOMMENDED TO USE VIRTUALENV TO MAKE A CLEAN PYTHON ENVIRONMENT.
.. code-block:: bash
pip install -r requirements.txt
The general command for preprocessing the dataset is:
.. code-block:: bash
cd demo/recommendation
./preprocess.sh
And the detail steps are introduced as follows.
Extract Movie/User features to python object
'''''''''''''''''''''''''''''''''''''''''''''
There are many features in movie or user in movielens 1m dataset.
Each line of rating file just provides a Movie/User id to refer each movie or user.
We process the movie/user feature file first, and pickle the feature (**Meta**) object as a file.
Meta config file
................
**Meta config file** is used to specify **HOW** to parse each field in dataset.
It could be translated from field config file, or written by hand.
Its file format could be either json or yaml syntax file. Parser will automatically choose the file format by extension name.
To convert Field config file to meta config file, just run:
.. code-block:: bash
cd demo/recommendation/data
python config_generator.py config.json > meta_config.json
The meta config file shows below:
.. include:: ../../../demo/recommendation/data/meta_config.json
:code: json
:literal:
There are two kinds of features in meta\: movie and user.
* in movie file, whose name is movies.dat
* we just split each line by "::"
* pos 0 is id.
* pos 1 feature:
* name is title.
* it uses regex to parse this feature.
* it is a char based word embedding feature.
* it is a sequence.
* pos 2 feature:
* name is genres.
* type is one hot dense vector.
* dictionary is auto generated by parsing, each key is split by '|'
* in user file, whose name is users.dat
* we just split each line by "::"
* pos 0 is id.
* pos 1 feature:
* name is gender
* just simple char based embedding.
* pos 2 feature:
* name is age
* just whole word embedding.
* embedding id will be sort by word.
* pos 3 feature:
* name is occupation.
* just simple whole word embedding.
Meta file
'''''''''
After having meta config file, we can generate **Meta file**, a python pickle object which stores movie/user information.
The following commands could be run to generate it.
.. code-block:: bash
python meta_generator.py ml-1m meta.bin --config=meta_config.json
And the structure of the meta file :code:`meta.bin` is:
.. code-block:: text
+--+ movie
| +--+ __meta__
| | +--+ raw_meta # each feature meta config. list
| | | +
| | | | # ID Field, we use id as key
| | | +--+ {'count': 3883, 'max': 3952, 'is_key': True, 'type': 'id', 'min': 1}
| | | |
| | | | # Title field, the dictionary list of embedding.
| | | +--+ {'dict': [ ... ], 'type': 'embedding', 'name': 'title', 'seq': 'sequence'}
| | | |
| | | | # Genres field, the genres dictionary
| | | +--+ {'dict': [ ... ], 'type': 'one_hot_dense', 'name': 'genres'}
| | |
| | +--+ feature_map [1, 2] # a list for raw_meta index for feature field.
| | # it means there are 2 features for each key.
| | # * 0 offset of feature is raw_meta[1], Title.
| | # * 1 offset of feature is raw_meta[2], Genres.
| |
| +--+ 1 # movie 1 features
| | +
| | +---+ [[...], [...]] # title ids, genres dense vector
| |
| +--+ 2
| |
| +--+ ...
|
+--- user
+--+ __meta__
| +
| +--+ raw_meta
| | +
| | +--+ id field as user
| | |
| | +--+ {'dict': ['F', 'M'], 'type': 'embedding', 'name': 'gender', 'seq': 'no_sequence'}
| | |
| | +--+ {'dict': ['1', '18', '25', '35', '45', '50', '56'], 'type': 'embedding', 'name': 'age', 'seq': 'no_sequence'}
| | |
| | +--+ {'dict': [...], 'type': 'embedding', 'name': 'occupation', 'seq': 'no_sequence'}
| |
| +--+ feature_map [1, 2, 3]
|
+--+ 1 # user 1 features
|
+--+ 2
+--+ ...
Split Training/Testing files
''''''''''''''''''''''''''''
We split :code:`ml-1m/ratings.dat` into a training and testing file. The way to split file is for each user, we split the
rating by two parts. So each user in testing file will have some rating information in training file.
Use :code:`split.py` to separate the training and testing file.
.. code-block:: bash
python split.py ml-1m/ratings.dat --delimiter="::" --test_ratio=0.1
Then two files will be generated\: :code:`ml-1m/ratings.dat.train` and :code:`ml-1m/ratings.dat.test`.
Move them to workspace :code:`data`, shuffle the train file, and prepare the file list for paddle train.
.. code-block:: bash
shuf ml-1m/ratings.dat.train > ratings.dat.train
cp ml-1m/ratings.dat.test .
echo "./data/ratings.dat.train" > train.list
echo "./data/ratings.dat.test" > test.list
Neural Network Configuration
````````````````````````````
Trainer Config File
'''''''''''''''''''
The network structure shows below.
.. image:: rec_regression_network.png
:align: center
:alt: rec_regression_network
The demo's neural network config file :code:`trainer_config.py` is shown below.
.. literalinclude:: ../../../demo/recommendation/trainer_config.py
:language: python
:lines: 15-
In this :code:`trainer_config.py`, we just map each feature type to
a feature vector. The following shows how each feature is mapped to a vector.
* :code:`id`\: Just simple embedding, and then add to fully connected layer.
* :code:`embedding`\:
- if is_sequence, get the embedding and do a text convolutional operation,
get the average pooling result.
- if not sequence, get the embedding and add to fully connected layer.
* :code:`one_hot_dense`\:
- just two fully connected layer.
Then we combine each features of movie into one movie feature by a
:code:`fc_layer` with multiple inputs, and do the same thing to user features,
get one user feature. Then we calculate the cosine similarity of these two
features.
In these networks, we use several APIs in :ref:`api_trainer_config` . There are
* Data Layer, :ref:`api_trainer_config_helpers_layers_data_layer`
* Fully Connected Layer, :ref:`api_trainer_config_helpers_layers_fc_layer`
* Embedding Layer, :ref:`api_trainer_config_helpers_layers_embedding_layer`
* Context Projection Layer, :ref:`api_trainer_config_helpers_layers_context_projection`
* Pooling Layer, :ref:`api_trainer_config_helpers_layers_pooling_layer`
* Cosine Similarity Layer, :ref:`api_trainer_config_helpers_layers_cos_sim`
* Text Convolution Pooling Layer, :ref:`api_trainer_config_helpers_network_text_conv_pool`
* Declare Python Data Sources :ref:`api_trainer_config_helpers_data_sources`.
Data Provider
'''''''''''''
.. literalinclude:: ../../../demo/recommendation/dataprovider.py
:language: python
:lines: 15-
The data provider just reads the meta.bin and rating file, and yields each sample for training.
In this :code:`dataprovider.py`, we should set\:
* obj.slots\: The feature types and dimension.
* use_seq\: Whether this :code:`dataprovider.py` in sequence mode or not.
* process\: Return each sample of data to :code:`paddle`.
The data provider details document see :ref:`api_pydataprovider2`.
Train
`````
After preparing the data, configuring the network, and writing the data provider, we can now run paddle training.
The :code:`run.sh` is shown as follows:
.. literalinclude:: ../../../demo/recommendation/run.sh
:language: bash
:lines: 16-
It just starts a paddle training process, writes the log to :code:`log.txt`,
then print it on screen.
Each command line argument in :code:`run.sh`, please refer to the :ref:`cmd_line_index` page. The short description of these arguments is shown as follow.
* config\: Tell paddle which file is neural network configuration.
* save_dir\: Tell paddle save model into :code:`./output`.
* use_gpu\: Use gpu or not. Default is false.
* trainer_count\: The compute thread in one machine.
* test_all_data_in_one_period\: Test All Data during one test period. Otherwise,
will test a :code:`batch_size` data in one test period.
* log_period\: Print log after train :code:`log_period` batches.
* dot_period\: Print a :code:`.` after train :code:`dot_period` batches.
* num_passes\: Train at most :code:`num_passes`.
If the training process starts successfully, the output looks like the following:
.. code-block:: text
I0601 08:07:22.832059 10549 TrainerInternal.cpp:157] Batch=100 samples=160000 AvgCost=4.13494 CurrentCost=4.13494 Eval: CurrentEval:
I0601 08:07:50.672627 10549 TrainerInternal.cpp:157] Batch=200 samples=320000 AvgCost=3.80957 CurrentCost=3.48421 Eval: CurrentEval:
I0601 08:08:18.877369 10549 TrainerInternal.cpp:157] Batch=300 samples=480000 AvgCost=3.68145 CurrentCost=3.42519 Eval: CurrentEval:
I0601 08:08:46.863963 10549 TrainerInternal.cpp:157] Batch=400 samples=640000 AvgCost=3.6007 CurrentCost=3.35847 Eval: CurrentEval:
I0601 08:09:15.413025 10549 TrainerInternal.cpp:157] Batch=500 samples=800000 AvgCost=3.54811 CurrentCost=3.33773 Eval: CurrentEval:
I0601 08:09:36.058670 10549 TrainerInternal.cpp:181] Pass=0 Batch=565 samples=902826 AvgCost=3.52368 Eval:
I0601 08:09:46.215489 10549 Tester.cpp:101] Test samples=97383 cost=3.32155 Eval:
I0601 08:09:46.215966 10549 GradientMachine.cpp:132] Saving parameters to ./output/model/pass-00000
I0601 08:09:46.233397 10549 ParamUtil.cpp:99] save dir ./output/model/pass-00000
I0601 08:09:46.233438 10549 Util.cpp:209] copy trainer_config.py to ./output/model/pass-00000
I0601 08:09:46.233541 10549 ParamUtil.cpp:147] fileName trainer_config.py
The model is saved in :code:`output/` directory. You can use :code:`Ctrl-C` to stop training whenever you want.
Evaluate and Predict
````````````````````
After training several passes, you can evaluate them and get the best pass. Just run
.. code-block:: bash
./evaluate.sh
You will see messages like this:
.. code-block:: text
Best pass is 00009, error is 3.06949, which means predict get error as 0.875998002281
evaluating from pass output/pass-00009
Then, you can predict what any user will rate a movie. Just run
.. code-block:: bash
python prediction.py 'output/pass-00009/'
Predictor will read user input, and predict scores. It has a command-line user interface as follows:
.. code-block:: text
Input movie_id: 9
Input user_id: 4
Prediction Score is 2.56
Input movie_id: 8
Input user_id: 2
Prediction Score is 3.13
# 语义角色标注教程 #
语义角色标注(Semantic role labeling, SRL)是浅层语义解析的一种形式,其目的是在给定的输入句子中发现每个谓词的谓词论元结构。 SRL作为很多自然语言处理任务中的中间步骤是很有用的,如信息提取、文档自动分类和问答。 实例如下 [1]:
[ <sub>A0</sub> He ] [ <sub>AM-MOD</sub> would ][ <sub>AM-NEG</sub> n’t ] [ <sub>V</sub> accept] [ <sub>A1</sub> anything of value ] from [<sub>A2</sub> those he was writing about ].
- V: 动词
- A0: 接受者
- A1: 接受的东西
- A2: 从……接受
- A3: 属性
- AM-MOD: 情态动词
- AM-NEG: 否定
给定动词“accept”,句子中的组块将会扮演某些语义角色。这里,标签方案来自 Penn Proposition Bank。
到目前为止,大多数成功的SRL系统是建立在某种形式的句法分析结果之上的,使用了基于句法结构的预定义特征模板。 本教程将介绍使用深度双向长短期记忆(DB-LSTM)模型[2]的端到端系统来解决SRL任务,这在很大程度上优于先前的最先进的系统。 这个系统将SRL任务视为序列标注问题。
## 数据描述
相关论文[2]采用 CoNLL-2005&2012 共享任务中设置的数据进行训练和测试。由于数据许可的原因,演示采用 CoNLL-2005 的测试数据集,可以在网站上找到。
用户只需执行以下命令就可以下载并处理原始数据:
```bash
cd data
./get_data.sh
```
`data` 目录会出现如下几个新的文件:
```bash
conll05st-release:the test data set of CoNll-2005 shared task
test.wsj.words:the Wall Street Journal data sentences
test.wsj.props: the propositional arguments
feature: the extracted features from data set
```
## 训练
### DB-LSTM
请参阅情感分析的演示以了解有关长短期记忆单元的更多信息。
与在 Sentiment Analysis 演示中使用的 Bidirectional-LSTM 不同,DB-LSTM 采用另一种方法来堆叠LSTM层。首先,标准LSTM以正向处理该序列。该 LSTM 层的输入和输出作为下一个 LSTM 层的输入,并被反向处理。这两个标准 LSTM 层组成一对 LSTM。然后我们堆叠一对对的 LSTM 层后得到深度 LSTM 模型。
下图展示了时间扩展的2层 DB-LSTM 网络。
<center>
![pic](./network_arch.png)
</center>
### 特征
两个输入特征在这个流程中起着至关重要的作用:predicate(pred)和argument(arguments)。 还采用了两个其他特征:谓词上下文(ctx-p)和区域标记(mr)。 因为单个谓词不能精确地描述谓词信息,特别是当相同的词在句子中出现多于一次时。 使用谓词上下文,可以在很大程度上消除歧义。类似地,如果它位于谓词上下文区域中,则使用区域标记 m<sub>r</sub> = 1 来表示参数位置,反之则 m<sub>r</sub> = 0。这四个简单的特征是我们的SRL系统所需要的。上下文大小设置为1的一个样本的特征如下[2]所示:
<center>
![pic](./feature.jpg)
</center>
在这个示例中,相应的标记句子是:
[ <sub>A1</sub> A record date ] has [ <sub>AM-NEG</sub> n't ] been [ <sub>V</sub> set ] .
在演示中, 我们采用上面的特征模板, 包括: `argument`, `predicate`, `ctx-p (p=-1,0,1)`, `mark` 并使用 `B/I/O` 方案来标记每个参数。这些特征和标签存储在 `feature` 文件中, 用`\t`分割。
### 数据提供
`dataprovider.py` 是一个包装数据的 Python 文件。 函数 `hook()` 定义了网络的数据槽。八个特征和标签都是索引槽。
```
def hook(settings, word_dict, label_dict, **kwargs):
settings.word_dict = word_dict
settings.label_dict = label_dict
#all inputs are integral and sequential type
settings.slots = [
integer_value_sequence(len(word_dict)),
integer_value_sequence(len(predicate_dict)),
integer_value_sequence(len(word_dict)),
integer_value_sequence(len(word_dict)),
integer_value_sequence(len(word_dict)),
integer_value_sequence(len(word_dict)),
integer_value_sequence(len(word_dict)),
integer_value_sequence(2),
integer_value_sequence(len(label_dict))]
```
相应的数据迭代器如下:
```
@provider(init_hook=hook, should_shuffle=True, calc_batch_size=get_batch_size,
can_over_batch_size=False, cache=CacheType.CACHE_PASS_IN_MEM)
def process(settings, file_name):
with open(file_name, 'r') as fdata:
for line in fdata:
sentence, predicate, ctx_n2, ctx_n1, ctx_0, ctx_p1, ctx_p2, mark, label = \
line.strip().split('\t')
words = sentence.split()
sen_len = len(words)
word_slot = [settings.word_dict.get(w, UNK_IDX) for w in words]
predicate_slot = [settings.predicate_dict.get(predicate)] * sen_len
ctx_n2_slot = [settings.word_dict.get(ctx_n2, UNK_IDX)] * sen_len
ctx_n1_slot = [settings.word_dict.get(ctx_n1, UNK_IDX)] * sen_len
ctx_0_slot = [settings.word_dict.get(ctx_0, UNK_IDX)] * sen_len
ctx_p1_slot = [settings.word_dict.get(ctx_p1, UNK_IDX)] * sen_len
ctx_p2_slot = [settings.word_dict.get(ctx_p2, UNK_IDX)] * sen_len
marks = mark.split()
mark_slot = [int(w) for w in marks]
label_list = label.split()
label_slot = [settings.label_dict.get(w) for w in label_list]
yield word_slot, predicate_slot, ctx_n2_slot, ctx_n1_slot, \
ctx_0_slot, ctx_p1_slot, ctx_p2_slot, mark_slot, label_slot
```
函数 `process` 返回8个特征list和1个标签list。
### 神经网络配置
`db_lstm.py` 是在训练过程中加载字典并定义数据提供程序模块和网络架构的神经网络配置文件。
九个 `data_layer` 从数据提供程序加载实例。八个特征分别转换为向量,并由`mixed_layer`混合。 深度双向LSTM层提取softmax层的特征。目标函数是标签的交叉熵。
### 训练
训练的脚本是 `train.sh`,用户只需执行:
```bash
./train.sh
```
`train.sh` 中的内容:
```
paddle train \
--config=./db_lstm.py \
--use_gpu=0 \
--log_period=5000 \
--trainer_count=1 \
--show_parameter_stats_period=5000 \
--save_dir=./output \
--num_passes=10000 \
--average_test_period=10000000 \
--init_model_path=./data \
--load_missing_parameter_strategy=rand \
--test_all_data_in_one_period=1 \
2>&1 | tee 'train.log'
```
- \--config=./db_lstm.py : 网络配置文件
- \--use_gpu=false: 使用 CPU 训练(如果已安装 PaddlePaddle GPU版本并想使用 GPU 训练可以设置为true,目前 crf_layer 不支持 GPU)
- \--log_period=5000: 每5000个batch输出日志
- \--trainer_count=1: 设置线程数(或 GPU 数)
- \--show_parameter_stats_period=5000: 每5000个batch显示参数统计
- \--save_dir=./output: 模型输出路径
- \--num_passes=10000: 设置数据遍历次数,一个pass意味着PaddlePaddle训练数据集中的所有样本被遍历一次
- \--average_test_period=10000000: 每个 average_test_period 批次对平均参数进行测试
- \--init_model_path=./data: 参数初始化路径
- \--load_missing_parameter_strategy=rand: 随机初始不存在的参数
- \--test_all_data_in_one_period=1: 在一个周期内测试所有数据
训练后,模型将保存在目录`output`中。 我们的训练曲线如下:
<center>
![pic](./src/curve.jpg)
</center>
### 测试
测试脚本是 `test.sh`, 执行:
```bash
./test.sh
```
`test.sh` 的主要部分:
```
paddle train \
--config=./db_lstm.py \
--model_list=$model_list \
--job=test \
--config_args=is_test=1 \
```
- \--config=./db_lstm.py: 网络配置文件
- \--model_list=$model_list.list: 模型列表文件
- \--job=test: 指示测试任务
- \--config_args=is_test=1: 指示测试任务的标记
- \--test_all_data_in_one_period=1: 在一个周期内测试所有数据
### 预测
预测脚本是 `predict.sh`,用户只需执行:
```bash
./predict.sh
```
`predict.sh`中,用户应该提供网络配置文件,模型路径,标签文件,字典文件,特征文件。
```
python predict.py \
-c $config_file \
-w $best_model_path \
-l $label_file \
-p $predicate_dict_file \
-d $dict_file \
-i $input_file \
-o $output_file
```
`predict.py` 是主要的可执行python脚本,其中包括函数:加载模型,加载数据,数据预测。网络模型将输出标签的概率分布。 在演示中,我们使用最大概率的标签作为结果。用户还可以根据概率分布矩阵实现柱搜索或维特比解码。
预测后,结果保存在 `predict.res` 中。
## 引用
[1] Martha Palmer, Dan Gildea, and Paul Kingsbury. The Proposition Bank: An Annotated Corpus of Semantic Roles , Computational Linguistics, 31(1), 2005.
[2] Zhou, Jie, and Wei Xu. "End-to-end learning of semantic role labeling using recurrent neural networks." Proceedings of the Annual Meeting of the Association for Computational Linguistics. 2015.
```eval_rst
.. _semantic_role_labeling:
```
# Semantic Role labeling Tutorial #
Semantic role labeling (SRL) is a form of shallow semantic parsing whose goal is to discover the predicate-argument structure of each predicate in a given input sentence. SRL is useful as an intermediate step in a wide range of natural language processing tasks, such as information extraction, automatic document categorization and question answering. An example is as follows [1]:
[ <sub>A0</sub> He ] [ <sub>AM-MOD</sub> would ][ <sub>AM-NEG</sub> n’t ] [ <sub>V</sub> accept] [ <sub>A1</sub> anything of value ] from [<sub>A2</sub> those he was writing about ].
- V: verb
- A0: acceptor
- A1: thing accepted
- A2: accepted-from
- A3: Attribute
- AM-MOD: modal
- AM-NEG: negation
Given the verb "accept", the chunks in sentence would play certain semantic roles. Here, the label scheme is from Penn Proposition Bank.
To this date, most of the successful SRL systems are built on top of some form of parsing results where pre-defined feature templates over the syntactic structure are used. This tutorial will present an end-to-end system using deep bidirectional long short-term memory (DB-LSTM)[2] for solving the SRL task, which largely outperforms the previous state-of-the-art systems. The system regards SRL task as the sequence labelling problem.
## Data Description
The relevant paper[2] takes the data set in CoNLL-2005&2012 Shared Task for training and testing. According to the data license, the demo adopts the test data set of CoNLL-2005, which can be reached on website.
To download and process the original data, user just need to execute the following command:
```bash
cd data
./get_data.sh
```
Several new files appear in the `data` directory as follows.
```bash
conll05st-release:the test data set of CoNll-2005 shared task
test.wsj.words:the Wall Street Journal data sentences
test.wsj.props: the propositional arguments
feature: the extracted features from data set
```
## Training
### DB-LSTM
Please refer to the Sentiment Analysis demo to learn more about the long short-term memory unit.
Unlike Bidirectional-LSTM that used in Sentiment Analysis demo, the DB-LSTM adopts another way to stack LSTM layer. First a standard LSTM processes the sequence in forward direction. The input and output of this LSTM layer are taken by the next LSTM layer as input, processed in reversed direction. These two standard LSTM layers compose a pair of LSTM. Then we stack LSTM layers pair after pair to obtain the deep LSTM model.
The following figure shows a temporal expanded 2-layer DB-LSTM network.
<center>
![pic](./src/network_arch.png)
</center>
### Features
Two input features play an essential role in this pipeline: predicate (pred) and argument (argu). Two other features: predicate context (ctx-p) and region mark (mr) are also adopted. Because a single predicate word can not exactly describe the predicate information, especially when the same words appear more than one times in a sentence. With the predicate context, the ambiguity can be largely eliminated. Similarly, we use region mark m<sub>r</sub> = 1 to denote the argument position if it locates in the predicate context region, or m<sub>r</sub> = 0 if does not. These four simple features are all we need for our SRL system. Features of one sample with context size set to 1 is showed as following[2]:
<center>
![pic](./src/feature.jpg)
</center>
In this sample, the corresponding labelled sentence is:
[ <sub>A1</sub> A record date ] has [ <sub>AM-NEG</sub> n't ] been [ <sub>V</sub> set ] .
In the demo, we adopt the feature template as above, consists of : `argument`, `predicate`, `ctx-p (p=-1,0,1)`, `mark` and use `B/I/O` scheme to label each argument. These features and labels are stored in `feature` file, and separated by `\t`.
### Data Provider
`dataprovider.py` is the python file to wrap data. `hook()` function is to define the data slots for network. The eight features and the label are all IndexSlots.
```
def hook(settings, word_dict, label_dict, **kwargs):
settings.word_dict = word_dict
settings.label_dict = label_dict
#all inputs are integral and sequential type
settings.slots = [
integer_value_sequence(len(word_dict)),
integer_value_sequence(len(predicate_dict)),
integer_value_sequence(len(word_dict)),
integer_value_sequence(len(word_dict)),
integer_value_sequence(len(word_dict)),
integer_value_sequence(len(word_dict)),
integer_value_sequence(len(word_dict)),
integer_value_sequence(2),
integer_value_sequence(len(label_dict))]
```
The corresponding data iterator is as following:
```
@provider(init_hook=hook, should_shuffle=True, calc_batch_size=get_batch_size,
can_over_batch_size=False, cache=CacheType.CACHE_PASS_IN_MEM)
def process(settings, file_name):
with open(file_name, 'r') as fdata:
for line in fdata:
sentence, predicate, ctx_n2, ctx_n1, ctx_0, ctx_p1, ctx_p2, mark, label = \
line.strip().split('\t')
words = sentence.split()
sen_len = len(words)
word_slot = [settings.word_dict.get(w, UNK_IDX) for w in words]
predicate_slot = [settings.predicate_dict.get(predicate)] * sen_len
ctx_n2_slot = [settings.word_dict.get(ctx_n2, UNK_IDX)] * sen_len
ctx_n1_slot = [settings.word_dict.get(ctx_n1, UNK_IDX)] * sen_len
ctx_0_slot = [settings.word_dict.get(ctx_0, UNK_IDX)] * sen_len
ctx_p1_slot = [settings.word_dict.get(ctx_p1, UNK_IDX)] * sen_len
ctx_p2_slot = [settings.word_dict.get(ctx_p2, UNK_IDX)] * sen_len
marks = mark.split()
mark_slot = [int(w) for w in marks]
label_list = label.split()
label_slot = [settings.label_dict.get(w) for w in label_list]
yield word_slot, predicate_slot, ctx_n2_slot, ctx_n1_slot, \
ctx_0_slot, ctx_p1_slot, ctx_p2_slot, mark_slot, label_slot
```
The `process` function yields 9 lists, which are the 8 features and the label.
### Neural Network Config
`db_lstm.py` is the neural network config file to load the dictionaries and define the data provider module and network architecture during the training procedure.
Nine `data_layer` load instances from data provider. Eight features are transformed into embeddings respectively, and mixed by `mixed_layer` . Deep bidirectional LSTM layers extract features for the softmax layer. The objective function is cross entropy of labels.
### Run Training
The script for training is `train.sh`, user just need to execute:
```bash
./train.sh
```
The content in `train.sh`:
```
paddle train \
--config=./db_lstm.py \
--use_gpu=0 \
--log_period=5000 \
--trainer_count=1 \
--show_parameter_stats_period=5000 \
--save_dir=./output \
--num_passes=10000 \
--average_test_period=10000000 \
--init_model_path=./data \
--load_missing_parameter_strategy=rand \
--test_all_data_in_one_period=1 \
2>&1 | tee 'train.log'
```
- \--config=./db_lstm.py : network config file.
- \--use_gpu=false: use CPU to train, set true, if you install GPU version of PaddlePaddle and want to use GPU to train, until now crf_layer do not support GPU
- \--log_period=5000: print log every 5000 batches.
- \--trainer_count=1: set thread number (or GPU count).
- \--show_parameter_stats_period=5000: show parameter statistic every 5000 batches.
- \--save_dir=./output: output path to save models.
- \--num_passes=10000: set pass number, one pass in PaddlePaddle means training all samples in dataset one time.
- \--average_test_period=10000000: do test on average parameter every average_test_period batches
- \--init_model_path=./data: parameter initialization path
- \--load_missing_parameter_strategy=rand: randomly initialize parameters that do not exist
- \--test_all_data_in_one_period=1: test all data in one period
After training, the models will be saved in directory `output`. Our training curve is as following:
<center>
![pic](./src/curve.jpg)
</center>
### Run testing
The script for testing is `test.sh`, user just need to execute:
```bash
./test.sh
```
The main part in `test.sh`:
```
paddle train \
--config=./db_lstm.py \
--model_list=$model_list \
--job=test \
--config_args=is_test=1 \
```
- \--config=./db_lstm.py: network config file
- \--model_list=$model_list.list: model list file
- \--job=test: indicate the test job
- \--config_args=is_test=1: flag to indicate test
- \--test_all_data_in_one_period=1: test all data in 1 period
### Run prediction
The script for prediction is `predict.sh`, user just need to execute:
```bash
./predict.sh
```
In `predict.sh`, user should offer the network config file, model path, label file, word dictionary file, feature file
```
python predict.py \
-c $config_file \
-w $best_model_path \
-l $label_file \
-p $predicate_dict_file \
-d $dict_file \
-i $input_file \
-o $output_file
```
`predict.py` is the main executable python script, which includes functions: load model, load data, data prediction. The network model will output the probability distribution of labels. In the demo, we take the label with maximum probability as result. User can also implement the beam search or viterbi decoding upon the probability distribution matrix.
After prediction, the result is saved in `predict.res`.
## Reference
[1] Martha Palmer, Dan Gildea, and Paul Kingsbury. The Proposition Bank: An Annotated Corpus of Semantic Roles , Computational Linguistics, 31(1), 2005.
[2] Zhou, Jie, and Wei Xu. "End-to-end learning of semantic role labeling using recurrent neural networks." Proceedings of the Annual Meeting of the Association for Computational Linguistics. 2015.
# 情感分析教程
情感分析有许多应用场景。 一个基本的应用场景是区分给定文本的褒贬两极性,给定的文本可以是一个文档、句子、或者是一个小的文本片段。 一个简单的例子如:把用户在购物网站、旅游网站、团购网站(亚马逊、天猫、淘宝等)上发表的评论分成正面评论和负面评论两类。
情感分析也常用于基于大量评论和个人博客来监控社会媒体。 例如,研究人员分析了几个关于消费者信心和政治观点的调查,结果发现它们与同时期的Twitter消息中的情绪词频率相关 [1]。 另一个例子是通过分析每日Twitter博客的文本内容来预测股票变动 [2]。
另一方面,抓取产品的用户评论并分析他们的情感,有助于理解用户对不同公司,不同产品,甚至不同竞争对手产品的偏好。
本教程将指导您完成长短期记忆(LSTM)网络的训练过程,以分类来自[大型电影评论数据集](http://ai.stanford.edu/~amaas/data/sentiment/)(有时称为[互联网电影数据库 (IMDB)](http://ai.stanford.edu/~amaas/papers/wvSent_acl2011.pdf))的句子的情感。 此数据集包含电影评论及其相关联的类别标签,即正面和负面。
## 数据准备
### IMDB 数据介绍
训练模型之前, 我们需要预处理数据并构建一个字典。 首先, 你可以使用下面的脚本下载 IMDB 数据集和[Moses](http://www.statmt.org/moses/)工具, 这是一个基于统计的机器翻译系统。 我们提供了一个数据预处理脚本,它不仅能够处理IMDB数据,还能处理其他用户自定义的数据。 为了使用提前编写的脚本,需要将标记的训练和测试样本移动到另一个路径,这已经在`get_imdb.sh`中完成。
```
cd demo/sentiment/data
./get_imdb.sh
```
如果数据获取成功,你将在目录```./demo/sentiment/data```中看到下面的文件:
```
aclImdb get_imdb.sh imdb mosesdecoder-master
```
* aclImdb: 从外部网站上下载的原始数据集。
* imdb: 仅包含训练和测试数据集。
* mosesdecoder-master: Moses 工具。
IMDB数据集包含25,000个已标注过的高极性电影评论用于训练,25,000个用于测试。负面的评论的得分小于等于4,正面的评论的得分大于等于7,总评分10分。 运行完脚本 `./get_imdb.sh`后, 我们可以看到在目录 `aclImdb`中的数据集的结构如下:
```
imdbEr.txt imdb.vocab README test train
```
* train: 训练数据集。
* test : 测试数据集。
* imdb.vocab: 字典文件。
* imdbEr.txt: 字典imdb.vocab中每个切分单词的预期评级。
* README: 数据说明文档。
测试集和训练集目录包含下面的文件:
```
labeledBow.feat neg pos unsup unsupBow.feat urls_neg.txt urls_pos.txt urls_unsup.txt
```
* pos: 正面评价样本,包含12,500个txt文件,每个文件是一个电影评论。
* neg: 负面评价样本,包含12,500个txt文件,每个文件是一个电影评论。
* unsup: 未标记的评价样本,包含50,000个txt文件。
* urls_xx.txt: 每个评论的网址。
* xxBow.feat: 用于统计词频的Bow模型特征。
### IMDB 数据准备
在这个例子中,我们只使用已经标注过的训练集和测试集,且默认在训练集上构建字典,而不使用IMDB数据集中的imdb.vocab作为字典。训练集已经做了随机打乱排序而测试集没有。 Moses 工具中的脚本`tokenizer.perl` 用于切分单词和标点符号。执行下面的命令就可以预处理数据。
```
cd demo/sentiment/
./preprocess.sh
```
preprocess.sh:
```
data_dir="./data/imdb"
python preprocess.py -i $data_dir
```
* data_dir: 输入数据所在目录。
* preprocess.py: 预处理脚本。
运行成功后目录`demo/sentiment/data/pre-imdb` 结构如下:
```
dict.txt labels.list test.list test_part_000 train.list train_part_000
```
* test\_part\_000 and train\_part\_000: 所有标记的测试集和训练集, 训练集已经随机打乱。
* train.list and test.list: 训练集和测试集文件列表。
* dict.txt: 利用训练集生成的字典。
* labels.list: neg 0, pos 1, 含义:标签0表示负面的评论,标签1表示正面的评论。
### 用户自定义数据预处理
如果你执行其它的用情感分析来分类文本的任务,可以按如下的结构来准备数据。 我们提供了脚本来构建字典和预处理数据。所以你只用按下面的结构来组织数据就行了。
```
dataset
|----train
| |----class1
| | |----text_files
| |----class2
| | |----text_files
| | ...
|----test
| |----class1
| | |----text_files
| |----class2
| | |----text_files
| | ...
```
* dataset: 一级目录。
* train, test: 二级目录。
* class1,class2,...: 三级目录。
* text_files: 文本格式的实例文件。
所有同目录下的文本实例文件都是同级别的。 每个文本文件包含一个或者多个实例,每一行表示一个实例。 为了充分的随机打乱训练集, 在预处理含有多行数据的文本文件时参数设置稍有不同, 执行`preprocess.sh`脚本时需要加上`-m True`参数。 tokenizer.perl 默认用来切分单词和标点符号,如果你不需要这个操作,在运行`preprocess.sh`时加上`-t False`参数即可。
## 训练模型
在这步任务中,我们使用了循环神经网络(RNN)的 LSTM 架构来训练情感分析模型。 引入LSTM模型主要是为了克服消失梯度的问题。 LSTM网络类似于具有隐藏层的标准循环神经网络, 但是隐藏层中的每个普通节点被一个记忆单元替换。 每个记忆单元包含四个主要的元素: 输入门, 具有自循环连接的神经元,忘记门和输出门。 更多的细节可以在文献中找到[4]。 LSTM架构的最大优点是它可以在长时间间隔内记忆信息,而没有短时记忆的损失。在有新的单词来临的每一个时间步骤内,存储在记忆单元区块的历史信息被更新用来迭代的学习单词以合理的序列呈现。
<center>![LSTM](src/lstm.png)</center>
<center>图表 1. LSTM [3]</center>
情感分析是自然语言理解中最典型的问题之一。 它的目的是预测在一个序列中表达的情感态度。 通常,仅仅是一些关键词,如形容词和副词,在预测序列或段落的情感中起主要作用。然而有些评论上下文非常长,例如 IMDB的数据集。 我们之所以使用LSTM来执行这个任务是因为其改进的设计并且具有门机制。 首先,它能够从词级到具有可变上下文长度的上下文级别来总结表示。 第二,它可以在句子级别利用可扩展的上下文, 而大多数方法只是利用n-gram级别的知识。第三,它直接学习段落表示,而不是组合上下文级别信息。
在本演示中,我们提供两个网络,即双向LSTM和三层堆叠LSTM。
#### 双向LSTM
图2是双向LSTM网络,后面连全连接层和softmax层。
<center>![BiLSTM](src/bi_lstm.jpg)</center>
<center>图 2. Bidirectional-LSTM </center>
#### Stacked-LSTM
图3是三层LSTM结构。图的底部是word embedding(对文档处理后形成的单词向量)。 接下来,连接三个LSTM隐藏层,并且第二个是反向LSTM。然后提取隐藏LSTM层的所有时间步长的最大词向量作为整个序列的表示。 最后,使用具有softmax激活的全连接前馈层来执行分类任务。 更多内容可查看参考文献 [5]。
<center>![StackedLSTM](src/stacked_lstm.jpg)</center>
<center>图 3. Stacked-LSTM for sentiment analysis </center>
**配置**
进入`demo/sentiment` 目录 , `trainer_config.py` 是一个配置文件的例子, 其中包含算法和网络配置。第一行从`sentiment_net.py`中导出预定义的网络。
trainer_config.py:
```python
from sentiment_net import *
data_dir = "./data/pre-imdb"
# whether this config is used for test
is_test = get_config_arg('is_test', bool, False)
# whether this config is used for prediction
is_predict = get_config_arg('is_predict', bool, False)
dict_dim, class_dim = sentiment_data(data_dir, is_test, is_predict)
################## Algorithm Config #####################
settings(
batch_size=128,
learning_rate=2e-3,
learning_method=AdamOptimizer(),
regularization=L2Regularization(8e-4),
gradient_clipping_threshold=25
)
#################### Network Config ######################
stacked_lstm_net(dict_dim, class_dim=class_dim,
stacked_num=3, is_predict=is_predict)
#bidirectional_lstm_net(dict_dim, class_dim=class_dim, is_predict=is_predict)
```
* **数据定义**:
* get\_config\_arg(): 获取通过 `--config_args=xx` 设置的命令行参数。
* 定义训练数据和测试数据提供者, 这里使用了PaddlePaddle的Python接口来加载数据。想了解更多细节可以参考PyDataProvider部分的文档
* **算法配置**:
* 使用随机梯度下降(sgd)算法。
* 使用 adam 优化。
* 设置batch size大小为128。
* 设置平均sgd窗口。
* 设置全局学习率。
* **网络配置**:
* dict_dim: 获取字典维度。
* class_dim: 设置类别数,IMDB有两个标签,即正面评价标签和负面评价标签。
* `stacked_lstm_net`: 预定义网络如图3所示,默认情况下使用此网络
* `bidirectional_lstm_net`: 预定义网络,如图2所示。
**训练**
首先安装PaddlePaddle。 然后使用下面的脚本 `train.sh` 来开启本地的训练。
```
cd demo/sentiment/
./train.sh
```
train.sh:
```
config=trainer_config.py
output=./model_output
paddle train --config=$config \
--save_dir=$output \
--job=train \
--use_gpu=false \
--trainer_count=4 \
--num_passes=10 \
--log_period=20 \
--dot_period=20 \
--show_parameter_stats_period=100 \
--test_all_data_in_one_period=1 \
2>&1 | tee 'train.log'
```
* \--config=$config: 设置网络配置。
* \--save\_dir=$output: 设置输出路径以保存训练完成的模型。
* \--job=train: 设置工作模式为训练。
* \--use\_gpu=false: 使用CPU训练,如果你安装GPU版本的PaddlePaddle,并想使用GPU来训练设置为true。
* \--trainer\_count=4:设置线程数(或GPU个数)。
* \--num\_passes=10: 设置pass,PaddlePaddle中的一个pass意味着对数据集中的所有样本进行一次训练。
* \--log\_period=20: 每20个batch打印一次日志。
* \--show\_parameter\_stats\_period=100: 每100个batch打印一次统计信息。
* \--test\_all_data\_in\_one\_period=1: 每次测试都测试所有数据。
如果运行成功,输出日志保存在路径 `demo/sentiment/train.log`中,模型保存在目录`demo/sentiment/model_output/`中。 输出日志说明如下:
```
Batch=20 samples=2560 AvgCost=0.681644 CurrentCost=0.681644 Eval: classification_error_evaluator=0.36875 CurrentEval: classification_error_evaluator=0.36875
...
Pass=0 Batch=196 samples=25000 AvgCost=0.418964 Eval: classification_error_evaluator=0.1922
Test samples=24999 cost=0.39297 Eval: classification_error_evaluator=0.149406
```
- Batch=xx: 表示训练了xx个Batch。
- samples=xx: 表示训练了xx个样本。
- AvgCost=xx: 从第0个batch到当前batch的平均损失。
- CurrentCost=xx: 最新log_period个batch处理的当前损失。
- Eval: classification\_error\_evaluator=xx: 表示第0个batch到当前batch的分类错误。
- CurrentEval: classification\_error\_evaluator: 最新log_period个batch的分类错误。
- Pass=0: 通过所有训练集一次称为一遍。 0表示第一次经过训练集。
默认情况下,我们使用`stacked_lstm_net`网络,当传递相同的样本数时,它的收敛速度比`bidirectional_lstm_net`快。如果要使用双向LSTM,只需删除最后一行中的注释并把“stacked_lstm_net”注释掉。
## 测试模型
测试模型是指使用训练出的模型评估已标记的验证集。
```
cd demo/sentiment
./test.sh
```
test.sh:
```bash
function get_best_pass() {
cat $1 | grep -Pzo 'Test .*\n.*pass-.*' | \
sed -r 'N;s/Test.* error=([0-9]+\.[0-9]+).*\n.*pass-([0-9]+)/\1 \2/g' | \
sort | head -n 1
}
log=train.log
LOG=`get_best_pass $log`
LOG=(${LOG})
evaluate_pass="model_output/pass-${LOG[1]}"
echo 'evaluating from pass '$evaluate_pass
model_list=./model.list
touch $model_list | echo $evaluate_pass > $model_list
net_conf=trainer_config.py
paddle train --config=$net_conf \
--model_list=$model_list \
--job=test \
--use_gpu=false \
--trainer_count=4 \
--config_args=is_test=1 \
2>&1 | tee 'test.log'
```
函数`get_best_pass`依据分类错误率获得最佳模型进行测试。 在本示例中,我们默认使用IMDB的测试数据集作为验证。 与训练不同,它需要在这里指定`--job=test`和模型路径,即`--model_list=$model_list`。如果运行成功,日志将保存在`demo/sentiment/test.log`的路径中。例如,在我们的测试中,最好的模型是`model_output/pass-00002`,分类误差是0.115645,如下:
```
Pass=0 samples=24999 AvgCost=0.280471 Eval: classification_error_evaluator=0.115645
```
## 预测
`predict.py`脚本提供了一个预测接口。在使用它之前请安装PaddlePaddle的python api。 预测IMDB的未标记评论的一个实例如下:
```
cd demo/sentiment
./predict.sh
```
predict.sh:
```
#Note the default model is pass-00002, you shold make sure the model path
#exists or change the mode path.
model=model_output/pass-00002/
config=trainer_config.py
label=data/pre-imdb/labels.list
cat ./data/aclImdb/test/pos/10007_10.txt | python predict.py \
--tconf=$config\
--model=$model \
--label=$label \
--dict=./data/pre-imdb/dict.txt \
--batch_size=1
```
* `cat ./data/aclImdb/test/pos/10007_10.txt` : 输入预测样本。
* `predict.py` : 预测接口脚本。
* `--tconf=$config` : 设置网络配置。
* `--model=$model` : 设置模型路径。
* `--label=$label` : 设置标签类别字典,这个字典是整数标签和字符串标签的一个对应。
* `--dict=data/pre-imdb/dict.txt` : 设置字典文件。
* `--batch_size=1` : 设置batch size。
注意应该确保默认模型路径`model_output/pass-00002`存在或更改为其它模型路径。
本示例的预测结果:
```
Loading parameters from model_output/pass-00002/
./data/aclImdb/test/pos/10014_7.txt: predicting label is pos
```
我们真诚地感谢您的关注,并欢迎您来参与贡献。
## 参考文档
[1] Brendan O'Connor, Ramnath Balasubramanyan, Bryan R. Routledge, and Noah A. Smith. 2010. [From Tweets to Polls: Linking Text Sentiment to Public Opinion Time Series](http://homes.cs.washington.edu/~nasmith/papers/oconnor+balasubramanyan+routledge+smith.icwsm10.pdf). In ICWSM-2010. <br>
[2] Johan Bollen, Huina Mao, Xiaojun Zeng. 2011. [Twitter mood predicts the stock market](http://arxiv.org/abs/1010.3003), Journal of Computational Science.<br>
[3] Alex Graves, Marcus Liwicki, Santiago Fernan- dez, Roman Bertolami, Horst Bunke, and Ju ̈rgen Schmidhuber. 2009. [A novel connectionist system for unconstrained handwriting recognition. IEEE Transactions on Pattern Analysis and Machine In- telligence](http://www.cs.toronto.edu/~graves/tpami_2009.pdf), 31(5):855–868.<br>
[4] Zachary C. Lipton, [A Critical Review of Recurrent Neural Networks for Sequence Learning](http://arxiv.org/abs/1506.00019v1), arXiv:1506.00019. <br>
[5] Jie Zhou and Wei Xu; [End-to-end Learning of Semantic Role Labeling Using Recurrent Neural Networks](http://www.aclweb.org/anthology/P/P15/P15-1109.pdf); ACL-IJCNLP 2015. <br>
# Sentiment Analysis Tutorial
Sentiment analysis has many applications. A basic task in sentiment analysis is classifying the polarity of a given text at the document, sentence or feature/aspect level. One simple example is to classify the customer reviews in a shopping website, a tourism website, and group buying websites like Amazon, TaoBao, Tmall etc.
Sentiment analysis is also used to monitor social media based on large amount of reviews or blogs. For example, the researchers analyzed several surveys on consumer confidence and political opinion, found they correlate to sentiment word frequencies in contemporaneous Twitter messages [1]. Another example is to forecast stock movements through analyzing the text content of a daily Twitter blog [2].
On the other hand, grabbing the user comments of products and analyzing their sentiment are useful to understand user preferences for companies, products, even competing products.
This tutorial will guide you through the process of training a Long Short Term Memory (LSTM) Network to classify the sentiment of sentences from [Large Movie Review Dataset](http://ai.stanford.edu/~amaas/data/sentiment/), sometimes known as the Internet Movie Database (IMDB). This dataset contains movie reviews along with their associated binary sentiment polarity labels, namely positive and negative. So randomly guessing yields 50% accuracy.
## Data Preparation
### IMDB Data Introduction
Before training models, we need to preprocess the data and build a dictionary. First, you can use following script to download IMDB dataset and [Moses](http://www.statmt.org/moses/) tool, which is a statistical machine translation system. We provide a data preprocessing script, which is capable of handling not only IMDB data, but also other user-defined data. In order to use the pre-written script, it needs to move labeled train and test samples to another path, which has been done in `get_imdb.sh`.
```
cd demo/sentiment/data
./get_imdb.sh
```
If the data is obtained successfully, you will see the following files at ```./demo/sentiment/data```:
```
aclImdb get_imdb.sh imdb mosesdecoder-master
```
* aclImdb: raw dataset downloaded from website.
* imdb: only contains train and test data.
* mosesdecoder-master: Moses tool.
IMDB dataset contains 25,000 highly polar movie reviews for training, and 25,000 for testing. A negative review has a score ≤ 4 out of 10, and a positive review has a score ≥ 7 out of 10. After running `./get_imdb.sh`, we can find the dataset has the following structure in `aclImdb`.
```
imdbEr.txt imdb.vocab README test train
```
* train: train sets.
* test : test sets.
* imdb.vocab: dictionary.
* imdbEr.txt: expected rating for each token in imdb.vocab.
* README: data documentation.
The file in train set directory is as follows. The test set also contains them except `unsup` and `urls_unsup.txt`.
```
labeledBow.feat neg pos unsup unsupBow.feat urls_neg.txt urls_pos.txt urls_unsup.txt
```
* pos: positive samples, contains 12,500 txt files, each file is one movie review.
* neg: negative samples, contains 12,500 txt files, each file is one movie review.
* unsup: unlabeled samples, contains 50,000 txt files.
* urls_xx.txt: urls of each reviews.
* xxBow.feat: already-tokenized bag of words (BoW) features.
### IMDB Data Preparation
In this demo, we only use the labeled train and test sets and do not use imdb.vocab as the dictionary. By default, the dictionary is built on the train set. The train set is shuffled and the test set is not. `tokenizer.perl` in the Moses tool is used to tokenize the words and punctuation. Simply execute the following command to preprocess the data.
```
cd demo/sentiment/
./preprocess.sh
```
preprocess.sh:
```
data_dir="./data/imdb"
python preprocess.py -i data_dir
```
* data_dir: input data directory.
* preprocess.py: preprocess script.
If running successfully, you will see `demo/sentiment/data/pre-imdb` directory as follows:
```
dict.txt labels.list test.list test_part_000 train.list train_part_000
```
* test\_part\_000 and train\_part\_000: all labeled test and train sets. Train sets have been shuffled.
* train.list and test.list: train and test file lists.
* dict.txt: dictionary generated on train sets by default.
* labels.list: neg 0, pos 1, means label 0 is negative review, label 1 is positive review.
### User-defined Data Preparation
If you perform another sentiment classification task, you can prepare data as follows. We have provided the scripts to build the dictionary and preprocess data, so just organize the data as follows.
```
dataset
|----train
| |----class1
| | |----text_files
| |----class2
| | |----text_files
| | ...
|----test
| |----class1
| | |----text_files
| |----class2
| | |----text_files
| | ...
```
* dataset: 1st directory.
* train, test: 2nd directory.
* class1,class2,...: 3rd directory.
* text_files: samples with text file format.
All samples with text file format under the same folder belong to the same category. Each text file contains one or more samples and each line is one sample. In order to shuffle fully, the preprocessing is a little different for data with multiple lines in one text file, which needs to set `-m True` in `preprocess.sh`. And tokenizer.perl is used by default. If you don't need it, only set `-t False` in `preprocess.sh`.
## Training
In this task, we use Recurrent Neural Network (RNN) of LSTM architecture to train sentiment analysis model. LSTM model was introduced primarily in order to overcome the problem of vanishing gradients. LSTM network resembles a standard recurrent neural network with a hidden layer, but each ordinary node in the hidden layer is replaced by a memory cell. Each memory cell contains four main elements: an input gate, a neuron with a self-recurrent connection, a forget gate and an output gate. More details can be found in the literature [4]. The biggest advantage of the LSTM architecture is that it learns to memorize information over long time intervals without the loss of short time memory. At each time step with a new coming word, historical information stored in the memory block is updated to iteratively learn the sequence representation.
<center>![LSTM](./lstm.png)</center>
<center>Figure 1. LSTM [3]</center>
Sentiment analysis is among the most typical problems in natural language understanding. It aims at predicting the attitude expressed in a sequence. Usually, only some key words, like adjectives and adverbs words, play a major role in predicting the sentiment of sequences or paragraphs. However, some review or comment contexts are very long, such as IMDB dataset. We use LSTM to perform this task for its improved design with the gate mechanism. First, it is able to summarize the representation from word level to context level with variable context length which is adapted by the gate values. Second, it can utilize the expanded context at the sentence level, while most methods are good at utilizing n-gram level knowledge. Third, it learns the paragraph representation directly rather than combining the context level information. This results in this end-to-end framework.
In this demo we provide two networks, namely bidirectional-LSTM and three layers of stacked-LSTM.
#### Bidirectional-LSTM
One is a bidirectional LSTM network, connected by fully connected layer and softmax, as shown in Figure 2.
<center>![BiLSTM](./bi_lstm.jpg)</center>
<center>Figure 2. Bidirectional-LSTM </center>
#### Stacked-LSTM
Another is a three-layer LSTM structure, shown in Figure 3. The bottom of the figure is word embedding. Next, three LSTM-Hidden layers are connected and the second LSTM is reversed. Then extract the maximum hidden vectors of all time steps of hidden and LSTM layer as the representation for the entire sequence. Finally, a fully connected feed forward layer with softmax activation is used to perform the classification task. For more details about this network, refer to paper [5].
<center>![StackedLSTM](./stacked_lstm.jpg)</center>
<center>Figure 3. Stacked-LSTM for sentiment analysis </center>
**Config**
Switch into the `demo/sentiment` directory. The `trainer_config.py` file is an example of the config, containing the algorithm and network configuration. The first line imports predefined networks from `sentiment_net.py`.
trainer_config.py:
```python
from sentiment_net import *
data_dir = "./data/pre-imdb"
# whether this config is used for test
is_test = get_config_arg('is_test', bool, False)
# whether this config is used for prediction
is_predict = get_config_arg('is_predict', bool, False)
dict_dim, class_dim = sentiment_data(data_dir, is_test, is_predict)
################## Algorithm Config #####################
settings(
batch_size=128,
learning_rate=2e-3,
learning_method=AdamOptimizer(),
average_window=0.5,
regularization=L2Regularization(8e-4),
gradient_clipping_threshold=25
)
#################### Network Config ######################
stacked_lstm_net(dict_dim, class_dim=class_dim,
stacked_num=3, is_predict=is_predict)
#bidirectional_lstm_net(dict_dim, class_dim=class_dim, is_predict=is_predict)
```
* **Data Definition**:
* get\_config\_arg(): get arguments set by `--config_args=xx` on the command line.
* Define data provider, here using Python interface to load data. For details, you can refer to the document of PyDataProvider2.
* **Algorithm Configuration**:
* set batch size of 128.
* set global learning rate.
* use adam optimization.
* set average sgd window.
* set L2 regularization.
* set gradient clipping threshold.
* **Network Configuration**:
* dict_dim: dictionary dimension.
* class_dim: category number, IMDB has two labels, namely positive and negative labels.
* `stacked_lstm_net`: predefined network as shown in Figure 3, use this network by default.
* `bidirectional_lstm_net`: predefined network as shown in Figure 2.
**Training**
Install PaddlePaddle first if necessary. Then you can use script `train.sh` as follows to launch local training.
```
cd demo/sentiment/
./train.sh
```
train.sh:
```
config=trainer_config.py
output=./model_output
paddle train --config=$config \
--save_dir=$output \
--job=train \
--use_gpu=false \
--trainer_count=4 \
--num_passes=10 \
--log_period=20 \
--dot_period=20 \
--show_parameter_stats_period=100 \
--test_all_data_in_one_period=1 \
2>&1 | tee 'train.log'
```
* \--config=$config: set network config.
* \--save\_dir=$output: set output path to save models.
* \--job=train: set job mode to train.
* \--use\_gpu=false: use CPU to train, set true, if you install GPU version of PaddlePaddle and want to use GPU to train.
* \--trainer\_count=4: set thread number (or GPU count).
* \--num\_passes=10: set pass number, one pass in PaddlePaddle means training all samples in dataset one time.
* \--log\_period=20: print log every 20 batches.
* \--show\_parameter\_stats\_period=100: show parameter statistic every 100 batches.
* \--test\_all_data\_in\_one\_period=1: test all data every testing.
If the run succeeds, the output log is saved in path of `demo/sentiment/train.log` and model is saved in path of `demo/sentiment/model_output/`. The output log is explained as follows.
```
Batch=20 samples=2560 AvgCost=0.681644 CurrentCost=0.681644 Eval: classification_error_evaluator=0.36875 CurrentEval: classification_error_evaluator=0.36875
...
Pass=0 Batch=196 samples=25000 AvgCost=0.418964 Eval: classification_error_evaluator=0.1922
Test samples=24999 cost=0.39297 Eval: classification_error_evaluator=0.149406
```
- Batch=xx: means passing xx batches.
- samples=xx: means passing xx samples.
- AvgCost=xx: averaged cost from 0-th batch to current batch.
- CurrentCost=xx: current cost of latest log_period batches.
- Eval: classification\_error\_evaluator=xx: means classification error from 0-th batch to current batch.
- CurrentEval: classification\_error\_evaluator: current classification error of the latest log_period batches.
- Pass=0: Going through all training set one time is called one pass. 0 means going through training set first time.
By default, we use the `stacked_lstm_net` network, which converges at a faster rate than `bidirectional_lstm_net` when passing same sample number. If you want to use bidirectional LSTM, just remove comment in the last line and comment `stacked_lstm_net`.
## Testing
Testing means evaluating the labeled validation set using trained model.
```
cd demo/sentiment
./test.sh
```
test.sh:
```bash
function get_best_pass() {
cat $1 | grep -Pzo 'Test .*\n.*pass-.*' | \
sed -r 'N;s/Test.* error=([0-9]+\.[0-9]+).*\n.*pass-([0-9]+)/\1 \2/g' | \
sort | head -n 1
}
log=train.log
LOG=`get_best_pass $log`
LOG=(${LOG})
evaluate_pass="model_output/pass-${LOG[1]}"
echo 'evaluating from pass '$evaluate_pass
model_list=./model.list
touch $model_list | echo $evaluate_pass > $model_list
net_conf=trainer_config.py
paddle train --config=$net_conf \
--model_list=$model_list \
--job=test \
--use_gpu=false \
--trainer_count=4 \
--config_args=is_test=1 \
2>&1 | tee 'test.log'
```
The function `get_best_pass` gets the best model by classification error rate for testing. In this example, We use test dataset of IMDB as validation by default. Unlike training, it needs to specify `--job=test` and model path, namely `--model_list=$model_list` here. If running successfully, the log is saved in path of `demo/sentiment/test.log`. For example, in our test, the best model is `model_output/pass-00002`, the classification error is 0.115645 as follows.
```
Pass=0 samples=24999 AvgCost=0.280471 Eval: classification_error_evaluator=0.115645
```
## Prediction
`predict.py` provides a predicting interface. You should install python api of PaddlePaddle before using it. One example to predict unlabeled review of IMDB is as follows. Simply running:
```
cd demo/sentiment
./predict.sh
```
predict.sh:
```
#Note the default model is pass-00002, you shold make sure the model path
#exists or change the mode path.
model=model_output/pass-00002/
config=trainer_config.py
label=data/pre-imdb/labels.list
cat ./data/aclImdb/test/pos/10007_10.txt | python predict.py \
--tconf=$config\
--model=$model \
--label=$label \
--dict=./data/pre-imdb/dict.txt \
--batch_size=1
```
* `cat ./data/aclImdb/test/pos/10007_10.txt` : the input sample.
* `predict.py` : predicting interface.
* `--tconf=$config` : set network configure.
* ` --model=$model` : set model path.
* `--label=$label` : set dictionary about corresponding relation between integer label and string label.
* `--dict=data/pre-imdb/dict.txt` : set dictionary.
* `--batch_size=1` : set batch size.
Note you should make sure the default model path `model_output/pass-00002`
exists or change the model path.
Predicting result of this example:
```
Loading parameters from model_output/pass-00002/
./data/aclImdb/test/pos/10014_7.txt: predicting label is pos
```
We sincerely appreciate your interest and welcome your contributions.
## Reference
[1] Brendan O'Connor, Ramnath Balasubramanyan, Bryan R. Routledge, and Noah A. Smith. 2010. [From Tweets to Polls: Linking Text Sentiment to Public Opinion Time Series](http://homes.cs.washington.edu/~nasmith/papers/oconnor+balasubramanyan+routledge+smith.icwsm10.pdf). In ICWSM-2010. <br>
[2] Johan Bollen, Huina Mao, Xiaojun Zeng. 2011. [Twitter mood predicts the stock market](http://arxiv.org/abs/1010.3003), Journal of Computational Science.<br>
[3] Alex Graves, Marcus Liwicki, Santiago Fernan- dez, Roman Bertolami, Horst Bunke, and Ju ̈rgen Schmidhuber. 2009. [A novel connectionist system for unconstrained handwriting recognition. IEEE Transactions on Pattern Analysis and Machine In- telligence](http://www.cs.toronto.edu/~graves/tpami_2009.pdf), 31(5):855–868.<br>
[4] Zachary C. Lipton, [A Critical Review of Recurrent Neural Networks for Sequence Learning](http://arxiv.org/abs/1506.00019v1), arXiv:1506.00019. <br>
[5] Jie Zhou and Wei Xu; [End-to-end Learning of Semantic Role Labeling Using Recurrent Neural Networks](http://www.aclweb.org/anthology/P/P15/P15-1109.pdf); ACL-IJCNLP 2015. <br>
# 文本生成教程 #
在语言生成领域中,“序列到序列”(sequence to sequence)的方法已被证明是一种强大的模型。它可以被应用于进行机器翻译(machine translation)、query改写(query rewriting)、图像描述(image captioning)等等。
本篇教程将会指导你通过训练一个“序列到序列”的神经网络机器翻译(NMT)模型来将法语翻译成英语。
我们遵循 [Neural Machine Translation by Jointly Learning to Align and Translate](http://arxiv.org/abs/1409.0473) 这篇文章,其中详细说明了模型架构,以及在WMT-14数据集上得到良好表现的训练过程。本篇教程在PaddlePaddle中重现了这一良好的训练结果。
我们感谢@caoying的pull request,其中定义了模型架构和solver配置。
## 数据准备 ##
### 下载与解压缩 ###
从该链接 [http://www-lium.univ-lemans.fr/~schwenk/cslm\_joint\_paper/](http://www-lium.univ-lemans.fr/~schwenk/cslm_joint_paper/) 下载WMT-14数据集,然后解压,并将Develop和Test数据分别放入不同的文件夹。
- **Train data**: [bitexts (选择过后的)](http://www-lium.univ-lemans.fr/~schwenk/cslm_joint_paper/data/bitexts.tgz)
- **Develop and Test data**: [dev 与 test 数据](http://www-lium.univ-lemans.fr/~schwenk/cslm_joint_paper/data/dev+test.tgz)
在Linux下,只需要简单地运行以下命令。否则你需要自己下载、解压、拆分到不同文件夹、并且分别重命名文件后缀。
```bash
cd demo/seqToseq/data
./wmt14_data.sh
```
我们会发现数据集 `wmt14` 中包含如下表所示的3个文件夹。
<table border="2" cellspacing="0" cellpadding="6" rules="all" frame="border">
<colgroup>
<col class="left" />
<col class="left" />
<col class="left" />
<col class="left" />
</colgroup>
<thead>
<tr>
<th scope="col" class="left">folder name</th>
<th scope="col" class="left">French-English parallel corpora file</th>
<th scope="col" class="left">number of total file</th>
<th scope="col" class="left">size</th>
</tr>
</thead>
<tbody>
<tr>
<td class="left">train_data</td>
<td class="left">ccb2_pc30.src, ccb2_pc30.trg, etc</td>
<td class="left">12</td>
<td class="left">3.55G</td>
</tr>
<tr>
<td class="left">test_data</td>
<td class="left">ntst1213.src, ntst1213.trg</td>
<td class="left">2</td>
<td class="left">1636k</td>
</tr>
<tr>
<td class="left">gen_data</td>
<td class="left">ntst14.src, ntst14.trg</td>
<td class="left">2</td>
<td class="left">864k</td>
</tr>
</tbody>
</table>
<br/>
- 每个文件夹都包含法语到英语的平行语料库
- **XXX.src** 是原始法语文件;**XXX.trg** 是目标英语文件
- **XXX.src** 和 **XXX.trg** 的行数应该一致
- 每行都是一个法语或者英语的句子
- **XXX.src** 和 **XXX.trg** 中任意第i行的句子之间都有着一一对应的关系
### 用户自定义数据集 ###
如果你想进行诸如语义转述(Paraphrasing)等其他“序列到序列”的任务,你只需要按照如下方式组织数据,并将它们放在`demo/seqToseq/data`目录下:
dataset
train
file1.src file1.trg
file2.src file2.trg
......
test
file1.src file1.trg
file2.src file2.trg
......
gen
file1.src file1.trg
file2.src file2.trg
......
- 一级目录:数据集文件夹名称
- 二级目录:train、test和gen这三个文件夹是固定的
- 三级目录:源语言到目标语言的平行语料库文件
- **XXX.src** 是源语言的文件,**XXX.trg** 是目标语言的文件
- 文件中的每行都必须是一个句子
- **XXX.src** 和 **XXX.trg** 中任意第i行的句子之间都必须有着一一对应的关系
## 数据预处理 ##
### 预处理工作流程 ###
- 将每个源语言到目标语言的平行语料库文件合并为一个文件:
- 合并每个 **XXX.src** 和 **XXX.trg** 文件为 **XXX**
- **XXX** 中的第i行 = **XXX.src** 中的第i行 + '\t' + **XXX.trg**中的第i行
- 创建训练数据的“源字典”和“目标字典”,每个字典都有DICTSIZE个单词,包括:
- 词频最高的(DICTSIZE - 3)个单词
- 3个特殊符号
- `<s>`:序列的开始
- `<e>`:序列的结束
- `<unk>`:未包含在字典中的单词
### 预处理命令和结果
对数据集进行预处理的基本命令是:
```python
cd demo/seqToseq/
python preprocess.py -i INPUT [-d DICTSIZE] [-m]
```
- `-i INPUT`:输入的原始数据集路径
- `-d DICTSIZE`:指定的字典单词数,如果没有设置,字典会包含输入数据集中的所有单词
- `-m --mergeDict`:合并 “源字典”和“目标字典”,使得两个字典有相同的上下文
你将会看到如下消息:
concat parallel corpora for dataset
build source dictionary for train data
build target dictionary for train data
dictionary size is XXX
然后你只需要运行以下命令:
```python
python preprocess.py -i data/wmt14 -d 30000
```
这将花费数分钟的时间,并且将预处理好的数据集存放在`demo/seqToseq/data/pre-wmt14`目录下。目录结构如下:
train test gen train.list test.list gen.list src.dict trg.dict
- **train, test, gen**:分别包含了法语到英语的平行语料库的训练数据、测试数据和生成数据。文件夹中的每个文件的每一行包含两部分,首先是法语序列,然后是对应的英语序列。
- **train.list, test.list, gen.list**:分别为train,test,gen文件夹中的文件列表
- **src.dict, trg.dict**:源(法语)/目标(英语)字典,每个字典包含总共30000个单词:29997个最高频单词和3个特殊符号
## 模型训练 ##
### 简介###
神经网络机器翻译(NMT)旨在建立一个可以被协同调至最优翻译效果的单神经元网络。近期提出的NMT模型通常都属于编解码模型(encoder–decoder models)的一种。编解码模型将一个源语句编码为一个定长的向量,然后解码器通过这个向量生成一个目标语句。
在这个任务中,我们使用了一个编解码模型的扩展,它同时学习排列(align)与翻译。每当模型在翻译过程中生成了一个单词,它就会在源语句中搜索出最相关信息的位置的集合。解码器根据上下文向量预测出一个目标单词,这个向量与源中搜索出的位置和所有之前生成的目标单词有关。如想了解更多详细的解释,可以参考 [Neural Machine Translation by Jointly Learning to Align and Translate](http://arxiv.org/abs/1409.0473)
这个模型对于编解码模型来说,最不同的特色是它并没有将输入语句编码为一个单独的定长向量。相反,它将输入语句编码为向量的序列,其中每个向量对应输入语句中的一个元素。然后在解码被翻译的语句时,会自适应地从这些向量中选择一个子集出来。这使得NMT模型得以解放出来,不必再将任意长度源语句中的所有信息压缩至一个定长的向量中。该模型在长语句翻译的场景下效果提升更加明显,在任意长度语句翻译的场景下都可以观察到其效果的提升。
<center>![](./encoder-decoder-attention-model.png)</center>
<center>Figure 1. Encoder-Decoder-Attention-Model</center>
### 使用PaddlePaddle训练模型 ###
我们在训练之前需要创建一个模型配置文件,这里是一个例子`demo/seqToseq/translation/train.conf`。前三行import了定义network,job_mode和attention_mode的python函数。
```python
from seqToseq_net import *
is_generating = False
### Data Definiation
train_conf = seq_to_seq_data(data_dir = "./data/pre-wmt14",
is_generating = is_generating)
### Algorithm Configuration
settings(
learning_method = AdamOptimizer(),
batch_size = 50,
learning_rate = 5e-4)
### Network Architecture
gru_encoder_decoder(train_conf, is_generating)
```
1. **Data Definiation**:在示例中我们定义了一个序列到序列的训练和测试数据。它返回train_conf作为配置,其输入参数如下:
- data_dir:训练数据和测试数据的目录
- is_generating:这个配置是否用来生成,这里设置为False
2. **Algorithm Configuration**:在示例中我们使用SGD训练算法(默认),和ADAM学习方法,指定batch_size为50,learning_rate为5e-4
3. **Network Architecture**:在示例中我们使用attention版本的GRU编解码网络。它包括了一个双向的GRU作为编码器和解码器,它模拟了解码翻译过程中在源语句中的搜索。
### 训练模型的命令与结果###
写完模型配置之后,我们可以通过以下命令来训练模型:
```bash
cd demo/seqToseq/translation
./train.sh
```
`train.sh` 的内容如下所示:
```bash
paddle train \
--config='translation/train.conf' \
--save_dir='translation/model' \
--use_gpu=false \
--num_passes=16 \
--show_parameter_stats_period=100 \
--trainer_count=4 \
--log_period=10 \
--dot_period=5 \
2>&1 | tee 'translation/train.log'
```
- config: 设置神经网络的配置文件
- save_dir: 设置保存模型的输出路径
- use_gpu: 是否使用GPU训练,这里设置为使用CPU
- num_passes: 设置passes的数量。paddle中的一条pass表示训练数据集中所有的样本一次
- show_parameter_stats_period: 这里每隔100个batch显示一次参数统计信息
- trainer_count: 设置CPU线程数或者GPU设备数
- log_period: 这里每隔10个batch打印一次日志
- dot_period: 这里每隔5个batch打印一个点"."
训练的损失函数默认每隔10个batch打印一次,你将会看到如下消息:
I0719 19:16:45.952062 15563 TrainerInternal.cpp:160] Batch=10 samples=500 AvgCost=198.475 CurrentCost=198.475 Eval: classification_error_evaluator=0.737155 CurrentEval: classification_error_evaluator=0.737155
I0719 19:17:56.707319 15563 TrainerInternal.cpp:160] Batch=20 samples=1000 AvgCost=157.479 CurrentCost=116.483 Eval: classification_error_evaluator=0.698392 CurrentEval: classification_error_evaluator=0.659065
.....
- AvgCost:从第0个batch到当前batch的平均cost
- CurrentCost:当前batch的cost
- classification\_error\_evaluator(Eval):从第0个评估到当前评估中,每个单词的预测错误率
- classification\_error\_evaluator(CurrentEval):当前评估中,每个单词的预测错误率
当classification\_error\_evaluator的值低于0.35时,模型就训练成功了。
## 文本生成 ##
### 简介###
一般而言,NMT模型受制于源语句的编码,并且通过给出当前目标单词来预测下一个目标单词。在训练过程中,当前单词在相比之下总是被当作真值(ground truth)。在生成过程中,当前单词是解码器最后一步的输出,这来自于PaddlePaddle的内存中。
而且,我们使用集束搜索(Beam Search)来生成序列。集束搜索使用广度优先搜索来构建搜索树。对于树的每一层,生成当前层的所有后继状态,并将它们按照启发代价(heuristic cost)升序排列。但是这种方法在每层只保存预设数量的最优状态(这个数量称为beam size)。
### 预训练的模型 ###
我们在拥有50个节点的集群中训练模型,每个节点有两个6核CPU。我们在5天里训练了16个pass,其中每条pass花费了7个小时。model_dir中有16个子目录,每个里面都包含202MB的全部的模型参数。然后我们发现pass-00012的模型有着最高的BLEU值27.77(参考文献[BLEU: a Method for Automatic Evaluation of Machine Translation](http://www.aclweb.org/anthology/P02-1040.pdf))。要下载解压这个模型,只需在linux下运行如下命令:
```bash
cd demo/seqToseq/data
./wmt14_model.sh
```
### 使用PaddlePaddle生成模型 ###
在翻译法语句子之前,我们需要创建模型配置文件。这里是一个例子`demo/seqToseq/translation/gen.conf`。前三行import了定义network,job_mode和attention_mode的python函数。
```python
from seqToseq_net import *
is_generating = True
################## Data Definiation #####################
gen_conf = seq_to_seq_data(data_dir = "./data/pre-wmt14",
is_generating = is_generating,
gen_result = "./translation/gen_result")
############## Algorithm Configuration ##################
settings(
learning_method = AdamOptimizer(),
batch_size = 1,
learning_rate = 0)
################# Network configure #####################
gru_encoder_decoder(gen_conf, is_generating)
```
1. **Data Definiation**:在示例中我们定义了一个序列到序列的生成数据。它返回gen_conf作为配置,其输入参数如下:
- data_dir:生成数据的目录
 - is_generating:这个配置是否用来生成,这里设置为True
 - gen_result:保存生成结果的文件
2. **Algorithm Configuration**:在生成过程中我们使用SGD训练算法,并指定batch_size为1(每次生成1个序列),learning_rate为0
3. **Network Architecture**:本质上与训练模型一样
### 生成模型的命令与结果 ###
写完模型配置之后,我们可以通过以下命令来进行从法语到英语的文本翻译:
```bash
cd demo/seqToseq/translation
./gen.sh
```
`gen.sh` 的内容如下所示。与训练模型不同的是,这里有一些不同的参数需要指定:
```bash
paddle train \
--job=test \
--config='translation/gen.conf' \
--save_dir='data/wmt14_model' \
--use_gpu=true \
--num_passes=13 \
--test_pass=12 \
--trainer_count=1 \
2>&1 | tee 'translation/gen.log'
```
- job:设置任务的模式为测试
- save_dir:存储模型的路径
- num_passes and test_pass:从test_pass到(num_passes - 1)加载模型参数,这里只加载 `data/wmt14_model/pass-00012`
你将会看到这样的消息:
I0706 14:48:31.178915 31441 GradientMachine.cpp:143] Loading parameters from data/wmt14_model/pass-00012
I0706 14:48:40.012039 31441 Tester.cpp:125] Batch=100 samples=100 AvgCost=0
I0706 14:48:48.898632 31441 Tester.cpp:125] Batch=200 samples=200 AvgCost=0
...
然后在`demo/seqToseq/translation/gen_result`中的生成结果如下所示:
0
0 -11.1314 The <unk> <unk> about the width of the seats while large controls are at stake <e>
1 -11.1519 The <unk> <unk> on the width of the seats while large controls are at stake <e>
2 -11.5988 The <unk> <unk> about the width of the seats while large controls are at stake . <e>
1
0 -24.4149 The dispute is between the major aircraft manufacturers about the width of the tourist seats on the <unk> flights , paving the way for a <unk> confrontation during the month of the Dubai <unk> . <e>
1 -26.9524 The dispute is between the major aircraft manufacturers about the width of the tourist seats on the <unk> flights , paving the way for a <unk> confrontation during the month of Dubai &apos; s <unk> . <e>
2 -27.9574 The dispute is between the major aircraft manufacturers about the width of the tourist seats on the <unk> flights , paving the way for a <unk> confrontation during the month of Dubai &apos; s Dubai <unk> . <e>
...
- 这是集束搜索的结果,其中beam size是3
- 第一行的“0”和第6行的“1”表示生成数据的序列id
- 其他六行列出了集束搜索的结果
- 第二列是集束搜索的得分(从大到小)
- 第三列是生成的英语序列
- 有两个特殊标识:
- `<e>`:序列的结尾
- `<unk>`:不包含在字典中的单词
### BLEU评估 ###
对机器翻译的人工评估工作很广泛但也很昂贵。一篇论文 [BLEU: a Method for Automatic Evaluation of Machine Translation](http://www.aclweb.org/anthology/P02-1040.pdf) 展示了一种方法,当需要快速或者频繁的评估时,使用自动的替补来替代经验丰富的人工评判。[Moses](http://www.statmt.org/moses/) 是一个统计学的机器翻译系统,我们使用其中的 [multi-bleu.perl](https://github.com/moses-smt/mosesdecoder/blob/master/scripts/generic/multi-bleu.perl) 来做BLEU评估。运行以下命令来下载这个脚本:
```bash
cd demo/seqToseq/translation
./moses_bleu.sh
```
由于标准的翻译结果已经下载到这里`data/wmt14/gen/ntst14.trg`,我们可以运行以下命令来做BLEU评估。
```bash
cd demo/seqToseq/translation
./eval_bleu.sh FILE BEAMSIZE
```
- FILE:生成的结果文件
- BEAMSIZE:集束搜索中的扩展广度
# Text generation Tutorial #
Sequence to sequence has been proven to be a powerful model for language generation. It can be used for machine translation, query rewriting, image captioning, etc.
This tutorial guides you through training a sequence to sequence model for neural machine translation (NMT) network that translates French to English.
We follow the paper [Neural Machine Translation by Jointly Learning to Align and Translate](http://arxiv.org/abs/1409.0473) , which details the model architecture and training procedure for good performance on WMT-14 dataset. This tutorial reproduces this result in PaddlePaddle.
We thank @caoying for the pull request that defines the model architecture and solver configurations.
## Data Preparation ##
### Download and Extract ###
Download the WMT-14 dataset from [http://www-lium.univ-lemans.fr/~schwenk/cslm\_joint\_paper/](http://www-lium.univ-lemans.fr/~schwenk/cslm_joint_paper/), extract it, and divide Develop and Test data into separate folder.
- **Train data**: [bitexts (after selection)](http://www-lium.univ-lemans.fr/~schwenk/cslm_joint_paper/data/bitexts.tgz)
- **Develop and Test data**: [dev+test data](http://www-lium.univ-lemans.fr/~schwenk/cslm_joint_paper/data/dev+test.tgz)
To do this, simply run the following commands in linux, otherwise, you need to download, extract, divide, and rename the file suffix respectively.
```bash
cd demo/seqToseq/data
./wmt14_data.sh
```
We should find that the dataset `wmt14` has three folders as shown in the following table.
<table border="2" cellspacing="0" cellpadding="6" rules="all" frame="border">
<colgroup>
<col class="left" />
<col class="left" />
<col class="left" />
<col class="left" />
</colgroup>
<thead>
<tr>
<th scope="col" class="left">folder name</th>
<th scope="col" class="left">French-English parallel corpora file</th>
<th scope="col" class="left">number of total file</th>
<th scope="col" class="left">size</th>
</tr>
</thead>
<tbody>
<tr>
<td class="left">train_data</td>
<td class="left">ccb2_pc30.src, ccb2_pc30.trg, etc</td>
<td class="left">twelve</td>
<td class="left">3.55G</td>
</tr>
<tr>
<td class="left">test_data</td>
<td class="left">ntst1213.src, ntst1213.trg</td>
<td class="left">two</td>
<td class="left">1636k</td>
</tr>
<tr>
<td class="left">gen_data</td>
<td class="left">ntst14.src, ntst14.trg</td>
<td class="left">two</td>
<td class="left">864k</td>
</tr>
</tbody>
</table>
<br/>
- Each folder has French-English parallel corpora
- **XXX.src** are source French files; **XXX.trg** are target English files.
- The number of lines of **XXX.src** and **XXX.trg** should be the same.
- Each line is a French/English sentence.
- There is a one-to-one correspondence between the sentence at the i-th line of **XXX.src** and **XXX.trg**.
### User Defined Dataset ###
If you need to do other sequence-to-sequence tasks, such as Paraphrasing, you only need to organize the data as follows, and place them in `demo/seqToseq/data`:
dataset
train
file1.src file1.trg
file2.src file2.trg
......
test
file1.src file1.trg
file2.src file2.trg
......
gen
file1.src file1.trg
file2.src file2.trg
......
- 1st directory: dataset folder name
- 2nd directory: folder of train, test, and gen. The names of these three folders are fixed.
- 3rd file: Source-Target parallel corpora files.
- **XXX.src** are source files, **XXX.trg** are target files.
- Each line of the file must be a sequence.
- There should be a one-to-one correspondence between the i-th sequence of **XXX.src** and **XXX.trg**.
## Data Preprocess ##
### Preprocessing Workflow ###
- Concat each Source-Target parallel corpora to be one file:
- concat each **XXX.src** and **XXX.trg** to be **XXX**.
- the i-th line of **XXX** = the i-th line of **XXX.src** + '\t' + the i-th line of **XXX.trg**
- Build source and target dictionary of train data, each dictionary has DICTSIZE words:
- the most frequent (DICTSIZE-3) words
- 3 special tokens:
- `<s>`: the start of a sequence
- `<e>`: the end of a sequence
- `<unk>`: a word not included in dictionary
### Preprocessing Command and Result
The general command for preprocessing the dataset is:
```python
cd demo/seqToseq/
python preprocess.py -i INPUT [-d DICTSIZE] [-m]
```
- `-i INPUT`: the path of input original dataset
- `-d DICTSIZE`: the specified word count of dictionary, if not set, dictionary will contain all the words in input dataset
- `-m --mergeDict`: merge source and target dictionary, thus, two dictionaries have the same content
And you will see messages like this:
concat parallel corpora for dataset
build source dictionary for train data
build target dictionary for train data
dictionary size is XXX
Here, you can simply run the command:
```python
python preprocess.py -i data/wmt14 -d 30000
```
It will take several minutes, and store the preprocessed dataset in `demo/seqToseq/data/pre-wmt14`, the directory has following structure.
train test gen train.list test.list gen.list src.dict trg.dict
- **train, test, gen**: folder contains French-English parallel corpora of train data, test data and gen data respectively. Each line of file in folder contains two parts, the former is a French sequence, and the latter is a corresponding English sequence.
- **train.list, test.list, gen.list**: text contains a file list in train folder, test folder and gen folder respectively
- **src.dict, trg.dict**: source (French) / target (English) dictionary, each dictionary has 30000 words: the most frequent 29997 words and 3 special tokens
## Model Training ##
### Introduction ###
Neural machine translation (NMT) aims at building a single neural network that can be jointly tuned to maximize translation performance. Recently proposed NMT models often belong to a family of encoder–decoder models. Encoder-Decoder models encode a source sentence into a fixed-length vector from which a decoder generates a target sentence.
In this task, we use an extension to the encoder–decoder model which learns to align and translate jointly. Each time the model generates a word in a translation, it searches for a set of positions in the source sentence for the most relevant information. The decoder predicts a target word based on the context vectors associated with these source positions and all the previous generated target words. For more detailed explanation, readers can refer to paper [Neural Machine Translation by Jointly Learning to Align and Translate](http://arxiv.org/abs/1409.0473).
The most distinguishing feature of this model is that it doesn't encode an input sentence into a single fixed-length vector. Instead, it encodes the input sentence into a sequence of vectors, where one vector corresponds to an input element. A subset of these vectors is chosen adaptively while decoding the translated sentence. This frees a NMT model from having to squash all the information of a source sentence, regardless of its length, into a fixed-length vector. The improvement of this model is more apparent for longer sentences, but the improvement can be observed for sentences of any length.
<center>![](./encoder-decoder-attention-model.png)</center>
<center>Figure 1. Encoder-Decoder-Attention-Model</center>
### Training Model in PaddlePaddle ###
We need to create a model config file before training. Here is an example `demo/seqToseq/translation/train.conf`. The first three lines import python function for defining network, and define the job_mode and attention_mode.
```python
from seqToseq_net import *
is_generating = False
### Data Definiation
train_conf = seq_to_seq_data(data_dir = "./data/pre-wmt14",
is_generating = is_generating)
### Algorithm Configuration
settings(
learning_method = AdamOptimizer(),
batch_size = 50,
learning_rate = 5e-4)
### Network Architecture
gru_encoder_decoder(train_conf, is_generating)
```
1. **Data Definiation**: We define a SeqToSeq train and test data in our example. It returns train_conf as the configuration, following is its input arguments:
- data_dir: directory of train data and test data
- is\_generating: whether this config is used for generating, here is false
2. **Algorithm Configuration**: We use the SGD training algorithm (default), ADAM learning method in our example, specify batch_size as 50, and learning rate as 5e-4.
3. **Network Architecture**: We use an attention version of GRU Encoder-Decoder network in our example. It consists of a bidirectional GRU as an encoder and a decoder that emulates searching through a source sentence during decoding a translation.
### Training Command and Result###
After writing the model config, we can train the model by running the command:
```bash
cd demo/seqToseq/translation
./train.sh
```
The `train.sh` is shown as follows:
```bash
paddle train \
--config='translation/train.conf' \
--save_dir='translation/model' \
--use_gpu=false \
--num_passes=16 \
--show_parameter_stats_period=100 \
--trainer_count=4 \
--log_period=10 \
--dot_period=5 \
2>&1 | tee 'translation/train.log'
```
- config: set config of neural network
- save_dir: set output path to save models
- use_gpu: whether to use GPU to train, here use CPU
- num_passes: set number of passes. One pass in paddle means training all samples in dataset one time
- show_parameter_stats_period: here show parameter statistic every 100 batches
- trainer_count: set number of CPU threads or GPU devices
- log_period: here print log every 10 batches
- dot_period: here print '.' every 5 batches
The training loss function is printed every 10 batches by default, and you will see messages like this:
I0719 19:16:45.952062 15563 TrainerInternal.cpp:160] Batch=10 samples=500 AvgCost=198.475 CurrentCost=198.475 Eval: classification_error_evaluator=0.737155 CurrentEval: classification_error_evaluator=0.737155
I0719 19:17:56.707319 15563 TrainerInternal.cpp:160] Batch=20 samples=1000 AvgCost=157.479 CurrentCost=116.483 Eval: classification_error_evaluator=0.698392 CurrentEval: classification_error_evaluator=0.659065
.....
- AvgCost: Average Cost from 0th batch to current batch
- CurrentCost: Cost in current batch
- classification\_error\_evaluator(Eval): False prediction rate for each word from 0th evaluation to current evaluation
- classification\_error\_evaluator(CurrentEval): False prediction rate for each word in current evaluation
And when the classification\_error\_evaluator is less than 0.35, the model is trained successfully.
## Text Generation ##
### Introduction ###
Generally speaking, the NMT model is conditioned on the encodings of the source sentence, and predicts the next target word given the current target word. In the training process, the current word is always known, since it is the ground truth. By contrast, in the generating process, the current word is the output of the decoder at the last time step, which is accessed from a memory in PaddlePaddle.
Besides, we use Beam Search to generate sequences. Beam search uses breadth-first search to build its search tree. At each level of the tree, it generates all successors of the states at the current level, sorting them in increasing order of heuristic cost. However, it only stores a predetermined number of best states at each level (called the beam size).
### Pretrained model ###
We trained the model on a cluster with 50 nodes, each of which has two 6-core CPUs. We trained 16 passes in 5 days, where each pass takes 7 hours. The model_dir has 16 sub-folders, each of which contains the complete model parameters (202MB). We found that the pass-00012 model has the highest BLEU score, 27.77 (see paper [BLEU: a Method for Automatic Evaluation of Machine Translation](http://www.aclweb.org/anthology/P02-1040.pdf)). To download and extract this model, simply run the following commands in linux.
```bash
cd demo/seqToseq/data
./wmt14_model.sh
```
### Generating Model in PaddlePaddle ###
We need to create a model config file before translating French sequence. Here is an example `demo/seqToseq/translation/gen.conf`, the first three lines import python function for defining network, and define the job\_mode and attention\_mode.
```python
from seqToseq_net import *
is_generating = True
################## Data Definiation #####################
gen_conf = seq_to_seq_data(data_dir = "./data/pre-wmt14",
is_generating = is_generating,
gen_result = "./translation/gen_result")
############## Algorithm Configuration ##################
settings(
learning_method = AdamOptimizer(),
batch_size = 1,
learning_rate = 0)
################# Network configure #####################
gru_encoder_decoder(gen_conf, is_generating)
```
1. **Data Definiation**: We define SeqToSeq gen data in our example. It returns gen_conf as the configuration; its input arguments are as follows:
- data\_dir: directory of gen data
  - is\_generating: whether this config is used for generating, here is true
  - gen\_result: file to store the generation result
2. **Algorithm Configuration**: We use the SGD training algorithm in generation, and specify batch_size as 1 (each time generate one sequence), and learning rate as 0.
3. **Network Architecture**: Essentially the same as the training model.
### Generating Command and Result ###
After writing the model config, we can do text translation from French to English by running the command:
```bash
cd demo/seqToseq/translation
./gen.sh
```
The `gen.sh` is shown as follows, unlike training, there are some different arguments to specify:
```bash
paddle train \
--job=test \
--config='translation/gen.conf' \
--save_dir='data/wmt14_model' \
--use_gpu=true \
--num_passes=13 \
--test_pass=12 \
--trainer_count=1 \
2>&1 | tee 'translation/gen.log'
```
- job: set job mode to test
- save_dir: the path of saved models
- num_passes and test_pass: loading model parameters from test_pass to (num_passes - 1), here only loads `data/wmt14_model/pass-00012`
You will see messages like this:
I0706 14:48:31.178915 31441 GradientMachine.cpp:143] Loading parameters from data/wmt14_model/pass-00012
I0706 14:48:40.012039 31441 Tester.cpp:125] Batch=100 samples=100 AvgCost=0
I0706 14:48:48.898632 31441 Tester.cpp:125] Batch=200 samples=200 AvgCost=0
...
And the generating result in `demo/seqToseq/translation/gen_result` looks like:
0
0 -11.1314 The <unk> <unk> about the width of the seats while large controls are at stake <e>
1 -11.1519 The <unk> <unk> on the width of the seats while large controls are at stake <e>
2 -11.5988 The <unk> <unk> about the width of the seats while large controls are at stake . <e>
1
0 -24.4149 The dispute is between the major aircraft manufacturers about the width of the tourist seats on the <unk> flights , paving the way for a <unk> confrontation during the month of the Dubai <unk> . <e>
1 -26.9524 The dispute is between the major aircraft manufacturers about the width of the tourist seats on the <unk> flights , paving the way for a <unk> confrontation during the month of Dubai &apos; s <unk> . <e>
2 -27.9574 The dispute is between the major aircraft manufacturers about the width of the tourist seats on the <unk> flights , paving the way for a <unk> confrontation during the month of Dubai &apos; s Dubai <unk> . <e>
...
- This is the beam search result, where beam size is 3
- '0' in 1st-line and '1' in 6th-line mean the sequence-id in gen data
- Other six lines list the beam search results
- The 2nd-column is the score of beam search (from large to small)
- The 3rd-column is the generated English sequence
- There are 2 special tokens:
- `<e>`: the end of a sequence
- `<unk>`: a word not included in dictionary
### BLEU Evaluation ###
Human evaluations of machine translation are extensive but expensive. Paper [BLEU: a Method for Automatic Evaluation of Machine Translation](http://www.aclweb.org/anthology/P02-1040.pdf) presents a method as an automated understudy to skilled human judges which substitutes for them when there is need for quick or frequent evaluations. [Moses](http://www.statmt.org/moses/) is a statistical machine translation system, and we use its [multi-bleu.perl](https://github.com/moses-smt/mosesdecoder/blob/master/scripts/generic/multi-bleu.perl) script to do BLEU evaluation. To download this script, simply run the following command:
```bash
cd demo/seqToseq/translation
./moses_bleu.sh
```
Since the standard translation is already downloaded as `data/wmt14/gen/ntst14.trg`, we can do BLEU evaluation by running the command:
```bash
cd demo/seqToseq/translation
./eval_bleu.sh FILE BEAMSIZE
```
- FILE: the generation result file
- BEAMSIZE: expand width in beam search
The tutorials in v1_api_tutorials are using v1_api currently, and will be upgraded to v2_api later.
Thus, v1_api_tutorials is a temporary directory. We decide not to maintain it and will delete it in future.
Please go to [PaddlePaddle/book](https://github.com/PaddlePaddle/book) and
[PaddlePaddle/models](https://github.com/PaddlePaddle/models) to learn PaddlePaddle.
hash: 1b9b07408ca7fac27a374dc2ccd2433e4bff090484008a037df967284949a582 hash: 328e7b9b7306b45e7b9879139a9f86698115981f6283032e1312093a6a6ddb04
updated: 2017-08-07T23:37:48.867469328Z updated: 2017-10-16T08:00:23.484693528Z
imports: imports:
- name: github.com/alecthomas/gometalinter
version: bae2f1293d092fd8167939d5108d1b025eaef9de
- name: github.com/beorn7/perks - name: github.com/beorn7/perks
version: 4c0e84591b9aa9e6dcfdf3e020114cd81f89d5f9 version: 4c0e84591b9aa9e6dcfdf3e020114cd81f89d5f9
subpackages: subpackages:
...@@ -10,7 +12,7 @@ imports: ...@@ -10,7 +12,7 @@ imports:
- name: github.com/cockroachdb/cmux - name: github.com/cockroachdb/cmux
version: 112f0506e7743d64a6eb8fedbcff13d9979bbf92 version: 112f0506e7743d64a6eb8fedbcff13d9979bbf92
- name: github.com/coreos/etcd - name: github.com/coreos/etcd
version: d0d1a87aa96ae14914751d42264262cb69eda170 version: f1d7dd87da3e8feab4aaf675b8e29c6a5ed5f58b
subpackages: subpackages:
- alarm - alarm
- auth - auth
...@@ -149,7 +151,7 @@ imports: ...@@ -149,7 +151,7 @@ imports:
- name: github.com/satori/go.uuid - name: github.com/satori/go.uuid
version: 879c5887cd475cd7864858769793b2ceb0d44feb version: 879c5887cd475cd7864858769793b2ceb0d44feb
- name: github.com/sirupsen/logrus - name: github.com/sirupsen/logrus
version: a3f95b5c423586578a4e099b11a46c2479628cac version: f006c2ac4710855cf0f916dd6b77acf6b048dc6e
- name: github.com/topicai/candy - name: github.com/topicai/candy
version: 1b9030d056fa9f8c4b1f9c91b52fe4b8ab4cd8cc version: 1b9030d056fa9f8c4b1f9c91b52fe4b8ab4cd8cc
- name: github.com/ugorji/go - name: github.com/ugorji/go
...@@ -159,12 +161,13 @@ imports: ...@@ -159,12 +161,13 @@ imports:
- name: github.com/xiang90/probing - name: github.com/xiang90/probing
version: 07dd2e8dfe18522e9c447ba95f2fe95262f63bb2 version: 07dd2e8dfe18522e9c447ba95f2fe95262f63bb2
- name: golang.org/x/crypto - name: golang.org/x/crypto
version: 1351f936d976c60a0a48d728281922cf63eafb8d version: 9419663f5a44be8b34ca85f08abc5fe1be11f8a3
repo: https://github.com/golang/crypto.git repo: https://github.com/golang/crypto.git
vcs: git vcs: git
subpackages: subpackages:
- bcrypt - bcrypt
- blowfish - blowfish
- ssh/terminal
- name: golang.org/x/net - name: golang.org/x/net
version: c8c74377599bd978aee1cf3b9b63a8634051cec2 version: c8c74377599bd978aee1cf3b9b63a8634051cec2
subpackages: subpackages:
...@@ -219,3 +222,4 @@ testImports: ...@@ -219,3 +222,4 @@ testImports:
version: 05e8a0eda380579888eb53c394909df027f06991 version: 05e8a0eda380579888eb53c394909df027f06991
subpackages: subpackages:
- assert - assert
...@@ -24,3 +24,5 @@ import: ...@@ -24,3 +24,5 @@ import:
vcs: git vcs: git
- package: github.com/satori/go.uuid - package: github.com/satori/go.uuid
version: v1.1.0 version: v1.1.0
- package: github.com/alecthomas/gometalinter
version: v1.2.1
...@@ -26,7 +26,7 @@ FILE(GLOB PY_PADDLE_PYTHON_FILES ${PADDLE_SOURCE_DIR}/paddle/py_paddle/*.py) ...@@ -26,7 +26,7 @@ FILE(GLOB PY_PADDLE_PYTHON_FILES ${PADDLE_SOURCE_DIR}/paddle/py_paddle/*.py)
SET_SOURCE_FILES_PROPERTIES(Paddle.i PROPERTIES CPLUSPLUS ON) SET_SOURCE_FILES_PROPERTIES(Paddle.i PROPERTIES CPLUSPLUS ON)
SET(CMAKE_SWIG_OUTDIR ${CMAKE_CURRENT_BINARY_DIR}) SET(CMAKE_SWIG_OUTDIR ${CMAKE_CURRENT_BINARY_DIR})
SET(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -Wno-parentheses-equality -Wno-missing-field-initializers -Wno-self-assign") SET(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -Wno-parentheses-equality -Wno-missing-field-initializers -Wno-self-assign -ftls-model=global-dynamic")
SET(SWIG_MODULE_swig_paddle_EXTRA_DEPS SET(SWIG_MODULE_swig_paddle_EXTRA_DEPS
paddle_parameter paddle_parameter
......
...@@ -19,10 +19,10 @@ cc_test(scope_test SRCS scope_test.cc DEPS scope) ...@@ -19,10 +19,10 @@ cc_test(scope_test SRCS scope_test.cc DEPS scope)
proto_library(framework_proto SRCS framework.proto) proto_library(framework_proto SRCS framework.proto)
cc_library(attribute SRCS attribute.cc DEPS framework_proto) cc_library(attribute SRCS attribute.cc DEPS framework_proto)
cc_library(proto_desc SRCS var_desc.cc op_desc.cc block_desc.cc program_desc.cc DEPS attribute ddim) cc_library(proto_desc SRCS var_desc.cc op_desc.cc block_desc.cc program_desc.cc DEPS attribute ddim op_info)
cc_library(op_proto_maker SRCS op_proto_maker.cc DEPS framework_proto attribute) cc_library(op_proto_maker SRCS op_proto_maker.cc DEPS framework_proto attribute)
cc_test(op_proto_maker_test SRCS op_proto_maker_test.cc DEPS op_proto_maker) cc_test(op_proto_maker_test SRCS op_proto_maker_test.cc DEPS op_proto_maker)
cc_library(op_info SRCS op_info.cc DEPS attribute framework_proto proto_desc) cc_library(op_info SRCS op_info.cc DEPS attribute framework_proto)
cc_library(operator SRCS operator.cc DEPS op_info device_context tensor scope proto_desc) cc_library(operator SRCS operator.cc DEPS op_info device_context tensor scope proto_desc)
cc_test(operator_test SRCS operator_test.cc DEPS operator op_registry) cc_test(operator_test SRCS operator_test.cc DEPS operator op_registry)
...@@ -42,5 +42,17 @@ add_custom_command(TARGET framework_py_proto POST_BUILD ...@@ -42,5 +42,17 @@ add_custom_command(TARGET framework_py_proto POST_BUILD
cc_library(backward SRCS backward.cc DEPS net_op) cc_library(backward SRCS backward.cc DEPS net_op)
cc_test(backward_test SRCS backward_test.cc DEPS backward recurrent_op device_context) cc_test(backward_test SRCS backward_test.cc DEPS backward recurrent_op device_context)
# Executor library, built from executor.cc with the dependencies listed below.
cc_library(executor SRCS executor.cc DEPS op_registry device_context scope framework_proto backward)
# Operator targets that executor_test depends on in addition to the executor
# library itself; kept in one variable so both branches below share the list.
set(EXECUTOR_TEST_OP elementwise_add_op gaussian_random_op feed_op fetch_op
mul_op sum_op squared_l2_distance_op fill_constant_op sgd_op mean_op)
# Register executor_test through nv_test when WITH_GPU is set, and through
# cc_test otherwise; sources and dependencies are the same in both cases.
if(WITH_GPU)
nv_test(executor_test SRCS executor_test.cc DEPS executor ${EXECUTOR_TEST_OP})
else()
cc_test(executor_test SRCS executor_test.cc DEPS executor ${EXECUTOR_TEST_OP})
endif()
cc_library(tensor_array SRCS tensor_array.cc DEPS lod_tensor) cc_library(tensor_array SRCS tensor_array.cc DEPS lod_tensor)
cc_test(tensor_array_test SRCS tensor_array_test.cc DEPS tensor_array place) cc_test(tensor_array_test SRCS tensor_array_test.cc DEPS tensor_array place)
cc_library(selected_rows SRCS selected_rows.cc DEPS tensor)
cc_test(selected_rows_test SRCS selected_rows_test.cc DEPS selected_rows)
...@@ -28,14 +28,15 @@ namespace paddle { ...@@ -28,14 +28,15 @@ namespace paddle {
namespace framework { namespace framework {
static inline std::unique_ptr<OperatorBase> CreateGradOp( static inline std::unique_ptr<OperatorBase> CreateGradOp(
const OperatorBase& op) { const OperatorBase& op, const std::unordered_set<std::string>& no_grad_set,
std::unordered_map<std::string, std::string>* grad_to_var) {
OpDescBind op_desc; OpDescBind op_desc;
op_desc.SetInputMap(op.Inputs()); op_desc.SetInputMap(op.Inputs());
op_desc.SetOutputMap(op.Outputs()); op_desc.SetOutputMap(op.Outputs());
op_desc.SetType(op.Type()); op_desc.SetType(op.Type());
op_desc.SetAttrMap(op.Attrs()); op_desc.SetAttrMap(op.Attrs());
auto& info = OpInfoMap::Instance().Get(op.Type()); auto& info = OpInfoMap::Instance().Get(op.Type());
auto grad_descs = info.GradOpMaker()(op_desc); auto grad_descs = info.GradOpMaker()(op_desc, no_grad_set, grad_to_var);
std::vector<std::unique_ptr<OperatorBase>> grad_ops; std::vector<std::unique_ptr<OperatorBase>> grad_ops;
grad_ops.reserve(grad_descs.size()); grad_ops.reserve(grad_descs.size());
std::transform(grad_descs.begin(), grad_descs.end(), std::transform(grad_descs.begin(), grad_descs.end(),
...@@ -98,7 +99,9 @@ static std::unique_ptr<OperatorBase> NOP() { ...@@ -98,7 +99,9 @@ static std::unique_ptr<OperatorBase> NOP() {
// See Backward.h for details // See Backward.h for details
static std::unique_ptr<OperatorBase> BackwardRecursive( static std::unique_ptr<OperatorBase> BackwardRecursive(
const OperatorBase& forwardOp, const OperatorBase& forwardOp,
std::unordered_set<std::string>& no_grad_names, size_t& uniq_id) { std::unordered_set<std::string>& no_grad_names,
std::unordered_map<std::string, std::string>* grad_to_var,
size_t& uniq_id) {
// If all input gradients of forwarding operator do not need to calculate, // If all input gradients of forwarding operator do not need to calculate,
// just return an NOP. Not return null ptr because NOP does not take // just return an NOP. Not return null ptr because NOP does not take
// too much time for calculation, but it is useful for simplifying logic. // too much time for calculation, but it is useful for simplifying logic.
...@@ -136,7 +139,7 @@ static std::unique_ptr<OperatorBase> BackwardRecursive( ...@@ -136,7 +139,7 @@ static std::unique_ptr<OperatorBase> BackwardRecursive(
for (auto it = forwardNet.ops_.rbegin(); it != forwardNet.ops_.rend(); for (auto it = forwardNet.ops_.rbegin(); it != forwardNet.ops_.rend();
++it, ++local_op_id) { ++it, ++local_op_id) {
auto& fwd = *it; auto& fwd = *it;
auto bwd = BackwardRecursive(*fwd, no_grad_names, uniq_id); auto bwd = BackwardRecursive(*fwd, no_grad_names, grad_to_var, uniq_id);
ForEachVarName(bwd->Outputs(), ForEachVarName(bwd->Outputs(),
[&dup_output_ops, local_op_id](const std::string& out) { [&dup_output_ops, local_op_id](const std::string& out) {
dup_output_ops[out].emplace_back(local_op_id); dup_output_ops[out].emplace_back(local_op_id);
...@@ -172,30 +175,14 @@ static std::unique_ptr<OperatorBase> BackwardRecursive( ...@@ -172,30 +175,14 @@ static std::unique_ptr<OperatorBase> BackwardRecursive(
std::to_string(i)); std::to_string(i));
net->ops_[op_offset]->Rename(name, dup_outputs.back()); net->ops_[op_offset]->Rename(name, dup_outputs.back());
} }
// collect all the offset to append `add` op for each alias // collect all the offset for each alias,
// // insert a sum operator to add all aliases to output
// one variable is shared between multiple operators.
// insert add operator one by one, then add it to output
for (size_t output_idx = 0; output_idx < dup_outputs.size() - 1;
++output_idx) {
auto insert_add_x = dup_outputs[output_idx];
auto insert_add_y = dup_outputs[output_idx + 1];
auto insert_add_out = name + "@SHARED@" + std::to_string(output_idx);
// first add op inserted
if (output_idx == dup_outputs.size() - 2) {
insert_add_out = name;
}
if (output_idx != 0) {
insert_add_y = name + "@SHARED@" + std::to_string(output_idx - 1);
}
insert_position.push_back( insert_position.push_back(
{dup_op.back(), {dup_op.back(), OpRegistry::CreateOp("sum", {{"X", dup_outputs}},
OpRegistry::CreateOp("sum", {{"X", {insert_add_x, insert_add_y}}}, {{"Out", {name}}}, {})});
{{"Out", {insert_add_out}}}, {})});
}
} }
// make sure the inserted `add` ops follow the BFS order. // make sure the inserted `sum` ops follow the BFS order.
insert_position.sort( insert_position.sort(
[](const Pos& l, const Pos& r) { return l.first > r.first; }); [](const Pos& l, const Pos& r) { return l.first > r.first; });
...@@ -203,7 +190,8 @@ static std::unique_ptr<OperatorBase> BackwardRecursive( ...@@ -203,7 +190,8 @@ static std::unique_ptr<OperatorBase> BackwardRecursive(
net->InsertOp(pos.first + 1, std::move(pos.second)); net->InsertOp(pos.first + 1, std::move(pos.second));
} }
} else { } else {
std::unique_ptr<OperatorBase> grad_op(CreateGradOp(forwardOp)); std::unique_ptr<OperatorBase> grad_op(
CreateGradOp(forwardOp, no_grad_names, grad_to_var));
ForEachVarName(grad_op->Inputs(), [&no_grad_names, &net, &grad_op]( ForEachVarName(grad_op->Inputs(), [&no_grad_names, &net, &grad_op](
const std::string& grad_input) { const std::string& grad_input) {
...@@ -242,7 +230,7 @@ static std::unique_ptr<OperatorBase> BackwardRecursive( ...@@ -242,7 +230,7 @@ static std::unique_ptr<OperatorBase> BackwardRecursive(
*static_cast<const OperatorBase*>(&rnnop.stepnet()); *static_cast<const OperatorBase*>(&rnnop.stepnet());
// create stepnet's gradient op // create stepnet's gradient op
rnn_grad_op->set_stepnet( rnn_grad_op->set_stepnet(
BackwardRecursive(stepnet_op, no_grad_names, uniq_id)); BackwardRecursive(stepnet_op, no_grad_names, grad_to_var, uniq_id));
} }
if (net->ops_.empty()) { // Current no aux op is added to network if (net->ops_.empty()) { // Current no aux op is added to network
...@@ -269,7 +257,8 @@ std::unique_ptr<OperatorBase> Backward( ...@@ -269,7 +257,8 @@ std::unique_ptr<OperatorBase> Backward(
no_grad_names.insert(name + kGradVarSuffix); no_grad_names.insert(name + kGradVarSuffix);
} }
size_t uid = 0; size_t uid = 0;
return BackwardRecursive(forwardOp, no_grad_names, uid); std::unordered_map<std::string, std::string> grad_to_var;
return BackwardRecursive(forwardOp, no_grad_names, &grad_to_var, uid);
} }
// ==================================== // // ==================================== //
...@@ -284,30 +273,61 @@ static bool AllGradInSet(const std::vector<std::string>& names, ...@@ -284,30 +273,61 @@ static bool AllGradInSet(const std::vector<std::string>& names,
return true; return true;
} }
static void CreateGradVarInBlock(
size_t grad_op_start_index,
const std::unordered_map<std::string, std::string>& param_name_map,
BlockDescBind* block_desc,
std::unordered_map<std::string, GradVarInfo>* grad_var_record) {
auto ops = block_desc->AllOps();
for (size_t op_index = grad_op_start_index; op_index < ops.size();
++op_index) {
ForEachVarName(ops[op_index]->Outputs(),
[&](const std::string& grad_var_name) {
if (block_desc->HasVar(grad_var_name)) {
return false;
}
block_desc->Var(grad_var_name);
auto it = param_name_map.find(grad_var_name);
if (it == param_name_map.end()) {
return false;
}
auto param_var_name = it->second;
auto& grad_record = (*grad_var_record)[param_var_name];
grad_record.name_ = grad_var_name;
grad_record.block_idx_ = block_desc->ID();
grad_record.op_idx_ = static_cast<int>(op_index);
return false; /* not break */
});
}
}
std::vector<std::unique_ptr<OpDescBind>> MakeOpGrad( std::vector<std::unique_ptr<OpDescBind>> MakeOpGrad(
const std::unique_ptr<OpDescBind>& op_desc, const std::unique_ptr<OpDescBind>& op_desc,
std::unordered_set<std::string>& no_grad_vars) { std::unordered_set<std::string>* no_grad_vars,
std::unordered_map<std::string, std::string>* grad_to_var) {
std::vector<std::unique_ptr<OpDescBind>> grad_op_descs; std::vector<std::unique_ptr<OpDescBind>> grad_op_descs;
// All input gradients of forwarding operator do not need to calculat. // All input gradients of forwarding operator do not need to calculate.
const std::vector<std::string>& inputs = op_desc->InputArgumentNames(); const std::vector<std::string>& inputs = op_desc->InputArgumentNames();
if (AllGradInSet(inputs, no_grad_vars)) { if (AllGradInSet(inputs, *no_grad_vars)) {
return grad_op_descs; // empty vector return grad_op_descs; // empty vector
} }
// All output gradients of forwarding operator do not need to calculate. // All output gradients of forwarding operator do not need to calculate.
const std::vector<std::string>& outputs = op_desc->OutputArgumentNames(); const std::vector<std::string>& outputs = op_desc->OutputArgumentNames();
if (AllGradInSet(outputs, no_grad_vars)) { if (AllGradInSet(outputs, *no_grad_vars)) {
for (const std::string& name : inputs) { for (const std::string& name : inputs) {
no_grad_vars.insert(GradVarName(name)); no_grad_vars->insert(GradVarName(name));
} }
return grad_op_descs; // empty vector return grad_op_descs; // empty vector
} }
grad_op_descs = OpRegistry::CreateGradOpDescs(op_desc.get()); grad_op_descs = OpInfoMap::Instance()
.Get(op_desc->Type())
.GradOpMaker()(*op_desc, *no_grad_vars, grad_to_var);
std::list<std::unique_ptr<OpDescBind>> pending_fill_zeros_ops; std::list<std::unique_ptr<OpDescBind>> pending_fill_zeros_ops;
for (auto& desc : grad_op_descs) { for (auto& desc : grad_op_descs) {
for (const std::string& in_name : desc->InputArgumentNames()) { for (const std::string& in_name : desc->InputArgumentNames()) {
if (no_grad_vars.count(in_name)) { if (no_grad_vars->count(in_name)) {
std::string prefix = in_name.substr( std::string prefix = in_name.substr(
0, in_name.size() - sizeof(kGradVarSuffix) / sizeof(char) + 1); 0, in_name.size() - sizeof(kGradVarSuffix) / sizeof(char) + 1);
std::string new_name = prefix + kZeroVarSuffix; std::string new_name = prefix + kZeroVarSuffix;
...@@ -317,11 +337,6 @@ std::vector<std::unique_ptr<OpDescBind>> MakeOpGrad( ...@@ -317,11 +337,6 @@ std::vector<std::unique_ptr<OpDescBind>> MakeOpGrad(
pending_fill_zeros_ops.push_back(std::move(fill_zeros_op)); pending_fill_zeros_ops.push_back(std::move(fill_zeros_op));
} }
} }
for (const std::string& out_name : desc->OutputArgumentNames()) {
if (no_grad_vars.count(out_name)) {
desc->Rename(out_name, kEmptyVarName);
}
}
} }
for (auto& p : pending_fill_zeros_ops) { for (auto& p : pending_fill_zeros_ops) {
...@@ -332,23 +347,25 @@ std::vector<std::unique_ptr<OpDescBind>> MakeOpGrad( ...@@ -332,23 +347,25 @@ std::vector<std::unique_ptr<OpDescBind>> MakeOpGrad(
std::vector<std::unique_ptr<OpDescBind>> MakeBlockBackward( std::vector<std::unique_ptr<OpDescBind>> MakeBlockBackward(
ProgramDescBind& program_desc, int block_idx, ProgramDescBind& program_desc, int block_idx,
std::unordered_set<std::string>& no_grad_vars) { std::unordered_set<std::string>* no_grad_vars,
std::unordered_map<std::string, std::string>* grad_to_var) {
BlockDescBind* cur_block = program_desc.Block(block_idx); BlockDescBind* cur_block = program_desc.Block(block_idx);
std::deque<std::unique_ptr<OpDescBind>>& op_descs = cur_block->ops_; std::deque<std::unique_ptr<OpDescBind>>& op_descs = cur_block->ops_;
std::unordered_map<std::string, std::vector<size_t>> dup_out_ops; std::unordered_map<std::string, std::vector<size_t>> dup_out_ops;
size_t grad_desc_idx = 0; size_t grad_desc_idx = 0;
std::vector<std::unique_ptr<OpDescBind>> backward_descs; std::vector<std::unique_ptr<OpDescBind>> backward_descs;
for (auto it = op_descs.rbegin(); it != op_descs.rend(); ++it) { for (auto it = op_descs.rbegin(); it != op_descs.rend(); ++it) {
std::vector<std::unique_ptr<OpDescBind>> op_grads = std::vector<std::unique_ptr<OpDescBind>> op_grads =
MakeOpGrad(*it, no_grad_vars); MakeOpGrad(*it, no_grad_vars, grad_to_var);
if ((*it)->Type() == "recurrent") { if ((*it)->Type() == "recurrent") {
PADDLE_ENFORCE_EQ( PADDLE_ENFORCE_EQ(
op_grads.size(), size_t(1), op_grads.size(), static_cast<size_t>(1),
"rnn_op's gradient process should contain only one op."); "rnn_op's gradient process should contain only one op.");
int step_block_idx = (*it)->GetBlockAttr("stop_block"); int step_block_idx = (*it)->GetBlockAttr("step_block");
auto backward_block_op_descs = auto backward_block_op_descs = MakeBlockBackward(
MakeBlockBackward(program_desc, step_block_idx, no_grad_vars); program_desc, step_block_idx, no_grad_vars, grad_to_var);
BlockDescBind* backward_block = program_desc.AppendBlock(*cur_block); BlockDescBind* backward_block = program_desc.AppendBlock(*cur_block);
for (auto& ptr : backward_block_op_descs) { for (auto& ptr : backward_block_op_descs) {
backward_block->ops_.push_back(std::move(ptr)); backward_block->ops_.push_back(std::move(ptr));
...@@ -392,10 +409,12 @@ std::vector<std::unique_ptr<OpDescBind>> MakeBlockBackward( ...@@ -392,10 +409,12 @@ std::vector<std::unique_ptr<OpDescBind>> MakeBlockBackward(
backward_descs.insert(backward_descs.begin() + p.first + 1, backward_descs.insert(backward_descs.begin() + p.first + 1,
std::move(p.second)); std::move(p.second));
} }
return backward_descs; return backward_descs;
} }
void AppendBackward(ProgramDescBind& program_desc, ParamGradInfoMap AppendBackward(
ProgramDescBind& program_desc, const VarDescBind& target,
const std::unordered_set<std::string>& no_grad_vars) { const std::unordered_set<std::string>& no_grad_vars) {
std::unordered_set<std::string> no_grad_var_names; std::unordered_set<std::string> no_grad_var_names;
no_grad_var_names.reserve(no_grad_vars.size() + 1); no_grad_var_names.reserve(no_grad_vars.size() + 1);
...@@ -403,13 +422,43 @@ void AppendBackward(ProgramDescBind& program_desc, ...@@ -403,13 +422,43 @@ void AppendBackward(ProgramDescBind& program_desc,
for (auto& name : no_grad_vars) { for (auto& name : no_grad_vars) {
no_grad_var_names.insert(GradVarName(name)); no_grad_var_names.insert(GradVarName(name));
} }
const int root_block_idx = 0; const int root_block_idx = 0;
auto backward_op_descs = auto root_block = program_desc.Block(root_block_idx);
MakeBlockBackward(program_desc, root_block_idx, no_grad_var_names); auto& all_ops = root_block->ops_;
auto& forw_op_descs = program_desc.Block(root_block_idx)->ops_;
// insert fill one op for target
std::string fill_one_op_out = GradVarName(target.Name());
std::unique_ptr<OpDescBind> fill_one_op(
new OpDescBind("fill_constant", {}, {{"Out", {fill_one_op_out}}},
{{"shape", std::vector<int>{1}},
{"value", static_cast<float>(1.0)},
{"dataType", framework::DataType::FP32}}));
all_ops.push_back(std::move(fill_one_op));
size_t forward_op_num = all_ops.size();
size_t forward_block_num = program_desc.Size();
// Insert backward operators
std::unordered_map<std::string, std::string> grad_to_var;
auto backward_op_descs = MakeBlockBackward(program_desc, root_block_idx,
&no_grad_var_names, &grad_to_var);
std::unordered_map<std::string, GradVarInfo> retv;
// Create Variable
for (auto& ptr : backward_op_descs) { for (auto& ptr : backward_op_descs) {
forw_op_descs.push_back(std::move(ptr)); all_ops.push_back(std::move(ptr));
}
root_block->Var(fill_one_op_out);
// create grad_var for all blocks in this program
CreateGradVarInBlock(forward_op_num, grad_to_var, root_block, &retv);
for (size_t block_index = forward_block_num;
block_index < program_desc.Size(); ++block_index) {
CreateGradVarInBlock(0, grad_to_var, program_desc.Block(block_index),
&retv);
} }
return retv;
} }
} // namespace framework } // namespace framework
......
...@@ -14,7 +14,10 @@ ...@@ -14,7 +14,10 @@
#pragma once #pragma once
#include <string>
#include <unordered_map>
#include <unordered_set> #include <unordered_set>
#include "paddle/framework/operator.h" #include "paddle/framework/operator.h"
#include "paddle/framework/program_desc.h" #include "paddle/framework/program_desc.h"
...@@ -27,7 +30,26 @@ extern std::unique_ptr<OperatorBase> Backward( ...@@ -27,7 +30,26 @@ extern std::unique_ptr<OperatorBase> Backward(
const OperatorBase& forwardOp, const OperatorBase& forwardOp,
const std::unordered_set<std::string>& no_grad_vars); const std::unordered_set<std::string>& no_grad_vars);
// Describes where a gradient variable is produced: its name, the index of the
// block that holds it and the index of the op (within that block) that
// outputs it.
struct GradVarInfo {
  // A default-constructed record has an empty name and both indices set to
  // -1, so comparing or printing it never reads uninitialized memory.
  GradVarInfo() {}

  GradVarInfo(const std::string& name, int block_idx, int op_idx)
      : name_(name), block_idx_(block_idx), op_idx_(op_idx) {}

  // Two records are equal when name, block index and op index all match.
  bool operator==(const GradVarInfo& b) const {
    return name_ == b.name_ && block_idx_ == b.block_idx_ &&
           op_idx_ == b.op_idx_;
  }

  std::string name_;    // gradient variable name
  int block_idx_ = -1;  // index of the owning block; -1 until assigned
  int op_idx_ = -1;     // index of the producing op; -1 until assigned
};
using ParamGradInfoMap = std::unordered_map<std::string /*fwd_var_name*/,
GradVarInfo /*grad_var_info*/>;
ParamGradInfoMap AppendBackward(
ProgramDescBind& program_desc, const VarDescBind& target,
const std::unordered_set<std::string>& no_grad_vars); const std::unordered_set<std::string>& no_grad_vars);
} // namespace framework } // namespace framework
......
...@@ -18,6 +18,7 @@ ...@@ -18,6 +18,7 @@
#include "paddle/framework/block_desc.h" #include "paddle/framework/block_desc.h"
#include "paddle/framework/op_desc.h" #include "paddle/framework/op_desc.h"
#include "paddle/framework/op_registry.h" #include "paddle/framework/op_registry.h"
#include "paddle/framework/var_desc.h"
#include "paddle/operators/net_op.h" #include "paddle/operators/net_op.h"
namespace paddle { namespace paddle {
...@@ -169,6 +170,45 @@ class MultInOutOpMaker : public OpProtoAndCheckerMaker { ...@@ -169,6 +170,45 @@ class MultInOutOpMaker : public OpProtoAndCheckerMaker {
} }
}; };
class MinusGradOpDescMaker : public GradOpDescMakerBase {
public:
using GradOpDescMakerBase::GradOpDescMakerBase;
std::vector<std::unique_ptr<OpDescBind>> operator()() const override {
std::vector<std::unique_ptr<OpDescBind>> retv;
auto x_g = InputGrad("X");
if (!x_g.empty()) {
auto *op_desc = new OpDescBind();
op_desc->SetType("scale");
op_desc->SetInput("X", OutputGrad("Out"));
op_desc->SetOutput("Out", x_g);
op_desc->SetAttr("scale", 1.0f);
retv.emplace_back(op_desc);
}
auto y_g = InputGrad("Y");
if (!y_g.empty()) {
auto *op_desc = new OpDescBind();
op_desc->SetType("scale");
op_desc->SetInput("X", OutputGrad("Out"));
op_desc->SetOutput("Out", y_g);
op_desc->SetAttr("scale", -1.0f);
retv.emplace_back(op_desc);
}
return retv;
}
};
// Proto/checker maker for the `minus` operator used only by these unit tests.
// It declares two inputs, "X" and "Y", and one output, "Out", all with empty
// descriptions, and sets the op comment to "minus for unittest".
class MinusOpMaker : public OpProtoAndCheckerMaker {
public:
// Forwards `proto` and `op_checker` to the base maker, then registers the
// op's inputs, output and comment.
MinusOpMaker(OpProto *proto, OpAttrChecker *op_checker)
: OpProtoAndCheckerMaker(proto, op_checker) {
AddInput("X", "");
AddInput("Y", "");
AddOutput("Out", "");
AddComment("minus for unittest");
}
};
} // namespace framework } // namespace framework
} // namespace paddle } // namespace paddle
...@@ -187,6 +227,7 @@ REGISTER_OP_WITHOUT_GRADIENT(fc, f::FcOp, f::FcOpMaker); ...@@ -187,6 +227,7 @@ REGISTER_OP_WITHOUT_GRADIENT(fc, f::FcOp, f::FcOpMaker);
REGISTER_OP(many_output_op, f::NOP, f::ManyOutputOpMaker, many_output_op_grad, REGISTER_OP(many_output_op, f::NOP, f::ManyOutputOpMaker, many_output_op_grad,
f::NOP); f::NOP);
REGISTER_OP(mult_in_out, f::NOP, f::MultInOutOpMaker, mult_in_out_grad, f::NOP); REGISTER_OP(mult_in_out, f::NOP, f::MultInOutOpMaker, mult_in_out_grad, f::NOP);
REGISTER_OPERATOR(minus, f::NOP, f::MinusOpMaker, f::MinusGradOpDescMaker);
TEST(Backward, simple_op_not_need_grad) { TEST(Backward, simple_op_not_need_grad) {
auto fwd = f::OpRegistry::CreateOp( auto fwd = f::OpRegistry::CreateOp(
...@@ -395,12 +436,13 @@ TEST(Backward, linear_net_intermediate_variable_has_no_grad) { ...@@ -395,12 +436,13 @@ TEST(Backward, linear_net_intermediate_variable_has_no_grad) {
2UL /* external input number */ 2UL /* external input number */
+ 1UL /* external output number*/ + 1UL /* external output number*/
+ 1UL /* number of gradient of external output*/ + 1UL /* number of gradient of external output*/
+ 2U /* internal variable number*/); + 2UL /* internal variable number*/
);
EXPECT_EQ(grad_fc.Outputs(all).size(), EXPECT_EQ(grad_fc.Outputs(all).size(),
2UL /* input number of mul*/ 2UL /* input number of mul*/
+ 2UL /* input number of rowwise_add + 2UL /* input number of rowwise_add*/
*/ + 1UL /* input number of sigmod */
+ 1UL /* input number of sigmod */); - 1UL /* out2 is not needed*/);
EXPECT_EQ(bwd_net->ops_[1]->Inputs(all).size(), 0UL); EXPECT_EQ(bwd_net->ops_[1]->Inputs(all).size(), 0UL);
EXPECT_EQ(bwd_net->ops_[1]->Outputs(all).size(), 0UL); EXPECT_EQ(bwd_net->ops_[1]->Outputs(all).size(), 0UL);
EXPECT_EQ(bwd_net->ops_[2]->Inputs(all).size(), 0UL); EXPECT_EQ(bwd_net->ops_[2]->Inputs(all).size(), 0UL);
...@@ -427,10 +469,14 @@ TEST(Backward, simple_single_op) { ...@@ -427,10 +469,14 @@ TEST(Backward, simple_single_op) {
op->SetInput("b", {"b"}); op->SetInput("b", {"b"});
op->SetOutput("Out", {"out"}); op->SetOutput("Out", {"out"});
AppendBackward(program, {}); auto target = f::VarDescBind("out");
auto var_to_grad = AppendBackward(program, target, {});
ASSERT_EQ(block->AllOps().size(), 3UL);
f::OpDescBind *fill_op = block->AllOps()[1];
EXPECT_EQ(fill_op->Type(), "fill_constant");
ASSERT_EQ(block->AllOps().size(), 2UL); f::OpDescBind *grad_op = block->AllOps()[2];
f::OpDescBind *grad_op = block->AllOps()[1];
EXPECT_EQ(grad_op->Type(), "rowwise_add_grad"); EXPECT_EQ(grad_op->Type(), "rowwise_add_grad");
ASSERT_EQ(grad_op->InputNames().size(), 1UL); ASSERT_EQ(grad_op->InputNames().size(), 1UL);
ASSERT_EQ(grad_op->OutputNames().size(), 2UL); ASSERT_EQ(grad_op->OutputNames().size(), 2UL);
...@@ -440,6 +486,13 @@ TEST(Backward, simple_single_op) { ...@@ -440,6 +486,13 @@ TEST(Backward, simple_single_op) {
std::vector<std::string>({f::GradVarName("x")})); std::vector<std::string>({f::GradVarName("x")}));
EXPECT_EQ(grad_op->Output(f::GradVarName("b")), EXPECT_EQ(grad_op->Output(f::GradVarName("b")),
std::vector<std::string>({f::GradVarName("b")})); std::vector<std::string>({f::GradVarName("b")}));
EXPECT_EQ(var_to_grad.size(), 2UL);
EXPECT_EQ(var_to_grad.at("b"), f::GradVarInfo(f::GradVarName("b"), 0, 2));
EXPECT_EQ(var_to_grad.at("x"), f::GradVarInfo(f::GradVarName("x"), 0, 2));
EXPECT_TRUE(block->HasVar(f::GradVarName("b")));
EXPECT_TRUE(block->HasVar(f::GradVarName("x")));
} }
TEST(Backward, default_attribute) { TEST(Backward, default_attribute) {
...@@ -451,14 +504,19 @@ TEST(Backward, default_attribute) { ...@@ -451,14 +504,19 @@ TEST(Backward, default_attribute) {
op->SetInput("X", {"x"}); op->SetInput("X", {"x"});
op->SetInput("Y", {"y"}); op->SetInput("Y", {"y"});
op->SetOutput("Out", {"out"}); op->SetOutput("Out", {"out"});
op->CheckAttrs();
AppendBackward(program, {}); auto target = f::VarDescBind("out");
AppendBackward(program, target, {});
ASSERT_EQ(block->AllOps().size(), 2UL); ASSERT_EQ(block->AllOps().size(), 3UL);
EXPECT_EQ(boost::get<int>(op->GetAttr("x_num_col_dims")), 1); EXPECT_EQ(boost::get<int>(op->GetAttr("x_num_col_dims")), 1);
EXPECT_EQ(boost::get<int>(op->GetAttr("y_num_col_dims")), 1); EXPECT_EQ(boost::get<int>(op->GetAttr("y_num_col_dims")), 1);
f::OpDescBind *grad_op = block->AllOps()[1]; f::OpDescBind *fill_op = block->AllOps()[1];
EXPECT_EQ(fill_op->Type(), "fill_constant");
f::OpDescBind *grad_op = block->AllOps()[2];
ASSERT_EQ(grad_op->Type(), "mul_grad"); ASSERT_EQ(grad_op->Type(), "mul_grad");
EXPECT_EQ(boost::get<int>(grad_op->GetAttr("x_num_col_dims")), 1); EXPECT_EQ(boost::get<int>(grad_op->GetAttr("x_num_col_dims")), 1);
EXPECT_EQ(boost::get<int>(grad_op->GetAttr("y_num_col_dims")), 1); EXPECT_EQ(boost::get<int>(grad_op->GetAttr("y_num_col_dims")), 1);
...@@ -486,10 +544,15 @@ TEST(Backward, simple_mult_op) { ...@@ -486,10 +544,15 @@ TEST(Backward, simple_mult_op) {
op3->SetInput("b", {"b3"}); op3->SetInput("b", {"b3"});
op3->SetOutput("Out", {"out3"}); op3->SetOutput("Out", {"out3"});
AppendBackward(program, {}); auto target = f::VarDescBind("out3");
size_t forward_len = block->AllOps().size();
auto var_to_grad = AppendBackward(program, target, {});
ASSERT_EQ(block->AllOps().size(), 6UL); ASSERT_EQ(block->AllOps().size(), 6UL + 1);
f::OpDescBind *grad_op1 = block->AllOps()[5]; f::OpDescBind *fill_op = block->AllOps()[forward_len];
EXPECT_EQ(fill_op->Type(), "fill_constant");
f::OpDescBind *grad_op1 = block->AllOps()[6];
EXPECT_EQ(grad_op1->Type(), "rowwise_add_grad"); EXPECT_EQ(grad_op1->Type(), "rowwise_add_grad");
ASSERT_EQ(grad_op1->InputNames().size(), 1UL); ASSERT_EQ(grad_op1->InputNames().size(), 1UL);
ASSERT_EQ(grad_op1->OutputNames().size(), 2UL); ASSERT_EQ(grad_op1->OutputNames().size(), 2UL);
...@@ -500,7 +563,7 @@ TEST(Backward, simple_mult_op) { ...@@ -500,7 +563,7 @@ TEST(Backward, simple_mult_op) {
EXPECT_EQ(grad_op1->Output(f::GradVarName("b")), EXPECT_EQ(grad_op1->Output(f::GradVarName("b")),
std::vector<std::string>({f::GradVarName("b1")})); std::vector<std::string>({f::GradVarName("b1")}));
f::OpDescBind *grad_op2 = block->AllOps()[4]; f::OpDescBind *grad_op2 = block->AllOps()[5];
EXPECT_EQ(grad_op2->Type(), "mul_grad"); EXPECT_EQ(grad_op2->Type(), "mul_grad");
ASSERT_EQ(grad_op2->InputNames().size(), 4UL); ASSERT_EQ(grad_op2->InputNames().size(), 4UL);
ASSERT_EQ(grad_op2->OutputNames().size(), 2UL); ASSERT_EQ(grad_op2->OutputNames().size(), 2UL);
...@@ -514,7 +577,7 @@ TEST(Backward, simple_mult_op) { ...@@ -514,7 +577,7 @@ TEST(Backward, simple_mult_op) {
EXPECT_EQ(grad_op2->Output(f::GradVarName("Y")), EXPECT_EQ(grad_op2->Output(f::GradVarName("Y")),
std::vector<std::string>({f::GradVarName("y2")})); std::vector<std::string>({f::GradVarName("y2")}));
f::OpDescBind *grad_op3 = block->AllOps()[3]; f::OpDescBind *grad_op3 = block->AllOps()[4];
EXPECT_EQ(grad_op3->Type(), "rowwise_add_grad"); EXPECT_EQ(grad_op3->Type(), "rowwise_add_grad");
ASSERT_EQ(grad_op3->InputNames().size(), 1UL); ASSERT_EQ(grad_op3->InputNames().size(), 1UL);
ASSERT_EQ(grad_op3->OutputNames().size(), 2UL); ASSERT_EQ(grad_op3->OutputNames().size(), 2UL);
...@@ -524,6 +587,23 @@ TEST(Backward, simple_mult_op) { ...@@ -524,6 +587,23 @@ TEST(Backward, simple_mult_op) {
std::vector<std::string>({f::GradVarName("out2")})); std::vector<std::string>({f::GradVarName("out2")}));
EXPECT_EQ(grad_op3->Output(f::GradVarName("b")), EXPECT_EQ(grad_op3->Output(f::GradVarName("b")),
std::vector<std::string>({f::GradVarName("b3")})); std::vector<std::string>({f::GradVarName("b3")}));
EXPECT_EQ(var_to_grad.size(), 6UL);
EXPECT_EQ(var_to_grad.at("x1"), f::GradVarInfo(f::GradVarName("x1"), 0, 6));
EXPECT_EQ(var_to_grad.at("b1"), f::GradVarInfo(f::GradVarName("b1"), 0, 6));
EXPECT_EQ(var_to_grad.at("out1"),
f::GradVarInfo(f::GradVarName("out1"), 0, 5));
EXPECT_EQ(var_to_grad.at("y2"), f::GradVarInfo(f::GradVarName("y2"), 0, 5));
EXPECT_EQ(var_to_grad.at("out2"),
f::GradVarInfo(f::GradVarName("out2"), 0, 4));
EXPECT_EQ(var_to_grad.at("b3"), f::GradVarInfo(f::GradVarName("b3"), 0, 4));
EXPECT_TRUE(block->HasVar(f::GradVarName("x1")));
EXPECT_TRUE(block->HasVar(f::GradVarName("b1")));
EXPECT_TRUE(block->HasVar(f::GradVarName("out1")));
EXPECT_TRUE(block->HasVar(f::GradVarName("y2")));
EXPECT_TRUE(block->HasVar(f::GradVarName("out2")));
EXPECT_TRUE(block->HasVar(f::GradVarName("b3")));
} }
TEST(Backward, intermedia_var_no_grad) { TEST(Backward, intermedia_var_no_grad) {
...@@ -554,10 +634,15 @@ TEST(Backward, intermedia_var_no_grad) { ...@@ -554,10 +634,15 @@ TEST(Backward, intermedia_var_no_grad) {
op4->SetInput("Y", {"out3"}); op4->SetInput("Y", {"out3"});
op4->SetOutput("Out", {"out4"}); op4->SetOutput("Out", {"out4"});
AppendBackward(program, {"out3"}); auto target = f::VarDescBind("out4");
size_t forward_len = block->AllOps().size();
auto var_to_grad = AppendBackward(program, target, {"out3"});
ASSERT_EQ(block->AllOps().size(), 6UL); ASSERT_EQ(block->AllOps().size(), 7UL);
f::OpDescBind *grad_op1 = block->AllOps()[5]; f::OpDescBind *fill_op = block->AllOps()[forward_len];
EXPECT_EQ(fill_op->Type(), "fill_constant");
f::OpDescBind *grad_op1 = block->AllOps()[6];
EXPECT_EQ(grad_op1->Type(), "rowwise_add_grad"); EXPECT_EQ(grad_op1->Type(), "rowwise_add_grad");
ASSERT_EQ(grad_op1->InputNames().size(), 1UL); ASSERT_EQ(grad_op1->InputNames().size(), 1UL);
ASSERT_EQ(grad_op1->OutputNames().size(), 2UL); ASSERT_EQ(grad_op1->OutputNames().size(), 2UL);
...@@ -568,7 +653,7 @@ TEST(Backward, intermedia_var_no_grad) { ...@@ -568,7 +653,7 @@ TEST(Backward, intermedia_var_no_grad) {
EXPECT_EQ(grad_op1->Output(f::GradVarName("b")), EXPECT_EQ(grad_op1->Output(f::GradVarName("b")),
std::vector<std::string>({f::GradVarName("b1")})); std::vector<std::string>({f::GradVarName("b1")}));
f::OpDescBind *grad_op4 = block->AllOps()[4]; f::OpDescBind *grad_op4 = block->AllOps()[5];
EXPECT_EQ(grad_op4->Type(), "mul_grad"); EXPECT_EQ(grad_op4->Type(), "mul_grad");
ASSERT_EQ(grad_op4->InputNames().size(), 4UL); ASSERT_EQ(grad_op4->InputNames().size(), 4UL);
ASSERT_EQ(grad_op4->OutputNames().size(), 2UL); ASSERT_EQ(grad_op4->OutputNames().size(), 2UL);
...@@ -579,8 +664,17 @@ TEST(Backward, intermedia_var_no_grad) { ...@@ -579,8 +664,17 @@ TEST(Backward, intermedia_var_no_grad) {
std::vector<std::string>({f::GradVarName("out4")})); std::vector<std::string>({f::GradVarName("out4")}));
EXPECT_EQ(grad_op4->Output(f::GradVarName("X")), EXPECT_EQ(grad_op4->Output(f::GradVarName("X")),
std::vector<std::string>({f::GradVarName("out1")})); std::vector<std::string>({f::GradVarName("out1")}));
EXPECT_EQ(grad_op4->Output(f::GradVarName("Y")), EXPECT_EQ(grad_op4->Output(f::GradVarName("Y")), std::vector<std::string>());
std::vector<std::string>({f::kEmptyVarName}));
EXPECT_EQ(var_to_grad.size(), 3UL);
EXPECT_EQ(var_to_grad.at("x1"), f::GradVarInfo(f::GradVarName("x1"), 0, 6));
EXPECT_EQ(var_to_grad.at("b1"), f::GradVarInfo(f::GradVarName("b1"), 0, 6));
EXPECT_EQ(var_to_grad.at("out1"),
f::GradVarInfo(f::GradVarName("out1"), 0, 5));
EXPECT_TRUE(block->HasVar(f::GradVarName("x1")));
EXPECT_TRUE(block->HasVar(f::GradVarName("b1")));
EXPECT_TRUE(block->HasVar(f::GradVarName("out1")));
} }
TEST(Backward, var_no_grad) { TEST(Backward, var_no_grad) {
...@@ -601,10 +695,15 @@ TEST(Backward, var_no_grad) { ...@@ -601,10 +695,15 @@ TEST(Backward, var_no_grad) {
op2->SetOutput("Y", {"y2"}); op2->SetOutput("Y", {"y2"});
op2->SetOutput("Z", {"z2"}); op2->SetOutput("Z", {"z2"});
AppendBackward(program, {"z1"}); auto target = f::VarDescBind("z2");
size_t forward_len = block->AllOps().size();
auto var_to_grad = AppendBackward(program, target, {"z1"});
ASSERT_EQ(block->AllOps().size(), 6UL);
f::OpDescBind *fill_op = block->AllOps()[forward_len];
EXPECT_EQ(fill_op->Type(), "fill_constant");
ASSERT_EQ(block->AllOps().size(), 5UL); f::OpDescBind *grad_op2 = block->AllOps()[3];
f::OpDescBind *grad_op2 = block->AllOps()[2];
ASSERT_EQ(grad_op2->Type(), "mult_in_out_grad"); ASSERT_EQ(grad_op2->Type(), "mult_in_out_grad");
ASSERT_EQ(grad_op2->InputNames().size(), 6UL); ASSERT_EQ(grad_op2->InputNames().size(), 6UL);
ASSERT_EQ(grad_op2->OutputNames().size(), 2UL); ASSERT_EQ(grad_op2->OutputNames().size(), 2UL);
...@@ -618,10 +717,9 @@ TEST(Backward, var_no_grad) { ...@@ -618,10 +717,9 @@ TEST(Backward, var_no_grad) {
std::vector<std::string>({f::GradVarName("z2")})); std::vector<std::string>({f::GradVarName("z2")}));
EXPECT_EQ(grad_op2->Output(f::GradVarName("X")), EXPECT_EQ(grad_op2->Output(f::GradVarName("X")),
std::vector<std::string>({f::GradVarName("y1")})); std::vector<std::string>({f::GradVarName("y1")}));
EXPECT_EQ(grad_op2->Output(f::GradVarName("H")), EXPECT_EQ(grad_op2->Output(f::GradVarName("H")), std::vector<std::string>());
std::vector<std::string>({f::kEmptyVarName}));
f::OpDescBind *fill_zero_op = block->AllOps()[3]; f::OpDescBind *fill_zero_op = block->AllOps()[4];
ASSERT_EQ(fill_zero_op->Type(), "fill_zeros_like"); ASSERT_EQ(fill_zero_op->Type(), "fill_zeros_like");
ASSERT_EQ(fill_zero_op->InputNames().size(), 1UL); ASSERT_EQ(fill_zero_op->InputNames().size(), 1UL);
ASSERT_EQ(fill_zero_op->OutputNames().size(), 1UL); ASSERT_EQ(fill_zero_op->OutputNames().size(), 1UL);
...@@ -629,7 +727,7 @@ TEST(Backward, var_no_grad) { ...@@ -629,7 +727,7 @@ TEST(Backward, var_no_grad) {
EXPECT_EQ(fill_zero_op->Output("Y"), EXPECT_EQ(fill_zero_op->Output("Y"),
std::vector<std::string>({std::string("z1") + f::kZeroVarSuffix})); std::vector<std::string>({std::string("z1") + f::kZeroVarSuffix}));
f::OpDescBind *grad_op1 = block->AllOps()[4]; f::OpDescBind *grad_op1 = block->AllOps()[5];
ASSERT_EQ(grad_op1->Type(), "mult_in_out_grad"); ASSERT_EQ(grad_op1->Type(), "mult_in_out_grad");
ASSERT_EQ(grad_op1->InputNames().size(), 6UL); ASSERT_EQ(grad_op1->InputNames().size(), 6UL);
ASSERT_EQ(grad_op1->OutputNames().size(), 2UL); ASSERT_EQ(grad_op1->OutputNames().size(), 2UL);
...@@ -645,6 +743,15 @@ TEST(Backward, var_no_grad) { ...@@ -645,6 +743,15 @@ TEST(Backward, var_no_grad) {
std::vector<std::string>({f::GradVarName("x1")})); std::vector<std::string>({f::GradVarName("x1")}));
EXPECT_EQ(grad_op1->Output(f::GradVarName("H")), EXPECT_EQ(grad_op1->Output(f::GradVarName("H")),
std::vector<std::string>({f::GradVarName("h1")})); std::vector<std::string>({f::GradVarName("h1")}));
EXPECT_EQ(var_to_grad.size(), 3UL);
EXPECT_EQ(var_to_grad.at("y1"), f::GradVarInfo(f::GradVarName("y1"), 0, 3));
EXPECT_EQ(var_to_grad.at("x1"), f::GradVarInfo(f::GradVarName("x1"), 0, 5));
EXPECT_EQ(var_to_grad.at("h1"), f::GradVarInfo(f::GradVarName("h1"), 0, 5));
EXPECT_TRUE(block->HasVar(f::GradVarName("y1")));
EXPECT_TRUE(block->HasVar(f::GradVarName("x1")));
EXPECT_TRUE(block->HasVar(f::GradVarName("h1")));
} }
TEST(Backward, shared_var) { TEST(Backward, shared_var) {
...@@ -669,10 +776,15 @@ TEST(Backward, shared_var) { ...@@ -669,10 +776,15 @@ TEST(Backward, shared_var) {
op3->SetInput("b", {"b3"}); op3->SetInput("b", {"b3"});
op3->SetOutput("Out", {"out3"}); op3->SetOutput("Out", {"out3"});
AppendBackward(program, {}); auto target = f::VarDescBind("out3");
size_t forward_len = block->AllOps().size();
auto var_to_grad = AppendBackward(program, target, {});
ASSERT_EQ(block->AllOps().size(), 7UL); ASSERT_EQ(block->AllOps().size(), 8UL);
f::OpDescBind *grad_op3 = block->AllOps()[3]; f::OpDescBind *fill_op = block->AllOps()[forward_len];
EXPECT_EQ(fill_op->Type(), "fill_constant");
f::OpDescBind *grad_op3 = block->AllOps()[4];
ASSERT_EQ(grad_op3->Type(), "rowwise_add_grad"); ASSERT_EQ(grad_op3->Type(), "rowwise_add_grad");
ASSERT_EQ(grad_op3->InputNames().size(), 1UL); ASSERT_EQ(grad_op3->InputNames().size(), 1UL);
ASSERT_EQ(grad_op3->OutputNames().size(), 2UL); ASSERT_EQ(grad_op3->OutputNames().size(), 2UL);
...@@ -683,7 +795,7 @@ TEST(Backward, shared_var) { ...@@ -683,7 +795,7 @@ TEST(Backward, shared_var) {
EXPECT_EQ(grad_op3->Output(f::GradVarName("b")), EXPECT_EQ(grad_op3->Output(f::GradVarName("b")),
std::vector<std::string>({f::GradVarName("b3")})); std::vector<std::string>({f::GradVarName("b3")}));
f::OpDescBind *grad_op4 = block->AllOps()[4]; f::OpDescBind *grad_op4 = block->AllOps()[5];
ASSERT_EQ(grad_op4->Type(), "mul_grad"); ASSERT_EQ(grad_op4->Type(), "mul_grad");
ASSERT_EQ(grad_op4->InputNames().size(), 4UL); ASSERT_EQ(grad_op4->InputNames().size(), 4UL);
ASSERT_EQ(grad_op4->OutputNames().size(), 2UL); ASSERT_EQ(grad_op4->OutputNames().size(), 2UL);
...@@ -697,7 +809,7 @@ TEST(Backward, shared_var) { ...@@ -697,7 +809,7 @@ TEST(Backward, shared_var) {
EXPECT_EQ(grad_op4->Output(f::GradVarName("Y")), EXPECT_EQ(grad_op4->Output(f::GradVarName("Y")),
std::vector<std::string>({f::GradVarName("y2")})); std::vector<std::string>({f::GradVarName("y2")}));
f::OpDescBind *sum_op = block->AllOps()[5]; f::OpDescBind *sum_op = block->AllOps()[6];
ASSERT_EQ(sum_op->Type(), "sum"); ASSERT_EQ(sum_op->Type(), "sum");
ASSERT_EQ(sum_op->InputNames().size(), 1UL); ASSERT_EQ(sum_op->InputNames().size(), 1UL);
ASSERT_EQ(sum_op->OutputNames().size(), 1UL); ASSERT_EQ(sum_op->OutputNames().size(), 1UL);
...@@ -707,7 +819,7 @@ TEST(Backward, shared_var) { ...@@ -707,7 +819,7 @@ TEST(Backward, shared_var) {
EXPECT_EQ(sum_op->Output("Out"), EXPECT_EQ(sum_op->Output("Out"),
std::vector<std::string>({f::GradVarName("out1")})); std::vector<std::string>({f::GradVarName("out1")}));
f::OpDescBind *grad_op1 = block->AllOps()[6]; f::OpDescBind *grad_op1 = block->AllOps()[7];
ASSERT_EQ(grad_op1->Type(), "rowwise_add_grad"); ASSERT_EQ(grad_op1->Type(), "rowwise_add_grad");
ASSERT_EQ(grad_op1->InputNames().size(), 1UL); ASSERT_EQ(grad_op1->InputNames().size(), 1UL);
ASSERT_EQ(grad_op1->OutputNames().size(), 2UL); ASSERT_EQ(grad_op1->OutputNames().size(), 2UL);
...@@ -717,4 +829,41 @@ TEST(Backward, shared_var) { ...@@ -717,4 +829,41 @@ TEST(Backward, shared_var) {
std::vector<std::string>({f::GradVarName("x1")})); std::vector<std::string>({f::GradVarName("x1")}));
EXPECT_EQ(grad_op1->Output(f::GradVarName("b")), EXPECT_EQ(grad_op1->Output(f::GradVarName("b")),
std::vector<std::string>({f::GradVarName("b1")})); std::vector<std::string>({f::GradVarName("b1")}));
EXPECT_EQ(var_to_grad.size(), 5UL);
EXPECT_EQ(var_to_grad.at("b3"), f::GradVarInfo(f::GradVarName("b3"), 0, 4));
EXPECT_EQ(var_to_grad.at("y2"), f::GradVarInfo(f::GradVarName("y2"), 0, 5));
EXPECT_EQ(var_to_grad.at("out1"),
f::GradVarInfo(f::GradVarName("out1"), 0, 6));
EXPECT_EQ(var_to_grad.at("x1"), f::GradVarInfo(f::GradVarName("x1"), 0, 7));
EXPECT_EQ(var_to_grad.at("b1"), f::GradVarInfo(f::GradVarName("b1"), 0, 7));
EXPECT_TRUE(block->HasVar(f::GradVarName("b3")));
EXPECT_TRUE(block->HasVar(f::GradVarName("y2")));
EXPECT_TRUE(block->HasVar(f::GradVarName("out1")));
EXPECT_TRUE(block->HasVar(f::GradVarName("x1")));
EXPECT_TRUE(block->HasVar(f::GradVarName("b1")));
}
// Builds a single `minus` op (out = minus(a, b)) and appends its backward
// pass with "b" excluded from gradient computation. Expects exactly three ops
// afterwards (forward op, fill_constant for the target gradient, one gradient
// op) and a single recorded gradient, the one for "a".
TEST(Backward, half_backward) {
  f::ProgramDesc *program_desc = GetNewProgramDesc();
  f::ProgramDescBind &program = f::ProgramDescBind::Instance(program_desc);
  f::BlockDescBind *block = program.Block(0);
  auto *op1 = block->AppendOp();
  op1->SetType("minus");
  op1->SetInput("X", {"a"});
  op1->SetInput("Y", {"b"});
  op1->SetOutput("Out", {"out"});

  auto target = f::VarDescBind("out");
  size_t forward_len = block->AllOps().size();
  auto var_to_grad = AppendBackward(program, target, {"b"});

  // Check the op count before indexing, so a wrong count fails the test
  // cleanly instead of reading past the end of the op list.
  auto ops = block->AllOps();
  ASSERT_EQ(3UL, ops.size());

  // The op placed right after the forward ops seeds the target gradient.
  f::OpDescBind *fill_op = ops[forward_len];
  EXPECT_EQ(fill_op->Type(), "fill_constant");

  // Only "a" gets a gradient; it is produced by the op after fill_constant.
  EXPECT_EQ(var_to_grad.size(), 1UL);
  EXPECT_EQ(var_to_grad.at("a"),
            f::GradVarInfo(f::GradVarName("a"), 0,
                           static_cast<int>(forward_len + 1)));
}
...@@ -18,19 +18,22 @@ limitations under the License. */ ...@@ -18,19 +18,22 @@ limitations under the License. */
namespace paddle { namespace paddle {
namespace framework { namespace framework {
VarDescBind *BlockDescBind::NewVar(const std::string &name) { VarDescBind *BlockDescBind::Var(const std::string &name) {
need_update_ = true; need_update_ = true;
auto it = vars_.find(name); auto it = vars_.find(name);
PADDLE_ENFORCE(it == vars_.end(), "Duplicated variable %s", name); if (it != vars_.end()) {
auto var = new VarDescBind(name); return it->second.get();
}
auto *var = new VarDescBind(name);
vars_[name].reset(var); vars_[name].reset(var);
return var; return var;
} }
VarDescBind *BlockDescBind::Var(const std::string &name) const { VarDescBind *BlockDescBind::FindVar(const std::string &name) const {
auto it = vars_.find(name); auto it = vars_.find(name);
PADDLE_ENFORCE(it != vars_.end(), if (it == vars_.end()) {
"Can not find variable %s in current block.", name); return nullptr;
}
return it->second.get(); return it->second.get();
} }
...@@ -66,7 +69,7 @@ std::vector<OpDescBind *> BlockDescBind::AllOps() const { ...@@ -66,7 +69,7 @@ std::vector<OpDescBind *> BlockDescBind::AllOps() const {
return res; return res;
} }
void BlockDescBind::Sync() { void BlockDescBind::Flush() {
if (need_update_) { if (need_update_) {
auto &op_field = *this->desc_->mutable_ops(); auto &op_field = *this->desc_->mutable_ops();
op_field.Clear(); op_field.Clear();
...@@ -91,9 +94,10 @@ BlockDescBind *BlockDescBind::ParentBlock() const { ...@@ -91,9 +94,10 @@ BlockDescBind *BlockDescBind::ParentBlock() const {
return prog_->Block(static_cast<size_t>(this->desc_->parent_idx())); return prog_->Block(static_cast<size_t>(this->desc_->parent_idx()));
} }
void OpDescBind::SetBlockAttr(const std::string &name, BlockDescBind &block) { BlockDesc *BlockDescBind::Proto() {
BlockDesc *desc = block.RawPtr(); Flush();
this->attrs_[name] = desc; return desc_;
} }
} // namespace framework } // namespace framework
} // namespace paddle } // namespace paddle
...@@ -33,14 +33,6 @@ class ProgramDescBind; ...@@ -33,14 +33,6 @@ class ProgramDescBind;
class BlockDescBind { class BlockDescBind {
public: public:
friend std::vector<std::unique_ptr<OpDescBind>> MakeBlockBackward(
ProgramDescBind &program_desc, int block_idx,
std::unordered_set<std::string> &no_grad_vars);
friend void AppendBackward(
ProgramDescBind &program_desc,
const std::unordered_set<std::string> &no_grad_vars);
BlockDescBind(ProgramDescBind *prog, BlockDesc *desc) BlockDescBind(ProgramDescBind *prog, BlockDesc *desc)
: prog_(prog), desc_(desc), need_update_(false) {} : prog_(prog), desc_(desc), need_update_(false) {}
...@@ -48,9 +40,9 @@ class BlockDescBind { ...@@ -48,9 +40,9 @@ class BlockDescBind {
int32_t Parent() const { return desc_->parent_idx(); } int32_t Parent() const { return desc_->parent_idx(); }
VarDescBind *NewVar(const std::string &name_bytes); VarDescBind *Var(const std::string &name_bytes);
VarDescBind *Var(const std::string &name_bytes) const; VarDescBind *FindVar(const std::string &name_bytes) const;
bool HasVar(const std::string &var_name) const; bool HasVar(const std::string &var_name) const;
...@@ -64,11 +56,13 @@ class BlockDescBind { ...@@ -64,11 +56,13 @@ class BlockDescBind {
std::vector<OpDescBind *> AllOps() const; std::vector<OpDescBind *> AllOps() const;
void Sync(); void Flush();
BlockDesc *RawPtr() { return desc_; } BlockDesc *Proto();
private: // FIXME(yuyang18): backward will access private data of BlockDesc.
// Mark it public temporary. We can fix it later.
public:
ProgramDescBind *prog_; // not_own ProgramDescBind *prog_; // not_own
BlockDesc *desc_; // not_own BlockDesc *desc_; // not_own
bool need_update_; bool need_update_;
......
...@@ -97,8 +97,11 @@ struct OpInfoFiller<T, kOpProtoAndCheckerMaker> { ...@@ -97,8 +97,11 @@ struct OpInfoFiller<T, kOpProtoAndCheckerMaker> {
template <typename T> template <typename T>
struct OpInfoFiller<T, kGradOpDescMaker> { struct OpInfoFiller<T, kGradOpDescMaker> {
void operator()(const char* op_type, OpInfo* info) const { void operator()(const char* op_type, OpInfo* info) const {
info->grad_op_maker_ = [](const OpDescBind& fwd_op) { info->grad_op_maker_ = [](
T maker(fwd_op); const OpDescBind& fwd_op,
const std::unordered_set<std::string>& no_grad_set,
std::unordered_map<std::string, std::string>* grad_to_var) {
T maker(fwd_op, no_grad_set, grad_to_var);
return maker(); return maker();
}; };
} }
......
/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserve.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
#include "paddle/framework/executor.h"
#include <algorithm>
#include <iostream>
#include <memory>
#include <set>
#include <vector>
#include "paddle/framework/lod_tensor.h"
#include "paddle/framework/op_registry.h"
#include "paddle/framework/scope.h"
namespace paddle {
namespace framework {
// Operator type names that Prune() treats specially: feed ops must form a
// prefix of the block, fetch ops must form a suffix, and fetch ops seed the
// backward dependency walk.
const std::string kFeedOpType = "feed";
const std::string kFetchOpType = "fetch";
// Creates one device context per entry of `places`.
//
// `places` must not be empty. A CPU place gets a CPUDeviceContext. A GPU place
// gets a CUDADeviceContext when built with PADDLE_WITH_CUDA and raises an
// error otherwise.
//
// The destructor does not run for a partially constructed object, so when
// creating a context throws, the contexts created so far are released here
// before the exception is propagated.
Executor::Executor(const std::vector<platform::Place>& places) {
  PADDLE_ENFORCE_GT(places.size(), 0);
  // resize() value-initializes the new slots, i.e. every pointer is nullptr
  // until its context exists; that makes the cleanup below safe.
  device_contexts_.resize(places.size());
  try {
    for (size_t i = 0; i < places.size(); i++) {
      if (platform::is_cpu_place(places[i])) {
        device_contexts_[i] = new platform::CPUDeviceContext(
            boost::get<platform::CPUPlace>(places[i]));
      } else if (platform::is_gpu_place(places[i])) {
#ifdef PADDLE_WITH_CUDA
        device_contexts_[i] = new platform::CUDADeviceContext(
            boost::get<platform::GPUPlace>(places[i]));
#else
        PADDLE_THROW(
            "'GPUPlace' is not supported, Please re-compile with WITH_GPU "
            "option");
#endif
      }
    }
  } catch (...) {
    for (auto* device_context : device_contexts_) {
      delete device_context;
    }
    device_contexts_.clear();
    throw;
  }
}
// Releases every device context created by the constructor.
Executor::~Executor() {
  for (size_t idx = 0; idx < device_contexts_.size(); ++idx) {
    delete device_contexts_[idx];
  }
}
void Executor::Run(const ProgramDesc& pdesc, Scope* scope, int block_id) {
// TODO(tonyyang-svail):
// - only runs on the first device (i.e. no interdevice communication)
// - will change to use multiple blocks for RNN op and Cond Op
PADDLE_ENFORCE_GT(pdesc.blocks_size(), block_id);
auto& block = pdesc.blocks(block_id);
auto& device = device_contexts_[0];
// Instantiate all the vars in the global scope
for (auto& var : block.vars()) {
scope->Var(var.name());
}
Scope& local_scope = scope->NewScope();
std::vector<bool> should_run = Prune(pdesc, block_id);
PADDLE_ENFORCE_EQ(should_run.size(), static_cast<size_t>(block.ops_size()));
for (size_t i = 0; i < should_run.size(); ++i) {
if (should_run[i]) {
for (auto& var : block.ops(i).outputs()) {
for (auto& argu : var.arguments()) {
if (local_scope.FindVar(argu) == nullptr) {
local_scope.Var(argu);
}
}
}
auto op = paddle::framework::OpRegistry::CreateOp(block.ops(i));
op->Run(local_scope, *device);
}
}
// TODO(tonyyang-svail):
// - Destroy local_scope
}
std::vector<bool> Prune(const ProgramDesc& pdesc, int block_id) {
// TODO(tonyyang-svail):
// - will change to use multiple blocks for RNN op and Cond Op
auto& block = pdesc.blocks(block_id);
auto& ops = block.ops();
bool expect_feed = true;
for (auto& op_desc : ops) {
PADDLE_ENFORCE(op_desc.type() != kFeedOpType || expect_feed,
"All FeedOps are at the beginning of the ProgramDesc");
expect_feed = (op_desc.type() == kFeedOpType);
}
bool expect_fetch = true;
for (auto op_iter = ops.rbegin(); op_iter != ops.rend(); ++op_iter) {
auto& op_desc = *op_iter;
PADDLE_ENFORCE(op_desc.type() != kFetchOpType || expect_fetch,
"All FetchOps must at the end of the ProgramDesc");
expect_fetch = (op_desc.type() == kFetchOpType);
}
std::set<std::string> dependent_vars;
std::vector<bool> should_run;
for (auto op_iter = ops.rbegin(); op_iter != ops.rend(); ++op_iter) {
auto& op_desc = *op_iter;
bool found_dependent_vars = false;
for (auto& var : op_desc.outputs()) {
for (auto& argu : var.arguments()) {
if (dependent_vars.count(argu) != 0) {
found_dependent_vars = true;
}
}
}
if (op_desc.type() == kFetchOpType || found_dependent_vars) {
// erase its output to the dependency graph
for (auto& var : op_desc.outputs()) {
for (auto& argu : var.arguments()) {
dependent_vars.erase(argu);
}
}
// insert its input to the dependency graph
for (auto& var : op_desc.inputs()) {
for (auto& argu : var.arguments()) {
dependent_vars.insert(argu);
}
}
should_run.push_back(true);
} else {
should_run.push_back(false);
}
}
// TODO(tonyyang-svail):
// - check this after integration of Init
// PADDLE_ENFORCE(dependent_vars.empty());
// since we are traversing the ProgramDesc in reverse order
// we reverse the should_run vector
std::reverse(should_run.begin(), should_run.end());
return should_run;
}
} // namespace framework
} // namespace paddle
/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserve.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
#pragma once
#include "paddle/framework/framework.pb.h"
#include "paddle/framework/op_info.h"
#include "paddle/framework/scope.h"
#include "paddle/framework/tensor.h"
namespace paddle {
namespace framework {
/* @Brief
 * Runs the operators of a ProgramDesc block on device contexts created for
 * the places passed to the constructor.
 *
 * The device contexts are held as raw pointers and deleted by the destructor,
 * so an Executor must not be copied: a copy would delete the same contexts a
 * second time. Copying is therefore disabled.
 */
class Executor {
 public:
  explicit Executor(const std::vector<platform::Place>& places);
  ~Executor();

  Executor(const Executor&) = delete;
  Executor& operator=(const Executor&) = delete;

  /* @Brief
   * Runtime evaluation of one block of the given ProgramDesc under a Scope
   *
   * @param
   *  ProgramDesc  the program to evaluate
   *  Scope        the scope the block's variables are created in
   *  int          index of the block to run
   */
  void Run(const ProgramDesc&, Scope*, int);

 private:
  // Owned; one entry per place given to the constructor.
  std::vector<platform::DeviceContext*> device_contexts_;
};
/* @Brief
 * Pruning the graph: selects the operators of one block that have to run.
 * Fetch ops are always selected; any other op is selected when one of its
 * outputs is consumed by an op that was already selected.
 *
 * @param
 *  pdesc     the program that contains the block
 *  block_id  index of the block to prune
 *
 * @return
 *  vector<bool>  Same size as ops. Indicates whether an op should be run.
 */
std::vector<bool> Prune(const ProgramDesc& pdesc, int block_id);
} // namespace framework
} // namespace paddle
/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserve.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
#include "paddle/framework/executor.h"
#include <memory>
#include <vector>
#include "gflags/gflags.h"
#include "gtest/gtest.h"
#include "paddle/framework/attribute.h"
#include "paddle/framework/backward.h"
#include "paddle/framework/block_desc.h"
#include "paddle/framework/op_desc.h"
#include "paddle/framework/op_registry.h"
#include "paddle/framework/operator.h"
USE_OP(elementwise_add);
USE_OP(gaussian_random);
USE_NO_KERNEL_OP(feed);
USE_NO_KERNEL_OP(fetch);
USE_OP(mul);
USE_OP(sum);
USE_OP(squared_l2_distance);
USE_OP(fill_constant);
USE_OP(mean);
USE_OP(sgd);
constexpr auto kFeedValueName = "feed_value";
constexpr auto kFetchValueName = "fetch_value";
using namespace paddle::platform;
using namespace paddle::framework;
// Appends an operator of the given type to `block`.
//
// Each output variable that the block does not contain yet is created with
// data type FP32. The operator is then appended, its inputs, outputs and
// attributes are set, and the attributes are validated with CheckAttrs().
void AddOp(const std::string& type, const VariableNameMap& inputs,
           const VariableNameMap& outputs, AttributeMap attrs,
           paddle::framework::BlockDescBind* block) {
  // insert output
  for (const auto& kv : outputs) {
    for (const auto& v : kv.second) {
      if (!block->HasVar(v)) {
        auto var = block->Var(v);
        var->SetDataType(paddle::framework::DataType::FP32);
      }
    }
  }

  // insert op
  auto op = block->AppendOp();
  op->SetType(type);
  for (auto& kv : inputs) {
    op->SetInput(kv.first, kv.second);
  }
  for (auto& kv : outputs) {
    op->SetOutput(kv.first, kv.second);
  }
  op->SetAttrMap(attrs);
  op->CheckAttrs();
}
// Tensors in feed value variable will only be in CPUPlace
// So we can memcpy the data from vector<T> to feed_value
template <typename T>
void SetFeedVariable(const std::vector<std::vector<T>>& inputs,
const std::vector<std::vector<int64_t>>& dims) {
Variable* g_feed_value = GetGlobalScope().FindVar(kFeedValueName);
auto& feed_inputs =
*(g_feed_value->GetMutable<std::vector<paddle::framework::LoDTensor>>());
size_t size = inputs.size();
feed_inputs.resize(size);
for (size_t i = 0; i < size; i++) {
T* dst = feed_inputs[i].mutable_data<T>(make_ddim(dims[i]), CPUPlace());
memcpy(dst, inputs[i].data(), inputs[i].size() * sizeof(T));
}
}
// Tensors in fetch value variable will only be in CPUPlace
// So we can memcpy the data from fetch_value to vector<T>
template <typename T>
std::vector<std::vector<T>> GetFetchVariable() {
Variable* g_fetch_value = GetGlobalScope().FindVar(kFetchValueName);
auto& fetch_outputs =
*(g_fetch_value->GetMutable<std::vector<paddle::framework::LoDTensor>>());
size_t size = fetch_outputs.size();
std::vector<std::vector<T>> result;
result.reserve(size);
for (size_t i = 0; i < size; i++) {
std::vector<T> tmp;
tmp.resize(fetch_outputs[i].numel());
memcpy(tmp.data(), fetch_outputs[i].data<T>(),
fetch_outputs[i].numel() * sizeof(T));
result.push_back(tmp);
}
return result;
}
// Fixture that builds two programs:
//   init_pdesc_  fills w1 and w2 with gaussian_random and fetches them;
//   pdesc_       feeds `a`, computes mul -> mul -> squared_l2_distance -> mean,
//                appends the backward ops for mean_out, applies sgd to w1 and
//                w2 and fetches w1, w2 and l2_distance.
// inputs_/dims_ hold the data for the single feed op of pdesc_.
class ExecutorTesterRandom : public ::testing::Test {
 public:
  void SetUp() override {
    const int input_dim = 3, batch_size = 2, embed_dim = 5;

    // ---- initialization program ----
    auto* init_block_proto = init_pdesc_.add_blocks();
    init_block_proto->set_idx(0);
    init_block_proto->set_parent_idx(-1);
    ProgramDescBind& init_program = ProgramDescBind::Instance(&init_pdesc_);
    BlockDescBind* init_block = init_program.Block(0);

    AddOp("gaussian_random", {}, {{"Out", {"w1"}}},
          {{"dims", std::vector<int>{input_dim, embed_dim}}}, init_block);
    AddOp("gaussian_random", {}, {{"Out", {"w2"}}},
          {{"dims", std::vector<int>{embed_dim, input_dim}}}, init_block);
    AddOp("fetch", {{"Input", {"w1"}}}, {{"Out", {kFetchValueName}}},
          {{"col", 0}}, init_block);
    AddOp("fetch", {{"Input", {"w2"}}}, {{"Out", {kFetchValueName}}},
          {{"col", 1}}, init_block);

    // Proto() flushes the pending edits into init_pdesc_.
    init_program.Proto();

    // ---- training program ----
    auto* main_block_proto = pdesc_.add_blocks();
    main_block_proto->set_idx(0);
    main_block_proto->set_parent_idx(-1);
    ProgramDescBind& program = ProgramDescBind::Instance(&pdesc_);
    BlockDescBind* main_block = program.Block(0);

    // data for the feed op
    inputs_.push_back({1.0, 1.0, 1.0, 1.0, 1.0, 1.0});
    dims_.push_back({batch_size, input_dim});

    AddOp("feed", {{"Input", {kFeedValueName}}}, {{"Out", {"a"}}},
          {{"dims", std::vector<int>{batch_size, input_dim}}, {"col", 0}},
          main_block);

    // forward pass
    AddOp("mul", {{"X", {"a"}}, {"Y", {"w1"}}}, {{"Out", {"b"}}}, {},
          main_block);
    AddOp("mul", {{"X", {"b"}}, {"Y", {"w2"}}}, {{"Out", {"a_out"}}}, {},
          main_block);
    AddOp("squared_l2_distance", {{"X", {"a"}}, {"Y", {"a_out"}}},
          {{"Out", {"l2_distance"}}, {"sub_result", {"l2_distance_sub"}}}, {},
          main_block);
    AddOp("mean", {{"X", {"l2_distance"}}}, {{"Out", {"mean_out"}}}, {},
          main_block);

    // backward pass
    auto target = VarDescBind("mean_out");
    AppendBackward(program, target, {});

    // parameter update
    AddOp("fill_constant", {}, {{"Out", {"learning_rate"}}},
          {{"shape", std::vector<int>{1}}, {"value", float(0.001)}},
          main_block);
    AddOp("sgd", {{"Param", {"w1"}},
                  {"LearningRate", {"learning_rate"}},
                  {"Grad", {"w1@GRAD"}}},
          {{"ParamOut", {"w1"}}}, {}, main_block);
    AddOp("sgd", {{"Param", {"w2"}},
                  {"LearningRate", {"learning_rate"}},
                  {"Grad", {"w2@GRAD"}}},
          {{"ParamOut", {"w2"}}}, {}, main_block);

    AddOp("fetch", {{"Input", {"w1"}}}, {{"Out", {kFetchValueName}}},
          {{"col", 0}}, main_block);
    AddOp("fetch", {{"Input", {"w2"}}}, {{"Out", {kFetchValueName}}},
          {{"col", 1}}, main_block);
    // NOTE(review): this fetch uses column 0 again, the same column as the w1
    // fetch above -- confirm that sharing the column is intended.
    AddOp("fetch", {{"Input", {"l2_distance"}}}, {{"Out", {kFetchValueName}}},
          {{"col", 0}}, main_block);

    // Proto() flushes the pending edits into pdesc_.
    program.Proto();
  }

 protected:
  ProgramDesc init_pdesc_;
  ProgramDesc pdesc_;
  std::vector<std::vector<float>> inputs_;
  std::vector<std::vector<int64_t>> dims_;
};
// Fixture whose program feeds two 6-element tensors into `a` and `b` and
// fetches both back unchanged. inputs_/dims_ hold the data to feed.
class ExecutorTesterFeedAndFetch : public ::testing::Test {
 public:
  void SetUp() override {
    auto* block_proto = pdesc_.add_blocks();
    block_proto->set_idx(0);
    block_proto->set_parent_idx(-1);

    // wrap to BlockDescBind
    ProgramDescBind& program = ProgramDescBind::Instance(&pdesc_);
    BlockDescBind* block = program.Block(0);

    std::vector<int> dim{6};

    AddOp("feed", {{"Input", {kFeedValueName}}}, {{"Out", {"a"}}},
          {{"dims", dim}, {"col", 0}}, block);
    AddOp("feed", {{"Input", {kFeedValueName}}}, {{"Out", {"b"}}},
          {{"dims", dim}, {"col", 1}}, block);
    AddOp("fetch", {{"Input", {"a"}}}, {{"Out", {kFetchValueName}}},
          {{"col", 0}}, block);
    AddOp("fetch", {{"Input", {"b"}}}, {{"Out", {kFetchValueName}}},
          {{"col", 1}}, block);

    // Proto() flushes the pending edits into pdesc_.
    program.Proto();

    std::vector<float> first = {1.0, 2.0, 3.0, 4.0, 5.0, 6.0};
    std::vector<float> second = {4.0, 5.0, 6.0, 7.0, 8.0, 9.0};
    inputs_.push_back(first);
    inputs_.push_back(second);
    dims_.push_back({static_cast<int64_t>(first.size())});
    dims_.push_back({static_cast<int64_t>(second.size())});
  }

 protected:
  ProgramDesc pdesc_;
  std::vector<std::vector<float>> inputs_;
  std::vector<std::vector<int64_t>> dims_;
};
#ifndef PADDLE_WITH_CUDA
// Runs the initialization program and then one pass of the training program
// on the CPU. Only successful execution is checked.
TEST_F(ExecutorTesterRandom, CPU) {
  std::vector<Place> places;
  CPUPlace cpu_place;
  places.push_back(cpu_place);

  // We have a global Scope and BuddyAllocator, and we must ensure
  // global BuddyAllocator is initialized before global Scope. Thus,
  // global Scope will deconstruct before BuddyAllocator. Otherwise,
  // "pointer being freed was not allocated" error will appear.
  paddle::memory::Used(cpu_place);

  Executor executor(places);
  Scope* global_scope = &GetGlobalScope();

  executor.Run(init_pdesc_, global_scope, 0);
  SetFeedVariable<float>(inputs_, dims_);
  executor.Run(pdesc_, global_scope, 0);
  std::vector<std::vector<float>> result = GetFetchVariable<float>();
}
// Feeds the fixture inputs three times on the CPU and checks that each run
// fetches back exactly what was fed.
TEST_F(ExecutorTesterFeedAndFetch, CPU) {
  std::vector<Place> places;
  CPUPlace cpu_place;
  places.emplace_back(cpu_place);

  // We have a global Scope and BuddyAllocator, and we must ensure
  // global BuddyAllocator is initialized before global Scope. Thus,
  // global Scope will deconstruct before BuddyAllocator. Otherwise,
  // "pointer being freed was not allocated" error will appear.
  paddle::memory::Used(cpu_place);

  Executor executor(places);

  int batch_id = 0;
  while (batch_id < 3) {
    SetFeedVariable<float>(inputs_, dims_);
    executor.Run(pdesc_, &GetGlobalScope(), 0);
    std::vector<std::vector<float>> result = GetFetchVariable<float>();
    ASSERT_EQ(result.size(), inputs_.size());
    for (size_t row = 0; row < result.size(); ++row) {
      ASSERT_EQ(result[row].size(), inputs_[row].size());
      for (size_t col = 0; col < result[row].size(); ++col) {
        ASSERT_EQ(result[row][col], inputs_[row][col]);
      }
    }
    ++batch_id;
  }
}
#else
// Runs the initialization program once and the training program for three
// batches on GPU 0. Only successful execution is checked.
TEST_F(ExecutorTesterRandom, GPU) {
  std::vector<Place> places;
  GPUPlace gpu_place(0);
  places.push_back(gpu_place);

  // We have a global Scope and BuddyAllocator, and we must ensure
  // global BuddyAllocator is initialized before global Scope. Thus,
  // global Scope will deconstruct before BuddyAllocator. Otherwise,
  // "pointer being freed was not allocated" error will appear.
  // If paddle is compiled with GPU, both CPU and GPU BuddyAllocator
  // need to be used at first.
  paddle::memory::Used(CPUPlace());
  paddle::memory::Used(gpu_place);

  Executor executor(places);

  executor.Run(init_pdesc_, &GetGlobalScope(), 0);
  int batch_id = 0;
  while (batch_id < 3) {
    SetFeedVariable<float>(inputs_, dims_);
    executor.Run(pdesc_, &GetGlobalScope(), 0);
    ++batch_id;
  }
}
// Feeds the fixture inputs three times on GPU 0 and checks that each run
// fetches back exactly what was fed. The checks use gtest assertions, like the
// CPU variant of this test, so a mismatch is reported as a test failure with
// both values instead of surfacing as an exception.
TEST_F(ExecutorTesterFeedAndFetch, GPU) {
  std::vector<Place> places;
  GPUPlace gpu_place(0);
  places.push_back(gpu_place);
  // We have a global Scope and BuddyAllocator, and we must ensure
  // global BuddyAllocator is initialized before global Scope. Thus,
  // global Scope will deconstruct before BuddyAllocator. Otherwise,
  // "pointer being freed was not allocated" error will appear.
  // If paddle is compiled with GPU, both CPU and GPU BuddyAllocator
  // need to be used at first.
  paddle::memory::Used(CPUPlace());
  paddle::memory::Used(gpu_place);

  std::unique_ptr<Executor> executor(new Executor(places));

  for (int batch_id = 0; batch_id < 3; batch_id++) {
    SetFeedVariable<float>(inputs_, dims_);
    executor->Run(pdesc_, &GetGlobalScope(), 0);
    std::vector<std::vector<float>> result = GetFetchVariable<float>();
    ASSERT_EQ(result.size(), inputs_.size());
    for (size_t i = 0; i < result.size(); ++i) {
      ASSERT_EQ(result[i].size(), inputs_[i].size());
      for (size_t j = 0; j < result[i].size(); ++j) {
        ASSERT_EQ(result[i][j], inputs_[i][j]);
      }
    }
  }
}
DECLARE_double(fraction_of_gpu_memory_to_use);

// Test entry point for the GPU build: initializes gtest, lowers the GPU memory
// fraction flag and runs every registered test.
int main(int argc, char** argv) {
  testing::InitGoogleTest(&argc, argv);

  // Use less GPU memory for unittest.
  FLAGS_fraction_of_gpu_memory_to_use = 0.25;

  const int exit_code = RUN_ALL_TESTS();
  return exit_code;
}
#endif
/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserve.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
#pragma once
#include <vector>
#include "paddle/framework/lod_tensor.h"
namespace paddle {
namespace framework {
// Element type of a feed/fetch list: a single LoDTensor.
using FeedFetchType = LoDTensor;
// A feed/fetch list: a vector of FeedFetchType.
using FeedFetchList = std::vector<FeedFetchType>;
}  // namespace framework
}  // namespace paddle
...@@ -97,16 +97,26 @@ enum DataType { ...@@ -97,16 +97,26 @@ enum DataType {
FP64 = 6; FP64 = 6;
} }
message LoDTensorDesc { message TensorDesc {
required DataType data_type = 1; required DataType data_type = 1;
repeated int64 dims = 2; // [UNK, 640, 480] is saved as [-1, 640, 480] repeated int64 dims = 2; // [UNK, 640, 480] is saved as [-1, 640, 480]
optional int32 lod_level = 3 [ default = 0 ]; }
message LoDTensorDesc {
required TensorDesc tensor = 1;
optional int32 lod_level = 2 [ default = 0 ];
} }
message VarDesc { message VarDesc {
enum VarType {
LOD_TENSOR = 1;
SELECTED_ROWS = 2;
}
required string name = 1; required string name = 1;
optional LoDTensorDesc lod_tensor = 2; required VarType type = 2;
optional bool persistable = 3 [ default = false ]; optional LoDTensorDesc lod_tensor = 3;
optional TensorDesc selected_rows = 4;
optional bool persistable = 5 [ default = false ];
} }
message BlockDesc { message BlockDesc {
......
...@@ -13,6 +13,8 @@ ...@@ -13,6 +13,8 @@
limitations under the License. */ limitations under the License. */
#pragma once #pragma once
#include <string>
#include <unordered_set>
#include "paddle/framework/op_desc.h" #include "paddle/framework/op_desc.h"
#include "paddle/framework/operator.h" #include "paddle/framework/operator.h"
...@@ -21,27 +23,50 @@ namespace framework { ...@@ -21,27 +23,50 @@ namespace framework {
class GradOpDescMakerBase { class GradOpDescMakerBase {
public: public:
explicit GradOpDescMakerBase(const OpDescBind& fwd_op) : fwd_op_(fwd_op) {} explicit GradOpDescMakerBase(
const OpDescBind& fwd_op,
const std::unordered_set<std::string>& no_grad_set,
std::unordered_map<std::string, std::string>* grad_to_var)
: fwd_op_(fwd_op), no_grad_set_(no_grad_set), grad_to_var_(grad_to_var) {}
virtual ~GradOpDescMakerBase() = default; virtual ~GradOpDescMakerBase() = default;
virtual std::vector<std::unique_ptr<OpDescBind>> operator()() const = 0; virtual std::vector<std::unique_ptr<OpDescBind>> operator()() const = 0;
protected: protected:
static std::vector<std::string> ToGradNames( std::vector<std::string> InputGrad(const std::string& name,
const std::vector<std::string>& var_names) { bool drop_empty_grad = true) const {
std::vector<std::string> ret_val; std::vector<std::string> ret_val;
auto var_names = this->Input(name);
ret_val.reserve(var_names.size()); ret_val.reserve(var_names.size());
std::transform(var_names.begin(), var_names.end(), std::transform(var_names.begin(), var_names.end(),
std::back_inserter(ret_val), GradVarName); std::back_inserter(ret_val),
[this](const std::string& fwd_var_name) -> std::string {
auto g_name = GradVarName(fwd_var_name);
if (no_grad_set_.count(g_name)) {
return kEmptyVarName;
} else {
(*this->grad_to_var_)[g_name] = fwd_var_name;
return g_name;
}
});
if (!drop_empty_grad) {
return ret_val; return ret_val;
} }
std::vector<std::string> dropped_ret_val;
std::vector<std::string> InputGrad(const std::string& name) const { dropped_ret_val.reserve(ret_val.size());
return ToGradNames(fwd_op_.Input(name)); std::copy_if(ret_val.begin(), ret_val.end(),
std::back_inserter(dropped_ret_val),
[](const std::string& str) { return str != kEmptyVarName; });
return dropped_ret_val;
} }
std::vector<std::string> OutputGrad(const std::string& name) const { std::vector<std::string> OutputGrad(const std::string& name) const {
return ToGradNames(fwd_op_.Output(name)); std::vector<std::string> ret_val;
auto onames = this->Output(name);
ret_val.reserve(onames.size());
std::transform(onames.begin(), onames.end(), std::back_inserter(ret_val),
GradVarName);
return ret_val;
} }
std::vector<std::string> InputNames() const { std::vector<std::string> InputNames() const {
...@@ -75,6 +100,8 @@ class GradOpDescMakerBase { ...@@ -75,6 +100,8 @@ class GradOpDescMakerBase {
private: private:
const OpDescBind& fwd_op_; const OpDescBind& fwd_op_;
const std::unordered_set<std::string>& no_grad_set_;
std::unordered_map<std::string, std::string>* grad_to_var_;
}; };
class SingleGradOpDescMaker : public GradOpDescMakerBase { class SingleGradOpDescMaker : public GradOpDescMakerBase {
...@@ -91,6 +118,7 @@ class SingleGradOpDescMaker : public GradOpDescMakerBase { ...@@ -91,6 +118,7 @@ class SingleGradOpDescMaker : public GradOpDescMakerBase {
virtual std::unique_ptr<OpDescBind> Apply() const = 0; virtual std::unique_ptr<OpDescBind> Apply() const = 0;
}; };
template <bool DropEmptyIG = true>
class DefaultGradOpDescMaker : public SingleGradOpDescMaker { class DefaultGradOpDescMaker : public SingleGradOpDescMaker {
public: public:
using SingleGradOpDescMaker::SingleGradOpDescMaker; using SingleGradOpDescMaker::SingleGradOpDescMaker;
...@@ -102,7 +130,8 @@ class DefaultGradOpDescMaker : public SingleGradOpDescMaker { ...@@ -102,7 +130,8 @@ class DefaultGradOpDescMaker : public SingleGradOpDescMaker {
for (auto& input_param : this->InputNames()) { for (auto& input_param : this->InputNames()) {
grad->SetInput(input_param, this->Input(input_param)); grad->SetInput(input_param, this->Input(input_param));
grad->SetOutput(GradVarName(input_param), this->InputGrad(input_param)); grad->SetOutput(GradVarName(input_param),
this->InputGrad(input_param, DropEmptyIG));
} }
for (auto& output_param : this->OutputNames()) { for (auto& output_param : this->OutputNames()) {
...@@ -120,5 +149,13 @@ class DefaultGradOpDescMaker : public SingleGradOpDescMaker { ...@@ -120,5 +149,13 @@ class DefaultGradOpDescMaker : public SingleGradOpDescMaker {
} }
}; };
// Grad-op maker that always yields an empty list of grad op descriptions.
class EmptyGradOpMaker : public GradOpDescMakerBase {
 public:
  using GradOpDescMakerBase::GradOpDescMakerBase;

  std::vector<std::unique_ptr<OpDescBind>> operator()() const override {
    std::vector<std::unique_ptr<OpDescBind>> no_grad_ops;
    return no_grad_ops;
  }
};
} // namespace framework } // namespace framework
} // namespace paddle } // namespace paddle
...@@ -32,7 +32,7 @@ OpDescBind::OpDescBind(const std::string &type, const VariableNameMap &inputs, ...@@ -32,7 +32,7 @@ OpDescBind::OpDescBind(const std::string &type, const VariableNameMap &inputs,
} }
OpDesc *OpDescBind::Proto() { OpDesc *OpDescBind::Proto() {
Sync(); Flush();
return &op_desc_; return &op_desc_;
} }
...@@ -100,6 +100,12 @@ void OpDescBind::SetAttr(const std::string &name, const Attribute &v) { ...@@ -100,6 +100,12 @@ void OpDescBind::SetAttr(const std::string &name, const Attribute &v) {
need_update_ = true; need_update_ = true;
} }
void OpDescBind::SetBlockAttr(const std::string &name, BlockDescBind &block) {
BlockDesc *desc = block.Proto();
this->attrs_[name] = desc;
need_update_ = true;
}
void OpDescBind::SetAttrMap( void OpDescBind::SetAttrMap(
const std::unordered_map<std::string, Attribute> &attr_map) { const std::unordered_map<std::string, Attribute> &attr_map) {
attrs_ = attr_map; attrs_ = attr_map;
...@@ -159,7 +165,7 @@ struct SetAttrDescVisitor : public boost::static_visitor<void> { ...@@ -159,7 +165,7 @@ struct SetAttrDescVisitor : public boost::static_visitor<void> {
void operator()(boost::blank) const { PADDLE_THROW("Unexpected branch"); } void operator()(boost::blank) const { PADDLE_THROW("Unexpected branch"); }
}; };
void OpDescBind::Sync() { void OpDescBind::Flush() {
if (need_update_) { if (need_update_) {
this->op_desc_.mutable_inputs()->Clear(); this->op_desc_.mutable_inputs()->Clear();
for (auto &ipt : inputs_) { for (auto &ipt : inputs_) {
...@@ -211,6 +217,18 @@ static InferShapeFuncMap &InferShapeFuncs() { ...@@ -211,6 +217,18 @@ static InferShapeFuncMap &InferShapeFuncs() {
return *g_map; return *g_map;
} }
// Validates the attributes of this op with the attribute checker registered
// for its type. The type must be set first. When no checker is registered for
// the type the call is a no-op.
void OpDescBind::CheckAttrs() {
  PADDLE_ENFORCE(!Type().empty(),
                 "CheckAttrs() can not be called before type is set.");
  auto *checker = OpInfoMap::Instance().Get(Type()).Checker();
  if (checker == nullptr) {
    // checker is not configured. That operator could be generated by Paddle,
    // not by users.
    return;
  }
  checker->Check(attrs_);
}
void OpDescBind::InferShape(const BlockDescBind &block) const { void OpDescBind::InferShape(const BlockDescBind &block) const {
auto &funcs = InferShapeFuncs(); auto &funcs = InferShapeFuncs();
auto it = funcs.find(this->Type()); auto it = funcs.find(this->Type());
......
...@@ -89,8 +89,6 @@ class OpDescBind { ...@@ -89,8 +89,6 @@ class OpDescBind {
this->need_update_ = true; this->need_update_ = true;
} }
void Sync();
const VariableNameMap &Inputs() const { return inputs_; } const VariableNameMap &Inputs() const { return inputs_; }
const VariableNameMap &Outputs() const { return outputs_; } const VariableNameMap &Outputs() const { return outputs_; }
...@@ -100,8 +98,12 @@ class OpDescBind { ...@@ -100,8 +98,12 @@ class OpDescBind {
return &this->attrs_; return &this->attrs_;
} }
void CheckAttrs();
void InferShape(const BlockDescBind &block) const; void InferShape(const BlockDescBind &block) const;
void Flush();
private: private:
template <typename MapType> template <typename MapType>
static std::vector<typename MapType::key_type> MapKeys(const MapType &map) { static std::vector<typename MapType::key_type> MapKeys(const MapType &map) {
......
...@@ -59,16 +59,5 @@ std::unique_ptr<OperatorBase> OpRegistry::CreateOp(const OpDescBind& op_desc) { ...@@ -59,16 +59,5 @@ std::unique_ptr<OperatorBase> OpRegistry::CreateOp(const OpDescBind& op_desc) {
op_desc.GetAttrMap()); op_desc.GetAttrMap());
} }
std::vector<std::unique_ptr<OpDescBind>> OpRegistry::CreateGradOpDescs(
OpDescBind* op_desc) {
auto& info = OpInfoMap::Instance().Get(op_desc->Type());
if (info.Checker() != nullptr) {
info.Checker()->Check(*op_desc->MutableAttrMap());
}
return info.grad_op_maker_(*op_desc);
}
} // namespace framework } // namespace framework
} // namespace paddle } // namespace paddle
...@@ -79,9 +79,6 @@ class OpRegistry { ...@@ -79,9 +79,6 @@ class OpRegistry {
static std::unique_ptr<OperatorBase> CreateOp(const OpDesc& op_desc); static std::unique_ptr<OperatorBase> CreateOp(const OpDesc& op_desc);
static std::vector<std::unique_ptr<OpDescBind>> CreateGradOpDescs(
OpDescBind* op_desc);
static std::unique_ptr<OperatorBase> CreateOp(const OpDescBind& op_desc); static std::unique_ptr<OperatorBase> CreateOp(const OpDescBind& op_desc);
}; };
...@@ -164,8 +161,9 @@ class OpKernelRegistrar : public Registrar { ...@@ -164,8 +161,9 @@ class OpKernelRegistrar : public Registrar {
grad_op_class) \ grad_op_class) \
REGISTER_OPERATOR(grad_op_type, grad_op_class); \ REGISTER_OPERATOR(grad_op_type, grad_op_class); \
class _GradOpDescMaker_##grad_op_type##_ \ class _GradOpDescMaker_##grad_op_type##_ \
: public ::paddle::framework::DefaultGradOpDescMaker { \ : public ::paddle::framework::DefaultGradOpDescMaker<true> { \
using ::paddle::framework::DefaultGradOpDescMaker::DefaultGradOpDescMaker; \ using ::paddle::framework::DefaultGradOpDescMaker< \
true>::DefaultGradOpDescMaker; \
\ \
protected: \ protected: \
virtual std::string GradOpType() const { return #grad_op_type; } \ virtual std::string GradOpType() const { return #grad_op_type; } \
......
...@@ -143,8 +143,8 @@ class OperatorBase { ...@@ -143,8 +143,8 @@ class OperatorBase {
// If you are writing an kernel operator, `Clone` will be defined when you // If you are writing an kernel operator, `Clone` will be defined when you
// register it. i.e. `Clone` method is not needed to define by yourself. // register it. i.e. `Clone` method is not needed to define by yourself.
#define DEFINE_OP_CLONE_METHOD(cls) \ #define DEFINE_OP_CLONE_METHOD(cls) \
std::unique_ptr<OperatorBase> Clone() const final { \ std::unique_ptr<::paddle::framework::OperatorBase> Clone() const final { \
return std::unique_ptr<OperatorBase>(new cls(*this)); \ return std::unique_ptr<::paddle::framework::OperatorBase>(new cls(*this)); \
} }
// Macro for define a default constructor for Operator. // Macro for define a default constructor for Operator.
...@@ -289,6 +289,15 @@ class ExecutionContext { ...@@ -289,6 +289,15 @@ class ExecutionContext {
return device_context_; return device_context_;
} }
#ifdef PADDLE_WITH_CUDA
// Returns this execution context's device context as a CUDADeviceContext.
// Only compiled when PADDLE_WITH_CUDA is defined. Enforces that the device
// context's place is a GPU place before converting.
const platform::CUDADeviceContext& cuda_device_context() const {
  PADDLE_ENFORCE(platform::is_gpu_place(device_context_.GetPlace()));
  // static_cast is the well-defined way to downcast within a class
  // hierarchy: it is rejected at compile time if the two types are
  // unrelated, and it applies any required base-pointer adjustment, neither
  // of which reinterpret_cast does.
  return static_cast<const platform::CUDADeviceContext&>(device_context_);
}
#endif
private: private:
const OperatorBase& op_; const OperatorBase& op_;
const Scope& scope_; const Scope& scope_;
...@@ -394,11 +403,11 @@ class CompileTimeInferShapeContext : public InferShapeContext { ...@@ -394,11 +403,11 @@ class CompileTimeInferShapeContext : public InferShapeContext {
private: private:
DDim GetDim(const std::string& name) const override { DDim GetDim(const std::string& name) const override {
return framework::make_ddim(block_.Var(name)->Shape()); return framework::make_ddim(block_.FindVar(name)->Shape());
} }
void SetDim(const std::string& name, const DDim& dim) override { void SetDim(const std::string& name, const DDim& dim) override {
block_.Var(name)->SetShape(framework::vectorize(dim)); block_.FindVar(name)->SetShape(framework::vectorize(dim));
} }
const OpDescBind& op_; const OpDescBind& op_;
......
...@@ -84,7 +84,7 @@ TEST(OperatorBase, all) { ...@@ -84,7 +84,7 @@ TEST(OperatorBase, all) {
paddle::framework::Scope scope; paddle::framework::Scope scope;
auto op = paddle::framework::OpRegistry::CreateOp(op_desc); auto op = paddle::framework::OpRegistry::CreateOp(op_desc);
scope.NewVar("OUT1"); scope.Var("OUT1");
ASSERT_EQ(paddle::framework::op_run_num, 0); ASSERT_EQ(paddle::framework::op_run_num, 0);
op->Run(scope, device_context); op->Run(scope, device_context);
ASSERT_EQ(paddle::framework::op_run_num, 1); ASSERT_EQ(paddle::framework::op_run_num, 1);
...@@ -237,12 +237,12 @@ TEST(OpKernel, multi_inputs) { ...@@ -237,12 +237,12 @@ TEST(OpKernel, multi_inputs) {
paddle::platform::CPUDeviceContext cpu_device_context; paddle::platform::CPUDeviceContext cpu_device_context;
paddle::framework::Scope scope; paddle::framework::Scope scope;
scope.NewVar("x0")->GetMutable<Tensor>(); scope.Var("x0")->GetMutable<Tensor>();
scope.NewVar("x1")->GetMutable<Tensor>(); scope.Var("x1")->GetMutable<Tensor>();
scope.NewVar("x2")->GetMutable<Tensor>(); scope.Var("x2")->GetMutable<Tensor>();
scope.NewVar("k0")->GetMutable<Tensor>(); scope.Var("k0")->GetMutable<Tensor>();
scope.NewVar("y0")->GetMutable<Tensor>(); scope.Var("y0")->GetMutable<Tensor>();
scope.NewVar("y1")->GetMutable<Tensor>(); scope.Var("y1")->GetMutable<Tensor>();
auto op = paddle::framework::OpRegistry::CreateOp(op_desc); auto op = paddle::framework::OpRegistry::CreateOp(op_desc);
op->Run(scope, cpu_device_context); op->Run(scope, cpu_device_context);
......
...@@ -45,7 +45,7 @@ BlockDescBind *ProgramDescBind::AppendBlock(const BlockDescBind &parent) { ...@@ -45,7 +45,7 @@ BlockDescBind *ProgramDescBind::AppendBlock(const BlockDescBind &parent) {
ProgramDesc *ProgramDescBind::Proto() { ProgramDesc *ProgramDescBind::Proto() {
for (auto &block : blocks_) { for (auto &block : blocks_) {
block->Sync(); block->Flush();
} }
return prog_; return prog_;
} }
......
...@@ -13,6 +13,9 @@ See the License for the specific language governing permissions and ...@@ -13,6 +13,9 @@ See the License for the specific language governing permissions and
limitations under the License. */ limitations under the License. */
#include "paddle/framework/scope.h" #include "paddle/framework/scope.h"
#include <memory> // for unique_ptr
#include <mutex> // for call_once
#include "paddle/string/printf.h" #include "paddle/string/printf.h"
namespace paddle { namespace paddle {
...@@ -28,7 +31,7 @@ Scope& Scope::NewScope() const { ...@@ -28,7 +31,7 @@ Scope& Scope::NewScope() const {
return *kids_.back(); return *kids_.back();
} }
Variable* Scope::NewVar(const std::string& name) { Variable* Scope::Var(const std::string& name) {
auto iter = vars_.find(name); auto iter = vars_.find(name);
if (iter != vars_.end()) { if (iter != vars_.end()) {
return iter->second; return iter->second;
...@@ -39,8 +42,8 @@ Variable* Scope::NewVar(const std::string& name) { ...@@ -39,8 +42,8 @@ Variable* Scope::NewVar(const std::string& name) {
return v; return v;
} }
Variable* Scope::NewVar() { Variable* Scope::Var() {
return NewVar(string::Sprintf("%p.%d", this, vars_.size())); return Var(string::Sprintf("%p.%d", this, vars_.size()));
} }
Variable* Scope::FindVar(const std::string& name) const { Variable* Scope::FindVar(const std::string& name) const {
...@@ -62,5 +65,17 @@ void Scope::DropKids() { ...@@ -62,5 +65,17 @@ void Scope::DropKids() {
kids_.clear(); kids_.clear();
} }
// Guards the one-time construction of the global scope below.
std::once_flag feed_variable_flag;

// Returns the process-wide Scope. On the first call the scope is created
// and the variables "feed_value" and "fetch_value" are created in it; later
// calls return the same instance.
framework::Scope& GetGlobalScope() {
  static std::unique_ptr<framework::Scope> g_scope;
  auto build_scope = [&]() {
    g_scope.reset(new framework::Scope());
    g_scope->Var("feed_value");
    g_scope->Var("fetch_value");
  };
  std::call_once(feed_variable_flag, build_scope);
  return *g_scope;
}
} // namespace framework } // namespace framework
} // namespace paddle } // namespace paddle
...@@ -45,10 +45,10 @@ class Scope { ...@@ -45,10 +45,10 @@ class Scope {
Scope& NewScope() const; Scope& NewScope() const;
/// Create a variable with given name if it doesn't exist. /// Create a variable with given name if it doesn't exist.
Variable* NewVar(const std::string& name); Variable* Var(const std::string& name);
/// Create a variable with a scope-unique name. /// Create a variable with a scope-unique name.
Variable* NewVar(); Variable* Var();
/// Find a variable in the scope or any of its ancestors. Returns /// Find a variable in the scope or any of its ancestors. Returns
/// nullptr if cannot find. /// nullptr if cannot find.
...@@ -73,5 +73,7 @@ class Scope { ...@@ -73,5 +73,7 @@ class Scope {
DISABLE_COPY_AND_ASSIGN(Scope); DISABLE_COPY_AND_ASSIGN(Scope);
}; };
framework::Scope& GetGlobalScope();
} // namespace framework } // namespace framework
} // namespace paddle } // namespace paddle
...@@ -23,8 +23,8 @@ TEST(Scope, VarsShadowing) { ...@@ -23,8 +23,8 @@ TEST(Scope, VarsShadowing) {
Scope& ss1 = s.NewScope(); Scope& ss1 = s.NewScope();
Scope& ss2 = s.NewScope(); Scope& ss2 = s.NewScope();
Variable* v0 = s.NewVar("a"); Variable* v0 = s.Var("a");
Variable* v1 = ss1.NewVar("a"); Variable* v1 = ss1.Var("a");
EXPECT_NE(v0, v1); EXPECT_NE(v0, v1);
...@@ -40,7 +40,7 @@ TEST(Scope, FindVar) { ...@@ -40,7 +40,7 @@ TEST(Scope, FindVar) {
EXPECT_EQ(nullptr, s.FindVar("a")); EXPECT_EQ(nullptr, s.FindVar("a"));
EXPECT_EQ(nullptr, ss.FindVar("a")); EXPECT_EQ(nullptr, ss.FindVar("a"));
ss.NewVar("a"); ss.Var("a");
EXPECT_EQ(nullptr, s.FindVar("a")); EXPECT_EQ(nullptr, s.FindVar("a"));
EXPECT_NE(nullptr, ss.FindVar("a")); EXPECT_NE(nullptr, ss.FindVar("a"));
...@@ -49,7 +49,7 @@ TEST(Scope, FindVar) { ...@@ -49,7 +49,7 @@ TEST(Scope, FindVar) {
TEST(Scope, FindScope) { TEST(Scope, FindScope) {
Scope s; Scope s;
Scope& ss = s.NewScope(); Scope& ss = s.NewScope();
Variable* v = s.NewVar("a"); Variable* v = s.Var("a");
EXPECT_EQ(&s, s.FindScope(v)); EXPECT_EQ(&s, s.FindScope(v));
EXPECT_EQ(&s, ss.FindScope(v)); EXPECT_EQ(&s, ss.FindScope(v));
......
/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserve.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
#include "paddle/framework/selected_rows.h"
namespace paddle {
namespace framework {} // namespace framework
} // namespace paddle
/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserve.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
#pragma once
#include "paddle/framework/tensor.h"
namespace paddle {
namespace framework {
/**
 * SelectedRows bundles a list of row ids, a value Tensor and a height.
 *
 * NOTE(review): presumably this represents a subset of the rows of a taller
 * tensor whose first dimension is `height` -- confirm with the design doc.
 * The class itself does not check that `rows` and the value tensor agree.
 */
class SelectedRows {
 public:
  /**
   * @param[in] rows    The row ids to store (copied).
   * @param[in] height  The height to store.
   *
   * The value tensor is created empty; callers fill it in through
   * mutable_value().
   */
  SelectedRows(const std::vector<int64_t>& rows, const int64_t& height)
      : rows_(rows), height_(height) {
    value_.reset(new Tensor());
  }

  /**
   * Creates an object with no rows, an empty value tensor and height 0.
   * height_ is set explicitly so that height() and GetCompleteDims() never
   * read an indeterminate value.
   */
  SelectedRows() : height_(0) { value_.reset(new Tensor()); }

  /// Place of the value tensor.
  platform::Place place() const { return value_->place(); }

  /// Read-only access to the value tensor.
  const Tensor& value() const { return *value_; }

  /// Mutable access to the value tensor; the object keeps ownership.
  Tensor* mutable_value() { return value_.get(); }

  int64_t height() const { return height_; }

  void set_height(int64_t height) { height_ = height; }

  const std::vector<int64_t>& rows() const { return rows_; }

  void set_rows(const std::vector<int64_t>& rows) { rows_ = rows; }

  /**
   * Returns the dims of the value tensor with the first dimension replaced
   * by height().
   *
   * Assumes the value tensor's dims have at least one dimension -- TODO
   * confirm this holds for a freshly constructed Tensor.
   */
  DDim GetCompleteDims() const {
    std::vector<int64_t> dims = vectorize(value_->dims());
    dims[0] = height_;
    return make_ddim(dims);
  }

 private:
  std::vector<int64_t> rows_;
  std::unique_ptr<Tensor> value_{nullptr};
  int64_t height_;
};
} // namespace framework
} // namespace paddle
/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserve.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
#include "paddle/framework/selected_rows.h"
#include "gtest/gtest.h"
namespace paddle {
namespace framework {
// Fixture that builds a SelectedRows with rows {0, 4, 7}, height 10 and a
// float value tensor of shape {3, 100} allocated on the CPU place.
class SelectedRowsTester : public ::testing::Test {
 public:
  void SetUp() override {
    const std::vector<int64_t> row_ids{0, 4, 7};
    const int64_t total_height = 10;
    const int64_t row_width = 100;

    selected_rows_.reset(new SelectedRows(row_ids, total_height));

    // One value row per stored row id.
    selected_rows_->mutable_value()->mutable_data<float>(
        make_ddim({static_cast<int64_t>(row_ids.size()), row_width}), place_);
  }

 protected:
  platform::CPUPlace place_;
  std::unique_ptr<SelectedRows> selected_rows_{nullptr};
};
// height() reports the height given to the constructor.
TEST_F(SelectedRowsTester, height) {
  const auto stored_height = selected_rows_->height();
  ASSERT_EQ(stored_height, 10);
}

// The value tensor keeps the shape it was allocated with in SetUp().
TEST_F(SelectedRowsTester, dims) {
  const auto value_dims = selected_rows_->value().dims();
  ASSERT_EQ(value_dims, make_ddim({3, 100}));
}

// GetCompleteDims() swaps the first dimension of the value dims for height().
TEST_F(SelectedRowsTester, complete_dims) {
  const auto complete_dims = selected_rows_->GetCompleteDims();
  ASSERT_EQ(complete_dims, make_ddim({10, 100}));
}
} // namespace framework
} // namespace paddle
...@@ -19,9 +19,6 @@ limitations under the License. */ ...@@ -19,9 +19,6 @@ limitations under the License. */
namespace paddle { namespace paddle {
namespace framework { namespace framework {
// TODO(longfei): Once after both CompileTimeInferShapeContext and
// RuntimeInferShapeContext get merged, we can rename InferShapeContext into
// InferShapeContext so to replace the current InferShapeContext.
class InferShapeContext { class InferShapeContext {
public: public:
virtual ~InferShapeContext() {} virtual ~InferShapeContext() {}
......
...@@ -88,25 +88,46 @@ class Tensor { ...@@ -88,25 +88,46 @@ class Tensor {
* @brief Copy the content of external tensor to a new place. * @brief Copy the content of external tensor to a new place.
* *
* @param[in] src The external tensor. * @param[in] src The external tensor.
* @param[in] ctx The device context contains place where to store. * @param[in] dst_place The dst place.
* @param[in] ctx The device context contains device resources.
* *
* @note CopyFrom supports CPU <-> GPU, GPU <-> GPU. * @note CopyFrom supports CPU <-> GPU, GPU <-> GPU.
*/ */
// TODO(qijun): https://github.com/PaddlePaddle/Paddle/issues/4647
// Remove `CopyFrom` and `CopyFromVector` from Tensor interface
// and make them global functions
template <typename T> template <typename T>
inline void CopyFrom(const Tensor& src, const platform::Place& dst_place); inline void CopyFrom(const Tensor& src, const platform::Place& dst_place,
const platform::DeviceContext& ctx);
// FIXME(yuyang18): CopyFrom should not need the template parameter T.
// Replace `CopyFrom` with `CopyFromTensor` once that is done.
/**
 * @brief Copy the content of an external tensor, choosing the element type
 *        from the source tensor at run time.
 *
 * @param[in] src        The external tensor.
 * @param[in] dst_place  The dst place.
 * @param[in] ctx        The device context, forwarded to CopyFrom<T>.
 *
 * @note Only double, float and int sources are supported; any other element
 *       type throws instead of silently leaving this tensor unchanged.
 */
inline void CopyFromTensor(const Tensor& src,
                           const platform::Place& dst_place,
                           const platform::DeviceContext& ctx) {
  // NOLINTNEXTLINES_8 cpplint.py will recognize below lines as functions.
  // That is a bug of cpplint.py. Just ignore lint these lines.
  if (src.type() == std::type_index(typeid(double))) {
    CopyFrom<double>(src, dst_place, ctx);
  } else if (src.type() == std::type_index(typeid(float))) {
    CopyFrom<float>(src, dst_place, ctx);
  } else if (src.type() == std::type_index(typeid(int))) {
    CopyFrom<int>(src, dst_place, ctx);
  } else {
    PADDLE_THROW("CopyFromTensor only supports double, float and int");
  }
}
/** /**
* @brief Copy the content of an external vector to a tensor. * @brief Copy the content of an external vector to a tensor.
* *
* @param[in] src The external vector. * @param[in] src The external tensor.
* @param[in] ctx The device context contains place where to store. * @param[in] ctx The device context contains device resources.
* *
* * @note CopyFromVector assumes that the tensor has been resized * * @note CopyFromVector assumes that the tensor has been resized
* before invoking. * before invoking.
*/ */
template <typename T> template <typename T>
inline void CopyFromVector(const std::vector<T>& src, inline void CopyFromVector(const std::vector<T>& src,
const platform::Place& dst_place); const platform::DeviceContext& ctx);
/** /**
* @brief Return the slice of the tensor. * @brief Return the slice of the tensor.
......
...@@ -76,6 +76,17 @@ LoDTensor PackDynamicBatch(const std::vector<LoDTensor>& source, ...@@ -76,6 +76,17 @@ LoDTensor PackDynamicBatch(const std::vector<LoDTensor>& source,
const std::vector<DySeqMeta>& meta, const LoD& lod, const std::vector<DySeqMeta>& meta, const LoD& lod,
size_t level); size_t level);
std::vector<size_t> GenDyBatchIndice(const DySeqMetaBatch& meta, int batch_id) {
// collect indice need to copy to the batch
std::vector<size_t> indice;
for (const auto& seq : meta) {
size_t id = seq.begin + batch_id;
if (id >= seq.end) break;
indice.push_back(id);
}
return indice;
}
} // namespace detail } // namespace detail
const LoDTensor& TensorArray::Read(size_t index) const { const LoDTensor& TensorArray::Read(size_t index) const {
...@@ -95,7 +106,8 @@ void TensorArray::Write(size_t index, const LoDTensor& value) { ...@@ -95,7 +106,8 @@ void TensorArray::Write(size_t index, const LoDTensor& value) {
values_[index].Resize(value.dims()); values_[index].Resize(value.dims());
values_[index].mutable_data<value_type>(platform::CPUPlace()); values_[index].mutable_data<value_type>(platform::CPUPlace());
values_[index].CopyFrom<value_type>(value, platform::CPUPlace()); values_[index].CopyFrom<value_type>(value, platform::CPUPlace(),
platform::CPUDeviceContext());
} }
void TensorArray::WriteShared(size_t index, const LoDTensor& value) { void TensorArray::WriteShared(size_t index, const LoDTensor& value) {
...@@ -112,7 +124,7 @@ LoDTensor TensorArray::Pack(size_t level, const std::vector<DySeqMeta>& meta, ...@@ -112,7 +124,7 @@ LoDTensor TensorArray::Pack(size_t level, const std::vector<DySeqMeta>& meta,
return detail::PackDynamicBatch(values_, meta, lod, level); return detail::PackDynamicBatch(values_, meta, lod, level);
} }
std::vector<DySeqMeta> TensorArray::Unpack(const LoDTensor& source, int level, DySeqMetaBatch TensorArray::Unpack(const LoDTensor& source, int level,
bool length_desend) { bool length_desend) {
detail::DynamicBatchUnpacker unpacker(source, level, detail::DynamicBatchUnpacker unpacker(source, level,
length_desend /*descend*/); length_desend /*descend*/);
...@@ -128,6 +140,7 @@ std::vector<DySeqMeta> TensorArray::Unpack(const LoDTensor& source, int level, ...@@ -128,6 +140,7 @@ std::vector<DySeqMeta> TensorArray::Unpack(const LoDTensor& source, int level,
Write(batch_id, unpacker.GetBatch(batch_id)); Write(batch_id, unpacker.GetBatch(batch_id));
} }
PADDLE_ENFORCE(!unpacker.meta.empty());
return unpacker.meta; return unpacker.meta;
} }
...@@ -151,7 +164,8 @@ LoDTensor TensorArray::Stack() const { ...@@ -151,7 +164,8 @@ LoDTensor TensorArray::Stack() const {
for (size_t idx = 0; idx < size(); idx++) { for (size_t idx = 0; idx < size(); idx++) {
result.Slice<value_type>(idx, idx + 1) result.Slice<value_type>(idx, idx + 1)
.CopyFrom<value_type>(Read(idx), platform::CPUPlace()); .CopyFrom<value_type>(Read(idx), platform::CPUPlace(),
platform::CPUDeviceContext());
} }
return result; return result;
} }
...@@ -182,7 +196,8 @@ void TensorArray::Unstack(const LoDTensor& source, bool data_shared) const { ...@@ -182,7 +196,8 @@ void TensorArray::Unstack(const LoDTensor& source, bool data_shared) const {
// copy // copy
value.Resize(value_dims); value.Resize(value_dims);
value.CopyFrom<value_type>(source.Slice<value_type>(elem, elem + 1), value.CopyFrom<value_type>(source.Slice<value_type>(elem, elem + 1),
platform::CPUPlace()); platform::CPUPlace(),
platform::CPUDeviceContext());
} }
} }
} }
...@@ -215,13 +230,7 @@ LoDTensor DynamicBatchUnpacker::GetBatch(size_t index) { ...@@ -215,13 +230,7 @@ LoDTensor DynamicBatchUnpacker::GetBatch(size_t index) {
PADDLE_ENFORCE(!meta.empty(), "should build meta first"); PADDLE_ENFORCE(!meta.empty(), "should build meta first");
LoDTensor result; LoDTensor result;
// collect indice need to copy to the batch auto indice = detail::GenDyBatchIndice(meta, index);
std::vector<size_t> indice;
for (const auto& seq : meta) {
size_t id = seq.begin + index;
if (id >= seq.end) break;
indice.push_back(id);
}
PADDLE_ENFORCE(!indice.empty(), "invalid batch at %d", index); PADDLE_ENFORCE(!indice.empty(), "invalid batch at %d", index);
// copy the indice of records in LoDTensor // copy the indice of records in LoDTensor
...@@ -234,9 +243,10 @@ LoDTensor DynamicBatchUnpacker::GetBatch(size_t index) { ...@@ -234,9 +243,10 @@ LoDTensor DynamicBatchUnpacker::GetBatch(size_t index) {
for (size_t i = 0; i < indice.size(); i++) { for (size_t i = 0; i < indice.size(); i++) {
auto index = indice[i]; auto index = indice[i];
auto target = result.Slice<value_type>(i, i + 1); auto target = result.Slice<value_type>(i, i + 1);
auto source_ = source->Slice<value_type>(index, index + 1); auto slice = source->Slice<value_type>(index, index + 1);
target.CopyFrom<value_type>(source_, platform::CPUPlace()); target.CopyFrom<value_type>(slice, platform::CPUPlace(),
platform::CPUDeviceContext());
} }
return result; return result;
...@@ -269,7 +279,8 @@ LoDTensor PackDynamicBatch(const std::vector<LoDTensor>& source, ...@@ -269,7 +279,8 @@ LoDTensor PackDynamicBatch(const std::vector<LoDTensor>& source,
if (index >= seq_meta.end) break; if (index >= seq_meta.end) break;
auto source_ = source[batch_id].Slice<float>(seq_id, seq_id + 1); auto source_ = source[batch_id].Slice<float>(seq_id, seq_id + 1);
auto target = result.Slice<float>(index, index + 1); auto target = result.Slice<float>(index, index + 1);
target.CopyFrom<float>(source_, platform::CPUPlace()); target.CopyFrom<float>(source_, platform::CPUPlace(),
platform::CPUDeviceContext());
} }
} }
......
...@@ -34,6 +34,13 @@ struct DySeqMeta { ...@@ -34,6 +34,13 @@ struct DySeqMeta {
size_t ori_idx; size_t ori_idx;
}; };
using DySeqMetaBatch = std::vector<DySeqMeta>;
/*
* Extract the indices of instances.
*/
std::vector<size_t> GenDyBatchIndice(const DySeqMetaBatch &metas, int batch_id);
/* /*
* TensorArray is a C-array-like array of tensors, it is meant to be used with * TensorArray is a C-array-like array of tensors, it is meant to be used with
* dynamic iteration primitives such as while_loop. It is used to segment inputs * dynamic iteration primitives such as while_loop. It is used to segment inputs
...@@ -69,7 +76,7 @@ class TensorArray { ...@@ -69,7 +76,7 @@ class TensorArray {
* Recover the original LoD-arranged LoDTensor with the `values`, `level` and * Recover the original LoD-arranged LoDTensor with the `values`, `level` and
* `indice_map`. * `indice_map`.
*/ */
LoDTensor Pack(size_t level, const std::vector<DySeqMeta> &meta, LoDTensor Pack(size_t level, const DySeqMetaBatch &meta,
const LoD &lod) const; const LoD &lod) const;
/* /*
...@@ -77,8 +84,7 @@ class TensorArray { ...@@ -77,8 +84,7 @@ class TensorArray {
* `values`, if set `desend`, will sort by length in descending order else in * `values`, if set `desend`, will sort by length in descending order else in
* ascending order. * ascending order.
*/ */
std::vector<DySeqMeta> Unpack(const LoDTensor &source, int level, DySeqMetaBatch Unpack(const LoDTensor &source, int level, bool length_desend);
bool length_desend);
/* /*
* Pack the values into a tensor with rank one higher than each tensor in * Pack the values into a tensor with rank one higher than each tensor in
...@@ -87,12 +93,12 @@ class TensorArray { ...@@ -87,12 +93,12 @@ class TensorArray {
LoDTensor Stack() const; LoDTensor Stack() const;
/* /*
* Unpacks the given division of a rank-`R` tensor into rank-`(R-1)` tensors. * Unstacks the given division of a rank-`R` tensor into rank-`(R-1)` tensors.
*/ */
void Unstack(const LoDTensor &source) const; void Unstack(const LoDTensor &source) const;
/* /*
* Unpacks the given division of a rank-`R` tensor into rank-`(R-1)` tensors, * Unstacks the given division of a rank-`R` tensor into rank-`(R-1)` tensors,
* with memory of tensors shared. * with memory of tensors shared.
*/ */
void UnstackShared(const LoDTensor &source) const; void UnstackShared(const LoDTensor &source) const;
......
...@@ -88,7 +88,8 @@ inline Tensor& Tensor::ShareDataWith(const Tensor& src) { ...@@ -88,7 +88,8 @@ inline Tensor& Tensor::ShareDataWith(const Tensor& src) {
template <typename T> template <typename T>
inline void Tensor::CopyFrom(const Tensor& src, inline void Tensor::CopyFrom(const Tensor& src,
const platform::Place& dst_place) { const platform::Place& dst_place,
const platform::DeviceContext& ctx) {
src.check_memory_size<T>(); src.check_memory_size<T>();
Resize(src.dims()); Resize(src.dims());
...@@ -106,26 +107,45 @@ inline void Tensor::CopyFrom(const Tensor& src, ...@@ -106,26 +107,45 @@ inline void Tensor::CopyFrom(const Tensor& src,
#ifdef PADDLE_WITH_CUDA #ifdef PADDLE_WITH_CUDA
else if (platform::is_gpu_place(src_place) && else if (platform::is_gpu_place(src_place) &&
platform::is_cpu_place(dst_place)) { platform::is_cpu_place(dst_place)) {
memory::Copy(boost::get<platform::CPUPlace>(dst_place), dst_ptr, auto src_gpu_place = boost::get<platform::GPUPlace>(src_place);
boost::get<platform::GPUPlace>(src_place), src_ptr, size, 0); auto dst_cpu_place = boost::get<platform::CPUPlace>(dst_place);
auto ctx_place = ctx.GetPlace();
PADDLE_ENFORCE(platform::is_gpu_place(ctx_place));
auto ctx_gpu_place = boost::get<platform::GPUPlace>(ctx_place);
PADDLE_ENFORCE_EQ(src_gpu_place, ctx_gpu_place);
memory::Copy(
dst_cpu_place, dst_ptr, src_gpu_place, src_ptr, size,
reinterpret_cast<const platform::CUDADeviceContext&>(ctx).stream());
} else if (platform::is_cpu_place(src_place) && } else if (platform::is_cpu_place(src_place) &&
platform::is_gpu_place(dst_place)) { platform::is_gpu_place(dst_place)) {
memory::Copy(boost::get<platform::GPUPlace>(dst_place), dst_ptr, auto src_cpu_place = boost::get<platform::CPUPlace>(src_place);
boost::get<platform::CPUPlace>(src_place), src_ptr, size, 0); auto dst_gpu_place = boost::get<platform::GPUPlace>(dst_place);
auto ctx_place = ctx.GetPlace();
PADDLE_ENFORCE(platform::is_gpu_place(ctx_place));
auto ctx_gpu_place = boost::get<platform::GPUPlace>(ctx_place);
PADDLE_ENFORCE_EQ(dst_gpu_place, ctx_gpu_place);
memory::Copy(
dst_gpu_place, dst_ptr, src_cpu_place, src_ptr, size,
reinterpret_cast<const platform::CUDADeviceContext&>(ctx).stream());
} else if (platform::is_gpu_place(src_place) && } else if (platform::is_gpu_place(src_place) &&
platform::is_gpu_place(dst_place)) { platform::is_gpu_place(dst_place)) {
memory::Copy(boost::get<platform::GPUPlace>(dst_place), dst_ptr, auto src_gpu_place = boost::get<platform::GPUPlace>(src_place);
boost::get<platform::GPUPlace>(src_place), src_ptr, size, 0); auto dst_gpu_place = boost::get<platform::GPUPlace>(dst_place);
auto ctx_place = ctx.GetPlace();
PADDLE_ENFORCE(platform::is_gpu_place(ctx_place));
auto ctx_gpu_place = boost::get<platform::GPUPlace>(ctx_place);
PADDLE_ENFORCE_EQ(src_gpu_place, ctx_gpu_place);
memory::Copy(
dst_gpu_place, dst_ptr, src_gpu_place, src_ptr, size,
reinterpret_cast<const platform::CUDADeviceContext&>(ctx).stream());
} }
PADDLE_ENFORCE(cudaStreamSynchronize(0),
"cudaStreamSynchronize failed in Tensor CopyFrom");
#endif #endif
} }
template <typename T> template <typename T>
inline void Tensor::CopyFromVector(const std::vector<T>& src, inline void Tensor::CopyFromVector(const std::vector<T>& src,
const platform::Place& dst_place) { const platform::DeviceContext& ctx) {
auto dst_place = ctx.GetPlace();
auto src_ptr = static_cast<const void*>(src.data()); auto src_ptr = static_cast<const void*>(src.data());
platform::CPUPlace src_place; platform::CPUPlace src_place;
auto dst_ptr = static_cast<void*>(mutable_data<T>(dst_place)); auto dst_ptr = static_cast<void*>(mutable_data<T>(dst_place));
...@@ -137,12 +157,11 @@ inline void Tensor::CopyFromVector(const std::vector<T>& src, ...@@ -137,12 +157,11 @@ inline void Tensor::CopyFromVector(const std::vector<T>& src,
} }
#ifdef PADDLE_WITH_CUDA #ifdef PADDLE_WITH_CUDA
else if (platform::is_gpu_place(dst_place)) { else if (platform::is_gpu_place(dst_place)) {
memory::Copy(boost::get<platform::GPUPlace>(dst_place), dst_ptr, src_place, memory::Copy(
src_ptr, size, 0); boost::get<platform::GPUPlace>(dst_place), dst_ptr, src_place, src_ptr,
size,
reinterpret_cast<const platform::CUDADeviceContext&>(ctx).stream());
} }
PADDLE_ENFORCE(cudaStreamSynchronize(0),
"cudaStreamSynchronize failed in Tensor CopyFromVector");
#endif #endif
} }
......
...@@ -194,6 +194,7 @@ TEST(Tensor, CopyFrom) { ...@@ -194,6 +194,7 @@ TEST(Tensor, CopyFrom) {
{ {
Tensor src_tensor; Tensor src_tensor;
Tensor dst_tensor; Tensor dst_tensor;
CPUDeviceContext cpu_ctx((CPUPlace()));
int* src_ptr = src_tensor.mutable_data<int>(make_ddim({3, 3}), CPUPlace()); int* src_ptr = src_tensor.mutable_data<int>(make_ddim({3, 3}), CPUPlace());
...@@ -201,7 +202,7 @@ TEST(Tensor, CopyFrom) { ...@@ -201,7 +202,7 @@ TEST(Tensor, CopyFrom) {
memcpy(src_ptr, arr, 9 * sizeof(int)); memcpy(src_ptr, arr, 9 * sizeof(int));
auto cpu_place = new paddle::platform::CPUPlace(); auto cpu_place = new paddle::platform::CPUPlace();
dst_tensor.CopyFrom<int>(src_tensor, *cpu_place); dst_tensor.CopyFrom<int>(src_tensor, *cpu_place, cpu_ctx);
const int* dst_ptr = dst_tensor.data<int>(); const int* dst_ptr = dst_tensor.data<int>();
ASSERT_NE(src_ptr, dst_ptr); ASSERT_NE(src_ptr, dst_ptr);
...@@ -210,7 +211,7 @@ TEST(Tensor, CopyFrom) { ...@@ -210,7 +211,7 @@ TEST(Tensor, CopyFrom) {
} }
Tensor slice_tensor = src_tensor.Slice<int>(1, 2); Tensor slice_tensor = src_tensor.Slice<int>(1, 2);
dst_tensor.CopyFrom<int>(slice_tensor, *cpu_place); dst_tensor.CopyFrom<int>(slice_tensor, *cpu_place, cpu_ctx);
const int* slice_ptr = slice_tensor.data<int>(); const int* slice_ptr = slice_tensor.data<int>();
dst_ptr = dst_tensor.data<int>(); dst_ptr = dst_tensor.data<int>();
ASSERT_NE(dst_ptr, slice_ptr); ASSERT_NE(dst_ptr, slice_ptr);
...@@ -231,13 +232,15 @@ TEST(Tensor, CopyFrom) { ...@@ -231,13 +232,15 @@ TEST(Tensor, CopyFrom) {
// CPU Tensor to GPU Tensor // CPU Tensor to GPU Tensor
auto gpu_place = new paddle::platform::GPUPlace(0); auto gpu_place = new paddle::platform::GPUPlace(0);
gpu_tensor.CopyFrom<int>(src_tensor, *gpu_place); CUDADeviceContext gpu_ctx(*gpu_place);
gpu_tensor.CopyFrom<int>(src_tensor, *gpu_place, gpu_ctx);
// GPU Tensor to CPU Tensor // GPU Tensor to CPU Tensor
auto cpu_place = new paddle::platform::CPUPlace(); auto cpu_place = new paddle::platform::CPUPlace();
dst_tensor.CopyFrom<int>(gpu_tensor, *cpu_place); dst_tensor.CopyFrom<int>(gpu_tensor, *cpu_place, gpu_ctx);
// Compare Tensors // Sync before Compare Tensors
gpu_ctx.Wait();
const int* dst_ptr = dst_tensor.data<int>(); const int* dst_ptr = dst_tensor.data<int>();
ASSERT_NE(src_ptr, dst_ptr); ASSERT_NE(src_ptr, dst_ptr);
for (size_t i = 0; i < 9; ++i) { for (size_t i = 0; i < 9; ++i) {
...@@ -247,12 +250,13 @@ TEST(Tensor, CopyFrom) { ...@@ -247,12 +250,13 @@ TEST(Tensor, CopyFrom) {
Tensor slice_tensor = src_tensor.Slice<int>(1, 2); Tensor slice_tensor = src_tensor.Slice<int>(1, 2);
// CPU Slice Tensor to GPU Tensor // CPU Slice Tensor to GPU Tensor
gpu_tensor.CopyFrom<int>(slice_tensor, *gpu_place); gpu_tensor.CopyFrom<int>(slice_tensor, *gpu_place, gpu_ctx);
// GPU Tensor to CPU Tensor // GPU Tensor to CPU Tensor
dst_tensor.CopyFrom<int>(gpu_tensor, *cpu_place); dst_tensor.CopyFrom<int>(gpu_tensor, *cpu_place, gpu_ctx);
// Compare Slice Tensors // Sync before Compare Slice Tensors
gpu_ctx.Wait();
const int* slice_ptr = slice_tensor.data<int>(); const int* slice_ptr = slice_tensor.data<int>();
dst_ptr = dst_tensor.data<int>(); dst_ptr = dst_tensor.data<int>();
ASSERT_NE(dst_ptr, slice_ptr); ASSERT_NE(dst_ptr, slice_ptr);
...@@ -273,7 +277,8 @@ TEST(Tensor, CopyFromVector) { ...@@ -273,7 +277,8 @@ TEST(Tensor, CopyFromVector) {
// Copy to CPU Tensor // Copy to CPU Tensor
cpu_tensor.Resize(make_ddim({3, 3})); cpu_tensor.Resize(make_ddim({3, 3}));
auto cpu_place = new paddle::platform::CPUPlace(); auto cpu_place = new paddle::platform::CPUPlace();
cpu_tensor.CopyFromVector<int>(src_vec, *cpu_place); CPUDeviceContext cpu_ctx(*cpu_place);
cpu_tensor.CopyFromVector<int>(src_vec, cpu_ctx);
// Compare Tensors // Compare Tensors
const int* cpu_ptr = cpu_tensor.data<int>(); const int* cpu_ptr = cpu_tensor.data<int>();
...@@ -285,7 +290,7 @@ TEST(Tensor, CopyFromVector) { ...@@ -285,7 +290,7 @@ TEST(Tensor, CopyFromVector) {
src_vec.erase(src_vec.begin(), src_vec.begin() + 5); src_vec.erase(src_vec.begin(), src_vec.begin() + 5);
cpu_tensor.Resize(make_ddim({2, 2})); cpu_tensor.Resize(make_ddim({2, 2}));
cpu_tensor.CopyFromVector<int>(src_vec, *cpu_place); cpu_tensor.CopyFromVector<int>(src_vec, cpu_ctx);
cpu_ptr = cpu_tensor.data<int>(); cpu_ptr = cpu_tensor.data<int>();
src_ptr = src_vec.data(); src_ptr = src_vec.data();
ASSERT_NE(src_ptr, cpu_ptr); ASSERT_NE(src_ptr, cpu_ptr);
...@@ -306,16 +311,19 @@ TEST(Tensor, CopyFromVector) { ...@@ -306,16 +311,19 @@ TEST(Tensor, CopyFromVector) {
// Copy to CPU Tensor // Copy to CPU Tensor
cpu_tensor.Resize(make_ddim({3, 3})); cpu_tensor.Resize(make_ddim({3, 3}));
auto cpu_place = new paddle::platform::CPUPlace(); auto cpu_place = new paddle::platform::CPUPlace();
cpu_tensor.CopyFromVector<int>(src_vec, *cpu_place); CPUDeviceContext cpu_ctx(*cpu_place);
cpu_tensor.CopyFromVector<int>(src_vec, cpu_ctx);
// Copy to GPUTensor // Copy to GPUTensor
gpu_tensor.Resize(make_ddim({3, 3})); gpu_tensor.Resize(make_ddim({3, 3}));
auto gpu_place = new paddle::platform::GPUPlace(); auto gpu_place = new paddle::platform::GPUPlace();
gpu_tensor.CopyFromVector<int>(src_vec, *gpu_place); CUDADeviceContext gpu_ctx(*gpu_place);
gpu_tensor.CopyFromVector<int>(src_vec, gpu_ctx);
// Copy from GPU to CPU tensor for comparison // Copy from GPU to CPU tensor for comparison
dst_tensor.CopyFrom<int>(gpu_tensor, *cpu_place); dst_tensor.CopyFrom<int>(gpu_tensor, *cpu_place, gpu_ctx);
// Compare Tensors // Sync before Compare Tensors
gpu_ctx.Wait();
const int* src_ptr = src_vec.data(); const int* src_ptr = src_vec.data();
const int* cpu_ptr = cpu_tensor.data<int>(); const int* cpu_ptr = cpu_tensor.data<int>();
const int* dst_ptr = dst_tensor.data<int>(); const int* dst_ptr = dst_tensor.data<int>();
...@@ -329,11 +337,13 @@ TEST(Tensor, CopyFromVector) { ...@@ -329,11 +337,13 @@ TEST(Tensor, CopyFromVector) {
src_vec.erase(src_vec.begin(), src_vec.begin() + 5); src_vec.erase(src_vec.begin(), src_vec.begin() + 5);
cpu_tensor.Resize(make_ddim({2, 2})); cpu_tensor.Resize(make_ddim({2, 2}));
cpu_tensor.CopyFromVector<int>(src_vec, *cpu_place); cpu_tensor.CopyFromVector<int>(src_vec, cpu_ctx);
gpu_tensor.Resize(make_ddim({2, 2})); gpu_tensor.Resize(make_ddim({2, 2}));
gpu_tensor.CopyFromVector<int>(src_vec, *gpu_place); gpu_tensor.CopyFromVector<int>(src_vec, gpu_ctx);
dst_tensor.CopyFrom<int>(gpu_tensor, *cpu_place); dst_tensor.CopyFrom<int>(gpu_tensor, *cpu_place, gpu_ctx);
// Sync before Compare Tensors
gpu_ctx.Wait();
src_ptr = src_vec.data(); src_ptr = src_vec.data();
cpu_ptr = cpu_tensor.data<int>(); cpu_ptr = cpu_tensor.data<int>();
dst_ptr = dst_tensor.data<int>(); dst_ptr = dst_tensor.data<int>();
......
...@@ -36,8 +36,9 @@ using OpCreator = std::function<OperatorBase*( ...@@ -36,8 +36,9 @@ using OpCreator = std::function<OperatorBase*(
const std::string& /*type*/, const VariableNameMap& /*inputs*/, const std::string& /*type*/, const VariableNameMap& /*inputs*/,
const VariableNameMap& /*outputs*/, const AttributeMap& /*attrs*/)>; const VariableNameMap& /*outputs*/, const AttributeMap& /*attrs*/)>;
using GradOpMakerFN = using GradOpMakerFN = std::function<std::vector<std::unique_ptr<OpDescBind>>(
std::function<std::vector<std::unique_ptr<OpDescBind>>(const OpDescBind&)>; const OpDescBind&, const std::unordered_set<std::string>& /*no_grad_set*/,
std::unordered_map<std::string, std::string>* /*grad_to_var*/)>;
} // namespace framework } // namespace framework
} // namespace paddle } // namespace paddle
...@@ -13,24 +13,58 @@ See the License for the specific language governing permissions and ...@@ -13,24 +13,58 @@ See the License for the specific language governing permissions and
limitations under the License. */ limitations under the License. */
#include "paddle/framework/var_desc.h" #include "paddle/framework/var_desc.h"
#include "paddle/platform/enforce.h"
namespace paddle { namespace paddle {
namespace framework { namespace framework {
void VarDescBind::SetShape(const std::vector<int64_t> &dims) { void VarDescBind::SetShape(const std::vector<int64_t> &dims) {
VectorToRepeated(dims, desc_.mutable_lod_tensor()->mutable_dims()); VectorToRepeated(dims, mutable_tensor_desc()->mutable_dims());
} }
void VarDescBind::SetDataType(DataType data_type) { void VarDescBind::SetDataType(DataType data_type) {
desc_.mutable_lod_tensor()->set_data_type(data_type); mutable_tensor_desc()->set_data_type(data_type);
} }
std::vector<int64_t> VarDescBind::Shape() const { std::vector<int64_t> VarDescBind::Shape() const {
return RepeatedToVector(desc_.lod_tensor().dims()); return RepeatedToVector(tensor_desc().dims());
} }
DataType VarDescBind::GetDataType() const { DataType VarDescBind::GetDataType() const { return tensor_desc().data_type(); }
return desc_.lod_tensor().data_type();
void VarDescBind::SetLoDLevel(int32_t lod_level) {
PADDLE_ENFORCE(desc_.type() == VarDesc::LOD_TENSOR);
desc_.mutable_lod_tensor()->set_lod_level(lod_level);
}
int32_t VarDescBind::GetLodLevel() const {
PADDLE_ENFORCE(desc_.type() == VarDesc::LOD_TENSOR);
return desc_.lod_tensor().lod_level();
}
const TensorDesc &VarDescBind::tensor_desc() const {
PADDLE_ENFORCE(desc_.has_type(), "invoke TensorDesc must after set type");
switch (desc_.type()) {
case VarDesc::SELECTED_ROWS:
return desc_.selected_rows();
case VarDesc::LOD_TENSOR:
return desc_.lod_tensor().tensor();
default:
PADDLE_THROW("Unexpected branch.");
}
}
TensorDesc *VarDescBind::mutable_tensor_desc() {
PADDLE_ENFORCE(desc_.has_type(),
"invoke MutableTensorDesc must after set type");
switch (desc_.type()) {
case VarDesc::SELECTED_ROWS:
return desc_.mutable_selected_rows();
case VarDesc::LOD_TENSOR:
return desc_.mutable_lod_tensor()->mutable_tensor();
default:
PADDLE_THROW("Unexpected branch.");
}
} }
} // namespace framework } // namespace framework
} // namespace paddle } // namespace paddle
...@@ -34,6 +34,7 @@ inline std::vector<T> RepeatedToVector( ...@@ -34,6 +34,7 @@ inline std::vector<T> RepeatedToVector(
template <typename T, typename RepeatedField> template <typename T, typename RepeatedField>
inline void VectorToRepeated(const std::vector<T> &vec, inline void VectorToRepeated(const std::vector<T> &vec,
RepeatedField *repeated_field) { RepeatedField *repeated_field) {
repeated_field->Clear();
repeated_field->Reserve(vec.size()); repeated_field->Reserve(vec.size());
for (const auto &elem : vec) { for (const auto &elem : vec) {
*repeated_field->Add() = elem; *repeated_field->Add() = elem;
...@@ -44,6 +45,7 @@ inline void VectorToRepeated(const std::vector<T> &vec, ...@@ -44,6 +45,7 @@ inline void VectorToRepeated(const std::vector<T> &vec,
template <typename RepeatedField> template <typename RepeatedField>
inline void VectorToRepeated(const std::vector<bool> &vec, inline void VectorToRepeated(const std::vector<bool> &vec,
RepeatedField *repeated_field) { RepeatedField *repeated_field) {
repeated_field->Clear();
repeated_field->Reserve(vec.size()); repeated_field->Reserve(vec.size());
for (auto elem : vec) { for (auto elem : vec) {
*repeated_field->Add() = elem; *repeated_field->Add() = elem;
...@@ -52,7 +54,10 @@ inline void VectorToRepeated(const std::vector<bool> &vec, ...@@ -52,7 +54,10 @@ inline void VectorToRepeated(const std::vector<bool> &vec,
class VarDescBind { class VarDescBind {
public: public:
explicit VarDescBind(const std::string &name) { desc_.set_name(name); } explicit VarDescBind(const std::string &name) {
desc_.set_name(name);
desc_.set_type(VarDesc::LOD_TENSOR);
}
VarDesc *Proto() { return &desc_; } VarDesc *Proto() { return &desc_; }
...@@ -66,7 +71,18 @@ class VarDescBind { ...@@ -66,7 +71,18 @@ class VarDescBind {
DataType GetDataType() const; DataType GetDataType() const;
void SetLoDLevel(int32_t lod_level);
int32_t GetLodLevel() const;
VarDesc::VarType GetType() const { return desc_.type(); }
void SetType(VarDesc::VarType type) { desc_.set_type(type); }
private: private:
const TensorDesc &tensor_desc() const;
TensorDesc *mutable_tensor_desc();
VarDesc desc_; VarDesc desc_;
}; };
} // namespace framework } // namespace framework
......
...@@ -462,8 +462,8 @@ void LambdaCost::calcGrad(const real* outputScore, ...@@ -462,8 +462,8 @@ void LambdaCost::calcGrad(const real* outputScore,
real score_j = score[index_j]; real score_j = score[index_j];
real dcgDif = 0; real dcgDif = 0;
if (j < sortSize) { if (j < sortSize) {
dcgDif = (std::pow(2, score_i) - std::pow(2, score_j)) / dcgDif = (std::pow(2, score_i) - std::pow(2, score_j)) *
(std::log(i + 2) - std::log(j + 2)); (1 / std::log(i + 2) - 1 / std::log(j + 2));
} else { } else {
dcgDif = dcgDif =
(std::pow(2, score_i) - std::pow(2, score_j)) / std::log(i + 2); (std::pow(2, score_i) - std::pow(2, score_j)) / std::log(i + 2);
......
...@@ -86,6 +86,7 @@ protected: ...@@ -86,6 +86,7 @@ protected:
/// Also used in 'use_mkldnn' case. /// Also used in 'use_mkldnn' case.
std::vector<Argument> outputOtherDevice_; std::vector<Argument> outputOtherDevice_;
/// If there are several outputs, map them by each name. /// If there are several outputs, map them by each name.
/// MKLDNNLayer uses it only to merge output grads
std::map<std::string, Argument*> outputMap_; std::map<std::string, Argument*> outputMap_;
/// Used to merge grad on different devices. /// Used to merge grad on different devices.
MatrixPtr tmpGrad_; MatrixPtr tmpGrad_;
...@@ -325,6 +326,11 @@ public: ...@@ -325,6 +326,11 @@ public:
outputMap_[name] = output; outputMap_[name] = output;
} }
/**
* Get the output map size, if layer has multi-output.
*/
size_t getOutputMapSize() { return outputMap_.size(); }
/** /**
* Get the output based on layer's name. * Get the output based on layer's name.
*/ */
......
...@@ -225,8 +225,6 @@ void MKLDNNConvLayer::resetFwdPipeline( ...@@ -225,8 +225,6 @@ void MKLDNNConvLayer::resetFwdPipeline(
MKLDNNMatrixPtr& wgt, MKLDNNMatrixPtr& wgt,
MKLDNNMatrixPtr& bias, MKLDNNMatrixPtr& bias,
MKLDNNMatrixPtr& out) { MKLDNNMatrixPtr& out) {
pipeline.clear();
if (cvtInVal_) { if (cvtInVal_) {
pipeline.push_back(*cvtInVal_); pipeline.push_back(*cvtInVal_);
} }
...@@ -245,7 +243,7 @@ void MKLDNNConvLayer::resetFwdPipeline( ...@@ -245,7 +243,7 @@ void MKLDNNConvLayer::resetFwdPipeline(
void MKLDNNConvLayer::resetInValue( void MKLDNNConvLayer::resetInValue(
std::shared_ptr<conv_fwd::primitive_desc>& pd, MKLDNNMatrixPtr& in) { std::shared_ptr<conv_fwd::primitive_desc>& pd, MKLDNNMatrixPtr& in) {
const MatrixPtr& inMat = inputLayers_[0]->getOutput().value; const MatrixPtr& inMat = inputLayers_[0]->getOutputValue();
in = MKLDNNMatrix::create(inMat, pd->src_primitive_desc()); in = MKLDNNMatrix::create(inMat, pd->src_primitive_desc());
// create buffer and reorder if input value do not match // create buffer and reorder if input value do not match
...@@ -310,15 +308,20 @@ void MKLDNNConvLayer::resetOutValue( ...@@ -310,15 +308,20 @@ void MKLDNNConvLayer::resetOutValue(
const MatrixPtr& cpuOut = getOutput(CPU_DEVICE).value; const MatrixPtr& cpuOut = getOutput(CPU_DEVICE).value;
memory::dims outDims = memory::dims{bs_, oc_, oh_, ow_}; memory::dims outDims = memory::dims{bs_, oc_, oh_, ow_};
cpuOutVal_ = MKLDNNMatrix::create(cpuOut, outDims, format::nchw, engine_); cpuOutVal_ = MKLDNNMatrix::create(cpuOut, outDims, format::nchw, engine_);
if (cpuOutVal_->getPrimitiveDesc() != out->getPrimitiveDesc()) { if (cpuOutVal_->getPrimitiveDesc() != pd->dst_primitive_desc()) {
out = MKLDNNMatrix::create(nullptr, pd->dst_primitive_desc());
cvtOutVal_ = MKLDNNMatrix::createReorder(out, cpuOutVal_); cvtOutVal_ = MKLDNNMatrix::createReorder(out, cpuOutVal_);
CHECK(cvtOutVal_) << "should not be emptry"; CHECK(cvtOutVal_) << "should not be empty";
} else { } else {
// CPU output share the same data of MKLDNN output
cpuOut->setData(out->getData());
cpuOutVal_ = out; cpuOutVal_ = out;
} }
// when output is cpu device, change the mkldnn output value and make them
// share the same data. Then if next layer uses inputlayer->getOutputValue()
// to achieve the input value, it will get the right data.
output_.value = std::dynamic_pointer_cast<Matrix>(cpuOutVal_);
return;
} }
output_.value = std::dynamic_pointer_cast<Matrix>(out);
} }
void MKLDNNConvLayer::resetBwdWgtPD( void MKLDNNConvLayer::resetBwdWgtPD(
...@@ -412,8 +415,6 @@ void MKLDNNConvLayer::resetBwdPipeline( ...@@ -412,8 +415,6 @@ void MKLDNNConvLayer::resetBwdPipeline(
MKLDNNMatrixPtr& wgt, MKLDNNMatrixPtr& wgt,
MKLDNNMatrixPtr& bias, MKLDNNMatrixPtr& bias,
MKLDNNMatrixPtr& out) { MKLDNNMatrixPtr& out) {
pipeline.clear();
if (cvtOutGrad_) { if (cvtOutGrad_) {
pipeline.push_back(*cvtOutGrad_); pipeline.push_back(*cvtOutGrad_);
} }
...@@ -446,28 +447,27 @@ void MKLDNNConvLayer::resetBwdPipeline( ...@@ -446,28 +447,27 @@ void MKLDNNConvLayer::resetBwdPipeline(
void MKLDNNConvLayer::resetOutGrad( void MKLDNNConvLayer::resetOutGrad(
std::shared_ptr<conv_bwdWgt::primitive_desc>& wgtPD, MKLDNNMatrixPtr& out) { std::shared_ptr<conv_bwdWgt::primitive_desc>& wgtPD, MKLDNNMatrixPtr& out) {
const MatrixPtr& outMat = output_.grad;
out = MKLDNNMatrix::create(outMat, wgtPD->diff_dst_primitive_desc());
CHECK(outVal_ != nullptr &&
out->getPrimitiveDesc() == outVal_->getPrimitiveDesc())
<< "primitive desc of out grad and value should be equal";
// TODO(TJ): merge outgrad
// create reorder if has output grad does not match
cpuOutGrad_ = nullptr; cpuOutGrad_ = nullptr;
cvtOutGrad_ = nullptr; cvtOutGrad_ = nullptr;
if (!outputIsOnlyMKLDNN()) { CHECK(outVal_ != nullptr &&
outVal_->getPrimitiveDesc() == wgtPD->diff_dst_primitive_desc())
<< "primitive desc of out grad and value should be equal";
if (outputIsOnlyMKLDNN()) {
MKLDNNLayer::resetOutGrad(out, outVal_->getPrimitiveDesc());
} else {
const MatrixPtr& cpuOut = getOutput(CPU_DEVICE).grad; const MatrixPtr& cpuOut = getOutput(CPU_DEVICE).grad;
outMat->setData(cpuOut->getData());
// same PrimitiveDesc with cpuInVal_ // same PrimitiveDesc with cpuInVal_
CHECK(cpuOutVal_); CHECK(cpuOutVal_);
cpuOutGrad_ = MKLDNNMatrix::create(cpuOut, cpuOutVal_->getPrimitiveDesc()); cpuOutGrad_ = MKLDNNMatrix::create(cpuOut, cpuOutVal_->getPrimitiveDesc());
if (cpuOutGrad_->getPrimitiveDesc() == out->getPrimitiveDesc()) { // create reorder if primitive desc does not match
out = cpuOutGrad_; if (cpuOutGrad_->getPrimitiveDesc() != outVal_->getPrimitiveDesc()) {
} else { out = MKLDNNMatrix::create(output_.grad, outVal_->getPrimitiveDesc());
out = MKLDNNMatrix::create(nullptr, wgtPD->diff_dst_primitive_desc());
cvtOutGrad_ = MKLDNNMatrix::createReorder(cpuOutGrad_, out); cvtOutGrad_ = MKLDNNMatrix::createReorder(cpuOutGrad_, out);
CHECK(cvtOutGrad_); CHECK(cvtOutGrad_);
} else {
// share the same data of CPU output
output_.grad->setData(cpuOut->getData());
out = cpuOutGrad_;
} }
} }
} }
...@@ -496,32 +496,30 @@ void MKLDNNConvLayer::resetWgtBiasGrad( ...@@ -496,32 +496,30 @@ void MKLDNNConvLayer::resetWgtBiasGrad(
void MKLDNNConvLayer::resetInGrad( void MKLDNNConvLayer::resetInGrad(
std::shared_ptr<conv_bwdData::primitive_desc>& dataPD, std::shared_ptr<conv_bwdData::primitive_desc>& dataPD,
MKLDNNMatrixPtr& in) { MKLDNNMatrixPtr& in) {
in = nullptr;
cpuInGrad_ = nullptr;
cvtInGrad_ = nullptr;
if (dataPD == nullptr) { if (dataPD == nullptr) {
return; return;
} }
// TODO(TJ): use outputMaps_ ways to get the inGrad_ when merge outgrad done if (inputIsOnlyMKLDNN()) {
in = MKLDNNMatrix::create(inputLayers_[0]->getOutput().grad, MKLDNNLayer::resetInGrad(in, dataPD->diff_src_primitive_desc());
dataPD->diff_src_primitive_desc());
CHECK(nullptr != inVal_ && CHECK(nullptr != inVal_ &&
in->getPrimitiveDesc() == inVal_->getPrimitiveDesc()) in->getPrimitiveDesc() == inVal_->getPrimitiveDesc())
<< "primitive desc of input grad and value should be equal"; << "primitive desc of input grad and value should be equal";
} else {
// create reorder if has output grad does not match
cpuInGrad_ = nullptr;
cvtInGrad_ = nullptr;
if (!inputIsOnlyMKLDNN()) {
const MatrixPtr& cpuIn = getInputGrad(0, CPU_DEVICE); const MatrixPtr& cpuIn = getInputGrad(0, CPU_DEVICE);
// same PrimitiveDesc with cpuInVal_ // same PrimitiveDesc with cpuInVal_
CHECK(cpuInVal_); CHECK(cpuInVal_);
cpuInGrad_ = MKLDNNMatrix::create(cpuIn, cpuInVal_->getPrimitiveDesc()); cpuInGrad_ = MKLDNNMatrix::create(cpuIn, cpuInVal_->getPrimitiveDesc());
if (cpuInGrad_->getPrimitiveDesc() != in->getPrimitiveDesc()) { in = cpuInGrad_;
const MatrixPtr& dnnIn = getInputGrad(0, MKLDNN_DEVICE); // create reorder if PrimitiveDesc does not match
in = MKLDNNMatrix::create(dnnIn, in->getPrimitiveDesc()); if (cpuInGrad_->getPrimitiveDesc() != dataPD->diff_src_primitive_desc()) {
in = MKLDNNMatrix::create(getInputGrad(0, MKLDNN_DEVICE),
dataPD->diff_src_primitive_desc());
cvtInGrad_ = MKLDNNMatrix::createReorder(in, cpuInGrad_); cvtInGrad_ = MKLDNNMatrix::createReorder(in, cpuInGrad_);
CHECK(cvtInGrad_); CHECK(cvtInGrad_);
} else {
in = cpuInGrad_;
} }
} }
} }
......
...@@ -180,10 +180,10 @@ void MKLDNNFcLayer::resetWgtBiasValue(MKLDNNMatrixPtr& wgt, ...@@ -180,10 +180,10 @@ void MKLDNNFcLayer::resetWgtBiasValue(MKLDNNMatrixPtr& wgt,
void MKLDNNFcLayer::resetOutValue(MKLDNNMatrixPtr& out) { void MKLDNNFcLayer::resetOutValue(MKLDNNMatrixPtr& out) {
out = MKLDNNMatrix::create(output_.value, {bs_, oc_}, format::nc, engine_); out = MKLDNNMatrix::create(output_.value, {bs_, oc_}, format::nc, engine_);
if (!outputIsOnlyMKLDNN()) { if (!outputIsOnlyMKLDNN()) {
// fc cpu output value do not need create convert // fc cpu output value do not need create convert, just share data
// just share point
getOutput(CPU_DEVICE).value->setData(out->getData()); getOutput(CPU_DEVICE).value->setData(out->getData());
} }
output_.value = std::dynamic_pointer_cast<Matrix>(out);
} }
void MKLDNNFcLayer::resetFwdPD(std::shared_ptr<fc_fwd::primitive_desc>& pd, void MKLDNNFcLayer::resetFwdPD(std::shared_ptr<fc_fwd::primitive_desc>& pd,
...@@ -214,8 +214,6 @@ void MKLDNNFcLayer::resetFwdPipeline( ...@@ -214,8 +214,6 @@ void MKLDNNFcLayer::resetFwdPipeline(
MKLDNNMatrixPtr& wgt, MKLDNNMatrixPtr& wgt,
MKLDNNMatrixPtr& bias, MKLDNNMatrixPtr& bias,
MKLDNNMatrixPtr& out) { MKLDNNMatrixPtr& out) {
pipeline.clear();
if (bias) { if (bias) {
fwd_.reset(new fc_fwd(*pd, *in, *wgt, *bias, *out)); fwd_.reset(new fc_fwd(*pd, *in, *wgt, *bias, *out));
} else { } else {
...@@ -237,19 +235,14 @@ void MKLDNNFcLayer::resetBwdBuffers(MKLDNNMatrixPtr& in, ...@@ -237,19 +235,14 @@ void MKLDNNFcLayer::resetBwdBuffers(MKLDNNMatrixPtr& in,
} }
void MKLDNNFcLayer::resetOutGrad(MKLDNNMatrixPtr& out) { void MKLDNNFcLayer::resetOutGrad(MKLDNNMatrixPtr& out) {
// TODO(TJ): merge outgrad
int device = outputIsOnlyMKLDNN() ? MKLDNN_DEVICE : CPU_DEVICE;
output_.grad->setData(getOutput(device).grad->getData());
// for MKLDNN device:
// can not directly cast outputgrad to mkldnnmatrix,
// since each layer can not write the inputgrad to mkldnn inputgrad.
// So just create from matrix with outputvalue format.
// for CPU device:
// fc do not need to convert from cpu device since output is always nc format
// only need create from cpu device
CHECK(outVal_); CHECK(outVal_);
out = if (outputIsOnlyMKLDNN()) {
MKLDNNMatrix::create(getOutput(device).grad, outVal_->getPrimitiveDesc()); MKLDNNLayer::resetOutGrad(out, outVal_->getPrimitiveDesc());
} else {
const MatrixPtr& cpuOut = getOutput(CPU_DEVICE).grad;
output_.grad->setData(cpuOut->getData());
out = MKLDNNMatrix::create(cpuOut, outVal_->getPrimitiveDesc());
}
} }
void MKLDNNFcLayer::resetWgtBiasGrad(MKLDNNMatrixPtr& wgt, void MKLDNNFcLayer::resetWgtBiasGrad(MKLDNNMatrixPtr& wgt,
...@@ -267,13 +260,11 @@ void MKLDNNFcLayer::resetWgtBiasGrad(MKLDNNMatrixPtr& wgt, ...@@ -267,13 +260,11 @@ void MKLDNNFcLayer::resetWgtBiasGrad(MKLDNNMatrixPtr& wgt,
void MKLDNNFcLayer::resetInGrad(MKLDNNMatrixPtr& in) { void MKLDNNFcLayer::resetInGrad(MKLDNNMatrixPtr& in) {
in = nullptr; in = nullptr;
const MatrixPtr& inGrad = inputLayers_[0]->getOutput().grad; if (inputLayers_[0]->getOutput().grad == nullptr) {
if (inGrad == nullptr) {
return; return;
} }
// TODO(TJ): use outputMaps_ ways to get the inGrad_ when merge outgrad done
CHECK(inVal_); CHECK(inVal_);
in = MKLDNNMatrix::create(inGrad, inVal_->getPrimitiveDesc()); MKLDNNLayer::resetInGrad(in, inVal_->getPrimitiveDesc());
} }
void MKLDNNFcLayer::resetBwdWgtPD( void MKLDNNFcLayer::resetBwdWgtPD(
...@@ -314,7 +305,6 @@ void MKLDNNFcLayer::resetBwdPipeline( ...@@ -314,7 +305,6 @@ void MKLDNNFcLayer::resetBwdPipeline(
MKLDNNMatrixPtr& wgt, MKLDNNMatrixPtr& wgt,
MKLDNNMatrixPtr& bias, MKLDNNMatrixPtr& bias,
MKLDNNMatrixPtr& out) { MKLDNNMatrixPtr& out) {
pipeline.clear();
CHECK(inVal_); CHECK(inVal_);
if (bias) { if (bias) {
bwdWgt_.reset(new fc_bwdWgt(*bwdWgtPD, *inVal_, *out, *wgt, *bias)); bwdWgt_.reset(new fc_bwdWgt(*bwdWgtPD, *inVal_, *out, *wgt, *bias));
......
...@@ -65,6 +65,17 @@ protected: ...@@ -65,6 +65,17 @@ protected:
MKLDNNMatrixPtr biasVal_; MKLDNNMatrixPtr biasVal_;
MKLDNNMatrixPtr biasGrad_; MKLDNNMatrixPtr biasGrad_;
// merge grad primitive
std::shared_ptr<mkldnn::primitive> mergeGrad_;
std::vector<mkldnn::primitive> pipelineMergeGrad_;
// tmp input argument to save input grad, only used to merge grad
Argument tmpInArg_;
// since mkldnn sum does not support different formats
// (refer to https://github.com/01org/mkl-dnn/issues/134),
// a reorder must be created manually and the tmp MKLDNNMatrix saved
MKLDNNMatrixPtr tmpOutGrad_;
std::shared_ptr<mkldnn::primitive> tmpCvt_;
public: public:
explicit MKLDNNLayer(const LayerConfig& config) explicit MKLDNNLayer(const LayerConfig& config)
: Layer(config), : Layer(config),
...@@ -99,6 +110,7 @@ public: ...@@ -99,6 +110,7 @@ public:
if (!Layer::init(layerMap, parameterMap)) { if (!Layer::init(layerMap, parameterMap)) {
return false; return false;
} }
setOutputMap();
checkCPUOutputsNumber(); checkCPUOutputsNumber();
stream_.reset(new MKLDNNStream()); stream_.reset(new MKLDNNStream());
...@@ -118,12 +130,9 @@ public: ...@@ -118,12 +130,9 @@ public:
VLOG(MKLDNN_BASE) << getName() << " reset mkldnn forward"; VLOG(MKLDNN_BASE) << getName() << " reset mkldnn forward";
// reset when input total sizes changed, not only the batchsize // reset when input total sizes changed, not only the batchsize
inputElemenCnt_ = elemenCnt; inputElemenCnt_ = elemenCnt;
pipelineFwd_.clear();
reshape(bs_, ic_, ih_, iw_, oc_, oh_, ow_); reshape(bs_, ic_, ih_, iw_, oc_, oh_, ow_);
resetFwd(pipelineFwd_, inVal_, wgtVal_, biasVal_, outVal_); resetFwd(pipelineFwd_, inVal_, wgtVal_, biasVal_, outVal_);
if (outVal_) {
// change original output value to mkldnn output value
output_.value = std::dynamic_pointer_cast<Matrix>(outVal_);
}
convertWeightsFromPaddle(); convertWeightsFromPaddle();
needResetBwd_ = true; needResetBwd_ = true;
} }
...@@ -144,9 +153,18 @@ public: ...@@ -144,9 +153,18 @@ public:
void backward(const UpdateCallback& callback) override { void backward(const UpdateCallback& callback) override {
if (needResetBwd_) { if (needResetBwd_) {
VLOG(MKLDNN_BASE) << getName() << " reset mkldnn backward"; VLOG(MKLDNN_BASE) << getName() << " reset mkldnn backward";
pipelineBwd_.clear();
pipelineMergeGrad_.clear();
mergeGrad_ = nullptr;
resetBwd(pipelineBwd_, inGrad_, wgtGrad_, biasGrad_, outGrad_); resetBwd(pipelineBwd_, inGrad_, wgtGrad_, biasGrad_, outGrad_);
needResetBwd_ = false; needResetBwd_ = false;
} }
// merge grad must happen before backward activation
if (mergeGrad_) {
REGISTER_TIMER_INFO("MergeBpGrad", getName().c_str());
stream_->submit(pipelineMergeGrad_);
}
{ {
REGISTER_TIMER_INFO("BpActTimer", getName().c_str()); REGISTER_TIMER_INFO("BpActTimer", getName().c_str());
backwardActivation(); backwardActivation();
...@@ -247,6 +265,76 @@ protected: ...@@ -247,6 +265,76 @@ protected:
} }
} }
/**
* reset the output grad matrix from primitive desc.
* and reset the merge grad primitive if needed.
* note: when this layer has several outputs,
* it cannot be mixed with cpu device,
* since it cannot get the memory desc from cpu device.
*/
virtual void resetOutGrad(MKLDNNMatrixPtr& out,
mkldnn::memory::primitive_desc pd) {
CHECK(outputIsOnlyMKLDNN()) << "do not support mixed with other device yet";
mergeGrad_ = nullptr;
pipelineMergeGrad_.clear();
out = MKLDNNMatrix::create(output_.grad, pd);
if (outputMap_.size() <= 1) {
return;
}
std::vector<double> scales(outputMap_.size(), 1.0);
std::vector<mkldnn::memory::primitive_desc> srcPDs;
std::vector<mkldnn::primitive::at> srcs;
for (auto it = outputMap_.begin(); it != outputMap_.end(); ++it) {
MKLDNNMatrixPtr src =
std::dynamic_pointer_cast<MKLDNNMatrix>(it->second->grad);
VLOG(MKLDNN_BASE) << getName() << " has output grad " << it->first;
CHECK(src) << "should be MKLDNNMatrix";
auto srcDims = src->getDims();
auto dstDims = out->getDims();
CHECK_EQ(srcDims.size(), dstDims.size());
for (size_t i = 0; i < srcDims.size(); ++i) {
CHECK_EQ(srcDims[i], dstDims[i]);
}
srcPDs.push_back(src->getPrimitiveDesc());
srcs.push_back(*src);
}
// TODO(TJ): remove me when mkldnn sum support different formats
for (size_t i = 1; i < srcPDs.size(); ++i) {
CHECK(srcPDs[0] == srcPDs[i]);
}
tmpOutGrad_ = nullptr;
tmpCvt_ = nullptr;
if (out->getPrimitiveDesc() != srcPDs[0]) {
tmpOutGrad_ = MKLDNNMatrix::create(nullptr, srcPDs[0]);
tmpCvt_ = MKLDNNMatrix::createReorder(tmpOutGrad_, out);
CHECK(tmpCvt_);
pipelineMergeGrad_.push_back(*tmpCvt_);
} else {
tmpOutGrad_ = out;
}
auto sumPD = mkldnn::sum::primitive_desc(
tmpOutGrad_->getMemoryDesc(), scales, srcPDs);
mergeGrad_.reset(new mkldnn::sum(sumPD, srcs, *tmpOutGrad_));
pipelineMergeGrad_.insert(pipelineMergeGrad_.begin(), *mergeGrad_);
}
/**
* reset input grad from primitive desc.
* this function is available when the input is only mkldnn
* or when the input does not care about the cpu device
*/
virtual void resetInGrad(MKLDNNMatrixPtr& in,
mkldnn::memory::primitive_desc pd) {
LayerPtr& input = inputLayers_[0];
const MatrixPtr& grad =
input->getOutputMapSize() > 1 ? nullptr : input->getOutput().grad;
in = MKLDNNMatrix::create(grad, pd);
Argument& arg = input->getOutput(this->getName());
arg.grad = std::dynamic_pointer_cast<Matrix>(in);
}
/** /**
* print info about sizes * print info about sizes
*/ */
...@@ -334,6 +422,16 @@ private: ...@@ -334,6 +422,16 @@ private:
} }
} }
/**
* Set output map of prev layers.
*/
void setOutputMap() {
outputMap_.clear();
for (size_t i = 0; i < inputLayers_.size(); ++i) {
inputLayers_[i]->setOutput(getName(), &tmpInArg_);
}
}
/** /**
* Check the cpu device number of outputOtherDevice_. * Check the cpu device number of outputOtherDevice_.
* should have only one at most. * should have only one at most.
......
...@@ -142,14 +142,16 @@ void MKLDNNPoolLayer::resetOutValue(MKLDNNMatrixPtr& out) { ...@@ -142,14 +142,16 @@ void MKLDNNPoolLayer::resetOutValue(MKLDNNMatrixPtr& out) {
const MatrixPtr& cpuOut = getOutput(CPU_DEVICE).value; const MatrixPtr& cpuOut = getOutput(CPU_DEVICE).value;
cpuOutVal_ = MKLDNNMatrix::create(cpuOut, outDims, format::nchw, engine_); cpuOutVal_ = MKLDNNMatrix::create(cpuOut, outDims, format::nchw, engine_);
if (cpuOutVal_->getPrimitiveDesc() != out->getPrimitiveDesc()) { if (cpuOutVal_->getPrimitiveDesc() != out->getPrimitiveDesc()) {
out = MKLDNNMatrix::create(nullptr, out->getPrimitiveDesc());
cvtOutVal_ = MKLDNNMatrix::createReorder(out, cpuOutVal_); cvtOutVal_ = MKLDNNMatrix::createReorder(out, cpuOutVal_);
CHECK(cvtOutVal_) << "should not be emptry"; CHECK(cvtOutVal_) << "should not be emptry";
} else { } else {
// CPU output share the same data of MKLDNN output
cpuOut->setData(out->getData());
cpuOutVal_ = out; cpuOutVal_ = out;
} }
output_.value = std::dynamic_pointer_cast<Matrix>(cpuOutVal_);
return;
} }
output_.value = std::dynamic_pointer_cast<Matrix>(outVal_);
} }
void MKLDNNPoolLayer::resetFwdPD(std::shared_ptr<pool_fwd::primitive_desc>& pd, void MKLDNNPoolLayer::resetFwdPD(std::shared_ptr<pool_fwd::primitive_desc>& pd,
...@@ -187,7 +189,6 @@ void MKLDNNPoolLayer::resetFwdPipeline( ...@@ -187,7 +189,6 @@ void MKLDNNPoolLayer::resetFwdPipeline(
std::shared_ptr<pool_fwd::primitive_desc>& pd, std::shared_ptr<pool_fwd::primitive_desc>& pd,
MKLDNNMatrixPtr& in, MKLDNNMatrixPtr& in,
MKLDNNMatrixPtr& out) { MKLDNNMatrixPtr& out) {
pipeline.clear();
fwd_ = workspace_ fwd_ = workspace_
? std::make_shared<pool_fwd>(pool_fwd(*pd, *in, *out, *workspace_)) ? std::make_shared<pool_fwd>(pool_fwd(*pd, *in, *out, *workspace_))
: std::make_shared<pool_fwd>(pool_fwd(*pd, *in, *out)); : std::make_shared<pool_fwd>(pool_fwd(*pd, *in, *out));
...@@ -205,17 +206,17 @@ void MKLDNNPoolLayer::resetBwdBuffers(MKLDNNMatrixPtr& in, ...@@ -205,17 +206,17 @@ void MKLDNNPoolLayer::resetBwdBuffers(MKLDNNMatrixPtr& in,
resetInGrad(in); resetInGrad(in);
} }
void MKLDNNPoolLayer::resetOutGrad(MKLDNNMatrixPtr& out) { void MKLDNNPoolLayer::resetOutGrad(MKLDNNMatrixPtr& out) {
CHECK(outVal_) << "Should have output value";
out = MKLDNNMatrix::create(output_.grad, outVal_->getPrimitiveDesc());
// create reorder if output value has cpu device and pd do not match
cpuOutGrad_ = nullptr; cpuOutGrad_ = nullptr;
cvtOutGrad_ = nullptr; cvtOutGrad_ = nullptr;
if (!outputIsOnlyMKLDNN()) { CHECK(outVal_);
if (outputIsOnlyMKLDNN()) {
MKLDNNLayer::resetOutGrad(out, outVal_->getPrimitiveDesc());
} else {
const MatrixPtr& cpuOut = getOutput(CPU_DEVICE).grad; const MatrixPtr& cpuOut = getOutput(CPU_DEVICE).grad;
cpuOutGrad_ = MKLDNNMatrix::create( cpuOutGrad_ = MKLDNNMatrix::create(
cpuOut, memory::dims{bs_, oc_, oh_, ow_}, format::nchw, engine_); cpuOut, memory::dims{bs_, oc_, oh_, ow_}, format::nchw, engine_);
if (cpuOutGrad_->getPrimitiveDesc() != out->getPrimitiveDesc()) { if (cpuOutGrad_->getPrimitiveDesc() != outVal_->getPrimitiveDesc()) {
out = MKLDNNMatrix::create(output_.grad, outVal_->getPrimitiveDesc());
cvtOutGrad_ = MKLDNNMatrix::createReorder(cpuOutGrad_, out); cvtOutGrad_ = MKLDNNMatrix::createReorder(cpuOutGrad_, out);
CHECK(cvtOutGrad_) << "should not be emptry"; CHECK(cvtOutGrad_) << "should not be emptry";
} else { } else {
...@@ -228,12 +229,11 @@ void MKLDNNPoolLayer::resetOutGrad(MKLDNNMatrixPtr& out) { ...@@ -228,12 +229,11 @@ void MKLDNNPoolLayer::resetOutGrad(MKLDNNMatrixPtr& out) {
void MKLDNNPoolLayer::resetInGrad(MKLDNNMatrixPtr& in) { void MKLDNNPoolLayer::resetInGrad(MKLDNNMatrixPtr& in) {
in = nullptr; in = nullptr;
const MatrixPtr& inGrad = inputLayers_[0]->getOutput().grad; if (inputLayers_[0]->getOutput().grad == nullptr) {
if (inGrad == nullptr) {
return; return;
} }
CHECK(inVal_); CHECK(inVal_);
in = MKLDNNMatrix::create(inGrad, inVal_->getPrimitiveDesc()); MKLDNNLayer::resetInGrad(in, inVal_->getPrimitiveDesc());
} }
void MKLDNNPoolLayer::resetBwdPD(std::shared_ptr<pool_bwd::primitive_desc>& pd, void MKLDNNPoolLayer::resetBwdPD(std::shared_ptr<pool_bwd::primitive_desc>& pd,
...@@ -261,7 +261,6 @@ void MKLDNNPoolLayer::resetBwdPipeline( ...@@ -261,7 +261,6 @@ void MKLDNNPoolLayer::resetBwdPipeline(
std::shared_ptr<pool_bwd::primitive_desc>& pd, std::shared_ptr<pool_bwd::primitive_desc>& pd,
MKLDNNMatrixPtr& in, MKLDNNMatrixPtr& in,
MKLDNNMatrixPtr& out) { MKLDNNMatrixPtr& out) {
pipeline.clear();
if (cvtOutGrad_) { if (cvtOutGrad_) {
pipeline.push_back(*cvtOutGrad_); pipeline.push_back(*cvtOutGrad_);
} }
......
...@@ -124,8 +124,8 @@ void MKLDNNTester::randomTopDiffs() { ...@@ -124,8 +124,8 @@ void MKLDNNTester::randomTopDiffs() {
void MKLDNNTester::checkForward() { void MKLDNNTester::checkForward() {
VLOG(MKLDNN_ALL) << "Check Forward"; VLOG(MKLDNN_ALL) << "Check Forward";
printTopDatas(); printTopDatas();
double delta = compareMatrix(dnnLayer_->getOutput(CPU_DEVICE).value, double delta =
refLayer_->getOutputValue()); compareMatrix(dnnLayer_->getOutputValue(), refLayer_->getOutputValue());
EXPECT_LE(fabs(delta), eps_); EXPECT_LE(fabs(delta), eps_);
} }
......
...@@ -84,8 +84,9 @@ function(op_library TARGET) ...@@ -84,8 +84,9 @@ function(op_library TARGET)
endif() endif()
# pybind USE_NO_KERNEL_OP # pybind USE_NO_KERNEL_OP
# HACK: if REGISTER_OP_CPU_KERNEL is present in the source, the operator must have a kernel
file(READ ${TARGET}.cc TARGET_CONTENT) file(READ ${TARGET}.cc TARGET_CONTENT)
string(REGEX MATCH "OperatorWithKernel" regex_result "${TARGET_CONTENT}") string(REGEX MATCH "REGISTER_OP_CPU_KERNEL" regex_result "${TARGET_CONTENT}")
string(REPLACE "_op" "" TARGET "${TARGET}") string(REPLACE "_op" "" TARGET "${TARGET}")
if (${pybind_flag} EQUAL 0 AND regex_result STREQUAL "") if (${pybind_flag} EQUAL 0 AND regex_result STREQUAL "")
file(APPEND ${pybind_file} "USE_NO_KERNEL_OP(${TARGET});\n") file(APPEND ${pybind_file} "USE_NO_KERNEL_OP(${TARGET});\n")
...@@ -112,7 +113,9 @@ set(DEPS_OPS ...@@ -112,7 +113,9 @@ set(DEPS_OPS
cond_op cond_op
cross_entropy_op cross_entropy_op
softmax_with_cross_entropy_op softmax_with_cross_entropy_op
sum_op) sum_op
pool_op
pool_with_index_op)
op_library(recurrent_op SRCS recurrent_op.cc rnn/recurrent_op_utils.cc op_library(recurrent_op SRCS recurrent_op.cc rnn/recurrent_op_utils.cc
...@@ -121,6 +124,8 @@ op_library(cond_op SRCS cond_op.cc DEPS framework_proto tensor operator net_op) ...@@ -121,6 +124,8 @@ op_library(cond_op SRCS cond_op.cc DEPS framework_proto tensor operator net_op)
op_library(cross_entropy_op DEPS cross_entropy) op_library(cross_entropy_op DEPS cross_entropy)
op_library(softmax_with_cross_entropy_op DEPS cross_entropy softmax) op_library(softmax_with_cross_entropy_op DEPS cross_entropy softmax)
op_library(sum_op DEPS net_op) op_library(sum_op DEPS net_op)
op_library(pool_op DEPS pooling)
op_library(pool_with_index_op DEPS pooling)
list(REMOVE_ITEM GENERAL_OPS ${DEPS_OPS}) list(REMOVE_ITEM GENERAL_OPS ${DEPS_OPS})
foreach(src ${GENERAL_OPS}) foreach(src ${GENERAL_OPS})
...@@ -133,3 +138,4 @@ cc_test(gather_test SRCS gather_test.cc DEPS tensor) ...@@ -133,3 +138,4 @@ cc_test(gather_test SRCS gather_test.cc DEPS tensor)
cc_test(net_op_test SRCS net_op_test.cc DEPS net_op) cc_test(net_op_test SRCS net_op_test.cc DEPS net_op)
cc_test(scatter_test SRCS scatter_test.cc DEPS tensor) cc_test(scatter_test SRCS scatter_test.cc DEPS tensor)
cc_test(strided_memcpy_test SRCS strided_memcpy_test.cc DEPS tensor paddle_memory) cc_test(strided_memcpy_test SRCS strided_memcpy_test.cc DEPS tensor paddle_memory)
cc_test(dynamic_recurrent_op_test SRCS dynamic_recurrent_op_test.cc DEPS dynamic_recurrent_op recurrent_op tensor_array)
...@@ -49,6 +49,18 @@ class SigmoidOpMaker : public framework::OpProtoAndCheckerMaker { ...@@ -49,6 +49,18 @@ class SigmoidOpMaker : public framework::OpProtoAndCheckerMaker {
} }
}; };
// Proto maker for the logsigmoid activation operator: declares one input
// "X", one output "Y" and the operator description.
class LogSigmoidOpMaker : public framework::OpProtoAndCheckerMaker {
 public:
  LogSigmoidOpMaker(framework::OpProto *proto,
                    framework::OpAttrChecker *op_checker)
      : OpProtoAndCheckerMaker(proto, op_checker) {
    AddInput("X", "Input of LogSigmoid operator");
    AddOutput("Y", "Output of LogSigmoid operator");
    AddComment(
        "Logsigmoid activation operator, logsigmoid = log (1 / (1 + exp(-x)))");
  }
};
class ExpOpMaker : public framework::OpProtoAndCheckerMaker { class ExpOpMaker : public framework::OpProtoAndCheckerMaker {
public: public:
ExpOpMaker(framework::OpProto *proto, framework::OpAttrChecker *op_checker) ExpOpMaker(framework::OpProto *proto, framework::OpAttrChecker *op_checker)
...@@ -85,6 +97,23 @@ class LeakyReluOpMaker : public framework::OpProtoAndCheckerMaker { ...@@ -85,6 +97,23 @@ class LeakyReluOpMaker : public framework::OpProtoAndCheckerMaker {
} }
}; };
// Proto maker for the softshrink activation operator.
//
// Declares one input "X", one output "Y" and the attribute "lambda"
// (default 0.5). AttrType is the C++ type used to store the attribute.
template <typename AttrType>
class SoftShrinkOpMaker : public framework::OpProtoAndCheckerMaker {
 public:
  SoftShrinkOpMaker(framework::OpProto *proto,
                    framework::OpAttrChecker *op_checker)
      : OpProtoAndCheckerMaker(proto, op_checker) {
    AddInput("X", "Input of Softshrink operator");
    AddOutput("Y", "Output of Softshrink operator");
    // The negative branch triggers below -lambda (not below lambda), which is
    // what the softshrink kernel computes.
    AddComment(
        "Softshrink activation operator, "
        "softshrink = x - lambda, if x > lambda;"
        " x + lambda, if x < -lambda; 0 otherwise");
    AddAttr<AttrType>("lambda", "non-negative offset")
        .SetDefault(static_cast<AttrType>(0.5f));
  }
};
class TanhOpMaker : public framework::OpProtoAndCheckerMaker { class TanhOpMaker : public framework::OpProtoAndCheckerMaker {
public: public:
TanhOpMaker(framework::OpProto *proto, framework::OpAttrChecker *op_checker) TanhOpMaker(framework::OpProto *proto, framework::OpAttrChecker *op_checker)
...@@ -108,6 +137,24 @@ class TanhShrinkOpMaker : public framework::OpProtoAndCheckerMaker { ...@@ -108,6 +137,24 @@ class TanhShrinkOpMaker : public framework::OpProtoAndCheckerMaker {
} }
}; };
// Proto maker for the hard_shrink activation operator.
//
// Declares one input "X", one output "Y" and the attribute "threshold"
// (default 0.5). AttrType is the C++ type used to store the attribute.
template <typename AttrType>
class HardShrinkOpMaker : public framework::OpProtoAndCheckerMaker {
 public:
  HardShrinkOpMaker(framework::OpProto *proto,
                    framework::OpAttrChecker *op_checker)
      : OpProtoAndCheckerMaker(proto, op_checker) {
    AddInput("X", "Input of HardShrink operator");
    AddOutput("Y", "Output of HardShrink operator");
    // Adjacent string literals are concatenated verbatim, so every piece
    // carries its own separator. The cut-off is named after the attribute
    // ("threshold") that users actually set.
    AddComment(
        "HardShrink activation operator, "
        "hard_shrink(x) = x if x > threshold; "
        "hard_shrink(x) = x if x < -threshold; "
        "hard_shrink(x) = 0 otherwise");
    AddAttr<AttrType>("threshold", "The value of threshold for HardShrink")
        .SetDefault(static_cast<AttrType>(0.5));
  }
};
class SqrtOpMaker : public framework::OpProtoAndCheckerMaker { class SqrtOpMaker : public framework::OpProtoAndCheckerMaker {
public: public:
SqrtOpMaker(framework::OpProto *proto, framework::OpAttrChecker *op_checker) SqrtOpMaker(framework::OpProto *proto, framework::OpAttrChecker *op_checker)
...@@ -159,6 +206,17 @@ class SquareOpMaker : public framework::OpProtoAndCheckerMaker { ...@@ -159,6 +206,17 @@ class SquareOpMaker : public framework::OpProtoAndCheckerMaker {
} }
}; };
// Proto maker for the softplus activation operator: declares one input "X",
// one output "Y" and the operator description. It takes no attributes.
class SoftplusOpMaker : public framework::OpProtoAndCheckerMaker {
 public:
  SoftplusOpMaker(framework::OpProto *proto,
                  framework::OpAttrChecker *op_checker)
      : OpProtoAndCheckerMaker(proto, op_checker) {
    AddInput("X", "Input of Softplus operator");
    AddOutput("Y", "Output of Softplus operator");
    AddComment("Softplus activation operator, softplus(x) = log(1 + exp(x))");
  }
};
class SoftsignOpMaker : public framework::OpProtoAndCheckerMaker { class SoftsignOpMaker : public framework::OpProtoAndCheckerMaker {
public: public:
SoftsignOpMaker(framework::OpProto *proto, SoftsignOpMaker(framework::OpProto *proto,
...@@ -263,6 +321,55 @@ class STanhOpMaker : public framework::OpProtoAndCheckerMaker { ...@@ -263,6 +321,55 @@ class STanhOpMaker : public framework::OpProtoAndCheckerMaker {
} }
}; };
// Proto maker for the thresholded_relu activation operator.
//
// Declares one input "X", one output "Y" and the attribute "threshold"
// (default 1.0). AttrType is the C++ type used to store the attribute.
template <typename AttrType>
class ThresholdedReluOpMaker : public framework::OpProtoAndCheckerMaker {
 public:
  ThresholdedReluOpMaker(framework::OpProto *proto,
                         framework::OpAttrChecker *op_checker)
      : OpProtoAndCheckerMaker(proto, op_checker) {
    AddInput("X", "Input of ThresholdedRelu operator");
    AddOutput("Y", "Output of ThresholdedRelu operator");
    AddComment(
        "ThresholdedRelu activation operator, "
        "thresholded_relu = x for x > threshold, "
        "thresholded_relu = 0 otherwise.");
    AddAttr<AttrType>("threshold", "The threshold location of activation")
        .SetDefault(static_cast<AttrType>(1.0));
  }
};
// Proto maker for the hard_sigmoid activation operator.
//
// Declares one input "X", one output "Y" and the attributes "slope"
// (default 0.2) and "offset" (default 0.5). AttrType is the C++ type used to
// store the attributes.
template <typename AttrType>
class HardSigmoidOpMaker : public framework::OpProtoAndCheckerMaker {
 public:
  HardSigmoidOpMaker(framework::OpProto *proto,
                     framework::OpAttrChecker *op_checker)
      : OpProtoAndCheckerMaker(proto, op_checker) {
    AddInput("X", "Input of HardSigmoid operator");
    AddOutput("Y", "Output of HardSigmoid operator");
    // The description uses the attribute names ("slope", "offset") so that
    // the documented formula matches what users configure.
    AddComment(R"DOC(
Hard Sigmoid activation operator.
Segment-wise linear approximation of sigmoid[1].
This is much faster than sigmoid.
hard_sigmoid = max(0, min(1, slope * x + offset))
The slope should be positive. The offset can be either positive or negative.
The default slope and offset are set from [1].
It is recommended to use the defaults for this activation.
References:
[1] Noisy Activation Functions
(https://arxiv.org/abs/1603.00391)
)DOC");
    AddAttr<AttrType>("slope", "Slope for linear approximation of sigmoid")
        .SetDefault(static_cast<AttrType>(0.2));
    AddAttr<AttrType>("offset", "Offset for linear approximation of sigmoid")
        .SetDefault(static_cast<AttrType>(0.5));
  }
};
} // namespace operators } // namespace operators
} // namespace paddle } // namespace paddle
...@@ -271,6 +378,9 @@ namespace ops = paddle::operators; ...@@ -271,6 +378,9 @@ namespace ops = paddle::operators;
REGISTER_OP(sigmoid, ops::ActivationOp, ops::SigmoidOpMaker, sigmoid_grad, REGISTER_OP(sigmoid, ops::ActivationOp, ops::SigmoidOpMaker, sigmoid_grad,
ops::ActivationOpGrad); ops::ActivationOpGrad);
// Register the logsigmoid operator (described by LogSigmoidOpMaker) together
// with its gradient operator logsigmoid_grad.
REGISTER_OP(logsigmoid, ops::ActivationOp, ops::LogSigmoidOpMaker,
            logsigmoid_grad, ops::ActivationOpGrad);
REGISTER_OP(exp, ops::ActivationOp, ops::ExpOpMaker, exp_grad, REGISTER_OP(exp, ops::ActivationOp, ops::ExpOpMaker, exp_grad,
ops::ActivationOpGrad); ops::ActivationOpGrad);
...@@ -283,6 +393,9 @@ REGISTER_OP(tanh, ops::ActivationOp, ops::TanhOpMaker, tanh_grad, ...@@ -283,6 +393,9 @@ REGISTER_OP(tanh, ops::ActivationOp, ops::TanhOpMaker, tanh_grad,
REGISTER_OP(tanh_shrink, ops::ActivationOp, ops::TanhShrinkOpMaker, REGISTER_OP(tanh_shrink, ops::ActivationOp, ops::TanhShrinkOpMaker,
tanh_shrink_grad, ops::ActivationOpGrad); tanh_shrink_grad, ops::ActivationOpGrad);
// Register the softshrink operator (float attribute storage) together with
// its gradient operator softshrink_grad.
REGISTER_OP(softshrink, ops::ActivationOp, ops::SoftShrinkOpMaker<float>,
            softshrink_grad, ops::ActivationOpGrad);
REGISTER_OP(sqrt, ops::ActivationOp, ops::SqrtOpMaker, sqrt_grad, REGISTER_OP(sqrt, ops::ActivationOp, ops::SqrtOpMaker, sqrt_grad,
ops::ActivationOpGrad); ops::ActivationOpGrad);
...@@ -298,6 +411,9 @@ REGISTER_OP(log, ops::ActivationOp, ops::LogOpMaker, log_grad, ...@@ -298,6 +411,9 @@ REGISTER_OP(log, ops::ActivationOp, ops::LogOpMaker, log_grad,
REGISTER_OP(square, ops::ActivationOp, ops::SquareOpMaker, square_grad, REGISTER_OP(square, ops::ActivationOp, ops::SquareOpMaker, square_grad,
ops::ActivationOpGrad); ops::ActivationOpGrad);
// Register the softplus operator (described by SoftplusOpMaker) together with
// its gradient operator softplus_grad.
REGISTER_OP(softplus, ops::ActivationOp, ops::SoftplusOpMaker, softplus_grad,
            ops::ActivationOpGrad);
REGISTER_OP(softsign, ops::ActivationOp, ops::SoftsignOpMaker, softsign_grad, REGISTER_OP(softsign, ops::ActivationOp, ops::SoftsignOpMaker, softsign_grad,
ops::ActivationOpGrad); ops::ActivationOpGrad);
...@@ -322,6 +438,16 @@ REGISTER_OP(pow, ops::ActivationOp, ops::PowOpMaker<float>, pow_grad, ...@@ -322,6 +438,16 @@ REGISTER_OP(pow, ops::ActivationOp, ops::PowOpMaker<float>, pow_grad,
REGISTER_OP(stanh, ops::ActivationOp, ops::STanhOpMaker<float>, stanh_grad, REGISTER_OP(stanh, ops::ActivationOp, ops::STanhOpMaker<float>, stanh_grad,
ops::ActivationOpGrad); ops::ActivationOpGrad);
// Register the hard_shrink, thresholded_relu and hard_sigmoid operators, each
// with float attribute storage and its matching *_grad gradient operator.
REGISTER_OP(hard_shrink, ops::ActivationOp, ops::HardShrinkOpMaker<float>,
            hard_shrink_grad, ops::ActivationOpGrad);

REGISTER_OP(thresholded_relu, ops::ActivationOp,
            ops::ThresholdedReluOpMaker<float>, thresholded_relu_grad,
            ops::ActivationOpGrad);

REGISTER_OP(hard_sigmoid, ops::ActivationOp, ops::HardSigmoidOpMaker<float>,
            hard_sigmoid_grad, ops::ActivationOpGrad);
#define REGISTER_ACTIVATION_CPU_KERNEL(act_type, functor, grad_functor) \ #define REGISTER_ACTIVATION_CPU_KERNEL(act_type, functor, grad_functor) \
REGISTER_OP_CPU_KERNEL( \ REGISTER_OP_CPU_KERNEL( \
act_type, \ act_type, \
......
...@@ -95,6 +95,41 @@ struct SigmoidGradFunctor : public BaseActivationFunctor<T> { ...@@ -95,6 +95,41 @@ struct SigmoidGradFunctor : public BaseActivationFunctor<T> {
} }
}; };
// Originally: logsigmoid(x) = -log (1 + exp(-x))
// For numerical stability, we can use the log-sum-exp trick:
// https://hips.seas.harvard.edu/blog/2013/01/09/computing-log-sum-exp/
// We can rewrite the above equation as:
// y = -log( exp(0) + exp(-x)) [since exp(0) = 1]
//   = -log( exp(max(-x, 0) - max(-x, 0)) + exp(-x + max(-x, 0) - max(-x, 0)))
//   = -log( exp(max(-x, 0)) * exp(-max(-x, 0)) + exp(max(-x, 0)) * exp(-x -
//           max(-x, 0)))
//   = -log( exp(max(-x, 0)) * (exp(-max(-x, 0)) + exp(-x - max(-x, 0))))
//   = -log( exp(max(-x, 0))) - log(exp(-max(-x, 0)) + exp(-x - max(-x, 0)))
//
// Hence, logsigmoid(x) = - (max(-x, 0) + log(exp(-max(-x, 0))
//                           + exp(-x - max(-x, 0))))
template <typename T>
struct LogSigmoidFunctor : public BaseActivationFunctor<T> {
  // Writes logsigmoid(x) into y on device d using the stabilized form above.
  template <typename Device, typename X, typename Y>
  void operator()(Device d, X x, Y y) const {
    auto temp = (-x).cwiseMax(static_cast<T>(0));  // temp = max(-x, 0)
    y.device(d) = -temp - (((-temp).exp() + (-x - temp).exp()).log());
  }
};
// Originally: f' = exp(-x) / (1 + exp(-x))
// For numerical stability: f' = exp(-x - max(-x, 0)) / (exp(-max(-x, 0)) +
// exp(-x - max(-x, 0)))
template <typename T>
struct LogSigmoidGradFunctor : public BaseActivationFunctor<T> {
  // Writes dx = dy * f'(x) on device d. The forward output y is accepted for
  // signature uniformity but is not read here.
  template <typename Device, typename X, typename Y, typename dY, typename dX>
  void operator()(Device d, X x, Y y, dY dy, dX dx) const {
    auto temp = (-x).cwiseMax(static_cast<T>(0));  // temp = max(-x, 0)
    dx.device(d) =
        dy * ((-x - temp).exp() / ((-temp).exp() + (-x - temp).exp()));
  }
};
// exp(x) = e^x // exp(x) = e^x
template <typename T> template <typename T>
struct ExpFunctor : public BaseActivationFunctor<T> { struct ExpFunctor : public BaseActivationFunctor<T> {
...@@ -164,6 +199,70 @@ struct TanhShrinkGradFunctor : public BaseActivationFunctor<T> { ...@@ -164,6 +199,70 @@ struct TanhShrinkGradFunctor : public BaseActivationFunctor<T> {
} }
}; };
// hard_shrink(x) = x, if x < -threshold or x > threshold
//                = 0, otherwise
template <typename T>
struct HardShrinkFunctor : public BaseActivationFunctor<T> {
  float threshold;

  // Exposes the "threshold" attribute so it can be filled in by name.
  typename BaseActivationFunctor<T>::AttrPair GetAttrs() {
    return {{"threshold", &threshold}};
  }
  template <typename Device, typename X, typename Y>
  void operator()(Device d, X x, Y y) const {
    // temp1 / temp2 are 0/1 masks (cast to T) for the two pass-through
    // regions; the regions are disjoint, so their sum is itself a 0/1 mask.
    auto temp1 = (x < (threshold * -1)).template cast<T>().eval();
    auto temp2 = (x > threshold).template cast<T>().eval();
    y.device(d) = x * (temp1 + temp2);
  }
};
// Gradient of hard_shrink: dy is passed through where x < -threshold or
// x > threshold and zeroed everywhere else.
template <typename T>
struct HardShrinkGradFunctor : public BaseActivationFunctor<T> {
  float threshold;

  // Exposes the "threshold" attribute so it can be filled in by name.
  typename BaseActivationFunctor<T>::AttrPair GetAttrs() {
    return {{"threshold", &threshold}};
  }

  template <typename Device, typename X, typename Y, typename dY, typename dX>
  void operator()(Device d, X x, Y y, dY dy, dX dx) const {
    const float lower_bound = threshold * -1;
    // 0/1 masks (as T) selecting the two regions where the input passes.
    auto below_mask = (x < lower_bound).template cast<T>().eval();
    auto above_mask = (x > threshold).template cast<T>().eval();
    auto pass_mask = (below_mask + above_mask).template cast<T>();
    dx.device(d) = dy * pass_mask;
  }
};
// softshrink(x) = x - lambda, if x > lambda;
//                 x + lambda, if x < -lambda;
//                 0 otherwise
template <typename T>
struct SoftShrinkFunctor : public BaseActivationFunctor<T> {
  float lambda;

  // Exposes the "lambda" attribute so it can be filled in by name.
  typename BaseActivationFunctor<T>::AttrPair GetAttrs() {
    return {{"lambda", &lambda}};
  }
  template <typename Device, typename X, typename Y>
  void operator()(Device d, X x, Y y) const {
    // temp1 / temp2 are 0/1 masks (cast to T) for the positive and negative
    // shrink regions; each mask gates the matching shifted value.
    auto temp1 = (x > lambda).template cast<T>().eval();
    auto temp2 = (x < -lambda).template cast<T>().eval();
    y.device(d) = temp1 * (x - lambda) + temp2 * (x + lambda);
  }
};
// Gradient of softshrink: dy is passed through where x > lambda or
// x < -lambda and zeroed everywhere else.
template <typename T>
struct SoftShrinkGradFunctor : public BaseActivationFunctor<T> {
  float lambda;

  // Exposes the "lambda" attribute so it can be filled in by name.
  typename BaseActivationFunctor<T>::AttrPair GetAttrs() {
    return {{"lambda", &lambda}};
  }

  template <typename Device, typename X, typename Y, typename dY, typename dX>
  void operator()(Device d, X x, Y y, dY dy, dX dx) const {
    // 0/1 masks (as T) for the two regions in which the slope is one.
    auto above_mask = (x > lambda).template cast<T>().eval();
    auto below_mask = (x < -lambda).template cast<T>().eval();
    auto pass_mask = (above_mask + below_mask).template cast<T>();
    dx.device(d) = dy * pass_mask;
  }
};
// sqrt(x) = x^(1/2) // sqrt(x) = x^(1/2)
template <typename T> template <typename T>
struct SqrtFunctor : public BaseActivationFunctor<T> { struct SqrtFunctor : public BaseActivationFunctor<T> {
...@@ -285,8 +384,6 @@ template <typename T> ...@@ -285,8 +384,6 @@ template <typename T>
struct Relu6Functor : public BaseActivationFunctor<T> { struct Relu6Functor : public BaseActivationFunctor<T> {
float threshold; float threshold;
// NOTE: Explicit hides the `BaseActivationFunctor<T>::GetAttrs`
// not polymorphism for speed.
typename BaseActivationFunctor<T>::AttrPair GetAttrs() { typename BaseActivationFunctor<T>::AttrPair GetAttrs() {
return {{"threshold", &threshold}}; return {{"threshold", &threshold}};
} }
...@@ -310,6 +407,33 @@ struct Relu6GradFunctor : public BaseActivationFunctor<T> { ...@@ -310,6 +407,33 @@ struct Relu6GradFunctor : public BaseActivationFunctor<T> {
} }
}; };
// softplus(x) = log(1 + exp(x))
// When x is a very large positive number, exp(x) may explode to inf,
// Using trick below for numerical stability
// https://hips.seas.harvard.edu/blog/2013/01/09/computing-log-sum-exp/
// Then: softplus(x) = max(x, 0) + log(exp(-max(x, 0)) + exp(x - max(x, 0)))
template <typename T>
struct SoftplusFunctor : public BaseActivationFunctor<T> {
  // Writes softplus(x) into y on device d.
  // Declared const, like the other activation functors in this file, so the
  // functor can also be invoked through a const object or reference.
  template <typename Device, typename X, typename Y>
  void operator()(Device d, X x, Y y) const {
    auto temp = x.cwiseMax(static_cast<T>(0));  // temp = max(x, 0)
    y.device(d) = temp + (((-temp).exp() + (x - temp).exp()).log());
  }
};
// d(softplus(x))/dx = exp(x) / (1 + exp(x))
// For numerical stability:
// d(softplus(x))/dx = exp(x - max(x, 0)) / (exp(-max(x, 0)) +
// exp(x - max(x, 0)))
template <typename T>
struct SoftplusGradFunctor : public BaseActivationFunctor<T> {
  // Writes dx = dy * d(softplus(x))/dx on device d. The forward output y is
  // accepted for signature uniformity but is not read here.
  // Declared const, like the other activation functors in this file, so the
  // functor can also be invoked through a const object or reference.
  template <typename Device, typename X, typename Y, typename dY, typename dX>
  void operator()(Device d, X x, Y y, dY dy, dX dx) const {
    auto temp = x.cwiseMax(static_cast<T>(0));  // temp = max(x, 0)
    dx.device(d) = dy * ((x - temp).exp() / ((-temp).exp() + (x - temp).exp()));
  }
};
// softsign(x) = x / (1 + |x|) // softsign(x) = x / (1 + |x|)
template <typename T> template <typename T>
struct SoftsignFunctor : public BaseActivationFunctor<T> { struct SoftsignFunctor : public BaseActivationFunctor<T> {
...@@ -466,14 +590,74 @@ struct STanhGradFunctor : public BaseActivationFunctor<T> { ...@@ -466,14 +590,74 @@ struct STanhGradFunctor : public BaseActivationFunctor<T> {
} }
}; };
// thresholded_relu(x) = x, if x > threshold
//                     = 0, otherwise
template <typename T>
struct ThresholdedReluFunctor : public BaseActivationFunctor<T> {
  float threshold;

  // Exposes the "threshold" attribute so it can be filled in by name.
  typename BaseActivationFunctor<T>::AttrPair GetAttrs() {
    return {{"threshold", &threshold}};
  }

  template <typename Device, typename X, typename Y>
  void operator()(Device d, X x, Y y) const {
    const T cutoff = static_cast<T>(threshold);
    // 0/1 mask (as T) of the elements strictly above the cut-off.
    auto active_mask = (x > cutoff).template cast<T>();
    y.device(d) = active_mask * x;
  }
};
struct ThresholdedReluGradFunctor : public BaseActivationFunctor<T> {
float threshold;
typename BaseActivationFunctor<T>::AttrPair GetAttrs() {
return {{"threshold", &threshold}};
}
template <typename Device, typename X, typename Y, typename dY, typename dX>
void operator()(Device d, X x, Y y, dY dy, dX dx) const {
dx.device(d) = dy * (x > static_cast<T>(threshold)).template cast<T>();
}
};
template <typename T>
struct HardSigmoidFunctor : public BaseActivationFunctor<T> {
float slope;
float offset;
typename BaseActivationFunctor<T>::AttrPair GetAttrs() {
return {{"slope", &slope}, {"offset", &offset}};
}
template <typename Device, typename X, typename Y>
void operator()(Device d, X x, Y y) const {
auto temp = x * static_cast<T>(slope) + static_cast<T>(offset);
y.device(d) = temp.cwiseMax(static_cast<T>(0)).cwiseMin(static_cast<T>(1));
}
};
template <typename T>
struct HardSigmoidGradFunctor : public BaseActivationFunctor<T> {
float slope;
float offset;
typename BaseActivationFunctor<T>::AttrPair GetAttrs() {
return {{"slope", &slope}, {"offset", &offset}};
}
template <typename Device, typename X, typename Y, typename dY, typename dX>
void operator()(Device d, X x, Y y, dY dy, dX dx) const {
dx.device(d) =
dy *
((y > static_cast<T>(0)) * (y < static_cast<T>(1))).template cast<T>() *
static_cast<T>(slope);
}
};
} // namespace operators } // namespace operators
} // namespace paddle } // namespace paddle
#define FOR_EACH_KERNEL_FUNCTOR(__macro) \ #define FOR_EACH_KERNEL_FUNCTOR(__macro) \
__macro(sigmoid, SigmoidFunctor, SigmoidGradFunctor); \ __macro(sigmoid, SigmoidFunctor, SigmoidGradFunctor); \
__macro(logsigmoid, LogSigmoidFunctor, LogSigmoidGradFunctor); \
__macro(exp, ExpFunctor, ExpGradFunctor); \ __macro(exp, ExpFunctor, ExpGradFunctor); \
__macro(relu, ReluFunctor, ReluGradFunctor); \ __macro(relu, ReluFunctor, ReluGradFunctor); \
__macro(tanh, TanhFunctor, TanhGradFunctor); \ __macro(tanh, TanhFunctor, TanhGradFunctor); \
__macro(softshrink, SoftShrinkFunctor, SoftShrinkGradFunctor); \
__macro(sqrt, SqrtFunctor, SqrtGradFunctor); \ __macro(sqrt, SqrtFunctor, SqrtGradFunctor); \
__macro(abs, AbsFunctor, AbsGradFunctor); \ __macro(abs, AbsFunctor, AbsGradFunctor); \
__macro(reciprocal, ReciprocalFunctor, ReciprocalGradFunctor); \ __macro(reciprocal, ReciprocalFunctor, ReciprocalGradFunctor); \
...@@ -483,8 +667,12 @@ struct STanhGradFunctor : public BaseActivationFunctor<T> { ...@@ -483,8 +667,12 @@ struct STanhGradFunctor : public BaseActivationFunctor<T> {
__macro(soft_relu, SoftReluFunctor, SoftReluGradFunctor); \ __macro(soft_relu, SoftReluFunctor, SoftReluGradFunctor); \
__macro(pow, PowFunctor, PowGradFunctor); \ __macro(pow, PowFunctor, PowGradFunctor); \
__macro(stanh, STanhFunctor, STanhGradFunctor); \ __macro(stanh, STanhFunctor, STanhGradFunctor); \
__macro(softplus, SoftplusFunctor, SoftplusGradFunctor); \
__macro(softsign, SoftsignFunctor, SoftsignGradFunctor); \ __macro(softsign, SoftsignFunctor, SoftsignGradFunctor); \
__macro(leaky_relu, LeakyReluFunctor, LeakyReluGradFunctor); \
__macro(relu6, Relu6Functor, Relu6GradFunctor); \ __macro(relu6, Relu6Functor, Relu6GradFunctor); \
__macro(leaky_relu, LeakyReluFunctor, LeakyReluGradFunctor); \
__macro(tanh_shrink, TanhShrinkFunctor, TanhShrinkGradFunctor); \ __macro(tanh_shrink, TanhShrinkFunctor, TanhShrinkGradFunctor); \
__macro(elu, ELUFunctor, ELUGradFunctor) __macro(elu, ELUFunctor, ELUGradFunctor); \
__macro(hard_shrink, HardShrinkFunctor, HardShrinkGradFunctor); \
__macro(hard_sigmoid, HardSigmoidFunctor, HardSigmoidGradFunctor); \
__macro(thresholded_relu, ThresholdedReluFunctor, ThresholdedReluGradFunctor);
/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserve.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
#include "paddle/operators/adam_op.h"
namespace paddle {
namespace operators {
// Operator definition for the Adam optimizer update.
//
// InferShape validates that every input and output is present, that
// LearningRate, Beta1Pow and Beta2Pow each hold exactly one element, and that
// Grad, Moment1 and Moment2 have the same dimensions as Param. It then sets
// the output dimensions: ParamOut / Moment1Out / Moment2Out take Param's
// dimensions, Beta1PowOut / Beta2PowOut take those of their inputs.
class AdamOp : public framework::OperatorWithKernel {
 public:
  using framework::OperatorWithKernel::OperatorWithKernel;

 protected:
  void InferShape(framework::InferShapeContext *ctx) const override {
    PADDLE_ENFORCE(ctx->HasInput("Param"),
                   "Input(Param) of AdamOp should not be null.");
    PADDLE_ENFORCE(ctx->HasInput("Grad"),
                   "Input(Grad) of AdamOp should not be null.");
    PADDLE_ENFORCE(ctx->HasInput("Moment1"),
                   "Input(Moment1) of AdamOp should not be null.");
    PADDLE_ENFORCE(ctx->HasInput("Moment2"),
                   "Input(Moment2) of AdamOp should not be null.");
    PADDLE_ENFORCE(ctx->HasInput("LearningRate"),
                   "Input(LearningRate) of AdamOp should not be null.");
    PADDLE_ENFORCE(ctx->HasInput("Beta1Pow"),
                   "Input(Beta1Pow) of AdamOp should not be null.");
    PADDLE_ENFORCE(ctx->HasInput("Beta2Pow"),
                   "Input(Beta2Pow) of AdamOp should not be null.");

    PADDLE_ENFORCE(ctx->HasOutput("ParamOut"),
                   "Output(ParamOut) of AdamOp should not be null.");
    PADDLE_ENFORCE(ctx->HasOutput("Moment1Out"),
                   "Output(Moment1Out) of AdamOp should not be null.");
    PADDLE_ENFORCE(ctx->HasOutput("Moment2Out"),
                   "Output(Moment2Out) of AdamOp should not be null.");
    PADDLE_ENFORCE(ctx->HasOutput("Beta1PowOut"),
                   "Output(Beta1PowOut) of AdamOp should not be null.");
    PADDLE_ENFORCE(ctx->HasOutput("Beta2PowOut"),
                   "Output(Beta2PowOut) of AdamOp should not be null.");

    // The scalar-like inputs must each contain exactly one element.
    auto lr_dims = ctx->GetInputDim("LearningRate");
    PADDLE_ENFORCE_EQ(framework::product(lr_dims), 1,
                      "Learning rate should have 1 dimension");
    auto beta1_pow_dims = ctx->GetInputDim("Beta1Pow");
    PADDLE_ENFORCE_EQ(framework::product(beta1_pow_dims), 1,
                      "Beta1 power accumulator should have 1 dimension");
    // Validate Beta2Pow itself; previously Beta1Pow was checked a second time
    // and Beta2Pow was never checked.
    auto beta2_pow_dims = ctx->GetInputDim("Beta2Pow");
    PADDLE_ENFORCE_EQ(framework::product(beta2_pow_dims), 1,
                      "Beta2 power accumulator should have 1 dimension");

    // Gradient and both moment estimates must match the parameter's shape.
    auto param_dims = ctx->GetInputDim("Param");
    PADDLE_ENFORCE_EQ(
        param_dims, ctx->GetInputDim("Grad"),
        "Param and Grad input of AdamOp should have same dimension");
    PADDLE_ENFORCE_EQ(
        param_dims, ctx->GetInputDim("Moment1"),
        "Param and Moment1 input of AdamOp should have same dimension");
    PADDLE_ENFORCE_EQ(
        param_dims, ctx->GetInputDim("Moment2"),
        "Param and Moment2 input of AdamOp should have same dimension");

    ctx->SetOutputDim("ParamOut", param_dims);
    ctx->SetOutputDim("Moment1Out", param_dims);
    ctx->SetOutputDim("Moment2Out", param_dims);
    ctx->SetOutputDim("Beta1PowOut", beta1_pow_dims);
    ctx->SetOutputDim("Beta2PowOut", beta2_pow_dims);
  }
};
// Proto maker for the Adam operator: declares its seven inputs, five outputs,
// the attributes beta1 / beta2 / epsilon with their defaults, and the
// operator description.
class AdamOpMaker : public framework::OpProtoAndCheckerMaker {
 public:
  AdamOpMaker(framework::OpProto *proto, framework::OpAttrChecker *op_checker)
      : OpProtoAndCheckerMaker(proto, op_checker) {
    AddInput("Param", "(Tensor) Input parameter");
    AddInput("Grad", "(Tensor) Input gradient");
    AddInput("LearningRate", "(Tensor) Learning rate");
    AddInput("Moment1", "(Tensor) Input first moment");
    AddInput("Moment2", "(Tensor) Input second moment");
    AddInput("Beta1Pow", "(Tensor) Input beta1 power accumulator");
    AddInput("Beta2Pow", "(Tensor) Input beta2 power accumulator");

    AddOutput("ParamOut", "(Tensor) Output parameter");
    AddOutput("Moment1Out", "(Tensor) Output first moment");
    AddOutput("Moment2Out", "(Tensor) Output second moment");
    AddOutput("Beta1PowOut", "(Tensor) Output beta1 power accumulator");
    AddOutput("Beta2PowOut", "(Tensor) Output beta2 power accumulator");

    AddAttr<float>("beta1",
                   "(float, default 0.9) "
                   "Exponential decay rate for the "
                   "first moment estimates.")
        .SetDefault(0.9f);
    AddAttr<float>("beta2",
                   "(float, default 0.999) "
                   "exponential decay rate for the "
                   "second moment estimates.")
        .SetDefault(0.999f);
    AddAttr<float>("epsilon",
                   "(float, default 1.0e-8) "
                   "Constant for numerical stability")
        .SetDefault(1.0e-8f);

    // The formulas below mirror the kernel: the step size is derived from the
    // LearningRate input, and the parameter update uses the freshly computed
    // moment1_out / moment2_out.
    AddComment(R"DOC(
Adam Updates Operator.
This implements the Adam optimizer from Section 2 of the Adam
paper[1]. Adam is a first-order gradient-based optimization
method based on adaptive estimates of lower-order moments.
Adam updates:
moment1_out = beta1 * moment1 + (1 - beta1) * grad
moment2_out = beta2 * moment2 + (1 - beta2) * grad * grad
beta1_pow_out = beta1_pow * beta1
beta2_pow_out = beta2_pow * beta2
learning_rate_t = learning_rate *
                  sqrt(1 - beta2_pow_out) / (1 - beta1_pow_out)
param_out = param - learning_rate_t * moment1_out / (sqrt(moment2_out) + epsilon)
References:
[1] Adam: A Method for Stochastic Optimization
(https://arxiv.org/abs/1412.6980)
)DOC");
  }
};
} // namespace operators
} // namespace paddle
namespace ops = paddle::operators;
// Register the adam operator without a gradient operator, and its float
// kernel for CPUPlace.
REGISTER_OP_WITHOUT_GRADIENT(adam, ops::AdamOp, ops::AdamOpMaker);
REGISTER_OP_CPU_KERNEL(adam,
                       ops::AdamOpKernel<paddle::platform::CPUPlace, float>);
/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserve.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
#define EIGEN_USE_GPU
#include "paddle/operators/adam_op.h"
namespace ops = paddle::operators;
// Register the float adam kernel for GPUPlace; it instantiates the same
// AdamOpKernel template as the CPU registration.
REGISTER_OP_GPU_KERNEL(adam,
                       ops::AdamOpKernel<paddle::platform::GPUPlace, float>);
/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserve.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
#pragma once
#include "paddle/framework/eigen.h"
#include "paddle/framework/op_registry.h"
namespace paddle {
namespace operators {
// Kernel performing one Adam update step on the Eigen device selected by
// Place, with element type T.
//
// Reads Param, Grad, Moment1, Moment2, LearningRate, Beta1Pow and Beta2Pow,
// and writes ParamOut, Moment1Out, Moment2Out, Beta1PowOut and Beta2PowOut.
// All tensors are processed as flattened vectors.
template <typename Place, typename T>
class AdamOpKernel : public framework::OpKernel<T> {
 public:
  void Compute(const framework::ExecutionContext& ctx) const override {
    using Tensor = framework::Tensor;
    using Flat = framework::EigenVector<T>;

    // Fetch the outputs and allocate their storage before creating any view.
    auto param_out_t = ctx.Output<Tensor>("ParamOut");
    auto m1_out_t = ctx.Output<Tensor>("Moment1Out");
    auto m2_out_t = ctx.Output<Tensor>("Moment2Out");
    auto b1_pow_out_t = ctx.Output<Tensor>("Beta1PowOut");
    auto b2_pow_out_t = ctx.Output<Tensor>("Beta2PowOut");

    param_out_t->mutable_data<T>(ctx.GetPlace());
    m1_out_t->mutable_data<T>(ctx.GetPlace());
    m2_out_t->mutable_data<T>(ctx.GetPlace());
    b1_pow_out_t->mutable_data<T>(ctx.GetPlace());
    b2_pow_out_t->mutable_data<T>(ctx.GetPlace());

    // Hyper-parameters.
    float beta1 = ctx.Attr<float>("beta1");
    float beta2 = ctx.Attr<float>("beta2");
    float epsilon = ctx.Attr<float>("epsilon");

    // Flattened views of the inputs.
    auto param = Flat::Flatten(*ctx.Input<Tensor>("Param"));
    auto grad = Flat::Flatten(*ctx.Input<Tensor>("Grad"));
    auto m1 = Flat::Flatten(*ctx.Input<Tensor>("Moment1"));
    auto m2 = Flat::Flatten(*ctx.Input<Tensor>("Moment2"));
    auto lr = Flat::Flatten(*ctx.Input<Tensor>("LearningRate"));
    auto b1_pow = Flat::Flatten(*ctx.Input<Tensor>("Beta1Pow"));
    auto b2_pow = Flat::Flatten(*ctx.Input<Tensor>("Beta2Pow"));

    // Flattened views of the outputs.
    auto param_out = Flat::Flatten(*param_out_t);
    auto m1_out = Flat::Flatten(*m1_out_t);
    auto m2_out = Flat::Flatten(*m2_out_t);
    auto b1_pow_out = Flat::Flatten(*b1_pow_out_t);
    auto b2_pow_out = Flat::Flatten(*b2_pow_out_t);

    auto dev = ctx.GetEigenDevice<Place>();

    // Moment estimates and beta power accumulators come first: the parameter
    // update below is expressed in terms of these outputs.
    m1_out.device(dev) = beta1 * m1 + (1 - beta1) * grad;
    m2_out.device(dev) = beta2 * m2 + (1 - beta2) * grad.square();
    b1_pow_out.device(dev) = b1_pow * beta1;
    b2_pow_out.device(dev) = b2_pow * beta2;

    // Step size expression built from the learning rate and the updated
    // power accumulators.
    auto step = lr * (1 - b2_pow_out).sqrt() / (1 - b1_pow_out);

    // Eigen does not broadcast automatically, so the step size is broadcast
    // explicitly to the element count of the first-moment output.
    Eigen::DSizes<int, 1> full_size(m1_out_t->numel());
    param_out.device(dev) =
        param -
        step.broadcast(full_size) * (m1_out / (m2_out.sqrt() + epsilon));
  }
};
} // namespace operators
} // namespace paddle
...@@ -134,7 +134,7 @@ void CondOp::PrepareDataForSubnet( ...@@ -134,7 +134,7 @@ void CondOp::PrepareDataForSubnet(
for (int i = 0; i < BRANCH_NUM; ++i) { for (int i = 0; i < BRANCH_NUM; ++i) {
for (auto& output : (*sub_net_op_[i]).Outputs()) { for (auto& output : (*sub_net_op_[i]).Outputs()) {
for (auto& var_name : output.second) { for (auto& var_name : output.second) {
sub_scopes[i]->NewVar(var_name); sub_scopes[i]->Var(var_name);
} }
} }
} }
......
...@@ -12,22 +12,12 @@ ...@@ -12,22 +12,12 @@
See the License for the specific language governing permissions and See the License for the specific language governing permissions and
limitations under the License. */ limitations under the License. */
#include "paddle/operators/gemm_conv2d_op.h" #include "paddle/operators/conv2d_op.h"
namespace paddle { namespace paddle {
namespace operators { namespace operators {
int outputSize(int input_size, int filter_size, int padding, int stride) { void Conv2DOp::InferShape(framework::InferShapeContext* ctx) const {
int output_size = (input_size - filter_size + 2 * padding) / stride + 1;
return output_size;
}
class Conv2DOp : public framework::OperatorWithKernel {
public:
using framework::OperatorWithKernel::OperatorWithKernel;
protected:
void InferShape(framework::InferShapeContext* ctx) const override {
PADDLE_ENFORCE(ctx->HasInput("Input"), PADDLE_ENFORCE(ctx->HasInput("Input"),
"Input(Input) of Conv2DOp should not be null."); "Input(Input) of Conv2DOp should not be null.");
PADDLE_ENFORCE(ctx->HasInput("Filter"), PADDLE_ENFORCE(ctx->HasInput("Filter"),
...@@ -53,25 +43,22 @@ class Conv2DOp : public framework::OperatorWithKernel { ...@@ -53,25 +43,22 @@ class Conv2DOp : public framework::OperatorWithKernel {
"The number of output channels should be divided by groups."); "The number of output channels should be divided by groups.");
auto output_height = auto output_height =
outputSize(in_dims[2], filter_dims[2], paddings[0], strides[0]); OutputSize(in_dims[2], filter_dims[2], paddings[0], strides[0]);
auto output_width = auto output_width =
outputSize(in_dims[3], filter_dims[3], paddings[1], strides[1]); OutputSize(in_dims[3], filter_dims[3], paddings[1], strides[1]);
ctx->SetOutputDim( ctx->SetOutputDim("Output",
"Output", {in_dims[0], filter_dims[0], output_height, output_width}); {in_dims[0], filter_dims[0], output_height, output_width});
} }
};
class Conv2DOpMaker : public framework::OpProtoAndCheckerMaker { Conv2DOpMaker::Conv2DOpMaker(framework::OpProto* proto,
public: framework::OpAttrChecker* op_checker)
Conv2DOpMaker(framework::OpProto* proto, framework::OpAttrChecker* op_checker)
: OpProtoAndCheckerMaker(proto, op_checker) { : OpProtoAndCheckerMaker(proto, op_checker) {
AddInput( AddInput(
"Input", "Input",
"The input tensor of convolution operator. " "The input tensor of convolution operator. "
"The format of input tensor is NCHW. Where N is batch size, C is the " "The format of input tensor is NCHW. Where N is batch size, C is the "
"number of channels, H and W is the height and width of image."); "number of channels, H and W is the height and width of image.");
AddInput( AddInput("Filter",
"Filter",
"The filter tensor of convolution operator." "The filter tensor of convolution operator."
"The format of the filter tensor is MCHW, where M is the number of " "The format of the filter tensor is MCHW, where M is the number of "
"output image channels, C is the number of input image channels, " "output image channels, C is the number of input image channels, "
...@@ -98,15 +85,9 @@ The convolution operation calculates the output based on the input, filter ...@@ -98,15 +85,9 @@ The convolution operation calculates the output based on the input, filter
and strides, paddings, groups parameters. The size of each dimension of the and strides, paddings, groups parameters. The size of each dimension of the
parameters is checked in the infer-shape. parameters is checked in the infer-shape.
)DOC"); )DOC");
} }
};
class Conv2DOpGrad : public framework::OperatorWithKernel {
public:
using framework::OperatorWithKernel::OperatorWithKernel;
protected: void Conv2DOpGrad::InferShape(framework::InferShapeContext* ctx) const {
void InferShape(framework::InferShapeContext* ctx) const override {
auto in_dims = ctx->GetInputDim("Input"); auto in_dims = ctx->GetInputDim("Input");
auto filter_dims = ctx->GetInputDim("Filter"); auto filter_dims = ctx->GetInputDim("Filter");
if (ctx->HasOutput(framework::GradVarName("Input"))) { if (ctx->HasOutput(framework::GradVarName("Input"))) {
...@@ -115,8 +96,7 @@ class Conv2DOpGrad : public framework::OperatorWithKernel { ...@@ -115,8 +96,7 @@ class Conv2DOpGrad : public framework::OperatorWithKernel {
if (ctx->HasOutput(framework::GradVarName("Filter"))) { if (ctx->HasOutput(framework::GradVarName("Filter"))) {
ctx->SetOutputDim(framework::GradVarName("Filter"), filter_dims); ctx->SetOutputDim(framework::GradVarName("Filter"), filter_dims);
} }
} }
};
} // namespace operators } // namespace operators
} // namespace paddle } // namespace paddle
......
...@@ -12,7 +12,7 @@ ...@@ -12,7 +12,7 @@
See the License for the specific language governing permissions and See the License for the specific language governing permissions and
limitations under the License. */ limitations under the License. */
#include "paddle/operators/gemm_conv2d_op.h" #include "paddle/operators/conv2d_op.h"
namespace ops = paddle::operators; namespace ops = paddle::operators;
......
...@@ -24,6 +24,38 @@ namespace operators { ...@@ -24,6 +24,38 @@ namespace operators {
using Tensor = framework::Tensor; using Tensor = framework::Tensor;
// Base convolution operator definitions for other conv-like
// operators to reuse the implementation.
// Returns the extent of one spatial output dimension of a convolution:
//   (input_size - filter_size + 2 * padding) / stride + 1
// evaluated with integer (truncating) division.
// `stride` is used as a divisor, so it must be non-zero.
inline int OutputSize(int input_size, int filter_size, int padding,
                      int stride) {
  const int padded_span = input_size - filter_size + 2 * padding;
  return padded_span / stride + 1;
}
// Define Op classes in .h file so that other conv
// operator implementations can reuse the code.
// Proto/attribute-checker maker for the conv2d operator.
// Only the constructor is declared here; its definition is not part of this
// header, which lets derived makers call it as a base-class initializer.
class Conv2DOpMaker : public framework::OpProtoAndCheckerMaker {
public:
// proto:      operator proto forwarded to OpProtoAndCheckerMaker.
// op_checker: attribute checker forwarded to OpProtoAndCheckerMaker.
Conv2DOpMaker(framework::OpProto* proto,
framework::OpAttrChecker* op_checker);
};
// Forward conv2d operator. Inherits the OperatorWithKernel constructors and
// overrides shape inference; InferShape is declared here and defined outside
// this header.
class Conv2DOp : public framework::OperatorWithKernel {
public:
using framework::OperatorWithKernel::OperatorWithKernel;
protected:
// Shape inference hook for the forward operator.
void InferShape(framework::InferShapeContext* ctx) const override;
};
// Gradient counterpart of Conv2DOp. Inherits the OperatorWithKernel
// constructors and overrides shape inference; InferShape is declared here and
// defined outside this header.
class Conv2DOpGrad : public framework::OperatorWithKernel {
public:
using framework::OperatorWithKernel::OperatorWithKernel;
protected:
// Shape inference hook for the gradient operator.
void InferShape(framework::InferShapeContext* ctx) const override;
};
template <typename Place, typename T> template <typename Place, typename T>
class GemmConv2DKernel : public framework::OpKernel<T> { class GemmConv2DKernel : public framework::OpKernel<T> {
public: public:
...@@ -74,7 +106,6 @@ class GemmConv2DKernel : public framework::OpKernel<T> { ...@@ -74,7 +106,6 @@ class GemmConv2DKernel : public framework::OpKernel<T> {
framework::DDim output_matrix_shape = {output_channels, framework::DDim output_matrix_shape = {output_channels,
output_height * output_width}; output_height * output_width};
// convolution operator: im2col + gemm // convolution operator: im2col + gemm
int in_step = input_channels / groups; int in_step = input_channels / groups;
int out_step = output_channels / groups; int out_step = output_channels / groups;
......
/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserve.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
#include "paddle/operators/conv2d_op.h"
namespace paddle {
namespace operators {
// Maker for the conv_cudnn operator. It reuses everything Conv2DOpMaker
// registers (via the base-class constructor) and adds two attributes:
//   - "dilations": vector<int>, default {1, 1}.
//   - "workspace_size_MB": int, default 4096 (megabytes).
class CudnnConvOpMaker : public Conv2DOpMaker {
public:
CudnnConvOpMaker(framework::OpProto* proto,
framework::OpAttrChecker* op_checker)
: Conv2DOpMaker(proto, op_checker) {
AddAttr<std::vector<int>>("dilations", "dilations of convolution operator.")
.SetDefault(std::vector<int>{1, 1});
// NOTE(review): the default of 4096 MB equals 2^32 bytes, which does not fit
// in a 32-bit int; any code converting this attribute to bytes has to widen
// the value before multiplying.
AddAttr<int>("workspace_size_MB",
"workspace size for cudnn, in MB, "
"workspace is a section of GPU memory which will be "
"allocated/freed each time the operator runs, larger "
"workspace size can increase performance but also requires "
"better hardward. This size should be carefully setted.")
.SetDefault(4096);
}
};
} // namespace operators
} // namespace paddle
namespace ops = paddle::operators;
// Register conv_cudnn / conv_cudnn_grad, reusing the conv2d operator classes
// for shape inference and CudnnConvOpMaker for the proto.
REGISTER_OP(conv_cudnn, ops::Conv2DOp, ops::CudnnConvOpMaker, conv_cudnn_grad,
ops::Conv2DOpGrad);
// The CPU kernels registered for these ops are the GEMM-based conv2d kernels
// (float only).
REGISTER_OP_CPU_KERNEL(
conv_cudnn, ops::GemmConv2DKernel<paddle::platform::CPUPlace, float>);
REGISTER_OP_CPU_KERNEL(
conv_cudnn_grad,
ops::GemmConvGrad2DKernel<paddle::platform::CPUPlace, float>);
/* Copyright (c) 2016 PaddlePaddle Authors All Rights Reserve.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
#include "paddle/framework/eigen.h"
#include "paddle/framework/op_registry.h"
#include "paddle/memory/memory.h"
#include "paddle/operators/conv2d_op.h"
#include "paddle/platform/assert.h"
#include "paddle/platform/cudnn_helper.h"
namespace paddle {
namespace operators {
using Tensor = framework::Tensor;
using ScopedTensorDescriptor = platform::ScopedTensorDescriptor;
using ScopedFilterDescriptor = platform::ScopedFilterDescriptor;
using ScopedConvolutionDescriptor = platform::ScopedConvolutionDescriptor;
using DataLayout = platform::DataLayout;
using CUDADeviceContext = platform::CUDADeviceContext;
// Default cap on the cuDNN workspace, in bytes (1024^3 = 1 GiB). The kernels
// in this file fall back to it when the "workspace_size_MB" attribute is not
// positive.
static constexpr size_t kCONV_CUDNN_WORKSPACE_LIMIT_BYTES = 1024 * 1024 * 1024;
// NOTE: framework::vectorize converts to type int64_t,
// which does not fit the cudnn inputs, hence this int-based helper.
//
// Copies every extent of `dims`, in order, into a std::vector<int>.
std::vector<int> Dims2Vector(const framework::DDim& dims) {
  std::vector<int> extents;
  int axis = 0;
  while (axis < dims.size()) {
    extents.push_back(dims[axis]);
    ++axis;
  }
  return extents;
}
// cuDNN-backed forward kernel for the conv_cudnn operator.
//
// Reads the "Input" and "Filter" tensors and the "strides", "paddings",
// "dilations", "groups" and "workspace_size_MB" attributes, and writes the
// convolution result into "Output". Must run on a GPU place.
template <typename T>
class CudnnConvOpKernel : public framework::OpKernel<T> {
 public:
  void Compute(const framework::ExecutionContext& ctx) const override {
    PADDLE_ENFORCE(platform::is_gpu_place(ctx.GetPlace()),
                   "It must use GPUPlace.");
    auto* input = ctx.Input<Tensor>("Input");
    auto* filter = ctx.Input<Tensor>("Filter");
    auto* output = ctx.Output<Tensor>("Output");

    std::vector<int> strides = ctx.Attr<std::vector<int>>("strides");
    std::vector<int> paddings = ctx.Attr<std::vector<int>>("paddings");
    std::vector<int> dilations = ctx.Attr<std::vector<int>>("dilations");
    int groups = ctx.Attr<int>("groups");
    int user_workspace_size = ctx.Attr<int>("workspace_size_MB");

    const T* input_data = input->data<T>();
    const T* filter_data = filter->data<T>();
    T* output_data = output->mutable_data<T>(ctx.GetPlace());

    // ------------------- cudnn descriptors ---------------------
    ScopedTensorDescriptor input_desc;
    ScopedTensorDescriptor output_desc;
    ScopedFilterDescriptor filter_desc;
    ScopedConvolutionDescriptor conv_desc;
    DataLayout layout = DataLayout::kNCHW;

    cudnnTensorDescriptor_t cudnn_input_desc =
        input_desc.descriptor<T>(layout, Dims2Vector(input->dims()), groups);
    cudnnTensorDescriptor_t cudnn_output_desc =
        output_desc.descriptor<T>(layout, Dims2Vector(output->dims()), groups);
    cudnnFilterDescriptor_t cudnn_filter_desc =
        filter_desc.descriptor<T>(layout, Dims2Vector(filter->dims()), groups);
    cudnnConvolutionDescriptor_t cudnn_conv_desc =
        conv_desc.descriptor<T>(paddings, strides, dilations);

    // Element offsets between consecutive groups in each buffer; dims are
    // indexed as [N, C, H, W] per the kNCHW layout chosen above.
    int input_channels = input->dims()[1];
    int input_height = input->dims()[2];
    int input_width = input->dims()[3];
    int output_channels = output->dims()[1];
    int output_height = output->dims()[2];
    int output_width = output->dims()[3];

    int group_offset_in = input_channels / groups * input_height * input_width;
    int group_offset_out =
        output_channels / groups * output_height * output_width;
    int group_offset_filter = filter->numel() / groups;

    // ------------------- cudnn conv workspace ---------------------
    void* cudnn_workspace = nullptr;
    size_t workspace_size_in_bytes = 0;  // final workspace to allocate.
    size_t workspace_size_limit = kCONV_CUDNN_WORKSPACE_LIMIT_BYTES;
    if (user_workspace_size > 0) {
      // Widen before multiplying: the attribute is an int counting megabytes,
      // and e.g. 4096 * 1024 * 1024 overflows int arithmetic.
      workspace_size_limit =
          static_cast<size_t>(user_workspace_size) * 1024 * 1024;
    }

    // ------------------- cudnn conv algorithm ---------------------
    cudnnConvolutionFwdAlgo_t algo;
    auto handle = ctx.cuda_device_context().cudnn_handle();

    PADDLE_ENFORCE(platform::dynload::cudnnGetConvolutionForwardAlgorithm(
        handle, cudnn_input_desc, cudnn_filter_desc, cudnn_conv_desc,
        cudnn_output_desc, CUDNN_CONVOLUTION_FWD_SPECIFY_WORKSPACE_LIMIT,
        workspace_size_limit, &algo));
    // get workspace size able to allocate
    PADDLE_ENFORCE(platform::dynload::cudnnGetConvolutionForwardWorkspaceSize(
        handle, cudnn_input_desc, cudnn_filter_desc, cudnn_conv_desc,
        cudnn_output_desc, algo, &workspace_size_in_bytes));

    // Allocate on GPU memory
    platform::GPUPlace gpu = boost::get<platform::GPUPlace>(ctx.GetPlace());
    cudnn_workspace = paddle::memory::Alloc(gpu, workspace_size_in_bytes);

    // ------------------- cudnn conv forward ---------------------
    // NOTE(review): if PADDLE_ENFORCE raises inside the loop, the workspace
    // allocated above is not released — confirm whether that matters here.
    T alpha = 1.0f, beta = 0.0f;
    for (int i = 0; i < groups; i++) {
      PADDLE_ENFORCE(platform::dynload::cudnnConvolutionForward(
          handle, &alpha, cudnn_input_desc, input_data + i * group_offset_in,
          cudnn_filter_desc, filter_data + i * group_offset_filter,
          cudnn_conv_desc, algo, cudnn_workspace, workspace_size_in_bytes,
          &beta, cudnn_output_desc, output_data + i * group_offset_out));
    }
    // Release the cudnn workspace
    paddle::memory::Free(gpu, cudnn_workspace);
  }
};
// cuDNN-backed backward kernel for the conv_cudnn operator.
//
// Reads "Input", "Filter" and the gradient of "Output". For each of the
// gradients of "Input" and "Filter" that is present (non-null), it zero-fills
// the tensor and then computes it with the matching cuDNN backward call.
// A single workspace, sized to the larger of the two requirements, is shared
// by both backward passes. Must run on a GPU place.
template <typename T>
class CudnnConvGradOpKernel : public framework::OpKernel<T> {
 public:
  void Compute(const framework::ExecutionContext& ctx) const override {
    PADDLE_ENFORCE(platform::is_gpu_place(ctx.GetPlace()),
                   "It must use GPUPlace.");
    auto input = ctx.Input<Tensor>("Input");
    auto filter = ctx.Input<Tensor>("Filter");
    auto output_grad = ctx.Input<Tensor>(framework::GradVarName("Output"));
    auto input_grad = ctx.Output<Tensor>(framework::GradVarName("Input"));
    auto filter_grad = ctx.Output<Tensor>(framework::GradVarName("Filter"));

    const T* input_data = input->data<T>();
    const T* output_grad_data = output_grad->data<T>();
    const T* filter_data = filter->data<T>();

    std::vector<int> strides = ctx.Attr<std::vector<int>>("strides");
    std::vector<int> paddings = ctx.Attr<std::vector<int>>("paddings");
    std::vector<int> dilations = ctx.Attr<std::vector<int>>("dilations");
    int groups = ctx.Attr<int>("groups");
    int user_workspace_size = ctx.Attr<int>("workspace_size_MB");

    // ------------------- cudnn descriptors ---------------------
    ScopedTensorDescriptor input_desc;
    ScopedTensorDescriptor output_grad_desc;
    ScopedTensorDescriptor input_grad_desc;

    ScopedFilterDescriptor filter_desc;
    ScopedFilterDescriptor filter_grad_desc;
    ScopedConvolutionDescriptor conv_desc;
    DataLayout layout = DataLayout::kNCHW;

    cudnnTensorDescriptor_t cudnn_input_desc =
        input_desc.descriptor<T>(layout, Dims2Vector(input->dims()), groups);
    cudnnTensorDescriptor_t cudnn_output_grad_desc =
        output_grad_desc.descriptor<T>(layout, Dims2Vector(output_grad->dims()),
                                       groups);
    cudnnFilterDescriptor_t cudnn_filter_desc =
        filter_desc.descriptor<T>(layout, Dims2Vector(filter->dims()), groups);
    // The gradient descriptors are only created if the gradient is requested.
    cudnnTensorDescriptor_t cudnn_input_grad_desc = nullptr;
    cudnnFilterDescriptor_t cudnn_filter_grad_desc = nullptr;
    cudnnConvolutionDescriptor_t cudnn_conv_desc =
        conv_desc.descriptor<T>(paddings, strides, dilations);

    // Element offsets between consecutive groups in each buffer.
    int input_channels = input->dims()[1];
    int input_height = input->dims()[2];
    int input_width = input->dims()[3];
    int output_grad_channels = filter->dims()[0];
    int output_grad_height = output_grad->dims()[2];
    int output_grad_width = output_grad->dims()[3];

    int group_offset_in = input_channels / groups * input_height * input_width;
    int group_offset_out =
        output_grad_channels / groups * output_grad_height * output_grad_width;
    int group_offset_filter = filter->numel() / groups;

    // ------------------- cudnn backward algorithm ---------------------
    cudnnConvolutionBwdDataAlgo_t data_algo;
    cudnnConvolutionBwdFilterAlgo_t filter_algo;
    size_t workspace_size_in_bytes = 0, tmp_size = 0;
    size_t workspace_size_limit = kCONV_CUDNN_WORKSPACE_LIMIT_BYTES;
    if (user_workspace_size > 0) {
      // Widen before multiplying: the attribute is an int counting megabytes,
      // and e.g. 4096 * 1024 * 1024 overflows int arithmetic.
      workspace_size_limit =
          static_cast<size_t>(user_workspace_size) * 1024 * 1024;
    }

    auto handle = ctx.cuda_device_context().cudnn_handle();
    if (input_grad) {
      cudnn_input_grad_desc = input_grad_desc.descriptor<T>(
          layout, Dims2Vector(input_grad->dims()), groups);
      PADDLE_ENFORCE(
          platform::dynload::cudnnGetConvolutionBackwardDataAlgorithm(
              handle, cudnn_filter_desc,
              // dyDesc: Handle to the previously initialized input differential
              // tensor descriptor.
              cudnn_output_grad_desc, cudnn_conv_desc,
              // dxDesc: Handle to the previously initialized output tensor
              // descriptor.
              cudnn_input_grad_desc,
              CUDNN_CONVOLUTION_BWD_DATA_SPECIFY_WORKSPACE_LIMIT,
              workspace_size_limit, &data_algo));
      PADDLE_ENFORCE(
          platform::dynload::cudnnGetConvolutionBackwardDataWorkspaceSize(
              handle, cudnn_filter_desc, cudnn_output_grad_desc,
              cudnn_conv_desc, cudnn_input_grad_desc, data_algo, &tmp_size));
      workspace_size_in_bytes = std::max(workspace_size_in_bytes, tmp_size);
    }

    if (filter_grad) {
      cudnn_filter_grad_desc = filter_grad_desc.descriptor<T>(
          layout, Dims2Vector(filter_grad->dims()), groups);
      PADDLE_ENFORCE(
          platform::dynload::cudnnGetConvolutionBackwardFilterAlgorithm(
              handle, cudnn_input_desc, cudnn_output_grad_desc, cudnn_conv_desc,
              cudnn_filter_desc,
              CUDNN_CONVOLUTION_BWD_FILTER_SPECIFY_WORKSPACE_LIMIT,
              workspace_size_limit, &filter_algo));
      PADDLE_ENFORCE(
          platform::dynload::cudnnGetConvolutionBackwardFilterWorkspaceSize(
              handle, cudnn_input_desc, cudnn_output_grad_desc, cudnn_conv_desc,
              cudnn_filter_desc, filter_algo, &tmp_size));
      workspace_size_in_bytes = std::max(workspace_size_in_bytes, tmp_size);
    }

    // ------------------- cudnn conv workspace ---------------------
    // Already on GPU
    void* cudnn_workspace = nullptr;
    platform::GPUPlace gpu = boost::get<platform::GPUPlace>(ctx.GetPlace());
    cudnn_workspace = paddle::memory::Alloc(gpu, workspace_size_in_bytes);

    // ------------------- cudnn conv backward data ---------------------
    // FIXME(typhoonzero): template type T may not be the same as cudnn call.
    // NOTE(review): if PADDLE_ENFORCE raises in either loop below, the
    // workspace allocated above is not released — confirm whether that matters.
    T alpha = 1.0f, beta = 0.0f;
    if (input_grad) {
      T* input_grad_data = input_grad->mutable_data<T>(ctx.GetPlace());
      // Zero-fill the gradient buffer before cuDNN writes into it.
      auto t = framework::EigenVector<T>::Flatten(*input_grad);
      t.device(ctx.GetEigenDevice<platform::GPUPlace>()) =
          t.constant(static_cast<T>(0));
      for (int i = 0; i < groups; i++) {
        PADDLE_ENFORCE(platform::dynload::cudnnConvolutionBackwardData(
            handle, &alpha, cudnn_filter_desc,
            filter_data + i * group_offset_filter, cudnn_output_grad_desc,
            output_grad_data + i * group_offset_out, cudnn_conv_desc, data_algo,
            cudnn_workspace, workspace_size_in_bytes, &beta,
            cudnn_input_grad_desc, input_grad_data + i * group_offset_in));
      }
    }

    // ------------------- cudnn conv backward filter ---------------------
    if (filter_grad) {
      T* filter_grad_data = filter_grad->mutable_data<T>(ctx.GetPlace());
      // Zero-fill the gradient buffer before cuDNN writes into it.
      auto t = framework::EigenVector<T>::Flatten(*filter_grad);
      t.device(ctx.GetEigenDevice<platform::GPUPlace>()) =
          t.constant(static_cast<T>(0));
      for (int i = 0; i < groups; i++) {
        PADDLE_ENFORCE(platform::dynload::cudnnConvolutionBackwardFilter(
            handle, &alpha, cudnn_input_desc, input_data + i * group_offset_in,
            cudnn_output_grad_desc, output_grad_data + i * group_offset_out,
            cudnn_conv_desc, filter_algo, cudnn_workspace,
            workspace_size_in_bytes, &beta, cudnn_filter_grad_desc,
            filter_grad_data + i * group_offset_filter));
      }
    }
    // Release the cudnn workspace
    paddle::memory::Free(gpu, cudnn_workspace);
  }
};
} // namespace operators
} // namespace paddle
// Register the cuDNN forward and backward kernels as the GPU implementations
// of conv_cudnn / conv_cudnn_grad (float only).
REGISTER_OP_GPU_KERNEL(conv_cudnn, paddle::operators::CudnnConvOpKernel<float>);
REGISTER_OP_GPU_KERNEL(conv_cudnn_grad,
paddle::operators::CudnnConvGradOpKernel<float>);
/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserve.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
#include "paddle/operators/decayed_adagrad_op.h"
namespace paddle {
namespace operators {
// Operator definition for the decayed Adagrad optimizer step.
// Only shape inference lives here; the numeric update is done by the kernel.
class DecayedAdagradOp : public framework::OperatorWithKernel {
public:
using framework::OperatorWithKernel::OperatorWithKernel;
protected:
// Validates that all inputs and outputs are present, that LearningRate holds
// exactly one element, and that Grad and Moment have the same dimensions as
// Param. Both outputs are then given Param's dimensions.
void InferShape(framework::InferShapeContext *ctx) const override {
// Presence checks for every input and output.
PADDLE_ENFORCE(ctx->HasInput("Param"),
"Input(Param) of DecayedAdagradOp should not be null.");
PADDLE_ENFORCE(ctx->HasInput("Grad"),
"Input(Grad) of DecayedAdagradOp should not be null.");
PADDLE_ENFORCE(ctx->HasInput("Moment"),
"Input(Moment) of DecayedAdagradOp should not be null.");
PADDLE_ENFORCE(
ctx->HasInput("LearningRate"),
"Input(LearningRate) of DecayedAdagradOp should not be null.");
PADDLE_ENFORCE(ctx->HasOutput("ParamOut"),
"Output(ParamOut) of DecayedAdagradOp should not be null.");
PADDLE_ENFORCE(ctx->HasOutput("MomentOut"),
"Output(MomentOut) of DecayedAdagradOp should not be null.");
// The learning rate is passed as a tensor but must contain a single value.
auto lr_dims = ctx->GetInputDim("LearningRate");
PADDLE_ENFORCE_EQ(framework::product(lr_dims), 1,
"LearningRate should have one element");
// Grad and Moment must match Param's dimensions exactly.
auto param_dims = ctx->GetInputDim("Param");
PADDLE_ENFORCE_EQ(param_dims, ctx->GetInputDim("Grad"),
"Param and Grad input of DecayedAdagradOp should have "
"the same dimension.");
PADDLE_ENFORCE_EQ(param_dims, ctx->GetInputDim("Moment"),
"Param and Moment input of DecayedAdagradOp should have "
"the same dimension.");
ctx->SetOutputDim("ParamOut", param_dims);
ctx->SetOutputDim("MomentOut", param_dims);
}
};
// Declares the proto of the decayed_adagrad operator: four inputs (Param,
// Grad, Moment, LearningRate), two outputs (ParamOut, MomentOut) and two float
// attributes ("decay", default 0.95; "epsilon", default 1.0e-6).
class DecayedAdagradOpMaker : public framework::OpProtoAndCheckerMaker {
public:
DecayedAdagradOpMaker(framework::OpProto *proto,
framework::OpAttrChecker *op_checker)
: OpProtoAndCheckerMaker(proto, op_checker) {
AddInput("Param", "(Tensor) Input parameter");
AddInput("Grad", "(Tensor) Input gradient");
AddInput("Moment", "(Tensor) Second moment");
AddInput("LearningRate", "(Tensor) Learning rate");
AddOutput("ParamOut", "(Tensor) Output parameter");
AddOutput("MomentOut", "(Tensor) Output second moment");
AddAttr<float>("decay",
"(float, default 0.95) "
"Discounting factor for coming gradient")
.SetDefault(0.95);
AddAttr<float>("epsilon",
"(float, default 1.0e-6) "
"Constant for numerical stability")
.SetDefault(1.0e-6f);
// Operator-level documentation: the two update formulas.
AddComment(R"DOC(
Decayed Adagrad
moment_out = decay * moment + (1 - decay) * grad * grad
param_out = param - learning_rate * grad / (sqrt(moment_out) + epsilon)
)DOC");
}
};
} // namespace operators
} // namespace paddle
namespace ops = paddle::operators;
// decayed_adagrad is an optimizer step, so it is registered without a
// gradient operator.
REGISTER_OP_WITHOUT_GRADIENT(decayed_adagrad, ops::DecayedAdagradOp,
ops::DecayedAdagradOpMaker);
// CPU kernel, float only.
REGISTER_OP_CPU_KERNEL(
decayed_adagrad,
ops::DecayedAdagradOpKernel<paddle::platform::CPUPlace, float>);
/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserve.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
#define EIGEN_USE_GPU
#include "paddle/operators/decayed_adagrad_op.h"
namespace ops = paddle::operators;
// GPU kernel for decayed_adagrad: the same kernel template instantiated for
// GPUPlace, float only.
REGISTER_OP_GPU_KERNEL(
decayed_adagrad,
ops::DecayedAdagradOpKernel<paddle::platform::GPUPlace, float>);
/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserve.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
#pragma once
#include "paddle/framework/eigen.h"
#include "paddle/framework/op_registry.h"
namespace paddle {
namespace operators {
// Kernel performing one decayed Adagrad step:
//   moment_out = decay * moment + (1 - decay) * grad * grad
//   param_out  = param - lr * grad / (sqrt(moment_out) + epsilon)
// All tensors are processed as flattened vectors; the one-element learning
// rate is broadcast to the length of the moment vector.
template <typename Place, typename T>
class DecayedAdagradOpKernel : public framework::OpKernel<T> {
 public:
  void Compute(const framework::ExecutionContext& ctx) const override {
    // Allocate the outputs first, before any input is mapped.
    auto updated_param = ctx.Output<framework::Tensor>("ParamOut");
    auto updated_moment = ctx.Output<framework::Tensor>("MomentOut");
    updated_param->mutable_data<T>(ctx.GetPlace());
    updated_moment->mutable_data<T>(ctx.GetPlace());

    // Hyper-parameters.
    float decay = ctx.Attr<float>("decay");
    float epsilon = ctx.Attr<float>("epsilon");

    // Flattened views of the inputs.
    auto param = framework::EigenVector<T>::Flatten(
        *ctx.Input<framework::Tensor>("Param"));
    auto grad = framework::EigenVector<T>::Flatten(
        *ctx.Input<framework::Tensor>("Grad"));
    auto moment = framework::EigenVector<T>::Flatten(
        *ctx.Input<framework::Tensor>("Moment"));
    auto lr = framework::EigenVector<T>::Flatten(
        *ctx.Input<framework::Tensor>("LearningRate"));

    // Flattened views of the outputs.
    auto param_out = framework::EigenVector<T>::Flatten(*updated_param);
    auto moment_out = framework::EigenVector<T>::Flatten(*updated_moment);

    auto device = ctx.GetEigenDevice<Place>();

    // Update the moment first; the parameter step reads the new moment.
    moment_out.device(device) = decay * moment + (1 - decay) * grad * grad;

    // lr is broadcast explicitly to the moment's element count.
    Eigen::DSizes<int, 1> full_extent(updated_moment->numel());
    param_out.device(device) =
        param - lr.broadcast(full_extent) * grad / (moment_out.sqrt() + epsilon);
  }
};
} // namespace operators
} // namespace paddle
/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserve .
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
#include "paddle/operators/dynamic_recurrent_op.h"
#include "paddle/framework/op_registry.h"
namespace paddle {
namespace operators {
using framework::Scope;
using framework::TensorArray;
using framework::LoDTensor;
using framework::Variable;
using framework::DySeqMetaBatch;
namespace detail {
// Calls scope.Var() for every name in `var_names`, in order.
inline void CreateVariables(Scope& scope,
                            const std::vector<std::string>& var_names) {
  for (size_t i = 0; i < var_names.size(); ++i) {
    scope.Var(var_names[i]);
  }
}
/*
* The inputs with sequence should be reordered when they are split, so the
* boot_states should be reordered in the same order.
*
* NOTE This may require that the `pre_state` of the first time step should just
* copy the `boot_state` rather than reference it, for that the content should
* be reordered, but the RNN op should not change the `boot_state` as an input
* variable's content.
*/
// Fills `tensor` row by row from `boot_state`: row `seq_id` of `tensor`
// receives row `metas[seq_id].ori_idx` of `boot_state`, so the result follows
// the order described by `metas`. `boot_state` itself is left untouched.
template <typename T>
inline void ReorderBootState(const DySeqMetaBatch& metas,
                             const LoDTensor& boot_state, LoDTensor* tensor,
                             const platform::Place& dst_place) {
  size_t seq_id = 0;
  while (seq_id < metas.size()) {
    // Destination: a one-row slice at the reordered position.
    auto slice = tensor->Slice<T>(seq_id, seq_id + 1);
    // Source: the one-row slice at the sequence's original position.
    auto boot_slice =
        boot_state.Slice<T>(metas[seq_id].ori_idx, metas[seq_id].ori_idx + 1);
    // TODO(superjom) pass in device context as an argument
    slice.template CopyFrom<T>(boot_slice, dst_place,
                               platform::CPUDeviceContext());
    ++seq_id;
  }
}
} // namespace detail
// Declares the proto of the dynamic recurrent operator. All input, output and
// attribute names are taken from DynamicRecurrentOp::kArgName rather than
// spelled out here.
class DynamicRecurrentOpProtoAndCheckerMaker
: public framework::OpProtoAndCheckerMaker {
public:
DynamicRecurrentOpProtoAndCheckerMaker(framework::OpProto* proto,
framework::OpAttrChecker* op_checker)
: OpProtoAndCheckerMaker(proto, op_checker) {
const auto& name = DynamicRecurrentOp::kArgName;
// inputs and outputs stored in proto
// inlinks, boot_memories and outlinks may each bind several variables.
AddInput(name.inlinks,
"the inputs that need to be segmented for each step.")
.AsDuplicable();
AddInput(name.boot_memories, "variables to initialize memories.")
.AsDuplicable();
AddOutput(name.outlinks, "the outputs that need to concated for all steps.")
.AsDuplicable();
AddOutput(name.step_scopes, "step scopes");
// Attributes stored in AttributeMap
AddAttr<std::vector<std::string>>(name.pre_memories,
"names of pre-memories");
AddAttr<std::vector<std::string>>(name.memories, "names of memories");
AddComment("This is a RNN operator for varience-length sequences.");
}
};
// Runs the dynamic RNN: prepares the per-step inputs, scopes and states, runs
// the step net once per time step, then merges the per-step outputs.
// The preparation calls below are order-dependent.
void DynamicRecurrentOp::Run(const Scope& scope,
const platform::DeviceContext& dev_ctx) const {
// Initialize the per-run cache from this op and `scope`; arg_ is handed to
// Init as an output pointer.
cache_.Init(kArgName, *this, scope, &arg_);
// SplitInputs sets cache_.num_steps, which CreateScopes requires to be > 0.
SplitInputs();
CreateScopes();
WriteStepInputs();
InitStates();
WriteStepOutputs();
// call stepnet in all the time steps
for (size_t step = 0; step < cache_.num_steps; step++) {
auto& step_scope = cache_.GetScope(step);
stepnet_->Run(step_scope, dev_ctx);
}
ConcatOutputs();
}
// Unpacks every inlink tensor into a per-step TensorArray (stored in
// step_inputs_) and records the returned sequence meta data in dy_seq_metas_.
// The first inlink fixes cache_.num_steps; every later inlink must yield the
// same number of steps.
void DynamicRecurrentOp::SplitInputs() const {
  // TODO(superjom) make level a config
  // TODO(superjom) check all the inputs has the same LoD
  int level = 0;
  for (const auto& inlink : cache_.inlinks) {
    const auto& inlink_name = inlink.first;
    const auto& tensor = inlink.second->Get<LoDTensor>();

    TensorArray& ta = step_inputs_[inlink_name];
    dy_seq_metas_[inlink_name] =
        ta.Unpack(tensor, level, true /*length_descend*/);

    if (!cache_.num_steps) {
      // Step count not known yet: take it from this inlink.
      cache_.num_steps = ta.size();
    } else {
      PADDLE_ENFORCE_EQ(ta.size(), cache_.num_steps,
                        "inputs should have the same steps");
    }
  }
}
// For every inlink and every time step, makes the step scope's variable of the
// same name share data with the corresponding entry of the inlink's
// TensorArray. The variable is created in the step scope if FindVar does not
// find it.
void DynamicRecurrentOp::WriteStepInputs() const {
  for (const auto& inlink : cache_.inlinks) {
    const auto& inlink_name = inlink.first;
    auto ta_it = step_inputs_.find(inlink_name);
    PADDLE_ENFORCE(ta_it != step_inputs_.end(),
                   "step_inputs_ not compatible with memory set");
    TensorArray& ta = ta_it->second;

    size_t step = 0;
    while (step < ta.size()) {
      auto tensor = ta.Read(step);
      auto& step_scope = cache_.GetScope(step);
      Variable* var = step_scope.FindVar(inlink_name);
      if (!var) {
        var = step_scope.Var(inlink_name);
      }
      var->GetMutable<LoDTensor>()->ShareDataWith<value_type>(tensor);
      ++step;
    }
  }
}
// Ensures step_outputs_ holds a TensorArray entry for each outlink name, then
// checks that at least one entry exists.
void DynamicRecurrentOp::WriteStepOutputs() const {
  // initialize step outputs
  for (auto it = cache_.outlinks.begin(); it != cache_.outlinks.end(); ++it) {
    step_outputs_.emplace(it->first, TensorArray());
  }
  PADDLE_ENFORCE_GT(step_outputs_.size(), 0UL);
}
// Makes sure there is one step scope per time step and that each step scope
// contains variables for the inlinks, outlinks, memories, pre-memories and all
// outputs of the step net.
// Requires cache_.num_steps > 0 and stepnet_ to be set.
void DynamicRecurrentOp::CreateScopes() const {
  PADDLE_ENFORCE_GT(cache_.num_steps, 0);
  // resize scopes
  // Grow the list until it has one scope per step. Sizes are compared
  // directly instead of being subtracted: the difference is unsigned and
  // would wrap around to a huge count if more scopes already existed than
  // this run needs.
  while (cache_.scopes->size() < cache_.num_steps) {
    cache_.scopes->emplace_back(&cache_.scope->NewScope());
  }

  // init temporary inputs
  PADDLE_ENFORCE_NOT_NULL(stepnet_, "stepnet should be set first");
  std::vector<std::string> memories;
  std::vector<std::string> pre_memories;
  std::vector<std::string> stepnet_outputs;
  // Collect the variable names of the current and previous-step memories.
  std::transform(arg_.memories.begin(), arg_.memories.end(),
                 std::back_inserter(memories),
                 [](const rnn::MemoryAttr& m) { return m.var; });
  std::transform(arg_.memories.begin(), arg_.memories.end(),
                 std::back_inserter(pre_memories),
                 [](const rnn::MemoryAttr& m) { return m.pre_var; });

  // Flatten the step net's output map into a single list of variable names.
  for (const auto& item : stepnet_->Outputs()) {
    for (const auto& var : item.second) {
      stepnet_outputs.push_back(var);
    }
  }

  // Create all of the above variables in every step scope.
  for (size_t step = 0; step < cache_.num_steps; step++) {
    auto& scope = cache_.GetScope(step);
    detail::CreateVariables(scope, arg_.inlinks);
    detail::CreateVariables(scope, arg_.outlinks);
    detail::CreateVariables(scope, memories);
    detail::CreateVariables(scope, pre_memories);
    detail::CreateVariables(scope, stepnet_outputs);
  }
}
void DynamicRecurrentOp::ConcatOutputs() const {
// TODO(superjom) transform this to a config
int level = 0;
for (size_t step = 0; step < cache_.num_steps; step++) {
auto& scope = cache_.GetScope(step);
for (auto& item : step_outputs_) {
auto* var = scope.FindVar(item.first);
PADDLE_ENFORCE_NOT_NULL(var);
auto* tensor = var->GetMutable<LoDTensor>();
tensor->mutable_data<value_type>(platform::CPUPlace());
item.second.WriteShared(step, *tensor);
}
}
// the inlinks' lods should be the same, so randomly get one lod.
const auto& some_lod =
cache_.scope->FindVar(arg_.inlinks.front())->Get<LoDTensor>().lod();
const auto& some_meta = dy_seq_metas_[arg_.inlinks.front()];
for (auto& item : step_outputs_) {
auto tensor = item.second.Pack(level, some_meta, some_lod);
auto* output = cache_.outlinks[item.first]->GetMutable<LoDTensor>();
const_cast<LoDTensor*>(output)->ShareDataWith<value_type>(tensor);
}
}
// Creates and links every memory (state) for every time step, in step order.
// For each (step, memory) pair the state is created first and then linked to
// its predecessor.
void DynamicRecurrentOp::InitStates() const {
  const auto& memories = arg_.memories;
  for (size_t t = 0; t < cache_.num_steps; ++t) {
    for (auto it = memories.begin(); it != memories.end(); ++it) {
      CreateState(*it, t);
      LinkState(*it, t);
    }
  }
}
// Allocates the state tensor `memory.var` in the scope of `step`. Its shape is
// the boot state's shape with dimension 0 replaced by the number of instances
// in the first inlink's tensor for this step. The tensor is also recorded in
// states_ under the memory's name.
void DynamicRecurrentOp::CreateState(const rnn::MemoryAttr& memory,
                                     size_t step) const {
  auto& step_scope = cache_.GetScope(step);
  auto& state = *cache_.GetTensor(step_scope, memory.var);
  auto& boot_state = *cache_.GetTensor(*cache_.scope, memory.boot_var);

  const std::string& first_inlink = arg_.inlinks.front();
  size_t num_instances = step_inputs_[first_inlink].Read(step).dims()[0];

  auto state_dims = boot_state.dims();
  state_dims[0] = num_instances;
  state.Resize(state_dims);
  state.mutable_data<value_type>(platform::CPUPlace());

  states_[memory.var].WriteShared(step, state);
}
// Links the pre-state variable (`memory.pre_var`) of `step` to its source:
// the boot state from the parent scope for step 0, otherwise the state
// (`memory.var`) of the previous step scope. Only the first `num_instances`
// rows of the source are shared, where `num_instances` is dimension 0 of the
// first inlink's tensor for this step.
void DynamicRecurrentOp::LinkState(const rnn::MemoryAttr& memory,
                                   size_t step) const {
  auto& scope = cache_.GetScope(step);
  auto& state_pre = *cache_.GetTensor(scope, memory.pre_var);

  // all the step_inputs' metas should be the same, just randomly select one
  // and get the dyseq meta.
  const auto& some_meta = dy_seq_metas_[arg_.inlinks.front()];
  size_t num_instances =
      step_inputs_[arg_.inlinks.front()].Read(step).dims()[0];

  LoDTensor* pre_state{nullptr};
  if (step == 0) {
    pre_state = cache_.GetTensor(*cache_.scope, memory.boot_var);
    // Use value_type (not a hard-coded float) so this stays consistent with
    // every other allocation in this operator.
    pre_state->mutable_data<value_type>(platform::CPUPlace());
    // allocate memory
    state_pre.Resize(pre_state->dims());
    state_pre.mutable_data<value_type>(platform::CPUPlace());
    detail::ReorderBootState<value_type>(some_meta, *pre_state, &state_pre,
                                         pre_state->place());
    // NOTE(review): the ShareDataWith below appears to replace the buffer that
    // was just filled by ReorderBootState, so at step 0 state_pre ends up
    // aliasing the (un-reordered) boot state. The unit test relies on that
    // aliasing -- confirm which behavior is intended.
  } else {
    pre_state = cache_.GetTensor(cache_.GetScope(step - 1), memory.var);
  }

  // shrink and share from previous state
  auto shrinked_pre_state = pre_state->Slice<value_type>(0, num_instances);
  state_pre.ShareDataWith<value_type>(shrinked_pre_state);
}
// Fills the cache for one run: remembers the parent scope, parses the op's
// arguments into `arg`, then caches the step-scope vector and the inlink and
// outlink variables found in `scope`.
void DynamicRecurrentOp::ArgCache::Init(
    const rnn::ArgumentName& name, const paddle::framework::OperatorBase& op,
    const paddle::framework::Scope& scope, rnn::Argument* arg) {
  // The parameter shadows the member, hence the explicit this->.
  this->scope = &scope;

  InitArgument(name, op, arg);

  const rnn::Argument& parsed = *arg;
  CacheScopes(scope, parsed);
  CacheInlinks(scope, parsed.inlinks);
  CacheOutlinks(scope, parsed.outlinks);
}
// Thin wrapper that forwards to rnn::InitArgument with is_grad fixed to false.
void DynamicRecurrentOp::ArgCache::InitArgument(const rnn::ArgumentName& name,
                                                const OperatorBase& op,
                                                rnn::Argument* arg) {
  const bool is_grad = false;
  rnn::InitArgument(name, arg, op, is_grad);
}
// Looks up the variable named by `arg.step_scopes` in `scope` and caches a
// pointer to the std::vector<Scope*> it holds. Fails if the variable is
// missing.
void DynamicRecurrentOp::ArgCache::CacheScopes(const Scope& scope,
                                               const rnn::Argument& arg) {
  auto* step_scopes_var = scope.FindVar(arg.step_scopes);
  PADDLE_ENFORCE(step_scopes_var != nullptr,
                 "the step_scopes output argument [%s] should be created first "
                 "by framework.",
                 arg.step_scopes);
  scopes = step_scopes_var->GetMutable<std::vector<Scope*>>();
}
// Stores, for every inlink name, the variable found in `scope`. GetVariable
// fails when a name is missing.
void DynamicRecurrentOp::ArgCache::CacheInlinks(
    const Scope& scope, const std::vector<std::string>& names) {
  for (const std::string& inlink_name : names) {
    inlinks[inlink_name] = GetVariable(scope, inlink_name);
  }
}
// Stores, for every outlink name, the variable found in `scope`. GetVariable
// fails when a name is missing.
void DynamicRecurrentOp::ArgCache::CacheOutlinks(
    const Scope& scope, const std::vector<std::string>& names) {
  for (const std::string& outlink_name : names) {
    outlinks[outlink_name] = GetVariable(scope, outlink_name);
  }
}
// Returns the variable called `name` as found by scope.FindVar; raises an
// enforce error when no such variable is found.
Variable* DynamicRecurrentOp::ArgCache::GetVariable(const Scope& scope,
                                                    const std::string& name) {
  Variable* found = scope.FindVar(name);
  PADDLE_ENFORCE_NOT_NULL(found, "variable [%s] not exist in scope", name);
  return found;
}
// Returns a mutable LoDTensor held by the variable `name`; the variable lookup
// (and its failure behavior) is delegated to GetVariable.
LoDTensor* DynamicRecurrentOp::ArgCache::GetTensor(
    const framework::Scope& scope, const std::string& name) {
  return GetVariable(scope, name)->GetMutable<LoDTensor>();
}
// Names of the operator's input/output/attribute slots, passed to
// ArgCache::Init in Run(). The initializer is positional; the matching field
// order is declared by rnn::ArgumentName (not visible here -- TODO confirm the
// order against recurrent_op_utils.h when editing).
const rnn::ArgumentName DynamicRecurrentOp::kArgName{
"step_net", "step_scopes", "inlinks", "outlinks",
"memories", "pre_memories", "boot_memories"};
// The body is empty: running the gradient operator currently does nothing.
void DynamicRecurrentGradientOp::Run(
const Scope& scope, const platform::DeviceContext& dev_ctx) const {}
} // namespace operators
} // namespace paddle
// Registers the operator type "dynamic_recurrent" with its implementation
// class and its proto/checker maker. As the macro name indicates, no gradient
// operator is registered here.
REGISTER_OP_WITHOUT_GRADIENT(
dynamic_recurrent, paddle::operators::DynamicRecurrentOp,
paddle::operators::DynamicRecurrentOpProtoAndCheckerMaker);
/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserve.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
#pragma once
#ifdef PADDLE_WITH_TESTING
#include "gtest/gtest.h"
#endif
#include "paddle/framework/lod_tensor.h"
#include "paddle/framework/operator.h"
#include "paddle/framework/tensor_array.h"
#include "paddle/framework/variable.h"
#include "paddle/operators/rnn/recurrent_op_utils.h"
namespace paddle {
namespace operators {
// Recurrent operator that runs a step net once per time step over inputs that
// are split into per-step tensors (see SplitInputs). All working state
// (argument cache, per-step arrays) is kept in `mutable` members because the
// processing methods, including Run, are const.
class DynamicRecurrentOp : public framework::OperatorBase {
public:
// Slot names used to parse this operator's arguments.
static const rnn::ArgumentName kArgName;
// Element type used for all tensor allocations made by this operator.
using value_type = float;
DynamicRecurrentOp(const std::string& type,
const framework::VariableNameMap& inputs,
const framework::VariableNameMap& outputs,
const framework::AttributeMap& attrs)
: OperatorBase(type, inputs, outputs, attrs) {}
// Copying is not supported yet: this constructor always throws.
DynamicRecurrentOp(const DynamicRecurrentOp& o)
: framework::OperatorBase(
static_cast<const framework::OperatorBase&>(o)) {
// TODO(yuyang18): Implement copy ctor well.
PADDLE_THROW("Not implemented");
}
void Run(const framework::Scope& scope,
const platform::DeviceContext& dev_ctx) const override;
/*
* Split the inputs (LoDTensors) into segments, one per time step.
*/
void SplitInputs() const;
/*
* Create step-scopes to store the temporary outputs of each time step.
*/
void CreateScopes() const;
/*
* Link TensorArray steps to the corresponding variables located in
* step-scopes.
*/
void WriteStepInputs() const;
/*
* Write output of each step to the corresponding TensorArray.
*/
void WriteStepOutputs() const;
/*
* Initialize the states. Each state has a corresponding pre-state, which
* shares memory with the state of the previous time step. The pre-state of
* the first time step is linked to the boot memory found in the parent scope.
*/
void InitStates() const;
/*
* Create state variables for each time step.
*/
void CreateState(const rnn::MemoryAttr& memory, size_t step) const;
/*
* Link pre-state variable in current scope to the state variable in the
* previous time step (scope).
*/
void LinkState(const rnn::MemoryAttr& memory, size_t step) const;
/*
* Concatenate the outputs of all time steps and generate a LoDTensor.
*/
void ConcatOutputs() const;
/*
* Set the step net; ownership is transferred to this operator. The pointer
* must not be null.
*/
void SetStepNet(std::unique_ptr<OperatorBase> net) {
PADDLE_ENFORCE_NOT_NULL(net);
stepnet_ = std::move(net);
}
// Dereferences stepnet_ without a null check; call SetStepNet first.
const OperatorBase& GetStepNet() const { return *stepnet_; }
// The three accessors below use std::map::operator[] on mutable maps, so
// asking for a name that is not present inserts (and returns) an empty
// TensorArray instead of failing.
const framework::TensorArray& state(const std::string& name) const {
return states_[name];
}
const framework::TensorArray& step_input(const std::string& name) const {
return step_inputs_[name];
}
const framework::TensorArray& step_output(const std::string& name) const {
return step_outputs_[name];
}
protected:
// Per-run cache of the scope, the step scopes and the inlink/outlink
// variables, filled by Init().
struct ArgCache {
// Scope passed to Init(); not owned.
framework::Scope const* scope;
// Points at the vector stored in the step_scopes variable; not owned.
std::vector<framework::Scope*>* scopes;
// Inlink / outlink variables keyed by variable name.
std::map<std::string, framework::Variable*> inlinks;
std::map<std::string, framework::Variable*> outlinks;
// Number of time steps; 0 until SplitInputs() has set it.
size_t num_steps{0};
void Init(const rnn::ArgumentName& name, const OperatorBase& op,
const framework::Scope& scope, rnn::Argument* arg);
// Returns the scope of step `index`; enforces index < num_steps and uses
// bounds-checked vector access.
framework::Scope& GetScope(size_t index) {
PADDLE_ENFORCE_LT(index, num_steps);
return *scopes->at(index);
}
framework::LoDTensor* GetTensor(const framework::Scope& scope,
const std::string& name);
private:
void InitArgument(const rnn::ArgumentName& name, const OperatorBase& op,
rnn::Argument* arg);
void CacheScopes(const framework::Scope& scope, const rnn::Argument& arg);
void CacheInlinks(const framework::Scope& scope,
const std::vector<std::string>& names);
void CacheOutlinks(const framework::Scope& scope,
const std::vector<std::string>& names);
framework::Variable* GetVariable(const framework::Scope& scope,
const std::string& name);
};
private:
std::unique_ptr<OperatorBase> stepnet_;
// Per-name TensorArrays with one entry per time step.
mutable std::map<std::string, framework::TensorArray> states_;
mutable std::map<std::string, framework::TensorArray> step_inputs_;
mutable std::map<std::string, framework::TensorArray> step_outputs_;
// Sequence meta returned by TensorArray::Unpack for each inlink.
mutable std::map<std::string, std::vector<framework::DySeqMeta>>
dy_seq_metas_;
mutable rnn::Argument arg_;
mutable ArgCache cache_;
// Give the unit tests access to the private members above.
#ifdef PADDLE_WITH_TESTING
friend class DynamicRecurrentOpTestHelper;
FRIEND_TEST(DynamicRecurrentOpTestHelper, SplitInputs);
FRIEND_TEST(DynamicRecurrentOpTestHelper, CreateCache);
FRIEND_TEST(DynamicRecurrentOpTestHelper, CreateScopes);
FRIEND_TEST(DynamicRecurrentOpTestHelper, WriteStepInputs);
FRIEND_TEST(DynamicRecurrentOpTestHelper, WriteStepOutputs);
FRIEND_TEST(DynamicRecurrentOpTestHelper, InitStates);
FRIEND_TEST(DynamicRecurrentOpTestHelper, ConcatOutputs);
#endif
};
// Gradient counterpart of DynamicRecurrentOp. Only the constructor and the Run
// override are declared; Run is defined out of line.
class DynamicRecurrentGradientOp : public framework::OperatorBase {
public:
DynamicRecurrentGradientOp(const std::string& type,
const framework::VariableNameMap& inputs,
const framework::VariableNameMap& outputs,
const framework::AttributeMap& attrs)
: OperatorBase(type, inputs, outputs, attrs) {}
void Run(const framework::Scope& scope,
const platform::DeviceContext& dev_ctx) const override;
};
} // namespace operators
} // namespace paddle
#include "paddle/operators/dynamic_recurrent_op.h"
#include <gtest/gtest.h>
#include "paddle/framework/ddim.h"
#include "paddle/framework/lod_tensor.h"
#include "paddle/framework/op_desc.h"
#include "paddle/framework/op_registry.h"
#include "paddle/operators/net_op.h"
namespace paddle {
namespace operators {
using framework::Scope;
using framework::TensorArray;
using framework::LoDTensor;
using framework::Variable;
// Minimal operator used as the content of the step net in these tests: it
// inherits the base-class constructors and its Run does nothing.
class TestOp : public framework::OperatorBase {
public:
using framework::OperatorBase::OperatorBase;
DEFINE_OP_CLONE_METHOD(TestOp);
void Run(const Scope& scope,
const platform::DeviceContext& dev_ctx) const override {}
};
// Fills an OpDesc variable slot: sets its parameter name and appends every
// given argument name, in order.
void OpDescNewVar(const std::string& param_name,
                  std::initializer_list<const char*> arguments,
                  paddle::framework::OpDesc::Var* var) {
  var->set_parameter(param_name);
  for (auto it = arguments.begin(); it != arguments.end(); ++it) {
    var->add_arguments(*it);
  }
}
// Creates (or fetches) the variable `name` in `scope`, turns it into a
// LoDTensor of shape `dims`, allocates float storage on `place` and returns
// the tensor.
LoDTensor* CreateVar(Scope& scope, std::string name, framework::DDim dims,
                     const platform::Place& place) {
  LoDTensor* lod_tensor = scope.Var(name)->GetMutable<LoDTensor>();
  lod_tensor->Resize(dims);
  lod_tensor->mutable_data<float>(place);
  return lod_tensor;
}
class DynamicRecurrentOpTestHelper : public ::testing::Test {
protected:
const rnn::ArgumentName argname = DynamicRecurrentOp::kArgName;
virtual void SetUp() override {
CreateGlobalVariables();
auto op_desc = CreateOpDesc();
op = paddle::framework::OpRegistry::CreateOp(op_desc);
dop = dynamic_cast<DynamicRecurrentOp*>(op.get());
InitCacheManually();
InitStepNet();
}
framework::OpDesc CreateOpDesc() {
// create op
paddle::framework::OpDesc op_desc;
op_desc.set_type("dynamic_recurrent");
OpDescNewVar(argname.inlinks, {"in0"}, op_desc.add_inputs());
OpDescNewVar(argname.boot_memories, {"boot_mem"}, op_desc.add_inputs());
OpDescNewVar(argname.step_scopes, {"step_scopes"}, op_desc.add_outputs());
OpDescNewVar(argname.outlinks, {"out0"}, op_desc.add_outputs());
// set pre-memories
auto pre_memories = op_desc.mutable_attrs()->Add();
pre_memories->set_name(argname.pre_memories);
pre_memories->set_type(paddle::framework::AttrType::STRINGS);
auto pre_memories_item = pre_memories->add_strings();
*pre_memories_item = "mem@pre";
// set memories
auto memories = op_desc.mutable_attrs()->Add();
memories->set_name(argname.memories);
memories->set_type(paddle::framework::AttrType::STRINGS);
auto memories_item = memories->add_strings();
*memories_item = "mem";
return op_desc;
}
void CreateGlobalVariables() {
platform::CPUPlace place;
scope.Var("step_scopes");
CreateVar(scope, "boot_mem", framework::make_ddim({10, 20}), place);
CreateVar(scope, "out0", framework::make_ddim({10, 20}), place);
auto* in0 = CreateVar(scope, "in0", framework::make_ddim({10, 8}), place);
// 10 instanes with 4 sentences, length is 4, 3, 2, 1 respectively.
framework::LoD in0_lod(1);
for (int x : std::vector<int>{0, 4, 7, 9, 10}) {
in0_lod[0].push_back(x);
}
in0->set_lod(in0_lod);
in0->Resize(framework::make_ddim({10, 8}));
// set the content, each sentence content is seqid.batchid
// the seqid starts from 0
int start = 0;
for (size_t seqid = 0; seqid < in0_lod.size() - 1; seqid++) {
for (size_t batchid = 0;
batchid < in0_lod[0][seqid + 1] - in0_lod[0][seqid]; batchid++) {
float v = seqid + batchid * 0.1;
for (size_t dim = 0; dim < 8; dim++) {
in0->data<float>()[start * 8 + dim] = v;
}
start++;
}
}
}
void InitCacheManually() {
dop->cache_.Init(DynamicRecurrentOp::kArgName, *dop, scope, &dop->arg_);
}
void InitStepNet() {
std::unique_ptr<framework::OperatorBase> stepnet{new NetOp};
dynamic_cast<NetOp*>(stepnet.get())
->AppendOp(std::unique_ptr<TestOp>(new TestOp(
"test", {{"inlinks", {"in0"}}, {"boot_memories", {"boot_mem"}}},
{{"outlinks", {"out0"}}, {"step_scopes", {"step_scopes"}}}, {})));
dop->SetStepNet(std::move(stepnet));
}
protected:
DynamicRecurrentOp* dop;
std::unique_ptr<framework::OperatorBase> op;
paddle::platform::CPUDeviceContext device_context;
paddle::framework::Scope scope;
};
// After SetUp the parsed argument must contain exactly one inlink and one
// outlink.
TEST_F(DynamicRecurrentOpTestHelper, CreateCache) {
  const rnn::Argument& parsed = dop->arg_;
  ASSERT_EQ(parsed.inlinks.size(), 1UL);
  ASSERT_EQ(parsed.outlinks.size(), 1UL);
}
// in0 holds sequences of length 4, 3, 2 and 1, so splitting must yield four
// steps whose batch sizes are 4, 3, 2 and 1.
TEST_F(DynamicRecurrentOpTestHelper, SplitInputs) {
  dop->SplitInputs();
  auto& in0_steps = dop->step_inputs_["in0"];
  ASSERT_EQ(in0_steps.size(), 4UL);

  const auto& step0 = in0_steps.Read(0);
  const auto& step1 = in0_steps.Read(1);
  const auto& step2 = in0_steps.Read(2);
  const auto& step3 = in0_steps.Read(3);

  EXPECT_EQ(step0.dims()[0], 4);
  EXPECT_EQ(step1.dims()[0], 3);
  EXPECT_EQ(step2.dims()[0], 2);
  EXPECT_EQ(step3.dims()[0], 1);
}
// Four time steps must result in four step scopes.
TEST_F(DynamicRecurrentOpTestHelper, CreateScopes) {
  dop->SplitInputs();
  dop->CreateScopes();

  const auto& cache = dop->cache_;
  ASSERT_EQ(cache.num_steps, 4UL);
  ASSERT_EQ(cache.scopes->size(), 4UL);
}
// Every step scope must be able to find the inlink variable after the step
// inputs have been written.
TEST_F(DynamicRecurrentOpTestHelper, WriteStepInputs) {
  dop->SplitInputs();
  dop->CreateScopes();
  dop->WriteStepInputs();

  const std::vector<std::string> inlink_names({"in0"});
  for (size_t t = 0; t < dop->cache_.num_steps; ++t) {
    auto& step_scope = dop->cache_.GetScope(t);
    for (const auto& inlink : inlink_names) {
      ASSERT_TRUE(step_scope.FindVar(inlink) != nullptr);
    }
  }
}
// Every step scope must be able to find the outlink variable after the step
// outputs have been initialized.
TEST_F(DynamicRecurrentOpTestHelper, WriteStepOutputs) {
  dop->SplitInputs();
  dop->CreateScopes();
  dop->WriteStepInputs();
  dop->WriteStepOutputs();

  const std::vector<std::string> outlink_names({"out0"});
  for (size_t t = 0; t < dop->cache_.num_steps; ++t) {
    auto& step_scope = dop->cache_.GetScope(t);
    for (const auto& outlink : outlink_names) {
      ASSERT_TRUE(step_scope.FindVar(outlink));
    }
  }
}
// Intentionally empty placeholder: ConcatOutputs is not exercised here.
TEST_F(DynamicRecurrentOpTestHelper, ConcatOutputs) {
// Let's leave this test to python unittest.
}
// After InitStates every step scope must resolve the state, the pre-state and
// the boot state; at step 0 the pre-state must alias the boot state's data.
TEST_F(DynamicRecurrentOpTestHelper, InitStates) {
  dop->SplitInputs();
  dop->CreateScopes();
  dop->WriteStepInputs();
  dop->WriteStepOutputs();
  dop->InitStates();

  for (size_t t = 0; t < dop->cache_.num_steps; ++t) {
    auto& step_scope = dop->cache_.GetScope(t);

    auto* state_var = step_scope.FindVar("mem");
    ASSERT_TRUE(state_var != nullptr);
    auto* pre_state_var = step_scope.FindVar("mem@pre");
    ASSERT_TRUE(pre_state_var != nullptr);
    auto* boot_state_var = step_scope.FindVar("boot_mem");
    ASSERT_TRUE(boot_state_var != nullptr);

    if (t != 0) {
      continue;
    }
    // check pre_state is a reference of boot_state
    ASSERT_EQ(boot_state_var->Get<LoDTensor>().data<float>(),
              pre_state_var->Get<LoDTensor>().data<float>());
  }
}
} // operators
} // namespace paddle
/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserve.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
#include "paddle/framework/feed_fetch_type.h"
#include "paddle/framework/op_registry.h"
#include "paddle/framework/operator.h"
namespace paddle {
namespace operators {
// Operator that copies one item of a feed list into an output variable.
//
// Input "Input" names a variable holding a framework::FeedFetchList, output
// "Out" names the destination variable, and the int attribute "col" selects
// which item of the list is copied.
class FeedOp : public framework::OperatorBase {
 public:
  FeedOp(const std::string &type, const framework::VariableNameMap &inputs,
         const framework::VariableNameMap &outputs,
         const framework::AttributeMap &attrs)
      : OperatorBase(type, inputs, outputs, attrs) {}

  // Copies feed_list[col] (data and LoD) into the output variable, placing
  // the data on the device context's place. Fails with an enforce error when
  // either variable is missing or `col` is outside the feed list.
  void Run(const framework::Scope &scope,
           const platform::DeviceContext &dev_ctx) const override {
    auto feed_var_name = Input("Input");
    auto *feed_var = scope.FindVar(feed_var_name);
    PADDLE_ENFORCE(feed_var != nullptr,
                   "Cannot find feed_var in scope, feed_var_name is %s",
                   feed_var_name);

    auto out_name = this->Output("Out");
    auto *out_var = scope.FindVar(out_name);
    PADDLE_ENFORCE(out_var != nullptr,
                   "Cannot find out_var in scope, out_var_name is %s",
                   out_name);

    auto col = Attr<int>("col");
    auto &feed_list = feed_var->Get<framework::FeedFetchList>();
    // Validate before casting: a negative int would otherwise wrap around to
    // a huge size_t and only surface as an opaque out-of-range error.
    PADDLE_ENFORCE(col >= 0 && static_cast<size_t>(col) < feed_list.size(),
                   "The feed column index %d is out of range, the feed list "
                   "holds %d items.",
                   col, feed_list.size());
    auto &feed_item = feed_list.at(static_cast<size_t>(col));

    auto *out_item = out_var->GetMutable<framework::FeedFetchType>();
    out_item->CopyFromTensor(feed_item, dev_ctx.GetPlace(), dev_ctx);
    out_item->set_lod(feed_item.lod());
  }
};
} // namespace operators
} // namespace paddle
// We do not need to register an OpInfoMaker here,
// since the feed operator will not be used by end users directly.
// EmptyGradOpMaker is passed as the gradient-op maker.
REGISTER_OPERATOR(feed, paddle::operators::FeedOp,
paddle::framework::EmptyGradOpMaker);
/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserve.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
#include "paddle/framework/feed_fetch_type.h"
#include "paddle/framework/op_registry.h"
namespace paddle {
namespace operators {
// Operator that copies a variable into one slot of a fetch list.
//
// Input "Input" names the source variable (a framework::FeedFetchType),
// output "Out" names a variable holding a framework::FeedFetchList, and the
// int attribute "col" selects the destination slot. The list is grown when
// `col` is past its current end.
class FetchOp : public framework::OperatorBase {
 public:
  FetchOp(const std::string &type, const framework::VariableNameMap &inputs,
          const framework::VariableNameMap &outputs,
          const framework::AttributeMap &attrs)
      : OperatorBase(type, inputs, outputs, attrs) {}

  // Copies the source tensor to fetch_list[col] on the CPU. Fails with an
  // enforce error when either variable is missing or `col` is negative.
  void Run(const framework::Scope &scope,
           const platform::DeviceContext &dev_ctx) const override {
    auto fetch_var_name = Input("Input");
    auto *fetch_var = scope.FindVar(fetch_var_name);
    PADDLE_ENFORCE(fetch_var != nullptr,
                   "Cannot find fetch variable in scope, fetch_var_name is %s",
                   fetch_var_name);

    auto out_name = this->Output("Out");
    auto *out_var = scope.FindVar(out_name);
    PADDLE_ENFORCE(out_var != nullptr,
                   "Cannot find out_var in scope, out_var_name is %s",
                   out_name);

    // Reject negative indices before converting to size_t. Without this
    // check -1 wraps to SIZE_MAX, `col + 1` overflows to 0 and the resize
    // below would silently clear the whole fetch list.
    auto col_attr = Attr<int>("col");
    PADDLE_ENFORCE(col_attr >= 0,
                   "The fetch column index must not be negative, but is %d",
                   col_attr);
    auto col = static_cast<size_t>(col_attr);

    auto *fetch_list = out_var->GetMutable<framework::FeedFetchList>();
    auto &src_item = fetch_var->Get<framework::FeedFetchType>();

    if (col >= fetch_list->size()) {
      fetch_list->resize(col + 1);
    }
    auto &dst_item = fetch_list->at(col);

    // FIXME(yuyang18): Should we assume the fetch operator always generate
    // CPU outputs?
    dst_item.CopyFromTensor(src_item, platform::CPUPlace(), dev_ctx);
  }
};
} // namespace operators
} // namespace paddle
// We do not need to register an OpInfoMaker here,
// since the fetch operator will not be used by end users directly.
// EmptyGradOpMaker is passed as the gradient-op maker.
REGISTER_OPERATOR(fetch, paddle::operators::FetchOp,
paddle::framework::EmptyGradOpMaker);
/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserve.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
#include "paddle/operators/gru_unit_op.h"
namespace paddle {
namespace operators {
using framework::Tensor;
// Forward GRU unit operator. InferShape validates that the required inputs
// and outputs exist, checks the shapes of Input / Weight / (optional) Bias
// against frame_size and sets the shapes of the three outputs.
class GRUUnitOp : public framework::OperatorWithKernel {
 public:
  using framework::OperatorWithKernel::OperatorWithKernel;

 protected:
  void InferShape(framework::InferShapeContext* ctx) const override {
    // --- presence checks -------------------------------------------------
    PADDLE_ENFORCE(ctx->HasInput("Input"),
                   "Input(%s) of GRUUnitOp should not be null.", "Input");
    PADDLE_ENFORCE(ctx->HasInput("HiddenPrev"),
                   "Input(%s) of GRUUnitOp should not be null.", "HiddenPrev");
    PADDLE_ENFORCE(ctx->HasInput("Weight"),
                   "Input(%s) of GRUUnitOp should not be null.", "Weight");
    PADDLE_ENFORCE(ctx->HasOutput("Gate"),
                   "Output(%s) of GRUUnitOp should not be null.", "Gate");
    PADDLE_ENFORCE(ctx->HasOutput("ResetHiddenPrev"),
                   "Output(%s) of GRUUnitOp should not be null.",
                   "ResetHiddenPrev");
    PADDLE_ENFORCE(ctx->HasOutput("Hidden"),
                   "Output(%s) of GRUUnitOp should not be null.", "Hidden");

    // --- shape checks ----------------------------------------------------
    auto in_dims = ctx->GetInputDim("Input");
    auto h_prev_dims = ctx->GetInputDim("HiddenPrev");
    auto w_dims = ctx->GetInputDim("Weight");

    int batch_size = in_dims[0];
    int input_size = in_dims[1];
    int frame_size = h_prev_dims[1];
    int weight_height = w_dims[0];
    int weight_width = w_dims[1];

    PADDLE_ENFORCE_EQ(
        input_size, frame_size * 3,
        "The input_size must be 3 times of frame_size in GRUUnitOp.");
    PADDLE_ENFORCE_EQ(
        weight_height, frame_size,
        "The shape of Weight matrix must be [frame_size, frame_size * 3].");
    PADDLE_ENFORCE_EQ(
        weight_width, frame_size * 3,
        "The shape of Weight matrix must be [frame_size, frame_size * 3].");

    // Bias is only validated when a variable name is bound to it.
    auto bias_name = Input("Bias");
    if (bias_name != framework::kEmptyVarName) {
      auto b_dims = ctx->GetInputDim("Bias");
      int bias_height = b_dims[0];
      int bias_width = b_dims[1];
      PADDLE_ENFORCE_EQ(bias_height, 1,
                        "The shape of Bias must be [1, frame_size * 3].");
      PADDLE_ENFORCE_EQ(bias_width, frame_size * 3,
                        "The shape of Bias must be [1, frame_size * 3].");
    }

    // --- output shapes ---------------------------------------------------
    ctx->SetOutputDim("Gate", {batch_size, frame_size * 3});
    ctx->SetOutputDim("ResetHiddenPrev", {batch_size, frame_size});
    ctx->SetOutputDim("Hidden", {batch_size, frame_size});
  }
};
// Declares the proto of the GRU unit operator: four inputs (Input, HiddenPrev,
// Weight, Bias), three outputs (Gate and ResetHiddenPrev are marked
// intermediate, Hidden is the result) and two int attributes selecting the
// activation functions.
class GRUUnitOpMaker : public framework::OpProtoAndCheckerMaker {
public:
GRUUnitOpMaker(framework::OpProto* proto,
framework::OpAttrChecker* op_checker)
: OpProtoAndCheckerMaker(proto, op_checker) {
AddInput("Input",
"(Tensor) Matrix with shape [batch_size, frame_size * 3] for the "
"input.");
AddInput("HiddenPrev",
"(Tensor) Matrix with shape [batch_size, frame_size] for the "
"states of previous time step.");
AddInput("Weight",
"(Tensor) Weight matrix with shape [frame_size, frame_size * 3]. "
"The elements continuous in memory can be divided into two parts. "
"The first part are weights of the update gate and reset gate "
"with shape [frame_size, frame_size * 2], and the second part are "
"weights of output candidate with shape [frame_size, frame_size]");
// GRUUnitOp::InferShape only checks Bias when a variable name is bound to it.
AddInput("Bias",
"(Tensor) Bias vector with shape [1, frame_size * 3] concating "
"bias of the update gate, reset gate and output candidate.");
AddOutput("Gate",
"(Tensor) Matrix with shape [batch_size, frame_size * 3] for the "
"output of update gate, reset gate and output candidate")
.AsIntermediate();
AddOutput("ResetHiddenPrev",
"(Tensor) Matrix with shape [batch_size, frame_size] for the "
"reseted hidden state of previous time step.")
.AsIntermediate();
AddOutput("Hidden",
"(Tensor) The GRU hidden state of the current time step "
"with shape [batch_size, frame_size].");
// The defaults and allowed values below are GRUActivationType enumerators
// (identity = 0, sigmoid = 1, tanh = 2, relu = 3).
AddAttr<int>("activation",
"(enum int, default tanh) "
"The activation type used for output candidate {h}_t.")
.SetDefault(tanh)
.InEnum({identity, sigmoid, tanh, relu});
AddAttr<int>("gate_activation",
"(enum int, default sigmoid) "
"The activation type used in update gate and reset gate.")
.SetDefault(sigmoid)
.InEnum({identity, sigmoid, tanh, relu});
AddComment(R"DOC(
GRUUnitOp implements part calculations of the GRU unit as following:
\f[
update \ gate: u_t = actGate(xu_t + W_u * hidden_prev + bias_u) \\
reset \ gate: r_t = actGate(xr_t + W_r * hidden_prev + bias_r) \\
output \ candidate: {h}_t = actNode(xc_t + W_c * dot(r_t, hidden_prev) + bias_c) \\
output: h_t = dot((1-u_t), {h}_t) + dot(u_t, hidden_prev)
\f]
The rest of GRU unit can be completed by using FCOp's output as the input of GRUUnitOp.
)DOC");
}
};
// Backward GRU unit operator. InferShape validates that the forward inputs,
// the forward outputs and the output gradients are present, repeats the
// forward shape checks, and gives every requested input gradient the shape of
// its forward input.
class GRUUnitGradOp : public framework::OperatorWithKernel {
 public:
  using framework::OperatorWithKernel::OperatorWithKernel;

 protected:
  void InferShape(framework::InferShapeContext* ctx) const override {
    // --- presence checks: forward inputs and outputs ---------------------
    PADDLE_ENFORCE(ctx->HasInput("Input"),
                   "Input(%s) of GRUUnitGradOp should not be null.", "Input");
    PADDLE_ENFORCE(ctx->HasInput("HiddenPrev"),
                   "Input(%s) of GRUUnitGradOp should not be null.",
                   "HiddenPrev");
    PADDLE_ENFORCE(ctx->HasInput("Weight"),
                   "Input(%s) of GRUUnitGradOp should not be null.", "Weight");
    PADDLE_ENFORCE(ctx->HasInput("Gate"),
                   "Input(%s) of GRUUnitGradOp should not be null.", "Gate");
    PADDLE_ENFORCE(ctx->HasInput("ResetHiddenPrev"),
                   "Input(%s) of GRUUnitGradOp should not be null.",
                   "ResetHiddenPrev");
    PADDLE_ENFORCE(ctx->HasInput("Hidden"),
                   "Input(%s) of GRUUnitGradOp should not be null.", "Hidden");

    // --- presence checks: gradients of the forward outputs ---------------
    PADDLE_ENFORCE(ctx->HasInput(framework::GradVarName("Gate")),
                   "Input(%s@GRAD) of GRUUnitGradOp should not be null.",
                   "Gate");
    PADDLE_ENFORCE(ctx->HasInput(framework::GradVarName("ResetHiddenPrev")),
                   "Input(%s@GRAD) of GRUUnitGradOp should not be null.",
                   "ResetHiddenPrev");
    PADDLE_ENFORCE(ctx->HasInput(framework::GradVarName("Hidden")),
                   "Input(%s@GRAD) of GRUUnitGradOp should not be null.",
                   "Hidden");

    // --- shape checks ----------------------------------------------------
    auto in_dims = ctx->GetInputDim("Input");
    auto h_prev_dims = ctx->GetInputDim("HiddenPrev");
    auto w_dims = ctx->GetInputDim("Weight");

    int input_size = in_dims[1];
    int frame_size = h_prev_dims[1];
    int weight_height = w_dims[0];
    int weight_width = w_dims[1];

    PADDLE_ENFORCE_EQ(
        input_size, frame_size * 3,
        "The input_size must be 3 times of frame_size in GRUUnitOp.");
    PADDLE_ENFORCE_EQ(
        weight_height, frame_size,
        "The shape of Weight matrix must be [frame_size, frame_size * 3].");
    PADDLE_ENFORCE_EQ(
        weight_width, frame_size * 3,
        "The shape of Weight matrix must be [frame_size, frame_size * 3].");

    // Bias (and its gradient) is only handled when a name is bound to it.
    auto bias_name = Input("Bias");
    if (bias_name != framework::kEmptyVarName) {
      auto b_dims = ctx->GetInputDim("Bias");
      int bias_height = b_dims[0];
      int bias_width = b_dims[1];
      PADDLE_ENFORCE_EQ(bias_height, 1,
                        "The shape of Bias must be [1, frame_size * 3].");
      PADDLE_ENFORCE_EQ(bias_width, frame_size * 3,
                        "The shape of Bias must be [1, frame_size * 3].");
      auto bias_grad = framework::GradVarName("Bias");
      if (ctx->HasOutput(bias_grad)) {
        ctx->SetOutputDim(bias_grad, b_dims);
      }
    }

    // --- gradient shapes mirror the forward inputs -----------------------
    auto input_grad = framework::GradVarName("Input");
    if (ctx->HasOutput(input_grad)) {
      ctx->SetOutputDim(input_grad, in_dims);
    }
    auto hidden_prev_grad = framework::GradVarName("HiddenPrev");
    if (ctx->HasOutput(hidden_prev_grad)) {
      ctx->SetOutputDim(hidden_prev_grad, h_prev_dims);
    }
    auto weight_grad = framework::GradVarName("Weight");
    if (ctx->HasOutput(weight_grad)) {
      ctx->SetOutputDim(weight_grad, w_dims);
    }
  }
};
} // namespace operators
} // namespace paddle
namespace ops = paddle::operators;
// Register the forward op with its maker and the gradient op under the names
// "gru_unit" / "gru_unit_grad".
REGISTER_OP(gru_unit, ops::GRUUnitOp, ops::GRUUnitOpMaker, gru_unit_grad,
ops::GRUUnitGradOp);
// CPU kernels are registered for float only.
REGISTER_OP_CPU_KERNEL(gru_unit,
ops::GRUUnitKernel<paddle::platform::CPUPlace, float>);
REGISTER_OP_CPU_KERNEL(
gru_unit_grad, ops::GRUUnitGradKernel<paddle::platform::CPUPlace, float>);
/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserve.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
#define EIGEN_USE_GPU
#include "paddle/operators/gru_unit_op.h"
namespace ops = paddle::operators;
// GPU kernels for "gru_unit" / "gru_unit_grad" are registered for float only.
REGISTER_OP_GPU_KERNEL(gru_unit,
ops::GRUUnitKernel<paddle::platform::GPUPlace, float>);
REGISTER_OP_GPU_KERNEL(
gru_unit_grad, ops::GRUUnitGradKernel<paddle::platform::GPUPlace, float>);
/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserve.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
#pragma once
#include "paddle/operators/activation_op.h"
#include "paddle/operators/math/math_function.h"
#include "paddle/framework/eigen.h"
#include "paddle/framework/op_registry.h"
namespace paddle {
namespace operators {
using Tensor = framework::Tensor;
// Shorthand for framework::EigenMatrix; defaults to row-major layout and
// Eigen::DenseIndex indices.
template <typename T, int MajorType = Eigen::RowMajor,
typename IndexType = Eigen::DenseIndex>
using EigenMatrix = framework::EigenMatrix<T, MajorType, IndexType>;
// Activation selectors. The integer values are what the int attributes
// "activation" and "gate_activation" carry. This is an unscoped enum, so the
// enumerator names are visible directly in paddle::operators.
enum GRUActivationType { identity = 0, sigmoid = 1, tanh = 2, relu = 3 };
// Forward kernel of the gru_unit operator. It reads "Input", "HiddenPrev",
// "Weight" and the optional "Bias", and writes "Gate", "ResetHiddenPrev" and
// "Hidden" for a single GRU step:
//   u = gate_act(...), r = gate_act(...), c = act(...)
//   Hidden = u * (HiddenPrev - c) + c
template <typename Place, typename T>
class GRUUnitKernel : public framework::OpKernel<T> {
public:
// Applies the activation selected by act_type (a GRUActivationType value) to
// the expression x and stores the result into y, evaluated on device d.
// Any other act_type value ends in PADDLE_THROW.
template <typename Device, typename X, typename Y>
void ActCompute(const int act_type, const Device& d, X x, Y y) const {
if (act_type == identity)
y.device(d) = x;
else if (act_type == sigmoid)
SigmoidFunctor<T>()(d, x, y);
else if (act_type == tanh)
TanhFunctor<T>()(d, x, y);
else if (act_type == relu)
ReluFunctor<T>()(d, x, y);
else
PADDLE_THROW("unsupported activation type");
}
// Runs one forward GRU step; see the class comment for the inputs and outputs.
void Compute(const framework::ExecutionContext& context) const override {
auto* input = context.Input<Tensor>("Input");
auto* hidden_prev = context.Input<Tensor>("HiddenPrev");
auto* weight = context.Input<Tensor>("Weight");
// Bias is optional: it is only used when the pointer is non-null (see below).
auto* bias = context.Input<Tensor>("Bias");
// Allocate every output on the execution place before taking Eigen views.
auto* gate = context.Output<Tensor>("Gate");
gate->mutable_data<T>(context.GetPlace());
auto* reset_hidden_prev = context.Output<Tensor>("ResetHiddenPrev");
reset_hidden_prev->mutable_data<T>(context.GetPlace());
auto* hidden = context.Output<Tensor>("Hidden");
hidden->mutable_data<T>(context.GetPlace());
// batch_size is Input's first dim; frame_size is HiddenPrev's second dim.
// NOTE(review): the code below treats Input and Gate as
// [batch_size, 3 * frame_size] -- confirm against the op's InferShape.
int batch_size = input->dims()[0];
int frame_size = hidden_prev->dims()[1];
auto x = EigenMatrix<T>::From(*input);
auto h_p = EigenMatrix<T>::From(*hidden_prev);
auto g = EigenMatrix<T>::From(*gate);
auto r_h_p = EigenMatrix<T>::From(*reset_hidden_prev);
auto h = EigenMatrix<T>::From(*hidden);
auto place = context.GetEigenDevice<Place>();
// calculate unactivated gate outputs
if (bias) {
// Bias is viewed as a [1, 3 * frame_size] row and broadcast over the batch.
auto b = EigenMatrix<T>::From(*bias);
g.device(place) = x +
b.reshape(Eigen::array<int, 2>({{1, frame_size * 3}}))
.broadcast(Eigen::array<int, 2>({{batch_size, 1}}));
} else {
g.device(place) = x;
}
const T* hidden_prev_data = hidden_prev->data<T>();
const T* weight_data = weight->data<T>();
T* gate_data = gate->data<T>();
T* reset_hidden_prev_data = reset_hidden_prev->data<T>();
// Accumulate hidden_prev * weight into the first 2 * frame_size columns of
// gate (gate rows are 3 * frame_size apart).
// NOTE(review): argument meaning is assumed to follow the BLAS gemm
// convention (transA, transB, M, N, K, alpha, A, lda, B, ldb, beta, C, ldc)
// -- confirm against math_function.h.
math::gemm<Place, T>(context.device_context(), false, false, batch_size,
2 * frame_size, frame_size, 1, hidden_prev_data,
frame_size, weight_data, frame_size * 2, 1, gate_data,
frame_size * 3);
// calculate activated gate
Eigen::array<int, 2> extents({{batch_size, frame_size}});
Eigen::array<int, 2> u_offsets({{0, 0}});
// The gate activation is applied in place on the first frame_size columns.
ActCompute(context.Attr<int>("gate_activation"), place,
g.slice(u_offsets, extents), g.slice(u_offsets, extents));
auto u = g.slice(u_offsets, extents); // update gate
Eigen::array<int, 2> r_offsets({{0, frame_size}});
// Same in-place activation on the second frame_size columns.
ActCompute(context.Attr<int>("gate_activation"), place,
g.slice(r_offsets, extents), g.slice(r_offsets, extents));
auto r = g.slice(r_offsets, extents); // reset gate
r_h_p.device(place) = r * h_p; // reset previous hidden state
// Accumulate reset_hidden_prev times the weight block that starts at offset
// 2 * frame_size * frame_size into the last frame_size columns of gate.
math::gemm<Place, T>(context.device_context(), false, false, batch_size,
frame_size, frame_size, 1, reset_hidden_prev_data,
frame_size, weight_data + frame_size * frame_size * 2,
frame_size, 1, gate_data + frame_size * 2,
frame_size * 3);
Eigen::array<int, 2> c_offsets({{0, frame_size * 2}});
// The candidate uses the "activation" attribute rather than
// "gate_activation".
ActCompute(context.Attr<int>("activation"), place,
g.slice(c_offsets, extents), g.slice(c_offsets, extents));
auto c = g.slice(c_offsets, extents); // output candidate
// calculate final output
h.device(place) = u * (h_p - c) + c;
}
};
// Backward kernel of the gru_unit operator. From the forward tensors "Input",
// "HiddenPrev", "Weight", "Gate", "ResetHiddenPrev" and the incoming
// "Hidden" gradient it produces the gradients of "Input", "HiddenPrev",
// "Weight" and, when requested, "Bias".
template <typename Place, typename T>
class GRUUnitGradKernel : public framework::OpKernel<T> {
public:
// Dispatches on act_type (a GRUActivationType value) to the matching gradient
// functor and writes the result into dx, evaluated on device d. For identity
// dx is simply dy. Any other act_type value ends in PADDLE_THROW.
// NOTE(review): the functors are presumably computing the gradient w.r.t.
// the pre-activation value from the activation output y and the upstream
// gradient dy -- confirm against activation_op.h.
template <typename Device, typename X, typename Y, typename DX, typename DY>
void ActGradCompute(const int act_type, const Device& d, X x, Y y, DX dx,
DY dy) const {
// x is dummy and won't be used even in Relu(use y instead)
if (act_type == identity)
dx.device(d) = dy;
else if (act_type == sigmoid)
SigmoidGradFunctor<T>()(d, x, y, dy, dx);
else if (act_type == tanh)
TanhGradFunctor<T>()(d, x, y, dy, dx);
else if (act_type == relu)
ReluGradFunctor<T>()(d, x, y, dy, dx);
else
PADDLE_THROW("unsupported activation type");
}
// Runs the backward pass of one GRU step; see the class comment.
void Compute(const framework::ExecutionContext& context) const override {
auto* input = context.Input<Tensor>("Input");
auto* hidden_prev = context.Input<Tensor>("HiddenPrev");
auto* weight = context.Input<Tensor>("Weight");
auto* gate = context.Input<Tensor>("Gate");
auto* reset_hidden_prev = context.Input<Tensor>("ResetHiddenPrev");
auto* hidden_grad = context.Input<Tensor>(framework::GradVarName("Hidden"));
auto* input_grad = context.Output<Tensor>(framework::GradVarName("Input"));
auto* hidden_prev_grad =
context.Output<Tensor>(framework::GradVarName("HiddenPrev"));
auto* weight_grad =
context.Output<Tensor>(framework::GradVarName("Weight"));
auto* bias_grad = context.Output<Tensor>(framework::GradVarName("Bias"));
// NOTE(review): these three outputs are dereferenced unconditionally; only
// bias_grad is null-checked further down. Confirm the framework always
// supplies them.
input_grad->mutable_data<T>(context.GetPlace());
hidden_prev_grad->mutable_data<T>(context.GetPlace());
weight_grad->mutable_data<T>(context.GetPlace());
// Local scratch tensors: the gradient of the gate buffer (same dims as
// Input) and the gradient of ResetHiddenPrev.
Tensor gate_grad;
gate_grad.mutable_data<T>(input->dims(), context.GetPlace());
Tensor reset_hidden_prev_grad;
reset_hidden_prev_grad.mutable_data<T>(reset_hidden_prev->dims(),
context.GetPlace());
// batch_size is Input's first dim; frame_size is HiddenPrev's second dim.
int batch_size = input->dims()[0];
int frame_size = hidden_prev->dims()[1];
const T* hidden_prev_data = hidden_prev->data<T>();
T* hidden_prev_grad_data = hidden_prev_grad->data<T>();
const T* weight_data = weight->data<T>();
T* weight_grad_data = weight_grad->data<T>();
T* gate_grad_data = gate_grad.data<T>();
const T* reset_hidden_prev_data = reset_hidden_prev->data<T>();
T* reset_hidden_prev_grad_data = reset_hidden_prev_grad.data<T>();
auto h_p = EigenMatrix<T>::From(*hidden_prev);
auto g = EigenMatrix<T>::From(*gate);
auto d_h = EigenMatrix<T>::From(*hidden_grad);
auto d_x = EigenMatrix<T>::From(*input_grad);
auto d_h_p = EigenMatrix<T>::From(*hidden_prev_grad);
auto d_g = EigenMatrix<T>::From(gate_grad);
auto d_r_h_p = EigenMatrix<T>::From(reset_hidden_prev_grad);
auto place = context.GetEigenDevice<Place>();
// Gate holds three [batch_size, frame_size] column slices: update gate,
// reset gate and output candidate, in that order.
Eigen::array<int, 2> extents({{batch_size, frame_size}});
Eigen::array<int, 2> u_offsets({{0, 0}});
auto u = g.slice(u_offsets, extents); // update gate
Eigen::array<int, 2> r_offsets({{0, frame_size}});
auto r = g.slice(r_offsets, extents); // reset gate
Eigen::array<int, 2> c_offsets({{0, frame_size * 2}});
auto c = g.slice(c_offsets, extents); // output candidate
// backward for unactivated update gate
// (the upstream term is d_h * (h_p - c))
ActGradCompute(context.Attr<int>("gate_activation"), place, u, u,
d_g.slice(u_offsets, extents), d_h * (h_p - c));
// backward for unactivated output candidate
// (the upstream term is d_h * (1 - u))
ActGradCompute(context.Attr<int>("activation"), place, c, c,
d_g.slice(c_offsets, extents), d_h * (u.constant(T(1)) - u));
// backward for reset_hidden_prev
// (candidate slice of gate_grad times the transposed state-weight block,
// overwriting the scratch tensor since the second scalar argument is 0)
math::gemm<Place, T>(context.device_context(), false, true, batch_size,
frame_size, frame_size, 1,
gate_grad_data + frame_size * 2, frame_size * 3,
weight_data + frame_size * frame_size * 2, frame_size,
0, reset_hidden_prev_grad_data, frame_size);
// backward for state_weight
math::gemm<Place, T>(
context.device_context(), true, false, frame_size, frame_size,
batch_size, 1, reset_hidden_prev_data, frame_size,
gate_grad_data + frame_size * 2, frame_size * 3, 0,
weight_grad_data + frame_size * frame_size * 2, frame_size);
// backward for unactivated reset gate
// (must run after d_r_h_p has been filled by the gemm above)
ActGradCompute(context.Attr<int>("gate_activation"), place, r, r,
d_g.slice(r_offsets, extents), d_r_h_p * h_p);
// backward for update_gate_weight and reset_gate_weight
math::gemm<Place, T>(context.device_context(), true, false, frame_size,
frame_size * 2, batch_size, 1, hidden_prev_data,
frame_size, gate_grad_data, frame_size * 3, 0,
weight_grad_data, frame_size * 2);
// backward for hidden_prev
// (element-wise terms first; the gemm below then accumulates into the same
// buffer, its second scalar argument being 1)
d_h_p.device(place) = d_r_h_p * r + d_h * u;
math::gemm<Place, T>(context.device_context(), false, true, batch_size,
frame_size, frame_size * 2, 1, gate_grad_data,
frame_size * 3, weight_data, frame_size * 2, 1,
hidden_prev_grad_data, frame_size);
// backward for input
d_x.device(place) = d_g;
// backward for bias
// (only computed when the Bias gradient output is present; it is the sum of
// gate_grad over dimension 0)
if (bias_grad) {
bias_grad->mutable_data<T>(context.GetPlace());
auto d_b = EigenMatrix<T>::From(*bias_grad);
d_b.device(place) = d_g.sum(Eigen::array<int, 1>({{0}}));
}
}
};
} // namespace operators
} // namespace paddle
/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserve.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
#include "paddle/operators/margin_rank_loss_op.h"
namespace paddle {
namespace operators {
// Forward operator definition for margin_rank_loss. It only performs shape
// inference; the computation lives in MarginRankLossKernel.
class MarginRankLossOp : public framework::OperatorWithKernel {
 public:
  using framework::OperatorWithKernel::OperatorWithKernel;

 protected:
  // Checks that Label, X1, X2, Out and the intermediate output Activated are
  // all present, that the three inputs share the same 2-D shape
  // [batch_size x 1], and then gives Activated and Out that same shape.
  void InferShape(framework::InferShapeContext *ctx) const override {
    // input check
    PADDLE_ENFORCE(ctx->HasInput("Label"), "Input(Label) shouldn't be null.");
    PADDLE_ENFORCE(ctx->HasInput("X1"), "Input(X1) shouldn't be null.");
    PADDLE_ENFORCE(ctx->HasInput("X2"), "Input(X2) shouldn't be null.");
    PADDLE_ENFORCE(ctx->HasOutput("Out"), "Output(Out) shouldn't be null.");
    // Activated has its dim set below and is dereferenced unconditionally by
    // the forward kernel, so fail here with a clear message if it is missing.
    PADDLE_ENFORCE(ctx->HasOutput("Activated"),
                   "Intermediate(Activated) shouldn't be null.");

    auto label_dims = ctx->GetInputDim("Label");
    auto x1_dims = ctx->GetInputDim("X1");
    auto x2_dims = ctx->GetInputDim("X2");
    PADDLE_ENFORCE(
        (label_dims == x1_dims) && (x1_dims == x2_dims) &&
            (label_dims.size() == 2) && (label_dims[1] == 1),
        "All inputs must be 2-D tensor with shape [batch_size x 1].");

    ctx->SetOutputDim("Activated", label_dims);
    ctx->SetOutputDim("Out", label_dims);
  }
};
// Proto and attribute-checker maker for the margin_rank_loss operator.
// T is the type of the "margin" attribute.
template <typename T>
class MarginRankLossOpMaker : public framework::OpProtoAndCheckerMaker {
public:
// Declares the inputs X1, X2 and Label, the attribute "margin" (default 0),
// the intermediate output Activated, the output Out, and the operator's
// documentation string.
MarginRankLossOpMaker(framework::OpProto *proto,
framework::OpAttrChecker *op_checker)
: OpProtoAndCheckerMaker(proto, op_checker) {
AddInput("X1",
"(2-D tensor with shape [batch_size x 1]) The score for "
"one item X1 to be ranked, from pairwise ranking model.");
AddInput("X2",
"(2-D tensor with shape [batch_size x 1]) The score for "
"another item X2 to be ranked, from pairwise ranking model.");
AddInput("Label",
"(2-D tensor with shape [batch_size x 1]) "
"The label indicating X1 ranked higher than X2 or not, "
"can only be +1 or -1.");
AddAttr<T>("margin", "(scalar, default 0) Margin for MarginRankLossOp.")
.SetDefault(static_cast<T>(0));
// Activated is marked intermediate; the gradient operator reads it back.
AddOutput("Activated",
"(2-D tensor with shape [batch_size x 1]) Intermediate tensor "
"to indicate whether each element of Output(Out) is activated.")
.AsIntermediate();
AddOutput("Out",
"(2-D tensor with shape [batch_size x 1]) "
"The output loss of MarginRankLoss operator.");
AddComment(R"DOC(
MarginRankLoss operator measures the loss given a pair of training sample
{`X1`, `X2`} and the `Label` with attribute `margin`, where `Label = +1`
indicating X1 is ranked higher than `X2`, otherwise `Label = -1`. The loss
turns out
loss(X1, X2, Label) = max(0, -Label * (X1 - X2) + margin).
The attribute `margin` involved here helps make the predictions more robust.
Denote the item ranked higher as the positive sample, otherwise the negative
sample. If the score of the two samples satisfies
positive sample - negative sample < margin,
the pair of samples will contribute to the final loss, which will backpropogate
and train the ranking model to enlarge the difference of the two score.
For batch input with size `batch_size`, `X1`, `X2` and `Label`
all have the same shape [batch_size x 1].
)DOC");
}
};
// Gradient operator definition for margin_rank_loss. Like the forward
// operator it only infers shapes; MarginRankLossGradKernel does the math.
class MarginRankLossGradOp : public framework::OperatorWithKernel {
 public:
  using framework::OperatorWithKernel::OperatorWithKernel;

 protected:
  // Requires Label, X1, X2, Out@GRAD and the intermediate Activated to be
  // present, then shapes X1@GRAD and X2@GRAD like Label.
  void InferShape(framework::InferShapeContext *ctx) const override {
    PADDLE_ENFORCE(ctx->HasInput("Label"), "Input(Label) shouldn't be null.");
    PADDLE_ENFORCE(ctx->HasInput("X1"), "Input(X1) shouldn't be null.");
    PADDLE_ENFORCE(ctx->HasInput("X2"), "Input(X2) shouldn't be null.");

    const auto out_grad_name = framework::GradVarName("Out");
    PADDLE_ENFORCE(ctx->HasInput(out_grad_name),
                   "Input(Out@GRAD) shouldn't be null.");
    PADDLE_ENFORCE(ctx->HasInput("Activated"),
                   "Intermediate(Activated) shouldn't be null.");

    // Both score gradients take the shape of Label.
    const auto label_shape = ctx->GetInputDim("Label");
    const auto x1_grad_name = framework::GradVarName("X1");
    ctx->SetOutputDim(x1_grad_name, label_shape);
    const auto x2_grad_name = framework::GradVarName("X2");
    ctx->SetOutputDim(x2_grad_name, label_shape);
  }
};
} // namespace operators
} // namespace paddle
// Register the margin_rank_loss operator together with its gradient operator
// (the proto maker is instantiated for float), then the float CPU kernels for
// the forward and the gradient operator.
namespace ops = paddle::operators;
REGISTER_OP(margin_rank_loss, ops::MarginRankLossOp,
ops::MarginRankLossOpMaker<float>, margin_rank_loss_grad,
ops::MarginRankLossGradOp);
REGISTER_OP_CPU_KERNEL(
margin_rank_loss,
ops::MarginRankLossKernel<paddle::platform::CPUPlace, float>);
REGISTER_OP_CPU_KERNEL(
margin_rank_loss_grad,
ops::MarginRankLossGradKernel<paddle::platform::CPUPlace, float>);
/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserve.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
#include "paddle/operators/margin_rank_loss_op.h"
// Register the float GPU kernels for margin_rank_loss and its gradient
// operator margin_rank_loss_grad.
namespace ops = paddle::operators;
REGISTER_OP_GPU_KERNEL(
margin_rank_loss,
ops::MarginRankLossKernel<paddle::platform::GPUPlace, float>);
REGISTER_OP_GPU_KERNEL(
margin_rank_loss_grad,
ops::MarginRankLossGradKernel<paddle::platform::GPUPlace, float>);
/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserve.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
#pragma once
#include "paddle/framework/eigen.h"
#include "paddle/framework/op_registry.h"
namespace paddle {
namespace operators {
// Element-wise functor: passes strictly positive values through and maps
// everything else to zero.
template <typename T>
struct ReLU {
  HOSTDEVICE T operator()(const T& val) const {
    if (val > 0) {
      return val;
    }
    return static_cast<T>(0);
  }
};
// Element-wise step functor: yields 1 for strictly positive values and 0
// otherwise, converted to T.
template <typename T>
struct Heaviside {
  HOSTDEVICE T operator()(const T& val) const {
    const int step = (val > 0) ? 1 : 0;
    return static_cast<T>(step);
  }
};
// Forward kernel of margin_rank_loss:
//   Out       = ReLU(-Label * (X1 - X2) + margin)
//   Activated = Heaviside(Out)
template <typename Place, typename T>
class MarginRankLossKernel : public framework::OpKernel<T> {
 public:
  void Compute(const framework::ExecutionContext& ctx) const {
    using Tensor = framework::Tensor;
    using Vector = framework::EigenVector<T>;

    auto* loss_tensor = ctx.Output<Tensor>("Out");
    auto* activated_tensor = ctx.Output<Tensor>("Activated");
    auto* label_tensor = ctx.Input<Tensor>("Label");
    auto* x1_tensor = ctx.Input<Tensor>("X1");
    auto* x2_tensor = ctx.Input<Tensor>("X2");

    // Outputs must be allocated before flattened views are taken on them.
    loss_tensor->mutable_data<T>(ctx.GetPlace());
    activated_tensor->mutable_data<T>(ctx.GetPlace());

    const T margin = ctx.Attr<T>("margin");

    auto out = Vector::Flatten(*loss_tensor);
    auto act = Vector::Flatten(*activated_tensor);
    auto label = Vector::Flatten(*label_tensor);
    auto x1 = Vector::Flatten(*x1_tensor);
    auto x2 = Vector::Flatten(*x2_tensor);

    auto& dev = ctx.GetEigenDevice<Place>();
    // Hinge on the signed score difference, then record which entries are
    // strictly positive.
    out.device(dev) = (-label * (x1 - x2) + margin).unaryExpr(ReLU<T>());
    act.device(dev) = out.unaryExpr(Heaviside<T>());
  }
};
// Backward kernel of margin_rank_loss. With g = Out@GRAD * Activated * Label:
//   X1@GRAD = -g   (only when that output is present)
//   X2@GRAD =  g   (only when that output is present)
template <typename Place, typename T>
class MarginRankLossGradKernel : public framework::OpKernel<T> {
 public:
  void Compute(const framework::ExecutionContext& ctx) const {
    using Vector = framework::EigenVector<T>;

    auto* x1_grad_tensor =
        ctx.Output<framework::LoDTensor>(framework::GradVarName("X1"));
    auto* x2_grad_tensor =
        ctx.Output<framework::LoDTensor>(framework::GradVarName("X2"));

    auto* activated_tensor = ctx.Input<framework::Tensor>("Activated");
    auto* out_grad_tensor =
        ctx.Input<framework::Tensor>(framework::GradVarName("Out"));
    auto* label_tensor = ctx.Input<framework::Tensor>("Label");

    auto d_out = Vector::Flatten(*out_grad_tensor);
    auto act = Vector::Flatten(*activated_tensor);
    auto label = Vector::Flatten(*label_tensor);

    auto& dev = ctx.GetEigenDevice<Place>();

    // Gradient w.r.t. X1; skipped when the output was not requested.
    if (x1_grad_tensor != nullptr) {
      x1_grad_tensor->mutable_data<T>(ctx.GetPlace());
      auto d_x1 = Vector::Flatten(*x1_grad_tensor);
      d_x1.device(dev) = -d_out * act * label;
    }

    // Gradient w.r.t. X2; skipped when the output was not requested.
    if (x2_grad_tensor != nullptr) {
      x2_grad_tensor->mutable_data<T>(ctx.GetPlace());
      auto d_x2 = Vector::Flatten(*x2_grad_tensor);
      d_x2.device(dev) = d_out * act * label;
    }
  }
};
} // namespace operators
} // namespace paddle
if(WITH_GPU) if(WITH_GPU)
nv_library(math_function SRCS math_function.cc math_function.cu im2col.cc im2col.cu pooling.cc pooling.cu DEPS cblas device_context operator) nv_library(math_function SRCS math_function.cc math_function.cu im2col.cc im2col.cu DEPS cblas device_context operator)
nv_test(math_function_test SRCS math_function_test.cc DEPS math_function tensor) nv_test(math_function_test SRCS math_function_test.cc DEPS math_function tensor)
nv_library(softmax SRCS softmax.cc softmax.cu DEPS operator) nv_library(softmax SRCS softmax.cc softmax.cu DEPS operator)
nv_library(cross_entropy SRCS cross_entropy.cc cross_entropy.cu DEPS operator) nv_library(cross_entropy SRCS cross_entropy.cc cross_entropy.cu DEPS operator)
nv_library(pooling SRCS pooling.cc pooling.cu DEPS device_context)
nv_library(vol2col SRCS vol2col.cc vol2col.cu DEPS device_context)
else() else()
cc_library(math_function SRCS math_function.cc im2col.cc pooling.cc DEPS cblas device_context operator) cc_library(math_function SRCS math_function.cc im2col.cc DEPS cblas device_context operator)
cc_test(math_function_test SRCS math_function_test.cc DEPS math_function tensor) cc_test(math_function_test SRCS math_function_test.cc DEPS math_function tensor)
cc_library(softmax SRCS softmax.cc DEPS operator) cc_library(softmax SRCS softmax.cc DEPS operator)
cc_library(cross_entropy SRCS cross_entropy.cc DEPS operator) cc_library(cross_entropy SRCS cross_entropy.cc DEPS operator)
cc_library(pooling SRCS pooling.cc DEPS device_context)
cc_library(vol2col SRCS vol2col.cc DEPS device_context)
endif() endif()
cc_test(im2col_test SRCS im2col_test.cc DEPS math_function tensor) cc_test(im2col_test SRCS im2col_test.cc DEPS math_function tensor)
cc_test(vol2col_test SRCS vol2col_test.cc DEPS vol2col tensor)
...@@ -49,10 +49,22 @@ void testIm2col() { ...@@ -49,10 +49,22 @@ void testIm2col() {
memcpy(input_ptr, arr, 6 * sizeof(float)); memcpy(input_ptr, arr, 6 * sizeof(float));
auto* place = new Place(); auto* place = new Place();
paddle::platform::DeviceContext* context;
if (paddle::platform::is_cpu_place(*place)) {
context =
new paddle::platform::CPUDeviceContext(paddle::platform::CPUPlace());
} else {
#ifdef PADDLE_WITH_CUDA
context =
new paddle::platform::CUDADeviceContext(paddle::platform::GPUPlace());
#else
PADDLE_THROW("no GPU support");
#endif // PADDLE_ONLY_CPU
}
if (paddle::platform::is_cpu_place(*place)) { if (paddle::platform::is_cpu_place(*place)) {
input = input_tmp; input = input_tmp;
} else { } else {
input.CopyFrom<float>(input_tmp, *place); input.CopyFrom<float>(input_tmp, *place, *context);
} }
output_cfo.mutable_data<float>( output_cfo.mutable_data<float>(
{1, filter_size, filter_size, output_height, output_width}, *place); {1, filter_size, filter_size, output_height, output_width}, *place);
...@@ -66,18 +78,6 @@ void testIm2col() { ...@@ -66,18 +78,6 @@ void testIm2col() {
paddle::operators::math::ColFormat::kOCF, Place, float> paddle::operators::math::ColFormat::kOCF, Place, float>
im2col_ocf; im2col_ocf;
paddle::platform::DeviceContext* context;
if (paddle::platform::is_cpu_place(*place)) {
context =
new paddle::platform::CPUDeviceContext(paddle::platform::CPUPlace());
} else {
#ifdef PADDLE_WITH_CUDA
context =
new paddle::platform::CUDADeviceContext(paddle::platform::GPUPlace());
#else
PADDLE_THROW("no GPU support");
#endif // PADDLE_ONLY_CPU
}
im2col(*context, input, output_cfo, stride, stride, padding, padding); im2col(*context, input, output_cfo, stride, stride, padding, padding);
im2col_ocf(*context, input, output_ocf, stride, stride, padding, padding); im2col_ocf(*context, input, output_ocf, stride, stride, padding, padding);
...@@ -85,7 +85,8 @@ void testIm2col() { ...@@ -85,7 +85,8 @@ void testIm2col() {
if (paddle::platform::is_cpu_place(*place)) { if (paddle::platform::is_cpu_place(*place)) {
out_cfo_ptr = output_cfo.data<float>(); out_cfo_ptr = output_cfo.data<float>();
} else { } else {
output_tmp.CopyFrom<float>(output_cfo, paddle::platform::CPUPlace()); output_tmp.CopyFrom<float>(output_cfo, paddle::platform::CPUPlace(),
*context);
out_cfo_ptr = output_tmp.data<float>(); out_cfo_ptr = output_tmp.data<float>();
} }
EXPECT_EQ(out_cfo_ptr[0], 0); EXPECT_EQ(out_cfo_ptr[0], 0);
...@@ -101,7 +102,8 @@ void testIm2col() { ...@@ -101,7 +102,8 @@ void testIm2col() {
if (paddle::platform::is_cpu_place(*place)) { if (paddle::platform::is_cpu_place(*place)) {
out_ocf_ptr = output_ocf.data<float>(); out_ocf_ptr = output_ocf.data<float>();
} else { } else {
output_tmp.CopyFrom<float>(output_ocf, paddle::platform::CPUPlace()); output_tmp.CopyFrom<float>(output_ocf, paddle::platform::CPUPlace(),
*context);
out_ocf_ptr = output_tmp.data<float>(); out_ocf_ptr = output_tmp.data<float>();
} }
EXPECT_EQ(out_ocf_ptr[0], 0); EXPECT_EQ(out_ocf_ptr[0], 0);
......
...@@ -17,17 +17,18 @@ TEST(math_function, notrans_mul_trans) { ...@@ -17,17 +17,18 @@ TEST(math_function, notrans_mul_trans) {
auto* gpu_place = new paddle::platform::GPUPlace(0); auto* gpu_place = new paddle::platform::GPUPlace(0);
paddle::platform::CUDADeviceContext context(*gpu_place); paddle::platform::CUDADeviceContext context(*gpu_place);
input1_gpu.CopyFrom<float>(input1, *gpu_place); input1_gpu.CopyFrom<float>(input1, *gpu_place, context);
input2_gpu.CopyFrom<float>(input1, *gpu_place); input2_gpu.CopyFrom<float>(input1, *gpu_place, context);
out_gpu.mutable_data<float>({2, 2}, *gpu_place); out_gpu.mutable_data<float>({2, 2}, *gpu_place);
paddle::operators::math::matmul<paddle::platform::GPUPlace, float>( paddle::operators::math::matmul<paddle::platform::GPUPlace, float>(
context, input1_gpu, false, input2_gpu, true, 1, &out_gpu, 0); context, input1_gpu, false, input2_gpu, true, 1, &out_gpu, 0);
out.CopyFrom<float>(out_gpu, *cpu_place); out.CopyFrom<float>(out_gpu, *cpu_place, context);
float* out_ptr = out.data<float>(); float* out_ptr = out.data<float>();
context.Wait();
EXPECT_EQ(out_ptr[0], 5); EXPECT_EQ(out_ptr[0], 5);
EXPECT_EQ(out_ptr[1], 14); EXPECT_EQ(out_ptr[1], 14);
EXPECT_EQ(out_ptr[2], 14); EXPECT_EQ(out_ptr[2], 14);
...@@ -50,17 +51,18 @@ TEST(math_function, trans_mul_notrans) { ...@@ -50,17 +51,18 @@ TEST(math_function, trans_mul_notrans) {
auto* gpu_place = new paddle::platform::GPUPlace(0); auto* gpu_place = new paddle::platform::GPUPlace(0);
paddle::platform::CUDADeviceContext context(*gpu_place); paddle::platform::CUDADeviceContext context(*gpu_place);
input1_gpu.CopyFrom<float>(input1, *gpu_place); input1_gpu.CopyFrom<float>(input1, *gpu_place, context);
input2_gpu.CopyFrom<float>(input1, *gpu_place); input2_gpu.CopyFrom<float>(input1, *gpu_place, context);
out_gpu.mutable_data<float>({3, 3}, *gpu_place); out_gpu.mutable_data<float>({3, 3}, *gpu_place);
paddle::operators::math::matmul<paddle::platform::GPUPlace, float>( paddle::operators::math::matmul<paddle::platform::GPUPlace, float>(
context, input1_gpu, true, input2_gpu, false, 1, &out_gpu, 0); context, input1_gpu, true, input2_gpu, false, 1, &out_gpu, 0);
out.CopyFrom<float>(out_gpu, *cpu_place); out.CopyFrom<float>(out_gpu, *cpu_place, context);
float* out_ptr = out.data<float>(); float* out_ptr = out.data<float>();
context.Wait();
EXPECT_EQ(out_ptr[0], 9); EXPECT_EQ(out_ptr[0], 9);
EXPECT_EQ(out_ptr[1], 12); EXPECT_EQ(out_ptr[1], 12);
EXPECT_EQ(out_ptr[2], 15); EXPECT_EQ(out_ptr[2], 15);
...@@ -98,9 +100,9 @@ TEST(math_function, gemm_notrans_cublas) { ...@@ -98,9 +100,9 @@ TEST(math_function, gemm_notrans_cublas) {
auto* gpu_place = new paddle::platform::GPUPlace(0); auto* gpu_place = new paddle::platform::GPUPlace(0);
paddle::platform::CUDADeviceContext context(*gpu_place); paddle::platform::CUDADeviceContext context(*gpu_place);
input1_gpu.CopyFrom<float>(input1, *gpu_place); input1_gpu.CopyFrom<float>(input1, *gpu_place, context);
input2_gpu.CopyFrom<float>(input2, *gpu_place); input2_gpu.CopyFrom<float>(input2, *gpu_place, context);
input3_gpu.CopyFrom<float>(input3, *gpu_place); input3_gpu.CopyFrom<float>(input3, *gpu_place, context);
float* a = input1_gpu.data<float>(); float* a = input1_gpu.data<float>();
float* b = input2_gpu.data<float>(); float* b = input2_gpu.data<float>();
float* c = input3_gpu.mutable_data<float>(*gpu_place); float* c = input3_gpu.mutable_data<float>(*gpu_place);
...@@ -108,7 +110,7 @@ TEST(math_function, gemm_notrans_cublas) { ...@@ -108,7 +110,7 @@ TEST(math_function, gemm_notrans_cublas) {
paddle::operators::math::gemm<paddle::platform::GPUPlace, float>( paddle::operators::math::gemm<paddle::platform::GPUPlace, float>(
context, false, false, m, n, k, 1, a, 3, b + 1, 4, 1, c + 1, 4); context, false, false, m, n, k, 1, a, 3, b + 1, 4, 1, c + 1, 4);
input3.CopyFrom<float>(input3_gpu, *cpu_place); input3.CopyFrom<float>(input3_gpu, *cpu_place, context);
// numpy code: // numpy code:
// a = np.arange(6).reshape(2, 3) // a = np.arange(6).reshape(2, 3)
...@@ -116,6 +118,7 @@ TEST(math_function, gemm_notrans_cublas) { ...@@ -116,6 +118,7 @@ TEST(math_function, gemm_notrans_cublas) {
// c = np.arange(8).reshape(2, 4)[:, 1:] // c = np.arange(8).reshape(2, 4)[:, 1:]
// out = np.arange(8).reshape(2, 4) // out = np.arange(8).reshape(2, 4)
// out[:, 1:] = np.dot(a, b) + c // out[:, 1:] = np.dot(a, b) + c
context.Wait();
EXPECT_EQ(input3_ptr[0], 0); EXPECT_EQ(input3_ptr[0], 0);
EXPECT_EQ(input3_ptr[1], 24); EXPECT_EQ(input3_ptr[1], 24);
EXPECT_EQ(input3_ptr[2], 28); EXPECT_EQ(input3_ptr[2], 28);
...@@ -152,9 +155,9 @@ TEST(math_function, gemm_trans_cublas) { ...@@ -152,9 +155,9 @@ TEST(math_function, gemm_trans_cublas) {
auto* gpu_place = new paddle::platform::GPUPlace(0); auto* gpu_place = new paddle::platform::GPUPlace(0);
paddle::platform::CUDADeviceContext context(*gpu_place); paddle::platform::CUDADeviceContext context(*gpu_place);
input1_gpu.CopyFrom<float>(input1, *gpu_place); input1_gpu.CopyFrom<float>(input1, *gpu_place, context);
input2_gpu.CopyFrom<float>(input2, *gpu_place); input2_gpu.CopyFrom<float>(input2, *gpu_place, context);
input3_gpu.CopyFrom<float>(input3, *gpu_place); input3_gpu.CopyFrom<float>(input3, *gpu_place, context);
float* a = input1_gpu.data<float>(); float* a = input1_gpu.data<float>();
float* b = input2_gpu.data<float>(); float* b = input2_gpu.data<float>();
float* c = input3_gpu.mutable_data<float>(*gpu_place); float* c = input3_gpu.mutable_data<float>(*gpu_place);
...@@ -162,7 +165,8 @@ TEST(math_function, gemm_trans_cublas) { ...@@ -162,7 +165,8 @@ TEST(math_function, gemm_trans_cublas) {
paddle::operators::math::gemm<paddle::platform::GPUPlace, float>( paddle::operators::math::gemm<paddle::platform::GPUPlace, float>(
context, false, true, m, n, k, 1, a, 3, b + 3, 3, 1, c + 1, 4); context, false, true, m, n, k, 1, a, 3, b + 3, 3, 1, c + 1, 4);
input3.CopyFrom<float>(input3_gpu, *cpu_place); input3.CopyFrom<float>(input3_gpu, *cpu_place, context);
context.Wait();
EXPECT_EQ(input3_ptr[0], 0); EXPECT_EQ(input3_ptr[0], 0);
EXPECT_EQ(input3_ptr[1], 24); EXPECT_EQ(input3_ptr[1], 24);
......
/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserve.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
#include "paddle/operators/math/vol2col.h"
namespace paddle {
namespace operators {
namespace math {
/*
 * CPU specialization that unrolls a volume into its column buffer.
 *
 * vol = [input_channels, input_depth, input_height, input_width]
 * col =
 *   [input_channels, filter_depth, filter_height, filter_width,
 *                    output_depth, output_height, output_width]
 *
 * Column entries whose source position falls outside the volume (i.e. into
 * the padding) are written as zero.
 */
template <class T>
class Vol2ColFunctor<platform::CPUPlace, T> {
 public:
  void operator()(const platform::DeviceContext& context,
                  const framework::Tensor& vol, framework::Tensor& col,
                  int stride_depth, int stride_height, int stride_width,
                  int padding_depth, int padding_height,
                  int padding_width) const {
    PADDLE_ENFORCE(vol.dims().size() == 4);
    PADDLE_ENFORCE(col.dims().size() == 7);

    const int input_channels = vol.dims()[0];
    const int input_depth = vol.dims()[1];
    const int input_height = vol.dims()[2];
    const int input_width = vol.dims()[3];
    const int filter_depth = col.dims()[1];
    const int filter_height = col.dims()[2];
    const int filter_width = col.dims()[3];
    const int output_depth = col.dims()[4];
    const int output_height = col.dims()[5];
    const int output_width = col.dims()[6];
    const int channels_col =
        input_channels * filter_depth * filter_height * filter_width;

    const T* vol_data = vol.data<T>();
    T* col_data = col.data<T>();

    for (int c = 0; c < channels_col; ++c) {
      // Peel the flat column-channel index apart into the filter offsets
      // (width fastest, then height, then depth) and the input channel.
      int remainder = c;
      const int w_offset = remainder % filter_width;
      remainder /= filter_width;
      const int h_offset = remainder % filter_height;
      remainder /= filter_height;
      const int d_offset = remainder % filter_depth;
      const int c_in = remainder / filter_depth;

      for (int d = 0; d < output_depth; ++d) {
        const int d_pad = d * stride_depth - padding_depth + d_offset;
        const bool d_inside = d_pad >= 0 && d_pad < input_depth;

        for (int h = 0; h < output_height; ++h) {
          const int h_pad = h * stride_height - padding_height + h_offset;
          const bool dh_inside =
              d_inside && h_pad >= 0 && h_pad < input_height;
          // Flat index of the first element of this output row in col.
          const int col_row =
              ((c * output_depth + d) * output_height + h) * output_width;

          for (int w = 0; w < output_width; ++w) {
            const int w_pad = w * stride_width - padding_width + w_offset;
            const bool inside =
                dh_inside && w_pad >= 0 && w_pad < input_width;

            if (inside) {
              const int vol_idx =
                  ((c_in * input_depth + d_pad) * input_height + h_pad) *
                      input_width +
                  w_pad;
              col_data[col_row + w] = vol_data[vol_idx];
            } else {
              col_data[col_row + w] = static_cast<T>(0);
            }
          }
        }
      }
    }
  }
};
/*
 * CPU specialization that scatters a column buffer back onto its volume.
 *
 * vol = [input_channels, input_depth, input_height, input_width]
 * col =
 *   [input_channels, filter_depth, filter_height, filter_width,
 *                    output_depth, output_height, output_width]
 *
 * Every in-bounds column entry is added to the matching volume element, so
 * vol is accumulated into rather than overwritten; entries that map into the
 * padding are skipped.
 */
template <class T>
class Col2VolFunctor<platform::CPUPlace, T> {
 public:
  void operator()(const platform::DeviceContext& context,
                  framework::Tensor& vol, const framework::Tensor& col,
                  int stride_depth, int stride_height, int stride_width,
                  int padding_depth, int padding_height,
                  int padding_width) const {
    PADDLE_ENFORCE(vol.dims().size() == 4);
    PADDLE_ENFORCE(col.dims().size() == 7);

    const int input_channels = vol.dims()[0];
    const int input_depth = vol.dims()[1];
    const int input_height = vol.dims()[2];
    const int input_width = vol.dims()[3];
    const int filter_depth = col.dims()[1];
    const int filter_height = col.dims()[2];
    const int filter_width = col.dims()[3];
    const int output_depth = col.dims()[4];
    const int output_height = col.dims()[5];
    const int output_width = col.dims()[6];
    const int channels_col =
        input_channels * filter_depth * filter_height * filter_width;

    T* vol_data = vol.data<T>();
    const T* col_data = col.data<T>();

    for (int c = 0; c < channels_col; ++c) {
      // Peel the flat column-channel index apart into the filter offsets
      // (width fastest, then height, then depth) and the input channel.
      int remainder = c;
      const int w_offset = remainder % filter_width;
      remainder /= filter_width;
      const int h_offset = remainder % filter_height;
      remainder /= filter_height;
      const int d_offset = remainder % filter_depth;
      const int c_in = remainder / filter_depth;

      for (int d = 0; d < output_depth; ++d) {
        const int d_pad = d * stride_depth - padding_depth + d_offset;
        const bool d_inside = d_pad >= 0 && d_pad < input_depth;

        for (int h = 0; h < output_height; ++h) {
          const int h_pad = h * stride_height - padding_height + h_offset;
          const bool dh_inside =
              d_inside && h_pad >= 0 && h_pad < input_height;
          // Flat index of the first element of this output row in col.
          const int col_row =
              ((c * output_depth + d) * output_height + h) * output_width;

          for (int w = 0; w < output_width; ++w) {
            const int w_pad = w * stride_width - padding_width + w_offset;
            const bool inside =
                dh_inside && w_pad >= 0 && w_pad < input_width;
            if (!inside) {
              continue;
            }

            const int vol_idx =
                ((c_in * input_depth + d_pad) * input_height + h_pad) *
                    input_width +
                w_pad;
            vol_data[vol_idx] += col_data[col_row + w];
          }
        }
      }
    }
  }
};
// Explicitly instantiate the CPU functors for float and double.
template class Vol2ColFunctor<platform::CPUPlace, float>;
template class Vol2ColFunctor<platform::CPUPlace, double>;
template class Col2VolFunctor<platform::CPUPlace, float>;
template class Col2VolFunctor<platform::CPUPlace, double>;
} // namespace math
} // namespace operators
} // namespace paddle
/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserve.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
#include "paddle/operators/math/vol2col.h"
#include "paddle/platform/cuda_helper.h"
namespace paddle {
namespace operators {
namespace math {
/*
 * CUDA kernel that gathers a volume into column layout.
 *
 * Each value of `index` in [0, num_kernels) identifies one
 * (channel_in, d_out, h_out, w_out) output position. For that position the
 * kernel walks the filter_depth x filter_height x filter_width window and
 * writes one element per filter offset into `data_col`; window positions that
 * fall outside the volume (i.e. in the padding) are written as zero.
 *
 * data_vol  input,  laid out as [channels, depth, height, width]
 * data_col  output, laid out as
 *           [channels, filter_depth, filter_height, filter_width,
 *            output_depth, output_height, output_width]
 *
 * The base pointers `data_vol` / `data_col` are never modified: all
 * addressing is done through per-iteration locals, so the loop stays correct
 * when a thread processes more than one `index` (i.e. when the grid is
 * smaller than num_kernels).
 */
template <class T>
__global__ void vol2col(int num_kernels, const T* data_vol, int depth,
                        int height, int width, int filter_depth,
                        int filter_height, int filter_width, int stride_depth,
                        int stride_height, int stride_width, int padding_depth,
                        int padding_height, int padding_width, int output_detph,
                        int output_height, int output_width, T* data_col) {
  // Distance in data_col between two consecutive filter offsets of the same
  // output position.
  const int col_step = output_detph * output_height * output_width;

  for (int index = blockIdx.x * blockDim.x + threadIdx.x; index < num_kernels;
       index += blockDim.x * gridDim.x) {
    // Decompose the flat index into (channel_in, d_out, h_out, w_out).
    int w_out = index % output_width;
    int h_out = (index / output_width) % output_height;
    int d_out = (index / output_width / output_height) % output_detph;
    int channel_in = index / output_width / output_height / output_detph;

    // First column row belonging to this input channel.
    int channel_out = channel_in * filter_depth * filter_height * filter_width;

    // Top-left-front corner of the filter window in (unpadded) volume
    // coordinates; may be negative when padding is used.
    int w_in = w_out * stride_width - padding_width;
    int h_in = h_out * stride_height - padding_height;
    int d_in = d_out * stride_depth - padding_depth;

    // Per-iteration write cursor; recomputed from the base pointer for every
    // index instead of advancing the kernel argument itself.
    T* col_ptr =
        data_col +
        ((channel_out * output_detph + d_out) * output_height + h_out) *
            output_width +
        w_out;

    for (int k = 0; k < filter_depth; ++k) {
      for (int i = 0; i < filter_height; ++i) {
        for (int j = 0; j < filter_width; ++j) {
          int d = d_in + k;
          int h = h_in + i;
          int w = w_in + j;
          const bool inside = d >= 0 && d < depth && h >= 0 && h < height &&
                              w >= 0 && w < width;
          // Index the volume directly only when the coordinate is valid, so
          // no out-of-range address is ever formed for padded positions.
          *col_ptr =
              inside
                  ? data_vol[((channel_in * depth + d) * height + h) * width +
                             w]
                  : static_cast<T>(0);
          col_ptr += col_step;
        }
      }
    }
  }
}
/*
 * vol = [input_channels, input_depth, input_height, input_width]
 * col =
 *   [input_channels, filter_depth, filter_height, filter_width,
 *                    output_depth, output_height, output_width]
 *
 * GPU specialization of Vol2ColFunctor. Reads the geometry from the tensor
 * dimensions and launches the vol2col kernel on the stream of `context`,
 * which is reinterpreted as a CUDADeviceContext.
 */
template <class T>
class Vol2ColFunctor<platform::GPUPlace, T> {
 public:
  void operator()(const platform::DeviceContext& context,
                  const framework::Tensor& vol, framework::Tensor& col,
                  int stride_depth, int stride_height, int stride_width,
                  int padding_depth, int padding_height,
                  int padding_width) const {
    PADDLE_ENFORCE(vol.dims().size() == 4);
    PADDLE_ENFORCE(col.dims().size() == 7);

    // Volume geometry.
    const int input_channels = vol.dims()[0];
    const int input_depth = vol.dims()[1];
    const int input_height = vol.dims()[2];
    const int input_width = vol.dims()[3];
    // Filter window and output geometry, both taken from `col`.
    const int filter_depth = col.dims()[1];
    const int filter_height = col.dims()[2];
    const int filter_width = col.dims()[3];
    const int output_depth = col.dims()[4];
    const int output_height = col.dims()[5];
    const int output_width = col.dims()[6];

    // One kernel work item per (channel, output_d, output_h, output_w).
    const int num_outputs =
        input_channels * output_depth * output_height * output_width;

    const int threads = 1024;
    const int blocks = (num_outputs + threads - 1) / threads;  // ceil-divide

    const auto& cuda_context =
        reinterpret_cast<const platform::CUDADeviceContext&>(context);

    vol2col<T><<<blocks, threads, 0, cuda_context.stream()>>>(
        num_outputs, vol.data<T>(), input_depth, input_height, input_width,
        filter_depth, filter_height, filter_width, stride_depth, stride_height,
        stride_width, padding_depth, padding_height, padding_width,
        output_depth, output_height, output_width, col.data<T>());
  }
};
// CUDA kernel that scatters column data back into a volume.
//
// Each value of `index` in [0, num_kernels) identifies one element of
// data_vol, laid out as [channels, depth, height, width]. The kernel sums
// every data_col entry that was gathered from that element and stores the
// sum with a plain assignment, i.e. data_vol[index] is overwritten, not
// accumulated into.
//
// data_col is laid out as
//   [channels, filter_depth, filter_height, filter_width,
//    output_depth, output_height, output_width].
template <class T>
__global__ void col2vol(int num_kernels, const T* data_col, int depth,
int height, int width, int filter_depth,
int filter_height, int filter_width, int stride_depth,
int stride_height, int stride_width, int padding_depth,
int padding_height, int padding_width, int output_detph,
int output_height, int output_width, T* data_vol) {
for (int index = blockIdx.x * blockDim.x + threadIdx.x; index < num_kernels;
index += blockDim.x * gridDim.x) {
T src_val = 0;
// Coordinates of this volume element, shifted by the padding so that they
// are expressed in the padded volume (always non-negative).
int w = index % width + padding_width;
int h = (index / width) % height + padding_height;
int d = (index / width / height) % depth + padding_depth;
int c = index / width / height / depth;
// For each axis, compute the half-open range [start, end) of output
// positions whose filter window covers this element: the first window that
// still reaches it, up to the last window that starts at or before it,
// clamped to the output extent.
int w_col_start =
(w < filter_width) ? 0 : (w - filter_width) / stride_width + 1;
int w_col_end = min(w / stride_width + 1, output_width);
int h_col_start =
(h < filter_height) ? 0 : (h - filter_height) / stride_height + 1;
int h_col_end = min(h / stride_height + 1, output_height);
int d_col_start =
(d < filter_depth) ? 0 : (d - filter_depth) / stride_depth + 1;
int d_col_end = min(d / stride_depth + 1, output_detph);
// The data_col index for output position (d_col, h_col, w_col) uses the
// filter offsets kd = d - d_col * stride_depth, kh = h - h_col * stride_height,
// kw = w - w_col * stride_width:
//   (((c * FD + kd) * FH + kh) * FW + kw) * OD * OH * OW
//       + (d_col * OH + h_col) * OW + w_col
// Expanding that expression gives a constant part (`offset`) plus one linear
// coefficient per output coordinate (`coeff_*_col`), computed once below.
int offset = (c * filter_depth * filter_height * filter_width +
d * filter_width * filter_height + h * filter_width + w) *
output_detph * output_height * output_width;
int coeff_d_col =
(1 - stride_depth * filter_width * filter_height * output_detph) *
output_height * output_width;
int coeff_h_col =
(1 - stride_height * filter_width * output_detph * output_height) *
output_width;
int coeff_w_col =
(1 - stride_width * output_detph * output_height * output_width);
// Sum the contribution of every covering window.
for (int d_col = d_col_start; d_col < d_col_end; ++d_col) {
for (int h_col = h_col_start; h_col < h_col_end; ++h_col) {
for (int w_col = w_col_start; w_col < w_col_end; ++w_col) {
src_val += data_col[offset + d_col * coeff_d_col +
h_col * coeff_h_col + w_col * coeff_w_col];
}
}
}
// Overwrites the previous value of this volume element.
data_vol[index] = src_val;
}
}
/*
 * vol = [input_channels, input_depth, input_height, input_width]
 * col =
 *   [input_channels, filter_depth, filter_height, filter_width,
 *                    output_depth, output_height, output_width]
 *
 * GPU specialization of Col2VolFunctor. Reads the geometry from the tensor
 * dimensions and launches the col2vol kernel on the stream of `context`,
 * which is reinterpreted as a CUDADeviceContext.
 */
template <class T>
class Col2VolFunctor<platform::GPUPlace, T> {
 public:
  void operator()(const platform::DeviceContext& context,
                  framework::Tensor& vol, const framework::Tensor& col,
                  int stride_depth, int stride_height, int stride_width,
                  int padding_depth, int padding_height,
                  int padding_width) const {
    PADDLE_ENFORCE(vol.dims().size() == 4);
    PADDLE_ENFORCE(col.dims().size() == 7);

    // Volume geometry.
    const int input_channels = vol.dims()[0];
    const int input_depth = vol.dims()[1];
    const int input_height = vol.dims()[2];
    const int input_width = vol.dims()[3];
    // Filter window and output geometry, both taken from `col`.
    const int filter_depth = col.dims()[1];
    const int filter_height = col.dims()[2];
    const int filter_width = col.dims()[3];
    const int output_depth = col.dims()[4];
    const int output_height = col.dims()[5];
    const int output_width = col.dims()[6];

    // One kernel work item per element of the volume.
    const int num_kernels =
        input_channels * input_depth * input_height * input_width;

    const int threads = 1024;
    const int blocks = (num_kernels + threads - 1) / threads;  // ceil-divide

    const auto& cuda_context =
        reinterpret_cast<const platform::CUDADeviceContext&>(context);

    col2vol<T><<<blocks, threads, 0, cuda_context.stream()>>>(
        num_kernels, col.data<T>(), input_depth, input_height, input_width,
        filter_depth, filter_height, filter_width, stride_depth, stride_height,
        stride_width, padding_depth, padding_height, padding_width,
        output_depth, output_height, output_width, vol.data<T>());
  }
};
// Explicit instantiations of the GPU functors for the two element types
// (float and double) listed here.
template class Vol2ColFunctor<platform::GPUPlace, float>;
template class Vol2ColFunctor<platform::GPUPlace, double>;
template class Col2VolFunctor<platform::GPUPlace, float>;
template class Col2VolFunctor<platform::GPUPlace, double>;
} // namespace math
} // namespace operators
} // namespace paddle
/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserve.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
#pragma once
#include "paddle/framework/tensor.h"
#include "paddle/platform/device_context.h"
namespace paddle {
namespace operators {
namespace math {
/*
 * \brief Vol2ColFunctor rearranges a four-dimensional volume (CDHW) into a
 *        seven-dimensional column tensor. Col2VolFunctor performs the
 *        reverse mapping.
 *
 * \param context  Device context. The GPU specializations launch their
 *                 kernels on this context's CUDA stream.
 * \param vol      Volume tensor with shape
 *                 [input_channels, input_depth, input_height, input_width].
 * \param col      Column tensor with shape
 *                 [input_channels, filter_depth, filter_height, filter_width,
 *                  output_depth, output_height, output_width].
 *                 The filter and output sizes are read from this tensor's
 *                 dimensions, so it must already have its final shape.
 * \param stride_depth, stride_height, stride_width
 *                 Step of the filter window along each spatial axis.
 * \param padding_depth, padding_height, padding_width
 *                 Padding along each spatial axis. The GPU vol2col kernel
 *                 writes zero for window positions that fall into the padding.
 *
 * With this layout the column tensor is easy to reshape into a convolution
 * matrix for convolution based on matrix multiplication.
 * The shape of the convolution matrix is [height, width], where height equals
 * input_channels * filter_depth * filter_height * filter_width, and width
 * equals output_depth * output_height * output_width.
 *
 * Reshape:
 *     shape of col                  shape of convolution matrix
 *     [input_channels,
 *      filter_depth,
 *      filter_height,
 *      filter_width,      ======>      [height, width]
 *      output_depth,
 *      output_height,
 *      output_width]
 *
 * \note The caller needs to ensure that the first dimension of vol
 *       (input_channels) is equal to the first dimension of col.
 */
template <typename Place, typename T>
class Vol2ColFunctor {
public:
void operator()(const platform::DeviceContext& context,
const framework::Tensor& vol, framework::Tensor& col,
int stride_depth, int stride_height, int stride_width,
int padding_depth, int padding_height,
int padding_width) const;
};
/*
 * \brief Reverse of Vol2ColFunctor: folds a seven-dimensional column tensor
 *        [input_channels, filter_depth, filter_height, filter_width,
 *         output_depth, output_height, output_width]
 *        back into a four-dimensional volume
 *        [input_channels, input_depth, input_height, input_width].
 *        Column entries that originate from the same volume element are
 *        summed.
 *
 * \param context  Device context. The GPU specialization launches its kernel
 *                 on this context's CUDA stream.
 * \param vol      Destination volume tensor (4-D).
 * \param col      Source column tensor (7-D).
 * \param stride_depth, stride_height, stride_width
 *                 Step of the filter window along each spatial axis.
 * \param padding_depth, padding_height, padding_width
 *                 Padding along each spatial axis.
 *
 * \note The CPU specialization adds into vol (+=), while the GPU kernel
 *       assigns the summed value to each vol element. Zero-filling vol
 *       before the call makes both paths produce the same result.
 */
template <typename Place, typename T>
class Col2VolFunctor {
public:
void operator()(const platform::DeviceContext& context,
framework::Tensor& vol, const framework::Tensor& col,
int stride_depth, int stride_height, int stride_width,
int padding_depth, int padding_height,
int padding_width) const;
};
} // namespace math
} // namespace operators
} // namespace paddle
/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserve.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
#include "paddle/operators/math/vol2col.h"
#include <gtest/gtest.h>
#include <iostream>
// Round-trip check of Vol2ColFunctor / Col2VolFunctor for the given Place.
//
// A 1x2x2x3 volume holding 0..11 is expanded with a 2x2x2 filter (stride 1,
// no padding) and compared against the expected column data; the column data
// is then folded back into a zeroed volume and compared against the expected
// sums. For non-CPU places the data is staged through CPU tensors.
template <typename Place>
void testVol2col() {
  paddle::framework::Tensor input;
  paddle::framework::Tensor input_tmp;
  paddle::framework::Tensor output;
  paddle::framework::Tensor output_tmp;

  // The place only has to live for the duration of the test, so it is kept
  // on the stack instead of being heap-allocated and leaked.
  Place place;
  paddle::platform::DeviceContext* context = nullptr;
  if (paddle::platform::is_cpu_place(place)) {
    context =
        new paddle::platform::CPUDeviceContext(paddle::platform::CPUPlace());
  } else {
#ifdef PADDLE_WITH_CUDA
    context =
        new paddle::platform::CUDADeviceContext(paddle::platform::GPUPlace());
#else
    PADDLE_THROW("no GPU support");
#endif  // PADDLE_WITH_CUDA
  }

  /**
   * input = [[0, 1, 2,
   *           3, 4, 5]
   *          [6, 7, 8,
   *           9, 10, 11]]
   *
   * output = [0, 1
   *           1, 2
   *           3, 4
   *           4, 5
   *           6, 7
   *           7, 8
   *           9, 10
   *           10, 11]
   *
   * col2vol = [[0, 2, 2,
   *             3, 8, 5]
   *            [6, 14, 8,
   *             9, 20, 11]]
   *
   */
  int input_depth = 2;
  int input_height = 2;
  int input_width = 3;
  int filter_size = 2;
  int stride = 1;
  int padding = 0;
  int output_depth = (input_depth - filter_size + 2 * padding) / stride + 1;
  int output_height = (input_height - filter_size + 2 * padding) / stride + 1;
  int output_width = (input_width - filter_size + 2 * padding) / stride + 1;

  // Vol2Col test
  float* input_ptr =
      input_tmp.mutable_data<float>({1, input_depth, input_height, input_width},
                                    paddle::platform::CPUPlace());
  float arr[12] = {0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11};
  memcpy(input_ptr, arr, 12 * sizeof(float));

  if (paddle::platform::is_cpu_place(place)) {
    input = input_tmp;
  } else {
    input.CopyFrom<float>(input_tmp, place, *context);
  }
  output.mutable_data<float>({1, filter_size, filter_size, filter_size,
                              output_depth, output_height, output_width},
                             place);

  paddle::operators::math::Vol2ColFunctor<Place, float> vol2col;
  vol2col(*context, input, output, stride, stride, stride, padding, padding,
          padding);

  float vol_2_col[] = {0, 1, 1, 2, 3, 4, 4, 5, 6, 7, 7, 8, 9, 10, 10, 11};
  float* out_cfo_ptr;
  if (paddle::platform::is_cpu_place(place)) {
    out_cfo_ptr = output.data<float>();
  } else {
    output_tmp.CopyFrom<float>(output, paddle::platform::CPUPlace(), *context);
    out_cfo_ptr = output_tmp.data<float>();
  }

  for (int i = 0; i < 16; ++i) {
    EXPECT_EQ(out_cfo_ptr[i], vol_2_col[i]);
  }

  // Col2Vol test
  float col_2_vol[] = {0, 2, 2, 3, 8, 5, 6, 14, 8, 9, 20, 11};
  // Start from an all-zero volume so the folded result equals the pure sum
  // of the column entries.
  memset(input_ptr, 0, 12 * sizeof(float));
  if (paddle::platform::is_cpu_place(place)) {
    input = input_tmp;
  } else {
    input.CopyFrom<float>(input_tmp, place, *context);
  }

  paddle::operators::math::Col2VolFunctor<Place, float> col2vol;
  col2vol(*context, input, output, stride, stride, stride, padding, padding,
          padding);

  float* in_ptr;
  if (paddle::platform::is_cpu_place(place)) {
    in_ptr = input.data<float>();
  } else {
    input_tmp.CopyFrom<float>(input, paddle::platform::CPUPlace(), *context);
    in_ptr = input_tmp.data<float>();
  }

  for (int i = 0; i < 12; ++i) {
    EXPECT_EQ(in_ptr[i], col_2_vol[i]);
  }

  // Release the device context created above; it was previously leaked.
  delete context;
}
// Runs the vol2col / col2vol round-trip check on the CPU, and additionally on
// the GPU when the build defines PADDLE_WITH_CUDA.
TEST(math, vol2col) {
testVol2col<paddle::platform::CPUPlace>();
#ifdef PADDLE_WITH_CUDA
testVol2col<paddle::platform::GPUPlace>();
#endif // PADDLE_WITH_CUDA
}
...@@ -115,8 +115,9 @@ class MultiplexGradOp : public framework::OperatorWithKernel { ...@@ -115,8 +115,9 @@ class MultiplexGradOp : public framework::OperatorWithKernel {
} // namespace paddle } // namespace paddle
namespace ops = paddle::operators; namespace ops = paddle::operators;
REGISTER_OP(multiplex, ops::MultiplexOp, ops::MultiplexOpMaker, multiplex_grad, REGISTER_OPERATOR(multiplex, ops::MultiplexOp, ops::MultiplexOpMaker,
ops::MultiplexGradOp); paddle::framework::DefaultGradOpDescMaker<false>);
REGISTER_OPERATOR(multiplex_grad, ops::MultiplexGradOp);
REGISTER_OP_CPU_KERNEL( REGISTER_OP_CPU_KERNEL(
multiplex, ops::MultiplexCPUKernel<paddle::platform::CPUPlace, float>); multiplex, ops::MultiplexCPUKernel<paddle::platform::CPUPlace, float>);
REGISTER_OP_CPU_KERNEL( REGISTER_OP_CPU_KERNEL(
......
...@@ -33,7 +33,8 @@ class MultiplexGPUKernel : public framework::OpKernel<T> { ...@@ -33,7 +33,8 @@ class MultiplexGPUKernel : public framework::OpKernel<T> {
auto cols = ins[0]->numel() / rows; auto cols = ins[0]->numel() / rows;
// copy index to cpu // copy index to cpu
Tensor index_t_cpu; Tensor index_t_cpu;
index_t_cpu.CopyFrom<int32_t>(*ids, platform::CPUPlace()); index_t_cpu.CopyFrom<int32_t>(*ids, platform::CPUPlace(),
ctx.device_context());
auto* index = index_t_cpu.data<int32_t>(); auto* index = index_t_cpu.data<int32_t>();
auto stream = reinterpret_cast<const platform::CUDADeviceContext&>( auto stream = reinterpret_cast<const platform::CUDADeviceContext&>(
ctx.device_context()) ctx.device_context())
...@@ -70,7 +71,8 @@ class MultiplexGradGPUKernel : public framework::OpKernel<T> { ...@@ -70,7 +71,8 @@ class MultiplexGradGPUKernel : public framework::OpKernel<T> {
auto cols = ins[0]->numel() / rows; auto cols = ins[0]->numel() / rows;
// copy index to cpu // copy index to cpu
Tensor index_t_cpu; Tensor index_t_cpu;
index_t_cpu.CopyFrom<int32_t>(*ids, platform::CPUPlace()); index_t_cpu.CopyFrom<int32_t>(*ids, platform::CPUPlace(),
ctx.device_context());
auto* index = index_t_cpu.data<int32_t>(); auto* index = index_t_cpu.data<int32_t>();
auto stream = reinterpret_cast<const platform::CUDADeviceContext&>( auto stream = reinterpret_cast<const platform::CUDADeviceContext&>(
......
...@@ -22,14 +22,8 @@ int OutputSizePool(int input_size, int filter_size, int padding, int stride) { ...@@ -22,14 +22,8 @@ int OutputSizePool(int input_size, int filter_size, int padding, int stride) {
return output_size; return output_size;
} }
class PoolOp : public framework::OperatorWithKernel { void PoolOp::InferShape(framework::InferShapeContext *ctx) const {
public: PADDLE_ENFORCE(ctx->HasInput("X"), "X(Input) of Pooling should not be null.");
using framework::OperatorWithKernel::OperatorWithKernel;
protected:
void InferShape(framework::InferShapeContext *ctx) const override {
PADDLE_ENFORCE(ctx->HasInput("X"),
"X(Input) of Pooling should not be null.");
PADDLE_ENFORCE(ctx->HasOutput("Out"), PADDLE_ENFORCE(ctx->HasOutput("Out"),
"Out(Output) of Pooling should not be null."); "Out(Output) of Pooling should not be null.");
...@@ -40,10 +34,8 @@ class PoolOp : public framework::OperatorWithKernel { ...@@ -40,10 +34,8 @@ class PoolOp : public framework::OperatorWithKernel {
std::vector<int> strides = ctx->Attrs().Get<std::vector<int>>("strides"); std::vector<int> strides = ctx->Attrs().Get<std::vector<int>>("strides");
std::vector<int> paddings = ctx->Attrs().Get<std::vector<int>>("paddings"); std::vector<int> paddings = ctx->Attrs().Get<std::vector<int>>("paddings");
PADDLE_ENFORCE(pooling_type == "max" || pooling_type == "avg",
"pooling_type should be 'max' or 'avg'");
PADDLE_ENFORCE(in_x_dims.size() == 4 || in_x_dims.size() == 5, PADDLE_ENFORCE(in_x_dims.size() == 4 || in_x_dims.size() == 5,
"Pooling intput should be 4-D or 5-D"); "Pooling intput should be 4-D or 5-D tensor.");
if (ctx->Attrs().Get<bool>("globalPooling")) { if (ctx->Attrs().Get<bool>("globalPooling")) {
ksize.resize(static_cast<size_t>(in_x_dims.size()) - 2); ksize.resize(static_cast<size_t>(in_x_dims.size()) - 2);
...@@ -52,13 +44,11 @@ class PoolOp : public framework::OperatorWithKernel { ...@@ -52,13 +44,11 @@ class PoolOp : public framework::OperatorWithKernel {
} }
PADDLE_ENFORCE(in_x_dims.size() - ksize.size() == 2U, PADDLE_ENFORCE(in_x_dims.size() - ksize.size() == 2U,
"Input size and Pooling size should be consistent."); "Input size and pooling size should be consistent.");
PADDLE_ENFORCE(ksize.size() == 2 || ksize.size() == 3,
"Pooling size should be 2 elements. or 3 elements.");
PADDLE_ENFORCE_EQ(ksize.size(), strides.size(), PADDLE_ENFORCE_EQ(ksize.size(), strides.size(),
"strides size and pooling size should be the same."); "Strides size and pooling size should be the same.");
PADDLE_ENFORCE_EQ(ksize.size(), paddings.size(), PADDLE_ENFORCE_EQ(ksize.size(), paddings.size(),
"paddings size and pooling size should be the same."); "Paddings size and pooling size should be the same.");
std::vector<int64_t> output_shape({in_x_dims[0], in_x_dims[1]}); std::vector<int64_t> output_shape({in_x_dims[0], in_x_dims[1]});
for (size_t i = 0; i < ksize.size(); ++i) { for (size_t i = 0; i < ksize.size(); ++i) {
...@@ -66,45 +56,41 @@ class PoolOp : public framework::OperatorWithKernel { ...@@ -66,45 +56,41 @@ class PoolOp : public framework::OperatorWithKernel {
OutputSizePool(in_x_dims[i + 2], ksize[i], paddings[i], strides[i])); OutputSizePool(in_x_dims[i + 2], ksize[i], paddings[i], strides[i]));
} }
ctx->SetOutputDim("Out", framework::make_ddim(output_shape)); ctx->SetOutputDim("Out", framework::make_ddim(output_shape));
} }
};
class PoolOpGrad : public framework::OperatorWithKernel {
public:
using framework::OperatorWithKernel::OperatorWithKernel;
protected: void PoolOpGrad::InferShape(framework::InferShapeContext *ctx) const {
void InferShape(framework::InferShapeContext *ctx) const override { PADDLE_ENFORCE(ctx->HasInput("X"), "Input(X) must not be null.");
PADDLE_ENFORCE(ctx->HasInput("X"),
"X(Input) of Pooling should not be null.");
PADDLE_ENFORCE(ctx->HasOutput(framework::GradVarName("X")), PADDLE_ENFORCE(ctx->HasOutput(framework::GradVarName("X")),
"Input@Grad of Pooling should not be null."); "Input(X@GRAD) should not be null.");
ctx->SetOutputDim(framework::GradVarName("X"), ctx->GetInputDim("X")); ctx->SetOutputDim(framework::GradVarName("X"), ctx->GetInputDim("X"));
} }
};
class Pool2dOpMaker : public framework::OpProtoAndCheckerMaker { Pool2dOpMaker::Pool2dOpMaker(framework::OpProto *proto,
public: framework::OpAttrChecker *op_checker)
Pool2dOpMaker(framework::OpProto *proto, framework::OpAttrChecker *op_checker)
: OpProtoAndCheckerMaker(proto, op_checker) { : OpProtoAndCheckerMaker(proto, op_checker) {
AddInput( AddInput(
"X", "X",
"The input tensor of pooling operator. " "(Tensor) The input tensor of pooling operator. "
"The format of input tensor is NCHW. Where N is batch size, C is the " "The format of input tensor is NCHW. Where N is batch size, C is the "
"number of channels, H and W is the height and width of feature."); "number of channels, H and W is the height and width of feature.");
AddOutput("Out", AddOutput("Out",
"The output tensor of pooling operator." "(Tensor) The output tensor of pooling operator."
"The format of output tensor is also NCHW."); "The format of output tensor is also NCHW."
"Where N is batch size, C is "
"the number of channels, H and W is the height and "
"width of feature.");
AddAttr<std::string>("poolingType", AddAttr<std::string>("poolingType",
"PoolingType of pooling operator." "PoolingType of pooling operator."
"Str constant equal to 'max' or 'avg'.") "Str constant equal to 'max' or 'avg'.")
.InEnum({"max", "avg"}); .InEnum({"max", "avg"});
AddAttr<std::vector<int>>( AddAttr<std::vector<int>>(
"ksize", "ksize",
"Pooling size(depth, height, width) of pooling operator." "The pooling window size(height, width) of pooling operator."
"If globalPooling = true, ksize is ignored and need not be " "If globalPooling = true, ksize is ignored and need not be "
"specified."); // TODO(Add checker) "specified."); // TODO(Chengduo): Add checker. (Currently,
// TypedAttrChecker don't support vector type.)
AddAttr<bool>( AddAttr<bool>(
"globalPooling", "globalPooling",
"Whether to use the globalPooling." "Whether to use the globalPooling."
...@@ -113,43 +99,64 @@ class Pool2dOpMaker : public framework::OpProtoAndCheckerMaker { ...@@ -113,43 +99,64 @@ class Pool2dOpMaker : public framework::OpProtoAndCheckerMaker {
"If globalPooling = true, ksize is ignored and need not be specified.") "If globalPooling = true, ksize is ignored and need not be specified.")
.SetDefault(false); .SetDefault(false);
AddAttr<std::vector<int>>("strides", AddAttr<std::vector<int>>("strides",
"Strides(height, width) of pooling operator." "The strides(height, width) of pooling window."
"Default {1,1}") "Default {1,1}.")
.SetDefault({1, 1}); // TODO(Add checker) .SetDefault({1, 1}); // TODO(Chengduo): Add checker. (Currently,
// TypedAttrChecker don't support vector type.)
AddAttr<std::vector<int>>("paddings", AddAttr<std::vector<int>>("paddings",
"Paddings(height, width) of pooling operator." "The zero padding(height, width) size on both sides"
"Default {0,0}.") "Default {0,0}.")
.SetDefault({0, 0}); // TODO(Add checker) .SetDefault({0, 0}); // TODO(Chengduo): Add checker. (Currently,
// TypedAttrChecker don't support vector type.)
AddComment(R"DOC( AddComment(R"DOC(
The pooling2d operation calculates the output based on The pooling2d operation calculates the output based on
the input, poolingType and ksize, strides, paddings parameters. the input, poolingType and ksize, strides, paddings parameters.
Input(X) and output(Out) are in NCHW format. Where N is batch size, C is the
number of channels, H and W is the height and width of feature.
Parameters(ksize, strides, paddings) are two elements.
These two elements represent height and width, respectively.
The input(X) size and output(Out) size may be different.
Example:
Input:
X shape: (N, C, H_in, W_in)
Output:
Out shape: (N, C, H_out, W_out)
Mask shape: (N, C, H_out, W_out)
where
H_out = (H_in - ksize[0] + 2 * paddings[0]) / strides[0] + 1;
W_out = (W_in - ksize[1] + 2 * paddings[1]) / strides[1] + 1;
)DOC"); )DOC");
} }
};
class Pool3dOpMaker : public framework::OpProtoAndCheckerMaker { Pool3dOpMaker::Pool3dOpMaker(framework::OpProto *proto,
public: framework::OpAttrChecker *op_checker)
Pool3dOpMaker(framework::OpProto *proto, framework::OpAttrChecker *op_checker)
: OpProtoAndCheckerMaker(proto, op_checker) { : OpProtoAndCheckerMaker(proto, op_checker) {
AddInput("X", AddInput(
"The input tensor of pooling operator. " "X",
"(Tensor) The input tensor of pooling operator. "
"The format of input tensor is NCDHW. Where N is batch size, C is " "The format of input tensor is NCDHW. Where N is batch size, C is "
"the " "the number of channels, D, H and W is the depth, height and width of "
"number of channels, D, H and W is the depth, height and width of "
"feature."); "feature.");
AddOutput("Out", AddOutput("Out",
"The output tensor of pooling operator." "(Tensor) The output tensor of pooling operator."
"The format of output tensor is also NCDHW."); "The format of output tensor is also NCDHW."
"Where N is batch size, C is "
"the number of channels, D, H and W is the depth, height and "
"width of feature.");
AddAttr<std::string>("poolingType", AddAttr<std::string>("poolingType",
"PoolingType of pooling operator." "PoolingType of pooling operator."
"str constant equal to 'max' or 'avg'.") "Str constant equal to 'max' or 'avg'.")
.InEnum({"max", "avg"}); .InEnum({"max", "avg"});
AddAttr<std::vector<int>>( AddAttr<std::vector<int>>(
"ksize", "ksize",
"Pooling size(depth, height, width) of pooling operator." "The pooling window size(depth, height, width) of pooling operator."
"If globalPooling = true, ksize is ignored and need not be " "If globalPooling = true, ksize is ignored and need not be "
"specified."); // TODO(Add checker) "specified."); // TODO(Chengduo): Add checker. (Currently,
// TypedAttrChecker don't support vector type.)
AddAttr<bool>( AddAttr<bool>(
"globalPooling", "globalPooling",
"Whether to use the globalPooling." "Whether to use the globalPooling."
...@@ -157,22 +164,39 @@ class Pool3dOpMaker : public framework::OpProtoAndCheckerMaker { ...@@ -157,22 +164,39 @@ class Pool3dOpMaker : public framework::OpProtoAndCheckerMaker {
"Default false." "Default false."
"If globalPooling = true, ksize is ignored and need not be specified.") "If globalPooling = true, ksize is ignored and need not be specified.")
.SetDefault(false); .SetDefault(false);
AddAttr<std::vector<int>>( AddAttr<std::vector<int>>("strides",
"strides",
"Strides(depth, height, width) of pooling operator." "Strides(depth, height, width) of pooling operator."
"Default {1,1,1}.") "Default {1,1,1}.")
.SetDefault({1, 1, 1}); // TODO(Add checker) .SetDefault({1, 1, 1}); // TODO(Chengduo): Add checker. (Currently,
// TypedAttrChecker don't support vector type.)
AddAttr<std::vector<int>>( AddAttr<std::vector<int>>(
"paddings", "paddings",
"Paddings(depth, height, width) of pooling operator." "Paddings(depth, height, width) of pooling operator."
"Default {0,0,0}.") "Default {0,0,0}.")
.SetDefault({0, 0, 0}); // TODO(Add checker) .SetDefault({0, 0, 0}); // TODO(Chengduo): Add checker. (Currently,
// TypedAttrChecker don't support vector type.)
AddComment(R"DOC( AddComment(R"DOC(
The pooling3d operation calculates the output based on The pooling3d operation calculates the output based on
the input, poolingType and ksize, strides, paddings parameters. the input, poolingType and ksize, strides, paddings parameters.
Input(X) and output(Out) are in NCDHW format. Where N is batch
size, C is the number of channels, D, H and W is the depth, height and
width of feature. Parameters(ksize, strides, paddings) are three elements.
These three elements represent depth, height and width, respectively.
The input(X) size and output(Out) size may be different.
Example:
Input:
X shape: (N, C, D_in, H_in, W_in)
Output:
Out shape: (N, C, D_out, H_out, W_out)
Mask shape: (N, C, D_out, H_out, W_out)
where
D_out = (D_in - ksize[0] + 2 * paddings[0]) / strides[0] + 1;
H_out = (H_in - ksize[1] + 2 * paddings[1]) / strides[1] + 1;
W_out = (W_in - ksize[2] + 2 * paddings[2]) / strides[2] + 1;
)DOC"); )DOC");
} }
};
} // namespace operators } // namespace operators
} // namespace paddle } // namespace paddle
......
...@@ -24,6 +24,34 @@ namespace operators { ...@@ -24,6 +24,34 @@ namespace operators {
using Tensor = framework::Tensor; using Tensor = framework::Tensor;
class PoolOp : public framework::OperatorWithKernel {
public:
using framework::OperatorWithKernel::OperatorWithKernel;
protected:
void InferShape(framework::InferShapeContext* ctx) const override;
};
class PoolOpGrad : public framework::OperatorWithKernel {
public:
using framework::OperatorWithKernel::OperatorWithKernel;
protected:
void InferShape(framework::InferShapeContext* ctx) const override;
};
class Pool2dOpMaker : public framework::OpProtoAndCheckerMaker {
public:
Pool2dOpMaker(framework::OpProto* proto,
framework::OpAttrChecker* op_checker);
};
class Pool3dOpMaker : public framework::OpProtoAndCheckerMaker {
public:
Pool3dOpMaker(framework::OpProto* proto,
framework::OpAttrChecker* op_checker);
};
template <typename Place, typename T> template <typename Place, typename T>
class PoolKernel : public framework::OpKernel<T> { class PoolKernel : public framework::OpKernel<T> {
public: public:
......
...@@ -43,7 +43,7 @@ class MaxPoolWithIndexOp : public framework::OperatorWithKernel { ...@@ -43,7 +43,7 @@ class MaxPoolWithIndexOp : public framework::OperatorWithKernel {
std::vector<int> paddings = ctx->Attrs().Get<std::vector<int>>("paddings"); std::vector<int> paddings = ctx->Attrs().Get<std::vector<int>>("paddings");
PADDLE_ENFORCE(in_x_dims.size() == 4 || in_x_dims.size() == 5, PADDLE_ENFORCE(in_x_dims.size() == 4 || in_x_dims.size() == 5,
"Pooling intput should be 4-D or 5-D"); "Pooling intput should be 4-D or 5-D tensor.");
if (ctx->Attrs().Get<bool>("globalPooling")) { if (ctx->Attrs().Get<bool>("globalPooling")) {
ksize.resize(static_cast<size_t>(in_x_dims.size()) - 2); ksize.resize(static_cast<size_t>(in_x_dims.size()) - 2);
...@@ -52,7 +52,7 @@ class MaxPoolWithIndexOp : public framework::OperatorWithKernel { ...@@ -52,7 +52,7 @@ class MaxPoolWithIndexOp : public framework::OperatorWithKernel {
} }
PADDLE_ENFORCE(in_x_dims.size() - ksize.size() == 2U, PADDLE_ENFORCE(in_x_dims.size() - ksize.size() == 2U,
"Intput size and pooling size should be consistent."); "Input size and pooling size should be consistent.");
PADDLE_ENFORCE_EQ(ksize.size(), strides.size(), PADDLE_ENFORCE_EQ(ksize.size(), strides.size(),
"Strides size and pooling size should be the same."); "Strides size and pooling size should be the same.");
PADDLE_ENFORCE_EQ(ksize.size(), paddings.size(), PADDLE_ENFORCE_EQ(ksize.size(), paddings.size(),
...@@ -74,6 +74,7 @@ class MaxPoolWithIndexOpGrad : public framework::OperatorWithKernel { ...@@ -74,6 +74,7 @@ class MaxPoolWithIndexOpGrad : public framework::OperatorWithKernel {
protected: protected:
void InferShape(framework::InferShapeContext *ctx) const override { void InferShape(framework::InferShapeContext *ctx) const override {
PADDLE_ENFORCE(ctx->HasInput("Mask"), "Input(Mask) must not be null.");
PADDLE_ENFORCE(ctx->HasInput("X"), "Input(X) must not be null."); PADDLE_ENFORCE(ctx->HasInput("X"), "Input(X) must not be null.");
PADDLE_ENFORCE(ctx->HasOutput(framework::GradVarName("X")), PADDLE_ENFORCE(ctx->HasOutput(framework::GradVarName("X")),
"Input(X@GRAD) should not be null."); "Input(X@GRAD) should not be null.");
...@@ -88,17 +89,17 @@ class MaxPool2dWithIndexOpMaker : public framework::OpProtoAndCheckerMaker { ...@@ -88,17 +89,17 @@ class MaxPool2dWithIndexOpMaker : public framework::OpProtoAndCheckerMaker {
: OpProtoAndCheckerMaker(proto, op_checker) { : OpProtoAndCheckerMaker(proto, op_checker) {
AddInput( AddInput(
"X", "X",
"The input tensor of pooling operator. " "(Tensor) The input tensor of pooling operator. "
"The format of input tensor is NCHW. Where N is batch size, C is the " "The format of input tensor is NCHW. Where N is batch size, C is the "
"number of channels, H and W is the height and width of image."); "number of channels, H and W is the height and width of image.");
AddOutput("Out", AddOutput("Out",
"The output tensor of pooling operator." "(Tensor) The output tensor of pooling operator."
"The format of output tensor is also NCHW." "The format of output tensor is also NCHW."
"Where N is batch size, C is " "Where N is batch size, C is "
"the number of channels, H and W is the height and " "the number of channels, H and W is the height and "
"width of image."); "width of image.");
AddOutput("Mask", AddOutput("Mask",
"The Mask tensor of pooling operator." "(Tensor) The Mask tensor of pooling operator."
"The format of output tensor is also NCHW." "The format of output tensor is also NCHW."
"Where N is batch size, C is the number of channels, H and W " "Where N is batch size, C is the number of channels, H and W "
"is the height and width of image." "is the height and width of image."
...@@ -106,7 +107,7 @@ class MaxPool2dWithIndexOpMaker : public framework::OpProtoAndCheckerMaker { ...@@ -106,7 +107,7 @@ class MaxPool2dWithIndexOpMaker : public framework::OpProtoAndCheckerMaker {
AddAttr<std::vector<int>>( AddAttr<std::vector<int>>(
"ksize", "ksize",
"The pooling size(height, width) of pooling operator." "The pooling window size(height, width) of pooling operator."
"If globalPooling = true, ksize is ignored and need not be " "If globalPooling = true, ksize is ignored and need not be "
"specified."); // TODO(Chengduo): Add checker. (Currently, "specified."); // TODO(Chengduo): Add checker. (Currently,
// TypedAttrChecker don't support vector type.) // TypedAttrChecker don't support vector type.)
...@@ -118,12 +119,13 @@ class MaxPool2dWithIndexOpMaker : public framework::OpProtoAndCheckerMaker { ...@@ -118,12 +119,13 @@ class MaxPool2dWithIndexOpMaker : public framework::OpProtoAndCheckerMaker {
"If globalPooling = true, ksize is ignored and need not be specified.") "If globalPooling = true, ksize is ignored and need not be specified.")
.SetDefault(false); .SetDefault(false);
AddAttr<std::vector<int>>("strides", AddAttr<std::vector<int>>("strides",
"Strides(height, width) of pooling operator." "The strides(height, width) of pooling window."
"Default {1,1}.") "Default {1,1}.")
.SetDefault({1, 1}); // TODO(Chengduo): Add checker. (Currently, .SetDefault({1, 1}); // TODO(Chengduo): Add checker. (Currently,
// TypedAttrChecker don't support vector type.) // TypedAttrChecker don't support vector type.)
AddAttr<std::vector<int>>("paddings", AddAttr<std::vector<int>>(
"Paddings(height, width) of pooling operator." "paddings",
"The zero padding(height, width) size on both sides"
"Default {0,0}.") "Default {0,0}.")
.SetDefault({0, 0}); // TODO(Chengduo): Add checker. (Currently, .SetDefault({0, 0}); // TODO(Chengduo): Add checker. (Currently,
// TypedAttrChecker don't support vector type.) // TypedAttrChecker don't support vector type.)
...@@ -135,6 +137,17 @@ output(Out, Mask) are in NCHW format. Where N is batch size, C is the ...@@ -135,6 +137,17 @@ output(Out, Mask) are in NCHW format. Where N is batch size, C is the
number of channels, H and W is the height and width of feature. number of channels, H and W is the height and width of feature.
Parameters(ksize, strides, paddings) are two elements. Parameters(ksize, strides, paddings) are two elements.
These two elements represent height and width, respectively. These two elements represent height and width, respectively.
The input(X) size and output(Out, Mask) size may be different.
Example:
Input:
X shape: (N, C, H_in, W_in)
Output:
Out shape: (N, C, H_out, W_out)
Mask shape: (N, C, H_out, W_out)
where
H_out = (H_in - ksize[0] + 2 * paddings[0]) / strides[0] + 1;
W_out = (W_in - ksize[1] + 2 * paddings[1]) / strides[1] + 1;
)DOC"); )DOC");
} }
}; };
...@@ -146,18 +159,18 @@ class MaxPool3dWithIndexOpMaker : public framework::OpProtoAndCheckerMaker { ...@@ -146,18 +159,18 @@ class MaxPool3dWithIndexOpMaker : public framework::OpProtoAndCheckerMaker {
: OpProtoAndCheckerMaker(proto, op_checker) { : OpProtoAndCheckerMaker(proto, op_checker) {
AddInput( AddInput(
"X", "X",
"The input tensor of pooling operator. " "(Tensor) The input tensor of pooling operator. "
"The format of input tensor is NCDHW. Where N is batch size, C is " "The format of input tensor is NCDHW. Where N is batch size, C is "
"the number of channels, D, H and W is the depth, height and width of " "the number of channels, D, H and W is the depth, height and width of "
"image."); "image.");
AddOutput("Out", AddOutput("Out",
"The output tensor of pooling operator." "(Tensor) The output tensor of pooling operator."
"The format of output tensor is also NCDHW." "The format of output tensor is also NCDHW."
"Where N is batch size, C is " "Where N is batch size, C is "
"the number of channels, D, H and W is the depth, height and " "the number of channels, D, H and W is the depth, height and "
"width of image."); "width of image.");
AddOutput("Mask", AddOutput("Mask",
"The Mask tensor of pooling operator." "(Tensor) The Mask tensor of pooling operator."
"The format of output tensor is also NCDHW." "The format of output tensor is also NCDHW."
"Where N is batch size, C is the number of channels, D, H and W " "Where N is batch size, C is the number of channels, D, H and W "
"is the depth, height and width of image." "is the depth, height and width of image."
...@@ -165,7 +178,7 @@ class MaxPool3dWithIndexOpMaker : public framework::OpProtoAndCheckerMaker { ...@@ -165,7 +178,7 @@ class MaxPool3dWithIndexOpMaker : public framework::OpProtoAndCheckerMaker {
AddAttr<std::vector<int>>( AddAttr<std::vector<int>>(
"ksize", "ksize",
"The pooling size(depth, height, width) of pooling operator." "The pooling window size(depth, height, width) of pooling operator."
"If globalPooling = true, ksize is ignored and need not be " "If globalPooling = true, ksize is ignored and need not be "
"specified."); // TODO(Chengduo): Add checker. (Currently, "specified."); // TODO(Chengduo): Add checker. (Currently,
// TypedAttrChecker don't support vector type.) // TypedAttrChecker don't support vector type.)
...@@ -196,6 +209,18 @@ Input(X) and output(Out, Mask) are in NCDHW format. Where N is batch ...@@ -196,6 +209,18 @@ Input(X) and output(Out, Mask) are in NCDHW format. Where N is batch
size, C is the number of channels, D, H and W is the depth, height and size, C is the number of channels, D, H and W is the depth, height and
width of feature. Parameters(ksize, strides, paddings) are three elements. width of feature. Parameters(ksize, strides, paddings) are three elements.
These three elements represent depth, height and width, respectively. These three elements represent depth, height and width, respectively.
The input(X) size and output(Out, Mask) size may be different.
Example:
Input:
X shape: (N, C, D_in, H_in, W_in)
Output:
Out shape: (N, C, D_out, H_out, W_out)
Mask shape: (N, C, D_out, H_out, W_out)
where
D_out = (D_in - ksize[0] + 2 * paddings[0]) / strides[0] + 1;
H_out = (H_in - ksize[1] + 2 * paddings[1]) / strides[1] + 1;
W_out = (W_in - ksize[2] + 2 * paddings[2]) / strides[2] + 1;
)DOC"); )DOC");
} }
}; };
......
...@@ -46,7 +46,7 @@ void RecurrentAlgorithm::Run(const Scope& scope, ...@@ -46,7 +46,7 @@ void RecurrentAlgorithm::Run(const Scope& scope,
} }
(*stepnet_)->Run(*step_scopes[step_id], dev_ctx); (*stepnet_)->Run(*step_scopes[step_id], dev_ctx);
} }
rnn::ConcatOutputs(step_scopes, arg_->outlinks, seq_len); rnn::ConcatOutputs(step_scopes, arg_->outlinks, seq_len, dev_ctx);
} }
void RecurrentAlgorithm::CreateScopes(const Scope& scope, void RecurrentAlgorithm::CreateScopes(const Scope& scope,
...@@ -70,14 +70,14 @@ void RecurrentAlgorithm::CreateScopes(const Scope& scope, ...@@ -70,14 +70,14 @@ void RecurrentAlgorithm::CreateScopes(const Scope& scope,
// the weight are located in parent scope // the weight are located in parent scope
for (auto& var_name : input.second) { for (auto& var_name : input.second) {
if (!step_scope.FindVar(var_name)) { if (!step_scope.FindVar(var_name)) {
step_scope.NewVar(var_name)->GetMutable<LoDTensor>(); step_scope.Var(var_name)->GetMutable<LoDTensor>();
} }
} }
} }
// create stepnet's outputs // create stepnet's outputs
for (const auto& output : (*stepnet_)->Outputs()) { for (const auto& output : (*stepnet_)->Outputs()) {
for (auto& var_name : output.second) { for (auto& var_name : output.second) {
step_scope.NewVar(var_name); step_scope.Var(var_name);
} }
} }
step_scopes->emplace_back(&step_scope); step_scopes->emplace_back(&step_scope);
...@@ -87,7 +87,7 @@ void RecurrentAlgorithm::CreateScopes(const Scope& scope, ...@@ -87,7 +87,7 @@ void RecurrentAlgorithm::CreateScopes(const Scope& scope,
void RecurrentAlgorithm::InitMemories(Scope* step_scope) const { void RecurrentAlgorithm::InitMemories(Scope* step_scope) const {
for (auto& attr : arg_->memories) { for (auto& attr : arg_->memories) {
auto* pre_mem = step_scope->NewVar(attr.pre_var)->GetMutable<LoDTensor>(); auto* pre_mem = step_scope->Var(attr.pre_var)->GetMutable<LoDTensor>();
PADDLE_ENFORCE(step_scope->FindVar(attr.boot_var) != nullptr, PADDLE_ENFORCE(step_scope->FindVar(attr.boot_var) != nullptr,
"memory [%s]'s boot variable [%s] not exists", attr.var, "memory [%s]'s boot variable [%s] not exists", attr.var,
attr.boot_var); attr.boot_var);
...@@ -151,12 +151,12 @@ void RecurrentGradientAlgorithm::Run( ...@@ -151,12 +151,12 @@ void RecurrentGradientAlgorithm::Run(
auto& step_scopes = GetStepScopes(scope); auto& step_scopes = GetStepScopes(scope);
rnn::SegmentInputs(step_scopes, arg_->inlinks, seq_len); rnn::SegmentInputs(step_scopes, arg_->inlinks, seq_len);
for (int step_id = seq_len - 1; step_id >= 0; --step_id) { for (int step_id = seq_len - 1; step_id >= 0; --step_id) {
if (step_id != seq_len - 1) { if (static_cast<size_t>(step_id) != seq_len - 1) {
rnn::LinkMemories(step_scopes, arg_->memories, step_id, 1); rnn::LinkMemories(step_scopes, arg_->memories, step_id, 1);
} }
(*stepnet_)->Run(*step_scopes[step_id], dev_ctx); (*stepnet_)->Run(*step_scopes[step_id], dev_ctx);
} }
rnn::ConcatOutputs(step_scopes, arg_->outlinks, seq_len); rnn::ConcatOutputs(step_scopes, arg_->outlinks, seq_len, dev_ctx);
LinkBootMemoryGradients(step_scopes[0]); LinkBootMemoryGradients(step_scopes[0]);
} }
...@@ -167,9 +167,9 @@ void RecurrentGradientAlgorithm::LinkBootMemoryGradients( ...@@ -167,9 +167,9 @@ void RecurrentGradientAlgorithm::LinkBootMemoryGradients(
"memory variable [%s] does not exists", attr.var); "memory variable [%s] does not exists", attr.var);
PADDLE_ENFORCE(step_scope->FindVar(attr.boot_var) != nullptr, PADDLE_ENFORCE(step_scope->FindVar(attr.boot_var) != nullptr,
"boot variable [%s] does not exists", attr.boot_var); "boot variable [%s] does not exists", attr.boot_var);
auto* mem_grad = step_scope->NewVar(attr.var)->GetMutable<LoDTensor>(); auto* mem_grad = step_scope->Var(attr.var)->GetMutable<LoDTensor>();
auto* boot_mem_grad = auto* boot_mem_grad =
step_scope->NewVar(attr.boot_var)->GetMutable<LoDTensor>(); step_scope->Var(attr.boot_var)->GetMutable<LoDTensor>();
boot_mem_grad->Resize(mem_grad->dims()); boot_mem_grad->Resize(mem_grad->dims());
boot_mem_grad->ShareDataWith<float>(*mem_grad); boot_mem_grad->ShareDataWith<float>(*mem_grad);
} }
......
...@@ -33,7 +33,7 @@ class ReshapeKernel : public framework::OpKernel<T> { ...@@ -33,7 +33,7 @@ class ReshapeKernel : public framework::OpKernel<T> {
std::transform(shape.begin(), shape.end(), shape_int64.begin(), std::transform(shape.begin(), shape.end(), shape_int64.begin(),
[](int a) { return static_cast<int64_t>(a); }); [](int a) { return static_cast<int64_t>(a); });
auto out_dims = framework::make_ddim(shape_int64); auto out_dims = framework::make_ddim(shape_int64);
out->CopyFrom<T>(*in, ctx.GetPlace()); out->CopyFrom<T>(*in, ctx.GetPlace(), ctx.device_context());
out->Resize(out_dims); out->Resize(out_dims);
} }
}; };
...@@ -47,7 +47,7 @@ class ReshapeGradKernel : public framework::OpKernel<T> { ...@@ -47,7 +47,7 @@ class ReshapeGradKernel : public framework::OpKernel<T> {
d_x->mutable_data<T>(ctx.GetPlace()); d_x->mutable_data<T>(ctx.GetPlace());
auto in_dims = d_x->dims(); auto in_dims = d_x->dims();
d_x->CopyFrom<T>(*d_out, ctx.GetPlace()); d_x->CopyFrom<T>(*d_out, ctx.GetPlace(), ctx.device_context());
d_x->Resize(in_dims); d_x->Resize(in_dims);
} }
}; };
......
...@@ -40,7 +40,7 @@ void SegmentInputs(const std::vector<Scope*>& step_scopes, ...@@ -40,7 +40,7 @@ void SegmentInputs(const std::vector<Scope*>& step_scopes,
f::DDim step_dims = slice_ddim(dims, 1, dims.size()); f::DDim step_dims = slice_ddim(dims, 1, dims.size());
for (size_t j = 0; j < seq_len; j++) { for (size_t j = 0; j < seq_len; j++) {
Tensor* step_input = Tensor* step_input =
step_scopes[j]->NewVar(inlinks[i])->GetMutable<Tensor>(); step_scopes[j]->Var(inlinks[i])->GetMutable<Tensor>();
// The input of operators of each step is Tensor here. // The input of operators of each step is Tensor here.
// Maybe need to modify Slice function. // Maybe need to modify Slice function.
*step_input = input->Slice<float>(j, j + 1); *step_input = input->Slice<float>(j, j + 1);
...@@ -51,7 +51,7 @@ void SegmentInputs(const std::vector<Scope*>& step_scopes, ...@@ -51,7 +51,7 @@ void SegmentInputs(const std::vector<Scope*>& step_scopes,
void ConcatOutputs(const std::vector<Scope*>& step_scopes, void ConcatOutputs(const std::vector<Scope*>& step_scopes,
const std::vector<std::string>& outlinks, const std::vector<std::string>& outlinks,
const size_t seq_len) { const size_t seq_len, const platform::DeviceContext& ctx) {
for (size_t i = 0; i < outlinks.size(); i++) { for (size_t i = 0; i < outlinks.size(); i++) {
auto* output_var = step_scopes[0]->parent().FindVar(outlinks[i]); auto* output_var = step_scopes[0]->parent().FindVar(outlinks[i]);
PADDLE_ENFORCE_NOT_NULL(output_var, "output link [%s] is not in scope.", PADDLE_ENFORCE_NOT_NULL(output_var, "output link [%s] is not in scope.",
...@@ -72,7 +72,7 @@ void ConcatOutputs(const std::vector<Scope*>& step_scopes, ...@@ -72,7 +72,7 @@ void ConcatOutputs(const std::vector<Scope*>& step_scopes,
// TODO(luotao02) data type and platform::DeviceContext() should set // TODO(luotao02) data type and platform::DeviceContext() should set
// correctly // correctly
(output->Slice<float>(j, j + 1)) (output->Slice<float>(j, j + 1))
.CopyFrom<float>(*step_output, platform::CPUPlace()); .CopyFrom<float>(*step_output, platform::CPUPlace(), ctx);
} }
} }
} }
......
...@@ -71,7 +71,7 @@ void SegmentInputs(const std::vector<Scope*>& step_scopes, ...@@ -71,7 +71,7 @@ void SegmentInputs(const std::vector<Scope*>& step_scopes,
*/ */
void ConcatOutputs(const std::vector<Scope*>& step_scopes, void ConcatOutputs(const std::vector<Scope*>& step_scopes,
const std::vector<std::string>& outlinks, const std::vector<std::string>& outlinks,
const size_t seq_len); const size_t seq_len, const platform::DeviceContext& ctx);
void LinkMemories(const std::vector<Scope*>& step_scopes, void LinkMemories(const std::vector<Scope*>& step_scopes,
const std::vector<MemoryAttr>& memories, const size_t step_id, const std::vector<MemoryAttr>& memories, const size_t step_id,
......
/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserve.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
#include "paddle/operators/sequence_concat_op.h"
namespace paddle {
namespace operators {
// Operator definition for sequence_concat. Only shape inference is implemented
// here; the kernels are registered separately at the bottom of this file.
class SequenceConcatOp : public framework::OperatorWithKernel {
 public:
  using framework::OperatorWithKernel::OperatorWithKernel;

 protected:
  // Out takes the dims of the first input, except along `axis`, where the
  // extents of all inputs are added up.
  void InferShape(framework::InferShapeContext* ctx) const override {
    PADDLE_ENFORCE(ctx->HasInputs("X"),
                   "Inputs(X) of SequenceConcatOp should not be null.");
    PADDLE_ENFORCE(ctx->HasOutput("Out"),
                   "Output(Out) of SequenceConcatOp should not be null.");

    // Attributes are stored as int; they are used as unsigned indices below.
    const int level_attr = ctx->Attrs().Get<int>("level");
    const int axis_attr = ctx->Attrs().Get<int>("axis");
    const size_t level = static_cast<size_t>(level_attr);
    const size_t axis = static_cast<size_t>(axis_attr);
    PADDLE_ENFORCE(level == 0UL || level == 1UL,
                   "The sequence_concat operator only accepts sequence "
                   "or a nested sequence as its input.");

    auto ins_dims = ctx->GetInputsDim("X");
    framework::DDim out_dims = ins_dims[0];
    // Start from the first input and fold in the remaining inputs' extents
    // along the concatenation axis.
    size_t idx = 1;
    while (idx < ins_dims.size()) {
      out_dims[axis] += ins_dims[idx][axis];
      ++idx;
    }
    ctx->SetOutputDim("Out", out_dims);
  }
};
// Declares the inputs, outputs, attributes and user-facing documentation of
// the sequence_concat operator.
//
//   X     (duplicable input)  the LoDTensors to concatenate.
//   Out   (output)            the concatenated LoDTensor.
//   axis  (int, default 0)    tensor axis along which inputs are joined.
//   level (int, default 0)    LoD level at which inputs are joined.
class SequenceConcatOpMaker : public framework::OpProtoAndCheckerMaker {
 public:
  SequenceConcatOpMaker(framework::OpProto* proto,
                        framework::OpAttrChecker* op_checker)
      : OpProtoAndCheckerMaker(proto, op_checker) {
    AddInput("X",
             "(A vector of LoDTensor), the input is a vector of LoDTensor, "
             "each of which is a variable-length sequence or nested sequence.")
        .AsDuplicable();
    AddOutput("Out",
              "(A LoDTensor), the variable-length output of "
              "sequence_concat Op.");
    // The type/default prefix is followed by a space so that the concatenated
    // literal reads "(int, default 0) The axis ...".
    AddAttr<int>("axis",
                 "(int, default 0) "
                 "The axis which the inputs will be joined with. "
                 "If axis is 0, the inputs will be joined with LoD index.")
        .SetDefault(0);
    AddAttr<int>("level",
                 "(int, default 0) "
                 "The level at which the inputs will be joined. "
                 "If the level is 0, the inputs will be joined at the nested "
                 "sequence level. "
                 "If the level is 1, the inputs will be joined at the "
                 "sequence level. "
                 "The level should be less than the level number of inputs.")
        .SetDefault(0);
    AddComment(R"DOC(
The sequence_concat operator concatenates multiple LoDTensors.
It only supports sequence (LoD Tensor with level number is 1)
or a nested sequence (LoD tensor with level number is 2) as its input.
- Case1:
If the axis is other than 0(here, axis is 1 and level is 1),
each input should have the same LoD information and the LoD
information of the output keeps the same as the input.
LoD(x0) = {{0,2,4}, {0,1,2,3,4}}; Dims(x0) = (4,3,4)
LoD(x1) = {{0,2,4}, {0,1,2,3,4}}; Dims(x1) = (4,4,4)
LoD(Out) = {{0,2,4}, {0,1,2,3,4}}; Dims(Out) = (4,7,4)
- Case2:
If the axis is 0(here, level is 0), the inputs are concatenated along
time steps, the LoD information of the output need to re-compute.
LoD(x0) = {{0,2,4}, {0,1,2,3,4}}; Dims(x0) = (4,3,4)
LoD(x1) = {{0,3,5}, {0,1,2,3,5}}; Dims(x1) = (5,3,4)
LoD(Out) = {{0,5,9}, {0,1,2,3,4,5,6,7,9}}; Dims(Out) = (9,3,4)
- Case3:
If the axis is 0(here, level is 1).
LoD(x0) = {{0,2,4}, {0,1,2,3,4}}; Dims(x0) = (4,3,4)
LoD(x1) = {{0,3,5}, {0,1,3,4,5}}; Dims(x1) = (5,3,4)
LoD(Out) = {{0,5,9}, {0,2,5,7,9}}; Dims(Out) = (9,3,4)
NOTE: The levels of all the inputs should be the same.
)DOC");
  }
};
// Gradient operator definition for sequence_concat. Only shape inference is
// implemented here.
class SequenceConcatGradOp : public framework::OperatorWithKernel {
 public:
  using framework::OperatorWithKernel::OperatorWithKernel;

 protected:
  // Each gradient of X is given the dims of the corresponding forward input X.
  void InferShape(framework::InferShapeContext* ctx) const override {
    PADDLE_ENFORCE(ctx->HasInput(framework::GradVarName("Out")),
                   "The gradient of Out should not be null.");
    PADDLE_ENFORCE(ctx->HasOutputs(framework::GradVarName("X")),
                   "The gradient of X should not be null.");

    auto forward_input_dims = ctx->GetInputsDim("X");
    ctx->SetOutputsDim(framework::GradVarName("X"), forward_input_dims);
  }
};
} // namespace operators
} // namespace paddle
// Short alias so the registration macros below stay readable.
namespace ops = paddle::operators;
// Registers the forward op (sequence_concat) with its proto maker, together
// with the gradient op (sequence_concat_grad).
REGISTER_OP(sequence_concat, ops::SequenceConcatOp, ops::SequenceConcatOpMaker,
sequence_concat_grad, ops::SequenceConcatGradOp);
// CPU kernel for the forward op; only the float instantiation is registered.
REGISTER_OP_CPU_KERNEL(
sequence_concat,
ops::SequenceConcatOpKernel<paddle::platform::CPUPlace, float>);
// CPU kernel for the gradient op; only the float instantiation is registered.
REGISTER_OP_CPU_KERNEL(
sequence_concat_grad,
ops::SequenceConcatGradOpKernel<paddle::platform::CPUPlace, float>);
/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserve.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
#define EIGEN_USE_GPU
#include "paddle/operators/sequence_concat_op.h"
// Short alias so the registration macros below stay readable.
namespace ops = paddle::operators;
// GPU kernel for the forward op; only the float instantiation is registered.
REGISTER_OP_GPU_KERNEL(
sequence_concat,
ops::SequenceConcatOpKernel<paddle::platform::GPUPlace, float>);
// GPU kernel for the gradient op; only the float instantiation is registered.
REGISTER_OP_GPU_KERNEL(
sequence_concat_grad,
ops::SequenceConcatGradOpKernel<paddle::platform::GPUPlace, float>);
/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserve.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
#pragma once
#include "paddle/framework/op_registry.h"
#include "paddle/operators/strided_memcpy.h"
namespace paddle {
namespace operators {
using Tensor = framework::Tensor;
using LoDTensor = framework::LoDTensor;
using LoD = framework::LoD;
// Computes the LoD of the concatenation of `ins`.
//
// The result starts as a copy of ins[0]'s LoD and is returned unchanged when
// axis != 0. When axis == 0, for every further input:
//   - its level-0 offsets are added element-wise to the result's level 0;
//   - if ins[0] has exactly two LoD levels, then
//       level == 0: the lengths of the input's level-1 sequences are appended
//                   to the result's level 1 as new running offsets;
//       level == 1: the input's level-1 offsets (from index 1 on) are added
//                   element-wise to the result's level 1.
//
// T must provide lod() and NumLevels(). `ins` must be non-empty, and for the
// element-wise cases the inputs' LoD levels are assumed to have matching
// lengths (not checked here).
template <typename T>
LoD concatLoD(const std::vector<const T*> ins, const size_t axis,
              const size_t level) {
  auto out_lod = ins[0]->lod();
  const size_t n = ins.size();
  if (axis == 0UL) {
    for (size_t i = 1; i < n; ++i) {
      // Fetch the i-th input's LoD once instead of on every loop iteration.
      const auto in_lod = ins[i]->lod();
      for (size_t j = 0; j < in_lod[0].size(); ++j) {
        out_lod[0][j] += in_lod[0][j];
      }
      if (ins[0]->NumLevels() == 2) {
        for (size_t j = 1; j < in_lod[1].size(); ++j) {
          if (level == 0UL) {
            // Append this input's j-th sequence length as a new offset.
            out_lod[1].push_back(out_lod[1].back() + in_lod[1][j] -
                                 in_lod[1][j - 1]);
          } else if (level == 1UL) {
            // Accumulate the i-th input. Indexing input 1 here would count
            // the second input repeatedly and ignore inputs 2..n-1.
            out_lod[1][j] += in_lod[1][j];
          }
        }
      }
    }
  }
  return out_lod;
}
// Forward kernel of sequence_concat.
//
// Checks that all inputs agree in LoD depth, rank and every dimension other
// than `axis`, sets the output LoD computed by concatLoD, and then, for each
// sequence at LoD level `level`, copies the matching slice of every input into
// the output slice, one input after another along `axis`.
template <typename Place, typename T>
class SequenceConcatOpKernel : public framework::OpKernel<T> {
 public:
  void Compute(const framework::ExecutionContext& ctx) const override {
    auto ins = ctx.MultiInput<LoDTensor>("X");
    auto* out = ctx.Output<LoDTensor>("Out");
    const size_t axis = static_cast<size_t>(ctx.Attr<int>("axis"));
    const size_t level = static_cast<size_t>(ctx.Attr<int>("level"));
    const size_t n = ins.size();

    // Compare every later input against the first one.
    for (size_t i = 1; i < n; ++i) {
      PADDLE_ENFORCE_EQ(ins[0]->NumLevels(), ins[i]->NumLevels(),
                        "The levels of all the input LoDTensors "
                        "should be the same.");
      PADDLE_ENFORCE_EQ(ins[0]->dims().size(), ins[i]->dims().size(),
                        "The dimension size of all the input LoDTensors "
                        "should be the same.");
      const size_t dims_size = ins[i]->dims().size();
      for (size_t j = 0; j < dims_size; ++j) {
        // Only the concatenation axis is allowed to differ.
        if (j != axis) {
          PADDLE_ENFORCE_EQ(ins[0]->dims()[j], ins[i]->dims()[j],
                            "Except for the dimension of the specified "
                            "axis along which all the inputs are concatenated, "
                            "dimensions of all the other axises of the input "
                            "LoDTensors should be the same.");
        }
      }
    }
    PADDLE_ENFORCE_GT(ins[0]->NumLevels(), level,
                      "The levels of all the input LoDTensors "
                      "should be greater than the specify level");

    out->mutable_data<T>(ctx.GetPlace());
    auto out_lod = concatLoD<LoDTensor>(ins, axis, level);
    out->set_lod(out_lod);

    // Offsets delimiting the output sequences at the requested level.
    const auto seq_bounds = out_lod[level];
    for (size_t seq = 0; seq < seq_bounds.size() - 1; ++seq) {
      const int out_begin = static_cast<int>(seq_bounds[seq]);
      const int out_end = static_cast<int>(seq_bounds[seq + 1]);
      Tensor out_seq = out->Slice<T>(out_begin, out_end);
      auto out_stride = framework::stride(out_seq.dims());

      // Element offset inside out_seq at which the next input is written.
      size_t written = 0;
      for (size_t k = 0; k < n; ++k) {
        // Copied by value on purpose: lod() may hand back a temporary.
        auto in_bounds = ins[k]->lod()[level];
        auto in_stride = framework::stride(ins[k]->dims());
        const int in_begin = static_cast<int>(in_bounds[seq]);
        const int in_end = static_cast<int>(in_bounds[seq + 1]);
        Tensor in_seq = ins[k]->Slice<T>(in_begin, in_end);
        size_t axis_extent = in_seq.dims()[axis];
        StridedMemcpy<T>(ctx.device_context(), in_seq.data<T>(), in_stride,
                         in_seq.dims(), out_stride,
                         out_seq.data<T>() + written);
        written += axis_extent * in_stride[axis];
      }
    }
  }
};
// Backward kernel of sequence_concat.
//
// Gives every Grad(X) the LoD of its forward input, recomputes the output LoD
// with concatLoD, and then, for each sequence at LoD level `level`, copies the
// region of Grad(Out) that belongs to each input back into that input's
// gradient slice. The inputs X and the outputs Grad(X) are assumed to have the
// same count and order.
template <typename Place, typename T>
class SequenceConcatGradOpKernel : public framework::OpKernel<T> {
 public:
  void Compute(const framework::ExecutionContext& ctx) const override {
    auto ins = ctx.MultiInput<framework::LoDTensor>("X");
    auto* out_grad =
        ctx.Input<framework::LoDTensor>(framework::GradVarName("Out"));
    auto x_grads =
        ctx.MultiOutput<framework::LoDTensor>(framework::GradVarName("X"));
    size_t axis = static_cast<size_t>(ctx.Attr<int>("axis"));
    size_t level = static_cast<size_t>(ctx.Attr<int>("level"));
    const size_t n = x_grads.size();

    // Set Grad(X) LoD as X
    for (size_t i = 0; i < n; i++) {
      x_grads[i]->set_lod(ins[i]->lod());
      x_grads[i]->mutable_data<T>(ctx.GetPlace());
    }

    auto out_lod = concatLoD<LoDTensor>(ins, axis, level);
    auto out_lod_level = out_lod[level];

    for (size_t i = 0; i < out_lod_level.size() - 1; ++i) {
      Tensor out_grad_t =
          out_grad->Slice<T>(static_cast<int>(out_lod_level[i]),
                             static_cast<int>(out_lod_level[i + 1]));
      auto out_grad_stride = framework::stride(out_grad_t.dims());
      // Element offset inside out_grad_t where the next input's region starts.
      size_t offset = 0;

      for (size_t j = 0; j < n; ++j) {
        auto x_grad_lod_level = x_grads[j]->lod()[level];
        auto x_grad_stride = framework::stride(x_grads[j]->dims());
        Tensor x_grad_t =
            x_grads[j]->Slice<T>(static_cast<int>(x_grad_lod_level[i]),
                                 static_cast<int>(x_grad_lod_level[i + 1]));
        size_t axis_dim = x_grad_t.dims()[axis];
        // The copy extent must be the destination slice's dims, mirroring the
        // forward kernel which passes the per-input slice dims. Passing the
        // larger out_grad_t dims would read and write past this input's slice.
        StridedMemcpy<T>(ctx.device_context(), out_grad_t.data<T>() + offset,
                         out_grad_stride, x_grad_t.dims(), x_grad_stride,
                         x_grad_t.data<T>());
        offset += axis_dim * out_grad_stride[axis];
      }
    }
  }
};
} // namespace operators
} // namespace paddle
...@@ -36,11 +36,10 @@ class SequencePoolOpMaker : public framework::OpProtoAndCheckerMaker { ...@@ -36,11 +36,10 @@ class SequencePoolOpMaker : public framework::OpProtoAndCheckerMaker {
SequencePoolOpMaker(framework::OpProto* proto, SequencePoolOpMaker(framework::OpProto* proto,
framework::OpAttrChecker* op_checker) framework::OpAttrChecker* op_checker)
: OpProtoAndCheckerMaker(proto, op_checker) { : OpProtoAndCheckerMaker(proto, op_checker) {
AddInput("X", AddInput("X", "(LoDTensor), the variable-length input of SequencePoolOp");
"A float LoDTensor, the variable-length input of SequencePoolOp"); AddOutput("Out",
AddOutput( "(Tensor), output of SequencePoolOp, which does not contain LoD "
"Out", "infomation.");
"A float LoDTensor, the variable-length output of SequencePoolOp.");
AddAttr<int>( AddAttr<int>(
"strategy", "strategy",
"(int, default AVERAGE) the pooling strategy of SequencePoolOp.") "(int, default AVERAGE) the pooling strategy of SequencePoolOp.")
...@@ -49,13 +48,13 @@ class SequencePoolOpMaker : public framework::OpProtoAndCheckerMaker { ...@@ -49,13 +48,13 @@ class SequencePoolOpMaker : public framework::OpProtoAndCheckerMaker {
AddComment(R"DOC( AddComment(R"DOC(
SequencePoolOp pools features of all time-steps of each instance. SequencePoolOp pools features of all time-steps of each instance.
For a mini-batch of 3 variable lengths sentences, containing 2, 3, and 2 time-steps: For a mini-batch of 3 variable-length sentences, containing 2, 3, and 2 time-steps:
Assume X is a [7,M,N] float LoDTensor, and X->lod()[0] = [0, 2, 5, 7]. Assume X is a [7,M,N] LoDTensor, and X->lod()[0] = [0, 2, 5, 7], 7=2+3+2.
Besides, for the sake of simplicity, we assume M=1 and N=1, Besides, for the sake of simplicity, we assume M=1 and N=1,
and the value of X = [[1, 3], [2, 4, 6], [5, 1]]. and the value of X = [[1, 3], [2, 4, 6], [5, 1]].
Thus, Out is a [3,1,1] float LoDTensor, but Out->lod() is nullptr. Thus, Out is a [3,1,1] Tensor without LoD infomation.
And for different strategy, the value of Out is as follows: And for different strategy, the value of Out is as follows:
- AVERAGE: [2, 4, 3], where 2=(1+3)/2, 4=(2+4+6)/3, 3=(5+1)/2 - AVERAGE: [2, 4, 3], where 2=(1+3)/2, 4=(2+4+6)/3, 3=(5+1)/2
......
...@@ -15,6 +15,7 @@ limitations under the License. */ ...@@ -15,6 +15,7 @@ limitations under the License. */
#pragma once #pragma once
#include "paddle/framework/eigen.h" #include "paddle/framework/eigen.h"
#include "paddle/framework/op_registry.h" #include "paddle/framework/op_registry.h"
#include "paddle/operators/math/math_function.h"
namespace paddle { namespace paddle {
namespace operators { namespace operators {
...@@ -77,6 +78,16 @@ class SequencePoolKernel : public framework::OpKernel<T> { ...@@ -77,6 +78,16 @@ class SequencePoolKernel : public framework::OpKernel<T> {
case SUM: case SUM:
out_e.device(place) = in_e.sum(Eigen::array<int, 1>({{0}})); out_e.device(place) = in_e.sum(Eigen::array<int, 1>({{0}}));
break; break;
case SQRT:
out_e.device(place) = in_e.sum(Eigen::array<int, 1>({{0}})) /
std::sqrt(static_cast<T>(h));
break;
case LAST:
out_e.device(place) = in_e.chip(h - 1, 0);
break;
case FIRST:
out_e.device(place) = in_e.chip(0, 0);
break;
default: default:
PADDLE_THROW("unsupported pooling strategy"); PADDLE_THROW("unsupported pooling strategy");
} }
...@@ -98,6 +109,10 @@ class SequencePoolGradKernel : public framework::OpKernel<T> { ...@@ -98,6 +109,10 @@ class SequencePoolGradKernel : public framework::OpKernel<T> {
int64_t w = in->numel() / dims[0]; int64_t w = in->numel() / dims[0];
in_g->mutable_data<T>(context.GetPlace()); in_g->mutable_data<T>(context.GetPlace());
if (strategy == LAST || strategy == FIRST) {
// set X@Grad be zero at first when strategy is LAST/FIRST
math::SetConstant<Place, T>(context.device_context(), in_g, 0);
}
auto place = context.GetEigenDevice<Place>(); auto place = context.GetEigenDevice<Place>();
for (int i = 0; i < static_cast<int>(lod.size()) - 1; ++i) { for (int i = 0; i < static_cast<int>(lod.size()) - 1; ++i) {
auto in_g_t = in_g->Slice<T>(static_cast<int>(lod[i]), auto in_g_t = in_g->Slice<T>(static_cast<int>(lod[i]),
...@@ -115,6 +130,16 @@ class SequencePoolGradKernel : public framework::OpKernel<T> { ...@@ -115,6 +130,16 @@ class SequencePoolGradKernel : public framework::OpKernel<T> {
case SUM: case SUM:
in_g_e.device(place) = (out_g_e).broadcast(bcast); in_g_e.device(place) = (out_g_e).broadcast(bcast);
break; break;
case SQRT:
in_g_e.device(place) =
(out_g_e / std::sqrt(static_cast<T>(h))).broadcast(bcast);
break;
case LAST:
in_g_e.chip(h - 1, 0).device(place) = out_g_e;
break;
case FIRST:
in_g_e.chip(0, 0).device(place) = out_g_e;
break;
default: default:
PADDLE_THROW("unsupported pooling strategy"); PADDLE_THROW("unsupported pooling strategy");
} }
......
...@@ -34,7 +34,7 @@ class SumOp : public framework::OperatorWithKernel { ...@@ -34,7 +34,7 @@ class SumOp : public framework::OperatorWithKernel {
auto in_dim = x_dims[0]; auto in_dim = x_dims[0];
for (size_t i = 1; i < N; i++) { for (size_t i = 1; i < N; i++) {
auto dim = x_dims[i]; auto dim = x_dims[i];
PADDLE_ENFORCE(in_dim == dim, "Input tensors must have same shape"); PADDLE_ENFORCE_EQ(in_dim, dim, "Input tensors must have same shape");
} }
ctx->SetOutputDim("Out", in_dim); ctx->SetOutputDim("Out", in_dim);
ctx->ShareLoD("X", /*->*/ "Out"); ctx->ShareLoD("X", /*->*/ "Out");
......
...@@ -54,7 +54,7 @@ class UniformRandomOp : public framework::OperatorWithKernel { ...@@ -54,7 +54,7 @@ class UniformRandomOp : public framework::OperatorWithKernel {
PADDLE_ENFORCE( PADDLE_ENFORCE(
ctx->Attrs().Get<float>("min") < ctx->Attrs().Get<float>("max"), ctx->Attrs().Get<float>("min") < ctx->Attrs().Get<float>("max"),
"uniform_random's min must less then max"); "uniform_random's min must less then max");
auto dims = Attr<std::vector<int>>("dims"); auto& dims = ctx->Attrs().Get<std::vector<int>>("dims");
std::vector<int64_t> temp; std::vector<int64_t> temp;
temp.reserve(dims.size()); temp.reserve(dims.size());
for (auto dim : dims) { for (auto dim : dims) {
......
...@@ -71,23 +71,32 @@ class ScopedTensorDescriptor { ...@@ -71,23 +71,32 @@ class ScopedTensorDescriptor {
inline cudnnTensorDescriptor_t descriptor(const cudnnTensorFormat_t format, inline cudnnTensorDescriptor_t descriptor(const cudnnTensorFormat_t format,
const cudnnDataType_t type, const cudnnDataType_t type,
const std::vector<int>& dims) { const std::vector<int>& dims,
// the format is not used now, but it maybe useful feature const int groups = 1) {
// the format is not used now, will add later
std::vector<int> strides(dims.size()); std::vector<int> strides(dims.size());
strides[dims.size() - 1] = 1; strides[dims.size() - 1] = 1;
for (int i = dims.size() - 2; i >= 0; i--) { for (int i = dims.size() - 2; i >= 0; i--) {
strides[i] = dims[i + 1] * strides[i + 1]; strides[i] = dims[i + 1] * strides[i + 1];
} }
// Update tensor descriptor dims setting if groups > 1
// FIXME(typhoonzero): Assume using NCHW order
std::vector<int> dims_with_group(dims.begin(), dims.end()); // copy
if (groups > 1) {
dims_with_group[1] = dims_with_group[1] / groups;
}
PADDLE_ENFORCE(dynload::cudnnSetTensorNdDescriptor( PADDLE_ENFORCE(dynload::cudnnSetTensorNdDescriptor(
desc_, type, dims.size(), dims.data(), strides.data())); desc_, type, dims_with_group.size(), dims_with_group.data(),
strides.data()));
return desc_; return desc_;
} }
template <typename T> template <typename T>
inline cudnnTensorDescriptor_t descriptor(const DataLayout& order, inline cudnnTensorDescriptor_t descriptor(const DataLayout& order,
const std::vector<int>& dims) { const std::vector<int>& dims,
return descriptor(GetCudnnTensorFormat(order), CudnnDataType<T>::type, const int groups = 1) {
dims); return descriptor(GetCudnnTensorFormat(order), CudnnDataType<T>::type, dims,
groups);
} }
private: private:
...@@ -106,18 +115,29 @@ class ScopedFilterDescriptor { ...@@ -106,18 +115,29 @@ class ScopedFilterDescriptor {
inline cudnnFilterDescriptor_t descriptor(const cudnnTensorFormat_t format, inline cudnnFilterDescriptor_t descriptor(const cudnnTensorFormat_t format,
const cudnnDataType_t type, const cudnnDataType_t type,
const std::vector<int>& kernel) { const std::vector<int>& kernel,
// filter layout: output input spatial_dim_y spatial_dim_x const int groups = 1) {
// filter layout: MCHW, where M is the number of
// output image channels, C is the number of input image channels,
// H and W is height and width of filter.
std::vector<int> kernel_with_group(kernel.begin(), kernel.end());
if (groups > 1) {
// M /= groups
kernel_with_group[0] /= groups;
// NOTE: input filter(C) of the filter is already asserted to be C/groups.
}
PADDLE_ENFORCE(dynload::cudnnSetFilterNdDescriptor( PADDLE_ENFORCE(dynload::cudnnSetFilterNdDescriptor(
desc_, type, format, kernel.size(), kernel.data())); desc_, type, format, kernel_with_group.size(),
kernel_with_group.data()));
return desc_; return desc_;
} }
template <typename T> template <typename T>
inline cudnnFilterDescriptor_t descriptor(const DataLayout& order, inline cudnnFilterDescriptor_t descriptor(const DataLayout& order,
const std::vector<int>& kernel) { const std::vector<int>& kernel,
const int groups = 1) {
return descriptor(GetCudnnTensorFormat(order), CudnnDataType<T>::type, return descriptor(GetCudnnTensorFormat(order), CudnnDataType<T>::type,
kernel); kernel, groups);
} }
private: private:
......
...@@ -43,6 +43,8 @@ int GetCurrentDeviceId() { ...@@ -43,6 +43,8 @@ int GetCurrentDeviceId() {
} }
void SetDeviceId(int id) { void SetDeviceId(int id) {
// TODO(qijun): find a better way to cache the cuda device count
PADDLE_ENFORCE_LT(id, GetCUDADeviceCount(), "id must less than GPU count");
PADDLE_ENFORCE(cudaSetDevice(id), PADDLE_ENFORCE(cudaSetDevice(id),
"cudaSetDevice failed in paddle::platform::SetDeviceId"); "cudaSetDevice failed in paddle::platform::SetDeviceId");
} }
......
...@@ -49,6 +49,11 @@ DEFINE_int32(sock_recv_buf_size, ...@@ -49,6 +49,11 @@ DEFINE_int32(sock_recv_buf_size,
1024 * 1024 * 40, 1024 * 1024 * 40,
"restrict sock recv buff size"); "restrict sock recv buff size");
/// Listen-queue size for the TCP socket server (default 1024).
/// SocketServer copies this flag into maxPendingConnections_ when it runs in
/// TCP mode, so a reasonable value bounds the number of pending connections.
DEFINE_int32(sock_listen_queue_size,
1024,
"listen queue size when pserver listen a TCP port");
namespace paddle { namespace paddle {
/** /**
...@@ -129,7 +134,7 @@ SocketServer::SocketServer(const std::string &addr, int port, int rdmaCpu) ...@@ -129,7 +134,7 @@ SocketServer::SocketServer(const std::string &addr, int port, int rdmaCpu)
if (rdmaCpu == -1) { if (rdmaCpu == -1) {
tcpRdma_ = F_TCP; tcpRdma_ = F_TCP;
socket_ = 0; socket_ = 0;
maxPendingConnections_ = 100; maxPendingConnections_ = FLAGS_sock_listen_queue_size;
} else { } else {
tcpRdma_ = F_RDMA; tcpRdma_ = F_RDMA;
rdmaCpu_ = rdmaCpu; rdmaCpu_ = rdmaCpu;
......
if(WITH_PYTHON) if(WITH_PYTHON)
cc_library(paddle_pybind SHARED cc_library(paddle_pybind SHARED
SRCS pybind.cc exception.cc protobuf.cc SRCS pybind.cc exception.cc protobuf.cc
DEPS pybind python backward proto_desc tensor_array DEPS pybind python backward proto_desc tensor_array paddle_memory executor
${GLOB_OP_LIB}) ${GLOB_OP_LIB})
endif(WITH_PYTHON) endif(WITH_PYTHON)
...@@ -15,6 +15,7 @@ limitations under the License. */ ...@@ -15,6 +15,7 @@ limitations under the License. */
#include "paddle/pybind/protobuf.h" #include "paddle/pybind/protobuf.h"
#include <deque> #include <deque>
#include <iostream> #include <iostream>
#include "paddle/framework/backward.h"
#include "paddle/framework/block_desc.h" #include "paddle/framework/block_desc.h"
#include "paddle/framework/op_desc.h" #include "paddle/framework/op_desc.h"
#include "paddle/framework/program_desc.h" #include "paddle/framework/program_desc.h"
...@@ -116,8 +117,36 @@ void BindProgramDesc(py::module &m) { ...@@ -116,8 +117,36 @@ void BindProgramDesc(py::module &m) {
py::return_value_policy::reference) py::return_value_policy::reference)
.def("append_block", &ProgramDescBind::AppendBlock, .def("append_block", &ProgramDescBind::AppendBlock,
py::return_value_policy::reference) py::return_value_policy::reference)
.def("append_backward",
[](ProgramDescBind &program_desc, const VarDescBind &target,
const std::unordered_set<std::string> &no_grad_vars) {
ParamGradInfoMap param_grad_map =
AppendBackward(program_desc, target, no_grad_vars);
std::unordered_map<
std::string, std::tuple<std::string /* grad_var_name */,
int /* block_idx */, int /* op_idx */>>
retv;
for (auto it = param_grad_map.begin(); it != param_grad_map.end();
++it) {
const auto &grad_info = it->second;
retv[it->first] = std::make_tuple(
grad_info.name_, grad_info.block_idx_, grad_info.op_idx_);
}
return retv;
})
.def("block", &ProgramDescBind::Block, py::return_value_policy::reference) .def("block", &ProgramDescBind::Block, py::return_value_policy::reference)
.def("num_blocks", &ProgramDescBind::Size); .def("num_blocks", &ProgramDescBind::Size)
.def("serialize_to_string",
[](ProgramDescBind &program_desc) -> py::bytes {
const ProgramDesc *desc = program_desc.Proto();
PADDLE_ENFORCE(desc->IsInitialized(),
"ProgramDesc has not been initialized.");
std::string res;
PADDLE_ENFORCE(
desc->SerializeToString(&res),
"Serialize ProgramDesc Error. This could be a bug of Paddle.");
return res;
});
} }
void BindBlockDesc(py::module &m) { void BindBlockDesc(py::module &m) {
...@@ -128,22 +157,32 @@ void BindBlockDesc(py::module &m) { ...@@ -128,22 +157,32 @@ void BindBlockDesc(py::module &m) {
py::return_value_policy::reference) py::return_value_policy::reference)
.def("prepend_op", &BlockDescBind::PrependOp, .def("prepend_op", &BlockDescBind::PrependOp,
py::return_value_policy::reference) py::return_value_policy::reference)
.def("new_var", .def("var",
[](BlockDescBind &self, py::bytes byte_name) { [](BlockDescBind &self, py::bytes byte_name) {
std::string name = byte_name; std::string name = byte_name;
return self.NewVar(name); return self.Var(name);
}, },
py::return_value_policy::reference) py::return_value_policy::reference)
.def("var", .def("find_var",
[](BlockDescBind &self, py::bytes byte_name) { [](BlockDescBind &self, py::bytes byte_name) {
std::string name = byte_name; std::string name = byte_name;
return self.Var(name); return self.FindVar(name);
}, },
py::return_value_policy::reference) py::return_value_policy::reference)
.def("all_vars", &BlockDescBind::AllVars, .def("all_vars", &BlockDescBind::AllVars,
py::return_value_policy::reference) py::return_value_policy::reference)
.def("all_ops", &BlockDescBind::AllOps, .def("all_ops", &BlockDescBind::AllOps,
py::return_value_policy::reference); py::return_value_policy::reference)
.def("serialize_to_string", [](BlockDescBind &block_desc) -> py::bytes {
const BlockDesc *desc = block_desc.Proto();
PADDLE_ENFORCE(desc->IsInitialized(),
"BlockDesc has not been initialized.");
std::string res;
PADDLE_ENFORCE(
desc->SerializeToString(&res),
"Serialize BlockDesc Error. This could be a bug of Paddle.");
return res;
});
} }
void BindVarDsec(py::module &m) { void BindVarDsec(py::module &m) {
...@@ -156,7 +195,8 @@ void BindVarDsec(py::module &m) { ...@@ -156,7 +195,8 @@ void BindVarDsec(py::module &m) {
.value("FP32", DataType::FP32) .value("FP32", DataType::FP32)
.value("FP64", DataType::FP64); .value("FP64", DataType::FP64);
py::class_<VarDescBind>(m, "VarDesc", "") py::class_<VarDescBind> var_desc(m, "VarDesc", "");
var_desc
.def("name", .def("name",
[](const VarDescBind &self) { [](const VarDescBind &self) {
py::bytes name = self.Name(); py::bytes name = self.Name();
...@@ -166,7 +206,25 @@ void BindVarDsec(py::module &m) { ...@@ -166,7 +206,25 @@ void BindVarDsec(py::module &m) {
.def("set_shape", &VarDescBind::SetShape) .def("set_shape", &VarDescBind::SetShape)
.def("set_data_type", &VarDescBind::SetDataType) .def("set_data_type", &VarDescBind::SetDataType)
.def("shape", &VarDescBind::Shape, py::return_value_policy::reference) .def("shape", &VarDescBind::Shape, py::return_value_policy::reference)
.def("data_type", &VarDescBind::GetDataType); .def("data_type", &VarDescBind::GetDataType)
.def("lod_level", &VarDescBind::GetLodLevel)
.def("set_lod_level", &VarDescBind::SetLoDLevel)
.def("type", &VarDescBind::GetType)
.def("set_type", &VarDescBind::SetType)
.def("serialize_to_string", [](VarDescBind &var_desc) -> py::bytes {
const VarDesc *desc = var_desc.Proto();
PADDLE_ENFORCE(desc->IsInitialized(),
"VarDesc has not been initialized.");
std::string res;
PADDLE_ENFORCE(
desc->SerializeToString(&res),
"Serialize VarDesc Error. This could be a bug of Paddle.");
return res;
});
py::enum_<VarDesc::VarType>(var_desc, "VarType", "")
.value("LOD_TENSOR", VarDesc::LOD_TENSOR)
.value("SELECTED_ROWS", VarDesc::SELECTED_ROWS);
} }
void BindOpDesc(py::module &m) { void BindOpDesc(py::module &m) {
...@@ -196,8 +254,19 @@ void BindOpDesc(py::module &m) { ...@@ -196,8 +254,19 @@ void BindOpDesc(py::module &m) {
.def("set_attr", &OpDescBind::SetAttr) .def("set_attr", &OpDescBind::SetAttr)
.def("attr", &OpDescBind::GetAttr) .def("attr", &OpDescBind::GetAttr)
.def("set_block_attr", &OpDescBind::SetBlockAttr) .def("set_block_attr", &OpDescBind::SetBlockAttr)
.def("get_block_attr", &OpDescBind::GetBlockAttr) .def("block_attr", &OpDescBind::GetBlockAttr)
.def("infer_shape", &OpDescBind::InferShape); .def("check_attrs", &OpDescBind::CheckAttrs)
.def("infer_shape", &OpDescBind::InferShape)
.def("serialize_to_string", [](OpDescBind &op_desc) -> py::bytes {
const OpDesc *desc = op_desc.Proto();
PADDLE_ENFORCE(desc->IsInitialized(),
"OpDesc has not been initialized.");
std::string res;
PADDLE_ENFORCE(
desc->SerializeToString(&res),
"Serialize OpDesc Error. This could be a bug of Paddle.");
return res;
});
} }
} // namespace pybind } // namespace pybind
......
...@@ -15,9 +15,11 @@ limitations under the License. */ ...@@ -15,9 +15,11 @@ limitations under the License. */
#include "paddle/pybind/protobuf.h" #include "paddle/pybind/protobuf.h"
#include "paddle/framework/backward.h" #include "paddle/framework/backward.h"
#include "paddle/framework/executor.h"
#include "paddle/framework/lod_tensor.h" #include "paddle/framework/lod_tensor.h"
#include "paddle/framework/tensor_array.h" #include "paddle/framework/tensor_array.h"
#include "paddle/operators/cond_op.h" #include "paddle/operators/cond_op.h"
#include "paddle/operators/dynamic_recurrent_op.h"
#include "paddle/operators/net_op.h" #include "paddle/operators/net_op.h"
#include "paddle/operators/recurrent_op.h" #include "paddle/operators/recurrent_op.h"
#include "paddle/platform/enforce.h" #include "paddle/platform/enforce.h"
...@@ -163,9 +165,9 @@ All parameter, weight, gradient are variables in Paddle. ...@@ -163,9 +165,9 @@ All parameter, weight, gradient are variables in Paddle.
py::return_value_policy::reference); py::return_value_policy::reference);
py::class_<Scope>(m, "Scope", "") py::class_<Scope>(m, "Scope", "")
.def("new_var", .def("var",
[](Scope &self, const std::string &name) -> Variable * { [](Scope &self, const std::string &name) -> Variable * {
return self.NewVar(name); return self.Var(name);
}, },
py::return_value_policy::reference) py::return_value_policy::reference)
.def("find_var", &Scope::FindVar, py::return_value_policy::reference) .def("find_var", &Scope::FindVar, py::return_value_policy::reference)
...@@ -341,6 +343,33 @@ All parameter, weight, gradient are variables in Paddle. ...@@ -341,6 +343,33 @@ All parameter, weight, gradient are variables in Paddle.
self.set_stepnet(net.Clone()); self.set_stepnet(net.Clone());
}); });
// Python binding for operators::DynamicRecurrentOp: construction from a
// serialized OpDesc, step-net assignment, and read access to the tensor
// arrays returned by state() / step_input() / step_output().
py::class_<operators::DynamicRecurrentOp, OperatorBase>(m,
                                                        "DynamicRecurrentOp")
    .def_static(
        "create",
        [](py::bytes protobin) -> operators::DynamicRecurrentOp * {
          // Parse without the required-field check first, so that an
          // incomplete message is reported by the explicit check below
          // together with its initialization error string.
          OpDesc op_proto;
          PADDLE_ENFORCE(op_proto.ParsePartialFromString(protobin),
                         "Cannot parse user input to OpDesc");
          PADDLE_ENFORCE(op_proto.IsInitialized(),
                         "User OpDesc is not initialized, reason %s",
                         op_proto.InitializationErrorString());
          // Ownership of the created operator is handed to the caller.
          // NOTE(review): the downcast is unchecked; it presumes the desc
          // names a dynamic recurrent op -- confirm callers guarantee this.
          auto *raw_op = OpRegistry::CreateOp(op_proto).release();
          return static_cast<operators::DynamicRecurrentOp *>(raw_op);
        })
    .def("set_stepnet",
         [](operators::DynamicRecurrentOp &rnn, const operators::NetOp &step) {
           // The op receives its own clone of the step net.
           rnn.SetStepNet(step.Clone());
         })
    .def("get_state",
         [](operators::DynamicRecurrentOp &rnn,
            const std::string &var) -> const TensorArray & {
           return rnn.state(var);
         })
    .def("get_step_input",
         [](operators::DynamicRecurrentOp &rnn,
            const std::string &var) -> const TensorArray & {
           return rnn.step_input(var);
         })
    .def("get_step_output",
         [](operators::DynamicRecurrentOp &rnn,
            const std::string &var) -> const TensorArray & {
           return rnn.step_output(var);
         });
// cond_op // cond_op
py::class_<operators::CondOp, OperatorBase>(m, "CondOp") py::class_<operators::CondOp, OperatorBase>(m, "CondOp")
.def_static("create", .def_static("create",
...@@ -363,6 +392,14 @@ All parameter, weight, gradient are variables in Paddle. ...@@ -363,6 +392,14 @@ All parameter, weight, gradient are variables in Paddle.
self.set_falsenet(net.Clone()); self.set_falsenet(net.Clone());
}); });
// Python binding for framework::Executor. An executor is constructed from a
// list of places; `run` executes block `block_id` of the given program
// against the scope returned by GetGlobalScope().
py::class_<framework::Executor>(m, "Executor")
    .def(py::init<std::vector<platform::Place> &>())
    .def("run", [](Executor &executor, const ProgramDesc &program,
                   int block_id) {
      executor.Run(program, &GetGlobalScope(), block_id);
    });
m.def("unique_integer", UniqueIntegerGenerator); m.def("unique_integer", UniqueIntegerGenerator);
m.def("is_compile_gpu", IsCompileGPU); m.def("is_compile_gpu", IsCompileGPU);
......
...@@ -57,7 +57,18 @@ struct CastToPyBufferImpl<true, I, ARGS...> { ...@@ -57,7 +57,18 @@ struct CastToPyBufferImpl<true, I, ARGS...> {
} }
framework::Tensor dst_tensor; framework::Tensor dst_tensor;
if (paddle::platform::is_gpu_place(tensor.place())) { if (paddle::platform::is_gpu_place(tensor.place())) {
dst_tensor.CopyFrom<CUR_TYPE>(tensor, platform::CPUPlace()); #ifdef PADDLE_WITH_CUDA
auto *src_ptr = static_cast<const void *>(tensor.data<CUR_TYPE>());
auto *dst_ptr = static_cast<void *>(dst_tensor.mutable_data<CUR_TYPE>(
tensor.dims(), platform::CPUPlace()));
// TODO(qijun): Here we use default CUDA stream to set GPU Tensor to
// a Python numpy array. It would be better to manage CUDA streams uniformly.
paddle::platform::GpuMemcpySync(dst_ptr, src_ptr,
sizeof(CUR_TYPE) * tensor.numel(),
cudaMemcpyDeviceToHost);
#else
PADDLE_THROW("'GPUPlace' is not supported in CPU only device.");
#endif
} else if (paddle::platform::is_cpu_place(tensor.place())) { } else if (paddle::platform::is_cpu_place(tensor.place())) {
dst_tensor = tensor; dst_tensor = tensor;
} }
...@@ -120,6 +131,8 @@ void PyCUDATensorSetFromArray( ...@@ -120,6 +131,8 @@ void PyCUDATensorSetFromArray(
self.Resize(framework::make_ddim(dims)); self.Resize(framework::make_ddim(dims));
auto *dst = self.mutable_data<T>(place); auto *dst = self.mutable_data<T>(place);
// TODO(qijun): Here we use default CUDA stream to set a Python numpy
// array to a GPU Tensor. It would be better to manage CUDA streams uniformly.
paddle::platform::GpuMemcpySync(dst, array.data(), sizeof(T) * array.size(), paddle::platform::GpuMemcpySync(dst, array.data(), sizeof(T) * array.size(),
cudaMemcpyHostToDevice); cudaMemcpyHostToDevice);
} }
......
...@@ -11,7 +11,13 @@ set -e ...@@ -11,7 +11,13 @@ set -e
# install glide # install glide
curl https://glide.sh/get | bash curl https://glide.sh/get | bash
eval "$(GIMME_GO_VERSION=1.8.3 gimme)" eval "$(GIMME_GO_VERSION=1.8.3 gimme)"
go get -u github.com/alecthomas/gometalinter
# set up go environment for running gometalinter
mkdir -p $GOPATH/src/github.com/PaddlePaddle/
ln -sf $TRAVIS_BUILD_DIR $GOPATH/src/github.com/PaddlePaddle/Paddle
cd $GOPATH/src/github.com/PaddlePaddle/Paddle/go; glide install; cd -
go get github.com/alecthomas/gometalinter
gometalinter --install gometalinter --install
cd $TRAVIS_BUILD_DIR cd $TRAVIS_BUILD_DIR
...@@ -19,10 +25,7 @@ export PATH=/usr/bin:$PATH ...@@ -19,10 +25,7 @@ export PATH=/usr/bin:$PATH
pre-commit install pre-commit install
clang-format --version clang-format --version
# set up go environment for running gometalinter
mkdir -p $GOPATH/src/github.com/PaddlePaddle/
ln -sf $TRAVIS_BUILD_DIR $GOPATH/src/github.com/PaddlePaddle/Paddle
cd $GOPATH/src/github.com/PaddlePaddle/Paddle/go; glide install; cd -
if ! pre-commit run -a ; then if ! pre-commit run -a ; then
git diff git diff
......
...@@ -39,15 +39,18 @@ add_test(NAME test_CompareTwoNets ...@@ -39,15 +39,18 @@ add_test(NAME test_CompareTwoNets
################ test_CompareMKLDNNandCPU ###################### ################ test_CompareMKLDNNandCPU ######################
if(WITH_MKLDNN) if(WITH_MKLDNN)
add_unittest_without_exec(test_CompareMKLDNNandCPU macro(gen_command VAR_NAME CONFIG_FILE)
test_CompareTwoNets.cpp) set(${VAR_NAME} "${PADDLE_SOURCE_DIR}/paddle/.set_python_path.sh" "-d" "${PADDLE_SOURCE_DIR}/python/"
add_test(NAME test_CompareMKLDNNandCPU "${CMAKE_CURRENT_BINARY_DIR}/test_CompareMKLDNNandCPU --use_gpu=False"
COMMAND ${PADDLE_SOURCE_DIR}/paddle/.set_python_path.sh -d ${PADDLE_SOURCE_DIR}/python/ "--config_file_a=trainer/tests/${CONFIG_FILE} --use_mkldnn_a=True"
${CMAKE_CURRENT_BINARY_DIR}/test_CompareMKLDNNandCPU "--config_file_b=trainer/tests/${CONFIG_FILE} --use_mkldnn_b=False"
--config_file_a=trainer/tests/sample_trainer_config_simple_net.conf --use_mkldnn_a=True "WORKING_DIRECTORY" "${PADDLE_SOURCE_DIR}/paddle/")
--config_file_b=trainer/tests/sample_trainer_config_simple_net.conf --use_mkldnn_b=False endmacro()
--use_gpu=False add_unittest_without_exec(test_CompareMKLDNNandCPU test_CompareTwoNets.cpp)
WORKING_DIRECTORY ${PADDLE_SOURCE_DIR}/paddle/) gen_command(compare_simple_net "sample_trainer_config_simple_net.conf")
gen_command(compare_branch_net "sample_trainer_config_branch_net.conf")
add_test(NAME test_CompareMKLDNNandCPU_simple_net COMMAND ${compare_simple_net})
add_test(NAME test_CompareMKLDNNandCPU_branch_net COMMAND ${compare_branch_net})
endif() endif()
############### test_CompareTwoOpts ################### ############### test_CompareTwoOpts ###################
......
# Copyright (c) 2017 PaddlePaddle Authors. All Rights Reserved
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
from paddle.trainer_config_helpers import *
# ---------------------------- Data configuration ----------------------------
TrainData(ProtoData(files="trainer/tests/mnist.list"))

# -------------------------- Algorithm configuration --------------------------
settings(
    batch_size=256,
    learning_method=MomentumOptimizer(momentum=0.5, sparse=False))

# --------------------------- Network configuration ---------------------------
# A small CNN with two branch points. Layers are created in a fixed order;
# keep that order when editing.
pixels = data_layer(name="input", size=784)

# Stem convolution shared by the first pair of branches.
stem = img_conv_layer(
    input=pixels,
    num_channels=1,
    filter_size=3,
    num_filters=32,
    padding=1,
    shared_biases=True,
    act=ReluActivation())

# First branch point: a 1x1 and a 3x3 convolution over the stem, joined by
# concat_layer (32 + 32 filters, matching num_channels=64 in the pool below).
branch_1x1 = img_conv_layer(
    input=stem,
    filter_size=1,
    num_filters=32,
    padding=0,
    shared_biases=True,
    act=ReluActivation())

branch_3x3 = img_conv_layer(
    input=stem,
    filter_size=3,
    num_filters=32,
    padding=1,
    shared_biases=True,
    act=ReluActivation())

merged = concat_layer(input=[branch_1x1, branch_3x3])

downsampled = img_pool_layer(
    input=merged,
    num_channels=64,
    pool_size=3,
    stride=2,
    padding=1,
    pool_type=AvgPooling())

# Second branch point: conv + max-pool with 3x3 kernels on one side and 5x5
# kernels on the other, combined by addto_layer.
path_3x3 = img_conv_layer(
    input=downsampled,
    filter_size=3,
    num_filters=64,
    padding=1,
    shared_biases=True,
    act=ReluActivation())

path_3x3 = img_pool_layer(
    input=path_3x3,
    pool_size=3,
    stride=1,
    padding=1,
    pool_type=MaxPooling())

path_5x5 = img_conv_layer(
    input=downsampled,
    filter_size=5,
    num_filters=64,
    padding=2,
    shared_biases=True,
    act=ReluActivation())

path_5x5 = img_pool_layer(
    input=path_5x5,
    pool_size=5,
    stride=1,
    padding=2,
    pool_type=MaxPooling())

summed = addto_layer(
    input=[path_3x3, path_5x5],
    act=ReluActivation(),
    bias_attr=False)

features = img_pool_layer(
    input=summed,
    pool_size=3,
    stride=2,
    padding=1,
    pool_type=MaxPooling())

# Classifier head: a 64-unit tanh layer followed by a 10-way softmax.
hidden = fc_layer(
    input=features,
    size=64,
    bias_attr=False,
    act=TanhActivation())

output = fc_layer(
    input=hidden,
    size=10,
    bias_attr=True,
    act=SoftmaxActivation())

lbl = data_layer(name="label", size=10)
cost = classification_cost(input=output, label=lbl)
outputs(cost)
file(GLOB proto_filenames . *.proto) if (MOBILE_INFERENCE)
file(GLOB proto_filenames . ModelConfig.proto ParameterConfig.proto
TrainerConfig.proto DataConfig.proto)
else()
file(GLOB proto_filenames . *.proto)
endif()
include_directories(${CMAKE_CURRENT_BINARY_DIR}) include_directories(${CMAKE_CURRENT_BINARY_DIR})
proto_library(paddle_proto SRCS ${proto_filenames}) proto_library(paddle_proto SRCS ${proto_filenames})
......
...@@ -559,6 +559,9 @@ class IdentityOffsetProjection(Projection): ...@@ -559,6 +559,9 @@ class IdentityOffsetProjection(Projection):
**xargs) **xargs)
self.proj_conf.offset = offset self.proj_conf.offset = offset
def calc_output_size(self, input_layer_config):
return 0 # depends on the outside MixedLayer
def calc_parameter_size(self, input_size, output_size): def calc_parameter_size(self, input_size, output_size):
return 0 return 0
......
...@@ -318,7 +318,7 @@ class LayerOutput(object): ...@@ -318,7 +318,7 @@ class LayerOutput(object):
:param activation: Layer Activation. :param activation: Layer Activation.
:type activation: BaseActivation. :type activation: BaseActivation.
:param parents: Layer's parents. :param parents: Layer's parents.
:type parents: list|tuple|collections.Sequence :type parents: list | tuple | collections.Sequence
""" """
def __init__(self, def __init__(self,
...@@ -435,7 +435,7 @@ def full_matrix_projection(input, size=0, param_attr=None): ...@@ -435,7 +435,7 @@ def full_matrix_projection(input, size=0, param_attr=None):
size=100, size=100,
param_attr=ParamAttr(name='_proj')) param_attr=ParamAttr(name='_proj'))
:param input: input layer :param input: The input of this layer.
:type input: LayerOutput :type input: LayerOutput
:param size: The parameter size. Means the width of parameter. :param size: The parameter size. Means the width of parameter.
:type size: int :type size: int
...@@ -471,7 +471,7 @@ def trans_full_matrix_projection(input, size=0, param_attr=None): ...@@ -471,7 +471,7 @@ def trans_full_matrix_projection(input, size=0, param_attr=None):
initial_mean=0.0, initial_mean=0.0,
initial_std=0.01)) initial_std=0.01))
:param input: input layer :param input: The input of this layer.
:type input: LayerOutput :type input: LayerOutput
:param size: The parameter size. Means the width of parameter. :param size: The parameter size. Means the width of parameter.
:type size: int :type size: int
...@@ -516,7 +516,7 @@ def table_projection(input, size=0, param_attr=None): ...@@ -516,7 +516,7 @@ def table_projection(input, size=0, param_attr=None):
param_attr=ParamAttr(name='_proj')) param_attr=ParamAttr(name='_proj'))
:param input: Input layer, which must contain id fields. :param input: The input of this layer, which must contain id fields.
:type input: LayerOutput :type input: LayerOutput
:param size: The parameter size. Means the width of parameter. :param size: The parameter size. Means the width of parameter.
:type size: int :type size: int
...@@ -561,7 +561,7 @@ def identity_projection(input, offset=None, size=None): ...@@ -561,7 +561,7 @@ def identity_projection(input, offset=None, size=None):
Note that both of two projections should not have any parameter. Note that both of two projections should not have any parameter.
:param input: Input Layer. :param input: The input of this layer.
:type input: LayerOutput :type input: LayerOutput
:param offset: Offset, None if use default. :param offset: Offset, None if use default.
:type offset: int :type offset: int
...@@ -596,7 +596,7 @@ def slice_projection(input, slices): ...@@ -596,7 +596,7 @@ def slice_projection(input, slices):
Note that slice_projection should not have any parameter. Note that slice_projection should not have any parameter.
:param input: Input Layer. :param input: The input of this layer.
:type input: LayerOutput :type input: LayerOutput
:param slices: An array of slice parameters. :param slices: An array of slice parameters.
Each slice contains the start and end offsets based Each slice contains the start and end offsets based
...@@ -634,7 +634,7 @@ def scaling_projection(input, param_attr=None): ...@@ -634,7 +634,7 @@ def scaling_projection(input, param_attr=None):
proj = scaling_projection(input=layer) proj = scaling_projection(input=layer)
:param input: Input Layer. :param input: The input of this layer.
:type input: LayerOutput :type input: LayerOutput
:param param_attr: Parameter config, None if use default. :param param_attr: Parameter config, None if use default.
:type param_attr: ParameterAttribute :type param_attr: ParameterAttribute
...@@ -663,7 +663,7 @@ def dotmul_projection(input, param_attr=None): ...@@ -663,7 +663,7 @@ def dotmul_projection(input, param_attr=None):
proj = dotmul_projection(input=layer) proj = dotmul_projection(input=layer)
:param input: Input layer. :param input: The input of this layer.
:type input: LayerOutput :type input: LayerOutput
:param param_attr: Parameter config, None if use default. :param param_attr: Parameter config, None if use default.
:type param_attr: ParameterAttribute :type param_attr: ParameterAttribute
...@@ -734,7 +734,7 @@ def context_projection(input, ...@@ -734,7 +734,7 @@ def context_projection(input,
after context projection and not set padding_attr, sequence will after context projection and not set padding_attr, sequence will
be [ 0AB ABC BCD CDE DEF EFG FG0 ]. be [ 0AB ABC BCD CDE DEF EFG FG0 ].
:param input: Input Sequence. :param input: The input of this layer, which should be a sequence.
:type input: LayerOutput :type input: LayerOutput
:param context_len: context length. :param context_len: context length.
:type context_len: int :type context_len: int
...@@ -744,7 +744,7 @@ def context_projection(input, ...@@ -744,7 +744,7 @@ def context_projection(input,
:param padding_attr: Padding Parameter Attribute. If false, it means padding :param padding_attr: Padding Parameter Attribute. If false, it means padding
always be zero. Otherwise Padding is learnable, and always be zero. Otherwise Padding is learnable, and
parameter attribute is set by this parameter. parameter attribute is set by this parameter.
:type padding_attr: bool|ParameterAttribute :type padding_attr: bool | ParameterAttribute
:return: Projection :return: Projection
:rtype: Projection :rtype: Projection
""" """
...@@ -782,13 +782,13 @@ class MixedLayerType(LayerOutput): ...@@ -782,13 +782,13 @@ class MixedLayerType(LayerOutput):
:type name: basestring :type name: basestring
:param size: layer size. :param size: layer size.
:type size: int :type size: int
:param act: activation type. :param act: Activation type.
:type act: BaseActivation :type act: BaseActivation
:param bias_attr: The Bias Attribute. If the parameter is set to :param bias_attr: The Bias Attribute. If the parameter is set to
False or something not type of ParameterAttribute, False or something not type of ParameterAttribute,
no bias is defined. If the parameter is set to no bias is defined. If the parameter is set to
True, the bias is initialized to zero. True, the bias is initialized to zero.
:type bias_attr: ParameterAttribute|None|Bool|Any :type bias_attr: ParameterAttribute | None | bool | Any
:param layer_attr: Extra Layer Attribute. :param layer_attr: Extra Layer Attribute.
:type layer_attr: ExtraLayerAttribute or None :type layer_attr: ExtraLayerAttribute or None
""" """
...@@ -880,15 +880,15 @@ def mixed_layer(size=0, ...@@ -880,15 +880,15 @@ def mixed_layer(size=0,
:type name: basestring :type name: basestring
:param size: layer size. :param size: layer size.
:type size: int :type size: int
:param input: inputs layer. It is an optional parameter. If set, :param input: The input of this layer. It is an optional parameter. If set,
then this function will just return layer's name. then this function will just return layer's name.
:param act: Activation Type. :param act: Activation Type. LinearActivation is the default.
:type act: BaseActivation :type act: BaseActivation
:param bias_attr: The Bias Attribute. If the parameter is set to :param bias_attr: The Bias Attribute. If the parameter is set to
False or something not type of ParameterAttribute, False or something not type of ParameterAttribute,
no bias is defined. If the parameter is set to no bias is defined. If the parameter is set to
True, the bias is initialized to zero. True, the bias is initialized to zero.
:type bias_attr: ParameterAttribute|None|Bool|Any :type bias_attr: ParameterAttribute | None | bool | Any
:param layer_attr: The extra layer config. Default is None. :param layer_attr: The extra layer config. Default is None.
:type layer_attr: ExtraLayerAttribute :type layer_attr: ExtraLayerAttribute
:return: MixedLayerType object can add inputs or layer name. :return: MixedLayerType object can add inputs or layer name.
...@@ -929,9 +929,9 @@ def data_layer(name, size, depth=None, height=None, width=None, ...@@ -929,9 +929,9 @@ def data_layer(name, size, depth=None, height=None, width=None,
:param size: Size of this data layer. :param size: Size of this data layer.
:type size: int :type size: int
:param height: Height of this data layer, used for image :param height: Height of this data layer, used for image
:type height: int|None :type height: int | None
:param width: Width of this data layer, used for image :param width: Width of this data layer, used for image
:type width: int|None :type width: int | None
:param layer_attr: Extra Layer Attribute. :param layer_attr: Extra Layer Attribute.
:type layer_attr: ExtraLayerAttribute. :type layer_attr: ExtraLayerAttribute.
:return: LayerOutput object. :return: LayerOutput object.
...@@ -966,15 +966,15 @@ def embedding_layer(input, size, name=None, param_attr=None, layer_attr=None): ...@@ -966,15 +966,15 @@ def embedding_layer(input, size, name=None, param_attr=None, layer_attr=None):
:param name: The name of this layer. It is optional. :param name: The name of this layer. It is optional.
:type name: basestring :type name: basestring
:param input: The input layer for this embedding. NOTE: must be Index Data. :param input: The input of this layer, which must be Index Data.
:type input: LayerOutput :type input: LayerOutput
:param size: The embedding dimension. :param size: The embedding dimension.
:type size: int :type size: int
:param param_attr: The embedding parameter attribute. See ParameterAttribute :param param_attr: The embedding parameter attribute. See ParameterAttribute
for details. for details.
:type param_attr: ParameterAttribute|None :type param_attr: ParameterAttribute | None
:param layer_attr: Extra layer Config. Default is None. :param layer_attr: Extra layer Config. Default is None.
:type layer_attr: ExtraLayerAttribute|None :type layer_attr: ExtraLayerAttribute | None
:return: LayerOutput object. :return: LayerOutput object.
:rtype: LayerOutput :rtype: LayerOutput
""" """
...@@ -1021,11 +1021,11 @@ def fc_layer(input, ...@@ -1021,11 +1021,11 @@ def fc_layer(input,
:param name: The name of this layer. It is optional. :param name: The name of this layer. It is optional.
:type name: basestring :type name: basestring
:param input: The input layer. Could be a list/tuple of input layer. :param input: The input of this layer.
:type input: LayerOutput|list|tuple :type input: LayerOutput | list | tuple
:param size: The layer dimension. :param size: The layer dimension.
:type size: int :type size: int
:param act: Activation Type. Default is tanh. :param act: Activation Type. TanhActivation is the default.
:type act: BaseActivation :type act: BaseActivation
:param param_attr: The Parameter Attribute|list. :param param_attr: The Parameter Attribute|list.
:type param_attr: ParameterAttribute :type param_attr: ParameterAttribute
...@@ -1033,9 +1033,9 @@ def fc_layer(input, ...@@ -1033,9 +1033,9 @@ def fc_layer(input,
False or something not type of ParameterAttribute, False or something not type of ParameterAttribute,
no bias is defined. If the parameter is set to no bias is defined. If the parameter is set to
True, the bias is initialized to zero. True, the bias is initialized to zero.
:type bias_attr: ParameterAttribute|None|Bool|Any :type bias_attr: ParameterAttribute | None | bool | Any
:param layer_attr: Extra Layer config. :param layer_attr: Extra Layer config.
:type layer_attr: ExtraLayerAttribute|None :type layer_attr: ExtraLayerAttribute | None
:return: LayerOutput object. :return: LayerOutput object.
:rtype: LayerOutput :rtype: LayerOutput
""" """
...@@ -1072,8 +1072,8 @@ def printer_layer(input, format=None, name=None): ...@@ -1072,8 +1072,8 @@ def printer_layer(input, format=None, name=None):
:param name: The name of this layer. It is optional. :param name: The name of this layer. It is optional.
:type name: basestring :type name: basestring
:param input: The input layer. Could be a list/tuple of input layer. :param input: The input of this layer.
:type input: LayerOutput|list|tuple :type input: LayerOutput | list | tuple
:return: LayerOutput :return: LayerOutput
""" """
if isinstance(input, LayerOutput): if isinstance(input, LayerOutput):
...@@ -1110,7 +1110,7 @@ def priorbox_layer(input, ...@@ -1110,7 +1110,7 @@ def priorbox_layer(input,
:param name: The name of this layer. It is optional. :param name: The name of this layer. It is optional.
:type name: basestring :type name: basestring
:param input: The input layer. :param input: The input of this layer.
:type input: LayerOutput :type input: LayerOutput
:param image: The network input image. :param image: The network input image.
:type image: LayerOutput :type image: LayerOutput
...@@ -1306,7 +1306,7 @@ def cross_channel_norm_layer(input, name=None, param_attr=None): ...@@ -1306,7 +1306,7 @@ def cross_channel_norm_layer(input, name=None, param_attr=None):
:param name: The name of this layer. It is optional. :param name: The name of this layer. It is optional.
:type name: basestring :type name: basestring
:param input: The input layer. :param input: The input of this layer.
:type input: LayerOutput :type input: LayerOutput
:param param_attr: The Parameter Attribute|list. :param param_attr: The Parameter Attribute|list.
:type param_attr: ParameterAttribute :type param_attr: ParameterAttribute
...@@ -1371,20 +1371,20 @@ def pooling_layer(input, ...@@ -1371,20 +1371,20 @@ def pooling_layer(input,
:type agg_level: AggregateLevel :type agg_level: AggregateLevel
:param name: The name of this layer. It is optional. :param name: The name of this layer. It is optional.
:type name: basestring :type name: basestring
:param input: input layer name. :param input: The input of this layer.
:type input: LayerOutput :type input: LayerOutput
:param pooling_type: Type of pooling, MaxPooling(default), AvgPooling, :param pooling_type: Type of pooling, MaxPooling(default), AvgPooling,
SumPooling, SquareRootNPooling. SumPooling, SquareRootNPooling.
:type pooling_type: BasePoolingType|None :type pooling_type: BasePoolingType | None
:param stride: The step size between successive pooling regions. :param stride: The step size between successive pooling regions.
:type stride: Int :type stride: Int
:param bias_attr: The Bias Attribute. If the parameter is set to :param bias_attr: The Bias Attribute. If the parameter is set to
False or something not type of ParameterAttribute, False or something not type of ParameterAttribute,
no bias is defined. If the parameter is set to no bias is defined. If the parameter is set to
True, the bias is initialized to zero. True, the bias is initialized to zero.
:type bias_attr: ParameterAttribute|None|Bool|Any :type bias_attr: ParameterAttribute | None | bool | Any
:param layer_attr: The Extra Attributes for layer, such as dropout. :param layer_attr: The Extra Attributes for layer, such as dropout.
:type layer_attr: ExtraLayerAttribute|None :type layer_attr: ExtraLayerAttribute | None
:return: LayerOutput object. :return: LayerOutput object.
:rtype: LayerOutput :rtype: LayerOutput
""" """
...@@ -1469,11 +1469,11 @@ def lstmemory(input, ...@@ -1469,11 +1469,11 @@ def lstmemory(input,
:type name: basestring :type name: basestring
:param size: DEPRECATED. size of the lstm cell :param size: DEPRECATED. size of the lstm cell
:type size: int :type size: int
:param input: input layer name. :param input: The input of this layer.
:type input: LayerOutput :type input: LayerOutput
:param reverse: is sequence process reversed or not. :param reverse: is sequence process reversed or not.
:type reverse: bool :type reverse: bool
:param act: activation type, TanhActivation by default. :math:`h_t` :param act: Activation type. TanhActivation is the default. :math:`h_t`
:type act: BaseActivation :type act: BaseActivation
:param gate_act: gate activation type, SigmoidActivation by default. :param gate_act: gate activation type, SigmoidActivation by default.
:type gate_act: BaseActivation :type gate_act: BaseActivation
...@@ -1483,11 +1483,11 @@ def lstmemory(input, ...@@ -1483,11 +1483,11 @@ def lstmemory(input,
False or something not type of ParameterAttribute, False or something not type of ParameterAttribute,
no bias is defined. If the parameter is set to no bias is defined. If the parameter is set to
True, the bias is initialized to zero. True, the bias is initialized to zero.
:type bias_attr: ParameterAttribute|None|Bool|Any :type bias_attr: ParameterAttribute | None | bool | Any
:param param_attr: Parameter Attribute. :param param_attr: Parameter Attribute.
:type param_attr: ParameterAttribute|None|False :type param_attr: ParameterAttribute | None | False
:param layer_attr: Extra Layer attribute :param layer_attr: Extra Layer attribute
:type layer_attr: ExtraLayerAttribute|None :type layer_attr: ExtraLayerAttribute | None
:return: LayerOutput object. :return: LayerOutput object.
:rtype: LayerOutput :rtype: LayerOutput
""" """
...@@ -1591,14 +1591,14 @@ def grumemory(input, ...@@ -1591,14 +1591,14 @@ def grumemory(input,
gru = grumemory(input) gru = grumemory(input)
:param name: The gru layer name. :param name: The gru layer name.
:type name: None|basestring :type name: None | basestring
:param input: input layer. :param input: The input of this layer.
:type input: LayerOutput. :type input: LayerOutput.
:param size: DEPRECATED. size of the gru cell :param size: DEPRECATED. size of the gru cell
:type size: int :type size: int
:param reverse: Whether sequence process is reversed or not. :param reverse: Whether sequence process is reversed or not.
:type reverse: bool :type reverse: bool
:param act: activation type, TanhActivation by default. This activation :param act: Activation type, TanhActivation is the default. This activation
affects the :math:`{\\tilde{h_t}}`. affects the :math:`{\\tilde{h_t}}`.
:type act: BaseActivation :type act: BaseActivation
:param gate_act: gate activation type, SigmoidActivation by default. :param gate_act: gate activation type, SigmoidActivation by default.
...@@ -1609,11 +1609,11 @@ def grumemory(input, ...@@ -1609,11 +1609,11 @@ def grumemory(input,
False or something not type of ParameterAttribute, False or something not type of ParameterAttribute,
no bias is defined. If the parameter is set to no bias is defined. If the parameter is set to
True, the bias is initialized to zero. True, the bias is initialized to zero.
:type bias_attr: ParameterAttribute|None|Bool|Any :type bias_attr: ParameterAttribute | None | bool | Any
:param param_attr: Parameter Attribute. :param param_attr: Parameter Attribute.
:type param_attr: ParameterAttribute|None|False :type param_attr: ParameterAttribute | None | False
:param layer_attr: Extra Layer attribute :param layer_attr: Extra Layer attribute
:type layer_attr: ExtraLayerAttribute|None :type layer_attr: ExtraLayerAttribute | None
:return: LayerOutput object. :return: LayerOutput object.
:rtype: LayerOutput :rtype: LayerOutput
""" """
...@@ -1670,7 +1670,7 @@ def last_seq(input, ...@@ -1670,7 +1670,7 @@ def last_seq(input,
:param agg_level: Aggregated level :param agg_level: Aggregated level
:param name: The name of this layer. It is optional. :param name: The name of this layer. It is optional.
:type name: basestring :type name: basestring
:param input: Input layer name. :param input: The input of this layer.
:type input: LayerOutput :type input: LayerOutput
:param stride: The step size between successive pooling regions. :param stride: The step size between successive pooling regions.
:type stride: Int :type stride: Int
...@@ -1726,7 +1726,7 @@ def first_seq(input, ...@@ -1726,7 +1726,7 @@ def first_seq(input,
:param agg_level: aggregation level :param agg_level: aggregation level
:param name: The name of this layer. It is optional. :param name: The name of this layer. It is optional.
:type name: basestring :type name: basestring
:param input: Input layer name. :param input: The input of this layer.
:type input: LayerOutput :type input: LayerOutput
:param stride: The step size between successive pooling regions. :param stride: The step size between successive pooling regions.
:type stride: Int :type stride: Int
...@@ -1799,7 +1799,7 @@ def expand_layer(input, ...@@ -1799,7 +1799,7 @@ def expand_layer(input,
expand_as=layer2, expand_as=layer2,
expand_level=ExpandLevel.FROM_NO_SEQUENCE) expand_level=ExpandLevel.FROM_NO_SEQUENCE)
:param input: Input layer :param input: The input of this layer.
:type input: LayerOutput :type input: LayerOutput
:param expand_as: Expand as this layer's sequence info. :param expand_as: Expand as this layer's sequence info.
:type expand_as: LayerOutput :type expand_as: LayerOutput
...@@ -1809,7 +1809,7 @@ def expand_layer(input, ...@@ -1809,7 +1809,7 @@ def expand_layer(input,
False or something not type of ParameterAttribute, False or something not type of ParameterAttribute,
no bias is defined. If the parameter is set to no bias is defined. If the parameter is set to
True, the bias is initialized to zero. True, the bias is initialized to zero.
:type bias_attr: ParameterAttribute|None|Bool|Any :type bias_attr: ParameterAttribute | None | bool | Any
:param expand_level: whether input layer is timestep(default) or sequence. :param expand_level: whether input layer is timestep(default) or sequence.
:type expand_level: ExpandLevel :type expand_level: ExpandLevel
:param layer_attr: extra layer attributes. :param layer_attr: extra layer attributes.
...@@ -1858,7 +1858,7 @@ def repeat_layer(input, ...@@ -1858,7 +1858,7 @@ def repeat_layer(input,
expand = repeat_layer(input=layer, num_repeats=4) expand = repeat_layer(input=layer, num_repeats=4)
:param input: Input layer :param input: The input of this layer.
:type input: LayerOutput :type input: LayerOutput
:param num_repeats: Repeat the input so many times :param num_repeats: Repeat the input so many times
:type num_repeats: int :type num_repeats: int
...@@ -1869,7 +1869,7 @@ def repeat_layer(input, ...@@ -1869,7 +1869,7 @@ def repeat_layer(input,
False for treating input as column vector and repeating False for treating input as column vector and repeating
in the row direction. in the row direction.
:type as_row_vector: bool :type as_row_vector: bool
:param act: Activation type. :param act: Activation type. IdentityActivation is the default.
:type act: BaseActivation :type act: BaseActivation
:type name: basestring :type name: basestring
:param layer_attr: extra layer attributes. :param layer_attr: extra layer attributes.
...@@ -1917,13 +1917,13 @@ def seq_reshape_layer(input, ...@@ -1917,13 +1917,13 @@ def seq_reshape_layer(input,
reshape = seq_reshape_layer(input=layer, reshape_size=4) reshape = seq_reshape_layer(input=layer, reshape_size=4)
:param input: Input layer. :param input: The input of this layer.
:type input: LayerOutput :type input: LayerOutput
:param reshape_size: the size of reshaped sequence. :param reshape_size: the size of reshaped sequence.
:type reshape_size: int :type reshape_size: int
:param name: The name of this layer. It is optional. :param name: The name of this layer. It is optional.
:type name: basestring :type name: basestring
:param act: Activation type. :param act: Activation type. IdentityActivation is the default.
:type act: BaseActivation :type act: BaseActivation
:param layer_attr: extra layer attributes. :param layer_attr: extra layer attributes.
:type layer_attr: ExtraLayerAttribute. :type layer_attr: ExtraLayerAttribute.
...@@ -1931,7 +1931,7 @@ def seq_reshape_layer(input, ...@@ -1931,7 +1931,7 @@ def seq_reshape_layer(input,
False or something not type of ParameterAttribute, False or something not type of ParameterAttribute,
no bias is defined. If the parameter is set to no bias is defined. If the parameter is set to
True, the bias is initialized to zero. True, the bias is initialized to zero.
:type bias_attr: ParameterAttribute|None|Bool|Any :type bias_attr: ParameterAttribute | None | bool | Any
:return: LayerOutput object. :return: LayerOutput object.
:rtype: LayerOutput :rtype: LayerOutput
""" """
...@@ -1970,8 +1970,8 @@ def interpolation_layer(input, weight, name=None, layer_attr=None): ...@@ -1970,8 +1970,8 @@ def interpolation_layer(input, weight, name=None, layer_attr=None):
interpolation = interpolation_layer(input=[layer1, layer2], weight=layer3) interpolation = interpolation_layer(input=[layer1, layer2], weight=layer3)
:param input: Input layer. :param input: The input of this layer.
:type input: list|tuple :type input: list | tuple
:param weight: Weight layer. :param weight: Weight layer.
:type weight: LayerOutput :type weight: LayerOutput
:param name: The name of this layer. It is optional. :param name: The name of this layer. It is optional.
...@@ -2023,11 +2023,11 @@ def bilinear_interp_layer(input, ...@@ -2023,11 +2023,11 @@ def bilinear_interp_layer(input,
:param input: An input layer. :param input: An input layer.
:type input: LayerOutput. :type input: LayerOutput.
:param out_size_x: bilinear interpolation output width. :param out_size_x: bilinear interpolation output width.
:type out_size_x: int|None :type out_size_x: int | None
:param out_size_y: bilinear interpolation output height. :param out_size_y: bilinear interpolation output height.
:type out_size_y: int|None :type out_size_y: int | None
:param name: The layer's name, which can not be specified. :param name: The layer's name, which can not be specified.
:type name: None|basestring :type name: None | basestring
:param layer_attr: Extra Layer attribute. :param layer_attr: Extra Layer attribute.
:type layer_attr: ExtraLayerAttribute :type layer_attr: ExtraLayerAttribute
:return: LayerOutput object. :return: LayerOutput object.
...@@ -2075,7 +2075,7 @@ def power_layer(input, weight, name=None, layer_attr=None): ...@@ -2075,7 +2075,7 @@ def power_layer(input, weight, name=None, layer_attr=None):
power = power_layer(input=layer1, weight=layer2) power = power_layer(input=layer1, weight=layer2)
:param input: Input layer. :param input: The input of this layer.
:type input: LayerOutput :type input: LayerOutput
:param weight: Weight layer. :param weight: Weight layer.
:type weight: LayerOutput :type weight: LayerOutput
...@@ -2119,7 +2119,7 @@ def scaling_layer(input, weight, name=None, layer_attr=None): ...@@ -2119,7 +2119,7 @@ def scaling_layer(input, weight, name=None, layer_attr=None):
scale = scaling_layer(input=layer1, weight=layer2) scale = scaling_layer(input=layer1, weight=layer2)
:param input: Input layer. :param input: The input of this layer.
:type input: LayerOutput :type input: LayerOutput
:param weight: Weight layer. :param weight: Weight layer.
:type weight: LayerOutput :type weight: LayerOutput
...@@ -2159,7 +2159,7 @@ def trans_layer(input, name=None, layer_attr=None): ...@@ -2159,7 +2159,7 @@ def trans_layer(input, name=None, layer_attr=None):
trans = trans_layer(input=layer) trans = trans_layer(input=layer)
:param input: Input layer. :param input: The input of this layer.
:type input: LayerOutput :type input: LayerOutput
:param name: The name of this layer. It is optional. :param name: The name of this layer. It is optional.
:type name: basestring :type name: basestring
...@@ -2197,7 +2197,7 @@ def rotate_layer(input, height, width, name=None, layer_attr=None): ...@@ -2197,7 +2197,7 @@ def rotate_layer(input, height, width, name=None, layer_attr=None):
height=100, height=100,
width=100) width=100)
:param input: Input layer. :param input: The input of this layer.
:type input: LayerOutput :type input: LayerOutput
:param height: The height of the sample matrix :param height: The height of the sample matrix
:type height: int :type height: int
...@@ -2306,22 +2306,21 @@ def hsigmoid(input, ...@@ -2306,22 +2306,21 @@ def hsigmoid(input,
cost = hsigmoid(input=[layer1, layer2], cost = hsigmoid(input=[layer1, layer2],
label=data_layer) label=data_layer)
:param input: Input layers. It could be a LayerOutput or list/tuple of :param input: The input of this layer.
LayerOutput. :type input: LayerOutput | list | tuple
:type input: LayerOutput|list|tuple
:param label: Label layer. :param label: Label layer.
:type label: LayerOutput :type label: LayerOutput
:param num_classes: number of classes. :param num_classes: number of classes.
:type num_classes: int|None :type num_classes: int | None
:param name: The name of this layer. It is optional. :param name: The name of this layer. It is optional.
:type name: basestring :type name: basestring
:param bias_attr: The Bias Attribute. If the parameter is set to :param bias_attr: The Bias Attribute. If the parameter is set to
False or something not type of ParameterAttribute, False or something not type of ParameterAttribute,
no bias is defined. If the parameter is set to no bias is defined. If the parameter is set to
True, the bias is initialized to zero. True, the bias is initialized to zero.
:type bias_attr: ParameterAttribute|None|Bool|Any :type bias_attr: ParameterAttribute | None | bool | Any
:param param_attr: Parameter Attribute. None means default parameter. :param param_attr: Parameter Attribute. None means default parameter.
:type param_attr: ParameterAttribute|None :type param_attr: ParameterAttribute | None
:param layer_attr: Extra Layer Attribute. :param layer_attr: Extra Layer Attribute.
:type layer_attr: ExtraLayerAttribute :type layer_attr: ExtraLayerAttribute
:return: LayerOutput object. :return: LayerOutput object.
...@@ -2429,40 +2428,40 @@ def img_conv_layer(input, ...@@ -2429,40 +2428,40 @@ def img_conv_layer(input,
:param name: The name of this layer. It is optional. :param name: The name of this layer. It is optional.
:type name: basestring :type name: basestring
:param input: Layer Input. :param input: The input of this layer.
:type input: LayerOutput :type input: LayerOutput
:param filter_size: The x dimension of a filter kernel. Or input a tuple for :param filter_size: The x dimension of a filter kernel. Or input a tuple for
two image dimension. two image dimension.
:type filter_size: int|tuple|list :type filter_size: int | tuple | list
:param filter_size_y: The y dimension of a filter kernel. Since PaddlePaddle :param filter_size_y: The y dimension of a filter kernel. Since PaddlePaddle
currently supports rectangular filters, the filter's currently supports rectangular filters, the filter's
shape will be (filter_size, filter_size_y). shape will be (filter_size, filter_size_y).
:type filter_size_y: int|None :type filter_size_y: int | None
:param num_filters: Each filter group's number of filter :param num_filters: Each filter group's number of filter
:param act: Activation type. Default is tanh :param act: Activation type. ReluActivation is the default.
:type act: BaseActivation :type act: BaseActivation
:param groups: Group size of filters. :param groups: Group size of filters.
:type groups: int :type groups: int
:param stride: The x dimension of the stride. Or input a tuple for two image :param stride: The x dimension of the stride. Or input a tuple for two image
dimension. dimension.
:type stride: int|tuple|list :type stride: int | tuple | list
:param stride_y: The y dimension of the stride. :param stride_y: The y dimension of the stride.
:type stride_y: int :type stride_y: int
:param padding: The x dimension of the padding. Or input a tuple for two :param padding: The x dimension of the padding. Or input a tuple for two
image dimension image dimension
:type padding: int|tuple|list :type padding: int | tuple | list
:param padding_y: The y dimension of the padding. :param padding_y: The y dimension of the padding.
:type padding_y: int :type padding_y: int
:param dilation: The x dimension of the dilation. Or input a tuple for two :param dilation: The x dimension of the dilation. Or input a tuple for two
image dimension image dimension
:type dilation: int|tuple|list :type dilation: int | tuple | list
:param dilation_y: The y dimension of the dilation. :param dilation_y: The y dimension of the dilation.
:type dilation_y: int :type dilation_y: int
:param bias_attr: The Bias Attribute. If the parameter is set to :param bias_attr: The Bias Attribute. If the parameter is set to
False or something not type of ParameterAttribute, False or something not type of ParameterAttribute,
no bias is defined. If the parameter is set to no bias is defined. If the parameter is set to
True, the bias is initialized to zero. True, the bias is initialized to zero.
:type bias_attr: ParameterAttribute|None|Bool|Any :type bias_attr: ParameterAttribute | None | bool | Any
:param num_channels: number of input channels. If None will be set :param num_channels: number of input channels. If None will be set
automatically from previous output. automatically from previous output.
:type num_channels: int :type num_channels: int
...@@ -2616,15 +2615,15 @@ def img_pool_layer(input, ...@@ -2616,15 +2615,15 @@ def img_pool_layer(input,
:param padding: pooling padding width. :param padding: pooling padding width.
:type padding: int :type padding: int
:param padding_y: pooling padding height. It's equal to padding by default. :param padding_y: pooling padding height. It's equal to padding by default.
:type padding_y: int|None :type padding_y: int | None
:param name: name of pooling layer :param name: name of pooling layer
:type name: basestring. :type name: basestring.
:param input: layer's input :param input: The input of this layer.
:type input: LayerOutput :type input: LayerOutput
:param pool_size: pooling window width :param pool_size: pooling window width
:type pool_size: int :type pool_size: int
:param pool_size_y: pooling window height. It's equal to pool_size by default. :param pool_size_y: pooling window height. It's equal to pool_size by default.
:type pool_size_y: int|None :type pool_size_y: int | None
:param num_channels: number of input channel. :param num_channels: number of input channel.
:type num_channels: int :type num_channels: int
:param pool_type: pooling type. MaxPooling or AvgPooling. Default is :param pool_type: pooling type. MaxPooling or AvgPooling. Default is
...@@ -2633,7 +2632,7 @@ def img_pool_layer(input, ...@@ -2633,7 +2632,7 @@ def img_pool_layer(input,
:param stride: stride width of pooling. :param stride: stride width of pooling.
:type stride: int :type stride: int
:param stride_y: stride height of pooling. It is equal to stride by default. :param stride_y: stride height of pooling. It is equal to stride by default.
:type stride_y: int|None :type stride_y: int | None
:param layer_attr: Extra Layer attribute. :param layer_attr: Extra Layer attribute.
:type layer_attr: ExtraLayerAttribute :type layer_attr: ExtraLayerAttribute
:param ceil_mode: Whether to use ceil mode to calculate output height and width. :param ceil_mode: Whether to use ceil mode to calculate output height and width.
...@@ -2743,20 +2742,20 @@ def img_pool3d_layer(input, ...@@ -2743,20 +2742,20 @@ def img_pool3d_layer(input,
pool_type=MaxPooling()) pool_type=MaxPooling())
:param padding: pooling padding width. :param padding: pooling padding width.
:type padding: int|tuple|list :type padding: int | tuple | list
:param name: name of pooling layer :param name: name of pooling layer
:type name: basestring. :type name: basestring.
:param input: layer's input :param input: The input of this layer.
:type input: LayerOutput :type input: LayerOutput
:param pool_size: pooling window width :param pool_size: pooling window width
:type pool_size: int|tuple|list :type pool_size: int | tuple | list
:param num_channels: number of input channel. :param num_channels: number of input channel.
:type num_channels: int :type num_channels: int
:param pool_type: pooling type. MaxPooling or AvgPooling. Default is :param pool_type: pooling type. MaxPooling or AvgPooling. Default is
MaxPooling. MaxPooling.
:type pool_type: BasePoolingType :type pool_type: BasePoolingType
:param stride: stride width of pooling. :param stride: stride width of pooling.
:type stride: int|tuple|list :type stride: int | tuple | list
:param layer_attr: Extra Layer attribute. :param layer_attr: Extra Layer attribute.
:type layer_attr: ExtraLayerAttribute :type layer_attr: ExtraLayerAttribute
:param ceil_mode: Whether to use ceil mode to calculate output height and width. :param ceil_mode: Whether to use ceil mode to calculate output height and width.
...@@ -2855,7 +2854,7 @@ def spp_layer(input, ...@@ -2855,7 +2854,7 @@ def spp_layer(input,
:param name: The name of this layer. It is optional. :param name: The name of this layer. It is optional.
:type name: basestring :type name: basestring
:param input: layer's input. :param input: The input of this layer.
:type input: LayerOutput :type input: LayerOutput
:param num_channels: number of input channel. :param num_channels: number of input channel.
:type num_channels: int :type num_channels: int
...@@ -2948,8 +2947,8 @@ def img_cmrnorm_layer(input, ...@@ -2948,8 +2947,8 @@ def img_cmrnorm_layer(input,
norm = img_cmrnorm_layer(input=net, size=5) norm = img_cmrnorm_layer(input=net, size=5)
:param name: The name of this layer. It is optional. :param name: The name of this layer. It is optional.
:type name: None|basestring :type name: None | basestring
:param input: layer's input. :param input: The input of this layer.
:type input: LayerOutput :type input: LayerOutput
:param size: Normalize in number of :math:`size` feature maps. :param size: Normalize in number of :math:`size` feature maps.
:type size: int :type size: int
...@@ -3024,7 +3023,7 @@ def batch_norm_layer(input, ...@@ -3024,7 +3023,7 @@ def batch_norm_layer(input,
batch_norm for CPU. Otherwise, select batch norm batch_norm for CPU. Otherwise, select batch norm
type based on the specified type. If you use cudnn_batch_norm, type based on the specified type. If you use cudnn_batch_norm,
we suggested you use latest version, such as v5.1. we suggested you use latest version, such as v5.1.
:type batch_norm_type: None|string, None or "batch_norm" or "cudnn_batch_norm" :type batch_norm_type: None | string, None or "batch_norm" or "cudnn_batch_norm"
:param act: Activation Type. Better be relu. Because batch :param act: Activation Type. Better be relu. Because batch
normalization will normalize input near zero. normalization will normalize input near zero.
:type act: BaseActivation :type act: BaseActivation
...@@ -3034,7 +3033,7 @@ def batch_norm_layer(input, ...@@ -3034,7 +3033,7 @@ def batch_norm_layer(input,
:type num_channels: int :type num_channels: int
:param bias_attr: :math:`\\beta`, better be zero when initialize. So the :param bias_attr: :math:`\\beta`, better be zero when initialize. So the
initial_std=0, initial_mean=1 is best practice. initial_std=0, initial_mean=1 is best practice.
:type bias_attr: ParameterAttribute|None|Bool|Any :type bias_attr: ParameterAttribute | None | bool | Any
:param param_attr: :math:`\\gamma`, better be one when initialize. So the :param param_attr: :math:`\\gamma`, better be one when initialize. So the
initial_std=0, initial_mean=1 is best practice. initial_std=0, initial_mean=1 is best practice.
:type param_attr: ParameterAttribute :type param_attr: ParameterAttribute
...@@ -3046,7 +3045,7 @@ def batch_norm_layer(input, ...@@ -3046,7 +3045,7 @@ def batch_norm_layer(input,
testing. If False, it will use the mean testing. If False, it will use the mean
and variance of current batch of test data for and variance of current batch of test data for
testing. testing.
:type use_global_stats: bool|None. :type use_global_stats: bool | None.
:param moving_average_fraction: Factor used in the moving average :param moving_average_fraction: Factor used in the moving average
computation, referred to as factor, computation, referred to as factor,
:math:`runningMean = newMean*(1-factor) :math:`runningMean = newMean*(1-factor)
...@@ -3107,7 +3106,7 @@ def sum_to_one_norm_layer(input, name=None, layer_attr=None): ...@@ -3107,7 +3106,7 @@ def sum_to_one_norm_layer(input, name=None, layer_attr=None):
sum_to_one_norm = sum_to_one_norm_layer(input=layer) sum_to_one_norm = sum_to_one_norm_layer(input=layer)
:param input: Input layer. :param input: The input of this layer.
:type input: LayerOutput :type input: LayerOutput
:param name: The name of this layer. It is optional. :param name: The name of this layer. It is optional.
:type name: basestring :type name: basestring
...@@ -3143,7 +3142,7 @@ def row_l2_norm_layer(input, name=None, layer_attr=None): ...@@ -3143,7 +3142,7 @@ def row_l2_norm_layer(input, name=None, layer_attr=None):
row_l2_norm_layer = row_l2_norm_layer(input=layer) row_l2_norm_layer = row_l2_norm_layer(input=layer)
:param input: Input layer. :param input: The input of this layer.
:type input: LayerOutput :type input: LayerOutput
:param name: The name of this layer. It is optional. :param name: The name of this layer. It is optional.
:type name: basestring :type name: basestring
...@@ -3201,14 +3200,14 @@ def addto_layer(input, act=None, name=None, bias_attr=None, layer_attr=None): ...@@ -3201,14 +3200,14 @@ def addto_layer(input, act=None, name=None, bias_attr=None, layer_attr=None):
:type name: basestring :type name: basestring
:param input: Input layers. It could be a LayerOutput or list/tuple of :param input: Input layers. It could be a LayerOutput or list/tuple of
LayerOutput. LayerOutput.
:type input: LayerOutput|list|tuple :type input: LayerOutput | list | tuple
:param act: Activation Type, default is tanh. :param act: Activation Type. LinearActivation is the default.
:type act: BaseActivation :type act: BaseActivation
:param bias_attr: The Bias Attribute. If the parameter is set to :param bias_attr: The Bias Attribute. If the parameter is set to
False or something not type of ParameterAttribute, False or something not type of ParameterAttribute,
no bias is defined. If the parameter is set to no bias is defined. If the parameter is set to
True, the bias is initialized to zero. True, the bias is initialized to zero.
:type bias_attr: ParameterAttribute|None|Bool|Any :type bias_attr: ParameterAttribute | None | bool | Any
:param layer_attr: Extra Layer attribute. :param layer_attr: Extra Layer attribute.
:type layer_attr: ExtraLayerAttribute :type layer_attr: ExtraLayerAttribute
:return: LayerOutput object. :return: LayerOutput object.
...@@ -3260,8 +3259,8 @@ def concat_layer(input, act=None, name=None, layer_attr=None, bias_attr=None): ...@@ -3260,8 +3259,8 @@ def concat_layer(input, act=None, name=None, layer_attr=None, bias_attr=None):
:param name: The name of this layer. It is optional. :param name: The name of this layer. It is optional.
:type name: basestring :type name: basestring
:param input: input layers or projections :param input: input layers or projections
:type input: list|tuple|collections.Sequence :type input: list | tuple | collections.Sequence
:param act: Activation type. :param act: Activation type. IdentityActivation is the default.
:type act: BaseActivation :type act: BaseActivation
:param layer_attr: Extra Layer Attribute. :param layer_attr: Extra Layer Attribute.
:type layer_attr: ExtraLayerAttribute :type layer_attr: ExtraLayerAttribute
...@@ -3356,7 +3355,7 @@ def seq_concat_layer(a, b, act=None, name=None, layer_attr=None, ...@@ -3356,7 +3355,7 @@ def seq_concat_layer(a, b, act=None, name=None, layer_attr=None,
:type a: LayerOutput :type a: LayerOutput
:param b: input sequence layer :param b: input sequence layer
:type b: LayerOutput :type b: LayerOutput
:param act: Activation type. :param act: Activation type. IdentityActivation is the default.
:type act: BaseActivation :type act: BaseActivation
:param layer_attr: Extra Layer Attribute. :param layer_attr: Extra Layer Attribute.
:type layer_attr: ExtraLayerAttribute :type layer_attr: ExtraLayerAttribute
...@@ -3364,7 +3363,7 @@ def seq_concat_layer(a, b, act=None, name=None, layer_attr=None, ...@@ -3364,7 +3363,7 @@ def seq_concat_layer(a, b, act=None, name=None, layer_attr=None,
False or something not type of ParameterAttribute, False or something not type of ParameterAttribute,
no bias is defined. If the parameter is set to no bias is defined. If the parameter is set to
True, the bias is initialized to zero. True, the bias is initialized to zero.
:type bias_attr: ParameterAttribute|None|Bool|Any :type bias_attr: ParameterAttribute | None | bool | Any
:return: LayerOutput object. :return: LayerOutput object.
:rtype: LayerOutput :rtype: LayerOutput
""" """
...@@ -3440,9 +3439,9 @@ def memory(name, ...@@ -3440,9 +3439,9 @@ def memory(name,
:param is_seq: DEPRECATED. is sequence for boot_layer :param is_seq: DEPRECATED. is sequence for boot_layer
:type is_seq: bool :type is_seq: bool
:param boot_layer: boot layer of memory. :param boot_layer: boot layer of memory.
:type boot_layer: LayerOutput|None :type boot_layer: LayerOutput | None
:param boot_bias: boot layer's bias :param boot_bias: boot layer's bias
:type boot_bias: ParameterAttribute|None :type boot_bias: ParameterAttribute | None
:param boot_bias_active_type: boot layer's active type. :param boot_bias_active_type: boot layer's active type.
:type boot_bias_active_type: BaseActivation :type boot_bias_active_type: BaseActivation
:param boot_with_const_id: boot layer's id. :param boot_with_const_id: boot layer's id.
...@@ -3537,19 +3536,17 @@ def lstm_step_layer(input, ...@@ -3537,19 +3536,17 @@ def lstm_step_layer(input,
:type input: LayerOutput :type input: LayerOutput
:param state: State Layer. :math:`c_{t-1}` :param state: State Layer. :math:`c_{t-1}`
:type state: LayerOutput :type state: LayerOutput
:param act: Activation type. Default is tanh :param act: Activation type. TanhActivation is the default.
:type act: BaseActivation :type act: BaseActivation
:param gate_act: Gate Activation Type. Default is sigmoid, and should :param gate_act: Gate Activation Type. SigmoidActivation is the default.
be sigmoid only.
:type gate_act: BaseActivation :type gate_act: BaseActivation
:param state_act: State Activation Type. Default is sigmoid, and should :param state_act: State Activation Type. TanhActivation is the default.
be sigmoid only.
:type state_act: BaseActivation :type state_act: BaseActivation
:param bias_attr: The Bias Attribute. If the parameter is set to :param bias_attr: The Bias Attribute. If the parameter is set to
False or something not type of ParameterAttribute, False or something not type of ParameterAttribute,
no bias is defined. If the parameter is set to no bias is defined. If the parameter is set to
True, the bias is initialized to zero. True, the bias is initialized to zero.
:type bias_attr: ParameterAttribute|None|Bool|Any :type bias_attr: ParameterAttribute | None | bool | Any
:param layer_attr: layer's extra attribute. :param layer_attr: layer's extra attribute.
:type layer_attr: ExtraLayerAttribute :type layer_attr: ExtraLayerAttribute
:return: LayerOutput object. :return: LayerOutput object.
...@@ -3600,13 +3597,15 @@ def gru_step_layer(input, ...@@ -3600,13 +3597,15 @@ def gru_step_layer(input,
:param output_mem: :param output_mem:
:param size: :param size:
:param act: :param act:
:type act: BaseActivation
:param name: The name of this layer. It is optional. :param name: The name of this layer. It is optional.
:param gate_act: :param gate_act: Activation type of this layer's two gates. Default is Sigmoid.
:type gate_act: BaseActivation
:param bias_attr: The Bias Attribute. If the parameter is set to :param bias_attr: The Bias Attribute. If the parameter is set to
False or something not type of ParameterAttribute, False or something not type of ParameterAttribute,
no bias is defined. If the parameter is set to no bias is defined. If the parameter is set to
True, the bias is initialized to zero. True, the bias is initialized to zero.
:type bias_attr: ParameterAttribute|None|Bool|Any :type bias_attr: ParameterAttribute | None | bool | Any
:param param_attr: the parameter_attribute for transforming the output_mem :param param_attr: the parameter_attribute for transforming the output_mem
from previous step. from previous step.
:param layer_attr: :param layer_attr:
...@@ -3662,12 +3661,14 @@ def gru_step_naive_layer(input, ...@@ -3662,12 +3661,14 @@ def gru_step_naive_layer(input,
:param size: :param size:
:param name: The name of this layer. It is optional. :param name: The name of this layer. It is optional.
:param act: :param act:
:param gate_act: :type act: BaseActivation
:param gate_act: Activation type of this layer's two gates. Default is Sigmoid.
:type gate_act: BaseActivation
:param bias_attr: The Bias Attribute. If the parameter is set to :param bias_attr: The Bias Attribute. If the parameter is set to
False or something not type of ParameterAttribute, False or something not type of ParameterAttribute,
no bias is defined. If the parameter is set to no bias is defined. If the parameter is set to
True, the bias is initialized to zero. True, the bias is initialized to zero.
:type bias_attr: ParameterAttribute|None|Bool|Any :type bias_attr: ParameterAttribute | None | bool | Any
:param param_attr: :param param_attr:
:param layer_attr: :param layer_attr:
:return: :return:
...@@ -3678,6 +3679,12 @@ def gru_step_naive_layer(input, ...@@ -3678,6 +3679,12 @@ def gru_step_naive_layer(input,
if size is None: if size is None:
size = input.size / 3 size = input.size / 3
if bias_attr and bias_attr.attr.get("parameter_name", None) is not None:
raise ValueError("You should not specify the field `name` in bias_attr."
" Otherwise, the three biases, which correponding to "
" the two gates and the mixed layer for computing Wx+b"
", will share the same parameter matrix unexpectedly.")
def __gate__(gate_name, offset): def __gate__(gate_name, offset):
with mixed_layer( with mixed_layer(
name=name + "_" + gate_name, name=name + "_" + gate_name,
...@@ -3786,15 +3793,15 @@ def recurrent_layer(input, ...@@ -3786,15 +3793,15 @@ def recurrent_layer(input,
out_{i} = act(in_{i} + out_{i+1} * W) \\ \\ \\text{for} \\ start <= i < end out_{i} = act(in_{i} + out_{i+1} * W) \\ \\ \\text{for} \\ start <= i < end
:param input: Input Layer :param input: The input of this layer.
:type input: LayerOutput :type input: LayerOutput
:param act: activation. :param act: Activation type. TanhActivation is the default.
:type act: BaseActivation :type act: BaseActivation
:param bias_attr: The Bias Attribute. If the parameter is set to :param bias_attr: The Bias Attribute. If the parameter is set to
False or something not type of ParameterAttribute, False or something not type of ParameterAttribute,
no bias is defined. If the parameter is set to no bias is defined. If the parameter is set to
True, the bias is initialized to zero. True, the bias is initialized to zero.
:type bias_attr: ParameterAttribute|None|Bool|Any :type bias_attr: ParameterAttribute | None | bool | Any
:param param_attr: parameter attribute. :param param_attr: parameter attribute.
:type param_attr: ParameterAttribute :type param_attr: ParameterAttribute
:param name: The name of this layer. It is optional. :param name: The name of this layer. It is optional.
...@@ -3901,7 +3908,7 @@ def recurrent_group(step, input, reverse=False, name=None, targetInlink=None): ...@@ -3901,7 +3908,7 @@ def recurrent_group(step, input, reverse=False, name=None, targetInlink=None):
StaticInput will be imported to each time step, and doesn't change StaticInput will be imported to each time step, and doesn't change
through time. It's a mechanism to access layer outside step function. through time. It's a mechanism to access layer outside step function.
:type input: LayerOutput|StaticInput|SubsequenceInput|list|tuple :type input: LayerOutput | StaticInput | SubsequenceInput | list | tuple
:param reverse: If reverse is set true, the recurrent unit will process the :param reverse: If reverse is set true, the recurrent unit will process the
input sequence in a reverse order. input sequence in a reverse order.
...@@ -3916,7 +3923,7 @@ def recurrent_group(step, input, reverse=False, name=None, targetInlink=None): ...@@ -3916,7 +3923,7 @@ def recurrent_group(step, input, reverse=False, name=None, targetInlink=None):
of words in each sentence) with all layer group's outputs. of words in each sentence) with all layer group's outputs.
targetInlink should be one of the layer group's input. targetInlink should be one of the layer group's input.
:type targetInlink: LayerOutput|SubsequenceInput :type targetInlink: LayerOutput | SubsequenceInput
:return: LayerOutput object. :return: LayerOutput object.
:rtype: LayerOutput :rtype: LayerOutput
...@@ -4034,7 +4041,7 @@ def maxid_layer(input, name=None, layer_attr=None): ...@@ -4034,7 +4041,7 @@ def maxid_layer(input, name=None, layer_attr=None):
maxid = maxid_layer(input=layer) maxid = maxid_layer(input=layer)
:param input: Input layer name. :param input: The input of this layer.
:type input: LayerOutput :type input: LayerOutput
:param name: The name of this layer. It is optional. :param name: The name of this layer. It is optional.
:type name: basestring :type name: basestring
...@@ -4112,7 +4119,7 @@ def eos_layer(input, eos_id, name=None, layer_attr=None): ...@@ -4112,7 +4119,7 @@ def eos_layer(input, eos_id, name=None, layer_attr=None):
:param name: The name of this layer. It is optional. :param name: The name of this layer. It is optional.
:type name: basestring :type name: basestring
:param input: Input layer name. :param input: The input of this layer.
:type input: LayerOutput :type input: LayerOutput
:param eos_id: end id of sequence :param eos_id: end id of sequence
:type eos_id: int :type eos_id: int
...@@ -4504,7 +4511,7 @@ def conv_projection(input, ...@@ -4504,7 +4511,7 @@ def conv_projection(input,
num_filters=64, num_filters=64,
num_channels=64) num_channels=64)
:param input: input layer :param input: The input of this layer.
:type input: LayerOutput :type input: LayerOutput
:param filter_size: The x dimension of a filter kernel. :param filter_size: The x dimension of a filter kernel.
:type filter_size: int :type filter_size: int
...@@ -4529,7 +4536,7 @@ def conv_projection(input, ...@@ -4529,7 +4536,7 @@ def conv_projection(input,
:param param_attr: Convolution param attribute. None means default attribute :param param_attr: Convolution param attribute. None means default attribute
:type param_attr: ParameterAttribute :type param_attr: ParameterAttribute
:param trans: whether it is convTrans or conv :param trans: whether it is convTrans or conv
:type trans: boolean :type trans: bool
:return: A DotMulProjection Object. :return: A DotMulProjection Object.
:rtype: DotMulProjection :rtype: DotMulProjection
""" """
...@@ -4637,14 +4644,14 @@ def pad_layer(input, ...@@ -4637,14 +4644,14 @@ def pad_layer(input,
pad_h=[0,0], pad_h=[0,0],
pad_w=[2,2]) pad_w=[2,2])
:param input: layer's input. :param input: The input of this layer.
:type input: LayerOutput :type input: LayerOutput
:param pad_c: padding size in channel dimension. :param pad_c: padding size in channel dimension.
:type pad_c: list|None :type pad_c: list | None
:param pad_h: padding size in height dimension. :param pad_h: padding size in height dimension.
:type pad_h: list|None :type pad_h: list | None
:param pad_w: padding size in width dimension. :param pad_w: padding size in width dimension.
:type pad_w: list|None :type pad_w: list | None
:param layer_attr: Extra Layer Attribute. :param layer_attr: Extra Layer Attribute.
:type layer_attr: ExtraLayerAttribute :type layer_attr: ExtraLayerAttribute
:param name: The name of this layer. It is optional. :param name: The name of this layer. It is optional.
...@@ -4779,7 +4786,7 @@ def tensor_layer(a, ...@@ -4779,7 +4786,7 @@ def tensor_layer(a,
:type b: LayerOutput :type b: LayerOutput
:param size: the layer dimension. :param size: the layer dimension.
:type size: int. :type size: int.
:param act: Activation Type. Default is tanh. :param act: Activation type. LinearActivation is the default.
:type act: BaseActivation :type act: BaseActivation
:param param_attr: The Parameter Attribute. :param param_attr: The Parameter Attribute.
:type param_attr: ParameterAttribute :type param_attr: ParameterAttribute
...@@ -4787,9 +4794,9 @@ def tensor_layer(a, ...@@ -4787,9 +4794,9 @@ def tensor_layer(a,
False or something not type of ParameterAttribute, False or something not type of ParameterAttribute,
no bias is defined. If the parameter is set to no bias is defined. If the parameter is set to
True, the bias is initialized to zero. True, the bias is initialized to zero.
:type bias_attr: ParameterAttribute|None|Bool|Any :type bias_attr: ParameterAttribute | None | bool | Any
:param layer_attr: Extra Layer config. :param layer_attr: Extra Layer config.
:type layer_attr: ExtraLayerAttribute|None :type layer_attr: ExtraLayerAttribute | None
:return: LayerOutput object. :return: LayerOutput object.
:rtype: LayerOutput :rtype: LayerOutput
""" """
...@@ -4836,15 +4843,15 @@ def selective_fc_layer(input, ...@@ -4836,15 +4843,15 @@ def selective_fc_layer(input,
:param name: The name of this layer. It is optional. :param name: The name of this layer. It is optional.
:type name: basestring :type name: basestring
:param input: The input layer. :param input: The input of this layer.
:type input: LayerOutput|list|tuple :type input: LayerOutput | list | tuple
:param select: The select layer. The output of select layer should be a :param select: The select layer. The output of select layer should be a
sparse binary matrix, and treat as the mask of selective fc. sparse binary matrix, and treat as the mask of selective fc.
If is None, acts exactly like fc_layer. If is None, acts exactly like fc_layer.
:type select: LayerOutput :type select: LayerOutput
:param size: The layer dimension. :param size: The layer dimension.
:type size: int :type size: int
:param act: Activation Type. Default is tanh. :param act: Activation type. TanhActivation is the default.
:type act: BaseActivation :type act: BaseActivation
:param param_attr: The Parameter Attribute. :param param_attr: The Parameter Attribute.
:type param_attr: ParameterAttribute :type param_attr: ParameterAttribute
...@@ -4852,9 +4859,9 @@ def selective_fc_layer(input, ...@@ -4852,9 +4859,9 @@ def selective_fc_layer(input,
False or something not type of ParameterAttribute, False or something not type of ParameterAttribute,
no bias is defined. If the parameter is set to no bias is defined. If the parameter is set to
True, the bias is initialized to zero. True, the bias is initialized to zero.
:type bias_attr: ParameterAttribute|None|Bool|Any :type bias_attr: ParameterAttribute | None | bool | Any
:param layer_attr: Extra Layer config. :param layer_attr: Extra Layer config.
:type layer_attr: ExtraLayerAttribute|None :type layer_attr: ExtraLayerAttribute | None
:return: LayerOutput object. :return: LayerOutput object.
:rtype: LayerOutput :rtype: LayerOutput
""" """
...@@ -4906,12 +4913,12 @@ def sampling_id_layer(input, name=None, layer_attr=None): ...@@ -4906,12 +4913,12 @@ def sampling_id_layer(input, name=None, layer_attr=None):
samping_id = sampling_id_layer(input=input) samping_id = sampling_id_layer(input=input)
:param input: The input layer. :param input: The input of this layer.
:type input: LayerOutput :type input: LayerOutput
:param name: The name of this layer. It is optional. :param name: The name of this layer. It is optional.
:type name: basestring :type name: basestring
:param layer_attr: Extra Layer config. :param layer_attr: Extra Layer config.
:type layer_attr: ExtraLayerAttribute|None :type layer_attr: ExtraLayerAttribute | None
:return: LayerOutput object. :return: LayerOutput object.
:rtype: LayerOutput :rtype: LayerOutput
""" """
...@@ -4944,7 +4951,7 @@ def slope_intercept_layer(input, ...@@ -4944,7 +4951,7 @@ def slope_intercept_layer(input,
scale = slope_intercept_layer(input=input, slope=-1.0, intercept=1.0) scale = slope_intercept_layer(input=input, slope=-1.0, intercept=1.0)
:param input: The input layer. :param input: The input of this layer.
:type input: LayerOutput :type input: LayerOutput
:param name: The name of this layer. It is optional. :param name: The name of this layer. It is optional.
:type name: basestring :type name: basestring
...@@ -4953,7 +4960,7 @@ def slope_intercept_layer(input, ...@@ -4953,7 +4960,7 @@ def slope_intercept_layer(input,
:param intercept: the offset. :param intercept: the offset.
:type intercept: float. :type intercept: float.
:param layer_attr: Extra Layer config. :param layer_attr: Extra Layer config.
:type layer_attr: ExtraLayerAttribute|None :type layer_attr: ExtraLayerAttribute | None
:return: LayerOutput object. :return: LayerOutput object.
:rtype: LayerOutput :rtype: LayerOutput
""" """
...@@ -5013,7 +5020,7 @@ def linear_comb_layer(weights, vectors, size=None, name=None, layer_attr=None): ...@@ -5013,7 +5020,7 @@ def linear_comb_layer(weights, vectors, size=None, name=None, layer_attr=None):
:param name: The name of this layer. It is optional. :param name: The name of this layer. It is optional.
:type name: basestring :type name: basestring
:param layer_attr: Extra Layer config. :param layer_attr: Extra Layer config.
:type layer_attr: ExtraLayerAttribute|None :type layer_attr: ExtraLayerAttribute | None
:return: LayerOutput object. :return: LayerOutput object.
:rtype: LayerOutput :rtype: LayerOutput
""" """
...@@ -5077,10 +5084,10 @@ def block_expand_layer(input, ...@@ -5077,10 +5084,10 @@ def block_expand_layer(input,
block_x=1, block_x=1,
block_x=3) block_x=3)
:param input: The input layer. :param input: The input of this layer.
:type input: LayerOutput :type input: LayerOutput
:param num_channels: The channel number of input layer. :param num_channels: The channel number of input layer.
:type num_channels: int|None :type num_channels: int | None
:param block_x: The width of sub block. :param block_x: The width of sub block.
:type block_x: int :type block_x: int
:param block_y: The height of sub block. :param block_y: The height of sub block.
...@@ -5094,9 +5101,9 @@ def block_expand_layer(input, ...@@ -5094,9 +5101,9 @@ def block_expand_layer(input,
:param padding_y: The padding size in vertical direction. :param padding_y: The padding size in vertical direction.
:type padding_y: int :type padding_y: int
:param name: The name of this layer. It is optional. :param name: The name of this layer. It is optional.
:type name: None|basestring. :type name: None | basestring.
:param layer_attr: Extra Layer config. :param layer_attr: Extra Layer config.
:type layer_attr: ExtraLayerAttribute|None :type layer_attr: ExtraLayerAttribute | None
:return: LayerOutput object. :return: LayerOutput object.
:rtype: LayerOutput :rtype: LayerOutput
""" """
...@@ -5155,15 +5162,15 @@ def maxout_layer(input, groups, num_channels=None, name=None, layer_attr=None): ...@@ -5155,15 +5162,15 @@ def maxout_layer(input, groups, num_channels=None, name=None, layer_attr=None):
num_channels=128, num_channels=128,
groups=4) groups=4)
:param input: The input layer. :param input: The input of this layer.
:type input: LayerOutput :type input: LayerOutput
:param num_channels: The channel number of input layer. If None will be set :param num_channels: The channel number of input layer. If None will be set
automatically from previous output. automatically from previous output.
:type num_channels: int|None :type num_channels: int | None
:param groups: The group number of input layer. :param groups: The group number of input layer.
:type groups: int :type groups: int
:param name: The name of this layer. It is optional. :param name: The name of this layer. It is optional.
:type name: None|basestring. :type name: None | basestring.
:param layer_attr: Extra Layer attribute. :param layer_attr: Extra Layer attribute.
:type layer_attr: ExtraLayerAttribute :type layer_attr: ExtraLayerAttribute
:return: LayerOutput object. :return: LayerOutput object.
...@@ -5220,18 +5227,18 @@ def ctc_layer(input, ...@@ -5220,18 +5227,18 @@ def ctc_layer(input,
size=9055, size=9055,
norm_by_times=True) norm_by_times=True)
:param input: The input layer. :param input: The input of this layer.
:type input: LayerOutput :type input: LayerOutput
:param label: The data layer of label with variable length. :param label: The data layer of label with variable length.
:type label: LayerOutput :type label: LayerOutput
:param size: category numbers + 1. :param size: category numbers + 1.
:type size: int :type size: int
:param name: The name of this layer. It is optional. :param name: The name of this layer. It is optional.
:type name: basestring|None :type name: basestring | None
:param norm_by_times: Whether to normalize by times. False by default. :param norm_by_times: Whether to normalize by times. False by default.
:type norm_by_times: bool :type norm_by_times: bool
:param layer_attr: Extra Layer config. :param layer_attr: Extra Layer config.
:type layer_attr: ExtraLayerAttribute|None :type layer_attr: ExtraLayerAttribute | None
:return: LayerOutput object. :return: LayerOutput object.
:rtype: LayerOutput :rtype: LayerOutput
""" """
...@@ -5297,20 +5304,20 @@ def warp_ctc_layer(input, ...@@ -5297,20 +5304,20 @@ def warp_ctc_layer(input,
blank=1000, blank=1000,
norm_by_times=False) norm_by_times=False)
:param input: The input layer. :param input: The input of this layer.
:type input: LayerOutput :type input: LayerOutput
:param label: The data layer of label with variable length. :param label: The data layer of label with variable length.
:type label: LayerOutput :type label: LayerOutput
:param size: category numbers + 1. :param size: category numbers + 1.
:type size: int :type size: int
:param name: The name of this layer. It is optional. :param name: The name of this layer. It is optional.
:type name: basestring|None :type name: basestring | None
:param blank: the 'blank' label used in ctc :param blank: the 'blank' label used in ctc
:type blank: int :type blank: int
:param norm_by_times: Whether to normalize by times. False by default. :param norm_by_times: Whether to normalize by times. False by default.
:type norm_by_times: bool :type norm_by_times: bool
:param layer_attr: Extra Layer config. :param layer_attr: Extra Layer config.
:type layer_attr: ExtraLayerAttribute|None :type layer_attr: ExtraLayerAttribute | None
:return: LayerOutput object. :return: LayerOutput object.
:rtype: LayerOutput :rtype: LayerOutput
""" """
...@@ -5368,11 +5375,11 @@ def crf_layer(input, ...@@ -5368,11 +5375,11 @@ def crf_layer(input,
:param param_attr: Parameter attribute. None means default attribute :param param_attr: Parameter attribute. None means default attribute
:type param_attr: ParameterAttribute :type param_attr: ParameterAttribute
:param name: The name of this layer. It is optional. :param name: The name of this layer. It is optional.
:type name: None|basestring :type name: None | basestring
:param coeff: The coefficient affects the gradient in the backward. :param coeff: The coefficient affects the gradient in the backward.
:type coeff: float :type coeff: float
:param layer_attr: Extra Layer config. :param layer_attr: Extra Layer config.
:type layer_attr: ExtraLayerAttribute|None :type layer_attr: ExtraLayerAttribute | None
:return: LayerOutput object. :return: LayerOutput object.
:rtype: LayerOutput :rtype: LayerOutput
""" """
...@@ -5438,9 +5445,9 @@ def crf_decoding_layer(input, ...@@ -5438,9 +5445,9 @@ def crf_decoding_layer(input,
:param param_attr: Parameter attribute. None means default attribute :param param_attr: Parameter attribute. None means default attribute
:type param_attr: ParameterAttribute :type param_attr: ParameterAttribute
:param name: The name of this layer. It is optional. :param name: The name of this layer. It is optional.
:type name: None|basestring :type name: None | basestring
:param layer_attr: Extra Layer config. :param layer_attr: Extra Layer config.
:type layer_attr: ExtraLayerAttribute|None :type layer_attr: ExtraLayerAttribute | None
:return: LayerOutput object. :return: LayerOutput object.
:rtype: LayerOutput :rtype: LayerOutput
""" """
...@@ -5499,14 +5506,14 @@ def nce_layer(input, ...@@ -5499,14 +5506,14 @@ def nce_layer(input,
:param name: The name of this layer. It is optional. :param name: The name of this layer. It is optional.
:type name: basestring :type name: basestring
:param input: The input layers. It could be a LayerOutput or a list/tuple of LayerOutput. :param input: The input layers. It could be a LayerOutput or a list/tuple of LayerOutput.
:type input: LayerOutput|list|tuple|collections.Sequence :type input: LayerOutput | list | tuple | collections.Sequence
:param label: label layer :param label: label layer
:type label: LayerOutput :type label: LayerOutput
:param weight: weight layer, can be None(default) :param weight: weight layer, can be None(default)
:type weight: LayerOutput :type weight: LayerOutput
:param num_classes: number of classes. :param num_classes: number of classes.
:type num_classes: int :type num_classes: int
:param act: Activation, default is Sigmoid. :param act: Activation type. SigmoidActivation is the default.
:type act: BaseActivation :type act: BaseActivation
:param param_attr: The Parameter Attribute|list. :param param_attr: The Parameter Attribute|list.
:type param_attr: ParameterAttribute :type param_attr: ParameterAttribute
...@@ -5515,12 +5522,12 @@ def nce_layer(input, ...@@ -5515,12 +5522,12 @@ def nce_layer(input,
:param neg_distribution: The distribution for generating the random negative labels. :param neg_distribution: The distribution for generating the random negative labels.
A uniform distribution will be used if not provided. A uniform distribution will be used if not provided.
If not None, its length must be equal to num_classes. If not None, its length must be equal to num_classes.
:type neg_distribution: list|tuple|collections.Sequence|None :type neg_distribution: list | tuple | collections.Sequence | None
:param bias_attr: The Bias Attribute. If the parameter is set to :param bias_attr: The Bias Attribute. If the parameter is set to
False or something not type of ParameterAttribute, False or something not type of ParameterAttribute,
no bias is defined. If the parameter is set to no bias is defined. If the parameter is set to
True, the bias is initialized to zero. True, the bias is initialized to zero.
:type bias_attr: ParameterAttribute|None|Bool|Any :type bias_attr: ParameterAttribute | None | bool | Any
:param layer_attr: Extra Layer Attribute. :param layer_attr: Extra Layer Attribute.
:type layer_attr: ExtraLayerAttribute :type layer_attr: ExtraLayerAttribute
:return: layer name. :return: layer name.
...@@ -5636,7 +5643,7 @@ def rank_cost(left, ...@@ -5636,7 +5643,7 @@ def rank_cost(left,
It is an optional argument. It is an optional argument.
:type weight: LayerOutput :type weight: LayerOutput
:param name: The name of this layer. It is optional. :param name: The name of this layer. It is optional.
:type name: None|basestring :type name: None | basestring
:param coeff: The coefficient affects the gradient in the backward. :param coeff: The coefficient affects the gradient in the backward.
:type coeff: float :type coeff: float
:param layer_attr: Extra Layer Attribute. :param layer_attr: Extra Layer Attribute.
...@@ -5701,7 +5708,7 @@ def lambda_cost(input, ...@@ -5701,7 +5708,7 @@ def lambda_cost(input,
entire list of get gradient. entire list of get gradient.
:type max_sort_size: int :type max_sort_size: int
:param name: The name of this layer. It is optional. :param name: The name of this layer. It is optional.
:type name: None|basestring :type name: None | basestring
:param layer_attr: Extra Layer Attribute. :param layer_attr: Extra Layer Attribute.
:type layer_attr: ExtraLayerAttribute :type layer_attr: ExtraLayerAttribute
:return: LayerOutput object. :return: LayerOutput object.
...@@ -5745,7 +5752,7 @@ def cross_entropy(input, ...@@ -5745,7 +5752,7 @@ def cross_entropy(input,
:param label: The input label. :param label: The input label.
:type label: LayerOutput. :type label: LayerOutput.
:param name: The name of this layer. It is optional. :param name: The name of this layer. It is optional.
:type name: None|basestring. :type name: None | basestring.
:param coeff: The cost is multiplied with coeff. :param coeff: The cost is multiplied with coeff.
The coefficient affects the gradient in the backward. The coefficient affects the gradient in the backward.
:type coeff: float. :type coeff: float.
...@@ -5793,7 +5800,7 @@ def cross_entropy_with_selfnorm(input, ...@@ -5793,7 +5800,7 @@ def cross_entropy_with_selfnorm(input,
:param label: The input label. :param label: The input label.
:type input: LayerOutput. :type input: LayerOutput.
:param name: The name of this layer. It is optional. :param name: The name of this layer. It is optional.
:type name: None|basestring. :type name: None | basestring.
:param coeff: The coefficient affects the gradient in the backward. :param coeff: The coefficient affects the gradient in the backward.
:type coeff: float. :type coeff: float.
:param softmax_selfnorm_alpha: The scale factor affects the cost. :param softmax_selfnorm_alpha: The scale factor affects the cost.
...@@ -5830,10 +5837,10 @@ def sum_cost(input, name=None, layer_attr=None): ...@@ -5830,10 +5837,10 @@ def sum_cost(input, name=None, layer_attr=None):
cost = sum_cost(input=input_layer) cost = sum_cost(input=input_layer)
:param input: The first input layer. :param input: The input of this layer.
:type input: LayerOutput. :type input: LayerOutput.
:param name: The name of this layer. It is optional. :param name: The name of this layer. It is optional.
:type name: None|basestring. :type name: None | basestring.
:param layer_attr: Extra Layer Attribute. :param layer_attr: Extra Layer Attribute.
:type layer_attr: ExtraLayerAttribute :type layer_attr: ExtraLayerAttribute
:return: LayerOutput object. :return: LayerOutput object.
...@@ -5878,7 +5885,7 @@ def huber_regression_cost(input, ...@@ -5878,7 +5885,7 @@ def huber_regression_cost(input,
:param label: The input label. :param label: The input label.
:type input: LayerOutput. :type input: LayerOutput.
:param name: The name of this layer. It is optional. :param name: The name of this layer. It is optional.
:type name: None|basestring. :type name: None | basestring.
:param delta: The difference between the observed and predicted values. :param delta: The difference between the observed and predicted values.
:type delta: float. :type delta: float.
:param coeff: The coefficient affects the gradient in the backward. :param coeff: The coefficient affects the gradient in the backward.
...@@ -5928,7 +5935,7 @@ def huber_classification_cost(input, ...@@ -5928,7 +5935,7 @@ def huber_classification_cost(input,
:param label: The input label. :param label: The input label.
:type input: LayerOutput. :type input: LayerOutput.
:param name: The name of this layer. It is optional. :param name: The name of this layer. It is optional.
:type name: None|basestring. :type name: None | basestring.
:param coeff: The coefficient affects the gradient in the backward. :param coeff: The coefficient affects the gradient in the backward.
:type coeff: float. :type coeff: float.
:param layer_attr: Extra Layer Attribute. :param layer_attr: Extra Layer Attribute.
...@@ -5971,7 +5978,7 @@ def multi_binary_label_cross_entropy(input, ...@@ -5971,7 +5978,7 @@ def multi_binary_label_cross_entropy(input,
:param label: The input label. :param label: The input label.
:type input: LayerOutput :type input: LayerOutput
:param name: The name of this layer. It is optional. :param name: The name of this layer. It is optional.
:type name: None|basestring :type name: None | basestring
:param coeff: The coefficient affects the gradient in the backward. :param coeff: The coefficient affects the gradient in the backward.
:type coeff: float :type coeff: float
:param layer_attr: Extra Layer Attribute. :param layer_attr: Extra Layer Attribute.
...@@ -6139,7 +6146,7 @@ def smooth_l1_cost(input, label, name=None, coeff=1.0, layer_attr=None): ...@@ -6139,7 +6146,7 @@ def smooth_l1_cost(input, label, name=None, coeff=1.0, layer_attr=None):
:param label: The input label. :param label: The input label.
:type input: LayerOutput :type input: LayerOutput
:param name: The name of this layer. It is optional. :param name: The name of this layer. It is optional.
:type name: None|basestring :type name: None | basestring
:param coeff: The coefficient affects the gradient in the backward. :param coeff: The coefficient affects the gradient in the backward.
:type coeff: float :type coeff: float
:param layer_attr: Extra Layer Attribute. :param layer_attr: Extra Layer Attribute.
...@@ -6226,7 +6233,7 @@ def dropout_layer(input, dropout_rate, name=None): ...@@ -6226,7 +6233,7 @@ def dropout_layer(input, dropout_rate, name=None):
:param name: The name of this layer. It is optional. :param name: The name of this layer. It is optional.
:type name: basestring :type name: basestring
:param input: The input layer. :param input: The input of this layer.
:type input: LayerOutput :type input: LayerOutput
:param dropout_rate: The probability of dropout. :param dropout_rate: The probability of dropout.
:type dropout_rate: float :type dropout_rate: float
...@@ -6285,18 +6292,18 @@ def row_conv_layer(input, ...@@ -6285,18 +6292,18 @@ def row_conv_layer(input,
row_conv = row_conv_layer(input=input_layer, context_len=3) row_conv = row_conv_layer(input=input_layer, context_len=3)
:param input: The input layer. :param input: The input of this layer.
:type input: LayerOutput :type input: LayerOutput
:param context_len: The context length equals the lookahead step number :param context_len: The context length equals the lookahead step number
plus one. plus one.
:type context_len: int :type context_len: int
:param act: Activation Type. Default is linear activation. :param act: Activation Type. LinearActivation is the default.
:type act: BaseActivation :type act: BaseActivation
:param param_attr: The Parameter Attribute. If None, the parameter will be :param param_attr: The Parameter Attribute. If None, the parameter will be
initialized smartly. It's better to set it by yourself. initialized smartly. It's better to set it by yourself.
:type param_attr: ParameterAttribute :type param_attr: ParameterAttribute
:param layer_attr: Extra Layer config. :param layer_attr: Extra Layer config.
:type layer_attr: ExtraLayerAttribute|None :type layer_attr: ExtraLayerAttribute | None
:return: LayerOutput object. :return: LayerOutput object.
:rtype: LayerOutput :rtype: LayerOutput
...@@ -6342,7 +6349,7 @@ def prelu_layer(input, ...@@ -6342,7 +6349,7 @@ def prelu_layer(input,
:param name: The name of this layer. It is optional. :param name: The name of this layer. It is optional.
:type name: basestring :type name: basestring
:param input: The input layer. :param input: The input of this layer.
:type input: LayerOutput :type input: LayerOutput
:param partial_sum: this parameter makes a group of inputs share a same weight. :param partial_sum: this parameter makes a group of inputs share a same weight.
...@@ -6352,9 +6359,9 @@ def prelu_layer(input, ...@@ -6352,9 +6359,9 @@ def prelu_layer(input,
:type partial_sum: int :type partial_sum: int
:param param_attr: The parameter attribute. See ParameterAttribute for details. :param param_attr: The parameter attribute. See ParameterAttribute for details.
:type param_attr: ParameterAttribute|None :type param_attr: ParameterAttribute | None
:param layer_attr: Extra layer configurations. Default is None. :param layer_attr: Extra layer configurations. Default is None.
:type layer_attr: ExtraLayerAttribute|None :type layer_attr: ExtraLayerAttribute | None
:return: LayerOutput object. :return: LayerOutput object.
:rtype: LayerOutput :rtype: LayerOutput
""" """
...@@ -6407,37 +6414,37 @@ def gated_unit_layer(input, ...@@ -6407,37 +6414,37 @@ def gated_unit_layer(input,
.. code-block:: python .. code-block:: python
gated_unit = gated_unit_layer(size=128, input=input_layer) gated_unit = gated_unit_layer(size=128, input=input_layer)
:param input: input for this layer. :param input: The input of this layer.
:type input: LayerOutput :type input: LayerOutput
:param size: output size of the gated unit. :param size: output size of the gated unit.
:type size: int :type size: int
:param act: activation type of the projected input. :param act: Activation type of the projected input. LinearActivation is the default.
:type act: BaseActivation :type act: BaseActivation
:param name: The name of this layer. It is optional. :param name: The name of this layer. It is optional.
:type name: basestring :type name: basestring
:param gate_attr: Attributes to tune the gate output, for example, error :param gate_attr: Attributes to tune the gate output, for example, error
clipping threshold, dropout and so on. See ExtraLayerAttribute for clipping threshold, dropout and so on. See ExtraLayerAttribute for
more details. more details.
:type gate_attr: ExtraLayerAttribute|None :type gate_attr: ExtraLayerAttribute | None
:param gate_param_attr: Attributes to tune the learnable projected matrix :param gate_param_attr: Attributes to tune the learnable projected matrix
parameter of the gate. parameter of the gate.
:type gate_param_attr: ParameterAttribute|None :type gate_param_attr: ParameterAttribute | None
:param gate_bias_attr: Attributes to tune the learnable bias of the gate. :param gate_bias_attr: Attributes to tune the learnable bias of the gate.
:type gate_bias_attr: ParameterAttribute|None :type gate_bias_attr: ParameterAttribute | None
:param inproj_attr: Attributes to the tune the projected input, for :param inproj_attr: Attributes to the tune the projected input, for
example, error clipping threshold, dropout and so on. See example, error clipping threshold, dropout and so on. See
ExtraLayerAttribute for more details. ExtraLayerAttribute for more details.
:type inproj_attr: ExtraLayerAttribute|None :type inproj_attr: ExtraLayerAttribute | None
:param inproj_param_attr: Attributes to tune the learnable parameter of :param inproj_param_attr: Attributes to tune the learnable parameter of
the projection of input. the projection of input.
:type inproj_param_attr: ParameterAttribute|None :type inproj_param_attr: ParameterAttribute | None
:param inproj_bias_attr: Attributes to tune the learnable bias of :param inproj_bias_attr: Attributes to tune the learnable bias of
projection of the input. projection of the input.
:type inproj_bias_attr: ParameterAttribute|None :type inproj_bias_attr: ParameterAttribute | None
:param layer_attr: Attributes to tune the final output of the gated unit, :param layer_attr: Attributes to tune the final output of the gated unit,
for example, error clipping threshold, dropout and so on. See for example, error clipping threshold, dropout and so on. See
ExtraLayerAttribute for more details. ExtraLayerAttribute for more details.
:type layer_attr: ExtraLayerAttribute|None :type layer_attr: ExtraLayerAttribute | None
:return: LayerOutput object. :return: LayerOutput object.
:rtype: LayerOutput :rtype: LayerOutput
""" """
...@@ -6487,7 +6494,7 @@ def switch_order_layer(input, ...@@ -6487,7 +6494,7 @@ def switch_order_layer(input,
switch = switch_order(input=layer, name='switch', reshape_axis=reshape_axis) switch = switch_order(input=layer, name='switch', reshape_axis=reshape_axis)
reshape = {'height':[ 0, 1, 2], 'width':[3]} reshape = {'height':[ 0, 1, 2], 'width':[3]}
:param input: The input layer. :param input: The input of this layer.
:type input: LayerOutput :type input: LayerOutput
:param name: The name of this layer. It is optional. :param name: The name of this layer. It is optional.
:type name: basestring :type name: basestring
...@@ -6521,7 +6528,7 @@ def switch_order_layer(input, ...@@ -6521,7 +6528,7 @@ def switch_order_layer(input,
@layer_support() @layer_support()
def crop_layer(input, offset, axis=2, shape=None, name=None, layer_attr=None): def crop_layer(input, offset, axis=2, shape=None, name=None, layer_attr=None):
""" """
The crop layer crops images by offset and shape. User can set crop shape by This layer crops images by offset and shape. User can set crop shape by
args 'shape' explicitly or by reference input layer. args 'shape' explicitly or by reference input layer.
The example usage is: The example usage is:
...@@ -6529,10 +6536,10 @@ def crop_layer(input, offset, axis=2, shape=None, name=None, layer_attr=None): ...@@ -6529,10 +6536,10 @@ def crop_layer(input, offset, axis=2, shape=None, name=None, layer_attr=None):
.. code-block:: python .. code-block:: python
crop = crop_layer(input=[image_input, reference_input], axis=2, offset=[2, 3]) crop = crop_layer(input=[image_input, reference_input], axis=2, offset=[2, 3])
:param input: The input layer.If two inputs were setted, :param input: The input of this layer. If two inputs are given, the second input
the second input will be regarded as reference input will be regarded as reference input.
:type input: LayerOutput or Sequence :type input: LayerOutput | Sequence
:param offset: The crop offset :param offset: The crop offset.
:type offset: Sequence :type offset: Sequence
:param axis: start axis to be cropped. To image input layer: :param axis: start axis to be cropped. To image input layer:
- 0: batch size - 0: batch size
...@@ -6581,12 +6588,12 @@ def sub_nested_seq_layer(input, selected_indices, name=None): ...@@ -6581,12 +6588,12 @@ def sub_nested_seq_layer(input, selected_indices, name=None):
.. code-block:: python .. code-block:: python
sub_nest_seq = sub_nested_seq_layer(input=[data, selected_indices]) sub_nest_seq = sub_nested_seq_layer(input=data, selected_indices=selected_ids)
:param input: A nested sequence. :param input: The input of this layer. It is a nested sequence.
:type input: LayerOutput :type input: LayerOutput
:param selected_indices: a set of sequence indices in the nested sequence. :param selected_indices: A set of sequence indices in the nested sequence.
:type selected_indices: LayerOutput :type selected_indices: LayerOutput
:param name: The name of this layer. It is optional. :param name: The name of this layer. It is optional.
:type name: basestring :type name: basestring
...@@ -6628,7 +6635,7 @@ def clip_layer(input, min, max, name=None): ...@@ -6628,7 +6635,7 @@ def clip_layer(input, min, max, name=None):
:param name: The name of this layer. It is optional. :param name: The name of this layer. It is optional.
:type name: basestring :type name: basestring
:param input: The input layer. :param input: The input of this layer.
:type input: LayerOutput. :type input: LayerOutput.
:param min: The lower threshold for clipping. :param min: The lower threshold for clipping.
:type min: double :type min: double
...@@ -6673,12 +6680,12 @@ def seq_slice_layer(input, starts, ends, name=None): ...@@ -6673,12 +6680,12 @@ def seq_slice_layer(input, starts, ends, name=None):
:param name: The name of this layer. It is optional. :param name: The name of this layer. It is optional.
:type name: basestring :type name: basestring
:param input: input for this layer, it should be a sequence. :param input: The input of this layer, which should be a sequence.
:type input: LayerOutput :type input: LayerOutput
:param starts: start indices to slice the input sequence. :param starts: start indices to slice the input sequence.
:type starts: LayerOutput|None :type starts: LayerOutput | None
:param ends: end indices to slice the input sequence. :param ends: end indices to slice the input sequence.
:type ends: LayerOutput|None :type ends: LayerOutput | None
:return: LayerOutput object. :return: LayerOutput object.
:rtype: LayerOutput :rtype: LayerOutput
...@@ -6727,9 +6734,9 @@ def kmax_seq_score_layer(input, name=None, beam_size=1): ...@@ -6727,9 +6734,9 @@ def kmax_seq_score_layer(input, name=None, beam_size=1):
:param name: The name of this layer. It is optional. :param name: The name of this layer. It is optional.
:type name: basestring :type name: basestring
:param input: The input layer. It stores scores over a sequence or a nested :param input: The input of this layer. It stores scores over a sequence or a nested
sequence and its size must be 1. sequence and its size must be 1.
:type input: LayerOutput. :type input: LayerOutput
:param beam_size: sequence indices with top beam_size scores are returned. :param beam_size: sequence indices with top beam_size scores are returned.
:type beam_size: double :type beam_size: double
:return: LayerOutput object. :return: LayerOutput object.
...@@ -6785,24 +6792,24 @@ def img_conv3d_layer(input, ...@@ -6785,24 +6792,24 @@ def img_conv3d_layer(input,
:param name: The name of this layer. It is optional. :param name: The name of this layer. It is optional.
:type name: basestring :type name: basestring
:param input: Layer Input. :param input: The input of this layer.
:type input: LayerOutput :type input: LayerOutput
:param filter_size: The x dimension of a filter kernel. Or input a list. :param filter_size: The x dimension of a filter kernel. Or input a list.
:type filter_size: int|tuple|list :type filter_size: int | tuple | list
:param num_filters: Each filter group's number of filter :param num_filters: Each filter group's number of filter
:param act: Activation type. Default is tanh :param act: Activation type. ReluActivation is the default.
:type act: BaseActivation :type act: BaseActivation
:param groups: Group size of filters. :param groups: Group size of filters.
:type groups: int :type groups: int
:param stride: The x dimension of the stride. Or input a tuple for two image :param stride: The x dimension of the stride. Or input a tuple for two image
dimension. dimension.
:type stride: int|tuple|list :type stride: int | tuple | list
:param padding: The x dimension of the padding. Or input a tuple for two :param padding: The x dimension of the padding. Or input a tuple for two
image dimension image dimension
:type padding: int|tuple|list :type padding: int | tuple | list
:param bias_attr: Convolution bias attribute. None means default bias. :param bias_attr: Convolution bias attribute. None means default bias.
False means no bias. False means no bias.
:type bias_attr: ParameterAttribute|None|Bool|Any :type bias_attr: ParameterAttribute | None | bool | Any
:param num_channels: number of input channels. If None will be set :param num_channels: number of input channels. If None will be set
automatically from previous output. automatically from previous output.
:type num_channels: int :type num_channels: int
...@@ -6916,15 +6923,15 @@ def scale_shift_layer(input, name=None, param_attr=None, bias_attr=None): ...@@ -6916,15 +6923,15 @@ def scale_shift_layer(input, name=None, param_attr=None, bias_attr=None):
:param name: The name of this layer. It is optional. :param name: The name of this layer. It is optional.
:type name: basestring :type name: basestring
:param input: The input layer. :param input: The input of this layer.
:type input: LayerOutput. :type input: LayerOutput
:param param_attr: The parameter attribute of scaling. :param param_attr: The parameter attribute of scaling.
:type param_attr: ParameterAttribute :type param_attr: ParameterAttribute
:param bias_attr: The Bias Attribute. If the parameter is set to :param bias_attr: The Bias Attribute. If the parameter is set to
False or something not type of ParameterAttribute, False or something not type of ParameterAttribute,
no bias is defined. If the parameter is set to no bias is defined. If the parameter is set to
True, the bias is initialized to zero. True, the bias is initialized to zero.
:type bias_attr: ParameterAttribute|None|Bool|Any :type bias_attr: ParameterAttribute | None | bool | Any
:return: LayerOutput object. :return: LayerOutput object.
:rtype: LayerOutput :rtype: LayerOutput
""" """
...@@ -6944,11 +6951,11 @@ def resize_layer(input, size, name=None): ...@@ -6944,11 +6951,11 @@ def resize_layer(input, size, name=None):
into the output matrix with a shape of [Height x Width / size, size], into the output matrix with a shape of [Height x Width / size, size],
where size is the parameter of this layer indicating the output dimension. where size is the parameter of this layer indicating the output dimension.
:param input: The input to this layer. :param input: The input of this layer.
:type input: LayerOutput. :type input: LayerOutput.
:param name: The name of this layer. It is optional. :param name: The name of this layer. It is optional.
:type name: basestring :type name: basestring
:param size: The resized output dimesion of this layer. :param size: The resized output dimension of this layer.
:type size: int :type size: int
:return: A LayerOutput object. :return: A LayerOutput object.
:rtype: LayerOutput :rtype: LayerOutput
......
...@@ -5,7 +5,7 @@ Default scope function. ...@@ -5,7 +5,7 @@ Default scope function.
thread-local stack of Scope. Top of that stack is current scope, the bottom thread-local stack of Scope. Top of that stack is current scope, the bottom
of that stack is all scopes' parent. of that stack is all scopes' parent.
Invoking `new_var/find_var` can `new/find` variable in current scope. Invoking `var/find_var` can `new/find` variable in current scope.
Invoking `enter_local_scope/leave_local_scope` can create or destroy local Invoking `enter_local_scope/leave_local_scope` can create or destroy local
scope. scope.
...@@ -19,7 +19,7 @@ import threading ...@@ -19,7 +19,7 @@ import threading
__tl_scope__ = threading.local() __tl_scope__ = threading.local()
__all__ = [ __all__ = [
'get_cur_scope', 'enter_local_scope', 'leave_local_scope', 'new_var', 'get_cur_scope', 'enter_local_scope', 'leave_local_scope', 'var',
'find_var', 'scoped_function' 'find_var', 'scoped_function'
] ]
...@@ -54,11 +54,11 @@ def leave_local_scope(): ...@@ -54,11 +54,11 @@ def leave_local_scope():
get_cur_scope().drop_kids() get_cur_scope().drop_kids()
def new_var(name): def var(name):
""" """
create variable in current scope. create variable in current scope.
""" """
return get_cur_scope().new_var(name) return get_cur_scope().var(name)
def find_var(name): def find_var(name):
......
import paddle.v2.framework.core as core
import paddle.v2.framework.proto.framework_pb2 as framework_pb2
import collections
import numpy as np
import copy
__all__ = ['Block', 'Variable', 'Program', 'Operator']
class Variable(object):
    """
    Python-side handle on a variable description stored in a block.

    Constructing a Variable looks the name up in ``block.desc``. When no
    description exists yet, one is created and the given ``type``, ``shape``,
    ``dtype`` and ``lod_level`` are written into it. When a description
    already exists, each attribute that was passed is checked against the
    stored value and a ``ValueError`` is raised on mismatch.
    """

    def __init__(self,
                 block,
                 type=core.VarDesc.VarType.LOD_TENSOR,
                 name=None,
                 shape=None,
                 dtype=None,
                 lod_level=None,
                 **kwargs):
        """
        :param block: The owning block. It must expose ``desc`` (with
            ``find_var``/``var``) and a ``vars`` mapping.
        :param type: The variable type stored in the description.
        :param name: The variable name. A process-unique name is generated
            when it is None.
        :param shape: Optional shape (any sequence of dimensions).
        :param dtype: Optional data type, either a ``core.DataType`` or
            anything accepted by ``numpy.dtype``.
        :param lod_level: Optional LoD level.
        :param kwargs: Accepted for forward compatibility; not used here.
        :raise ValueError: If the variable already exists and one of the
            given attributes differs from the stored one.
        """
        self.block = block

        if name is None:
            name = Variable._unique_var_name_()

        # Re-use an existing description when there is one; otherwise create it.
        self.desc = self.block.desc.find_var(name)
        is_new_var = self.desc is None
        if is_new_var:
            self.desc = self.block.desc.var(name)

        if is_new_var:
            self.desc.set_type(type)
        elif self.desc.type() != type:
            raise ValueError("Variable {0} has been created before. The "
                             "previous type is {1}; the new type is {2}. They"
                             " are not matched".format(self.name,
                                                       self.desc.type(), type))

        if shape is not None:
            if is_new_var:
                self.desc.set_shape(shape)
            else:
                old_shape = self.shape
                # self.shape is a tuple, so normalize before comparing.
                shape = tuple(shape)
                if shape != old_shape:
                    raise ValueError(
                        "Variable {0} has been created before. the previous "
                        "shape is {1}; the new shape is {2}. They are not "
                        "matched.".format(self.name, old_shape, shape))

        if dtype is not None:
            if not isinstance(dtype, core.DataType):
                dtype = Variable._convert_np_dtype_to_dtype_(dtype)
            if is_new_var:
                self.desc.set_data_type(dtype)
            else:
                # data_type is a property (not a method) and the comparison
                # must be made against the stored dtype, not the shape.
                old_dtype = self.data_type
                if dtype != old_dtype:
                    raise ValueError("Variable {0} has been created before. "
                                     "The previous data type is {1}; the new "
                                     "data type is {2}. They are not "
                                     "matched.".format(self.name, old_dtype,
                                                       dtype))

        if lod_level is not None:
            if is_new_var:
                self.desc.set_lod_level(lod_level)
            else:
                if lod_level != self.lod_level:
                    raise ValueError("Variable {0} has been created before. "
                                     "The previous lod_level is {1}; the new "
                                     "lod_level is {2}. They are not "
                                     "matched".format(self.name, self.lod_level,
                                                      lod_level))

        # Register this wrapper in the block under its name.
        self.block.vars[name] = self
        self.op = None

    def __str__(self):
        """Return the text form of the VarDesc protobuf message."""
        protostr = self.desc.serialize_to_string()
        proto = framework_pb2.VarDesc.FromString(str(protostr))
        return proto.__str__()

    __repr__ = __str__

    @property
    def name(self):
        """The variable name stored in the description."""
        return self.desc.name()

    @property
    def shape(self):
        """The variable shape as a tuple."""
        # convert to tuple, make it as same as numpy API.
        return tuple(self.desc.shape())

    @property
    def data_type(self):
        """The data type stored in the description."""
        return self.desc.data_type()

    @property
    def lod_level(self):
        """The LoD level stored in the description."""
        return self.desc.lod_level()

    @staticmethod
    def _unique_var_name_():
        """Generate a variable name from a process-wide unique integer."""
        uid = core.unique_integer()  # unique during whole process.
        return "_generated_var_%d" % uid

    @staticmethod
    def _convert_np_dtype_to_dtype_(np_dtype):
        """
        Map a numpy dtype (or anything ``numpy.dtype`` accepts) to the
        matching ``core.DataType`` value.

        :raise ValueError: If the dtype has no ``core.DataType`` counterpart.
        """
        dtype = np.dtype(np_dtype)
        if dtype == np.float32:
            return core.DataType.FP32
        elif dtype == np.float64:
            return core.DataType.FP64
        elif dtype == np.float16:
            return core.DataType.FP16
        elif dtype == np.int32:
            return core.DataType.INT32
        elif dtype == np.int16:
            return core.DataType.INT16
        elif dtype == np.int64:
            return core.DataType.INT64
        # np.bool_ is the numpy boolean scalar type; the bare `np.bool` alias
        # was removed from recent numpy releases.
        elif dtype == np.bool_:
            return core.DataType.BOOL
        else:
            raise ValueError("Not supported numpy dtype " + str(dtype))
def get_all_op_protos():
    """
    Get all registered op proto from PaddlePaddle C++ end.

    Each serialized entry returned by core.get_all_op_protos() is parsed
    into a framework_pb2.OpProto message.
    :return: A list of registered OpProto.
    """
    return [
        framework_pb2.OpProto.FromString(str(serialized))
        for serialized in core.get_all_op_protos()
    ]
class OpProtoHolder(object):
    """Singleton-style lookup table from op type to its OpProto.

    Obtain it through `instance()`; the constructor asserts that no instance
    has been stored on the class yet.
    """

    @classmethod
    def instance(cls):
        """Return the shared holder, creating it on first use."""
        if hasattr(cls, '_instance'):
            return cls._instance
        cls._instance = cls()
        return cls._instance

    def __init__(self):
        already_created = hasattr(self.__class__, '_instance')
        assert not already_created, 'Please use `instance()` to get OpProtoHolder opject!'
        # Index every proto returned by get_all_op_protos() by its type.
        self.op_proto_map = dict(
            (proto.type, proto) for proto in get_all_op_protos())

    def get_op_proto(self, type):
        """Return the proto registered under `type`; assert if it is unknown."""
        known = type in self.op_proto_map
        assert known, "Operator \"%s\" has not been registered." % type
        return self.op_proto_map[type]
class Operator(object):
    """Python wrapper around one op desc that belongs to a block.

    When the desc already has a non-empty type the constructor only stores
    `block` and `desc`. Otherwise it sets the type, validates `inputs` and
    `outputs` against the registered proto, copies attributes, and finally
    asks the desc to check attributes and infer shapes.
    """

    def __init__(self,
                 block,
                 desc,
                 type=None,
                 inputs=None,
                 outputs=None,
                 attrs=None):
        self.block = block
        self.desc = desc
        # A desc that already carries a type needs no further setup.
        if len(self.desc.type()) != 0:
            return
        if type is None:
            raise ValueError(
                "`type` to initilized an Operator can not be None.")
        self.desc.set_type(type)
        proto = OpProtoHolder.instance().get_op_proto(type)

        if inputs is not None:
            given = set(inputs)
            need = set(slot.name for slot in proto.inputs)
            if given != need:
                raise ValueError(
                    "Incorrect setting for input(s) of operator \"%s\". Need: [%s] Given: [%s]"
                    % (type, ", ".join(str(e) for e in need), ", ".join(
                        str(e) for e in given)))

            for in_proto in proto.inputs:
                arguments = inputs[in_proto.name]
                if not isinstance(arguments, list):
                    arguments = [arguments]
                if len(arguments) > 1 and not in_proto.duplicable:
                    raise ValueError(
                        "Input %s expects only one input, but %d are given." %
                        (in_proto.name, len(arguments)))
                self.desc.set_input(in_proto.name,
                                    [argu.name for argu in arguments])

        if outputs is not None:
            given = set(outputs)
            need = set(slot.name for slot in proto.outputs)
            if given != need:
                raise ValueError(
                    "Incorrect setting for output(s) of operator \"%s\". Need: [%s] Given: [%s]"
                    % (type, ", ".join(str(e) for e in need), ", ".join(
                        str(e) for e in given)))

            for out_proto in proto.outputs:
                arguments = outputs[out_proto.name]
                if not isinstance(arguments, list):
                    arguments = [arguments]
                if len(arguments) > 1 and not out_proto.duplicable:
                    raise ValueError(
                        "Output %s expects only one output, but %d are given." %
                        (out_proto.name, len(arguments)))
                argument_names = []
                for argu in arguments:
                    argument_names.append(argu.name)
                    # Record this operator on each output argument.
                    argu.op = self
                self.desc.set_output(out_proto.name, argument_names)

        if attrs is not None:
            for attr in proto.attrs:
                attr_name = attr.name
                if attr_name in attrs:
                    value = attrs[attr_name]
                    if isinstance(value, Block):
                        # Block-valued attributes are stored via their desc.
                        self.desc.set_block_attr(attr_name, value.desc)
                    else:
                        self.desc.set_attr(attr_name, value)

        self.desc.check_attrs()
        self.desc.infer_shape(self.block.desc)

    def __str__(self):
        serialized = str(self.desc.serialize_to_string())
        message = framework_pb2.OpDesc.FromString(serialized)
        return message.__str__()

    __repr__ = __str__

    @property
    def type(self):
        """Type string stored in the desc."""
        return self.desc.type()

    def input(self, name):
        """Forward to desc.input(name)."""
        return self.desc.input(name)

    @property
    def input_names(self):
        """Forward to desc.input_names()."""
        return self.desc.input_names()

    def output(self, name):
        """Forward to desc.output(name)."""
        return self.desc.output(name)

    @property
    def output_names(self):
        """Forward to desc.output_names()."""
        return self.desc.output_names()

    def has_attr(self, name):
        """Forward to desc.has_attr(name)."""
        return self.desc.has_attr(name)

    def attr_type(self, name):
        """Forward to desc.attr_type(name)."""
        return self.desc.attr_type(name)

    @property
    def attr_names(self):
        """Forward to desc.attr_names()."""
        return self.desc.attr_names()

    def attr(self, name):
        """Forward to desc.attr(name)."""
        return self.desc.attr(name)

    def block_attr(self, name):
        """Forward to desc.block_attr(name)."""
        return self.desc.block_attr(name)
class Block(object):
    """Python-side mirror of one block desc of a program.

    Holds `vars` (variable name -> Variable) and `ops` (a deque of Operator)
    next to the desc obtained from `program.desc.block(idx)`.
    """

    def __init__(self, program, idx):
        self.desc = program.desc.block(idx)
        self.vars = dict()  # var_name --> var
        self.ops = collections.deque()  # operator list
        self.program = program

    def __str__(self):
        protostr = self.desc.serialize_to_string()
        proto = framework_pb2.BlockDesc.FromString(str(protostr))
        return proto.__str__()

    __repr__ = __str__

    @property
    def parent_idx(self):
        """Parent index as stored on the desc."""
        return self.desc.parent

    @property
    def idx(self):
        """Block id as stored on the desc."""
        return self.desc.id

    def create_var(self, *args, **kwargs):
        """Create a Variable in this block; arguments go to Variable()."""
        return Variable(self, *args, **kwargs)

    def has_var(self, name):
        """Return True if `name` is already tracked in `self.vars`."""
        return name in self.vars

    def create_parameter(self, *args, **kwargs):
        """Create a Parameter; it is always placed in the global block."""
        global_block = self.program.global_block()
        return Parameter(global_block, *args, **kwargs)

    def append_op(self, *args, **kwargs):
        """Append a new op desc and its Operator wrapper at the tail."""
        op_desc = self.desc.append_op()
        op = Operator(self, op_desc, *args, **kwargs)
        self.ops.append(op)
        return op

    def prepend_op(self, *args, **kwargs):
        """Prepend a new op desc and its Operator wrapper at the head."""
        op_desc = self.desc.prepend_op()
        op = Operator(self, op_desc, *args, **kwargs)
        self.ops.appendleft(op)
        return op

    def sync_with_cpp(self):
        """Wrap variables and ops that exist in the desc but not yet here.

        Ops already known to Python are expected to form one contiguous run
        inside `desc.all_ops()`; ops before that run are prepended and ops
        after it are appended. The final asserts check that both sequences
        then match element by element.
        """
        # sync variables from cpp
        for var in self.desc.all_vars():
            if not self.has_var(var.name()):
                self.create_var(name=var.name(), desc=var, type=var.type())

        # sync operators from cpp
        ops_in_cpp = self.desc.all_ops()
        if len(self.ops) != 0:
            first_op_in_python = self.ops[0].desc
            last_op_in_python = self.ops[len(self.ops) - 1].desc
            start_index = None
            end_index = None
            for index in range(len(ops_in_cpp)):
                if first_op_in_python == ops_in_cpp[index]:
                    start_index = index
                if last_op_in_python == ops_in_cpp[index]:
                    end_index = index
            assert start_index is not None
            assert end_index is not None
            assert start_index <= end_index
        else:
            # No Python-side ops yet (e.g. a block that was just created to
            # mirror a desc): nothing to prepend, every desc op is appended.
            start_index = 0
            end_index = -1

        # sync ops append to the head of cpp_ops
        # Walk from the op right before the first known one down to index 0;
        # starting any lower would skip ops_in_cpp[start_index - 1].
        for index in range(start_index - 1, -1, -1):
            op_desc = ops_in_cpp[index]
            op = Operator(self, op_desc)
            self.ops.appendleft(op)

        # sync ops append to the end of cpp_ops
        for index in range((end_index + 1), len(ops_in_cpp)):
            op_desc = ops_in_cpp[index]
            op = Operator(self, op_desc)
            self.ops.append(op)

        assert len(self.ops) == len(ops_in_cpp)
        for index in range(len(self.ops)):
            assert self.ops[index].desc == ops_in_cpp[index]
class Program(object):
    """Singleton wrapper around core.ProgramDesc plus its Python blocks."""

    @classmethod
    def instance(cls):
        # From https://stackoverflow.com/questions/8212053
        # Making Program as a Singleton class.
        if hasattr(cls, '_instance'):
            return cls._instance
        cls._instance = cls()
        return cls._instance

    def __init__(self):
        already_created = hasattr(self.__class__, '_instance')
        assert not already_created, 'Do not call constructor directly!'
        self.desc = core.ProgramDesc.instance()
        self.blocks = [Block(self, 0)]
        self.current_block_idx = 0

    def __str__(self):
        serialized = str(self.desc.serialize_to_string())
        message = framework_pb2.ProgramDesc.FromString(serialized)
        return message.__str__()

    __repr__ = __str__

    def global_block(self):
        """Return the first block."""
        return self.blocks[0]

    def current_block(self):
        """Return the block selected by `current_block_idx`."""
        position = self.current_block_idx
        return self.blocks[position]

    def append_backward(self, target, no_grad_set):
        """Ask the desc to append backward ops for `target`, then re-sync.

        Returns whatever desc.append_backward() returned.
        """
        assert isinstance(target, Variable)
        grad_info = self.desc.append_backward(target.desc, no_grad_set)
        self.sync_with_cpp()
        return grad_info

    def create_block(self):
        """Append a block whose parent is the current block and enter it."""
        next_idx = len(self.blocks)
        parent_desc = self.current_block().desc
        self.desc.append_block(parent_desc)
        self.current_block_idx = next_idx
        self.blocks.append(Block(self, self.current_block_idx))
        return self.current_block()

    def rollback(self):
        """Make the current block's parent the current block."""
        parent = self.current_block().parent_idx
        self.current_block_idx = parent

    def sync_with_cpp(self):
        """Wrap blocks that exist only in the desc, then sync every block."""
        known = len(self.blocks)
        for block_idx in range(known, self.desc.num_blocks()):
            self.blocks.append(Block(self, block_idx))
        for each_block in self.blocks:
            each_block.sync_with_cpp()
class Parameter(Variable):
    """A Variable with a required static shape and dtype.

    Construction also records `trainable`, `init_attr` and `optimize_attr`
    from the keyword arguments and prepends an initializer op to the block.
    """

    def __init__(self, block, shape, dtype, **kwargs):
        if dtype is None or shape is None:
            raise ValueError("Parameter must set shape and dtype")
        if len(shape) == 0:
            raise ValueError("Parameter shape cannot be empty")
        # A negative dimension is rejected as batch-size dependent.
        if any(each < 0 for each in shape):
            raise ValueError("Parameter shape should not be related with "
                             "batch-size")

        Variable.__init__(self, block, shape=shape, dtype=dtype, **kwargs)

        default_init = {'type': 'uniform_random', 'min': -1.0, 'max': 1.0}
        self.trainable = kwargs.get('trainable', True)
        self.init_attr = kwargs.get('initialize_attr', default_init)
        self.optimize_attr = kwargs.get('optimize_attr', {'learning_rate': 1.0})
        self._append_initialize_ops_()

    def _append_initialize_ops_(self):
        """Prepend the op described by `init_attr`, writing to this parameter."""
        # Work on a copy so popping 'type' leaves self.init_attr untouched.
        attr = copy.deepcopy(self.init_attr)
        op_type = attr.pop('type', None)
        block = self.block
        assert isinstance(block, Block)
        attr['dims'] = self.shape
        attr['data_type'] = int(self.data_type)
        self.op = block.prepend_op(
            type=op_type, inputs=None, outputs={'Out': [self]}, attrs=attr)
# program is a global instance.
g_program = Program.instance()
import paddle.v2.framework.core as core
import collections
__all__ = ['Block', 'Variable', 'Program', 'Operator']
class Variable(object):
    """Wrapper around a variable proto created inside a block.

    The constructor creates the proto via `block.proto.new_var(name)`,
    forwards whichever of shape/dtype/lod_level were given, and registers
    itself in `block.vars`.
    """

    def __init__(self, block, name=None, shape=None, dtype=None,
                 lod_level=None):
        self.block = block
        # Fall back to a generated name when the caller gives none.
        var_name = Variable._unique_var_name_() if name is None else name
        self.proto = self.block.proto.new_var(var_name)

        if shape is not None:
            self.proto.set_shape(shape)

        if dtype is not None:
            # TODO(yuyang18): Convert dtype from numpy.dtype
            self.proto.set_data_type(dtype)

        if lod_level is not None:
            # TODO(yuyang18): set_lod_level is not defined.
            self.proto.set_lod_level(lod_level)

        self.block.vars[var_name] = self
        self.op = None

    # TODO(yuyang18): Get methods

    @staticmethod
    def _unique_var_name_():
        """Build a name of the form "_generated_var_<n>"."""
        # core.unique_integer() is unique during the whole process.
        return "_generated_var_%d" % core.unique_integer()
class Operator(object):
    """Placeholder wrapper that stores the owning block and an op proto."""

    def __init__(self,
                 block,
                 proto,
                 type=None,
                 inputs=None,
                 outputs=None,
                 attrs=None):
        self.block, self.proto = block, proto
        # TODO: `type`, `inputs`, `outputs` and `attrs` are accepted but not
        # consumed yet.

    # TODO: Getters
class Block(object):
    """Wrapper around the block proto at `program.proto.block(idx)`.

    Tracks created variables in `vars` and operators in the `ops` deque.
    """

    def __init__(self, program, idx):
        self.program = program
        self.proto = program.proto.block(idx)
        self.vars = dict()  # var_name --> var
        self.ops = collections.deque()  # operator list

    @property
    def parent_idx(self):
        """Parent index as stored on the proto."""
        return self.proto.parent

    @property
    def idx(self):
        """Block id as stored on the proto."""
        return self.proto.id

    def create_var(self, *args, **kwargs):
        """Create a Variable in this block; arguments go to Variable()."""
        return Variable(self, *args, **kwargs)

    def append_op(self, *args, **kwargs):
        """Append a new op proto and its Operator wrapper at the tail."""
        new_op = Operator(self, self.proto.append_op(), *args, **kwargs)
        self.ops.append(new_op)
        return new_op

    def prepend_op(self, *args, **kwargs):
        """Prepend a new op proto and its Operator wrapper at the head."""
        new_op = Operator(self, self.proto.prepend_op(), *args, **kwargs)
        self.ops.appendleft(new_op)
        return new_op
class Program(object):
    """Singleton wrapper around core.ProgramDesc plus its Python blocks."""

    @classmethod
    def instance(cls):
        # From https://stackoverflow.com/questions/8212053
        # Making Program as a Singleton class.
        if hasattr(cls, '_instance'):
            return cls._instance
        cls._instance = cls()
        return cls._instance

    def __init__(self):
        already_created = hasattr(self.__class__, '_instance')
        assert not already_created, 'Do not call constructor directly!'
        self.proto = core.ProgramDesc.instance()
        self.blocks = [Block(self, 0)]
        self.current_block_idx = 0

    def global_block(self):
        """Return the first block."""
        return self.blocks[0]

    def current_block(self):
        """Return the block selected by `current_block_idx`."""
        position = self.current_block_idx
        return self.blocks[position]

    def create_block(self):
        """Append a block whose parent is the current block and enter it."""
        next_idx = len(self.blocks)
        parent_proto = self.current_block().proto
        self.proto.append_block(parent_proto)
        self.current_block_idx = next_idx
        self.blocks.append(Block(self, self.current_block_idx))
        return self.current_block()

    def rollback(self):
        """Make the current block's parent the current block."""
        parent = self.current_block().parent_idx
        self.current_block_idx = parent
# program is a global instance.
g_program = Program.instance()
...@@ -219,6 +219,27 @@ class __RecurrentOp__(object): ...@@ -219,6 +219,27 @@ class __RecurrentOp__(object):
return core.RecurrentOp.create(proto.SerializeToString()) return core.RecurrentOp.create(proto.SerializeToString())
class __DynamicRecurrentOp__(object):
    """Callable factory that builds a core.DynamicRecurrentOp.

    The op proto whose type is "dynamic_recurrent" is looked up once at
    construction; calling the instance builds an op desc from the arguments
    and hands its serialized form to core.DynamicRecurrentOp.create().
    """
    __proto__ = None
    type = "dynamic_recurrent"

    def __init__(self):
        # cache recurrent_op's proto
        if self.__proto__ is not None:
            return
        for candidate in get_all_op_protos():
            if candidate.type == self.type:
                self.__proto__ = candidate

    def __call__(self, *args, **kwargs):
        # Supply the op type unless the caller already passed one.
        if not (self.type in args or "type" in kwargs):
            kwargs["type"] = self.type
        # create proto
        build_desc = OpDescCreationMethod(self.__proto__)
        op_desc = build_desc(*args, **kwargs)
        # create rnnop
        return core.DynamicRecurrentOp.create(op_desc.SerializeToString())
class __CondOp__(object): class __CondOp__(object):
__proto__ = None __proto__ = None
type = "cond" type = "cond"
...@@ -242,4 +263,5 @@ class __CondOp__(object): ...@@ -242,4 +263,5 @@ class __CondOp__(object):
Operator = OperatorFactory() # The default global factory Operator = OperatorFactory() # The default global factory
RecurrentOp = __RecurrentOp__() RecurrentOp = __RecurrentOp__()
DynamicRecurrentOp = __DynamicRecurrentOp__()
CondOp = __CondOp__() CondOp = __CondOp__()
...@@ -14,7 +14,7 @@ def create_op(scope, op_type, inputs, outputs, attrs): ...@@ -14,7 +14,7 @@ def create_op(scope, op_type, inputs, outputs, attrs):
kwargs = dict() kwargs = dict()
def __create_var__(name, var_name): def __create_var__(name, var_name):
scope.new_var(var_name) scope.var(var_name)
kwargs[name].append(var_name) kwargs[name].append(var_name)
for in_name, in_dup in Operator.get_op_inputs(op_type): for in_name, in_dup in Operator.get_op_inputs(op_type):
...@@ -71,7 +71,7 @@ def set_input(scope, op, inputs, place): ...@@ -71,7 +71,7 @@ def set_input(scope, op, inputs, place):
def set_output_grad(scope, op, outputs, place): def set_output_grad(scope, op, outputs, place):
def __set_tensor__(name): def __set_tensor__(name):
out_tensor = scope.find_var(name).get_tensor() out_tensor = scope.find_var(name).get_tensor()
grad_tensor = scope.new_var(grad_var_name(name)).get_tensor() grad_tensor = scope.var(grad_var_name(name)).get_tensor()
out_dtype = out_tensor.dtype() out_dtype = out_tensor.dtype()
if out_dtype == core.DataType.FP64: if out_dtype == core.DataType.FP64:
data = np.ones(out_tensor.shape(), dtype=np.float64) data = np.ones(out_tensor.shape(), dtype=np.float64)
...@@ -169,10 +169,10 @@ def get_numeric_gradient(scope, ...@@ -169,10 +169,10 @@ def get_numeric_gradient(scope,
def get_backward_op(scope, op, no_grad_set): def get_backward_op(scope, op, no_grad_set):
backward_op = core.Operator.backward(op, no_grad_set) backward_op = core.Operator.backward(op, no_grad_set)
for input in backward_op.input_vars(): for input in backward_op.input_vars():
var = scope.new_var(input) var = scope.var(input)
var.get_tensor() var.get_tensor()
for output in backward_op.output_vars(): for output in backward_op.output_vars():
var = scope.new_var(output) var = scope.var(output)
var.get_tensor() var.get_tensor()
return backward_op return backward_op
......
...@@ -33,6 +33,21 @@ class TestSigmoid(OpTest): ...@@ -33,6 +33,21 @@ class TestSigmoid(OpTest):
self.check_grad(['X'], 'Y', max_relative_error=0.008) self.check_grad(['X'], 'Y', max_relative_error=0.008)
class TestLogSigmoid(OpTest):
    """Checks the "logsigmoid" op against log(1 / (1 + exp(-x)))."""

    def setUp(self):
        self.op_type = "logsigmoid"
        x = np.random.uniform(-1, 1, [11, 17]).astype("float32")
        self.inputs = {'X': x}
        sigmoid = 1 / (1 + np.exp(-x))
        self.outputs = {'Y': np.log(sigmoid)}

    def test_check_output(self):
        self.check_output()

    def test_check_grad(self):
        self.check_grad(['X'], 'Y', max_relative_error=0.008)
class TestTanh(OpTest): class TestTanh(OpTest):
def setUp(self): def setUp(self):
self.op_type = "tanh" self.op_type = "tanh"
...@@ -63,6 +78,46 @@ class TestTanhShrink(OpTest): ...@@ -63,6 +78,46 @@ class TestTanhShrink(OpTest):
self.check_grad(['X'], 'Y', max_relative_error=0.008) self.check_grad(['X'], 'Y', max_relative_error=0.008)
class TestHardShrink(OpTest):
    """Checks "hard_shrink": inputs within [-lambda, lambda] become 0."""

    def setUp(self):
        self.op_type = "hard_shrink"
        x = np.random.uniform(-1, 1, [4, 4]).astype("float32")
        threshold = 0.5

        self.inputs = {'X': x}
        self.attrs = {'lambda': threshold}

        # Zero every element of a copy that lies inside the closed band.
        expected = np.copy(x)
        inside_band = (expected >= -threshold) & (expected <= threshold)
        expected[inside_band] = 0
        self.outputs = {'Y': expected}

    def test_check_output(self):
        self.check_output()

    def test_check_grad(self):
        self.check_grad(['X'], 'Y', max_relative_error=0.005)
class TestSoftShrink(OpTest):
    """Checks "softshrink": values move toward 0 by lambda, the band maps to 0."""

    def setUp(self):
        self.op_type = "softshrink"
        lambda_val = 0.1
        self.attrs = {'lambda': lambda_val}
        x = np.random.uniform(0.25, 10, [4, 4]).astype("float32")
        self.inputs = {'X': x}
        below = (x < -lambda_val) * (x + lambda_val)
        above = (x > lambda_val) * (x - lambda_val)
        self.outputs = {'Y': below + above}

    def test_check_output(self):
        self.check_output()

    def test_check_grad(self):
        self.check_grad(['X'], 'Y', max_relative_error=0.007)
class TestSqrt(OpTest): class TestSqrt(OpTest):
def setUp(self): def setUp(self):
self.op_type = "sqrt" self.op_type = "sqrt"
...@@ -276,6 +331,21 @@ class TestSTanh(OpTest): ...@@ -276,6 +331,21 @@ class TestSTanh(OpTest):
self.check_grad(['X'], 'Y', max_relative_error=0.007) self.check_grad(['X'], 'Y', max_relative_error=0.007)
class TestSoftplus(OpTest):
    """Checks the "softplus" op against log(1 + exp(x))."""

    def setUp(self):
        self.op_type = "softplus"
        x = np.random.uniform(-1, 1, [11, 17]).astype("float32")
        self.inputs = {'X': x}
        self.outputs = {'Y': np.log(1 + np.exp(x))}

    def test_check_output(self):
        self.check_output()

    def test_check_grad(self):
        self.check_grad(['X'], 'Y', max_relative_error=0.007)
class TestSoftsign(OpTest): class TestSoftsign(OpTest):
def setUp(self): def setUp(self):
self.op_type = "softsign" self.op_type = "softsign"
...@@ -293,5 +363,54 @@ class TestSoftsign(OpTest): ...@@ -293,5 +363,54 @@ class TestSoftsign(OpTest):
self.check_grad(['X'], 'Y', max_relative_error=0.007) self.check_grad(['X'], 'Y', max_relative_error=0.007)
class TestThresholdedRelu(OpTest):
    """Checks "thresholded_relu": x is kept where x > threshold, else 0."""

    def setUp(self):
        self.op_type = "thresholded_relu"
        self.relative_error = 0.005
        threshold = 0.25
        X = np.random.uniform(-1, 1, [11, 17]).astype("float32")

        # Same reason as TestAbs
        # Move samples closer than relative_error to the threshold away
        # from it.
        too_close = np.abs(X - threshold) < self.relative_error
        X[too_close] = threshold + 0.2

        self.inputs = {'X': X}
        self.attrs = {'threshold': threshold}
        self.outputs = {'Y': (X > threshold) * X}

    def test_check_output(self):
        self.check_output()

    def test_check_grad(self):
        self.check_grad(['X'], 'Y', max_relative_error=self.relative_error)
class TestHardSigmoid(OpTest):
    """Checks "hard_sigmoid" against clip(x * 0.2 + 0.5, 0, 1).

    No attrs are set here, so the op is run with whatever slope/offset it
    uses by default -- presumably 0.2 and 0.5; confirm against the op.
    """

    def setUp(self):
        self.op_type = "hard_sigmoid"
        self.relative_error = 0.002

        X = np.random.uniform(-5, 5, [2, 2]).astype("float32")
        slope = 0.2
        offset = 0.5
        lower_threshold = -offset / slope
        upper_threshold = (1 - offset) / slope

        # Same reason as TestAbs
        # Move samples closer than relative_error to either kink away from it.
        near_lower = np.abs(X - lower_threshold) < self.relative_error
        X[near_lower] = lower_threshold + 0.2
        near_upper = np.abs(X - upper_threshold) < self.relative_error
        X[near_upper] = upper_threshold - 0.2

        self.inputs = {'X': X}
        linear = X * slope + offset
        self.outputs = {'Y': np.maximum(0.0, np.minimum(1.0, linear))}

    def test_check_output(self):
        self.check_output()

    def test_check_grad(self):
        self.check_grad(['X'], 'Y', max_relative_error=0.002)
if __name__ == "__main__": if __name__ == "__main__":
unittest.main() unittest.main()
import unittest
import numpy as np
from op_test import OpTest
class TestAdamOp1(OpTest):
    def setUp(self):
        '''Adam op test where epsilon, beta1 and beta2 are passed as attrs.

        Expected outputs come from the numpy reference adam_step().
        '''
        self.op_type = "adam"
        shape = (102, 105)
        param = np.random.uniform(-1, 1, shape).astype("float32")
        grad = np.random.uniform(-1, 1, shape).astype("float32")
        moment1 = np.random.uniform(-1, 1, shape).astype("float32")
        # The second moment is positive
        moment2 = np.random.random(shape).astype("float32")

        learning_rate = 0.004
        beta1, beta2 = 0.78, 0.836
        epsilon = 1e-4
        beta1_pow = beta1**10
        beta2_pow = beta2**10

        self.inputs = {
            'Param': param,
            'Grad': grad,
            'Moment1': moment1,
            'Moment2': moment2,
            'LearningRate': np.array([learning_rate]).astype("float32"),
            'Beta1Pow': np.array([beta1_pow]).astype("float32"),
            'Beta2Pow': np.array([beta2_pow]).astype("float32")
        }
        self.attrs = {'epsilon': epsilon, 'beta1': beta1, 'beta2': beta2}

        expected = adam_step(self.inputs, self.attrs)
        param_out, moment1_out, moment2_out, beta1_pow_out, beta2_pow_out = expected
        self.outputs = {
            'ParamOut': param_out,
            'Moment1Out': moment1_out,
            'Moment2Out': moment2_out,
            'Beta1PowOut': beta1_pow_out,
            'Beta2PowOut': beta2_pow_out
        }

    def test_check_output(self):
        self.check_output()
class TestAdamOp2(OpTest):
    def setUp(self):
        '''Adam op test that does not assign self.attrs.

        The reference outputs use beta1=0.9, beta2=0.999 and epsilon=1e-8,
        presumably the op's default attribute values -- confirm against the op.
        '''
        self.op_type = "adam"
        shape = (102, 105)
        param = np.random.uniform(-1, 1, shape).astype("float32")
        grad = np.random.uniform(-1, 1, shape).astype("float32")
        moment1 = np.random.uniform(-1, 1, shape).astype("float32")
        # The second moment is positive
        moment2 = np.random.random(shape).astype("float32")

        learning_rate = 0.001
        beta1, beta2 = 0.9, 0.999
        epsilon = 1e-8
        beta1_pow = beta1**10
        beta2_pow = beta2**10

        self.inputs = {
            'Param': param,
            'Grad': grad,
            'Moment1': moment1,
            'Moment2': moment2,
            'LearningRate': np.array([learning_rate]).astype("float32"),
            'Beta1Pow': np.array([beta1_pow]).astype("float32"),
            'Beta2Pow': np.array([beta2_pow]).astype("float32")
        }

        # Used only for the numpy reference; not stored on the test case.
        attributes = {'epsilon': epsilon, 'beta1': beta1, 'beta2': beta2}
        expected = adam_step(self.inputs, attributes)
        param_out, moment1_out, moment2_out, beta1_pow_out, beta2_pow_out = expected
        self.outputs = {
            'ParamOut': param_out,
            'Moment1Out': moment1_out,
            'Moment2Out': moment2_out,
            'Beta1PowOut': beta1_pow_out,
            'Beta2PowOut': beta2_pow_out
        }

    def test_check_output(self):
        self.check_output()
class TestAdamOpMultipleSteps(OpTest):
    def setUp(self):
        '''Adam op test that chains num_steps consecutive updates.

        Only inputs and attrs are prepared here; outputs are recomputed for
        every step inside test_check_output.
        '''
        self.op_type = "adam"
        self.num_steps = 10

        shape = (102, 105)
        param = np.random.uniform(-1, 1, shape).astype("float32")
        grad = np.random.uniform(-1, 1, shape).astype("float32")
        moment1 = np.random.uniform(-1, 1, shape).astype("float32")
        # The second moment is positive
        moment2 = np.random.random(shape).astype("float32")

        learning_rate = 0.001
        beta1, beta2 = 0.9, 0.999
        epsilon = 1e-8
        beta1_pow = beta1**10
        beta2_pow = beta2**10

        self.inputs = {
            'Param': param,
            'Grad': grad,
            'Moment1': moment1,
            'Moment2': moment2,
            'LearningRate': np.array([learning_rate]).astype("float32"),
            'Beta1Pow': np.array([beta1_pow]).astype("float32"),
            'Beta2Pow': np.array([beta2_pow]).astype("float32")
        }
        self.attrs = {'epsilon': epsilon, 'beta1': beta1, 'beta2': beta2}

    def test_check_output(self):
        step = 0
        while step < self.num_steps:
            expected = adam_step(self.inputs, self.attrs)
            param_out, moment1_out, moment2_out, beta1_pow_out, beta2_pow_out = expected
            self.outputs = {
                'ParamOut': param_out,
                'Moment1Out': moment1_out,
                'Moment2Out': moment2_out,
                'Beta1PowOut': beta1_pow_out,
                'Beta2PowOut': beta2_pow_out
            }

            # Verify output for this step
            self.check_output()

            # Output of this step becomes input for next step
            self.inputs['Param'] = param_out
            self.inputs['Moment1'] = moment1_out
            self.inputs['Moment2'] = moment2_out
            self.inputs['Beta1Pow'] = beta1_pow_out
            self.inputs['Beta2Pow'] = beta2_pow_out

            # Randomize gradient for next step
            fresh_grad = np.random.uniform(-1, 1, (102, 105))
            self.inputs['Grad'] = fresh_grad.astype("float32")
            step += 1
def adam_step(inputs, attributes):
    '''
    Numpy reference for a single Adam update.

    :param inputs: dict with keys 'Param', 'Grad', 'Moment1', 'Moment2',
        'LearningRate', 'Beta1Pow' and 'Beta2Pow'
    :param attributes: dict with keys 'beta1', 'beta2' and 'epsilon'
    :return tuple: (param_out, moment1_out, moment2_out, beta1_pow_out,
        beta2_pow_out)
    '''
    input_keys = ('Param', 'Grad', 'Moment1', 'Moment2', 'LearningRate',
                  'Beta1Pow', 'Beta2Pow')
    param, grad, m1, m2, lr, b1_pow, b2_pow = [inputs[k] for k in input_keys]
    beta1, beta2, epsilon = [
        attributes[k] for k in ('beta1', 'beta2', 'epsilon')
    ]

    # Exponential moving averages of the gradient and its square.
    moment1_out = beta1 * m1 + (1 - beta1) * grad
    moment2_out = beta2 * m2 + (1 - beta2) * np.square(grad)

    # Advance the beta power accumulators by one step.
    beta1_pow_out = b1_pow * beta1
    beta2_pow_out = b2_pow * beta2

    # Step size scaled by the two accumulator terms.
    lr_t = lr * np.sqrt(1 - beta2_pow_out) / (1 - beta1_pow_out)
    direction = moment1_out / (np.sqrt(moment2_out) + epsilon)
    param_out = param - lr_t * direction
    return param_out, moment1_out, moment2_out, beta1_pow_out, beta2_pow_out
if __name__ == "__main__":
unittest.main()
...@@ -39,7 +39,7 @@ class PySimpleCondTest(unittest.TestCase): ...@@ -39,7 +39,7 @@ class PySimpleCondTest(unittest.TestCase):
def create_tensor(scope, name, shape, np_data): def create_tensor(scope, name, shape, np_data):
tensor = scope.new_var(name).get_tensor() tensor = scope.var(name).get_tensor()
tensor.set_dims(shape) tensor.set_dims(shape)
tensor.set(np_data, core.CPUPlace()) tensor.set(np_data, core.CPUPlace())
return tensor return tensor
...@@ -74,9 +74,9 @@ class TestCondOp(unittest.TestCase): ...@@ -74,9 +74,9 @@ class TestCondOp(unittest.TestCase):
create_tensor(self.scope, "X", [10, 1], x_np_data) create_tensor(self.scope, "X", [10, 1], x_np_data)
cond_np_data = self.py_cond.cond.astype("int32") cond_np_data = self.py_cond.cond.astype("int32")
create_tensor(self.scope, "cond", [10, 1], cond_np_data) create_tensor(self.scope, "cond", [10, 1], cond_np_data)
self.scope.new_var("SubScopes") self.scope.var("SubScopes")
self.scope.new_var("IndexTensors") self.scope.var("IndexTensors")
self.scope.new_var("Out") self.scope.var("Out")
def create_cond_op(self): def create_cond_op(self):
self.condop = CondOp( self.condop = CondOp(
......
...@@ -3,70 +3,56 @@ import numpy as np ...@@ -3,70 +3,56 @@ import numpy as np
from op_test import OpTest from op_test import OpTest
def conv2d_forward_naive(input, filter, group, conv_param):
    """Naive numpy reference for a grouped 2-D convolution.

    :param input: 4-D array unpacked as (in_n, in_c, in_h, in_w).
    :param filter: 4-D array unpacked as (out_c, f_c, f_h, f_w); it must
        satisfy f_c * group == in_c and out_c % group == 0.
    :param group: number of channel groups.
    :param conv_param: dict with 'stride' and 'pad', each indexable as
        [height, width].
    :return: array of shape (in_n, out_c, out_h, out_w).
    """
    in_n, in_c, in_h, in_w = input.shape
    out_c, f_c, f_h, f_w = filter.shape
    assert f_c * group == in_c
    assert np.mod(out_c, group) == 0
    # Floor division keeps sizes integral on Python 3 as well; a true
    # division would hand floats to np.zeros() and range().
    sub_out_c = out_c // group

    stride, pad = conv_param['stride'], conv_param['pad']
    out_h = 1 + (in_h + 2 * pad[0] - f_h) // stride[0]
    out_w = 1 + (in_w + 2 * pad[1] - f_w) // stride[1]
    out = np.zeros((in_n, out_c, out_h, out_w))

    # Zero-pad only the two spatial axes, symmetrically.
    input_pad = np.pad(input, ((0, 0), (0, 0), (pad[0], pad[0]),
                               (pad[1], pad[1])),
                       mode='constant',
                       constant_values=0)
    for i in range(out_h):
        for j in range(out_w):
            for g in range(group):
                # Window of the g-th input channel group under the filter.
                input_pad_masked = \
                    input_pad[:, g * f_c:(g + 1) * f_c,
                              i * stride[0]:i * stride[0] + f_h,
                              j * stride[1]:j * stride[1] + f_w]

                f_sub = filter[g * sub_out_c:(g + 1) * sub_out_c, :, :, :]
                for k in range(sub_out_c):
                    out[:, g * sub_out_c + k, i, j] = \
                        np.sum(input_pad_masked * f_sub[k, :, :, :],
                               axis=(1, 2, 3))

    return out
class TestConv2dOp(OpTest): class TestConv2dOp(OpTest):
def setUp(self): def setUp(self):
self.init_groups() self.init_op_type()
self.op_type = "conv2d" self.init_group()
batch_size = 2 self.init_test_case()
input_channels = 3
input_height = 5 conv2d_param = {'stride': self.stride, 'pad': self.pad}
input_width = 5 input = np.random.random(self.input_size).astype("float32")
output_channels = 6 filter = np.random.random(self.filter_size).astype("float32")
filter_height = 3 output = conv2d_forward_naive(input, filter, self.groups, conv2d_param)
filter_width = 3
stride = 1
padding = 0
output_height = (input_height - filter_height + 2 * padding
) / stride + 1
output_width = (input_width - filter_width + 2 * padding) / stride + 1
input = np.random.random((batch_size, input_channels, input_height,
input_width)).astype("float32")
filter = np.random.random(
(output_channels, input_channels / self.groups, filter_height,
filter_width)).astype("float32")
output = np.ndarray(
(batch_size, output_channels, output_height, output_width))
self.inputs = {'Input': input, 'Filter': filter} self.inputs = {'Input': input, 'Filter': filter}
self.attrs = { self.attrs = {
'strides': [1, 1], 'strides': self.stride,
'paddings': [0, 0], 'paddings': self.pad,
'groups': self.groups 'groups': self.groups,
'dilations': self.dilations
} }
output_group_channels = output_channels / self.groups
input_group_channels = input_channels / self.groups
for batchid in xrange(batch_size):
for group in xrange(self.groups):
for outchannelid in range(group * output_group_channels,
(group + 1) * output_group_channels):
for rowid in xrange(output_height):
for colid in xrange(output_width):
start_h = (rowid * stride) - padding
start_w = (colid * stride) - padding
output_value = 0.0
for inchannelid in range(
group * input_group_channels,
(group + 1) * input_group_channels):
for frowid in xrange(filter_height):
for fcolid in xrange(filter_width):
input_value = 0.0
inrowid = start_h + frowid
incolid = start_w + fcolid
if ((inrowid >= 0 and
inrowid < input_height) and
(incolid >= 0 and
incolid < input_width)):
input_value = input[batchid][
inchannelid][inrowid][incolid]
filter_value = filter[outchannelid][
inchannelid % input_group_channels][
frowid][fcolid]
output_value += input_value * filter_value
output[batchid][outchannelid][rowid][
colid] = output_value
self.outputs = {'Output': output} self.outputs = {'Output': output}
def test_check_output(self): def test_check_output(self):
...@@ -90,14 +76,47 @@ class TestConv2dOp(OpTest): ...@@ -90,14 +76,47 @@ class TestConv2dOp(OpTest):
max_relative_error=0.05, max_relative_error=0.05,
no_grad_set=set(['Input'])) no_grad_set=set(['Input']))
def init_groups(self): def init_test_case(self):
# self.groups = 1
# self.op_type = "conv2d"
self.pad = [0, 0]
self.stride = [1, 1]
self.dilations = [1, 1]
self.input_size = [2, 3, 5, 5] # NCHW
assert np.mod(self.input_size[1], self.groups) == 0
f_c = self.input_size[1] / self.groups
self.filter_size = [6, f_c, 3, 3]
def init_group(self):
self.groups = 1 self.groups = 1
def init_op_type(self):
self.op_type = "conv2d"
class TestWithGroup(TestConv2dOp): class TestWithGroup(TestConv2dOp):
def init_groups(self): def init_group(self):
self.groups = 3 self.groups = 3
def init_op_type(self):
self.op_type = "conv2d"
class TestCudnn(TestConv2dOp):
def init_group(self):
self.groups = 1
def init_op_type(self):
self.op_type = "conv_cudnn"
class TestCudnnWithGroup(TestConv2dOp):
def init_group(self):
self.groups = 3
def init_op_type(self):
self.op_type = "conv_cudnn"
if __name__ == '__main__': if __name__ == '__main__':
unittest.main() unittest.main()
import unittest
import numpy as np
from op_test import OpTest
class TestDecayedAdagradOp1(OpTest):
    ''' Test DecayedAdagrad operator with explicit attributes
    '''

    def setUp(self):
        self.op_type = "decayed_adagrad"

        shape = (123, 321)
        param = np.random.random(shape).astype("float32")
        grad = np.random.random(shape).astype("float32")
        moment = np.zeros(shape).astype("float32")
        lr = 0.01
        decay = 0.80
        epsilon = 1e-8

        self.inputs = {
            'Param': param,
            'Grad': grad,
            'Moment': moment,
            'LearningRate': np.array([lr]).astype("float32")
        }
        self.attrs = {'decay': decay, 'epsilon': epsilon}

        # Numpy reference: decayed running average of the squared gradient,
        # then a step scaled by its square root.
        moment_out = decay * moment + (1 - decay) * grad * grad
        denominator = np.sqrt(moment_out) + epsilon
        param_out = param - lr * grad / denominator
        self.outputs = {'ParamOut': param_out, 'MomentOut': moment_out}

    def test_check_output(self):
        self.check_output()
class TestDecayedAdagradOp2(OpTest):
    ''' Test DecayedAdagrad operator with default attributes
    '''

    def setUp(self):
        self.op_type = "decayed_adagrad"

        shape = (123, 321)
        param = np.random.random(shape).astype("float32")
        grad = np.random.random(shape).astype("float32")
        moment = np.zeros(shape).astype("float32")
        lr = 0.01
        decay = 0.95
        epsilon = 1e-6

        self.inputs = {
            'Param': param,
            'Grad': grad,
            'Moment': moment,
            'LearningRate': np.array([lr]).astype("float32")
        }
        self.attrs = {'decay': decay, 'epsilon': epsilon}

        # Numpy reference: decayed running average of the squared gradient,
        # then a step scaled by its square root.
        moment_out = decay * moment + (1 - decay) * grad * grad
        denominator = np.sqrt(moment_out) + epsilon
        param_out = param - lr * grad / denominator
        self.outputs = {'ParamOut': param_out, 'MomentOut': moment_out}

    def test_check_output(self):
        self.check_output()
if __name__ == "__main__":
unittest.main()
...@@ -10,7 +10,7 @@ class TestDefaultScopeFuncs(unittest.TestCase): ...@@ -10,7 +10,7 @@ class TestDefaultScopeFuncs(unittest.TestCase):
self.assertIsNone(find_var("test")) self.assertIsNone(find_var("test"))
def test_create_var_get_var(self): def test_create_var_get_var(self):
var_a = new_var("var_a") var_a = var("var_a")
self.assertIsNotNone(var_a) self.assertIsNotNone(var_a)
self.assertIsNotNone(get_cur_scope().find_var('var_a')) self.assertIsNotNone(get_cur_scope().find_var('var_a'))
enter_local_scope() enter_local_scope()
...@@ -19,7 +19,7 @@ class TestDefaultScopeFuncs(unittest.TestCase): ...@@ -19,7 +19,7 @@ class TestDefaultScopeFuncs(unittest.TestCase):
def test_var_get_int(self): def test_var_get_int(self):
def __new_scope__(): def __new_scope__():
i = new_var("var_i") i = var("var_i")
self.assertFalse(i.is_int()) self.assertFalse(i.is_int())
i.set_int(10) i.set_int(10)
self.assertTrue(i.is_int()) self.assertTrue(i.is_int())
......
import logging
import paddle.v2.framework.core as core
import unittest
from paddle.v2.framework.op import Operator, DynamicRecurrentOp
import numpy as np
def create_tensor(scope, name, shape, np_data):
    """Create variable ``name`` in ``scope`` and fill its tensor.

    The tensor's dims are set to ``shape`` and ``np_data`` is then copied
    into it on a CPU place. Returns the tensor.
    """
    variable = scope.var(name)
    tensor = variable.get_tensor()
    tensor.set_dims(shape)
    cpu_place = core.CPUPlace()
    tensor.set(np_data, cpu_place)
    return tensor
class DynamicRecurrentOpTest(unittest.TestCase):
    '''
    Forward smoke test for DynamicRecurrentOp.

    equation:
        h_t = sigmoid(W x_t + U h_{t-1})
    weights:
       - W
       - U
    vars:
       - x
    memories:
       - h
    outputs:
       - h

    The test only runs the op and prints sizes; it makes no numeric
    assertions.
    '''

    # for simplicity, just one level LoD
    lod_py = [[0, 4, 7, 9, 10]]
    input_dim = 30
    num_sents = len(lod_py[0]) - 1
    weight_dim = 15

    @staticmethod
    def _echo(*parts):
        # Prints the parts separated by single spaces, matching what the
        # Python 2 statement ``print a, b`` emits, while staying valid
        # syntax under Python 3 as well.
        print(' '.join(str(part) for part in parts))

    def forward(self):
        '''Build the scope, the op and its step net, run it on CPU and
        return the content of the "h@mem" tensor as a numpy array.'''
        self.scope = core.Scope()
        self.create_global_variables()
        self.create_rnn_op()
        self.create_step_net()
        ctx = core.DeviceContext.create(core.CPUPlace())
        self.rnnop.run(self.scope, ctx)

        state = self.rnnop.get_state("h@mem")
        self._echo('state size: ', state.size())

        step_inputs = self.rnnop.get_step_input("x")
        self._echo("x size ", step_inputs.size())
        for i in range(step_inputs.size()):
            self._echo("x %d" % i, np.array(step_inputs.read(i).get_dims()))

        step_outputs = self.rnnop.get_step_output('h@mem')
        self._echo('step_outputs.size ', step_outputs.size())

        output = self.scope.find_var("h@mem").get_tensor()
        result = np.array(output)
        self._echo('output', result.shape)
        # Previously nothing was returned, so test_forward always printed
        # None for pd_output.
        return result

    def create_global_variables(self):
        '''Create x (with LoD), W, U, h_boot and the empty "step_scopes"
        and "h@mem" variables in self.scope.'''
        x = np.random.normal(size=(self.lod_py[0][-1],
                                   self.input_dim)).astype("float32")
        W = np.random.normal(size=(self.input_dim,
                                   self.input_dim)).astype("float32")
        U = np.random.normal(size=(self.input_dim,
                                   self.input_dim)).astype("float32")
        h_boot = np.random.normal(size=(self.num_sents,
                                        self.input_dim)).astype("float32")
        # create inlink
        # NOTE(review): x has lod_py[0][-1] rows but the dims passed here use
        # num_sents rows; presumably tensor.set() overrides the dims with the
        # numpy shape -- confirm.
        x_tensor = create_tensor(self.scope, "x",
                                 [self.num_sents, self.input_dim], x)
        x_tensor.set_lod(self.lod_py)
        create_tensor(self.scope, "W", [self.input_dim, self.input_dim], W)
        create_tensor(self.scope, "U", [self.input_dim, self.input_dim], U)
        create_tensor(self.scope, "h_boot", [self.num_sents, self.input_dim],
                      h_boot)
        self.scope.var("step_scopes")
        self.scope.var("h@mem")

    def create_rnn_op(self):
        '''Instantiate the DynamicRecurrentOp under test as self.rnnop.'''
        # create RNNOp
        self.rnnop = DynamicRecurrentOp(
            # inputs
            inlinks=["x"],
            boot_memories=["h_boot"],
            step_net="stepnet",
            # outputs
            outlinks=["h@mem"],
            step_scopes="step_scopes",
            # attributes
            pre_memories=["h@pre"],
            memories=["h@mem"])

    def create_step_net(self):
        '''Build the per-step net (two mul ops, a sum and a sigmoid) and
        attach it to self.rnnop.'''
        stepnet = core.Net.create()
        x_fc_op = Operator("mul", X="x", Y="W", Out="Wx")
        h_fc_op = Operator("mul", X="h@pre", Y="U", Out="Uh")
        sum_op = Operator("sum", X=["Wx", "Uh"], Out="sum")
        sig_op = Operator("sigmoid", X="sum", Y="h@mem")

        for op in [x_fc_op, h_fc_op, sum_op, sig_op]:
            stepnet.append_op(op)
        stepnet.complete_add_op(True)
        self.rnnop.set_stepnet(stepnet)

    def test_forward(self):
        self._echo('test recurrent op forward')
        pd_output = self.forward()
        self._echo('pd_output', pd_output)
if __name__ == '__main__':
unittest.main()
...@@ -14,7 +14,7 @@ class TestGaussianRandomOp(unittest.TestCase): ...@@ -14,7 +14,7 @@ class TestGaussianRandomOp(unittest.TestCase):
def gaussian_random_test(self, place): def gaussian_random_test(self, place):
scope = core.Scope() scope = core.Scope()
scope.new_var('Out').get_tensor() scope.var('Out').get_tensor()
op = Operator( op = Operator(
"gaussian_random", "gaussian_random",
......
import math
import unittest
import numpy as np
from op_test import OpTest
class GRUActivationType(OpTest):
    """Integer codes used as the 'activation' / 'gate_activation' attrs."""

    # identity = 0, sigmoid = 1, tanh = 2, relu = 3
    identity, sigmoid, tanh, relu = range(4)
def identity(x):
    """Return the argument unchanged."""
    return x
def sigmoid(x):
    """Element-wise logistic function 1 / (1 + exp(-x))."""
    denominator = 1. + np.exp(-x)
    return 1. / denominator
def tanh(x):
    """Hyperbolic tangent expressed through sigmoid: 2 * sigmoid(2x) - 1."""
    doubled = 2. * x
    return 2. * sigmoid(doubled) - 1.
def relu(x):
    """Element-wise maximum of x and 0."""
    clipped = np.maximum(x, 0)
    return clipped
class TestGRUUnitOp(OpTest):
    """Output and gradient check for the gru_unit operator without a bias.

    set_inputs() builds random inputs and the attrs; set_outputs() computes
    the expected 'Gate', 'ResetHiddenPrev' and 'Hidden' outputs with numpy.
    """

    batch_size = 3
    frame_size = 5
    # Maps the integer activation codes to their numpy implementations.
    activate = {
        GRUActivationType.identity: identity,
        GRUActivationType.sigmoid: sigmoid,
        GRUActivationType.tanh: tanh,
        GRUActivationType.relu: relu,
    }

    def set_inputs(self):
        """Populate op_type, inputs (Input, HiddenPrev, Weight) and attrs."""
        batch_size = self.batch_size
        frame_size = self.frame_size
        self.op_type = 'gru_unit'
        self.inputs = {
            'Input': np.random.uniform(
                -0.1, 0.1, (batch_size, frame_size * 3)).astype('float32'),
            'HiddenPrev': np.random.uniform(
                -0.1, 0.1, (batch_size, frame_size)).astype('float32'),
            'Weight': np.random.uniform(
                -1. / math.sqrt(frame_size), 1. / math.sqrt(frame_size),
                (frame_size, frame_size * 3)).astype('float32'),
        }
        self.attrs = {
            'activation': GRUActivationType.tanh,
            'gate_activation': GRUActivationType.sigmoid
        }

    def set_outputs(self):
        """Compute the reference outputs from self.inputs and self.attrs."""
        # GRU calculations
        batch_size = self.batch_size
        frame_size = self.frame_size
        x = self.inputs['Input']
        h_p = self.inputs['HiddenPrev']
        w = self.inputs['Weight']
        # `in` replaces dict.has_key(), which no longer exists in Python 3.
        # A missing bias is treated as all zeros.
        b = self.inputs['Bias'] if 'Bias' in self.inputs else np.zeros(
            (1, frame_size * 3))
        g = x + np.tile(b, (batch_size, 1))
        # The first 2 * frame_size * frame_size weight values, viewed as a
        # (frame_size, 2 * frame_size) matrix, drive the two gates.
        w_u_r = w.flatten()[:frame_size * frame_size * 2].reshape(
            (frame_size, frame_size * 2))
        u_r = self.activate[self.attrs['gate_activation']](np.dot(
            h_p, w_u_r) + g[:, :frame_size * 2])
        u = u_r[:, :frame_size]
        r = u_r[:, frame_size:frame_size * 2]
        # r scales the previous hidden state; exported as 'ResetHiddenPrev'.
        r_h_p = r * h_p
        # The remaining weight values form the (frame_size, frame_size)
        # matrix applied to the scaled previous hidden state.
        w_c = w.flatten()[frame_size * frame_size * 2:].reshape(
            (frame_size, frame_size))
        c = self.activate[self.attrs['activation']](np.dot(r_h_p, w_c) +
                                                    g[:, frame_size * 2:])
        g = np.hstack((u_r, c))
        h = u * h_p + (1 - u) * c
        self.outputs = {'Gate': g, 'ResetHiddenPrev': r_h_p, 'Hidden': h}

    def setUp(self):
        self.set_inputs()
        self.set_outputs()

    def test_check_output(self):
        self.check_output()

    def test_check_grad(self):
        self.check_grad(
            ['Input', 'HiddenPrev', 'Weight'], ['Hidden'],
            max_relative_error=0.007)
class TestGRUUnitOpWithBias(TestGRUUnitOp):
    """gru_unit check with a 'Bias' input and identity activation."""

    def set_inputs(self):
        """Extend the base inputs with a random bias and override attrs."""
        frame_size = self.frame_size
        super(TestGRUUnitOpWithBias, self).set_inputs()
        self.inputs['Bias'] = np.random.uniform(
            -0.1, 0.1, (1, frame_size * 3)).astype('float32')
        # Replaces the attrs set by the base class (tanh -> identity).
        self.attrs = {
            'activation': GRUActivationType.identity,
            'gate_activation': GRUActivationType.sigmoid
        }

    def test_check_grad(self):
        self.check_grad(
            ['Input', 'HiddenPrev', 'Weight', 'Bias'], ['Hidden'],
            max_relative_error=0.007)
if __name__ == '__main__':
unittest.main()
...@@ -13,12 +13,15 @@ class TestInferShape(unittest.TestCase): ...@@ -13,12 +13,15 @@ class TestInferShape(unittest.TestCase):
shape = [10, 20] shape = [10, 20]
# prepare input/output # prepare input/output
x1 = block.new_var("x1") x1 = block.var("x1")
x1.set_type(core.VarDesc.VarType.LOD_TENSOR)
x1.set_shape(shape) x1.set_shape(shape)
x2 = block.new_var("x2") x2 = block.var("x2")
x2.set_type(core.VarDesc.VarType.LOD_TENSOR)
x2.set_shape(shape) x2.set_shape(shape)
out = block.new_var("out") out = block.var("out")
out.set_type(core.VarDesc.VarType.LOD_TENSOR)
# prepare the operator # prepare the operator
sum_op_desc = block.append_op() sum_op_desc = block.append_op()
...@@ -39,12 +42,15 @@ class TestInferShape(unittest.TestCase): ...@@ -39,12 +42,15 @@ class TestInferShape(unittest.TestCase):
y_shape = [20, 30] y_shape = [20, 30]
# prepare input/output # prepare input/output
x1 = block.new_var("x") x1 = block.var("x")
x1.set_type(core.VarDesc.VarType.LOD_TENSOR)
x1.set_shape(x_shape) x1.set_shape(x_shape)
x2 = block.new_var("y") x2 = block.var("y")
x2.set_type(core.VarDesc.VarType.LOD_TENSOR)
x2.set_shape(y_shape) x2.set_shape(y_shape)
out = block.new_var("out") out = block.var("out")
out.set_type(core.VarDesc.VarType.LOD_TENSOR)
# prepare the operator # prepare the operator
mul_op_desc = block.append_op() mul_op_desc = block.append_op()
......
import unittest
import numpy as np
from op_test import OpTest
class TestMarginRankLossOp(OpTest):
    """Output and gradient check for the margin_rank_loss operator."""

    def setUp(self):
        self.op_type = "margin_rank_loss"
        batch_size = 5
        margin = 0.5
        # labels_{i} = {-1, 1}
        label = 2 * np.random.randint(
            0, 2, size=(batch_size, 1)).astype("float32") - 1
        x1 = np.random.random((batch_size, 1)).astype("float32")
        x2 = np.random.random((batch_size, 1)).astype("float32")
        # loss = max(0, -label * (x1 - x2) + margin)
        loss = -label * (x1 - x2) + margin
        loss = np.where(loss > 0, loss, 0)
        # act is 1 where the loss is positive and 0 elsewhere.
        act = np.where(loss > 0, 1., 0.)

        self.attrs = {'margin': margin}
        self.inputs = {'Label': label, 'X1': x1, 'X2': x2}
        self.outputs = {'Activated': act, 'Out': loss}

    def test_check_output(self):
        self.check_output()

    def test_check_grad(self):
        self.check_grad(["X1", "X2"], "Out")

    def test_check_grad_ignore_x1(self):
        # set(['X1']) rather than set('X1'): the latter iterates the string
        # and yields {'X', '1'}, so 'X1' was never actually excluded.
        self.check_grad(["X2"], "Out", no_grad_set=set(['X1']))

    def test_check_grad_ignore_x2(self):
        # Same fix as above for 'X2'.
        self.check_grad(["X1"], "Out", no_grad_set=set(['X2']))
if __name__ == '__main__':
unittest.main()
...@@ -31,7 +31,7 @@ uniq_id = atomic_id().next ...@@ -31,7 +31,7 @@ uniq_id = atomic_id().next
def data_layer(name, dims): def data_layer(name, dims):
var = scope.new_var(name) var = scope.var(name)
tensor = var.get_tensor() tensor = var.get_tensor()
tensor.set_dims(dims) # 1 is batch size holder. tensor.set_dims(dims) # 1 is batch size holder.
return name return name
...@@ -67,7 +67,7 @@ def sgd_optimizer(net, param_name, learning_rate=0.005): ...@@ -67,7 +67,7 @@ def sgd_optimizer(net, param_name, learning_rate=0.005):
# should use operator and add these to the init_network # should use operator and add these to the init_network
def init_param(net, param_name, dims): def init_param(net, param_name, dims):
scope.new_var(param_name) scope.var(param_name)
op = Operator( op = Operator(
"uniform_random", Out=param_name, dims=dims, min=-0.5, max=0.5, seed=10) "uniform_random", Out=param_name, dims=dims, min=-0.5, max=0.5, seed=10)
op.infer_shape(scope) op.infer_shape(scope)
...@@ -104,7 +104,7 @@ def fc_layer(net, input, size, act="softmax", bias=True, param=None, name=None): ...@@ -104,7 +104,7 @@ def fc_layer(net, input, size, act="softmax", bias=True, param=None, name=None):
sgd_optimizer(net=optimize_net, param_name=w_name, learning_rate=0.01) sgd_optimizer(net=optimize_net, param_name=w_name, learning_rate=0.01)
pre_activation = name + ".mul.out" pre_activation = name + ".mul.out"
scope.new_var(pre_activation) scope.var(pre_activation)
mul_op = Operator("mul", X=input, Y=w_name, Out=pre_activation) mul_op = Operator("mul", X=input, Y=w_name, Out=pre_activation)
net.append_op(mul_op) net.append_op(mul_op)
...@@ -115,7 +115,7 @@ def fc_layer(net, input, size, act="softmax", bias=True, param=None, name=None): ...@@ -115,7 +115,7 @@ def fc_layer(net, input, size, act="softmax", bias=True, param=None, name=None):
sgd_optimizer( sgd_optimizer(
net=optimize_net, param_name=bias_name, learning_rate=0.001) net=optimize_net, param_name=bias_name, learning_rate=0.001)
bias_out = name + ".rowwise_add.out" bias_out = name + ".rowwise_add.out"
scope.new_var(bias_out) scope.var(bias_out)
rowwise_append_op = Operator( rowwise_append_op = Operator(
"rowwise_add", X=pre_activation, b=bias_name, Out=bias_out) "rowwise_add", X=pre_activation, b=bias_name, Out=bias_out)
net.append_op(rowwise_append_op) net.append_op(rowwise_append_op)
...@@ -123,7 +123,7 @@ def fc_layer(net, input, size, act="softmax", bias=True, param=None, name=None): ...@@ -123,7 +123,7 @@ def fc_layer(net, input, size, act="softmax", bias=True, param=None, name=None):
activation_op = Operator(act, X=pre_activation, Y=name) activation_op = Operator(act, X=pre_activation, Y=name)
net.append_op(activation_op) net.append_op(activation_op)
scope.new_var(name) scope.var(name)
net.infer_shape(scope) net.infer_shape(scope)
return name return name
...@@ -133,7 +133,7 @@ def cross_entropy_layer(net, input, label): ...@@ -133,7 +133,7 @@ def cross_entropy_layer(net, input, label):
cross_entropy_op = Operator( cross_entropy_op = Operator(
"cross_entropy", X=input, Label=label, Y=cost_name) "cross_entropy", X=input, Label=label, Y=cost_name)
net.append_op(cross_entropy_op) net.append_op(cross_entropy_op)
scope.new_var(cost_name) scope.var(cost_name)
net.infer_shape(scope) net.infer_shape(scope)
return cost_name return cost_name
...@@ -141,10 +141,10 @@ def cross_entropy_layer(net, input, label): ...@@ -141,10 +141,10 @@ def cross_entropy_layer(net, input, label):
def create_backward_net(forward_net): def create_backward_net(forward_net):
net = core.Operator.backward(forward_net, set()) net = core.Operator.backward(forward_net, set())
for input in net.inputs()["all"]: for input in net.inputs()["all"]:
var = scope.new_var(input) var = scope.var(input)
var.get_tensor() var.get_tensor()
for output in net.outputs()["all"]: for output in net.outputs()["all"]:
var = scope.new_var(output) var = scope.var(output)
var.get_tensor() var.get_tensor()
return net return net
......
import unittest
from paddle.v2.framework.framework import Variable, g_program
import paddle.v2.framework.core as core
class TestOperator(unittest.TestCase):
    """Tests for creating operators through a block's append_op()."""

    def test_error_type(self):
        """append_op() must reject a missing type and an unknown type."""
        block = g_program.create_block()

        # assertRaises replaces the old try / self.assertFail() pattern:
        # unittest.TestCase has no assertFail(), so a missing exception used
        # to surface as an unrelated AttributeError.
        with self.assertRaises(ValueError) as missing_type:
            block.append_op()
        self.assertEqual(
            str(missing_type.exception),
            "`type` to initilized an Operator can not be None.")

        with self.assertRaises(AssertionError) as unknown_type:
            block.append_op(type="no_such_op")
        self.assertEqual(
            str(unknown_type.exception),
            "Operator \"no_such_op\" has not been registered.")

    def test_op_desc_creation(self):
        """A "mul" op exposes its type, inputs, outputs and attributes."""
        block = g_program.current_block()
        mul_x = block.create_var(
            dtype="float32", shape=[5, 10], lod_level=0, name="mul.x")
        mul_y = block.create_var(
            dtype="float32", shape=[10, 8], lod_level=0, name="mul.y")
        mul_out = block.create_var(
            dtype="float32", shape=[5, 8], lod_level=0, name="mul.out")
        # "X" is passed as a list and "Y" as a bare variable, so both input
        # forms go through append_op().
        mul_op = block.append_op(
            type="mul",
            inputs={"X": [mul_x],
                    "Y": mul_y},
            outputs={"Out": [mul_out]},
            attrs={"x_num_col_dims": 1})

        self.assertNotEqual(str(mul_op), "")
        self.assertEqual(mul_op.type, "mul")
        self.assertEqual(mul_op.input_names, ["X", "Y"])
        self.assertEqual(mul_op.input("X"), ["mul.x"])
        self.assertEqual(mul_op.input("Y"), ["mul.y"])
        self.assertEqual(mul_op.output_names, ["Out"])
        self.assertEqual(mul_op.output("Out"), ["mul.out"])
        # y_num_col_dims was not passed explicitly but is still expected to
        # be present with value 1.
        self.assertEqual(
            set(mul_op.attr_names), set(["x_num_col_dims", "y_num_col_dims"]))
        self.assertEqual(mul_op.has_attr("x_num_col_dims"), True)
        self.assertEqual(mul_op.attr_type("x_num_col_dims"), core.AttrType.INT)
        self.assertEqual(mul_op.attr("x_num_col_dims"), 1)
        self.assertEqual(mul_op.has_attr("y_num_col_dims"), True)
        self.assertEqual(mul_op.attr_type("y_num_col_dims"), core.AttrType.INT)
        self.assertEqual(mul_op.attr("y_num_col_dims"), 1)
        self.assertEqual(mul_out.op, mul_op)

    def test_mult_input(self):
        """A "sum" op accepts several variables under one input slot."""
        block = g_program.current_block()
        sum_x1 = block.create_var(
            dtype="int", shape=[3, 4], lod_level=0, name="sum.x1")
        sum_x2 = block.create_var(
            dtype="int", shape=[3, 4], lod_level=0, name="sum.x2")
        sum_x3 = block.create_var(
            dtype="int", shape=[3, 4], lod_level=0, name="sum.x3")
        sum_out = block.create_var(
            dtype="int", shape=[3, 4], lod_level=0, name="sum.out")
        sum_op = block.append_op(
            type="sum",
            inputs={"X": [sum_x1, sum_x2, sum_x3]},
            outputs={"Out": sum_out})

        self.assertEqual(sum_op.type, "sum")
        self.assertEqual(sum_op.input_names, ["X"])
        self.assertEqual(sum_op.input("X"), ["sum.x1", "sum.x2", "sum.x3"])
        self.assertEqual(sum_op.output_names, ["Out"])
        self.assertEqual(sum_op.output("Out"), ["sum.out"])
        self.assertEqual(sum_out.op, sum_op)
if __name__ == '__main__':
unittest.main()
import unittest
from paddle.v2.framework.framework import g_program
import paddle.v2.framework.core as core
class TestParameter(unittest.TestCase):
    """Checks the attributes of a parameter created through a block."""

    def test_param(self):
        block = g_program.create_block()
        initializer = {
            'type': 'uniform_random',
            'seed': 13,
            'min': -5.0,
            'max': 5.0
        }
        param = block.create_parameter(
            name='fc.w',
            shape=[784, 100],
            dtype='float32',
            initialize_attr=initializer)

        self.assertIsNotNone(param)
        self.assertEqual('fc.w', param.name)
        # The shape was given as a list and is compared against a tuple.
        self.assertEqual((784, 100), param.shape)
        self.assertEqual(core.DataType.FP32, param.data_type)
        self.assertEqual(0, param.block.idx)
if __name__ == '__main__':
unittest.main()
import unittest import unittest
from paddle.v2.framework.graph import g_program
import paddle.v2.framework.core as core
from paddle.v2.framework.framework import Program
from paddle.v2.framework.framework import g_program
class TestProgram(unittest.TestCase): class TestProgram(unittest.TestCase):
...@@ -31,6 +34,62 @@ class TestProgram(unittest.TestCase): ...@@ -31,6 +34,62 @@ class TestProgram(unittest.TestCase):
self.assertEqual(1, b.idx) self.assertEqual(1, b.idx)
self.assertEqual(0, b.parent_idx) self.assertEqual(0, b.parent_idx)
def test_desc_append_backward(self):
    """append_backward on a raw ProgramDesc adds the expected grad ops."""
    prog = core.ProgramDesc.__create_program_desc__()
    self.assertIsNotNone(prog)
    block = prog.block(0)
    self.assertIsNotNone(block)

    # out1 = mul(x1, y1)
    mul_op_desc = block.append_op()
    mul_op_desc.set_type("mul")
    mul_op_desc.set_input("X", ["x1"])
    mul_op_desc.set_input("Y", ["y1"])
    mul_op_desc.set_output("Out", ["out1"])

    # out2 = elementwise_add(out1, b1)
    sum_op_desc = block.append_op()
    sum_op_desc.set_type("elementwise_add")
    sum_op_desc.set_input("X", ["out1"])
    sum_op_desc.set_input("Y", ["b1"])
    sum_op_desc.set_output("Out", ["out2"])

    target = block.var("out2")
    param_to_grad = prog.append_backward(target, set())

    # Each entry is expected to hold (gradient name, 0).
    for var_name in ("x1", "y1", "out1", "b1"):
        grad_info = param_to_grad[var_name]
        self.assertEqual(grad_info[0], var_name + "@GRAD")
        self.assertEqual(grad_info[1], 0)

    expect_ops = [
        "mul", "elementwise_add", "fill_constant", "elementwise_add_grad",
        "mul_grad"
    ]
    actual_ops = [op.type() for op in block.all_ops()]
    self.assertEqual(actual_ops, expect_ops)
def test_append_backward(self):
    """Smoke test: append_backward on a one-op program must not raise.

    No assertions are made on the result.
    """
    program = Program.instance()
    global_block = program.global_block()

    mul_x = global_block.create_parameter(
        dtype="float32", shape=[5, 10], lod_level=0, name="mul.x")
    mul_y = global_block.create_var(
        dtype="float32", shape=[10, 8], lod_level=0, name="mul.y")
    mul_out = global_block.create_var(
        dtype="float32", shape=[5, 8], lod_level=0, name="mul.out")

    global_block.append_op(
        type="mul",
        inputs={"X": [mul_x],
                "Y": mul_y},
        outputs={"Out": [mul_out]},
        attrs={"x_num_col_dims": 1})

    program.append_backward(mul_out, set())
if __name__ == '__main__': if __name__ == '__main__':
unittest.main() unittest.main()
...@@ -53,7 +53,13 @@ class TestOpDesc(unittest.TestCase): ...@@ -53,7 +53,13 @@ class TestOpDesc(unittest.TestCase):
self.assertEqual(8, len(op.attr_names())) self.assertEqual(8, len(op.attr_names()))
op.set_block_attr("block_attr", prog.block(0)) op.set_block_attr("block_attr", prog.block(0))
self.assertEqual(0, op.get_block_attr("block_attr")) self.assertEqual(0, op.block_attr("block_attr"))
mul_op = block.append_op()
mul_op.set_type("mul")
mul_op.check_attrs()
self.assertEqual(mul_op.attr("x_num_col_dims"), 1)
self.assertEqual(mul_op.attr("y_num_col_dims"), 1)
class TestProgramDesc(unittest.TestCase): class TestProgramDesc(unittest.TestCase):
...@@ -87,18 +93,22 @@ class TestVarDesc(unittest.TestCase): ...@@ -87,18 +93,22 @@ class TestVarDesc(unittest.TestCase):
def test_shape(self): def test_shape(self):
program_desc = core.ProgramDesc.__create_program_desc__() program_desc = core.ProgramDesc.__create_program_desc__()
block = program_desc.block(0) block = program_desc.block(0)
var = block.new_var('my_var') var = block.var('my_var')
var.set_type(core.VarDesc.VarType.SELECTED_ROWS)
src_shape = [3, 2, 10, 8] src_shape = [3, 2, 10, 8]
var.set_shape(src_shape) var.set_shape(src_shape)
res_shape = var.shape() res_shape = var.shape()
self.assertEqual(src_shape, res_shape) self.assertEqual(src_shape, res_shape)
self.assertEqual(core.VarDesc.VarType.SELECTED_ROWS, var.type())
def test_data_type(self): def test_data_type(self):
program_desc = core.ProgramDesc.__create_program_desc__() program_desc = core.ProgramDesc.__create_program_desc__()
block = program_desc.block(0) block = program_desc.block(0)
var = block.new_var('my_var') var = block.var('my_var')
var.set_type(core.VarDesc.VarType.LOD_TENSOR)
var.set_data_type(core.DataType.INT32) var.set_data_type(core.DataType.INT32)
self.assertEqual(core.DataType.INT32, var.data_type()) self.assertEqual(core.DataType.INT32, var.data_type())
self.assertEqual(core.VarDesc.VarType.LOD_TENSOR, var.type())
class TestBlockDesc(unittest.TestCase): class TestBlockDesc(unittest.TestCase):
...@@ -107,12 +117,12 @@ class TestBlockDesc(unittest.TestCase): ...@@ -107,12 +117,12 @@ class TestBlockDesc(unittest.TestCase):
self.assertIsNotNone(prog) self.assertIsNotNone(prog)
block = prog.block(0) block = prog.block(0)
self.assertIsNotNone(block) self.assertIsNotNone(block)
var1 = block.new_var("var1") var1 = block.var("var1")
var2 = block.new_var("var2") var2 = block.var("var2")
var3 = block.new_var("var3") var3 = block.var("var3")
all_vars = block.all_vars() all_vars = block.all_vars()
self.assertEqual(set(all_vars), set([var1, var2, var3])) self.assertEqual(set(all_vars), set([var1, var2, var3]))
var2_re = block.var("var2") var2_re = block.find_var("var2")
self.assertEqual(var2_re, var2) self.assertEqual(var2_re, var2)
def test_add_op(self): def test_add_op(self):
......
...@@ -66,7 +66,7 @@ class PySimpleRNNTest(unittest.TestCase): ...@@ -66,7 +66,7 @@ class PySimpleRNNTest(unittest.TestCase):
def create_tensor(scope, name, shape, np_data): def create_tensor(scope, name, shape, np_data):
tensor = scope.new_var(name).get_tensor() tensor = scope.var(name).get_tensor()
tensor.set_dims(shape) tensor.set_dims(shape)
tensor.set(np_data, core.CPUPlace()) tensor.set(np_data, core.CPUPlace())
return tensor return tensor
...@@ -125,8 +125,8 @@ class RecurrentOpTest(unittest.TestCase): ...@@ -125,8 +125,8 @@ class RecurrentOpTest(unittest.TestCase):
h_boot_np_data = self.py_rnn.h_boot h_boot_np_data = self.py_rnn.h_boot
create_tensor(self.scope, "h_boot", [self.batch_size, self.input_dim], create_tensor(self.scope, "h_boot", [self.batch_size, self.input_dim],
h_boot_np_data) h_boot_np_data)
self.scope.new_var("step_scopes") self.scope.var("step_scopes")
self.scope.new_var("h@mem") self.scope.var("h@mem")
def create_rnn_op(self): def create_rnn_op(self):
# create RNNOp # create RNNOp
......
...@@ -18,7 +18,7 @@ class TestScope(unittest.TestCase): ...@@ -18,7 +18,7 @@ class TestScope(unittest.TestCase):
def test_create_var_get_var(self): def test_create_var_get_var(self):
paddle_c = paddle.v2.framework.core paddle_c = paddle.v2.framework.core
scope = paddle_c.Scope() scope = paddle_c.Scope()
var_a = scope.new_var("var_a") var_a = scope.var("var_a")
self.assertIsNotNone(var_a) self.assertIsNotNone(var_a)
self.assertIsNotNone(scope.find_var('var_a')) self.assertIsNotNone(scope.find_var('var_a'))
scope2 = scope.new_scope() scope2 = scope.new_scope()
...@@ -27,7 +27,7 @@ class TestScope(unittest.TestCase): ...@@ -27,7 +27,7 @@ class TestScope(unittest.TestCase):
def test_var_get_int(self): def test_var_get_int(self):
paddle_c = paddle.v2.framework.core paddle_c = paddle.v2.framework.core
scope = paddle_c.Scope() scope = paddle_c.Scope()
var = scope.new_var("test_int") var = scope.var("test_int")
var.set_int(10) var.set_int(10)
self.assertTrue(var.is_int()) self.assertTrue(var.is_int())
self.assertEqual(10, var.get_int()) self.assertEqual(10, var.get_int())
......
import unittest
import numpy as np
import sys
from op_test import OpTest
class TestConcatOp(OpTest):
    """sequence_concat check: concatenate along axis 1 at LoD level 1."""

    def set_data(self):
        axis, level = 1, 1
        # Both inputs carry the same two-level LoD; they differ in the size
        # of dimension 1 (6 vs 8).
        x0 = np.random.random((4, 6, 3)).astype('float32')
        lod0 = [[0, 2, 4], [0, 1, 2, 3, 4]]
        x1 = np.random.random((4, 8, 3)).astype('float32')
        lod1 = [[0, 2, 4], [0, 1, 2, 3, 4]]

        self.inputs = {'X': [('x0', (x0, lod0)), ('x1', (x1, lod1))]}
        self.attrs = {'axis': axis, 'level': level}

        # Expected output: concatenate matching sub-sequences, then stack
        # the pieces along axis 0.
        offsets0, offsets1 = lod0[level], lod1[level]
        pieces = [
            np.concatenate(
                (x0[offsets0[i]:offsets0[i + 1], :],
                 x1[offsets1[i]:offsets1[i + 1], :]),
                axis=axis) for i in range(4)
        ]
        self.outputs = {'Out': np.concatenate(pieces, axis=0)}

    def setUp(self):
        self.op_type = "sequence_concat"
        self.set_data()

    def test_check_output(self):
        self.check_output()

    def test_check_grad(self):
        self.check_grad(['x0'], 'Out')
class TestConcatOpDiffLod(TestConcatOp):
    """sequence_concat along axis 0 at level 1 with different input LoDs."""

    def set_data(self):
        axis, level = 0, 1
        # x0 has 4 rows and x1 has 5; their LoDs differ accordingly.
        x0 = np.random.random((4, 6, 3)).astype('float32')
        lod0 = [[0, 2, 4], [0, 1, 2, 3, 4]]
        x1 = np.random.random((5, 6, 3)).astype('float32')
        lod1 = [[0, 3, 5], [0, 1, 2, 3, 5]]

        self.inputs = {'X': [('x0', (x0, lod0)), ('x1', (x1, lod1))]}
        self.attrs = {'axis': axis, 'level': level}

        offsets0, offsets1 = lod0[level], lod1[level]
        pieces = [
            np.concatenate(
                (x0[offsets0[i]:offsets0[i + 1], :],
                 x1[offsets1[i]:offsets1[i + 1], :]),
                axis=axis) for i in range(4)
        ]
        self.outputs = {'Out': np.concatenate(pieces, axis=0)}
class TestConcatOpLevelZero(TestConcatOp):
    """sequence_concat along axis 0 using LoD level 0."""

    def set_data(self):
        axis, level = 0, 0
        x0 = np.random.random((4, 3, 4)).astype('float32')
        lod0 = [[0, 2, 4], [0, 1, 2, 3, 4]]
        x1 = np.random.random((5, 3, 4)).astype('float32')
        lod1 = [[0, 3, 5], [0, 1, 3, 4, 5]]

        self.inputs = {'X': [('x0', (x0, lod0)), ('x1', (x1, lod1))]}
        self.attrs = {'axis': axis, 'level': level}

        # The loop below walks two sequences using the level-0 offsets.
        offsets0, offsets1 = lod0[level], lod1[level]
        pieces = [
            np.concatenate(
                (x0[offsets0[i]:offsets0[i + 1], :],
                 x1[offsets1[i]:offsets1[i + 1], :]),
                axis=axis) for i in range(2)
        ]
        self.outputs = {'Out': np.concatenate(pieces, axis=0)}
if __name__ == '__main__':
    # NOTE(review): sys.exit(0) runs before unittest.main(), so running this
    # file as a script exits successfully without executing any test.
    # Presumably the test is disabled on purpose -- confirm before removing.
    sys.exit(0)
    unittest.main()
...@@ -82,5 +82,70 @@ class TestSeqSumPool2D(TestSeqAvgPool2D): ...@@ -82,5 +82,70 @@ class TestSeqSumPool2D(TestSeqAvgPool2D):
out[i] = np.reshape(sub_x.sum(axis=0), (3, 17)) out[i] = np.reshape(sub_x.sum(axis=0), (3, 17))
class TestSeqSqrtPool(TestSeqAvgPool):
    """SQRT strategy: sum of each sequence divided by sqrt of its length."""

    def compute(self):
        self.attrs = {'strategy': SeqPoolType.SQRT}
        x, lod = self.inputs['X']
        out = self.outputs['Out']
        offsets = lod[0]
        for i in range(4):
            begin, end = offsets[i], offsets[i + 1]
            seq_len = end - begin
            out[i] = x[begin:end, :].sum(axis=0) / np.sqrt(seq_len)
class TestSeqSqrtPool2D(TestSeqAvgPool2D):
    """SQRT strategy on inputs whose rows are flattened to 3 * 17 values."""

    def compute(self):
        self.attrs = {'strategy': SeqPoolType.SQRT}
        x, lod = self.inputs['X']
        out = self.outputs['Out']
        offsets = lod[0]
        for i in range(4):
            begin, end = offsets[i], offsets[i + 1]
            flat_rows = np.reshape(x[begin:end, :], (-1, 3 * 17))
            seq_len = end - begin
            pooled = flat_rows.sum(axis=0) / np.sqrt(seq_len)
            out[i] = np.reshape(pooled, (3, 17))

    def test_check_grad(self):
        self.check_grad(["X"], "Out", max_relative_error=0.06)
class TestSeqLastPool(TestSeqAvgPool):
    """LAST strategy: each output row is the last row of its sequence."""

    def compute(self):
        self.attrs = {'strategy': SeqPoolType.LAST}
        x, lod = self.inputs['X']
        out = self.outputs['Out']
        offsets = lod[0]
        for i in range(4):
            sequence = x[offsets[i]:offsets[i + 1], :]
            out[i] = sequence[-1, :]
class TestSeqLastPool2D(TestSeqAvgPool2D):
    """LAST strategy on inputs whose rows are flattened to 3 * 17 values."""

    def compute(self):
        self.attrs = {'strategy': SeqPoolType.LAST}
        x, lod = self.inputs['X']
        out = self.outputs['Out']
        offsets = lod[0]
        for i in range(4):
            flat_rows = np.reshape(x[offsets[i]:offsets[i + 1], :],
                                   (-1, 3 * 17))
            out[i] = np.reshape(flat_rows[-1, :], (3, 17))
class TestSeqFirstPool(TestSeqAvgPool):
    """FIRST strategy: each output row is the first row of its sequence."""

    def compute(self):
        self.attrs = {'strategy': SeqPoolType.FIRST}
        x, lod = self.inputs['X']
        out = self.outputs['Out']
        offsets = lod[0]
        for i in range(4):
            sequence = x[offsets[i]:offsets[i + 1], :]
            out[i] = sequence[0, :]
class TestSeqFirstPool2D(TestSeqAvgPool2D):
    """FIRST strategy on inputs whose rows are flattened to 3 * 17 values."""

    def compute(self):
        self.attrs = {'strategy': SeqPoolType.FIRST}
        x, lod = self.inputs['X']
        out = self.outputs['Out']
        offsets = lod[0]
        for i in range(4):
            flat_rows = np.reshape(x[offsets[i]:offsets[i + 1], :],
                                   (-1, 3 * 17))
            out[i] = np.reshape(flat_rows[0, :], (3, 17))
if __name__ == '__main__': if __name__ == '__main__':
unittest.main() unittest.main()
...@@ -6,7 +6,7 @@ import numpy ...@@ -6,7 +6,7 @@ import numpy
class TestTensor(unittest.TestCase): class TestTensor(unittest.TestCase):
def test_int_tensor(self): def test_int_tensor(self):
scope = core.Scope() scope = core.Scope()
var = scope.new_var("test_tensor") var = scope.var("test_tensor")
place = core.CPUPlace() place = core.CPUPlace()
tensor = var.get_tensor() tensor = var.get_tensor()
...@@ -25,7 +25,7 @@ class TestTensor(unittest.TestCase): ...@@ -25,7 +25,7 @@ class TestTensor(unittest.TestCase):
def test_float_tensor(self): def test_float_tensor(self):
scope = core.Scope() scope = core.Scope()
var = scope.new_var("test_tensor") var = scope.var("test_tensor")
place = core.CPUPlace() place = core.CPUPlace()
tensor = var.get_tensor() tensor = var.get_tensor()
...@@ -46,7 +46,7 @@ class TestTensor(unittest.TestCase): ...@@ -46,7 +46,7 @@ class TestTensor(unittest.TestCase):
def test_int_lod_tensor(self): def test_int_lod_tensor(self):
place = core.CPUPlace() place = core.CPUPlace()
scope = core.Scope() scope = core.Scope()
var_lod = scope.new_var("test_lod_tensor") var_lod = scope.var("test_lod_tensor")
lod_tensor = var_lod.get_tensor() lod_tensor = var_lod.get_tensor()
lod_tensor.set_dims([4, 4, 6]) lod_tensor.set_dims([4, 4, 6])
...@@ -68,7 +68,7 @@ class TestTensor(unittest.TestCase): ...@@ -68,7 +68,7 @@ class TestTensor(unittest.TestCase):
def test_float_lod_tensor(self): def test_float_lod_tensor(self):
place = core.CPUPlace() place = core.CPUPlace()
scope = core.Scope() scope = core.Scope()
var_lod = scope.new_var("test_lod_tensor") var_lod = scope.var("test_lod_tensor")
lod_tensor = var_lod.get_tensor() lod_tensor = var_lod.get_tensor()
lod_tensor.set_dims([5, 2, 3, 4]) lod_tensor.set_dims([5, 2, 3, 4])
......
...@@ -13,7 +13,7 @@ class TestTensorArray(unittest.TestCase): ...@@ -13,7 +13,7 @@ class TestTensorArray(unittest.TestCase):
# create a LoDTensor # create a LoDTensor
self.scope = core.Scope() self.scope = core.Scope()
var = self.scope.new_var("test_tensor") var = self.scope.var("test_tensor")
self.place = core.CPUPlace() self.place = core.CPUPlace()
tensor = var.get_tensor() tensor = var.get_tensor()
tensor.set_dims([self.batch_size, self.dim]) tensor.set_dims([self.batch_size, self.dim])
...@@ -51,7 +51,7 @@ class TestTensorArray(unittest.TestCase): ...@@ -51,7 +51,7 @@ class TestTensorArray(unittest.TestCase):
self.ta.unstack(self.tensor) self.ta.unstack(self.tensor)
# create a tensor with shape of [1, self.dim] # create a tensor with shape of [1, self.dim]
var = self.scope.new_var("hell") var = self.scope.var("hell")
tensor = var.get_tensor() tensor = var.get_tensor()
tensor.set_dims([1, self.dim]) tensor.set_dims([1, self.dim])
tensor.alloc_float(self.place) tensor.alloc_float(self.place)
...@@ -71,7 +71,7 @@ class TestTensorArray(unittest.TestCase): ...@@ -71,7 +71,7 @@ class TestTensorArray(unittest.TestCase):
self.ta.unstack(self.tensor) self.ta.unstack(self.tensor)
# create a tensor with shape of [1, self.dim] # create a tensor with shape of [1, self.dim]
var = self.scope.new_var("hell") var = self.scope.var("hell")
tensor = var.get_tensor() tensor = var.get_tensor()
tensor.set_dims([1, self.dim]) tensor.set_dims([1, self.dim])
tensor.alloc_float(self.place) tensor.alloc_float(self.place)
......
...@@ -14,7 +14,7 @@ class TestUniformRandomOp(unittest.TestCase): ...@@ -14,7 +14,7 @@ class TestUniformRandomOp(unittest.TestCase):
def uniform_random_test(self, place): def uniform_random_test(self, place):
scope = core.Scope() scope = core.Scope()
scope.new_var('X').get_tensor() scope.var('X').get_tensor()
op = Operator( op = Operator(
"uniform_random", "uniform_random",
......
import unittest
from paddle.v2.framework.framework import Variable, g_program
import paddle.v2.framework.core as core
import numpy as np
class TestVariable(unittest.TestCase):
    """Tests for Variable dtype conversion and block.create_var()."""

    def test_np_dtype_convert(self):
        """numpy dtypes / dtype names map to core.DataType members."""
        DT = core.DataType
        convert = Variable._convert_np_dtype_to_dtype_

        self.assertEqual(DT.FP32, convert(np.float32))
        self.assertEqual(DT.FP16, convert("float16"))
        self.assertEqual(DT.FP64, convert("float64"))
        self.assertEqual(DT.INT32, convert("int32"))
        self.assertEqual(DT.INT16, convert("int16"))
        self.assertEqual(DT.INT64, convert("int64"))
        self.assertEqual(DT.BOOL, convert("bool"))
        # "int8" is expected to be rejected.
        with self.assertRaises(ValueError):
            convert("int8")

    def test_var(self):
        """create_var() creates a variable, returns it again by name, and
        rejects a conflicting shape for an existing name."""
        block = g_program.current_block()
        created = block.create_var(
            dtype="float64", shape=[784, 100], lod_level=0, name="fc.w")
        self.assertNotEqual(str(created), "")
        self.assertEqual(core.DataType.FP64, created.data_type)
        self.assertEqual((784, 100), created.shape)
        self.assertEqual("fc.w", created.name)
        self.assertEqual(0, created.lod_level)

        # Asking for the same name without other arguments must yield a
        # variable with the attributes set above.
        fetched = block.create_var(name='fc.w')
        self.assertEqual(core.DataType.FP64, fetched.data_type)
        self.assertEqual((784, 100), fetched.shape)
        self.assertEqual("fc.w", fetched.name)
        self.assertEqual(0, fetched.lod_level)

        with self.assertRaises(ValueError):
            block.create_var(name="fc.w", shape=(24, 100))
if __name__ == '__main__':
unittest.main()
The examples in v1_api_demo are using v1_api now, and will be upgraded into v2_api later. The examples in v1_api_demo are using v1_api currently, and will be upgraded to v2_api later.
Thus, v1_api_demo is a temporary directory. We decide not to maintain it and will delete it in future. Thus, v1_api_demo is a temporary directory. We decide not to maintain it and will delete it in future.
Please go to [PaddlePaddle/book](https://github.com/PaddlePaddle/book) and Please go to [PaddlePaddle/book](https://github.com/PaddlePaddle/book) and
......
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册