提交 83627d3e 编写于 作者: D dangqingqing

Merge branch 'develop' of https://github.com/PaddlePaddle/Paddle into lstm

...@@ -105,6 +105,12 @@ if (WITH_C_API AND WITH_PYTHON) ...@@ -105,6 +105,12 @@ if (WITH_C_API AND WITH_PYTHON)
"different Python interpreter from compiling.") "different Python interpreter from compiling.")
endif() endif()
if(MOBILE_INFERENCE)
set(THIRD_PARTY_BUILD_TYPE MinSizeRel)
else()
set(THIRD_PARTY_BUILD_TYPE Release)
endif()
######################################################################################## ########################################################################################
include(external/mklml) # download mklml package include(external/mklml) # download mklml package
......
...@@ -8,7 +8,7 @@ ExternalProject_Add( ...@@ -8,7 +8,7 @@ ExternalProject_Add(
extern_eigen3 extern_eigen3
${EXTERNAL_PROJECT_LOG_ARGS} ${EXTERNAL_PROJECT_LOG_ARGS}
GIT_REPOSITORY "https://github.com/RLovelett/eigen.git" GIT_REPOSITORY "https://github.com/RLovelett/eigen.git"
GIT_TAG "master" GIT_TAG 4e79cb69b9425f5f8c3a84be4350d4ab75b5fd9d
PREFIX ${EIGEN_SOURCE_DIR} PREFIX ${EIGEN_SOURCE_DIR}
UPDATE_COMMAND "" UPDATE_COMMAND ""
CONFIGURE_COMMAND "" CONFIGURE_COMMAND ""
......
...@@ -36,6 +36,7 @@ ExternalProject_Add( ...@@ -36,6 +36,7 @@ ExternalProject_Add(
# change this back to the official Github repo once my PR is # change this back to the official Github repo once my PR is
# merged. # merged.
GIT_REPOSITORY "https://github.com/wangkuiyi/gflags.git" GIT_REPOSITORY "https://github.com/wangkuiyi/gflags.git"
GIT_TAG 986964c07427ecb9cdb5bd73f73ebbd40e54dadb
PREFIX ${GFLAGS_SOURCES_DIR} PREFIX ${GFLAGS_SOURCES_DIR}
UPDATE_COMMAND "" UPDATE_COMMAND ""
CMAKE_ARGS -DCMAKE_CXX_COMPILER=${CMAKE_CXX_COMPILER} CMAKE_ARGS -DCMAKE_CXX_COMPILER=${CMAKE_CXX_COMPILER}
...@@ -45,11 +46,11 @@ ExternalProject_Add( ...@@ -45,11 +46,11 @@ ExternalProject_Add(
-DCMAKE_INSTALL_PREFIX=${GFLAGS_INSTALL_DIR} -DCMAKE_INSTALL_PREFIX=${GFLAGS_INSTALL_DIR}
-DCMAKE_POSITION_INDEPENDENT_CODE=ON -DCMAKE_POSITION_INDEPENDENT_CODE=ON
-DBUILD_TESTING=OFF -DBUILD_TESTING=OFF
-DCMAKE_BUILD_TYPE=Release -DCMAKE_BUILD_TYPE=${THIRD_PARTY_BUILD_TYPE}
${EXTERNAL_OPTIONAL_ARGS} ${EXTERNAL_OPTIONAL_ARGS}
CMAKE_CACHE_ARGS -DCMAKE_INSTALL_PREFIX:PATH=${GFLAGS_INSTALL_DIR} CMAKE_CACHE_ARGS -DCMAKE_INSTALL_PREFIX:PATH=${GFLAGS_INSTALL_DIR}
-DCMAKE_POSITION_INDEPENDENT_CODE:BOOL=ON -DCMAKE_POSITION_INDEPENDENT_CODE:BOOL=ON
-DCMAKE_BUILD_TYPE:STRING=Release -DCMAKE_BUILD_TYPE:STRING=${THIRD_PARTY_BUILD_TYPE}
) )
ADD_LIBRARY(gflags STATIC IMPORTED GLOBAL) ADD_LIBRARY(gflags STATIC IMPORTED GLOBAL)
......
...@@ -31,6 +31,7 @@ ExternalProject_Add( ...@@ -31,6 +31,7 @@ ExternalProject_Add(
${EXTERNAL_PROJECT_LOG_ARGS} ${EXTERNAL_PROJECT_LOG_ARGS}
DEPENDS gflags DEPENDS gflags
GIT_REPOSITORY "https://github.com/google/glog.git" GIT_REPOSITORY "https://github.com/google/glog.git"
GIT_TAG v0.3.5
PREFIX ${GLOG_SOURCES_DIR} PREFIX ${GLOG_SOURCES_DIR}
UPDATE_COMMAND "" UPDATE_COMMAND ""
CMAKE_ARGS -DCMAKE_CXX_COMPILER=${CMAKE_CXX_COMPILER} CMAKE_ARGS -DCMAKE_CXX_COMPILER=${CMAKE_CXX_COMPILER}
...@@ -43,12 +44,12 @@ ExternalProject_Add( ...@@ -43,12 +44,12 @@ ExternalProject_Add(
-DWITH_GFLAGS=ON -DWITH_GFLAGS=ON
-Dgflags_DIR=${GFLAGS_INSTALL_DIR}/lib/cmake/gflags -Dgflags_DIR=${GFLAGS_INSTALL_DIR}/lib/cmake/gflags
-DBUILD_TESTING=OFF -DBUILD_TESTING=OFF
-DCMAKE_BUILD_TYPE=Release -DCMAKE_BUILD_TYPE=${THIRD_PARTY_BUILD_TYPE}
${EXTERNAL_OPTIONAL_ARGS} ${EXTERNAL_OPTIONAL_ARGS}
CMAKE_CACHE_ARGS -DCMAKE_INSTALL_PREFIX:PATH=${GLOG_INSTALL_DIR} CMAKE_CACHE_ARGS -DCMAKE_INSTALL_PREFIX:PATH=${GLOG_INSTALL_DIR}
-DCMAKE_INSTALL_LIBDIR:PATH=${GLOG_INSTALL_DIR}/lib -DCMAKE_INSTALL_LIBDIR:PATH=${GLOG_INSTALL_DIR}/lib
-DCMAKE_POSITION_INDEPENDENT_CODE:BOOL=ON -DCMAKE_POSITION_INDEPENDENT_CODE:BOOL=ON
-DCMAKE_BUILD_TYPE:STRING=Release -DCMAKE_BUILD_TYPE:STRING=${THIRD_PARTY_BUILD_TYPE}
) )
ADD_LIBRARY(glog STATIC IMPORTED GLOBAL) ADD_LIBRARY(glog STATIC IMPORTED GLOBAL)
......
...@@ -56,11 +56,11 @@ IF(WITH_TESTING) ...@@ -56,11 +56,11 @@ IF(WITH_TESTING)
-DBUILD_GMOCK=ON -DBUILD_GMOCK=ON
-Dgtest_disable_pthreads=ON -Dgtest_disable_pthreads=ON
-Dgtest_force_shared_crt=ON -Dgtest_force_shared_crt=ON
-DCMAKE_BUILD_TYPE=Release -DCMAKE_BUILD_TYPE=${THIRD_PARTY_BUILD_TYPE}
${EXTERNAL_OPTIONAL_ARGS} ${EXTERNAL_OPTIONAL_ARGS}
CMAKE_CACHE_ARGS -DCMAKE_INSTALL_PREFIX:PATH=${GTEST_INSTALL_DIR} CMAKE_CACHE_ARGS -DCMAKE_INSTALL_PREFIX:PATH=${GTEST_INSTALL_DIR}
-DCMAKE_POSITION_INDEPENDENT_CODE:BOOL=ON -DCMAKE_POSITION_INDEPENDENT_CODE:BOOL=ON
-DCMAKE_BUILD_TYPE:STRING=Release -DCMAKE_BUILD_TYPE:STRING=${THIRD_PARTY_BUILD_TYPE}
) )
ADD_LIBRARY(gtest STATIC IMPORTED GLOBAL) ADD_LIBRARY(gtest STATIC IMPORTED GLOBAL)
......
...@@ -191,12 +191,12 @@ FUNCTION(build_protobuf TARGET_NAME BUILD_FOR_HOST) ...@@ -191,12 +191,12 @@ FUNCTION(build_protobuf TARGET_NAME BUILD_FOR_HOST)
${OPTIONAL_ARGS} ${OPTIONAL_ARGS}
-Dprotobuf_BUILD_TESTS=OFF -Dprotobuf_BUILD_TESTS=OFF
-DCMAKE_POSITION_INDEPENDENT_CODE=ON -DCMAKE_POSITION_INDEPENDENT_CODE=ON
-DCMAKE_BUILD_TYPE=Release -DCMAKE_BUILD_TYPE=${THIRD_PARTY_BUILD_TYPE}
-DCMAKE_INSTALL_PREFIX=${PROTOBUF_INSTALL_DIR} -DCMAKE_INSTALL_PREFIX=${PROTOBUF_INSTALL_DIR}
-DCMAKE_INSTALL_LIBDIR=lib -DCMAKE_INSTALL_LIBDIR=lib
CMAKE_CACHE_ARGS CMAKE_CACHE_ARGS
-DCMAKE_INSTALL_PREFIX:PATH=${PROTOBUF_INSTALL_DIR} -DCMAKE_INSTALL_PREFIX:PATH=${PROTOBUF_INSTALL_DIR}
-DCMAKE_BUILD_TYPE:STRING=Release -DCMAKE_BUILD_TYPE:STRING=${THIRD_PARTY_BUILD_TYPE}
-DCMAKE_VERBOSE_MAKEFILE:BOOL=OFF -DCMAKE_VERBOSE_MAKEFILE:BOOL=OFF
-DCMAKE_POSITION_INDEPENDENT_CODE:BOOL=ON -DCMAKE_POSITION_INDEPENDENT_CODE:BOOL=ON
${OPTIONAL_CACHE_ARGS} ${OPTIONAL_CACHE_ARGS}
......
...@@ -35,6 +35,7 @@ ExternalProject_Add( ...@@ -35,6 +35,7 @@ ExternalProject_Add(
extern_warpctc extern_warpctc
${EXTERNAL_PROJECT_LOG_ARGS} ${EXTERNAL_PROJECT_LOG_ARGS}
GIT_REPOSITORY "https://github.com/gangliao/warp-ctc.git" GIT_REPOSITORY "https://github.com/gangliao/warp-ctc.git"
GIT_TAG b63a0644654a3e0ed624c85a1767bc8193aead09
PREFIX ${WARPCTC_SOURCES_DIR} PREFIX ${WARPCTC_SOURCES_DIR}
UPDATE_COMMAND "" UPDATE_COMMAND ""
CMAKE_ARGS -DCMAKE_CXX_COMPILER=${CMAKE_CXX_COMPILER} CMAKE_ARGS -DCMAKE_CXX_COMPILER=${CMAKE_CXX_COMPILER}
...@@ -48,9 +49,9 @@ ExternalProject_Add( ...@@ -48,9 +49,9 @@ ExternalProject_Add(
-DCMAKE_DISABLE_FIND_PACKAGE_Torch=ON -DCMAKE_DISABLE_FIND_PACKAGE_Torch=ON
-DBUILD_SHARED=ON -DBUILD_SHARED=ON
-DCMAKE_POSITION_INDEPENDENT_CODE=ON -DCMAKE_POSITION_INDEPENDENT_CODE=ON
-DCMAKE_BUILD_TYPE=Release -DCMAKE_BUILD_TYPE=${THIRD_PARTY_BUILD_TYPE}
${EXTERNAL_OPTIONAL_ARGS} ${EXTERNAL_OPTIONAL_ARGS}
CMAKE_CACHE_ARGS -DCMAKE_BUILD_TYPE:STRING=Release CMAKE_CACHE_ARGS -DCMAKE_BUILD_TYPE:STRING=${THIRD_PARTY_BUILD_TYPE}
-DCMAKE_POSITION_INDEPENDENT_CODE:BOOL=ON -DCMAKE_POSITION_INDEPENDENT_CODE:BOOL=ON
-DCMAKE_INSTALL_PREFIX:PATH=${WARPCTC_INSTALL_DIR} -DCMAKE_INSTALL_PREFIX:PATH=${WARPCTC_INSTALL_DIR}
) )
......
...@@ -42,11 +42,11 @@ ExternalProject_Add( ...@@ -42,11 +42,11 @@ ExternalProject_Add(
-DBUILD_SHARED_LIBS=OFF -DBUILD_SHARED_LIBS=OFF
-DCMAKE_POSITION_INDEPENDENT_CODE=ON -DCMAKE_POSITION_INDEPENDENT_CODE=ON
-DCMAKE_MACOSX_RPATH=ON -DCMAKE_MACOSX_RPATH=ON
-DCMAKE_BUILD_TYPE=Release -DCMAKE_BUILD_TYPE=${THIRD_PARTY_BUILD_TYPE}
${EXTERNAL_OPTIONAL_ARGS} ${EXTERNAL_OPTIONAL_ARGS}
CMAKE_CACHE_ARGS -DCMAKE_INSTALL_PREFIX:PATH=${ZLIB_INSTALL_DIR} CMAKE_CACHE_ARGS -DCMAKE_INSTALL_PREFIX:PATH=${ZLIB_INSTALL_DIR}
-DCMAKE_POSITION_INDEPENDENT_CODE:BOOL=ON -DCMAKE_POSITION_INDEPENDENT_CODE:BOOL=ON
-DCMAKE_BUILD_TYPE:STRING=Release -DCMAKE_BUILD_TYPE:STRING=${THIRD_PARTY_BUILD_TYPE}
) )
LIST(APPEND external_project_dependencies zlib) LIST(APPEND external_project_dependencies zlib)
......
...@@ -243,7 +243,7 @@ class SymbolTable { ...@@ -243,7 +243,7 @@ class SymbolTable {
// TODO determine whether name is generated by python or C++. // TODO determine whether name is generated by python or C++.
// Currently assume that a unique name will be generated by C++ if the // Currently assume that a unique name will be generated by C++ if the
// argument name is left default. // argument name is left default.
VarDesc* NewVar(const string& name=""); VarDesc* Var(const string& name="");
// find a VarDesc by name, if recursive is true, find parent's SymbolTable // find a VarDesc by name, if recursive is true, find parent's SymbolTable
// recursively. // recursively.
......
...@@ -33,7 +33,6 @@ digraph ImageClassificationGraph { ...@@ -33,7 +33,6 @@ digraph ImageClassificationGraph {
cost -> MSE_Grad [color=red]; cost -> MSE_Grad [color=red];
d_cost -> MSE_Grad [color=red]; d_cost -> MSE_Grad [color=red];
x -> MSE_Grad [color=red];
l -> MSE_Grad [color=red]; l -> MSE_Grad [color=red];
y -> MSE_Grad -> d_y [color=red]; y -> MSE_Grad -> d_y [color=red];
......
...@@ -3,15 +3,17 @@ ...@@ -3,15 +3,17 @@
## The Problem Posed ## The Problem Posed
In our current operator registration mechanism, for each operator, the programmer should register a *gradient operator creator* function, which takes a C++ operator instance, and returns the corresponding gradient instance. Currently, for each C++ operator class definition, there registers a *gradient operator creator* function, which takes a C++ operator instance and returns the corresponding gradient operator instance.
However, as we decided to separate the *compilation* and *execution* of DL models, we need to reshape the creator to take a protobuf `OpDesc` message, and returns a corresponding message. However, we noticed two problems with the current deisgn:
More than that, the new registration mechanism need to support the fact that an operators' gradient computation might be a composition of operators. 1. As we decided to separate the *compilation* and *execution* phases, we need to change the creator to take an `OpDesc` protobuf message in a `ProgramDesc` and inserts corresponding `OpDesc` messages into the `ProgramDesc` message.
## Current Implementation 1. Some operator's gradient computation requires more than one gradient operators. For example, the gradient of *minus* consists of two operators -- an identity operaotr and a scale operator. So we need to make the registration mechanism to support the mapping from an operator to a set of operators for gradient computation.
OpInfos store in a association map which key is the operator type. The `grad_op_type` indicate associated gradient operator type. Operator can create gradient operator by `OpInfo::creator_` of gradient. The pseudo code is ## The Current Implementation
The C++ class `OpInfos` store in a association map which key is the operator type. The `grad_op_type` indicate associated gradient operator type. Operator can create gradient operator by `OpInfo::creator_` of gradient. The pseudo code is
```cpp ```cpp
struct OpInfo { struct OpInfo {
......
...@@ -37,7 +37,7 @@ Scope is an association of a name to variable. All variables belong to `Scope`. ...@@ -37,7 +37,7 @@ Scope is an association of a name to variable. All variables belong to `Scope`.
```cpp ```cpp
class Scope { class Scope {
public: public:
Variable* NewVar(const std::string& name); Variable* Var(const std::string& name);
const Variable* FindVar(const std::string& name) const; const Variable* FindVar(const std::string& name) const;
private: private:
...@@ -98,7 +98,7 @@ class Scope { ...@@ -98,7 +98,7 @@ class Scope {
Variable* FindVar(const std::string& name) const; Variable* FindVar(const std::string& name) const;
// return if already contains same name variable. // return if already contains same name variable.
Variable* NewVar(const std::string& name); Variable* Var(const std::string& name);
private: private:
std::shared_ptr<Scope> parent_; std::shared_ptr<Scope> parent_;
...@@ -107,7 +107,7 @@ class Scope { ...@@ -107,7 +107,7 @@ class Scope {
``` ```
## Only scope can create a variable ## Only scope can create a variable
To ensure `only scope can create a variable`, we should mark `Variable`'s constructor as a private member function, and Scope is a friend class of Variable. And then only `NewVar` can construct `Variable`. To ensure `only scope can create a variable`, we should mark `Variable`'s constructor as a private member function, and Scope is a friend class of Variable. And then only `Var` can construct `Variable`.
## When scope destroyed, all variables inside this scope should be destroyed together ## When scope destroyed, all variables inside this scope should be destroyed together
...@@ -121,4 +121,4 @@ Also, as the parent scope is a `shared_ptr`, we can only `Create()` a scope shar ...@@ -121,4 +121,4 @@ Also, as the parent scope is a `shared_ptr`, we can only `Create()` a scope shar
## Orthogonal interface ## Orthogonal interface
`FindVar` will return `nullptr` when `name` is not found. It can be used as `Contains` method. `NewVar` will return an `Error` when there is a name conflict locally. Combine `FindVar` and `NewVar`, we can implement `NewVar` easily. `FindVar` will return `nullptr` when `name` is not found. It can be used as `Contains` method. `Var` will return an `Error` when there is a name conflict locally. Combine `FindVar` and `Var`, we can implement `Var` easily.
...@@ -161,7 +161,7 @@ class TensorArray: ...@@ -161,7 +161,7 @@ class TensorArray:
@name: str @name: str
the name of the variable to output. the name of the variable to output.
''' '''
tensor = NewVar(name) tensor = Var(name)
tensor_array_stack(self.name, tensor) tensor_array_stack(self.name, tensor)
return tensor return tensor
......
...@@ -16,16 +16,23 @@ The computation graph is constructed by Data Node and Operation Node. The concep ...@@ -16,16 +16,23 @@ The computation graph is constructed by Data Node and Operation Node. The concep
## Definition of VarDesc ## Definition of VarDesc
A VarDesc should have a name and value, in PaddlePaddle, the value will always be a tensor. Since we use LoDTensor most of the time. We add a LoDTesnorDesc to represent it. A VarDesc should have a name, and value. The are two kinds of variable type in compile time, they are `LoDTensor` and `SelectedRows`.
```proto ```proto
message VarDesc { message VarDesc {
required string name = 1; required string name = 1;
optional LoDTensorDesc lod_tensor = 2; enum VarType {
LOD_TENSOR = 0;
SELECTED_ROWS = 1;
}
required VarType type = 2;
optional LoDTensorDesc lod_desc = 3;
optional TensorDesc selected_rows_desc = 4;
optional bool persistable = 5 [ default = false ];
} }
``` ```
## Definition of LodTensorDesc ## Definition of TensorDesc
```proto ```proto
enum DataType { enum DataType {
...@@ -38,87 +45,25 @@ enum DataType { ...@@ -38,87 +45,25 @@ enum DataType {
FP64 = 6; FP64 = 6;
} }
message LoDTensorDesc { message TensorDesc {
required DataType data_type = 1; required DataType data_type = 1;
repeated int32 dims = 2; // [UNK, 640, 480] is saved as [-1, 640, 480] repeated int64 dims = 2; // [UNK, 640, 480] is saved as [-1, 640, 480]
optional int32 lod_level = 3 [default=0];
} }
``` ```
## Definition of Variable in Python A TensorDesc describes `SelectedRows` and `LoDTensor`. For details of `SelectedRows`, please reference [`SelectedRows`](./selected_rows.md).
In Python API, layer will take Variable as Input, and return Variable as Output. There should be a class `Variable` in python to help create and manage Variable.
```python
image = Variable(dims=[-1, 640, 480])
# fc1 and fc2 are both Variable
fc1 = layer.fc(input=image, output_size=10)
fc2 = layer.fc(input=fc1, output_size=20)
```
### what should class `Variable` Have
1. `name`.a name of string type is used to mark the value of the Variable.
1. `initializer`. Since our Tensor does not have value. we will always use some Operator to fullfill it when run. So we should have a initialize method to help add the init operator.
1. `operator`. Variable should record which operator produce itself. The reaon is:
- we use pd.eval(targets=[var1, var2]) to run the related ops to get the value of var1 and var2. var.op is used to trace the dependency of the current variable.
In PaddlePaddle, we use Block to describe Computation Graph, so in the code we will use Block but not Graph.
```python
import VarDesc
import LoDTensorDesc
import framework
def AddInitialOperator(variable, initializer):
# add an initialize Operator to block to init this Variable
class Variable(object):
def __init__(self, name, dims, type, initializer):
self._block = get_default_block()
self._name = name
self.op = None
tensor_desc = LoDTensorDesc(data_type=type, dims=dims)
_var_desc = VarDesc(name=name, lod_tensor=tensor_desc)
self._var = framework.CreateVar(_var_desc)
self._block.add_var(self)
# add initial op according to initializer ## Definition of LodTensorDesc
if initializer is not None:
AddInitialOperator(self, initializer)
def dims(self):
return self._var.dims()
def data_type(self):
return self._var.data_type()
def to_proto(self): ```proto
pass message LoDTensorDesc {
required TensorDesc tensor = 1;
optional int lod_level = 2;
}
``` ```
Then we can use this Variable to create a fc layer in Python. A LoDTensorDesc contains a tensor and a lod_level.
```python ## Definition of Variable in Python
import paddle as pd
def flatten_size(X, num_flatten_dims):
prod = 1 # of last num_flatten_dims
for i in xrange(num_flatten_dims):
prod = prod * X.dims[-i-1]
return prod
def layer.fc(X, output_size, num_flatten_dims):
W = Variable(pd.random_uniform(), type=FP32, dims=[flatten_size(X, num_flatten_dims), output_size])
b = Variable(pd.random_uniform(), type=FP32, dims=[output_size])
out = Variable(type=FP32)
y = operator.fc(X, W, b, output=out) # fc will put fc op input into out
pd.InferShape(y)
return out
x = Variable(dims=[-1, 640, 480])
y = layer.fc(x, output_size=100)
z = layer.fc(y, output_size=200)
paddle.eval(targets=[z], ...) For Variable in Python, please reference [`Python API`](./python_api.md).
print(z)
```
...@@ -26,7 +26,7 @@ FILE(GLOB PY_PADDLE_PYTHON_FILES ${PADDLE_SOURCE_DIR}/paddle/py_paddle/*.py) ...@@ -26,7 +26,7 @@ FILE(GLOB PY_PADDLE_PYTHON_FILES ${PADDLE_SOURCE_DIR}/paddle/py_paddle/*.py)
SET_SOURCE_FILES_PROPERTIES(Paddle.i PROPERTIES CPLUSPLUS ON) SET_SOURCE_FILES_PROPERTIES(Paddle.i PROPERTIES CPLUSPLUS ON)
SET(CMAKE_SWIG_OUTDIR ${CMAKE_CURRENT_BINARY_DIR}) SET(CMAKE_SWIG_OUTDIR ${CMAKE_CURRENT_BINARY_DIR})
SET(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -Wno-parentheses-equality -Wno-missing-field-initializers -Wno-self-assign") SET(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -Wno-parentheses-equality -Wno-missing-field-initializers -Wno-self-assign -ftls-model=global-dynamic")
SET(SWIG_MODULE_swig_paddle_EXTRA_DEPS SET(SWIG_MODULE_swig_paddle_EXTRA_DEPS
paddle_parameter paddle_parameter
......
...@@ -19,10 +19,10 @@ cc_test(scope_test SRCS scope_test.cc DEPS scope) ...@@ -19,10 +19,10 @@ cc_test(scope_test SRCS scope_test.cc DEPS scope)
proto_library(framework_proto SRCS framework.proto) proto_library(framework_proto SRCS framework.proto)
cc_library(attribute SRCS attribute.cc DEPS framework_proto) cc_library(attribute SRCS attribute.cc DEPS framework_proto)
cc_library(proto_desc SRCS var_desc.cc op_desc.cc block_desc.cc program_desc.cc DEPS attribute ddim) cc_library(proto_desc SRCS var_desc.cc op_desc.cc block_desc.cc program_desc.cc DEPS attribute ddim op_info)
cc_library(op_proto_maker SRCS op_proto_maker.cc DEPS framework_proto attribute) cc_library(op_proto_maker SRCS op_proto_maker.cc DEPS framework_proto attribute)
cc_test(op_proto_maker_test SRCS op_proto_maker_test.cc DEPS op_proto_maker) cc_test(op_proto_maker_test SRCS op_proto_maker_test.cc DEPS op_proto_maker)
cc_library(op_info SRCS op_info.cc DEPS attribute framework_proto proto_desc) cc_library(op_info SRCS op_info.cc DEPS attribute framework_proto)
cc_library(operator SRCS operator.cc DEPS op_info device_context tensor scope proto_desc) cc_library(operator SRCS operator.cc DEPS op_info device_context tensor scope proto_desc)
cc_test(operator_test SRCS operator_test.cc DEPS operator op_registry) cc_test(operator_test SRCS operator_test.cc DEPS operator op_registry)
...@@ -42,12 +42,17 @@ add_custom_command(TARGET framework_py_proto POST_BUILD ...@@ -42,12 +42,17 @@ add_custom_command(TARGET framework_py_proto POST_BUILD
cc_library(backward SRCS backward.cc DEPS net_op) cc_library(backward SRCS backward.cc DEPS net_op)
cc_test(backward_test SRCS backward_test.cc DEPS backward recurrent_op device_context) cc_test(backward_test SRCS backward_test.cc DEPS backward recurrent_op device_context)
cc_library(executor SRCS executor.cc DEPS op_registry device_context scope framework_proto backward ${GLOB_OP_LIB}) cc_library(executor SRCS executor.cc DEPS op_registry device_context scope framework_proto backward)
#if(WITH_GPU) set(EXECUTOR_TEST_OP elementwise_add_op gaussian_random_op feed_op fetch_op
# nv_test(executor_test SRCS executor_test.cc DEPS executor) mul_op sum_op squared_l2_distance_op fill_constant_op sgd_op mean_op)
#else() if(WITH_GPU)
# cc_test(executor_test SRCS executor_test.cc DEPS executor) nv_test(executor_test SRCS executor_test.cc DEPS executor ${EXECUTOR_TEST_OP})
#endif() else()
cc_test(executor_test SRCS executor_test.cc DEPS executor ${EXECUTOR_TEST_OP})
endif()
cc_library(tensor_array SRCS tensor_array.cc DEPS lod_tensor) cc_library(tensor_array SRCS tensor_array.cc DEPS lod_tensor)
cc_test(tensor_array_test SRCS tensor_array_test.cc DEPS tensor_array place) cc_test(tensor_array_test SRCS tensor_array_test.cc DEPS tensor_array place)
cc_library(selected_rows SRCS selected_rows.cc DEPS tensor)
cc_test(selected_rows_test SRCS selected_rows_test.cc DEPS selected_rows)
...@@ -28,14 +28,15 @@ namespace paddle { ...@@ -28,14 +28,15 @@ namespace paddle {
namespace framework { namespace framework {
static inline std::unique_ptr<OperatorBase> CreateGradOp( static inline std::unique_ptr<OperatorBase> CreateGradOp(
const OperatorBase& op) { const OperatorBase& op, const std::unordered_set<std::string>& no_grad_set,
std::unordered_map<std::string, std::string>* grad_to_var) {
OpDescBind op_desc; OpDescBind op_desc;
op_desc.SetInputMap(op.Inputs()); op_desc.SetInputMap(op.Inputs());
op_desc.SetOutputMap(op.Outputs()); op_desc.SetOutputMap(op.Outputs());
op_desc.SetType(op.Type()); op_desc.SetType(op.Type());
op_desc.SetAttrMap(op.Attrs()); op_desc.SetAttrMap(op.Attrs());
auto& info = OpInfoMap::Instance().Get(op.Type()); auto& info = OpInfoMap::Instance().Get(op.Type());
auto grad_descs = info.GradOpMaker()(op_desc); auto grad_descs = info.GradOpMaker()(op_desc, no_grad_set, grad_to_var);
std::vector<std::unique_ptr<OperatorBase>> grad_ops; std::vector<std::unique_ptr<OperatorBase>> grad_ops;
grad_ops.reserve(grad_descs.size()); grad_ops.reserve(grad_descs.size());
std::transform(grad_descs.begin(), grad_descs.end(), std::transform(grad_descs.begin(), grad_descs.end(),
...@@ -98,7 +99,9 @@ static std::unique_ptr<OperatorBase> NOP() { ...@@ -98,7 +99,9 @@ static std::unique_ptr<OperatorBase> NOP() {
// See Backward.h for details // See Backward.h for details
static std::unique_ptr<OperatorBase> BackwardRecursive( static std::unique_ptr<OperatorBase> BackwardRecursive(
const OperatorBase& forwardOp, const OperatorBase& forwardOp,
std::unordered_set<std::string>& no_grad_names, size_t& uniq_id) { std::unordered_set<std::string>& no_grad_names,
std::unordered_map<std::string, std::string>* grad_to_var,
size_t& uniq_id) {
// If all input gradients of forwarding operator do not need to calculate, // If all input gradients of forwarding operator do not need to calculate,
// just return an NOP. Not return null ptr because NOP does not take // just return an NOP. Not return null ptr because NOP does not take
// too much time for calculation, but it is useful for simplifying logic. // too much time for calculation, but it is useful for simplifying logic.
...@@ -136,7 +139,7 @@ static std::unique_ptr<OperatorBase> BackwardRecursive( ...@@ -136,7 +139,7 @@ static std::unique_ptr<OperatorBase> BackwardRecursive(
for (auto it = forwardNet.ops_.rbegin(); it != forwardNet.ops_.rend(); for (auto it = forwardNet.ops_.rbegin(); it != forwardNet.ops_.rend();
++it, ++local_op_id) { ++it, ++local_op_id) {
auto& fwd = *it; auto& fwd = *it;
auto bwd = BackwardRecursive(*fwd, no_grad_names, uniq_id); auto bwd = BackwardRecursive(*fwd, no_grad_names, grad_to_var, uniq_id);
ForEachVarName(bwd->Outputs(), ForEachVarName(bwd->Outputs(),
[&dup_output_ops, local_op_id](const std::string& out) { [&dup_output_ops, local_op_id](const std::string& out) {
dup_output_ops[out].emplace_back(local_op_id); dup_output_ops[out].emplace_back(local_op_id);
...@@ -187,7 +190,8 @@ static std::unique_ptr<OperatorBase> BackwardRecursive( ...@@ -187,7 +190,8 @@ static std::unique_ptr<OperatorBase> BackwardRecursive(
net->InsertOp(pos.first + 1, std::move(pos.second)); net->InsertOp(pos.first + 1, std::move(pos.second));
} }
} else { } else {
std::unique_ptr<OperatorBase> grad_op(CreateGradOp(forwardOp)); std::unique_ptr<OperatorBase> grad_op(
CreateGradOp(forwardOp, no_grad_names, grad_to_var));
ForEachVarName(grad_op->Inputs(), [&no_grad_names, &net, &grad_op]( ForEachVarName(grad_op->Inputs(), [&no_grad_names, &net, &grad_op](
const std::string& grad_input) { const std::string& grad_input) {
...@@ -226,7 +230,7 @@ static std::unique_ptr<OperatorBase> BackwardRecursive( ...@@ -226,7 +230,7 @@ static std::unique_ptr<OperatorBase> BackwardRecursive(
*static_cast<const OperatorBase*>(&rnnop.stepnet()); *static_cast<const OperatorBase*>(&rnnop.stepnet());
// create stepnet's gradient op // create stepnet's gradient op
rnn_grad_op->set_stepnet( rnn_grad_op->set_stepnet(
BackwardRecursive(stepnet_op, no_grad_names, uniq_id)); BackwardRecursive(stepnet_op, no_grad_names, grad_to_var, uniq_id));
} }
if (net->ops_.empty()) { // Current no aux op is added to network if (net->ops_.empty()) { // Current no aux op is added to network
...@@ -253,7 +257,8 @@ std::unique_ptr<OperatorBase> Backward( ...@@ -253,7 +257,8 @@ std::unique_ptr<OperatorBase> Backward(
no_grad_names.insert(name + kGradVarSuffix); no_grad_names.insert(name + kGradVarSuffix);
} }
size_t uid = 0; size_t uid = 0;
return BackwardRecursive(forwardOp, no_grad_names, uid); std::unordered_map<std::string, std::string> grad_to_var;
return BackwardRecursive(forwardOp, no_grad_names, &grad_to_var, uid);
} }
// ==================================== // // ==================================== //
...@@ -268,30 +273,61 @@ static bool AllGradInSet(const std::vector<std::string>& names, ...@@ -268,30 +273,61 @@ static bool AllGradInSet(const std::vector<std::string>& names,
return true; return true;
} }
static void CreateGradVarInBlock(
size_t grad_op_start_index,
const std::unordered_map<std::string, std::string>& param_name_map,
BlockDescBind* block_desc,
std::unordered_map<std::string, GradVarInfo>* grad_var_record) {
auto ops = block_desc->AllOps();
for (size_t op_index = grad_op_start_index; op_index < ops.size();
++op_index) {
ForEachVarName(ops[op_index]->Outputs(),
[&](const std::string& grad_var_name) {
if (block_desc->HasVar(grad_var_name)) {
return false;
}
block_desc->Var(grad_var_name);
auto it = param_name_map.find(grad_var_name);
if (it == param_name_map.end()) {
return false;
}
auto param_var_name = it->second;
auto& grad_record = (*grad_var_record)[param_var_name];
grad_record.name_ = grad_var_name;
grad_record.block_idx_ = block_desc->ID();
grad_record.op_idx_ = static_cast<int>(op_index);
return false; /* not break */
});
}
}
std::vector<std::unique_ptr<OpDescBind>> MakeOpGrad( std::vector<std::unique_ptr<OpDescBind>> MakeOpGrad(
const std::unique_ptr<OpDescBind>& op_desc, const std::unique_ptr<OpDescBind>& op_desc,
std::unordered_set<std::string>& no_grad_vars) { std::unordered_set<std::string>* no_grad_vars,
std::unordered_map<std::string, std::string>* grad_to_var) {
std::vector<std::unique_ptr<OpDescBind>> grad_op_descs; std::vector<std::unique_ptr<OpDescBind>> grad_op_descs;
// All input gradients of forwarding operator do not need to calculat. // All input gradients of forwarding operator do not need to calculate.
const std::vector<std::string>& inputs = op_desc->InputArgumentNames(); const std::vector<std::string>& inputs = op_desc->InputArgumentNames();
if (AllGradInSet(inputs, no_grad_vars)) { if (AllGradInSet(inputs, *no_grad_vars)) {
return grad_op_descs; // empty vector return grad_op_descs; // empty vector
} }
// All output gradients of forwarding operator do not need to calculate. // All output gradients of forwarding operator do not need to calculate.
const std::vector<std::string>& outputs = op_desc->OutputArgumentNames(); const std::vector<std::string>& outputs = op_desc->OutputArgumentNames();
if (AllGradInSet(outputs, no_grad_vars)) { if (AllGradInSet(outputs, *no_grad_vars)) {
for (const std::string& name : inputs) { for (const std::string& name : inputs) {
no_grad_vars.insert(GradVarName(name)); no_grad_vars->insert(GradVarName(name));
} }
return grad_op_descs; // empty vector return grad_op_descs; // empty vector
} }
grad_op_descs = OpRegistry::CreateGradOpDescs(op_desc.get()); grad_op_descs = OpInfoMap::Instance()
.Get(op_desc->Type())
.GradOpMaker()(*op_desc, *no_grad_vars, grad_to_var);
std::list<std::unique_ptr<OpDescBind>> pending_fill_zeros_ops; std::list<std::unique_ptr<OpDescBind>> pending_fill_zeros_ops;
for (auto& desc : grad_op_descs) { for (auto& desc : grad_op_descs) {
for (const std::string& in_name : desc->InputArgumentNames()) { for (const std::string& in_name : desc->InputArgumentNames()) {
if (no_grad_vars.count(in_name)) { if (no_grad_vars->count(in_name)) {
std::string prefix = in_name.substr( std::string prefix = in_name.substr(
0, in_name.size() - sizeof(kGradVarSuffix) / sizeof(char) + 1); 0, in_name.size() - sizeof(kGradVarSuffix) / sizeof(char) + 1);
std::string new_name = prefix + kZeroVarSuffix; std::string new_name = prefix + kZeroVarSuffix;
...@@ -301,11 +337,6 @@ std::vector<std::unique_ptr<OpDescBind>> MakeOpGrad( ...@@ -301,11 +337,6 @@ std::vector<std::unique_ptr<OpDescBind>> MakeOpGrad(
pending_fill_zeros_ops.push_back(std::move(fill_zeros_op)); pending_fill_zeros_ops.push_back(std::move(fill_zeros_op));
} }
} }
for (const std::string& out_name : desc->OutputArgumentNames()) {
if (no_grad_vars.count(out_name)) {
desc->Rename(out_name, kEmptyVarName);
}
}
} }
for (auto& p : pending_fill_zeros_ops) { for (auto& p : pending_fill_zeros_ops) {
...@@ -316,23 +347,25 @@ std::vector<std::unique_ptr<OpDescBind>> MakeOpGrad( ...@@ -316,23 +347,25 @@ std::vector<std::unique_ptr<OpDescBind>> MakeOpGrad(
std::vector<std::unique_ptr<OpDescBind>> MakeBlockBackward( std::vector<std::unique_ptr<OpDescBind>> MakeBlockBackward(
ProgramDescBind& program_desc, int block_idx, ProgramDescBind& program_desc, int block_idx,
std::unordered_set<std::string>& no_grad_vars) { std::unordered_set<std::string>* no_grad_vars,
std::unordered_map<std::string, std::string>* grad_to_var) {
BlockDescBind* cur_block = program_desc.Block(block_idx); BlockDescBind* cur_block = program_desc.Block(block_idx);
std::deque<std::unique_ptr<OpDescBind>>& op_descs = cur_block->ops_; std::deque<std::unique_ptr<OpDescBind>>& op_descs = cur_block->ops_;
std::unordered_map<std::string, std::vector<size_t>> dup_out_ops; std::unordered_map<std::string, std::vector<size_t>> dup_out_ops;
size_t grad_desc_idx = 0; size_t grad_desc_idx = 0;
std::vector<std::unique_ptr<OpDescBind>> backward_descs; std::vector<std::unique_ptr<OpDescBind>> backward_descs;
for (auto it = op_descs.rbegin(); it != op_descs.rend(); ++it) { for (auto it = op_descs.rbegin(); it != op_descs.rend(); ++it) {
std::vector<std::unique_ptr<OpDescBind>> op_grads = std::vector<std::unique_ptr<OpDescBind>> op_grads =
MakeOpGrad(*it, no_grad_vars); MakeOpGrad(*it, no_grad_vars, grad_to_var);
if ((*it)->Type() == "recurrent") { if ((*it)->Type() == "recurrent") {
PADDLE_ENFORCE_EQ( PADDLE_ENFORCE_EQ(
op_grads.size(), size_t(1), op_grads.size(), static_cast<size_t>(1),
"rnn_op's gradient process should contain only one op."); "rnn_op's gradient process should contain only one op.");
int step_block_idx = (*it)->GetBlockAttr("stop_block"); int step_block_idx = (*it)->GetBlockAttr("step_block");
auto backward_block_op_descs = auto backward_block_op_descs = MakeBlockBackward(
MakeBlockBackward(program_desc, step_block_idx, no_grad_vars); program_desc, step_block_idx, no_grad_vars, grad_to_var);
BlockDescBind* backward_block = program_desc.AppendBlock(*cur_block); BlockDescBind* backward_block = program_desc.AppendBlock(*cur_block);
for (auto& ptr : backward_block_op_descs) { for (auto& ptr : backward_block_op_descs) {
backward_block->ops_.push_back(std::move(ptr)); backward_block->ops_.push_back(std::move(ptr));
...@@ -376,24 +409,56 @@ std::vector<std::unique_ptr<OpDescBind>> MakeBlockBackward( ...@@ -376,24 +409,56 @@ std::vector<std::unique_ptr<OpDescBind>> MakeBlockBackward(
backward_descs.insert(backward_descs.begin() + p.first + 1, backward_descs.insert(backward_descs.begin() + p.first + 1,
std::move(p.second)); std::move(p.second));
} }
return backward_descs; return backward_descs;
} }
void AppendBackward(ProgramDescBind& program_desc, ParamGradInfoMap AppendBackward(
const std::unordered_set<std::string>& no_grad_vars) { ProgramDescBind& program_desc, const VarDescBind& target,
const std::unordered_set<std::string>& no_grad_vars) {
std::unordered_set<std::string> no_grad_var_names; std::unordered_set<std::string> no_grad_var_names;
no_grad_var_names.reserve(no_grad_vars.size() + 1); no_grad_var_names.reserve(no_grad_vars.size() + 1);
no_grad_var_names.insert(std::string(kEmptyVarName) + kGradVarSuffix); no_grad_var_names.insert(std::string(kEmptyVarName) + kGradVarSuffix);
for (auto& name : no_grad_vars) { for (auto& name : no_grad_vars) {
no_grad_var_names.insert(GradVarName(name)); no_grad_var_names.insert(GradVarName(name));
} }
const int root_block_idx = 0; const int root_block_idx = 0;
auto backward_op_descs = auto root_block = program_desc.Block(root_block_idx);
MakeBlockBackward(program_desc, root_block_idx, no_grad_var_names); auto& all_ops = root_block->ops_;
auto& forw_op_descs = program_desc.Block(root_block_idx)->ops_;
// insert fill one op for target
std::string fill_one_op_out = GradVarName(target.Name());
std::unique_ptr<OpDescBind> fill_one_op(
new OpDescBind("fill_constant", {}, {{"Out", {fill_one_op_out}}},
{{"shape", std::vector<int>{1}},
{"value", static_cast<float>(1.0)},
{"dataType", framework::DataType::FP32}}));
all_ops.push_back(std::move(fill_one_op));
size_t forward_op_num = all_ops.size();
size_t forward_block_num = program_desc.Size();
// Insert backward operators
std::unordered_map<std::string, std::string> grad_to_var;
auto backward_op_descs = MakeBlockBackward(program_desc, root_block_idx,
&no_grad_var_names, &grad_to_var);
std::unordered_map<std::string, GradVarInfo> retv;
// Create Variable
for (auto& ptr : backward_op_descs) { for (auto& ptr : backward_op_descs) {
forw_op_descs.push_back(std::move(ptr)); all_ops.push_back(std::move(ptr));
}
root_block->Var(fill_one_op_out);
// create grad_var for all blocks in this program
CreateGradVarInBlock(forward_op_num, grad_to_var, root_block, &retv);
for (size_t block_index = forward_block_num;
block_index < program_desc.Size(); ++block_index) {
CreateGradVarInBlock(0, grad_to_var, program_desc.Block(block_index),
&retv);
} }
return retv;
} }
} // namespace framework } // namespace framework
......
...@@ -14,7 +14,10 @@ ...@@ -14,7 +14,10 @@
#pragma once #pragma once
#include <string>
#include <unordered_map>
#include <unordered_set> #include <unordered_set>
#include "paddle/framework/operator.h" #include "paddle/framework/operator.h"
#include "paddle/framework/program_desc.h" #include "paddle/framework/program_desc.h"
...@@ -27,10 +30,27 @@ extern std::unique_ptr<OperatorBase> Backward( ...@@ -27,10 +30,27 @@ extern std::unique_ptr<OperatorBase> Backward(
const OperatorBase& forwardOp, const OperatorBase& forwardOp,
const std::unordered_set<std::string>& no_grad_vars); const std::unordered_set<std::string>& no_grad_vars);
// TODO(jiayi): Add target as parameter and generate backward op struct GradVarInfo {
// according to target. GradVarInfo() {}
void AppendBackward(ProgramDescBind& program_desc, GradVarInfo(const std::string& name, int block_idx, int op_idx)
const std::unordered_set<std::string>& no_grad_vars); : name_(name), block_idx_(block_idx), op_idx_(op_idx) {}
bool operator==(const GradVarInfo& b) const {
return name_ == b.name_ && block_idx_ == b.block_idx_ &&
op_idx_ == b.op_idx_;
}
std::string name_;
int block_idx_;
int op_idx_;
};
using ParamGradInfoMap = std::unordered_map<std::string /*fwd_var_name*/,
GradVarInfo /*grad_var_info*/>;
ParamGradInfoMap AppendBackward(
ProgramDescBind& program_desc, const VarDescBind& target,
const std::unordered_set<std::string>& no_grad_vars);
} // namespace framework } // namespace framework
} // namespace paddle } // namespace paddle
...@@ -18,6 +18,7 @@ ...@@ -18,6 +18,7 @@
#include "paddle/framework/block_desc.h" #include "paddle/framework/block_desc.h"
#include "paddle/framework/op_desc.h" #include "paddle/framework/op_desc.h"
#include "paddle/framework/op_registry.h" #include "paddle/framework/op_registry.h"
#include "paddle/framework/var_desc.h"
#include "paddle/operators/net_op.h" #include "paddle/operators/net_op.h"
namespace paddle { namespace paddle {
...@@ -169,6 +170,45 @@ class MultInOutOpMaker : public OpProtoAndCheckerMaker { ...@@ -169,6 +170,45 @@ class MultInOutOpMaker : public OpProtoAndCheckerMaker {
} }
}; };
class MinusGradOpDescMaker : public GradOpDescMakerBase {
public:
using GradOpDescMakerBase::GradOpDescMakerBase;
std::vector<std::unique_ptr<OpDescBind>> operator()() const override {
std::vector<std::unique_ptr<OpDescBind>> retv;
auto x_g = InputGrad("X");
if (!x_g.empty()) {
auto *op_desc = new OpDescBind();
op_desc->SetType("scale");
op_desc->SetInput("X", OutputGrad("Out"));
op_desc->SetOutput("Out", x_g);
op_desc->SetAttr("scale", 1.0f);
retv.emplace_back(op_desc);
}
auto y_g = InputGrad("Y");
if (!y_g.empty()) {
auto *op_desc = new OpDescBind();
op_desc->SetType("scale");
op_desc->SetInput("X", OutputGrad("Out"));
op_desc->SetOutput("Out", y_g);
op_desc->SetAttr("scale", -1.0f);
retv.emplace_back(op_desc);
}
return retv;
}
};
class MinusOpMaker : public OpProtoAndCheckerMaker {
public:
MinusOpMaker(OpProto *proto, OpAttrChecker *op_checker)
: OpProtoAndCheckerMaker(proto, op_checker) {
AddInput("X", "");
AddInput("Y", "");
AddOutput("Out", "");
AddComment("minus for unittest");
}
};
} // namespace framework } // namespace framework
} // namespace paddle } // namespace paddle
...@@ -187,6 +227,7 @@ REGISTER_OP_WITHOUT_GRADIENT(fc, f::FcOp, f::FcOpMaker); ...@@ -187,6 +227,7 @@ REGISTER_OP_WITHOUT_GRADIENT(fc, f::FcOp, f::FcOpMaker);
REGISTER_OP(many_output_op, f::NOP, f::ManyOutputOpMaker, many_output_op_grad, REGISTER_OP(many_output_op, f::NOP, f::ManyOutputOpMaker, many_output_op_grad,
f::NOP); f::NOP);
REGISTER_OP(mult_in_out, f::NOP, f::MultInOutOpMaker, mult_in_out_grad, f::NOP); REGISTER_OP(mult_in_out, f::NOP, f::MultInOutOpMaker, mult_in_out_grad, f::NOP);
REGISTER_OPERATOR(minus, f::NOP, f::MinusOpMaker, f::MinusGradOpDescMaker);
TEST(Backward, simple_op_not_need_grad) { TEST(Backward, simple_op_not_need_grad) {
auto fwd = f::OpRegistry::CreateOp( auto fwd = f::OpRegistry::CreateOp(
...@@ -395,12 +436,13 @@ TEST(Backward, linear_net_intermediate_variable_has_no_grad) { ...@@ -395,12 +436,13 @@ TEST(Backward, linear_net_intermediate_variable_has_no_grad) {
2UL /* external input number */ 2UL /* external input number */
+ 1UL /* external output number*/ + 1UL /* external output number*/
+ 1UL /* number of gradient of external output*/ + 1UL /* number of gradient of external output*/
+ 2U /* internal variable number*/); + 2UL /* internal variable number*/
);
EXPECT_EQ(grad_fc.Outputs(all).size(), EXPECT_EQ(grad_fc.Outputs(all).size(),
2UL /* input number of mul*/ 2UL /* input number of mul*/
+ 2UL /* input number of rowwise_add + 2UL /* input number of rowwise_add*/
*/ + 1UL /* input number of sigmod */
+ 1UL /* input number of sigmod */); - 1UL /* out2 is not needed*/);
EXPECT_EQ(bwd_net->ops_[1]->Inputs(all).size(), 0UL); EXPECT_EQ(bwd_net->ops_[1]->Inputs(all).size(), 0UL);
EXPECT_EQ(bwd_net->ops_[1]->Outputs(all).size(), 0UL); EXPECT_EQ(bwd_net->ops_[1]->Outputs(all).size(), 0UL);
EXPECT_EQ(bwd_net->ops_[2]->Inputs(all).size(), 0UL); EXPECT_EQ(bwd_net->ops_[2]->Inputs(all).size(), 0UL);
...@@ -427,10 +469,14 @@ TEST(Backward, simple_single_op) { ...@@ -427,10 +469,14 @@ TEST(Backward, simple_single_op) {
op->SetInput("b", {"b"}); op->SetInput("b", {"b"});
op->SetOutput("Out", {"out"}); op->SetOutput("Out", {"out"});
AppendBackward(program, {}); auto target = f::VarDescBind("out");
auto var_to_grad = AppendBackward(program, target, {});
ASSERT_EQ(block->AllOps().size(), 3UL);
f::OpDescBind *fill_op = block->AllOps()[1];
EXPECT_EQ(fill_op->Type(), "fill_constant");
ASSERT_EQ(block->AllOps().size(), 2UL); f::OpDescBind *grad_op = block->AllOps()[2];
f::OpDescBind *grad_op = block->AllOps()[1];
EXPECT_EQ(grad_op->Type(), "rowwise_add_grad"); EXPECT_EQ(grad_op->Type(), "rowwise_add_grad");
ASSERT_EQ(grad_op->InputNames().size(), 1UL); ASSERT_EQ(grad_op->InputNames().size(), 1UL);
ASSERT_EQ(grad_op->OutputNames().size(), 2UL); ASSERT_EQ(grad_op->OutputNames().size(), 2UL);
...@@ -440,6 +486,13 @@ TEST(Backward, simple_single_op) { ...@@ -440,6 +486,13 @@ TEST(Backward, simple_single_op) {
std::vector<std::string>({f::GradVarName("x")})); std::vector<std::string>({f::GradVarName("x")}));
EXPECT_EQ(grad_op->Output(f::GradVarName("b")), EXPECT_EQ(grad_op->Output(f::GradVarName("b")),
std::vector<std::string>({f::GradVarName("b")})); std::vector<std::string>({f::GradVarName("b")}));
EXPECT_EQ(var_to_grad.size(), 2UL);
EXPECT_EQ(var_to_grad.at("b"), f::GradVarInfo(f::GradVarName("b"), 0, 2));
EXPECT_EQ(var_to_grad.at("x"), f::GradVarInfo(f::GradVarName("x"), 0, 2));
EXPECT_TRUE(block->HasVar(f::GradVarName("b")));
EXPECT_TRUE(block->HasVar(f::GradVarName("x")));
} }
TEST(Backward, default_attribute) { TEST(Backward, default_attribute) {
...@@ -451,14 +504,19 @@ TEST(Backward, default_attribute) { ...@@ -451,14 +504,19 @@ TEST(Backward, default_attribute) {
op->SetInput("X", {"x"}); op->SetInput("X", {"x"});
op->SetInput("Y", {"y"}); op->SetInput("Y", {"y"});
op->SetOutput("Out", {"out"}); op->SetOutput("Out", {"out"});
op->CheckAttrs();
AppendBackward(program, {}); auto target = f::VarDescBind("out");
AppendBackward(program, target, {});
ASSERT_EQ(block->AllOps().size(), 2UL); ASSERT_EQ(block->AllOps().size(), 3UL);
EXPECT_EQ(boost::get<int>(op->GetAttr("x_num_col_dims")), 1); EXPECT_EQ(boost::get<int>(op->GetAttr("x_num_col_dims")), 1);
EXPECT_EQ(boost::get<int>(op->GetAttr("y_num_col_dims")), 1); EXPECT_EQ(boost::get<int>(op->GetAttr("y_num_col_dims")), 1);
f::OpDescBind *grad_op = block->AllOps()[1]; f::OpDescBind *fill_op = block->AllOps()[1];
EXPECT_EQ(fill_op->Type(), "fill_constant");
f::OpDescBind *grad_op = block->AllOps()[2];
ASSERT_EQ(grad_op->Type(), "mul_grad"); ASSERT_EQ(grad_op->Type(), "mul_grad");
EXPECT_EQ(boost::get<int>(grad_op->GetAttr("x_num_col_dims")), 1); EXPECT_EQ(boost::get<int>(grad_op->GetAttr("x_num_col_dims")), 1);
EXPECT_EQ(boost::get<int>(grad_op->GetAttr("y_num_col_dims")), 1); EXPECT_EQ(boost::get<int>(grad_op->GetAttr("y_num_col_dims")), 1);
...@@ -486,10 +544,15 @@ TEST(Backward, simple_mult_op) { ...@@ -486,10 +544,15 @@ TEST(Backward, simple_mult_op) {
op3->SetInput("b", {"b3"}); op3->SetInput("b", {"b3"});
op3->SetOutput("Out", {"out3"}); op3->SetOutput("Out", {"out3"});
AppendBackward(program, {}); auto target = f::VarDescBind("out3");
size_t forward_len = block->AllOps().size();
auto var_to_grad = AppendBackward(program, target, {});
ASSERT_EQ(block->AllOps().size(), 6UL); ASSERT_EQ(block->AllOps().size(), 6UL + 1);
f::OpDescBind *grad_op1 = block->AllOps()[5]; f::OpDescBind *fill_op = block->AllOps()[forward_len];
EXPECT_EQ(fill_op->Type(), "fill_constant");
f::OpDescBind *grad_op1 = block->AllOps()[6];
EXPECT_EQ(grad_op1->Type(), "rowwise_add_grad"); EXPECT_EQ(grad_op1->Type(), "rowwise_add_grad");
ASSERT_EQ(grad_op1->InputNames().size(), 1UL); ASSERT_EQ(grad_op1->InputNames().size(), 1UL);
ASSERT_EQ(grad_op1->OutputNames().size(), 2UL); ASSERT_EQ(grad_op1->OutputNames().size(), 2UL);
...@@ -500,7 +563,7 @@ TEST(Backward, simple_mult_op) { ...@@ -500,7 +563,7 @@ TEST(Backward, simple_mult_op) {
EXPECT_EQ(grad_op1->Output(f::GradVarName("b")), EXPECT_EQ(grad_op1->Output(f::GradVarName("b")),
std::vector<std::string>({f::GradVarName("b1")})); std::vector<std::string>({f::GradVarName("b1")}));
f::OpDescBind *grad_op2 = block->AllOps()[4]; f::OpDescBind *grad_op2 = block->AllOps()[5];
EXPECT_EQ(grad_op2->Type(), "mul_grad"); EXPECT_EQ(grad_op2->Type(), "mul_grad");
ASSERT_EQ(grad_op2->InputNames().size(), 4UL); ASSERT_EQ(grad_op2->InputNames().size(), 4UL);
ASSERT_EQ(grad_op2->OutputNames().size(), 2UL); ASSERT_EQ(grad_op2->OutputNames().size(), 2UL);
...@@ -514,7 +577,7 @@ TEST(Backward, simple_mult_op) { ...@@ -514,7 +577,7 @@ TEST(Backward, simple_mult_op) {
EXPECT_EQ(grad_op2->Output(f::GradVarName("Y")), EXPECT_EQ(grad_op2->Output(f::GradVarName("Y")),
std::vector<std::string>({f::GradVarName("y2")})); std::vector<std::string>({f::GradVarName("y2")}));
f::OpDescBind *grad_op3 = block->AllOps()[3]; f::OpDescBind *grad_op3 = block->AllOps()[4];
EXPECT_EQ(grad_op3->Type(), "rowwise_add_grad"); EXPECT_EQ(grad_op3->Type(), "rowwise_add_grad");
ASSERT_EQ(grad_op3->InputNames().size(), 1UL); ASSERT_EQ(grad_op3->InputNames().size(), 1UL);
ASSERT_EQ(grad_op3->OutputNames().size(), 2UL); ASSERT_EQ(grad_op3->OutputNames().size(), 2UL);
...@@ -524,6 +587,23 @@ TEST(Backward, simple_mult_op) { ...@@ -524,6 +587,23 @@ TEST(Backward, simple_mult_op) {
std::vector<std::string>({f::GradVarName("out2")})); std::vector<std::string>({f::GradVarName("out2")}));
EXPECT_EQ(grad_op3->Output(f::GradVarName("b")), EXPECT_EQ(grad_op3->Output(f::GradVarName("b")),
std::vector<std::string>({f::GradVarName("b3")})); std::vector<std::string>({f::GradVarName("b3")}));
EXPECT_EQ(var_to_grad.size(), 6UL);
EXPECT_EQ(var_to_grad.at("x1"), f::GradVarInfo(f::GradVarName("x1"), 0, 6));
EXPECT_EQ(var_to_grad.at("b1"), f::GradVarInfo(f::GradVarName("b1"), 0, 6));
EXPECT_EQ(var_to_grad.at("out1"),
f::GradVarInfo(f::GradVarName("out1"), 0, 5));
EXPECT_EQ(var_to_grad.at("y2"), f::GradVarInfo(f::GradVarName("y2"), 0, 5));
EXPECT_EQ(var_to_grad.at("out2"),
f::GradVarInfo(f::GradVarName("out2"), 0, 4));
EXPECT_EQ(var_to_grad.at("b3"), f::GradVarInfo(f::GradVarName("b3"), 0, 4));
EXPECT_TRUE(block->HasVar(f::GradVarName("x1")));
EXPECT_TRUE(block->HasVar(f::GradVarName("b1")));
EXPECT_TRUE(block->HasVar(f::GradVarName("out1")));
EXPECT_TRUE(block->HasVar(f::GradVarName("y2")));
EXPECT_TRUE(block->HasVar(f::GradVarName("out2")));
EXPECT_TRUE(block->HasVar(f::GradVarName("b3")));
} }
TEST(Backward, intermedia_var_no_grad) { TEST(Backward, intermedia_var_no_grad) {
...@@ -554,10 +634,15 @@ TEST(Backward, intermedia_var_no_grad) { ...@@ -554,10 +634,15 @@ TEST(Backward, intermedia_var_no_grad) {
op4->SetInput("Y", {"out3"}); op4->SetInput("Y", {"out3"});
op4->SetOutput("Out", {"out4"}); op4->SetOutput("Out", {"out4"});
AppendBackward(program, {"out3"}); auto target = f::VarDescBind("out4");
size_t forward_len = block->AllOps().size();
auto var_to_grad = AppendBackward(program, target, {"out3"});
ASSERT_EQ(block->AllOps().size(), 6UL); ASSERT_EQ(block->AllOps().size(), 7UL);
f::OpDescBind *grad_op1 = block->AllOps()[5]; f::OpDescBind *fill_op = block->AllOps()[forward_len];
EXPECT_EQ(fill_op->Type(), "fill_constant");
f::OpDescBind *grad_op1 = block->AllOps()[6];
EXPECT_EQ(grad_op1->Type(), "rowwise_add_grad"); EXPECT_EQ(grad_op1->Type(), "rowwise_add_grad");
ASSERT_EQ(grad_op1->InputNames().size(), 1UL); ASSERT_EQ(grad_op1->InputNames().size(), 1UL);
ASSERT_EQ(grad_op1->OutputNames().size(), 2UL); ASSERT_EQ(grad_op1->OutputNames().size(), 2UL);
...@@ -568,7 +653,7 @@ TEST(Backward, intermedia_var_no_grad) { ...@@ -568,7 +653,7 @@ TEST(Backward, intermedia_var_no_grad) {
EXPECT_EQ(grad_op1->Output(f::GradVarName("b")), EXPECT_EQ(grad_op1->Output(f::GradVarName("b")),
std::vector<std::string>({f::GradVarName("b1")})); std::vector<std::string>({f::GradVarName("b1")}));
f::OpDescBind *grad_op4 = block->AllOps()[4]; f::OpDescBind *grad_op4 = block->AllOps()[5];
EXPECT_EQ(grad_op4->Type(), "mul_grad"); EXPECT_EQ(grad_op4->Type(), "mul_grad");
ASSERT_EQ(grad_op4->InputNames().size(), 4UL); ASSERT_EQ(grad_op4->InputNames().size(), 4UL);
ASSERT_EQ(grad_op4->OutputNames().size(), 2UL); ASSERT_EQ(grad_op4->OutputNames().size(), 2UL);
...@@ -579,8 +664,17 @@ TEST(Backward, intermedia_var_no_grad) { ...@@ -579,8 +664,17 @@ TEST(Backward, intermedia_var_no_grad) {
std::vector<std::string>({f::GradVarName("out4")})); std::vector<std::string>({f::GradVarName("out4")}));
EXPECT_EQ(grad_op4->Output(f::GradVarName("X")), EXPECT_EQ(grad_op4->Output(f::GradVarName("X")),
std::vector<std::string>({f::GradVarName("out1")})); std::vector<std::string>({f::GradVarName("out1")}));
EXPECT_EQ(grad_op4->Output(f::GradVarName("Y")), EXPECT_EQ(grad_op4->Output(f::GradVarName("Y")), std::vector<std::string>());
std::vector<std::string>({f::kEmptyVarName}));
EXPECT_EQ(var_to_grad.size(), 3UL);
EXPECT_EQ(var_to_grad.at("x1"), f::GradVarInfo(f::GradVarName("x1"), 0, 6));
EXPECT_EQ(var_to_grad.at("b1"), f::GradVarInfo(f::GradVarName("b1"), 0, 6));
EXPECT_EQ(var_to_grad.at("out1"),
f::GradVarInfo(f::GradVarName("out1"), 0, 5));
EXPECT_TRUE(block->HasVar(f::GradVarName("x1")));
EXPECT_TRUE(block->HasVar(f::GradVarName("b1")));
EXPECT_TRUE(block->HasVar(f::GradVarName("out1")));
} }
TEST(Backward, var_no_grad) { TEST(Backward, var_no_grad) {
...@@ -601,10 +695,15 @@ TEST(Backward, var_no_grad) { ...@@ -601,10 +695,15 @@ TEST(Backward, var_no_grad) {
op2->SetOutput("Y", {"y2"}); op2->SetOutput("Y", {"y2"});
op2->SetOutput("Z", {"z2"}); op2->SetOutput("Z", {"z2"});
AppendBackward(program, {"z1"}); auto target = f::VarDescBind("z2");
size_t forward_len = block->AllOps().size();
auto var_to_grad = AppendBackward(program, target, {"z1"});
ASSERT_EQ(block->AllOps().size(), 6UL);
f::OpDescBind *fill_op = block->AllOps()[forward_len];
EXPECT_EQ(fill_op->Type(), "fill_constant");
ASSERT_EQ(block->AllOps().size(), 5UL); f::OpDescBind *grad_op2 = block->AllOps()[3];
f::OpDescBind *grad_op2 = block->AllOps()[2];
ASSERT_EQ(grad_op2->Type(), "mult_in_out_grad"); ASSERT_EQ(grad_op2->Type(), "mult_in_out_grad");
ASSERT_EQ(grad_op2->InputNames().size(), 6UL); ASSERT_EQ(grad_op2->InputNames().size(), 6UL);
ASSERT_EQ(grad_op2->OutputNames().size(), 2UL); ASSERT_EQ(grad_op2->OutputNames().size(), 2UL);
...@@ -618,10 +717,9 @@ TEST(Backward, var_no_grad) { ...@@ -618,10 +717,9 @@ TEST(Backward, var_no_grad) {
std::vector<std::string>({f::GradVarName("z2")})); std::vector<std::string>({f::GradVarName("z2")}));
EXPECT_EQ(grad_op2->Output(f::GradVarName("X")), EXPECT_EQ(grad_op2->Output(f::GradVarName("X")),
std::vector<std::string>({f::GradVarName("y1")})); std::vector<std::string>({f::GradVarName("y1")}));
EXPECT_EQ(grad_op2->Output(f::GradVarName("H")), EXPECT_EQ(grad_op2->Output(f::GradVarName("H")), std::vector<std::string>());
std::vector<std::string>({f::kEmptyVarName}));
f::OpDescBind *fill_zero_op = block->AllOps()[3]; f::OpDescBind *fill_zero_op = block->AllOps()[4];
ASSERT_EQ(fill_zero_op->Type(), "fill_zeros_like"); ASSERT_EQ(fill_zero_op->Type(), "fill_zeros_like");
ASSERT_EQ(fill_zero_op->InputNames().size(), 1UL); ASSERT_EQ(fill_zero_op->InputNames().size(), 1UL);
ASSERT_EQ(fill_zero_op->OutputNames().size(), 1UL); ASSERT_EQ(fill_zero_op->OutputNames().size(), 1UL);
...@@ -629,7 +727,7 @@ TEST(Backward, var_no_grad) { ...@@ -629,7 +727,7 @@ TEST(Backward, var_no_grad) {
EXPECT_EQ(fill_zero_op->Output("Y"), EXPECT_EQ(fill_zero_op->Output("Y"),
std::vector<std::string>({std::string("z1") + f::kZeroVarSuffix})); std::vector<std::string>({std::string("z1") + f::kZeroVarSuffix}));
f::OpDescBind *grad_op1 = block->AllOps()[4]; f::OpDescBind *grad_op1 = block->AllOps()[5];
ASSERT_EQ(grad_op1->Type(), "mult_in_out_grad"); ASSERT_EQ(grad_op1->Type(), "mult_in_out_grad");
ASSERT_EQ(grad_op1->InputNames().size(), 6UL); ASSERT_EQ(grad_op1->InputNames().size(), 6UL);
ASSERT_EQ(grad_op1->OutputNames().size(), 2UL); ASSERT_EQ(grad_op1->OutputNames().size(), 2UL);
...@@ -645,6 +743,15 @@ TEST(Backward, var_no_grad) { ...@@ -645,6 +743,15 @@ TEST(Backward, var_no_grad) {
std::vector<std::string>({f::GradVarName("x1")})); std::vector<std::string>({f::GradVarName("x1")}));
EXPECT_EQ(grad_op1->Output(f::GradVarName("H")), EXPECT_EQ(grad_op1->Output(f::GradVarName("H")),
std::vector<std::string>({f::GradVarName("h1")})); std::vector<std::string>({f::GradVarName("h1")}));
EXPECT_EQ(var_to_grad.size(), 3UL);
EXPECT_EQ(var_to_grad.at("y1"), f::GradVarInfo(f::GradVarName("y1"), 0, 3));
EXPECT_EQ(var_to_grad.at("x1"), f::GradVarInfo(f::GradVarName("x1"), 0, 5));
EXPECT_EQ(var_to_grad.at("h1"), f::GradVarInfo(f::GradVarName("h1"), 0, 5));
EXPECT_TRUE(block->HasVar(f::GradVarName("y1")));
EXPECT_TRUE(block->HasVar(f::GradVarName("x1")));
EXPECT_TRUE(block->HasVar(f::GradVarName("h1")));
} }
TEST(Backward, shared_var) { TEST(Backward, shared_var) {
...@@ -669,10 +776,15 @@ TEST(Backward, shared_var) { ...@@ -669,10 +776,15 @@ TEST(Backward, shared_var) {
op3->SetInput("b", {"b3"}); op3->SetInput("b", {"b3"});
op3->SetOutput("Out", {"out3"}); op3->SetOutput("Out", {"out3"});
AppendBackward(program, {}); auto target = f::VarDescBind("out3");
size_t forward_len = block->AllOps().size();
auto var_to_grad = AppendBackward(program, target, {});
ASSERT_EQ(block->AllOps().size(), 7UL); ASSERT_EQ(block->AllOps().size(), 8UL);
f::OpDescBind *grad_op3 = block->AllOps()[3]; f::OpDescBind *fill_op = block->AllOps()[forward_len];
EXPECT_EQ(fill_op->Type(), "fill_constant");
f::OpDescBind *grad_op3 = block->AllOps()[4];
ASSERT_EQ(grad_op3->Type(), "rowwise_add_grad"); ASSERT_EQ(grad_op3->Type(), "rowwise_add_grad");
ASSERT_EQ(grad_op3->InputNames().size(), 1UL); ASSERT_EQ(grad_op3->InputNames().size(), 1UL);
ASSERT_EQ(grad_op3->OutputNames().size(), 2UL); ASSERT_EQ(grad_op3->OutputNames().size(), 2UL);
...@@ -683,7 +795,7 @@ TEST(Backward, shared_var) { ...@@ -683,7 +795,7 @@ TEST(Backward, shared_var) {
EXPECT_EQ(grad_op3->Output(f::GradVarName("b")), EXPECT_EQ(grad_op3->Output(f::GradVarName("b")),
std::vector<std::string>({f::GradVarName("b3")})); std::vector<std::string>({f::GradVarName("b3")}));
f::OpDescBind *grad_op4 = block->AllOps()[4]; f::OpDescBind *grad_op4 = block->AllOps()[5];
ASSERT_EQ(grad_op4->Type(), "mul_grad"); ASSERT_EQ(grad_op4->Type(), "mul_grad");
ASSERT_EQ(grad_op4->InputNames().size(), 4UL); ASSERT_EQ(grad_op4->InputNames().size(), 4UL);
ASSERT_EQ(grad_op4->OutputNames().size(), 2UL); ASSERT_EQ(grad_op4->OutputNames().size(), 2UL);
...@@ -697,7 +809,7 @@ TEST(Backward, shared_var) { ...@@ -697,7 +809,7 @@ TEST(Backward, shared_var) {
EXPECT_EQ(grad_op4->Output(f::GradVarName("Y")), EXPECT_EQ(grad_op4->Output(f::GradVarName("Y")),
std::vector<std::string>({f::GradVarName("y2")})); std::vector<std::string>({f::GradVarName("y2")}));
f::OpDescBind *sum_op = block->AllOps()[5]; f::OpDescBind *sum_op = block->AllOps()[6];
ASSERT_EQ(sum_op->Type(), "sum"); ASSERT_EQ(sum_op->Type(), "sum");
ASSERT_EQ(sum_op->InputNames().size(), 1UL); ASSERT_EQ(sum_op->InputNames().size(), 1UL);
ASSERT_EQ(sum_op->OutputNames().size(), 1UL); ASSERT_EQ(sum_op->OutputNames().size(), 1UL);
...@@ -707,7 +819,7 @@ TEST(Backward, shared_var) { ...@@ -707,7 +819,7 @@ TEST(Backward, shared_var) {
EXPECT_EQ(sum_op->Output("Out"), EXPECT_EQ(sum_op->Output("Out"),
std::vector<std::string>({f::GradVarName("out1")})); std::vector<std::string>({f::GradVarName("out1")}));
f::OpDescBind *grad_op1 = block->AllOps()[6]; f::OpDescBind *grad_op1 = block->AllOps()[7];
ASSERT_EQ(grad_op1->Type(), "rowwise_add_grad"); ASSERT_EQ(grad_op1->Type(), "rowwise_add_grad");
ASSERT_EQ(grad_op1->InputNames().size(), 1UL); ASSERT_EQ(grad_op1->InputNames().size(), 1UL);
ASSERT_EQ(grad_op1->OutputNames().size(), 2UL); ASSERT_EQ(grad_op1->OutputNames().size(), 2UL);
...@@ -717,4 +829,41 @@ TEST(Backward, shared_var) { ...@@ -717,4 +829,41 @@ TEST(Backward, shared_var) {
std::vector<std::string>({f::GradVarName("x1")})); std::vector<std::string>({f::GradVarName("x1")}));
EXPECT_EQ(grad_op1->Output(f::GradVarName("b")), EXPECT_EQ(grad_op1->Output(f::GradVarName("b")),
std::vector<std::string>({f::GradVarName("b1")})); std::vector<std::string>({f::GradVarName("b1")}));
}
\ No newline at end of file EXPECT_EQ(var_to_grad.size(), 5UL);
EXPECT_EQ(var_to_grad.at("b3"), f::GradVarInfo(f::GradVarName("b3"), 0, 4));
EXPECT_EQ(var_to_grad.at("y2"), f::GradVarInfo(f::GradVarName("y2"), 0, 5));
EXPECT_EQ(var_to_grad.at("out1"),
f::GradVarInfo(f::GradVarName("out1"), 0, 6));
EXPECT_EQ(var_to_grad.at("x1"), f::GradVarInfo(f::GradVarName("x1"), 0, 7));
EXPECT_EQ(var_to_grad.at("b1"), f::GradVarInfo(f::GradVarName("b1"), 0, 7));
EXPECT_TRUE(block->HasVar(f::GradVarName("b3")));
EXPECT_TRUE(block->HasVar(f::GradVarName("y2")));
EXPECT_TRUE(block->HasVar(f::GradVarName("out1")));
EXPECT_TRUE(block->HasVar(f::GradVarName("x1")));
EXPECT_TRUE(block->HasVar(f::GradVarName("b1")));
}
TEST(Backward, half_backward) {
f::ProgramDesc *program_desc = GetNewProgramDesc();
f::ProgramDescBind &program = f::ProgramDescBind::Instance(program_desc);
f::BlockDescBind *block = program.Block(0);
auto *op1 = block->AppendOp();
op1->SetType("minus");
op1->SetInput("X", {"a"});
op1->SetInput("Y", {"b"});
op1->SetOutput("Out", {"out"});
auto target = f::VarDescBind("out");
size_t forward_len = block->AllOps().size();
auto var_to_grad = AppendBackward(program, target, {"b"});
f::OpDescBind *fill_op = block->AllOps()[forward_len];
EXPECT_EQ(fill_op->Type(), "fill_constant");
auto ops = block->AllOps();
ASSERT_EQ(3UL, ops.size());
EXPECT_EQ(var_to_grad.size(), 1UL);
EXPECT_EQ(var_to_grad.at("a"),
f::GradVarInfo(f::GradVarName("a"), 0, forward_len + 1));
}
...@@ -18,19 +18,22 @@ limitations under the License. */ ...@@ -18,19 +18,22 @@ limitations under the License. */
namespace paddle { namespace paddle {
namespace framework { namespace framework {
VarDescBind *BlockDescBind::NewVar(const std::string &name) { VarDescBind *BlockDescBind::Var(const std::string &name) {
need_update_ = true; need_update_ = true;
auto it = vars_.find(name); auto it = vars_.find(name);
PADDLE_ENFORCE(it == vars_.end(), "Duplicated variable %s", name); if (it != vars_.end()) {
auto var = new VarDescBind(name); return it->second.get();
}
auto *var = new VarDescBind(name);
vars_[name].reset(var); vars_[name].reset(var);
return var; return var;
} }
VarDescBind *BlockDescBind::Var(const std::string &name) const { VarDescBind *BlockDescBind::FindVar(const std::string &name) const {
auto it = vars_.find(name); auto it = vars_.find(name);
PADDLE_ENFORCE(it != vars_.end(), if (it == vars_.end()) {
"Can not find variable %s in current block.", name); return nullptr;
}
return it->second.get(); return it->second.get();
} }
...@@ -66,7 +69,7 @@ std::vector<OpDescBind *> BlockDescBind::AllOps() const { ...@@ -66,7 +69,7 @@ std::vector<OpDescBind *> BlockDescBind::AllOps() const {
return res; return res;
} }
void BlockDescBind::Sync() { void BlockDescBind::Flush() {
if (need_update_) { if (need_update_) {
auto &op_field = *this->desc_->mutable_ops(); auto &op_field = *this->desc_->mutable_ops();
op_field.Clear(); op_field.Clear();
...@@ -91,9 +94,10 @@ BlockDescBind *BlockDescBind::ParentBlock() const { ...@@ -91,9 +94,10 @@ BlockDescBind *BlockDescBind::ParentBlock() const {
return prog_->Block(static_cast<size_t>(this->desc_->parent_idx())); return prog_->Block(static_cast<size_t>(this->desc_->parent_idx()));
} }
void OpDescBind::SetBlockAttr(const std::string &name, BlockDescBind &block) { BlockDesc *BlockDescBind::Proto() {
BlockDesc *desc = block.RawPtr(); Flush();
this->attrs_[name] = desc; return desc_;
} }
} // namespace framework } // namespace framework
} // namespace paddle } // namespace paddle
...@@ -33,14 +33,6 @@ class ProgramDescBind; ...@@ -33,14 +33,6 @@ class ProgramDescBind;
class BlockDescBind { class BlockDescBind {
public: public:
friend std::vector<std::unique_ptr<OpDescBind>> MakeBlockBackward(
ProgramDescBind &program_desc, int block_idx,
std::unordered_set<std::string> &no_grad_vars);
friend void AppendBackward(
ProgramDescBind &program_desc,
const std::unordered_set<std::string> &no_grad_vars);
BlockDescBind(ProgramDescBind *prog, BlockDesc *desc) BlockDescBind(ProgramDescBind *prog, BlockDesc *desc)
: prog_(prog), desc_(desc), need_update_(false) {} : prog_(prog), desc_(desc), need_update_(false) {}
...@@ -48,9 +40,9 @@ class BlockDescBind { ...@@ -48,9 +40,9 @@ class BlockDescBind {
int32_t Parent() const { return desc_->parent_idx(); } int32_t Parent() const { return desc_->parent_idx(); }
VarDescBind *NewVar(const std::string &name_bytes); VarDescBind *Var(const std::string &name_bytes);
VarDescBind *Var(const std::string &name_bytes) const; VarDescBind *FindVar(const std::string &name_bytes) const;
bool HasVar(const std::string &var_name) const; bool HasVar(const std::string &var_name) const;
...@@ -64,11 +56,13 @@ class BlockDescBind { ...@@ -64,11 +56,13 @@ class BlockDescBind {
std::vector<OpDescBind *> AllOps() const; std::vector<OpDescBind *> AllOps() const;
void Sync(); void Flush();
BlockDesc *RawPtr() { return desc_; } BlockDesc *Proto();
private: // FIXME(yuyang18): backward will access private data of BlockDesc.
// Mark it public temporary. We can fix it later.
public:
ProgramDescBind *prog_; // not_own ProgramDescBind *prog_; // not_own
BlockDesc *desc_; // not_own BlockDesc *desc_; // not_own
bool need_update_; bool need_update_;
......
...@@ -97,8 +97,11 @@ struct OpInfoFiller<T, kOpProtoAndCheckerMaker> { ...@@ -97,8 +97,11 @@ struct OpInfoFiller<T, kOpProtoAndCheckerMaker> {
template <typename T> template <typename T>
struct OpInfoFiller<T, kGradOpDescMaker> { struct OpInfoFiller<T, kGradOpDescMaker> {
void operator()(const char* op_type, OpInfo* info) const { void operator()(const char* op_type, OpInfo* info) const {
info->grad_op_maker_ = [](const OpDescBind& fwd_op) { info->grad_op_maker_ = [](
T maker(fwd_op); const OpDescBind& fwd_op,
const std::unordered_set<std::string>& no_grad_set,
std::unordered_map<std::string, std::string>* grad_to_var) {
T maker(fwd_op, no_grad_set, grad_to_var);
return maker(); return maker();
}; };
} }
......
...@@ -66,7 +66,7 @@ void Executor::Run(const ProgramDesc& pdesc, Scope* scope, int block_id) { ...@@ -66,7 +66,7 @@ void Executor::Run(const ProgramDesc& pdesc, Scope* scope, int block_id) {
// Instantiate all the vars in the global scope // Instantiate all the vars in the global scope
for (auto& var : block.vars()) { for (auto& var : block.vars()) {
scope->NewVar(var.name()); scope->Var(var.name());
} }
Scope& local_scope = scope->NewScope(); Scope& local_scope = scope->NewScope();
...@@ -78,7 +78,7 @@ void Executor::Run(const ProgramDesc& pdesc, Scope* scope, int block_id) { ...@@ -78,7 +78,7 @@ void Executor::Run(const ProgramDesc& pdesc, Scope* scope, int block_id) {
for (auto& var : block.ops(i).outputs()) { for (auto& var : block.ops(i).outputs()) {
for (auto& argu : var.arguments()) { for (auto& argu : var.arguments()) {
if (local_scope.FindVar(argu) == nullptr) { if (local_scope.FindVar(argu) == nullptr) {
local_scope.NewVar(argu); local_scope.Var(argu);
} }
} }
} }
......
...@@ -17,6 +17,7 @@ limitations under the License. */ ...@@ -17,6 +17,7 @@ limitations under the License. */
#include <memory> #include <memory>
#include <vector> #include <vector>
#include "gflags/gflags.h"
#include "gtest/gtest.h" #include "gtest/gtest.h"
#include "paddle/framework/attribute.h" #include "paddle/framework/attribute.h"
#include "paddle/framework/backward.h" #include "paddle/framework/backward.h"
...@@ -25,6 +26,17 @@ limitations under the License. */ ...@@ -25,6 +26,17 @@ limitations under the License. */
#include "paddle/framework/op_registry.h" #include "paddle/framework/op_registry.h"
#include "paddle/framework/operator.h" #include "paddle/framework/operator.h"
USE_OP(elementwise_add);
USE_OP(gaussian_random);
USE_OP(feed);
USE_OP(fetch);
USE_OP(mul);
USE_OP(sum);
USE_OP(squared_l2_distance);
USE_OP(fill_constant);
USE_OP(mean);
USE_OP(sgd);
using namespace paddle::platform; using namespace paddle::platform;
using namespace paddle::framework; using namespace paddle::framework;
...@@ -34,8 +46,16 @@ void AddOp(const std::string& type, const VariableNameMap& inputs, ...@@ -34,8 +46,16 @@ void AddOp(const std::string& type, const VariableNameMap& inputs,
// insert output // insert output
for (auto kv : outputs) { for (auto kv : outputs) {
for (auto v : kv.second) { for (auto v : kv.second) {
auto var = block->NewVar(v); // <<<<<<< HEAD
var->SetDataType(paddle::framework::DataType::FP32); // auto var = block->Var(v);
// var->SetType(VarDesc::LOD_TENSOR);
// var->SetDataType(paddle::framework::DataType::FP32);
// =======
if (!block->HasVar(v)) {
auto var = block->Var(v);
var->SetDataType(paddle::framework::DataType::FP32);
}
// >>>>>>> origin/develop
} }
} }
...@@ -49,6 +69,7 @@ void AddOp(const std::string& type, const VariableNameMap& inputs, ...@@ -49,6 +69,7 @@ void AddOp(const std::string& type, const VariableNameMap& inputs,
op->SetOutput(kv.first, kv.second); op->SetOutput(kv.first, kv.second);
} }
op->SetAttrMap(attrs); op->SetAttrMap(attrs);
op->CheckAttrs();
} }
// Tensors in feed value variable will only be in CPUPlace // Tensors in feed value variable will only be in CPUPlace
...@@ -134,12 +155,12 @@ class ExecutorTesterRandom : public ::testing::Test { ...@@ -134,12 +155,12 @@ class ExecutorTesterRandom : public ::testing::Test {
AddOp("squared_l2_distance", {{"X", {"a"}}, {"Y", {"a_out"}}}, AddOp("squared_l2_distance", {{"X", {"a"}}, {"Y", {"a_out"}}},
{{"Out", {"l2_distance"}}, {"sub_result", {"l2_distance_sub"}}}, {}, {{"Out", {"l2_distance"}}, {"sub_result", {"l2_distance_sub"}}}, {},
root_block); root_block);
AddOp("mean", {{"X", {"l2_distance"}}}, {{"Out", {"mean_out"}}}, {},
root_block);
// backward // backward
AddOp("fill_constant", {}, {{"Out", {"l2_distance@GRAD"}}}, auto target = VarDescBind("mean_out");
{{"shape", std::vector<int>{batch_size, 1}}, {"value", float(1.0)}}, AppendBackward(program, target, {});
root_block);
AppendBackward(program, {});
// update // update
AddOp("fill_constant", {}, {{"Out", {"learning_rate"}}}, AddOp("fill_constant", {}, {{"Out", {"learning_rate"}}},
...@@ -305,4 +326,14 @@ TEST_F(ExecutorTesterFeedAndFetch, GPU) { ...@@ -305,4 +326,14 @@ TEST_F(ExecutorTesterFeedAndFetch, GPU) {
} }
} }
} }
DECLARE_double(fraction_of_gpu_memory_to_use);
int main(int argc, char** argv) {
testing::InitGoogleTest(&argc, argv);
// Use less GPU memory for unittest.
FLAGS_fraction_of_gpu_memory_to_use = 0.25;
return RUN_ALL_TESTS();
}
#endif #endif
...@@ -97,16 +97,26 @@ enum DataType { ...@@ -97,16 +97,26 @@ enum DataType {
FP64 = 6; FP64 = 6;
} }
message LoDTensorDesc { message TensorDesc {
required DataType data_type = 1; required DataType data_type = 1;
repeated int64 dims = 2; // [UNK, 640, 480] is saved as [-1, 640, 480] repeated int64 dims = 2; // [UNK, 640, 480] is saved as [-1, 640, 480]
optional int32 lod_level = 3 [ default = 0 ]; }
message LoDTensorDesc {
required TensorDesc tensor = 1;
optional int32 lod_level = 2 [ default = 0 ];
} }
message VarDesc { message VarDesc {
enum VarType {
LOD_TENSOR = 1;
SELECTED_ROWS = 2;
}
required string name = 1; required string name = 1;
optional LoDTensorDesc lod_tensor = 2; required VarType type = 2;
optional bool persistable = 3 [ default = false ]; optional LoDTensorDesc lod_tensor = 3;
optional TensorDesc selected_rows = 4;
optional bool persistable = 5 [ default = false ];
} }
message BlockDesc { message BlockDesc {
......
...@@ -13,6 +13,8 @@ ...@@ -13,6 +13,8 @@
limitations under the License. */ limitations under the License. */
#pragma once #pragma once
#include <string>
#include <unordered_set>
#include "paddle/framework/op_desc.h" #include "paddle/framework/op_desc.h"
#include "paddle/framework/operator.h" #include "paddle/framework/operator.h"
...@@ -21,27 +23,50 @@ namespace framework { ...@@ -21,27 +23,50 @@ namespace framework {
class GradOpDescMakerBase { class GradOpDescMakerBase {
public: public:
explicit GradOpDescMakerBase(const OpDescBind& fwd_op) : fwd_op_(fwd_op) {} explicit GradOpDescMakerBase(
const OpDescBind& fwd_op,
const std::unordered_set<std::string>& no_grad_set,
std::unordered_map<std::string, std::string>* grad_to_var)
: fwd_op_(fwd_op), no_grad_set_(no_grad_set), grad_to_var_(grad_to_var) {}
virtual ~GradOpDescMakerBase() = default; virtual ~GradOpDescMakerBase() = default;
virtual std::vector<std::unique_ptr<OpDescBind>> operator()() const = 0; virtual std::vector<std::unique_ptr<OpDescBind>> operator()() const = 0;
protected: protected:
static std::vector<std::string> ToGradNames( std::vector<std::string> InputGrad(const std::string& name,
const std::vector<std::string>& var_names) { bool drop_empty_grad = true) const {
std::vector<std::string> ret_val; std::vector<std::string> ret_val;
auto var_names = this->Input(name);
ret_val.reserve(var_names.size()); ret_val.reserve(var_names.size());
std::transform(var_names.begin(), var_names.end(), std::transform(var_names.begin(), var_names.end(),
std::back_inserter(ret_val), GradVarName); std::back_inserter(ret_val),
return ret_val; [this](const std::string& fwd_var_name) -> std::string {
} auto g_name = GradVarName(fwd_var_name);
if (no_grad_set_.count(g_name)) {
std::vector<std::string> InputGrad(const std::string& name) const { return kEmptyVarName;
return ToGradNames(fwd_op_.Input(name)); } else {
(*this->grad_to_var_)[g_name] = fwd_var_name;
return g_name;
}
});
if (!drop_empty_grad) {
return ret_val;
}
std::vector<std::string> dropped_ret_val;
dropped_ret_val.reserve(ret_val.size());
std::copy_if(ret_val.begin(), ret_val.end(),
std::back_inserter(dropped_ret_val),
[](const std::string& str) { return str != kEmptyVarName; });
return dropped_ret_val;
} }
std::vector<std::string> OutputGrad(const std::string& name) const { std::vector<std::string> OutputGrad(const std::string& name) const {
return ToGradNames(fwd_op_.Output(name)); std::vector<std::string> ret_val;
auto onames = this->Output(name);
ret_val.reserve(onames.size());
std::transform(onames.begin(), onames.end(), std::back_inserter(ret_val),
GradVarName);
return ret_val;
} }
std::vector<std::string> InputNames() const { std::vector<std::string> InputNames() const {
...@@ -75,6 +100,8 @@ class GradOpDescMakerBase { ...@@ -75,6 +100,8 @@ class GradOpDescMakerBase {
private: private:
const OpDescBind& fwd_op_; const OpDescBind& fwd_op_;
const std::unordered_set<std::string>& no_grad_set_;
std::unordered_map<std::string, std::string>* grad_to_var_;
}; };
class SingleGradOpDescMaker : public GradOpDescMakerBase { class SingleGradOpDescMaker : public GradOpDescMakerBase {
...@@ -91,6 +118,7 @@ class SingleGradOpDescMaker : public GradOpDescMakerBase { ...@@ -91,6 +118,7 @@ class SingleGradOpDescMaker : public GradOpDescMakerBase {
virtual std::unique_ptr<OpDescBind> Apply() const = 0; virtual std::unique_ptr<OpDescBind> Apply() const = 0;
}; };
template <bool DropEmptyIG = true>
class DefaultGradOpDescMaker : public SingleGradOpDescMaker { class DefaultGradOpDescMaker : public SingleGradOpDescMaker {
public: public:
using SingleGradOpDescMaker::SingleGradOpDescMaker; using SingleGradOpDescMaker::SingleGradOpDescMaker;
...@@ -102,7 +130,8 @@ class DefaultGradOpDescMaker : public SingleGradOpDescMaker { ...@@ -102,7 +130,8 @@ class DefaultGradOpDescMaker : public SingleGradOpDescMaker {
for (auto& input_param : this->InputNames()) { for (auto& input_param : this->InputNames()) {
grad->SetInput(input_param, this->Input(input_param)); grad->SetInput(input_param, this->Input(input_param));
grad->SetOutput(GradVarName(input_param), this->InputGrad(input_param)); grad->SetOutput(GradVarName(input_param),
this->InputGrad(input_param, DropEmptyIG));
} }
for (auto& output_param : this->OutputNames()) { for (auto& output_param : this->OutputNames()) {
......
...@@ -32,7 +32,7 @@ OpDescBind::OpDescBind(const std::string &type, const VariableNameMap &inputs, ...@@ -32,7 +32,7 @@ OpDescBind::OpDescBind(const std::string &type, const VariableNameMap &inputs,
} }
OpDesc *OpDescBind::Proto() { OpDesc *OpDescBind::Proto() {
Sync(); Flush();
return &op_desc_; return &op_desc_;
} }
...@@ -100,6 +100,12 @@ void OpDescBind::SetAttr(const std::string &name, const Attribute &v) { ...@@ -100,6 +100,12 @@ void OpDescBind::SetAttr(const std::string &name, const Attribute &v) {
need_update_ = true; need_update_ = true;
} }
void OpDescBind::SetBlockAttr(const std::string &name, BlockDescBind &block) {
BlockDesc *desc = block.Proto();
this->attrs_[name] = desc;
need_update_ = true;
}
void OpDescBind::SetAttrMap( void OpDescBind::SetAttrMap(
const std::unordered_map<std::string, Attribute> &attr_map) { const std::unordered_map<std::string, Attribute> &attr_map) {
attrs_ = attr_map; attrs_ = attr_map;
...@@ -159,7 +165,7 @@ struct SetAttrDescVisitor : public boost::static_visitor<void> { ...@@ -159,7 +165,7 @@ struct SetAttrDescVisitor : public boost::static_visitor<void> {
void operator()(boost::blank) const { PADDLE_THROW("Unexpected branch"); } void operator()(boost::blank) const { PADDLE_THROW("Unexpected branch"); }
}; };
void OpDescBind::Sync() { void OpDescBind::Flush() {
if (need_update_) { if (need_update_) {
this->op_desc_.mutable_inputs()->Clear(); this->op_desc_.mutable_inputs()->Clear();
for (auto &ipt : inputs_) { for (auto &ipt : inputs_) {
......
...@@ -89,8 +89,6 @@ class OpDescBind { ...@@ -89,8 +89,6 @@ class OpDescBind {
this->need_update_ = true; this->need_update_ = true;
} }
void Sync();
const VariableNameMap &Inputs() const { return inputs_; } const VariableNameMap &Inputs() const { return inputs_; }
const VariableNameMap &Outputs() const { return outputs_; } const VariableNameMap &Outputs() const { return outputs_; }
...@@ -104,6 +102,8 @@ class OpDescBind { ...@@ -104,6 +102,8 @@ class OpDescBind {
void InferShape(const BlockDescBind &block) const; void InferShape(const BlockDescBind &block) const;
void Flush();
private: private:
template <typename MapType> template <typename MapType>
static std::vector<typename MapType::key_type> MapKeys(const MapType &map) { static std::vector<typename MapType::key_type> MapKeys(const MapType &map) {
......
...@@ -59,16 +59,5 @@ std::unique_ptr<OperatorBase> OpRegistry::CreateOp(const OpDescBind& op_desc) { ...@@ -59,16 +59,5 @@ std::unique_ptr<OperatorBase> OpRegistry::CreateOp(const OpDescBind& op_desc) {
op_desc.GetAttrMap()); op_desc.GetAttrMap());
} }
std::vector<std::unique_ptr<OpDescBind>> OpRegistry::CreateGradOpDescs(
OpDescBind* op_desc) {
auto& info = OpInfoMap::Instance().Get(op_desc->Type());
if (info.Checker() != nullptr) {
info.Checker()->Check(*op_desc->MutableAttrMap());
}
return info.grad_op_maker_(*op_desc);
}
} // namespace framework } // namespace framework
} // namespace paddle } // namespace paddle
...@@ -79,9 +79,6 @@ class OpRegistry { ...@@ -79,9 +79,6 @@ class OpRegistry {
static std::unique_ptr<OperatorBase> CreateOp(const OpDesc& op_desc); static std::unique_ptr<OperatorBase> CreateOp(const OpDesc& op_desc);
static std::vector<std::unique_ptr<OpDescBind>> CreateGradOpDescs(
OpDescBind* op_desc);
static std::unique_ptr<OperatorBase> CreateOp(const OpDescBind& op_desc); static std::unique_ptr<OperatorBase> CreateOp(const OpDescBind& op_desc);
}; };
...@@ -160,17 +157,18 @@ class OpKernelRegistrar : public Registrar { ...@@ -160,17 +157,18 @@ class OpKernelRegistrar : public Registrar {
/** /**
* Macro to register Operator. * Macro to register Operator.
*/ */
#define REGISTER_OP(op_type, op_class, op_maker_class, grad_op_type, \ #define REGISTER_OP(op_type, op_class, op_maker_class, grad_op_type, \
grad_op_class) \ grad_op_class) \
REGISTER_OPERATOR(grad_op_type, grad_op_class); \ REGISTER_OPERATOR(grad_op_type, grad_op_class); \
class _GradOpDescMaker_##grad_op_type##_ \ class _GradOpDescMaker_##grad_op_type##_ \
: public ::paddle::framework::DefaultGradOpDescMaker { \ : public ::paddle::framework::DefaultGradOpDescMaker<true> { \
using ::paddle::framework::DefaultGradOpDescMaker::DefaultGradOpDescMaker; \ using ::paddle::framework::DefaultGradOpDescMaker< \
\ true>::DefaultGradOpDescMaker; \
protected: \ \
virtual std::string GradOpType() const { return #grad_op_type; } \ protected: \
}; \ virtual std::string GradOpType() const { return #grad_op_type; } \
REGISTER_OPERATOR(op_type, op_class, _GradOpDescMaker_##grad_op_type##_, \ }; \
REGISTER_OPERATOR(op_type, op_class, _GradOpDescMaker_##grad_op_type##_, \
op_maker_class); op_maker_class);
#define REGISTER_OP_WITHOUT_GRADIENT(op_type, op_class, op_maker_class) \ #define REGISTER_OP_WITHOUT_GRADIENT(op_type, op_class, op_maker_class) \
......
...@@ -289,6 +289,15 @@ class ExecutionContext { ...@@ -289,6 +289,15 @@ class ExecutionContext {
return device_context_; return device_context_;
} }
#ifdef PADDLE_WITH_CUDA
const platform::CUDADeviceContext& cuda_device_context() const {
PADDLE_ENFORCE(platform::is_gpu_place(device_context_.GetPlace()));
auto cuda_ctx =
reinterpret_cast<const platform::CUDADeviceContext*>(&device_context_);
return *cuda_ctx;
}
#endif
private: private:
const OperatorBase& op_; const OperatorBase& op_;
const Scope& scope_; const Scope& scope_;
...@@ -394,11 +403,11 @@ class CompileTimeInferShapeContext : public InferShapeContext { ...@@ -394,11 +403,11 @@ class CompileTimeInferShapeContext : public InferShapeContext {
private: private:
DDim GetDim(const std::string& name) const override { DDim GetDim(const std::string& name) const override {
return framework::make_ddim(block_.Var(name)->Shape()); return framework::make_ddim(block_.FindVar(name)->Shape());
} }
void SetDim(const std::string& name, const DDim& dim) override { void SetDim(const std::string& name, const DDim& dim) override {
block_.Var(name)->SetShape(framework::vectorize(dim)); block_.FindVar(name)->SetShape(framework::vectorize(dim));
} }
const OpDescBind& op_; const OpDescBind& op_;
......
...@@ -84,7 +84,7 @@ TEST(OperatorBase, all) { ...@@ -84,7 +84,7 @@ TEST(OperatorBase, all) {
paddle::framework::Scope scope; paddle::framework::Scope scope;
auto op = paddle::framework::OpRegistry::CreateOp(op_desc); auto op = paddle::framework::OpRegistry::CreateOp(op_desc);
scope.NewVar("OUT1"); scope.Var("OUT1");
ASSERT_EQ(paddle::framework::op_run_num, 0); ASSERT_EQ(paddle::framework::op_run_num, 0);
op->Run(scope, device_context); op->Run(scope, device_context);
ASSERT_EQ(paddle::framework::op_run_num, 1); ASSERT_EQ(paddle::framework::op_run_num, 1);
...@@ -237,12 +237,12 @@ TEST(OpKernel, multi_inputs) { ...@@ -237,12 +237,12 @@ TEST(OpKernel, multi_inputs) {
paddle::platform::CPUDeviceContext cpu_device_context; paddle::platform::CPUDeviceContext cpu_device_context;
paddle::framework::Scope scope; paddle::framework::Scope scope;
scope.NewVar("x0")->GetMutable<Tensor>(); scope.Var("x0")->GetMutable<Tensor>();
scope.NewVar("x1")->GetMutable<Tensor>(); scope.Var("x1")->GetMutable<Tensor>();
scope.NewVar("x2")->GetMutable<Tensor>(); scope.Var("x2")->GetMutable<Tensor>();
scope.NewVar("k0")->GetMutable<Tensor>(); scope.Var("k0")->GetMutable<Tensor>();
scope.NewVar("y0")->GetMutable<Tensor>(); scope.Var("y0")->GetMutable<Tensor>();
scope.NewVar("y1")->GetMutable<Tensor>(); scope.Var("y1")->GetMutable<Tensor>();
auto op = paddle::framework::OpRegistry::CreateOp(op_desc); auto op = paddle::framework::OpRegistry::CreateOp(op_desc);
op->Run(scope, cpu_device_context); op->Run(scope, cpu_device_context);
......
...@@ -45,7 +45,7 @@ BlockDescBind *ProgramDescBind::AppendBlock(const BlockDescBind &parent) { ...@@ -45,7 +45,7 @@ BlockDescBind *ProgramDescBind::AppendBlock(const BlockDescBind &parent) {
ProgramDesc *ProgramDescBind::Proto() { ProgramDesc *ProgramDescBind::Proto() {
for (auto &block : blocks_) { for (auto &block : blocks_) {
block->Sync(); block->Flush();
} }
return prog_; return prog_;
} }
......
...@@ -31,7 +31,7 @@ Scope& Scope::NewScope() const { ...@@ -31,7 +31,7 @@ Scope& Scope::NewScope() const {
return *kids_.back(); return *kids_.back();
} }
Variable* Scope::NewVar(const std::string& name) { Variable* Scope::Var(const std::string& name) {
auto iter = vars_.find(name); auto iter = vars_.find(name);
if (iter != vars_.end()) { if (iter != vars_.end()) {
return iter->second; return iter->second;
...@@ -42,8 +42,8 @@ Variable* Scope::NewVar(const std::string& name) { ...@@ -42,8 +42,8 @@ Variable* Scope::NewVar(const std::string& name) {
return v; return v;
} }
Variable* Scope::NewVar() { Variable* Scope::Var() {
return NewVar(string::Sprintf("%p.%d", this, vars_.size())); return Var(string::Sprintf("%p.%d", this, vars_.size()));
} }
Variable* Scope::FindVar(const std::string& name) const { Variable* Scope::FindVar(const std::string& name) const {
...@@ -71,8 +71,8 @@ framework::Scope& GetGlobalScope() { ...@@ -71,8 +71,8 @@ framework::Scope& GetGlobalScope() {
static std::unique_ptr<framework::Scope> g_scope{nullptr}; static std::unique_ptr<framework::Scope> g_scope{nullptr};
std::call_once(feed_variable_flag, [&]() { std::call_once(feed_variable_flag, [&]() {
g_scope.reset(new framework::Scope()); g_scope.reset(new framework::Scope());
g_scope->NewVar("feed_value"); g_scope->Var("feed_value");
g_scope->NewVar("fetch_value"); g_scope->Var("fetch_value");
}); });
return *(g_scope.get()); return *(g_scope.get());
} }
......
...@@ -45,10 +45,10 @@ class Scope { ...@@ -45,10 +45,10 @@ class Scope {
Scope& NewScope() const; Scope& NewScope() const;
/// Create a variable with given name if it doesn't exist. /// Create a variable with given name if it doesn't exist.
Variable* NewVar(const std::string& name); Variable* Var(const std::string& name);
/// Create a variable with a scope-unique name. /// Create a variable with a scope-unique name.
Variable* NewVar(); Variable* Var();
/// Find a variable in the scope or any of its ancestors. Returns /// Find a variable in the scope or any of its ancestors. Returns
/// nullptr if cannot find. /// nullptr if cannot find.
......
...@@ -23,8 +23,8 @@ TEST(Scope, VarsShadowing) { ...@@ -23,8 +23,8 @@ TEST(Scope, VarsShadowing) {
Scope& ss1 = s.NewScope(); Scope& ss1 = s.NewScope();
Scope& ss2 = s.NewScope(); Scope& ss2 = s.NewScope();
Variable* v0 = s.NewVar("a"); Variable* v0 = s.Var("a");
Variable* v1 = ss1.NewVar("a"); Variable* v1 = ss1.Var("a");
EXPECT_NE(v0, v1); EXPECT_NE(v0, v1);
...@@ -40,7 +40,7 @@ TEST(Scope, FindVar) { ...@@ -40,7 +40,7 @@ TEST(Scope, FindVar) {
EXPECT_EQ(nullptr, s.FindVar("a")); EXPECT_EQ(nullptr, s.FindVar("a"));
EXPECT_EQ(nullptr, ss.FindVar("a")); EXPECT_EQ(nullptr, ss.FindVar("a"));
ss.NewVar("a"); ss.Var("a");
EXPECT_EQ(nullptr, s.FindVar("a")); EXPECT_EQ(nullptr, s.FindVar("a"));
EXPECT_NE(nullptr, ss.FindVar("a")); EXPECT_NE(nullptr, ss.FindVar("a"));
...@@ -49,7 +49,7 @@ TEST(Scope, FindVar) { ...@@ -49,7 +49,7 @@ TEST(Scope, FindVar) {
TEST(Scope, FindScope) { TEST(Scope, FindScope) {
Scope s; Scope s;
Scope& ss = s.NewScope(); Scope& ss = s.NewScope();
Variable* v = s.NewVar("a"); Variable* v = s.Var("a");
EXPECT_EQ(&s, s.FindScope(v)); EXPECT_EQ(&s, s.FindScope(v));
EXPECT_EQ(&s, ss.FindScope(v)); EXPECT_EQ(&s, ss.FindScope(v));
......
/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserve.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
#include "paddle/framework/selected_rows.h"
namespace paddle {
namespace framework {} // namespace framework
} // namespace paddle
/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserve.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
#pragma once
#include "paddle/framework/tensor.h"
namespace paddle {
namespace framework {
class SelectedRows {
public:
SelectedRows(const std::vector<int64_t>& rows, const int64_t& height)
: rows_(rows), height_(height) {
value_.reset(new Tensor());
}
SelectedRows() { value_.reset(new Tensor()); }
platform::Place place() const { return value_->place(); }
const Tensor& value() const { return *value_; }
Tensor* mutable_value() { return value_.get(); }
int64_t height() const { return height_; }
void set_height(int64_t height) { height_ = height; }
const std::vector<int64_t>& rows() const { return rows_; }
void set_rows(const std::vector<int64_t>& rows) { rows_ = rows; }
DDim GetCompleteDims() const {
std::vector<int64_t> dims = vectorize(value_->dims());
dims[0] = height_;
return make_ddim(dims);
}
private:
std::vector<int64_t> rows_;
std::unique_ptr<Tensor> value_{nullptr};
int64_t height_;
};
} // namespace framework
} // namespace paddle
/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserve.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
#include "paddle/framework/selected_rows.h"
#include "gtest/gtest.h"
namespace paddle {
namespace framework {
class SelectedRowsTester : public ::testing::Test {
public:
virtual void SetUp() override {
std::vector<int64_t> rows{0, 4, 7};
int64_t height = 10;
int64_t row_numel = 100;
selected_rows_.reset(new SelectedRows(rows, height));
Tensor* value = selected_rows_->mutable_value();
value->mutable_data<float>(
make_ddim({static_cast<int64_t>(rows.size()), row_numel}), place_);
}
protected:
platform::CPUPlace place_;
std::unique_ptr<SelectedRows> selected_rows_{nullptr};
};
TEST_F(SelectedRowsTester, height) { ASSERT_EQ(selected_rows_->height(), 10); }
TEST_F(SelectedRowsTester, dims) {
ASSERT_EQ(selected_rows_->value().dims(), make_ddim({3, 100}));
}
TEST_F(SelectedRowsTester, complete_dims) {
ASSERT_EQ(selected_rows_->GetCompleteDims(), make_ddim({10, 100}));
}
} // namespace framework
} // namespace paddle
...@@ -19,9 +19,6 @@ limitations under the License. */ ...@@ -19,9 +19,6 @@ limitations under the License. */
namespace paddle { namespace paddle {
namespace framework { namespace framework {
// TODO(longfei): Once after both CompileTimeInferShapeContext and
// RuntimeInferShapeContext get merged, we can rename InferShapeContext into
// InferShapeContext so to replace the current InferShapeContext.
class InferShapeContext { class InferShapeContext {
public: public:
virtual ~InferShapeContext() {} virtual ~InferShapeContext() {}
......
...@@ -76,6 +76,17 @@ LoDTensor PackDynamicBatch(const std::vector<LoDTensor>& source, ...@@ -76,6 +76,17 @@ LoDTensor PackDynamicBatch(const std::vector<LoDTensor>& source,
const std::vector<DySeqMeta>& meta, const LoD& lod, const std::vector<DySeqMeta>& meta, const LoD& lod,
size_t level); size_t level);
std::vector<size_t> GenDyBatchIndice(const DySeqMetaBatch& meta, int batch_id) {
// collect indice need to copy to the batch
std::vector<size_t> indice;
for (const auto& seq : meta) {
size_t id = seq.begin + batch_id;
if (id >= seq.end) break;
indice.push_back(id);
}
return indice;
}
} // namespace detail } // namespace detail
const LoDTensor& TensorArray::Read(size_t index) const { const LoDTensor& TensorArray::Read(size_t index) const {
...@@ -113,8 +124,8 @@ LoDTensor TensorArray::Pack(size_t level, const std::vector<DySeqMeta>& meta, ...@@ -113,8 +124,8 @@ LoDTensor TensorArray::Pack(size_t level, const std::vector<DySeqMeta>& meta,
return detail::PackDynamicBatch(values_, meta, lod, level); return detail::PackDynamicBatch(values_, meta, lod, level);
} }
std::vector<DySeqMeta> TensorArray::Unpack(const LoDTensor& source, int level, DySeqMetaBatch TensorArray::Unpack(const LoDTensor& source, int level,
bool length_desend) { bool length_desend) {
detail::DynamicBatchUnpacker unpacker(source, level, detail::DynamicBatchUnpacker unpacker(source, level,
length_desend /*descend*/); length_desend /*descend*/);
...@@ -129,6 +140,7 @@ std::vector<DySeqMeta> TensorArray::Unpack(const LoDTensor& source, int level, ...@@ -129,6 +140,7 @@ std::vector<DySeqMeta> TensorArray::Unpack(const LoDTensor& source, int level,
Write(batch_id, unpacker.GetBatch(batch_id)); Write(batch_id, unpacker.GetBatch(batch_id));
} }
PADDLE_ENFORCE(!unpacker.meta.empty());
return unpacker.meta; return unpacker.meta;
} }
...@@ -218,13 +230,7 @@ LoDTensor DynamicBatchUnpacker::GetBatch(size_t index) { ...@@ -218,13 +230,7 @@ LoDTensor DynamicBatchUnpacker::GetBatch(size_t index) {
PADDLE_ENFORCE(!meta.empty(), "should build meta first"); PADDLE_ENFORCE(!meta.empty(), "should build meta first");
LoDTensor result; LoDTensor result;
// collect indice need to copy to the batch auto indice = detail::GenDyBatchIndice(meta, index);
std::vector<size_t> indice;
for (const auto& seq : meta) {
size_t id = seq.begin + index;
if (id >= seq.end) break;
indice.push_back(id);
}
PADDLE_ENFORCE(!indice.empty(), "invalid batch at %d", index); PADDLE_ENFORCE(!indice.empty(), "invalid batch at %d", index);
// copy the indice of records in LoDTensor // copy the indice of records in LoDTensor
...@@ -237,9 +243,9 @@ LoDTensor DynamicBatchUnpacker::GetBatch(size_t index) { ...@@ -237,9 +243,9 @@ LoDTensor DynamicBatchUnpacker::GetBatch(size_t index) {
for (size_t i = 0; i < indice.size(); i++) { for (size_t i = 0; i < indice.size(); i++) {
auto index = indice[i]; auto index = indice[i];
auto target = result.Slice<value_type>(i, i + 1); auto target = result.Slice<value_type>(i, i + 1);
auto source_ = source->Slice<value_type>(index, index + 1); auto slice = source->Slice<value_type>(index, index + 1);
target.CopyFrom<value_type>(source_, platform::CPUPlace(), target.CopyFrom<value_type>(slice, platform::CPUPlace(),
platform::CPUDeviceContext()); platform::CPUDeviceContext());
} }
......
...@@ -34,6 +34,13 @@ struct DySeqMeta { ...@@ -34,6 +34,13 @@ struct DySeqMeta {
size_t ori_idx; size_t ori_idx;
}; };
using DySeqMetaBatch = std::vector<DySeqMeta>;
/*
* Extract the indices of instances.
*/
std::vector<size_t> GenDyBatchIndice(const DySeqMetaBatch &metas, int batch_id);
/* /*
* TensorArray is a C-array-like array of tensors, it is meant to be used with * TensorArray is a C-array-like array of tensors, it is meant to be used with
* dynamic iteration primitives such as while_loop. It is used to segment inputs * dynamic iteration primitives such as while_loop. It is used to segment inputs
...@@ -69,7 +76,7 @@ class TensorArray { ...@@ -69,7 +76,7 @@ class TensorArray {
* Recover the original LoD-arranged LoDTensor with the `values`, `level` and * Recover the original LoD-arranged LoDTensor with the `values`, `level` and
* `indice_map`. * `indice_map`.
*/ */
LoDTensor Pack(size_t level, const std::vector<DySeqMeta> &meta, LoDTensor Pack(size_t level, const DySeqMetaBatch &meta,
const LoD &lod) const; const LoD &lod) const;
/* /*
...@@ -77,8 +84,7 @@ class TensorArray { ...@@ -77,8 +84,7 @@ class TensorArray {
* `values`, if set `desend`, will sort by length in descending order else in * `values`, if set `desend`, will sort by length in descending order else in
* ascending order. * ascending order.
*/ */
std::vector<DySeqMeta> Unpack(const LoDTensor &source, int level, DySeqMetaBatch Unpack(const LoDTensor &source, int level, bool length_desend);
bool length_desend);
/* /*
* Pack the values into a tensor with rank one higher than each tensor in * Pack the values into a tensor with rank one higher than each tensor in
......
...@@ -36,8 +36,9 @@ using OpCreator = std::function<OperatorBase*( ...@@ -36,8 +36,9 @@ using OpCreator = std::function<OperatorBase*(
const std::string& /*type*/, const VariableNameMap& /*inputs*/, const std::string& /*type*/, const VariableNameMap& /*inputs*/,
const VariableNameMap& /*outputs*/, const AttributeMap& /*attrs*/)>; const VariableNameMap& /*outputs*/, const AttributeMap& /*attrs*/)>;
using GradOpMakerFN = using GradOpMakerFN = std::function<std::vector<std::unique_ptr<OpDescBind>>(
std::function<std::vector<std::unique_ptr<OpDescBind>>(const OpDescBind&)>; const OpDescBind&, const std::unordered_set<std::string>& /*no_grad_set*/,
std::unordered_map<std::string, std::string>* /*grad_to_var*/)>;
} // namespace framework } // namespace framework
} // namespace paddle } // namespace paddle
...@@ -13,32 +13,58 @@ See the License for the specific language governing permissions and ...@@ -13,32 +13,58 @@ See the License for the specific language governing permissions and
limitations under the License. */ limitations under the License. */
#include "paddle/framework/var_desc.h" #include "paddle/framework/var_desc.h"
#include "paddle/platform/enforce.h"
namespace paddle { namespace paddle {
namespace framework { namespace framework {
void VarDescBind::SetShape(const std::vector<int64_t> &dims) { void VarDescBind::SetShape(const std::vector<int64_t> &dims) {
VectorToRepeated(dims, desc_.mutable_lod_tensor()->mutable_dims()); VectorToRepeated(dims, mutable_tensor_desc()->mutable_dims());
} }
void VarDescBind::SetDataType(DataType data_type) { void VarDescBind::SetDataType(DataType data_type) {
desc_.mutable_lod_tensor()->set_data_type(data_type); mutable_tensor_desc()->set_data_type(data_type);
} }
std::vector<int64_t> VarDescBind::Shape() const { std::vector<int64_t> VarDescBind::Shape() const {
return RepeatedToVector(desc_.lod_tensor().dims()); return RepeatedToVector(tensor_desc().dims());
} }
DataType VarDescBind::GetDataType() const { DataType VarDescBind::GetDataType() const { return tensor_desc().data_type(); }
return desc_.lod_tensor().data_type();
}
void VarDescBind::SetLoDLevel(int32_t lod_level) { void VarDescBind::SetLoDLevel(int32_t lod_level) {
PADDLE_ENFORCE(desc_.type() == VarDesc::LOD_TENSOR);
desc_.mutable_lod_tensor()->set_lod_level(lod_level); desc_.mutable_lod_tensor()->set_lod_level(lod_level);
} }
int32_t VarDescBind::GetLodLevel() const { int32_t VarDescBind::GetLodLevel() const {
PADDLE_ENFORCE(desc_.type() == VarDesc::LOD_TENSOR);
return desc_.lod_tensor().lod_level(); return desc_.lod_tensor().lod_level();
} }
const TensorDesc &VarDescBind::tensor_desc() const {
PADDLE_ENFORCE(desc_.has_type(), "invoke TensorDesc must after set type");
switch (desc_.type()) {
case VarDesc::SELECTED_ROWS:
return desc_.selected_rows();
case VarDesc::LOD_TENSOR:
return desc_.lod_tensor().tensor();
default:
PADDLE_THROW("Unexpected branch.");
}
}
TensorDesc *VarDescBind::mutable_tensor_desc() {
PADDLE_ENFORCE(desc_.has_type(),
"invoke MutableTensorDesc must after set type");
switch (desc_.type()) {
case VarDesc::SELECTED_ROWS:
return desc_.mutable_selected_rows();
case VarDesc::LOD_TENSOR:
return desc_.mutable_lod_tensor()->mutable_tensor();
default:
PADDLE_THROW("Unexpected branch.");
}
}
} // namespace framework } // namespace framework
} // namespace paddle } // namespace paddle
...@@ -34,6 +34,7 @@ inline std::vector<T> RepeatedToVector( ...@@ -34,6 +34,7 @@ inline std::vector<T> RepeatedToVector(
template <typename T, typename RepeatedField> template <typename T, typename RepeatedField>
inline void VectorToRepeated(const std::vector<T> &vec, inline void VectorToRepeated(const std::vector<T> &vec,
RepeatedField *repeated_field) { RepeatedField *repeated_field) {
repeated_field->Clear();
repeated_field->Reserve(vec.size()); repeated_field->Reserve(vec.size());
for (const auto &elem : vec) { for (const auto &elem : vec) {
*repeated_field->Add() = elem; *repeated_field->Add() = elem;
...@@ -44,6 +45,7 @@ inline void VectorToRepeated(const std::vector<T> &vec, ...@@ -44,6 +45,7 @@ inline void VectorToRepeated(const std::vector<T> &vec,
template <typename RepeatedField> template <typename RepeatedField>
inline void VectorToRepeated(const std::vector<bool> &vec, inline void VectorToRepeated(const std::vector<bool> &vec,
RepeatedField *repeated_field) { RepeatedField *repeated_field) {
repeated_field->Clear();
repeated_field->Reserve(vec.size()); repeated_field->Reserve(vec.size());
for (auto elem : vec) { for (auto elem : vec) {
*repeated_field->Add() = elem; *repeated_field->Add() = elem;
...@@ -52,7 +54,10 @@ inline void VectorToRepeated(const std::vector<bool> &vec, ...@@ -52,7 +54,10 @@ inline void VectorToRepeated(const std::vector<bool> &vec,
class VarDescBind { class VarDescBind {
public: public:
explicit VarDescBind(const std::string &name) { desc_.set_name(name); } explicit VarDescBind(const std::string &name) {
desc_.set_name(name);
desc_.set_type(VarDesc::LOD_TENSOR);
}
VarDesc *Proto() { return &desc_; } VarDesc *Proto() { return &desc_; }
...@@ -70,7 +75,14 @@ class VarDescBind { ...@@ -70,7 +75,14 @@ class VarDescBind {
int32_t GetLodLevel() const; int32_t GetLodLevel() const;
VarDesc::VarType GetType() const { return desc_.type(); }
void SetType(VarDesc::VarType type) { desc_.set_type(type); }
private: private:
const TensorDesc &tensor_desc() const;
TensorDesc *mutable_tensor_desc();
VarDesc desc_; VarDesc desc_;
}; };
} // namespace framework } // namespace framework
......
...@@ -462,8 +462,8 @@ void LambdaCost::calcGrad(const real* outputScore, ...@@ -462,8 +462,8 @@ void LambdaCost::calcGrad(const real* outputScore,
real score_j = score[index_j]; real score_j = score[index_j];
real dcgDif = 0; real dcgDif = 0;
if (j < sortSize) { if (j < sortSize) {
dcgDif = (std::pow(2, score_i) - std::pow(2, score_j)) / dcgDif = (std::pow(2, score_i) - std::pow(2, score_j)) *
(std::log(i + 2) - std::log(j + 2)); (1 / std::log(i + 2) - 1 / std::log(j + 2));
} else { } else {
dcgDif = dcgDif =
(std::pow(2, score_i) - std::pow(2, score_j)) / std::log(i + 2); (std::pow(2, score_i) - std::pow(2, score_j)) / std::log(i + 2);
......
...@@ -86,6 +86,7 @@ protected: ...@@ -86,6 +86,7 @@ protected:
/// Also used in 'use_mkldnn' case. /// Also used in 'use_mkldnn' case.
std::vector<Argument> outputOtherDevice_; std::vector<Argument> outputOtherDevice_;
/// If there are several outputs, map them by each name. /// If there are several outputs, map them by each name.
/// MKLDNNLayer use it only to merge output grad
std::map<std::string, Argument*> outputMap_; std::map<std::string, Argument*> outputMap_;
/// Used to merge grad on different devices. /// Used to merge grad on different devices.
MatrixPtr tmpGrad_; MatrixPtr tmpGrad_;
...@@ -325,6 +326,11 @@ public: ...@@ -325,6 +326,11 @@ public:
outputMap_[name] = output; outputMap_[name] = output;
} }
/**
* Get the output map size, if layer has multi-output.
*/
size_t getOutputMapSize() { return outputMap_.size(); }
/** /**
* Get the output based on layer's name. * Get the output based on layer's name.
*/ */
......
...@@ -225,8 +225,6 @@ void MKLDNNConvLayer::resetFwdPipeline( ...@@ -225,8 +225,6 @@ void MKLDNNConvLayer::resetFwdPipeline(
MKLDNNMatrixPtr& wgt, MKLDNNMatrixPtr& wgt,
MKLDNNMatrixPtr& bias, MKLDNNMatrixPtr& bias,
MKLDNNMatrixPtr& out) { MKLDNNMatrixPtr& out) {
pipeline.clear();
if (cvtInVal_) { if (cvtInVal_) {
pipeline.push_back(*cvtInVal_); pipeline.push_back(*cvtInVal_);
} }
...@@ -245,7 +243,7 @@ void MKLDNNConvLayer::resetFwdPipeline( ...@@ -245,7 +243,7 @@ void MKLDNNConvLayer::resetFwdPipeline(
void MKLDNNConvLayer::resetInValue( void MKLDNNConvLayer::resetInValue(
std::shared_ptr<conv_fwd::primitive_desc>& pd, MKLDNNMatrixPtr& in) { std::shared_ptr<conv_fwd::primitive_desc>& pd, MKLDNNMatrixPtr& in) {
const MatrixPtr& inMat = inputLayers_[0]->getOutput().value; const MatrixPtr& inMat = inputLayers_[0]->getOutputValue();
in = MKLDNNMatrix::create(inMat, pd->src_primitive_desc()); in = MKLDNNMatrix::create(inMat, pd->src_primitive_desc());
// create buffer and reorder if input value do not match // create buffer and reorder if input value do not match
...@@ -310,15 +308,20 @@ void MKLDNNConvLayer::resetOutValue( ...@@ -310,15 +308,20 @@ void MKLDNNConvLayer::resetOutValue(
const MatrixPtr& cpuOut = getOutput(CPU_DEVICE).value; const MatrixPtr& cpuOut = getOutput(CPU_DEVICE).value;
memory::dims outDims = memory::dims{bs_, oc_, oh_, ow_}; memory::dims outDims = memory::dims{bs_, oc_, oh_, ow_};
cpuOutVal_ = MKLDNNMatrix::create(cpuOut, outDims, format::nchw, engine_); cpuOutVal_ = MKLDNNMatrix::create(cpuOut, outDims, format::nchw, engine_);
if (cpuOutVal_->getPrimitiveDesc() != out->getPrimitiveDesc()) { if (cpuOutVal_->getPrimitiveDesc() != pd->dst_primitive_desc()) {
out = MKLDNNMatrix::create(nullptr, pd->dst_primitive_desc());
cvtOutVal_ = MKLDNNMatrix::createReorder(out, cpuOutVal_); cvtOutVal_ = MKLDNNMatrix::createReorder(out, cpuOutVal_);
CHECK(cvtOutVal_) << "should not be emptry"; CHECK(cvtOutVal_) << "should not be empty";
} else { } else {
// CPU output share the same data of MKLDNN output
cpuOut->setData(out->getData());
cpuOutVal_ = out; cpuOutVal_ = out;
} }
// when output is cpu device, change the mkldnn output value and make them
// share the same data. Then if next layer use inputlayer->getOuputValue()
// to achieve the input value, it will get the right data.
output_.value = std::dynamic_pointer_cast<Matrix>(cpuOutVal_);
return;
} }
output_.value = std::dynamic_pointer_cast<Matrix>(out);
} }
void MKLDNNConvLayer::resetBwdWgtPD( void MKLDNNConvLayer::resetBwdWgtPD(
...@@ -412,8 +415,6 @@ void MKLDNNConvLayer::resetBwdPipeline( ...@@ -412,8 +415,6 @@ void MKLDNNConvLayer::resetBwdPipeline(
MKLDNNMatrixPtr& wgt, MKLDNNMatrixPtr& wgt,
MKLDNNMatrixPtr& bias, MKLDNNMatrixPtr& bias,
MKLDNNMatrixPtr& out) { MKLDNNMatrixPtr& out) {
pipeline.clear();
if (cvtOutGrad_) { if (cvtOutGrad_) {
pipeline.push_back(*cvtOutGrad_); pipeline.push_back(*cvtOutGrad_);
} }
...@@ -446,28 +447,27 @@ void MKLDNNConvLayer::resetBwdPipeline( ...@@ -446,28 +447,27 @@ void MKLDNNConvLayer::resetBwdPipeline(
void MKLDNNConvLayer::resetOutGrad( void MKLDNNConvLayer::resetOutGrad(
std::shared_ptr<conv_bwdWgt::primitive_desc>& wgtPD, MKLDNNMatrixPtr& out) { std::shared_ptr<conv_bwdWgt::primitive_desc>& wgtPD, MKLDNNMatrixPtr& out) {
const MatrixPtr& outMat = output_.grad;
out = MKLDNNMatrix::create(outMat, wgtPD->diff_dst_primitive_desc());
CHECK(outVal_ != nullptr &&
out->getPrimitiveDesc() == outVal_->getPrimitiveDesc())
<< "primitive desc of out grad and value should be equal";
// TODO(TJ): merge outgrad
// create reorder if has output grad does not match
cpuOutGrad_ = nullptr; cpuOutGrad_ = nullptr;
cvtOutGrad_ = nullptr; cvtOutGrad_ = nullptr;
if (!outputIsOnlyMKLDNN()) { CHECK(outVal_ != nullptr &&
outVal_->getPrimitiveDesc() == wgtPD->diff_dst_primitive_desc())
<< "primitive desc of out grad and value should be equal";
if (outputIsOnlyMKLDNN()) {
MKLDNNLayer::resetOutGrad(out, outVal_->getPrimitiveDesc());
} else {
const MatrixPtr& cpuOut = getOutput(CPU_DEVICE).grad; const MatrixPtr& cpuOut = getOutput(CPU_DEVICE).grad;
outMat->setData(cpuOut->getData());
// same PrimitiveDesc with cpuInVal_ // same PrimitiveDesc with cpuInVal_
CHECK(cpuOutVal_); CHECK(cpuOutVal_);
cpuOutGrad_ = MKLDNNMatrix::create(cpuOut, cpuOutVal_->getPrimitiveDesc()); cpuOutGrad_ = MKLDNNMatrix::create(cpuOut, cpuOutVal_->getPrimitiveDesc());
if (cpuOutGrad_->getPrimitiveDesc() == out->getPrimitiveDesc()) { // create reorder if primitive desc does not match
out = cpuOutGrad_; if (cpuOutGrad_->getPrimitiveDesc() != outVal_->getPrimitiveDesc()) {
} else { out = MKLDNNMatrix::create(output_.grad, outVal_->getPrimitiveDesc());
out = MKLDNNMatrix::create(nullptr, wgtPD->diff_dst_primitive_desc());
cvtOutGrad_ = MKLDNNMatrix::createReorder(cpuOutGrad_, out); cvtOutGrad_ = MKLDNNMatrix::createReorder(cpuOutGrad_, out);
CHECK(cvtOutGrad_); CHECK(cvtOutGrad_);
} else {
// share the same data of CPU output
output_.grad->setData(cpuOut->getData());
out = cpuOutGrad_;
} }
} }
} }
...@@ -496,32 +496,30 @@ void MKLDNNConvLayer::resetWgtBiasGrad( ...@@ -496,32 +496,30 @@ void MKLDNNConvLayer::resetWgtBiasGrad(
void MKLDNNConvLayer::resetInGrad( void MKLDNNConvLayer::resetInGrad(
std::shared_ptr<conv_bwdData::primitive_desc>& dataPD, std::shared_ptr<conv_bwdData::primitive_desc>& dataPD,
MKLDNNMatrixPtr& in) { MKLDNNMatrixPtr& in) {
in = nullptr;
cpuInGrad_ = nullptr;
cvtInGrad_ = nullptr;
if (dataPD == nullptr) { if (dataPD == nullptr) {
return; return;
} }
// TODO(TJ): use outputMaps_ ways to get the inGrad_ when merge outgrad done if (inputIsOnlyMKLDNN()) {
in = MKLDNNMatrix::create(inputLayers_[0]->getOutput().grad, MKLDNNLayer::resetInGrad(in, dataPD->diff_src_primitive_desc());
dataPD->diff_src_primitive_desc()); CHECK(nullptr != inVal_ &&
CHECK(nullptr != inVal_ && in->getPrimitiveDesc() == inVal_->getPrimitiveDesc())
in->getPrimitiveDesc() == inVal_->getPrimitiveDesc()) << "primitive desc of input grad and value should be equal";
<< "primitive desc of input grad and value should be equal"; } else {
// create reorder if has output grad does not match
cpuInGrad_ = nullptr;
cvtInGrad_ = nullptr;
if (!inputIsOnlyMKLDNN()) {
const MatrixPtr& cpuIn = getInputGrad(0, CPU_DEVICE); const MatrixPtr& cpuIn = getInputGrad(0, CPU_DEVICE);
// same PrimitiveDesc with cpuInVal_ // same PrimitiveDesc with cpuInVal_
CHECK(cpuInVal_); CHECK(cpuInVal_);
cpuInGrad_ = MKLDNNMatrix::create(cpuIn, cpuInVal_->getPrimitiveDesc()); cpuInGrad_ = MKLDNNMatrix::create(cpuIn, cpuInVal_->getPrimitiveDesc());
if (cpuInGrad_->getPrimitiveDesc() != in->getPrimitiveDesc()) { in = cpuInGrad_;
const MatrixPtr& dnnIn = getInputGrad(0, MKLDNN_DEVICE); // create reorder if PrimitiveDesc does not match
in = MKLDNNMatrix::create(dnnIn, in->getPrimitiveDesc()); if (cpuInGrad_->getPrimitiveDesc() != dataPD->diff_src_primitive_desc()) {
in = MKLDNNMatrix::create(getInputGrad(0, MKLDNN_DEVICE),
dataPD->diff_src_primitive_desc());
cvtInGrad_ = MKLDNNMatrix::createReorder(in, cpuInGrad_); cvtInGrad_ = MKLDNNMatrix::createReorder(in, cpuInGrad_);
CHECK(cvtInGrad_); CHECK(cvtInGrad_);
} else {
in = cpuInGrad_;
} }
} }
} }
......
...@@ -180,10 +180,10 @@ void MKLDNNFcLayer::resetWgtBiasValue(MKLDNNMatrixPtr& wgt, ...@@ -180,10 +180,10 @@ void MKLDNNFcLayer::resetWgtBiasValue(MKLDNNMatrixPtr& wgt,
void MKLDNNFcLayer::resetOutValue(MKLDNNMatrixPtr& out) { void MKLDNNFcLayer::resetOutValue(MKLDNNMatrixPtr& out) {
out = MKLDNNMatrix::create(output_.value, {bs_, oc_}, format::nc, engine_); out = MKLDNNMatrix::create(output_.value, {bs_, oc_}, format::nc, engine_);
if (!outputIsOnlyMKLDNN()) { if (!outputIsOnlyMKLDNN()) {
// fc cpu output value do not need create convert // fc cpu output value do not need create convert, just share data
// just share point
getOutput(CPU_DEVICE).value->setData(out->getData()); getOutput(CPU_DEVICE).value->setData(out->getData());
} }
output_.value = std::dynamic_pointer_cast<Matrix>(out);
} }
void MKLDNNFcLayer::resetFwdPD(std::shared_ptr<fc_fwd::primitive_desc>& pd, void MKLDNNFcLayer::resetFwdPD(std::shared_ptr<fc_fwd::primitive_desc>& pd,
...@@ -214,8 +214,6 @@ void MKLDNNFcLayer::resetFwdPipeline( ...@@ -214,8 +214,6 @@ void MKLDNNFcLayer::resetFwdPipeline(
MKLDNNMatrixPtr& wgt, MKLDNNMatrixPtr& wgt,
MKLDNNMatrixPtr& bias, MKLDNNMatrixPtr& bias,
MKLDNNMatrixPtr& out) { MKLDNNMatrixPtr& out) {
pipeline.clear();
if (bias) { if (bias) {
fwd_.reset(new fc_fwd(*pd, *in, *wgt, *bias, *out)); fwd_.reset(new fc_fwd(*pd, *in, *wgt, *bias, *out));
} else { } else {
...@@ -237,19 +235,14 @@ void MKLDNNFcLayer::resetBwdBuffers(MKLDNNMatrixPtr& in, ...@@ -237,19 +235,14 @@ void MKLDNNFcLayer::resetBwdBuffers(MKLDNNMatrixPtr& in,
} }
void MKLDNNFcLayer::resetOutGrad(MKLDNNMatrixPtr& out) { void MKLDNNFcLayer::resetOutGrad(MKLDNNMatrixPtr& out) {
// TODO(TJ): merge outgrad
int device = outputIsOnlyMKLDNN() ? MKLDNN_DEVICE : CPU_DEVICE;
output_.grad->setData(getOutput(device).grad->getData());
// for MKLDNN device:
// can not directly cast outputgrad to mkldnnmatrix,
// since each layer can not write the inputgrad to mkldnn inputgrad.
// So just create from matrix with outputvalue format.
// for CPU device:
// fc do not need to convert from cpu device since output is always nc format
// only need create from cpu device
CHECK(outVal_); CHECK(outVal_);
out = if (outputIsOnlyMKLDNN()) {
MKLDNNMatrix::create(getOutput(device).grad, outVal_->getPrimitiveDesc()); MKLDNNLayer::resetOutGrad(out, outVal_->getPrimitiveDesc());
} else {
const MatrixPtr& cpuOut = getOutput(CPU_DEVICE).grad;
output_.grad->setData(cpuOut->getData());
out = MKLDNNMatrix::create(cpuOut, outVal_->getPrimitiveDesc());
}
} }
void MKLDNNFcLayer::resetWgtBiasGrad(MKLDNNMatrixPtr& wgt, void MKLDNNFcLayer::resetWgtBiasGrad(MKLDNNMatrixPtr& wgt,
...@@ -267,13 +260,11 @@ void MKLDNNFcLayer::resetWgtBiasGrad(MKLDNNMatrixPtr& wgt, ...@@ -267,13 +260,11 @@ void MKLDNNFcLayer::resetWgtBiasGrad(MKLDNNMatrixPtr& wgt,
void MKLDNNFcLayer::resetInGrad(MKLDNNMatrixPtr& in) { void MKLDNNFcLayer::resetInGrad(MKLDNNMatrixPtr& in) {
in = nullptr; in = nullptr;
const MatrixPtr& inGrad = inputLayers_[0]->getOutput().grad; if (inputLayers_[0]->getOutput().grad == nullptr) {
if (inGrad == nullptr) {
return; return;
} }
// TODO(TJ): use outputMaps_ ways to get the inGrad_ when merge outgrad done
CHECK(inVal_); CHECK(inVal_);
in = MKLDNNMatrix::create(inGrad, inVal_->getPrimitiveDesc()); MKLDNNLayer::resetInGrad(in, inVal_->getPrimitiveDesc());
} }
void MKLDNNFcLayer::resetBwdWgtPD( void MKLDNNFcLayer::resetBwdWgtPD(
...@@ -314,7 +305,6 @@ void MKLDNNFcLayer::resetBwdPipeline( ...@@ -314,7 +305,6 @@ void MKLDNNFcLayer::resetBwdPipeline(
MKLDNNMatrixPtr& wgt, MKLDNNMatrixPtr& wgt,
MKLDNNMatrixPtr& bias, MKLDNNMatrixPtr& bias,
MKLDNNMatrixPtr& out) { MKLDNNMatrixPtr& out) {
pipeline.clear();
CHECK(inVal_); CHECK(inVal_);
if (bias) { if (bias) {
bwdWgt_.reset(new fc_bwdWgt(*bwdWgtPD, *inVal_, *out, *wgt, *bias)); bwdWgt_.reset(new fc_bwdWgt(*bwdWgtPD, *inVal_, *out, *wgt, *bias));
......
...@@ -65,6 +65,17 @@ protected: ...@@ -65,6 +65,17 @@ protected:
MKLDNNMatrixPtr biasVal_; MKLDNNMatrixPtr biasVal_;
MKLDNNMatrixPtr biasGrad_; MKLDNNMatrixPtr biasGrad_;
// merge grad primitive
std::shared_ptr<mkldnn::primitive> mergeGrad_;
std::vector<mkldnn::primitive> pipelineMergeGrad_;
// tmp input argument to save input grad, only used to merge grad
Argument tmpInArg_;
// since mkldnn sum do not support different formats:
// can refer to https://github.com/01org/mkl-dnn/issues/134
// so need create reorder manually and save tmp MKLDNNMatrix
MKLDNNMatrixPtr tmpOutGrad_;
std::shared_ptr<mkldnn::primitive> tmpCvt_;
public: public:
explicit MKLDNNLayer(const LayerConfig& config) explicit MKLDNNLayer(const LayerConfig& config)
: Layer(config), : Layer(config),
...@@ -99,6 +110,7 @@ public: ...@@ -99,6 +110,7 @@ public:
if (!Layer::init(layerMap, parameterMap)) { if (!Layer::init(layerMap, parameterMap)) {
return false; return false;
} }
setOutputMap();
checkCPUOutputsNumber(); checkCPUOutputsNumber();
stream_.reset(new MKLDNNStream()); stream_.reset(new MKLDNNStream());
...@@ -118,12 +130,9 @@ public: ...@@ -118,12 +130,9 @@ public:
VLOG(MKLDNN_BASE) << getName() << " reset mkldnn forward"; VLOG(MKLDNN_BASE) << getName() << " reset mkldnn forward";
// reset when input total sizes changed, not only the batchsize // reset when input total sizes changed, not only the batchsize
inputElemenCnt_ = elemenCnt; inputElemenCnt_ = elemenCnt;
pipelineFwd_.clear();
reshape(bs_, ic_, ih_, iw_, oc_, oh_, ow_); reshape(bs_, ic_, ih_, iw_, oc_, oh_, ow_);
resetFwd(pipelineFwd_, inVal_, wgtVal_, biasVal_, outVal_); resetFwd(pipelineFwd_, inVal_, wgtVal_, biasVal_, outVal_);
if (outVal_) {
// change original output value to mkldnn output value
output_.value = std::dynamic_pointer_cast<Matrix>(outVal_);
}
convertWeightsFromPaddle(); convertWeightsFromPaddle();
needResetBwd_ = true; needResetBwd_ = true;
} }
...@@ -144,9 +153,18 @@ public: ...@@ -144,9 +153,18 @@ public:
void backward(const UpdateCallback& callback) override { void backward(const UpdateCallback& callback) override {
if (needResetBwd_) { if (needResetBwd_) {
VLOG(MKLDNN_BASE) << getName() << " reset mkldnn backward"; VLOG(MKLDNN_BASE) << getName() << " reset mkldnn backward";
pipelineBwd_.clear();
pipelineMergeGrad_.clear();
mergeGrad_ = nullptr;
resetBwd(pipelineBwd_, inGrad_, wgtGrad_, biasGrad_, outGrad_); resetBwd(pipelineBwd_, inGrad_, wgtGrad_, biasGrad_, outGrad_);
needResetBwd_ = false; needResetBwd_ = false;
} }
// merge grad must before backward activation
if (mergeGrad_) {
REGISTER_TIMER_INFO("MergeBpGrad", getName().c_str());
stream_->submit(pipelineMergeGrad_);
}
{ {
REGISTER_TIMER_INFO("BpActTimer", getName().c_str()); REGISTER_TIMER_INFO("BpActTimer", getName().c_str());
backwardActivation(); backwardActivation();
...@@ -247,6 +265,76 @@ protected: ...@@ -247,6 +265,76 @@ protected:
} }
} }
/**
* reset the output grad matrix from primitive desc.
* and reset the merge grad primitive if needed.
* note: when this layer has serval outputs,
* it could not be mixed with cpu device,
* since it can not get memory desc from cpu device.
*/
virtual void resetOutGrad(MKLDNNMatrixPtr& out,
mkldnn::memory::primitive_desc pd) {
CHECK(outputIsOnlyMKLDNN()) << "do not support mixed with other device yet";
mergeGrad_ = nullptr;
pipelineMergeGrad_.clear();
out = MKLDNNMatrix::create(output_.grad, pd);
if (outputMap_.size() <= 1) {
return;
}
std::vector<double> scales(outputMap_.size(), 1.0);
std::vector<mkldnn::memory::primitive_desc> srcPDs;
std::vector<mkldnn::primitive::at> srcs;
for (auto it = outputMap_.begin(); it != outputMap_.end(); ++it) {
MKLDNNMatrixPtr src =
std::dynamic_pointer_cast<MKLDNNMatrix>(it->second->grad);
VLOG(MKLDNN_BASE) << getName() << " has output grad " << it->first;
CHECK(src) << "should be MKLDNNMatrix";
auto srcDims = src->getDims();
auto dstDims = out->getDims();
CHECK_EQ(srcDims.size(), dstDims.size());
for (size_t i = 0; i < srcDims.size(); ++i) {
CHECK_EQ(srcDims[i], dstDims[i]);
}
srcPDs.push_back(src->getPrimitiveDesc());
srcs.push_back(*src);
}
// TODO(TJ): remove me when mkldnn sum support different formats
for (size_t i = 1; i < srcPDs.size(); ++i) {
CHECK(srcPDs[0] == srcPDs[i]);
}
tmpOutGrad_ = nullptr;
tmpCvt_ = nullptr;
if (out->getPrimitiveDesc() != srcPDs[0]) {
tmpOutGrad_ = MKLDNNMatrix::create(nullptr, srcPDs[0]);
tmpCvt_ = MKLDNNMatrix::createReorder(tmpOutGrad_, out);
CHECK(tmpCvt_);
pipelineMergeGrad_.push_back(*tmpCvt_);
} else {
tmpOutGrad_ = out;
}
auto sumPD = mkldnn::sum::primitive_desc(
tmpOutGrad_->getMemoryDesc(), scales, srcPDs);
mergeGrad_.reset(new mkldnn::sum(sumPD, srcs, *tmpOutGrad_));
pipelineMergeGrad_.insert(pipelineMergeGrad_.begin(), *mergeGrad_);
}
/**
* reset input grad from primitive desc.
* this function is avaiable for input is only mkldnn
* or input do not care cpu device
*/
virtual void resetInGrad(MKLDNNMatrixPtr& in,
mkldnn::memory::primitive_desc pd) {
LayerPtr& input = inputLayers_[0];
const MatrixPtr& grad =
input->getOutputMapSize() > 1 ? nullptr : input->getOutput().grad;
in = MKLDNNMatrix::create(grad, pd);
Argument& arg = input->getOutput(this->getName());
arg.grad = std::dynamic_pointer_cast<Matrix>(in);
}
/** /**
* print info about sizes * print info about sizes
*/ */
...@@ -334,6 +422,16 @@ private: ...@@ -334,6 +422,16 @@ private:
} }
} }
/**
* Set output map of prev layers.
*/
void setOutputMap() {
outputMap_.clear();
for (size_t i = 0; i < inputLayers_.size(); ++i) {
inputLayers_[i]->setOutput(getName(), &tmpInArg_);
}
}
/** /**
* Check the cpu device number of outputOtherDevice_. * Check the cpu device number of outputOtherDevice_.
* should have only one at most. * should have only one at most.
......
...@@ -142,14 +142,16 @@ void MKLDNNPoolLayer::resetOutValue(MKLDNNMatrixPtr& out) { ...@@ -142,14 +142,16 @@ void MKLDNNPoolLayer::resetOutValue(MKLDNNMatrixPtr& out) {
const MatrixPtr& cpuOut = getOutput(CPU_DEVICE).value; const MatrixPtr& cpuOut = getOutput(CPU_DEVICE).value;
cpuOutVal_ = MKLDNNMatrix::create(cpuOut, outDims, format::nchw, engine_); cpuOutVal_ = MKLDNNMatrix::create(cpuOut, outDims, format::nchw, engine_);
if (cpuOutVal_->getPrimitiveDesc() != out->getPrimitiveDesc()) { if (cpuOutVal_->getPrimitiveDesc() != out->getPrimitiveDesc()) {
out = MKLDNNMatrix::create(nullptr, out->getPrimitiveDesc());
cvtOutVal_ = MKLDNNMatrix::createReorder(out, cpuOutVal_); cvtOutVal_ = MKLDNNMatrix::createReorder(out, cpuOutVal_);
CHECK(cvtOutVal_) << "should not be emptry"; CHECK(cvtOutVal_) << "should not be emptry";
} else { } else {
// CPU output share the same data of MKLDNN output
cpuOut->setData(out->getData());
cpuOutVal_ = out; cpuOutVal_ = out;
} }
output_.value = std::dynamic_pointer_cast<Matrix>(cpuOutVal_);
return;
} }
output_.value = std::dynamic_pointer_cast<Matrix>(outVal_);
} }
void MKLDNNPoolLayer::resetFwdPD(std::shared_ptr<pool_fwd::primitive_desc>& pd, void MKLDNNPoolLayer::resetFwdPD(std::shared_ptr<pool_fwd::primitive_desc>& pd,
...@@ -187,7 +189,6 @@ void MKLDNNPoolLayer::resetFwdPipeline( ...@@ -187,7 +189,6 @@ void MKLDNNPoolLayer::resetFwdPipeline(
std::shared_ptr<pool_fwd::primitive_desc>& pd, std::shared_ptr<pool_fwd::primitive_desc>& pd,
MKLDNNMatrixPtr& in, MKLDNNMatrixPtr& in,
MKLDNNMatrixPtr& out) { MKLDNNMatrixPtr& out) {
pipeline.clear();
fwd_ = workspace_ fwd_ = workspace_
? std::make_shared<pool_fwd>(pool_fwd(*pd, *in, *out, *workspace_)) ? std::make_shared<pool_fwd>(pool_fwd(*pd, *in, *out, *workspace_))
: std::make_shared<pool_fwd>(pool_fwd(*pd, *in, *out)); : std::make_shared<pool_fwd>(pool_fwd(*pd, *in, *out));
...@@ -205,17 +206,17 @@ void MKLDNNPoolLayer::resetBwdBuffers(MKLDNNMatrixPtr& in, ...@@ -205,17 +206,17 @@ void MKLDNNPoolLayer::resetBwdBuffers(MKLDNNMatrixPtr& in,
resetInGrad(in); resetInGrad(in);
} }
void MKLDNNPoolLayer::resetOutGrad(MKLDNNMatrixPtr& out) { void MKLDNNPoolLayer::resetOutGrad(MKLDNNMatrixPtr& out) {
CHECK(outVal_) << "Should have output value";
out = MKLDNNMatrix::create(output_.grad, outVal_->getPrimitiveDesc());
// create reorder if output value has cpu device and pd do not match
cpuOutGrad_ = nullptr; cpuOutGrad_ = nullptr;
cvtOutGrad_ = nullptr; cvtOutGrad_ = nullptr;
if (!outputIsOnlyMKLDNN()) { CHECK(outVal_);
if (outputIsOnlyMKLDNN()) {
MKLDNNLayer::resetOutGrad(out, outVal_->getPrimitiveDesc());
} else {
const MatrixPtr& cpuOut = getOutput(CPU_DEVICE).grad; const MatrixPtr& cpuOut = getOutput(CPU_DEVICE).grad;
cpuOutGrad_ = MKLDNNMatrix::create( cpuOutGrad_ = MKLDNNMatrix::create(
cpuOut, memory::dims{bs_, oc_, oh_, ow_}, format::nchw, engine_); cpuOut, memory::dims{bs_, oc_, oh_, ow_}, format::nchw, engine_);
if (cpuOutGrad_->getPrimitiveDesc() != out->getPrimitiveDesc()) { if (cpuOutGrad_->getPrimitiveDesc() != outVal_->getPrimitiveDesc()) {
out = MKLDNNMatrix::create(output_.grad, outVal_->getPrimitiveDesc());
cvtOutGrad_ = MKLDNNMatrix::createReorder(cpuOutGrad_, out); cvtOutGrad_ = MKLDNNMatrix::createReorder(cpuOutGrad_, out);
CHECK(cvtOutGrad_) << "should not be emptry"; CHECK(cvtOutGrad_) << "should not be emptry";
} else { } else {
...@@ -228,12 +229,11 @@ void MKLDNNPoolLayer::resetOutGrad(MKLDNNMatrixPtr& out) { ...@@ -228,12 +229,11 @@ void MKLDNNPoolLayer::resetOutGrad(MKLDNNMatrixPtr& out) {
void MKLDNNPoolLayer::resetInGrad(MKLDNNMatrixPtr& in) { void MKLDNNPoolLayer::resetInGrad(MKLDNNMatrixPtr& in) {
in = nullptr; in = nullptr;
const MatrixPtr& inGrad = inputLayers_[0]->getOutput().grad; if (inputLayers_[0]->getOutput().grad == nullptr) {
if (inGrad == nullptr) {
return; return;
} }
CHECK(inVal_); CHECK(inVal_);
in = MKLDNNMatrix::create(inGrad, inVal_->getPrimitiveDesc()); MKLDNNLayer::resetInGrad(in, inVal_->getPrimitiveDesc());
} }
void MKLDNNPoolLayer::resetBwdPD(std::shared_ptr<pool_bwd::primitive_desc>& pd, void MKLDNNPoolLayer::resetBwdPD(std::shared_ptr<pool_bwd::primitive_desc>& pd,
...@@ -261,7 +261,6 @@ void MKLDNNPoolLayer::resetBwdPipeline( ...@@ -261,7 +261,6 @@ void MKLDNNPoolLayer::resetBwdPipeline(
std::shared_ptr<pool_bwd::primitive_desc>& pd, std::shared_ptr<pool_bwd::primitive_desc>& pd,
MKLDNNMatrixPtr& in, MKLDNNMatrixPtr& in,
MKLDNNMatrixPtr& out) { MKLDNNMatrixPtr& out) {
pipeline.clear();
if (cvtOutGrad_) { if (cvtOutGrad_) {
pipeline.push_back(*cvtOutGrad_); pipeline.push_back(*cvtOutGrad_);
} }
......
...@@ -124,8 +124,8 @@ void MKLDNNTester::randomTopDiffs() { ...@@ -124,8 +124,8 @@ void MKLDNNTester::randomTopDiffs() {
void MKLDNNTester::checkForward() { void MKLDNNTester::checkForward() {
VLOG(MKLDNN_ALL) << "Check Forward"; VLOG(MKLDNN_ALL) << "Check Forward";
printTopDatas(); printTopDatas();
double delta = compareMatrix(dnnLayer_->getOutput(CPU_DEVICE).value, double delta =
refLayer_->getOutputValue()); compareMatrix(dnnLayer_->getOutputValue(), refLayer_->getOutputValue());
EXPECT_LE(fabs(delta), eps_); EXPECT_LE(fabs(delta), eps_);
} }
......
...@@ -84,8 +84,9 @@ function(op_library TARGET) ...@@ -84,8 +84,9 @@ function(op_library TARGET)
endif() endif()
# pybind USE_NO_KERNEL_OP # pybind USE_NO_KERNEL_OP
# HACK: if REGISTER_OP_CPU_KERNEL presents the operator must have kernel
file(READ ${TARGET}.cc TARGET_CONTENT) file(READ ${TARGET}.cc TARGET_CONTENT)
string(REGEX MATCH "OperatorWithKernel" regex_result "${TARGET_CONTENT}") string(REGEX MATCH "REGISTER_OP_CPU_KERNEL" regex_result "${TARGET_CONTENT}")
string(REPLACE "_op" "" TARGET "${TARGET}") string(REPLACE "_op" "" TARGET "${TARGET}")
if (${pybind_flag} EQUAL 0 AND regex_result STREQUAL "") if (${pybind_flag} EQUAL 0 AND regex_result STREQUAL "")
file(APPEND ${pybind_file} "USE_NO_KERNEL_OP(${TARGET});\n") file(APPEND ${pybind_file} "USE_NO_KERNEL_OP(${TARGET});\n")
......
...@@ -338,6 +338,38 @@ class ThresholdedReluOpMaker : public framework::OpProtoAndCheckerMaker { ...@@ -338,6 +338,38 @@ class ThresholdedReluOpMaker : public framework::OpProtoAndCheckerMaker {
} }
}; };
template <typename AttrType>
class HardSigmoidOpMaker : public framework::OpProtoAndCheckerMaker {
public:
HardSigmoidOpMaker(framework::OpProto *proto,
framework::OpAttrChecker *op_checker)
: OpProtoAndCheckerMaker(proto, op_checker) {
AddInput("X", "Input of HardSigmoid operator");
AddOutput("Y", "Output of HardSigmoid operator");
AddComment(R"DOC(
Hard Sigmoid activation operator.
Segment-wise linear approximation of sigmoid[1].
This is much faster than sigmoid.
hard_sigmoid = max(0, min(1, slope * x + shift))
The slope should be positive. The offset can be either positive or negative.
The default slope and shift are set from [1].
It is recommended to use the defaults for this activation.
References:
[1] Noisy Activation Functions
(https://arxiv.org/abs/1603.00391)
)DOC");
AddAttr<AttrType>("slope", "Slope for linear approximation of sigmoid")
.SetDefault(static_cast<AttrType>(0.2));
AddAttr<AttrType>("offset", "Offset for linear approximation of sigmoid")
.SetDefault(static_cast<AttrType>(0.5));
}
};
} // namespace operators } // namespace operators
} // namespace paddle } // namespace paddle
...@@ -413,6 +445,9 @@ REGISTER_OP(thresholded_relu, ops::ActivationOp, ...@@ -413,6 +445,9 @@ REGISTER_OP(thresholded_relu, ops::ActivationOp,
ops::ThresholdedReluOpMaker<float>, thresholded_relu_grad, ops::ThresholdedReluOpMaker<float>, thresholded_relu_grad,
ops::ActivationOpGrad); ops::ActivationOpGrad);
REGISTER_OP(hard_sigmoid, ops::ActivationOp, ops::HardSigmoidOpMaker<float>,
hard_sigmoid_grad, ops::ActivationOpGrad);
#define REGISTER_ACTIVATION_CPU_KERNEL(act_type, functor, grad_functor) \ #define REGISTER_ACTIVATION_CPU_KERNEL(act_type, functor, grad_functor) \
REGISTER_OP_CPU_KERNEL( \ REGISTER_OP_CPU_KERNEL( \
act_type, \ act_type, \
......
...@@ -616,30 +616,63 @@ struct ThresholdedReluGradFunctor : public BaseActivationFunctor<T> { ...@@ -616,30 +616,63 @@ struct ThresholdedReluGradFunctor : public BaseActivationFunctor<T> {
} }
}; };
template <typename T>
struct HardSigmoidFunctor : public BaseActivationFunctor<T> {
float slope;
float offset;
typename BaseActivationFunctor<T>::AttrPair GetAttrs() {
return {{"slope", &slope}, {"offset", &offset}};
}
template <typename Device, typename X, typename Y>
void operator()(Device d, X x, Y y) const {
auto temp = x * static_cast<T>(slope) + static_cast<T>(offset);
y.device(d) = temp.cwiseMax(static_cast<T>(0)).cwiseMin(static_cast<T>(1));
}
};
template <typename T>
struct HardSigmoidGradFunctor : public BaseActivationFunctor<T> {
float slope;
float offset;
typename BaseActivationFunctor<T>::AttrPair GetAttrs() {
return {{"slope", &slope}, {"offset", &offset}};
}
template <typename Device, typename X, typename Y, typename dY, typename dX>
void operator()(Device d, X x, Y y, dY dy, dX dx) const {
dx.device(d) =
dy *
((y > static_cast<T>(0)) * (y < static_cast<T>(1))).template cast<T>() *
static_cast<T>(slope);
}
};
} // namespace operators } // namespace operators
} // namespace paddle } // namespace paddle
#define FOR_EACH_KERNEL_FUNCTOR(__macro) \ #define FOR_EACH_KERNEL_FUNCTOR(__macro) \
__macro(sigmoid, SigmoidFunctor, SigmoidGradFunctor); \ __macro(sigmoid, SigmoidFunctor, SigmoidGradFunctor); \
__macro(logsigmoid, LogSigmoidFunctor, LogSigmoidGradFunctor); \ __macro(logsigmoid, LogSigmoidFunctor, LogSigmoidGradFunctor); \
__macro(exp, ExpFunctor, ExpGradFunctor); \ __macro(exp, ExpFunctor, ExpGradFunctor); \
__macro(relu, ReluFunctor, ReluGradFunctor); \ __macro(relu, ReluFunctor, ReluGradFunctor); \
__macro(tanh, TanhFunctor, TanhGradFunctor); \ __macro(tanh, TanhFunctor, TanhGradFunctor); \
__macro(softshrink, SoftShrinkFunctor, SoftShrinkGradFunctor); \ __macro(softshrink, SoftShrinkFunctor, SoftShrinkGradFunctor); \
__macro(sqrt, SqrtFunctor, SqrtGradFunctor); \ __macro(sqrt, SqrtFunctor, SqrtGradFunctor); \
__macro(abs, AbsFunctor, AbsGradFunctor); \ __macro(abs, AbsFunctor, AbsGradFunctor); \
__macro(reciprocal, ReciprocalFunctor, ReciprocalGradFunctor); \ __macro(reciprocal, ReciprocalFunctor, ReciprocalGradFunctor); \
__macro(log, LogFunctor, LogGradFunctor); \ __macro(log, LogFunctor, LogGradFunctor); \
__macro(square, SquareFunctor, SquareGradFunctor); \ __macro(square, SquareFunctor, SquareGradFunctor); \
__macro(brelu, BReluFunctor, BReluGradFunctor); \ __macro(brelu, BReluFunctor, BReluGradFunctor); \
__macro(soft_relu, SoftReluFunctor, SoftReluGradFunctor); \ __macro(soft_relu, SoftReluFunctor, SoftReluGradFunctor); \
__macro(pow, PowFunctor, PowGradFunctor); \ __macro(pow, PowFunctor, PowGradFunctor); \
__macro(stanh, STanhFunctor, STanhGradFunctor); \ __macro(stanh, STanhFunctor, STanhGradFunctor); \
__macro(softplus, SoftplusFunctor, SoftplusGradFunctor); \ __macro(softplus, SoftplusFunctor, SoftplusGradFunctor); \
__macro(softsign, SoftsignFunctor, SoftsignGradFunctor); \ __macro(softsign, SoftsignFunctor, SoftsignGradFunctor); \
__macro(relu6, Relu6Functor, Relu6GradFunctor); \ __macro(relu6, Relu6Functor, Relu6GradFunctor); \
__macro(leaky_relu, LeakyReluFunctor, LeakyReluGradFunctor); \ __macro(leaky_relu, LeakyReluFunctor, LeakyReluGradFunctor); \
__macro(tanh_shrink, TanhShrinkFunctor, TanhShrinkGradFunctor); \ __macro(tanh_shrink, TanhShrinkFunctor, TanhShrinkGradFunctor); \
__macro(elu, ELUFunctor, ELUGradFunctor); \ __macro(elu, ELUFunctor, ELUGradFunctor); \
__macro(hard_shrink, HardShrinkFunctor, HardShrinkGradFunctor); \ __macro(hard_shrink, HardShrinkFunctor, HardShrinkGradFunctor); \
__macro(hard_sigmoid, HardSigmoidFunctor, HardSigmoidGradFunctor); \
__macro(thresholded_relu, ThresholdedReluFunctor, ThresholdedReluGradFunctor); __macro(thresholded_relu, ThresholdedReluFunctor, ThresholdedReluGradFunctor);
/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserve.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
#include "paddle/operators/adam_op.h"
namespace paddle {
namespace operators {
class AdamOp : public framework::OperatorWithKernel {
public:
using framework::OperatorWithKernel::OperatorWithKernel;
protected:
void InferShape(framework::InferShapeContext *ctx) const override {
PADDLE_ENFORCE(ctx->HasInput("Param"),
"Input(Param) of AdamOp should not be null.");
PADDLE_ENFORCE(ctx->HasInput("Grad"),
"Input(Grad) of AdamOp should not be null.");
PADDLE_ENFORCE(ctx->HasInput("Moment1"),
"Input(Moment1) of AdamOp should not be null.");
PADDLE_ENFORCE(ctx->HasInput("Moment2"),
"Input(Moment2) of AdamOp should not be null.");
PADDLE_ENFORCE(ctx->HasInput("LearningRate"),
"Input(LearningRate) of AdamOp should not be null.");
PADDLE_ENFORCE(ctx->HasInput("Beta1Pow"),
"Input(Beta1Pow) of AdamOp should not be null.");
PADDLE_ENFORCE(ctx->HasInput("Beta2Pow"),
"Input(Beta2Pow) of AdamOp should not be null.");
PADDLE_ENFORCE(ctx->HasOutput("ParamOut"),
"Output(ParamOut) of AdamOp should not be null.");
PADDLE_ENFORCE(ctx->HasOutput("Moment1Out"),
"Output(Moment1Out) of AdamOp should not be null.");
PADDLE_ENFORCE(ctx->HasOutput("Moment2Out"),
"Output(Moment2Out) of AdamOp should not be null.");
PADDLE_ENFORCE(ctx->HasOutput("Beta1PowOut"),
"Output(Beta1PowOut) of AdamOp should not be null.");
PADDLE_ENFORCE(ctx->HasOutput("Beta2PowOut"),
"Output(Beta2PowOut) of AdamOp should not be null.");
auto lr_dims = ctx->GetInputDim("LearningRate");
PADDLE_ENFORCE_EQ(framework::product(lr_dims), 1,
"Learning rate should have 1 dimension");
auto beta1_pow_dims = ctx->GetInputDim("Beta1Pow");
PADDLE_ENFORCE_EQ(framework::product(beta1_pow_dims), 1,
"Beta1 power accumulator should have 1 dimension");
auto beta2_pow_dims = ctx->GetInputDim("Beta2Pow");
PADDLE_ENFORCE_EQ(framework::product(beta1_pow_dims), 1,
"Beta1 power accumulator should have 1 dimension");
auto param_dims = ctx->GetInputDim("Param");
PADDLE_ENFORCE_EQ(
param_dims, ctx->GetInputDim("Grad"),
"Param and Grad input of AdamOp should have same dimension");
PADDLE_ENFORCE_EQ(
param_dims, ctx->GetInputDim("Moment1"),
"Param and Moment input of AdamOp should have same dimension");
PADDLE_ENFORCE_EQ(
param_dims, ctx->GetInputDim("Moment2"),
"Param and InfNorm input of AdamOp should have same dimension");
ctx->SetOutputDim("ParamOut", param_dims);
ctx->SetOutputDim("Moment1Out", param_dims);
ctx->SetOutputDim("Moment2Out", param_dims);
ctx->SetOutputDim("Beta1PowOut", beta1_pow_dims);
ctx->SetOutputDim("Beta2PowOut", beta2_pow_dims);
}
};
class AdamOpMaker : public framework::OpProtoAndCheckerMaker {
public:
AdamOpMaker(framework::OpProto *proto, framework::OpAttrChecker *op_checker)
: OpProtoAndCheckerMaker(proto, op_checker) {
AddInput("Param", "(Tensor) Input parameter");
AddInput("Grad", "(Tensor) Input gradient");
AddInput("LearningRate", "(Tensor) Learning rate");
AddInput("Moment1", "(Tensor) Input first moment");
AddInput("Moment2", "(Tensor) Input second moment");
AddInput("Beta1Pow", "(Tensor) Input beta1 power accumulator");
AddInput("Beta2Pow", "(Tensor) Input beta2 power accumulator");
AddOutput("ParamOut", "(Tensor) Output parameter");
AddOutput("Moment1Out", "(Tensor) Output first moment");
AddOutput("Moment2Out", "(Tensor) Output second moment");
AddOutput("Beta1PowOut", "(Tensor) Output beta1 power accumulator");
AddOutput("Beta2PowOut", "(Tensor) Output beta2 power accumulator");
AddAttr<float>("beta1",
"(float, default 0.9) "
"Exponential decay rate for the "
"first moment estimates.")
.SetDefault(0.9f);
AddAttr<float>("beta2",
"(float, default 0.999) "
"exponential decay rate for the "
"second moment estimates.")
.SetDefault(0.999f);
AddAttr<float>("epsilon",
"(float, default 1.0e-8) "
"Constant for numerical stability")
.SetDefault(1.0e-8f);
AddComment(R"DOC(
Adam Updates Operator.
This implements the Adam optimizer from Section 2 of the Adam
paper[1]. Adam is a first-order gradient-based optimization
method based on adaptive estimates of lower-order moments.
Adam updates:
moment1_out = beta1 * moment1 + (1 − beta1) * grad
moment2_out = beta2 * moment2 + (1 − beta2) * grad * grad
beta1_pow_out = beta1_pow * beta1
beta2_pow_out = beta2_pow * beta2
learning_rate_t = learning_rate_t *
sqrt(1 - beta2_pow_out) / (1 - beta1_pow_out)
param_out = param - learning_rate_t * moment1/ (sqrt(moment2) + epsilon)
References:
[1] Adam: A Method for Stochastic Optimization
(https://arxiv.org/abs/1412.6980)
)DOC");
}
};
} // namespace operators
} // namespace paddle
namespace ops = paddle::operators;
REGISTER_OP_WITHOUT_GRADIENT(adam, ops::AdamOp, ops::AdamOpMaker);
REGISTER_OP_CPU_KERNEL(adam,
ops::AdamOpKernel<paddle::platform::CPUPlace, float>);
/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserve.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
#define EIGEN_USE_GPU
#include "paddle/operators/adam_op.h"
namespace ops = paddle::operators;
REGISTER_OP_GPU_KERNEL(adam,
ops::AdamOpKernel<paddle::platform::GPUPlace, float>);
/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserve.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
#pragma once
#include "paddle/framework/eigen.h"
#include "paddle/framework/op_registry.h"
namespace paddle {
namespace operators {
template <typename Place, typename T>
class AdamOpKernel : public framework::OpKernel<T> {
public:
void Compute(const framework::ExecutionContext& ctx) const override {
auto param_out_tensor = ctx.Output<framework::Tensor>("ParamOut");
auto moment1_out_tensor = ctx.Output<framework::Tensor>("Moment1Out");
auto moment2_out_tensor = ctx.Output<framework::Tensor>("Moment2Out");
auto beta1_pow_out_tensor = ctx.Output<framework::Tensor>("Beta1PowOut");
auto beta2_pow_out_tensor = ctx.Output<framework::Tensor>("Beta2PowOut");
param_out_tensor->mutable_data<T>(ctx.GetPlace());
moment1_out_tensor->mutable_data<T>(ctx.GetPlace());
moment2_out_tensor->mutable_data<T>(ctx.GetPlace());
beta1_pow_out_tensor->mutable_data<T>(ctx.GetPlace());
beta2_pow_out_tensor->mutable_data<T>(ctx.GetPlace());
float beta1 = ctx.Attr<float>("beta1");
float beta2 = ctx.Attr<float>("beta2");
float epsilon = ctx.Attr<float>("epsilon");
auto param = framework::EigenVector<T>::Flatten(
*ctx.Input<framework::Tensor>("Param"));
auto grad = framework::EigenVector<T>::Flatten(
*ctx.Input<framework::Tensor>("Grad"));
auto moment1 = framework::EigenVector<T>::Flatten(
*ctx.Input<framework::Tensor>("Moment1"));
auto moment2 = framework::EigenVector<T>::Flatten(
*ctx.Input<framework::Tensor>("Moment2"));
auto lr = framework::EigenVector<T>::Flatten(
*ctx.Input<framework::Tensor>("LearningRate"));
auto beta1_pow = framework::EigenVector<T>::Flatten(
*ctx.Input<framework::Tensor>("Beta1Pow"));
auto beta2_pow = framework::EigenVector<T>::Flatten(
*ctx.Input<framework::Tensor>("Beta2Pow"));
auto param_out = framework::EigenVector<T>::Flatten(*param_out_tensor);
auto moment1_out = framework::EigenVector<T>::Flatten(*moment1_out_tensor);
auto moment2_out = framework::EigenVector<T>::Flatten(*moment2_out_tensor);
auto beta1_pow_out =
framework::EigenVector<T>::Flatten(*beta1_pow_out_tensor);
auto beta2_pow_out =
framework::EigenVector<T>::Flatten(*beta2_pow_out_tensor);
auto place = ctx.GetEigenDevice<Place>();
moment1_out.device(place) = beta1 * moment1 + (1 - beta1) * grad;
moment2_out.device(place) = beta2 * moment2 + (1 - beta2) * grad.square();
beta1_pow_out.device(place) = beta1_pow * beta1;
beta2_pow_out.device(place) = beta2_pow * beta2;
// All of these are tensors of 1 element
auto lr_t = lr * (1 - beta2_pow_out).sqrt() / (1 - beta1_pow_out);
// Eigen does not support automatic broadcast
// Get dimensions of moment vector to broadcast lr_t
Eigen::DSizes<int, 1> m_dsize(moment1_out_tensor->numel());
param_out.device(place) =
param -
lr_t.broadcast(m_dsize) *
(moment1_out / (moment2_out.sqrt() + epsilon));
}
};
} // namespace operators
} // namespace paddle
...@@ -134,7 +134,7 @@ void CondOp::PrepareDataForSubnet( ...@@ -134,7 +134,7 @@ void CondOp::PrepareDataForSubnet(
for (int i = 0; i < BRANCH_NUM; ++i) { for (int i = 0; i < BRANCH_NUM; ++i) {
for (auto& output : (*sub_net_op_[i]).Outputs()) { for (auto& output : (*sub_net_op_[i]).Outputs()) {
for (auto& var_name : output.second) { for (auto& var_name : output.second) {
sub_scopes[i]->NewVar(var_name); sub_scopes[i]->Var(var_name);
} }
} }
} }
......
...@@ -12,111 +12,91 @@ ...@@ -12,111 +12,91 @@
See the License for the specific language governing permissions and See the License for the specific language governing permissions and
limitations under the License. */ limitations under the License. */
#include "paddle/operators/gemm_conv2d_op.h" #include "paddle/operators/conv2d_op.h"
namespace paddle { namespace paddle {
namespace operators { namespace operators {
int outputSize(int input_size, int filter_size, int padding, int stride) { void Conv2DOp::InferShape(framework::InferShapeContext* ctx) const {
int output_size = (input_size - filter_size + 2 * padding) / stride + 1; PADDLE_ENFORCE(ctx->HasInput("Input"),
return output_size; "Input(Input) of Conv2DOp should not be null.");
PADDLE_ENFORCE(ctx->HasInput("Filter"),
"Input(Filter) of Conv2DOp should not be null.");
PADDLE_ENFORCE(ctx->HasOutput("Output"),
"Output(Output) of Conv2DOp should not be null.");
auto in_dims = ctx->GetInputDim("Input");
auto filter_dims = ctx->GetInputDim("Filter");
std::vector<int> strides = ctx->Attrs().Get<std::vector<int>>("strides");
std::vector<int> paddings = ctx->Attrs().Get<std::vector<int>>("paddings");
int groups = ctx->Attrs().Get<int>("groups");
int input_channels = in_dims[1];
int output_channels = filter_dims[0];
PADDLE_ENFORCE_EQ(in_dims.size(), 4, "Conv2DOp input should be 4-D.");
PADDLE_ENFORCE_EQ(filter_dims.size(), 4, "Conv2DOp filter should be 4-D.");
PADDLE_ENFORCE_EQ(input_channels, filter_dims[1] * groups,
"The number of input channels should be equal to filter "
"channels * groups.");
PADDLE_ENFORCE_EQ(
output_channels % groups, 0,
"The number of output channels should be divided by groups.");
auto output_height =
OutputSize(in_dims[2], filter_dims[2], paddings[0], strides[0]);
auto output_width =
OutputSize(in_dims[3], filter_dims[3], paddings[1], strides[1]);
ctx->SetOutputDim("Output",
{in_dims[0], filter_dims[0], output_height, output_width});
} }
class Conv2DOp : public framework::OperatorWithKernel { Conv2DOpMaker::Conv2DOpMaker(framework::OpProto* proto,
public: framework::OpAttrChecker* op_checker)
using framework::OperatorWithKernel::OperatorWithKernel; : OpProtoAndCheckerMaker(proto, op_checker) {
AddInput(
protected: "Input",
void InferShape(framework::InferShapeContext* ctx) const override { "The input tensor of convolution operator. "
PADDLE_ENFORCE(ctx->HasInput("Input"), "The format of input tensor is NCHW. Where N is batch size, C is the "
"Input(Input) of Conv2DOp should not be null."); "number of channels, H and W is the height and width of image.");
PADDLE_ENFORCE(ctx->HasInput("Filter"), AddInput("Filter",
"Input(Filter) of Conv2DOp should not be null."); "The filter tensor of convolution operator."
PADDLE_ENFORCE(ctx->HasOutput("Output"), "The format of the filter tensor is MCHW, where M is the number of "
"Output(Output) of Conv2DOp should not be null."); "output image channels, C is the number of input image channels, "
"H and W is height and width of filter. "
auto in_dims = ctx->GetInputDim("Input"); "If the groups attribute is greater than 1, C equal the number of "
auto filter_dims = ctx->GetInputDim("Filter"); "input image channels divided by the groups.");
std::vector<int> strides = ctx->Attrs().Get<std::vector<int>>("strides"); AddOutput("Output",
std::vector<int> paddings = ctx->Attrs().Get<std::vector<int>>("paddings"); "The output tensor of convolution operator."
int groups = ctx->Attrs().Get<int>("groups"); "The format of output tensor is also NCHW.");
int input_channels = in_dims[1]; AddAttr<std::vector<int>>("strides", "strides of convolution operator.")
int output_channels = filter_dims[0]; .SetDefault({1, 1});
AddAttr<std::vector<int>>("paddings", "paddings of convolution operator.")
PADDLE_ENFORCE_EQ(in_dims.size(), 4, "Conv2DOp input should be 4-D."); .SetDefault({0, 0});
PADDLE_ENFORCE_EQ(filter_dims.size(), 4, "Conv2DOp filter should be 4-D."); AddAttr<int>(
PADDLE_ENFORCE_EQ(input_channels, filter_dims[1] * groups, "groups",
"The number of input channels should be equal to filter " "group size of convolution operator. "
"channels * groups."); "Refer to grouped convolution in Alex Krizhevsky's paper: "
PADDLE_ENFORCE_EQ( "when group=2, the first half of the filters are only connected to the "
output_channels % groups, 0, "first half of the input channels, and the second half only connected "
"The number of output channels should be divided by groups."); "to the second half.")
.SetDefault(1);
auto output_height = AddComment(R"DOC(
outputSize(in_dims[2], filter_dims[2], paddings[0], strides[0]);
auto output_width =
outputSize(in_dims[3], filter_dims[3], paddings[1], strides[1]);
ctx->SetOutputDim(
"Output", {in_dims[0], filter_dims[0], output_height, output_width});
}
};
class Conv2DOpMaker : public framework::OpProtoAndCheckerMaker {
public:
Conv2DOpMaker(framework::OpProto* proto, framework::OpAttrChecker* op_checker)
: OpProtoAndCheckerMaker(proto, op_checker) {
AddInput(
"Input",
"The input tensor of convolution operator. "
"The format of input tensor is NCHW. Where N is batch size, C is the "
"number of channels, H and W is the height and width of image.");
AddInput(
"Filter",
"The filter tensor of convolution operator."
"The format of the filter tensor is MCHW, where M is the number of "
"output image channels, C is the number of input image channels, "
"H and W is height and width of filter. "
"If the groups attribute is greater than 1, C equal the number of "
"input image channels divided by the groups.");
AddOutput("Output",
"The output tensor of convolution operator."
"The format of output tensor is also NCHW.");
AddAttr<std::vector<int>>("strides", "strides of convolution operator.")
.SetDefault({1, 1});
AddAttr<std::vector<int>>("paddings", "paddings of convolution operator.")
.SetDefault({0, 0});
AddAttr<int>(
"groups",
"group size of convolution operator. "
"Refer to grouped convolution in Alex Krizhevsky's paper: "
"when group=2, the first half of the filters are only connected to the "
"first half of the input channels, and the second half only connected "
"to the second half.")
.SetDefault(1);
AddComment(R"DOC(
The convolution operation calculates the output based on the input, filter The convolution operation calculates the output based on the input, filter
and strides, paddings, groups parameters. The size of each dimension of the and strides, paddings, groups parameters. The size of each dimension of the
parameters is checked in the infer-shape. parameters is checked in the infer-shape.
)DOC"); )DOC");
} }
};
class Conv2DOpGrad : public framework::OperatorWithKernel {
public:
using framework::OperatorWithKernel::OperatorWithKernel;
protected: void Conv2DOpGrad::InferShape(framework::InferShapeContext* ctx) const {
void InferShape(framework::InferShapeContext* ctx) const override { auto in_dims = ctx->GetInputDim("Input");
auto in_dims = ctx->GetInputDim("Input"); auto filter_dims = ctx->GetInputDim("Filter");
auto filter_dims = ctx->GetInputDim("Filter"); if (ctx->HasOutput(framework::GradVarName("Input"))) {
if (ctx->HasOutput(framework::GradVarName("Input"))) { ctx->SetOutputDim(framework::GradVarName("Input"), in_dims);
ctx->SetOutputDim(framework::GradVarName("Input"), in_dims);
}
if (ctx->HasOutput(framework::GradVarName("Filter"))) {
ctx->SetOutputDim(framework::GradVarName("Filter"), filter_dims);
}
} }
}; if (ctx->HasOutput(framework::GradVarName("Filter"))) {
ctx->SetOutputDim(framework::GradVarName("Filter"), filter_dims);
}
}
} // namespace operators } // namespace operators
} // namespace paddle } // namespace paddle
......
...@@ -12,7 +12,7 @@ ...@@ -12,7 +12,7 @@
See the License for the specific language governing permissions and See the License for the specific language governing permissions and
limitations under the License. */ limitations under the License. */
#include "paddle/operators/gemm_conv2d_op.h" #include "paddle/operators/conv2d_op.h"
namespace ops = paddle::operators; namespace ops = paddle::operators;
......
...@@ -24,6 +24,38 @@ namespace operators { ...@@ -24,6 +24,38 @@ namespace operators {
using Tensor = framework::Tensor; using Tensor = framework::Tensor;
// Base convolution operator definations for other conv
// like operators to reuse the implementation.
inline int OutputSize(int input_size, int filter_size, int padding,
int stride) {
int output_size = (input_size - filter_size + 2 * padding) / stride + 1;
return output_size;
}
// Define Op classes in .h file so that other conv
// operator implementations can reuse the code.
class Conv2DOpMaker : public framework::OpProtoAndCheckerMaker {
public:
Conv2DOpMaker(framework::OpProto* proto,
framework::OpAttrChecker* op_checker);
};
class Conv2DOp : public framework::OperatorWithKernel {
public:
using framework::OperatorWithKernel::OperatorWithKernel;
protected:
void InferShape(framework::InferShapeContext* ctx) const override;
};
class Conv2DOpGrad : public framework::OperatorWithKernel {
public:
using framework::OperatorWithKernel::OperatorWithKernel;
protected:
void InferShape(framework::InferShapeContext* ctx) const override;
};
template <typename Place, typename T> template <typename Place, typename T>
class GemmConv2DKernel : public framework::OpKernel<T> { class GemmConv2DKernel : public framework::OpKernel<T> {
public: public:
...@@ -74,7 +106,6 @@ class GemmConv2DKernel : public framework::OpKernel<T> { ...@@ -74,7 +106,6 @@ class GemmConv2DKernel : public framework::OpKernel<T> {
framework::DDim output_matrix_shape = {output_channels, framework::DDim output_matrix_shape = {output_channels,
output_height * output_width}; output_height * output_width};
// convolution operator: im2col + gemm // convolution operator: im2col + gemm
int in_step = input_channels / groups; int in_step = input_channels / groups;
int out_step = output_channels / groups; int out_step = output_channels / groups;
......
/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserve.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
#include "paddle/operators/conv2d_op.h"
namespace paddle {
namespace operators {
class CudnnConvOpMaker : public Conv2DOpMaker {
public:
CudnnConvOpMaker(framework::OpProto* proto,
framework::OpAttrChecker* op_checker)
: Conv2DOpMaker(proto, op_checker) {
AddAttr<std::vector<int>>("dilations", "dilations of convolution operator.")
.SetDefault(std::vector<int>{1, 1});
AddAttr<int>("workspace_size_MB",
"workspace size for cudnn, in MB, "
"workspace is a section of GPU memory which will be "
"allocated/freed each time the operator runs, larger "
"workspace size can increase performance but also requires "
"better hardward. This size should be carefully setted.")
.SetDefault(4096);
}
};
} // namespace operators
} // namespace paddle
namespace ops = paddle::operators;
REGISTER_OP(conv_cudnn, ops::Conv2DOp, ops::CudnnConvOpMaker, conv_cudnn_grad,
ops::Conv2DOpGrad);
REGISTER_OP_CPU_KERNEL(
conv_cudnn, ops::GemmConv2DKernel<paddle::platform::CPUPlace, float>);
REGISTER_OP_CPU_KERNEL(
conv_cudnn_grad,
ops::GemmConvGrad2DKernel<paddle::platform::CPUPlace, float>);
/* Copyright (c) 2016 PaddlePaddle Authors All Rights Reserve.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
#include "paddle/framework/eigen.h"
#include "paddle/framework/op_registry.h"
#include "paddle/memory/memory.h"
#include "paddle/operators/conv2d_op.h"
#include "paddle/platform/assert.h"
#include "paddle/platform/cudnn_helper.h"
namespace paddle {
namespace operators {
using Tensor = framework::Tensor;
using ScopedTensorDescriptor = platform::ScopedTensorDescriptor;
using ScopedFilterDescriptor = platform::ScopedFilterDescriptor;
using ScopedConvolutionDescriptor = platform::ScopedConvolutionDescriptor;
using DataLayout = platform::DataLayout;
using CUDADeviceContext = platform::CUDADeviceContext;
static constexpr size_t kCONV_CUDNN_WORKSPACE_LIMIT_BYTES = 1024 * 1024 * 1024;
// NOTE: framework::vectorize converts to type int64_t
// which does not fit cudnn inputs.
std::vector<int> Dims2Vector(const framework::DDim& dims) {
std::vector<int> ret;
for (int i = 0; i < dims.size(); i++) {
ret.push_back(dims[i]);
}
return ret;
}
template <typename T>
class CudnnConvOpKernel : public framework::OpKernel<T> {
public:
void Compute(const framework::ExecutionContext& ctx) const override {
PADDLE_ENFORCE(platform::is_gpu_place(ctx.GetPlace()),
"It must use GPUPlace.");
auto* input = ctx.Input<Tensor>("Input");
auto* filter = ctx.Input<Tensor>("Filter");
auto* output = ctx.Output<Tensor>("Output");
std::vector<int> strides = ctx.Attr<std::vector<int>>("strides");
std::vector<int> paddings = ctx.Attr<std::vector<int>>("paddings");
std::vector<int> dilations = ctx.Attr<std::vector<int>>("dilations");
int groups = ctx.Attr<int>("groups");
int user_workspace_size = ctx.Attr<int>("workspace_size_MB");
const T* input_data = input->data<T>();
const T* filter_data = filter->data<T>();
T* output_data = output->mutable_data<T>(ctx.GetPlace());
// ------------------- cudnn descriptors ---------------------
ScopedTensorDescriptor input_desc;
ScopedTensorDescriptor output_desc;
ScopedFilterDescriptor filter_desc;
ScopedConvolutionDescriptor conv_desc;
DataLayout layout = DataLayout::kNCHW;
cudnnTensorDescriptor_t cudnn_input_desc =
input_desc.descriptor<T>(layout, Dims2Vector(input->dims()), groups);
cudnnTensorDescriptor_t cudnn_output_desc =
output_desc.descriptor<T>(layout, Dims2Vector(output->dims()), groups);
cudnnFilterDescriptor_t cudnn_filter_desc =
filter_desc.descriptor<T>(layout, Dims2Vector(filter->dims()), groups);
cudnnConvolutionDescriptor_t cudnn_conv_desc =
conv_desc.descriptor<T>(paddings, strides, dilations);
int input_channels = input->dims()[1];
int input_height = input->dims()[2];
int input_width = input->dims()[3];
int output_channels = output->dims()[1];
int output_height = output->dims()[2];
int output_width = output->dims()[3];
int group_offset_in = input_channels / groups * input_height * input_width;
int group_offset_out =
output_channels / groups * output_height * output_width;
int group_offset_filter = filter->numel() / groups;
// ------------------- cudnn conv workspace ---------------------
void* cudnn_workspace = nullptr;
size_t workspace_size_in_bytes; // final workspace to allocate.
size_t workspace_size_limit = kCONV_CUDNN_WORKSPACE_LIMIT_BYTES;
if (user_workspace_size > 0) {
workspace_size_limit = user_workspace_size * 1024 * 1024;
}
// ------------------- cudnn conv algorithm ---------------------
cudnnConvolutionFwdAlgo_t algo;
auto handle = ctx.cuda_device_context().cudnn_handle();
PADDLE_ENFORCE(platform::dynload::cudnnGetConvolutionForwardAlgorithm(
handle, cudnn_input_desc, cudnn_filter_desc, cudnn_conv_desc,
cudnn_output_desc, CUDNN_CONVOLUTION_FWD_SPECIFY_WORKSPACE_LIMIT,
workspace_size_limit, &algo));
// get workspace size able to allocate
PADDLE_ENFORCE(platform::dynload::cudnnGetConvolutionForwardWorkspaceSize(
handle, cudnn_input_desc, cudnn_filter_desc, cudnn_conv_desc,
cudnn_output_desc, algo, &workspace_size_in_bytes));
// Allocate on GPU memory
platform::GPUPlace gpu = boost::get<platform::GPUPlace>(ctx.GetPlace());
cudnn_workspace = paddle::memory::Alloc(gpu, workspace_size_in_bytes);
// ------------------- cudnn conv forward ---------------------
T alpha = 1.0f, beta = 0.0f;
for (int i = 0; i < groups; i++) {
PADDLE_ENFORCE(platform::dynload::cudnnConvolutionForward(
handle, &alpha, cudnn_input_desc, input_data + i * group_offset_in,
cudnn_filter_desc, filter_data + i * group_offset_filter,
cudnn_conv_desc, algo, cudnn_workspace, workspace_size_in_bytes,
&beta, cudnn_output_desc, output_data + i * group_offset_out));
}
// Release the cudnn workspace
paddle::memory::Free(gpu, cudnn_workspace);
}
};
template <typename T>
class CudnnConvGradOpKernel : public framework::OpKernel<T> {
public:
void Compute(const framework::ExecutionContext& ctx) const override {
PADDLE_ENFORCE(platform::is_gpu_place(ctx.GetPlace()),
"It must use GPUPlace.");
auto input = ctx.Input<Tensor>("Input");
auto filter = ctx.Input<Tensor>("Filter");
auto output_grad = ctx.Input<Tensor>(framework::GradVarName("Output"));
auto input_grad = ctx.Output<Tensor>(framework::GradVarName("Input"));
auto filter_grad = ctx.Output<Tensor>(framework::GradVarName("Filter"));
const T* input_data = input->data<T>();
const T* output_grad_data = output_grad->data<T>();
const T* filter_data = filter->data<T>();
std::vector<int> strides = ctx.Attr<std::vector<int>>("strides");
std::vector<int> paddings = ctx.Attr<std::vector<int>>("paddings");
std::vector<int> dilations = ctx.Attr<std::vector<int>>("dilations");
int groups = ctx.Attr<int>("groups");
int user_workspace_size = ctx.Attr<int>("workspace_size_MB");
// ------------------- cudnn descriptors ---------------------
ScopedTensorDescriptor input_desc;
ScopedTensorDescriptor output_grad_desc;
ScopedTensorDescriptor input_grad_desc;
ScopedFilterDescriptor filter_desc;
ScopedFilterDescriptor filter_grad_desc;
ScopedConvolutionDescriptor conv_desc;
DataLayout layout = DataLayout::kNCHW;
cudnnTensorDescriptor_t cudnn_input_desc =
input_desc.descriptor<T>(layout, Dims2Vector(input->dims()), groups);
cudnnTensorDescriptor_t cudnn_output_grad_desc =
output_grad_desc.descriptor<T>(layout, Dims2Vector(output_grad->dims()),
groups);
cudnnFilterDescriptor_t cudnn_filter_desc =
filter_desc.descriptor<T>(layout, Dims2Vector(filter->dims()), groups);
cudnnTensorDescriptor_t cudnn_input_grad_desc = nullptr;
cudnnFilterDescriptor_t cudnn_filter_grad_desc = nullptr;
cudnnConvolutionDescriptor_t cudnn_conv_desc =
conv_desc.descriptor<T>(paddings, strides, dilations);
int input_channels = input->dims()[1];
int input_height = input->dims()[2];
int input_width = input->dims()[3];
int output_grad_channels = filter->dims()[0];
int output_grad_height = output_grad->dims()[2];
int output_grad_width = output_grad->dims()[3];
int group_offset_in = input_channels / groups * input_height * input_width;
int group_offset_out =
output_grad_channels / groups * output_grad_height * output_grad_width;
int group_offset_filter = filter->numel() / groups;
// ------------------- cudnn backward algorithm ---------------------
cudnnConvolutionBwdDataAlgo_t data_algo;
cudnnConvolutionBwdFilterAlgo_t filter_algo;
size_t workspace_size_in_bytes = 0, tmp_size = 0;
size_t workspace_size_limit = kCONV_CUDNN_WORKSPACE_LIMIT_BYTES;
if (user_workspace_size > 0) {
workspace_size_limit = user_workspace_size * 1024 * 1024;
}
auto handle = ctx.cuda_device_context().cudnn_handle();
if (input_grad) {
cudnn_input_grad_desc = input_grad_desc.descriptor<T>(
layout, Dims2Vector(input_grad->dims()), groups);
PADDLE_ENFORCE(
platform::dynload::cudnnGetConvolutionBackwardDataAlgorithm(
handle, cudnn_filter_desc,
// dyDesc: Handle to the previously initialized input differential
// tensor descriptor.
cudnn_output_grad_desc, cudnn_conv_desc,
// dxDesc: Handle to the previously initialized output tensor
// descriptor.
cudnn_input_grad_desc,
CUDNN_CONVOLUTION_BWD_DATA_SPECIFY_WORKSPACE_LIMIT,
workspace_size_limit, &data_algo));
PADDLE_ENFORCE(
platform::dynload::cudnnGetConvolutionBackwardDataWorkspaceSize(
handle, cudnn_filter_desc, cudnn_output_grad_desc,
cudnn_conv_desc, cudnn_input_grad_desc, data_algo, &tmp_size));
workspace_size_in_bytes = std::max(workspace_size_in_bytes, tmp_size);
}
if (filter_grad) {
cudnn_filter_grad_desc = filter_grad_desc.descriptor<T>(
layout, Dims2Vector(filter_grad->dims()), groups);
PADDLE_ENFORCE(
platform::dynload::cudnnGetConvolutionBackwardFilterAlgorithm(
handle, cudnn_input_desc, cudnn_output_grad_desc, cudnn_conv_desc,
cudnn_filter_desc,
CUDNN_CONVOLUTION_BWD_FILTER_SPECIFY_WORKSPACE_LIMIT,
workspace_size_limit, &filter_algo));
PADDLE_ENFORCE(
platform::dynload::cudnnGetConvolutionBackwardFilterWorkspaceSize(
handle, cudnn_input_desc, cudnn_output_grad_desc, cudnn_conv_desc,
cudnn_filter_desc, filter_algo, &tmp_size));
workspace_size_in_bytes = std::max(workspace_size_in_bytes, tmp_size);
}
// ------------------- cudnn conv workspace ---------------------
// Already on GPU
void* cudnn_workspace = nullptr;
platform::GPUPlace gpu = boost::get<platform::GPUPlace>(ctx.GetPlace());
cudnn_workspace = paddle::memory::Alloc(gpu, workspace_size_in_bytes);
// ------------------- cudnn conv backward data ---------------------
// FIXME(typhoonzero): template type T may not be the same as cudnn call.
T alpha = 1.0f, beta = 0.0f;
if (input_grad) {
T* input_grad_data = input_grad->mutable_data<T>(ctx.GetPlace());
auto t = framework::EigenVector<T>::Flatten(*input_grad);
t.device(ctx.GetEigenDevice<platform::GPUPlace>()) =
t.constant(static_cast<T>(0));
for (int i = 0; i < groups; i++) {
PADDLE_ENFORCE(platform::dynload::cudnnConvolutionBackwardData(
handle, &alpha, cudnn_filter_desc,
filter_data + i * group_offset_filter, cudnn_output_grad_desc,
output_grad_data + i * group_offset_out, cudnn_conv_desc, data_algo,
cudnn_workspace, workspace_size_in_bytes, &beta,
cudnn_input_grad_desc, input_grad_data + i * group_offset_in));
}
}
// ------------------- cudnn conv backward filter ---------------------
if (filter_grad) {
T* filter_grad_data = filter_grad->mutable_data<T>(ctx.GetPlace());
auto t = framework::EigenVector<T>::Flatten(*filter_grad);
t.device(ctx.GetEigenDevice<platform::GPUPlace>()) =
t.constant(static_cast<T>(0));
for (int i = 0; i < groups; i++) {
PADDLE_ENFORCE(platform::dynload::cudnnConvolutionBackwardFilter(
handle, &alpha, cudnn_input_desc, input_data + i * group_offset_in,
cudnn_output_grad_desc, output_grad_data + i * group_offset_out,
cudnn_conv_desc, filter_algo, cudnn_workspace,
workspace_size_in_bytes, &beta, cudnn_filter_grad_desc,
filter_grad_data + i * group_offset_filter));
}
}
// Release the cudnn workspace
paddle::memory::Free(gpu, cudnn_workspace);
}
};
} // namespace operators
} // namespace paddle
REGISTER_OP_GPU_KERNEL(conv_cudnn, paddle::operators::CudnnConvOpKernel<float>);
REGISTER_OP_GPU_KERNEL(conv_cudnn_grad,
paddle::operators::CudnnConvGradOpKernel<float>);
/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserve.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
#include "paddle/operators/decayed_adagrad_op.h"
namespace paddle {
namespace operators {
class DecayedAdagradOp : public framework::OperatorWithKernel {
public:
using framework::OperatorWithKernel::OperatorWithKernel;
protected:
void InferShape(framework::InferShapeContext *ctx) const override {
PADDLE_ENFORCE(ctx->HasInput("Param"),
"Input(Param) of DecayedAdagradOp should not be null.");
PADDLE_ENFORCE(ctx->HasInput("Grad"),
"Input(Grad) of DecayedAdagradOp should not be null.");
PADDLE_ENFORCE(ctx->HasInput("Moment"),
"Input(Moment) of DecayedAdagradOp should not be null.");
PADDLE_ENFORCE(
ctx->HasInput("LearningRate"),
"Input(LearningRate) of DecayedAdagradOp should not be null.");
PADDLE_ENFORCE(ctx->HasOutput("ParamOut"),
"Output(ParamOut) of DecayedAdagradOp should not be null.");
PADDLE_ENFORCE(ctx->HasOutput("MomentOut"),
"Output(MomentOut) of DecayedAdagradOp should not be null.");
auto lr_dims = ctx->GetInputDim("LearningRate");
PADDLE_ENFORCE_EQ(framework::product(lr_dims), 1,
"LearningRate should have one element");
auto param_dims = ctx->GetInputDim("Param");
PADDLE_ENFORCE_EQ(param_dims, ctx->GetInputDim("Grad"),
"Param and Grad input of DecayedAdagradOp should have "
"the same dimension.");
PADDLE_ENFORCE_EQ(param_dims, ctx->GetInputDim("Moment"),
"Param and Moment input of DecayedAdagradOp should have "
"the same dimension.");
ctx->SetOutputDim("ParamOut", param_dims);
ctx->SetOutputDim("MomentOut", param_dims);
}
};
class DecayedAdagradOpMaker : public framework::OpProtoAndCheckerMaker {
public:
DecayedAdagradOpMaker(framework::OpProto *proto,
framework::OpAttrChecker *op_checker)
: OpProtoAndCheckerMaker(proto, op_checker) {
AddInput("Param", "(Tensor) Input parameter");
AddInput("Grad", "(Tensor) Input gradient");
AddInput("Moment", "(Tensor) Second moment");
AddInput("LearningRate", "(Tensor) Learning rate");
AddOutput("ParamOut", "(Tensor) Output parameter");
AddOutput("MomentOut", "(Tensor) Output second moment");
AddAttr<float>("decay",
"(float, default 0.95) "
"Discounting factor for coming gradient")
.SetDefault(0.95);
AddAttr<float>("epsilon",
"(float, default 1.0e-6) "
"Constant for numerical stability")
.SetDefault(1.0e-6f);
AddComment(R"DOC(
Decayed Adagrad
moment_out = decay * moment + (1 - decay) * grad * grad
param_out = param - learning_rate * grad / (sqrt(moment_out) + epsilon)
)DOC");
}
};
} // namespace operators
} // namespace paddle
namespace ops = paddle::operators;
REGISTER_OP_WITHOUT_GRADIENT(decayed_adagrad, ops::DecayedAdagradOp,
ops::DecayedAdagradOpMaker);
REGISTER_OP_CPU_KERNEL(
decayed_adagrad,
ops::DecayedAdagradOpKernel<paddle::platform::CPUPlace, float>);
/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserve.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
#define EIGEN_USE_GPU
#include "paddle/operators/decayed_adagrad_op.h"
namespace ops = paddle::operators;
REGISTER_OP_GPU_KERNEL(
decayed_adagrad,
ops::DecayedAdagradOpKernel<paddle::platform::GPUPlace, float>);
/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserve.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
#pragma once
#include "paddle/framework/eigen.h"
#include "paddle/framework/op_registry.h"
namespace paddle {
namespace operators {
template <typename Place, typename T>
class DecayedAdagradOpKernel : public framework::OpKernel<T> {
public:
void Compute(const framework::ExecutionContext& ctx) const override {
auto param_out_tensor = ctx.Output<framework::Tensor>("ParamOut");
auto moment_out_tensor = ctx.Output<framework::Tensor>("MomentOut");
param_out_tensor->mutable_data<T>(ctx.GetPlace());
moment_out_tensor->mutable_data<T>(ctx.GetPlace());
float decay = ctx.Attr<float>("decay");
float epsilon = ctx.Attr<float>("epsilon");
auto param = framework::EigenVector<T>::Flatten(
*ctx.Input<framework::Tensor>("Param"));
auto grad = framework::EigenVector<T>::Flatten(
*ctx.Input<framework::Tensor>("Grad"));
auto moment = framework::EigenVector<T>::Flatten(
*ctx.Input<framework::Tensor>("Moment"));
auto lr = framework::EigenVector<T>::Flatten(
*ctx.Input<framework::Tensor>("LearningRate"));
auto param_out = framework::EigenVector<T>::Flatten(*param_out_tensor);
auto moment_out = framework::EigenVector<T>::Flatten(*moment_out_tensor);
auto place = ctx.GetEigenDevice<Place>();
moment_out.device(place) = decay * moment + (1 - decay) * grad * grad;
Eigen::DSizes<int, 1> m_dsize(moment_out_tensor->numel());
param_out.device(place) =
param - lr.broadcast(m_dsize) * grad / (moment_out.sqrt() + epsilon);
}
};
} // namespace operators
} // namespace paddle
...@@ -23,13 +23,37 @@ using framework::Scope; ...@@ -23,13 +23,37 @@ using framework::Scope;
using framework::TensorArray; using framework::TensorArray;
using framework::LoDTensor; using framework::LoDTensor;
using framework::Variable; using framework::Variable;
using framework::DySeqMetaBatch;
namespace detail { namespace detail {
inline void CreateVariables(Scope& scope, inline void CreateVariables(Scope& scope,
const std::vector<std::string>& var_names) { const std::vector<std::string>& var_names) {
for (const auto& name : var_names) { for (const auto& name : var_names) {
scope.NewVar(name); scope.Var(name);
}
}
/*
* The inputs with sequence should be reordered when they are split, so the
* boot_states should be reordered in the same order.
*
* NOTE This may require that the `pre_state` of the first time step should just
* copy the `boot_state` rather than reference it, for that the content should
* be reordered, but the RNN op should not change the `boot_state` as an input
* variable's content.
*/
template <typename T>
inline void ReorderBootState(const DySeqMetaBatch& metas,
const LoDTensor& boot_state, LoDTensor* tensor,
const platform::Place& dst_place) {
for (size_t seq_id = 0; seq_id < metas.size(); seq_id++) {
auto slice = tensor->Slice<T>(seq_id, seq_id + 1);
auto boot_slice =
boot_state.Slice<T>(metas[seq_id].ori_idx, metas[seq_id].ori_idx + 1);
// TODO(superjom) pass in device context as an argument
slice.template CopyFrom<T>(boot_slice, dst_place,
platform::CPUDeviceContext());
} }
} }
...@@ -69,6 +93,7 @@ void DynamicRecurrentOp::Run(const Scope& scope, ...@@ -69,6 +93,7 @@ void DynamicRecurrentOp::Run(const Scope& scope,
CreateScopes(); CreateScopes();
WriteStepInputs(); WriteStepInputs();
InitStates(); InitStates();
WriteStepOutputs();
// call stepnet in all the time steps // call stepnet in all the time steps
for (size_t step = 0; step < cache_.num_steps; step++) { for (size_t step = 0; step < cache_.num_steps; step++) {
...@@ -76,7 +101,6 @@ void DynamicRecurrentOp::Run(const Scope& scope, ...@@ -76,7 +101,6 @@ void DynamicRecurrentOp::Run(const Scope& scope,
stepnet_->Run(step_scope, dev_ctx); stepnet_->Run(step_scope, dev_ctx);
} }
WriteStepOutputs();
ConcatOutputs(); ConcatOutputs();
} }
...@@ -84,11 +108,11 @@ void DynamicRecurrentOp::SplitInputs() const { ...@@ -84,11 +108,11 @@ void DynamicRecurrentOp::SplitInputs() const {
// TODO(superjom) make level a config // TODO(superjom) make level a config
// TODO(superjom) check all the inputs has the same LoD // TODO(superjom) check all the inputs has the same LoD
int level = 0; int level = 0;
const auto& inlinks = cache_.inlinks; for (const auto& item : cache_.inlinks) {
for (const auto& item : inlinks) {
const auto& var = item.second; const auto& var = item.second;
const auto& tensor = var->Get<LoDTensor>(); const auto& tensor = var->Get<LoDTensor>();
TensorArray& ta = step_inputs_[item.first]; TensorArray& ta = step_inputs_[item.first];
dy_seq_metas_[item.first] = dy_seq_metas_[item.first] =
ta.Unpack(tensor, level, true /*length_descend*/); ta.Unpack(tensor, level, true /*length_descend*/);
...@@ -112,7 +136,7 @@ void DynamicRecurrentOp::WriteStepInputs() const { ...@@ -112,7 +136,7 @@ void DynamicRecurrentOp::WriteStepInputs() const {
auto& step_scope = cache_.GetScope(step); auto& step_scope = cache_.GetScope(step);
Variable* var = step_scope.FindVar(item.first); Variable* var = step_scope.FindVar(item.first);
if (var == nullptr) { if (var == nullptr) {
var = step_scope.NewVar(item.first); var = step_scope.Var(item.first);
} }
var->GetMutable<LoDTensor>()->ShareDataWith<value_type>(tensor); var->GetMutable<LoDTensor>()->ShareDataWith<value_type>(tensor);
} }
...@@ -120,17 +144,11 @@ void DynamicRecurrentOp::WriteStepInputs() const { ...@@ -120,17 +144,11 @@ void DynamicRecurrentOp::WriteStepInputs() const {
} }
void DynamicRecurrentOp::WriteStepOutputs() const { void DynamicRecurrentOp::WriteStepOutputs() const {
for (size_t step = 0; step < cache_.scopes->size(); step++) { // initialize step outputs
auto& scope = cache_.GetScope(step); for (const auto& item : cache_.outlinks) {
for (auto& item : step_outputs_) { step_outputs_.emplace(item.first, TensorArray());
auto* var = scope.FindVar(item.first);
if (var == nullptr) {
var = scope.NewVar(item.first);
}
auto* tensor = var->GetMutable<LoDTensor>();
item.second.WriteShared(step, *tensor);
}
} }
PADDLE_ENFORCE_GT(step_outputs_.size(), 0UL);
} }
void DynamicRecurrentOp::CreateScopes() const { void DynamicRecurrentOp::CreateScopes() const {
...@@ -145,12 +163,18 @@ void DynamicRecurrentOp::CreateScopes() const { ...@@ -145,12 +163,18 @@ void DynamicRecurrentOp::CreateScopes() const {
PADDLE_ENFORCE_NOT_NULL(stepnet_, "stepnet should be set first"); PADDLE_ENFORCE_NOT_NULL(stepnet_, "stepnet should be set first");
std::vector<std::string> memories; std::vector<std::string> memories;
std::vector<std::string> pre_memories; std::vector<std::string> pre_memories;
std::vector<std::string> stepnet_outputs;
std::transform(arg_.memories.begin(), arg_.memories.end(), std::transform(arg_.memories.begin(), arg_.memories.end(),
std::back_inserter(memories), std::back_inserter(memories),
[](const rnn::MemoryAttr& m) { return m.var; }); [](const rnn::MemoryAttr& m) { return m.var; });
std::transform(arg_.memories.begin(), arg_.memories.end(), std::transform(arg_.memories.begin(), arg_.memories.end(),
std::back_inserter(pre_memories), std::back_inserter(pre_memories),
[](const rnn::MemoryAttr& m) { return m.pre_var; }); [](const rnn::MemoryAttr& m) { return m.pre_var; });
for (const auto& item : stepnet_->Outputs()) {
for (const auto& var : item.second) {
stepnet_outputs.push_back(var);
}
}
for (size_t step = 0; step < cache_.num_steps; step++) { for (size_t step = 0; step < cache_.num_steps; step++) {
auto& scope = cache_.GetScope(step); auto& scope = cache_.GetScope(step);
...@@ -158,60 +182,88 @@ void DynamicRecurrentOp::CreateScopes() const { ...@@ -158,60 +182,88 @@ void DynamicRecurrentOp::CreateScopes() const {
detail::CreateVariables(scope, arg_.outlinks); detail::CreateVariables(scope, arg_.outlinks);
detail::CreateVariables(scope, memories); detail::CreateVariables(scope, memories);
detail::CreateVariables(scope, pre_memories); detail::CreateVariables(scope, pre_memories);
detail::CreateVariables(scope, stepnet_outputs);
} }
} }
void DynamicRecurrentOp::ConcatOutputs() const { void DynamicRecurrentOp::ConcatOutputs() const {
// TODO(superjom) transform this to a config // TODO(superjom) transform this to a config
int level = 0; int level = 0;
// TODO(superjom) pass in some lod for (size_t step = 0; step < cache_.num_steps; step++) {
// just a placeholder auto& scope = cache_.GetScope(step);
framework::LoD lod; for (auto& item : step_outputs_) {
auto* var = scope.FindVar(item.first);
PADDLE_ENFORCE_NOT_NULL(var);
auto* tensor = var->GetMutable<LoDTensor>();
tensor->mutable_data<value_type>(platform::CPUPlace());
item.second.WriteShared(step, *tensor);
}
}
// the inlinks' lods should be the same, so randomly get one lod.
const auto& some_lod =
cache_.scope->FindVar(arg_.inlinks.front())->Get<LoDTensor>().lod();
const auto& some_meta = dy_seq_metas_[arg_.inlinks.front()];
for (auto& item : step_outputs_) { for (auto& item : step_outputs_) {
auto tensor = item.second.Pack(level, dy_seq_metas_[item.first], lod); auto tensor = item.second.Pack(level, some_meta, some_lod);
auto& output = cache_.outlinks[item.first]->Get<LoDTensor>(); auto* output = cache_.outlinks[item.first]->GetMutable<LoDTensor>();
const_cast<LoDTensor*>(&output)->ShareDataWith<value_type>(tensor); const_cast<LoDTensor*>(output)->ShareDataWith<value_type>(tensor);
} }
} }
void DynamicRecurrentOp::InitStates() const { void DynamicRecurrentOp::InitStates() const {
// init the first state for (size_t step = 0; step < cache_.num_steps; step++) {
// TODO(superjom) parepare the scenerio that boot state not exists for (const auto& memory : arg_.memories) {
for (auto memory : arg_.memories) { CreateState(memory, step);
auto* boot_state_var = cache_.scope->FindVar(memory.boot_var); LinkState(memory, step);
PADDLE_ENFORCE_NOT_NULL(boot_state_var);
auto& boot_state = boot_state_var->Get<LoDTensor>();
const auto& dims = boot_state.dims();
for (size_t step = 0; step < cache_.num_steps; step++) {
auto& cur_scope = cache_.GetScope(step);
// link pre-state to boot_state
// init state and pre-state
auto* pre_state = cur_scope.FindVar(memory.pre_var);
PADDLE_ENFORCE_NOT_NULL(pre_state);
pre_state->GetMutable<LoDTensor>();
auto* state = cur_scope.FindVar(memory.var);
PADDLE_ENFORCE_NOT_NULL(state);
state->GetMutable<LoDTensor>()->Resize(dims);
state->GetMutable<LoDTensor>()->mutable_data<value_type>(
platform::CPUPlace());
if (step == 0) {
auto* pre_state_tensor = pre_state->GetMutable<LoDTensor>();
pre_state_tensor->Resize(boot_state.dims());
pre_state_tensor->ShareDataWith<value_type>(boot_state);
} else {
auto& pre_scope = cache_.GetScope(step - 1);
auto* state_pre = pre_scope.FindVar(memory.var);
PADDLE_ENFORCE_NOT_NULL(state_pre);
pre_state->GetMutable<LoDTensor>()->ShareDataWith<value_type>(
*state_pre->GetMutable<LoDTensor>());
}
} }
} }
} }
void DynamicRecurrentOp::CreateState(const rnn::MemoryAttr& memory,
size_t step) const {
auto& scope = cache_.GetScope(step);
auto& state = *cache_.GetTensor(scope, memory.var);
auto& boot_state = *cache_.GetTensor(*cache_.scope, memory.boot_var);
size_t num_instances =
step_inputs_[arg_.inlinks.front()].Read(step).dims()[0];
auto dims = boot_state.dims();
dims[0] = num_instances;
state.Resize(dims);
state.mutable_data<value_type>(platform::CPUPlace());
states_[memory.var].WriteShared(step, state);
}
void DynamicRecurrentOp::LinkState(const rnn::MemoryAttr& memory,
size_t step) const {
auto& scope = cache_.GetScope(step);
auto& state_pre = *cache_.GetTensor(scope, memory.pre_var);
// all the step_inputs' metas should be the same, just randomly select one
// and get the dyseq meta.
const auto& some_meta = dy_seq_metas_[arg_.inlinks.front()];
size_t num_instances =
step_inputs_[arg_.inlinks.front()].Read(step).dims()[0];
LoDTensor* pre_state{nullptr};
if (step == 0) {
pre_state = cache_.GetTensor(*cache_.scope, memory.boot_var);
pre_state->mutable_data<float>(platform::CPUPlace());
// allocate memory
state_pre.Resize(pre_state->dims());
state_pre.mutable_data<value_type>(platform::CPUPlace());
detail::ReorderBootState<value_type>(some_meta, *pre_state, &state_pre,
pre_state->place());
} else {
pre_state = cache_.GetTensor(cache_.GetScope(step - 1), memory.var);
}
// shink and share from previous state
auto shrinked_pre_state = pre_state->Slice<value_type>(0, num_instances);
state_pre.ShareDataWith<value_type>(shrinked_pre_state);
}
void DynamicRecurrentOp::ArgCache::Init( void DynamicRecurrentOp::ArgCache::Init(
const rnn::ArgumentName& name, const paddle::framework::OperatorBase& op, const rnn::ArgumentName& name, const paddle::framework::OperatorBase& op,
const paddle::framework::Scope& scope, rnn::Argument* arg) { const paddle::framework::Scope& scope, rnn::Argument* arg) {
...@@ -261,6 +313,12 @@ Variable* DynamicRecurrentOp::ArgCache::GetVariable(const Scope& scope, ...@@ -261,6 +313,12 @@ Variable* DynamicRecurrentOp::ArgCache::GetVariable(const Scope& scope,
return var; return var;
} }
LoDTensor* DynamicRecurrentOp::ArgCache::GetTensor(
const framework::Scope& scope, const std::string& name) {
auto* var = GetVariable(scope, name);
return var->GetMutable<LoDTensor>();
}
const rnn::ArgumentName DynamicRecurrentOp::kArgName{ const rnn::ArgumentName DynamicRecurrentOp::kArgName{
"step_net", "step_scopes", "inlinks", "outlinks", "step_net", "step_scopes", "inlinks", "outlinks",
"memories", "pre_memories", "boot_memories"}; "memories", "pre_memories", "boot_memories"};
......
...@@ -77,6 +77,17 @@ class DynamicRecurrentOp : public framework::OperatorBase { ...@@ -77,6 +77,17 @@ class DynamicRecurrentOp : public framework::OperatorBase {
*/ */
void InitStates() const; void InitStates() const;
/*
* Create state variables for each time step.
*/
void CreateState(const rnn::MemoryAttr& memory, size_t step) const;
/*
* Link pre-state variable in current scope to the state variable in the
* previous time step (scope).
*/
void LinkState(const rnn::MemoryAttr& memory, size_t step) const;
/* /*
* Concatenate outputs in each time step and generate a LoDTensor. * Concatenate outputs in each time step and generate a LoDTensor.
*/ */
...@@ -91,6 +102,16 @@ class DynamicRecurrentOp : public framework::OperatorBase { ...@@ -91,6 +102,16 @@ class DynamicRecurrentOp : public framework::OperatorBase {
} }
const OperatorBase& GetStepNet() const { return *stepnet_; } const OperatorBase& GetStepNet() const { return *stepnet_; }
const framework::TensorArray& state(const std::string& name) const {
return states_[name];
}
const framework::TensorArray& step_input(const std::string& name) const {
return step_inputs_[name];
}
const framework::TensorArray& step_output(const std::string& name) const {
return step_outputs_[name];
}
protected: protected:
struct ArgCache { struct ArgCache {
framework::Scope const* scope; framework::Scope const* scope;
...@@ -108,6 +129,9 @@ class DynamicRecurrentOp : public framework::OperatorBase { ...@@ -108,6 +129,9 @@ class DynamicRecurrentOp : public framework::OperatorBase {
return *scopes->at(index); return *scopes->at(index);
} }
framework::LoDTensor* GetTensor(const framework::Scope& scope,
const std::string& name);
private: private:
void InitArgument(const rnn::ArgumentName& name, const OperatorBase& op, void InitArgument(const rnn::ArgumentName& name, const OperatorBase& op,
rnn::Argument* arg); rnn::Argument* arg);
...@@ -122,7 +146,7 @@ class DynamicRecurrentOp : public framework::OperatorBase { ...@@ -122,7 +146,7 @@ class DynamicRecurrentOp : public framework::OperatorBase {
private: private:
std::unique_ptr<OperatorBase> stepnet_; std::unique_ptr<OperatorBase> stepnet_;
mutable framework::TensorArray states_; mutable std::map<std::string, framework::TensorArray> states_;
mutable std::map<std::string, framework::TensorArray> step_inputs_; mutable std::map<std::string, framework::TensorArray> step_inputs_;
mutable std::map<std::string, framework::TensorArray> step_outputs_; mutable std::map<std::string, framework::TensorArray> step_outputs_;
mutable std::map<std::string, std::vector<framework::DySeqMeta>> mutable std::map<std::string, std::vector<framework::DySeqMeta>>
......
...@@ -36,7 +36,7 @@ void OpDescNewVar(const std::string& param_name, ...@@ -36,7 +36,7 @@ void OpDescNewVar(const std::string& param_name,
// create a LoD tensor in scope with specific dims // create a LoD tensor in scope with specific dims
LoDTensor* CreateVar(Scope& scope, std::string name, framework::DDim dims, LoDTensor* CreateVar(Scope& scope, std::string name, framework::DDim dims,
const platform::Place& place) { const platform::Place& place) {
auto* var = scope.NewVar(name); auto* var = scope.Var(name);
auto* tensor = var->GetMutable<LoDTensor>(); auto* tensor = var->GetMutable<LoDTensor>();
tensor->Resize(dims); tensor->Resize(dims);
tensor->mutable_data<float>(place); tensor->mutable_data<float>(place);
...@@ -85,9 +85,8 @@ class DynamicRecurrentOpTestHelper : public ::testing::Test { ...@@ -85,9 +85,8 @@ class DynamicRecurrentOpTestHelper : public ::testing::Test {
void CreateGlobalVariables() { void CreateGlobalVariables() {
platform::CPUPlace place; platform::CPUPlace place;
scope.NewVar("step_scopes"); scope.Var("step_scopes");
CreateVar(scope, "boot_mem", framework::make_ddim({10, 20}), place); CreateVar(scope, "boot_mem", framework::make_ddim({10, 20}), place);
// auto* out0 =
CreateVar(scope, "out0", framework::make_ddim({10, 20}), place); CreateVar(scope, "out0", framework::make_ddim({10, 20}), place);
auto* in0 = CreateVar(scope, "in0", framework::make_ddim({10, 8}), place); auto* in0 = CreateVar(scope, "in0", framework::make_ddim({10, 8}), place);
// 10 instanes with 4 sentences, length is 4, 3, 2, 1 respectively. // 10 instanes with 4 sentences, length is 4, 3, 2, 1 respectively.
......
/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserve.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
#include "paddle/operators/gru_unit_op.h"
namespace paddle {
namespace operators {
using framework::Tensor;
class GRUUnitOp : public framework::OperatorWithKernel {
public:
using framework::OperatorWithKernel::OperatorWithKernel;
protected:
void InferShape(framework::InferShapeContext* ctx) const override {
PADDLE_ENFORCE(ctx->HasInput("Input"),
"Input(%s) of GRUUnitOp should not be null.", "Input");
PADDLE_ENFORCE(ctx->HasInput("HiddenPrev"),
"Input(%s) of GRUUnitOp should not be null.", "HiddenPrev");
PADDLE_ENFORCE(ctx->HasInput("Weight"),
"Input(%s) of GRUUnitOp should not be null.", "Weight");
PADDLE_ENFORCE(ctx->HasOutput("Gate"),
"Output(%s) of GRUUnitOp should not be null.", "Gate");
PADDLE_ENFORCE(ctx->HasOutput("ResetHiddenPrev"),
"Output(%s) of GRUUnitOp should not be null.",
"ResetHiddenPrev");
PADDLE_ENFORCE(ctx->HasOutput("Hidden"),
"Output(%s) of GRUUnitOp should not be null.", "Hidden");
auto input_dims = ctx->GetInputDim("Input");
auto hidden_prev_dims = ctx->GetInputDim("HiddenPrev");
auto weight_dims = ctx->GetInputDim("Weight");
int batch_size = input_dims[0];
int input_size = input_dims[1];
int frame_size = hidden_prev_dims[1];
int weight_height = weight_dims[0];
int weight_width = weight_dims[1];
PADDLE_ENFORCE_EQ(
input_size, frame_size * 3,
"The input_size must be 3 times of frame_size in GRUUnitOp.");
PADDLE_ENFORCE_EQ(
weight_height, frame_size,
"The shape of Weight matrix must be [frame_size, frame_size * 3].");
PADDLE_ENFORCE_EQ(
weight_width, frame_size * 3,
"The shape of Weight matrix must be [frame_size, frame_size * 3].");
auto bias = Input("Bias");
if (bias != framework::kEmptyVarName) {
auto bias_dims = ctx->GetInputDim("Bias");
int bias_height = bias_dims[0];
int bias_width = bias_dims[1];
PADDLE_ENFORCE_EQ(bias_height, 1,
"The shape of Bias must be [1, frame_size * 3].");
PADDLE_ENFORCE_EQ(bias_width, frame_size * 3,
"The shape of Bias must be [1, frame_size * 3].");
}
ctx->SetOutputDim("Gate", {batch_size, frame_size * 3});
ctx->SetOutputDim("ResetHiddenPrev", {batch_size, frame_size});
ctx->SetOutputDim("Hidden", {batch_size, frame_size});
}
};
class GRUUnitOpMaker : public framework::OpProtoAndCheckerMaker {
public:
GRUUnitOpMaker(framework::OpProto* proto,
framework::OpAttrChecker* op_checker)
: OpProtoAndCheckerMaker(proto, op_checker) {
AddInput("Input",
"(Tensor) Matrix with shape [batch_size, frame_size * 3] for the "
"input.");
AddInput("HiddenPrev",
"(Tensor) Matrix with shape [batch_size, frame_size] for the "
"states of previous time step.");
AddInput("Weight",
"(Tensor) Weight matrix with shape [frame_size, frame_size * 3]. "
"The elements continuous in memory can be divided into two parts. "
"The first part are weights of the update gate and reset gate "
"with shape [frame_size, frame_size * 2], and the second part are "
"weights of output candidate with shape [frame_size, frame_size]");
AddInput("Bias",
"(Tensor) Bias vector with shape [1, frame_size * 3] concating "
"bias of the update gate, reset gate and output candidate.");
AddOutput("Gate",
"(Tensor) Matrix with shape [batch_size, frame_size * 3] for the "
"output of update gate, reset gate and output candidate")
.AsIntermediate();
AddOutput("ResetHiddenPrev",
"(Tensor) Matrix with shape [batch_size, frame_size] for the "
"reseted hidden state of previous time step.")
.AsIntermediate();
AddOutput("Hidden",
"(Tensor) The GRU hidden state of the current time step "
"with shape [batch_size, frame_size].");
AddAttr<int>("activation",
"(enum int, default tanh) "
"The activation type used for output candidate {h}_t.")
.SetDefault(tanh)
.InEnum({identity, sigmoid, tanh, relu});
AddAttr<int>("gate_activation",
"(enum int, default sigmoid) "
"The activation type used in update gate and reset gate.")
.SetDefault(sigmoid)
.InEnum({identity, sigmoid, tanh, relu});
AddComment(R"DOC(
GRUUnitOp implements part calculations of the GRU unit as following:
\f[
update \ gate: u_t = actGate(xu_t + W_u * hidden_prev + bias_u) \\
reset \ gate: r_t = actGate(xr_t + W_r * hidden_prev + bias_r) \\
output \ candidate: {h}_t = actNode(xc_t + W_c * dot(r_t, hidden_prev) + bias_c) \\
output: h_t = dot((1-u_t), {h}_t) + dot(u_t, hidden_prev)
\f]
The rest of GRU unit can be completed by using FCOp's output as the input of GRUUnitOp.
)DOC");
}
};
class GRUUnitGradOp : public framework::OperatorWithKernel {
public:
using framework::OperatorWithKernel::OperatorWithKernel;
protected:
void InferShape(framework::InferShapeContext* ctx) const override {
PADDLE_ENFORCE(ctx->HasInput("Input"),
"Input(%s) of GRUUnitGradOp should not be null.", "Input");
PADDLE_ENFORCE(ctx->HasInput("HiddenPrev"),
"Input(%s) of GRUUnitGradOp should not be null.",
"HiddenPrev");
PADDLE_ENFORCE(ctx->HasInput("Weight"),
"Input(%s) of GRUUnitGradOp should not be null.", "Weight");
PADDLE_ENFORCE(ctx->HasInput("Gate"),
"Input(%s) of GRUUnitGradOp should not be null.", "Gate");
PADDLE_ENFORCE(ctx->HasInput("ResetHiddenPrev"),
"Input(%s) of GRUUnitGradOp should not be null.",
"ResetHiddenPrev");
PADDLE_ENFORCE(ctx->HasInput("Hidden"),
"Input(%s) of GRUUnitGradOp should not be null.", "Hidden");
PADDLE_ENFORCE(ctx->HasInput(framework::GradVarName("Gate")),
"Input(%s@GRAD) of GRUUnitGradOp should not be null.",
"Gate");
PADDLE_ENFORCE(ctx->HasInput(framework::GradVarName("ResetHiddenPrev")),
"Input(%s@GRAD) of GRUUnitGradOp should not be null.",
"ResetHiddenPrev");
PADDLE_ENFORCE(ctx->HasInput(framework::GradVarName("Hidden")),
"Input(%s@GRAD) of GRUUnitGradOp should not be null.",
"Hidden");
auto input_dims = ctx->GetInputDim("Input");
auto hidden_prev_dims = ctx->GetInputDim("HiddenPrev");
auto weight_dims = ctx->GetInputDim("Weight");
// int batch_size = input_dims[0];
int input_size = input_dims[1];
int frame_size = hidden_prev_dims[1];
int weight_height = weight_dims[0];
int weight_width = weight_dims[1];
PADDLE_ENFORCE_EQ(
input_size, frame_size * 3,
"The input_size must be 3 times of frame_size in GRUUnitOp.");
PADDLE_ENFORCE_EQ(
weight_height, frame_size,
"The shape of Weight matrix must be [frame_size, frame_size * 3].");
PADDLE_ENFORCE_EQ(
weight_width, frame_size * 3,
"The shape of Weight matrix must be [frame_size, frame_size * 3].");
auto bias = Input("Bias");
if (bias != framework::kEmptyVarName) {
auto bias_dims = ctx->GetInputDim("Bias");
int bias_height = bias_dims[0];
int bias_width = bias_dims[1];
PADDLE_ENFORCE_EQ(bias_height, 1,
"The shape of Bias must be [1, frame_size * 3].");
PADDLE_ENFORCE_EQ(bias_width, frame_size * 3,
"The shape of Bias must be [1, frame_size * 3].");
auto bias_grad_name = framework::GradVarName("Bias");
if (ctx->HasOutput(bias_grad_name))
ctx->SetOutputDim(bias_grad_name, bias_dims);
}
auto input_grad_name = framework::GradVarName("Input");
if (ctx->HasOutput(input_grad_name))
ctx->SetOutputDim(input_grad_name, input_dims);
auto hidden_prev_grad_name = framework::GradVarName("HiddenPrev");
if (ctx->HasOutput(hidden_prev_grad_name))
ctx->SetOutputDim(hidden_prev_grad_name, hidden_prev_dims);
auto weight_grad_name = framework::GradVarName("Weight");
if (ctx->HasOutput(weight_grad_name))
ctx->SetOutputDim(weight_grad_name, weight_dims);
}
};
} // namespace operators
} // namespace paddle
namespace ops = paddle::operators;
REGISTER_OP(gru_unit, ops::GRUUnitOp, ops::GRUUnitOpMaker, gru_unit_grad,
ops::GRUUnitGradOp);
REGISTER_OP_CPU_KERNEL(gru_unit,
ops::GRUUnitKernel<paddle::platform::CPUPlace, float>);
REGISTER_OP_CPU_KERNEL(
gru_unit_grad, ops::GRUUnitGradKernel<paddle::platform::CPUPlace, float>);
/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserve.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
#define EIGEN_USE_GPU
#include "paddle/operators/gru_unit_op.h"
namespace ops = paddle::operators;
REGISTER_OP_GPU_KERNEL(gru_unit,
ops::GRUUnitKernel<paddle::platform::GPUPlace, float>);
REGISTER_OP_GPU_KERNEL(
gru_unit_grad, ops::GRUUnitGradKernel<paddle::platform::GPUPlace, float>);
/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserve.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
#pragma once
#include "paddle/operators/activation_op.h"
#include "paddle/operators/math/math_function.h"
#include "paddle/framework/eigen.h"
#include "paddle/framework/op_registry.h"
namespace paddle {
namespace operators {
using Tensor = framework::Tensor;
template <typename T, int MajorType = Eigen::RowMajor,
typename IndexType = Eigen::DenseIndex>
using EigenMatrix = framework::EigenMatrix<T, MajorType, IndexType>;
enum GRUActivationType { identity = 0, sigmoid = 1, tanh = 2, relu = 3 };
template <typename Place, typename T>
class GRUUnitKernel : public framework::OpKernel<T> {
public:
template <typename Device, typename X, typename Y>
void ActCompute(const int act_type, const Device& d, X x, Y y) const {
if (act_type == identity)
y.device(d) = x;
else if (act_type == sigmoid)
SigmoidFunctor<T>()(d, x, y);
else if (act_type == tanh)
TanhFunctor<T>()(d, x, y);
else if (act_type == relu)
ReluFunctor<T>()(d, x, y);
else
PADDLE_THROW("unsupported activation type");
}
void Compute(const framework::ExecutionContext& context) const override {
auto* input = context.Input<Tensor>("Input");
auto* hidden_prev = context.Input<Tensor>("HiddenPrev");
auto* weight = context.Input<Tensor>("Weight");
auto* bias = context.Input<Tensor>("Bias");
auto* gate = context.Output<Tensor>("Gate");
gate->mutable_data<T>(context.GetPlace());
auto* reset_hidden_prev = context.Output<Tensor>("ResetHiddenPrev");
reset_hidden_prev->mutable_data<T>(context.GetPlace());
auto* hidden = context.Output<Tensor>("Hidden");
hidden->mutable_data<T>(context.GetPlace());
int batch_size = input->dims()[0];
int frame_size = hidden_prev->dims()[1];
auto x = EigenMatrix<T>::From(*input);
auto h_p = EigenMatrix<T>::From(*hidden_prev);
auto g = EigenMatrix<T>::From(*gate);
auto r_h_p = EigenMatrix<T>::From(*reset_hidden_prev);
auto h = EigenMatrix<T>::From(*hidden);
auto place = context.GetEigenDevice<Place>();
// calculate unactivated gate outputs
if (bias) {
auto b = EigenMatrix<T>::From(*bias);
g.device(place) = x +
b.reshape(Eigen::array<int, 2>({{1, frame_size * 3}}))
.broadcast(Eigen::array<int, 2>({{batch_size, 1}}));
} else {
g.device(place) = x;
}
const T* hidden_prev_data = hidden_prev->data<T>();
const T* weight_data = weight->data<T>();
T* gate_data = gate->data<T>();
T* reset_hidden_prev_data = reset_hidden_prev->data<T>();
math::gemm<Place, T>(context.device_context(), false, false, batch_size,
2 * frame_size, frame_size, 1, hidden_prev_data,
frame_size, weight_data, frame_size * 2, 1, gate_data,
frame_size * 3);
// calculate activited gate
Eigen::array<int, 2> extents({{batch_size, frame_size}});
Eigen::array<int, 2> u_offsets({{0, 0}});
ActCompute(context.Attr<int>("gate_activation"), place,
g.slice(u_offsets, extents), g.slice(u_offsets, extents));
auto u = g.slice(u_offsets, extents); // update gate
Eigen::array<int, 2> r_offsets({{0, frame_size}});
ActCompute(context.Attr<int>("gate_activation"), place,
g.slice(r_offsets, extents), g.slice(r_offsets, extents));
auto r = g.slice(r_offsets, extents); // reset gate
r_h_p.device(place) = r * h_p; // reset previous hidden state
math::gemm<Place, T>(context.device_context(), false, false, batch_size,
frame_size, frame_size, 1, reset_hidden_prev_data,
frame_size, weight_data + frame_size * frame_size * 2,
frame_size, 1, gate_data + frame_size * 2,
frame_size * 3);
Eigen::array<int, 2> c_offsets({{0, frame_size * 2}});
ActCompute(context.Attr<int>("activation"), place,
g.slice(c_offsets, extents), g.slice(c_offsets, extents));
auto c = g.slice(c_offsets, extents); // output candidate
// calculate final output
h.device(place) = u * (h_p - c) + c;
}
};
template <typename Place, typename T>
class GRUUnitGradKernel : public framework::OpKernel<T> {
public:
template <typename Device, typename X, typename Y, typename DX, typename DY>
void ActGradCompute(const int act_type, const Device& d, X x, Y y, DX dx,
DY dy) const {
// x is dummy and won't be used even in Relu(use y instead)
if (act_type == identity)
dx.device(d) = dy;
else if (act_type == sigmoid)
SigmoidGradFunctor<T>()(d, x, y, dy, dx);
else if (act_type == tanh)
TanhGradFunctor<T>()(d, x, y, dy, dx);
else if (act_type == relu)
ReluGradFunctor<T>()(d, x, y, dy, dx);
else
PADDLE_THROW("unsupported activation type");
}
void Compute(const framework::ExecutionContext& context) const override {
auto* input = context.Input<Tensor>("Input");
auto* hidden_prev = context.Input<Tensor>("HiddenPrev");
auto* weight = context.Input<Tensor>("Weight");
auto* gate = context.Input<Tensor>("Gate");
auto* reset_hidden_prev = context.Input<Tensor>("ResetHiddenPrev");
auto* hidden_grad = context.Input<Tensor>(framework::GradVarName("Hidden"));
auto* input_grad = context.Output<Tensor>(framework::GradVarName("Input"));
auto* hidden_prev_grad =
context.Output<Tensor>(framework::GradVarName("HiddenPrev"));
auto* weight_grad =
context.Output<Tensor>(framework::GradVarName("Weight"));
auto* bias_grad = context.Output<Tensor>(framework::GradVarName("Bias"));
input_grad->mutable_data<T>(context.GetPlace());
hidden_prev_grad->mutable_data<T>(context.GetPlace());
weight_grad->mutable_data<T>(context.GetPlace());
Tensor gate_grad;
gate_grad.mutable_data<T>(input->dims(), context.GetPlace());
Tensor reset_hidden_prev_grad;
reset_hidden_prev_grad.mutable_data<T>(reset_hidden_prev->dims(),
context.GetPlace());
int batch_size = input->dims()[0];
int frame_size = hidden_prev->dims()[1];
const T* hidden_prev_data = hidden_prev->data<T>();
T* hidden_prev_grad_data = hidden_prev_grad->data<T>();
const T* weight_data = weight->data<T>();
T* weight_grad_data = weight_grad->data<T>();
T* gate_grad_data = gate_grad.data<T>();
const T* reset_hidden_prev_data = reset_hidden_prev->data<T>();
T* reset_hidden_prev_grad_data = reset_hidden_prev_grad.data<T>();
auto h_p = EigenMatrix<T>::From(*hidden_prev);
auto g = EigenMatrix<T>::From(*gate);
auto d_h = EigenMatrix<T>::From(*hidden_grad);
auto d_x = EigenMatrix<T>::From(*input_grad);
auto d_h_p = EigenMatrix<T>::From(*hidden_prev_grad);
auto d_g = EigenMatrix<T>::From(gate_grad);
auto d_r_h_p = EigenMatrix<T>::From(reset_hidden_prev_grad);
auto place = context.GetEigenDevice<Place>();
Eigen::array<int, 2> extents({{batch_size, frame_size}});
Eigen::array<int, 2> u_offsets({{0, 0}});
auto u = g.slice(u_offsets, extents); // update gate
Eigen::array<int, 2> r_offsets({{0, frame_size}});
auto r = g.slice(r_offsets, extents); // reset gate
Eigen::array<int, 2> c_offsets({{0, frame_size * 2}});
auto c = g.slice(c_offsets, extents); // output candidate
// backward for unactivated update gate
ActGradCompute(context.Attr<int>("gate_activation"), place, u, u,
d_g.slice(u_offsets, extents), d_h * (h_p - c));
// backward for unactivated output candidate
ActGradCompute(context.Attr<int>("activation"), place, c, c,
d_g.slice(c_offsets, extents), d_h * (u.constant(T(1)) - u));
// backward for reset_hidden_prev
math::gemm<Place, T>(context.device_context(), false, true, batch_size,
frame_size, frame_size, 1,
gate_grad_data + frame_size * 2, frame_size * 3,
weight_data + frame_size * frame_size * 2, frame_size,
0, reset_hidden_prev_grad_data, frame_size);
// backward for state_weight
math::gemm<Place, T>(
context.device_context(), true, false, frame_size, frame_size,
batch_size, 1, reset_hidden_prev_data, frame_size,
gate_grad_data + frame_size * 2, frame_size * 3, 0,
weight_grad_data + frame_size * frame_size * 2, frame_size);
// backward for unactivated reset gate
ActGradCompute(context.Attr<int>("gate_activation"), place, r, r,
d_g.slice(r_offsets, extents), d_r_h_p * h_p);
// backward for update_gate_weight and reset_gate_weight
math::gemm<Place, T>(context.device_context(), true, false, frame_size,
frame_size * 2, batch_size, 1, hidden_prev_data,
frame_size, gate_grad_data, frame_size * 3, 0,
weight_grad_data, frame_size * 2);
// backward for hidden_prev
d_h_p.device(place) = d_r_h_p * r + d_h * u;
math::gemm<Place, T>(context.device_context(), false, true, batch_size,
frame_size, frame_size * 2, 1, gate_grad_data,
frame_size * 3, weight_data, frame_size * 2, 1,
hidden_prev_grad_data, frame_size);
// backward for input
d_x.device(place) = d_g;
// backward for bias
if (bias_grad) {
bias_grad->mutable_data<T>(context.GetPlace());
auto d_b = EigenMatrix<T>::From(*bias_grad);
d_b.device(place) = d_g.sum(Eigen::array<int, 1>({{0}}));
}
}
};
} // namespace operators
} // namespace paddle
...@@ -78,7 +78,7 @@ void testVol2col() { ...@@ -78,7 +78,7 @@ void testVol2col() {
if (paddle::platform::is_cpu_place(*place)) { if (paddle::platform::is_cpu_place(*place)) {
input = input_tmp; input = input_tmp;
} else { } else {
input.CopyFrom<float>(input_tmp, *place); input.CopyFrom<float>(input_tmp, *place, *context);
} }
output.mutable_data<float>({1, filter_size, filter_size, filter_size, output.mutable_data<float>({1, filter_size, filter_size, filter_size,
output_depth, output_height, output_width}, output_depth, output_height, output_width},
...@@ -93,7 +93,7 @@ void testVol2col() { ...@@ -93,7 +93,7 @@ void testVol2col() {
if (paddle::platform::is_cpu_place(*place)) { if (paddle::platform::is_cpu_place(*place)) {
out_cfo_ptr = output.data<float>(); out_cfo_ptr = output.data<float>();
} else { } else {
output_tmp.CopyFrom<float>(output, paddle::platform::CPUPlace()); output_tmp.CopyFrom<float>(output, paddle::platform::CPUPlace(), *context);
out_cfo_ptr = output_tmp.data<float>(); out_cfo_ptr = output_tmp.data<float>();
} }
...@@ -107,7 +107,7 @@ void testVol2col() { ...@@ -107,7 +107,7 @@ void testVol2col() {
if (paddle::platform::is_cpu_place(*place)) { if (paddle::platform::is_cpu_place(*place)) {
input = input_tmp; input = input_tmp;
} else { } else {
input.CopyFrom<float>(input_tmp, *place); input.CopyFrom<float>(input_tmp, *place, *context);
} }
paddle::operators::math::Col2VolFunctor<Place, float> col2vol; paddle::operators::math::Col2VolFunctor<Place, float> col2vol;
...@@ -118,7 +118,7 @@ void testVol2col() { ...@@ -118,7 +118,7 @@ void testVol2col() {
if (paddle::platform::is_cpu_place(*place)) { if (paddle::platform::is_cpu_place(*place)) {
in_ptr = input.data<float>(); in_ptr = input.data<float>();
} else { } else {
input_tmp.CopyFrom<float>(input, paddle::platform::CPUPlace()); input_tmp.CopyFrom<float>(input, paddle::platform::CPUPlace(), *context);
in_ptr = input_tmp.data<float>(); in_ptr = input_tmp.data<float>();
} }
......
...@@ -115,8 +115,9 @@ class MultiplexGradOp : public framework::OperatorWithKernel { ...@@ -115,8 +115,9 @@ class MultiplexGradOp : public framework::OperatorWithKernel {
} // namespace paddle } // namespace paddle
namespace ops = paddle::operators; namespace ops = paddle::operators;
REGISTER_OP(multiplex, ops::MultiplexOp, ops::MultiplexOpMaker, multiplex_grad, REGISTER_OPERATOR(multiplex, ops::MultiplexOp, ops::MultiplexOpMaker,
ops::MultiplexGradOp); paddle::framework::DefaultGradOpDescMaker<false>);
REGISTER_OPERATOR(multiplex_grad, ops::MultiplexGradOp);
REGISTER_OP_CPU_KERNEL( REGISTER_OP_CPU_KERNEL(
multiplex, ops::MultiplexCPUKernel<paddle::platform::CPUPlace, float>); multiplex, ops::MultiplexCPUKernel<paddle::platform::CPUPlace, float>);
REGISTER_OP_CPU_KERNEL( REGISTER_OP_CPU_KERNEL(
......
...@@ -70,14 +70,14 @@ void RecurrentAlgorithm::CreateScopes(const Scope& scope, ...@@ -70,14 +70,14 @@ void RecurrentAlgorithm::CreateScopes(const Scope& scope,
// the weight are located in parent scope // the weight are located in parent scope
for (auto& var_name : input.second) { for (auto& var_name : input.second) {
if (!step_scope.FindVar(var_name)) { if (!step_scope.FindVar(var_name)) {
step_scope.NewVar(var_name)->GetMutable<LoDTensor>(); step_scope.Var(var_name)->GetMutable<LoDTensor>();
} }
} }
} }
// create stepnet's outputs // create stepnet's outputs
for (const auto& output : (*stepnet_)->Outputs()) { for (const auto& output : (*stepnet_)->Outputs()) {
for (auto& var_name : output.second) { for (auto& var_name : output.second) {
step_scope.NewVar(var_name); step_scope.Var(var_name);
} }
} }
step_scopes->emplace_back(&step_scope); step_scopes->emplace_back(&step_scope);
...@@ -87,7 +87,7 @@ void RecurrentAlgorithm::CreateScopes(const Scope& scope, ...@@ -87,7 +87,7 @@ void RecurrentAlgorithm::CreateScopes(const Scope& scope,
void RecurrentAlgorithm::InitMemories(Scope* step_scope) const { void RecurrentAlgorithm::InitMemories(Scope* step_scope) const {
for (auto& attr : arg_->memories) { for (auto& attr : arg_->memories) {
auto* pre_mem = step_scope->NewVar(attr.pre_var)->GetMutable<LoDTensor>(); auto* pre_mem = step_scope->Var(attr.pre_var)->GetMutable<LoDTensor>();
PADDLE_ENFORCE(step_scope->FindVar(attr.boot_var) != nullptr, PADDLE_ENFORCE(step_scope->FindVar(attr.boot_var) != nullptr,
"memory [%s]'s boot variable [%s] not exists", attr.var, "memory [%s]'s boot variable [%s] not exists", attr.var,
attr.boot_var); attr.boot_var);
...@@ -167,9 +167,9 @@ void RecurrentGradientAlgorithm::LinkBootMemoryGradients( ...@@ -167,9 +167,9 @@ void RecurrentGradientAlgorithm::LinkBootMemoryGradients(
"memory variable [%s] does not exists", attr.var); "memory variable [%s] does not exists", attr.var);
PADDLE_ENFORCE(step_scope->FindVar(attr.boot_var) != nullptr, PADDLE_ENFORCE(step_scope->FindVar(attr.boot_var) != nullptr,
"boot variable [%s] does not exists", attr.boot_var); "boot variable [%s] does not exists", attr.boot_var);
auto* mem_grad = step_scope->NewVar(attr.var)->GetMutable<LoDTensor>(); auto* mem_grad = step_scope->Var(attr.var)->GetMutable<LoDTensor>();
auto* boot_mem_grad = auto* boot_mem_grad =
step_scope->NewVar(attr.boot_var)->GetMutable<LoDTensor>(); step_scope->Var(attr.boot_var)->GetMutable<LoDTensor>();
boot_mem_grad->Resize(mem_grad->dims()); boot_mem_grad->Resize(mem_grad->dims());
boot_mem_grad->ShareDataWith<float>(*mem_grad); boot_mem_grad->ShareDataWith<float>(*mem_grad);
} }
......
...@@ -40,7 +40,7 @@ void SegmentInputs(const std::vector<Scope*>& step_scopes, ...@@ -40,7 +40,7 @@ void SegmentInputs(const std::vector<Scope*>& step_scopes,
f::DDim step_dims = slice_ddim(dims, 1, dims.size()); f::DDim step_dims = slice_ddim(dims, 1, dims.size());
for (size_t j = 0; j < seq_len; j++) { for (size_t j = 0; j < seq_len; j++) {
Tensor* step_input = Tensor* step_input =
step_scopes[j]->NewVar(inlinks[i])->GetMutable<Tensor>(); step_scopes[j]->Var(inlinks[i])->GetMutable<Tensor>();
// The input of operators of each step is Tensor here. // The input of operators of each step is Tensor here.
// Maybe need to modify Slice function. // Maybe need to modify Slice function.
*step_input = input->Slice<float>(j, j + 1); *step_input = input->Slice<float>(j, j + 1);
......
...@@ -34,7 +34,7 @@ class SumOp : public framework::OperatorWithKernel { ...@@ -34,7 +34,7 @@ class SumOp : public framework::OperatorWithKernel {
auto in_dim = x_dims[0]; auto in_dim = x_dims[0];
for (size_t i = 1; i < N; i++) { for (size_t i = 1; i < N; i++) {
auto dim = x_dims[i]; auto dim = x_dims[i];
PADDLE_ENFORCE(in_dim == dim, "Input tensors must have same shape"); PADDLE_ENFORCE_EQ(in_dim, dim, "Input tensors must have same shape");
} }
ctx->SetOutputDim("Out", in_dim); ctx->SetOutputDim("Out", in_dim);
ctx->ShareLoD("X", /*->*/ "Out"); ctx->ShareLoD("X", /*->*/ "Out");
......
...@@ -54,7 +54,7 @@ class UniformRandomOp : public framework::OperatorWithKernel { ...@@ -54,7 +54,7 @@ class UniformRandomOp : public framework::OperatorWithKernel {
PADDLE_ENFORCE( PADDLE_ENFORCE(
ctx->Attrs().Get<float>("min") < ctx->Attrs().Get<float>("max"), ctx->Attrs().Get<float>("min") < ctx->Attrs().Get<float>("max"),
"uniform_random's min must less then max"); "uniform_random's min must less then max");
auto dims = Attr<std::vector<int>>("dims"); auto& dims = ctx->Attrs().Get<std::vector<int>>("dims");
std::vector<int64_t> temp; std::vector<int64_t> temp;
temp.reserve(dims.size()); temp.reserve(dims.size());
for (auto dim : dims) { for (auto dim : dims) {
......
...@@ -71,23 +71,32 @@ class ScopedTensorDescriptor { ...@@ -71,23 +71,32 @@ class ScopedTensorDescriptor {
inline cudnnTensorDescriptor_t descriptor(const cudnnTensorFormat_t format, inline cudnnTensorDescriptor_t descriptor(const cudnnTensorFormat_t format,
const cudnnDataType_t type, const cudnnDataType_t type,
const std::vector<int>& dims) { const std::vector<int>& dims,
// the format is not used now, but it maybe useful feature const int groups = 1) {
// the format is not used now, will add later
std::vector<int> strides(dims.size()); std::vector<int> strides(dims.size());
strides[dims.size() - 1] = 1; strides[dims.size() - 1] = 1;
for (int i = dims.size() - 2; i >= 0; i--) { for (int i = dims.size() - 2; i >= 0; i--) {
strides[i] = dims[i + 1] * strides[i + 1]; strides[i] = dims[i + 1] * strides[i + 1];
} }
// Update tensor descriptor dims setting if groups > 1
// FIXME(typhoonzero): Assume using NCHW order
std::vector<int> dims_with_group(dims.begin(), dims.end()); // copy
if (groups > 1) {
dims_with_group[1] = dims_with_group[1] / groups;
}
PADDLE_ENFORCE(dynload::cudnnSetTensorNdDescriptor( PADDLE_ENFORCE(dynload::cudnnSetTensorNdDescriptor(
desc_, type, dims.size(), dims.data(), strides.data())); desc_, type, dims_with_group.size(), dims_with_group.data(),
strides.data()));
return desc_; return desc_;
} }
template <typename T> template <typename T>
inline cudnnTensorDescriptor_t descriptor(const DataLayout& order, inline cudnnTensorDescriptor_t descriptor(const DataLayout& order,
const std::vector<int>& dims) { const std::vector<int>& dims,
return descriptor(GetCudnnTensorFormat(order), CudnnDataType<T>::type, const int groups = 1) {
dims); return descriptor(GetCudnnTensorFormat(order), CudnnDataType<T>::type, dims,
groups);
} }
private: private:
...@@ -106,18 +115,29 @@ class ScopedFilterDescriptor { ...@@ -106,18 +115,29 @@ class ScopedFilterDescriptor {
inline cudnnFilterDescriptor_t descriptor(const cudnnTensorFormat_t format, inline cudnnFilterDescriptor_t descriptor(const cudnnTensorFormat_t format,
const cudnnDataType_t type, const cudnnDataType_t type,
const std::vector<int>& kernel) { const std::vector<int>& kernel,
// filter layout: output input spatial_dim_y spatial_dim_x const int groups = 1) {
// filter layout: MCHW, where M is the number of
// output image channels, C is the number of input image channels,
// H and W is height and width of filter.
std::vector<int> kernel_with_group(kernel.begin(), kernel.end());
if (groups > 1) {
// M /= groups
kernel_with_group[0] /= groups;
// NOTE: input filter(C) of the filter is already asserted to be C/groups.
}
PADDLE_ENFORCE(dynload::cudnnSetFilterNdDescriptor( PADDLE_ENFORCE(dynload::cudnnSetFilterNdDescriptor(
desc_, type, format, kernel.size(), kernel.data())); desc_, type, format, kernel_with_group.size(),
kernel_with_group.data()));
return desc_; return desc_;
} }
template <typename T> template <typename T>
inline cudnnFilterDescriptor_t descriptor(const DataLayout& order, inline cudnnFilterDescriptor_t descriptor(const DataLayout& order,
const std::vector<int>& kernel) { const std::vector<int>& kernel,
const int groups = 1) {
return descriptor(GetCudnnTensorFormat(order), CudnnDataType<T>::type, return descriptor(GetCudnnTensorFormat(order), CudnnDataType<T>::type,
kernel); kernel, groups);
} }
private: private:
......
if(WITH_PYTHON) if(WITH_PYTHON)
cc_library(paddle_pybind SHARED cc_library(paddle_pybind SHARED
SRCS pybind.cc exception.cc protobuf.cc SRCS pybind.cc exception.cc protobuf.cc
DEPS pybind python backward proto_desc tensor_array DEPS pybind python backward proto_desc tensor_array paddle_memory executor
${GLOB_OP_LIB}) ${GLOB_OP_LIB})
endif(WITH_PYTHON) endif(WITH_PYTHON)
...@@ -118,12 +118,35 @@ void BindProgramDesc(py::module &m) { ...@@ -118,12 +118,35 @@ void BindProgramDesc(py::module &m) {
.def("append_block", &ProgramDescBind::AppendBlock, .def("append_block", &ProgramDescBind::AppendBlock,
py::return_value_policy::reference) py::return_value_policy::reference)
.def("append_backward", .def("append_backward",
[](ProgramDescBind &program_desc, [](ProgramDescBind &program_desc, const VarDescBind &target,
const std::unordered_set<std::string> &no_grad_vars) { const std::unordered_set<std::string> &no_grad_vars) {
AppendBackward(program_desc, no_grad_vars); ParamGradInfoMap param_grad_map =
AppendBackward(program_desc, target, no_grad_vars);
std::unordered_map<
std::string, std::tuple<std::string /* grad_var_name */,
int /* block_idx */, int /* op_idx */>>
retv;
for (auto it = param_grad_map.begin(); it != param_grad_map.end();
++it) {
const auto &grad_info = it->second;
retv[it->first] = std::make_tuple(
grad_info.name_, grad_info.block_idx_, grad_info.op_idx_);
}
return retv;
}) })
.def("block", &ProgramDescBind::Block, py::return_value_policy::reference) .def("block", &ProgramDescBind::Block, py::return_value_policy::reference)
.def("num_blocks", &ProgramDescBind::Size); .def("num_blocks", &ProgramDescBind::Size)
.def("serialize_to_string",
[](ProgramDescBind &program_desc) -> py::bytes {
const ProgramDesc *desc = program_desc.Proto();
PADDLE_ENFORCE(desc->IsInitialized(),
"ProgramDesc has not been initialized.");
std::string res;
PADDLE_ENFORCE(
desc->SerializeToString(&res),
"Serialize ProgramDesc Error. This could be a bug of Paddle.");
return res;
});
} }
void BindBlockDesc(py::module &m) { void BindBlockDesc(py::module &m) {
...@@ -134,22 +157,32 @@ void BindBlockDesc(py::module &m) { ...@@ -134,22 +157,32 @@ void BindBlockDesc(py::module &m) {
py::return_value_policy::reference) py::return_value_policy::reference)
.def("prepend_op", &BlockDescBind::PrependOp, .def("prepend_op", &BlockDescBind::PrependOp,
py::return_value_policy::reference) py::return_value_policy::reference)
.def("new_var", .def("var",
[](BlockDescBind &self, py::bytes byte_name) { [](BlockDescBind &self, py::bytes byte_name) {
std::string name = byte_name; std::string name = byte_name;
return self.NewVar(name); return self.Var(name);
}, },
py::return_value_policy::reference) py::return_value_policy::reference)
.def("var", .def("find_var",
[](BlockDescBind &self, py::bytes byte_name) { [](BlockDescBind &self, py::bytes byte_name) {
std::string name = byte_name; std::string name = byte_name;
return self.Var(name); return self.FindVar(name);
}, },
py::return_value_policy::reference) py::return_value_policy::reference)
.def("all_vars", &BlockDescBind::AllVars, .def("all_vars", &BlockDescBind::AllVars,
py::return_value_policy::reference) py::return_value_policy::reference)
.def("all_ops", &BlockDescBind::AllOps, .def("all_ops", &BlockDescBind::AllOps,
py::return_value_policy::reference); py::return_value_policy::reference)
.def("serialize_to_string", [](BlockDescBind &block_desc) -> py::bytes {
const BlockDesc *desc = block_desc.Proto();
PADDLE_ENFORCE(desc->IsInitialized(),
"BlockDesc has not been initialized.");
std::string res;
PADDLE_ENFORCE(
desc->SerializeToString(&res),
"Serialize BlockDesc Error. This could be a bug of Paddle.");
return res;
});
} }
void BindVarDsec(py::module &m) { void BindVarDsec(py::module &m) {
...@@ -162,7 +195,8 @@ void BindVarDsec(py::module &m) { ...@@ -162,7 +195,8 @@ void BindVarDsec(py::module &m) {
.value("FP32", DataType::FP32) .value("FP32", DataType::FP32)
.value("FP64", DataType::FP64); .value("FP64", DataType::FP64);
py::class_<VarDescBind>(m, "VarDesc", "") py::class_<VarDescBind> var_desc(m, "VarDesc", "");
var_desc
.def("name", .def("name",
[](const VarDescBind &self) { [](const VarDescBind &self) {
py::bytes name = self.Name(); py::bytes name = self.Name();
...@@ -174,7 +208,23 @@ void BindVarDsec(py::module &m) { ...@@ -174,7 +208,23 @@ void BindVarDsec(py::module &m) {
.def("shape", &VarDescBind::Shape, py::return_value_policy::reference) .def("shape", &VarDescBind::Shape, py::return_value_policy::reference)
.def("data_type", &VarDescBind::GetDataType) .def("data_type", &VarDescBind::GetDataType)
.def("lod_level", &VarDescBind::GetLodLevel) .def("lod_level", &VarDescBind::GetLodLevel)
.def("set_lod_level", &VarDescBind::SetLoDLevel); .def("set_lod_level", &VarDescBind::SetLoDLevel)
.def("type", &VarDescBind::GetType)
.def("set_type", &VarDescBind::SetType)
.def("serialize_to_string", [](VarDescBind &var_desc) -> py::bytes {
const VarDesc *desc = var_desc.Proto();
PADDLE_ENFORCE(desc->IsInitialized(),
"VarDesc has not been initialized.");
std::string res;
PADDLE_ENFORCE(
desc->SerializeToString(&res),
"Serialize VarDesc Error. This could be a bug of Paddle.");
return res;
});
py::enum_<VarDesc::VarType>(var_desc, "VarType", "")
.value("LOD_TENSOR", VarDesc::LOD_TENSOR)
.value("SELECTED_ROWS", VarDesc::SELECTED_ROWS);
} }
void BindOpDesc(py::module &m) { void BindOpDesc(py::module &m) {
...@@ -204,9 +254,19 @@ void BindOpDesc(py::module &m) { ...@@ -204,9 +254,19 @@ void BindOpDesc(py::module &m) {
.def("set_attr", &OpDescBind::SetAttr) .def("set_attr", &OpDescBind::SetAttr)
.def("attr", &OpDescBind::GetAttr) .def("attr", &OpDescBind::GetAttr)
.def("set_block_attr", &OpDescBind::SetBlockAttr) .def("set_block_attr", &OpDescBind::SetBlockAttr)
.def("get_block_attr", &OpDescBind::GetBlockAttr) .def("block_attr", &OpDescBind::GetBlockAttr)
.def("check_attrs", &OpDescBind::CheckAttrs) .def("check_attrs", &OpDescBind::CheckAttrs)
.def("infer_shape", &OpDescBind::InferShape); .def("infer_shape", &OpDescBind::InferShape)
.def("serialize_to_string", [](OpDescBind &op_desc) -> py::bytes {
const OpDesc *desc = op_desc.Proto();
PADDLE_ENFORCE(desc->IsInitialized(),
"OpDesc has not been initialized.");
std::string res;
PADDLE_ENFORCE(
desc->SerializeToString(&res),
"Serialize OpDesc Error. This could be a bug of Paddle.");
return res;
});
} }
} // namespace pybind } // namespace pybind
......
...@@ -15,9 +15,11 @@ limitations under the License. */ ...@@ -15,9 +15,11 @@ limitations under the License. */
#include "paddle/pybind/protobuf.h" #include "paddle/pybind/protobuf.h"
#include "paddle/framework/backward.h" #include "paddle/framework/backward.h"
#include "paddle/framework/executor.h"
#include "paddle/framework/lod_tensor.h" #include "paddle/framework/lod_tensor.h"
#include "paddle/framework/tensor_array.h" #include "paddle/framework/tensor_array.h"
#include "paddle/operators/cond_op.h" #include "paddle/operators/cond_op.h"
#include "paddle/operators/dynamic_recurrent_op.h"
#include "paddle/operators/net_op.h" #include "paddle/operators/net_op.h"
#include "paddle/operators/recurrent_op.h" #include "paddle/operators/recurrent_op.h"
#include "paddle/platform/enforce.h" #include "paddle/platform/enforce.h"
...@@ -163,9 +165,9 @@ All parameter, weight, gradient are variables in Paddle. ...@@ -163,9 +165,9 @@ All parameter, weight, gradient are variables in Paddle.
py::return_value_policy::reference); py::return_value_policy::reference);
py::class_<Scope>(m, "Scope", "") py::class_<Scope>(m, "Scope", "")
.def("new_var", .def("var",
[](Scope &self, const std::string &name) -> Variable * { [](Scope &self, const std::string &name) -> Variable * {
return self.NewVar(name); return self.Var(name);
}, },
py::return_value_policy::reference) py::return_value_policy::reference)
.def("find_var", &Scope::FindVar, py::return_value_policy::reference) .def("find_var", &Scope::FindVar, py::return_value_policy::reference)
...@@ -341,6 +343,33 @@ All parameter, weight, gradient are variables in Paddle. ...@@ -341,6 +343,33 @@ All parameter, weight, gradient are variables in Paddle.
self.set_stepnet(net.Clone()); self.set_stepnet(net.Clone());
}); });
py::class_<operators::DynamicRecurrentOp, OperatorBase>(m,
"DynamicRecurrentOp")
.def_static("create",
[](py::bytes protobin) -> operators::DynamicRecurrentOp * {
OpDesc desc;
PADDLE_ENFORCE(desc.ParsePartialFromString(protobin),
"Cannot parse user input to OpDesc");
PADDLE_ENFORCE(desc.IsInitialized(),
"User OpDesc is not initialized, reason %s",
desc.InitializationErrorString());
auto rnn_op = OpRegistry::CreateOp(desc);
return static_cast<operators::DynamicRecurrentOp *>(
rnn_op.release());
})
.def("set_stepnet",
[](operators::DynamicRecurrentOp &self, const operators::NetOp &net)
-> void { self.SetStepNet(net.Clone()); })
.def("get_state",
[](operators::DynamicRecurrentOp &self, const std::string &name)
-> const TensorArray & { return self.state(name); })
.def("get_step_input",
[](operators::DynamicRecurrentOp &self, const std::string &name)
-> const TensorArray & { return self.step_input(name); })
.def("get_step_output",
[](operators::DynamicRecurrentOp &self, const std::string &name)
-> const TensorArray & { return self.step_output(name); });
// cond_op // cond_op
py::class_<operators::CondOp, OperatorBase>(m, "CondOp") py::class_<operators::CondOp, OperatorBase>(m, "CondOp")
.def_static("create", .def_static("create",
...@@ -363,6 +392,14 @@ All parameter, weight, gradient are variables in Paddle. ...@@ -363,6 +392,14 @@ All parameter, weight, gradient are variables in Paddle.
self.set_falsenet(net.Clone()); self.set_falsenet(net.Clone());
}); });
py::class_<framework::Executor>(m, "Executor")
.def(py::init<std::vector<platform::Place> &>())
.def("run",
[](Executor &self, const ProgramDesc &program_desc, int block_id) {
framework::Scope &global_scope = GetGlobalScope();
self.Run(program_desc, &global_scope, block_id);
});
m.def("unique_integer", UniqueIntegerGenerator); m.def("unique_integer", UniqueIntegerGenerator);
m.def("is_compile_gpu", IsCompileGPU); m.def("is_compile_gpu", IsCompileGPU);
......
...@@ -39,15 +39,18 @@ add_test(NAME test_CompareTwoNets ...@@ -39,15 +39,18 @@ add_test(NAME test_CompareTwoNets
################ test_CompareMKLDNNandCPU ###################### ################ test_CompareMKLDNNandCPU ######################
if(WITH_MKLDNN) if(WITH_MKLDNN)
add_unittest_without_exec(test_CompareMKLDNNandCPU macro(gen_command VAR_NAME CONFIG_FILE)
test_CompareTwoNets.cpp) set(${VAR_NAME} "${PADDLE_SOURCE_DIR}/paddle/.set_python_path.sh" "-d" "${PADDLE_SOURCE_DIR}/python/"
add_test(NAME test_CompareMKLDNNandCPU "${CMAKE_CURRENT_BINARY_DIR}/test_CompareMKLDNNandCPU --use_gpu=False"
COMMAND ${PADDLE_SOURCE_DIR}/paddle/.set_python_path.sh -d ${PADDLE_SOURCE_DIR}/python/ "--config_file_a=trainer/tests/${CONFIG_FILE} --use_mkldnn_a=True"
${CMAKE_CURRENT_BINARY_DIR}/test_CompareMKLDNNandCPU "--config_file_b=trainer/tests/${CONFIG_FILE} --use_mkldnn_b=False"
--config_file_a=trainer/tests/sample_trainer_config_simple_net.conf --use_mkldnn_a=True "WORKING_DIRECTORY" "${PADDLE_SOURCE_DIR}/paddle/")
--config_file_b=trainer/tests/sample_trainer_config_simple_net.conf --use_mkldnn_b=False endmacro()
--use_gpu=False add_unittest_without_exec(test_CompareMKLDNNandCPU test_CompareTwoNets.cpp)
WORKING_DIRECTORY ${PADDLE_SOURCE_DIR}/paddle/) gen_command(compare_simple_net "sample_trainer_config_simple_net.conf")
gen_command(compare_branch_net "sample_trainer_config_branch_net.conf")
add_test(NAME test_CompareMKLDNNandCPU_simple_net COMMAND ${compare_simple_net})
add_test(NAME test_CompareMKLDNNandCPU_branch_net COMMAND ${compare_branch_net})
endif() endif()
############### test_CompareTwoOpts ################### ############### test_CompareTwoOpts ###################
......
# Copyright (c) 2017 PaddlePaddle Authors. All Rights Reserved
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
from paddle.trainer_config_helpers import *
################################### Data Configuration ###################################
TrainData(ProtoData(files = "trainer/tests/mnist.list"))
################################### Algorithm Configuration ###################################
settings(batch_size = 256,
learning_method = MomentumOptimizer(momentum=0.5, sparse=False))
################################### Network Configuration ###################################
data = data_layer(name ="input", size=784)
tmp = img_conv_layer(input=data,
num_channels=1,
filter_size=3,
num_filters=32,
padding=1,
shared_biases=True,
act=ReluActivation())
a1 = img_conv_layer(input=tmp,
filter_size=1,
num_filters=32,
padding=0,
shared_biases=True,
act=ReluActivation())
a2 = img_conv_layer(input=tmp,
filter_size=3,
num_filters=32,
padding=1,
shared_biases=True,
act=ReluActivation())
tmp = concat_layer(input=[a1, a2])
tmp = img_pool_layer(input=tmp,
num_channels=64,
pool_size=3,
stride=2,
padding=1,
pool_type=AvgPooling())
b1 = img_conv_layer(input=tmp,
filter_size=3,
num_filters=64,
padding=1,
shared_biases=True,
act=ReluActivation())
b1 = img_pool_layer(input=b1,
pool_size=3,
stride=1,
padding=1,
pool_type=MaxPooling())
b2 = img_conv_layer(input=tmp,
filter_size=5,
num_filters=64,
padding=2,
shared_biases=True,
act=ReluActivation())
b2 = img_pool_layer(input=b2,
pool_size=5,
stride=1,
padding=2,
pool_type=MaxPooling())
tmp = addto_layer(input=[b1, b2],
act=ReluActivation(),
bias_attr=False)
tmp = img_pool_layer(input=tmp,
pool_size=3,
stride=2,
padding=1,
pool_type=MaxPooling())
tmp = fc_layer(input=tmp, size=64,
bias_attr=False,
act=TanhActivation())
output = fc_layer(input=tmp, size=10,
bias_attr=True,
act=SoftmaxActivation())
lbl = data_layer(name ="label", size=10)
cost = classification_cost(input=output, label=lbl)
outputs(cost)
...@@ -5,7 +5,7 @@ Default scope function. ...@@ -5,7 +5,7 @@ Default scope function.
thread-local stack of Scope. Top of that stack is current scope, the bottom thread-local stack of Scope. Top of that stack is current scope, the bottom
of that stack is all scopes' parent. of that stack is all scopes' parent.
Invoking `new_var/find_var` can `new/find` variable in current scope. Invoking `var/find_var` can `new/find` variable in current scope.
Invoking `enter_local_scope/leave_local_scope` can create or destroy local Invoking `enter_local_scope/leave_local_scope` can create or destroy local
scope. scope.
...@@ -19,7 +19,7 @@ import threading ...@@ -19,7 +19,7 @@ import threading
__tl_scope__ = threading.local() __tl_scope__ = threading.local()
__all__ = [ __all__ = [
'get_cur_scope', 'enter_local_scope', 'leave_local_scope', 'new_var', 'get_cur_scope', 'enter_local_scope', 'leave_local_scope', 'var',
'find_var', 'scoped_function' 'find_var', 'scoped_function'
] ]
...@@ -54,11 +54,11 @@ def leave_local_scope(): ...@@ -54,11 +54,11 @@ def leave_local_scope():
get_cur_scope().drop_kids() get_cur_scope().drop_kids()
def new_var(name): def var(name):
""" """
create variable in current scope. create variable in current scope.
""" """
return get_cur_scope().new_var(name) return get_cur_scope().var(name)
def find_var(name): def find_var(name):
......
...@@ -219,6 +219,27 @@ class __RecurrentOp__(object): ...@@ -219,6 +219,27 @@ class __RecurrentOp__(object):
return core.RecurrentOp.create(proto.SerializeToString()) return core.RecurrentOp.create(proto.SerializeToString())
class __DynamicRecurrentOp__(object):
__proto__ = None
type = "dynamic_recurrent"
def __init__(self):
# cache recurrent_op's proto
if self.__proto__ is None:
for op_proto in get_all_op_protos():
if op_proto.type == self.type:
self.__proto__ = op_proto
def __call__(self, *args, **kwargs):
if self.type not in args and "type" not in kwargs:
kwargs["type"] = self.type
# create proto
create_method = OpDescCreationMethod(self.__proto__)
proto = create_method(*args, **kwargs)
# create rnnop
return core.DynamicRecurrentOp.create(proto.SerializeToString())
class __CondOp__(object): class __CondOp__(object):
__proto__ = None __proto__ = None
type = "cond" type = "cond"
...@@ -242,4 +263,5 @@ class __CondOp__(object): ...@@ -242,4 +263,5 @@ class __CondOp__(object):
Operator = OperatorFactory() # The default global factory Operator = OperatorFactory() # The default global factory
RecurrentOp = __RecurrentOp__() RecurrentOp = __RecurrentOp__()
DynamicRecurrentOp = __DynamicRecurrentOp__()
CondOp = __CondOp__() CondOp = __CondOp__()
...@@ -14,7 +14,7 @@ def create_op(scope, op_type, inputs, outputs, attrs): ...@@ -14,7 +14,7 @@ def create_op(scope, op_type, inputs, outputs, attrs):
kwargs = dict() kwargs = dict()
def __create_var__(name, var_name): def __create_var__(name, var_name):
scope.new_var(var_name) scope.var(var_name)
kwargs[name].append(var_name) kwargs[name].append(var_name)
for in_name, in_dup in Operator.get_op_inputs(op_type): for in_name, in_dup in Operator.get_op_inputs(op_type):
...@@ -71,7 +71,7 @@ def set_input(scope, op, inputs, place): ...@@ -71,7 +71,7 @@ def set_input(scope, op, inputs, place):
def set_output_grad(scope, op, outputs, place): def set_output_grad(scope, op, outputs, place):
def __set_tensor__(name): def __set_tensor__(name):
out_tensor = scope.find_var(name).get_tensor() out_tensor = scope.find_var(name).get_tensor()
grad_tensor = scope.new_var(grad_var_name(name)).get_tensor() grad_tensor = scope.var(grad_var_name(name)).get_tensor()
out_dtype = out_tensor.dtype() out_dtype = out_tensor.dtype()
if out_dtype == core.DataType.FP64: if out_dtype == core.DataType.FP64:
data = np.ones(out_tensor.shape(), dtype=np.float64) data = np.ones(out_tensor.shape(), dtype=np.float64)
...@@ -169,10 +169,10 @@ def get_numeric_gradient(scope, ...@@ -169,10 +169,10 @@ def get_numeric_gradient(scope,
def get_backward_op(scope, op, no_grad_set): def get_backward_op(scope, op, no_grad_set):
backward_op = core.Operator.backward(op, no_grad_set) backward_op = core.Operator.backward(op, no_grad_set)
for input in backward_op.input_vars(): for input in backward_op.input_vars():
var = scope.new_var(input) var = scope.var(input)
var.get_tensor() var.get_tensor()
for output in backward_op.output_vars(): for output in backward_op.output_vars():
var = scope.new_var(output) var = scope.var(output)
var.get_tensor() var.get_tensor()
return backward_op return backward_op
......
...@@ -384,5 +384,33 @@ class TestThresholdedRelu(OpTest): ...@@ -384,5 +384,33 @@ class TestThresholdedRelu(OpTest):
self.check_grad(['X'], 'Y', max_relative_error=self.relative_error) self.check_grad(['X'], 'Y', max_relative_error=self.relative_error)
class TestHardSigmoid(OpTest):
def setUp(self):
self.op_type = "hard_sigmoid"
self.relative_error = 0.002
X = np.random.uniform(-5, 5, [2, 2]).astype("float32")
slope = 0.2
offset = 0.5
lower_threshold = -offset / slope
upper_threshold = (1 - offset) / slope
self.inputs = {'X': X}
# Same reason as TestAbs
X[np.abs(X - lower_threshold) < self.relative_error] = \
lower_threshold + 0.2
X[np.abs(X - upper_threshold) < self.relative_error] = \
upper_threshold - 0.2
temp = X * slope + offset
self.outputs = {'Y': np.maximum(0.0, np.minimum(1.0, temp))}
def test_check_output(self):
self.check_output()
def test_check_grad(self):
self.check_grad(['X'], 'Y', max_relative_error=0.002)
if __name__ == "__main__": if __name__ == "__main__":
unittest.main() unittest.main()
此差异已折叠。
...@@ -39,7 +39,7 @@ class PySimpleCondTest(unittest.TestCase): ...@@ -39,7 +39,7 @@ class PySimpleCondTest(unittest.TestCase):
def create_tensor(scope, name, shape, np_data): def create_tensor(scope, name, shape, np_data):
tensor = scope.new_var(name).get_tensor() tensor = scope.var(name).get_tensor()
tensor.set_dims(shape) tensor.set_dims(shape)
tensor.set(np_data, core.CPUPlace()) tensor.set(np_data, core.CPUPlace())
return tensor return tensor
...@@ -74,9 +74,9 @@ class TestCondOp(unittest.TestCase): ...@@ -74,9 +74,9 @@ class TestCondOp(unittest.TestCase):
create_tensor(self.scope, "X", [10, 1], x_np_data) create_tensor(self.scope, "X", [10, 1], x_np_data)
cond_np_data = self.py_cond.cond.astype("int32") cond_np_data = self.py_cond.cond.astype("int32")
create_tensor(self.scope, "cond", [10, 1], cond_np_data) create_tensor(self.scope, "cond", [10, 1], cond_np_data)
self.scope.new_var("SubScopes") self.scope.var("SubScopes")
self.scope.new_var("IndexTensors") self.scope.var("IndexTensors")
self.scope.new_var("Out") self.scope.var("Out")
def create_cond_op(self): def create_cond_op(self):
self.condop = CondOp( self.condop = CondOp(
......
import unittest
import numpy as np
from op_test import OpTest
class TestDecayedAdagradOp1(OpTest):
''' Test DecayedAdagrad operator with explicit attributes
'''
def setUp(self):
self.op_type = "decayed_adagrad"
param = np.random.random((123, 321)).astype("float32")
grad = np.random.random((123, 321)).astype("float32")
moment = np.zeros((123, 321)).astype("float32")
lr = 0.01
decay = 0.80
epsilon = 1e-8
self.inputs = {
'Param': param,
'Grad': grad,
'Moment': moment,
'LearningRate': np.array([lr]).astype("float32")
}
self.attrs = {'decay': decay, 'epsilon': epsilon}
moment_out = decay * moment + (1 - decay) * grad * grad
param_out = param - lr * grad / (np.sqrt(moment_out) + epsilon)
self.outputs = {'ParamOut': param_out, 'MomentOut': moment_out}
def test_check_output(self):
self.check_output()
class TestDecayedAdagradOp2(OpTest):
''' Test DecayedAdagrad operator with default attributes
'''
def setUp(self):
self.op_type = "decayed_adagrad"
param = np.random.random((123, 321)).astype("float32")
grad = np.random.random((123, 321)).astype("float32")
moment = np.zeros((123, 321)).astype("float32")
lr = 0.01
decay = 0.95
epsilon = 1e-6
self.inputs = {
'Param': param,
'Grad': grad,
'Moment': moment,
'LearningRate': np.array([lr]).astype("float32")
}
self.attrs = {'decay': decay, 'epsilon': epsilon}
moment_out = decay * moment + (1 - decay) * grad * grad
param_out = param - lr * grad / (np.sqrt(moment_out) + epsilon)
self.outputs = {'ParamOut': param_out, 'MomentOut': moment_out}
def test_check_output(self):
self.check_output()
if __name__ == "__main__":
unittest.main()
...@@ -10,7 +10,7 @@ class TestDefaultScopeFuncs(unittest.TestCase): ...@@ -10,7 +10,7 @@ class TestDefaultScopeFuncs(unittest.TestCase):
self.assertIsNone(find_var("test")) self.assertIsNone(find_var("test"))
def test_create_var_get_var(self): def test_create_var_get_var(self):
var_a = new_var("var_a") var_a = var("var_a")
self.assertIsNotNone(var_a) self.assertIsNotNone(var_a)
self.assertIsNotNone(get_cur_scope().find_var('var_a')) self.assertIsNotNone(get_cur_scope().find_var('var_a'))
enter_local_scope() enter_local_scope()
...@@ -19,7 +19,7 @@ class TestDefaultScopeFuncs(unittest.TestCase): ...@@ -19,7 +19,7 @@ class TestDefaultScopeFuncs(unittest.TestCase):
def test_var_get_int(self): def test_var_get_int(self):
def __new_scope__(): def __new_scope__():
i = new_var("var_i") i = var("var_i")
self.assertFalse(i.is_int()) self.assertFalse(i.is_int())
i.set_int(10) i.set_int(10)
self.assertTrue(i.is_int()) self.assertTrue(i.is_int())
......
...@@ -14,7 +14,7 @@ class TestGaussianRandomOp(unittest.TestCase): ...@@ -14,7 +14,7 @@ class TestGaussianRandomOp(unittest.TestCase):
def gaussian_random_test(self, place): def gaussian_random_test(self, place):
scope = core.Scope() scope = core.Scope()
scope.new_var('Out').get_tensor() scope.var('Out').get_tensor()
op = Operator( op = Operator(
"gaussian_random", "gaussian_random",
......
此差异已折叠。
此差异已折叠。
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册