提交 3f4177ea 编写于 作者: D Dong Zhihong

Merge branch 'develop' into fix/scope

无相关合并请求
...@@ -105,6 +105,12 @@ if (WITH_C_API AND WITH_PYTHON) ...@@ -105,6 +105,12 @@ if (WITH_C_API AND WITH_PYTHON)
"different Python interpreter from compiling.") "different Python interpreter from compiling.")
endif() endif()
# Build type forwarded to the bundled third-party projects as their
# CMAKE_BUILD_TYPE: Release by default, MinSizeRel for mobile inference builds.
set(THIRD_PARTY_BUILD_TYPE Release)
if(MOBILE_INFERENCE)
  set(THIRD_PARTY_BUILD_TYPE MinSizeRel)
endif()
######################################################################################## ########################################################################################
include(external/mklml) # download mklml package include(external/mklml) # download mklml package
......
...@@ -8,7 +8,7 @@ ExternalProject_Add( ...@@ -8,7 +8,7 @@ ExternalProject_Add(
extern_eigen3 extern_eigen3
${EXTERNAL_PROJECT_LOG_ARGS} ${EXTERNAL_PROJECT_LOG_ARGS}
GIT_REPOSITORY "https://github.com/RLovelett/eigen.git" GIT_REPOSITORY "https://github.com/RLovelett/eigen.git"
GIT_TAG "master" GIT_TAG 4e79cb69b9425f5f8c3a84be4350d4ab75b5fd9d
PREFIX ${EIGEN_SOURCE_DIR} PREFIX ${EIGEN_SOURCE_DIR}
UPDATE_COMMAND "" UPDATE_COMMAND ""
CONFIGURE_COMMAND "" CONFIGURE_COMMAND ""
......
...@@ -36,6 +36,7 @@ ExternalProject_Add( ...@@ -36,6 +36,7 @@ ExternalProject_Add(
# change this back to the official Github repo once my PR is # change this back to the official Github repo once my PR is
# merged. # merged.
GIT_REPOSITORY "https://github.com/wangkuiyi/gflags.git" GIT_REPOSITORY "https://github.com/wangkuiyi/gflags.git"
GIT_TAG 986964c07427ecb9cdb5bd73f73ebbd40e54dadb
PREFIX ${GFLAGS_SOURCES_DIR} PREFIX ${GFLAGS_SOURCES_DIR}
UPDATE_COMMAND "" UPDATE_COMMAND ""
CMAKE_ARGS -DCMAKE_CXX_COMPILER=${CMAKE_CXX_COMPILER} CMAKE_ARGS -DCMAKE_CXX_COMPILER=${CMAKE_CXX_COMPILER}
...@@ -45,11 +46,11 @@ ExternalProject_Add( ...@@ -45,11 +46,11 @@ ExternalProject_Add(
-DCMAKE_INSTALL_PREFIX=${GFLAGS_INSTALL_DIR} -DCMAKE_INSTALL_PREFIX=${GFLAGS_INSTALL_DIR}
-DCMAKE_POSITION_INDEPENDENT_CODE=ON -DCMAKE_POSITION_INDEPENDENT_CODE=ON
-DBUILD_TESTING=OFF -DBUILD_TESTING=OFF
-DCMAKE_BUILD_TYPE=Release -DCMAKE_BUILD_TYPE=${THIRD_PARTY_BUILD_TYPE}
${EXTERNAL_OPTIONAL_ARGS} ${EXTERNAL_OPTIONAL_ARGS}
CMAKE_CACHE_ARGS -DCMAKE_INSTALL_PREFIX:PATH=${GFLAGS_INSTALL_DIR} CMAKE_CACHE_ARGS -DCMAKE_INSTALL_PREFIX:PATH=${GFLAGS_INSTALL_DIR}
-DCMAKE_POSITION_INDEPENDENT_CODE:BOOL=ON -DCMAKE_POSITION_INDEPENDENT_CODE:BOOL=ON
-DCMAKE_BUILD_TYPE:STRING=Release -DCMAKE_BUILD_TYPE:STRING=${THIRD_PARTY_BUILD_TYPE}
) )
ADD_LIBRARY(gflags STATIC IMPORTED GLOBAL) ADD_LIBRARY(gflags STATIC IMPORTED GLOBAL)
......
...@@ -31,6 +31,7 @@ ExternalProject_Add( ...@@ -31,6 +31,7 @@ ExternalProject_Add(
${EXTERNAL_PROJECT_LOG_ARGS} ${EXTERNAL_PROJECT_LOG_ARGS}
DEPENDS gflags DEPENDS gflags
GIT_REPOSITORY "https://github.com/google/glog.git" GIT_REPOSITORY "https://github.com/google/glog.git"
GIT_TAG v0.3.5
PREFIX ${GLOG_SOURCES_DIR} PREFIX ${GLOG_SOURCES_DIR}
UPDATE_COMMAND "" UPDATE_COMMAND ""
CMAKE_ARGS -DCMAKE_CXX_COMPILER=${CMAKE_CXX_COMPILER} CMAKE_ARGS -DCMAKE_CXX_COMPILER=${CMAKE_CXX_COMPILER}
...@@ -43,12 +44,12 @@ ExternalProject_Add( ...@@ -43,12 +44,12 @@ ExternalProject_Add(
-DWITH_GFLAGS=ON -DWITH_GFLAGS=ON
-Dgflags_DIR=${GFLAGS_INSTALL_DIR}/lib/cmake/gflags -Dgflags_DIR=${GFLAGS_INSTALL_DIR}/lib/cmake/gflags
-DBUILD_TESTING=OFF -DBUILD_TESTING=OFF
-DCMAKE_BUILD_TYPE=Release -DCMAKE_BUILD_TYPE=${THIRD_PARTY_BUILD_TYPE}
${EXTERNAL_OPTIONAL_ARGS} ${EXTERNAL_OPTIONAL_ARGS}
CMAKE_CACHE_ARGS -DCMAKE_INSTALL_PREFIX:PATH=${GLOG_INSTALL_DIR} CMAKE_CACHE_ARGS -DCMAKE_INSTALL_PREFIX:PATH=${GLOG_INSTALL_DIR}
-DCMAKE_INSTALL_LIBDIR:PATH=${GLOG_INSTALL_DIR}/lib -DCMAKE_INSTALL_LIBDIR:PATH=${GLOG_INSTALL_DIR}/lib
-DCMAKE_POSITION_INDEPENDENT_CODE:BOOL=ON -DCMAKE_POSITION_INDEPENDENT_CODE:BOOL=ON
-DCMAKE_BUILD_TYPE:STRING=Release -DCMAKE_BUILD_TYPE:STRING=${THIRD_PARTY_BUILD_TYPE}
) )
ADD_LIBRARY(glog STATIC IMPORTED GLOBAL) ADD_LIBRARY(glog STATIC IMPORTED GLOBAL)
......
...@@ -56,11 +56,11 @@ IF(WITH_TESTING) ...@@ -56,11 +56,11 @@ IF(WITH_TESTING)
-DBUILD_GMOCK=ON -DBUILD_GMOCK=ON
-Dgtest_disable_pthreads=ON -Dgtest_disable_pthreads=ON
-Dgtest_force_shared_crt=ON -Dgtest_force_shared_crt=ON
-DCMAKE_BUILD_TYPE=Release -DCMAKE_BUILD_TYPE=${THIRD_PARTY_BUILD_TYPE}
${EXTERNAL_OPTIONAL_ARGS} ${EXTERNAL_OPTIONAL_ARGS}
CMAKE_CACHE_ARGS -DCMAKE_INSTALL_PREFIX:PATH=${GTEST_INSTALL_DIR} CMAKE_CACHE_ARGS -DCMAKE_INSTALL_PREFIX:PATH=${GTEST_INSTALL_DIR}
-DCMAKE_POSITION_INDEPENDENT_CODE:BOOL=ON -DCMAKE_POSITION_INDEPENDENT_CODE:BOOL=ON
-DCMAKE_BUILD_TYPE:STRING=Release -DCMAKE_BUILD_TYPE:STRING=${THIRD_PARTY_BUILD_TYPE}
) )
ADD_LIBRARY(gtest STATIC IMPORTED GLOBAL) ADD_LIBRARY(gtest STATIC IMPORTED GLOBAL)
......
...@@ -191,12 +191,12 @@ FUNCTION(build_protobuf TARGET_NAME BUILD_FOR_HOST) ...@@ -191,12 +191,12 @@ FUNCTION(build_protobuf TARGET_NAME BUILD_FOR_HOST)
${OPTIONAL_ARGS} ${OPTIONAL_ARGS}
-Dprotobuf_BUILD_TESTS=OFF -Dprotobuf_BUILD_TESTS=OFF
-DCMAKE_POSITION_INDEPENDENT_CODE=ON -DCMAKE_POSITION_INDEPENDENT_CODE=ON
-DCMAKE_BUILD_TYPE=Release -DCMAKE_BUILD_TYPE=${THIRD_PARTY_BUILD_TYPE}
-DCMAKE_INSTALL_PREFIX=${PROTOBUF_INSTALL_DIR} -DCMAKE_INSTALL_PREFIX=${PROTOBUF_INSTALL_DIR}
-DCMAKE_INSTALL_LIBDIR=lib -DCMAKE_INSTALL_LIBDIR=lib
CMAKE_CACHE_ARGS CMAKE_CACHE_ARGS
-DCMAKE_INSTALL_PREFIX:PATH=${PROTOBUF_INSTALL_DIR} -DCMAKE_INSTALL_PREFIX:PATH=${PROTOBUF_INSTALL_DIR}
-DCMAKE_BUILD_TYPE:STRING=Release -DCMAKE_BUILD_TYPE:STRING=${THIRD_PARTY_BUILD_TYPE}
-DCMAKE_VERBOSE_MAKEFILE:BOOL=OFF -DCMAKE_VERBOSE_MAKEFILE:BOOL=OFF
-DCMAKE_POSITION_INDEPENDENT_CODE:BOOL=ON -DCMAKE_POSITION_INDEPENDENT_CODE:BOOL=ON
${OPTIONAL_CACHE_ARGS} ${OPTIONAL_CACHE_ARGS}
......
...@@ -35,6 +35,7 @@ ExternalProject_Add( ...@@ -35,6 +35,7 @@ ExternalProject_Add(
extern_warpctc extern_warpctc
${EXTERNAL_PROJECT_LOG_ARGS} ${EXTERNAL_PROJECT_LOG_ARGS}
GIT_REPOSITORY "https://github.com/gangliao/warp-ctc.git" GIT_REPOSITORY "https://github.com/gangliao/warp-ctc.git"
GIT_TAG b63a0644654a3e0ed624c85a1767bc8193aead09
PREFIX ${WARPCTC_SOURCES_DIR} PREFIX ${WARPCTC_SOURCES_DIR}
UPDATE_COMMAND "" UPDATE_COMMAND ""
CMAKE_ARGS -DCMAKE_CXX_COMPILER=${CMAKE_CXX_COMPILER} CMAKE_ARGS -DCMAKE_CXX_COMPILER=${CMAKE_CXX_COMPILER}
...@@ -48,9 +49,9 @@ ExternalProject_Add( ...@@ -48,9 +49,9 @@ ExternalProject_Add(
-DCMAKE_DISABLE_FIND_PACKAGE_Torch=ON -DCMAKE_DISABLE_FIND_PACKAGE_Torch=ON
-DBUILD_SHARED=ON -DBUILD_SHARED=ON
-DCMAKE_POSITION_INDEPENDENT_CODE=ON -DCMAKE_POSITION_INDEPENDENT_CODE=ON
-DCMAKE_BUILD_TYPE=Release -DCMAKE_BUILD_TYPE=${THIRD_PARTY_BUILD_TYPE}
${EXTERNAL_OPTIONAL_ARGS} ${EXTERNAL_OPTIONAL_ARGS}
CMAKE_CACHE_ARGS -DCMAKE_BUILD_TYPE:STRING=Release CMAKE_CACHE_ARGS -DCMAKE_BUILD_TYPE:STRING=${THIRD_PARTY_BUILD_TYPE}
-DCMAKE_POSITION_INDEPENDENT_CODE:BOOL=ON -DCMAKE_POSITION_INDEPENDENT_CODE:BOOL=ON
-DCMAKE_INSTALL_PREFIX:PATH=${WARPCTC_INSTALL_DIR} -DCMAKE_INSTALL_PREFIX:PATH=${WARPCTC_INSTALL_DIR}
) )
......
...@@ -42,11 +42,11 @@ ExternalProject_Add( ...@@ -42,11 +42,11 @@ ExternalProject_Add(
-DBUILD_SHARED_LIBS=OFF -DBUILD_SHARED_LIBS=OFF
-DCMAKE_POSITION_INDEPENDENT_CODE=ON -DCMAKE_POSITION_INDEPENDENT_CODE=ON
-DCMAKE_MACOSX_RPATH=ON -DCMAKE_MACOSX_RPATH=ON
-DCMAKE_BUILD_TYPE=Release -DCMAKE_BUILD_TYPE=${THIRD_PARTY_BUILD_TYPE}
${EXTERNAL_OPTIONAL_ARGS} ${EXTERNAL_OPTIONAL_ARGS}
CMAKE_CACHE_ARGS -DCMAKE_INSTALL_PREFIX:PATH=${ZLIB_INSTALL_DIR} CMAKE_CACHE_ARGS -DCMAKE_INSTALL_PREFIX:PATH=${ZLIB_INSTALL_DIR}
-DCMAKE_POSITION_INDEPENDENT_CODE:BOOL=ON -DCMAKE_POSITION_INDEPENDENT_CODE:BOOL=ON
-DCMAKE_BUILD_TYPE:STRING=Release -DCMAKE_BUILD_TYPE:STRING=${THIRD_PARTY_BUILD_TYPE}
) )
LIST(APPEND external_project_dependencies zlib) LIST(APPEND external_project_dependencies zlib)
......
...@@ -3,15 +3,17 @@ ...@@ -3,15 +3,17 @@
## The Problem Posed ## The Problem Posed
In our current operator registration mechanism, for each operator, the programmer should register a *gradient operator creator* function, which takes a C++ operator instance, and returns the corresponding gradient instance. Currently, for each C++ operator class definition, there registers a *gradient operator creator* function, which takes a C++ operator instance and returns the corresponding gradient operator instance.
However, as we decided to separate the *compilation* and *execution* of DL models, we need to reshape the creator to take a protobuf `OpDesc` message, and return a corresponding message. However, we noticed two problems with the current design:
More than that, the new registration mechanism needs to support the fact that an operator's gradient computation might be a composition of operators. 1. As we decided to separate the *compilation* and *execution* phases, we need to change the creator to take an `OpDesc` protobuf message in a `ProgramDesc` and insert the corresponding `OpDesc` messages into the `ProgramDesc` message.
## Current Implementation 1. Some operators' gradient computation requires more than one gradient operator. For example, the gradient of *minus* consists of two operators -- an identity operator and a scale operator. So we need to make the registration mechanism support the mapping from an operator to a set of operators for gradient computation.
OpInfos store in a association map which key is the operator type. The `grad_op_type` indicate associated gradient operator type. Operator can create gradient operator by `OpInfo::creator_` of gradient. The pseudo code is ## The Current Implementation
The C++ class `OpInfos` is stored in an association map whose key is the operator type. The `grad_op_type` indicates the associated gradient operator type. An operator can create its gradient operator via the `OpInfo::creator_` of the gradient. The pseudo code is
```cpp ```cpp
struct OpInfo { struct OpInfo {
......
...@@ -16,16 +16,23 @@ The computation graph is constructed by Data Node and Operation Node. The concep ...@@ -16,16 +16,23 @@ The computation graph is constructed by Data Node and Operation Node. The concep
## Definition of VarDesc ## Definition of VarDesc
A VarDesc should have a name and value; in PaddlePaddle, the value will always be a tensor. Since we use LoDTensor most of the time, we add a LoDTensorDesc to represent it. A VarDesc should have a name and a value. There are two kinds of variable types at compile time: `LoDTensor` and `SelectedRows`.
```proto ```proto
message VarDesc { message VarDesc {
required string name = 1; required string name = 1;
optional LoDTensorDesc lod_tensor = 2; enum VarType {
LOD_TENSOR = 0;
SELECTED_ROWS = 1;
}
required VarType type = 2;
optional LoDTensorDesc lod_desc = 3;
optional TensorDesc selected_rows_desc = 4;
optional bool persistable = 5 [ default = false ];
} }
``` ```
## Definition of LodTensorDesc ## Definition of TensorDesc
```proto ```proto
enum DataType { enum DataType {
...@@ -38,87 +45,25 @@ enum DataType { ...@@ -38,87 +45,25 @@ enum DataType {
FP64 = 6; FP64 = 6;
} }
message LoDTensorDesc { message TensorDesc {
required DataType data_type = 1; required DataType data_type = 1;
repeated int32 dims = 2; // [UNK, 640, 480] is saved as [-1, 640, 480] repeated int64 dims = 2; // [UNK, 640, 480] is saved as [-1, 640, 480]
optional int32 lod_level = 3 [default=0];
} }
``` ```
## Definition of Variable in Python A TensorDesc describes `SelectedRows` and `LoDTensor`. For details of `SelectedRows`, please reference [`SelectedRows`](./selected_rows.md).
In Python API, layer will take Variable as Input, and return Variable as Output. There should be a class `Variable` in python to help create and manage Variable.
```python
image = Variable(dims=[-1, 640, 480])
# fc1 and fc2 are both Variable
fc1 = layer.fc(input=image, output_size=10)
fc2 = layer.fc(input=fc1, output_size=20)
```
### what should class `Variable` Have
1. `name`. A name of string type is used to mark the value of the Variable.
1. `initializer`. Since our Tensor does not have a value, we will always use some Operator to fill it at run time. So we should have an initialize method to help add the init operator.
1. `operator`. A Variable should record which operator produces it. The reason is:
- we use pd.eval(targets=[var1, var2]) to run the related ops to get the value of var1 and var2. var.op is used to trace the dependency of the current variable.
In PaddlePaddle, we use Block to describe Computation Graph, so in the code we will use Block but not Graph.
```python
import VarDesc
import LoDTensorDesc
import framework
def AddInitialOperator(variable, initializer):
# add an initialize Operator to block to init this Variable
class Variable(object):
def __init__(self, name, dims, type, initializer):
self._block = get_default_block()
self._name = name
self.op = None
tensor_desc = LoDTensorDesc(data_type=type, dims=dims)
_var_desc = VarDesc(name=name, lod_tensor=tensor_desc)
self._var = framework.CreateVar(_var_desc)
self._block.add_var(self)
# add initial op according to initializer ## Definition of LodTensorDesc
if initializer is not None:
AddInitialOperator(self, initializer)
def dims(self):
return self._var.dims()
def data_type(self):
return self._var.data_type()
def to_proto(self): ```proto
pass message LoDTensorDesc {
required TensorDesc tensor = 1;
optional int32 lod_level = 2;
}
``` ```
Then we can use this Variable to create a fc layer in Python. A LoDTensorDesc contains a tensor and a lod_level.
```python ## Definition of Variable in Python
import paddle as pd
def flatten_size(X, num_flatten_dims):
prod = 1 # of last num_flatten_dims
for i in xrange(num_flatten_dims):
prod = prod * X.dims[-i-1]
return prod
def layer.fc(X, output_size, num_flatten_dims):
W = Variable(pd.random_uniform(), type=FP32, dims=[flatten_size(X, num_flatten_dims), output_size])
b = Variable(pd.random_uniform(), type=FP32, dims=[output_size])
out = Variable(type=FP32)
y = operator.fc(X, W, b, output=out) # fc will put fc op input into out
pd.InferShape(y)
return out
x = Variable(dims=[-1, 640, 480])
y = layer.fc(x, output_size=100)
z = layer.fc(y, output_size=200)
paddle.eval(targets=[z], ...) For Variable in Python, please reference [`Python API`](./python_api.md).
print(z)
```
...@@ -19,10 +19,10 @@ cc_test(scope_test SRCS scope_test.cc DEPS scope) ...@@ -19,10 +19,10 @@ cc_test(scope_test SRCS scope_test.cc DEPS scope)
proto_library(framework_proto SRCS framework.proto) proto_library(framework_proto SRCS framework.proto)
cc_library(attribute SRCS attribute.cc DEPS framework_proto) cc_library(attribute SRCS attribute.cc DEPS framework_proto)
cc_library(proto_desc SRCS var_desc.cc op_desc.cc block_desc.cc program_desc.cc DEPS attribute ddim) cc_library(proto_desc SRCS var_desc.cc op_desc.cc block_desc.cc program_desc.cc DEPS attribute ddim op_info)
cc_library(op_proto_maker SRCS op_proto_maker.cc DEPS framework_proto attribute) cc_library(op_proto_maker SRCS op_proto_maker.cc DEPS framework_proto attribute)
cc_test(op_proto_maker_test SRCS op_proto_maker_test.cc DEPS op_proto_maker) cc_test(op_proto_maker_test SRCS op_proto_maker_test.cc DEPS op_proto_maker)
cc_library(op_info SRCS op_info.cc DEPS attribute framework_proto proto_desc) cc_library(op_info SRCS op_info.cc DEPS attribute framework_proto)
cc_library(operator SRCS operator.cc DEPS op_info device_context tensor scope proto_desc) cc_library(operator SRCS operator.cc DEPS op_info device_context tensor scope proto_desc)
cc_test(operator_test SRCS operator_test.cc DEPS operator op_registry) cc_test(operator_test SRCS operator_test.cc DEPS operator op_registry)
...@@ -53,3 +53,6 @@ endif() ...@@ -53,3 +53,6 @@ endif()
cc_library(tensor_array SRCS tensor_array.cc DEPS lod_tensor) cc_library(tensor_array SRCS tensor_array.cc DEPS lod_tensor)
cc_test(tensor_array_test SRCS tensor_array_test.cc DEPS tensor_array place) cc_test(tensor_array_test SRCS tensor_array_test.cc DEPS tensor_array place)
cc_library(selected_rows SRCS selected_rows.cc DEPS tensor)
cc_test(selected_rows_test SRCS selected_rows_test.cc DEPS selected_rows)
...@@ -28,14 +28,15 @@ namespace paddle { ...@@ -28,14 +28,15 @@ namespace paddle {
namespace framework { namespace framework {
static inline std::unique_ptr<OperatorBase> CreateGradOp( static inline std::unique_ptr<OperatorBase> CreateGradOp(
const OperatorBase& op) { const OperatorBase& op, const std::unordered_set<std::string>& no_grad_set,
std::unordered_map<std::string, std::string>* grad_to_var) {
OpDescBind op_desc; OpDescBind op_desc;
op_desc.SetInputMap(op.Inputs()); op_desc.SetInputMap(op.Inputs());
op_desc.SetOutputMap(op.Outputs()); op_desc.SetOutputMap(op.Outputs());
op_desc.SetType(op.Type()); op_desc.SetType(op.Type());
op_desc.SetAttrMap(op.Attrs()); op_desc.SetAttrMap(op.Attrs());
auto& info = OpInfoMap::Instance().Get(op.Type()); auto& info = OpInfoMap::Instance().Get(op.Type());
auto grad_descs = info.GradOpMaker()(op_desc); auto grad_descs = info.GradOpMaker()(op_desc, no_grad_set, grad_to_var);
std::vector<std::unique_ptr<OperatorBase>> grad_ops; std::vector<std::unique_ptr<OperatorBase>> grad_ops;
grad_ops.reserve(grad_descs.size()); grad_ops.reserve(grad_descs.size());
std::transform(grad_descs.begin(), grad_descs.end(), std::transform(grad_descs.begin(), grad_descs.end(),
...@@ -98,7 +99,9 @@ static std::unique_ptr<OperatorBase> NOP() { ...@@ -98,7 +99,9 @@ static std::unique_ptr<OperatorBase> NOP() {
// See Backward.h for details // See Backward.h for details
static std::unique_ptr<OperatorBase> BackwardRecursive( static std::unique_ptr<OperatorBase> BackwardRecursive(
const OperatorBase& forwardOp, const OperatorBase& forwardOp,
std::unordered_set<std::string>& no_grad_names, size_t& uniq_id) { std::unordered_set<std::string>& no_grad_names,
std::unordered_map<std::string, std::string>* grad_to_var,
size_t& uniq_id) {
// If all input gradients of forwarding operator do not need to calculate, // If all input gradients of forwarding operator do not need to calculate,
// just return an NOP. Not return null ptr because NOP does not take // just return an NOP. Not return null ptr because NOP does not take
// too much time for calculation, but it is useful for simplifying logic. // too much time for calculation, but it is useful for simplifying logic.
...@@ -136,7 +139,7 @@ static std::unique_ptr<OperatorBase> BackwardRecursive( ...@@ -136,7 +139,7 @@ static std::unique_ptr<OperatorBase> BackwardRecursive(
for (auto it = forwardNet.ops_.rbegin(); it != forwardNet.ops_.rend(); for (auto it = forwardNet.ops_.rbegin(); it != forwardNet.ops_.rend();
++it, ++local_op_id) { ++it, ++local_op_id) {
auto& fwd = *it; auto& fwd = *it;
auto bwd = BackwardRecursive(*fwd, no_grad_names, uniq_id); auto bwd = BackwardRecursive(*fwd, no_grad_names, grad_to_var, uniq_id);
ForEachVarName(bwd->Outputs(), ForEachVarName(bwd->Outputs(),
[&dup_output_ops, local_op_id](const std::string& out) { [&dup_output_ops, local_op_id](const std::string& out) {
dup_output_ops[out].emplace_back(local_op_id); dup_output_ops[out].emplace_back(local_op_id);
...@@ -187,7 +190,8 @@ static std::unique_ptr<OperatorBase> BackwardRecursive( ...@@ -187,7 +190,8 @@ static std::unique_ptr<OperatorBase> BackwardRecursive(
net->InsertOp(pos.first + 1, std::move(pos.second)); net->InsertOp(pos.first + 1, std::move(pos.second));
} }
} else { } else {
std::unique_ptr<OperatorBase> grad_op(CreateGradOp(forwardOp)); std::unique_ptr<OperatorBase> grad_op(
CreateGradOp(forwardOp, no_grad_names, grad_to_var));
ForEachVarName(grad_op->Inputs(), [&no_grad_names, &net, &grad_op]( ForEachVarName(grad_op->Inputs(), [&no_grad_names, &net, &grad_op](
const std::string& grad_input) { const std::string& grad_input) {
...@@ -226,7 +230,7 @@ static std::unique_ptr<OperatorBase> BackwardRecursive( ...@@ -226,7 +230,7 @@ static std::unique_ptr<OperatorBase> BackwardRecursive(
*static_cast<const OperatorBase*>(&rnnop.stepnet()); *static_cast<const OperatorBase*>(&rnnop.stepnet());
// create stepnet's gradient op // create stepnet's gradient op
rnn_grad_op->set_stepnet( rnn_grad_op->set_stepnet(
BackwardRecursive(stepnet_op, no_grad_names, uniq_id)); BackwardRecursive(stepnet_op, no_grad_names, grad_to_var, uniq_id));
} }
if (net->ops_.empty()) { // Current no aux op is added to network if (net->ops_.empty()) { // Current no aux op is added to network
...@@ -253,7 +257,8 @@ std::unique_ptr<OperatorBase> Backward( ...@@ -253,7 +257,8 @@ std::unique_ptr<OperatorBase> Backward(
no_grad_names.insert(name + kGradVarSuffix); no_grad_names.insert(name + kGradVarSuffix);
} }
size_t uid = 0; size_t uid = 0;
return BackwardRecursive(forwardOp, no_grad_names, uid); std::unordered_map<std::string, std::string> grad_to_var;
return BackwardRecursive(forwardOp, no_grad_names, &grad_to_var, uid);
} }
// ==================================== // // ==================================== //
...@@ -270,28 +275,31 @@ static bool AllGradInSet(const std::vector<std::string>& names, ...@@ -270,28 +275,31 @@ static bool AllGradInSet(const std::vector<std::string>& names,
std::vector<std::unique_ptr<OpDescBind>> MakeOpGrad( std::vector<std::unique_ptr<OpDescBind>> MakeOpGrad(
const std::unique_ptr<OpDescBind>& op_desc, const std::unique_ptr<OpDescBind>& op_desc,
std::unordered_set<std::string>& no_grad_vars) { std::unordered_set<std::string>* no_grad_vars,
std::unordered_map<std::string, std::string>* grad_to_var) {
std::vector<std::unique_ptr<OpDescBind>> grad_op_descs; std::vector<std::unique_ptr<OpDescBind>> grad_op_descs;
// All input gradients of forwarding operator do not need to calculat. // All input gradients of forwarding operator do not need to calculate.
const std::vector<std::string>& inputs = op_desc->InputArgumentNames(); const std::vector<std::string>& inputs = op_desc->InputArgumentNames();
if (AllGradInSet(inputs, no_grad_vars)) { if (AllGradInSet(inputs, *no_grad_vars)) {
return grad_op_descs; // empty vector return grad_op_descs; // empty vector
} }
// All output gradients of forwarding operator do not need to calculate. // All output gradients of forwarding operator do not need to calculate.
const std::vector<std::string>& outputs = op_desc->OutputArgumentNames(); const std::vector<std::string>& outputs = op_desc->OutputArgumentNames();
if (AllGradInSet(outputs, no_grad_vars)) { if (AllGradInSet(outputs, *no_grad_vars)) {
for (const std::string& name : inputs) { for (const std::string& name : inputs) {
no_grad_vars.insert(GradVarName(name)); no_grad_vars->insert(GradVarName(name));
} }
return grad_op_descs; // empty vector return grad_op_descs; // empty vector
} }
grad_op_descs = OpRegistry::CreateGradOpDescs(op_desc.get()); grad_op_descs = OpInfoMap::Instance()
.Get(op_desc->Type())
.GradOpMaker()(*op_desc, *no_grad_vars, grad_to_var);
std::list<std::unique_ptr<OpDescBind>> pending_fill_zeros_ops; std::list<std::unique_ptr<OpDescBind>> pending_fill_zeros_ops;
for (auto& desc : grad_op_descs) { for (auto& desc : grad_op_descs) {
for (const std::string& in_name : desc->InputArgumentNames()) { for (const std::string& in_name : desc->InputArgumentNames()) {
if (no_grad_vars.count(in_name)) { if (no_grad_vars->count(in_name)) {
std::string prefix = in_name.substr( std::string prefix = in_name.substr(
0, in_name.size() - sizeof(kGradVarSuffix) / sizeof(char) + 1); 0, in_name.size() - sizeof(kGradVarSuffix) / sizeof(char) + 1);
std::string new_name = prefix + kZeroVarSuffix; std::string new_name = prefix + kZeroVarSuffix;
...@@ -301,11 +309,6 @@ std::vector<std::unique_ptr<OpDescBind>> MakeOpGrad( ...@@ -301,11 +309,6 @@ std::vector<std::unique_ptr<OpDescBind>> MakeOpGrad(
pending_fill_zeros_ops.push_back(std::move(fill_zeros_op)); pending_fill_zeros_ops.push_back(std::move(fill_zeros_op));
} }
} }
for (const std::string& out_name : desc->OutputArgumentNames()) {
if (no_grad_vars.count(out_name)) {
desc->Rename(out_name, kEmptyVarName);
}
}
} }
for (auto& p : pending_fill_zeros_ops) { for (auto& p : pending_fill_zeros_ops) {
...@@ -316,7 +319,8 @@ std::vector<std::unique_ptr<OpDescBind>> MakeOpGrad( ...@@ -316,7 +319,8 @@ std::vector<std::unique_ptr<OpDescBind>> MakeOpGrad(
std::vector<std::unique_ptr<OpDescBind>> MakeBlockBackward( std::vector<std::unique_ptr<OpDescBind>> MakeBlockBackward(
ProgramDescBind& program_desc, int block_idx, ProgramDescBind& program_desc, int block_idx,
std::unordered_set<std::string>& no_grad_vars) { std::unordered_set<std::string>* no_grad_vars,
std::unordered_map<std::string, std::string>* grad_to_var) {
BlockDescBind* cur_block = program_desc.Block(block_idx); BlockDescBind* cur_block = program_desc.Block(block_idx);
std::deque<std::unique_ptr<OpDescBind>>& op_descs = cur_block->ops_; std::deque<std::unique_ptr<OpDescBind>>& op_descs = cur_block->ops_;
std::unordered_map<std::string, std::vector<size_t>> dup_out_ops; std::unordered_map<std::string, std::vector<size_t>> dup_out_ops;
...@@ -324,15 +328,15 @@ std::vector<std::unique_ptr<OpDescBind>> MakeBlockBackward( ...@@ -324,15 +328,15 @@ std::vector<std::unique_ptr<OpDescBind>> MakeBlockBackward(
std::vector<std::unique_ptr<OpDescBind>> backward_descs; std::vector<std::unique_ptr<OpDescBind>> backward_descs;
for (auto it = op_descs.rbegin(); it != op_descs.rend(); ++it) { for (auto it = op_descs.rbegin(); it != op_descs.rend(); ++it) {
std::vector<std::unique_ptr<OpDescBind>> op_grads = std::vector<std::unique_ptr<OpDescBind>> op_grads =
MakeOpGrad(*it, no_grad_vars); MakeOpGrad(*it, no_grad_vars, grad_to_var);
if ((*it)->Type() == "recurrent") { if ((*it)->Type() == "recurrent") {
PADDLE_ENFORCE_EQ( PADDLE_ENFORCE_EQ(
op_grads.size(), size_t(1), op_grads.size(), size_t(1),
"rnn_op's gradient process should contain only one op."); "rnn_op's gradient process should contain only one op.");
int step_block_idx = (*it)->GetBlockAttr("stop_block"); int step_block_idx = (*it)->GetBlockAttr("stop_block");
auto backward_block_op_descs = auto backward_block_op_descs = MakeBlockBackward(
MakeBlockBackward(program_desc, step_block_idx, no_grad_vars); program_desc, step_block_idx, no_grad_vars, grad_to_var);
BlockDescBind* backward_block = program_desc.AppendBlock(*cur_block); BlockDescBind* backward_block = program_desc.AppendBlock(*cur_block);
for (auto& ptr : backward_block_op_descs) { for (auto& ptr : backward_block_op_descs) {
backward_block->ops_.push_back(std::move(ptr)); backward_block->ops_.push_back(std::move(ptr));
...@@ -388,8 +392,9 @@ void AppendBackward(ProgramDescBind& program_desc, ...@@ -388,8 +392,9 @@ void AppendBackward(ProgramDescBind& program_desc,
no_grad_var_names.insert(GradVarName(name)); no_grad_var_names.insert(GradVarName(name));
} }
const int root_block_idx = 0; const int root_block_idx = 0;
auto backward_op_descs = std::unordered_map<std::string, std::string> grad_to_var;
MakeBlockBackward(program_desc, root_block_idx, no_grad_var_names); auto backward_op_descs = MakeBlockBackward(program_desc, root_block_idx,
&no_grad_var_names, &grad_to_var);
auto& forw_op_descs = program_desc.Block(root_block_idx)->ops_; auto& forw_op_descs = program_desc.Block(root_block_idx)->ops_;
for (auto& ptr : backward_op_descs) { for (auto& ptr : backward_op_descs) {
forw_op_descs.push_back(std::move(ptr)); forw_op_descs.push_back(std::move(ptr));
......
...@@ -169,6 +169,45 @@ class MultInOutOpMaker : public OpProtoAndCheckerMaker { ...@@ -169,6 +169,45 @@ class MultInOutOpMaker : public OpProtoAndCheckerMaker {
} }
}; };
class MinusGradOpDescMaker : public GradOpDescMakerBase {
public:
using GradOpDescMakerBase::GradOpDescMakerBase;
std::vector<std::unique_ptr<OpDescBind>> operator()() const override {
std::vector<std::unique_ptr<OpDescBind>> retv;
auto x_g = InputGrad("X");
if (!x_g.empty()) {
auto *op_desc = new OpDescBind();
op_desc->SetType("scale");
op_desc->SetInput("X", OutputGrad("Out"));
op_desc->SetOutput("Out", x_g);
op_desc->SetAttr("scale", 1.0f);
retv.emplace_back(op_desc);
}
auto y_g = InputGrad("Y");
if (!y_g.empty()) {
auto *op_desc = new OpDescBind();
op_desc->SetType("scale");
op_desc->SetInput("X", OutputGrad("Out"));
op_desc->SetOutput("Out", y_g);
op_desc->SetAttr("scale", -1.0f);
retv.emplace_back(op_desc);
}
return retv;
}
};
// Proto/checker maker for the test-only "minus" op.
// Declares two inputs ("X" and "Y") and one output ("Out"), all with empty
// descriptions, and tags the op with the comment "minus for unittest".
class MinusOpMaker : public OpProtoAndCheckerMaker {
public:
// Forwards `proto` and `op_checker` to the base maker, then registers the
// op's inputs, output and comment.
MinusOpMaker(OpProto *proto, OpAttrChecker *op_checker)
: OpProtoAndCheckerMaker(proto, op_checker) {
AddInput("X", "");
AddInput("Y", "");
AddOutput("Out", "");
AddComment("minus for unittest");
}
};
} // namespace framework } // namespace framework
} // namespace paddle } // namespace paddle
...@@ -187,6 +226,7 @@ REGISTER_OP_WITHOUT_GRADIENT(fc, f::FcOp, f::FcOpMaker); ...@@ -187,6 +226,7 @@ REGISTER_OP_WITHOUT_GRADIENT(fc, f::FcOp, f::FcOpMaker);
REGISTER_OP(many_output_op, f::NOP, f::ManyOutputOpMaker, many_output_op_grad, REGISTER_OP(many_output_op, f::NOP, f::ManyOutputOpMaker, many_output_op_grad,
f::NOP); f::NOP);
REGISTER_OP(mult_in_out, f::NOP, f::MultInOutOpMaker, mult_in_out_grad, f::NOP); REGISTER_OP(mult_in_out, f::NOP, f::MultInOutOpMaker, mult_in_out_grad, f::NOP);
REGISTER_OPERATOR(minus, f::NOP, f::MinusOpMaker, f::MinusGradOpDescMaker);
TEST(Backward, simple_op_not_need_grad) { TEST(Backward, simple_op_not_need_grad) {
auto fwd = f::OpRegistry::CreateOp( auto fwd = f::OpRegistry::CreateOp(
...@@ -395,12 +435,13 @@ TEST(Backward, linear_net_intermediate_variable_has_no_grad) { ...@@ -395,12 +435,13 @@ TEST(Backward, linear_net_intermediate_variable_has_no_grad) {
2UL /* external input number */ 2UL /* external input number */
+ 1UL /* external output number*/ + 1UL /* external output number*/
+ 1UL /* number of gradient of external output*/ + 1UL /* number of gradient of external output*/
+ 2U /* internal variable number*/); + 2UL /* internal variable number*/
);
EXPECT_EQ(grad_fc.Outputs(all).size(), EXPECT_EQ(grad_fc.Outputs(all).size(),
2UL /* input number of mul*/ 2UL /* input number of mul*/
+ 2UL /* input number of rowwise_add + 2UL /* input number of rowwise_add*/
*/ + 1UL /* input number of sigmod */
+ 1UL /* input number of sigmod */); - 1UL /* out2 is not needed*/);
EXPECT_EQ(bwd_net->ops_[1]->Inputs(all).size(), 0UL); EXPECT_EQ(bwd_net->ops_[1]->Inputs(all).size(), 0UL);
EXPECT_EQ(bwd_net->ops_[1]->Outputs(all).size(), 0UL); EXPECT_EQ(bwd_net->ops_[1]->Outputs(all).size(), 0UL);
EXPECT_EQ(bwd_net->ops_[2]->Inputs(all).size(), 0UL); EXPECT_EQ(bwd_net->ops_[2]->Inputs(all).size(), 0UL);
...@@ -580,8 +621,7 @@ TEST(Backward, intermedia_var_no_grad) { ...@@ -580,8 +621,7 @@ TEST(Backward, intermedia_var_no_grad) {
std::vector<std::string>({f::GradVarName("out4")})); std::vector<std::string>({f::GradVarName("out4")}));
EXPECT_EQ(grad_op4->Output(f::GradVarName("X")), EXPECT_EQ(grad_op4->Output(f::GradVarName("X")),
std::vector<std::string>({f::GradVarName("out1")})); std::vector<std::string>({f::GradVarName("out1")}));
EXPECT_EQ(grad_op4->Output(f::GradVarName("Y")), EXPECT_EQ(grad_op4->Output(f::GradVarName("Y")), std::vector<std::string>());
std::vector<std::string>({f::kEmptyVarName}));
} }
TEST(Backward, var_no_grad) { TEST(Backward, var_no_grad) {
...@@ -619,8 +659,7 @@ TEST(Backward, var_no_grad) { ...@@ -619,8 +659,7 @@ TEST(Backward, var_no_grad) {
std::vector<std::string>({f::GradVarName("z2")})); std::vector<std::string>({f::GradVarName("z2")}));
EXPECT_EQ(grad_op2->Output(f::GradVarName("X")), EXPECT_EQ(grad_op2->Output(f::GradVarName("X")),
std::vector<std::string>({f::GradVarName("y1")})); std::vector<std::string>({f::GradVarName("y1")}));
EXPECT_EQ(grad_op2->Output(f::GradVarName("H")), EXPECT_EQ(grad_op2->Output(f::GradVarName("H")), std::vector<std::string>());
std::vector<std::string>({f::kEmptyVarName}));
f::OpDescBind *fill_zero_op = block->AllOps()[3]; f::OpDescBind *fill_zero_op = block->AllOps()[3];
ASSERT_EQ(fill_zero_op->Type(), "fill_zeros_like"); ASSERT_EQ(fill_zero_op->Type(), "fill_zeros_like");
...@@ -718,4 +757,19 @@ TEST(Backward, shared_var) { ...@@ -718,4 +757,19 @@ TEST(Backward, shared_var) {
std::vector<std::string>({f::GradVarName("x1")})); std::vector<std::string>({f::GradVarName("x1")}));
EXPECT_EQ(grad_op1->Output(f::GradVarName("b")), EXPECT_EQ(grad_op1->Output(f::GradVarName("b")),
std::vector<std::string>({f::GradVarName("b1")})); std::vector<std::string>({f::GradVarName("b1")}));
}
// Builds a program whose root block holds a single "minus" op
// (X = "a", Y = "b", Out = "out"), runs AppendBackward with "b" in its second
// argument, and checks that the block then contains exactly two ops.
TEST(Backward, half_backward) {
f::ProgramDesc *program_desc = GetNewProgramDesc();
f::ProgramDescBind &program = f::ProgramDescBind::Instance(program_desc);
f::BlockDescBind *block = program.Block(0);
// Forward op under test.
auto *op1 = block->AppendOp();
op1->SetType("minus");
op1->SetInput("X", {"a"});
op1->SetInput("Y", {"b"});
op1->SetOutput("Out", {"out"});
// NOTE(review): the second argument is presumably the list of variables that
// need no gradient, so only the X-side "scale" op from MinusGradOpDescMaker
// should be generated -- confirm against AppendBackward's declaration.
AppendBackward(program, {"b"});
auto ops = block->AllOps();
// Presumably the forward "minus" op plus one generated gradient op.
ASSERT_EQ(2UL, ops.size());
}
\ No newline at end of file
...@@ -66,7 +66,7 @@ std::vector<OpDescBind *> BlockDescBind::AllOps() const { ...@@ -66,7 +66,7 @@ std::vector<OpDescBind *> BlockDescBind::AllOps() const {
return res; return res;
} }
void BlockDescBind::Sync() { void BlockDescBind::Flush() {
if (need_update_) { if (need_update_) {
auto &op_field = *this->desc_->mutable_ops(); auto &op_field = *this->desc_->mutable_ops();
op_field.Clear(); op_field.Clear();
...@@ -91,5 +91,10 @@ BlockDescBind *BlockDescBind::ParentBlock() const { ...@@ -91,5 +91,10 @@ BlockDescBind *BlockDescBind::ParentBlock() const {
return prog_->Block(static_cast<size_t>(this->desc_->parent_idx())); return prog_->Block(static_cast<size_t>(this->desc_->parent_idx()));
} }
// Returns the underlying BlockDesc message. Flush() is invoked first, so any
// pending changes tracked by this wrapper are written into the message before
// the pointer is handed out.
BlockDesc *BlockDescBind::Proto() {
  this->Flush();
  return this->desc_;
}
} // namespace framework } // namespace framework
} // namespace paddle } // namespace paddle
...@@ -35,7 +35,8 @@ class BlockDescBind { ...@@ -35,7 +35,8 @@ class BlockDescBind {
public: public:
friend std::vector<std::unique_ptr<OpDescBind>> MakeBlockBackward( friend std::vector<std::unique_ptr<OpDescBind>> MakeBlockBackward(
ProgramDescBind &program_desc, int block_idx, ProgramDescBind &program_desc, int block_idx,
std::unordered_set<std::string> &no_grad_vars); std::unordered_set<std::string> *no_grad_vars,
std::unordered_map<std::string, std::string> *grad_to_var);
friend void AppendBackward( friend void AppendBackward(
ProgramDescBind &program_desc, ProgramDescBind &program_desc,
...@@ -64,9 +65,9 @@ class BlockDescBind { ...@@ -64,9 +65,9 @@ class BlockDescBind {
std::vector<OpDescBind *> AllOps() const; std::vector<OpDescBind *> AllOps() const;
void Sync(); void Flush();
BlockDesc *RawPtr() { return desc_; } BlockDesc *Proto();
private: private:
ProgramDescBind *prog_; // not_own ProgramDescBind *prog_; // not_own
......
...@@ -97,8 +97,11 @@ struct OpInfoFiller<T, kOpProtoAndCheckerMaker> { ...@@ -97,8 +97,11 @@ struct OpInfoFiller<T, kOpProtoAndCheckerMaker> {
template <typename T> template <typename T>
struct OpInfoFiller<T, kGradOpDescMaker> { struct OpInfoFiller<T, kGradOpDescMaker> {
void operator()(const char* op_type, OpInfo* info) const { void operator()(const char* op_type, OpInfo* info) const {
info->grad_op_maker_ = [](const OpDescBind& fwd_op) { info->grad_op_maker_ = [](
T maker(fwd_op); const OpDescBind& fwd_op,
const std::unordered_set<std::string>& no_grad_set,
std::unordered_map<std::string, std::string>* grad_to_var) {
T maker(fwd_op, no_grad_set, grad_to_var);
return maker(); return maker();
}; };
} }
......
...@@ -17,6 +17,7 @@ limitations under the License. */ ...@@ -17,6 +17,7 @@ limitations under the License. */
#include <memory> #include <memory>
#include <vector> #include <vector>
#include "gflags/gflags.h"
#include "gtest/gtest.h" #include "gtest/gtest.h"
#include "paddle/framework/attribute.h" #include "paddle/framework/attribute.h"
#include "paddle/framework/backward.h" #include "paddle/framework/backward.h"
...@@ -45,6 +46,7 @@ void AddOp(const std::string& type, const VariableNameMap& inputs, ...@@ -45,6 +46,7 @@ void AddOp(const std::string& type, const VariableNameMap& inputs,
for (auto kv : outputs) { for (auto kv : outputs) {
for (auto v : kv.second) { for (auto v : kv.second) {
auto var = block->Var(v); auto var = block->Var(v);
var->SetType(VarDesc::LOD_TENSOR);
var->SetDataType(paddle::framework::DataType::FP32); var->SetDataType(paddle::framework::DataType::FP32);
} }
} }
...@@ -316,4 +318,14 @@ TEST_F(ExecutorTesterFeedAndFetch, GPU) { ...@@ -316,4 +318,14 @@ TEST_F(ExecutorTesterFeedAndFetch, GPU) {
} }
} }
} }
DECLARE_double(fraction_of_gpu_memory_to_use);
// Custom gtest entry point: initializes googletest, lowers the
// fraction_of_gpu_memory_to_use flag, and then runs every registered test.
int main(int argc, char** argv) {
  testing::InitGoogleTest(&argc, argv);
  // Use less GPU memory for unittest.
  FLAGS_fraction_of_gpu_memory_to_use = 0.25;
  const int exit_code = RUN_ALL_TESTS();
  return exit_code;
}
#endif #endif
...@@ -97,16 +97,26 @@ enum DataType { ...@@ -97,16 +97,26 @@ enum DataType {
FP64 = 6; FP64 = 6;
} }
message LoDTensorDesc { message TensorDesc {
required DataType data_type = 1; required DataType data_type = 1;
repeated int64 dims = 2; // [UNK, 640, 480] is saved as [-1, 640, 480] repeated int64 dims = 2; // [UNK, 640, 480] is saved as [-1, 640, 480]
optional int32 lod_level = 3 [ default = 0 ]; }
message LoDTensorDesc {
required TensorDesc tensor = 1;
optional int32 lod_level = 2 [ default = 0 ];
} }
message VarDesc { message VarDesc {
enum VarType {
LOD_TENSOR = 1;
SELECTED_ROWS = 2;
}
required string name = 1; required string name = 1;
optional LoDTensorDesc lod_tensor = 2; required VarType type = 2;
optional bool persistable = 3 [ default = false ]; optional LoDTensorDesc lod_tensor = 3;
optional TensorDesc selected_rows = 4;
optional bool persistable = 5 [ default = false ];
} }
message BlockDesc { message BlockDesc {
......
...@@ -13,6 +13,8 @@ ...@@ -13,6 +13,8 @@
limitations under the License. */ limitations under the License. */
#pragma once #pragma once
#include <string>
#include <unordered_set>
#include "paddle/framework/op_desc.h" #include "paddle/framework/op_desc.h"
#include "paddle/framework/operator.h" #include "paddle/framework/operator.h"
...@@ -21,27 +23,50 @@ namespace framework { ...@@ -21,27 +23,50 @@ namespace framework {
class GradOpDescMakerBase { class GradOpDescMakerBase {
public: public:
explicit GradOpDescMakerBase(const OpDescBind& fwd_op) : fwd_op_(fwd_op) {} explicit GradOpDescMakerBase(
const OpDescBind& fwd_op,
const std::unordered_set<std::string>& no_grad_set,
std::unordered_map<std::string, std::string>* grad_to_var)
: fwd_op_(fwd_op), no_grad_set_(no_grad_set), grad_to_var_(grad_to_var) {}
virtual ~GradOpDescMakerBase() = default; virtual ~GradOpDescMakerBase() = default;
virtual std::vector<std::unique_ptr<OpDescBind>> operator()() const = 0; virtual std::vector<std::unique_ptr<OpDescBind>> operator()() const = 0;
protected: protected:
static std::vector<std::string> ToGradNames( std::vector<std::string> InputGrad(const std::string& name,
const std::vector<std::string>& var_names) { bool drop_empty_grad = true) const {
std::vector<std::string> ret_val; std::vector<std::string> ret_val;
auto var_names = this->Input(name);
ret_val.reserve(var_names.size()); ret_val.reserve(var_names.size());
std::transform(var_names.begin(), var_names.end(), std::transform(var_names.begin(), var_names.end(),
std::back_inserter(ret_val), GradVarName); std::back_inserter(ret_val),
return ret_val; [this](const std::string& fwd_var_name) -> std::string {
} auto g_name = GradVarName(fwd_var_name);
if (no_grad_set_.count(g_name)) {
std::vector<std::string> InputGrad(const std::string& name) const { return kEmptyVarName;
return ToGradNames(fwd_op_.Input(name)); } else {
(*this->grad_to_var_)[g_name] = fwd_var_name;
return g_name;
}
});
if (!drop_empty_grad) {
return ret_val;
}
std::vector<std::string> dropped_ret_val;
dropped_ret_val.reserve(ret_val.size());
std::copy_if(ret_val.begin(), ret_val.end(),
std::back_inserter(dropped_ret_val),
[](const std::string& str) { return str != kEmptyVarName; });
return dropped_ret_val;
} }
std::vector<std::string> OutputGrad(const std::string& name) const { std::vector<std::string> OutputGrad(const std::string& name) const {
return ToGradNames(fwd_op_.Output(name)); std::vector<std::string> ret_val;
auto onames = this->Output(name);
ret_val.reserve(onames.size());
std::transform(onames.begin(), onames.end(), std::back_inserter(ret_val),
GradVarName);
return ret_val;
} }
std::vector<std::string> InputNames() const { std::vector<std::string> InputNames() const {
...@@ -75,6 +100,8 @@ class GradOpDescMakerBase { ...@@ -75,6 +100,8 @@ class GradOpDescMakerBase {
private: private:
const OpDescBind& fwd_op_; const OpDescBind& fwd_op_;
const std::unordered_set<std::string>& no_grad_set_;
std::unordered_map<std::string, std::string>* grad_to_var_;
}; };
class SingleGradOpDescMaker : public GradOpDescMakerBase { class SingleGradOpDescMaker : public GradOpDescMakerBase {
...@@ -91,6 +118,7 @@ class SingleGradOpDescMaker : public GradOpDescMakerBase { ...@@ -91,6 +118,7 @@ class SingleGradOpDescMaker : public GradOpDescMakerBase {
virtual std::unique_ptr<OpDescBind> Apply() const = 0; virtual std::unique_ptr<OpDescBind> Apply() const = 0;
}; };
template <bool DropEmptyIG = true>
class DefaultGradOpDescMaker : public SingleGradOpDescMaker { class DefaultGradOpDescMaker : public SingleGradOpDescMaker {
public: public:
using SingleGradOpDescMaker::SingleGradOpDescMaker; using SingleGradOpDescMaker::SingleGradOpDescMaker;
...@@ -102,7 +130,8 @@ class DefaultGradOpDescMaker : public SingleGradOpDescMaker { ...@@ -102,7 +130,8 @@ class DefaultGradOpDescMaker : public SingleGradOpDescMaker {
for (auto& input_param : this->InputNames()) { for (auto& input_param : this->InputNames()) {
grad->SetInput(input_param, this->Input(input_param)); grad->SetInput(input_param, this->Input(input_param));
grad->SetOutput(GradVarName(input_param), this->InputGrad(input_param)); grad->SetOutput(GradVarName(input_param),
this->InputGrad(input_param, DropEmptyIG));
} }
for (auto& output_param : this->OutputNames()) { for (auto& output_param : this->OutputNames()) {
......
...@@ -32,7 +32,7 @@ OpDescBind::OpDescBind(const std::string &type, const VariableNameMap &inputs, ...@@ -32,7 +32,7 @@ OpDescBind::OpDescBind(const std::string &type, const VariableNameMap &inputs,
} }
OpDesc *OpDescBind::Proto() { OpDesc *OpDescBind::Proto() {
Sync(); Flush();
return &op_desc_; return &op_desc_;
} }
...@@ -101,7 +101,7 @@ void OpDescBind::SetAttr(const std::string &name, const Attribute &v) { ...@@ -101,7 +101,7 @@ void OpDescBind::SetAttr(const std::string &name, const Attribute &v) {
} }
void OpDescBind::SetBlockAttr(const std::string &name, BlockDescBind &block) { void OpDescBind::SetBlockAttr(const std::string &name, BlockDescBind &block) {
BlockDesc *desc = block.RawPtr(); BlockDesc *desc = block.Proto();
this->attrs_[name] = desc; this->attrs_[name] = desc;
need_update_ = true; need_update_ = true;
} }
...@@ -165,7 +165,7 @@ struct SetAttrDescVisitor : public boost::static_visitor<void> { ...@@ -165,7 +165,7 @@ struct SetAttrDescVisitor : public boost::static_visitor<void> {
void operator()(boost::blank) const { PADDLE_THROW("Unexpected branch"); } void operator()(boost::blank) const { PADDLE_THROW("Unexpected branch"); }
}; };
void OpDescBind::Sync() { void OpDescBind::Flush() {
if (need_update_) { if (need_update_) {
this->op_desc_.mutable_inputs()->Clear(); this->op_desc_.mutable_inputs()->Clear();
for (auto &ipt : inputs_) { for (auto &ipt : inputs_) {
......
...@@ -89,8 +89,6 @@ class OpDescBind { ...@@ -89,8 +89,6 @@ class OpDescBind {
this->need_update_ = true; this->need_update_ = true;
} }
void Sync();
const VariableNameMap &Inputs() const { return inputs_; } const VariableNameMap &Inputs() const { return inputs_; }
const VariableNameMap &Outputs() const { return outputs_; } const VariableNameMap &Outputs() const { return outputs_; }
...@@ -104,6 +102,8 @@ class OpDescBind { ...@@ -104,6 +102,8 @@ class OpDescBind {
void InferShape(const BlockDescBind &block) const; void InferShape(const BlockDescBind &block) const;
void Flush();
private: private:
template <typename MapType> template <typename MapType>
static std::vector<typename MapType::key_type> MapKeys(const MapType &map) { static std::vector<typename MapType::key_type> MapKeys(const MapType &map) {
......
...@@ -59,11 +59,5 @@ std::unique_ptr<OperatorBase> OpRegistry::CreateOp(const OpDescBind& op_desc) { ...@@ -59,11 +59,5 @@ std::unique_ptr<OperatorBase> OpRegistry::CreateOp(const OpDescBind& op_desc) {
op_desc.GetAttrMap()); op_desc.GetAttrMap());
} }
std::vector<std::unique_ptr<OpDescBind>> OpRegistry::CreateGradOpDescs(
OpDescBind* op_desc) {
auto& info = OpInfoMap::Instance().Get(op_desc->Type());
return info.grad_op_maker_(*op_desc);
}
} // namespace framework } // namespace framework
} // namespace paddle } // namespace paddle
...@@ -79,9 +79,6 @@ class OpRegistry { ...@@ -79,9 +79,6 @@ class OpRegistry {
static std::unique_ptr<OperatorBase> CreateOp(const OpDesc& op_desc); static std::unique_ptr<OperatorBase> CreateOp(const OpDesc& op_desc);
static std::vector<std::unique_ptr<OpDescBind>> CreateGradOpDescs(
OpDescBind* op_desc);
static std::unique_ptr<OperatorBase> CreateOp(const OpDescBind& op_desc); static std::unique_ptr<OperatorBase> CreateOp(const OpDescBind& op_desc);
}; };
...@@ -160,17 +157,18 @@ class OpKernelRegistrar : public Registrar { ...@@ -160,17 +157,18 @@ class OpKernelRegistrar : public Registrar {
/** /**
* Macro to register Operator. * Macro to register Operator.
*/ */
#define REGISTER_OP(op_type, op_class, op_maker_class, grad_op_type, \ #define REGISTER_OP(op_type, op_class, op_maker_class, grad_op_type, \
grad_op_class) \ grad_op_class) \
REGISTER_OPERATOR(grad_op_type, grad_op_class); \ REGISTER_OPERATOR(grad_op_type, grad_op_class); \
class _GradOpDescMaker_##grad_op_type##_ \ class _GradOpDescMaker_##grad_op_type##_ \
: public ::paddle::framework::DefaultGradOpDescMaker { \ : public ::paddle::framework::DefaultGradOpDescMaker<true> { \
using ::paddle::framework::DefaultGradOpDescMaker::DefaultGradOpDescMaker; \ using ::paddle::framework::DefaultGradOpDescMaker< \
\ true>::DefaultGradOpDescMaker; \
protected: \ \
virtual std::string GradOpType() const { return #grad_op_type; } \ protected: \
}; \ virtual std::string GradOpType() const { return #grad_op_type; } \
REGISTER_OPERATOR(op_type, op_class, _GradOpDescMaker_##grad_op_type##_, \ }; \
REGISTER_OPERATOR(op_type, op_class, _GradOpDescMaker_##grad_op_type##_, \
op_maker_class); op_maker_class);
#define REGISTER_OP_WITHOUT_GRADIENT(op_type, op_class, op_maker_class) \ #define REGISTER_OP_WITHOUT_GRADIENT(op_type, op_class, op_maker_class) \
......
...@@ -45,7 +45,7 @@ BlockDescBind *ProgramDescBind::AppendBlock(const BlockDescBind &parent) { ...@@ -45,7 +45,7 @@ BlockDescBind *ProgramDescBind::AppendBlock(const BlockDescBind &parent) {
ProgramDesc *ProgramDescBind::Proto() { ProgramDesc *ProgramDescBind::Proto() {
for (auto &block : blocks_) { for (auto &block : blocks_) {
block->Sync(); block->Flush();
} }
return prog_; return prog_;
} }
......
/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserve.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
#include "paddle/framework/selected_rows.h"
namespace paddle {
namespace framework {} // namespace framework
} // namespace paddle
/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserve.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
#pragma once
#include "paddle/framework/tensor.h"
namespace paddle {
namespace framework {
/*
 * SelectedRows bundles three pieces of state:
 *   - rows_:   a list of int64 row indices,
 *   - value_:  an owned Tensor (always allocated by the constructors),
 *   - height_: an int64 height, used as the first dimension of the "complete"
 *              shape returned by GetCompleteDims().
 */
class SelectedRows {
 public:
  // Copies `rows`, records `height`, and allocates an empty value tensor.
  SelectedRows(const std::vector<int64_t>& rows, const int64_t& height)
      : rows_(rows), value_(new Tensor()), height_(height) {}

  // Creates an object with no rows and an empty value tensor. height_ is
  // explicitly zeroed so that height() / GetCompleteDims() never read an
  // indeterminate value before set_height() is called.
  SelectedRows() : value_(new Tensor()), height_(0) {}

  // Place reported by the value tensor.
  platform::Place place() const { return value_->place(); }

  // Read-only / mutable access to the value tensor. The pointer returned by
  // mutable_value() stays owned by this object.
  const Tensor& value() const { return *value_; }

  Tensor* mutable_value() { return value_.get(); }

  int64_t height() const { return height_; }

  void set_height(int64_t height) { height_ = height; }

  const std::vector<int64_t>& rows() const { return rows_; }

  void set_rows(const std::vector<int64_t>& rows) { rows_ = rows; }

  // Returns the value tensor's dims with the first dimension replaced by
  // height_.
  // NOTE(review): indexes dims[0] unconditionally, so this assumes the value
  // tensor has rank >= 1 -- confirm what dims() yields for an unsized Tensor.
  DDim GetCompleteDims() const {
    std::vector<int64_t> dims = vectorize(value_->dims());
    dims[0] = height_;
    return make_ddim(dims);
  }

 private:
  std::vector<int64_t> rows_;
  std::unique_ptr<Tensor> value_;
  int64_t height_;
};
} // namespace framework
} // namespace paddle
/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserve.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
#include "paddle/framework/selected_rows.h"
#include "gtest/gtest.h"
namespace paddle {
namespace framework {
// Test fixture: before every test it builds a SelectedRows with row indices
// {0, 4, 7} and height 10, then allocates its value tensor as float data of
// shape [3, 100] on the CPU place.
class SelectedRowsTester : public ::testing::Test {
 public:
  void SetUp() override {
    const std::vector<int64_t> row_ids{0, 4, 7};
    const int64_t full_height = 10;
    const int64_t width = 100;

    selected_rows_.reset(new SelectedRows(row_ids, full_height));

    // One value row per selected row id.
    selected_rows_->mutable_value()->mutable_data<float>(
        make_ddim({static_cast<int64_t>(row_ids.size()), width}), place_);
  }

 protected:
  platform::CPUPlace place_;
  std::unique_ptr<SelectedRows> selected_rows_{nullptr};
};
// height() must report the height passed to the constructor in SetUp().
TEST_F(SelectedRowsTester, height) {
  const int64_t expected_height = 10;
  ASSERT_EQ(selected_rows_->height(), expected_height);
}
// The value tensor keeps the shape it was allocated with: 3 rows x 100 columns.
TEST_F(SelectedRowsTester, dims) {
  const auto expected_dims = make_ddim({3, 100});
  ASSERT_EQ(selected_rows_->value().dims(), expected_dims);
}
// GetCompleteDims() swaps the first dimension (3) for the height (10).
TEST_F(SelectedRowsTester, complete_dims) {
  const auto expected_dims = make_ddim({10, 100});
  ASSERT_EQ(selected_rows_->GetCompleteDims(), expected_dims);
}
} // namespace framework
} // namespace paddle
...@@ -76,6 +76,17 @@ LoDTensor PackDynamicBatch(const std::vector<LoDTensor>& source, ...@@ -76,6 +76,17 @@ LoDTensor PackDynamicBatch(const std::vector<LoDTensor>& source,
const std::vector<DySeqMeta>& meta, const LoD& lod, const std::vector<DySeqMeta>& meta, const LoD& lod,
size_t level); size_t level);
std::vector<size_t> GenDyBatchIndice(const DySeqMetaBatch& meta, int batch_id) {
// collect indice need to copy to the batch
std::vector<size_t> indice;
for (const auto& seq : meta) {
size_t id = seq.begin + batch_id;
if (id >= seq.end) break;
indice.push_back(id);
}
return indice;
}
} // namespace detail } // namespace detail
const LoDTensor& TensorArray::Read(size_t index) const { const LoDTensor& TensorArray::Read(size_t index) const {
...@@ -113,8 +124,8 @@ LoDTensor TensorArray::Pack(size_t level, const std::vector<DySeqMeta>& meta, ...@@ -113,8 +124,8 @@ LoDTensor TensorArray::Pack(size_t level, const std::vector<DySeqMeta>& meta,
return detail::PackDynamicBatch(values_, meta, lod, level); return detail::PackDynamicBatch(values_, meta, lod, level);
} }
std::vector<DySeqMeta> TensorArray::Unpack(const LoDTensor& source, int level, DySeqMetaBatch TensorArray::Unpack(const LoDTensor& source, int level,
bool length_desend) { bool length_desend) {
detail::DynamicBatchUnpacker unpacker(source, level, detail::DynamicBatchUnpacker unpacker(source, level,
length_desend /*descend*/); length_desend /*descend*/);
...@@ -129,6 +140,7 @@ std::vector<DySeqMeta> TensorArray::Unpack(const LoDTensor& source, int level, ...@@ -129,6 +140,7 @@ std::vector<DySeqMeta> TensorArray::Unpack(const LoDTensor& source, int level,
Write(batch_id, unpacker.GetBatch(batch_id)); Write(batch_id, unpacker.GetBatch(batch_id));
} }
PADDLE_ENFORCE(!unpacker.meta.empty());
return unpacker.meta; return unpacker.meta;
} }
...@@ -218,13 +230,7 @@ LoDTensor DynamicBatchUnpacker::GetBatch(size_t index) { ...@@ -218,13 +230,7 @@ LoDTensor DynamicBatchUnpacker::GetBatch(size_t index) {
PADDLE_ENFORCE(!meta.empty(), "should build meta first"); PADDLE_ENFORCE(!meta.empty(), "should build meta first");
LoDTensor result; LoDTensor result;
// collect indice need to copy to the batch auto indice = detail::GenDyBatchIndice(meta, index);
std::vector<size_t> indice;
for (const auto& seq : meta) {
size_t id = seq.begin + index;
if (id >= seq.end) break;
indice.push_back(id);
}
PADDLE_ENFORCE(!indice.empty(), "invalid batch at %d", index); PADDLE_ENFORCE(!indice.empty(), "invalid batch at %d", index);
// copy the indice of records in LoDTensor // copy the indice of records in LoDTensor
...@@ -237,9 +243,9 @@ LoDTensor DynamicBatchUnpacker::GetBatch(size_t index) { ...@@ -237,9 +243,9 @@ LoDTensor DynamicBatchUnpacker::GetBatch(size_t index) {
for (size_t i = 0; i < indice.size(); i++) { for (size_t i = 0; i < indice.size(); i++) {
auto index = indice[i]; auto index = indice[i];
auto target = result.Slice<value_type>(i, i + 1); auto target = result.Slice<value_type>(i, i + 1);
auto source_ = source->Slice<value_type>(index, index + 1); auto slice = source->Slice<value_type>(index, index + 1);
target.CopyFrom<value_type>(source_, platform::CPUPlace(), target.CopyFrom<value_type>(slice, platform::CPUPlace(),
platform::CPUDeviceContext()); platform::CPUDeviceContext());
} }
......
...@@ -34,6 +34,13 @@ struct DySeqMeta { ...@@ -34,6 +34,13 @@ struct DySeqMeta {
size_t ori_idx; size_t ori_idx;
}; };
using DySeqMetaBatch = std::vector<DySeqMeta>;
/*
* Extract the indices of instances.
*/
std::vector<size_t> GenDyBatchIndice(const DySeqMetaBatch &metas, int batch_id);
/* /*
* TensorArray is a C-array-like array of tensors, it is meant to be used with * TensorArray is a C-array-like array of tensors, it is meant to be used with
* dynamic iteration primitives such as while_loop. It is used to segment inputs * dynamic iteration primitives such as while_loop. It is used to segment inputs
...@@ -69,7 +76,7 @@ class TensorArray { ...@@ -69,7 +76,7 @@ class TensorArray {
* Recover the original LoD-arranged LoDTensor with the `values`, `level` and * Recover the original LoD-arranged LoDTensor with the `values`, `level` and
* `indice_map`. * `indice_map`.
*/ */
LoDTensor Pack(size_t level, const std::vector<DySeqMeta> &meta, LoDTensor Pack(size_t level, const DySeqMetaBatch &meta,
const LoD &lod) const; const LoD &lod) const;
/* /*
...@@ -77,8 +84,7 @@ class TensorArray { ...@@ -77,8 +84,7 @@ class TensorArray {
* `values`, if set `desend`, will sort by length in descending order else in * `values`, if set `desend`, will sort by length in descending order else in
* ascending order. * ascending order.
*/ */
std::vector<DySeqMeta> Unpack(const LoDTensor &source, int level, DySeqMetaBatch Unpack(const LoDTensor &source, int level, bool length_desend);
bool length_desend);
/* /*
* Pack the values into a tensor with rank one higher than each tensor in * Pack the values into a tensor with rank one higher than each tensor in
......
...@@ -36,8 +36,9 @@ using OpCreator = std::function<OperatorBase*( ...@@ -36,8 +36,9 @@ using OpCreator = std::function<OperatorBase*(
const std::string& /*type*/, const VariableNameMap& /*inputs*/, const std::string& /*type*/, const VariableNameMap& /*inputs*/,
const VariableNameMap& /*outputs*/, const AttributeMap& /*attrs*/)>; const VariableNameMap& /*outputs*/, const AttributeMap& /*attrs*/)>;
using GradOpMakerFN = using GradOpMakerFN = std::function<std::vector<std::unique_ptr<OpDescBind>>(
std::function<std::vector<std::unique_ptr<OpDescBind>>(const OpDescBind&)>; const OpDescBind&, const std::unordered_set<std::string>& /*no_grad_set*/,
std::unordered_map<std::string, std::string>* /*grad_to_var*/)>;
} // namespace framework } // namespace framework
} // namespace paddle } // namespace paddle
...@@ -13,32 +13,58 @@ See the License for the specific language governing permissions and ...@@ -13,32 +13,58 @@ See the License for the specific language governing permissions and
limitations under the License. */ limitations under the License. */
#include "paddle/framework/var_desc.h" #include "paddle/framework/var_desc.h"
#include "paddle/platform/enforce.h"
namespace paddle { namespace paddle {
namespace framework { namespace framework {
void VarDescBind::SetShape(const std::vector<int64_t> &dims) { void VarDescBind::SetShape(const std::vector<int64_t> &dims) {
VectorToRepeated(dims, desc_.mutable_lod_tensor()->mutable_dims()); VectorToRepeated(dims, mutable_tensor_desc()->mutable_dims());
} }
void VarDescBind::SetDataType(DataType data_type) { void VarDescBind::SetDataType(DataType data_type) {
desc_.mutable_lod_tensor()->set_data_type(data_type); mutable_tensor_desc()->set_data_type(data_type);
} }
std::vector<int64_t> VarDescBind::Shape() const { std::vector<int64_t> VarDescBind::Shape() const {
return RepeatedToVector(desc_.lod_tensor().dims()); return RepeatedToVector(tensor_desc().dims());
} }
DataType VarDescBind::GetDataType() const { DataType VarDescBind::GetDataType() const { return tensor_desc().data_type(); }
return desc_.lod_tensor().data_type();
}
void VarDescBind::SetLoDLevel(int32_t lod_level) { void VarDescBind::SetLoDLevel(int32_t lod_level) {
PADDLE_ENFORCE(desc_.type() == VarDesc::LOD_TENSOR);
desc_.mutable_lod_tensor()->set_lod_level(lod_level); desc_.mutable_lod_tensor()->set_lod_level(lod_level);
} }
int32_t VarDescBind::GetLodLevel() const { int32_t VarDescBind::GetLodLevel() const {
PADDLE_ENFORCE(desc_.type() == VarDesc::LOD_TENSOR);
return desc_.lod_tensor().lod_level(); return desc_.lod_tensor().lod_level();
} }
// Returns the read-only TensorDesc that matches the variable's type:
//   LOD_TENSOR    -> the tensor nested inside lod_tensor
//   SELECTED_ROWS -> selected_rows itself
// Enforces that the type field has been set; any other type throws.
const TensorDesc &VarDescBind::tensor_desc() const {
  PADDLE_ENFORCE(desc_.has_type(), "invoke TensorDesc must after set type");
  const auto var_type = desc_.type();
  if (var_type == VarDesc::SELECTED_ROWS) {
    return desc_.selected_rows();
  }
  if (var_type == VarDesc::LOD_TENSOR) {
    return desc_.lod_tensor().tensor();
  }
  PADDLE_THROW("Unexpected branch.");
}
// Returns a mutable pointer to the TensorDesc that matches the variable's
// type:
//   LOD_TENSOR    -> the tensor nested inside lod_tensor
//   SELECTED_ROWS -> selected_rows itself
// Enforces that the type field has been set; any other type throws.
TensorDesc *VarDescBind::mutable_tensor_desc() {
  PADDLE_ENFORCE(desc_.has_type(),
                 "invoke MutableTensorDesc must after set type");
  const auto var_type = desc_.type();
  if (var_type == VarDesc::SELECTED_ROWS) {
    return desc_.mutable_selected_rows();
  }
  if (var_type == VarDesc::LOD_TENSOR) {
    return desc_.mutable_lod_tensor()->mutable_tensor();
  }
  PADDLE_THROW("Unexpected branch.");
}
} // namespace framework } // namespace framework
} // namespace paddle } // namespace paddle
...@@ -72,7 +72,14 @@ class VarDescBind { ...@@ -72,7 +72,14 @@ class VarDescBind {
int32_t GetLodLevel() const; int32_t GetLodLevel() const;
VarDesc::VarType GetType() const { return desc_.type(); }
void SetType(VarDesc::VarType type) { desc_.set_type(type); }
private: private:
const TensorDesc &tensor_desc() const;
TensorDesc *mutable_tensor_desc();
VarDesc desc_; VarDesc desc_;
}; };
} // namespace framework } // namespace framework
......
...@@ -84,8 +84,9 @@ function(op_library TARGET) ...@@ -84,8 +84,9 @@ function(op_library TARGET)
endif() endif()
# pybind USE_NO_KERNEL_OP # pybind USE_NO_KERNEL_OP
# HACK: if REGISTER_OP_CPU_KERNEL is present, the operator must have a kernel
file(READ ${TARGET}.cc TARGET_CONTENT) file(READ ${TARGET}.cc TARGET_CONTENT)
string(REGEX MATCH "OperatorWithKernel" regex_result "${TARGET_CONTENT}") string(REGEX MATCH "REGISTER_OP_CPU_KERNEL" regex_result "${TARGET_CONTENT}")
string(REPLACE "_op" "" TARGET "${TARGET}") string(REPLACE "_op" "" TARGET "${TARGET}")
if (${pybind_flag} EQUAL 0 AND regex_result STREQUAL "") if (${pybind_flag} EQUAL 0 AND regex_result STREQUAL "")
file(APPEND ${pybind_file} "USE_NO_KERNEL_OP(${TARGET});\n") file(APPEND ${pybind_file} "USE_NO_KERNEL_OP(${TARGET});\n")
......
...@@ -338,6 +338,38 @@ class ThresholdedReluOpMaker : public framework::OpProtoAndCheckerMaker { ...@@ -338,6 +338,38 @@ class ThresholdedReluOpMaker : public framework::OpProtoAndCheckerMaker {
} }
}; };
// Declares the proto for the hard_sigmoid operator: one input "X", one output
// "Y", and two attributes of type AttrType -- "slope" (default 0.2) and
// "offset" (default 0.5). The operator comment uses the same attribute names,
// so the formula shown to users matches what they can actually set.
template <typename AttrType>
class HardSigmoidOpMaker : public framework::OpProtoAndCheckerMaker {
 public:
  HardSigmoidOpMaker(framework::OpProto *proto,
                     framework::OpAttrChecker *op_checker)
      : OpProtoAndCheckerMaker(proto, op_checker) {
    AddInput("X", "Input of HardSigmoid operator");
    AddOutput("Y", "Output of HardSigmoid operator");
    AddComment(R"DOC(
Hard Sigmoid activation operator.
Segment-wise linear approximation of sigmoid[1].
This is much faster than sigmoid.
hard_sigmoid = max(0, min(1, slope * x + offset))
The slope should be positive. The offset can be either positive or negative.
The default slope and offset are set from [1].
It is recommended to use the defaults for this activation.
References:
[1] Noisy Activation Functions
(https://arxiv.org/abs/1603.00391)
)DOC");
    AddAttr<AttrType>("slope", "Slope for linear approximation of sigmoid")
        .SetDefault(static_cast<AttrType>(0.2));
    AddAttr<AttrType>("offset", "Offset for linear approximation of sigmoid")
        .SetDefault(static_cast<AttrType>(0.5));
  }
};
} // namespace operators } // namespace operators
} // namespace paddle } // namespace paddle
...@@ -413,6 +445,9 @@ REGISTER_OP(thresholded_relu, ops::ActivationOp, ...@@ -413,6 +445,9 @@ REGISTER_OP(thresholded_relu, ops::ActivationOp,
ops::ThresholdedReluOpMaker<float>, thresholded_relu_grad, ops::ThresholdedReluOpMaker<float>, thresholded_relu_grad,
ops::ActivationOpGrad); ops::ActivationOpGrad);
REGISTER_OP(hard_sigmoid, ops::ActivationOp, ops::HardSigmoidOpMaker<float>,
hard_sigmoid_grad, ops::ActivationOpGrad);
#define REGISTER_ACTIVATION_CPU_KERNEL(act_type, functor, grad_functor) \ #define REGISTER_ACTIVATION_CPU_KERNEL(act_type, functor, grad_functor) \
REGISTER_OP_CPU_KERNEL( \ REGISTER_OP_CPU_KERNEL( \
act_type, \ act_type, \
......
...@@ -616,30 +616,63 @@ struct ThresholdedReluGradFunctor : public BaseActivationFunctor<T> { ...@@ -616,30 +616,63 @@ struct ThresholdedReluGradFunctor : public BaseActivationFunctor<T> {
} }
}; };
// Forward functor for hard sigmoid:
//   y = min(1, max(0, slope * x + offset))
// `slope` and `offset` are exposed through GetAttrs() under the attribute
// names "slope" and "offset".
template <typename T>
struct HardSigmoidFunctor : public BaseActivationFunctor<T> {
  float slope;
  float offset;

  typename BaseActivationFunctor<T>::AttrPair GetAttrs() {
    return {{"slope", &slope}, {"offset", &offset}};
  }

  template <typename Device, typename X, typename Y>
  void operator()(Device d, X x, Y y) const {
    const T lower = static_cast<T>(0);
    const T upper = static_cast<T>(1);
    // Linear segment before clamping.
    auto linear = x * static_cast<T>(slope) + static_cast<T>(offset);
    y.device(d) = linear.cwiseMax(lower).cwiseMin(upper);
  }
};
// Backward functor for hard sigmoid:
//   dx = dy * slope   where 0 < y < 1
//   dx = 0            elsewhere (the output was clamped)
// `slope` and `offset` are exposed through GetAttrs() under the attribute
// names "slope" and "offset"; only `slope` is used in the gradient.
template <typename T>
struct HardSigmoidGradFunctor : public BaseActivationFunctor<T> {
  float slope;
  float offset;

  typename BaseActivationFunctor<T>::AttrPair GetAttrs() {
    return {{"slope", &slope}, {"offset", &offset}};
  }

  template <typename Device, typename X, typename Y, typename dY, typename dX>
  void operator()(Device d, X x, Y y, dY dy, dX dx) const {
    const T lower = static_cast<T>(0);
    const T upper = static_cast<T>(1);
    // The product of the two comparisons is the mask of unclamped outputs.
    dx.device(d) = dy * ((y > lower) * (y < upper)).template cast<T>() *
                   static_cast<T>(slope);
  }
};
} // namespace operators } // namespace operators
} // namespace paddle } // namespace paddle
#define FOR_EACH_KERNEL_FUNCTOR(__macro) \ #define FOR_EACH_KERNEL_FUNCTOR(__macro) \
__macro(sigmoid, SigmoidFunctor, SigmoidGradFunctor); \ __macro(sigmoid, SigmoidFunctor, SigmoidGradFunctor); \
__macro(logsigmoid, LogSigmoidFunctor, LogSigmoidGradFunctor); \ __macro(logsigmoid, LogSigmoidFunctor, LogSigmoidGradFunctor); \
__macro(exp, ExpFunctor, ExpGradFunctor); \ __macro(exp, ExpFunctor, ExpGradFunctor); \
__macro(relu, ReluFunctor, ReluGradFunctor); \ __macro(relu, ReluFunctor, ReluGradFunctor); \
__macro(tanh, TanhFunctor, TanhGradFunctor); \ __macro(tanh, TanhFunctor, TanhGradFunctor); \
__macro(softshrink, SoftShrinkFunctor, SoftShrinkGradFunctor); \ __macro(softshrink, SoftShrinkFunctor, SoftShrinkGradFunctor); \
__macro(sqrt, SqrtFunctor, SqrtGradFunctor); \ __macro(sqrt, SqrtFunctor, SqrtGradFunctor); \
__macro(abs, AbsFunctor, AbsGradFunctor); \ __macro(abs, AbsFunctor, AbsGradFunctor); \
__macro(reciprocal, ReciprocalFunctor, ReciprocalGradFunctor); \ __macro(reciprocal, ReciprocalFunctor, ReciprocalGradFunctor); \
__macro(log, LogFunctor, LogGradFunctor); \ __macro(log, LogFunctor, LogGradFunctor); \
__macro(square, SquareFunctor, SquareGradFunctor); \ __macro(square, SquareFunctor, SquareGradFunctor); \
__macro(brelu, BReluFunctor, BReluGradFunctor); \ __macro(brelu, BReluFunctor, BReluGradFunctor); \
__macro(soft_relu, SoftReluFunctor, SoftReluGradFunctor); \ __macro(soft_relu, SoftReluFunctor, SoftReluGradFunctor); \
__macro(pow, PowFunctor, PowGradFunctor); \ __macro(pow, PowFunctor, PowGradFunctor); \
__macro(stanh, STanhFunctor, STanhGradFunctor); \ __macro(stanh, STanhFunctor, STanhGradFunctor); \
__macro(softplus, SoftplusFunctor, SoftplusGradFunctor); \ __macro(softplus, SoftplusFunctor, SoftplusGradFunctor); \
__macro(softsign, SoftsignFunctor, SoftsignGradFunctor); \ __macro(softsign, SoftsignFunctor, SoftsignGradFunctor); \
__macro(relu6, Relu6Functor, Relu6GradFunctor); \ __macro(relu6, Relu6Functor, Relu6GradFunctor); \
__macro(leaky_relu, LeakyReluFunctor, LeakyReluGradFunctor); \ __macro(leaky_relu, LeakyReluFunctor, LeakyReluGradFunctor); \
__macro(tanh_shrink, TanhShrinkFunctor, TanhShrinkGradFunctor); \ __macro(tanh_shrink, TanhShrinkFunctor, TanhShrinkGradFunctor); \
__macro(elu, ELUFunctor, ELUGradFunctor); \ __macro(elu, ELUFunctor, ELUGradFunctor); \
__macro(hard_shrink, HardShrinkFunctor, HardShrinkGradFunctor); \ __macro(hard_shrink, HardShrinkFunctor, HardShrinkGradFunctor); \
__macro(hard_sigmoid, HardSigmoidFunctor, HardSigmoidGradFunctor); \
__macro(thresholded_relu, ThresholdedReluFunctor, ThresholdedReluGradFunctor); __macro(thresholded_relu, ThresholdedReluFunctor, ThresholdedReluGradFunctor);
/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
#include "paddle/operators/adam_op.h"
namespace paddle {
namespace operators {
// Operator definition for one Adam update step. Only shape inference lives
// here; the arithmetic is done by the registered kernel.
class AdamOp : public framework::OperatorWithKernel {
 public:
  using framework::OperatorWithKernel::OperatorWithKernel;

 protected:
  // Checks that every input/output slot is present, that LearningRate,
  // Beta1Pow and Beta2Pow each hold exactly one element, and that Grad,
  // Moment1 and Moment2 have the same dimensions as Param. Output dimensions
  // mirror the corresponding inputs.
  void InferShape(framework::InferShapeContext *ctx) const override {
    PADDLE_ENFORCE(ctx->HasInput("Param"),
                   "Input(Param) of AdamOp should not be null.");
    PADDLE_ENFORCE(ctx->HasInput("Grad"),
                   "Input(Grad) of AdamOp should not be null.");
    PADDLE_ENFORCE(ctx->HasInput("Moment1"),
                   "Input(Moment1) of AdamOp should not be null.");
    PADDLE_ENFORCE(ctx->HasInput("Moment2"),
                   "Input(Moment2) of AdamOp should not be null.");
    PADDLE_ENFORCE(ctx->HasInput("LearningRate"),
                   "Input(LearningRate) of AdamOp should not be null.");
    PADDLE_ENFORCE(ctx->HasInput("Beta1Pow"),
                   "Input(Beta1Pow) of AdamOp should not be null.");
    PADDLE_ENFORCE(ctx->HasInput("Beta2Pow"),
                   "Input(Beta2Pow) of AdamOp should not be null.");

    PADDLE_ENFORCE(ctx->HasOutput("ParamOut"),
                   "Output(ParamOut) of AdamOp should not be null.");
    PADDLE_ENFORCE(ctx->HasOutput("Moment1Out"),
                   "Output(Moment1Out) of AdamOp should not be null.");
    PADDLE_ENFORCE(ctx->HasOutput("Moment2Out"),
                   "Output(Moment2Out) of AdamOp should not be null.");
    PADDLE_ENFORCE(ctx->HasOutput("Beta1PowOut"),
                   "Output(Beta1PowOut) of AdamOp should not be null.");
    PADDLE_ENFORCE(ctx->HasOutput("Beta2PowOut"),
                   "Output(Beta2PowOut) of AdamOp should not be null.");

    auto lr_dims = ctx->GetInputDim("LearningRate");
    PADDLE_ENFORCE_EQ(framework::product(lr_dims), 1,
                      "Learning rate should have 1 dimension");
    auto beta1_pow_dims = ctx->GetInputDim("Beta1Pow");
    PADDLE_ENFORCE_EQ(framework::product(beta1_pow_dims), 1,
                      "Beta1 power accumulator should have 1 dimension");
    auto beta2_pow_dims = ctx->GetInputDim("Beta2Pow");
    // Validate Beta2Pow itself; previously Beta1Pow was checked a second time
    // here, so a malformed Beta2Pow went undetected.
    PADDLE_ENFORCE_EQ(framework::product(beta2_pow_dims), 1,
                      "Beta2 power accumulator should have 1 dimension");

    auto param_dims = ctx->GetInputDim("Param");
    PADDLE_ENFORCE_EQ(
        param_dims, ctx->GetInputDim("Grad"),
        "Param and Grad input of AdamOp should have same dimension");
    PADDLE_ENFORCE_EQ(
        param_dims, ctx->GetInputDim("Moment1"),
        "Param and Moment1 input of AdamOp should have same dimension");
    PADDLE_ENFORCE_EQ(
        param_dims, ctx->GetInputDim("Moment2"),
        "Param and Moment2 input of AdamOp should have same dimension");

    ctx->SetOutputDim("ParamOut", param_dims);
    ctx->SetOutputDim("Moment1Out", param_dims);
    ctx->SetOutputDim("Moment2Out", param_dims);
    ctx->SetOutputDim("Beta1PowOut", beta1_pow_dims);
    ctx->SetOutputDim("Beta2PowOut", beta2_pow_dims);
  }
};
// Declares the inputs, outputs, attributes and user-facing documentation of
// the adam operator.
class AdamOpMaker : public framework::OpProtoAndCheckerMaker {
 public:
  AdamOpMaker(framework::OpProto *proto, framework::OpAttrChecker *op_checker)
      : OpProtoAndCheckerMaker(proto, op_checker) {
    AddInput("Param", "(Tensor) Input parameter");
    AddInput("Grad", "(Tensor) Input gradient");
    AddInput("LearningRate", "(Tensor) Learning rate");
    AddInput("Moment1", "(Tensor) Input first moment");
    AddInput("Moment2", "(Tensor) Input second moment");
    AddInput("Beta1Pow", "(Tensor) Input beta1 power accumulator");
    AddInput("Beta2Pow", "(Tensor) Input beta2 power accumulator");

    AddOutput("ParamOut", "(Tensor) Output parameter");
    AddOutput("Moment1Out", "(Tensor) Output first moment");
    AddOutput("Moment2Out", "(Tensor) Output second moment");
    AddOutput("Beta1PowOut", "(Tensor) Output beta1 power accumulator");
    AddOutput("Beta2PowOut", "(Tensor) Output beta2 power accumulator");

    AddAttr<float>("beta1",
                   "(float, default 0.9) "
                   "Exponential decay rate for the "
                   "first moment estimates.")
        .SetDefault(0.9f);
    AddAttr<float>("beta2",
                   "(float, default 0.999) "
                   "exponential decay rate for the "
                   "second moment estimates.")
        .SetDefault(0.999f);
    AddAttr<float>("epsilon",
                   "(float, default 1.0e-8) "
                   "Constant for numerical stability")
        .SetDefault(1.0e-8f);

    // The update rules below are written to match the kernel: the
    // bias-corrected step size is derived from the LearningRate input, and
    // the parameter update uses the freshly computed moment outputs.
    AddComment(R"DOC(
Adam Updates Operator.
This implements the Adam optimizer from Section 2 of the Adam
paper[1]. Adam is a first-order gradient-based optimization
method based on adaptive estimates of lower-order moments.
Adam updates:
moment1_out = beta1 * moment1 + (1 − beta1) * grad
moment2_out = beta2 * moment2 + (1 − beta2) * grad * grad
beta1_pow_out = beta1_pow * beta1
beta2_pow_out = beta2_pow * beta2
learning_rate_t = learning_rate *
sqrt(1 - beta2_pow_out) / (1 - beta1_pow_out)
param_out = param - learning_rate_t * moment1_out / (sqrt(moment2_out) + epsilon)
References:
[1] Adam: A Method for Stochastic Optimization
(https://arxiv.org/abs/1412.6980)
)DOC");
  }
};
} // namespace operators
} // namespace paddle
// Short alias used by the registration macros below.
namespace ops = paddle::operators;
// Register the adam operator (ops::AdamOp described by ops::AdamOpMaker);
// as the macro name states, no gradient operator is attached to it.
REGISTER_OP_WITHOUT_GRADIENT(adam, ops::AdamOp, ops::AdamOpMaker);
// Register the float kernel of adam for CPUPlace.
REGISTER_OP_CPU_KERNEL(adam,
ops::AdamOpKernel<paddle::platform::CPUPlace, float>);
/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
#define EIGEN_USE_GPU
#include "paddle/operators/adam_op.h"
// Short alias used by the registration macro below.
namespace ops = paddle::operators;
// Register the float kernel of adam for GPUPlace; it instantiates the same
// AdamOpKernel template as the CPU registration.
REGISTER_OP_GPU_KERNEL(adam,
ops::AdamOpKernel<paddle::platform::GPUPlace, float>);
/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
#pragma once
#include "paddle/framework/eigen.h"
#include "paddle/framework/op_registry.h"
namespace paddle {
namespace operators {
// Kernel performing one Adam step with Eigen expressions on device `Place`.
//
// Reads Param, Grad, Moment1, Moment2, LearningRate, Beta1Pow and Beta2Pow
// and writes ParamOut, Moment1Out, Moment2Out, Beta1PowOut and Beta2PowOut.
template <typename Place, typename T>
class AdamOpKernel : public framework::OpKernel<T> {
 public:
  void Compute(const framework::ExecutionContext& ctx) const override {
    using Vec = framework::EigenVector<T>;

    // Fetch an output tensor by slot name and make sure it has storage.
    auto alloc_output = [&ctx](const char* slot) {
      framework::Tensor* out = ctx.Output<framework::Tensor>(slot);
      out->mutable_data<T>(ctx.GetPlace());
      return out;
    };
    // Fetch an input tensor by slot name as a flattened Eigen vector.
    auto flat_input = [&ctx](const char* slot) {
      return Vec::Flatten(*ctx.Input<framework::Tensor>(slot));
    };

    framework::Tensor* param_out_t = alloc_output("ParamOut");
    framework::Tensor* moment1_out_t = alloc_output("Moment1Out");
    framework::Tensor* moment2_out_t = alloc_output("Moment2Out");
    framework::Tensor* beta1_pow_out_t = alloc_output("Beta1PowOut");
    framework::Tensor* beta2_pow_out_t = alloc_output("Beta2PowOut");

    const float beta1 = ctx.Attr<float>("beta1");
    const float beta2 = ctx.Attr<float>("beta2");
    const float epsilon = ctx.Attr<float>("epsilon");

    auto param = flat_input("Param");
    auto grad = flat_input("Grad");
    auto moment1 = flat_input("Moment1");
    auto moment2 = flat_input("Moment2");
    auto lr = flat_input("LearningRate");
    auto beta1_pow = flat_input("Beta1Pow");
    auto beta2_pow = flat_input("Beta2Pow");

    auto param_out = Vec::Flatten(*param_out_t);
    auto moment1_out = Vec::Flatten(*moment1_out_t);
    auto moment2_out = Vec::Flatten(*moment2_out_t);
    auto beta1_pow_out = Vec::Flatten(*beta1_pow_out_t);
    auto beta2_pow_out = Vec::Flatten(*beta2_pow_out_t);

    auto dev = ctx.GetEigenDevice<Place>();

    // Moment estimates and the running powers of the decay rates.
    moment1_out.device(dev) = beta1 * moment1 + (1 - beta1) * grad;
    moment2_out.device(dev) = beta2 * moment2 + (1 - beta2) * grad.square();
    beta1_pow_out.device(dev) = beta1_pow * beta1;
    beta2_pow_out.device(dev) = beta2_pow * beta2;

    // Bias-corrected step size; every operand here holds a single element.
    auto step_size = lr * (1 - beta2_pow_out).sqrt() / (1 - beta1_pow_out);

    // Eigen does not broadcast implicitly, so stretch the one-element step
    // size to the length of the moment vector before the element-wise update.
    Eigen::DSizes<int, 1> bcast_dims(moment1_out_t->numel());
    param_out.device(dev) =
        param - step_size.broadcast(bcast_dims) *
                    (moment1_out / (moment2_out.sqrt() + epsilon));
  }
};
} // namespace operators
} // namespace paddle
...@@ -23,6 +23,7 @@ using framework::Scope; ...@@ -23,6 +23,7 @@ using framework::Scope;
using framework::TensorArray; using framework::TensorArray;
using framework::LoDTensor; using framework::LoDTensor;
using framework::Variable; using framework::Variable;
using framework::DySeqMetaBatch;
namespace detail { namespace detail {
...@@ -33,6 +34,29 @@ inline void CreateVariables(Scope& scope, ...@@ -33,6 +34,29 @@ inline void CreateVariables(Scope& scope,
} }
} }
/*
 * Copy the rows of `boot_state` into `tensor` in the order given by `metas`:
 * row `i` of `tensor` receives row `metas[i].ori_idx` of `boot_state`.
 *
 * Inputs holding sequences are reordered when they are split into steps, so
 * the boot states have to follow the same order.
 *
 * NOTE This implies that the `pre_state` of the first time step must hold a
 * copy of the `boot_state` instead of referencing it: the content is
 * reordered, while the RNN op should not change the content of `boot_state`,
 * which is one of its input variables.
 */
template <typename T>
inline void ReorderBootState(const DySeqMetaBatch& metas,
                             const LoDTensor& boot_state, LoDTensor* tensor,
                             const platform::Place& dst_place) {
  const size_t num_seqs = metas.size();
  size_t row = 0;
  while (row < num_seqs) {
    const auto src_row = metas[row].ori_idx;
    auto dst_slice = tensor->Slice<T>(row, row + 1);
    auto src_slice = boot_state.Slice<T>(src_row, src_row + 1);
    // TODO(superjom) pass in device context as an argument
    dst_slice.template CopyFrom<T>(src_slice, dst_place,
                                   platform::CPUDeviceContext());
    ++row;
  }
}
} // namespace detail } // namespace detail
class DynamicRecurrentOpProtoAndCheckerMaker class DynamicRecurrentOpProtoAndCheckerMaker
...@@ -69,6 +93,7 @@ void DynamicRecurrentOp::Run(const Scope& scope, ...@@ -69,6 +93,7 @@ void DynamicRecurrentOp::Run(const Scope& scope,
CreateScopes(); CreateScopes();
WriteStepInputs(); WriteStepInputs();
InitStates(); InitStates();
WriteStepOutputs();
// call stepnet in all the time steps // call stepnet in all the time steps
for (size_t step = 0; step < cache_.num_steps; step++) { for (size_t step = 0; step < cache_.num_steps; step++) {
...@@ -76,7 +101,6 @@ void DynamicRecurrentOp::Run(const Scope& scope, ...@@ -76,7 +101,6 @@ void DynamicRecurrentOp::Run(const Scope& scope,
stepnet_->Run(step_scope, dev_ctx); stepnet_->Run(step_scope, dev_ctx);
} }
WriteStepOutputs();
ConcatOutputs(); ConcatOutputs();
} }
...@@ -84,11 +108,11 @@ void DynamicRecurrentOp::SplitInputs() const { ...@@ -84,11 +108,11 @@ void DynamicRecurrentOp::SplitInputs() const {
// TODO(superjom) make level a config // TODO(superjom) make level a config
// TODO(superjom) check all the inputs has the same LoD // TODO(superjom) check all the inputs has the same LoD
int level = 0; int level = 0;
const auto& inlinks = cache_.inlinks; for (const auto& item : cache_.inlinks) {
for (const auto& item : inlinks) {
const auto& var = item.second; const auto& var = item.second;
const auto& tensor = var->Get<LoDTensor>(); const auto& tensor = var->Get<LoDTensor>();
TensorArray& ta = step_inputs_[item.first]; TensorArray& ta = step_inputs_[item.first];
dy_seq_metas_[item.first] = dy_seq_metas_[item.first] =
ta.Unpack(tensor, level, true /*length_descend*/); ta.Unpack(tensor, level, true /*length_descend*/);
...@@ -120,17 +144,11 @@ void DynamicRecurrentOp::WriteStepInputs() const { ...@@ -120,17 +144,11 @@ void DynamicRecurrentOp::WriteStepInputs() const {
} }
void DynamicRecurrentOp::WriteStepOutputs() const { void DynamicRecurrentOp::WriteStepOutputs() const {
for (size_t step = 0; step < cache_.scopes->size(); step++) { // initialize step outputs
auto& scope = cache_.GetScope(step); for (const auto& item : cache_.outlinks) {
for (auto& item : step_outputs_) { step_outputs_.emplace(item.first, TensorArray());
auto* var = scope.FindVar(item.first);
if (var == nullptr) {
var = scope.Var(item.first);
}
auto* tensor = var->GetMutable<LoDTensor>();
item.second.WriteShared(step, *tensor);
}
} }
PADDLE_ENFORCE_GT(step_outputs_.size(), 0UL);
} }
void DynamicRecurrentOp::CreateScopes() const { void DynamicRecurrentOp::CreateScopes() const {
...@@ -145,12 +163,18 @@ void DynamicRecurrentOp::CreateScopes() const { ...@@ -145,12 +163,18 @@ void DynamicRecurrentOp::CreateScopes() const {
PADDLE_ENFORCE_NOT_NULL(stepnet_, "stepnet should be set first"); PADDLE_ENFORCE_NOT_NULL(stepnet_, "stepnet should be set first");
std::vector<std::string> memories; std::vector<std::string> memories;
std::vector<std::string> pre_memories; std::vector<std::string> pre_memories;
std::vector<std::string> stepnet_outputs;
std::transform(arg_.memories.begin(), arg_.memories.end(), std::transform(arg_.memories.begin(), arg_.memories.end(),
std::back_inserter(memories), std::back_inserter(memories),
[](const rnn::MemoryAttr& m) { return m.var; }); [](const rnn::MemoryAttr& m) { return m.var; });
std::transform(arg_.memories.begin(), arg_.memories.end(), std::transform(arg_.memories.begin(), arg_.memories.end(),
std::back_inserter(pre_memories), std::back_inserter(pre_memories),
[](const rnn::MemoryAttr& m) { return m.pre_var; }); [](const rnn::MemoryAttr& m) { return m.pre_var; });
for (const auto& item : stepnet_->Outputs()) {
for (const auto& var : item.second) {
stepnet_outputs.push_back(var);
}
}
for (size_t step = 0; step < cache_.num_steps; step++) { for (size_t step = 0; step < cache_.num_steps; step++) {
auto& scope = cache_.GetScope(step); auto& scope = cache_.GetScope(step);
...@@ -158,60 +182,88 @@ void DynamicRecurrentOp::CreateScopes() const { ...@@ -158,60 +182,88 @@ void DynamicRecurrentOp::CreateScopes() const {
detail::CreateVariables(scope, arg_.outlinks); detail::CreateVariables(scope, arg_.outlinks);
detail::CreateVariables(scope, memories); detail::CreateVariables(scope, memories);
detail::CreateVariables(scope, pre_memories); detail::CreateVariables(scope, pre_memories);
detail::CreateVariables(scope, stepnet_outputs);
} }
} }
void DynamicRecurrentOp::ConcatOutputs() const { void DynamicRecurrentOp::ConcatOutputs() const {
// TODO(superjom) transform this to a config // TODO(superjom) transform this to a config
int level = 0; int level = 0;
// TODO(superjom) pass in some lod for (size_t step = 0; step < cache_.num_steps; step++) {
// just a placeholder auto& scope = cache_.GetScope(step);
framework::LoD lod; for (auto& item : step_outputs_) {
auto* var = scope.FindVar(item.first);
PADDLE_ENFORCE_NOT_NULL(var);
auto* tensor = var->GetMutable<LoDTensor>();
tensor->mutable_data<value_type>(platform::CPUPlace());
item.second.WriteShared(step, *tensor);
}
}
// the inlinks' lods should be the same, so randomly get one lod.
const auto& some_lod =
cache_.scope->FindVar(arg_.inlinks.front())->Get<LoDTensor>().lod();
const auto& some_meta = dy_seq_metas_[arg_.inlinks.front()];
for (auto& item : step_outputs_) { for (auto& item : step_outputs_) {
auto tensor = item.second.Pack(level, dy_seq_metas_[item.first], lod); auto tensor = item.second.Pack(level, some_meta, some_lod);
auto& output = cache_.outlinks[item.first]->Get<LoDTensor>(); auto* output = cache_.outlinks[item.first]->GetMutable<LoDTensor>();
const_cast<LoDTensor*>(&output)->ShareDataWith<value_type>(tensor); const_cast<LoDTensor*>(output)->ShareDataWith<value_type>(tensor);
} }
} }
void DynamicRecurrentOp::InitStates() const { void DynamicRecurrentOp::InitStates() const {
// init the first state for (size_t step = 0; step < cache_.num_steps; step++) {
// TODO(superjom) parepare the scenerio that boot state not exists for (const auto& memory : arg_.memories) {
for (auto memory : arg_.memories) { CreateState(memory, step);
auto* boot_state_var = cache_.scope->FindVar(memory.boot_var); LinkState(memory, step);
PADDLE_ENFORCE_NOT_NULL(boot_state_var);
auto& boot_state = boot_state_var->Get<LoDTensor>();
const auto& dims = boot_state.dims();
for (size_t step = 0; step < cache_.num_steps; step++) {
auto& cur_scope = cache_.GetScope(step);
// link pre-state to boot_state
// init state and pre-state
auto* pre_state = cur_scope.FindVar(memory.pre_var);
PADDLE_ENFORCE_NOT_NULL(pre_state);
pre_state->GetMutable<LoDTensor>();
auto* state = cur_scope.FindVar(memory.var);
PADDLE_ENFORCE_NOT_NULL(state);
state->GetMutable<LoDTensor>()->Resize(dims);
state->GetMutable<LoDTensor>()->mutable_data<value_type>(
platform::CPUPlace());
if (step == 0) {
auto* pre_state_tensor = pre_state->GetMutable<LoDTensor>();
pre_state_tensor->Resize(boot_state.dims());
pre_state_tensor->ShareDataWith<value_type>(boot_state);
} else {
auto& pre_scope = cache_.GetScope(step - 1);
auto* state_pre = pre_scope.FindVar(memory.var);
PADDLE_ENFORCE_NOT_NULL(state_pre);
pre_state->GetMutable<LoDTensor>()->ShareDataWith<value_type>(
*state_pre->GetMutable<LoDTensor>());
}
} }
} }
} }
/*
 * Allocate the state tensor of `memory` inside the scope of time step `step`
 * and record it in `states_`. The tensor takes the boot state's dimensions,
 * except that dimension 0 becomes the number of rows of the first inlink's
 * input at this step.
 */
void DynamicRecurrentOp::CreateState(const rnn::MemoryAttr& memory,
                                     size_t step) const {
  auto& step_scope = cache_.GetScope(step);
  LoDTensor* state = cache_.GetTensor(step_scope, memory.var);
  LoDTensor* boot_state = cache_.GetTensor(*cache_.scope, memory.boot_var);

  const size_t rows_in_step =
      step_inputs_[arg_.inlinks.front()].Read(step).dims()[0];

  auto state_dims = boot_state->dims();
  state_dims[0] = rows_in_step;
  state->Resize(state_dims);
  state->mutable_data<value_type>(platform::CPUPlace());

  states_[memory.var].WriteShared(step, *state);
}
/*
 * Prepare the pre-state of `memory` in the scope of time step `step`.
 *
 * step == 0: the pre-state becomes a private, reordered copy of the boot
 *            state (rows arranged according to the dynamic-sequence meta of
 *            the first inlink). It must not share storage with the boot
 *            state, which is an input of the op and keeps its original order.
 * step  > 0: the pre-state shares the first `num_instances` rows of the
 *            state produced in the previous step's scope.
 */
void DynamicRecurrentOp::LinkState(const rnn::MemoryAttr& memory,
                                   size_t step) const {
  auto& scope = cache_.GetScope(step);
  auto& state_pre = *cache_.GetTensor(scope, memory.pre_var);

  // all the step_inputs' metas should be the same, just randomly select one
  // and get the dyseq meta.
  const auto& some_meta = dy_seq_metas_[arg_.inlinks.front()];
  size_t num_instances =
      step_inputs_[arg_.inlinks.front()].Read(step).dims()[0];

  if (step == 0) {
    LoDTensor* boot_state = cache_.GetTensor(*cache_.scope, memory.boot_var);
    boot_state->mutable_data<value_type>(platform::CPUPlace());
    // allocate memory
    state_pre.Resize(boot_state->dims());
    state_pre.mutable_data<value_type>(platform::CPUPlace());
    detail::ReorderBootState<value_type>(some_meta, *boot_state, &state_pre,
                                         boot_state->place());
    // Stop here: sharing data with the boot state afterwards would throw the
    // reordered copy away and alias the un-reordered input tensor instead.
    return;
  }

  // shrink and share from previous state
  LoDTensor* pre_state =
      cache_.GetTensor(cache_.GetScope(step - 1), memory.var);
  auto shrinked_pre_state = pre_state->Slice<value_type>(0, num_instances);
  state_pre.ShareDataWith<value_type>(shrinked_pre_state);
}
void DynamicRecurrentOp::ArgCache::Init( void DynamicRecurrentOp::ArgCache::Init(
const rnn::ArgumentName& name, const paddle::framework::OperatorBase& op, const rnn::ArgumentName& name, const paddle::framework::OperatorBase& op,
const paddle::framework::Scope& scope, rnn::Argument* arg) { const paddle::framework::Scope& scope, rnn::Argument* arg) {
...@@ -261,6 +313,12 @@ Variable* DynamicRecurrentOp::ArgCache::GetVariable(const Scope& scope, ...@@ -261,6 +313,12 @@ Variable* DynamicRecurrentOp::ArgCache::GetVariable(const Scope& scope,
return var; return var;
} }
// Look up variable `name` through GetVariable() and return its content as a
// mutable LoDTensor.
LoDTensor* DynamicRecurrentOp::ArgCache::GetTensor(
    const framework::Scope& scope, const std::string& name) {
  return GetVariable(scope, name)->GetMutable<LoDTensor>();
}
const rnn::ArgumentName DynamicRecurrentOp::kArgName{ const rnn::ArgumentName DynamicRecurrentOp::kArgName{
"step_net", "step_scopes", "inlinks", "outlinks", "step_net", "step_scopes", "inlinks", "outlinks",
"memories", "pre_memories", "boot_memories"}; "memories", "pre_memories", "boot_memories"};
......
...@@ -77,6 +77,17 @@ class DynamicRecurrentOp : public framework::OperatorBase { ...@@ -77,6 +77,17 @@ class DynamicRecurrentOp : public framework::OperatorBase {
*/ */
void InitStates() const; void InitStates() const;
/*
* Create state variables for each time step.
*/
void CreateState(const rnn::MemoryAttr& memory, size_t step) const;
/*
* Link pre-state variable in current scope to the state variable in the
* previous time step (scope).
*/
void LinkState(const rnn::MemoryAttr& memory, size_t step) const;
/* /*
* Concatenate outputs in each time step and generate a LoDTensor. * Concatenate outputs in each time step and generate a LoDTensor.
*/ */
...@@ -91,6 +102,16 @@ class DynamicRecurrentOp : public framework::OperatorBase { ...@@ -91,6 +102,16 @@ class DynamicRecurrentOp : public framework::OperatorBase {
} }
const OperatorBase& GetStepNet() const { return *stepnet_; } const OperatorBase& GetStepNet() const { return *stepnet_; }
const framework::TensorArray& state(const std::string& name) const {
return states_[name];
}
const framework::TensorArray& step_input(const std::string& name) const {
return step_inputs_[name];
}
const framework::TensorArray& step_output(const std::string& name) const {
return step_outputs_[name];
}
protected: protected:
struct ArgCache { struct ArgCache {
framework::Scope const* scope; framework::Scope const* scope;
...@@ -108,6 +129,9 @@ class DynamicRecurrentOp : public framework::OperatorBase { ...@@ -108,6 +129,9 @@ class DynamicRecurrentOp : public framework::OperatorBase {
return *scopes->at(index); return *scopes->at(index);
} }
framework::LoDTensor* GetTensor(const framework::Scope& scope,
const std::string& name);
private: private:
void InitArgument(const rnn::ArgumentName& name, const OperatorBase& op, void InitArgument(const rnn::ArgumentName& name, const OperatorBase& op,
rnn::Argument* arg); rnn::Argument* arg);
...@@ -122,7 +146,7 @@ class DynamicRecurrentOp : public framework::OperatorBase { ...@@ -122,7 +146,7 @@ class DynamicRecurrentOp : public framework::OperatorBase {
private: private:
std::unique_ptr<OperatorBase> stepnet_; std::unique_ptr<OperatorBase> stepnet_;
mutable framework::TensorArray states_; mutable std::map<std::string, framework::TensorArray> states_;
mutable std::map<std::string, framework::TensorArray> step_inputs_; mutable std::map<std::string, framework::TensorArray> step_inputs_;
mutable std::map<std::string, framework::TensorArray> step_outputs_; mutable std::map<std::string, framework::TensorArray> step_outputs_;
mutable std::map<std::string, std::vector<framework::DySeqMeta>> mutable std::map<std::string, std::vector<framework::DySeqMeta>>
......
...@@ -87,7 +87,6 @@ class DynamicRecurrentOpTestHelper : public ::testing::Test { ...@@ -87,7 +87,6 @@ class DynamicRecurrentOpTestHelper : public ::testing::Test {
platform::CPUPlace place; platform::CPUPlace place;
scope.Var("step_scopes"); scope.Var("step_scopes");
CreateVar(scope, "boot_mem", framework::make_ddim({10, 20}), place); CreateVar(scope, "boot_mem", framework::make_ddim({10, 20}), place);
// auto* out0 =
CreateVar(scope, "out0", framework::make_ddim({10, 20}), place); CreateVar(scope, "out0", framework::make_ddim({10, 20}), place);
auto* in0 = CreateVar(scope, "in0", framework::make_ddim({10, 8}), place); auto* in0 = CreateVar(scope, "in0", framework::make_ddim({10, 8}), place);
// 10 instanes with 4 sentences, length is 4, 3, 2, 1 respectively. // 10 instanes with 4 sentences, length is 4, 3, 2, 1 respectively.
......
/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
#include "paddle/operators/gru_unit_op.h"
namespace paddle {
namespace operators {
using framework::Tensor;
// Forward operator of a single GRU step. Only shape inference is defined
// here; the computation is done by the registered kernel.
class GRUUnitOp : public framework::OperatorWithKernel {
 public:
  using framework::OperatorWithKernel::OperatorWithKernel;

 protected:
  // Checks that the required slots exist, that Input is three frame sizes
  // wide, that Weight is [frame_size, frame_size * 3] and, when a Bias is
  // given, that it is [1, frame_size * 3]. Then sets the output shapes.
  void InferShape(framework::InferShapeContext* ctx) const override {
    // Required inputs.
    PADDLE_ENFORCE(ctx->HasInput("Input"),
                   "Input(%s) of GRUUnitOp should not be null.", "Input");
    PADDLE_ENFORCE(ctx->HasInput("HiddenPrev"),
                   "Input(%s) of GRUUnitOp should not be null.", "HiddenPrev");
    PADDLE_ENFORCE(ctx->HasInput("Weight"),
                   "Input(%s) of GRUUnitOp should not be null.", "Weight");
    // Required outputs.
    PADDLE_ENFORCE(ctx->HasOutput("Gate"),
                   "Output(%s) of GRUUnitOp should not be null.", "Gate");
    PADDLE_ENFORCE(ctx->HasOutput("ResetHiddenPrev"),
                   "Output(%s) of GRUUnitOp should not be null.",
                   "ResetHiddenPrev");
    PADDLE_ENFORCE(ctx->HasOutput("Hidden"),
                   "Output(%s) of GRUUnitOp should not be null.", "Hidden");

    const auto x_dims = ctx->GetInputDim("Input");
    const auto h_prev_dims = ctx->GetInputDim("HiddenPrev");
    const auto w_dims = ctx->GetInputDim("Weight");

    const int batch_size = static_cast<int>(x_dims[0]);
    const int input_size = static_cast<int>(x_dims[1]);
    const int frame_size = static_cast<int>(h_prev_dims[1]);
    const int weight_height = static_cast<int>(w_dims[0]);
    const int weight_width = static_cast<int>(w_dims[1]);
    // Width shared by Input, Weight, Bias and Gate.
    const int gate_width = frame_size * 3;

    PADDLE_ENFORCE_EQ(
        input_size, gate_width,
        "The input_size must be 3 times of frame_size in GRUUnitOp.");
    PADDLE_ENFORCE_EQ(
        weight_height, frame_size,
        "The shape of Weight matrix must be [frame_size, frame_size * 3].");
    PADDLE_ENFORCE_EQ(
        weight_width, gate_width,
        "The shape of Weight matrix must be [frame_size, frame_size * 3].");

    // The Bias shape is only checked when the slot is bound to a variable.
    const auto& bias_name = Input("Bias");
    if (bias_name != framework::kEmptyVarName) {
      const auto b_dims = ctx->GetInputDim("Bias");
      const int bias_height = static_cast<int>(b_dims[0]);
      const int bias_width = static_cast<int>(b_dims[1]);
      PADDLE_ENFORCE_EQ(bias_height, 1,
                        "The shape of Bias must be [1, frame_size * 3].");
      PADDLE_ENFORCE_EQ(bias_width, gate_width,
                        "The shape of Bias must be [1, frame_size * 3].");
    }

    ctx->SetOutputDim("Gate", {batch_size, gate_width});
    ctx->SetOutputDim("ResetHiddenPrev", {batch_size, frame_size});
    ctx->SetOutputDim("Hidden", {batch_size, frame_size});
  }
};
// Declares the inputs, outputs, attributes and user-facing documentation of
// the GRU unit operator.
class GRUUnitOpMaker : public framework::OpProtoAndCheckerMaker {
public:
GRUUnitOpMaker(framework::OpProto* proto,
framework::OpAttrChecker* op_checker)
: OpProtoAndCheckerMaker(proto, op_checker) {
// Inputs: Input, HiddenPrev, Weight and Bias.
AddInput("Input",
"(Tensor) Matrix with shape [batch_size, frame_size * 3] for the "
"input.");
AddInput("HiddenPrev",
"(Tensor) Matrix with shape [batch_size, frame_size] for the "
"states of previous time step.");
AddInput("Weight",
"(Tensor) Weight matrix with shape [frame_size, frame_size * 3]. "
"The elements continuous in memory can be divided into two parts. "
"The first part are weights of the update gate and reset gate "
"with shape [frame_size, frame_size * 2], and the second part are "
"weights of output candidate with shape [frame_size, frame_size]");
AddInput("Bias",
"(Tensor) Bias vector with shape [1, frame_size * 3] concating "
"bias of the update gate, reset gate and output candidate.");
// Outputs: Gate and ResetHiddenPrev are marked as intermediate results;
// Hidden is the regular output.
AddOutput("Gate",
"(Tensor) Matrix with shape [batch_size, frame_size * 3] for the "
"output of update gate, reset gate and output candidate")
.AsIntermediate();
AddOutput("ResetHiddenPrev",
"(Tensor) Matrix with shape [batch_size, frame_size] for the "
"reseted hidden state of previous time step.")
.AsIntermediate();
AddOutput("Hidden",
"(Tensor) The GRU hidden state of the current time step "
"with shape [batch_size, frame_size].");
// Activation selectors. Both attributes are ints restricted to the values
// identity, sigmoid, tanh and relu; those enumerators are not defined in
// this block (presumably they come from gru_unit_op.h -- confirm).
AddAttr<int>("activation",
"(enum int, default tanh) "
"The activation type used for output candidate {h}_t.")
.SetDefault(tanh)
.InEnum({identity, sigmoid, tanh, relu});
AddAttr<int>("gate_activation",
"(enum int, default sigmoid) "
"The activation type used in update gate and reset gate.")
.SetDefault(sigmoid)
.InEnum({identity, sigmoid, tanh, relu});
// Operator-level documentation; the \f[ ... \f] markers delimit a formula
// block.
AddComment(R"DOC(
GRUUnitOp implements part calculations of the GRU unit as following:
\f[
update \ gate: u_t = actGate(xu_t + W_u * hidden_prev + bias_u) \\
reset \ gate: r_t = actGate(xr_t + W_r * hidden_prev + bias_r) \\
output \ candidate: {h}_t = actNode(xc_t + W_c * dot(r_t, hidden_prev) + bias_c) \\
output: h_t = dot((1-u_t), {h}_t) + dot(u_t, hidden_prev)
\f]
The rest of GRU unit can be completed by using FCOp's output as the input of GRUUnitOp.
)DOC");
}
};
// Operator definition for the backward pass of gru_unit. It carries no
// computation of its own; it validates the inputs of the gradient op and
// infers the shapes of the gradient outputs.
class GRUUnitGradOp : public framework::OperatorWithKernel {
 public:
  using framework::OperatorWithKernel::OperatorWithKernel;

 protected:
  // Verifies that every required forward variable and upstream gradient is
  // present, checks the Input / Weight / (optional) Bias shapes against
  // frame_size, and gives each gradient output that exists the dims of the
  // variable it belongs to.
  void InferShape(framework::InferShapeContext* ctx) const override {
    // Forward-pass variables that must be fed to the gradient op.
    static const char* const kForwardVars[] = {
        "Input", "HiddenPrev", "Weight", "Gate", "ResetHiddenPrev", "Hidden"};
    for (const char* var : kForwardVars) {
      PADDLE_ENFORCE(ctx->HasInput(var),
                     "Input(%s) of GRUUnitGradOp should not be null.", var);
    }

    // Gradients of the forward outputs that must be fed as well.
    static const char* const kOutputGradVars[] = {"Gate", "ResetHiddenPrev",
                                                  "Hidden"};
    for (const char* var : kOutputGradVars) {
      PADDLE_ENFORCE(ctx->HasInput(framework::GradVarName(var)),
                     "Input(%s@GRAD) of GRUUnitGradOp should not be null.",
                     var);
    }

    auto input_dims = ctx->GetInputDim("Input");
    auto hidden_prev_dims = ctx->GetInputDim("HiddenPrev");
    auto weight_dims = ctx->GetInputDim("Weight");

    // Sets the dims of <var>@GRAD, but only when that output is requested.
    auto set_grad_dim = [ctx](const char* var,
                              const decltype(input_dims)& dims) {
      auto grad_name = framework::GradVarName(var);
      if (ctx->HasOutput(grad_name)) {
        ctx->SetOutputDim(grad_name, dims);
      }
    };

    // NOTE: the names below are kept as-is because PADDLE_ENFORCE_EQ may
    // embed the spelled-out operands in its failure message.
    int input_size = input_dims[1];
    int frame_size = hidden_prev_dims[1];
    int weight_height = weight_dims[0];
    int weight_width = weight_dims[1];
    PADDLE_ENFORCE_EQ(
        input_size, frame_size * 3,
        "The input_size must be 3 times of frame_size in GRUUnitOp.");
    PADDLE_ENFORCE_EQ(
        weight_height, frame_size,
        "The shape of Weight matrix must be [frame_size, frame_size * 3].");
    PADDLE_ENFORCE_EQ(
        weight_width, frame_size * 3,
        "The shape of Weight matrix must be [frame_size, frame_size * 3].");

    // Bias is optional: it is only validated (and its gradient shaped) when
    // the op actually has a Bias input variable.
    if (Input("Bias") != framework::kEmptyVarName) {
      auto bias_dims = ctx->GetInputDim("Bias");
      int bias_height = bias_dims[0];
      int bias_width = bias_dims[1];
      PADDLE_ENFORCE_EQ(bias_height, 1,
                        "The shape of Bias must be [1, frame_size * 3].");
      PADDLE_ENFORCE_EQ(bias_width, frame_size * 3,
                        "The shape of Bias must be [1, frame_size * 3].");
      set_grad_dim("Bias", bias_dims);
    }

    set_grad_dim("Input", input_dims);
    set_grad_dim("HiddenPrev", hidden_prev_dims);
    set_grad_dim("Weight", weight_dims);
  }
};
} // namespace operators
} // namespace paddle
// Short alias used by the registration macros below.
namespace ops = paddle::operators;
// Register the forward operator "gru_unit" (definition + proto maker) together
// with its gradient operator "gru_unit_grad".
REGISTER_OP(gru_unit, ops::GRUUnitOp, ops::GRUUnitOpMaker, gru_unit_grad,
ops::GRUUnitGradOp);
// Register the float CPU kernel of the forward operator.
REGISTER_OP_CPU_KERNEL(gru_unit,
ops::GRUUnitKernel<paddle::platform::CPUPlace, float>);
// Register the float CPU kernel of the gradient operator.
REGISTER_OP_CPU_KERNEL(
gru_unit_grad, ops::GRUUnitGradKernel<paddle::platform::CPUPlace, float>);
/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserve.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
#define EIGEN_USE_GPU
#include "paddle/operators/gru_unit_op.h"
// Short alias used by the registration macros below.
namespace ops = paddle::operators;
// Register the float GPU kernel of the forward operator "gru_unit".
REGISTER_OP_GPU_KERNEL(gru_unit,
ops::GRUUnitKernel<paddle::platform::GPUPlace, float>);
// Register the float GPU kernel of the gradient operator "gru_unit_grad".
REGISTER_OP_GPU_KERNEL(
gru_unit_grad, ops::GRUUnitGradKernel<paddle::platform::GPUPlace, float>);
/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserve.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
#pragma once
#include "paddle/operators/activation_op.h"
#include "paddle/operators/math/math_function.h"
#include "paddle/framework/eigen.h"
#include "paddle/framework/op_registry.h"
namespace paddle {
namespace operators {
// Local shorthand for the framework tensor type used by both kernels.
using Tensor = framework::Tensor;
// Shorthand for the framework's Eigen matrix view of a Tensor; defaults to
// row-major layout and Eigen's default index type.
template <typename T, int MajorType = Eigen::RowMajor,
typename IndexType = Eigen::DenseIndex>
using EigenMatrix = framework::EigenMatrix<T, MajorType, IndexType>;
// Integer codes of the supported activations. The kernels compare the int
// attributes "activation" and "gate_activation" against these values.
enum GRUActivationType { identity = 0, sigmoid = 1, tanh = 2, relu = 3 };
// Forward kernel of the gru_unit operator.
//
// The Gate output buffer is used as scratch space: it is first filled with
// Input (+ Bias), then the hidden-state projections are accumulated into it
// and the activations are applied in place, slice by slice. The three
// column slices of Gate are, in order, update gate, reset gate and output
// candidate, each frame_size columns wide.
template <typename Place, typename T>
class GRUUnitKernel : public framework::OpKernel<T> {
 public:
  // Writes the activation selected by act_type (a GRUActivationType value),
  // applied to x, into y on device d. Throws for any other act_type.
  template <typename Device, typename X, typename Y>
  void ActCompute(const int act_type, const Device& d, X x, Y y) const {
    switch (act_type) {
      case identity:
        y.device(d) = x;
        break;
      case sigmoid:
        SigmoidFunctor<T>()(d, x, y);
        break;
      case tanh:
        TanhFunctor<T>()(d, x, y);
        break;
      case relu:
        ReluFunctor<T>()(d, x, y);
        break;
      default:
        PADDLE_THROW("unsupported activation type");
    }
  }

  // Computes Gate, ResetHiddenPrev and Hidden from Input, HiddenPrev,
  // Weight and the optional Bias.
  void Compute(const framework::ExecutionContext& context) const override {
    auto* in_tensor = context.Input<Tensor>("Input");
    auto* prev_tensor = context.Input<Tensor>("HiddenPrev");
    auto* weight_tensor = context.Input<Tensor>("Weight");
    auto* bias_tensor = context.Input<Tensor>("Bias");

    // Allocate the three outputs before taking Eigen views of them.
    auto* gate_tensor = context.Output<Tensor>("Gate");
    gate_tensor->mutable_data<T>(context.GetPlace());
    auto* reset_prev_tensor = context.Output<Tensor>("ResetHiddenPrev");
    reset_prev_tensor->mutable_data<T>(context.GetPlace());
    auto* out_tensor = context.Output<Tensor>("Hidden");
    out_tensor->mutable_data<T>(context.GetPlace());

    const int batch_size = in_tensor->dims()[0];
    const int frame_size = prev_tensor->dims()[1];

    auto x = EigenMatrix<T>::From(*in_tensor);
    auto h_p = EigenMatrix<T>::From(*prev_tensor);
    auto g = EigenMatrix<T>::From(*gate_tensor);
    auto r_h_p = EigenMatrix<T>::From(*reset_prev_tensor);
    auto h = EigenMatrix<T>::From(*out_tensor);
    auto place = context.GetEigenDevice<Place>();

    // Step 1: pre-activation gates start as Input, plus Bias broadcast over
    // the batch when a Bias tensor was supplied.
    if (bias_tensor) {
      auto b = EigenMatrix<T>::From(*bias_tensor);
      g.device(place) =
          x +
          b.reshape(Eigen::array<int, 2>({{1, frame_size * 3}}))
              .broadcast(Eigen::array<int, 2>({{batch_size, 1}}));
    } else {
      g.device(place) = x;
    }

    const T* prev_buf = prev_tensor->data<T>();
    const T* weight_buf = weight_tensor->data<T>();
    T* gate_buf = gate_tensor->data<T>();
    T* reset_prev_buf = reset_prev_tensor->data<T>();

    // Step 2: add HiddenPrev times the update/reset weights into the first
    // 2 * frame_size columns of Gate.
    math::gemm<Place, T>(context.device_context(), false, false, batch_size,
                         2 * frame_size, frame_size, 1, prev_buf, frame_size,
                         weight_buf, frame_size * 2, 1, gate_buf,
                         frame_size * 3);

    // Step 3: activate the update and reset gate slices in place.
    const int gate_act = context.Attr<int>("gate_activation");
    Eigen::array<int, 2> extents({{batch_size, frame_size}});

    Eigen::array<int, 2> u_offsets({{0, 0}});
    auto u = g.slice(u_offsets, extents);  // update gate
    ActCompute(gate_act, place, u, u);

    Eigen::array<int, 2> r_offsets({{0, frame_size}});
    auto r = g.slice(r_offsets, extents);  // reset gate
    ActCompute(gate_act, place, r, r);

    // Step 4: reset the previous hidden state and project it through the
    // candidate weights into the last frame_size columns of Gate.
    r_h_p.device(place) = r * h_p;
    math::gemm<Place, T>(context.device_context(), false, false, batch_size,
                         frame_size, frame_size, 1, reset_prev_buf, frame_size,
                         weight_buf + frame_size * frame_size * 2, frame_size,
                         1, gate_buf + frame_size * 2, frame_size * 3);

    // Step 5: activate the output candidate slice in place.
    Eigen::array<int, 2> c_offsets({{0, frame_size * 2}});
    auto c = g.slice(c_offsets, extents);  // output candidate
    ActCompute(context.Attr<int>("activation"), place, c, c);

    // Step 6: blend previous state and candidate with the update gate.
    h.device(place) = u * (h_p - c) + c;
  }
};
// Backward kernel of the gru_unit operator.
//
// Reads the forward tensors Input, HiddenPrev, Weight, Gate and
// ResetHiddenPrev plus the gradient of Hidden, and writes the gradients of
// Input, HiddenPrev, Weight and (when the output exists) Bias. Gate holds the
// activated update gate, reset gate and output candidate as three column
// slices of width frame_size, in that order.
//
// NOTE(review): input_grad, hidden_prev_grad and weight_grad are dereferenced
// unconditionally below, while GRUUnitGradOp::InferShape only sets their dims
// when the outputs exist. Confirm these outputs are always present at runtime.
template <typename Place, typename T>
class GRUUnitGradKernel : public framework::OpKernel<T> {
public:
// Writes into dx the gradient w.r.t. the activation input, given the
// activation output y and the upstream gradient dy, for the activation
// selected by act_type (a GRUActivationType value). Throws for any other
// act_type.
template <typename Device, typename X, typename Y, typename DX, typename DY>
void ActGradCompute(const int act_type, const Device& d, X x, Y y, DX dx,
DY dy) const {
// x is dummy and won't be used even in Relu(use y instead)
if (act_type == identity)
dx.device(d) = dy;
else if (act_type == sigmoid)
SigmoidGradFunctor<T>()(d, x, y, dy, dx);
else if (act_type == tanh)
TanhGradFunctor<T>()(d, x, y, dy, dx);
else if (act_type == relu)
ReluGradFunctor<T>()(d, x, y, dy, dx);
else
PADDLE_THROW("unsupported activation type");
}
// Runs the backward computation. The statement order matters: gate_grad is
// filled slice by slice and later gemm calls read slices written earlier.
void Compute(const framework::ExecutionContext& context) const override {
auto* input = context.Input<Tensor>("Input");
auto* hidden_prev = context.Input<Tensor>("HiddenPrev");
auto* weight = context.Input<Tensor>("Weight");
auto* gate = context.Input<Tensor>("Gate");
auto* reset_hidden_prev = context.Input<Tensor>("ResetHiddenPrev");
auto* hidden_grad = context.Input<Tensor>(framework::GradVarName("Hidden"));
auto* input_grad = context.Output<Tensor>(framework::GradVarName("Input"));
auto* hidden_prev_grad =
context.Output<Tensor>(framework::GradVarName("HiddenPrev"));
auto* weight_grad =
context.Output<Tensor>(framework::GradVarName("Weight"));
auto* bias_grad = context.Output<Tensor>(framework::GradVarName("Bias"));
input_grad->mutable_data<T>(context.GetPlace());
hidden_prev_grad->mutable_data<T>(context.GetPlace());
weight_grad->mutable_data<T>(context.GetPlace());
// Temporary buffers: gradient w.r.t. the pre-activation gates (same dims as
// Input) and gradient w.r.t. the reset hidden state.
Tensor gate_grad;
gate_grad.mutable_data<T>(input->dims(), context.GetPlace());
Tensor reset_hidden_prev_grad;
reset_hidden_prev_grad.mutable_data<T>(reset_hidden_prev->dims(),
context.GetPlace());
int batch_size = input->dims()[0];
int frame_size = hidden_prev->dims()[1];
// Raw pointers for the gemm calls.
const T* hidden_prev_data = hidden_prev->data<T>();
T* hidden_prev_grad_data = hidden_prev_grad->data<T>();
const T* weight_data = weight->data<T>();
T* weight_grad_data = weight_grad->data<T>();
T* gate_grad_data = gate_grad.data<T>();
const T* reset_hidden_prev_data = reset_hidden_prev->data<T>();
T* reset_hidden_prev_grad_data = reset_hidden_prev_grad.data<T>();
// Eigen views for the element-wise expressions.
auto h_p = EigenMatrix<T>::From(*hidden_prev);
auto g = EigenMatrix<T>::From(*gate);
auto d_h = EigenMatrix<T>::From(*hidden_grad);
auto d_x = EigenMatrix<T>::From(*input_grad);
auto d_h_p = EigenMatrix<T>::From(*hidden_prev_grad);
auto d_g = EigenMatrix<T>::From(gate_grad);
auto d_r_h_p = EigenMatrix<T>::From(reset_hidden_prev_grad);
auto place = context.GetEigenDevice<Place>();
// Column slices of Gate, each [batch_size, frame_size].
Eigen::array<int, 2> extents({{batch_size, frame_size}});
Eigen::array<int, 2> u_offsets({{0, 0}});
auto u = g.slice(u_offsets, extents); // update gate
Eigen::array<int, 2> r_offsets({{0, frame_size}});
auto r = g.slice(r_offsets, extents); // reset gate
Eigen::array<int, 2> c_offsets({{0, frame_size * 2}});
auto c = g.slice(c_offsets, extents); // output candidate
// backward for unactivated update gate
// (upstream gradient is d_h * (h_p - c), matching h = u * (h_p - c) + c)
ActGradCompute(context.Attr<int>("gate_activation"), place, u, u,
d_g.slice(u_offsets, extents), d_h * (h_p - c));
// backward for unactivated output candidate
// (upstream gradient is d_h * (1 - u))
ActGradCompute(context.Attr<int>("activation"), place, c, c,
d_g.slice(c_offsets, extents), d_h * (u.constant(T(1)) - u));
// backward for reset_hidden_prev
// (candidate slice of gate_grad times the candidate weights, second operand
// flagged as transposed; the literal 0 is presumably gemm's beta, i.e. the
// destination is overwritten -- confirm against math::gemm)
math::gemm<Place, T>(context.device_context(), false, true, batch_size,
frame_size, frame_size, 1,
gate_grad_data + frame_size * 2, frame_size * 3,
weight_data + frame_size * frame_size * 2, frame_size,
0, reset_hidden_prev_grad_data, frame_size);
// backward for state_weight
// (written to the part of weight_grad after the first
// frame_size * frame_size * 2 elements)
math::gemm<Place, T>(
context.device_context(), true, false, frame_size, frame_size,
batch_size, 1, reset_hidden_prev_data, frame_size,
gate_grad_data + frame_size * 2, frame_size * 3, 0,
weight_grad_data + frame_size * frame_size * 2, frame_size);
// backward for unactivated reset gate
// (must run after reset_hidden_prev_grad has been computed above)
ActGradCompute(context.Attr<int>("gate_activation"), place, r, r,
d_g.slice(r_offsets, extents), d_r_h_p * h_p);
// backward for update_gate_weight and reset_gate_weight
// (uses the first 2 * frame_size columns of gate_grad, now complete)
math::gemm<Place, T>(context.device_context(), true, false, frame_size,
frame_size * 2, batch_size, 1, hidden_prev_data,
frame_size, gate_grad_data, frame_size * 3, 0,
weight_grad_data, frame_size * 2);
// backward for hidden_prev
// (element-wise terms first; the gemm below passes 1 where the earlier calls
// pass 0, presumably accumulating onto them -- confirm against math::gemm)
d_h_p.device(place) = d_r_h_p * r + d_h * u;
math::gemm<Place, T>(context.device_context(), false, true, batch_size,
frame_size, frame_size * 2, 1, gate_grad_data,
frame_size * 3, weight_data, frame_size * 2, 1,
hidden_prev_grad_data, frame_size);
// backward for input
// (Input is added directly to the pre-activation gates, so its gradient is
// a copy of gate_grad)
d_x.device(place) = d_g;
// backward for bias
// (only when the Bias gradient output exists: sum gate_grad over dimension 0)
if (bias_grad) {
bias_grad->mutable_data<T>(context.GetPlace());
auto d_b = EigenMatrix<T>::From(*bias_grad);
d_b.device(place) = d_g.sum(Eigen::array<int, 1>({{0}}));
}
}
};
} // namespace operators
} // namespace paddle
...@@ -115,8 +115,9 @@ class MultiplexGradOp : public framework::OperatorWithKernel { ...@@ -115,8 +115,9 @@ class MultiplexGradOp : public framework::OperatorWithKernel {
} // namespace paddle } // namespace paddle
namespace ops = paddle::operators; namespace ops = paddle::operators;
REGISTER_OP(multiplex, ops::MultiplexOp, ops::MultiplexOpMaker, multiplex_grad, REGISTER_OPERATOR(multiplex, ops::MultiplexOp, ops::MultiplexOpMaker,
ops::MultiplexGradOp); paddle::framework::DefaultGradOpDescMaker<false>);
REGISTER_OPERATOR(multiplex_grad, ops::MultiplexGradOp);
REGISTER_OP_CPU_KERNEL( REGISTER_OP_CPU_KERNEL(
multiplex, ops::MultiplexCPUKernel<paddle::platform::CPUPlace, float>); multiplex, ops::MultiplexCPUKernel<paddle::platform::CPUPlace, float>);
REGISTER_OP_CPU_KERNEL( REGISTER_OP_CPU_KERNEL(
......
...@@ -34,7 +34,7 @@ class SumOp : public framework::OperatorWithKernel { ...@@ -34,7 +34,7 @@ class SumOp : public framework::OperatorWithKernel {
auto in_dim = x_dims[0]; auto in_dim = x_dims[0];
for (size_t i = 1; i < N; i++) { for (size_t i = 1; i < N; i++) {
auto dim = x_dims[i]; auto dim = x_dims[i];
PADDLE_ENFORCE(in_dim == dim, "Input tensors must have same shape"); PADDLE_ENFORCE_EQ(in_dim, dim, "Input tensors must have same shape");
} }
ctx->SetOutputDim("Out", in_dim); ctx->SetOutputDim("Out", in_dim);
ctx->ShareLoD("X", /*->*/ "Out"); ctx->ShareLoD("X", /*->*/ "Out");
......
...@@ -123,7 +123,18 @@ void BindProgramDesc(py::module &m) { ...@@ -123,7 +123,18 @@ void BindProgramDesc(py::module &m) {
AppendBackward(program_desc, no_grad_vars); AppendBackward(program_desc, no_grad_vars);
}) })
.def("block", &ProgramDescBind::Block, py::return_value_policy::reference) .def("block", &ProgramDescBind::Block, py::return_value_policy::reference)
.def("num_blocks", &ProgramDescBind::Size); .def("num_blocks", &ProgramDescBind::Size)
.def("serialize_to_string",
[](ProgramDescBind &program_desc) -> py::bytes {
const ProgramDesc *desc = program_desc.Proto();
PADDLE_ENFORCE(desc->IsInitialized(),
"ProgramDesc has not been initialized.");
std::string res;
PADDLE_ENFORCE(
desc->SerializeToString(&res),
"Serialize ProgramDesc Error. This could be a bug of Paddle.");
return res;
});
} }
void BindBlockDesc(py::module &m) { void BindBlockDesc(py::module &m) {
...@@ -149,7 +160,17 @@ void BindBlockDesc(py::module &m) { ...@@ -149,7 +160,17 @@ void BindBlockDesc(py::module &m) {
.def("all_vars", &BlockDescBind::AllVars, .def("all_vars", &BlockDescBind::AllVars,
py::return_value_policy::reference) py::return_value_policy::reference)
.def("all_ops", &BlockDescBind::AllOps, .def("all_ops", &BlockDescBind::AllOps,
py::return_value_policy::reference); py::return_value_policy::reference)
.def("serialize_to_string", [](BlockDescBind &block_desc) -> py::bytes {
const BlockDesc *desc = block_desc.Proto();
PADDLE_ENFORCE(desc->IsInitialized(),
"BlockDesc has not been initialized.");
std::string res;
PADDLE_ENFORCE(
desc->SerializeToString(&res),
"Serialize BlockDesc Error. This could be a bug of Paddle.");
return res;
});
} }
void BindVarDsec(py::module &m) { void BindVarDsec(py::module &m) {
...@@ -162,7 +183,8 @@ void BindVarDsec(py::module &m) { ...@@ -162,7 +183,8 @@ void BindVarDsec(py::module &m) {
.value("FP32", DataType::FP32) .value("FP32", DataType::FP32)
.value("FP64", DataType::FP64); .value("FP64", DataType::FP64);
py::class_<VarDescBind>(m, "VarDesc", "") py::class_<VarDescBind> var_desc(m, "VarDesc", "");
var_desc
.def("name", .def("name",
[](const VarDescBind &self) { [](const VarDescBind &self) {
py::bytes name = self.Name(); py::bytes name = self.Name();
...@@ -174,7 +196,23 @@ void BindVarDsec(py::module &m) { ...@@ -174,7 +196,23 @@ void BindVarDsec(py::module &m) {
.def("shape", &VarDescBind::Shape, py::return_value_policy::reference) .def("shape", &VarDescBind::Shape, py::return_value_policy::reference)
.def("data_type", &VarDescBind::GetDataType) .def("data_type", &VarDescBind::GetDataType)
.def("lod_level", &VarDescBind::GetLodLevel) .def("lod_level", &VarDescBind::GetLodLevel)
.def("set_lod_level", &VarDescBind::SetLoDLevel); .def("set_lod_level", &VarDescBind::SetLoDLevel)
.def("type", &VarDescBind::GetType)
.def("set_type", &VarDescBind::SetType)
.def("serialize_to_string", [](VarDescBind &var_desc) -> py::bytes {
const VarDesc *desc = var_desc.Proto();
PADDLE_ENFORCE(desc->IsInitialized(),
"VarDesc has not been initialized.");
std::string res;
PADDLE_ENFORCE(
desc->SerializeToString(&res),
"Serialize VarDesc Error. This could be a bug of Paddle.");
return res;
});
py::enum_<VarDesc::VarType>(var_desc, "VarType", "")
.value("LOD_TENSOR", VarDesc::LOD_TENSOR)
.value("SELECTED_ROWS", VarDesc::SELECTED_ROWS);
} }
void BindOpDesc(py::module &m) { void BindOpDesc(py::module &m) {
...@@ -206,7 +244,17 @@ void BindOpDesc(py::module &m) { ...@@ -206,7 +244,17 @@ void BindOpDesc(py::module &m) {
.def("set_block_attr", &OpDescBind::SetBlockAttr) .def("set_block_attr", &OpDescBind::SetBlockAttr)
.def("block_attr", &OpDescBind::GetBlockAttr) .def("block_attr", &OpDescBind::GetBlockAttr)
.def("check_attrs", &OpDescBind::CheckAttrs) .def("check_attrs", &OpDescBind::CheckAttrs)
.def("infer_shape", &OpDescBind::InferShape); .def("infer_shape", &OpDescBind::InferShape)
.def("serialize_to_string", [](OpDescBind &op_desc) -> py::bytes {
const OpDesc *desc = op_desc.Proto();
PADDLE_ENFORCE(desc->IsInitialized(),
"OpDesc has not been initialized.");
std::string res;
PADDLE_ENFORCE(
desc->SerializeToString(&res),
"Serialize OpDesc Error. This could be a bug of Paddle.");
return res;
});
} }
} // namespace pybind } // namespace pybind
......
...@@ -18,6 +18,7 @@ limitations under the License. */ ...@@ -18,6 +18,7 @@ limitations under the License. */
#include "paddle/framework/lod_tensor.h" #include "paddle/framework/lod_tensor.h"
#include "paddle/framework/tensor_array.h" #include "paddle/framework/tensor_array.h"
#include "paddle/operators/cond_op.h" #include "paddle/operators/cond_op.h"
#include "paddle/operators/dynamic_recurrent_op.h"
#include "paddle/operators/net_op.h" #include "paddle/operators/net_op.h"
#include "paddle/operators/recurrent_op.h" #include "paddle/operators/recurrent_op.h"
#include "paddle/platform/enforce.h" #include "paddle/platform/enforce.h"
...@@ -341,6 +342,33 @@ All parameter, weight, gradient are variables in Paddle. ...@@ -341,6 +342,33 @@ All parameter, weight, gradient are variables in Paddle.
self.set_stepnet(net.Clone()); self.set_stepnet(net.Clone());
}); });
py::class_<operators::DynamicRecurrentOp, OperatorBase>(m,
"DynamicRecurrentOp")
.def_static("create",
[](py::bytes protobin) -> operators::DynamicRecurrentOp * {
OpDesc desc;
PADDLE_ENFORCE(desc.ParsePartialFromString(protobin),
"Cannot parse user input to OpDesc");
PADDLE_ENFORCE(desc.IsInitialized(),
"User OpDesc is not initialized, reason %s",
desc.InitializationErrorString());
auto rnn_op = OpRegistry::CreateOp(desc);
return static_cast<operators::DynamicRecurrentOp *>(
rnn_op.release());
})
.def("set_stepnet",
[](operators::DynamicRecurrentOp &self, const operators::NetOp &net)
-> void { self.SetStepNet(net.Clone()); })
.def("get_state",
[](operators::DynamicRecurrentOp &self, const std::string &name)
-> const TensorArray & { return self.state(name); })
.def("get_step_input",
[](operators::DynamicRecurrentOp &self, const std::string &name)
-> const TensorArray & { return self.step_input(name); })
.def("get_step_output",
[](operators::DynamicRecurrentOp &self, const std::string &name)
-> const TensorArray & { return self.step_output(name); });
// cond_op // cond_op
py::class_<operators::CondOp, OperatorBase>(m, "CondOp") py::class_<operators::CondOp, OperatorBase>(m, "CondOp")
.def_static("create", .def_static("create",
......
...@@ -10,6 +10,7 @@ __all__ = ['Block', 'Variable', 'Program', 'Operator'] ...@@ -10,6 +10,7 @@ __all__ = ['Block', 'Variable', 'Program', 'Operator']
class Variable(object): class Variable(object):
def __init__(self, def __init__(self,
block, block,
type=core.VarDesc.VarType.LOD_TENSOR,
name=None, name=None,
shape=None, shape=None,
dtype=None, dtype=None,
...@@ -26,6 +27,14 @@ class Variable(object): ...@@ -26,6 +27,14 @@ class Variable(object):
self.desc = self.block.desc.var(name) self.desc = self.block.desc.var(name)
is_new_var = True is_new_var = True
if is_new_var:
self.desc.set_type(type)
elif self.desc.type() != type:
raise ValueError("Variable {0} has been created before. The "
"previous type is {1}; the new type is {2}. They"
" are not matched".format(self.name,
self.desc.type(), type))
if shape is not None: if shape is not None:
if is_new_var: if is_new_var:
self.desc.set_shape(shape) self.desc.set_shape(shape)
...@@ -64,6 +73,13 @@ class Variable(object): ...@@ -64,6 +73,13 @@ class Variable(object):
self.block.vars[name] = self self.block.vars[name] = self
self.op = None self.op = None
def __str__(self):
protostr = self.desc.serialize_to_string()
proto = framework_pb2.VarDesc.FromString(str(protostr))
return proto.__str__()
__repr__ = __str__
@property @property
def name(self): def name(self):
return self.desc.name() return self.desc.name()
...@@ -201,6 +217,13 @@ class Operator(object): ...@@ -201,6 +217,13 @@ class Operator(object):
self.desc.check_attrs() self.desc.check_attrs()
self.desc.infer_shape(self.block.desc) self.desc.infer_shape(self.block.desc)
def __str__(self):
protostr = self.desc.serialize_to_string()
proto = framework_pb2.OpDesc.FromString(str(protostr))
return proto.__str__()
__repr__ = __str__
@property @property
def type(self): def type(self):
return self.desc.type() return self.desc.type()
...@@ -243,6 +266,13 @@ class Block(object): ...@@ -243,6 +266,13 @@ class Block(object):
self.ops = collections.deque() # operator list self.ops = collections.deque() # operator list
self.program = program self.program = program
def __str__(self):
protostr = self.desc.serialize_to_string()
proto = framework_pb2.BlockDesc.FromString(str(protostr))
return proto.__str__()
__repr__ = __str__
@property @property
def parent_idx(self): def parent_idx(self):
return self.desc.parent return self.desc.parent
...@@ -287,6 +317,13 @@ class Program(object): ...@@ -287,6 +317,13 @@ class Program(object):
self.blocks = [Block(self, 0)] self.blocks = [Block(self, 0)]
self.current_block_idx = 0 self.current_block_idx = 0
def __str__(self):
protostr = self.desc.serialize_to_string()
proto = framework_pb2.ProgramDesc.FromString(str(protostr))
return proto.__str__()
__repr__ = __str__
def global_block(self): def global_block(self):
return self.blocks[0] return self.blocks[0]
......
...@@ -219,6 +219,27 @@ class __RecurrentOp__(object): ...@@ -219,6 +219,27 @@ class __RecurrentOp__(object):
return core.RecurrentOp.create(proto.SerializeToString()) return core.RecurrentOp.create(proto.SerializeToString())
class __DynamicRecurrentOp__(object):
    """Callable factory that builds a core.DynamicRecurrentOp from keyword
    arguments describing the operator."""

    __proto__ = None
    type = "dynamic_recurrent"

    def __init__(self):
        # Look the operator proto up once and keep it on the instance.
        if self.__proto__ is not None:
            return
        for candidate in get_all_op_protos():
            if candidate.type == self.type:
                self.__proto__ = candidate

    def __call__(self, *args, **kwargs):
        # Fill in the operator type unless the caller already supplied it.
        type_supplied = self.type in args or "type" in kwargs
        if not type_supplied:
            kwargs["type"] = self.type
        # Build the OpDesc proto, then hand its serialized form to the C++
        # side to create the operator.
        build_desc = OpDescCreationMethod(self.__proto__)
        desc = build_desc(*args, **kwargs)
        return core.DynamicRecurrentOp.create(desc.SerializeToString())
class __CondOp__(object): class __CondOp__(object):
__proto__ = None __proto__ = None
type = "cond" type = "cond"
...@@ -242,4 +263,5 @@ class __CondOp__(object): ...@@ -242,4 +263,5 @@ class __CondOp__(object):
Operator = OperatorFactory() # The default global factory Operator = OperatorFactory() # The default global factory
RecurrentOp = __RecurrentOp__() RecurrentOp = __RecurrentOp__()
DynamicRecurrentOp = __DynamicRecurrentOp__()
CondOp = __CondOp__() CondOp = __CondOp__()
...@@ -384,5 +384,33 @@ class TestThresholdedRelu(OpTest): ...@@ -384,5 +384,33 @@ class TestThresholdedRelu(OpTest):
self.check_grad(['X'], 'Y', max_relative_error=self.relative_error) self.check_grad(['X'], 'Y', max_relative_error=self.relative_error)
class TestHardSigmoid(OpTest):
    """Checks the hard_sigmoid operator against a NumPy reference:
    Y = max(0, min(1, slope * X + offset))."""

    def setUp(self):
        self.op_type = "hard_sigmoid"
        self.relative_error = 0.002

        slope, offset = 0.2, 0.5
        # Inputs at which slope * X + offset reaches 0 and 1 respectively.
        lower_threshold = -offset / slope
        upper_threshold = (1 - offset) / slope

        X = np.random.uniform(-5, 5, [2, 2]).astype("float32")
        self.inputs = {'X': X}

        # Move samples that fall within relative_error of a threshold 0.2
        # away from it (in place, so self.inputs sees the change) --
        # presumably to keep the gradient check off the kinks of the function.
        near_lower = np.abs(X - lower_threshold) < self.relative_error
        X[near_lower] = lower_threshold + 0.2
        near_upper = np.abs(X - upper_threshold) < self.relative_error
        X[near_upper] = upper_threshold - 0.2

        linear = X * slope + offset
        self.outputs = {'Y': np.maximum(0.0, np.minimum(1.0, linear))}

    def test_check_output(self):
        self.check_output()

    def test_check_grad(self):
        self.check_grad(['X'], 'Y', max_relative_error=0.002)
if __name__ == "__main__": if __name__ == "__main__":
unittest.main() unittest.main()
import unittest
import numpy as np
from op_test import OpTest
class TestAdamOp1(OpTest):
    def setUp(self):
        '''Compare one adam step with adam_step, passing explicit
        epsilon / beta1 / beta2 attributes to the operator.
        '''
        self.op_type = "adam"

        shape = (102, 105)
        learning_rate = 0.004
        beta1, beta2, epsilon = 0.78, 0.836, 1e-4

        def as_scalar_tensor(value):
            # one-element float32 array
            return np.array([value]).astype("float32")

        self.inputs = {
            'Param': np.random.uniform(-1, 1, shape).astype("float32"),
            'Grad': np.random.uniform(-1, 1, shape).astype("float32"),
            'Moment1': np.random.uniform(-1, 1, shape).astype("float32"),
            # The second moment is positive
            'Moment2': np.random.random(shape).astype("float32"),
            'LearningRate': as_scalar_tensor(learning_rate),
            'Beta1Pow': as_scalar_tensor(beta1**10),
            'Beta2Pow': as_scalar_tensor(beta2**10)
        }
        self.attrs = {'epsilon': epsilon, 'beta1': beta1, 'beta2': beta2}

        # Reference result from the NumPy implementation.
        (expected_param, expected_moment1, expected_moment2,
         expected_beta1_pow, expected_beta2_pow) = adam_step(self.inputs,
                                                             self.attrs)
        self.outputs = {
            'Moment1Out': expected_moment1,
            'Moment2Out': expected_moment2,
            'Beta1PowOut': expected_beta1_pow,
            'Beta2PowOut': expected_beta2_pow,
            'ParamOut': expected_param
        }

    def test_check_output(self):
        self.check_output()
class TestAdamOp2(OpTest):
    def setUp(self):
        '''Compare one adam step with adam_step without setting self.attrs.

        The epsilon / beta1 / beta2 values are only given to the NumPy
        reference; the operator itself presumably runs with its own default
        attributes, which these values are expected to match.
        '''
        self.op_type = "adam"

        shape = (102, 105)
        learning_rate = 0.001
        beta1, beta2, epsilon = 0.9, 0.999, 1e-8

        def as_scalar_tensor(value):
            # one-element float32 array
            return np.array([value]).astype("float32")

        self.inputs = {
            'Param': np.random.uniform(-1, 1, shape).astype("float32"),
            'Grad': np.random.uniform(-1, 1, shape).astype("float32"),
            'Moment1': np.random.uniform(-1, 1, shape).astype("float32"),
            # The second moment is positive
            'Moment2': np.random.random(shape).astype("float32"),
            'LearningRate': as_scalar_tensor(learning_rate),
            'Beta1Pow': as_scalar_tensor(beta1**10),
            'Beta2Pow': as_scalar_tensor(beta2**10)
        }

        # Deliberately a local dict: self.attrs is left untouched.
        attributes = {'epsilon': epsilon, 'beta1': beta1, 'beta2': beta2}

        (expected_param, expected_moment1, expected_moment2,
         expected_beta1_pow, expected_beta2_pow) = adam_step(self.inputs,
                                                             attributes)
        self.outputs = {
            'Moment1Out': expected_moment1,
            'Moment2Out': expected_moment2,
            'Beta1PowOut': expected_beta1_pow,
            'Beta2PowOut': expected_beta2_pow,
            'ParamOut': expected_param
        }

    def test_check_output(self):
        self.check_output()
class TestAdamOpMultipleSteps(OpTest):
    def setUp(self):
        '''Prepare inputs for running the adam operator several steps in a
        row with explicit attributes.
        '''
        self.op_type = "adam"
        self.num_steps = 10

        shape = (102, 105)
        learning_rate = 0.001
        beta1, beta2, epsilon = 0.9, 0.999, 1e-8

        def as_scalar_tensor(value):
            # one-element float32 array
            return np.array([value]).astype("float32")

        self.inputs = {
            'Param': np.random.uniform(-1, 1, shape).astype("float32"),
            'Grad': np.random.uniform(-1, 1, shape).astype("float32"),
            'Moment1': np.random.uniform(-1, 1, shape).astype("float32"),
            # The second moment is positive
            'Moment2': np.random.random(shape).astype("float32"),
            'LearningRate': as_scalar_tensor(learning_rate),
            'Beta1Pow': as_scalar_tensor(beta1**10),
            'Beta2Pow': as_scalar_tensor(beta2**10)
        }
        self.attrs = {'epsilon': epsilon, 'beta1': beta1, 'beta2': beta2}

    def test_check_output(self):
        for _ in range(self.num_steps):
            (next_param, next_moment1, next_moment2, next_beta1_pow,
             next_beta2_pow) = adam_step(self.inputs, self.attrs)

            self.outputs = {
                'Moment1Out': next_moment1,
                'Moment2Out': next_moment2,
                'Beta1PowOut': next_beta1_pow,
                'Beta2PowOut': next_beta2_pow,
                'ParamOut': next_param
            }

            # Verify output for this step
            self.check_output()

            # Feed this step's results back in and draw a fresh gradient
            # for the next step.
            self.inputs.update({
                'Param': next_param,
                'Moment1': next_moment1,
                'Moment2': next_moment2,
                'Beta1Pow': next_beta1_pow,
                'Beta2Pow': next_beta2_pow,
                'Grad': np.random.uniform(-1, 1,
                                          (102, 105)).astype("float32")
            })
def adam_step(inputs, attributes):
    '''
    NumPy reference for a single update of the adam optimizer.

    :param inputs: dict with keys 'Param', 'Grad', 'Moment1', 'Moment2',
                   'LearningRate', 'Beta1Pow' and 'Beta2Pow'
    :param attributes: dict with keys 'beta1', 'beta2' and 'epsilon'
    :return tuple: (param_out, moment1_out, moment2_out, beta1_pow_out,
                   beta2_pow_out)
    '''
    param, grad, moment1, moment2, lr, beta1_pow, beta2_pow = [
        inputs[key]
        for key in ('Param', 'Grad', 'Moment1', 'Moment2', 'LearningRate',
                    'Beta1Pow', 'Beta2Pow')
    ]
    beta1, beta2, epsilon = [
        attributes[key] for key in ('beta1', 'beta2', 'epsilon')
    ]

    # Exponential moving averages of the gradient and the squared gradient.
    moment1_out = beta1 * moment1 + (1 - beta1) * grad
    moment2_out = beta2 * moment2 + (1 - beta2) * np.square(grad)

    # Advance the power accumulators before using them in the step size.
    beta1_pow_out = beta1_pow * beta1
    beta2_pow_out = beta2_pow * beta2

    # Bias-corrected step size.
    lr_t = lr * np.sqrt(1 - beta2_pow_out) / (1 - beta1_pow_out)

    direction = moment1_out / (np.sqrt(moment2_out) + epsilon)
    param_out = param - lr_t * direction
    return param_out, moment1_out, moment2_out, beta1_pow_out, beta2_pow_out
if __name__ == "__main__":
unittest.main()
import logging
import paddle.v2.framework.core as core
import unittest
from paddle.v2.framework.op import Operator, DynamicRecurrentOp
import numpy as np
def create_tensor(scope, name, shape, np_data):
    """Create variable `name` in `scope`, set its tensor's dims to `shape`,
    copy `np_data` into it on the CPU place, and return the tensor."""
    variable = scope.new_var(name)
    tensor = variable.get_tensor()
    tensor.set_dims(shape)
    cpu_place = core.CPUPlace()
    tensor.set(np_data, cpu_place)
    return tensor
class DynamicRecurrentOpTest(unittest.TestCase):
'''
Test RNNOp
equation:
h_t = \sigma (W x_t + U h_{t-1})
weights:
- W
- U
vars:
- x
memories:
- h
outputs:
- h
'''
# for siplicity, just one level LoD
lod_py = [[0, 4, 7, 9, 10]]
input_dim = 30
num_sents = len(lod_py[0]) - 1
weight_dim = 15
def forward(self):
self.scope = core.Scope()
self.create_global_variables()
self.create_rnn_op()
self.create_step_net()
ctx = core.DeviceContext.create(core.CPUPlace())
self.rnnop.run(self.scope, ctx)
state = self.rnnop.get_state("h@mem")
print 'state size: ', state.size()
step_inputs = self.rnnop.get_step_input("x")
print "x size ", step_inputs.size()
for i in range(step_inputs.size()):
print "x %d" % i, np.array(step_inputs.read(i).get_dims())
step_outputs = self.rnnop.get_step_output('h@mem')
print 'step_outputs.size ', step_outputs.size()
output = self.scope.find_var("h@mem").get_tensor()
print 'output', np.array(output).shape
def create_global_variables(self):
x = np.random.normal(size=(self.lod_py[0][-1],
self.input_dim)).astype("float32")
W = np.random.normal(size=(self.input_dim,
self.input_dim)).astype("float32")
U = np.random.normal(size=(self.input_dim,
self.input_dim)).astype("float32")
h_boot = np.random.normal(size=(self.num_sents,
self.input_dim)).astype("float32")
# create inlink
x_tensor = create_tensor(self.scope, "x",
[self.num_sents, self.input_dim], x)
x_tensor.set_lod(self.lod_py)
create_tensor(self.scope, "W", [self.input_dim, self.input_dim], W)
create_tensor(self.scope, "U", [self.input_dim, self.input_dim], U)
create_tensor(self.scope, "h_boot", [self.num_sents, self.input_dim],
h_boot)
self.scope.new_var("step_scopes")
self.scope.new_var("h@mem")
def create_rnn_op(self):
# create RNNOp
self.rnnop = DynamicRecurrentOp(
# inputs
inlinks=["x"],
boot_memories=["h_boot"],
step_net="stepnet",
# outputs
outlinks=["h@mem"],
step_scopes="step_scopes",
# attributes
pre_memories=["h@pre"],
memories=["h@mem"])
def create_step_net(self):
stepnet = core.Net.create()
x_fc_op = Operator("mul", X="x", Y="W", Out="Wx")
h_fc_op = Operator("mul", X="h@pre", Y="U", Out="Uh")
sum_op = Operator("sum", X=["Wx", "Uh"], Out="sum")
sig_op = Operator("sigmoid", X="sum", Y="h@mem")
for op in [x_fc_op, h_fc_op, sum_op, sig_op]:
stepnet.append_op(op)
stepnet.complete_add_op(True)
self.rnnop.set_stepnet(stepnet)
def test_forward(self):
print 'test recurrent op forward'
pd_output = self.forward()
print 'pd_output', pd_output
# Run this module's unittest cases when the file is executed as a script.
if __name__ == '__main__':
    unittest.main()
import math
import unittest
import numpy as np
from op_test import OpTest
class GRUActivationType(OpTest):
    """Integer codes naming the activation functions used by the GRU tests.

    NOTE(review): this class only holds constants yet derives from OpTest --
    confirm whether the test base class is actually needed here.
    """
    identity, sigmoid, tanh, relu = 0, 1, 2, 3
def identity(x):
    """Identity activation: hand the argument back unchanged."""
    result = x
    return result
def sigmoid(x):
    """Logistic sigmoid, 1 / (1 + exp(-x)), evaluated with numpy."""
    denominator = 1. + np.exp(-x)
    return 1. / denominator
def tanh(x):
    """Hyperbolic tangent expressed through sigmoid: 2 * sigmoid(2x) - 1."""
    doubled = 2. * x
    return 2. * sigmoid(doubled) - 1.
def relu(x):
    """Rectified linear unit: element-wise maximum of `x` and 0."""
    floor = 0
    return np.maximum(x, floor)
class TestGRUUnitOp(OpTest):
    """Checks the 'gru_unit' operator against a numpy reference (no bias).

    The reference in set_outputs computes, with F = frame_size and
    g = Input + Bias (Bias is treated as zeros when it is not an input):

        [u, r] = gate_activation(HiddenPrev . W[:, :2F] + g[:, :2F])
        c      = activation((r * HiddenPrev) . W_c + g[:, 2F:])
        Hidden = u * HiddenPrev + (1 - u) * c
    """
    batch_size = 3
    frame_size = 5
    # Maps each GRUActivationType code to the numpy function implementing it.
    activate = {
        GRUActivationType.identity: identity,
        GRUActivationType.sigmoid: sigmoid,
        GRUActivationType.tanh: tanh,
        GRUActivationType.relu: relu,
    }

    def set_inputs(self):
        """Fill op_type, random float32 inputs and the activation attrs."""
        batch_size = self.batch_size
        frame_size = self.frame_size
        self.op_type = 'gru_unit'
        self.inputs = {
            'Input': np.random.uniform(
                -0.1, 0.1, (batch_size, frame_size * 3)).astype('float32'),
            'HiddenPrev': np.random.uniform(
                -0.1, 0.1, (batch_size, frame_size)).astype('float32'),
            'Weight': np.random.uniform(
                -1. / math.sqrt(frame_size), 1. / math.sqrt(frame_size),
                (frame_size, frame_size * 3)).astype('float32'),
        }
        self.attrs = {
            'activation': GRUActivationType.tanh,
            'gate_activation': GRUActivationType.sigmoid
        }

    def set_outputs(self):
        """Compute the expected 'Gate', 'ResetHiddenPrev' and 'Hidden'."""
        # GRU calculations
        batch_size = self.batch_size
        frame_size = self.frame_size
        x = self.inputs['Input']
        h_p = self.inputs['HiddenPrev']
        w = self.inputs['Weight']
        # `in` replaces the Python 2-only dict.has_key(); a missing bias is
        # modelled as a row of zeros.
        b = self.inputs['Bias'] if 'Bias' in self.inputs else np.zeros(
            (1, frame_size * 3))
        g = x + np.tile(b, (batch_size, 1))
        # The first 2*F*F entries of the flattened weight hold the
        # update/reset gate weights, viewed as an (F, 2F) matrix.
        w_u_r = w.flatten()[:frame_size * frame_size * 2].reshape(
            (frame_size, frame_size * 2))
        u_r = self.activate[self.attrs['gate_activation']](np.dot(
            h_p, w_u_r) + g[:, :frame_size * 2])
        u = u_r[:, :frame_size]
        r = u_r[:, frame_size:frame_size * 2]
        r_h_p = r * h_p
        # The remaining F*F entries are the candidate weights, viewed (F, F).
        w_c = w.flatten()[frame_size * frame_size * 2:].reshape(
            (frame_size, frame_size))
        c = self.activate[self.attrs['activation']](np.dot(r_h_p, w_c) +
                                                    g[:, frame_size * 2:])
        # From here on `g` holds the activated gates [u, r, c].
        g = np.hstack((u_r, c))
        h = u * h_p + (1 - u) * c
        self.outputs = {'Gate': g, 'ResetHiddenPrev': r_h_p, 'Hidden': h}

    def setUp(self):
        """Prepare inputs first, then the expected outputs derived from them."""
        self.set_inputs()
        self.set_outputs()

    def test_check_output(self):
        """Delegate the forward comparison to check_output()."""
        self.check_output()

    def test_check_grad(self):
        """Check 'Hidden' gradients w.r.t. Input, HiddenPrev and Weight."""
        self.check_grad(
            ['Input', 'HiddenPrev', 'Weight'], ['Hidden'],
            max_relative_error=0.007)
class TestGRUUnitOpWithBias(TestGRUUnitOp):
    """Variant of TestGRUUnitOp that adds a 'Bias' input and uses the
    identity activation for the candidate state."""

    def set_inputs(self):
        """Extend the parent's inputs with a random bias and override attrs."""
        super(TestGRUUnitOpWithBias, self).set_inputs()
        bias_shape = (1, self.frame_size * 3)
        random_bias = np.random.uniform(-0.1, 0.1, bias_shape)
        self.inputs['Bias'] = random_bias.astype('float32')
        self.attrs = {
            'activation': GRUActivationType.identity,
            'gate_activation': GRUActivationType.sigmoid
        }

    def test_check_grad(self):
        """Check 'Hidden' gradients w.r.t. all four inputs, Bias included."""
        checked_inputs = ['Input', 'HiddenPrev', 'Weight', 'Bias']
        checked_outputs = ['Hidden']
        self.check_grad(
            checked_inputs, checked_outputs, max_relative_error=0.007)
# Run this module's unittest cases when the file is executed as a script.
if __name__ == '__main__':
    unittest.main()
...@@ -14,11 +14,14 @@ class TestInferShape(unittest.TestCase): ...@@ -14,11 +14,14 @@ class TestInferShape(unittest.TestCase):
# prepare input/output # prepare input/output
x1 = block.var("x1") x1 = block.var("x1")
x1.set_type(core.VarDesc.VarType.LOD_TENSOR)
x1.set_shape(shape) x1.set_shape(shape)
x2 = block.var("x2") x2 = block.var("x2")
x2.set_type(core.VarDesc.VarType.LOD_TENSOR)
x2.set_shape(shape) x2.set_shape(shape)
out = block.var("out") out = block.var("out")
out.set_type(core.VarDesc.VarType.LOD_TENSOR)
# prepare the operator # prepare the operator
sum_op_desc = block.append_op() sum_op_desc = block.append_op()
...@@ -40,11 +43,14 @@ class TestInferShape(unittest.TestCase): ...@@ -40,11 +43,14 @@ class TestInferShape(unittest.TestCase):
# prepare input/output # prepare input/output
x1 = block.var("x") x1 = block.var("x")
x1.set_type(core.VarDesc.VarType.LOD_TENSOR)
x1.set_shape(x_shape) x1.set_shape(x_shape)
x2 = block.var("y") x2 = block.var("y")
x2.set_type(core.VarDesc.VarType.LOD_TENSOR)
x2.set_shape(y_shape) x2.set_shape(y_shape)
out = block.var("out") out = block.var("out")
out.set_type(core.VarDesc.VarType.LOD_TENSOR)
# prepare the operator # prepare the operator
mul_op_desc = block.append_op() mul_op_desc = block.append_op()
......
...@@ -34,6 +34,8 @@ class TestOperator(unittest.TestCase): ...@@ -34,6 +34,8 @@ class TestOperator(unittest.TestCase):
"Y": mul_y}, "Y": mul_y},
outputs={"Out": [mul_out]}, outputs={"Out": [mul_out]},
attrs={"x_num_col_dims": 1}) attrs={"x_num_col_dims": 1})
self.assertNotEqual(str(mul_op), "")
self.assertEqual(mul_op.type, "mul") self.assertEqual(mul_op.type, "mul")
self.assertEqual(mul_op.input_names, ["X", "Y"]) self.assertEqual(mul_op.input_names, ["X", "Y"])
self.assertEqual(mul_op.input("X"), ["mul.x"]) self.assertEqual(mul_op.input("X"), ["mul.x"])
......
...@@ -94,17 +94,21 @@ class TestVarDesc(unittest.TestCase): ...@@ -94,17 +94,21 @@ class TestVarDesc(unittest.TestCase):
program_desc = core.ProgramDesc.__create_program_desc__() program_desc = core.ProgramDesc.__create_program_desc__()
block = program_desc.block(0) block = program_desc.block(0)
var = block.var('my_var') var = block.var('my_var')
var.set_type(core.VarDesc.VarType.SELECTED_ROWS)
src_shape = [3, 2, 10, 8] src_shape = [3, 2, 10, 8]
var.set_shape(src_shape) var.set_shape(src_shape)
res_shape = var.shape() res_shape = var.shape()
self.assertEqual(src_shape, res_shape) self.assertEqual(src_shape, res_shape)
self.assertEqual(core.VarDesc.VarType.SELECTED_ROWS, var.type())
def test_data_type(self): def test_data_type(self):
program_desc = core.ProgramDesc.__create_program_desc__() program_desc = core.ProgramDesc.__create_program_desc__()
block = program_desc.block(0) block = program_desc.block(0)
var = block.var('my_var') var = block.var('my_var')
var.set_type(core.VarDesc.VarType.LOD_TENSOR)
var.set_data_type(core.DataType.INT32) var.set_data_type(core.DataType.INT32)
self.assertEqual(core.DataType.INT32, var.data_type()) self.assertEqual(core.DataType.INT32, var.data_type())
self.assertEqual(core.VarDesc.VarType.LOD_TENSOR, var.type())
class TestBlockDesc(unittest.TestCase): class TestBlockDesc(unittest.TestCase):
......
...@@ -21,6 +21,7 @@ class TestVariable(unittest.TestCase): ...@@ -21,6 +21,7 @@ class TestVariable(unittest.TestCase):
b = g_program.current_block() b = g_program.current_block()
w = b.create_var( w = b.create_var(
dtype="float64", shape=[784, 100], lod_level=0, name="fc.w") dtype="float64", shape=[784, 100], lod_level=0, name="fc.w")
self.assertNotEqual(str(w), "")
self.assertEqual(core.DataType.FP64, w.data_type) self.assertEqual(core.DataType.FP64, w.data_type)
self.assertEqual((784, 100), w.shape) self.assertEqual((784, 100), w.shape)
self.assertEqual("fc.w", w.name) self.assertEqual("fc.w", w.name)
......
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册
新手
引导
客服 返回
顶部