diff --git a/CMakeLists.txt b/CMakeLists.txt index 4783095194dc9c6409dc31c95588f46c9bee7c61..1252e7539816016dfdf1b90b8941fa42e6bb85e0 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -105,6 +105,12 @@ if (WITH_C_API AND WITH_PYTHON) "different Python interpreter from compiling.") endif() +if(MOBILE_INFERENCE) + set(THIRD_PARTY_BUILD_TYPE MinSizeRel) +else() + set(THIRD_PARTY_BUILD_TYPE Release) +endif() + ######################################################################################## include(external/mklml) # download mklml package diff --git a/cmake/external/eigen.cmake b/cmake/external/eigen.cmake index f7483f6be9169eb58f0148cd3a956a8c881e1fe3..bd853d921b4362ac7ac5e17e629552b2a200f08a 100644 --- a/cmake/external/eigen.cmake +++ b/cmake/external/eigen.cmake @@ -8,7 +8,7 @@ ExternalProject_Add( extern_eigen3 ${EXTERNAL_PROJECT_LOG_ARGS} GIT_REPOSITORY "https://github.com/RLovelett/eigen.git" - GIT_TAG "master" + GIT_TAG 4e79cb69b9425f5f8c3a84be4350d4ab75b5fd9d PREFIX ${EIGEN_SOURCE_DIR} UPDATE_COMMAND "" CONFIGURE_COMMAND "" diff --git a/cmake/external/gflags.cmake b/cmake/external/gflags.cmake index 957f8271e4841836956b0c3f2cf3d8c88a31192a..c819eb4d70898e48eab499c666168d78262d4240 100644 --- a/cmake/external/gflags.cmake +++ b/cmake/external/gflags.cmake @@ -36,6 +36,7 @@ ExternalProject_Add( # change this back to the official Github repo once my PR is # merged. GIT_REPOSITORY "https://github.com/wangkuiyi/gflags.git" + GIT_TAG 986964c07427ecb9cdb5bd73f73ebbd40e54dadb PREFIX ${GFLAGS_SOURCES_DIR} UPDATE_COMMAND "" CMAKE_ARGS -DCMAKE_CXX_COMPILER=${CMAKE_CXX_COMPILER} @@ -45,11 +46,11 @@ ExternalProject_Add( -DCMAKE_INSTALL_PREFIX=${GFLAGS_INSTALL_DIR} -DCMAKE_POSITION_INDEPENDENT_CODE=ON -DBUILD_TESTING=OFF - -DCMAKE_BUILD_TYPE=Release + -DCMAKE_BUILD_TYPE=${THIRD_PARTY_BUILD_TYPE} ${EXTERNAL_OPTIONAL_ARGS} CMAKE_CACHE_ARGS -DCMAKE_INSTALL_PREFIX:PATH=${GFLAGS_INSTALL_DIR} -DCMAKE_POSITION_INDEPENDENT_CODE:BOOL=ON - -DCMAKE_BUILD_TYPE:STRING=Release + -DCMAKE_BUILD_TYPE:STRING=${THIRD_PARTY_BUILD_TYPE} ) ADD_LIBRARY(gflags STATIC IMPORTED GLOBAL) diff --git a/cmake/external/glog.cmake b/cmake/external/glog.cmake index b3fef738ccc0b5886bb0a32501bb7b7adade0ff1..08bdc1e1623b0d917061c7368e9b2a8f7e9517fd 100644 --- a/cmake/external/glog.cmake +++ b/cmake/external/glog.cmake @@ -31,6 +31,7 @@ ExternalProject_Add( ${EXTERNAL_PROJECT_LOG_ARGS} DEPENDS gflags GIT_REPOSITORY "https://github.com/google/glog.git" + GIT_TAG v0.3.5 PREFIX ${GLOG_SOURCES_DIR} UPDATE_COMMAND "" CMAKE_ARGS -DCMAKE_CXX_COMPILER=${CMAKE_CXX_COMPILER} @@ -43,12 +44,12 @@ ExternalProject_Add( -DWITH_GFLAGS=ON -Dgflags_DIR=${GFLAGS_INSTALL_DIR}/lib/cmake/gflags -DBUILD_TESTING=OFF - -DCMAKE_BUILD_TYPE=Release + -DCMAKE_BUILD_TYPE=${THIRD_PARTY_BUILD_TYPE} ${EXTERNAL_OPTIONAL_ARGS} CMAKE_CACHE_ARGS -DCMAKE_INSTALL_PREFIX:PATH=${GLOG_INSTALL_DIR} -DCMAKE_INSTALL_LIBDIR:PATH=${GLOG_INSTALL_DIR}/lib -DCMAKE_POSITION_INDEPENDENT_CODE:BOOL=ON - -DCMAKE_BUILD_TYPE:STRING=Release + -DCMAKE_BUILD_TYPE:STRING=${THIRD_PARTY_BUILD_TYPE} ) ADD_LIBRARY(glog STATIC IMPORTED GLOBAL) diff --git a/cmake/external/gtest.cmake b/cmake/external/gtest.cmake index 6a2a79b7631b32e8a099797de509af64533bbb95..5a4aa7a5b71a4fdfd556a46037e6d1846d668fc4 100644 --- a/cmake/external/gtest.cmake +++ b/cmake/external/gtest.cmake @@ -56,11 +56,11 @@ IF(WITH_TESTING) -DBUILD_GMOCK=ON -Dgtest_disable_pthreads=ON -Dgtest_force_shared_crt=ON - -DCMAKE_BUILD_TYPE=Release + -DCMAKE_BUILD_TYPE=${THIRD_PARTY_BUILD_TYPE} ${EXTERNAL_OPTIONAL_ARGS} CMAKE_CACHE_ARGS -DCMAKE_INSTALL_PREFIX:PATH=${GTEST_INSTALL_DIR} -DCMAKE_POSITION_INDEPENDENT_CODE:BOOL=ON - -DCMAKE_BUILD_TYPE:STRING=Release + -DCMAKE_BUILD_TYPE:STRING=${THIRD_PARTY_BUILD_TYPE} ) ADD_LIBRARY(gtest STATIC IMPORTED GLOBAL) diff --git a/cmake/external/protobuf.cmake b/cmake/external/protobuf.cmake index 7cf7ba85cca4c248dcc74e078124c0b3815ee380..be7f6a9465970711170bd15dcecaadeaa8a55f86 100644 --- a/cmake/external/protobuf.cmake +++ b/cmake/external/protobuf.cmake @@ -191,12 +191,12 @@ FUNCTION(build_protobuf TARGET_NAME BUILD_FOR_HOST) ${OPTIONAL_ARGS} -Dprotobuf_BUILD_TESTS=OFF -DCMAKE_POSITION_INDEPENDENT_CODE=ON - -DCMAKE_BUILD_TYPE=Release + -DCMAKE_BUILD_TYPE=${THIRD_PARTY_BUILD_TYPE} -DCMAKE_INSTALL_PREFIX=${PROTOBUF_INSTALL_DIR} -DCMAKE_INSTALL_LIBDIR=lib CMAKE_CACHE_ARGS -DCMAKE_INSTALL_PREFIX:PATH=${PROTOBUF_INSTALL_DIR} - -DCMAKE_BUILD_TYPE:STRING=Release + -DCMAKE_BUILD_TYPE:STRING=${THIRD_PARTY_BUILD_TYPE} -DCMAKE_VERBOSE_MAKEFILE:BOOL=OFF -DCMAKE_POSITION_INDEPENDENT_CODE:BOOL=ON ${OPTIONAL_CACHE_ARGS} diff --git a/cmake/external/warpctc.cmake b/cmake/external/warpctc.cmake index bb258c7b5581fc22b44f4fe15c119f8081f4767e..8bd058222880b4df3b08da09c02f9fe7f1d0ee66 100644 --- a/cmake/external/warpctc.cmake +++ b/cmake/external/warpctc.cmake @@ -35,6 +35,7 @@ ExternalProject_Add( extern_warpctc ${EXTERNAL_PROJECT_LOG_ARGS} GIT_REPOSITORY "https://github.com/gangliao/warp-ctc.git" + GIT_TAG b63a0644654a3e0ed624c85a1767bc8193aead09 PREFIX ${WARPCTC_SOURCES_DIR} UPDATE_COMMAND "" CMAKE_ARGS -DCMAKE_CXX_COMPILER=${CMAKE_CXX_COMPILER} @@ -48,9 +49,9 @@ ExternalProject_Add( -DCMAKE_DISABLE_FIND_PACKAGE_Torch=ON -DBUILD_SHARED=ON -DCMAKE_POSITION_INDEPENDENT_CODE=ON - -DCMAKE_BUILD_TYPE=Release + -DCMAKE_BUILD_TYPE=${THIRD_PARTY_BUILD_TYPE} ${EXTERNAL_OPTIONAL_ARGS} - CMAKE_CACHE_ARGS -DCMAKE_BUILD_TYPE:STRING=Release + CMAKE_CACHE_ARGS -DCMAKE_BUILD_TYPE:STRING=${THIRD_PARTY_BUILD_TYPE} -DCMAKE_POSITION_INDEPENDENT_CODE:BOOL=ON -DCMAKE_INSTALL_PREFIX:PATH=${WARPCTC_INSTALL_DIR} ) diff --git a/cmake/external/zlib.cmake b/cmake/external/zlib.cmake index c496a52b780364f3014f8fa3dfbc944a7aa7430e..e2c9fe56f335ae5b627b4d8d4bb17e4a2a466677 100644 --- a/cmake/external/zlib.cmake +++ b/cmake/external/zlib.cmake @@ -42,11 +42,11 @@ ExternalProject_Add( -DBUILD_SHARED_LIBS=OFF -DCMAKE_POSITION_INDEPENDENT_CODE=ON -DCMAKE_MACOSX_RPATH=ON - -DCMAKE_BUILD_TYPE=Release + -DCMAKE_BUILD_TYPE=${THIRD_PARTY_BUILD_TYPE} ${EXTERNAL_OPTIONAL_ARGS} CMAKE_CACHE_ARGS -DCMAKE_INSTALL_PREFIX:PATH=${ZLIB_INSTALL_DIR} -DCMAKE_POSITION_INDEPENDENT_CODE:BOOL=ON - -DCMAKE_BUILD_TYPE:STRING=Release + -DCMAKE_BUILD_TYPE:STRING=${THIRD_PARTY_BUILD_TYPE} ) LIST(APPEND external_project_dependencies zlib) diff --git a/doc/design/register_grad_op.md b/doc/design/register_grad_op.md index 3cf8a59446d244bb3a388b87b14273d9096c839a..9f1ce4bae7b393cb9f04909e5e4917b8d660771c 100644 --- a/doc/design/register_grad_op.md +++ b/doc/design/register_grad_op.md @@ -3,15 +3,17 @@ ## The Problem Posed -In our current operator registration mechanism, for each operator, the programmer should register a *gradient operator creator* function, which takes a C++ operator instance, and returns the corresponding gradient instance. +Currently, for each C++ operator class definition, there registers a *gradient operator creator* function, which takes a C++ operator instance and returns the corresponding gradient operator instance. -However, as we decided to separate the *compilation* and *execution* of DL models, we need to reshape the creator to take a protobuf `OpDesc` message, and returns a corresponding message. +However, we noticed two problems with the current deisgn: -More than that, the new registration mechanism need to support the fact that an operators' gradient computation might be a composition of operators. +1. As we decided to separate the *compilation* and *execution* phases, we need to change the creator to take an `OpDesc` protobuf message in a `ProgramDesc` and inserts corresponding `OpDesc` messages into the `ProgramDesc` message. -## Current Implementation +1. Some operator's gradient computation requires more than one gradient operators. For example, the gradient of *minus* consists of two operators -- an identity operaotr and a scale operator. So we need to make the registration mechanism to support the mapping from an operator to a set of operators for gradient computation. -OpInfos store in a association map which key is the operator type. The `grad_op_type` indicate associated gradient operator type. Operator can create gradient operator by `OpInfo::creator_` of gradient. The pseudo code is +## The Current Implementation + +The C++ class `OpInfos` store in a association map which key is the operator type. The `grad_op_type` indicate associated gradient operator type. Operator can create gradient operator by `OpInfo::creator_` of gradient. The pseudo code is ```cpp struct OpInfo { diff --git a/doc/design/var_desc.md b/doc/design/var_desc.md index bfbbdd0578ebc69ea4b49ade9b041573a9e9ad55..0b2958c1b10ef6a6ce51aa75f61e15a7f2d94b3f 100644 --- a/doc/design/var_desc.md +++ b/doc/design/var_desc.md @@ -16,16 +16,23 @@ The computation graph is constructed by Data Node and Operation Node. The concep ## Definition of VarDesc -A VarDesc should have a name and value, in PaddlePaddle, the value will always be a tensor. Since we use LoDTensor most of the time. We add a LoDTesnorDesc to represent it. +A VarDesc should have a name, and value. The are two kinds of variable type in compile time, they are `LoDTensor` and `SelectedRows`. ```proto message VarDesc { required string name = 1; - optional LoDTensorDesc lod_tensor = 2; + enum VarType { + LOD_TENSOR = 0; + SELECTED_ROWS = 1; + } + required VarType type = 2; + optional LoDTensorDesc lod_desc = 3; + optional TensorDesc selected_rows_desc = 4; + optional bool persistable = 5 [ default = false ]; } ``` -## Definition of LodTensorDesc +## Definition of TensorDesc ```proto enum DataType { @@ -38,87 +45,25 @@ enum DataType { FP64 = 6; } -message LoDTensorDesc { +message TensorDesc { required DataType data_type = 1; - repeated int32 dims = 2; // [UNK, 640, 480] is saved as [-1, 640, 480] - optional int32 lod_level = 3 [default=0]; + repeated int64 dims = 2; // [UNK, 640, 480] is saved as [-1, 640, 480] } ``` -## Definition of Variable in Python - -In Python API, layer will take Variable as Input, and return Variable as Output. There should be a class `Variable` in python to help create and manage Variable. - -```python -image = Variable(dims=[-1, 640, 480]) -# fc1 and fc2 are both Variable -fc1 = layer.fc(input=image, output_size=10) -fc2 = layer.fc(input=fc1, output_size=20) -``` -### what should class `Variable` Have -1. `name`.a name of string type is used to mark the value of the Variable. -1. `initializer`. Since our Tensor does not have value. we will always use some Operator to fullfill it when run. So we should have a initialize method to help add the init operator. -1. `operator`. Variable should record which operator produce itself. The reaon is: - - we use pd.eval(targets=[var1, var2]) to run the related ops to get the value of var1 and var2. var.op is used to trace the dependency of the current variable. - -In PaddlePaddle, we use Block to describe Computation Graph, so in the code we will use Block but not Graph. - -```python -import VarDesc -import LoDTensorDesc -import framework - -def AddInitialOperator(variable, initializer): - # add an initialize Operator to block to init this Variable - -class Variable(object): - def __init__(self, name, dims, type, initializer): - self._block = get_default_block() - self._name = name - self.op = None - - tensor_desc = LoDTensorDesc(data_type=type, dims=dims) - _var_desc = VarDesc(name=name, lod_tensor=tensor_desc) - self._var = framework.CreateVar(_var_desc) - self._block.add_var(self) +A TensorDesc describes `SelectedRows` and `LoDTensor`. For details of `SelectedRows`, please reference [`SelectedRows`](./selected_rows.md). - # add initial op according to initializer - if initializer is not None: - AddInitialOperator(self, initializer) - - def dims(self): - return self._var.dims() - - def data_type(self): - return self._var.data_type() +## Definition of LodTensorDesc - def to_proto(self): - pass +```proto +message LoDTensorDesc { + required TensorDesc tensor = 1; + optional int lod_level = 2; +} ``` -Then we can use this Variable to create a fc layer in Python. +A LoDTensorDesc contains a tensor and a lod_level. -```python -import paddle as pd - -def flatten_size(X, num_flatten_dims): - prod = 1 # of last num_flatten_dims - for i in xrange(num_flatten_dims): - prod = prod * X.dims[-i-1] - return prod - -def layer.fc(X, output_size, num_flatten_dims): - W = Variable(pd.random_uniform(), type=FP32, dims=[flatten_size(X, num_flatten_dims), output_size]) - b = Variable(pd.random_uniform(), type=FP32, dims=[output_size]) - out = Variable(type=FP32) - y = operator.fc(X, W, b, output=out) # fc will put fc op input into out - pd.InferShape(y) - return out - -x = Variable(dims=[-1, 640, 480]) -y = layer.fc(x, output_size=100) -z = layer.fc(y, output_size=200) +## Definition of Variable in Python -paddle.eval(targets=[z], ...) -print(z) -``` +For Variable in Python, please reference [`Python API`](./python_api.md). diff --git a/paddle/framework/CMakeLists.txt b/paddle/framework/CMakeLists.txt index 184ec65d3fa5526b9ec32b376f1a10ca8ca69a6d..312df0fd71e32e9aa2c56802784ab9f1fbfd5ee5 100644 --- a/paddle/framework/CMakeLists.txt +++ b/paddle/framework/CMakeLists.txt @@ -19,10 +19,10 @@ cc_test(scope_test SRCS scope_test.cc DEPS scope) proto_library(framework_proto SRCS framework.proto) cc_library(attribute SRCS attribute.cc DEPS framework_proto) -cc_library(proto_desc SRCS var_desc.cc op_desc.cc block_desc.cc program_desc.cc DEPS attribute ddim) +cc_library(proto_desc SRCS var_desc.cc op_desc.cc block_desc.cc program_desc.cc DEPS attribute ddim op_info) cc_library(op_proto_maker SRCS op_proto_maker.cc DEPS framework_proto attribute) cc_test(op_proto_maker_test SRCS op_proto_maker_test.cc DEPS op_proto_maker) -cc_library(op_info SRCS op_info.cc DEPS attribute framework_proto proto_desc) +cc_library(op_info SRCS op_info.cc DEPS attribute framework_proto) cc_library(operator SRCS operator.cc DEPS op_info device_context tensor scope proto_desc) cc_test(operator_test SRCS operator_test.cc DEPS operator op_registry) @@ -53,3 +53,6 @@ endif() cc_library(tensor_array SRCS tensor_array.cc DEPS lod_tensor) cc_test(tensor_array_test SRCS tensor_array_test.cc DEPS tensor_array place) + +cc_library(selected_rows SRCS selected_rows.cc DEPS tensor) +cc_test(selected_rows_test SRCS selected_rows_test.cc DEPS selected_rows) diff --git a/paddle/framework/backward.cc b/paddle/framework/backward.cc index 063b108500d95c94d5859cf6e1a5a88dcdb2ed31..1e20789a1f1b520e33c99b0f8740fbbcf2e792fa 100644 --- a/paddle/framework/backward.cc +++ b/paddle/framework/backward.cc @@ -28,14 +28,15 @@ namespace paddle { namespace framework { static inline std::unique_ptr CreateGradOp( - const OperatorBase& op) { + const OperatorBase& op, const std::unordered_set& no_grad_set, + std::unordered_map* grad_to_var) { OpDescBind op_desc; op_desc.SetInputMap(op.Inputs()); op_desc.SetOutputMap(op.Outputs()); op_desc.SetType(op.Type()); op_desc.SetAttrMap(op.Attrs()); auto& info = OpInfoMap::Instance().Get(op.Type()); - auto grad_descs = info.GradOpMaker()(op_desc); + auto grad_descs = info.GradOpMaker()(op_desc, no_grad_set, grad_to_var); std::vector> grad_ops; grad_ops.reserve(grad_descs.size()); std::transform(grad_descs.begin(), grad_descs.end(), @@ -98,7 +99,9 @@ static std::unique_ptr NOP() { // See Backward.h for details static std::unique_ptr BackwardRecursive( const OperatorBase& forwardOp, - std::unordered_set& no_grad_names, size_t& uniq_id) { + std::unordered_set& no_grad_names, + std::unordered_map* grad_to_var, + size_t& uniq_id) { // If all input gradients of forwarding operator do not need to calculate, // just return an NOP. Not return null ptr because NOP does not take // too much time for calculation, but it is useful for simplifying logic. @@ -136,7 +139,7 @@ static std::unique_ptr BackwardRecursive( for (auto it = forwardNet.ops_.rbegin(); it != forwardNet.ops_.rend(); ++it, ++local_op_id) { auto& fwd = *it; - auto bwd = BackwardRecursive(*fwd, no_grad_names, uniq_id); + auto bwd = BackwardRecursive(*fwd, no_grad_names, grad_to_var, uniq_id); ForEachVarName(bwd->Outputs(), [&dup_output_ops, local_op_id](const std::string& out) { dup_output_ops[out].emplace_back(local_op_id); @@ -187,7 +190,8 @@ static std::unique_ptr BackwardRecursive( net->InsertOp(pos.first + 1, std::move(pos.second)); } } else { - std::unique_ptr grad_op(CreateGradOp(forwardOp)); + std::unique_ptr grad_op( + CreateGradOp(forwardOp, no_grad_names, grad_to_var)); ForEachVarName(grad_op->Inputs(), [&no_grad_names, &net, &grad_op]( const std::string& grad_input) { @@ -226,7 +230,7 @@ static std::unique_ptr BackwardRecursive( *static_cast(&rnnop.stepnet()); // create stepnet's gradient op rnn_grad_op->set_stepnet( - BackwardRecursive(stepnet_op, no_grad_names, uniq_id)); + BackwardRecursive(stepnet_op, no_grad_names, grad_to_var, uniq_id)); } if (net->ops_.empty()) { // Current no aux op is added to network @@ -253,7 +257,8 @@ std::unique_ptr Backward( no_grad_names.insert(name + kGradVarSuffix); } size_t uid = 0; - return BackwardRecursive(forwardOp, no_grad_names, uid); + std::unordered_map grad_to_var; + return BackwardRecursive(forwardOp, no_grad_names, &grad_to_var, uid); } // ==================================== // @@ -270,28 +275,31 @@ static bool AllGradInSet(const std::vector& names, std::vector> MakeOpGrad( const std::unique_ptr& op_desc, - std::unordered_set& no_grad_vars) { + std::unordered_set* no_grad_vars, + std::unordered_map* grad_to_var) { std::vector> grad_op_descs; - // All input gradients of forwarding operator do not need to calculat. + // All input gradients of forwarding operator do not need to calculate. const std::vector& inputs = op_desc->InputArgumentNames(); - if (AllGradInSet(inputs, no_grad_vars)) { + if (AllGradInSet(inputs, *no_grad_vars)) { return grad_op_descs; // empty vector } // All output gradients of forwarding operator do not need to calculate. const std::vector& outputs = op_desc->OutputArgumentNames(); - if (AllGradInSet(outputs, no_grad_vars)) { + if (AllGradInSet(outputs, *no_grad_vars)) { for (const std::string& name : inputs) { - no_grad_vars.insert(GradVarName(name)); + no_grad_vars->insert(GradVarName(name)); } return grad_op_descs; // empty vector } - grad_op_descs = OpRegistry::CreateGradOpDescs(op_desc.get()); + grad_op_descs = OpInfoMap::Instance() + .Get(op_desc->Type()) + .GradOpMaker()(*op_desc, *no_grad_vars, grad_to_var); std::list> pending_fill_zeros_ops; for (auto& desc : grad_op_descs) { for (const std::string& in_name : desc->InputArgumentNames()) { - if (no_grad_vars.count(in_name)) { + if (no_grad_vars->count(in_name)) { std::string prefix = in_name.substr( 0, in_name.size() - sizeof(kGradVarSuffix) / sizeof(char) + 1); std::string new_name = prefix + kZeroVarSuffix; @@ -301,11 +309,6 @@ std::vector> MakeOpGrad( pending_fill_zeros_ops.push_back(std::move(fill_zeros_op)); } } - for (const std::string& out_name : desc->OutputArgumentNames()) { - if (no_grad_vars.count(out_name)) { - desc->Rename(out_name, kEmptyVarName); - } - } } for (auto& p : pending_fill_zeros_ops) { @@ -316,7 +319,8 @@ std::vector> MakeOpGrad( std::vector> MakeBlockBackward( ProgramDescBind& program_desc, int block_idx, - std::unordered_set& no_grad_vars) { + std::unordered_set* no_grad_vars, + std::unordered_map* grad_to_var) { BlockDescBind* cur_block = program_desc.Block(block_idx); std::deque>& op_descs = cur_block->ops_; std::unordered_map> dup_out_ops; @@ -324,15 +328,15 @@ std::vector> MakeBlockBackward( std::vector> backward_descs; for (auto it = op_descs.rbegin(); it != op_descs.rend(); ++it) { std::vector> op_grads = - MakeOpGrad(*it, no_grad_vars); + MakeOpGrad(*it, no_grad_vars, grad_to_var); if ((*it)->Type() == "recurrent") { PADDLE_ENFORCE_EQ( op_grads.size(), size_t(1), "rnn_op's gradient process should contain only one op."); int step_block_idx = (*it)->GetBlockAttr("stop_block"); - auto backward_block_op_descs = - MakeBlockBackward(program_desc, step_block_idx, no_grad_vars); + auto backward_block_op_descs = MakeBlockBackward( + program_desc, step_block_idx, no_grad_vars, grad_to_var); BlockDescBind* backward_block = program_desc.AppendBlock(*cur_block); for (auto& ptr : backward_block_op_descs) { backward_block->ops_.push_back(std::move(ptr)); @@ -388,8 +392,9 @@ void AppendBackward(ProgramDescBind& program_desc, no_grad_var_names.insert(GradVarName(name)); } const int root_block_idx = 0; - auto backward_op_descs = - MakeBlockBackward(program_desc, root_block_idx, no_grad_var_names); + std::unordered_map grad_to_var; + auto backward_op_descs = MakeBlockBackward(program_desc, root_block_idx, + &no_grad_var_names, &grad_to_var); auto& forw_op_descs = program_desc.Block(root_block_idx)->ops_; for (auto& ptr : backward_op_descs) { forw_op_descs.push_back(std::move(ptr)); diff --git a/paddle/framework/backward_test.cc b/paddle/framework/backward_test.cc index 8fd8c826187e3e3f830fdb318c4edacbad8b7333..9b15331dff380a2c437e1cc42da92d7fd239d31c 100644 --- a/paddle/framework/backward_test.cc +++ b/paddle/framework/backward_test.cc @@ -169,6 +169,45 @@ class MultInOutOpMaker : public OpProtoAndCheckerMaker { } }; +class MinusGradOpDescMaker : public GradOpDescMakerBase { + public: + using GradOpDescMakerBase::GradOpDescMakerBase; + + std::vector> operator()() const override { + std::vector> retv; + auto x_g = InputGrad("X"); + if (!x_g.empty()) { + auto *op_desc = new OpDescBind(); + op_desc->SetType("scale"); + op_desc->SetInput("X", OutputGrad("Out")); + op_desc->SetOutput("Out", x_g); + op_desc->SetAttr("scale", 1.0f); + retv.emplace_back(op_desc); + } + + auto y_g = InputGrad("Y"); + if (!y_g.empty()) { + auto *op_desc = new OpDescBind(); + op_desc->SetType("scale"); + op_desc->SetInput("X", OutputGrad("Out")); + op_desc->SetOutput("Out", y_g); + op_desc->SetAttr("scale", -1.0f); + retv.emplace_back(op_desc); + } + return retv; + } +}; + +class MinusOpMaker : public OpProtoAndCheckerMaker { + public: + MinusOpMaker(OpProto *proto, OpAttrChecker *op_checker) + : OpProtoAndCheckerMaker(proto, op_checker) { + AddInput("X", ""); + AddInput("Y", ""); + AddOutput("Out", ""); + AddComment("minus for unittest"); + } +}; } // namespace framework } // namespace paddle @@ -187,6 +226,7 @@ REGISTER_OP_WITHOUT_GRADIENT(fc, f::FcOp, f::FcOpMaker); REGISTER_OP(many_output_op, f::NOP, f::ManyOutputOpMaker, many_output_op_grad, f::NOP); REGISTER_OP(mult_in_out, f::NOP, f::MultInOutOpMaker, mult_in_out_grad, f::NOP); +REGISTER_OPERATOR(minus, f::NOP, f::MinusOpMaker, f::MinusGradOpDescMaker); TEST(Backward, simple_op_not_need_grad) { auto fwd = f::OpRegistry::CreateOp( @@ -395,12 +435,13 @@ TEST(Backward, linear_net_intermediate_variable_has_no_grad) { 2UL /* external input number */ + 1UL /* external output number*/ + 1UL /* number of gradient of external output*/ - + 2U /* internal variable number*/); + + 2UL /* internal variable number*/ + ); EXPECT_EQ(grad_fc.Outputs(all).size(), 2UL /* input number of mul*/ - + 2UL /* input number of rowwise_add - */ - + 1UL /* input number of sigmod */); + + 2UL /* input number of rowwise_add*/ + + 1UL /* input number of sigmod */ + - 1UL /* out2 is not needed*/); EXPECT_EQ(bwd_net->ops_[1]->Inputs(all).size(), 0UL); EXPECT_EQ(bwd_net->ops_[1]->Outputs(all).size(), 0UL); EXPECT_EQ(bwd_net->ops_[2]->Inputs(all).size(), 0UL); @@ -580,8 +621,7 @@ TEST(Backward, intermedia_var_no_grad) { std::vector({f::GradVarName("out4")})); EXPECT_EQ(grad_op4->Output(f::GradVarName("X")), std::vector({f::GradVarName("out1")})); - EXPECT_EQ(grad_op4->Output(f::GradVarName("Y")), - std::vector({f::kEmptyVarName})); + EXPECT_EQ(grad_op4->Output(f::GradVarName("Y")), std::vector()); } TEST(Backward, var_no_grad) { @@ -619,8 +659,7 @@ TEST(Backward, var_no_grad) { std::vector({f::GradVarName("z2")})); EXPECT_EQ(grad_op2->Output(f::GradVarName("X")), std::vector({f::GradVarName("y1")})); - EXPECT_EQ(grad_op2->Output(f::GradVarName("H")), - std::vector({f::kEmptyVarName})); + EXPECT_EQ(grad_op2->Output(f::GradVarName("H")), std::vector()); f::OpDescBind *fill_zero_op = block->AllOps()[3]; ASSERT_EQ(fill_zero_op->Type(), "fill_zeros_like"); @@ -718,4 +757,19 @@ TEST(Backward, shared_var) { std::vector({f::GradVarName("x1")})); EXPECT_EQ(grad_op1->Output(f::GradVarName("b")), std::vector({f::GradVarName("b1")})); +} + +TEST(Backward, half_backward) { + f::ProgramDesc *program_desc = GetNewProgramDesc(); + f::ProgramDescBind &program = f::ProgramDescBind::Instance(program_desc); + f::BlockDescBind *block = program.Block(0); + auto *op1 = block->AppendOp(); + op1->SetType("minus"); + op1->SetInput("X", {"a"}); + op1->SetInput("Y", {"b"}); + op1->SetOutput("Out", {"out"}); + + AppendBackward(program, {"b"}); + auto ops = block->AllOps(); + ASSERT_EQ(2UL, ops.size()); } \ No newline at end of file diff --git a/paddle/framework/block_desc.cc b/paddle/framework/block_desc.cc index 17bd83f3a99bba4d1152916609026a7e203328fb..2dbf54301a54d12b9d9597f4c696ee8745b3a5cd 100644 --- a/paddle/framework/block_desc.cc +++ b/paddle/framework/block_desc.cc @@ -66,7 +66,7 @@ std::vector BlockDescBind::AllOps() const { return res; } -void BlockDescBind::Sync() { +void BlockDescBind::Flush() { if (need_update_) { auto &op_field = *this->desc_->mutable_ops(); op_field.Clear(); @@ -91,5 +91,10 @@ BlockDescBind *BlockDescBind::ParentBlock() const { return prog_->Block(static_cast(this->desc_->parent_idx())); } +BlockDesc *BlockDescBind::Proto() { + Flush(); + return desc_; +} + } // namespace framework } // namespace paddle diff --git a/paddle/framework/block_desc.h b/paddle/framework/block_desc.h index 40e6cb09675b42c9582627416d62906ddd9a66ca..18af157b668bd56d20dc1c026e2d8e177059fc5e 100644 --- a/paddle/framework/block_desc.h +++ b/paddle/framework/block_desc.h @@ -35,7 +35,8 @@ class BlockDescBind { public: friend std::vector> MakeBlockBackward( ProgramDescBind &program_desc, int block_idx, - std::unordered_set &no_grad_vars); + std::unordered_set *no_grad_vars, + std::unordered_map *grad_to_var); friend void AppendBackward( ProgramDescBind &program_desc, @@ -64,9 +65,9 @@ class BlockDescBind { std::vector AllOps() const; - void Sync(); + void Flush(); - BlockDesc *RawPtr() { return desc_; } + BlockDesc *Proto(); private: ProgramDescBind *prog_; // not_own diff --git a/paddle/framework/details/op_registry.h b/paddle/framework/details/op_registry.h index daa474e8c5a223589018720da29a5c3363b5934d..ed7c5f17b0854809bde923276f36440cce193a88 100644 --- a/paddle/framework/details/op_registry.h +++ b/paddle/framework/details/op_registry.h @@ -97,8 +97,11 @@ struct OpInfoFiller { template struct OpInfoFiller { void operator()(const char* op_type, OpInfo* info) const { - info->grad_op_maker_ = [](const OpDescBind& fwd_op) { - T maker(fwd_op); + info->grad_op_maker_ = []( + const OpDescBind& fwd_op, + const std::unordered_set& no_grad_set, + std::unordered_map* grad_to_var) { + T maker(fwd_op, no_grad_set, grad_to_var); return maker(); }; } diff --git a/paddle/framework/executor_test.cc b/paddle/framework/executor_test.cc index f24893227b569546faac3e80278cb4821f35a1e3..6a8db8c66aa38d0c9a6d6f6f17d865a5e6ece652 100644 --- a/paddle/framework/executor_test.cc +++ b/paddle/framework/executor_test.cc @@ -17,6 +17,7 @@ limitations under the License. */ #include #include +#include "gflags/gflags.h" #include "gtest/gtest.h" #include "paddle/framework/attribute.h" #include "paddle/framework/backward.h" @@ -45,6 +46,7 @@ void AddOp(const std::string& type, const VariableNameMap& inputs, for (auto kv : outputs) { for (auto v : kv.second) { auto var = block->Var(v); + var->SetType(VarDesc::LOD_TENSOR); var->SetDataType(paddle::framework::DataType::FP32); } } @@ -316,4 +318,14 @@ TEST_F(ExecutorTesterFeedAndFetch, GPU) { } } } + +DECLARE_double(fraction_of_gpu_memory_to_use); + +int main(int argc, char** argv) { + testing::InitGoogleTest(&argc, argv); + // Use less GPU memory for unittest. + FLAGS_fraction_of_gpu_memory_to_use = 0.25; + return RUN_ALL_TESTS(); +} + #endif diff --git a/paddle/framework/framework.proto b/paddle/framework/framework.proto index b7a63f9ba10b77acff516d75cf1be0d4eeda40d4..65760b07ada7a63a568cb8296eef35a8aa18d9ff 100644 --- a/paddle/framework/framework.proto +++ b/paddle/framework/framework.proto @@ -97,16 +97,26 @@ enum DataType { FP64 = 6; } -message LoDTensorDesc { +message TensorDesc { required DataType data_type = 1; repeated int64 dims = 2; // [UNK, 640, 480] is saved as [-1, 640, 480] - optional int32 lod_level = 3 [ default = 0 ]; +} + +message LoDTensorDesc { + required TensorDesc tensor = 1; + optional int32 lod_level = 2 [ default = 0 ]; } message VarDesc { + enum VarType { + LOD_TENSOR = 1; + SELECTED_ROWS = 2; + } required string name = 1; - optional LoDTensorDesc lod_tensor = 2; - optional bool persistable = 3 [ default = false ]; + required VarType type = 2; + optional LoDTensorDesc lod_tensor = 3; + optional TensorDesc selected_rows = 4; + optional bool persistable = 5 [ default = false ]; } message BlockDesc { diff --git a/paddle/framework/grad_op_desc_maker.h b/paddle/framework/grad_op_desc_maker.h index e9ae6e22060850fe229998d3b651d08a5ca2033a..1219e0487531b19b00adde5a9aa2bde51bfc0aa8 100644 --- a/paddle/framework/grad_op_desc_maker.h +++ b/paddle/framework/grad_op_desc_maker.h @@ -13,6 +13,8 @@ limitations under the License. */ #pragma once +#include +#include #include "paddle/framework/op_desc.h" #include "paddle/framework/operator.h" @@ -21,27 +23,50 @@ namespace framework { class GradOpDescMakerBase { public: - explicit GradOpDescMakerBase(const OpDescBind& fwd_op) : fwd_op_(fwd_op) {} + explicit GradOpDescMakerBase( + const OpDescBind& fwd_op, + const std::unordered_set& no_grad_set, + std::unordered_map* grad_to_var) + : fwd_op_(fwd_op), no_grad_set_(no_grad_set), grad_to_var_(grad_to_var) {} virtual ~GradOpDescMakerBase() = default; virtual std::vector> operator()() const = 0; protected: - static std::vector ToGradNames( - const std::vector& var_names) { + std::vector InputGrad(const std::string& name, + bool drop_empty_grad = true) const { std::vector ret_val; + auto var_names = this->Input(name); ret_val.reserve(var_names.size()); std::transform(var_names.begin(), var_names.end(), - std::back_inserter(ret_val), GradVarName); - return ret_val; - } - - std::vector InputGrad(const std::string& name) const { - return ToGradNames(fwd_op_.Input(name)); + std::back_inserter(ret_val), + [this](const std::string& fwd_var_name) -> std::string { + auto g_name = GradVarName(fwd_var_name); + if (no_grad_set_.count(g_name)) { + return kEmptyVarName; + } else { + (*this->grad_to_var_)[g_name] = fwd_var_name; + return g_name; + } + }); + if (!drop_empty_grad) { + return ret_val; + } + std::vector dropped_ret_val; + dropped_ret_val.reserve(ret_val.size()); + std::copy_if(ret_val.begin(), ret_val.end(), + std::back_inserter(dropped_ret_val), + [](const std::string& str) { return str != kEmptyVarName; }); + return dropped_ret_val; } std::vector OutputGrad(const std::string& name) const { - return ToGradNames(fwd_op_.Output(name)); + std::vector ret_val; + auto onames = this->Output(name); + ret_val.reserve(onames.size()); + std::transform(onames.begin(), onames.end(), std::back_inserter(ret_val), + GradVarName); + return ret_val; } std::vector InputNames() const { @@ -75,6 +100,8 @@ class GradOpDescMakerBase { private: const OpDescBind& fwd_op_; + const std::unordered_set& no_grad_set_; + std::unordered_map* grad_to_var_; }; class SingleGradOpDescMaker : public GradOpDescMakerBase { @@ -91,6 +118,7 @@ class SingleGradOpDescMaker : public GradOpDescMakerBase { virtual std::unique_ptr Apply() const = 0; }; +template class DefaultGradOpDescMaker : public SingleGradOpDescMaker { public: using SingleGradOpDescMaker::SingleGradOpDescMaker; @@ -102,7 +130,8 @@ class DefaultGradOpDescMaker : public SingleGradOpDescMaker { for (auto& input_param : this->InputNames()) { grad->SetInput(input_param, this->Input(input_param)); - grad->SetOutput(GradVarName(input_param), this->InputGrad(input_param)); + grad->SetOutput(GradVarName(input_param), + this->InputGrad(input_param, DropEmptyIG)); } for (auto& output_param : this->OutputNames()) { diff --git a/paddle/framework/op_desc.cc b/paddle/framework/op_desc.cc index a5d515bbca729220ca6df5fa07d02f1b3f025109..ef207dc54ebe6cc72d9f1e428dd2aaed5ad3dbf0 100644 --- a/paddle/framework/op_desc.cc +++ b/paddle/framework/op_desc.cc @@ -32,7 +32,7 @@ OpDescBind::OpDescBind(const std::string &type, const VariableNameMap &inputs, } OpDesc *OpDescBind::Proto() { - Sync(); + Flush(); return &op_desc_; } @@ -101,7 +101,7 @@ void OpDescBind::SetAttr(const std::string &name, const Attribute &v) { } void OpDescBind::SetBlockAttr(const std::string &name, BlockDescBind &block) { - BlockDesc *desc = block.RawPtr(); + BlockDesc *desc = block.Proto(); this->attrs_[name] = desc; need_update_ = true; } @@ -165,7 +165,7 @@ struct SetAttrDescVisitor : public boost::static_visitor { void operator()(boost::blank) const { PADDLE_THROW("Unexpected branch"); } }; -void OpDescBind::Sync() { +void OpDescBind::Flush() { if (need_update_) { this->op_desc_.mutable_inputs()->Clear(); for (auto &ipt : inputs_) { diff --git a/paddle/framework/op_desc.h b/paddle/framework/op_desc.h index 90155fadeac148bd9cae4ce9066ac4ce8d9df52d..73b5cf846f702fe21277ae139156ec9784aa79b3 100644 --- a/paddle/framework/op_desc.h +++ b/paddle/framework/op_desc.h @@ -89,8 +89,6 @@ class OpDescBind { this->need_update_ = true; } - void Sync(); - const VariableNameMap &Inputs() const { return inputs_; } const VariableNameMap &Outputs() const { return outputs_; } @@ -104,6 +102,8 @@ class OpDescBind { void InferShape(const BlockDescBind &block) const; + void Flush(); + private: template static std::vector MapKeys(const MapType &map) { diff --git a/paddle/framework/op_registry.cc b/paddle/framework/op_registry.cc index 94f75b0f309417e52bb5ad3117797d8c25233257..504afbd5dbacf7185f92e0000d19666230e2fb42 100644 --- a/paddle/framework/op_registry.cc +++ b/paddle/framework/op_registry.cc @@ -59,11 +59,5 @@ std::unique_ptr OpRegistry::CreateOp(const OpDescBind& op_desc) { op_desc.GetAttrMap()); } -std::vector> OpRegistry::CreateGradOpDescs( - OpDescBind* op_desc) { - auto& info = OpInfoMap::Instance().Get(op_desc->Type()); - return info.grad_op_maker_(*op_desc); -} - } // namespace framework } // namespace paddle diff --git a/paddle/framework/op_registry.h b/paddle/framework/op_registry.h index 5ca3af52a6909eeee21f647d0e60c7a690f90190..226e8ddcd4b1a2630e0eea00ee6c9f6af6bd5d20 100644 --- a/paddle/framework/op_registry.h +++ b/paddle/framework/op_registry.h @@ -79,9 +79,6 @@ class OpRegistry { static std::unique_ptr CreateOp(const OpDesc& op_desc); - static std::vector> CreateGradOpDescs( - OpDescBind* op_desc); - static std::unique_ptr CreateOp(const OpDescBind& op_desc); }; @@ -160,17 +157,18 @@ class OpKernelRegistrar : public Registrar { /** * Macro to register Operator. */ -#define REGISTER_OP(op_type, op_class, op_maker_class, grad_op_type, \ - grad_op_class) \ - REGISTER_OPERATOR(grad_op_type, grad_op_class); \ - class _GradOpDescMaker_##grad_op_type##_ \ - : public ::paddle::framework::DefaultGradOpDescMaker { \ - using ::paddle::framework::DefaultGradOpDescMaker::DefaultGradOpDescMaker; \ - \ - protected: \ - virtual std::string GradOpType() const { return #grad_op_type; } \ - }; \ - REGISTER_OPERATOR(op_type, op_class, _GradOpDescMaker_##grad_op_type##_, \ +#define REGISTER_OP(op_type, op_class, op_maker_class, grad_op_type, \ + grad_op_class) \ + REGISTER_OPERATOR(grad_op_type, grad_op_class); \ + class _GradOpDescMaker_##grad_op_type##_ \ + : public ::paddle::framework::DefaultGradOpDescMaker { \ + using ::paddle::framework::DefaultGradOpDescMaker< \ + true>::DefaultGradOpDescMaker; \ + \ + protected: \ + virtual std::string GradOpType() const { return #grad_op_type; } \ + }; \ + REGISTER_OPERATOR(op_type, op_class, _GradOpDescMaker_##grad_op_type##_, \ op_maker_class); #define REGISTER_OP_WITHOUT_GRADIENT(op_type, op_class, op_maker_class) \ diff --git a/paddle/framework/program_desc.cc b/paddle/framework/program_desc.cc index e89f9a46d587b6378aa3be92306c5680093e1926..fcb7292884275d972377983cb3ba1bcd86fb8348 100644 --- a/paddle/framework/program_desc.cc +++ b/paddle/framework/program_desc.cc @@ -45,7 +45,7 @@ BlockDescBind *ProgramDescBind::AppendBlock(const BlockDescBind &parent) { ProgramDesc *ProgramDescBind::Proto() { for (auto &block : blocks_) { - block->Sync(); + block->Flush(); } return prog_; } diff --git a/paddle/framework/selected_rows.cc b/paddle/framework/selected_rows.cc new file mode 100644 index 0000000000000000000000000000000000000000..c74459c9dd7006a24615b1d6df041583088fb25c --- /dev/null +++ b/paddle/framework/selected_rows.cc @@ -0,0 +1,16 @@ +/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserve. +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + http://www.apache.org/licenses/LICENSE-2.0 +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. */ + +#include "paddle/framework/selected_rows.h" + +namespace paddle { +namespace framework {} // namespace framework +} // namespace paddle diff --git a/paddle/framework/selected_rows.h b/paddle/framework/selected_rows.h new file mode 100644 index 0000000000000000000000000000000000000000..f9f563051e264ae7ed7cf3c07c0065522b2bbe2e --- /dev/null +++ b/paddle/framework/selected_rows.h @@ -0,0 +1,54 @@ +/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserve. +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + http://www.apache.org/licenses/LICENSE-2.0 +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. */ + +#pragma once +#include "paddle/framework/tensor.h" + +namespace paddle { +namespace framework { + +class SelectedRows { + public: + SelectedRows(const std::vector& rows, const int64_t& height) + : rows_(rows), height_(height) { + value_.reset(new Tensor()); + } + + SelectedRows() { value_.reset(new Tensor()); } + + platform::Place place() const { return value_->place(); } + + const Tensor& value() const { return *value_; } + + Tensor* mutable_value() { return value_.get(); } + + int64_t height() const { return height_; } + + void set_height(int64_t height) { height_ = height; } + + const std::vector& rows() const { return rows_; } + + void set_rows(const std::vector& rows) { rows_ = rows; } + + DDim GetCompleteDims() const { + std::vector dims = vectorize(value_->dims()); + dims[0] = height_; + return make_ddim(dims); + } + + private: + std::vector rows_; + std::unique_ptr value_{nullptr}; + int64_t height_; +}; + +} // namespace framework +} // namespace paddle diff --git a/paddle/framework/selected_rows_test.cc b/paddle/framework/selected_rows_test.cc new file mode 100644 index 0000000000000000000000000000000000000000..4ee13a65d72e44693573397bb686b355effb2227 --- /dev/null +++ b/paddle/framework/selected_rows_test.cc @@ -0,0 +1,47 @@ +/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserve. +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + http://www.apache.org/licenses/LICENSE-2.0 +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. */ + +#include "paddle/framework/selected_rows.h" +#include "gtest/gtest.h" + +namespace paddle { +namespace framework { + +class SelectedRowsTester : public ::testing::Test { + public: + virtual void SetUp() override { + std::vector rows{0, 4, 7}; + int64_t height = 10; + int64_t row_numel = 100; + selected_rows_.reset(new SelectedRows(rows, height)); + + Tensor* value = selected_rows_->mutable_value(); + value->mutable_data( + make_ddim({static_cast(rows.size()), row_numel}), place_); + } + + protected: + platform::CPUPlace place_; + std::unique_ptr selected_rows_{nullptr}; +}; + +TEST_F(SelectedRowsTester, height) { ASSERT_EQ(selected_rows_->height(), 10); } + +TEST_F(SelectedRowsTester, dims) { + ASSERT_EQ(selected_rows_->value().dims(), make_ddim({3, 100})); +} + +TEST_F(SelectedRowsTester, complete_dims) { + ASSERT_EQ(selected_rows_->GetCompleteDims(), make_ddim({10, 100})); +} + +} // namespace framework +} // namespace paddle diff --git a/paddle/framework/tensor_array.cc b/paddle/framework/tensor_array.cc index 7ae16e99cdb8a23f14f0c8b684ba4ec66a4ce074..06459cbfd7b8c19c176452ff73c9f3a81ba1dc03 100644 --- a/paddle/framework/tensor_array.cc +++ b/paddle/framework/tensor_array.cc @@ -76,6 +76,17 @@ LoDTensor PackDynamicBatch(const std::vector& source, const std::vector& meta, const LoD& lod, size_t level); +std::vector GenDyBatchIndice(const DySeqMetaBatch& meta, int batch_id) { + // collect indice need to copy to the batch + std::vector indice; + for (const auto& seq : meta) { + size_t id = seq.begin + batch_id; + if (id >= seq.end) break; + indice.push_back(id); + } + return indice; +} + } // namespace detail const LoDTensor& TensorArray::Read(size_t index) const { @@ -113,8 +124,8 @@ LoDTensor TensorArray::Pack(size_t level, const std::vector& meta, return detail::PackDynamicBatch(values_, meta, lod, level); } -std::vector TensorArray::Unpack(const LoDTensor& source, int level, - bool length_desend) { +DySeqMetaBatch TensorArray::Unpack(const LoDTensor& source, int level, + bool length_desend) { detail::DynamicBatchUnpacker unpacker(source, level, length_desend /*descend*/); @@ -129,6 +140,7 @@ std::vector TensorArray::Unpack(const LoDTensor& source, int level, Write(batch_id, unpacker.GetBatch(batch_id)); } + PADDLE_ENFORCE(!unpacker.meta.empty()); return unpacker.meta; } @@ -218,13 +230,7 @@ LoDTensor DynamicBatchUnpacker::GetBatch(size_t index) { PADDLE_ENFORCE(!meta.empty(), "should build meta first"); LoDTensor result; - // collect indice need to copy to the batch - std::vector indice; - for (const auto& seq : meta) { - size_t id = seq.begin + index; - if (id >= seq.end) break; - indice.push_back(id); - } + auto indice = detail::GenDyBatchIndice(meta, index); PADDLE_ENFORCE(!indice.empty(), "invalid batch at %d", index); // copy the indice of records in LoDTensor @@ -237,9 +243,9 @@ LoDTensor DynamicBatchUnpacker::GetBatch(size_t index) { for (size_t i = 0; i < indice.size(); i++) { auto index = indice[i]; auto target = result.Slice(i, i + 1); - auto source_ = source->Slice(index, index + 1); + auto slice = source->Slice(index, index + 1); - target.CopyFrom(source_, platform::CPUPlace(), + target.CopyFrom(slice, platform::CPUPlace(), platform::CPUDeviceContext()); } diff --git a/paddle/framework/tensor_array.h b/paddle/framework/tensor_array.h index 293da04997304be41810446cb3e866d545805f83..046ecb5221b7ed9d88e5017348ee8fcde23c7677 100644 --- a/paddle/framework/tensor_array.h +++ b/paddle/framework/tensor_array.h @@ -34,6 +34,13 @@ struct DySeqMeta { size_t ori_idx; }; +using DySeqMetaBatch = std::vector; + +/* + * Extract the indices of instances. + */ +std::vector GenDyBatchIndice(const DySeqMetaBatch &metas, int batch_id); + /* * TensorArray is a C-array-like array of tensors, it is meant to be used with * dynamic iteration primitives such as while_loop. It is used to segment inputs @@ -69,7 +76,7 @@ class TensorArray { * Recover the original LoD-arranged LoDTensor with the `values`, `level` and * `indice_map`. */ - LoDTensor Pack(size_t level, const std::vector &meta, + LoDTensor Pack(size_t level, const DySeqMetaBatch &meta, const LoD &lod) const; /* @@ -77,8 +84,7 @@ class TensorArray { * `values`, if set `desend`, will sort by length in descending order else in * ascending order. */ - std::vector Unpack(const LoDTensor &source, int level, - bool length_desend); + DySeqMetaBatch Unpack(const LoDTensor &source, int level, bool length_desend); /* * Pack the values into a tensor with rank one higher than each tensor in diff --git a/paddle/framework/type_defs.h b/paddle/framework/type_defs.h index 6f65a942ba2a4073e6aa1047875ec5c3283c23a6..0d1564a7510ddf0106ff417fb0b487ddbde1ac2e 100644 --- a/paddle/framework/type_defs.h +++ b/paddle/framework/type_defs.h @@ -36,8 +36,9 @@ using OpCreator = std::function; -using GradOpMakerFN = - std::function>(const OpDescBind&)>; +using GradOpMakerFN = std::function>( + const OpDescBind&, const std::unordered_set& /*no_grad_set*/, + std::unordered_map* /*grad_to_var*/)>; } // namespace framework } // namespace paddle diff --git a/paddle/framework/var_desc.cc b/paddle/framework/var_desc.cc index a88e813b5e7c7e6420cb0ba8a25bba4f4d658e80..c302217e5aacdc17800238770d689b7fb65804f3 100644 --- a/paddle/framework/var_desc.cc +++ b/paddle/framework/var_desc.cc @@ -13,32 +13,58 @@ See the License for the specific language governing permissions and limitations under the License. */ #include "paddle/framework/var_desc.h" +#include "paddle/platform/enforce.h" namespace paddle { namespace framework { void VarDescBind::SetShape(const std::vector &dims) { - VectorToRepeated(dims, desc_.mutable_lod_tensor()->mutable_dims()); + VectorToRepeated(dims, mutable_tensor_desc()->mutable_dims()); } void VarDescBind::SetDataType(DataType data_type) { - desc_.mutable_lod_tensor()->set_data_type(data_type); + mutable_tensor_desc()->set_data_type(data_type); } std::vector VarDescBind::Shape() const { - return RepeatedToVector(desc_.lod_tensor().dims()); + return RepeatedToVector(tensor_desc().dims()); } -DataType VarDescBind::GetDataType() const { - return desc_.lod_tensor().data_type(); -} +DataType VarDescBind::GetDataType() const { return tensor_desc().data_type(); } void VarDescBind::SetLoDLevel(int32_t lod_level) { + PADDLE_ENFORCE(desc_.type() == VarDesc::LOD_TENSOR); desc_.mutable_lod_tensor()->set_lod_level(lod_level); } int32_t VarDescBind::GetLodLevel() const { + PADDLE_ENFORCE(desc_.type() == VarDesc::LOD_TENSOR); return desc_.lod_tensor().lod_level(); } + +const TensorDesc &VarDescBind::tensor_desc() const { + PADDLE_ENFORCE(desc_.has_type(), "invoke TensorDesc must after set type"); + switch (desc_.type()) { + case VarDesc::SELECTED_ROWS: + return desc_.selected_rows(); + case VarDesc::LOD_TENSOR: + return desc_.lod_tensor().tensor(); + default: + PADDLE_THROW("Unexpected branch."); + } +} + +TensorDesc *VarDescBind::mutable_tensor_desc() { + PADDLE_ENFORCE(desc_.has_type(), + "invoke MutableTensorDesc must after set type"); + switch (desc_.type()) { + case VarDesc::SELECTED_ROWS: + return desc_.mutable_selected_rows(); + case VarDesc::LOD_TENSOR: + return desc_.mutable_lod_tensor()->mutable_tensor(); + default: + PADDLE_THROW("Unexpected branch."); + } +} } // namespace framework } // namespace paddle diff --git a/paddle/framework/var_desc.h b/paddle/framework/var_desc.h index 44368795645664a343e2706fb670f104a42c5c9f..8ffb858eb244dc06c39a9543de3e037bcdf28f68 100644 --- a/paddle/framework/var_desc.h +++ b/paddle/framework/var_desc.h @@ -72,7 +72,14 @@ class VarDescBind { int32_t GetLodLevel() const; + VarDesc::VarType GetType() const { return desc_.type(); } + + void SetType(VarDesc::VarType type) { desc_.set_type(type); } + private: + const TensorDesc &tensor_desc() const; + TensorDesc *mutable_tensor_desc(); + VarDesc desc_; }; } // namespace framework diff --git a/paddle/operators/CMakeLists.txt b/paddle/operators/CMakeLists.txt index ad941bde2be3bbbc6d910fff262ea4cb3878f8be..75fcc1cda165197fc4413efc6bbbc440088cb4cd 100644 --- a/paddle/operators/CMakeLists.txt +++ b/paddle/operators/CMakeLists.txt @@ -84,8 +84,9 @@ function(op_library TARGET) endif() # pybind USE_NO_KERNEL_OP + # HACK: if REGISTER_OP_CPU_KERNEL presents the operator must have kernel file(READ ${TARGET}.cc TARGET_CONTENT) - string(REGEX MATCH "OperatorWithKernel" regex_result "${TARGET_CONTENT}") + string(REGEX MATCH "REGISTER_OP_CPU_KERNEL" regex_result "${TARGET_CONTENT}") string(REPLACE "_op" "" TARGET "${TARGET}") if (${pybind_flag} EQUAL 0 AND regex_result STREQUAL "") file(APPEND ${pybind_file} "USE_NO_KERNEL_OP(${TARGET});\n") diff --git a/paddle/operators/activation_op.cc b/paddle/operators/activation_op.cc index cba57ba57f5e03c7861897e177cc09aa513e5395..84c3775b4fc2602e5df9bb454d21b318b8fda493 100644 --- a/paddle/operators/activation_op.cc +++ b/paddle/operators/activation_op.cc @@ -338,6 +338,38 @@ class ThresholdedReluOpMaker : public framework::OpProtoAndCheckerMaker { } }; +template +class HardSigmoidOpMaker : public framework::OpProtoAndCheckerMaker { + public: + HardSigmoidOpMaker(framework::OpProto *proto, + framework::OpAttrChecker *op_checker) + : OpProtoAndCheckerMaker(proto, op_checker) { + AddInput("X", "Input of HardSigmoid operator"); + AddOutput("Y", "Output of HardSigmoid operator"); + AddComment(R"DOC( +Hard Sigmoid activation operator. + +Segment-wise linear approximation of sigmoid[1]. +This is much faster than sigmoid. + +hard_sigmoid = max(0, min(1, slope * x + shift)) + +The slope should be positive. The offset can be either positive or negative. +The default slope and shift are set from [1]. +It is recommended to use the defaults for this activation. + +References: + [1] Noisy Activation Functions + (https://arxiv.org/abs/1603.00391) + + )DOC"); + AddAttr("slope", "Slope for linear approximation of sigmoid") + .SetDefault(static_cast(0.2)); + AddAttr("offset", "Offset for linear approximation of sigmoid") + .SetDefault(static_cast(0.5)); + } +}; + } // namespace operators } // namespace paddle @@ -413,6 +445,9 @@ REGISTER_OP(thresholded_relu, ops::ActivationOp, ops::ThresholdedReluOpMaker, thresholded_relu_grad, ops::ActivationOpGrad); +REGISTER_OP(hard_sigmoid, ops::ActivationOp, ops::HardSigmoidOpMaker, + hard_sigmoid_grad, ops::ActivationOpGrad); + #define REGISTER_ACTIVATION_CPU_KERNEL(act_type, functor, grad_functor) \ REGISTER_OP_CPU_KERNEL( \ act_type, \ diff --git a/paddle/operators/activation_op.h b/paddle/operators/activation_op.h index 502c33be103c465c14f128be38ac62d029f1bfb9..4f4eb44fedc0a89cdcf60fb7177014a11eb96048 100644 --- a/paddle/operators/activation_op.h +++ b/paddle/operators/activation_op.h @@ -616,30 +616,63 @@ struct ThresholdedReluGradFunctor : public BaseActivationFunctor { } }; +template +struct HardSigmoidFunctor : public BaseActivationFunctor { + float slope; + float offset; + typename BaseActivationFunctor::AttrPair GetAttrs() { + return {{"slope", &slope}, {"offset", &offset}}; + } + + template + void operator()(Device d, X x, Y y) const { + auto temp = x * static_cast(slope) + static_cast(offset); + y.device(d) = temp.cwiseMax(static_cast(0)).cwiseMin(static_cast(1)); + } +}; + +template +struct HardSigmoidGradFunctor : public BaseActivationFunctor { + float slope; + float offset; + typename BaseActivationFunctor::AttrPair GetAttrs() { + return {{"slope", &slope}, {"offset", &offset}}; + } + + template + void operator()(Device d, X x, Y y, dY dy, dX dx) const { + dx.device(d) = + dy * + ((y > static_cast(0)) * (y < static_cast(1))).template cast() * + static_cast(slope); + } +}; + } // namespace operators } // namespace paddle -#define FOR_EACH_KERNEL_FUNCTOR(__macro) \ - __macro(sigmoid, SigmoidFunctor, SigmoidGradFunctor); \ - __macro(logsigmoid, LogSigmoidFunctor, LogSigmoidGradFunctor); \ - __macro(exp, ExpFunctor, ExpGradFunctor); \ - __macro(relu, ReluFunctor, ReluGradFunctor); \ - __macro(tanh, TanhFunctor, TanhGradFunctor); \ - __macro(softshrink, SoftShrinkFunctor, SoftShrinkGradFunctor); \ - __macro(sqrt, SqrtFunctor, SqrtGradFunctor); \ - __macro(abs, AbsFunctor, AbsGradFunctor); \ - __macro(reciprocal, ReciprocalFunctor, ReciprocalGradFunctor); \ - __macro(log, LogFunctor, LogGradFunctor); \ - __macro(square, SquareFunctor, SquareGradFunctor); \ - __macro(brelu, BReluFunctor, BReluGradFunctor); \ - __macro(soft_relu, SoftReluFunctor, SoftReluGradFunctor); \ - __macro(pow, PowFunctor, PowGradFunctor); \ - __macro(stanh, STanhFunctor, STanhGradFunctor); \ - __macro(softplus, SoftplusFunctor, SoftplusGradFunctor); \ - __macro(softsign, SoftsignFunctor, SoftsignGradFunctor); \ - __macro(relu6, Relu6Functor, Relu6GradFunctor); \ - __macro(leaky_relu, LeakyReluFunctor, LeakyReluGradFunctor); \ - __macro(tanh_shrink, TanhShrinkFunctor, TanhShrinkGradFunctor); \ - __macro(elu, ELUFunctor, ELUGradFunctor); \ - __macro(hard_shrink, HardShrinkFunctor, HardShrinkGradFunctor); \ +#define FOR_EACH_KERNEL_FUNCTOR(__macro) \ + __macro(sigmoid, SigmoidFunctor, SigmoidGradFunctor); \ + __macro(logsigmoid, LogSigmoidFunctor, LogSigmoidGradFunctor); \ + __macro(exp, ExpFunctor, ExpGradFunctor); \ + __macro(relu, ReluFunctor, ReluGradFunctor); \ + __macro(tanh, TanhFunctor, TanhGradFunctor); \ + __macro(softshrink, SoftShrinkFunctor, SoftShrinkGradFunctor); \ + __macro(sqrt, SqrtFunctor, SqrtGradFunctor); \ + __macro(abs, AbsFunctor, AbsGradFunctor); \ + __macro(reciprocal, ReciprocalFunctor, ReciprocalGradFunctor); \ + __macro(log, LogFunctor, LogGradFunctor); \ + __macro(square, SquareFunctor, SquareGradFunctor); \ + __macro(brelu, BReluFunctor, BReluGradFunctor); \ + __macro(soft_relu, SoftReluFunctor, SoftReluGradFunctor); \ + __macro(pow, PowFunctor, PowGradFunctor); \ + __macro(stanh, STanhFunctor, STanhGradFunctor); \ + __macro(softplus, SoftplusFunctor, SoftplusGradFunctor); \ + __macro(softsign, SoftsignFunctor, SoftsignGradFunctor); \ + __macro(relu6, Relu6Functor, Relu6GradFunctor); \ + __macro(leaky_relu, LeakyReluFunctor, LeakyReluGradFunctor); \ + __macro(tanh_shrink, TanhShrinkFunctor, TanhShrinkGradFunctor); \ + __macro(elu, ELUFunctor, ELUGradFunctor); \ + __macro(hard_shrink, HardShrinkFunctor, HardShrinkGradFunctor); \ + __macro(hard_sigmoid, HardSigmoidFunctor, HardSigmoidGradFunctor); \ __macro(thresholded_relu, ThresholdedReluFunctor, ThresholdedReluGradFunctor); diff --git a/paddle/operators/adam_op.cc b/paddle/operators/adam_op.cc new file mode 100644 index 0000000000000000000000000000000000000000..293b37b7750427cb88efb6dfd5a02dcf7ede24ac --- /dev/null +++ b/paddle/operators/adam_op.cc @@ -0,0 +1,144 @@ +/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserve. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. */ + +#include "paddle/operators/adam_op.h" + +namespace paddle { +namespace operators { + +class AdamOp : public framework::OperatorWithKernel { + public: + using framework::OperatorWithKernel::OperatorWithKernel; + + protected: + void InferShape(framework::InferShapeContext *ctx) const override { + PADDLE_ENFORCE(ctx->HasInput("Param"), + "Input(Param) of AdamOp should not be null."); + PADDLE_ENFORCE(ctx->HasInput("Grad"), + "Input(Grad) of AdamOp should not be null."); + PADDLE_ENFORCE(ctx->HasInput("Moment1"), + "Input(Moment1) of AdamOp should not be null."); + PADDLE_ENFORCE(ctx->HasInput("Moment2"), + "Input(Moment2) of AdamOp should not be null."); + PADDLE_ENFORCE(ctx->HasInput("LearningRate"), + "Input(LearningRate) of AdamOp should not be null."); + PADDLE_ENFORCE(ctx->HasInput("Beta1Pow"), + "Input(Beta1Pow) of AdamOp should not be null."); + PADDLE_ENFORCE(ctx->HasInput("Beta2Pow"), + "Input(Beta2Pow) of AdamOp should not be null."); + + PADDLE_ENFORCE(ctx->HasOutput("ParamOut"), + "Output(ParamOut) of AdamOp should not be null."); + PADDLE_ENFORCE(ctx->HasOutput("Moment1Out"), + "Output(Moment1Out) of AdamOp should not be null."); + PADDLE_ENFORCE(ctx->HasOutput("Moment2Out"), + "Output(Moment2Out) of AdamOp should not be null."); + PADDLE_ENFORCE(ctx->HasOutput("Beta1PowOut"), + "Output(Beta1PowOut) of AdamOp should not be null."); + PADDLE_ENFORCE(ctx->HasOutput("Beta2PowOut"), + "Output(Beta2PowOut) of AdamOp should not be null."); + + auto lr_dims = ctx->GetInputDim("LearningRate"); + PADDLE_ENFORCE_EQ(framework::product(lr_dims), 1, + "Learning rate should have 1 dimension"); + auto beta1_pow_dims = ctx->GetInputDim("Beta1Pow"); + PADDLE_ENFORCE_EQ(framework::product(beta1_pow_dims), 1, + "Beta1 power accumulator should have 1 dimension"); + auto beta2_pow_dims = ctx->GetInputDim("Beta2Pow"); + PADDLE_ENFORCE_EQ(framework::product(beta1_pow_dims), 1, + "Beta1 power accumulator should have 1 dimension"); + + auto param_dims = ctx->GetInputDim("Param"); + PADDLE_ENFORCE_EQ( + param_dims, ctx->GetInputDim("Grad"), + "Param and Grad input of AdamOp should have same dimension"); + PADDLE_ENFORCE_EQ( + param_dims, ctx->GetInputDim("Moment1"), + "Param and Moment input of AdamOp should have same dimension"); + PADDLE_ENFORCE_EQ( + param_dims, ctx->GetInputDim("Moment2"), + "Param and InfNorm input of AdamOp should have same dimension"); + + ctx->SetOutputDim("ParamOut", param_dims); + ctx->SetOutputDim("Moment1Out", param_dims); + ctx->SetOutputDim("Moment2Out", param_dims); + ctx->SetOutputDim("Beta1PowOut", beta1_pow_dims); + ctx->SetOutputDim("Beta2PowOut", beta2_pow_dims); + } +}; + +class AdamOpMaker : public framework::OpProtoAndCheckerMaker { + public: + AdamOpMaker(framework::OpProto *proto, framework::OpAttrChecker *op_checker) + : OpProtoAndCheckerMaker(proto, op_checker) { + AddInput("Param", "(Tensor) Input parameter"); + AddInput("Grad", "(Tensor) Input gradient"); + AddInput("LearningRate", "(Tensor) Learning rate"); + AddInput("Moment1", "(Tensor) Input first moment"); + AddInput("Moment2", "(Tensor) Input second moment"); + AddInput("Beta1Pow", "(Tensor) Input beta1 power accumulator"); + AddInput("Beta2Pow", "(Tensor) Input beta2 power accumulator"); + + AddOutput("ParamOut", "(Tensor) Output parameter"); + AddOutput("Moment1Out", "(Tensor) Output first moment"); + AddOutput("Moment2Out", "(Tensor) Output second moment"); + AddOutput("Beta1PowOut", "(Tensor) Output beta1 power accumulator"); + AddOutput("Beta2PowOut", "(Tensor) Output beta2 power accumulator"); + + AddAttr("beta1", + "(float, default 0.9) " + "Exponential decay rate for the " + "first moment estimates.") + .SetDefault(0.9f); + AddAttr("beta2", + "(float, default 0.999) " + "exponential decay rate for the " + "second moment estimates.") + .SetDefault(0.999f); + AddAttr("epsilon", + "(float, default 1.0e-8) " + "Constant for numerical stability") + .SetDefault(1.0e-8f); + + AddComment(R"DOC( +Adam Updates Operator. + +This implements the Adam optimizer from Section 2 of the Adam +paper[1]. Adam is a first-order gradient-based optimization +method based on adaptive estimates of lower-order moments. + +Adam updates: + +moment1_out = beta1 * moment1 + (1 − beta1) * grad +moment2_out = beta2 * moment2 + (1 − beta2) * grad * grad +beta1_pow_out = beta1_pow * beta1 +beta2_pow_out = beta2_pow * beta2 +learning_rate_t = learning_rate_t * + sqrt(1 - beta2_pow_out) / (1 - beta1_pow_out) +param_out = param - learning_rate_t * moment1/ (sqrt(moment2) + epsilon) + +References: + [1] Adam: A Method for Stochastic Optimization + (https://arxiv.org/abs/1412.6980) + +)DOC"); + } +}; +} // namespace operators +} // namespace paddle + +namespace ops = paddle::operators; +REGISTER_OP_WITHOUT_GRADIENT(adam, ops::AdamOp, ops::AdamOpMaker); +REGISTER_OP_CPU_KERNEL(adam, + ops::AdamOpKernel); diff --git a/paddle/operators/adam_op.cu b/paddle/operators/adam_op.cu new file mode 100644 index 0000000000000000000000000000000000000000..a3def912e540454275350209435eb01ae2151331 --- /dev/null +++ b/paddle/operators/adam_op.cu @@ -0,0 +1,20 @@ +/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserve. + + Licensed under the Apache License, Version 2.0 (the "License"); + you may not use this file except in compliance with the License. + You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. */ + +#define EIGEN_USE_GPU +#include "paddle/operators/adam_op.h" + +namespace ops = paddle::operators; +REGISTER_OP_GPU_KERNEL(adam, + ops::AdamOpKernel); diff --git a/paddle/operators/adam_op.h b/paddle/operators/adam_op.h new file mode 100644 index 0000000000000000000000000000000000000000..789c2f14b32478bf9ddc967fc5725bcf65ed2146 --- /dev/null +++ b/paddle/operators/adam_op.h @@ -0,0 +1,82 @@ +/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserve. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. */ + +#pragma once +#include "paddle/framework/eigen.h" +#include "paddle/framework/op_registry.h" + +namespace paddle { +namespace operators { + +template +class AdamOpKernel : public framework::OpKernel { + public: + void Compute(const framework::ExecutionContext& ctx) const override { + auto param_out_tensor = ctx.Output("ParamOut"); + auto moment1_out_tensor = ctx.Output("Moment1Out"); + auto moment2_out_tensor = ctx.Output("Moment2Out"); + auto beta1_pow_out_tensor = ctx.Output("Beta1PowOut"); + auto beta2_pow_out_tensor = ctx.Output("Beta2PowOut"); + + param_out_tensor->mutable_data(ctx.GetPlace()); + moment1_out_tensor->mutable_data(ctx.GetPlace()); + moment2_out_tensor->mutable_data(ctx.GetPlace()); + beta1_pow_out_tensor->mutable_data(ctx.GetPlace()); + beta2_pow_out_tensor->mutable_data(ctx.GetPlace()); + + float beta1 = ctx.Attr("beta1"); + float beta2 = ctx.Attr("beta2"); + float epsilon = ctx.Attr("epsilon"); + + auto param = framework::EigenVector::Flatten( + *ctx.Input("Param")); + auto grad = framework::EigenVector::Flatten( + *ctx.Input("Grad")); + auto moment1 = framework::EigenVector::Flatten( + *ctx.Input("Moment1")); + auto moment2 = framework::EigenVector::Flatten( + *ctx.Input("Moment2")); + auto lr = framework::EigenVector::Flatten( + *ctx.Input("LearningRate")); + auto beta1_pow = framework::EigenVector::Flatten( + *ctx.Input("Beta1Pow")); + auto beta2_pow = framework::EigenVector::Flatten( + *ctx.Input("Beta2Pow")); + auto param_out = framework::EigenVector::Flatten(*param_out_tensor); + auto moment1_out = framework::EigenVector::Flatten(*moment1_out_tensor); + auto moment2_out = framework::EigenVector::Flatten(*moment2_out_tensor); + auto beta1_pow_out = + framework::EigenVector::Flatten(*beta1_pow_out_tensor); + auto beta2_pow_out = + framework::EigenVector::Flatten(*beta2_pow_out_tensor); + auto place = ctx.GetEigenDevice(); + + moment1_out.device(place) = beta1 * moment1 + (1 - beta1) * grad; + moment2_out.device(place) = beta2 * moment2 + (1 - beta2) * grad.square(); + beta1_pow_out.device(place) = beta1_pow * beta1; + beta2_pow_out.device(place) = beta2_pow * beta2; + // All of these are tensors of 1 element + auto lr_t = lr * (1 - beta2_pow_out).sqrt() / (1 - beta1_pow_out); + // Eigen does not support automatic broadcast + // Get dimensions of moment vector to broadcast lr_t + Eigen::DSizes m_dsize(moment1_out_tensor->numel()); + param_out.device(place) = + param - + lr_t.broadcast(m_dsize) * + (moment1_out / (moment2_out.sqrt() + epsilon)); + } +}; + +} // namespace operators +} // namespace paddle diff --git a/paddle/operators/dynamic_recurrent_op.cc b/paddle/operators/dynamic_recurrent_op.cc index f0f7265ce9fb6a0dd607f4653302fd36b2a5dba4..03f33e28d49fdaeccb9b6266359e0b41a1cb847f 100644 --- a/paddle/operators/dynamic_recurrent_op.cc +++ b/paddle/operators/dynamic_recurrent_op.cc @@ -23,6 +23,7 @@ using framework::Scope; using framework::TensorArray; using framework::LoDTensor; using framework::Variable; +using framework::DySeqMetaBatch; namespace detail { @@ -33,6 +34,29 @@ inline void CreateVariables(Scope& scope, } } +/* + * The inputs with sequence should be reordered when they are split, so the + * boot_states should be reordered in the same order. + * + * NOTE This may require that the `pre_state` of the first time step should just + * copy the `boot_state` rather than reference it, for that the content should + * be reordered, but the RNN op should not change the `boot_state` as an input + * variable's content. + */ +template +inline void ReorderBootState(const DySeqMetaBatch& metas, + const LoDTensor& boot_state, LoDTensor* tensor, + const platform::Place& dst_place) { + for (size_t seq_id = 0; seq_id < metas.size(); seq_id++) { + auto slice = tensor->Slice(seq_id, seq_id + 1); + auto boot_slice = + boot_state.Slice(metas[seq_id].ori_idx, metas[seq_id].ori_idx + 1); + // TODO(superjom) pass in device context as an argument + slice.template CopyFrom(boot_slice, dst_place, + platform::CPUDeviceContext()); + } +} + } // namespace detail class DynamicRecurrentOpProtoAndCheckerMaker @@ -69,6 +93,7 @@ void DynamicRecurrentOp::Run(const Scope& scope, CreateScopes(); WriteStepInputs(); InitStates(); + WriteStepOutputs(); // call stepnet in all the time steps for (size_t step = 0; step < cache_.num_steps; step++) { @@ -76,7 +101,6 @@ void DynamicRecurrentOp::Run(const Scope& scope, stepnet_->Run(step_scope, dev_ctx); } - WriteStepOutputs(); ConcatOutputs(); } @@ -84,11 +108,11 @@ void DynamicRecurrentOp::SplitInputs() const { // TODO(superjom) make level a config // TODO(superjom) check all the inputs has the same LoD int level = 0; - const auto& inlinks = cache_.inlinks; - for (const auto& item : inlinks) { + for (const auto& item : cache_.inlinks) { const auto& var = item.second; const auto& tensor = var->Get(); TensorArray& ta = step_inputs_[item.first]; + dy_seq_metas_[item.first] = ta.Unpack(tensor, level, true /*length_descend*/); @@ -120,17 +144,11 @@ void DynamicRecurrentOp::WriteStepInputs() const { } void DynamicRecurrentOp::WriteStepOutputs() const { - for (size_t step = 0; step < cache_.scopes->size(); step++) { - auto& scope = cache_.GetScope(step); - for (auto& item : step_outputs_) { - auto* var = scope.FindVar(item.first); - if (var == nullptr) { - var = scope.Var(item.first); - } - auto* tensor = var->GetMutable(); - item.second.WriteShared(step, *tensor); - } + // initialize step outputs + for (const auto& item : cache_.outlinks) { + step_outputs_.emplace(item.first, TensorArray()); } + PADDLE_ENFORCE_GT(step_outputs_.size(), 0UL); } void DynamicRecurrentOp::CreateScopes() const { @@ -145,12 +163,18 @@ void DynamicRecurrentOp::CreateScopes() const { PADDLE_ENFORCE_NOT_NULL(stepnet_, "stepnet should be set first"); std::vector memories; std::vector pre_memories; + std::vector stepnet_outputs; std::transform(arg_.memories.begin(), arg_.memories.end(), std::back_inserter(memories), [](const rnn::MemoryAttr& m) { return m.var; }); std::transform(arg_.memories.begin(), arg_.memories.end(), std::back_inserter(pre_memories), [](const rnn::MemoryAttr& m) { return m.pre_var; }); + for (const auto& item : stepnet_->Outputs()) { + for (const auto& var : item.second) { + stepnet_outputs.push_back(var); + } + } for (size_t step = 0; step < cache_.num_steps; step++) { auto& scope = cache_.GetScope(step); @@ -158,60 +182,88 @@ void DynamicRecurrentOp::CreateScopes() const { detail::CreateVariables(scope, arg_.outlinks); detail::CreateVariables(scope, memories); detail::CreateVariables(scope, pre_memories); + detail::CreateVariables(scope, stepnet_outputs); } } void DynamicRecurrentOp::ConcatOutputs() const { // TODO(superjom) transform this to a config int level = 0; - // TODO(superjom) pass in some lod - // just a placeholder - framework::LoD lod; + for (size_t step = 0; step < cache_.num_steps; step++) { + auto& scope = cache_.GetScope(step); + for (auto& item : step_outputs_) { + auto* var = scope.FindVar(item.first); + PADDLE_ENFORCE_NOT_NULL(var); + auto* tensor = var->GetMutable(); + tensor->mutable_data(platform::CPUPlace()); + item.second.WriteShared(step, *tensor); + } + } + // the inlinks' lods should be the same, so randomly get one lod. + const auto& some_lod = + cache_.scope->FindVar(arg_.inlinks.front())->Get().lod(); + const auto& some_meta = dy_seq_metas_[arg_.inlinks.front()]; for (auto& item : step_outputs_) { - auto tensor = item.second.Pack(level, dy_seq_metas_[item.first], lod); - auto& output = cache_.outlinks[item.first]->Get(); - const_cast(&output)->ShareDataWith(tensor); + auto tensor = item.second.Pack(level, some_meta, some_lod); + auto* output = cache_.outlinks[item.first]->GetMutable(); + const_cast(output)->ShareDataWith(tensor); } } void DynamicRecurrentOp::InitStates() const { - // init the first state - // TODO(superjom) parepare the scenerio that boot state not exists - for (auto memory : arg_.memories) { - auto* boot_state_var = cache_.scope->FindVar(memory.boot_var); - PADDLE_ENFORCE_NOT_NULL(boot_state_var); - auto& boot_state = boot_state_var->Get(); - const auto& dims = boot_state.dims(); - - for (size_t step = 0; step < cache_.num_steps; step++) { - auto& cur_scope = cache_.GetScope(step); - // link pre-state to boot_state - // init state and pre-state - auto* pre_state = cur_scope.FindVar(memory.pre_var); - PADDLE_ENFORCE_NOT_NULL(pre_state); - pre_state->GetMutable(); - - auto* state = cur_scope.FindVar(memory.var); - PADDLE_ENFORCE_NOT_NULL(state); - state->GetMutable()->Resize(dims); - state->GetMutable()->mutable_data( - platform::CPUPlace()); - - if (step == 0) { - auto* pre_state_tensor = pre_state->GetMutable(); - pre_state_tensor->Resize(boot_state.dims()); - pre_state_tensor->ShareDataWith(boot_state); - } else { - auto& pre_scope = cache_.GetScope(step - 1); - auto* state_pre = pre_scope.FindVar(memory.var); - PADDLE_ENFORCE_NOT_NULL(state_pre); - pre_state->GetMutable()->ShareDataWith( - *state_pre->GetMutable()); - } + for (size_t step = 0; step < cache_.num_steps; step++) { + for (const auto& memory : arg_.memories) { + CreateState(memory, step); + LinkState(memory, step); } } } +void DynamicRecurrentOp::CreateState(const rnn::MemoryAttr& memory, + size_t step) const { + auto& scope = cache_.GetScope(step); + auto& state = *cache_.GetTensor(scope, memory.var); + auto& boot_state = *cache_.GetTensor(*cache_.scope, memory.boot_var); + + size_t num_instances = + step_inputs_[arg_.inlinks.front()].Read(step).dims()[0]; + auto dims = boot_state.dims(); + dims[0] = num_instances; + + state.Resize(dims); + state.mutable_data(platform::CPUPlace()); + states_[memory.var].WriteShared(step, state); +} + +void DynamicRecurrentOp::LinkState(const rnn::MemoryAttr& memory, + size_t step) const { + auto& scope = cache_.GetScope(step); + auto& state_pre = *cache_.GetTensor(scope, memory.pre_var); + + // all the step_inputs' metas should be the same, just randomly select one + // and get the dyseq meta. + const auto& some_meta = dy_seq_metas_[arg_.inlinks.front()]; + size_t num_instances = + step_inputs_[arg_.inlinks.front()].Read(step).dims()[0]; + + LoDTensor* pre_state{nullptr}; + if (step == 0) { + pre_state = cache_.GetTensor(*cache_.scope, memory.boot_var); + pre_state->mutable_data(platform::CPUPlace()); + // allocate memory + state_pre.Resize(pre_state->dims()); + state_pre.mutable_data(platform::CPUPlace()); + detail::ReorderBootState(some_meta, *pre_state, &state_pre, + pre_state->place()); + } else { + pre_state = cache_.GetTensor(cache_.GetScope(step - 1), memory.var); + } + + // shink and share from previous state + auto shrinked_pre_state = pre_state->Slice(0, num_instances); + state_pre.ShareDataWith(shrinked_pre_state); +} + void DynamicRecurrentOp::ArgCache::Init( const rnn::ArgumentName& name, const paddle::framework::OperatorBase& op, const paddle::framework::Scope& scope, rnn::Argument* arg) { @@ -261,6 +313,12 @@ Variable* DynamicRecurrentOp::ArgCache::GetVariable(const Scope& scope, return var; } +LoDTensor* DynamicRecurrentOp::ArgCache::GetTensor( + const framework::Scope& scope, const std::string& name) { + auto* var = GetVariable(scope, name); + return var->GetMutable(); +} + const rnn::ArgumentName DynamicRecurrentOp::kArgName{ "step_net", "step_scopes", "inlinks", "outlinks", "memories", "pre_memories", "boot_memories"}; diff --git a/paddle/operators/dynamic_recurrent_op.h b/paddle/operators/dynamic_recurrent_op.h index 6a2970f27fd5bcb25e924dbc567e254159b55a3e..ec80a1c90eee3a655febe0dd3d6c67c16ec6c64b 100644 --- a/paddle/operators/dynamic_recurrent_op.h +++ b/paddle/operators/dynamic_recurrent_op.h @@ -77,6 +77,17 @@ class DynamicRecurrentOp : public framework::OperatorBase { */ void InitStates() const; + /* + * Create state variables for each time step. + */ + void CreateState(const rnn::MemoryAttr& memory, size_t step) const; + + /* + * Link pre-state variable in current scope to the state variable in the + * previous time step (scope). + */ + void LinkState(const rnn::MemoryAttr& memory, size_t step) const; + /* * Concatenate outputs in each time step and generate a LoDTensor. */ @@ -91,6 +102,16 @@ class DynamicRecurrentOp : public framework::OperatorBase { } const OperatorBase& GetStepNet() const { return *stepnet_; } + const framework::TensorArray& state(const std::string& name) const { + return states_[name]; + } + const framework::TensorArray& step_input(const std::string& name) const { + return step_inputs_[name]; + } + const framework::TensorArray& step_output(const std::string& name) const { + return step_outputs_[name]; + } + protected: struct ArgCache { framework::Scope const* scope; @@ -108,6 +129,9 @@ class DynamicRecurrentOp : public framework::OperatorBase { return *scopes->at(index); } + framework::LoDTensor* GetTensor(const framework::Scope& scope, + const std::string& name); + private: void InitArgument(const rnn::ArgumentName& name, const OperatorBase& op, rnn::Argument* arg); @@ -122,7 +146,7 @@ class DynamicRecurrentOp : public framework::OperatorBase { private: std::unique_ptr stepnet_; - mutable framework::TensorArray states_; + mutable std::map states_; mutable std::map step_inputs_; mutable std::map step_outputs_; mutable std::map> diff --git a/paddle/operators/dynamic_recurrent_op_test.cc b/paddle/operators/dynamic_recurrent_op_test.cc index d8ba0bc9f102ddd494a34bce97dffde4b03f2d54..83a5ba36d9af2ef81ebcbb33e056de2e0b98cbc1 100644 --- a/paddle/operators/dynamic_recurrent_op_test.cc +++ b/paddle/operators/dynamic_recurrent_op_test.cc @@ -87,7 +87,6 @@ class DynamicRecurrentOpTestHelper : public ::testing::Test { platform::CPUPlace place; scope.Var("step_scopes"); CreateVar(scope, "boot_mem", framework::make_ddim({10, 20}), place); - // auto* out0 = CreateVar(scope, "out0", framework::make_ddim({10, 20}), place); auto* in0 = CreateVar(scope, "in0", framework::make_ddim({10, 8}), place); // 10 instanes with 4 sentences, length is 4, 3, 2, 1 respectively. diff --git a/paddle/operators/gru_unit_op.cc b/paddle/operators/gru_unit_op.cc new file mode 100644 index 0000000000000000000000000000000000000000..24f84597cd7301af6521b8c1032e69569ba6f03a --- /dev/null +++ b/paddle/operators/gru_unit_op.cc @@ -0,0 +1,210 @@ +/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserve. + + Licensed under the Apache License, Version 2.0 (the "License"); + you may not use this file except in compliance with the License. + You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. */ + +#include "paddle/operators/gru_unit_op.h" + +namespace paddle { +namespace operators { + +using framework::Tensor; + +class GRUUnitOp : public framework::OperatorWithKernel { + public: + using framework::OperatorWithKernel::OperatorWithKernel; + + protected: + void InferShape(framework::InferShapeContext* ctx) const override { + PADDLE_ENFORCE(ctx->HasInput("Input"), + "Input(%s) of GRUUnitOp should not be null.", "Input"); + PADDLE_ENFORCE(ctx->HasInput("HiddenPrev"), + "Input(%s) of GRUUnitOp should not be null.", "HiddenPrev"); + PADDLE_ENFORCE(ctx->HasInput("Weight"), + "Input(%s) of GRUUnitOp should not be null.", "Weight"); + PADDLE_ENFORCE(ctx->HasOutput("Gate"), + "Output(%s) of GRUUnitOp should not be null.", "Gate"); + PADDLE_ENFORCE(ctx->HasOutput("ResetHiddenPrev"), + "Output(%s) of GRUUnitOp should not be null.", + "ResetHiddenPrev"); + PADDLE_ENFORCE(ctx->HasOutput("Hidden"), + "Output(%s) of GRUUnitOp should not be null.", "Hidden"); + auto input_dims = ctx->GetInputDim("Input"); + auto hidden_prev_dims = ctx->GetInputDim("HiddenPrev"); + auto weight_dims = ctx->GetInputDim("Weight"); + int batch_size = input_dims[0]; + int input_size = input_dims[1]; + int frame_size = hidden_prev_dims[1]; + int weight_height = weight_dims[0]; + int weight_width = weight_dims[1]; + PADDLE_ENFORCE_EQ( + input_size, frame_size * 3, + "The input_size must be 3 times of frame_size in GRUUnitOp."); + PADDLE_ENFORCE_EQ( + weight_height, frame_size, + "The shape of Weight matrix must be [frame_size, frame_size * 3]."); + PADDLE_ENFORCE_EQ( + weight_width, frame_size * 3, + "The shape of Weight matrix must be [frame_size, frame_size * 3]."); + auto bias = Input("Bias"); + if (bias != framework::kEmptyVarName) { + auto bias_dims = ctx->GetInputDim("Bias"); + int bias_height = bias_dims[0]; + int bias_width = bias_dims[1]; + PADDLE_ENFORCE_EQ(bias_height, 1, + "The shape of Bias must be [1, frame_size * 3]."); + PADDLE_ENFORCE_EQ(bias_width, frame_size * 3, + "The shape of Bias must be [1, frame_size * 3]."); + } + ctx->SetOutputDim("Gate", {batch_size, frame_size * 3}); + ctx->SetOutputDim("ResetHiddenPrev", {batch_size, frame_size}); + ctx->SetOutputDim("Hidden", {batch_size, frame_size}); + } +}; + +class GRUUnitOpMaker : public framework::OpProtoAndCheckerMaker { + public: + GRUUnitOpMaker(framework::OpProto* proto, + framework::OpAttrChecker* op_checker) + : OpProtoAndCheckerMaker(proto, op_checker) { + AddInput("Input", + "(Tensor) Matrix with shape [batch_size, frame_size * 3] for the " + "input."); + AddInput("HiddenPrev", + "(Tensor) Matrix with shape [batch_size, frame_size] for the " + "states of previous time step."); + AddInput("Weight", + "(Tensor) Weight matrix with shape [frame_size, frame_size * 3]. " + "The elements continuous in memory can be divided into two parts. " + "The first part are weights of the update gate and reset gate " + "with shape [frame_size, frame_size * 2], and the second part are " + "weights of output candidate with shape [frame_size, frame_size]"); + AddInput("Bias", + "(Tensor) Bias vector with shape [1, frame_size * 3] concating " + "bias of the update gate, reset gate and output candidate."); + AddOutput("Gate", + "(Tensor) Matrix with shape [batch_size, frame_size * 3] for the " + "output of update gate, reset gate and output candidate") + .AsIntermediate(); + AddOutput("ResetHiddenPrev", + "(Tensor) Matrix with shape [batch_size, frame_size] for the " + "reseted hidden state of previous time step.") + .AsIntermediate(); + AddOutput("Hidden", + "(Tensor) The GRU hidden state of the current time step " + "with shape [batch_size, frame_size]."); + AddAttr("activation", + "(enum int, default tanh) " + "The activation type used for output candidate {h}_t.") + .SetDefault(tanh) + .InEnum({identity, sigmoid, tanh, relu}); + AddAttr("gate_activation", + "(enum int, default sigmoid) " + "The activation type used in update gate and reset gate.") + .SetDefault(sigmoid) + .InEnum({identity, sigmoid, tanh, relu}); + AddComment(R"DOC( +GRUUnitOp implements part calculations of the GRU unit as following: + +\f[ +update \ gate: u_t = actGate(xu_t + W_u * hidden_prev + bias_u) \\ +reset \ gate: r_t = actGate(xr_t + W_r * hidden_prev + bias_r) \\ +output \ candidate: {h}_t = actNode(xc_t + W_c * dot(r_t, hidden_prev) + bias_c) \\ +output: h_t = dot((1-u_t), {h}_t) + dot(u_t, hidden_prev) +\f] + +The rest of GRU unit can be completed by using FCOp's output as the input of GRUUnitOp. +)DOC"); + } +}; + +class GRUUnitGradOp : public framework::OperatorWithKernel { + public: + using framework::OperatorWithKernel::OperatorWithKernel; + + protected: + void InferShape(framework::InferShapeContext* ctx) const override { + PADDLE_ENFORCE(ctx->HasInput("Input"), + "Input(%s) of GRUUnitGradOp should not be null.", "Input"); + PADDLE_ENFORCE(ctx->HasInput("HiddenPrev"), + "Input(%s) of GRUUnitGradOp should not be null.", + "HiddenPrev"); + PADDLE_ENFORCE(ctx->HasInput("Weight"), + "Input(%s) of GRUUnitGradOp should not be null.", "Weight"); + PADDLE_ENFORCE(ctx->HasInput("Gate"), + "Input(%s) of GRUUnitGradOp should not be null.", "Gate"); + PADDLE_ENFORCE(ctx->HasInput("ResetHiddenPrev"), + "Input(%s) of GRUUnitGradOp should not be null.", + "ResetHiddenPrev"); + PADDLE_ENFORCE(ctx->HasInput("Hidden"), + "Input(%s) of GRUUnitGradOp should not be null.", "Hidden"); + PADDLE_ENFORCE(ctx->HasInput(framework::GradVarName("Gate")), + "Input(%s@GRAD) of GRUUnitGradOp should not be null.", + "Gate"); + PADDLE_ENFORCE(ctx->HasInput(framework::GradVarName("ResetHiddenPrev")), + "Input(%s@GRAD) of GRUUnitGradOp should not be null.", + "ResetHiddenPrev"); + PADDLE_ENFORCE(ctx->HasInput(framework::GradVarName("Hidden")), + "Input(%s@GRAD) of GRUUnitGradOp should not be null.", + "Hidden"); + auto input_dims = ctx->GetInputDim("Input"); + auto hidden_prev_dims = ctx->GetInputDim("HiddenPrev"); + auto weight_dims = ctx->GetInputDim("Weight"); + // int batch_size = input_dims[0]; + int input_size = input_dims[1]; + int frame_size = hidden_prev_dims[1]; + int weight_height = weight_dims[0]; + int weight_width = weight_dims[1]; + PADDLE_ENFORCE_EQ( + input_size, frame_size * 3, + "The input_size must be 3 times of frame_size in GRUUnitOp."); + PADDLE_ENFORCE_EQ( + weight_height, frame_size, + "The shape of Weight matrix must be [frame_size, frame_size * 3]."); + PADDLE_ENFORCE_EQ( + weight_width, frame_size * 3, + "The shape of Weight matrix must be [frame_size, frame_size * 3]."); + auto bias = Input("Bias"); + if (bias != framework::kEmptyVarName) { + auto bias_dims = ctx->GetInputDim("Bias"); + int bias_height = bias_dims[0]; + int bias_width = bias_dims[1]; + PADDLE_ENFORCE_EQ(bias_height, 1, + "The shape of Bias must be [1, frame_size * 3]."); + PADDLE_ENFORCE_EQ(bias_width, frame_size * 3, + "The shape of Bias must be [1, frame_size * 3]."); + auto bias_grad_name = framework::GradVarName("Bias"); + if (ctx->HasOutput(bias_grad_name)) + ctx->SetOutputDim(bias_grad_name, bias_dims); + } + auto input_grad_name = framework::GradVarName("Input"); + if (ctx->HasOutput(input_grad_name)) + ctx->SetOutputDim(input_grad_name, input_dims); + auto hidden_prev_grad_name = framework::GradVarName("HiddenPrev"); + if (ctx->HasOutput(hidden_prev_grad_name)) + ctx->SetOutputDim(hidden_prev_grad_name, hidden_prev_dims); + auto weight_grad_name = framework::GradVarName("Weight"); + if (ctx->HasOutput(weight_grad_name)) + ctx->SetOutputDim(weight_grad_name, weight_dims); + } +}; + +} // namespace operators +} // namespace paddle + +namespace ops = paddle::operators; +REGISTER_OP(gru_unit, ops::GRUUnitOp, ops::GRUUnitOpMaker, gru_unit_grad, + ops::GRUUnitGradOp); +REGISTER_OP_CPU_KERNEL(gru_unit, + ops::GRUUnitKernel); +REGISTER_OP_CPU_KERNEL( + gru_unit_grad, ops::GRUUnitGradKernel); diff --git a/paddle/operators/gru_unit_op.cu b/paddle/operators/gru_unit_op.cu new file mode 100644 index 0000000000000000000000000000000000000000..365f656523ddfb7ec8e2a5b885de74674823325a --- /dev/null +++ b/paddle/operators/gru_unit_op.cu @@ -0,0 +1,22 @@ +/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserve. + + Licensed under the Apache License, Version 2.0 (the "License"); + you may not use this file except in compliance with the License. + You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. */ + +#define EIGEN_USE_GPU +#include "paddle/operators/gru_unit_op.h" + +namespace ops = paddle::operators; +REGISTER_OP_GPU_KERNEL(gru_unit, + ops::GRUUnitKernel); +REGISTER_OP_GPU_KERNEL( + gru_unit_grad, ops::GRUUnitGradKernel); diff --git a/paddle/operators/gru_unit_op.h b/paddle/operators/gru_unit_op.h new file mode 100644 index 0000000000000000000000000000000000000000..c53e7d9827e0395e6ce613302e732b2797f83cdd --- /dev/null +++ b/paddle/operators/gru_unit_op.h @@ -0,0 +1,230 @@ +/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserve. + + Licensed under the Apache License, Version 2.0 (the "License"); + you may not use this file except in compliance with the License. + You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. */ + +#pragma once + +#include "paddle/operators/activation_op.h" +#include "paddle/operators/math/math_function.h" + +#include "paddle/framework/eigen.h" +#include "paddle/framework/op_registry.h" + +namespace paddle { +namespace operators { + +using Tensor = framework::Tensor; +template +using EigenMatrix = framework::EigenMatrix; + +enum GRUActivationType { identity = 0, sigmoid = 1, tanh = 2, relu = 3 }; + +template +class GRUUnitKernel : public framework::OpKernel { + public: + template + void ActCompute(const int act_type, const Device& d, X x, Y y) const { + if (act_type == identity) + y.device(d) = x; + else if (act_type == sigmoid) + SigmoidFunctor()(d, x, y); + else if (act_type == tanh) + TanhFunctor()(d, x, y); + else if (act_type == relu) + ReluFunctor()(d, x, y); + else + PADDLE_THROW("unsupported activation type"); + } + + void Compute(const framework::ExecutionContext& context) const override { + auto* input = context.Input("Input"); + auto* hidden_prev = context.Input("HiddenPrev"); + auto* weight = context.Input("Weight"); + auto* bias = context.Input("Bias"); + auto* gate = context.Output("Gate"); + gate->mutable_data(context.GetPlace()); + auto* reset_hidden_prev = context.Output("ResetHiddenPrev"); + reset_hidden_prev->mutable_data(context.GetPlace()); + auto* hidden = context.Output("Hidden"); + hidden->mutable_data(context.GetPlace()); + + int batch_size = input->dims()[0]; + int frame_size = hidden_prev->dims()[1]; + + auto x = EigenMatrix::From(*input); + auto h_p = EigenMatrix::From(*hidden_prev); + auto g = EigenMatrix::From(*gate); + auto r_h_p = EigenMatrix::From(*reset_hidden_prev); + auto h = EigenMatrix::From(*hidden); + auto place = context.GetEigenDevice(); + + // calculate unactivated gate outputs + if (bias) { + auto b = EigenMatrix::From(*bias); + g.device(place) = x + + b.reshape(Eigen::array({{1, frame_size * 3}})) + .broadcast(Eigen::array({{batch_size, 1}})); + } else { + g.device(place) = x; + } + const T* hidden_prev_data = hidden_prev->data(); + const T* weight_data = weight->data(); + T* gate_data = gate->data(); + T* reset_hidden_prev_data = reset_hidden_prev->data(); + math::gemm(context.device_context(), false, false, batch_size, + 2 * frame_size, frame_size, 1, hidden_prev_data, + frame_size, weight_data, frame_size * 2, 1, gate_data, + frame_size * 3); + + // calculate activited gate + Eigen::array extents({{batch_size, frame_size}}); + Eigen::array u_offsets({{0, 0}}); + ActCompute(context.Attr("gate_activation"), place, + g.slice(u_offsets, extents), g.slice(u_offsets, extents)); + auto u = g.slice(u_offsets, extents); // update gate + Eigen::array r_offsets({{0, frame_size}}); + ActCompute(context.Attr("gate_activation"), place, + g.slice(r_offsets, extents), g.slice(r_offsets, extents)); + auto r = g.slice(r_offsets, extents); // reset gate + r_h_p.device(place) = r * h_p; // reset previous hidden state + math::gemm(context.device_context(), false, false, batch_size, + frame_size, frame_size, 1, reset_hidden_prev_data, + frame_size, weight_data + frame_size * frame_size * 2, + frame_size, 1, gate_data + frame_size * 2, + frame_size * 3); + + Eigen::array c_offsets({{0, frame_size * 2}}); + ActCompute(context.Attr("activation"), place, + g.slice(c_offsets, extents), g.slice(c_offsets, extents)); + auto c = g.slice(c_offsets, extents); // output candidate + + // calculate final output + h.device(place) = u * (h_p - c) + c; + } +}; + +template +class GRUUnitGradKernel : public framework::OpKernel { + public: + template + void ActGradCompute(const int act_type, const Device& d, X x, Y y, DX dx, + DY dy) const { + // x is dummy and won't be used even in Relu(use y instead) + if (act_type == identity) + dx.device(d) = dy; + else if (act_type == sigmoid) + SigmoidGradFunctor()(d, x, y, dy, dx); + else if (act_type == tanh) + TanhGradFunctor()(d, x, y, dy, dx); + else if (act_type == relu) + ReluGradFunctor()(d, x, y, dy, dx); + else + PADDLE_THROW("unsupported activation type"); + } + + void Compute(const framework::ExecutionContext& context) const override { + auto* input = context.Input("Input"); + auto* hidden_prev = context.Input("HiddenPrev"); + auto* weight = context.Input("Weight"); + auto* gate = context.Input("Gate"); + auto* reset_hidden_prev = context.Input("ResetHiddenPrev"); + auto* hidden_grad = context.Input(framework::GradVarName("Hidden")); + auto* input_grad = context.Output(framework::GradVarName("Input")); + auto* hidden_prev_grad = + context.Output(framework::GradVarName("HiddenPrev")); + auto* weight_grad = + context.Output(framework::GradVarName("Weight")); + auto* bias_grad = context.Output(framework::GradVarName("Bias")); + input_grad->mutable_data(context.GetPlace()); + hidden_prev_grad->mutable_data(context.GetPlace()); + weight_grad->mutable_data(context.GetPlace()); + Tensor gate_grad; + gate_grad.mutable_data(input->dims(), context.GetPlace()); + Tensor reset_hidden_prev_grad; + reset_hidden_prev_grad.mutable_data(reset_hidden_prev->dims(), + context.GetPlace()); + + int batch_size = input->dims()[0]; + int frame_size = hidden_prev->dims()[1]; + + const T* hidden_prev_data = hidden_prev->data(); + T* hidden_prev_grad_data = hidden_prev_grad->data(); + const T* weight_data = weight->data(); + T* weight_grad_data = weight_grad->data(); + T* gate_grad_data = gate_grad.data(); + const T* reset_hidden_prev_data = reset_hidden_prev->data(); + T* reset_hidden_prev_grad_data = reset_hidden_prev_grad.data(); + + auto h_p = EigenMatrix::From(*hidden_prev); + auto g = EigenMatrix::From(*gate); + auto d_h = EigenMatrix::From(*hidden_grad); + auto d_x = EigenMatrix::From(*input_grad); + auto d_h_p = EigenMatrix::From(*hidden_prev_grad); + auto d_g = EigenMatrix::From(gate_grad); + auto d_r_h_p = EigenMatrix::From(reset_hidden_prev_grad); + auto place = context.GetEigenDevice(); + + Eigen::array extents({{batch_size, frame_size}}); + Eigen::array u_offsets({{0, 0}}); + auto u = g.slice(u_offsets, extents); // update gate + Eigen::array r_offsets({{0, frame_size}}); + auto r = g.slice(r_offsets, extents); // reset gate + Eigen::array c_offsets({{0, frame_size * 2}}); + auto c = g.slice(c_offsets, extents); // output candidate + + // backward for unactivated update gate + ActGradCompute(context.Attr("gate_activation"), place, u, u, + d_g.slice(u_offsets, extents), d_h * (h_p - c)); + // backward for unactivated output candidate + ActGradCompute(context.Attr("activation"), place, c, c, + d_g.slice(c_offsets, extents), d_h * (u.constant(T(1)) - u)); + // backward for reset_hidden_prev + math::gemm(context.device_context(), false, true, batch_size, + frame_size, frame_size, 1, + gate_grad_data + frame_size * 2, frame_size * 3, + weight_data + frame_size * frame_size * 2, frame_size, + 0, reset_hidden_prev_grad_data, frame_size); + // backward for state_weight + math::gemm( + context.device_context(), true, false, frame_size, frame_size, + batch_size, 1, reset_hidden_prev_data, frame_size, + gate_grad_data + frame_size * 2, frame_size * 3, 0, + weight_grad_data + frame_size * frame_size * 2, frame_size); + // backward for unactivated reset gate + ActGradCompute(context.Attr("gate_activation"), place, r, r, + d_g.slice(r_offsets, extents), d_r_h_p * h_p); + // backward for update_gate_weight and reset_gate_weight + math::gemm(context.device_context(), true, false, frame_size, + frame_size * 2, batch_size, 1, hidden_prev_data, + frame_size, gate_grad_data, frame_size * 3, 0, + weight_grad_data, frame_size * 2); + // backward for hidden_prev + d_h_p.device(place) = d_r_h_p * r + d_h * u; + math::gemm(context.device_context(), false, true, batch_size, + frame_size, frame_size * 2, 1, gate_grad_data, + frame_size * 3, weight_data, frame_size * 2, 1, + hidden_prev_grad_data, frame_size); + // backward for input + d_x.device(place) = d_g; + // backward for bias + if (bias_grad) { + bias_grad->mutable_data(context.GetPlace()); + auto d_b = EigenMatrix::From(*bias_grad); + d_b.device(place) = d_g.sum(Eigen::array({{0}})); + } + } +}; + +} // namespace operators +} // namespace paddle diff --git a/paddle/operators/multiplex_op.cc b/paddle/operators/multiplex_op.cc index a86685b6dde4761cf74f9521bd9609b0864b9bdf..051051b051961c6da064bd9319460b3f41cea3e8 100644 --- a/paddle/operators/multiplex_op.cc +++ b/paddle/operators/multiplex_op.cc @@ -115,8 +115,9 @@ class MultiplexGradOp : public framework::OperatorWithKernel { } // namespace paddle namespace ops = paddle::operators; -REGISTER_OP(multiplex, ops::MultiplexOp, ops::MultiplexOpMaker, multiplex_grad, - ops::MultiplexGradOp); +REGISTER_OPERATOR(multiplex, ops::MultiplexOp, ops::MultiplexOpMaker, + paddle::framework::DefaultGradOpDescMaker); +REGISTER_OPERATOR(multiplex_grad, ops::MultiplexGradOp); REGISTER_OP_CPU_KERNEL( multiplex, ops::MultiplexCPUKernel); REGISTER_OP_CPU_KERNEL( diff --git a/paddle/operators/sum_op.cc b/paddle/operators/sum_op.cc index ffb0cb92111bfb8490d35e4f5cfc9e405b0e3250..573487b83590c132d5a4379a4b2762fbc16c04bc 100644 --- a/paddle/operators/sum_op.cc +++ b/paddle/operators/sum_op.cc @@ -34,7 +34,7 @@ class SumOp : public framework::OperatorWithKernel { auto in_dim = x_dims[0]; for (size_t i = 1; i < N; i++) { auto dim = x_dims[i]; - PADDLE_ENFORCE(in_dim == dim, "Input tensors must have same shape"); + PADDLE_ENFORCE_EQ(in_dim, dim, "Input tensors must have same shape"); } ctx->SetOutputDim("Out", in_dim); ctx->ShareLoD("X", /*->*/ "Out"); diff --git a/paddle/pybind/protobuf.cc b/paddle/pybind/protobuf.cc index 7bb44c06e2a3aaa34ea5b3e2540358414782f335..ee29b61eec6eed2f95013bd5a9bdf03a8493826e 100644 --- a/paddle/pybind/protobuf.cc +++ b/paddle/pybind/protobuf.cc @@ -123,7 +123,18 @@ void BindProgramDesc(py::module &m) { AppendBackward(program_desc, no_grad_vars); }) .def("block", &ProgramDescBind::Block, py::return_value_policy::reference) - .def("num_blocks", &ProgramDescBind::Size); + .def("num_blocks", &ProgramDescBind::Size) + .def("serialize_to_string", + [](ProgramDescBind &program_desc) -> py::bytes { + const ProgramDesc *desc = program_desc.Proto(); + PADDLE_ENFORCE(desc->IsInitialized(), + "ProgramDesc has not been initialized."); + std::string res; + PADDLE_ENFORCE( + desc->SerializeToString(&res), + "Serialize ProgramDesc Error. This could be a bug of Paddle."); + return res; + }); } void BindBlockDesc(py::module &m) { @@ -149,7 +160,17 @@ void BindBlockDesc(py::module &m) { .def("all_vars", &BlockDescBind::AllVars, py::return_value_policy::reference) .def("all_ops", &BlockDescBind::AllOps, - py::return_value_policy::reference); + py::return_value_policy::reference) + .def("serialize_to_string", [](BlockDescBind &block_desc) -> py::bytes { + const BlockDesc *desc = block_desc.Proto(); + PADDLE_ENFORCE(desc->IsInitialized(), + "BlockDesc has not been initialized."); + std::string res; + PADDLE_ENFORCE( + desc->SerializeToString(&res), + "Serialize BlockDesc Error. This could be a bug of Paddle."); + return res; + }); } void BindVarDsec(py::module &m) { @@ -162,7 +183,8 @@ void BindVarDsec(py::module &m) { .value("FP32", DataType::FP32) .value("FP64", DataType::FP64); - py::class_(m, "VarDesc", "") + py::class_ var_desc(m, "VarDesc", ""); + var_desc .def("name", [](const VarDescBind &self) { py::bytes name = self.Name(); @@ -174,7 +196,23 @@ void BindVarDsec(py::module &m) { .def("shape", &VarDescBind::Shape, py::return_value_policy::reference) .def("data_type", &VarDescBind::GetDataType) .def("lod_level", &VarDescBind::GetLodLevel) - .def("set_lod_level", &VarDescBind::SetLoDLevel); + .def("set_lod_level", &VarDescBind::SetLoDLevel) + .def("type", &VarDescBind::GetType) + .def("set_type", &VarDescBind::SetType) + .def("serialize_to_string", [](VarDescBind &var_desc) -> py::bytes { + const VarDesc *desc = var_desc.Proto(); + PADDLE_ENFORCE(desc->IsInitialized(), + "VarDesc has not been initialized."); + std::string res; + PADDLE_ENFORCE( + desc->SerializeToString(&res), + "Serialize VarDesc Error. This could be a bug of Paddle."); + return res; + }); + + py::enum_(var_desc, "VarType", "") + .value("LOD_TENSOR", VarDesc::LOD_TENSOR) + .value("SELECTED_ROWS", VarDesc::SELECTED_ROWS); } void BindOpDesc(py::module &m) { @@ -206,7 +244,17 @@ void BindOpDesc(py::module &m) { .def("set_block_attr", &OpDescBind::SetBlockAttr) .def("block_attr", &OpDescBind::GetBlockAttr) .def("check_attrs", &OpDescBind::CheckAttrs) - .def("infer_shape", &OpDescBind::InferShape); + .def("infer_shape", &OpDescBind::InferShape) + .def("serialize_to_string", [](OpDescBind &op_desc) -> py::bytes { + const OpDesc *desc = op_desc.Proto(); + PADDLE_ENFORCE(desc->IsInitialized(), + "OpDesc has not been initialized."); + std::string res; + PADDLE_ENFORCE( + desc->SerializeToString(&res), + "Serialize OpDesc Error. This could be a bug of Paddle."); + return res; + }); } } // namespace pybind diff --git a/paddle/pybind/pybind.cc b/paddle/pybind/pybind.cc index 0fd356780bf411bdd182be8912d8d3b9ca563249..b143cb9f59d3b734bcbec30c8a69dc85b5a10c97 100644 --- a/paddle/pybind/pybind.cc +++ b/paddle/pybind/pybind.cc @@ -18,6 +18,7 @@ limitations under the License. */ #include "paddle/framework/lod_tensor.h" #include "paddle/framework/tensor_array.h" #include "paddle/operators/cond_op.h" +#include "paddle/operators/dynamic_recurrent_op.h" #include "paddle/operators/net_op.h" #include "paddle/operators/recurrent_op.h" #include "paddle/platform/enforce.h" @@ -341,6 +342,33 @@ All parameter, weight, gradient are variables in Paddle. self.set_stepnet(net.Clone()); }); + py::class_(m, + "DynamicRecurrentOp") + .def_static("create", + [](py::bytes protobin) -> operators::DynamicRecurrentOp * { + OpDesc desc; + PADDLE_ENFORCE(desc.ParsePartialFromString(protobin), + "Cannot parse user input to OpDesc"); + PADDLE_ENFORCE(desc.IsInitialized(), + "User OpDesc is not initialized, reason %s", + desc.InitializationErrorString()); + auto rnn_op = OpRegistry::CreateOp(desc); + return static_cast( + rnn_op.release()); + }) + .def("set_stepnet", + [](operators::DynamicRecurrentOp &self, const operators::NetOp &net) + -> void { self.SetStepNet(net.Clone()); }) + .def("get_state", + [](operators::DynamicRecurrentOp &self, const std::string &name) + -> const TensorArray & { return self.state(name); }) + .def("get_step_input", + [](operators::DynamicRecurrentOp &self, const std::string &name) + -> const TensorArray & { return self.step_input(name); }) + .def("get_step_output", + [](operators::DynamicRecurrentOp &self, const std::string &name) + -> const TensorArray & { return self.step_output(name); }); + // cond_op py::class_(m, "CondOp") .def_static("create", diff --git a/python/paddle/v2/framework/framework.py b/python/paddle/v2/framework/framework.py index d449a9ece0ae62b7d553bdc2c27a4a2471b6df3b..3cfc5bbccf6cad0d2e16329a93c2f3c225738f96 100644 --- a/python/paddle/v2/framework/framework.py +++ b/python/paddle/v2/framework/framework.py @@ -10,6 +10,7 @@ __all__ = ['Block', 'Variable', 'Program', 'Operator'] class Variable(object): def __init__(self, block, + type=core.VarDesc.VarType.LOD_TENSOR, name=None, shape=None, dtype=None, @@ -26,6 +27,14 @@ class Variable(object): self.desc = self.block.desc.var(name) is_new_var = True + if is_new_var: + self.desc.set_type(type) + elif self.desc.type() != type: + raise ValueError("Variable {0} has been created before. The " + "previous type is {1}; the new type is {2}. They" + " are not matched".format(self.name, + self.desc.type(), type)) + if shape is not None: if is_new_var: self.desc.set_shape(shape) @@ -64,6 +73,13 @@ class Variable(object): self.block.vars[name] = self self.op = None + def __str__(self): + protostr = self.desc.serialize_to_string() + proto = framework_pb2.VarDesc.FromString(str(protostr)) + return proto.__str__() + + __repr__ = __str__ + @property def name(self): return self.desc.name() @@ -201,6 +217,13 @@ class Operator(object): self.desc.check_attrs() self.desc.infer_shape(self.block.desc) + def __str__(self): + protostr = self.desc.serialize_to_string() + proto = framework_pb2.OpDesc.FromString(str(protostr)) + return proto.__str__() + + __repr__ = __str__ + @property def type(self): return self.desc.type() @@ -243,6 +266,13 @@ class Block(object): self.ops = collections.deque() # operator list self.program = program + def __str__(self): + protostr = self.desc.serialize_to_string() + proto = framework_pb2.BlockDesc.FromString(str(protostr)) + return proto.__str__() + + __repr__ = __str__ + @property def parent_idx(self): return self.desc.parent @@ -287,6 +317,13 @@ class Program(object): self.blocks = [Block(self, 0)] self.current_block_idx = 0 + def __str__(self): + protostr = self.desc.serialize_to_string() + proto = framework_pb2.ProgramDesc.FromString(str(protostr)) + return proto.__str__() + + __repr__ = __str__ + def global_block(self): return self.blocks[0] diff --git a/python/paddle/v2/framework/op.py b/python/paddle/v2/framework/op.py index 9086a5cc3452b178ec37fe6a3e358eaa4c5d606b..bc771a964adf9f97cbeae87c06ce954c76051150 100644 --- a/python/paddle/v2/framework/op.py +++ b/python/paddle/v2/framework/op.py @@ -219,6 +219,27 @@ class __RecurrentOp__(object): return core.RecurrentOp.create(proto.SerializeToString()) +class __DynamicRecurrentOp__(object): + __proto__ = None + type = "dynamic_recurrent" + + def __init__(self): + # cache recurrent_op's proto + if self.__proto__ is None: + for op_proto in get_all_op_protos(): + if op_proto.type == self.type: + self.__proto__ = op_proto + + def __call__(self, *args, **kwargs): + if self.type not in args and "type" not in kwargs: + kwargs["type"] = self.type + # create proto + create_method = OpDescCreationMethod(self.__proto__) + proto = create_method(*args, **kwargs) + # create rnnop + return core.DynamicRecurrentOp.create(proto.SerializeToString()) + + class __CondOp__(object): __proto__ = None type = "cond" @@ -242,4 +263,5 @@ class __CondOp__(object): Operator = OperatorFactory() # The default global factory RecurrentOp = __RecurrentOp__() +DynamicRecurrentOp = __DynamicRecurrentOp__() CondOp = __CondOp__() diff --git a/python/paddle/v2/framework/tests/test_activation_op.py b/python/paddle/v2/framework/tests/test_activation_op.py index 3acd00e35213981fce60504876af1861961ebe12..5831b880e4c5ef881929920e87ac64d6c87a2ab5 100644 --- a/python/paddle/v2/framework/tests/test_activation_op.py +++ b/python/paddle/v2/framework/tests/test_activation_op.py @@ -384,5 +384,33 @@ class TestThresholdedRelu(OpTest): self.check_grad(['X'], 'Y', max_relative_error=self.relative_error) +class TestHardSigmoid(OpTest): + def setUp(self): + self.op_type = "hard_sigmoid" + self.relative_error = 0.002 + + X = np.random.uniform(-5, 5, [2, 2]).astype("float32") + slope = 0.2 + offset = 0.5 + lower_threshold = -offset / slope + upper_threshold = (1 - offset) / slope + + self.inputs = {'X': X} + # Same reason as TestAbs + X[np.abs(X - lower_threshold) < self.relative_error] = \ + lower_threshold + 0.2 + X[np.abs(X - upper_threshold) < self.relative_error] = \ + upper_threshold - 0.2 + + temp = X * slope + offset + self.outputs = {'Y': np.maximum(0.0, np.minimum(1.0, temp))} + + def test_check_output(self): + self.check_output() + + def test_check_grad(self): + self.check_grad(['X'], 'Y', max_relative_error=0.002) + + if __name__ == "__main__": unittest.main() diff --git a/python/paddle/v2/framework/tests/test_adam_op.py b/python/paddle/v2/framework/tests/test_adam_op.py new file mode 100644 index 0000000000000000000000000000000000000000..ff6faafa6e2119fde11b9eb6cd2a65a75334ebe6 --- /dev/null +++ b/python/paddle/v2/framework/tests/test_adam_op.py @@ -0,0 +1,186 @@ +import unittest +import numpy as np +from op_test import OpTest + + +class TestAdamOp1(OpTest): + def setUp(self): + '''Test Adam Op with supplied attributes + ''' + self.op_type = "adam" + param = np.random.uniform(-1, 1, (102, 105)).astype("float32") + grad = np.random.uniform(-1, 1, (102, 105)).astype("float32") + moment1 = np.random.uniform(-1, 1, (102, 105)).astype("float32") + # The second moment is positive + moment2 = np.random.random((102, 105)).astype("float32") + + learning_rate = 0.004 + beta1 = 0.78 + beta2 = 0.836 + epsilon = 1e-4 + beta1_pow = beta1**10 + beta2_pow = beta2**10 + + self.inputs = { + 'Param': param, + 'Grad': grad, + 'Moment1': moment1, + 'Moment2': moment2, + 'LearningRate': np.array([learning_rate]).astype("float32"), + 'Beta1Pow': np.array([beta1_pow]).astype("float32"), + 'Beta2Pow': np.array([beta2_pow]).astype("float32") + } + + self.attrs = {'epsilon': epsilon, 'beta1': beta1, 'beta2': beta2} + + param_out, moment1_out, moment2_out, beta1_pow_out, \ + beta2_pow_out = adam_step(self.inputs, self.attrs) + + self.outputs = { + 'Moment1Out': moment1_out, + 'Moment2Out': moment2_out, + 'Beta1PowOut': beta1_pow_out, + 'Beta2PowOut': beta2_pow_out, + 'ParamOut': param_out + } + + def test_check_output(self): + self.check_output() + + +class TestAdamOp2(OpTest): + def setUp(self): + '''Test Adam Op with supplied attributes + ''' + self.op_type = "adam" + param = np.random.uniform(-1, 1, (102, 105)).astype("float32") + grad = np.random.uniform(-1, 1, (102, 105)).astype("float32") + moment1 = np.random.uniform(-1, 1, (102, 105)).astype("float32") + # The second moment is positive + moment2 = np.random.random((102, 105)).astype("float32") + + learning_rate = 0.001 + beta1 = 0.9 + beta2 = 0.999 + epsilon = 1e-8 + beta1_pow = beta1**10 + beta2_pow = beta2**10 + + self.inputs = { + 'Param': param, + 'Grad': grad, + 'Moment1': moment1, + 'Moment2': moment2, + 'LearningRate': np.array([learning_rate]).astype("float32"), + 'Beta1Pow': np.array([beta1_pow]).astype("float32"), + 'Beta2Pow': np.array([beta2_pow]).astype("float32") + } + + attributes = {'epsilon': epsilon, 'beta1': beta1, 'beta2': beta2} + + param_out, moment1_out, moment2_out, beta1_pow_out, \ + beta2_pow_out = adam_step(self.inputs, attributes) + + self.outputs = { + 'Moment1Out': moment1_out, + 'Moment2Out': moment2_out, + 'Beta1PowOut': beta1_pow_out, + 'Beta2PowOut': beta2_pow_out, + 'ParamOut': param_out + } + + def test_check_output(self): + self.check_output() + + +class TestAdamOpMultipleSteps(OpTest): + def setUp(self): + '''Test Adam Operator with supplied attributes + ''' + self.op_type = "adam" + self.num_steps = 10 + + param = np.random.uniform(-1, 1, (102, 105)).astype("float32") + grad = np.random.uniform(-1, 1, (102, 105)).astype("float32") + moment1 = np.random.uniform(-1, 1, (102, 105)).astype("float32") + # The second moment is positive + moment2 = np.random.random((102, 105)).astype("float32") + + learning_rate = 0.001 + beta1 = 0.9 + beta2 = 0.999 + epsilon = 1e-8 + beta1_pow = beta1**10 + beta2_pow = beta2**10 + + self.inputs = { + 'Param': param, + 'Grad': grad, + 'Moment1': moment1, + 'Moment2': moment2, + 'LearningRate': np.array([learning_rate]).astype("float32"), + 'Beta1Pow': np.array([beta1_pow]).astype("float32"), + 'Beta2Pow': np.array([beta2_pow]).astype("float32") + } + + self.attrs = {'epsilon': epsilon, 'beta1': beta1, 'beta2': beta2} + + def test_check_output(self): + for _ in range(self.num_steps): + param_out, moment1_out, moment2_out, beta1_pow_out, \ + beta2_pow_out = adam_step(self.inputs, self.attrs) + + self.outputs = { + 'Moment1Out': moment1_out, + 'Moment2Out': moment2_out, + 'Beta1PowOut': beta1_pow_out, + 'Beta2PowOut': beta2_pow_out, + 'ParamOut': param_out + } + + # Verify output for this step + self.check_output() + + # Output of this step becomes input for next step + self.inputs['Param'] = param_out + self.inputs['Moment1'] = moment1_out + self.inputs['Moment2'] = moment2_out + self.inputs['Beta1Pow'] = beta1_pow_out + self.inputs['Beta2Pow'] = beta2_pow_out + + # Randomize gradient for next step + self.inputs['Grad'] = np.random.uniform( + -1, 1, (102, 105)).astype("float32") + + +def adam_step(inputs, attributes): + ''' + Simulate one step of the adam optimizer + :param inputs: dict of inputs + :param attributes: dict of attributes + :return tuple: tuple of output param, moment1, moment2, + beta1 power accumulator and beta2 power accumulator + ''' + param = inputs['Param'] + grad = inputs['Grad'] + moment1 = inputs['Moment1'] + moment2 = inputs['Moment2'] + lr = inputs['LearningRate'] + beta1_pow = inputs['Beta1Pow'] + beta2_pow = inputs['Beta2Pow'] + + beta1 = attributes['beta1'] + beta2 = attributes['beta2'] + epsilon = attributes['epsilon'] + + moment1_out = beta1 * moment1 + (1 - beta1) * grad + moment2_out = beta2 * moment2 + (1 - beta2) * np.square(grad) + beta1_pow_out = beta1_pow * beta1 + beta2_pow_out = beta2_pow * beta2 + lr_t = lr * np.sqrt(1 - beta2_pow_out) / (1 - beta1_pow_out) + param_out = param - lr_t * (moment1_out / (np.sqrt(moment2_out) + epsilon)) + return param_out, moment1_out, moment2_out, beta1_pow_out, beta2_pow_out + + +if __name__ == "__main__": + unittest.main() diff --git a/python/paddle/v2/framework/tests/test_dynamic_recurrent_op.py b/python/paddle/v2/framework/tests/test_dynamic_recurrent_op.py new file mode 100644 index 0000000000000000000000000000000000000000..b4629a3adb9a84470843214c7c6d80acde7228cc --- /dev/null +++ b/python/paddle/v2/framework/tests/test_dynamic_recurrent_op.py @@ -0,0 +1,111 @@ +import logging +import paddle.v2.framework.core as core +import unittest +from paddle.v2.framework.op import Operator, DynamicRecurrentOp +import numpy as np + + +def create_tensor(scope, name, shape, np_data): + tensor = scope.new_var(name).get_tensor() + tensor.set_dims(shape) + tensor.set(np_data, core.CPUPlace()) + return tensor + + +class DynamicRecurrentOpTest(unittest.TestCase): + ''' + Test RNNOp + + equation: + h_t = \sigma (W x_t + U h_{t-1}) + weights: + - W + - U + vars: + - x + memories: + - h + outputs: + - h + ''' + + # for siplicity, just one level LoD + lod_py = [[0, 4, 7, 9, 10]] + input_dim = 30 + num_sents = len(lod_py[0]) - 1 + weight_dim = 15 + + def forward(self): + self.scope = core.Scope() + self.create_global_variables() + self.create_rnn_op() + self.create_step_net() + ctx = core.DeviceContext.create(core.CPUPlace()) + self.rnnop.run(self.scope, ctx) + state = self.rnnop.get_state("h@mem") + print 'state size: ', state.size() + + step_inputs = self.rnnop.get_step_input("x") + print "x size ", step_inputs.size() + for i in range(step_inputs.size()): + print "x %d" % i, np.array(step_inputs.read(i).get_dims()) + step_outputs = self.rnnop.get_step_output('h@mem') + print 'step_outputs.size ', step_outputs.size() + output = self.scope.find_var("h@mem").get_tensor() + + print 'output', np.array(output).shape + + def create_global_variables(self): + x = np.random.normal(size=(self.lod_py[0][-1], + self.input_dim)).astype("float32") + W = np.random.normal(size=(self.input_dim, + self.input_dim)).astype("float32") + U = np.random.normal(size=(self.input_dim, + self.input_dim)).astype("float32") + h_boot = np.random.normal(size=(self.num_sents, + self.input_dim)).astype("float32") + # create inlink + x_tensor = create_tensor(self.scope, "x", + [self.num_sents, self.input_dim], x) + x_tensor.set_lod(self.lod_py) + create_tensor(self.scope, "W", [self.input_dim, self.input_dim], W) + create_tensor(self.scope, "U", [self.input_dim, self.input_dim], U) + create_tensor(self.scope, "h_boot", [self.num_sents, self.input_dim], + h_boot) + self.scope.new_var("step_scopes") + self.scope.new_var("h@mem") + + def create_rnn_op(self): + # create RNNOp + self.rnnop = DynamicRecurrentOp( + # inputs + inlinks=["x"], + boot_memories=["h_boot"], + step_net="stepnet", + # outputs + outlinks=["h@mem"], + step_scopes="step_scopes", + # attributes + pre_memories=["h@pre"], + memories=["h@mem"]) + + def create_step_net(self): + stepnet = core.Net.create() + x_fc_op = Operator("mul", X="x", Y="W", Out="Wx") + h_fc_op = Operator("mul", X="h@pre", Y="U", Out="Uh") + sum_op = Operator("sum", X=["Wx", "Uh"], Out="sum") + sig_op = Operator("sigmoid", X="sum", Y="h@mem") + + for op in [x_fc_op, h_fc_op, sum_op, sig_op]: + stepnet.append_op(op) + stepnet.complete_add_op(True) + self.rnnop.set_stepnet(stepnet) + + def test_forward(self): + print 'test recurrent op forward' + pd_output = self.forward() + print 'pd_output', pd_output + + +if __name__ == '__main__': + unittest.main() diff --git a/python/paddle/v2/framework/tests/test_gru_unit_op.py b/python/paddle/v2/framework/tests/test_gru_unit_op.py new file mode 100644 index 0000000000000000000000000000000000000000..57625362d21905d257f46ff5330841a20438773a --- /dev/null +++ b/python/paddle/v2/framework/tests/test_gru_unit_op.py @@ -0,0 +1,115 @@ +import math +import unittest +import numpy as np +from op_test import OpTest + + +class GRUActivationType(OpTest): + identity = 0 + sigmoid = 1 + tanh = 2 + relu = 3 + + +def identity(x): + return x + + +def sigmoid(x): + return 1. / (1. + np.exp(-x)) + + +def tanh(x): + return 2. * sigmoid(2. * x) - 1. + + +def relu(x): + return np.maximum(x, 0) + + +class TestGRUUnitOp(OpTest): + batch_size = 3 + frame_size = 5 + activate = { + GRUActivationType.identity: identity, + GRUActivationType.sigmoid: sigmoid, + GRUActivationType.tanh: tanh, + GRUActivationType.relu: relu, + } + + def set_inputs(self): + batch_size = self.batch_size + frame_size = self.frame_size + self.op_type = 'gru_unit' + self.inputs = { + 'Input': np.random.uniform( + -0.1, 0.1, (batch_size, frame_size * 3)).astype('float32'), + 'HiddenPrev': np.random.uniform( + -0.1, 0.1, (batch_size, frame_size)).astype('float32'), + 'Weight': np.random.uniform( + -1. / math.sqrt(frame_size), 1. / math.sqrt(frame_size), + (frame_size, frame_size * 3)).astype('float32'), + } + self.attrs = { + 'activation': GRUActivationType.tanh, + 'gate_activation': GRUActivationType.sigmoid + } + + def set_outputs(self): + # GRU calculations + batch_size = self.batch_size + frame_size = self.frame_size + x = self.inputs['Input'] + h_p = self.inputs['HiddenPrev'] + w = self.inputs['Weight'] + b = self.inputs['Bias'] if self.inputs.has_key('Bias') else np.zeros( + (1, frame_size * 3)) + g = x + np.tile(b, (batch_size, 1)) + w_u_r = w.flatten()[:frame_size * frame_size * 2].reshape( + (frame_size, frame_size * 2)) + u_r = self.activate[self.attrs['gate_activation']](np.dot( + h_p, w_u_r) + g[:, :frame_size * 2]) + u = u_r[:, :frame_size] + r = u_r[:, frame_size:frame_size * 2] + r_h_p = r * h_p + w_c = w.flatten()[frame_size * frame_size * 2:].reshape( + (frame_size, frame_size)) + c = self.activate[self.attrs['activation']](np.dot(r_h_p, w_c) + + g[:, frame_size * 2:]) + g = np.hstack((u_r, c)) + h = u * h_p + (1 - u) * c + self.outputs = {'Gate': g, 'ResetHiddenPrev': r_h_p, 'Hidden': h} + + def setUp(self): + self.set_inputs() + self.set_outputs() + + def test_check_output(self): + self.check_output() + + def test_check_grad(self): + self.check_grad( + ['Input', 'HiddenPrev', 'Weight'], ['Hidden'], + max_relative_error=0.007) + + +class TestGRUUnitOpWithBias(TestGRUUnitOp): + def set_inputs(self): + batch_size = self.batch_size + frame_size = self.frame_size + super(TestGRUUnitOpWithBias, self).set_inputs() + self.inputs['Bias'] = np.random.uniform( + -0.1, 0.1, (1, frame_size * 3)).astype('float32') + self.attrs = { + 'activation': GRUActivationType.identity, + 'gate_activation': GRUActivationType.sigmoid + } + + def test_check_grad(self): + self.check_grad( + ['Input', 'HiddenPrev', 'Weight', 'Bias'], ['Hidden'], + max_relative_error=0.007) + + +if __name__ == '__main__': + unittest.main() diff --git a/python/paddle/v2/framework/tests/test_infer_shape.py b/python/paddle/v2/framework/tests/test_infer_shape.py index d23f13be498dda4bcc8e7254660e5a7636f1c204..19bb45acef9a7443a974bf5f11afab5d067321f7 100644 --- a/python/paddle/v2/framework/tests/test_infer_shape.py +++ b/python/paddle/v2/framework/tests/test_infer_shape.py @@ -14,11 +14,14 @@ class TestInferShape(unittest.TestCase): # prepare input/output x1 = block.var("x1") + x1.set_type(core.VarDesc.VarType.LOD_TENSOR) x1.set_shape(shape) x2 = block.var("x2") + x2.set_type(core.VarDesc.VarType.LOD_TENSOR) x2.set_shape(shape) out = block.var("out") + out.set_type(core.VarDesc.VarType.LOD_TENSOR) # prepare the operator sum_op_desc = block.append_op() @@ -40,11 +43,14 @@ class TestInferShape(unittest.TestCase): # prepare input/output x1 = block.var("x") + x1.set_type(core.VarDesc.VarType.LOD_TENSOR) x1.set_shape(x_shape) x2 = block.var("y") + x2.set_type(core.VarDesc.VarType.LOD_TENSOR) x2.set_shape(y_shape) out = block.var("out") + out.set_type(core.VarDesc.VarType.LOD_TENSOR) # prepare the operator mul_op_desc = block.append_op() diff --git a/python/paddle/v2/framework/tests/test_operator_desc.py b/python/paddle/v2/framework/tests/test_operator_desc.py index d7a85d8e4e883efd268c53a0e4977533040a0a14..dfe39c98f7f4fe266d5ec0c4a9ed14ab02e40e3a 100644 --- a/python/paddle/v2/framework/tests/test_operator_desc.py +++ b/python/paddle/v2/framework/tests/test_operator_desc.py @@ -34,6 +34,8 @@ class TestOperator(unittest.TestCase): "Y": mul_y}, outputs={"Out": [mul_out]}, attrs={"x_num_col_dims": 1}) + + self.assertNotEqual(str(mul_op), "") self.assertEqual(mul_op.type, "mul") self.assertEqual(mul_op.input_names, ["X", "Y"]) self.assertEqual(mul_op.input("X"), ["mul.x"]) diff --git a/python/paddle/v2/framework/tests/test_protobuf_descs.py b/python/paddle/v2/framework/tests/test_protobuf_descs.py index 2902b2ec656f15f5e5fb12e3496878fa72694677..0ccb2e7eb191378dad40830647fe545cda27640e 100644 --- a/python/paddle/v2/framework/tests/test_protobuf_descs.py +++ b/python/paddle/v2/framework/tests/test_protobuf_descs.py @@ -94,17 +94,21 @@ class TestVarDesc(unittest.TestCase): program_desc = core.ProgramDesc.__create_program_desc__() block = program_desc.block(0) var = block.var('my_var') + var.set_type(core.VarDesc.VarType.SELECTED_ROWS) src_shape = [3, 2, 10, 8] var.set_shape(src_shape) res_shape = var.shape() self.assertEqual(src_shape, res_shape) + self.assertEqual(core.VarDesc.VarType.SELECTED_ROWS, var.type()) def test_data_type(self): program_desc = core.ProgramDesc.__create_program_desc__() block = program_desc.block(0) var = block.var('my_var') + var.set_type(core.VarDesc.VarType.LOD_TENSOR) var.set_data_type(core.DataType.INT32) self.assertEqual(core.DataType.INT32, var.data_type()) + self.assertEqual(core.VarDesc.VarType.LOD_TENSOR, var.type()) class TestBlockDesc(unittest.TestCase): diff --git a/python/paddle/v2/framework/tests/test_variable.py b/python/paddle/v2/framework/tests/test_variable.py index 695aaaee6c0c1d035349b1d1716c24bab81e607b..6fb934c743a6271c352a74495cc543b62ac2b9d9 100644 --- a/python/paddle/v2/framework/tests/test_variable.py +++ b/python/paddle/v2/framework/tests/test_variable.py @@ -21,6 +21,7 @@ class TestVariable(unittest.TestCase): b = g_program.current_block() w = b.create_var( dtype="float64", shape=[784, 100], lod_level=0, name="fc.w") + self.assertNotEqual(str(w), "") self.assertEqual(core.DataType.FP64, w.data_type) self.assertEqual((784, 100), w.shape) self.assertEqual("fc.w", w.name)