diff --git a/CMakeLists.txt b/CMakeLists.txt index 4783095194dc9c6409dc31c95588f46c9bee7c61..1252e7539816016dfdf1b90b8941fa42e6bb85e0 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -105,6 +105,12 @@ if (WITH_C_API AND WITH_PYTHON) "different Python interpreter from compiling.") endif() +if(MOBILE_INFERENCE) + set(THIRD_PARTY_BUILD_TYPE MinSizeRel) +else() + set(THIRD_PARTY_BUILD_TYPE Release) +endif() + ######################################################################################## include(external/mklml) # download mklml package diff --git a/cmake/external/eigen.cmake b/cmake/external/eigen.cmake index f7483f6be9169eb58f0148cd3a956a8c881e1fe3..bd853d921b4362ac7ac5e17e629552b2a200f08a 100644 --- a/cmake/external/eigen.cmake +++ b/cmake/external/eigen.cmake @@ -8,7 +8,7 @@ ExternalProject_Add( extern_eigen3 ${EXTERNAL_PROJECT_LOG_ARGS} GIT_REPOSITORY "https://github.com/RLovelett/eigen.git" - GIT_TAG "master" + GIT_TAG 4e79cb69b9425f5f8c3a84be4350d4ab75b5fd9d PREFIX ${EIGEN_SOURCE_DIR} UPDATE_COMMAND "" CONFIGURE_COMMAND "" diff --git a/cmake/external/gflags.cmake b/cmake/external/gflags.cmake index 957f8271e4841836956b0c3f2cf3d8c88a31192a..c819eb4d70898e48eab499c666168d78262d4240 100644 --- a/cmake/external/gflags.cmake +++ b/cmake/external/gflags.cmake @@ -36,6 +36,7 @@ ExternalProject_Add( # change this back to the official Github repo once my PR is # merged. 
GIT_REPOSITORY "https://github.com/wangkuiyi/gflags.git" + GIT_TAG 986964c07427ecb9cdb5bd73f73ebbd40e54dadb PREFIX ${GFLAGS_SOURCES_DIR} UPDATE_COMMAND "" CMAKE_ARGS -DCMAKE_CXX_COMPILER=${CMAKE_CXX_COMPILER} @@ -45,11 +46,11 @@ ExternalProject_Add( -DCMAKE_INSTALL_PREFIX=${GFLAGS_INSTALL_DIR} -DCMAKE_POSITION_INDEPENDENT_CODE=ON -DBUILD_TESTING=OFF - -DCMAKE_BUILD_TYPE=Release + -DCMAKE_BUILD_TYPE=${THIRD_PARTY_BUILD_TYPE} ${EXTERNAL_OPTIONAL_ARGS} CMAKE_CACHE_ARGS -DCMAKE_INSTALL_PREFIX:PATH=${GFLAGS_INSTALL_DIR} -DCMAKE_POSITION_INDEPENDENT_CODE:BOOL=ON - -DCMAKE_BUILD_TYPE:STRING=Release + -DCMAKE_BUILD_TYPE:STRING=${THIRD_PARTY_BUILD_TYPE} ) ADD_LIBRARY(gflags STATIC IMPORTED GLOBAL) diff --git a/cmake/external/glog.cmake b/cmake/external/glog.cmake index b3fef738ccc0b5886bb0a32501bb7b7adade0ff1..08bdc1e1623b0d917061c7368e9b2a8f7e9517fd 100644 --- a/cmake/external/glog.cmake +++ b/cmake/external/glog.cmake @@ -31,6 +31,7 @@ ExternalProject_Add( ${EXTERNAL_PROJECT_LOG_ARGS} DEPENDS gflags GIT_REPOSITORY "https://github.com/google/glog.git" + GIT_TAG v0.3.5 PREFIX ${GLOG_SOURCES_DIR} UPDATE_COMMAND "" CMAKE_ARGS -DCMAKE_CXX_COMPILER=${CMAKE_CXX_COMPILER} @@ -43,12 +44,12 @@ ExternalProject_Add( -DWITH_GFLAGS=ON -Dgflags_DIR=${GFLAGS_INSTALL_DIR}/lib/cmake/gflags -DBUILD_TESTING=OFF - -DCMAKE_BUILD_TYPE=Release + -DCMAKE_BUILD_TYPE=${THIRD_PARTY_BUILD_TYPE} ${EXTERNAL_OPTIONAL_ARGS} CMAKE_CACHE_ARGS -DCMAKE_INSTALL_PREFIX:PATH=${GLOG_INSTALL_DIR} -DCMAKE_INSTALL_LIBDIR:PATH=${GLOG_INSTALL_DIR}/lib -DCMAKE_POSITION_INDEPENDENT_CODE:BOOL=ON - -DCMAKE_BUILD_TYPE:STRING=Release + -DCMAKE_BUILD_TYPE:STRING=${THIRD_PARTY_BUILD_TYPE} ) ADD_LIBRARY(glog STATIC IMPORTED GLOBAL) diff --git a/cmake/external/gtest.cmake b/cmake/external/gtest.cmake index 6a2a79b7631b32e8a099797de509af64533bbb95..5a4aa7a5b71a4fdfd556a46037e6d1846d668fc4 100644 --- a/cmake/external/gtest.cmake +++ b/cmake/external/gtest.cmake @@ -56,11 +56,11 @@ IF(WITH_TESTING) -DBUILD_GMOCK=ON 
-Dgtest_disable_pthreads=ON -Dgtest_force_shared_crt=ON - -DCMAKE_BUILD_TYPE=Release + -DCMAKE_BUILD_TYPE=${THIRD_PARTY_BUILD_TYPE} ${EXTERNAL_OPTIONAL_ARGS} CMAKE_CACHE_ARGS -DCMAKE_INSTALL_PREFIX:PATH=${GTEST_INSTALL_DIR} -DCMAKE_POSITION_INDEPENDENT_CODE:BOOL=ON - -DCMAKE_BUILD_TYPE:STRING=Release + -DCMAKE_BUILD_TYPE:STRING=${THIRD_PARTY_BUILD_TYPE} ) ADD_LIBRARY(gtest STATIC IMPORTED GLOBAL) diff --git a/cmake/external/protobuf.cmake b/cmake/external/protobuf.cmake index 7cf7ba85cca4c248dcc74e078124c0b3815ee380..be7f6a9465970711170bd15dcecaadeaa8a55f86 100644 --- a/cmake/external/protobuf.cmake +++ b/cmake/external/protobuf.cmake @@ -191,12 +191,12 @@ FUNCTION(build_protobuf TARGET_NAME BUILD_FOR_HOST) ${OPTIONAL_ARGS} -Dprotobuf_BUILD_TESTS=OFF -DCMAKE_POSITION_INDEPENDENT_CODE=ON - -DCMAKE_BUILD_TYPE=Release + -DCMAKE_BUILD_TYPE=${THIRD_PARTY_BUILD_TYPE} -DCMAKE_INSTALL_PREFIX=${PROTOBUF_INSTALL_DIR} -DCMAKE_INSTALL_LIBDIR=lib CMAKE_CACHE_ARGS -DCMAKE_INSTALL_PREFIX:PATH=${PROTOBUF_INSTALL_DIR} - -DCMAKE_BUILD_TYPE:STRING=Release + -DCMAKE_BUILD_TYPE:STRING=${THIRD_PARTY_BUILD_TYPE} -DCMAKE_VERBOSE_MAKEFILE:BOOL=OFF -DCMAKE_POSITION_INDEPENDENT_CODE:BOOL=ON ${OPTIONAL_CACHE_ARGS} diff --git a/cmake/external/warpctc.cmake b/cmake/external/warpctc.cmake index bb258c7b5581fc22b44f4fe15c119f8081f4767e..8bd058222880b4df3b08da09c02f9fe7f1d0ee66 100644 --- a/cmake/external/warpctc.cmake +++ b/cmake/external/warpctc.cmake @@ -35,6 +35,7 @@ ExternalProject_Add( extern_warpctc ${EXTERNAL_PROJECT_LOG_ARGS} GIT_REPOSITORY "https://github.com/gangliao/warp-ctc.git" + GIT_TAG b63a0644654a3e0ed624c85a1767bc8193aead09 PREFIX ${WARPCTC_SOURCES_DIR} UPDATE_COMMAND "" CMAKE_ARGS -DCMAKE_CXX_COMPILER=${CMAKE_CXX_COMPILER} @@ -48,9 +49,9 @@ ExternalProject_Add( -DCMAKE_DISABLE_FIND_PACKAGE_Torch=ON -DBUILD_SHARED=ON -DCMAKE_POSITION_INDEPENDENT_CODE=ON - -DCMAKE_BUILD_TYPE=Release + -DCMAKE_BUILD_TYPE=${THIRD_PARTY_BUILD_TYPE} ${EXTERNAL_OPTIONAL_ARGS} - CMAKE_CACHE_ARGS 
-DCMAKE_BUILD_TYPE:STRING=Release + CMAKE_CACHE_ARGS -DCMAKE_BUILD_TYPE:STRING=${THIRD_PARTY_BUILD_TYPE} -DCMAKE_POSITION_INDEPENDENT_CODE:BOOL=ON -DCMAKE_INSTALL_PREFIX:PATH=${WARPCTC_INSTALL_DIR} ) diff --git a/cmake/external/zlib.cmake b/cmake/external/zlib.cmake index c496a52b780364f3014f8fa3dfbc944a7aa7430e..e2c9fe56f335ae5b627b4d8d4bb17e4a2a466677 100644 --- a/cmake/external/zlib.cmake +++ b/cmake/external/zlib.cmake @@ -42,11 +42,11 @@ ExternalProject_Add( -DBUILD_SHARED_LIBS=OFF -DCMAKE_POSITION_INDEPENDENT_CODE=ON -DCMAKE_MACOSX_RPATH=ON - -DCMAKE_BUILD_TYPE=Release + -DCMAKE_BUILD_TYPE=${THIRD_PARTY_BUILD_TYPE} ${EXTERNAL_OPTIONAL_ARGS} CMAKE_CACHE_ARGS -DCMAKE_INSTALL_PREFIX:PATH=${ZLIB_INSTALL_DIR} -DCMAKE_POSITION_INDEPENDENT_CODE:BOOL=ON - -DCMAKE_BUILD_TYPE:STRING=Release + -DCMAKE_BUILD_TYPE:STRING=${THIRD_PARTY_BUILD_TYPE} ) LIST(APPEND external_project_dependencies zlib) diff --git a/doc/design/block.md b/doc/design/block.md index 9c812732d6ead76eb3aa2d1b617449c96807f21a..7cbf0d55b1faeb2093ee7cf234d1c2ad1905885b 100644 --- a/doc/design/block.md +++ b/doc/design/block.md @@ -243,7 +243,7 @@ class SymbolTable { // TODO determine whether name is generated by python or C++. // Currently assume that a unique name will be generated by C++ if the // argument name is left default. - VarDesc* NewVar(const string& name=""); + VarDesc* Var(const string& name=""); // find a VarDesc by name, if recursive is true, find parent's SymbolTable // recursively. 
diff --git a/doc/design/images/graph_construction_example.dot b/doc/design/images/graph_construction_example.dot index 8d1b673abf6b78c851676fa379dc850c4818f0e5..e115f9844bae6ad24f638c8ed4749cea8aff06a9 100644 --- a/doc/design/images/graph_construction_example.dot +++ b/doc/design/images/graph_construction_example.dot @@ -33,7 +33,6 @@ digraph ImageClassificationGraph { cost -> MSE_Grad [color=red]; d_cost -> MSE_Grad [color=red]; - x -> MSE_Grad [color=red]; l -> MSE_Grad [color=red]; y -> MSE_Grad -> d_y [color=red]; diff --git a/doc/design/images/graph_construction_example_all.png b/doc/design/images/graph_construction_example_all.png index 181187503472d15779b87284105841168b3945c4..261611a5721f9aa97874f7e6d897fe48cf667db2 100644 Binary files a/doc/design/images/graph_construction_example_all.png and b/doc/design/images/graph_construction_example_all.png differ diff --git a/doc/design/images/graph_construction_example_forward_backward.png b/doc/design/images/graph_construction_example_forward_backward.png index 3049a9315fd616464dec54e33064cb75598ca536..4c69687f4a6a181138f3df72ce5e8aa48487b5be 100644 Binary files a/doc/design/images/graph_construction_example_forward_backward.png and b/doc/design/images/graph_construction_example_forward_backward.png differ diff --git a/doc/design/images/graph_construction_example_forward_only.png b/doc/design/images/graph_construction_example_forward_only.png index 25d19088cbf0b5f68cf734f2ff21eba8af4a2860..e668c16e0cac73acb4e5dc2b1827557ae77126b4 100644 Binary files a/doc/design/images/graph_construction_example_forward_only.png and b/doc/design/images/graph_construction_example_forward_only.png differ diff --git a/doc/design/register_grad_op.md b/doc/design/register_grad_op.md index 3cf8a59446d244bb3a388b87b14273d9096c839a..9f1ce4bae7b393cb9f04909e5e4917b8d660771c 100644 --- a/doc/design/register_grad_op.md +++ b/doc/design/register_grad_op.md @@ -3,15 +3,17 @@ ## The Problem Posed -In our current operator registration mechanism, 
for each operator, the programmer should register a *gradient operator creator* function, which takes a C++ operator instance, and returns the corresponding gradient instance. +Currently, for each C++ operator class definition, we register a *gradient operator creator* function, which takes a C++ operator instance and returns the corresponding gradient operator instance. -However, as we decided to separate the *compilation* and *execution* of DL models, we need to reshape the creator to take a protobuf `OpDesc` message, and returns a corresponding message. +However, we noticed two problems with the current design: -More than that, the new registration mechanism need to support the fact that an operators' gradient computation might be a composition of operators. +1. As we decided to separate the *compilation* and *execution* phases, we need to change the creator to take an `OpDesc` protobuf message in a `ProgramDesc` and insert the corresponding `OpDesc` messages into the `ProgramDesc` message. -## Current Implementation +1. Some operators' gradient computation requires more than one gradient operator. For example, the gradient of *minus* consists of two operators -- an identity operator and a scale operator. So we need to make the registration mechanism support the mapping from an operator to a set of operators for gradient computation. -OpInfos store in a association map which key is the operator type. The `grad_op_type` indicate associated gradient operator type. Operator can create gradient operator by `OpInfo::creator_` of gradient. The pseudo code is +## The Current Implementation + +Instances of the C++ class `OpInfo` are stored in an association map whose key is the operator type. The `grad_op_type` indicates the associated gradient operator type. An operator can create its gradient operator by the `OpInfo::creator_` of the gradient. 
The pseudo code is ```cpp struct OpInfo { diff --git a/doc/design/scope.md b/doc/design/scope.md index b1f9bb4378eb5ec6926f1e53f7c1f4fd5674064c..4da76eebb74abcd26ec2b8671399e6bc4fb58574 100644 --- a/doc/design/scope.md +++ b/doc/design/scope.md @@ -37,7 +37,7 @@ Scope is an association of a name to variable. All variables belong to `Scope`. ```cpp class Scope { public: - Variable* NewVar(const std::string& name); + Variable* Var(const std::string& name); const Variable* FindVar(const std::string& name) const; private: @@ -98,7 +98,7 @@ class Scope { Variable* FindVar(const std::string& name) const; // return if already contains same name variable. - Variable* NewVar(const std::string& name); + Variable* Var(const std::string& name); private: std::shared_ptr parent_; @@ -107,7 +107,7 @@ class Scope { ``` ## Only scope can create a variable -To ensure `only scope can create a variable`, we should mark `Variable`'s constructor as a private member function, and Scope is a friend class of Variable. And then only `NewVar` can construct `Variable`. +To ensure `only scope can create a variable`, we should mark `Variable`'s constructor as a private member function, and Scope is a friend class of Variable. And then only `Var` can construct `Variable`. ## When scope destroyed, all variables inside this scope should be destroyed together @@ -121,4 +121,4 @@ Also, as the parent scope is a `shared_ptr`, we can only `Create()` a scope shar ## Orthogonal interface -`FindVar` will return `nullptr` when `name` is not found. It can be used as `Contains` method. `NewVar` will return an `Error` when there is a name conflict locally. Combine `FindVar` and `NewVar`, we can implement `NewVar` easily. +`FindVar` will return `nullptr` when `name` is not found. It can be used as `Contains` method. `Var` will return an `Error` when there is a name conflict locally. Combine `FindVar` and `Var`, we can implement `Var` easily. 
diff --git a/doc/design/tensor_array.md b/doc/design/tensor_array.md index 8378e97bf7cfaae54c36b1b92e202b16e4fe1e28..37e4f7b90f94fa3eb015e733999cd84c96b2239c 100644 --- a/doc/design/tensor_array.md +++ b/doc/design/tensor_array.md @@ -161,7 +161,7 @@ class TensorArray: @name: str the name of the variable to output. ''' - tensor = NewVar(name) + tensor = Var(name) tensor_array_stack(self.name, tensor) return tensor diff --git a/doc/design/var_desc.md b/doc/design/var_desc.md index bfbbdd0578ebc69ea4b49ade9b041573a9e9ad55..0b2958c1b10ef6a6ce51aa75f61e15a7f2d94b3f 100644 --- a/doc/design/var_desc.md +++ b/doc/design/var_desc.md @@ -16,16 +16,23 @@ The computation graph is constructed by Data Node and Operation Node. The concep ## Definition of VarDesc -A VarDesc should have a name and value, in PaddlePaddle, the value will always be a tensor. Since we use LoDTensor most of the time. We add a LoDTesnorDesc to represent it. +A VarDesc should have a name and a value. There are two kinds of variable types at compile time: `LoDTensor` and `SelectedRows`. ```proto message VarDesc { required string name = 1; - optional LoDTensorDesc lod_tensor = 2; + enum VarType { + LOD_TENSOR = 0; + SELECTED_ROWS = 1; + } + required VarType type = 2; + optional LoDTensorDesc lod_desc = 3; + optional TensorDesc selected_rows_desc = 4; + optional bool persistable = 5 [ default = false ]; } ``` -## Definition of LodTensorDesc +## Definition of TensorDesc ```proto enum DataType { @@ -38,87 +45,25 @@ enum DataType { FP64 = 6; } -message LoDTensorDesc { +message TensorDesc { required DataType data_type = 1; - repeated int32 dims = 2; // [UNK, 640, 480] is saved as [-1, 640, 480] - optional int32 lod_level = 3 [default=0]; + repeated int64 dims = 2; // [UNK, 640, 480] is saved as [-1, 640, 480] } ``` -## Definition of Variable in Python - -In Python API, layer will take Variable as Input, and return Variable as Output. 
There should be a class `Variable` in python to help create and manage Variable. - -```python -image = Variable(dims=[-1, 640, 480]) -# fc1 and fc2 are both Variable -fc1 = layer.fc(input=image, output_size=10) -fc2 = layer.fc(input=fc1, output_size=20) -``` -### what should class `Variable` Have -1. `name`.a name of string type is used to mark the value of the Variable. -1. `initializer`. Since our Tensor does not have value. we will always use some Operator to fullfill it when run. So we should have a initialize method to help add the init operator. -1. `operator`. Variable should record which operator produce itself. The reaon is: - - we use pd.eval(targets=[var1, var2]) to run the related ops to get the value of var1 and var2. var.op is used to trace the dependency of the current variable. - -In PaddlePaddle, we use Block to describe Computation Graph, so in the code we will use Block but not Graph. - -```python -import VarDesc -import LoDTensorDesc -import framework - -def AddInitialOperator(variable, initializer): - # add an initialize Operator to block to init this Variable - -class Variable(object): - def __init__(self, name, dims, type, initializer): - self._block = get_default_block() - self._name = name - self.op = None - - tensor_desc = LoDTensorDesc(data_type=type, dims=dims) - _var_desc = VarDesc(name=name, lod_tensor=tensor_desc) - self._var = framework.CreateVar(_var_desc) - self._block.add_var(self) +A TensorDesc describes `SelectedRows` and `LoDTensor`. For details of `SelectedRows`, please reference [`SelectedRows`](./selected_rows.md). 
- # add initial op according to initializer - if initializer is not None: - AddInitialOperator(self, initializer) - - def dims(self): - return self._var.dims() - - def data_type(self): - return self._var.data_type() +## Definition of LodTensorDesc - def to_proto(self): - pass +```proto +message LoDTensorDesc { + required TensorDesc tensor = 1; + optional int32 lod_level = 2; +} ``` -Then we can use this Variable to create a fc layer in Python. +A LoDTensorDesc contains a tensor and a lod_level. -```python -import paddle as pd - -def flatten_size(X, num_flatten_dims): - prod = 1 # of last num_flatten_dims - for i in xrange(num_flatten_dims): - prod = prod * X.dims[-i-1] - return prod - -def layer.fc(X, output_size, num_flatten_dims): - W = Variable(pd.random_uniform(), type=FP32, dims=[flatten_size(X, num_flatten_dims), output_size]) - b = Variable(pd.random_uniform(), type=FP32, dims=[output_size]) - out = Variable(type=FP32) - y = operator.fc(X, W, b, output=out) # fc will put fc op input into out - pd.InferShape(y) - return out - -x = Variable(dims=[-1, 640, 480]) -y = layer.fc(x, output_size=100) -z = layer.fc(y, output_size=200) +## Definition of Variable in Python -paddle.eval(targets=[z], ...) -print(z) -``` +For Variable in Python, please reference [`Python API`](./python_api.md). 
diff --git a/paddle/api/CMakeLists.txt b/paddle/api/CMakeLists.txt index d7b3d2bdec1687425df804c0d56d568241f9e8b0..d6b8464100d4497876aa3f6f7cbc666aafae4bfc 100644 --- a/paddle/api/CMakeLists.txt +++ b/paddle/api/CMakeLists.txt @@ -26,7 +26,7 @@ FILE(GLOB PY_PADDLE_PYTHON_FILES ${PADDLE_SOURCE_DIR}/paddle/py_paddle/*.py) SET_SOURCE_FILES_PROPERTIES(Paddle.i PROPERTIES CPLUSPLUS ON) SET(CMAKE_SWIG_OUTDIR ${CMAKE_CURRENT_BINARY_DIR}) -SET(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -Wno-parentheses-equality -Wno-missing-field-initializers -Wno-self-assign") +SET(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -Wno-parentheses-equality -Wno-missing-field-initializers -Wno-self-assign -ftls-model=global-dynamic") SET(SWIG_MODULE_swig_paddle_EXTRA_DEPS paddle_parameter diff --git a/paddle/framework/CMakeLists.txt b/paddle/framework/CMakeLists.txt index 6b34c3bbcfbdb0c36381df7de4dd227e317829e5..c8d9dac21d995d92b9d50436d42e47b63ea55f58 100644 --- a/paddle/framework/CMakeLists.txt +++ b/paddle/framework/CMakeLists.txt @@ -19,10 +19,10 @@ cc_test(scope_test SRCS scope_test.cc DEPS scope) proto_library(framework_proto SRCS framework.proto) cc_library(attribute SRCS attribute.cc DEPS framework_proto) -cc_library(proto_desc SRCS var_desc.cc op_desc.cc block_desc.cc program_desc.cc DEPS attribute ddim) +cc_library(proto_desc SRCS var_desc.cc op_desc.cc block_desc.cc program_desc.cc DEPS attribute ddim op_info) cc_library(op_proto_maker SRCS op_proto_maker.cc DEPS framework_proto attribute) cc_test(op_proto_maker_test SRCS op_proto_maker_test.cc DEPS op_proto_maker) -cc_library(op_info SRCS op_info.cc DEPS attribute framework_proto proto_desc) +cc_library(op_info SRCS op_info.cc DEPS attribute framework_proto) cc_library(operator SRCS operator.cc DEPS op_info device_context tensor scope proto_desc) cc_test(operator_test SRCS operator_test.cc DEPS operator op_registry) @@ -42,12 +42,17 @@ add_custom_command(TARGET framework_py_proto POST_BUILD cc_library(backward SRCS backward.cc DEPS net_op) 
cc_test(backward_test SRCS backward_test.cc DEPS backward recurrent_op device_context) -cc_library(executor SRCS executor.cc DEPS op_registry device_context scope framework_proto backward ${GLOB_OP_LIB}) -#if(WITH_GPU) -# nv_test(executor_test SRCS executor_test.cc DEPS executor) -#else() -# cc_test(executor_test SRCS executor_test.cc DEPS executor) -#endif() +cc_library(executor SRCS executor.cc DEPS op_registry device_context scope framework_proto backward) +set(EXECUTOR_TEST_OP elementwise_add_op gaussian_random_op feed_op fetch_op + mul_op sum_op squared_l2_distance_op fill_constant_op sgd_op mean_op) +if(WITH_GPU) + nv_test(executor_test SRCS executor_test.cc DEPS executor ${EXECUTOR_TEST_OP}) +else() + cc_test(executor_test SRCS executor_test.cc DEPS executor ${EXECUTOR_TEST_OP}) +endif() cc_library(tensor_array SRCS tensor_array.cc DEPS lod_tensor) cc_test(tensor_array_test SRCS tensor_array_test.cc DEPS tensor_array place) + +cc_library(selected_rows SRCS selected_rows.cc DEPS tensor) +cc_test(selected_rows_test SRCS selected_rows_test.cc DEPS selected_rows) diff --git a/paddle/framework/backward.cc b/paddle/framework/backward.cc index 063b108500d95c94d5859cf6e1a5a88dcdb2ed31..e3d7dacd7f0ad61a606e8b3e8f6a84b98deac729 100644 --- a/paddle/framework/backward.cc +++ b/paddle/framework/backward.cc @@ -28,14 +28,15 @@ namespace paddle { namespace framework { static inline std::unique_ptr CreateGradOp( - const OperatorBase& op) { + const OperatorBase& op, const std::unordered_set& no_grad_set, + std::unordered_map* grad_to_var) { OpDescBind op_desc; op_desc.SetInputMap(op.Inputs()); op_desc.SetOutputMap(op.Outputs()); op_desc.SetType(op.Type()); op_desc.SetAttrMap(op.Attrs()); auto& info = OpInfoMap::Instance().Get(op.Type()); - auto grad_descs = info.GradOpMaker()(op_desc); + auto grad_descs = info.GradOpMaker()(op_desc, no_grad_set, grad_to_var); std::vector> grad_ops; grad_ops.reserve(grad_descs.size()); std::transform(grad_descs.begin(), grad_descs.end(), @@ 
-98,7 +99,9 @@ static std::unique_ptr NOP() { // See Backward.h for details static std::unique_ptr BackwardRecursive( const OperatorBase& forwardOp, - std::unordered_set& no_grad_names, size_t& uniq_id) { + std::unordered_set& no_grad_names, + std::unordered_map* grad_to_var, + size_t& uniq_id) { // If all input gradients of forwarding operator do not need to calculate, // just return an NOP. Not return null ptr because NOP does not take // too much time for calculation, but it is useful for simplifying logic. @@ -136,7 +139,7 @@ static std::unique_ptr BackwardRecursive( for (auto it = forwardNet.ops_.rbegin(); it != forwardNet.ops_.rend(); ++it, ++local_op_id) { auto& fwd = *it; - auto bwd = BackwardRecursive(*fwd, no_grad_names, uniq_id); + auto bwd = BackwardRecursive(*fwd, no_grad_names, grad_to_var, uniq_id); ForEachVarName(bwd->Outputs(), [&dup_output_ops, local_op_id](const std::string& out) { dup_output_ops[out].emplace_back(local_op_id); @@ -187,7 +190,8 @@ static std::unique_ptr BackwardRecursive( net->InsertOp(pos.first + 1, std::move(pos.second)); } } else { - std::unique_ptr grad_op(CreateGradOp(forwardOp)); + std::unique_ptr grad_op( + CreateGradOp(forwardOp, no_grad_names, grad_to_var)); ForEachVarName(grad_op->Inputs(), [&no_grad_names, &net, &grad_op]( const std::string& grad_input) { @@ -226,7 +230,7 @@ static std::unique_ptr BackwardRecursive( *static_cast(&rnnop.stepnet()); // create stepnet's gradient op rnn_grad_op->set_stepnet( - BackwardRecursive(stepnet_op, no_grad_names, uniq_id)); + BackwardRecursive(stepnet_op, no_grad_names, grad_to_var, uniq_id)); } if (net->ops_.empty()) { // Current no aux op is added to network @@ -253,7 +257,8 @@ std::unique_ptr Backward( no_grad_names.insert(name + kGradVarSuffix); } size_t uid = 0; - return BackwardRecursive(forwardOp, no_grad_names, uid); + std::unordered_map grad_to_var; + return BackwardRecursive(forwardOp, no_grad_names, &grad_to_var, uid); } // ==================================== // @@ 
-268,30 +273,61 @@ static bool AllGradInSet(const std::vector& names, return true; } +static void CreateGradVarInBlock( + size_t grad_op_start_index, + const std::unordered_map& param_name_map, + BlockDescBind* block_desc, + std::unordered_map* grad_var_record) { + auto ops = block_desc->AllOps(); + for (size_t op_index = grad_op_start_index; op_index < ops.size(); + ++op_index) { + ForEachVarName(ops[op_index]->Outputs(), + [&](const std::string& grad_var_name) { + if (block_desc->HasVar(grad_var_name)) { + return false; + } + block_desc->Var(grad_var_name); + auto it = param_name_map.find(grad_var_name); + if (it == param_name_map.end()) { + return false; + } + auto param_var_name = it->second; + auto& grad_record = (*grad_var_record)[param_var_name]; + grad_record.name_ = grad_var_name; + grad_record.block_idx_ = block_desc->ID(); + grad_record.op_idx_ = static_cast(op_index); + return false; /* not break */ + }); + } +} + std::vector> MakeOpGrad( const std::unique_ptr& op_desc, - std::unordered_set& no_grad_vars) { + std::unordered_set* no_grad_vars, + std::unordered_map* grad_to_var) { std::vector> grad_op_descs; - // All input gradients of forwarding operator do not need to calculat. + // All input gradients of forwarding operator do not need to calculate. const std::vector& inputs = op_desc->InputArgumentNames(); - if (AllGradInSet(inputs, no_grad_vars)) { + if (AllGradInSet(inputs, *no_grad_vars)) { return grad_op_descs; // empty vector } // All output gradients of forwarding operator do not need to calculate. 
const std::vector& outputs = op_desc->OutputArgumentNames(); - if (AllGradInSet(outputs, no_grad_vars)) { + if (AllGradInSet(outputs, *no_grad_vars)) { for (const std::string& name : inputs) { - no_grad_vars.insert(GradVarName(name)); + no_grad_vars->insert(GradVarName(name)); } return grad_op_descs; // empty vector } - grad_op_descs = OpRegistry::CreateGradOpDescs(op_desc.get()); + grad_op_descs = OpInfoMap::Instance() + .Get(op_desc->Type()) + .GradOpMaker()(*op_desc, *no_grad_vars, grad_to_var); std::list> pending_fill_zeros_ops; for (auto& desc : grad_op_descs) { for (const std::string& in_name : desc->InputArgumentNames()) { - if (no_grad_vars.count(in_name)) { + if (no_grad_vars->count(in_name)) { std::string prefix = in_name.substr( 0, in_name.size() - sizeof(kGradVarSuffix) / sizeof(char) + 1); std::string new_name = prefix + kZeroVarSuffix; @@ -301,11 +337,6 @@ std::vector> MakeOpGrad( pending_fill_zeros_ops.push_back(std::move(fill_zeros_op)); } } - for (const std::string& out_name : desc->OutputArgumentNames()) { - if (no_grad_vars.count(out_name)) { - desc->Rename(out_name, kEmptyVarName); - } - } } for (auto& p : pending_fill_zeros_ops) { @@ -316,23 +347,25 @@ std::vector> MakeOpGrad( std::vector> MakeBlockBackward( ProgramDescBind& program_desc, int block_idx, - std::unordered_set& no_grad_vars) { + std::unordered_set* no_grad_vars, + std::unordered_map* grad_to_var) { BlockDescBind* cur_block = program_desc.Block(block_idx); std::deque>& op_descs = cur_block->ops_; std::unordered_map> dup_out_ops; size_t grad_desc_idx = 0; std::vector> backward_descs; + for (auto it = op_descs.rbegin(); it != op_descs.rend(); ++it) { std::vector> op_grads = - MakeOpGrad(*it, no_grad_vars); + MakeOpGrad(*it, no_grad_vars, grad_to_var); if ((*it)->Type() == "recurrent") { PADDLE_ENFORCE_EQ( - op_grads.size(), size_t(1), + op_grads.size(), static_cast(1), "rnn_op's gradient process should contain only one op."); - int step_block_idx = (*it)->GetBlockAttr("stop_block"); 
- auto backward_block_op_descs = - MakeBlockBackward(program_desc, step_block_idx, no_grad_vars); + int step_block_idx = (*it)->GetBlockAttr("step_block"); + auto backward_block_op_descs = MakeBlockBackward( + program_desc, step_block_idx, no_grad_vars, grad_to_var); BlockDescBind* backward_block = program_desc.AppendBlock(*cur_block); for (auto& ptr : backward_block_op_descs) { backward_block->ops_.push_back(std::move(ptr)); @@ -376,24 +409,56 @@ std::vector> MakeBlockBackward( backward_descs.insert(backward_descs.begin() + p.first + 1, std::move(p.second)); } + return backward_descs; } -void AppendBackward(ProgramDescBind& program_desc, - const std::unordered_set& no_grad_vars) { +ParamGradInfoMap AppendBackward( + ProgramDescBind& program_desc, const VarDescBind& target, + const std::unordered_set& no_grad_vars) { std::unordered_set no_grad_var_names; no_grad_var_names.reserve(no_grad_vars.size() + 1); no_grad_var_names.insert(std::string(kEmptyVarName) + kGradVarSuffix); for (auto& name : no_grad_vars) { no_grad_var_names.insert(GradVarName(name)); } + const int root_block_idx = 0; - auto backward_op_descs = - MakeBlockBackward(program_desc, root_block_idx, no_grad_var_names); - auto& forw_op_descs = program_desc.Block(root_block_idx)->ops_; + auto root_block = program_desc.Block(root_block_idx); + auto& all_ops = root_block->ops_; + + // insert fill one op for target + std::string fill_one_op_out = GradVarName(target.Name()); + std::unique_ptr fill_one_op( + new OpDescBind("fill_constant", {}, {{"Out", {fill_one_op_out}}}, + {{"shape", std::vector{1}}, + {"value", static_cast(1.0)}, + {"dataType", framework::DataType::FP32}})); + all_ops.push_back(std::move(fill_one_op)); + size_t forward_op_num = all_ops.size(); + size_t forward_block_num = program_desc.Size(); + + // Insert backward operators + std::unordered_map grad_to_var; + auto backward_op_descs = MakeBlockBackward(program_desc, root_block_idx, + &no_grad_var_names, &grad_to_var); + + std::unordered_map 
retv; + + // Create Variable for (auto& ptr : backward_op_descs) { - forw_op_descs.push_back(std::move(ptr)); + all_ops.push_back(std::move(ptr)); + } + root_block->Var(fill_one_op_out); + + // create grad_var for all blocks in this program + CreateGradVarInBlock(forward_op_num, grad_to_var, root_block, &retv); + for (size_t block_index = forward_block_num; + block_index < program_desc.Size(); ++block_index) { + CreateGradVarInBlock(0, grad_to_var, program_desc.Block(block_index), + &retv); } + return retv; } } // namespace framework diff --git a/paddle/framework/backward.h b/paddle/framework/backward.h index f1ab8056450c96f0a1b671e1efa46c4c68f9ea15..96154fa82cb7a486aa4762ae633982ed6735220b 100644 --- a/paddle/framework/backward.h +++ b/paddle/framework/backward.h @@ -14,7 +14,10 @@ #pragma once +#include +#include #include + #include "paddle/framework/operator.h" #include "paddle/framework/program_desc.h" @@ -27,10 +30,27 @@ extern std::unique_ptr Backward( const OperatorBase& forwardOp, const std::unordered_set& no_grad_vars); -// TODO(jiayi): Add target as parameter and generate backward op -// according to target. 
-void AppendBackward(ProgramDescBind& program_desc, - const std::unordered_set& no_grad_vars); +struct GradVarInfo { + GradVarInfo() {} + GradVarInfo(const std::string& name, int block_idx, int op_idx) + : name_(name), block_idx_(block_idx), op_idx_(op_idx) {} + + bool operator==(const GradVarInfo& b) const { + return name_ == b.name_ && block_idx_ == b.block_idx_ && + op_idx_ == b.op_idx_; + } + + std::string name_; + int block_idx_; + int op_idx_; +}; + +using ParamGradInfoMap = std::unordered_map; + +ParamGradInfoMap AppendBackward( + ProgramDescBind& program_desc, const VarDescBind& target, + const std::unordered_set& no_grad_vars); } // namespace framework } // namespace paddle diff --git a/paddle/framework/backward_test.cc b/paddle/framework/backward_test.cc index 3b7cbcd98927be829d185590147adf74cd3d10d1..5302afcafb5c0e1c057302dac174be935649ef11 100644 --- a/paddle/framework/backward_test.cc +++ b/paddle/framework/backward_test.cc @@ -18,6 +18,7 @@ #include "paddle/framework/block_desc.h" #include "paddle/framework/op_desc.h" #include "paddle/framework/op_registry.h" +#include "paddle/framework/var_desc.h" #include "paddle/operators/net_op.h" namespace paddle { @@ -169,6 +170,45 @@ class MultInOutOpMaker : public OpProtoAndCheckerMaker { } }; +class MinusGradOpDescMaker : public GradOpDescMakerBase { + public: + using GradOpDescMakerBase::GradOpDescMakerBase; + + std::vector> operator()() const override { + std::vector> retv; + auto x_g = InputGrad("X"); + if (!x_g.empty()) { + auto *op_desc = new OpDescBind(); + op_desc->SetType("scale"); + op_desc->SetInput("X", OutputGrad("Out")); + op_desc->SetOutput("Out", x_g); + op_desc->SetAttr("scale", 1.0f); + retv.emplace_back(op_desc); + } + + auto y_g = InputGrad("Y"); + if (!y_g.empty()) { + auto *op_desc = new OpDescBind(); + op_desc->SetType("scale"); + op_desc->SetInput("X", OutputGrad("Out")); + op_desc->SetOutput("Out", y_g); + op_desc->SetAttr("scale", -1.0f); + retv.emplace_back(op_desc); + } + return 
retv; + } +}; + +class MinusOpMaker : public OpProtoAndCheckerMaker { + public: + MinusOpMaker(OpProto *proto, OpAttrChecker *op_checker) + : OpProtoAndCheckerMaker(proto, op_checker) { + AddInput("X", ""); + AddInput("Y", ""); + AddOutput("Out", ""); + AddComment("minus for unittest"); + } +}; } // namespace framework } // namespace paddle @@ -187,6 +227,7 @@ REGISTER_OP_WITHOUT_GRADIENT(fc, f::FcOp, f::FcOpMaker); REGISTER_OP(many_output_op, f::NOP, f::ManyOutputOpMaker, many_output_op_grad, f::NOP); REGISTER_OP(mult_in_out, f::NOP, f::MultInOutOpMaker, mult_in_out_grad, f::NOP); +REGISTER_OPERATOR(minus, f::NOP, f::MinusOpMaker, f::MinusGradOpDescMaker); TEST(Backward, simple_op_not_need_grad) { auto fwd = f::OpRegistry::CreateOp( @@ -395,12 +436,13 @@ TEST(Backward, linear_net_intermediate_variable_has_no_grad) { 2UL /* external input number */ + 1UL /* external output number*/ + 1UL /* number of gradient of external output*/ - + 2U /* internal variable number*/); + + 2UL /* internal variable number*/ + ); EXPECT_EQ(grad_fc.Outputs(all).size(), 2UL /* input number of mul*/ - + 2UL /* input number of rowwise_add - */ - + 1UL /* input number of sigmod */); + + 2UL /* input number of rowwise_add*/ + + 1UL /* input number of sigmod */ + - 1UL /* out2 is not needed*/); EXPECT_EQ(bwd_net->ops_[1]->Inputs(all).size(), 0UL); EXPECT_EQ(bwd_net->ops_[1]->Outputs(all).size(), 0UL); EXPECT_EQ(bwd_net->ops_[2]->Inputs(all).size(), 0UL); @@ -427,10 +469,14 @@ TEST(Backward, simple_single_op) { op->SetInput("b", {"b"}); op->SetOutput("Out", {"out"}); - AppendBackward(program, {}); + auto target = f::VarDescBind("out"); + auto var_to_grad = AppendBackward(program, target, {}); + + ASSERT_EQ(block->AllOps().size(), 3UL); + f::OpDescBind *fill_op = block->AllOps()[1]; + EXPECT_EQ(fill_op->Type(), "fill_constant"); - ASSERT_EQ(block->AllOps().size(), 2UL); - f::OpDescBind *grad_op = block->AllOps()[1]; + f::OpDescBind *grad_op = block->AllOps()[2]; EXPECT_EQ(grad_op->Type(), 
"rowwise_add_grad"); ASSERT_EQ(grad_op->InputNames().size(), 1UL); ASSERT_EQ(grad_op->OutputNames().size(), 2UL); @@ -440,6 +486,13 @@ TEST(Backward, simple_single_op) { std::vector({f::GradVarName("x")})); EXPECT_EQ(grad_op->Output(f::GradVarName("b")), std::vector({f::GradVarName("b")})); + + EXPECT_EQ(var_to_grad.size(), 2UL); + EXPECT_EQ(var_to_grad.at("b"), f::GradVarInfo(f::GradVarName("b"), 0, 2)); + EXPECT_EQ(var_to_grad.at("x"), f::GradVarInfo(f::GradVarName("x"), 0, 2)); + + EXPECT_TRUE(block->HasVar(f::GradVarName("b"))); + EXPECT_TRUE(block->HasVar(f::GradVarName("x"))); } TEST(Backward, default_attribute) { @@ -451,14 +504,19 @@ TEST(Backward, default_attribute) { op->SetInput("X", {"x"}); op->SetInput("Y", {"y"}); op->SetOutput("Out", {"out"}); + op->CheckAttrs(); - AppendBackward(program, {}); + auto target = f::VarDescBind("out"); + AppendBackward(program, target, {}); - ASSERT_EQ(block->AllOps().size(), 2UL); + ASSERT_EQ(block->AllOps().size(), 3UL); EXPECT_EQ(boost::get(op->GetAttr("x_num_col_dims")), 1); EXPECT_EQ(boost::get(op->GetAttr("y_num_col_dims")), 1); - f::OpDescBind *grad_op = block->AllOps()[1]; + f::OpDescBind *fill_op = block->AllOps()[1]; + EXPECT_EQ(fill_op->Type(), "fill_constant"); + + f::OpDescBind *grad_op = block->AllOps()[2]; ASSERT_EQ(grad_op->Type(), "mul_grad"); EXPECT_EQ(boost::get(grad_op->GetAttr("x_num_col_dims")), 1); EXPECT_EQ(boost::get(grad_op->GetAttr("y_num_col_dims")), 1); @@ -486,10 +544,15 @@ TEST(Backward, simple_mult_op) { op3->SetInput("b", {"b3"}); op3->SetOutput("Out", {"out3"}); - AppendBackward(program, {}); + auto target = f::VarDescBind("out3"); + size_t forward_len = block->AllOps().size(); + auto var_to_grad = AppendBackward(program, target, {}); - ASSERT_EQ(block->AllOps().size(), 6UL); - f::OpDescBind *grad_op1 = block->AllOps()[5]; + ASSERT_EQ(block->AllOps().size(), 6UL + 1); + f::OpDescBind *fill_op = block->AllOps()[forward_len]; + EXPECT_EQ(fill_op->Type(), "fill_constant"); + + f::OpDescBind 
*grad_op1 = block->AllOps()[6]; EXPECT_EQ(grad_op1->Type(), "rowwise_add_grad"); ASSERT_EQ(grad_op1->InputNames().size(), 1UL); ASSERT_EQ(grad_op1->OutputNames().size(), 2UL); @@ -500,7 +563,7 @@ TEST(Backward, simple_mult_op) { EXPECT_EQ(grad_op1->Output(f::GradVarName("b")), std::vector({f::GradVarName("b1")})); - f::OpDescBind *grad_op2 = block->AllOps()[4]; + f::OpDescBind *grad_op2 = block->AllOps()[5]; EXPECT_EQ(grad_op2->Type(), "mul_grad"); ASSERT_EQ(grad_op2->InputNames().size(), 4UL); ASSERT_EQ(grad_op2->OutputNames().size(), 2UL); @@ -514,7 +577,7 @@ TEST(Backward, simple_mult_op) { EXPECT_EQ(grad_op2->Output(f::GradVarName("Y")), std::vector({f::GradVarName("y2")})); - f::OpDescBind *grad_op3 = block->AllOps()[3]; + f::OpDescBind *grad_op3 = block->AllOps()[4]; EXPECT_EQ(grad_op3->Type(), "rowwise_add_grad"); ASSERT_EQ(grad_op3->InputNames().size(), 1UL); ASSERT_EQ(grad_op3->OutputNames().size(), 2UL); @@ -524,6 +587,23 @@ TEST(Backward, simple_mult_op) { std::vector({f::GradVarName("out2")})); EXPECT_EQ(grad_op3->Output(f::GradVarName("b")), std::vector({f::GradVarName("b3")})); + + EXPECT_EQ(var_to_grad.size(), 6UL); + EXPECT_EQ(var_to_grad.at("x1"), f::GradVarInfo(f::GradVarName("x1"), 0, 6)); + EXPECT_EQ(var_to_grad.at("b1"), f::GradVarInfo(f::GradVarName("b1"), 0, 6)); + EXPECT_EQ(var_to_grad.at("out1"), + f::GradVarInfo(f::GradVarName("out1"), 0, 5)); + EXPECT_EQ(var_to_grad.at("y2"), f::GradVarInfo(f::GradVarName("y2"), 0, 5)); + EXPECT_EQ(var_to_grad.at("out2"), + f::GradVarInfo(f::GradVarName("out2"), 0, 4)); + EXPECT_EQ(var_to_grad.at("b3"), f::GradVarInfo(f::GradVarName("b3"), 0, 4)); + + EXPECT_TRUE(block->HasVar(f::GradVarName("x1"))); + EXPECT_TRUE(block->HasVar(f::GradVarName("b1"))); + EXPECT_TRUE(block->HasVar(f::GradVarName("out1"))); + EXPECT_TRUE(block->HasVar(f::GradVarName("y2"))); + EXPECT_TRUE(block->HasVar(f::GradVarName("out2"))); + EXPECT_TRUE(block->HasVar(f::GradVarName("b3"))); } TEST(Backward, intermedia_var_no_grad) { @@ 
-554,10 +634,15 @@ TEST(Backward, intermedia_var_no_grad) { op4->SetInput("Y", {"out3"}); op4->SetOutput("Out", {"out4"}); - AppendBackward(program, {"out3"}); + auto target = f::VarDescBind("out4"); + size_t forward_len = block->AllOps().size(); + auto var_to_grad = AppendBackward(program, target, {"out3"}); - ASSERT_EQ(block->AllOps().size(), 6UL); - f::OpDescBind *grad_op1 = block->AllOps()[5]; + ASSERT_EQ(block->AllOps().size(), 7UL); + f::OpDescBind *fill_op = block->AllOps()[forward_len]; + EXPECT_EQ(fill_op->Type(), "fill_constant"); + + f::OpDescBind *grad_op1 = block->AllOps()[6]; EXPECT_EQ(grad_op1->Type(), "rowwise_add_grad"); ASSERT_EQ(grad_op1->InputNames().size(), 1UL); ASSERT_EQ(grad_op1->OutputNames().size(), 2UL); @@ -568,7 +653,7 @@ TEST(Backward, intermedia_var_no_grad) { EXPECT_EQ(grad_op1->Output(f::GradVarName("b")), std::vector({f::GradVarName("b1")})); - f::OpDescBind *grad_op4 = block->AllOps()[4]; + f::OpDescBind *grad_op4 = block->AllOps()[5]; EXPECT_EQ(grad_op4->Type(), "mul_grad"); ASSERT_EQ(grad_op4->InputNames().size(), 4UL); ASSERT_EQ(grad_op4->OutputNames().size(), 2UL); @@ -579,8 +664,17 @@ TEST(Backward, intermedia_var_no_grad) { std::vector({f::GradVarName("out4")})); EXPECT_EQ(grad_op4->Output(f::GradVarName("X")), std::vector({f::GradVarName("out1")})); - EXPECT_EQ(grad_op4->Output(f::GradVarName("Y")), - std::vector({f::kEmptyVarName})); + EXPECT_EQ(grad_op4->Output(f::GradVarName("Y")), std::vector()); + + EXPECT_EQ(var_to_grad.size(), 3UL); + EXPECT_EQ(var_to_grad.at("x1"), f::GradVarInfo(f::GradVarName("x1"), 0, 6)); + EXPECT_EQ(var_to_grad.at("b1"), f::GradVarInfo(f::GradVarName("b1"), 0, 6)); + EXPECT_EQ(var_to_grad.at("out1"), + f::GradVarInfo(f::GradVarName("out1"), 0, 5)); + + EXPECT_TRUE(block->HasVar(f::GradVarName("x1"))); + EXPECT_TRUE(block->HasVar(f::GradVarName("b1"))); + EXPECT_TRUE(block->HasVar(f::GradVarName("out1"))); } TEST(Backward, var_no_grad) { @@ -601,10 +695,15 @@ TEST(Backward, var_no_grad) { 
op2->SetOutput("Y", {"y2"}); op2->SetOutput("Z", {"z2"}); - AppendBackward(program, {"z1"}); + auto target = f::VarDescBind("z2"); + size_t forward_len = block->AllOps().size(); + auto var_to_grad = AppendBackward(program, target, {"z1"}); + + ASSERT_EQ(block->AllOps().size(), 6UL); + f::OpDescBind *fill_op = block->AllOps()[forward_len]; + EXPECT_EQ(fill_op->Type(), "fill_constant"); - ASSERT_EQ(block->AllOps().size(), 5UL); - f::OpDescBind *grad_op2 = block->AllOps()[2]; + f::OpDescBind *grad_op2 = block->AllOps()[3]; ASSERT_EQ(grad_op2->Type(), "mult_in_out_grad"); ASSERT_EQ(grad_op2->InputNames().size(), 6UL); ASSERT_EQ(grad_op2->OutputNames().size(), 2UL); @@ -618,10 +717,9 @@ TEST(Backward, var_no_grad) { std::vector({f::GradVarName("z2")})); EXPECT_EQ(grad_op2->Output(f::GradVarName("X")), std::vector({f::GradVarName("y1")})); - EXPECT_EQ(grad_op2->Output(f::GradVarName("H")), - std::vector({f::kEmptyVarName})); + EXPECT_EQ(grad_op2->Output(f::GradVarName("H")), std::vector()); - f::OpDescBind *fill_zero_op = block->AllOps()[3]; + f::OpDescBind *fill_zero_op = block->AllOps()[4]; ASSERT_EQ(fill_zero_op->Type(), "fill_zeros_like"); ASSERT_EQ(fill_zero_op->InputNames().size(), 1UL); ASSERT_EQ(fill_zero_op->OutputNames().size(), 1UL); @@ -629,7 +727,7 @@ TEST(Backward, var_no_grad) { EXPECT_EQ(fill_zero_op->Output("Y"), std::vector({std::string("z1") + f::kZeroVarSuffix})); - f::OpDescBind *grad_op1 = block->AllOps()[4]; + f::OpDescBind *grad_op1 = block->AllOps()[5]; ASSERT_EQ(grad_op1->Type(), "mult_in_out_grad"); ASSERT_EQ(grad_op1->InputNames().size(), 6UL); ASSERT_EQ(grad_op1->OutputNames().size(), 2UL); @@ -645,6 +743,15 @@ TEST(Backward, var_no_grad) { std::vector({f::GradVarName("x1")})); EXPECT_EQ(grad_op1->Output(f::GradVarName("H")), std::vector({f::GradVarName("h1")})); + + EXPECT_EQ(var_to_grad.size(), 3UL); + EXPECT_EQ(var_to_grad.at("y1"), f::GradVarInfo(f::GradVarName("y1"), 0, 3)); + EXPECT_EQ(var_to_grad.at("x1"), 
f::GradVarInfo(f::GradVarName("x1"), 0, 5)); + EXPECT_EQ(var_to_grad.at("h1"), f::GradVarInfo(f::GradVarName("h1"), 0, 5)); + + EXPECT_TRUE(block->HasVar(f::GradVarName("y1"))); + EXPECT_TRUE(block->HasVar(f::GradVarName("x1"))); + EXPECT_TRUE(block->HasVar(f::GradVarName("h1"))); } TEST(Backward, shared_var) { @@ -669,10 +776,15 @@ TEST(Backward, shared_var) { op3->SetInput("b", {"b3"}); op3->SetOutput("Out", {"out3"}); - AppendBackward(program, {}); + auto target = f::VarDescBind("out3"); + size_t forward_len = block->AllOps().size(); + auto var_to_grad = AppendBackward(program, target, {}); - ASSERT_EQ(block->AllOps().size(), 7UL); - f::OpDescBind *grad_op3 = block->AllOps()[3]; + ASSERT_EQ(block->AllOps().size(), 8UL); + f::OpDescBind *fill_op = block->AllOps()[forward_len]; + EXPECT_EQ(fill_op->Type(), "fill_constant"); + + f::OpDescBind *grad_op3 = block->AllOps()[4]; ASSERT_EQ(grad_op3->Type(), "rowwise_add_grad"); ASSERT_EQ(grad_op3->InputNames().size(), 1UL); ASSERT_EQ(grad_op3->OutputNames().size(), 2UL); @@ -683,7 +795,7 @@ TEST(Backward, shared_var) { EXPECT_EQ(grad_op3->Output(f::GradVarName("b")), std::vector({f::GradVarName("b3")})); - f::OpDescBind *grad_op4 = block->AllOps()[4]; + f::OpDescBind *grad_op4 = block->AllOps()[5]; ASSERT_EQ(grad_op4->Type(), "mul_grad"); ASSERT_EQ(grad_op4->InputNames().size(), 4UL); ASSERT_EQ(grad_op4->OutputNames().size(), 2UL); @@ -697,7 +809,7 @@ TEST(Backward, shared_var) { EXPECT_EQ(grad_op4->Output(f::GradVarName("Y")), std::vector({f::GradVarName("y2")})); - f::OpDescBind *sum_op = block->AllOps()[5]; + f::OpDescBind *sum_op = block->AllOps()[6]; ASSERT_EQ(sum_op->Type(), "sum"); ASSERT_EQ(sum_op->InputNames().size(), 1UL); ASSERT_EQ(sum_op->OutputNames().size(), 1UL); @@ -707,7 +819,7 @@ TEST(Backward, shared_var) { EXPECT_EQ(sum_op->Output("Out"), std::vector({f::GradVarName("out1")})); - f::OpDescBind *grad_op1 = block->AllOps()[6]; + f::OpDescBind *grad_op1 = block->AllOps()[7]; ASSERT_EQ(grad_op1->Type(), 
"rowwise_add_grad"); ASSERT_EQ(grad_op1->InputNames().size(), 1UL); ASSERT_EQ(grad_op1->OutputNames().size(), 2UL); @@ -717,4 +829,41 @@ TEST(Backward, shared_var) { std::vector({f::GradVarName("x1")})); EXPECT_EQ(grad_op1->Output(f::GradVarName("b")), std::vector({f::GradVarName("b1")})); -} \ No newline at end of file + + EXPECT_EQ(var_to_grad.size(), 5UL); + EXPECT_EQ(var_to_grad.at("b3"), f::GradVarInfo(f::GradVarName("b3"), 0, 4)); + EXPECT_EQ(var_to_grad.at("y2"), f::GradVarInfo(f::GradVarName("y2"), 0, 5)); + EXPECT_EQ(var_to_grad.at("out1"), + f::GradVarInfo(f::GradVarName("out1"), 0, 6)); + EXPECT_EQ(var_to_grad.at("x1"), f::GradVarInfo(f::GradVarName("x1"), 0, 7)); + EXPECT_EQ(var_to_grad.at("b1"), f::GradVarInfo(f::GradVarName("b1"), 0, 7)); + + EXPECT_TRUE(block->HasVar(f::GradVarName("b3"))); + EXPECT_TRUE(block->HasVar(f::GradVarName("y2"))); + EXPECT_TRUE(block->HasVar(f::GradVarName("out1"))); + EXPECT_TRUE(block->HasVar(f::GradVarName("x1"))); + EXPECT_TRUE(block->HasVar(f::GradVarName("b1"))); +} + +TEST(Backward, half_backward) { + f::ProgramDesc *program_desc = GetNewProgramDesc(); + f::ProgramDescBind &program = f::ProgramDescBind::Instance(program_desc); + f::BlockDescBind *block = program.Block(0); + auto *op1 = block->AppendOp(); + op1->SetType("minus"); + op1->SetInput("X", {"a"}); + op1->SetInput("Y", {"b"}); + op1->SetOutput("Out", {"out"}); + + auto target = f::VarDescBind("out"); + size_t forward_len = block->AllOps().size(); + auto var_to_grad = AppendBackward(program, target, {"b"}); + f::OpDescBind *fill_op = block->AllOps()[forward_len]; + EXPECT_EQ(fill_op->Type(), "fill_constant"); + auto ops = block->AllOps(); + ASSERT_EQ(3UL, ops.size()); + + EXPECT_EQ(var_to_grad.size(), 1UL); + EXPECT_EQ(var_to_grad.at("a"), + f::GradVarInfo(f::GradVarName("a"), 0, forward_len + 1)); +} diff --git a/paddle/framework/block_desc.cc b/paddle/framework/block_desc.cc index 
509aa235d3ee226adef15f08f5785866700499f1..47b75228cdbd2a8b4f0c5ad33aa82f5e43044606 100644 --- a/paddle/framework/block_desc.cc +++ b/paddle/framework/block_desc.cc @@ -18,19 +18,22 @@ limitations under the License. */ namespace paddle { namespace framework { -VarDescBind *BlockDescBind::NewVar(const std::string &name) { +VarDescBind *BlockDescBind::Var(const std::string &name) { need_update_ = true; auto it = vars_.find(name); - PADDLE_ENFORCE(it == vars_.end(), "Duplicated variable %s", name); - auto var = new VarDescBind(name); + if (it != vars_.end()) { + return it->second.get(); + } + auto *var = new VarDescBind(name); vars_[name].reset(var); return var; } -VarDescBind *BlockDescBind::Var(const std::string &name) const { +VarDescBind *BlockDescBind::FindVar(const std::string &name) const { auto it = vars_.find(name); - PADDLE_ENFORCE(it != vars_.end(), - "Can not find variable %s in current block.", name); + if (it == vars_.end()) { + return nullptr; + } return it->second.get(); } @@ -66,7 +69,7 @@ std::vector BlockDescBind::AllOps() const { return res; } -void BlockDescBind::Sync() { +void BlockDescBind::Flush() { if (need_update_) { auto &op_field = *this->desc_->mutable_ops(); op_field.Clear(); @@ -91,9 +94,10 @@ BlockDescBind *BlockDescBind::ParentBlock() const { return prog_->Block(static_cast(this->desc_->parent_idx())); } -void OpDescBind::SetBlockAttr(const std::string &name, BlockDescBind &block) { - BlockDesc *desc = block.RawPtr(); - this->attrs_[name] = desc; +BlockDesc *BlockDescBind::Proto() { + Flush(); + return desc_; } + } // namespace framework } // namespace paddle diff --git a/paddle/framework/block_desc.h b/paddle/framework/block_desc.h index 3437e89923da8de79eeaa88d0466cf7eb0b5926d..9fb88f963283c72e1ec389b72dd2d98049c74f6d 100644 --- a/paddle/framework/block_desc.h +++ b/paddle/framework/block_desc.h @@ -33,14 +33,6 @@ class ProgramDescBind; class BlockDescBind { public: - friend std::vector> MakeBlockBackward( - ProgramDescBind 
&program_desc, int block_idx, - std::unordered_set &no_grad_vars); - - friend void AppendBackward( - ProgramDescBind &program_desc, - const std::unordered_set &no_grad_vars); - BlockDescBind(ProgramDescBind *prog, BlockDesc *desc) : prog_(prog), desc_(desc), need_update_(false) {} @@ -48,9 +40,9 @@ class BlockDescBind { int32_t Parent() const { return desc_->parent_idx(); } - VarDescBind *NewVar(const std::string &name_bytes); + VarDescBind *Var(const std::string &name_bytes); - VarDescBind *Var(const std::string &name_bytes) const; + VarDescBind *FindVar(const std::string &name_bytes) const; bool HasVar(const std::string &var_name) const; @@ -64,11 +56,13 @@ class BlockDescBind { std::vector AllOps() const; - void Sync(); + void Flush(); - BlockDesc *RawPtr() { return desc_; } + BlockDesc *Proto(); - private: + // FIXME(yuyang18): backward will access private data of BlockDesc. + // Mark it public temporary. We can fix it later. + public: ProgramDescBind *prog_; // not_own BlockDesc *desc_; // not_own bool need_update_; diff --git a/paddle/framework/details/op_registry.h b/paddle/framework/details/op_registry.h index daa474e8c5a223589018720da29a5c3363b5934d..ed7c5f17b0854809bde923276f36440cce193a88 100644 --- a/paddle/framework/details/op_registry.h +++ b/paddle/framework/details/op_registry.h @@ -97,8 +97,11 @@ struct OpInfoFiller { template struct OpInfoFiller { void operator()(const char* op_type, OpInfo* info) const { - info->grad_op_maker_ = [](const OpDescBind& fwd_op) { - T maker(fwd_op); + info->grad_op_maker_ = []( + const OpDescBind& fwd_op, + const std::unordered_set& no_grad_set, + std::unordered_map* grad_to_var) { + T maker(fwd_op, no_grad_set, grad_to_var); return maker(); }; } diff --git a/paddle/framework/executor.cc b/paddle/framework/executor.cc index c388b2198e4fbf75d6584d710e00d3deca93eb51..8e82e28bac478ad93ece3fcec9730c6cbabc392a 100644 --- a/paddle/framework/executor.cc +++ b/paddle/framework/executor.cc @@ -66,7 +66,7 @@ void 
Executor::Run(const ProgramDesc& pdesc, Scope* scope, int block_id) { // Instantiate all the vars in the global scope for (auto& var : block.vars()) { - scope->NewVar(var.name()); + scope->Var(var.name()); } Scope& local_scope = scope->NewScope(); @@ -78,7 +78,7 @@ void Executor::Run(const ProgramDesc& pdesc, Scope* scope, int block_id) { for (auto& var : block.ops(i).outputs()) { for (auto& argu : var.arguments()) { if (local_scope.FindVar(argu) == nullptr) { - local_scope.NewVar(argu); + local_scope.Var(argu); } } } diff --git a/paddle/framework/executor_test.cc b/paddle/framework/executor_test.cc index 7f6d8fe6a4aec9fdc39b4ffc0837a03e355ec937..fcd2e47cff57fcc6c177be77d7e14b167a28f4ae 100644 --- a/paddle/framework/executor_test.cc +++ b/paddle/framework/executor_test.cc @@ -17,6 +17,7 @@ limitations under the License. */ #include #include +#include "gflags/gflags.h" #include "gtest/gtest.h" #include "paddle/framework/attribute.h" #include "paddle/framework/backward.h" @@ -25,6 +26,17 @@ limitations under the License. 
*/ #include "paddle/framework/op_registry.h" #include "paddle/framework/operator.h" +USE_OP(elementwise_add); +USE_OP(gaussian_random); +USE_OP(feed); +USE_OP(fetch); +USE_OP(mul); +USE_OP(sum); +USE_OP(squared_l2_distance); +USE_OP(fill_constant); +USE_OP(mean); +USE_OP(sgd); + using namespace paddle::platform; using namespace paddle::framework; @@ -34,8 +46,16 @@ void AddOp(const std::string& type, const VariableNameMap& inputs, // insert output for (auto kv : outputs) { for (auto v : kv.second) { - auto var = block->NewVar(v); - var->SetDataType(paddle::framework::DataType::FP32); + // <<<<<<< HEAD + // auto var = block->Var(v); + // var->SetType(VarDesc::LOD_TENSOR); + // var->SetDataType(paddle::framework::DataType::FP32); + // ======= + if (!block->HasVar(v)) { + auto var = block->Var(v); + var->SetDataType(paddle::framework::DataType::FP32); + } + // >>>>>>> origin/develop } } @@ -49,6 +69,7 @@ void AddOp(const std::string& type, const VariableNameMap& inputs, op->SetOutput(kv.first, kv.second); } op->SetAttrMap(attrs); + op->CheckAttrs(); } // Tensors in feed value variable will only be in CPUPlace @@ -134,12 +155,12 @@ class ExecutorTesterRandom : public ::testing::Test { AddOp("squared_l2_distance", {{"X", {"a"}}, {"Y", {"a_out"}}}, {{"Out", {"l2_distance"}}, {"sub_result", {"l2_distance_sub"}}}, {}, root_block); + AddOp("mean", {{"X", {"l2_distance"}}}, {{"Out", {"mean_out"}}}, {}, + root_block); // backward - AddOp("fill_constant", {}, {{"Out", {"l2_distance@GRAD"}}}, - {{"shape", std::vector{batch_size, 1}}, {"value", float(1.0)}}, - root_block); - AppendBackward(program, {}); + auto target = VarDescBind("mean_out"); + AppendBackward(program, target, {}); // update AddOp("fill_constant", {}, {{"Out", {"learning_rate"}}}, @@ -305,4 +326,14 @@ TEST_F(ExecutorTesterFeedAndFetch, GPU) { } } } + +DECLARE_double(fraction_of_gpu_memory_to_use); + +int main(int argc, char** argv) { + testing::InitGoogleTest(&argc, argv); + // Use less GPU memory for unittest. 
+ FLAGS_fraction_of_gpu_memory_to_use = 0.25; + return RUN_ALL_TESTS(); +} + #endif diff --git a/paddle/framework/framework.proto b/paddle/framework/framework.proto index b7a63f9ba10b77acff516d75cf1be0d4eeda40d4..65760b07ada7a63a568cb8296eef35a8aa18d9ff 100644 --- a/paddle/framework/framework.proto +++ b/paddle/framework/framework.proto @@ -97,16 +97,26 @@ enum DataType { FP64 = 6; } -message LoDTensorDesc { +message TensorDesc { required DataType data_type = 1; repeated int64 dims = 2; // [UNK, 640, 480] is saved as [-1, 640, 480] - optional int32 lod_level = 3 [ default = 0 ]; +} + +message LoDTensorDesc { + required TensorDesc tensor = 1; + optional int32 lod_level = 2 [ default = 0 ]; } message VarDesc { + enum VarType { + LOD_TENSOR = 1; + SELECTED_ROWS = 2; + } required string name = 1; - optional LoDTensorDesc lod_tensor = 2; - optional bool persistable = 3 [ default = false ]; + required VarType type = 2; + optional LoDTensorDesc lod_tensor = 3; + optional TensorDesc selected_rows = 4; + optional bool persistable = 5 [ default = false ]; } message BlockDesc { diff --git a/paddle/framework/grad_op_desc_maker.h b/paddle/framework/grad_op_desc_maker.h index e9ae6e22060850fe229998d3b651d08a5ca2033a..1219e0487531b19b00adde5a9aa2bde51bfc0aa8 100644 --- a/paddle/framework/grad_op_desc_maker.h +++ b/paddle/framework/grad_op_desc_maker.h @@ -13,6 +13,8 @@ limitations under the License. 
*/ #pragma once +#include +#include #include "paddle/framework/op_desc.h" #include "paddle/framework/operator.h" @@ -21,27 +23,50 @@ namespace framework { class GradOpDescMakerBase { public: - explicit GradOpDescMakerBase(const OpDescBind& fwd_op) : fwd_op_(fwd_op) {} + explicit GradOpDescMakerBase( + const OpDescBind& fwd_op, + const std::unordered_set& no_grad_set, + std::unordered_map* grad_to_var) + : fwd_op_(fwd_op), no_grad_set_(no_grad_set), grad_to_var_(grad_to_var) {} virtual ~GradOpDescMakerBase() = default; virtual std::vector> operator()() const = 0; protected: - static std::vector ToGradNames( - const std::vector& var_names) { + std::vector InputGrad(const std::string& name, + bool drop_empty_grad = true) const { std::vector ret_val; + auto var_names = this->Input(name); ret_val.reserve(var_names.size()); std::transform(var_names.begin(), var_names.end(), - std::back_inserter(ret_val), GradVarName); - return ret_val; - } - - std::vector InputGrad(const std::string& name) const { - return ToGradNames(fwd_op_.Input(name)); + std::back_inserter(ret_val), + [this](const std::string& fwd_var_name) -> std::string { + auto g_name = GradVarName(fwd_var_name); + if (no_grad_set_.count(g_name)) { + return kEmptyVarName; + } else { + (*this->grad_to_var_)[g_name] = fwd_var_name; + return g_name; + } + }); + if (!drop_empty_grad) { + return ret_val; + } + std::vector dropped_ret_val; + dropped_ret_val.reserve(ret_val.size()); + std::copy_if(ret_val.begin(), ret_val.end(), + std::back_inserter(dropped_ret_val), + [](const std::string& str) { return str != kEmptyVarName; }); + return dropped_ret_val; } std::vector OutputGrad(const std::string& name) const { - return ToGradNames(fwd_op_.Output(name)); + std::vector ret_val; + auto onames = this->Output(name); + ret_val.reserve(onames.size()); + std::transform(onames.begin(), onames.end(), std::back_inserter(ret_val), + GradVarName); + return ret_val; } std::vector InputNames() const { @@ -75,6 +100,8 @@ class 
GradOpDescMakerBase { private: const OpDescBind& fwd_op_; + const std::unordered_set& no_grad_set_; + std::unordered_map* grad_to_var_; }; class SingleGradOpDescMaker : public GradOpDescMakerBase { @@ -91,6 +118,7 @@ class SingleGradOpDescMaker : public GradOpDescMakerBase { virtual std::unique_ptr Apply() const = 0; }; +template class DefaultGradOpDescMaker : public SingleGradOpDescMaker { public: using SingleGradOpDescMaker::SingleGradOpDescMaker; @@ -102,7 +130,8 @@ class DefaultGradOpDescMaker : public SingleGradOpDescMaker { for (auto& input_param : this->InputNames()) { grad->SetInput(input_param, this->Input(input_param)); - grad->SetOutput(GradVarName(input_param), this->InputGrad(input_param)); + grad->SetOutput(GradVarName(input_param), + this->InputGrad(input_param, DropEmptyIG)); } for (auto& output_param : this->OutputNames()) { diff --git a/paddle/framework/op_desc.cc b/paddle/framework/op_desc.cc index d3c11ad60a0f9319329a59c16bfc4668cd75b7ae..ef207dc54ebe6cc72d9f1e428dd2aaed5ad3dbf0 100644 --- a/paddle/framework/op_desc.cc +++ b/paddle/framework/op_desc.cc @@ -32,7 +32,7 @@ OpDescBind::OpDescBind(const std::string &type, const VariableNameMap &inputs, } OpDesc *OpDescBind::Proto() { - Sync(); + Flush(); return &op_desc_; } @@ -100,6 +100,12 @@ void OpDescBind::SetAttr(const std::string &name, const Attribute &v) { need_update_ = true; } +void OpDescBind::SetBlockAttr(const std::string &name, BlockDescBind &block) { + BlockDesc *desc = block.Proto(); + this->attrs_[name] = desc; + need_update_ = true; +} + void OpDescBind::SetAttrMap( const std::unordered_map &attr_map) { attrs_ = attr_map; @@ -159,7 +165,7 @@ struct SetAttrDescVisitor : public boost::static_visitor { void operator()(boost::blank) const { PADDLE_THROW("Unexpected branch"); } }; -void OpDescBind::Sync() { +void OpDescBind::Flush() { if (need_update_) { this->op_desc_.mutable_inputs()->Clear(); for (auto &ipt : inputs_) { diff --git a/paddle/framework/op_desc.h 
b/paddle/framework/op_desc.h index 90155fadeac148bd9cae4ce9066ac4ce8d9df52d..73b5cf846f702fe21277ae139156ec9784aa79b3 100644 --- a/paddle/framework/op_desc.h +++ b/paddle/framework/op_desc.h @@ -89,8 +89,6 @@ class OpDescBind { this->need_update_ = true; } - void Sync(); - const VariableNameMap &Inputs() const { return inputs_; } const VariableNameMap &Outputs() const { return outputs_; } @@ -104,6 +102,8 @@ class OpDescBind { void InferShape(const BlockDescBind &block) const; + void Flush(); + private: template static std::vector MapKeys(const MapType &map) { diff --git a/paddle/framework/op_registry.cc b/paddle/framework/op_registry.cc index b118edae17430c8a4dd5c96a2a0c675766e08166..504afbd5dbacf7185f92e0000d19666230e2fb42 100644 --- a/paddle/framework/op_registry.cc +++ b/paddle/framework/op_registry.cc @@ -59,16 +59,5 @@ std::unique_ptr OpRegistry::CreateOp(const OpDescBind& op_desc) { op_desc.GetAttrMap()); } -std::vector> OpRegistry::CreateGradOpDescs( - OpDescBind* op_desc) { - auto& info = OpInfoMap::Instance().Get(op_desc->Type()); - - if (info.Checker() != nullptr) { - info.Checker()->Check(*op_desc->MutableAttrMap()); - } - - return info.grad_op_maker_(*op_desc); -} - } // namespace framework } // namespace paddle diff --git a/paddle/framework/op_registry.h b/paddle/framework/op_registry.h index 5ca3af52a6909eeee21f647d0e60c7a690f90190..226e8ddcd4b1a2630e0eea00ee6c9f6af6bd5d20 100644 --- a/paddle/framework/op_registry.h +++ b/paddle/framework/op_registry.h @@ -79,9 +79,6 @@ class OpRegistry { static std::unique_ptr CreateOp(const OpDesc& op_desc); - static std::vector> CreateGradOpDescs( - OpDescBind* op_desc); - static std::unique_ptr CreateOp(const OpDescBind& op_desc); }; @@ -160,17 +157,18 @@ class OpKernelRegistrar : public Registrar { /** * Macro to register Operator. 
*/ -#define REGISTER_OP(op_type, op_class, op_maker_class, grad_op_type, \ - grad_op_class) \ - REGISTER_OPERATOR(grad_op_type, grad_op_class); \ - class _GradOpDescMaker_##grad_op_type##_ \ - : public ::paddle::framework::DefaultGradOpDescMaker { \ - using ::paddle::framework::DefaultGradOpDescMaker::DefaultGradOpDescMaker; \ - \ - protected: \ - virtual std::string GradOpType() const { return #grad_op_type; } \ - }; \ - REGISTER_OPERATOR(op_type, op_class, _GradOpDescMaker_##grad_op_type##_, \ +#define REGISTER_OP(op_type, op_class, op_maker_class, grad_op_type, \ + grad_op_class) \ + REGISTER_OPERATOR(grad_op_type, grad_op_class); \ + class _GradOpDescMaker_##grad_op_type##_ \ + : public ::paddle::framework::DefaultGradOpDescMaker { \ + using ::paddle::framework::DefaultGradOpDescMaker< \ + true>::DefaultGradOpDescMaker; \ + \ + protected: \ + virtual std::string GradOpType() const { return #grad_op_type; } \ + }; \ + REGISTER_OPERATOR(op_type, op_class, _GradOpDescMaker_##grad_op_type##_, \ op_maker_class); #define REGISTER_OP_WITHOUT_GRADIENT(op_type, op_class, op_maker_class) \ diff --git a/paddle/framework/operator.h b/paddle/framework/operator.h index 15f80b57206c90f689acfdcac60a0d9011025fc0..cf15f9933ab3bc881add3d45b7ca17194a70e0f1 100644 --- a/paddle/framework/operator.h +++ b/paddle/framework/operator.h @@ -289,6 +289,15 @@ class ExecutionContext { return device_context_; } +#ifdef PADDLE_WITH_CUDA + const platform::CUDADeviceContext& cuda_device_context() const { + PADDLE_ENFORCE(platform::is_gpu_place(device_context_.GetPlace())); + auto cuda_ctx = + reinterpret_cast(&device_context_); + return *cuda_ctx; + } +#endif + private: const OperatorBase& op_; const Scope& scope_; @@ -394,11 +403,11 @@ class CompileTimeInferShapeContext : public InferShapeContext { private: DDim GetDim(const std::string& name) const override { - return framework::make_ddim(block_.Var(name)->Shape()); + return framework::make_ddim(block_.FindVar(name)->Shape()); } void 
SetDim(const std::string& name, const DDim& dim) override { - block_.Var(name)->SetShape(framework::vectorize(dim)); + block_.FindVar(name)->SetShape(framework::vectorize(dim)); } const OpDescBind& op_; diff --git a/paddle/framework/operator_test.cc b/paddle/framework/operator_test.cc index a02f4668bca2360995cc05206f7f97e027db0907..d7890ac8d0af2171271a0cfccd356563c7604e72 100644 --- a/paddle/framework/operator_test.cc +++ b/paddle/framework/operator_test.cc @@ -84,7 +84,7 @@ TEST(OperatorBase, all) { paddle::framework::Scope scope; auto op = paddle::framework::OpRegistry::CreateOp(op_desc); - scope.NewVar("OUT1"); + scope.Var("OUT1"); ASSERT_EQ(paddle::framework::op_run_num, 0); op->Run(scope, device_context); ASSERT_EQ(paddle::framework::op_run_num, 1); @@ -237,12 +237,12 @@ TEST(OpKernel, multi_inputs) { paddle::platform::CPUDeviceContext cpu_device_context; paddle::framework::Scope scope; - scope.NewVar("x0")->GetMutable(); - scope.NewVar("x1")->GetMutable(); - scope.NewVar("x2")->GetMutable(); - scope.NewVar("k0")->GetMutable(); - scope.NewVar("y0")->GetMutable(); - scope.NewVar("y1")->GetMutable(); + scope.Var("x0")->GetMutable(); + scope.Var("x1")->GetMutable(); + scope.Var("x2")->GetMutable(); + scope.Var("k0")->GetMutable(); + scope.Var("y0")->GetMutable(); + scope.Var("y1")->GetMutable(); auto op = paddle::framework::OpRegistry::CreateOp(op_desc); op->Run(scope, cpu_device_context); diff --git a/paddle/framework/program_desc.cc b/paddle/framework/program_desc.cc index e89f9a46d587b6378aa3be92306c5680093e1926..fcb7292884275d972377983cb3ba1bcd86fb8348 100644 --- a/paddle/framework/program_desc.cc +++ b/paddle/framework/program_desc.cc @@ -45,7 +45,7 @@ BlockDescBind *ProgramDescBind::AppendBlock(const BlockDescBind &parent) { ProgramDesc *ProgramDescBind::Proto() { for (auto &block : blocks_) { - block->Sync(); + block->Flush(); } return prog_; } diff --git a/paddle/framework/scope.cc b/paddle/framework/scope.cc index 
5821bac928ed898971d61a3e2a86f59155d76991..8f8a53eec8f947b088124a3f034fedb17fd86a48 100644 --- a/paddle/framework/scope.cc +++ b/paddle/framework/scope.cc @@ -31,7 +31,7 @@ Scope& Scope::NewScope() const { return *kids_.back(); } -Variable* Scope::NewVar(const std::string& name) { +Variable* Scope::Var(const std::string& name) { auto iter = vars_.find(name); if (iter != vars_.end()) { return iter->second; @@ -42,8 +42,8 @@ Variable* Scope::NewVar(const std::string& name) { return v; } -Variable* Scope::NewVar() { - return NewVar(string::Sprintf("%p.%d", this, vars_.size())); +Variable* Scope::Var() { + return Var(string::Sprintf("%p.%d", this, vars_.size())); } Variable* Scope::FindVar(const std::string& name) const { @@ -71,8 +71,8 @@ framework::Scope& GetGlobalScope() { static std::unique_ptr g_scope{nullptr}; std::call_once(feed_variable_flag, [&]() { g_scope.reset(new framework::Scope()); - g_scope->NewVar("feed_value"); - g_scope->NewVar("fetch_value"); + g_scope->Var("feed_value"); + g_scope->Var("fetch_value"); }); return *(g_scope.get()); } diff --git a/paddle/framework/scope.h b/paddle/framework/scope.h index a8cfb107c25ccd62039db7349cc1c1dbff772f39..a7fce3514b163d78bf96b3cc19d188744a383395 100644 --- a/paddle/framework/scope.h +++ b/paddle/framework/scope.h @@ -45,10 +45,10 @@ class Scope { Scope& NewScope() const; /// Create a variable with given name if it doesn't exist. - Variable* NewVar(const std::string& name); + Variable* Var(const std::string& name); /// Create a variable with a scope-unique name. - Variable* NewVar(); + Variable* Var(); /// Find a variable in the scope or any of its ancestors. Returns /// nullptr if cannot find. 
diff --git a/paddle/framework/scope_test.cc b/paddle/framework/scope_test.cc index 9d51e355b0f6336d2f875ff2d77266b261baf5ac..7cc5e3510d978fae81d1e36da7ca35d4b3a04098 100644 --- a/paddle/framework/scope_test.cc +++ b/paddle/framework/scope_test.cc @@ -23,8 +23,8 @@ TEST(Scope, VarsShadowing) { Scope& ss1 = s.NewScope(); Scope& ss2 = s.NewScope(); - Variable* v0 = s.NewVar("a"); - Variable* v1 = ss1.NewVar("a"); + Variable* v0 = s.Var("a"); + Variable* v1 = ss1.Var("a"); EXPECT_NE(v0, v1); @@ -40,7 +40,7 @@ TEST(Scope, FindVar) { EXPECT_EQ(nullptr, s.FindVar("a")); EXPECT_EQ(nullptr, ss.FindVar("a")); - ss.NewVar("a"); + ss.Var("a"); EXPECT_EQ(nullptr, s.FindVar("a")); EXPECT_NE(nullptr, ss.FindVar("a")); @@ -49,7 +49,7 @@ TEST(Scope, FindVar) { TEST(Scope, FindScope) { Scope s; Scope& ss = s.NewScope(); - Variable* v = s.NewVar("a"); + Variable* v = s.Var("a"); EXPECT_EQ(&s, s.FindScope(v)); EXPECT_EQ(&s, ss.FindScope(v)); diff --git a/paddle/framework/selected_rows.cc b/paddle/framework/selected_rows.cc new file mode 100644 index 0000000000000000000000000000000000000000..c74459c9dd7006a24615b1d6df041583088fb25c --- /dev/null +++ b/paddle/framework/selected_rows.cc @@ -0,0 +1,16 @@ +/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserve. +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + http://www.apache.org/licenses/LICENSE-2.0 +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. 
*/ + +#include "paddle/framework/selected_rows.h" + +namespace paddle { +namespace framework {} // namespace framework +} // namespace paddle diff --git a/paddle/framework/selected_rows.h b/paddle/framework/selected_rows.h new file mode 100644 index 0000000000000000000000000000000000000000..f9f563051e264ae7ed7cf3c07c0065522b2bbe2e --- /dev/null +++ b/paddle/framework/selected_rows.h @@ -0,0 +1,54 @@ +/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserve. +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + http://www.apache.org/licenses/LICENSE-2.0 +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. */ + +#pragma once +#include "paddle/framework/tensor.h" + +namespace paddle { +namespace framework { + +class SelectedRows { + public: + SelectedRows(const std::vector& rows, const int64_t& height) + : rows_(rows), height_(height) { + value_.reset(new Tensor()); + } + + SelectedRows() { value_.reset(new Tensor()); } + + platform::Place place() const { return value_->place(); } + + const Tensor& value() const { return *value_; } + + Tensor* mutable_value() { return value_.get(); } + + int64_t height() const { return height_; } + + void set_height(int64_t height) { height_ = height; } + + const std::vector& rows() const { return rows_; } + + void set_rows(const std::vector& rows) { rows_ = rows; } + + DDim GetCompleteDims() const { + std::vector dims = vectorize(value_->dims()); + dims[0] = height_; + return make_ddim(dims); + } + + private: + std::vector rows_; + std::unique_ptr value_{nullptr}; + int64_t height_; +}; + +} // namespace framework +} // namespace paddle diff --git 
a/paddle/framework/selected_rows_test.cc b/paddle/framework/selected_rows_test.cc new file mode 100644 index 0000000000000000000000000000000000000000..4ee13a65d72e44693573397bb686b355effb2227 --- /dev/null +++ b/paddle/framework/selected_rows_test.cc @@ -0,0 +1,47 @@ +/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserve. +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + http://www.apache.org/licenses/LICENSE-2.0 +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. */ + +#include "paddle/framework/selected_rows.h" +#include "gtest/gtest.h" + +namespace paddle { +namespace framework { + +class SelectedRowsTester : public ::testing::Test { + public: + virtual void SetUp() override { + std::vector rows{0, 4, 7}; + int64_t height = 10; + int64_t row_numel = 100; + selected_rows_.reset(new SelectedRows(rows, height)); + + Tensor* value = selected_rows_->mutable_value(); + value->mutable_data( + make_ddim({static_cast(rows.size()), row_numel}), place_); + } + + protected: + platform::CPUPlace place_; + std::unique_ptr selected_rows_{nullptr}; +}; + +TEST_F(SelectedRowsTester, height) { ASSERT_EQ(selected_rows_->height(), 10); } + +TEST_F(SelectedRowsTester, dims) { + ASSERT_EQ(selected_rows_->value().dims(), make_ddim({3, 100})); +} + +TEST_F(SelectedRowsTester, complete_dims) { + ASSERT_EQ(selected_rows_->GetCompleteDims(), make_ddim({10, 100})); +} + +} // namespace framework +} // namespace paddle diff --git a/paddle/framework/shape_inference.h b/paddle/framework/shape_inference.h index 64aab16ae54d34fd614add348c7c420b4a8f771d..b93f980cf6d279d18388b9637a2ff45d797ca78e 100644 --- 
a/paddle/framework/shape_inference.h +++ b/paddle/framework/shape_inference.h @@ -19,9 +19,6 @@ limitations under the License. */ namespace paddle { namespace framework { -// TODO(longfei): Once after both CompileTimeInferShapeContext and -// RuntimeInferShapeContext get merged, we can rename InferShapeContext into -// InferShapeContext so to replace the current InferShapeContext. class InferShapeContext { public: virtual ~InferShapeContext() {} diff --git a/paddle/framework/tensor_array.cc b/paddle/framework/tensor_array.cc index 7ae16e99cdb8a23f14f0c8b684ba4ec66a4ce074..06459cbfd7b8c19c176452ff73c9f3a81ba1dc03 100644 --- a/paddle/framework/tensor_array.cc +++ b/paddle/framework/tensor_array.cc @@ -76,6 +76,17 @@ LoDTensor PackDynamicBatch(const std::vector& source, const std::vector& meta, const LoD& lod, size_t level); +std::vector GenDyBatchIndice(const DySeqMetaBatch& meta, int batch_id) { + // collect indice need to copy to the batch + std::vector indice; + for (const auto& seq : meta) { + size_t id = seq.begin + batch_id; + if (id >= seq.end) break; + indice.push_back(id); + } + return indice; +} + } // namespace detail const LoDTensor& TensorArray::Read(size_t index) const { @@ -113,8 +124,8 @@ LoDTensor TensorArray::Pack(size_t level, const std::vector& meta, return detail::PackDynamicBatch(values_, meta, lod, level); } -std::vector TensorArray::Unpack(const LoDTensor& source, int level, - bool length_desend) { +DySeqMetaBatch TensorArray::Unpack(const LoDTensor& source, int level, + bool length_desend) { detail::DynamicBatchUnpacker unpacker(source, level, length_desend /*descend*/); @@ -129,6 +140,7 @@ std::vector TensorArray::Unpack(const LoDTensor& source, int level, Write(batch_id, unpacker.GetBatch(batch_id)); } + PADDLE_ENFORCE(!unpacker.meta.empty()); return unpacker.meta; } @@ -218,13 +230,7 @@ LoDTensor DynamicBatchUnpacker::GetBatch(size_t index) { PADDLE_ENFORCE(!meta.empty(), "should build meta first"); LoDTensor result; - // collect indice 
need to copy to the batch - std::vector indice; - for (const auto& seq : meta) { - size_t id = seq.begin + index; - if (id >= seq.end) break; - indice.push_back(id); - } + auto indice = detail::GenDyBatchIndice(meta, index); PADDLE_ENFORCE(!indice.empty(), "invalid batch at %d", index); // copy the indice of records in LoDTensor @@ -237,9 +243,9 @@ LoDTensor DynamicBatchUnpacker::GetBatch(size_t index) { for (size_t i = 0; i < indice.size(); i++) { auto index = indice[i]; auto target = result.Slice(i, i + 1); - auto source_ = source->Slice(index, index + 1); + auto slice = source->Slice(index, index + 1); - target.CopyFrom(source_, platform::CPUPlace(), + target.CopyFrom(slice, platform::CPUPlace(), platform::CPUDeviceContext()); } diff --git a/paddle/framework/tensor_array.h b/paddle/framework/tensor_array.h index 293da04997304be41810446cb3e866d545805f83..046ecb5221b7ed9d88e5017348ee8fcde23c7677 100644 --- a/paddle/framework/tensor_array.h +++ b/paddle/framework/tensor_array.h @@ -34,6 +34,13 @@ struct DySeqMeta { size_t ori_idx; }; +using DySeqMetaBatch = std::vector; + +/* + * Extract the indices of instances. + */ +std::vector GenDyBatchIndice(const DySeqMetaBatch &metas, int batch_id); + /* * TensorArray is a C-array-like array of tensors, it is meant to be used with * dynamic iteration primitives such as while_loop. It is used to segment inputs @@ -69,7 +76,7 @@ class TensorArray { * Recover the original LoD-arranged LoDTensor with the `values`, `level` and * `indice_map`. */ - LoDTensor Pack(size_t level, const std::vector &meta, + LoDTensor Pack(size_t level, const DySeqMetaBatch &meta, const LoD &lod) const; /* @@ -77,8 +84,7 @@ class TensorArray { * `values`, if set `desend`, will sort by length in descending order else in * ascending order. 
*/ - std::vector Unpack(const LoDTensor &source, int level, - bool length_desend); + DySeqMetaBatch Unpack(const LoDTensor &source, int level, bool length_desend); /* * Pack the values into a tensor with rank one higher than each tensor in diff --git a/paddle/framework/type_defs.h b/paddle/framework/type_defs.h index 6f65a942ba2a4073e6aa1047875ec5c3283c23a6..0d1564a7510ddf0106ff417fb0b487ddbde1ac2e 100644 --- a/paddle/framework/type_defs.h +++ b/paddle/framework/type_defs.h @@ -36,8 +36,9 @@ using OpCreator = std::function; -using GradOpMakerFN = - std::function>(const OpDescBind&)>; +using GradOpMakerFN = std::function>( + const OpDescBind&, const std::unordered_set& /*no_grad_set*/, + std::unordered_map* /*grad_to_var*/)>; } // namespace framework } // namespace paddle diff --git a/paddle/framework/var_desc.cc b/paddle/framework/var_desc.cc index a88e813b5e7c7e6420cb0ba8a25bba4f4d658e80..c302217e5aacdc17800238770d689b7fb65804f3 100644 --- a/paddle/framework/var_desc.cc +++ b/paddle/framework/var_desc.cc @@ -13,32 +13,58 @@ See the License for the specific language governing permissions and limitations under the License. 
*/ #include "paddle/framework/var_desc.h" +#include "paddle/platform/enforce.h" namespace paddle { namespace framework { void VarDescBind::SetShape(const std::vector &dims) { - VectorToRepeated(dims, desc_.mutable_lod_tensor()->mutable_dims()); + VectorToRepeated(dims, mutable_tensor_desc()->mutable_dims()); } void VarDescBind::SetDataType(DataType data_type) { - desc_.mutable_lod_tensor()->set_data_type(data_type); + mutable_tensor_desc()->set_data_type(data_type); } std::vector VarDescBind::Shape() const { - return RepeatedToVector(desc_.lod_tensor().dims()); + return RepeatedToVector(tensor_desc().dims()); } -DataType VarDescBind::GetDataType() const { - return desc_.lod_tensor().data_type(); -} +DataType VarDescBind::GetDataType() const { return tensor_desc().data_type(); } void VarDescBind::SetLoDLevel(int32_t lod_level) { + PADDLE_ENFORCE(desc_.type() == VarDesc::LOD_TENSOR); desc_.mutable_lod_tensor()->set_lod_level(lod_level); } int32_t VarDescBind::GetLodLevel() const { + PADDLE_ENFORCE(desc_.type() == VarDesc::LOD_TENSOR); return desc_.lod_tensor().lod_level(); } + +const TensorDesc &VarDescBind::tensor_desc() const { + PADDLE_ENFORCE(desc_.has_type(), "invoke TensorDesc must after set type"); + switch (desc_.type()) { + case VarDesc::SELECTED_ROWS: + return desc_.selected_rows(); + case VarDesc::LOD_TENSOR: + return desc_.lod_tensor().tensor(); + default: + PADDLE_THROW("Unexpected branch."); + } +} + +TensorDesc *VarDescBind::mutable_tensor_desc() { + PADDLE_ENFORCE(desc_.has_type(), + "invoke MutableTensorDesc must after set type"); + switch (desc_.type()) { + case VarDesc::SELECTED_ROWS: + return desc_.mutable_selected_rows(); + case VarDesc::LOD_TENSOR: + return desc_.mutable_lod_tensor()->mutable_tensor(); + default: + PADDLE_THROW("Unexpected branch."); + } +} } // namespace framework } // namespace paddle diff --git a/paddle/framework/var_desc.h b/paddle/framework/var_desc.h index 
464fece85fe5c674690c2034054e551f14db2138..688a46f83982fc464c7602ec1041ad3f42122211 100644 --- a/paddle/framework/var_desc.h +++ b/paddle/framework/var_desc.h @@ -34,6 +34,7 @@ inline std::vector RepeatedToVector( template inline void VectorToRepeated(const std::vector &vec, RepeatedField *repeated_field) { + repeated_field->Clear(); repeated_field->Reserve(vec.size()); for (const auto &elem : vec) { *repeated_field->Add() = elem; @@ -44,6 +45,7 @@ inline void VectorToRepeated(const std::vector &vec, template inline void VectorToRepeated(const std::vector &vec, RepeatedField *repeated_field) { + repeated_field->Clear(); repeated_field->Reserve(vec.size()); for (auto elem : vec) { *repeated_field->Add() = elem; @@ -52,7 +54,10 @@ inline void VectorToRepeated(const std::vector &vec, class VarDescBind { public: - explicit VarDescBind(const std::string &name) { desc_.set_name(name); } + explicit VarDescBind(const std::string &name) { + desc_.set_name(name); + desc_.set_type(VarDesc::LOD_TENSOR); + } VarDesc *Proto() { return &desc_; } @@ -70,7 +75,14 @@ class VarDescBind { int32_t GetLodLevel() const; + VarDesc::VarType GetType() const { return desc_.type(); } + + void SetType(VarDesc::VarType type) { desc_.set_type(type); } + private: + const TensorDesc &tensor_desc() const; + TensorDesc *mutable_tensor_desc(); + VarDesc desc_; }; } // namespace framework diff --git a/paddle/gserver/layers/CostLayer.cpp b/paddle/gserver/layers/CostLayer.cpp index ce071323ff585d28c9eaf80fec9be2394be526d1..0bb6f84c22eefbfb3678d6f15651f22c91454c2c 100644 --- a/paddle/gserver/layers/CostLayer.cpp +++ b/paddle/gserver/layers/CostLayer.cpp @@ -462,8 +462,8 @@ void LambdaCost::calcGrad(const real* outputScore, real score_j = score[index_j]; real dcgDif = 0; if (j < sortSize) { - dcgDif = (std::pow(2, score_i) - std::pow(2, score_j)) / - (std::log(i + 2) - std::log(j + 2)); + dcgDif = (std::pow(2, score_i) - std::pow(2, score_j)) * + (1 / std::log(i + 2) - 1 / std::log(j + 2)); } else { dcgDif 
= (std::pow(2, score_i) - std::pow(2, score_j)) / std::log(i + 2); diff --git a/paddle/gserver/layers/Layer.h b/paddle/gserver/layers/Layer.h index 4002a3d0747a86ab7b495ffe52247521831b71b8..9813a556076bc2666869a85225feaf10f345217a 100644 --- a/paddle/gserver/layers/Layer.h +++ b/paddle/gserver/layers/Layer.h @@ -86,6 +86,7 @@ protected: /// Also used in 'use_mkldnn' case. std::vector outputOtherDevice_; /// If there are several outputs, map them by each name. + /// MKLDNNLayer use it only to merge output grad std::map outputMap_; /// Used to merge grad on different devices. MatrixPtr tmpGrad_; @@ -325,6 +326,11 @@ public: outputMap_[name] = output; } + /** + * Get the output map size, if layer has multi-output. + */ + size_t getOutputMapSize() { return outputMap_.size(); } + /** * Get the output based on layer's name. */ diff --git a/paddle/gserver/layers/MKLDNNConvLayer.cpp b/paddle/gserver/layers/MKLDNNConvLayer.cpp index 0d6742e909635c1097b4fe21bbb304f8a71af5cb..8b67a1ef4ffdd42559f8078873ed135751d56674 100644 --- a/paddle/gserver/layers/MKLDNNConvLayer.cpp +++ b/paddle/gserver/layers/MKLDNNConvLayer.cpp @@ -225,8 +225,6 @@ void MKLDNNConvLayer::resetFwdPipeline( MKLDNNMatrixPtr& wgt, MKLDNNMatrixPtr& bias, MKLDNNMatrixPtr& out) { - pipeline.clear(); - if (cvtInVal_) { pipeline.push_back(*cvtInVal_); } @@ -245,7 +243,7 @@ void MKLDNNConvLayer::resetFwdPipeline( void MKLDNNConvLayer::resetInValue( std::shared_ptr& pd, MKLDNNMatrixPtr& in) { - const MatrixPtr& inMat = inputLayers_[0]->getOutput().value; + const MatrixPtr& inMat = inputLayers_[0]->getOutputValue(); in = MKLDNNMatrix::create(inMat, pd->src_primitive_desc()); // create buffer and reorder if input value do not match @@ -310,15 +308,20 @@ void MKLDNNConvLayer::resetOutValue( const MatrixPtr& cpuOut = getOutput(CPU_DEVICE).value; memory::dims outDims = memory::dims{bs_, oc_, oh_, ow_}; cpuOutVal_ = MKLDNNMatrix::create(cpuOut, outDims, format::nchw, engine_); - if (cpuOutVal_->getPrimitiveDesc() != 
out->getPrimitiveDesc()) { + if (cpuOutVal_->getPrimitiveDesc() != pd->dst_primitive_desc()) { + out = MKLDNNMatrix::create(nullptr, pd->dst_primitive_desc()); cvtOutVal_ = MKLDNNMatrix::createReorder(out, cpuOutVal_); - CHECK(cvtOutVal_) << "should not be emptry"; + CHECK(cvtOutVal_) << "should not be empty"; } else { - // CPU output share the same data of MKLDNN output - cpuOut->setData(out->getData()); cpuOutVal_ = out; } + // when output is cpu device, change the mkldnn output value and make them + // share the same data. Then if next layer use inputlayer->getOuputValue() + // to achieve the input value, it will get the right data. + output_.value = std::dynamic_pointer_cast(cpuOutVal_); + return; } + output_.value = std::dynamic_pointer_cast(out); } void MKLDNNConvLayer::resetBwdWgtPD( @@ -412,8 +415,6 @@ void MKLDNNConvLayer::resetBwdPipeline( MKLDNNMatrixPtr& wgt, MKLDNNMatrixPtr& bias, MKLDNNMatrixPtr& out) { - pipeline.clear(); - if (cvtOutGrad_) { pipeline.push_back(*cvtOutGrad_); } @@ -446,28 +447,27 @@ void MKLDNNConvLayer::resetBwdPipeline( void MKLDNNConvLayer::resetOutGrad( std::shared_ptr& wgtPD, MKLDNNMatrixPtr& out) { - const MatrixPtr& outMat = output_.grad; - out = MKLDNNMatrix::create(outMat, wgtPD->diff_dst_primitive_desc()); - CHECK(outVal_ != nullptr && - out->getPrimitiveDesc() == outVal_->getPrimitiveDesc()) - << "primitive desc of out grad and value should be equal"; - - // TODO(TJ): merge outgrad - // create reorder if has output grad does not match cpuOutGrad_ = nullptr; cvtOutGrad_ = nullptr; - if (!outputIsOnlyMKLDNN()) { + CHECK(outVal_ != nullptr && + outVal_->getPrimitiveDesc() == wgtPD->diff_dst_primitive_desc()) + << "primitive desc of out grad and value should be equal"; + if (outputIsOnlyMKLDNN()) { + MKLDNNLayer::resetOutGrad(out, outVal_->getPrimitiveDesc()); + } else { const MatrixPtr& cpuOut = getOutput(CPU_DEVICE).grad; - outMat->setData(cpuOut->getData()); // same PrimitiveDesc with cpuInVal_ CHECK(cpuOutVal_); 
cpuOutGrad_ = MKLDNNMatrix::create(cpuOut, cpuOutVal_->getPrimitiveDesc()); - if (cpuOutGrad_->getPrimitiveDesc() == out->getPrimitiveDesc()) { - out = cpuOutGrad_; - } else { - out = MKLDNNMatrix::create(nullptr, wgtPD->diff_dst_primitive_desc()); + // create reorder if primitive desc does not match + if (cpuOutGrad_->getPrimitiveDesc() != outVal_->getPrimitiveDesc()) { + out = MKLDNNMatrix::create(output_.grad, outVal_->getPrimitiveDesc()); cvtOutGrad_ = MKLDNNMatrix::createReorder(cpuOutGrad_, out); CHECK(cvtOutGrad_); + } else { + // share the same data of CPU output + output_.grad->setData(cpuOut->getData()); + out = cpuOutGrad_; } } } @@ -496,32 +496,30 @@ void MKLDNNConvLayer::resetWgtBiasGrad( void MKLDNNConvLayer::resetInGrad( std::shared_ptr& dataPD, MKLDNNMatrixPtr& in) { + in = nullptr; + cpuInGrad_ = nullptr; + cvtInGrad_ = nullptr; if (dataPD == nullptr) { return; } - // TODO(TJ): use outputMaps_ ways to get the inGrad_ when merge outgrad done - in = MKLDNNMatrix::create(inputLayers_[0]->getOutput().grad, - dataPD->diff_src_primitive_desc()); - CHECK(nullptr != inVal_ && - in->getPrimitiveDesc() == inVal_->getPrimitiveDesc()) - << "primitive desc of input grad and value should be equal"; - - // create reorder if has output grad does not match - cpuInGrad_ = nullptr; - cvtInGrad_ = nullptr; - if (!inputIsOnlyMKLDNN()) { + if (inputIsOnlyMKLDNN()) { + MKLDNNLayer::resetInGrad(in, dataPD->diff_src_primitive_desc()); + CHECK(nullptr != inVal_ && + in->getPrimitiveDesc() == inVal_->getPrimitiveDesc()) + << "primitive desc of input grad and value should be equal"; + } else { const MatrixPtr& cpuIn = getInputGrad(0, CPU_DEVICE); // same PrimitiveDesc with cpuInVal_ CHECK(cpuInVal_); cpuInGrad_ = MKLDNNMatrix::create(cpuIn, cpuInVal_->getPrimitiveDesc()); - if (cpuInGrad_->getPrimitiveDesc() != in->getPrimitiveDesc()) { - const MatrixPtr& dnnIn = getInputGrad(0, MKLDNN_DEVICE); - in = MKLDNNMatrix::create(dnnIn, in->getPrimitiveDesc()); + in = cpuInGrad_; + 
// create reorder if PrimitiveDesc does not match + if (cpuInGrad_->getPrimitiveDesc() != dataPD->diff_src_primitive_desc()) { + in = MKLDNNMatrix::create(getInputGrad(0, MKLDNN_DEVICE), + dataPD->diff_src_primitive_desc()); cvtInGrad_ = MKLDNNMatrix::createReorder(in, cpuInGrad_); CHECK(cvtInGrad_); - } else { - in = cpuInGrad_; } } } diff --git a/paddle/gserver/layers/MKLDNNFcLayer.cpp b/paddle/gserver/layers/MKLDNNFcLayer.cpp index e829456d6afd7cc844f752d4571cd9f90c73997f..cf19a155681f3a1ceb20af67245c8f2b8fa8fa73 100644 --- a/paddle/gserver/layers/MKLDNNFcLayer.cpp +++ b/paddle/gserver/layers/MKLDNNFcLayer.cpp @@ -180,10 +180,10 @@ void MKLDNNFcLayer::resetWgtBiasValue(MKLDNNMatrixPtr& wgt, void MKLDNNFcLayer::resetOutValue(MKLDNNMatrixPtr& out) { out = MKLDNNMatrix::create(output_.value, {bs_, oc_}, format::nc, engine_); if (!outputIsOnlyMKLDNN()) { - // fc cpu output value do not need create convert - // just share point + // fc cpu output value do not need create convert, just share data getOutput(CPU_DEVICE).value->setData(out->getData()); } + output_.value = std::dynamic_pointer_cast(out); } void MKLDNNFcLayer::resetFwdPD(std::shared_ptr& pd, @@ -214,8 +214,6 @@ void MKLDNNFcLayer::resetFwdPipeline( MKLDNNMatrixPtr& wgt, MKLDNNMatrixPtr& bias, MKLDNNMatrixPtr& out) { - pipeline.clear(); - if (bias) { fwd_.reset(new fc_fwd(*pd, *in, *wgt, *bias, *out)); } else { @@ -237,19 +235,14 @@ void MKLDNNFcLayer::resetBwdBuffers(MKLDNNMatrixPtr& in, } void MKLDNNFcLayer::resetOutGrad(MKLDNNMatrixPtr& out) { - // TODO(TJ): merge outgrad - int device = outputIsOnlyMKLDNN() ? MKLDNN_DEVICE : CPU_DEVICE; - output_.grad->setData(getOutput(device).grad->getData()); - // for MKLDNN device: - // can not directly cast outputgrad to mkldnnmatrix, - // since each layer can not write the inputgrad to mkldnn inputgrad. - // So just create from matrix with outputvalue format. 
- // for CPU device: - // fc do not need to convert from cpu device since output is always nc format - // only need create from cpu device CHECK(outVal_); - out = - MKLDNNMatrix::create(getOutput(device).grad, outVal_->getPrimitiveDesc()); + if (outputIsOnlyMKLDNN()) { + MKLDNNLayer::resetOutGrad(out, outVal_->getPrimitiveDesc()); + } else { + const MatrixPtr& cpuOut = getOutput(CPU_DEVICE).grad; + output_.grad->setData(cpuOut->getData()); + out = MKLDNNMatrix::create(cpuOut, outVal_->getPrimitiveDesc()); + } } void MKLDNNFcLayer::resetWgtBiasGrad(MKLDNNMatrixPtr& wgt, @@ -267,13 +260,11 @@ void MKLDNNFcLayer::resetWgtBiasGrad(MKLDNNMatrixPtr& wgt, void MKLDNNFcLayer::resetInGrad(MKLDNNMatrixPtr& in) { in = nullptr; - const MatrixPtr& inGrad = inputLayers_[0]->getOutput().grad; - if (inGrad == nullptr) { + if (inputLayers_[0]->getOutput().grad == nullptr) { return; } - // TODO(TJ): use outputMaps_ ways to get the inGrad_ when merge outgrad done CHECK(inVal_); - in = MKLDNNMatrix::create(inGrad, inVal_->getPrimitiveDesc()); + MKLDNNLayer::resetInGrad(in, inVal_->getPrimitiveDesc()); } void MKLDNNFcLayer::resetBwdWgtPD( @@ -314,7 +305,6 @@ void MKLDNNFcLayer::resetBwdPipeline( MKLDNNMatrixPtr& wgt, MKLDNNMatrixPtr& bias, MKLDNNMatrixPtr& out) { - pipeline.clear(); CHECK(inVal_); if (bias) { bwdWgt_.reset(new fc_bwdWgt(*bwdWgtPD, *inVal_, *out, *wgt, *bias)); diff --git a/paddle/gserver/layers/MKLDNNLayer.h b/paddle/gserver/layers/MKLDNNLayer.h index c09fd89462ef4fdaeaae3e122f96b0cc6ce373ea..5f9923da769781287e39a3aaaf92248dfe09f225 100644 --- a/paddle/gserver/layers/MKLDNNLayer.h +++ b/paddle/gserver/layers/MKLDNNLayer.h @@ -65,6 +65,17 @@ protected: MKLDNNMatrixPtr biasVal_; MKLDNNMatrixPtr biasGrad_; + // merge grad primitive + std::shared_ptr mergeGrad_; + std::vector pipelineMergeGrad_; + // tmp input argument to save input grad, only used to merge grad + Argument tmpInArg_; + // since mkldnn sum do not support different formats: + // can refer to 
https://github.com/01org/mkl-dnn/issues/134 + // so need create reorder manually and save tmp MKLDNNMatrix + MKLDNNMatrixPtr tmpOutGrad_; + std::shared_ptr tmpCvt_; + public: explicit MKLDNNLayer(const LayerConfig& config) : Layer(config), @@ -99,6 +110,7 @@ public: if (!Layer::init(layerMap, parameterMap)) { return false; } + setOutputMap(); checkCPUOutputsNumber(); stream_.reset(new MKLDNNStream()); @@ -118,12 +130,9 @@ public: VLOG(MKLDNN_BASE) << getName() << " reset mkldnn forward"; // reset when input total sizes changed, not only the batchsize inputElemenCnt_ = elemenCnt; + pipelineFwd_.clear(); reshape(bs_, ic_, ih_, iw_, oc_, oh_, ow_); resetFwd(pipelineFwd_, inVal_, wgtVal_, biasVal_, outVal_); - if (outVal_) { - // change original output value to mkldnn output value - output_.value = std::dynamic_pointer_cast(outVal_); - } convertWeightsFromPaddle(); needResetBwd_ = true; } @@ -144,9 +153,18 @@ public: void backward(const UpdateCallback& callback) override { if (needResetBwd_) { VLOG(MKLDNN_BASE) << getName() << " reset mkldnn backward"; + pipelineBwd_.clear(); + pipelineMergeGrad_.clear(); + mergeGrad_ = nullptr; resetBwd(pipelineBwd_, inGrad_, wgtGrad_, biasGrad_, outGrad_); needResetBwd_ = false; } + + // merge grad must before backward activation + if (mergeGrad_) { + REGISTER_TIMER_INFO("MergeBpGrad", getName().c_str()); + stream_->submit(pipelineMergeGrad_); + } { REGISTER_TIMER_INFO("BpActTimer", getName().c_str()); backwardActivation(); @@ -247,6 +265,76 @@ protected: } } + /** + * reset the output grad matrix from primitive desc. + * and reset the merge grad primitive if needed. + * note: when this layer has serval outputs, + * it could not be mixed with cpu device, + * since it can not get memory desc from cpu device. 
+ */ + virtual void resetOutGrad(MKLDNNMatrixPtr& out, + mkldnn::memory::primitive_desc pd) { + CHECK(outputIsOnlyMKLDNN()) << "do not support mixed with other device yet"; + mergeGrad_ = nullptr; + pipelineMergeGrad_.clear(); + out = MKLDNNMatrix::create(output_.grad, pd); + if (outputMap_.size() <= 1) { + return; + } + std::vector scales(outputMap_.size(), 1.0); + std::vector srcPDs; + std::vector srcs; + for (auto it = outputMap_.begin(); it != outputMap_.end(); ++it) { + MKLDNNMatrixPtr src = + std::dynamic_pointer_cast(it->second->grad); + VLOG(MKLDNN_BASE) << getName() << " has output grad " << it->first; + CHECK(src) << "should be MKLDNNMatrix"; + auto srcDims = src->getDims(); + auto dstDims = out->getDims(); + CHECK_EQ(srcDims.size(), dstDims.size()); + for (size_t i = 0; i < srcDims.size(); ++i) { + CHECK_EQ(srcDims[i], dstDims[i]); + } + srcPDs.push_back(src->getPrimitiveDesc()); + srcs.push_back(*src); + } + + // TODO(TJ): remove me when mkldnn sum support different formats + for (size_t i = 1; i < srcPDs.size(); ++i) { + CHECK(srcPDs[0] == srcPDs[i]); + } + tmpOutGrad_ = nullptr; + tmpCvt_ = nullptr; + if (out->getPrimitiveDesc() != srcPDs[0]) { + tmpOutGrad_ = MKLDNNMatrix::create(nullptr, srcPDs[0]); + tmpCvt_ = MKLDNNMatrix::createReorder(tmpOutGrad_, out); + CHECK(tmpCvt_); + pipelineMergeGrad_.push_back(*tmpCvt_); + } else { + tmpOutGrad_ = out; + } + + auto sumPD = mkldnn::sum::primitive_desc( + tmpOutGrad_->getMemoryDesc(), scales, srcPDs); + mergeGrad_.reset(new mkldnn::sum(sumPD, srcs, *tmpOutGrad_)); + pipelineMergeGrad_.insert(pipelineMergeGrad_.begin(), *mergeGrad_); + } + + /** + * reset input grad from primitive desc. + * this function is avaiable for input is only mkldnn + * or input do not care cpu device + */ + virtual void resetInGrad(MKLDNNMatrixPtr& in, + mkldnn::memory::primitive_desc pd) { + LayerPtr& input = inputLayers_[0]; + const MatrixPtr& grad = + input->getOutputMapSize() > 1 ? 
nullptr : input->getOutput().grad; + in = MKLDNNMatrix::create(grad, pd); + Argument& arg = input->getOutput(this->getName()); + arg.grad = std::dynamic_pointer_cast(in); + } + /** * print info about sizes */ @@ -334,6 +422,16 @@ private: } } + /** + * Set output map of prev layers. + */ + void setOutputMap() { + outputMap_.clear(); + for (size_t i = 0; i < inputLayers_.size(); ++i) { + inputLayers_[i]->setOutput(getName(), &tmpInArg_); + } + } + /** * Check the cpu device number of outputOtherDevice_. * should have only one at most. diff --git a/paddle/gserver/layers/MKLDNNPoolLayer.cpp b/paddle/gserver/layers/MKLDNNPoolLayer.cpp index b62dfb7c54258a593aa50d5b30096423f375c69d..5606aae80ce8e9a1e571d3c057c471b26a59d032 100644 --- a/paddle/gserver/layers/MKLDNNPoolLayer.cpp +++ b/paddle/gserver/layers/MKLDNNPoolLayer.cpp @@ -142,14 +142,16 @@ void MKLDNNPoolLayer::resetOutValue(MKLDNNMatrixPtr& out) { const MatrixPtr& cpuOut = getOutput(CPU_DEVICE).value; cpuOutVal_ = MKLDNNMatrix::create(cpuOut, outDims, format::nchw, engine_); if (cpuOutVal_->getPrimitiveDesc() != out->getPrimitiveDesc()) { + out = MKLDNNMatrix::create(nullptr, out->getPrimitiveDesc()); cvtOutVal_ = MKLDNNMatrix::createReorder(out, cpuOutVal_); CHECK(cvtOutVal_) << "should not be emptry"; } else { - // CPU output share the same data of MKLDNN output - cpuOut->setData(out->getData()); cpuOutVal_ = out; } + output_.value = std::dynamic_pointer_cast(cpuOutVal_); + return; } + output_.value = std::dynamic_pointer_cast(outVal_); } void MKLDNNPoolLayer::resetFwdPD(std::shared_ptr& pd, @@ -187,7 +189,6 @@ void MKLDNNPoolLayer::resetFwdPipeline( std::shared_ptr& pd, MKLDNNMatrixPtr& in, MKLDNNMatrixPtr& out) { - pipeline.clear(); fwd_ = workspace_ ? 
std::make_shared(pool_fwd(*pd, *in, *out, *workspace_)) : std::make_shared(pool_fwd(*pd, *in, *out)); @@ -205,17 +206,17 @@ void MKLDNNPoolLayer::resetBwdBuffers(MKLDNNMatrixPtr& in, resetInGrad(in); } void MKLDNNPoolLayer::resetOutGrad(MKLDNNMatrixPtr& out) { - CHECK(outVal_) << "Should have output value"; - out = MKLDNNMatrix::create(output_.grad, outVal_->getPrimitiveDesc()); - - // create reorder if output value has cpu device and pd do not match cpuOutGrad_ = nullptr; cvtOutGrad_ = nullptr; - if (!outputIsOnlyMKLDNN()) { + CHECK(outVal_); + if (outputIsOnlyMKLDNN()) { + MKLDNNLayer::resetOutGrad(out, outVal_->getPrimitiveDesc()); + } else { const MatrixPtr& cpuOut = getOutput(CPU_DEVICE).grad; cpuOutGrad_ = MKLDNNMatrix::create( cpuOut, memory::dims{bs_, oc_, oh_, ow_}, format::nchw, engine_); - if (cpuOutGrad_->getPrimitiveDesc() != out->getPrimitiveDesc()) { + if (cpuOutGrad_->getPrimitiveDesc() != outVal_->getPrimitiveDesc()) { + out = MKLDNNMatrix::create(output_.grad, outVal_->getPrimitiveDesc()); cvtOutGrad_ = MKLDNNMatrix::createReorder(cpuOutGrad_, out); CHECK(cvtOutGrad_) << "should not be emptry"; } else { @@ -228,12 +229,11 @@ void MKLDNNPoolLayer::resetOutGrad(MKLDNNMatrixPtr& out) { void MKLDNNPoolLayer::resetInGrad(MKLDNNMatrixPtr& in) { in = nullptr; - const MatrixPtr& inGrad = inputLayers_[0]->getOutput().grad; - if (inGrad == nullptr) { + if (inputLayers_[0]->getOutput().grad == nullptr) { return; } CHECK(inVal_); - in = MKLDNNMatrix::create(inGrad, inVal_->getPrimitiveDesc()); + MKLDNNLayer::resetInGrad(in, inVal_->getPrimitiveDesc()); } void MKLDNNPoolLayer::resetBwdPD(std::shared_ptr& pd, @@ -261,7 +261,6 @@ void MKLDNNPoolLayer::resetBwdPipeline( std::shared_ptr& pd, MKLDNNMatrixPtr& in, MKLDNNMatrixPtr& out) { - pipeline.clear(); if (cvtOutGrad_) { pipeline.push_back(*cvtOutGrad_); } diff --git a/paddle/gserver/tests/MKLDNNTester.cpp b/paddle/gserver/tests/MKLDNNTester.cpp index 
f59618be9d09d146be52fb51cae84f4d24c15ef1..eaebdd671cfa1b37e5efe149588ca23fdc402a8e 100644 --- a/paddle/gserver/tests/MKLDNNTester.cpp +++ b/paddle/gserver/tests/MKLDNNTester.cpp @@ -124,8 +124,8 @@ void MKLDNNTester::randomTopDiffs() { void MKLDNNTester::checkForward() { VLOG(MKLDNN_ALL) << "Check Forward"; printTopDatas(); - double delta = compareMatrix(dnnLayer_->getOutput(CPU_DEVICE).value, - refLayer_->getOutputValue()); + double delta = + compareMatrix(dnnLayer_->getOutputValue(), refLayer_->getOutputValue()); EXPECT_LE(fabs(delta), eps_); } diff --git a/paddle/operators/CMakeLists.txt b/paddle/operators/CMakeLists.txt index ad941bde2be3bbbc6d910fff262ea4cb3878f8be..75fcc1cda165197fc4413efc6bbbc440088cb4cd 100644 --- a/paddle/operators/CMakeLists.txt +++ b/paddle/operators/CMakeLists.txt @@ -84,8 +84,9 @@ function(op_library TARGET) endif() # pybind USE_NO_KERNEL_OP + # HACK: if REGISTER_OP_CPU_KERNEL presents the operator must have kernel file(READ ${TARGET}.cc TARGET_CONTENT) - string(REGEX MATCH "OperatorWithKernel" regex_result "${TARGET_CONTENT}") + string(REGEX MATCH "REGISTER_OP_CPU_KERNEL" regex_result "${TARGET_CONTENT}") string(REPLACE "_op" "" TARGET "${TARGET}") if (${pybind_flag} EQUAL 0 AND regex_result STREQUAL "") file(APPEND ${pybind_file} "USE_NO_KERNEL_OP(${TARGET});\n") diff --git a/paddle/operators/activation_op.cc b/paddle/operators/activation_op.cc index cba57ba57f5e03c7861897e177cc09aa513e5395..84c3775b4fc2602e5df9bb454d21b318b8fda493 100644 --- a/paddle/operators/activation_op.cc +++ b/paddle/operators/activation_op.cc @@ -338,6 +338,38 @@ class ThresholdedReluOpMaker : public framework::OpProtoAndCheckerMaker { } }; +template +class HardSigmoidOpMaker : public framework::OpProtoAndCheckerMaker { + public: + HardSigmoidOpMaker(framework::OpProto *proto, + framework::OpAttrChecker *op_checker) + : OpProtoAndCheckerMaker(proto, op_checker) { + AddInput("X", "Input of HardSigmoid operator"); + AddOutput("Y", "Output of HardSigmoid 
operator"); + AddComment(R"DOC( +Hard Sigmoid activation operator. + +Segment-wise linear approximation of sigmoid[1]. +This is much faster than sigmoid. + +hard_sigmoid = max(0, min(1, slope * x + shift)) + +The slope should be positive. The offset can be either positive or negative. +The default slope and shift are set from [1]. +It is recommended to use the defaults for this activation. + +References: + [1] Noisy Activation Functions + (https://arxiv.org/abs/1603.00391) + + )DOC"); + AddAttr("slope", "Slope for linear approximation of sigmoid") + .SetDefault(static_cast(0.2)); + AddAttr("offset", "Offset for linear approximation of sigmoid") + .SetDefault(static_cast(0.5)); + } +}; + } // namespace operators } // namespace paddle @@ -413,6 +445,9 @@ REGISTER_OP(thresholded_relu, ops::ActivationOp, ops::ThresholdedReluOpMaker, thresholded_relu_grad, ops::ActivationOpGrad); +REGISTER_OP(hard_sigmoid, ops::ActivationOp, ops::HardSigmoidOpMaker, + hard_sigmoid_grad, ops::ActivationOpGrad); + #define REGISTER_ACTIVATION_CPU_KERNEL(act_type, functor, grad_functor) \ REGISTER_OP_CPU_KERNEL( \ act_type, \ diff --git a/paddle/operators/activation_op.h b/paddle/operators/activation_op.h index 502c33be103c465c14f128be38ac62d029f1bfb9..4f4eb44fedc0a89cdcf60fb7177014a11eb96048 100644 --- a/paddle/operators/activation_op.h +++ b/paddle/operators/activation_op.h @@ -616,30 +616,63 @@ struct ThresholdedReluGradFunctor : public BaseActivationFunctor { } }; +template +struct HardSigmoidFunctor : public BaseActivationFunctor { + float slope; + float offset; + typename BaseActivationFunctor::AttrPair GetAttrs() { + return {{"slope", &slope}, {"offset", &offset}}; + } + + template + void operator()(Device d, X x, Y y) const { + auto temp = x * static_cast(slope) + static_cast(offset); + y.device(d) = temp.cwiseMax(static_cast(0)).cwiseMin(static_cast(1)); + } +}; + +template +struct HardSigmoidGradFunctor : public BaseActivationFunctor { + float slope; + float offset; + typename 
BaseActivationFunctor::AttrPair GetAttrs() { + return {{"slope", &slope}, {"offset", &offset}}; + } + + template + void operator()(Device d, X x, Y y, dY dy, dX dx) const { + dx.device(d) = + dy * + ((y > static_cast(0)) * (y < static_cast(1))).template cast() * + static_cast(slope); + } +}; + } // namespace operators } // namespace paddle -#define FOR_EACH_KERNEL_FUNCTOR(__macro) \ - __macro(sigmoid, SigmoidFunctor, SigmoidGradFunctor); \ - __macro(logsigmoid, LogSigmoidFunctor, LogSigmoidGradFunctor); \ - __macro(exp, ExpFunctor, ExpGradFunctor); \ - __macro(relu, ReluFunctor, ReluGradFunctor); \ - __macro(tanh, TanhFunctor, TanhGradFunctor); \ - __macro(softshrink, SoftShrinkFunctor, SoftShrinkGradFunctor); \ - __macro(sqrt, SqrtFunctor, SqrtGradFunctor); \ - __macro(abs, AbsFunctor, AbsGradFunctor); \ - __macro(reciprocal, ReciprocalFunctor, ReciprocalGradFunctor); \ - __macro(log, LogFunctor, LogGradFunctor); \ - __macro(square, SquareFunctor, SquareGradFunctor); \ - __macro(brelu, BReluFunctor, BReluGradFunctor); \ - __macro(soft_relu, SoftReluFunctor, SoftReluGradFunctor); \ - __macro(pow, PowFunctor, PowGradFunctor); \ - __macro(stanh, STanhFunctor, STanhGradFunctor); \ - __macro(softplus, SoftplusFunctor, SoftplusGradFunctor); \ - __macro(softsign, SoftsignFunctor, SoftsignGradFunctor); \ - __macro(relu6, Relu6Functor, Relu6GradFunctor); \ - __macro(leaky_relu, LeakyReluFunctor, LeakyReluGradFunctor); \ - __macro(tanh_shrink, TanhShrinkFunctor, TanhShrinkGradFunctor); \ - __macro(elu, ELUFunctor, ELUGradFunctor); \ - __macro(hard_shrink, HardShrinkFunctor, HardShrinkGradFunctor); \ +#define FOR_EACH_KERNEL_FUNCTOR(__macro) \ + __macro(sigmoid, SigmoidFunctor, SigmoidGradFunctor); \ + __macro(logsigmoid, LogSigmoidFunctor, LogSigmoidGradFunctor); \ + __macro(exp, ExpFunctor, ExpGradFunctor); \ + __macro(relu, ReluFunctor, ReluGradFunctor); \ + __macro(tanh, TanhFunctor, TanhGradFunctor); \ + __macro(softshrink, SoftShrinkFunctor, SoftShrinkGradFunctor); \ + 
__macro(sqrt, SqrtFunctor, SqrtGradFunctor); \ + __macro(abs, AbsFunctor, AbsGradFunctor); \ + __macro(reciprocal, ReciprocalFunctor, ReciprocalGradFunctor); \ + __macro(log, LogFunctor, LogGradFunctor); \ + __macro(square, SquareFunctor, SquareGradFunctor); \ + __macro(brelu, BReluFunctor, BReluGradFunctor); \ + __macro(soft_relu, SoftReluFunctor, SoftReluGradFunctor); \ + __macro(pow, PowFunctor, PowGradFunctor); \ + __macro(stanh, STanhFunctor, STanhGradFunctor); \ + __macro(softplus, SoftplusFunctor, SoftplusGradFunctor); \ + __macro(softsign, SoftsignFunctor, SoftsignGradFunctor); \ + __macro(relu6, Relu6Functor, Relu6GradFunctor); \ + __macro(leaky_relu, LeakyReluFunctor, LeakyReluGradFunctor); \ + __macro(tanh_shrink, TanhShrinkFunctor, TanhShrinkGradFunctor); \ + __macro(elu, ELUFunctor, ELUGradFunctor); \ + __macro(hard_shrink, HardShrinkFunctor, HardShrinkGradFunctor); \ + __macro(hard_sigmoid, HardSigmoidFunctor, HardSigmoidGradFunctor); \ __macro(thresholded_relu, ThresholdedReluFunctor, ThresholdedReluGradFunctor); diff --git a/paddle/operators/adam_op.cc b/paddle/operators/adam_op.cc new file mode 100644 index 0000000000000000000000000000000000000000..293b37b7750427cb88efb6dfd5a02dcf7ede24ac --- /dev/null +++ b/paddle/operators/adam_op.cc @@ -0,0 +1,144 @@ +/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserve. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. 
*/ + +#include "paddle/operators/adam_op.h" + +namespace paddle { +namespace operators { + +class AdamOp : public framework::OperatorWithKernel { + public: + using framework::OperatorWithKernel::OperatorWithKernel; + + protected: + void InferShape(framework::InferShapeContext *ctx) const override { + PADDLE_ENFORCE(ctx->HasInput("Param"), + "Input(Param) of AdamOp should not be null."); + PADDLE_ENFORCE(ctx->HasInput("Grad"), + "Input(Grad) of AdamOp should not be null."); + PADDLE_ENFORCE(ctx->HasInput("Moment1"), + "Input(Moment1) of AdamOp should not be null."); + PADDLE_ENFORCE(ctx->HasInput("Moment2"), + "Input(Moment2) of AdamOp should not be null."); + PADDLE_ENFORCE(ctx->HasInput("LearningRate"), + "Input(LearningRate) of AdamOp should not be null."); + PADDLE_ENFORCE(ctx->HasInput("Beta1Pow"), + "Input(Beta1Pow) of AdamOp should not be null."); + PADDLE_ENFORCE(ctx->HasInput("Beta2Pow"), + "Input(Beta2Pow) of AdamOp should not be null."); + + PADDLE_ENFORCE(ctx->HasOutput("ParamOut"), + "Output(ParamOut) of AdamOp should not be null."); + PADDLE_ENFORCE(ctx->HasOutput("Moment1Out"), + "Output(Moment1Out) of AdamOp should not be null."); + PADDLE_ENFORCE(ctx->HasOutput("Moment2Out"), + "Output(Moment2Out) of AdamOp should not be null."); + PADDLE_ENFORCE(ctx->HasOutput("Beta1PowOut"), + "Output(Beta1PowOut) of AdamOp should not be null."); + PADDLE_ENFORCE(ctx->HasOutput("Beta2PowOut"), + "Output(Beta2PowOut) of AdamOp should not be null."); + + auto lr_dims = ctx->GetInputDim("LearningRate"); + PADDLE_ENFORCE_EQ(framework::product(lr_dims), 1, + "Learning rate should have 1 dimension"); + auto beta1_pow_dims = ctx->GetInputDim("Beta1Pow"); + PADDLE_ENFORCE_EQ(framework::product(beta1_pow_dims), 1, + "Beta1 power accumulator should have 1 dimension"); + auto beta2_pow_dims = ctx->GetInputDim("Beta2Pow"); + PADDLE_ENFORCE_EQ(framework::product(beta2_pow_dims), 1, + "Beta2 power accumulator should have 1 dimension"); + + auto param_dims = 
ctx->GetInputDim("Param"); + PADDLE_ENFORCE_EQ( + param_dims, ctx->GetInputDim("Grad"), + "Param and Grad input of AdamOp should have same dimension"); + PADDLE_ENFORCE_EQ( + param_dims, ctx->GetInputDim("Moment1"), + "Param and Moment1 input of AdamOp should have same dimension"); + PADDLE_ENFORCE_EQ( + param_dims, ctx->GetInputDim("Moment2"), + "Param and Moment2 input of AdamOp should have same dimension"); + + ctx->SetOutputDim("ParamOut", param_dims); + ctx->SetOutputDim("Moment1Out", param_dims); + ctx->SetOutputDim("Moment2Out", param_dims); + ctx->SetOutputDim("Beta1PowOut", beta1_pow_dims); + ctx->SetOutputDim("Beta2PowOut", beta2_pow_dims); + } +}; + +class AdamOpMaker : public framework::OpProtoAndCheckerMaker { + public: + AdamOpMaker(framework::OpProto *proto, framework::OpAttrChecker *op_checker) + : OpProtoAndCheckerMaker(proto, op_checker) { + AddInput("Param", "(Tensor) Input parameter"); + AddInput("Grad", "(Tensor) Input gradient"); + AddInput("LearningRate", "(Tensor) Learning rate"); + AddInput("Moment1", "(Tensor) Input first moment"); + AddInput("Moment2", "(Tensor) Input second moment"); + AddInput("Beta1Pow", "(Tensor) Input beta1 power accumulator"); + AddInput("Beta2Pow", "(Tensor) Input beta2 power accumulator"); + + AddOutput("ParamOut", "(Tensor) Output parameter"); + AddOutput("Moment1Out", "(Tensor) Output first moment"); + AddOutput("Moment2Out", "(Tensor) Output second moment"); + AddOutput("Beta1PowOut", "(Tensor) Output beta1 power accumulator"); + AddOutput("Beta2PowOut", "(Tensor) Output beta2 power accumulator"); + + AddAttr("beta1", + "(float, default 0.9) " + "Exponential decay rate for the " + "first moment estimates.") + .SetDefault(0.9f); + AddAttr("beta2", + "(float, default 0.999) " + "exponential decay rate for the " + "second moment estimates.") + .SetDefault(0.999f); + AddAttr("epsilon", + "(float, default 1.0e-8) " + "Constant for numerical stability") + .SetDefault(1.0e-8f); + + AddComment(R"DOC( +Adam Updates 
Operator. + +This implements the Adam optimizer from Section 2 of the Adam +paper[1]. Adam is a first-order gradient-based optimization +method based on adaptive estimates of lower-order moments. + +Adam updates: + +moment1_out = beta1 * moment1 + (1 − beta1) * grad +moment2_out = beta2 * moment2 + (1 − beta2) * grad * grad +beta1_pow_out = beta1_pow * beta1 +beta2_pow_out = beta2_pow * beta2 +learning_rate_t = learning_rate * + sqrt(1 - beta2_pow_out) / (1 - beta1_pow_out) +param_out = param - learning_rate_t * moment1_out / (sqrt(moment2_out) + epsilon) + +References: + [1] Adam: A Method for Stochastic Optimization + (https://arxiv.org/abs/1412.6980) + +)DOC"); + } +}; +} // namespace operators +} // namespace paddle + +namespace ops = paddle::operators; +REGISTER_OP_WITHOUT_GRADIENT(adam, ops::AdamOp, ops::AdamOpMaker); +REGISTER_OP_CPU_KERNEL(adam, + ops::AdamOpKernel); diff --git a/paddle/operators/adam_op.cu b/paddle/operators/adam_op.cu new file mode 100644 index 0000000000000000000000000000000000000000..a3def912e540454275350209435eb01ae2151331 --- /dev/null +++ b/paddle/operators/adam_op.cu @@ -0,0 +1,20 @@ +/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserve. + + Licensed under the Apache License, Version 2.0 (the "License"); + you may not use this file except in compliance with the License. + You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. 
*/ + +#define EIGEN_USE_GPU +#include "paddle/operators/adam_op.h" + +namespace ops = paddle::operators; +REGISTER_OP_GPU_KERNEL(adam, + ops::AdamOpKernel); diff --git a/paddle/operators/adam_op.h b/paddle/operators/adam_op.h new file mode 100644 index 0000000000000000000000000000000000000000..789c2f14b32478bf9ddc967fc5725bcf65ed2146 --- /dev/null +++ b/paddle/operators/adam_op.h @@ -0,0 +1,82 @@ +/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserve. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. */ + +#pragma once +#include "paddle/framework/eigen.h" +#include "paddle/framework/op_registry.h" + +namespace paddle { +namespace operators { + +template +class AdamOpKernel : public framework::OpKernel { + public: + void Compute(const framework::ExecutionContext& ctx) const override { + auto param_out_tensor = ctx.Output("ParamOut"); + auto moment1_out_tensor = ctx.Output("Moment1Out"); + auto moment2_out_tensor = ctx.Output("Moment2Out"); + auto beta1_pow_out_tensor = ctx.Output("Beta1PowOut"); + auto beta2_pow_out_tensor = ctx.Output("Beta2PowOut"); + + param_out_tensor->mutable_data(ctx.GetPlace()); + moment1_out_tensor->mutable_data(ctx.GetPlace()); + moment2_out_tensor->mutable_data(ctx.GetPlace()); + beta1_pow_out_tensor->mutable_data(ctx.GetPlace()); + beta2_pow_out_tensor->mutable_data(ctx.GetPlace()); + + float beta1 = ctx.Attr("beta1"); + float beta2 = ctx.Attr("beta2"); + float epsilon = ctx.Attr("epsilon"); + + auto param = framework::EigenVector::Flatten( + 
*ctx.Input("Param")); + auto grad = framework::EigenVector::Flatten( + *ctx.Input("Grad")); + auto moment1 = framework::EigenVector::Flatten( + *ctx.Input("Moment1")); + auto moment2 = framework::EigenVector::Flatten( + *ctx.Input("Moment2")); + auto lr = framework::EigenVector::Flatten( + *ctx.Input("LearningRate")); + auto beta1_pow = framework::EigenVector::Flatten( + *ctx.Input("Beta1Pow")); + auto beta2_pow = framework::EigenVector::Flatten( + *ctx.Input("Beta2Pow")); + auto param_out = framework::EigenVector::Flatten(*param_out_tensor); + auto moment1_out = framework::EigenVector::Flatten(*moment1_out_tensor); + auto moment2_out = framework::EigenVector::Flatten(*moment2_out_tensor); + auto beta1_pow_out = + framework::EigenVector::Flatten(*beta1_pow_out_tensor); + auto beta2_pow_out = + framework::EigenVector::Flatten(*beta2_pow_out_tensor); + auto place = ctx.GetEigenDevice(); + + moment1_out.device(place) = beta1 * moment1 + (1 - beta1) * grad; + moment2_out.device(place) = beta2 * moment2 + (1 - beta2) * grad.square(); + beta1_pow_out.device(place) = beta1_pow * beta1; + beta2_pow_out.device(place) = beta2_pow * beta2; + // All of these are tensors of 1 element + auto lr_t = lr * (1 - beta2_pow_out).sqrt() / (1 - beta1_pow_out); + // Eigen does not support automatic broadcast + // Get dimensions of moment vector to broadcast lr_t + Eigen::DSizes m_dsize(moment1_out_tensor->numel()); + param_out.device(place) = + param - + lr_t.broadcast(m_dsize) * + (moment1_out / (moment2_out.sqrt() + epsilon)); + } +}; + +} // namespace operators +} // namespace paddle diff --git a/paddle/operators/cond_op.cc b/paddle/operators/cond_op.cc index 2737104a205cbc1e18ce4a3a45592a416d38a874..adcd867f502d166f851926fde602dbb3fed9b48e 100644 --- a/paddle/operators/cond_op.cc +++ b/paddle/operators/cond_op.cc @@ -134,7 +134,7 @@ void CondOp::PrepareDataForSubnet( for (int i = 0; i < BRANCH_NUM; ++i) { for (auto& output : (*sub_net_op_[i]).Outputs()) { for (auto& var_name : 
output.second) { - sub_scopes[i]->NewVar(var_name); + sub_scopes[i]->Var(var_name); } } } diff --git a/paddle/operators/conv2d_op.cc b/paddle/operators/conv2d_op.cc index 6325d4248f10ea8a12ae5398d9fe0e579db3f7ae..1acb8415d0691df77047806d3c81b51cbb8c59f3 100644 --- a/paddle/operators/conv2d_op.cc +++ b/paddle/operators/conv2d_op.cc @@ -12,111 +12,91 @@ See the License for the specific language governing permissions and limitations under the License. */ -#include "paddle/operators/gemm_conv2d_op.h" +#include "paddle/operators/conv2d_op.h" namespace paddle { namespace operators { -int outputSize(int input_size, int filter_size, int padding, int stride) { - int output_size = (input_size - filter_size + 2 * padding) / stride + 1; - return output_size; +void Conv2DOp::InferShape(framework::InferShapeContext* ctx) const { + PADDLE_ENFORCE(ctx->HasInput("Input"), + "Input(Input) of Conv2DOp should not be null."); + PADDLE_ENFORCE(ctx->HasInput("Filter"), + "Input(Filter) of Conv2DOp should not be null."); + PADDLE_ENFORCE(ctx->HasOutput("Output"), + "Output(Output) of Conv2DOp should not be null."); + + auto in_dims = ctx->GetInputDim("Input"); + auto filter_dims = ctx->GetInputDim("Filter"); + std::vector strides = ctx->Attrs().Get>("strides"); + std::vector paddings = ctx->Attrs().Get>("paddings"); + int groups = ctx->Attrs().Get("groups"); + int input_channels = in_dims[1]; + int output_channels = filter_dims[0]; + + PADDLE_ENFORCE_EQ(in_dims.size(), 4, "Conv2DOp input should be 4-D."); + PADDLE_ENFORCE_EQ(filter_dims.size(), 4, "Conv2DOp filter should be 4-D."); + PADDLE_ENFORCE_EQ(input_channels, filter_dims[1] * groups, + "The number of input channels should be equal to filter " + "channels * groups."); + PADDLE_ENFORCE_EQ( + output_channels % groups, 0, + "The number of output channels should be divided by groups."); + + auto output_height = + OutputSize(in_dims[2], filter_dims[2], paddings[0], strides[0]); + auto output_width = + OutputSize(in_dims[3], 
filter_dims[3], paddings[1], strides[1]); + ctx->SetOutputDim("Output", + {in_dims[0], filter_dims[0], output_height, output_width}); } -class Conv2DOp : public framework::OperatorWithKernel { - public: - using framework::OperatorWithKernel::OperatorWithKernel; - - protected: - void InferShape(framework::InferShapeContext* ctx) const override { - PADDLE_ENFORCE(ctx->HasInput("Input"), - "Input(Input) of Conv2DOp should not be null."); - PADDLE_ENFORCE(ctx->HasInput("Filter"), - "Input(Filter) of Conv2DOp should not be null."); - PADDLE_ENFORCE(ctx->HasOutput("Output"), - "Output(Output) of Conv2DOp should not be null."); - - auto in_dims = ctx->GetInputDim("Input"); - auto filter_dims = ctx->GetInputDim("Filter"); - std::vector strides = ctx->Attrs().Get>("strides"); - std::vector paddings = ctx->Attrs().Get>("paddings"); - int groups = ctx->Attrs().Get("groups"); - int input_channels = in_dims[1]; - int output_channels = filter_dims[0]; - - PADDLE_ENFORCE_EQ(in_dims.size(), 4, "Conv2DOp input should be 4-D."); - PADDLE_ENFORCE_EQ(filter_dims.size(), 4, "Conv2DOp filter should be 4-D."); - PADDLE_ENFORCE_EQ(input_channels, filter_dims[1] * groups, - "The number of input channels should be equal to filter " - "channels * groups."); - PADDLE_ENFORCE_EQ( - output_channels % groups, 0, - "The number of output channels should be divided by groups."); - - auto output_height = - outputSize(in_dims[2], filter_dims[2], paddings[0], strides[0]); - auto output_width = - outputSize(in_dims[3], filter_dims[3], paddings[1], strides[1]); - ctx->SetOutputDim( - "Output", {in_dims[0], filter_dims[0], output_height, output_width}); - } -}; - -class Conv2DOpMaker : public framework::OpProtoAndCheckerMaker { - public: - Conv2DOpMaker(framework::OpProto* proto, framework::OpAttrChecker* op_checker) - : OpProtoAndCheckerMaker(proto, op_checker) { - AddInput( - "Input", - "The input tensor of convolution operator. " - "The format of input tensor is NCHW. 
Where N is batch size, C is the " - "number of channels, H and W is the height and width of image."); - AddInput( - "Filter", - "The filter tensor of convolution operator." - "The format of the filter tensor is MCHW, where M is the number of " - "output image channels, C is the number of input image channels, " - "H and W is height and width of filter. " - "If the groups attribute is greater than 1, C equal the number of " - "input image channels divided by the groups."); - AddOutput("Output", - "The output tensor of convolution operator." - "The format of output tensor is also NCHW."); - AddAttr>("strides", "strides of convolution operator.") - .SetDefault({1, 1}); - AddAttr>("paddings", "paddings of convolution operator.") - .SetDefault({0, 0}); - AddAttr( - "groups", - "group size of convolution operator. " - "Refer to grouped convolution in Alex Krizhevsky's paper: " - "when group=2, the first half of the filters are only connected to the " - "first half of the input channels, and the second half only connected " - "to the second half.") - .SetDefault(1); - AddComment(R"DOC( +Conv2DOpMaker::Conv2DOpMaker(framework::OpProto* proto, + framework::OpAttrChecker* op_checker) + : OpProtoAndCheckerMaker(proto, op_checker) { + AddInput( + "Input", + "The input tensor of convolution operator. " + "The format of input tensor is NCHW. Where N is batch size, C is the " + "number of channels, H and W is the height and width of image."); + AddInput("Filter", + "The filter tensor of convolution operator." + "The format of the filter tensor is MCHW, where M is the number of " + "output image channels, C is the number of input image channels, " + "H and W is height and width of filter. " + "If the groups attribute is greater than 1, C equal the number of " + "input image channels divided by the groups."); + AddOutput("Output", + "The output tensor of convolution operator." 
+ "The format of output tensor is also NCHW."); + AddAttr>("strides", "strides of convolution operator.") + .SetDefault({1, 1}); + AddAttr>("paddings", "paddings of convolution operator.") + .SetDefault({0, 0}); + AddAttr( + "groups", + "group size of convolution operator. " + "Refer to grouped convolution in Alex Krizhevsky's paper: " + "when group=2, the first half of the filters are only connected to the " + "first half of the input channels, and the second half only connected " + "to the second half.") + .SetDefault(1); + AddComment(R"DOC( The convolution operation calculates the output based on the input, filter and strides, paddings, groups parameters. The size of each dimension of the parameters is checked in the infer-shape. )DOC"); - } -}; - -class Conv2DOpGrad : public framework::OperatorWithKernel { - public: - using framework::OperatorWithKernel::OperatorWithKernel; +} - protected: - void InferShape(framework::InferShapeContext* ctx) const override { - auto in_dims = ctx->GetInputDim("Input"); - auto filter_dims = ctx->GetInputDim("Filter"); - if (ctx->HasOutput(framework::GradVarName("Input"))) { - ctx->SetOutputDim(framework::GradVarName("Input"), in_dims); - } - if (ctx->HasOutput(framework::GradVarName("Filter"))) { - ctx->SetOutputDim(framework::GradVarName("Filter"), filter_dims); - } +void Conv2DOpGrad::InferShape(framework::InferShapeContext* ctx) const { + auto in_dims = ctx->GetInputDim("Input"); + auto filter_dims = ctx->GetInputDim("Filter"); + if (ctx->HasOutput(framework::GradVarName("Input"))) { + ctx->SetOutputDim(framework::GradVarName("Input"), in_dims); } -}; + if (ctx->HasOutput(framework::GradVarName("Filter"))) { + ctx->SetOutputDim(framework::GradVarName("Filter"), filter_dims); + } +} } // namespace operators } // namespace paddle diff --git a/paddle/operators/conv2d_op.cu b/paddle/operators/conv2d_op.cu index 5df818ba0496a65502dde37fd1397ec56f8c1101..c697c9466d34c29af6976f3a4d2d0a24ba778ceb 100644 --- 
a/paddle/operators/conv2d_op.cu +++ b/paddle/operators/conv2d_op.cu @@ -12,7 +12,7 @@ See the License for the specific language governing permissions and limitations under the License. */ -#include "paddle/operators/gemm_conv2d_op.h" +#include "paddle/operators/conv2d_op.h" namespace ops = paddle::operators; diff --git a/paddle/operators/gemm_conv2d_op.h b/paddle/operators/conv2d_op.h similarity index 90% rename from paddle/operators/gemm_conv2d_op.h rename to paddle/operators/conv2d_op.h index 323e3f7c3bd506c6b63bf4d1152384649f5da575..7ebdbe81cbbaf59a60eb3dac0f570d70fc85d6ef 100644 --- a/paddle/operators/gemm_conv2d_op.h +++ b/paddle/operators/conv2d_op.h @@ -24,6 +24,38 @@ namespace operators { using Tensor = framework::Tensor; +// Base convolution operator definations for other conv +// like operators to reuse the implementation. +inline int OutputSize(int input_size, int filter_size, int padding, + int stride) { + int output_size = (input_size - filter_size + 2 * padding) / stride + 1; + return output_size; +} + +// Define Op classes in .h file so that other conv +// operator implementations can reuse the code. 
+class Conv2DOpMaker : public framework::OpProtoAndCheckerMaker { + public: + Conv2DOpMaker(framework::OpProto* proto, + framework::OpAttrChecker* op_checker); +}; + +class Conv2DOp : public framework::OperatorWithKernel { + public: + using framework::OperatorWithKernel::OperatorWithKernel; + + protected: + void InferShape(framework::InferShapeContext* ctx) const override; +}; + +class Conv2DOpGrad : public framework::OperatorWithKernel { + public: + using framework::OperatorWithKernel::OperatorWithKernel; + + protected: + void InferShape(framework::InferShapeContext* ctx) const override; +}; + template class GemmConv2DKernel : public framework::OpKernel { public: @@ -74,7 +106,6 @@ class GemmConv2DKernel : public framework::OpKernel { framework::DDim output_matrix_shape = {output_channels, output_height * output_width}; - // convolution operator: im2col + gemm int in_step = input_channels / groups; int out_step = output_channels / groups; diff --git a/paddle/operators/conv_cudnn_op.cc b/paddle/operators/conv_cudnn_op.cc new file mode 100644 index 0000000000000000000000000000000000000000..4288f300dd5b0464f2b4394cdb0b44f93060ae74 --- /dev/null +++ b/paddle/operators/conv_cudnn_op.cc @@ -0,0 +1,47 @@ +/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserve. + + Licensed under the Apache License, Version 2.0 (the "License"); + you may not use this file except in compliance with the License. + You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. 
*/ + +#include "paddle/operators/conv2d_op.h" + +namespace paddle { +namespace operators { + +class CudnnConvOpMaker : public Conv2DOpMaker { + public: + CudnnConvOpMaker(framework::OpProto* proto, + framework::OpAttrChecker* op_checker) + : Conv2DOpMaker(proto, op_checker) { + AddAttr>("dilations", "dilations of convolution operator.") + .SetDefault(std::vector{1, 1}); + AddAttr("workspace_size_MB", + "workspace size for cudnn, in MB, " + "workspace is a section of GPU memory which will be " + "allocated/freed each time the operator runs, larger " + "workspace size can increase performance but also requires " + "better hardward. This size should be carefully setted.") + .SetDefault(4096); + } +}; + +} // namespace operators +} // namespace paddle + +namespace ops = paddle::operators; +REGISTER_OP(conv_cudnn, ops::Conv2DOp, ops::CudnnConvOpMaker, conv_cudnn_grad, + ops::Conv2DOpGrad); +REGISTER_OP_CPU_KERNEL( + conv_cudnn, ops::GemmConv2DKernel); +REGISTER_OP_CPU_KERNEL( + conv_cudnn_grad, + ops::GemmConvGrad2DKernel); diff --git a/paddle/operators/conv_cudnn_op.cu b/paddle/operators/conv_cudnn_op.cu new file mode 100644 index 0000000000000000000000000000000000000000..366d0323b840c338dd6ba5b28bdb29fd135fe91a --- /dev/null +++ b/paddle/operators/conv_cudnn_op.cu @@ -0,0 +1,277 @@ +/* Copyright (c) 2016 PaddlePaddle Authors All Rights Reserve. + + Licensed under the Apache License, Version 2.0 (the "License"); + you may not use this file except in compliance with the License. + You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. 
*/ + +#include "paddle/framework/eigen.h" +#include "paddle/framework/op_registry.h" +#include "paddle/memory/memory.h" +#include "paddle/operators/conv2d_op.h" +#include "paddle/platform/assert.h" +#include "paddle/platform/cudnn_helper.h" + +namespace paddle { +namespace operators { + +using Tensor = framework::Tensor; +using ScopedTensorDescriptor = platform::ScopedTensorDescriptor; +using ScopedFilterDescriptor = platform::ScopedFilterDescriptor; +using ScopedConvolutionDescriptor = platform::ScopedConvolutionDescriptor; +using DataLayout = platform::DataLayout; +using CUDADeviceContext = platform::CUDADeviceContext; + +static constexpr size_t kCONV_CUDNN_WORKSPACE_LIMIT_BYTES = 1024 * 1024 * 1024; + +// NOTE: framework::vectorize converts to type int64_t +// which does not fit cudnn inputs. +std::vector Dims2Vector(const framework::DDim& dims) { + std::vector ret; + for (int i = 0; i < dims.size(); i++) { + ret.push_back(dims[i]); + } + return ret; +} + +template +class CudnnConvOpKernel : public framework::OpKernel { + public: + void Compute(const framework::ExecutionContext& ctx) const override { + PADDLE_ENFORCE(platform::is_gpu_place(ctx.GetPlace()), + "It must use GPUPlace."); + auto* input = ctx.Input("Input"); + auto* filter = ctx.Input("Filter"); + auto* output = ctx.Output("Output"); + + std::vector strides = ctx.Attr>("strides"); + std::vector paddings = ctx.Attr>("paddings"); + std::vector dilations = ctx.Attr>("dilations"); + int groups = ctx.Attr("groups"); + int user_workspace_size = ctx.Attr("workspace_size_MB"); + + const T* input_data = input->data(); + const T* filter_data = filter->data(); + T* output_data = output->mutable_data(ctx.GetPlace()); + + // ------------------- cudnn descriptors --------------------- + ScopedTensorDescriptor input_desc; + ScopedTensorDescriptor output_desc; + ScopedFilterDescriptor filter_desc; + ScopedConvolutionDescriptor conv_desc; + DataLayout layout = DataLayout::kNCHW; + + cudnnTensorDescriptor_t 
cudnn_input_desc = + input_desc.descriptor(layout, Dims2Vector(input->dims()), groups); + cudnnTensorDescriptor_t cudnn_output_desc = + output_desc.descriptor(layout, Dims2Vector(output->dims()), groups); + cudnnFilterDescriptor_t cudnn_filter_desc = + filter_desc.descriptor(layout, Dims2Vector(filter->dims()), groups); + cudnnConvolutionDescriptor_t cudnn_conv_desc = + conv_desc.descriptor(paddings, strides, dilations); + + int input_channels = input->dims()[1]; + int input_height = input->dims()[2]; + int input_width = input->dims()[3]; + int output_channels = output->dims()[1]; + int output_height = output->dims()[2]; + int output_width = output->dims()[3]; + + int group_offset_in = input_channels / groups * input_height * input_width; + int group_offset_out = + output_channels / groups * output_height * output_width; + int group_offset_filter = filter->numel() / groups; + // ------------------- cudnn conv workspace --------------------- + void* cudnn_workspace = nullptr; + size_t workspace_size_in_bytes; // final workspace to allocate. 
+ size_t workspace_size_limit = kCONV_CUDNN_WORKSPACE_LIMIT_BYTES; + if (user_workspace_size > 0) { + workspace_size_limit = user_workspace_size * 1024 * 1024; + } + // ------------------- cudnn conv algorithm --------------------- + cudnnConvolutionFwdAlgo_t algo; + auto handle = ctx.cuda_device_context().cudnn_handle(); + + PADDLE_ENFORCE(platform::dynload::cudnnGetConvolutionForwardAlgorithm( + handle, cudnn_input_desc, cudnn_filter_desc, cudnn_conv_desc, + cudnn_output_desc, CUDNN_CONVOLUTION_FWD_SPECIFY_WORKSPACE_LIMIT, + workspace_size_limit, &algo)); + // get workspace size able to allocate + PADDLE_ENFORCE(platform::dynload::cudnnGetConvolutionForwardWorkspaceSize( + handle, cudnn_input_desc, cudnn_filter_desc, cudnn_conv_desc, + cudnn_output_desc, algo, &workspace_size_in_bytes)); + // Allocate on GPU memory + platform::GPUPlace gpu = boost::get(ctx.GetPlace()); + cudnn_workspace = paddle::memory::Alloc(gpu, workspace_size_in_bytes); + // ------------------- cudnn conv forward --------------------- + T alpha = 1.0f, beta = 0.0f; + for (int i = 0; i < groups; i++) { + PADDLE_ENFORCE(platform::dynload::cudnnConvolutionForward( + handle, &alpha, cudnn_input_desc, input_data + i * group_offset_in, + cudnn_filter_desc, filter_data + i * group_offset_filter, + cudnn_conv_desc, algo, cudnn_workspace, workspace_size_in_bytes, + &beta, cudnn_output_desc, output_data + i * group_offset_out)); + } + // Release the cudnn workspace + paddle::memory::Free(gpu, cudnn_workspace); + } +}; + +template +class CudnnConvGradOpKernel : public framework::OpKernel { + public: + void Compute(const framework::ExecutionContext& ctx) const override { + PADDLE_ENFORCE(platform::is_gpu_place(ctx.GetPlace()), + "It must use GPUPlace."); + auto input = ctx.Input("Input"); + auto filter = ctx.Input("Filter"); + auto output_grad = ctx.Input(framework::GradVarName("Output")); + auto input_grad = ctx.Output(framework::GradVarName("Input")); + auto filter_grad = 
ctx.Output(framework::GradVarName("Filter")); + + const T* input_data = input->data(); + const T* output_grad_data = output_grad->data(); + const T* filter_data = filter->data(); + + std::vector strides = ctx.Attr>("strides"); + std::vector paddings = ctx.Attr>("paddings"); + std::vector dilations = ctx.Attr>("dilations"); + int groups = ctx.Attr("groups"); + int user_workspace_size = ctx.Attr("workspace_size_MB"); + + // ------------------- cudnn descriptors --------------------- + ScopedTensorDescriptor input_desc; + ScopedTensorDescriptor output_grad_desc; + ScopedTensorDescriptor input_grad_desc; + + ScopedFilterDescriptor filter_desc; + ScopedFilterDescriptor filter_grad_desc; + ScopedConvolutionDescriptor conv_desc; + DataLayout layout = DataLayout::kNCHW; + + cudnnTensorDescriptor_t cudnn_input_desc = + input_desc.descriptor(layout, Dims2Vector(input->dims()), groups); + cudnnTensorDescriptor_t cudnn_output_grad_desc = + output_grad_desc.descriptor(layout, Dims2Vector(output_grad->dims()), + groups); + cudnnFilterDescriptor_t cudnn_filter_desc = + filter_desc.descriptor(layout, Dims2Vector(filter->dims()), groups); + cudnnTensorDescriptor_t cudnn_input_grad_desc = nullptr; + cudnnFilterDescriptor_t cudnn_filter_grad_desc = nullptr; + + cudnnConvolutionDescriptor_t cudnn_conv_desc = + conv_desc.descriptor(paddings, strides, dilations); + + int input_channels = input->dims()[1]; + int input_height = input->dims()[2]; + int input_width = input->dims()[3]; + int output_grad_channels = filter->dims()[0]; + int output_grad_height = output_grad->dims()[2]; + int output_grad_width = output_grad->dims()[3]; + + int group_offset_in = input_channels / groups * input_height * input_width; + int group_offset_out = + output_grad_channels / groups * output_grad_height * output_grad_width; + int group_offset_filter = filter->numel() / groups; + // ------------------- cudnn backward algorithm --------------------- + cudnnConvolutionBwdDataAlgo_t data_algo; + 
cudnnConvolutionBwdFilterAlgo_t filter_algo; + size_t workspace_size_in_bytes = 0, tmp_size = 0; + size_t workspace_size_limit = kCONV_CUDNN_WORKSPACE_LIMIT_BYTES; + if (user_workspace_size > 0) { + workspace_size_limit = user_workspace_size * 1024 * 1024; + } + + auto handle = ctx.cuda_device_context().cudnn_handle(); + if (input_grad) { + cudnn_input_grad_desc = input_grad_desc.descriptor( + layout, Dims2Vector(input_grad->dims()), groups); + PADDLE_ENFORCE( + platform::dynload::cudnnGetConvolutionBackwardDataAlgorithm( + handle, cudnn_filter_desc, + // dyDesc: Handle to the previously initialized input differential + // tensor descriptor. + cudnn_output_grad_desc, cudnn_conv_desc, + // dxDesc: Handle to the previously initialized output tensor + // descriptor. + cudnn_input_grad_desc, + CUDNN_CONVOLUTION_BWD_DATA_SPECIFY_WORKSPACE_LIMIT, + workspace_size_limit, &data_algo)); + PADDLE_ENFORCE( + platform::dynload::cudnnGetConvolutionBackwardDataWorkspaceSize( + handle, cudnn_filter_desc, cudnn_output_grad_desc, + cudnn_conv_desc, cudnn_input_grad_desc, data_algo, &tmp_size)); + workspace_size_in_bytes = std::max(workspace_size_in_bytes, tmp_size); + } + + if (filter_grad) { + cudnn_filter_grad_desc = filter_grad_desc.descriptor( + layout, Dims2Vector(filter_grad->dims()), groups); + PADDLE_ENFORCE( + platform::dynload::cudnnGetConvolutionBackwardFilterAlgorithm( + handle, cudnn_input_desc, cudnn_output_grad_desc, cudnn_conv_desc, + cudnn_filter_desc, + CUDNN_CONVOLUTION_BWD_FILTER_SPECIFY_WORKSPACE_LIMIT, + workspace_size_limit, &filter_algo)); + + PADDLE_ENFORCE( + platform::dynload::cudnnGetConvolutionBackwardFilterWorkspaceSize( + handle, cudnn_input_desc, cudnn_output_grad_desc, cudnn_conv_desc, + cudnn_filter_desc, filter_algo, &tmp_size)); + workspace_size_in_bytes = std::max(workspace_size_in_bytes, tmp_size); + } + // ------------------- cudnn conv workspace --------------------- + // Already on GPU + void* cudnn_workspace = nullptr; + platform::GPUPlace 
gpu = boost::get(ctx.GetPlace()); + cudnn_workspace = paddle::memory::Alloc(gpu, workspace_size_in_bytes); + // ------------------- cudnn conv backward data --------------------- + // FIXME(typhoonzero): template type T may not be the same as cudnn call. + T alpha = 1.0f, beta = 0.0f; + if (input_grad) { + T* input_grad_data = input_grad->mutable_data(ctx.GetPlace()); + auto t = framework::EigenVector::Flatten(*input_grad); + t.device(ctx.GetEigenDevice()) = + t.constant(static_cast(0)); + for (int i = 0; i < groups; i++) { + PADDLE_ENFORCE(platform::dynload::cudnnConvolutionBackwardData( + handle, &alpha, cudnn_filter_desc, + filter_data + i * group_offset_filter, cudnn_output_grad_desc, + output_grad_data + i * group_offset_out, cudnn_conv_desc, data_algo, + cudnn_workspace, workspace_size_in_bytes, &beta, + cudnn_input_grad_desc, input_grad_data + i * group_offset_in)); + } + } + // ------------------- cudnn conv backward filter --------------------- + if (filter_grad) { + T* filter_grad_data = filter_grad->mutable_data(ctx.GetPlace()); + auto t = framework::EigenVector::Flatten(*filter_grad); + t.device(ctx.GetEigenDevice()) = + t.constant(static_cast(0)); + for (int i = 0; i < groups; i++) { + PADDLE_ENFORCE(platform::dynload::cudnnConvolutionBackwardFilter( + handle, &alpha, cudnn_input_desc, input_data + i * group_offset_in, + cudnn_output_grad_desc, output_grad_data + i * group_offset_out, + cudnn_conv_desc, filter_algo, cudnn_workspace, + workspace_size_in_bytes, &beta, cudnn_filter_grad_desc, + filter_grad_data + i * group_offset_filter)); + } + } + // Release the cudnn workspace + paddle::memory::Free(gpu, cudnn_workspace); + } +}; + +} // namespace operators +} // namespace paddle + +REGISTER_OP_GPU_KERNEL(conv_cudnn, paddle::operators::CudnnConvOpKernel); +REGISTER_OP_GPU_KERNEL(conv_cudnn_grad, + paddle::operators::CudnnConvGradOpKernel); diff --git a/paddle/operators/decayed_adagrad_op.cc b/paddle/operators/decayed_adagrad_op.cc new file mode 100644 
index 0000000000000000000000000000000000000000..7f583f18c8c6ee5025f6525306f9323fb329b030 --- /dev/null +++ b/paddle/operators/decayed_adagrad_op.cc @@ -0,0 +1,96 @@ +/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserve. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. */ + +#include "paddle/operators/decayed_adagrad_op.h" + +namespace paddle { +namespace operators { + +class DecayedAdagradOp : public framework::OperatorWithKernel { + public: + using framework::OperatorWithKernel::OperatorWithKernel; + + protected: + void InferShape(framework::InferShapeContext *ctx) const override { + PADDLE_ENFORCE(ctx->HasInput("Param"), + "Input(Param) of DecayedAdagradOp should not be null."); + PADDLE_ENFORCE(ctx->HasInput("Grad"), + "Input(Grad) of DecayedAdagradOp should not be null."); + PADDLE_ENFORCE(ctx->HasInput("Moment"), + "Input(Moment) of DecayedAdagradOp should not be null."); + PADDLE_ENFORCE( + ctx->HasInput("LearningRate"), + "Input(LearningRate) of DecayedAdagradOp should not be null."); + + PADDLE_ENFORCE(ctx->HasOutput("ParamOut"), + "Output(ParamOut) of DecayedAdagradOp should not be null."); + PADDLE_ENFORCE(ctx->HasOutput("MomentOut"), + "Output(MomentOut) of DecayedAdagradOp should not be null."); + + auto lr_dims = ctx->GetInputDim("LearningRate"); + PADDLE_ENFORCE_EQ(framework::product(lr_dims), 1, + "LearningRate should have one element"); + auto param_dims = ctx->GetInputDim("Param"); + PADDLE_ENFORCE_EQ(param_dims, ctx->GetInputDim("Grad"), + "Param and Grad input 
of DecayedAdagradOp should have " + "the same dimension."); + PADDLE_ENFORCE_EQ(param_dims, ctx->GetInputDim("Moment"), + "Param and Moment input of DecayedAdagradOp should have " + "the same dimension."); + + ctx->SetOutputDim("ParamOut", param_dims); + ctx->SetOutputDim("MomentOut", param_dims); + } +}; + +class DecayedAdagradOpMaker : public framework::OpProtoAndCheckerMaker { + public: + DecayedAdagradOpMaker(framework::OpProto *proto, + framework::OpAttrChecker *op_checker) + : OpProtoAndCheckerMaker(proto, op_checker) { + AddInput("Param", "(Tensor) Input parameter"); + AddInput("Grad", "(Tensor) Input gradient"); + AddInput("Moment", "(Tensor) Second moment"); + AddInput("LearningRate", "(Tensor) Learning rate"); + + AddOutput("ParamOut", "(Tensor) Output parameter"); + AddOutput("MomentOut", "(Tensor) Output second moment"); + + AddAttr("decay", + "(float, default 0.95) " + "Discounting factor for coming gradient") + .SetDefault(0.95); + AddAttr("epsilon", + "(float, default 1.0e-6) " + "Constant for numerical stability") + .SetDefault(1.0e-6f); + AddComment(R"DOC( + +Decayed Adagrad + +moment_out = decay * moment + (1 - decay) * grad * grad +param_out = param - learning_rate * grad / (sqrt(moment_out) + epsilon) + +)DOC"); + } +}; +} // namespace operators +} // namespace paddle + +namespace ops = paddle::operators; +REGISTER_OP_WITHOUT_GRADIENT(decayed_adagrad, ops::DecayedAdagradOp, + ops::DecayedAdagradOpMaker); +REGISTER_OP_CPU_KERNEL( + decayed_adagrad, + ops::DecayedAdagradOpKernel); diff --git a/paddle/operators/decayed_adagrad_op.cu b/paddle/operators/decayed_adagrad_op.cu new file mode 100644 index 0000000000000000000000000000000000000000..6fce77fe4ec6b76cb7b0259aab6a3d55d2edb36c --- /dev/null +++ b/paddle/operators/decayed_adagrad_op.cu @@ -0,0 +1,21 @@ +/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserve. 
+ + Licensed under the Apache License, Version 2.0 (the "License"); + you may not use this file except in compliance with the License. + You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. */ + +#define EIGEN_USE_GPU +#include "paddle/operators/decayed_adagrad_op.h" + +namespace ops = paddle::operators; +REGISTER_OP_GPU_KERNEL( + decayed_adagrad, + ops::DecayedAdagradOpKernel); diff --git a/paddle/operators/decayed_adagrad_op.h b/paddle/operators/decayed_adagrad_op.h new file mode 100644 index 0000000000000000000000000000000000000000..0fe0fc5acd66c9824a864618b69097c5c063ea3f --- /dev/null +++ b/paddle/operators/decayed_adagrad_op.h @@ -0,0 +1,56 @@ +/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserve. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. 
*/ + +#pragma once +#include "paddle/framework/eigen.h" +#include "paddle/framework/op_registry.h" + +namespace paddle { +namespace operators { + +template +class DecayedAdagradOpKernel : public framework::OpKernel { + public: + void Compute(const framework::ExecutionContext& ctx) const override { + auto param_out_tensor = ctx.Output("ParamOut"); + auto moment_out_tensor = ctx.Output("MomentOut"); + + param_out_tensor->mutable_data(ctx.GetPlace()); + moment_out_tensor->mutable_data(ctx.GetPlace()); + + float decay = ctx.Attr("decay"); + float epsilon = ctx.Attr("epsilon"); + + auto param = framework::EigenVector::Flatten( + *ctx.Input("Param")); + auto grad = framework::EigenVector::Flatten( + *ctx.Input("Grad")); + auto moment = framework::EigenVector::Flatten( + *ctx.Input("Moment")); + auto lr = framework::EigenVector::Flatten( + *ctx.Input("LearningRate")); + + auto param_out = framework::EigenVector::Flatten(*param_out_tensor); + auto moment_out = framework::EigenVector::Flatten(*moment_out_tensor); + auto place = ctx.GetEigenDevice(); + + moment_out.device(place) = decay * moment + (1 - decay) * grad * grad; + Eigen::DSizes m_dsize(moment_out_tensor->numel()); + param_out.device(place) = + param - lr.broadcast(m_dsize) * grad / (moment_out.sqrt() + epsilon); + } +}; + +} // namespace operators +} // namespace paddle diff --git a/paddle/operators/dynamic_recurrent_op.cc b/paddle/operators/dynamic_recurrent_op.cc index b919aef8fb62e5b2331c2d842556e0642ea6b095..03f33e28d49fdaeccb9b6266359e0b41a1cb847f 100644 --- a/paddle/operators/dynamic_recurrent_op.cc +++ b/paddle/operators/dynamic_recurrent_op.cc @@ -23,13 +23,37 @@ using framework::Scope; using framework::TensorArray; using framework::LoDTensor; using framework::Variable; +using framework::DySeqMetaBatch; namespace detail { inline void CreateVariables(Scope& scope, const std::vector& var_names) { for (const auto& name : var_names) { - scope.NewVar(name); + scope.Var(name); + } +} + +/* + * The inputs with 
sequence should be reordered when they are split, so the + * boot_states should be reordered in the same order. + * + * NOTE This may require that the `pre_state` of the first time step should just + * copy the `boot_state` rather than reference it, for that the content should + * be reordered, but the RNN op should not change the `boot_state` as an input + * variable's content. + */ +template +inline void ReorderBootState(const DySeqMetaBatch& metas, + const LoDTensor& boot_state, LoDTensor* tensor, + const platform::Place& dst_place) { + for (size_t seq_id = 0; seq_id < metas.size(); seq_id++) { + auto slice = tensor->Slice(seq_id, seq_id + 1); + auto boot_slice = + boot_state.Slice(metas[seq_id].ori_idx, metas[seq_id].ori_idx + 1); + // TODO(superjom) pass in device context as an argument + slice.template CopyFrom(boot_slice, dst_place, + platform::CPUDeviceContext()); } } @@ -69,6 +93,7 @@ void DynamicRecurrentOp::Run(const Scope& scope, CreateScopes(); WriteStepInputs(); InitStates(); + WriteStepOutputs(); // call stepnet in all the time steps for (size_t step = 0; step < cache_.num_steps; step++) { @@ -76,7 +101,6 @@ void DynamicRecurrentOp::Run(const Scope& scope, stepnet_->Run(step_scope, dev_ctx); } - WriteStepOutputs(); ConcatOutputs(); } @@ -84,11 +108,11 @@ void DynamicRecurrentOp::SplitInputs() const { // TODO(superjom) make level a config // TODO(superjom) check all the inputs has the same LoD int level = 0; - const auto& inlinks = cache_.inlinks; - for (const auto& item : inlinks) { + for (const auto& item : cache_.inlinks) { const auto& var = item.second; const auto& tensor = var->Get(); TensorArray& ta = step_inputs_[item.first]; + dy_seq_metas_[item.first] = ta.Unpack(tensor, level, true /*length_descend*/); @@ -112,7 +136,7 @@ void DynamicRecurrentOp::WriteStepInputs() const { auto& step_scope = cache_.GetScope(step); Variable* var = step_scope.FindVar(item.first); if (var == nullptr) { - var = step_scope.NewVar(item.first); + var = 
step_scope.Var(item.first); } var->GetMutable()->ShareDataWith(tensor); } @@ -120,17 +144,11 @@ void DynamicRecurrentOp::WriteStepInputs() const { } void DynamicRecurrentOp::WriteStepOutputs() const { - for (size_t step = 0; step < cache_.scopes->size(); step++) { - auto& scope = cache_.GetScope(step); - for (auto& item : step_outputs_) { - auto* var = scope.FindVar(item.first); - if (var == nullptr) { - var = scope.NewVar(item.first); - } - auto* tensor = var->GetMutable(); - item.second.WriteShared(step, *tensor); - } + // initialize step outputs + for (const auto& item : cache_.outlinks) { + step_outputs_.emplace(item.first, TensorArray()); } + PADDLE_ENFORCE_GT(step_outputs_.size(), 0UL); } void DynamicRecurrentOp::CreateScopes() const { @@ -145,12 +163,18 @@ void DynamicRecurrentOp::CreateScopes() const { PADDLE_ENFORCE_NOT_NULL(stepnet_, "stepnet should be set first"); std::vector memories; std::vector pre_memories; + std::vector stepnet_outputs; std::transform(arg_.memories.begin(), arg_.memories.end(), std::back_inserter(memories), [](const rnn::MemoryAttr& m) { return m.var; }); std::transform(arg_.memories.begin(), arg_.memories.end(), std::back_inserter(pre_memories), [](const rnn::MemoryAttr& m) { return m.pre_var; }); + for (const auto& item : stepnet_->Outputs()) { + for (const auto& var : item.second) { + stepnet_outputs.push_back(var); + } + } for (size_t step = 0; step < cache_.num_steps; step++) { auto& scope = cache_.GetScope(step); @@ -158,60 +182,88 @@ void DynamicRecurrentOp::CreateScopes() const { detail::CreateVariables(scope, arg_.outlinks); detail::CreateVariables(scope, memories); detail::CreateVariables(scope, pre_memories); + detail::CreateVariables(scope, stepnet_outputs); } } void DynamicRecurrentOp::ConcatOutputs() const { // TODO(superjom) transform this to a config int level = 0; - // TODO(superjom) pass in some lod - // just a placeholder - framework::LoD lod; + for (size_t step = 0; step < cache_.num_steps; step++) { + auto& 
scope = cache_.GetScope(step); + for (auto& item : step_outputs_) { + auto* var = scope.FindVar(item.first); + PADDLE_ENFORCE_NOT_NULL(var); + auto* tensor = var->GetMutable(); + tensor->mutable_data(platform::CPUPlace()); + item.second.WriteShared(step, *tensor); + } + } + // the inlinks' lods should be the same, so randomly get one lod. + const auto& some_lod = + cache_.scope->FindVar(arg_.inlinks.front())->Get().lod(); + const auto& some_meta = dy_seq_metas_[arg_.inlinks.front()]; for (auto& item : step_outputs_) { - auto tensor = item.second.Pack(level, dy_seq_metas_[item.first], lod); - auto& output = cache_.outlinks[item.first]->Get(); - const_cast(&output)->ShareDataWith(tensor); + auto tensor = item.second.Pack(level, some_meta, some_lod); + auto* output = cache_.outlinks[item.first]->GetMutable(); + const_cast(output)->ShareDataWith(tensor); } } void DynamicRecurrentOp::InitStates() const { - // init the first state - // TODO(superjom) parepare the scenerio that boot state not exists - for (auto memory : arg_.memories) { - auto* boot_state_var = cache_.scope->FindVar(memory.boot_var); - PADDLE_ENFORCE_NOT_NULL(boot_state_var); - auto& boot_state = boot_state_var->Get(); - const auto& dims = boot_state.dims(); - - for (size_t step = 0; step < cache_.num_steps; step++) { - auto& cur_scope = cache_.GetScope(step); - // link pre-state to boot_state - // init state and pre-state - auto* pre_state = cur_scope.FindVar(memory.pre_var); - PADDLE_ENFORCE_NOT_NULL(pre_state); - pre_state->GetMutable(); - - auto* state = cur_scope.FindVar(memory.var); - PADDLE_ENFORCE_NOT_NULL(state); - state->GetMutable()->Resize(dims); - state->GetMutable()->mutable_data( - platform::CPUPlace()); - - if (step == 0) { - auto* pre_state_tensor = pre_state->GetMutable(); - pre_state_tensor->Resize(boot_state.dims()); - pre_state_tensor->ShareDataWith(boot_state); - } else { - auto& pre_scope = cache_.GetScope(step - 1); - auto* state_pre = pre_scope.FindVar(memory.var); - 
PADDLE_ENFORCE_NOT_NULL(state_pre); - pre_state->GetMutable()->ShareDataWith( - *state_pre->GetMutable()); - } + for (size_t step = 0; step < cache_.num_steps; step++) { + for (const auto& memory : arg_.memories) { + CreateState(memory, step); + LinkState(memory, step); } } } +void DynamicRecurrentOp::CreateState(const rnn::MemoryAttr& memory, + size_t step) const { + auto& scope = cache_.GetScope(step); + auto& state = *cache_.GetTensor(scope, memory.var); + auto& boot_state = *cache_.GetTensor(*cache_.scope, memory.boot_var); + + size_t num_instances = + step_inputs_[arg_.inlinks.front()].Read(step).dims()[0]; + auto dims = boot_state.dims(); + dims[0] = num_instances; + + state.Resize(dims); + state.mutable_data(platform::CPUPlace()); + states_[memory.var].WriteShared(step, state); +} + +void DynamicRecurrentOp::LinkState(const rnn::MemoryAttr& memory, + size_t step) const { + auto& scope = cache_.GetScope(step); + auto& state_pre = *cache_.GetTensor(scope, memory.pre_var); + + // all the step_inputs' metas should be the same, just randomly select one + // and get the dyseq meta. 
+ const auto& some_meta = dy_seq_metas_[arg_.inlinks.front()]; + size_t num_instances = + step_inputs_[arg_.inlinks.front()].Read(step).dims()[0]; + + LoDTensor* pre_state{nullptr}; + if (step == 0) { + pre_state = cache_.GetTensor(*cache_.scope, memory.boot_var); + pre_state->mutable_data(platform::CPUPlace()); + // allocate memory + state_pre.Resize(pre_state->dims()); + state_pre.mutable_data(platform::CPUPlace()); + detail::ReorderBootState(some_meta, *pre_state, &state_pre, + pre_state->place()); + } else { + pre_state = cache_.GetTensor(cache_.GetScope(step - 1), memory.var); + } + + // shink and share from previous state + auto shrinked_pre_state = pre_state->Slice(0, num_instances); + state_pre.ShareDataWith(shrinked_pre_state); +} + void DynamicRecurrentOp::ArgCache::Init( const rnn::ArgumentName& name, const paddle::framework::OperatorBase& op, const paddle::framework::Scope& scope, rnn::Argument* arg) { @@ -261,6 +313,12 @@ Variable* DynamicRecurrentOp::ArgCache::GetVariable(const Scope& scope, return var; } +LoDTensor* DynamicRecurrentOp::ArgCache::GetTensor( + const framework::Scope& scope, const std::string& name) { + auto* var = GetVariable(scope, name); + return var->GetMutable(); +} + const rnn::ArgumentName DynamicRecurrentOp::kArgName{ "step_net", "step_scopes", "inlinks", "outlinks", "memories", "pre_memories", "boot_memories"}; diff --git a/paddle/operators/dynamic_recurrent_op.h b/paddle/operators/dynamic_recurrent_op.h index 6a2970f27fd5bcb25e924dbc567e254159b55a3e..ec80a1c90eee3a655febe0dd3d6c67c16ec6c64b 100644 --- a/paddle/operators/dynamic_recurrent_op.h +++ b/paddle/operators/dynamic_recurrent_op.h @@ -77,6 +77,17 @@ class DynamicRecurrentOp : public framework::OperatorBase { */ void InitStates() const; + /* + * Create state variables for each time step. 
+ */ + void CreateState(const rnn::MemoryAttr& memory, size_t step) const; + + /* + * Link pre-state variable in current scope to the state variable in the + * previous time step (scope). + */ + void LinkState(const rnn::MemoryAttr& memory, size_t step) const; + /* * Concatenate outputs in each time step and generate a LoDTensor. */ @@ -91,6 +102,16 @@ class DynamicRecurrentOp : public framework::OperatorBase { } const OperatorBase& GetStepNet() const { return *stepnet_; } + const framework::TensorArray& state(const std::string& name) const { + return states_[name]; + } + const framework::TensorArray& step_input(const std::string& name) const { + return step_inputs_[name]; + } + const framework::TensorArray& step_output(const std::string& name) const { + return step_outputs_[name]; + } + protected: struct ArgCache { framework::Scope const* scope; @@ -108,6 +129,9 @@ class DynamicRecurrentOp : public framework::OperatorBase { return *scopes->at(index); } + framework::LoDTensor* GetTensor(const framework::Scope& scope, + const std::string& name); + private: void InitArgument(const rnn::ArgumentName& name, const OperatorBase& op, rnn::Argument* arg); @@ -122,7 +146,7 @@ class DynamicRecurrentOp : public framework::OperatorBase { private: std::unique_ptr stepnet_; - mutable framework::TensorArray states_; + mutable std::map states_; mutable std::map step_inputs_; mutable std::map step_outputs_; mutable std::map> diff --git a/paddle/operators/dynamic_recurrent_op_test.cc b/paddle/operators/dynamic_recurrent_op_test.cc index 675a7890f3fa6bb7ab9dbbdb04894b2557214a8a..83a5ba36d9af2ef81ebcbb33e056de2e0b98cbc1 100644 --- a/paddle/operators/dynamic_recurrent_op_test.cc +++ b/paddle/operators/dynamic_recurrent_op_test.cc @@ -36,7 +36,7 @@ void OpDescNewVar(const std::string& param_name, // create a LoD tensor in scope with specific dims LoDTensor* CreateVar(Scope& scope, std::string name, framework::DDim dims, const platform::Place& place) { - auto* var = scope.NewVar(name); + 
auto* var = scope.Var(name); auto* tensor = var->GetMutable(); tensor->Resize(dims); tensor->mutable_data(place); @@ -85,9 +85,8 @@ class DynamicRecurrentOpTestHelper : public ::testing::Test { void CreateGlobalVariables() { platform::CPUPlace place; - scope.NewVar("step_scopes"); + scope.Var("step_scopes"); CreateVar(scope, "boot_mem", framework::make_ddim({10, 20}), place); - // auto* out0 = CreateVar(scope, "out0", framework::make_ddim({10, 20}), place); auto* in0 = CreateVar(scope, "in0", framework::make_ddim({10, 8}), place); // 10 instanes with 4 sentences, length is 4, 3, 2, 1 respectively. diff --git a/paddle/operators/gru_unit_op.cc b/paddle/operators/gru_unit_op.cc new file mode 100644 index 0000000000000000000000000000000000000000..24f84597cd7301af6521b8c1032e69569ba6f03a --- /dev/null +++ b/paddle/operators/gru_unit_op.cc @@ -0,0 +1,210 @@ +/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserve. + + Licensed under the Apache License, Version 2.0 (the "License"); + you may not use this file except in compliance with the License. + You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. 
*/ + +#include "paddle/operators/gru_unit_op.h" + +namespace paddle { +namespace operators { + +using framework::Tensor; + +class GRUUnitOp : public framework::OperatorWithKernel { + public: + using framework::OperatorWithKernel::OperatorWithKernel; + + protected: + void InferShape(framework::InferShapeContext* ctx) const override { + PADDLE_ENFORCE(ctx->HasInput("Input"), + "Input(%s) of GRUUnitOp should not be null.", "Input"); + PADDLE_ENFORCE(ctx->HasInput("HiddenPrev"), + "Input(%s) of GRUUnitOp should not be null.", "HiddenPrev"); + PADDLE_ENFORCE(ctx->HasInput("Weight"), + "Input(%s) of GRUUnitOp should not be null.", "Weight"); + PADDLE_ENFORCE(ctx->HasOutput("Gate"), + "Output(%s) of GRUUnitOp should not be null.", "Gate"); + PADDLE_ENFORCE(ctx->HasOutput("ResetHiddenPrev"), + "Output(%s) of GRUUnitOp should not be null.", + "ResetHiddenPrev"); + PADDLE_ENFORCE(ctx->HasOutput("Hidden"), + "Output(%s) of GRUUnitOp should not be null.", "Hidden"); + auto input_dims = ctx->GetInputDim("Input"); + auto hidden_prev_dims = ctx->GetInputDim("HiddenPrev"); + auto weight_dims = ctx->GetInputDim("Weight"); + int batch_size = input_dims[0]; + int input_size = input_dims[1]; + int frame_size = hidden_prev_dims[1]; + int weight_height = weight_dims[0]; + int weight_width = weight_dims[1]; + PADDLE_ENFORCE_EQ( + input_size, frame_size * 3, + "The input_size must be 3 times of frame_size in GRUUnitOp."); + PADDLE_ENFORCE_EQ( + weight_height, frame_size, + "The shape of Weight matrix must be [frame_size, frame_size * 3]."); + PADDLE_ENFORCE_EQ( + weight_width, frame_size * 3, + "The shape of Weight matrix must be [frame_size, frame_size * 3]."); + auto bias = Input("Bias"); + if (bias != framework::kEmptyVarName) { + auto bias_dims = ctx->GetInputDim("Bias"); + int bias_height = bias_dims[0]; + int bias_width = bias_dims[1]; + PADDLE_ENFORCE_EQ(bias_height, 1, + "The shape of Bias must be [1, frame_size * 3]."); + PADDLE_ENFORCE_EQ(bias_width, frame_size * 3, + "The shape 
of Bias must be [1, frame_size * 3]."); + } + ctx->SetOutputDim("Gate", {batch_size, frame_size * 3}); + ctx->SetOutputDim("ResetHiddenPrev", {batch_size, frame_size}); + ctx->SetOutputDim("Hidden", {batch_size, frame_size}); + } +}; + +class GRUUnitOpMaker : public framework::OpProtoAndCheckerMaker { + public: + GRUUnitOpMaker(framework::OpProto* proto, + framework::OpAttrChecker* op_checker) + : OpProtoAndCheckerMaker(proto, op_checker) { + AddInput("Input", + "(Tensor) Matrix with shape [batch_size, frame_size * 3] for the " + "input."); + AddInput("HiddenPrev", + "(Tensor) Matrix with shape [batch_size, frame_size] for the " + "states of previous time step."); + AddInput("Weight", + "(Tensor) Weight matrix with shape [frame_size, frame_size * 3]. " + "The elements continuous in memory can be divided into two parts. " + "The first part are weights of the update gate and reset gate " + "with shape [frame_size, frame_size * 2], and the second part are " + "weights of output candidate with shape [frame_size, frame_size]"); + AddInput("Bias", + "(Tensor) Bias vector with shape [1, frame_size * 3] concating " + "bias of the update gate, reset gate and output candidate."); + AddOutput("Gate", + "(Tensor) Matrix with shape [batch_size, frame_size * 3] for the " + "output of update gate, reset gate and output candidate") + .AsIntermediate(); + AddOutput("ResetHiddenPrev", + "(Tensor) Matrix with shape [batch_size, frame_size] for the " + "reseted hidden state of previous time step.") + .AsIntermediate(); + AddOutput("Hidden", + "(Tensor) The GRU hidden state of the current time step " + "with shape [batch_size, frame_size]."); + AddAttr("activation", + "(enum int, default tanh) " + "The activation type used for output candidate {h}_t.") + .SetDefault(tanh) + .InEnum({identity, sigmoid, tanh, relu}); + AddAttr("gate_activation", + "(enum int, default sigmoid) " + "The activation type used in update gate and reset gate.") + .SetDefault(sigmoid) + .InEnum({identity, 
sigmoid, tanh, relu}); + AddComment(R"DOC( +GRUUnitOp implements part calculations of the GRU unit as following: + +\f[ +update \ gate: u_t = actGate(xu_t + W_u * hidden_prev + bias_u) \\ +reset \ gate: r_t = actGate(xr_t + W_r * hidden_prev + bias_r) \\ +output \ candidate: {h}_t = actNode(xc_t + W_c * dot(r_t, hidden_prev) + bias_c) \\ +output: h_t = dot((1-u_t), {h}_t) + dot(u_t, hidden_prev) +\f] + +The rest of GRU unit can be completed by using FCOp's output as the input of GRUUnitOp. +)DOC"); + } +}; + +class GRUUnitGradOp : public framework::OperatorWithKernel { + public: + using framework::OperatorWithKernel::OperatorWithKernel; + + protected: + void InferShape(framework::InferShapeContext* ctx) const override { + PADDLE_ENFORCE(ctx->HasInput("Input"), + "Input(%s) of GRUUnitGradOp should not be null.", "Input"); + PADDLE_ENFORCE(ctx->HasInput("HiddenPrev"), + "Input(%s) of GRUUnitGradOp should not be null.", + "HiddenPrev"); + PADDLE_ENFORCE(ctx->HasInput("Weight"), + "Input(%s) of GRUUnitGradOp should not be null.", "Weight"); + PADDLE_ENFORCE(ctx->HasInput("Gate"), + "Input(%s) of GRUUnitGradOp should not be null.", "Gate"); + PADDLE_ENFORCE(ctx->HasInput("ResetHiddenPrev"), + "Input(%s) of GRUUnitGradOp should not be null.", + "ResetHiddenPrev"); + PADDLE_ENFORCE(ctx->HasInput("Hidden"), + "Input(%s) of GRUUnitGradOp should not be null.", "Hidden"); + PADDLE_ENFORCE(ctx->HasInput(framework::GradVarName("Gate")), + "Input(%s@GRAD) of GRUUnitGradOp should not be null.", + "Gate"); + PADDLE_ENFORCE(ctx->HasInput(framework::GradVarName("ResetHiddenPrev")), + "Input(%s@GRAD) of GRUUnitGradOp should not be null.", + "ResetHiddenPrev"); + PADDLE_ENFORCE(ctx->HasInput(framework::GradVarName("Hidden")), + "Input(%s@GRAD) of GRUUnitGradOp should not be null.", + "Hidden"); + auto input_dims = ctx->GetInputDim("Input"); + auto hidden_prev_dims = ctx->GetInputDim("HiddenPrev"); + auto weight_dims = ctx->GetInputDim("Weight"); + // int batch_size = input_dims[0]; + 
int input_size = input_dims[1]; + int frame_size = hidden_prev_dims[1]; + int weight_height = weight_dims[0]; + int weight_width = weight_dims[1]; + PADDLE_ENFORCE_EQ( + input_size, frame_size * 3, + "The input_size must be 3 times of frame_size in GRUUnitOp."); + PADDLE_ENFORCE_EQ( + weight_height, frame_size, + "The shape of Weight matrix must be [frame_size, frame_size * 3]."); + PADDLE_ENFORCE_EQ( + weight_width, frame_size * 3, + "The shape of Weight matrix must be [frame_size, frame_size * 3]."); + auto bias = Input("Bias"); + if (bias != framework::kEmptyVarName) { + auto bias_dims = ctx->GetInputDim("Bias"); + int bias_height = bias_dims[0]; + int bias_width = bias_dims[1]; + PADDLE_ENFORCE_EQ(bias_height, 1, + "The shape of Bias must be [1, frame_size * 3]."); + PADDLE_ENFORCE_EQ(bias_width, frame_size * 3, + "The shape of Bias must be [1, frame_size * 3]."); + auto bias_grad_name = framework::GradVarName("Bias"); + if (ctx->HasOutput(bias_grad_name)) + ctx->SetOutputDim(bias_grad_name, bias_dims); + } + auto input_grad_name = framework::GradVarName("Input"); + if (ctx->HasOutput(input_grad_name)) + ctx->SetOutputDim(input_grad_name, input_dims); + auto hidden_prev_grad_name = framework::GradVarName("HiddenPrev"); + if (ctx->HasOutput(hidden_prev_grad_name)) + ctx->SetOutputDim(hidden_prev_grad_name, hidden_prev_dims); + auto weight_grad_name = framework::GradVarName("Weight"); + if (ctx->HasOutput(weight_grad_name)) + ctx->SetOutputDim(weight_grad_name, weight_dims); + } +}; + +} // namespace operators +} // namespace paddle + +namespace ops = paddle::operators; +REGISTER_OP(gru_unit, ops::GRUUnitOp, ops::GRUUnitOpMaker, gru_unit_grad, + ops::GRUUnitGradOp); +REGISTER_OP_CPU_KERNEL(gru_unit, + ops::GRUUnitKernel); +REGISTER_OP_CPU_KERNEL( + gru_unit_grad, ops::GRUUnitGradKernel); diff --git a/paddle/operators/gru_unit_op.cu b/paddle/operators/gru_unit_op.cu new file mode 100644 index 
0000000000000000000000000000000000000000..365f656523ddfb7ec8e2a5b885de74674823325a --- /dev/null +++ b/paddle/operators/gru_unit_op.cu @@ -0,0 +1,22 @@ +/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserve. + + Licensed under the Apache License, Version 2.0 (the "License"); + you may not use this file except in compliance with the License. + You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. */ + +#define EIGEN_USE_GPU +#include "paddle/operators/gru_unit_op.h" + +namespace ops = paddle::operators; +REGISTER_OP_GPU_KERNEL(gru_unit, + ops::GRUUnitKernel); +REGISTER_OP_GPU_KERNEL( + gru_unit_grad, ops::GRUUnitGradKernel); diff --git a/paddle/operators/gru_unit_op.h b/paddle/operators/gru_unit_op.h new file mode 100644 index 0000000000000000000000000000000000000000..c53e7d9827e0395e6ce613302e732b2797f83cdd --- /dev/null +++ b/paddle/operators/gru_unit_op.h @@ -0,0 +1,230 @@ +/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserve. + + Licensed under the Apache License, Version 2.0 (the "License"); + you may not use this file except in compliance with the License. + You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. 
*/ + +#pragma once + +#include "paddle/operators/activation_op.h" +#include "paddle/operators/math/math_function.h" + +#include "paddle/framework/eigen.h" +#include "paddle/framework/op_registry.h" + +namespace paddle { +namespace operators { + +using Tensor = framework::Tensor; +template +using EigenMatrix = framework::EigenMatrix; + +enum GRUActivationType { identity = 0, sigmoid = 1, tanh = 2, relu = 3 }; + +template +class GRUUnitKernel : public framework::OpKernel { + public: + template + void ActCompute(const int act_type, const Device& d, X x, Y y) const { + if (act_type == identity) + y.device(d) = x; + else if (act_type == sigmoid) + SigmoidFunctor()(d, x, y); + else if (act_type == tanh) + TanhFunctor()(d, x, y); + else if (act_type == relu) + ReluFunctor()(d, x, y); + else + PADDLE_THROW("unsupported activation type"); + } + + void Compute(const framework::ExecutionContext& context) const override { + auto* input = context.Input("Input"); + auto* hidden_prev = context.Input("HiddenPrev"); + auto* weight = context.Input("Weight"); + auto* bias = context.Input("Bias"); + auto* gate = context.Output("Gate"); + gate->mutable_data(context.GetPlace()); + auto* reset_hidden_prev = context.Output("ResetHiddenPrev"); + reset_hidden_prev->mutable_data(context.GetPlace()); + auto* hidden = context.Output("Hidden"); + hidden->mutable_data(context.GetPlace()); + + int batch_size = input->dims()[0]; + int frame_size = hidden_prev->dims()[1]; + + auto x = EigenMatrix::From(*input); + auto h_p = EigenMatrix::From(*hidden_prev); + auto g = EigenMatrix::From(*gate); + auto r_h_p = EigenMatrix::From(*reset_hidden_prev); + auto h = EigenMatrix::From(*hidden); + auto place = context.GetEigenDevice(); + + // calculate unactivated gate outputs + if (bias) { + auto b = EigenMatrix::From(*bias); + g.device(place) = x + + b.reshape(Eigen::array({{1, frame_size * 3}})) + .broadcast(Eigen::array({{batch_size, 1}})); + } else { + g.device(place) = x; + } + const T* hidden_prev_data 
= hidden_prev->data(); + const T* weight_data = weight->data(); + T* gate_data = gate->data(); + T* reset_hidden_prev_data = reset_hidden_prev->data(); + math::gemm(context.device_context(), false, false, batch_size, + 2 * frame_size, frame_size, 1, hidden_prev_data, + frame_size, weight_data, frame_size * 2, 1, gate_data, + frame_size * 3); + + // calculate activited gate + Eigen::array extents({{batch_size, frame_size}}); + Eigen::array u_offsets({{0, 0}}); + ActCompute(context.Attr("gate_activation"), place, + g.slice(u_offsets, extents), g.slice(u_offsets, extents)); + auto u = g.slice(u_offsets, extents); // update gate + Eigen::array r_offsets({{0, frame_size}}); + ActCompute(context.Attr("gate_activation"), place, + g.slice(r_offsets, extents), g.slice(r_offsets, extents)); + auto r = g.slice(r_offsets, extents); // reset gate + r_h_p.device(place) = r * h_p; // reset previous hidden state + math::gemm(context.device_context(), false, false, batch_size, + frame_size, frame_size, 1, reset_hidden_prev_data, + frame_size, weight_data + frame_size * frame_size * 2, + frame_size, 1, gate_data + frame_size * 2, + frame_size * 3); + + Eigen::array c_offsets({{0, frame_size * 2}}); + ActCompute(context.Attr("activation"), place, + g.slice(c_offsets, extents), g.slice(c_offsets, extents)); + auto c = g.slice(c_offsets, extents); // output candidate + + // calculate final output + h.device(place) = u * (h_p - c) + c; + } +}; + +template +class GRUUnitGradKernel : public framework::OpKernel { + public: + template + void ActGradCompute(const int act_type, const Device& d, X x, Y y, DX dx, + DY dy) const { + // x is dummy and won't be used even in Relu(use y instead) + if (act_type == identity) + dx.device(d) = dy; + else if (act_type == sigmoid) + SigmoidGradFunctor()(d, x, y, dy, dx); + else if (act_type == tanh) + TanhGradFunctor()(d, x, y, dy, dx); + else if (act_type == relu) + ReluGradFunctor()(d, x, y, dy, dx); + else + PADDLE_THROW("unsupported activation type"); 
+ } + + void Compute(const framework::ExecutionContext& context) const override { + auto* input = context.Input("Input"); + auto* hidden_prev = context.Input("HiddenPrev"); + auto* weight = context.Input("Weight"); + auto* gate = context.Input("Gate"); + auto* reset_hidden_prev = context.Input("ResetHiddenPrev"); + auto* hidden_grad = context.Input(framework::GradVarName("Hidden")); + auto* input_grad = context.Output(framework::GradVarName("Input")); + auto* hidden_prev_grad = + context.Output(framework::GradVarName("HiddenPrev")); + auto* weight_grad = + context.Output(framework::GradVarName("Weight")); + auto* bias_grad = context.Output(framework::GradVarName("Bias")); + input_grad->mutable_data(context.GetPlace()); + hidden_prev_grad->mutable_data(context.GetPlace()); + weight_grad->mutable_data(context.GetPlace()); + Tensor gate_grad; + gate_grad.mutable_data(input->dims(), context.GetPlace()); + Tensor reset_hidden_prev_grad; + reset_hidden_prev_grad.mutable_data(reset_hidden_prev->dims(), + context.GetPlace()); + + int batch_size = input->dims()[0]; + int frame_size = hidden_prev->dims()[1]; + + const T* hidden_prev_data = hidden_prev->data(); + T* hidden_prev_grad_data = hidden_prev_grad->data(); + const T* weight_data = weight->data(); + T* weight_grad_data = weight_grad->data(); + T* gate_grad_data = gate_grad.data(); + const T* reset_hidden_prev_data = reset_hidden_prev->data(); + T* reset_hidden_prev_grad_data = reset_hidden_prev_grad.data(); + + auto h_p = EigenMatrix::From(*hidden_prev); + auto g = EigenMatrix::From(*gate); + auto d_h = EigenMatrix::From(*hidden_grad); + auto d_x = EigenMatrix::From(*input_grad); + auto d_h_p = EigenMatrix::From(*hidden_prev_grad); + auto d_g = EigenMatrix::From(gate_grad); + auto d_r_h_p = EigenMatrix::From(reset_hidden_prev_grad); + auto place = context.GetEigenDevice(); + + Eigen::array extents({{batch_size, frame_size}}); + Eigen::array u_offsets({{0, 0}}); + auto u = g.slice(u_offsets, extents); // update gate + 
Eigen::array r_offsets({{0, frame_size}}); + auto r = g.slice(r_offsets, extents); // reset gate + Eigen::array c_offsets({{0, frame_size * 2}}); + auto c = g.slice(c_offsets, extents); // output candidate + + // backward for unactivated update gate + ActGradCompute(context.Attr("gate_activation"), place, u, u, + d_g.slice(u_offsets, extents), d_h * (h_p - c)); + // backward for unactivated output candidate + ActGradCompute(context.Attr("activation"), place, c, c, + d_g.slice(c_offsets, extents), d_h * (u.constant(T(1)) - u)); + // backward for reset_hidden_prev + math::gemm(context.device_context(), false, true, batch_size, + frame_size, frame_size, 1, + gate_grad_data + frame_size * 2, frame_size * 3, + weight_data + frame_size * frame_size * 2, frame_size, + 0, reset_hidden_prev_grad_data, frame_size); + // backward for state_weight + math::gemm( + context.device_context(), true, false, frame_size, frame_size, + batch_size, 1, reset_hidden_prev_data, frame_size, + gate_grad_data + frame_size * 2, frame_size * 3, 0, + weight_grad_data + frame_size * frame_size * 2, frame_size); + // backward for unactivated reset gate + ActGradCompute(context.Attr("gate_activation"), place, r, r, + d_g.slice(r_offsets, extents), d_r_h_p * h_p); + // backward for update_gate_weight and reset_gate_weight + math::gemm(context.device_context(), true, false, frame_size, + frame_size * 2, batch_size, 1, hidden_prev_data, + frame_size, gate_grad_data, frame_size * 3, 0, + weight_grad_data, frame_size * 2); + // backward for hidden_prev + d_h_p.device(place) = d_r_h_p * r + d_h * u; + math::gemm(context.device_context(), false, true, batch_size, + frame_size, frame_size * 2, 1, gate_grad_data, + frame_size * 3, weight_data, frame_size * 2, 1, + hidden_prev_grad_data, frame_size); + // backward for input + d_x.device(place) = d_g; + // backward for bias + if (bias_grad) { + bias_grad->mutable_data(context.GetPlace()); + auto d_b = EigenMatrix::From(*bias_grad); + d_b.device(place) = 
d_g.sum(Eigen::array({{0}})); + } + } +}; + +} // namespace operators +} // namespace paddle diff --git a/paddle/operators/math/vol2col_test.cc b/paddle/operators/math/vol2col_test.cc index 81225e9a9803ce371d23620876ac22da63a8e2d1..2d69218843a69497b5b501d4297f2ec5ab26a844 100644 --- a/paddle/operators/math/vol2col_test.cc +++ b/paddle/operators/math/vol2col_test.cc @@ -78,7 +78,7 @@ void testVol2col() { if (paddle::platform::is_cpu_place(*place)) { input = input_tmp; } else { - input.CopyFrom(input_tmp, *place); + input.CopyFrom(input_tmp, *place, *context); } output.mutable_data({1, filter_size, filter_size, filter_size, output_depth, output_height, output_width}, @@ -93,7 +93,7 @@ void testVol2col() { if (paddle::platform::is_cpu_place(*place)) { out_cfo_ptr = output.data(); } else { - output_tmp.CopyFrom(output, paddle::platform::CPUPlace()); + output_tmp.CopyFrom(output, paddle::platform::CPUPlace(), *context); out_cfo_ptr = output_tmp.data(); } @@ -107,7 +107,7 @@ void testVol2col() { if (paddle::platform::is_cpu_place(*place)) { input = input_tmp; } else { - input.CopyFrom(input_tmp, *place); + input.CopyFrom(input_tmp, *place, *context); } paddle::operators::math::Col2VolFunctor col2vol; @@ -118,7 +118,7 @@ void testVol2col() { if (paddle::platform::is_cpu_place(*place)) { in_ptr = input.data(); } else { - input_tmp.CopyFrom(input, paddle::platform::CPUPlace()); + input_tmp.CopyFrom(input, paddle::platform::CPUPlace(), *context); in_ptr = input_tmp.data(); } diff --git a/paddle/operators/multiplex_op.cc b/paddle/operators/multiplex_op.cc index a86685b6dde4761cf74f9521bd9609b0864b9bdf..051051b051961c6da064bd9319460b3f41cea3e8 100644 --- a/paddle/operators/multiplex_op.cc +++ b/paddle/operators/multiplex_op.cc @@ -115,8 +115,9 @@ class MultiplexGradOp : public framework::OperatorWithKernel { } // namespace paddle namespace ops = paddle::operators; -REGISTER_OP(multiplex, ops::MultiplexOp, ops::MultiplexOpMaker, multiplex_grad, - ops::MultiplexGradOp); 
+REGISTER_OPERATOR(multiplex, ops::MultiplexOp, ops::MultiplexOpMaker, + paddle::framework::DefaultGradOpDescMaker); +REGISTER_OPERATOR(multiplex_grad, ops::MultiplexGradOp); REGISTER_OP_CPU_KERNEL( multiplex, ops::MultiplexCPUKernel); REGISTER_OP_CPU_KERNEL( diff --git a/paddle/operators/recurrent_op.cc b/paddle/operators/recurrent_op.cc index 00647f55f79d54602f8e755dba059dfaacc9f41e..e3d08378c2f29fa5d84c24ae7cebfcb0e7a53b25 100644 --- a/paddle/operators/recurrent_op.cc +++ b/paddle/operators/recurrent_op.cc @@ -70,14 +70,14 @@ void RecurrentAlgorithm::CreateScopes(const Scope& scope, // the weight are located in parent scope for (auto& var_name : input.second) { if (!step_scope.FindVar(var_name)) { - step_scope.NewVar(var_name)->GetMutable(); + step_scope.Var(var_name)->GetMutable(); } } } // create stepnet's outputs for (const auto& output : (*stepnet_)->Outputs()) { for (auto& var_name : output.second) { - step_scope.NewVar(var_name); + step_scope.Var(var_name); } } step_scopes->emplace_back(&step_scope); @@ -87,7 +87,7 @@ void RecurrentAlgorithm::CreateScopes(const Scope& scope, void RecurrentAlgorithm::InitMemories(Scope* step_scope) const { for (auto& attr : arg_->memories) { - auto* pre_mem = step_scope->NewVar(attr.pre_var)->GetMutable(); + auto* pre_mem = step_scope->Var(attr.pre_var)->GetMutable(); PADDLE_ENFORCE(step_scope->FindVar(attr.boot_var) != nullptr, "memory [%s]'s boot variable [%s] not exists", attr.var, attr.boot_var); @@ -167,9 +167,9 @@ void RecurrentGradientAlgorithm::LinkBootMemoryGradients( "memory variable [%s] does not exists", attr.var); PADDLE_ENFORCE(step_scope->FindVar(attr.boot_var) != nullptr, "boot variable [%s] does not exists", attr.boot_var); - auto* mem_grad = step_scope->NewVar(attr.var)->GetMutable(); + auto* mem_grad = step_scope->Var(attr.var)->GetMutable(); auto* boot_mem_grad = - step_scope->NewVar(attr.boot_var)->GetMutable(); + step_scope->Var(attr.boot_var)->GetMutable(); boot_mem_grad->Resize(mem_grad->dims()); 
boot_mem_grad->ShareDataWith(*mem_grad); } diff --git a/paddle/operators/rnn/recurrent_op_utils.cc b/paddle/operators/rnn/recurrent_op_utils.cc index d264664a99e2af88fc2c35f50476ed4722a9eea0..30b8ddeb5bc4220e261a5c37ac195b0348fef936 100644 --- a/paddle/operators/rnn/recurrent_op_utils.cc +++ b/paddle/operators/rnn/recurrent_op_utils.cc @@ -40,7 +40,7 @@ void SegmentInputs(const std::vector& step_scopes, f::DDim step_dims = slice_ddim(dims, 1, dims.size()); for (size_t j = 0; j < seq_len; j++) { Tensor* step_input = - step_scopes[j]->NewVar(inlinks[i])->GetMutable(); + step_scopes[j]->Var(inlinks[i])->GetMutable(); // The input of operators of each step is Tensor here. // Maybe need to modify Slice function. *step_input = input->Slice(j, j + 1); diff --git a/paddle/operators/sum_op.cc b/paddle/operators/sum_op.cc index ffb0cb92111bfb8490d35e4f5cfc9e405b0e3250..573487b83590c132d5a4379a4b2762fbc16c04bc 100644 --- a/paddle/operators/sum_op.cc +++ b/paddle/operators/sum_op.cc @@ -34,7 +34,7 @@ class SumOp : public framework::OperatorWithKernel { auto in_dim = x_dims[0]; for (size_t i = 1; i < N; i++) { auto dim = x_dims[i]; - PADDLE_ENFORCE(in_dim == dim, "Input tensors must have same shape"); + PADDLE_ENFORCE_EQ(in_dim, dim, "Input tensors must have same shape"); } ctx->SetOutputDim("Out", in_dim); ctx->ShareLoD("X", /*->*/ "Out"); diff --git a/paddle/operators/uniform_random_op.cc b/paddle/operators/uniform_random_op.cc index e330877fc4283b796dcb5c5d745881884ae491ae..75928f1ec818ab028ea06cfa72273fb99430c3c8 100644 --- a/paddle/operators/uniform_random_op.cc +++ b/paddle/operators/uniform_random_op.cc @@ -54,7 +54,7 @@ class UniformRandomOp : public framework::OperatorWithKernel { PADDLE_ENFORCE( ctx->Attrs().Get("min") < ctx->Attrs().Get("max"), "uniform_random's min must less then max"); - auto dims = Attr>("dims"); + auto& dims = ctx->Attrs().Get>("dims"); std::vector temp; temp.reserve(dims.size()); for (auto dim : dims) { diff --git 
a/paddle/platform/cudnn_helper.h b/paddle/platform/cudnn_helper.h index 2841d2a2dbec5c17ef098a06c976ca01247820f5..0c5719ef5162546578253e383209b1893c0cd71f 100644 --- a/paddle/platform/cudnn_helper.h +++ b/paddle/platform/cudnn_helper.h @@ -71,23 +71,32 @@ class ScopedTensorDescriptor { inline cudnnTensorDescriptor_t descriptor(const cudnnTensorFormat_t format, const cudnnDataType_t type, - const std::vector& dims) { - // the format is not used now, but it maybe useful feature + const std::vector& dims, + const int groups = 1) { + // the format is not used now, will add later std::vector strides(dims.size()); strides[dims.size() - 1] = 1; for (int i = dims.size() - 2; i >= 0; i--) { strides[i] = dims[i + 1] * strides[i + 1]; } + // Update tensor descriptor dims setting if groups > 1 + // FIXME(typhoonzero): Assume using NCHW order + std::vector dims_with_group(dims.begin(), dims.end()); // copy + if (groups > 1) { + dims_with_group[1] = dims_with_group[1] / groups; + } PADDLE_ENFORCE(dynload::cudnnSetTensorNdDescriptor( - desc_, type, dims.size(), dims.data(), strides.data())); + desc_, type, dims_with_group.size(), dims_with_group.data(), + strides.data())); return desc_; } template inline cudnnTensorDescriptor_t descriptor(const DataLayout& order, - const std::vector& dims) { - return descriptor(GetCudnnTensorFormat(order), CudnnDataType::type, - dims); + const std::vector& dims, + const int groups = 1) { + return descriptor(GetCudnnTensorFormat(order), CudnnDataType::type, dims, + groups); } private: @@ -106,18 +115,29 @@ class ScopedFilterDescriptor { inline cudnnFilterDescriptor_t descriptor(const cudnnTensorFormat_t format, const cudnnDataType_t type, - const std::vector& kernel) { - // filter layout: output input spatial_dim_y spatial_dim_x + const std::vector& kernel, + const int groups = 1) { + // filter layout: MCHW, where M is the number of + // output image channels, C is the number of input image channels, + // H and W is height and width of filter. 
+ std::vector kernel_with_group(kernel.begin(), kernel.end()); + if (groups > 1) { + // M /= groups + kernel_with_group[0] /= groups; + // NOTE: input filter(C) of the filter is already asserted to be C/groups. + } PADDLE_ENFORCE(dynload::cudnnSetFilterNdDescriptor( - desc_, type, format, kernel.size(), kernel.data())); + desc_, type, format, kernel_with_group.size(), + kernel_with_group.data())); return desc_; } template inline cudnnFilterDescriptor_t descriptor(const DataLayout& order, - const std::vector& kernel) { + const std::vector& kernel, + const int groups = 1) { return descriptor(GetCudnnTensorFormat(order), CudnnDataType::type, - kernel); + kernel, groups); } private: diff --git a/paddle/pybind/CMakeLists.txt b/paddle/pybind/CMakeLists.txt index 97364f2db9523c0629616692631d8372657a2128..46c24e2cd53c068a25e1a5c8c6df600c3111e20a 100644 --- a/paddle/pybind/CMakeLists.txt +++ b/paddle/pybind/CMakeLists.txt @@ -1,6 +1,6 @@ if(WITH_PYTHON) cc_library(paddle_pybind SHARED SRCS pybind.cc exception.cc protobuf.cc - DEPS pybind python backward proto_desc tensor_array + DEPS pybind python backward proto_desc tensor_array paddle_memory executor ${GLOB_OP_LIB}) endif(WITH_PYTHON) diff --git a/paddle/pybind/protobuf.cc b/paddle/pybind/protobuf.cc index 0e4bbe8415fd86ab29c6809e7652dc581b4e6004..b360b05d16c9a1c135fa56cb37919dece8f16788 100644 --- a/paddle/pybind/protobuf.cc +++ b/paddle/pybind/protobuf.cc @@ -118,12 +118,35 @@ void BindProgramDesc(py::module &m) { .def("append_block", &ProgramDescBind::AppendBlock, py::return_value_policy::reference) .def("append_backward", - [](ProgramDescBind &program_desc, + [](ProgramDescBind &program_desc, const VarDescBind &target, const std::unordered_set &no_grad_vars) { - AppendBackward(program_desc, no_grad_vars); + ParamGradInfoMap param_grad_map = + AppendBackward(program_desc, target, no_grad_vars); + std::unordered_map< + std::string, std::tuple> + retv; + for (auto it = param_grad_map.begin(); it != param_grad_map.end(); 
+ ++it) { + const auto &grad_info = it->second; + retv[it->first] = std::make_tuple( + grad_info.name_, grad_info.block_idx_, grad_info.op_idx_); + } + return retv; }) .def("block", &ProgramDescBind::Block, py::return_value_policy::reference) - .def("num_blocks", &ProgramDescBind::Size); + .def("num_blocks", &ProgramDescBind::Size) + .def("serialize_to_string", + [](ProgramDescBind &program_desc) -> py::bytes { + const ProgramDesc *desc = program_desc.Proto(); + PADDLE_ENFORCE(desc->IsInitialized(), + "ProgramDesc has not been initialized."); + std::string res; + PADDLE_ENFORCE( + desc->SerializeToString(&res), + "Serialize ProgramDesc Error. This could be a bug of Paddle."); + return res; + }); } void BindBlockDesc(py::module &m) { @@ -134,22 +157,32 @@ void BindBlockDesc(py::module &m) { py::return_value_policy::reference) .def("prepend_op", &BlockDescBind::PrependOp, py::return_value_policy::reference) - .def("new_var", + .def("var", [](BlockDescBind &self, py::bytes byte_name) { std::string name = byte_name; - return self.NewVar(name); + return self.Var(name); }, py::return_value_policy::reference) - .def("var", + .def("find_var", [](BlockDescBind &self, py::bytes byte_name) { std::string name = byte_name; - return self.Var(name); + return self.FindVar(name); }, py::return_value_policy::reference) .def("all_vars", &BlockDescBind::AllVars, py::return_value_policy::reference) .def("all_ops", &BlockDescBind::AllOps, - py::return_value_policy::reference); + py::return_value_policy::reference) + .def("serialize_to_string", [](BlockDescBind &block_desc) -> py::bytes { + const BlockDesc *desc = block_desc.Proto(); + PADDLE_ENFORCE(desc->IsInitialized(), + "BlockDesc has not been initialized."); + std::string res; + PADDLE_ENFORCE( + desc->SerializeToString(&res), + "Serialize BlockDesc Error. 
This could be a bug of Paddle."); + return res; + }); } void BindVarDsec(py::module &m) { @@ -162,7 +195,8 @@ void BindVarDsec(py::module &m) { .value("FP32", DataType::FP32) .value("FP64", DataType::FP64); - py::class_(m, "VarDesc", "") + py::class_ var_desc(m, "VarDesc", ""); + var_desc .def("name", [](const VarDescBind &self) { py::bytes name = self.Name(); @@ -174,7 +208,23 @@ void BindVarDsec(py::module &m) { .def("shape", &VarDescBind::Shape, py::return_value_policy::reference) .def("data_type", &VarDescBind::GetDataType) .def("lod_level", &VarDescBind::GetLodLevel) - .def("set_lod_level", &VarDescBind::SetLoDLevel); + .def("set_lod_level", &VarDescBind::SetLoDLevel) + .def("type", &VarDescBind::GetType) + .def("set_type", &VarDescBind::SetType) + .def("serialize_to_string", [](VarDescBind &var_desc) -> py::bytes { + const VarDesc *desc = var_desc.Proto(); + PADDLE_ENFORCE(desc->IsInitialized(), + "VarDesc has not been initialized."); + std::string res; + PADDLE_ENFORCE( + desc->SerializeToString(&res), + "Serialize VarDesc Error. This could be a bug of Paddle."); + return res; + }); + + py::enum_(var_desc, "VarType", "") + .value("LOD_TENSOR", VarDesc::LOD_TENSOR) + .value("SELECTED_ROWS", VarDesc::SELECTED_ROWS); } void BindOpDesc(py::module &m) { @@ -204,9 +254,19 @@ void BindOpDesc(py::module &m) { .def("set_attr", &OpDescBind::SetAttr) .def("attr", &OpDescBind::GetAttr) .def("set_block_attr", &OpDescBind::SetBlockAttr) - .def("get_block_attr", &OpDescBind::GetBlockAttr) + .def("block_attr", &OpDescBind::GetBlockAttr) .def("check_attrs", &OpDescBind::CheckAttrs) - .def("infer_shape", &OpDescBind::InferShape); + .def("infer_shape", &OpDescBind::InferShape) + .def("serialize_to_string", [](OpDescBind &op_desc) -> py::bytes { + const OpDesc *desc = op_desc.Proto(); + PADDLE_ENFORCE(desc->IsInitialized(), + "OpDesc has not been initialized."); + std::string res; + PADDLE_ENFORCE( + desc->SerializeToString(&res), + "Serialize OpDesc Error. 
This could be a bug of Paddle."); + return res; + }); } } // namespace pybind diff --git a/paddle/pybind/pybind.cc b/paddle/pybind/pybind.cc index 0f6e3101e26c5ac249664ce8badc10adc939305f..afc80b25b185509739edcd0d06817aa1d507a8ec 100644 --- a/paddle/pybind/pybind.cc +++ b/paddle/pybind/pybind.cc @@ -15,9 +15,11 @@ limitations under the License. */ #include "paddle/pybind/protobuf.h" #include "paddle/framework/backward.h" +#include "paddle/framework/executor.h" #include "paddle/framework/lod_tensor.h" #include "paddle/framework/tensor_array.h" #include "paddle/operators/cond_op.h" +#include "paddle/operators/dynamic_recurrent_op.h" #include "paddle/operators/net_op.h" #include "paddle/operators/recurrent_op.h" #include "paddle/platform/enforce.h" @@ -163,9 +165,9 @@ All parameter, weight, gradient are variables in Paddle. py::return_value_policy::reference); py::class_(m, "Scope", "") - .def("new_var", + .def("var", [](Scope &self, const std::string &name) -> Variable * { - return self.NewVar(name); + return self.Var(name); }, py::return_value_policy::reference) .def("find_var", &Scope::FindVar, py::return_value_policy::reference) @@ -341,6 +343,33 @@ All parameter, weight, gradient are variables in Paddle. 
self.set_stepnet(net.Clone()); }); + py::class_(m, + "DynamicRecurrentOp") + .def_static("create", + [](py::bytes protobin) -> operators::DynamicRecurrentOp * { + OpDesc desc; + PADDLE_ENFORCE(desc.ParsePartialFromString(protobin), + "Cannot parse user input to OpDesc"); + PADDLE_ENFORCE(desc.IsInitialized(), + "User OpDesc is not initialized, reason %s", + desc.InitializationErrorString()); + auto rnn_op = OpRegistry::CreateOp(desc); + return static_cast( + rnn_op.release()); + }) + .def("set_stepnet", + [](operators::DynamicRecurrentOp &self, const operators::NetOp &net) + -> void { self.SetStepNet(net.Clone()); }) + .def("get_state", + [](operators::DynamicRecurrentOp &self, const std::string &name) + -> const TensorArray & { return self.state(name); }) + .def("get_step_input", + [](operators::DynamicRecurrentOp &self, const std::string &name) + -> const TensorArray & { return self.step_input(name); }) + .def("get_step_output", + [](operators::DynamicRecurrentOp &self, const std::string &name) + -> const TensorArray & { return self.step_output(name); }); + // cond_op py::class_(m, "CondOp") .def_static("create", @@ -363,6 +392,14 @@ All parameter, weight, gradient are variables in Paddle. 
self.set_falsenet(net.Clone()); }); + py::class_(m, "Executor") + .def(py::init &>()) + .def("run", + [](Executor &self, const ProgramDesc &program_desc, int block_id) { + framework::Scope &global_scope = GetGlobalScope(); + self.Run(program_desc, &global_scope, block_id); + }); + m.def("unique_integer", UniqueIntegerGenerator); m.def("is_compile_gpu", IsCompileGPU); diff --git a/paddle/trainer/tests/CMakeLists.txt b/paddle/trainer/tests/CMakeLists.txt index 066837ca959e46dbe3b39c661aa1bab11cbf2734..5ebbb99c94bce45d295ae0bf585f2cf864bfc4d4 100644 --- a/paddle/trainer/tests/CMakeLists.txt +++ b/paddle/trainer/tests/CMakeLists.txt @@ -39,15 +39,18 @@ add_test(NAME test_CompareTwoNets ################ test_CompareMKLDNNandCPU ###################### if(WITH_MKLDNN) - add_unittest_without_exec(test_CompareMKLDNNandCPU - test_CompareTwoNets.cpp) - add_test(NAME test_CompareMKLDNNandCPU - COMMAND ${PADDLE_SOURCE_DIR}/paddle/.set_python_path.sh -d ${PADDLE_SOURCE_DIR}/python/ - ${CMAKE_CURRENT_BINARY_DIR}/test_CompareMKLDNNandCPU - --config_file_a=trainer/tests/sample_trainer_config_simple_net.conf --use_mkldnn_a=True - --config_file_b=trainer/tests/sample_trainer_config_simple_net.conf --use_mkldnn_b=False - --use_gpu=False - WORKING_DIRECTORY ${PADDLE_SOURCE_DIR}/paddle/) + macro(gen_command VAR_NAME CONFIG_FILE) + set(${VAR_NAME} "${PADDLE_SOURCE_DIR}/paddle/.set_python_path.sh" "-d" "${PADDLE_SOURCE_DIR}/python/" + "${CMAKE_CURRENT_BINARY_DIR}/test_CompareMKLDNNandCPU --use_gpu=False" + "--config_file_a=trainer/tests/${CONFIG_FILE} --use_mkldnn_a=True" + "--config_file_b=trainer/tests/${CONFIG_FILE} --use_mkldnn_b=False" + "WORKING_DIRECTORY" "${PADDLE_SOURCE_DIR}/paddle/") + endmacro() + add_unittest_without_exec(test_CompareMKLDNNandCPU test_CompareTwoNets.cpp) + gen_command(compare_simple_net "sample_trainer_config_simple_net.conf") + gen_command(compare_branch_net "sample_trainer_config_branch_net.conf") + add_test(NAME test_CompareMKLDNNandCPU_simple_net COMMAND 
${compare_simple_net}) + add_test(NAME test_CompareMKLDNNandCPU_branch_net COMMAND ${compare_branch_net}) endif() ############### test_CompareTwoOpts ################### diff --git a/paddle/trainer/tests/sample_trainer_config_branch_net.conf b/paddle/trainer/tests/sample_trainer_config_branch_net.conf new file mode 100644 index 0000000000000000000000000000000000000000..c2594bc13c250a877a7b8a77e11405671c4d8907 --- /dev/null +++ b/paddle/trainer/tests/sample_trainer_config_branch_net.conf @@ -0,0 +1,103 @@ +# Copyright (c) 2017 PaddlePaddle Authors. All Rights Reserved +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+ +from paddle.trainer_config_helpers import * + +################################### Data Configuration ################################### +TrainData(ProtoData(files = "trainer/tests/mnist.list")) +################################### Algorithm Configuration ################################### +settings(batch_size = 256, + learning_method = MomentumOptimizer(momentum=0.5, sparse=False)) +################################### Network Configuration ################################### +data = data_layer(name ="input", size=784) + +tmp = img_conv_layer(input=data, + num_channels=1, + filter_size=3, + num_filters=32, + padding=1, + shared_biases=True, + act=ReluActivation()) + +a1 = img_conv_layer(input=tmp, + filter_size=1, + num_filters=32, + padding=0, + shared_biases=True, + act=ReluActivation()) + +a2 = img_conv_layer(input=tmp, + filter_size=3, + num_filters=32, + padding=1, + shared_biases=True, + act=ReluActivation()) + +tmp = concat_layer(input=[a1, a2]) + +tmp = img_pool_layer(input=tmp, + num_channels=64, + pool_size=3, + stride=2, + padding=1, + pool_type=AvgPooling()) + +b1 = img_conv_layer(input=tmp, + filter_size=3, + num_filters=64, + padding=1, + shared_biases=True, + act=ReluActivation()) + +b1 = img_pool_layer(input=b1, + pool_size=3, + stride=1, + padding=1, + pool_type=MaxPooling()) + +b2 = img_conv_layer(input=tmp, + filter_size=5, + num_filters=64, + padding=2, + shared_biases=True, + act=ReluActivation()) + +b2 = img_pool_layer(input=b2, + pool_size=5, + stride=1, + padding=2, + pool_type=MaxPooling()) + +tmp = addto_layer(input=[b1, b2], + act=ReluActivation(), + bias_attr=False) + +tmp = img_pool_layer(input=tmp, + pool_size=3, + stride=2, + padding=1, + pool_type=MaxPooling()) + +tmp = fc_layer(input=tmp, size=64, + bias_attr=False, + act=TanhActivation()) + +output = fc_layer(input=tmp, size=10, + bias_attr=True, + act=SoftmaxActivation()) + +lbl = data_layer(name ="label", size=10) + +cost = classification_cost(input=output, label=lbl) 
+outputs(cost) diff --git a/python/paddle/v2/framework/default_scope_funcs.py b/python/paddle/v2/framework/default_scope_funcs.py index 1b5580c8b30f69016f187b1d8710a57b5f7cfa9f..c07f9a6ab96ac86fd6d20fbe0bc560845107f063 100644 --- a/python/paddle/v2/framework/default_scope_funcs.py +++ b/python/paddle/v2/framework/default_scope_funcs.py @@ -5,7 +5,7 @@ Default scope function. thread-local stack of Scope. Top of that stack is current scope, the bottom of that stack is all scopes' parent. -Invoking `new_var/find_var` can `new/find` variable in current scope. +Invoking `var/find_var` can `new/find` variable in current scope. Invoking `enter_local_scope/leave_local_scope` can create or destroy local scope. @@ -19,7 +19,7 @@ import threading __tl_scope__ = threading.local() __all__ = [ - 'get_cur_scope', 'enter_local_scope', 'leave_local_scope', 'new_var', + 'get_cur_scope', 'enter_local_scope', 'leave_local_scope', 'var', 'find_var', 'scoped_function' ] @@ -54,11 +54,11 @@ def leave_local_scope(): get_cur_scope().drop_kids() -def new_var(name): +def var(name): """ create variable in current scope. 
""" - return get_cur_scope().new_var(name) + return get_cur_scope().var(name) def find_var(name): diff --git a/python/paddle/v2/framework/framework.py b/python/paddle/v2/framework/framework.py new file mode 100644 index 0000000000000000000000000000000000000000..d649e69d58961dcc43e3bf7325b0b06c832245dc --- /dev/null +++ b/python/paddle/v2/framework/framework.py @@ -0,0 +1,457 @@ +import paddle.v2.framework.core as core +import paddle.v2.framework.proto.framework_pb2 as framework_pb2 +import collections +import numpy as np +import copy + +__all__ = ['Block', 'Variable', 'Program', 'Operator'] + + +class Variable(object): + def __init__(self, + block, + type=core.VarDesc.VarType.LOD_TENSOR, + name=None, + shape=None, + dtype=None, + lod_level=None, + **kwargs): + self.block = block + + if name is None: + name = Variable._unique_var_name_() + is_new_var = False + self.desc = self.block.desc.find_var(name) + + if self.desc is None: + self.desc = self.block.desc.var(name) + is_new_var = True + + if is_new_var: + self.desc.set_type(type) + elif self.desc.type() != type: + raise ValueError("Variable {0} has been created before. The " + "previous type is {1}; the new type is {2}. They" + " are not matched".format(self.name, + self.desc.type(), type)) + + if shape is not None: + if is_new_var: + self.desc.set_shape(shape) + else: + old_shape = self.shape + shape = tuple(shape) + if shape != old_shape: + raise ValueError( + "Variable {0} has been created before. the previous " + "shape is {1}; the new shape is {2}. They are not " + "matched.".format(self.name, old_shape, shape)) + if dtype is not None: + if not isinstance(dtype, core.DataType): + dtype = Variable._convert_np_dtype_to_dtype_(dtype) + if is_new_var: + self.desc.set_data_type(dtype) + else: + old_dtype = self.data_type() + if dtype != old_shape: + raise ValueError("Variable {0} has been created before. " + "The previous data type is {1}; the new " + "data type is {2}. 
They are not " + "matched.".format(self.name, old_dtype, + dtype)) + + if lod_level is not None: + if is_new_var: + self.desc.set_lod_level(lod_level) + else: + if lod_level != self.lod_level: + raise ValueError("Variable {0} has been created before. " + "The previous lod_level is {1}; the new " + "lod_level is {2}. They are not " + "matched".format(self.name, self.lod_level, + lod_level)) + self.block.vars[name] = self + self.op = None + + def __str__(self): + protostr = self.desc.serialize_to_string() + proto = framework_pb2.VarDesc.FromString(str(protostr)) + return proto.__str__() + + __repr__ = __str__ + + @property + def name(self): + return self.desc.name() + + @property + def shape(self): + # convert to tuple, make it as same as numpy API. + return tuple(self.desc.shape()) + + @property + def data_type(self): + return self.desc.data_type() + + @property + def lod_level(self): + return self.desc.lod_level() + + @staticmethod + def _unique_var_name_(): + uid = core.unique_integer() # unique during whole process. + return "_generated_var_%d" % uid + + @staticmethod + def _convert_np_dtype_to_dtype_(np_dtype): + dtype = np.dtype(np_dtype) + if dtype == np.float32: + return core.DataType.FP32 + elif dtype == np.float64: + return core.DataType.FP64 + elif dtype == np.float16: + return core.DataType.FP16 + elif dtype == np.int32: + return core.DataType.INT32 + elif dtype == np.int16: + return core.DataType.INT16 + elif dtype == np.int64: + return core.DataType.INT64 + elif dtype == np.bool: + return core.DataType.BOOL + else: + raise ValueError("Not supported numpy dtype " + str(dtype)) + + +def get_all_op_protos(): + """ + Get all registered op proto from PaddlePaddle C++ end. + :return: A list of registered OpProto. 
+ """ + protostrs = core.get_all_op_protos() + ret_values = [] + for pbstr in protostrs: + op_proto = framework_pb2.OpProto.FromString(str(pbstr)) + ret_values.append(op_proto) + return ret_values + + +class OpProtoHolder(object): + @classmethod + def instance(cls): + if not hasattr(cls, '_instance'): + cls._instance = cls() + return cls._instance + + def __init__(self): + assert not hasattr( + self.__class__, + '_instance'), 'Please use `instance()` to get OpProtoHolder opject!' + op_protos = get_all_op_protos() + self.op_proto_map = {} + for proto in op_protos: + self.op_proto_map[proto.type] = proto + + def get_op_proto(self, type): + assert type in self.op_proto_map, "Operator \"%s\" has not been registered." % type + return self.op_proto_map[type] + + +class Operator(object): + def __init__(self, + block, + desc, + type=None, + inputs=None, + outputs=None, + attrs=None): + self.block = block + self.desc = desc + if len(self.desc.type()) != 0: + return + if type is None: + raise ValueError( + "`type` to initilized an Operator can not be None.") + self.desc.set_type(type) + proto = OpProtoHolder.instance().get_op_proto(type) + + if inputs is not None: + given = set() + need = set() + for n in inputs: + given.add(n) + for m in proto.inputs: + need.add(m.name) + if not given == need: + raise ValueError( + "Incorrect setting for input(s) of operator \"%s\". Need: [%s] Given: [%s]" + % (type, ", ".join(str(e) for e in need), ", ".join( + str(e) for e in given))) + + for in_proto in proto.inputs: + in_argus = inputs[in_proto.name] + if not isinstance(in_argus, list): + in_argus = [in_argus] + if not in_proto.duplicable and len(in_argus) > 1: + raise ValueError( + "Input %s expects only one input, but %d are given." 
% + (in_proto.name, len(in_argus))) + in_argu_names = [] + for argu in in_argus: + in_argu_names.append(argu.name) + self.desc.set_input(in_proto.name, in_argu_names) + + if outputs is not None: + given = set() + need = set() + for n in outputs: + given.add(n) + for m in proto.outputs: + need.add(m.name) + if not given == need: + raise ValueError( + "Incorrect setting for output(s) of operator \"%s\". Need: [%s] Given: [%s]" + % (type, ", ".join(str(e) for e in need), ", ".join( + str(e) for e in given))) + + for out_proto in proto.outputs: + out_argus = outputs[out_proto.name] + if not isinstance(out_argus, list): + out_argus = [out_argus] + if not out_proto.duplicable and len(out_argus) > 1: + raise ValueError( + "Output %s expects only one output, but %d are given." % + (out_proto.name, len(out_argus))) + out_argu_names = [] + for argu in out_argus: + out_argu_names.append(argu.name) + argu.op = self + self.desc.set_output(out_proto.name, out_argu_names) + + if attrs is not None: + for attr in proto.attrs: + attr_name = attr.name + if not attr_name in attrs: + continue + if not isinstance(attrs[attr_name], Block): + self.desc.set_attr(attr_name, attrs[attr_name]) + else: + self.desc.set_block_attr(attr_name, attrs[attr_name].desc) + + self.desc.check_attrs() + self.desc.infer_shape(self.block.desc) + + def __str__(self): + protostr = self.desc.serialize_to_string() + proto = framework_pb2.OpDesc.FromString(str(protostr)) + return proto.__str__() + + __repr__ = __str__ + + @property + def type(self): + return self.desc.type() + + def input(self, name): + return self.desc.input(name) + + @property + def input_names(self): + return self.desc.input_names() + + def output(self, name): + return self.desc.output(name) + + @property + def output_names(self): + return self.desc.output_names() + + def has_attr(self, name): + return self.desc.has_attr(name) + + def attr_type(self, name): + return self.desc.attr_type(name) + + @property + def attr_names(self): + return 
self.desc.attr_names() + + def attr(self, name): + return self.desc.attr(name) + + def block_attr(self, name): + return self.desc.block_attr(name) + + +class Block(object): + def __init__(self, program, idx): + self.desc = program.desc.block(idx) + self.vars = dict() # var_name --> var + self.ops = collections.deque() # operator list + self.program = program + + def __str__(self): + protostr = self.desc.serialize_to_string() + proto = framework_pb2.BlockDesc.FromString(str(protostr)) + return proto.__str__() + + __repr__ = __str__ + + @property + def parent_idx(self): + return self.desc.parent + + @property + def idx(self): + return self.desc.id + + def create_var(self, *args, **kwargs): + return Variable(self, *args, **kwargs) + + def has_var(self, name): + return name in self.vars + + def create_parameter(self, *args, **kwargs): + global_block = self.program.global_block() + return Parameter(global_block, *args, **kwargs) + + def append_op(self, *args, **kwargs): + op_desc = self.desc.append_op() + op = Operator(self, op_desc, *args, **kwargs) + self.ops.append(op) + return op + + def prepend_op(self, *args, **kwargs): + op_desc = self.desc.prepend_op() + op = Operator(self, op_desc, *args, **kwargs) + self.ops.appendleft(op) + return op + + def sync_with_cpp(self): + # sync variables from cpp + for var in self.desc.all_vars(): + if not self.has_var(var.name()): + self.create_var(name=var.name(), desc=var, type=var.type()) + + # sync operators from cpp + ops_in_cpp = self.desc.all_ops() + first_op_in_python = self.ops[0].desc + last_op_in_python = self.ops[len(self.ops) - 1].desc + start_index = None + end_index = None + for index in range(len(ops_in_cpp)): + if first_op_in_python == ops_in_cpp[index]: + start_index = index + if last_op_in_python == ops_in_cpp[index]: + end_index = index + assert start_index is not None + assert end_index is not None + assert start_index <= end_index + + # sync ops append to the head of cpp_ops + for index in range((start_index - 
1 - 1), -1, -1): + op_desc = ops_in_cpp[index] + op = Operator(self, op_desc) + self.ops.appendleft(op) + + # sync ops append to the end of cpp_ops + for index in range((end_index + 1), len(ops_in_cpp)): + op_desc = ops_in_cpp[index] + op = Operator(self, op_desc) + self.ops.append(op) + + assert len(self.ops) == len(ops_in_cpp) + for index in range(len(self.ops)): + assert self.ops[index].desc == ops_in_cpp[index] + + +class Program(object): + @classmethod + def instance(cls): + # From https://stackoverflow.com/questions/8212053 + # Making Program as a Singleton class. + if not hasattr(cls, '_instance'): + cls._instance = cls() + return cls._instance + + def __init__(self): + assert not hasattr(self.__class__, + '_instance'), 'Do not call constructor directly!' + self.desc = core.ProgramDesc.instance() + self.blocks = [Block(self, 0)] + self.current_block_idx = 0 + + def __str__(self): + protostr = self.desc.serialize_to_string() + proto = framework_pb2.ProgramDesc.FromString(str(protostr)) + return proto.__str__() + + __repr__ = __str__ + + def global_block(self): + return self.blocks[0] + + def current_block(self): + return self.blocks[self.current_block_idx] + + def append_backward(self, target, no_grad_set): + assert isinstance(target, Variable) + param_to_grad_info = self.desc.append_backward(target.desc, no_grad_set) + self.sync_with_cpp() + return param_to_grad_info + + def create_block(self): + new_block_idx = len(self.blocks) + self.desc.append_block(self.current_block().desc) + self.current_block_idx = new_block_idx + self.blocks.append(Block(self, self.current_block_idx)) + return self.current_block() + + def rollback(self): + self.current_block_idx = self.current_block().parent_idx + + def sync_with_cpp(self): + for block_idx in range(len(self.blocks), self.desc.num_blocks()): + self.blocks.append(Block(self, block_idx)) + for block in self.blocks: + block.sync_with_cpp() + + +class Parameter(Variable): + def __init__(self, block, shape, dtype, 
**kwargs): + if shape is None or dtype is None: + raise ValueError("Parameter must set shape and dtype") + if len(shape) == 0: + raise ValueError("Parameter shape cannot be empty") + + for each in shape: + if each < 0: + raise ValueError("Parameter shape should not be related with " + "batch-size") + + Variable.__init__(self, block, shape=shape, dtype=dtype, **kwargs) + self.trainable = kwargs.get('trainable', True) + self.init_attr = kwargs.get('initialize_attr', { + 'type': 'uniform_random', + 'min': -1.0, + 'max': 1.0 + }) + + self.optimize_attr = kwargs.get('optimize_attr', {'learning_rate': 1.0}) + self._append_initialize_ops_() + + def _append_initialize_ops_(self): + attr = copy.deepcopy(self.init_attr) + op_type = attr.pop('type', None) + block = self.block + assert isinstance(block, Block) + shape = self.shape + attr['dims'] = shape + attr['data_type'] = int(self.data_type) + op = block.prepend_op( + type=op_type, inputs=None, outputs={'Out': [self]}, attrs=attr) + self.op = op + + +# program is a global instance. 
+g_program = Program.instance() diff --git a/python/paddle/v2/framework/graph.py b/python/paddle/v2/framework/graph.py deleted file mode 100644 index 0f0a2847e58a1ca172bf1ba382abb2ebc1ecb8ed..0000000000000000000000000000000000000000 --- a/python/paddle/v2/framework/graph.py +++ /dev/null @@ -1,240 +0,0 @@ -import paddle.v2.framework.core as core -import collections -import numpy as np -import copy - -__all__ = ['Block', 'Variable', 'Program', 'Operator'] - - -class Variable(object): - def __init__(self, - block, - name=None, - shape=None, - dtype=None, - lod_level=None, - **kwargs): - self.block = block - - if name is None: - name = Variable._unique_var_name_() - try: - self.desc = self.block.desc.var(name) - is_new_var = False - except core.EnforceNotMet: - self.desc = self.block.desc.new_var(name) - is_new_var = True - - if shape is not None: - if is_new_var: - self.desc.set_shape(shape) - else: - old_shape = self.shape - shape = tuple(shape) - if shape != old_shape: - raise ValueError( - "Variable {0} has been created before. the previous " - "shape is {1}; the new shape is {2}. They are not " - "matched.".format(self.name, old_shape, shape)) - if dtype is not None: - if not isinstance(dtype, core.DataType): - dtype = Variable._convert_np_dtype_to_dtype_(dtype) - if is_new_var: - self.desc.set_data_type(dtype) - else: - old_dtype = self.data_type() - if dtype != old_shape: - raise ValueError("Variable {0} has been created before. " - "The previous data type is {1}; the new " - "data type is {2}. They are not " - "matched.".format(self.name, old_dtype, - dtype)) - - if lod_level is not None: - if is_new_var: - self.desc.set_lod_level(lod_level) - else: - if lod_level != self.lod_level: - raise ValueError("Variable {0} has been created before. " - "The previous lod_level is {1}; the new " - "lod_level is {2}. 
They are not " - "matched".format(self.name, self.lod_level, - lod_level)) - self.block.vars[name] = self - self.op = None - - @property - def name(self): - return self.desc.name() - - @property - def shape(self): - # convert to tuple, make it as same as numpy API. - return tuple(self.desc.shape()) - - @property - def data_type(self): - return self.desc.data_type() - - @property - def lod_level(self): - return self.desc.lod_level() - - @staticmethod - def _unique_var_name_(): - uid = core.unique_integer() # unique during whole process. - return "_generated_var_%d" % uid - - @staticmethod - def _convert_np_dtype_to_dtype_(np_dtype): - dtype = np.dtype(np_dtype) - if dtype == np.float32: - return core.DataType.FP32 - elif dtype == np.float64: - return core.DataType.FP64 - elif dtype == np.float16: - return core.DataType.FP16 - elif dtype == np.int32: - return core.DataType.INT32 - elif dtype == np.int16: - return core.DataType.INT16 - elif dtype == np.int64: - return core.DataType.INT64 - elif dtype == np.bool: - return core.DataType.BOOL - else: - raise ValueError("Not supported numpy dtype " + str(dtype)) - - -class Operator(object): - def __init__(self, - block, - desc, - type=None, - inputs=None, - outputs=None, - attrs=None): - self.block = block - self.desc = desc - if type is not None: - # TODO. 
- pass - if inputs is not None: - # TODO - pass - if outputs is not None: - # TODO - pass - if attrs is not None: - # TODO - pass - - # TODO: Getters - - -class Block(object): - def __init__(self, program, idx): - self.desc = program.desc.block(idx) - self.vars = dict() # var_name --> var - self.ops = collections.deque() # operator list - self.program = program - - @property - def parent_idx(self): - return self.desc.parent - - @property - def idx(self): - return self.desc.id - - def create_var(self, *args, **kwargs): - return Variable(self, *args, **kwargs) - - def create_parameter(self, *args, **kwargs): - global_block = self.program.global_block() - return Parameter(global_block, *args, **kwargs) - - def append_op(self, *args, **kwargs): - op_desc = self.desc.append_op() - op = Operator(self, op_desc, *args, **kwargs) - self.ops.append(op) - return op - - def prepend_op(self, *args, **kwargs): - op_desc = self.desc.prepend_op() - op = Operator(self, op_desc, *args, **kwargs) - self.ops.appendleft(op) - return op - - -class Program(object): - @classmethod - def instance(cls): - # From https://stackoverflow.com/questions/8212053 - # Making Program as a Singleton class. - if not hasattr(cls, '_instance'): - cls._instance = cls() - return cls._instance - - def __init__(self): - assert not hasattr(self.__class__, - '_instance'), 'Do not call constructor directly!' 
- self.desc = core.ProgramDesc.instance() - self.blocks = [Block(self, 0)] - self.current_block_idx = 0 - - def global_block(self): - return self.blocks[0] - - def current_block(self): - return self.blocks[self.current_block_idx] - - def create_block(self): - new_block_idx = len(self.blocks) - self.desc.append_block(self.current_block().desc) - self.current_block_idx = new_block_idx - self.blocks.append(Block(self, self.current_block_idx)) - return self.current_block() - - def rollback(self): - self.current_block_idx = self.current_block().parent_idx - - -class Parameter(Variable): - def __init__(self, block, shape, dtype, **kwargs): - if shape is None or dtype is None: - raise ValueError("Parameter must set shape and dtype") - if len(shape) == 0: - raise ValueError("Parameter shape cannot be empty") - - for each in shape: - if each < 0: - raise ValueError("Parameter shape should not be related with " - "batch-size") - - Variable.__init__(self, block, shape=shape, dtype=dtype, **kwargs) - self.trainable = kwargs.get('trainable', True) - self.init_attr = kwargs.get('initialize_attr', { - 'type': 'uniform_random', - 'min': -1.0, - 'max': 1.0 - }) - - self.optimize_attr = kwargs.get('optimize_attr', {'learning_rate': 1.0}) - self._append_initialize_ops_() - - def _append_initialize_ops_(self): - attr = copy.deepcopy(self.init_attr) - op_type = attr.pop('type', None) - block = self.block - assert isinstance(block, Block) - shape = self.shape - attr['dims'] = shape - attr['data_type'] = int(self.data_type) - op = block.prepend_op( - type=op_type, inputs=None, outputs={'Out': [self]}, attrs=attr) - self.op = op - - -# program is a global instance. 
-g_program = Program.instance() diff --git a/python/paddle/v2/framework/op.py b/python/paddle/v2/framework/op.py index 9086a5cc3452b178ec37fe6a3e358eaa4c5d606b..bc771a964adf9f97cbeae87c06ce954c76051150 100644 --- a/python/paddle/v2/framework/op.py +++ b/python/paddle/v2/framework/op.py @@ -219,6 +219,27 @@ class __RecurrentOp__(object): return core.RecurrentOp.create(proto.SerializeToString()) +class __DynamicRecurrentOp__(object): + __proto__ = None + type = "dynamic_recurrent" + + def __init__(self): + # cache recurrent_op's proto + if self.__proto__ is None: + for op_proto in get_all_op_protos(): + if op_proto.type == self.type: + self.__proto__ = op_proto + + def __call__(self, *args, **kwargs): + if self.type not in args and "type" not in kwargs: + kwargs["type"] = self.type + # create proto + create_method = OpDescCreationMethod(self.__proto__) + proto = create_method(*args, **kwargs) + # create rnnop + return core.DynamicRecurrentOp.create(proto.SerializeToString()) + + class __CondOp__(object): __proto__ = None type = "cond" @@ -242,4 +263,5 @@ class __CondOp__(object): Operator = OperatorFactory() # The default global factory RecurrentOp = __RecurrentOp__() +DynamicRecurrentOp = __DynamicRecurrentOp__() CondOp = __CondOp__() diff --git a/python/paddle/v2/framework/tests/op_test.py b/python/paddle/v2/framework/tests/op_test.py index 81067f38bbf64ac1ab4ccf02aa43b0a38b7d48ad..215fa0b94e423755b7bc3f05a2b14a8c85451202 100644 --- a/python/paddle/v2/framework/tests/op_test.py +++ b/python/paddle/v2/framework/tests/op_test.py @@ -14,7 +14,7 @@ def create_op(scope, op_type, inputs, outputs, attrs): kwargs = dict() def __create_var__(name, var_name): - scope.new_var(var_name) + scope.var(var_name) kwargs[name].append(var_name) for in_name, in_dup in Operator.get_op_inputs(op_type): @@ -71,7 +71,7 @@ def set_input(scope, op, inputs, place): def set_output_grad(scope, op, outputs, place): def __set_tensor__(name): out_tensor = scope.find_var(name).get_tensor() - 
grad_tensor = scope.new_var(grad_var_name(name)).get_tensor() + grad_tensor = scope.var(grad_var_name(name)).get_tensor() out_dtype = out_tensor.dtype() if out_dtype == core.DataType.FP64: data = np.ones(out_tensor.shape(), dtype=np.float64) @@ -169,10 +169,10 @@ def get_numeric_gradient(scope, def get_backward_op(scope, op, no_grad_set): backward_op = core.Operator.backward(op, no_grad_set) for input in backward_op.input_vars(): - var = scope.new_var(input) + var = scope.var(input) var.get_tensor() for output in backward_op.output_vars(): - var = scope.new_var(output) + var = scope.var(output) var.get_tensor() return backward_op diff --git a/python/paddle/v2/framework/tests/test_activation_op.py b/python/paddle/v2/framework/tests/test_activation_op.py index 3acd00e35213981fce60504876af1861961ebe12..5831b880e4c5ef881929920e87ac64d6c87a2ab5 100644 --- a/python/paddle/v2/framework/tests/test_activation_op.py +++ b/python/paddle/v2/framework/tests/test_activation_op.py @@ -384,5 +384,33 @@ class TestThresholdedRelu(OpTest): self.check_grad(['X'], 'Y', max_relative_error=self.relative_error) +class TestHardSigmoid(OpTest): + def setUp(self): + self.op_type = "hard_sigmoid" + self.relative_error = 0.002 + + X = np.random.uniform(-5, 5, [2, 2]).astype("float32") + slope = 0.2 + offset = 0.5 + lower_threshold = -offset / slope + upper_threshold = (1 - offset) / slope + + self.inputs = {'X': X} + # Same reason as TestAbs + X[np.abs(X - lower_threshold) < self.relative_error] = \ + lower_threshold + 0.2 + X[np.abs(X - upper_threshold) < self.relative_error] = \ + upper_threshold - 0.2 + + temp = X * slope + offset + self.outputs = {'Y': np.maximum(0.0, np.minimum(1.0, temp))} + + def test_check_output(self): + self.check_output() + + def test_check_grad(self): + self.check_grad(['X'], 'Y', max_relative_error=0.002) + + if __name__ == "__main__": unittest.main() diff --git a/python/paddle/v2/framework/tests/test_adam_op.py b/python/paddle/v2/framework/tests/test_adam_op.py 
new file mode 100644 index 0000000000000000000000000000000000000000..ff6faafa6e2119fde11b9eb6cd2a65a75334ebe6 --- /dev/null +++ b/python/paddle/v2/framework/tests/test_adam_op.py @@ -0,0 +1,186 @@ +import unittest +import numpy as np +from op_test import OpTest + + +class TestAdamOp1(OpTest): + def setUp(self): + '''Test Adam Op with supplied attributes + ''' + self.op_type = "adam" + param = np.random.uniform(-1, 1, (102, 105)).astype("float32") + grad = np.random.uniform(-1, 1, (102, 105)).astype("float32") + moment1 = np.random.uniform(-1, 1, (102, 105)).astype("float32") + # The second moment is positive + moment2 = np.random.random((102, 105)).astype("float32") + + learning_rate = 0.004 + beta1 = 0.78 + beta2 = 0.836 + epsilon = 1e-4 + beta1_pow = beta1**10 + beta2_pow = beta2**10 + + self.inputs = { + 'Param': param, + 'Grad': grad, + 'Moment1': moment1, + 'Moment2': moment2, + 'LearningRate': np.array([learning_rate]).astype("float32"), + 'Beta1Pow': np.array([beta1_pow]).astype("float32"), + 'Beta2Pow': np.array([beta2_pow]).astype("float32") + } + + self.attrs = {'epsilon': epsilon, 'beta1': beta1, 'beta2': beta2} + + param_out, moment1_out, moment2_out, beta1_pow_out, \ + beta2_pow_out = adam_step(self.inputs, self.attrs) + + self.outputs = { + 'Moment1Out': moment1_out, + 'Moment2Out': moment2_out, + 'Beta1PowOut': beta1_pow_out, + 'Beta2PowOut': beta2_pow_out, + 'ParamOut': param_out + } + + def test_check_output(self): + self.check_output() + + +class TestAdamOp2(OpTest): + def setUp(self): + '''Test Adam Op with supplied attributes + ''' + self.op_type = "adam" + param = np.random.uniform(-1, 1, (102, 105)).astype("float32") + grad = np.random.uniform(-1, 1, (102, 105)).astype("float32") + moment1 = np.random.uniform(-1, 1, (102, 105)).astype("float32") + # The second moment is positive + moment2 = np.random.random((102, 105)).astype("float32") + + learning_rate = 0.001 + beta1 = 0.9 + beta2 = 0.999 + epsilon = 1e-8 + beta1_pow = beta1**10 + beta2_pow 
= beta2**10 + + self.inputs = { + 'Param': param, + 'Grad': grad, + 'Moment1': moment1, + 'Moment2': moment2, + 'LearningRate': np.array([learning_rate]).astype("float32"), + 'Beta1Pow': np.array([beta1_pow]).astype("float32"), + 'Beta2Pow': np.array([beta2_pow]).astype("float32") + } + + attributes = {'epsilon': epsilon, 'beta1': beta1, 'beta2': beta2} + + param_out, moment1_out, moment2_out, beta1_pow_out, \ + beta2_pow_out = adam_step(self.inputs, attributes) + + self.outputs = { + 'Moment1Out': moment1_out, + 'Moment2Out': moment2_out, + 'Beta1PowOut': beta1_pow_out, + 'Beta2PowOut': beta2_pow_out, + 'ParamOut': param_out + } + + def test_check_output(self): + self.check_output() + + +class TestAdamOpMultipleSteps(OpTest): + def setUp(self): + '''Test Adam Operator with supplied attributes + ''' + self.op_type = "adam" + self.num_steps = 10 + + param = np.random.uniform(-1, 1, (102, 105)).astype("float32") + grad = np.random.uniform(-1, 1, (102, 105)).astype("float32") + moment1 = np.random.uniform(-1, 1, (102, 105)).astype("float32") + # The second moment is positive + moment2 = np.random.random((102, 105)).astype("float32") + + learning_rate = 0.001 + beta1 = 0.9 + beta2 = 0.999 + epsilon = 1e-8 + beta1_pow = beta1**10 + beta2_pow = beta2**10 + + self.inputs = { + 'Param': param, + 'Grad': grad, + 'Moment1': moment1, + 'Moment2': moment2, + 'LearningRate': np.array([learning_rate]).astype("float32"), + 'Beta1Pow': np.array([beta1_pow]).astype("float32"), + 'Beta2Pow': np.array([beta2_pow]).astype("float32") + } + + self.attrs = {'epsilon': epsilon, 'beta1': beta1, 'beta2': beta2} + + def test_check_output(self): + for _ in range(self.num_steps): + param_out, moment1_out, moment2_out, beta1_pow_out, \ + beta2_pow_out = adam_step(self.inputs, self.attrs) + + self.outputs = { + 'Moment1Out': moment1_out, + 'Moment2Out': moment2_out, + 'Beta1PowOut': beta1_pow_out, + 'Beta2PowOut': beta2_pow_out, + 'ParamOut': param_out + } + + # Verify output for this step + 
self.check_output() + + # Output of this step becomes input for next step + self.inputs['Param'] = param_out + self.inputs['Moment1'] = moment1_out + self.inputs['Moment2'] = moment2_out + self.inputs['Beta1Pow'] = beta1_pow_out + self.inputs['Beta2Pow'] = beta2_pow_out + + # Randomize gradient for next step + self.inputs['Grad'] = np.random.uniform( + -1, 1, (102, 105)).astype("float32") + + +def adam_step(inputs, attributes): + ''' + Simulate one step of the adam optimizer + :param inputs: dict of inputs + :param attributes: dict of attributes + :return tuple: tuple of output param, moment1, moment2, + beta1 power accumulator and beta2 power accumulator + ''' + param = inputs['Param'] + grad = inputs['Grad'] + moment1 = inputs['Moment1'] + moment2 = inputs['Moment2'] + lr = inputs['LearningRate'] + beta1_pow = inputs['Beta1Pow'] + beta2_pow = inputs['Beta2Pow'] + + beta1 = attributes['beta1'] + beta2 = attributes['beta2'] + epsilon = attributes['epsilon'] + + moment1_out = beta1 * moment1 + (1 - beta1) * grad + moment2_out = beta2 * moment2 + (1 - beta2) * np.square(grad) + beta1_pow_out = beta1_pow * beta1 + beta2_pow_out = beta2_pow * beta2 + lr_t = lr * np.sqrt(1 - beta2_pow_out) / (1 - beta1_pow_out) + param_out = param - lr_t * (moment1_out / (np.sqrt(moment2_out) + epsilon)) + return param_out, moment1_out, moment2_out, beta1_pow_out, beta2_pow_out + + +if __name__ == "__main__": + unittest.main() diff --git a/python/paddle/v2/framework/tests/test_cond_op.py b/python/paddle/v2/framework/tests/test_cond_op.py index 76323b5e10c59822b4de82a70ebd57b3e57c8392..2c7bcc4be46683ed9871b888c9dbabf27887be29 100644 --- a/python/paddle/v2/framework/tests/test_cond_op.py +++ b/python/paddle/v2/framework/tests/test_cond_op.py @@ -39,7 +39,7 @@ class PySimpleCondTest(unittest.TestCase): def create_tensor(scope, name, shape, np_data): - tensor = scope.new_var(name).get_tensor() + tensor = scope.var(name).get_tensor() tensor.set_dims(shape) tensor.set(np_data, 
core.CPUPlace()) return tensor @@ -74,9 +74,9 @@ class TestCondOp(unittest.TestCase): create_tensor(self.scope, "X", [10, 1], x_np_data) cond_np_data = self.py_cond.cond.astype("int32") create_tensor(self.scope, "cond", [10, 1], cond_np_data) - self.scope.new_var("SubScopes") - self.scope.new_var("IndexTensors") - self.scope.new_var("Out") + self.scope.var("SubScopes") + self.scope.var("IndexTensors") + self.scope.var("Out") def create_cond_op(self): self.condop = CondOp( diff --git a/python/paddle/v2/framework/tests/test_conv2d_op.py b/python/paddle/v2/framework/tests/test_conv2d_op.py index 118a5fc1cde5f4a908b065d581956e0855d50a52..2fb808944ac97f2bdcb05336a2205346ded65a4d 100644 --- a/python/paddle/v2/framework/tests/test_conv2d_op.py +++ b/python/paddle/v2/framework/tests/test_conv2d_op.py @@ -3,70 +3,56 @@ import numpy as np from op_test import OpTest +def conv2d_forward_naive(input, filter, group, conv_param): + in_n, in_c, in_h, in_w = input.shape + out_c, f_c, f_h, f_w = filter.shape + assert f_c * group == in_c + assert np.mod(out_c, group) == 0 + sub_out_c = out_c / group + + stride, pad = conv_param['stride'], conv_param['pad'] + out_h = 1 + (in_h + 2 * pad[0] - f_h) / stride[0] + out_w = 1 + (in_w + 2 * pad[1] - f_w) / stride[1] + out = np.zeros((in_n, out_c, out_h, out_w)) + + input_pad = np.pad(input, ((0, ), (0, ), (pad[0], ), (pad[1], )), + mode='constant', + constant_values=0) + for i in range(out_h): + for j in range(out_w): + for g in range(group): + input_pad_masked = \ + input_pad[:, g * f_c:(g + 1) * f_c, + i * stride[0]:i * stride[0] + f_h, + j * stride[1]:j * stride[1] + f_w] + + f_sub = filter[g * sub_out_c:(g + 1) * sub_out_c, :, :, :] + for k in range(sub_out_c): + out[:, g * sub_out_c + k, i, j] = \ + np.sum(input_pad_masked * f_sub[k, :, :, :], + axis=(1, 2, 3)) + + return out + + class TestConv2dOp(OpTest): def setUp(self): - self.init_groups() - self.op_type = "conv2d" - batch_size = 2 - input_channels = 3 - input_height = 5 - 
input_width = 5 - output_channels = 6 - filter_height = 3 - filter_width = 3 - stride = 1 - padding = 0 - output_height = (input_height - filter_height + 2 * padding - ) / stride + 1 - output_width = (input_width - filter_width + 2 * padding) / stride + 1 - input = np.random.random((batch_size, input_channels, input_height, - input_width)).astype("float32") - - filter = np.random.random( - (output_channels, input_channels / self.groups, filter_height, - filter_width)).astype("float32") - output = np.ndarray( - (batch_size, output_channels, output_height, output_width)) + self.init_op_type() + self.init_group() + self.init_test_case() + + conv2d_param = {'stride': self.stride, 'pad': self.pad} + input = np.random.random(self.input_size).astype("float32") + filter = np.random.random(self.filter_size).astype("float32") + output = conv2d_forward_naive(input, filter, self.groups, conv2d_param) self.inputs = {'Input': input, 'Filter': filter} self.attrs = { - 'strides': [1, 1], - 'paddings': [0, 0], - 'groups': self.groups + 'strides': self.stride, + 'paddings': self.pad, + 'groups': self.groups, + 'dilations': self.dilations } - - output_group_channels = output_channels / self.groups - input_group_channels = input_channels / self.groups - for batchid in xrange(batch_size): - for group in xrange(self.groups): - for outchannelid in range(group * output_group_channels, - (group + 1) * output_group_channels): - for rowid in xrange(output_height): - for colid in xrange(output_width): - start_h = (rowid * stride) - padding - start_w = (colid * stride) - padding - output_value = 0.0 - for inchannelid in range( - group * input_group_channels, - (group + 1) * input_group_channels): - for frowid in xrange(filter_height): - for fcolid in xrange(filter_width): - input_value = 0.0 - inrowid = start_h + frowid - incolid = start_w + fcolid - if ((inrowid >= 0 and - inrowid < input_height) and - (incolid >= 0 and - incolid < input_width)): - input_value = input[batchid][ - 
inchannelid][inrowid][incolid] - filter_value = filter[outchannelid][ - inchannelid % input_group_channels][ - frowid][fcolid] - output_value += input_value * filter_value - output[batchid][outchannelid][rowid][ - colid] = output_value - self.outputs = {'Output': output} def test_check_output(self): @@ -90,14 +76,47 @@ class TestConv2dOp(OpTest): max_relative_error=0.05, no_grad_set=set(['Input'])) - def init_groups(self): + def init_test_case(self): + # self.groups = 1 + # self.op_type = "conv2d" + self.pad = [0, 0] + self.stride = [1, 1] + self.dilations = [1, 1] + self.input_size = [2, 3, 5, 5] # NCHW + assert np.mod(self.input_size[1], self.groups) == 0 + f_c = self.input_size[1] / self.groups + self.filter_size = [6, f_c, 3, 3] + + def init_group(self): self.groups = 1 + def init_op_type(self): + self.op_type = "conv2d" + class TestWithGroup(TestConv2dOp): - def init_groups(self): + def init_group(self): self.groups = 3 + def init_op_type(self): + self.op_type = "conv2d" + + +class TestCudnn(TestConv2dOp): + def init_group(self): + self.groups = 1 + + def init_op_type(self): + self.op_type = "conv_cudnn" + + +class TestCudnnWithGroup(TestConv2dOp): + def init_group(self): + self.groups = 3 + + def init_op_type(self): + self.op_type = "conv_cudnn" + if __name__ == '__main__': unittest.main() diff --git a/python/paddle/v2/framework/tests/test_decayed_adagrad_op.py b/python/paddle/v2/framework/tests/test_decayed_adagrad_op.py new file mode 100644 index 0000000000000000000000000000000000000000..674c3fda5c82309bbfbbad936a8b0b26929d42d9 --- /dev/null +++ b/python/paddle/v2/framework/tests/test_decayed_adagrad_op.py @@ -0,0 +1,71 @@ +import unittest +import numpy as np +from op_test import OpTest + + +class TestDecayedAdagradOp1(OpTest): + ''' Test DecayedAdagrad operator with explicit attributes + ''' + + def setUp(self): + self.op_type = "decayed_adagrad" + + param = np.random.random((123, 321)).astype("float32") + grad = np.random.random((123, 
321)).astype("float32") + moment = np.zeros((123, 321)).astype("float32") + lr = 0.01 + decay = 0.80 + epsilon = 1e-8 + + self.inputs = { + 'Param': param, + 'Grad': grad, + 'Moment': moment, + 'LearningRate': np.array([lr]).astype("float32") + } + + self.attrs = {'decay': decay, 'epsilon': epsilon} + + moment_out = decay * moment + (1 - decay) * grad * grad + param_out = param - lr * grad / (np.sqrt(moment_out) + epsilon) + + self.outputs = {'ParamOut': param_out, 'MomentOut': moment_out} + + def test_check_output(self): + self.check_output() + + +class TestDecayedAdagradOp2(OpTest): + ''' Test DecayedAdagrad operator with default attributes + ''' + + def setUp(self): + self.op_type = "decayed_adagrad" + + param = np.random.random((123, 321)).astype("float32") + grad = np.random.random((123, 321)).astype("float32") + moment = np.zeros((123, 321)).astype("float32") + lr = 0.01 + decay = 0.95 + epsilon = 1e-6 + + self.inputs = { + 'Param': param, + 'Grad': grad, + 'Moment': moment, + 'LearningRate': np.array([lr]).astype("float32") + } + + self.attrs = {'decay': decay, 'epsilon': epsilon} + + moment_out = decay * moment + (1 - decay) * grad * grad + param_out = param - lr * grad / (np.sqrt(moment_out) + epsilon) + + self.outputs = {'ParamOut': param_out, 'MomentOut': moment_out} + + def test_check_output(self): + self.check_output() + + +if __name__ == "__main__": + unittest.main() diff --git a/python/paddle/v2/framework/tests/test_default_scope_funcs.py b/python/paddle/v2/framework/tests/test_default_scope_funcs.py index 495863c4562b5a2d6755fb02e21a6b0c845fd7b6..09a9850d054e3d7e6bf6db363fc577bdff8e9f43 100644 --- a/python/paddle/v2/framework/tests/test_default_scope_funcs.py +++ b/python/paddle/v2/framework/tests/test_default_scope_funcs.py @@ -10,7 +10,7 @@ class TestDefaultScopeFuncs(unittest.TestCase): self.assertIsNone(find_var("test")) def test_create_var_get_var(self): - var_a = new_var("var_a") + var_a = var("var_a") self.assertIsNotNone(var_a) 
self.assertIsNotNone(get_cur_scope().find_var('var_a')) enter_local_scope() @@ -19,7 +19,7 @@ class TestDefaultScopeFuncs(unittest.TestCase): def test_var_get_int(self): def __new_scope__(): - i = new_var("var_i") + i = var("var_i") self.assertFalse(i.is_int()) i.set_int(10) self.assertTrue(i.is_int()) diff --git a/python/paddle/v2/framework/tests/test_dynamic_recurrent_op.py b/python/paddle/v2/framework/tests/test_dynamic_recurrent_op.py new file mode 100644 index 0000000000000000000000000000000000000000..2b01e43454e70c12b423db9925837cf336f79935 --- /dev/null +++ b/python/paddle/v2/framework/tests/test_dynamic_recurrent_op.py @@ -0,0 +1,111 @@ +import logging +import paddle.v2.framework.core as core +import unittest +from paddle.v2.framework.op import Operator, DynamicRecurrentOp +import numpy as np + + +def create_tensor(scope, name, shape, np_data): + tensor = scope.var(name).get_tensor() + tensor.set_dims(shape) + tensor.set(np_data, core.CPUPlace()) + return tensor + + +class DynamicRecurrentOpTest(unittest.TestCase): + ''' + Test RNNOp + + equation: + h_t = \sigma (W x_t + U h_{t-1}) + weights: + - W + - U + vars: + - x + memories: + - h + outputs: + - h + ''' + + # for siplicity, just one level LoD + lod_py = [[0, 4, 7, 9, 10]] + input_dim = 30 + num_sents = len(lod_py[0]) - 1 + weight_dim = 15 + + def forward(self): + self.scope = core.Scope() + self.create_global_variables() + self.create_rnn_op() + self.create_step_net() + ctx = core.DeviceContext.create(core.CPUPlace()) + self.rnnop.run(self.scope, ctx) + state = self.rnnop.get_state("h@mem") + print 'state size: ', state.size() + + step_inputs = self.rnnop.get_step_input("x") + print "x size ", step_inputs.size() + for i in range(step_inputs.size()): + print "x %d" % i, np.array(step_inputs.read(i).get_dims()) + step_outputs = self.rnnop.get_step_output('h@mem') + print 'step_outputs.size ', step_outputs.size() + output = self.scope.find_var("h@mem").get_tensor() + + print 'output', 
np.array(output).shape + + def create_global_variables(self): + x = np.random.normal(size=(self.lod_py[0][-1], + self.input_dim)).astype("float32") + W = np.random.normal(size=(self.input_dim, + self.input_dim)).astype("float32") + U = np.random.normal(size=(self.input_dim, + self.input_dim)).astype("float32") + h_boot = np.random.normal(size=(self.num_sents, + self.input_dim)).astype("float32") + # create inlink + x_tensor = create_tensor(self.scope, "x", + [self.num_sents, self.input_dim], x) + x_tensor.set_lod(self.lod_py) + create_tensor(self.scope, "W", [self.input_dim, self.input_dim], W) + create_tensor(self.scope, "U", [self.input_dim, self.input_dim], U) + create_tensor(self.scope, "h_boot", [self.num_sents, self.input_dim], + h_boot) + self.scope.var("step_scopes") + self.scope.var("h@mem") + + def create_rnn_op(self): + # create RNNOp + self.rnnop = DynamicRecurrentOp( + # inputs + inlinks=["x"], + boot_memories=["h_boot"], + step_net="stepnet", + # outputs + outlinks=["h@mem"], + step_scopes="step_scopes", + # attributes + pre_memories=["h@pre"], + memories=["h@mem"]) + + def create_step_net(self): + stepnet = core.Net.create() + x_fc_op = Operator("mul", X="x", Y="W", Out="Wx") + h_fc_op = Operator("mul", X="h@pre", Y="U", Out="Uh") + sum_op = Operator("sum", X=["Wx", "Uh"], Out="sum") + sig_op = Operator("sigmoid", X="sum", Y="h@mem") + + for op in [x_fc_op, h_fc_op, sum_op, sig_op]: + stepnet.append_op(op) + stepnet.complete_add_op(True) + self.rnnop.set_stepnet(stepnet) + + def test_forward(self): + print 'test recurrent op forward' + pd_output = self.forward() + print 'pd_output', pd_output + + +if __name__ == '__main__': + unittest.main() diff --git a/python/paddle/v2/framework/tests/test_gaussian_random_op.py b/python/paddle/v2/framework/tests/test_gaussian_random_op.py index cff5080048bbd34782e52d8b2b7690176f996c99..8b7779667d5e806c06b333527f774c7987ce7e73 100644 --- a/python/paddle/v2/framework/tests/test_gaussian_random_op.py +++ 
b/python/paddle/v2/framework/tests/test_gaussian_random_op.py @@ -14,7 +14,7 @@ class TestGaussianRandomOp(unittest.TestCase): def gaussian_random_test(self, place): scope = core.Scope() - scope.new_var('Out').get_tensor() + scope.var('Out').get_tensor() op = Operator( "gaussian_random", diff --git a/python/paddle/v2/framework/tests/test_gru_unit_op.py b/python/paddle/v2/framework/tests/test_gru_unit_op.py new file mode 100644 index 0000000000000000000000000000000000000000..57625362d21905d257f46ff5330841a20438773a --- /dev/null +++ b/python/paddle/v2/framework/tests/test_gru_unit_op.py @@ -0,0 +1,115 @@ +import math +import unittest +import numpy as np +from op_test import OpTest + + +class GRUActivationType(OpTest): + identity = 0 + sigmoid = 1 + tanh = 2 + relu = 3 + + +def identity(x): + return x + + +def sigmoid(x): + return 1. / (1. + np.exp(-x)) + + +def tanh(x): + return 2. * sigmoid(2. * x) - 1. + + +def relu(x): + return np.maximum(x, 0) + + +class TestGRUUnitOp(OpTest): + batch_size = 3 + frame_size = 5 + activate = { + GRUActivationType.identity: identity, + GRUActivationType.sigmoid: sigmoid, + GRUActivationType.tanh: tanh, + GRUActivationType.relu: relu, + } + + def set_inputs(self): + batch_size = self.batch_size + frame_size = self.frame_size + self.op_type = 'gru_unit' + self.inputs = { + 'Input': np.random.uniform( + -0.1, 0.1, (batch_size, frame_size * 3)).astype('float32'), + 'HiddenPrev': np.random.uniform( + -0.1, 0.1, (batch_size, frame_size)).astype('float32'), + 'Weight': np.random.uniform( + -1. / math.sqrt(frame_size), 1. 
/ math.sqrt(frame_size), + (frame_size, frame_size * 3)).astype('float32'), + } + self.attrs = { + 'activation': GRUActivationType.tanh, + 'gate_activation': GRUActivationType.sigmoid + } + + def set_outputs(self): + # GRU calculations + batch_size = self.batch_size + frame_size = self.frame_size + x = self.inputs['Input'] + h_p = self.inputs['HiddenPrev'] + w = self.inputs['Weight'] + b = self.inputs['Bias'] if self.inputs.has_key('Bias') else np.zeros( + (1, frame_size * 3)) + g = x + np.tile(b, (batch_size, 1)) + w_u_r = w.flatten()[:frame_size * frame_size * 2].reshape( + (frame_size, frame_size * 2)) + u_r = self.activate[self.attrs['gate_activation']](np.dot( + h_p, w_u_r) + g[:, :frame_size * 2]) + u = u_r[:, :frame_size] + r = u_r[:, frame_size:frame_size * 2] + r_h_p = r * h_p + w_c = w.flatten()[frame_size * frame_size * 2:].reshape( + (frame_size, frame_size)) + c = self.activate[self.attrs['activation']](np.dot(r_h_p, w_c) + + g[:, frame_size * 2:]) + g = np.hstack((u_r, c)) + h = u * h_p + (1 - u) * c + self.outputs = {'Gate': g, 'ResetHiddenPrev': r_h_p, 'Hidden': h} + + def setUp(self): + self.set_inputs() + self.set_outputs() + + def test_check_output(self): + self.check_output() + + def test_check_grad(self): + self.check_grad( + ['Input', 'HiddenPrev', 'Weight'], ['Hidden'], + max_relative_error=0.007) + + +class TestGRUUnitOpWithBias(TestGRUUnitOp): + def set_inputs(self): + batch_size = self.batch_size + frame_size = self.frame_size + super(TestGRUUnitOpWithBias, self).set_inputs() + self.inputs['Bias'] = np.random.uniform( + -0.1, 0.1, (1, frame_size * 3)).astype('float32') + self.attrs = { + 'activation': GRUActivationType.identity, + 'gate_activation': GRUActivationType.sigmoid + } + + def test_check_grad(self): + self.check_grad( + ['Input', 'HiddenPrev', 'Weight', 'Bias'], ['Hidden'], + max_relative_error=0.007) + + +if __name__ == '__main__': + unittest.main() diff --git a/python/paddle/v2/framework/tests/test_infer_shape.py 
b/python/paddle/v2/framework/tests/test_infer_shape.py index 99562890fdd4d8b10f420869f1ba9f694db5969a..19bb45acef9a7443a974bf5f11afab5d067321f7 100644 --- a/python/paddle/v2/framework/tests/test_infer_shape.py +++ b/python/paddle/v2/framework/tests/test_infer_shape.py @@ -13,12 +13,15 @@ class TestInferShape(unittest.TestCase): shape = [10, 20] # prepare input/output - x1 = block.new_var("x1") + x1 = block.var("x1") + x1.set_type(core.VarDesc.VarType.LOD_TENSOR) x1.set_shape(shape) - x2 = block.new_var("x2") + x2 = block.var("x2") + x2.set_type(core.VarDesc.VarType.LOD_TENSOR) x2.set_shape(shape) - out = block.new_var("out") + out = block.var("out") + out.set_type(core.VarDesc.VarType.LOD_TENSOR) # prepare the operator sum_op_desc = block.append_op() @@ -39,12 +42,15 @@ class TestInferShape(unittest.TestCase): y_shape = [20, 30] # prepare input/output - x1 = block.new_var("x") + x1 = block.var("x") + x1.set_type(core.VarDesc.VarType.LOD_TENSOR) x1.set_shape(x_shape) - x2 = block.new_var("y") + x2 = block.var("y") + x2.set_type(core.VarDesc.VarType.LOD_TENSOR) x2.set_shape(y_shape) - out = block.new_var("out") + out = block.var("out") + out.set_type(core.VarDesc.VarType.LOD_TENSOR) # prepare the operator mul_op_desc = block.append_op() diff --git a/python/paddle/v2/framework/tests/test_mnist.py b/python/paddle/v2/framework/tests/test_mnist.py index 169242b5372ebd28f102e0b450495524c712aabe..c8d54b7c94b7815fa79e5a11f4e159657dc2a6cb 100644 --- a/python/paddle/v2/framework/tests/test_mnist.py +++ b/python/paddle/v2/framework/tests/test_mnist.py @@ -31,7 +31,7 @@ uniq_id = atomic_id().next def data_layer(name, dims): - var = scope.new_var(name) + var = scope.var(name) tensor = var.get_tensor() tensor.set_dims(dims) # 1 is batch size holder. 
return name @@ -67,7 +67,7 @@ def sgd_optimizer(net, param_name, learning_rate=0.005): # should use operator and add these to the init_network def init_param(net, param_name, dims): - scope.new_var(param_name) + scope.var(param_name) op = Operator( "uniform_random", Out=param_name, dims=dims, min=-0.5, max=0.5, seed=10) op.infer_shape(scope) @@ -104,7 +104,7 @@ def fc_layer(net, input, size, act="softmax", bias=True, param=None, name=None): sgd_optimizer(net=optimize_net, param_name=w_name, learning_rate=0.01) pre_activation = name + ".mul.out" - scope.new_var(pre_activation) + scope.var(pre_activation) mul_op = Operator("mul", X=input, Y=w_name, Out=pre_activation) net.append_op(mul_op) @@ -115,7 +115,7 @@ def fc_layer(net, input, size, act="softmax", bias=True, param=None, name=None): sgd_optimizer( net=optimize_net, param_name=bias_name, learning_rate=0.001) bias_out = name + ".rowwise_add.out" - scope.new_var(bias_out) + scope.var(bias_out) rowwise_append_op = Operator( "rowwise_add", X=pre_activation, b=bias_name, Out=bias_out) net.append_op(rowwise_append_op) @@ -123,7 +123,7 @@ def fc_layer(net, input, size, act="softmax", bias=True, param=None, name=None): activation_op = Operator(act, X=pre_activation, Y=name) net.append_op(activation_op) - scope.new_var(name) + scope.var(name) net.infer_shape(scope) return name @@ -133,7 +133,7 @@ def cross_entropy_layer(net, input, label): cross_entropy_op = Operator( "cross_entropy", X=input, Label=label, Y=cost_name) net.append_op(cross_entropy_op) - scope.new_var(cost_name) + scope.var(cost_name) net.infer_shape(scope) return cost_name @@ -141,10 +141,10 @@ def cross_entropy_layer(net, input, label): def create_backward_net(forward_net): net = core.Operator.backward(forward_net, set()) for input in net.inputs()["all"]: - var = scope.new_var(input) + var = scope.var(input) var.get_tensor() for output in net.outputs()["all"]: - var = scope.new_var(output) + var = scope.var(output) var.get_tensor() return net diff --git 
a/python/paddle/v2/framework/tests/test_operator_desc.py b/python/paddle/v2/framework/tests/test_operator_desc.py new file mode 100644 index 0000000000000000000000000000000000000000..dfe39c98f7f4fe266d5ec0c4a9ed14ab02e40e3a --- /dev/null +++ b/python/paddle/v2/framework/tests/test_operator_desc.py @@ -0,0 +1,78 @@ +import unittest +from paddle.v2.framework.framework import Variable, g_program +import paddle.v2.framework.core as core + + +class TestOperator(unittest.TestCase): + def test_error_type(self): + block = g_program.create_block() + try: + block.append_op() + self.assertFail() + except ValueError as v_err: + self.assertEqual( + v_err.message, + "`type` to initilized an Operator can not be None.") + try: + block.append_op(type="no_such_op") + self.assertFail() + except AssertionError as a_err: + self.assertEqual(a_err.message, + "Operator \"no_such_op\" has not been registered.") + + def test_op_desc_creation(self): + block = g_program.current_block() + mul_x = block.create_var( + dtype="float32", shape=[5, 10], lod_level=0, name="mul.x") + mul_y = block.create_var( + dtype="float32", shape=[10, 8], lod_level=0, name="mul.y") + mul_out = block.create_var( + dtype="float32", shape=[5, 8], lod_level=0, name="mul.out") + mul_op = block.append_op( + type="mul", + inputs={"X": [mul_x], + "Y": mul_y}, + outputs={"Out": [mul_out]}, + attrs={"x_num_col_dims": 1}) + + self.assertNotEqual(str(mul_op), "") + self.assertEqual(mul_op.type, "mul") + self.assertEqual(mul_op.input_names, ["X", "Y"]) + self.assertEqual(mul_op.input("X"), ["mul.x"]) + self.assertEqual(mul_op.input("Y"), ["mul.y"]) + self.assertEqual(mul_op.output_names, ["Out"]) + self.assertEqual(mul_op.output("Out"), ["mul.out"]) + self.assertEqual( + set(mul_op.attr_names), set(["x_num_col_dims", "y_num_col_dims"])) + self.assertEqual(mul_op.has_attr("x_num_col_dims"), True) + self.assertEqual(mul_op.attr_type("x_num_col_dims"), core.AttrType.INT) + self.assertEqual(mul_op.attr("x_num_col_dims"), 1) + 
self.assertEqual(mul_op.has_attr("y_num_col_dims"), True) + self.assertEqual(mul_op.attr_type("y_num_col_dims"), core.AttrType.INT) + self.assertEqual(mul_op.attr("y_num_col_dims"), 1) + self.assertEqual(mul_out.op, mul_op) + + def test_mult_input(self): + block = g_program.current_block() + sum_x1 = block.create_var( + dtype="int", shape=[3, 4], lod_level=0, name="sum.x1") + sum_x2 = block.create_var( + dtype="int", shape=[3, 4], lod_level=0, name="sum.x2") + sum_x3 = block.create_var( + dtype="int", shape=[3, 4], lod_level=0, name="sum.x3") + sum_out = block.create_var( + dtype="int", shape=[3, 4], lod_level=0, name="sum.out") + sum_op = block.append_op( + type="sum", + inputs={"X": [sum_x1, sum_x2, sum_x3]}, + outputs={"Out": sum_out}) + self.assertEqual(sum_op.type, "sum") + self.assertEqual(sum_op.input_names, ["X"]) + self.assertEqual(sum_op.input("X"), ["sum.x1", "sum.x2", "sum.x3"]) + self.assertEqual(sum_op.output_names, ["Out"]) + self.assertEqual(sum_op.output("Out"), ["sum.out"]) + self.assertEqual(sum_out.op, sum_op) + + +if __name__ == '__main__': + unittest.main() diff --git a/python/paddle/v2/framework/tests/test_parameter.py b/python/paddle/v2/framework/tests/test_parameter.py index 3b5d38f257e6f51be30d9f1fa42285461b2a0eb7..1ac0cdd99f1b7c15d64ae9d2c465d5a9d563bd80 100644 --- a/python/paddle/v2/framework/tests/test_parameter.py +++ b/python/paddle/v2/framework/tests/test_parameter.py @@ -1,5 +1,5 @@ import unittest -from paddle.v2.framework.graph import g_program +from paddle.v2.framework.framework import g_program import paddle.v2.framework.core as core diff --git a/python/paddle/v2/framework/tests/test_program.py b/python/paddle/v2/framework/tests/test_program.py index 83e184494ad235f6493a7ea8e25886b1e35004ee..d06f86c09fe4edf8364e7d124cb7b8b1ae6bcc64 100644 --- a/python/paddle/v2/framework/tests/test_program.py +++ b/python/paddle/v2/framework/tests/test_program.py @@ -1,7 +1,8 @@ import unittest import paddle.v2.framework.core as core -from 
paddle.v2.framework.graph import g_program +from paddle.v2.framework.framework import Program +from paddle.v2.framework.framework import g_program class TestProgram(unittest.TestCase): @@ -33,7 +34,7 @@ class TestProgram(unittest.TestCase): self.assertEqual(1, b.idx) self.assertEqual(0, b.parent_idx) - def test_append_backward(self): + def test_desc_append_backward(self): prog = core.ProgramDesc.__create_program_desc__() self.assertIsNotNone(prog) block = prog.block(0) @@ -51,16 +52,44 @@ class TestProgram(unittest.TestCase): sum_op_desc.set_input("Y", ["b1"]) sum_op_desc.set_output("Out", ["out2"]) + target = block.var("out2") + expect_ops = [ - "mul", "elementwise_add", "elementwise_add_grad", "mul_grad" + "mul", "elementwise_add", "fill_constant", "elementwise_add_grad", + "mul_grad" ] + + def grad_name(name): + return name + "@GRAD" + actual_ops = [] - prog.append_backward(set()) + param_to_grad = prog.append_backward(target, set()) + for var_name in ("x1", "y1", "out1", "b1"): + self.assertEqual(param_to_grad[var_name][0], grad_name(var_name)) + self.assertEqual(param_to_grad[var_name][1], 0) + for op in block.all_ops(): actual_ops.append(op.type()) - print(actual_ops) self.assertEqual(actual_ops, expect_ops) + def test_append_backward(self): + prog = Program.instance() + block = prog.global_block() + + mul_x = block.create_parameter( + dtype="float32", shape=[5, 10], lod_level=0, name="mul.x") + mul_y = block.create_var( + dtype="float32", shape=[10, 8], lod_level=0, name="mul.y") + mul_out = block.create_var( + dtype="float32", shape=[5, 8], lod_level=0, name="mul.out") + mul_op = block.append_op( + type="mul", + inputs={"X": [mul_x], + "Y": mul_y}, + outputs={"Out": [mul_out]}, + attrs={"x_num_col_dims": 1}) + param_to_grad = prog.append_backward(mul_out, set()) + if __name__ == '__main__': unittest.main() diff --git a/python/paddle/v2/framework/tests/test_protobuf_descs.py b/python/paddle/v2/framework/tests/test_protobuf_descs.py index 
3db1e79ce43b7f559c7caab8397817b76d56161e..c775b1a398dabb096845b4a8730152c682b2f0dd 100644 --- a/python/paddle/v2/framework/tests/test_protobuf_descs.py +++ b/python/paddle/v2/framework/tests/test_protobuf_descs.py @@ -53,7 +53,7 @@ class TestOpDesc(unittest.TestCase): self.assertEqual(8, len(op.attr_names())) op.set_block_attr("block_attr", prog.block(0)) - self.assertEqual(0, op.get_block_attr("block_attr")) + self.assertEqual(0, op.block_attr("block_attr")) mul_op = block.append_op() mul_op.set_type("mul") @@ -93,18 +93,22 @@ class TestVarDesc(unittest.TestCase): def test_shape(self): program_desc = core.ProgramDesc.__create_program_desc__() block = program_desc.block(0) - var = block.new_var('my_var') + var = block.var('my_var') + var.set_type(core.VarDesc.VarType.SELECTED_ROWS) src_shape = [3, 2, 10, 8] var.set_shape(src_shape) res_shape = var.shape() self.assertEqual(src_shape, res_shape) + self.assertEqual(core.VarDesc.VarType.SELECTED_ROWS, var.type()) def test_data_type(self): program_desc = core.ProgramDesc.__create_program_desc__() block = program_desc.block(0) - var = block.new_var('my_var') + var = block.var('my_var') + var.set_type(core.VarDesc.VarType.LOD_TENSOR) var.set_data_type(core.DataType.INT32) self.assertEqual(core.DataType.INT32, var.data_type()) + self.assertEqual(core.VarDesc.VarType.LOD_TENSOR, var.type()) class TestBlockDesc(unittest.TestCase): @@ -113,12 +117,12 @@ class TestBlockDesc(unittest.TestCase): self.assertIsNotNone(prog) block = prog.block(0) self.assertIsNotNone(block) - var1 = block.new_var("var1") - var2 = block.new_var("var2") - var3 = block.new_var("var3") + var1 = block.var("var1") + var2 = block.var("var2") + var3 = block.var("var3") all_vars = block.all_vars() self.assertEqual(set(all_vars), set([var1, var2, var3])) - var2_re = block.var("var2") + var2_re = block.find_var("var2") self.assertEqual(var2_re, var2) def test_add_op(self): diff --git a/python/paddle/v2/framework/tests/test_recurrent_op.py 
b/python/paddle/v2/framework/tests/test_recurrent_op.py index 1f114432c09f29fab6cd56de00dff341785ae0e4..191ce0b0c8d5fb6c4d8037a6c1bfda57c394489e 100644 --- a/python/paddle/v2/framework/tests/test_recurrent_op.py +++ b/python/paddle/v2/framework/tests/test_recurrent_op.py @@ -66,7 +66,7 @@ class PySimpleRNNTest(unittest.TestCase): def create_tensor(scope, name, shape, np_data): - tensor = scope.new_var(name).get_tensor() + tensor = scope.var(name).get_tensor() tensor.set_dims(shape) tensor.set(np_data, core.CPUPlace()) return tensor @@ -125,8 +125,8 @@ class RecurrentOpTest(unittest.TestCase): h_boot_np_data = self.py_rnn.h_boot create_tensor(self.scope, "h_boot", [self.batch_size, self.input_dim], h_boot_np_data) - self.scope.new_var("step_scopes") - self.scope.new_var("h@mem") + self.scope.var("step_scopes") + self.scope.var("h@mem") def create_rnn_op(self): # create RNNOp diff --git a/python/paddle/v2/framework/tests/test_scope.py b/python/paddle/v2/framework/tests/test_scope.py index 1ce9454067f91f39f01d9eb4c912857464a3c1cb..14743654792716e4a7ebce5238b142addc86337e 100644 --- a/python/paddle/v2/framework/tests/test_scope.py +++ b/python/paddle/v2/framework/tests/test_scope.py @@ -18,7 +18,7 @@ class TestScope(unittest.TestCase): def test_create_var_get_var(self): paddle_c = paddle.v2.framework.core scope = paddle_c.Scope() - var_a = scope.new_var("var_a") + var_a = scope.var("var_a") self.assertIsNotNone(var_a) self.assertIsNotNone(scope.find_var('var_a')) scope2 = scope.new_scope() @@ -27,7 +27,7 @@ class TestScope(unittest.TestCase): def test_var_get_int(self): paddle_c = paddle.v2.framework.core scope = paddle_c.Scope() - var = scope.new_var("test_int") + var = scope.var("test_int") var.set_int(10) self.assertTrue(var.is_int()) self.assertEqual(10, var.get_int()) diff --git a/python/paddle/v2/framework/tests/test_seq_concat_op.py b/python/paddle/v2/framework/tests/test_seq_concat_op.py index 
6309b09bc98f6d529f80bfa269a0eaadd799fcbc..abd2ebf0b21a953b76155eb04c57a7b65ac53cbc 100644 --- a/python/paddle/v2/framework/tests/test_seq_concat_op.py +++ b/python/paddle/v2/framework/tests/test_seq_concat_op.py @@ -1,5 +1,6 @@ import unittest import numpy as np +import sys from op_test import OpTest @@ -74,4 +75,5 @@ class TestConcatOpLevelZero(TestConcatOp): if __name__ == '__main__': + sys.exit(0) unittest.main() diff --git a/python/paddle/v2/framework/tests/test_tensor.py b/python/paddle/v2/framework/tests/test_tensor.py index 8cd93b35d7d1cb7d3b4a19e0e402ef576f1c0982..e0cd2fa8aaf2db2991ad2b9a3053f0d00b509cd4 100644 --- a/python/paddle/v2/framework/tests/test_tensor.py +++ b/python/paddle/v2/framework/tests/test_tensor.py @@ -6,7 +6,7 @@ import numpy class TestTensor(unittest.TestCase): def test_int_tensor(self): scope = core.Scope() - var = scope.new_var("test_tensor") + var = scope.var("test_tensor") place = core.CPUPlace() tensor = var.get_tensor() @@ -25,7 +25,7 @@ class TestTensor(unittest.TestCase): def test_float_tensor(self): scope = core.Scope() - var = scope.new_var("test_tensor") + var = scope.var("test_tensor") place = core.CPUPlace() tensor = var.get_tensor() @@ -46,7 +46,7 @@ class TestTensor(unittest.TestCase): def test_int_lod_tensor(self): place = core.CPUPlace() scope = core.Scope() - var_lod = scope.new_var("test_lod_tensor") + var_lod = scope.var("test_lod_tensor") lod_tensor = var_lod.get_tensor() lod_tensor.set_dims([4, 4, 6]) @@ -68,7 +68,7 @@ class TestTensor(unittest.TestCase): def test_float_lod_tensor(self): place = core.CPUPlace() scope = core.Scope() - var_lod = scope.new_var("test_lod_tensor") + var_lod = scope.var("test_lod_tensor") lod_tensor = var_lod.get_tensor() lod_tensor.set_dims([5, 2, 3, 4]) diff --git a/python/paddle/v2/framework/tests/test_tensor_array.py b/python/paddle/v2/framework/tests/test_tensor_array.py index 11f8a01f9224fcbd6dd6cbc8c37cc81036ad3e07..50b3e09162a24201ee45cbd017dfef8a60f0da78 100644 --- 
a/python/paddle/v2/framework/tests/test_tensor_array.py +++ b/python/paddle/v2/framework/tests/test_tensor_array.py @@ -13,7 +13,7 @@ class TestTensorArray(unittest.TestCase): # create a LoDTensor self.scope = core.Scope() - var = self.scope.new_var("test_tensor") + var = self.scope.var("test_tensor") self.place = core.CPUPlace() tensor = var.get_tensor() tensor.set_dims([self.batch_size, self.dim]) @@ -51,7 +51,7 @@ class TestTensorArray(unittest.TestCase): self.ta.unstack(self.tensor) # create a tensor with shape of [1, self.dim] - var = self.scope.new_var("hell") + var = self.scope.var("hell") tensor = var.get_tensor() tensor.set_dims([1, self.dim]) tensor.alloc_float(self.place) @@ -71,7 +71,7 @@ class TestTensorArray(unittest.TestCase): self.ta.unstack(self.tensor) # create a tensor with shape of [1, self.dim] - var = self.scope.new_var("hell") + var = self.scope.var("hell") tensor = var.get_tensor() tensor.set_dims([1, self.dim]) tensor.alloc_float(self.place) diff --git a/python/paddle/v2/framework/tests/test_uniform_random_op.py b/python/paddle/v2/framework/tests/test_uniform_random_op.py index 30c59789d395b2b8d4b3019cf769c5bae029d91e..a2d28a65a67b03a6c74348b19ba99cffc55738e9 100644 --- a/python/paddle/v2/framework/tests/test_uniform_random_op.py +++ b/python/paddle/v2/framework/tests/test_uniform_random_op.py @@ -14,7 +14,7 @@ class TestUniformRandomOp(unittest.TestCase): def uniform_random_test(self, place): scope = core.Scope() - scope.new_var('X').get_tensor() + scope.var('X').get_tensor() op = Operator( "uniform_random", diff --git a/python/paddle/v2/framework/tests/test_variable.py b/python/paddle/v2/framework/tests/test_variable.py index 8ea1083ff6535d2d517f2ac587a956bfed906f03..6fb934c743a6271c352a74495cc543b62ac2b9d9 100644 --- a/python/paddle/v2/framework/tests/test_variable.py +++ b/python/paddle/v2/framework/tests/test_variable.py @@ -1,5 +1,5 @@ import unittest -from paddle.v2.framework.graph import Variable, g_program +from 
paddle.v2.framework.framework import Variable, g_program import paddle.v2.framework.core as core import numpy as np @@ -21,6 +21,7 @@ class TestVariable(unittest.TestCase): b = g_program.current_block() w = b.create_var( dtype="float64", shape=[784, 100], lod_level=0, name="fc.w") + self.assertNotEqual(str(w), "") self.assertEqual(core.DataType.FP64, w.data_type) self.assertEqual((784, 100), w.shape) self.assertEqual("fc.w", w.name)