提交 1a53cba6 编写于 作者: H hedaoyuan

Merge branch 'develop' of https://github.com/baidu/Paddle into ImageExpandFunction

...@@ -47,6 +47,7 @@ option(WITH_COVERAGE "Compile PaddlePaddle with code coverage" OFF) ...@@ -47,6 +47,7 @@ option(WITH_COVERAGE "Compile PaddlePaddle with code coverage" OFF)
option(COVERALLS_UPLOAD "Package code coverage data to coveralls" OFF) option(COVERALLS_UPLOAD "Package code coverage data to coveralls" OFF)
option(ON_TRAVIS "Exclude special unit test on Travis CI" OFF) option(ON_TRAVIS "Exclude special unit test on Travis CI" OFF)
option(WITH_C_API "Compile PaddlePaddle with C-API(Prediction)" OFF) option(WITH_C_API "Compile PaddlePaddle with C-API(Prediction)" OFF)
option(WITH_GOLANG "Compile PaddlePaddle with GOLANG" OFF)
...@@ -107,6 +108,7 @@ include(configure) # add paddle env configuration ...@@ -107,6 +108,7 @@ include(configure) # add paddle env configuration
include_directories("${PROJ_ROOT}") include_directories("${PROJ_ROOT}")
include_directories("${PROJ_ROOT}/paddle/cuda/include") include_directories("${PROJ_ROOT}/paddle/cuda/include")
include_directories("${CMAKE_CURRENT_BINARY_DIR}/proto") include_directories("${CMAKE_CURRENT_BINARY_DIR}/proto")
...@@ -126,9 +128,12 @@ endif(WITH_GPU) ...@@ -126,9 +128,12 @@ endif(WITH_GPU)
add_subdirectory(proto) add_subdirectory(proto)
add_subdirectory(paddle) add_subdirectory(paddle)
add_subdirectory(python) add_subdirectory(python)
#TODO (add go/master/c back when fixed)
add_subdirectory(doc) add_subdirectory(doc)
...@@ -40,6 +40,10 @@ if(NOT CMAKE_CROSSCOMPILING) ...@@ -40,6 +40,10 @@ if(NOT CMAKE_CROSSCOMPILING)
endif() endif()
endif() endif()
add_definitions(-DPADDLE_ONLY_CPU) add_definitions(-DPADDLE_ONLY_CPU)
add_definitions(-DHPPL_STUB_FUNC) add_definitions(-DHPPL_STUB_FUNC)
...@@ -11,56 +11,164 @@ ...@@ -11,56 +11,164 @@
# See the License for the specific language governing permissions and # See the License for the specific language governing permissions and
# limitations under the License. # limitations under the License.
# To simplify the build process of PaddlePaddle, we defined a couple of # generic.cmake defines CMake functions that look like Bazel's
# fundamental abstractions, e.g., how to build library, binary and # building rules (https://bazel.build/).
# test in C++, CUDA and Go.
# #
# ------------------------------------------- # -------------------------------------------
# C++ CUDA C++ Go # C++ CUDA C++ Go
# ------------------------------------------- # -------------------------------------------
# cc_library nv_library go_library # cc_library nv_library go_library
# cc_binary nv_binary go_binary # cc_binary nv_binary go_binary
# cc_test nv_test go_test # cc_test nv_test go_test
# ------------------------------------------- # -------------------------------------------
# To build a static library example.a from example.cc using the system
# compiler (like GCC):
# cc_library(example SRCS example.cc)
# To build a static library example.a from multiple source files
# example{1,2,3}.cc:
# cc_library(example SRCS example1.cc example2.cc example3.cc)
# To build a shared library example.so from example.cc:
# cc_library(example SHARED SRCS example.cc)
# To build a library using Nvidia's NVCC from .cu file(s), use the nv_
# prefixed version:
# nv_library(example SRCS example.cu)
# To specify that a library new_example.a depends on other libraries:
# cc_library(new_example SRCS new_example.cc DEPS example)
# Static libraries can be composed of other static libraries:
# cc_library(composed DEPS dependent1 dependent2 dependent3)
# To build an executable binary file from some source files and
# dependent libraries:
# cc_binary(example SRCS main.cc something.cc DEPS example1 example2)
# To build an executable binary file using NVCC, use the nv_ prefixed
# version:
# nv_binary(example SRCS main.cc something.cu DEPS example1 example2)
# To build a unit test binary, which is an executable binary with
# GoogleTest linked:
# cc_test(example_test SRCS example_test.cc DEPS example)
# To build a unit test binary using NVCC, use the nv_ prefixed version:
# nv_test(example_test SRCS example_test.cu DEPS example)
# #
# cmake_parse_arguments can help us to achieve this goal. # It is pretty often that executable and test binaries depend on
# https://cmake.org/cmake/help/v3.0/module/CMakeParseArguments.html # pre-defined external libraries like glog and gflags defined in
# /cmake/external/*.cmake:
# #
# cc_test(example_test SRCS example_test.cc DEPS example glog gflags)
find_package(Threads REQUIRED) find_package(Threads REQUIRED)
link_libraries(${CMAKE_THREAD_LIBS_INIT}) link_libraries(${CMAKE_THREAD_LIBS_INIT})
endif(NOT APPLE) endif(NOT APPLE)
# cc_library parses tensor.cc and figures out that target also depend on tensor.h. function(merge_static_libs TARGET_NAME)
# cc_library(tensor set(libs ${ARGN})
# tensor.cc
# DEPS # First get the file names of the libraries to be merged
# variant) foreach(lib ${libs})
get_target_property(libtype ${lib} TYPE)
message(FATAL_ERROR "merge_static_libs can only process static libraries")
set(libfiles ${libfiles} $<TARGET_FILE:${lib}>)
if(APPLE) # Use OSX's libtool to merge archives
COMMAND libtool -static -o "${CMAKE_CURRENT_BINARY_DIR}/lib${TARGET_NAME}.a" ${libfiles}
DEPENDS ${libs}
add_dependencies(${TARGET_NAME} ${TARGET_NAME}_archive)
else() # general UNIX: use "ar" to extract objects and re-add to a common lib
foreach(lib ${libs})
set(objlistfile ${lib}.objlist) # list of objects in the input library
set(objdir ${lib}.objdir)
add_custom_command(OUTPUT ${objdir}
COMMAND ${CMAKE_COMMAND} -E make_directory ${objdir})
add_custom_command(OUTPUT ${objlistfile}
COMMAND ${CMAKE_AR} -t "$<TARGET_FILE:${lib}>" > ../${objlistfile}
DEPENDS ${lib} ${objdir}
# Empty dummy source file that goes into merged library
set(mergebase ${lib}.mergebase.c)
add_custom_command(OUTPUT ${mergebase}
COMMAND ${CMAKE_COMMAND} -E touch ${mergebase}
DEPENDS ${objlistfile})
list(APPEND mergebases "${mergebase}")
# We need a target for the output merged library
add_library(${TARGET_NAME} STATIC ${mergebases})
set(outlibfile "$<TARGET_FILE:${TARGET_NAME}>")
foreach(lib ${libs})
add_custom_command(TARGET ${TARGET_NAME} POST_BUILD
COMMAND ${CMAKE_AR} ru ${outlibfile} @"../${objlistfile}"
add_custom_command(TARGET ${TARGET_NAME} POST_BUILD
COMMAND ${CMAKE_RANLIB} ${outlibfile})
function(cc_library TARGET_NAME) function(cc_library TARGET_NAME)
set(options OPTIONAL) set(options STATIC static SHARED shared)
set(oneValueArgs "") set(oneValueArgs "")
set(multiValueArgs SRCS DEPS) set(multiValueArgs SRCS DEPS)
cmake_parse_arguments(cc_library "${options}" "${oneValueArgs}" "${multiValueArgs}" ${ARGN}) cmake_parse_arguments(cc_library "${options}" "${oneValueArgs}" "${multiValueArgs}" ${ARGN})
if (${cc_library_OPTIONAL} STREQUAL "SHARED") if (cc_library_SRCS)
add_library(${TARGET_NAME} SHARED ${cc_library_SRCS}) if (cc_library_SHARED OR cc_library_shared) # build *.so
else() add_library(${TARGET_NAME} SHARED ${cc_library_SRCS})
add_library(${TARGET_NAME} STATIC ${cc_library_SRCS}) else()
endif() add_library(${TARGET_NAME} STATIC ${cc_library_SRCS})
if (cc_library_DEPS) endif()
add_dependencies(${TARGET_NAME} ${cc_library_DEPS}) if (cc_library_DEPS)
endif() add_dependencies(${TARGET_NAME} ${cc_library_DEPS})
if (cc_library_DEPS)
merge_static_libs(${TARGET_NAME} ${cc_library_DEPS})
message(FATAL "Please specify source file or library in cc_library.")
endfunction(cc_library) endfunction(cc_library)
# cc_binary parses tensor.cc and figures out that target also depend on tensor.h.
# cc_binary(tensor
# tensor.cc)
function(cc_binary TARGET_NAME) function(cc_binary TARGET_NAME)
set(options OPTIONAL) set(options "")
set(oneValueArgs "") set(oneValueArgs "")
set(multiValueArgs SRCS DEPS) set(multiValueArgs SRCS DEPS)
cmake_parse_arguments(cc_binary "${options}" "${oneValueArgs}" "${multiValueArgs}" ${ARGN}) cmake_parse_arguments(cc_binary "${options}" "${oneValueArgs}" "${multiValueArgs}" ${ARGN})
...@@ -71,13 +179,6 @@ function(cc_binary TARGET_NAME) ...@@ -71,13 +179,6 @@ function(cc_binary TARGET_NAME)
endif() endif()
endfunction(cc_binary) endfunction(cc_binary)
# The dependency to target tensor implies that if any of
# tensor{.h,.cc,_test.cc} is changed, tensor_test need to be re-built.
# cc_test(tensor_test
# tensor_test.cc
# tensor)
function(cc_test TARGET_NAME) function(cc_test TARGET_NAME)
set(options "") set(options "")
...@@ -91,28 +192,28 @@ function(cc_test TARGET_NAME) ...@@ -91,28 +192,28 @@ function(cc_test TARGET_NAME)
endif() endif()
endfunction(cc_test) endfunction(cc_test)
# Suppose that ops.cu includes global functions that take Tensor as
# their parameters, so ops depend on tensor. This implies that if
# any of tensor.{h.cc}, ops.{h,cu} is changed, ops need to be re-built.
# nv_library(ops
# ops.cu
# tensor)
function(nv_library TARGET_NAME) function(nv_library TARGET_NAME)
set(options OPTIONAL) set(options STATIC static SHARED shared)
set(oneValueArgs "") set(oneValueArgs "")
set(multiValueArgs SRCS DEPS) set(multiValueArgs SRCS DEPS)
cmake_parse_arguments(nv_library "${options}" "${oneValueArgs}" "${multiValueArgs}" ${ARGN}) cmake_parse_arguments(nv_library "${options}" "${oneValueArgs}" "${multiValueArgs}" ${ARGN})
if (${nv_library_OPTIONAL} STREQUAL "SHARED") if(nv_library_SRCS)
cuda_add_library(${TARGET_NAME} SHARED ${nv_library_SRCS}) if (nv_library_SHARED OR nv_library_shared) # build *.so
else() cuda_add_library(${TARGET_NAME} SHARED ${nv_library_SRCS})
cuda_add_library(${TARGET_NAME} STATIC ${nv_library_SRCS}) else()
endif() cuda_add_library(${TARGET_NAME} STATIC ${nv_library_SRCS})
if (nv_library_DEPS) endif()
add_dependencies(${TARGET_NAME} ${nv_library_DEPS}) if (nv_library_DEPS)
endif() add_dependencies(${TARGET_NAME} ${nv_library_DEPS})
if (nv_library_DEPS)
merge_static_libs(${TARGET_NAME} ${nv_library_DEPS})
message(FATAL "Please specify source file or library in nv_library.")
endif() endif()
endfunction(nv_library) endfunction(nv_library)
...@@ -130,13 +231,6 @@ function(nv_binary TARGET_NAME) ...@@ -130,13 +231,6 @@ function(nv_binary TARGET_NAME)
endif() endif()
endfunction(nv_binary) endfunction(nv_binary)
# The dependency to target tensor implies that if any of
# ops{.h,.cu,_test.cu} is changed, ops_test need to be re-built.
# nv_test(ops_test
# ops_test.cu
# ops)
function(nv_test TARGET_NAME) function(nv_test TARGET_NAME)
set(options "") set(options "")
...@@ -84,6 +84,7 @@ function(link_paddle_exe TARGET_NAME) ...@@ -84,6 +84,7 @@ function(link_paddle_exe TARGET_NAME)
paddle_parameter paddle_parameter
paddle_proto paddle_proto
paddle_cuda paddle_cuda
...@@ -11,13 +11,4 @@ include(flags) ...@@ -11,13 +11,4 @@ include(flags)
go_library(paddle_pserver_cclient STATIC) go_library(paddle_pserver_cclient STATIC)
add_custom_command(OUTPUT ${PROJ_ROOT}/paddle/trainer/libpaddle_pserver_cclient.a
COMMAND cp ${CMAKE_CURRENT_BINARY_DIR}/libpaddle_pserver_cclient.h ${PROJ_ROOT}/paddle/trainer/
COMMAND cp ${CMAKE_CURRENT_BINARY_DIR}/libpaddle_pserver_cclient.a ${PROJ_ROOT}/paddle/trainer/
DEPENDS paddle_pserver_cclient)
add_custom_target(paddle_pserver_cclient_lib ALL DEPENDS ${PROJ_ROOT}/paddle/trainer/libpaddle_pserver_cclient.a)
add_subdirectory(test) add_subdirectory(test)
...@@ -8,6 +8,7 @@ add_subdirectory(gserver) ...@@ -8,6 +8,7 @@ add_subdirectory(gserver)
add_subdirectory(pserver) add_subdirectory(pserver)
add_subdirectory(trainer) add_subdirectory(trainer)
add_subdirectory(scripts) add_subdirectory(scripts)
add_subdirectory(strings) add_subdirectory(strings)
# Do not build go directory until go cmake is working smoothly. # Do not build go directory until go cmake is working smoothly.
...@@ -19,8 +20,8 @@ find_package(Boost QUIET) ...@@ -19,8 +20,8 @@ find_package(Boost QUIET)
if(Boost_FOUND) if(Boost_FOUND)
include_directories(${Boost_INCLUDE_DIRS}) include_directories(${Boost_INCLUDE_DIRS})
include_directories(${CMAKE_CURRENT_SOURCE_DIR}) add_subdirectory(platform)
add_subdirectory(majel) add_subdirectory(framework)
endif() endif()
...@@ -16,7 +16,7 @@ set(API_HEADER ...@@ -16,7 +16,7 @@ set(API_HEADER
Internal.h) Internal.h)
add_library(paddle_api STATIC ${API_SOURCES}) add_library(paddle_api STATIC ${API_SOURCES})
add_dependencies(paddle_api gen_proto_cpp paddle_pserver_cclient_lib) add_dependencies(paddle_api gen_proto_cpp paddle_trainer_lib)
...@@ -842,7 +842,8 @@ public: ...@@ -842,7 +842,8 @@ public:
int passCount, int passCount,
bool useSparseUpdater); bool useSparseUpdater);
static ParameterUpdater* createNewRemoteUpdater( static ParameterUpdater* createNewRemoteUpdater(
OptimizationConfig* config, const std::string pserverSpec); OptimizationConfig* config,
const std::string pserverSpec) throw(UnsupportError);
~ParameterUpdater(); ~ParameterUpdater();
/** /**
...@@ -15,7 +15,9 @@ limitations under the License. */ ...@@ -15,7 +15,9 @@ limitations under the License. */
#include "PaddleAPI.h" #include "PaddleAPI.h"
#include "PaddleAPIPrivate.h" #include "PaddleAPIPrivate.h"
#include "paddle/trainer/NewRemoteParameterUpdater.h" #include "paddle/trainer/NewRemoteParameterUpdater.h"
#include "paddle/trainer/RemoteParameterUpdater.h" #include "paddle/trainer/RemoteParameterUpdater.h"
#include "paddle/trainer/ThreadParameterUpdater.h" #include "paddle/trainer/ThreadParameterUpdater.h"
...@@ -30,11 +32,16 @@ ParameterUpdater *ParameterUpdater::createLocalUpdater( ...@@ -30,11 +32,16 @@ ParameterUpdater *ParameterUpdater::createLocalUpdater(
} }
ParameterUpdater *ParameterUpdater::createNewRemoteUpdater( ParameterUpdater *ParameterUpdater::createNewRemoteUpdater(
OptimizationConfig *config, const std::string pserverSpec) { OptimizationConfig *config,
const std::string pserverSpec) throw(UnsupportError) {
auto updater = new ParameterUpdater(); auto updater = new ParameterUpdater();
updater->m->updater.reset(new paddle::NewRemoteParameterUpdater( updater->m->updater.reset(new paddle::NewRemoteParameterUpdater(
config->m->getConfig(), pserverSpec)); config->m->getConfig(), pserverSpec));
return updater; return updater;
throw UnsupportError();
} }
ParameterUpdater *ParameterUpdater::createRemoteUpdater( ParameterUpdater *ParameterUpdater::createRemoteUpdater(
Language: Cpp
BasedOnStyle: Google
Standard: Cpp11
cc_library(ddim SRCS ddim.cc)
cc_test(ddim_test SRCS ddim_test.cc DEPS ddim)
nv_test(dim_test SRCS dim_test.cu DEPS ddim)
#include "paddle/majel/ddim.h" #include "paddle/framework/ddim.h"
namespace majel { namespace paddle {
namespace framework {
///@cond HIDDEN ///@cond HIDDEN
...@@ -66,7 +67,7 @@ DDim make_ddim(const std::vector<int>& dims) { ...@@ -66,7 +67,7 @@ DDim make_ddim(const std::vector<int>& dims) {
///@cond HIDDEN ///@cond HIDDEN
// XXX For some reason, putting this in an anonymous namespace causes errors // XXX For some reason, putting this in an anonymous namespace causes errors
class DynamicMutableIndexer : public boost::static_visitor<int&> { class DynamicMutableIndexer : public boost::static_visitor<int&> {
public: public:
DynamicMutableIndexer(int idx) : idx_(idx) {} DynamicMutableIndexer(int idx) : idx_(idx) {}
template <int D> template <int D>
...@@ -74,12 +75,12 @@ public: ...@@ -74,12 +75,12 @@ public:
return dim[idx_]; return dim[idx_];
} }
private: private:
int idx_; int idx_;
}; };
class DynamicConstIndexer : public boost::static_visitor<int> { class DynamicConstIndexer : public boost::static_visitor<int> {
public: public:
DynamicConstIndexer(int idx) : idx_(idx) {} DynamicConstIndexer(int idx) : idx_(idx) {}
template <int D> template <int D>
...@@ -87,7 +88,7 @@ public: ...@@ -87,7 +88,7 @@ public:
return dim[idx_]; return dim[idx_];
} }
private: private:
int idx_; int idx_;
}; };
...@@ -213,10 +214,11 @@ struct DDimPrinter : boost::static_visitor<void> { ...@@ -213,10 +214,11 @@ struct DDimPrinter : boost::static_visitor<void> {
///\endcond ///\endcond
std::ostream& operator<<(std::ostream& os, const majel::DDim& ddim) { std::ostream& operator<<(std::ostream& os, const DDim& ddim) {
DDimPrinter printer(os); DDimPrinter printer(os);
boost::apply_visitor(printer, ddim); boost::apply_visitor(printer, ddim);
return os; return os;
} }
} // namespace majel } // namespace framework
} // namespace paddle
...@@ -5,20 +5,14 @@ ...@@ -5,20 +5,14 @@
#include <stdexcept> #include <stdexcept>
#include <vector> #include <vector>
#include "paddle/majel/dim.h" #include "paddle/framework/dim.h"
namespace majel { namespace paddle {
namespace framework {
namespace { namespace {
typedef boost::variant<Dim<1>, typedef boost::variant<Dim<1>, Dim<2>, Dim<3>, Dim<4>, Dim<5>, Dim<6>, Dim<7>,
Dim<2>, Dim<8>, Dim<9>>
DDimVar; DDimVar;
} }
...@@ -95,14 +89,15 @@ ssize_t product(const DDim& ddim); ...@@ -95,14 +89,15 @@ ssize_t product(const DDim& ddim);
int arity(const DDim& ddim); int arity(const DDim& ddim);
std::ostream& operator<<(std::ostream&, const majel::DDim&); std::ostream& operator<<(std::ostream&, const DDim&);
} // namespace majel } // namespace framework
} // namespace paddle
namespace boost { namespace boost {
template <typename T> template <typename T>
T get(const majel::DDim& in) { T get(const paddle::framework::DDim& in) {
return boost::get<T>(in.var); return boost::get<T>(in.var);
} }
...@@ -4,18 +4,18 @@ ...@@ -4,18 +4,18 @@
#include <vector> #include <vector>
#include "gtest/gtest.h" #include "gtest/gtest.h"
#include "paddle/majel/ddim.h" #include "paddle/framework/ddim.h"
TEST(DDim, Equality) { TEST(DDim, Equality) {
// construct a DDim from an initialization list // construct a DDim from an initialization list
majel::DDim ddim = majel::make_ddim({9, 1, 5}); paddle::framework::DDim ddim = paddle::framework::make_ddim({9, 1, 5});
EXPECT_EQ(ddim[0], 9); EXPECT_EQ(ddim[0], 9);
EXPECT_EQ(ddim[1], 1); EXPECT_EQ(ddim[1], 1);
EXPECT_EQ(ddim[2], 5); EXPECT_EQ(ddim[2], 5);
// construct a DDim from a vector // construct a DDim from a vector
std::vector<int> vec({9, 1, 5}); std::vector<int> vec({9, 1, 5});
majel::DDim vddim = majel::make_ddim(vec); paddle::framework::DDim vddim = paddle::framework::make_ddim(vec);
EXPECT_EQ(ddim[0], 9); EXPECT_EQ(ddim[0], 9);
EXPECT_EQ(ddim[1], 1); EXPECT_EQ(ddim[1], 1);
EXPECT_EQ(ddim[2], 5); EXPECT_EQ(ddim[2], 5);
...@@ -23,43 +23,43 @@ TEST(DDim, Equality) { ...@@ -23,43 +23,43 @@ TEST(DDim, Equality) {
// mutate a DDim // mutate a DDim
ddim[1] = 2; ddim[1] = 2;
EXPECT_EQ(ddim[1], 2); EXPECT_EQ(ddim[1], 2);
majel::set(ddim, 0, 6); paddle::framework::set(ddim, 0, 6);
EXPECT_EQ(majel::get(ddim, 0), 6); EXPECT_EQ(paddle::framework::get(ddim, 0), 6);
// vectorize a DDim // vectorize a DDim
std::vector<int> res_vec = majel::vectorize(vddim); std::vector<int> res_vec = paddle::framework::vectorize(vddim);
EXPECT_EQ(res_vec[0], 9); EXPECT_EQ(res_vec[0], 9);
EXPECT_EQ(res_vec[1], 1); EXPECT_EQ(res_vec[1], 1);
EXPECT_EQ(res_vec[2], 5); EXPECT_EQ(res_vec[2], 5);
majel::Dim<3> d(3, 2, 1); paddle::framework::Dim<3> d(3, 2, 1);
res_vec = majel::vectorize(majel::DDim(d)); res_vec = paddle::framework::vectorize(paddle::framework::DDim(d));
EXPECT_EQ(res_vec[0], 3); EXPECT_EQ(res_vec[0], 3);
EXPECT_EQ(res_vec[1], 2); EXPECT_EQ(res_vec[1], 2);
EXPECT_EQ(res_vec[2], 1); EXPECT_EQ(res_vec[2], 1);
// add two DDims // add two DDims
majel::DDim ddim_sum = ddim + vddim; paddle::framework::DDim ddim_sum = ddim + vddim;
EXPECT_EQ(ddim_sum[0], 15); EXPECT_EQ(ddim_sum[0], 15);
EXPECT_EQ(ddim_sum[1], 3); EXPECT_EQ(ddim_sum[1], 3);
EXPECT_EQ(ddim_sum[2], 10); EXPECT_EQ(ddim_sum[2], 10);
// multiply two DDims // multiply two DDims
majel::DDim ddim_mul = ddim * vddim; paddle::framework::DDim ddim_mul = ddim * vddim;
EXPECT_EQ(ddim_mul[0], 54); EXPECT_EQ(ddim_mul[0], 54);
EXPECT_EQ(ddim_mul[1], 2); EXPECT_EQ(ddim_mul[1], 2);
EXPECT_EQ(ddim_mul[2], 25); EXPECT_EQ(ddim_mul[2], 25);
// arity of a DDim // arity of a DDim
EXPECT_EQ(majel::arity(ddim), 3); EXPECT_EQ(paddle::framework::arity(ddim), 3);
// product of a DDim // product of a DDim
EXPECT_EQ(majel::product(vddim), 45); EXPECT_EQ(paddle::framework::product(vddim), 45);
} }
TEST(DDim, Print) { TEST(DDim, Print) {
// print a DDim // print a DDim
std::stringstream ss; std::stringstream ss;
majel::DDim ddim = majel::make_ddim({2, 3, 4}); paddle::framework::DDim ddim = paddle::framework::make_ddim({2, 3, 4});
ss << ddim; ss << ddim;
EXPECT_EQ("2, 3, 4", ss.str()); EXPECT_EQ("2, 3, 4", ss.str());
} }
...@@ -5,10 +5,11 @@ ...@@ -5,10 +5,11 @@
#include <stdexcept> #include <stdexcept>
#include <type_traits> #include <type_traits>
#include "paddle/majel/detail/cuda_assert.h" #include "paddle/platform/assert.h"
#include "paddle/majel/detail/hostdevice.h" #include "paddle/platform/hostdevice.h"
namespace majel { namespace paddle {
namespace framework {
// Statically sized, statically indexed dimension // Statically sized, statically indexed dimension
template <int i> template <int i>
...@@ -74,7 +75,7 @@ struct Dim<1> { ...@@ -74,7 +75,7 @@ struct Dim<1> {
throw std::invalid_argument("Index out of range."); throw std::invalid_argument("Index out of range.");
} }
#else #else
MAJEL_ASSERT(idx < size.head); PADDLE_ASSERT(idx < size.head);
#endif #endif
} }
...@@ -131,7 +132,7 @@ HOSTDEVICE int& indexer(Dim<D>& dim, int idx) { ...@@ -131,7 +132,7 @@ HOSTDEVICE int& indexer(Dim<D>& dim, int idx) {
throw std::invalid_argument("Tried to access a negative dimension"); throw std::invalid_argument("Tried to access a negative dimension");
} }
#else #else
MAJEL_ASSERT(idx >= 0); PADDLE_ASSERT(idx >= 0);
#endif #endif
if (idx == 0) { if (idx == 0) {
return dim.head; return dim.head;
...@@ -146,7 +147,7 @@ HOSTDEVICE int& indexer<1>(Dim<1>& dim, int idx) { ...@@ -146,7 +147,7 @@ HOSTDEVICE int& indexer<1>(Dim<1>& dim, int idx) {
throw std::invalid_argument("Invalid index"); throw std::invalid_argument("Invalid index");
} }
#else #else
MAJEL_ASSERT(idx == 0); PADDLE_ASSERT(idx == 0);
#endif #endif
return dim.head; return dim.head;
} }
...@@ -158,7 +159,7 @@ HOSTDEVICE int indexer(const Dim<D>& dim, int idx) { ...@@ -158,7 +159,7 @@ HOSTDEVICE int indexer(const Dim<D>& dim, int idx) {
throw std::invalid_argument("Tried to access a negative dimension"); throw std::invalid_argument("Tried to access a negative dimension");
} }
#else #else
MAJEL_ASSERT(idx >= 0); PADDLE_ASSERT(idx >= 0);
#endif #endif
if (idx == 0) { if (idx == 0) {
return dim.head; return dim.head;
...@@ -173,7 +174,7 @@ HOSTDEVICE int indexer<1>(const Dim<1>& dim, int idx) { ...@@ -173,7 +174,7 @@ HOSTDEVICE int indexer<1>(const Dim<1>& dim, int idx) {
throw std::invalid_argument("Invalid index"); throw std::invalid_argument("Invalid index");
} }
#else #else
MAJEL_ASSERT(idx == 0); PADDLE_ASSERT(idx == 0);
#endif #endif
return dim.head; return dim.head;
} }
...@@ -411,7 +412,7 @@ HOSTDEVICE Dim<sizeof...(Args)> make_dim(Args... idxes) { ...@@ -411,7 +412,7 @@ HOSTDEVICE Dim<sizeof...(Args)> make_dim(Args... idxes) {
// XXX For some reason, overloading fails to resolve this correctly // XXX For some reason, overloading fails to resolve this correctly
template <int i> template <int i>
typename std::enable_if<(i > 1), std::ostream&>::type operator<<( typename std::enable_if<(i > 1), std::ostream&>::type operator<<(
std::ostream& os, const majel::Dim<i>& d) { std::ostream& os, const Dim<i>& d) {
os << d.head << ", " << d.tail; os << d.head << ", " << d.tail;
return os; return os;
} }
...@@ -420,7 +421,7 @@ typename std::enable_if<(i > 1), std::ostream&>::type operator<<( ...@@ -420,7 +421,7 @@ typename std::enable_if<(i > 1), std::ostream&>::type operator<<(
// XXX I wish this could be an overload instead of a template // XXX I wish this could be an overload instead of a template
template <int i> template <int i>
typename std::enable_if<(i == 1), std::ostream&>::type operator<<( typename std::enable_if<(i == 1), std::ostream&>::type operator<<(
std::ostream& os, const majel::Dim<i>& d) { std::ostream& os, const Dim<i>& d) {
os << d.head; os << d.head;
return os; return os;
} }
...@@ -448,4 +449,5 @@ HOSTDEVICE Dim<D> linear_to_dimension(int linear_index, Dim<D> extents) { ...@@ -448,4 +449,5 @@ HOSTDEVICE Dim<D> linear_to_dimension(int linear_index, Dim<D> extents) {
return result; return result;
} }
} // namespace majel } // namespace framework
} // namespace paddle
#include <thrust/device_vector.h>
#include <sstream>
#include "paddle/framework/dim.h"
#include "gtest/gtest.h"
__global__ void test(paddle::framework::Dim<2>* o) {
o[0] = paddle::framework::make_dim(5, 6);
__global__ void dyn_idx_gpu(int* o) {
auto d = paddle::framework::make_dim(5, 6);
o[0] = d[1];
TEST(Dim, Equality) {
// construct a Dim on the CPU
auto a = paddle::framework::make_dim(3, 4);
EXPECT_EQ(paddle::framework::get<0>(a), 3);
EXPECT_EQ(paddle::framework::get<1>(a), 4);
// construct a Dim on the GPU
thrust::device_vector<paddle::framework::Dim<2>> t(2);
a = t[0];
EXPECT_EQ(paddle::framework::get<0>(a), 5);
EXPECT_EQ(paddle::framework::get<1>(a), 6);
// linearization
auto b = paddle::framework::make_dim(7, 8);
EXPECT_EQ(paddle::framework::linearize(a, b), 83);
// product
EXPECT_EQ(paddle::framework::product(a), 30);
// mutate a Dim
paddle::framework::get<1>(b) = 10;
EXPECT_EQ(paddle::framework::get<0>(b), 7);
EXPECT_EQ(paddle::framework::get<1>(b), 10);
// dynamic access
paddle::framework::get(b, 0) = 8;
b[1] = 11;
EXPECT_EQ(paddle::framework::get<0>(b), 8);
EXPECT_EQ(paddle::framework::get<1>(b), 11);
EXPECT_EQ(paddle::framework::get(b, 0), 8);
EXPECT_EQ(b[1], 11);
// dynamic access on GPU
thrust::device_vector<int> r(1);
int res = r[0];
EXPECT_EQ(res, 6);
// ex_prefix_mul
paddle::framework::Dim<3> c = paddle::framework::ex_prefix_mul(paddle::framework::Dim<3>(3, 4, 5));
EXPECT_EQ(paddle::framework::get<0>(c), 1);
EXPECT_EQ(paddle::framework::get<1>(c), 3);
EXPECT_EQ(paddle::framework::get<2>(c), 12);
// contiguous_strides
c = paddle::framework::contiguous_strides(paddle::framework::Dim<3>(10, 1, 10));
EXPECT_EQ(paddle::framework::get<0>(c), 1);
EXPECT_EQ(paddle::framework::get<1>(c), 0);
EXPECT_EQ(paddle::framework::get<2>(c), 10);
c = paddle::framework::contiguous_strides(paddle::framework::Dim<3>(10, 10, 1));
EXPECT_EQ(paddle::framework::get<0>(c), 1);
EXPECT_EQ(paddle::framework::get<1>(c), 10);
EXPECT_EQ(paddle::framework::get<2>(c), 0);
c = paddle::framework::contiguous_strides(paddle::framework::Dim<3>(1, 10, 10));
EXPECT_EQ(paddle::framework::get<0>(c), 0);
EXPECT_EQ(paddle::framework::get<1>(c), 1);
EXPECT_EQ(paddle::framework::get<2>(c), 10);
c = paddle::framework::contiguous_strides(paddle::framework::Dim<3>(2, 3, 4));
EXPECT_EQ(paddle::framework::get<0>(c), 1);
EXPECT_EQ(paddle::framework::get<1>(c), 2);
EXPECT_EQ(paddle::framework::get<2>(c), 6);
// generate from an index
auto size = paddle::framework::make_dim(4, 5, 2);
c = paddle::framework::Dim<3>(14, size);
EXPECT_EQ(paddle::framework::get<0>(c), 2);
EXPECT_EQ(paddle::framework::get<1>(c), 3);
EXPECT_EQ(paddle::framework::get<2>(c), 0);
c = paddle::framework::Dim<3>(25, size);
EXPECT_EQ(paddle::framework::get<0>(c), 1);
EXPECT_EQ(paddle::framework::get<1>(c), 1);
EXPECT_EQ(paddle::framework::get<2>(c), 1);
TEST(Dim, Bool) {
auto a = paddle::framework::make_dim(3, 4);
auto b = paddle::framework::make_dim(5, 6);
auto c = paddle::framework::make_dim(3, 4);
// in_bounds check
EXPECT_TRUE(paddle::framework::contained(a, b));
EXPECT_FALSE(paddle::framework::contained(b, a));
// comparison
EXPECT_TRUE(a == a);
EXPECT_TRUE(a == c);
// contiguous check
int x = 4, y = 5, z = 2;
paddle::framework::Dim<3> sizef(x, y, z);
paddle::framework::Dim<3> stridea(1, x, x*y);
paddle::framework::Dim<3> strideb(2, 2*x, 2*x*y);
paddle::framework::Dim<3> stridec(1, x, 2*x*y);
EXPECT_TRUE(paddle::framework::contiguous(sizef, stridea));
EXPECT_FALSE(paddle::framework::contiguous(sizef, strideb));
EXPECT_FALSE(paddle::framework::contiguous(sizef, stridec));
TEST(Dim, Print) {
std::stringstream ss;
auto a = paddle::framework::make_dim(2, 3);
ss << a;
EXPECT_EQ(ss.str(), "2, 3");
std::stringstream ss;
ss << paddle::framework::make_dim(8);
EXPECT_EQ(ss.str(), "8");
...@@ -68,14 +68,12 @@ public: ...@@ -68,14 +68,12 @@ public:
numOutputs_ = 1; numOutputs_ = 1;
} }
virtual void calc(const BufferArgs& inputs, const BufferArgs& outputs) {}
// input can be INPUT and INPUT_GRAD // input can be INPUT and INPUT_GRAD
// filter can be FILTER and FILTER_GRAD // filter can be FILTER and FILTER_GRAD
// output can be OUTPUT and OUTPUT_GRAD // output can be OUTPUT and OUTPUT_GRAD
void check(const TensorShape& input, void checkShape(const TensorShape& input,
const TensorShape& filter, const TensorShape& filter,
const TensorShape& output) { const TensorShape& output) {
// inputs and outputs arguments should be 4-dimensional. // inputs and outputs arguments should be 4-dimensional.
CHECK_EQ(input.ndims(), (size_t)4); CHECK_EQ(input.ndims(), (size_t)4);
CHECK_EQ(output.ndims(), (size_t)4); CHECK_EQ(output.ndims(), (size_t)4);
...@@ -117,15 +117,23 @@ public: ...@@ -117,15 +117,23 @@ public:
ConvFunctionBase::init(config); ConvFunctionBase::init(config);
} }
virtual void check(const BufferArgs& inputs,
const BufferArgs& outputs) override {
const TensorShape& input = inputs[0].shape();
const TensorShape& filter = inputs[1].shape();
const TensorShape& output = outputs[0].shape();
checkShape(input, filter, output);
void calc(const BufferArgs& inputs, const BufferArgs& outputs) override { void calc(const BufferArgs& inputs, const BufferArgs& outputs) override {
CHECK_EQ(numInputs_, inputs.size()); CHECK_EQ(numInputs_, inputs.size());
CHECK_EQ(numOutputs_, outputs.size()); CHECK_EQ(numOutputs_, outputs.size());
check(inputs, outputs);
// TODO(hedaoyuan): Need to define some index macros, // TODO(hedaoyuan): Need to define some index macros,
// to avoid using 0 and 1. // to avoid using 0 and 1.
const TensorShape& input = inputs[0].shape(); const TensorShape& input = inputs[0].shape();
const TensorShape& filter = inputs[1].shape(); const TensorShape& filter = inputs[1].shape();
const TensorShape& output = outputs[0].shape(); const TensorShape& output = outputs[0].shape();
check(input, filter, output);
real beta; real beta;
if (outputs[0].getArgType() == ADD_TO) { if (outputs[0].getArgType() == ADD_TO) {
...@@ -209,16 +217,24 @@ public: ...@@ -209,16 +217,24 @@ public:
ConvFunctionBase::init(config); ConvFunctionBase::init(config);
} }
virtual void check(const BufferArgs& inputs,
const BufferArgs& outputs) override {
const TensorShape& output = inputs[0].shape();
const TensorShape& filter = inputs[1].shape();
const TensorShape& input = outputs[0].shape();
checkShape(input, filter, output);
void calc(const BufferArgs& inputs, const BufferArgs& outputs) override { void calc(const BufferArgs& inputs, const BufferArgs& outputs) override {
CHECK_EQ(numInputs_, inputs.size()); CHECK_EQ(numInputs_, inputs.size());
CHECK_EQ(numOutputs_, outputs.size()); CHECK_EQ(numOutputs_, outputs.size());
check(inputs, outputs);
// Since the implementation of Col2ImFunctor is ADD_TO, // Since the implementation of Col2ImFunctor is ADD_TO,
// this function only supports ADD_TO mode. // this function only supports ADD_TO mode.
CHECK_EQ(outputs[0].getArgType(), ADD_TO); CHECK_EQ(outputs[0].getArgType(), ADD_TO);
const TensorShape& output = inputs[0].shape(); const TensorShape& output = inputs[0].shape();
const TensorShape& filter = inputs[1].shape(); const TensorShape& filter = inputs[1].shape();
const TensorShape& input = outputs[0].shape(); const TensorShape& input = outputs[0].shape();
check(input, filter, output);
size_t batchSize = input[0]; size_t batchSize = input[0];
size_t inputChannels = input[1]; size_t inputChannels = input[1];
...@@ -295,13 +311,21 @@ public: ...@@ -295,13 +311,21 @@ public:
ConvFunctionBase::init(config); ConvFunctionBase::init(config);
} }
virtual void check(const BufferArgs& inputs,
const BufferArgs& outputs) override {
const TensorShape& output = inputs[0].shape();
const TensorShape& input = inputs[1].shape();
const TensorShape& filter = outputs[0].shape();
checkShape(input, filter, output);
void calc(const BufferArgs& inputs, const BufferArgs& outputs) override { void calc(const BufferArgs& inputs, const BufferArgs& outputs) override {
CHECK_EQ(numInputs_, inputs.size()); CHECK_EQ(numInputs_, inputs.size());
CHECK_EQ(numOutputs_, outputs.size()); CHECK_EQ(numOutputs_, outputs.size());
check(inputs, outputs);
const TensorShape& output = inputs[0].shape(); const TensorShape& output = inputs[0].shape();
const TensorShape& input = inputs[1].shape(); const TensorShape& input = inputs[1].shape();
const TensorShape& filter = outputs[0].shape(); const TensorShape& filter = outputs[0].shape();
check(input, filter, output);
real beta; real beta;
if (outputs[0].getArgType() == ADD_TO) { if (outputs[0].getArgType() == ADD_TO) {
...@@ -54,8 +54,8 @@ public: ...@@ -54,8 +54,8 @@ public:
T inValue; T inValue;
const int inH = inStartH + fH; const int inH = inStartH + fH;
const int inW = inStartW + fW; const int inW = inStartW + fW;
if ((inH >= 0 && inH < inputHeight) && if ((inH >= 0 && inH < (int)inputHeight) &&
(inW >= 0 && inW < inputWidth)) { (inW >= 0 && inW < (int)inputWidth)) {
size_t offsetInput = size_t offsetInput =
batch * inputChannels * inputHeight * inputWidth + batch * inputChannels * inputHeight * inputWidth +
inC * inputHeight * inputWidth + inH * inputWidth + inW; inC * inputHeight * inputWidth + inH * inputWidth + inW;
...@@ -90,14 +90,19 @@ public: ...@@ -90,14 +90,19 @@ public:
ConvFunctionBase::init(config); ConvFunctionBase::init(config);
} }
void calc(const BufferArgs& inputs, const BufferArgs& outputs) override { virtual void check(const BufferArgs& inputs,
CHECK_EQ(numInputs_, inputs.size()); const BufferArgs& outputs) override {
CHECK_EQ(numOutputs_, outputs.size());
const TensorShape& input = inputs[0].shape(); const TensorShape& input = inputs[0].shape();
const TensorShape& filter = inputs[1].shape(); const TensorShape& filter = inputs[1].shape();
const TensorShape& output = outputs[0].shape(); const TensorShape& output = outputs[0].shape();
check(input, filter, output); checkShape(input, filter, output);
void calc(const BufferArgs& inputs, const BufferArgs& outputs) override {
CHECK_EQ(numInputs_, inputs.size());
CHECK_EQ(numOutputs_, outputs.size());
CHECK_EQ(outputs[0].getArgType(), ASSIGN_TO); CHECK_EQ(outputs[0].getArgType(), ASSIGN_TO);
check(inputs, outputs);
size_t batchSize = inputs[0].shape()[0]; size_t batchSize = inputs[0].shape()[0];
size_t inputChannels = inputs[0].shape()[1]; size_t inputChannels = inputs[0].shape()[1];
...@@ -284,6 +284,16 @@ public: ...@@ -284,6 +284,16 @@ public:
} }
protected: protected:
std::vector<Argument::SeqInfo> commonSeqInfo_;
ICpuGpuVectorPtr sequenceStartPositions_;
void calcSequenceStartPositions();
void checkInputConsistency(int inlinkId,
const std::vector<Argument::SeqInfo>& seqInfo);
void reorganizeInput(PassType passType);
void reorganizeOutput(PassType passType);
void connectFrames(PassType passType);
void calcNumSequencesAtEachStep();
void resizeOrCreateFrames(int numFrames); void resizeOrCreateFrames(int numFrames);
void resizeBootFrame(int numSequences); void resizeBootFrame(int numSequences);
...@@ -295,8 +305,7 @@ protected: ...@@ -295,8 +305,7 @@ protected:
std::string linkName; std::string linkName;
LayerPtr inLayer; LayerPtr inLayer;
std::vector<LayerPtr> agents; // Scatter Agents to reform batch input std::vector<LayerPtr> agents; // Scatter Agents to reform batch input
bool hasSubseq; Argument outArg; // scatter output argument
Argument outArg; // scatter output argument
}; };
std::vector<InFrameLine> inFrameLines_; std::vector<InFrameLine> inFrameLines_;
...@@ -318,7 +327,6 @@ protected: ...@@ -318,7 +327,6 @@ protected:
std::vector<LayerPtr> agents; std::vector<LayerPtr> agents;
std::vector<LayerPtr> scatterAgents; // scatter agent used by beam search std::vector<LayerPtr> scatterAgents; // scatter agent used by beam search
Argument outArg; // scatter output argument Argument outArg; // scatter output argument
bool is_sequence;
// Different memoryFrameLine have different element as follows // Different memoryFrameLine have different element as follows
IVectorPtr allIds; // scattered id of realLayer IVectorPtr allIds; // scattered id of realLayer
ICpuGpuVectorPtr ICpuGpuVectorPtr
...@@ -330,22 +338,27 @@ protected: ...@@ -330,22 +338,27 @@ protected:
// and all outFrameLines(outlinks) share the info with one inFrameLine, // and all outFrameLines(outlinks) share the info with one inFrameLine,
// which is assigned by targetInfoInlinkId_. // which is assigned by targetInfoInlinkId_.
struct Info { struct Info {
IVectorPtr allIds; // scattered id of realLayer // The original positions in the original batch
std::vector<int> idIndex; // index of allIds IVectorPtr allIds; // scattered id of realLayer [batchSize]
// index of allIds for each step [maxSequenceLength_]
// idIndex[i] is the total length of the first i sequences
std::vector<int> idIndex;
ICpuGpuVectorPtr ICpuGpuVectorPtr
sequenceStartPositions; // scattered sequenceStartPositions sequenceStartPositions; // scattered sequenceStartPositions
std::vector<int> seqStartPosIndex; // index of sequenceStartPositions std::vector<int> seqStartPosIndex; // index of sequenceStartPositions
}; };
std::vector<Info> info_; std::vector<Info> info_; // for input
// numSeqs_[i] is the number sequences which is longer than i (for sequence // numSeqs_[i] is the number sequences which is longer than i (for sequence
// data) or has more than i subsequences (for subsequence data) // data) or has more than i subsequences (for subsequence data)
// Equivalently, numSeqs_[i] is the number of sequences at step i;
std::vector<int> numSeqs_; std::vector<int> numSeqs_;
std::vector<std::vector<Argument::SeqInfo>> seqInfos_; std::vector<std::vector<Argument::SeqInfo>> seqInfos_;
// the id of inlink which share info with outlinks void checkOutputConsistency(OutFrameLine& outFrameLine);
int targetInfoInlinkId_;
/* create scattered id infomation for all realLayer of inFrameLines one time. /* create scattered id infomation for all realLayer of inFrameLines one time.
* If hasSubseq, will also create scattered sequenceStartPositions infomation * If hasSubseq, will also create scattered sequenceStartPositions infomation
...@@ -354,6 +367,28 @@ protected: ...@@ -354,6 +367,28 @@ protected:
void createInFrameInfo(int inlinks_id, void createInFrameInfo(int inlinks_id,
const Argument& input, const Argument& input,
PassType passType); PassType passType);
void createInFrameInfo_nonseq(int inlinks_id,
const Argument& input,
PassType passType);
void createInFrameInfo_seq(int inlinks_id,
const Argument& input,
PassType passType);
void createInFrameInfo_subseq(int inlinks_id,
const Argument& input,
PassType passType);
void createOutFrameInfo(OutFrameLine& outFrameLine,
Info& info,
ICpuGpuVectorPtr& sequenceStartPositions,
ICpuGpuVectorPtr& subSequenceStartPositions);
void createOutFrameInfo_seq(OutFrameLine& outFrameLine,
Info& info,
ICpuGpuVectorPtr& sequenceStartPositions,
ICpuGpuVectorPtr& subSequenceStartPositions);
void createOutFrameInfo_subseq(OutFrameLine& outFrameLine,
Info& info,
ICpuGpuVectorPtr& sequenceStartPositions,
ICpuGpuVectorPtr& subSequenceStartPositions);
void createMemoryFrameInfo(MemoryFrameLine* memoryFrameLine, void createMemoryFrameInfo(MemoryFrameLine* memoryFrameLine,
PassType passType); PassType passType);
...@@ -386,9 +421,7 @@ protected: ...@@ -386,9 +421,7 @@ protected:
NeuralNetwork* rootNetwork_; NeuralNetwork* rootNetwork_;
bool reversed_; bool reversed_;
// if hasSubseq: max number of sentences(subseq)in batchsize samples int maxSequenceLength_; // Max top-level length
// else: max number of tokens in batchsize samples(sentences)
int maxSequenceLength_;
bool useGpu_; bool useGpu_;
bool stopBeamSearch_; bool stopBeamSearch_;
...@@ -36,14 +36,23 @@ void AgentLayer::forward(PassType passType) { ...@@ -36,14 +36,23 @@ void AgentLayer::forward(PassType passType) {
Layer::forward(passType); Layer::forward(passType);
Argument& realOutput = realLayer_->getOutput(); Argument& realOutput = realLayer_->getOutput();
int realHeight = realOutput.getBatchSize(); int realNumSequences = realOutput.getNumSequences();
CHECK_LE(numSamples_, realHeight); CHECK_LE(numSamples_, realNumSequences);
// get Arguments from real layers // get Arguments from real layers
if (numSamples_ > 0 && numSamples_ < realHeight) { if (numSamples_ > 0 && numSamples_ < realNumSequences) {
if (realOutput.ids) { if (realOutput.hasSeq()) {
output_.ids = int numRows =
IVector::create(realOutput.ids->getData(), numSamples_, useGpu_); realOutput.sequenceStartPositions->getData(false)[numSamples_];
/* offset */ 0,
/* trans */ false,
/* seqFlag */ true,
/* seqStart */ 0,
/* seqSize */ numSamples_ + 1);
} else { } else {
output_.subArgFrom( output_.subArgFrom(
realOutput, /* offset */ 0, numSamples_, getSize(), useGpu_); realOutput, /* offset */ 0, numSamples_, getSize(), useGpu_);
...@@ -53,34 +62,6 @@ void AgentLayer::forward(PassType passType) { ...@@ -53,34 +62,6 @@ void AgentLayer::forward(PassType passType) {
} }
} }
void SequenceAgentLayer::forward(PassType passType) {
Argument& realOutput = realLayer_->getOutput();
int realNumSequences = realOutput.getNumSequences();
CHECK_LE(numSamples_, realNumSequences);
// get Arguments from real layers
if (numSamples_ > 0 && numSamples_ < realNumSequences) {
int numRows =
CHECK(!realOutput.ids) << "Not supported";
/* offset */ 0,
/* trans */ false,
/* seqFlag */ true,
/* seqStart */ 0,
/* seqSize */ numSamples_ + 1);
} else {
output_ = realOutput;
REGISTER_LAYER(sequence_agent, SequenceAgentLayer);
bool GatherAgentLayer::init(const LayerMap& layerMap, bool GatherAgentLayer::init(const LayerMap& layerMap,
const ParameterMap& parameterMap) { const ParameterMap& parameterMap) {
CHECK_EQ(config_.inputs_size(), 0); CHECK_EQ(config_.inputs_size(), 0);
...@@ -91,18 +72,26 @@ bool GatherAgentLayer::init(const LayerMap& layerMap, ...@@ -91,18 +72,26 @@ bool GatherAgentLayer::init(const LayerMap& layerMap,
return true; return true;
} }
void GatherAgentLayer::copyIdAndSequenceInfo(const Argument& input, void GatherAgentLayer::copyIdAndSequenceInfo(
const IVectorPtr& ids, ICpuGpuVectorPtr sequenceStartPositions,
const std::vector<int>& idIndex) { ICpuGpuVectorPtr subSequenceStartPositions,
output_.sequenceStartPositions = input.sequenceStartPositions; const IVectorPtr& ids,
output_.subSequenceStartPositions = input.subSequenceStartPositions; const std::vector<int>& idIndex) {
realLayers_.clear(); output_.sequenceStartPositions = sequenceStartPositions;
output_.subSequenceStartPositions = subSequenceStartPositions;
allIds_ = ids; allIds_ = ids;
idIndex_ = idIndex; idIndex_ = idIndex;
} }
void GatherAgentLayer::forward(PassType passType) { void GatherAgentLayer::forward(PassType passType) {
Layer::forward(passType); Layer::forward(passType);
void GatherAgentLayer::forwardValue(PassType passType) {
MatrixPtr valueReal = realLayers_[0]->getOutputValue();
if (!valueReal) return;
int height = allIds_->getSize(); int height = allIds_->getSize();
int width = this->getSize(); int width = this->getSize();
...@@ -147,7 +136,9 @@ void ScatterAgentLayer::forward(PassType passType) { ...@@ -147,7 +136,9 @@ void ScatterAgentLayer::forward(PassType passType) {
CHECK_EQ(realLayer_->getDeviceId(), this->getDeviceId()); CHECK_EQ(realLayer_->getDeviceId(), this->getDeviceId());
int width = this->getSize(); int width = this->getSize();
if (realOutArg_.value || realOutArg_.ids) { if (realOutArg_.hasSeq()) {
} else if (realOutArg_.value || realOutArg_.ids) {
output_.subArgFrom( output_.subArgFrom(
realOutArg_, /* offset */ idIndex_, idSize_, width, useGpu_); realOutArg_, /* offset */ idIndex_, idSize_, width, useGpu_);
} else { // used in generation } else { // used in generation
...@@ -174,7 +165,7 @@ void ScatterAgentLayer::backward(const UpdateCallback& callback) { ...@@ -174,7 +165,7 @@ void ScatterAgentLayer::backward(const UpdateCallback& callback) {
if (realGrad) { if (realGrad) {
// for agent in inFrameLines and memoryFrameLines, // for agent in inFrameLines and memoryFrameLines,
// only first scatterAgentLayer should do addToRows in backward // only first scatterAgentLayer should do addToRows in backward
if (idIndex_ == 0) { if (handleBackward_) {
outputGrad->addToRows(*realGrad, *ids_); outputGrad->addToRows(*realGrad, *ids_);
} }
} }
...@@ -183,12 +174,14 @@ void ScatterAgentLayer::backward(const UpdateCallback& callback) { ...@@ -183,12 +174,14 @@ void ScatterAgentLayer::backward(const UpdateCallback& callback) {
REGISTER_LAYER(gather_agent, GatherAgentLayer); REGISTER_LAYER(gather_agent, GatherAgentLayer);
REGISTER_LAYER(scatter_agent, ScatterAgentLayer); REGISTER_LAYER(scatter_agent, ScatterAgentLayer);
void SequenceGatherAgentLayer::forward(PassType passType) { void GatherAgentLayer::forwardIds(PassType passType) {
int height = 0; int height = 0;
int* starts = output_.subSequenceStartPositions->getMutableData(false);
IVectorPtr idReal = realLayers_[0]->getOutputLabel(); IVectorPtr idReal = realLayers_[0]->getOutputLabel();
if (idReal) {
if (!idReal) return;
if (output_.subSequenceStartPositions) {
int* starts = output_.subSequenceStartPositions->getMutableData(false);
// Gather generator.idsVec // Gather generator.idsVec
// if is beam search generation result. Get first result. // if is beam search generation result. Get first result.
if (idReal->getData()[idReal->getSize() - 1] == -1) { if (idReal->getData()[idReal->getSize() - 1] == -1) {
...@@ -212,13 +205,11 @@ void SequenceGatherAgentLayer::forward(PassType passType) { ...@@ -212,13 +205,11 @@ void SequenceGatherAgentLayer::forward(PassType passType) {
->copyFrom(*realLayers_[i]->getOutputLabel()); ->copyFrom(*realLayers_[i]->getOutputLabel());
} }
} else { } else {
// Gather output.value, same as GatherAgentLayer LOG(FATAL) << "Not implemented";
} }
} }
void SequenceScatterAgentLayer::forward(PassType passType) { void ScatterAgentLayer::forwardSequence(PassType passType) {
Layer::forward(passType); Layer::forward(passType);
CHECK_EQ(realLayer_->getDeviceId(), this->getDeviceId()); CHECK_EQ(realLayer_->getDeviceId(), this->getDeviceId());
...@@ -241,6 +232,7 @@ void SequenceScatterAgentLayer::forward(PassType passType) { ...@@ -241,6 +232,7 @@ void SequenceScatterAgentLayer::forward(PassType passType) {
/* seqStart */ seqStartPosIndex_, /* seqStart */ seqStartPosIndex_,
/* seqSize */ numSequences_); /* seqSize */ numSequences_);
} else { } else {
// Putting the generation logic here is really an ugly hack!
// used in generation // used in generation
int height = 0; int height = 0;
size_t numSequences = ids_->getSize(); size_t numSequences = ids_->getSize();
...@@ -284,7 +276,4 @@ void SequenceScatterAgentLayer::forward(PassType passType) { ...@@ -284,7 +276,4 @@ void SequenceScatterAgentLayer::forward(PassType passType) {
} }
} }
REGISTER_LAYER(sequence_gather_agent, SequenceGatherAgentLayer);
REGISTER_LAYER(sequence_scatter_agent, SequenceScatterAgentLayer);
} // namespace paddle } // namespace paddle
...@@ -49,18 +49,6 @@ public: ...@@ -49,18 +49,6 @@ public:
void backward(const UpdateCallback& callback = nullptr) override {} void backward(const UpdateCallback& callback = nullptr) override {}
}; };
* like AgentLayer, but use first *numSamples* sequences
class SequenceAgentLayer : public AgentLayer {
explicit SequenceAgentLayer(const LayerConfig& config) : AgentLayer(config) {}
~SequenceAgentLayer() {}
void forward(PassType passType) override;
void backward(const UpdateCallback& callback = nullptr) override {}
/** /**
* Like AgentLayer, but it can gather many real layers. Each real * Like AgentLayer, but it can gather many real layers. Each real
* layer give a few rows of a sequence, after gather all real layers, * layer give a few rows of a sequence, after gather all real layers,
...@@ -83,7 +71,10 @@ public: ...@@ -83,7 +71,10 @@ public:
const ParameterMap& parameterMap) override; const ParameterMap& parameterMap) override;
// call before addRealLayer // call before addRealLayer
void copyIdAndSequenceInfo(const Argument& input, void clearRealLayers() { realLayers_.clear(); }
void copyIdAndSequenceInfo(ICpuGpuVectorPtr sequenceStartPositions,
ICpuGpuVectorPtr subSequenceStartPositions,
const IVectorPtr& allIds, const IVectorPtr& allIds,
const std::vector<int>& idIndex); const std::vector<int>& idIndex);
...@@ -92,24 +83,8 @@ public: ...@@ -92,24 +83,8 @@ public:
void forward(PassType passType) override; void forward(PassType passType) override;
void backward(const UpdateCallback& callback) override; void backward(const UpdateCallback& callback) override;
}; void forwardValue(PassType passType);
void forwardIds(PassType passType);
* Like GatherAgentLayer, but select a few sequence in real layer.
* *ids* in addRealLayer() are the ids of selected sequence.
* It's used to reorder sequence output.
class SequenceGatherAgentLayer : public GatherAgentLayer {
explicit SequenceGatherAgentLayer(const LayerConfig& config)
: GatherAgentLayer(config) {}
virtual ~SequenceGatherAgentLayer() {}
void forward(PassType passType);
void backward(const UpdateCallback& callback) {
// same as GatherAgentLayer
}; };
/** /**
...@@ -129,6 +104,11 @@ protected: ...@@ -129,6 +104,11 @@ protected:
int idSize_; int idSize_;
int seqStartPosIndex_; int seqStartPosIndex_;
int numSequences_; // number of sequences in this scatterAgentLayer int numSequences_; // number of sequences in this scatterAgentLayer
bool handleBackward_;
// use to store expanded cpuStartPositions or subSequenceStartPositions
// of real layer.
ICpuGpuVectorPtr inputStartPos_;
public: public:
explicit ScatterAgentLayer(const LayerConfig& config) : Layer(config) {} explicit ScatterAgentLayer(const LayerConfig& config) : Layer(config) {}
...@@ -147,19 +127,15 @@ public: ...@@ -147,19 +127,15 @@ public:
* false(default) in ScatterAgentLayer, and * false(default) in ScatterAgentLayer, and
* true in SequenceScatterAgentLayer. * true in SequenceScatterAgentLayer.
*/ */
void setRealLayer(LayerPtr layer, void setRealLayer(LayerPtr layer, const std::vector<int>& ids) {
const std::vector<int>& ids,
bool copyId = false) {
realLayer_ = layer; realLayer_ = layer;
IVector::resizeOrCreate(ids_, ids.size(), useGpu_); IVector::resizeOrCreate(ids_, ids.size(), useGpu_);
ids_->copyFrom(ids.data(), ids.size()); ids_->copyFrom(ids.data(), ids.size());
if (copyId) { if (useGpu_) {
if (useGpu_) { IVector::resizeOrCreate(cpuIds_, ids.size(), false);
IVector::resizeOrCreate(cpuIds_, ids.size(), false); cpuIds_->copyFrom(ids.data(), ids.size());
cpuIds_->copyFrom(ids.data(), ids.size()); } else {
} else { cpuIds_ = ids_;
cpuIds_ = ids_;
} }
} }
...@@ -169,12 +145,14 @@ public: ...@@ -169,12 +145,14 @@ public:
const Argument& outArg, const Argument& outArg,
const IVectorPtr& ids, const IVectorPtr& ids,
int idIndex, int idIndex,
int idSize) { int idSize,
bool handleBackward) {
realLayer_ = layer; realLayer_ = layer;
realOutArg_ = outArg; realOutArg_ = outArg;
ids_ = ids; ids_ = ids;
idIndex_ = idIndex; idIndex_ = idIndex;
idSize_ = idSize; idSize_ = idSize;
handleBackward_ = handleBackward;
} }
void setSequenceStartPositions(const ICpuGpuVectorPtr& sequenceStartPositions, void setSequenceStartPositions(const ICpuGpuVectorPtr& sequenceStartPositions,
...@@ -187,28 +165,8 @@ public: ...@@ -187,28 +165,8 @@ public:
void forward(PassType passType) override; void forward(PassType passType) override;
void backward(const UpdateCallback& callback) override; void backward(const UpdateCallback& callback) override;
/** void forwardSequence(PassType passType);
* Like ScatterAgentLayer, but select a few sequence in real layer.
* *ids* in setRealLayer() or setRealLayerAndOutput() are the ids of
* selected sequence. It's used to reorder sequence input.
class SequenceScatterAgentLayer : public ScatterAgentLayer {
// use to store expanded cpuStartPositions or subSequenceStartPositions
// of real layer.
ICpuGpuVectorPtr inputStartPos_;
explicit SequenceScatterAgentLayer(const LayerConfig& config)
: ScatterAgentLayer(config) {}
virtual ~SequenceScatterAgentLayer() {}
void forward(PassType passType);
void backward(const UpdateCallback& callback) {
}; };
} // namespace paddle } // namespace paddle
...@@ -40,6 +40,7 @@ namespace paddle { ...@@ -40,6 +40,7 @@ namespace paddle {
class FeatureMapExpandLayer : public Layer { class FeatureMapExpandLayer : public Layer {
private: private:
int numFilters_; int numFilters_;
bool asRowVector_;
public: public:
explicit FeatureMapExpandLayer(const LayerConfig& config) : Layer(config) {} explicit FeatureMapExpandLayer(const LayerConfig& config) : Layer(config) {}
...@@ -62,6 +63,7 @@ bool FeatureMapExpandLayer::init(const LayerMap& layerMap, ...@@ -62,6 +63,7 @@ bool FeatureMapExpandLayer::init(const LayerMap& layerMap,
CHECK_EQ(inputLayers_.size(), 1UL); CHECK_EQ(inputLayers_.size(), 1UL);
numFilters_ = config_.num_filters(); numFilters_ = config_.num_filters();
asRowVector_ = config_.user_arg() != "as_col_vec";
return true; return true;
} }
...@@ -76,16 +78,30 @@ void FeatureMapExpandLayer::forward(PassType passType) { ...@@ -76,16 +78,30 @@ void FeatureMapExpandLayer::forward(PassType passType) {
{ {
AsyncGpuBlock asyncGpuBlock; AsyncGpuBlock asyncGpuBlock;
for (size_t i = 0; i < batchSize; i++) { if (asRowVector_) {
MatrixPtr outVTmp = for (size_t i = 0; i < batchSize; i++) {
Matrix::create(outputV->getData() + i * imgSize * numFilters_, MatrixPtr outVTmp =
numFilters_, Matrix::create(outputV->getData() + i * imgSize * numFilters_,
imgSize, numFilters_,
false, imgSize,
useGpu_); false,
MatrixPtr inVTmp = Matrix::create( useGpu_);
inputV->getData() + i * imgSize, 1, imgSize, false, useGpu_); MatrixPtr inVTmp = Matrix::create(
outVTmp->addRowVector(*inVTmp); inputV->getData() + i * imgSize, 1, imgSize, false, useGpu_);
} else {
for (size_t i = 0; i < batchSize; i++) {
MatrixPtr outVTmp =
Matrix::create(outputV->getData() + i * imgSize * numFilters_,
MatrixPtr inVTmp = Matrix::create(
inputV->getData() + i * imgSize, imgSize, 1, false, useGpu_);
} }
} }
/* activation */ { /* activation */ {
...@@ -102,24 +118,38 @@ void FeatureMapExpandLayer::backward(const UpdateCallback& callback) { ...@@ -102,24 +118,38 @@ void FeatureMapExpandLayer::backward(const UpdateCallback& callback) {
MatrixPtr outGrad = getOutputGrad(); MatrixPtr outGrad = getOutputGrad();
size_t batchSize = getInput(0).getBatchSize(); size_t batchSize = getInput(0).getBatchSize();
int imgSize = inGrad->getWidth(); int imgSize = inGrad->getWidth();
/* Do activation */ {
REGISTER_TIMER_INFO("BpAvtTimer", getName().c_str());
{ {
AsyncGpuBlock asyncGpuBlock; AsyncGpuBlock asyncGpuBlock;
for (size_t i = 0; i < batchSize; i++) { if (asRowVector_) {
MatrixPtr outGradTmp = for (size_t i = 0; i < batchSize; i++) {
Matrix::create(outGrad->getData() + i * imgSize * numFilters_, MatrixPtr outGradTmp =
numFilters_, Matrix::create(outGrad->getData() + i * imgSize * numFilters_,
imgSize, numFilters_,
false, imgSize,
useGpu_); false,
MatrixPtr inGradTmp = Matrix::create( useGpu_);
inGrad->getData() + i * imgSize, 1, imgSize, false, useGpu_); MatrixPtr inGradTmp = Matrix::create(
inGradTmp->collectBias(*outGradTmp, 1); inGrad->getData() + i * imgSize, 1, imgSize, false, useGpu_);
inGradTmp->collectBias(*outGradTmp, 1);
} else {
for (size_t i = 0; i < batchSize; i++) {
MatrixPtr outGradTmp =
Matrix::create(outGrad->getData() + i * imgSize * numFilters_,
MatrixPtr inGradTmp = Matrix::create(
inGrad->getData() + i * imgSize, imgSize, 1, false, useGpu_);
inGradTmp->sumRows(*outGradTmp, 1, 1);
} }
} }
/* Do derivation */ {
REGISTER_TIMER_INFO("BpAvtTimer", getName().c_str());
} }
} // namespace paddle. } // namespace paddle.
...@@ -22,10 +22,33 @@ public: ...@@ -22,10 +22,33 @@ public:
void forward(PassType passType) override { void forward(PassType passType) override {
Layer::forward(passType); Layer::forward(passType);
std::vector<std::string> vals;
for (size_t i = 0; i != inputLayers_.size(); ++i) { for (size_t i = 0; i != inputLayers_.size(); ++i) {
getInput(i).printValueString(LOG(INFO), std::ostringstream s;
"layer=" + inputLayers_[i]->getName() + " "); getInput(i).printValueString(s, "");
} }
size_t pos = 0;
int i = 0;
std::ostringstream s;
const std::string& format = config_.user_arg();
while (true) {
size_t pos1 = format.find("%s", pos);
if (pos1 == std::string::npos) break;
if (i >= vals.size()) {
s << format.substr(pos, pos1 - pos) << vals[i];
pos = pos1 + 2;
if (i != inputLayers_.size()) {
LOG(ERROR) << "Number of value in the format (" << format
<< ") is not same as the number of inputs ("
<< inputLayers_.size() << ") at " << getName();
s << format.substr(pos);
LOG(INFO) << s.str();
} }
void backward(const UpdateCallback& callback) override {} void backward(const UpdateCallback& callback) override {}
...@@ -46,6 +46,9 @@ void SequencePoolLayer::forward(PassType passType) { ...@@ -46,6 +46,9 @@ void SequencePoolLayer::forward(PassType passType) {
Layer::forward(passType); Layer::forward(passType);
const Argument& input = getInput(0); const Argument& input = getInput(0);
CHECK(input.hasSeq() || input.hasSubseq())
<< "Input should be a sequence or subsequence for layer " << getName();
newBatchSize_ = type_ ? input.getNumSubSequences() : input.getNumSequences(); newBatchSize_ = type_ ? input.getNumSubSequences() : input.getNumSequences();
size_t dim = getSize(); size_t dim = getSize();
// check // check
...@@ -95,3 +95,22 @@ def process_unequalength_seq(settings, file_name): ...@@ -95,3 +95,22 @@ def process_unequalength_seq(settings, file_name):
words1 = reduce(lambda x, y: x + y, d[0]) words1 = reduce(lambda x, y: x + y, d[0])
words2 = reduce(lambda x, y: x + y, d[1]) words2 = reduce(lambda x, y: x + y, d[1])
yield words1, words2, d[2] yield words1, words2, d[2]
data3 = [
[[[1, 2], [4, 5, 2]], [1, 2], 0],
[[[0, 2], [2, 5], [0, 1, 2]], [2, 3, 0], 1],
# Used for sequence_nest_mixed_inputs.conf
integer_value_sub_sequence(10), integer_value_sequence(10),
def process_mixed(settings, file_name):
for d in data3:
yield d
...@@ -19,7 +19,7 @@ from paddle.trainer_config_helpers import * ...@@ -19,7 +19,7 @@ from paddle.trainer_config_helpers import *
define_py_data_sources2(train_list='gserver/tests/Sequence/dummy.list', define_py_data_sources2(train_list='gserver/tests/Sequence/dummy.list',
test_list=None, test_list=None,
module='rnn_data_provider', module='rnn_data_provider',
obj='process_subseq2') obj='process_subseq')
settings(batch_size=2, learning_rate=0.01) settings(batch_size=2, learning_rate=0.01)
...@@ -57,7 +57,7 @@ def outer_step(wid, x): ...@@ -57,7 +57,7 @@ def outer_step(wid, x):
last = last_seq(input=inner_rnn_output, name="outer_rnn_state") last = last_seq(input=inner_rnn_output, name="outer_rnn_state")
# "return last" should also work. But currently RecurrentGradientMachine # "return last" should also work. But currently RecurrentGradientMachine
# does not handle it, and will report error: In hierachical RNN, all out # does not handle it, and will report error: In hierachical RNN, all out
# links should be from sequences now. # links should be from sequences now.
return inner_rnn_output return inner_rnn_output
# edit-mode: -*- python -*-
# Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserved
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
# http://www.apache.org/licenses/LICENSE-2.0
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# See the License for the specific language governing permissions and
# limitations under the License.
from paddle.trainer_config_helpers import *
######################## data source ################################
settings(batch_size=2, learning_rate=0.01)
######################## network configure ################################
dict_dim = 10
word_dim = 2
hidden_dim = 2
label_dim = 2
data1 = data_layer(name="word1", size=dict_dim)
data2 = data_layer(name="word2", size=dict_dim)
label = data_layer(name="label", size=label_dim)
encoding = embedding_layer(input=data2, size=word_dim)
subseq = embedding_layer(input=data1, size=word_dim)
seq = embedding_layer(input=data2, size=word_dim)
nonseq = embedding_layer(input=label, size=word_dim)
# This hierarchical RNN is designed to be equivalent to the simple RNN in
# sequence_rnn_multi_unequalength_inputs.conf
def outer_step(subseq, seq, nonseq, encoding):
outer_mem = memory(name="outer_rnn_state", size=hidden_dim)
def inner_step(subseq, seq, nonseq):
inner_mem = memory(
name="inner_rnn_state", size=hidden_dim, boot_layer=outer_mem)
out = fc_layer(
input=[subseq, seq, nonseq, inner_mem],
return out
decoder = recurrent_group(
step=inner_step, name='inner', input=[subseq, seq, nonseq])
last = last_seq(name="outer_rnn_state", input=decoder)
context = simple_attention(
encoded_sequence=encoding, encoded_proj=encoding, decoder_state=last)
return context
out = recurrent_group(
subseq, expand_layer(
seq, expand_as=subseq,
expand_level=ExpandLevel.FROM_SEQUENCE), expand_layer(
rep = last_seq(input=out)
prob = fc_layer(
size=label_dim, input=rep, act=SoftmaxActivation(), bias_attr=True)
outputs(classification_cost(input=prob, label=label))
# edit-mode: -*- python -*-
# Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserved
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
# http://www.apache.org/licenses/LICENSE-2.0
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# See the License for the specific language governing permissions and
# limitations under the License.
from paddle.trainer_config_helpers import *
######################## data source ################################
settings(batch_size=2, learning_rate=0.01)
######################## network configure ################################
dict_dim = 10
word_dim = 2
hidden_dim = 2
label_dim = 2
data1 = data_layer(name="word1", size=dict_dim)
data2 = data_layer(name="word2", size=dict_dim)
label = data_layer(name="label", size=label_dim)
encoding = embedding_layer(input=data2, size=word_dim)
# This hierarchical RNN is designed to be equivalent to the simple RNN in
# sequence_rnn_multi_unequalength_inputs.conf
def outer_step(subseq, seq, nonseq, encoding):
outer_mem = memory(name="outer_rnn_state", size=hidden_dim)
def inner_step(data1, data2, label):
inner_mem = memory(
name="inner_rnn_state", size=hidden_dim, boot_layer=outer_mem)
subseq = embedding_layer(input=data1, size=word_dim)
seq = embedding_layer(input=data2, size=word_dim)
nonseq = embedding_layer(input=label, size=word_dim)
print_layer(input=[data1, seq, label, inner_mem])
out = fc_layer(
input=[subseq, seq, nonseq, inner_mem],
return out
decoder = recurrent_group(
step=inner_step, name='inner',
input=[subseq, StaticInput(seq), nonseq])
last = last_seq(name="outer_rnn_state", input=decoder)
context = simple_attention(
encoded_sequence=encoding, encoded_proj=encoding, decoder_state=last)
return context
out = recurrent_group(
input=[data1, data2, StaticInput(label), StaticInput(encoding)])
rep = last_seq(input=out)
prob = fc_layer(
size=label_dim, input=rep, act=SoftmaxActivation(), bias_attr=True)
outputs(classification_cost(input=prob, label=label))
...@@ -19,7 +19,7 @@ from paddle.trainer_config_helpers import * ...@@ -19,7 +19,7 @@ from paddle.trainer_config_helpers import *
define_py_data_sources2(train_list='gserver/tests/Sequence/dummy.list', define_py_data_sources2(train_list='gserver/tests/Sequence/dummy.list',
test_list=None, test_list=None,
module='rnn_data_provider', module='rnn_data_provider',
obj='process_seq2') obj='process_seq')
settings(batch_size=2, learning_rate=0.01) settings(batch_size=2, learning_rate=0.01)
...@@ -1598,12 +1598,15 @@ TEST(Layer, FeatureMapExpandLayer) { ...@@ -1598,12 +1598,15 @@ TEST(Layer, FeatureMapExpandLayer) {
/* paraSize= */ 0}); /* paraSize= */ 0});
config.layerConfig.add_inputs(); config.layerConfig.add_inputs();
for (auto useGpu : {false, true}) { for (auto useGpu : {false, true}) {
testLayerGrad(config, for (auto asRowVec : {false, true}) {
"featmap_expand", config.layerConfig.set_user_arg(asRowVec ? "as_row_vec" : "as_col_vec");
/*batch_size*/ 100, testLayerGrad(config,
/* trans= */ false, "featmap_expand",
useGpu, /*batch_size*/ 100,
/* useWeight */ true); /* trans= */ false,
/* useWeight */ true);
} }
} }
...@@ -155,6 +155,15 @@ TEST(RecurrentGradientMachine, rnn_multi_unequalength_input) { ...@@ -155,6 +155,15 @@ TEST(RecurrentGradientMachine, rnn_multi_unequalength_input) {
} }
} }
TEST(RecurrentGradientMachine, rnn_mixed_input) {
for (bool useGpu : {false, true}) {
int main(int argc, char** argv) { int main(int argc, char** argv) {
testing::InitGoogleTest(&argc, argv); testing::InitGoogleTest(&argc, argv);
\ No newline at end of file
#include <thrust/device_vector.h>
#include <sstream>
#include "paddle/majel/dim.h"
#include "gtest/gtest.h"
__global__ void test(majel::Dim<2>* o) {
o[0] = majel::make_dim(5, 6);
__global__ void dyn_idx_gpu(int* o) {
auto d = majel::make_dim(5, 6);
o[0] = d[1];
TEST(Dim, Equality) {
// construct a Dim on the CPU
auto a = majel::make_dim(3, 4);
EXPECT_EQ(majel::get<0>(a), 3);
EXPECT_EQ(majel::get<1>(a), 4);
// construct a Dim on the GPU
thrust::device_vector<majel::Dim<2>> t(2);
a = t[0];
EXPECT_EQ(majel::get<0>(a), 5);
EXPECT_EQ(majel::get<1>(a), 6);
// linearization
auto b = majel::make_dim(7, 8);
EXPECT_EQ(majel::linearize(a, b), 83);
// product
EXPECT_EQ(majel::product(a), 30);
// mutate a Dim
majel::get<1>(b) = 10;
EXPECT_EQ(majel::get<0>(b), 7);
EXPECT_EQ(majel::get<1>(b), 10);
// dynamic access
majel::get(b, 0) = 8;
b[1] = 11;
EXPECT_EQ(majel::get<0>(b), 8);
EXPECT_EQ(majel::get<1>(b), 11);
EXPECT_EQ(majel::get(b, 0), 8);
EXPECT_EQ(b[1], 11);
// dynamic access on GPU
thrust::device_vector<int> r(1);
int res = r[0];
EXPECT_EQ(res, 6);
// ex_prefix_mul
majel::Dim<3> c = majel::ex_prefix_mul(majel::Dim<3>(3, 4, 5));
EXPECT_EQ(majel::get<0>(c), 1);
EXPECT_EQ(majel::get<1>(c), 3);
EXPECT_EQ(majel::get<2>(c), 12);
// contiguous_strides
c = majel::contiguous_strides(majel::Dim<3>(10, 1, 10));
EXPECT_EQ(majel::get<0>(c), 1);
EXPECT_EQ(majel::get<1>(c), 0);
EXPECT_EQ(majel::get<2>(c), 10);
c = majel::contiguous_strides(majel::Dim<3>(10, 10, 1));
EXPECT_EQ(majel::get<0>(c), 1);
EXPECT_EQ(majel::get<1>(c), 10);
EXPECT_EQ(majel::get<2>(c), 0);
c = majel::contiguous_strides(majel::Dim<3>(1, 10, 10));
EXPECT_EQ(majel::get<0>(c), 0);
EXPECT_EQ(majel::get<1>(c), 1);
EXPECT_EQ(majel::get<2>(c), 10);
c = majel::contiguous_strides(majel::Dim<3>(2, 3, 4));
EXPECT_EQ(majel::get<0>(c), 1);
EXPECT_EQ(majel::get<1>(c), 2);
EXPECT_EQ(majel::get<2>(c), 6);
// generate from an index
auto size = majel::make_dim(4, 5, 2);
c = majel::Dim<3>(14, size);
EXPECT_EQ(majel::get<0>(c), 2);
EXPECT_EQ(majel::get<1>(c), 3);
EXPECT_EQ(majel::get<2>(c), 0);
c = majel::Dim<3>(25, size);
EXPECT_EQ(majel::get<0>(c), 1);
EXPECT_EQ(majel::get<1>(c), 1);
EXPECT_EQ(majel::get<2>(c), 1);
TEST(Dim, Bool) {
auto a = majel::make_dim(3, 4);
auto b = majel::make_dim(5, 6);
auto c = majel::make_dim(3, 4);
// in_bounds check
EXPECT_TRUE(majel::contained(a, b));
EXPECT_FALSE(majel::contained(b, a));
// comparison
EXPECT_TRUE(a == a);
EXPECT_TRUE(a == c);
// contiguous check
int x = 4, y = 5, z = 2;
majel::Dim<3> sizef(x, y, z);
majel::Dim<3> stridea(1, x, x*y);
majel::Dim<3> strideb(2, 2*x, 2*x*y);
majel::Dim<3> stridec(1, x, 2*x*y);
EXPECT_TRUE(majel::contiguous(sizef, stridea));
EXPECT_FALSE(majel::contiguous(sizef, strideb));
EXPECT_FALSE(majel::contiguous(sizef, stridec));
TEST(Dim, Print) {
std::stringstream ss;
auto a = majel::make_dim(2, 3);
ss << a;
EXPECT_EQ(ss.str(), "2, 3");
std::stringstream ss;
ss << majel::make_dim(8);
EXPECT_EQ(ss.str(), "8");
...@@ -908,12 +908,13 @@ const T* CpuGpuVectorT<T>::getData(bool useGpu) const { ...@@ -908,12 +908,13 @@ const T* CpuGpuVectorT<T>::getData(bool useGpu) const {
// Operation will change data and need to reset sync_ & syncFlag_. // Operation will change data and need to reset sync_ & syncFlag_.
#define MUTABLE_VECTOR_OP(OP, useGpu, args...) \ #define MUTABLE_VECTOR_OP(OP, useGpu, args...) \
do { \ do { \
setSync(useGpu); \
if (useGpu) { \ if (useGpu) { \
copyToGpu(); \ copyToGpu(); \
setSync(useGpu); \
return gpuVectorT_->OP(args); \ return gpuVectorT_->OP(args); \
} else { \ } else { \
copyToCpu(); \ copyToCpu(); \
setSync(useGpu); \
return cpuVectorT_->OP(args); \ return cpuVectorT_->OP(args); \
} \ } \
} while (0) } while (0)
...@@ -1030,7 +1031,7 @@ void CpuGpuVectorT<T>::copyToCpu() { ...@@ -1030,7 +1031,7 @@ void CpuGpuVectorT<T>::copyToCpu() {
CHECK(gpuVectorT_); CHECK(gpuVectorT_);
this->resizeOrCreate(gpuVectorT_->getSize(), false); this->resizeOrCreate(gpuVectorT_->getSize(), false);
cpuVectorT_->copyFrom(*gpuVectorT_, HPPL_STREAM_DEFAULT); cpuVectorT_->copyFrom(*gpuVectorT_);
setSync(SYNCED); setSync(SYNCED);
break; break;
...@@ -1049,7 +1050,7 @@ void CpuGpuVectorT<T>::copyToGpu() { ...@@ -1049,7 +1050,7 @@ void CpuGpuVectorT<T>::copyToGpu() {
CHECK(cpuVectorT_); CHECK(cpuVectorT_);
this->resizeOrCreate(cpuVectorT_->getSize(), true); this->resizeOrCreate(cpuVectorT_->getSize(), true);
gpuVectorT_->copyFrom(*cpuVectorT_, HPPL_STREAM_DEFAULT); gpuVectorT_->copyFrom(*cpuVectorT_);
setSync(SYNCED); setSync(SYNCED);
break; break;
add_library(paddle_optimizer STATIC ${OPITMIZER_SRCS})
add_dependencies(paddle_optimizer gen_proto_cpp)
#include "adadelta_optimizer.h"
#include <algorithm>
#include <cmath>
namespace paddle {
namespace optimizer {
// Applies one Adadelta step to the wrapped parameter tensor.
//
// @param gradient  gradient tensor, indexed element-wise alongside the
//                  parameter (assumed to hold at least param.size() elements
//                  -- TODO confirm against callers).
void AdadeltaOptimizer::Update(const Tensor* gradient) {
// Advance the step counter before querying the learning-rate policy.
num_sample_passed_ += 1;
double learning_rate = lr_policy_->LearningRate(num_sample_passed_);
Tensor& param = *parameter_;
const Tensor& grad = *gradient;
Tensor& accum_g = *accum_gradient_;
Tensor& accum_d = *accum_delta_;
Tensor& update_d = *update_delta_;
for (size_t i = 0; i < param.size(); ++i) {
// Running average of squared gradients, weighted by rho_.
accum_g[i] = rho_ * accum_g[i] + (1.0 - rho_) * grad[i] * grad[i];
// Step size: ratio of the two running RMS values, scaled by the gradient.
update_d[i] = std::sqrt(accum_d[i] + epsilon_) /
std::sqrt(accum_g[i] + epsilon_) * grad[i];
// Running average of squared steps; uses the step computed just above.
accum_d[i] = rho_ * accum_d[i] + (1.0 - rho_) * update_d[i] * update_d[i];
// Descend along the step and apply weight decay, both scaled by the rate.
param[i] -= learning_rate * update_d[i] + learning_rate * decay_ * param[i];
const char* AdadeltaOptimizer::SerializeState(int* state_len) {
AdadeltaOptimizerState state;
// TODO(zhihong) : add lr_policy serialization
TensorToProto(*parameter_, state.mutable_parameter());
TensorToProto(*accum_gradient_, state.mutable_accum_gradient());
TensorToProto(*accum_delta_, state.mutable_accum_delta());
TensorToProto(*update_delta_, state.mutable_update_delta());
auto str = state.SerializeAsString();
*state_len = str.size();
return str.c_str();
void AdadeltaOptimizer::DeserializeState(const std::string& str) {
AdadeltaOptimizerState state;
// TODO(zhihong) : add lr_policy DeserializeState
num_sample_passed_ = state.num_sample_passed();
ProtoToTensor(state.parameter(), parameter_);
ProtoToTensor(state.accum_gradient(), accum_gradient_);
ProtoToTensor(state.accum_delta(), accum_delta_);
ProtoToTensor(state.update_delta(), update_delta_);
} // namespace optimizer
} // namespace paddle
#pragma once
#include "parameter_optimizer.h"
namespace paddle {
namespace optimizer {
class AdadeltaOptimizer : public ParameterOptimizer {
Tensor *parameter, LrPolicy *lr, double rho, double epsilon, double decay)
: ParameterOptimizer(parameter, lr),
accum_gradient_(new Tensor(parameter->size())),
accum_delta_(new Tensor(parameter->size())),
update_delta_(new Tensor(parameter->size())),
decay_(decay) {}
~AdadeltaOptimizer() {
if (accum_gradient_) delete accum_gradient_;
if (accum_delta_) delete accum_delta_;
if (update_delta_) delete update_delta_;
void Update(const Tensor *gradient);
const char *SerializeState(int *state_len);
void DeserializeState(const std::string &state);
Tensor *accum_gradient_;
Tensor *accum_delta_;
Tensor *update_delta_;
double rho_;
double epsilon_;
double decay_;
} // namespace optimizer
} // namespace paddle
#include <cmath>
#include "adagrad_optimizer.h"
namespace paddle {
namespace optimizer {
// Applies one Adagrad step to the wrapped parameter tensor.
//
// @param gradient  gradient tensor, indexed element-wise alongside the
//                  parameter.
void AdagradOptimizer::Update(const Tensor* gradient) {
  // Advance the step counter before querying the learning-rate policy.
  num_sample_passed_ += 1;
  double learning_rate = lr_policy_->LearningRate(num_sample_passed_);
  Tensor& param = *parameter_;
  Tensor& accum_g = *accum_gradient_;
  const Tensor& grad = *gradient;
  for (size_t i = 0; i < param.size(); ++i) {
    // Accumulate the squared gradient seen so far for this element.
    accum_g[i] += grad[i] * grad[i];
    // Subtract the scaled gradient and the weight-decay term. Adding them
    // would climb the gradient and inflate the weights instead of decaying
    // them; the other optimizers in this library all subtract.
    param[i] -= learning_rate * grad[i] / std::sqrt(accum_g[i] + epsilon_) +
                learning_rate * decay_ * param[i];
  }
}
const char* AdagradOptimizer::SerializeState(int* state_len) {
AdagradOptimizerState state;
// TODO(zhihong) : add lr_policy serialization
TensorToProto(*parameter_, state.mutable_parameter());
TensorToProto(*accum_gradient_, state.mutable_accum_gradient());
auto str = state.SerializeAsString();
*state_len = str.size();
return str.c_str();
void AdagradOptimizer::DeserializeState(const std::string& str) {
AdagradOptimizerState state;
// TODO(zhihong) : add lr_policy DeserializeState
num_sample_passed_ = state.num_sample_passed();
ProtoToTensor(state.parameter(), parameter_);
ProtoToTensor(state.accum_gradient(), accum_gradient_);
} // namespace optimizer
} // namespace paddle
#pragma once
#include "parameter_optimizer.h"
namespace paddle {
namespace optimizer {
class AdagradOptimizer : public ParameterOptimizer {
AdagradOptimizer(Tensor *parameter,
LrPolicy *lr,
double epsilon,
double decay)
: ParameterOptimizer(parameter, lr),
accum_gradient_(new Tensor(parameter->size())),
decay_(decay) {}
~AdagradOptimizer() {
if (accum_gradient_) delete accum_gradient_;
void Update(const Tensor *gradient);
const char *SerializeState(int *state_len);
void DeserializeState(const std::string &state);
Tensor *accum_gradient_;
double epsilon_;
double decay_;
} // namespace optimizer
} // namespace paddle
#include "adam_optimizer.h"
#include <cmath>
namespace paddle {
namespace optimizer {
// Applies one Adam step to the wrapped parameter tensor.
//
// @param gradient  gradient tensor, indexed element-wise alongside the
//                  parameter.
void AdamOptimizer::Update(const Tensor *gradient) {
// Advance the step counter before querying the learning-rate policy.
num_sample_passed_ += 1;
double learning_rate = lr_policy_->LearningRate(num_sample_passed_);
// Step-dependent factors 1 - beta^t for the two moment estimates.
double coef1 = 1.0 - std::pow(beta_1_, num_sample_passed_);
double coef2 = 1.0 - std::pow(beta_2_, num_sample_passed_);
// Fold both factors into the rate once, outside the per-element loop.
learning_rate *= std::sqrt(coef2) / coef1;
Tensor &param = *parameter_;
const Tensor &grad = *gradient;
Tensor &m = *momentums_;
Tensor &v = *velocitys_;
for (size_t i = 0; i < param.size(); ++i) {
// Running averages of the gradient (m) and of the squared gradient (v).
m[i] = beta_1_ * m[i] + (1.0 - beta_1_) * grad[i];
v[i] = beta_2_ * v[i] + (1.0 - beta_2_) * grad[i] * grad[i];
// Note: epsilon_ is added inside the square root here.
param[i] -=
learning_rate * (m[i] / std::sqrt(v[i] + epsilon_) + decay_ * param[i]);
const char *AdamOptimizer::SerializeState(int *state_len) {
AdamOptimizerState state;
// TODO(zhihong) : add lr_policy serialization
TensorToProto(*parameter_, state.mutable_parameter());
TensorToProto(*momentums_, state.mutable_momentums());
TensorToProto(*velocitys_, state.mutable_velocitys());
auto str = state.SerializeAsString();
*state_len = str.size();
return str.c_str();
void AdamOptimizer::DeserializeState(const std::string &str) {
AdamOptimizerState state;
// TODO(zhihong) : add lr_policy DeserializeState
num_sample_passed_ = state.num_sample_passed();
ProtoToTensor(state.parameter(), parameter_);
ProtoToTensor(state.momentums(), momentums_);
ProtoToTensor(state.velocitys(), velocitys_);
} // namespace optimizer
} // namespace paddle
#pragma once
#include "parameter_optimizer.h"
namespace paddle {
namespace optimizer {
class AdamOptimizer : public ParameterOptimizer {
AdamOptimizer(Tensor *parameter,
LrPolicy *lr,
double beta_1,
double beta_2,
double epsilon,
double decay)
: ParameterOptimizer(parameter, lr),
momentums_(new Tensor(parameter->size())),
velocitys_(new Tensor(parameter->size())),
decay_(decay) {}
~AdamOptimizer() {
if (momentums_) delete momentums_;
if (velocitys_) delete velocitys_;
void Update(const Tensor *gradient);
const char *SerializeState(int *state_len);
void DeserializeState(const std::string &state);
Tensor *momentums_;
Tensor *velocitys_;
double beta_1_;
double beta_2_;
double epsilon_;
double decay_;
} // namespace optimizer
} // namespace paddle
#pragma once
#include <algorithm>
#include "OptimizerConfig.pb.h"
namespace paddle {
namespace optimizer {
class LrPolicy {
virtual ~LrPolicy() {}
virtual double LearningRate(const uint64_t num_sample_passed) = 0;
virtual const char *SerializeState(int *state_len) = 0;
virtual void DeserializeState(const std::string &state) = 0;
// constant learning rate policy
class ConstLr final : public LrPolicy {
ConstLr(double lr) : learning_rate(lr){};
double LearningRate(const uint64_t num_sample_passed) {
return learning_rate;
const char *SerializeState(int *state_len) { return nullptr; }
void DeserializeState(const std::string &state) {}
double learning_rate;
class LinearLr final : public LrPolicy {
LinearLr(double lr, double lr_decay_a, double lr_decay_b)
: learning_rate(lr), lr_decay_a(lr_decay_a), lr_decay_b(lr_decay_b) {}
double LearningRate(const uint64_t num_sample_passed) {
return std::max(learning_rate - lr_decay_a * num_sample_passed, lr_decay_b);
const char *SerializeState(int *state_len) {
// TODO(zhihong) : add lr_policy serialization
return nullptr;
void DeserializeState(const std::string &state) {
// TODO(zhihong) : add lr_policy serialization
double learning_rate;
double lr_decay_a;
double lr_decay_b;
} // namespace optimizer
} // namespace paddle
#include "optimizer.h"
#include <string>
#include "parameter_optimizer.h"
using namespace paddle;
using namespace paddle::optimizer;
template <paddle_element_type VALUE>
struct EnumToType {};
template <class T>
struct TypeToEnum {};
template <> \
struct TypeToEnum<TYPE> { \
static paddle_element_type v() { return ENUM; }; \
static constexpr TYPE value = ENUM; \
}; \
template <> \
struct EnumToType<ENUM> { \
typedef TYPE Type; \
// TODO(zhihong): only implement below type, need to fix
struct paddle_optimizer {
paddle::optimizer::ParameterOptimizer* impl;
paddle_optimizer* paddle_create_optimizer(const unsigned char* config_proto,
const int config_proto_len,
const paddle_element_type data_type,
void* param_buffer,
int num_bytes,
const char* state,
const int state_len) {
paddle_optimizer* optimizer = new paddle_optimizer;
std::string config(config_proto, config_proto + config_proto_len);
Tensor* parameter =
new Tensor(reinterpret_cast<float*>(param_buffer), num_bytes);
optimizer->impl = ParameterOptimizer::Create(config, parameter);
if (state != nullptr) {
std::string s(state, state + state_len);
return optimizer;
// Destroys an optimizer created by paddle_create_optimizer().
//
// @param o  handle to release; nullptr is accepted and ignored.
// @return PADDLE_SUCCESS.
int paddle_release_optimizer(paddle_optimizer* o) {
  if (o != nullptr) {
    delete o->impl;
    // The handle itself was allocated with `new` by paddle_create_optimizer,
    // so free it as well; the caller must not use `o` afterwards.
    delete o;
  }
  return PADDLE_SUCCESS;
}
// Applies one optimizer step using the gradient supplied by the caller.
//
// @param o            optimizer handle from paddle_create_optimizer().
// @param data_type    element type of the buffer; currently ignored, the
//                     buffer is always read as float.
// @param grad_buffer  caller-owned gradient data; it is only read here.
// @param num_bytes    forwarded unchanged as the Tensor size.
//                     NOTE(review): Tensor treats this as an element count,
//                     not a byte count -- confirm what callers pass.
// @return PADDLE_SUCCESS.
int paddle_update_parameter(paddle_optimizer* o,
                            const paddle_element_type data_type,
                            const void* grad_buffer,
                            int num_bytes) {
  // TODO(zhihong): datatype not work. need to add the runtime datatype
  auto grad_type = reinterpret_cast<const float*>(grad_buffer);
  // Wrap the buffer in a stack Tensor: this constructor does not take
  // ownership of the data, and a heap-allocated wrapper would leak on
  // every call.
  Tensor gradient(const_cast<float*>(grad_type), num_bytes);
  o->impl->Update(&gradient);
  return PADDLE_SUCCESS;
}
int paddle_optimizer_get_weights(paddle_optimizer* o, void** param_buffer) {
int param_size = 0;
*param_buffer = (void*)o->impl->get_weight(&param_size);
return param_size;
int paddle_optimizer_get_state(paddle_optimizer* o, const char** state) {
int state_len = 0;
*state = o->impl->SerializeState(&state_len);
return state_len;
#pragma once
#include <stdbool.h>
#include <stdint.h>
* @brief optimizer library that is independent of the other modules,
* which will be used in :
* Case A, the gradient optimized locally on the trainer.
* Case B, the gradient optimized on the parameter server.
#ifdef __cplusplus
extern "C" {
typedef enum {
} paddle_element_type;
* @brief execution status code
const int32_t PADDLE_SUCCESS = 0;
const int32_t PADDLE_ERROR = -1;
typedef struct paddle_optimizer paddle_optimizer;
* this group interface called in order :
* 1. create optimizer with config
* 2. set weights
* 3. update_parameter
* 4. get_weights
* 5. release optimizer
* @brief create optimizer with proto_config
* @param config_proto, optimizer protobuf, see OptimizerConfig.proto in detail
* @return return optimizer instance
paddle_optimizer* paddle_create_optimizer(const unsigned char* config_proto,
const int config_proto_len,
const paddle_element_type data_type,
void* param_buffer,
int num_bytes,
const char* state,
const int state_len);
* @brief release optimizer
* @param optimizer
* @return return exec status
int paddle_release_optimizer(paddle_optimizer* o);
* @brief optimizer instance
* @param datatype of gradient and parameter
* @param gradient, calculated by the optimizer caller.
* TODO(zhihong): just pass loss to reduce communicate overhead.
* Project Adam Ms'14 paper for detail
* @param num_bytes, gradient size
* @return return exec status
int paddle_update_parameter(paddle_optimizer* o,
const paddle_element_type data_type,
const void* gradient,
int num_bytes);
* @brief optimizer for get parameter buffer
* @param param_buffer, initialized parameter buffer
* @return return content length
int paddle_optimizer_get_weights(paddle_optimizer* o, void** param_buffer);
* @brief optimizer for saving training state
* @param state, receives the training state produced by SerializeState
* @return return state_buffer length
int paddle_optimizer_get_state(paddle_optimizer* o, const char** state);
#ifdef __cplusplus
#include <glog/logging.h>
#include "adadelta_optimizer.h"
#include "adagrad_optimizer.h"
#include "adam_optimizer.h"
#include "lr_policy.h"
#include "sgd_optimizer.h"
#include "parameter_optimizer.h"
namespace paddle {
namespace optimizer {
ParameterOptimizer *ParameterOptimizer::Create(const std::string &config_proto,
Tensor *parameter) {
paddle::OptimizerConfig config;
CHECK(config.ParseFromString(config_proto) == true)
<< "failed parse optimizer config";
auto select_lr_policy = [=](const OptimizerConfig &config) -> LrPolicy * {
if (config.lr_policy() == OptimizerConfig::Const)
return new ConstLr(config.const_lr().learning_rate());
if (config.lr_policy() == OptimizerConfig::Linear)
return new LinearLr(config.linear_lr().learning_rate(),
// default
LOG(WARNING) << " have not select any LrPolicy. use ConstLr in default";
return new ConstLr(0.1);
LrPolicy *lr = select_lr_policy(config);
auto select_optimizer = [=](
Tensor *parameter,
const OptimizerConfig &config) -> ParameterOptimizer * {
if (config.optimizer() == OptimizerConfig::SGD) {
return new SGDOptimizer(parameter,
if (config.optimizer() == OptimizerConfig::Adadelta) {
return new AdadeltaOptimizer(parameter,
if (config.optimizer() == OptimizerConfig::Adagrad) {
return new AdagradOptimizer(
parameter, lr, config.adagrad().epsilon(), config.adagrad().decay());
if (config.optimizer() == OptimizerConfig::Adam) {
return new AdamOptimizer(parameter,
// default
<< "have not select any Optimizer. use SGDOptimizer in default";
return new SGDOptimizer(parameter, lr, 0.0, 0.0, false);
return select_optimizer(parameter, config);
float *ParameterOptimizer::get_weight(int *param_size) const {
*param_size = (int)parameter_->size();
return parameter_->get_buffer();
} // namespace optimizer
} // namespace paddle
#pragma once
#include <glog/logging.h>
#include <functional>
#include <string>
#include "OptimizerConfig.pb.h"
#include "lr_policy.h"
#include "serialization.h"
#include "tensor.h"
namespace paddle {
namespace optimizer {
class ParameterOptimizer {
* @brief update hook for algorithm need to traverse parameter more than
* once.
ParameterOptimizer(Tensor *parameter, LrPolicy *lr)
: parameter_(parameter), lr_policy_(lr), num_sample_passed_(0) {}
virtual ~ParameterOptimizer() {
delete parameter_;
delete lr_policy_;
static ParameterOptimizer *Create(const std::string &config_proto,
Tensor *parameter);
virtual void Update(const Tensor *gradient) = 0;
virtual float *get_weight(int *param_size) const;
virtual const char *SerializeState(int *state_len) = 0;
virtual void DeserializeState(const std::string &state) = 0;
Tensor *parameter_;
// learning rate policy
LrPolicy *lr_policy_;
uint64_t num_sample_passed_;
} // namespace optimizer
} // namespace paddle
#include "parameter_optimizer.h"
#include <cmath>
#include <map>
#include <vector>
#include "gtest/gtest.h"
#include "lr_policy.h"
using namespace paddle;
using namespace paddle::optimizer;
Tensor* FillTensor(size_t size) {
Tensor* param = new Tensor(size);
Tensor& p = *param;
for (size_t i = 0; i < p.size(); ++i) {
p[i] = (float)rand() / (float)RAND_MAX;
return param;
Tensor* FixedTensor(size_t size) {
Tensor* param = new Tensor(size);
Tensor& p = *param;
for (size_t i = 0; i < p.size(); ++i) {
p[i] = i;
return param;
class OptimizerTest : public testing::Test {
// init tensor shape
const size_t kSize = 5;
virtual void SetUp() {
virtual void TearDown() {}
void CreateSGD() {
Tensor* parameter = FixedTensor(kSize);
std::string str = config_.SerializeAsString();
ParameterOptimizer* opt = ParameterOptimizer::Create(str, parameter);
void CreateAdam() {
Tensor* parameter = FixedTensor(kSize);
std::string str = config_.SerializeAsString();
ParameterOptimizer* opt = ParameterOptimizer::Create(str, parameter);
void TestGetWeight() {
Tensor* p = FixedTensor(kSize);
for (size_t i = 0; i < opts_.size(); ++i) {
int s = 0;
float* newp = (float*)opts_[i]->get_weight(&s);
for (size_t j = 0; j < kSize; ++j) {
EXPECT_EQ(newp[j], (*p)[j]);
void TestUpdate() {
Tensor* g = FixedTensor(kSize);
for (size_t i = 0; i < opts_.size(); ++i) {
void TestCheckPoint() {
for (size_t i = 0; i < opts_.size(); ++i) {
int state_len = 0;
std::string state = opts_[i]->SerializeState(&state_len);
std::vector<ParameterOptimizer*> opts_;
OptimizerConfig config_;
TEST_F(OptimizerTest, TestGetWeight) { TestGetWeight(); }
TEST_F(OptimizerTest, TestUpdate) { TestUpdate(); }
TEST_F(OptimizerTest, TestCheckPoint) { TestCheckPoint(); }
int main(int argc, char** argv) {
testing::InitGoogleTest(&argc, argv);
return RUN_ALL_TESTS();
#pragma once
#include <iostream>
#include <sstream>
#include <string>
#include <type_traits>
#include "OptimizerConfig.pb.h"
#include "paddle/utils/Logging.h"
#include "tensor.h"
namespace paddle {
namespace optimizer {
static void TensorToProto(const Tensor& tensor, TensorProto* proto) {
std::stringstream os;
for (size_t i = 0; i < tensor.size(); ++i) {
os << tensor[i];
static void ProtoToTensor(const TensorProto& proto, Tensor* tensor) {
std::stringstream sin;
for (auto i = 0; i < proto.content_size(); ++i) {
sin << proto.content(i);
sin >> (*tensor)[i];
} // namespace optimizer
} // namespace paddle
#include "serialization.h"
#include "gtest/gtest.h"
using namespace paddle;
using namespace paddle::optimizer;
TEST(TensorToProto, Case1) {
Tensor t(3), t1(3);
for (size_t i = 0; i < t.size(); ++i) {
t[i] = i;
t1[i] = 0;
TensorProto proto;
TensorToProto(t, &proto);
ProtoToTensor(proto, &t1);
for (size_t i = 0; i < t1.size(); ++i) {
EXPECT_EQ(t1[i], t[i]);
int main(int argc, char** argv) {
testing::InitGoogleTest(&argc, argv);
return RUN_ALL_TESTS();
#include "sgd_optimizer.h"
#include "serialization.h"
namespace paddle {
namespace optimizer {
void SGDOptimizer::Update(const Tensor *gradient) {
num_sample_passed_ += 1;
double learning_rate = lr_policy_->LearningRate(num_sample_passed_);
float velocity = 0.0;
Tensor &param = *parameter_;
const Tensor &grad = *gradient;
Tensor &m = *momentums_;
for (size_t i = 0; i < param.size(); ++i) {
if (momentum_ == 0.0) {
velocity = -learning_rate * grad[i] - learning_rate * decay_ * param[i];
} else {
m[i] = momentum_ * m[i] - learning_rate * grad[i] -
learning_rate * decay_ * param[i];
velocity = m[i];
if (nesterov_) {
param[i] += momentum_ * velocity - learning_rate * grad[i];
} else {
param[i] += velocity;
const char *SGDOptimizer::SerializeState(int *state_len) {
SGDOptimizerState state;
TensorToProto(*parameter_, state.mutable_parameter());
if (momentum_ != 0.0) TensorToProto(*momentums_, state.mutable_momentums());
auto str = state.SerializeAsString();
*state_len = str.size();
return str.c_str();
void SGDOptimizer::DeserializeState(const std::string &str) {
SGDOptimizerState state;
num_sample_passed_ = state.num_sample_passed();
ProtoToTensor(state.parameter(), parameter_);
if (momentum_ != 0.0) ProtoToTensor(state.parameter(), momentums_);
} // namespace optimizer
} // namespace paddle
#pragma once
#include "parameter_optimizer.h"
namespace paddle {
namespace optimizer {
class SGDOptimizer : public ParameterOptimizer {
SGDOptimizer(Tensor* parameter, LrPolicy* lr, double m, double d, bool n)
: ParameterOptimizer(parameter, lr),
nesterov_(n) {
if (momentum_ != 0.0) {
size_t size = parameter->size();
// TODO: fix it with align aware allocator bind to Tensor
momentums_ = new Tensor(size);
virtual ~SGDOptimizer() {
if (momentums_) delete momentums_;
void Update(const Tensor* gradient);
const char* SerializeState(int* state_len);
void DeserializeState(const std::string& state);
Tensor* momentums_;
double momentum_;
double decay_;
bool nesterov_;
} // namespace optimizer
} // namespace paddle
#pragma once
* @brief tensor used by optimizer
#include <string.h>
#include <memory>
#include "paddle/utils/Common.h"
#include "paddle/utils/Logging.h"
namespace paddle {
namespace optimizer {
template <class T>
class TensorT {
TensorT(size_t size) : height_(1), width_(size) {
data_ptr_ = std::shared_ptr<T>(new T[size], std::default_delete<T[]>());
data_ = data_ptr_.get();
TensorT(T* data, size_t size)
: height_(1), width_(size), data_ptr_(nullptr), data_(data) {}
TensorT(T* data, size_t h, size_t w)
: height_(h), width_(w), data_ptr_(nullptr), data_(data) {}
virtual ~TensorT() {}
T* get_buffer() { return this->data_; }
// Flat element access with a bounds check.
//
// The check uses size() (width_ * height_) so that every index accepted by
// the `i < size()` loops elsewhere in the library is valid for tensors with
// more than one row. idx is unsigned, so no lower-bound test is needed.
T& operator[](const size_t idx) {
  CHECK(idx < this->size()) << "out of index range";
  return data_[idx];
}
// Const overload; it still returns a mutable reference, as callers may rely
// on that.
T& operator[](const size_t idx) const {
  CHECK(idx < this->size()) << "out of index range";
  return data_[idx];
}
// TODO: replace with tensorshape
size_t size() const { return this->width_ * this->height_; }
size_t height_;
size_t width_;
std::shared_ptr<T> data_ptr_;
T* data_;
// TODO(zhihong): design problem of dynamic datatype, need to fix it
typedef TensorT<float> Tensor;
} // namespace optimizer
} // namespace paddle
...@@ -149,6 +149,7 @@ struct Argument { ...@@ -149,6 +149,7 @@ struct Argument {
: getBatchSize(); : getBatchSize();
} }
bool hasSeq() const { return sequenceStartPositions != nullptr; }
bool hasSubseq() const { return subSequenceStartPositions != nullptr; } bool hasSubseq() const { return subSequenceStartPositions != nullptr; }
const int* getCpuStartPositions() const { const int* getCpuStartPositions() const {
Language: Cpp
BasedOnStyle: Google
Standard: Cpp11
nv_test(cuda_test SRCS cuda_test.cu)
cc_library(place SRCS place.cc) cc_library(place SRCS place.cc)
cc_test(place_test SRCS place_test.cc DEPS place glog gflags) cc_test(place_test SRCS place_test.cc DEPS place glog gflags)
cc_library(ddim SRCS ddim.cc)
cc_test(ddim_test SRCS ddim_test.cc DEPS ddim)
nv_test(cuda_test SRCS cuda_test.cu)
nv_test(dim_test SRCS dim_test.cu DEPS ddim)
...@@ -5,28 +5,25 @@ ...@@ -5,28 +5,25 @@
#if defined(__APPLE__) && defined(__CUDA_ARCH__) && !defined(NDEBUG) #if defined(__APPLE__) && defined(__CUDA_ARCH__) && !defined(NDEBUG)
#include <stdio.h> #include <stdio.h>
#define MAJEL_ASSERT(e) \ #define PADDLE_ASSERT(e) \
do { \ do { \
if (!(e)) { \ if (!(e)) { \
printf( \ printf("%s:%d Assertion `%s` failed.\n", __FILE__, __LINE__, \
"%s:%d Assertion `%s` failed.\n", __FILE__, __LINE__, TOSTRING(e)); \ TOSTRING(e)); \
asm("trap;"); \ asm("trap;"); \
} \ } \
} while (0) } while (0)
#define MAJEL_ASSERT_MSG(e, m) \ #define PADDLE_ASSERT_MSG(e, m) \
do { \ do { \
if (!(e)) { \ if (!(e)) { \
printf("%s:%d Assertion `%s` failed (%s).\n", \ printf("%s:%d Assertion `%s` failed (%s).\n", __FILE__, __LINE__, \
__FILE__, \ TOSTRING(e), m); \
__LINE__, \ asm("trap;"); \
TOSTRING(e), \ } \
m); \
asm("trap;"); \
} \
} while (0) } while (0)
#else #else
#include <assert.h> #include <assert.h>
#define MAJEL_ASSERT(e) assert(e) #define PADDLE_ASSERT(e) assert(e)
#define MAJEL_ASSERT_MSG(e, m) assert((e) && (m)) #define PADDLE_ASSERT_MSG(e, m) assert((e) && (m))
#endif #endif
#include "paddle/majel/place.h" #include "paddle/platform/place.h"
namespace majel { namespace paddle {
namespace platform {
namespace detail { namespace detail {
class PlacePrinter : public boost::static_visitor<> { class PlacePrinter : public boost::static_visitor<> {
private: public:
std::ostream& os_; PlacePrinter(std::ostream &os) : os_(os) {}
void operator()(const CpuPlace &) { os_ << "CpuPlace"; }
void operator()(const GpuPlace &p) { os_ << "GpuPlace(" << p.device << ")"; }
public: private:
PlacePrinter(std::ostream& os) : os_(os) {} std::ostream &os_;
void operator()(const CpuPlace&) { os_ << "CpuPlace"; }
void operator()(const GpuPlace& p) { os_ << "GpuPlace(" << p.device << ")"; }
}; };
} // namespace detail } // namespace detail
static Place the_default_place; static Place the_default_place;
void set_place(const Place& place) { the_default_place = place; } void set_place(const Place &place) { the_default_place = place; }
const Place &get_place() { return the_default_place; }
const Place& get_place() { return the_default_place; }
const GpuPlace default_gpu() { return GpuPlace(0); } const GpuPlace default_gpu() { return GpuPlace(0); }
const CpuPlace default_cpu() { return CpuPlace(); } const CpuPlace default_cpu() { return CpuPlace(); }
bool is_gpu_place(const Place& p) { bool is_gpu_place(const Place &p) {
return boost::apply_visitor(IsGpuPlace(), p); return boost::apply_visitor(IsGpuPlace(), p);
} }
bool is_cpu_place(const Place &p) {
bool is_cpu_place(const Place& p) {
return !boost::apply_visitor(IsGpuPlace(), p); return !boost::apply_visitor(IsGpuPlace(), p);
} }
bool places_are_same_class(const Place& p1, const Place& p2) { bool places_are_same_class(const Place &p1, const Place &p2) {
return is_gpu_place(p1) == is_gpu_place(p2); return is_gpu_place(p1) == is_gpu_place(p2);
} }
std::ostream& operator<<(std::ostream& os, const majel::Place& p) { std::ostream &operator<<(std::ostream &os, const Place &p) {
majel::detail::PlacePrinter printer(os); detail::PlacePrinter printer(os);
boost::apply_visitor(printer, p); boost::apply_visitor(printer, p);
return os; return os;
} }
} // namespace majel } // namespace platform
} // namespace paddle
...@@ -2,49 +2,48 @@ ...@@ -2,49 +2,48 @@
#include <boost/variant.hpp> #include <boost/variant.hpp>
#include <iostream> #include <iostream>
namespace majel { namespace paddle {
namespace platform {
struct CpuPlace { struct CpuPlace {
CpuPlace() {} // WORKAROUND: for some reason, omitting this constructor // WORKAROUND: for some reason, omitting this constructor
// causes errors with boost 1.59 and OSX // causes errors with boost 1.59 and OSX
// needed for variant equality comparison CpuPlace() {}
inline bool operator==(const CpuPlace&) const { return true; }
inline bool operator!=(const CpuPlace&) const { return false; } // needed for variant equality comparison
inline bool operator==(const CpuPlace &) const { return true; }
inline bool operator!=(const CpuPlace &) const { return false; }
}; };
struct GpuPlace { struct GpuPlace {
GpuPlace() : GpuPlace(0) {}
GpuPlace(int d) : device(d) {} GpuPlace(int d) : device(d) {}
// needed for variant equality comparison // needed for variant equality comparison
inline bool operator==(const GpuPlace& o) const { return device == o.device; } inline bool operator==(const GpuPlace &o) const { return device == o.device; }
inline bool operator!=(const GpuPlace &o) const { return !(*this == o); }
inline bool operator!=(const GpuPlace& o) const { return !(*this == o); }
GpuPlace() : GpuPlace(0) {}
int device; int device;
}; };
class IsGpuPlace : public boost::static_visitor<bool> { struct IsGpuPlace : public boost::static_visitor<bool> {
public: bool operator()(const CpuPlace &) const { return false; }
bool operator()(const CpuPlace&) const { return false; } bool operator()(const GpuPlace &gpu) const { return true; }
bool operator()(const GpuPlace& gpu) const { return true; }
}; };
typedef boost::variant<GpuPlace, CpuPlace> Place; typedef boost::variant<GpuPlace, CpuPlace> Place;
void set_place(const Place&); void set_place(const Place &);
const Place &get_place();
const Place& get_place();
const GpuPlace default_gpu(); const GpuPlace default_gpu();
const CpuPlace default_cpu(); const CpuPlace default_cpu();
bool is_gpu_place(const Place&); bool is_gpu_place(const Place &);
bool is_cpu_place(const Place&); bool is_cpu_place(const Place &);
bool places_are_same_class(const Place&, const Place&); bool places_are_same_class(const Place &, const Place &);
std::ostream& operator<<(std::ostream&, const majel::Place&); std::ostream &operator<<(std::ostream &, const Place &);
} // namespace majel } // namespace platform
} // namespace paddle
#include "paddle/majel/place.h" #include "paddle/platform/place.h"
#include <sstream> #include <sstream>
#include "gtest/gtest.h" #include "gtest/gtest.h"
TEST(Place, Equality) { TEST(Place, Equality) {
majel::CpuPlace cpu; paddle::platform::CpuPlace cpu;
majel::GpuPlace g0(0), g1(1), gg0(0); paddle::platform::GpuPlace g0(0), g1(1), gg0(0);
EXPECT_EQ(cpu, cpu); EXPECT_EQ(cpu, cpu);
EXPECT_EQ(g0, g0); EXPECT_EQ(g0, g0);
...@@ -13,28 +13,28 @@ TEST(Place, Equality) { ...@@ -13,28 +13,28 @@ TEST(Place, Equality) {
EXPECT_NE(g0, g1); EXPECT_NE(g0, g1);
EXPECT_TRUE(majel::places_are_same_class(g0, gg0)); EXPECT_TRUE(paddle::platform::places_are_same_class(g0, gg0));
EXPECT_FALSE(majel::places_are_same_class(g0, cpu)); EXPECT_FALSE(paddle::platform::places_are_same_class(g0, cpu));
} }
TEST(Place, Default) { TEST(Place, Default) {
EXPECT_TRUE(majel::is_gpu_place(majel::get_place())); EXPECT_TRUE(paddle::platform::is_gpu_place(paddle::platform::get_place()));
EXPECT_TRUE(majel::is_gpu_place(majel::default_gpu())); EXPECT_TRUE(paddle::platform::is_gpu_place(paddle::platform::default_gpu()));
EXPECT_TRUE(majel::is_cpu_place(majel::default_cpu())); EXPECT_TRUE(paddle::platform::is_cpu_place(paddle::platform::default_cpu()));
majel::set_place(majel::CpuPlace()); paddle::platform::set_place(paddle::platform::CpuPlace());
EXPECT_TRUE(majel::is_cpu_place(majel::get_place())); EXPECT_TRUE(paddle::platform::is_cpu_place(paddle::platform::get_place()));
} }
TEST(Place, Print) { TEST(Place, Print) {
{ {
std::stringstream ss; std::stringstream ss;
ss << majel::GpuPlace(1); ss << paddle::platform::GpuPlace(1);
EXPECT_EQ("GpuPlace(1)", ss.str()); EXPECT_EQ("GpuPlace(1)", ss.str());
} }
{ {
std::stringstream ss; std::stringstream ss;
ss << majel::CpuPlace(); ss << paddle::platform::CpuPlace();
EXPECT_EQ("CpuPlace", ss.str()); EXPECT_EQ("CpuPlace", ss.str());
} }
} }
...@@ -26,6 +26,13 @@ set(TRAINER_HEADERS ...@@ -26,6 +26,13 @@ set(TRAINER_HEADERS
ThreadParameterUpdater.h ThreadParameterUpdater.h
TrainerConfigHelper.h) TrainerConfigHelper.h)
add_library(paddle_trainer_lib STATIC add_library(paddle_trainer_lib STATIC
...@@ -34,7 +41,7 @@ add_style_check_target(paddle_trainer_lib ...@@ -34,7 +41,7 @@ add_style_check_target(paddle_trainer_lib
add_style_check_target(paddle_trainer_lib add_style_check_target(paddle_trainer_lib
add_dependencies(paddle_trainer_lib add_dependencies(paddle_trainer_lib
gen_proto_cpp paddle_pserver_cclient_lib) gen_proto_cpp)
macro(add_paddle_exe TARGET_NAME) macro(add_paddle_exe TARGET_NAME)
add_executable(${TARGET_NAME} ${ARGN}) add_executable(${TARGET_NAME} ${ARGN})
...@@ -63,5 +70,8 @@ if(APPLE) ...@@ -63,5 +70,8 @@ if(APPLE)
set(CMAKE_EXE_LINKER_FLAGS "-framework CoreFoundation -framework Security") set(CMAKE_EXE_LINKER_FLAGS "-framework CoreFoundation -framework Security")
endif() endif()
target_link_libraries(paddle_trainer ${CMAKE_CURRENT_SOURCE_DIR}/libpaddle_pserver_cclient.a) if(WITH_GOLANG)
target_link_libraries(paddle_trainer_lib ${CMAKE_CURRENT_SOURCE_DIR}/libpaddle_pserver_cclient.a) add_dependencies(paddle_trainer_lib paddle_pserver_cclient)
target_link_libraries(paddle_trainer ${CMAKE_BINARY_DIR}/go/pserver/cclient/libpaddle_pserver_cclient.a)
target_link_libraries(paddle_trainer_lib ${CMAKE_BINARY_DIR}/go/pserver/cclient/libpaddle_pserver_cclient.a)
...@@ -124,6 +124,8 @@ TEST(RecurrentGradientMachine, test_generation) { ...@@ -124,6 +124,8 @@ TEST(RecurrentGradientMachine, test_generation) {
bool beam_search) { bool beam_search) {
FLAGS_config_args = beam_search ? "beam_search=1" : "beam_search=0"; FLAGS_config_args = beam_search ? "beam_search=1" : "beam_search=0";
for (auto useGpu : useGpuConfs) { for (auto useGpu : useGpuConfs) {
LOG(INFO) << configFile << " useGpu=" << useGpu
<< " beam_search=" << beam_search;
testGeneration(configFile, useGpu, hasSubseq, expRetFile); testGeneration(configFile, useGpu, hasSubseq, expRetFile);
} }
}; };
...@@ -5,6 +5,7 @@ set(proto_filenames ...@@ -5,6 +5,7 @@ set(proto_filenames
ParameterConfig.proto ParameterConfig.proto
ParameterService.proto ParameterService.proto
TrainerConfig.proto TrainerConfig.proto
ParameterServerConfig.proto) ParameterServerConfig.proto)
...@@ -35,10 +36,8 @@ foreach(filename ${proto_filenames}) ...@@ -35,10 +36,8 @@ foreach(filename ${proto_filenames})
DEPENDS ${filename} ${external_project_dependencies}) DEPENDS ${filename} ${external_project_dependencies})
endforeach() endforeach()
add_custom_target(gen_proto_cpp ALL DEPENDS ${PROTO_GEN}) add_custom_target(gen_proto_cpp ALL DEPENDS ${PROTO_GEN})
add_custom_target(gen_proto_py ALL DEPENDS ${PROTO_GEN_PY}) add_custom_target(gen_proto_py ALL DEPENDS ${PROTO_GEN_PY})
add_library(paddle_proto STATIC
${PROTO_GEN}) add_library(paddle_proto STATIC ${PROTO_GEN})
target_include_directories(paddle_proto PUBLIC ${CMAKE_CURRENT_BINARY_DIR}) target_include_directories(paddle_proto PUBLIC ${CMAKE_CURRENT_BINARY_DIR})
syntax = "proto2";
option optimize_for = LITE_RUNTIME;
package paddle;
message SGDConfig {
// SGD
// momentum: float >= 0. Parameter updates momentum.
// decay: float >= 0. Learning rate decay over each update.
// nesterov: boolean. Whether to apply Nesterov momentum.
optional double momentum = 21 [default = 0.0];
optional double decay = 23 [default = 0.0];
optional bool nesterov =24 [default = false];
message AdadeltaConfig {
// Adadelta
// It is recommended to leave it at the default value.
// rho: float >= 0.
// epsilon: float >= 0. Fuzz factor.
// decay: float >= 0. Learning rate decay over each update.
// reference : [Adadelta - an adaptive learning rate method](http://arxiv.org/abs/1212.5701)
optional double rho = 33 [default = 0.90];
optional double epsilon = 31 [default = 1e-5];
optional double decay = 32 [default = 0.0];
message AdagradConfig {
// Adagrad
// epsilon: float >= 0.
// decay: float >= 0. Learning rate decay over each update.
// reference : [Adaptive Subgradient Methods for Online Learning and Stochastic Optimization](http://www.jmlr.org/papers/volume12/duchi11a/duchi11a.pdf)
optional double epsilon = 41 [default = 1e-5];
optional double decay = 42 [default = 0.0];
message AdamConfig {
// Adam
// beta_1: float, 0 < beta < 1. Generally close to 1.
// beta_2: float, 0 < beta < 1. Generally close to 1.
// epsilon: float >= 0. Fuzz factor.
// decay: float >= 0. Learning rate decay over each update.
// reference : [Adam - A Method for Stochastic Optimization](http://arxiv.org/abs/1412.6980v8)
optional double beta_1 = 41;
optional double beta_2 = 42;
optional double epsilon = 43;
optional double decay = 44;
message ConstLrConfig {
// learning rate policy
optional double learning_rate = 1 [default = 1.0];
message LinearLrConfig {
// learning rate policy
optional double learning_rate = 1 [default = 1.0];
optional double lr_decay_a = 2;
optional double lr_decay_b = 3;
message TensorProto {
enum DataType {
optional DataType data_type = 1;
repeated bytes content = 2;
message SGDOptimizerState {
// learning rate policy
optional double learning_rate = 101;
optional double lr_decay_a = 102;
optional double lr_decay_b = 103;
optional double num_sample_passed = 104;
// state
optional TensorProto parameter = 1;
optional TensorProto momentums = 2;
message AdadeltaOptimizerState {
// learning rate policy
optional double learning_rate = 101;
optional double lr_decay_a = 102;
optional double lr_decay_b = 103;
optional double num_sample_passed = 104;
// state
optional TensorProto parameter = 1;
optional TensorProto accum_gradient = 2;
optional TensorProto accum_delta = 3;
optional TensorProto update_delta = 4;
message AdagradOptimizerState {
// learning rate policy
optional double learning_rate = 101;
optional double lr_decay_a = 102;
optional double lr_decay_b = 103;
optional double num_sample_passed = 104;
// state
optional TensorProto parameter = 1;
optional TensorProto accum_gradient = 2;
message AdamOptimizerState {
// learning rate policy
optional double learning_rate = 101;
optional double lr_decay_a = 102;
optional double lr_decay_b = 103;
optional double num_sample_passed = 104;
// state
optional TensorProto parameter = 1;
optional TensorProto momentums = 2;
optional TensorProto velocitys = 3;
message OptimizerConfig {
enum Optimizer {
SGD = 1;
Adadelta = 2;
Adagrad = 3;
Adam = 4;
optional Optimizer optimizer = 1;
optional SGDConfig sgd = 3;
optional AdadeltaConfig adadelta = 4;
optional AdagradConfig adagrad = 5;
optional AdamConfig adam = 6;
enum LrPolicy {
Const = 0;
Linear = 1;
optional LrPolicy lr_policy = 11;
optional ConstLrConfig const_lr = 12;
optional LinearLrConfig linear_lr = 13;
// common config of optimizer
// clip the gradient when L2 exceeds this value
optional double clip_norm = 101;
// clip the gradient when L1 exceeds this value
optional double clip_value = 102;
...@@ -18,7 +18,7 @@ configure_file(${CMAKE_CURRENT_SOURCE_DIR}/setup.py.in ...@@ -18,7 +18,7 @@ configure_file(${CMAKE_CURRENT_SOURCE_DIR}/setup.py.in
add_custom_command(OUTPUT ${OUTPUT_DIR}/.timestamp add_custom_command(OUTPUT ${OUTPUT_DIR}/.timestamp
COMMAND env ${py_env} ${PYTHON_EXECUTABLE} setup.py bdist_wheel COMMAND env ${py_env} ${PYTHON_EXECUTABLE} setup.py bdist_wheel
COMMAND ${CMAKE_COMMAND} -E touch ${OUTPUT_DIR}/.timestamp COMMAND ${CMAKE_COMMAND} -E touch ${OUTPUT_DIR}/.timestamp
DEPENDS gen_proto_py ${PY_FILES} ${external_project_dependencies} paddle_master_shared) DEPENDS gen_proto_py ${PY_FILES} ${external_project_dependencies})
add_custom_target(paddle_python ALL DEPENDS add_custom_target(paddle_python ALL DEPENDS
${OUTPUT_DIR}/.timestamp) ${OUTPUT_DIR}/.timestamp)
...@@ -328,53 +328,33 @@ def RecurrentLayerGroupWithoutOutLinksBegin(name, ...@@ -328,53 +328,33 @@ def RecurrentLayerGroupWithoutOutLinksBegin(name,
SubModelBegin(name) SubModelBegin(name)
g_current_submodel.is_recurrent_layer_group = True g_current_submodel.is_recurrent_layer_group = True
g_current_submodel.reversed = seq_reversed g_current_submodel.reversed = seq_reversed
g_current_submodel.target_inlinkid = -1
in_links_count = 0 in_links_count = 0
for linkid, link in enumerate(in_links): for linkid, link in enumerate(in_links):
if isinstance(link, basestring): if isinstance(link, basestring):
name = link name = link
has_subseq = False
else: else:
name = link.link_name name = link.link_name
has_subseq = link.has_subseq
# assign target_inlinkid according to target_inlinkname
if target_inlinkname == name:
g_current_submodel.target_inlinkid = linkid
if in_links_count == 0:
in_links_has_subseq = has_subseq
in_links_has_subseq == has_subseq,
"The sequence type of in_links should be the same in RecurrentLayerGroup"
in_links_count += 1 in_links_count += 1
layer_name = MakeLayerNameInParentSubmodel(name) layer_name = MakeLayerNameInParentSubmodel(name)
layer = g_layer_map[layer_name] layer = g_layer_map[layer_name]
if has_subseq: ScatterAgentLayer(name=name, size=layer.size)
SequenceScatterAgentLayer(name=name, size=layer.size)
ScatterAgentLayer(name=name, size=layer.size)
pair = g_current_submodel.in_links.add() pair = g_current_submodel.in_links.add()
pair.layer_name = layer_name pair.layer_name = layer_name
pair.link_name = MakeLayerNameInSubmodel(name) pair.link_name = MakeLayerNameInSubmodel(name)
pair.has_subseq = has_subseq
@config_func @config_func
def RecurrentLayerGroupSetOutLink(link): def RecurrentLayerGroupSetOutLink(link):
if isinstance(link, basestring): if isinstance(link, basestring):
name = link name = link
has_subseq = False
else: else:
name = link.link_name name = link.link_name
has_subseq = link.has_subseq
layer_name = MakeLayerNameInParentSubmodel(name) layer_name = MakeLayerNameInParentSubmodel(name)
pair = g_current_submodel.out_links.add() pair = g_current_submodel.out_links.add()
pair.layer_name = MakeLayerNameInSubmodel(name) pair.layer_name = MakeLayerNameInSubmodel(name)
pair.link_name = layer_name pair.link_name = layer_name
pair.has_subseq = has_subseq
def RecurrentLayerGroupSetGenerator(generator=None): def RecurrentLayerGroupSetGenerator(generator=None):
...@@ -389,8 +369,7 @@ def RecurrentLayerGroupBegin(name, ...@@ -389,8 +369,7 @@ def RecurrentLayerGroupBegin(name,
generator=None, generator=None,
target_inlinkname="", target_inlinkname="",
seq_reversed=False): seq_reversed=False):
RecurrentLayerGroupWithoutOutLinksBegin(name, in_links, seq_reversed, RecurrentLayerGroupWithoutOutLinksBegin(name, in_links, seq_reversed)
for link in out_links: for link in out_links:
RecurrentLayerGroupSetOutLink(link) RecurrentLayerGroupSetOutLink(link)
...@@ -425,8 +404,6 @@ def RecurrentLayerGroupEnd(name): ...@@ -425,8 +404,6 @@ def RecurrentLayerGroupEnd(name):
agent_name = GetLayerBaseName(pair.link_name) agent_name = GetLayerBaseName(pair.link_name)
if prev_submodel.HasField("generator"): if prev_submodel.HasField("generator"):
DataLayer(name=agent_name, size=layer.size) DataLayer(name=agent_name, size=layer.size)
elif pair.has_subseq:
SequenceGatherAgentLayer(name=agent_name, size=layer.size)
else: else:
GatherAgentLayer(name=agent_name, size=layer.size) GatherAgentLayer(name=agent_name, size=layer.size)
...@@ -1651,8 +1628,14 @@ class SelectiveFCLayer(LayerBase): ...@@ -1651,8 +1628,14 @@ class SelectiveFCLayer(LayerBase):
@config_layer('print') @config_layer('print')
class PrintLayer(LayerBase): class PrintLayer(LayerBase):
def __init__(self, name, inputs): def __init__(self, name, inputs, format=None):
super(PrintLayer, self).__init__(name, 'print', 0, inputs) super(PrintLayer, self).__init__(name, 'print', 0, inputs)
if format is None:
format = "\n".join([
"layer=" + input.input_layer_name + " %s"
for input in self.inputs
self.config.user_arg = format
@config_layer('priorbox') @config_layer('priorbox')
...@@ -1949,7 +1932,6 @@ class BatchNormLayer(LayerBase): ...@@ -1949,7 +1932,6 @@ class BatchNormLayer(LayerBase):
def __init__(self, def __init__(self,
name, name,
inputs, inputs,
bias=True, bias=True,
use_global_stats=True, use_global_stats=True,
moving_average_fraction=0.9, moving_average_fraction=0.9,
...@@ -1987,12 +1969,7 @@ class BatchNormLayer(LayerBase): ...@@ -1987,12 +1969,7 @@ class BatchNormLayer(LayerBase):
cudnn_version >= 4007 cudnn_version >= 4007
self.layer_type = "cudnn_batch_norm" if use_cudnn else "batch_norm" self.layer_type = "cudnn_batch_norm" if use_cudnn else "batch_norm"
super(BatchNormLayer, self).__init__( super(BatchNormLayer, self).__init__(
name, name, self.layer_type, 0, inputs=inputs, **xargs)
if use_global_stats is not None: if use_global_stats is not None:
self.config.use_global_stats = use_global_stats self.config.use_global_stats = use_global_stats
...@@ -2253,13 +2230,6 @@ class AgentLayer(LayerBase): ...@@ -2253,13 +2230,6 @@ class AgentLayer(LayerBase):
name, 'agent', size, inputs=[], device=device) name, 'agent', size, inputs=[], device=device)
class SequenceAgentLayer(LayerBase):
def __init__(self, name, size, device=None):
super(SequenceAgentLayer, self).__init__(
name, 'sequence_agent', size, inputs=[], device=device)
@config_layer('gather_agent') @config_layer('gather_agent')
class GatherAgentLayer(LayerBase): class GatherAgentLayer(LayerBase):
def __init__(self, name, size, device=None): def __init__(self, name, size, device=None):
...@@ -2274,20 +2244,6 @@ class ScatterAgentLayer(LayerBase): ...@@ -2274,20 +2244,6 @@ class ScatterAgentLayer(LayerBase):
name, 'scatter_agent', size, inputs=[], device=device) name, 'scatter_agent', size, inputs=[], device=device)
class SequenceGatherAgentLayer(LayerBase):
def __init__(self, name, size, device=None):
super(SequenceGatherAgentLayer, self).__init__(
name, 'sequence_gather_agent', size, inputs=[], device=device)
class SequenceScatterAgentLayer(LayerBase):
def __init__(self, name, size, device=None):
super(SequenceScatterAgentLayer, self).__init__(
name, 'sequence_scatter_agent', size, inputs=[], device=device)
@config_layer('multiplex') @config_layer('multiplex')
class MultiplexLayer(LayerBase): class MultiplexLayer(LayerBase):
def __init__(self, name, inputs, size, device=None): def __init__(self, name, inputs, size, device=None):
...@@ -2303,12 +2259,12 @@ class MultiplexLayer(LayerBase): ...@@ -2303,12 +2259,12 @@ class MultiplexLayer(LayerBase):
@config_func @config_func
def Link( def Link(name, has_subseq=False):
name, """
has_subseq=False, ): Still keeping has_subseq for backward compatibility
link_config = LinkConfig() link_config = LinkConfig()
link_config.link_name = name link_config.link_name = name
link_config.has_subseq = has_subseq
return link_config return link_config
...@@ -2341,20 +2297,13 @@ def Memory(name, ...@@ -2341,20 +2297,13 @@ def Memory(name,
config_assert(name is not None, "name needs cannot be None") config_assert(name is not None, "name needs cannot be None")
memory_name = name + "+delay1" memory_name = name + "+delay1"
agent_name = memory_name agent_name = memory_name
if is_sequence: agent_layer = AgentLayer(agent_name, size)
boot_layer is not None,
"there must be boot_layer in network when is_sequence = True")
agent_layer = SequenceAgentLayer(agent_name, size)
agent_layer = AgentLayer(agent_name, size)
config_assert(g_current_submodel.is_recurrent_layer_group, config_assert(g_current_submodel.is_recurrent_layer_group,
'Memory should be used in recurrent layer group only') 'Memory should be used in recurrent layer group only')
memory = g_current_submodel.memories.add() memory = g_current_submodel.memories.add()
if name is not None: if name is not None:
memory.layer_name = MakeLayerNameInSubmodel(name) memory.layer_name = MakeLayerNameInSubmodel(name)
memory.link_name = MakeLayerNameInSubmodel(agent_name) memory.link_name = MakeLayerNameInSubmodel(agent_name)
memory.is_sequence = is_sequence
options = sum((boot_layer is not None, bool(boot_bias), options = sum((boot_layer is not None, bool(boot_bias),
boot_with_const_id is not None)) boot_with_const_id is not None))
config_assert( config_assert(
...@@ -2428,15 +2377,23 @@ class ExpandLayer(LayerBase): ...@@ -2428,15 +2377,23 @@ class ExpandLayer(LayerBase):
@config_layer('featmap_expand') @config_layer('featmap_expand')
class FeatMapExpandLayer(LayerBase): class FeatMapExpandLayer(LayerBase):
def __init__(self, name, inputs, device=None, num_filters=None, bias=False): def __init__(self,
super(FeatMapExpandLayer, self).__init__( super(FeatMapExpandLayer, self).__init__(
name, 'featmap_expand', 0, inputs=inputs, device=device) name, 'featmap_expand', 0, inputs=inputs, **xargs)
config_assert( config_assert(
len(self.inputs) == 1, 'ExpandLayer takes 1 and only 1 inputs') len(self.inputs) == 1, 'ExpandLayer takes 1 and only 1 inputs')
if num_filters is not None: if num_filters is not None:
self.config.num_filters = num_filters self.config.num_filters = num_filters
else: else:
logger.fatal("FeatMapExpandLayer must specify num_filters.") logger.fatal("FeatMapExpandLayer must specify num_filters.")
if not as_row_vector:
self.config.user_arg = "as_col_vec"
self.set_layer_size(self.get_input_layer(0).size * num_filters) self.set_layer_size(self.get_input_layer(0).size * num_filters)
...@@ -2446,14 +2403,12 @@ class MaxLayer(LayerBase): ...@@ -2446,14 +2403,12 @@ class MaxLayer(LayerBase):
name, name,
inputs, inputs,
trans_type='non-seq', trans_type='non-seq',
bias=False, bias=False,
output_max_index=None, output_max_index=None,
**xargs): **xargs):
super(MaxLayer, self).__init__(name, 'max', 0, inputs=inputs, **xargs) super(MaxLayer, self).__init__(name, 'max', 0, inputs=inputs, **xargs)
config_assert(len(self.inputs) == 1, 'MaxLayer must have 1 input') config_assert(len(self.inputs) == 1, 'MaxLayer must have 1 input')
self.config.trans_type = trans_type self.config.trans_type = trans_type
self.config.active_type = active_type
for input_index in xrange(len(self.inputs)): for input_index in xrange(len(self.inputs)):
input_layer = self.get_input_layer(input_index) input_layer = self.get_input_layer(input_index)
self.set_layer_size(input_layer.size) self.set_layer_size(input_layer.size)
...@@ -2495,18 +2450,12 @@ class SequenceLastInstanceLayer(LayerBase): ...@@ -2495,18 +2450,12 @@ class SequenceLastInstanceLayer(LayerBase):
def __init__(self, def __init__(self,
name, name,
inputs, inputs,
trans_type='non-seq', trans_type='non-seq',
bias=False, bias=False,
stride=-1, stride=-1,
**xargs): **xargs):
super(SequenceLastInstanceLayer, self).__init__( super(SequenceLastInstanceLayer, self).__init__(
name, name, 'seqlastins', 0, inputs=inputs, **xargs)
config_assert( config_assert(
len(inputs) == 1, 'SequenceLastInstanceLayer must have 1 input') len(inputs) == 1, 'SequenceLastInstanceLayer must have 1 input')
if trans_type == 'seq': if trans_type == 'seq':
...@@ -2522,7 +2471,6 @@ class SequenceFirstInstanceLayer(SequenceLastInstanceLayer): ...@@ -2522,7 +2471,6 @@ class SequenceFirstInstanceLayer(SequenceLastInstanceLayer):
def __init__(self, def __init__(self,
name, name,
inputs, inputs,
trans_type='non-seq', trans_type='non-seq',
bias=False, bias=False,
stride=-1, stride=-1,
...@@ -2530,7 +2478,6 @@ class SequenceFirstInstanceLayer(SequenceLastInstanceLayer): ...@@ -2530,7 +2478,6 @@ class SequenceFirstInstanceLayer(SequenceLastInstanceLayer):
super(SequenceFirstInstanceLayer, self).__init__( super(SequenceFirstInstanceLayer, self).__init__(
name, name,
inputs=inputs, inputs=inputs,
trans_type=trans_type, trans_type=trans_type,
bias=bias, bias=bias,
stride=stride, stride=stride,
...@@ -2540,14 +2487,9 @@ class SequenceFirstInstanceLayer(SequenceLastInstanceLayer): ...@@ -2540,14 +2487,9 @@ class SequenceFirstInstanceLayer(SequenceLastInstanceLayer):
@config_layer('seqconcat') @config_layer('seqconcat')
class SequenceConcatLayer(LayerBase): class SequenceConcatLayer(LayerBase):
def __init__(self, name, inputs, active_type='linear', bias=False, **xargs): def __init__(self, name, inputs, bias=False, **xargs):
super(SequenceConcatLayer, self).__init__( super(SequenceConcatLayer, self).__init__(
name, name, 'seqconcat', 0, inputs=inputs, **xargs)
config_assert( config_assert(
len(inputs) == 2, 'SequenceConcatLayer must have 2 inputs') len(inputs) == 2, 'SequenceConcatLayer must have 2 inputs')
for input_index in xrange(len(self.inputs)): for input_index in xrange(len(self.inputs)):
...@@ -2558,20 +2500,9 @@ class SequenceConcatLayer(LayerBase): ...@@ -2558,20 +2500,9 @@ class SequenceConcatLayer(LayerBase):
@config_layer('seqreshape') @config_layer('seqreshape')
class SequenceReshapeLayer(LayerBase): class SequenceReshapeLayer(LayerBase):
def __init__(self, def __init__(self, name, size, inputs, bias=False, **xargs):
super(SequenceReshapeLayer, self).__init__( super(SequenceReshapeLayer, self).__init__(
name, name, 'seqreshape', size, inputs=inputs, **xargs)
config_assert( config_assert(
len(inputs) == 1, 'SequenceReshapeLayer must have 1 inputs') len(inputs) == 1, 'SequenceReshapeLayer must have 1 inputs')
self.set_layer_size(size) self.set_layer_size(size)
...@@ -2580,9 +2511,9 @@ class SequenceReshapeLayer(LayerBase): ...@@ -2580,9 +2511,9 @@ class SequenceReshapeLayer(LayerBase):
@config_layer('subseq') @config_layer('subseq')
class SubSequenceLayer(LayerBase): class SubSequenceLayer(LayerBase):
def __init__(self, name, inputs, active_type='linear', bias=False, **xargs): def __init__(self, name, inputs, bias=False, **xargs):
super(SubSequenceLayer, self).__init__( super(SubSequenceLayer, self).__init__(
name, 'subseq', 0, inputs=inputs, active_type=active_type, **xargs) name, 'subseq', 0, inputs=inputs, **xargs)
config_assert(len(inputs) == 3, 'SubSequenceLayer must have 3 inputs') config_assert(len(inputs) == 3, 'SubSequenceLayer must have 3 inputs')
input_layer0 = self.get_input_layer(0) input_layer0 = self.get_input_layer(0)
size = input_layer0.size size = input_layer0.size
...@@ -2738,11 +2669,10 @@ class AverageLayer(LayerBase): ...@@ -2738,11 +2669,10 @@ class AverageLayer(LayerBase):
inputs, inputs,
average_strategy='average', average_strategy='average',
trans_type='non-seq', trans_type='non-seq',
bias=False, bias=False,
**xargs): **xargs):
super(AverageLayer, self).__init__( super(AverageLayer, self).__init__(
name, 'average', 0, inputs=inputs, active_type=active_type, **xargs) name, 'average', 0, inputs=inputs, **xargs)
self.config.average_strategy = average_strategy self.config.average_strategy = average_strategy
self.config.trans_type = trans_type self.config.trans_type = trans_type
config_assert(len(inputs) == 1, 'AverageLayer must have 1 input') config_assert(len(inputs) == 1, 'AverageLayer must have 1 input')
...@@ -311,18 +311,6 @@ class LayerOutput(object): ...@@ -311,18 +311,6 @@ class LayerOutput(object):
self.outputs = outputs self.outputs = outputs
self.reverse = reverse self.reverse = reverse
def __repr__(self):
Disable __repr__ for debug reason. Will be implemented when release
assert False, "this method should not be invoked"
def __str__(self):
Disable __str__ for debug reason. Will be implemented when release
assert False, "this method should not be invoked"
def set_input(self, input): def set_input(self, input):
""" """
Set the input for a memory layer. Can only be used for memory layer Set the input for a memory layer. Can only be used for memory layer
...@@ -976,7 +964,7 @@ def fc_layer(input, ...@@ -976,7 +964,7 @@ def fc_layer(input,
@wrap_name_default("print") @wrap_name_default("print")
def printer_layer(input, name=None): def printer_layer(input, format=None, name=None):
""" """
Print the output value of input layers. This layer is useful for debugging. Print the output value of input layers. This layer is useful for debugging.
...@@ -994,6 +982,7 @@ def printer_layer(input, name=None): ...@@ -994,6 +982,7 @@ def printer_layer(input, name=None):
Layer( Layer(
name=name, name=name,
type=LayerType.PRINT_LAYER, type=LayerType.PRINT_LAYER,
inputs=[l.name for l in input], ) inputs=[l.name for l in input], )
# this layer don't return anything, can not be input of other layer. # this layer don't return anything, can not be input of other layer.
...@@ -1565,14 +1554,24 @@ def expand_layer(input, ...@@ -1565,14 +1554,24 @@ def expand_layer(input,
@wrap_name_default() @wrap_name_default()
@layer_support() @layer_support()
def repeat_layer(input, num_repeats, name=None, layer_attr=None): def repeat_layer(input,
""" """
A layer for repeating the input for num_repeats times. This is equivalent A layer for repeating the input for num_repeats times.
to apply concat_layer() with num_repeats same input.
If as_row_vector:
.. math:: .. math::
y = [x, x, \cdots, x] y = [x_1,\cdots, x_n, \cdots, x_1, \cdots, x_n]
If not as_row_vector:
.. math::
y = [x_1,\cdots, x_1, \cdots, x_n, \cdots, x_n]
The example usage is: The example usage is:
...@@ -1585,6 +1584,14 @@ def repeat_layer(input, num_repeats, name=None, layer_attr=None): ...@@ -1585,6 +1584,14 @@ def repeat_layer(input, num_repeats, name=None, layer_attr=None):
:param num_repeats: Repeat the input so many times :param num_repeats: Repeat the input so many times
:type num_repeats: int :type num_repeats: int
:param name: Layer name. :param name: Layer name.
:param as_row_vector: True for treating input as row vector and repeating
in the column direction. This is equivalent to apply
concat_layer() with num_repeats same input.
False for treating input as column vector and repeating
in the row direction.
:type as_row_vector: bool
:param act: Activation type.
:type act: BaseActivation
:type name: basestring :type name: basestring
:param layer_attr: extra layer attributes. :param layer_attr: extra layer attributes.
:type layer_attr: ExtraLayerAttribute. :type layer_attr: ExtraLayerAttribute.
...@@ -1595,13 +1602,16 @@ def repeat_layer(input, num_repeats, name=None, layer_attr=None): ...@@ -1595,13 +1602,16 @@ def repeat_layer(input, num_repeats, name=None, layer_attr=None):
l = Layer( l = Layer(
inputs=[input.name], inputs=[input.name],
name=name, name=name,
num_filters=num_repeats, num_filters=num_repeats,
**ExtraAttr.to_kwargs(layer_attr)) **ExtraAttr.to_kwargs(layer_attr))
return LayerOutput( return LayerOutput(
name=name, name=name,
size=l.config.size, size=l.config.size,
layer_type=LayerType.FEATURE_MAP_EXPAND_LAYER, layer_type=LayerType.FEATURE_MAP_EXPAND_LAYER,
parents=[input]) parents=[input])
...@@ -2846,11 +2856,13 @@ def seq_concat_layer(a, b, act=None, name=None, layer_attr=None, ...@@ -2846,11 +2856,13 @@ def seq_concat_layer(a, b, act=None, name=None, layer_attr=None,
Concat sequence a with sequence b. Concat sequence a with sequence b.
Inputs: Inputs:
- a = [a1, a2, ..., an] - a = [a1, a2, ..., am]
- b = [b1, b2, ..., bn] - b = [b1, b2, ..., bn]
- Note that the length of a and b should be the same.
Output: [a1, b1, a2, b2, ..., an, bn] Output: [a1, ..., am, b1, ..., bn]
Note that the above computation is for one sample. Multiple samples are
processed in one batch.
The example usage is: The example usage is:
...@@ -2944,7 +2956,7 @@ def memory(name, ...@@ -2944,7 +2956,7 @@ def memory(name,
:param memory_name: the name of the memory. :param memory_name: the name of the memory.
It is ignored when name is provided. It is ignored when name is provided.
:type memory_name: basestring :type memory_name: basestring
:param is_seq: is sequence for boot_layer :param is_seq: DEPRECATED. is sequence for boot_layer
:type is_seq: bool :type is_seq: bool
:param boot_layer: boot layer of memory. :param boot_layer: boot layer of memory.
:type boot_layer: LayerOutput|None :type boot_layer: LayerOutput|None
...@@ -2971,7 +2983,6 @@ def memory(name, ...@@ -2971,7 +2983,6 @@ def memory(name,
memory_name = Memory( memory_name = Memory(
name, name,
size, size,
boot_layer=boot_layer.name if boot_layer is not None else None, boot_layer=boot_layer.name if boot_layer is not None else None,
boot_bias=boot_bias, boot_bias=boot_bias,
boot_bias_active_type=boot_bias_active_type.name, boot_bias_active_type=boot_bias_active_type.name,
...@@ -3318,19 +3329,21 @@ class StaticInput(object): ...@@ -3318,19 +3329,21 @@ class StaticInput(object):
""" """
StaticInput is only used in recurrent_group which defines a read-only memory StaticInput is only used in recurrent_group which defines a read-only memory
that can be a sequence or non-sequence. that can be a sequence or non-sequence.
:param size: DEPRECATED
:param is_seq: DEPRECATED
""" """
def __init__(self, input, is_seq=False, size=None): def __init__(self, input, is_seq=False, size=None):
assert isinstance(input, LayerOutput) assert isinstance(input, LayerOutput)
self.input = input self.input = input
self.is_seq = is_seq assert input.size is not None
assert input.size is not None or size is not None
if size is not None: if size is not None:
input.size = size assert input.size == size
class SubsequenceInput(object): def SubsequenceInput(input):
""" """
Input sequence has sub-sequence, used in recurrent_group. Input sequence has sub-sequence, used in recurrent_group.
The example usage is: The example usage is:
...@@ -3339,11 +3352,7 @@ class SubsequenceInput(object): ...@@ -3339,11 +3352,7 @@ class SubsequenceInput(object):
input = SubsequenceInput(layer) input = SubsequenceInput(layer)
""" """
return input
def __init__(self, input):
assert isinstance(input, LayerOutput)
assert input.size is not None
self.input = input
@wrap_name_default("recurrent_group") @wrap_name_default("recurrent_group")
...@@ -3407,7 +3416,8 @@ def recurrent_group(step, ...@@ -3407,7 +3416,8 @@ def recurrent_group(step,
input sequence in a reverse order. input sequence in a reverse order.
:type reverse: bool :type reverse: bool
:param targetInlink: the input layer which share info with layer group's output :param targetInlink: DEPRECATED.
The input layer which share info with layer group's output
Param input specifies multiple input layers. For Param input specifies multiple input layers. For
SubsequenceInput inputs, config should assign one input SubsequenceInput inputs, config should assign one input
...@@ -3429,46 +3439,21 @@ def recurrent_group(step, ...@@ -3429,46 +3439,21 @@ def recurrent_group(step,
model_type('recurrent_nn') model_type('recurrent_nn')
def is_single_input(x): def is_single_input(x):
return isinstance(x, LayerOutput) or isinstance(x, StaticInput) \ return isinstance(x, LayerOutput) or isinstance(x, StaticInput)
or isinstance(x, SubsequenceInput)
if is_single_input(input): if is_single_input(input):
input = [input] input = [input]
assert isinstance(input, collections.Sequence) assert isinstance(input, collections.Sequence)
def is_in_links(x): def is_in_links(x):
return isinstance(x, LayerOutput) or isinstance(x, SubsequenceInput) return isinstance(x, LayerOutput)
in_links = filter(is_in_links, input) in_links = filter(is_in_links, input)
def targetInlink_in_inlinks():
for inlink in in_links:
if isinstance(inlink, SubsequenceInput):
if targetInlink == inlink.input:
return True
elif targetInlink == inlink:
return True
return False
assert (targetInlink == None or targetInlink_in_inlinks())
targetInlinkName = None if targetInlink == None \
else targetInlink.name if isinstance(targetInlink, LayerOutput) \
else targetInlink.input.name
contains_sub_seq = [False]
def map_in_links(x):
if isinstance(x, SubsequenceInput):
contains_sub_seq[0] = True
return Link(name=x.input.name, has_subseq=True)
return x.name
RecurrentLayerGroupWithoutOutLinksBegin( RecurrentLayerGroupWithoutOutLinksBegin(
name=name, name=name,
in_links=map(map_in_links, in_links), in_links=map(lambda x: x.name, in_links),
seq_reversed=reverse, seq_reversed=reverse)
in_args = [] in_args = []
has_LayerOutput = False has_LayerOutput = False
for each_input in input: for each_input in input:
...@@ -3476,21 +3461,13 @@ def recurrent_group(step, ...@@ -3476,21 +3461,13 @@ def recurrent_group(step,
if isinstance(each_input, LayerOutput): if isinstance(each_input, LayerOutput):
in_args.append(each_input) in_args.append(each_input)
has_LayerOutput = True has_LayerOutput = True
elif isinstance(each_input, SubsequenceInput): else: # StaticInput
has_LayerOutput = True
mem_name = "__%s_memory__" % each_input.input.name mem_name = "__%s_memory__" % each_input.input.name
mem = memory( mem = memory(
name=mem_name, name=None,
size=each_input.input.size, size=each_input.input.size,
boot_layer=each_input.input) boot_layer=each_input.input)
with mixed_layer( mem.set_input(mem)
act=IdentityActivation()) as mix:
mix += identity_projection(mem)
in_args.append(mem) in_args.append(mem)
assert (is_generating != has_LayerOutput) assert (is_generating != has_LayerOutput)
...@@ -3503,10 +3480,7 @@ def recurrent_group(step, ...@@ -3503,10 +3480,7 @@ def recurrent_group(step,
for ot in layer_outs: for ot in layer_outs:
assert isinstance(ot, LayerOutput) assert isinstance(ot, LayerOutput)
ot.reverse = reverse ot.reverse = reverse
if contains_sub_seq[0]: RecurrentLayerGroupSetOutLink(ot.name)
RecurrentLayerGroupSetOutLink(Link(ot.name, has_subseq=True))
RecurrentLayerGroupEnd(name=name) RecurrentLayerGroupEnd(name=name)
...@@ -5608,13 +5582,13 @@ def row_conv_layer(input, ...@@ -5608,13 +5582,13 @@ def row_conv_layer(input,
to deploy in an online and low-latency setting. The lookahead convolution to deploy in an online and low-latency setting. The lookahead convolution
incorporates information from future subsequences in a computationally incorporates information from future subsequences in a computationally
efficient manner to improve unidirectional recurrent neural networks. efficient manner to improve unidirectional recurrent neural networks.
The connection of row convolution is different from the 1D sequence The connection of row convolution is different from the 1D sequence
convolution. Assume that the future context-length is k, that is to say, convolution. Assume that the future context-length is k, that is to say,
it can get the output at timestep t by using the input feature from t-th it can get the output at timestep t by using the input feature from t-th
timestep to (t+k+1)-th timestep. Assume that the hidden dim of input timestep to (t+k+1)-th timestep. Assume that the hidden dim of input
activations is d, the activations r_t for the new layer at time-step t are: activations is d, the activations r_t for the new layer at time-step t are:
.. math:: .. math::
r_{t,i} = \sum_{j=1}^{k + 1} {w_{i,j}h_{t+j-1, i}} r_{t,i} = \sum_{j=1}^{k + 1} {w_{i,j}h_{t+j-1, i}}
#!/bin/bash #!/bin/bash
export configs=(test_fc layer_activations projections test_print_layer export configs=(test_repeat_layer test_fc layer_activations projections test_print_layer
test_sequence_pooling test_lstmemory_layer test_grumemory_layer test_sequence_pooling test_lstmemory_layer test_grumemory_layer
last_first_seq test_expand_layer test_ntm_layers test_hsigmoid last_first_seq test_expand_layer test_ntm_layers test_hsigmoid
img_layers img_trans_layers util_layers simple_rnn_layers unused_layers test_cost_layers img_layers img_trans_layers util_layers simple_rnn_layers unused_layers test_cost_layers
...@@ -9,7 +9,7 @@ layers { ...@@ -9,7 +9,7 @@ layers {
name: "__first_seq_0__" name: "__first_seq_0__"
type: "seqlastins" type: "seqlastins"
size: 30 size: 30
active_type: "linear" active_type: ""
inputs { inputs {
input_layer_name: "data" input_layer_name: "data"
} }
...@@ -21,7 +21,7 @@ layers { ...@@ -21,7 +21,7 @@ layers {
name: "__first_seq_1__" name: "__first_seq_1__"
type: "seqlastins" type: "seqlastins"
size: 30 size: 30
active_type: "linear" active_type: ""
inputs { inputs {
input_layer_name: "data" input_layer_name: "data"
} }
...@@ -33,7 +33,7 @@ layers { ...@@ -33,7 +33,7 @@ layers {
name: "__last_seq_0__" name: "__last_seq_0__"
type: "seqlastins" type: "seqlastins"
size: 30 size: 30
active_type: "linear" active_type: ""
inputs { inputs {
input_layer_name: "data" input_layer_name: "data"
} }
...@@ -44,7 +44,7 @@ layers { ...@@ -44,7 +44,7 @@ layers {
name: "__last_seq_1__" name: "__last_seq_1__"
type: "seqlastins" type: "seqlastins"
size: 30 size: 30
active_type: "linear" active_type: ""
inputs { inputs {
input_layer_name: "data" input_layer_name: "data"
} }
...@@ -55,7 +55,7 @@ layers { ...@@ -55,7 +55,7 @@ layers {
name: "__first_seq_2__" name: "__first_seq_2__"
type: "seqlastins" type: "seqlastins"
size: 30 size: 30
active_type: "linear" active_type: ""
inputs { inputs {
input_layer_name: "data" input_layer_name: "data"
} }
...@@ -67,7 +67,7 @@ layers { ...@@ -67,7 +67,7 @@ layers {
name: "__last_seq_2__" name: "__last_seq_2__"
type: "seqlastins" type: "seqlastins"
size: 30 size: 30
active_type: "linear" active_type: ""
inputs { inputs {
input_layer_name: "data" input_layer_name: "data"
} }
...@@ -123,7 +123,7 @@ layers { ...@@ -123,7 +123,7 @@ layers {
name: "__last_seq_0__" name: "__last_seq_0__"
type: "seqlastins" type: "seqlastins"
size: 200 size: 200
active_type: "linear" active_type: ""
inputs { inputs {
input_layer_name: "__simple_gru_0__" input_layer_name: "__simple_gru_0__"
} }
...@@ -134,7 +134,7 @@ layers { ...@@ -134,7 +134,7 @@ layers {
name: "__last_seq_1__" name: "__last_seq_1__"
type: "seqlastins" type: "seqlastins"
size: 200 size: 200
active_type: "linear" active_type: ""
inputs { inputs {
input_layer_name: "__simple_gru_1__" input_layer_name: "__simple_gru_1__"
} }
...@@ -256,19 +256,15 @@ sub_models { ...@@ -256,19 +256,15 @@ sub_models {
memories { memories {
layer_name: "__simple_gru_0__@__simple_gru_0___recurrent_group" layer_name: "__simple_gru_0__@__simple_gru_0___recurrent_group"
link_name: "__simple_gru_0__+delay1@__simple_gru_0___recurrent_group" link_name: "__simple_gru_0__+delay1@__simple_gru_0___recurrent_group"
is_sequence: false
} }
in_links { in_links {
layer_name: "__simple_gru_0___transform" layer_name: "__simple_gru_0___transform"
link_name: "__simple_gru_0___transform@__simple_gru_0___recurrent_group" link_name: "__simple_gru_0___transform@__simple_gru_0___recurrent_group"
has_subseq: false
} }
out_links { out_links {
layer_name: "__simple_gru_0__@__simple_gru_0___recurrent_group" layer_name: "__simple_gru_0__@__simple_gru_0___recurrent_group"
link_name: "__simple_gru_0__" link_name: "__simple_gru_0__"
has_subseq: false
} }
target_inlinkid: -1
} }
sub_models { sub_models {
name: "__simple_gru_1___recurrent_group" name: "__simple_gru_1___recurrent_group"
...@@ -280,18 +276,14 @@ sub_models { ...@@ -280,18 +276,14 @@ sub_models {
memories { memories {
layer_name: "__simple_gru_1__@__simple_gru_1___recurrent_group" layer_name: "__simple_gru_1__@__simple_gru_1___recurrent_group"
link_name: "__simple_gru_1__+delay1@__simple_gru_1___recurrent_group" link_name: "__simple_gru_1__+delay1@__simple_gru_1___recurrent_group"
is_sequence: false
} }
in_links { in_links {
layer_name: "__simple_gru_1___transform" layer_name: "__simple_gru_1___transform"
link_name: "__simple_gru_1___transform@__simple_gru_1___recurrent_group" link_name: "__simple_gru_1___transform@__simple_gru_1___recurrent_group"
has_subseq: false
} }
out_links { out_links {
layer_name: "__simple_gru_1__@__simple_gru_1___recurrent_group" layer_name: "__simple_gru_1__@__simple_gru_1___recurrent_group"
link_name: "__simple_gru_1__" link_name: "__simple_gru_1__"
has_subseq: false
} }
target_inlinkid: -1
} }
...@@ -205,7 +205,7 @@ layers { ...@@ -205,7 +205,7 @@ layers {
name: "__last_seq_0__" name: "__last_seq_0__"
type: "seqlastins" type: "seqlastins"
size: 100 size: 100
active_type: "linear" active_type: ""
inputs { inputs {
input_layer_name: "__lstm_group_0__" input_layer_name: "__lstm_group_0__"
} }
...@@ -216,7 +216,7 @@ layers { ...@@ -216,7 +216,7 @@ layers {
name: "__last_seq_1__" name: "__last_seq_1__"
type: "seqlastins" type: "seqlastins"
size: 100 size: 100
active_type: "linear" active_type: ""
inputs { inputs {
input_layer_name: "__lstm_group_1__" input_layer_name: "__lstm_group_1__"
} }
...@@ -341,24 +341,19 @@ sub_models { ...@@ -341,24 +341,19 @@ sub_models {
memories { memories {
layer_name: "__lstm_group_0__@__lstm_group_0___recurrent_group" layer_name: "__lstm_group_0__@__lstm_group_0___recurrent_group"
link_name: "__lstm_group_0__+delay1@__lstm_group_0___recurrent_group" link_name: "__lstm_group_0__+delay1@__lstm_group_0___recurrent_group"
is_sequence: false
} }
memories { memories {
layer_name: "__lstm_group_0___state@__lstm_group_0___recurrent_group" layer_name: "__lstm_group_0___state@__lstm_group_0___recurrent_group"
link_name: "__lstm_group_0___state+delay1@__lstm_group_0___recurrent_group" link_name: "__lstm_group_0___state+delay1@__lstm_group_0___recurrent_group"
is_sequence: false
} }
in_links { in_links {
layer_name: "__mixed_0__" layer_name: "__mixed_0__"
link_name: "__mixed_0__@__lstm_group_0___recurrent_group" link_name: "__mixed_0__@__lstm_group_0___recurrent_group"
has_subseq: false
} }
out_links { out_links {
layer_name: "__lstm_group_0__@__lstm_group_0___recurrent_group" layer_name: "__lstm_group_0__@__lstm_group_0___recurrent_group"
link_name: "__lstm_group_0__" link_name: "__lstm_group_0__"
has_subseq: false
} }
target_inlinkid: -1
} }
sub_models { sub_models {
name: "__lstm_group_1___recurrent_group" name: "__lstm_group_1___recurrent_group"
...@@ -373,23 +368,18 @@ sub_models { ...@@ -373,23 +368,18 @@ sub_models {
memories { memories {
layer_name: "__lstm_group_1__@__lstm_group_1___recurrent_group" layer_name: "__lstm_group_1__@__lstm_group_1___recurrent_group"
link_name: "__lstm_group_1__+delay1@__lstm_group_1___recurrent_group" link_name: "__lstm_group_1__+delay1@__lstm_group_1___recurrent_group"
is_sequence: false
} }
memories { memories {
layer_name: "__lstm_group_1___state@__lstm_group_1___recurrent_group" layer_name: "__lstm_group_1___state@__lstm_group_1___recurrent_group"
link_name: "__lstm_group_1___state+delay1@__lstm_group_1___recurrent_group" link_name: "__lstm_group_1___state+delay1@__lstm_group_1___recurrent_group"
is_sequence: false
} }
in_links { in_links {
layer_name: "__mixed_1__" layer_name: "__mixed_1__"
link_name: "__mixed_1__@__lstm_group_1___recurrent_group" link_name: "__mixed_1__@__lstm_group_1___recurrent_group"
has_subseq: false
} }
out_links { out_links {
layer_name: "__lstm_group_1__@__lstm_group_1___recurrent_group" layer_name: "__lstm_group_1__@__lstm_group_1___recurrent_group"
link_name: "__lstm_group_1__" link_name: "__lstm_group_1__"
has_subseq: false
} }
target_inlinkid: -1
} }
...@@ -138,7 +138,7 @@ layers { ...@@ -138,7 +138,7 @@ layers {
name: "__last_seq_0__" name: "__last_seq_0__"
type: "seqlastins" type: "seqlastins"
size: 200 size: 200
active_type: "linear" active_type: ""
inputs { inputs {
input_layer_name: "__recurrent_layer_0__" input_layer_name: "__recurrent_layer_0__"
} }
...@@ -149,7 +149,7 @@ layers { ...@@ -149,7 +149,7 @@ layers {
name: "__first_seq_0__" name: "__first_seq_0__"
type: "seqlastins" type: "seqlastins"
size: 200 size: 200
active_type: "linear" active_type: ""
inputs { inputs {
input_layer_name: "__recurrent_layer_1__" input_layer_name: "__recurrent_layer_1__"
} }
...@@ -161,7 +161,7 @@ layers { ...@@ -161,7 +161,7 @@ layers {
name: "__last_seq_1__" name: "__last_seq_1__"
type: "seqlastins" type: "seqlastins"
size: 200 size: 200
active_type: "linear" active_type: ""
inputs { inputs {
input_layer_name: "__lstmemory_0__" input_layer_name: "__lstmemory_0__"
} }
...@@ -172,7 +172,7 @@ layers { ...@@ -172,7 +172,7 @@ layers {
name: "__first_seq_1__" name: "__first_seq_1__"
type: "seqlastins" type: "seqlastins"
size: 200 size: 200
active_type: "linear" active_type: ""
inputs { inputs {
input_layer_name: "__lstmemory_1__" input_layer_name: "__lstmemory_1__"
} }
...@@ -184,7 +184,7 @@ layers { ...@@ -184,7 +184,7 @@ layers {
name: "__last_seq_2__" name: "__last_seq_2__"
type: "seqlastins" type: "seqlastins"
size: 200 size: 200
active_type: "linear" active_type: ""
inputs { inputs {
input_layer_name: "__gru_0__" input_layer_name: "__gru_0__"
} }
...@@ -195,7 +195,7 @@ layers { ...@@ -195,7 +195,7 @@ layers {
name: "__first_seq_2__" name: "__first_seq_2__"
type: "seqlastins" type: "seqlastins"
size: 200 size: 200
active_type: "linear" active_type: ""
inputs { inputs {
input_layer_name: "__gru_1__" input_layer_name: "__gru_1__"
} }
...@@ -12,6 +12,7 @@ layers { ...@@ -12,6 +12,7 @@ layers {
inputs { inputs {
input_layer_name: "input" input_layer_name: "input"
} }
user_arg: "layer=input %s"
} }
input_layer_names: "input" input_layer_names: "input"
output_layer_names: "input" output_layer_names: "input"
type: "nn"
layers {
name: "data"
type: "data"
size: 30
active_type: ""
layers {
name: "__repeat_layer_0__"
type: "featmap_expand"
size: 300
active_type: ""
inputs {
input_layer_name: "data"
num_filters: 10
layers {
name: "__repeat_layer_1__"
type: "featmap_expand"
size: 300
active_type: "tanh"
inputs {
input_layer_name: "data"
num_filters: 10
user_arg: "as_col_vec"
input_layer_names: "data"
output_layer_names: "__repeat_layer_0__"
output_layer_names: "__repeat_layer_1__"
sub_models {
name: "root"
layer_names: "data"
layer_names: "__repeat_layer_0__"
layer_names: "__repeat_layer_1__"
input_layer_names: "data"
output_layer_names: "__repeat_layer_0__"
output_layer_names: "__repeat_layer_1__"
is_recurrent_layer_group: false
...@@ -91,7 +91,7 @@ layers { ...@@ -91,7 +91,7 @@ layers {
name: "__last_seq_0__" name: "__last_seq_0__"
type: "seqlastins" type: "seqlastins"
size: 200 size: 200
active_type: "linear" active_type: ""
inputs { inputs {
input_layer_name: "rnn_forward" input_layer_name: "rnn_forward"
} }
...@@ -140,7 +140,7 @@ layers { ...@@ -140,7 +140,7 @@ layers {
name: "__first_seq_0__" name: "__first_seq_0__"
type: "seqlastins" type: "seqlastins"
size: 200 size: 200
active_type: "linear" active_type: ""
inputs { inputs {
input_layer_name: "rnn_back" input_layer_name: "rnn_back"
} }
...@@ -155,7 +155,7 @@ layers { ...@@ -155,7 +155,7 @@ layers {
} }
layers { layers {
name: "sub_seq_input@__recurrent_group_2__" name: "sub_seq_input@__recurrent_group_2__"
type: "sequence_scatter_agent" type: "scatter_agent"
size: 100 size: 100
active_type: "" active_type: ""
} }
...@@ -182,7 +182,7 @@ layers { ...@@ -182,7 +182,7 @@ layers {
} }
layers { layers {
name: "rnn_subseq_forward" name: "rnn_subseq_forward"
type: "sequence_gather_agent" type: "gather_agent"
size: 200 size: 200
active_type: "" active_type: ""
} }
...@@ -190,7 +190,7 @@ layers { ...@@ -190,7 +190,7 @@ layers {
name: "__last_seq_1__" name: "__last_seq_1__"
type: "seqlastins" type: "seqlastins"
size: 200 size: 200
active_type: "linear" active_type: ""
inputs { inputs {
input_layer_name: "rnn_subseq_forward" input_layer_name: "rnn_subseq_forward"
} }
...@@ -280,7 +280,7 @@ layers { ...@@ -280,7 +280,7 @@ layers {
name: "__last_seq_2__" name: "__last_seq_2__"
type: "seqlastins" type: "seqlastins"
size: 100 size: 100
active_type: "linear" active_type: ""
inputs { inputs {
input_layer_name: "__lstm_group_0__" input_layer_name: "__lstm_group_0__"
} }
...@@ -329,7 +329,7 @@ layers { ...@@ -329,7 +329,7 @@ layers {
name: "__last_seq_3__" name: "__last_seq_3__"
type: "seqlastins" type: "seqlastins"
size: 100 size: 100
active_type: "linear" active_type: ""
inputs { inputs {
input_layer_name: "__gru_group_0__" input_layer_name: "__gru_group_0__"
} }
...@@ -378,7 +378,7 @@ layers { ...@@ -378,7 +378,7 @@ layers {
name: "__last_seq_4__" name: "__last_seq_4__"
type: "seqlastins" type: "seqlastins"
size: 200 size: 200
active_type: "linear" active_type: ""
inputs { inputs {
input_layer_name: "__fc_layer_0__" input_layer_name: "__fc_layer_0__"
} }
...@@ -618,19 +618,15 @@ sub_models { ...@@ -618,19 +618,15 @@ sub_models {
memories { memories {
layer_name: "rnn_forward@__recurrent_group_0__" layer_name: "rnn_forward@__recurrent_group_0__"
link_name: "rnn_forward+delay1@__recurrent_group_0__" link_name: "rnn_forward+delay1@__recurrent_group_0__"
is_sequence: false
} }
in_links { in_links {
layer_name: "seq_input" layer_name: "seq_input"
link_name: "seq_input@__recurrent_group_0__" link_name: "seq_input@__recurrent_group_0__"
has_subseq: false
} }
out_links { out_links {
layer_name: "rnn_forward@__recurrent_group_0__" layer_name: "rnn_forward@__recurrent_group_0__"
link_name: "rnn_forward" link_name: "rnn_forward"
has_subseq: false
} }
target_inlinkid: -1
} }
sub_models { sub_models {
name: "__recurrent_group_1__" name: "__recurrent_group_1__"
...@@ -642,19 +638,15 @@ sub_models { ...@@ -642,19 +638,15 @@ sub_models {
memories { memories {
layer_name: "rnn_back@__recurrent_group_1__" layer_name: "rnn_back@__recurrent_group_1__"
link_name: "rnn_back+delay1@__recurrent_group_1__" link_name: "rnn_back+delay1@__recurrent_group_1__"
is_sequence: false
} }
in_links { in_links {
layer_name: "seq_input" layer_name: "seq_input"
link_name: "seq_input@__recurrent_group_1__" link_name: "seq_input@__recurrent_group_1__"
has_subseq: false
} }
out_links { out_links {
layer_name: "rnn_back@__recurrent_group_1__" layer_name: "rnn_back@__recurrent_group_1__"
link_name: "rnn_back" link_name: "rnn_back"
has_subseq: false
} }
target_inlinkid: -1
} }
sub_models { sub_models {
name: "__recurrent_group_2__" name: "__recurrent_group_2__"
...@@ -666,19 +658,15 @@ sub_models { ...@@ -666,19 +658,15 @@ sub_models {
memories { memories {
layer_name: "rnn_subseq_forward@__recurrent_group_2__" layer_name: "rnn_subseq_forward@__recurrent_group_2__"
link_name: "rnn_subseq_forward+delay1@__recurrent_group_2__" link_name: "rnn_subseq_forward+delay1@__recurrent_group_2__"
is_sequence: false
} }
in_links { in_links {
layer_name: "sub_seq_input" layer_name: "sub_seq_input"
link_name: "sub_seq_input@__recurrent_group_2__" link_name: "sub_seq_input@__recurrent_group_2__"
has_subseq: true
} }
out_links { out_links {
layer_name: "rnn_subseq_forward@__recurrent_group_2__" layer_name: "rnn_subseq_forward@__recurrent_group_2__"
link_name: "rnn_subseq_forward" link_name: "rnn_subseq_forward"
has_subseq: true
} }
target_inlinkid: -1
} }
sub_models { sub_models {
name: "__lstm_group_0___recurrent_group" name: "__lstm_group_0___recurrent_group"
...@@ -693,24 +681,19 @@ sub_models { ...@@ -693,24 +681,19 @@ sub_models {
memories { memories {
layer_name: "__lstm_group_0__@__lstm_group_0___recurrent_group" layer_name: "__lstm_group_0__@__lstm_group_0___recurrent_group"
link_name: "__lstm_group_0__+delay1@__lstm_group_0___recurrent_group" link_name: "__lstm_group_0__+delay1@__lstm_group_0___recurrent_group"
is_sequence: false
} }
memories { memories {
layer_name: "__lstm_group_0___state@__lstm_group_0___recurrent_group" layer_name: "__lstm_group_0___state@__lstm_group_0___recurrent_group"
link_name: "__lstm_group_0___state+delay1@__lstm_group_0___recurrent_group" link_name: "__lstm_group_0___state+delay1@__lstm_group_0___recurrent_group"
is_sequence: false
} }
in_links { in_links {
layer_name: "__mixed_0__" layer_name: "__mixed_0__"
link_name: "__mixed_0__@__lstm_group_0___recurrent_group" link_name: "__mixed_0__@__lstm_group_0___recurrent_group"
has_subseq: false
} }
out_links { out_links {
layer_name: "__lstm_group_0__@__lstm_group_0___recurrent_group" layer_name: "__lstm_group_0__@__lstm_group_0___recurrent_group"
link_name: "__lstm_group_0__" link_name: "__lstm_group_0__"
has_subseq: false
} }
target_inlinkid: -1
} }
sub_models { sub_models {
name: "__gru_group_0___recurrent_group" name: "__gru_group_0___recurrent_group"
...@@ -722,19 +705,15 @@ sub_models { ...@@ -722,19 +705,15 @@ sub_models {
memories { memories {
layer_name: "__gru_group_0__@__gru_group_0___recurrent_group" layer_name: "__gru_group_0__@__gru_group_0___recurrent_group"
link_name: "__gru_group_0__+delay1@__gru_group_0___recurrent_group" link_name: "__gru_group_0__+delay1@__gru_group_0___recurrent_group"
is_sequence: false
} }
in_links { in_links {
layer_name: "__mixed_1__" layer_name: "__mixed_1__"
link_name: "__mixed_1__@__gru_group_0___recurrent_group" link_name: "__mixed_1__@__gru_group_0___recurrent_group"
has_subseq: false
} }
out_links { out_links {
layer_name: "__gru_group_0__@__gru_group_0___recurrent_group" layer_name: "__gru_group_0__@__gru_group_0___recurrent_group"
link_name: "__gru_group_0__" link_name: "__gru_group_0__"
has_subseq: false
} }
target_inlinkid: -1
} }
sub_models { sub_models {
name: "__recurrent_group_3__" name: "__recurrent_group_3__"
...@@ -746,18 +725,14 @@ sub_models { ...@@ -746,18 +725,14 @@ sub_models {
memories { memories {
layer_name: "__fc_layer_0__@__recurrent_group_3__" layer_name: "__fc_layer_0__@__recurrent_group_3__"
link_name: "__memory_6__@__recurrent_group_3__" link_name: "__memory_6__@__recurrent_group_3__"
is_sequence: false
} }
in_links { in_links {
layer_name: "seq_input" layer_name: "seq_input"
link_name: "seq_input@__recurrent_group_3__" link_name: "seq_input@__recurrent_group_3__"
has_subseq: false
} }
out_links { out_links {
layer_name: "__fc_layer_0__@__recurrent_group_3__" layer_name: "__fc_layer_0__@__recurrent_group_3__"
link_name: "__fc_layer_0__" link_name: "__fc_layer_0__"
has_subseq: false
} }
target_inlinkid: -1
} }
...@@ -27,7 +27,7 @@ layers { ...@@ -27,7 +27,7 @@ layers {
name: "__seqreshape_0__" name: "__seqreshape_0__"
type: "seqreshape" type: "seqreshape"
size: 5 size: 5
active_type: "linear" active_type: ""
inputs { inputs {
input_layer_name: "data1" input_layer_name: "data1"
} }
...@@ -9,7 +9,7 @@ layers { ...@@ -9,7 +9,7 @@ layers {
name: "__seq_pooling_0__" name: "__seq_pooling_0__"
type: "max" type: "max"
size: 100 size: 100
active_type: "linear" active_type: ""
inputs { inputs {
input_layer_name: "dat_in" input_layer_name: "dat_in"
} }
...@@ -19,7 +19,7 @@ layers { ...@@ -19,7 +19,7 @@ layers {
name: "__seq_pooling_1__" name: "__seq_pooling_1__"
type: "max" type: "max"
size: 100 size: 100
active_type: "linear" active_type: ""
inputs { inputs {
input_layer_name: "dat_in" input_layer_name: "dat_in"
} }
...@@ -29,7 +29,7 @@ layers { ...@@ -29,7 +29,7 @@ layers {
name: "__seq_pooling_2__" name: "__seq_pooling_2__"
type: "average" type: "average"
size: 100 size: 100
active_type: "linear" active_type: ""
inputs { inputs {
input_layer_name: "dat_in" input_layer_name: "dat_in"
} }
...@@ -40,7 +40,7 @@ layers { ...@@ -40,7 +40,7 @@ layers {
name: "__seq_pooling_3__" name: "__seq_pooling_3__"
type: "average" type: "average"
size: 100 size: 100
active_type: "linear" active_type: ""
inputs { inputs {
input_layer_name: "dat_in" input_layer_name: "dat_in"
} }
...@@ -51,7 +51,7 @@ layers { ...@@ -51,7 +51,7 @@ layers {
name: "__seq_pooling_4__" name: "__seq_pooling_4__"
type: "average" type: "average"
size: 100 size: 100
active_type: "linear" active_type: ""
inputs { inputs {
input_layer_name: "dat_in" input_layer_name: "dat_in"
} }
...@@ -62,7 +62,7 @@ layers { ...@@ -62,7 +62,7 @@ layers {
name: "__seq_pooling_5__" name: "__seq_pooling_5__"
type: "average" type: "average"
size: 100 size: 100
active_type: "linear" active_type: ""
inputs { inputs {
input_layer_name: "dat_in" input_layer_name: "dat_in"
} }
...@@ -73,7 +73,7 @@ layers { ...@@ -73,7 +73,7 @@ layers {
name: "__seq_pooling_6__" name: "__seq_pooling_6__"
type: "max" type: "max"
size: 100 size: 100
active_type: "linear" active_type: ""
inputs { inputs {
input_layer_name: "dat_in" input_layer_name: "dat_in"
} }
...@@ -260,7 +260,7 @@ def parse_network(output_layers, extra_layers=None): ...@@ -260,7 +260,7 @@ def parse_network(output_layers, extra_layers=None):
else: else:
extra_layers = [] extra_layers = []
layer_names = __get_used_layers__(output_layers + extra_layers) layer_names = __get_used_layers__(list(output_layers) + list(extra_layers))
submodel_names = __get_used_submodels__(layer_names) submodel_names = __get_used_submodels__(layer_names)
submodel_names.add('root') submodel_names.add('root')
evaluator_names = __get_used_evaluators__(layer_names) evaluator_names = __get_used_evaluators__(layer_names)
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
想要评论请 注册