提交 15bf6e05 编写于 作者: Z zlx

Merge branch 'develop' of https://github.com/PaddlePaddle/Paddle into improve_pruning

......@@ -47,6 +47,7 @@ option(WITH_COVERAGE "Compile PaddlePaddle with code coverage" OFF)
option(COVERALLS_UPLOAD "Package code coverage data to coveralls" OFF)
option(ON_TRAVIS "Exclude special unit test on Travis CI" OFF)
option(WITH_C_API "Compile PaddlePaddle with C-API(Prediction)" OFF)
option(WITH_GOLANG "Compile PaddlePaddle with GOLANG" OFF)
# CMAKE_BUILD_TYPE
if(NOT CMAKE_BUILD_TYPE)
......@@ -107,6 +108,7 @@ include(configure) # add paddle env configuration
include_directories("${PROJ_ROOT}")
include_directories("${PROJ_ROOT}/paddle/cuda/include")
include_directories("${CMAKE_CURRENT_BINARY_DIR}/proto")
include_directories("${CMAKE_CURRENT_BINARY_DIR}/go/pserver/cclient")
set(EXTERNAL_LIBS
${GFLAGS_LIBRARIES}
......@@ -126,9 +128,12 @@ endif(WITH_GPU)
add_subdirectory(proto)
add_subdirectory(paddle)
add_subdirectory(go/master/c)
add_subdirectory(python)
add_subdirectory(go/pserver/cclient)
if(WITH_GOLANG)
#TODO (add go/master/c back when fixed)
add_subdirectory(go/pserver/cclient)
endif(WITH_GOLANG)
if(WITH_DOC)
add_subdirectory(doc)
......
......@@ -40,6 +40,10 @@ if(NOT CMAKE_CROSSCOMPILING)
endif()
endif()
if(NOT WITH_GOLANG)
add_definitions(-DPADDLE_WITHOUT_GOLANG)
endif(NOT WITH_GOLANG)
if(NOT WITH_GPU)
add_definitions(-DPADDLE_ONLY_CPU)
add_definitions(-DHPPL_STUB_FUNC)
......
......@@ -11,11 +11,12 @@
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
#
# To simplify the build process of PaddlePaddle, we defined couple of
# fundamental abstractions, e.g., how to build library, binary and
# test in C++, CUDA and Go.
# generic.cmake defines CMakes functions that look like Bazel's
# building rules (https://bazel.build/).
#
#
# -------------------------------------------
# C++ CUDA C++ Go
......@@ -25,27 +26,131 @@
# cc_test nv_test go_test
# -------------------------------------------
#
# cmake_parse_arguments can help us to achieve this goal.
# https://cmake.org/cmake/help/v3.0/module/CMakeParseArguments.html
# To build a static library example.a from example.cc using the system
# compiler (like GCC):
#
# cc_library(example SRCS example.cc)
#
# To build a static library example.a from multiple source files
# example{1,2,3}.cc:
#
# cc_library(example SRCS example1.cc example2.cc example3.cc)
#
# To build a shared library example.so from example.cc:
#
# cc_library(example SHARED SRCS example.cc)
#
# To build a library using Nvidia's NVCC from .cu file(s), use the nv_
# prefixed version:
#
# nv_library(example SRCS example.cu)
#
# To specify that a library new_example.a depends on other libraies:
#
# cc_library(new_example SRCS new_example.cc DEPS example)
#
# Static libraries can be composed of other static libraries:
#
# cc_library(composed DEPS dependent1 dependent2 dependent3)
#
# To build an executable binary file from some source files and
# dependent libraries:
#
# cc_binary(example SRCS main.cc something.cc DEPS example1 example2)
#
# To build an executable binary file using NVCC, use the nv_ prefixed
# version:
#
# nv_binary(example SRCS main.cc something.cu DEPS example1 example2)
#
# To build a unit test binary, which is an executable binary with
# GoogleTest linked:
#
# cc_test(example_test SRCS example_test.cc DEPS example)
#
# To build a unit test binary using NVCC, use the nv_ prefixed version:
#
# nv_test(example_test SRCS example_test.cu DEPS example)
#
# It is pretty often that executable and test binaries depend on
# pre-defined external libaries like glog and gflags defined in
# /cmake/external/*.cmake:
#
# cc_test(example_test SRCS example_test.cc DEPS example glog gflags)
if(NOT APPLE)
find_package(Threads REQUIRED)
link_libraries(${CMAKE_THREAD_LIBS_INIT})
endif(NOT APPLE)
# cc_library parses tensor.cc and figures out that target also depend on tensor.h.
# cc_library(tensor
# SRCS
# tensor.cc
# DEPS
# variant)
function(merge_static_libs TARGET_NAME)
set(libs ${ARGN})
list(REMOVE_DUPLICATES libs)
# First get the file names of the libraries to be merged
foreach(lib ${libs})
get_target_property(libtype ${lib} TYPE)
if(NOT libtype STREQUAL "STATIC_LIBRARY")
message(FATAL_ERROR "merge_static_libs can only process static libraries")
endif()
set(libfiles ${libfiles} $<TARGET_FILE:${lib}>)
endforeach()
if(APPLE) # Use OSX's libtool to merge archives
add_custom_target(${TARGET_NAME}_archive
COMMAND libtool -static -o "${CMAKE_CURRENT_BINARY_DIR}/lib${TARGET_NAME}.a" ${libfiles}
WORKING_DIRECTORY ${CMAKE_CURRENT_BINARY_DIR}
DEPENDS ${libs}
)
add_library(${TARGET_NAME} STATIC IMPORTED GLOBAL)
set_property(TARGET ${TARGET_NAME} PROPERTY
IMPORTED_LOCATION "${CMAKE_CURRENT_BINARY_DIR}/lib${TARGET_NAME}.a")
add_dependencies(${TARGET_NAME} ${TARGET_NAME}_archive)
else() # general UNIX: use "ar" to extract objects and re-add to a common lib
foreach(lib ${libs})
set(objlistfile ${lib}.objlist) # list of objects in the input library
set(objdir ${lib}.objdir)
add_custom_command(OUTPUT ${objdir}
COMMAND ${CMAKE_COMMAND} -E make_directory ${objdir})
add_custom_command(OUTPUT ${objlistfile}
COMMAND ${CMAKE_AR} -x "$<TARGET_FILE:${lib}>"
COMMAND ${CMAKE_AR} -t "$<TARGET_FILE:${lib}>" > ../${objlistfile}
DEPENDS ${lib} ${objdir}
WORKING_DIRECTORY ${objdir})
# Empty dummy source file that goes into merged library
set(mergebase ${lib}.mergebase.c)
add_custom_command(OUTPUT ${mergebase}
COMMAND ${CMAKE_COMMAND} -E touch ${mergebase}
DEPENDS ${objlistfile})
list(APPEND mergebases "${mergebase}")
endforeach()
# We need a target for the output merged library
add_library(${TARGET_NAME} STATIC ${mergebases})
set(outlibfile "$<TARGET_FILE:${TARGET_NAME}>")
foreach(lib ${libs})
add_custom_command(TARGET ${TARGET_NAME} POST_BUILD
COMMAND ${CMAKE_AR} ru ${outlibfile} @"../${objlistfile}"
WORKING_DIRECTORY ${objdir})
endforeach()
add_custom_command(TARGET ${TARGET_NAME} POST_BUILD
COMMAND ${CMAKE_RANLIB} ${outlibfile})
endif()
endfunction(merge_static_libs)
function(cc_library TARGET_NAME)
set(options OPTIONAL)
set(options STATIC static SHARED shared)
set(oneValueArgs "")
set(multiValueArgs SRCS DEPS)
cmake_parse_arguments(cc_library "${options}" "${oneValueArgs}" "${multiValueArgs}" ${ARGN})
if (${cc_library_OPTIONAL} STREQUAL "SHARED")
if (cc_library_SRCS)
if (cc_library_SHARED OR cc_library_shared) # build *.so
add_library(${TARGET_NAME} SHARED ${cc_library_SRCS})
else()
add_library(${TARGET_NAME} STATIC ${cc_library_SRCS})
......@@ -53,14 +158,17 @@ function(cc_library TARGET_NAME)
if (cc_library_DEPS)
add_dependencies(${TARGET_NAME} ${cc_library_DEPS})
endif()
else(cc_library_SRCS)
if (cc_library_DEPS)
merge_static_libs(${TARGET_NAME} ${cc_library_DEPS})
else()
message(FATAL "Please specify source file or library in cc_library.")
endif()
endif(cc_library_SRCS)
endfunction(cc_library)
# cc_binary parses tensor.cc and figures out that target also depend on tensor.h.
# cc_binary(tensor
# SRCS
# tensor.cc)
function(cc_binary TARGET_NAME)
set(options OPTIONAL)
set(options "")
set(oneValueArgs "")
set(multiValueArgs SRCS DEPS)
cmake_parse_arguments(cc_binary "${options}" "${oneValueArgs}" "${multiValueArgs}" ${ARGN})
......@@ -71,13 +179,6 @@ function(cc_binary TARGET_NAME)
endif()
endfunction(cc_binary)
# The dependency to target tensor implies that if any of
# tensor{.h,.cc,_test.cc} is changed, tensor_test need to be re-built.
# cc_test(tensor_test
# SRCS
# tensor_test.cc
# DEPS
# tensor)
function(cc_test TARGET_NAME)
if(WITH_TESTING)
set(options "")
......@@ -91,21 +192,14 @@ function(cc_test TARGET_NAME)
endif()
endfunction(cc_test)
# Suppose that ops.cu includes global functions that take Tensor as
# their parameters, so ops depend on tensor. This implies that if
# any of tensor.{h.cc}, ops.{h,cu} is changed, ops need to be re-built.
# nv_library(ops
# SRCS
# ops.cu
# DEPS
# tensor)
function(nv_library TARGET_NAME)
if (WITH_GPU)
set(options OPTIONAL)
set(options STATIC static SHARED shared)
set(oneValueArgs "")
set(multiValueArgs SRCS DEPS)
cmake_parse_arguments(nv_library "${options}" "${oneValueArgs}" "${multiValueArgs}" ${ARGN})
if (${nv_library_OPTIONAL} STREQUAL "SHARED")
if(nv_library_SRCS)
if (nv_library_SHARED OR nv_library_shared) # build *.so
cuda_add_library(${TARGET_NAME} SHARED ${nv_library_SRCS})
else()
cuda_add_library(${TARGET_NAME} STATIC ${nv_library_SRCS})
......@@ -113,6 +207,13 @@ function(nv_library TARGET_NAME)
if (nv_library_DEPS)
add_dependencies(${TARGET_NAME} ${nv_library_DEPS})
endif()
else(nv_library_SRCS)
if (nv_library_DEPS)
merge_static_libs(${TARGET_NAME} ${nv_library_DEPS})
else()
message(FATAL "Please specify source file or library in nv_library.")
endif()
endif(nv_library_SRCS)
endif()
endfunction(nv_library)
......@@ -130,13 +231,6 @@ function(nv_binary TARGET_NAME)
endif()
endfunction(nv_binary)
# The dependency to target tensor implies that if any of
# ops{.h,.cu,_test.cu} is changed, ops_test need to be re-built.
# nv_test(ops_test
# SRCS
# ops_test.cu
# DEPS
# ops)
function(nv_test TARGET_NAME)
if (WITH_GPU AND WITH_TESTING)
set(options "")
......
......@@ -84,6 +84,7 @@ function(link_paddle_exe TARGET_NAME)
paddle_parameter
paddle_proto
paddle_cuda
paddle_optimizer
${EXTERNAL_LIBS}
${CMAKE_THREAD_LIBS_INIT}
${CMAKE_DL_LIBS}
......
......@@ -11,13 +11,4 @@ include(flags)
go_library(paddle_pserver_cclient STATIC)
if(PROJ_ROOT)
add_custom_command(OUTPUT ${PROJ_ROOT}/paddle/trainer/libpaddle_pserver_cclient.a
COMMAND cp ${CMAKE_CURRENT_BINARY_DIR}/libpaddle_pserver_cclient.h ${PROJ_ROOT}/paddle/trainer/
COMMAND cp ${CMAKE_CURRENT_BINARY_DIR}/libpaddle_pserver_cclient.a ${PROJ_ROOT}/paddle/trainer/
WORKING_DIRECTORY ${PROJ_ROOT}/paddle
DEPENDS paddle_pserver_cclient)
add_custom_target(paddle_pserver_cclient_lib ALL DEPENDS ${PROJ_ROOT}/paddle/trainer/libpaddle_pserver_cclient.a)
endif(PROJ_ROOT)
add_subdirectory(test)
......@@ -8,6 +8,7 @@ add_subdirectory(gserver)
add_subdirectory(pserver)
add_subdirectory(trainer)
add_subdirectory(scripts)
add_subdirectory(optimizer)
add_subdirectory(strings)
# Do not build go directory until go cmake is working smoothly.
......@@ -19,8 +20,8 @@ find_package(Boost QUIET)
if(Boost_FOUND)
include_directories(${Boost_INCLUDE_DIRS})
include_directories(${CMAKE_CURRENT_SOURCE_DIR})
add_subdirectory(majel)
add_subdirectory(platform)
add_subdirectory(framework)
endif()
if(WITH_C_API)
......
......@@ -16,7 +16,7 @@ set(API_HEADER
Internal.h)
add_library(paddle_api STATIC ${API_SOURCES})
add_dependencies(paddle_api gen_proto_cpp paddle_pserver_cclient_lib)
add_dependencies(paddle_api gen_proto_cpp paddle_trainer_lib)
INCLUDE(${SWIG_USE_FILE})
INCLUDE_DIRECTORIES(${PROJ_ROOT}/paddle)
......
......@@ -842,7 +842,8 @@ public:
int passCount,
bool useSparseUpdater);
static ParameterUpdater* createNewRemoteUpdater(
OptimizationConfig* config, const std::string pserverSpec);
OptimizationConfig* config,
const std::string pserverSpec) throw(UnsupportError);
~ParameterUpdater();
/**
......
......@@ -15,7 +15,9 @@ limitations under the License. */
#include "PaddleAPI.h"
#include "PaddleAPIPrivate.h"
#ifndef PADDLE_WITHOUT_GOLANG
#include "paddle/trainer/NewRemoteParameterUpdater.h"
#endif
#include "paddle/trainer/RemoteParameterUpdater.h"
#include "paddle/trainer/ThreadParameterUpdater.h"
......@@ -30,11 +32,16 @@ ParameterUpdater *ParameterUpdater::createLocalUpdater(
}
ParameterUpdater *ParameterUpdater::createNewRemoteUpdater(
OptimizationConfig *config, const std::string pserverSpec) {
OptimizationConfig *config,
const std::string pserverSpec) throw(UnsupportError) {
#ifndef PADDLE_WITHOUT_GOLANG
auto updater = new ParameterUpdater();
updater->m->updater.reset(new paddle::NewRemoteParameterUpdater(
config->m->getConfig(), pserverSpec));
return updater;
#else
throw UnsupportError();
#endif
}
ParameterUpdater *ParameterUpdater::createRemoteUpdater(
......
---
Language: Cpp
BasedOnStyle: Google
Standard: Cpp11
...
cc_library(ddim SRCS ddim.cc)
cc_test(ddim_test SRCS ddim_test.cc DEPS ddim)
nv_test(dim_test SRCS dim_test.cu DEPS ddim)
#include "paddle/majel/ddim.h"
#include "paddle/framework/ddim.h"
namespace majel {
namespace paddle {
namespace framework {
///@cond HIDDEN
......@@ -66,7 +67,7 @@ DDim make_ddim(const std::vector<int>& dims) {
///@cond HIDDEN
// XXX For some reason, putting this in an anonymous namespace causes errors
class DynamicMutableIndexer : public boost::static_visitor<int&> {
public:
public:
DynamicMutableIndexer(int idx) : idx_(idx) {}
template <int D>
......@@ -74,12 +75,12 @@ public:
return dim[idx_];
}
private:
private:
int idx_;
};
class DynamicConstIndexer : public boost::static_visitor<int> {
public:
public:
DynamicConstIndexer(int idx) : idx_(idx) {}
template <int D>
......@@ -87,7 +88,7 @@ public:
return dim[idx_];
}
private:
private:
int idx_;
};
......@@ -213,10 +214,11 @@ struct DDimPrinter : boost::static_visitor<void> {
///\endcond
std::ostream& operator<<(std::ostream& os, const majel::DDim& ddim) {
std::ostream& operator<<(std::ostream& os, const DDim& ddim) {
DDimPrinter printer(os);
boost::apply_visitor(printer, ddim);
return os;
}
} // namespace majel
} // namespace framework
} // namespace paddle
......@@ -5,20 +5,14 @@
#include <stdexcept>
#include <vector>
#include "paddle/majel/dim.h"
#include "paddle/framework/dim.h"
namespace majel {
namespace paddle {
namespace framework {
namespace {
typedef boost::variant<Dim<1>,
Dim<2>,
Dim<3>,
Dim<4>,
Dim<5>,
Dim<6>,
Dim<7>,
Dim<8>,
Dim<9>>
typedef boost::variant<Dim<1>, Dim<2>, Dim<3>, Dim<4>, Dim<5>, Dim<6>, Dim<7>,
Dim<8>, Dim<9>>
DDimVar;
}
......@@ -95,14 +89,15 @@ ssize_t product(const DDim& ddim);
int arity(const DDim& ddim);
std::ostream& operator<<(std::ostream&, const majel::DDim&);
std::ostream& operator<<(std::ostream&, const DDim&);
} // namespace majel
} // namespace framework
} // namespace paddle
namespace boost {
template <typename T>
T get(const majel::DDim& in) {
T get(const paddle::framework::DDim& in) {
return boost::get<T>(in.var);
}
......
......@@ -4,18 +4,18 @@
#include <vector>
#include "gtest/gtest.h"
#include "paddle/majel/ddim.h"
#include "paddle/framework/ddim.h"
TEST(DDim, Equality) {
// construct a DDim from an initialization list
majel::DDim ddim = majel::make_ddim({9, 1, 5});
paddle::framework::DDim ddim = paddle::framework::make_ddim({9, 1, 5});
EXPECT_EQ(ddim[0], 9);
EXPECT_EQ(ddim[1], 1);
EXPECT_EQ(ddim[2], 5);
// construct a DDim from a vector
std::vector<int> vec({9, 1, 5});
majel::DDim vddim = majel::make_ddim(vec);
paddle::framework::DDim vddim = paddle::framework::make_ddim(vec);
EXPECT_EQ(ddim[0], 9);
EXPECT_EQ(ddim[1], 1);
EXPECT_EQ(ddim[2], 5);
......@@ -23,43 +23,43 @@ TEST(DDim, Equality) {
// mutate a DDim
ddim[1] = 2;
EXPECT_EQ(ddim[1], 2);
majel::set(ddim, 0, 6);
EXPECT_EQ(majel::get(ddim, 0), 6);
paddle::framework::set(ddim, 0, 6);
EXPECT_EQ(paddle::framework::get(ddim, 0), 6);
// vectorize a DDim
std::vector<int> res_vec = majel::vectorize(vddim);
std::vector<int> res_vec = paddle::framework::vectorize(vddim);
EXPECT_EQ(res_vec[0], 9);
EXPECT_EQ(res_vec[1], 1);
EXPECT_EQ(res_vec[2], 5);
majel::Dim<3> d(3, 2, 1);
res_vec = majel::vectorize(majel::DDim(d));
paddle::framework::Dim<3> d(3, 2, 1);
res_vec = paddle::framework::vectorize(paddle::framework::DDim(d));
EXPECT_EQ(res_vec[0], 3);
EXPECT_EQ(res_vec[1], 2);
EXPECT_EQ(res_vec[2], 1);
// add two DDims
majel::DDim ddim_sum = ddim + vddim;
paddle::framework::DDim ddim_sum = ddim + vddim;
EXPECT_EQ(ddim_sum[0], 15);
EXPECT_EQ(ddim_sum[1], 3);
EXPECT_EQ(ddim_sum[2], 10);
// multiply two DDims
majel::DDim ddim_mul = ddim * vddim;
paddle::framework::DDim ddim_mul = ddim * vddim;
EXPECT_EQ(ddim_mul[0], 54);
EXPECT_EQ(ddim_mul[1], 2);
EXPECT_EQ(ddim_mul[2], 25);
// arity of a DDim
EXPECT_EQ(majel::arity(ddim), 3);
EXPECT_EQ(paddle::framework::arity(ddim), 3);
// product of a DDim
EXPECT_EQ(majel::product(vddim), 45);
EXPECT_EQ(paddle::framework::product(vddim), 45);
}
TEST(DDim, Print) {
// print a DDim
std::stringstream ss;
majel::DDim ddim = majel::make_ddim({2, 3, 4});
paddle::framework::DDim ddim = paddle::framework::make_ddim({2, 3, 4});
ss << ddim;
EXPECT_EQ("2, 3, 4", ss.str());
}
......@@ -5,10 +5,11 @@
#include <stdexcept>
#include <type_traits>
#include "paddle/majel/detail/cuda_assert.h"
#include "paddle/majel/detail/hostdevice.h"
#include "paddle/platform/assert.h"
#include "paddle/platform/hostdevice.h"
namespace majel {
namespace paddle {
namespace framework {
// Statically sized, statically indexed dimension
template <int i>
......@@ -74,7 +75,7 @@ struct Dim<1> {
throw std::invalid_argument("Index out of range.");
}
#else
MAJEL_ASSERT(idx < size.head);
PADDLE_ASSERT(idx < size.head);
#endif
}
......@@ -131,7 +132,7 @@ HOSTDEVICE int& indexer(Dim<D>& dim, int idx) {
throw std::invalid_argument("Tried to access a negative dimension");
}
#else
MAJEL_ASSERT(idx >= 0);
PADDLE_ASSERT(idx >= 0);
#endif
if (idx == 0) {
return dim.head;
......@@ -146,7 +147,7 @@ HOSTDEVICE int& indexer<1>(Dim<1>& dim, int idx) {
throw std::invalid_argument("Invalid index");
}
#else
MAJEL_ASSERT(idx == 0);
PADDLE_ASSERT(idx == 0);
#endif
return dim.head;
}
......@@ -158,7 +159,7 @@ HOSTDEVICE int indexer(const Dim<D>& dim, int idx) {
throw std::invalid_argument("Tried to access a negative dimension");
}
#else
MAJEL_ASSERT(idx >= 0);
PADDLE_ASSERT(idx >= 0);
#endif
if (idx == 0) {
return dim.head;
......@@ -173,7 +174,7 @@ HOSTDEVICE int indexer<1>(const Dim<1>& dim, int idx) {
throw std::invalid_argument("Invalid index");
}
#else
MAJEL_ASSERT(idx == 0);
PADDLE_ASSERT(idx == 0);
#endif
return dim.head;
}
......@@ -411,7 +412,7 @@ HOSTDEVICE Dim<sizeof...(Args)> make_dim(Args... idxes) {
// XXX For some reason, overloading fails to resolve this correctly
template <int i>
typename std::enable_if<(i > 1), std::ostream&>::type operator<<(
std::ostream& os, const majel::Dim<i>& d) {
std::ostream& os, const Dim<i>& d) {
os << d.head << ", " << d.tail;
return os;
}
......@@ -420,7 +421,7 @@ typename std::enable_if<(i > 1), std::ostream&>::type operator<<(
// XXX I wish this could be an overload instead of a template
template <int i>
typename std::enable_if<(i == 1), std::ostream&>::type operator<<(
std::ostream& os, const majel::Dim<i>& d) {
std::ostream& os, const Dim<i>& d) {
os << d.head;
return os;
}
......@@ -448,4 +449,5 @@ HOSTDEVICE Dim<D> linear_to_dimension(int linear_index, Dim<D> extents) {
return result;
}
} // namespace majel
} // namespace framework
} // namespace paddle
#include <thrust/device_vector.h>
#include <sstream>
#include "paddle/framework/dim.h"
#include "gtest/gtest.h"
__global__ void test(paddle::framework::Dim<2>* o) {
o[0] = paddle::framework::make_dim(5, 6);
}
__global__ void dyn_idx_gpu(int* o) {
auto d = paddle::framework::make_dim(5, 6);
o[0] = d[1];
}
TEST(Dim, Equality) {
// construct a Dim on the CPU
auto a = paddle::framework::make_dim(3, 4);
EXPECT_EQ(paddle::framework::get<0>(a), 3);
EXPECT_EQ(paddle::framework::get<1>(a), 4);
// construct a Dim on the GPU
thrust::device_vector<paddle::framework::Dim<2>> t(2);
test<<<1,1>>>(thrust::raw_pointer_cast(t.data()));
a = t[0];
EXPECT_EQ(paddle::framework::get<0>(a), 5);
EXPECT_EQ(paddle::framework::get<1>(a), 6);
// linearization
auto b = paddle::framework::make_dim(7, 8);
EXPECT_EQ(paddle::framework::linearize(a, b), 83);
// product
EXPECT_EQ(paddle::framework::product(a), 30);
// mutate a Dim
paddle::framework::get<1>(b) = 10;
EXPECT_EQ(paddle::framework::get<0>(b), 7);
EXPECT_EQ(paddle::framework::get<1>(b), 10);
// dynamic access
paddle::framework::get(b, 0) = 8;
b[1] = 11;
EXPECT_EQ(paddle::framework::get<0>(b), 8);
EXPECT_EQ(paddle::framework::get<1>(b), 11);
EXPECT_EQ(paddle::framework::get(b, 0), 8);
EXPECT_EQ(b[1], 11);
// dynamic access on GPU
thrust::device_vector<int> r(1);
dyn_idx_gpu<<<1,1>>>(thrust::raw_pointer_cast(r.data()));
int res = r[0];
EXPECT_EQ(res, 6);
// ex_prefix_mul
paddle::framework::Dim<3> c = paddle::framework::ex_prefix_mul(paddle::framework::Dim<3>(3, 4, 5));
EXPECT_EQ(paddle::framework::get<0>(c), 1);
EXPECT_EQ(paddle::framework::get<1>(c), 3);
EXPECT_EQ(paddle::framework::get<2>(c), 12);
// contiguous_strides
c = paddle::framework::contiguous_strides(paddle::framework::Dim<3>(10, 1, 10));
EXPECT_EQ(paddle::framework::get<0>(c), 1);
EXPECT_EQ(paddle::framework::get<1>(c), 0);
EXPECT_EQ(paddle::framework::get<2>(c), 10);
c = paddle::framework::contiguous_strides(paddle::framework::Dim<3>(10, 10, 1));
EXPECT_EQ(paddle::framework::get<0>(c), 1);
EXPECT_EQ(paddle::framework::get<1>(c), 10);
EXPECT_EQ(paddle::framework::get<2>(c), 0);
c = paddle::framework::contiguous_strides(paddle::framework::Dim<3>(1, 10, 10));
EXPECT_EQ(paddle::framework::get<0>(c), 0);
EXPECT_EQ(paddle::framework::get<1>(c), 1);
EXPECT_EQ(paddle::framework::get<2>(c), 10);
c = paddle::framework::contiguous_strides(paddle::framework::Dim<3>(2, 3, 4));
EXPECT_EQ(paddle::framework::get<0>(c), 1);
EXPECT_EQ(paddle::framework::get<1>(c), 2);
EXPECT_EQ(paddle::framework::get<2>(c), 6);
// generate from an index
auto size = paddle::framework::make_dim(4, 5, 2);
c = paddle::framework::Dim<3>(14, size);
EXPECT_EQ(paddle::framework::get<0>(c), 2);
EXPECT_EQ(paddle::framework::get<1>(c), 3);
EXPECT_EQ(paddle::framework::get<2>(c), 0);
c = paddle::framework::Dim<3>(25, size);
EXPECT_EQ(paddle::framework::get<0>(c), 1);
EXPECT_EQ(paddle::framework::get<1>(c), 1);
EXPECT_EQ(paddle::framework::get<2>(c), 1);
}
TEST(Dim, Bool) {
auto a = paddle::framework::make_dim(3, 4);
auto b = paddle::framework::make_dim(5, 6);
auto c = paddle::framework::make_dim(3, 4);
// in_bounds check
EXPECT_TRUE(paddle::framework::contained(a, b));
EXPECT_FALSE(paddle::framework::contained(b, a));
// comparison
EXPECT_TRUE(a == a);
EXPECT_FALSE(a == b);
EXPECT_TRUE(a == c);
// contiguous check
int x = 4, y = 5, z = 2;
paddle::framework::Dim<3> sizef(x, y, z);
paddle::framework::Dim<3> stridea(1, x, x*y);
paddle::framework::Dim<3> strideb(2, 2*x, 2*x*y);
paddle::framework::Dim<3> stridec(1, x, 2*x*y);
EXPECT_TRUE(paddle::framework::contiguous(sizef, stridea));
EXPECT_FALSE(paddle::framework::contiguous(sizef, strideb));
EXPECT_FALSE(paddle::framework::contiguous(sizef, stridec));
}
TEST(Dim, Print) {
{
std::stringstream ss;
auto a = paddle::framework::make_dim(2, 3);
ss << a;
EXPECT_EQ(ss.str(), "2, 3");
}
{
std::stringstream ss;
ss << paddle::framework::make_dim(8);
EXPECT_EQ(ss.str(), "8");
}
}
......@@ -68,12 +68,10 @@ public:
numOutputs_ = 1;
}
virtual void calc(const BufferArgs& inputs, const BufferArgs& outputs) {}
// input can be INPUT and INPUT_GRAD
// filter can be FILTER and FILTER_GRAD
// output can be OUTPUT and OUTPUT_GRAD
void check(const TensorShape& input,
void checkShape(const TensorShape& input,
const TensorShape& filter,
const TensorShape& output) {
// inputs and outputs arguments should be 4-dimensional.
......
......@@ -117,15 +117,23 @@ public:
ConvFunctionBase::init(config);
}
virtual void check(const BufferArgs& inputs,
const BufferArgs& outputs) override {
const TensorShape& input = inputs[0].shape();
const TensorShape& filter = inputs[1].shape();
const TensorShape& output = outputs[0].shape();
checkShape(input, filter, output);
}
void calc(const BufferArgs& inputs, const BufferArgs& outputs) override {
CHECK_EQ(numInputs_, inputs.size());
CHECK_EQ(numOutputs_, outputs.size());
check(inputs, outputs);
// TODO(hedaoyuan): Need to define some index macros,
// to avoid useing 0 and 1.
const TensorShape& input = inputs[0].shape();
const TensorShape& filter = inputs[1].shape();
const TensorShape& output = outputs[0].shape();
check(input, filter, output);
real beta;
if (outputs[0].getArgType() == ADD_TO) {
......@@ -209,16 +217,24 @@ public:
ConvFunctionBase::init(config);
}
virtual void check(const BufferArgs& inputs,
const BufferArgs& outputs) override {
const TensorShape& output = inputs[0].shape();
const TensorShape& filter = inputs[1].shape();
const TensorShape& input = outputs[0].shape();
checkShape(input, filter, output);
}
void calc(const BufferArgs& inputs, const BufferArgs& outputs) override {
CHECK_EQ(numInputs_, inputs.size());
CHECK_EQ(numOutputs_, outputs.size());
check(inputs, outputs);
// Since the implementation of Col2ImFunctor is ADD_TO,
// this function only supports ADD_TO mode.
CHECK_EQ(outputs[0].getArgType(), ADD_TO);
const TensorShape& output = inputs[0].shape();
const TensorShape& filter = inputs[1].shape();
const TensorShape& input = outputs[0].shape();
check(input, filter, output);
size_t batchSize = input[0];
size_t inputChannels = input[1];
......@@ -295,13 +311,21 @@ public:
ConvFunctionBase::init(config);
}
virtual void check(const BufferArgs& inputs,
const BufferArgs& outputs) override {
const TensorShape& output = inputs[0].shape();
const TensorShape& input = inputs[1].shape();
const TensorShape& filter = outputs[0].shape();
checkShape(input, filter, output);
}
void calc(const BufferArgs& inputs, const BufferArgs& outputs) override {
CHECK_EQ(numInputs_, inputs.size());
CHECK_EQ(numOutputs_, outputs.size());
check(inputs, outputs);
const TensorShape& output = inputs[0].shape();
const TensorShape& input = inputs[1].shape();
const TensorShape& filter = outputs[0].shape();
check(input, filter, output);
real beta;
if (outputs[0].getArgType() == ADD_TO) {
......
......@@ -54,8 +54,8 @@ public:
T inValue;
const int inH = inStartH + fH;
const int inW = inStartW + fW;
if ((inH >= 0 && inH < inputHeight) &&
(inW >= 0 && inW < inputWidth)) {
if ((inH >= 0 && inH < (int)inputHeight) &&
(inW >= 0 && inW < (int)inputWidth)) {
size_t offsetInput =
batch * inputChannels * inputHeight * inputWidth +
inC * inputHeight * inputWidth + inH * inputWidth + inW;
......@@ -90,14 +90,19 @@ public:
ConvFunctionBase::init(config);
}
void calc(const BufferArgs& inputs, const BufferArgs& outputs) override {
CHECK_EQ(numInputs_, inputs.size());
CHECK_EQ(numOutputs_, outputs.size());
virtual void check(const BufferArgs& inputs,
const BufferArgs& outputs) override {
const TensorShape& input = inputs[0].shape();
const TensorShape& filter = inputs[1].shape();
const TensorShape& output = outputs[0].shape();
check(input, filter, output);
checkShape(input, filter, output);
}
void calc(const BufferArgs& inputs, const BufferArgs& outputs) override {
CHECK_EQ(numInputs_, inputs.size());
CHECK_EQ(numOutputs_, outputs.size());
CHECK_EQ(outputs[0].getArgType(), ASSIGN_TO);
check(inputs, outputs);
size_t batchSize = inputs[0].shape()[0];
size_t inputChannels = inputs[0].shape()[1];
......
......@@ -284,6 +284,16 @@ public:
}
protected:
std::vector<Argument::SeqInfo> commonSeqInfo_;
ICpuGpuVectorPtr sequenceStartPositions_;
void calcSequenceStartPositions();
void checkInputConsistency(int inlinkId,
const std::vector<Argument::SeqInfo>& seqInfo);
void reorganizeInput(PassType passType);
void reorganizeOutput(PassType passType);
void connectFrames(PassType passType);
void calcNumSequencesAtEachStep();
void resizeOrCreateFrames(int numFrames);
void resizeBootFrame(int numSequences);
......@@ -295,7 +305,6 @@ protected:
std::string linkName;
LayerPtr inLayer;
std::vector<LayerPtr> agents; // Scatter Agents to reform batch input
bool hasSubseq;
Argument outArg; // scatter output argument
};
std::vector<InFrameLine> inFrameLines_;
......@@ -318,7 +327,6 @@ protected:
std::vector<LayerPtr> agents;
std::vector<LayerPtr> scatterAgents; // scatter agent used by beam search
Argument outArg; // scatter output argument
bool is_sequence;
// Different memoryFrameLine have different element as follows
IVectorPtr allIds; // scattered id of realLayer
ICpuGpuVectorPtr
......@@ -330,22 +338,27 @@ protected:
// and all outFrameLines(outlinks) share the info with one inFrameLine,
// which is assigned by targetInfoInlinkId_.
struct Info {
IVectorPtr allIds; // scattered id of realLayer
std::vector<int> idIndex; // index of allIds
// The original positions in the original batch
IVectorPtr allIds; // scattered id of realLayer [batchSize]
// index of allIds for each step [maxSequenceLength_]
// idIndex[i] is the total length of the first i sequences
std::vector<int> idIndex;
ICpuGpuVectorPtr
sequenceStartPositions; // scattered sequenceStartPositions
std::vector<int> seqStartPosIndex; // index of sequenceStartPositions
};
std::vector<Info> info_;
std::vector<Info> info_; // for input
// numSeqs_[i] is the number sequences which is longer than i (for sequence
// data) or has more than i subsequences (for subsequence data)
// Equivalently, numSeqs_[i] is the number of sequences at step i;
std::vector<int> numSeqs_;
std::vector<std::vector<Argument::SeqInfo>> seqInfos_;
// the id of inlink which share info with outlinks
int targetInfoInlinkId_;
void checkOutputConsistency(OutFrameLine& outFrameLine);
/* create scattered id infomation for all realLayer of inFrameLines one time.
* If hasSubseq, will also create scattered sequenceStartPositions infomation
......@@ -354,6 +367,28 @@ protected:
void createInFrameInfo(int inlinks_id,
const Argument& input,
PassType passType);
void createInFrameInfo_nonseq(int inlinks_id,
const Argument& input,
PassType passType);
void createInFrameInfo_seq(int inlinks_id,
const Argument& input,
PassType passType);
void createInFrameInfo_subseq(int inlinks_id,
const Argument& input,
PassType passType);
void createOutFrameInfo(OutFrameLine& outFrameLine,
Info& info,
ICpuGpuVectorPtr& sequenceStartPositions,
ICpuGpuVectorPtr& subSequenceStartPositions);
void createOutFrameInfo_seq(OutFrameLine& outFrameLine,
Info& info,
ICpuGpuVectorPtr& sequenceStartPositions,
ICpuGpuVectorPtr& subSequenceStartPositions);
void createOutFrameInfo_subseq(OutFrameLine& outFrameLine,
Info& info,
ICpuGpuVectorPtr& sequenceStartPositions,
ICpuGpuVectorPtr& subSequenceStartPositions);
void createMemoryFrameInfo(MemoryFrameLine* memoryFrameLine,
PassType passType);
......@@ -386,9 +421,7 @@ protected:
NeuralNetwork* rootNetwork_;
bool reversed_;
// if hasSubseq: max number of sentences(subseq)in batchsize samples
// else: max number of tokens in batchsize samples(sentences)
int maxSequenceLength_;
int maxSequenceLength_; // Max top-level length
bool useGpu_;
bool stopBeamSearch_;
......
......@@ -35,36 +35,15 @@ bool AgentLayer::init(const LayerMap& layerMap,
void AgentLayer::forward(PassType passType) {
Layer::forward(passType);
Argument& realOutput = realLayer_->getOutput();
int realHeight = realOutput.getBatchSize();
CHECK_LE(numSamples_, realHeight);
// get Arguments from real layers
if (numSamples_ > 0 && numSamples_ < realHeight) {
if (realOutput.ids) {
output_.ids =
IVector::create(realOutput.ids->getData(), numSamples_, useGpu_);
} else {
output_.subArgFrom(
realOutput, /* offset */ 0, numSamples_, getSize(), useGpu_);
}
} else {
output_ = realOutput;
}
}
void SequenceAgentLayer::forward(PassType passType) {
Layer::forward(passType);
Argument& realOutput = realLayer_->getOutput();
int realNumSequences = realOutput.getNumSequences();
CHECK_LE(numSamples_, realNumSequences);
// get Arguments from real layers
if (numSamples_ > 0 && numSamples_ < realNumSequences) {
if (realOutput.hasSeq()) {
int numRows =
realOutput.sequenceStartPositions->getData(false)[numSamples_];
CHECK(!realOutput.ids) << "Not supported";
output_.subArgFrom(realOutput,
/* offset */ 0,
numRows,
......@@ -74,13 +53,15 @@ void SequenceAgentLayer::forward(PassType passType) {
/* seqFlag */ true,
/* seqStart */ 0,
/* seqSize */ numSamples_ + 1);
} else {
output_.subArgFrom(
realOutput, /* offset */ 0, numSamples_, getSize(), useGpu_);
}
} else {
output_ = realOutput;
}
}
REGISTER_LAYER(sequence_agent, SequenceAgentLayer);
bool GatherAgentLayer::init(const LayerMap& layerMap,
const ParameterMap& parameterMap) {
CHECK_EQ(config_.inputs_size(), 0);
......@@ -91,18 +72,26 @@ bool GatherAgentLayer::init(const LayerMap& layerMap,
return true;
}
void GatherAgentLayer::copyIdAndSequenceInfo(const Argument& input,
void GatherAgentLayer::copyIdAndSequenceInfo(
ICpuGpuVectorPtr sequenceStartPositions,
ICpuGpuVectorPtr subSequenceStartPositions,
const IVectorPtr& ids,
const std::vector<int>& idIndex) {
output_.sequenceStartPositions = input.sequenceStartPositions;
output_.subSequenceStartPositions = input.subSequenceStartPositions;
realLayers_.clear();
output_.sequenceStartPositions = sequenceStartPositions;
output_.subSequenceStartPositions = subSequenceStartPositions;
allIds_ = ids;
idIndex_ = idIndex;
}
void GatherAgentLayer::forward(PassType passType) {
Layer::forward(passType);
forwardIds(passType);
forwardValue(passType);
}
void GatherAgentLayer::forwardValue(PassType passType) {
MatrixPtr valueReal = realLayers_[0]->getOutputValue();
if (!valueReal) return;
int height = allIds_->getSize();
int width = this->getSize();
......@@ -147,7 +136,9 @@ void ScatterAgentLayer::forward(PassType passType) {
CHECK_EQ(realLayer_->getDeviceId(), this->getDeviceId());
int width = this->getSize();
if (realOutArg_.value || realOutArg_.ids) {
if (realOutArg_.hasSeq()) {
forwardSequence(passType);
} else if (realOutArg_.value || realOutArg_.ids) {
output_.subArgFrom(
realOutArg_, /* offset */ idIndex_, idSize_, width, useGpu_);
} else { // used in generation
......@@ -174,7 +165,7 @@ void ScatterAgentLayer::backward(const UpdateCallback& callback) {
if (realGrad) {
// for agent in inFrameLines and memoryFrameLines,
// only first scatterAgentLayer should do addToRows in backward
if (idIndex_ == 0) {
if (handleBackward_) {
outputGrad->addToRows(*realGrad, *ids_);
}
}
......@@ -183,12 +174,14 @@ void ScatterAgentLayer::backward(const UpdateCallback& callback) {
REGISTER_LAYER(gather_agent, GatherAgentLayer);
REGISTER_LAYER(scatter_agent, ScatterAgentLayer);
void SequenceGatherAgentLayer::forward(PassType passType) {
Layer::forward(passType);
void GatherAgentLayer::forwardIds(PassType passType) {
int height = 0;
int* starts = output_.subSequenceStartPositions->getMutableData(false);
IVectorPtr idReal = realLayers_[0]->getOutputLabel();
if (idReal) {
if (!idReal) return;
if (output_.subSequenceStartPositions) {
int* starts = output_.subSequenceStartPositions->getMutableData(false);
// Gather generator.idsVec
// if is beam search generation result. Get first result.
if (idReal->getData()[idReal->getSize() - 1] == -1) {
......@@ -212,13 +205,11 @@ void SequenceGatherAgentLayer::forward(PassType passType) {
->copyFrom(*realLayers_[i]->getOutputLabel());
}
} else {
// Gather output.value, same as GatherAgentLayer
CHECK(output_.subSequenceStartPositions);
GatherAgentLayer::forward(passType);
LOG(FATAL) << "Not implemented";
}
}
void SequenceScatterAgentLayer::forward(PassType passType) {
void ScatterAgentLayer::forwardSequence(PassType passType) {
Layer::forward(passType);
CHECK_EQ(realLayer_->getDeviceId(), this->getDeviceId());
......@@ -241,6 +232,7 @@ void SequenceScatterAgentLayer::forward(PassType passType) {
/* seqStart */ seqStartPosIndex_,
/* seqSize */ numSequences_);
} else {
// Putting the generation logic here is really an ugly hack!
// used in generation
int height = 0;
size_t numSequences = ids_->getSize();
......@@ -284,7 +276,4 @@ void SequenceScatterAgentLayer::forward(PassType passType) {
}
}
REGISTER_LAYER(sequence_gather_agent, SequenceGatherAgentLayer);
REGISTER_LAYER(sequence_scatter_agent, SequenceScatterAgentLayer);
} // namespace paddle
......@@ -49,18 +49,6 @@ public:
void backward(const UpdateCallback& callback = nullptr) override {}
};
/**
* like AgentLayer, but use first *numSamples* sequences
*/
class SequenceAgentLayer : public AgentLayer {
public:
explicit SequenceAgentLayer(const LayerConfig& config) : AgentLayer(config) {}
~SequenceAgentLayer() {}
void forward(PassType passType) override;
void backward(const UpdateCallback& callback = nullptr) override {}
};
/**
* Like AgentLayer, but it can gather many real layers. Each real
* layer give a few rows of a sequence, after gather all real layers,
......@@ -83,7 +71,10 @@ public:
const ParameterMap& parameterMap) override;
// call before addRealLayer
void copyIdAndSequenceInfo(const Argument& input,
void clearRealLayers() { realLayers_.clear(); }
void copyIdAndSequenceInfo(ICpuGpuVectorPtr sequenceStartPositions,
ICpuGpuVectorPtr subSequenceStartPositions,
const IVectorPtr& allIds,
const std::vector<int>& idIndex);
......@@ -92,24 +83,8 @@ public:
void forward(PassType passType) override;
void backward(const UpdateCallback& callback) override;
};
/**
* Like GatherAgentLayer, but select a few sequence in real layer.
* *ids* in addRealLayer() are the ids of selected sequence.
* It's used to reorder sequence output.
*/
class SequenceGatherAgentLayer : public GatherAgentLayer {
public:
explicit SequenceGatherAgentLayer(const LayerConfig& config)
: GatherAgentLayer(config) {}
virtual ~SequenceGatherAgentLayer() {}
void forward(PassType passType);
void backward(const UpdateCallback& callback) {
// same as GatherAgentLayer
GatherAgentLayer::backward(callback);
}
void forwardValue(PassType passType);
void forwardIds(PassType passType);
};
/**
......@@ -129,6 +104,11 @@ protected:
int idSize_;
int seqStartPosIndex_;
int numSequences_; // number of sequences in this scatterAgentLayer
bool handleBackward_;
// use to store expanded cpuStartPositions or subSequenceStartPositions
// of real layer.
ICpuGpuVectorPtr inputStartPos_;
public:
explicit ScatterAgentLayer(const LayerConfig& config) : Layer(config) {}
......@@ -147,13 +127,10 @@ public:
* false(default) in ScatterAgentLayer, and
* true in SequenceScatterAgentLayer.
*/
void setRealLayer(LayerPtr layer,
const std::vector<int>& ids,
bool copyId = false) {
void setRealLayer(LayerPtr layer, const std::vector<int>& ids) {
realLayer_ = layer;
IVector::resizeOrCreate(ids_, ids.size(), useGpu_);
ids_->copyFrom(ids.data(), ids.size());
if (copyId) {
if (useGpu_) {
IVector::resizeOrCreate(cpuIds_, ids.size(), false);
cpuIds_->copyFrom(ids.data(), ids.size());
......@@ -161,7 +138,6 @@ public:
cpuIds_ = ids_;
}
}
}
// set real layer and output, [idIndex, idIndex + idSize) of *ids*
// are selected row for realOutArg in realLayer
......@@ -169,12 +145,14 @@ public:
const Argument& outArg,
const IVectorPtr& ids,
int idIndex,
int idSize) {
int idSize,
bool handleBackward) {
realLayer_ = layer;
realOutArg_ = outArg;
ids_ = ids;
idIndex_ = idIndex;
idSize_ = idSize;
handleBackward_ = handleBackward;
}
void setSequenceStartPositions(const ICpuGpuVectorPtr& sequenceStartPositions,
......@@ -187,28 +165,8 @@ public:
void forward(PassType passType) override;
void backward(const UpdateCallback& callback) override;
};
/**
* Like ScatterAgentLayer, but select a few sequence in real layer.
* *ids* in setRealLayer() or setRealLayerAndOutput() are the ids of
* selected sequence. It's used to reorder sequence input.
*/
class SequenceScatterAgentLayer : public ScatterAgentLayer {
protected:
// use to store expanded cpuStartPositions or subSequenceStartPositions
// of real layer.
ICpuGpuVectorPtr inputStartPos_;
public:
explicit SequenceScatterAgentLayer(const LayerConfig& config)
: ScatterAgentLayer(config) {}
virtual ~SequenceScatterAgentLayer() {}
void forward(PassType passType);
void backward(const UpdateCallback& callback) {
ScatterAgentLayer::backward(callback);
}
void forwardSequence(PassType passType);
};
} // namespace paddle
......@@ -40,6 +40,7 @@ namespace paddle {
class FeatureMapExpandLayer : public Layer {
private:
int numFilters_;
bool asRowVector_;
public:
explicit FeatureMapExpandLayer(const LayerConfig& config) : Layer(config) {}
......@@ -62,6 +63,7 @@ bool FeatureMapExpandLayer::init(const LayerMap& layerMap,
CHECK_EQ(inputLayers_.size(), 1UL);
numFilters_ = config_.num_filters();
asRowVector_ = config_.user_arg() != "as_col_vec";
return true;
}
......@@ -76,6 +78,7 @@ void FeatureMapExpandLayer::forward(PassType passType) {
{
AsyncGpuBlock asyncGpuBlock;
if (asRowVector_) {
for (size_t i = 0; i < batchSize; i++) {
MatrixPtr outVTmp =
Matrix::create(outputV->getData() + i * imgSize * numFilters_,
......@@ -87,6 +90,19 @@ void FeatureMapExpandLayer::forward(PassType passType) {
inputV->getData() + i * imgSize, 1, imgSize, false, useGpu_);
outVTmp->addRowVector(*inVTmp);
}
} else {
for (size_t i = 0; i < batchSize; i++) {
MatrixPtr outVTmp =
Matrix::create(outputV->getData() + i * imgSize * numFilters_,
imgSize,
numFilters_,
false,
useGpu_);
MatrixPtr inVTmp = Matrix::create(
inputV->getData() + i * imgSize, imgSize, 1, false, useGpu_);
outVTmp->addColVector(*inVTmp);
}
}
}
/* activation */ {
REGISTER_TIMER_INFO("FwAtvTimer", getName().c_str());
......@@ -102,8 +118,13 @@ void FeatureMapExpandLayer::backward(const UpdateCallback& callback) {
MatrixPtr outGrad = getOutputGrad();
size_t batchSize = getInput(0).getBatchSize();
int imgSize = inGrad->getWidth();
/* Do activation */ {
REGISTER_TIMER_INFO("BpAvtTimer", getName().c_str());
backwardActivation();
}
{
AsyncGpuBlock asyncGpuBlock;
if (asRowVector_) {
for (size_t i = 0; i < batchSize; i++) {
MatrixPtr outGradTmp =
Matrix::create(outGrad->getData() + i * imgSize * numFilters_,
......@@ -115,10 +136,19 @@ void FeatureMapExpandLayer::backward(const UpdateCallback& callback) {
inGrad->getData() + i * imgSize, 1, imgSize, false, useGpu_);
inGradTmp->collectBias(*outGradTmp, 1);
}
} else {
for (size_t i = 0; i < batchSize; i++) {
MatrixPtr outGradTmp =
Matrix::create(outGrad->getData() + i * imgSize * numFilters_,
imgSize,
numFilters_,
false,
useGpu_);
MatrixPtr inGradTmp = Matrix::create(
inGrad->getData() + i * imgSize, imgSize, 1, false, useGpu_);
inGradTmp->sumRows(*outGradTmp, 1, 1);
}
}
/* Do derivation */ {
REGISTER_TIMER_INFO("BpAvtTimer", getName().c_str());
backwardActivation();
}
}
......
......@@ -22,10 +22,33 @@ public:
void forward(PassType passType) override {
Layer::forward(passType);
std::vector<std::string> vals;
for (size_t i = 0; i != inputLayers_.size(); ++i) {
getInput(i).printValueString(LOG(INFO),
"layer=" + inputLayers_[i]->getName() + " ");
std::ostringstream s;
getInput(i).printValueString(s, "");
vals.push_back(s.str());
}
size_t pos = 0;
int i = 0;
std::ostringstream s;
const std::string& format = config_.user_arg();
while (true) {
size_t pos1 = format.find("%s", pos);
if (pos1 == std::string::npos) break;
if (i >= vals.size()) {
break;
}
s << format.substr(pos, pos1 - pos) << vals[i];
pos = pos1 + 2;
++i;
}
if (i != inputLayers_.size()) {
LOG(ERROR) << "Number of value in the format (" << format
<< ") is not same as the number of inputs ("
<< inputLayers_.size() << ") at " << getName();
}
s << format.substr(pos);
LOG(INFO) << s.str();
}
void backward(const UpdateCallback& callback) override {}
......
......@@ -46,6 +46,9 @@ void SequencePoolLayer::forward(PassType passType) {
Layer::forward(passType);
const Argument& input = getInput(0);
CHECK(input.hasSeq() || input.hasSubseq())
<< "Input should be a sequence or subsequence for layer " << getName();
newBatchSize_ = type_ ? input.getNumSubSequences() : input.getNumSequences();
size_t dim = getSize();
// check
......
......@@ -95,3 +95,22 @@ def process_unequalength_seq(settings, file_name):
words1 = reduce(lambda x, y: x + y, d[0])
words2 = reduce(lambda x, y: x + y, d[1])
yield words1, words2, d[2]
###########################################################
data3 = [
[[[1, 2], [4, 5, 2]], [1, 2], 0],
[[[0, 2], [2, 5], [0, 1, 2]], [2, 3, 0], 1],
]
# Used for sequence_nest_mixed_inputs.conf
@provider(
input_types=[
integer_value_sub_sequence(10), integer_value_sequence(10),
integer_value(2)
],
should_shuffle=False)
def process_mixed(settings, file_name):
for d in data3:
yield d
......@@ -19,7 +19,7 @@ from paddle.trainer_config_helpers import *
define_py_data_sources2(train_list='gserver/tests/Sequence/dummy.list',
test_list=None,
module='rnn_data_provider',
obj='process_subseq2')
obj='process_subseq')
settings(batch_size=2, learning_rate=0.01)
......
# edit-mode: -*- python -*-
# Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserved
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
from paddle.trainer_config_helpers import *
######################## data source ################################
define_py_data_sources2(
train_list='gserver/tests/Sequence/dummy.list',
test_list=None,
module='rnn_data_provider',
obj='process_mixed')
settings(batch_size=2, learning_rate=0.01)
######################## network configure ################################
dict_dim = 10
word_dim = 2
hidden_dim = 2
label_dim = 2
data1 = data_layer(name="word1", size=dict_dim)
data2 = data_layer(name="word2", size=dict_dim)
label = data_layer(name="label", size=label_dim)
encoding = embedding_layer(input=data2, size=word_dim)
subseq = embedding_layer(input=data1, size=word_dim)
seq = embedding_layer(input=data2, size=word_dim)
nonseq = embedding_layer(input=label, size=word_dim)
# This hierarchical RNN is designed to be equivalent to the simple RNN in
# sequence_rnn_multi_unequalength_inputs.conf
def outer_step(subseq, seq, nonseq, encoding):
outer_mem = memory(name="outer_rnn_state", size=hidden_dim)
def inner_step(subseq, seq, nonseq):
inner_mem = memory(
name="inner_rnn_state", size=hidden_dim, boot_layer=outer_mem)
out = fc_layer(
input=[subseq, seq, nonseq, inner_mem],
size=hidden_dim,
act=TanhActivation(),
bias_attr=True,
name='inner_rnn_state')
return out
decoder = recurrent_group(
step=inner_step, name='inner', input=[subseq, seq, nonseq])
last = last_seq(name="outer_rnn_state", input=decoder)
context = simple_attention(
encoded_sequence=encoding, encoded_proj=encoding, decoder_state=last)
return context
out = recurrent_group(
name="outer",
step=outer_step,
input=[
subseq, expand_layer(
seq, expand_as=subseq,
expand_level=ExpandLevel.FROM_SEQUENCE), expand_layer(
nonseq,
expand_as=subseq,
expand_level=ExpandLevel.FROM_NO_SEQUENCE),
StaticInput(encoding)
])
rep = last_seq(input=out)
prob = fc_layer(
size=label_dim, input=rep, act=SoftmaxActivation(), bias_attr=True)
outputs(classification_cost(input=prob, label=label))
# edit-mode: -*- python -*-
# Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserved
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
from paddle.trainer_config_helpers import *
######################## data source ################################
define_py_data_sources2(
train_list='gserver/tests/Sequence/dummy.list',
test_list=None,
module='rnn_data_provider',
obj='process_mixed')
settings(batch_size=2, learning_rate=0.01)
######################## network configure ################################
dict_dim = 10
word_dim = 2
hidden_dim = 2
label_dim = 2
data1 = data_layer(name="word1", size=dict_dim)
data2 = data_layer(name="word2", size=dict_dim)
label = data_layer(name="label", size=label_dim)
encoding = embedding_layer(input=data2, size=word_dim)
# This hierarchical RNN is designed to be equivalent to the simple RNN in
# sequence_rnn_multi_unequalength_inputs.conf
def outer_step(subseq, seq, nonseq, encoding):
outer_mem = memory(name="outer_rnn_state", size=hidden_dim)
def inner_step(data1, data2, label):
inner_mem = memory(
name="inner_rnn_state", size=hidden_dim, boot_layer=outer_mem)
subseq = embedding_layer(input=data1, size=word_dim)
seq = embedding_layer(input=data2, size=word_dim)
nonseq = embedding_layer(input=label, size=word_dim)
print_layer(input=[data1, seq, label, inner_mem])
out = fc_layer(
input=[subseq, seq, nonseq, inner_mem],
size=hidden_dim,
act=TanhActivation(),
bias_attr=True,
name='inner_rnn_state')
return out
decoder = recurrent_group(
step=inner_step, name='inner',
input=[subseq, StaticInput(seq), nonseq])
last = last_seq(name="outer_rnn_state", input=decoder)
context = simple_attention(
encoded_sequence=encoding, encoded_proj=encoding, decoder_state=last)
return context
out = recurrent_group(
name="outer",
step=outer_step,
input=[data1, data2, StaticInput(label), StaticInput(encoding)])
rep = last_seq(input=out)
prob = fc_layer(
size=label_dim, input=rep, act=SoftmaxActivation(), bias_attr=True)
outputs(classification_cost(input=prob, label=label))
......@@ -19,7 +19,7 @@ from paddle.trainer_config_helpers import *
define_py_data_sources2(train_list='gserver/tests/Sequence/dummy.list',
test_list=None,
module='rnn_data_provider',
obj='process_seq2')
obj='process_seq')
settings(batch_size=2, learning_rate=0.01)
......
......@@ -1598,6 +1598,8 @@ TEST(Layer, FeatureMapExpandLayer) {
/* paraSize= */ 0});
config.layerConfig.add_inputs();
for (auto useGpu : {false, true}) {
for (auto asRowVec : {false, true}) {
config.layerConfig.set_user_arg(asRowVec ? "as_row_vec" : "as_col_vec");
testLayerGrad(config,
"featmap_expand",
/*batch_size*/ 100,
......@@ -1605,6 +1607,7 @@ TEST(Layer, FeatureMapExpandLayer) {
useGpu,
/* useWeight */ true);
}
}
}
TEST(Layer, MultiplexLayer) {
......
......@@ -155,6 +155,15 @@ TEST(RecurrentGradientMachine, rnn_multi_unequalength_input) {
}
}
TEST(RecurrentGradientMachine, rnn_mixed_input) {
for (bool useGpu : {false, true}) {
test("gserver/tests/sequence_rnn_mixed_inputs.py",
"gserver/tests/sequence_rnn_matched_inputs.py",
1e-6,
useGpu);
}
}
int main(int argc, char** argv) {
testing::InitGoogleTest(&argc, argv);
......
build
third-party
\ No newline at end of file
#include <thrust/device_vector.h>
#include <sstream>
#include "paddle/majel/dim.h"
#include "gtest/gtest.h"
__global__ void test(majel::Dim<2>* o) {
o[0] = majel::make_dim(5, 6);
}
__global__ void dyn_idx_gpu(int* o) {
auto d = majel::make_dim(5, 6);
o[0] = d[1];
}
TEST(Dim, Equality) {
// construct a Dim on the CPU
auto a = majel::make_dim(3, 4);
EXPECT_EQ(majel::get<0>(a), 3);
EXPECT_EQ(majel::get<1>(a), 4);
// construct a Dim on the GPU
thrust::device_vector<majel::Dim<2>> t(2);
test<<<1,1>>>(thrust::raw_pointer_cast(t.data()));
a = t[0];
EXPECT_EQ(majel::get<0>(a), 5);
EXPECT_EQ(majel::get<1>(a), 6);
// linearization
auto b = majel::make_dim(7, 8);
EXPECT_EQ(majel::linearize(a, b), 83);
// product
EXPECT_EQ(majel::product(a), 30);
// mutate a Dim
majel::get<1>(b) = 10;
EXPECT_EQ(majel::get<0>(b), 7);
EXPECT_EQ(majel::get<1>(b), 10);
// dynamic access
majel::get(b, 0) = 8;
b[1] = 11;
EXPECT_EQ(majel::get<0>(b), 8);
EXPECT_EQ(majel::get<1>(b), 11);
EXPECT_EQ(majel::get(b, 0), 8);
EXPECT_EQ(b[1], 11);
// dynamic access on GPU
thrust::device_vector<int> r(1);
dyn_idx_gpu<<<1,1>>>(thrust::raw_pointer_cast(r.data()));
int res = r[0];
EXPECT_EQ(res, 6);
// ex_prefix_mul
majel::Dim<3> c = majel::ex_prefix_mul(majel::Dim<3>(3, 4, 5));
EXPECT_EQ(majel::get<0>(c), 1);
EXPECT_EQ(majel::get<1>(c), 3);
EXPECT_EQ(majel::get<2>(c), 12);
// contiguous_strides
c = majel::contiguous_strides(majel::Dim<3>(10, 1, 10));
EXPECT_EQ(majel::get<0>(c), 1);
EXPECT_EQ(majel::get<1>(c), 0);
EXPECT_EQ(majel::get<2>(c), 10);
c = majel::contiguous_strides(majel::Dim<3>(10, 10, 1));
EXPECT_EQ(majel::get<0>(c), 1);
EXPECT_EQ(majel::get<1>(c), 10);
EXPECT_EQ(majel::get<2>(c), 0);
c = majel::contiguous_strides(majel::Dim<3>(1, 10, 10));
EXPECT_EQ(majel::get<0>(c), 0);
EXPECT_EQ(majel::get<1>(c), 1);
EXPECT_EQ(majel::get<2>(c), 10);
c = majel::contiguous_strides(majel::Dim<3>(2, 3, 4));
EXPECT_EQ(majel::get<0>(c), 1);
EXPECT_EQ(majel::get<1>(c), 2);
EXPECT_EQ(majel::get<2>(c), 6);
// generate from an index
auto size = majel::make_dim(4, 5, 2);
c = majel::Dim<3>(14, size);
EXPECT_EQ(majel::get<0>(c), 2);
EXPECT_EQ(majel::get<1>(c), 3);
EXPECT_EQ(majel::get<2>(c), 0);
c = majel::Dim<3>(25, size);
EXPECT_EQ(majel::get<0>(c), 1);
EXPECT_EQ(majel::get<1>(c), 1);
EXPECT_EQ(majel::get<2>(c), 1);
}
TEST(Dim, Bool) {
auto a = majel::make_dim(3, 4);
auto b = majel::make_dim(5, 6);
auto c = majel::make_dim(3, 4);
// in_bounds check
EXPECT_TRUE(majel::contained(a, b));
EXPECT_FALSE(majel::contained(b, a));
// comparison
EXPECT_TRUE(a == a);
EXPECT_FALSE(a == b);
EXPECT_TRUE(a == c);
// contiguous check
int x = 4, y = 5, z = 2;
majel::Dim<3> sizef(x, y, z);
majel::Dim<3> stridea(1, x, x*y);
majel::Dim<3> strideb(2, 2*x, 2*x*y);
majel::Dim<3> stridec(1, x, 2*x*y);
EXPECT_TRUE(majel::contiguous(sizef, stridea));
EXPECT_FALSE(majel::contiguous(sizef, strideb));
EXPECT_FALSE(majel::contiguous(sizef, stridec));
}
TEST(Dim, Print) {
{
std::stringstream ss;
auto a = majel::make_dim(2, 3);
ss << a;
EXPECT_EQ(ss.str(), "2, 3");
}
{
std::stringstream ss;
ss << majel::make_dim(8);
EXPECT_EQ(ss.str(), "8");
}
}
......@@ -908,12 +908,13 @@ const T* CpuGpuVectorT<T>::getData(bool useGpu) const {
// Operation will change data and need to reset sync_ & syncFlag_.
#define MUTABLE_VECTOR_OP(OP, useGpu, args...) \
do { \
setSync(useGpu); \
if (useGpu) { \
copyToGpu(); \
setSync(useGpu); \
return gpuVectorT_->OP(args); \
} else { \
copyToCpu(); \
setSync(useGpu); \
return cpuVectorT_->OP(args); \
} \
} while (0)
......@@ -1030,7 +1031,7 @@ void CpuGpuVectorT<T>::copyToCpu() {
case DATA_AT_GPU:
CHECK(gpuVectorT_);
this->resizeOrCreate(gpuVectorT_->getSize(), false);
cpuVectorT_->copyFrom(*gpuVectorT_, HPPL_STREAM_DEFAULT);
cpuVectorT_->copyFrom(*gpuVectorT_);
setSync(SYNCED);
break;
case DATA_AT_CPU:
......@@ -1049,7 +1050,7 @@ void CpuGpuVectorT<T>::copyToGpu() {
case DATA_AT_CPU:
CHECK(cpuVectorT_);
this->resizeOrCreate(cpuVectorT_->getSize(), true);
gpuVectorT_->copyFrom(*cpuVectorT_, HPPL_STREAM_DEFAULT);
gpuVectorT_->copyFrom(*cpuVectorT_);
setSync(SYNCED);
break;
case DATA_AT_GPU:
......
include_directories(${CMAKE_CURRENT_BINARY_DIR})
set(OPITMIZER_SRCS
adadelta_optimizer.cc
adagrad_optimizer.cc
adam_optimizer.cc
optimizer.cc
parameter_optimizer.cc
sgd_optimizer.cc
)
add_library(paddle_optimizer STATIC ${OPITMIZER_SRCS})
add_dependencies(paddle_optimizer gen_proto_cpp)
if(WITH_TESTING)
add_simple_unittest(serialization_test)
add_simple_unittest(parameter_optimizer_test)
endif()
#include "adadelta_optimizer.h"
#include <algorithm>
#include <cmath>
namespace paddle {
namespace optimizer {
void AdadeltaOptimizer::Update(const Tensor* gradient) {
num_sample_passed_ += 1;
double learning_rate = lr_policy_->LearningRate(num_sample_passed_);
Tensor& param = *parameter_;
const Tensor& grad = *gradient;
Tensor& accum_g = *accum_gradient_;
Tensor& accum_d = *accum_delta_;
Tensor& update_d = *update_delta_;
for (size_t i = 0; i < param.size(); ++i) {
accum_g[i] = rho_ * accum_g[i] + (1.0 - rho_) * grad[i] * grad[i];
update_d[i] = std::sqrt(accum_d[i] + epsilon_) /
std::sqrt(accum_g[i] + epsilon_) * grad[i];
accum_d[i] = rho_ * accum_d[i] + (1.0 - rho_) * update_d[i] * update_d[i];
param[i] -= learning_rate * update_d[i] + learning_rate * decay_ * param[i];
}
}
const char* AdadeltaOptimizer::SerializeState(int* state_len) {
AdadeltaOptimizerState state;
// TODO(zhihong) : add lr_policy serialization
state.set_num_sample_passed(num_sample_passed_);
TensorToProto(*parameter_, state.mutable_parameter());
TensorToProto(*accum_gradient_, state.mutable_accum_gradient());
TensorToProto(*accum_delta_, state.mutable_accum_delta());
TensorToProto(*update_delta_, state.mutable_update_delta());
auto str = state.SerializeAsString();
*state_len = str.size();
return str.c_str();
}
void AdadeltaOptimizer::DeserializeState(const std::string& str) {
AdadeltaOptimizerState state;
state.ParseFromString(str);
// TODO(zhihong) : add lr_policy DeserializeState
num_sample_passed_ = state.num_sample_passed();
ProtoToTensor(state.parameter(), parameter_);
ProtoToTensor(state.accum_gradient(), accum_gradient_);
ProtoToTensor(state.accum_delta(), accum_delta_);
ProtoToTensor(state.update_delta(), update_delta_);
}
} // namespace optimizer
} // namespace paddle
#pragma once
#include "parameter_optimizer.h"
namespace paddle {
namespace optimizer {
class AdadeltaOptimizer : public ParameterOptimizer {
public:
AdadeltaOptimizer(
Tensor *parameter, LrPolicy *lr, double rho, double epsilon, double decay)
: ParameterOptimizer(parameter, lr),
accum_gradient_(new Tensor(parameter->size())),
accum_delta_(new Tensor(parameter->size())),
update_delta_(new Tensor(parameter->size())),
rho_(rho),
epsilon_(epsilon),
decay_(decay) {}
~AdadeltaOptimizer() {
if (accum_gradient_) delete accum_gradient_;
if (accum_delta_) delete accum_delta_;
if (update_delta_) delete update_delta_;
}
void Update(const Tensor *gradient);
const char *SerializeState(int *state_len);
void DeserializeState(const std::string &state);
private:
Tensor *accum_gradient_;
Tensor *accum_delta_;
Tensor *update_delta_;
double rho_;
double epsilon_;
double decay_;
};
} // namespace optimizer
} // namespace paddle
#include <cmath>
#include "adagrad_optimizer.h"
namespace paddle {
namespace optimizer {
void AdagradOptimizer::Update(const Tensor* gradient) {
num_sample_passed_ += 1;
double learning_rate = lr_policy_->LearningRate(num_sample_passed_);
Tensor& param = *parameter_;
Tensor& accum_g = *accum_gradient_;
const Tensor& grad = *gradient;
for (size_t i = 0; i < param.size(); ++i) {
accum_g[i] += grad[i] * grad[i];
param[i] += learning_rate * grad[i] / std::sqrt(accum_g[i] + epsilon_) +
learning_rate * decay_ * param[i];
}
}
const char* AdagradOptimizer::SerializeState(int* state_len) {
AdagradOptimizerState state;
// TODO(zhihong) : add lr_policy serialization
state.set_num_sample_passed(num_sample_passed_);
TensorToProto(*parameter_, state.mutable_parameter());
TensorToProto(*accum_gradient_, state.mutable_accum_gradient());
auto str = state.SerializeAsString();
*state_len = str.size();
return str.c_str();
}
void AdagradOptimizer::DeserializeState(const std::string& str) {
AdagradOptimizerState state;
state.ParseFromString(str);
// TODO(zhihong) : add lr_policy DeserializeState
num_sample_passed_ = state.num_sample_passed();
ProtoToTensor(state.parameter(), parameter_);
ProtoToTensor(state.accum_gradient(), accum_gradient_);
}
} // namespace optimizer
} // namespace paddle
#pragma once
#include "parameter_optimizer.h"
namespace paddle {
namespace optimizer {
class AdagradOptimizer : public ParameterOptimizer {
public:
AdagradOptimizer(Tensor *parameter,
LrPolicy *lr,
double epsilon,
double decay)
: ParameterOptimizer(parameter, lr),
accum_gradient_(new Tensor(parameter->size())),
epsilon_(epsilon),
decay_(decay) {}
~AdagradOptimizer() {
if (accum_gradient_) delete accum_gradient_;
}
void Update(const Tensor *gradient);
const char *SerializeState(int *state_len);
void DeserializeState(const std::string &state);
private:
Tensor *accum_gradient_;
double epsilon_;
double decay_;
};
} // namespace optimizer
} // namespace paddle
#include "adam_optimizer.h"
#include <cmath>
namespace paddle {
namespace optimizer {
void AdamOptimizer::Update(const Tensor *gradient) {
num_sample_passed_ += 1;
double learning_rate = lr_policy_->LearningRate(num_sample_passed_);
double coef1 = 1.0 - std::pow(beta_1_, num_sample_passed_);
double coef2 = 1.0 - std::pow(beta_2_, num_sample_passed_);
learning_rate *= std::sqrt(coef2) / coef1;
Tensor &param = *parameter_;
const Tensor &grad = *gradient;
Tensor &m = *momentums_;
Tensor &v = *velocitys_;
for (size_t i = 0; i < param.size(); ++i) {
m[i] = beta_1_ * m[i] + (1.0 - beta_1_) * grad[i];
v[i] = beta_2_ * v[i] + (1.0 - beta_2_) * grad[i] * grad[i];
param[i] -=
learning_rate * (m[i] / std::sqrt(v[i] + epsilon_) + decay_ * param[i]);
}
}
const char *AdamOptimizer::SerializeState(int *state_len) {
AdamOptimizerState state;
// TODO(zhihong) : add lr_policy serialization
state.set_num_sample_passed(num_sample_passed_);
TensorToProto(*parameter_, state.mutable_parameter());
TensorToProto(*momentums_, state.mutable_momentums());
TensorToProto(*velocitys_, state.mutable_velocitys());
auto str = state.SerializeAsString();
*state_len = str.size();
return str.c_str();
}
void AdamOptimizer::DeserializeState(const std::string &str) {
AdamOptimizerState state;
state.ParseFromString(str);
// TODO(zhihong) : add lr_policy DeserializeState
num_sample_passed_ = state.num_sample_passed();
ProtoToTensor(state.parameter(), parameter_);
ProtoToTensor(state.momentums(), momentums_);
ProtoToTensor(state.velocitys(), velocitys_);
}
} // namespace optimizer
} // namespace paddle
#pragma once
#include "parameter_optimizer.h"
namespace paddle {
namespace optimizer {
class AdamOptimizer : public ParameterOptimizer {
public:
AdamOptimizer(Tensor *parameter,
LrPolicy *lr,
double beta_1,
double beta_2,
double epsilon,
double decay)
: ParameterOptimizer(parameter, lr),
momentums_(new Tensor(parameter->size())),
velocitys_(new Tensor(parameter->size())),
beta_1_(beta_1),
beta_2_(beta_2),
epsilon_(epsilon),
decay_(decay) {}
~AdamOptimizer() {
if (momentums_) delete momentums_;
if (velocitys_) delete velocitys_;
}
void Update(const Tensor *gradient);
const char *SerializeState(int *state_len);
void DeserializeState(const std::string &state);
private:
Tensor *momentums_;
Tensor *velocitys_;
double beta_1_;
double beta_2_;
double epsilon_;
double decay_;
};
} // namespace optimizer
} // namespace paddle
#pragma once
#include <algorithm>
#include "OptimizerConfig.pb.h"
namespace paddle {
namespace optimizer {
class LrPolicy {
public:
virtual ~LrPolicy() {}
virtual double LearningRate(const uint64_t num_sample_passed) = 0;
virtual const char *SerializeState(int *state_len) = 0;
virtual void DeserializeState(const std::string &state) = 0;
};
// constant learning rate policy
class ConstLr final : public LrPolicy {
public:
ConstLr(double lr) : learning_rate(lr){};
double LearningRate(const uint64_t num_sample_passed) {
return learning_rate;
}
const char *SerializeState(int *state_len) { return nullptr; }
void DeserializeState(const std::string &state) {}
private:
double learning_rate;
};
class LinearLr final : public LrPolicy {
public:
LinearLr(double lr, double lr_decay_a, double lr_decay_b)
: learning_rate(lr), lr_decay_a(lr_decay_a), lr_decay_b(lr_decay_b) {}
double LearningRate(const uint64_t num_sample_passed) {
return std::max(learning_rate - lr_decay_a * num_sample_passed, lr_decay_b);
}
const char *SerializeState(int *state_len) {
// TODO(zhihong) : add lr_policy serialization
return nullptr;
}
void DeserializeState(const std::string &state) {
// TODO(zhihong) : add lr_policy serialization
}
private:
double learning_rate;
double lr_decay_a;
double lr_decay_b;
};
} // namespace optimizer
} // namespace paddle
#include "optimizer.h"
#include <string>
#include "parameter_optimizer.h"
using namespace paddle;
using namespace paddle::optimizer;
template <paddle_element_type VALUE>
struct EnumToType {};
template <class T>
struct TypeToEnum {};
#define MATCH_ENUM_TYPE(TYPE, ENUM) \
template <> \
struct TypeToEnum<TYPE> { \
static paddle_element_type v() { return ENUM; }; \
static constexpr TYPE value = ENUM; \
}; \
template <> \
struct EnumToType<ENUM> { \
typedef TYPE Type; \
}
MATCH_ENUM_TYPE(int32_t, PADDLE_ELEMENT_TYPE_INT32);
MATCH_ENUM_TYPE(uint32_t, PADDLE_ELEMENT_TYPE_UINT32);
MATCH_ENUM_TYPE(int64_t, PADDLE_ELEMENT_TYPE_INT64);
MATCH_ENUM_TYPE(uint64_t, PADDLE_ELEMENT_TYPE_UINT64);
// TODO(zhihong): only implement below type, need to fix
MATCH_ENUM_TYPE(float, PADDLE_ELEMENT_TYPE_FLOAT32);
MATCH_ENUM_TYPE(double, PADDLE_ELEMENT_TYPE_FLOAT64);
struct paddle_optimizer {
paddle::optimizer::ParameterOptimizer* impl;
};
paddle_optimizer* paddle_create_optimizer(const unsigned char* config_proto,
const int config_proto_len,
const paddle_element_type data_type,
void* param_buffer,
int num_bytes,
const char* state,
const int state_len) {
paddle_optimizer* optimizer = new paddle_optimizer;
std::string config(config_proto, config_proto + config_proto_len);
Tensor* parameter =
new Tensor(reinterpret_cast<float*>(param_buffer), num_bytes);
optimizer->impl = ParameterOptimizer::Create(config, parameter);
if (state != nullptr) {
std::string s(state, state + state_len);
optimizer->impl->DeserializeState(s);
}
return optimizer;
}
int paddle_release_optimizer(paddle_optimizer* o) {
if (o != nullptr) delete o->impl;
return PADDLE_SUCCESS;
}
int paddle_update_parameter(paddle_optimizer* o,
const paddle_element_type data_type,
const void* grad_buffer,
int num_bytes) {
// TOOD(zhihong): datatype not work. need to add the runtime datatype
auto grad_type = reinterpret_cast<const float*>(grad_buffer);
Tensor* gradient = new Tensor(const_cast<float*>(grad_type), num_bytes);
o->impl->Update(gradient);
return PADDLE_SUCCESS;
}
int paddle_optimizer_get_weights(paddle_optimizer* o, void** param_buffer) {
int param_size = 0;
*param_buffer = (void*)o->impl->get_weight(&param_size);
return param_size;
}
int paddle_optimizer_get_state(paddle_optimizer* o, const char** state) {
int state_len = 0;
*state = o->impl->SerializeState(&state_len);
return state_len;
}
#pragma once
#include <stdbool.h>
#include <stdint.h>
/**
* @brief optimizer library in independent with other module
* which will be used in :
* Case A, the gradient optimized locally on the trainer.
*
* Case B, the gradient optimized on the parameter server.
*/
#ifdef __cplusplus
extern "C" {
#endif
typedef enum {
PADDLE_ELEMENT_TYPE_INT32 = 0,
PADDLE_ELEMENT_TYPE_UINT32 = 1,
PADDLE_ELEMENT_TYPE_INT64 = 2,
PADDLE_ELEMENT_TYPE_UINT64 = 3,
PADDLE_ELEMENT_TYPE_FLOAT32 = 4,
PADDLE_ELEMENT_TYPE_FLOAT64 = 5,
} paddle_element_type;
/**
* @brief execution status code
*/
const int32_t PADDLE_SUCCESS = 0;
const int32_t PADDLE_ERROR = -1;
typedef struct paddle_optimizer paddle_optimizer;
/**
* this group interface called in order :
* 1. create optimizer with config
* 2. set weights
* 3. update_parameter
* 4. get_weights
* 5. release optimizer
*/
/**
* @brief create optimizer with proto_config
* @param config_proto, optimizer protobuf, see OptimizerConfig.proto in detail
* @return return optimizer instance
*/
paddle_optimizer* paddle_create_optimizer(const unsigned char* config_proto,
const int config_proto_len,
const paddle_element_type data_type,
void* param_buffer,
int num_bytes,
const char* state,
const int state_len);
/**
* @brief release optimizer
* @param optimizer
* @return return exec status
*/
int paddle_release_optimizer(paddle_optimizer* o);
/**
* @brief optimizer instance
* @param datatype of gradient and parameter
* @param gradient, calculate by optimzizer caller.
* TODO(zhihong): just pass loss to reduce communicate overhead.
* Project Adam Ms'14 paper for detail
* @param num_bytes, gradient size
* @return return exec status
*/
int paddle_update_parameter(paddle_optimizer* o,
const paddle_element_type data_type,
const void* gradient,
int num_bytes);
/**
* @brief optimizer for get parameter buffer
* @param param_buffer, initilized parameter buffer
* @return return content length
*/
int paddle_optimizer_get_weights(paddle_optimizer* o, void** param_buffer);
/**
* @brief optimzizer for saving training state
* @param training state for receive SerializeState
* @return return state_buffer length
*/
int paddle_optimizer_get_state(paddle_optimizer* o, const char** state);
#ifdef __cplusplus
}
#endif
#include <glog/logging.h>
#include "adadelta_optimizer.h"
#include "adagrad_optimizer.h"
#include "adam_optimizer.h"
#include "lr_policy.h"
#include "sgd_optimizer.h"
#include "parameter_optimizer.h"
namespace paddle {
namespace optimizer {
ParameterOptimizer *ParameterOptimizer::Create(const std::string &config_proto,
Tensor *parameter) {
paddle::OptimizerConfig config;
CHECK(config.ParseFromString(config_proto) == true)
<< "failed parse optimizer config";
auto select_lr_policy = [=](const OptimizerConfig &config) -> LrPolicy * {
if (config.lr_policy() == OptimizerConfig::Const)
return new ConstLr(config.const_lr().learning_rate());
if (config.lr_policy() == OptimizerConfig::Linear)
return new LinearLr(config.linear_lr().learning_rate(),
config.linear_lr().lr_decay_a(),
config.linear_lr().lr_decay_b());
// default
LOG(WARNING) << " have not select any LrPolicy. use ConstLr in default";
return new ConstLr(0.1);
};
LrPolicy *lr = select_lr_policy(config);
auto select_optimizer = [=](
Tensor *parameter,
const OptimizerConfig &config) -> ParameterOptimizer * {
if (config.optimizer() == OptimizerConfig::SGD) {
return new SGDOptimizer(parameter,
lr,
config.sgd().momentum(),
config.sgd().decay(),
config.sgd().nesterov());
}
if (config.optimizer() == OptimizerConfig::Adadelta) {
return new AdadeltaOptimizer(parameter,
lr,
config.adadelta().rho(),
config.adadelta().epsilon(),
config.adadelta().decay());
}
if (config.optimizer() == OptimizerConfig::Adagrad) {
return new AdagradOptimizer(
parameter, lr, config.adagrad().epsilon(), config.adagrad().decay());
}
if (config.optimizer() == OptimizerConfig::Adam) {
return new AdamOptimizer(parameter,
lr,
config.adam().beta_1(),
config.adam().beta_2(),
config.adam().epsilon(),
config.adam().decay());
}
// default
LOG(WARNING)
<< "have not select any Optimizer. use SGDOptimizer in default";
return new SGDOptimizer(parameter, lr, 0.0, 0.0, false);
};
return select_optimizer(parameter, config);
}
float *ParameterOptimizer::get_weight(int *param_size) const {
*param_size = (int)parameter_->size();
return parameter_->get_buffer();
}
} // namespace optimizer
} // namespace paddle
#pragma once
#include <glog/logging.h>
#include <functional>
#include <string>
#include "OptimizerConfig.pb.h"
#include "lr_policy.h"
#include "serialization.h"
#include "tensor.h"
namespace paddle {
namespace optimizer {
class ParameterOptimizer {
public:
/**
* @brief update hook for algorithm need to traverse parameter more than
* once.
*/
ParameterOptimizer(Tensor *parameter, LrPolicy *lr)
: parameter_(parameter), lr_policy_(lr), num_sample_passed_(0) {}
virtual ~ParameterOptimizer() {
delete parameter_;
delete lr_policy_;
}
static ParameterOptimizer *Create(const std::string &config_proto,
Tensor *parameter);
virtual void Update(const Tensor *gradient) = 0;
virtual float *get_weight(int *param_size) const;
virtual const char *SerializeState(int *state_len) = 0;
virtual void DeserializeState(const std::string &state) = 0;
protected:
Tensor *parameter_;
// learning rate policy
LrPolicy *lr_policy_;
uint64_t num_sample_passed_;
};
} // namespace optimizer
} // namespace paddle
#include "parameter_optimizer.h"
#include <cmath>
#include <map>
#include <vector>
#include "gtest/gtest.h"
#include "lr_policy.h"
using namespace paddle;
using namespace paddle::optimizer;
Tensor* FillTensor(size_t size) {
Tensor* param = new Tensor(size);
Tensor& p = *param;
for (size_t i = 0; i < p.size(); ++i) {
p[i] = (float)rand() / (float)RAND_MAX;
}
return param;
}
Tensor* FixedTensor(size_t size) {
Tensor* param = new Tensor(size);
Tensor& p = *param;
for (size_t i = 0; i < p.size(); ++i) {
p[i] = i;
}
return param;
}
class OptimizerTest : public testing::Test {
public:
// init tensor shape
const size_t kSize = 5;
virtual void SetUp() {
CreateSGD();
CreateAdam();
}
virtual void TearDown() {}
void CreateSGD() {
Tensor* parameter = FixedTensor(kSize);
config_.set_optimizer(OptimizerConfig::SGD);
config_.mutable_sgd()->set_momentum(0.0);
config_.mutable_sgd()->set_decay(0.0);
config_.mutable_sgd()->set_nesterov(false);
config_.set_lr_policy(OptimizerConfig::Const);
config_.mutable_const_lr()->set_learning_rate(0.1);
std::string str = config_.SerializeAsString();
ParameterOptimizer* opt = ParameterOptimizer::Create(str, parameter);
opts_.push_back(opt);
}
void CreateAdam() {
Tensor* parameter = FixedTensor(kSize);
config_.set_optimizer(OptimizerConfig::Adam);
config_.mutable_adam()->set_beta_1(0.9);
config_.mutable_adam()->set_beta_2(0.1);
config_.mutable_adam()->set_epsilon(1e-3);
config_.mutable_adam()->set_decay(0.0);
config_.set_lr_policy(OptimizerConfig::Const);
config_.mutable_const_lr()->set_learning_rate(0.1);
std::string str = config_.SerializeAsString();
ParameterOptimizer* opt = ParameterOptimizer::Create(str, parameter);
opts_.push_back(opt);
}
void TestGetWeight() {
Tensor* p = FixedTensor(kSize);
for (size_t i = 0; i < opts_.size(); ++i) {
int s = 0;
float* newp = (float*)opts_[i]->get_weight(&s);
for (size_t j = 0; j < kSize; ++j) {
EXPECT_EQ(newp[j], (*p)[j]);
}
}
}
void TestUpdate() {
Tensor* g = FixedTensor(kSize);
for (size_t i = 0; i < opts_.size(); ++i) {
opts_[i]->Update(g);
}
}
void TestCheckPoint() {
for (size_t i = 0; i < opts_.size(); ++i) {
int state_len = 0;
std::string state = opts_[i]->SerializeState(&state_len);
opts_[i]->DeserializeState(state);
}
}
private:
std::vector<ParameterOptimizer*> opts_;
OptimizerConfig config_;
};
TEST_F(OptimizerTest, TestGetWeight) { TestGetWeight(); }
TEST_F(OptimizerTest, TestUpdate) { TestUpdate(); }
TEST_F(OptimizerTest, TestCheckPoint) { TestCheckPoint(); }
int main(int argc, char** argv) {
testing::InitGoogleTest(&argc, argv);
return RUN_ALL_TESTS();
}
#pragma once
#include <iostream>
#include <sstream>
#include <string>
#include <type_traits>
#include "OptimizerConfig.pb.h"
#include "paddle/utils/Logging.h"
#include "tensor.h"
namespace paddle {
namespace optimizer {
static void TensorToProto(const Tensor& tensor, TensorProto* proto) {
proto->set_data_type(TensorProto::PADDLE_ELEMENT_TYPE_FLOAT32);
std::stringstream os;
for (size_t i = 0; i < tensor.size(); ++i) {
os << tensor[i];
proto->add_content(os.str());
os.str(std::string());
}
}
static void ProtoToTensor(const TensorProto& proto, Tensor* tensor) {
std::stringstream sin;
for (auto i = 0; i < proto.content_size(); ++i) {
sin << proto.content(i);
sin >> (*tensor)[i];
sin.str(std::string());
sin.clear();
}
}
} // namespace optimizer
} // namespace paddle
#include "serialization.h"
#include "gtest/gtest.h"
using namespace paddle;
using namespace paddle::optimizer;
TEST(TensorToProto, Case1) {
Tensor t(3), t1(3);
for (size_t i = 0; i < t.size(); ++i) {
t[i] = i;
t1[i] = 0;
}
TensorProto proto;
TensorToProto(t, &proto);
ProtoToTensor(proto, &t1);
for (size_t i = 0; i < t1.size(); ++i) {
EXPECT_EQ(t1[i], t[i]);
}
}
int main(int argc, char** argv) {
testing::InitGoogleTest(&argc, argv);
return RUN_ALL_TESTS();
}
#include "sgd_optimizer.h"
#include "serialization.h"
namespace paddle {
namespace optimizer {
void SGDOptimizer::Update(const Tensor *gradient) {
num_sample_passed_ += 1;
double learning_rate = lr_policy_->LearningRate(num_sample_passed_);
float velocity = 0.0;
Tensor &param = *parameter_;
const Tensor &grad = *gradient;
Tensor &m = *momentums_;
for (size_t i = 0; i < param.size(); ++i) {
if (momentum_ == 0.0) {
velocity = -learning_rate * grad[i] - learning_rate * decay_ * param[i];
} else {
m[i] = momentum_ * m[i] - learning_rate * grad[i] -
learning_rate * decay_ * param[i];
velocity = m[i];
}
if (nesterov_) {
param[i] += momentum_ * velocity - learning_rate * grad[i];
} else {
param[i] += velocity;
}
}
}
const char *SGDOptimizer::SerializeState(int *state_len) {
SGDOptimizerState state;
state.set_num_sample_passed(num_sample_passed_);
TensorToProto(*parameter_, state.mutable_parameter());
if (momentum_ != 0.0) TensorToProto(*momentums_, state.mutable_momentums());
auto str = state.SerializeAsString();
*state_len = str.size();
return str.c_str();
}
void SGDOptimizer::DeserializeState(const std::string &str) {
SGDOptimizerState state;
state.ParseFromString(str);
num_sample_passed_ = state.num_sample_passed();
ProtoToTensor(state.parameter(), parameter_);
if (momentum_ != 0.0) ProtoToTensor(state.parameter(), momentums_);
}
} // namespace optimizer
} // namespace paddle
#pragma once
#include "parameter_optimizer.h"
namespace paddle {
namespace optimizer {
class SGDOptimizer : public ParameterOptimizer {
public:
SGDOptimizer(Tensor* parameter, LrPolicy* lr, double m, double d, bool n)
: ParameterOptimizer(parameter, lr),
momentums_(nullptr),
momentum_(m),
decay_(d),
nesterov_(n) {
if (momentum_ != 0.0) {
size_t size = parameter->size();
// TODO: fix it with align aware allocator bind to Tensor
momentums_ = new Tensor(size);
}
}
virtual ~SGDOptimizer() {
if (momentums_) delete momentums_;
}
void Update(const Tensor* gradient);
const char* SerializeState(int* state_len);
void DeserializeState(const std::string& state);
private:
Tensor* momentums_;
double momentum_;
double decay_;
bool nesterov_;
};
} // namespace optimizer
} // namespace paddle
#pragma once
/**
* @brief tensor used by optimizer
*/
#include <string.h>
#include <memory>
#include "paddle/utils/Common.h"
#include "paddle/utils/Logging.h"
namespace paddle {
namespace optimizer {
template <class T>
class TensorT {
public:
TensorT(size_t size) : height_(1), width_(size) {
data_ptr_ = std::shared_ptr<T>(new T[size], std::default_delete<T[]>());
data_ = data_ptr_.get();
}
TensorT(T* data, size_t size)
: height_(1), width_(size), data_ptr_(nullptr), data_(data) {}
TensorT(T* data, size_t h, size_t w)
: height_(h), width_(w), data_ptr_(nullptr), data_(data) {}
virtual ~TensorT() {}
T* get_buffer() { return this->data_; }
T& operator[](const size_t idx) {
CHECK(idx >= 0 && idx < this->width_) << "out of index range";
return data_[idx];
}
T& operator[](const size_t idx) const {
CHECK(idx >= 0 && idx < this->width_) << "out of index range";
return data_[idx];
}
// TODO: replace with tensorshape
size_t size() const { return this->width_ * this->height_; }
protected:
size_t height_;
size_t width_;
std::shared_ptr<T> data_ptr_;
T* data_;
};
// TODO(zhihong): design problem of dynamic datatype, need to fix it
typedef TensorT<float> Tensor;
} // namespace optimizer
} // namespace paddle
......@@ -149,6 +149,7 @@ struct Argument {
: getBatchSize();
}
bool hasSeq() const { return sequenceStartPositions != nullptr; }
bool hasSubseq() const { return subSequenceStartPositions != nullptr; }
const int* getCpuStartPositions() const {
......
---
Language: Cpp
BasedOnStyle: Google
Standard: Cpp11
...
nv_test(cuda_test SRCS cuda_test.cu)
cc_library(place SRCS place.cc)
cc_test(place_test SRCS place_test.cc DEPS place glog gflags)
cc_library(ddim SRCS ddim.cc)
cc_test(ddim_test SRCS ddim_test.cc DEPS ddim)
nv_test(cuda_test SRCS cuda_test.cu)
nv_test(dim_test SRCS dim_test.cu DEPS ddim)
......@@ -5,28 +5,25 @@
#if defined(__APPLE__) && defined(__CUDA_ARCH__) && !defined(NDEBUG)
#include <stdio.h>
#define MAJEL_ASSERT(e) \
#define PADDLE_ASSERT(e) \
do { \
if (!(e)) { \
printf( \
"%s:%d Assertion `%s` failed.\n", __FILE__, __LINE__, TOSTRING(e)); \
printf("%s:%d Assertion `%s` failed.\n", __FILE__, __LINE__, \
TOSTRING(e)); \
asm("trap;"); \
} \
} while (0)
#define MAJEL_ASSERT_MSG(e, m) \
#define PADDLE_ASSERT_MSG(e, m) \
do { \
if (!(e)) { \
printf("%s:%d Assertion `%s` failed (%s).\n", \
__FILE__, \
__LINE__, \
TOSTRING(e), \
m); \
printf("%s:%d Assertion `%s` failed (%s).\n", __FILE__, __LINE__, \
TOSTRING(e), m); \
asm("trap;"); \
} \
} while (0)
#else
#include <assert.h>
#define MAJEL_ASSERT(e) assert(e)
#define MAJEL_ASSERT_MSG(e, m) assert((e) && (m))
#define PADDLE_ASSERT(e) assert(e)
#define PADDLE_ASSERT_MSG(e, m) assert((e) && (m))
#endif
#include "paddle/majel/place.h"
#include "paddle/platform/place.h"
namespace majel {
namespace paddle {
namespace platform {
namespace detail {
class PlacePrinter : public boost::static_visitor<> {
private:
std::ostream& os_;
public:
PlacePrinter(std::ostream &os) : os_(os) {}
void operator()(const CpuPlace &) { os_ << "CpuPlace"; }
void operator()(const GpuPlace &p) { os_ << "GpuPlace(" << p.device << ")"; }
public:
PlacePrinter(std::ostream& os) : os_(os) {}
void operator()(const CpuPlace&) { os_ << "CpuPlace"; }
void operator()(const GpuPlace& p) { os_ << "GpuPlace(" << p.device << ")"; }
private:
std::ostream &os_;
};
} // namespace detail
static Place the_default_place;
void set_place(const Place& place) { the_default_place = place; }
const Place& get_place() { return the_default_place; }
void set_place(const Place &place) { the_default_place = place; }
const Place &get_place() { return the_default_place; }
const GpuPlace default_gpu() { return GpuPlace(0); }
const CpuPlace default_cpu() { return CpuPlace(); }
bool is_gpu_place(const Place& p) {
bool is_gpu_place(const Place &p) {
return boost::apply_visitor(IsGpuPlace(), p);
}
bool is_cpu_place(const Place& p) {
bool is_cpu_place(const Place &p) {
return !boost::apply_visitor(IsGpuPlace(), p);
}
bool places_are_same_class(const Place& p1, const Place& p2) {
bool places_are_same_class(const Place &p1, const Place &p2) {
return is_gpu_place(p1) == is_gpu_place(p2);
}
std::ostream& operator<<(std::ostream& os, const majel::Place& p) {
majel::detail::PlacePrinter printer(os);
std::ostream &operator<<(std::ostream &os, const Place &p) {
detail::PlacePrinter printer(os);
boost::apply_visitor(printer, p);
return os;
}
} // namespace majel
} // namespace platform
} // namespace paddle
......@@ -2,49 +2,48 @@
#include <boost/variant.hpp>
#include <iostream>
namespace majel {
namespace paddle {
namespace platform {
struct CpuPlace {
CpuPlace() {} // WORKAROUND: for some reason, omitting this constructor
// WORKAROUND: for some reason, omitting this constructor
// causes errors with boost 1.59 and OSX
// needed for variant equality comparison
inline bool operator==(const CpuPlace&) const { return true; }
CpuPlace() {}
inline bool operator!=(const CpuPlace&) const { return false; }
// needed for variant equality comparison
inline bool operator==(const CpuPlace &) const { return true; }
inline bool operator!=(const CpuPlace &) const { return false; }
};
struct GpuPlace {
GpuPlace() : GpuPlace(0) {}
GpuPlace(int d) : device(d) {}
// needed for variant equality comparison
inline bool operator==(const GpuPlace& o) const { return device == o.device; }
inline bool operator!=(const GpuPlace& o) const { return !(*this == o); }
inline bool operator==(const GpuPlace &o) const { return device == o.device; }
inline bool operator!=(const GpuPlace &o) const { return !(*this == o); }
GpuPlace() : GpuPlace(0) {}
int device;
};
class IsGpuPlace : public boost::static_visitor<bool> {
public:
bool operator()(const CpuPlace&) const { return false; }
bool operator()(const GpuPlace& gpu) const { return true; }
struct IsGpuPlace : public boost::static_visitor<bool> {
bool operator()(const CpuPlace &) const { return false; }
bool operator()(const GpuPlace &gpu) const { return true; }
};
typedef boost::variant<GpuPlace, CpuPlace> Place;
void set_place(const Place&);
const Place& get_place();
void set_place(const Place &);
const Place &get_place();
const GpuPlace default_gpu();
const CpuPlace default_cpu();
bool is_gpu_place(const Place&);
bool is_cpu_place(const Place&);
bool places_are_same_class(const Place&, const Place&);
bool is_gpu_place(const Place &);
bool is_cpu_place(const Place &);
bool places_are_same_class(const Place &, const Place &);
std::ostream& operator<<(std::ostream&, const majel::Place&);
std::ostream &operator<<(std::ostream &, const Place &);
} // namespace majel
} // namespace platform
} // namespace paddle
#include "paddle/majel/place.h"
#include "paddle/platform/place.h"
#include <sstream>
#include "gtest/gtest.h"
TEST(Place, Equality) {
majel::CpuPlace cpu;
majel::GpuPlace g0(0), g1(1), gg0(0);
paddle::platform::CpuPlace cpu;
paddle::platform::GpuPlace g0(0), g1(1), gg0(0);
EXPECT_EQ(cpu, cpu);
EXPECT_EQ(g0, g0);
......@@ -13,28 +13,28 @@ TEST(Place, Equality) {
EXPECT_NE(g0, g1);
EXPECT_TRUE(majel::places_are_same_class(g0, gg0));
EXPECT_FALSE(majel::places_are_same_class(g0, cpu));
EXPECT_TRUE(paddle::platform::places_are_same_class(g0, gg0));
EXPECT_FALSE(paddle::platform::places_are_same_class(g0, cpu));
}
TEST(Place, Default) {
EXPECT_TRUE(majel::is_gpu_place(majel::get_place()));
EXPECT_TRUE(majel::is_gpu_place(majel::default_gpu()));
EXPECT_TRUE(majel::is_cpu_place(majel::default_cpu()));
EXPECT_TRUE(paddle::platform::is_gpu_place(paddle::platform::get_place()));
EXPECT_TRUE(paddle::platform::is_gpu_place(paddle::platform::default_gpu()));
EXPECT_TRUE(paddle::platform::is_cpu_place(paddle::platform::default_cpu()));
majel::set_place(majel::CpuPlace());
EXPECT_TRUE(majel::is_cpu_place(majel::get_place()));
paddle::platform::set_place(paddle::platform::CpuPlace());
EXPECT_TRUE(paddle::platform::is_cpu_place(paddle::platform::get_place()));
}
TEST(Place, Print) {
{
std::stringstream ss;
ss << majel::GpuPlace(1);
ss << paddle::platform::GpuPlace(1);
EXPECT_EQ("GpuPlace(1)", ss.str());
}
{
std::stringstream ss;
ss << majel::CpuPlace();
ss << paddle::platform::CpuPlace();
EXPECT_EQ("CpuPlace", ss.str());
}
}
......@@ -26,6 +26,13 @@ set(TRAINER_HEADERS
ThreadParameterUpdater.h
TrainerConfigHelper.h)
if(NOT WITH_GOLANG)
list(REMOVE_ITEM TRAINER_SOURCES
NewRemoteParameterUpdater.cpp)
list(REMOVE_ITEM TRAINER_HEADERS
NewRemoteParameterUpdater.h)
endif()
add_library(paddle_trainer_lib STATIC
${TRAINER_SOURCES})
......@@ -34,7 +41,7 @@ add_style_check_target(paddle_trainer_lib
add_style_check_target(paddle_trainer_lib
${TRAINER_HEADERS})
add_dependencies(paddle_trainer_lib
gen_proto_cpp paddle_pserver_cclient_lib)
gen_proto_cpp)
macro(add_paddle_exe TARGET_NAME)
add_executable(${TARGET_NAME} ${ARGN})
......@@ -63,5 +70,8 @@ if(APPLE)
set(CMAKE_EXE_LINKER_FLAGS "-framework CoreFoundation -framework Security")
endif()
target_link_libraries(paddle_trainer ${CMAKE_CURRENT_SOURCE_DIR}/libpaddle_pserver_cclient.a)
target_link_libraries(paddle_trainer_lib ${CMAKE_CURRENT_SOURCE_DIR}/libpaddle_pserver_cclient.a)
if(WITH_GOLANG)
add_dependencies(paddle_trainer_lib paddle_pserver_cclient)
target_link_libraries(paddle_trainer ${CMAKE_BINARY_DIR}/go/pserver/cclient/libpaddle_pserver_cclient.a)
target_link_libraries(paddle_trainer_lib ${CMAKE_BINARY_DIR}/go/pserver/cclient/libpaddle_pserver_cclient.a)
endif(WITH_GOLANG)
......@@ -124,6 +124,8 @@ TEST(RecurrentGradientMachine, test_generation) {
bool beam_search) {
FLAGS_config_args = beam_search ? "beam_search=1" : "beam_search=0";
for (auto useGpu : useGpuConfs) {
LOG(INFO) << configFile << " useGpu=" << useGpu
<< " beam_search=" << beam_search;
testGeneration(configFile, useGpu, hasSubseq, expRetFile);
}
};
......
......@@ -5,6 +5,7 @@ set(proto_filenames
ParameterConfig.proto
ParameterService.proto
TrainerConfig.proto
OptimizerConfig.proto
ParameterServerConfig.proto)
set(PROTO_GEN)
......@@ -35,10 +36,8 @@ foreach(filename ${proto_filenames})
DEPENDS ${filename} ${external_project_dependencies})
endforeach()
include_directories(${CMAKE_CURRENT_BINARY_DIR}/proto)
add_custom_target(gen_proto_cpp ALL DEPENDS ${PROTO_GEN})
add_custom_target(gen_proto_py ALL DEPENDS ${PROTO_GEN_PY})
add_library(paddle_proto STATIC
${PROTO_GEN})
add_library(paddle_proto STATIC ${PROTO_GEN})
target_include_directories(paddle_proto PUBLIC ${CMAKE_CURRENT_BINARY_DIR})
syntax = "proto2";
option optimize_for = LITE_RUNTIME;
package paddle;
message SGDConfig {
// SGD
// momentum: float >= 0. Parameter updates momentum.
// decay: float >= 0. Learning rate decay over each update.
// nesterov: boolean. Whether to apply Nesterov momentum.
optional double momentum = 21 [default = 0.0];
optional double decay = 23 [default = 0.0];
optional bool nesterov =24 [default = false];
}
message AdadeltaConfig {
// Adadelta
// It is recommended to leave it at the default value.
// rho: float >= 0.
// epsilon: float >= 0. Fuzz factor.
// decay: float >= 0. Learning rate decay over each update.
// reference : [Adadelta - an adaptive learning rate method](http://arxiv.org/abs/1212.5701)
optional double rho = 33 [default = 0.90];
optional double epsilon = 31 [default = 1e-5];
optional double decay = 32 [default = 0.0];
}
message AdagradConfig {
// Adagrad
// epsilon: float >= 0.
// decay: float >= 0. Learning rate decay over each update.
// reference : [Adaptive Subgradient Methods for Online Learning and Stochastic Optimization](http://www.jmlr.org/papers/volume12/duchi11a/duchi11a.pdf)
optional double epsilon = 41 [default = 1e-5];
optional double decay = 42 [default = 0.0];
}
message AdamConfig {
// Adaj
// beta_1: float, 0 < beta < 1. Generally close to 1.
// beta_2: float, 0 < beta < 1. Generally close to 1.
// epsilon: float >= 0. Fuzz factor.
// decay: float >= 0. Learning rate decay over each update.
// reference : [Adam - A Method for Stochastic Optimization](http://arxiv.org/abs/1412.6980v8)
optional double beta_1 = 41;
optional double beta_2 = 42;
optional double epsilon = 43;
optional double decay = 44;
}
message ConstLrConfig {
// learninRate Policy
optional double learning_rate = 1 [default = 1.0];
}
message LinearLrConfig {
// learninRate Policy
optional double learning_rate = 1 [default = 1.0];
optional double lr_decay_a = 2;
optional double lr_decay_b = 3;
}
message TensorProto {
enum DataType {
PADDLE_ELEMENT_TYPE_INT32 = 0;
PADDLE_ELEMENT_TYPE_UINT32 = 1;
PADDLE_ELEMENT_TYPE_INT64 = 2;
PADDLE_ELEMENT_TYPE_UINT64 = 3;
PADDLE_ELEMENT_TYPE_FLOAT32 = 4;
PADDLE_ELEMENT_TYPE_FLOAT64 = 5;
}
optional DataType data_type = 1;
repeated bytes content = 2;
}
message SGDOptimizerState {
// learning rate policy
optional double learning_rate = 101;
optional double lr_decay_a = 102;
optional double lr_decay_b = 103;
optional double num_sample_passed = 104;
// state
optional TensorProto parameter = 1;
optional TensorProto momentums = 2;
}
message AdadeltaOptimizerState {
// learning rate policy
optional double learning_rate = 101;
optional double lr_decay_a = 102;
optional double lr_decay_b = 103;
optional double num_sample_passed = 104;
// state
optional TensorProto parameter = 1;
optional TensorProto accum_gradient = 2;
optional TensorProto accum_delta = 3;
optional TensorProto update_delta = 4;
}
message AdagradOptimizerState {
// learning rate policy
optional double learning_rate = 101;
optional double lr_decay_a = 102;
optional double lr_decay_b = 103;
optional double num_sample_passed = 104;
// state
optional TensorProto parameter = 1;
optional TensorProto accum_gradient = 2;
}
message AdamOptimizerState {
// learning rate policy
optional double learning_rate = 101;
optional double lr_decay_a = 102;
optional double lr_decay_b = 103;
optional double num_sample_passed = 104;
// state
optional TensorProto parameter = 1;
optional TensorProto momentums = 2;
optional TensorProto velocitys = 3;
}
message OptimizerConfig {
enum Optimizer {
SGD = 1;
Adadelta = 2;
Adagrad = 3;
Adam = 4;
}
optional Optimizer optimizer = 1;
optional SGDConfig sgd = 3;
optional AdadeltaConfig adadelta = 4;
optional AdagradConfig adagrad = 5;
optional AdamConfig adam = 6;
enum LrPolicy {
Const = 0;
Linear = 1;
}
optional LrPolicy lr_policy = 11;
optional ConstLrConfig const_lr = 12;
optional LinearLrConfig linear_lr = 13;
// common config of optimizer
// gradient clip when L2 exceeding value
optional double clip_norm = 101;
// gradient clip when L1 exceeding value
optional double clip_value = 102;
}
......@@ -18,7 +18,7 @@ configure_file(${CMAKE_CURRENT_SOURCE_DIR}/setup.py.in
add_custom_command(OUTPUT ${OUTPUT_DIR}/.timestamp
COMMAND env ${py_env} ${PYTHON_EXECUTABLE} setup.py bdist_wheel
COMMAND ${CMAKE_COMMAND} -E touch ${OUTPUT_DIR}/.timestamp
DEPENDS gen_proto_py ${PY_FILES} ${external_project_dependencies} paddle_master_shared)
DEPENDS gen_proto_py ${PY_FILES} ${external_project_dependencies})
add_custom_target(paddle_python ALL DEPENDS
${OUTPUT_DIR}/.timestamp)
......
......@@ -328,53 +328,33 @@ def RecurrentLayerGroupWithoutOutLinksBegin(name,
SubModelBegin(name)
g_current_submodel.is_recurrent_layer_group = True
g_current_submodel.reversed = seq_reversed
g_current_submodel.target_inlinkid = -1
in_links_count = 0
for linkid, link in enumerate(in_links):
if isinstance(link, basestring):
name = link
has_subseq = False
else:
name = link.link_name
has_subseq = link.has_subseq
# assign target_inlinkid according to target_inlinkname
if target_inlinkname == name:
g_current_submodel.target_inlinkid = linkid
if in_links_count == 0:
in_links_has_subseq = has_subseq
else:
config_assert(
in_links_has_subseq == has_subseq,
"The sequence type of in_links should be the same in RecurrentLayerGroup"
)
in_links_count += 1
layer_name = MakeLayerNameInParentSubmodel(name)
layer = g_layer_map[layer_name]
if has_subseq:
SequenceScatterAgentLayer(name=name, size=layer.size)
else:
ScatterAgentLayer(name=name, size=layer.size)
pair = g_current_submodel.in_links.add()
pair.layer_name = layer_name
pair.link_name = MakeLayerNameInSubmodel(name)
pair.has_subseq = has_subseq
@config_func
def RecurrentLayerGroupSetOutLink(link):
if isinstance(link, basestring):
name = link
has_subseq = False
else:
name = link.link_name
has_subseq = link.has_subseq
layer_name = MakeLayerNameInParentSubmodel(name)
pair = g_current_submodel.out_links.add()
pair.layer_name = MakeLayerNameInSubmodel(name)
pair.link_name = layer_name
pair.has_subseq = has_subseq
def RecurrentLayerGroupSetGenerator(generator=None):
......@@ -389,8 +369,7 @@ def RecurrentLayerGroupBegin(name,
generator=None,
target_inlinkname="",
seq_reversed=False):
RecurrentLayerGroupWithoutOutLinksBegin(name, in_links, seq_reversed,
target_inlinkname)
RecurrentLayerGroupWithoutOutLinksBegin(name, in_links, seq_reversed)
for link in out_links:
RecurrentLayerGroupSetOutLink(link)
......@@ -425,8 +404,6 @@ def RecurrentLayerGroupEnd(name):
agent_name = GetLayerBaseName(pair.link_name)
if prev_submodel.HasField("generator"):
DataLayer(name=agent_name, size=layer.size)
elif pair.has_subseq:
SequenceGatherAgentLayer(name=agent_name, size=layer.size)
else:
GatherAgentLayer(name=agent_name, size=layer.size)
......@@ -1651,8 +1628,14 @@ class SelectiveFCLayer(LayerBase):
@config_layer('print')
class PrintLayer(LayerBase):
def __init__(self, name, inputs):
def __init__(self, name, inputs, format=None):
super(PrintLayer, self).__init__(name, 'print', 0, inputs)
if format is None:
format = "\n".join([
"layer=" + input.input_layer_name + " %s"
for input in self.inputs
])
self.config.user_arg = format
@config_layer('priorbox')
......@@ -1949,7 +1932,6 @@ class BatchNormLayer(LayerBase):
def __init__(self,
name,
inputs,
active_type="linear",
bias=True,
use_global_stats=True,
moving_average_fraction=0.9,
......@@ -1987,12 +1969,7 @@ class BatchNormLayer(LayerBase):
cudnn_version >= 4007
self.layer_type = "cudnn_batch_norm" if use_cudnn else "batch_norm"
super(BatchNormLayer, self).__init__(
name,
self.layer_type,
0,
active_type=active_type,
inputs=inputs,
**xargs)
name, self.layer_type, 0, inputs=inputs, **xargs)
if use_global_stats is not None:
self.config.use_global_stats = use_global_stats
......@@ -2253,13 +2230,6 @@ class AgentLayer(LayerBase):
name, 'agent', size, inputs=[], device=device)
@config_layer('sequence_agent')
class SequenceAgentLayer(LayerBase):
def __init__(self, name, size, device=None):
super(SequenceAgentLayer, self).__init__(
name, 'sequence_agent', size, inputs=[], device=device)
@config_layer('gather_agent')
class GatherAgentLayer(LayerBase):
def __init__(self, name, size, device=None):
......@@ -2274,20 +2244,6 @@ class ScatterAgentLayer(LayerBase):
name, 'scatter_agent', size, inputs=[], device=device)
@config_layer('sequence_gather_agent')
class SequenceGatherAgentLayer(LayerBase):
def __init__(self, name, size, device=None):
super(SequenceGatherAgentLayer, self).__init__(
name, 'sequence_gather_agent', size, inputs=[], device=device)
@config_layer('sequence_scatter_agent')
class SequenceScatterAgentLayer(LayerBase):
def __init__(self, name, size, device=None):
super(SequenceScatterAgentLayer, self).__init__(
name, 'sequence_scatter_agent', size, inputs=[], device=device)
@config_layer('multiplex')
class MultiplexLayer(LayerBase):
def __init__(self, name, inputs, size, device=None):
......@@ -2303,12 +2259,12 @@ class MultiplexLayer(LayerBase):
@config_func
def Link(
name,
has_subseq=False, ):
def Link(name, has_subseq=False):
"""
Still keeping has_subseq for backward compatibility
"""
link_config = LinkConfig()
link_config.link_name = name
link_config.has_subseq = has_subseq
return link_config
......@@ -2341,12 +2297,6 @@ def Memory(name,
config_assert(name is not None, "name needs cannot be None")
memory_name = name + "+delay1"
agent_name = memory_name
if is_sequence:
config_assert(
boot_layer is not None,
"there must be boot_layer in network when is_sequence = True")
agent_layer = SequenceAgentLayer(agent_name, size)
else:
agent_layer = AgentLayer(agent_name, size)
config_assert(g_current_submodel.is_recurrent_layer_group,
'Memory should be used in recurrent layer group only')
......@@ -2354,7 +2304,6 @@ def Memory(name,
if name is not None:
memory.layer_name = MakeLayerNameInSubmodel(name)
memory.link_name = MakeLayerNameInSubmodel(agent_name)
memory.is_sequence = is_sequence
options = sum((boot_layer is not None, bool(boot_bias),
boot_with_const_id is not None))
config_assert(
......@@ -2428,15 +2377,23 @@ class ExpandLayer(LayerBase):
@config_layer('featmap_expand')
class FeatMapExpandLayer(LayerBase):
def __init__(self, name, inputs, device=None, num_filters=None, bias=False):
def __init__(self,
name,
inputs,
num_filters=None,
as_row_vector=True,
bias=False,
**xargs):
super(FeatMapExpandLayer, self).__init__(
name, 'featmap_expand', 0, inputs=inputs, device=device)
name, 'featmap_expand', 0, inputs=inputs, **xargs)
config_assert(
len(self.inputs) == 1, 'ExpandLayer takes 1 and only 1 inputs')
if num_filters is not None:
self.config.num_filters = num_filters
else:
logger.fatal("FeatMapExpandLayer must specify num_filters.")
if not as_row_vector:
self.config.user_arg = "as_col_vec"
self.set_layer_size(self.get_input_layer(0).size * num_filters)
......@@ -2446,14 +2403,12 @@ class MaxLayer(LayerBase):
name,
inputs,
trans_type='non-seq',
active_type='linear',
bias=False,
output_max_index=None,
**xargs):
super(MaxLayer, self).__init__(name, 'max', 0, inputs=inputs, **xargs)
config_assert(len(self.inputs) == 1, 'MaxLayer must have 1 input')
self.config.trans_type = trans_type
self.config.active_type = active_type
for input_index in xrange(len(self.inputs)):
input_layer = self.get_input_layer(input_index)
self.set_layer_size(input_layer.size)
......@@ -2495,18 +2450,12 @@ class SequenceLastInstanceLayer(LayerBase):
def __init__(self,
name,
inputs,
active_type='linear',
trans_type='non-seq',
bias=False,
stride=-1,
**xargs):
super(SequenceLastInstanceLayer, self).__init__(
name,
'seqlastins',
0,
inputs=inputs,
active_type=active_type,
**xargs)
name, 'seqlastins', 0, inputs=inputs, **xargs)
config_assert(
len(inputs) == 1, 'SequenceLastInstanceLayer must have 1 input')
if trans_type == 'seq':
......@@ -2522,7 +2471,6 @@ class SequenceFirstInstanceLayer(SequenceLastInstanceLayer):
def __init__(self,
name,
inputs,
active_type='linear',
trans_type='non-seq',
bias=False,
stride=-1,
......@@ -2530,7 +2478,6 @@ class SequenceFirstInstanceLayer(SequenceLastInstanceLayer):
super(SequenceFirstInstanceLayer, self).__init__(
name,
inputs=inputs,
active_type=active_type,
trans_type=trans_type,
bias=bias,
stride=stride,
......@@ -2540,14 +2487,9 @@ class SequenceFirstInstanceLayer(SequenceLastInstanceLayer):
@config_layer('seqconcat')
class SequenceConcatLayer(LayerBase):
def __init__(self, name, inputs, active_type='linear', bias=False, **xargs):
def __init__(self, name, inputs, bias=False, **xargs):
super(SequenceConcatLayer, self).__init__(
name,
'seqconcat',
0,
inputs=inputs,
active_type=active_type,
**xargs)
name, 'seqconcat', 0, inputs=inputs, **xargs)
config_assert(
len(inputs) == 2, 'SequenceConcatLayer must have 2 inputs')
for input_index in xrange(len(self.inputs)):
......@@ -2558,20 +2500,9 @@ class SequenceConcatLayer(LayerBase):
@config_layer('seqreshape')
class SequenceReshapeLayer(LayerBase):
def __init__(self,
name,
size,
inputs,
active_type='linear',
bias=False,
**xargs):
def __init__(self, name, size, inputs, bias=False, **xargs):
super(SequenceReshapeLayer, self).__init__(
name,
'seqreshape',
size,
inputs=inputs,
active_type=active_type,
**xargs)
name, 'seqreshape', size, inputs=inputs, **xargs)
config_assert(
len(inputs) == 1, 'SequenceReshapeLayer must have 1 inputs')
self.set_layer_size(size)
......@@ -2580,9 +2511,9 @@ class SequenceReshapeLayer(LayerBase):
@config_layer('subseq')
class SubSequenceLayer(LayerBase):
def __init__(self, name, inputs, active_type='linear', bias=False, **xargs):
def __init__(self, name, inputs, bias=False, **xargs):
super(SubSequenceLayer, self).__init__(
name, 'subseq', 0, inputs=inputs, active_type=active_type, **xargs)
name, 'subseq', 0, inputs=inputs, **xargs)
config_assert(len(inputs) == 3, 'SubSequenceLayer must have 3 inputs')
input_layer0 = self.get_input_layer(0)
size = input_layer0.size
......@@ -2738,11 +2669,10 @@ class AverageLayer(LayerBase):
inputs,
average_strategy='average',
trans_type='non-seq',
active_type='linear',
bias=False,
**xargs):
super(AverageLayer, self).__init__(
name, 'average', 0, inputs=inputs, active_type=active_type, **xargs)
name, 'average', 0, inputs=inputs, **xargs)
self.config.average_strategy = average_strategy
self.config.trans_type = trans_type
config_assert(len(inputs) == 1, 'AverageLayer must have 1 input')
......
......@@ -311,18 +311,6 @@ class LayerOutput(object):
self.outputs = outputs
self.reverse = reverse
def __repr__(self):
"""
Disable __repr__ for debug reason. Will be implemented when release
"""
assert False, "this method should not be invoked"
def __str__(self):
"""
Disable __str__ for debug reason. Will be implemented when release
"""
assert False, "this method should not be invoked"
def set_input(self, input):
"""
Set the input for a memory layer. Can only be used for memory layer
......@@ -976,7 +964,7 @@ def fc_layer(input,
@wrap_name_default("print")
def printer_layer(input, name=None):
def printer_layer(input, format=None, name=None):
"""
Print the output value of input layers. This layer is useful for debugging.
......@@ -994,6 +982,7 @@ def printer_layer(input, name=None):
Layer(
name=name,
format=format,
type=LayerType.PRINT_LAYER,
inputs=[l.name for l in input], )
# this layer don't return anything, can not be input of other layer.
......@@ -1565,14 +1554,24 @@ def expand_layer(input,
@wrap_name_default()
@wrap_act_default(act=IdentityActivation())
@layer_support()
def repeat_layer(input, num_repeats, name=None, layer_attr=None):
def repeat_layer(input,
num_repeats,
as_row_vector=True,
act=None,
name=None,
layer_attr=None):
"""
A layer for repeating the input for num_repeats times. This is equivalent
to apply concat_layer() with num_repeats same input.
A layer for repeating the input for num_repeats times.
If as_row_vector:
.. math::
y = [x, x, \cdots, x]
y = [x_1,\cdots, x_n, \cdots, x_1, \cdots, x_n]
If not as_row_vector:
.. math::
y = [x_1,\cdots, x_1, \cdots, x_n, \cdots, x_n]
The example usage is:
......@@ -1585,6 +1584,14 @@ def repeat_layer(input, num_repeats, name=None, layer_attr=None):
:param num_repeats: Repeat the input so many times
:type num_repeats: int
:param name: Layer name.
:param as_row_vector: True for treating input as row vector and repeating
in the column direction. This is equivalent to apply
concat_layer() with num_repeats same input.
False for treating input as column vector and repeating
in the row direction.
:type as_row_vector: bool
:param act: Activation type.
:type act: BaseActivation
:type name: basestring
:param layer_attr: extra layer attributes.
:type layer_attr: ExtraLayerAttribute.
......@@ -1595,13 +1602,16 @@ def repeat_layer(input, num_repeats, name=None, layer_attr=None):
l = Layer(
inputs=[input.name],
name=name,
active_type=act.name,
num_filters=num_repeats,
as_row_vector=as_row_vector,
type=LayerType.FEATURE_MAP_EXPAND_LAYER,
**ExtraAttr.to_kwargs(layer_attr))
return LayerOutput(
name=name,
size=l.config.size,
layer_type=LayerType.FEATURE_MAP_EXPAND_LAYER,
activation=act,
parents=[input])
......@@ -2846,11 +2856,13 @@ def seq_concat_layer(a, b, act=None, name=None, layer_attr=None,
Concat sequence a with sequence b.
Inputs:
- a = [a1, a2, ..., an]
- a = [a1, a2, ..., am]
- b = [b1, b2, ..., bn]
- Note that the length of a and b should be the same.
Output: [a1, b1, a2, b2, ..., an, bn]
Output: [a1, ..., am, b1, ..., bn]
Note that the above computation is for one sample. Multiple samples are
processed in one batch.
The example usage is:
......@@ -2944,7 +2956,7 @@ def memory(name,
:param memory_name: the name of the memory.
It is ignored when name is provided.
:type memory_name: basestring
:param is_seq: is sequence for boot_layer
:param is_seq: DEPRECATED. is sequence for boot_layer
:type is_seq: bool
:param boot_layer: boot layer of memory.
:type boot_layer: LayerOutput|None
......@@ -2971,7 +2983,6 @@ def memory(name,
memory_name = Memory(
name,
size,
is_sequence=is_seq,
boot_layer=boot_layer.name if boot_layer is not None else None,
boot_bias=boot_bias,
boot_bias_active_type=boot_bias_active_type.name,
......@@ -3318,19 +3329,21 @@ class StaticInput(object):
"""
StaticInput is only used in recurrent_group which defines a read-only memory
that can be a sequence or non-sequence.
:param size: DEPRECATED
:param is_seq: DEPRECATED
"""
def __init__(self, input, is_seq=False, size=None):
assert isinstance(input, LayerOutput)
self.input = input
self.is_seq = is_seq
assert input.size is not None or size is not None
assert input.size is not None
if size is not None:
input.size = size
assert input.size == size
class SubsequenceInput(object):
def SubsequenceInput(input):
"""
DEPRECATED.
Input sequence has sub-sequence, used in recurrent_group.
The example usage is:
......@@ -3339,11 +3352,7 @@ class SubsequenceInput(object):
input = SubsequenceInput(layer)
"""
def __init__(self, input):
assert isinstance(input, LayerOutput)
assert input.size is not None
self.input = input
return input
@wrap_name_default("recurrent_group")
......@@ -3407,7 +3416,8 @@ def recurrent_group(step,
input sequence in a reverse order.
:type reverse: bool
:param targetInlink: the input layer which share info with layer group's output
:param targetInlink: DEPRECATED.
The input layer which share info with layer group's output
Param input specifies multiple input layers. For
SubsequenceInput inputs, config should assign one input
......@@ -3429,46 +3439,21 @@ def recurrent_group(step,
model_type('recurrent_nn')
def is_single_input(x):
return isinstance(x, LayerOutput) or isinstance(x, StaticInput) \
or isinstance(x, SubsequenceInput)
return isinstance(x, LayerOutput) or isinstance(x, StaticInput)
if is_single_input(input):
input = [input]
assert isinstance(input, collections.Sequence)
def is_in_links(x):
return isinstance(x, LayerOutput) or isinstance(x, SubsequenceInput)
return isinstance(x, LayerOutput)
in_links = filter(is_in_links, input)
def targetInlink_in_inlinks():
for inlink in in_links:
if isinstance(inlink, SubsequenceInput):
if targetInlink == inlink.input:
return True
elif targetInlink == inlink:
return True
return False
assert (targetInlink == None or targetInlink_in_inlinks())
targetInlinkName = None if targetInlink == None \
else targetInlink.name if isinstance(targetInlink, LayerOutput) \
else targetInlink.input.name
contains_sub_seq = [False]
def map_in_links(x):
if isinstance(x, SubsequenceInput):
contains_sub_seq[0] = True
return Link(name=x.input.name, has_subseq=True)
else:
return x.name
RecurrentLayerGroupWithoutOutLinksBegin(
name=name,
in_links=map(map_in_links, in_links),
seq_reversed=reverse,
target_inlinkname=targetInlinkName)
in_links=map(lambda x: x.name, in_links),
seq_reversed=reverse)
in_args = []
has_LayerOutput = False
for each_input in input:
......@@ -3476,21 +3461,13 @@ def recurrent_group(step,
if isinstance(each_input, LayerOutput):
in_args.append(each_input)
has_LayerOutput = True
elif isinstance(each_input, SubsequenceInput):
in_args.append(each_input.input)
has_LayerOutput = True
else:
else: # StaticInput
mem_name = "__%s_memory__" % each_input.input.name
mem = memory(
name=mem_name,
is_seq=each_input.is_seq,
name=None,
size=each_input.input.size,
boot_layer=each_input.input)
with mixed_layer(
name=mem_name,
size=each_input.input.size,
act=IdentityActivation()) as mix:
mix += identity_projection(mem)
mem.set_input(mem)
in_args.append(mem)
assert (is_generating != has_LayerOutput)
......@@ -3503,9 +3480,6 @@ def recurrent_group(step,
for ot in layer_outs:
assert isinstance(ot, LayerOutput)
ot.reverse = reverse
if contains_sub_seq[0]:
RecurrentLayerGroupSetOutLink(Link(ot.name, has_subseq=True))
else:
RecurrentLayerGroupSetOutLink(ot.name)
RecurrentLayerGroupEnd(name=name)
......
#!/bin/bash
export configs=(test_fc layer_activations projections test_print_layer
export configs=(test_repeat_layer test_fc layer_activations projections test_print_layer
test_sequence_pooling test_lstmemory_layer test_grumemory_layer
last_first_seq test_expand_layer test_ntm_layers test_hsigmoid
img_layers img_trans_layers util_layers simple_rnn_layers unused_layers test_cost_layers
......
......@@ -9,7 +9,7 @@ layers {
name: "__first_seq_0__"
type: "seqlastins"
size: 30
active_type: "linear"
active_type: ""
inputs {
input_layer_name: "data"
}
......@@ -21,7 +21,7 @@ layers {
name: "__first_seq_1__"
type: "seqlastins"
size: 30
active_type: "linear"
active_type: ""
inputs {
input_layer_name: "data"
}
......@@ -33,7 +33,7 @@ layers {
name: "__last_seq_0__"
type: "seqlastins"
size: 30
active_type: "linear"
active_type: ""
inputs {
input_layer_name: "data"
}
......@@ -44,7 +44,7 @@ layers {
name: "__last_seq_1__"
type: "seqlastins"
size: 30
active_type: "linear"
active_type: ""
inputs {
input_layer_name: "data"
}
......@@ -55,7 +55,7 @@ layers {
name: "__first_seq_2__"
type: "seqlastins"
size: 30
active_type: "linear"
active_type: ""
inputs {
input_layer_name: "data"
}
......@@ -67,7 +67,7 @@ layers {
name: "__last_seq_2__"
type: "seqlastins"
size: 30
active_type: "linear"
active_type: ""
inputs {
input_layer_name: "data"
}
......
......@@ -123,7 +123,7 @@ layers {
name: "__last_seq_0__"
type: "seqlastins"
size: 200
active_type: "linear"
active_type: ""
inputs {
input_layer_name: "__simple_gru_0__"
}
......@@ -134,7 +134,7 @@ layers {
name: "__last_seq_1__"
type: "seqlastins"
size: 200
active_type: "linear"
active_type: ""
inputs {
input_layer_name: "__simple_gru_1__"
}
......@@ -256,19 +256,15 @@ sub_models {
memories {
layer_name: "__simple_gru_0__@__simple_gru_0___recurrent_group"
link_name: "__simple_gru_0__+delay1@__simple_gru_0___recurrent_group"
is_sequence: false
}
in_links {
layer_name: "__simple_gru_0___transform"
link_name: "__simple_gru_0___transform@__simple_gru_0___recurrent_group"
has_subseq: false
}
out_links {
layer_name: "__simple_gru_0__@__simple_gru_0___recurrent_group"
link_name: "__simple_gru_0__"
has_subseq: false
}
target_inlinkid: -1
}
sub_models {
name: "__simple_gru_1___recurrent_group"
......@@ -280,18 +276,14 @@ sub_models {
memories {
layer_name: "__simple_gru_1__@__simple_gru_1___recurrent_group"
link_name: "__simple_gru_1__+delay1@__simple_gru_1___recurrent_group"
is_sequence: false
}
in_links {
layer_name: "__simple_gru_1___transform"
link_name: "__simple_gru_1___transform@__simple_gru_1___recurrent_group"
has_subseq: false
}
out_links {
layer_name: "__simple_gru_1__@__simple_gru_1___recurrent_group"
link_name: "__simple_gru_1__"
has_subseq: false
}
target_inlinkid: -1
}
......@@ -205,7 +205,7 @@ layers {
name: "__last_seq_0__"
type: "seqlastins"
size: 100
active_type: "linear"
active_type: ""
inputs {
input_layer_name: "__lstm_group_0__"
}
......@@ -216,7 +216,7 @@ layers {
name: "__last_seq_1__"
type: "seqlastins"
size: 100
active_type: "linear"
active_type: ""
inputs {
input_layer_name: "__lstm_group_1__"
}
......@@ -341,24 +341,19 @@ sub_models {
memories {
layer_name: "__lstm_group_0__@__lstm_group_0___recurrent_group"
link_name: "__lstm_group_0__+delay1@__lstm_group_0___recurrent_group"
is_sequence: false
}
memories {
layer_name: "__lstm_group_0___state@__lstm_group_0___recurrent_group"
link_name: "__lstm_group_0___state+delay1@__lstm_group_0___recurrent_group"
is_sequence: false
}
in_links {
layer_name: "__mixed_0__"
link_name: "__mixed_0__@__lstm_group_0___recurrent_group"
has_subseq: false
}
out_links {
layer_name: "__lstm_group_0__@__lstm_group_0___recurrent_group"
link_name: "__lstm_group_0__"
has_subseq: false
}
target_inlinkid: -1
}
sub_models {
name: "__lstm_group_1___recurrent_group"
......@@ -373,23 +368,18 @@ sub_models {
memories {
layer_name: "__lstm_group_1__@__lstm_group_1___recurrent_group"
link_name: "__lstm_group_1__+delay1@__lstm_group_1___recurrent_group"
is_sequence: false
}
memories {
layer_name: "__lstm_group_1___state@__lstm_group_1___recurrent_group"
link_name: "__lstm_group_1___state+delay1@__lstm_group_1___recurrent_group"
is_sequence: false
}
in_links {
layer_name: "__mixed_1__"
link_name: "__mixed_1__@__lstm_group_1___recurrent_group"
has_subseq: false
}
out_links {
layer_name: "__lstm_group_1__@__lstm_group_1___recurrent_group"
link_name: "__lstm_group_1__"
has_subseq: false
}
target_inlinkid: -1
}
......@@ -138,7 +138,7 @@ layers {
name: "__last_seq_0__"
type: "seqlastins"
size: 200
active_type: "linear"
active_type: ""
inputs {
input_layer_name: "__recurrent_layer_0__"
}
......@@ -149,7 +149,7 @@ layers {
name: "__first_seq_0__"
type: "seqlastins"
size: 200
active_type: "linear"
active_type: ""
inputs {
input_layer_name: "__recurrent_layer_1__"
}
......@@ -161,7 +161,7 @@ layers {
name: "__last_seq_1__"
type: "seqlastins"
size: 200
active_type: "linear"
active_type: ""
inputs {
input_layer_name: "__lstmemory_0__"
}
......@@ -172,7 +172,7 @@ layers {
name: "__first_seq_1__"
type: "seqlastins"
size: 200
active_type: "linear"
active_type: ""
inputs {
input_layer_name: "__lstmemory_1__"
}
......@@ -184,7 +184,7 @@ layers {
name: "__last_seq_2__"
type: "seqlastins"
size: 200
active_type: "linear"
active_type: ""
inputs {
input_layer_name: "__gru_0__"
}
......@@ -195,7 +195,7 @@ layers {
name: "__first_seq_2__"
type: "seqlastins"
size: 200
active_type: "linear"
active_type: ""
inputs {
input_layer_name: "__gru_1__"
}
......
......@@ -12,6 +12,7 @@ layers {
inputs {
input_layer_name: "input"
}
user_arg: "layer=input %s"
}
input_layer_names: "input"
output_layer_names: "input"
......
type: "nn"
layers {
name: "data"
type: "data"
size: 30
active_type: ""
}
layers {
name: "__repeat_layer_0__"
type: "featmap_expand"
size: 300
active_type: ""
inputs {
input_layer_name: "data"
}
num_filters: 10
}
layers {
name: "__repeat_layer_1__"
type: "featmap_expand"
size: 300
active_type: "tanh"
inputs {
input_layer_name: "data"
}
num_filters: 10
user_arg: "as_col_vec"
}
input_layer_names: "data"
output_layer_names: "__repeat_layer_0__"
output_layer_names: "__repeat_layer_1__"
sub_models {
name: "root"
layer_names: "data"
layer_names: "__repeat_layer_0__"
layer_names: "__repeat_layer_1__"
input_layer_names: "data"
output_layer_names: "__repeat_layer_0__"
output_layer_names: "__repeat_layer_1__"
is_recurrent_layer_group: false
}
......@@ -91,7 +91,7 @@ layers {
name: "__last_seq_0__"
type: "seqlastins"
size: 200
active_type: "linear"
active_type: ""
inputs {
input_layer_name: "rnn_forward"
}
......@@ -140,7 +140,7 @@ layers {
name: "__first_seq_0__"
type: "seqlastins"
size: 200
active_type: "linear"
active_type: ""
inputs {
input_layer_name: "rnn_back"
}
......@@ -155,7 +155,7 @@ layers {
}
layers {
name: "sub_seq_input@__recurrent_group_2__"
type: "sequence_scatter_agent"
type: "scatter_agent"
size: 100
active_type: ""
}
......@@ -182,7 +182,7 @@ layers {
}
layers {
name: "rnn_subseq_forward"
type: "sequence_gather_agent"
type: "gather_agent"
size: 200
active_type: ""
}
......@@ -190,7 +190,7 @@ layers {
name: "__last_seq_1__"
type: "seqlastins"
size: 200
active_type: "linear"
active_type: ""
inputs {
input_layer_name: "rnn_subseq_forward"
}
......@@ -280,7 +280,7 @@ layers {
name: "__last_seq_2__"
type: "seqlastins"
size: 100
active_type: "linear"
active_type: ""
inputs {
input_layer_name: "__lstm_group_0__"
}
......@@ -329,7 +329,7 @@ layers {
name: "__last_seq_3__"
type: "seqlastins"
size: 100
active_type: "linear"
active_type: ""
inputs {
input_layer_name: "__gru_group_0__"
}
......@@ -378,7 +378,7 @@ layers {
name: "__last_seq_4__"
type: "seqlastins"
size: 200
active_type: "linear"
active_type: ""
inputs {
input_layer_name: "__fc_layer_0__"
}
......@@ -618,19 +618,15 @@ sub_models {
memories {
layer_name: "rnn_forward@__recurrent_group_0__"
link_name: "rnn_forward+delay1@__recurrent_group_0__"
is_sequence: false
}
in_links {
layer_name: "seq_input"
link_name: "seq_input@__recurrent_group_0__"
has_subseq: false
}
out_links {
layer_name: "rnn_forward@__recurrent_group_0__"
link_name: "rnn_forward"
has_subseq: false
}
target_inlinkid: -1
}
sub_models {
name: "__recurrent_group_1__"
......@@ -642,19 +638,15 @@ sub_models {
memories {
layer_name: "rnn_back@__recurrent_group_1__"
link_name: "rnn_back+delay1@__recurrent_group_1__"
is_sequence: false
}
in_links {
layer_name: "seq_input"
link_name: "seq_input@__recurrent_group_1__"
has_subseq: false
}
out_links {
layer_name: "rnn_back@__recurrent_group_1__"
link_name: "rnn_back"
has_subseq: false
}
target_inlinkid: -1
}
sub_models {
name: "__recurrent_group_2__"
......@@ -666,19 +658,15 @@ sub_models {
memories {
layer_name: "rnn_subseq_forward@__recurrent_group_2__"
link_name: "rnn_subseq_forward+delay1@__recurrent_group_2__"
is_sequence: false
}
in_links {
layer_name: "sub_seq_input"
link_name: "sub_seq_input@__recurrent_group_2__"
has_subseq: true
}
out_links {
layer_name: "rnn_subseq_forward@__recurrent_group_2__"
link_name: "rnn_subseq_forward"
has_subseq: true
}
target_inlinkid: -1
}
sub_models {
name: "__lstm_group_0___recurrent_group"
......@@ -693,24 +681,19 @@ sub_models {
memories {
layer_name: "__lstm_group_0__@__lstm_group_0___recurrent_group"
link_name: "__lstm_group_0__+delay1@__lstm_group_0___recurrent_group"
is_sequence: false
}
memories {
layer_name: "__lstm_group_0___state@__lstm_group_0___recurrent_group"
link_name: "__lstm_group_0___state+delay1@__lstm_group_0___recurrent_group"
is_sequence: false
}
in_links {
layer_name: "__mixed_0__"
link_name: "__mixed_0__@__lstm_group_0___recurrent_group"
has_subseq: false
}
out_links {
layer_name: "__lstm_group_0__@__lstm_group_0___recurrent_group"
link_name: "__lstm_group_0__"
has_subseq: false
}
target_inlinkid: -1
}
sub_models {
name: "__gru_group_0___recurrent_group"
......@@ -722,19 +705,15 @@ sub_models {
memories {
layer_name: "__gru_group_0__@__gru_group_0___recurrent_group"
link_name: "__gru_group_0__+delay1@__gru_group_0___recurrent_group"
is_sequence: false
}
in_links {
layer_name: "__mixed_1__"
link_name: "__mixed_1__@__gru_group_0___recurrent_group"
has_subseq: false
}
out_links {
layer_name: "__gru_group_0__@__gru_group_0___recurrent_group"
link_name: "__gru_group_0__"
has_subseq: false
}
target_inlinkid: -1
}
sub_models {
name: "__recurrent_group_3__"
......@@ -746,18 +725,14 @@ sub_models {
memories {
layer_name: "__fc_layer_0__@__recurrent_group_3__"
link_name: "__memory_6__@__recurrent_group_3__"
is_sequence: false
}
in_links {
layer_name: "seq_input"
link_name: "seq_input@__recurrent_group_3__"
has_subseq: false
}
out_links {
layer_name: "__fc_layer_0__@__recurrent_group_3__"
link_name: "__fc_layer_0__"
has_subseq: false
}
target_inlinkid: -1
}
......@@ -27,7 +27,7 @@ layers {
name: "__seqreshape_0__"
type: "seqreshape"
size: 5
active_type: "linear"
active_type: ""
inputs {
input_layer_name: "data1"
}
......
......@@ -9,7 +9,7 @@ layers {
name: "__seq_pooling_0__"
type: "max"
size: 100
active_type: "linear"
active_type: ""
inputs {
input_layer_name: "dat_in"
}
......@@ -19,7 +19,7 @@ layers {
name: "__seq_pooling_1__"
type: "max"
size: 100
active_type: "linear"
active_type: ""
inputs {
input_layer_name: "dat_in"
}
......@@ -29,7 +29,7 @@ layers {
name: "__seq_pooling_2__"
type: "average"
size: 100
active_type: "linear"
active_type: ""
inputs {
input_layer_name: "dat_in"
}
......@@ -40,7 +40,7 @@ layers {
name: "__seq_pooling_3__"
type: "average"
size: 100
active_type: "linear"
active_type: ""
inputs {
input_layer_name: "dat_in"
}
......@@ -51,7 +51,7 @@ layers {
name: "__seq_pooling_4__"
type: "average"
size: 100
active_type: "linear"
active_type: ""
inputs {
input_layer_name: "dat_in"
}
......@@ -62,7 +62,7 @@ layers {
name: "__seq_pooling_5__"
type: "average"
size: 100
active_type: "linear"
active_type: ""
inputs {
input_layer_name: "dat_in"
}
......@@ -73,7 +73,7 @@ layers {
name: "__seq_pooling_6__"
type: "max"
size: 100
active_type: "linear"
active_type: ""
inputs {
input_layer_name: "dat_in"
}
......
from paddle.trainer_config_helpers import *
settings(batch_size=1000, learning_rate=1e-5)
din = data_layer(name='data', size=30)
outputs(
repeat_layer(
input=din, num_repeats=10, as_row_vector=True),
repeat_layer(
input=din, num_repeats=10, act=TanhActivation(), as_row_vector=False))
......@@ -26,7 +26,6 @@ import evaluator
from . import dataset
from . import reader
from . import plot
from . import master
import attr
import op
import pooling
......@@ -57,7 +56,6 @@ __all__ = [
'plot',
'evaluator',
'image',
'master',
]
......
......@@ -15,6 +15,7 @@
import requests
import hashlib
import os
import errno
import shutil
import sys
import importlib
......@@ -27,7 +28,12 @@ __all__ = ['DATA_HOME', 'download', 'md5file', 'split', 'cluster_files_reader']
DATA_HOME = os.path.expanduser('~/.cache/paddle/dataset')
if not os.path.exists(DATA_HOME):
try:
os.makedirs(DATA_HOME)
except OSError as exc:
if exc.errno != errno.EEXIST:
raise
pass
def md5file(fname):
......
......@@ -260,7 +260,7 @@ def parse_network(output_layers, extra_layers=None):
else:
extra_layers = []
layer_names = __get_used_layers__(output_layers + extra_layers)
layer_names = __get_used_layers__(list(output_layers) + list(extra_layers))
submodel_names = __get_used_submodels__(layer_names)
submodel_names.add('root')
evaluator_names = __get_used_evaluators__(layer_names)
......
......@@ -8,8 +8,7 @@ packages=['paddle',
'paddle.v2',
'paddle.v2.dataset',
'paddle.v2.reader',
'paddle.v2.plot',
'paddle.v2.master']
'paddle.v2.plot']
setup_requires=["requests",
"numpy",
......@@ -25,7 +24,6 @@ setup(name='paddle',
description='Parallel Distributed Deep Learning',
install_requires=setup_requires,
packages=packages,
package_data={'paddle.v2.master': ['libpaddle_master.so'], },
package_dir={
'': '${CMAKE_CURRENT_SOURCE_DIR}'
},
......
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册