diff --git a/.travis.yml b/.travis.yml index f9b4a7e08315a42a61a58d6c61c45771df962c4d..a53bd1809416d6f14a1ec7f603622d3303d1ab28 100644 --- a/.travis.yml +++ b/.travis.yml @@ -1,23 +1,21 @@ language: cpp cache: directories: - - $HOME/third_party - $HOME/.ccache - $HOME/.cache/pip + - $TRAVIS_BUILD_DIR/build/third_party sudo: required dist: trusty os: - linux env: - - JOB=DOCS - - JOB=BUILD_AND_TEST - - JOB=PRE_COMMIT + - JOB=build_doc + - JOB=check_style addons: apt: packages: - gcc-4.8 - g++-4.8 - - gfortran-4.8 - git - build-essential - python @@ -34,18 +32,7 @@ addons: - libtool - ccache before_install: - - | - if [ ${JOB} == "BUILD_AND_TEST" ]; then - local change_list=`git diff --name-only $TRAVIS_COMMIT_RANGE` - if [ $? -eq 0 ]; then # if git diff return no zero, then rerun unit test. - if ! echo ${change_list} | grep -qvE '(\.md$)|(\.rst$)|(\.jpg$)|(\.png$)' - then - echo "Only markdown docs were updated, stopping build process." - exit - fi - fi - fi - - if [[ "$JOB" == "PRE_COMMIT" ]]; then sudo ln -s /usr/bin/clang-format-3.8 /usr/bin/clang-format; fi + - if [[ "$JOB" == "check_style" ]]; then sudo ln -s /usr/bin/clang-format-3.8 /usr/bin/clang-format; fi # Paddle is using protobuf 3.1 currently. Protobuf 3.2 breaks the compatibility. So we specify the python # protobuf version. - pip install numpy wheel 'protobuf==3.1' sphinx==1.5.6 recommonmark sphinx-rtd-theme==0.1.9 virtualenv pre-commit requests==2.9.2 LinkChecker @@ -54,8 +41,8 @@ before_install: - | function timeout() { perl -e 'alarm shift; exec @ARGV' "$@"; } script: - - | - timeout 2580 paddle/scripts/travis/main.sh # 43min timeout + - | + timeout 2580 paddle/scripts/travis/${JOB}.sh # 43min timeout RESULT=$?; if [ $RESULT -eq 0 ] || [ $RESULT -eq 142 ]; then true; else false; fi; notifications: email: diff --git a/CMakeLists.txt b/CMakeLists.txt index c5d7f2c7ec76dcc7befcd16798d26a7d54a19328..24a7066adc57c510030b0926c81849daa4caa6ca 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -27,6 +27,7 @@ if(NOT CMAKE_CROSSCOMPILING) endif(NOT CMAKE_CROSSCOMPILING) find_package(Git REQUIRED) find_package(Threads REQUIRED) +find_package(Boost QUIET) include(simd) @@ -71,7 +72,7 @@ if(ANDROID) "Disable RDMA when cross-compiling for Android" FORCE) endif(ANDROID) -set(THIRD_PARTY_PATH "${PROJ_ROOT}/third_party" CACHE STRING +set(THIRD_PARTY_PATH "${CMAKE_BINARY_DIR}/third_party" CACHE STRING "A path setting third party libraries download & build directories.") if (WITH_C_API AND WITH_PYTHON) @@ -92,6 +93,7 @@ include(external/openblas) # download, build, install openblas include(external/swig) # download, build, install swig include(external/warpctc) # download, build, install warpctc include(external/any) # download libn::any +include(external/eigen) # download eigen3 include(generic) # simplify cmake module include(package) # set paddle packages @@ -109,6 +111,7 @@ include_directories("${PROJ_ROOT}") include_directories("${PROJ_ROOT}/paddle/cuda/include") include_directories("${CMAKE_CURRENT_BINARY_DIR}/proto") include_directories("${CMAKE_CURRENT_BINARY_DIR}/go/pserver/cclient") +include_directories(${Boost_INCLUDE_DIRS}) set(EXTERNAL_LIBS ${GFLAGS_LIBRARIES} diff --git a/Dockerfile b/Dockerfile index 39af60966b6cab7d8b9e644f4ea658613f8ba518..bf227737c5a67b006ccc221235daf6d8ad7b3bd8 100644 --- a/Dockerfile +++ b/Dockerfile @@ -25,7 +25,7 @@ COPY ./paddle/scripts/docker/root/ /root/ RUN apt-get update && \ apt-get install -y \ git python-pip python-dev openssh-server bison \ - wget unzip tar xz-utils bzip2 gzip coreutils \ + 
wget unzip tar xz-utils bzip2 gzip coreutils ntp \ curl sed grep graphviz libjpeg-dev zlib1g-dev \ python-numpy python-matplotlib gcc g++ \ automake locales clang-format-3.8 swig doxygen cmake \ diff --git a/cmake/external/eigen.cmake b/cmake/external/eigen.cmake new file mode 100644 index 0000000000000000000000000000000000000000..45f44f617dcb46062355df4e35d537086215a46d --- /dev/null +++ b/cmake/external/eigen.cmake @@ -0,0 +1,29 @@ +INCLUDE(ExternalProject) + +SET(EIGEN_SOURCE_DIR ${THIRD_PARTY_PATH}/eigen3) + +INCLUDE_DIRECTORIES(${EIGEN_SOURCE_DIR}/src/eigen3) + +ExternalProject_Add( + eigen3 + ${EXTERNAL_PROJECT_LOG_ARGS} + # for latest version, please get from official website + # URL "https://bitbucket.org/eigen/eigen/get/3.3.4.tar.gz" + # URL_MD5 "1a47e78efe365a97de0c022d127607c3" + + # for no-ssl http support, please get from bazel's mirror + # URL "http://mirror.bazel.build/bitbucket.org/eigen/eigen/get/f3a22f35b044.tar.gz" + # URL_MD5 "4645c66075982da6fa0bcf6b20f3e8f7" + + # get from github mirror + GIT_REPOSITORY "https://github.com/RLovelett/eigen.git" + GIT_TAG "a46d2e7337c4656f00abe54a8115f6d76153a048" + PREFIX ${EIGEN_SOURCE_DIR} + UPDATE_COMMAND "" + CONFIGURE_COMMAND "" + BUILD_COMMAND "" + INSTALL_COMMAND "" + TEST_COMMAND "" +) + +LIST(APPEND external_project_dependencies eigen3) diff --git a/cmake/external/openblas.cmake b/cmake/external/openblas.cmake index 2341e3785bd8e951e10e3f6bbf8a32f63e4ae44d..5b9d9844ed21ceb507a8e01676c3533f4e3dd8fb 100644 --- a/cmake/external/openblas.cmake +++ b/cmake/external/openblas.cmake @@ -21,7 +21,8 @@ IF(NOT ${CBLAS_FOUND}) SET(CBLAS_INSTALL_DIR ${THIRD_PARTY_PATH}/install/openblas) SET(CBLAS_INC_DIR "${CBLAS_INSTALL_DIR}/include" CACHE PATH "openblas include directory." FORCE) - SET(CBLAS_LIBRARIES "${CBLAS_INSTALL_DIR}/lib/${LIBRARY_PREFIX}openblas${STATIC_LIBRARY_SUFFIX}" + SET(CBLAS_LIBRARIES + "${CBLAS_INSTALL_DIR}/lib/${CMAKE_STATIC_LIBRARY_PREFIX}openblas${CMAKE_STATIC_LIBRARY_SUFFIX}" CACHE FILEPATH "openblas library." FORCE) SET(COMMON_ARGS CC=${CMAKE_C_COMPILER} NO_SHARED=1 NO_LAPACK=1 libs) diff --git a/cmake/external/protobuf.cmake b/cmake/external/protobuf.cmake index 7340394b1e1fad9e1893ac87d62febb8dd72751c..2f267adc203f3da80615318f168de9798c537080 100644 --- a/cmake/external/protobuf.cmake +++ b/cmake/external/protobuf.cmake @@ -13,12 +13,53 @@ # limitations under the License. INCLUDE(ExternalProject) +# Always invoke `FIND_PACKAGE(Protobuf)` for importing function protobuf_generate_cpp +FIND_PACKAGE(Protobuf QUIET) +SET(PROTOBUF_FOUND "OFF") + +# Print and set the protobuf library information, +# finish this cmake process and exit from this file. macro(PROMPT_PROTOBUF_LIB) + SET(protobuf_DEPS ${ARGN}) + MESSAGE(STATUS "Protobuf protoc executable: ${PROTOBUF_PROTOC_EXECUTABLE}") MESSAGE(STATUS "Protobuf library: ${PROTOBUF_LIBRARY}") MESSAGE(STATUS "Protobuf version: ${PROTOBUF_VERSION}") INCLUDE_DIRECTORIES(${PROTOBUF_INCLUDE_DIR}) + + # Assuming that all the protobuf libraries are of the same type. 
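+    # On Linux, for example, this maps libprotobuf.a to STATIC and
+    # libprotobuf.so to SHARED, based on the file extension of PROTOBUF_LIBRARY.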
+ IF(${PROTOBUF_LIBRARY} MATCHES "${CMAKE_STATIC_LIBRARY_SUFFIX}$") + SET(protobuf_LIBTYPE STATIC) + ELSEIF(${PROTOBUF_LIBRARY} MATCHES "${CMAKE_SHARED_LIBRARY_SUFFIX}$") + SET(protobuf_LIBTYPE SHARED) + ELSE() + MESSAGE(FATAL_ERROR "Unknown library type: ${PROTOBUF_LIBRARY}") + ENDIF() + + ADD_LIBRARY(protobuf ${protobuf_LIBTYPE} IMPORTED GLOBAL) + SET_PROPERTY(TARGET protobuf PROPERTY IMPORTED_LOCATION ${PROTOBUF_LIBRARY}) + + ADD_LIBRARY(protobuf_lite ${protobuf_LIBTYPE} IMPORTED GLOBAL) + SET_PROPERTY(TARGET protobuf_lite PROPERTY IMPORTED_LOCATION ${PROTOBUF_LITE_LIBRARY}) + + ADD_LIBRARY(libprotoc ${protobuf_LIBTYPE} IMPORTED GLOBAL) + SET_PROPERTY(TARGET libprotoc PROPERTY IMPORTED_LOCATION ${PROTOC_LIBRARY}) + + ADD_EXECUTABLE(protoc IMPORTED GLOBAL) + SET_PROPERTY(TARGET protoc PROPERTY IMPORTED_LOCATION ${PROTOBUF_PROTOC_EXECUTABLE}) + # FIND_Protobuf.cmake uses `Protobuf_PROTOC_EXECUTABLE`. + # make `protobuf_generate_cpp` happy. + SET(Protobuf_PROTOC_EXECUTABLE ${PROTOBUF_PROTOC_EXECUTABLE}) + + FOREACH(dep ${protobuf_DEPS}) + ADD_DEPENDENCIES(protobuf ${dep}) + ADD_DEPENDENCIES(protobuf_lite ${dep}) + ADD_DEPENDENCIES(libprotoc ${dep}) + ADD_DEPENDENCIES(protoc ${dep}) + ENDFOREACH() + + LIST(APPEND external_project_dependencies protobuf) RETURN() endmacro() macro(SET_PROTOBUF_VERSION) @@ -43,22 +84,23 @@ if (NOT "${PROTOBUF_ROOT}" STREQUAL "") endif() FUNCTION(build_protobuf TARGET_NAME BUILD_FOR_HOST) - SET(PROTOBUF_SOURCES_DIR ${THIRD_PARTY_PATH}/${TARGET_NAME}) - SET(PROTOBUF_INSTALL_DIR ${THIRD_PARTY_PATH}/install/${TARGET_NAME}) + STRING(REPLACE "extern_" "" TARGET_DIR_NAME "${TARGET_NAME}") + SET(PROTOBUF_SOURCES_DIR ${THIRD_PARTY_PATH}/${TARGET_DIR_NAME}) + SET(PROTOBUF_INSTALL_DIR ${THIRD_PARTY_PATH}/install/${TARGET_DIR_NAME}) SET(${TARGET_NAME}_INCLUDE_DIR "${PROTOBUF_INSTALL_DIR}/include" PARENT_SCOPE) SET(PROTOBUF_INCLUDE_DIR "${PROTOBUF_INSTALL_DIR}/include" PARENT_SCOPE) SET(${TARGET_NAME}_LITE_LIBRARY - "${PROTOBUF_INSTALL_DIR}/lib/libprotobuf-lite${STATIC_LIBRARY_SUFFIX}" + "${PROTOBUF_INSTALL_DIR}/lib/libprotobuf-lite${CMAKE_STATIC_LIBRARY_SUFFIX}" PARENT_SCOPE) SET(${TARGET_NAME}_LIBRARY - "${PROTOBUF_INSTALL_DIR}/lib/libprotobuf${STATIC_LIBRARY_SUFFIX}" + "${PROTOBUF_INSTALL_DIR}/lib/libprotobuf${CMAKE_STATIC_LIBRARY_SUFFIX}" PARENT_SCOPE) SET(${TARGET_NAME}_PROTOC_LIBRARY - "${PROTOBUF_INSTALL_DIR}/lib/libprotoc${STATIC_LIBRARY_SUFFIX}" + "${PROTOBUF_INSTALL_DIR}/lib/libprotoc${CMAKE_STATIC_LIBRARY_SUFFIX}" PARENT_SCOPE) SET(${TARGET_NAME}_PROTOC_EXECUTABLE - "${PROTOBUF_INSTALL_DIR}/bin/protoc${EXECUTABLE_SUFFIX}" + "${PROTOBUF_INSTALL_DIR}/bin/protoc${CMAKE_EXECUTABLE_SUFFIX}" PARENT_SCOPE) SET(OPTIONAL_CACHE_ARGS "") @@ -109,6 +151,8 @@ IF(NOT CMAKE_CROSSCOMPILING) SET_PROTOBUF_VERSION() IF("${PROTOBUF_VERSION}" VERSION_LESS "3.1.0") SET(PROTOBUF_FOUND OFF) + ELSE() + PROMPT_PROTOBUF_LIB() ENDIF() ENDIF(PROTOBUF_FOUND) ELSE() @@ -120,18 +164,22 @@ ELSE() ENDIF() IF(NOT PROTOBUF_FOUND) - build_protobuf(protobuf FALSE) - LIST(APPEND external_project_dependencies protobuf) + build_protobuf(extern_protobuf FALSE) - SET(PROTOBUF_INCLUDE_DIR ${protobuf_INCLUDE_DIR} + SET(PROTOBUF_INCLUDE_DIR ${extern_protobuf_INCLUDE_DIR} CACHE PATH "protobuf include directory." FORCE) - IF(NOT CMAKE_CROSSCOMPILING) - SET(PROTOBUF_PROTOC_EXECUTABLE ${protobuf_PROTOC_EXECUTABLE} + SET(PROTOBUF_LITE_LIBRARY ${extern_protobuf_LITE_LIBRARY} + CACHE FILEPATH "protobuf lite library." FORCE) + SET(PROTOBUF_LIBRARY ${extern_protobuf_LIBRARY} + CACHE FILEPATH "protobuf library." 
FORCE) + SET(PROTOBUF_PROTOC_LIBRARY ${extern_protobuf_PROTOC_LIBRARY} + CACHE FILEPATH "protoc library." FORCE) + + IF(CMAKE_CROSSCOMPILING) + PROMPT_PROTOBUF_LIB(protobuf_host extern_protobuf) + ELSE() + SET(PROTOBUF_PROTOC_EXECUTABLE ${extern_protobuf_PROTOC_EXECUTABLE} CACHE FILEPATH "protobuf executable." FORCE) + PROMPT_PROTOBUF_LIB(extern_protobuf) ENDIF() - SET(PROTOBUF_LITE_LIBRARY ${protobuf_LITE_LIBRARY} CACHE FILEPATH "protobuf lite library." FORCE) - SET(PROTOBUF_LIBRARY ${protobuf_LIBRARY} CACHE FILEPATH "protobuf library." FORCE) - SET(PROTOBUF_PROTOC_LIBRARY ${protobuf_PROTOC_LIBRARY} CACHE FILEPATH "protoc library." FORCE) ENDIF(NOT PROTOBUF_FOUND) - -PROMPT_PROTOBUF_LIB() \ No newline at end of file diff --git a/cmake/generic.cmake b/cmake/generic.cmake index 69e8164a00d1fb57b79c63ba88c2846d30d80cd2..61353a4a2622257eddb05578c5085c44c1719b98 100644 --- a/cmake/generic.cmake +++ b/cmake/generic.cmake @@ -77,6 +77,18 @@ # /cmake/external/*.cmake: # # cc_test(example_test SRCS example_test.cc DEPS example glog gflags) +# +# To build a go static library using Golang, use the go_ prefixed version: +# +# go_library(example STATIC) +# +# To build a go shared library using Golang, use the go_ prefixed version: +# +# go_library(example SHARED) +# + +# including binary directory for generated headers. +include_directories(${CMAKE_BINARY_DIR}) if(NOT APPLE) find_package(Threads REQUIRED) @@ -246,42 +258,53 @@ endfunction(nv_test) set(GOPATH "${CMAKE_CURRENT_BINARY_DIR}/go") file(MAKE_DIRECTORY ${GOPATH}) +set(PADDLE_IN_GOPATH "${GOPATH}/src/github.com/PaddlePaddle/Paddle") -# Because api.go defines a GO wrapper to ops and tensor, it depends on -# both. This implies that if any of tensor.{h,cc}, ops.{h,cu}, or -# api.go is changed, api need to be re-built. -# go_library(api -# SRCS -# api.go -# DEPS -# tensor # Because ops depend on tensor, this line is optional. -# ops) function(go_library TARGET_NAME) - set(options OPTIONAL) + set(options STATIC static SHARED shared) set(oneValueArgs "") - set(multiValueArgs SRCS DEPS) + set(multiValueArgs DEPS) cmake_parse_arguments(go_library "${options}" "${oneValueArgs}" "${multiValueArgs}" ${ARGN}) - if (${go_library_OPTIONAL} STREQUAL "SHARED") + + if (go_library_SHARED OR go_library_shared) set(BUILD_MODE "-buildmode=c-shared") - if(APPLE) - set(LIB_NAME "lib${TARGET_NAME}.dylib") - else() - set(LIB_NAME "lib${TARGET_NAME}.so") - endif() + set(LIB_NAME "${CMAKE_SHARED_LIBRARY_PREFIX}${TARGET_NAME}${CMAKE_SHARED_LIBRARY_SUFFIX}") else() set(BUILD_MODE "-buildmode=c-archive") - set(LIB_NAME "lib${TARGET_NAME}.a") + set(LIB_NAME "${CMAKE_STATIC_LIBRARY_PREFIX}${TARGET_NAME}${CMAKE_STATIC_LIBRARY_SUFFIX}") endif() - add_custom_command(OUTPUT ${TARGET_NAME}_timestamp + + # Add dummy code to support `make target_name` under Terminal Command + set(dummyfile ${CMAKE_CURRENT_BINARY_DIR}/${TARGET_NAME}_dummy.c) + file(WRITE ${dummyfile} "const char * dummy = \"${dummyfile}\";") + if (go_library_SHARED OR go_library_shared) + add_library(${TARGET_NAME} SHARED ${dummyfile}) + else() + add_library(${TARGET_NAME} STATIC ${dummyfile}) + endif() + if(go_library_DEPS) + add_dependencies(${TARGET_NAME} ${go_library_DEPS}) + endif(go_library_DEPS) + + # we need to symlink Paddle directory into GOPATH. If we + # don't do it and we have code that depends on Paddle, go + # get ./... will download a new Paddle repo from Github, + # without the changes in our current Paddle repo that we + # want to build. 
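+  # NOTE: file(GLOB) below collects the *.go files at configure time, so CMake
+  # must be re-run after Go source files are added or removed.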
+ file(GLOB GO_SOURCE RELATIVE "${CMAKE_CURRENT_SOURCE_DIR}" "*.go") + add_custom_command(TARGET ${TARGET_NAME} POST_BUILD + COMMAND rm "${CMAKE_CURRENT_BINARY_DIR}/${LIB_NAME}" + # Symlink Paddle directory into GOPATH + COMMAND mkdir -p ${PADDLE_IN_GOPATH} + COMMAND rm -rf ${PADDLE_IN_GOPATH} + COMMAND ln -sf ${CMAKE_SOURCE_DIR} ${PADDLE_IN_GOPATH} + # Automatically get all dependencies specified in the source code + COMMAND env GOPATH=${GOPATH} ${CMAKE_Go_COMPILER} get -d ./... + # Golang build source code COMMAND env GOPATH=${GOPATH} ${CMAKE_Go_COMPILER} build ${BUILD_MODE} -o "${CMAKE_CURRENT_BINARY_DIR}/${LIB_NAME}" - ${go_library_SRCS} + ${GO_SOURCE} WORKING_DIRECTORY ${CMAKE_CURRENT_SOURCE_DIR}) - add_custom_target(${TARGET_NAME}_lib ALL DEPENDS ${TARGET_NAME}_timestamp ${go_library_DEPS}) - add_library(${TARGET_NAME} STATIC IMPORTED) - set_property(TARGET ${TARGET_NAME} PROPERTY - IMPORTED_LOCATION "${CMAKE_CURRENT_BINARY_DIR}/${LIB_NAME}") - add_dependencies(${TARGET_NAME} ${TARGET_NAME}_lib) endfunction(go_library) function(go_binary TARGET_NAME) @@ -312,9 +335,12 @@ function(go_test TARGET_NAME) add_test(${TARGET_NAME} ${CMAKE_CURRENT_BINARY_DIR}/${TARGET_NAME}) endfunction(go_test) -# go_extern will download extern go project. -# go_extern(target_name extern_source) -# go_extern(go_redis github.com/hoisie/redis) -function(go_extern TARGET_NAME) - add_custom_target(${TARGET_NAME} env GOPATH=${GOPATH} ${CMAKE_Go_COMPILER} get ${ARGN}) -endfunction(go_extern) +function(proto_library TARGET_NAME) + set(oneValueArgs "") + set(multiValueArgs SRCS) + cmake_parse_arguments(proto_library "${options}" "${oneValueArgs}" "${multiValueArgs}" ${ARGN}) + set(proto_srcs) + set(proto_hdrs) + protobuf_generate_cpp(proto_srcs proto_hdrs ${proto_library_SRCS}) + cc_library(${TARGET_NAME} SRCS ${proto_srcs} DEPS protobuf) +endfunction() diff --git a/cmake/system.cmake b/cmake/system.cmake index 904652413e026e3a7f3f2a19f48f4e906ce6babb..adf5e2c539740076ad1808353522c7467d765e64 100644 --- a/cmake/system.cmake +++ b/cmake/system.cmake @@ -33,6 +33,7 @@ ELSE(WIN32) SET(CMAKE_OSX_DEPLOYMENT_TARGET ${MACOS_VERSION} CACHE STRING "Minimum OS X version to target for deployment (at runtime); newer APIs weak linked. 
Set to empty string for default value.")
     ENDIF()
+    set(CMAKE_EXE_LINKER_FLAGS "-framework CoreFoundation -framework Security")
 ELSE(APPLE)
 
     IF(EXISTS "/etc/issue")
@@ -84,24 +85,6 @@ IF(DEFINED CMAKE_SYSTEM_NAME)
     ENDIF()
 ENDIF()
 
-# prefix and suffix on different os
-IF(WIN32)
-    SET(LIBRARY_PREFIX "")
-    SET(SHARED_LIBRARY_SUFFIX ".dll")
-    SET(STATIC_LIBRARY_SUFFIX ".lib")
-    SET(EXECUTABLE_SUFFIX ".exe")
-ELSE(WIN32)
-    SET(LIBRARY_PREFIX "lib")
-    IF(APPLE)
-        SET(SHARED_LIBRARY_SUFFIX ".dylib")
-    ELSE(APPLE)
-        SET(SHARED_LIBRARY_SUFFIX ".so")
-    ENDIF(APPLE)
-
-    SET(STATIC_LIBRARY_SUFFIX ".a")
-    SET(EXECUTABLE_SUFFIX "")
-ENDIF(WIN32)
-
 # external dependencies log output
 SET(EXTERNAL_PROJECT_LOG_ARGS
     LOG_DOWNLOAD    0     # Wrap download in script to log output
diff --git a/doc/CMakeLists.txt b/doc/CMakeLists.txt
index 6fa42fd0c71e78cc2fa6b0fe2cb970baf4ac89ed..94dd3457fb5b513441c4c8e339e1862de9092517 100644
--- a/doc/CMakeLists.txt
+++ b/doc/CMakeLists.txt
@@ -27,10 +27,6 @@ sphinx_add_target(paddle_docs
   ${CMAKE_CURRENT_SOURCE_DIR}
   ${SPHINX_HTML_DIR_EN})
 
-add_dependencies(paddle_docs
-  gen_proto_py)
-
-
 # configured documentation tools and intermediate build results
 set(BINARY_BUILD_DIR_CN "${CMAKE_CURRENT_BINARY_DIR}/cn/_build")
 
@@ -51,6 +47,3 @@ sphinx_add_target(paddle_docs_cn
   ${SPHINX_CACHE_DIR_CN}
   ${CMAKE_CURRENT_SOURCE_DIR}
   ${SPHINX_HTML_DIR_CN})
-
-add_dependencies(paddle_docs_cn
-  gen_proto_py)
diff --git a/doc/api/v2/config/evaluators.rst b/doc/api/v2/config/evaluators.rst
index 39db51fa4abc370855ca3f2778b47464f33b6fce..9ac972fb193a2fb525edc507f7ba1303d2c8eabe 100644
--- a/doc/api/v2/config/evaluators.rst
+++ b/doc/api/v2/config/evaluators.rst
@@ -99,3 +99,12 @@ value_printer
 .. automodule:: paddle.v2.evaluator
     :members: value_printer
     :noindex:
+
+Detection
+=========
+
+detection_map
+-------------
+.. automodule:: paddle.v2.evaluator
+    :members: detection_map
+    :noindex:
diff --git a/doc/design/scope.md b/doc/design/scope.md
new file mode 100644
index 0000000000000000000000000000000000000000..afe6bc028cafc5ee24b0041905857af58d3f5790
--- /dev/null
+++ b/doc/design/scope.md
@@ -0,0 +1,124 @@
+# Design of Scope in Paddle
+
+## Overview
+
+Scope is an important concept in programming languages: it defines a program region in which a set of bindings between names and entities applies. Within a specific scope, a valid name is uniquely associated with an entity, such as a variable; in another scope, the same name may refer to a different entity or to nothing at all. Scope thus restricts the visibility and validity of names in a program. Hence **Scope** is introduced to PaddlePaddle to manage variables in context. But unlike the original abstract concept, Scope is an object with two important attributes:
+
+- Scope is an association of a name to a variable.
+- Variables in a parent scope can be retrieved from its local scopes.
+
+A detailed explanation of these two attributes follows.
+
+
+## Scope is an association of a name to a variable
+
+Scope is an association of a name to a variable. All variables belong to a `Scope`. You need to specify a scope to run a Net, i.e., `net.Run(&scope)`. One net can run in different scopes and update different variables in each scope.
+
+1. Scope only contains a map from names to variables.
+
+   All parameters, data, and states in a Net should be variables, stored inside a scope. Each op should get its inputs and outputs from a scope, such as data buffers, state (momentum), etc.
+
+1. Variable can only be created by Scope, and a variable can only be obtained from Scope. Users cannot create or get a variable outside a scope. This is a constraint of our framework that keeps the framework simple and clear.
+
+1. Scope only contains methods that are used to Create and Get Variables. Scope does not contain Operators and has no information to run them.
+
+   `Net` is designed to drive the computation, and Scope only contains a map of variables. There is no computation logic inside a `Scope`. Scope just handles the lifetime management of variables.
+   - `Create` is used to create a Variable by its name and add the mapping relation.
+   - `Get` is used to find a Variable by its name.
+
+1. Every variable belongs to exactly one Scope.
+
+   A variable cannot belong to many scopes. If you want to use variables from a parent scope, use the `parent scope`.
+
+1. Scope should destruct all Variables inside it when it is itself destructed. Users should never store a `Variable` pointer anywhere else.
+
+   Because a Variable can only be obtained from a Scope, destroying a Scope must also destroy all the Variables in it. If a user stores a `Variable` pointer in a private data member or in some global variable, that pointer becomes invalid when the associated `Scope` is destroyed.
+
+```cpp
+class Scope {
+ public:
+  Variable* CreateVariable(const std::string& name);
+  const Variable* GetVariable(const std::string& name) const;
+
+ private:
+  std::unordered_map<std::string, std::unique_ptr<Variable>> vars_;
+};
+```
+
+
+## Parent scope and local scope
+
+Just like [scope](https://en.wikipedia.org/wiki/Scope_(computer_science)) in programming languages, a `Scope` in the neural network can also be a local scope. There are two attributes of a local scope:
+
+1. We can create local variables in a local scope. When that local scope is destroyed, all local variables are destroyed as well.
+2. Variables in a parent scope can be retrieved from the local scopes of that parent scope, i.e., when a user gets a variable from a scope, the search starts in the current scope. If there is no such variable in the local scope, the `scope` keeps searching its parent, until the variable is found or there is no parent.
+
+```cpp
+class Scope {
+ public:
+  Scope(const std::shared_ptr<Scope>& scope): parent_(scope) {}
+
+  Variable* GetVariable(const std::string& name) const {
+    auto it = vars_.find(name);
+    if (it != vars_.end()) {
+      return it->second.get();
+    } else if (parent_ != nullptr) {
+      return parent_->GetVariable(name);
+    } else {
+      return nullptr;
+    }
+  }
+
+ private:
+  std::shared_ptr<Scope> parent_ {nullptr};
+};
+```
+
+In the `Scope` class, there is a private data member called `parent_`. `parent_` is a smart pointer to its parent scope. When a user calls `Get` with a variable's `name`, the `name` is first searched inside the current scope. If the variable cannot be found locally and the parent scope is not a `nullptr`, the variable is searched inside the parent scope. The default value of `parent_` is `nullptr`; a scope whose `parent_` is `nullptr` is the global scope.
+
+A local scope is very useful when we implement a Recurrent Neural Network. Each timestep of an RNN should be a `Net`. The `Net` of each timestep (`StepNet` for short) should use an independent local scope, just as variables in a while loop live in a local scope in programming languages. By using a single `StepNet` and changing the local scope, we can implement an RNN easily.
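+
+A minimal sketch of this idea (the `StepNet` type and its `Run` call are hypothetical; only the `Scope` interface above is assumed):
+
+```cpp
+// One shared StepNet drives every timestep; each timestep gets a fresh local scope.
+auto rnn_scope = std::make_shared<Scope>(nullptr);  // scope holding the RNN's state
+for (int t = 0; t < num_steps; ++t) {
+  Scope step_scope(rnn_scope);   // local scope chained to its parent
+  step_net.Run(&step_scope);     // outer variables are resolved through parent_
+}                                // step_scope's local variables are destroyed here
+```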
+
+# Interface Design
+
+```cpp
+class Variable {
+ private:
+  Variable() = default;
+  friend class Scope;
+};
+
+class Scope {
+ private:
+  Scope(const std::shared_ptr<Scope>& parent = nullptr);
+
+ public:
+  static std::shared_ptr<Scope> Create(const std::shared_ptr<Scope>& parent = nullptr);
+
+  // returns nullptr if not found.
+  Variable* GetVariable(const std::string& name) const;
+
+  // fails if a variable with the same name already exists.
+  Variable* CreateVariable(const std::string& name);
+
+ private:
+  std::shared_ptr<Scope> parent_;
+  std::unordered_map<std::string, std::unique_ptr<Variable>> vars_;
+};
+```
+
+## Only scope can create a variable
+
+To ensure that `only scope can create a variable`, we make `Variable`'s constructor a private member function and make `Scope` a friend class of `Variable`. Then only `CreateVariable` can construct a `Variable`.
+
+## When a scope is destroyed, all variables inside it should be destroyed together
+
+The scope holds unique pointers to all of its variables. A user can `GetVariable` from a scope but should not hold that pointer as a member variable, because when the scope is destroyed, all variables inside it are destroyed together.
+
+## Sharing a parent scope
+
+A local scope contains a `parent_` pointer, so scopes form a linked list. We use a `shared_ptr` because while a local scope is in use, its parents must not be destroyed.
+
+Also, since the parent scope is held as a `shared_ptr`, we can only `Create()` a scope as a shared pointer; we cannot construct a scope as a plain variable, because such a scope could not be passed to another scope as its `parent` pointer.
+
+## Orthogonal interface
+
+`GetVariable` returns `nullptr` when `name` is not found, so it can double as a `Contains` method. `CreateVariable` returns an `Error` when there is a local name conflict. By combining `GetVariable` and `CreateVariable`, we can implement `CreateOrGetVariable` easily.
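+
+A short usage sketch of this interface (the variable names are illustrative only):
+
+```cpp
+auto global = Scope::Create();               // root scope, parent_ == nullptr
+auto local  = Scope::Create(global);         // local scope sharing `global` as parent
+
+Variable* w = global->CreateVariable("w");   // created in the root scope
+assert(local->GetVariable("w") == w);        // found through the parent_ chain
+assert(local->GetVariable("b") == nullptr);  // doubles as a Contains check
+```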
diff --git a/doc/getstarted/concepts/use_concepts_cn.rst b/doc/getstarted/concepts/use_concepts_cn.rst index e63ca11102c8ce457afcc3c262fa5f159361c01d..f15b11bd780402a3ec1755900e8c648f5d2a7bc5 100644 --- a/doc/getstarted/concepts/use_concepts_cn.rst +++ b/doc/getstarted/concepts/use_concepts_cn.rst @@ -111,7 +111,7 @@ PaddlePaddle支持不同类型的输入数据,主要包括四种类型,和 # define training dataset reader def train_reader(): train_x = np.array([[1, 1], [1, 2], [3, 4], [5, 2]]) - train_y = np.array([-2, -3, -7, -7]) + train_y = np.array([[-2], [-3], [-7], [-7]]) def reader(): for i in xrange(train_y.shape[0]): yield train_x[i], train_y[i] diff --git a/go/cmake/CMakeDetermineGoCompiler.cmake b/go/cmake/CMakeDetermineGoCompiler.cmake deleted file mode 100644 index a9bb6906c7440782bd648bb7505a548248a11bb0..0000000000000000000000000000000000000000 --- a/go/cmake/CMakeDetermineGoCompiler.cmake +++ /dev/null @@ -1,44 +0,0 @@ -if(NOT CMAKE_Go_COMPILER) - if(NOT $ENV{GO_COMPILER} STREQUAL "") - get_filename_component(CMAKE_Go_COMPILER_INIT $ENV{GO_COMPILER} PROGRAM PROGRAM_ARGS CMAKE_Go_FLAGS_ENV_INIT) - - if(CMAKE_Go_FLAGS_ENV_INIT) - set(CMAKE_Go_COMPILER_ARG1 "${CMAKE_Go_FLAGS_ENV_INIT}" CACHE STRING "First argument to Go compiler") - endif() - - if(NOT EXISTS ${CMAKE_Go_COMPILER_INIT}) - message(SEND_ERROR "Could not find compiler set in environment variable GO_COMPILER:\n$ENV{GO_COMPILER}.") - endif() - - endif() - - set(Go_BIN_PATH - $ENV{GOPATH} - $ENV{GOROOT} - $ENV{GOROOT}/../bin - $ENV{GO_COMPILER} - /usr/bin - /usr/local/bin - ) - - if(CMAKE_Go_COMPILER_INIT) - set(CMAKE_Go_COMPILER ${CMAKE_Go_COMPILER_INIT} CACHE PATH "Go Compiler") - else() - find_program(CMAKE_Go_COMPILER - NAMES go - PATHS ${Go_BIN_PATH} - ) - EXEC_PROGRAM(${CMAKE_Go_COMPILER} ARGS version OUTPUT_VARIABLE GOLANG_VERSION) - STRING(REGEX MATCH "go[0-9]+.[0-9]+.[0-9]+[ /A-Za-z0-9]*" VERSION "${GOLANG_VERSION}") - message("-- The Golang compiler identification is ${VERSION}") - message("-- Check for working Golang compiler: ${CMAKE_Go_COMPILER}") - endif() - -endif() - -mark_as_advanced(CMAKE_Go_COMPILER) - -configure_file(${CMAKE_MODULE_PATH}/CMakeGoCompiler.cmake.in - ${CMAKE_PLATFORM_INFO_DIR}/CMakeGoCompiler.cmake @ONLY) - -set(CMAKE_Go_COMPILER_ENV_VAR "GO_COMPILER") diff --git a/go/cmake/CMakeGoCompiler.cmake.in b/go/cmake/CMakeGoCompiler.cmake.in deleted file mode 100644 index a71f08e064656fbaad8cfa77aea6f216515712ef..0000000000000000000000000000000000000000 --- a/go/cmake/CMakeGoCompiler.cmake.in +++ /dev/null @@ -1,8 +0,0 @@ -set(CMAKE_Go_COMPILER "@CMAKE_Go_COMPILER@") -set(CMAKE_Go_COMPILER_LOADED 1) - -set(CMAKE_Go_SOURCE_FILE_EXTENSIONS go) -set(CMAKE_Go_LINKER_PREFERENCE 40) -set(CMAKE_Go_OUTPUT_EXTENSION .o) -set(CMAKE_Go_OUTPUT_EXTENSION_REPLACE 1) -set(CMAKE_Go_COMPILER_ENV_VAR "GO_COMPILER") diff --git a/go/cmake/CMakeGoInformation.cmake b/go/cmake/CMakeGoInformation.cmake deleted file mode 100644 index ba51ac93fcd429478f324b66bd5129d94ea2a8f4..0000000000000000000000000000000000000000 --- a/go/cmake/CMakeGoInformation.cmake +++ /dev/null @@ -1,7 +0,0 @@ -if(NOT CMAKE_Go_COMPILE_OBJECT) - set(CMAKE_Go_COMPILE_OBJECT "go tool compile -l -N -o ") -endif() - -if(NOT CMAKE_Go_LINK_EXECUTABLE) - set(CMAKE_Go_LINK_EXECUTABLE "go tool link -o ") -endif() diff --git a/go/cmake/CMakeTestGoCompiler.cmake b/go/cmake/CMakeTestGoCompiler.cmake deleted file mode 100644 index b9891b015baced05b51e34dba562fd98a84fe14c..0000000000000000000000000000000000000000 --- a/go/cmake/CMakeTestGoCompiler.cmake +++ /dev/null @@ -1 +0,0 @@ 
-set(CMAKE_Go_COMPILER_WORKS 1 CACHE INTERNAL "") diff --git a/go/cmake/flags.cmake b/go/cmake/flags.cmake deleted file mode 100644 index a167c432a920e9ee93878603f3b946e8593412f6..0000000000000000000000000000000000000000 --- a/go/cmake/flags.cmake +++ /dev/null @@ -1,45 +0,0 @@ -# Setting Paddle Compile Flags -include(CheckCXXCompilerFlag) -include(CheckCCompilerFlag) -include(CheckCXXSymbolExists) -include(CheckTypeSize) - -function(CheckCompilerCXX11Flag) - if(CMAKE_CXX_COMPILER_ID STREQUAL "GNU") - if(${CMAKE_CXX_COMPILER_VERSION} VERSION_LESS 4.8) - message(FATAL_ERROR "Unsupported GCC version. GCC >= 4.8 required.") - endif() - elseif(CMAKE_CXX_COMPILER_ID STREQUAL "AppleClang" OR CMAKE_CXX_COMPILER_ID STREQUAL "Clang") - # cmake >= 3.0 compiler id "AppleClang" on Mac OS X, otherwise "Clang" - # Apple Clang is a different compiler than upstream Clang which havs different version numbers. - # https://gist.github.com/yamaya/2924292 - if(APPLE) # cmake < 3.0 compiler id "Clang" on Mac OS X - if(${CMAKE_CXX_COMPILER_VERSION} VERSION_LESS 5.1) - message(FATAL_ERROR "Unsupported AppleClang version. AppleClang >= 5.1 required.") - endif() - else() - if (${CMAKE_CXX_COMPILER_VERSION} VERSION_LESS 3.3) - message(FATAL_ERROR "Unsupported Clang version. Clang >= 3.3 required.") - endif() - endif() - endif() -endfunction() - -CheckCompilerCXX11Flag() -set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -std=c++11") - -# Common gpu architectures: Kepler, Maxwell -foreach(capability 30 35 50) - list(APPEND __arch_flags " -gencode arch=compute_${capability},code=sm_${capability}") -endforeach() - -if (CUDA_VERSION VERSION_GREATER "7.0" OR CUDA_VERSION VERSION_EQUAL "7.0") - list(APPEND __arch_flags " -gencode arch=compute_52,code=sm_52") -endif() - -# Modern gpu architectures: Pascal -if (CUDA_VERSION VERSION_GREATER "8.0" OR CUDA_VERSION VERSION_EQUAL "8.0") - list(APPEND __arch_flags " -gencode arch=compute_60,code=sm_60") -endif() - -set(CUDA_NVCC_FLAGS ${__arch_flags} ${CUDA_NVCC_FLAGS}) diff --git a/go/cmake/golang.cmake b/go/cmake/golang.cmake deleted file mode 100644 index a5a43886f887e495500fa26b3c26fa69c63eded0..0000000000000000000000000000000000000000 --- a/go/cmake/golang.cmake +++ /dev/null @@ -1,48 +0,0 @@ -set(GOPATH "${CMAKE_CURRENT_BINARY_DIR}/go") -file(MAKE_DIRECTORY ${GOPATH}) -set(PADDLE_IN_GOPATH "${GOPATH}/src/github.com/PaddlePaddle") -file(MAKE_DIRECTORY ${PADDLE_IN_GOPATH}) - -function(GO_LIBRARY NAME BUILD_TYPE) - if(BUILD_TYPE STREQUAL "STATIC") - set(BUILD_MODE -buildmode=c-archive) - set(LIB_NAME "lib${NAME}.a") - else() - set(BUILD_MODE -buildmode=c-shared) - if(APPLE) - set(LIB_NAME "lib${NAME}.dylib") - else() - set(LIB_NAME "lib${NAME}.so") - endif() - endif() - - file(GLOB GO_SOURCE RELATIVE "${CMAKE_CURRENT_SOURCE_DIR}" "*.go") - file(RELATIVE_PATH rel ${CMAKE_CURRENT_BINARY_DIR} ${CMAKE_CURRENT_SOURCE_DIR}) - - # find Paddle directory. - get_filename_component(PARENT_DIR ${CMAKE_CURRENT_SOURCE_DIR} DIRECTORY) - get_filename_component(PARENT_DIR ${PARENT_DIR} DIRECTORY) - get_filename_component(PADDLE_DIR ${PARENT_DIR} DIRECTORY) - - # automatically get all dependencies specified in the source code - # for given target. - add_custom_target(${NAME}_goGet env GOPATH=${GOPATH} ${CMAKE_Go_COMPILER} get -d ${rel}/...) - - # make a symlink that references Paddle inside $GOPATH, so go get - # will use the local changes in Paddle rather than checkout Paddle - # in github. 
-  add_custom_target(${NAME}_copyPaddle
-    COMMAND rm -rf ${PADDLE_IN_GOPATH}/Paddle
-    COMMAND ln -sf ${PADDLE_DIR} ${PADDLE_IN_GOPATH}/Paddle)
-  add_dependencies(${NAME}_goGet ${NAME}_copyPaddle)
-
-  add_custom_command(OUTPUT ${OUTPUT_DIR}/.timestamp
-    COMMAND env GOPATH=${GOPATH} ${CMAKE_Go_COMPILER} build ${BUILD_MODE}
-    -o "${CMAKE_CURRENT_BINARY_DIR}/${LIB_NAME}"
-    ${CMAKE_GO_FLAGS} ${GO_SOURCE}
-    WORKING_DIRECTORY ${CMAKE_CURRENT_SOURCE_DIR})
-
-  add_custom_target(${NAME} ALL DEPENDS ${OUTPUT_DIR}/.timestamp ${ARGN})
-  add_dependencies(${NAME} ${NAME}_goGet)
-
-endfunction(GO_LIBRARY)
diff --git a/go/cmd/master/master.go b/go/cmd/master/master.go
index 25cd1cafcdf328094a019638f37f908591f5f374..54fa254863156455f66fa87de9077042a45f9735 100644
--- a/go/cmd/master/master.go
+++ b/go/cmd/master/master.go
@@ -1,45 +1,69 @@
 package main
 
 import (
+	"fmt"
 	"net"
 	"net/http"
 	"net/rpc"
 	"strconv"
+	"strings"
 	"time"
 
 	"github.com/namsral/flag"
+	log "github.com/sirupsen/logrus"
 
 	"github.com/PaddlePaddle/Paddle/go/master"
+	"github.com/PaddlePaddle/Paddle/go/utils/networkhelper"
 )
 
 func main() {
 	port := flag.Int("port", 8080, "port of the master server.")
-
-	faultTolerance := flag.Bool("fault_tolerance", false, "enable fault tolerance (requires etcd).")
+	ttlSec := flag.Int("ttl", 60, "etcd lease TTL in seconds.")
+	endpoints := flag.String("endpoints", "http://127.0.0.1:2379", "comma separated etcd endpoints. If empty, fault tolerance will not be enabled.")
 	taskTimeoutDur := flag.Duration("task_timout_dur", 20*time.Minute, "task timout duration.")
 	taskTimeoutMax := flag.Int("task_timeout_max", 3, "max timtout count for each task before it being declared failed task.")
 	chunkPerTask := flag.Int("chunk_per_task", 10, "chunk per task.")
 	flag.Parse()
 
-	if *faultTolerance {
-		panic("fault tolernance not implemented.")
+	if *endpoints == "" {
+		log.Warningln("-endpoints not set, fault tolerance will not be enabled.")
+	}
+
+	var store master.Store
+	if *endpoints != "" {
+		eps := strings.Split(*endpoints, ",")
+		ip, err := networkhelper.GetExternalIP()
+		if err != nil {
+			log.Fatal(err)
+		}
+		addr := fmt.Sprintf("%s:%d", ip, *port)
+		store, err = master.NewEtcdClient(eps, addr, master.DefaultLockPath, master.DefaultAddrPath, master.DefaultStatePath, *ttlSec)
+		if err != nil {
+			log.Fatal(err)
+		}
+	} else {
+		store = &master.InMemStore{}
+	}
+
+	s, err := master.NewService(store, *chunkPerTask, *taskTimeoutDur, *taskTimeoutMax)
+	if err != nil {
+		log.Fatal(err)
 	}
 
-	s := master.NewService(*chunkPerTask, *taskTimeoutDur, *taskTimeoutMax)
-	err := rpc.Register(s)
+	err = rpc.Register(s)
 	if err != nil {
-		panic(err)
+		log.Fatal(err)
 	}
 
 	rpc.HandleHTTP()
 	l, err := net.Listen("tcp", ":"+strconv.Itoa(*port))
 	if err != nil {
-		panic(err)
+		log.Fatal(err)
 	}
 
 	err = http.Serve(l, nil)
 	if err != nil {
-		panic(err)
+		log.Fatal(err)
 	}
 }
diff --git a/go/cmd/pserver/pserver.go b/go/cmd/pserver/pserver.go
index f0be251c2471cc9ddc069f040417b5181a78c058..8a42d4f8af1713e246f9efaf5dc7ba878c3b271e 100644
--- a/go/cmd/pserver/pserver.go
+++ b/go/cmd/pserver/pserver.go
@@ -5,18 +5,42 @@ import (
 	"net/http"
 	"net/rpc"
 	"strconv"
+	"time"
 
 	"github.com/namsral/flag"
 
 	"github.com/PaddlePaddle/Paddle/go/pserver"
+	log "github.com/sirupsen/logrus"
 )
 
 func main() {
 	port := flag.Int("port", 0, "port of the pserver")
+	etcdEndpoint := flag.String("etcd-endpoint", "http://127.0.0.1:2379",
+		"comma separated endpoint string for pserver to connect to etcd")
+	etcdTimeout := flag.Int("etcd-timeout", 5, "timeout for etcd calls")
+	numPservers :=
flag.Int("num-pservers", 1, "total pserver count in a training job") + logLevel := flag.String("log-level", "info", + "log level, possible values: debug, info, warning, error, fatal, panic") flag.Parse() - s := pserver.NewService() - err := rpc.Register(s) + level, err := log.ParseLevel(*logLevel) + if err != nil { + panic(err) + } + log.SetLevel(level) + + timeout := time.Second * time.Duration((*etcdTimeout)) + e := pserver.NewEtcdClient(*etcdEndpoint, *numPservers, timeout) + idx, err := e.Register() + if err != nil { + panic(err) + } + + s, err := pserver.NewService(idx) + if err != nil { + panic(err) + } + err = rpc.Register(s) if err != nil { panic(err) } @@ -27,7 +51,9 @@ func main() { panic(err) } + log.Infof("start pserver at port %d", *port) err = http.Serve(l, nil) + if err != nil { panic(err) } diff --git a/go/master/c/client.go b/go/master/c/client.go index b186474dc33138aeb02a2ffe34418b379b7a2db0..9e35e986002c0ae3b7593150ece96dba29a1521b 100644 --- a/go/master/c/client.go +++ b/go/master/c/client.go @@ -13,10 +13,13 @@ typedef int paddle_master_client; import "C" import ( + "strings" "sync" + "time" "unsafe" "github.com/PaddlePaddle/Paddle/go/master" + "github.com/coreos/etcd/clientv3" log "github.com/sirupsen/logrus" ) @@ -48,16 +51,33 @@ func remove(client C.paddle_master_client) *master.Client { return h } -type addresser string - -func (a addresser) Address() string { - return string(a) +//export paddle_new_etcd_master_client +func paddle_new_etcd_master_client(etcdEndpoints *C.char, timeout int, bufSize int) C.paddle_master_client { + p := C.GoString(etcdEndpoints) + cli, err := clientv3.New(clientv3.Config{ + Endpoints: strings.Split(p, ","), + DialTimeout: time.Second * time.Duration(timeout), + }) + if err != nil { + panic(err) + } + ch := make(chan string, 1) + a, err := master.GetKey(cli, master.DefaultAddrPath, timeout) + if err != nil { + panic(err) + } + ch <- a + go master.WatchKey(cli, master.DefaultAddrPath, ch) + c := master.NewClient(ch, bufSize) + return add(c) } //export paddle_new_master_client func paddle_new_master_client(addr *C.char, bufSize int) C.paddle_master_client { a := C.GoString(addr) - c := master.NewClient(addresser(a), bufSize) + ch := make(chan string, 1) + ch <- a + c := master.NewClient(ch, bufSize) return add(c) } diff --git a/go/master/client.go b/go/master/client.go index 8451820c1963dd5a4eff0c3ab7763eb6a8e05ba4..d3bea49d0a8166420e83478076cc7bc81e48598d 100644 --- a/go/master/client.go +++ b/go/master/client.go @@ -2,18 +2,12 @@ package master import ( "os" - "time" "github.com/PaddlePaddle/Paddle/go/connection" "github.com/PaddlePaddle/recordio" log "github.com/sirupsen/logrus" ) -// Addresser provide the address of the master server. -type Addresser interface { - Address() string -} - // Client is the client of the master server. type Client struct { conn *connection.Conn @@ -24,11 +18,11 @@ type Client struct { // // bufSize is the record buffer size. NextRecord will read from this // buffer. 
-func NewClient(addr Addresser, bufSize int) *Client { +func NewClient(addrCh <-chan string, bufSize int) *Client { c := &Client{} c.conn = connection.New() c.ch = make(chan []byte, bufSize) - go c.monitorMaster(addr) + go c.monitorMaster(addrCh) go c.getRecords() return c } @@ -72,12 +66,10 @@ func (c *Client) getRecords() { } } -func (c *Client) monitorMaster(addr Addresser) { +func (c *Client) monitorMaster(addrCh <-chan string) { lastMaster := "" - monitor := func() { - // get the lastest address of the master server, + for curMaster := range addrCh { // connect to the new address once address changed. - curMaster := addr.Address() if curMaster != lastMaster { if curMaster == "" { err := c.conn.Close() @@ -94,18 +86,10 @@ func (c *Client) monitorMaster(addr Addresser) { // to retry next time. curMaster = lastMaster } - } } - lastMaster = curMaster } - - monitor() - ticker := time.NewTicker(10 * time.Second) - for _ = range ticker.C { - monitor() - } } // SetDataset set dataset for the master server to dispatch. diff --git a/go/master/client_internal_test.go b/go/master/client_internal_test.go index 00fcca0e2cf44d0f4855fd366a8f80895abf8865..364dce7b58cf6366af711bde9107559a762563a4 100644 --- a/go/master/client_internal_test.go +++ b/go/master/client_internal_test.go @@ -26,12 +26,6 @@ func init() { log.SetLevel(log.ErrorLevel) } -type TestAddresser string - -func (a TestAddresser) Address() string { - return string(a) -} - func TestGetFinishTask(t *testing.T) { const path = "/tmp/master_client_test_0" @@ -45,11 +39,14 @@ func TestGetFinishTask(t *testing.T) { if err != nil { panic(err) } - go func(l net.Listener) { - s := NewService(chunkPerTask, time.Second, 1) + s, err := NewService(&InMemStore{}, chunkPerTask, time.Second, 1) + if err != nil { + panic(err) + } + server := rpc.NewServer() - err := server.Register(s) + err = server.Register(s) if err != nil { panic(err) } @@ -78,9 +75,11 @@ func TestGetFinishTask(t *testing.T) { // Manually intialize client to avoid calling c.getRecords() c := &Client{} c.conn = connection.New() - go c.monitorMaster(TestAddresser(fmt.Sprintf(":%d", p))) + addr := fmt.Sprintf(":%d", p) + ch := make(chan string, 1) + ch <- addr + go c.monitorMaster(ch) c.SetDataset([]string{path}) - checkOnePass := func(i int) { var tasks []Task for idx := 0; idx < totalTask; idx++ { diff --git a/go/master/client_test.go b/go/master/client_test.go index 2b3f873ecf3a650cd91d1d9c20b414b05bbb0cd6..c00aeebfd5d1fef6de4a8c67bf7f998a42ee863b 100644 --- a/go/master/client_test.go +++ b/go/master/client_test.go @@ -20,7 +20,6 @@ func TestNextRecord(t *testing.T) { path = "/tmp/master_client_TestFull" total = 50 ) - l, err := net.Listen("tcp", ":0") if err != nil { panic(err) @@ -31,11 +30,14 @@ func TestNextRecord(t *testing.T) { if err != nil { panic(err) } - go func(l net.Listener) { - s := master.NewService(10, time.Second, 1) + s, err := master.NewService(&master.InMemStore{}, 10, time.Second, 1) + if err != nil { + panic(err) + } + server := rpc.NewServer() - err := server.Register(s) + err = server.Register(s) if err != nil { panic(err) } @@ -59,10 +61,10 @@ func TestNextRecord(t *testing.T) { } w.Close() f.Close() - - c := master.NewClient(master.TestAddresser(fmt.Sprintf(":%d", p)), 10) + curAddr := make(chan string, 1) + curAddr <- fmt.Sprintf(":%d", p) + c := master.NewClient(curAddr, 10) c.SetDataset([]string{path}) - for pass := 0; pass < 50; pass++ { received := make(map[byte]bool) for i := 0; i < total; i++ { diff --git a/go/master/etcd_client.go 
b/go/master/etcd_client.go
new file mode 100644
index 0000000000000000000000000000000000000000..e27c014792f31ca27fe1a1636d69acccc4206ea3
--- /dev/null
+++ b/go/master/etcd_client.go
@@ -0,0 +1,172 @@
+package master
+
+import (
+	"context"
+	"time"
+
+	"github.com/coreos/etcd/clientv3"
+	"github.com/coreos/etcd/clientv3/concurrency"
+	log "github.com/sirupsen/logrus"
+)
+
+const (
+	// DefaultLockPath is the default etcd master lock path.
+	DefaultLockPath = "/master/lock"
+	// DefaultStatePath is the default etcd key for master state.
+	DefaultStatePath = "/master/state"
+	// DefaultAddrPath is the default etcd key for master address.
+	DefaultAddrPath = "/master/addr"
+)
+
+// EtcdClient is the etcd client that the master uses for fault
+// tolerance and service registry.
+type EtcdClient struct {
+	lockPath  string
+	statePath string
+	client    *clientv3.Client
+	lock      *concurrency.Mutex
+}
+
+// NewEtcdClient creates a new EtcdClient.
+func NewEtcdClient(endpoints []string, addr string, lockPath, addrPath, statePath string, ttlSec int) (*EtcdClient, error) {
+	log.Debugf("Connecting to etcd at %v", endpoints)
+	// TODO(helin): gracefully shutdown the etcd store. Because the etcd
+	// store holds an etcd lock, even though the lock will expire
+	// when the lease times out, we need to implement graceful
+	// shutdown to release the lock.
+	cli, err := clientv3.New(clientv3.Config{
+		Endpoints:   endpoints,
+		DialTimeout: dialTimeout,
+	})
+	if err != nil {
+		return nil, err
+	}
+
+	sess, err := concurrency.NewSession(cli, concurrency.WithTTL(ttlSec))
+	if err != nil {
+		return nil, err
+	}
+
+	lock := concurrency.NewMutex(sess, lockPath)
+	// It's fine for the lock to get stuck: in that case we have
+	// multiple master servers running (the job is configured to have
+	// only one master running, but the split-brain problem may cause
+	// multiple master servers to run), and the cluster management
+	// software will kill one of them.
+	log.Debugf("Trying to acquire lock at %s.", lockPath)
+	err = lock.Lock(context.TODO())
+	if err != nil {
+		return nil, err
+	}
+	log.Debugf("Successfully acquired lock at %s.", lockPath)
+
+	put := clientv3.OpPut(addrPath, string(addr))
+	resp, err := cli.Txn(context.Background()).If(lock.IsOwner()).Then(put).Commit()
+	if err != nil {
+		return nil, err
+	}
+
+	if !resp.Succeeded {
+		log.Fatal("No longer owns the master lock. Exiting.")
+	}
+
+	e := &EtcdClient{
+		lockPath:  lockPath,
+		statePath: statePath,
+		client:    cli,
+		lock:      lock,
+	}
+
+	return e, nil
+}
+
+// Save saves the state into etcd.
+func (e *EtcdClient) Save(state []byte) error {
+	ctx := context.TODO()
+	put := clientv3.OpPut(e.statePath, string(state))
+	resp, err := e.client.Txn(ctx).If(e.lock.IsOwner()).Then(put).Commit()
+	if err != nil {
+		return err
+	}
+
+	if !resp.Succeeded {
+		log.Errorln("No longer owns the lock, trying to lock again")
+		ctx, cancel := context.WithTimeout(context.Background(), 5*time.Second)
+		err := e.lock.Lock(ctx)
+		cancel()
+		if err != nil {
+			// We lost the master lock and cannot acquire
+			// it back, which means some other master is
+			// already started. We don't want the cluster
+			// management system to kill the master server
+			// that is holding the lock and running
+			// correctly. So the most feasible solution is
+			// to kill the current master server. The current
+			// state is not saved, but the trainer's RPC
+			// call will fail, so the trainer will retry.
+			log.Fatalf("Could not acquire the lock at %s: %v. Exiting.", e.lockPath, err)
Exiting.", e.lockPath, err) + } + log.Infof("Successfully acquired lock at %s.", e.lockPath) + return e.Save(state) + } + + return nil +} + +// Load loads the state from etcd. +func (e *EtcdClient) Load() ([]byte, error) { + ctx := context.TODO() + get := clientv3.OpGet(e.statePath) + + resp, err := e.client.Txn(ctx).If(e.lock.IsOwner()).Then(get).Commit() + if err != nil { + return nil, err + } + + if !resp.Succeeded { + log.Errorln("No longer owns the lock, trying to lock and load again.") + err = e.lock.Lock(context.Background()) + if err != nil { + return nil, err + } + + return e.Load() + } + + kvs := resp.Responses[0].GetResponseRange().Kvs + if len(kvs) == 0 { + // No state exists + return nil, nil + } + + state := kvs[0].Value + return state, nil +} + +// GetKey gets the value by the specify key. +func GetKey(c *clientv3.Client, key string, timeout int) (string, error) { + ctx, cancel := context.WithTimeout(context.Background(), time.Second*time.Duration(timeout)) + resp, err := c.Get(ctx, key) + cancel() + if err != nil { + return "", err + } + kvs := resp.Kvs + if len(kvs) == 0 { + return "", nil + } + v := kvs[0].Value + return string(v), nil +} + +// WatchKey watches the specify key and send to valChan if there is some event. +func WatchKey(c *clientv3.Client, key string, valChan chan<- string) { + rch := c.Watch(context.Background(), key) + for wresp := range rch { + for _, ev := range wresp.Events { + // if received event is DELETE, the value will be an empty string + log.Infof("received event %s, %q : %q\n", ev.Type, ev.Kv.Key, ev.Kv.Value) + valChan <- string(ev.Kv.Value) + } + } +} diff --git a/go/master/inmem_store.go b/go/master/inmem_store.go new file mode 100644 index 0000000000000000000000000000000000000000..bcd549b20e46381783bad11caa08cb7f4ba40add --- /dev/null +++ b/go/master/inmem_store.go @@ -0,0 +1,28 @@ +package master + +import "sync" + +// InMemStore is an in memory implementation of Store interface. +// +// It does not tolerate the fault that casues the program to crash. +type InMemStore struct { + mu sync.Mutex + buf []byte +} + +// Save saves the state into the in-memory store. +func (m *InMemStore) Save(state []byte) error { + m.mu.Lock() + defer m.mu.Unlock() + + m.buf = state + return nil +} + +// Load loads the state from the in-memory store. +func (m *InMemStore) Load() ([]byte, error) { + m.mu.Lock() + defer m.mu.Unlock() + + return m.buf, nil +} diff --git a/go/master/service.go b/go/master/service.go index 55e1e2d1a4a5cd6f5d5797b247e2ebe433607576..58e68e744859933aa74cac231356d4ff9dfb8d7b 100644 --- a/go/master/service.go +++ b/go/master/service.go @@ -1,6 +1,9 @@ package master import ( + "bytes" + "compress/gzip" + "encoding/gob" "errors" "os" "path/filepath" @@ -12,24 +15,54 @@ import ( "github.com/PaddlePaddle/recordio" ) +const ( + dialTimeout = 5 * time.Second +) + +// Store is the interface for save and load the master state. +type Store interface { + Save([]byte) error + Load() ([]byte, error) +} + +// Chunk is a chunk of data consisted of several data instances. +type Chunk struct { + Path string + Index recordio.Index // chunk index +} + +// Task is the basic unit of data instances assigned to trainers. +type Task struct { + ID int + Chunks []Chunk +} + +type taskEntry struct { + Epoch int + NumTimeout int + Task Task +} + +type taskQueues struct { + Todo []taskEntry + Pending map[int]taskEntry // map from task ID to task entry + Done []taskEntry + Failed []Task +} + // Service is the master server service. 
 type Service struct {
 	chunksPerTask int
 	timeoutDur    time.Duration
 	timeoutMax    int
 	ready         chan struct{}
+	store         Store
 
 	mu         sync.Mutex
 	initDone   bool
 	taskQueues taskQueues
 }
 
-// Recover recovers service state from etcd.
-func Recover() (*Service, error) {
-	// TODO(helin): recover from snapshot state from etcd.
-	return nil, nil
-}
-
 func partition(chunks []Chunk, chunksPerTask int) []taskEntry {
 	id := 0
 	if chunksPerTask <= 0 {
@@ -58,7 +91,7 @@
 }
 
 // NewService creates a new service.
-func NewService(chunksPerTask int, timeoutDur time.Duration, timeoutMax int) *Service {
+func NewService(store Store, chunksPerTask int, timeoutDur time.Duration, timeoutMax int) (*Service, error) {
 	s := &Service{}
 	s.chunksPerTask = chunksPerTask
 	s.timeoutDur = timeoutDur
@@ -66,38 +99,82 @@ func NewService(chunksPerTask int, timeoutDur time.Duration, timeoutMax int) *Se
 	s.taskQueues = taskQueues{}
 	s.taskQueues.Pending = make(map[int]taskEntry)
 	s.ready = make(chan struct{})
-	return s
-}
+	s.store = store
+	recovered, err := s.recover()
+	if err != nil {
+		return nil, err
+	}
 
-// Chunk is a chunk of data consisted of several data instances.
-type Chunk struct {
-	Path  string
-	Index recordio.Index // chunk index
-}
+	if recovered {
+		// Recovered. Now the state is already initialized,
+		// and the master is ready.
+		s.initDone = true
+		close(s.ready)
+		log.Info("Master recovered from saved state.")
+	}
 
-// Task is the basic unit of data instances assigned to trainers.
-type Task struct {
-	ID     int
-	Chunks []Chunk
+	return s, nil
 }
 
-type taskEntry struct {
-	Epoch      int
-	NumTimeout int
-	Task       Task
-}
+// recover recovers service state from etcd.
+func (s *Service) recover() (bool, error) {
+	state, err := s.store.Load()
+	if err != nil {
+		return false, err
+	}
 
-type taskQueues struct {
-	Todo    []taskEntry
-	Pending map[int]taskEntry // map from task ID to task entry
-	Done    []taskEntry
-	Failed  []Task
+	if state == nil {
+		log.Infoln("No state exists, not recovered.")
+		return false, nil
+	}
+
+	log.Infof("Loaded snapshot of size: %d bytes.", len(state))
+	gr, err := gzip.NewReader(bytes.NewReader(state))
+	if err != nil {
+		return false, err
+	}
+
+	dec := gob.NewDecoder(gr)
+	var tqs taskQueues
+	err = dec.Decode(&tqs)
+	if err != nil {
+		return false, err
+	}
+
+	err = gr.Close()
+	if err != nil {
+		// Only the close failed; the recovery actually succeeded, so
+		// just log the error.
+		log.Errorln(err)
+	}
+
+	s.taskQueues = tqs
+	return true, nil
 }
 
-// *must* be called with s.mu being held.
+// snapshot *must* be called with s.mu being held.
 func (s *Service) snapshot() error {
-	// TODO(helin): snapshot state on etcd.
-	return nil
+	// TODO(helin): etcd request has a size limit, so the snapshot
+	// size is limited by the max request size. We should either
+	// divide the snapshot into smaller chunks and save under
+	// different keys, or configure the request size to be big
+	// enough:
+	// https://github.com/coreos/etcd/blob/2f84f3d8d8ed8f9537ab6ffa44a3a1c7eddfa9b1/embed/config.go#L44
+	var buf bytes.Buffer
+	gw := gzip.NewWriter(&buf)
+	enc := gob.NewEncoder(gw)
+	err := enc.Encode(s.taskQueues)
+	if err != nil {
+		return err
+	}
+	err = gw.Close()
+	if err != nil {
+		return err
+	}
+
+	state := buf.Bytes()
+	log.Infof("Saving snapshot of size: %d bytes.", len(state))
+	return s.store.Save(state)
 }
 
 func readChunks(globPaths []string) ([]Chunk, error) {
@@ -207,12 +284,12 @@ func (s *Service) checkTimeoutFunc(taskID int, epoch int) func() {
 
 		t.NumTimeout++
 		if t.NumTimeout > s.timeoutMax {
-			log.Warningf("Task %v timed out %d times, discard.\n", t.Task, t.NumTimeout)
+			log.Warningf("Task %v timed out %d times, discard.", t.Task, t.NumTimeout)
 			s.taskQueues.Failed = append(s.taskQueues.Failed, t.Task)
 			return
 		}
 
-		log.Warningf("Task %v timed out %d times, retry.\n", t.Task, t.NumTimeout)
+		log.Warningf("Task %v timed out %d times, retry.", t.Task, t.NumTimeout)
 		s.taskQueues.Todo = append(s.taskQueues.Todo, t)
 	}
 }
diff --git a/go/pserver/cclient/CMakeLists.txt b/go/pserver/cclient/CMakeLists.txt
index b3e79ca661d0832821628b7cc6b540e17db45118..e12cf880683958bff54e541f7c391b8c1e1281de 100644
--- a/go/pserver/cclient/CMakeLists.txt
+++ b/go/pserver/cclient/CMakeLists.txt
@@ -1,16 +1,4 @@
-cmake_minimum_required(VERSION 3.0)
-
-get_filename_component(PARENT_DIR ${CMAKE_CURRENT_SOURCE_DIR} DIRECTORY)
-get_filename_component(PARENT_DIR ${PARENT_DIR} DIRECTORY)
-set(CMAKE_MODULE_PATH ${CMAKE_MODULE_PATH} "${PARENT_DIR}/cmake")
-
-project(cxx_go C Go)
-
-include(golang)
-include(flags)
-
 cc_library(paddle_go_optimizer DEPS paddle_optimizer paddle_proto glog gflags)
-
 go_library(paddle_pserver_cclient STATIC)
 
 if(WITH_TESTING)
   add_subdirectory(test)
diff --git a/go/pserver/cclient/cclient.go b/go/pserver/cclient/cclient.go
index 92a41b7f5434842c6318704dd85adf9e51c19944..bbaf43d9f1434a278568bc110a709718b9b8c222 100644
--- a/go/pserver/cclient/cclient.go
+++ b/go/pserver/cclient/cclient.go
@@ -133,7 +133,7 @@ func paddle_init_param(client C.paddle_pserver_client, param C.paddle_parameter,
 
 	if err != nil {
 		if err.Error() == pserver.AlreadyInitialized {
-			log.Warningf("parameter %s already initialized, treat paddle_init_param as sucessful.\n", name)
+			log.Warningf("parameter %s already initialized, treat paddle_init_param as successful.", name)
 			return C.PSERVER_OK
 		}
 		log.Errorln(err)
@@ -200,7 +200,7 @@ func paddle_get_params(client C.paddle_pserver_client, dst **C.paddle_parameter,
 		for i, p := range ps {
 			pn[i] = p.Name
 		}
-		log.Errorf("pserver returned wrong number of parameters. Requested: %s, returned: %s.\n", strings.Join(pn, ", "), strings.Join(ns, ", "))
+		log.Errorf("pserver returned wrong number of parameters. Requested: %s, returned: %s.", strings.Join(pn, ", "), strings.Join(ns, ", "))
 		return C.PSERVER_ERROR
 	}
 
@@ -210,7 +210,7 @@ func paddle_get_params(client C.paddle_pserver_client, dst **C.paddle_parameter,
 		for i, p := range ps {
 			pn[i] = p.Name
 		}
-		log.Errorf("pserver returned wrong parameters, or not in requested order. Requested: %s, returned: %s.\n", strings.Join(pn, ", "), strings.Join(ns, ", "))
+		log.Errorf("pserver returned wrong parameters, or not in requested order.
Requested: %s, returned: %s.", strings.Join(pn, ", "), strings.Join(ns, ", ")) return C.PSERVER_ERROR } } diff --git a/go/pserver/cclient/test/CMakeLists.txt b/go/pserver/cclient/test/CMakeLists.txt index 722bd45d2f0838ea2d907004808052c21ebe8bd6..eddce640b5888060b2bb4cfed8a446185c745fc2 100644 --- a/go/pserver/cclient/test/CMakeLists.txt +++ b/go/pserver/cclient/test/CMakeLists.txt @@ -1,18 +1 @@ -cmake_minimum_required(VERSION 3.0) - -add_executable(test_cclient test_cclient.c) -add_dependencies(test_cclient paddle_pserver_cclient) - -if(APPLE) - set(CMAKE_EXE_LINKER_FLAGS "-framework CoreFoundation -framework Security") -else() - set(CMAKE_EXE_LINKER_FLAGS "-pthread") -endif() - -if(PROJ_ROOT) - include_directories(${CMAKE_CURRENT_BINARY_DIR}/..) - target_link_libraries(test_cclient ${CMAKE_CURRENT_BINARY_DIR}/../libpaddle_pserver_cclient.a pthread) -else(PROJ_ROOT) - include_directories(${CMAKE_BINARY_DIR}) - target_link_libraries(test_cclient ${CMAKE_BINARY_DIR}/libpaddle_pserver_cclient.a pthread) -endif(PROJ_ROOT) +cc_test(test_cclient SRCS test_cclient.c DEPS paddle_pserver_cclient) \ No newline at end of file diff --git a/go/pserver/client.go b/go/pserver/client.go index dda915977282d4880ddcc8c18ef6fd80ede9e01b..6938b9d5ce6f6d73c05bd6e3154777023965c319 100644 --- a/go/pserver/client.go +++ b/go/pserver/client.go @@ -1,6 +1,7 @@ package pserver import ( + "errors" "hash/fnv" "sort" "time" @@ -123,6 +124,9 @@ func (c *Client) FinishInitParams() error { // SendGrads sends gradients to parameter servers for updating // parameters. func (c *Client) SendGrads(grads []Gradient) error { + if len(grads) == 0 { + return errors.New("no gradient received") + } errCh := make(chan error, len(grads)) for _, g := range grads { go func(g Gradient) { diff --git a/go/pserver/client_test.go b/go/pserver/client_test.go index d746bf3f26949551778194c8659575a3198e112d..a248a3fb696a1e03b799f89afceb255de68662b1 100644 --- a/go/pserver/client_test.go +++ b/go/pserver/client_test.go @@ -31,9 +31,12 @@ func init() { port[i] = p go func(l net.Listener) { - s := pserver.NewService() + s, err := pserver.NewService(0) + if err != nil { + panic(err) + } server := rpc.NewServer() - err := server.Register(s) + err = server.Register(s) if err != nil { panic(err) } diff --git a/go/pserver/etcd_client.go b/go/pserver/etcd_client.go new file mode 100644 index 0000000000000000000000000000000000000000..4d88243edd4aa817ddc263ba316a3f6be9e1e67f --- /dev/null +++ b/go/pserver/etcd_client.go @@ -0,0 +1,181 @@ +package pserver + +import ( + "context" + "errors" + "strconv" + "strings" + "time" + + "github.com/PaddlePaddle/Paddle/go/utils/networkhelper" + "github.com/coreos/etcd/clientv3" + "github.com/coreos/etcd/clientv3/concurrency" + log "github.com/sirupsen/logrus" +) + +// EtcdClient is the etcd client that the pserver uses for fault +// tolerance, service registry and coordination. +type EtcdClient struct { + numPservers int + etcdEndpoints string + etcdClient *clientv3.Client + // etcdTimeout is also used as retry intervals. + etcdTimeout time.Duration + // FIXME: ensure GetExternalIP gets the correct ip for trainers to connect. + externalIP string + // desired number of pservers in the job. + // assume desired will not change during one training job. 
+ desired int +} + +// NewEtcdClient creates an EtcdClient +func NewEtcdClient(endpoints string, numPservers int, timeout time.Duration) *EtcdClient { + return &EtcdClient{ + etcdTimeout: timeout, + numPservers: numPservers, + etcdEndpoints: endpoints, + } +} + +// Register registers the pserver on etcd +// +// Register returns the index of the current pserver. +func (e *EtcdClient) Register() (int, error) { + + var err error + e.externalIP, err = networkhelper.GetExternalIP() + if err != nil { + return 0, err + } + + // initialize connection to etcd. + ep := strings.Split(e.etcdEndpoints, ",") + for { + cli, err := clientv3.New(clientv3.Config{ + Endpoints: ep, + DialTimeout: e.etcdTimeout, + }) + if err != nil { + log.Errorf("connect to etcd error: %v", err) + time.Sleep(e.etcdTimeout) + continue + } + e.etcdClient = cli + log.Debugf("initialized etcd client to %s", e.etcdEndpoints) + break + } + // Initialize /ps_desired using a transaction, since multiple pservers may + // try to write it at the same time. + for { + ctx, cancel := context.WithTimeout(context.Background(), time.Second) + _, err := e.initDesiredPservers(ctx, e.numPservers) + cancel() + if err != nil { + log.Warn(err) + time.Sleep(e.etcdTimeout) + continue + } + break + } + // TODO: when implementing extending or reducing pservers, /ps_desired is + // changed, then we need to watch /ps_desired node for events. For now, just + // write once when init and read from it. + // Wait for /ps_desired and use it to set the initial value of e.desired. + for { + ctx, cancel := context.WithTimeout(context.Background(), time.Second) + resp, err := e.etcdClient.Get(ctx, PsDesired) + cancel() + if err != nil { + log.Errorf("getting %s error: %v", PsDesired, err) + time.Sleep(e.etcdTimeout) + continue + } + if len(resp.Kvs) != 0 { + e.desired, err = strconv.Atoi(string(resp.Kvs[0].Value)) + if err != nil { + log.Errorf("value of %s invalid: %v", PsDesired, err) + time.Sleep(e.etcdTimeout) + // NOTE: wait until the ps_desired value changes + continue + } + break + } + } + + var pserverIdx int + // Try to register the pserver node on etcd. + for { + ctx, cancel := context.WithTimeout(context.Background(), time.Second) + var err error + pserverIdx, err = e.registerPserverEtcd(ctx) + cancel() + if err != nil { + log.Warn(err) + time.Sleep(e.etcdTimeout) + continue + } + break + } + + return pserverIdx, nil +} + +func (e *EtcdClient) initDesiredPservers(ctx context.Context, numPservers int) (*clientv3.TxnResponse, error) { + return concurrency.NewSTM(e.etcdClient, func(c concurrency.STM) error { + dsStr := c.Get(PsDesired) + if dsStr == "" { + c.Put(PsDesired, strconv.Itoa(numPservers)) + } + return nil + }, concurrency.WithAbortContext(ctx), concurrency.WithIsolation(concurrency.RepeatableReads)) +} + +// registerPserverEtcd registers the pserver node on etcd using a transaction. 
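+// It scans the keys /ps/0 through /ps/(desired-1) inside one STM transaction, +// claims the first empty slot by writing this pserver's external IP under it, +// and attaches a 5-second lease that a background goroutine keeps alive, so a +// crashed pserver frees its slot automatically. The returned index is the +// number of the claimed slot.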
+func (e *EtcdClient) registerPserverEtcd(ctx context.Context) (int, error) { + var idx int + _, err := concurrency.NewSTM(e.etcdClient, func(c concurrency.STM) error { + registered := false + for i := 0; i < e.desired; i++ { + psKey := "/ps/" + strconv.Itoa(i) + log.Debugf("checking %s", psKey) + ps := c.Get(psKey) + log.Debugf("got value (%s) for key: %s", ps, psKey) + + if ps == "" { + resp, err := e.etcdClient.Grant(context.TODO(), 5) + if err != nil { + log.Fatal(err) + } + // Found the first empty slot; write this pserver's info under it. + c.Put(psKey, e.externalIP, clientv3.WithLease(resp.ID)) + log.Debugf("set pserver node %s with value %s", psKey, e.externalIP) + ch, kaerr := e.etcdClient.KeepAlive(context.TODO(), resp.ID) + if kaerr != nil { + log.Errorf("keepalive etcd node error: %v", kaerr) + return kaerr + } + + // Consume the keep-alive messages so etcd + // will not expire the lease. + go func(ch <-chan *clientv3.LeaseKeepAliveResponse) { + for ka := range ch { + log.Debugf("keepalive: %d", ka.TTL) + } + }(ch) + log.Debug("register finished") + idx = i + registered = true + break + } + } + if registered { + return nil + } + return errors.New("not registered, possibly because all pserver slots are already taken") + }, concurrency.WithAbortContext(ctx), concurrency.WithIsolation(concurrency.RepeatableReads)) + + if err != nil { + return 0, err + } + + return idx, nil +} diff --git a/go/pserver/service.go b/go/pserver/service.go index 555d379bcbe639a9af59f55a5275b4e4dcf1887c..e15a4e5a58a3bb1a154157b1212d141478e96231 100644 --- a/go/pserver/service.go +++ b/go/pserver/service.go @@ -24,6 +24,9 @@ const ( Float64 ) +// PsDesired is the etcd path that stores the desired count of pservers +const PsDesired = "/ps_desired" + // Parameter is a piece of data to sync with the parameter server. type Parameter struct { Name string @@ -43,17 +46,21 @@ type Gradient Parameter // Service is the RPC service for pserver. type Service struct { initialized chan struct{} + idx int mu sync.Mutex optMap map[string]*optimizer } -// NewService creates a new service. -func NewService() *Service { - s := &Service{} - s.optMap = make(map[string]*optimizer) +// NewService creates a new service; etcd registration is bypassed when no +// endpoints are specified. +func NewService(idx int) (*Service, error) { + s := &Service{ + idx: idx, + } + s.optMap = make(map[string]*optimizer) s.initialized = make(chan struct{}) - return s + return s, nil } // InitParam initializes a parameter. 
diff --git a/go/pserver/service_test.go b/go/pserver/service_test.go index 57397fe586e11f74014ee2d1a0e7ab7f4b6c3cd3..c62f92e09bb3d65aa9552e3624e5d8e1a1945e56 100644 --- a/go/pserver/service_test.go +++ b/go/pserver/service_test.go @@ -10,8 +10,12 @@ import ( "github.com/PaddlePaddle/Paddle/go/pserver" ) -func TestServiceFull(t *testing.T) { - s := pserver.NewService() + +func TestFull(t *testing.T) { + s, err := pserver.NewService(0) + if err != nil { + t.Error(err) + } var p pserver.Parameter p.Name = "param_a" p.Content = []byte{1, 0, 0, 0, 2, 0, 0, 0, 3, 0, 0, 0} @@ -79,8 +83,11 @@ func TestServiceFull(t *testing.T) { } func TestMultipleInit(t *testing.T) { - s := pserver.NewService() - err := s.FinishInitParams(0, nil) + s, err := pserver.NewService(0) + if err != nil { + t.Error(err) + } + err = s.FinishInitParams(0, nil) if err != nil { t.FailNow() } @@ -92,15 +99,18 @@ func TestMultipleInit(t *testing.T) { } func TestUninitialized(t *testing.T) { - s := pserver.NewService() - err := s.SendGrad(pserver.Gradient{}, nil) + s, err := pserver.NewService(0) + err = s.SendGrad(pserver.Gradient{}, nil) if err.Error() != pserver.Uninitialized { t.FailNow() } } func TestBlockUntilInitialized(t *testing.T) { - s := pserver.NewService() + s, err := pserver.NewService(0) + if err != nil { + t.Error(err) + } ch := make(chan struct{}, 2) errCh := make(chan error, 2) var wg sync.WaitGroup @@ -145,6 +155,7 @@ func TestBlockUntilInitialized(t *testing.T) { t.Fatalf("read optimizer proto failed") } err = s.InitParam(pserver.ParameterWithConfig{Param: p, Config: config}, nil) + if err != nil { t.FailNow() } diff --git a/go/utils/networkhelper/helper.go b/go/utils/networkhelper/helper.go new file mode 100644 index 0000000000000000000000000000000000000000..fbeaea8f5e7d93309befbd23063e474a4c6df46e --- /dev/null +++ b/go/utils/networkhelper/helper.go @@ -0,0 +1,45 @@ +package networkhelper + +import ( + "errors" + "net" +) + +// GetExternalIP returns the ip address of local network interface, not the +// loopback device. 
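+// It walks the machine's network interfaces, skips those that are down or +// are loopback devices, and returns the first IPv4 address it finds.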
+func GetExternalIP() (string, error) { + ifaces, err := net.Interfaces() + if err != nil { + return "", err + } + for _, iface := range ifaces { + if iface.Flags&net.FlagUp == 0 { + continue // interface down + } + if iface.Flags&net.FlagLoopback != 0 { + continue // loopback interface + } + addrs, err := iface.Addrs() + if err != nil { + return "", err + } + for _, addr := range addrs { + var ip net.IP + switch v := addr.(type) { + case *net.IPNet: + ip = v.IP + case *net.IPAddr: + ip = v.IP + } + if ip == nil || ip.IsLoopback() { + continue + } + ip = ip.To4() + if ip == nil { + continue // not an ipv4 address + } + return ip.String(), nil + } + } + return "", errors.New("are you connected to the network?") +} diff --git a/go/utils/networkhelper/helper_test.go b/go/utils/networkhelper/helper_test.go new file mode 100644 index 0000000000000000000000000000000000000000..4208f9e358fc4345b73a2b8a9211b8889c1190d8 --- /dev/null +++ b/go/utils/networkhelper/helper_test.go @@ -0,0 +1,10 @@ +package networkhelper + +import "testing" + +func TestGetIP(t *testing.T) { + _, err := GetExternalIP() + if err != nil { + t.Errorf("GetExternalIP returns error : %v\n", err) + } +} diff --git a/paddle/CMakeLists.txt b/paddle/CMakeLists.txt index 573bd937a351a6f308974e14f3bc92cbe1b541bc..307e99bbe3a833f1fe26057ec38d0b96e04bc0fe 100644 --- a/paddle/CMakeLists.txt +++ b/paddle/CMakeLists.txt @@ -9,17 +9,10 @@ add_subdirectory(pserver) add_subdirectory(trainer) add_subdirectory(scripts) add_subdirectory(optimizer) -add_subdirectory(strings) - -# Do not build go directory until go cmake is working smoothly. -# if(CMAKE_Go_COMPILER) -# add_subdirectory(go) -# endif() - -find_package(Boost QUIET) +add_subdirectory(string) if(Boost_FOUND) - include_directories(${Boost_INCLUDE_DIRS}) + add_subdirectory(memory) add_subdirectory(platform) add_subdirectory(framework) endif() diff --git a/paddle/api/CMakeLists.txt b/paddle/api/CMakeLists.txt index f2315e31cc06d8b5fea7a9fd203a697bac603a90..39d8aa075bc072d37dc8df67746f0d2b503418a6 100644 --- a/paddle/api/CMakeLists.txt +++ b/paddle/api/CMakeLists.txt @@ -16,7 +16,7 @@ set(API_HEADER Internal.h) add_library(paddle_api STATIC ${API_SOURCES}) -add_dependencies(paddle_api gen_proto_cpp paddle_trainer_lib) +add_dependencies(paddle_api paddle_proto paddle_trainer_lib) INCLUDE(${SWIG_USE_FILE}) INCLUDE_DIRECTORIES(${PROJ_ROOT}/paddle) diff --git a/paddle/capi/CMakeLists.txt b/paddle/capi/CMakeLists.txt index 206f512563466d40e9ad1db0ddb4753ffb6bf55a..11022d17541476c97a2b29be8eb8fecce7e39435 100644 --- a/paddle/capi/CMakeLists.txt +++ b/paddle/capi/CMakeLists.txt @@ -26,7 +26,7 @@ target_include_directories(paddle_capi PUBLIC ${CMAKE_CURRENT_BINARY_DIR}) add_style_check_target(paddle_capi ${CAPI_SOURCES} ${CAPI_HEADER} ${CAPI_PRIVATE_HEADER}) -add_dependencies(paddle_capi gen_proto_cpp) +add_dependencies(paddle_capi paddle_proto) # combine all paddle static libraries together, into libpaddle_capi_whole.a diff --git a/paddle/cuda/CMakeLists.txt b/paddle/cuda/CMakeLists.txt index f9061e96deb659dcf7bfb88b46e6509af0425199..73ffa690d9d91b673079fc0ecf91f17cbabfdb1e 100755 --- a/paddle/cuda/CMakeLists.txt +++ b/paddle/cuda/CMakeLists.txt @@ -83,7 +83,7 @@ else() ${CUDA_CXX_SOURCES}) endif() -add_dependencies(paddle_cuda ${external_project_dependencies}) +add_dependencies(paddle_cuda paddle_proto ${external_project_dependencies}) add_style_check_target(paddle_cuda ${CUDA_SOURCES} diff --git a/paddle/framework/CMakeLists.txt b/paddle/framework/CMakeLists.txt index 
673cfa19ac35116288a2481b85858b6f88f3378e..6aa6b9bc2db6a223dd8562b76ba9d777206bfd40 100644 --- a/paddle/framework/CMakeLists.txt +++ b/paddle/framework/CMakeLists.txt @@ -1,4 +1,7 @@ +# ddim lib cc_library(ddim SRCS ddim.cc) cc_test(ddim_test SRCS ddim_test.cc DEPS ddim) - nv_test(dim_test SRCS dim_test.cu DEPS ddim) +cc_test(variable_test SRCS variable_test.cc) +cc_test(scope_test SRCS scope_test.cc) +cc_test(enforce_test SRCS enforce_test.cc) diff --git a/paddle/framework/ddim_test.cc b/paddle/framework/ddim_test.cc index e5c84d7abe9d476f285c8c5cd904d2e570eb0e4f..36eef02370e0196c2af2c05f49176b70ce69235a 100644 --- a/paddle/framework/ddim_test.cc +++ b/paddle/framework/ddim_test.cc @@ -1,5 +1,3 @@ -//#include -//#include #include <sstream> #include <vector> diff --git a/paddle/framework/enforce.h b/paddle/framework/enforce.h new file mode 100644 index 0000000000000000000000000000000000000000..56cb7f95647e81efef58b156002d0d378ee22820 --- /dev/null +++ b/paddle/framework/enforce.h @@ -0,0 +1,69 @@ +/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserve. +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + http://www.apache.org/licenses/LICENSE-2.0 +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. */ + +#pragma once +#include <paddle/string/printf.h> +#include <exception> +#include <sstream> + +namespace paddle { +namespace framework { + +/** + * @brief Enforce exception. Inherits std::exception. + * + * Whenever an enforced condition is not met, an EnforceNotMet exception is + * thrown. + */ +class EnforceNotMet : public std::exception { + public: + EnforceNotMet(const std::string& msg, const char* file, int fileline) { + std::ostringstream sout; + sout << msg << " at [" << file << ":" << fileline << "];"; + all_msg_ = sout.str(); + } + + const char* what() const noexcept override { return all_msg_.c_str(); } + + private: + std::string all_msg_; +}; + +// From https://stackoverflow.com/questions/30130930/ +// __builtin_expect is a compiler builtin, not part of the C++11 standard. Since +// the enforced condition should be true in most situations, the `UNLIKELY` +// macro helps the compiler generate faster code. +#define UNLIKELY(condition) __builtin_expect(static_cast<bool>(condition), 0) + +/** + * @brief Throw an EnforceNotMet exception, with __FILE__ and __LINE__ filled + * in automatically. + * + * This macro takes __VA_ARGS__; users can pass any type that can be + * serialized to std::ostream. + */ +#define PADDLE_THROW(...) \ do { \ throw ::paddle::framework::EnforceNotMet( \ ::paddle::string::Sprintf(__VA_ARGS__), __FILE__, __LINE__); \ } while (0) + +/** + * @brief Enforce a condition, otherwise throw an EnforceNotMet + */ +#define PADDLE_ENFORCE(condition, ...) \ do { \ if (UNLIKELY(!(condition))) { \ PADDLE_THROW(__VA_ARGS__); \ } \ } while (0) + +} // namespace framework +} // namespace paddle diff --git a/paddle/framework/enforce_test.cc b/paddle/framework/enforce_test.cc new file mode 100644 index 0000000000000000000000000000000000000000..f8da1a192f63a54324d80725c9d2f156fb11a481 --- /dev/null +++ b/paddle/framework/enforce_test.cc @@ -0,0 +1,35 @@ +/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserve. 
+Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + http://www.apache.org/licenses/LICENSE-2.0 +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. */ + +#include <paddle/framework/enforce.h> +#include <gtest/gtest.h> + +TEST(ENFORCE, OK) { + PADDLE_ENFORCE(true, "Enforce is ok %d now %f", 123, 0.345); + size_t val = 1; + const size_t limit = 10; + PADDLE_ENFORCE(val < limit, "Enforce is OK too"); +} + +TEST(ENFORCE, FAILED) { + bool in_catch = false; + try { + PADDLE_ENFORCE(false, "Enforce is not ok %d at all", 123); + } catch (const paddle::framework::EnforceNotMet& err) { + in_catch = true; + std::string msg = "Enforce is not ok 123 at all"; + const char* what = err.what(); + for (size_t i = 0; i < msg.length(); ++i) { + ASSERT_EQ(what[i], msg[i]); + } + } + ASSERT_TRUE(in_catch); +} \ No newline at end of file diff --git a/paddle/framework/scope.h b/paddle/framework/scope.h new file mode 100644 index 0000000000000000000000000000000000000000..a4470f726fb0d59a82db29b3239c111ea1569c55 --- /dev/null +++ b/paddle/framework/scope.h @@ -0,0 +1,95 @@ +/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserve. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. */ + +#pragma once + +#include <memory> +#include <string> +#include <unordered_map> + +#include "paddle/framework/variable.h" + +namespace paddle { +namespace framework { + +/** + * @brief Scope that manages all variables. + * + * Scope is an association of a name to Variable. All variables belong to + * Scope. You need to specify a scope to run a Net, i.e., `net.Run(&scope)`. + * One net can run in different scopes and update different variables in the + * scope. + */ +class Scope { + public: + /** + * @brief Initialize a Scope without parent. + */ + Scope() {} + + /** + * @brief Initialize a Scope with parent. + */ + explicit Scope(const std::shared_ptr<Scope>& parent) : parent_(parent) {} + + /** + * @brief Create Variable + * + * Create a Variable in this Scope. Return the existing one if the Variable + * has already been created. + */ + Variable* CreateVariable(const std::string& name) { + auto var = GetVariable(name); + if (var) { + return var; + } else { + vars_[name] = std::unique_ptr<Variable>(new Variable()); + return GetVariable(name); + } + } + + /** + * @brief Get Variable. + * + * Get a Variable from this Scope; this function will recursively find the + * Variable in its parent scope. Return nullptr if not found. + */ + Variable* GetVariable(const std::string& name) const { + auto it = vars_.find(name); + if (it != vars_.end()) { + return it->second.get(); + } else if (parent_ != nullptr) { + return parent_->GetVariable(name); + } else { + return nullptr; + } + } + + /** + * @brief Whether this scope has a Variable named `name`. 
+ * + * Find if there is a Variable in this scope or its parent scope. + */ + bool HasVariable(const std::string& name) const { + return (vars_.find(name) != vars_.end() || + (parent_ && parent_->HasVariable(name))); + } + + private: + std::unordered_map<std::string, std::unique_ptr<Variable>> vars_; + std::shared_ptr<Scope> parent_{nullptr}; +}; + +} // namespace framework +} // namespace paddle diff --git a/paddle/framework/scope_test.cc b/paddle/framework/scope_test.cc new file mode 100644 index 0000000000000000000000000000000000000000..df1afb200ce9e75c5b1e40f2da56667487ae3576 --- /dev/null +++ b/paddle/framework/scope_test.cc @@ -0,0 +1,58 @@ +/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserve. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. */ + +#include "paddle/framework/scope.h" +#include "gtest/gtest.h" + +TEST(Scope, Create) { + using paddle::framework::Scope; + using paddle::framework::Variable; + + auto scope = std::make_shared<Scope>(); + + Variable* var0 = scope->CreateVariable(""); + EXPECT_NE(var0, nullptr); + + /// GetVariable will return nullptr if the variable does not exist. + Variable* var1 = scope->GetVariable("a"); + EXPECT_EQ(var1, nullptr); + + /// CreateVariable will create and return one. + Variable* var2 = scope->CreateVariable("a"); + EXPECT_NE(var2, nullptr); + + /// Get the created variable. + Variable* var3 = scope->GetVariable("a"); + EXPECT_EQ(var2, var3); + + /// CreateVariable will just return the variable if it + /// already exists. + Variable* var4 = scope->CreateVariable("a"); + EXPECT_EQ(var4, var2); +} + +TEST(Scope, Parent) { + using paddle::framework::Scope; + using paddle::framework::Variable; + + auto parent_scope = std::make_shared<Scope>(); + auto scope = std::make_shared<Scope>(parent_scope); + + Variable* var0 = parent_scope->CreateVariable("a"); + EXPECT_NE(var0, nullptr); + + /// GetVariable will get the Variable from the parent scope if it exists. + Variable* var1 = scope->GetVariable("a"); + EXPECT_EQ(var0, var1); +} diff --git a/paddle/framework/variable.h b/paddle/framework/variable.h new file mode 100644 index 0000000000000000000000000000000000000000..72c4a7a2a1d1cf93a784f24e687727ee8481484c --- /dev/null +++ b/paddle/framework/variable.h @@ -0,0 +1,71 @@ +/* + Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserve. + Licensed under the Apache License, Version 2.0 (the "License"); + you may not use this file except in compliance with the License. + You may obtain a copy of the License at + http://www.apache.org/licenses/LICENSE-2.0 + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. 
+*/ +#pragma once + +#include <memory> +#include <typeindex> +#include <typeinfo> + +#include "paddle/platform/assert.h" + +namespace paddle { +namespace framework { + +class Variable { + public: + template <typename T> + const T& Get() const { + PADDLE_ASSERT(IsType<T>()); + return *static_cast<const T*>(holder_->Ptr()); + } + + template <typename T> + T* GetMutable() { + if (!IsType<T>()) { + holder_.reset(new PlaceholderImpl<T>(new T())); + } + return static_cast<T*>(holder_->Ptr()); + } + + template <typename T> + bool IsType() const { + return holder_ != nullptr && + std::type_index(typeid(T)) == std::type_index(holder_->Type()); + } + + private: + struct Placeholder { + virtual ~Placeholder() {} + virtual const std::type_info& Type() const = 0; + virtual void* Ptr() const = 0; + }; + + // Placeholder hides type T, so it doesn't appear as a template + // parameter of Variable. + template <typename T> + struct PlaceholderImpl : public Placeholder { + PlaceholderImpl(T* ptr) : ptr_(ptr), type_(typeid(T)) {} + + virtual const std::type_info& Type() const { return type_; } + virtual void* Ptr() const { return static_cast<void*>(ptr_.get()); } + + std::unique_ptr<T> ptr_; + const std::type_info& type_; + }; + + std::unique_ptr<Placeholder> + holder_; // points to a PlaceholderImpl object. +}; + +} // namespace framework +} // namespace paddle diff --git a/paddle/framework/variable.md b/paddle/framework/variable.md new file mode 100644 index 0000000000000000000000000000000000000000..f44d5ea46e7ce98dd443d684ad42308496bc4179 --- /dev/null +++ b/paddle/framework/variable.md @@ -0,0 +1,52 @@ +# Design Doc: Variable + + +Variable is also known as *blob* in MXNet and Caffe2. It is the input and output type of operators, where a neural network is a graph of operators. + +## Requirements: Lazy Memory Allocation + +For the flexibility of a DL system, a variable should be able to contain any typed value -- a tensor in most cases, but could also be some integer IDs or a scope of other variables in the case of RNN. + +To use the minimum amount of memory, we'd like a variable to allocate memory only when it has to, i.e., lazy memory allocation. Let's take the following example: + +```cpp +Variable vr, v1, v2; + +Tensor* t1 = new Tensor(); +Tensor* t2 = new Tensor(); + +Randomize( + /* malloc */ v1.GetMutable<Tensor>().mutable_data<float>(DDim(100,200)), + /* size */ t1.Size()); + +Randomize( + /* malloc */ v2.GetMutable<Tensor>().mutable_data<float>(DDim(200,300)), + /* size */ t2.Size()); + +Mult( + /*result*/ vr.GetMutable<Tensor>().mutable_data<float>(SizeOfMult(v1, v2)), + /*input1*/ v1.Get<Tensor>().data<float>(), + /*input2*/ v2.Get<Tensor>().data<float>()); +``` + +We see that a variable holds nothing until `Variable::GetMutable<Tensor>()` allocates a tensor and puts it in the variable. Similarly, a tensor doesn't get its memory until `Tensor::mutable_data<float>()` is called. + +This lazy memory allocation happens inside `Randomize` and `Mult`, the functions that mutate the variable, so it saves us some lines of C++ code. + + +## Implementation: Type Hiding + +To make memory allocation lazy, we cannot assume that we know the type held by a variable at definition time. In other words, `class Variable` cannot be a template `template <typename T> class Variable`. + +Because we don't know the type `T`, we cannot save a `T*` as `Variable`'s data member. Instead, we save an interface object `Placeholder`, which can return the pointer to the saved object via `Placeholder::Ptr()` as `void*`. + +Still, `Variable` needs to know `T` so that it can `delete(ptr)`, and so that `Variable::Get` can check the expected type against the saved object's type. + +We save `T` in `PlaceholderImpl`, the implementation of `Placeholder`. 
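+Before moving on, a small usage sketch may help; it is hypothetical, but it assumes nothing beyond the `Variable` members shown in `variable.h` above:
+
+```cpp
+#include <cassert>
+#include "paddle/framework/variable.h"
+
+int main() {
+  paddle::framework::Variable v;
+
+  // The first GetMutable<T>() decides the held type and allocates lazily.
+  *v.GetMutable<int>() = 42;
+
+  assert(v.IsType<int>());     // typeid(int) matches the saved type_info
+  assert(!v.IsType<float>());  // so Get<float>() would trip PADDLE_ASSERT
+  assert(v.Get<int>() == 42);
+
+  return 0;
+}
+```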
Please be aware that `PlaceholderImpl` is a class template and `T` is passed in as a template parameter. + +Because `PlaceholderImpl` knows `T`, it can save and return `typeid(T)` for the type comparison in `Variable::Get` and `Variable::GetMutable`. + + +## Conclusion + +The type-hiding technique utilizes C++ class templates, interface and derivation, and C++ RTTI (typeid). This combination saves us from defining something like `caffe2::TypeMeta`, which takes hundreds of lines of C++ code. diff --git a/paddle/framework/variable_test.cc b/paddle/framework/variable_test.cc new file mode 100644 index 0000000000000000000000000000000000000000..aea03bcf5719dacc01d2d78b52b33e8a0b29b5e5 --- /dev/null +++ b/paddle/framework/variable_test.cc @@ -0,0 +1,40 @@ +/* + Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserve. + Licensed under the Apache License, Version 2.0 (the "License"); + you may not use this file except in compliance with the License. + You may obtain a copy of the License at + http://www.apache.org/licenses/LICENSE-2.0 + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. +*/ + +#include <memory> +#include <string> + +#include "gtest/gtest.h" +#include "paddle/framework/variable.h" + +TEST(Variable, GetMutable) { + using paddle::framework::Variable; + + struct Tensor { + int content_; + }; + + std::unique_ptr<Variable> v(new Variable()); + + Tensor* t = v->GetMutable<Tensor>(); + t->content_ = 1234; + + const Tensor& tt = v->Get<Tensor>(); + EXPECT_EQ(1234, tt.content_); + + std::string* s = v->GetMutable<std::string>(); + *s = "hello"; + + const std::string& ss = v->Get<std::string>(); + EXPECT_EQ("hello", ss); +} diff --git a/paddle/function/CMakeLists.txt b/paddle/function/CMakeLists.txt index 5e170714cf5b183fcf6e76d34746333397e6b060..1c39ced3c9e3da4079a66e29c00be9cc18411b68 100644 --- a/paddle/function/CMakeLists.txt +++ b/paddle/function/CMakeLists.txt @@ -12,7 +12,7 @@ endif() add_library(paddle_function STATIC ${cpp_files} ${cu_objs}) add_dependencies(paddle_function ${external_project_dependencies}) -add_dependencies(paddle_function gen_proto_cpp) +add_dependencies(paddle_function paddle_proto) if(WITH_TESTING) if(WITH_GPU) diff --git a/paddle/gserver/CMakeLists.txt b/paddle/gserver/CMakeLists.txt index 93a6a99848aa13bb36c9c5c7091fbaa891fc9823..0012636b8f618a1b45cfc801c04781e67694956f 100644 --- a/paddle/gserver/CMakeLists.txt +++ b/paddle/gserver/CMakeLists.txt @@ -58,7 +58,7 @@ endif() add_style_check_target(paddle_gserver ${GSERVER_SOURCES}) add_style_check_target(paddle_gserver ${GSERVER_HEADER}) -add_dependencies(paddle_gserver gen_proto_cpp) +add_dependencies(paddle_gserver paddle_proto ${external_project_dependencies}) if(WITH_TESTING) add_subdirectory(tests) endif() diff --git a/paddle/gserver/evaluators/DetectionMAPEvaluator.cpp b/paddle/gserver/evaluators/DetectionMAPEvaluator.cpp new file mode 100644 index 0000000000000000000000000000000000000000..9b825db574cf8bac2cf7b7538d0583a8adc2c158 --- /dev/null +++ b/paddle/gserver/evaluators/DetectionMAPEvaluator.cpp @@ -0,0 +1,308 @@ +/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserve. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. 
+You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. */ + +#include "Evaluator.h" +#include "paddle/gserver/layers/DetectionUtil.h" + +using std::map; +using std::vector; +using std::pair; +using std::make_pair; + +namespace paddle { + +/** + * @brief Detection mAP Evaluator + * + * The config file api is detection_map_evaluator. + */ +class DetectionMAPEvaluator : public Evaluator { +public: + DetectionMAPEvaluator() + : evaluateDifficult_(false), cpuOutput_(nullptr), cpuLabel_(nullptr) {} + + virtual void start() { + Evaluator::start(); + allTruePos_.clear(); + allFalsePos_.clear(); + numPos_.clear(); + } + + virtual real evalImp(std::vector<Argument>& arguments) { + overlapThreshold_ = config_.overlap_threshold(); + backgroundId_ = config_.background_id(); + evaluateDifficult_ = config_.evaluate_difficult(); + apType_ = config_.ap_type(); + + MatrixPtr detectTmpValue = arguments[0].value; + Matrix::resizeOrCreate(cpuOutput_, + detectTmpValue->getHeight(), + detectTmpValue->getWidth(), + false, + false); + + MatrixPtr labelTmpValue = arguments[1].value; + Matrix::resizeOrCreate(cpuLabel_, + labelTmpValue->getHeight(), + labelTmpValue->getWidth(), + false, + false); + + cpuOutput_->copyFrom(*detectTmpValue); + cpuLabel_->copyFrom(*labelTmpValue); + + Argument label = arguments[1]; + const int* labelIndex = label.sequenceStartPositions->getData(false); + size_t batchSize = label.getNumSequences(); + + vector<map<size_t, vector<NormalizedBBox>>> allGTBBoxes; + vector<map<size_t, vector<pair<real, NormalizedBBox>>>> allDetectBBoxes; + + for (size_t n = 0; n < batchSize; ++n) { + map<size_t, vector<NormalizedBBox>> bboxes; + for (int i = labelIndex[n]; i < labelIndex[n + 1]; ++i) { + vector<NormalizedBBox> bbox; + getBBoxFromLabelData(cpuLabel_->getData() + i * 6, 1, bbox); + int c = cpuLabel_->getData()[i * 6]; + bboxes[c].push_back(bbox[0]); + } + allGTBBoxes.push_back(bboxes); + } + + size_t n = 0; + const real* cpuOutputData = cpuOutput_->getData(); + for (size_t imgId = 0; imgId < batchSize; ++imgId) { + map<size_t, vector<pair<real, NormalizedBBox>>> bboxes; + size_t curImgId = static_cast<size_t>((cpuOutputData + n * 7)[0]); + while (curImgId == imgId && n < cpuOutput_->getHeight()) { + vector<real> label; + vector<real> score; + vector<NormalizedBBox> bbox; + getBBoxFromDetectData(cpuOutputData + n * 7, 1, label, score, bbox); + bboxes[label[0]].push_back(make_pair(score[0], bbox[0])); + ++n; + curImgId = static_cast<size_t>((cpuOutputData + n * 7)[0]); + } + allDetectBBoxes.push_back(bboxes); + } + + for (size_t n = 0; n < batchSize; ++n) { + for (map<size_t, vector<NormalizedBBox>>::iterator it = + allGTBBoxes[n].begin(); + it != allGTBBoxes[n].end(); + ++it) { + size_t count = 0; + if (evaluateDifficult_) { + count = it->second.size(); + } else { + for (size_t i = 0; i < it->second.size(); ++i) + if (!(it->second[i].isDifficult)) ++count; + } + if (numPos_.find(it->first) == numPos_.end() && count != 0) { + numPos_[it->first] = count; + } else { + numPos_[it->first] += count; + } + } + } + + // calcTFPos + calcTFPos(batchSize, allGTBBoxes, allDetectBBoxes); + + return 0; + } + + virtual void printStats(std::ostream& os) const { + real mAP = calcMAP(); + os << "Detection mAP=" << mAP; + } + + virtual void distributeEval(ParameterClient2* client) { + LOG(FATAL) << "Distribute detection evaluation not implemented."; + } + +protected: + void calcTFPos(const size_t batchSize, + const vector<map<size_t, vector<NormalizedBBox>>>& allGTBBoxes, + const vector<map<size_t, vector<pair<real, NormalizedBBox>>>>& + allDetectBBoxes) { + for (size_t n = 0; n < allDetectBBoxes.size(); ++n) { + if (allGTBBoxes[n].size() == 0) { + for (map<size_t, vector<pair<real, NormalizedBBox>>>::const_iterator + it = allDetectBBoxes[n].begin(); + it != allDetectBBoxes[n].end(); + ++it) { + size_t label = it->first; + for (size_t i = 0; i < it->second.size(); ++i) { + allTruePos_[label].push_back(make_pair(it->second[i].first, 0)); + allFalsePos_[label].push_back(make_pair(it->second[i].first, 1)); + } + } + } else { + for (map<size_t, vector<pair<real, NormalizedBBox>>>::const_iterator + it = allDetectBBoxes[n].begin(); + it != allDetectBBoxes[n].end(); + ++it) { + size_t label = it->first; + vector<pair<real, NormalizedBBox>> predBBoxes = it->second; + if (allGTBBoxes[n].find(label) == allGTBBoxes[n].end()) { + for (size_t i = 0; i < predBBoxes.size(); ++i) { + allTruePos_[label].push_back(make_pair(predBBoxes[i].first, 0)); + allFalsePos_[label].push_back(make_pair(predBBoxes[i].first, 1)); + } + } else { + vector<NormalizedBBox> gtBBoxes = + allGTBBoxes[n].find(label)->second; + vector<bool> visited(gtBBoxes.size(), false); + // Sort detections in descending order based on scores + std::sort(predBBoxes.begin(), + predBBoxes.end(), + sortScorePairDescend<NormalizedBBox>); + for (size_t i = 0; i < predBBoxes.size(); ++i) { + real maxOverlap = -1.0; + size_t maxIdx = 0; + for (size_t j = 0; j < gtBBoxes.size(); ++j) { + real overlap = + jaccardOverlap(predBBoxes[i].second, gtBBoxes[j]); + if (overlap > maxOverlap) { + maxOverlap = overlap; + maxIdx = j; + } + } + if (maxOverlap > overlapThreshold_) { + if (evaluateDifficult_ || + (!evaluateDifficult_ && !gtBBoxes[maxIdx].isDifficult)) { + if (!visited[maxIdx]) { + allTruePos_[label].push_back( + make_pair(predBBoxes[i].first, 1)); + allFalsePos_[label].push_back( + make_pair(predBBoxes[i].first, 0)); + visited[maxIdx] = true; + } else { + allTruePos_[label].push_back( + make_pair(predBBoxes[i].first, 0)); + allFalsePos_[label].push_back( + make_pair(predBBoxes[i].first, 1)); + } + } + } else { + allTruePos_[label].push_back(make_pair(predBBoxes[i].first, 0)); + allFalsePos_[label].push_back( + make_pair(predBBoxes[i].first, 1)); + } + } + } + } + } + } + } + + real calcMAP() const { + real mAP = 0.0; + size_t count = 0; + for (map<size_t, size_t>::const_iterator it = numPos_.begin(); + it != numPos_.end(); + ++it) { + size_t label = it->first; + size_t labelNumPos = it->second; + if (labelNumPos == 0 || allTruePos_.find(label) == allTruePos_.end()) + continue; + vector<pair<real, size_t>> labelTruePos = allTruePos_.find(label)->second; + vector<pair<real, size_t>> labelFalsePos = + allFalsePos_.find(label)->second; + // Compute average precision. + vector<size_t> tpCumSum; + getAccumulation(labelTruePos, &tpCumSum); + vector<size_t> fpCumSum; + getAccumulation(labelFalsePos, &fpCumSum); + std::vector<real> precision, recall; + size_t num = tpCumSum.size(); + // Compute Precision. + for (size_t i = 0; i < num; ++i) { + CHECK_LE(tpCumSum[i], labelNumPos); + precision.push_back(static_cast<real>(tpCumSum[i]) / + static_cast<real>(tpCumSum[i] + fpCumSum[i])); + recall.push_back(static_cast<real>(tpCumSum[i]) / labelNumPos); + } + // VOC2007 style + if (apType_ == "11point") { + vector<real> maxPrecisions(11, 0.0); + int startIdx = num - 1; + for (int j = 10; j >= 0; --j) + for (int i = startIdx; i >= 0; --i) { + if (recall[i] < j / 10.) { + startIdx = i; + if (j > 0) maxPrecisions[j - 1] = maxPrecisions[j]; + break; + } else { + if (maxPrecisions[j] < precision[i]) + maxPrecisions[j] = precision[i]; + } + } + for (int j = 10; j >= 0; --j) mAP += maxPrecisions[j] / 11; + ++count; + } else if (apType_ == "Integral") { + // Natural integral + real averagePrecisions = 0.; + real prevRecall = 0.; + for (size_t i = 0; i < num; ++i) { + if (fabs(recall[i] - prevRecall) > 1e-6) + averagePrecisions += precision[i] * fabs(recall[i] - prevRecall); + prevRecall = recall[i]; + } + mAP += averagePrecisions; + ++count; + } else { + LOG(FATAL) << "Unknown ap version: " << apType_; + } + } + if (count != 0) mAP /= count; + return mAP * 100; + } + + void getAccumulation(vector<pair<real, size_t>> inPairs, + vector<size_t>* accuVec) const { + std::stable_sort( + inPairs.begin(), inPairs.end(), sortScorePairDescend<size_t>); + accuVec->clear(); + size_t sum = 0; + for (size_t i = 0; i < inPairs.size(); ++i) { + sum += inPairs[i].second; + accuVec->push_back(sum); + } + } + + std::string getTypeImpl() const { return "detection_map"; } + + real getValueImpl() const { return calcMAP(); } + +private: + real overlapThreshold_; // overlap threshold for determining a match + bool evaluateDifficult_; // whether to evaluate difficult ground truth + size_t backgroundId_; // class index of background + std::string apType_; // how to calculate mAP (Integral or 11point) + + MatrixPtr cpuOutput_; + MatrixPtr cpuLabel_; + + map<size_t, size_t> numPos_; // counts of true objects in each class + map<size_t, vector<pair<real, size_t>>> + allTruePos_; // true positive predictions + map<size_t, vector<pair<real, size_t>>> + allFalsePos_; // false positive predictions +}; + +REGISTER_EVALUATOR(detection_map, DetectionMAPEvaluator); + +} // namespace paddle diff --git a/paddle/gserver/gradientmachines/MultiGradientMachine.cpp b/paddle/gserver/gradientmachines/MultiGradientMachine.cpp index 3159026e6b92355ba7480b09535388c969a504e2..018da6c76dc27a74b074ec52c18347beba8164fc 100644 --- a/paddle/gserver/gradientmachines/MultiGradientMachine.cpp +++ b/paddle/gserver/gradientmachines/MultiGradientMachine.cpp @@ -166,11 +166,21 @@ MultiGradientMachine::MultiGradientMachine(const ModelConfig& config, outArgStream_ = HPPL_STREAM_1; + start(); +} + +void MultiGradientMachine::start() { for (auto& thread : threads_) { thread->start(); } } + +void MultiGradientMachine::finish() { + for (auto& thread : threads_) { + thread->stop(); + } +} + std::vector<const std::vector<ParameterPtr>*> MultiGradientMachine::getSlaveParameters() { std::vector<const std::vector<ParameterPtr>*> vec; @@ -326,12 +336,6 @@ void MultiGradientMachine::onPassEnd() { } } -void MultiGradientMachine::finish() { - for (auto& thread : threads_) { - thread->stop(); - } -} - Evaluator* MultiGradientMachine::makeEvaluator() const { return threads_[0]->getGradientMachine()->makeEvaluator(); } @@ -445,7 +449,7 @@ TrainerThread::TrainerThread(const ModelConfig& config, gradStream_ = HPPL_STREAM_2; valueStream_ = HPPL_STREAM_3; - stopping_ = false; + stopping_ = true; updateCounter_ = 0; parameterUpdated_ = false; } @@ -453,6 +457,10 @@ TrainerThread::~TrainerThread() { stop(); } void TrainerThread::start() { + if (!stopping_) return; + + stopping_ = false; + gradientMachine_->start(); computeThread_.reset(new std::thread([this]() { computeThread(); })); @@ -593,7 +601,7 @@ void TrainerThread::backward() { void TrainerThread::backwardCallback(Parameter* para) { // CPU parameters are merged in the end - if (!para->useGpu()) return; + if (!para->useGpu() || para->isStatic()) return; int paramId = para->getID(); if 
(multiMachine_->getNumThreads() == 1) { diff --git a/paddle/gserver/gradientmachines/MultiGradientMachine.h b/paddle/gserver/gradientmachines/MultiGradientMachine.h index 70203bbb97fe79d72fbc6bd2b5d427cb1de7b61f..5e7622f929fd57de6e38855528a752b5586c4cd1 100644 --- a/paddle/gserver/gradientmachines/MultiGradientMachine.h +++ b/paddle/gserver/gradientmachines/MultiGradientMachine.h @@ -176,6 +176,10 @@ public: explicit MultiGradientMachine(const ModelConfig& config, bool useGpu); + virtual void start(); + + virtual void finish(); + virtual void prefetch(const std::vector& inArgs); virtual void forward(const std::vector& inArgs, @@ -193,8 +197,6 @@ public: virtual void onPassEnd(); - virtual void finish(); - virtual Evaluator* makeEvaluator() const; virtual void eval(Evaluator* evaluator) const; diff --git a/paddle/gserver/gradientmachines/NeuralNetwork.cpp b/paddle/gserver/gradientmachines/NeuralNetwork.cpp index 4512aacc81f86bf87fc9ea30adcf081327663f16..2e839f640503b8f4e390fc87d9d59960dbc37f6e 100644 --- a/paddle/gserver/gradientmachines/NeuralNetwork.cpp +++ b/paddle/gserver/gradientmachines/NeuralNetwork.cpp @@ -241,11 +241,14 @@ void NeuralNetwork::forward(const std::vector& inArgs, dataLayers_[i]->setData(inArgs[i]); } + gLayerStackTrace.set_stage(true); + { for (auto& layer : layers_) { REGISTER_TIMER_INFO("ForwardTimer", layer->getName().c_str()); gLayerStackTrace.push(layer->getName()); layer->forward(passType); + gLayerStackTrace.pop(layer->getName()); } } @@ -254,9 +257,6 @@ void NeuralNetwork::forward(const std::vector& inArgs, for (auto& layer : outputLayers_) { outArgs->push_back(layer->getOutput()); } - if (passType == PASS_TEST) { - gLayerStackTrace.clear(); - } } void NeuralNetwork::resetState() { @@ -283,9 +283,10 @@ void NeuralNetwork::getState(MachineState& machineState) { } void NeuralNetwork::backward(const UpdateCallback& callback) { - gLayerStackTrace.pop(""); // tell layer trace is during backward. + gLayerStackTrace.set_stage(false); FOR_EACH_R(layer, layers_) { REGISTER_TIMER_INFO("BackwardTimer", (*layer)->getName().c_str()); + gLayerStackTrace.push((*layer)->getName()); if ((*layer)->needGradient()) { (*layer)->backward(callback); } @@ -308,35 +309,35 @@ public: void addEvaluator(std::unique_ptr&& evaluator) { evaluators_.emplace_back(std::move(evaluator)); } - virtual void start() { + void start() override { for (auto& evaluator : evaluators_) { evaluator->start(); } } - virtual void finish() { + void finish() override { for (auto& evaluator : evaluators_) { evaluator->finish(); } } - virtual void eval(const NeuralNetwork& nn) { + void eval(const NeuralNetwork& nn) override { for (auto& evaluator : evaluators_) { evaluator->eval(nn); } } - virtual real evalImp(std::vector& arguments) { + real evalImp(std::vector& arguments) override { (void)arguments; return -1; } - virtual void printStats(std::ostream& os) const { + void printStats(std::ostream& os) const override { for (auto& evaluator : evaluators_) { evaluator->printStats(os); os << ' '; } } - virtual void distributeEval(ParameterClient2* client) { + void distributeEval(ParameterClient2* client) override { for (auto& evaluator : evaluators_) { evaluator->distributeEval(client); } @@ -351,7 +352,7 @@ public: * @brief getNames will return all inside evaluators' names. * @param names [out]: return names. 
*/ - void getNames(std::vector* names) { + void getNames(std::vector* names) override { for (auto& eval : evaluators_) { eval->getNames(names); } @@ -360,7 +361,7 @@ public: /** * @brief getValue could get all inside evaluators' value. */ - real getValue(const std::string& name, Error* err) const { + real getValue(const std::string& name, Error* err) const override { return this->getMethodHelper( name, err, [&name, err](const std::unique_ptr& eval) { return eval->getValue(name, err); @@ -370,7 +371,7 @@ public: /** * @brief getType could get all inside evaluators' type. */ - std::string getType(const std::string& name, Error* err) const { + std::string getType(const std::string& name, Error* err) const override { return this->getMethodHelper( name, err, [&name, err](const std::unique_ptr& eval) { return eval->getType(name, err); @@ -395,6 +396,30 @@ private: } }; +class SubnetEvaluator : public CombinedEvaluator { +public: + SubnetEvaluator(const std::string& layerName, + std::unique_ptr&& evaluator) + : layerName_(layerName) { + addEvaluator(std::move(evaluator)); + } + virtual void eval(const NeuralNetwork& nn) override { + const LayerPtr& layer = nn.getLayer(layerName_); + CHECK(layer) << "Nonexisted layer: " << layerName_ << " in submodel " + << nn.getName(); + bool accessed = false; + layer->accessSubNetwork([this, &accessed](NeuralNetwork& subnet) { + subnet.eval(evaluators_[0].get()); + accessed = true; + }); + CHECK(accessed) << "There is no subnetwork for layer " << layerName_ + << " in submodel " << nn.getName(); + } + +protected: + std::string layerName_; +}; + Evaluator* NeuralNetwork::makeEvaluator() const { CombinedEvaluator* combinedEvaluator = new CombinedEvaluator(); auto subModelConfig = std::find_if(config_.sub_models().begin(), @@ -421,6 +446,15 @@ Evaluator* NeuralNetwork::makeEvaluator() const { combinedEvaluator->addEvaluator(std::move(evaluator)); } } + for (auto& layer : layers_) { + layer->accessSubNetwork( + [layer, combinedEvaluator](NeuralNetwork& subnet) { + std::unique_ptr subEvaluator(new SubnetEvaluator( + layer->getName(), + std::unique_ptr(subnet.makeEvaluator()))); + combinedEvaluator->addEvaluator(std::move(subEvaluator)); + }); + } } else { for (const EvaluatorConfig& evalConfig : config_.evaluators()) { std::unique_ptr evaluator(Evaluator::create(evalConfig)); diff --git a/paddle/gserver/gradientmachines/NeuralNetwork.h b/paddle/gserver/gradientmachines/NeuralNetwork.h index e7b6c438407e7eab6eab1f6ed496f35caa9f2177..12810f642519b7965fc1b7d751290445e3350dd5 100644 --- a/paddle/gserver/gradientmachines/NeuralNetwork.h +++ b/paddle/gserver/gradientmachines/NeuralNetwork.h @@ -129,6 +129,8 @@ public: static NeuralNetwork* newNeuralNetwork(const std::string& name = "", NeuralNetwork* rootNetwork = nullptr); + const std::string& getName() const { return subModelName_; } + protected: /** * The constructor of NeuralNetwork. 
diff --git a/paddle/gserver/gradientmachines/RecurrentGradientMachine.cpp b/paddle/gserver/gradientmachines/RecurrentGradientMachine.cpp index 3e930380226bce58cc90704b4c4cfa36e9f70968..9a972466d66ba1417b2c31e66dc375b3da229aa8 100644 --- a/paddle/gserver/gradientmachines/RecurrentGradientMachine.cpp +++ b/paddle/gserver/gradientmachines/RecurrentGradientMachine.cpp @@ -208,6 +208,7 @@ void RecurrentGradientMachine::init( }); CHECK(subModelConfig != config.sub_models().end()); reversed_ = subModelConfig->reversed(); + generating_ = subModelConfig->has_generator(); inFrameLines_.resize(subModelConfig->in_links_size()); for (size_t i = 0; i < inFrameLines_.size(); ++i) { @@ -287,10 +288,6 @@ void RecurrentGradientMachine::init( parameterIds_.push_back(para->getID()); } } - - if (subModelConfig->evaluator_names_size() > 0) { - evaluator_.reset(frames_[0]->makeEvaluator()); - } } void RecurrentGradientMachine::resizeOrCreateFrames(int numFrames) { @@ -538,7 +535,7 @@ void RecurrentGradientMachine::forward(const std::vector& inArgs, The outputs are outFramesLines_[i].agentLayer */ - if (inFrameLines_.empty() && passType == PASS_TEST) { + if (generating_) { generateSequence(); return; } // else forward.. @@ -561,14 +558,14 @@ void RecurrentGradientMachine::forward(const std::vector& inArgs, std::vector outArgs; frames_[i]->forward(inArgs, &outArgs, passType); } - if (evaluator_ && passType == PASS_TEST) { - this->eval(evaluator_.get()); - } reorganizeOutput(passType); } void RecurrentGradientMachine::backward(const UpdateCallback& callback) { + if (generating_) { + return; + } REGISTER_TIMER_INFO("RecurrentBwTime", "RecurrentBwTime"); AsyncGpuBlock asyncGpuBlock; for (int i = maxSequenceLength_ - 1; i >= 0; --i) { @@ -577,11 +574,6 @@ void RecurrentGradientMachine::backward(const UpdateCallback& callback) { for (auto& memoryFrameLine : memoryFrameLines_) { memoryFrameLine.bootLayer->backward(nullptr); } - - // call printers here so the gradient can be printed - if (evaluator_) { - this->eval(evaluator_.get()); - } } void RecurrentGradientMachine::forwardBackward( @@ -595,9 +587,9 @@ void RecurrentGradientMachine::forwardBackward( void RecurrentGradientMachine::eval(Evaluator* evaluator) const { // call printers frame by frame for (int i = 0; i < maxSequenceLength_; ++i) { - LOG(INFO) << "Recurrent Layer Group eval frame " << i << " begin"; + VLOG(2) << "Recurrent Layer Group eval frame " << i << " begin"; evaluator->eval(*(frames_[i].get())); - LOG(INFO) << "Recurrent Layer Group eval frame " << i << " end"; + VLOG(2) << "Recurrent Layer Group eval frame " << i << " end"; } } @@ -1093,10 +1085,6 @@ void RecurrentGradientMachine::oneWaySearch(size_t batchSize) { copyDataOutlinkFrame(machineCur); - // call value printer - if (evaluator_) { - evaluator_->eval(*(frames_[machineCur].get())); - } // check eos const IVectorPtr& eosVec = eosFrameLine_->layers[machineCur]->getOutput().ids; @@ -1321,11 +1309,10 @@ void RecurrentGradientMachine::fillGenOutputs() { batchMachineIdVec_.clear(); generator_.ids.clear(); + int* starts = generator_.outArg.sequenceStartPositions->getMutableData(false); + starts[0] = 0; if (numResults > 1) { real* probs = generator_.outArg.in->getData(); - int* starts = - generator_.outArg.sequenceStartPositions->getMutableData(false); - starts[0] = 0; for (size_t i = 0; i < finalPaths_.size(); ++i) { for (size_t j = 0; j < finalPaths_[i].size(); ++j) { Path& path = finalPaths_[i][j]; @@ -1348,7 +1335,10 @@ void RecurrentGradientMachine::fillGenOutputs() { } else { for (size_t i = 0; 
i < finalPaths_.size(); ++i) { CHECK(!finalPaths_[i].empty()); - generator_.ids = finalPaths_[i][0].ids; + generator_.ids.insert(generator_.ids.begin(), + finalPaths_[i][0].ids.begin(), + finalPaths_[i][0].ids.end()); + starts[i + 1] = starts[i] + finalPaths_[i][0].ids.size(); } } } diff --git a/paddle/gserver/gradientmachines/RecurrentGradientMachine.h b/paddle/gserver/gradientmachines/RecurrentGradientMachine.h index 8d94d7e2df216c4657d759c16dd6b1f2848996e0..f245620cf668bb341df99cf498105cbd996a6b24 100644 --- a/paddle/gserver/gradientmachines/RecurrentGradientMachine.h +++ b/paddle/gserver/gradientmachines/RecurrentGradientMachine.h @@ -414,6 +414,7 @@ protected: std::vector ids; // store generated sequences Argument outArg; // final output argument }; + bool generating_; Generator generator_; std::vector> frames_; @@ -428,8 +429,6 @@ protected: std::vector parameterIds_; // parameters actually used by this Layer Group - std::unique_ptr evaluator_; // frame printers in this layer group - // store final argument of outFrameLines_ std::vector dataArgs_; // store each frame's output argument of outFrameLines_ diff --git a/paddle/gserver/layers/AgentLayer.cpp b/paddle/gserver/layers/AgentLayer.cpp index 31463823b3fc04cc24068d95887a9d3ed25a6168..15e7411b5fde0fa3a532394cf7d0e8477ef052d0 100644 --- a/paddle/gserver/layers/AgentLayer.cpp +++ b/paddle/gserver/layers/AgentLayer.cpp @@ -109,6 +109,40 @@ void GatherAgentLayer::forwardValue(PassType passType) { } } +namespace { + +// dest[index[i]] <- src[i] for each i +void copyElements(const IVector& srcVec, + const IVector& indexVec, + IVector& destVec) { + const int* src = srcVec.getData(); + const int* index = indexVec.getData(); + int* dest = destVec.getData(); + int len = indexVec.getSize(); + CHECK_EQ(srcVec.getSize(), indexVec.getSize()); + for (int i = 0; i < len; ++i) { + dest[index[i]] = src[i]; + } +} +} + +void GatherAgentLayer::forwardIds(PassType passType) { + IVectorPtr realId = realLayers_[0]->getOutputLabel(); + if (!realId) return; + + IVector::resizeOrCreate(output_.ids, allIds_->getSize(), useGpu_); + IVectorPtr outId = output_.ids; + idsVec_.resize(idIndex_.size()); + + for (size_t i = 0; i < realLayers_.size(); ++i) { + const IVectorPtr& realId = realLayers_[i]->getOutputLabel(); + idsVec_[i] = IVector::create(allIds_->getData() + idIndex_[i], + /* size */ realId->getSize(), + useGpu_); + execViaCpu(©Elements, *realId, *idsVec_[i], *outId); + } +} + void GatherAgentLayer::backward(const UpdateCallback& callback) { (void)callback; const MatrixPtr& outputGrad = getOutputGrad(); @@ -136,23 +170,22 @@ void ScatterAgentLayer::forward(PassType passType) { CHECK_EQ(realLayer_->getDeviceId(), this->getDeviceId()); int width = this->getSize(); - if (realOutArg_.hasSeq()) { - forwardSequence(passType); - } else if (realOutArg_.value || realOutArg_.ids) { - output_.subArgFrom( - realOutArg_, /* offset */ idIndex_, idSize_, width, useGpu_); - } else { // used in generation - if (realLayer_->getOutput().ids) { - IVector::resizeOrCreate(output_.ids, ids_->getSize(), useGpu_); - output_.ids->selectFrom(*realLayer_->getOutput().ids, *ids_); - } - if (realLayer_->getOutput().value) { - int height = ids_->getSize(); - resetOutput(height, width); - - const MatrixPtr& outV = getOutputValue(); - const MatrixPtr& realV = realLayer_->getOutputValue(); - outV->selectRows(*realV, *ids_); + if (selectionMode_) { + forwardWithSelection(passType); + } else { + if (realOutArg_.hasSeq()) { + output_.subArgFrom(realOutArg_, + /* offset */ idIndex_, + 
idSize_, + width, + useGpu_, + /* trans */ false, + /* seqFlag */ true, + /* seqStart */ seqStartPosIndex_, + /* seqSize */ numSequences_); + } else { + output_.subArgFrom( + realOutArg_, /* offset */ idIndex_, idSize_, width, useGpu_); } } } @@ -160,6 +193,8 @@ void ScatterAgentLayer::forward(PassType passType) { void ScatterAgentLayer::backward(const UpdateCallback& callback) { (void)callback; + CHECK(!selectionMode_); + const MatrixPtr& outputGrad = realOutArg_.grad; const MatrixPtr& realGrad = realLayer_->getOutputGrad(); if (realGrad) { @@ -174,42 +209,7 @@ void ScatterAgentLayer::backward(const UpdateCallback& callback) { REGISTER_LAYER(gather_agent, GatherAgentLayer); REGISTER_LAYER(scatter_agent, ScatterAgentLayer); -void GatherAgentLayer::forwardIds(PassType passType) { - int height = 0; - IVectorPtr idReal = realLayers_[0]->getOutputLabel(); - - if (!idReal) return; - - if (output_.subSequenceStartPositions) { - int* starts = output_.subSequenceStartPositions->getMutableData(false); - // Gather generator.idsVec - // if is beam search generation result. Get first result. - if (idReal->getData()[idReal->getSize() - 1] == -1) { - for (size_t i = 0; i < realLayers_.size(); ++i) { - // The first element stores first result size - idReal = realLayers_[i]->getOutputLabel(); - idReal->subVecFrom(*idReal, 1, idReal->getData()[0]); - } - } - for (size_t i = 0; i < realLayers_.size(); ++i) { - CHECK(realLayers_[i]->getOutputLabel()); - starts[i] = height; - height += realLayers_[i]->getOutputLabel()->getSize(); - } - starts[realLayers_.size()] = height; - output_.sequenceStartPositions->getMutableData(false)[1] = height; - - IVector::resizeOrCreate(output_.ids, height, false); - for (size_t i = 0; i < realLayers_.size(); ++i) { - output_.ids->subVec(starts[i], starts[i + 1] - starts[i]) - ->copyFrom(*realLayers_[i]->getOutputLabel()); - } - } else { - LOG(FATAL) << "Not implemented"; - } -} - -void ScatterAgentLayer::forwardSequence(PassType passType) { +void ScatterAgentLayer::forwardWithSelection(PassType passType) { Layer::forward(passType); CHECK_EQ(realLayer_->getDeviceId(), this->getDeviceId()); @@ -220,17 +220,19 @@ void ScatterAgentLayer::forwardSequence(PassType passType) { AsyncGpuBlock asyncGpuBlock; REGISTER_TIMER_INFO("SequenceAgentLayerForward", getName().c_str()); - if (realOutArg_.value || realOutArg_.ids) { - CHECK(realOutArg_.sequenceStartPositions); - output_.subArgFrom(realOutArg_, - /* offset */ idIndex_, - idSize_, - width, - useGpu_, - /* trans */ false, - /* seqFlag */ true, - /* seqStart */ seqStartPosIndex_, - /* seqSize */ numSequences_); + if (!input.hasSeq()) { + if (realLayer_->getOutput().ids) { + IVector::resizeOrCreate(output_.ids, ids_->getSize(), useGpu_); + output_.ids->selectFrom(*realLayer_->getOutput().ids, *ids_); + } + if (realLayer_->getOutput().value) { + int height = ids_->getSize(); + resetOutput(height, width); + + const MatrixPtr& outV = getOutputValue(); + const MatrixPtr& realV = realLayer_->getOutputValue(); + outV->selectRows(*realV, *ids_); + } } else { // Putting the generation logic here is really an ugly hack! // used in generation diff --git a/paddle/gserver/layers/AgentLayer.h b/paddle/gserver/layers/AgentLayer.h index 461b84b17e556b53e0734bff8e37a0d529a3290e..29681b29c6a9a10715548839f2d365eb4a0c7381 100644 --- a/paddle/gserver/layers/AgentLayer.h +++ b/paddle/gserver/layers/AgentLayer.h @@ -110,6 +110,9 @@ protected: // of real layer. 
ICpuGpuVectorPtr inputStartPos_; + // true for setRealLayer, false for setRealLayerAndOutput + bool selectionMode_; + public: explicit ScatterAgentLayer(const LayerConfig& config) : Layer(config) {} @@ -137,6 +140,7 @@ public: } else { cpuIds_ = ids_; } + selectionMode_ = true; } // set real layer and output, [idIndex, idIndex + idSize) of *ids* @@ -153,6 +157,7 @@ public: idIndex_ = idIndex; idSize_ = idSize; handleBackward_ = handleBackward; + selectionMode_ = false; } void setSequenceStartPositions(const ICpuGpuVectorPtr& sequenceStartPositions, @@ -166,7 +171,7 @@ public: void forward(PassType passType) override; void backward(const UpdateCallback& callback) override; - void forwardSequence(PassType passType); + void forwardWithSelection(PassType passType); }; } // namespace paddle diff --git a/paddle/gserver/layers/Layer.cpp b/paddle/gserver/layers/Layer.cpp index 125aaf947f3c9d976b117667d1d1b7700a029cc6..4b92b5d163ad107c0783beae45f8c936112fcccf 100644 --- a/paddle/gserver/layers/Layer.cpp +++ b/paddle/gserver/layers/Layer.cpp @@ -191,6 +191,11 @@ void Layer::addOutputArgument(int deviceId) { void Layer::copyOutputToOtherDevice() { for (size_t i = 0; i != outputOtherDevice_.size(); i++) { SetDevice device(outputOtherDevice_[i].deviceId); + // If outputOtherDevice_[i].value is a CpuMatrix, + // the copyFrom is a synchronous interface. + // If outputOtherDevice_[i].value is a GpuMatrix, since subsequent + // calculations are all on HPPL_STREAM_DEFAULT, + // copyFrom can be an asynchronous interface. outputOtherDevice_[i].value->copyFrom(*getOutputValue(), HPPL_STREAM_DEFAULT); outputOtherDevice_[i].sequenceStartPositions = diff --git a/paddle/gserver/tests/test_Evaluator.cpp b/paddle/gserver/tests/test_Evaluator.cpp index 4f5fdbb37ce024e18b8d39c5dda74c69bf82166a..93996392d221d531f65caf465decbffdbc2d0384 100644 --- a/paddle/gserver/tests/test_Evaluator.cpp +++ b/paddle/gserver/tests/test_Evaluator.cpp @@ -138,6 +138,23 @@ void testEvaluatorAll(TestConfig testConf, testEvaluator(testConf, testEvaluatorName, batchSize, false); } +TEST(Evaluator, detection_map) { + TestConfig config; + config.evaluatorConfig.set_type("detection_map"); + config.evaluatorConfig.set_overlap_threshold(0.5); + config.evaluatorConfig.set_background_id(0); + config.evaluatorConfig.set_ap_type("Integral"); + config.evaluatorConfig.set_evaluate_difficult(0); + + config.inputDefs.push_back({INPUT_DATA, "output", 7}); + config.inputDefs.push_back({INPUT_SEQUENCE_DATA, "label", 6}); + config.evaluatorConfig.set_evaluate_difficult(false); + testEvaluatorAll(config, "detection_map", 100); + + config.evaluatorConfig.set_evaluate_difficult(true); + testEvaluatorAll(config, "detection_map", 100); +} + TEST(Evaluator, classification_error) { TestConfig config; config.evaluatorConfig.set_type("classification_error"); diff --git a/paddle/math/CMakeLists.txt b/paddle/math/CMakeLists.txt index f5657c4690ca71200346efd4e2c5244c02c92eb1..9981de61606bda6baac103592125b929d4c12a3d 100644 --- a/paddle/math/CMakeLists.txt +++ b/paddle/math/CMakeLists.txt @@ -33,7 +33,7 @@ endif() add_style_check_target(paddle_math ${MATH_SOURCES}) add_style_check_target(paddle_math ${MATH_HEADERS}) -add_dependencies(paddle_math gen_proto_cpp) # depends +add_dependencies(paddle_math paddle_proto ${external_project_dependencies}) # depends if(WITH_TESTING) add_subdirectory(tests) endif() diff --git a/paddle/math/Matrix.cpp b/paddle/math/Matrix.cpp index c910146164ebfb0737583c72c48ce6dbc5b49939..4431d613f655c1d0c8da13bb5ac9225980c650ad 100644 --- 
a/paddle/math/Matrix.cpp +++ b/paddle/math/Matrix.cpp @@ -1565,6 +1565,8 @@ void CpuMatrix::copyFrom(const Matrix& src, hl_stream_t stream) { const_cast<real*>(src.getData()), sizeof(real) * elementCnt_, stream); + // There is a need to add synchronization to ensure that the data is copied. + hl_stream_synchronize(stream); } else if (typeid(src) == typeid(CpuMatrix)) { memcpy(data_, src.getData(), sizeof(real) * elementCnt_); } else { diff --git a/paddle/math/Matrix.h b/paddle/math/Matrix.h index 748be850b4c902d1b48c1dafbb0d5ea2bf197e6e..7dfd593225065e18830b2b0c0ce854fe7a2d5178 100644 --- a/paddle/math/Matrix.h +++ b/paddle/math/Matrix.h @@ -239,7 +239,8 @@ public: LOG(FATAL) << "Not implemented"; } - // asynchronous copy + // For GpuMatrix this is an asynchronous copy interface + // For CpuMatrix this is a synchronous copy interface virtual void copyFrom(const Matrix& src, hl_stream_t stream) { LOG(FATAL) << "Not implemented"; } diff --git a/paddle/math/Vector.cpp b/paddle/math/Vector.cpp index c519ca500afb1dbfdff6e8d211786f4e18ccf1fd..eb87ee9bb7936d27c0c32a1a4b35ff49871c0a10 100644 --- a/paddle/math/Vector.cpp +++ b/paddle/math/Vector.cpp @@ -657,6 +657,8 @@ void CpuVectorT<T>::copyFrom(const VectorT<T>& src, hl_stream_t stream) { (void*)src.getData(), sizeof(T) * this->getSize(), stream); + // There is a need to add synchronization to ensure that the data is copied. + hl_stream_synchronize(stream); } else { src.copyTo(this); } diff --git a/paddle/math/Vector.h b/paddle/math/Vector.h index 9af6e30c9e13895ad95653a787ec1c1ad77a248f..80b9775fccf10c57bb48145ef56165ec7c86d8b8 100644 --- a/paddle/math/Vector.h +++ b/paddle/math/Vector.h @@ -168,11 +168,11 @@ public: virtual void copyFrom(const VectorT& src) = 0; /** - * If use_gpu, this function will push the copy-task to the specifed-stream - * and return immediately. + * If GpuVector, this function is an asynchronous interface, + * will push the copy-task to the specified-stream and return immediately. * - * If not use GPU, this function is same as - * the copyFrom(const VectorT& src), which use stream HPPL_STREAM_DEFAULT. + * If CpuVector, this function is a synchronous interface, + * same as the copyFrom(const VectorT& src). */ virtual void copyFrom(const VectorT& src, hl_stream_t stream) = 0;
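The comments above describe a stream contract that is easy to get wrong, so here is a minimal sketch of the pattern this diff introduces (the buffer variables are assumed to exist; `hl_memcpy_async` and `hl_stream_synchronize` are the HPPL calls used in the diff):

```cpp
// GPU -> CPU copy: the enqueue is asynchronous, so the host must synchronize
// on the stream before it is allowed to read the destination buffer.
hl_stream_t stream = HPPL_STREAM_DEFAULT;
hl_memcpy_async(dst_cpu_data, src_gpu_data, num_bytes, stream);  // returns immediately
hl_stream_synchronize(stream);  // block until the copy has landed in CPU memory
// dst_cpu_data is now safe to read from host code.
```

This is exactly what `CpuMatrix::copyFrom` and `CpuVectorT<T>::copyFrom` now do, and what the `TEST(CpuMatrix, copyFrom)` added below verifies.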
diff --git a/paddle/math/tests/test_matrixCompare.cpp b/paddle/math/tests/test_matrixCompare.cpp index 5a0dffe086c4e265d17c79dba435b66c0873e3c7..354f58df39365410ff9aec2576c768e58db9e0d2 100644 --- a/paddle/math/tests/test_matrixCompare.cpp +++ b/paddle/math/tests/test_matrixCompare.cpp @@ -1127,4 +1127,18 @@ TEST(Matrix, MaxOutFwdBwd) { } } +TEST(CpuMatrix, copyFrom) { + const size_t height = 1000; + const size_t width = 1000; + CpuMatrix cpu(height, width); + GpuMatrix gpu(height, width); + CpuMatrix copy(height, width); + + cpu.randomizeUniform(); + gpu.copyFrom(cpu); + copy.copyFrom(gpu, HPPL_STREAM_DEFAULT); + + TensorCheckEqual(cpu, copy); +} + #endif diff --git a/paddle/memory/.clang-format b/paddle/memory/.clang-format new file mode 100644 index 0000000000000000000000000000000000000000..29282dc87e2c499988c17d90d47d44cd5cf7f115 --- /dev/null +++ b/paddle/memory/.clang-format @@ -0,0 +1,5 @@ +--- +Language: Cpp +BasedOnStyle: Google +Standard: Cpp11 +... diff --git a/paddle/memory/CMakeLists.txt b/paddle/memory/CMakeLists.txt new file mode 100644 index 0000000000000000000000000000000000000000..3943c3cfad31d13a00645aba6fc153d3d13da987 --- /dev/null +++ b/paddle/memory/CMakeLists.txt @@ -0,0 +1 @@ +add_subdirectory(detail) diff --git a/paddle/memory/README.md b/paddle/memory/README.md new file mode 100644 index 0000000000000000000000000000000000000000..96a331a486f57d3e030408fee182199bad5b38c2 --- /dev/null +++ b/paddle/memory/README.md @@ -0,0 +1,140 @@ +## Design + +### Usage + +To allocate 4KB CPU memory: + +```cpp +p = memory::Alloc(platform::CPUPlace(), 4*1024); +``` + +To allocate 4KB memory on the 3rd GPU: + +```cpp +p = memory::Alloc(platform::GPUPlace(2), 4*1024); +``` + +To free the memory and check the amount of memory used so far on a place: + +```cpp +auto pl = platform::GPUPlace(0); +p = memory::Alloc(pl, 4*1024); +cout << memory::Used(pl); +memory::Free(pl, p); +``` + +### API + +In `paddle/memory/memory.h` we have: + +```cpp +namespace memory { +template <typename Place> void* Alloc(Place, size_t); +template <typename Place> void Free(Place, void*); +template <typename Place> size_t Used(Place); +} // namespace memory +``` + +These function templates have specializations on either `platform::CPUPlace` or `platform::GPUPlace`: + +```cpp +template<> +void* Alloc<CPUPlace>(CPUPlace p, size_t size) { + return GetCPUBuddyAllocator()->Alloc(size); +} +``` + +and + +```cpp +template<> +void* Alloc<GPUPlace>(GPUPlace p, size_t size) { + return GetGPUBuddyAllocator(p.id)->Alloc(size); +} +``` + +Similar specializations exist for `Free` and `Used`.
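A sketch of what those remaining specializations could look like, mirroring the `Alloc` pattern above (illustrative only; `Free` and `Used` are the `BuddyAllocator` methods declared in `buddy_allocator.h` below):

```cpp
template<>
void Free<CPUPlace>(CPUPlace p, void* ptr) {
  GetCPUBuddyAllocator()->Free(ptr);
}

template<>
size_t Used<CPUPlace>(CPUPlace p) {
  return GetCPUBuddyAllocator()->Used();
}
```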
+ +### Implementation + +`GetCPUBuddyAllocator` and `GetGPUBuddyAllocator` are singletons. + +```cpp +BuddyAllocator* GetCPUBuddyAllocator() { + static BuddyAllocator* a = NULL; + if (a == NULL) { + a = new BuddyAllocator(new CPUAllocator /*backup allocator*/, ...); + } + return a; +} + +BuddyAllocator* GetGPUBuddyAllocator(int gpu_id) { + static BuddyAllocator** as = NULL; + if (as == NULL) { + as = new BuddyAllocator*[platform::NumGPUs()]; + for (int gpu = 0; gpu < platform::NumGPUs(); gpu++) { + as[gpu] = new BuddyAllocator(new GPUAllocator(gpu) /* backup allocator */, ...); + } + } + return as[gpu_id]; +} +``` + +#### `BuddyAllocator` + +`BuddyAllocator` implements the buddy allocation algorithm. Its constructor takes parameters only related to the algorithm: + +```cpp +BuddyAllocator::BuddyAllocator(initial_pool_size, max_pool_size) { + ... +} +``` + +Please be aware that **`BuddyAllocator` always allocates aligned memory**, aligned on a 32-byte boundary, which can hold a `BuddyAllocator::Block` object: + +```cpp +class BuddyAllocator { + private: + struct Block { + size_t size; + Block *left, *right; + size_t index; // allocator id + }; + ... +}; +``` + +Because BuddyAllocator has the meta-data of each block, it can track the amount of memory in use -- the total returned by `Alloc` and not yet released by `Free`. In contrast, `CPUAllocator` and `GPUAllocator` don't know the size of a freed memory block and cannot do this bookkeeping. + +#### System Allocators + +The `GPUAllocator` and `CPUAllocator` are called *system allocators*. They work as the fallback allocators of `BuddyAllocator`. + +## Justification + +I got inspiration from Majel and Caffe2, though the above design looks different from both. + +### Caffe2 + +In Caffe2, `Tensor::mutable_data()` allocates the memory. +In particular, [`Tensor::mutable_data`](https://github.com/caffe2/caffe2/blob/v0.7.0/caffe2/core/tensor.h#L523) calls [`Tensor::raw_mutable_data`](https://github.com/caffe2/caffe2/blob/v0.7.0/caffe2/core/tensor.h#L459), which in turn calls [`Context::New`](https://github.com/caffe2/caffe2/blob/v0.7.0/caffe2/core/tensor.h#L479). + +There are two implementations of `Context`: + +1. [`CPUContext`](https://github.com/caffe2/caffe2/blob/v0.7.0/caffe2/core/context.h#L105), whose [`New` method](https://github.com/caffe2/caffe2/blob/v0.7.0/caffe2/core/context.h#L131) calls [`g_cpu_allocator.get()->New(size_t)`](https://github.com/caffe2/caffe2/blob/v0.7.0/caffe2/core/context.cc#L15) to allocate the memory. + +1. [`CUDAContext`](https://github.com/caffe2/caffe2/blob/v0.7.0/caffe2/core/context_gpu.h#L99), which has a data member [`int gpu_id_`](https://github.com/caffe2/caffe2/blob/v0.7.0/caffe2/core/context_gpu.h#L202). This looks very similar to class `majel::GPUPlace`, which also has an `int id_` data member. `CUDAContext::New(size_t)` calls [`g_cub_allocator->DeviceAllocate(&ptr, nbytes)`](https://github.com/caffe2/caffe2/blob/v0.7.0/caffe2/core/context_gpu.cu#L355) to allocate the memory. + +### Majel + +In Majel, there are basically two allocator types: + +1. `cpu::SystemAllocator`, which has similar functionality to `caffe2::CPUContext::New/Delete`. +1. `gpu::SystemAllocator`, which has similar functionality to `caffe2::CUDAContext::New/Delete`. + +However, memory allocation is not via these two allocators. Instead, these two allocators are defined in hidden namespaces. + +In Majel there are hidden global variables like: + +1. `cpu::SystemAllocator g_cpu_allocator`, and +1. `vector<gpu::SystemAllocator*> g_gpu_allocators(NUM_GPUS)`. + +Programs allocate memory via a BuddyAllocator, which can take the `g_cpu_allocator` or a `g_gpu_allocators[gpu_id]` as its *fallback allocator*, so that if BuddyAllocator cannot find a block in its memory pool, it extends its memory pool by calling the fallback allocator's `New(size_t)`.
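A hypothetical sketch of that fallback path in terms of this PR's classes (`FindFreeBlock` and `AddPool` are invented helper names, not part of the PR; `SystemAllocator::Alloc` plays the role of Majel's `New`):

```cpp
// Illustrative only: how a buddy allocator can grow through its fallback.
void* BuddyAllocator::Alloc(size_t size) {
  if (void* block = FindFreeBlock(size)) return block;  // serve from the pool
  // Pool exhausted: ask the fallback (system) allocator for a fresh pool,
  void* pool = system_allocator_->Alloc(pool_size_);
  AddPool(pool, pool_size_);   // index the new pool,
  return FindFreeBlock(size);  // and retry the request.
}
```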
diff --git a/paddle/memory/detail/CMakeLists.txt b/paddle/memory/detail/CMakeLists.txt new file mode 100644 index 0000000000000000000000000000000000000000..72d3749ad789eca9a4b10944131171c0cf8dfe5a --- /dev/null +++ b/paddle/memory/detail/CMakeLists.txt @@ -0,0 +1,7 @@ +if(${WITH_GPU}) + nv_library(system_allocator SRCS system_allocator.cc DEPS gflags) + nv_test(system_allocator_test SRCS system_allocator_test.cc DEPS system_allocator gflags) +else(${WITH_GPU}) + cc_library(system_allocator SRCS system_allocator.cc DEPS gflags) + cc_test(system_allocator_test SRCS system_allocator_test.cc DEPS system_allocator gflags) +endif(${WITH_GPU}) diff --git a/paddle/memory/detail/buddy_allocator.cc b/paddle/memory/detail/buddy_allocator.cc new file mode 100644 index 0000000000000000000000000000000000000000..ebe680f5eea4948339fb8c5584a5b9f5d71c752e --- /dev/null +++ b/paddle/memory/detail/buddy_allocator.cc @@ -0,0 +1,35 @@ +/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserve. + + Licensed under the Apache License, Version 2.0 (the "License"); + you may not use this file except in compliance with the License. + You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. */ + +#include "paddle/memory/detail/buddy_allocator.h" + +namespace paddle { +namespace memory { +namespace detail { + +BuddyAllocator::BuddyAllocator(size_t pool_size, size_t max_pools, + SystemAllocator* system_allocator) + : pool_size_(pool_size), + max_num_pools_(max_pools), + system_allocator_(system_allocator) { + PADDLE_ASSERT(pool_size > 0); + PADDLE_ASSERT(max_pools > 0); + PADDLE_ASSERT(system_allocator != nullptr); +} + +} // namespace detail +} // namespace memory +} // namespace paddle diff --git a/paddle/memory/detail/buddy_allocator.h b/paddle/memory/detail/buddy_allocator.h new file mode 100644 index 0000000000000000000000000000000000000000..82e6aaedc719966b4074449ce1ef7193c73dc265 --- /dev/null +++ b/paddle/memory/detail/buddy_allocator.h @@ -0,0 +1,86 @@ +/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserve. + + Licensed under the Apache License, Version 2.0 (the "License"); + you may not use this file except in compliance with the License. + You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. */ + +#pragma once + +#include "paddle/memory/detail/system_allocator.h" + +#include <mutex> +#include <vector> + +namespace paddle { +namespace memory { +namespace detail { + +class BuddyAllocator { + public: + BuddyAllocator(size_t pool_size, size_t max_pools, + SystemAllocator* system_allocator); + ~BuddyAllocator(); + + void* Alloc(size_t size); + void Free(void*); + size_t Used(); + + private: + struct Block { + size_t size_; + Block* left_; // left buddy + Block* right_; // right buddy + }; + + // Initially, there is only one pool. If an Alloc cannot find enough + // memory in the existing pools, and there are fewer than max_num_pools_ + // pools, create a new pool by calling system_allocator_->Alloc(pool_size_). + std::vector<void*> pools_; + + size_t pool_size_; // the size of each pool; + size_t max_num_pools_; // the maximum number of pools; + + SystemAllocator* system_allocator_; + + std::mutex mutex_; + + // Disable copy and assignment. + BuddyAllocator(const BuddyAllocator&) = delete; + BuddyAllocator& operator=(const BuddyAllocator&) = delete; +}; + +BuddyAllocator* GetCPUBuddyAllocator() { + static BuddyAllocator* a = nullptr; + if (a == nullptr) { + a = new BuddyAllocator(); + } + return a; +} + +#ifndef PADDLE_ONLY_CPU // The following code is for CUDA. + +BuddyAllocator* GetGPUBuddyAllocator(int gpu_id) { + static BuddyAllocator** as = NULL; + if (as == NULL) { + int gpu_num = platform::GetDeviceCount(); + as = new BuddyAllocator*[gpu_num]; + for (int gpu = 0; gpu < gpu_num; gpu++) { + as[gpu] = new BuddyAllocator(); + } + } + return as[gpu_id]; +} + +#endif // PADDLE_ONLY_CPU + +} // namespace detail +} // namespace memory +} // namespace paddle
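A minimal usage sketch of the interfaces declared in `buddy_allocator.h` above (pool sizes are illustrative, and this snippet is not part of the PR; it assumes the declared interface is fully implemented):

```cpp
#include "paddle/memory/detail/buddy_allocator.h"

int main() {
  // Wire a BuddyAllocator to a CPUAllocator fallback, as the design doc describes.
  paddle::memory::detail::CPUAllocator system;
  paddle::memory::detail::BuddyAllocator buddy(
      /* pool_size */ 1 << 20, /* max_pools */ 8, &system);
  void* p = buddy.Alloc(4096);  // served from the buddy-managed pool
  buddy.Free(p);
  return 0;
}
```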
diff --git a/paddle/memory/detail/system_allocator.cc b/paddle/memory/detail/system_allocator.cc new file mode 100644 index 0000000000000000000000000000000000000000..50bec926f83dee8a4343d0b16aeb088f9d2a4871 --- /dev/null +++ b/paddle/memory/detail/system_allocator.cc @@ -0,0 +1,90 @@ +/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserve. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. */ + +#include "paddle/memory/detail/system_allocator.h" + +#include <stdlib.h> // for malloc and free +#include <sys/mman.h> // for mlock and munlock + +#include "gflags/gflags.h" +#include "paddle/platform/assert.h" +#include "paddle/platform/cuda.h" + +// If use_pinned_memory is true, CPUAllocator calls mlock, which +// returns pinned and locked memory as staging areas for data exchange +// between host and device. Allocating too much would reduce the amount +// of memory available to the system for paging. So, by default, we +// set use_pinned_memory to false. +DEFINE_bool(use_pinned_memory, false, + "If set, allocate cpu/gpu pinned memory."); + +namespace paddle { +namespace memory { +namespace detail { + +void* CPUAllocator::Alloc(size_t size) { + // According to http://www.cplusplus.com/reference/cstdlib/malloc/, + // malloc might not return nullptr if size is zero, but the returned + // pointer shall not be dereferenced -- so we make it nullptr. + if (size <= 0) return nullptr; + + void* p = malloc(size); + if (p != nullptr && FLAGS_use_pinned_memory) { + mlock(p, size); + } + return p; +} + +void CPUAllocator::Free(void* p, size_t size) { + if (p != nullptr && FLAGS_use_pinned_memory) { + munlock(p, size); + } + free(p); +} + +#ifndef PADDLE_ONLY_CPU + +void* GPUAllocator::Alloc(size_t size) { + // CUDA documentation doesn't explain if cudaMalloc returns nullptr + // if size is 0. We just make sure it does. + if (size <= 0) { + return nullptr; + } + + void* p = 0; + cudaError_t result = + FLAGS_use_pinned_memory ? cudaMallocHost(&p, size) : cudaMalloc(&p, size); + if (result != cudaSuccess) { + cudaGetLastError(); // clear error if there is any. + } + return result == cudaSuccess ? p : nullptr; +} + +void GPUAllocator::Free(void* p, size_t size) { + // Purposefully allow cudaErrorCudartUnloading, because + // that is returned if you ever call cudaFree after the + // driver has already shut down. This happens only if the + // process is terminating, in which case we don't care if + // cudaFree succeeds. + cudaError_t err = FLAGS_use_pinned_memory ? cudaFreeHost(p) : cudaFree(p); + if (err != cudaErrorCudartUnloading) { + platform::throw_on_error(err, "cudaFree{Host} failed"); + } +} + +#endif // PADDLE_ONLY_CPU + +} // namespace detail +} // namespace memory +} // namespace paddle diff --git a/paddle/memory/detail/system_allocator.h b/paddle/memory/detail/system_allocator.h new file mode 100644 index 0000000000000000000000000000000000000000..184b383f7f78244fa6632a3bffb1a0a78b3aa664 --- /dev/null +++ b/paddle/memory/detail/system_allocator.h @@ -0,0 +1,53 @@ +/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserve. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License.
+You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. */ + +#pragma once + +#include <stddef.h> // for size_t + +namespace paddle { +namespace memory { +namespace detail { + +// SystemAllocator is the parent class of CPUAllocator and +// GPUAllocator. A BuddyAllocator object uses a SystemAllocator* +// pointing to the underlying system allocator. An alternative to +// this class hierarchy is to pass a system allocator class to +// BuddyAllocator as a template parameter. This approach makes +// BuddyAllocator a class template, and its very complicated +// algorithm would make buddy_allocator.h messy. +class SystemAllocator { + public: + virtual ~SystemAllocator() {} + virtual void* Alloc(size_t size) = 0; + virtual void Free(void* p, size_t size) = 0; +}; + +class CPUAllocator : public SystemAllocator { + public: + virtual void* Alloc(size_t size); + virtual void Free(void* p, size_t size); +}; + +#ifndef PADDLE_ONLY_CPU +class GPUAllocator : public SystemAllocator { + public: + virtual void* Alloc(size_t size); + virtual void Free(void* p, size_t size); +}; +#endif // PADDLE_ONLY_CPU + +} // namespace detail +} // namespace memory +} // namespace paddle
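The comment block above motivates the virtual-base design: any allocator honoring the `Alloc`/`Free` contract can sit behind `BuddyAllocator`. As a hypothetical illustration (`TrackingAllocator` and its members are invented for this sketch, not part of the PR):

```cpp
#include <cstddef>

#include "paddle/memory/detail/system_allocator.h"

// A decorator that counts outstanding bytes while delegating the real work
// to another system allocator. Purely illustrative.
class TrackingAllocator : public paddle::memory::detail::SystemAllocator {
 public:
  explicit TrackingAllocator(SystemAllocator* inner) : inner_(inner) {}
  void* Alloc(size_t size) override {
    in_use_ += size;
    return inner_->Alloc(size);
  }
  void Free(void* p, size_t size) override {
    in_use_ -= size;
    inner_->Free(p, size);
  }
  size_t InUse() const { return in_use_; }

 private:
  SystemAllocator* inner_;
  size_t in_use_ = 0;
};
```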
diff --git a/paddle/memory/detail/system_allocator_test.cc b/paddle/memory/detail/system_allocator_test.cc new file mode 100644 index 0000000000000000000000000000000000000000..9bd5706a4e4d1546a8c879ebbac0f3349c9d59f6 --- /dev/null +++ b/paddle/memory/detail/system_allocator_test.cc @@ -0,0 +1,71 @@ +/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserve. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. */ + +#include "paddle/memory/detail/system_allocator.h" + +#include <memory> +#include <vector> + +#include "gflags/gflags.h" +#include "gtest/gtest.h" + +DECLARE_bool(use_pinned_memory); + +void TestAllocator(paddle::memory::detail::SystemAllocator& a, size_t size) { + bool freed = false; + { + void* p = a.Alloc(size); + if (size > 0) { + EXPECT_NE(p, nullptr); + } else { + EXPECT_EQ(p, nullptr); + } + + int* i = static_cast<int*>(p); + std::shared_ptr<int> ptr(i, [&](void* p) { + freed = true; + a.Free(p, size); + }); + } + EXPECT_TRUE(freed); +} + +TEST(CPUAllocator, NoLockMem) { + FLAGS_use_pinned_memory = false; + paddle::memory::detail::CPUAllocator a; + TestAllocator(a, 2048); + TestAllocator(a, 0); +} + +TEST(CPUAllocator, LockMem) { + FLAGS_use_pinned_memory = true; + paddle::memory::detail::CPUAllocator a; + TestAllocator(a, 2048); + TestAllocator(a, 0); +} + +#ifndef PADDLE_ONLY_CPU +TEST(GPUAllocator, NoStaging) { + FLAGS_use_pinned_memory = false; + paddle::memory::detail::GPUAllocator a; + TestAllocator(a, 2048); + TestAllocator(a, 0); +} +TEST(GPUAllocator, Staging) { + FLAGS_use_pinned_memory = true; + paddle::memory::detail::GPUAllocator a; + TestAllocator(a, 2048); + TestAllocator(a, 0); +} +#endif // PADDLE_ONLY_CPU
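A side note on `TestAllocator` above: it uses `std::shared_ptr`'s custom-deleter hook to prove that `Free` runs exactly once when the pointer leaves scope. The idiom in isolation (plain `new`/`delete` here, purely illustrative):

```cpp
#include <cassert>
#include <memory>

int main() {
  bool freed = false;
  {
    std::shared_ptr<int> guard(new int(0), [&](int* p) {
      freed = true;  // runs when the last reference goes away
      delete p;
    });
  }  // guard leaves scope here, invoking the deleter
  assert(freed);
  return 0;
}
```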
diff --git a/paddle/memory/memory.cc b/paddle/memory/memory.cc new file mode 100644 index 0000000000000000000000000000000000000000..0d123d99e234a378ee64850eebacece223e2b121 --- /dev/null +++ b/paddle/memory/memory.cc @@ -0,0 +1,59 @@ +/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserve. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. */ + +#include "paddle/memory/memory.h" +#include "paddle/memory/detail/buddy_allocator.h" +#include "paddle/memory/detail/system_allocator.h" +#include "paddle/platform/assert.h" + +#include <boost/variant.hpp> + +namespace paddle { +namespace memory { + +void* Alloc(platform::Place pl, size_t size) { +#ifndef PADDLE_ONLY_CPU + if (paddle::platform::is_gpu_place(pl)) { + size_t gpu_id = boost::get<platform::GPUPlace>(pl).device; + return detail::GetGPUBuddyAllocator(gpu_id)->Alloc(size); + } +#endif // PADDLE_ONLY_CPU + PADDLE_ASSERT(paddle::platform::is_cpu_place(pl)); + return detail::GetCPUBuddyAllocator()->Alloc(size); +} + +void Free(paddle::platform::Place pl, void* p) { +#ifndef PADDLE_ONLY_CPU + if (paddle::platform::is_gpu_place(pl)) { + size_t gpu_id = boost::get<platform::GPUPlace>(pl).device; + detail::GetGPUBuddyAllocator(gpu_id)->Free(p); + return; + } +#endif // PADDLE_ONLY_CPU + PADDLE_ASSERT(paddle::platform::is_cpu_place(pl)); + detail::GetCPUBuddyAllocator()->Free(p); +} + +size_t Used(paddle::platform::Place pl) { +#ifndef PADDLE_ONLY_CPU + if (paddle::platform::is_gpu_place(pl)) { + size_t gpu_id = boost::get<platform::GPUPlace>(pl).device; + return detail::GetGPUBuddyAllocator(gpu_id)->Used(); + } +#endif // PADDLE_ONLY_CPU + PADDLE_ASSERT(paddle::platform::is_cpu_place(pl)); + return detail::GetCPUBuddyAllocator()->Used(); +} + +} // namespace memory +} // namespace paddle diff --git a/paddle/memory/memory.h b/paddle/memory/memory.h new file mode 100644 index 0000000000000000000000000000000000000000..a33092bade65e6df0faee226a8967c9fc9caa032 --- /dev/null +++ b/paddle/memory/memory.h @@ -0,0 +1,27 @@ +/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserve. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License.
*/ + +#pragma once + +#include "paddle/platform/place.h" + +namespace paddle { +namespace memory { + +void* Alloc(paddle::platform::Place, size_t); +void Free(paddle::platform::Place, void*); +size_t Used(paddle::platform::Place); + +} // namespace memory +} // namespace paddle diff --git a/paddle/optimizer/CMakeLists.txt b/paddle/optimizer/CMakeLists.txt index 35f04789cfe6dc445973f0f922269f6f78b713a3..926fee47e1f86efa60dc40a2727edb06499bec4f 100644 --- a/paddle/optimizer/CMakeLists.txt +++ b/paddle/optimizer/CMakeLists.txt @@ -10,7 +10,7 @@ set(OPITMIZER_SRCS ) add_library(paddle_optimizer STATIC ${OPITMIZER_SRCS}) -add_dependencies(paddle_optimizer gen_proto_cpp) +add_dependencies(paddle_optimizer paddle_proto ${external_project_dependencies}) if(WITH_TESTING) diff --git a/paddle/parameter/CMakeLists.txt b/paddle/parameter/CMakeLists.txt index a35e46997fb04e9378e106bf428a629b286c2e8c..d2ae1c16c6b7316f1a6facdef4b933693d6ba818 100644 --- a/paddle/parameter/CMakeLists.txt +++ b/paddle/parameter/CMakeLists.txt @@ -7,7 +7,7 @@ add_library(paddle_parameter STATIC ${PARAMETERS_SOURCES}) add_style_check_target(paddle_parameter ${PARAMETERS_SOURCES}) add_style_check_target(paddle_parameter ${PARAMETERS_HEADERS}) -add_dependencies(paddle_parameter gen_proto_cpp) +add_dependencies(paddle_parameter paddle_proto ${external_project_dependencies}) if(WITH_TESTING) add_subdirectory(tests) endif() diff --git a/paddle/parameter/ParameterUpdaterHook.cpp b/paddle/parameter/ParameterUpdaterHook.cpp index f826e8448c666bb3305c150f2bd95aade23223fb..c8b47687f5d3c00f6609b858103a5fec526b970a 100644 --- a/paddle/parameter/ParameterUpdaterHook.cpp +++ b/paddle/parameter/ParameterUpdaterHook.cpp @@ -14,11 +14,13 @@ limitations under the License. */ #include "ParameterUpdaterHook.h" +#include <algorithm> #include <atomic> #include <fstream> #include <mutex> #include <thread> #include <unordered_map> +#include <vector> #include "paddle/math/Vector.h" #include "paddle/parameter/Parameter.h" @@ -29,106 +31,76 @@ namespace paddle { /** * The static pruning hook - * - * Static means user load a mask map before training started. This map will - * define which link/weight between neural is disabled. + * Static means the user specifies a sparsity_ratio before training starts, and the + * network will prune the parameters based on the sparsity_ratio. More details + * can be found at https://arxiv.org/pdf/1506.02626.pdf. */ + class StaticPruningHook : public IParameterUpdaterHook { public: - /** - * The Mask Map Header. - * The map file started with this header. - * - * In Version 0, reset file will be: - * contains header.size bit, each bit means such weight is enabled or not. - * if bit is 1, then such weight is enabled. - * at end, the file will round to byte, and the low bits of end byte will be - * filled by zero.
- * - */ - struct StaticMaskHeader { - uint32_t version; - size_t size; - } __attribute__((__packed__)); - - explicit StaticPruningHook(const std::string& mask_filename) : initCount_(0) { - bool ok = this->loadMaskFile(mask_filename); - if (!ok) { - LOG(WARNING) << "Fail to load mask file " << mask_filename - << " in current directory, searching in init_model_path"; - std::string combineMaskFilename = - path::join(FLAGS_init_model_path, mask_filename); - CHECK(this->loadMaskFile(combineMaskFilename)) - << "Cannot load " << mask_filename << " in ./" << mask_filename - << " and " << combineMaskFilename; - } - VLOG(3) << mask_filename << " mask size = " << this->mask_.size(); + explicit StaticPruningHook(const ParameterUpdaterHookConfig &hookConfig) + : initCount_(0) { + sparsityRatio_ = hookConfig.sparsity_ratio(); } + static bool sortPairAscend(const std::pair<real, size_t> &pair1, + const std::pair<real, size_t> &pair2) { + return pair1.first > pair2.first; + } + + void update(Parameter *para) { updateThreadChecker_.check(); - auto& vec = para->getBuf(PARAMETER_GRADIENT); + auto &vec = para->getBuf(PARAMETER_GRADIENT); if (vec) { vec->dotMul(*maskVec_); } } - void init(Parameter* para) { - size_t initCount = this->initCount_.fetch_add(1); - CHECK_EQ(initCount, 0UL) << "Currently the StaticPruningHook must invoke " - "in same ParamterUpdater"; - VLOG(3) << "Initialize Parameter " << para; - SetDevice device(para->getDeviceId()); + void generateMask(Parameter *para) { + VectorPtr maskTemp = Vector::create(para->getSize(), false); + maskTemp->zeroMem(); + real *maskTempData = maskTemp->getData(); + size_t nonZeroNum = para->getSize() * (1 - sparsityRatio_); - auto maskVec = Vector::create(this->mask_.size(), false); - { // Initialize maskVec with float mask vector - real* dataPtr = maskVec->getData(); - size_t i = 0; - for (bool m : mask_) { - dataPtr[i++] = m ? 1.0 : 0.0; - } - } + VectorPtr paraVec = para->getBuf(PARAMETER_VALUE); + VectorPtr paraCpuCopy = Vector::create(para->getSize(), false); + + paraCpuCopy->copyFrom(*paraVec); + std::vector<std::pair<real, size_t>> param; + + for (size_t i = 0; i < para->getSize(); i++) + param.push_back(std::make_pair(fabs(paraCpuCopy->getData()[i]), i)); + + std::partial_sort( + param.begin(), param.begin() + nonZeroNum, param.end(), sortPairAscend); + for (size_t i = 0; i < nonZeroNum; i++) maskTempData[param[i].second] = 1.0; // Currently just use a mask vector for hack. - // @TODO(yuyang18): Implemented the mask operation in vector.
if (para->useGpu()) { - maskVec_ = Vector::create(this->mask_.size(), para->useGpu()); - maskVec_->copyFrom(*maskVec); + maskVec_ = Vector::create(para->getSize(), para->useGpu()); + maskVec_->copyFrom(*maskTemp); } else { - maskVec_ = maskVec; + maskVec_ = maskTemp; } - - auto& vec = para->getBuf(PARAMETER_VALUE); - vec->dotMul(*maskVec_); } -private: - bool loadMaskFile(const std::string& mask_filename) { - std::ifstream fin; - fin.open(mask_filename); - if (fin.is_open()) { - StaticMaskHeader header; - fin.read(reinterpret_cast<char*>(&header), sizeof(StaticMaskHeader)); - CHECK_EQ(header.version, 0UL); - mask_.resize(header.size); - uint8_t buf; - for (size_t i = 0; i < header.size; ++i, buf <<= 1) { - if (i % 8 == 0) { - fin.read(reinterpret_cast<char*>(&buf), sizeof(uint8_t)); - } - mask_[i] = buf & 0x80; - } - fin.close(); - return true; - } else { - return false; - } + void init(Parameter *para) { + generateMask(para); + size_t initCount = this->initCount_.fetch_add(1); + CHECK_EQ(initCount, 0UL) << "Currently the StaticPruningHook must be invoked " + "in the same ParameterUpdater"; + VLOG(3) << "Initialize Parameter " << para; + SetDevice device(para->getDeviceId()); + + auto &paraVec = para->getBuf(PARAMETER_VALUE); + paraVec->dotMul(*maskVec_); } +private: SameThreadChecker updateThreadChecker_; std::atomic<size_t> initCount_; VectorPtr maskVec_; - std::vector<bool> mask_; + real sparsityRatio_; }; IParameterUpdaterHook::IParameterUpdaterHook() {} @@ -145,7 +117,7 @@ IParameterUpdaterHook::~IParameterUpdaterHook() {} */ class StringIntPairHasher { public: - size_t operator()(const std::pair<std::string, int>& k) const { + size_t operator()(const std::pair<std::string, int> &k) const { return intHasher_(strHasher_(k.first) + k.second); } @@ -162,19 +134,19 @@ static WeakKVCache<std::pair<std::string, int>, /** * ParameterUpdaterHook actually factory method. */ -static IParameterUpdaterHook* createImpl( - const ParameterUpdaterHookConfig& config) { - auto& type = config.type(); +static IParameterUpdaterHook *createImpl( + const ParameterUpdaterHookConfig &config) { + auto &type = config.type(); if (type == "pruning") { - if (config.has_purning_mask_filename()) { - return new StaticPruningHook(config.purning_mask_filename()); - } + return new StaticPruningHook(config); } + + LOG(FATAL) << "Unknown Hook type: " << type; return nullptr; } std::shared_ptr<IParameterUpdaterHook> IParameterUpdaterHook::create( - const ParameterConfig& paramConfig, int idx) { + const ParameterConfig &paramConfig, int idx) { std::pair<std::string, int> key = {paramConfig.name(), idx}; return g_hookCache_.get( key, [&] { return createImpl(paramConfig.update_hooks(idx)); });
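To make the new pruning rule concrete: `generateMask` keeps the `para->getSize() * (1 - sparsity_ratio)` weights of largest magnitude and zeroes the rest. A standalone sketch of that selection using plain standard-library types (illustrative only; the real hook operates on Paddle's `Vector` buffers as shown above):

```cpp
#include <algorithm>
#include <cmath>
#include <cstddef>
#include <utility>
#include <vector>

// Return a 0/1 mask keeping the (1 - sparsity_ratio) fraction of weights
// with the largest absolute value, mirroring StaticPruningHook::generateMask.
std::vector<float> MakeMask(const std::vector<float>& w, float sparsity_ratio) {
  size_t keep = static_cast<size_t>(w.size() * (1 - sparsity_ratio));
  std::vector<std::pair<float, size_t>> mag;  // (|w_i|, i)
  for (size_t i = 0; i < w.size(); ++i) mag.emplace_back(std::fabs(w[i]), i);
  std::partial_sort(mag.begin(), mag.begin() + keep, mag.end(),
                    [](const std::pair<float, size_t>& a,
                       const std::pair<float, size_t>& b) {
                      return a.first > b.first;  // larger magnitudes first
                    });
  std::vector<float> mask(w.size(), 0.0f);
  for (size_t i = 0; i < keep; ++i) mask[mag[i].second] = 1.0f;
  return mask;
}
```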
diff --git a/paddle/platform/cuda.h b/paddle/platform/cuda.h new file mode 100644 index 0000000000000000000000000000000000000000..8fe891f9ce6c3add1df48a8b1f79fd811c7a4362 --- /dev/null +++ b/paddle/platform/cuda.h @@ -0,0 +1,40 @@ +/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserve. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. */ + +#pragma once + +#ifndef PADDLE_ONLY_CPU + +#include <thrust/system/cuda/error.h> +#include <thrust/system_error.h> + +namespace paddle { +namespace platform { + +inline void throw_on_error(cudaError_t e, const char* message) { + if (e) { + throw thrust::system_error(e, thrust::cuda_category(), message); + } +} + +inline int GetDeviceCount(void) { + int count; + throw_on_error(cudaGetDeviceCount(&count), "cudaGetDeviceCount failed"); + return count; +} + +} // namespace platform +} // namespace paddle + +#endif // PADDLE_ONLY_CPU diff --git a/paddle/platform/place.cc b/paddle/platform/place.cc index 1afd03c01169d395b086c1da458ce25c66a12a51..0704820aa05079401eb56814d689d6e280311edb 100644 --- a/paddle/platform/place.cc +++ b/paddle/platform/place.cc @@ -8,8 +8,8 @@ namespace detail { class PlacePrinter : public boost::static_visitor<> { public: PlacePrinter(std::ostream &os) : os_(os) {} - void operator()(const CpuPlace &) { os_ << "CpuPlace"; } - void operator()(const GpuPlace &p) { os_ << "GpuPlace(" << p.device << ")"; } + void operator()(const CPUPlace &) { os_ << "CPUPlace"; } + void operator()(const GPUPlace &p) { os_ << "GPUPlace(" << p.device << ")"; } private: std::ostream &os_; @@ -22,14 +22,14 @@ static Place the_default_place; void set_place(const Place &place) { the_default_place = place; } const Place &get_place() { return the_default_place; } -const GpuPlace default_gpu() { return GpuPlace(0); } -const CpuPlace default_cpu() { return CpuPlace(); } +const GPUPlace default_gpu() { return GPUPlace(0); } +const CPUPlace default_cpu() { return CPUPlace(); } bool is_gpu_place(const Place &p) { - return boost::apply_visitor(IsGpuPlace(), p); + return boost::apply_visitor(IsGPUPlace(), p); } bool is_cpu_place(const Place &p) { - return !boost::apply_visitor(IsGpuPlace(), p); + return !boost::apply_visitor(IsGPUPlace(), p); } bool places_are_same_class(const Place &p1, const Place &p2) { diff --git a/paddle/platform/place.h b/paddle/platform/place.h index 489572c526e162500c8f747f0ec8df10da9d86a2..7cead183884bc9379355cd931921b40d6c11ce90 100644 --- a/paddle/platform/place.h +++ b/paddle/platform/place.h @@ -1,43 +1,58 @@ +/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserve. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License.
*/ + #pragma once + #include <boost/variant.hpp> #include <iostream> namespace paddle { namespace platform { -struct CpuPlace { +struct CPUPlace { // WORKAROUND: for some reason, omitting this constructor // causes errors with boost 1.59 and OSX - CpuPlace() {} + CPUPlace() {} // needed for variant equality comparison - inline bool operator==(const CpuPlace &) const { return true; } - inline bool operator!=(const CpuPlace &) const { return false; } + inline bool operator==(const CPUPlace &) const { return true; } + inline bool operator!=(const CPUPlace &) const { return false; } }; -struct GpuPlace { - GpuPlace() : GpuPlace(0) {} - GpuPlace(int d) : device(d) {} +struct GPUPlace { + GPUPlace() : GPUPlace(0) {} + GPUPlace(int d) : device(d) {} // needed for variant equality comparison - inline bool operator==(const GpuPlace &o) const { return device == o.device; } - inline bool operator!=(const GpuPlace &o) const { return !(*this == o); } + inline bool operator==(const GPUPlace &o) const { return device == o.device; } + inline bool operator!=(const GPUPlace &o) const { return !(*this == o); } int device; }; -struct IsGpuPlace : public boost::static_visitor<bool> { - bool operator()(const CpuPlace &) const { return false; } - bool operator()(const GpuPlace &gpu) const { return true; } +struct IsGPUPlace : public boost::static_visitor<bool> { + bool operator()(const CPUPlace &) const { return false; } + bool operator()(const GPUPlace &gpu) const { return true; } }; -typedef boost::variant<CpuPlace, GpuPlace> Place; +typedef boost::variant<CPUPlace, GPUPlace> Place; void set_place(const Place &); const Place &get_place(); -const GpuPlace default_gpu(); -const CpuPlace default_cpu(); +const GPUPlace default_gpu(); +const CPUPlace default_cpu(); bool is_gpu_place(const Place &); bool is_cpu_place(const Place &); diff --git a/paddle/platform/place_test.cc b/paddle/platform/place_test.cc index 73fccceedf6918148a26100f64cf322305c3ac20..33e2e5a439ce6801c02daba4bcbd462a74d7a614 100644 --- a/paddle/platform/place_test.cc +++ b/paddle/platform/place_test.cc @@ -3,8 +3,8 @@ #include "gtest/gtest.h" TEST(Place, Equality) { - paddle::platform::CpuPlace cpu; - paddle::platform::GpuPlace g0(0), g1(1), gg0(0); + paddle::platform::CPUPlace cpu; + paddle::platform::GPUPlace g0(0), g1(1), gg0(0); EXPECT_EQ(cpu, cpu); EXPECT_EQ(g0, g0); @@ -22,19 +22,19 @@ TEST(Place, Default) { EXPECT_TRUE(paddle::platform::is_gpu_place(paddle::platform::default_gpu())); EXPECT_TRUE(paddle::platform::is_cpu_place(paddle::platform::default_cpu())); - paddle::platform::set_place(paddle::platform::CpuPlace()); + paddle::platform::set_place(paddle::platform::CPUPlace()); EXPECT_TRUE(paddle::platform::is_cpu_place(paddle::platform::get_place())); } TEST(Place, Print) { { std::stringstream ss; - ss << paddle::platform::GpuPlace(1); - EXPECT_EQ("GpuPlace(1)", ss.str()); + ss << paddle::platform::GPUPlace(1); + EXPECT_EQ("GPUPlace(1)", ss.str()); } { std::stringstream ss; - ss << paddle::platform::CpuPlace(); - EXPECT_EQ("CpuPlace", ss.str()); + ss << paddle::platform::CPUPlace(); + EXPECT_EQ("CPUPlace", ss.str()); } } diff --git a/paddle/pserver/CMakeLists.txt b/paddle/pserver/CMakeLists.txt index b7f85ea1a6dfda2a37c315ba15c6ca1979cf4131..2245c7d88ca74922f9919db91977dfa6cb3ca468 100644 --- a/paddle/pserver/CMakeLists.txt +++ b/paddle/pserver/CMakeLists.txt @@ -17,7 +17,7 @@ add_library(paddle_network STATIC add_style_check_target(paddle_network ${NETWORK_SOURCES}) add_style_check_target(paddle_network ${NETWORK_HEADERS}) -add_dependencies(paddle_network gen_proto_cpp) +add_dependencies(paddle_network paddle_proto
${external_project_dependencies}) ################### paddle_pserver ###################### set(PSERVER_SOURCES @@ -40,7 +40,7 @@ add_library(paddle_pserver STATIC add_style_check_target(paddle_pserver ${PSERVER_SOURCES}) add_style_check_target(paddle_pserver ${PSERVER_HEADERS}) -add_dependencies(paddle_pserver gen_proto_cpp) +add_dependencies(paddle_pserver paddle_proto ${external_project_dependencies}) set(PSERVER_MAIN_SOURCES ParameterServer2Main.cpp) diff --git a/paddle/py_paddle/dataprovider_converter.py b/paddle/py_paddle/dataprovider_converter.py index edc2e0292378fea0cd904d7f017762c1dade6caf..43614b9779d21795f1f274589ea93639e923ce75 100644 --- a/paddle/py_paddle/dataprovider_converter.py +++ b/paddle/py_paddle/dataprovider_converter.py @@ -109,6 +109,10 @@ class DenseScanner(IScanner): if len(self.__shape__) > 3: raise ValueError( "The dimension of input cannot be greater than 3.") + if len(self.__shape__) == 0: + raise ValueError( + "The input should be a vector, please check your input data." + ) self.__dim__ = reduce(lambda x, y: x * y, self.__shape__) if len(self.__shape__) == 1 and self.__dim__ != self.input_type.dim: raise ValueError( @@ -140,7 +144,7 @@ class DenseScanner(IScanner): if len(self.__shape__) > 1: # The last-two dimenstions are the frame height and width. # For example, the layout is CHW for 3-D feature of image. - # The H and W are the fram height and width. + # The H and W are the frame height and width. h, w = self.__shape__[-2:] argument.setSlotFrameHeight(self.pos, h) argument.setSlotFrameWidth(self.pos, w) diff --git a/paddle/scripts/docker/build.sh b/paddle/scripts/docker/build.sh index 2b48e4dc0f875be9a87797fa14885926999a5010..a182e5f4aef9de8c6f20681328d5ba6c0e6944ef 100644 --- a/paddle/scripts/docker/build.sh +++ b/paddle/scripts/docker/build.sh @@ -31,6 +31,7 @@ Configuring cmake in /paddle/build ... -DWITH_DOC=OFF -DWITH_GPU=${WITH_GPU:-OFF} -DWITH_AVX=${WITH_AVX:-OFF} + -DWITH_GOLANG=${WITH_GOLANG:-OFF} -DWITH_SWIG_PY=ON -DCUDNN_ROOT=/usr/ -DWITH_STYLE_CHECK=${WITH_STYLE_CHECK:-OFF} @@ -43,6 +44,7 @@ cmake .. \ -DWITH_DOC=OFF \ -DWITH_GPU=${WITH_GPU:-OFF} \ -DWITH_AVX=${WITH_AVX:-OFF} \ + -DWITH_GOLANG=${WITH_GOLANG:-OFF} \ -DWITH_SWIG_PY=ON \ -DCUDNN_ROOT=/usr/ \ -DWITH_STYLE_CHECK=${WITH_STYLE_CHECK:-OFF} \ diff --git a/paddle/scripts/travis/build_and_test.sh b/paddle/scripts/travis/build_and_test.sh deleted file mode 100755 index f2cbc561652a3c7502de94be37d75783fc40b9c1..0000000000000000000000000000000000000000 --- a/paddle/scripts/travis/build_and_test.sh +++ /dev/null @@ -1,12 +0,0 @@ -#!/bin/bash -source ./common.sh - -NPROC=1 -export PYTHONPATH=/opt/python/2.7.12/lib/python2.7/site-packages -export PYTHONHOME=/opt/python/2.7.12 -export PATH=/opt/python/2.7.12/bin:${PATH} -cmake .. -DCMAKE_Fortran_COMPILER=/usr/bin/gfortran-4.8 -DON_TRAVIS=ON -DWITH_COVERAGE=ON -DCOVERALLS_UPLOAD=ON ${EXTRA_CMAKE_OPTS} -NRPOC=`nproc` -make -j $NPROC -make coveralls -sudo make install diff --git a/paddle/scripts/travis/docs.sh b/paddle/scripts/travis/build_doc.sh similarity index 84% rename from paddle/scripts/travis/docs.sh rename to paddle/scripts/travis/build_doc.sh index c784293695bf134b5e990639778b6e84ba45d00d..a44bd35357fde41c379134bed6b7fb242efe49e5 100755 --- a/paddle/scripts/travis/docs.sh +++ b/paddle/scripts/travis/build_doc.sh @@ -1,15 +1,19 @@ #!/bin/bash +set -e + +# Create the build directory for CMake. +mkdir -p $TRAVIS_BUILD_DIR/build +cd $TRAVIS_BUILD_DIR/build -# Add set -e, cd to directory. -source ./common.sh # Compile Documentation only. 
-cmake .. -DCMAKE_BUILD_TYPE=Debug -DCMAKE_Fortran_COMPILER=/usr/bin/gfortran-4.8 -DWITH_GPU=OFF -DWITH_DOC=OFF -DWITH_STYLE_CHECK=OFF ${EXTRA_CMAKE_OPTS} +cmake .. -DCMAKE_BUILD_TYPE=Debug -DWITH_GPU=OFF -DWITH_DOC=OFF -DWITH_STYLE_CHECK=OFF + mkdir output make -j `nproc` find .. -name '*whl' | xargs pip install # install all wheels. rm -rf * -cmake .. -DCMAKE_BUILD_TYPE=Debug -DCMAKE_Fortran_COMPILER=/usr/bin/gfortran-4.8 -DWITH_GPU=OFF -DWITH_DOC=ON ${EXTRA_CMAKE_OPTS} -make paddle_docs paddle_docs_cn +cmake .. -DCMAKE_BUILD_TYPE=Debug -DWITH_GPU=OFF -DWITH_DOC=ON +make -j `nproc` paddle_docs paddle_docs_cn # check websites for broken links linkchecker doc/en/html/index.html diff --git a/paddle/scripts/travis/precommit.sh b/paddle/scripts/travis/check_style.sh similarity index 54% rename from paddle/scripts/travis/precommit.sh rename to paddle/scripts/travis/check_style.sh index 7a59b1131d0a410be9c5cef08e3cc11633d2ba67..4754bdd4c80de9700d92b0e33ecfdfc582f42813 100755 --- a/paddle/scripts/travis/precommit.sh +++ b/paddle/scripts/travis/check_style.sh @@ -1,14 +1,14 @@ #!/bin/bash function abort(){ - echo "Your commit not fit PaddlePaddle code style" 1>&2 - echo "Please use pre-commit scripts to auto-format your code" 1>&2 + echo "Your change doesn't follow PaddlePaddle's code style." 1>&2 + echo "Please use pre-commit to reformat your code and git push again." 1>&2 exit 1 } trap 'abort' 0 set -e -source common.sh -cd .. + +cd $TRAVIS_BUILD_DIR export PATH=/usr/bin:$PATH pre-commit install clang-format --version diff --git a/paddle/scripts/travis/common.sh b/paddle/scripts/travis/common.sh deleted file mode 100755 index f05c7530a3b0632948e4b18c477d6dc6aad04c03..0000000000000000000000000000000000000000 --- a/paddle/scripts/travis/common.sh +++ /dev/null @@ -1,6 +0,0 @@ -#!/bin/bash -set -e -mkdir -p ../../../build -cd ../../../build -mkdir -p $HOME/third_party -EXTRA_CMAKE_OPTS="-DTHIRD_PARTY_PATH=${HOME}/third_party" diff --git a/paddle/scripts/travis/main.sh b/paddle/scripts/travis/main.sh deleted file mode 100755 index 13f2552d29db38041a73edca0acd202945c67484..0000000000000000000000000000000000000000 --- a/paddle/scripts/travis/main.sh +++ /dev/null @@ -1,13 +0,0 @@ -#!/bin/bash -cd `dirname $0` - -if [ ${JOB} == "BUILD_AND_TEST" ]; then - ./build_and_test.sh -elif [ ${JOB} == "DOCS" ]; then - ./docs.sh -elif [ ${JOB} == "PRE_COMMIT" ]; then - ./precommit.sh -else - echo Unknown job ${JOB} - exit 1 -fi diff --git a/paddle/string/CMakeLists.txt b/paddle/string/CMakeLists.txt new file mode 100644 index 0000000000000000000000000000000000000000..5becf62672d0c606c98ea1a1a4383df97088ab05 --- /dev/null +++ b/paddle/string/CMakeLists.txt @@ -0,0 +1,4 @@ +cc_library(stringpiece SRCS piece.cc) +cc_test(stringpiece_test SRCS piece_test.cc DEPS stringpiece glog gflags) + +cc_test(stringprintf_test SRCS printf_test.cc DEPS glog gflags) diff --git a/paddle/string/piece.cc b/paddle/string/piece.cc new file mode 100644 index 0000000000000000000000000000000000000000..b80afdec82d642fd3a8245b96ce1bb2bea17cbae --- /dev/null +++ b/paddle/string/piece.cc @@ -0,0 +1,138 @@ +/* + Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserve. + + Licensed under the Apache License, Version 2.0 (the "License"); + you may not use this file except in compliance with the License. 
+ You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. +*/ + +#include "paddle/string/piece.h" + +#include <string.h> + +#include <algorithm> +#include <iosfwd> +#include <stdexcept> + +namespace paddle { +namespace string { + +Piece::Piece() : data_(NULL), size_(0) {} + +Piece::Piece(const char* d, size_t n) : data_(d), size_(n) { + if (d == NULL && n != 0) + throw std::invalid_argument("Piece requires len to be 0 for NULL data"); +} + +Piece::Piece(const char* s) : data_(s) { size_ = (s == NULL) ? 0 : strlen(s); } + +Piece::Piece(const std::string& s) : data_(s.data()), size_(s.size()) {} + +char Piece::operator[](size_t n) const { + if (n >= len()) throw std::invalid_argument("index out of Piece length"); + return data_[n]; +} + +int Compare(Piece a, Piece b) { + const size_t min_len = (a.len() < b.len()) ? a.len() : b.len(); + int r = memcmp(a.data(), b.data(), min_len); + if (r == 0) { + if (a.len() < b.len()) + return -1; + else if (a.len() > b.len()) + return 1; + } + return r; +} + +bool operator==(Piece x, Piece y) { + return ((x.len() == y.len()) && + (x.data() == y.data() || memcmp(x.data(), y.data(), x.len()) == 0)); +} + +bool operator!=(Piece x, Piece y) { return !(x == y); } + +bool operator<(Piece x, Piece y) { return Compare(x, y) < 0; } +bool operator>(Piece x, Piece y) { return Compare(x, y) > 0; } + +bool operator<=(Piece x, Piece y) { return Compare(x, y) <= 0; } +bool operator>=(Piece x, Piece y) { return Compare(x, y) >= 0; } + +bool HasPrefix(Piece s, Piece x) { + return ((s.len() >= x.len()) && (memcmp(s.data(), x.data(), x.len()) == 0)); +} + +bool HasSuffix(Piece s, Piece x) { + return ((s.len() >= x.len()) && + (memcmp(s.data() + (s.len() - x.len()), x.data(), x.len()) == 0)); +} + +Piece SkipPrefix(Piece s, size_t n) { + if (n > s.len()) + throw std::invalid_argument("Skip distance larger than Piece length"); + return Piece(s.data() + n, s.len() - n); +} + +Piece SkipSuffix(Piece s, size_t n) { + if (n > s.len()) + throw std::invalid_argument("Skip distance larger than Piece length"); + return Piece(s.data(), s.len() - n); +} + +Piece TrimPrefix(Piece s, Piece x) { + return HasPrefix(s, x) ? SkipPrefix(s, x.len()) : s; +} + +Piece TrimSuffix(Piece s, Piece x) { + return HasSuffix(s, x) ? SkipSuffix(s, x.len()) : s; +} + +bool Contains(Piece s, Piece sub) { + return std::search(s.begin(), s.end(), sub.begin(), sub.end()) != s.end(); +} + +size_t Index(Piece s, Piece sub) { + auto e = std::search(s.begin(), s.end(), sub.begin(), sub.end()); + return e != s.end() ? e - s.data() : Piece::npos; +} + +size_t Find(Piece s, char c, size_t pos) { + if (pos >= s.len()) { + return Piece::npos; + } + const char* result = + reinterpret_cast<const char*>(memchr(s.data() + pos, c, s.len() - pos)); + return result != nullptr ?
result - s.data() : Piece::npos; +} + +size_t RFind(Piece s, char c, size_t pos) { + if (s.len() == 0) return Piece::npos; + for (const char* p = s.data() + std::min(pos, s.len() - 1); p >= s.data(); + p--) { + if (*p == c) { + return p - s.data(); + } + } + return Piece::npos; +} + +Piece SubStr(Piece s, size_t pos, size_t n) { + if (pos > s.len()) pos = s.len(); + if (n > s.len() - pos) n = s.len() - pos; + return Piece(s.data() + pos, n); +} + +std::ostream& operator<<(std::ostream& o, Piece piece) { + return o << piece.ToString(); +} + +} // namespace string +} // namespace paddle diff --git a/paddle/strings/stringpiece.h b/paddle/string/piece.h similarity index 57% rename from paddle/strings/stringpiece.h rename to paddle/string/piece.h index adff713e86f49349b8f189c1d24584bfc1bb8aa7..db7c3e69804a6a8f0510ba376432fe560ae74442 100644 --- a/paddle/strings/stringpiece.h +++ b/paddle/string/piece.h @@ -20,33 +20,34 @@ #include <string> namespace paddle { +namespace string { -// StringPiece points into a std::string object but doesn't own the +// Piece points into a std::string object but doesn't own the // string. It is for efficient access to strings. Like Go's string -// type. Not that StringPiece doesn't mutate the underlying string, +// type. Note that Piece doesn't mutate the underlying string, // so it is thread-safe given that the underlying string doesn't -// change. Because StringPiece contains a little data members, and +// change. Because Piece contains only a few data members, and // its syntax is simple as it doesn't own/manage the string, it is -// cheap to construct StringPieces and pass them around. -class StringPiece { +// cheap to construct Pieces and pass them around. +class Piece { public: static const size_t npos = static_cast<size_t>(-1); // We provide non-explicit singleton constructors so users can - // pass in a "const char*" or a "string" wherever a "StringPiece" + // pass in a "const char*" or a "string" wherever a "Piece" // is expected. These contructors ensure that if data_ is NULL, // size_ is 0. - StringPiece(); - StringPiece(const char* d, size_t n); - StringPiece(const char* d); - StringPiece(const std::string& s); + Piece(); + Piece(const char* d, size_t n); + Piece(const char* d); + Piece(const std::string& s); const char* data() const { return data_; } size_t len() const { return size_; } char operator[](size_t n) const; - // StringPiece doesn't own the string, so both iterator and const + // Piece doesn't own the string, so both iterator and const // iterator are const char* indeed.
typedef const char* const_iterator; typedef const char* iterator; @@ -63,43 +64,44 @@ private: // Intentionally copyable }; -int Compare(StringPiece a, StringPiece b); +int Compare(Piece a, Piece b); -bool operator==(StringPiece x, StringPiece y); -bool operator!=(StringPiece x, StringPiece y); -bool operator<(StringPiece x, StringPiece y); -bool operator>(StringPiece x, StringPiece y); -bool operator<=(StringPiece x, StringPiece y); -bool operator>=(StringPiece x, StringPiece y); +bool operator==(Piece x, Piece y); +bool operator!=(Piece x, Piece y); +bool operator<(Piece x, Piece y); +bool operator>(Piece x, Piece y); +bool operator<=(Piece x, Piece y); +bool operator>=(Piece x, Piece y); -bool HasPrefix(StringPiece s, StringPiece prefix); -bool HasSuffix(StringPiece s, StringPiece suffix); +bool HasPrefix(Piece s, Piece prefix); +bool HasSuffix(Piece s, Piece suffix); -StringPiece SkipPrefix(StringPiece s, size_t n); -StringPiece SkipSuffix(StringPiece s, size_t n); +Piece SkipPrefix(Piece s, size_t n); +Piece SkipSuffix(Piece s, size_t n); // Skip the prefix (or suffix) if it matches with the string. -StringPiece TrimPrefix(StringPiece s, StringPiece prefix); -StringPiece TrimSuffix(StringPiece s, StringPiece suffix); +Piece TrimPrefix(Piece s, Piece prefix); +Piece TrimSuffix(Piece s, Piece suffix); // Returns if s contains sub. Any s except for empty s contains an // empty sub. -bool Contains(StringPiece s, StringPiece sub); +bool Contains(Piece s, Piece sub); // Return the first occurrence of sub in s, or npos. If both s and // sub is empty, it returns npos; otherwise, if only sub is empty, it // returns 0. -size_t Index(StringPiece s, StringPiece sub); +size_t Index(Piece s, Piece sub); // Return the first occurrence of c in s[pos:end], or npos. -size_t Find(StringPiece s, char c, size_t pos); +size_t Find(Piece s, char c, size_t pos); // Search range is [0..pos] inclusive. If pos == npos, search everything. -size_t RFind(StringPiece s, char c, size_t pos); +size_t RFind(Piece s, char c, size_t pos); -StringPiece SubStr(StringPiece s, size_t pos, size_t n); +Piece SubStr(Piece s, size_t pos, size_t n); -// allow StringPiece to be logged -std::ostream& operator<<(std::ostream& o, StringPiece piece); +// allow Piece to be logged +std::ostream& operator<<(std::ostream& o, Piece piece); +} // namespace string } // namespace paddle diff --git a/paddle/strings/stringpiece_test.cc b/paddle/string/piece_test.cc similarity index 77% rename from paddle/strings/stringpiece_test.cc rename to paddle/string/piece_test.cc index 2ba66a04f641c3457efa713383484491a213668f..cf5152ff5a3cb0a2afae0c90b787abf291122fa3 100644 --- a/paddle/strings/stringpiece_test.cc +++ b/paddle/string/piece_test.cc @@ -14,7 +14,7 @@ limitations under the License. 
*/ -#include "paddle/strings/stringpiece.h" +#include "paddle/string/piece.h" #include @@ -22,42 +22,44 @@ TEST(StringPiece, Construct) { { - paddle::StringPiece s; + paddle::string::Piece s; EXPECT_EQ(NULL, s.data()); EXPECT_EQ(0U, s.len()); } - { EXPECT_THROW(paddle::StringPiece s(NULL, 10000U), std::invalid_argument); } { - paddle::StringPiece s(NULL); + EXPECT_THROW(paddle::string::Piece s(NULL, 10000U), std::invalid_argument); + } + { + paddle::string::Piece s(NULL); EXPECT_EQ(0U, s.len()); } { std::string a; EXPECT_EQ(0U, a.size()); - paddle::StringPiece s(a); + paddle::string::Piece s(a); EXPECT_EQ(0U, s.len()); } } TEST(StringPiece, CopyAndAssign) { - paddle::StringPiece empty; + paddle::string::Piece empty; EXPECT_EQ(0U, empty.len()); - paddle::StringPiece a("hello"); - paddle::StringPiece b = a; + paddle::string::Piece a("hello"); + paddle::string::Piece b = a; EXPECT_EQ(b.len(), strlen("hello")); EXPECT_EQ(a, b); std::string storage("hello"); - paddle::StringPiece c(storage); + paddle::string::Piece c(storage); EXPECT_EQ(a, c); EXPECT_NE(a.data(), c.data()); } TEST(StringPiece, Compare) { { - paddle::StringPiece a("hello"); - paddle::StringPiece b("world"); + paddle::string::Piece a("hello"); + paddle::string::Piece b("world"); EXPECT_TRUE(a != b); EXPECT_FALSE(a == b); EXPECT_TRUE(a < b); @@ -68,7 +70,7 @@ TEST(StringPiece, Compare) { EXPECT_GT(Compare(b, a), 0); } { - paddle::StringPiece a, b; + paddle::string::Piece a, b; EXPECT_TRUE(a == b); EXPECT_FALSE(a != b); EXPECT_FALSE(a < b); @@ -82,31 +84,31 @@ TEST(StringPiece, Compare) { TEST(StringPiece, ToString) { { - paddle::StringPiece s; + paddle::string::Piece s; EXPECT_EQ(std::string(""), s.ToString()); } { - paddle::StringPiece s(NULL); + paddle::string::Piece s(NULL); EXPECT_EQ(std::string(""), s.ToString()); } { - paddle::StringPiece s("hello"); + paddle::string::Piece s("hello"); EXPECT_EQ(std::string("hello"), s.ToString()); } } TEST(StringPiece, HasPrefixSuffix) { - using paddle::HasPrefix; - using paddle::HasSuffix; + using paddle::string::HasPrefix; + using paddle::string::HasSuffix; { - paddle::StringPiece s; + paddle::string::Piece s; EXPECT_FALSE(HasPrefix(s, "something")); EXPECT_TRUE(HasPrefix(s, "")); EXPECT_FALSE(HasSuffix(s, "something")); EXPECT_TRUE(HasSuffix(s, "")); } { - paddle::StringPiece s("app"); + paddle::string::Piece s("app"); EXPECT_TRUE(HasPrefix(s, "")); EXPECT_TRUE(HasPrefix(s, "a")); EXPECT_TRUE(HasPrefix(s, "ap")); @@ -120,10 +122,10 @@ TEST(StringPiece, HasPrefixSuffix) { } TEST(StringPiece, SkipPrefixSuffix) { - using paddle::SkipPrefix; - using paddle::SkipSuffix; + using paddle::string::SkipPrefix; + using paddle::string::SkipSuffix; { - paddle::StringPiece s; + paddle::string::Piece s; EXPECT_EQ("", SkipPrefix(s, 0)); EXPECT_THROW(SkipPrefix(s, 1), std::invalid_argument); @@ -131,7 +133,7 @@ TEST(StringPiece, SkipPrefixSuffix) { EXPECT_THROW(SkipSuffix(s, 1), std::invalid_argument); } { - paddle::StringPiece s("app"); + paddle::string::Piece s("app"); EXPECT_EQ("app", SkipPrefix(s, 0)); EXPECT_EQ("pp", SkipPrefix(s, 1)); EXPECT_EQ("p", SkipPrefix(s, 2)); @@ -147,10 +149,10 @@ TEST(StringPiece, SkipPrefixSuffix) { } TEST(StringPiece, TrimPrefixSuffix) { - using paddle::TrimPrefix; - using paddle::TrimSuffix; + using paddle::string::TrimPrefix; + using paddle::string::TrimSuffix; { - paddle::StringPiece s; + paddle::string::Piece s; EXPECT_EQ("", TrimPrefix(s, "")); EXPECT_EQ("", TrimPrefix(s, "something")); @@ -158,7 +160,7 @@ TEST(StringPiece, TrimPrefixSuffix) { EXPECT_EQ("", 
TrimSuffix(s, "something")); } { - paddle::StringPiece s("app"); + paddle::string::Piece s("app"); EXPECT_EQ("app", TrimPrefix(s, "")); EXPECT_EQ("pp", TrimPrefix(s, "a")); EXPECT_EQ("p", TrimPrefix(s, "ap")); @@ -174,14 +176,14 @@ TEST(StringPiece, TrimPrefixSuffix) { } TEST(StringPiece, Contains) { - using paddle::Contains; + using paddle::string::Contains; { - paddle::StringPiece s; + paddle::string::Piece s; EXPECT_FALSE(Contains(s, "")); EXPECT_FALSE(Contains(s, "something")); } { - paddle::StringPiece s("app"); + paddle::string::Piece s("app"); EXPECT_TRUE(Contains(s, "")); EXPECT_TRUE(Contains(s, "a")); EXPECT_TRUE(Contains(s, "p")); @@ -193,15 +195,15 @@ TEST(StringPiece, Contains) { } TEST(StringPiece, Index) { - using paddle::Index; - auto npos = paddle::StringPiece::npos; + using paddle::string::Index; + auto npos = paddle::string::Piece::npos; { - paddle::StringPiece s; + paddle::string::Piece s; EXPECT_EQ(npos, Index(s, "")); EXPECT_EQ(npos, Index(s, "something")); } { - paddle::StringPiece s("app"); + paddle::string::Piece s("app"); EXPECT_EQ(0U, Index(s, "")); EXPECT_EQ(0U, Index(s, "a")); EXPECT_EQ(1U, Index(s, "p")); @@ -213,14 +215,14 @@ TEST(StringPiece, Index) { } TEST(StringPiece, Find) { - using paddle::Find; - auto npos = paddle::StringPiece::npos; + using paddle::string::Find; + auto npos = paddle::string::Piece::npos; { - paddle::StringPiece s; + paddle::string::Piece s; EXPECT_EQ(npos, Find(s, 'a', 0U)); } { - paddle::StringPiece s("app"); + paddle::string::Piece s("app"); EXPECT_EQ(0U, Find(s, 'a', 0U)); EXPECT_EQ(1U, Find(s, 'p', 0U)); EXPECT_EQ(1U, Find(s, 'p', 1U)); @@ -230,14 +232,14 @@ TEST(StringPiece, Find) { } TEST(StringPiece, RFind) { - using paddle::RFind; - auto npos = paddle::StringPiece::npos; + using paddle::string::RFind; + auto npos = paddle::string::Piece::npos; { - paddle::StringPiece s; + paddle::string::Piece s; EXPECT_EQ(npos, RFind(s, 'a', 0U)); } { - paddle::StringPiece s("app"); + paddle::string::Piece s("app"); EXPECT_EQ(2U, RFind(s, 'p', 2U)); EXPECT_EQ(0U, RFind(s, 'a', 2U)); EXPECT_EQ(1U, RFind(s, 'p', 1U)); @@ -247,15 +249,15 @@ TEST(StringPiece, RFind) { } TEST(StringPiece, SubStr) { - using paddle::SubStr; + using paddle::string::SubStr; { - paddle::StringPiece s; + paddle::string::Piece s; EXPECT_EQ("", SubStr(s, 0, 0)); EXPECT_EQ("", SubStr(s, 0, 1)); EXPECT_EQ("", SubStr(s, 1, 0)); } { - paddle::StringPiece s("app"); + paddle::string::Piece s("app"); EXPECT_EQ("", SubStr(s, 0, 0)); EXPECT_EQ("", SubStr(s, 1, 0)); EXPECT_EQ("", SubStr(s, 2, 0)); @@ -279,15 +281,15 @@ TEST(StringPiece, SubStr) { } TEST(StringPiece, StreamOutput) { - using paddle::StringPiece; + using paddle::string::Piece; std::stringstream o; - o << StringPiece(); + o << paddle::string::Piece(); EXPECT_EQ("", o.str()); - o << StringPiece("hello"); + o << paddle::string::Piece("hello"); EXPECT_EQ("hello", o.str()); - o << StringPiece(); + o << paddle::string::Piece(); EXPECT_EQ("hello", o.str()); } diff --git a/paddle/string/printf.h b/paddle/string/printf.h new file mode 100644 index 0000000000000000000000000000000000000000..8b5ce63a8e8dfe77962ff1e7415911d381a28aac --- /dev/null +++ b/paddle/string/printf.h @@ -0,0 +1,99 @@ +/* + Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserve. + + Licensed under the Apache License, Version 2.0 (the "License"); + you may not use this file except in compliance with the License. 
+  You may obtain a copy of the License at
+
+  http://www.apache.org/licenses/LICENSE-2.0
+
+  Unless required by applicable law or agreed to in writing, software
+  distributed under the License is distributed on an "AS IS" BASIS,
+  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+  See the License for the specific language governing permissions and
+  limitations under the License.
+*/
+
+// Compared with std::stringstream, there are two primary purposes of
+// string::Printf:
+//
+// 1. Type-safe printing, with why and how explained in
+//    http://www.drdobbs.com/stringprintf-a-typesafe-printf-family-fo/184401999.
+//    Implementations include
+//
+//      https://github.com/c42f/tinyformat
+//      boost::format
+//      std::stringstream
+//
+//    std::stringstream is not convenient enough in many cases. For example:
+//
+//      std::cout << std::setprecision(2) << std::fixed << 1.23456 << "\n";
+//
+//    boost::format is the most convenient one. We can have
+//
+//      std::cout << format("%2% %1%") % 36 % 77;
+//
+//    or
+//
+//      format fmter("%2% %1%");
+//      fmter % 36; fmter % 77;
+//      std::cout << fmter.c_str();
+//
+//    But the overloading of % might be overkill, and it would be more
+//    efficient if it could write to std::cout directly.
+//
+//    tinyformat has an interface compatible with the C-printf style,
+//    and it can write to a stream or return a std::string:
+//
+//      std::cout << tfm::printf(
+//          "%s, %s %d, %.2d:%.2d\n",
+//          weekday, month, day, hour, min);
+//
+//    or
+//
+//      tfm::format(std::cout,
+//                  "%s, %s %d, %.2d:%.2d\n",
+//                  weekday, month, day, hour, min);
+//
+// 2. High-performance -- most printed strings are not too long and
+//    don't need dynamic memory allocation. Many StringPrintf
+//    implementations don't enforce type safety, but are
+//    high-performance, including
+//
+//      https://developers.google.com/optimization/reference/base/stringprintf/
+//      https://github.com/adobe/chromium/blob/master/base/stringprintf.h
+//      https://github.com/google/protobuf/blob/master/src/google/protobuf/stubs/stringprintf.h
+//
+// According to
+// https://github.com/c42f/tinyformat#compile-time-and-code-bloat,
+// boost::format runs too slowly and produces large executable binaries.
+// So here we port tinyformat.
+
+#pragma once
+
+#include <iostream>
+#include <sstream>
+
+#include "paddle/string/tinyformat/tinyformat.h"  // https://github.com/c42f/tinyformat
+
+namespace paddle {
+namespace string {
+
+template <typename... Args>
+void Fprintf(std::ostream& out, const char* fmt, const Args&... args) {
+  tinyformat::vformat(out, fmt, tinyformat::makeFormatList(args...));
+}
+
+template <typename... Args>
+std::string Sprintf(const char* fmt, const Args&... args) {
+  std::ostringstream oss;
+  Fprintf(oss, fmt, args...);
+  return oss.str();
+}
+
+template <typename... Args>
+void Printf(const char* fmt, const Args&... args) {
+  Fprintf(std::cout, fmt, args...);
+}
+
+}  // namespace string
+}  // namespace paddle
diff --git a/paddle/string/printf_test.cc b/paddle/string/printf_test.cc
new file mode 100644
index 0000000000000000000000000000000000000000..d8f2454165d741b3937f908dcfd87501940750d5
--- /dev/null
+++ b/paddle/string/printf_test.cc
@@ -0,0 +1,16 @@
+#include "paddle/string/printf.h"
+
+#include <string>
+
+#include "gtest/gtest.h"
+
+TEST(StringPrintf, StringPrintf) {
+  std::string weekday = "Wednesday";
+  const char* month = "July";
+  size_t day = 27;
+  long hour = 14;
+  int min = 44;
+  EXPECT_EQ(std::string("Wednesday, July 27, 14:44"),
+            paddle::string::Sprintf(
+                "%s, %s %d, %.2d:%.2d", weekday, month, day, hour, min));
+}
diff --git a/paddle/string/tinyformat/tinyformat.h b/paddle/string/tinyformat/tinyformat.h
new file mode 100644
index 0000000000000000000000000000000000000000..f0e5e0160fb018b813c1dade727da2861a295147
--- /dev/null
+++ b/paddle/string/tinyformat/tinyformat.h
@@ -0,0 +1,902 @@
+// tinyformat.h
+// Copyright (C) 2011, Chris Foster [chris42f (at) gmail (d0t) com]
+//
+// Boost Software License - Version 1.0
+//
+// Permission is hereby granted, free of charge, to any person or organization
+// obtaining a copy of the software and accompanying documentation covered by
+// this license (the "Software") to use, reproduce, display, distribute,
+// execute, and transmit the Software, and to prepare derivative works of the
+// Software, and to permit third-parties to whom the Software is furnished to
+// do so, all subject to the following:
+//
+// The copyright notices in the Software and this entire statement, including
+// the above license grant, this restriction and the following disclaimer,
+// must be included in all copies of the Software, in whole or in part, and
+// all derivative works of the Software, unless such copies or derivative
+// works are solely in the form of machine-executable object code generated by
+// a source language processor.
+//
+// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+// FITNESS FOR A PARTICULAR PURPOSE, TITLE AND NON-INFRINGEMENT. IN NO EVENT
+// SHALL THE COPYRIGHT HOLDERS OR ANYONE DISTRIBUTING THE SOFTWARE BE LIABLE
+// FOR ANY DAMAGES OR OTHER LIABILITY, WHETHER IN CONTRACT, TORT OR OTHERWISE,
+// ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
+// DEALINGS IN THE SOFTWARE.
+
+//------------------------------------------------------------------------------
+// Tinyformat: A minimal type safe printf replacement
+//
+// tinyformat.h is a type safe printf replacement library in a single C++
+// header file. Design goals include:
+//
+// * Type safety and extensibility for user defined types.
+// * C99 printf() compatibility, to the extent possible using std::ostream
+// * Simplicity and minimalism. A single header file to include and distribute
+//   with your projects.
+// * Augment rather than replace the standard stream formatting mechanism +// * C++98 support, with optional C++11 niceties +// +// +// Main interface example usage +// ---------------------------- +// +// To print a date to std::cout: +// +// std::string weekday = "Wednesday"; +// const char* month = "July"; +// size_t day = 27; +// long hour = 14; +// int min = 44; +// +// tfm::printf("%s, %s %d, %.2d:%.2d\n", weekday, month, day, hour, min); +// +// The strange types here emphasize the type safety of the interface; it is +// possible to print a std::string using the "%s" conversion, and a +// size_t using the "%d" conversion. A similar result could be achieved +// using either of the tfm::format() functions. One prints on a user provided +// stream: +// +// tfm::format(std::cerr, "%s, %s %d, %.2d:%.2d\n", +// weekday, month, day, hour, min); +// +// The other returns a std::string: +// +// std::string date = tfm::format("%s, %s %d, %.2d:%.2d\n", +// weekday, month, day, hour, min); +// std::cout << date; +// +// These are the three primary interface functions. There is also a +// convenience function printfln() which appends a newline to the usual result +// of printf() for super simple logging. +// +// +// User defined format functions +// ----------------------------- +// +// Simulating variadic templates in C++98 is pretty painful since it requires +// writing out the same function for each desired number of arguments. To make +// this bearable tinyformat comes with a set of macros which are used +// internally to generate the API, but which may also be used in user code. +// +// The three macros TINYFORMAT_ARGTYPES(n), TINYFORMAT_VARARGS(n) and +// TINYFORMAT_PASSARGS(n) will generate a list of n argument types, +// type/name pairs and argument names respectively when called with an integer +// n between 1 and 16. We can use these to define a macro which generates the +// desired user defined function with n arguments. To generate all 16 user +// defined function bodies, use the macro TINYFORMAT_FOREACH_ARGNUM. For an +// example, see the implementation of printf() at the end of the source file. +// +// Sometimes it's useful to be able to pass a list of format arguments through +// to a non-template function. The FormatList class is provided as a way to do +// this by storing the argument list in a type-opaque way. Continuing the +// example from above, we construct a FormatList using makeFormatList(): +// +// FormatListRef formatList = tfm::makeFormatList(weekday, month, day, hour, +// min); +// +// The format list can now be passed into any non-template function and used +// via a call to the vformat() function: +// +// tfm::vformat(std::cout, "%s, %s %d, %.2d:%.2d\n", formatList); +// +// +// Additional API information +// -------------------------- +// +// Error handling: Define TINYFORMAT_ERROR to customize the error handling for +// format strings which are unsupported or have the wrong number of format +// specifiers (calls assert() by default). +// +// User defined types: Uses operator<< for user defined types by default. +// Overload formatValue() for more control. 
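To make the interface notes above concrete, here is a minimal self-contained sketch (an editor's illustration, not part of the ported header; it uses only the paddle::string wrappers from printf.h introduced earlier in this change, plus the FormatList path documented above):

    #include <iostream>
    #include <string>
    #include "paddle/string/printf.h"

    int main() {
      // Type-safe: %s accepts a std::string and %d a size_t, exactly as
      // the notes above promise.
      std::string weekday = "Wednesday";
      size_t day = 27;
      std::cout << paddle::string::Sprintf("%s %d, %.2d:%.2d\n",
                                           weekday, day, 14, 44);

      // The type-opaque path: capture the arguments once, then format them
      // later through the non-template vformat() function.
      namespace tf = paddle::string::tinyformat;
      tf::FormatListRef list = tf::makeFormatList(weekday, day);
      tf::vformat(std::cout, "%s %d\n", list);
      return 0;
    }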
+
+#pragma once
+
+#include <algorithm>
+#include <cassert>
+#include <iostream>
+#include <sstream>
+
+namespace paddle {
+namespace string {
+namespace tinyformat {
+
+#ifndef TINYFORMAT_ERROR
+#define TINYFORMAT_ERROR(reason) assert(0 && reason)
+#endif
+
+//------------------------------------------------------------------------------
+namespace detail {
+
+// Test whether type T1 is convertible to type T2
+template <typename T1, typename T2>
+struct is_convertible {
+private:
+  // two types of different size
+  struct fail {
+    char dummy[2];
+  };
+  struct succeed {
+    char dummy;
+  };
+  // Try to convert a T1 to a T2 by plugging into tryConvert
+  static fail tryConvert(...);
+  static succeed tryConvert(const T2 &);
+  static const T1 &makeT1();
+
+public:
+  // Standard trick: the (...) version of tryConvert will be chosen from
+  // the overload set only if the version taking a T2 doesn't match.
+  // Then we compare the sizes of the return types to check which
+  // function matched. Very neat, in a disgusting kind of way :)
+  static const bool value = sizeof(tryConvert(makeT1())) == sizeof(succeed);
+};
+
+// Format the value by casting to type fmtT. This default implementation
+// should never be called.
+template <typename T,
+          typename fmtT,
+          bool convertible = is_convertible<T, fmtT>::value>
+struct formatValueAsType {
+  static void invoke(std::ostream & /*out*/, const T & /*value*/) { assert(0); }
+};
+// Specialized version for types that can actually be converted to fmtT, as
+// indicated by the "convertible" template parameter.
+template <typename T, typename fmtT>
+struct formatValueAsType<T, fmtT, true> {
+  static void invoke(std::ostream &out, const T &value) {
+    out << static_cast<fmtT>(value);
+  }
+};
+
+// Convert an arbitrary type to integer. The version with convertible=false
+// throws an error.
+template <typename T, bool convertible = is_convertible<T, int>::value>
+struct convertToInt {
+  static int invoke(const T & /*value*/) {
+    TINYFORMAT_ERROR(
+        "tinyformat: Cannot convert from argument type to "
+        "integer for use as variable width or precision");
+    return 0;
+  }
+};
+// Specialization for convertToInt when conversion is possible
+template <typename T>
+struct convertToInt<T, true> {
+  static int invoke(const T &value) { return static_cast<int>(value); }
+};
+
+// Format at most ntrunc characters to the given stream.
+template <typename T>
+inline void formatTruncated(std::ostream &out, const T &value, int ntrunc) {
+  std::ostringstream tmp;
+  tmp << value;
+  std::string result = tmp.str();
+  out.write(result.c_str(),
+            (std::min)(ntrunc, static_cast<int>(result.size())));
+}
+#define TINYFORMAT_DEFINE_FORMAT_TRUNCATED_CSTR(type)                       \
+  inline void formatTruncated(std::ostream &out, type *value, int ntrunc) { \
+    std::streamsize len = 0;                                                \
+    while (len < ntrunc && value[len] != 0) ++len;                          \
+    out.write(value, len);                                                  \
+  }
+// Overload for const char* and char*. Could overload for signed & unsigned
+// char too, but these are technically unneeded for printf compatibility.
+TINYFORMAT_DEFINE_FORMAT_TRUNCATED_CSTR(const char)
+TINYFORMAT_DEFINE_FORMAT_TRUNCATED_CSTR(char)
+#undef TINYFORMAT_DEFINE_FORMAT_TRUNCATED_CSTR
+
+}  // namespace detail
+
+//------------------------------------------------------------------------------
+// Variable formatting functions. May be overridden for user-defined types if
+// desired.
+
+/// Format a value into a stream, delegating to operator<< by default.
+///
+/// Users may override this for their own types. When this function is called,
+/// the stream flags will have been modified according to the format string.
+/// The format specification is provided in the range [fmtBegin, fmtEnd).
For +/// truncating conversions, ntrunc is set to the desired maximum number of +/// characters, for example "%.7s" calls formatValue with ntrunc = 7. +/// +/// By default, formatValue() uses the usual stream insertion operator +/// operator<< to format the type T, with special cases for the %c and %p +/// conversions. +template +inline void formatValue(std::ostream &out, + const char * /*fmtBegin*/, + const char *fmtEnd, + int ntrunc, + const T &value) { + // The mess here is to support the %c and %p conversions: if these + // conversions are active we try to convert the type to a char or const + // void* respectively and format that instead of the value itself. For the + // %p conversion it's important to avoid dereferencing the pointer, which + // could otherwise lead to a crash when printing a dangling (const char*). + const bool canConvertToChar = detail::is_convertible::value; + const bool canConvertToVoidPtr = + detail::is_convertible::value; + if (canConvertToChar && *(fmtEnd - 1) == 'c') + detail::formatValueAsType::invoke(out, value); + else if (canConvertToVoidPtr && *(fmtEnd - 1) == 'p') + detail::formatValueAsType::invoke(out, value); + else if (ntrunc >= 0) { + // Take care not to overread C strings in truncating conversions like + // "%.4s" where at most 4 characters may be read. + detail::formatTruncated(out, value, ntrunc); + } else + out << value; +} + +// Overloaded version for char types to support printing as an integer +#define TINYFORMAT_DEFINE_FORMATVALUE_CHAR(charType) \ + inline void formatValue(std::ostream &out, \ + const char * /*fmtBegin*/, \ + const char *fmtEnd, \ + int /**/, \ + charType value) { \ + switch (*(fmtEnd - 1)) { \ + case 'u': \ + case 'd': \ + case 'i': \ + case 'o': \ + case 'X': \ + case 'x': \ + out << static_cast(value); \ + break; \ + default: \ + out << value; \ + break; \ + } \ + } +// per 3.9.1: char, signed char and unsigned char are all distinct types +TINYFORMAT_DEFINE_FORMATVALUE_CHAR(char) +TINYFORMAT_DEFINE_FORMATVALUE_CHAR(signed char) +TINYFORMAT_DEFINE_FORMATVALUE_CHAR(unsigned char) +#undef TINYFORMAT_DEFINE_FORMATVALUE_CHAR + +//------------------------------------------------------------------------------ +// Tools for emulating variadic templates in C++98. The basic idea here is +// stolen from the boost preprocessor metaprogramming library and cut down to +// be just general enough for what we need. + +#define TINYFORMAT_ARGTYPES(n) TINYFORMAT_ARGTYPES_##n +#define TINYFORMAT_VARARGS(n) TINYFORMAT_VARARGS_##n +#define TINYFORMAT_PASSARGS(n) TINYFORMAT_PASSARGS_##n +#define TINYFORMAT_PASSARGS_TAIL(n) TINYFORMAT_PASSARGS_TAIL_##n + +// To keep it as transparent as possible, the macros below have been generated +// using python via the excellent cog.py code generation script. This avoids +// the need for a bunch of complex (but more general) preprocessor tricks as +// used in boost.preprocessor. +// +// To rerun the code generation in place, use `cog.py -r tinyformat.h` +// (see http://nedbatchelder.com/code/cog). Alternatively you can just create +// extra versions by hand. 
+ +/*[[[cog +maxParams = 16 + +def makeCommaSepLists(lineTemplate, elemTemplate, startInd=1): + for j in range(startInd,maxParams+1): + list = ', '.join([elemTemplate % {'i':i} for i in range(startInd,j+1)]) + cog.outl(lineTemplate % {'j':j, 'list':list}) + +makeCommaSepLists('#define TINYFORMAT_ARGTYPES_%(j)d %(list)s', + 'class T%(i)d') + +cog.outl() +makeCommaSepLists('#define TINYFORMAT_VARARGS_%(j)d %(list)s', + 'const T%(i)d& v%(i)d') + +cog.outl() +makeCommaSepLists('#define TINYFORMAT_PASSARGS_%(j)d %(list)s', 'v%(i)d') + +cog.outl() +cog.outl('#define TINYFORMAT_PASSARGS_TAIL_1') +makeCommaSepLists('#define TINYFORMAT_PASSARGS_TAIL_%(j)d , %(list)s', + 'v%(i)d', startInd = 2) + +cog.outl() +cog.outl('#define TINYFORMAT_FOREACH_ARGNUM(m) \\\n ' + + ' '.join(['m(%d)' % (j,) for j in range(1,maxParams+1)])) +]]]*/ +#define TINYFORMAT_ARGTYPES_1 class T1 +#define TINYFORMAT_ARGTYPES_2 class T1, class T2 +#define TINYFORMAT_ARGTYPES_3 class T1, class T2, class T3 +#define TINYFORMAT_ARGTYPES_4 class T1, class T2, class T3, class T4 +#define TINYFORMAT_ARGTYPES_5 class T1, class T2, class T3, class T4, class T5 +#define TINYFORMAT_ARGTYPES_6 \ + class T1, class T2, class T3, class T4, class T5, class T6 +#define TINYFORMAT_ARGTYPES_7 \ + class T1, class T2, class T3, class T4, class T5, class T6, class T7 +#define TINYFORMAT_ARGTYPES_8 \ + class T1, class T2, class T3, class T4, class T5, class T6, class T7, class T8 +#define TINYFORMAT_ARGTYPES_9 \ + class T1, class T2, class T3, class T4, class T5, class T6, class T7, \ + class T8, class T9 +#define TINYFORMAT_ARGTYPES_10 \ + class T1, class T2, class T3, class T4, class T5, class T6, class T7, \ + class T8, class T9, class T10 +#define TINYFORMAT_ARGTYPES_11 \ + class T1, class T2, class T3, class T4, class T5, class T6, class T7, \ + class T8, class T9, class T10, class T11 +#define TINYFORMAT_ARGTYPES_12 \ + class T1, class T2, class T3, class T4, class T5, class T6, class T7, \ + class T8, class T9, class T10, class T11, class T12 +#define TINYFORMAT_ARGTYPES_13 \ + class T1, class T2, class T3, class T4, class T5, class T6, class T7, \ + class T8, class T9, class T10, class T11, class T12, class T13 +#define TINYFORMAT_ARGTYPES_14 \ + class T1, class T2, class T3, class T4, class T5, class T6, class T7, \ + class T8, class T9, class T10, class T11, class T12, class T13, \ + class T14 +#define TINYFORMAT_ARGTYPES_15 \ + class T1, class T2, class T3, class T4, class T5, class T6, class T7, \ + class T8, class T9, class T10, class T11, class T12, class T13, \ + class T14, class T15 +#define TINYFORMAT_ARGTYPES_16 \ + class T1, class T2, class T3, class T4, class T5, class T6, class T7, \ + class T8, class T9, class T10, class T11, class T12, class T13, \ + class T14, class T15, class T16 + +#define TINYFORMAT_VARARGS_1 const T1 &v1 +#define TINYFORMAT_VARARGS_2 const T1 &v1, const T2 &v2 +#define TINYFORMAT_VARARGS_3 const T1 &v1, const T2 &v2, const T3 &v3 +#define TINYFORMAT_VARARGS_4 \ + const T1 &v1, const T2 &v2, const T3 &v3, const T4 &v4 +#define TINYFORMAT_VARARGS_5 \ + const T1 &v1, const T2 &v2, const T3 &v3, const T4 &v4, const T5 &v5 +#define TINYFORMAT_VARARGS_6 \ + const T1 &v1, const T2 &v2, const T3 &v3, const T4 &v4, const T5 &v5, \ + const T6 &v6 +#define TINYFORMAT_VARARGS_7 \ + const T1 &v1, const T2 &v2, const T3 &v3, const T4 &v4, const T5 &v5, \ + const T6 &v6, const T7 &v7 +#define TINYFORMAT_VARARGS_8 \ + const T1 &v1, const T2 &v2, const T3 &v3, const T4 &v4, const T5 &v5, \ + const T6 &v6, const T7 &v7, 
const T8 &v8 +#define TINYFORMAT_VARARGS_9 \ + const T1 &v1, const T2 &v2, const T3 &v3, const T4 &v4, const T5 &v5, \ + const T6 &v6, const T7 &v7, const T8 &v8, const T9 &v9 +#define TINYFORMAT_VARARGS_10 \ + const T1 &v1, const T2 &v2, const T3 &v3, const T4 &v4, const T5 &v5, \ + const T6 &v6, const T7 &v7, const T8 &v8, const T9 &v9, const T10 &v10 +#define TINYFORMAT_VARARGS_11 \ + const T1 &v1, const T2 &v2, const T3 &v3, const T4 &v4, const T5 &v5, \ + const T6 &v6, const T7 &v7, const T8 &v8, const T9 &v9, const T10 &v10, \ + const T11 &v11 +#define TINYFORMAT_VARARGS_12 \ + const T1 &v1, const T2 &v2, const T3 &v3, const T4 &v4, const T5 &v5, \ + const T6 &v6, const T7 &v7, const T8 &v8, const T9 &v9, const T10 &v10, \ + const T11 &v11, const T12 &v12 +#define TINYFORMAT_VARARGS_13 \ + const T1 &v1, const T2 &v2, const T3 &v3, const T4 &v4, const T5 &v5, \ + const T6 &v6, const T7 &v7, const T8 &v8, const T9 &v9, const T10 &v10, \ + const T11 &v11, const T12 &v12, const T13 &v13 +#define TINYFORMAT_VARARGS_14 \ + const T1 &v1, const T2 &v2, const T3 &v3, const T4 &v4, const T5 &v5, \ + const T6 &v6, const T7 &v7, const T8 &v8, const T9 &v9, const T10 &v10, \ + const T11 &v11, const T12 &v12, const T13 &v13, const T14 &v14 +#define TINYFORMAT_VARARGS_15 \ + const T1 &v1, const T2 &v2, const T3 &v3, const T4 &v4, const T5 &v5, \ + const T6 &v6, const T7 &v7, const T8 &v8, const T9 &v9, const T10 &v10, \ + const T11 &v11, const T12 &v12, const T13 &v13, const T14 &v14, \ + const T15 &v15 +#define TINYFORMAT_VARARGS_16 \ + const T1 &v1, const T2 &v2, const T3 &v3, const T4 &v4, const T5 &v5, \ + const T6 &v6, const T7 &v7, const T8 &v8, const T9 &v9, const T10 &v10, \ + const T11 &v11, const T12 &v12, const T13 &v13, const T14 &v14, \ + const T15 &v15, const T16 &v16 + +#define TINYFORMAT_PASSARGS_1 v1 +#define TINYFORMAT_PASSARGS_2 v1, v2 +#define TINYFORMAT_PASSARGS_3 v1, v2, v3 +#define TINYFORMAT_PASSARGS_4 v1, v2, v3, v4 +#define TINYFORMAT_PASSARGS_5 v1, v2, v3, v4, v5 +#define TINYFORMAT_PASSARGS_6 v1, v2, v3, v4, v5, v6 +#define TINYFORMAT_PASSARGS_7 v1, v2, v3, v4, v5, v6, v7 +#define TINYFORMAT_PASSARGS_8 v1, v2, v3, v4, v5, v6, v7, v8 +#define TINYFORMAT_PASSARGS_9 v1, v2, v3, v4, v5, v6, v7, v8, v9 +#define TINYFORMAT_PASSARGS_10 v1, v2, v3, v4, v5, v6, v7, v8, v9, v10 +#define TINYFORMAT_PASSARGS_11 v1, v2, v3, v4, v5, v6, v7, v8, v9, v10, v11 +#define TINYFORMAT_PASSARGS_12 v1, v2, v3, v4, v5, v6, v7, v8, v9, v10, v11, v12 +#define TINYFORMAT_PASSARGS_13 \ + v1, v2, v3, v4, v5, v6, v7, v8, v9, v10, v11, v12, v13 +#define TINYFORMAT_PASSARGS_14 \ + v1, v2, v3, v4, v5, v6, v7, v8, v9, v10, v11, v12, v13, v14 +#define TINYFORMAT_PASSARGS_15 \ + v1, v2, v3, v4, v5, v6, v7, v8, v9, v10, v11, v12, v13, v14, v15 +#define TINYFORMAT_PASSARGS_16 \ + v1, v2, v3, v4, v5, v6, v7, v8, v9, v10, v11, v12, v13, v14, v15, v16 + +#define TINYFORMAT_PASSARGS_TAIL_1 +#define TINYFORMAT_PASSARGS_TAIL_2 , v2 +#define TINYFORMAT_PASSARGS_TAIL_3 , v2, v3 +#define TINYFORMAT_PASSARGS_TAIL_4 , v2, v3, v4 +#define TINYFORMAT_PASSARGS_TAIL_5 , v2, v3, v4, v5 +#define TINYFORMAT_PASSARGS_TAIL_6 , v2, v3, v4, v5, v6 +#define TINYFORMAT_PASSARGS_TAIL_7 , v2, v3, v4, v5, v6, v7 +#define TINYFORMAT_PASSARGS_TAIL_8 , v2, v3, v4, v5, v6, v7, v8 +#define TINYFORMAT_PASSARGS_TAIL_9 , v2, v3, v4, v5, v6, v7, v8, v9 +#define TINYFORMAT_PASSARGS_TAIL_10 , v2, v3, v4, v5, v6, v7, v8, v9, v10 +#define TINYFORMAT_PASSARGS_TAIL_11 , v2, v3, v4, v5, v6, v7, v8, v9, v10, v11 +#define TINYFORMAT_PASSARGS_TAIL_12 \ + 
, v2, v3, v4, v5, v6, v7, v8, v9, v10, v11, v12
+#define TINYFORMAT_PASSARGS_TAIL_13 \
+  , v2, v3, v4, v5, v6, v7, v8, v9, v10, v11, v12, v13
+#define TINYFORMAT_PASSARGS_TAIL_14 \
+  , v2, v3, v4, v5, v6, v7, v8, v9, v10, v11, v12, v13, v14
+#define TINYFORMAT_PASSARGS_TAIL_15 \
+  , v2, v3, v4, v5, v6, v7, v8, v9, v10, v11, v12, v13, v14, v15
+#define TINYFORMAT_PASSARGS_TAIL_16 \
+  , v2, v3, v4, v5, v6, v7, v8, v9, v10, v11, v12, v13, v14, v15, v16
+
+#define TINYFORMAT_FOREACH_ARGNUM(m) \
+  m(1) m(2) m(3) m(4) m(5) m(6) m(7) m(8) m(9) m(10) m(11) m(12) m(13) m(14) \
+      m(15) m(16)
+//[[[end]]]
+
+namespace detail {
+
+// Type-opaque holder for an argument to format(), with associated actions on
+// the type held as explicit function pointers. This allows FormatArgs for
+// each argument to be allocated as a homogeneous array inside FormatList,
+// whereas a naive implementation based on inheritance does not.
+class FormatArg {
+public:
+  FormatArg() {}
+
+  template <typename T>
+  FormatArg(const T &value)
+      : m_value(static_cast<const void *>(&value)),
+        m_formatImpl(&formatImpl<T>),
+        m_toIntImpl(&toIntImpl<T>) {}
+
+  void format(std::ostream &out,
+              const char *fmtBegin,
+              const char *fmtEnd,
+              int ntrunc) const {
+    m_formatImpl(out, fmtBegin, fmtEnd, ntrunc, m_value);
+  }
+
+  int toInt() const { return m_toIntImpl(m_value); }
+
+private:
+  template <typename T>
+  static void formatImpl(std::ostream &out,
+                         const char *fmtBegin,
+                         const char *fmtEnd,
+                         int ntrunc,
+                         const void *value) {
+    formatValue(out, fmtBegin, fmtEnd, ntrunc, *static_cast<const T *>(value));
+  }
+
+  template <typename T>
+  static int toIntImpl(const void *value) {
+    return convertToInt<T>::invoke(*static_cast<const T *>(value));
+  }
+
+  const void *m_value;
+  void (*m_formatImpl)(std::ostream &out,
+                       const char *fmtBegin,
+                       const char *fmtEnd,
+                       int ntrunc,
+                       const void *value);
+  int (*m_toIntImpl)(const void *value);
+};
+
+// Parse and return an integer from the string c, as atoi()
+// On return, c is set to one past the end of the integer.
+inline int parseIntAndAdvance(const char *&c) {
+  int i = 0;
+  for (; *c >= '0' && *c <= '9'; ++c) i = 10 * i + (*c - '0');
+  return i;
+}
+
+// Print literal part of format string and return next format spec
+// position.
+//
+// Skips over any occurrences of '%%', printing a literal '%' to the
+// output. The position of the first % character of the next
+// nontrivial format spec is returned, or the end of string.
+inline const char *printFormatStringLiteral(std::ostream &out,
+                                            const char *fmt) {
+  const char *c = fmt;
+  for (;; ++c) {
+    switch (*c) {
+      case '\0':
+        out.write(fmt, c - fmt);
+        return c;
+      case '%':
+        out.write(fmt, c - fmt);
+        if (*(c + 1) != '%') return c;
+        // for "%%", tack trailing % onto next literal section.
+        fmt = ++c;
+        break;
+      default:
+        break;
+    }
+  }
+}
+
+// Parse a format string and set the stream state accordingly.
+//
+// The format mini-language recognized here is meant to be the one from C99,
+// with the form "%[flags][width][.precision][length]type".
+//
+// Formatting options which can't be natively represented using the ostream
+// state are returned in spacePadPositive (for space padded positive numbers)
+// and ntrunc (for truncating conversions). argIndex is incremented if
+// necessary to pull out variable width and precision. The function returns a
+// pointer to the character after the end of the current format spec.
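+//
+// Worked example (editor's note, not in the original tinyformat source):
+// for the spec "%+8.3f", the parser below sets std::ios::showpos (flag '+'),
+// out.width(8), out.precision(3), and std::ios::fixed (type 'f'), so the
+// stream renders 3.14159 as "  +3.142".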
+inline const char *streamStateFromFormat(std::ostream &out, + bool &spacePadPositive, + int &ntrunc, + const char *fmtStart, + const detail::FormatArg *formatters, + int &argIndex, + int numFormatters) { + if (*fmtStart != '%') { + TINYFORMAT_ERROR( + "tinyformat: Not enough conversion specifiers in format string"); + return fmtStart; + } + // Reset stream state to defaults. + out.width(0); + out.precision(6); + out.fill(' '); + // Reset most flags; ignore irrelevant unitbuf & skipws. + out.unsetf(std::ios::adjustfield | std::ios::basefield | + std::ios::floatfield | std::ios::showbase | std::ios::boolalpha | + std::ios::showpoint | std::ios::showpos | std::ios::uppercase); + bool precisionSet = false; + bool widthSet = false; + int widthExtra = 0; + const char *c = fmtStart + 1; + // 1) Parse flags + for (;; ++c) { + switch (*c) { + case '#': + out.setf(std::ios::showpoint | std::ios::showbase); + continue; + case '0': + // overridden by left alignment ('-' flag) + if (!(out.flags() & std::ios::left)) { + // Use internal padding so that numeric values are + // formatted correctly, eg -00010 rather than 000-10 + out.fill('0'); + out.setf(std::ios::internal, std::ios::adjustfield); + } + continue; + case '-': + out.fill(' '); + out.setf(std::ios::left, std::ios::adjustfield); + continue; + case ' ': + // overridden by show positive sign, '+' flag. + if (!(out.flags() & std::ios::showpos)) spacePadPositive = true; + continue; + case '+': + out.setf(std::ios::showpos); + spacePadPositive = false; + widthExtra = 1; + continue; + default: + break; + } + break; + } + // 2) Parse width + if (*c >= '0' && *c <= '9') { + widthSet = true; + out.width(parseIntAndAdvance(c)); + } + if (*c == '*') { + widthSet = true; + int width = 0; + if (argIndex < numFormatters) + width = formatters[argIndex++].toInt(); + else + TINYFORMAT_ERROR( + "tinyformat: Not enough arguments to read variable width"); + if (width < 0) { + // negative widths correspond to '-' flag set + out.fill(' '); + out.setf(std::ios::left, std::ios::adjustfield); + width = -width; + } + out.width(width); + ++c; + } + // 3) Parse precision + if (*c == '.') { + ++c; + int precision = 0; + if (*c == '*') { + ++c; + if (argIndex < numFormatters) + precision = formatters[argIndex++].toInt(); + else + TINYFORMAT_ERROR( + "tinyformat: Not enough arguments to read variable precision"); + } else { + if (*c >= '0' && *c <= '9') + precision = parseIntAndAdvance(c); + else if (*c == '-') // negative precisions ignored, treated as zero. + parseIntAndAdvance(++c); + } + out.precision(precision); + precisionSet = true; + } + // 4) Ignore any C99 length modifier + while (*c == 'l' || *c == 'h' || *c == 'L' || *c == 'j' || *c == 'z' || + *c == 't') + ++c; + // 5) We're up to the conversion specifier character. + // Set stream flags based on conversion specifier (thanks to the + // boost::format class for forging the way here). 
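+  // (Editor's note: e.g. 'd' selects std::ios::dec, 'o' oct, 'x'/'p' hex,
+  // 'e' scientific and 'f' fixed; the uppercase variants 'X', 'E', 'F' and
+  // 'G' first set std::ios::uppercase and then fall through to the
+  // corresponding lowercase handling below.)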
+  bool intConversion = false;
+  switch (*c) {
+    case 'u':
+    case 'd':
+    case 'i':
+      out.setf(std::ios::dec, std::ios::basefield);
+      intConversion = true;
+      break;
+    case 'o':
+      out.setf(std::ios::oct, std::ios::basefield);
+      intConversion = true;
+      break;
+    case 'X':
+      out.setf(std::ios::uppercase);
+    case 'x':
+    case 'p':
+      out.setf(std::ios::hex, std::ios::basefield);
+      intConversion = true;
+      break;
+    case 'E':
+      out.setf(std::ios::uppercase);
+    case 'e':
+      out.setf(std::ios::scientific, std::ios::floatfield);
+      out.setf(std::ios::dec, std::ios::basefield);
+      break;
+    case 'F':
+      out.setf(std::ios::uppercase);
+    case 'f':
+      out.setf(std::ios::fixed, std::ios::floatfield);
+      break;
+    case 'G':
+      out.setf(std::ios::uppercase);
+    case 'g':
+      out.setf(std::ios::dec, std::ios::basefield);
+      // As in boost::format, let stream decide float format.
+      out.flags(out.flags() & ~std::ios::floatfield);
+      break;
+    case 'a':
+    case 'A':
+      TINYFORMAT_ERROR(
+          "tinyformat: the %a and %A conversion specs "
+          "are not supported");
+      break;
+    case 'c':
+      // Handled as special case inside formatValue()
+      break;
+    case 's':
+      if (precisionSet) ntrunc = static_cast<int>(out.precision());
+      // Make %s print booleans as "true" and "false"
+      out.setf(std::ios::boolalpha);
+      break;
+    case 'n':
+      // Not supported - will cause problems!
+      TINYFORMAT_ERROR("tinyformat: %n conversion spec not supported");
+      break;
+    case '\0':
+      TINYFORMAT_ERROR(
+          "tinyformat: Conversion spec incorrectly "
+          "terminated by end of string");
+      return c;
+    default:
+      break;
+  }
+  if (intConversion && precisionSet && !widthSet) {
+    // "precision" for integers gives the minimum number of digits (to be
+    // padded with zeros on the left). This isn't really supported by the
+    // iostreams, but we can approximately simulate it with the width if
+    // the width isn't otherwise used.
+    out.width(out.precision() + widthExtra);
+    out.setf(std::ios::internal, std::ios::adjustfield);
+    out.fill('0');
+  }
+  return c + 1;
+}
+
+//------------------------------------------------------------------------------
+inline void formatImpl(std::ostream &out,
+                       const char *fmt,
+                       const detail::FormatArg *formatters,
+                       int numFormatters) {
+  // Saved stream state
+  std::streamsize origWidth = out.width();
+  std::streamsize origPrecision = out.precision();
+  std::ios::fmtflags origFlags = out.flags();
+  char origFill = out.fill();
+
+  for (int argIndex = 0; argIndex < numFormatters; ++argIndex) {
+    // Parse the format string
+    fmt = printFormatStringLiteral(out, fmt);
+    bool spacePadPositive = false;
+    int ntrunc = -1;
+    const char *fmtEnd = streamStateFromFormat(out,
+                                               spacePadPositive,
+                                               ntrunc,
+                                               fmt,
+                                               formatters,
+                                               argIndex,
+                                               numFormatters);
+    if (argIndex >= numFormatters) {
+      // Check args remain after reading any variable width/precision
+      TINYFORMAT_ERROR("tinyformat: Not enough format arguments");
+      return;
+    }
+    const FormatArg &arg = formatters[argIndex];
+    // Format the arg into the stream.
+    if (!spacePadPositive)
+      arg.format(out, fmt, fmtEnd, ntrunc);
+    else {
+      // The following is a special case with no direct correspondence
+      // between stream formatting and the printf() behaviour. Simulate
+      // it crudely by formatting into a temporary string stream and
+      // munging the resulting string.
+      std::ostringstream tmpStream;
+      tmpStream.copyfmt(out);
+      tmpStream.setf(std::ios::showpos);
+      arg.format(tmpStream, fmt, fmtEnd, ntrunc);
+      std::string result = tmpStream.str();  // allocates... yuck.
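+      // (Editor's note: this realizes the ' ' flag, e.g. "% d" with 42:
+      // showpos renders "+42" into tmpStream, and the loop below rewrites
+      // the '+' to ' ', giving " 42".)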
+      for (size_t i = 0, iend = result.size(); i < iend; ++i)
+        if (result[i] == '+') result[i] = ' ';
+      out << result;
+    }
+    fmt = fmtEnd;
+  }
+
+  // Print remaining part of format string.
+  fmt = printFormatStringLiteral(out, fmt);
+  if (*fmt != '\0')
+    TINYFORMAT_ERROR(
+        "tinyformat: Too many conversion specifiers in format string");
+
+  // Restore stream state
+  out.width(origWidth);
+  out.precision(origPrecision);
+  out.flags(origFlags);
+  out.fill(origFill);
+}
+
+}  // namespace detail
+
+/// List of template arguments to format(), held in a type-opaque way.
+///
+/// A const reference to FormatList (typedef'd as FormatListRef) may be
+/// conveniently used to pass arguments to non-template functions: All type
+/// information has been stripped from the arguments, leaving just enough of a
+/// common interface to perform formatting as required.
+class FormatList {
+public:
+  FormatList(detail::FormatArg *formatters, int N)
+      : m_formatters(formatters), m_N(N) {}
+
+  friend void vformat(std::ostream &out,
+                      const char *fmt,
+                      const FormatList &list);
+
+private:
+  const detail::FormatArg *m_formatters;
+  int m_N;
+};
+
+/// Reference to type-opaque format list for passing to vformat()
+typedef const FormatList &FormatListRef;
+
+namespace detail {
+
+// Format list subclass with fixed storage to avoid dynamic allocation
+template <int N>
+class FormatListN : public FormatList {
+public:
+  template <typename... Args>
+  FormatListN(const Args &... args)
+      : FormatList(&m_formatterStore[0], N),
+        m_formatterStore{FormatArg(args)...} {
+    static_assert(sizeof...(args) == N, "Number of args must be N");
+  }
+
+private:
+  FormatArg m_formatterStore[N];
+};
+
+// Special 0-arg version - MSVC says zero-sized C array in struct is nonstandard
+template <>
+class FormatListN<0> : public FormatList {
+public:
+  FormatListN() : FormatList(0, 0) {}
+};
+
+}  // namespace detail
+
+//------------------------------------------------------------------------------
+// Primary API functions
+
+/// Make type-agnostic format list from list of template arguments.
+///
+/// The exact return type of this function is an implementation detail and
+/// shouldn't be relied upon. Instead it should be stored as a FormatListRef:
+///
+///   FormatListRef formatList = makeFormatList( /*...*/ );
+template <typename... Args>
+detail::FormatListN<sizeof...(Args)> makeFormatList(const Args &... args) {
+  return detail::FormatListN<sizeof...(Args)>(args...);
+}
+
+/// Format list of arguments to the stream according to the given format
+/// string.
+///
+/// The name vformat() is chosen for the semantic similarity to vprintf(): the
+/// list of format arguments is held in a single function argument.
+inline void vformat(std::ostream &out, const char *fmt, FormatListRef list) {
+  detail::formatImpl(out, fmt, list.m_formatters, list.m_N);
+}
+
+/// Format list of arguments to the stream according to given format string.
+template <typename... Args>
+void format(std::ostream &out, const char *fmt, const Args &... args) {
+  vformat(out, fmt, makeFormatList(args...));
+}
+
+/// Format list of arguments according to the given format string and return
+/// the result as a string.
+template <typename... Args>
+std::string format(const char *fmt, const Args &... args) {
+  std::ostringstream oss;
+  format(oss, fmt, args...);
+  return oss.str();
+}
+
+/// Format list of arguments to std::cout, according to the given format string
+template <typename... Args>
+void printf(const char *fmt, const Args &... args) {
+  format(std::cout, fmt, args...);
+}
+
+template <typename... Args>
+void printfln(const char *fmt, const Args &... args) {
+  format(std::cout, fmt, args...);
+  std::cout << '\n';
+}
+
+}  // namespace tinyformat
+}  // namespace string
+}  // namespace paddle
diff --git a/paddle/strings/CMakeLists.txt b/paddle/strings/CMakeLists.txt
deleted file mode 100644
index 4e55eecd484c0e218ecd51bbd19b3eb4f6f92a25..0000000000000000000000000000000000000000
--- a/paddle/strings/CMakeLists.txt
+++ /dev/null
@@ -1,2 +0,0 @@
-cc_library(stringpiece SRCS stringpiece.cc)
-cc_test(stringpiece_test SRCS stringpiece_test.cc DEPS stringpiece glog gflags)
diff --git a/paddle/strings/stringpiece.cc b/paddle/strings/stringpiece.cc
deleted file mode 100644
index 415b3558d5dfffde26275bcb16ea3922424ca9f3..0000000000000000000000000000000000000000
--- a/paddle/strings/stringpiece.cc
+++ /dev/null
@@ -1,141 +0,0 @@
-/*
-  Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserve.
-
-  Licensed under the Apache License, Version 2.0 (the "License");
-  you may not use this file except in compliance with the License.
-  You may obtain a copy of the License at
-
-  http://www.apache.org/licenses/LICENSE-2.0
-
-  Unless required by applicable law or agreed to in writing, software
-  distributed under the License is distributed on an "AS IS" BASIS,
-  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-  See the License for the specific language governing permissions and
-  limitations under the License.
-*/
-
-#include "paddle/strings/stringpiece.h"
-
-#include <string.h>
-
-#include <algorithm>
-#include <iosfwd>
-#include <stdexcept>
-
-namespace paddle {
-
-StringPiece::StringPiece() : data_(NULL), size_(0) {}
-
-StringPiece::StringPiece(const char* d, size_t n) : data_(d), size_(n) {
-  if (d == NULL && n != 0)
-    throw std::invalid_argument(
-        "StringPiece requires len to be 0 for NULL data");
-}
-
-StringPiece::StringPiece(const char* s) : data_(s) {
-  size_ = (s == NULL) ? 0 : strlen(s);
-}
-
-StringPiece::StringPiece(const std::string& s)
-    : data_(s.data()), size_(s.size()) {}
-
-char StringPiece::operator[](size_t n) const {
-  if (n >= len())
-    throw std::invalid_argument("index out of StringPiece length");
-  return data_[n];
-}
-
-int Compare(StringPiece a, StringPiece b) {
-  const size_t min_len = (a.len() < b.len()) ? a.len() : b.len();
-  int r = memcmp(a.data(), b.data(), min_len);
-  if (r == 0) {
-    if (a.len() < b.len())
-      return -1;
-    else if (a.len() > b.len())
-      return 1;
-  }
-  return r;
-}
-
-bool operator==(StringPiece x, StringPiece y) {
-  return ((x.len() == y.len()) &&
-          (x.data() == y.data() || memcmp(x.data(), y.data(), x.len()) == 0));
-}
-
-bool operator!=(StringPiece x, StringPiece y) { return !(x == y); }
-
-bool operator<(StringPiece x, StringPiece y) { return Compare(x, y) < 0; }
-bool operator>(StringPiece x, StringPiece y) { return Compare(x, y) > 0; }
-
-bool operator<=(StringPiece x, StringPiece y) { return Compare(x, y) <= 0; }
-bool operator>=(StringPiece x, StringPiece y) { return Compare(x, y) >= 0; }
-
-bool HasPrefix(StringPiece s, StringPiece x) {
-  return ((s.len() >= x.len()) && (memcmp(s.data(), x.data(), x.len()) == 0));
-}
-
-bool HasSuffix(StringPiece s, StringPiece x) {
-  return ((s.len() >= x.len()) &&
-          (memcmp(s.data() + (s.len() - x.len()), x.data(), x.len()) == 0));
-}
-
-StringPiece SkipPrefix(StringPiece s, size_t n) {
-  if (n > s.len())
-    throw std::invalid_argument("Skip distance larger than StringPiece length");
-  return StringPiece(s.data() + n, s.len() - n);
-}
-
-StringPiece SkipSuffix(StringPiece s, size_t n) {
-  if (n > s.len())
-    throw std::invalid_argument("Skip distance larger than StringPiece length");
-  return StringPiece(s.data(), s.len() - n);
-}
-
-StringPiece TrimPrefix(StringPiece s, StringPiece x) {
-  return HasPrefix(s, x) ? SkipPrefix(s, x.len()) : s;
-}
-
-StringPiece TrimSuffix(StringPiece s, StringPiece x) {
-  return HasSuffix(s, x) ? SkipSuffix(s, x.len()) : s;
-}
-
-bool Contains(StringPiece s, StringPiece sub) {
-  return std::search(s.begin(), s.end(), sub.begin(), sub.end()) != s.end();
-}
-
-size_t Index(StringPiece s, StringPiece sub) {
-  auto e = std::search(s.begin(), s.end(), sub.begin(), sub.end());
-  return e != s.end() ? e - s.data() : StringPiece::npos;
-}
-
-size_t Find(StringPiece s, char c, size_t pos) {
-  if (pos >= s.len()) {
-    return StringPiece::npos;
-  }
-  const char* result =
-      reinterpret_cast<const char*>(memchr(s.data() + pos, c, s.len() - pos));
-  return result != nullptr ? 
result - s.data() : StringPiece::npos; -} - -size_t RFind(StringPiece s, char c, size_t pos) { - if (s.len() == 0) return StringPiece::npos; - for (const char* p = s.data() + std::min(pos, s.len() - 1); p >= s.data(); - p--) { - if (*p == c) { - return p - s.data(); - } - } - return StringPiece::npos; -} - -StringPiece SubStr(StringPiece s, size_t pos, size_t n) { - if (pos > s.len()) pos = s.len(); - if (n > s.len() - pos) n = s.len() - pos; - return StringPiece(s.data() + pos, n); -} - -std::ostream& operator<<(std::ostream& o, StringPiece piece) { - return o << piece.ToString(); -} - -} // namespace paddle diff --git a/paddle/testing/CMakeLists.txt b/paddle/testing/CMakeLists.txt index c47add04b081cbdf78b5a5d3bca3a71025b3d9ac..4245df5ab72bf0fd67261818b307f0babdb5d685 100644 --- a/paddle/testing/CMakeLists.txt +++ b/paddle/testing/CMakeLists.txt @@ -2,7 +2,7 @@ if(WITH_TESTING) add_library(paddle_test_main STATIC TestMain.cpp) - add_dependencies(paddle_test_main gen_proto_cpp) + add_dependencies(paddle_test_main paddle_proto ${external_project_dependencies}) add_library(paddle_test_util STATIC TestUtil.cpp) - add_dependencies(paddle_test_util gen_proto_cpp) + add_dependencies(paddle_test_util paddle_proto ${external_project_dependencies}) endif() diff --git a/paddle/trainer/CMakeLists.txt b/paddle/trainer/CMakeLists.txt index f34d53ae99f913a8aed8767b7271a538efce4778..6414c399561575c13074c41598184a78f84373ee 100644 --- a/paddle/trainer/CMakeLists.txt +++ b/paddle/trainer/CMakeLists.txt @@ -41,7 +41,8 @@ add_style_check_target(paddle_trainer_lib add_style_check_target(paddle_trainer_lib ${TRAINER_HEADERS}) add_dependencies(paddle_trainer_lib - gen_proto_cpp) + paddle_proto + ${external_project_dependencies}) macro(add_paddle_exe TARGET_NAME) add_executable(${TARGET_NAME} ${ARGN}) diff --git a/paddle/trainer/tests/sample_trainer_nest_rnn_gen.conf b/paddle/trainer/tests/sample_trainer_nest_rnn_gen.conf index d669fbc40cbc19df309d8bf20c942a9d8fc8f47d..741a0aa71df7866c180ab2513f28638117d0f1ca 100644 --- a/paddle/trainer/tests/sample_trainer_nest_rnn_gen.conf +++ b/paddle/trainer/tests/sample_trainer_nest_rnn_gen.conf @@ -35,7 +35,7 @@ def outer_step(dummy_data): embedding_size=num_words)] def inner_step(dummy_memory, predict_word): - + # simplified RNN for testing with mixed_layer(size=num_words) as layer: layer += full_matrix_projection(input=predict_word, @@ -46,15 +46,15 @@ def outer_step(dummy_data): param_attr=ParamAttr(name="wordvec")) return out - + beam_gen = beam_search(name="rnn_gen", step=inner_step, input=gen_inputs, bos_id=0, eos_id=num_words-1, beam_size=2 if beam_flag else 1, - num_results_per_sample=2 if beam_flag else 1, - max_length=10) + num_results_per_sample=1, + max_length=10) return beam_gen beam_gen_concat = recurrent_group(name="rnn_gen_concat", diff --git a/paddle/trainer/tests/sample_trainer_rnn_gen.conf b/paddle/trainer/tests/sample_trainer_rnn_gen.conf index 2b337282f6285afb527e9bbf138d2e8184700d8d..58d27f15ae1c0a38885ee105a7963b6e7bd55906 100644 --- a/paddle/trainer/tests/sample_trainer_rnn_gen.conf +++ b/paddle/trainer/tests/sample_trainer_rnn_gen.conf @@ -33,7 +33,7 @@ gen_inputs = [StaticInput(input=dummy_data, size=2), embedding_size=num_words)] def step(dummy_memory, predict_word): - + # simplified RNN for testing with mixed_layer(size=num_words) as layer: layer += full_matrix_projection(input=predict_word, @@ -44,7 +44,7 @@ def step(dummy_memory, predict_word): param_attr=ParamAttr(name="wordvec")) return out - + beam_gen = beam_search(name="rnn_gen", 
step=step, input=gen_inputs, @@ -52,7 +52,7 @@ beam_gen = beam_search(name="rnn_gen", eos_id=num_words-1, beam_size=2 if beam_flag else 1, num_results_per_sample=2 if beam_flag else 1, - max_length=10) + max_length=10) seqtext_printer_evaluator(input=beam_gen, id_input=sent_id, diff --git a/paddle/utils/CMakeLists.txt b/paddle/utils/CMakeLists.txt index af59951752d1799c95e293d3eae233e6aa26e5f3..7a4977935ede4878c07f4fb6ba0dd76bf50acd42 100644 --- a/paddle/utils/CMakeLists.txt +++ b/paddle/utils/CMakeLists.txt @@ -17,7 +17,7 @@ add_library(paddle_utils STATIC add_style_check_target(paddle_utils ${UTIL_HEADERS}) add_style_check_target(paddle_utils ${UTIL_SOURCES} ${UTIL_ARCH_SOURCES}) -add_dependencies(paddle_utils gen_proto_cpp) +add_dependencies(paddle_utils paddle_proto ${external_project_dependencies}) if(WITH_TESTING) add_subdirectory(tests) endif() diff --git a/paddle/utils/Compiler.h b/paddle/utils/Compiler.h deleted file mode 100644 index cebca5a2a3766110b83231eb0705e48800a7bda6..0000000000000000000000000000000000000000 --- a/paddle/utils/Compiler.h +++ /dev/null @@ -1,33 +0,0 @@ -/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserved. -Licensed under the Apache License, Version 2.0 (the "License"); -you may not use this file except in compliance with the License. -You may obtain a copy of the License at - http://www.apache.org/licenses/LICENSE-2.0 -Unless required by applicable law or agreed to in writing, software -distributed under the License is distributed on an "AS IS" BASIS, -WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -See the License for the specific language governing permissions and -limitations under the License. */ - -#pragma once -/** - * This header defines some useful attribute by each compiler. It is the - * abstract layer of compilers. - */ -#ifdef __GNUC__ -#define GCC_VERSION \ - (__GNUC__ * 10000 + __GNUC_MINOR__ * 100 + __GNUC_PATCHLEVEL__) -#else -#define GCC_VERSION -#endif - -/** - * __must_check macro. It make the function's return value must be used, - * otherwise it will raise a compile warning. And also Paddle treat all compile - * warnings as errors. - */ -#if GCC_VERSION >= 30400 -#define __must_check __attribute__((warn_unused_result)) -#else -#define __must_check -#endif diff --git a/paddle/utils/CustomStackTrace.h b/paddle/utils/CustomStackTrace.h index 6992e856223494d6575ef3261d82cbdf4e375885..52a6df94979fd3d8d7d540ed0e3898bb3375d975 100644 --- a/paddle/utils/CustomStackTrace.h +++ b/paddle/utils/CustomStackTrace.h @@ -55,13 +55,17 @@ public: * Else, just set status to popping. */ void pop(const T& item) { - pushing() = false; auto& s = this->stack(); if (item == s.top()) { s.pop(); } } + /** + * @brief Indicate whether we are at forward or backward stage of computation + */ + void set_stage(bool isForward) { pushing() = isForward; } + /** * @brief clear current thread stack. */ diff --git a/paddle/utils/Error.h b/paddle/utils/Error.h index cda1b5c37dada8d0c6c77fc2fb03bb614d5301b5..27ddaab3f003110a2684a871a2de17afb473d660 100644 --- a/paddle/utils/Error.h +++ b/paddle/utils/Error.h @@ -19,7 +19,21 @@ limitations under the License. */ #include #include #include -#include "Compiler.h" + +/** + * __must_check macro. It make the function's return value must be used, + * otherwise it will raise a compile warning. And also Paddle treat all compile + * warnings as errors. 
+ */ +#ifdef __GNUC__ +#if (__GNUC__ * 10000 + __GNUC_MINOR__ * 100 + __GNUC_PATCHLEVEL__) >= 30400 +#define __must_check __attribute__((warn_unused_result)) +#else +#define __must_check +#endif +#else +#define __must_check +#endif namespace paddle { diff --git a/paddle/utils/tests/test_CustomStackTrace.cpp b/paddle/utils/tests/test_CustomStackTrace.cpp index b5d9f93f1376048eabd726331006b0bb848bce11..c320074fbadab3e211ed72ce715d595c90673d6d 100644 --- a/paddle/utils/tests/test_CustomStackTrace.cpp +++ b/paddle/utils/tests/test_CustomStackTrace.cpp @@ -72,7 +72,6 @@ TEST(CustomStackTrace, normalTrain) { for (size_t i = 0; i < layerSize; ++i) { tracer.push("layer_" + paddle::str::to_string(i)); } - tracer.pop(""); for (size_t i = 0; i < layerSize; ++i) { tracer.pop("layer_" + paddle::str::to_string(layerSize - 1 - i)); } diff --git a/proto/CMakeLists.txt b/proto/CMakeLists.txt index c942620990765832f21c887d30f85a2d211a5f32..18584cafe7971bad281b498908c54780250791b7 100644 --- a/proto/CMakeLists.txt +++ b/proto/CMakeLists.txt @@ -1,43 +1,23 @@ -set(proto_filenames - DataConfig.proto - DataFormat.proto - ModelConfig.proto - ParameterConfig.proto - ParameterService.proto - TrainerConfig.proto - OptimizerConfig.proto - ParameterServerConfig.proto) +file(GLOB proto_filenames . *.proto) +include_directories(${CMAKE_CURRENT_BINARY_DIR}) +proto_library(paddle_proto SRCS ${proto_filenames}) set(PROTO_GEN) set(PROTO_GEN_PY) foreach(filename ${proto_filenames}) - get_filename_component(base_filename ${filename} NAME_WE) - set(CUR_PROTO_GEN - ${CMAKE_CURRENT_BINARY_DIR}/${base_filename}.pb.h - ${CMAKE_CURRENT_BINARY_DIR}/${base_filename}.pb.cc) - set(PROTO_GEN - ${PROTO_GEN} - ${CUR_PROTO_GEN}) - add_custom_command(OUTPUT ${CUR_PROTO_GEN} - COMMAND env ${py_env} ${PROTOBUF_PROTOC_EXECUTABLE} - --cpp_out ${CMAKE_CURRENT_BINARY_DIR} - --proto_path ${PROJ_ROOT}/proto ${PROJ_ROOT}/proto/${filename} - DEPENDS ${filename} ${external_project_dependencies}) - + get_filename_component(ABS_FIL ${filename} ABSOLUTE) + get_filename_component(FIL_WE ${filename} NAME_WE) set(CUR_PROTO_GEN_PY - ${PROJ_ROOT}/paddle/python/paddle/proto/${base_filename}_pb2.py) + ${PROJ_ROOT}/paddle/python/paddle/proto/${FIL_WE}_pb2.py) set(PROTO_GEN_PY - ${CUR_PROTO_GEN_PY} - ${PROTO_GEN_PY}) + ${CUR_PROTO_GEN_PY} + ${PROTO_GEN_PY}) add_custom_command(OUTPUT ${CUR_PROTO_GEN_PY} - COMMAND env ${py_env} ${PROTOBUF_PROTOC_EXECUTABLE} --python_out ${PROJ_ROOT}/python/paddle/proto - --proto_path ${PROJ_ROOT}/proto ${PROJ_ROOT}/proto/${filename} - DEPENDS ${filename} ${external_project_dependencies}) + COMMAND ${PROTOBUF_PROTOC_EXECUTABLE} + ARGS "--python_out=${PROJ_ROOT}/python/paddle/proto" + "-I" ${CMAKE_CURRENT_SOURCE_DIR} ${ABS_FIL} + DEPENDS ${ABS_FIL} ${external_project_dependencies}) endforeach() -add_custom_target(gen_proto_cpp ALL DEPENDS ${PROTO_GEN}) add_custom_target(gen_proto_py ALL DEPENDS ${PROTO_GEN_PY}) - -add_library(paddle_proto STATIC ${PROTO_GEN}) -target_include_directories(paddle_proto PUBLIC ${CMAKE_CURRENT_BINARY_DIR}) diff --git a/proto/ModelConfig.proto b/proto/ModelConfig.proto index 29270829bbc3af6990aaf03a5228ef7f6a892a5c..ebe4f5cbb569ff37a46eb44de6362a7df337fe38 100644 --- a/proto/ModelConfig.proto +++ b/proto/ModelConfig.proto @@ -489,6 +489,15 @@ message EvaluatorConfig { // Used by ClassificationErrorEvaluator // top # classification error optional int32 top_k = 13 [default = 1]; + + // Used by DetectionMAPEvaluator + optional double overlap_threshold = 14 [default = 0.5]; + + optional int32 background_id = 
15 [default = 0]; + + optional bool evaluate_difficult = 16 [default = false]; + + optional string ap_type = 17 [default = "11point"]; } message LinkConfig { diff --git a/proto/ParameterConfig.proto b/proto/ParameterConfig.proto index cbcd0af598df22c36c66767fdeb7add2aa49e87d..580d66324602df4c655dd2f1e1cd87159b5b346b 100644 --- a/proto/ParameterConfig.proto +++ b/proto/ParameterConfig.proto @@ -25,8 +25,10 @@ enum ParameterInitStrategy { } message ParameterUpdaterHookConfig { + // hook type such as 'pruning' required string type = 1; - optional string purning_mask_filename = 2; + // this represents the ratio of zero element to be set by the Parameter + optional double sparsity_ratio = 2 [default = 0.6]; } message ParameterConfig { diff --git a/python/paddle/trainer/config_parser.py b/python/paddle/trainer/config_parser.py index c11dc09a8b98bb8a3c8455f811b1435714e825d0..58e4902f57aa8018b820f48f6cbf659f1e5f5183 100644 --- a/python/paddle/trainer/config_parser.py +++ b/python/paddle/trainer/config_parser.py @@ -1280,20 +1280,23 @@ def parse_maxout(maxout, input_layer_name, maxout_conf): # Define an evaluator @config_func -def Evaluator( - name, - type, - inputs, - chunk_scheme=None, - num_chunk_types=None, - classification_threshold=None, - positive_label=None, - dict_file=None, - result_file=None, - num_results=None, - top_k=None, - delimited=None, - excluded_chunk_types=None, ): +def Evaluator(name, + type, + inputs, + chunk_scheme=None, + num_chunk_types=None, + classification_threshold=None, + positive_label=None, + dict_file=None, + result_file=None, + num_results=None, + top_k=None, + delimited=None, + excluded_chunk_types=None, + overlap_threshold=None, + background_id=None, + evaluate_difficult=None, + ap_type=None): evaluator = g_config.model_config.evaluators.add() evaluator.type = type evaluator.name = MakeLayerNameInSubmodel(name) @@ -1327,6 +1330,18 @@ def Evaluator( if excluded_chunk_types: evaluator.excluded_chunk_types.extend(excluded_chunk_types) + if overlap_threshold is not None: + evaluator.overlap_threshold = overlap_threshold + + if background_id is not None: + evaluator.background_id = background_id + + if evaluate_difficult is not None: + evaluator.evaluate_difficult = evaluate_difficult + + if ap_type is not None: + evaluator.ap_type = ap_type + class LayerBase(object): def __init__( @@ -3124,11 +3139,11 @@ def Layer(name, type, **xargs): @config_func def ParameterHook(type, **kwargs): if type == 'pruning': - mask_filename = kwargs.get('mask_filename', None) - assert mask_filename is not None hook = ParameterUpdaterHookConfig() hook.type = type - hook.purning_mask_filename = mask_filename + sparsity_ratio = kwargs.get('sparsity_ratio', None) + if sparsity_ratio is not None: + hook.sparsity_ratio = sparsity_ratio return hook else: return None @@ -3236,13 +3251,13 @@ def Parameter(name, if update_hooks is not None: if hasattr(update_hooks, '__call__'): - update_hooks = update_hooks(para.name) + update_hooks = update_hooks() if isinstance(update_hooks, list): for hook in update_hooks: para.update_hooks.extend([hook]) else: - para.update_hooks.extend(update_hooks) + para.update_hooks.extend([update_hooks]) g_parameter_map[name] = para if initializer is not None: diff --git a/python/paddle/trainer_config_helpers/attrs.py b/python/paddle/trainer_config_helpers/attrs.py index 4100697c9c3770f1b748ea630d5f8193167fe7fc..9b9f979bb615f37ec1dc9baa154d28741b1400d5 100644 --- a/python/paddle/trainer_config_helpers/attrs.py +++ b/python/paddle/trainer_config_helpers/attrs.py @@ -14,7 
+14,8 @@ from paddle.trainer.config_parser import * __all__ = [ - 'ParamAttr', 'ExtraAttr', 'ParameterAttribute', 'ExtraLayerAttribute' + 'HookAttr', 'ParamAttr', 'ExtraAttr', 'ParameterAttribute', + 'ExtraLayerAttribute' ] @@ -55,6 +56,40 @@ def is_compatible_with(x, Type): return False +class HookAttribute(object): + """ + Hook Attribute object. As a member of the ParameterAttribute class, the hook is an auxiliary operation that occurs + during the training process of a layer with parameters, such as an img_conv layer or an fc layer. + + :param type: Hook type, currently supported types: + 'pruning' : the user specifies a sparsity_ratio before training starts, and the + network will prune the parameters based on the sparsity_ratio. + e.g. a Hook object can be defined as hk = HookAttribute('pruning', 0.6) + and used as paddle.layer.img_conv(input=img, filter_size=3, + num_channels=3, num_filters=64, + param_attr=ParameterAttribute(update_hooks=hk)) + The pruning details can be found at https://arxiv.org/pdf/1506.02626.pdf + :type type: string + + :param sparsity_ratio: Must be specified if the hook type is 'pruning'; + it is the ratio of parameter elements to be set to zero. + :type sparsity_ratio: float or None + + """ + + def __init__(self, type, sparsity_ratio=None): + self.type = type + self.sparsity_ratio = sparsity_ratio + if self.sparsity_ratio is not None: + assert is_compatible_with( + self.sparsity_ratio, + float), 'sparsity_ratio must be float type' + assert self.sparsity_ratio <= 1 and self.sparsity_ratio >= 0, 'sparsity_ratio must be a float in [0, 1]' + + def __call__(self): + return ParameterHook(self.type, sparsity_ratio=self.sparsity_ratio) + + class ParameterAttribute(object): """ Parameter Attributes object. To fine-tuning network training process, user @@ -114,6 +149,7 @@ class ParameterAttribute(object): momentum=None, gradient_clipping_threshold=None, sparse_update=False, + update_hooks=None, initializer=None): self.attr = {} @@ -169,6 +205,9 @@ class ParameterAttribute(object): if initializer is not None: self.attr['initializer'] = initializer + + if update_hooks: + self.attr['update_hooks'] = update_hooks + def set_default_parameter_name(self, name): """ Set default parameter name. If parameter not set, then will use default @@ -244,5 +283,6 @@ class ExtraLayerAttribute(object): return attr.attr +HookAttr = HookAttribute ParamAttr = ParameterAttribute ExtraAttr = ExtraLayerAttribute
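As a usage sketch (not part of the patch itself), the new HookAttribute is meant to be wired into a layer through ParameterAttribute; the layer names and sizes below are illustrative only, and a real trainer config would provide the surrounding parsing context:

.. code-block:: python

    # Illustrative trainer-config sketch; 'pixel' and the sizes are made up.
    from paddle.trainer_config_helpers.layers import data_layer, fc_layer
    from paddle.trainer_config_helpers.attrs import HookAttribute, ParameterAttribute

    img = data_layer(name='pixel', size=784)
    hk = HookAttribute('pruning', sparsity_ratio=0.6)  # zero out 60% of the weights
    fc = fc_layer(input=img,
                  size=128,
                  param_attr=ParameterAttribute(update_hooks=hk))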
diff --git a/python/paddle/trainer_config_helpers/evaluators.py b/python/paddle/trainer_config_helpers/evaluators.py index a5234f3e47f6caa4b365de593648e0ee5ad6e4a2..44d52edfa7bae49bea196eba9387391b171840d8 100644 --- a/python/paddle/trainer_config_helpers/evaluators.py +++ b/python/paddle/trainer_config_helpers/evaluators.py @@ -21,7 +21,8 @@ __all__ = [ "chunk_evaluator", "sum_evaluator", "column_sum_evaluator", "value_printer_evaluator", "gradient_printer_evaluator", "maxid_printer_evaluator", "maxframe_printer_evaluator", - "seqtext_printer_evaluator", "classification_error_printer_evaluator" + "seqtext_printer_evaluator", "classification_error_printer_evaluator", + "detection_map_evaluator" ] @@ -31,10 +32,11 @@ class EvaluatorAttribute(object): FOR_RANK = 1 << 2 FOR_PRINT = 1 << 3 FOR_UTILS = 1 << 4 + FOR_DETECTION = 1 << 5 KEYS = [ "for_classification", "for_regression", "for_rank", "for_print", - "for_utils" + "for_utils", "for_detection" ] @staticmethod @@ -57,22 +59,25 @@ def evaluator(*attrs): return impl -def evaluator_base( - input, - type, - label=None, - weight=None, - name=None, - chunk_scheme=None, - num_chunk_types=None, - classification_threshold=None, - positive_label=None, - dict_file=None, - result_file=None, - num_results=None, - delimited=None, - top_k=None, - excluded_chunk_types=None, ): +def evaluator_base(input, + type, + label=None, + weight=None, + name=None, + chunk_scheme=None, + num_chunk_types=None, + classification_threshold=None, + positive_label=None, + dict_file=None, + result_file=None, + num_results=None, + delimited=None, + top_k=None, + excluded_chunk_types=None, + overlap_threshold=None, + background_id=None, + evaluate_difficult=None, + ap_type=None): """ Evaluator will evaluate the network status while training/testing. @@ -107,6 +112,14 @@ def evaluator_base( :type weight: LayerOutput. :param top_k: number k in top-k error rate :type top_k: int + :param overlap_threshold: The overlap threshold used to filter detection results in detection tasks + :type overlap_threshold: float + :param background_id: Identifier of the background class + :type background_id: int + :param evaluate_difficult: Whether to evaluate difficult objects + :type evaluate_difficult: bool + :param ap_type: How to calculate the average precision + :type ap_type: str """ # inputs type assertions. assert classification_threshold is None or isinstance( @@ -136,7 +149,61 @@ def evaluator_base( delimited=delimited, num_results=num_results, top_k=top_k, - excluded_chunk_types=excluded_chunk_types, ) + excluded_chunk_types=excluded_chunk_types, + overlap_threshold=overlap_threshold, + background_id=background_id, + evaluate_difficult=evaluate_difficult, + ap_type=ap_type) + + +@evaluator(EvaluatorAttribute.FOR_DETECTION) +@wrap_name_default() +def detection_map_evaluator(input, + label, + overlap_threshold=0.5, + background_id=0, + evaluate_difficult=False, + ap_type="11point", + name=None): + """ + Detection mAP Evaluator. It will print the mean Average Precision (mAP) for detection. + + Based on the output of the detection_output layer, the detection mAP evaluator + counts the true positive and the false positive bboxes and integrates them to + get the mAP. + + The simple usage is: + + .. code-block:: python + + eval = detection_map_evaluator(input=det_output, label=lbl) + + :param input: Input layer.
+ :type input: LayerOutput + :param label: Label layer. + :type label: LayerOutput + :param overlap_threshold: The bbox overlap threshold of a true positive. + :type overlap_threshold: float + :param background_id: The background class index. + :type background_id: int + :param evaluate_difficult: Whether to evaluate difficult ground truths. + :type evaluate_difficult: bool + :param ap_type: How to calculate the average precision, '11point' by default. + :type ap_type: str + """ + if not isinstance(input, list): + input = [input] + + if label: + input.append(label) + + evaluator_base( + name=name, + type="detection_map", + input=input, + label=label, + overlap_threshold=overlap_threshold, + background_id=background_id, + evaluate_difficult=evaluate_difficult, + ap_type=ap_type) @evaluator(EvaluatorAttribute.FOR_CLASSIFICATION)
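For context, a hedged sketch of attaching the new detection mAP evaluator in a model config; det_out and lbl stand in for the output of a detection_output layer and its ground-truth label layer, neither of which is defined here:

.. code-block:: python

    from paddle.trainer_config_helpers.evaluators import detection_map_evaluator

    # det_out and lbl are hypothetical LayerOutput objects.
    detection_map_evaluator(
        input=det_out,
        label=lbl,
        overlap_threshold=0.5,  # IoU above which a detection is a true positive
        background_id=0,
        evaluate_difficult=False,
        ap_type="11point")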
diff --git a/python/paddle/trainer_config_helpers/layers.py b/python/paddle/trainer_config_helpers/layers.py index b8ce0373c0e9524518e42ad911fd2cd728facec4..a601d5c84ad222785e68b9fa81c51b1e120b4f29 100755 --- a/python/paddle/trainer_config_helpers/layers.py +++ b/python/paddle/trainer_config_helpers/layers.py @@ -1149,10 +1149,10 @@ def pooling_layer(input, @layer_support(DROPOUT) def lstmemory(input, name=None, + size=None, reverse=False, act=None, gate_act=None, - size=None, state_act=None, bias_attr=None, param_attr=None, @@ -1194,6 +1194,8 @@ def lstmemory(input, :param name: The lstmemory layer name. :type name: basestring + :param size: DEPRECATED. size of the lstm cell + :type size: int :param input: input layer name. :type input: LayerOutput :param reverse: is sequence process reversed or not. @@ -1220,15 +1222,15 @@ def lstmemory(input, assert state_act.support_hppl assert act.support_hppl assert input.size is not None and input.size % 4 == 0 + if size is not None: if input.size / 4 == size: plog = logger.warning else: plog = logger.fatal - - plog("NOTE: The lstmemory layer[%s]'s size is set by previous input " "layer. The lstm size should be equal with input layer size/4. The" " size which is set explicitly will be ignored." % name) + plog("size of lstmemory layer: %s is automatically set to " + "size of input layer / 4. The parameter size passed to " + "this layer is ignored." % (name)) Layer( name=name, @@ -1255,11 +1257,11 @@ def lstmemory(input, @wrap_name_default("gru") @layer_support(DROPOUT) def grumemory(input, + size=None, name=None, reverse=False, act=None, gate_act=None, - size=None, bias_attr=None, param_attr=None, layer_attr=None): @@ -1318,6 +1320,8 @@ def grumemory(input, :type name: None|basestring :param input: input layer. :type input: LayerOutput. + :param size: DEPRECATED. size of the gru cell + :type size: int :param reverse: Whether sequence process is reversed or not. :type reverse: bool :param act: activation type, TanhActivation by default. This activation @@ -1334,9 +1338,6 @@ def grumemory(input, :type param_attr: ParameterAttribute|None|False :param layer_attr: Extra Layer attribute :type layer_attr: ExtraLayerAttribute|None - :param size: Stub parameter of size, but actually not used. If set this size - will get a warning. - :type size: None :return: LayerOutput object. :rtype: LayerOutput """ @@ -1348,9 +1349,9 @@ def grumemory(input, plog = logger.warning else: plog = logger.fatal - plog("NOTE: the gru memory layer's size is set by previous input layer," " and should be input size / 3. Set size explicitly will be " "ignored.") + plog("size of grumemory layer: %s is automatically set to " + "size of input layer / 3. The parameter size passed to this " + "layer is ignored." % (name)) Layer( name=name, @@ -2524,8 +2525,8 @@ def img_cmrnorm_layer(input, @wrap_bias_attr_default() -@wrap_param_attr_default(default_factory=lambda _: ParamAttr(initial_mean=1.0, - initial_std=0.)) +@wrap_param_attr_default( + default_factory=lambda _: ParamAttr(initial_mean=1.0, initial_std=0.)) @wrap_act_default(act=ReluActivation()) @wrap_name_default("batch_norm") @layer_support(DROPOUT) @@ -3013,25 +3014,25 @@ def lstm_step_layer(input, bias_attr=None, layer_attr=None): """ - LSTM Step Layer. It used in recurrent_group. The lstm equations are shown - as follow. + LSTM Step Layer. This function is used only in recurrent_group. + The lstm equations are shown as follows. .. math:: - i_t & = \\sigma(W_{xi}x_{t} + W_{hi}h_{t-1} + W_{ci}c_{t-1} + b_i) + i_t & = \\sigma(W_{x_i}x_{t} + W_{h_i}h_{t-1} + W_{c_i}c_{t-1} + b_i) - f_t & = \\sigma(W_{xf}x_{t} + W_{hf}h_{t-1} + W_{cf}c_{t-1} + b_f) + f_t & = \\sigma(W_{x_f}x_{t} + W_{h_f}h_{t-1} + W_{c_f}c_{t-1} + b_f) - c_t & = f_tc_{t-1} + i_t tanh (W_{xc}x_t+W_{hc}h_{t-1} + b_c) + c_t & = f_tc_{t-1} + i_t tanh (W_{x_c}x_t+W_{h_c}h_{t-1} + b_c) - o_t & = \\sigma(W_{xo}x_{t} + W_{ho}h_{t-1} + W_{co}c_t + b_o) + o_t & = \\sigma(W_{x_o}x_{t} + W_{h_o}h_{t-1} + W_{c_o}c_t + b_o) h_t & = o_t tanh(c_t) The input of lstm step is :math:`Wx_t + Wh_{t-1}`, and user should use :code:`mixed_layer` and :code:`full_matrix_projection` to calculate these - input vector. + input vectors. The state of lstm step is :math:`c_{t-1}`. And lstm step layer will do @@ -3042,14 +3043,14 @@ def lstm_step_layer(input, ... - This layer contains two outputs. Default output is :math:`h_t`. The other - output is :math:`o_t`, which name is 'state' and can use + This layer has two outputs. The default output is :math:`h_t`. The other + output is :math:`o_t`, whose name is 'state', and one can use :code:`get_output_layer` to extract this output. :param name: Layer's name. :type name: basestring - :param size: Layer's size. NOTE: lstm layer's size, should be equal as - :code:`input.size/4`, and should be equal as + :param size: Layer's size. NOTE: lstm layer's size, should be equal to + :code:`input.size/4`, and should be equal to :code:`state.size`. :type size: int :param input: input layer. :math:`Wx_t + Wh_{t-1}` @@ -3839,7 +3840,8 @@ def classification_cost(input, weight=None, name=None, evaluator=classification_error_evaluator, - layer_attr=None): + layer_attr=None, + coeff=1.): """ classification cost Layer. @@ -3855,6 +3857,8 @@ def classification_cost(input, :param evaluator: Evaluator method. :param layer_attr: layer's extra attribute. :type layer_attr: ExtraLayerAttribute + :param coeff: The coefficient that scales the gradient in the backward pass. + :type coeff: float :return: LayerOutput object. :rtype: LayerOutput """ @@ -3868,6 +3872,7 @@ def classification_cost(input, name=name, type="multi-class-cross-entropy", inputs=ipts, + coeff=coeff, **ExtraLayerAttribute.to_kwargs(layer_attr)) def __add_evaluator__(e):
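A short sketch of the new coeff argument on classification_cost; prediction and lbl are hypothetical layers, and scaling a cost is mostly useful when several costs are combined:

.. code-block:: python

    from paddle.trainer_config_helpers.layers import classification_cost

    # coeff=0.5 halves this cost's gradient contribution in the backward pass.
    cost = classification_cost(input=prediction, label=lbl, coeff=0.5)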
diff --git a/python/paddle/trainer_config_helpers/networks.py b/python/paddle/trainer_config_helpers/networks.py index 1bf59ed4840ae69afc5bce49c86a08b60e9603ee..b77932ce5f09470329a97cc0a6273942a9155c6a 100755 --- a/python/paddle/trainer_config_helpers/networks.py +++ b/python/paddle/trainer_config_helpers/networks.py @@ -614,6 +614,7 @@ def simple_lstm(input, @wrap_name_default('lstm_unit') def lstmemory_unit(input, + memory_boot=None, name=None, size=None, param_attr=None, @@ -626,9 +627,9 @@ def lstmemory_unit(input, lstm_layer_attr=None, get_output_layer_attr=None): """ - Define calculations that a LSTM unit performs in a single time step. - This function itself is not a recurrent layer, so that it can not be - directly applied to sequence input. This function is always used in + Define calculations that an LSTM unit performs during a single time step. + This function itself is not a recurrent layer, so it cannot be + directly used to process sequence inputs. This function is always used in recurrent_group (see layers.py for more details) to implement attention mechanism. @@ -638,13 +639,13 @@ def lstmemory_unit(input, .. math:: - i_t & = \\sigma(W_{xi}x_{t} + W_{hi}h_{t-1} + W_{ci}c_{t-1} + b_i) + i_t & = \\sigma(W_{x_i}x_{t} + W_{h_i}h_{t-1} + W_{c_i}c_{t-1} + b_i) - f_t & = \\sigma(W_{xf}x_{t} + W_{hf}h_{t-1} + W_{cf}c_{t-1} + b_f) + f_t & = \\sigma(W_{x_f}x_{t} + W_{h_f}h_{t-1} + W_{c_f}c_{t-1} + b_f) - c_t & = f_tc_{t-1} + i_t tanh (W_{xc}x_t+W_{hc}h_{t-1} + b_c) + c_t & = f_tc_{t-1} + i_t tanh (W_{x_c}x_t+W_{h_c}h_{t-1} + b_c) - o_t & = \\sigma(W_{xo}x_{t} + W_{ho}h_{t-1} + W_{co}c_t + b_o) + o_t & = \\sigma(W_{x_o}x_{t} + W_{h_o}h_{t-1} + W_{c_o}c_t + b_o) h_t & = o_t tanh(c_t) @@ -661,6 +662,8 @@ def lstmemory_unit(input, :param input: input layer name. :type input: LayerOutput + :param memory_boot: the initialization state of the LSTM cell. + :type memory_boot: LayerOutput | None :param name: lstmemory unit name. :type name: basestring :param size: lstmemory unit size. @@ -692,7 +695,8 @@ def lstmemory_unit(input, assert input.size % 4 == 0 size = input.size / 4 out_mem = memory(name=name, size=size) - state_mem = memory(name="%s_state" % name, size=size) + state_mem = memory( + name="%s_state" % name, size=size, boot_layer=memory_boot) with mixed_layer( name="%s_input_recurrent" % name, @@ -726,6 +730,7 @@ def lstmemory_unit(input, def lstmemory_group(input, size=None, name=None, + memory_boot=None, reverse=False, param_attr=None, act=None, @@ -737,7 +742,7 @@ def lstmemory_group(input, lstm_layer_attr=None, get_output_layer_attr=None): """ - lstm_group is a recurrent layer group version of Long Short Term Memory. It + lstm_group is a recurrent_group version of Long Short Term Memory. It does exactly the same calculation as the lstmemory layer (see lstmemory in layers.py for the maths) does.
A promising benefit is that LSTM memory cell states, or hidden states in every time step are accessible to the @@ -748,8 +753,8 @@ def lstmemory_group(input, NOTE: In PaddlePaddle's implementation, the following input-to-hidden multiplications: - :math:`W_{xi}x_{t}` , :math:`W_{xf}x_{t}`, - :math:`W_{xc}x_t`, :math:`W_{xo}x_{t}` are not done in lstmemory_unit to + :math:`W_{x_i}x_{t}` , :math:`W_{x_f}x_{t}`, + :math:`W_{x_c}x_t`, :math:`W_{x_o}x_{t}` are not done in lstmemory_unit to speed up the calculations. Consequently, an additional mixed_layer with full_matrix_projection must be included before lstmemory_unit is called. @@ -765,10 +770,12 @@ def lstmemory_group(input, :param input: input layer name. :type input: LayerOutput - :param name: lstmemory group name. - :type name: basestring :param size: lstmemory group size. :type size: int + :param name: name of the lstmemory group. + :type name: basestring + :param memory_boot: the initialization state of the LSTM cell. + :type memory_boot: LayerOutput | None :param reverse: is lstm reversed :type reverse: bool :param param_attr: Parameter config, None if use default. @@ -798,6 +805,7 @@ def lstmemory_group(input, def __lstm_step__(ipt): return lstmemory_unit( input=ipt, + memory_boot=memory_boot, name=name, size=size, mixed_bias_attr=mixed_bias_attr, @@ -819,6 +827,7 @@ def lstmemory_group(input, @wrap_name_default('gru_unit') def gru_unit(input, + memory_boot=None, size=None, name=None, gru_bias_attr=None, @@ -829,8 +838,8 @@ def gru_unit(input, naive=False): """ Define calculations that a gated recurrent unit performs in a single time - step. This function itself is not a recurrent layer, so that it can not be - directly applied to sequence input. This function is almost always used in + step. This function itself is not a recurrent layer, so it cannot be + directly used to process sequence inputs. This function is always used in the recurrent_group (see layers.py for more details) to implement attention mechanism. @@ -838,6 +847,8 @@ def gru_unit(input, :param input: input layer name. :type input: LayerOutput + :param memory_boot: the initialization state of the GRU cell. + :type memory_boot: LayerOutput | None :param name: name of the gru group. :type name: basestring :param size: hidden size of the gru. @@ -856,7 +867,7 @@ def gru_unit(input, if size is None: size = input.size / 3 - out_mem = memory(name=name, size=size) + out_mem = memory(name=name, size=size, boot_layer=memory_boot) if naive: __step__ = gru_step_naive_layer @@ -878,6 +889,7 @@ def gru_unit(input, @wrap_name_default('gru_group') def gru_group(input, + memory_boot=None, size=None, name=None, reverse=False, @@ -888,7 +900,7 @@ def gru_group(input, gru_layer_attr=None, naive=False): """ - gru_group is a recurrent layer group version of Gated Recurrent Unit. It + gru_group is a recurrent_group version of Gated Recurrent Unit. It does exactly the same calculation as the grumemory layer does. A promising benefit is that gru hidden states are accessible to the user. This is especially useful in attention model. If you do not need to access @@ -908,6 +920,8 @@ def gru_group(input, :param input: input layer name. :type input: LayerOutput + :param memory_boot: the initialization state of the GRU cell. + :type memory_boot: LayerOutput | None :param name: name of the gru group. :type name: basestring :param size: hidden size of the gru.
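A sketch, under assumed layer names, of what the new memory_boot argument enables: seeding a recurrent group's cell state from another layer, for example an encoder's final state in a seq2seq model:

.. code-block:: python

    from paddle.trainer_config_helpers.networks import lstmemory_group

    # enc_last and dec_input are hypothetical LayerOutput objects; per the
    # docstring above, dec_input should be a mixed_layer projection whose
    # size is 4 * 256.
    decoder = lstmemory_group(
        input=dec_input,
        size=256,
        memory_boot=enc_last)  # cell state starts from enc_last instead of zeros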
@@ -929,6 +943,7 @@ def gru_group(input, def __gru_step__(ipt): return gru_unit( input=ipt, + memory_boot=memory_boot, name=name, size=size, gru_bias_attr=gru_bias_attr, @@ -1083,7 +1098,6 @@ def simple_gru2(input, return grumemory( name=name, - size=size, input=m, reverse=reverse, bias_attr=gru_bias_attr, @@ -1381,7 +1395,7 @@ def inputs(layers, *args): if len(args) != 0: layers.extend(args) - Inputs(*[l.name for l in layers]) + Inputs(* [l.name for l in layers]) def outputs(layers, *args): @@ -1424,7 +1438,7 @@ def outputs(layers, *args): assert len(layers) > 0 if HasInputsSet(): # input already set - Outputs(*[l.name for l in layers]) + Outputs(* [l.name for l in layers]) return # just return outputs. if len(layers) != 1: diff --git a/python/paddle/v2/attr.py b/python/paddle/v2/attr.py index 32f78614e7f8abe7cffdc7a50a9fa77f1fc1a780..5d23894d735c463d469f842b875ecbec1dbaf476 100644 --- a/python/paddle/v2/attr.py +++ b/python/paddle/v2/attr.py @@ -17,10 +17,12 @@ import paddle.trainer_config_helpers.attrs __all__ = [ "Param", "Extra", + "Hook", ] Param = paddle.trainer_config_helpers.attrs.ParameterAttribute Extra = paddle.trainer_config_helpers.attrs.ExtraLayerAttribute +Hook = paddle.trainer_config_helpers.attrs.HookAttribute for each in paddle.trainer_config_helpers.attrs.__all__: globals()[each] = getattr(paddle.trainer_config_helpers.attrs, each) diff --git a/python/paddle/v2/dataset/__init__.py b/python/paddle/v2/dataset/__init__.py index 26252d5bbd77ddb70b4f03843679e4737e2f96d3..2e4beb6882789249db09705f3f4d6c5c19e492cd 100644 --- a/python/paddle/v2/dataset/__init__.py +++ b/python/paddle/v2/dataset/__init__.py @@ -25,8 +25,9 @@ import uci_housing import sentiment import wmt14 import mq2007 +import flowers __all__ = [ 'mnist', 'imikolov', 'imdb', 'cifar', 'movielens', 'conll05', 'sentiment', - 'uci_housing', 'wmt14', 'mq2007' + 'uci_housing', 'wmt14', 'mq2007', 'flowers' ] diff --git a/python/paddle/v2/dataset/cifar.py b/python/paddle/v2/dataset/cifar.py index 81af0a8e66a44a3476206147684d81bcac1be372..f885b2834e8ad502b752c6fd53daf7ef1693433f 100644 --- a/python/paddle/v2/dataset/cifar.py +++ b/python/paddle/v2/dataset/cifar.py @@ -31,10 +31,10 @@ images per class. import cPickle import itertools import numpy -from common import download +import paddle.v2.dataset.common import tarfile -__all__ = ['train100', 'test100', 'train10', 'test10'] +__all__ = ['train100', 'test100', 'train10', 'test10', 'convert'] URL_PREFIX = 'https://www.cs.toronto.edu/~kriz/' CIFAR10_URL = URL_PREFIX + 'cifar-10-python.tar.gz' @@ -75,7 +75,8 @@ def train100(): :rtype: callable """ return reader_creator( - download(CIFAR100_URL, 'cifar', CIFAR100_MD5), 'train') + paddle.v2.dataset.common.download(CIFAR100_URL, 'cifar', CIFAR100_MD5), + 'train') def test100(): @@ -88,7 +89,9 @@ def test100(): :return: Test reader creator.
:rtype: callable """ - return reader_creator(download(CIFAR100_URL, 'cifar', CIFAR100_MD5), 'test') + return reader_creator( + paddle.v2.dataset.common.download(CIFAR100_URL, 'cifar', CIFAR100_MD5), + 'test') def train10(): @@ -102,7 +105,8 @@ def train10(): :rtype: callable """ return reader_creator( - download(CIFAR10_URL, 'cifar', CIFAR10_MD5), 'data_batch') + paddle.v2.dataset.common.download(CIFAR10_URL, 'cifar', CIFAR10_MD5), + 'data_batch') def test10(): @@ -116,9 +120,20 @@ def test10(): :rtype: callable """ return reader_creator( - download(CIFAR10_URL, 'cifar', CIFAR10_MD5), 'test_batch') + paddle.v2.dataset.common.download(CIFAR10_URL, 'cifar', CIFAR10_MD5), + 'test_batch') def fetch(): - download(CIFAR10_URL, 'cifar', CIFAR10_MD5) - download(CIFAR100_URL, 'cifar', CIFAR100_MD5) + paddle.v2.dataset.common.download(CIFAR10_URL, 'cifar', CIFAR10_MD5) + paddle.v2.dataset.common.download(CIFAR100_URL, 'cifar', CIFAR100_MD5) + + +def convert(path): + """ + Converts dataset to recordio format + """ + paddle.v2.dataset.common.convert(path, train100(), 10, "cifar_train100") + paddle.v2.dataset.common.convert(path, test100(), 10, "cifar_test100") + paddle.v2.dataset.common.convert(path, train10(), 10, "cifar_train10") + paddle.v2.dataset.common.convert(path, test10(), 10, "cifar_test10")
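Each dataset module now follows the same pattern: a convert(path) entry point that serializes its readers into RecordIO files under the given directory. A hedged usage sketch (the output path is arbitrary):

.. code-block:: python

    import paddle.v2.dataset.cifar as cifar

    # Writes cifar_train100, cifar_test100, cifar_train10 and cifar_test10
    # RecordIO files under /tmp/paddle_recordio.
    cifar.convert('/tmp/paddle_recordio')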
""" - word_dict = load_dict(download(WORDDICT_URL, 'conll05st', WORDDICT_MD5)) - verb_dict = load_dict(download(VERBDICT_URL, 'conll05st', VERBDICT_MD5)) - label_dict = load_dict(download(TRGDICT_URL, 'conll05st', TRGDICT_MD5)) + word_dict = load_dict( + paddle.v2.dataset.common.download(WORDDICT_URL, 'conll05st', + WORDDICT_MD5)) + verb_dict = load_dict( + paddle.v2.dataset.common.download(VERBDICT_URL, 'conll05st', + VERBDICT_MD5)) + label_dict = load_dict( + paddle.v2.dataset.common.download(TRGDICT_URL, 'conll05st', + TRGDICT_MD5)) return word_dict, verb_dict, label_dict @@ -192,7 +198,7 @@ def get_embedding(): """ Get the trained word vector based on Wikipedia corpus. """ - return download(EMB_URL, 'conll05st', EMB_MD5) + return paddle.v2.dataset.common.download(EMB_URL, 'conll05st', EMB_MD5) def test(): @@ -209,15 +215,23 @@ def test(): """ word_dict, verb_dict, label_dict = get_dict() reader = corpus_reader( - download(DATA_URL, 'conll05st', DATA_MD5), + paddle.v2.dataset.common.download(DATA_URL, 'conll05st', DATA_MD5), words_name='conll05st-release/test.wsj/words/test.wsj.words.gz', props_name='conll05st-release/test.wsj/props/test.wsj.props.gz') return reader_creator(reader, word_dict, verb_dict, label_dict) def fetch(): - download(WORDDICT_URL, 'conll05st', WORDDICT_MD5) - download(VERBDICT_URL, 'conll05st', VERBDICT_MD5) - download(TRGDICT_URL, 'conll05st', TRGDICT_MD5) - download(EMB_URL, 'conll05st', EMB_MD5) - download(DATA_URL, 'conll05st', DATA_MD5) + paddle.v2.dataset.common.download(WORDDICT_URL, 'conll05st', WORDDICT_MD5) + paddle.v2.dataset.common.download(VERBDICT_URL, 'conll05st', VERBDICT_MD5) + paddle.v2.dataset.common.download(TRGDICT_URL, 'conll05st', TRGDICT_MD5) + paddle.v2.dataset.common.download(EMB_URL, 'conll05st', EMB_MD5) + paddle.v2.dataset.common.download(DATA_URL, 'conll05st', DATA_MD5) + + +def convert(path): + """ + Converts dataset to recordio format + """ + paddle.v2.dataset.common.convert(path, test(), 10, "conl105_train") + paddle.v2.dataset.common.convert(path, test(), 10, "conl105_test") diff --git a/python/paddle/v2/dataset/flowers.py b/python/paddle/v2/dataset/flowers.py index 07c13cf719ae0c864c23fef51f0bd7d47f265759..158cfe158c4f1c8d82d157301adcfbe0351c55df 100644 --- a/python/paddle/v2/dataset/flowers.py +++ b/python/paddle/v2/dataset/flowers.py @@ -13,18 +13,18 @@ # limitations under the License. """ This module will download dataset from -http://www.robots.ox.ac.uk/~vgg/data/flowers/102/index.html +http://www.robots.ox.ac.uk/~vgg/data/flowers/102/index.html and parse train/test set intopaddle reader creators. -This set contains images of flowers belonging to 102 different categories. +This set contains images of flowers belonging to 102 different categories. The images were acquired by searching the web and taking pictures. There are a minimum of 40 images for each category. The database was used in: Nilsback, M-E. and Zisserman, A. Automated flower classification over a large - number of classes.Proceedings of the Indian Conference on Computer Vision, -Graphics and Image Processing (2008) + number of classes.Proceedings of the Indian Conference on Computer Vision, +Graphics and Image Processing (2008) http://www.robots.ox.ac.uk/~vgg/publications/papers/nilsback08.{pdf,ps.gz}. 
""" @@ -34,9 +34,9 @@ from common import download import tarfile import scipy.io as scio from paddle.v2.image import * +from paddle.v2.reader import * import os import numpy as np -import paddle.v2 as paddle from multiprocessing import cpu_count __all__ = ['train', 'test', 'valid'] @@ -46,6 +46,12 @@ SETID_URL = 'http://www.robots.ox.ac.uk/~vgg/data/flowers/102/setid.mat' DATA_MD5 = '52808999861908f626f3c1f4e79d11fa' LABEL_MD5 = 'e0620be6f572b9609742df49c70aed4d' SETID_MD5 = 'a5357ecc9cb78c4bef273ce3793fc85c' +# In official 'readme', tstid is the flag of test data +# and trnid is the flag of train data. But test data is more than train data. +# So we exchange the train data and test data. +TRAIN_FLAG = 'tstid' +TEST_FLAG = 'trnid' +VALID_FLAG = 'valid' def default_mapper(sample): @@ -53,8 +59,8 @@ def default_mapper(sample): map image bytes data to type needed by model input layer ''' img, label = sample - img = paddle.image.load_image_bytes(img) - img = paddle.image.simple_transform(img, 256, 224, True) + img = load_image_bytes(img) + img = simple_transform(img, 256, 224, True) return img.flatten().astype('float32'), label @@ -63,22 +69,23 @@ def reader_creator(data_file, setid_file, dataset_name, mapper=default_mapper, - buffered_size=1024): + buffered_size=1024, + use_xmap=True): ''' - 1. read images from tar file and + 1. read images from tar file and merge images into batch files in 102flowers.tgz_batch/ 2. get a reader to read sample from batch file - - :param data_file: downloaded data file + + :param data_file: downloaded data file :type data_file: string - :param label_file: downloaded label file + :param label_file: downloaded label file :type label_file: string :param setid_file: downloaded setid file containing information about how to split dataset :type setid_file: string :param dataset_name: data set name (tstid|trnid|valid) :type dataset_name: string - :param mapper: a function to map image bytes data to type + :param mapper: a function to map image bytes data to type needed by model input layer :type mapper: callable :param buffered_size: the size of buffer used to process images @@ -105,15 +112,17 @@ def reader_creator(data_file, for sample, label in itertools.izip(data, batch['label']): yield sample, int(label) - return paddle.reader.xmap_readers(mapper, reader, - cpu_count(), buffered_size) + if use_xmap: + return xmap_readers(mapper, reader, cpu_count(), buffered_size) + else: + return map_readers(mapper, reader) -def train(mapper=default_mapper, buffered_size=1024): +def train(mapper=default_mapper, buffered_size=1024, use_xmap=True): ''' - Create flowers training set reader. - It returns a reader, each sample in the reader is - image pixels in [0, 1] and label in [1, 102] + Create flowers training set reader. + It returns a reader, each sample in the reader is + image pixels in [0, 1] and label in [1, 102] translated from original color image by steps: 1. resize to 256*256 2. random crop to 224*224 @@ -128,15 +137,15 @@ def train(mapper=default_mapper, buffered_size=1024): return reader_creator( download(DATA_URL, 'flowers', DATA_MD5), download(LABEL_URL, 'flowers', LABEL_MD5), - download(SETID_URL, 'flowers', SETID_MD5), 'trnid', mapper, - buffered_size) + download(SETID_URL, 'flowers', SETID_MD5), TRAIN_FLAG, mapper, + buffered_size, use_xmap) -def test(mapper=default_mapper, buffered_size=1024): +def test(mapper=default_mapper, buffered_size=1024, use_xmap=True): ''' - Create flowers test set reader. 
- It returns a reader, each sample in the reader is - image pixels in [0, 1] and label in [1, 102] + Create flowers test set reader. + It returns a reader, each sample in the reader is + image pixels in [0, 1] and label in [1, 102] translated from original color image by steps: 1. resize to 256*256 2. random crop to 224*224 @@ -151,15 +160,15 @@ def test(mapper=default_mapper, buffered_size=1024): return reader_creator( download(DATA_URL, 'flowers', DATA_MD5), download(LABEL_URL, 'flowers', LABEL_MD5), - download(SETID_URL, 'flowers', SETID_MD5), 'tstid', mapper, - buffered_size) + download(SETID_URL, 'flowers', SETID_MD5), TEST_FLAG, mapper, + buffered_size, use_xmap) -def valid(mapper=default_mapper, buffered_size=1024): +def valid(mapper=default_mapper, buffered_size=1024, use_xmap=True): ''' - Create flowers validation set reader. - It returns a reader, each sample in the reader is - image pixels in [0, 1] and label in [1, 102] + Create flowers validation set reader. + It returns a reader, each sample in the reader is + image pixels in [0, 1] and label in [1, 102] translated from original color image by steps: 1. resize to 256*256 2. random crop to 224*224 @@ -174,8 +183,8 @@ def valid(mapper=default_mapper, buffered_size=1024): return reader_creator( download(DATA_URL, 'flowers', DATA_MD5), download(LABEL_URL, 'flowers', LABEL_MD5), - download(SETID_URL, 'flowers', SETID_MD5), 'valid', mapper, - buffered_size) + download(SETID_URL, 'flowers', SETID_MD5), VALID_FLAG, mapper, + buffered_size, use_xmap) def fetch(): diff --git a/python/paddle/v2/dataset/imdb.py b/python/paddle/v2/dataset/imdb.py index 5dc5abfe53d90ec3adc9a27a49ed086953146497..c0ec5992e0e6b0a2fd2359910d0f7a6c690c2ec3 100644 --- a/python/paddle/v2/dataset/imdb.py +++ b/python/paddle/v2/dataset/imdb.py @@ -28,7 +28,7 @@ import re import string import threading -__all__ = ['build_dict', 'train', 'test'] +__all__ = ['build_dict', 'train', 'test', 'convert'] URL = 'http://ai.stanford.edu/%7Eamaas/data/sentiment/aclImdb_v1.tar.gz' MD5 = '7c2ac02c03563afcf9b574c7e56c153a' @@ -166,3 +166,12 @@ def word_dict(): def fetch(): paddle.v2.dataset.common.download(URL, 'imdb', MD5) + + +def convert(path): + """ + Converts dataset to recordio format + """ + w = word_dict() + paddle.v2.dataset.common.convert(path, lambda: train(w), 10, "imdb_train") + paddle.v2.dataset.common.convert(path, lambda: test(w), 10, "imdb_test") diff --git a/python/paddle/v2/dataset/imikolov.py b/python/paddle/v2/dataset/imikolov.py index dd3a4552d2e1a2b00dde5ddb7ac1d78445bdca51..b18ee8e9ba91e0e8ccf061223b3c0d4636442956 100644 --- a/python/paddle/v2/dataset/imikolov.py +++ b/python/paddle/v2/dataset/imikolov.py @@ -22,7 +22,7 @@ import paddle.v2.dataset.common import collections import tarfile -__all__ = ['train', 'test', 'build_dict'] +__all__ = ['train', 'test', 'build_dict', 'convert'] URL = 'http://www.fit.vutbr.cz/~imikolov/rnnlm/simple-examples.tgz' MD5 = '30177ea32e27c525793142b6bf2c8e2d' @@ -146,3 +146,15 @@ def test(word_idx, n, data_type=DataType.NGRAM): def fetch(): paddle.v2.dataset.common.download(URL, "imikolov", MD5) + + +def convert(path): + """ + Converts dataset to recordio format + """ + N = 5 + word_dict = build_dict() + paddle.v2.dataset.common.convert(path, + train(word_dict, N), 10, "imikolov_train") + paddle.v2.dataset.common.convert(path, + test(word_dict, N), 10, "imikolov_test") diff --git a/python/paddle/v2/dataset/mnist.py b/python/paddle/v2/dataset/mnist.py index 
435556b2921b7976bbc61160ce3812949981c9e7..ea5891f4f3f6ee1c5023cccee9732cbd9d78b881 100644 --- a/python/paddle/v2/dataset/mnist.py +++ b/python/paddle/v2/dataset/mnist.py @@ -21,7 +21,7 @@ import paddle.v2.dataset.common import subprocess import numpy import platform -__all__ = ['train', 'test'] +__all__ = ['train', 'test', 'convert'] URL_PREFIX = 'http://yann.lecun.com/exdb/mnist/' TEST_IMAGE_URL = URL_PREFIX + 't10k-images-idx3-ubyte.gz' @@ -113,3 +113,11 @@ def fetch(): paddle.v2.dataset.common.download(TRAIN_LABEL_URL, 'mnist', TRAIN_LABEL_MD5) paddle.v2.dataset.common.download(TEST_IMAGE_URL, 'mnist', TEST_IMAGE_MD5) paddle.v2.dataset.common.download(TEST_LABEL_URL, 'mnist', TRAIN_LABEL_MD5) + + +def convert(path): + """ + Converts dataset to recordio format + """ + paddle.v2.dataset.common.convert(path, train(), 10, "mnist_train") + paddle.v2.dataset.common.convert(path, test(), 10, "mnist_test") diff --git a/python/paddle/v2/dataset/movielens.py b/python/paddle/v2/dataset/movielens.py index 837a85912663826f0483aff4f6a38f3945375d82..d9372d422a3293eddeb7c0d5b7c8980f55c44690 100644 --- a/python/paddle/v2/dataset/movielens.py +++ b/python/paddle/v2/dataset/movielens.py @@ -23,14 +23,15 @@ set and test set into paddle reader creators. """ import zipfile -from common import download +import paddle.v2.dataset.common import re import random import functools __all__ = [ 'train', 'test', 'get_movie_title_dict', 'max_movie_id', 'max_user_id', - 'age_table', 'movie_categories', 'max_job_id', 'user_info', 'movie_info' + 'age_table', 'movie_categories', 'max_job_id', 'user_info', 'movie_info', + 'convert' ] age_table = [1, 18, 25, 35, 45, 50, 56] @@ -99,7 +100,7 @@ USER_INFO = None def __initialize_meta_info__(): - fn = download(URL, "movielens", MD5) + fn = paddle.v2.dataset.common.download(URL, "movielens", MD5) global MOVIE_INFO if MOVIE_INFO is None: pattern = re.compile(r'^(.*)\((\d+)\)$') @@ -246,7 +247,15 @@ def unittest(): def fetch(): - download(URL, "movielens", MD5) + paddle.v2.dataset.common.download(URL, "movielens", MD5) + + +def convert(path): + """ + Converts dataset to recordio format + """ + paddle.v2.dataset.common.convert(path, train(), 10, "movielens_train") + paddle.v2.dataset.common.convert(path, test(), 10, "movielens_test") if __name__ == '__main__': diff --git a/python/paddle/v2/dataset/sentiment.py b/python/paddle/v2/dataset/sentiment.py index 4dd34e7383fe2a290fcf61474914183a383e2b9c..e33f120c8734621fd60497298d993e6e43bd06e0 100644 --- a/python/paddle/v2/dataset/sentiment.py +++ b/python/paddle/v2/dataset/sentiment.py @@ -26,9 +26,9 @@ from itertools import chain import nltk from nltk.corpus import movie_reviews -import common +import paddle.v2.dataset.common -__all__ = ['train', 'test', 'get_word_dict'] +__all__ = ['train', 'test', 'get_word_dict', 'convert'] NUM_TRAINING_INSTANCES = 1600 NUM_TOTAL_INSTANCES = 2000 @@ -39,12 +39,13 @@ def download_data_if_not_yet(): """ try: # make sure that nltk can find the data - if common.DATA_HOME not in nltk.data.path: - nltk.data.path.append(common.DATA_HOME) + if paddle.v2.dataset.common.DATA_HOME not in nltk.data.path: + nltk.data.path.append(paddle.v2.dataset.common.DATA_HOME) movie_reviews.categories() except LookupError: print "Downloading movie_reviews data set, please wait....." - nltk.download('movie_reviews', download_dir=common.DATA_HOME) + nltk.download( + 'movie_reviews', download_dir=paddle.v2.dataset.common.DATA_HOME) print "Download data set success....."
print "Path is " + nltk.data.find('corpora/movie_reviews').path @@ -128,4 +129,13 @@ def test(): def fetch(): - nltk.download('movie_reviews', download_dir=common.DATA_HOME) + nltk.download( + 'movie_reviews', download_dir=paddle.v2.dataset.common.DATA_HOME) + + +def convert(path): + """ + Converts dataset to recordio format + """ + paddle.v2.dataset.common.convert(path, train, 10, "sentiment_train") + paddle.v2.dataset.common.convert(path, test, 10, "sentiment_test") diff --git a/python/paddle/v2/dataset/tests/flowers_test.py b/python/paddle/v2/dataset/tests/flowers_test.py index cc0626f4feae287d18dfb227cc69a4174da055da..a8ae9a07acc22eb9d3c0cc5ebb07f8f11ed21233 100644 --- a/python/paddle/v2/dataset/tests/flowers_test.py +++ b/python/paddle/v2/dataset/tests/flowers_test.py @@ -31,13 +31,13 @@ class TestFlowers(unittest.TestCase): def test_train(self): instances, max_label_value = self.check_reader( paddle.v2.dataset.flowers.train()) - self.assertEqual(instances, 1020) + self.assertEqual(instances, 6149) self.assertEqual(max_label_value, 102) def test_test(self): instances, max_label_value = self.check_reader( paddle.v2.dataset.flowers.test()) - self.assertEqual(instances, 6149) + self.assertEqual(instances, 1020) self.assertEqual(max_label_value, 102) def test_valid(self): diff --git a/python/paddle/v2/dataset/uci_housing.py b/python/paddle/v2/dataset/uci_housing.py index 3469fd9ce12dd4d934004f90286979b73048a5c8..ec10ce646ebf3eca2c2a6423b69ee11b6a2b99cf 100644 --- a/python/paddle/v2/dataset/uci_housing.py +++ b/python/paddle/v2/dataset/uci_housing.py @@ -21,7 +21,7 @@ parse training set and test set into paddle reader creators. import numpy as np import os -from common import download +import paddle.v2.dataset.common __all__ = ['train', 'test'] @@ -29,7 +29,7 @@ URL = 'https://archive.ics.uci.edu/ml/machine-learning-databases/housing/housing MD5 = 'd4accdce7a25600298819f8e28e8d593' feature_names = [ 'CRIM', 'ZN', 'INDUS', 'CHAS', 'NOX', 'RM', 'AGE', 'DIS', 'RAD', 'TAX', - 'PTRATIO', 'B', 'LSTAT' + 'PTRATIO', 'B', 'LSTAT', 'convert' ] UCI_TRAIN_DATA = None @@ -82,7 +82,7 @@ def train(): :rtype: callable """ global UCI_TRAIN_DATA - load_data(download(URL, 'uci_housing', MD5)) + load_data(paddle.v2.dataset.common.download(URL, 'uci_housing', MD5)) def reader(): for d in UCI_TRAIN_DATA: @@ -102,7 +102,7 @@ def test(): :rtype: callable """ global UCI_TEST_DATA - load_data(download(URL, 'uci_housing', MD5)) + load_data(paddle.v2.dataset.common.download(URL, 'uci_housing', MD5)) def reader(): for d in UCI_TEST_DATA: @@ -112,4 +112,12 @@ def test(): def fetch(): - download(URL, 'uci_housing', MD5) + paddle.v2.dataset.common.download(URL, 'uci_housing', MD5) + + +def convert(path): + """ + Converts dataset to recordio format + """ + paddle.v2.dataset.common.convert(path, train(), 10, "uci_housing_train") + paddle.v2.dataset.common.convert(path, test(), 10, "uci_houseing_test") diff --git a/python/paddle/v2/dataset/wmt14.py b/python/paddle/v2/dataset/wmt14.py index 0902f87741c342b237439081703081b467dc6f35..e1dc4f4c30051202e8fd077087679c4fd6cbd7a0 100644 --- a/python/paddle/v2/dataset/wmt14.py +++ b/python/paddle/v2/dataset/wmt14.py @@ -22,10 +22,10 @@ parse training set and test set into paddle reader creators. 
import tarfile import gzip -from paddle.v2.dataset.common import download +import paddle.v2.dataset.common from paddle.v2.parameters import Parameters -__all__ = ['train', 'test', 'build_dict'] +__all__ = ['train', 'test', 'build_dict', 'convert'] URL_DEV_TEST = 'http://www-lium.univ-lemans.fr/~schwenk/cslm_joint_paper/data/dev+test.tgz' MD5_DEV_TEST = '7d7897317ddd8ba0ae5c5fa7248d3ff5' @@ -115,7 +115,8 @@ def train(dict_size): :rtype: callable """ return reader_creator( - download(URL_TRAIN, 'wmt14', MD5_TRAIN), 'train/train', dict_size) + paddle.v2.dataset.common.download(URL_TRAIN, 'wmt14', MD5_TRAIN), + 'train/train', dict_size) def test(dict_size): @@ -130,16 +131,18 @@ def test(dict_size): :rtype: callable """ return reader_creator( - download(URL_TRAIN, 'wmt14', MD5_TRAIN), 'test/test', dict_size) + paddle.v2.dataset.common.download(URL_TRAIN, 'wmt14', MD5_TRAIN), + 'test/test', dict_size) def gen(dict_size): return reader_creator( - download(URL_TRAIN, 'wmt14', MD5_TRAIN), 'gen/gen', dict_size) + paddle.v2.dataset.common.download(URL_TRAIN, 'wmt14', MD5_TRAIN), + 'gen/gen', dict_size) def model(): - tar_file = download(URL_MODEL, 'wmt14', MD5_MODEL) + tar_file = paddle.v2.dataset.common.download(URL_MODEL, 'wmt14', MD5_MODEL) with gzip.open(tar_file, 'r') as f: parameters = Parameters.from_tar(f) return parameters @@ -148,7 +151,7 @@ def model(): def get_dict(dict_size, reverse=True): # if reverse = False, return dict = {'a':'001', 'b':'002', ...} # else reverse = true, return dict = {'001':'a', '002':'b', ...} - tar_file = download(URL_TRAIN, 'wmt14', MD5_TRAIN) + tar_file = paddle.v2.dataset.common.download(URL_TRAIN, 'wmt14', MD5_TRAIN) src_dict, trg_dict = __read_to_dict__(tar_file, dict_size) if reverse: src_dict = {v: k for k, v in src_dict.items()} @@ -157,5 +160,14 @@ def get_dict(dict_size, reverse=True): def fetch(): - download(URL_TRAIN, 'wmt14', MD5_TRAIN) - download(URL_MODEL, 'wmt14', MD5_MODEL) + paddle.v2.dataset.common.download(URL_TRAIN, 'wmt14', MD5_TRAIN) + paddle.v2.dataset.common.download(URL_MODEL, 'wmt14', MD5_MODEL) + + +def convert(path): + """ + Converts dataset to recordio format + """ + dict_size = 30000 + paddle.v2.dataset.common.convert(path, train(dict_size), 10, "wmt14_train") + paddle.v2.dataset.common.convert(path, test(dict_size), 10, "wmt14_test") diff --git a/python/paddle/v2/layer.py b/python/paddle/v2/layer.py index bbb9c3ea8c1b389f0ec9fd5ec7be52bd0449f52d..4ade1c6f329ae39769279963af6809f938807bdd 100644 --- a/python/paddle/v2/layer.py +++ b/python/paddle/v2/layer.py @@ -45,12 +45,12 @@ __all__ = ['data', 'parse_network'] def __need_to_keep__(name): return name in [ 'StaticInput', 'SubsequenceInput', 'GeneratedInput', 'LayerType', - 'layer_support' + 'layer_support', 'BaseGeneratedInput' ] def __need_to_wrap__(name): - return name not in ['AggregateLevel', 'ExpandLevel'] + return name not in ['AggregateLevel', 'ExpandLevel', 'BaseGeneratedInput'] def __convert_name__(inname): @@ -199,6 +199,15 @@ def __get_used_submodels__(layer_names): return submodel_names +def __get_submodel_data_out_links__(): + data_links = set() + for submodel in cp.g_config.model_config.sub_models: + for link in submodel.out_links: + if cp.g_layer_map[link.link_name].type == 'data': + data_links.add(link.link_name) + return data_links + + def __get_used_evaluators__(layer_names): evaluator_names = set() for e in cp.g_config.model_config.evaluators: @@ -264,6 +273,7 @@ def parse_network(output_layers, extra_layers=None): submodel_names = 
__get_used_submodels__(layer_names) submodel_names.add('root') evaluator_names = __get_used_evaluators__(layer_names) + data_out_links = __get_submodel_data_out_links__() input_layer_names = set() output_layer_names = set() @@ -279,7 +289,7 @@ def parse_network(output_layers, extra_layers=None): continue model_config.layers.extend([l]) if l.type == 'data': - if l.name in model_config.output_layer_names: + if l.name in data_out_links: """ In text generation, the outlink to save the generated word indices is a data_layer defined in recurrent_group. This diff --git a/python/paddle/v2/parameters.py b/python/paddle/v2/parameters.py index ad20241b98302f136326ae491c6723a6c12ae284..bbaf8bfa979fbbf460561ebf7077b75b9c41a11a 100644 --- a/python/paddle/v2/parameters.py +++ b/python/paddle/v2/parameters.py @@ -51,7 +51,7 @@ class Parameters(object): def __init__(self): self.__param_conf__ = dict() self.__gradient_machines__ = [] - self.__tmp_params__ = [] + self.__tmp_params__ = dict() def __append_config__(self, param_conf): """ @@ -128,13 +128,10 @@ class Parameters(object): if len(self.__gradient_machines__) == 0: # create new parameter in python numpy. - if len(self.__tmp_params__) != 0: - ret_list = [ - mat for name, mat in self.__tmp_params__ if name == key - ] - if len(ret_list) == 1: - return ret_list[0] - return np.ndarray(shape=shape, dtype=np.float32) + if key in self.__tmp_params__: + return self.__tmp_params__[key] + else: + return np.ndarray(shape=shape, dtype=np.float32) else: for each_gradient_machine in self.__gradient_machines__: param = __get_parameter_in_gradient_machine__( @@ -187,7 +184,7 @@ class Parameters(object): (shape, value.shape)) if len(self.__gradient_machines__) == 0: - self.__tmp_params__.append((key, value)) + self.__tmp_params__[key] = value else: for each_gradient_machine in self.__gradient_machines__: __copy_parameter_to_gradient_machine__(each_gradient_machine, @@ -231,7 +228,7 @@ class Parameters(object): raise ValueError("gradient_machine should be api.GradientMachine") if len(self.__tmp_params__) != 0: - for name, val in self.__tmp_params__: + for name, val in self.__tmp_params__.iteritems(): try: __copy_parameter_to_gradient_machine__(gradient_machine, name, val) @@ -287,6 +284,18 @@ class Parameters(object): @staticmethod def from_tar(f): + """ + Create a `Parameters` object from the given file. The + `Parameters` object only contains the parameters in this + file. It assumes the parameters in the file are the same as + those defined in the network, so it can be used, for example, + in inference. + + :param f: the saved model file. + :type f: tar file + :return: A Parameters object. + :rtype: Parameters. + """ params = Parameters() tar = tarfile.TarFile(fileobj=f, mode='r') for finfo in tar: @@ -302,6 +311,21 @@ class Parameters(object): params.deserialize(param_name, f) return params + def init_from_tar(self, f): + """ + Different from `from_tar`, this interface can be used to + init partial network parameters from another saved model. + + :param f: the saved model file. + :type f: tar file + :return: Nothing. + """ + + tar_param = Parameters.from_tar(f) + for pname in tar_param.names(): + if pname in self.names(): + self.set(pname, tar_param.get(pname)) + def __get_parameter_in_gradient_machine__(gradient_machine, name): """
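A sketch of the intended use of init_from_tar: warm-starting part of a network from a previously saved model. cost and 'model.tar' are placeholders here, not names from the patch:

.. code-block:: python

    import paddle.v2 as paddle

    parameters = paddle.parameters.create(cost)  # cost: hypothetical topology
    with open('model.tar', 'rb') as f:
        parameters.init_from_tar(f)  # copies only parameters whose names match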
+ """ + + tar_param = Parameters.from_tar(f) + for pname in tar_param.names(): + if pname in self.names(): + self.set(pname, tar_param.get(pname)) + def __get_parameter_in_gradient_machine__(gradient_machine, name): """ diff --git a/python/paddle/v2/reader/creator.py b/python/paddle/v2/reader/creator.py index 07142056f872db5113acdd296b17c52b343c1be6..9f888b16d6b2fbf457ee4f4fe94fcb51b6f37fc9 100644 --- a/python/paddle/v2/reader/creator.py +++ b/python/paddle/v2/reader/creator.py @@ -16,7 +16,7 @@ Creator package contains some simple reader creator, which could be used in user program. """ -__all__ = ['np_array', 'text_file'] +__all__ = ['np_array', 'text_file', "recordio"] def np_array(x): @@ -55,3 +55,24 @@ def text_file(path): f.close() return reader + + +def recordio(path): + """ + Creates a data reader that outputs record one one by one from given recordio file + :path: path of recordio file + :returns: data reader of recordio file + """ + + import recordio as rec + + def reader(): + f = rec.reader(path) + while True: + r = f.read() + if r is None: + break + yield r + f.close() + + return reader diff --git a/python/paddle/v2/reader/decorator.py b/python/paddle/v2/reader/decorator.py index e432003129d2b8dea60138d08f13ec5e9d29a7ad..45a4288751e37b99dd1005ec78f30a98044926ff 100644 --- a/python/paddle/v2/reader/decorator.py +++ b/python/paddle/v2/reader/decorator.py @@ -166,12 +166,12 @@ def buffered(reader, size): The buffered data reader will read and save data entries into a buffer. Reading from the buffered data reader will proceed as long as the buffer is not empty. - + :param reader: the data reader to read from. :type reader: callable :param size: max buffer size. :type size: int - + :returns: the buffered data reader. """ @@ -238,7 +238,7 @@ def xmap_readers(mapper, reader, process_num, buffer_size, order=False): :type mapper: callable :param reader: the data reader to read from :type reader: callable - :param process_num: process number to handle original sample + :param process_num: process number to handle original sample :type process_num: int :param buffer_size: max buffer size :type buffer_size: int @@ -248,9 +248,6 @@ def xmap_readers(mapper, reader, process_num, buffer_size, order=False): :rtype: callable """ end = XmapEndSignal() - in_queue = Queue(buffer_size) - out_queue = Queue(buffer_size) - out_order = [0] # define a worker to read samples from reader to in_queue def read_worker(reader, in_queue): @@ -266,12 +263,6 @@ def xmap_readers(mapper, reader, process_num, buffer_size, order=False): in_order += 1 in_queue.put(end) - # start a read worker in a thread - target = order_read_worker if order else read_worker - t = Thread(target=target, args=(reader, in_queue)) - t.daemon = True - t.start() - # define a worker to handle samples from in_queue by mapper # and put mapped samples into out_queue def handle_worker(in_queue, out_queue, mapper): @@ -298,19 +289,27 @@ def xmap_readers(mapper, reader, process_num, buffer_size, order=False): in_queue.put(end) out_queue.put(end) - # start several handle_workers - target = order_handle_worker if order else handle_worker - args = (in_queue, out_queue, mapper, out_order) if order else ( - in_queue, out_queue, mapper) - workers = [] - for i in xrange(process_num): - worker = Thread(target=target, args=args) - worker.daemon = True - workers.append(worker) - for w in workers: - w.start() - def xreader(): + in_queue = Queue(buffer_size) + out_queue = Queue(buffer_size) + out_order = [0] + # start a read worker in a thread + target = 
order_read_worker if order else read_worker + t = Thread(target=target, args=(reader, in_queue)) + t.daemon = True + t.start() + # start several handle_workers + target = order_handle_worker if order else handle_worker + args = (in_queue, out_queue, mapper, out_order) if order else ( + in_queue, out_queue, mapper) + workers = [] + for i in xrange(process_num): + worker = Thread(target=target, args=args) + worker.daemon = True + workers.append(worker) + for w in workers: + w.start() + sample = out_queue.get() while not isinstance(sample, XmapEndSignal): yield sample diff --git a/python/paddle/v2/reader/tests/creator_test.py b/python/paddle/v2/reader/tests/creator_test.py index 9f8d7133b8694aae5541eff9576eaba8a31e77dc..ba4f558874a0155d276fcb0e0d2d9258f0903f0e 100644 --- a/python/paddle/v2/reader/tests/creator_test.py +++ b/python/paddle/v2/reader/tests/creator_test.py @@ -13,9 +13,7 @@ # limitations under the License. import os import unittest - import numpy as np - import paddle.v2.reader.creator @@ -36,5 +34,14 @@ class TestTextFile(unittest.TestCase): self.assertEqual(e, str(idx * 2) + " " + str(idx * 2 + 1)) +class TestRecordIO(unittest.TestCase): + def test_recordio(self): + path = os.path.join( + os.path.dirname(__file__), "test_recordio_creator.dat") + reader = paddle.v2.reader.creator.recordio(path) + for idx, r in enumerate(reader()): + self.assertSequenceEqual(r, str(idx)) + + if __name__ == '__main__': unittest.main() diff --git a/python/paddle/v2/reader/tests/decorator_test.py b/python/paddle/v2/reader/tests/decorator_test.py index bb3c5d220b9ce1552d2fc429abb1863930cd4d17..5a92951b100fa51ab6df7039d9c6b54d1f9d963e 100644 --- a/python/paddle/v2/reader/tests/decorator_test.py +++ b/python/paddle/v2/reader/tests/decorator_test.py @@ -132,15 +132,17 @@ class TestXmap(unittest.TestCase): for order in orders: for tNum in thread_nums: for size in buffered_size: - result = [] - for i in paddle.v2.reader.xmap_readers(mapper, + reader = paddle.v2.reader.xmap_readers(mapper, reader_creator_10(0), - tNum, size, order)(): - result.append(i) - if not order: - result.sort() - for idx, e in enumerate(result): - self.assertEqual(e, mapper(idx)) + tNum, size, order) + for n in xrange(3): + result = [] + for i in reader(): + result.append(i) + if not order: + result.sort() + for idx, e in enumerate(result): + self.assertEqual(e, mapper(idx)) if __name__ == '__main__': diff --git a/python/paddle/v2/reader/tests/test_recordio_creator.dat b/python/paddle/v2/reader/tests/test_recordio_creator.dat new file mode 100644 index 0000000000000000000000000000000000000000..17aa89b6796184407e83246d3f342a55a66b4a69 Binary files /dev/null and b/python/paddle/v2/reader/tests/test_recordio_creator.dat differ diff --git a/python/paddle/v2/tests/test_parameters.py b/python/paddle/v2/tests/test_parameters.py index 45372e7dd0ec7cbdd6a2eb5c0397ef7e74284cd0..7ba8a939fbd1a949d61a007b40c054e7543c0cbc 100644 --- a/python/paddle/v2/tests/test_parameters.py +++ b/python/paddle/v2/tests/test_parameters.py @@ -20,14 +20,17 @@ import cStringIO import numpy -def __rand_param_config__(name): +def __rand_param_config__(name, psize=None): conf = ParameterConfig() conf.name = name size = 1 - for i in xrange(2): - dim = random.randint(1, 1000) - conf.dims.append(dim) - size *= dim + if psize is None: + for i in xrange(2): + dim = random.randint(1, 1000) + conf.dims.append(dim) + size *= dim + else: + size = psize conf.size = size assert conf.IsInitialized() return conf @@ -77,6 +80,50 @@ class TestParameters(unittest.TestCase): expected 
= numpy.array([[1, 1], [1, 2], [1, 1]], numpy.float32) assert numpy.logical_and.reduce(numpy.reshape(val == expected, 6)) + def test_init_from_tar(self): + def get_param(names, size): + p = parameters.Parameters() + for k, v in zip(names, size): + p.__append_config__(__rand_param_config__(k, v)) + for name in p.names(): + param = p.get(name) + param[:] = numpy.random.uniform( + -1.0, 1.0, size=p.get_shape(name)) + p.set(name, param) + return p + + def get_params(): + name1 = ['param_0', 'param_1'] + size1 = [128, 256] + p1 = get_param(name1, size1) + file1 = cStringIO.StringIO() + p1.to_tar(file1) + file1.seek(0) + + name2 = ['param_0', 'param_1', 'param_2'] + size2 = [128, 256, 288] + p2 = get_param(name2, size2) + file2 = cStringIO.StringIO() + p2.to_tar(file2) + file2.seek(0) + return p1, file1, p2, file2 + + p1, file1, p2, file2 = get_params() + p2.init_from_tar(file1) + for name in p1.names(): + self.assertEqual(p1.get_shape(name), p2.get_shape(name)) + v1 = p1.get(name) + v2 = p2.get(name) + self.assertTrue(numpy.isclose(v1, v2).all()) + + p1, file1, p2, file2 = get_params() + p1.init_from_tar(file2) + for name in p1.names(): + self.assertEqual(p1.get_shape(name), p2.get_shape(name)) + v1 = p1.get(name) + v2 = p2.get(name) + self.assertTrue(numpy.isclose(v1, v2).all()) + if __name__ == '__main__': unittest.main() diff --git a/python/setup.py.in b/python/setup.py.in index 2e22f640cb55677b6814b7f26a71457a96449de7..aa6771709cad0bb4dd4ce39c81de7e6ab1ad4c73 100644 --- a/python/setup.py.in +++ b/python/setup.py.in @@ -13,8 +13,10 @@ packages=['paddle', setup_requires=["requests", "numpy", "protobuf==3.1", + "recordio", "matplotlib", - "rarfile"] + "rarfile", + "scipy>=0.19.0"] if '${CMAKE_SYSTEM_PROCESSOR}' not in ['arm', 'armv7-a', 'aarch64']: setup_requires+=["opencv-python"]
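Finally, a note on the decorator.py change above: moving queue creation and thread startup into xreader() makes the returned reader re-iterable, which is what the updated decorator_test now exercises. A hedged sketch:

.. code-block:: python

    import paddle.v2.reader as reader

    r = reader.xmap_readers(
        lambda x: x * 2,          # mapper
        lambda: iter(range(10)),  # a trivial reader creator
        2,                        # process_num: number of worker threads
        4)                        # buffer_size
    first = sorted(r())   # fresh queues and workers are created per call,
    second = sorted(r())  # so a second iteration now works
    assert first == second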