提交 d83bae85 编写于 作者: W whs 提交者: GitHub

Merge branch 'develop' into crop_layer

...@@ -19,3 +19,9 @@ third_party/ ...@@ -19,3 +19,9 @@ third_party/
# clion workspace. # clion workspace.
cmake-build-* cmake-build-*
# generated while compiling
python/paddle/v2/framework/core.so
CMakeFiles
cmake_install.cmake
...@@ -21,10 +21,10 @@ ...@@ -21,10 +21,10 @@
sha: 28c0ea8a67a3e2dbbf4822ef44e85b63a0080a29 sha: 28c0ea8a67a3e2dbbf4822ef44e85b63a0080a29
hooks: hooks:
- id: clang-formater - id: clang-formater
- repo: https://github.com/dnephin/pre-commit-golang - repo: https://github.com/PaddlePaddle/pre-commit-golang
sha: e4693a4c282b4fc878eda172a929f7a6508e7d16 sha: 16398aeccf263adaf53b2495eed0406347d76281
hooks: hooks:
- id: go-fmt - id: go-fmt
files: (.*\.go) types: [go]
- id: go-lint - id: gometalinter
files: (.*\.go) types: [go]
...@@ -4,6 +4,7 @@ cache: ...@@ -4,6 +4,7 @@ cache:
- $HOME/.ccache - $HOME/.ccache
- $HOME/.cache/pip - $HOME/.cache/pip
- $TRAVIS_BUILD_DIR/build/third_party - $TRAVIS_BUILD_DIR/build/third_party
- $TRAVIS_BUILD_DIR/build_android/third_party
sudo: required sudo: required
dist: trusty dist: trusty
os: os:
...@@ -11,6 +12,7 @@ os: ...@@ -11,6 +12,7 @@ os:
env: env:
- JOB=build_doc - JOB=build_doc
- JOB=check_style - JOB=check_style
- JOB=build_android
addons: addons:
apt: apt:
packages: packages:
...@@ -39,6 +41,8 @@ before_install: ...@@ -39,6 +41,8 @@ before_install:
- pip install rarfile - pip install rarfile
- curl https://glide.sh/get | bash - curl https://glide.sh/get | bash
- eval "$(GIMME_GO_VERSION=1.8.3 gimme)" - eval "$(GIMME_GO_VERSION=1.8.3 gimme)"
- go get -u github.com/alecthomas/gometalinter
- gometalinter --install
- | - |
function timeout() { perl -e 'alarm shift; exec @ARGV' "$@"; } function timeout() { perl -e 'alarm shift; exec @ARGV' "$@"; }
script: script:
......
...@@ -28,7 +28,9 @@ if(NOT CMAKE_CROSSCOMPILING) ...@@ -28,7 +28,9 @@ if(NOT CMAKE_CROSSCOMPILING)
endif(NOT CMAKE_CROSSCOMPILING) endif(NOT CMAKE_CROSSCOMPILING)
find_package(Git REQUIRED) find_package(Git REQUIRED)
find_package(Threads REQUIRED) find_package(Threads REQUIRED)
find_package(Boost QUIET) if(NOT ANDROID)
find_package(Boost QUIET)
endif()
include(simd) include(simd)
...@@ -97,6 +99,7 @@ include(external/swig) # download, build, install swig ...@@ -97,6 +99,7 @@ include(external/swig) # download, build, install swig
include(external/warpctc) # download, build, install warpctc include(external/warpctc) # download, build, install warpctc
include(external/any) # download libn::any include(external/any) # download libn::any
include(external/eigen) # download eigen3 include(external/eigen) # download eigen3
include(external/pybind11) # download pybind11
include(cudnn) # set cudnn libraries, must before configure include(cudnn) # set cudnn libraries, must before configure
include(configure) # add paddle env configuration include(configure) # add paddle env configuration
...@@ -134,11 +137,16 @@ if(WITH_GPU) ...@@ -134,11 +137,16 @@ if(WITH_GPU)
endif(WITH_GPU) endif(WITH_GPU)
if(USE_NNPACK) if(USE_NNPACK)
list(APPEND EXTERNAL_LIBS ${NNPACK_LIB} ${PTHREADPOOL_LIB} "rt") include(external/nnpack)
list(APPEND EXTERNAL_LIBS ${NNPACK_LIBS})
endif(USE_NNPACK) endif(USE_NNPACK)
add_subdirectory(proto) add_subdirectory(proto)
# "add_subdirectory(go)" should be placed after the following loine,
# because it depends on paddle/optimizer.
add_subdirectory(paddle/optimizer)
# "add_subdirectory(paddle)" and "add_subdirectory(python)" should be # "add_subdirectory(paddle)" and "add_subdirectory(python)" should be
# placed after this block, because they depends on it. # placed after this block, because they depends on it.
if(WITH_GOLANG) if(WITH_GOLANG)
...@@ -146,7 +154,9 @@ if(WITH_GOLANG) ...@@ -146,7 +154,9 @@ if(WITH_GOLANG)
endif(WITH_GOLANG) endif(WITH_GOLANG)
add_subdirectory(paddle) add_subdirectory(paddle)
add_subdirectory(python) if(WITH_PYTHON)
add_subdirectory(python)
endif()
if(WITH_DOC) if(WITH_DOC)
add_subdirectory(doc) add_subdirectory(doc)
endif() endif()
...@@ -25,7 +25,7 @@ COPY ./paddle/scripts/docker/root/ /root/ ...@@ -25,7 +25,7 @@ COPY ./paddle/scripts/docker/root/ /root/
RUN apt-get update && \ RUN apt-get update && \
apt-get install -y \ apt-get install -y \
git python-pip python-dev openssh-server bison \ git python-pip python-dev openssh-server bison \
wget unzip tar xz-utils bzip2 gzip coreutils ntp \ wget unzip unrar tar xz-utils bzip2 gzip coreutils ntp \
curl sed grep graphviz libjpeg-dev zlib1g-dev \ curl sed grep graphviz libjpeg-dev zlib1g-dev \
python-numpy python-matplotlib gcc g++ \ python-numpy python-matplotlib gcc g++ \
automake locales clang-format-3.8 swig doxygen cmake \ automake locales clang-format-3.8 swig doxygen cmake \
......
...@@ -14,6 +14,17 @@ RUN apt-get update && \ ...@@ -14,6 +14,17 @@ RUN apt-get update && \
wget curl tar unzip gcc g++ locales clang-format-3.8 swig cmake && \ wget curl tar unzip gcc g++ locales clang-format-3.8 swig cmake && \
apt-get clean -y apt-get clean -y
# Install Go and glide
RUN wget -O go.tgz https://storage.googleapis.com/golang/go1.8.1.linux-amd64.tar.gz && \
tar -C /usr/local -xzf go.tgz && \
mkdir /root/gopath && \
mkdir /root/gopath/bin && \
mkdir /root/gopath/src && \
rm go.tgz
ENV GOROOT=/usr/local/go GOPATH=/root/gopath
# should not be in the same line with GOROOT definition, otherwise docker build could not find GOROOT.
ENV PATH=${PATH}:${GOROOT}/bin:${GOPATH}/bin
# git credential to skip password typing # git credential to skip password typing
RUN git config --global credential.helper store RUN git config --global credential.helper store
......
...@@ -2,8 +2,8 @@ ...@@ -2,8 +2,8 @@
[![Build Status](https://travis-ci.org/PaddlePaddle/Paddle.svg?branch=develop)](https://travis-ci.org/PaddlePaddle/Paddle) [![Build Status](https://travis-ci.org/PaddlePaddle/Paddle.svg?branch=develop)](https://travis-ci.org/PaddlePaddle/Paddle)
[![Documentation Status](https://img.shields.io/badge/docs-latest-brightgreen.svg?style=flat)](http://www.paddlepaddle.org/develop/doc/) [![Documentation Status](https://img.shields.io/badge/docs-latest-brightgreen.svg?style=flat)](http://doc.paddlepaddle.org/develop/doc/)
[![Documentation Status](https://img.shields.io/badge/中文文档-最新-brightgreen.svg)](http://www.paddlepaddle.org/doc_cn/) [![Documentation Status](https://img.shields.io/badge/中文文档-最新-brightgreen.svg)](http://doc.paddlepaddle.org/develop/doc_cn/)
[![Coverage Status](https://coveralls.io/repos/github/PaddlePaddle/Paddle/badge.svg?branch=develop)](https://coveralls.io/github/PaddlePaddle/Paddle?branch=develop) [![Coverage Status](https://coveralls.io/repos/github/PaddlePaddle/Paddle/badge.svg?branch=develop)](https://coveralls.io/github/PaddlePaddle/Paddle?branch=develop)
[![Release](https://img.shields.io/github/release/PaddlePaddle/Paddle.svg)](https://github.com/PaddlePaddle/Paddle/releases) [![Release](https://img.shields.io/github/release/PaddlePaddle/Paddle.svg)](https://github.com/PaddlePaddle/Paddle/releases)
[![License](https://img.shields.io/badge/license-Apache%202-blue.svg)](LICENSE) [![License](https://img.shields.io/badge/license-Apache%202-blue.svg)](LICENSE)
...@@ -61,35 +61,36 @@ Please refer to our [release announcement](https://github.com/PaddlePaddle/Paddl ...@@ -61,35 +61,36 @@ Please refer to our [release announcement](https://github.com/PaddlePaddle/Paddl
## Installation ## Installation
It is recommended to check out the It is recommended to check out the
[Docker installation guide](http://www.paddlepaddle.org/develop/doc/getstarted/build_and_install/docker_install_en.html) [Docker installation guide](http://doc.paddlepaddle.org/develop/doc/getstarted/build_and_install/docker_install_en.html)
before looking into the before looking into the
[build from source guide](http://www.paddlepaddle.org/develop/doc/getstarted/build_and_install/build_from_source_en.html) [build from source guide](http://doc.paddlepaddle.org/develop/doc/getstarted/build_and_install/build_from_source_en.html)
## Documentation ## Documentation
We provide [English](http://www.paddlepaddle.org/develop/doc/) and We provide [English](http://doc.paddlepaddle.org/develop/doc/) and
[Chinese](http://www.paddlepaddle.org/doc_cn/) documentation. [Chinese](http://doc.paddlepaddle.org/doc_cn/) documentation.
- [Deep Learning 101](http://book.paddlepaddle.org/index.html) - [Deep Learning 101](http://book.paddlepaddle.org/index.html)
You might want to start from the this online interactive book that can run in Jupyter Notebook. You might want to start from the this online interactive book that can run in Jupyter Notebook.
- [Distributed Training](http://www.paddlepaddle.org/develop/doc/howto/usage/cluster/cluster_train_en.html) - [Distributed Training](http://doc.paddlepaddle.org/develop/doc/howto/usage/cluster/cluster_train_en.html)
You can run distributed training jobs on MPI clusters. You can run distributed training jobs on MPI clusters.
- [Distributed Training on Kubernetes](http://www.paddlepaddle.org/develop/doc/howto/usage/k8s/k8s_en.html) - [Distributed Training on Kubernetes](http://doc.paddlepaddle.org/develop/doc/howto/usage/k8s/k8s_en.html)
You can also run distributed training jobs on Kubernetes clusters. You can also run distributed training jobs on Kubernetes clusters.
- [Python API](http://www.paddlepaddle.org/develop/doc/api/index_en.html) - [Python API](http://doc.paddlepaddle.org/develop/doc/api/index_en.html)
Our new API enables much shorter programs. Our new API enables much shorter programs.
- [How to Contribute](http://www.paddlepaddle.org/develop/doc/howto/dev/contribute_to_paddle_en.html) - [How to Contribute](http://doc.paddlepaddle.org/develop/doc/howto/dev/contribute_to_paddle_en.html)
We appreciate your contributions! We appreciate your contributions!
## Ask Questions ## Ask Questions
You are welcome to submit questions and bug reports as [Github Issues](https://github.com/PaddlePaddle/Paddle/issues). You are welcome to submit questions and bug reports as [Github Issues](https://github.com/PaddlePaddle/Paddle/issues).
......
...@@ -102,12 +102,19 @@ if(WITH_GOLANG) ...@@ -102,12 +102,19 @@ if(WITH_GOLANG)
message(FATAL_ERROR "no glide executeble found: $ENV{GOPATH}/bin/glide") message(FATAL_ERROR "no glide executeble found: $ENV{GOPATH}/bin/glide")
endif() endif()
add_custom_target(go_vendor) # this command will only run when the file it depends is missing
add_custom_command(TARGET go_vendor # or has changed, or the output is missing.
add_custom_command(OUTPUT ${CMAKE_BINARY_DIR}/glide
COMMAND env GOPATH=${GOPATH} ${GLIDE} install COMMAND env GOPATH=${GOPATH} ${GLIDE} install
COMMAND touch ${CMAKE_BINARY_DIR}/glide
DEPENDS ${PROJ_ROOT}/go/glide.lock
WORKING_DIRECTORY "${PADDLE_IN_GOPATH}/go" WORKING_DIRECTORY "${PADDLE_IN_GOPATH}/go"
) )
add_dependencies(go_vendor go_path)
# depends on the custom command which outputs
# ${CMAKE_BINARY_DIR}/glide, the custom command does not need to
# run every time this target is built.
add_custom_target(go_vendor DEPENDS ${CMAKE_BINARY_DIR}/glide go_path)
endif() endif()
endif(WITH_GOLANG) endif(WITH_GOLANG)
...@@ -27,7 +27,8 @@ set(IGNORE_PATTERN ...@@ -27,7 +27,8 @@ set(IGNORE_PATTERN
.*cblas\\.h.* .*cblas\\.h.*
.*\\.pb\\.txt .*\\.pb\\.txt
.*LtrDataProvider.* .*LtrDataProvider.*
.*MultiDataProvider.*) .*MultiDataProvider.*
.*pb.*)
# add_style_check_target # add_style_check_target
# #
...@@ -52,14 +53,13 @@ macro(add_style_check_target TARGET_NAME) ...@@ -52,14 +53,13 @@ macro(add_style_check_target TARGET_NAME)
endif() endif()
endforeach() endforeach()
if(LINT MATCHES ON) if(LINT MATCHES ON)
# cpplint code style
get_filename_component(base_filename ${filename} NAME) get_filename_component(base_filename ${filename} NAME)
set(CUR_GEN ${CMAKE_CURRENT_BINARY_DIR}/${base_filename}.cpplint) set(CUR_GEN ${CMAKE_CURRENT_BINARY_DIR}/${base_filename}.cpplint)
add_custom_command(OUTPUT ${CUR_GEN} add_custom_command(TARGET ${TARGET_NAME} PRE_BUILD
PRE_BUILD COMMAND "${PYTHON_EXECUTABLE}" "${PROJ_ROOT}/paddle/scripts/cpplint.py"
COMMAND env ${py_env} "${PYTHON_EXECUTABLE}" "${PROJ_ROOT}/paddle/scripts/cpplint.py"
"--filter=${STYLE_FILTER}" "--filter=${STYLE_FILTER}"
"--write-success=${CUR_GEN}" ${filename} "--write-success=${CUR_GEN}" ${filename}
DEPENDS ${filename}
WORKING_DIRECTORY ${CMAKE_CURRENT_SOURCE_DIR}) WORKING_DIRECTORY ${CMAKE_CURRENT_SOURCE_DIR})
endif() endif()
endforeach() endforeach()
......
...@@ -106,6 +106,10 @@ IF("${CMAKE_VERSION}" VERSION_LESS "3.7.0") ...@@ -106,6 +106,10 @@ IF("${CMAKE_VERSION}" VERSION_LESS "3.7.0")
SET(CMAKE_SYSTEM_PROCESSOR armv7-a) SET(CMAKE_SYSTEM_PROCESSOR armv7-a)
ENDIF() ENDIF()
ENDIF() ENDIF()
IF(ANDROID_ABI STREQUAL "arm64-v8a")
SET(ANDROID_TOOLCHAIN_NAME aarch64-linux-android)
SET(CMAKE_SYSTEM_PROCESSOR aarch64)
ENDIF()
SET(ANDROID_TOOLCHAIN_PREFIX "${ANDROID_TOOLCHAIN_ROOT}/bin/${ANDROID_TOOLCHAIN_NAME}-") SET(ANDROID_TOOLCHAIN_PREFIX "${ANDROID_TOOLCHAIN_ROOT}/bin/${ANDROID_TOOLCHAIN_NAME}-")
ENDIF() ENDIF()
...@@ -162,6 +166,10 @@ IF("${CMAKE_VERSION}" VERSION_LESS "3.7.0") ...@@ -162,6 +166,10 @@ IF("${CMAKE_VERSION}" VERSION_LESS "3.7.0")
ENDIF() ENDIF()
ENDIF() ENDIF()
IF(ANDROID_ABI STREQUAL "arm64-v8a")
LIST(APPEND ANDROID_COMPILER_FLAGS -march=armv8-a)
ENDIF()
STRING(REPLACE ";" " " ANDROID_COMPILER_FLAGS "${ANDROID_COMPILER_FLAGS}") STRING(REPLACE ";" " " ANDROID_COMPILER_FLAGS "${ANDROID_COMPILER_FLAGS}")
STRING(REPLACE ";" " " ANDROID_LINKER_FLAGS "${ANDROID_LINKER_FLAGS}") STRING(REPLACE ";" " " ANDROID_LINKER_FLAGS "${ANDROID_LINKER_FLAGS}")
...@@ -186,6 +194,10 @@ ELSE() ...@@ -186,6 +194,10 @@ ELSE()
SET(CMAKE_ANDROID_STANDALONE_TOOLCHAIN ${ANDROID_STANDALONE_TOOLCHAIN}) SET(CMAKE_ANDROID_STANDALONE_TOOLCHAIN ${ANDROID_STANDALONE_TOOLCHAIN})
ENDIF() ENDIF()
SET(CMAKE_ANDROID_ARCH_ABI ${ANDROID_ABI}) SET(CMAKE_ANDROID_ARCH_ABI ${ANDROID_ABI})
IF(ANDROID_ABI MATCHES "^armeabi(-v7a)?$")
SET(CMAKE_ANDROID_ARM_MODE ${ANDROID_ARM_MODE}) SET(CMAKE_ANDROID_ARM_MODE ${ANDROID_ARM_MODE})
IF(ANDROID_ABI STREQUAL "armeabi-v7a")
SET(CMAKE_ANDROID_ARM_NEON ${ANDROID_ARM_NEON}) SET(CMAKE_ANDROID_ARM_NEON ${ANDROID_ARM_NEON})
ENDIF()
ENDIF()
ENDIF() ENDIF()
...@@ -52,6 +52,7 @@ ExternalProject_Add( ...@@ -52,6 +52,7 @@ ExternalProject_Add(
ADD_LIBRARY(glog STATIC IMPORTED GLOBAL) ADD_LIBRARY(glog STATIC IMPORTED GLOBAL)
SET_PROPERTY(TARGET glog PROPERTY IMPORTED_LOCATION ${GLOG_LIBRARIES}) SET_PROPERTY(TARGET glog PROPERTY IMPORTED_LOCATION ${GLOG_LIBRARIES})
ADD_DEPENDENCIES(glog extern_glog) ADD_DEPENDENCIES(glog extern_glog gflags)
LINK_LIBRARIES(glog gflags)
LIST(APPEND external_project_dependencies glog) LIST(APPEND external_project_dependencies glog)
...@@ -7,10 +7,24 @@ set(NNPACK_ROOT $ENV{NNPACK_ROOT} CACHE PATH "Folder contains NNPACK") ...@@ -7,10 +7,24 @@ set(NNPACK_ROOT $ENV{NNPACK_ROOT} CACHE PATH "Folder contains NNPACK")
find_path(NNPACK_INC_DIR nnpack.h PATHS ${NNPACK_ROOT}/include) find_path(NNPACK_INC_DIR nnpack.h PATHS ${NNPACK_ROOT}/include)
find_library(NNPACK_LIB NAMES nnpack PATHS ${NNPACK_ROOT}/lib) find_library(NNPACK_LIB NAMES nnpack PATHS ${NNPACK_ROOT}/lib)
find_library(PTHREADPOOL_LIB NAMES pthreadpool PATHS ${NNPACK_ROOT}/lib) find_library(PTHREADPOOL_LIB NAMES pthreadpool PATHS ${NNPACK_ROOT}/lib)
find_library(NNPACK_UKERNELS_LIB NAMES nnpack_ukernels PATHS ${NNPACK_ROOT}/lib)
find_library(NNPACK_CPUFEATURES_LIB NAMES cpufeatures PATHS ${NNPACK_ROOT}/lib)
if(NNPACK_INC_DIR AND NNPACK_LIB AND PTHREADPOOL_LIB) if(NNPACK_INC_DIR AND NNPACK_LIB AND PTHREADPOOL_LIB)
set(NNPACK_FOUND ON) set(NNPACK_FOUND ON)
INCLUDE_DIRECTORIES(${NNPACK_INC_DIR}) INCLUDE_DIRECTORIES(${NNPACK_INC_DIR})
set(NNPACK_LIBS)
list(APPEND NNPACK_LIBS ${NNPACK_LIB} ${PTHREADPOOL_LIB})
if (NNPACK_UKERNELS_LIB)
list(APPEND NNPACK_LIBS ${NNPACK_UKERNELS_LIB})
endif()
if (NNPACK_CPUFEATURES_LIB)
list(APPEND NNPACK_LIBS ${NNPACK_CPUFEATURES_LIB})
endif()
if(NOT ANDROID)
list(APPEND NNPACK_LIBS "rt")
endif()
else() else()
message(FATAL_ERROR "Cannot find NNPACK in (${NNPACK_ROOT})") message(FATAL_ERROR "Cannot find NNPACK in (${NNPACK_ROOT})")
endif() endif()
...@@ -32,7 +32,12 @@ IF(NOT ${CBLAS_FOUND}) ...@@ -32,7 +32,12 @@ IF(NOT ${CBLAS_FOUND})
# arm_soft_fp_abi branch of OpenBLAS to support softfp # arm_soft_fp_abi branch of OpenBLAS to support softfp
# https://github.com/xianyi/OpenBLAS/tree/arm_soft_fp_abi # https://github.com/xianyi/OpenBLAS/tree/arm_soft_fp_abi
SET(OPENBLAS_COMMIT "b5c96fcfcdc82945502a2303116a64d89985daf5") SET(OPENBLAS_COMMIT "b5c96fcfcdc82945502a2303116a64d89985daf5")
SET(OPTIONAL_ARGS HOSTCC=${HOST_C_COMPILER} TARGET=ARMV7 ARM_SOFTFP_ABI=1 USE_THREAD=0) IF(ANDROID_ABI MATCHES "^armeabi(-v7a)?$")
SET(TARGET "ARMV7")
ELSEIF(ANDROID_ABI STREQUAL "arm64-v8a")
SET(TARGET "ARMV8")
ENDIF()
SET(OPTIONAL_ARGS HOSTCC=${HOST_C_COMPILER} TARGET=${TARGET} ARM_SOFTFP_ABI=1 USE_THREAD=0)
ELSEIF(RPI) ELSEIF(RPI)
# use hardfp # use hardfp
SET(OPENBLAS_COMMIT "v0.2.19") SET(OPENBLAS_COMMIT "v0.2.19")
......
INCLUDE(ExternalProject)
SET(PYBIND_SOURCE_DIR ${THIRD_PARTY_PATH}/pybind)
INCLUDE_DIRECTORIES(${PYBIND_SOURCE_DIR}/src/extern_pybind/include)
ExternalProject_Add(
extern_pybind
${EXTERNAL_PROJECT_LOG_ARGS}
GIT_REPOSITORY "https://github.com/pybind/pybind11.git"
GIT_TAG "v2.1.1"
PREFIX ${PYBIND_SOURCE_DIR}
UPDATE_COMMAND ""
CONFIGURE_COMMAND ""
BUILD_COMMAND ""
INSTALL_COMMAND ""
TEST_COMMAND ""
)
if (${CMAKE_VERSION} VERSION_LESS "3.3.0")
set(dummyfile ${CMAKE_CURRENT_BINARY_DIR}/pybind_dummy.c)
file(WRITE ${dummyfile} "const char * dummy_any = \"${dummyfile}\";")
add_library(pybind STATIC ${dummyfile})
else()
add_library(pybind INTERFACE)
endif()
add_dependencies(pybind extern_pybind)
LIST(APPEND external_project_dependencies pybind)
...@@ -18,6 +18,9 @@ INCLUDE(python_module) ...@@ -18,6 +18,9 @@ INCLUDE(python_module)
FIND_PACKAGE(PythonInterp 2.7) FIND_PACKAGE(PythonInterp 2.7)
IF(WITH_PYTHON) IF(WITH_PYTHON)
FIND_PACKAGE(PythonLibs 2.7) FIND_PACKAGE(PythonLibs 2.7)
# Fixme: Maybe find a static library. Get SHARED/STATIC by FIND_PACKAGE.
ADD_LIBRARY(python SHARED IMPORTED GLOBAL)
SET_PROPERTY(TARGET python PROPERTY IMPORTED_LOCATION ${PYTHON_LIBRARIES})
ENDIF(WITH_PYTHON) ENDIF(WITH_PYTHON)
SET(py_env "") SET(py_env "")
......
...@@ -109,7 +109,9 @@ set(COMMON_FLAGS ...@@ -109,7 +109,9 @@ set(COMMON_FLAGS
-Wno-unused-function -Wno-unused-function
-Wno-error=literal-suffix -Wno-error=literal-suffix
-Wno-error=sign-compare -Wno-error=sign-compare
-Wno-error=unused-local-typedefs) -Wno-error=unused-local-typedefs
-Wno-error=parentheses-equality # Warnings in Pybind11
)
set(GPU_COMMON_FLAGS set(GPU_COMMON_FLAGS
-fPIC -fPIC
......
...@@ -90,10 +90,11 @@ ...@@ -90,10 +90,11 @@
# including binary directory for generated headers. # including binary directory for generated headers.
include_directories(${CMAKE_CURRENT_BINARY_DIR}) include_directories(${CMAKE_CURRENT_BINARY_DIR})
if(NOT APPLE) if(NOT APPLE AND NOT ANDROID)
find_package(Threads REQUIRED) find_package(Threads REQUIRED)
link_libraries(${CMAKE_THREAD_LIBS_INIT}) link_libraries(${CMAKE_THREAD_LIBS_INIT})
endif(NOT APPLE) set(CMAKE_CXX_LINK_EXECUTABLE "${CMAKE_CXX_LINK_EXECUTABLE} -ldl -lrt")
endif(NOT APPLE AND NOT ANDROID)
function(merge_static_libs TARGET_NAME) function(merge_static_libs TARGET_NAME)
set(libs ${ARGN}) set(libs ${ARGN})
...@@ -103,6 +104,7 @@ function(merge_static_libs TARGET_NAME) ...@@ -103,6 +104,7 @@ function(merge_static_libs TARGET_NAME)
foreach(lib ${libs}) foreach(lib ${libs})
list(APPEND libs_deps ${${lib}_LIB_DEPENDS}) list(APPEND libs_deps ${${lib}_LIB_DEPENDS})
endforeach() endforeach()
list(REMOVE_DUPLICATES libs_deps)
if(APPLE) # Use OSX's libtool to merge archives if(APPLE) # Use OSX's libtool to merge archives
# To produce a library we need at least one source file. # To produce a library we need at least one source file.
...@@ -183,6 +185,10 @@ function(cc_library TARGET_NAME) ...@@ -183,6 +185,10 @@ function(cc_library TARGET_NAME)
add_dependencies(${TARGET_NAME} ${cc_library_DEPS}) add_dependencies(${TARGET_NAME} ${cc_library_DEPS})
target_link_libraries(${TARGET_NAME} ${cc_library_DEPS}) target_link_libraries(${TARGET_NAME} ${cc_library_DEPS})
endif() endif()
# cpplint code style
add_style_check_target(${TARGET_NAME} ${cc_library_SRCS})
else(cc_library_SRCS) else(cc_library_SRCS)
if (cc_library_DEPS) if (cc_library_DEPS)
merge_static_libs(${TARGET_NAME} ${cc_library_DEPS}) merge_static_libs(${TARGET_NAME} ${cc_library_DEPS})
...@@ -300,7 +306,7 @@ function(go_library TARGET_NAME) ...@@ -300,7 +306,7 @@ function(go_library TARGET_NAME)
file(GLOB GO_SOURCE RELATIVE "${CMAKE_CURRENT_SOURCE_DIR}" "*.go") file(GLOB GO_SOURCE RELATIVE "${CMAKE_CURRENT_SOURCE_DIR}" "*.go")
string(REPLACE "${PADDLE_GO_PATH}/" "" CMAKE_CURRENT_SOURCE_REL_DIR ${CMAKE_CURRENT_SOURCE_DIR}) string(REPLACE "${PADDLE_GO_PATH}/" "" CMAKE_CURRENT_SOURCE_REL_DIR ${CMAKE_CURRENT_SOURCE_DIR})
# FIXME: link path
add_custom_command(TARGET ${TARGET_NAME} POST_BUILD add_custom_command(TARGET ${TARGET_NAME} POST_BUILD
COMMAND rm "${${TARGET_NAME}_LIB_PATH}" COMMAND rm "${${TARGET_NAME}_LIB_PATH}"
# Golang build source code # Golang build source code
...@@ -319,14 +325,11 @@ function(go_binary TARGET_NAME) ...@@ -319,14 +325,11 @@ function(go_binary TARGET_NAME)
cmake_parse_arguments(go_binary "${options}" "${oneValueArgs}" "${multiValueArgs}" ${ARGN}) cmake_parse_arguments(go_binary "${options}" "${oneValueArgs}" "${multiValueArgs}" ${ARGN})
string(REPLACE "${PADDLE_GO_PATH}/" "" CMAKE_CURRENT_SOURCE_REL_DIR ${CMAKE_CURRENT_SOURCE_DIR}) string(REPLACE "${PADDLE_GO_PATH}/" "" CMAKE_CURRENT_SOURCE_REL_DIR ${CMAKE_CURRENT_SOURCE_DIR})
# FIXME: link path
add_custom_command(OUTPUT ${TARGET_NAME}_timestamp add_custom_command(OUTPUT ${TARGET_NAME}_timestamp
COMMAND env LIBRARY_PATH=${CMAKE_BINARY_DIR}/go/pserver/client/c/:$ENV{LIBRARY_PATH} COMMAND env GOPATH=${GOPATH} ${CMAKE_Go_COMPILER} build
GOPATH=${GOPATH} ${CMAKE_Go_COMPILER} build
-o "${CMAKE_CURRENT_BINARY_DIR}/${TARGET_NAME}" -o "${CMAKE_CURRENT_BINARY_DIR}/${TARGET_NAME}"
"./${CMAKE_CURRENT_SOURCE_REL_DIR}/${go_binary_SRCS}" "./${CMAKE_CURRENT_SOURCE_REL_DIR}/${go_binary_SRCS}"
WORKING_DIRECTORY "${PADDLE_IN_GOPATH}/go") WORKING_DIRECTORY "${PADDLE_IN_GOPATH}/go")
# TODO: don't know what ${TARGET_NAME}_link does
add_custom_target(${TARGET_NAME} ALL DEPENDS go_vendor ${TARGET_NAME}_timestamp ${go_binary_DEPS}) add_custom_target(${TARGET_NAME} ALL DEPENDS go_vendor ${TARGET_NAME}_timestamp ${go_binary_DEPS})
install(PROGRAMS ${CMAKE_CURRENT_BINARY_DIR}/${TARGET_NAME} DESTINATION bin) install(PROGRAMS ${CMAKE_CURRENT_BINARY_DIR}/${TARGET_NAME} DESTINATION bin)
endfunction(go_binary) endfunction(go_binary)
...@@ -334,15 +337,18 @@ endfunction(go_binary) ...@@ -334,15 +337,18 @@ endfunction(go_binary)
function(go_test TARGET_NAME) function(go_test TARGET_NAME)
set(options OPTIONAL) set(options OPTIONAL)
set(oneValueArgs "") set(oneValueArgs "")
set(multiValueArgs SRCS DEPS) set(multiValueArgs DEPS)
cmake_parse_arguments(go_test "${options}" "${oneValueArgs}" "${multiValueArgs}" ${ARGN}) cmake_parse_arguments(go_test "${options}" "${oneValueArgs}" "${multiValueArgs}" ${ARGN})
add_custom_command(OUTPUT ${TARGET_NAME}_timestamp string(REPLACE "${PADDLE_GO_PATH}" "" CMAKE_CURRENT_SOURCE_REL_DIR ${CMAKE_CURRENT_SOURCE_DIR})
COMMAND env GOPATH=${GOPATH} ${CMAKE_Go_COMPILER} test add_custom_target(${TARGET_NAME} ALL DEPENDS go_vendor ${go_test_DEPS})
add_custom_command(TARGET ${TARGET_NAME} POST_BUILD
COMMAND env GOPATH=${GOPATH} ${CMAKE_Go_COMPILER} test -race
-c -o "${CMAKE_CURRENT_BINARY_DIR}/${TARGET_NAME}" -c -o "${CMAKE_CURRENT_BINARY_DIR}/${TARGET_NAME}"
${go_test_SRCS} ".${CMAKE_CURRENT_SOURCE_REL_DIR}"
WORKING_DIRECTORY "${PADDLE_IN_GOPATH}/go")
add_test(NAME ${TARGET_NAME}
COMMAND ${CMAKE_CURRENT_BINARY_DIR}/${TARGET_NAME}
WORKING_DIRECTORY ${CMAKE_CURRENT_SOURCE_DIR}) WORKING_DIRECTORY ${CMAKE_CURRENT_SOURCE_DIR})
add_custom_target(${TARGET_NAME} ALL DEPENDS ${TARGET_NAME}_timestamp ${go_test_DEPS})
add_test(${TARGET_NAME} ${CMAKE_CURRENT_BINARY_DIR}/${TARGET_NAME})
endfunction(go_test) endfunction(go_test)
function(proto_library TARGET_NAME) function(proto_library TARGET_NAME)
......
...@@ -474,6 +474,11 @@ prelu ...@@ -474,6 +474,11 @@ prelu
.. autoclass:: paddle.v2.layer.prelu .. autoclass:: paddle.v2.layer.prelu
:noindex: :noindex:
gated_unit
-----------
.. autoclass:: paddle.v2.layer.gated_unit
:noindex:
Detection output Layer Detection output Layer
====================== ======================
......
...@@ -37,7 +37,7 @@ ...@@ -37,7 +37,7 @@
\frac{\partial c(y)}{\partial x} = \frac{\partial c(y)}{\partial y} \frac{\partial y}{\partial x} \frac{\partial c(y)}{\partial x} = \frac{\partial c(y)}{\partial y} \frac{\partial y}{\partial x}
假设 :math:`z = f(W^T x + b)` ,那么 假设 :math:`z = W^T x + b` ,那么
.. math:: .. math::
......
...@@ -37,7 +37,7 @@ Suppose our loss function is :math:`c(y)`, then ...@@ -37,7 +37,7 @@ Suppose our loss function is :math:`c(y)`, then
\frac{\partial c(y)}{\partial x} = \frac{\partial c(y)}{\partial y} \frac{\partial y}{\partial x} \frac{\partial c(y)}{\partial x} = \frac{\partial c(y)}{\partial y} \frac{\partial y}{\partial x}
Suppose :math:`z = f(W^T x + b)`, then Suppose :math:`z = W^T x + b`, then
.. math:: .. math::
......
...@@ -41,7 +41,7 @@ PaddlePaddle文档需要准备的环境相对较复杂,所以我们推荐使 ...@@ -41,7 +41,7 @@ PaddlePaddle文档需要准备的环境相对较复杂,所以我们推荐使
python -c "import py_paddle" python -c "import py_paddle"
如果提示错误,那么用户需要在本地编译安装PaddlePaddle,请参考 `源码编译文档 <http://www.paddlepaddle.org/develop/doc/getstarted/build_and_install/build_from_source_en.html>`_ 。 如果提示错误,那么用户需要在本地编译安装PaddlePaddle,请参考 `源码编译文档 <http://doc.paddlepaddle.org/develop/doc/getstarted/build_and_install/build_from_source_en.html>`_ 。
注意,用户在首次编译安装PaddlePaddle时,请将WITH_DOC选项关闭。在编译安装正确之后,请再次确认py_paddle包已经安装,即可进行下一步操作。 注意,用户在首次编译安装PaddlePaddle时,请将WITH_DOC选项关闭。在编译安装正确之后,请再次确认py_paddle包已经安装,即可进行下一步操作。
如果提示正确,可以执行以下命令编译生成文档,即 如果提示正确,可以执行以下命令编译生成文档,即
...@@ -68,9 +68,9 @@ PaddlePaddle文档使用 `sphinx`_ 自动生成,用户可以参考sphinx教程 ...@@ -68,9 +68,9 @@ PaddlePaddle文档使用 `sphinx`_ 自动生成,用户可以参考sphinx教程
如何更新www.paddlepaddle.org文档 如何更新www.paddlepaddle.org文档
================================ ================================
开发者给PaddlePaddle代码增加的注释以PR的形式提交到github中,提交方式可参见 `贡献文档 <http://paddlepaddle.org/develop/doc_cn/howto/dev/contribute_to_paddle_cn.html>`_ 。 开发者给PaddlePaddle代码增加的注释以PR的形式提交到github中,提交方式可参见 `贡献文档 <http://doc.paddlepaddle.org/develop/doc_cn/howto/dev/contribute_to_paddle_cn.html>`_ 。
目前PaddlePaddle的develop分支的文档是自动触发更新的,用户可以分别查看最新的 `中文文档 <http://www.paddlepaddle.org/develop/doc_cn/>`_ 和 目前PaddlePaddle的develop分支的文档是自动触发更新的,用户可以分别查看最新的 `中文文档 <http://doc.paddlepaddle.org/develop/doc_cn/>`_ 和
`英文文档 <http://www.paddlepaddle.org/develop/doc/>`_ 。 `英文文档 <http://doc.paddlepaddle.org/develop/doc/>`_ 。
......
...@@ -17,3 +17,7 @@ add_subdirectory(pserver/client/c) ...@@ -17,3 +17,7 @@ add_subdirectory(pserver/client/c)
add_subdirectory(cmd/pserver) add_subdirectory(cmd/pserver)
add_subdirectory(cmd/master) add_subdirectory(cmd/master)
add_subdirectory(master/c) add_subdirectory(master/c)
add_subdirectory(master)
add_subdirectory(pserver)
add_subdirectory(pserver/client)
add_subdirectory(utils/networkhelper)
...@@ -12,4 +12,4 @@ ...@@ -12,4 +12,4 @@
# See the License for the specific language governing permissions and # See the License for the specific language governing permissions and
# limitations under the License. # limitations under the License.
go_binary(master SRC master.go DEPS paddle_go_optimizer) go_binary(master SRC master.go)
...@@ -11,6 +11,7 @@ import ( ...@@ -11,6 +11,7 @@ import (
"github.com/namsral/flag" "github.com/namsral/flag"
log "github.com/sirupsen/logrus" log "github.com/sirupsen/logrus"
"github.com/topicai/candy"
"github.com/PaddlePaddle/Paddle/go/master" "github.com/PaddlePaddle/Paddle/go/master"
"github.com/PaddlePaddle/Paddle/go/utils/networkhelper" "github.com/PaddlePaddle/Paddle/go/utils/networkhelper"
...@@ -20,11 +21,18 @@ func main() { ...@@ -20,11 +21,18 @@ func main() {
port := flag.Int("port", 8080, "port of the master server.") port := flag.Int("port", 8080, "port of the master server.")
ttlSec := flag.Int("ttl", 60, "etcd lease TTL in seconds.") ttlSec := flag.Int("ttl", 60, "etcd lease TTL in seconds.")
endpoints := flag.String("endpoints", "http://127.0.0.1:2379", "comma separated etcd endpoints. If empty, fault tolerance will not be enabled.") endpoints := flag.String("endpoints", "http://127.0.0.1:2379", "comma separated etcd endpoints. If empty, fault tolerance will not be enabled.")
taskTimeoutDur := flag.Duration("task_timout_dur", 20*time.Minute, "task timout duration.") taskTimeoutDur := flag.Duration("task-timout-dur", 20*time.Minute, "task timout duration.")
taskTimeoutMax := flag.Int("task_timeout_max", 3, "max timtout count for each task before it being declared failed task.") taskTimeoutMax := flag.Int("task-timeout-max", 3, "max timtout count for each task before it being declared failed task.")
chunkPerTask := flag.Int("chunk_per_task", 10, "chunk per task.") chunkPerTask := flag.Int("chunk-per-task", 10, "chunk per task.")
logLevel := flag.String("log-level", "info",
"log level, possible values: debug, info, warning, error, fatal, panic")
flag.Parse() flag.Parse()
level, e := log.ParseLevel(*logLevel)
candy.Must(e)
log.SetLevel(level)
if *endpoints == "" { if *endpoints == "" {
log.Warningln("-endpoints not set, fault tolerance not be enabled.") log.Warningln("-endpoints not set, fault tolerance not be enabled.")
} }
......
...@@ -8,6 +8,7 @@ import ( ...@@ -8,6 +8,7 @@ import (
"time" "time"
"github.com/namsral/flag" "github.com/namsral/flag"
"github.com/topicai/candy"
"github.com/PaddlePaddle/Paddle/go/pserver" "github.com/PaddlePaddle/Paddle/go/pserver"
log "github.com/sirupsen/logrus" log "github.com/sirupsen/logrus"
...@@ -18,53 +19,47 @@ func main() { ...@@ -18,53 +19,47 @@ func main() {
index := flag.Int("index", -1, "index of this pserver, should be larger or equal than 0") index := flag.Int("index", -1, "index of this pserver, should be larger or equal than 0")
etcdEndpoint := flag.String("etcd-endpoint", "http://127.0.0.1:2379", etcdEndpoint := flag.String("etcd-endpoint", "http://127.0.0.1:2379",
"comma separated endpoint string for pserver to connect to etcd") "comma separated endpoint string for pserver to connect to etcd")
etcdTimeout := flag.Int("etcd-timeout", 5, "timeout for etcd calls") etcdTimeout := flag.Duration("etcd-timeout", 5*time.Second, "timeout for etcd calls")
numPservers := flag.Int("num-pservers", 1, "total pserver count in a training job") numPservers := flag.Int("num-pservers", 1, "total pserver count in a training job")
checkpointPath := flag.String("checkpoint-path", "/checkpoints/", "save checkpoint path") checkpointPath := flag.String("checkpoint-path", "/checkpoints/", "save checkpoint path")
checkpointInterval := flag.Int("checkpoint-interval", 600, "save checkpoint per interval seconds") checkpointInterval := flag.Duration("checkpoint-interval", 600*time.Second, "save checkpoint per interval seconds")
logLevel := flag.String("log-level", "info", logLevel := flag.String("log-level", "info",
"log level, possible values: debug, info, warning, error, fatal, panic") "log level, possible values: debug, info, warning, error, fatal, panic")
flag.Parse() flag.Parse()
level, err := log.ParseLevel(*logLevel) level, err := log.ParseLevel(*logLevel)
if err != nil { candy.Must(err)
panic(err)
}
log.SetLevel(level) log.SetLevel(level)
var idx int var idx int
var cp pserver.Checkpoint var cp pserver.Checkpoint
var e *pserver.EtcdClient var e *pserver.EtcdClient
if *index >= 0 { if *index >= 0 {
idx = *index idx = *index
} else { } else {
timeout := time.Second * time.Duration((*etcdTimeout)) e = pserver.NewEtcdClient(*etcdEndpoint, *numPservers, *etcdTimeout)
e = pserver.NewEtcdClient(*etcdEndpoint, *numPservers, timeout) idx, err = e.Register(*port)
idx, err = e.Register() candy.Must(err)
cp, err = pserver.NewCheckpointFromFile(*checkpointPath, idx, e)
if err != nil { if err != nil {
panic(err) log.Errorf("Fetch checkpoint failed, %s", err)
} }
} }
s, err := pserver.NewService(idx, *checkpointInterval, *checkpointPath, e, cp) s, err := pserver.NewService(idx, *checkpointInterval, *checkpointPath, e, cp)
if err != nil { candy.Must(err)
panic(err)
}
err = rpc.Register(s) err = rpc.Register(s)
if err != nil { candy.Must(err)
panic(err)
}
rpc.HandleHTTP() rpc.HandleHTTP()
l, err := net.Listen("tcp", ":"+strconv.Itoa(*port)) l, err := net.Listen("tcp", ":"+strconv.Itoa(*port))
if err != nil { candy.Must(err)
panic(err)
}
log.Infof("start pserver at port %d", *port) log.Infof("start pserver at port %d", *port)
err = http.Serve(l, nil) err = http.Serve(l, nil)
candy.Must(err)
if err != nil {
panic(err)
}
} }
hash: b8f18ce6784bd3fadd9fed0b8443e7b658234ea785ae1f220723ae2c1f652aa7 hash: a8faea3a363468a88917ddeb3b1c9ea36886fb2c622acbad42604fa9cb4d3855
updated: 2017-06-27T14:05:48.925262819+08:00 updated: 2017-07-11T10:04:40.786745417+08:00
imports: imports:
- name: github.com/coreos/etcd - name: github.com/coreos/etcd
version: 61fc123e7a8b14a0a258aa3f5c4159861b1ec2e7 version: cb2a496c4ddd1c87a9f280e116649b599999ec79
subpackages: subpackages:
- auth/authpb - auth/authpb
- clientv3 - clientv3
...@@ -22,7 +22,9 @@ imports: ...@@ -22,7 +22,9 @@ imports:
- name: github.com/PaddlePaddle/recordio - name: github.com/PaddlePaddle/recordio
version: edfb82af0739c84f241c87390ec5649c7b28c129 version: edfb82af0739c84f241c87390ec5649c7b28c129
- name: github.com/sirupsen/logrus - name: github.com/sirupsen/logrus
version: 202f25545ea4cf9b191ff7f846df5d87c9382c2b version: 7f976d3a76720c4c27af2ba716b85d2e0a7e38b1
- name: github.com/topicai/candy
version: 1b9030d056fa9f8c4b1f9c91b52fe4b8ab4cd8cc
- name: golang.org/x/net - name: golang.org/x/net
version: c8c74377599bd978aee1cf3b9b63a8634051cec2 version: c8c74377599bd978aee1cf3b9b63a8634051cec2
subpackages: subpackages:
...@@ -34,11 +36,11 @@ imports: ...@@ -34,11 +36,11 @@ imports:
- lex/httplex - lex/httplex
- trace - trace
- name: golang.org/x/sys - name: golang.org/x/sys
version: f7928cfef4d09d1b080aa2b6fd3ca9ba1567c733 version: abf9c25f54453410d0c6668e519582a9e1115027
subpackages: subpackages:
- unix - unix
- name: golang.org/x/text - name: golang.org/x/text
version: 4e9ab9ee170f2a39bd66c92b3e0a47ff47a4bc77 version: cfdf022e86b4ecfb646e1efbd7db175dd623a8fa
subpackages: subpackages:
- secure/bidirule - secure/bidirule
- transform - transform
......
...@@ -10,3 +10,4 @@ import: ...@@ -10,3 +10,4 @@ import:
version: ^1.7.4-pre version: ^1.7.4-pre
- package: github.com/sirupsen/logrus - package: github.com/sirupsen/logrus
version: ^1.0.0 version: ^1.0.0
- package: github.com/topicai/candy
if(WITH_TESTING)
go_test(master_test)
endif()
...@@ -23,7 +23,6 @@ import ( ...@@ -23,7 +23,6 @@ import (
log "github.com/sirupsen/logrus" log "github.com/sirupsen/logrus"
) )
var nullPtr = unsafe.Pointer(uintptr(0))
var mu sync.Mutex var mu sync.Mutex
var handleMap = make(map[C.paddle_master_client]*master.Client) var handleMap = make(map[C.paddle_master_client]*master.Client)
var curHandle C.paddle_master_client var curHandle C.paddle_master_client
...@@ -114,13 +113,13 @@ func paddle_next_record(client C.paddle_master_client, record **C.uchar) C.int { ...@@ -114,13 +113,13 @@ func paddle_next_record(client C.paddle_master_client, record **C.uchar) C.int {
if err != nil { if err != nil {
// Error // Error
// TODO: return the type of error? // TODO: return the type of error?
*record = (*C.uchar)(nullPtr) *record = (*C.uchar)(nil)
return -1 return -1
} }
if len(r) == 0 { if len(r) == 0 {
// Empty record // Empty record
*record = (*C.uchar)(nullPtr) *record = (*C.uchar)(nil)
return 0 return 0
} }
......
...@@ -2,6 +2,7 @@ package master ...@@ -2,6 +2,7 @@ package master
import ( import (
"os" "os"
"time"
"github.com/PaddlePaddle/Paddle/go/connection" "github.com/PaddlePaddle/Paddle/go/connection"
"github.com/PaddlePaddle/recordio" "github.com/PaddlePaddle/recordio"
...@@ -36,9 +37,9 @@ func (c *Client) getRecords() { ...@@ -36,9 +37,9 @@ func (c *Client) getRecords() {
for { for {
t, err := c.getTask() t, err := c.getTask()
if err != nil { if err != nil {
// TODO(helin): wait before move on with next
// getTask call. // getTask call.
log.Errorln(err) log.Errorf("Get task failed, sleep 3 seconds and continue, %s", err)
time.Sleep(3 * time.Second)
continue continue
} }
...@@ -68,7 +69,10 @@ func (c *Client) getRecords() { ...@@ -68,7 +69,10 @@ func (c *Client) getRecords() {
// We treat a task as finished whenever the last data // We treat a task as finished whenever the last data
// instance of the task is read. This is not exactly // instance of the task is read. This is not exactly
// correct, but a reasonable approximation. // correct, but a reasonable approximation.
c.taskFinished(t.ID) err = c.taskFinished(t.Meta.ID)
if err != nil {
log.Errorln(err)
}
} }
} }
...@@ -118,6 +122,11 @@ func (c *Client) taskFinished(taskID int) error { ...@@ -118,6 +122,11 @@ func (c *Client) taskFinished(taskID int) error {
return c.conn.Call("Service.TaskFinished", taskID, nil) return c.conn.Call("Service.TaskFinished", taskID, nil)
} }
// TaskFailed tell the master server as task is failed.
func (c *Client) taskFailed(meta TaskMeta) error {
return c.conn.Call("Service.TaskFailed", meta, nil)
}
// NextRecord returns next record in the dataset. // NextRecord returns next record in the dataset.
// //
// NextRecord will block until the next record is available. It is // NextRecord will block until the next record is available. It is
......
...@@ -66,11 +66,21 @@ func TestGetFinishTask(t *testing.T) { ...@@ -66,11 +66,21 @@ func TestGetFinishTask(t *testing.T) {
for i := 0; i < totalTask*chunkPerTask; i++ { for i := 0; i < totalTask*chunkPerTask; i++ {
w := recordio.NewWriter(f, -1, -1) w := recordio.NewWriter(f, -1, -1)
w.Write(nil) _, err = w.Write(nil)
if err != nil {
panic(err)
}
// call Close to force RecordIO writing a chunk. // call Close to force RecordIO writing a chunk.
w.Close() err = w.Close()
if err != nil {
panic(err)
}
}
err = f.Close()
if err != nil {
panic(err)
} }
f.Close()
// Manually intialize client to avoid calling c.getRecords() // Manually intialize client to avoid calling c.getRecords()
c := &Client{} c := &Client{}
...@@ -79,7 +89,11 @@ func TestGetFinishTask(t *testing.T) { ...@@ -79,7 +89,11 @@ func TestGetFinishTask(t *testing.T) {
ch := make(chan string, 1) ch := make(chan string, 1)
ch <- addr ch <- addr
go c.monitorMaster(ch) go c.monitorMaster(ch)
c.SetDataset([]string{path}) err = c.SetDataset([]string{path})
if err != nil {
panic(err)
}
checkOnePass := func(i int) { checkOnePass := func(i int) {
var tasks []Task var tasks []Task
for idx := 0; idx < totalTask; idx++ { for idx := 0; idx < totalTask; idx++ {
...@@ -95,10 +109,16 @@ func TestGetFinishTask(t *testing.T) { ...@@ -95,10 +109,16 @@ func TestGetFinishTask(t *testing.T) {
t.Fatalf("Should get error, pass: %d\n", i) t.Fatalf("Should get error, pass: %d\n", i)
} }
err = c.taskFinished(tasks[0].ID) err = c.taskFinished(tasks[0].Meta.ID)
if err != nil { if err != nil {
t.Fatalf("Error: %v, pass: %d\n", err, i) t.Fatalf("Error: %v, pass: %d\n", err, i)
} }
err = c.taskFailed(tasks[0].Meta)
if err != nil {
t.Fatalf("Error: %v, pass: %d\n", err, i)
}
tasks = tasks[1:] tasks = tasks[1:]
task, err := c.getTask() task, err := c.getTask()
if err != nil { if err != nil {
...@@ -107,7 +127,7 @@ func TestGetFinishTask(t *testing.T) { ...@@ -107,7 +127,7 @@ func TestGetFinishTask(t *testing.T) {
tasks = append(tasks, task) tasks = append(tasks, task)
for _, task := range tasks { for _, task := range tasks {
err = c.taskFinished(task.ID) err = c.taskFinished(task.Meta.ID)
if err != nil { if err != nil {
t.Fatalf("Error: %v, pass: %d\n", err, i) t.Fatalf("Error: %v, pass: %d\n", err, i)
} }
......
...@@ -57,14 +57,30 @@ func TestNextRecord(t *testing.T) { ...@@ -57,14 +57,30 @@ func TestNextRecord(t *testing.T) {
w := recordio.NewWriter(f, -1, -1) w := recordio.NewWriter(f, -1, -1)
for i := 0; i < total; i++ { for i := 0; i < total; i++ {
w.Write([]byte{byte(i)}) _, err = w.Write([]byte{byte(i)})
if err != nil {
panic(err)
}
}
err = w.Close()
if err != nil {
panic(err)
}
err = f.Close()
if err != nil {
panic(err)
} }
w.Close()
f.Close()
curAddr := make(chan string, 1) curAddr := make(chan string, 1)
curAddr <- fmt.Sprintf(":%d", p) curAddr <- fmt.Sprintf(":%d", p)
c := master.NewClient(curAddr, 10) c := master.NewClient(curAddr, 10)
c.SetDataset([]string{path}) err = c.SetDataset([]string{path})
if err != nil {
panic(err)
}
for pass := 0; pass < 50; pass++ { for pass := 0; pass < 50; pass++ {
received := make(map[byte]bool) received := make(map[byte]bool)
for i := 0; i < total; i++ { for i := 0; i < total; i++ {
......
...@@ -30,7 +30,7 @@ type EtcdClient struct { ...@@ -30,7 +30,7 @@ type EtcdClient struct {
// NewEtcdClient creates a new EtcdClient. // NewEtcdClient creates a new EtcdClient.
func NewEtcdClient(endpoints []string, addr string, lockPath, addrPath, statePath string, ttlSec int) (*EtcdClient, error) { func NewEtcdClient(endpoints []string, addr string, lockPath, addrPath, statePath string, ttlSec int) (*EtcdClient, error) {
log.Debugf("Connecting to etcd at %v", endpoints) log.Debugf("Connecting to etcd at %v", endpoints)
// TODO(helin): gracefully shutdown etcd store. Becuase etcd // TODO(helin): gracefully shutdown etcd store. Because etcd
// store holds a etcd lock, even though the lock will expire // store holds a etcd lock, even though the lock will expire
// when the lease timeout, we need to implement graceful // when the lease timeout, we need to implement graceful
// shutdown to release the lock. // shutdown to release the lock.
...@@ -60,7 +60,7 @@ func NewEtcdClient(endpoints []string, addr string, lockPath, addrPath, statePat ...@@ -60,7 +60,7 @@ func NewEtcdClient(endpoints []string, addr string, lockPath, addrPath, statePat
} }
log.Debugf("Successfully acquired lock at %s.", lockPath) log.Debugf("Successfully acquired lock at %s.", lockPath)
put := clientv3.OpPut(addrPath, string(addr)) put := clientv3.OpPut(addrPath, addr)
resp, err := cli.Txn(context.Background()).If(lock.IsOwner()).Then(put).Commit() resp, err := cli.Txn(context.Background()).If(lock.IsOwner()).Then(put).Commit()
if err != nil { if err != nil {
return nil, err return nil, err
......
...@@ -4,7 +4,7 @@ import "sync" ...@@ -4,7 +4,7 @@ import "sync"
// InMemStore is an in memory implementation of Store interface. // InMemStore is an in memory implementation of Store interface.
// //
// It does not tolerate the fault that casues the program to crash. // It does not tolerate the fault that causes the program to crash.
type InMemStore struct { type InMemStore struct {
mu sync.Mutex mu sync.Mutex
buf []byte buf []byte
......
...@@ -31,30 +31,36 @@ type Chunk struct { ...@@ -31,30 +31,36 @@ type Chunk struct {
Index recordio.Index // chunk index Index recordio.Index // chunk index
} }
// TaskMeta is a struct which stores task's meta info.
type TaskMeta struct {
ID int
Epoch int
}
// Task is the basic unit of data instances assigned to trainers. // Task is the basic unit of data instances assigned to trainers.
type Task struct { type Task struct {
ID int Meta TaskMeta
Chunks []Chunk Chunks []Chunk
} }
type taskEntry struct { type taskEntry struct {
Epoch int
NumTimeout int
Task Task Task Task
// A task fails if it's timeout or trainer reports it exits unnormally.
NumFailure int
} }
type taskQueues struct { type taskQueues struct {
Todo []taskEntry Todo []taskEntry
Pending map[int]taskEntry // map from task ID to task entry Pending map[int]taskEntry // map from task ID to task entry
Done []taskEntry Done []taskEntry
Failed []Task Failed []taskEntry
} }
// Service is the master server service. // Service is the master server service.
type Service struct { type Service struct {
chunksPerTask int chunksPerTask int
timeoutDur time.Duration timeoutDur time.Duration
timeoutMax int failureMax int
ready chan struct{} ready chan struct{}
store Store store Store
...@@ -73,7 +79,7 @@ func partition(chunks []Chunk, chunksPerTask int) []taskEntry { ...@@ -73,7 +79,7 @@ func partition(chunks []Chunk, chunksPerTask int) []taskEntry {
var cur taskEntry var cur taskEntry
for i, c := range chunks { for i, c := range chunks {
if i%chunksPerTask == 0 && len(cur.Task.Chunks) > 0 { if i%chunksPerTask == 0 && len(cur.Task.Chunks) > 0 {
cur.Task.ID = id cur.Task.Meta.ID = id
id++ id++
result = append(result, cur) result = append(result, cur)
cur.Task.Chunks = nil cur.Task.Chunks = nil
...@@ -83,7 +89,7 @@ func partition(chunks []Chunk, chunksPerTask int) []taskEntry { ...@@ -83,7 +89,7 @@ func partition(chunks []Chunk, chunksPerTask int) []taskEntry {
} }
if len(cur.Task.Chunks) > 0 { if len(cur.Task.Chunks) > 0 {
cur.Task.ID = id cur.Task.Meta.ID = id
result = append(result, cur) result = append(result, cur)
} }
...@@ -91,11 +97,11 @@ func partition(chunks []Chunk, chunksPerTask int) []taskEntry { ...@@ -91,11 +97,11 @@ func partition(chunks []Chunk, chunksPerTask int) []taskEntry {
} }
// NewService creates a new service. // NewService creates a new service.
func NewService(store Store, chunksPerTask int, timeoutDur time.Duration, timeoutMax int) (*Service, error) { func NewService(store Store, chunksPerTask int, timeoutDur time.Duration, failureMax int) (*Service, error) {
s := &Service{} s := &Service{}
s.chunksPerTask = chunksPerTask s.chunksPerTask = chunksPerTask
s.timeoutDur = timeoutDur s.timeoutDur = timeoutDur
s.timeoutMax = timeoutMax s.failureMax = failureMax
s.taskQueues = taskQueues{} s.taskQueues = taskQueues{}
s.taskQueues.Pending = make(map[int]taskEntry) s.taskQueues.Pending = make(map[int]taskEntry)
s.ready = make(chan struct{}) s.ready = make(chan struct{})
...@@ -154,7 +160,7 @@ func (s *Service) recover() (bool, error) { ...@@ -154,7 +160,7 @@ func (s *Service) recover() (bool, error) {
// snapshot *must* be called with s.mu being held. // snapshot *must* be called with s.mu being held.
func (s *Service) snapshot() error { func (s *Service) snapshot() error {
// TOOD(helin): etcd request has a size limit, so the snapshot // TODO(helin): etcd request has a size limit, so the snapshot
// size is limited by the max request size. We should either // size is limited by the max request size. We should either
// divide the snapshot into smaller chunks and save under // divide the snapshot into smaller chunks and save under
// different keys, or configure the request size to be big // different keys, or configure the request size to be big
...@@ -209,6 +215,7 @@ func readChunks(globPaths []string) ([]Chunk, error) { ...@@ -209,6 +215,7 @@ func readChunks(globPaths []string) ([]Chunk, error) {
} }
count := index.NumChunks() count := index.NumChunks()
log.Infof("readChunks: file %s has %d chunks", path, count)
for i := 0; i < count; i++ { for i := 0; i < count; i++ {
chunk := Chunk{ chunk := Chunk{
Path: path, Path: path,
...@@ -257,19 +264,10 @@ func (s *Service) SetDataset(globPaths []string, dummy *int) error { ...@@ -257,19 +264,10 @@ func (s *Service) SetDataset(globPaths []string, dummy *int) error {
return nil return nil
} }
func (s *Service) checkTimeoutFunc(taskID int, epoch int) func() { func (s *Service) processFailedTask(t taskEntry, epoch int) {
return func() { if t.Task.Meta.Epoch != epoch {
s.mu.Lock()
defer s.mu.Unlock()
t, ok := s.taskQueues.Pending[taskID]
if !ok {
return
}
if t.Epoch != epoch {
// new epoch, task launched after the // new epoch, task launched after the
// schedule of this timeout check. // schedule of this timeout check or failed status report.
return return
} }
...@@ -280,17 +278,30 @@ func (s *Service) checkTimeoutFunc(taskID int, epoch int) func() { ...@@ -280,17 +278,30 @@ func (s *Service) checkTimeoutFunc(taskID int, epoch int) func() {
} }
}() }()
delete(s.taskQueues.Pending, t.Task.ID) delete(s.taskQueues.Pending, t.Task.Meta.ID)
t.NumTimeout++ t.NumFailure++
if t.NumTimeout > s.timeoutMax { if t.NumFailure > s.failureMax {
log.Warningf("Task %v timed out %d times, discard.", t.Task, t.NumTimeout) log.Warningf("Task %v failed %d times, discard.", t.Task, t.NumFailure)
s.taskQueues.Failed = append(s.taskQueues.Failed, t.Task) s.taskQueues.Failed = append(s.taskQueues.Failed, t)
return return
} }
log.Warningf("Task %v timed out %d times, retry.", t.Task, t.NumTimeout) log.Warningf("Task %v failed %d times, discard.", t.Task, t.NumFailure)
s.taskQueues.Todo = append(s.taskQueues.Todo, t) s.taskQueues.Todo = append(s.taskQueues.Todo, t)
}
func (s *Service) checkTimeoutFunc(taskID int, epoch int) func() {
return func() {
s.mu.Lock()
defer s.mu.Unlock()
t, ok := s.taskQueues.Pending[taskID]
if !ok {
return
}
s.processFailedTask(t, epoch)
} }
} }
...@@ -339,18 +350,18 @@ func (s *Service) GetTask(dummy int, task *Task) error { ...@@ -339,18 +350,18 @@ func (s *Service) GetTask(dummy int, task *Task) error {
} }
t := s.taskQueues.Todo[0] t := s.taskQueues.Todo[0]
t.Epoch++ t.Task.Meta.Epoch++
s.taskQueues.Todo = s.taskQueues.Todo[1:] s.taskQueues.Todo = s.taskQueues.Todo[1:]
s.taskQueues.Pending[t.Task.ID] = t s.taskQueues.Pending[t.Task.Meta.ID] = t
err := s.snapshot() err := s.snapshot()
if err != nil { if err != nil {
return err return err
} }
*task = t.Task *task = t.Task
log.WithFields(s.logFields()).Infof("Task #%d dispatched.", task.ID) log.WithFields(s.logFields()).Infof("Task #%v dispatched.", t.Task.Meta)
time.AfterFunc(s.timeoutDur, s.checkTimeoutFunc(t.Task.ID, t.Epoch)) time.AfterFunc(s.timeoutDur, s.checkTimeoutFunc(t.Task.Meta.ID, t.Task.Meta.Epoch))
return nil return nil
} }
...@@ -365,13 +376,12 @@ func (s *Service) TaskFinished(taskID int, dummy *int) error { ...@@ -365,13 +376,12 @@ func (s *Service) TaskFinished(taskID int, dummy *int) error {
t, ok := s.taskQueues.Pending[taskID] t, ok := s.taskQueues.Pending[taskID]
if !ok { if !ok {
err := errors.New("pending task not found")
log.WithFields(s.logFields()).Warningln("Pending task #%d not found.", taskID) log.WithFields(s.logFields()).Warningln("Pending task #%d not found.", taskID)
return err return nil
} }
// task finished, reset timeout // task finished, reset timeout
t.NumTimeout = 0 t.NumFailure = 0
s.taskQueues.Done = append(s.taskQueues.Done, t) s.taskQueues.Done = append(s.taskQueues.Done, t)
delete(s.taskQueues.Pending, taskID) delete(s.taskQueues.Pending, taskID)
...@@ -389,3 +399,22 @@ func (s *Service) TaskFinished(taskID int, dummy *int) error { ...@@ -389,3 +399,22 @@ func (s *Service) TaskFinished(taskID int, dummy *int) error {
} }
return err return err
} }
// TaskFailed tells the service that a task is failed.
func (s *Service) TaskFailed(meta TaskMeta, dummy *int) error {
select {
case <-s.ready:
}
s.mu.Lock()
defer s.mu.Unlock()
t, ok := s.taskQueues.Pending[meta.ID]
if !ok {
log.WithFields(s.logFields()).Warningln("TaskFailed:Pending task #%v not found.", t.Task.Meta)
return nil
}
s.processFailedTask(t, meta.Epoch)
return nil
}
...@@ -30,7 +30,7 @@ func TestPartionIndex(t *testing.T) { ...@@ -30,7 +30,7 @@ func TestPartionIndex(t *testing.T) {
cs := make([]Chunk, 100) cs := make([]Chunk, 100)
ts := partition(cs, 20) ts := partition(cs, 20)
for i := range ts { for i := range ts {
if ts[i].Task.ID != i { if ts[i].Task.Meta.ID != i {
t.Error(ts[i], i) t.Error(ts[i], i)
} }
} }
......
if(WITH_TESTING)
go_test(pserver_test DEPS paddle_go_optimizer)
endif()
if(WITH_TESTING)
go_test(pserver_client_test DEPS paddle_go_optimizer)
endif()
libpaddle_go_optimizer.a
cc_library(paddle_go_optimizer DEPS paddle_optimizer paddle_proto glog gflags protobuf) cc_library(paddle_go_optimizer DEPS paddle_optimizer paddle_proto glog gflags protobuf)
target_link_libraries(paddle_go_optimizer stdc++ m) target_link_libraries(paddle_go_optimizer stdc++ m)
# Copy library to the required place.
# See: go/pserver/optimizer.go:
# // #cgo LDFLAGS: ${SRCDIR}/client/c/libpaddle_go_optimizer.a -lstdc++ -lm
add_custom_command(TARGET paddle_go_optimizer POST_BUILD
COMMAND cp "${CMAKE_CURRENT_BINARY_DIR}/libpaddle_go_optimizer.a" "${CMAKE_CURRENT_SOURCE_DIR}"
)
go_library(paddle_pserver_cclient STATIC DEPS paddle_go_optimizer) go_library(paddle_pserver_cclient STATIC DEPS paddle_go_optimizer)
if(WITH_TESTING) if(WITH_TESTING)
# FIXME: this test requires pserver which is not managed by the test # FIXME: this test requires pserver which is not managed by the test
......
...@@ -34,7 +34,6 @@ import ( ...@@ -34,7 +34,6 @@ import (
log "github.com/sirupsen/logrus" log "github.com/sirupsen/logrus"
) )
var nullPtr = unsafe.Pointer(uintptr(0))
var mu sync.Mutex var mu sync.Mutex
var handleMap = make(map[C.paddle_pserver_client]*client.Client) var handleMap = make(map[C.paddle_pserver_client]*client.Client)
var curHandle C.paddle_pserver_client var curHandle C.paddle_pserver_client
...@@ -63,7 +62,7 @@ func remove(client C.paddle_pserver_client) *client.Client { ...@@ -63,7 +62,7 @@ func remove(client C.paddle_pserver_client) *client.Client {
} }
func cArrayToSlice(p unsafe.Pointer, len int) []byte { func cArrayToSlice(p unsafe.Pointer, len int) []byte {
if p == nullPtr { if p == nil {
return nil return nil
} }
...@@ -101,11 +100,11 @@ func paddle_new_pserver_client(addrs *C.char, selected int) C.paddle_pserver_cli ...@@ -101,11 +100,11 @@ func paddle_new_pserver_client(addrs *C.char, selected int) C.paddle_pserver_cli
} }
//export paddle_new_etcd_pserver_client //export paddle_new_etcd_pserver_client
func paddle_new_etcd_pserver_client(etcd_endpoints *C.char, selected int) C.paddle_pserver_client { func paddle_new_etcd_pserver_client(etcdEndpoints *C.char, selected int) C.paddle_pserver_client {
// TODO(Longfei: use etcd lock to decide which trainer to initialize the parameters) // TODO(Longfei: use etcd lock to decide which trainer to initialize the parameters)
addr := C.GoString(etcd_endpoints) addr := C.GoString(etcdEndpoints)
etcd_client := client.NewEtcd(addr) etcdClient := client.NewEtcd(addr)
c := client.NewClient(etcd_client, etcd_client.Desired(), selector(selected != 0)) c := client.NewClient(etcdClient, etcdClient.Desired(), selector(selected != 0))
return add(c) return add(c)
} }
...@@ -124,20 +123,20 @@ func paddle_begin_init_params(client C.paddle_pserver_client) C.int { ...@@ -124,20 +123,20 @@ func paddle_begin_init_params(client C.paddle_pserver_client) C.int {
} }
//export paddle_init_param //export paddle_init_param
func paddle_init_param(client C.paddle_pserver_client, param C.paddle_parameter, param_config unsafe.Pointer, config_len C.int) C.int { func paddle_init_param(client C.paddle_pserver_client, param C.paddle_parameter, paramConfig unsafe.Pointer, configLen C.int) C.int {
et := pserver.ElementType(param.element_type) et := pserver.ElementType(param.element_type)
name := C.GoString(param.name) name := C.GoString(param.name)
content := cArrayToSlice(unsafe.Pointer(param.content), int(param.content_len)) content := cArrayToSlice(unsafe.Pointer(param.content), int(param.content_len))
pc := pserver.ParameterWithConfig{ pc := pserver.ParameterWithConfig{
Param: pserver.Parameter{Name: name, ElementType: et, Content: content}, Param: pserver.Parameter{Name: name, ElementType: et, Content: content},
Config: cArrayToSlice(param_config, int(config_len)), Config: cArrayToSlice(paramConfig, int(configLen)),
} }
c := get(client) c := get(client)
err := c.InitParam(pc) err := c.InitParam(pc)
if err != nil { if err != nil {
if err.Error() == pserver.AlreadyInitialized { if err.Error() == pserver.AlreadyInitialized {
log.Warningf("parameter %s already initialized, treat paddle_init_param as sucessful.", name) log.Warningf("parameter %s already initialized, treat paddle_init_param as successful.", name)
return C.PSERVER_OK return C.PSERVER_OK
} }
log.Errorln(err) log.Errorln(err)
...@@ -153,7 +152,7 @@ func paddle_finish_init_params(client C.paddle_pserver_client) C.int { ...@@ -153,7 +152,7 @@ func paddle_finish_init_params(client C.paddle_pserver_client) C.int {
err := c.FinishInitParams() err := c.FinishInitParams()
if err != nil { if err != nil {
if err.Error() == pserver.AlreadyInitialized { if err.Error() == pserver.AlreadyInitialized {
log.Warningln("parameters already initialized, treat paddle_finish_init_params as sucessful.") log.Warningln("parameters already initialized, treat paddle_finish_init_params as successful.")
return C.PSERVER_OK return C.PSERVER_OK
} }
...@@ -223,12 +222,12 @@ func paddle_get_params(client C.paddle_pserver_client, dst **C.paddle_parameter, ...@@ -223,12 +222,12 @@ func paddle_get_params(client C.paddle_pserver_client, dst **C.paddle_parameter,
p := ps[i] p := ps[i]
param := *(**C.paddle_parameter)(unsafe.Pointer((uintptr(unsafe.Pointer(dst)) + uintptr(i)*unsafe.Sizeof(*dst)))) param := *(**C.paddle_parameter)(unsafe.Pointer((uintptr(unsafe.Pointer(dst)) + uintptr(i)*unsafe.Sizeof(*dst))))
if unsafe.Pointer(param) == nullPtr { if unsafe.Pointer(param) == nil {
log.Errorln("must pre-allocate parameter.") log.Errorln("must pre-allocate parameter.")
return C.PSERVER_ERROR return C.PSERVER_ERROR
} }
if unsafe.Pointer(param.content) != nullPtr { if unsafe.Pointer(param.content) != nil {
if int(param.content_len) != len(p.Content) { if int(param.content_len) != len(p.Content) {
log.Errorf("the pre-allocated content len does not match parameter content len. Pre-allocated len: %d, returned len: %d", param.content_len, len(p.Content)) log.Errorf("the pre-allocated content len does not match parameter content len. Pre-allocated len: %d, returned len: %d", param.content_len, len(p.Content))
return C.PSERVER_ERROR return C.PSERVER_ERROR
......
import paddle.v2 as paddle import paddle.v2 as paddle
import paddle.v2.dataset.uci_housing as uci_housing import paddle.v2.dataset.uci_housing as uci_housing
import paddle.v2.master as master
import os
import cPickle as pickle
etcd_ip = os.getenv("MASTER_IP", "127.0.0.1")
etcd_endpoint = "http://" + etcd_ip + ":2379"
def cloud_reader():
print "connecting to master, etcd endpoints: ", etcd_endpoint
master_client = master.client(etcd_endpoint, 5, 64)
master_client.set_dataset(
["/pfs/dlnel/public/dataset/uci_housing/uci_housing-*-of-*"])
while 1:
r, e = master_client.next_record()
if not r:
break
yield pickle.loads(r)
def main(): def main():
...@@ -19,16 +37,16 @@ def main(): ...@@ -19,16 +37,16 @@ def main():
# create parameters # create parameters
parameters = paddle.parameters.create(cost) parameters = paddle.parameters.create(cost)
# create optimizer # create optimizer of new remote updater to pserver
optimizer = paddle.optimizer.Momentum(momentum=0) optimizer = paddle.optimizer.Momentum(momentum=0)
#TODO(zhihong) : replace optimizer with new OptimizerConfig print "etcd endoint: ", etcd_endpoint
trainer = paddle.trainer.SGD(cost=cost, trainer = paddle.trainer.SGD(cost=cost,
parameters=parameters, parameters=parameters,
update_equation=optimizer, update_equation=optimizer,
is_local=False, is_local=False,
pserver_spec="localhost:3000") pserver_spec=etcd_endpoint,
use_etcd=True)
# event_handler to print training and testing info # event_handler to print training and testing info
def event_handler(event): def event_handler(event):
...@@ -47,11 +65,11 @@ def main(): ...@@ -47,11 +65,11 @@ def main():
print "Test %d, %.2f" % (event.pass_id, result.cost) print "Test %d, %.2f" % (event.pass_id, result.cost)
# training # training
# NOTE: use uci_housing.train() as reader for non-paddlecloud training
trainer.train( trainer.train(
reader=paddle.batch( reader=paddle.batch(
paddle.reader.shuffle( paddle.reader.shuffle(
uci_housing.train(), buf_size=500), cloud_reader, buf_size=500), batch_size=2),
batch_size=2),
feeding={'x': 0, feeding={'x': 0,
'y': 1}, 'y': 1},
event_handler=event_handler, event_handler=event_handler,
......
...@@ -233,7 +233,7 @@ func (c *Client) Save(path string) error { ...@@ -233,7 +233,7 @@ func (c *Client) Save(path string) error {
func strHash(s string) uint32 { func strHash(s string) uint32 {
h := fnv.New32a() h := fnv.New32a()
h.Write([]byte(s)) _, _ = h.Write([]byte(s))
return h.Sum32() return h.Sum32()
} }
......
...@@ -3,11 +3,13 @@ package client_test ...@@ -3,11 +3,13 @@ package client_test
import ( import (
"context" "context"
"io/ioutil" "io/ioutil"
"math/rand"
"net" "net"
"net/http" "net/http"
"net/rpc" "net/rpc"
"strconv" "strconv"
"strings" "strings"
"sync"
"testing" "testing"
"time" "time"
...@@ -42,7 +44,8 @@ func initClient() [numPserver]int { ...@@ -42,7 +44,8 @@ func initClient() [numPserver]int {
ports[i] = p ports[i] = p
go func(l net.Listener) { go func(l net.Listener) {
s, err := pserver.NewService(0) var cp pserver.Checkpoint
s, err := pserver.NewService(0, 1, "", nil, cp)
if err != nil { if err != nil {
panic(err) panic(err)
} }
...@@ -76,15 +79,33 @@ func initEtcdClient() { ...@@ -76,15 +79,33 @@ func initEtcdClient() {
log.Errorf("err %v", err) log.Errorf("err %v", err)
} }
ctx, cancel := context.WithTimeout(context.Background(), timeout) ctx, cancel := context.WithTimeout(context.Background(), timeout)
client.Delete(ctx, pserver.PsDesired) _, err = client.Delete(ctx, pserver.PsDesired)
client.Delete(ctx, pserver.PsPath) if err != nil {
client.Put(ctx, pserver.PsDesired, strconv.Itoa(numPserver)) panic(err)
}
_, err = client.Delete(ctx, pserver.PsPath)
if err != nil {
panic(err)
}
_, err = client.Put(ctx, pserver.PsDesired, strconv.Itoa(numPserver))
if err != nil {
panic(err)
}
ports := initClient() ports := initClient()
for i := 0; i < numPserver; i++ { for i := 0; i < numPserver; i++ {
client.Put(ctx, pserver.PsPath+strconv.Itoa(i), ":"+strconv.Itoa(ports[i])) _, err = client.Put(ctx, pserver.PsPath+strconv.Itoa(i), ":"+strconv.Itoa(ports[i]))
if err != nil {
panic(err)
}
} }
cancel() cancel()
client.Close() err = client.Close()
if err != nil {
panic(err)
}
} }
type selector bool type selector bool
...@@ -99,18 +120,22 @@ func (l lister) List() []client.Server { ...@@ -99,18 +120,22 @@ func (l lister) List() []client.Server {
return l return l
} }
func ClientTest(t *testing.T, c *client.Client) { func testClient(t *testing.T, c *client.Client) {
selected := c.BeginInitParams() selected := c.BeginInitParams()
if !selected { if !selected {
t.Fatal("should be selected.") t.Fatal("should be selected.")
} }
const numParameter = 100 const numParameter = 1000
config, err := ioutil.ReadFile("./c/test/testdata/optimizer.pb") config, err := ioutil.ReadFile("./c/test/testdata/optimizer.pb")
if err != nil { if err != nil {
t.Fatalf("read optimizer proto failed") t.Fatalf("read optimizer proto failed")
} }
var wg sync.WaitGroup
for i := 0; i < numParameter; i++ { for i := 0; i < numParameter; i++ {
wg.Add(1)
go func(i int) {
var p pserver.Parameter var p pserver.Parameter
p.Name = "p_" + strconv.Itoa(i) p.Name = "p_" + strconv.Itoa(i)
p.ElementType = pserver.Float32 p.ElementType = pserver.Float32
...@@ -119,7 +144,10 @@ func ClientTest(t *testing.T, c *client.Client) { ...@@ -119,7 +144,10 @@ func ClientTest(t *testing.T, c *client.Client) {
if err != nil { if err != nil {
t.Fatal(err) t.Fatal(err)
} }
wg.Done()
}(i)
} }
wg.Wait()
err = c.FinishInitParams() err = c.FinishInitParams()
if err != nil { if err != nil {
...@@ -127,7 +155,7 @@ func ClientTest(t *testing.T, c *client.Client) { ...@@ -127,7 +155,7 @@ func ClientTest(t *testing.T, c *client.Client) {
} }
var grads []pserver.Gradient var grads []pserver.Gradient
for i := 0; i < numParameter/2; i++ { for i := 0; i < numParameter; i++ {
var g pserver.Gradient var g pserver.Gradient
g.Name = "p_" + strconv.Itoa(i) g.Name = "p_" + strconv.Itoa(i)
g.ElementType = pserver.Float32 g.ElementType = pserver.Float32
...@@ -135,30 +163,67 @@ func ClientTest(t *testing.T, c *client.Client) { ...@@ -135,30 +163,67 @@ func ClientTest(t *testing.T, c *client.Client) {
grads = append(grads, g) grads = append(grads, g)
} }
err = c.SendGrads(grads) const paramPerGroup = 10
const numGroups = numParameter / paramPerGroup
// shuffle send grads order
for i := range grads {
j := rand.Intn(i + 1)
grads[i], grads[j] = grads[j], grads[i]
}
for i := 0; i < numGroups; i++ {
var gs []pserver.Gradient
if i == numGroups-1 {
gs = grads[i*paramPerGroup:]
} else {
gs = grads[i*paramPerGroup : (i+1)*paramPerGroup]
}
wg.Add(1)
go func(gs []pserver.Gradient) {
err := c.SendGrads(gs)
if err != nil { if err != nil {
t.Fatal(err) t.Fatal(err)
} }
wg.Done()
}(gs)
}
names := make([]string, numParameter) names := make([]string, numParameter)
for i := 0; i < numParameter; i++ { for i := 0; i < numParameter; i++ {
names[i] = "p_" + strconv.Itoa(i) names[i] = "p_" + strconv.Itoa(i)
} }
params, err := c.GetParams(names) for i := 0; i < numGroups; i++ {
var ns []string
if i == numGroups-1 {
ns = names[i*paramPerGroup:]
} else {
ns = names[i*paramPerGroup : (i+1)*paramPerGroup]
}
wg.Add(1)
go func(ns []string) {
params, err := c.GetParams(ns)
if err != nil { if err != nil {
t.Fatal(err) t.Fatal(err)
} }
if len(names) != len(params) { if len(ns) != len(params) {
t.Fatalf("parameter size not match, need: %d, have: %d", len(names), len(params)) t.Fatalf("parameter size not match, need: %d, have: %d", len(names), len(params))
} }
for i := range params { for i := range params {
if names[i] != params[i].Name { if ns[i] != params[i].Name {
t.Fatalf("order of returned parameter does not required: parameter name: %s, required name: %s", names[i], params[i].Name) t.Fatalf("order of returned parameter does not required: parameter name: %s, required name: %s", ns[i], params[i].Name)
}
} }
wg.Done()
}(ns)
} }
wg.Wait()
} }
func TestNativeClient(t *testing.T) { func TestNativeClient(t *testing.T) {
...@@ -168,13 +233,14 @@ func TestNativeClient(t *testing.T) { ...@@ -168,13 +233,14 @@ func TestNativeClient(t *testing.T) {
servers[i] = client.Server{Index: i, Addr: ":" + strconv.Itoa(pserverClientPorts[i])} servers[i] = client.Server{Index: i, Addr: ":" + strconv.Itoa(pserverClientPorts[i])}
} }
c1 := client.NewClient(lister(servers), len(servers), selector(true)) c1 := client.NewClient(lister(servers), len(servers), selector(true))
ClientTest(t, c1) testClient(t, c1)
} }
// TODO: tmperary disable etcdClient test for dependency of etcd) // EtcdClient is a disabled test, since we have not embedded etcd into
// our test.
func EtcdClient(t *testing.T) { func EtcdClient(t *testing.T) {
initEtcdClient() initEtcdClient()
etcd_client := client.NewEtcd(etcdEndpoints) etcdClient := client.NewEtcd(etcdEndpoints)
c2 := client.NewClient(etcd_client, etcd_client.Desired(), selector(true)) c2 := client.NewClient(etcdClient, etcdClient.Desired(), selector(true))
ClientTest(t, c2) testClient(t, c2)
} }
...@@ -12,7 +12,7 @@ import ( ...@@ -12,7 +12,7 @@ import (
) )
const ( const (
DefaultEtcdTimeout time.Duration = 5 * time.Second defaultEtcdTimeout time.Duration = 5 * time.Second
) )
// EtcdClient is used by pserver client that is a part of trainer process. // EtcdClient is used by pserver client that is a part of trainer process.
...@@ -47,7 +47,7 @@ func (p *EtcdClient) Desired() int { ...@@ -47,7 +47,7 @@ func (p *EtcdClient) Desired() int {
psDesired, err = strconv.Atoi(string(resp.Kvs[0].Value)) psDesired, err = strconv.Atoi(string(resp.Kvs[0].Value))
if err != nil { if err != nil {
log.Errorf("psDesired %s invalid %v", psDesired, err) log.Errorf("psDesired %d invalid %v", psDesired, err)
time.Sleep(p.timeout) time.Sleep(p.timeout)
continue continue
} }
...@@ -106,11 +106,11 @@ func NewEtcd(endpoints string) *EtcdClient { ...@@ -106,11 +106,11 @@ func NewEtcd(endpoints string) *EtcdClient {
for { for {
cli, err = clientv3.New(clientv3.Config{ cli, err = clientv3.New(clientv3.Config{
Endpoints: ep, Endpoints: ep,
DialTimeout: DefaultEtcdTimeout, DialTimeout: defaultEtcdTimeout,
}) })
if err != nil { if err != nil {
log.Errorf("Init etcd connection failed: %v", err) log.Errorf("Init etcd connection failed: %v", err)
time.Sleep(DefaultEtcdTimeout) time.Sleep(defaultEtcdTimeout)
continue continue
} }
break break
...@@ -118,7 +118,7 @@ func NewEtcd(endpoints string) *EtcdClient { ...@@ -118,7 +118,7 @@ func NewEtcd(endpoints string) *EtcdClient {
log.Infof("Connected to etcd: %s\n", endpoints) log.Infof("Connected to etcd: %s\n", endpoints)
client := &EtcdClient{ client := &EtcdClient{
client: cli, client: cli,
timeout: DefaultEtcdTimeout, timeout: defaultEtcdTimeout,
endpoints: ep, endpoints: ep,
} }
return client return client
......
...@@ -16,7 +16,7 @@ import ( ...@@ -16,7 +16,7 @@ import (
const ( const (
// PsDesired is etcd path for store desired pserver count // PsDesired is etcd path for store desired pserver count
PsDesired = "/ps_desired" PsDesired = "/ps_desired"
// PsAddr is the base dir for pserver to store their addr // PsPath is the base dir for pserver to store their addr
PsPath = "/ps/" PsPath = "/ps/"
// PsCheckpoint is the etcd path for store checkpoints information // PsCheckpoint is the etcd path for store checkpoints information
PsCheckpoint = "/checkpoints/" PsCheckpoint = "/checkpoints/"
...@@ -49,7 +49,7 @@ func NewEtcdClient(endpoints string, numPservers int, timeout time.Duration) *Et ...@@ -49,7 +49,7 @@ func NewEtcdClient(endpoints string, numPservers int, timeout time.Duration) *Et
// Register registers the pserver on etcd // Register registers the pserver on etcd
// //
// Register returns the index of the current pserver. // Register returns the index of the current pserver.
func (e *EtcdClient) Register() (int, error) { func (e *EtcdClient) Register(port int) (int, error) {
var err error var err error
e.externalIP, err = networkhelper.GetExternalIP() e.externalIP, err = networkhelper.GetExternalIP()
...@@ -116,7 +116,7 @@ func (e *EtcdClient) Register() (int, error) { ...@@ -116,7 +116,7 @@ func (e *EtcdClient) Register() (int, error) {
for { for {
ctx, cancel := context.WithTimeout(context.Background(), time.Second) ctx, cancel := context.WithTimeout(context.Background(), time.Second)
var err error var err error
pserverIdx, err = e.registerPserverEtcd(ctx) pserverIdx, err = e.registerPserverEtcd(ctx, port)
cancel() cancel()
if err != nil { if err != nil {
log.Warn(err) log.Warn(err)
...@@ -140,7 +140,7 @@ func (e *EtcdClient) initDesiredPservers(ctx context.Context, numPservers int) ( ...@@ -140,7 +140,7 @@ func (e *EtcdClient) initDesiredPservers(ctx context.Context, numPservers int) (
} }
// registerPserverEtcd registers pserver node on etcd using transaction. // registerPserverEtcd registers pserver node on etcd using transaction.
func (e *EtcdClient) registerPserverEtcd(ctx context.Context) (int, error) { func (e *EtcdClient) registerPserverEtcd(ctx context.Context, port int) (int, error) {
var idx int var idx int
_, err := concurrency.NewSTM(e.etcdClient, func(c concurrency.STM) error { _, err := concurrency.NewSTM(e.etcdClient, func(c concurrency.STM) error {
registered := false registered := false
...@@ -156,8 +156,9 @@ func (e *EtcdClient) registerPserverEtcd(ctx context.Context) (int, error) { ...@@ -156,8 +156,9 @@ func (e *EtcdClient) registerPserverEtcd(ctx context.Context) (int, error) {
log.Fatal(err) log.Fatal(err)
} }
// find the first id and write info // find the first id and write info
c.Put(psKey, e.externalIP, clientv3.WithLease(resp.ID)) pserverAddr := e.externalIP + ":" + strconv.Itoa(port)
log.Debugf("set pserver node %s with value %s", psKey, e.externalIP) c.Put(psKey, pserverAddr, clientv3.WithLease(resp.ID))
log.Debugf("set pserver node %s with value %s", psKey, pserverAddr)
ch, kaerr := e.etcdClient.KeepAlive(context.TODO(), resp.ID) ch, kaerr := e.etcdClient.KeepAlive(context.TODO(), resp.ID)
if kaerr != nil { if kaerr != nil {
log.Errorf("keepalive etcd node error: %v", kaerr) log.Errorf("keepalive etcd node error: %v", kaerr)
...@@ -176,10 +177,10 @@ func (e *EtcdClient) registerPserverEtcd(ctx context.Context) (int, error) { ...@@ -176,10 +177,10 @@ func (e *EtcdClient) registerPserverEtcd(ctx context.Context) (int, error) {
break break
} }
} }
if registered == true { if registered {
return nil return nil
} }
return errors.New("not registerd, may due to already have enough pservers") return errors.New("not registered, may due to already have enough pservers")
}, concurrency.WithAbortContext(ctx), concurrency.WithIsolation(concurrency.RepeatableReads)) }, concurrency.WithAbortContext(ctx), concurrency.WithIsolation(concurrency.RepeatableReads))
if err != nil { if err != nil {
...@@ -189,13 +190,26 @@ func (e *EtcdClient) registerPserverEtcd(ctx context.Context) (int, error) { ...@@ -189,13 +190,26 @@ func (e *EtcdClient) registerPserverEtcd(ctx context.Context) (int, error) {
return idx, nil return idx, nil
} }
// GetKey gets the value by the specified key
func (e *EtcdClient) GetKey(key string, timeout time.Duration) ([]byte, error) {
ctx, cancel := context.WithTimeout(context.Background(), timeout)
resp, err := e.etcdClient.Get(ctx, key)
cancel()
if err != nil {
return []byte{}, err
}
kvs := resp.Kvs
if len(kvs) == 0 {
return []byte{}, nil
}
v := kvs[0].Value
return v, nil
}
// PutKey put into etcd with value by key specified // PutKey put into etcd with value by key specified
func (e *EtcdClient) PutKey(key string, value []byte, timeout int) error { func (e *EtcdClient) PutKey(key string, value []byte, timeout time.Duration) error {
ctx, cancel := context.WithTimeout(context.Background(), time.Second*time.Duration(timeout)) ctx, cancel := context.WithTimeout(context.Background(), timeout)
_, err := e.etcdClient.Put(ctx, key, string(value)) _, err := e.etcdClient.Put(ctx, key, string(value))
cancel() cancel()
if err != nil {
return err return err
}
return nil
} }
package pserver package pserver
// #cgo CFLAGS: -I ../../ // #cgo CFLAGS: -I ../../
// //FIXME: ldflags contain "build" path // #cgo LDFLAGS: ${SRCDIR}/client/c/libpaddle_go_optimizer.a -lstdc++ -lm
// #cgo LDFLAGS: ${SRCDIR}/../../build/go/pserver/client/c/libpaddle_go_optimizer.a -lstdc++ -lm
// #include "paddle/optimizer/optimizer.h" // #include "paddle/optimizer/optimizer.h"
// #include <stdlib.h> // #include <stdlib.h>
// #include <string.h> // #include <string.h>
...@@ -15,15 +14,14 @@ import ( ...@@ -15,15 +14,14 @@ import (
log "github.com/sirupsen/logrus" log "github.com/sirupsen/logrus"
) )
var nullPtr = unsafe.Pointer(uintptr(0))
type optimizer struct { type optimizer struct {
opt *C.struct_paddle_optimizer opt *C.struct_paddle_optimizer
elementType ElementType elementType ElementType
contentLen int
} }
func cArrayToSlice(p unsafe.Pointer, len int) []byte { func cArrayToSlice(p unsafe.Pointer, len int) []byte {
if p == nullPtr { if p == nil {
return nil return nil
} }
...@@ -38,25 +36,28 @@ func cArrayToSlice(p unsafe.Pointer, len int) []byte { ...@@ -38,25 +36,28 @@ func cArrayToSlice(p unsafe.Pointer, len int) []byte {
func newOptimizer(paramWithConfigs ParameterWithConfig, State []byte) *optimizer { func newOptimizer(paramWithConfigs ParameterWithConfig, State []byte) *optimizer {
o := &optimizer{} o := &optimizer{}
o.elementType = paramWithConfigs.Param.ElementType o.elementType = paramWithConfigs.Param.ElementType
o.contentLen = len(paramWithConfigs.Param.Content)
p := paramWithConfigs.Param p := paramWithConfigs.Param
c := paramWithConfigs.Config c := paramWithConfigs.Config
s := State s := State
paramBufferSize := C.size_t(len(p.Content))
log.WithFields(log.Fields{ log.WithFields(log.Fields{
"ElementType": p.ElementType, "ElementType": p.ElementType,
"ParamSize": len(p.Content), "ParamSize": paramBufferSize,
"ConfigSize": len(c), "ConfigSize": len(c),
"StateSize": len(s), "StateSize": len(s),
}).Info("New Optimizer Created with config:") }).Info("New Optimizer Created with config:")
var cbuffer unsafe.Pointer var cbuffer unsafe.Pointer
cbuffer = C.malloc(C.size_t(len(p.Content))) cbuffer = C.malloc(paramBufferSize)
C.memcpy(cbuffer, unsafe.Pointer(&p.Content[0]), C.size_t(len(p.Content)))
C.memcpy(cbuffer, unsafe.Pointer(&p.Content[0]), paramBufferSize)
var cstate unsafe.Pointer var cstate unsafe.Pointer
if len(s) != 0 { if len(s) != 0 {
cstate = unsafe.Pointer(&s[0]) cstate = unsafe.Pointer(&s[0])
} }
o.opt = C.paddle_create_optimizer((*C.uchar)(&c[0]), C.int(len(c)), o.opt = C.paddle_create_optimizer((*C.uchar)(&c[0]), C.int(len(c)),
C.paddle_element_type(p.ElementType), cbuffer, C.int(len(p.Content)/C.sizeof_float), (*C.char)(cstate), C.int(len(s))) C.paddle_element_type(p.ElementType), cbuffer, C.int(paramBufferSize), (*C.char)(cstate), C.int(len(s)))
return o return o
} }
...@@ -68,8 +69,8 @@ func (o *optimizer) GetWeights() []byte { ...@@ -68,8 +69,8 @@ func (o *optimizer) GetWeights() []byte {
func (o *optimizer) GetStates() []byte { func (o *optimizer) GetStates() []byte {
var cbuffer *C.char var cbuffer *C.char
cbuffer_len := C.paddle_optimizer_get_state(o.opt, &cbuffer) cbufferLen := C.paddle_optimizer_get_state(o.opt, &cbuffer)
return cArrayToSlice(unsafe.Pointer(cbuffer), int(cbuffer_len)) return cArrayToSlice(unsafe.Pointer(cbuffer), int(cbufferLen))
} }
func (o *optimizer) UpdateParameter(g Gradient) error { func (o *optimizer) UpdateParameter(g Gradient) error {
...@@ -77,7 +78,11 @@ func (o *optimizer) UpdateParameter(g Gradient) error { ...@@ -77,7 +78,11 @@ func (o *optimizer) UpdateParameter(g Gradient) error {
return fmt.Errorf("Name: %s, parameter and gradient element type not match, parameter: %v, gradient: %v", g.Name, o.elementType, g.ElementType) return fmt.Errorf("Name: %s, parameter and gradient element type not match, parameter: %v, gradient: %v", g.Name, o.elementType, g.ElementType)
} }
r := C.paddle_update_parameter(o.opt, C.paddle_element_type(g.ElementType), unsafe.Pointer(&g.Content[0]), C.int(len(g.Content))/C.sizeof_float) if o.contentLen != len(g.Content) {
return fmt.Errorf("Name: %s, parameter and gradient does not have same content len, parameter: %d, gradient: %d", g.Name, o.contentLen, len(g.Content))
}
r := C.paddle_update_parameter(o.opt, C.paddle_element_type(g.ElementType), unsafe.Pointer(&g.Content[0]), C.int(len(g.Content)))
if r != 0 { if r != 0 {
return fmt.Errorf("optimizer update returned error code: %d", r) return fmt.Errorf("optimizer update returned error code: %d", r)
} }
...@@ -85,8 +90,8 @@ func (o *optimizer) UpdateParameter(g Gradient) error { ...@@ -85,8 +90,8 @@ func (o *optimizer) UpdateParameter(g Gradient) error {
} }
func (o *optimizer) Cleanup() { func (o *optimizer) Cleanup() {
if unsafe.Pointer(o.opt) != nullPtr { if unsafe.Pointer(o.opt) != nil {
C.paddle_release_optimizer(o.opt) C.paddle_release_optimizer(o.opt)
o.opt = (*C.struct_paddle_optimizer)(nullPtr) o.opt = (*C.struct_paddle_optimizer)(nil)
} }
} }
...@@ -9,6 +9,7 @@ import ( ...@@ -9,6 +9,7 @@ import (
"encoding/json" "encoding/json"
"errors" "errors"
"fmt" "fmt"
"io/ioutil"
"os" "os"
"path/filepath" "path/filepath"
"strconv" "strconv"
...@@ -21,14 +22,14 @@ import ( ...@@ -21,14 +22,14 @@ import (
// ElementType is the type of elements of a Parameter. // ElementType is the type of elements of a Parameter.
type ElementType int type ElementType int
// RPC error message.
const ( const (
// AlreadyInitialized is true if pserver is initialized
AlreadyInitialized = "pserver already initialized" AlreadyInitialized = "pserver already initialized"
// Uninitialized is true if pserver not fully initialized
Uninitialized = "pserver not fully initialized" Uninitialized = "pserver not fully initialized"
CheckpointMD5Failed = "checkpoint file MD5 validation failed"
) )
// Supported element types // Supported element types.
const ( const (
Int32 ElementType = iota Int32 ElementType = iota
UInt32 UInt32
...@@ -51,21 +52,15 @@ type ParameterWithConfig struct { ...@@ -51,21 +52,15 @@ type ParameterWithConfig struct {
Config []byte // parameter configuration in Proto Buffer format Config []byte // parameter configuration in Proto Buffer format
} }
// ParameterCheckpoint is Parameter and State checkpoint // checkpointMeta saves checkpoint metadata
type ParameterCheckpoint struct {
ParamConfig ParameterWithConfig
State []byte
}
// checkpoint signature
type checkpointMeta struct { type checkpointMeta struct {
UUID string `json:"uuid"` UUID string `json:"uuid"`
Md5sum string `json:"md5sum"` MD5 string `json:"md5"`
Timestamp string `json:"timestamp"` Timestamp int64 `json:"timestamp"`
} }
// Checkpoint is the pserver shard persist in file // Checkpoint is the pserver shard persist in file
type Checkpoint []ParameterCheckpoint type Checkpoint []parameterCheckpoint
// Gradient is the gradient of the parameter. // Gradient is the gradient of the parameter.
type Gradient Parameter type Gradient Parameter
...@@ -81,12 +76,53 @@ type Service struct { ...@@ -81,12 +76,53 @@ type Service struct {
optMap map[string]*optimizer optMap map[string]*optimizer
} }
// parameterCheckpoint saves parameter checkpoint
type parameterCheckpoint struct {
ParameterWithConfig
State []byte
}
// NewCheckpointFromFile loads parameters and state from checkpoint file
func NewCheckpointFromFile(cpPath string, idx int, e *EtcdClient) (Checkpoint, error) {
v, err := e.GetKey(PsPath+string(idx), 3*time.Second)
if err != nil {
return nil, err
}
var cpMeta checkpointMeta
if err = json.Unmarshal(v, &cpMeta); err != nil {
return nil, err
}
fn := filepath.Join(cpPath, cpMeta.UUID)
if _, err = os.Stat(fn); os.IsNotExist(err) {
return nil, err
}
content, err := ioutil.ReadFile(fn)
if err != nil {
return nil, err
}
h := md5.New()
md5 := hex.EncodeToString(h.Sum(content))
if md5 != cpMeta.MD5 {
return nil, errors.New(CheckpointMD5Failed)
}
dec := gob.NewDecoder(bytes.NewReader(content))
cp := Checkpoint{}
if err = dec.Decode(cp); err != nil {
return nil, err
}
return cp, nil
}
// NewService creates a new service, will bypass etcd registration if no // NewService creates a new service, will bypass etcd registration if no
// endpoints specified. // endpoints specified. It will recovery from checkpoint file if a exists a specified checkpoint.
func NewService(idx int, seconds int, path string, client *EtcdClient, cp Checkpoint) (*Service, error) { func NewService(idx int, interval time.Duration, path string, client *EtcdClient, cp Checkpoint) (*Service, error) {
s := &Service{ s := &Service{
idx: idx, idx: idx,
checkpointInterval: time.Second * time.Duration(seconds), checkpointInterval: interval,
checkpointPath: path, checkpointPath: path,
client: client, client: client,
} }
...@@ -95,9 +131,11 @@ func NewService(idx int, seconds int, path string, client *EtcdClient, cp Checkp ...@@ -95,9 +131,11 @@ func NewService(idx int, seconds int, path string, client *EtcdClient, cp Checkp
if cp != nil { if cp != nil {
for _, item := range cp { for _, item := range cp {
p := item.ParamConfig p := ParameterWithConfig{
st := item.State Param: item.Param,
s.optMap[p.Param.Name] = newOptimizer(p, st) Config: item.Config,
}
s.optMap[p.Param.Name] = newOptimizer(p, item.State)
} }
} }
return s, nil return s, nil
...@@ -173,7 +211,7 @@ func (s *Service) GetParam(name string, parameter *Parameter) error { ...@@ -173,7 +211,7 @@ func (s *Service) GetParam(name string, parameter *Parameter) error {
// learning optimization methods are stochastic in // learning optimization methods are stochastic in
// nature. This race condition is allowed deliberately // nature. This race condition is allowed deliberately
// to save the program from making a copy of the // to save the program from making a copy of the
// paramter content. // parameter content.
parameter.Name = name parameter.Name = name
parameter.ElementType = opt.elementType parameter.ElementType = opt.elementType
parameter.Content = opt.GetWeights() parameter.Content = opt.GetWeights()
...@@ -181,56 +219,81 @@ func (s *Service) GetParam(name string, parameter *Parameter) error { ...@@ -181,56 +219,81 @@ func (s *Service) GetParam(name string, parameter *Parameter) error {
} }
// pserver save checkpoint // pserver save checkpoint
func (s *Service) doCheckpoint() error { func (s *Service) doCheckpoint() (err error) {
<-s.initialized <-s.initialized
s.mu.Lock() s.mu.Lock()
defer s.mu.Unlock() defer s.mu.Unlock()
cp := make([]ParameterCheckpoint, 0, len(s.optMap)) cp := make([]parameterCheckpoint, len(s.optMap))
index := 0 index := 0
for name, opt := range s.optMap { for name, opt := range s.optMap {
var pc ParameterCheckpoint var pc parameterCheckpoint
pc.ParamConfig.Param.Name = name pc.Param.Name = name
pc.ParamConfig.Param.ElementType = opt.elementType pc.Param.ElementType = opt.elementType
pc.ParamConfig.Param.Content = opt.GetWeights() pc.Param.Content = opt.GetWeights()
pc.State = opt.GetStates() pc.State = opt.GetStates()
cp[index] = pc cp[index] = pc
index++ index++
} }
var buf bytes.Buffer var buf bytes.Buffer
encoder := gob.NewEncoder(&buf) encoder := gob.NewEncoder(&buf)
err := encoder.Encode(cp) err = encoder.Encode(cp)
if err != nil { if err != nil {
return err return
} }
cpMeta := checkpointMeta{} cpMeta := checkpointMeta{}
cpMeta.UUID = s.checkpointPath + strconv.Itoa(s.idx) cpMeta.UUID = s.checkpointPath + strconv.Itoa(s.idx)
cpMeta.Timestamp = time.Now().String() cpMeta.Timestamp = time.Now().UnixNano()
h := md5.New() h := md5.New()
cpMeta.Md5sum = hex.EncodeToString(h.Sum(buf.Bytes())) cpMeta.MD5 = hex.EncodeToString(h.Sum(buf.Bytes()))
cpMetajson, err := json.Marshal(cpMeta)
if err != nil {
return
}
cpMetajson, _ := json.Marshal(cpMeta) err = s.client.PutKey(filepath.Join(PsCheckpoint, strconv.Itoa(s.idx)), cpMetajson, 3*time.Second)
err = s.client.PutKey(filepath.Join(PsCheckpoint, strconv.Itoa(s.idx)), cpMetajson, 3)
if err != nil { if err != nil {
return err return
} }
if _, err = os.Stat(cpMeta.UUID); os.IsNotExist(err) { if _, err = os.Stat(cpMeta.UUID); os.IsNotExist(err) {
log.Info("checkpoint does not exists.") log.Info("checkpoint does not exists.")
} else { } else {
err = os.Remove(cpMeta.UUID) err = os.Remove(cpMeta.UUID)
if err != nil {
log.Infof("Removing checkpoint %s failed", cpMeta.UUID)
} else {
log.Infof("checkpoint %s already exsits, removing ", cpMeta.UUID) log.Infof("checkpoint %s already exsits, removing ", cpMeta.UUID)
} }
}
f, err := os.Create(cpMeta.UUID) f, err := os.Create(cpMeta.UUID)
defer f.Close()
if err != nil { if err != nil {
return err return
} }
defer func() {
closeErr := f.Close()
if closeErr != nil {
if err != nil {
log.Errorln(closeErr)
} else {
// Set closeErr as return value.
err = closeErr
}
}
}()
writer := bufio.NewWriter(f) writer := bufio.NewWriter(f)
_, err = writer.Write(buf.Bytes()) _, err = writer.Write(buf.Bytes())
writer.Flush()
if err != nil { if err != nil {
return err return
} }
return nil
err = writer.Flush()
if err != nil {
return
}
return
} }
...@@ -31,7 +31,7 @@ func TestServiceFull(t *testing.T) { ...@@ -31,7 +31,7 @@ func TestServiceFull(t *testing.T) {
err = s.InitParam(pserver.ParameterWithConfig{Param: p, Config: config}, nil) err = s.InitParam(pserver.ParameterWithConfig{Param: p, Config: config}, nil)
if err != nil { if err != nil {
t.FailNow() t.Fatal(err)
} }
var p1 pserver.Parameter var p1 pserver.Parameter
...@@ -40,40 +40,40 @@ func TestServiceFull(t *testing.T) { ...@@ -40,40 +40,40 @@ func TestServiceFull(t *testing.T) {
p1.ElementType = pserver.Float32 p1.ElementType = pserver.Float32
err = s.InitParam(pserver.ParameterWithConfig{Param: p1, Config: config}, nil) err = s.InitParam(pserver.ParameterWithConfig{Param: p1, Config: config}, nil)
if err != nil { if err != nil {
t.FailNow() t.Fatal(err)
} }
err = s.FinishInitParams(0, nil) err = s.FinishInitParams(0, nil)
if err != nil { if err != nil {
t.FailNow() t.Fatal(err)
} }
var param pserver.Parameter var param pserver.Parameter
err = s.GetParam("param_b", &param) err = s.GetParam("param_b", &param)
if err != nil { if err != nil {
t.FailNow() t.Fatal(err)
} }
if !reflect.DeepEqual(param, p1) { if !reflect.DeepEqual(param, p1) {
t.FailNow() t.Fatal("not equal:", param, p1)
} }
g1, g2 := pserver.Gradient(p1), pserver.Gradient(p) g1, g2 := pserver.Gradient(p1), pserver.Gradient(p)
err = s.SendGrad(g1, nil) err = s.SendGrad(g1, nil)
if err != nil { if err != nil {
t.FailNow() t.Fatal(err)
} }
err = s.SendGrad(g2, nil) err = s.SendGrad(g2, nil)
if err != nil { if err != nil {
t.FailNow() t.Fatal(err)
} }
var param1 pserver.Parameter var param1 pserver.Parameter
err = s.GetParam("param_a", &param1) err = s.GetParam("param_a", &param1)
if err != nil { if err != nil {
t.FailNow() t.Fatal(err)
} }
// don't compare content, since it's already changed by // don't compare content, since it's already changed by
...@@ -82,7 +82,7 @@ func TestServiceFull(t *testing.T) { ...@@ -82,7 +82,7 @@ func TestServiceFull(t *testing.T) {
p.Content = nil p.Content = nil
if !reflect.DeepEqual(param1, p) { if !reflect.DeepEqual(param1, p) {
t.FailNow() t.Fatal("not equal:", param1, p)
} }
} }
...@@ -90,16 +90,16 @@ func TestMultipleInit(t *testing.T) { ...@@ -90,16 +90,16 @@ func TestMultipleInit(t *testing.T) {
var cp pserver.Checkpoint var cp pserver.Checkpoint
s, err := pserver.NewService(0, 1, "", nil, cp) s, err := pserver.NewService(0, 1, "", nil, cp)
if err != nil { if err != nil {
t.Error(err) t.Fatal(err)
} }
err = s.FinishInitParams(0, nil) err = s.FinishInitParams(0, nil)
if err != nil { if err != nil {
t.FailNow() t.Fatal(err)
} }
err = s.FinishInitParams(0, nil) err = s.FinishInitParams(0, nil)
if err.Error() != pserver.AlreadyInitialized { if err.Error() != pserver.AlreadyInitialized {
t.FailNow() t.Fatal(err)
} }
} }
...@@ -108,7 +108,7 @@ func TestUninitialized(t *testing.T) { ...@@ -108,7 +108,7 @@ func TestUninitialized(t *testing.T) {
s, err := pserver.NewService(0, 1, "", nil, cp) s, err := pserver.NewService(0, 1, "", nil, cp)
err = s.SendGrad(pserver.Gradient{}, nil) err = s.SendGrad(pserver.Gradient{}, nil)
if err.Error() != pserver.Uninitialized { if err.Error() != pserver.Uninitialized {
t.FailNow() t.Fatal(err)
} }
} }
...@@ -154,12 +154,12 @@ func TestBlockUntilInitialized(t *testing.T) { ...@@ -154,12 +154,12 @@ func TestBlockUntilInitialized(t *testing.T) {
err = s.InitParam(pserver.ParameterWithConfig{Param: p, Config: config}, nil) err = s.InitParam(pserver.ParameterWithConfig{Param: p, Config: config}, nil)
if err != nil { if err != nil {
t.FailNow() t.Fatal(err)
} }
err = s.FinishInitParams(0, nil) err = s.FinishInitParams(0, nil)
if err != nil { if err != nil {
t.FailNow() t.Fatal(err)
} }
wg.Wait() wg.Wait()
......
if(WITH_TESTING)
go_test(network_helper_test)
endif()
...@@ -8,13 +8,14 @@ add_subdirectory(gserver) ...@@ -8,13 +8,14 @@ add_subdirectory(gserver)
add_subdirectory(pserver) add_subdirectory(pserver)
add_subdirectory(trainer) add_subdirectory(trainer)
add_subdirectory(scripts) add_subdirectory(scripts)
add_subdirectory(optimizer)
add_subdirectory(string) add_subdirectory(string)
if(Boost_FOUND) if(Boost_FOUND)
add_subdirectory(memory) add_subdirectory(memory)
add_subdirectory(platform) add_subdirectory(platform)
add_subdirectory(framework) add_subdirectory(framework)
add_subdirectory(operators)
add_subdirectory(pybind)
endif() endif()
if(WITH_C_API) if(WITH_C_API)
......
...@@ -64,11 +64,7 @@ ModelConfig* TrainerConfig::getModelConfig() const { ...@@ -64,11 +64,7 @@ ModelConfig* TrainerConfig::getModelConfig() const {
ParameterConfig::ParameterConfig() : m(new ParameterConfigPrivate()) {} ParameterConfig::ParameterConfig() : m(new ParameterConfigPrivate()) {}
ParameterConfig::~ParameterConfig() { ParameterConfig::~ParameterConfig() { delete m; }
if (m) {
delete m;
}
}
ParameterConfig* ParameterConfig::createParameterConfigFromParameterSharedPtr( ParameterConfig* ParameterConfig::createParameterConfigFromParameterSharedPtr(
void* ptr) { void* ptr) {
...@@ -98,11 +94,7 @@ void* ParameterConfig::getRawPtr() { return m->getConfigPtr(); } ...@@ -98,11 +94,7 @@ void* ParameterConfig::getRawPtr() { return m->getConfigPtr(); }
OptimizationConfig::OptimizationConfig() : m(new OptimizationConfigPrivate()) {} OptimizationConfig::OptimizationConfig() : m(new OptimizationConfigPrivate()) {}
OptimizationConfig::~OptimizationConfig() { OptimizationConfig::~OptimizationConfig() { delete m; }
if (m) {
delete m;
}
}
std::string OptimizationConfig::toProtoString() { std::string OptimizationConfig::toProtoString() {
return m->getConfig().SerializeAsString(); return m->getConfig().SerializeAsString();
......
...@@ -843,7 +843,8 @@ public: ...@@ -843,7 +843,8 @@ public:
bool useSparseUpdater); bool useSparseUpdater);
static ParameterUpdater* createNewRemoteUpdater( static ParameterUpdater* createNewRemoteUpdater(
OptimizationConfig* config, OptimizationConfig* config,
const std::string pserverSpec) throw(UnsupportError); const std::string pserverSpec,
const bool useEtcd) throw(UnsupportError);
~ParameterUpdater(); ~ParameterUpdater();
/** /**
......
...@@ -53,11 +53,7 @@ struct ParameterTraverseCallbackPrivate { ...@@ -53,11 +53,7 @@ struct ParameterTraverseCallbackPrivate {
ParameterOptimizer::ParameterOptimizer() : m(new ParameterOptimizerPrivate()) {} ParameterOptimizer::ParameterOptimizer() : m(new ParameterOptimizerPrivate()) {}
ParameterOptimizer::~ParameterOptimizer() { ParameterOptimizer::~ParameterOptimizer() { delete m; }
if (m) {
delete m;
}
}
ParameterOptimizer* ParameterOptimizer::create(OptimizationConfig* config) { ParameterOptimizer* ParameterOptimizer::create(OptimizationConfig* config) {
CHECK(config != nullptr); CHECK(config != nullptr);
...@@ -104,11 +100,7 @@ std::vector<int> ParameterOptimizer::getParameterTypes() const { ...@@ -104,11 +100,7 @@ std::vector<int> ParameterOptimizer::getParameterTypes() const {
ParameterTraverseCallback::ParameterTraverseCallback() ParameterTraverseCallback::ParameterTraverseCallback()
: m(new ParameterTraverseCallbackPrivate()) {} : m(new ParameterTraverseCallbackPrivate()) {}
ParameterTraverseCallback::~ParameterTraverseCallback() { ParameterTraverseCallback::~ParameterTraverseCallback() { delete m; }
if (m) {
delete m;
}
}
void ParameterTraverseCallback::apply(const std::vector<Vector*>& vecs, void ParameterTraverseCallback::apply(const std::vector<Vector*>& vecs,
const ParameterConfig& conf, const ParameterConfig& conf,
......
...@@ -33,11 +33,12 @@ ParameterUpdater *ParameterUpdater::createLocalUpdater( ...@@ -33,11 +33,12 @@ ParameterUpdater *ParameterUpdater::createLocalUpdater(
ParameterUpdater *ParameterUpdater::createNewRemoteUpdater( ParameterUpdater *ParameterUpdater::createNewRemoteUpdater(
OptimizationConfig *config, OptimizationConfig *config,
const std::string pserverSpec) throw(UnsupportError) { const std::string pserverSpec,
const bool useEtcd) throw(UnsupportError) {
#ifndef PADDLE_WITHOUT_GOLANG #ifndef PADDLE_WITHOUT_GOLANG
auto updater = new ParameterUpdater(); auto updater = new ParameterUpdater();
updater->m->updater.reset(new paddle::NewRemoteParameterUpdater( updater->m->updater.reset(new paddle::NewRemoteParameterUpdater(
config->m->getConfig(), pserverSpec)); config->m->getConfig(), pserverSpec, useEtcd));
return updater; return updater;
#else #else
throw UnsupportError(); throw UnsupportError();
......
...@@ -171,11 +171,7 @@ struct VectorPrivate { ...@@ -171,11 +171,7 @@ struct VectorPrivate {
Vector::Vector() : m(new VectorPrivate()) {} Vector::Vector() : m(new VectorPrivate()) {}
Vector::~Vector() { Vector::~Vector() { delete m; }
if (m) {
delete m;
}
}
Vector* Vector::createZero(size_t sz, bool useGpu) { Vector* Vector::createZero(size_t sz, bool useGpu) {
auto retVec = new Vector(); auto retVec = new Vector();
......
# ddim lib # ddim lib
cc_library(ddim SRCS ddim.cc) cc_library(enforce SRCS enforce.cc DEPS glog)
cc_test(enforce_test SRCS enforce_test.cc DEPS enforce)
cc_library(ddim SRCS ddim.cc DEPS eigen3)
cc_test(ddim_test SRCS ddim_test.cc DEPS ddim) cc_test(ddim_test SRCS ddim_test.cc DEPS ddim)
nv_test(dim_test SRCS dim_test.cu DEPS ddim) nv_test(dim_test SRCS dim_test.cu DEPS ddim)
cc_test(tensor_test SRCS tensor_test.cc DEPS ddim) cc_library(tensor SRCS tensor.cc DEPS ddim place enforce paddle_memory)
cc_test(tensor_test SRCS tensor_test.cc DEPS tensor)
cc_test(variable_test SRCS variable_test.cc) cc_test(variable_test SRCS variable_test.cc)
cc_test(scope_test SRCS scope_test.cc) cc_test(scope_test SRCS scope_test.cc)
cc_test(enforce_test SRCS enforce_test.cc)
proto_library(attr_type SRCS attr_type.proto) proto_library(attr_type SRCS attr_type.proto)
proto_library(op_proto SRCS op_proto.proto DEPS attr_type) proto_library(op_proto SRCS op_proto.proto DEPS attr_type)
cc_test(op_proto_test SRCS op_proto_test.cc DEPS op_proto protobuf) cc_test(op_proto_test SRCS op_proto_test.cc DEPS op_proto protobuf)
proto_library(op_desc SRCS op_desc.proto DEPS attr_type) proto_library(op_desc SRCS op_desc.proto DEPS attr_type)
cc_test(op_desc_test SRCS op_desc_test.cc DEPS op_desc protobuf) cc_test(op_desc_test SRCS op_desc_test.cc DEPS op_desc protobuf)
cc_test(op_registry_test SRCS op_registry_test.cc DEPS op_proto op_desc)
cc_library(operator SRCS operator.cc DEPS op_desc device_context tensor)
cc_test(operator_test SRCS operator_test.cc DEPS operator op_registry)
cc_library(op_registry SRCS op_registry.cc DEPS op_proto op_desc enforce)
cc_test(op_registry_test SRCS op_registry_test.cc DEPS op_registry operator)
py_proto_compile(framework_py_proto SRCS attr_type.proto op_proto.proto op_desc.proto) py_proto_compile(framework_py_proto SRCS attr_type.proto op_proto.proto op_desc.proto)
# Generate an empty __init__.py to make framework_py_proto as a valid python module. # Generate an empty __init__.py to make framework_py_proto as a valid python module.
add_custom_target(framework_py_proto_init ALL COMMAND ${CMAKE_COMMAND} -E touch __init__.py) add_custom_target(framework_py_proto_init ALL COMMAND ${CMAKE_COMMAND} -E touch __init__.py)
add_dependencies(framework_py_proto framework_py_proto_init) add_dependencies(framework_py_proto framework_py_proto_init)
proto_library(net_proto SRCS net_proto.proto DEPS op_proto)
cc_library(net SRCS net.cc DEPS operator net_proto op_registry)
cc_test(net_op_test SRCS net_op_test.cc DEPS net)
/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserve.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
#include "paddle/framework/ddim.h" #include "paddle/framework/ddim.h"
#include "paddle/framework/enforce.h"
namespace paddle { namespace paddle {
namespace framework { namespace framework {
///@cond HIDDEN /// @cond HIDDEN
template <int i> template <int i>
Dim<i> make_dim(const int* d) { Dim<i> make_dim(const int* d) {
...@@ -50,7 +65,7 @@ void make_ddim(DDim& ddim, const int* dims, int n) { ...@@ -50,7 +65,7 @@ void make_ddim(DDim& ddim, const int* dims, int n) {
} }
} }
///@endcond /// @endcond
DDim make_ddim(std::initializer_list<int> dims) { DDim make_ddim(std::initializer_list<int> dims) {
DDim result(make_dim(0)); DDim result(make_dim(0));
...@@ -64,11 +79,11 @@ DDim make_ddim(const std::vector<int>& dims) { ...@@ -64,11 +79,11 @@ DDim make_ddim(const std::vector<int>& dims) {
return result; return result;
} }
///@cond HIDDEN /// @cond HIDDEN
// XXX For some reason, putting this in an anonymous namespace causes errors // XXX For some reason, putting this in an anonymous namespace causes errors
class DynamicMutableIndexer : public boost::static_visitor<int&> { class DynamicMutableIndexer : public boost::static_visitor<int&> {
public: public:
DynamicMutableIndexer(int idx) : idx_(idx) {} explicit DynamicMutableIndexer(int idx) : idx_(idx) {}
template <int D> template <int D>
int& operator()(Dim<D>& dim) const { int& operator()(Dim<D>& dim) const {
...@@ -81,7 +96,7 @@ class DynamicMutableIndexer : public boost::static_visitor<int&> { ...@@ -81,7 +96,7 @@ class DynamicMutableIndexer : public boost::static_visitor<int&> {
class DynamicConstIndexer : public boost::static_visitor<int> { class DynamicConstIndexer : public boost::static_visitor<int> {
public: public:
DynamicConstIndexer(int idx) : idx_(idx) {} explicit DynamicConstIndexer(int idx) : idx_(idx) {}
template <int D> template <int D>
int operator()(const Dim<D>& dim) const { int operator()(const Dim<D>& dim) const {
...@@ -92,7 +107,7 @@ class DynamicConstIndexer : public boost::static_visitor<int> { ...@@ -92,7 +107,7 @@ class DynamicConstIndexer : public boost::static_visitor<int> {
int idx_; int idx_;
}; };
///@endcond /// @endcond
int& DDim::operator[](int idx) { int& DDim::operator[](int idx) {
return boost::apply_visitor(DynamicMutableIndexer(idx), var); return boost::apply_visitor(DynamicMutableIndexer(idx), var);
...@@ -102,6 +117,8 @@ int DDim::operator[](int idx) const { ...@@ -102,6 +117,8 @@ int DDim::operator[](int idx) const {
return boost::apply_visitor(DynamicConstIndexer(idx), var); return boost::apply_visitor(DynamicConstIndexer(idx), var);
} }
ssize_t DDim::size() const { return arity(*this); }
bool DDim::operator==(DDim d) const { bool DDim::operator==(DDim d) const {
if (var.which() != d.getVar().which()) { if (var.which() != d.getVar().which()) {
return false; return false;
...@@ -155,11 +172,11 @@ int get(const DDim& ddim, int idx) { return ddim[idx]; } ...@@ -155,11 +172,11 @@ int get(const DDim& ddim, int idx) { return ddim[idx]; }
void set(DDim& ddim, int idx, int value) { ddim[idx] = value; } void set(DDim& ddim, int idx, int value) { ddim[idx] = value; }
///@cond HIDDEN /// @cond HIDDEN
struct VectorizeVisitor : public boost::static_visitor<> { struct VectorizeVisitor : public boost::static_visitor<> {
std::vector<int>& vector; std::vector<int>& vector;
VectorizeVisitor(std::vector<int>& v) : vector(v) {} explicit VectorizeVisitor(std::vector<int>& v) : vector(v) {}
template <typename T> template <typename T>
void operator()(const T& t) { void operator()(const T& t) {
...@@ -169,7 +186,7 @@ struct VectorizeVisitor : public boost::static_visitor<> { ...@@ -169,7 +186,7 @@ struct VectorizeVisitor : public boost::static_visitor<> {
void operator()(const Dim<1>& t) { vector.push_back(t.head); } void operator()(const Dim<1>& t) { vector.push_back(t.head); }
}; };
///@endcond /// @endcond
std::vector<int> vectorize(const DDim& ddim) { std::vector<int> vectorize(const DDim& ddim) {
std::vector<int> result; std::vector<int> result;
...@@ -178,16 +195,59 @@ std::vector<int> vectorize(const DDim& ddim) { ...@@ -178,16 +195,59 @@ std::vector<int> vectorize(const DDim& ddim) {
return result; return result;
} }
struct ProductVisitor : public boost::static_visitor<ssize_t> {
template <int D>
ssize_t operator()(const Dim<D>& dim) {
return product(dim);
}
};
ssize_t product(const DDim& ddim) { ssize_t product(const DDim& ddim) {
ssize_t result = 1; ProductVisitor visitor;
std::vector<int> v = vectorize(ddim); return boost::apply_visitor(visitor, ddim);
for (auto i : v) { }
result *= i;
struct SliceVectorizeVisitor : public boost::static_visitor<> {
std::vector<int>& vector;
int begin;
int end;
SliceVectorizeVisitor(std::vector<int>& v, int b, int e)
: vector(v), begin(b), end(e) {
PADDLE_ENFORCE(begin < end,
"Begin index must be less than end index in ddim slice.");
PADDLE_ENFORCE(begin >= 0,
"Begin index can't be less than zero in ddim slice.");
} }
return result;
template <int S>
void operator()(const Dim<S>& dim) {
if (begin == 0) {
vector.push_back(dim.head);
} else {
--begin;
}
--end;
if (end > 0) {
this->operator()(dim.tail);
}
}
void operator()(const Dim<1>& dim) {
PADDLE_ENFORCE(end == 1, "End index in ddim slice is out of bound.");
vector.push_back(dim.head);
}
};
DDim slice_ddim(const DDim& dim, int begin, int end) {
std::vector<int> vec;
vec.reserve(end - begin);
SliceVectorizeVisitor visitor(vec, begin, end);
boost::apply_visitor(visitor, dim);
return make_ddim(vec);
} }
///\cond HIDDEN /// \cond HIDDEN
struct ArityVisitor : boost::static_visitor<int> { struct ArityVisitor : boost::static_visitor<int> {
template <int D> template <int D>
...@@ -196,15 +256,15 @@ struct ArityVisitor : boost::static_visitor<int> { ...@@ -196,15 +256,15 @@ struct ArityVisitor : boost::static_visitor<int> {
} }
}; };
///\endcond /// \endcond
int arity(const DDim& d) { return boost::apply_visitor(ArityVisitor(), d); } int arity(const DDim& d) { return boost::apply_visitor(ArityVisitor(), d); }
///\cond HIDDEN /// \cond HIDDEN
struct DDimPrinter : boost::static_visitor<void> { struct DDimPrinter : boost::static_visitor<void> {
std::ostream& os; std::ostream& os;
DDimPrinter(std::ostream& os_) : os(os_) {} explicit DDimPrinter(std::ostream& os_) : os(os_) {}
template <typename T> template <typename T>
void operator()(const T& t) { void operator()(const T& t) {
...@@ -212,7 +272,7 @@ struct DDimPrinter : boost::static_visitor<void> { ...@@ -212,7 +272,7 @@ struct DDimPrinter : boost::static_visitor<void> {
} }
}; };
///\endcond /// \endcond
std::ostream& operator<<(std::ostream& os, const DDim& ddim) { std::ostream& operator<<(std::ostream& os, const DDim& ddim) {
DDimPrinter printer(os); DDimPrinter printer(os);
...@@ -220,5 +280,9 @@ std::ostream& operator<<(std::ostream& os, const DDim& ddim) { ...@@ -220,5 +280,9 @@ std::ostream& operator<<(std::ostream& os, const DDim& ddim) {
return os; return os;
} }
DDim::DDim(std::initializer_list<int> init_list) {
*this = make_ddim(init_list);
}
} // namespace framework } // namespace framework
} // namespace paddle } // namespace paddle
/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserve.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
#pragma once #pragma once
#include <boost/variant.hpp> #include <boost/variant.hpp>
#include <initializer_list> #include <initializer_list>
#include <stdexcept> #include <stdexcept>
#include <vector> #include <vector>
#include "paddle/framework/dim.h" #include "paddle/framework/dim.h"
#include "paddle/framework/enforce.h"
#include "unsupported/Eigen/CXX11/Tensor"
namespace paddle { namespace paddle {
namespace framework { namespace framework {
...@@ -27,7 +42,9 @@ struct DDim { ...@@ -27,7 +42,9 @@ struct DDim {
DDim() : var(Dim<1>()) {} DDim() : var(Dim<1>()) {}
template <int D> template <int D>
DDim(const Dim<D>& in) : var(in) {} explicit DDim(const Dim<D>& in) : var(in) {}
/*implicit*/ DDim(std::initializer_list<int> init_list);
template <int D> template <int D>
DDim& operator=(const Dim<D>& in) { DDim& operator=(const Dim<D>& in) {
...@@ -57,6 +74,8 @@ struct DDim { ...@@ -57,6 +74,8 @@ struct DDim {
DDim operator+(DDim d) const; DDim operator+(DDim d) const;
DDim operator*(DDim d) const; DDim operator*(DDim d) const;
ssize_t size() const;
}; };
/** /**
...@@ -81,6 +100,15 @@ std::vector<int> vectorize(const DDim& ddim); ...@@ -81,6 +100,15 @@ std::vector<int> vectorize(const DDim& ddim);
ssize_t product(const DDim& ddim); ssize_t product(const DDim& ddim);
/**
* \brief Slice a ddim
*
* Slice dim with [begin, end).
* e.g. DDim d = make_ddim({1,2,3,4,5});
* slice_ddim(d, 1, 3); ====> {2,3}
*/
DDim slice_ddim(const DDim& dim, int begin, int end);
/** /**
* \brief What is the length of this dimension? * \brief What is the length of this dimension?
* *
...@@ -91,6 +119,17 @@ int arity(const DDim& ddim); ...@@ -91,6 +119,17 @@ int arity(const DDim& ddim);
std::ostream& operator<<(std::ostream&, const DDim&); std::ostream& operator<<(std::ostream&, const DDim&);
template <int NDIMS>
Eigen::DSizes<Eigen::DenseIndex, NDIMS> ToEigenDSizes(const DDim& dims) {
int rank = arity(dims);
PADDLE_ENFORCE(rank == NDIMS, "DDim and NDIMS must be same");
Eigen::DSizes<Eigen::DenseIndex, NDIMS> dsizes;
for (int d = 0; d < rank; d++) {
dsizes[d] = dims[d];
}
return dsizes;
}
} // namespace framework } // namespace framework
} // namespace paddle } // namespace paddle
......
...@@ -49,9 +49,30 @@ TEST(DDim, Equality) { ...@@ -49,9 +49,30 @@ TEST(DDim, Equality) {
// arity of a DDim // arity of a DDim
EXPECT_EQ(paddle::framework::arity(ddim), 3); EXPECT_EQ(paddle::framework::arity(ddim), 3);
EXPECT_EQ(ddim.size(), 3);
// product of a DDim // product of a DDim
EXPECT_EQ(paddle::framework::product(vddim), 45); EXPECT_EQ(paddle::framework::product(vddim), 45);
EXPECT_EQ(
paddle::framework::product(paddle::framework::make_ddim({3, 2, 5, 3})),
90);
// slice a DDim
paddle::framework::DDim ddim2 =
paddle::framework::make_ddim({1, 2, 3, 4, 5, 6});
paddle::framework::DDim ss = paddle::framework::slice_ddim(ddim2, 2, 5);
EXPECT_EQ(arity(ss), 3);
EXPECT_EQ(ss[0], 3);
EXPECT_EQ(ss[1], 4);
EXPECT_EQ(ss[2], 5);
paddle::framework::DDim ss2 = paddle::framework::slice_ddim(ddim2, 0, 6);
EXPECT_EQ(arity(ss2), 6);
EXPECT_EQ(ss2[0], 1);
EXPECT_EQ(ss2[1], 2);
EXPECT_EQ(ss2[2], 3);
EXPECT_EQ(ss2[3], 4);
EXPECT_EQ(ss2[4], 5);
EXPECT_EQ(ss2[5], 6);
} }
TEST(DDim, Print) { TEST(DDim, Print) {
......
...@@ -266,29 +266,6 @@ HOSTDEVICE inline bool contained(const Dim<1>& idx, const Dim<1>& size) { ...@@ -266,29 +266,6 @@ HOSTDEVICE inline bool contained(const Dim<1>& idx, const Dim<1>& size) {
return ((0 <= idx.head) && (idx.head < size.head)); return ((0 <= idx.head) && (idx.head < size.head));
} }
/**
* \brief Check if a size and a stride create a Fortran order contiguous
* block of memory.
*/
template <int i>
HOST bool contiguous(const Dim<i>& size, const Dim<i>& stride, int mul = 1) {
if (product(size) == 0) return true;
int contiguous_stride = get<0>(size) == 1 ? 0 : mul;
return (get<0>(stride) == contiguous_stride &&
contiguous(size.tail, stride.tail, mul * get<0>(size)));
}
///\cond HIDDEN
// Base case of contiguous, check the nth stride is the size of
// the prefix multiply of n-1 dims.
template <>
inline bool contiguous(const Dim<1>& size, const Dim<1>& stride, int mul) {
if (get<0>(size) == 0) return true;
int contiguous_stride = get<0>(size) == 1 ? 0 : mul;
return get<0>(stride) == contiguous_stride;
}
///\endcond
/** /**
* \brief Compute exclusive prefix-multiply of a Dim. * \brief Compute exclusive prefix-multiply of a Dim.
*/ */
...@@ -306,31 +283,6 @@ HOSTDEVICE inline Dim<1> ex_prefix_mul(const Dim<1>& src, int mul) { ...@@ -306,31 +283,6 @@ HOSTDEVICE inline Dim<1> ex_prefix_mul(const Dim<1>& src, int mul) {
} }
///\endcond ///\endcond
/**
* \brief Calculate strides of a contiguous array of the given size
*
* Sets the stride for any dimension with an extent of 1 to 0.
* \param size Dim object containing the size of the array.
* \param base The base stride to use.
* \return Dim object the same size as \p size with the strides.
*/
template <int i>
HOSTDEVICE Dim<i> contiguous_strides(const Dim<i>& size, int base = 1) {
int stride = size.head == 1 ? 0 : base;
return Dim<i>(stride, contiguous_strides(size.tail, base * size.head));
}
///\cond HIDDEN
// Base case of contiguous_strides
template <>
HOSTDEVICE inline Dim<1> contiguous_strides(const Dim<1>& size, int base) {
int stride = size.head == 1 ? 0 : base;
return Dim<1>(stride);
}
///\endcond
/** /**
* Add two dimensions together * Add two dimensions together
*/ */
......
#include <thrust/device_vector.h> #include <thrust/device_vector.h>
#include <sstream> #include <sstream>
#include "paddle/framework/dim.h"
#include "gtest/gtest.h" #include "gtest/gtest.h"
#include "paddle/framework/dim.h"
__global__ void test(paddle::framework::Dim<2>* o) { __global__ void test(paddle::framework::Dim<2>* o) {
o[0] = paddle::framework::make_dim(5, 6); o[0] = paddle::framework::make_dim(5, 6);
...@@ -21,7 +21,7 @@ TEST(Dim, Equality) { ...@@ -21,7 +21,7 @@ TEST(Dim, Equality) {
// construct a Dim on the GPU // construct a Dim on the GPU
thrust::device_vector<paddle::framework::Dim<2>> t(2); thrust::device_vector<paddle::framework::Dim<2>> t(2);
test<<<1,1>>>(thrust::raw_pointer_cast(t.data())); test<<<1, 1>>>(thrust::raw_pointer_cast(t.data()));
a = t[0]; a = t[0];
EXPECT_EQ(paddle::framework::get<0>(a), 5); EXPECT_EQ(paddle::framework::get<0>(a), 5);
EXPECT_EQ(paddle::framework::get<1>(a), 6); EXPECT_EQ(paddle::framework::get<1>(a), 6);
...@@ -48,34 +48,17 @@ TEST(Dim, Equality) { ...@@ -48,34 +48,17 @@ TEST(Dim, Equality) {
// dynamic access on GPU // dynamic access on GPU
thrust::device_vector<int> r(1); thrust::device_vector<int> r(1);
dyn_idx_gpu<<<1,1>>>(thrust::raw_pointer_cast(r.data())); dyn_idx_gpu<<<1, 1>>>(thrust::raw_pointer_cast(r.data()));
int res = r[0]; int res = r[0];
EXPECT_EQ(res, 6); EXPECT_EQ(res, 6);
// ex_prefix_mul // ex_prefix_mul
paddle::framework::Dim<3> c = paddle::framework::ex_prefix_mul(paddle::framework::Dim<3>(3, 4, 5)); paddle::framework::Dim<3> c =
paddle::framework::ex_prefix_mul(paddle::framework::Dim<3>(3, 4, 5));
EXPECT_EQ(paddle::framework::get<0>(c), 1); EXPECT_EQ(paddle::framework::get<0>(c), 1);
EXPECT_EQ(paddle::framework::get<1>(c), 3); EXPECT_EQ(paddle::framework::get<1>(c), 3);
EXPECT_EQ(paddle::framework::get<2>(c), 12); EXPECT_EQ(paddle::framework::get<2>(c), 12);
// contiguous_strides
c = paddle::framework::contiguous_strides(paddle::framework::Dim<3>(10, 1, 10));
EXPECT_EQ(paddle::framework::get<0>(c), 1);
EXPECT_EQ(paddle::framework::get<1>(c), 0);
EXPECT_EQ(paddle::framework::get<2>(c), 10);
c = paddle::framework::contiguous_strides(paddle::framework::Dim<3>(10, 10, 1));
EXPECT_EQ(paddle::framework::get<0>(c), 1);
EXPECT_EQ(paddle::framework::get<1>(c), 10);
EXPECT_EQ(paddle::framework::get<2>(c), 0);
c = paddle::framework::contiguous_strides(paddle::framework::Dim<3>(1, 10, 10));
EXPECT_EQ(paddle::framework::get<0>(c), 0);
EXPECT_EQ(paddle::framework::get<1>(c), 1);
EXPECT_EQ(paddle::framework::get<2>(c), 10);
c = paddle::framework::contiguous_strides(paddle::framework::Dim<3>(2, 3, 4));
EXPECT_EQ(paddle::framework::get<0>(c), 1);
EXPECT_EQ(paddle::framework::get<1>(c), 2);
EXPECT_EQ(paddle::framework::get<2>(c), 6);
// generate from an index // generate from an index
auto size = paddle::framework::make_dim(4, 5, 2); auto size = paddle::framework::make_dim(4, 5, 2);
c = paddle::framework::Dim<3>(14, size); c = paddle::framework::Dim<3>(14, size);
...@@ -101,16 +84,6 @@ TEST(Dim, Bool) { ...@@ -101,16 +84,6 @@ TEST(Dim, Bool) {
EXPECT_TRUE(a == a); EXPECT_TRUE(a == a);
EXPECT_FALSE(a == b); EXPECT_FALSE(a == b);
EXPECT_TRUE(a == c); EXPECT_TRUE(a == c);
// contiguous check
int x = 4, y = 5, z = 2;
paddle::framework::Dim<3> sizef(x, y, z);
paddle::framework::Dim<3> stridea(1, x, x*y);
paddle::framework::Dim<3> strideb(2, 2*x, 2*x*y);
paddle::framework::Dim<3> stridec(1, x, 2*x*y);
EXPECT_TRUE(paddle::framework::contiguous(sizef, stridea));
EXPECT_FALSE(paddle::framework::contiguous(sizef, strideb));
EXPECT_FALSE(paddle::framework::contiguous(sizef, stridec));
} }
TEST(Dim, Print) { TEST(Dim, Print) {
......
/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserve.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
#include "paddle/framework/enforce.h"
...@@ -10,6 +10,7 @@ See the License for the specific language governing permissions and ...@@ -10,6 +10,7 @@ See the License for the specific language governing permissions and
limitations under the License. */ limitations under the License. */
#pragma once #pragma once
#include <glog/logging.h>
#include <paddle/string/printf.h> #include <paddle/string/printf.h>
#include <exception> #include <exception>
#include <sstream> #include <sstream>
...@@ -58,12 +59,17 @@ class EnforceNotMet : public std::exception { ...@@ -58,12 +59,17 @@ class EnforceNotMet : public std::exception {
/** /**
* @brief Enforce a condition, otherwise throw an EnforceNotMet * @brief Enforce a condition, otherwise throw an EnforceNotMet
*/ */
#ifdef NDEBUG
#define PADDLE_ENFORCE(condition, ...) \ #define PADDLE_ENFORCE(condition, ...) \
do { \ do { \
if (UNLIKELY(!(condition))) { \ if (UNLIKELY(!(condition))) { \
PADDLE_THROW(__VA_ARGS__); \ PADDLE_THROW(__VA_ARGS__); \
} \ } \
} while (0) } while (0)
#else
#define PADDLE_ENFORCE(condition, ...) \
CHECK(condition) << ::paddle::string::Sprintf(__VA_ARGS__);
#endif
} // namespace framework } // namespace framework
} // namespace paddle } // namespace paddle
/*
Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserve.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/
#include "paddle/framework/net.h"
namespace paddle {
namespace framework {
void PlainNet::CompleteAddOp() {
std::unordered_set<std::string> input_set;
std::unordered_set<std::string> output_set;
std::unordered_set<std::string> temp_output;
for (auto& op : ops_) {
for (auto& ipt : op->inputs_) {
if (!Contains(output_set, ipt)) { // Not other op's output
input_set.insert(ipt);
} else {
temp_output.insert(ipt);
}
}
for (auto& opt : op->outputs_) {
output_set.insert(opt);
}
}
inputs_.reserve(input_set.size());
std::copy(input_set.begin(), input_set.end(), std::back_inserter(inputs_));
outputs_.reserve(output_set.size());
std::vector<int> tmp_index;
tmp_index.reserve(temp_output.size());
int idx = 0;
for (auto& opt : output_set) {
if (Contains(temp_output, opt)) {
tmp_index.push_back(idx);
}
outputs_.push_back(opt);
++idx;
}
attrs_["temporary_index"] = tmp_index;
add_op_done_ = true;
}
std::string PlainNet::DebugString() const {
std::ostringstream os;
os << this->type_ << ":" << std::endl;
for (auto& op : ops_) {
os << "\t" << op->DebugString() << std::endl;
}
return os.str();
}
} // namespace framework
} // namespace paddle
/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserve.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
#pragma once
#include <paddle/framework/op_desc.pb.h>
#include <paddle/framework/operator.h>
#include "paddle/framework/net_proto.pb.h"
#include "paddle/framework/op_proto.pb.h"
#include "paddle/framework/op_registry.h"
#include "paddle/framework/scope.h"
#include "paddle/platform/device_context.h"
namespace paddle {
namespace framework {
/**
* @brief Network is also a type of Operator
*
* It will manage the operators it has.
*
* Network is the container and controller of a set of operators.
* A network object knows all Operators belonging to this network. Variables,
* which are inputs and outputs of these operators, are created and managed by a
* hierarchy of Scope objects.
*
* This is the base class of network, all the networks should implement the APIs
* it defines.
*/
class Net : public OperatorBase {
public:
virtual void AddOp(const OperatorPtr& op) = 0;
virtual void CompleteAddOp() = 0;
};
using NetPtr = std::shared_ptr<Net>;
/**
* @brief a basic implementation of Net.
*
* PlainNet is a very simple Net, it create a list of operators, and run them
* sequentially following the order they added.
*/
class PlainNet : public Net {
public:
/**
* Infer all the operators' input and output variables' shapes, will be called
* before every mini-batch
*/
void InferShape(const ScopePtr& scope) const override {
for (auto& op : ops_) {
op->InferShape(scope);
}
}
/**
* @brief Run the network.
*
* Run all the operators with the `scope`, if no scope is provided, default
* scope will be used instead. If no OpContext is provicded, default context
* will be used.
*/
void Run(const ScopePtr& scope,
const platform::DeviceContext& dev_ctx) const override {
for (auto& op : ops_) {
op->Run(scope, dev_ctx);
}
}
/**
* @brief Add an operator by ptr
*/
void AddOp(const OperatorPtr& op) override {
PADDLE_ENFORCE(!add_op_done_, "Cannot AddOp when this network is sealed");
ops_.push_back(op);
}
void CompleteAddOp() override;
std::string DebugString() const override;
std::vector<OperatorPtr> ops_;
private:
bool add_op_done_{false};
template <typename T, typename KeyType>
static bool Contains(T container, KeyType key) {
return container.find(key) != container.end();
}
};
} // namespace framework
} // namespace paddle
#include <gtest/gtest.h>
#include <paddle/framework/net.h>
#include <paddle/framework/op_registry.h>
#include <paddle/framework/operator.h>
namespace pd = paddle::framework;
static int infer_shape_cnt = 0;
static int run_cnt = 0;
class TestOp : public pd::OperatorBase {
public:
void InferShape(const paddle::framework::ScopePtr& scope) const override {
++infer_shape_cnt;
}
void Run(const paddle::framework::ScopePtr& scope,
const paddle::platform::DeviceContext& dev_ctx) const override {
++run_cnt;
}
};
template <typename T>
void AssertSameVectorWithoutOrder(const std::vector<T>& expected,
const std::vector<T>& actual) {
ASSERT_EQ(expected.size(), actual.size());
std::unordered_set<T> expected_set;
for (auto& tmp : expected) {
expected_set.insert(tmp);
}
for (auto& act : actual) {
ASSERT_NE(expected_set.end(), expected_set.find(act));
}
}
TEST(OpKernel, all) {
auto net = std::make_shared<paddle::framework::PlainNet>();
ASSERT_NE(net, nullptr);
auto op1 = std::make_shared<TestOp>();
op1->inputs_ = {"x", "w1", "b1"};
op1->outputs_ = {"y"};
net->AddOp(op1);
auto op2 = std::make_shared<TestOp>();
op2->inputs_ = {"y", "w2", "b2"};
op2->outputs_ = {"z"};
net->AddOp(op2);
net->CompleteAddOp();
AssertSameVectorWithoutOrder({"x", "w1", "b1", "w2", "b2"}, net->inputs_);
AssertSameVectorWithoutOrder({"y", "z"}, net->outputs_);
auto tmp_idx_iter = net->attrs_.find("temporary_index");
ASSERT_NE(net->attrs_.end(), tmp_idx_iter);
auto& tmp_idx = boost::get<std::vector<int>>(tmp_idx_iter->second);
ASSERT_EQ(1UL, tmp_idx.size());
ASSERT_EQ("y", net->outputs_[tmp_idx[0]]);
auto scope = std::make_shared<pd::Scope>();
paddle::platform::CPUDeviceContext dev_ctx;
net->InferShape(scope);
net->Run(scope, dev_ctx);
ASSERT_EQ(2, infer_shape_cnt);
ASSERT_EQ(2, run_cnt);
ASSERT_THROW(net->AddOp(op2), paddle::framework::EnforceNotMet);
}
syntax="proto2";
package paddle.framework;
import "op_proto.proto";
message NetDesc {
// network identification
optional string name = 1;
// operator contains in network
repeated OpProto operators = 2;
// network type to run with. e.g "plainNet", "DAG"
optional string net_type = 3;
// num worker always
optional int32 num_workers = 4;
}
/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserve.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
#include "paddle/framework/net.h"
#include "paddle/framework/op_registry.h"
#include <gtest/gtest.h>
namespace paddle {
namespace framework {
class FakeFC : public Operator {}
} // namespace framework
} // namespace paddle
...@@ -34,6 +34,11 @@ message AttrProto { ...@@ -34,6 +34,11 @@ message AttrProto {
// Supported attribute comments. It helps 3rd-party language generate doc-string. // Supported attribute comments. It helps 3rd-party language generate doc-string.
required string comment = 3; required string comment = 3;
// If that attribute is generated, it means the Paddle third language
// binding has responsibility to fill that attribute. End-User should
// not set that attribute.
optional bool generated = 4 [default=false];
} }
// Input or output message for 3rd-party language binding. // Input or output message for 3rd-party language binding.
...@@ -45,6 +50,40 @@ message VarProto { ...@@ -45,6 +50,40 @@ message VarProto {
// The comment for that input. It helps 3rd-party language generate doc-string. // The comment for that input. It helps 3rd-party language generate doc-string.
required string comment = 2; required string comment = 2;
// Is that input/output could be a list or not.
// If so, that Op should write a attributed named `input_format` or
// `output_format`.
//
// e.g.
// If the op is a fc op, the inputs are `X`, `W`, `b`. The `X` and `W`
// could be multiple, so the multiple of `X` and `W` is True, and OpDesc
// will hold a attribute of them.
//
// The Op desc of same fc could be
// {
// "type": "fc",
// "input": ["X1", "X2", "W1", "W2", "b"],
// "output": "fc.out",
// "attrs" : {
// "input_format": [0, 2, 4, 5]
// }
// }
//
optional bool multiple = 3 [default=false];
// It marks that output is a temporary output. That output is not used by
// user, but used by other op internally as input. If other op is not use
// that output, it could be optimized early.
//
// Attribute temporary_index will be set in OpDesc if there is some
// outputs are temporary.
//
// output = [ "xxx.out1", "xxx.tmp", "xxx.out2"],
// attrs = {
// "temporary_index": [1]
// }
optional bool temporary = 4 [default=false];
} }
// Op protocol message for 3rd-party language binding. // Op protocol message for 3rd-party language binding.
......
/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserve.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
#include <paddle/framework/op_registry.h>
namespace paddle {
namespace framework {
template <>
void AttrTypeHelper::SetAttrType<int>(AttrProto* attr) {
attr->set_type(paddle::framework::AttrType::INT);
}
template <>
void AttrTypeHelper::SetAttrType<float>(AttrProto* attr) {
attr->set_type(paddle::framework::AttrType::FLOAT);
}
template <>
void AttrTypeHelper::SetAttrType<std::string>(AttrProto* attr) {
attr->set_type(paddle::framework::AttrType::STRING);
}
template <>
void AttrTypeHelper::SetAttrType<std::vector<int>>(AttrProto* attr) {
attr->set_type(paddle::framework::AttrType::INTS);
}
template <>
void AttrTypeHelper::SetAttrType<std::vector<float>>(AttrProto* attr) {
attr->set_type(paddle::framework::AttrType::FLOATS);
}
template <>
void AttrTypeHelper::SetAttrType<std::vector<std::string>>(AttrProto* attr) {
attr->set_type(paddle::framework::AttrType::STRINGS);
}
} // namespace framework
} // namespace paddle
#pragma once #pragma once
#include <algorithm>
#include <atomic>
#include <type_traits>
#include <unordered_map>
#include <unordered_set>
#include "paddle/framework/attr_checker.h" #include "paddle/framework/attr_checker.h"
//#include "paddle/framework/op_base.h"
#include "paddle/framework/op_desc.pb.h" #include "paddle/framework/op_desc.pb.h"
#include "paddle/framework/op_proto.pb.h" #include "paddle/framework/op_proto.pb.h"
#include "paddle/framework/operator.h"
namespace paddle { namespace paddle {
namespace framework { namespace framework {
//==================For test================//
class OpBase {
public:
std::vector<std::string> inputs_;
std::vector<std::string> outputs_;
AttributeMap attr_map_;
virtual std::string Run() const = 0;
virtual ~OpBase() {}
};
//=========================================//
// helper class to set attribute type // helper class to set attribute type
struct AttrTypeHelper { struct AttrTypeHelper {
template <typename T> template <typename T>
...@@ -64,190 +56,344 @@ struct AttrTypeHelper { ...@@ -64,190 +56,344 @@ struct AttrTypeHelper {
} }
}; };
template <>
void AttrTypeHelper::SetAttrType<int>(AttrProto* attr) {
attr->set_type(paddle::framework::AttrType::INT);
}
template <>
void AttrTypeHelper::SetAttrType<float>(AttrProto* attr) {
attr->set_type(paddle::framework::AttrType::FLOAT);
}
template <>
void AttrTypeHelper::SetAttrType<std::string>(AttrProto* attr) {
attr->set_type(paddle::framework::AttrType::STRING);
}
template <>
void AttrTypeHelper::SetAttrType<std::vector<int>>(AttrProto* attr) {
attr->set_type(paddle::framework::AttrType::INTS);
}
template <>
void AttrTypeHelper::SetAttrType<std::vector<float>>(AttrProto* attr) {
attr->set_type(paddle::framework::AttrType::FLOATS);
}
template <>
void AttrTypeHelper::SetAttrType<std::vector<std::string>>(AttrProto* attr) {
attr->set_type(paddle::framework::AttrType::STRINGS);
}
// this class not only make proto but also init attribute checkers. // this class not only make proto but also init attribute checkers.
class OpProtoAndCheckerMaker { class OpProtoAndCheckerMaker {
public: public:
OpProtoAndCheckerMaker(OpProto* proto, OpAttrChecker* op_checker) OpProtoAndCheckerMaker(OpProto* proto, OpAttrChecker* op_checker)
: proto_(proto), op_checker_(op_checker) {} : proto_(proto), op_checker_(op_checker) {}
~OpProtoAndCheckerMaker() {
PADDLE_ENFORCE(validated_, "should call Validate after build");
}
void Validate() {
validated_ = true;
CheckNoDuplicatedInOutAttrs();
}
protected: protected:
void AddInput(const std::string& name, const std::string& comment) { void AddInput(const std::string& name, const std::string& comment,
bool multiple = false) {
auto input = proto_->mutable_inputs()->Add(); auto input = proto_->mutable_inputs()->Add();
*(input->mutable_name()) = name; *input->mutable_name() = name;
*(input->mutable_comment()) = comment; *input->mutable_comment() = comment;
input->set_multiple(multiple);
if (multiple) {
SetHasMultipleInput();
}
}
void AddInputs(const std::string& name, const std::string& comment) {
AddInput(name, comment, true);
} }
void AddOutput(const std::string& name, const std::string& comment) { void AddOutput(const std::string& name, const std::string& comment,
bool temporary = false, bool multiple = false) {
auto output = proto_->mutable_outputs()->Add(); auto output = proto_->mutable_outputs()->Add();
*(output->mutable_name()) = name; *output->mutable_name() = name;
*(output->mutable_comment()) = comment; *output->mutable_comment() = comment;
output->set_multiple(multiple);
if (multiple) {
SetHasMultipleOutput();
}
output->set_temporary(temporary);
if (temporary) {
SetHasTemporaryOutput();
}
}
void AddOutputs(const std::string& name, const std::string& comment,
bool temporary = false) {
AddOutput(name, comment, temporary, true);
} }
template <typename T> template <typename T>
TypedAttrChecker<T>& AddAttr(const std::string& name, TypedAttrChecker<T>& AddAttr(const std::string& name,
const std::string& comment) { const std::string& comment,
bool generated = false) {
auto attr = proto_->mutable_attrs()->Add(); auto attr = proto_->mutable_attrs()->Add();
*(attr->mutable_name()) = name; *attr->mutable_name() = name;
*(attr->mutable_comment()) = comment; *attr->mutable_comment() = comment;
attr->set_generated(generated);
AttrTypeHelper::SetAttrType<T>(attr); AttrTypeHelper::SetAttrType<T>(attr);
return op_checker_->AddAttrChecker<T>(name); return op_checker_->AddAttrChecker<T>(name);
} }
void AddType(const std::string& op_type) { proto_->set_type(op_type); }
void AddComment(const std::string& comment) { void AddComment(const std::string& comment) {
*(proto_->mutable_comment()) = comment; *(proto_->mutable_comment()) = comment;
} }
private:
void SetHasMultiple(const std::string& in_out, bool* flag) {
if (!*flag) {
AddAttr<std::vector<int>>(in_out + "_format",
"The multiple index of " + in_out +
"\n"
R"DOC(
This attribute is used by Paddle core framework. Paddle's Op support each input
or output could be a list of variable. This attribute is used to show how that
list organized.
e.g.
input = ["a", "b", "c", "d", "e", "f"]
input_format = [0, 4, 5, 6]
means
The number of all input variables this op is six, and they are segmented into
three inputs.
The first input is input[0:4], second is input[4:5], third is input[5:6].
)DOC",
/*generated*/ true);
*flag = true;
}
}
void SetHasMultipleInput() { SetHasMultiple("input", &has_multiple_input_); }
void SetHasMultipleOutput() {
SetHasMultiple("output", &has_multiple_output_);
}
void SetHasTemporaryOutput() {
if (!has_temporary_output_) {
AddAttr<std::vector<int>>("temporary_index",
R"DOC(The temporary index of output.
Not all output of Paddle Op is used by user. For faster computation, each op
could output some its internal state to other op, other op could take that
output to make compute faster.
Add a mark to which output is temporary is helpful for future optimization.
)DOC",
/*generated*/ true)
.SetDefault(std::vector<int>());
has_temporary_output_ = true;
}
}
void CheckNoDuplicatedInOutAttrs() {
std::unordered_set<std::string> names;
auto checker = [&](const std::string& name) {
PADDLE_ENFORCE(!names.count(name), "[%s] is duplicated", name);
names.insert(name);
};
for (auto& attr : proto_->attrs()) {
checker(attr.name());
}
for (auto& input : proto_->inputs()) {
checker(input.name());
}
for (auto& output : proto_->outputs()) {
checker(output.name());
}
}
OpProto* proto_; OpProto* proto_;
OpAttrChecker* op_checker_; OpAttrChecker* op_checker_;
bool validated_{false};
bool has_multiple_input_{false};
bool has_multiple_output_{false};
bool has_temporary_output_{false};
}; };
class OpRegistry { class OpRegistry {
typedef std::function<OpBase*()> OpCreator; using OpCreator = std::function<OperatorBase*()>;
using VarIndexMap = std::unordered_map<std::string, int>;
using VarNameList = std::vector<std::string>;
public: public:
template <typename OpType, typename ProtoMakerType> template <typename OpType, typename ProtoMakerType>
static void RegisterOp(const std::string& op_type) { static void RegisterOp(const std::string& op_type) {
creators_[op_type] = []() { return new OpType; }; creators()[op_type] = [] { return new OpType; };
OpProto& op_proto = protos_[op_type]; OpProto& op_proto = protos()[op_type];
OpAttrChecker& op_checker = op_checkers_[op_type]; OpAttrChecker& op_checker = op_checkers()[op_type];
ProtoMakerType(&op_proto, &op_checker); auto maker = ProtoMakerType(&op_proto, &op_checker);
PADDLE_ENFORCE(op_proto.IsInitialized() == true, maker.Validate();
"Fail to initialize %s's OpProto !", op_type); *op_proto.mutable_type() = op_type;
PADDLE_ENFORCE(
op_proto.IsInitialized(),
"Fail to initialize %s's OpProto, because %s is not initialized",
op_type, op_proto.InitializationErrorString());
VarIndexMaps()[op_type].reset(new VarIndexMap());
auto& varmap = *VarIndexMaps()[op_type];
int idx = 0;
for (auto& var : op_proto.inputs()) {
varmap[var.name()] = idx++;
} }
idx = 0;
static OpBase* CreateOp(const OpDesc& op_desc) { for (auto& var : op_proto.outputs()) {
std::string op_type = op_desc.type(); varmap[var.name()] = idx++;
OpBase* op = (creators_.at(op_type))();
(op->inputs_).resize(op_desc.inputs_size());
for (int i = 0; i < op_desc.inputs_size(); ++i) {
(op->inputs_)[i] = op_desc.inputs(i);
} }
(op->outputs_).resize(op_desc.outputs_size());
for (int i = 0; i < op_desc.outputs_size(); ++i) {
(op->outputs_)[i] = op_desc.outputs(i);
} }
for (int i = 0; i < op_desc.attrs_size(); ++i) {
const AttrDesc& ith_attr = op_desc.attrs(i); static OperatorPtr CreateOp(const std::string& type,
std::string name = ith_attr.name(); const VarNameList& inputs,
(op->attr_map_)[name] = AttrTypeHelper::GetAttrValue(ith_attr); const VarNameList& outputs,
const AttributeMap& attrs) {
auto op_create_it = creators().find(type);
PADDLE_ENFORCE(op_create_it != creators().end(),
"Operator %s cannot be found", type);
auto op = op_create_it->second();
op->type_ = type;
op->inputs_ = inputs;
op->outputs_ = outputs;
op->attrs_ = attrs;
op_checkers().at(type).Check(op->attrs_);
GenerateTempVariableName(op);
{
auto var_index_it = VarIndexMaps().find(type);
if (var_index_it != VarIndexMaps().end()) {
op->in_out_idxs_ = var_index_it->second;
} }
const OpAttrChecker& op_checker = op_checkers_.at(op_type);
op_checker.Check(op->attr_map_);
return op;
} }
private: op->Init();
static std::unordered_map<std::string, OpCreator> creators_; return OperatorPtr(op);
static std::unordered_map<std::string, OpProto> protos_; }
static std::unordered_map<std::string, OpAttrChecker> op_checkers_;
};
std::unordered_map<std::string, std::function<OpBase*()>> OpRegistry::creators_; static OperatorPtr CreateOp(const OpDesc& op_desc) {
std::unordered_map<std::string, OpProto> OpRegistry::protos_; std::vector<std::string> inputs;
std::unordered_map<std::string, OpAttrChecker> OpRegistry::op_checkers_; inputs.reserve((size_t)op_desc.inputs_size());
std::copy(op_desc.inputs().begin(), op_desc.inputs().end(),
std::back_inserter(inputs));
template <typename OpType, typename ProtoMakerType> std::vector<std::string> outputs;
class OpRegisterHelper { outputs.reserve((size_t)op_desc.outputs_size());
public: std::copy(op_desc.outputs().begin(), op_desc.outputs().end(),
OpRegisterHelper(std::string op_type) { std::back_inserter(outputs));
OpRegistry::RegisterOp<OpType, ProtoMakerType>(op_type);
AttributeMap attrs;
for (auto& attr : op_desc.attrs()) {
attrs[attr.name()] = AttrTypeHelper::GetAttrValue(attr);
} }
};
#define REGISTER_OP(__op_class, __op_maker_class, __op_type) \ return CreateOp(op_desc.type(), inputs, outputs, attrs);
class __op_class##Register { \ }
private: \
const static OpRegisterHelper<__op_class, __op_maker_class> reg; \
}; \
const OpRegisterHelper<__op_class, __op_maker_class> \
__op_class##Register::reg(#__op_type);
// Demos static std::unordered_map<std::string, OpProto>& protos() {
static std::unordered_map<std::string, OpProto> protos_;
return protos_;
};
class CosineOp : public OpBase { private:
public: static std::unordered_map<std::string, std::shared_ptr<VarIndexMap>>&
virtual std::string Run() const { VarIndexMaps() {
std::string msg = "CosineOp runs! scale = " + static std::unordered_map<std::string, std::shared_ptr<VarIndexMap>> maps_;
std::to_string(boost::get<float>(attr_map_.at("scale"))); return maps_;
return msg;
} }
};
class CosineOpProtoAndCheckerMaker : public OpProtoAndCheckerMaker { static void GenerateTempVariableName(OperatorBase* op) {
public: static std::atomic<size_t> gUniqId(0UL);
CosineOpProtoAndCheckerMaker(OpProto* proto, OpAttrChecker* op_checker) for (auto& outname : op->outputs_) {
: OpProtoAndCheckerMaker(proto, op_checker) { if (outname == OperatorBase::TMP_VAR_NAME()) {
AddInput("input", "input of cosine op"); outname += op->type_;
AddOutput("output", "output of cosine op"); outname += "@";
AddAttr<float>("scale", "scale of cosine op") outname += std::to_string(gUniqId.fetch_add(1));
.SetDefault(1.0) }
.LargerThan(0.0); }
AddType("cos");
AddComment("This is cos op");
} }
};
REGISTER_OP(CosineOp, CosineOpProtoAndCheckerMaker, cos_sim)
class MyTestOp : public OpBase { static std::unordered_map<std::string, OpCreator>& creators() {
public: static std::unordered_map<std::string, OpCreator> creators_;
virtual std::string Run() const { return creators_;
std::string msg =
"MyTestOp runs! test_attr = " +
std::to_string(boost::get<int>(attr_map_.at("test_attr")));
return msg;
} }
static std::unordered_map<std::string, OpAttrChecker>& op_checkers() {
static std::unordered_map<std::string, OpAttrChecker> op_checkers_;
return op_checkers_;
};
}; };
class MyTestOpProtoAndCheckerMaker : public OpProtoAndCheckerMaker { template <typename OpType, typename ProtoMakerType>
class OpRegisterHelper {
public: public:
MyTestOpProtoAndCheckerMaker(OpProto* proto, OpAttrChecker* op_checker) OpRegisterHelper(const char* op_type) {
: OpProtoAndCheckerMaker(proto, op_checker) { OpRegistry::RegisterOp<OpType, ProtoMakerType>(op_type);
AddInput("input", "input of cosine op");
AddOutput("output", "output of cosine op");
auto my_checker = [](int i) {
PADDLE_ENFORCE(i % 2 == 0, "'test_attr' must be even!");
};
AddAttr<int>("test_attr", "a simple test attribute")
.AddCustomChecker(my_checker);
AddType("my_test_op");
AddComment("This is my_test op");
} }
}; };
REGISTER_OP(MyTestOp, MyTestOpProtoAndCheckerMaker, my_test_op) /**
* check if MACRO is used in GLOBAL NAMESPACE.
*/
#define STATIC_ASSERT_GLOBAL_NAMESPACE(uniq_name, msg) \
struct __test_global_namespace_##uniq_name##__ {}; \
static_assert(std::is_same<::__test_global_namespace_##uniq_name##__, \
__test_global_namespace_##uniq_name##__>::value, \
msg)
/**
* Macro to Register Operator.
*/
#define REGISTER_OP(__op_type, __op_class, __op_maker_class) \
STATIC_ASSERT_GLOBAL_NAMESPACE(__reg_op__##__op_type, \
"REGISTER_OP must be in global namespace"); \
static ::paddle::framework::OpRegisterHelper<__op_class, __op_maker_class> \
__op_register_##__op_type##__(#__op_type); \
int __op_register_##__op_type##_handle__() { return 0; }
/**
* Macro to Register OperatorKernel.
*/
#define REGISTER_OP_KERNEL(type, DEVICE_TYPE, PlaceType, ...) \
STATIC_ASSERT_GLOBAL_NAMESPACE( \
__reg_op_kernel_##type##_##DEVICE_TYPE##__, \
"REGISTER_OP_KERNEL must be in global namespace"); \
struct __op_kernel_register__##type##__ { \
__op_kernel_register__##type##__() { \
::paddle::framework::OperatorWithKernel::OpKernelKey key; \
key.place_ = PlaceType(); \
::paddle::framework::OperatorWithKernel::AllOpKernels()[#type][key] \
.reset(new __VA_ARGS__()); \
} \
}; \
static __op_kernel_register__##type##__ __reg_kernel_##type##__; \
int __op_kernel_register_##type##_handle_##DEVICE_TYPE##__() { return 0; }
// (type, KernelType)
#define REGISTER_OP_GPU_KERNEL(type, ...) \
REGISTER_OP_KERNEL(type, GPU, ::paddle::platform::GPUPlace, __VA_ARGS__)
// (type, KernelType)
#define REGISTER_OP_CPU_KERNEL(type, ...) \
REGISTER_OP_KERNEL(type, CPU, ::paddle::platform::CPUPlace, __VA_ARGS__)
/**
* Macro to mark what Operator and Kernel we will use and tell the compiler to
* link them into target.
*/
#define USE_OP_WITHOUT_KERNEL(op_type) \
STATIC_ASSERT_GLOBAL_NAMESPACE( \
__use_op_without_kernel_##op_type, \
"USE_OP_WITHOUT_KERNEL must be in global namespace"); \
extern int __op_register_##op_type##_handle__(); \
static int __use_op_ptr_##op_type##_without_kernel__ \
__attribute__((unused)) = __op_register_##op_type##_handle__()
#define USE_OP_KERNEL(op_type, DEVICE_TYPE) \
STATIC_ASSERT_GLOBAL_NAMESPACE( \
__use_op_kernel_##op_type##_##DEVICE_TYPE##__, \
"USE_OP_KERNEL must be in global namespace"); \
extern int __op_kernel_register_##op_type##_handle_##DEVICE_TYPE##__(); \
static int __use_op_ptr_##op_type##_##DEVICE_TYPE##_kernel__ \
__attribute__((unused)) = \
__op_kernel_register_##op_type##_handle_##DEVICE_TYPE##__()
// use Operator with only cpu kernel.
#define USE_OP_CPU(op_type) \
USE_OP_WITHOUT_KERNEL(op_type); \
USE_OP_KERNEL(op_type, CPU)
#ifdef PADDLE_ONLY_CPU
#define USE_OP(op_type) USE_OP_CPU(op_type)
#else
#define USE_OP(op_type) \
USE_OP_CPU(op_type); \
USE_OP_KERNEL(op_type, GPU)
#endif
} // namespace framework } // namespace framework
} // namespace paddle } // namespace paddle
#include "paddle/framework/op_registry.h" #include "paddle/framework/op_registry.h"
#include <gtest/gtest.h> #include <gtest/gtest.h>
namespace pd = paddle::framework;
namespace paddle {
namespace framework {
class CosineOp : public OperatorBase {
public:
void Run(const ScopePtr& scope,
const platform::DeviceContext& dev_ctx) const override {}
void InferShape(const ScopePtr& scope) const override {}
};
class CosineOpProtoAndCheckerMaker : public OpProtoAndCheckerMaker {
public:
CosineOpProtoAndCheckerMaker(OpProto* proto, OpAttrChecker* op_checker)
: OpProtoAndCheckerMaker(proto, op_checker) {
AddInput("input", "input of cosine op");
AddOutput("output", "output of cosine op");
AddAttr<float>("scale", "scale of cosine op")
.SetDefault(1.0)
.LargerThan(0.0);
AddComment("This is cos op");
}
};
class MyTestOp : public OperatorBase {
public:
void InferShape(const ScopePtr& scope) const override {}
void Run(const ScopePtr& scope,
const platform::DeviceContext& dev_ctx) const override {}
};
class MyTestOpProtoAndCheckerMaker : public OpProtoAndCheckerMaker {
public:
MyTestOpProtoAndCheckerMaker(OpProto* proto, OpAttrChecker* op_checker)
: OpProtoAndCheckerMaker(proto, op_checker) {
AddInputs("input", "input of cosine op");
AddOutput("output", "output of cosine op",
/*temporary*/ true);
auto my_checker = [](int i) {
PADDLE_ENFORCE(i % 2 == 0, "'test_attr' must be even!");
};
AddAttr<int>("test_attr", "a simple test attribute")
.AddCustomChecker(my_checker);
AddComment("This is my_test op");
}
};
} // namespace framework
} // namespace paddle
REGISTER_OP(cos_sim, paddle::framework::CosineOp,
paddle::framework::CosineOpProtoAndCheckerMaker);
REGISTER_OP(my_test_op, paddle::framework::MyTestOp,
paddle::framework::MyTestOpProtoAndCheckerMaker);
TEST(OpRegistry, CreateOp) { TEST(OpRegistry, CreateOp) {
paddle::framework::OpDesc op_desc; paddle::framework::OpDesc op_desc;
op_desc.set_type("cos_sim"); op_desc.set_type("cos_sim");
op_desc.add_inputs("aa"); op_desc.add_inputs("aa");
op_desc.add_outputs("bb"); op_desc.add_outputs("bb");
float scale = 3.3;
auto attr = op_desc.mutable_attrs()->Add(); auto attr = op_desc.mutable_attrs()->Add();
attr->set_name("scale"); attr->set_name("scale");
attr->set_type(paddle::framework::AttrType::FLOAT); attr->set_type(paddle::framework::AttrType::FLOAT);
attr->set_f(3.3); attr->set_f(scale);
paddle::framework::OpBase* op = paddle::framework::OperatorPtr op =
paddle::framework::OpRegistry::CreateOp(op_desc); paddle::framework::OpRegistry::CreateOp(op_desc);
std::string debug_str = op->Run(); auto scope = std::make_shared<paddle::framework::Scope>();
std::string str = "CosineOp runs! scale = " + std::to_string(3.3); paddle::platform::CPUDeviceContext dev_ctx;
ASSERT_EQ(str.size(), debug_str.size()); op->Run(scope, dev_ctx);
for (size_t i = 0; i < debug_str.length(); ++i) { float scale_get = op->GetAttr<float>("scale");
ASSERT_EQ(debug_str[i], str[i]); ASSERT_EQ(scale_get, scale);
}
} }
TEST(OpRegistry, IllegalAttr) { TEST(OpRegistry, IllegalAttr) {
...@@ -35,7 +89,7 @@ TEST(OpRegistry, IllegalAttr) { ...@@ -35,7 +89,7 @@ TEST(OpRegistry, IllegalAttr) {
bool caught = false; bool caught = false;
try { try {
paddle::framework::OpBase* op __attribute__((unused)) = paddle::framework::OperatorPtr op __attribute__((unused)) =
paddle::framework::OpRegistry::CreateOp(op_desc); paddle::framework::OpRegistry::CreateOp(op_desc);
} catch (paddle::framework::EnforceNotMet err) { } catch (paddle::framework::EnforceNotMet err) {
caught = true; caught = true;
...@@ -54,15 +108,22 @@ TEST(OpRegistry, DefaultValue) { ...@@ -54,15 +108,22 @@ TEST(OpRegistry, DefaultValue) {
op_desc.add_inputs("aa"); op_desc.add_inputs("aa");
op_desc.add_outputs("bb"); op_desc.add_outputs("bb");
paddle::framework::OpBase* op = ASSERT_TRUE(op_desc.IsInitialized());
paddle::framework::OperatorPtr op =
paddle::framework::OpRegistry::CreateOp(op_desc); paddle::framework::OpRegistry::CreateOp(op_desc);
std::string debug_str = op->Run(); auto scope = std::make_shared<paddle::framework::Scope>();
float default_value = 1.0; paddle::platform::CPUDeviceContext dev_ctx;
std::string str = "CosineOp runs! scale = " + std::to_string(default_value); op->Run(scope, dev_ctx);
ASSERT_EQ(str.size(), debug_str.size()); ASSERT_EQ(op->GetAttr<float>("scale"), 1.0);
for (size_t i = 0; i < debug_str.length(); ++i) { }
ASSERT_EQ(debug_str[i], str[i]);
} static void SetInputFormat(paddle::framework::OpDesc* desc) {
auto attr = desc->add_attrs();
attr->set_name("input_format");
attr->set_type(paddle::framework::INTS);
attr->mutable_ints()->Add(0);
attr->mutable_ints()->Add(1);
} }
TEST(OpRegistry, CustomChecker) { TEST(OpRegistry, CustomChecker) {
...@@ -70,11 +131,12 @@ TEST(OpRegistry, CustomChecker) { ...@@ -70,11 +131,12 @@ TEST(OpRegistry, CustomChecker) {
op_desc.set_type("my_test_op"); op_desc.set_type("my_test_op");
op_desc.add_inputs("ii"); op_desc.add_inputs("ii");
op_desc.add_outputs("oo"); op_desc.add_outputs("oo");
SetInputFormat(&op_desc);
// attr 'test_attr' is not set // attr 'test_attr' is not set
bool caught = false; bool caught = false;
try { try {
paddle::framework::OpBase* op __attribute__((unused)) = paddle::framework::OperatorPtr op __attribute__((unused)) =
paddle::framework::OpRegistry::CreateOp(op_desc); paddle::framework::OpRegistry::CreateOp(op_desc);
} catch (paddle::framework::EnforceNotMet err) { } catch (paddle::framework::EnforceNotMet err) {
caught = true; caught = true;
...@@ -93,7 +155,7 @@ TEST(OpRegistry, CustomChecker) { ...@@ -93,7 +155,7 @@ TEST(OpRegistry, CustomChecker) {
attr->set_i(3); attr->set_i(3);
caught = false; caught = false;
try { try {
paddle::framework::OpBase* op __attribute__((unused)) = paddle::framework::OperatorPtr op __attribute__((unused)) =
paddle::framework::OpRegistry::CreateOp(op_desc); paddle::framework::OpRegistry::CreateOp(op_desc);
} catch (paddle::framework::EnforceNotMet err) { } catch (paddle::framework::EnforceNotMet err) {
caught = true; caught = true;
...@@ -111,12 +173,44 @@ TEST(OpRegistry, CustomChecker) { ...@@ -111,12 +173,44 @@ TEST(OpRegistry, CustomChecker) {
attr->set_name("test_attr"); attr->set_name("test_attr");
attr->set_type(paddle::framework::AttrType::INT); attr->set_type(paddle::framework::AttrType::INT);
attr->set_i(4); attr->set_i(4);
paddle::framework::OpBase* op = SetInputFormat(&op_desc);
paddle::framework::OperatorPtr op =
paddle::framework::OpRegistry::CreateOp(op_desc); paddle::framework::OpRegistry::CreateOp(op_desc);
std::string debug_str = op->Run(); paddle::platform::CPUDeviceContext dev_ctx;
std::string str = "MyTestOp runs! test_attr = " + std::to_string(4); auto scope = std::make_shared<paddle::framework::Scope>();
ASSERT_EQ(str.size(), debug_str.size()); op->Run(scope, dev_ctx);
for (size_t i = 0; i < debug_str.length(); ++i) { int test_attr = op->GetAttr<int>("test_attr");
ASSERT_EQ(debug_str[i], str[i]); ASSERT_EQ(test_attr, 4);
}
class TestAttrProtoMaker : public pd::OpProtoAndCheckerMaker {
public:
TestAttrProtoMaker(pd::OpProto* proto, pd::OpAttrChecker* op_checker)
: OpProtoAndCheckerMaker(proto, op_checker) {
AddAttr<float>("scale", "scale of test op");
AddAttr<float>("scale", "scale of test op");
} }
};
TEST(ProtoMaker, DuplicatedAttr) {
pd::OpProto op_proto;
pd::OpAttrChecker op_checker;
auto proto_maker = TestAttrProtoMaker(&op_proto, &op_checker);
ASSERT_THROW(proto_maker.Validate(), paddle::framework::EnforceNotMet);
}
class TestInOutProtoMaker : public pd::OpProtoAndCheckerMaker {
public:
TestInOutProtoMaker(pd::OpProto* proto, pd::OpAttrChecker* op_checker)
: OpProtoAndCheckerMaker(proto, op_checker) {
AddInput("input", "input of test op");
AddInput("input", "input of test op");
}
};
TEST(ProtoMaker, DuplicatedInOut) {
pd::OpProto op_proto;
pd::OpAttrChecker op_checker;
auto proto_maker = TestInOutProtoMaker(&op_proto, &op_checker);
ASSERT_THROW(proto_maker.Validate(), paddle::framework::EnforceNotMet);
} }
/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserve.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
#include <algorithm>
#include "paddle/framework/operator.h"
namespace paddle {
namespace framework {
template <>
Eigen::DefaultDevice* KernelContext::GetEigenDevice<
platform::CPUPlace, Eigen::DefaultDevice>() const {
return device_context_.get_eigen_device<Eigen::DefaultDevice>();
}
#ifndef PADDLE_ONLY_CPU
template <>
Eigen::GpuDevice*
KernelContext::GetEigenDevice<platform::GPUPlace, Eigen::GpuDevice>() const {
return device_context_.get_eigen_device<Eigen::GpuDevice>();
}
#endif
const std::string& OperatorBase::Input(const std::string& name) const {
auto it = in_out_idxs_->find(name);
PADDLE_ENFORCE(it != in_out_idxs_->end(), "no key [%s] in in_out_idxs_",
name);
if (attrs_.count("input_format") == 0) {
return inputs_[it->second];
} else {
const auto& input_format = GetAttr<std::vector<int>>("input_format");
int idx = input_format[it->second];
return inputs_.at(idx);
}
}
std::vector<std::string> OperatorBase::Inputs(const std::string& name) const {
auto input_format = GetAttr<std::vector<int>>("input_format");
auto offset = in_out_idxs_->at(name);
return std::vector<std::string>{
inputs_.begin() + input_format.at(offset),
inputs_.begin() + input_format.at(offset + 1)};
}
const std::string& OperatorBase::Output(const std::string& name) const {
auto it = in_out_idxs_->find(name);
PADDLE_ENFORCE(it != in_out_idxs_->end(), "no key [%s] in in_out_idxs_",
name);
if (attrs_.count("output_format") == 0) {
return outputs_[it->second];
} else {
const auto& output_format = GetAttr<std::vector<int>>("output_format");
int idx = output_format[it->second];
return outputs_.at(idx);
}
}
std::vector<std::string> OperatorBase::Outputs(const std::string& name) const {
auto output_format = GetAttr<std::vector<int>>("output_format");
auto offset = in_out_idxs_->at(name);
return std::vector<std::string>{
outputs_.begin() + output_format.at(offset),
outputs_.begin() + output_format.at(offset + 1)};
}
std::string OperatorBase::DebugString() const {
std::stringstream ss;
ss << "Op(" << type_ << "), inputs:(";
for (size_t i = 0; i < inputs_.size(); ++i) {
ss << inputs_[i];
if (i != inputs_.size() - 1) {
ss << ", ";
}
}
ss << "), outputs:(";
for (size_t i = 0; i < outputs_.size(); ++i) {
ss << outputs_[i];
if (i != outputs_.size() - 1) {
ss << ", ";
}
}
ss << ").";
return ss.str();
}
} // namespace framework
} // namespace paddle
/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserve.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
#pragma once
#include <boost/variant.hpp>
#include <string>
#include <unordered_map>
#include <vector>
#include "paddle/framework/attr_checker.h"
#include "paddle/framework/op_desc.pb.h"
#include "paddle/framework/op_proto.pb.h"
#include "paddle/framework/scope.h"
#include "paddle/framework/tensor.h"
#include "paddle/platform/device_context.h"
#include "paddle/platform/place.h"
#include "paddle/utils/Error.h"
namespace paddle {
namespace framework {
template <typename T>
struct EigenDeviceConverter;
template <>
struct EigenDeviceConverter<platform::CPUPlace> {
using EigenDeviceType = Eigen::DefaultDevice;
};
#ifndef PADDLE_ONLY_CPU
template <>
struct EigenDeviceConverter<platform::GPUPlace> {
using EigenDeviceType = Eigen::GpuDevice;
};
#endif
class OperatorBase;
using OperatorPtr = std::shared_ptr<OperatorBase>;
/**
* OperatorBase has the basic element that Net will call to do computation.
* Only CreateOperator from OpRegistry will new Operator directly. User
* should always construct a proto message OpDesc and call
* OpRegistry::CreateOp(op_desc) to get an Operator instance.
*/
class OperatorBase {
public:
/// If a variable is a empty variable, that name will be used.
static std::string EMPTY_VAR_NAME() { return "@EMPTY@"; }
/// If a variable is a temporary variable, that name will be set in Python,
/// but it will be convert to a unique name in scope after OpCreator.
static std::string TMP_VAR_NAME() { return "@TEMP@"; }
virtual ~OperatorBase() {}
template <typename T>
inline const T& GetAttr(const std::string& name) const {
PADDLE_ENFORCE(attrs_.count(name) != 0, "%s should be in AttributeMap",
name);
return boost::get<T>(attrs_.at(name));
}
virtual std::string DebugString() const;
/// Init will be called after CreateOperator, you can put some initialization
/// logic here.
virtual void Init() {}
/// InferShape infer the size of Variables used by this Operator with
/// information inside scope
virtual void InferShape(const ScopePtr& scope) const = 0;
/// Net will call this function to Run an op.
virtual void Run(const ScopePtr& scope,
const platform::DeviceContext& dev_ctx) const = 0;
// Get a input with argument's name described in `op_proto`
const std::string& Input(const std::string& name) const;
// Get a input which has multiple variables.
// TODO add a vector_view to prevent memory copy.
std::vector<std::string> Inputs(const std::string& name) const;
// Get a output with argument's name described in `op_proto`
const std::string& Output(const std::string& name) const;
// Get an output which has multiple variables.
// TODO add a vector_view to prevent memory copy.
std::vector<std::string> Outputs(const std::string& name) const;
public:
std::string type_;
std::vector<std::string> inputs_;
std::vector<std::string> outputs_;
AttributeMap attrs_;
// store the arguments' offset described in op_desc.
std::shared_ptr<std::unordered_map<std::string, int>> in_out_idxs_;
};
class KernelContext {
public:
KernelContext(const OperatorBase* op, const std::shared_ptr<Scope>& scope,
const platform::DeviceContext& device_context)
: op_(*op), scope_(scope), device_context_(device_context) {}
const Variable* Input(int index) const {
return scope_->GetVariable(op_.inputs_[index]);
}
Variable* Output(int index) const {
return scope_->GetVariable(op_.outputs_[index]);
}
const Variable* Input(const std::string& name) const {
return scope_->GetVariable(op_.Input(name));
}
const Variable* Output(const std::string& name) const {
return scope_->GetVariable(op_.Output(name));
}
const std::vector<const Variable*> Inputs(const std::string& name) const {
auto names = op_.Inputs(name);
std::vector<const Variable*> res;
std::transform(
names.begin(), names.end(), res.begin(),
[this](const std::string& name) { return scope_->GetVariable(name); });
return res;
}
const std::vector<const Variable*> Outputs(const std::string& name) const {
auto names = op_.Outputs(name);
std::vector<const Variable*> res;
std::transform(
names.begin(), names.end(), res.begin(),
[this](const std::string& name) { return scope_->GetVariable(name); });
return res;
}
template <typename PlaceType,
typename DeviceType =
typename EigenDeviceConverter<PlaceType>::EigenDeviceType>
DeviceType* GetEigenDevice() const;
platform::Place GetPlace() const { return device_context_.GetPlace(); }
const OperatorBase& op_;
const std::shared_ptr<Scope>& scope_;
const platform::DeviceContext& device_context_;
};
class OpKernel {
public:
/**
* KernelContext is the only parameter of Kernel Run function.
* Run will get input/output variables, state such as momentum and
* device resource such as CUDA stream, cublas handle, etc. from
* KernelContext. User should construct it before run the Operator.
*/
virtual void Compute(const KernelContext& context) const = 0;
virtual ~OpKernel() {}
};
template <typename T>
struct VarToTensor {};
template <>
struct VarToTensor<Tensor*> {
Tensor* operator()(Variable* var) { return var->GetMutable<Tensor>(); }
};
template <>
struct VarToTensor<const Tensor*> {
const Tensor* operator()(Variable* var) { return &var->Get<Tensor>(); }
};
class OperatorWithKernel : public OperatorBase {
public:
struct OpKernelKey {
platform::Place place_;
OpKernelKey() = default;
OpKernelKey(const platform::DeviceContext& dev_ctx) {
place_ = dev_ctx.GetPlace();
}
bool operator==(const OpKernelKey& o) const { return place_ == o.place_; }
};
struct OpKernelHash {
std::hash<bool> hash_;
size_t operator()(const OpKernelKey& key) const {
return hash_(platform::is_gpu_place(key.place_));
}
};
using OpKernelMap =
std::unordered_map<OpKernelKey, std::unique_ptr<OpKernel>, OpKernelHash>;
void Run(const ScopePtr& scope,
const platform::DeviceContext& dev_ctx) const final {
auto& opKernel = AllOpKernels().at(type_).at(OpKernelKey(dev_ctx));
opKernel->Compute(KernelContext(this, scope, dev_ctx));
}
static std::unordered_map<std::string /* op_type */, OpKernelMap>&
AllOpKernels() {
static std::unordered_map<std::string, OpKernelMap> g_all_op_kernels;
return g_all_op_kernels;
}
void InferShape(const std::shared_ptr<Scope>& scope) const final {
std::vector<const Tensor*> ins;
VarNamesToTensors(scope, inputs_, &ins);
std::vector<Tensor*> outs;
VarNamesToTensors(scope, outputs_, &outs);
InferShape(ins, outs);
};
private:
template <typename T>
void VarNamesToTensors(const std::shared_ptr<Scope>& scope,
const std::vector<std::string>& var_names,
std::vector<T>* container) const {
container->reserve(var_names.size());
VarToTensor<T> convert;
for (auto& name : var_names) {
auto var = scope->GetVariable(name);
if (var != nullptr) {
container->push_back(convert(var));
} else {
container->push_back(nullptr);
}
}
}
protected:
virtual void InferShape(const std::vector<const Tensor*>& inputs,
const std::vector<Tensor*>& outputs) const = 0;
};
} // namespace framework
} // namespace paddle
/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserve.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
#include "paddle/framework/operator.h"
#include "gtest/gtest.h"
#include "paddle/framework/op_registry.h"
namespace paddle {
namespace framework {
static int op_run_num = 0;
class OpWithoutKernelTest : public OperatorBase {
public:
void Init() override { x = 1; }
void InferShape(const ScopePtr& scope) const override {}
void Run(const ScopePtr& scope,
const platform::DeviceContext& dev_ctx) const override {
op_run_num++;
ASSERT_EQ((int)inputs_.size(), 1);
ASSERT_EQ((int)outputs_.size(), 1);
ASSERT_EQ(scope->GetVariable(inputs_[0]), nullptr);
ASSERT_EQ(x, 1);
ASSERT_NE(scope->GetVariable(outputs_[0]), nullptr);
}
public:
float x = 0;
};
class OpeWithoutKernelTestProtoAndCheckerMaker : public OpProtoAndCheckerMaker {
public:
OpeWithoutKernelTestProtoAndCheckerMaker(OpProto* proto,
OpAttrChecker* op_checker)
: OpProtoAndCheckerMaker(proto, op_checker) {
AddInput("input", "input of test op");
AddOutput("output", "output of test op");
AddAttr<float>("scale", "scale of cosine op");
AddComment("This is test op");
}
};
} // namespace framework
} // namespace paddle
REGISTER_OP(test_operator, paddle::framework::OpWithoutKernelTest,
paddle::framework::OpeWithoutKernelTestProtoAndCheckerMaker);
TEST(OperatorBase, all) {
paddle::framework::OpDesc op_desc;
op_desc.set_type("test_operator");
*op_desc.mutable_inputs()->Add() = "IN1";
*op_desc.mutable_outputs()->Add() = "OUT1";
auto attr = op_desc.mutable_attrs()->Add();
attr->set_name("scale");
attr->set_type(paddle::framework::AttrType::FLOAT);
attr->set_f(3.14);
paddle::platform::CPUDeviceContext device_context;
auto scope = std::make_shared<paddle::framework::Scope>();
paddle::framework::OperatorPtr op =
paddle::framework::OpRegistry::CreateOp(op_desc);
scope->CreateVariable("OUT1");
ASSERT_EQ(paddle::framework::op_run_num, 0);
op->Run(scope, device_context);
ASSERT_EQ(paddle::framework::op_run_num, 1);
}
namespace paddle {
namespace framework {
class OpKernelTestProtoAndCheckerMaker : public OpProtoAndCheckerMaker {
public:
OpKernelTestProtoAndCheckerMaker(OpProto* proto, OpAttrChecker* op_checker)
: OpProtoAndCheckerMaker(proto, op_checker) {
AddInput("x", "input of test op");
AddOutput("y", "output of test op");
AddAttr<float>("scale", "scale of cosine op")
.SetDefault(1.0)
.LargerThan(0.0);
AddComment("This is test op");
}
};
static int cpu_kernel_run_num = 0;
class OpWithKernelTest : public OperatorWithKernel {
protected:
void InferShape(const std::vector<const Tensor*>& inputs,
const std::vector<Tensor*>& outputs) const override {}
};
template <typename T1, typename T2>
class CPUKernelTest : public OpKernel {
public:
void Compute(const KernelContext& ctx) const {
std::cout << "this is cpu kernel" << std::endl;
std::cout << ctx.op_.DebugString() << std::endl;
cpu_kernel_run_num++;
ASSERT_EQ(ctx.op_.Input("x"), "IN1");
ASSERT_EQ(ctx.op_.Output("y"), "OUT1");
}
};
// multiple inputs test
class OperatorMultiInputsTest : public OperatorBase {
public:
void Init() override { x = 1; }
void InferShape(const std::shared_ptr<Scope>& scope) const override {}
void Run(const std::shared_ptr<Scope>& scope,
const platform::DeviceContext& dev_ctx) const override {
ASSERT_EQ(scope->GetVariable(inputs_[0]), nullptr);
ASSERT_EQ(x, 1);
ASSERT_NE(scope->GetVariable(outputs_[0]), nullptr);
ASSERT_EQ(Input("x"), "IN1");
ASSERT_EQ(Input("y"), "OUT1");
}
public:
float x = 0;
};
class OpKernelTestMultiInputsProtoAndCheckerMaker
: public OpProtoAndCheckerMaker {
public:
OpKernelTestMultiInputsProtoAndCheckerMaker(OpProto* proto,
OpAttrChecker* op_checker)
: OpProtoAndCheckerMaker(proto, op_checker) {
AddInputs("xs", "inputs of test op");
AddInput("k", "input of test op");
AddOutputs("ys", "outputs of test op");
AddAttr<float>("scale", "scale of cosine op")
.SetDefault(1.0)
.LargerThan(0.0);
AddComment("This is test op");
}
};
class CPUKernalMultiInputsTest : public OpKernel {
public:
void Compute(const KernelContext& ctx) const {
auto xs = ctx.op_.Inputs("xs");
ASSERT_EQ(xs.size(), 3UL);
ASSERT_EQ(xs[0], "x0");
ASSERT_EQ(xs[1], "x1");
ASSERT_EQ(xs[2], "x2");
auto k = ctx.op_.Input("k");
ASSERT_EQ(k, "k0");
auto ys = ctx.op_.Outputs("ys");
ASSERT_EQ(ys.size(), 2UL);
ASSERT_EQ(ys[0], "y0");
ASSERT_EQ(ys[1], "y1");
}
};
} // namespace framework
} // namespace paddle
REGISTER_OP(op_with_kernel, paddle::framework::OpWithKernelTest,
paddle::framework::OpKernelTestProtoAndCheckerMaker);
REGISTER_OP_CPU_KERNEL(op_with_kernel,
paddle::framework::CPUKernelTest<float, float>);
// test with single input
TEST(OpKernel, all) {
paddle::framework::OpDesc op_desc;
op_desc.set_type("op_with_kernel");
*op_desc.mutable_inputs()->Add() = "IN1";
*op_desc.mutable_outputs()->Add() = "OUT1";
auto attr = op_desc.mutable_attrs()->Add();
attr->set_name("scale");
attr->set_type(paddle::framework::AttrType::FLOAT);
attr->set_f(3.14);
paddle::platform::CPUDeviceContext cpu_device_context;
auto scope = std::make_shared<paddle::framework::Scope>();
paddle::framework::OperatorPtr op =
paddle::framework::OpRegistry::CreateOp(op_desc);
ASSERT_EQ(paddle::framework::cpu_kernel_run_num, 0);
op->Run(scope, cpu_device_context);
ASSERT_EQ(paddle::framework::cpu_kernel_run_num, 1);
}
REGISTER_OP(op_multi_inputs_with_kernel, paddle::framework::OpWithKernelTest,
paddle::framework::OpKernelTestMultiInputsProtoAndCheckerMaker);
REGISTER_OP_CPU_KERNEL(op_multi_inputs_with_kernel,
paddle::framework::CPUKernalMultiInputsTest);
// test with multi inputs
TEST(OpKernel, multi_inputs) {
using namespace paddle::framework;
OpDesc op_desc;
op_desc.set_type("op_multi_inputs_with_kernel");
*op_desc.mutable_inputs()->Add() = "x0";
*op_desc.mutable_inputs()->Add() = "x1";
*op_desc.mutable_inputs()->Add() = "x2";
*op_desc.mutable_inputs()->Add() = "k0";
*op_desc.mutable_outputs()->Add() = "y0";
*op_desc.mutable_outputs()->Add() = "y1";
auto attr = op_desc.mutable_attrs()->Add();
attr->set_name("scale");
attr->set_type(paddle::framework::AttrType::FLOAT);
attr->set_f(3.14);
auto attr0 = op_desc.mutable_attrs()->Add();
attr0->set_name("input_format");
attr0->set_type(paddle::framework::AttrType::INTS);
auto input_format = attr0->mutable_ints();
input_format->Add(0); // x0
input_format->Add(3); // k
input_format->Add(4); // end
auto attr1 = op_desc.mutable_attrs()->Add();
attr1->set_name("output_format");
attr1->set_type(paddle::framework::AttrType::INTS);
auto output_format = attr1->mutable_ints();
output_format->Add(0); // y0
output_format->Add(2); // y1
paddle::platform::CPUDeviceContext cpu_device_context;
auto scope = std::make_shared<Scope>();
OperatorPtr op(paddle::framework::OpRegistry::CreateOp(op_desc));
op->Run(scope, cpu_device_context);
}
...@@ -23,6 +23,9 @@ limitations under the License. */ ...@@ -23,6 +23,9 @@ limitations under the License. */
namespace paddle { namespace paddle {
namespace framework { namespace framework {
class Scope;
using ScopePtr = std::shared_ptr<Scope>;
/** /**
* @brief Scope that manage all variables. * @brief Scope that manage all variables.
* *
...@@ -41,7 +44,7 @@ class Scope { ...@@ -41,7 +44,7 @@ class Scope {
/** /**
* @brief Initialize a Scope with parent. * @brief Initialize a Scope with parent.
*/ */
explicit Scope(const std::shared_ptr<Scope>& parent) : parent_(parent) {} explicit Scope(const ScopePtr& parent) : parent_(parent) {}
/** /**
* @brief Create Variable * @brief Create Variable
...@@ -88,7 +91,7 @@ class Scope { ...@@ -88,7 +91,7 @@ class Scope {
private: private:
std::unordered_map<std::string, std::unique_ptr<Variable>> vars_; std::unordered_map<std::string, std::unique_ptr<Variable>> vars_;
std::shared_ptr<Scope> parent_{nullptr}; ScopePtr parent_{nullptr};
}; };
} // namespace framework } // namespace framework
......
/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserve.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
#include <paddle/framework/tensor.h>
namespace paddle {
namespace framework {}
} // namespace paddle
...@@ -14,84 +14,245 @@ limitations under the License. */ ...@@ -14,84 +14,245 @@ limitations under the License. */
#pragma once #pragma once
#include <cstdint>
#include <cstring>
#include <memory> #include <memory>
#include <type_traits> #include <typeindex>
#include "paddle/framework/ddim.h" #include "paddle/framework/ddim.h"
#include "paddle/framework/enforce.h" #include "paddle/framework/enforce.h"
#include "paddle/framework/tensor_types.h"
#include "paddle/memory/memory.h" #include "paddle/memory/memory.h"
#include "paddle/platform/place.h" #include "paddle/platform/place.h"
#include "unsupported/Eigen/CXX11/Tensor"
namespace paddle { namespace paddle {
namespace pybind {
namespace details { // forward declare
template <bool less, size_t i, typename... args>
struct CastToPyBufferImpl;
} // namespace details
} // namespace pybind
namespace framework { namespace framework {
class Tensor { class Tensor {
public: public:
Tensor() : offset_(0) {}
template <typename T> template <typename T>
const T* data() const { const T* data() const {
PADDLE_ENFORCE(holder_ != nullptr, CheckDims<T>();
"Tensor::data must be called after Tensor::mutable_data."); return reinterpret_cast<const T*>(
return static_cast<const T*>(holder_->Ptr()); reinterpret_cast<uintptr_t>(holder_->ptr()) + offset_);
}
template <typename T>
T* raw_data() const {
CheckDims<T>();
return reinterpret_cast<T*>(reinterpret_cast<uintptr_t>(holder_->ptr()) +
offset_);
}
template <typename T>
T* mutable_data(DDim dims, platform::Place place) {
set_dims(dims);
return mutable_data<T>(place);
} }
template <typename T, // must be POD types template <typename T>
typename std::enable_if<std::is_pod<T>::value>::type* = nullptr> T* mutable_data(platform::Place place) {
T* mutable_data(DDim dims, paddle::platform::Place place) { PADDLE_ENFORCE(product(dims_) > 0,
"Tensor's numel must be larger than zero to call "
"Tensor::mutable_data. Call Tensor::set_dim first.");
if (holder_ == nullptr || if (holder_ == nullptr ||
!(holder_->Place() == !(holder_->place() ==
place) /* some versions of boost::variant don't have operator!= */ place) /* some versions of boost::variant don't have operator!= */
|| holder_->Size() < product(dims) * sizeof(T)) { || holder_->size() < product(dims_) * sizeof(T) + offset_) {
holder_.reset(new PlaceholderImpl<T>(place, product(dims) * sizeof(T))); if (platform::is_cpu_place(place)) {
holder_.reset(new PlaceholderImpl<T, platform::CPUPlace>(
boost::get<platform::CPUPlace>(place), product(dims_) * sizeof(T)));
} else if (platform::is_gpu_place(place)) {
#ifdef PADDLE_ONLY_CPU
PADDLE_THROW("'GPUPlace' is not supported in CPU only device.");
#else
holder_.reset(new PlaceholderImpl<T, platform::GPUPlace>(
boost::get<platform::GPUPlace>(place), product(dims_) * sizeof(T)));
#endif
} else {
PADDLE_THROW("Unknown 'place'.");
}
offset_ = 0;
}
return reinterpret_cast<T*>(reinterpret_cast<uintptr_t>(holder_->ptr()) +
offset_);
}
template <typename T, size_t NDIMS>
typename TTypes<T, NDIMS>::Tensor shaped(DDim new_dims) {
Eigen::array<Eigen::DenseIndex, NDIMS> dims =
paddle::framework::ToEigenDSizes<NDIMS>(new_dims);
return typename TTypes<T, NDIMS>::Tensor(raw_data<T>(), dims);
}
template <typename T, size_t NDIMS>
typename TTypes<T, NDIMS>::Tensor tensor() {
return typename TTypes<T, NDIMS>::Tensor(
raw_data<T>(), paddle::framework::ToEigenDSizes<NDIMS>(dims_));
}
// flat to rank = 1
template <typename T>
typename TTypes<T>::Flat flat() {
return shaped<T, 1>(make_ddim({static_cast<int>(product(dims_))}));
}
// to TensorType Vec
template <typename T>
typename TTypes<T>::Vec vec() {
return tensor<T, 1>();
}
// to TensorType Matrix
template <typename T>
typename TTypes<T>::Matrix matrix() {
return tensor<T, 2>();
}
// const versions of all the methods above.
template <typename T, size_t NDIMS>
typename TTypes<T, NDIMS>::Tensor shaped(DDim new_dims) const {
Eigen::array<Eigen::DenseIndex, NDIMS> dims =
paddle::framework::ToEigenDSizes<NDIMS>(new_dims);
return typename TTypes<T, NDIMS>::Tensor(data<T>(), dims);
}
template <typename T, size_t NDIMS>
typename TTypes<T, NDIMS>::ConstantTensor tensor() const {
return typename TTypes<T, NDIMS>::Tensor(
data<T>(), paddle::framework::ToEigenDSizes<NDIMS>(dims_));
}
template <typename T>
typename TTypes<T>::ConstFlat flat() const {
return shaped<T, 1>(make_ddim({static_cast<int>(product(dims_))}));
}
template <typename T>
typename TTypes<T>::ConstVec vec() const {
return tensor<T, 1>();
} }
return static_cast<T*>(holder_->Ptr());
template <typename T>
typename TTypes<T>::ConstMatrix matrix() const {
return tensor<T, 2>();
} }
template <typename T, // must be POD types template <typename T>
typename std::enable_if<std::is_pod<T>::value>::type* = nullptr> void ShareDataFrom(const Tensor& src) {
T* mutable_data(DDim dims) { src.CheckDims<T>();
return mutable_data<T>(dims, paddle::platform::get_place()); holder_ = src.holder_;
set_dims(src.dims());
offset_ = src.offset_;
} }
template <typename T>
void CopyFrom(const Tensor& src, platform::Place dst_place) {
PADDLE_ENFORCE(platform::is_cpu_place(src.holder_->place()) &&
platform::is_cpu_place(dst_place),
"Tensor::CopyFrom only support CPU now.");
src.CheckDims<T>();
size_t size = product(src.dims_) * sizeof(T);
set_dims(src.dims());
const void* src_ptr = static_cast<const void*>(src.data<T>());
void* dst_ptr = static_cast<void*>(mutable_data<T>(dst_place));
memcpy(dst_ptr, src_ptr, size);
}
template <typename T>
Tensor Slice(const int& begin_idx, const int& end_idx) const {
CheckDims<T>();
PADDLE_ENFORCE(begin_idx >= 0 && end_idx <= dims_[0],
"Slice index is less than zero or out of bound.");
PADDLE_ENFORCE(begin_idx < end_idx,
"Begin index must be less than end index.");
PADDLE_ENFORCE(dims_[0] != 1, "Can not slice a tensor with dims_[0] = 1.");
std::vector<int> d = vectorize(dims_);
int base = 1;
for (size_t i = 1; i < d.size(); ++i) {
base *= d[i];
}
Tensor dst;
dst.holder_ = holder_;
DDim dst_dims = dims_;
dst_dims[0] = end_idx - begin_idx;
dst.set_dims(dst_dims);
dst.offset_ = offset_ + begin_idx * base * sizeof(T);
return dst;
}
void set_dims(const DDim& dims) {
if (dims == dims_) {
return;
}
dims_ = dims;
}
DDim dims() const { return dims_; }
private: private:
// Placeholder hides type T, so it doesn't appear as a template // Placeholder hides type T, so it doesn't appear as a template
// parameter of Variable. // parameter of Variable.
struct Placeholder { struct Placeholder {
virtual ~Placeholder() {} virtual ~Placeholder() {}
virtual void* Ptr() const = 0; virtual void* ptr() const = 0;
virtual paddle::platform::Place Place() const = 0; virtual platform::Place place() const = 0;
virtual size_t Size() const = 0; virtual size_t size() const = 0;
virtual std::type_index type() const = 0;
}; };
template <typename T> template <typename T, typename PlaceType>
struct PlaceholderImpl : public Placeholder { struct PlaceholderImpl : public Placeholder {
private: private:
template <typename PType>
class Deleter { class Deleter {
public: public:
Deleter(platform::Place place) : place_(place) {} Deleter(PType place) : place_(place) {}
void operator()(T* ptr) { void operator()(T* ptr) { memory::Free(place_, static_cast<void*>(ptr)); }
paddle::memory::Free(place_, static_cast<void*>(ptr));
}
private: private:
paddle::platform::Place place_; PType place_;
}; };
public: public:
PlaceholderImpl(paddle::platform::Place place, size_t size) PlaceholderImpl(PlaceType place, size_t size)
: ptr_(static_cast<T*>(paddle::memory::Alloc(place, size)), : ptr_(static_cast<T*>(memory::Alloc(place, size)),
Deleter(place)), Deleter<PlaceType>(place)),
place_(place), place_(place),
size_(size) {} size_(size) {}
virtual void* Ptr() const { return static_cast<void*>(ptr_.get()); } virtual void* ptr() const { return static_cast<void*>(ptr_.get()); }
virtual size_t Size() const { return size_; } virtual size_t size() const { return size_; }
virtual paddle::platform::Place Place() const { return place_; } virtual paddle::platform::Place place() const { return place_; }
virtual std::type_index type() const { return std::type_index(typeid(T)); }
std::unique_ptr<T, Deleter> ptr_; std::unique_ptr<T, Deleter<PlaceType>> ptr_;
paddle::platform::Place place_; // record the place of ptr_. platform::Place place_; // record the place of ptr_.
size_t size_; // size of the memory block. size_t size_; // size of the memory block.
}; };
template <typename T>
inline void CheckDims() const {
PADDLE_ENFORCE(holder_ != nullptr,
"Tenosr holds no memory. Call Tensor::mutable_data first.");
PADDLE_ENFORCE(holder_->size() >= product(dims_) * sizeof(T) + offset_,
"Tensor's dims_ is out of bound. Call Tensor::mutable_data "
"first to re-allocate memory.");
}
std::shared_ptr<Placeholder> holder_; // holds the memory block if allocated. std::shared_ptr<Placeholder> holder_; // holds the memory block if allocated.
DDim dims_;
size_t offset_; // marks the begin of tensor data area.
template <bool less, size_t i, typename... args>
friend struct paddle::pybind::details::CastToPyBufferImpl;
}; };
} // namespace framework } // namespace framework
......
...@@ -15,15 +15,28 @@ ...@@ -15,15 +15,28 @@
#include <gtest/gtest.h> #include <gtest/gtest.h>
#include <string> #include <string>
TEST(Tensor, ASSERT) { TEST(Tensor, Dims) {
paddle::framework::Tensor cpu_tensor; using namespace paddle::framework;
using namespace paddle::platform;
Tensor tt;
tt.set_dims(make_ddim({2, 3, 4}));
DDim dims = tt.dims();
ASSERT_EQ(arity(dims), 3);
for (int i = 0; i < 3; ++i) {
EXPECT_EQ(i + 2, dims[i]);
}
}
TEST(Tensor, DataAssert) {
paddle::framework::Tensor src_tensor;
bool caught = false; bool caught = false;
try { try {
const double* p __attribute__((unused)) = cpu_tensor.data<double>(); src_tensor.data<double>();
} catch (paddle::framework::EnforceNotMet err) { } catch (paddle::framework::EnforceNotMet err) {
caught = true; caught = true;
std::string msg = "Tensor::data must be called after Tensor::mutable_data."; std::string msg =
"Tenosr holds no memory. Call Tensor::mutable_data first.";
const char* what = err.what(); const char* what = err.what();
for (size_t i = 0; i < msg.length(); ++i) { for (size_t i = 0; i < msg.length(); ++i) {
ASSERT_EQ(what[i], msg[i]); ASSERT_EQ(what[i], msg[i]);
...@@ -32,54 +45,168 @@ TEST(Tensor, ASSERT) { ...@@ -32,54 +45,168 @@ TEST(Tensor, ASSERT) {
ASSERT_TRUE(caught); ASSERT_TRUE(caught);
} }
/* mutable_data() is not tested at present /* following tests are not available at present
because Memory::Alloc() and Memory::Free() have not been ready. because Memory::Alloc() and Memory::Free() have not been ready.
*/
TEST(Tensor, MutableData) { TEST(Tensor, MutableData) {
using namespace paddle::framework; using namespace paddle::framework;
using namespace paddle::platform; using namespace paddle::platform;
{ {
Tensor cpu_tensor; Tensor src_tensor;
float* p1 = nullptr; float* p1 = nullptr;
float* p2 = nullptr; float* p2 = nullptr;
// initialization // initialization
p1 = cpu_tensor.mutable_data<float>(make_ddim({1, 2, 3}), CPUPlace()); p1 = src_tensor.mutable_data<float>(make_ddim({1, 2, 3}), CPUPlace());
EXPECT_NE(p1, nullptr); EXPECT_NE(p1, nullptr);
// set cpu_tensor a new dim with large size // set src_tensor a new dim with large size
// momery is supposed to be re-allocated // momery is supposed to be re-allocated
p2 = cpu_tensor.mutable_data<float>(make_ddim({3, 4})); p2 = src_tensor.mutable_data<float>(make_ddim({3, 4}), CPUPlace());
EXPECT_NE(p2, nullptr); EXPECT_NE(p2, nullptr);
EXPECT_NE(p1, p2); EXPECT_NE(p1, p2);
// set cpu_tensor a new dim with same size // set src_tensor a new dim with same size
// momery block is supposed to be unchanged // momery block is supposed to be unchanged
p1 = cpu_tensor.mutable_data<float>(make_ddim({2, 2, 3})); p1 = src_tensor.mutable_data<float>(make_ddim({2, 2, 3}), CPUPlace());
EXPECT_EQ(p1, p2); EXPECT_EQ(p1, p2);
// set cpu_tensor a new dim with smaller size // set src_tensor a new dim with smaller size
// momery block is supposed to be unchanged // momery block is supposed to be unchanged
p2 = cpu_tensor.mutable_data<float>(make_ddim({2, 2})); p2 = src_tensor.mutable_data<float>(make_ddim({2, 2}), CPUPlace());
EXPECT_EQ(p1, p2); EXPECT_EQ(p1, p2);
} }
#ifdef __CUDACC__
{ {
Tensor gpu_tensor; Tensor src_tensor;
float* p1 = nullptr; float* p1 = nullptr;
float* p2 = nullptr; float* p2 = nullptr;
// initialization // initialization
p1 = gpu_tensor.mutable_data<float>(make_ddim({1, 2, 3}), GPUPlace()); p1 = src_tensor.mutable_data<float>(make_ddim({1, 2, 3}), GPUPlace());
EXPECT_NE(p1, nullptr); EXPECT_NE(p1, nullptr);
// set gpu_tensor a new dim with large size // set src_tensor a new dim with large size
// momery is supposed to be re-allocated // momery is supposed to be re-allocated
p2 = gpu_tensor.mutable_data<float>(make_ddim({3, 4})); p2 = src_tensor.mutable_data<float>(make_ddim({3, 4}), GPUPlace());
EXPECT_NE(p2, nullptr); EXPECT_NE(p2, nullptr);
EXPECT_NE(p1, p2); EXPECT_NE(p1, p2);
// set gpu_tensor a new dim with same size // set src_tensor a new dim with same size
// momery block is supposed to be unchanged // momery block is supposed to be unchanged
p1 = gpu_tensor.mutable_data<float>(make_ddim({2, 2, 3})); p1 = src_tensor.mutable_data<float>(make_ddim({2, 2, 3}), GPUPlace());
EXPECT_EQ(p1, p2); EXPECT_EQ(p1, p2);
// set gpu_tensor a new dim with smaller size // set src_tensor a new dim with smaller size
// momery block is supposed to be unchanged // momery block is supposed to be unchanged
p2 = gpu_tensor.mutable_data<float>(make_ddim({2, 2})); p2 = src_tensor.mutable_data<float>(make_ddim({2, 2}), GPUPlace());
EXPECT_EQ(p1, p2); EXPECT_EQ(p1, p2);
} }
#endif
}
TEST(Tensor, ShareDataFrom) {
using namespace paddle::framework;
using namespace paddle::platform;
{
Tensor src_tensor;
Tensor dst_tensor;
// Try to share data form uninitialized tensor
bool caught = false;
try {
dst_tensor.ShareDataFrom<float>(src_tensor);
} catch (EnforceNotMet err) {
caught = true;
std::string msg =
"Tenosr holds no memory. Call Tensor::mutable_data first.";
const char* what = err.what();
for (size_t i = 0; i < msg.length(); ++i) {
ASSERT_EQ(what[i], msg[i]);
}
}
ASSERT_TRUE(caught);
src_tensor.mutable_data<int>(make_ddim({2, 3, 4}), CPUPlace());
dst_tensor.ShareDataFrom<int>(src_tensor);
ASSERT_EQ(src_tensor.data<int>(), dst_tensor.data<int>());
}
#ifdef __CUDACC__
{
Tensor src_tensor;
Tensor dst_tensor;
src_tensor.mutable_data<int>(make_ddim({2, 3, 4}), GPUPlace());
dst_tensor.ShareDataFrom<int>(src_tensor);
ASSERT_EQ(src_tensor.data<int>(), dst_tensor.data<int>());
}
#endif
}
TEST(Tensor, Slice) {
using namespace paddle::framework;
using namespace paddle::platform;
{
Tensor src_tensor;
src_tensor.mutable_data<int>(make_ddim({5, 3, 4}), CPUPlace());
Tensor slice_tensor = src_tensor.Slice<int>(1, 3);
DDim slice_dims = slice_tensor.dims();
ASSERT_EQ(arity(slice_dims), 3);
EXPECT_EQ(slice_dims[0], 2);
EXPECT_EQ(slice_dims[1], 3);
EXPECT_EQ(slice_dims[2], 4);
uintptr_t src_data_address =
reinterpret_cast<uintptr_t>(src_tensor.data<int>());
uintptr_t src_mutable_data_address = reinterpret_cast<uintptr_t>(
src_tensor.mutable_data<int>(src_tensor.dims(), CPUPlace()));
uintptr_t slice_data_address =
reinterpret_cast<uintptr_t>(slice_tensor.data<int>());
uintptr_t slice_mutable_data_address = reinterpret_cast<uintptr_t>(
slice_tensor.mutable_data<int>(slice_tensor.dims(), CPUPlace()));
EXPECT_EQ(src_data_address, src_mutable_data_address);
EXPECT_EQ(slice_data_address, slice_mutable_data_address);
EXPECT_EQ(src_data_address + 3 * 4 * 1 * sizeof(int), slice_data_address);
}
#ifdef __CUDACC__
{
Tensor src_tensor;
src_tensor.mutable_data<double>(make_ddim({6, 9}), GPUPlace());
Tensor slice_tensor = src_tensor.Slice<double>(2, 6);
DDim slice_dims = slice_tensor.dims();
ASSERT_EQ(arity(slice_dims), 2);
EXPECT_EQ(slice_dims[0], 4);
EXPECT_EQ(slice_dims[1], 9);
uintptr_t src_data_address =
reinterpret_cast<uintptr_t>(src_tensor.data<double>());
uintptr_t src_mutable_data_address = reinterpret_cast<uintptr_t>(
src_tensor.mutable_data<double>(src_tensor.dims(), GPUPlace()));
uintptr_t slice_data_address =
reinterpret_cast<uintptr_t>(slice_tensor.data<double>());
uintptr_t slice_mutable_data_address = reinterpret_cast<uintptr_t>(
slice_tensor.mutable_data<double>(slice_tensor.dims(), GPUPlace()));
EXPECT_EQ(src_data_address, src_mutable_data_address);
EXPECT_EQ(slice_data_address, slice_mutable_data_address);
EXPECT_EQ(src_data_address + 9 * 2 * sizeof(double), slice_data_address);
}
#endif
}
TEST(Tensor, CopyFrom) {
using namespace paddle::framework;
using namespace paddle::platform;
Tensor src_tensor;
int* src_ptr = src_tensor.mutable_data<int>(make_ddim({3, 3}), CPUPlace());
int arr[9] = {1, 2, 3, 4, 5, 6, 7, 8, 9};
memcpy(src_ptr, arr, 9 * sizeof(int));
Tensor dst_tensor;
dst_tensor.CopyFrom<int>(src_tensor, CPUPlace());
const int* dst_ptr = dst_tensor.data<int>();
ASSERT_NE(src_ptr, dst_ptr);
for (size_t i = 0; i < 9; ++i) {
EXPECT_EQ(src_ptr[i], dst_ptr[i]);
}
Tensor slice_tensor = src_tensor.Slice<int>(1, 2);
dst_tensor.CopyFrom<int>(slice_tensor, CPUPlace());
const int* slice_ptr = slice_tensor.data<int>();
dst_ptr = dst_tensor.data<int>();
ASSERT_NE(dst_ptr, slice_ptr);
for (size_t i = 0; i < 3; ++i) {
EXPECT_EQ(dst_ptr[i], slice_ptr[i]);
}
} }
*/
/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserve.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
#pragma once
#include "unsupported/Eigen/CXX11/Tensor"
namespace paddle {
namespace framework {
// Helper to define Tensor types given that the scalar is of type T.
template <typename T, int NDIMS = 1, typename IndexType = Eigen::DenseIndex>
struct TTypes {
// Rank-<NDIMS> tensor of scalar type T.
typedef Eigen::TensorMap<Eigen::Tensor<T, NDIMS, Eigen::RowMajor, IndexType>,
Eigen::Aligned>
Tensor;
typedef Eigen::TensorMap<
Eigen::Tensor<const T, NDIMS, Eigen::RowMajor, IndexType>, Eigen::Aligned>
ConstTensor;
// Scalar tensor (implemented as a rank-0 tensor) of scalar type T.
typedef Eigen::TensorMap<
Eigen::TensorFixedSize<T, Eigen::Sizes<>, Eigen::RowMajor, IndexType>,
Eigen::Aligned>
Scalar;
typedef Eigen::TensorMap<Eigen::TensorFixedSize<const T, Eigen::Sizes<>,
Eigen::RowMajor, IndexType>,
Eigen::Aligned>
ConstScalar;
// Rank-1 tensor (vector) of scalar type T.
typedef Eigen::TensorMap<Eigen::Tensor<T, 1, Eigen::RowMajor, IndexType>,
Eigen::Aligned>
Flat;
typedef Eigen::TensorMap<
Eigen::Tensor<const T, 1, Eigen::RowMajor, IndexType>, Eigen::Aligned>
ConstFlat;
typedef Eigen::TensorMap<Eigen::Tensor<T, 1, Eigen::RowMajor, IndexType>,
Eigen::Aligned>
Vec;
typedef Eigen::TensorMap<
Eigen::Tensor<const T, 1, Eigen::RowMajor, IndexType>, Eigen::Aligned>
ConstVec;
// Rank-2 tensor (matrix) of scalar type T.
typedef Eigen::TensorMap<Eigen::Tensor<T, 2, Eigen::RowMajor, IndexType>,
Eigen::Aligned>
Matrix;
typedef Eigen::TensorMap<
Eigen::Tensor<const T, 2, Eigen::RowMajor, IndexType>, Eigen::Aligned>
ConstMatrix;
};
} // namespace framework
} // namespace paddle
...@@ -11,7 +11,6 @@ if(WITH_GPU) ...@@ -11,7 +11,6 @@ if(WITH_GPU)
endif() endif()
if(USE_NNPACK) if(USE_NNPACK)
include(nnpack/nnpack.cmake)
list(APPEND cpp_files nnpack/NNPACKConvOp.cpp) list(APPEND cpp_files nnpack/NNPACKConvOp.cpp)
if(WITH_TESTING) if(WITH_TESTING)
add_unittest(NNPACKConvOpTest nnpack/NNPACKConvOpTest.cpp) add_unittest(NNPACKConvOpTest nnpack/NNPACKConvOpTest.cpp)
......
...@@ -117,8 +117,7 @@ public: ...@@ -117,8 +117,7 @@ public:
ConvFunctionBase::init(config); ConvFunctionBase::init(config);
} }
virtual void check(const BufferArgs& inputs, void check(const BufferArgs& inputs, const BufferArgs& outputs) override {
const BufferArgs& outputs) override {
const TensorShape& input = inputs[0].shape(); const TensorShape& input = inputs[0].shape();
const TensorShape& filter = inputs[1].shape(); const TensorShape& filter = inputs[1].shape();
const TensorShape& output = outputs[0].shape(); const TensorShape& output = outputs[0].shape();
...@@ -217,8 +216,7 @@ public: ...@@ -217,8 +216,7 @@ public:
ConvFunctionBase::init(config); ConvFunctionBase::init(config);
} }
virtual void check(const BufferArgs& inputs, void check(const BufferArgs& inputs, const BufferArgs& outputs) override {
const BufferArgs& outputs) override {
const TensorShape& output = inputs[0].shape(); const TensorShape& output = inputs[0].shape();
const TensorShape& filter = inputs[1].shape(); const TensorShape& filter = inputs[1].shape();
const TensorShape& input = outputs[0].shape(); const TensorShape& input = outputs[0].shape();
...@@ -311,8 +309,7 @@ public: ...@@ -311,8 +309,7 @@ public:
ConvFunctionBase::init(config); ConvFunctionBase::init(config);
} }
virtual void check(const BufferArgs& inputs, void check(const BufferArgs& inputs, const BufferArgs& outputs) override {
const BufferArgs& outputs) override {
const TensorShape& output = inputs[0].shape(); const TensorShape& output = inputs[0].shape();
const TensorShape& input = inputs[1].shape(); const TensorShape& input = inputs[1].shape();
const TensorShape& filter = outputs[0].shape(); const TensorShape& filter = outputs[0].shape();
......
...@@ -90,8 +90,7 @@ public: ...@@ -90,8 +90,7 @@ public:
ConvFunctionBase::init(config); ConvFunctionBase::init(config);
} }
virtual void check(const BufferArgs& inputs, void check(const BufferArgs& inputs, const BufferArgs& outputs) override {
const BufferArgs& outputs) override {
const TensorShape& input = inputs[0].shape(); const TensorShape& input = inputs[0].shape();
const TensorShape& filter = inputs[1].shape(); const TensorShape& filter = inputs[1].shape();
const TensorShape& output = outputs[0].shape(); const TensorShape& output = outputs[0].shape();
......
...@@ -144,12 +144,15 @@ __global__ void KeRowConvBwWeight(real* dw, const real* x, const real* dy, ...@@ -144,12 +144,15 @@ __global__ void KeRowConvBwWeight(real* dw, const real* x, const real* dy,
int yoff = start + j; int yoff = start + j;
// transpose // transpose
sh_x[tidx][tidy] = (xoff < width && yoff < end) ? x[yoff * width + xoff] : 0.0; sh_x[tidx][tidy] = (xoff < width && yoff < end) ?
sh_dy[tidx][tidy + context - 1] = (xoff < width && yoff < end) ? dy[yoff * width + xoff] : 0.0; x[yoff * width + xoff] : 0.0;
sh_dy[tidx][tidy + context - 1] = (xoff < width && yoff < end) ?
dy[yoff * width + xoff] : 0.0;
__syncthreads(); __syncthreads();
if (tidy < (context - 1)) { if (tidy < (context - 1)) {
yoff = yoff - context + 1; yoff = yoff - context + 1;
sh_dy[tidx][tidy] = (xoff < width && yoff >= start) ? dy[yoff * width + xoff] : 0.0; sh_dy[tidx][tidy] = (xoff < width && yoff >= start) ?
dy[yoff * width + xoff] : 0.0;
} }
__syncthreads(); __syncthreads();
...@@ -199,11 +202,13 @@ __global__ void KeRowConvBwWeight2(real* dw, const real* x, const real* dy, ...@@ -199,11 +202,13 @@ __global__ void KeRowConvBwWeight2(real* dw, const real* x, const real* dy,
int yoff = start + j; int yoff = start + j;
// transpose // transpose
sh_x[tidx][tidy] = (xoff < width && yoff < end) ? x[yoff * width + xoff] : 0.0; sh_x[tidx][tidy] = (xoff < width && yoff < end) ?
x[yoff * width + xoff] : 0.0;
__syncthreads(); __syncthreads();
for (int t = 0; t < context; t++) { for (int t = 0; t < context; t++) {
sh_dy[tidx][tidy] = (xoff < width && (yoff - t) >= start && yoff - t < end) ? dy[(yoff - t) * width + xoff] : 0.0; sh_dy[tidx][tidy] = (xoff < width && (yoff - t) >= start &&
yoff - t < end) ? dy[(yoff - t) * width + xoff] : 0.0;
__syncthreads(); __syncthreads();
real val = sh_x[tidy][tidx] * sh_dy[tidy][tidx]; real val = sh_x[tidy][tidx] * sh_dy[tidy][tidx];
......
...@@ -16,7 +16,7 @@ limitations under the License. */ ...@@ -16,7 +16,7 @@ limitations under the License. */
#include "paddle/function/ConvOp.h" #include "paddle/function/ConvOp.h"
DEFINE_bool(nnpack_allocate_outside, DEFINE_bool(nnpack_allocate_outside,
false, true,
"Allocate and free workspace memory outside the NNPACK interface."); "Allocate and free workspace memory outside the NNPACK interface.");
DEFINE_int32(nnpack_num_threads, DEFINE_int32(nnpack_num_threads,
0, 0,
...@@ -58,18 +58,10 @@ public: ...@@ -58,18 +58,10 @@ public:
workspaceBuffer_ = nullptr; workspaceBuffer_ = nullptr;
workspaceSize_ = 0; workspaceSize_ = 0;
threadpool_ = nullptr; create_nnpack_threadpool();
if (FLAGS_nnpack_num_threads) {
threadpool_ = pthreadpool_create(FLAGS_nnpack_num_threads);
VLOG(3) << "Number of threads "
<< pthreadpool_get_threads_count(threadpool_);
}
} }
~NNPACKConvFunction() { ~NNPACKConvFunction() {
if (threadpool_) {
pthreadpool_destroy(threadpool_);
}
if (workspaceBuffer_) { if (workspaceBuffer_) {
free(workspaceBuffer_); free(workspaceBuffer_);
} }
...@@ -225,14 +217,25 @@ public: ...@@ -225,14 +217,25 @@ public:
} }
} }
static void create_nnpack_threadpool() {
if (FLAGS_nnpack_num_threads && threadpool_ == nullptr) {
threadpool_ = pthreadpool_create(FLAGS_nnpack_num_threads);
VLOG(3) << "Number of threads "
<< pthreadpool_get_threads_count(threadpool_);
}
}
private: private:
nnp_convolution_algorithm algorithm_; nnp_convolution_algorithm algorithm_;
nnp_convolution_transform_strategy transform_strategy_; nnp_convolution_transform_strategy transform_strategy_;
void* workspaceBuffer_; void* workspaceBuffer_;
size_t workspaceSize_; size_t workspaceSize_;
pthreadpool_t threadpool_; static pthreadpool_t threadpool_;
}; };
template <DeviceType Device>
pthreadpool_t NNPACKConvFunction<Device>::threadpool_ = nullptr;
REGISTER_TYPED_FUNC(NNPACKConv, CPU, NNPACKConvFunction); REGISTER_TYPED_FUNC(NNPACKConv, CPU, NNPACKConvFunction);
} // namespace paddle } // namespace paddle
...@@ -205,11 +205,9 @@ public: ...@@ -205,11 +205,9 @@ public:
hl_destroy_event(hlEvent_); hl_destroy_event(hlEvent_);
hlEvent_ = NULL; hlEvent_ = NULL;
} }
if (batchData_) {
delete batchData_; delete batchData_;
batchData_ = NULL; batchData_ = NULL;
} }
}
void setDataBatch(DataBatch* batchData) { batchData_ = batchData; } void setDataBatch(DataBatch* batchData) { batchData_ = batchData; }
DataBatch* getDataBatch() { return batchData_; } DataBatch* getDataBatch() { return batchData_; }
......
...@@ -403,7 +403,7 @@ public: ...@@ -403,7 +403,7 @@ public:
: layerName_(layerName) { : layerName_(layerName) {
addEvaluator(std::move(evaluator)); addEvaluator(std::move(evaluator));
} }
virtual void eval(const NeuralNetwork& nn) override { void eval(const NeuralNetwork& nn) override {
const LayerPtr& layer = nn.getLayer(layerName_); const LayerPtr& layer = nn.getLayer(layerName_);
CHECK(layer) << "Nonexisted layer: " << layerName_ << " in submodel " CHECK(layer) << "Nonexisted layer: " << layerName_ << " in submodel "
<< nn.getName(); << nn.getName();
......
...@@ -636,7 +636,7 @@ void lenToStarts(std::vector<int>& starts) { ...@@ -636,7 +636,7 @@ void lenToStarts(std::vector<int>& starts) {
} }
starts.back() = pos; starts.back() = pos;
} }
} } // namespace
void RecurrentGradientMachine::calcSequenceStartPositions() { void RecurrentGradientMachine::calcSequenceStartPositions() {
std::vector<int> starts(commonSeqInfo_.size() + 1); std::vector<int> starts(commonSeqInfo_.size() + 1);
......
...@@ -124,7 +124,7 @@ void copyElements(const IVector& srcVec, ...@@ -124,7 +124,7 @@ void copyElements(const IVector& srcVec,
dest[index[i]] = src[i]; dest[index[i]] = src[i];
} }
} }
} } // namespace
void GatherAgentLayer::forwardIds(PassType passType) { void GatherAgentLayer::forwardIds(PassType passType) {
IVectorPtr realId = realLayers_[0]->getOutputLabel(); IVectorPtr realId = realLayers_[0]->getOutputLabel();
......
...@@ -359,12 +359,11 @@ void Layer::backwardActivation() { ...@@ -359,12 +359,11 @@ void Layer::backwardActivation() {
/* Do error clipping */ /* Do error clipping */
if (config_.error_clipping_threshold() > 0.0f) { if (config_.error_clipping_threshold() > 0.0f) {
if (FLAGS_log_error_clipping) { if (FLAGS_log_error_clipping) {
CpuVector outGradVec(0, nullptr); VectorPtr outGradVec = Vector::create(
outGradVec.subVecFrom( output_.grad->getData(), output_.grad->getElementCnt(), useGpu_);
output_.grad->getData(), 0, output_.grad->getElementCnt()); real maxAbsGrad = outGradVec->getAbsMax();
real maxAbsGrad = outGradVec.getAbsMax();
if (maxAbsGrad > config_.error_clipping_threshold()) { if (maxAbsGrad > config_.error_clipping_threshold()) {
real avgAbsGrad = outGradVec.getAbsSum() / outGradVec.getSize(); real avgAbsGrad = outGradVec->getAbsSum() / outGradVec->getSize();
LOG(INFO) << " layer=" << config_.name() << " need clipping," LOG(INFO) << " layer=" << config_.name() << " need clipping,"
<< " max error=" << maxAbsGrad << " avg error=" << avgAbsGrad; << " max error=" << maxAbsGrad << " avg error=" << avgAbsGrad;
} }
......
...@@ -32,9 +32,7 @@ static InitFunction __init_storage_engine([]() { StorageEngine::singleton(); }, ...@@ -32,9 +32,7 @@ static InitFunction __init_storage_engine([]() { StorageEngine::singleton(); },
StorageEngine::StorageEngine() : cpuAllocator_(nullptr) {} StorageEngine::StorageEngine() : cpuAllocator_(nullptr) {}
StorageEngine::~StorageEngine() { StorageEngine::~StorageEngine() {
if (cpuAllocator_) {
delete cpuAllocator_; delete cpuAllocator_;
}
for (auto it : gpuAllocator_) { for (auto it : gpuAllocator_) {
delete it; delete it;
} }
......
add_subdirectory(detail) add_subdirectory(detail)
cc_library(memory SRCS memory.cc)
cc_library(paddle_memory
DEPS
memory meta_data
meta_cache memory_block
buddy_allocator system_allocator)
cc_test(memory_test SRCS memory_test.cc DEPS place paddle_memory)
if(${WITH_GPU}) if(${WITH_GPU})
nv_library(system_allocator SRCS system_allocator.cc DEPS gflags) nv_library(system_allocator SRCS system_allocator.cc DEPS gflags cpu_info gpu_info)
nv_test(system_allocator_test SRCS system_allocator_test.cc DEPS system_allocator gflags)
else(${WITH_GPU}) else(${WITH_GPU})
cc_library(system_allocator SRCS system_allocator.cc DEPS gflags) cc_library(system_allocator SRCS system_allocator.cc DEPS gflags cpu_info)
cc_test(system_allocator_test SRCS system_allocator_test.cc DEPS system_allocator gflags)
endif(${WITH_GPU}) endif(${WITH_GPU})
cc_test(system_allocator_test SRCS system_allocator_test.cc DEPS system_allocator)
cc_library(meta_data SRCS meta_data.cc)
cc_library(meta_cache SRCS meta_cache.cc)
cc_library(memory_block SRCS memory_block.cc)
cc_library(buddy_allocator SRCS buddy_allocator.cc DEPS glog)
...@@ -12,22 +12,317 @@ ...@@ -12,22 +12,317 @@
See the License for the specific language governing permissions and See the License for the specific language governing permissions and
limitations under the License. */ limitations under the License. */
#pragma once
#include "paddle/memory/detail/buddy_allocator.h" #include "paddle/memory/detail/buddy_allocator.h"
#include "glog/logging.h"
namespace paddle { namespace paddle {
namespace memory { namespace memory {
namespace detail { namespace detail {
BuddyAllocator::BuddyAllocator(size_t pool_size, size_t max_pools, BuddyAllocator::BuddyAllocator(SystemAllocator* system_allocator,
SystemAllocator* system_allocator) size_t min_chunk_size, size_t max_chunk_size)
: pool_size_(pool_size), : min_chunk_size_(min_chunk_size),
max_pools_(max_pools), max_chunk_size_(max_chunk_size),
system_allocator_(system_allocator) { cache_(system_allocator->UseGpu()),
PADDLE_ASSERT(pool_size > 0); system_allocator_(std::move(system_allocator)) {}
PADDLE_ASSERT(max_pools > 0);
PADDLE_ASSERT(system_allocator != nullptr); BuddyAllocator::~BuddyAllocator() {
DLOG(INFO) << "BuddyAllocator Disconstructor makes sure that all of these "
"have actually been freed";
while (!pool_.empty()) {
auto block = static_cast<MemoryBlock*>(std::get<2>(*pool_.begin()));
DLOG(INFO) << "Free from block (" << block << ", " << max_chunk_size_
<< ")";
system_allocator_->Free(block, max_chunk_size_, block->index(cache_));
cache_.invalidate(block);
pool_.erase(pool_.begin());
}
}
inline size_t align(size_t size, size_t alignment) {
size_t remaining = size % alignment;
return remaining == 0 ? size : size + (alignment - remaining);
}
void* BuddyAllocator::Alloc(size_t unaligned_size) {
// adjust allocation alignment
size_t size = align(unaligned_size + sizeof(Metadata), min_chunk_size_);
// acquire the allocator lock
std::lock_guard<std::mutex> lock(mutex_);
DLOG(INFO) << "Allocate " << unaligned_size << " bytes from chunk size "
<< size;
// if the allocation is huge, send directly to the system allocator
if (size > max_chunk_size_) {
DLOG(INFO) << "Allocate from system allocator.";
return SystemAlloc(size);
}
// query and allocate from the existing chunk
auto it = FindExistChunk(size);
// refill the pool if failure
if (it == pool_.end()) {
it = RefillPool();
// if still failure, fail fatally
if (it == pool_.end()) {
return nullptr;
}
} else {
DLOG(INFO) << "Allocation from existing memory block " << std::get<2>(*it)
<< " at address "
<< reinterpret_cast<MemoryBlock*>(std::get<2>(*it))->data();
}
total_used_ += size;
total_free_ -= size;
// split the allocation and return data for use
return reinterpret_cast<MemoryBlock*>(SplitToAlloc(it, size))->data();
}
void BuddyAllocator::Free(void* p) {
// Point back to metadata
auto block = static_cast<MemoryBlock*>(p)->metadata();
// Acquire the allocator lock
std::lock_guard<std::mutex> lock(mutex_);
DLOG(INFO) << "Free from address " << block;
if (block->type(cache_) == MemoryBlock::HUGE_CHUNK) {
DLOG(INFO) << "Free directly from system allocator";
system_allocator_->Free(block, block->total_size(cache_),
block->index(cache_));
// Invalidate GPU allocation from cache
cache_.invalidate(block);
return;
}
block->mark_as_free(cache_);
total_used_ -= block->total_size(cache_);
total_free_ += block->total_size(cache_);
// Trying to merge the right buddy
if (block->has_right_buddy(cache_)) {
DLOG(INFO) << "Merging this block " << block << " with its right buddy "
<< block->right_buddy(cache_);
auto right_buddy = block->right_buddy(cache_);
if (right_buddy->type(cache_) == MemoryBlock::FREE_CHUNK) {
// Take away right buddy from pool
pool_.erase(IndexSizeAddress(right_buddy->index(cache_),
right_buddy->total_size(cache_),
right_buddy));
// merge its right buddy to the block
block->merge(cache_, right_buddy);
}
}
// Trying to merge the left buddy
if (block->has_left_buddy(cache_)) {
DLOG(INFO) << "Merging this block " << block << " with its left buddy "
<< block->left_buddy(cache_);
auto left_buddy = block->left_buddy(cache_);
if (left_buddy->type(cache_) == MemoryBlock::FREE_CHUNK) {
// Take away right buddy from pool
pool_.erase(IndexSizeAddress(left_buddy->index(cache_),
left_buddy->total_size(cache_), left_buddy));
// merge the block to its left buddy
left_buddy->merge(cache_, block);
block = left_buddy;
}
}
// Dumping this block into pool
DLOG(INFO) << "Inserting free block (" << block << ", "
<< block->total_size(cache_) << ")";
pool_.insert(
IndexSizeAddress(block->index(cache_), block->total_size(cache_), block));
// Clean up if existing too much free memory
// Prefer freeing fallback allocation first
CleanIdleFallBackAlloc();
// Free normal allocation
CleanIdleNormalAlloc();
}
size_t BuddyAllocator::Used() { return total_used_; }
void* BuddyAllocator::SystemAlloc(size_t size) {
size_t index = 0;
void* p = system_allocator_->Alloc(index, size);
DLOG(INFO) << "Allocated " << p << " from system allocator.";
if (p == nullptr) return nullptr;
static_cast<MemoryBlock*>(p)->init(cache_, MemoryBlock::HUGE_CHUNK, index,
size, nullptr, nullptr);
return static_cast<MemoryBlock*>(p)->data();
}
BuddyAllocator::PoolSet::iterator BuddyAllocator::RefillPool() {
#ifndef PADDLE_ONLY_CPU
if (system_allocator_->UseGpu()) {
if ((total_used_ + total_free_) == 0) {
// Compute the maximum allocation size for the first allocation.
max_chunk_size_ = platform::GpuMaxChunkSize();
}
}
#endif // PADDLE_ONLY_CPU
// Allocate a new maximum sized block
size_t index = 0;
void* p = system_allocator_->Alloc(index, max_chunk_size_);
if (p == nullptr) return pool_.end();
DLOG(INFO) << "Creating and inserting new block " << p
<< " from system allocator";
static_cast<MemoryBlock*>(p)->init(cache_, MemoryBlock::FREE_CHUNK, index,
max_chunk_size_, nullptr, nullptr);
// gpu fallback allocation
if (system_allocator_->UseGpu() &&
static_cast<MemoryBlock*>(p)->index(cache_) == 1) {
fallback_alloc_count_++;
}
total_free_ += max_chunk_size_;
// dump the block into pool
return pool_.insert(IndexSizeAddress(index, max_chunk_size_, p)).first;
}
BuddyAllocator::PoolSet::iterator BuddyAllocator::FindExistChunk(size_t size) {
size_t index = 0;
while (1) {
auto it = pool_.lower_bound(IndexSizeAddress(index, size, nullptr));
// no match chunk memory
if (it == pool_.end()) return it;
if (std::get<0>(*it) > index) {
// find suitable one
if (std::get<1>(*it) >= size) {
return it;
}
// update and continue
index = std::get<0>(*it);
continue;
}
return it;
}
}
void* BuddyAllocator::SplitToAlloc(BuddyAllocator::PoolSet::iterator it,
size_t size) {
auto block = static_cast<MemoryBlock*>(std::get<2>(*it));
pool_.erase(it);
DLOG(INFO) << "Split block (" << block << ", " << block->total_size(cache_)
<< ") into";
block->split(cache_, size);
DLOG(INFO) << "Left block (" << block << ", " << block->total_size(cache_)
<< ")";
block->set_type(cache_, MemoryBlock::ARENA_CHUNK);
// the rest of memory if exist
if (block->has_right_buddy(cache_)) {
if (block->right_buddy(cache_)->type(cache_) == MemoryBlock::FREE_CHUNK) {
DLOG(INFO) << "Insert right block (" << block->right_buddy(cache_) << ", "
<< block->right_buddy(cache_)->total_size(cache_) << ")";
pool_.insert(
IndexSizeAddress(block->right_buddy(cache_)->index(cache_),
block->right_buddy(cache_)->total_size(cache_),
block->right_buddy(cache_)));
}
}
return block;
}
void BuddyAllocator::CleanIdleFallBackAlloc() {
// If fallback allocation does not exist, return directly
if (!fallback_alloc_count_) return;
for (auto pool = pool_.rbegin(); pool != pool_.rend();) {
// If free memory block less than max_chunk_size_, return directly
if (std::get<1>(*pool) < max_chunk_size_) return;
MemoryBlock* block = static_cast<MemoryBlock*>(std::get<2>(*pool));
// If no GPU fallback allocator, return
if (!system_allocator_->UseGpu() || block->index(cache_) == 0) {
return;
}
DLOG(INFO) << "Return block " << block << " to fallback allocator.";
system_allocator_->Free(block, max_chunk_size_, block->index(cache_));
cache_.invalidate(block);
pool = PoolSet::reverse_iterator(pool_.erase(std::next(pool).base()));
total_free_ -= max_chunk_size_;
fallback_alloc_count_--;
// If no fall allocation exists, return directly
if (!fallback_alloc_count_) return;
}
}
void BuddyAllocator::CleanIdleNormalAlloc() {
auto shall_free_alloc = [&]() -> bool {
// free all fallback allocations
if (fallback_alloc_count_ > 0) {
return true;
}
// keep 2x overhead if we haven't fallen back
if ((total_used_ + max_chunk_size_) * 2 < total_free_) {
return true;
}
return false;
};
if (!shall_free_alloc()) return;
for (auto pool = pool_.rbegin(); pool != pool_.rend();) {
// If free memory block less than max_chunk_size_, return directly
if (std::get<1>(*pool) < max_chunk_size_) return;
MemoryBlock* block = static_cast<MemoryBlock*>(std::get<2>(*pool));
DLOG(INFO) << "Return block " << block << " to base allocator.";
system_allocator_->Free(block, max_chunk_size_, block->index(cache_));
cache_.invalidate(block);
pool = PoolSet::reverse_iterator(pool_.erase(std::next(pool).base()));
total_free_ -= max_chunk_size_;
if (!shall_free_alloc()) return;
}
} }
} // namespace detail } // namespace detail
......
...@@ -14,9 +14,16 @@ ...@@ -14,9 +14,16 @@
#pragma once #pragma once
#include "paddle/memory/detail/meta_cache.h"
#include "paddle/memory/detail/meta_data.h"
#include "paddle/memory/detail/system_allocator.h" #include "paddle/memory/detail/system_allocator.h"
#include "paddle/platform/assert.h"
#include "paddle/platform/cpu_info.h"
#include "paddle/platform/gpu_info.h"
#include <mutex> #include <mutex>
#include <set>
#include <unordered_map>
#include <vector> #include <vector>
namespace paddle { namespace paddle {
...@@ -25,61 +32,80 @@ namespace detail { ...@@ -25,61 +32,80 @@ namespace detail {
class BuddyAllocator { class BuddyAllocator {
public: public:
BuddyAllocator(size_t pool_size, size_t max_pools, BuddyAllocator(SystemAllocator* system_allocator, size_t min_chunk_size,
SystemAllocator* system_allocator); size_t max_chunk_size);
~BuddyAllocator(); ~BuddyAllocator();
void* Alloc(size_t size); public:
void* Alloc(size_t unaligned_size);
void Free(void*); void Free(void*);
size_t Used(); size_t Used();
public:
// Disable copy and assignment
BuddyAllocator(const BuddyAllocator&) = delete;
BuddyAllocator& operator=(const BuddyAllocator&) = delete;
private: private:
struct Block { // Tuple (allocator index, memory size, memory address)
size_t size_; using IndexSizeAddress = std::tuple<size_t, size_t, void*>;
Block* left_; // left buddy // Each element in PoolSet is a free allocation
Block* right_; // right buddy using PoolSet = std::set<IndexSizeAddress>;
};
// Initially, there is only one pool. If a Alloc founds not enough /*! \brief Allocate fixed-size memory from system */
// memory from that pool, and there has not been max_num_pools_, void* SystemAlloc(size_t size);
// create a new pool by calling system_allocator_.Alloc(pool_size_).
std::vector<void*> pools_;
size_t pool_size_; // the size of each pool; /*! \brief If existing chunks are not suitable, refill pool */
size_t max_num_pools_; // the size of all pools; PoolSet::iterator RefillPool();
SystemAllocator* system_allocator_; /**
* \brief Find the suitable chunk from existing pool and split
* it to left and right buddies
*
* \param it the iterator of pool list
* \param size the size of allocation
*
* \return the left buddy address
*/
void* SplitToAlloc(PoolSet::iterator it, size_t size);
std::mutex mutex_; /*! \brief Find the existing chunk which used to allocation */
PoolSet::iterator FindExistChunk(size_t size);
// Disable copy and assignment. /*! \brief Clean idle fallback allocation */
BuddyAllocator(const BuddyAllocator&) = delete; void CleanIdleFallBackAlloc();
BuddyAllocator& operator=(const BuddyAllocator&) = delete;
}; /*! \brief Clean idle normal allocation */
void CleanIdleNormalAlloc();
BuddyAllocator<CPUAllocator>* GetCPUBuddyAllocator() { private:
static BuddyAllocator<CPUAllocator>* a = nullptr; size_t total_used_ = 0; // the total size of used memory
if (a == nullptr) { size_t total_free_ = 0; // the total size of free memory
a = new BuddyAllocator<CPUAllocator>();
} size_t min_chunk_size_; // the minimum size of each chunk
return a; size_t max_chunk_size_; // the maximum size of each chunk
}
private:
#ifndef PADDLE_ONLY_CPU // The following code are for CUDA. /**
* \brief A list of free allocation
BuddyAllocator<GPUAllocator>* GetGPUBuddyAllocator(int gpu_id) { *
static BuddyAllocator<GPUAllocator>** as = NULL; * \note Only store free chunk memory in pool
if (as == NULL) { */
int gpu_num = platform::GetDeviceCount(); PoolSet pool_;
as = new BuddyAllocator<GPUAllocator>*[gpu_num];
for (int gpu = 0; gpu < gpu_num; gpu++) { /*! Record fallback allocation count for auto-scaling */
as[gpu] = new BuddyAllocator<GPUAllocator>(); size_t fallback_alloc_count_ = 0;
}
} private:
return as[gpu_id]; /*! Unify the metadata format between GPU and CPU allocations */
} MetadataCache cache_;
#endif // PADDLE_ONLY_CPU private:
/*! Allocate CPU/GPU memory from system */
SystemAllocator* system_allocator_;
std::mutex mutex_;
};
} // namespace detail } // namespace detail
} // namespace memory } // namespace memory
......
/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserve.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
#include "paddle/memory/detail/memory_block.h"
#include "paddle/memory/detail/meta_cache.h"
#include "paddle/memory/detail/meta_data.h"
#include "paddle/platform/assert.h"
namespace paddle {
namespace memory {
namespace detail {
void MemoryBlock::init(MetadataCache& cache, Type t, size_t index, size_t size,
void* left_buddy, void* right_buddy) {
cache.store(this, Metadata(t, index, size - sizeof(Metadata), size,
static_cast<MemoryBlock*>(left_buddy),
static_cast<MemoryBlock*>(right_buddy)));
}
MemoryBlock::Type MemoryBlock::type(MetadataCache& cache) const {
return cache.load(this).type;
}
size_t MemoryBlock::size(MetadataCache& cache) const {
return cache.load(this).size;
}
size_t MemoryBlock::total_size(MetadataCache& cache) const {
return cache.load(this).total_size;
}
MemoryBlock* MemoryBlock::left_buddy(MetadataCache& cache) const {
return cache.load(this).left_buddy;
}
MemoryBlock* MemoryBlock::right_buddy(MetadataCache& cache) const {
return cache.load(this).right_buddy;
}
void MemoryBlock::split(MetadataCache& cache, size_t size) {
// make sure the split fits
PADDLE_ASSERT(total_size(cache) >= size);
// bail out if there is no room for another partition
if (total_size(cache) - size <= sizeof(Metadata)) {
return;
}
// find the position of the split
void* right_partition = reinterpret_cast<uint8_t*>(this) + size;
size_t remaining_size = total_size(cache) - size;
// Add the new block as a buddy
auto metadata = cache.load(this);
// Write the metadata for the new block
auto new_block_right_buddy = metadata.right_buddy;
cache.store(
static_cast<MemoryBlock*>(right_partition),
Metadata(FREE_CHUNK, index(cache), remaining_size - sizeof(Metadata),
remaining_size, this, new_block_right_buddy));
metadata.right_buddy = static_cast<MemoryBlock*>(right_partition);
metadata.size = size - sizeof(Metadata);
metadata.total_size = size;
cache.store(this, metadata);
// Write metadata for the new block's right buddy
if (new_block_right_buddy != nullptr) {
auto buddy_metadata = cache.load(new_block_right_buddy);
buddy_metadata.left_buddy = static_cast<MemoryBlock*>(right_partition);
cache.store(new_block_right_buddy, buddy_metadata);
}
}
void MemoryBlock::merge(MetadataCache& cache, MemoryBlock* right_buddy) {
// only free blocks can be merged
PADDLE_ASSERT(type(cache) == FREE_CHUNK);
PADDLE_ASSERT(right_buddy->type(cache) == FREE_CHUNK);
auto metadata = cache.load(this);
// link this->buddy's buddy
metadata.right_buddy = right_buddy->right_buddy(cache);
// link buddy's buddy -> this
if (metadata.right_buddy != nullptr) {
auto buddy_metadata = cache.load(metadata.right_buddy);
buddy_metadata.left_buddy = this;
cache.store(metadata.right_buddy, buddy_metadata);
}
metadata.size += right_buddy->total_size(cache);
metadata.total_size += right_buddy->total_size(cache);
cache.store(this, metadata);
cache.store(right_buddy, Metadata(INVALID_CHUNK, 0, 0, 0, nullptr, nullptr));
}
void MemoryBlock::mark_as_free(MetadataCache& cache) {
// check for double free or corruption
PADDLE_ASSERT(type(cache) != FREE_CHUNK);
PADDLE_ASSERT(type(cache) != INVALID_CHUNK);
set_type(cache, FREE_CHUNK);
}
void MemoryBlock::set_type(MetadataCache& cache, Type t) {
auto metadata = cache.load(this);
metadata.type = t;
cache.store(this, metadata);
}
bool MemoryBlock::has_left_buddy(MetadataCache& cache) const {
return left_buddy(cache) != nullptr;
}
bool MemoryBlock::has_right_buddy(MetadataCache& cache) const {
return right_buddy(cache) != nullptr;
}
size_t MemoryBlock::index(MetadataCache& cache) const {
return cache.load(this).index;
}
void* MemoryBlock::data() const {
return const_cast<Metadata*>(reinterpret_cast<const Metadata*>(this)) + 1;
}
MemoryBlock* MemoryBlock::metadata() const {
return const_cast<MemoryBlock*>(reinterpret_cast<const MemoryBlock*>(
reinterpret_cast<const Metadata*>(this) - 1));
}
} // namespace detail
} // namespace memory
} // namespace paddle
/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserve.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
#pragma once
#include <cstddef>
namespace paddle {
namespace memory {
namespace detail {
// Forward Declarations
class MetadataCache;
/*! \brief A class used to interpret the contents of a memory block */
class MemoryBlock {
public:
enum Type {
FREE_CHUNK, // memory is free and idle
ARENA_CHUNK, // memory is being occupied
HUGE_CHUNK, // memory is out of management
INVALID_CHUNK // memory is invalid
};
public:
void init(MetadataCache& cache, Type t, size_t index, size_t size,
void* left_buddy, void* right_buddy);
public:
/*! \brief The type of the allocation */
Type type(MetadataCache& cache) const;
/*! \brief The size of the data region */
size_t size(MetadataCache& cache) const;
/*! \brief An index to track the allocator */
size_t index(MetadataCache& cache) const;
/*! \brief The total size of the block */
size_t total_size(MetadataCache& cache) const;
/*! \brief Check the left buddy of the block */
bool has_left_buddy(MetadataCache& cache) const;
/*! \brief Check the right buddy of the block */
bool has_right_buddy(MetadataCache& cache) const;
/*! \brief Get the left buddy */
MemoryBlock* left_buddy(MetadataCache& cache) const;
/*! \brief Get the right buddy */
MemoryBlock* right_buddy(MetadataCache& cache) const;
public:
/*! \brief Split the allocation into left/right blocks */
void split(MetadataCache& cache, size_t size);
/*! \brief Merge left and right blocks together */
void merge(MetadataCache& cache, MemoryBlock* right_buddy);
/*! \brief Mark the allocation as free */
void mark_as_free(MetadataCache& cache);
/*! \brief Change the type of the allocation */
void set_type(MetadataCache& cache, Type t);
public:
/*! \brief Get a pointer to the memory block's data */
void* data() const;
/*! \brief Get a pointer to the memory block's metadata */
MemoryBlock* metadata() const;
public:
static size_t overhead();
};
} // namespace detail
} // namespace memory
} // namespace paddle
/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserve.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
#include "paddle/memory/detail/meta_cache.h"
#include "paddle/memory/detail/memory_block.h"
#include "paddle/platform/assert.h"
namespace paddle {
namespace memory {
namespace detail {
MetadataCache::MetadataCache(bool uses_gpu) : uses_gpu_(uses_gpu) {}
Metadata MetadataCache::load(const MemoryBlock* block) {
if (uses_gpu_) {
auto existing_metadata = cache_.find(block);
PADDLE_ASSERT(existing_metadata->second.check_guards());
return existing_metadata->second;
} else {
PADDLE_ASSERT(reinterpret_cast<const Metadata*>(block)->check_guards());
return *reinterpret_cast<const Metadata*>(block);
}
}
void MetadataCache::store(MemoryBlock* block,
const Metadata& original_metadata) {
auto metadata = original_metadata;
metadata.update_guards();
if (uses_gpu_) {
cache_[block] = metadata;
} else {
*reinterpret_cast<Metadata*>(block) = metadata;
}
}
void MetadataCache::invalidate(MemoryBlock* block) {
if (uses_gpu_) {
cache_.erase(block);
}
}
} // namespace detail
} // namespace memory
} // namespace paddle
/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserve.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
#pragma once
#include "paddle/memory/detail/memory_block.h"
#include "paddle/memory/detail/meta_data.h"
#include <unordered_map>
namespace paddle {
namespace memory {
namespace detail {
/**
* \brief A cache for accessing memory block meta-data that may be expensive
* to access directly.
*
* \note This class exists to unify the metadata format between GPU and CPU
* allocations. It should be removed when the CPU can access all GPU
* allocations directly via UVM.
*/
class MetadataCache {
public:
MetadataCache(bool uses_gpu);
public:
/*! \brief Load the associated metadata for the specified memory block. */
Metadata load(const MemoryBlock*);
/*! \brief Store the associated metadata for the specified memory block. */
void store(MemoryBlock*, const Metadata&);
/*! \brief Indicate that the specified metadata will no longer be used. */
void invalidate(MemoryBlock*);
public:
MetadataCache(const MetadataCache&) = delete;
MetadataCache& operator=(const MetadataCache&) = delete;
private:
bool uses_gpu_;
private:
typedef std::unordered_map<const MemoryBlock*, Metadata> MetadataMap;
private:
MetadataMap cache_;
};
} // namespace detail
} // namespace memory
} // namespace paddle
/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserve.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
#include "paddle/memory/detail/meta_data.h"
#include <functional>
namespace paddle {
namespace memory {
namespace detail {
Metadata::Metadata(MemoryBlock::Type t, size_t i, size_t s, size_t ts,
MemoryBlock* l, MemoryBlock* r)
: type(t),
index(i),
size(s),
total_size(ts),
left_buddy(l),
right_buddy(r) {}
Metadata::Metadata()
: type(MemoryBlock::INVALID_CHUNK),
index(0),
size(0),
total_size(0),
left_buddy(nullptr),
right_buddy(nullptr) {}
template <class T>
inline void hash_combine(std::size_t& seed, const T& v) {
std::hash<T> hasher;
seed ^= hasher(v) + 0x9e3779b9 + (seed << 6) + (seed >> 2);
}
inline size_t hash(const Metadata* metadata, size_t initial_seed) {
size_t seed = initial_seed;
hash_combine(seed, (size_t)metadata->type);
hash_combine(seed, metadata->index);
hash_combine(seed, metadata->size);
hash_combine(seed, metadata->total_size);
hash_combine(seed, metadata->left_buddy);
hash_combine(seed, metadata->right_buddy);
return seed;
}
void Metadata::update_guards() {
guard_begin = hash(this, 1);
guard_end = hash(this, 2);
}
bool Metadata::check_guards() const {
return guard_begin == hash(this, 1) && guard_end == hash(this, 2);
}
} // namespace detail
} // namespace memory
} // namespace paddle
/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserve.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
#pragma once
#include "paddle/memory/detail/memory_block.h"
#include <stddef.h>
namespace paddle {
namespace memory {
namespace detail {
class Metadata {
public:
Metadata(MemoryBlock::Type t, size_t i, size_t s, size_t ts, MemoryBlock* l,
MemoryBlock* r);
Metadata();
public:
/*! \brief Update the guards when metadata is changed */
void update_guards();
/*! \brief Check consistency to previous modification */
bool check_guards() const;
public:
// TODO(gangliao): compress this
// clang-format off
size_t guard_begin = 0;
MemoryBlock::Type type = MemoryBlock::INVALID_CHUNK;
size_t index = 0;
size_t size = 0;
size_t total_size = 0;
MemoryBlock* left_buddy = nullptr;
MemoryBlock* right_buddy = nullptr;
size_t guard_end = 0;
// clang-format on
};
} // namespace detail
} // namespace memory
} // namespace paddle
...@@ -13,76 +13,128 @@ See the License for the specific language governing permissions and ...@@ -13,76 +13,128 @@ See the License for the specific language governing permissions and
limitations under the License. */ limitations under the License. */
#include "paddle/memory/detail/system_allocator.h" #include "paddle/memory/detail/system_allocator.h"
#include "paddle/platform/assert.h"
#include "paddle/platform/error.h"
#include "paddle/platform/gpu_info.h"
#include <stdlib.h> // for malloc and free #include <stdlib.h> // for malloc and free
#include <sys/mman.h> // for mlock and munlock #include <sys/mman.h> // for mlock and munlock
#include "gflags/gflags.h" #include "gflags/gflags.h"
#include "paddle/platform/assert.h"
#include "paddle/platform/cuda.h"
// If use_pinned_memory is true, CPUAllocator calls mlock, which // If use_pinned_memory is true, CPUAllocator calls mlock, which
// returns pinned and locked memory as staging areas for data exchange // returns pinned and locked memory as staging areas for data exchange
// between host and device. Allocates too much would reduce the amount // between host and device. Allocates too much would reduce the amount
// of memory available to the system for paging. So, by default, we // of memory available to the system for paging. So, by default, we
// should set false to use_pinned_memory. // should set false to use_pinned_memory.
DEFINE_bool(use_pinned_memory, false, DEFINE_bool(use_pinned_memory, false, "If set, allocate cpu pinned memory.");
"If set, allocate cpu/gpu pinned memory.");
namespace paddle { namespace paddle {
namespace memory { namespace memory {
namespace detail { namespace detail {
void* CPUAllocator::Alloc(size_t size) { void* CPUAllocator::Alloc(size_t& index, size_t size) {
// According to http://www.cplusplus.com/reference/cstdlib/malloc/, // According to http://www.cplusplus.com/reference/cstdlib/malloc/,
// malloc might not return nullptr if size is zero, but the returned // malloc might not return nullptr if size is zero, but the returned
// pointer shall not be dereferenced -- so we make it nullptr. // pointer shall not be dereferenced -- so we make it nullptr.
if (size <= 0) return nullptr; if (size <= 0) return nullptr;
index = 0; // unlock memory
void* p = malloc(size); void* p = malloc(size);
if (p != nullptr && FLAGS_use_pinned_memory) {
mlock(p, size); if (p != nullptr) {
if (FLAGS_use_pinned_memory) {
index = 1;
mlock(p, size); // lock memory
}
} }
return p; return p;
} }
void CPUAllocator::Free(void* p, size_t size) { void CPUAllocator::Free(void* p, size_t size, size_t index) {
if (p != nullptr && FLAGS_use_pinned_memory) { if (p != nullptr && index == 1) {
munlock(p, size); munlock(p, size);
} }
free(p); free(p);
} }
bool CPUAllocator::UseGpu() const { return false; }
#ifndef PADDLE_ONLY_CPU #ifndef PADDLE_ONLY_CPU
void* GPUAllocator::Alloc(size_t size) { void* GPUAllocator::Alloc(size_t& index, size_t size) {
// CUDA documentation doesn't explain if cudaMalloc returns nullptr // CUDA documentation doesn't explain if cudaMalloc returns nullptr
// if size is 0. We just make sure it does. // if size is 0. We just make sure it does.
if (size <= 0) { if (size <= 0) return nullptr;
return nullptr;
} size_t available = 0;
size_t capacity = 0;
paddle::platform::GpuMemoryUsage(available, capacity);
// Reserve memory for page tables, etc.
size_t reserving = capacity - paddle::platform::GpuMaxAllocSize();
size_t usable = available > reserving ? available - reserving : 0;
// If remaining size no less than expected size, using general
// cudaMalloc to allocate GPU memory.
void* p = 0; void* p = 0;
cudaError_t result = if (size <= usable) {
FLAGS_use_pinned_memory ? cudaMallocHost(&p, size) : cudaMalloc(&p, size); cudaError_t result = cudaMalloc(&p, size);
if (result != cudaSuccess) { if (result == cudaSuccess) {
cudaGetLastError(); // clear error if there is any. index = 0;
gpu_alloc_size_ += size;
return p;
}
}
// If remaining size less than expected size or cudaMalloc failed,
// cudaMallocHost will be considered as a fallback allocator.
//
// NOTE: here, we use GpuMaxAllocSize() as the maximum memory size
// of host fallback allocation. Allocates too much would reduce
// the amount of memory available to the underlying system for paging.
usable = paddle::platform::GpuMaxAllocSize() - fallback_alloc_size_;
if (size > usable) return nullptr;
cudaError_t result = cudaMallocHost(&p, size);
if (result == cudaSuccess) {
index = 1;
fallback_alloc_size_ += size;
return p;
} }
return result == cudaSuccess ? p : nullptr;
return nullptr;
} }
void GPUAllocator::Free(void* p, size_t size) { void GPUAllocator::Free(void* p, size_t size, size_t index) {
cudaError_t err;
if (index == 0) {
PADDLE_ASSERT(gpu_alloc_size_ >= size);
gpu_alloc_size_ -= size;
err = cudaFree(p);
} else {
PADDLE_ASSERT(fallback_alloc_size_ >= size);
fallback_alloc_size_ -= size;
err = cudaFreeHost(p);
}
// Purposefully allow cudaErrorCudartUnloading, because // Purposefully allow cudaErrorCudartUnloading, because
// that is returned if you ever call cudaFree after the // that is returned if you ever call cudaFree after the
// driver has already shutdown. This happens only if the // driver has already shutdown. This happens only if the
// process is terminating, in which case we don't care if // process is terminating, in which case we don't care if
// cudaFree succeeds. // cudaFree succeeds.
cudaError_t err = FLAGS_use_pinned_memory ? cudaFreeHost(p) : cudaFree(p);
if (err != cudaErrorCudartUnloading) { if (err != cudaErrorCudartUnloading) {
platform::throw_on_error(err, "cudaFree{Host} failed"); platform::throw_on_error(err,
"cudaFree{Host} failed in GPUAllocator::Free.");
} }
} }
bool GPUAllocator::UseGpu() const { return true; }
#endif // PADDLE_ONLY_CPU #endif // PADDLE_ONLY_CPU
} // namespace detail } // namespace detail
......
...@@ -20,31 +20,36 @@ namespace paddle { ...@@ -20,31 +20,36 @@ namespace paddle {
namespace memory { namespace memory {
namespace detail { namespace detail {
// SystemAllocator is the parent class of CPUAllocator and /**
// GPUAllocator. A BuddyAllocator object uses a SystemAllocator* * \brief SystemAllocator is the parent class of CPUAllocator and GPUAllocator.
// pointing to the underlying system allocator. An alternative to * A BuddyAllocator object uses a SystemAllocator* pointing to the
// this class hierarchy is to pass a system allocator class to * underlying system allocator.
// BuddyAllocator as a template parameter. This approach makes */
// BuddyAllocator a class template, and it's very complicated
// algorithm would make the buddy_allocator.h messy.
class SystemAllocator { class SystemAllocator {
public: public:
virtual ~SystemAllocator() {} virtual ~SystemAllocator() {}
virtual void* Alloc(size_t size) = 0; virtual void* Alloc(size_t& index, size_t size) = 0;
virtual void Free(void* p, size_t size) = 0; virtual void Free(void* p, size_t size, size_t index) = 0;
virtual bool UseGpu() const = 0;
}; };
class CPUAllocator : public SystemAllocator { class CPUAllocator : public SystemAllocator {
public: public:
virtual void* Alloc(size_t size); virtual void* Alloc(size_t& index, size_t size);
virtual void Free(void* p, size_t size); virtual void Free(void* p, size_t size, size_t index);
virtual bool UseGpu() const;
}; };
#ifndef PADDLE_ONLY_CPU #ifndef PADDLE_ONLY_CPU
class GPUAllocator : public SystemAllocator { class GPUAllocator : public SystemAllocator {
public: public:
virtual void* Alloc(size_t size); virtual void* Alloc(size_t& index, size_t size);
virtual void Free(void* p, size_t size); virtual void Free(void* p, size_t size, size_t index);
virtual bool UseGpu() const;
private:
size_t gpu_alloc_size_ = 0;
size_t fallback_alloc_size_ = 0;
}; };
#endif // PADDLE_ONLY_CPU #endif // PADDLE_ONLY_CPU
......
...@@ -25,7 +25,8 @@ DECLARE_bool(use_pinned_memory); ...@@ -25,7 +25,8 @@ DECLARE_bool(use_pinned_memory);
void TestAllocator(paddle::memory::detail::SystemAllocator& a, size_t size) { void TestAllocator(paddle::memory::detail::SystemAllocator& a, size_t size) {
bool freed = false; bool freed = false;
{ {
void* p = a.Alloc(size); size_t index;
void* p = a.Alloc(index, size);
if (size > 0) { if (size > 0) {
EXPECT_NE(p, nullptr); EXPECT_NE(p, nullptr);
} else { } else {
...@@ -35,7 +36,7 @@ void TestAllocator(paddle::memory::detail::SystemAllocator& a, size_t size) { ...@@ -35,7 +36,7 @@ void TestAllocator(paddle::memory::detail::SystemAllocator& a, size_t size) {
int* i = static_cast<int*>(p); int* i = static_cast<int*>(p);
std::shared_ptr<int> ptr(i, [&](void* p) { std::shared_ptr<int> ptr(i, [&](void* p) {
freed = true; freed = true;
a.Free(p, size); a.Free(p, size, index);
}); });
} }
EXPECT_TRUE(freed); EXPECT_TRUE(freed);
...@@ -56,14 +57,7 @@ TEST(CPUAllocator, LockMem) { ...@@ -56,14 +57,7 @@ TEST(CPUAllocator, LockMem) {
} }
#ifndef PADDLE_ONLY_CPU #ifndef PADDLE_ONLY_CPU
TEST(GPUAllocator, NoStaging) { TEST(GPUAllocator, Alloc) {
FLAGS_use_pinned_memory = false;
paddle::memory::detail::GPUAllocator a;
TestAllocator(a, 2048);
TestAllocator(a, 0);
}
TEST(GPUAllocator, Staging) {
FLAGS_use_pinned_memory = true;
paddle::memory::detail::GPUAllocator a; paddle::memory::detail::GPUAllocator a;
TestAllocator(a, 2048); TestAllocator(a, 2048);
TestAllocator(a, 0); TestAllocator(a, 0);
......
...@@ -17,43 +17,67 @@ limitations under the License. */ ...@@ -17,43 +17,67 @@ limitations under the License. */
#include "paddle/memory/detail/system_allocator.h" #include "paddle/memory/detail/system_allocator.h"
#include "paddle/platform/assert.h" #include "paddle/platform/assert.h"
#include <boost/variant.hpp>
namespace paddle { namespace paddle {
namespace memory { namespace memory {
void* Alloc(platform::Place pl, size_t size) { detail::BuddyAllocator* GetCPUBuddyAllocator() {
#ifndef PADDLE_ONLY_CPU static detail::BuddyAllocator* a = nullptr;
if (paddle::platform::is_gpu_place(pl)) { if (a == nullptr) {
size_t gpu_id = boost::get<platform::GPUPlace>(pl).device; a = new detail::BuddyAllocator(new detail::CPUAllocator,
return detail::GetGPUBuddyAllocator(gpu_id)->Alloc(size); platform::CpuMinChunkSize(),
platform::CpuMaxChunkSize());
} }
#endif // PADDLE_ONLY_CPU return a;
PADDLE_ASSERT(paddle::platform::is_cpu_place(pl));
return detail::GetCPUBuddyAllocator()->Alloc(size);
} }
void Free(paddle::platform::Place pl, void* p) { template <>
#ifndef PADDLE_ONLY_CPU void* Alloc<platform::CPUPlace>(platform::CPUPlace place, size_t size) {
if (paddle::platform::is_gpu_place(pl)) { return GetCPUBuddyAllocator()->Alloc(size);
size_t gpu_id = boost::get<platform::GPUPlace>(pl).device; }
detail::GetGPUBuddyAllocator(gpu_id)->Free(p);
} template <>
#endif // PADDLE_ONLY_CPU void Free<platform::CPUPlace>(platform::CPUPlace place, void* p) {
PADDLE_ASSERT(paddle::platform::is_cpu_place(pl)); GetCPUBuddyAllocator()->Free(p);
detail::GetCPUBuddyAllocator()->Free(p); }
template <>
size_t Used<platform::CPUPlace>(platform::CPUPlace place) {
return GetCPUBuddyAllocator()->Used();
} }
size_t Used(paddle::platform::Place pl) {
#ifndef PADDLE_ONLY_CPU #ifndef PADDLE_ONLY_CPU
if (paddle::platform::is_gpu_place(pl)) {
size_t gpu_id = boost::get<platform::GPUPlace>(pl).device; detail::BuddyAllocator* GetGPUBuddyAllocator(int gpu_id) {
return detail::GetGPUBuddyAllocator(gpu_id)->Used(); static detail::BuddyAllocator** as = NULL;
if (as == NULL) {
int gpu_num = platform::GetDeviceCount();
as = new detail::BuddyAllocator*[gpu_num];
for (int gpu = 0; gpu < gpu_num; gpu++) {
platform::SetDeviceId(gpu);
as[gpu] = new detail::BuddyAllocator(new detail::GPUAllocator,
platform::GpuMinChunkSize(),
platform::GpuMaxChunkSize());
} }
#endif // PADDLE_ONLY_CPU }
PADDLE_ASSERT(paddle::platform::is_cpu_place(pl)); return as[gpu_id];
return detail::GetCPUBuddyAllocator()->Used(); }
template <>
void* Alloc<platform::GPUPlace>(platform::GPUPlace place, size_t size) {
return GetGPUBuddyAllocator(place.device)->Alloc(size);
}
template <>
void Free<platform::GPUPlace>(platform::GPUPlace place, void* p) {
GetGPUBuddyAllocator(place.device)->Free(p);
}
template <>
size_t Used<platform::GPUPlace>(platform::GPUPlace place) {
return GetGPUBuddyAllocator(place.device)->Used();
} }
#endif // PADDLE_ONLY_CPU
} // namespace memory } // namespace memory
} // namespace paddle } // namespace paddle
...@@ -19,9 +19,14 @@ limitations under the License. */ ...@@ -19,9 +19,14 @@ limitations under the License. */
namespace paddle { namespace paddle {
namespace memory { namespace memory {
void* Alloc(paddle::platform::Place, size_t); template <class Place>
void Free(paddle::platform::Place, void*); void* Alloc(Place, size_t);
size_t Used(paddle::platform::Place);
template <class Place>
void Free(Place, void*);
template <class Place>
size_t Used(Place);
} // namespace memory } // namespace memory
} // namespace paddle } // namespace paddle
/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserve.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
#include "paddle/memory/memory.h"
#include "paddle/memory/detail/memory_block.h"
#include "paddle/memory/detail/meta_data.h"
#include "paddle/platform/cpu_info.h"
#include "paddle/platform/gpu_info.h"
#include "paddle/platform/place.h"
#include <gtest/gtest.h>
#include <unordered_map>
inline bool is_aligned(void const *p) {
return 0 == (reinterpret_cast<uintptr_t>(p) & 0x3);
}
size_t align(size_t size, paddle::platform::CPUPlace place) {
size += sizeof(paddle::memory::detail::Metadata);
size_t alignment = paddle::platform::CpuMinChunkSize();
size_t remaining = size % alignment;
return remaining == 0 ? size : size + (alignment - remaining);
}
TEST(BuddyAllocator, CPUAllocation) {
void *p = nullptr;
EXPECT_EQ(p, nullptr);
paddle::platform::CPUPlace cpu;
p = paddle::memory::Alloc(cpu, 4096);
EXPECT_NE(p, nullptr);
paddle::memory::Free(cpu, p);
}
TEST(BuddyAllocator, CPUMultAlloc) {
paddle::platform::CPUPlace cpu;
std::unordered_map<void *, size_t> ps;
size_t total_size = paddle::memory::Used(cpu);
EXPECT_EQ(total_size, 0UL);
for (auto size :
{128, 256, 1024, 4096, 16384, 65536, 262144, 1048576, 4194304}) {
ps[paddle::memory::Alloc(cpu, size)] = size;
// Buddy Allocator doesn't manage too large memory chunk
if (paddle::memory::Used(cpu) == total_size) continue;
size_t aligned_size = align(size, cpu);
total_size += aligned_size;
EXPECT_EQ(total_size, paddle::memory::Used(cpu));
}
for (auto p : ps) {
EXPECT_EQ(is_aligned(p.first), true);
paddle::memory::Free(cpu, p.first);
// Buddy Allocator doesn't manage too large memory chunk
if (paddle::memory::Used(cpu) == total_size) continue;
size_t aligned_size = align(p.second, cpu);
total_size -= aligned_size;
EXPECT_EQ(total_size, paddle::memory::Used(cpu));
}
}
#ifndef PADDLE_ONLY_CPU
size_t align(size_t size, paddle::platform::GPUPlace place) {
size += sizeof(paddle::memory::detail::Metadata);
size_t alignment = paddle::platform::GpuMinChunkSize();
size_t remaining = size % alignment;
return remaining == 0 ? size : size + (alignment - remaining);
}
TEST(BuddyAllocator, GPUAllocation) {
void *p = nullptr;
EXPECT_EQ(p, nullptr);
paddle::platform::GPUPlace gpu(0);
p = paddle::memory::Alloc(gpu, 4096);
EXPECT_NE(p, nullptr);
paddle::memory::Free(gpu, p);
}
TEST(BuddyAllocator, GPUMultAlloc) {
paddle::platform::GPUPlace gpu;
std::unordered_map<void *, size_t> ps;
size_t total_size = paddle::memory::Used(gpu);
EXPECT_EQ(total_size, 0UL);
for (auto size :
{128, 256, 1024, 4096, 16384, 65536, 262144, 1048576, 4194304}) {
ps[paddle::memory::Alloc(gpu, size)] = size;
// Buddy Allocator doesn't manage too large memory chunk
if (paddle::memory::Used(gpu) == total_size) continue;
size_t aligned_size = align(size, gpu);
total_size += aligned_size;
EXPECT_EQ(total_size, paddle::memory::Used(gpu));
}
for (auto p : ps) {
EXPECT_EQ(is_aligned(p.first), true);
paddle::memory::Free(gpu, p.first);
// Buddy Allocator doesn't manage too large memory chunk
if (paddle::memory::Used(gpu) == total_size) continue;
size_t aligned_size = align(p.second, gpu);
total_size -= aligned_size;
EXPECT_EQ(total_size, paddle::memory::Used(gpu));
}
}
#endif // PADDLE_ONLY_CPU
function(op_library TARGET)
# op_library is a function to create op library. The interface is same as
# cc_library. But it handle split GPU/CPU code and link some common library
# for ops.
set(cc_srcs)
set(cu_srcs)
set(op_common_deps operator op_registry)
set(options "")
set(oneValueArgs "")
set(multiValueArgs SRCS DEPS)
cmake_parse_arguments(op_library "${options}" "${oneValueArgs}"
"${multiValueArgs}" ${ARGN})
foreach(src ${op_library_SRCS})
if (${src} MATCHES ".*\\.cu$")
list(APPEND cu_srcs ${src})
elseif(${src} MATCHES ".*\\.cc$")
list(APPEND cc_srcs ${src})
else()
message(FATAL_ERROR "${TARGET} Source file ${src} should only be .cc or .cu")
endif()
endforeach()
list(LENGTH cc_srcs cc_srcs_len)
if (${cc_srcs_len} EQUAL 0)
message(FATAL_ERROR "The op library ${TARGET} should contains at least one .cc file")
endif()
list(LENGTH cu_srcs cu_srcs_len)
if (${cu_srcs_len} EQUAL 0)
message(WARNING "The op library ${TARGET} not support GPU!")
endif()
if (WITH_GPU)
nv_library(${TARGET} SRCS ${cc_srcs} ${cu_srcs} DEPS ${op_library_DEPS}
${op_common_deps})
else()
cc_library(${TARGET} SRCS ${cc_srcs} DEPS ${op_library_DEPS}
${op_common_deps})
endif()
endfunction()
op_library(add_op SRCS add_op.cc add_op.cu)
cc_test(add_op_test SRCS add_op_test.cc DEPS add_op)
op_library(mul_op SRCS mul_op.cc mul_op.cu)
op_library(rowwise_add_op SRCS rowwise_add_op.cu rowwise_add_op.cc)
op_library(sigmoid_op SRCS sigmoid_op.cu sigmoid_op.cc)
op_library(softmax_op SRCS softmax_op.cc softmax_op.cu)
/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserve.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
#include "paddle/operators/add_op.h"
#include "paddle/framework/op_registry.h"
#include "paddle/framework/tensor.h"
namespace paddle {
namespace operators {
class AddOp : public framework::OperatorWithKernel {
protected:
void InferShape(
const std::vector<const framework::Tensor *> &inputs,
const std::vector<framework::Tensor *> &outputs) const override {
PADDLE_ENFORCE(inputs.size() == 2, "Input size of AddOp must be two");
PADDLE_ENFORCE(outputs.size() == 1, "Output size of AddOp must be one");
PADDLE_ENFORCE(
inputs[0] != nullptr && inputs[1] != nullptr && outputs[0] != nullptr,
"Inputs/Outputs of AddOp must all be set");
PADDLE_ENFORCE(inputs[0]->dims() == inputs[1]->dims(),
"Two input of Add Op's dimension must be same.");
outputs[0]->set_dims(inputs[0]->dims());
}
};
class AddOpMaker : public framework::OpProtoAndCheckerMaker {
public:
AddOpMaker(framework::OpProto *proto, framework::OpAttrChecker *op_checker)
: framework::OpProtoAndCheckerMaker(proto, op_checker) {
AddInput("X", "The first input of add op");
AddInput("Y", "The second input of add op");
AddOutput("Out", "The output of add op");
AddComment(R"DOC(
Two Element Add Operator.
The equation is: Out = X + Y
)DOC");
}
};
} // namespace operators
} // namespace paddle
REGISTER_OP(add_two, paddle::operators::AddOp, paddle::operators::AddOpMaker);
typedef paddle::operators::AddKernel<::paddle::platform::CPUPlace, float>
AddKernel_CPU_float;
REGISTER_OP_CPU_KERNEL(add_two, AddKernel_CPU_float);
#include "paddle/operators/add_op.h"
#include "paddle/framework/op_registry.h"
typedef paddle::operators::AddKernel<::paddle::platform::GPUPlace, float> AddKernel_GPU_float;
REGISTER_OP_GPU_KERNEL(add_two,
AddKernel_GPU_float);
\ No newline at end of file
/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserve.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
#pragma once
#include "glog/logging.h"
#include "paddle/framework/operator.h"
namespace paddle {
namespace operators {
template <typename Place, typename T>
class AddKernel : public framework::OpKernel {
public:
void Compute(const framework::KernelContext& context) const override {
auto input0 = context.Input(0)->Get<framework::Tensor>();
auto input1 = context.Input(1)->Get<framework::Tensor>();
auto* output = context.Output(0)->GetMutable<framework::Tensor>();
output->mutable_data<T>(context.GetPlace());
output->flat<T>().device(*(context.GetEigenDevice<Place>())) =
input0.flat<T>() + input1.flat<T>();
}
};
} // namespace operators
} // namespace paddle
/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserve.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
#include <gtest/gtest.h>
#define private public
#include <paddle/framework/op_registry.h>
USE_OP(add_two);
TEST(AddOp, GetOpProto) {
auto& protos = paddle::framework::OpRegistry::protos();
auto it = protos.find("add_two");
ASSERT_NE(it, protos.end());
}
\ No newline at end of file
/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserve.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
#include <paddle/framework/op_registry.h>
#include <paddle/framework/tensor.h>
#include <paddle/operators/mul_op.h>
namespace paddle {
namespace operators {
class MulOp : public framework::OperatorWithKernel {
protected:
void InferShape(
const std::vector<const framework::Tensor *> &inputs,
const std::vector<framework::Tensor *> &outputs) const override {
PADDLE_ENFORCE(inputs.size() == 2, "The mul op must take two inputs");
auto dim0 = inputs[0]->dims();
auto dim1 = inputs[1]->dims();
PADDLE_ENFORCE(dim0.size() == 2 && dim1.size() == 2,
"The input of mul op must be matrix");
PADDLE_ENFORCE(
dim0[1] == dim1[0],
"First matrix's width must be equal with second matrix's height.");
PADDLE_ENFORCE(outputs.size() == 1, "The mul op must take one output");
outputs[0]->set_dims({dim0[0], dim1[1]});
}
};
class MulOpMaker : public framework::OpProtoAndCheckerMaker {
public:
MulOpMaker(framework::OpProto *proto, framework::OpAttrChecker *op_checker)
: framework::OpProtoAndCheckerMaker(proto, op_checker) {
AddInput("X", "The first input of mul op");
AddInput("Y", "The second input of mul op");
AddOutput("Out", "The output of mul op");
AddComment(R"DOC(
Two Element Mul Operator.
The equation is: Out = X * Y
)DOC");
}
};
} // namespace operators
} // namespace paddle
REGISTER_OP(mul, paddle::operators::MulOp, paddle::operators::MulOpMaker);
REGISTER_OP_CPU_KERNEL(
mul, paddle::operators::MulKernel<paddle::platform::CPUPlace>);
/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserve.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
#include <paddle/operators/mul_op.h>
#include <paddle/framework/op_registry.h>
REGISTER_OP_GPU_KERNEL(mul,
paddle::operators::MulKernel<paddle::platform
::GPUPlace>);
\ No newline at end of file
/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserve.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
#pragma once
#include <glog/logging.h>
#include <paddle/framework/operator.h>
namespace paddle {
namespace operators {
template <typename Place>
class MulKernel : public framework::OpKernel {
public:
void Compute(const framework::KernelContext &context) const override {
LOG(INFO) << "Mul kernel in " << typeid(Place).name();
}
};
} // namespace operators
} // namespace paddle
/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserve.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
#include <paddle/framework/op_registry.h>
#include <paddle/operators/rowwise_add_op.h>
namespace paddle {
namespace operators {
class RowWiseAddOp : public framework::OperatorWithKernel {
protected:
void InferShape(
const std::vector<const framework::Tensor *> &inputs,
const std::vector<framework::Tensor *> &outputs) const override {
PADDLE_ENFORCE(inputs.size() == 2UL, "Two inputs is needed by rowwise add");
auto dim0 = inputs[0]->dims();
auto dim1 = inputs[1]->dims();
PADDLE_ENFORCE(dim0.size() == 2, "Input 0 must be matrix");
PADDLE_ENFORCE(dim1.size() == 1, "The second input must be vector");
PADDLE_ENFORCE(dim0[1] == dim1[0], "The width of two input must be same");
PADDLE_ENFORCE(outputs.size() == 1, "The output size must be 1");
outputs[0]->set_dims(inputs[0]->dims());
}
};
class RowWiseAddOpMaker : public framework::OpProtoAndCheckerMaker {
public:
RowWiseAddOpMaker(framework::OpProto *proto,
framework::OpAttrChecker *op_checker)
: framework::OpProtoAndCheckerMaker(proto, op_checker) {
AddInput("X", "The left input of row-wise add op, must be matrix");
AddInput("b", "The right input of row-wise add op, must be vector");
AddOutput("Out", "The output of row-wise add op");
AddComment(R"DOC(Row-wise Add operator
for i in xrange(X.shape[0]):
Out = X[i] + b
)DOC");
}
};
} // namespace operators
} // namespace paddle
REGISTER_OP(rowwise_add,
paddle::operators::RowWiseAddOp,
paddle::operators::RowWiseAddOpMaker);
REGISTER_OP_CPU_KERNEL(
rowwise_add,
paddle::operators::RowWiseAddKernel<paddle::platform::CPUPlace>);
#include <paddle/framework/op_registry.h>
#include <paddle/operators/rowwise_add_op.h>
REGISTER_OP_GPU_KERNEL(
rowwise_add,
paddle::operators::RowWiseAddKernel<paddle::platform ::GPUPlace>);
/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserve.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
#pragma once
#include <glog/logging.h>
#include <paddle/framework/operator.h>
namespace paddle {
namespace operators {
template <typename Place>
class RowWiseAddKernel : public framework::OpKernel {
public:
void Compute(const framework::KernelContext &context) const override {
LOG(INFO) << "RowWiseAdd kernel in " << typeid(Place).name();
}
};
} // namespace operators
} // namespace paddle
/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserve.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
#include <paddle/framework/op_registry.h>
#include <paddle/operators/sigmoid_op.h>
namespace paddle {
namespace operators {
class SigmoidOp : public framework::OperatorWithKernel {
protected:
void InferShape(
const std::vector<const framework::Tensor *> &inputs,
const std::vector<framework::Tensor *> &outputs) const override {
PADDLE_ENFORCE(inputs.size() == 1, "Sigmoid Op only have one input");
PADDLE_ENFORCE(outputs.size() == 1, "Sigmoid Op only have one output");
outputs[0]->set_dims(inputs[0]->dims());
}
};
class SigmoidOpMaker : public framework::OpProtoAndCheckerMaker {
public:
SigmoidOpMaker(framework::OpProto *proto,
framework::OpAttrChecker *op_checker)
: framework::OpProtoAndCheckerMaker(proto, op_checker) {
AddInput("X", "sigmoid input");
AddInput("Y", "sigmoid output");
AddComment("Sigmoid function");
}
};
} // namespace operators
} // namespace paddle
REGISTER_OP(sigmoid,
paddle::operators::SigmoidOp,
paddle::operators::SigmoidOpMaker);
REGISTER_OP_CPU_KERNEL(
sigmoid, paddle::operators::SigmoidKernel<paddle::platform::CPUPlace>);
#include <paddle/operators/sigmoid_op.h>
#include <paddle/framework/op_registry.h>
REGISTER_OP_GPU_KERNEL(
sigmoid, paddle::operators::SigmoidKernel<paddle::platform::GPUPlace>);
/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserve.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
#pragma once
#include <glog/logging.h>
#include <paddle/framework/operator.h>
namespace paddle {
namespace operators {
template <typename Place>
class SigmoidKernel : public framework::OpKernel {
public:
void Compute(const framework::KernelContext &context) const override {
LOG(INFO) << "Sigmoid kernel in " << typeid(Place).name();
}
};
} // namespace operators
} // namespace paddle
/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserve.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
#include <paddle/framework/op_registry.h>
#include <paddle/operators/softmax_op.h>
namespace paddle {
namespace operators {
class SoftmaxOp : public framework::OperatorWithKernel {
protected:
void InferShape(
const std::vector<const framework::Tensor *> &inputs,
const std::vector<framework::Tensor *> &outputs) const override {
PADDLE_ENFORCE(inputs.size() == 1, "Only one input is need for softmax");
PADDLE_ENFORCE(outputs.size() == 1, "Only one output is need for softmax");
outputs[0]->set_dims(inputs[0]->dims());
}
};
class SoftmaxOpMaker : public framework::OpProtoAndCheckerMaker {
public:
SoftmaxOpMaker(framework::OpProto *proto,
framework::OpAttrChecker *op_checker)
: OpProtoAndCheckerMaker(proto, op_checker) {
AddInput("X", "input of softmax");
AddOutput("Y", "output of softmax");
AddComment("Softmax Op");
}
};
} // namespace operators
} // namespace paddle
namespace ops = paddle::operators;
REGISTER_OP(softmax, ops::SoftmaxOp, ops::SoftmaxOpMaker);
REGISTER_OP_CPU_KERNEL(softmax, ops::SoftmaxKernel<paddle::platform::CPUPlace>);
#include <paddle/framework/op_registry.h>
#include <paddle/operators/softmax_op.h>
REGISTER_OP_GPU_KERNEL(
softmax, paddle::operators::SoftmaxKernel<paddle::platform::GPUPlace>);
/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserve.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
#pragma once
#include <glog/logging.h>
#include <paddle/framework/operator.h>
namespace paddle {
namespace operators {
template <typename Place>
class SoftmaxKernel : public framework::OpKernel {
public:
void Compute(const framework::KernelContext &context) const override {
LOG(INFO) << "Softmax kernel in " << typeid(Place).name();
}
};
} // namespace operators
} // namespace paddle
...@@ -44,8 +44,8 @@ paddle_optimizer* paddle_create_optimizer(const unsigned char* config_proto, ...@@ -44,8 +44,8 @@ paddle_optimizer* paddle_create_optimizer(const unsigned char* config_proto,
const int state_len) { const int state_len) {
paddle_optimizer* optimizer = new paddle_optimizer; paddle_optimizer* optimizer = new paddle_optimizer;
std::string config(config_proto, config_proto + config_proto_len); std::string config(config_proto, config_proto + config_proto_len);
Tensor* parameter = Tensor* parameter = new Tensor(reinterpret_cast<float*>(param_buffer),
new Tensor(reinterpret_cast<float*>(param_buffer), num_bytes); num_bytes / sizeof(float));
optimizer->impl = ParameterOptimizer::Create(config, parameter); optimizer->impl = ParameterOptimizer::Create(config, parameter);
if (state != nullptr) { if (state != nullptr) {
std::string s(state, state + state_len); std::string s(state, state + state_len);
...@@ -65,7 +65,8 @@ int paddle_update_parameter(paddle_optimizer* o, ...@@ -65,7 +65,8 @@ int paddle_update_parameter(paddle_optimizer* o,
int num_bytes) { int num_bytes) {
// TOOD(zhihong): datatype not work. need to add the runtime datatype // TOOD(zhihong): datatype not work. need to add the runtime datatype
auto grad_type = reinterpret_cast<const float*>(grad_buffer); auto grad_type = reinterpret_cast<const float*>(grad_buffer);
Tensor* gradient = new Tensor(const_cast<float*>(grad_type), num_bytes); Tensor* gradient =
new Tensor(const_cast<float*>(grad_type), num_bytes / sizeof(float));
o->impl->Update(gradient); o->impl->Update(gradient);
return PADDLE_SUCCESS; return PADDLE_SUCCESS;
} }
......
/*
Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserve.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/
#include "parameter_optimizer.h" #include "parameter_optimizer.h"
#include <cmath> #include <cmath>
#include <map> #include <map>
...@@ -5,21 +21,18 @@ ...@@ -5,21 +21,18 @@
#include "gtest/gtest.h" #include "gtest/gtest.h"
#include "lr_policy.h" #include "lr_policy.h"
using namespace paddle; paddle::optimizer::Tensor* FillTensor(size_t size) {
using namespace paddle::optimizer; paddle::optimizer::Tensor* param = new paddle::optimizer::Tensor(size);
paddle::optimizer::Tensor& p = *param;
Tensor* FillTensor(size_t size) {
Tensor* param = new Tensor(size);
Tensor& p = *param;
for (size_t i = 0; i < p.size(); ++i) { for (size_t i = 0; i < p.size(); ++i) {
p[i] = (float)rand() / (float)RAND_MAX; p[i] = (float)rand() / (float)RAND_MAX;
} }
return param; return param;
} }
Tensor* FixedTensor(size_t size) { paddle::optimizer::Tensor* FixedTensor(size_t size) {
Tensor* param = new Tensor(size); paddle::optimizer::Tensor* param = new paddle::optimizer::Tensor(size);
Tensor& p = *param; paddle::optimizer::Tensor& p = *param;
for (size_t i = 0; i < p.size(); ++i) { for (size_t i = 0; i < p.size(); ++i) {
p[i] = i; p[i] = i;
} }
...@@ -28,7 +41,8 @@ Tensor* FixedTensor(size_t size) { ...@@ -28,7 +41,8 @@ Tensor* FixedTensor(size_t size) {
class OptimizerTest : public testing::Test { class OptimizerTest : public testing::Test {
public: public:
// init tensor shape virtual ~OptimizerTest() {}
// init paddle::optimizer::Tensor shape
const size_t kSize = 5; const size_t kSize = 5;
virtual void SetUp() { virtual void SetUp() {
...@@ -38,34 +52,36 @@ public: ...@@ -38,34 +52,36 @@ public:
virtual void TearDown() {} virtual void TearDown() {}
void CreateSGD() { void CreateSGD() {
Tensor* parameter = FixedTensor(kSize); paddle::optimizer::Tensor* parameter = FixedTensor(kSize);
config_.set_optimizer(OptimizerConfig::SGD); config_.set_optimizer(paddle::OptimizerConfig::SGD);
config_.mutable_sgd()->set_momentum(0.0); config_.mutable_sgd()->set_momentum(0.0);
config_.mutable_sgd()->set_decay(0.0); config_.mutable_sgd()->set_decay(0.0);
config_.mutable_sgd()->set_nesterov(false); config_.mutable_sgd()->set_nesterov(false);
config_.set_lr_policy(OptimizerConfig::Const); config_.set_lr_policy(paddle::OptimizerConfig::Const);
config_.mutable_const_lr()->set_learning_rate(0.1); config_.mutable_const_lr()->set_learning_rate(0.1);
std::string str = config_.SerializeAsString(); std::string str = config_.SerializeAsString();
ParameterOptimizer* opt = ParameterOptimizer::Create(str, parameter); paddle::optimizer::ParameterOptimizer* opt =
paddle::optimizer::ParameterOptimizer::Create(str, parameter);
opts_.push_back(opt); opts_.push_back(opt);
} }
void CreateAdam() { void CreateAdam() {
Tensor* parameter = FixedTensor(kSize); paddle::optimizer::Tensor* parameter = FixedTensor(kSize);
config_.set_optimizer(OptimizerConfig::Adam); config_.set_optimizer(paddle::OptimizerConfig::Adam);
config_.mutable_adam()->set_beta_1(0.9); config_.mutable_adam()->set_beta_1(0.9);
config_.mutable_adam()->set_beta_2(0.1); config_.mutable_adam()->set_beta_2(0.1);
config_.mutable_adam()->set_epsilon(1e-3); config_.mutable_adam()->set_epsilon(1e-3);
config_.mutable_adam()->set_decay(0.0); config_.mutable_adam()->set_decay(0.0);
config_.set_lr_policy(OptimizerConfig::Const); config_.set_lr_policy(paddle::OptimizerConfig::Const);
config_.mutable_const_lr()->set_learning_rate(0.1); config_.mutable_const_lr()->set_learning_rate(0.1);
std::string str = config_.SerializeAsString(); std::string str = config_.SerializeAsString();
ParameterOptimizer* opt = ParameterOptimizer::Create(str, parameter); paddle::optimizer::ParameterOptimizer* opt =
paddle::optimizer::ParameterOptimizer::Create(str, parameter);
opts_.push_back(opt); opts_.push_back(opt);
} }
void TestGetWeight() { void TestGetWeight() {
Tensor* p = FixedTensor(kSize); paddle::optimizer::Tensor* p = FixedTensor(kSize);
for (size_t i = 0; i < opts_.size(); ++i) { for (size_t i = 0; i < opts_.size(); ++i) {
int s = 0; int s = 0;
float* newp = (float*)opts_[i]->get_weight(&s); float* newp = (float*)opts_[i]->get_weight(&s);
...@@ -76,7 +92,7 @@ public: ...@@ -76,7 +92,7 @@ public:
} }
void TestUpdate() { void TestUpdate() {
Tensor* g = FixedTensor(kSize); paddle::optimizer::Tensor* g = FixedTensor(kSize);
for (size_t i = 0; i < opts_.size(); ++i) { for (size_t i = 0; i < opts_.size(); ++i) {
opts_[i]->Update(g); opts_[i]->Update(g);
} }
...@@ -91,8 +107,8 @@ public: ...@@ -91,8 +107,8 @@ public:
} }
private: private:
std::vector<ParameterOptimizer*> opts_; std::vector<paddle::optimizer::ParameterOptimizer*> opts_;
OptimizerConfig config_; paddle::OptimizerConfig config_;
}; };
TEST_F(OptimizerTest, TestGetWeight) { TestGetWeight(); } TEST_F(OptimizerTest, TestGetWeight) { TestGetWeight(); }
......
/*
Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserve.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/
#include "serialization.h" #include "serialization.h"
#include "gtest/gtest.h" #include "gtest/gtest.h"
using namespace paddle;
using namespace paddle::optimizer;
TEST(TensorToProto, Case1) { TEST(TensorToProto, Case1) {
Tensor t(3), t1(3); paddle::optimizer::Tensor t(3), t1(3);
for (size_t i = 0; i < t.size(); ++i) { for (size_t i = 0; i < t.size(); ++i) {
t[i] = i; t[i] = i;
t1[i] = 0; t1[i] = 0;
} }
TensorProto proto; paddle::TensorProto proto;
TensorToProto(t, &proto); paddle::optimizer::TensorToProto(t, &proto);
ProtoToTensor(proto, &t1); paddle::optimizer::ProtoToTensor(proto, &t1);
for (size_t i = 0; i < t1.size(); ++i) { for (size_t i = 0; i < t1.size(); ++i) {
EXPECT_EQ(t1[i], t[i]); EXPECT_EQ(t1[i], t[i]);
} }
......
add_subdirectory(dynload) cc_library(cpu_info SRCS cpu_info.cc DEPS gflags glog)
cc_test(cpu_info_test SRCS cpu_info_test.cc DEPS cpu_info)
nv_test(cuda_test SRCS cuda_test.cu) nv_library(gpu_info SRCS gpu_info.cc DEPS gflags)
cc_library(place SRCS place.cc) cc_library(place SRCS place.cc)
cc_test(place_test SRCS place_test.cc DEPS place glog gflags) cc_test(place_test SRCS place_test.cc DEPS place glog gflags)
nv_test(device_context_test SRCS device_context_test.cc DEPS dynamic_loader place eigen3 glog gflags) add_subdirectory(dynload)
IF(WITH_GPU)
set(GPU_CTX_DEPS dynload_cuda dynamic_loader)
ELSE()
set(GPU_CTX_DEPS)
ENDIF()
cc_library(device_context SRCS device_context.cc DEPS place eigen3 ${GPU_CTX_DEPS})
nv_test(device_context_test SRCS device_context_test.cc DEPS device_context gpu_info)
/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserve.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
#include "paddle/platform/cpu_info.h"
#ifdef __APPLE__
#include <sys/sysctl.h>
#include <sys/types.h>
#else
#include <unistd.h>
#endif
#include "gflags/gflags.h"
#include "paddle/platform/error.h"
DEFINE_double(fraction_of_cpu_memory_to_use, 1,
"Default use 100% of CPU memory for PaddlePaddle,"
"reserve the rest for page tables, etc");
namespace paddle {
namespace platform {
inline size_t CpuTotalPhysicalMemory() {
#ifdef __APPLE__
int mib[2];
mib[0] = CTL_HW;
mib[1] = HW_MEMSIZE;
int64_t size = 0;
size_t len = sizeof(size);
if (sysctl(mib, 2, &size, &len, NULL, 0) == 0) return (size_t)size;
return 0L;
#else
int64_t pages = sysconf(_SC_PHYS_PAGES);
int64_t page_size = sysconf(_SC_PAGE_SIZE);
return pages * page_size;
#endif
}
size_t CpuMaxAllocSize() {
// For distributed systems, it requires configuring and limiting
// the fraction of memory to use.
return FLAGS_fraction_of_cpu_memory_to_use * CpuTotalPhysicalMemory();
}
size_t CpuMinChunkSize() {
// Allow to allocate the minimum chunk size is 4 KB.
return 1 << 12;
}
size_t CpuMaxChunkSize() {
// Allow to allocate the maximum chunk size is roughly 3% of CPU memory.
return CpuMaxAllocSize() / 32;
}
} // namespace platform
} // namespace paddle
/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserve.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
#pragma once
#include <stddef.h>
namespace paddle {
namespace platform {
//! Get the maximum allocation size for a machine.
size_t CpuMaxAllocSize();
//! Get the minimum chunk size for buddy allocator.
size_t CpuMinChunkSize();
//! Get the maximum chunk size for buddy allocator.
size_t CpuMaxChunkSize();
} // namespace platform
} // namespace paddle
#include "paddle/platform/cpu_info.h"
#include "paddle/string/printf.h"
#include <ostream>
#include <sstream>
#include "gflags/gflags.h"
#include "glog/logging.h"
#include "gtest/gtest.h"
DECLARE_double(fraction_of_cpu_memory_to_use);
TEST(CpuMemoryUsage, Print) {
std::stringstream ss;
size_t memory_size = paddle::platform::CpuMaxAllocSize() / 1024 / 1024 / 1024;
float use_percent = FLAGS_fraction_of_cpu_memory_to_use * 100;
std::cout << paddle::string::Sprintf("\n%.2f %% of CPU Memory Usage: %d GB\n",
use_percent, memory_size)
<< std::endl;
}
#include <cuda_runtime.h>
#include <stdio.h>
#include "gtest/gtest.h"
#define CHECK_ERR(x) \
if (x != cudaSuccess) { \
fprintf(stderr, \
"%s in %s at line %d\n", \
cudaGetErrorString(err), \
__FILE__, \
__LINE__); \
exit(-1); \
}
__global__ void vecAdd(float *d_A, float *d_B, float *d_C, int n) {
int i = blockDim.x * blockIdx.x + threadIdx.x;
if (i < n) {
d_C[i] = d_A[i] + d_B[i];
}
}
TEST(Cuda, Equality) {
int n = 10;
// Memory allocation for h_A, h_B and h_C (in the host)
float h_A[10] = {1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 0.0};
float h_B[10] = {0.0, 9.0, 8.0, 7.0, 6.0, 5.0, 4.0, 3.0, 2.0, 1.0};
float h_C[10];
float *d_A, *d_B, *d_C;
cudaError_t err;
// Memory allocation for d_A, d_B and d_C (in the device)
err = cudaMalloc((void **)&d_A, sizeof(float) * n);
CHECK_ERR(err);
err = cudaMalloc((void **)&d_B, sizeof(float) * n);
CHECK_ERR(err);
err = cudaMalloc((void **)&d_C, sizeof(float) * n);
CHECK_ERR(err);
// Copying memory to device
err = cudaMemcpy(d_A, h_A, sizeof(float) * n, cudaMemcpyHostToDevice);
CHECK_ERR(err);
err = cudaMemcpy(d_B, h_B, sizeof(float) * n, cudaMemcpyHostToDevice);
CHECK_ERR(err);
// Calling the kernel
vecAdd<<<ceil(n / 256.0), 256>>>(d_A, d_B, d_C, n);
// Copying results back to host
err = cudaMemcpy(h_C, d_C, sizeof(float) * n, cudaMemcpyDeviceToHost);
CHECK_ERR(err);
EXPECT_EQ(h_C[0], 1.0);
for (int i = 1; i < n - 1; ++i) {
EXPECT_EQ(h_C[i], 11.0);
}
EXPECT_EQ(h_C[9], 1.0);
}
/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserve.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
#include "paddle/platform/device_context.h"
namespace paddle {
namespace platform {
template <>
Eigen::DefaultDevice* DeviceContext::get_eigen_device<Eigen::DefaultDevice>()
const {
return reinterpret_cast<const CPUDeviceContext*>(this)->eigen_device();
}
#ifndef PADDLE_ONLY_CPU
template <>
Eigen::GpuDevice* DeviceContext::get_eigen_device<Eigen::GpuDevice>() const {
return reinterpret_cast<const CUDADeviceContext*>(this)->eigen_device();
}
#endif
} // namespace platform
} // namespace paddle
/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserve. /* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserve.
Licensed under the Apache License, Version 2.0 (the "License"); Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License. you may not use this file except in compliance with the License.
You may obtain a copy of the License at You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0 http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS, distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
...@@ -16,12 +13,14 @@ limitations under the License. */ ...@@ -16,12 +13,14 @@ limitations under the License. */
#include "paddle/framework/enforce.h" #include "paddle/framework/enforce.h"
#ifndef PADDLE_ONLY_CPU #ifndef PADDLE_ONLY_CPU
#include "paddle/platform/cuda.h"
#include "paddle/platform/dynload/cublas.h" #include "paddle/platform/dynload/cublas.h"
#include "paddle/platform/dynload/cudnn.h" #include "paddle/platform/dynload/cudnn.h"
#include "paddle/platform/dynload/curand.h" #include "paddle/platform/dynload/curand.h"
#include "paddle/platform/error.h"
#include "paddle/platform/gpu_info.h"
#define EIGEN_USE_GPU #define EIGEN_USE_GPU
#endif #endif
#include <memory>
#include "paddle/platform/place.h" #include "paddle/platform/place.h"
#include "unsupported/Eigen/CXX11/Tensor" #include "unsupported/Eigen/CXX11/Tensor"
...@@ -31,11 +30,29 @@ namespace platform { ...@@ -31,11 +30,29 @@ namespace platform {
class DeviceContext { class DeviceContext {
public: public:
virtual ~DeviceContext() {} virtual ~DeviceContext() {}
virtual Place GetPlace() const = 0;
template <typename DeviceType>
DeviceType* get_eigen_device() const;
}; };
class CPUDeviceContext : public DeviceContext {}; class CPUDeviceContext : public DeviceContext {
public:
CPUDeviceContext() { eigen_device_.reset(new Eigen::DefaultDevice()); }
Eigen::DefaultDevice* eigen_device() const { return eigen_device_.get(); }
Place GetPlace() const override {
Place retv = CPUPlace();
return retv;
}
private:
std::unique_ptr<Eigen::DefaultDevice> eigen_device_;
};
#ifndef PADDLE_ONLY_CPU #ifndef PADDLE_ONLY_CPU
class GPUPlaceGuard { class GPUPlaceGuard {
public: public:
explicit GPUPlaceGuard(GPUPlace new_place) : previous_(GetCurrentDeviceId()) { explicit GPUPlaceGuard(GPUPlace new_place) : previous_(GetCurrentDeviceId()) {
...@@ -56,8 +73,13 @@ class CUDADeviceContext : public DeviceContext { ...@@ -56,8 +73,13 @@ class CUDADeviceContext : public DeviceContext {
GPUPlaceGuard guard(gpu_place_); GPUPlaceGuard guard(gpu_place_);
paddle::platform::throw_on_error(cudaStreamCreate(&stream_), paddle::platform::throw_on_error(cudaStreamCreate(&stream_),
"cudaStreamCreate failed"); "cudaStreamCreate failed");
eigen_stream_ = new Eigen::CudaStreamDevice(&stream_); eigen_stream_.reset(new Eigen::CudaStreamDevice(&stream_));
eigen_device_ = new Eigen::GpuDevice(eigen_stream_); eigen_device_.reset(new Eigen::GpuDevice(eigen_stream_.get()));
}
Place GetPlace() const override {
Place retv = GPUPlace();
return retv;
} }
void Wait() { void Wait() {
...@@ -67,7 +89,7 @@ class CUDADeviceContext : public DeviceContext { ...@@ -67,7 +89,7 @@ class CUDADeviceContext : public DeviceContext {
cudaStream_t stream() { return stream_; } cudaStream_t stream() { return stream_; }
Eigen::GpuDevice eigen_device() { return *eigen_device_; } Eigen::GpuDevice* eigen_device() const { return eigen_device_.get(); }
cublasHandle_t cublas_handle() { cublasHandle_t cublas_handle() {
if (!blas_handle_) { if (!blas_handle_) {
...@@ -132,10 +154,8 @@ class CUDADeviceContext : public DeviceContext { ...@@ -132,10 +154,8 @@ class CUDADeviceContext : public DeviceContext {
rand_generator_) == CURAND_STATUS_SUCCESS, rand_generator_) == CURAND_STATUS_SUCCESS,
"curandDestroyGenerator failed"); "curandDestroyGenerator failed");
} }
eigen_stream_.reset();
delete eigen_stream_; eigen_device_.reset();
delete eigen_device_;
paddle::platform::throw_on_error(cudaStreamDestroy(stream_), paddle::platform::throw_on_error(cudaStreamDestroy(stream_),
"cudaStreamDestroy failed"); "cudaStreamDestroy failed");
} }
...@@ -144,8 +164,8 @@ class CUDADeviceContext : public DeviceContext { ...@@ -144,8 +164,8 @@ class CUDADeviceContext : public DeviceContext {
GPUPlace gpu_place_; GPUPlace gpu_place_;
cudaStream_t stream_; cudaStream_t stream_;
Eigen::CudaStreamDevice* eigen_stream_; std::unique_ptr<Eigen::CudaStreamDevice> eigen_stream_;
Eigen::GpuDevice* eigen_device_; std::unique_ptr<Eigen::GpuDevice> eigen_device_;
cublasHandle_t blas_handle_{nullptr}; cublasHandle_t blas_handle_{nullptr};
...@@ -154,6 +174,8 @@ class CUDADeviceContext : public DeviceContext { ...@@ -154,6 +174,8 @@ class CUDADeviceContext : public DeviceContext {
int random_seed_; int random_seed_;
curandGenerator_t rand_generator_{nullptr}; curandGenerator_t rand_generator_{nullptr};
}; };
#endif #endif
} // namespace platform } // namespace platform
} // namespace paddle } // namespace paddle
...@@ -15,13 +15,26 @@ limitations under the License. */ ...@@ -15,13 +15,26 @@ limitations under the License. */
#include "paddle/platform/device_context.h" #include "paddle/platform/device_context.h"
#include "gtest/gtest.h" #include "gtest/gtest.h"
TEST(CUDADeviceContext, Init) { using DEVICE_GPU = Eigen::GpuDevice;
TEST(Device, Init) {
int count = paddle::platform::GetDeviceCount();
for (int i = 0; i < count; i++) {
paddle::platform::DeviceContext* device_context =
new paddle::platform::CUDADeviceContext(i);
Eigen::GpuDevice* gpu_device =
device_context->template get_eigen_device<DEVICE_GPU>();
ASSERT_NE(nullptr, gpu_device);
delete device_context;
}
}
TEST(Device, CUDADeviceContext) {
int count = paddle::platform::GetDeviceCount(); int count = paddle::platform::GetDeviceCount();
for (int i = 0; i < count; i++) { for (int i = 0; i < count; i++) {
paddle::platform::CUDADeviceContext* device_context = paddle::platform::CUDADeviceContext* device_context =
new paddle::platform::CUDADeviceContext(i); new paddle::platform::CUDADeviceContext(i);
Eigen::GpuDevice gpu_device = device_context->eigen_device(); Eigen::GpuDevice* gpu_device = device_context->eigen_device();
ASSERT_NE(nullptr, gpu_device.stream()); ASSERT_NE(nullptr, gpu_device);
cudnnHandle_t cudnn_handle = device_context->cudnn_handle(); cudnnHandle_t cudnn_handle = device_context->cudnn_handle();
ASSERT_NE(nullptr, cudnn_handle); ASSERT_NE(nullptr, cudnn_handle);
cublasHandle_t cublas_handle = device_context->cublas_handle(); cublasHandle_t cublas_handle = device_context->cublas_handle();
......
cc_library(dynamic_loader SRCS dynamic_loader.cc DEPS glog gflags) cc_library(dynamic_loader SRCS dynamic_loader.cc DEPS glog gflags)
nv_library(dynload_cuda SRCS cublas.cc cudnn.cc curand.cc)
#include <paddle/platform/dynload/cublas.h>
namespace paddle {
namespace platform {
namespace dynload {
std::once_flag cublas_dso_flag;
void *cublas_dso_handle = nullptr;
#define DEFINE_WRAP(__name) DynLoad__##__name __name
CUBLAS_BLAS_ROUTINE_EACH(DEFINE_WRAP);
} // namespace dynload
} // namespace platform
} // namespace paddle
...@@ -23,8 +23,8 @@ namespace paddle { ...@@ -23,8 +23,8 @@ namespace paddle {
namespace platform { namespace platform {
namespace dynload { namespace dynload {
std::once_flag cublas_dso_flag; extern std::once_flag cublas_dso_flag;
void *cublas_dso_handle = nullptr; extern void *cublas_dso_handle;
/** /**
* The following macro definition can generate structs * The following macro definition can generate structs
...@@ -34,10 +34,10 @@ void *cublas_dso_handle = nullptr; ...@@ -34,10 +34,10 @@ void *cublas_dso_handle = nullptr;
* note: default dynamic linked libs * note: default dynamic linked libs
*/ */
#ifdef PADDLE_USE_DSO #ifdef PADDLE_USE_DSO
#define DYNAMIC_LOAD_CUBLAS_WRAP(__name) \ #define DECLARE_DYNAMIC_LOAD_CUBLAS_WRAP(__name) \
struct DynLoad__##__name { \ struct DynLoad__##__name { \
template <typename... Args> \ template <typename... Args> \
cublasStatus_t operator()(Args... args) { \ inline cublasStatus_t operator()(Args... args) { \
typedef cublasStatus_t (*cublasFunc)(Args...); \ typedef cublasStatus_t (*cublasFunc)(Args...); \
std::call_once(cublas_dso_flag, \ std::call_once(cublas_dso_flag, \
paddle::platform::dynload::GetCublasDsoHandle, \ paddle::platform::dynload::GetCublasDsoHandle, \
...@@ -45,62 +45,46 @@ void *cublas_dso_handle = nullptr; ...@@ -45,62 +45,46 @@ void *cublas_dso_handle = nullptr;
void *p_##__name = dlsym(cublas_dso_handle, #__name); \ void *p_##__name = dlsym(cublas_dso_handle, #__name); \
return reinterpret_cast<cublasFunc>(p_##__name)(args...); \ return reinterpret_cast<cublasFunc>(p_##__name)(args...); \
} \ } \
} __name; // struct DynLoad__##__name }; \
extern DynLoad__##__name __name
#else #else
#define DYNAMIC_LOAD_CUBLAS_WRAP(__name) \ #define DECLARE_DYNAMIC_LOAD_CUBLAS_WRAP(__name) \
struct DynLoad__##__name { \ struct DynLoad__##__name { \
template <typename... Args> \ inline template <typename... Args> \
cublasStatus_t operator()(Args... args) { \ cublasStatus_t operator()(Args... args) { \
return __name(args...); \ return __name(args...); \
} \ } \
} __name; // struct DynLoad__##__name }; \
extern DynLoad__##__name __name
#endif #endif
#define DYNAMIC_LOAD_CUBLAS_V2_WRAP(__name) DYNAMIC_LOAD_CUBLAS_WRAP(__name) #define DECLARE_DYNAMIC_LOAD_CUBLAS_V2_WRAP(__name) \
DECLARE_DYNAMIC_LOAD_CUBLAS_WRAP(__name)
// include all needed cublas functions in HPPL
// clang-format off
#define CUBLAS_BLAS_ROUTINE_EACH(__macro) \ #define CUBLAS_BLAS_ROUTINE_EACH(__macro) \
__macro(cublasSgemv) \ __macro(cublasSgemv); \
__macro(cublasDgemv) \ __macro(cublasDgemv); \
__macro(cublasSgemm) \ __macro(cublasSgemm); \
__macro(cublasDgemm) \ __macro(cublasDgemm); \
__macro(cublasSgeam) \ __macro(cublasSgeam); \
__macro(cublasDgeam) \ __macro(cublasDgeam); \
__macro(cublasCreate_v2); \
DYNAMIC_LOAD_CUBLAS_V2_WRAP(cublasCreate) __macro(cublasDestroy_v2); \
DYNAMIC_LOAD_CUBLAS_V2_WRAP(cublasDestroy) __macro(cublasSetStream_v2); \
DYNAMIC_LOAD_CUBLAS_V2_WRAP(cublasSetStream) __macro(cublasSetPointerMode_v2); \
DYNAMIC_LOAD_CUBLAS_V2_WRAP(cublasSetPointerMode) __macro(cublasGetPointerMode_v2); \
DYNAMIC_LOAD_CUBLAS_V2_WRAP(cublasGetPointerMode) __macro(cublasSgemmBatched); \
DYNAMIC_LOAD_CUBLAS_WRAP(cublasSgemmBatched) __macro(cublasDgemmBatched); \
DYNAMIC_LOAD_CUBLAS_WRAP(cublasDgemmBatched) __macro(cublasCgemmBatched); \
DYNAMIC_LOAD_CUBLAS_WRAP(cublasCgemmBatched) __macro(cublasZgemmBatched); \
DYNAMIC_LOAD_CUBLAS_WRAP(cublasZgemmBatched) __macro(cublasSgetrfBatched); \
DYNAMIC_LOAD_CUBLAS_WRAP(cublasSgetrfBatched) __macro(cublasSgetriBatched); \
DYNAMIC_LOAD_CUBLAS_WRAP(cublasSgetriBatched) __macro(cublasDgetrfBatched); \
DYNAMIC_LOAD_CUBLAS_WRAP(cublasDgetrfBatched) __macro(cublasDgetriBatched)
DYNAMIC_LOAD_CUBLAS_WRAP(cublasDgetriBatched)
CUBLAS_BLAS_ROUTINE_EACH(DYNAMIC_LOAD_CUBLAS_V2_WRAP)
#undef DYNAMIC_LOAD_CUBLAS_WRAP CUBLAS_BLAS_ROUTINE_EACH(DECLARE_DYNAMIC_LOAD_CUBLAS_WRAP);
#undef DYNAMIC_LOAD_CUBLAS_V2_WRAP
#undef CUBLAS_BLAS_ROUTINE_EACH
// clang-format on #undef DECLARE_DYNAMIC_LOAD_CUBLAS_WRAP
#ifndef PADDLE_TYPE_DOUBLE
#define CUBLAS_GEAM paddle::platform::dynload::cublasSgeam
#define CUBLAS_GEMV paddle::platform::dynload::cublasSgemv
#define CUBLAS_GEMM paddle::platform::dynload::cublasSgemm
#define CUBLAS_GETRF paddle::platform::dynload::cublasSgetrfBatched
#define CUBLAS_GETRI paddle::platform::dynload::cublasSgetriBatched
#else
#define CUBLAS_GEAM paddle::platform::dynload::cublasDgeam
#define CUBLAS_GEMV paddle::platform::dynload::cublasDgemv
#define CUBLAS_GEMM paddle::platform::dynload::cublasDgemm
#define CUBLAS_GETRF paddle::platform::dynload::cublasDgetrfBatched
#define CUBLAS_GETRI paddle::platform::dynload::cublasDgetriBatched
#endif
} // namespace dynload } // namespace dynload
} // namespace platform } // namespace platform
} // namespace paddle } // namespace paddle
#include <paddle/platform/dynload/cudnn.h>
namespace paddle {
namespace platform {
namespace dynload {
std::once_flag cudnn_dso_flag;
void* cudnn_dso_handle = nullptr;
#define DEFINE_WRAP(__name) DynLoad__##__name __name
CUDNN_DNN_ROUTINE_EACH(DEFINE_WRAP);
CUDNN_DNN_ROUTINE_EACH_R2(DEFINE_WRAP);
#ifdef CUDNN_DNN_ROUTINE_EACH_AFTER_R3
CUDNN_DNN_ROUTINE_EACH_AFTER_R3(DEFINE_WRAP);
#endif
#ifdef CUDNN_DNN_ROUTINE_EACH_AFTER_R4
CUDNN_DNN_ROUTINE_EACH_AFTER_R4(DEFINE_WRAP);
#endif
#ifdef CUDNN_DNN_ROUTINE_EACH_R5
CUDNN_DNN_ROUTINE_EACH_R5(DEFINE_WRAP);
#endif
} // namespace dynload
} // namespace platform
} // namespace paddle
\ No newline at end of file
...@@ -23,12 +23,12 @@ namespace paddle { ...@@ -23,12 +23,12 @@ namespace paddle {
namespace platform { namespace platform {
namespace dynload { namespace dynload {
std::once_flag cudnn_dso_flag; extern std::once_flag cudnn_dso_flag;
void* cudnn_dso_handle = nullptr; extern void* cudnn_dso_handle;
#ifdef PADDLE_USE_DSO #ifdef PADDLE_USE_DSO
#define DYNAMIC_LOAD_CUDNN_WRAP(__name) \ #define DECLARE_DYNAMIC_LOAD_CUDNN_WRAP(__name) \
struct DynLoad__##__name { \ struct DynLoad__##__name { \
template <typename... Args> \ template <typename... Args> \
auto operator()(Args... args) -> decltype(__name(args...)) { \ auto operator()(Args... args) -> decltype(__name(args...)) { \
...@@ -39,17 +39,19 @@ void* cudnn_dso_handle = nullptr; ...@@ -39,17 +39,19 @@ void* cudnn_dso_handle = nullptr;
void* p_##__name = dlsym(cudnn_dso_handle, #__name); \ void* p_##__name = dlsym(cudnn_dso_handle, #__name); \
return reinterpret_cast<cudnn_func>(p_##__name)(args...); \ return reinterpret_cast<cudnn_func>(p_##__name)(args...); \
} \ } \
} __name; /* struct DynLoad__##__name */ }; \
extern struct DynLoad__##__name __name
#else #else
#define DYNAMIC_LOAD_CUDNN_WRAP(__name) \ #define DECLARE_DYNAMIC_LOAD_CUDNN_WRAP(__name) \
struct DynLoad__##__name { \ struct DynLoad__##__name { \
template <typename... Args> \ template <typename... Args> \
auto operator()(Args... args) -> decltype(__name(args...)) { \ auto operator()(Args... args) -> decltype(__name(args...)) { \
return __name(args...); \ return __name(args...); \
} \ } \
} __name; /* struct DynLoad__##__name */ }; \
extern DynLoad__##__name __name
#endif #endif
...@@ -57,80 +59,73 @@ void* cudnn_dso_handle = nullptr; ...@@ -57,80 +59,73 @@ void* cudnn_dso_handle = nullptr;
* include all needed cudnn functions in HPPL * include all needed cudnn functions in HPPL
* different cudnn version has different interfaces * different cudnn version has different interfaces
**/ **/
// clang-format off
#define CUDNN_DNN_ROUTINE_EACH(__macro) \ #define CUDNN_DNN_ROUTINE_EACH(__macro) \
__macro(cudnnSetTensor4dDescriptor) \ __macro(cudnnSetTensor4dDescriptor); \
__macro(cudnnSetTensor4dDescriptorEx) \ __macro(cudnnSetTensor4dDescriptorEx); \
__macro(cudnnGetConvolutionNdForwardOutputDim) \ __macro(cudnnGetConvolutionNdForwardOutputDim); \
__macro(cudnnGetConvolutionForwardAlgorithm) \ __macro(cudnnGetConvolutionForwardAlgorithm); \
__macro(cudnnCreateTensorDescriptor) \ __macro(cudnnCreateTensorDescriptor); \
__macro(cudnnDestroyTensorDescriptor) \ __macro(cudnnDestroyTensorDescriptor); \
__macro(cudnnCreateFilterDescriptor) \ __macro(cudnnCreateFilterDescriptor); \
__macro(cudnnSetFilter4dDescriptor) \ __macro(cudnnSetFilter4dDescriptor); \
__macro(cudnnSetPooling2dDescriptor) \ __macro(cudnnSetPooling2dDescriptor); \
__macro(cudnnDestroyFilterDescriptor) \ __macro(cudnnDestroyFilterDescriptor); \
__macro(cudnnCreateConvolutionDescriptor) \ __macro(cudnnCreateConvolutionDescriptor); \
__macro(cudnnCreatePoolingDescriptor) \ __macro(cudnnCreatePoolingDescriptor); \
__macro(cudnnDestroyPoolingDescriptor) \ __macro(cudnnDestroyPoolingDescriptor); \
__macro(cudnnSetConvolution2dDescriptor) \ __macro(cudnnSetConvolution2dDescriptor); \
__macro(cudnnDestroyConvolutionDescriptor) \ __macro(cudnnDestroyConvolutionDescriptor); \
__macro(cudnnCreate) \ __macro(cudnnCreate); \
__macro(cudnnDestroy) \ __macro(cudnnDestroy); \
__macro(cudnnSetStream) \ __macro(cudnnSetStream); \
__macro(cudnnActivationForward) \ __macro(cudnnActivationForward); \
__macro(cudnnConvolutionForward) \ __macro(cudnnConvolutionForward); \
__macro(cudnnConvolutionBackwardBias) \ __macro(cudnnConvolutionBackwardBias); \
__macro(cudnnGetConvolutionForwardWorkspaceSize) \ __macro(cudnnGetConvolutionForwardWorkspaceSize); \
__macro(cudnnTransformTensor) \ __macro(cudnnTransformTensor); \
__macro(cudnnPoolingForward) \ __macro(cudnnPoolingForward); \
__macro(cudnnPoolingBackward) \ __macro(cudnnPoolingBackward); \
__macro(cudnnSoftmaxBackward) \ __macro(cudnnSoftmaxBackward); \
__macro(cudnnSoftmaxForward) \ __macro(cudnnSoftmaxForward); \
__macro(cudnnGetVersion) \ __macro(cudnnGetVersion); \
__macro(cudnnGetErrorString) __macro(cudnnGetErrorString);
CUDNN_DNN_ROUTINE_EACH(DYNAMIC_LOAD_CUDNN_WRAP) CUDNN_DNN_ROUTINE_EACH(DECLARE_DYNAMIC_LOAD_CUDNN_WRAP)
#define CUDNN_DNN_ROUTINE_EACH_R2(__macro) \ #define CUDNN_DNN_ROUTINE_EACH_R2(__macro) \
__macro(cudnnAddTensor) \ __macro(cudnnAddTensor); \
__macro(cudnnConvolutionBackwardData) \ __macro(cudnnConvolutionBackwardData); \
__macro(cudnnConvolutionBackwardFilter) __macro(cudnnConvolutionBackwardFilter);
CUDNN_DNN_ROUTINE_EACH_R2(DYNAMIC_LOAD_CUDNN_WRAP) CUDNN_DNN_ROUTINE_EACH_R2(DECLARE_DYNAMIC_LOAD_CUDNN_WRAP)
// APIs available after R3: // APIs available after R3:
#if CUDNN_VERSION >= 3000 #if CUDNN_VERSION >= 3000
#define CUDNN_DNN_ROUTINE_EACH_AFTER_R3(__macro) \ #define CUDNN_DNN_ROUTINE_EACH_AFTER_R3(__macro) \
__macro(cudnnGetConvolutionBackwardFilterWorkspaceSize) \ __macro(cudnnGetConvolutionBackwardFilterWorkspaceSize); \
__macro(cudnnGetConvolutionBackwardDataAlgorithm) \ __macro(cudnnGetConvolutionBackwardDataAlgorithm); \
__macro(cudnnGetConvolutionBackwardFilterAlgorithm) \ __macro(cudnnGetConvolutionBackwardFilterAlgorithm); \
__macro(cudnnGetConvolutionBackwardDataWorkspaceSize) __macro(cudnnGetConvolutionBackwardDataWorkspaceSize);
CUDNN_DNN_ROUTINE_EACH_AFTER_R3(DYNAMIC_LOAD_CUDNN_WRAP) CUDNN_DNN_ROUTINE_EACH_AFTER_R3(DECLARE_DYNAMIC_LOAD_CUDNN_WRAP)
#undef CUDNN_DNN_ROUTINE_EACH_AFTER_R3
#endif #endif
// APIs available after R4: // APIs available after R4:
#if CUDNN_VERSION >= 4007 #if CUDNN_VERSION >= 4007
#define CUDNN_DNN_ROUTINE_EACH_AFTER_R4(__macro) \ #define CUDNN_DNN_ROUTINE_EACH_AFTER_R4(__macro) \
__macro(cudnnBatchNormalizationForwardTraining) \ __macro(cudnnBatchNormalizationForwardTraining); \
__macro(cudnnBatchNormalizationForwardInference) \ __macro(cudnnBatchNormalizationForwardInference); \
__macro(cudnnBatchNormalizationBackward) __macro(cudnnBatchNormalizationBackward);
CUDNN_DNN_ROUTINE_EACH_AFTER_R4(DYNAMIC_LOAD_CUDNN_WRAP) CUDNN_DNN_ROUTINE_EACH_AFTER_R4(DECLARE_DYNAMIC_LOAD_CUDNN_WRAP)
#undef CUDNN_DNN_ROUTINE_EACH_AFTER_R4
#endif #endif
// APIs in R5 // APIs in R5
#if CUDNN_VERSION >= 5000 #if CUDNN_VERSION >= 5000
#define CUDNN_DNN_ROUTINE_EACH_R5(__macro) \ #define CUDNN_DNN_ROUTINE_EACH_R5(__macro) \
__macro(cudnnCreateActivationDescriptor) \ __macro(cudnnCreateActivationDescriptor); \
__macro(cudnnSetActivationDescriptor) \ __macro(cudnnSetActivationDescriptor); \
__macro(cudnnGetActivationDescriptor) \ __macro(cudnnGetActivationDescriptor); \
__macro(cudnnDestroyActivationDescriptor) __macro(cudnnDestroyActivationDescriptor);
CUDNN_DNN_ROUTINE_EACH_R5(DYNAMIC_LOAD_CUDNN_WRAP) CUDNN_DNN_ROUTINE_EACH_R5(DECLARE_DYNAMIC_LOAD_CUDNN_WRAP)
#undef CUDNN_DNN_ROUTINE_EACH_R5
#endif #endif
#undef CUDNN_DNN_ROUTINE_EACH
// clang-format on
} // namespace dynload } // namespace dynload
} // namespace platform } // namespace platform
} // namespace paddle } // namespace paddle
#include <paddle/platform/dynload/curand.h>
namespace paddle {
namespace platform {
namespace dynload {
std::once_flag curand_dso_flag;
void *curand_dso_handle;
#define DEFINE_WRAP(__name) DynLoad__##__name __name
CURAND_RAND_ROUTINE_EACH(DEFINE_WRAP);
}
}
}
\ No newline at end of file
...@@ -22,10 +22,10 @@ limitations under the License. */ ...@@ -22,10 +22,10 @@ limitations under the License. */
namespace paddle { namespace paddle {
namespace platform { namespace platform {
namespace dynload { namespace dynload {
std::once_flag curand_dso_flag; extern std::once_flag curand_dso_flag;
void *curand_dso_handle = nullptr; extern void *curand_dso_handle;
#ifdef PADDLE_USE_DSO #ifdef PADDLE_USE_DSO
#define DYNAMIC_LOAD_CURAND_WRAP(__name) \ #define DECLARE_DYNAMIC_LOAD_CURAND_WRAP(__name) \
struct DynLoad__##__name { \ struct DynLoad__##__name { \
template <typename... Args> \ template <typename... Args> \
curandStatus_t operator()(Args... args) { \ curandStatus_t operator()(Args... args) { \
...@@ -36,32 +36,29 @@ void *curand_dso_handle = nullptr; ...@@ -36,32 +36,29 @@ void *curand_dso_handle = nullptr;
void *p_##__name = dlsym(curand_dso_handle, #__name); \ void *p_##__name = dlsym(curand_dso_handle, #__name); \
return reinterpret_cast<curandFunc>(p_##__name)(args...); \ return reinterpret_cast<curandFunc>(p_##__name)(args...); \
} \ } \
} __name; /* struct DynLoad__##__name */ }; \
extern DynLoad__##__name __name
#else #else
#define DYNAMIC_LOAD_CURAND_WRAP(__name) \ #define DECLARE_DYNAMIC_LOAD_CURAND_WRAP(__name) \
struct DynLoad__##__name { \ struct DynLoad__##__name { \
template <typename... Args> \ template <typename... Args> \
curandStatus_t operator()(Args... args) { \ curandStatus_t operator()(Args... args) { \
return __name(args...); \ return __name(args...); \
} \ } \
} __name; /* struct DynLoad__##__name */ }; \
extern DynLoad__##__name __name
#endif #endif
/* include all needed curand functions in HPPL */
// clang-format off
#define CURAND_RAND_ROUTINE_EACH(__macro) \ #define CURAND_RAND_ROUTINE_EACH(__macro) \
__macro(curandCreateGenerator) \ __macro(curandCreateGenerator); \
__macro(curandSetStream) \ __macro(curandSetStream); \
__macro(curandSetPseudoRandomGeneratorSeed)\ __macro(curandSetPseudoRandomGeneratorSeed); \
__macro(curandGenerateUniform) \ __macro(curandGenerateUniform); \
__macro(curandGenerateUniformDouble) \ __macro(curandGenerateUniformDouble); \
__macro(curandDestroyGenerator) __macro(curandDestroyGenerator);
// clang-format on
CURAND_RAND_ROUTINE_EACH(DYNAMIC_LOAD_CURAND_WRAP) CURAND_RAND_ROUTINE_EACH(DECLARE_DYNAMIC_LOAD_CURAND_WRAP);
#undef CURAND_RAND_ROUTINE_EACH
#undef DYNAMIC_LOAD_CURAND_WRAP
} // namespace dynload } // namespace dynload
} // namespace platform } // namespace platform
} // namespace paddle } // namespace paddle
#pragma once
#include <sstream>
#include <stdexcept>
#include <string>
#ifndef PADDLE_ONLY_CPU
#include <cublas_v2.h>
#include <cudnn.h>
#include <curand.h>
#include <thrust/system/cuda/error.h>
#include <thrust/system_error.h>
#endif // PADDLE_ONLY_CPU
namespace paddle {
namespace platform {
#ifndef PADDLE_ONLY_CPU
inline void throw_on_error(cudaError_t e, const char* message) {
if (e) {
throw thrust::system_error(e, thrust::cuda_category(), message);
}
}
inline void throw_on_error(curandStatus_t stat, const char* message) {
if (stat != CURAND_STATUS_SUCCESS) {
throw thrust::system_error(cudaErrorLaunchFailure, thrust::cuda_category(),
message);
}
}
inline void throw_on_error(cudnnStatus_t stat, const char* message) {
std::stringstream ss;
if (stat == CUDNN_STATUS_SUCCESS) {
return;
} else {
ss << cudnnGetErrorString(stat);
ss << ", " << message;
throw std::runtime_error(ss.str());
}
}
inline void throw_on_error(cublasStatus_t stat, const char* message) {
std::stringstream ss;
if (stat == CUBLAS_STATUS_SUCCESS) {
return;
} else if (stat == CUBLAS_STATUS_NOT_INITIALIZED) {
ss << "CUBLAS: not initialized";
} else if (stat == CUBLAS_STATUS_ALLOC_FAILED) {
ss << "CUBLAS: alloc failed";
} else if (stat == CUBLAS_STATUS_INVALID_VALUE) {
ss << "CUBLAS: invalid value";
} else if (stat == CUBLAS_STATUS_ARCH_MISMATCH) {
ss << "CUBLAS: arch mismatch";
} else if (stat == CUBLAS_STATUS_MAPPING_ERROR) {
ss << "CUBLAS: mapping error";
} else if (stat == CUBLAS_STATUS_EXECUTION_FAILED) {
ss << "CUBLAS: execution failed";
} else if (stat == CUBLAS_STATUS_INTERNAL_ERROR) {
ss << "CUBLAS: internal error";
} else if (stat == CUBLAS_STATUS_NOT_SUPPORTED) {
ss << "CUBLAS: not supported";
} else if (stat == CUBLAS_STATUS_LICENSE_ERROR) {
ss << "CUBLAS: license error";
}
ss << ", " << message;
throw std::runtime_error(ss.str());
}
inline void throw_on_error(cublasStatus_t stat) {
const char* message = "";
throw_on_error(stat, message);
}
#endif // PADDLE_ONLY_CPU
inline void throw_on_error(int stat, const char* message) {
if (stat) {
throw std::runtime_error(message + (", stat = " + std::to_string(stat)));
}
}
} // namespace platform
} // namespace paddle
/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserve.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
#include "paddle/platform/gpu_info.h"
#include "gflags/gflags.h"
#include "paddle/platform/error.h"
DEFINE_double(fraction_of_gpu_memory_to_use, 0.95,
"Default use 95% of GPU memory for PaddlePaddle,"
"reserve the rest for page tables, etc");
namespace paddle {
namespace platform {
int GetDeviceCount() {
int count;
throw_on_error(
cudaGetDeviceCount(&count),
"cudaGetDeviceCount failed in paddle::platform::GetDeviceCount");
return count;
}
int GetCurrentDeviceId() {
int device_id;
throw_on_error(
cudaGetDevice(&device_id),
"cudaGetDevice failed in paddle::platform::GetCurrentDeviceId");
return device_id;
}
void SetDeviceId(int id) {
throw_on_error(cudaSetDevice(id),
"cudaSetDevice failed in paddle::platform::SetDeviceId");
}
void GpuMemoryUsage(size_t& available, size_t& total) {
throw_on_error(cudaMemGetInfo(&available, &total),
"cudaMemGetInfo failed in paddle::platform::GetMemoryUsage");
}
size_t GpuMaxAllocSize() {
size_t total = 0;
size_t available = 0;
GpuMemoryUsage(available, total);
// Reserve the rest for page tables, etc.
return static_cast<size_t>(total * FLAGS_fraction_of_gpu_memory_to_use);
}
size_t GpuMinChunkSize() {
// Allow to allocate the minimum chunk size is 256 bytes.
return 1 << 8;
}
size_t GpuMaxChunkSize() {
size_t total = 0;
size_t available = 0;
GpuMemoryUsage(available, total);
// Reserving the rest memory for page tables, etc.
size_t reserving = (1 - FLAGS_fraction_of_gpu_memory_to_use) * total;
// If available less than minimum chunk size, no usable memory exists.
available = std::max(available, GpuMinChunkSize()) - GpuMinChunkSize();
// If available less than reserving, no usable memory exists.
size_t usable = std::max(available, reserving) - reserving;
return usable;
}
} // namespace platform
} // namespace paddle
...@@ -16,33 +16,31 @@ limitations under the License. */ ...@@ -16,33 +16,31 @@ limitations under the License. */
#ifndef PADDLE_ONLY_CPU #ifndef PADDLE_ONLY_CPU
#include <thrust/system/cuda/error.h> #include <stddef.h>
#include <thrust/system_error.h>
namespace paddle { namespace paddle {
namespace platform { namespace platform {
inline void throw_on_error(cudaError_t e, const char* message) { //! Get the total number of GPU devices in system.
if (e) { int GetDeviceCount();
throw thrust::system_error(e, thrust::cuda_category(), message);
} //! Get the current GPU device id in system.
} int GetCurrentDeviceId();
int GetDeviceCount(void) { //! Set the GPU device id for next execution.
int count; void SetDeviceId(int device_id);
throw_on_error(cudaGetDeviceCount(&count), "cudaGetDeviceCount failed");
return count; //!Get the memory usage of current GPU device.
} void GpuMemoryUsage(size_t& available, size_t& total);
int GetCurrentDeviceId(void) { //! Get the maximum allocation size of current GPU device.
int device_id; size_t GpuMaxAllocSize();
throw_on_error(cudaGetDevice(&device_id), "cudaGetDevice failed");
return device_id; //! Get the minimum chunk size for GPU buddy allocator.
} size_t GpuMinChunkSize();
void SetDeviceId(int device_id) { //! Get the maximum chunk size for GPU buddy allocator.
throw_on_error(cudaSetDevice(device_id), "cudaSetDevice failed"); size_t GpuMaxChunkSize();
}
} // namespace platform } // namespace platform
} // namespace paddle } // namespace paddle
......
/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserve.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
#include "paddle/platform/place.h" #include "paddle/platform/place.h"
namespace paddle { namespace paddle {
...@@ -7,7 +21,7 @@ namespace detail { ...@@ -7,7 +21,7 @@ namespace detail {
class PlacePrinter : public boost::static_visitor<> { class PlacePrinter : public boost::static_visitor<> {
public: public:
PlacePrinter(std::ostream &os) : os_(os) {} explicit PlacePrinter(std::ostream &os) : os_(os) {}
void operator()(const CPUPlace &) { os_ << "CPUPlace"; } void operator()(const CPUPlace &) { os_ << "CPUPlace"; }
void operator()(const GPUPlace &p) { os_ << "GPUPlace(" << p.device << ")"; } void operator()(const GPUPlace &p) { os_ << "GPUPlace(" << p.device << ")"; }
......
cc_library(paddle_pybind SHARED SRCS pybind.cc DEPS pybind python
add_op mul_op rowwise_add_op sigmoid_op softmax_op)
/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserve.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
#include <Python.h>
#include <paddle/framework/op_registry.h>
#include <paddle/framework/scope.h>
#include <paddle/pybind/tensor_bind.h>
#include <pybind11/numpy.h>
#include <pybind11/pybind11.h>
#include <pybind11/stl.h>
#include <fstream>
#include <vector>
namespace py = pybind11;
namespace pd = paddle::framework;
USE_OP(add_two);
USE_OP(softmax);
USE_OP(mul);
USE_OP(rowwise_add);
USE_OP(sigmoid);
PYBIND11_PLUGIN(core) {
py::module m("core", "C++ core of Paddle Paddle");
py::class_<pd::Tensor>(m, "Tensor", py::buffer_protocol())
.def_buffer([](pd::Tensor& self) -> py::buffer_info {
return paddle::pybind::CastToPyBuffer(self);
})
.def("get_dims",
[](const pd::Tensor& self) { return pd::vectorize(self.dims()); })
.def("set_dims",
[](pd::Tensor& self, const std::vector<int>& dim) {
self.set_dims(pd::make_ddim(dim));
})
.def("alloc_float",
[](pd::Tensor& self) {
self.mutable_data<float>(paddle::platform::CPUPlace());
})
.def("alloc_int",
[](pd::Tensor& self) {
self.mutable_data<int>(paddle::platform::CPUPlace());
})
.def("set", paddle::pybind::PyTensorSetFromArray<float>)
.def("set", paddle::pybind::PyTensorSetFromArray<int>);
py::class_<pd::Variable>(m, "Variable", R"DOC(Variable Class.
All parameter, weight, gradient are variables in Paddle.
)DOC")
.def("is_int", [](const pd::Variable& var) { return var.IsType<int>(); })
.def("set_int",
[](pd::Variable& var, int val) -> void {
*var.GetMutable<int>() = val;
})
.def("get_int",
[](const pd::Variable& var) -> int { return var.Get<int>(); })
.def("get_tensor",
[](pd::Variable& self) -> pd::Tensor* {
return self.GetMutable<pd::Tensor>();
},
py::return_value_policy::reference);
py::class_<pd::Scope, std::shared_ptr<pd::Scope>>(m, "Scope")
.def(py::init<const std::shared_ptr<pd::Scope>&>())
.def("get_var",
&pd::Scope::GetVariable,
py::return_value_policy::reference)
.def("create_var",
&pd::Scope::CreateVariable,
py::return_value_policy::reference);
//! @note: Be careful! PyBind will return std::string as an unicode, not
//! Python str. If you want a str object, you should cast them in Python.
m.def("get_all_op_protos", []() -> std::vector<std::string> {
auto& protos = pd::OpRegistry::protos();
std::vector<std::string> ret_values;
for (auto it = protos.begin(); it != protos.end(); ++it) {
PADDLE_ENFORCE(it->second.IsInitialized(),
"OpProto must all be initialized");
ret_values.emplace_back();
PADDLE_ENFORCE(it->second.SerializeToString(&ret_values.back()),
"Serialize OpProto Error. This could be a bug of Paddle.");
}
return ret_values;
});
m.def_submodule(
"var_names",
"The module will return special predefined variable name in Paddle")
.def("empty", pd::OperatorBase::EMPTY_VAR_NAME)
.def("temp", pd::OperatorBase::TMP_VAR_NAME);
py::class_<pd::OperatorBase, pd::OperatorPtr>(m, "Operator")
.def("__str__", &pd::OperatorBase::DebugString)
.def_static("create", [](const std::string& protobin) {
pd::OpDesc desc;
PADDLE_ENFORCE(desc.ParsePartialFromString(protobin),
"Cannot parse user input to OpDesc");
PADDLE_ENFORCE(desc.IsInitialized(),
"User OpDesc is not initialized, reason %s",
desc.InitializationErrorString());
return pd::OpRegistry::CreateOp(desc);
});
return m.ptr();
}
/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserve.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
#pragma once
#include <paddle/framework/tensor.h>
#include <pybind11/numpy.h>
#include <pybind11/pybind11.h>
namespace py = pybind11;
namespace paddle {
namespace pybind {
namespace details {
template <bool less, size_t I, typename... ARGS>
struct CastToPyBufferImpl;
template <size_t I, typename... ARGS>
struct CastToPyBufferImpl<false, I, ARGS...> {
py::buffer_info operator()(framework::Tensor &tensor) {
PADDLE_THROW("This type of tensor cannot be expose to Python");
return py::buffer_info();
}
};
template <size_t I, typename... ARGS>
struct CastToPyBufferImpl<true, I, ARGS...> {
using CUR_TYPE = typename std::tuple_element<I, std::tuple<ARGS...>>::type;
py::buffer_info operator()(framework::Tensor &tensor) {
PADDLE_ENFORCE(paddle::platform::is_cpu_place(tensor.holder_->place()),
"Only CPU tensor can cast to numpy array");
if (std::type_index(typeid(CUR_TYPE)) == tensor.holder_->type()) {
auto dim_vec = framework::vectorize(tensor.dims());
std::vector<size_t> dims_outside;
std::vector<size_t> strides;
dims_outside.resize(dim_vec.size());
strides.resize(dim_vec.size());
size_t prod = 1;
for (size_t i = dim_vec.size(); i != 0; --i) {
dims_outside[i - 1] = (size_t)dim_vec[i - 1];
strides[i - 1] = sizeof(CUR_TYPE) * prod;
prod *= dims_outside[i - 1];
}
return py::buffer_info(
tensor.mutable_data<CUR_TYPE>(tensor.holder_->place()),
sizeof(CUR_TYPE),
py::format_descriptor<CUR_TYPE>::format(),
(size_t)framework::arity(tensor.dims()),
dims_outside,
strides);
} else {
constexpr bool less = I + 1 < std::tuple_size<std::tuple<ARGS...>>::value;
return CastToPyBufferImpl<less, I + 1, ARGS...>()(tensor);
}
}
};
} // namespace details
inline py::buffer_info CastToPyBuffer(framework::Tensor &tensor) {
auto buffer_info = details::CastToPyBufferImpl<true, 0, float, int>()(tensor);
return buffer_info;
}
template <typename T>
void PyTensorSetFromArray(
framework::Tensor &self,
py::array_t<T, py::array::c_style | py::array::forcecast> array) {
std::vector<int> dims;
dims.reserve(array.ndim());
for (size_t i = 0; i < array.ndim(); ++i) {
dims.push_back((int)array.shape()[i]);
}
self.set_dims(framework::make_ddim(dims));
auto *dst = self.mutable_data<T>(paddle::platform::CPUPlace());
std::memcpy(dst, array.data(), sizeof(T) * array.size());
}
} // namespace pybind
} // namespace paddle
...@@ -155,7 +155,8 @@ RUN apt-get update &&\ ...@@ -155,7 +155,8 @@ RUN apt-get update &&\
paddle version paddle version
${DOCKERFILE_CUDNN_DSO} ${DOCKERFILE_CUDNN_DSO}
${DOCKERFILE_GPU_ENV} ${DOCKERFILE_GPU_ENV}
ADD go/cmd/pserver/pserver /usr/bin/
ADD go/cmd/master/master /usr/bin/
# default command shows the paddle version and exit # default command shows the paddle version and exit
CMD ["paddle", "version"] CMD ["paddle", "version"]
EOF EOF
...@@ -2,9 +2,9 @@ ...@@ -2,9 +2,9 @@
set -xe set -xe
mkdir -p /paddle/build mkdir -p /paddle/build_android
cd /paddle/build cd /paddle/build_android
rm -f /paddle/install 2>/dev/null || true rm -rf /paddle/install 2>/dev/null || true
cmake -DCMAKE_SYSTEM_NAME=Android \ cmake -DCMAKE_SYSTEM_NAME=Android \
-DANDROID_STANDALONE_TOOLCHAIN=$ANDROID_STANDALONE_TOOLCHAIN \ -DANDROID_STANDALONE_TOOLCHAIN=$ANDROID_STANDALONE_TOOLCHAIN \
-DANDROID_ABI=armeabi-v7a \ -DANDROID_ABI=armeabi-v7a \
...@@ -21,6 +21,3 @@ cmake -DCMAKE_SYSTEM_NAME=Android \ ...@@ -21,6 +21,3 @@ cmake -DCMAKE_SYSTEM_NAME=Android \
.. ..
make -j `nproc` make -j `nproc`
make install make install
export PATH=/paddle/install/bin:/paddle/install/opt/paddle/bin:$PATH
paddle version
#!/bin/bash
set -e
ANDROID_STANDALONE_TOOLCHAIN=$HOME/android-toolchain-gcc
TMP_DIR=$HOME/$JOB/tmp
mkdir -p $TMP_DIR
cd $TMP_DIR
wget -q https://dl.google.com/android/repository/android-ndk-r14b-linux-x86_64.zip
unzip -q android-ndk-r14b-linux-x86_64.zip
chmod +x $TMP_DIR/android-ndk-r14b/build/tools/make-standalone-toolchain.sh
$TMP_DIR/android-ndk-r14b/build/tools/make-standalone-toolchain.sh --force --arch=arm --platform=android-21 --install-dir=$ANDROID_STANDALONE_TOOLCHAIN
cd $HOME
rm -rf $TMP_DIR
# Create the build directory for CMake.
mkdir -p $TRAVIS_BUILD_DIR/build_android
cd $TRAVIS_BUILD_DIR/build_android
# Compile paddle binaries
cmake -DCMAKE_SYSTEM_NAME=Android \
-DANDROID_STANDALONE_TOOLCHAIN=$ANDROID_STANDALONE_TOOLCHAIN \
-DANDROID_ABI=armeabi-v7a \
-DANDROID_ARM_NEON=ON \
-DANDROID_ARM_MODE=ON \
-DWITH_C_API=ON \
-DWITH_SWIG_PY=OFF \
-DWITH_STYLE_CHECK=OFF \
..
make -j `nproc`
#!/bin/bash #!/bin/bash
function abort(){ function abort(){
echo "Your change doesn't follow PaddlePaddle's code style." 1>&2 echo "Your change doesn't follow PaddlePaddle's code style." 1>&2
echo "Please use pre-commit to reformat your code and git push again." 1>&2 echo "Please use pre-commit to check what is wrong." 1>&2
exit 1 exit 1
} }
...@@ -13,8 +13,14 @@ export PATH=/usr/bin:$PATH ...@@ -13,8 +13,14 @@ export PATH=/usr/bin:$PATH
pre-commit install pre-commit install
clang-format --version clang-format --version
# set up go environment for running gometalinter
mkdir -p $GOPATH/src/github.com/PaddlePaddle/
ln -sf $TRAVIS_BUILD_DIR $GOPATH/src/github.com/PaddlePaddle/Paddle
cd $GOPATH/src/github.com/PaddlePaddle/Paddle/go; glide install; cd -
if ! pre-commit run -a ; then if ! pre-commit run -a ; then
git diff --exit-code git diff
exit 1
fi fi
trap : 0 trap : 0
...@@ -35,7 +35,7 @@ public: ...@@ -35,7 +35,7 @@ public:
// We provide non-explicit singleton constructors so users can // We provide non-explicit singleton constructors so users can
// pass in a "const char*" or a "string" wherever a "Piece" // pass in a "const char*" or a "string" wherever a "Piece"
// is expected. These contructors ensure that if data_ is NULL, // is expected. These constructors ensure that if data_ is NULL,
// size_ is 0. // size_ is 0.
Piece(); Piece();
Piece(const char* d, size_t n); Piece(const char* d, size_t n);
......
...@@ -22,11 +22,23 @@ DECLARE_string(save_dir); ...@@ -22,11 +22,23 @@ DECLARE_string(save_dir);
namespace paddle { namespace paddle {
NewRemoteParameterUpdater::NewRemoteParameterUpdater( NewRemoteParameterUpdater::NewRemoteParameterUpdater(
const OptimizationConfig &config, const std::string pserverSpec) const OptimizationConfig &config, const std::string pserverSpec)
: parameterClient_(-1), : trainerConfig_(config),
parameterClient_(-1),
newParameters_(nullptr), newParameters_(nullptr),
newGradients_(nullptr), newGradients_(nullptr),
pserverSpec_(pserverSpec) {} pserverSpec_(pserverSpec) {}
NewRemoteParameterUpdater::NewRemoteParameterUpdater(
const OptimizationConfig &config,
const std::string pserverSpec,
const bool useEtcd)
: trainerConfig_(config),
parameterClient_(-1),
newParameters_(nullptr),
newGradients_(nullptr),
pserverSpec_(pserverSpec),
useEtcd_(useEtcd) {}
void NewRemoteParameterUpdater::init( void NewRemoteParameterUpdater::init(
const std::vector<ParameterPtr> &parameters) { const std::vector<ParameterPtr> &parameters) {
ParameterUpdater::init(parameters); ParameterUpdater::init(parameters);
...@@ -37,8 +49,13 @@ void NewRemoteParameterUpdater::init( ...@@ -37,8 +49,13 @@ void NewRemoteParameterUpdater::init(
} }
// create parameter server client. // create parameter server client.
if (useEtcd_) {
parameterClient_ = paddle_new_etcd_pserver_client(
(char *)pserverSpec_.c_str(), FLAGS_trainer_id == 0);
} else {
parameterClient_ = paddle_new_pserver_client((char *)pserverSpec_.c_str(), parameterClient_ = paddle_new_pserver_client((char *)pserverSpec_.c_str(),
FLAGS_trainer_id == 0); FLAGS_trainer_id == 0);
}
// init new parameter and gradient. // init new parameter and gradient.
newParameters_ = initNewParameter(PARAMETER_VALUE); newParameters_ = initNewParameter(PARAMETER_VALUE);
...@@ -51,7 +68,22 @@ void NewRemoteParameterUpdater::init( ...@@ -51,7 +68,22 @@ void NewRemoteParameterUpdater::init(
LOG(INFO) << "paddle_begin_init_params start"; LOG(INFO) << "paddle_begin_init_params start";
for (int i = 0; i < parameterSize(); ++i) { for (int i = 0; i < parameterSize(); ++i) {
auto paramConfig = parameters_[i]->getConfig(); auto paramConfig = parameters_[i]->getConfig();
std::string bytes = paramConfig.SerializeAsString(); LOG(INFO) << "old param config: " << paramConfig.DebugString();
// FIXME(typhoonzero): convert old paramConfig to optimizerConfig
OptimizerConfig optimizeConfigV2;
auto sgdConfigV2 = optimizeConfigV2.mutable_sgd();
sgdConfigV2->set_momentum(paramConfig.momentum());
sgdConfigV2->set_decay(paramConfig.decay_rate());
optimizeConfigV2.set_lr_policy(paddle::OptimizerConfig::Const);
auto constlr = optimizeConfigV2.mutable_const_lr();
constlr->set_learning_rate(paramConfig.learning_rate());
if (trainerConfig_.algorithm() == "sgd") {
optimizeConfigV2.set_optimizer(paddle::OptimizerConfig::SGD);
// FIXME: config all algorithms
} else {
optimizeConfigV2.set_optimizer(paddle::OptimizerConfig::SGD);
}
std::string bytes = optimizeConfigV2.SerializeAsString();
const char *array = bytes.data(); const char *array = bytes.data();
int size = (int)bytes.size(); int size = (int)bytes.size();
paddle_init_param( paddle_init_param(
...@@ -83,4 +115,4 @@ void NewRemoteParameterUpdater::finishBatch(real cost) { ...@@ -83,4 +115,4 @@ void NewRemoteParameterUpdater::finishBatch(real cost) {
void NewRemoteParameterUpdater::startPass() {} void NewRemoteParameterUpdater::startPass() {}
bool NewRemoteParameterUpdater::finishPass() { return true; } bool NewRemoteParameterUpdater::finishPass() { return true; }
} } // namespace paddle
...@@ -16,6 +16,7 @@ limitations under the License. */ ...@@ -16,6 +16,7 @@ limitations under the License. */
#include <functional> #include <functional>
#include <thread> #include <thread>
#include "OptimizerConfig.pb.h"
#include "ParameterUpdater.h" #include "ParameterUpdater.h"
#include "libpaddle_pserver_cclient.h" #include "libpaddle_pserver_cclient.h"
#include "paddle/pserver/ParameterClient2.h" #include "paddle/pserver/ParameterClient2.h"
...@@ -31,6 +32,9 @@ class NewRemoteParameterUpdater : public ParameterUpdater { ...@@ -31,6 +32,9 @@ class NewRemoteParameterUpdater : public ParameterUpdater {
public: public:
NewRemoteParameterUpdater(const OptimizationConfig& config, NewRemoteParameterUpdater(const OptimizationConfig& config,
const std::string pserverSpec); const std::string pserverSpec);
NewRemoteParameterUpdater(const OptimizationConfig& config,
const std::string pserverSpec,
const bool useEtcd);
~NewRemoteParameterUpdater() { ~NewRemoteParameterUpdater() {
releaseNewParameter(newParameters_); releaseNewParameter(newParameters_);
releaseNewParameter(newGradients_); releaseNewParameter(newGradients_);
...@@ -101,6 +105,7 @@ private: ...@@ -101,6 +105,7 @@ private:
} }
protected: protected:
const OptimizationConfig& trainerConfig_;
/// internal parameter client object for exchanging data with pserver /// internal parameter client object for exchanging data with pserver
paddle_pserver_client parameterClient_; paddle_pserver_client parameterClient_;
/// the parameters for new pserver client /// the parameters for new pserver client
...@@ -109,6 +114,8 @@ protected: ...@@ -109,6 +114,8 @@ protected:
paddle_parameter** newGradients_; paddle_parameter** newGradients_;
/// the specification of parameter server "host1:port,host1:port" /// the specification of parameter server "host1:port,host1:port"
std::string pserverSpec_; std::string pserverSpec_;
/// true if pserverSpec_ is etcd endpoint, else pserverSpec_ is pserver addr
bool useEtcd_;
}; };
} // namespace paddle } // namespace paddle
...@@ -62,11 +62,7 @@ TrainerConfigHelper::TrainerConfigHelper(const TrainerConfig &config) ...@@ -62,11 +62,7 @@ TrainerConfigHelper::TrainerConfigHelper(const TrainerConfig &config)
m->conf = config; m->conf = config;
} }
TrainerConfigHelper::~TrainerConfigHelper() { TrainerConfigHelper::~TrainerConfigHelper() { delete m; }
if (m) {
delete m;
}
}
const TrainerConfig &TrainerConfigHelper::getConfig() const { return m->conf; } const TrainerConfig &TrainerConfigHelper::getConfig() const { return m->conf; }
......
...@@ -12,8 +12,7 @@ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. ...@@ -12,8 +12,7 @@ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and See the License for the specific language governing permissions and
limitations under the License. */ limitations under the License. */
#ifndef DYNAMIC_LOAD_H_ #pragma once
#define DYNAMIC_LOAD_H_
#include <dlfcn.h> #include <dlfcn.h>
#include <memory> #include <memory>
...@@ -59,5 +58,3 @@ void GetWarpCTCDsoHandle(void** dso_handle); ...@@ -59,5 +58,3 @@ void GetWarpCTCDsoHandle(void** dso_handle);
* *
*/ */
void GetLapackDsoHandle(void** dso_handle); void GetLapackDsoHandle(void** dso_handle);
#endif // DYNAMIC_LOAD_H_
...@@ -51,7 +51,7 @@ template <class T> ...@@ -51,7 +51,7 @@ template <class T>
class ThreadLocal { class ThreadLocal {
public: public:
ThreadLocal() { ThreadLocal() {
CHECK(pthread_key_create(&threadSpecificKey_, dataDestructor) == 0); CHECK_EQ(pthread_key_create(&threadSpecificKey_, dataDestructor), 0);
} }
~ThreadLocal() { pthread_key_delete(threadSpecificKey_); } ~ThreadLocal() { pthread_key_delete(threadSpecificKey_); }
...@@ -65,7 +65,7 @@ public: ...@@ -65,7 +65,7 @@ public:
if (!p && createLocal) { if (!p && createLocal) {
p = new T(); p = new T();
int ret = pthread_setspecific(threadSpecificKey_, p); int ret = pthread_setspecific(threadSpecificKey_, p);
CHECK(ret == 0); CHECK_EQ(ret, 0);
} }
return p; return p;
} }
...@@ -79,7 +79,7 @@ public: ...@@ -79,7 +79,7 @@ public:
if (T* q = get(false)) { if (T* q = get(false)) {
dataDestructor(q); dataDestructor(q);
} }
CHECK(pthread_setspecific(threadSpecificKey_, p) == 0); CHECK_EQ(pthread_setspecific(threadSpecificKey_, p), 0);
} }
/** /**
...@@ -112,7 +112,7 @@ private: ...@@ -112,7 +112,7 @@ private:
template <class T> template <class T>
class ThreadLocalD { class ThreadLocalD {
public: public:
ThreadLocalD() { CHECK(pthread_key_create(&threadSpecificKey_, NULL) == 0); } ThreadLocalD() { CHECK_EQ(pthread_key_create(&threadSpecificKey_, NULL), 0); }
~ThreadLocalD() { ~ThreadLocalD() {
pthread_key_delete(threadSpecificKey_); pthread_key_delete(threadSpecificKey_);
for (auto t : threadMap_) { for (auto t : threadMap_) {
...@@ -127,7 +127,7 @@ public: ...@@ -127,7 +127,7 @@ public:
T* p = (T*)pthread_getspecific(threadSpecificKey_); T* p = (T*)pthread_getspecific(threadSpecificKey_);
if (!p) { if (!p) {
p = new T(); p = new T();
CHECK(pthread_setspecific(threadSpecificKey_, p) == 0); CHECK_EQ(pthread_setspecific(threadSpecificKey_, p), 0);
updateMap(p); updateMap(p);
} }
return p; return p;
...@@ -141,7 +141,7 @@ public: ...@@ -141,7 +141,7 @@ public:
if (T* q = (T*)pthread_getspecific(threadSpecificKey_)) { if (T* q = (T*)pthread_getspecific(threadSpecificKey_)) {
dataDestructor(q); dataDestructor(q);
} }
CHECK(pthread_setspecific(threadSpecificKey_, p) == 0); CHECK_EQ(pthread_setspecific(threadSpecificKey_, p), 0);
updateMap(p); updateMap(p);
} }
......
...@@ -26,10 +26,17 @@ endif(WITH_GOLANG) ...@@ -26,10 +26,17 @@ endif(WITH_GOLANG)
configure_file(${CMAKE_CURRENT_SOURCE_DIR}/setup.py.in configure_file(${CMAKE_CURRENT_SOURCE_DIR}/setup.py.in
${CMAKE_CURRENT_BINARY_DIR}/setup.py) ${CMAKE_CURRENT_BINARY_DIR}/setup.py)
add_custom_command(OUTPUT ${PROJ_ROOT}/python/paddle/v2/framework/core.so
COMMAND cmake -E copy $<TARGET_FILE:paddle_pybind> ${PROJ_ROOT}/python/paddle/v2/framework/core.so
DEPENDS paddle_pybind)
add_custom_target(copy_paddle_pybind ALL DEPENDS ${PROJ_ROOT}/python/paddle/v2/framework/core.so)
add_custom_command(OUTPUT ${OUTPUT_DIR}/.timestamp add_custom_command(OUTPUT ${OUTPUT_DIR}/.timestamp
COMMAND env ${py_env} ${PYTHON_EXECUTABLE} setup.py bdist_wheel COMMAND env ${py_env} ${PYTHON_EXECUTABLE} setup.py bdist_wheel
COMMAND ${CMAKE_COMMAND} -E touch ${OUTPUT_DIR}/.timestamp COMMAND ${CMAKE_COMMAND} -E touch ${OUTPUT_DIR}/.timestamp
DEPENDS gen_proto_py framework_py_proto ${PY_FILES} ${external_project_dependencies} ${COPY_PADDLE_MASTER}) DEPENDS gen_proto_py copy_paddle_pybind framework_py_proto ${PY_FILES} ${external_project_dependencies} ${COPY_PADDLE_MASTER})
add_custom_target(paddle_python ALL DEPENDS add_custom_target(paddle_python ALL DEPENDS
${OUTPUT_DIR}/.timestamp) ${OUTPUT_DIR}/.timestamp)
......
...@@ -1575,7 +1575,13 @@ class MultiClassCrossEntropySelfNormCostLayer(LayerBase): ...@@ -1575,7 +1575,13 @@ class MultiClassCrossEntropySelfNormCostLayer(LayerBase):
@config_layer('fc') @config_layer('fc')
class FCLayer(LayerBase): class FCLayer(LayerBase):
def __init__(self, name, size, inputs, bias=True, **xargs): def __init__(self,
name,
size,
inputs,
bias=True,
error_clipping_threshold=None,
**xargs):
super(FCLayer, self).__init__(name, 'fc', size, inputs=inputs, **xargs) super(FCLayer, self).__init__(name, 'fc', size, inputs=inputs, **xargs)
for input_index in xrange(len(self.inputs)): for input_index in xrange(len(self.inputs)):
input_layer = self.get_input_layer(input_index) input_layer = self.get_input_layer(input_index)
...@@ -1592,6 +1598,8 @@ class FCLayer(LayerBase): ...@@ -1592,6 +1598,8 @@ class FCLayer(LayerBase):
self.create_input_parameter(input_index, psize, dims, sparse, self.create_input_parameter(input_index, psize, dims, sparse,
format) format)
self.create_bias_parameter(bias, self.config.size) self.create_bias_parameter(bias, self.config.size)
if error_clipping_threshold is not None:
self.config.error_clipping_threshold = error_clipping_threshold
@config_layer('selective_fc') @config_layer('selective_fc')
......
...@@ -126,6 +126,7 @@ __all__ = [ ...@@ -126,6 +126,7 @@ __all__ = [
'row_conv_layer', 'row_conv_layer',
'dropout_layer', 'dropout_layer',
'prelu_layer', 'prelu_layer',
'gated_unit_layer',
] ]
...@@ -5863,7 +5864,7 @@ def prelu_layer(input, ...@@ -5863,7 +5864,7 @@ def prelu_layer(input,
:rtype: LayerOutput :rtype: LayerOutput
""" """
assert isinstance(input, LayerOutput), 'prelu_layer only accepts one input' assert isinstance(input, LayerOutput), 'prelu_layer accepts only one input.'
assert isinstance(param_attr, ParameterAttribute) assert isinstance(param_attr, ParameterAttribute)
l = Layer( l = Layer(
...@@ -5879,6 +5880,99 @@ def prelu_layer(input, ...@@ -5879,6 +5880,99 @@ def prelu_layer(input,
size=l.config.size) size=l.config.size)
@wrap_name_default()
@layer_support(ERROR_CLIPPING, DROPOUT)
@wrap_act_default(act=LinearActivation())
def gated_unit_layer(input,
size,
act=None,
name=None,
gate_attr=None,
gate_param_attr=None,
gate_bias_attr=True,
inproj_attr=None,
inproj_param_attr=None,
inproj_bias_attr=True,
layer_attr=None):
"""
The gated unit layer implements a simple gating mechanism over the input.
The input :math:`X` is first projected into a new space :math:`X'`, and
it is also used to produce a gate weight :math:`\sigma`. Element-wise
prodict between :match:`X'` and :math:`\sigma` is finally returned.
Reference:
Language Modeling with Gated Convolutional Networks
https://arxiv.org/abs/1612.08083
.. math::
y=\\text{act}(X \cdot W + b)\otimes \sigma(X \cdot V + c)
The example usage is:
.. code-block:: python
gated_unit = gated_unit_layer(size=128, input=input_layer))
:param input: input for this layer.
:type input: LayerOutput
:param size: output size of the gated unit.
:type size: int
:param act: activation type of the projected input.
:type act: BaseActivation
:param name: name of this layer.
:type name: basestring
:param gate_attr: Attributes to tune the gate output, for example, error
clipping threshold, dropout and so on. See ExtraLayerAttribute for
more details.
:type gate_attr: ExtraLayerAttribute|None
:param gate_param_attr: Attributes to tune the learnable projected matrix
parameter of the gate.
:type gate_param_attr: ParameterAttribute|None
:param gate_bias_attr: Attributes to tune the learnable bias of the gate.
:type gate_bias_attr: ParameterAttribute|None
:param inproj_attr: Attributes to the tune the projected input, for
example, error clipping threshold, dropout and so on. See
ExtraLayerAttribute for more details.
:type inproj_attr: ExtraLayerAttribute|None
:param inproj_param_attr: Attributes to tune the learnable parameter of
the projection of input.
:type inproj_param_attr: ParameterAttribute|None
:param inproj_bias_attr: Attributes to tune the learnable bias of
projection of the input.
:type inproj_bias_attr: ParameterAttribute|None
:param layer_attr: Attributes to tune the final output of the gated unit,
for example, error clipping threshold, dropout and so on. See
ExtraLayerAttribute for more details.
:type layer_attr: ExtraLayerAttribute|None
:return: LayerOutput object.
:rtype: LayerOutput
"""
assert isinstance(
input, LayerOutput), 'The gated linear unit accepts only one input.'
input_proj = fc_layer(
input=input,
name="%s_input_proj" % name,
size=size,
act=act,
layer_attr=inproj_attr,
param_attr=inproj_param_attr,
bias_attr=inproj_bias_attr)
gate = fc_layer(
size=size,
name="%s_gate" % name,
act=SigmoidActivation(),
input=input,
layer_attr=gate_attr,
param_attr=gate_param_attr,
bias_attr=gate_bias_attr)
return mixed_layer(
name="%s_gated_act" % name,
input=dotmul_operator(input_proj, gate),
layer_attr=layer_attr)
@wrap_name_default() @wrap_name_default()
@layer_support() @layer_support()
def crop_layer(input, offset, axis=2, shape=None, name=None, layer_attr=None): def crop_layer(input, offset, axis=2, shape=None, name=None, layer_attr=None):
...@@ -5886,11 +5980,9 @@ def crop_layer(input, offset, axis=2, shape=None, name=None, layer_attr=None): ...@@ -5886,11 +5980,9 @@ def crop_layer(input, offset, axis=2, shape=None, name=None, layer_attr=None):
The crop layer crops images by offset and shape. User can set crop shape by The crop layer crops images by offset and shape. User can set crop shape by
args 'shape' explicitly or by reference input layer. args 'shape' explicitly or by reference input layer.
The example usage is: The example usage is:
.. code-block:: python .. code-block:: python
crop = crop_layer(input=[image_input, reference_input], axis=2, offset=[2, 3]) crop = crop_layer(input=[image_input, reference_input], axis=2, offset=[2, 3])
:param input: The input layer.If two inputs were setted, :param input: The input layer.If two inputs were setted,
......
...@@ -1408,6 +1408,8 @@ def outputs(layers, *args): ...@@ -1408,6 +1408,8 @@ def outputs(layers, *args):
:return: :return:
""" """
traveled = set()
def __dfs_travel__(layer, def __dfs_travel__(layer,
predicate=lambda x: x.layer_type == LayerType.DATA): predicate=lambda x: x.layer_type == LayerType.DATA):
""" """
...@@ -1419,6 +1421,11 @@ def outputs(layers, *args): ...@@ -1419,6 +1421,11 @@ def outputs(layers, *args):
:type layer: LayerOutput :type layer: LayerOutput
:return: :return:
""" """
if layer in traveled:
return []
else:
traveled.add(layer)
assert isinstance(layer, LayerOutput), "layer is %s" % (layer) assert isinstance(layer, LayerOutput), "layer is %s" % (layer)
retv = [] retv = []
if layer.parents is not None: if layer.parents is not None:
......
...@@ -6,6 +6,7 @@ img_layers img_trans_layers util_layers simple_rnn_layers unused_layers test_cos ...@@ -6,6 +6,7 @@ img_layers img_trans_layers util_layers simple_rnn_layers unused_layers test_cos
test_rnn_group shared_fc shared_lstm shared_gru test_cost_layers_with_weight test_rnn_group shared_fc shared_lstm shared_gru test_cost_layers_with_weight
test_spp_layer test_bilinear_interp test_maxout test_bi_grumemory math_ops test_spp_layer test_bilinear_interp test_maxout test_bi_grumemory math_ops
test_seq_concat_reshape test_pad test_smooth_l1 test_multiplex_layer test_seq_concat_reshape test_pad test_smooth_l1 test_multiplex_layer
test_prelu_layer test_row_conv test_detection_output_layer test_multibox_loss_layer) test_prelu_layer test_row_conv test_detection_output_layer test_multibox_loss_layer
test_recursive_topology test_gated_unit_layer)
export whole_configs=(test_split_datasource) export whole_configs=(test_split_datasource)
...@@ -131,6 +131,7 @@ input_layer_names: "weight" ...@@ -131,6 +131,7 @@ input_layer_names: "weight"
input_layer_names: "multi_class_label" input_layer_names: "multi_class_label"
output_layer_names: "__cost_0__" output_layer_names: "__cost_0__"
output_layer_names: "__mse_cost_0__" output_layer_names: "__mse_cost_0__"
output_layer_names: "__nce_layer_0__"
evaluators { evaluators {
name: "classification_error_evaluator" name: "classification_error_evaluator"
type: "classification_error" type: "classification_error"
...@@ -154,6 +155,7 @@ sub_models { ...@@ -154,6 +155,7 @@ sub_models {
input_layer_names: "multi_class_label" input_layer_names: "multi_class_label"
output_layer_names: "__cost_0__" output_layer_names: "__cost_0__"
output_layer_names: "__mse_cost_0__" output_layer_names: "__mse_cost_0__"
output_layer_names: "__nce_layer_0__"
evaluator_names: "classification_error_evaluator" evaluator_names: "classification_error_evaluator"
is_recurrent_layer_group: false is_recurrent_layer_group: false
} }
......
type: "nn"
layers {
name: "input"
type: "data"
size: 256
active_type: ""
}
layers {
name: "__gated_unit_layer_0___input_proj"
type: "fc"
size: 512
active_type: "tanh"
inputs {
input_layer_name: "input"
input_parameter_name: "___gated_unit_layer_0___input_proj.w0"
}
bias_parameter_name: "___gated_unit_layer_0___input_proj.wbias"
error_clipping_threshold: 100.0
}
layers {
name: "__gated_unit_layer_0___gate"
type: "fc"
size: 512
active_type: "sigmoid"
inputs {
input_layer_name: "input"
input_parameter_name: "___gated_unit_layer_0___gate.w0"
}
bias_parameter_name: "___gated_unit_layer_0___gate.wbias"
error_clipping_threshold: 100.0
}
layers {
name: "__gated_unit_layer_0___gated_act"
type: "mixed"
size: 512
active_type: ""
inputs {
input_layer_name: "__gated_unit_layer_0___input_proj"
}
inputs {
input_layer_name: "__gated_unit_layer_0___gate"
}
error_clipping_threshold: 100.0
operator_confs {
type: "dot_mul"
input_indices: 0
input_indices: 1
input_sizes: 512
input_sizes: 512
output_size: 512
dotmul_scale: 1
}
}
parameters {
name: "___gated_unit_layer_0___input_proj.w0"
size: 131072
initial_mean: 0.0
initial_std: 0.0001
dims: 256
dims: 512
initial_strategy: 0
initial_smart: false
}
parameters {
name: "___gated_unit_layer_0___input_proj.wbias"
size: 512
initial_mean: 0.0
initial_std: 1
dims: 1
dims: 512
initial_strategy: 0
initial_smart: false
}
parameters {
name: "___gated_unit_layer_0___gate.w0"
size: 131072
initial_mean: 0.0
initial_std: 0.0001
dims: 256
dims: 512
initial_strategy: 0
initial_smart: false
}
parameters {
name: "___gated_unit_layer_0___gate.wbias"
size: 512
initial_mean: 0.0
initial_std: 1
dims: 1
dims: 512
initial_strategy: 0
initial_smart: false
}
input_layer_names: "input"
output_layer_names: "__gated_unit_layer_0___gated_act"
sub_models {
name: "root"
layer_names: "input"
layer_names: "__gated_unit_layer_0___input_proj"
layer_names: "__gated_unit_layer_0___gate"
layer_names: "__gated_unit_layer_0___gated_act"
input_layer_names: "input"
output_layer_names: "__gated_unit_layer_0___gated_act"
is_recurrent_layer_group: false
}
type: "nn"
layers {
name: "data"
type: "data"
size: 100
active_type: ""
}
layers {
name: "__addto_0__"
type: "addto"
size: 100
active_type: ""
inputs {
input_layer_name: "data"
}
inputs {
input_layer_name: "data"
}
}
layers {
name: "__addto_1__"
type: "addto"
size: 100
active_type: ""
inputs {
input_layer_name: "__addto_0__"
}
inputs {
input_layer_name: "__addto_0__"
}
}
layers {
name: "__addto_2__"
type: "addto"
size: 100
active_type: ""
inputs {
input_layer_name: "__addto_1__"
}
inputs {
input_layer_name: "__addto_1__"
}
}
layers {
name: "__addto_3__"
type: "addto"
size: 100
active_type: ""
inputs {
input_layer_name: "__addto_2__"
}
inputs {
input_layer_name: "__addto_2__"
}
}
layers {
name: "__addto_4__"
type: "addto"
size: 100
active_type: ""
inputs {
input_layer_name: "__addto_3__"
}
inputs {
input_layer_name: "__addto_3__"
}
}
layers {
name: "__addto_5__"
type: "addto"
size: 100
active_type: ""
inputs {
input_layer_name: "__addto_4__"
}
inputs {
input_layer_name: "__addto_4__"
}
}
layers {
name: "__addto_6__"
type: "addto"
size: 100
active_type: ""
inputs {
input_layer_name: "__addto_5__"
}
inputs {
input_layer_name: "__addto_5__"
}
}
layers {
name: "__addto_7__"
type: "addto"
size: 100
active_type: ""
inputs {
input_layer_name: "__addto_6__"
}
inputs {
input_layer_name: "__addto_6__"
}
}
layers {
name: "__addto_8__"
type: "addto"
size: 100
active_type: ""
inputs {
input_layer_name: "__addto_7__"
}
inputs {
input_layer_name: "__addto_7__"
}
}
layers {
name: "__addto_9__"
type: "addto"
size: 100
active_type: ""
inputs {
input_layer_name: "__addto_8__"
}
inputs {
input_layer_name: "__addto_8__"
}
}
layers {
name: "__addto_10__"
type: "addto"
size: 100
active_type: ""
inputs {
input_layer_name: "__addto_9__"
}
inputs {
input_layer_name: "__addto_9__"
}
}
layers {
name: "__addto_11__"
type: "addto"
size: 100
active_type: ""
inputs {
input_layer_name: "__addto_10__"
}
inputs {
input_layer_name: "__addto_10__"
}
}
layers {
name: "__addto_12__"
type: "addto"
size: 100
active_type: ""
inputs {
input_layer_name: "__addto_11__"
}
inputs {
input_layer_name: "__addto_11__"
}
}
layers {
name: "__addto_13__"
type: "addto"
size: 100
active_type: ""
inputs {
input_layer_name: "__addto_12__"
}
inputs {
input_layer_name: "__addto_12__"
}
}
layers {
name: "__addto_14__"
type: "addto"
size: 100
active_type: ""
inputs {
input_layer_name: "__addto_13__"
}
inputs {
input_layer_name: "__addto_13__"
}
}
layers {
name: "__addto_15__"
type: "addto"
size: 100
active_type: ""
inputs {
input_layer_name: "__addto_14__"
}
inputs {
input_layer_name: "__addto_14__"
}
}
layers {
name: "__addto_16__"
type: "addto"
size: 100
active_type: ""
inputs {
input_layer_name: "__addto_15__"
}
inputs {
input_layer_name: "__addto_15__"
}
}
layers {
name: "__addto_17__"
type: "addto"
size: 100
active_type: ""
inputs {
input_layer_name: "__addto_16__"
}
inputs {
input_layer_name: "__addto_16__"
}
}
layers {
name: "__addto_18__"
type: "addto"
size: 100
active_type: ""
inputs {
input_layer_name: "__addto_17__"
}
inputs {
input_layer_name: "__addto_17__"
}
}
layers {
name: "__addto_19__"
type: "addto"
size: 100
active_type: ""
inputs {
input_layer_name: "__addto_18__"
}
inputs {
input_layer_name: "__addto_18__"
}
}
layers {
name: "__addto_20__"
type: "addto"
size: 100
active_type: ""
inputs {
input_layer_name: "__addto_19__"
}
inputs {
input_layer_name: "__addto_19__"
}
}
layers {
name: "__addto_21__"
type: "addto"
size: 100
active_type: ""
inputs {
input_layer_name: "__addto_20__"
}
inputs {
input_layer_name: "__addto_20__"
}
}
layers {
name: "__addto_22__"
type: "addto"
size: 100
active_type: ""
inputs {
input_layer_name: "__addto_21__"
}
inputs {
input_layer_name: "__addto_21__"
}
}
layers {
name: "__addto_23__"
type: "addto"
size: 100
active_type: ""
inputs {
input_layer_name: "__addto_22__"
}
inputs {
input_layer_name: "__addto_22__"
}
}
layers {
name: "__addto_24__"
type: "addto"
size: 100
active_type: ""
inputs {
input_layer_name: "__addto_23__"
}
inputs {
input_layer_name: "__addto_23__"
}
}
layers {
name: "__addto_25__"
type: "addto"
size: 100
active_type: ""
inputs {
input_layer_name: "__addto_24__"
}
inputs {
input_layer_name: "__addto_24__"
}
}
layers {
name: "__addto_26__"
type: "addto"
size: 100
active_type: ""
inputs {
input_layer_name: "__addto_25__"
}
inputs {
input_layer_name: "__addto_25__"
}
}
layers {
name: "__addto_27__"
type: "addto"
size: 100
active_type: ""
inputs {
input_layer_name: "__addto_26__"
}
inputs {
input_layer_name: "__addto_26__"
}
}
layers {
name: "__addto_28__"
type: "addto"
size: 100
active_type: ""
inputs {
input_layer_name: "__addto_27__"
}
inputs {
input_layer_name: "__addto_27__"
}
}
layers {
name: "__addto_29__"
type: "addto"
size: 100
active_type: ""
inputs {
input_layer_name: "__addto_28__"
}
inputs {
input_layer_name: "__addto_28__"
}
}
layers {
name: "__addto_30__"
type: "addto"
size: 100
active_type: ""
inputs {
input_layer_name: "__addto_29__"
}
inputs {
input_layer_name: "__addto_29__"
}
}
layers {
name: "__addto_31__"
type: "addto"
size: 100
active_type: ""
inputs {
input_layer_name: "__addto_30__"
}
inputs {
input_layer_name: "__addto_30__"
}
}
layers {
name: "__fc_layer_0__"
type: "fc"
size: 32
active_type: "relu"
inputs {
input_layer_name: "__addto_31__"
input_parameter_name: "___fc_layer_0__.w0"
}
bias_parameter_name: "___fc_layer_0__.wbias"
}
layers {
name: "__fc_layer_1__"
type: "fc"
size: 10
active_type: "softmax"
inputs {
input_layer_name: "__fc_layer_0__"
input_parameter_name: "___fc_layer_1__.w0"
}
bias_parameter_name: "___fc_layer_1__.wbias"
}
parameters {
name: "___fc_layer_0__.w0"
size: 3200
initial_mean: 0.0
initial_std: 0.1
dims: 100
dims: 32
initial_strategy: 0
initial_smart: true
}
parameters {
name: "___fc_layer_0__.wbias"
size: 32
initial_mean: 0.0
initial_std: 0.0
dims: 1
dims: 32
initial_strategy: 0
initial_smart: false
}
parameters {
name: "___fc_layer_1__.w0"
size: 320
initial_mean: 0.0
initial_std: 0.176776695297
dims: 32
dims: 10
initial_strategy: 0
initial_smart: true
}
parameters {
name: "___fc_layer_1__.wbias"
size: 10
initial_mean: 0.0
initial_std: 0.0
dims: 1
dims: 10
initial_strategy: 0
initial_smart: false
}
input_layer_names: "data"
output_layer_names: "__fc_layer_1__"
sub_models {
name: "root"
layer_names: "data"
layer_names: "__addto_0__"
layer_names: "__addto_1__"
layer_names: "__addto_2__"
layer_names: "__addto_3__"
layer_names: "__addto_4__"
layer_names: "__addto_5__"
layer_names: "__addto_6__"
layer_names: "__addto_7__"
layer_names: "__addto_8__"
layer_names: "__addto_9__"
layer_names: "__addto_10__"
layer_names: "__addto_11__"
layer_names: "__addto_12__"
layer_names: "__addto_13__"
layer_names: "__addto_14__"
layer_names: "__addto_15__"
layer_names: "__addto_16__"
layer_names: "__addto_17__"
layer_names: "__addto_18__"
layer_names: "__addto_19__"
layer_names: "__addto_20__"
layer_names: "__addto_21__"
layer_names: "__addto_22__"
layer_names: "__addto_23__"
layer_names: "__addto_24__"
layer_names: "__addto_25__"
layer_names: "__addto_26__"
layer_names: "__addto_27__"
layer_names: "__addto_28__"
layer_names: "__addto_29__"
layer_names: "__addto_30__"
layer_names: "__addto_31__"
layer_names: "__fc_layer_0__"
layer_names: "__fc_layer_1__"
input_layer_names: "data"
output_layer_names: "__fc_layer_1__"
is_recurrent_layer_group: false
}
from paddle.trainer_config_helpers import *
data = data_layer(name='input', size=256)
glu = gated_unit_layer(
size=512,
input=data,
act=TanhActivation(),
gate_attr=ExtraLayerAttribute(error_clipping_threshold=100.0),
gate_param_attr=ParamAttr(initial_std=1e-4),
gate_bias_attr=ParamAttr(initial_std=1),
inproj_attr=ExtraLayerAttribute(error_clipping_threshold=100.0),
inproj_param_attr=ParamAttr(initial_std=1e-4),
inproj_bias_attr=ParamAttr(initial_std=1),
layer_attr=ExtraLayerAttribute(error_clipping_threshold=100.0))
outputs(glu)
from paddle.trainer_config_helpers import *
settings(batch_size=1000, learning_rate=1e-5)
din = data_layer(name='data', size=100)
enc = din
for i in range(32):
enc = addto_layer([enc, enc])
pred = fc_layer(
input=fc_layer(
input=enc, size=32, act=ReluActivation()),
size=10,
act=SoftmaxActivation())
outputs(pred)
...@@ -20,7 +20,6 @@ import trainer ...@@ -20,7 +20,6 @@ import trainer
import event import event
import data_type import data_type
import topology import topology
import data_feeder
import networks import networks
import evaluator import evaluator
from . import dataset from . import dataset
...@@ -31,7 +30,6 @@ import op ...@@ -31,7 +30,6 @@ import op
import pooling import pooling
import inference import inference
import networks import networks
import py_paddle.swig_paddle as api
import minibatch import minibatch
import plot import plot
import image import image
...@@ -47,7 +45,6 @@ __all__ = [ ...@@ -47,7 +45,6 @@ __all__ = [
'data_type', 'data_type',
'attr', 'attr',
'pooling', 'pooling',
'data_feeder',
'dataset', 'dataset',
'reader', 'reader',
'topology', 'topology',
...@@ -61,6 +58,7 @@ __all__ = [ ...@@ -61,6 +58,7 @@ __all__ = [
def init(**kwargs): def init(**kwargs):
import py_paddle.swig_paddle as api
args = [] args = []
args_dict = {} args_dict = {}
# NOTE: append arguments if they are in ENV # NOTE: append arguments if they are in ENV
......
...@@ -11,7 +11,6 @@ ...@@ -11,7 +11,6 @@
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and # See the License for the specific language governing permissions and
# limitations under the License. # limitations under the License.
from py_paddle import DataProviderConverter from py_paddle import DataProviderConverter
import collections import collections
import paddle.trainer.PyDataProvider2 as pydp2 import paddle.trainer.PyDataProvider2 as pydp2
......
...@@ -26,8 +26,9 @@ import sentiment ...@@ -26,8 +26,9 @@ import sentiment
import wmt14 import wmt14
import mq2007 import mq2007
import flowers import flowers
import voc2012
__all__ = [ __all__ = [
'mnist', 'imikolov', 'imdb', 'cifar', 'movielens', 'conll05', 'sentiment' 'mnist', 'imikolov', 'imdb', 'cifar', 'movielens', 'conll05', 'sentiment'
'uci_housing', 'wmt14', 'mq2007', 'flowers' 'uci_housing', 'wmt14', 'mq2007', 'flowers', 'voc2012'
] ]
...@@ -22,6 +22,8 @@ import importlib ...@@ -22,6 +22,8 @@ import importlib
import paddle.v2.dataset import paddle.v2.dataset
import cPickle import cPickle
import glob import glob
import cPickle as pickle
import random
__all__ = [ __all__ = [
'DATA_HOME', 'download', 'md5file', 'split', 'cluster_files_reader', 'DATA_HOME', 'download', 'md5file', 'split', 'cluster_files_reader',
...@@ -170,8 +172,6 @@ def convert(output_path, ...@@ -170,8 +172,6 @@ def convert(output_path,
name_prefix, name_prefix,
max_lines_to_shuffle=1000): max_lines_to_shuffle=1000):
import recordio import recordio
import cPickle as pickle
import random
""" """
Convert data from reader to recordio format files. Convert data from reader to recordio format files.
...@@ -201,8 +201,10 @@ def convert(output_path, ...@@ -201,8 +201,10 @@ def convert(output_path,
def write_data(w, lines): def write_data(w, lines):
random.shuffle(lines) random.shuffle(lines)
for i, d in enumerate(lines): for i, d in enumerate(lines):
d = pickle.dumps(d, pickle.HIGHEST_PROTOCOL) # FIXME(Yancey1989):
w[i % num_shards].write(d) # dumps with protocol: pickle.HIGHEST_PROTOCOL
o = pickle.dumps(d)
w[i % num_shards].write(o)
w = open_writers() w = open_writers()
lines = [] lines = []
......
...@@ -212,19 +212,19 @@ def gen_pair(querylist, partial_order="full"): ...@@ -212,19 +212,19 @@ def gen_pair(querylist, partial_order="full"):
for j in range(i + 1, len(querylist)): for j in range(i + 1, len(querylist)):
query_right = querylist[j] query_right = querylist[j]
if query_left.relevance_score > query_right.relevance_score: if query_left.relevance_score > query_right.relevance_score:
labels.append(1) labels.append([1])
docpairs.append([ docpairs.append([
np.array(query_left.feature_vector), np.array(query_left.feature_vector),
np.array(query_right.feature_vector) np.array(query_right.feature_vector)
]) ])
elif query_left.relevance_score < query_right.relevance_score: elif query_left.relevance_score < query_right.relevance_score:
labels.append(1) labels.append([1])
docpairs.append([ docpairs.append([
np.array(query_right.feature_vector), np.array(query_right.feature_vector),
np.array(query_left.feature_vector) np.array(query_left.feature_vector)
]) ])
for label, pair in zip(labels, docpairs): for label, pair in zip(labels, docpairs):
yield label, pair[0], pair[1] yield np.array(label), pair[0], pair[1]
def gen_list(querylist): def gen_list(querylist):
......
# Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserved
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import paddle.v2.dataset.voc2012
import unittest
class TestVOC(unittest.TestCase):
def check_reader(self, reader):
sum = 0
label = 0
for l in reader():
self.assertEqual(l[0].size, 3 * l[1].size)
sum += 1
return sum
def test_train(self):
count = self.check_reader(paddle.v2.dataset.voc_seg.train())
self.assertEqual(count, 2913)
def test_test(self):
count = self.check_reader(paddle.v2.dataset.voc_seg.test())
self.assertEqual(count, 1464)
def test_val(self):
count = self.check_reader(paddle.v2.dataset.voc_seg.val())
self.assertEqual(count, 1449)
if __name__ == '__main__':
unittest.main()
# Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserved
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""
Image dataset for segmentation.
The 2012 dataset contains images from 2008-2011 for which additional
segmentations have been prepared. As in previous years the assignment
to training/test sets has been maintained. The total number of images
with segmentation has been increased from 7,062 to 9,993.
"""
import tarfile
import io
import numpy as np
from paddle.v2.dataset.common import download
from paddle.v2.image import *
from PIL import Image
__all__ = ['train', 'test', 'val']
VOC_URL = 'http://host.robots.ox.ac.uk/pascal/VOC/voc2012/\
VOCtrainval_11-May-2012.tar'
VOC_MD5 = '6cd6e144f989b92b3379bac3b3de84fd'
SET_FILE = 'VOCdevkit/VOC2012/ImageSets/Segmentation/{}.txt'
DATA_FILE = 'VOCdevkit/VOC2012/JPEGImages/{}.jpg'
LABEL_FILE = 'VOCdevkit/VOC2012/SegmentationClass/{}.png'
CACHE_DIR = 'voc2012'
def reader_creator(filename, sub_name):
tarobject = tarfile.open(filename)
name2mem = {}
for ele in tarobject.getmembers():
name2mem[ele.name] = ele
def reader():
set_file = SET_FILE.format(sub_name)
sets = tarobject.extractfile(name2mem[set_file])
for line in sets:
line = line.strip()
data_file = DATA_FILE.format(line)
label_file = LABEL_FILE.format(line)
data = tarobject.extractfile(name2mem[data_file]).read()
label = tarobject.extractfile(name2mem[label_file]).read()
data = Image.open(io.BytesIO(data))
label = Image.open(io.BytesIO(label))
data = np.array(data)
label = np.array(label)
yield data, label
return reader
def train():
"""
Create a train dataset reader containing 2913 images in HWC order.
"""
return reader_creator(download(VOC_URL, CACHE_DIR, VOC_MD5), 'trainval')
def test():
"""
Create a test dataset reader containing 1464 images in HWC order.
"""
return reader_creator(download(VOC_URL, CACHE_DIR, VOC_MD5), 'train')
def val():
"""
Create a val dataset reader containing 1449 images in HWC order.
"""
return reader_creator(download(VOC_URL, CACHE_DIR, VOC_MD5), 'val')
...@@ -9,8 +9,6 @@ There are: ...@@ -9,8 +9,6 @@ There are:
* BeginPass * BeginPass
* EndPass * EndPass
""" """
import py_paddle.swig_paddle as api
__all__ = [ __all__ = [
'EndIteration', 'BeginIteration', 'BeginPass', 'EndPass', 'TestResult' 'EndIteration', 'BeginIteration', 'BeginPass', 'EndPass', 'TestResult'
] ]
...@@ -18,6 +16,7 @@ __all__ = [ ...@@ -18,6 +16,7 @@ __all__ = [
class WithMetric(object): class WithMetric(object):
def __init__(self, evaluator): def __init__(self, evaluator):
import py_paddle.swig_paddle as api
if not isinstance(evaluator, api.Evaluator): if not isinstance(evaluator, api.Evaluator):
raise TypeError("Evaluator should be api.Evaluator type") raise TypeError("Evaluator should be api.Evaluator type")
self.__evaluator__ = evaluator self.__evaluator__ = evaluator
......
import paddle.v2.framework.core as core
import paddle.v2.framework.proto.op_proto_pb2 as op_proto_pb2
import paddle.v2.framework.proto.op_desc_pb2 as op_desc_pb2
import paddle.v2.framework.proto.attr_type_pb2 as attr_type_pb2
import cStringIO
def get_all_op_protos():
"""
Get all registered op proto from Paddle C++
:return: list of OpProto
"""
protostrs = core.get_all_op_protos()
ret_values = []
for pbstr in protostrs:
op_proto = op_proto_pb2.OpProto.FromString(str(pbstr))
ret_values.append(op_proto)
return ret_values
class OpDescCreationMethod(object):
"""
A Functor object to convert user input(use key word args) to OpDesc based on
OpProto.
:param op_proto: The OpProto object.
:type op_proto: op_proto_pb2.OpProto
"""
def __init__(self, op_proto):
if not isinstance(op_proto, op_proto_pb2.OpProto):
raise TypeError("Argument should be OpProto")
self.__op_proto__ = op_proto
def __call__(self, *args, **kwargs):
"""
Convert user input to OpDesc. Only key-word args are supported.
:return: OpDesc based on user input
:rtype: op_desc_pb2.OpDesc
"""
if len(args) != 0:
raise ValueError("Only keyword arguments is supported by Paddle")
op_desc = op_desc_pb2.OpDesc()
# Inputs
ipts, ipt_format, _ = OpDescCreationMethod.extract_input_or_output(
"input", kwargs, self.__op_proto__.inputs)
op_desc.inputs.extend(ipts)
if ipt_format is not None:
op_desc.attrs.extend([ipt_format])
# Outputs
outs, out_format, tmp_index = OpDescCreationMethod.extract_input_or_output(
"output", kwargs, self.__op_proto__.outputs)
op_desc.outputs.extend(outs)
if out_format is not None:
op_desc.attrs.extend([out_format])
if len(tmp_index) != 0:
tmp_index_attr = op_desc.attrs.add()
tmp_index_attr.type = attr_type_pb2.INTS
tmp_index_attr.name = "temporary_index"
tmp_index_attr.ints.extend(tmp_index)
# Types
op_desc.type = self.__op_proto__.type
# Attrs
for attr in self.__op_proto__.attrs:
if attr.generated:
continue
user_defined_attr = kwargs.get(attr.name, None)
if user_defined_attr is not None:
new_attr = op_desc.attrs.add()
new_attr.name = attr.name
new_attr.type = attr.type
if attr.type == attr_type_pb2.INT:
new_attr.i = user_defined_attr
elif attr.type == attr_type_pb2.FLOAT:
new_attr.f = user_defined_attr
elif attr.type == attr_type_pb2.STRING:
new_attr.s = user_defined_attr
elif attr.type == attr_type_pb2.INTS:
new_attr.ints.extend(user_defined_attr)
elif attr.type == attr_type_pb2.FLOATS:
new_attr.floats.extend(user_defined_attr)
elif attr.type == attr_type_pb2.STRINGS:
new_attr.strings.extend(user_defined_attr)
else:
raise NotImplementedError("Not support attribute type " +
attr.type)
return op_desc
@staticmethod
def extract_input_or_output(in_out, kwargs, meta):
"""
Extract input variable names or output variable names from key-word
arguments, which base on VarProtos.
:param in_out: "input" or "output"
:param kwargs: key-word arguments that user inputted.
:param meta: a list of VarProto
:return: The three object will be return. The variable names. The
input_format or output_format attribute(None if the input or output is
not multiple). The temporary variable index list.
"""
multiple = OpDescCreationMethod.any_is_true((m.multiple for m in meta))
tmp_index = []
retv = []
if multiple:
var_format = op_desc_pb2.AttrDesc()
var_format.type = attr_type_pb2.INTS
var_format.name = "%s_format" % in_out
var_format.ints.append(0)
for var in meta:
var_name = var.name
if var.temporary:
var_name = [core.var_names.temp()]
tmp_index.append(len(retv))
else:
var_name = kwargs.get(var_name, [])
if not isinstance(var_name, list):
var_name = [var_name]
retv.extend(var_name)
var_format.ints.append(len(var_name) + var_format.ints[-1])
return retv, var_format, tmp_index
else:
for var in meta:
if var.temporary:
retv.append(kwargs.get(var.name, core.var_names.temp()))
tmp_index.append(len(retv))
else:
retv.append(kwargs.get(var.name, core.var_names.empty()))
return retv, None, tmp_index
@staticmethod
def any_is_true(generator):
"""
Reduce a bool array to one. If any of them is True, then return True.
"""
for flag in generator:
if flag:
return True
return False
def get_docstring_from_op_proto(op_proto):
"""
Generate docstring from a OpProto
:param op_proto: a OpProto instance.
:type op_proto: op_proto_pb2.OpProto
:return: docstring
"""
if not isinstance(op_proto, op_proto_pb2.OpProto):
raise TypeError("Input must be OpProto")
f = cStringIO.StringIO()
f.write(op_proto.comment)
f.write("\n")
def __append_param__(name, comment, type):
# Maybe replace the following line with template engine is better.
f.write(":param ")
f.write(name)
f.write(": ")
f.write(comment)
f.write("\n")
f.write(":type ")
f.write(name)
f.write(": ")
f.write(type)
f.write("\n")
for ipt in op_proto.inputs:
__append_param__(ipt.name, ipt.comment, "list | basestr"
if ipt.multiple else "basestr")
temp_var_prefix = \
"This is a temporary variable. It does not have to set by user. "
for opt in op_proto.outputs:
__append_param__(opt.name, opt.comment if not opt.temporary else
temp_var_prefix + opt.comment, "list | basestr"
if opt.multiple else "basestr")
for attr in op_proto.attrs:
attr_type = None
if attr.type == attr_type_pb2.INT:
attr_type = "int"
elif attr.type == attr_type_pb2.FLOAT:
attr_type = "float"
elif attr.type == attr_type_pb2.STRING:
attr_type = "basestr"
elif attr.type == attr_type_pb2.INTS:
attr_type = "list of int"
elif attr.type == attr_type_pb2.FLOATS:
attr_type = "list of float"
elif attr.type == attr_type_pb2.STRINGS:
attr_type = "list of basestr"
if attr_type is None:
raise RuntimeError("Not supported attribute type " + attr.type)
__append_param__(attr.name, attr.comment, attr_type)
return f.getvalue()
def create_op_creation_method(op_proto):
"""
Generate op creation method for an OpProto
"""
method = OpDescCreationMethod(op_proto)
def __impl__(*args, **kwargs):
opdesc = method(*args, **kwargs)
return core.Operator.create(opdesc.SerializeToString())
__impl__.__doc__ = get_docstring_from_op_proto(op_proto)
return __impl__
class OpCreationsHolder(object):
"""
A object will holds all op creation methods.
Use `op_creations.xxx_op` to access them.
"""
pass
op_creations = OpCreationsHolder()
def __bootstrap__():
"""
Bootstrap function for this module. It will dynamic create all op creation
methods in runtime.
"""
for op_proto in get_all_op_protos():
func = create_op_creation_method(op_proto)
func.__name__ = str(op_proto.type)
setattr(op_creations, func.__name__, func)
__bootstrap__()
"""
Default scope function.
`Paddle` manages Scope as programming language's scope. It just a
thread-local stack of Scope. Top of that stack is current scope, the bottom
of that stack is all scopes' parent.
Invoking `create_var/get_var` can `create/get` variable in current scope.
Invoking `enter_local_scope/leave_local_scope` can create or destroy local
scope.
A `scoped_function` will take a `function` as input. That function will be
invoked in a new local scope.
"""
import paddle.v2.framework.core
import threading
__tl_scope__ = threading.local()
__all__ = [
'get_cur_scope', 'enter_local_scope', 'leave_local_scope', 'create_var',
'get_var', 'scoped_function'
]
def get_cur_scope():
"""
Get current scope.
:rtype: paddle.v2.framework.core.Scope
"""
cur_scope_stack = getattr(__tl_scope__, 'cur_scope', None)
if cur_scope_stack is None:
__tl_scope__.cur_scope = list()
if len(__tl_scope__.cur_scope) == 0:
__tl_scope__.cur_scope.append(paddle.v2.framework.core.Scope(None))
return __tl_scope__.cur_scope[-1]
def enter_local_scope():
"""
Enter a new local scope
"""
cur_scope = get_cur_scope()
new_scope = paddle.v2.framework.core.Scope(cur_scope)
__tl_scope__.cur_scope.append(new_scope)
def leave_local_scope():
"""
Leave local scope
"""
__tl_scope__.cur_scope.pop()
def create_var(name):
"""
create variable in current scope.
"""
return get_cur_scope().create_var(name)
def get_var(name):
"""
get variable in current scope.
"""
return get_cur_scope().get_var(name)
def scoped_function(func):
"""
invoke `func` in new scope.
:param func: a callable function that will be run in new scope.
:type func: callable
"""
enter_local_scope()
try:
func()
except:
raise
finally:
leave_local_scope()
add_python_test(test_framework test_protobuf.py) add_python_test(test_framework test_protobuf.py test_scope.py
test_default_scope_funcs.py test_op_creation_methods.py
test_tensor.py)
from paddle.v2.framework.default_scope_funcs import *
import unittest
class TestDefaultScopeFuncs(unittest.TestCase):
def test_cur_scope(self):
self.assertIsNotNone(get_cur_scope())
def test_none_variable(self):
self.assertIsNone(get_var("test"))
def test_create_var_get_var(self):
var_a = create_var("var_a")
self.assertIsNotNone(var_a)
self.assertIsNotNone(get_cur_scope().get_var('var_a'))
enter_local_scope()
self.assertIsNotNone(get_cur_scope().get_var('var_a'))
leave_local_scope()
def test_var_get_int(self):
def __new_scope__():
i = create_var("var_i")
self.assertFalse(i.is_int())
i.set_int(10)
self.assertTrue(i.is_int())
self.assertEqual(10, i.get_int())
for _ in xrange(10):
scoped_function(__new_scope__)
if __name__ == '__main__':
unittest.main()
import unittest
import paddle.v2.framework.create_op_creation_methods as creation
import paddle.v2.framework.core as core
import paddle.v2.framework.proto.op_proto_pb2 as op_proto_pb2
import paddle.v2.framework.proto.op_desc_pb2 as op_desc_pb2
import paddle.v2.framework.proto.attr_type_pb2 as attr_type_pb2
class TestGetAllProtos(unittest.TestCase):
def test_all(self):
all_protos = creation.get_all_op_protos()
self.assertNotEqual(0, len(all_protos))
for each in all_protos:
self.assertTrue(each.IsInitialized())
class TestOpDescCreationMethod(unittest.TestCase):
def test_plain_input_output(self):
op = op_proto_pb2.OpProto()
op.type = "test"
ipt = op.inputs.add()
ipt.name = "X"
ipt.comment = "not matter"
ipt = op.inputs.add()
ipt.name = "Y"
ipt.comment = "not matter"
opt = op.outputs.add()
opt.name = "Z"
opt.comment = "not matter"
op.comment = "not matter"
self.assertTrue(op.IsInitialized())
method = creation.OpDescCreationMethod(op)
output = method(X="a", Y="b", Z="c")
expected = op_desc_pb2.OpDesc()
expected.type = "test"
expected.inputs.extend(["a", "b"])
expected.outputs.append("c")
self.assertEqual(expected, output)
def test_multiple_input_plain_output(self):
op = op_proto_pb2.OpProto()
op.type = "fc"
ipt = op.inputs.add()
ipt.name = "X"
ipt.comment = ""
ipt.multiple = True
ipt = op.inputs.add()
ipt.name = "W"
ipt.comment = ""
ipt.multiple = True
ipt = op.inputs.add()
ipt.name = "b"
ipt.comment = ""
out = op.outputs.add()
out.name = "Y"
out.comment = ""
op.comment = ""
self.assertTrue(op.IsInitialized())
method = creation.OpDescCreationMethod(op)
generated1 = method(X="x", W="w", b="b", Y="y")
expected1 = op_desc_pb2.OpDesc()
expected1.inputs.extend(['x', 'w', 'b'])
expected1.outputs.extend(['y'])
expected1.type = 'fc'
attr = expected1.attrs.add()
attr.name = 'input_format'
attr.type = attr_type_pb2.INTS
attr.ints.extend([0, 1, 2, 3])
self.assertEqual(expected1, generated1)
generated2 = method(
X=['x1', 'x2', 'x3'], b='b', W=['w1', 'w2', 'w3'], Y='y')
expected2 = op_desc_pb2.OpDesc()
expected2.inputs.extend(['x1', 'x2', 'x3', 'w1', 'w2', 'w3', 'b'])
expected2.outputs.extend(['y'])
expected2.type = 'fc'
attr = expected2.attrs.add()
attr.name = 'input_format'
attr.type = attr_type_pb2.INTS
attr.ints.extend([0, 3, 6, 7])
self.assertEqual(expected2, generated2)
def test_attrs(self):
op = op_proto_pb2.OpProto()
op.type = "test"
ipt = op.inputs.add()
ipt.name = 'X'
ipt.comment = ""
def __add_attr__(name, type):
attr = op.attrs.add()
attr.name = name
attr.comment = ""
attr.type = type
__add_attr__("int_attr", attr_type_pb2.INT)
__add_attr__("float_attr", attr_type_pb2.FLOAT)
__add_attr__("string_attr", attr_type_pb2.STRING)
__add_attr__("ints_attr", attr_type_pb2.INTS)
__add_attr__("floats_attr", attr_type_pb2.FLOATS)
__add_attr__("strings_attr", attr_type_pb2.STRINGS)
op.comment = ""
self.assertTrue(op.IsInitialized())
method = creation.OpDescCreationMethod(op)
generated = method(
X="a",
int_attr=10,
float_attr=3.2,
string_attr="test_str",
ints_attr=[0, 1, 2, 3, 4],
floats_attr=[0.2, 3.2, 4.5],
strings_attr=["a", "b", "c"])
expected = op_desc_pb2.OpDesc()
expected.type = "test"
expected.inputs.extend(['a'])
attr = expected.attrs.add()
attr.name = "int_attr"
attr.type = attr_type_pb2.INT
attr.i = 10
attr = expected.attrs.add()
attr.name = "float_attr"
attr.type = attr_type_pb2.FLOAT
attr.f = 3.2
attr = expected.attrs.add()
attr.name = "string_attr"
attr.type = attr_type_pb2.STRING
attr.s = "test_str"
attr = expected.attrs.add()
attr.name = "ints_attr"
attr.type = attr_type_pb2.INTS
attr.ints.extend([0, 1, 2, 3, 4])
attr = expected.attrs.add()
attr.name = 'floats_attr'
attr.type = attr_type_pb2.FLOATS
attr.floats.extend([0.2, 3.2, 4.5])
attr = expected.attrs.add()
attr.name = 'strings_attr'
attr.type = attr_type_pb2.STRINGS
attr.strings.extend(['a', 'b', 'c'])
self.assertEqual(expected, generated)
def test_input_temporary_output(self):
op = op_proto_pb2.OpProto()
op.type = "test"
out = op.outputs.add()
out.name = "OUT"
out.comment = ""
out = op.outputs.add()
out.name = "TMP"
out.comment = ""
out.temporary = True
out = op.outputs.add()
out.name = "OUT2"
out.comment = ""
op.comment = ""
method = creation.OpDescCreationMethod(op)
generated = method(OUT="a", OUT2="b")
desc = op_desc_pb2.OpDesc()
desc.outputs.extend(["a", core.var_names.temp(), "b"])
desc.type = "test"
attr = desc.attrs.add()
attr.name = "temporary_index"
attr.type = attr_type_pb2.INTS
attr.ints.append(2)
self.assertEqual(generated, desc)
class TestOpCreationDocStr(unittest.TestCase):
def test_all(self):
op = op_proto_pb2.OpProto()
op.type = "test"
op.comment = """Test Op.
This op is used for unit test, not a real op.
"""
a = op.inputs.add()
a.name = "a"
a.comment = "Input a for test op"
a.multiple = True
b = op.inputs.add()
b.name = "b"
b.comment = "Input b for test op"
self.assertTrue(op.IsInitialized())
o1 = op.outputs.add()
o1.name = "output"
o1.comment = "The output of test op"
o2 = op.outputs.add()
o2.name = "temp output"
o2.comment = "The temporary output of test op"
o2.temporary = True
test_str = op.attrs.add()
test_str.name = "str_attr"
test_str.type = attr_type_pb2.STRING
test_str.comment = "A string attribute for test op"
actual = creation.get_docstring_from_op_proto(op)
expected_docstring = '''Test Op.
This op is used for unit test, not a real op.
:param a: Input a for test op
:type a: list | basestr
:param b: Input b for test op
:type b: basestr
:param output: The output of test op
:type output: basestr
:param temp output: This is a temporary variable. It does not have to set by user. The temporary output of test op
:type temp output: basestr
:param str_attr: A string attribute for test op
:type str_attr: basestr
'''
self.assertEqual(expected_docstring, actual)
class TestOpCreations(unittest.TestCase):
def test_all(self):
add_op = creation.op_creations.add_two(X="a", Y="b", Out="z")
self.assertIsNotNone(add_op)
# Invoke C++ DebugString()
self.assertEqual('Op(add_two), inputs:(a, b), outputs:(z).',
str(add_op))
if __name__ == "__main__":
unittest.main()
...@@ -24,3 +24,7 @@ class TestFrameworkProto(unittest.TestCase): ...@@ -24,3 +24,7 @@ class TestFrameworkProto(unittest.TestCase):
attr.type = attr_type_lib.FLOAT attr.type = attr_type_lib.FLOAT
op_proto.type = "cos" op_proto.type = "cos"
self.assertTrue(op_proto.IsInitialized()) self.assertTrue(op_proto.IsInitialized())
if __name__ == "__main__":
unittest.main()
import paddle.v2.framework.core
import unittest
class TestScope(unittest.TestCase):
def test_create_destroy(self):
paddle_c = paddle.v2.framework.core
scope = paddle_c.Scope(None)
self.assertIsNotNone(scope)
scope_with_parent = paddle_c.Scope(scope)
self.assertIsNotNone(scope_with_parent)
def test_none_variable(self):
paddle_c = paddle.v2.framework.core
scope = paddle_c.Scope(None)
self.assertIsNone(scope.get_var("test"))
def test_create_var_get_var(self):
paddle_c = paddle.v2.framework.core
scope = paddle_c.Scope(None)
var_a = scope.create_var("var_a")
self.assertIsNotNone(var_a)
self.assertIsNotNone(scope.get_var('var_a'))
scope2 = paddle_c.Scope(scope)
self.assertIsNotNone(scope2.get_var('var_a'))
def test_var_get_int(self):
paddle_c = paddle.v2.framework.core
scope = paddle_c.Scope(None)
var = scope.create_var("test_int")
var.set_int(10)
self.assertTrue(var.is_int())
self.assertEqual(10, var.get_int())
if __name__ == '__main__':
unittest.main()
import paddle.v2.framework.core as core
import unittest
import numpy
class TestScope(unittest.TestCase):
def test_int_tensor(self):
scope = core.Scope(None)
var = scope.create_var("test_tensor")
tensor = var.get_tensor()
tensor.set_dims([1000, 784])
tensor.alloc_int()
tensor_array = numpy.array(tensor)
self.assertEqual((1000, 784), tensor_array.shape)
tensor_array[3, 9] = 1
tensor_array[19, 11] = 2
tensor.set(tensor_array)
tensor_array_2 = numpy.array(tensor)
self.assertEqual(1.0, tensor_array_2[3, 9])
self.assertEqual(2.0, tensor_array_2[19, 11])
def test_float_tensor(self):
scope = core.Scope(None)
var = scope.create_var("test_tensor")
tensor = var.get_tensor()
tensor.set_dims([1000, 784])
tensor.alloc_float()
tensor_array = numpy.array(tensor)
self.assertEqual((1000, 784), tensor_array.shape)
tensor_array[3, 9] = 1.0
tensor_array[19, 11] = 2.0
tensor.set(tensor_array)
tensor_array_2 = numpy.array(tensor)
self.assertAlmostEqual(1.0, tensor_array_2[3, 9])
self.assertAlmostEqual(2.0, tensor_array_2[19, 11])
if __name__ == '__main__':
unittest.main()
import numpy import numpy
import py_paddle.swig_paddle as api
import collections import collections
import topology import topology
import minibatch import minibatch
from data_feeder import DataFeeder
__all__ = ['infer', 'Inference'] __all__ = ['infer', 'Inference']
...@@ -28,6 +26,7 @@ class Inference(object): ...@@ -28,6 +26,7 @@ class Inference(object):
""" """
def __init__(self, output_layer, parameters): def __init__(self, output_layer, parameters):
import py_paddle.swig_paddle as api
topo = topology.Topology(output_layer) topo = topology.Topology(output_layer)
gm = api.GradientMachine.createFromConfigProto( gm = api.GradientMachine.createFromConfigProto(
topo.proto(), api.CREATE_MODE_TESTING, [api.PARAMETER_VALUE]) topo.proto(), api.CREATE_MODE_TESTING, [api.PARAMETER_VALUE])
...@@ -40,6 +39,7 @@ class Inference(object): ...@@ -40,6 +39,7 @@ class Inference(object):
self.__data_types__ = topo.data_type() self.__data_types__ = topo.data_type()
def iter_infer(self, input, feeding=None): def iter_infer(self, input, feeding=None):
from data_feeder import DataFeeder
feeder = DataFeeder(self.__data_types__, feeding) feeder = DataFeeder(self.__data_types__, feeding)
batch_size = len(input) batch_size = len(input)
......
...@@ -10,8 +10,9 @@ class client(object): ...@@ -10,8 +10,9 @@ class client(object):
client is a client to the master server. client is a client to the master server.
""" """
def __init__(self, addr, buf_size): def __init__(self, etcd_endpoints, timeout, buf_size):
self.c = lib.paddle_new_master_client(addr, buf_size) self.c = lib.paddle_new_etcd_master_client(etcd_endpoints, timeout,
buf_size)
def close(self): def close(self):
lib.paddle_release_master_client(self.c) lib.paddle_release_master_client(self.c)
......
import py_paddle.swig_paddle as swig_api
import paddle.trainer_config_helpers.config_parser_utils as config_parser_utils import paddle.trainer_config_helpers.config_parser_utils as config_parser_utils
import paddle.trainer_config_helpers.optimizers as v1_optimizers import paddle.trainer_config_helpers.optimizers as v1_optimizers
""" """
...@@ -18,6 +16,7 @@ __all__ = [ ...@@ -18,6 +16,7 @@ __all__ = [
class Optimizer(object): class Optimizer(object):
def __init__(self, **kwargs): def __init__(self, **kwargs):
import py_paddle.swig_paddle as swig_api
if 'batch_size' in kwargs: if 'batch_size' in kwargs:
del kwargs['batch_size'] # not important for python library. del kwargs['batch_size'] # not important for python library.
...@@ -36,23 +35,27 @@ class Optimizer(object): ...@@ -36,23 +35,27 @@ class Optimizer(object):
For each optimizer(SGD, Adam), GradientMachine should enable different For each optimizer(SGD, Adam), GradientMachine should enable different
buffers. buffers.
""" """
import py_paddle.swig_paddle as swig_api
tmp = swig_api.ParameterOptimizer.create(self.__opt_conf__) tmp = swig_api.ParameterOptimizer.create(self.__opt_conf__)
assert isinstance(tmp, swig_api.ParameterOptimizer) assert isinstance(tmp, swig_api.ParameterOptimizer)
return tmp.getParameterTypes() return tmp.getParameterTypes()
def __create_local_updater__(self): def __create_local_updater__(self):
import py_paddle.swig_paddle as swig_api
return swig_api.ParameterUpdater.createLocalUpdater(self.__opt_conf__) return swig_api.ParameterUpdater.createLocalUpdater(self.__opt_conf__)
def __create_remote_updater__(self, pass_num, use_sparse_updater): def __create_remote_updater__(self, pass_num, use_sparse_updater):
import py_paddle.swig_paddle as swig_api
return swig_api.ParameterUpdater.createRemoteUpdater( return swig_api.ParameterUpdater.createRemoteUpdater(
self.__opt_conf__, pass_num, use_sparse_updater) self.__opt_conf__, pass_num, use_sparse_updater)
def __create_new_remote_updater__(self, pserver_spec): def __create_new_remote_updater__(self, pserver_spec, use_etcd):
import py_paddle.swig_paddle as swig_api
return swig_api.ParameterUpdater.createNewRemoteUpdater( return swig_api.ParameterUpdater.createNewRemoteUpdater(
self.__opt_conf__, pserver_spec) self.__opt_conf__, pserver_spec, use_etcd)
def create_updater(self, is_local, num_passes, use_sparse_updater, def create_updater(self, is_local, num_passes, use_sparse_updater,
pserver_spec): pserver_spec, use_etcd):
""" """
create proper parameter_updater by configuration. create proper parameter_updater by configuration.
:param is_local: create local or remote parameter updater :param is_local: create local or remote parameter updater
...@@ -66,6 +69,8 @@ class Optimizer(object): ...@@ -66,6 +69,8 @@ class Optimizer(object):
if use_sparse_remote_updater: if use_sparse_remote_updater:
gradient_machine.prefetch(in_args) gradient_machine.prefetch(in_args)
parameter_updater.getParametersRemote() parameter_updater.getParametersRemote()
:param pserver_spec: pserver location, eg: localhost:3000
:return: parameter_updater :return: parameter_updater
""" """
if is_local: if is_local:
...@@ -76,7 +81,7 @@ class Optimizer(object): ...@@ -76,7 +81,7 @@ class Optimizer(object):
num_passes, use_sparse_updater) num_passes, use_sparse_updater)
else: else:
parameter_updater = self.__create_new_remote_updater__( parameter_updater = self.__create_new_remote_updater__(
pserver_spec) pserver_spec, use_etcd)
return parameter_updater return parameter_updater
...@@ -266,6 +271,7 @@ ModelAverage = v1_optimizers.ModelAverage ...@@ -266,6 +271,7 @@ ModelAverage = v1_optimizers.ModelAverage
L2Regularization = v1_optimizers.L2Regularization L2Regularization = v1_optimizers.L2Regularization
if __name__ == '__main__': if __name__ == '__main__':
import py_paddle.swig_paddle as swig_api
swig_api.initPaddle('--use_gpu=false') swig_api.initPaddle('--use_gpu=false')
for opt in [ for opt in [
Momentum(), Adam(), Adamax(), AdaGrad(), DecayedAdaGrad(), Momentum(), Adam(), Adamax(), AdaGrad(), DecayedAdaGrad(),
......
import numpy as np import numpy as np
import py_paddle.swig_paddle as api
from paddle.proto.ParameterConfig_pb2 import ParameterConfig from paddle.proto.ParameterConfig_pb2 import ParameterConfig
import paddle.trainer.config_parser as cp import paddle.trainer.config_parser as cp
import struct import struct
...@@ -124,6 +123,7 @@ class Parameters(object): ...@@ -124,6 +123,7 @@ class Parameters(object):
:return: parameter value :return: parameter value
:rtype: np.ndarray :rtype: np.ndarray
""" """
import py_paddle.swig_paddle as api
shape = self.get_shape(key) shape = self.get_shape(key)
if len(self.__gradient_machines__) == 0: if len(self.__gradient_machines__) == 0:
...@@ -223,7 +223,7 @@ class Parameters(object): ...@@ -223,7 +223,7 @@ class Parameters(object):
:type gradient_machine: api.GradientMachine :type gradient_machine: api.GradientMachine
:return: :return:
""" """
import py_paddle.swig_paddle as api
if not isinstance(gradient_machine, api.GradientMachine): if not isinstance(gradient_machine, api.GradientMachine):
raise ValueError("gradient_machine should be api.GradientMachine") raise ValueError("gradient_machine should be api.GradientMachine")
...@@ -359,6 +359,7 @@ def __copy_parameter_to_gradient_machine__(gradient_machine, name, arr): ...@@ -359,6 +359,7 @@ def __copy_parameter_to_gradient_machine__(gradient_machine, name, arr):
:return: :return:
:rtype: api.Parameter :rtype: api.Parameter
""" """
import py_paddle.swig_paddle as api
param = __get_parameter_in_gradient_machine__(gradient_machine, name) param = __get_parameter_in_gradient_machine__(gradient_machine, name)
vec = param.getBuf(api.PARAMETER_VALUE) vec = param.getBuf(api.PARAMETER_VALUE)
assert isinstance(vec, api.Vector) assert isinstance(vec, api.Vector)
......
...@@ -2,12 +2,6 @@ ...@@ -2,12 +2,6 @@
Module Trainer Module Trainer
""" """
import collections import collections
import gzip
import os
import py_paddle.swig_paddle as api
from data_feeder import DataFeeder
from topology import Topology from topology import Topology
from . import event as v2_event from . import event as v2_event
from . import optimizer as v2_optimizer from . import optimizer as v2_optimizer
...@@ -41,6 +35,7 @@ class SGD(object): ...@@ -41,6 +35,7 @@ class SGD(object):
:type parameters: paddle.v2.parameters.Parameters :type parameters: paddle.v2.parameters.Parameters
:param extra_layers: Some layers in the neural network graph are not :param extra_layers: Some layers in the neural network graph are not
in the path of cost layer. in the path of cost layer.
:param pserver_spec: pserver location, eg: localhost:3000
:type extra_layers: paddle.v2.config_base.Layer :type extra_layers: paddle.v2.config_base.Layer
""" """
...@@ -50,7 +45,8 @@ class SGD(object): ...@@ -50,7 +45,8 @@ class SGD(object):
update_equation, update_equation,
extra_layers=None, extra_layers=None,
is_local=True, is_local=True,
pserver_spec=None): pserver_spec=None,
use_etcd=True):
if not isinstance(parameters, v2_parameters.Parameters): if not isinstance(parameters, v2_parameters.Parameters):
raise TypeError('parameters should be parameters') raise TypeError('parameters should be parameters')
...@@ -58,6 +54,7 @@ class SGD(object): ...@@ -58,6 +54,7 @@ class SGD(object):
if not isinstance(update_equation, v2_optimizer.Optimizer): if not isinstance(update_equation, v2_optimizer.Optimizer):
raise TypeError("update equation parameter must be " raise TypeError("update equation parameter must be "
"paddle.v2.optimizer.Optimizer") "paddle.v2.optimizer.Optimizer")
import py_paddle.swig_paddle as api
topology = Topology(cost, extra_layers=extra_layers) topology = Topology(cost, extra_layers=extra_layers)
self.__optimizer__ = update_equation self.__optimizer__ = update_equation
self.__topology__ = topology self.__topology__ = topology
...@@ -65,6 +62,7 @@ class SGD(object): ...@@ -65,6 +62,7 @@ class SGD(object):
self.__topology_in_proto__ = topology.proto() self.__topology_in_proto__ = topology.proto()
self.__is_local__ = is_local self.__is_local__ = is_local
self.__pserver_spec__ = pserver_spec self.__pserver_spec__ = pserver_spec
self.__use_etcd__ = use_etcd
self.__use_sparse_updater__ = self.__topology__.use_sparse_updater() self.__use_sparse_updater__ = self.__topology__.use_sparse_updater()
# # In local mode, disable sparse_remote_update. # # In local mode, disable sparse_remote_update.
...@@ -123,13 +121,15 @@ class SGD(object): ...@@ -123,13 +121,15 @@ class SGD(object):
:type feeding: dict|list :type feeding: dict|list
:return: :return:
""" """
import py_paddle.swig_paddle as api
from data_feeder import DataFeeder
if event_handler is None: if event_handler is None:
event_handler = default_event_handler event_handler = default_event_handler
__check_train_args__(**locals()) __check_train_args__(**locals())
self.__parameter_updater__ = self.__optimizer__.create_updater( self.__parameter_updater__ = self.__optimizer__.create_updater(
self.__is_local__, num_passes, self.__use_sparse_updater__, self.__is_local__, num_passes, self.__use_sparse_updater__,
self.__pserver_spec__) self.__pserver_spec__, self.__use_etcd__)
self.__parameter_updater__.init(self.__gradient_machine__) self.__parameter_updater__.init(self.__gradient_machine__)
self.__gradient_machine__.start() self.__gradient_machine__.start()
...@@ -186,6 +186,8 @@ class SGD(object): ...@@ -186,6 +186,8 @@ class SGD(object):
:type feeding: dict :type feeding: dict
:return: :return:
""" """
import py_paddle.swig_paddle as api
from data_feeder import DataFeeder
feeder = DataFeeder(self.__data_types__, feeding) feeder = DataFeeder(self.__data_types__, feeding)
evaluator = self.__gradient_machine__.makeEvaluator() evaluator = self.__gradient_machine__.makeEvaluator()
out_args = api.Arguments.createArguments(0) out_args = api.Arguments.createArguments(0)
......
...@@ -19,7 +19,9 @@ setup_requires=["requests", ...@@ -19,7 +19,9 @@ setup_requires=["requests",
"recordio", "recordio",
"matplotlib", "matplotlib",
"rarfile", "rarfile",
"scipy>=0.19.0"] "scipy>=0.19.0",
"Pillow",
"nltk"]
if '${CMAKE_SYSTEM_PROCESSOR}' not in ['arm', 'armv7-a', 'aarch64']: if '${CMAKE_SYSTEM_PROCESSOR}' not in ['arm', 'armv7-a', 'aarch64']:
setup_requires+=["opencv-python"] setup_requires+=["opencv-python"]
...@@ -29,7 +31,9 @@ setup(name='paddle', ...@@ -29,7 +31,9 @@ setup(name='paddle',
description='Parallel Distributed Deep Learning', description='Parallel Distributed Deep Learning',
install_requires=setup_requires, install_requires=setup_requires,
packages=packages, packages=packages,
package_data={'paddle.v2.master': ['libpaddle_master.so'], }, package_data={'paddle.v2.master': ['libpaddle_master.so'],
'paddle.v2.framework': ['core.so']
},
package_dir={ package_dir={
'': '${CMAKE_CURRENT_SOURCE_DIR}', '': '${CMAKE_CURRENT_SOURCE_DIR}',
# The paddle.v2.framework.proto will be generated while compiling. # The paddle.v2.framework.proto will be generated while compiling.
......
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册