diff --git a/AUTHORS.md b/AUTHORS.md index d5baee2161aa1d5360056e03ca67d5b2fe9ff7d2..4db4a4a8e7441b07ce2db4adff043bb99a09014b 100644 --- a/AUTHORS.md +++ b/AUTHORS.md @@ -1,28 +1,48 @@ | Github account | name | |---|---| -| reyoung | Yang Yu | +| backyes | Yan-Fei Wang | +| beckett1124 | Bin Qi | +| Canpio | Jia-Yi Feng | +| chengxiaohua1105 | Xiao-Hua Cheng | +| cxwangyi, yiwangbaidu, wangkuiyi | Yi Wang | +| cxysteven | Xing-Yi Cheng | +| dzhwinter | Zhi-Hong Dong | +| emailweixu | Wei Xu | | gangliao | Gang Liao | -| luotao01 | Tao Luo | -| jacquesqiao | Long-Fei Qiao | -| qingqing01 | Qing-Qing Dang | +| gongweibao | Wei-Bao Gong | +| Guo Sheng | Sheng Guo | +| Haichao-Zhang | Hai-Chao Zhang | | hedaoyuan | Dao-Yuan He | -| wangyang59 | Yang Wang | +| helinwang | He-Lin Wang | +| jacquesqiao | Long-Fei Qiao | +| kuke | Yi-Bing Liu | +| lcy-seso | Ying Cao | +| lipeng-unisound | Peng Li | +| liuyuan | Yuan Liu | +| livc | Zhao Li | +| llxxxll | Yong-Feng Liu | +| luotao01 | Tao Luo | +| lzhao4ever | Liang Zhao | +| NHZlX | Zhao-Long Xing | +| pakchoi | Chuan-Jiang Song | +| pengli09 | Peng Li | +| pkuyym | Ya-Ming Yang | | QiJune | Jun Qi | +| qingqing01 | Qing-Qing Dang | +| reyoung | Yang Yu | +| Superjom | Chun-Wei Yan | | tianbingsz | Tian-Bing Xu | -| cxwangyi, yiwangbaidu, wangkuiyi | Yi Wang | | typhoonzero | Yi Wu | -| backyes | Yan-Fei Wang | -| pengli09 | Peng Li | -| livc | Zhao Li | +| wanghaoshuang | Hao-Shuang Wang | +| wangyang59 | Yang Wang | +| wangzhen-nlp | Zhen Wang | +| wen-bo-yang | Wen-Bo Yang | +| wwhu | Wei-Wei Hu | +| xinghai-sun | Xing-Hai Sun | | Xreki | Yi-Qun Liu | +| xujun05 | Jun Xu | +| xushaoyong | Shao-Yong Xu | | Yancey1989 | Xu Yan | -| emailweixu | Wei Xu | -| wen-bo-yang | Wen-Bo Yang | -| helinwang | He-Lin Wang | -| lcy-seso | Ying Cao | -| Zrachel | Rui-Qing Zhang | -| Haichao-Zhang | Hai-Chao Zhang | -| gongweibao | Wei-Bao Gong | -| lzhao4ever | Liang Zhao | +| zhaopu7 | Pu Zhao | | zhouxiao-coder | Xiao Zhou | -| lipeng-unisound | Peng Li | +| Zrachel | Rui-Qing Zhang | diff --git a/CMakeLists.txt b/CMakeLists.txt index ed3c390066dfac2322d802c6039bc7155a36e38a..5bedbbefa85a730ff2934a12597988a67e73c1a4 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -49,6 +49,7 @@ option(COVERALLS_UPLOAD "Package code coverage data to coveralls" OFF) option(ON_TRAVIS "Exclude special unit test on Travis CI" OFF) option(WITH_C_API "Compile PaddlePaddle with C-API(Prediction)" OFF) option(WITH_GOLANG "Compile PaddlePaddle with GOLANG" OFF) +option(GLIDE_INSTALL "Download and install go dependencies " ON) option(USE_NNPACK "Compile PaddlePaddle with NNPACK library" OFF) # CMAKE_BUILD_TYPE @@ -96,6 +97,8 @@ include(external/warpctc) # download, build, install warpctc include(external/any) # download libn::any include(external/eigen) # download eigen3 +include(cudnn) # set cudnn libraries, must before configure +include(configure) # add paddle env configuration include(generic) # simplify cmake module include(package) # set paddle packages include(cpplint) # set paddle c++ style @@ -103,15 +106,14 @@ include(ccache) # set ccache for compilation include(util) # set unittest and link libs include(rdma) # set rdma libraries include(flags) # set paddle compile flags -include(cudnn) # set cudnn libraries include(version) # set PADDLE_VERSION include(coveralls) # set code coverage -include(configure) # add paddle env configuration + include_directories("${PROJ_ROOT}") include_directories("${PROJ_ROOT}/paddle/cuda/include") include_directories("${CMAKE_CURRENT_BINARY_DIR}/proto") -include_directories("${CMAKE_CURRENT_BINARY_DIR}/go/pserver/cclient") +include_directories("${CMAKE_CURRENT_BINARY_DIR}/go/pserver/client/c") include_directories(${Boost_INCLUDE_DIRS}) set(EXTERNAL_LIBS @@ -139,8 +141,7 @@ add_subdirectory(proto) # "add_subdirectory(paddle)" and "add_subdirectory(python)" should be # placed after this block, because they depends on it. if(WITH_GOLANG) - add_subdirectory(go/master/c) - add_subdirectory(go/pserver/cclient) + add_subdirectory(go) endif(WITH_GOLANG) add_subdirectory(paddle) diff --git a/Dockerfile b/Dockerfile index bf227737c5a67b006ccc221235daf6d8ad7b3bd8..ed5910d93b41dba8d50b2ba01c59c635797edd29 100644 --- a/Dockerfile +++ b/Dockerfile @@ -34,14 +34,18 @@ RUN apt-get update && \ net-tools && \ apt-get clean -y -# Install Go +# Install Go and glide RUN wget -O go.tgz https://storage.googleapis.com/golang/go1.8.1.linux-amd64.tar.gz && \ tar -C /usr/local -xzf go.tgz && \ mkdir /root/gopath && \ + mkdir /root/gopath/bin && \ + mkdir /root/gopath/src && \ rm go.tgz ENV GOROOT=/usr/local/go GOPATH=/root/gopath # should not be in the same line with GOROOT definition, otherwise docker build could not find GOROOT. -ENV PATH=${PATH}:${GOROOT}/bin +ENV PATH=${PATH}:${GOROOT}/bin:${GOPATH}/bin +# install glide +RUN curl -q https://glide.sh/get | sh # git credential to skip password typing RUN git config --global credential.helper store @@ -57,7 +61,7 @@ RUN pip install --upgrade pip && \ pip install -U docopt PyYAML sphinx && \ pip install -U sphinx-rtd-theme==0.1.9 recommonmark && \ pip install pre-commit 'requests==2.9.2' 'ipython==5.3.0' && \ - pip install 'ipykernel==4.6.0' 'jupyter==1.0.0' && \ + pip install 'ipykernel==4.6.0' 'jupyter==1.0.0' && \ pip install rarfile # To fix https://github.com/PaddlePaddle/Paddle/issues/1954, we use diff --git a/cmake/configure.cmake b/cmake/configure.cmake index f6dca6d575956fcb893f8e11970f84a739832d58..a4f98ec7d4af652d0dd0650f4906696ff3a4efb9 100644 --- a/cmake/configure.cmake +++ b/cmake/configure.cmake @@ -1,11 +1,11 @@ # Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserve. -# +# # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. # You may obtain a copy of the License at -# +# # http://www.apache.org/licenses/LICENSE-2.0 -# +# # Unless required by applicable law or agreed to in writing, software # distributed under the License is distributed on an "AS IS" BASIS, # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. @@ -79,6 +79,9 @@ if(WITH_GOLANG) set(GOPATH "${CMAKE_CURRENT_BINARY_DIR}/go") file(MAKE_DIRECTORY ${GOPATH}) set(PADDLE_IN_GOPATH "${GOPATH}/src/github.com/PaddlePaddle/Paddle") + file(MAKE_DIRECTORY "${PADDLE_IN_GOPATH}") + set(PADDLE_GO_PATH "${CMAKE_SOURCE_DIR}/go") + add_custom_target(go_path) add_custom_command(TARGET go_path # Symlink Paddle directory into GOPATH @@ -89,7 +92,22 @@ if(WITH_GOLANG) # We can't run `go get -d ./...` for every target, because # multiple `go get` can not run concurrently, but make need to be # able to run with multiple jobs. - COMMAND env GOPATH=${GOPATH} ${CMAKE_Go_COMPILER} get -d ./go/... WORKING_DIRECTORY ${CMAKE_CURRENT_SOURCE_DIR} ) + + if (GLIDE_INSTALL) + if(EXISTS $ENV{GOPATH}/bin/glide) + set(GLIDE "$ENV{GOPATH}/bin/glide") + else() + message(FATAL_ERROR "no glide executeble found: $ENV{GOPATH}/bin/glide") + endif() + + add_custom_target(go_vendor) + add_custom_command(TARGET go_vendor + COMMAND env GOPATH=${GOPATH} ${GLIDE} install + WORKING_DIRECTORY "${PADDLE_IN_GOPATH}/go" + ) + add_dependencies(go_vendor go_path) + endif() + endif(WITH_GOLANG) diff --git a/cmake/cpplint.cmake b/cmake/cpplint.cmake index 48f705818b70c92adef107fd3c973ae1ab3d34bb..6bbcd730e1b5ac49415cac676352e6df00eb6eb5 100644 --- a/cmake/cpplint.cmake +++ b/cmake/cpplint.cmake @@ -25,6 +25,7 @@ set(STYLE_FILTER "${STYLE_FILTER}-readability/casting") set(IGNORE_PATTERN .*ImportanceSampler.* .*cblas\\.h.* + .*\\.pb\\.txt .*LtrDataProvider.* .*MultiDataProvider.*) diff --git a/cmake/external/any.cmake b/cmake/external/any.cmake index 62eea42692b4191e53d0bbb0805786fd15ac7944..45e3764e8482a4cfc8ee72fe4d79f04a3c9b74fa 100644 --- a/cmake/external/any.cmake +++ b/cmake/external/any.cmake @@ -2,10 +2,10 @@ INCLUDE(ExternalProject) SET(ANY_SOURCE_DIR ${THIRD_PARTY_PATH}/any) -INCLUDE_DIRECTORIES(${ANY_SOURCE_DIR}/src/linb_any) +INCLUDE_DIRECTORIES(${ANY_SOURCE_DIR}/src/extern_lib_any) ExternalProject_Add( - linb_any + extern_lib_any ${EXTERNAL_PROJECT_LOG_ARGS} GIT_REPOSITORY "https://github.com/thelink2012/any.git" GIT_TAG "8fef1e93710a0edf8d7658999e284a1142c4c020" @@ -17,5 +17,15 @@ ExternalProject_Add( TEST_COMMAND "" ) +if (${CMAKE_VERSION} VERSION_LESS "3.3.0") + set(dummyfile ${CMAKE_CURRENT_BINARY_DIR}/lib_any_dummy.c) + file(WRITE ${dummyfile} "const char * dummy_any = \"${dummyfile}\";") + add_library(lib_any STATIC ${dummyfile}) +else() + add_library(lib_any INTERFACE) +endif() + +add_dependencies(lib_any extern_lib_any) + add_definitions(-DANY_IMPL_ANY_CAST_MOVEABLE) -LIST(APPEND external_project_dependencies linb_any) \ No newline at end of file +LIST(APPEND external_project_dependencies lib_any) diff --git a/cmake/external/eigen.cmake b/cmake/external/eigen.cmake index 45f44f617dcb46062355df4e35d537086215a46d..3e6cedbb0d718cfd4454f95dedf7e02a24f2981b 100644 --- a/cmake/external/eigen.cmake +++ b/cmake/external/eigen.cmake @@ -2,10 +2,10 @@ INCLUDE(ExternalProject) SET(EIGEN_SOURCE_DIR ${THIRD_PARTY_PATH}/eigen3) -INCLUDE_DIRECTORIES(${EIGEN_SOURCE_DIR}/src/eigen3) +INCLUDE_DIRECTORIES(${EIGEN_SOURCE_DIR}/src/extern_eigen3) ExternalProject_Add( - eigen3 + extern_eigen3 ${EXTERNAL_PROJECT_LOG_ARGS} # for latest version, please get from official website # URL "https://bitbucket.org/eigen/eigen/get/3.3.4.tar.gz" @@ -26,4 +26,14 @@ ExternalProject_Add( TEST_COMMAND "" ) +if (${CMAKE_VERSION} VERSION_LESS "3.3.0") + set(dummyfile ${CMAKE_CURRENT_BINARY_DIR}/eigen3_dummy.c) + file(WRITE ${dummyfile} "const char * dummy_eigen3 = \"${dummyfile}\";") + add_library(eigen3 STATIC ${dummyfile}) +else() + add_library(eigen3 INTERFACE) +endif() + +add_dependencies(eigen3 extern_eigen3) + LIST(APPEND external_project_dependencies eigen3) diff --git a/cmake/generic.cmake b/cmake/generic.cmake index 88be13b2ac95172d5d9099d62a40449c6a01e98a..cae9524b2fe1417f7792cbfd959280206f473797 100644 --- a/cmake/generic.cmake +++ b/cmake/generic.cmake @@ -17,7 +17,7 @@ # generic.cmake defines CMakes functions that look like Bazel's # building rules (https://bazel.build/). # -# +# # ------------------------------------------- # C++ CUDA C++ Go # ------------------------------------------- @@ -25,51 +25,51 @@ # cc_binary nv_binary go_binary # cc_test nv_test go_test # ------------------------------------------- -# +# # To build a static library example.a from example.cc using the system # compiler (like GCC): -# +# # cc_library(example SRCS example.cc) -# +# # To build a static library example.a from multiple source files # example{1,2,3}.cc: -# +# # cc_library(example SRCS example1.cc example2.cc example3.cc) -# +# # To build a shared library example.so from example.cc: -# +# # cc_library(example SHARED SRCS example.cc) -# +# # To build a library using Nvidia's NVCC from .cu file(s), use the nv_ # prefixed version: -# +# # nv_library(example SRCS example.cu) -# +# # To specify that a library new_example.a depends on other libraies: -# +# # cc_library(new_example SRCS new_example.cc DEPS example) -# +# # Static libraries can be composed of other static libraries: -# +# # cc_library(composed DEPS dependent1 dependent2 dependent3) -# +# # To build an executable binary file from some source files and # dependent libraries: -# +# # cc_binary(example SRCS main.cc something.cc DEPS example1 example2) -# +# # To build an executable binary file using NVCC, use the nv_ prefixed # version: -# +# # nv_binary(example SRCS main.cc something.cu DEPS example1 example2) -# +# # To build a unit test binary, which is an executable binary with # GoogleTest linked: -# +# # cc_test(example_test SRCS example_test.cc DEPS example) -# +# # To build a unit test binary using NVCC, use the nv_ prefixed version: -# +# # nv_test(example_test SRCS example_test.cu DEPS example) # # It is pretty often that executable and test binaries depend on @@ -162,6 +162,7 @@ function(cc_library TARGET_NAME) endif() if (cc_library_DEPS) add_dependencies(${TARGET_NAME} ${cc_library_DEPS}) + target_link_libraries(${TARGET_NAME} ${cc_library_DEPS}) endif() else(cc_library_SRCS) if (cc_library_DEPS) @@ -211,6 +212,7 @@ function(nv_library TARGET_NAME) endif() if (nv_library_DEPS) add_dependencies(${TARGET_NAME} ${nv_library_DEPS}) + target_link_libraries(${TARGET_NAME} ${nv_library_DEPS}) endif() else(nv_library_SRCS) if (nv_library_DEPS) @@ -278,14 +280,16 @@ function(go_library TARGET_NAME) set(${TARGET_NAME}_LIB_PATH "${CMAKE_CURRENT_BINARY_DIR}/${${TARGET_NAME}_LIB_NAME}" CACHE STRING "output library path for target ${TARGET_NAME}") file(GLOB GO_SOURCE RELATIVE "${CMAKE_CURRENT_SOURCE_DIR}" "*.go") + string(REPLACE "${PADDLE_GO_PATH}/" "" CMAKE_CURRENT_SOURCE_REL_DIR ${CMAKE_CURRENT_SOURCE_DIR}) add_custom_command(TARGET ${TARGET_NAME} POST_BUILD COMMAND rm "${${TARGET_NAME}_LIB_PATH}" # Golang build source code COMMAND env GOPATH=${GOPATH} ${CMAKE_Go_COMPILER} build ${BUILD_MODE} -o "${${TARGET_NAME}_LIB_PATH}" - ${GO_SOURCE} - WORKING_DIRECTORY ${CMAKE_CURRENT_SOURCE_DIR}) - add_dependencies(${TARGET_NAME} go_path) + "./${CMAKE_CURRENT_SOURCE_REL_DIR}/${GO_SOURCE}" + # must run under GOPATH + WORKING_DIRECTORY "${PADDLE_IN_GOPATH}/go") + add_dependencies(${TARGET_NAME} go_vendor) endfunction(go_library) function(go_binary TARGET_NAME) @@ -293,12 +297,15 @@ function(go_binary TARGET_NAME) set(oneValueArgs "") set(multiValueArgs SRCS DEPS) cmake_parse_arguments(go_binary "${options}" "${oneValueArgs}" "${multiValueArgs}" ${ARGN}) + string(REPLACE "${PADDLE_GO_PATH}/" "" CMAKE_CURRENT_SOURCE_REL_DIR ${CMAKE_CURRENT_SOURCE_DIR}) + add_custom_command(OUTPUT ${TARGET_NAME}_timestamp COMMAND env GOPATH=${GOPATH} ${CMAKE_Go_COMPILER} build -o "${CMAKE_CURRENT_BINARY_DIR}/${TARGET_NAME}" - ${go_library_SRCS} - WORKING_DIRECTORY ${CMAKE_CURRENT_SOURCE_DIR}) - add_custom_target(${TARGET_NAME} ALL DEPENDS ${TARGET_NAME}_timestamp ${go_binary_DEPS}) + "./${CMAKE_CURRENT_SOURCE_REL_DIR}/${go_binary_SRCS}" + WORKING_DIRECTORY "${PADDLE_IN_GOPATH}/go") + # TODO: don't know what ${TARGET_NAME}_link does + add_custom_target(${TARGET_NAME} ALL DEPENDS go_vendor ${TARGET_NAME}_timestamp ${go_binary_DEPS}) install(PROGRAMS ${CMAKE_CURRENT_BINARY_DIR}/${TARGET_NAME} DESTINATION bin) endfunction(go_binary) @@ -318,10 +325,10 @@ endfunction(go_test) function(proto_library TARGET_NAME) set(oneValueArgs "") - set(multiValueArgs SRCS) + set(multiValueArgs SRCS DEPS) cmake_parse_arguments(proto_library "${options}" "${oneValueArgs}" "${multiValueArgs}" ${ARGN}) set(proto_srcs) set(proto_hdrs) protobuf_generate_cpp(proto_srcs proto_hdrs ${proto_library_SRCS}) - cc_library(${TARGET_NAME} SRCS ${proto_srcs} DEPS protobuf) + cc_library(${TARGET_NAME} SRCS ${proto_srcs} DEPS ${proto_library_DEPS} protobuf) endfunction() diff --git a/doc/design/simple_op_design.md b/doc/design/simple_op_design.md new file mode 100644 index 0000000000000000000000000000000000000000..49ca5db5da9e400fd2c54eb8903b0dd2eb832d44 --- /dev/null +++ b/doc/design/simple_op_design.md @@ -0,0 +1,201 @@ +## Interaction between C++ and Python + +Users employ API in Python to describe their own network, however, the network construction actually happens in C++. so Protobuf is introduced to send the message between Python and C++. + +The Interaction between Python and C++ can be simplified as two steps: + +1. C++ tells Python how many Ops there are, and what parameter do users need to offer to initialize a new Op. Python then builds API for each Op at compile time. + +2. Users invoke APIs built by Python and provide necessary parameters. These parameters will be sent to C++ fo finish Op construction task. + +### Message form C++ to Python + +We define a Protobuf message class `OpProto` to hold message needed in the first step. What should an `OpProto` contain? This question is equivalent to “What message do we need to offer, to build a Python API which is legal and user oriented and can use to describe a whole Op.” + +Following message are necessary: + +1. Op's name, and its simple comment. +2. Input and output variable number; each variable's name, type, and comment. +3. Op's attributes; each attribute includes name, type, comment, **default value** and **value range**. + +So `OpProto` can be defined as follows: + +```proto +enum AttrType { + INT = 1; + FLOAT = 2; + STRING = 3; + INTS = 4; + FLOATS = 5; + STRINGS = 6; +}; + +message AttrValue { + AttrType type = 1; + optional int iv = 2; + optional float fv = 3; + optional string sv = 4; + repeated int ivs = 5; + repeated float fvs = 6; + repeated string svs = 7; +}; + +message AttrProto { + required string name = 1; + required string comment = 2; + required AttrType type = 3; +}; + +message VarProto { + required string name = 1; + required string comment = 2; +}; + +message OpProto { + repeated VarProto inputs = 1; + repeated VarProto outputs = 2; + repeated AttrProto attrs = 3; + required string type = 4; + required string comment = 5; +}; +``` + +To generate Python code automatically: + +```python +def create_python_ops_creatation_functions(): + op_protos = paddle.framework.OpRegistry.get_all_op_proto() + for type_name in op_protos: + op_proto = op_protos[type_name] + def __impl__(**kwargs): # User must use key word args in Paddle API + inputs = [kwargs.get(ipt.name, "") for ipt in op_proto.inputs] + outputs = [kwargs.get(opt.name, "") for opt in op_proto.outputs] + attrs = [cast_to_op_attr(attr, kwargs.get(attr.name, None)) for attr in op_proto.attrs] + opdesc = (input, outputs, type_name, attrs) + return paddle.framework.OpRegistry.CreateOp(opdesc) + __impl__.__doc__ = create_doc_string(op_proto) + globals()[type_name] = __impl__ + +create_python_ops_creatation_functions() +``` + +### Message from Python to C++ + +To hold message needed in the above second step, we define Protobuf message class `OpDesc`. It is used to hold user-specified parameters in Op describing. + +```proto +message OpDesc { + required string type = 1; + repeated string inputs = 2; + repeated string outputs = 3; + map attrs = 4; +}; +``` + +## OpProto Register + +Every Op has its own `OpProto`. For using convenience, we need to register them and record all their messages. For each `Op` class, we define a corresponding `OpMaker` class, in whose constructor we implement the `OpProto`'s building process. `OpMaker`'s constructor will be invoked by another function `OpRegistry::RegisterOp()`. + +```cpp +class OpProtoMaker { +public: + OpProtoMaker(OpProto* proto): proto_(proto) {} +protected: + OpProto* proto_; + void AddInput(const std::string& name, const std::string& desc) {...} + void AddAttr(const std::string& name, const std::string& desc, TypeId type) {...} + void AddComment(const std::string& comment) { ... } +}; + +class OpRegistry { +public: + using OpCreator = std::function; + + template + static void RegisterOp(const std::string& name) { + gCreators_[name] = [](const OpDesc& desc) { + return new OpType(desc); + }; + OpProto& opProto = gProtos_[name]; + OpMaker()(&opProto); + } + + static map gCreators_; + static map gProtos_; +}; + +template +class OpRegister { + public: + OpRegister(std::string type) { + OpRegistry::RegisterOp(type); + } +}; + +#define REGISTER_OP(op_class, op_maker_class, type_name) \ + class op_class##Register { \ + private: \ + const static OpRegister<#op_class, #op_maker_class> reg; \ + }; \ + const Register op_class##Register::reg(#type_name); + +class CosineOp { +// ... +} + +struct CosineOpProtoMaker : public OpProtoMaker { + CosineOpProtoMaker(OpProto* proto) : OpProtoMaker(proto) { + AddInput("input", "input of cosine op"); + AddAttr("scale", "scale of cosine op", float).Default(1.0).LargerThan(0.0); + AddType("cos"); + AddComment("This is cos op"); + } +} + +REGISTER_OP(CosineOp, CosineOpProtoMaker, cos); +``` + +In `REGISTER_OP(CosineOp, CosineOpProtoMaker, cos)`, we register not only `CosineOp` but also `CosineOpProto`. As fields of `CosineOpProto`, the default value and value range of `scale` are also registered here. + +## Python API + +Python APIs are divided into two types, high-level API and low-level API. + +### High-Level API + +High-level API is called by users directly, so it should keep its style consistent with existing V2 APIs. + +Here is a sample about how a define a fc layer: + +```python +hd = fc_layer(input=data, size=56, with_bias=True, activation="sigmoid"); +``` + +`hd` is the output of `fc_layer` and it's a `variable`. It can be further sent into other layers as input. + +The definition of `fc_layer()`: + +```python +def fc_layer(input, size, with_bias, activation): + attr_map = {"size":size} + check_attrs(attr_map) + w = make_variable('w') + if with_bias: + b = make_variable('b') + else: + b = None + fc_output = make_variable('fc_output'); + fc_op(input, w, b, fc_output, attr_map) + act_output = make_variable('sigmod_output'); + if activation == "sigmod": + sigmod_op(fc_output, act_output); + elif: + # ... + return act_output; +``` + +### Low Leval API + +In above sample, `fc_op` and `sigmod_op` are low-level API. They build `OpDesc` and invoke corresponding C++ code. + +*TODO* diff --git a/go/.gitignore b/go/.gitignore new file mode 100644 index 0000000000000000000000000000000000000000..000e1fd55b63b8e532308b787c2708a6c3e5ac87 --- /dev/null +++ b/go/.gitignore @@ -0,0 +1,2 @@ +vendor/ +.glide/ diff --git a/go/CMakeLists.txt b/go/CMakeLists.txt new file mode 100644 index 0000000000000000000000000000000000000000..f00c70a0589a4f41a23164a95d505d4310d9157b --- /dev/null +++ b/go/CMakeLists.txt @@ -0,0 +1,19 @@ +# Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserve. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# + +add_subdirectory(pserver/client/c) +add_subdirectory(cmd/pserver) +add_subdirectory(cmd/master) +add_subdirectory(master/c) diff --git a/go/cmd/master/CMakeLists.txt b/go/cmd/master/CMakeLists.txt new file mode 100644 index 0000000000000000000000000000000000000000..9e149967e71c9439bb00b973aa8723a809604aaf --- /dev/null +++ b/go/cmd/master/CMakeLists.txt @@ -0,0 +1,15 @@ +# Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserve. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +go_binary(master SRC master.go) diff --git a/go/cmd/pserver/CMakeLists.txt b/go/cmd/pserver/CMakeLists.txt new file mode 100644 index 0000000000000000000000000000000000000000..bc1da3348cc21377421ce3db21ab8d4a8ee05894 --- /dev/null +++ b/go/cmd/pserver/CMakeLists.txt @@ -0,0 +1,15 @@ +# Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserve. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +go_binary(pserver SRCS pserver.go) diff --git a/go/cmd/pserver/pserver.go b/go/cmd/pserver/pserver.go index 8a42d4f8af1713e246f9efaf5dc7ba878c3b271e..31ef450f032f756fb32a0444a7e94a18ec2918a0 100644 --- a/go/cmd/pserver/pserver.go +++ b/go/cmd/pserver/pserver.go @@ -15,6 +15,7 @@ import ( func main() { port := flag.Int("port", 0, "port of the pserver") + index := flag.Int("index", -1, "index of this pserver, should be larger or equal than 0") etcdEndpoint := flag.String("etcd-endpoint", "http://127.0.0.1:2379", "comma separated endpoint string for pserver to connect to etcd") etcdTimeout := flag.Int("etcd-timeout", 5, "timeout for etcd calls") @@ -29,11 +30,16 @@ func main() { } log.SetLevel(level) - timeout := time.Second * time.Duration((*etcdTimeout)) - e := pserver.NewEtcdClient(*etcdEndpoint, *numPservers, timeout) - idx, err := e.Register() - if err != nil { - panic(err) + var idx int + if *index >= 0 { + idx = *index + } else { + timeout := time.Second * time.Duration((*etcdTimeout)) + e := pserver.NewEtcdClient(*etcdEndpoint, *numPservers, timeout) + idx, err = e.Register() + if err != nil { + panic(err) + } } s, err := pserver.NewService(idx) diff --git a/go/glide.lock b/go/glide.lock new file mode 100644 index 0000000000000000000000000000000000000000..190a222338b24b7edac76c72d07df0b2cbd7d9be --- /dev/null +++ b/go/glide.lock @@ -0,0 +1,61 @@ +hash: b8f18ce6784bd3fadd9fed0b8443e7b658234ea785ae1f220723ae2c1f652aa7 +updated: 2017-06-27T14:05:48.925262819+08:00 +imports: +- name: github.com/coreos/etcd + version: 61fc123e7a8b14a0a258aa3f5c4159861b1ec2e7 + subpackages: + - auth/authpb + - clientv3 + - clientv3/concurrency + - etcdserver/api/v3rpc/rpctypes + - etcdserver/etcdserverpb + - mvcc/mvccpb +- name: github.com/golang/protobuf + version: 4bd1920723d7b7c925de087aa32e2187708897f7 + subpackages: + - jsonpb + - proto +- name: github.com/golang/snappy + version: 553a641470496b2327abcac10b36396bd98e45c9 +- name: github.com/namsral/flag + version: 71ceffbeb0ba60fccc853971bb3ed4d7d90bfd04 +- name: github.com/PaddlePaddle/recordio + version: edfb82af0739c84f241c87390ec5649c7b28c129 +- name: github.com/sirupsen/logrus + version: 202f25545ea4cf9b191ff7f846df5d87c9382c2b +- name: golang.org/x/net + version: c8c74377599bd978aee1cf3b9b63a8634051cec2 + subpackages: + - context + - http2 + - http2/hpack + - idna + - internal/timeseries + - lex/httplex + - trace +- name: golang.org/x/sys + version: f7928cfef4d09d1b080aa2b6fd3ca9ba1567c733 + subpackages: + - unix +- name: golang.org/x/text + version: 4e9ab9ee170f2a39bd66c92b3e0a47ff47a4bc77 + subpackages: + - secure/bidirule + - transform + - unicode/bidi + - unicode/norm +- name: google.golang.org/grpc + version: 8050b9cbc271307e5a716a9d782803d09b0d6f2d + subpackages: + - codes + - credentials + - grpclog + - internal + - keepalive + - metadata + - naming + - peer + - stats + - tap + - transport +testImports: [] diff --git a/go/glide.yaml b/go/glide.yaml new file mode 100644 index 0000000000000000000000000000000000000000..05c5d15ca22b6a3d85bee8e1f31d222034ce5314 --- /dev/null +++ b/go/glide.yaml @@ -0,0 +1,12 @@ +package: github.com/PaddlePaddle/Paddle/go +import: +- package: github.com/PaddlePaddle/recordio +- package: github.com/coreos/etcd + version: ^3.2.1 + subpackages: + - clientv3 + - clientv3/concurrency +- package: github.com/namsral/flag + version: ^1.7.4-pre +- package: github.com/sirupsen/logrus + version: ^1.0.0 diff --git a/go/master/c/CMakeLists.txt b/go/master/c/CMakeLists.txt index a4e92635bab8f0dbc5d91c7131fb0f8d1f5a6363..94d6bb0b2e94419488134ad1e2221ae568338044 100644 --- a/go/master/c/CMakeLists.txt +++ b/go/master/c/CMakeLists.txt @@ -1,3 +1 @@ -cmake_minimum_required(VERSION 3.0) - go_library(paddle_master SHARED) diff --git a/go/master/etcd_client.go b/go/master/etcd_client.go index e27c014792f31ca27fe1a1636d69acccc4206ea3..04c1394e963d1eb541b80b91407fb55b0d1e1f2a 100644 --- a/go/master/etcd_client.go +++ b/go/master/etcd_client.go @@ -50,7 +50,7 @@ func NewEtcdClient(endpoints []string, addr string, lockPath, addrPath, statePat lock := concurrency.NewMutex(sess, lockPath) // It's fine for the lock to get stuck, in this case we have // multiple master servers running (only configured to have - // one master running, but split-brain problem may cuase + // one master running, but split-brain problem may cause // multiple master servers running), and the cluster management // software will kill one of them. log.Debugf("Trying to acquire lock at %s.", lockPath) @@ -98,7 +98,7 @@ func (e *EtcdClient) Save(state []byte) error { // We lost the master lock and can not acquire // it back, it means some other master is // already started. We don't want cluster - // managment system to kill the master server + // management system to kill the master server // who is holding the lock and running // correctly. So the most feasible solution is // to kill current master server. The current diff --git a/go/pserver/cclient/CMakeLists.txt b/go/pserver/cclient/CMakeLists.txt deleted file mode 100644 index d2c339d68866bd5c91403227e97af2c97bb30eeb..0000000000000000000000000000000000000000 --- a/go/pserver/cclient/CMakeLists.txt +++ /dev/null @@ -1,3 +0,0 @@ -go_library(paddle_pserver_cclient STATIC) - -add_subdirectory(test) diff --git a/go/pserver/cclient/test/test_cclient.c b/go/pserver/cclient/test/test_cclient.c deleted file mode 100644 index 0f9c2ef80114d4c5cd887117952f5b7b5d9355f6..0000000000000000000000000000000000000000 --- a/go/pserver/cclient/test/test_cclient.c +++ /dev/null @@ -1,117 +0,0 @@ -#include -#include - -#include "libpaddle_pserver_cclient.h" - -typedef float real; - -void fail() { - // TODO(helin): fix: gtest using cmake is not working, using this - // hacky way for now. - printf("test failed.\n"); - exit(-1); -} - -void print_parameter(paddle_gradient* param) { - if (param == NULL) { - printf("param is NULL!!\n"); - } else { - printf("==== parameter ====\n"); - printf("name: %s\n", param->name); - printf("content_len: %d\n", param->content_len); - printf("content_type: %d\n", param->element_type); - int i; - for (i = 0; i < param->content_len / (int)sizeof(real); ++i) { - printf("%f ", ((float*)param->content)[i]); - } - printf("\n\n"); - } -} - -int main() { - char addr[] = "localhost:3000"; - paddle_pserver_client c = paddle_new_pserver_client(addr, 1); - - char* names[] = {"param_a", "param_b"}; - -retry: - printf("init parameter to pserver:\n"); - - real param_content1[] = {0.1, 0.2, 0.3}; - real param_content2[] = {0.4, 0.5, 0.6}; - paddle_parameter** params = - (paddle_parameter**)malloc(sizeof(paddle_parameter*) * 2); - params[0] = (paddle_parameter*)malloc(sizeof(paddle_parameter)); - params[0]->name = names[0]; - params[0]->content = (unsigned char*)param_content1; - params[0]->content_len = 3 * sizeof(real); - params[0]->element_type = PADDLE_ELEMENT_TYPE_FLOAT32; - - params[1] = (paddle_parameter*)malloc(sizeof(paddle_parameter)); - params[1]->name = names[1]; - params[1]->content = (unsigned char*)param_content2; - params[1]->content_len = 3 * sizeof(real); - params[1]->element_type = PADDLE_ELEMENT_TYPE_INT32; - - if (paddle_begin_init_params(c)) { - if (paddle_init_param(c, *params[0], NULL, 0) != 0) { - goto retry; - } - if (paddle_init_param(c, *params[1], NULL, 0) != 0) { - goto retry; - } - if (paddle_finish_init_params(c) != 0) { - goto retry; - } - } else { - fail(); - } - - printf("get inited parameters from pserver:\n"); - // get parameters again by reusing the allocated parameter buffers. - if (paddle_get_params(c, params, 2) != 0) { - fail(); - } - print_parameter(params[0]); - print_parameter(params[1]); - - printf("send gradient to pserver:\n"); - real gradient_content1[] = {0.01, 0.02, 0.03}; - real gradinet_content2[] = {0.04, 0.05, 0.06}; - - paddle_gradient** grads = - (paddle_gradient**)malloc(sizeof(paddle_gradient*) * 2); - grads[0] = (paddle_gradient*)malloc(sizeof(paddle_gradient)); - grads[0]->name = names[0]; - grads[0]->content = (unsigned char*)gradient_content1; - grads[0]->content_len = 3 * sizeof(real); - grads[0]->element_type = PADDLE_ELEMENT_TYPE_FLOAT32; - - grads[1] = (paddle_gradient*)malloc(sizeof(paddle_gradient)); - grads[1]->name = names[1]; - grads[1]->content = (unsigned char*)gradinet_content2; - grads[1]->content_len = 3 * sizeof(real); - grads[1]->element_type = PADDLE_ELEMENT_TYPE_INT32; - - printf("print gradient sent to pserver:\n"); - print_parameter(grads[0]); - print_parameter(grads[1]); - - if (paddle_send_grads(c, grads, 2) != 0) { - fail(); - } - - printf("get updated parameters from pserver:\n"); - // get parameters again by reusing the allocated parameter buffers. - if (paddle_get_params(c, params, 2) != 0) { - fail(); - } - print_parameter(params[0]); - print_parameter(params[1]); - - if (paddle_save_model(c, "/tmp/") != 0) { - fail(); - } - - return 0; -} diff --git a/go/pserver/client/c/CMakeLists.txt b/go/pserver/client/c/CMakeLists.txt new file mode 100644 index 0000000000000000000000000000000000000000..a3fcaeef190a178c1eed806f3e03a14ced780eef --- /dev/null +++ b/go/pserver/client/c/CMakeLists.txt @@ -0,0 +1,5 @@ +cc_library(paddle_go_optimizer DEPS paddle_optimizer paddle_proto glog gflags protobuf) +go_library(paddle_pserver_cclient STATIC DEPS paddle_go_optimizer) +if(WITH_TESTING) + add_subdirectory(test) +endif() diff --git a/go/pserver/cclient/cclient.go b/go/pserver/client/c/cclient.go similarity index 88% rename from go/pserver/cclient/cclient.go rename to go/pserver/client/c/cclient.go index bbaf43d9f1434a278568bc110a709718b9b8c222..7ddaceb7ed33db32e19a191402100a0c0efa241a 100644 --- a/go/pserver/cclient/cclient.go +++ b/go/pserver/client/c/cclient.go @@ -30,15 +30,16 @@ import ( "unsafe" "github.com/PaddlePaddle/Paddle/go/pserver" + "github.com/PaddlePaddle/Paddle/go/pserver/client" log "github.com/sirupsen/logrus" ) var nullPtr = unsafe.Pointer(uintptr(0)) var mu sync.Mutex -var handleMap = make(map[C.paddle_pserver_client]*pserver.Client) +var handleMap = make(map[C.paddle_pserver_client]*client.Client) var curHandle C.paddle_pserver_client -func add(c *pserver.Client) C.paddle_pserver_client { +func add(c *client.Client) C.paddle_pserver_client { mu.Lock() defer mu.Unlock() client := curHandle @@ -47,13 +48,13 @@ func add(c *pserver.Client) C.paddle_pserver_client { return client } -func get(client C.paddle_pserver_client) *pserver.Client { +func get(client C.paddle_pserver_client) *client.Client { mu.Lock() defer mu.Unlock() return handleMap[client] } -func remove(client C.paddle_pserver_client) *pserver.Client { +func remove(client C.paddle_pserver_client) *client.Client { mu.Lock() defer mu.Unlock() h := handleMap[client] @@ -80,9 +81,9 @@ func (s selector) Select() bool { return bool(s) } -type lister []pserver.Server +type lister []client.Server -func (l lister) List() []pserver.Server { +func (l lister) List() []client.Server { return l } @@ -90,19 +91,22 @@ func (l lister) List() []pserver.Server { func paddle_new_pserver_client(addrs *C.char, selected int) C.paddle_pserver_client { a := C.GoString(addrs) as := strings.Split(a, ",") - servers := make([]pserver.Server, len(as)) + servers := make([]client.Server, len(as)) for i := range as { servers[i].Index = i servers[i].Addr = as[i] } - c := pserver.NewClient(lister(servers), len(as), selector(selected != 0)) + c := client.NewClient(lister(servers), len(as), selector(selected != 0)) return add(c) } //export paddle_new_etcd_pserver_client -func paddle_new_etcd_pserver_client(etcd_addr *C.char) C.paddle_pserver_client { - // TODO(helin): fault tolerant pserver client using etcd. - panic("not implemented.") +func paddle_new_etcd_pserver_client(etcd_endpoints *C.char, selected int) C.paddle_pserver_client { + // TODO(Longfei: use etcd lock to decide which trainer to initialize the parameters) + addr := C.GoString(etcd_endpoints) + etcd_client := client.NewEtcd(addr) + c := client.NewClient(etcd_client, etcd_client.Desired(), selector(selected != 0)) + return add(c) } //export paddle_pserver_client_release diff --git a/go/pserver/cclient/test/CMakeLists.txt b/go/pserver/client/c/test/CMakeLists.txt similarity index 55% rename from go/pserver/cclient/test/CMakeLists.txt rename to go/pserver/client/c/test/CMakeLists.txt index 170730ccebbae9c99ebafe360261c32f5b2f4e08..f287f850719afecf918f6a53f6528d1d15ff4672 100644 --- a/go/pserver/cclient/test/CMakeLists.txt +++ b/go/pserver/client/c/test/CMakeLists.txt @@ -1,3 +1,2 @@ - -cc_binary(main SRCS main.c DEPS paddle_pserver_cclient) cc_test(test_cclient SRCS test_cclient.c DEPS paddle_pserver_cclient) +add_style_check_target(test_cclient test_cclient.c) diff --git a/go/pserver/cclient/test/main.c b/go/pserver/client/c/test/test_cclient.c similarity index 80% rename from go/pserver/cclient/test/main.c rename to go/pserver/client/c/test/test_cclient.c index 03f749d4e46c4890c6dcfa25af572dab4a053c86..8eababbe33914d25f1eb91b991e11eaacd2e4716 100644 --- a/go/pserver/cclient/test/main.c +++ b/go/pserver/client/c/test/test_cclient.c @@ -16,7 +16,7 @@ void sendGrads(paddle_pserver_client c) { "param_a", PADDLE_ELEMENT_TYPE_FLOAT32, grad_a, 2000}; paddle_gradient grad2 = { "param_b", PADDLE_ELEMENT_TYPE_FLOAT32, grad_b, 3000}; - paddle_gradient* grads[2] = {&grad1, &grad2}; + paddle_gradient *grads[2] = {&grad1, &grad2}; if (paddle_send_grads(c, grads, 2)) { fail(); } @@ -39,7 +39,7 @@ void getParams(paddle_pserver_client c) { param_b.content = content_b; param_b.content_len = 3000; - paddle_parameter* params[2] = {¶m_a, ¶m_b}; + paddle_parameter *params[2] = {¶m_a, ¶m_b}; if (paddle_get_params(c, params, 2)) { fail(); } @@ -48,6 +48,17 @@ void getParams(paddle_pserver_client c) { int main() { char addr[] = "localhost:3000"; paddle_pserver_client c = paddle_new_pserver_client(addr, 1); + char *config_proto; + size_t config_proto_len = 0; + ssize_t nread; + FILE *fp = fopen("testdata/optimizer.pb", "r"); + if (!fp) { + fail(); + } + while ((nread = getline(&config_proto, &config_proto_len, fp)) != -1) { + printf("%s", config_proto); + } + fclose(fp); retry: if (paddle_begin_init_params(c)) { paddle_parameter param; @@ -59,7 +70,8 @@ retry: param.name = name_a; param.content = content_a; param.content_len = 2000; - int error = paddle_init_param(c, param, NULL, 0); + int error = + paddle_init_param(c, param, (void *)config_proto, config_proto_len); if (error != 0) { goto retry; } @@ -68,7 +80,7 @@ retry: param.name = name_b; param.content = content_b; param.content_len = 3000; - error = paddle_init_param(c, param, NULL, 0); + error = paddle_init_param(c, param, (void *)config_proto, config_proto_len); if (error != 0) { goto retry; } diff --git a/go/pserver/cclient/test/test_mnist.py b/go/pserver/client/c/test/test_mnist.py similarity index 100% rename from go/pserver/cclient/test/test_mnist.py rename to go/pserver/client/c/test/test_mnist.py diff --git a/go/pserver/cclient/test/test_train.py b/go/pserver/client/c/test/test_train.py similarity index 97% rename from go/pserver/cclient/test/test_train.py rename to go/pserver/client/c/test/test_train.py index 3f8d5d793bdeb687c9d234005d9e2eae760cc3a7..68e1d9b269209b695e27f91a656dc2d8e527b4cd 100644 --- a/go/pserver/cclient/test/test_train.py +++ b/go/pserver/client/c/test/test_train.py @@ -22,6 +22,8 @@ def main(): # create optimizer optimizer = paddle.optimizer.Momentum(momentum=0) + #TODO(zhihong) : replace optimizer with new OptimizerConfig + trainer = paddle.trainer.SGD(cost=cost, parameters=parameters, update_equation=optimizer, diff --git a/go/pserver/client/c/test/testdata/optimizer.pb b/go/pserver/client/c/test/testdata/optimizer.pb new file mode 100644 index 0000000000000000000000000000000000000000..27dd3bc5f19e2964b4b674cff8860233cbdb445a Binary files /dev/null and b/go/pserver/client/c/test/testdata/optimizer.pb differ diff --git a/go/pserver/client.go b/go/pserver/client/client.go similarity index 92% rename from go/pserver/client.go rename to go/pserver/client/client.go index 6938b9d5ce6f6d73c05bd6e3154777023965c319..aa8bfe30c26fcc0875ad479ecd562700ccefa5a3 100644 --- a/go/pserver/client.go +++ b/go/pserver/client/client.go @@ -1,4 +1,4 @@ -package pserver +package client import ( "errors" @@ -7,6 +7,7 @@ import ( "time" "github.com/PaddlePaddle/Paddle/go/connection" + "github.com/PaddlePaddle/Paddle/go/pserver" log "github.com/sirupsen/logrus" ) @@ -105,7 +106,7 @@ func (c *Client) BeginInitParams() bool { } // InitParam initializes the parameter on parameter servers. -func (c *Client) InitParam(paramWithConfigs ParameterWithConfig) error { +func (c *Client) InitParam(paramWithConfigs pserver.ParameterWithConfig) error { return c.pservers[c.partition(paramWithConfigs.Param.Name)].Call("Service.InitParam", paramWithConfigs, nil) } @@ -123,13 +124,13 @@ func (c *Client) FinishInitParams() error { // SendGrads sends gradients to parameter servers for updating // parameters. -func (c *Client) SendGrads(grads []Gradient) error { +func (c *Client) SendGrads(grads []pserver.Gradient) error { if len(grads) == 0 { return errors.New("no gradient received") } errCh := make(chan error, len(grads)) for _, g := range grads { - go func(g Gradient) { + go func(g pserver.Gradient) { err := c.pservers[c.partition(g.Name)].Call("Service.SendGrad", g, nil) errCh <- err }(g) @@ -151,7 +152,7 @@ func (c *Client) SendGrads(grads []Gradient) error { type result struct { idx int - param Parameter + param pserver.Parameter err error } @@ -170,12 +171,12 @@ func (r results) Swap(i int, j int) { } // GetParams gets parameters from parameter servers. -func (c *Client) GetParams(names []string) ([]Parameter, error) { +func (c *Client) GetParams(names []string) ([]pserver.Parameter, error) { rCh := make(chan result, len(names)) for idx, name := range names { go func(name string, idx int) { - var parameter Parameter + var parameter pserver.Parameter err := c.pservers[c.partition(name)].Call("Service.GetParam", name, ¶meter) rCh <- result{idx: idx, param: parameter, err: err} }(name, idx) @@ -196,7 +197,7 @@ func (c *Client) GetParams(names []string) ([]Parameter, error) { } sort.Sort(rs) - ps := make([]Parameter, len(rs)) + ps := make([]pserver.Parameter, len(rs)) for i := range rs { ps[i] = rs[i].param } diff --git a/go/pserver/client/client_test.go b/go/pserver/client/client_test.go new file mode 100644 index 0000000000000000000000000000000000000000..29b400812c9dc3a5f44700eacbf7ba043248f2f2 --- /dev/null +++ b/go/pserver/client/client_test.go @@ -0,0 +1,180 @@ +package client_test + +import ( + "context" + "io/ioutil" + "net" + "net/http" + "net/rpc" + "strconv" + "strings" + "testing" + "time" + + "github.com/PaddlePaddle/Paddle/go/pserver" + "github.com/PaddlePaddle/Paddle/go/pserver/client" + "github.com/coreos/etcd/clientv3" + log "github.com/sirupsen/logrus" +) + +const ( + numPserver = 10 + etcdEndpoints = "127.0.0.1:2379" + timeout = 2 * time.Second +) + +var pserverClientPorts [numPserver]int + +// this function init pserver client and return their ports in an array. +func initClient() [numPserver]int { + var ports [numPserver]int + for i := 0; i < numPserver; i++ { + l, err := net.Listen("tcp", ":0") + if err != nil { + panic(err) + } + + ss := strings.Split(l.Addr().String(), ":") + p, err := strconv.Atoi(ss[len(ss)-1]) + if err != nil { + panic(err) + } + ports[i] = p + + go func(l net.Listener) { + s, err := pserver.NewService(0) + if err != nil { + panic(err) + } + server := rpc.NewServer() + err = server.Register(s) + if err != nil { + panic(err) + } + + mux := http.NewServeMux() + mux.Handle(rpc.DefaultRPCPath, server) + err = http.Serve(l, mux) + if err != nil { + panic(err) + } + }(l) + } + return ports +} + +func initNativeClient() { + pserverClientPorts = initClient() +} + +func initEtcdClient() { + client, err := clientv3.New(clientv3.Config{ + Endpoints: []string{etcdEndpoints}, + DialTimeout: time.Second * time.Duration(1), + }) + if err != nil { + log.Errorf("err %v", err) + } + ctx, cancel := context.WithTimeout(context.Background(), timeout) + client.Delete(ctx, pserver.PsDesired) + client.Delete(ctx, pserver.PsPath) + client.Put(ctx, pserver.PsDesired, strconv.Itoa(numPserver)) + ports := initClient() + for i := 0; i < numPserver; i++ { + client.Put(ctx, pserver.PsPath+strconv.Itoa(i), ":"+strconv.Itoa(ports[i])) + } + cancel() + client.Close() +} + +type selector bool + +func (s selector) Select() bool { + return bool(s) +} + +type lister []client.Server + +func (l lister) List() []client.Server { + return l +} + +func ClientTest(t *testing.T, c *client.Client) { + selected := c.BeginInitParams() + if !selected { + t.Fatal("should be selected.") + } + + const numParameter = 100 + config, err := ioutil.ReadFile("./c/test/testdata/optimizer.pb") + if err != nil { + t.Fatalf("read optimizer proto failed") + } + for i := 0; i < numParameter; i++ { + var p pserver.Parameter + p.Name = "p_" + strconv.Itoa(i) + p.ElementType = pserver.Float32 + p.Content = make([]byte, (i+1)*100) + err := c.InitParam(pserver.ParameterWithConfig{Param: p, Config: config}) + if err != nil { + t.Fatal(err) + } + } + + err = c.FinishInitParams() + if err != nil { + t.Fatal(err) + } + + var grads []pserver.Gradient + for i := 0; i < numParameter/2; i++ { + var g pserver.Gradient + g.Name = "p_" + strconv.Itoa(i) + g.ElementType = pserver.Float32 + g.Content = make([]byte, (i+1)*100) + grads = append(grads, g) + } + + err = c.SendGrads(grads) + if err != nil { + t.Fatal(err) + } + + names := make([]string, numParameter) + for i := 0; i < numParameter; i++ { + names[i] = "p_" + strconv.Itoa(i) + } + + params, err := c.GetParams(names) + if err != nil { + t.Fatal(err) + } + + if len(names) != len(params) { + t.Fatalf("parameter size not match, need: %d, have: %d", len(names), len(params)) + } + + for i := range params { + if names[i] != params[i].Name { + t.Fatalf("order of returned parameter does not required: parameter name: %s, required name: %s", names[i], params[i].Name) + } + } +} + +func TestNativeClient(t *testing.T) { + initNativeClient() + servers := make([]client.Server, numPserver) + for i := 0; i < numPserver; i++ { + servers[i] = client.Server{Index: i, Addr: ":" + strconv.Itoa(pserverClientPorts[i])} + } + c1 := client.NewClient(lister(servers), len(servers), selector(true)) + ClientTest(t, c1) +} + +// TODO: tmperary disable etcdClient test for dependency of etcd) +func EtcdClient(t *testing.T) { + initEtcdClient() + etcd_client := client.NewEtcd(etcdEndpoints) + c2 := client.NewClient(etcd_client, etcd_client.Desired(), selector(true)) + ClientTest(t, c2) +} diff --git a/go/pserver/client/etcd_client.go b/go/pserver/client/etcd_client.go new file mode 100644 index 0000000000000000000000000000000000000000..1fd3479aa88ccbbe7c5067da1e9886b65352e847 --- /dev/null +++ b/go/pserver/client/etcd_client.go @@ -0,0 +1,125 @@ +package client + +import ( + "context" + "strconv" + "strings" + "time" + + "github.com/PaddlePaddle/Paddle/go/pserver" + "github.com/coreos/etcd/clientv3" + log "github.com/sirupsen/logrus" +) + +const ( + DefaultEtcdTimeout time.Duration = 5 * time.Second +) + +// EtcdClient is used by pserver client that is a part of trainer process. +// TODO: +// 1. add watcher to watch the change state of pservers) +// 1. add etcd lock) +type EtcdClient struct { + client *clientv3.Client + timeout time.Duration + endpoints []string +} + +// Desired read ps desired number from etcd. +func (p *EtcdClient) Desired() int { + var psDesired int + for { + ctx, cancel := context.WithTimeout(context.Background(), p.timeout) + resp, err := p.client.Get(ctx, pserver.PsDesired) + cancel() + if err != nil { + log.Errorf("Get ps dresire number failed! recnnectiong..., %v", err) + time.Sleep(p.timeout) + continue + } + + kvs := resp.Kvs + if len(kvs) == 0 { + log.Infoln("Waiting for ps desired registered ...") + time.Sleep(p.timeout) + continue + } + + psDesired, err = strconv.Atoi(string(resp.Kvs[0].Value)) + if err != nil { + log.Errorf("psDesired %s invalid %v", psDesired, err) + time.Sleep(p.timeout) + continue + } + + log.Debugf("Get psDesired number: %d", psDesired) + break + } + return psDesired +} + +// List return the pserver list read from etcd. +func (p *EtcdClient) List() []Server { + psDesired := p.Desired() + + servers := make([]Server, psDesired) + for { + for i := 0; i < psDesired; i++ { + ctx, cancel := context.WithTimeout(context.Background(), p.timeout) + cancel() + psKey := pserver.PsPath + strconv.Itoa(i) + log.Debugf("checking %s", psKey) + resp, err := p.client.Get(ctx, psKey) + if err != nil { + log.Infof("Get psKey= %s error, %v", psKey, err) + time.Sleep(p.timeout) + continue + } + kvs := resp.Kvs + if len(kvs) == 0 { + log.Infof("Waiting for ps addr registered ...") + time.Sleep(p.timeout) + continue + } + + psAddr := string(resp.Kvs[0].Value) + // TODO(Longfei) check the ps address + if psAddr == "" { + log.Infof("Get psKey = %s, psAddr is empty", psKey) + time.Sleep(p.timeout) + continue + } + log.Infof("got value (%s) for key: %s", psAddr, psKey) + servers[i].Index = i + servers[i].Addr = psAddr + } + break + } + return servers +} + +// NewEtcd create a etcd client to return the state of pserver on etcd. +func NewEtcd(endpoints string) *EtcdClient { + ep := strings.Split(endpoints, ",") + var cli *clientv3.Client + var err error + for { + cli, err = clientv3.New(clientv3.Config{ + Endpoints: ep, + DialTimeout: DefaultEtcdTimeout, + }) + if err != nil { + log.Errorf("Init etcd connection failed: %v", err) + time.Sleep(DefaultEtcdTimeout) + continue + } + break + } + log.Infof("Connected to etcd: %s\n", endpoints) + client := &EtcdClient{ + client: cli, + timeout: DefaultEtcdTimeout, + endpoints: ep, + } + return client +} diff --git a/go/pserver/client_test.go b/go/pserver/client_test.go deleted file mode 100644 index 5bd16118a7f70b766016abfce55f6bb2adf8cc60..0000000000000000000000000000000000000000 --- a/go/pserver/client_test.go +++ /dev/null @@ -1,126 +0,0 @@ -package pserver_test - -import ( - "net" - "net/http" - "net/rpc" - "strconv" - "strings" - "testing" - - "github.com/PaddlePaddle/Paddle/go/pserver" -) - -const numPserver = 10 - -var port [numPserver]int - -func init() { - for i := 0; i < numPserver; i++ { - l, err := net.Listen("tcp", ":0") - if err != nil { - panic(err) - } - - ss := strings.Split(l.Addr().String(), ":") - p, err := strconv.Atoi(ss[len(ss)-1]) - if err != nil { - panic(err) - } - port[i] = p - - go func(l net.Listener) { - s, err := pserver.NewService(0) - if err != nil { - panic(err) - } - server := rpc.NewServer() - err = server.Register(s) - if err != nil { - panic(err) - } - - mux := http.NewServeMux() - mux.Handle(rpc.DefaultRPCPath, server) - err = http.Serve(l, mux) - if err != nil { - panic(err) - } - }(l) - } -} - -type selector bool - -func (s selector) Select() bool { - return bool(s) -} - -type lister []pserver.Server - -func (l lister) List() []pserver.Server { - return l -} - -func TestClientFull(t *testing.T) { - servers := make([]pserver.Server, numPserver) - for i := 0; i < numPserver; i++ { - servers[i] = pserver.Server{Index: i, Addr: ":" + strconv.Itoa(port[i])} - } - c := pserver.NewClient(lister(servers), len(servers), selector(true)) - selected := c.BeginInitParams() - if !selected { - t.Fatal("should be selected.") - } - - const numParameter = 100 - for i := 0; i < numParameter; i++ { - var p pserver.Parameter - p.Name = "p_" + strconv.Itoa(i) - p.ElementType = pserver.Float32 - p.Content = make([]byte, (i+1)*100) - err := c.InitParam(pserver.ParameterWithConfig{Param: p}) - if err != nil { - t.Fatal(err) - } - } - - err := c.FinishInitParams() - if err != nil { - t.Fatal(err) - } - - var grads []pserver.Gradient - for i := 0; i < numParameter/2; i++ { - var g pserver.Gradient - g.Name = "p_" + strconv.Itoa(i) - g.ElementType = pserver.Float32 - g.Content = make([]byte, (i+1)*100) - grads = append(grads, g) - } - - err = c.SendGrads(grads) - if err != nil { - t.Fatal(err) - } - - names := make([]string, numParameter) - for i := 0; i < numParameter; i++ { - names[i] = "p_" + strconv.Itoa(i) - } - - params, err := c.GetParams(names) - if err != nil { - t.Fatal(err) - } - - if len(names) != len(params) { - t.Fatalf("parameter size not match, need: %d, have: %d", len(names), len(params)) - } - - for i := range params { - if names[i] != params[i].Name { - t.Fatalf("order of returned parameter does not required: parameter name: %s, required name: %s", names[i], params[i].Name) - } - } -} diff --git a/go/pserver/etcd_client.go b/go/pserver/etcd_client.go index 4d88243edd4aa817ddc263ba316a3f6be9e1e67f..37b8d522c1bd07acb41b9515a6d9bc15eae9aa32 100644 --- a/go/pserver/etcd_client.go +++ b/go/pserver/etcd_client.go @@ -13,6 +13,13 @@ import ( log "github.com/sirupsen/logrus" ) +const ( + // PsDesired is etcd path for store desired pserver count + PsDesired = "/ps_desired" + // PsAddr is the base dir for pserver to store their addr + PsPath = "/ps/" +) + // EtcdClient is the etcd client that the pserver uses for fault // tolerance, service registry and coordination. type EtcdClient struct { @@ -68,7 +75,7 @@ func (e *EtcdClient) Register() (int, error) { // it at the same time. for { ctx, cancel := context.WithTimeout(context.Background(), time.Second) - _, err := e.initDesiredPsercers(ctx, e.numPservers) + _, err := e.initDesiredPservers(ctx, e.numPservers) cancel() if err != nil { log.Warn(err) @@ -120,7 +127,7 @@ func (e *EtcdClient) Register() (int, error) { return pserverIdx, nil } -func (e *EtcdClient) initDesiredPsercers(ctx context.Context, numPservers int) (*clientv3.TxnResponse, error) { +func (e *EtcdClient) initDesiredPservers(ctx context.Context, numPservers int) (*clientv3.TxnResponse, error) { return concurrency.NewSTM(e.etcdClient, func(c concurrency.STM) error { dsStr := c.Get(PsDesired) if dsStr == "" { @@ -136,7 +143,7 @@ func (e *EtcdClient) registerPserverEtcd(ctx context.Context) (int, error) { _, err := concurrency.NewSTM(e.etcdClient, func(c concurrency.STM) error { registered := false for i := 0; i < e.desired; i++ { - psKey := "/ps/" + strconv.Itoa(i) + psKey := PsPath + strconv.Itoa(i) log.Debugf("checking %s", psKey) ps := c.Get(psKey) log.Debugf("got value (%s) for key: %s", ps, psKey) diff --git a/go/pserver/optimizer.c b/go/pserver/optimizer.c deleted file mode 100644 index f16ba2cbf8e168a434fdcdb4f1e0ba1e98d91c6b..0000000000000000000000000000000000000000 --- a/go/pserver/optimizer.c +++ /dev/null @@ -1,58 +0,0 @@ -#include - -#include "optimizer.h" - -typedef int (*update_func)(void*, void*, paddle_element_type, const void*, int); -typedef void (*release_func)(void*); - -typedef struct paddle_optimizer { - update_func update; - release_func release; - void* optimizer; -} paddle_optimizer; - -void paddle_release_optimizer(paddle_optimizer* o) { - o->release(o->optimizer); - free(o); -} - -int paddle_update_parameter(paddle_optimizer* o, - void* buffer, - paddle_element_type element_type, - const void* gradient, - int num_bytes) { - return o->update(o->optimizer, buffer, element_type, gradient, num_bytes); -} - -typedef struct { double learning_rate; } SGD_optimizer; - -int update_SGD(void* optimizer, - void* buffer, - paddle_element_type element_type, - const void* gradient, - int num_bytes) { - SGD_optimizer* o = (SGD_optimizer*)optimizer; - float* parameter = (float*)buffer; - float* grad = (float*)gradient; - - int i; - for (i = 0; i < num_bytes / sizeof(float); ++i) { - parameter[i] -= o->learning_rate * grad[i]; - } - return 0; -} - -void release_SGD(void* optimizer) { - SGD_optimizer* o = (SGD_optimizer*)optimizer; - // nothing allocated on heap -} - -paddle_optimizer* paddle_create_SGD_optimizer(double learning_rate) { - SGD_optimizer* impl = (SGD_optimizer*)malloc(sizeof(SGD_optimizer)); - impl->learning_rate = learning_rate; - paddle_optimizer* opt = (paddle_optimizer*)malloc(sizeof(paddle_optimizer)); - opt->update = update_SGD; - opt->release = release_SGD; - opt->optimizer = impl; - return opt; -} diff --git a/go/pserver/optimizer.go b/go/pserver/optimizer.go index 417f8c509388055028bd46e42501741298308193..bca3718af32b35416e94606816569dd9e76eccb6 100644 --- a/go/pserver/optimizer.go +++ b/go/pserver/optimizer.go @@ -1,42 +1,71 @@ package pserver -/* -#include "optimizer.h" -*/ +// #cgo CFLAGS: -I ../../ +// //FIXME: ldflags contain "build" path +// #cgo LDFLAGS: ../../build/go/pserver/client/c/libpaddle_go_optimizer.a -lstdc++ +// #include "paddle/optimizer/optimizer.h" +// #include +// #include import "C" + import ( "fmt" "unsafe" -) - -type optimizerType int -const ( - sgd optimizerType = iota + log "github.com/sirupsen/logrus" ) var nullPtr = unsafe.Pointer(uintptr(0)) type optimizer struct { - opt *C.struct_paddle_optimizer + opt *C.struct_paddle_optimizer + elementType ElementType } -func newOptimizer(t optimizerType, learning_rate float64) *optimizer { +func cArrayToSlice(p unsafe.Pointer, len int) []byte { + if p == nullPtr { + return nil + } + + // create a Go clice backed by a C array, reference: + // https://github.com/golang/go/wiki/cgo#turning-c-arrays-into-go-slices + // + // Go garbage collector will not interact with this data, need + // to be freed properly. + return (*[1 << 30]byte)(p)[:len:len] +} + +func newOptimizer(paramWithConfigs ParameterWithConfig) *optimizer { o := &optimizer{} - o.opt = C.paddle_create_SGD_optimizer(C.double(learning_rate)) + o.elementType = paramWithConfigs.Param.ElementType + p := paramWithConfigs.Param + c := paramWithConfigs.Config + log.WithFields(log.Fields{ + "ElementType": p.ElementType, + "ParamSize": len(p.Content), + "ConfigSize": len(c), + }).Info("New Optimizer Created with config:") + var cbuffer unsafe.Pointer + cbuffer = C.malloc(C.size_t(len(p.Content))) + C.memcpy(cbuffer, unsafe.Pointer(&p.Content[0]), C.size_t(len(p.Content))) + o.opt = C.paddle_create_optimizer((*C.uchar)(&c[0]), C.int(len(c)), + C.paddle_element_type(p.ElementType), cbuffer, C.int(len(p.Content)/C.sizeof_float), + (*C.char)(nullPtr), 0) return o } -func (o *optimizer) UpdateParameter(p Parameter, g Gradient) error { - if len(p.Content) != len(g.Content) { - return fmt.Errorf("Name: %s, parameter and gradient length not match, parameter: %d, gradient: %d", p.Name, len(p.Content), len(g.Content)) - } +func (o *optimizer) GetWeights() []byte { + var buffer unsafe.Pointer + buffer_len := C.paddle_optimizer_get_weights(o.opt, &buffer) + return cArrayToSlice(buffer, int(buffer_len)*C.sizeof_float) +} - if p.ElementType != g.ElementType { - return fmt.Errorf("Name: %s, parameter and gradient element type not match, parameter: %v, gradient: %v", p.Name, p.ElementType, g.ElementType) +func (o *optimizer) UpdateParameter(g Gradient) error { + if o.elementType != g.ElementType { + return fmt.Errorf("Name: %s, parameter and gradient element type not match, parameter: %v, gradient: %v", g.Name, o.elementType, g.ElementType) } - r := C.paddle_update_parameter(o.opt, unsafe.Pointer(&p.Content[0]), C.paddle_element_type(p.ElementType), unsafe.Pointer(&g.Content[0]), C.int(len(g.Content))) + r := C.paddle_update_parameter(o.opt, C.paddle_element_type(g.ElementType), unsafe.Pointer(&g.Content[0]), C.int(len(g.Content))/C.sizeof_float) if r != 0 { return fmt.Errorf("optimizer update returned error code: %d", r) } diff --git a/go/pserver/optimizer.h b/go/pserver/optimizer.h deleted file mode 100644 index a7e3ff0530035f2cec4359a97d3e8ff81362d363..0000000000000000000000000000000000000000 --- a/go/pserver/optimizer.h +++ /dev/null @@ -1,22 +0,0 @@ -#ifndef PADDLE_PSERVER_OPTIMIZER_H -#define PADDLE_PSERVER_OPTIMIZER_H - -typedef enum { - PADDLE_ELEMENT_TYPE_INT32 = 0, - PADDLE_ELEMENT_TYPE_UINT32 = 1, - PADDLE_ELEMENT_TYPE_INT64 = 2, - PADDLE_ELEMENT_TYPE_UINT64 = 3, - PADDLE_ELEMENT_TYPE_FLOAT32 = 4, - PADDLE_ELEMENT_TYPE_FLOAT64 = 5, -} paddle_element_type; - -struct paddle_optimizer; -struct paddle_optimizer* paddle_create_SGD_optimizer(double learning_rate); -void paddle_release_optimizer(struct paddle_optimizer* o); -int paddle_update_parameter(struct paddle_optimizer* o, - void* buffer, - paddle_element_type element_type, - const void* gradient, - int num_bytes); - -#endif /* PADDLE_PSERVER_OPTIMIZER_H */ diff --git a/go/pserver/optimizer_test.go b/go/pserver/optimizer_test.go index 64d6d092aa1864fbca012214ced5e03e157d4a4c..0b2f4cfa41a630645c128ac13826de9d8b1d521b 100644 --- a/go/pserver/optimizer_test.go +++ b/go/pserver/optimizer_test.go @@ -1,8 +1,24 @@ package pserver -import "testing" +import ( + "io/ioutil" + "testing" +) -func TestSGDCreateRelease(t *testing.T) { - o := newOptimizer(sgd, 1) +func TestOptimizerCreateRelease(t *testing.T) { + p := Parameter{ + Name: "a", + ElementType: Int32, + } + p.Content = []byte{1, 3} + config, err := ioutil.ReadFile("./client/c/test/testdata/optimizer.pb") + if err != nil { + t.Fatalf("read optimizer proto failed") + } + param := ParameterWithConfig{ + Param: p, + Config: config, + } + o := newOptimizer(param) o.Cleanup() } diff --git a/go/pserver/service.go b/go/pserver/service.go index f386ebea1eb8659a988de2a807303bb6687fa429..7711dc027e173e862f9b33e7a57224097026872c 100644 --- a/go/pserver/service.go +++ b/go/pserver/service.go @@ -24,9 +24,6 @@ const ( Float64 ) -// PsDesired is etcd path for store desired pserver count -const PsDesired = "/ps_desired" - // Parameter is a piece of data to sync with the parameter server. type Parameter struct { Name string @@ -48,9 +45,8 @@ type Service struct { initialized chan struct{} idx int - mu sync.Mutex - opt *optimizer - paramMap map[string]Parameter + mu sync.Mutex + optMap map[string]*optimizer } // NewService creates a new service, will bypass etcd registration if no @@ -58,9 +54,8 @@ type Service struct { func NewService(idx int) (*Service, error) { s := &Service{ idx: idx, - opt: newOptimizer(sgd, 0.005), } - s.paramMap = make(map[string]Parameter) + s.optMap = make(map[string]*optimizer) s.initialized = make(chan struct{}) return s, nil } @@ -81,7 +76,7 @@ func (s *Service) InitParam(paramWithConfigs ParameterWithConfig, dummy *int) er // TODO(helin): check if paramWithConfigs.Param.Content is // properly memory aligned, if not, make copy to a memory // aligned region. - s.paramMap[paramWithConfigs.Param.Name] = paramWithConfigs.Param + s.optMap[paramWithConfigs.Param.Name] = newOptimizer(paramWithConfigs) return nil } @@ -110,12 +105,12 @@ func (s *Service) SendGrad(g Gradient, dummy *int) error { s.mu.Lock() defer s.mu.Unlock() - p, ok := s.paramMap[g.Name] + o, ok := s.optMap[g.Name] if !ok { return fmt.Errorf("parameter: %s does not exist", g.Name) } - return s.opt.UpdateParameter(p, g) + return o.UpdateParameter(g) } // GetParam gets parameters from the parameter server. @@ -124,7 +119,7 @@ func (s *Service) GetParam(name string, parameter *Parameter) error { s.mu.Lock() defer s.mu.Unlock() - p, ok := s.paramMap[name] + opt, ok := s.optMap[name] if !ok { return fmt.Errorf("parameter: %s does not exist", name) } @@ -136,7 +131,9 @@ func (s *Service) GetParam(name string, parameter *Parameter) error { // nature. This race condition is allowed deliberately // to save the program from making a copy of the // paramter content. - *parameter = p + parameter.Name = name + parameter.ElementType = opt.elementType + parameter.Content = opt.GetWeights() return nil } diff --git a/go/pserver/service_test.go b/go/pserver/service_test.go index d9d887cffd462eed48b972466a7d83bae35d9a1c..b6d20d2c8b7ba0ccd7ab46669a597a21dc11c381 100644 --- a/go/pserver/service_test.go +++ b/go/pserver/service_test.go @@ -1,6 +1,7 @@ package pserver_test import ( + "io/ioutil" "reflect" "sync" "testing" @@ -9,7 +10,11 @@ import ( "github.com/PaddlePaddle/Paddle/go/pserver" ) -func TestFull(t *testing.T) { +const ( + OptimizerConfig = "./client/c/test/testdata/optimizer.pb" +) + +func TestServiceFull(t *testing.T) { s, err := pserver.NewService(0) if err != nil { t.Error(err) @@ -18,7 +23,12 @@ func TestFull(t *testing.T) { p.Name = "param_a" p.Content = []byte{1, 0, 0, 0, 2, 0, 0, 0, 3, 0, 0, 0} p.ElementType = pserver.Int32 - err = s.InitParam(pserver.ParameterWithConfig{Param: p, Config: nil}, nil) + config, err := ioutil.ReadFile(OptimizerConfig) + if err != nil { + t.Fatalf("read optimizer proto failed") + } + + err = s.InitParam(pserver.ParameterWithConfig{Param: p, Config: config}, nil) if err != nil { t.FailNow() } @@ -27,7 +37,7 @@ func TestFull(t *testing.T) { p1.Name = "param_b" p1.Content = []byte{0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0} p1.ElementType = pserver.Float32 - err = s.InitParam(pserver.ParameterWithConfig{Param: p1, Config: nil}, nil) + err = s.InitParam(pserver.ParameterWithConfig{Param: p1, Config: config}, nil) if err != nil { t.FailNow() } @@ -48,6 +58,7 @@ func TestFull(t *testing.T) { } g1, g2 := pserver.Gradient(p1), pserver.Gradient(p) + err = s.SendGrad(g1, nil) if err != nil { t.FailNow() @@ -142,7 +153,12 @@ func TestBlockUntilInitialized(t *testing.T) { p.Name = "param_a" p.Content = []byte{1, 0, 0, 0, 2, 0, 0, 0, 3, 0, 0, 0} p.ElementType = pserver.Int32 - err = s.InitParam(pserver.ParameterWithConfig{Param: p, Config: nil}, nil) + config, err := ioutil.ReadFile(OptimizerConfig) + if err != nil { + t.Fatalf("read optimizer proto failed") + } + err = s.InitParam(pserver.ParameterWithConfig{Param: p, Config: config}, nil) + if err != nil { t.FailNow() } diff --git a/paddle/framework/CMakeLists.txt b/paddle/framework/CMakeLists.txt index 6aa6b9bc2db6a223dd8562b76ba9d777206bfd40..baad38e3c1eb3041923e297b2ebae89e68e185a4 100644 --- a/paddle/framework/CMakeLists.txt +++ b/paddle/framework/CMakeLists.txt @@ -5,3 +5,6 @@ nv_test(dim_test SRCS dim_test.cu DEPS ddim) cc_test(variable_test SRCS variable_test.cc) cc_test(scope_test SRCS scope_test.cc) cc_test(enforce_test SRCS enforce_test.cc) +proto_library(attr_type SRCS attr_type.proto) +proto_library(op_proto SRCS op_proto.proto DEPS attr_type) +cc_test(op_proto_test SRCS op_proto_test.cc DEPS op_proto protobuf) diff --git a/paddle/framework/attr_type.proto b/paddle/framework/attr_type.proto new file mode 100644 index 0000000000000000000000000000000000000000..2d8e0476d710b7ba987d085d828ca13a4ee23707 --- /dev/null +++ b/paddle/framework/attr_type.proto @@ -0,0 +1,28 @@ +/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserve. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. */ + +syntax="proto2"; +package paddle.framework; + +// Attribute Type for paddle's Op. +// Op contains many attributes. Each type of attributes could be different. +// The AttrType will be shared between AttrDesc and AttrProto. +enum AttrType { + INT = 0; + FLOAT = 1; + STRING = 2; + INTS = 3; + FLOATS = 4; + STRINGS = 5; +} \ No newline at end of file diff --git a/paddle/framework/net_design.md b/paddle/framework/net_design.md new file mode 100644 index 0000000000000000000000000000000000000000..a5f0483081e8a03b2d001a551fcc02bbd392016d --- /dev/null +++ b/paddle/framework/net_design.md @@ -0,0 +1,250 @@ +# Network Design + +`Network` is the container and controller of a set of operators, +user can build a real network from a `NetDesc` which is a protobuf message +and use `Network.Run()` to run all the operators in the network. + +A network object knows all Operators belonging to this network. Variables, +which are inputs and outputs of these operators, +are created and managed by a hierarchy of Scope objects. + +# API + +## Net +To make the `Network` extendable, a base class is defined like this + +```c++ +// operator's index stored in a network. +typedef int OpIndex; + +// The minimum a network should be implemented. +class Net { + public: + // run all the operators and return success(true) or not, with all the + // variables are located in `scope`. `context` describes the detail execution + // environment for ops. `begin` and `end` specify the scope of `ops_` to run, + // If no positive indexes are provided, all operators in `ops_` will run. + virtual Error Run(Scope *scope, OpContext *context, OpIndex begin = -1, + OpIndex end = -1) const = 0; + + // Add an Operator according to `def`. + virtual OpIndex AddOp(const proto::OpDef &def) = 0; + + // Add optimizer operators acctording to `attrs`. + virtual Error AddOptimizerOps(const OptAttrs &attrs) = 0; + + // Add backward operators. + virtual Error AddBackwardOps() = 0; + + // Infer the shapes of variables required by operators in the network. The + // `scope` will be mutated according to the inferred shapes. + + static std::unique_ptr Create(const NetDesc &def = NetDesc()); +}; +``` + +All network implementations should build networks from a protobuf message which +describes the structure of a real network; `Run` method should be implemented by +all implementations to offer a universal method to forward or backward compute a network. + +`Net::Create` is a method of factory pattern and can be implemented like + +```c++ +std::unique Net::Create(const NetDesc& def) { + switch (def.model_type()) { + case NN: + return new Network(def); + case Recursive: + return new RecursiveNet(def); + case Recurrent: + return new RecurrentNet(def); + } + return nullptr; +} +``` + +Network is designed as the container of operators. to make it more extendable, +we decouple it from the related variable resources. + +`Run(Scope* scope)` takes the scope as a argument so that it can run in different scopes. + +Finally, `Net` can be used as followed + +```c++ +Scope default_scope; +OpContext default_context; +auto net = Net::CreateNet(def); + +if (net) { + net.Run(&default_scope, &default_context); +} +``` + +## `PlainNet` as a simple implementation of `BaseNet` + +A very basic implementation is as follows. All it does is simply to run every operators in sequence. + +```c++ +class PlainNet : public Net { + public: + // Create a network describe by `def`. NetDesc is the definition of a network. + PlainNet(const NetDesc &def); + + // Infer all the operators' input and output varialbes' shapes, will be called before every mini-batch + training. + virtual Error InferShape(Scope *scope) override; + + // Run all the operators with the `scope`, if no scope is provided, default + // scope will be used instead. If no OpContext is provicded, default context will be used. + virtual Error Run(Scope *scope = nullptr, OpContext *context=nullptr, OpIndex begin = -1, + OpIndex end = -1) const override; + + virtual OpIndex AddOp(const proto::OpDef &def) override; + + virtual Error AddOptimizerOps(const OptAttrs &attrs) override; + + virtual Error AddBackwardOps() override; + + protected: + // Create operators accordding to `def`, will be called by the constructor. + Error BuildNet(const NetDesc &def); + + // Add a operator which is identified as `type` and has attributes described + // in `attrs`, the `inputs` are the keys of readonly input variables, + // `outputs` are keys of mutable output variables. An `OpIndex` will be + // returned to indicate the offset of the new operator in `ops_`. + OpIndex AddOp(const std::string &type, const std::vector &inputs, + const std::vector &outputs, + const OprAttr &attrs = OprAttr()); + + private: + // the operators owned by `Network`. + std::vector ops_; +}; +``` + +`PlainNet` will create operators so that a private member `ops_` is defined, +the operators are created by `CreateNet`, and each operator is created by `AddOp`. + + +## PlainNet Usage +`PlainNet` can be used to define and run a network as follows + +```c++ +// create an empty scope located on CPU device. +Scope scope(CPUPlace()); + +// create and init variables described in `net_desc`. +scope.CreateVariables(net_desc); +scope.InitVariables(net_desc); + +// create a network according to `net_desc` +auto net = Net::CreateNet(net_desc); +// Add more operators if needed. +net->AddOp(add...); +net->AddOp(fc...); + +net->AddBackwardOps(); +net->AddOptimizerOps(); + +// run the network providing the `scope`. +net.Run(&scope); +``` + +## `NetBuilder` as a C++ syntax wrapper +This is a detailed description of the user-related C++ network API, and may not needed in the prototype development stage. + +The `NetBuilder` will give users a much simpler syntax as follows to create a network, and demonstrates how to use the `BaseNet`'s raw interfaces. + +```c++ +Variable* fc_out = builder.AddOp("fc", input=image, size=100, activation="Sigmoid"); +Variable* prediction = builder.AddOp("fc", input=fc_out, size=10, activation="Sigmoid"); +Variable* loss = builder.AddOp("cross_entropy", input=prediction, label=label); +Variable* avg_loss = builder.AddOp("mean", loss); + +builder.BackwardFrom(avg_loss) +builder.AddOptimization(1e-4, "adam"); +builder.Run(); +``` + +`NetBuilder` will call `Net` 's virtual functions to change the real network structure, here is a sample definition + +```c++ +class NetBuilder final { + public: + NetBuilder(Net* net) : net_(net) {} + + Variable* AddOp(const string& type, const vector& inputs, + size_t size, Activation act) { + // much code here. + // ... + net_->AddOp(def); + need_rebuild_net_ = true; + net_->InferShape(); + // ... + } + + Error BackwardFrom(const Variable& cost); + + Error Run(Scope* scope, OpContext* context, bool need_backward = true) { + // backward. + if (need_backward) { + if (need_rebuild_net_) { + AddBackwardOps(); + AddOptimizerOps(); + } + net_->Run(scope, context); + return; + } + // just forward. + net_->Run(scope, context, 0, last_forward_op_); + } + + protected: + Error AddBackwardOps(); + Error AddOptimizerOps(); + + private: + Net* net_; + OpIndex last_forward_op_{-1}; + bool need_rebuild_net_{true}; +} +``` + +## Compatibility with RNN + +Benefitting from the decoupling of `PlainNet.Run` and `Scope`, `PlainNet` is compatible with future RNN design, +for example we can implement a simple recurrent neural network as follows + +```c++ +// copy some `vars` form `source` to `target` +void Copy(const Scope &source, Scope &target, + const std::vector &vars); + +Scope default_scope; +// some initial mutations on `default_scope` here. + +auto rnn_step_net = PlainNet(rnn_step_net_def); + +// Create rnn's states, the last scope is used to store rnn outputs. +Scope *rnn_states = new Scope[num_states + 1]; + +for (int i = 0; i < num_states + 1; i++) { + // Initialize all rnn state scopes, copy parameters and so on. + rnn_states[i].CreateVars(rnn_step_net_def); + Copy(default_scope, rnn_states[i], rnn_related_vars); + // Prepare rnn's inlinks, just copy inlink variables to each state. + Copy(default_scope, rnn_states[i], inlink_vars); +} + +// Run the rnn. +for (int i = 0; i < num_states; i++) { + rnn_step_net.Run(rnn_states[i]); + // Copy current state's state variables to next state, the related variables + // are named like "previous_state_xxx". + Copy(rnn_states[i], rnn_states[i + 1], pre_state_vars) +} + +// Copy rnn's final outputs to `default_scope`. +Copy(rnn_states[num_states], default_scope, outlink_vars); +``` diff --git a/paddle/framework/op_proto.proto b/paddle/framework/op_proto.proto new file mode 100644 index 0000000000000000000000000000000000000000..22df6f9c6b70277ddbf31e0432401889e3aa7483 --- /dev/null +++ b/paddle/framework/op_proto.proto @@ -0,0 +1,69 @@ +/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserve. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. */ + +// Protocol Message for 3rd-party language binding. +// +// Paddle Python package will use `OpProto` to generate op creation methods. +// The op creation methods take user's input and generate `OpDesc` proto message, +// then pass `OpDesc` to C++ side and create Op pointer. +// +syntax="proto2"; +package paddle.framework; + +import "attr_type.proto"; + +// Attribute protocol message for 3rd-party language binding. +// It will store the Op support what attribute and what type. +message AttrProto { + // Supported attribute name. e.g. `scale` for cosine op. + required string name = 1; + + // Supported attribute type. + required AttrType type = 2; + + // Supported attribute comments. It helps 3rd-party language generate doc-string. + required string comment = 3; +} + +// Input or output message for 3rd-party language binding. +// It contains parameter name and its comments. +message VarProto { + // Input or output name in that op creation function. + // e.g. `cos(a, b, output, ...)`, "a", "b", "output" are names. + required string name = 1; + + // The comment for that input. It helps 3rd-party language generate doc-string. + required string comment = 2; +} + +// Op protocol message for 3rd-party language binding. +// It contains all information for generating op creation method. +message OpProto { + // The input information to generate op creation method. + repeated VarProto inputs = 1; + + // The output information to generate op creation method. + repeated VarProto outputs = 2; + + // The attribute information to generate op creation method. + repeated AttrProto attrs = 3; + + // The comments for that Op. It helps 3rd-party language generate + // doc-string. The whole documentation of that Op is generated by comment, + // inputs, outputs, attrs together. + required string comment = 4; + + // The type of that Op. + required string type = 5; +} diff --git a/paddle/framework/op_proto_test.cc b/paddle/framework/op_proto_test.cc new file mode 100644 index 0000000000000000000000000000000000000000..9c054bde44e77571330cbc59074705f0cfc1cfb6 --- /dev/null +++ b/paddle/framework/op_proto_test.cc @@ -0,0 +1,31 @@ +#include +#include + +TEST(TestOpProto, ALL) { + paddle::framework::OpProto proto; + { + auto ipt = proto.mutable_inputs()->Add(); + *ipt->mutable_name() = "a"; + *ipt->mutable_comment() = "the one input of cosine op"; + } + { + auto ipt = proto.mutable_inputs()->Add(); + *ipt->mutable_name() = "b"; + *ipt->mutable_comment() = "the other input of cosine op"; + } + { + auto opt = proto.mutable_outputs()->Add(); + *opt->mutable_name() = "output"; + *opt->mutable_comment() = "the output of cosine op"; + } + { + auto attr = proto.mutable_attrs()->Add(); + *attr->mutable_name() = "scale"; + attr->set_type(paddle::framework::AttrType::FLOAT); + *attr->mutable_comment() = "the scale attribute of cosine op"; + } + proto.set_type("cos"); + *proto.mutable_comment() = "cosine op, output = scale * cos(a, b)"; + + ASSERT_TRUE(proto.IsInitialized()); +} \ No newline at end of file diff --git a/paddle/optimizer/CMakeLists.txt b/paddle/optimizer/CMakeLists.txt index 9996d01d18b1185e9b01f8b1e4aab325eb28c894..926fee47e1f86efa60dc40a2727edb06499bec4f 100644 --- a/paddle/optimizer/CMakeLists.txt +++ b/paddle/optimizer/CMakeLists.txt @@ -12,6 +12,7 @@ set(OPITMIZER_SRCS add_library(paddle_optimizer STATIC ${OPITMIZER_SRCS}) add_dependencies(paddle_optimizer paddle_proto ${external_project_dependencies}) + if(WITH_TESTING) add_simple_unittest(serialization_test) add_simple_unittest(parameter_optimizer_test) diff --git a/paddle/platform/CMakeLists.txt b/paddle/platform/CMakeLists.txt index 5cbe491b2b57fa62a3ff1d2eb749e7d3df4dc19e..2f3d1c061e37ccdd7aec2b9c4b34ae5f5b8b08c1 100644 --- a/paddle/platform/CMakeLists.txt +++ b/paddle/platform/CMakeLists.txt @@ -1,7 +1,9 @@ -cc_library(cpu_info SRCS cpu_info.cc DEPS gflags) -cc_test(cpu_info_test SRCS cpu_info_test.cc DEPS cpu_info gflags glog) +cc_library(cpu_info SRCS cpu_info.cc DEPS gflags glog) +cc_test(cpu_info_test SRCS cpu_info_test.cc DEPS cpu_info) nv_library(gpu_info SRCS gpu_info.cc DEPS gflags) cc_library(place SRCS place.cc) cc_test(place_test SRCS place_test.cc DEPS place glog gflags) + +cc_library(dynamic_loader SRCS dynload/dynamic_loader.cc) diff --git a/paddle/platform/cpu_info.h b/paddle/platform/cpu_info.h index 8df7c7b4bca5bc88f6ed95d6ab82c81b73918e92..edd76517a6481af13fbd1e0088c191d3cc3ec37b 100644 --- a/paddle/platform/cpu_info.h +++ b/paddle/platform/cpu_info.h @@ -28,5 +28,15 @@ size_t CpuMinChunkSize(); //! Get the maximum chunk size for buddy allocator. size_t CpuMaxChunkSize(); +int GetCurrentDeviceId(void) { + int device_id; + throw_on_error(cudaGetDevice(&device_id), "cudaGetDevice failed"); + return device_id; +} + +void SetDeviceId(int device_id) { + throw_on_error(cudaSetDevice(device_id), "cudaSetDevice failed"); +} + } // namespace platform } // namespace paddle diff --git a/paddle/platform/dynload/CMakeLists.txt b/paddle/platform/dynload/CMakeLists.txt new file mode 100644 index 0000000000000000000000000000000000000000..9f829b70128655f018c59db32b95d3f1789da5fc --- /dev/null +++ b/paddle/platform/dynload/CMakeLists.txt @@ -0,0 +1 @@ +cc_library(dynamic_loader SRCS dynamic_loader.cc DEPS glog gflags) diff --git a/paddle/platform/dynload/cublas.h b/paddle/platform/dynload/cublas.h new file mode 100644 index 0000000000000000000000000000000000000000..c9150ac5738f3292f530f053d7c0bb8a1203f13d --- /dev/null +++ b/paddle/platform/dynload/cublas.h @@ -0,0 +1,104 @@ +/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserve. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. */ + +#pragma once + +#include +#include "paddle/platform/dynamic_loader.h" + +namespace paddle { +namespace platform { +namespace dynload { + +std::once_flag cublas_dso_flag; +void *cublas_dso_handle = nullptr; + +/** + * The following macro definition can generate structs + * (for each function) to dynamic load cublas routine + * via operator overloading. + * + * note: default dynamic linked libs + */ +#ifdef PADDLE_USE_DSO +#define DYNAMIC_LOAD_CUBLAS_WRAP(__name) \ + struct DynLoad__##__name { \ + template \ + cublasStatus_t operator()(Args... args) { \ + typedef cublasStatus_t (*cublasFunc)(Args...); \ + std::call_once(cublas_dso_flag, \ + paddle::platform::dynload::GetCublasDsoHandle, \ + &cublas_dso_handle); \ + void *p_##__name = dlsym(cublas_dso_handle, #__name); \ + return reinterpret_cast(p_##__name)(args...); \ + } \ + } __name; // struct DynLoad__##__name +#else +#define DYNAMIC_LOAD_CUBLAS_WRAP(__name) \ + struct DynLoad__##__name { \ + template \ + cublasStatus_t operator()(Args... args) { \ + return __name(args...); \ + } \ + } __name; // struct DynLoad__##__name +#endif + +#define DYNAMIC_LOAD_CUBLAS_V2_WRAP(__name) DYNAMIC_LOAD_CUBLAS_WRAP(__name) + +// include all needed cublas functions in HPPL +// clang-format off +#define CUBLAS_BLAS_ROUTINE_EACH(__macro) \ + __macro(cublasSgemv) \ + __macro(cublasDgemv) \ + __macro(cublasSgemm) \ + __macro(cublasDgemm) \ + __macro(cublasSgeam) \ + __macro(cublasDgeam) \ + +DYNAMIC_LOAD_CUBLAS_V2_WRAP(cublasCreate) +DYNAMIC_LOAD_CUBLAS_V2_WRAP(cublasDestroy) +DYNAMIC_LOAD_CUBLAS_V2_WRAP(cublasSetStream) +DYNAMIC_LOAD_CUBLAS_V2_WRAP(cublasSetPointerMode) +DYNAMIC_LOAD_CUBLAS_V2_WRAP(cublasGetPointerMode) +DYNAMIC_LOAD_CUBLAS_WRAP(cublasSgemmBatched) +DYNAMIC_LOAD_CUBLAS_WRAP(cublasDgemmBatched) +DYNAMIC_LOAD_CUBLAS_WRAP(cublasCgemmBatched) +DYNAMIC_LOAD_CUBLAS_WRAP(cublasZgemmBatched) +DYNAMIC_LOAD_CUBLAS_WRAP(cublasSgetrfBatched) +DYNAMIC_LOAD_CUBLAS_WRAP(cublasSgetriBatched) +DYNAMIC_LOAD_CUBLAS_WRAP(cublasDgetrfBatched) +DYNAMIC_LOAD_CUBLAS_WRAP(cublasDgetriBatched) +CUBLAS_BLAS_ROUTINE_EACH(DYNAMIC_LOAD_CUBLAS_V2_WRAP) + +#undef DYNAMIC_LOAD_CUBLAS_WRAP +#undef DYNAMIC_LOAD_CUBLAS_V2_WRAP +#undef CUBLAS_BLAS_ROUTINE_EACH + +// clang-format on +#ifndef PADDLE_TYPE_DOUBLE +#define CUBLAS_GEAM paddle::platform::dynload::cublasSgeam +#define CUBLAS_GEMV paddle::platform::dynload::cublasSgemv +#define CUBLAS_GEMM paddle::platform::dynload::cublasSgemm +#define CUBLAS_GETRF paddle::platform::dynload::cublasSgetrfBatched +#define CUBLAS_GETRI paddle::platform::dynload::cublasSgetriBatched +#else +#define CUBLAS_GEAM paddle::platform::dynload::cublasDgeam +#define CUBLAS_GEMV paddle::platform::dynload::cublasDgemv +#define CUBLAS_GEMM paddle::platform::dynload::cublasDgemm +#define CUBLAS_GETRF paddle::platform::dynload::cublasDgetrfBatched +#define CUBLAS_GETRI paddle::platform::dynload::cublasDgetriBatched +#endif +} // namespace dynload +} // namespace platform +} // namespace paddle diff --git a/paddle/platform/dynload/cudnn.h b/paddle/platform/dynload/cudnn.h new file mode 100644 index 0000000000000000000000000000000000000000..c03424b375e5dc80610eb4ca8146069b7300e016 --- /dev/null +++ b/paddle/platform/dynload/cudnn.h @@ -0,0 +1,134 @@ +/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserve. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. */ + +#pragma once + +#include +#include "paddle/platform/dynamic_loader.h" + +namespace paddle { +namespace platform { +namespace dynload { + +std::once_flag cudnn_dso_flag; +void* cudnn_dso_handle = nullptr; + +#ifdef PADDLE_USE_DSO + +#define DYNAMIC_LOAD_CUDNN_WRAP(__name) \ + struct DynLoad__##__name { \ + template \ + auto operator()(Args... args) -> decltype(__name(args...)) { \ + using cudnn_func = decltype(__name(args...)) (*)(Args...); \ + std::call_once(cudnn_dso_flag, \ + paddle::platform::dynload::GetCudnnDsoHandle, \ + &cudnn_dso_handle); \ + void* p_##__name = dlsym(cudnn_dso_handle, #__name); \ + return reinterpret_cast(p_##__name)(args...); \ + } \ + } __name; /* struct DynLoad__##__name */ + +#else + +#define DYNAMIC_LOAD_CUDNN_WRAP(__name) \ + struct DynLoad__##__name { \ + template \ + auto operator()(Args... args) -> decltype(__name(args...)) { \ + return __name(args...); \ + } \ + } __name; /* struct DynLoad__##__name */ + +#endif + +/** + * include all needed cudnn functions in HPPL + * different cudnn version has different interfaces + **/ +// clang-format off +#define CUDNN_DNN_ROUTINE_EACH(__macro) \ + __macro(cudnnSetTensor4dDescriptor) \ + __macro(cudnnSetTensor4dDescriptorEx) \ + __macro(cudnnGetConvolutionNdForwardOutputDim) \ + __macro(cudnnGetConvolutionForwardAlgorithm) \ + __macro(cudnnCreateTensorDescriptor) \ + __macro(cudnnDestroyTensorDescriptor) \ + __macro(cudnnCreateFilterDescriptor) \ + __macro(cudnnSetFilter4dDescriptor) \ + __macro(cudnnSetPooling2dDescriptor) \ + __macro(cudnnDestroyFilterDescriptor) \ + __macro(cudnnCreateConvolutionDescriptor) \ + __macro(cudnnCreatePoolingDescriptor) \ + __macro(cudnnDestroyPoolingDescriptor) \ + __macro(cudnnSetConvolution2dDescriptor) \ + __macro(cudnnDestroyConvolutionDescriptor) \ + __macro(cudnnCreate) \ + __macro(cudnnDestroy) \ + __macro(cudnnSetStream) \ + __macro(cudnnActivationForward) \ + __macro(cudnnConvolutionForward) \ + __macro(cudnnConvolutionBackwardBias) \ + __macro(cudnnGetConvolutionForwardWorkspaceSize) \ + __macro(cudnnTransformTensor) \ + __macro(cudnnPoolingForward) \ + __macro(cudnnPoolingBackward) \ + __macro(cudnnSoftmaxBackward) \ + __macro(cudnnSoftmaxForward) \ + __macro(cudnnGetVersion) \ + __macro(cudnnGetErrorString) +CUDNN_DNN_ROUTINE_EACH(DYNAMIC_LOAD_CUDNN_WRAP) + +#define CUDNN_DNN_ROUTINE_EACH_R2(__macro) \ + __macro(cudnnAddTensor) \ + __macro(cudnnConvolutionBackwardData) \ + __macro(cudnnConvolutionBackwardFilter) +CUDNN_DNN_ROUTINE_EACH_R2(DYNAMIC_LOAD_CUDNN_WRAP) + +// APIs available after R3: +#if CUDNN_VERSION >= 3000 +#define CUDNN_DNN_ROUTINE_EACH_AFTER_R3(__macro) \ + __macro(cudnnGetConvolutionBackwardFilterWorkspaceSize) \ + __macro(cudnnGetConvolutionBackwardDataAlgorithm) \ + __macro(cudnnGetConvolutionBackwardFilterAlgorithm) \ + __macro(cudnnGetConvolutionBackwardDataWorkspaceSize) +CUDNN_DNN_ROUTINE_EACH_AFTER_R3(DYNAMIC_LOAD_CUDNN_WRAP) +#undef CUDNN_DNN_ROUTINE_EACH_AFTER_R3 +#endif + + +// APIs available after R4: +#if CUDNN_VERSION >= 4007 +#define CUDNN_DNN_ROUTINE_EACH_AFTER_R4(__macro) \ + __macro(cudnnBatchNormalizationForwardTraining) \ + __macro(cudnnBatchNormalizationForwardInference) \ + __macro(cudnnBatchNormalizationBackward) +CUDNN_DNN_ROUTINE_EACH_AFTER_R4(DYNAMIC_LOAD_CUDNN_WRAP) +#undef CUDNN_DNN_ROUTINE_EACH_AFTER_R4 +#endif + +// APIs in R5 +#if CUDNN_VERSION >= 5000 +#define CUDNN_DNN_ROUTINE_EACH_R5(__macro) \ + __macro(cudnnCreateActivationDescriptor) \ + __macro(cudnnSetActivationDescriptor) \ + __macro(cudnnGetActivationDescriptor) \ + __macro(cudnnDestroyActivationDescriptor) +CUDNN_DNN_ROUTINE_EACH_R5(DYNAMIC_LOAD_CUDNN_WRAP) +#undef CUDNN_DNN_ROUTINE_EACH_R5 +#endif + +#undef CUDNN_DNN_ROUTINE_EACH +// clang-format on +} // namespace dynload +} // namespace platform +} // namespace paddle diff --git a/paddle/platform/dynload/curand.h b/paddle/platform/dynload/curand.h new file mode 100644 index 0000000000000000000000000000000000000000..1ef7a8c833d7488ec18f7c8df55b16006e94cc72 --- /dev/null +++ b/paddle/platform/dynload/curand.h @@ -0,0 +1,65 @@ +/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserve. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. */ + +#pragma once + +#include +#include "paddle/platform/dynamic_loader.h" + +namespace paddle { +namespace platform { +namespace dynload { +std::once_flag curand_dso_flag; +void *curand_dso_handle = nullptr; +#ifdef PADDLE_USE_DSO +#define DYNAMIC_LOAD_CURAND_WRAP(__name) \ + struct DynLoad__##__name { \ + template \ + curandStatus_t operator()(Args... args) { \ + typedef curandStatus_t (*curandFunc)(Args...); \ + std::call_once(curand_dso_flag, \ + paddle::platform::dynload::GetCurandDsoHandle, \ + &curand_dso_handle); \ + void *p_##__name = dlsym(curand_dso_handle, #__name); \ + return reinterpret_cast(p_##__name)(args...); \ + } \ + } __name; /* struct DynLoad__##__name */ +#else +#define DYNAMIC_LOAD_CURAND_WRAP(__name) \ + struct DynLoad__##__name { \ + template \ + curandStatus_t operator()(Args... args) { \ + return __name(args...); \ + } \ + } __name; /* struct DynLoad__##__name */ +#endif + +/* include all needed curand functions in HPPL */ +// clang-format off +#define CURAND_RAND_ROUTINE_EACH(__macro) \ + __macro(curandCreateGenerator) \ + __macro(curandSetStream) \ + __macro(curandSetPseudoRandomGeneratorSeed)\ + __macro(curandGenerateUniform) \ + __macro(curandGenerateUniformDouble) \ + __macro(curandDestroyGenerator) +// clang-format on + +CURAND_RAND_ROUTINE_EACH(DYNAMIC_LOAD_CURAND_WRAP) + +#undef CURAND_RAND_ROUTINE_EACH +#undef DYNAMIC_LOAD_CURAND_WRAP +} // namespace dynload +} // namespace platform +} // namespace paddle diff --git a/paddle/platform/dynload/dynamic_loader.cc b/paddle/platform/dynload/dynamic_loader.cc new file mode 100644 index 0000000000000000000000000000000000000000..8ef67bad8c63cd1dfff967dbf1627ed84645e0d0 --- /dev/null +++ b/paddle/platform/dynload/dynamic_loader.cc @@ -0,0 +1,169 @@ +/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserve. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. */ + +#include "dynamic_loader.h" +#include +#include +#include +#include +#include "gflags/gflags.h" +#include "glog/logging.h" + +DEFINE_string(cudnn_dir, "", + "Specify path for loading libcudnn.so. For instance, " + "/usr/local/cudnn/lib. If empty [default], dlopen " + "will search cudnn from LD_LIBRARY_PATH"); + +DEFINE_string(cuda_dir, "", + "Specify path for loading cuda library, such as libcublas, " + "libcurand. For instance, /usr/local/cuda/lib64. If default, " + "dlopen will search cuda from LD_LIBRARY_PATH"); + +DEFINE_string(warpctc_dir, "", "Specify path for loading libwarpctc.so."); + +DEFINE_string(lapack_dir, "", "Specify path for loading liblapack.so."); + +namespace paddle { +namespace platform { +namespace dynload { + +static inline std::string join(const std::string& part1, + const std::string& part2) { + // directory separator + const char sep = '/'; + if (!part2.empty() && part2.front() == sep) { + return part2; + } + std::string ret; + ret.reserve(part1.size() + part2.size() + 1); + ret = part1; + if (!ret.empty() && ret.back() != sep) { + ret += sep; + } + ret += part2; + return ret; +} + +static inline void GetDsoHandleFromDefaultPath(std::string& dso_path, + void** dso_handle, + int dynload_flags) { + VLOG(3) << "Try to find library: " << dso_path + << " from default system path."; + // default search from LD_LIBRARY_PATH/DYLD_LIBRARY_PATH + *dso_handle = dlopen(dso_path.c_str(), dynload_flags); + +// DYLD_LIBRARY_PATH is disabled after Mac OS 10.11 to +// bring System Integrity Projection (SIP), if dso_handle +// is null, search from default package path in Mac OS. +#if defined(__APPLE__) || defined(__OSX__) + if (nullptr == *dso_handle) { + dso_path = join("/usr/local/cuda/lib/", dso_path); + *dso_handle = dlopen(dso_path.c_str(), dynload_flags); + if (nullptr == *dso_handle) { + if (dso_path == "libcudnn.dylib") { + LOG(FATAL) + << "Note: [Recommend] copy cudnn into /usr/local/cuda/ \n" // NOLINT + << "For instance, sudo tar -xzf " + "cudnn-7.5-osx-x64-v5.0-ga.tgz -C " // NOLINT + << "/usr/local \n sudo chmod a+r " + "/usr/local/cuda/include/cudnn.h " // NOLINT + << "/usr/local/cuda/lib/libcudnn*"; + } + } + } +#endif +} + +static inline void GetDsoHandleFromSearchPath(const std::string& search_root, + const std::string& dso_name, + void** dso_handle) { + int dynload_flags = RTLD_LAZY | RTLD_LOCAL; + *dso_handle = nullptr; + + std::string dlPath = dso_name; + if (search_root.empty()) { + GetDsoHandleFromDefaultPath(dlPath, dso_handle, dynload_flags); + } else { + // search xxx.so from custom path + dlPath = join(search_root, dso_name); + *dso_handle = dlopen(dlPath.c_str(), dynload_flags); + // if not found, search from default path + if (nullptr == *dso_handle) { + LOG(WARNING) << "Failed to find dynamic library: " << dlPath << " (" + << dlerror() << ")"; + dlPath = dso_name; + GetDsoHandleFromDefaultPath(dlPath, dso_handle, dynload_flags); + } + } + + CHECK(nullptr != *dso_handle) << "Failed to find dynamic library: " << dlPath + << " (" << dlerror() << ") \n" + << "Please specify its path correctly using " + "following ways: \n" + + << "Method. set environment variable " + "LD_LIBRARY_PATH on Linux or " + << "DYLD_LIBRARY_PATH on Mac OS. \n" + << "For instance, issue command: export " + "LD_LIBRARY_PATH=... \n" + + << "Note: After Mac OS 10.11, using the " + "DYLD_LIBRARY_PATH is impossible " + << "unless System Integrity Protection (SIP) " + "is disabled."; +} + +void GetCublasDsoHandle(void** dso_handle) { +#if defined(__APPLE__) || defined(__OSX__) + GetDsoHandleFromSearchPath(FLAGS_cuda_dir, "libcublas.dylib", dso_handle); +#else + GetDsoHandleFromSearchPath(FLAGS_cuda_dir, "libcublas.so", dso_handle); +#endif +} + +void GetCudnnDsoHandle(void** dso_handle) { +#if defined(__APPLE__) || defined(__OSX__) + GetDsoHandleFromSearchPath(FLAGS_cudnn_dir, "libcudnn.dylib", dso_handle); +#else + GetDsoHandleFromSearchPath(FLAGS_cudnn_dir, "libcudnn.so", dso_handle); +#endif +} + +void GetCurandDsoHandle(void** dso_handle) { +#if defined(__APPLE__) || defined(__OSX__) + GetDsoHandleFromSearchPath(FLAGS_cuda_dir, "libcurand.dylib", dso_handle); +#else + GetDsoHandleFromSearchPath(FLAGS_cuda_dir, "libcurand.so", dso_handle); +#endif +} + +void GetWarpCTCDsoHandle(void** dso_handle) { +#if defined(__APPLE__) || defined(__OSX__) + GetDsoHandleFromSearchPath(FLAGS_warpctc_dir, "libwarpctc.dylib", dso_handle); +#else + GetDsoHandleFromSearchPath(FLAGS_warpctc_dir, "libwarpctc.so", dso_handle); +#endif +} + +void GetLapackDsoHandle(void** dso_handle) { +#if defined(__APPLE__) || defined(__OSX__) + GetDsoHandleFromSearchPath(FLAGS_lapack_dir, "liblapacke.dylib", dso_handle); +#else + GetDsoHandleFromSearchPath(FLAGS_lapack_dir, "liblapacke.so", dso_handle); +#endif +} + +} // namespace dynload +} // namespace platform +} // namespace paddle diff --git a/paddle/platform/dynload/dynamic_loader.h b/paddle/platform/dynload/dynamic_loader.h new file mode 100644 index 0000000000000000000000000000000000000000..a99b05443feb909f10b2c56f4d8bdf3c6fa11e3f --- /dev/null +++ b/paddle/platform/dynload/dynamic_loader.h @@ -0,0 +1,63 @@ +/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserve. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. */ + +#pragma once + +namespace paddle { +namespace platform { +namespace dynload { + +/** + * @brief load the DSO of CUBLAS + * + * @param **dso_handle dso handler + * + */ +void GetCublasDsoHandle(void** dso_handle); + +/** + * @brief load the DSO of CUDNN + * + * @param **dso_handle dso handler + * + */ +void GetCudnnDsoHandle(void** dso_handle); + +/** + * @brief load the DSO of CURAND + * + * @param **dso_handle dso handler + * + */ +void GetCurandDsoHandle(void** dso_handle); + +/** + * @brief load the DSO of warp-ctc + * + * @param **dso_handle dso handler + * + */ +void GetWarpCTCDsoHandle(void** dso_handle); + +/** + * @brief load the DSO of lapack + * + * @param **dso_handle dso handler + * + */ +void GetLapackDsoHandle(void** dso_handle); + +} // namespace dynload +} // namespace platform +} // namespace paddle diff --git a/python/paddle/v2/optimizer.py b/python/paddle/v2/optimizer.py index 1ef2dceca910e806bddf17c95d1c345a144d9e31..8124e219ba499333ecdf4b34ff5352e281aaa016 100644 --- a/python/paddle/v2/optimizer.py +++ b/python/paddle/v2/optimizer.py @@ -5,6 +5,8 @@ import paddle.trainer_config_helpers.optimizers as v1_optimizers """ Optimizers(update equation) for SGD method. +TODO(zhihong) : create new optimizer with proto config, add new optimizer here + TODO(yuyang18): Complete comments. """