diff --git a/.clang-format b/.clang-format index 9ba433b17362424973626470d930356c2173dd84..aff93435f58c522f5ed1090aef2005f76e91cf31 100644 --- a/.clang-format +++ b/.clang-format @@ -25,4 +25,3 @@ AllowAllParametersOfDeclarationOnNextLine: true BinPackParameters: false BinPackArguments: false ... - diff --git a/.travis.yml b/.travis.yml index c51e02eb79a9e53a2b8d1d663e8f0c3e0d8c3a61..e2d49daa1981396628efa5d16459eb70e9e76884 100644 --- a/.travis.yml +++ b/.travis.yml @@ -42,7 +42,7 @@ before_install: script: - | timeout 2580 paddle/scripts/travis/${JOB}.sh # 43min timeout - RESULT=$?; if [ $RESULT -eq 0 ] || [ $RESULT -eq 142 ]; then true; else false; fi; + RESULT=$?; if [ $RESULT -eq 0 ] || [ $RESULT -eq 142 ]; then true ;else exit 1; fi; - | if [[ "$JOB" != "build_doc" ]]; then exit 0; fi; if [[ "$TRAVIS_PULL_REQUEST" != "false" ]]; then exit 0; fi; diff --git a/CMakeLists.txt b/CMakeLists.txt index 65164b8472b902be8b0b9d5fb99807d012b8a666..e76512166fcaea5daf2a67d1259331b680f15b7c 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -133,6 +133,8 @@ include(external/any) # download libn::any include(external/eigen) # download eigen3 include(external/pybind11) # download pybind11 include(external/nccl) +include(external/cares) +include(external/grpc) include(cudnn) # set cudnn libraries, must before configure include(configure) # add paddle env configuration diff --git a/Dockerfile b/Dockerfile index 150344a8116e2be9b5bab8e5fdcc9c37f4025020..857d3f3e5f64791146741ffb29feabfcb2ecbb84 100644 --- a/Dockerfile +++ b/Dockerfile @@ -29,7 +29,7 @@ RUN apt-get update && \ automake locales clang-format swig doxygen cmake \ liblapack-dev liblapacke-dev libboost-dev \ clang-3.8 llvm-3.8 libclang-3.8-dev \ - net-tools && \ + net-tools libtool && \ apt-get clean -y # Install Go and glide diff --git a/cmake/external/cares.cmake b/cmake/external/cares.cmake new file mode 100644 index 0000000000000000000000000000000000000000..e05111ee18efc906e39bcb56fb1be3b3c3dff5d6 --- /dev/null +++ b/cmake/external/cares.cmake @@ -0,0 +1,45 @@ +# Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserve. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# + +IF(MOBILE_INFERENCE) + return() +ENDIF() + +include (ExternalProject) + +# NOTE: c-ares is needed when linking with grpc. + +SET(CARES_SOURCES_DIR ${THIRD_PARTY_PATH}/cares) +SET(CARES_INSTALL_DIR ${THIRD_PARTY_PATH}/install/cares) +SET(CARES_INCLUDE_DIR "${CARES_INSTALL_DIR}/include/" CACHE PATH "cares include directory." FORCE) + +ExternalProject_Add( + extern_cares + GIT_REPOSITORY "https://github.com/c-ares/c-ares.git" + GIT_TAG "cares-1_13_0" + PREFIX ${CARES_SOURCES_DIR} + UPDATE_COMMAND "" + CONFIGURE_COMMAND ./buildconf && ./configure --disable-shared --prefix=${CARES_INSTALL_DIR} + BUILD_IN_SOURCE 1 + BUILD_COMMAND make + INSTALL_COMMAND make install +) + +ADD_LIBRARY(cares STATIC IMPORTED GLOBAL) +SET_PROPERTY(TARGET cares PROPERTY IMPORTED_LOCATION + "${CARES_INSTALL_DIR}/lib/libcares.a") + +include_directories(${CARES_INCLUDE_DIR}) +ADD_DEPENDENCIES(cares extern_cares) diff --git a/cmake/external/grpc.cmake b/cmake/external/grpc.cmake new file mode 100644 index 0000000000000000000000000000000000000000..1330ef82dc547aecb93e8390cf45ae2de1caaa66 --- /dev/null +++ b/cmake/external/grpc.cmake @@ -0,0 +1,66 @@ +# Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserve. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# + +IF(MOBILE_INFERENCE) + return() +ENDIF() + +include (ExternalProject) + +SET(GRPC_SOURCES_DIR ${THIRD_PARTY_PATH}/grpc) +SET(GRPC_INSTALL_DIR ${THIRD_PARTY_PATH}/install/grpc) +SET(GRPC_INCLUDE_DIR "${GRPC_INSTALL_DIR}/include/" CACHE PATH "grpc include directory." FORCE) +SET(GRPC_CPP_PLUGIN "${GRPC_INSTALL_DIR}/bin/grpc_cpp_plugin" CACHE FILEPATH "GRPC_CPP_PLUGIN" FORCE) +IF(APPLE) + SET(BUILD_CMD make -n | sed "s/-Werror//g" | sh) +ELSE() + SET(BUILD_CMD make) +ENDIF() + +ExternalProject_Add( + extern_grpc + DEPENDS protobuf zlib + GIT_REPOSITORY "https://github.com/grpc/grpc.git" + GIT_TAG "v1.7.x" + PREFIX ${GRPC_SOURCES_DIR} + UPDATE_COMMAND "" + CONFIGURE_COMMAND "" + BUILD_IN_SOURCE 1 + # NOTE(yuyang18): + # Disable -Werror, otherwise the compile will fail in MacOS. + # It seems that we cannot configure that by make command. + # Just dry run make command and remove `-Werror`, then use a shell to run make commands + BUILD_COMMAND ${BUILD_CMD} + INSTALL_COMMAND make prefix=${GRPC_INSTALL_DIR} install +) + +# FIXME(typhoonzero): hack to get static lib path, try a better way like merge them. +ADD_LIBRARY(grpc++_unsecure STATIC IMPORTED GLOBAL) +SET_PROPERTY(TARGET grpc++_unsecure PROPERTY IMPORTED_LOCATION + "${GRPC_INSTALL_DIR}/lib/libgrpc++_unsecure.a") + +ADD_LIBRARY(grpc++ STATIC IMPORTED GLOBAL) +SET_PROPERTY(TARGET grpc++ PROPERTY IMPORTED_LOCATION + "${GRPC_INSTALL_DIR}/lib/libgrpc++.a") +ADD_LIBRARY(gpr STATIC IMPORTED GLOBAL) +SET_PROPERTY(TARGET gpr PROPERTY IMPORTED_LOCATION + "${GRPC_INSTALL_DIR}/lib/libgpr.a") + +ADD_LIBRARY(grpc_unsecure STATIC IMPORTED GLOBAL) +SET_PROPERTY(TARGET grpc_unsecure PROPERTY IMPORTED_LOCATION + "${GRPC_INSTALL_DIR}/lib/libgrpc_unsecure.a") + +include_directories(${GRPC_INCLUDE_DIR}) +ADD_DEPENDENCIES(grpc++_unsecure extern_grpc) diff --git a/cmake/external/protobuf.cmake b/cmake/external/protobuf.cmake index be7f6a9465970711170bd15dcecaadeaa8a55f86..7cfe1e68078eed023fd0cc6971c573bb0108b4cc 100644 --- a/cmake/external/protobuf.cmake +++ b/cmake/external/protobuf.cmake @@ -15,7 +15,18 @@ INCLUDE(ExternalProject) # Always invoke `FIND_PACKAGE(Protobuf)` for importing function protobuf_generate_cpp FIND_PACKAGE(Protobuf QUIET) -SET(PROTOBUF_FOUND "OFF") +macro(UNSET_VAR VAR_NAME) + UNSET(${VAR_NAME} CACHE) + UNSET(${VAR_NAME}) +endmacro() +UNSET_VAR(PROTOBUF_INCLUDE_DIR) +UNSET_VAR(PROTOBUF_FOUND) +UNSET_VAR(PROTOBUF_PROTOC_EXECUTABLE) +UNSET_VAR(PROTOBUF_PROTOC_LIBRARY) +UNSET_VAR(PROTOBUF_LITE_LIBRARY) +UNSET_VAR(PROTOBUF_LIBRARY) +UNSET_VAR(PROTOBUF_INCLUDE_DIR) +UNSET_VAR(Protobuf_PROTOC_EXECUTABLE) if(NOT COMMAND protobuf_generate_python) # before cmake 3.4, protobuf_genrerate_python is not defined. function(protobuf_generate_python SRCS) @@ -110,7 +121,6 @@ macro(PROMPT_PROTOBUF_LIB) # FIND_Protobuf.cmake uses `Protobuf_PROTOC_EXECUTABLE`. # make `protobuf_generate_cpp` happy. SET(Protobuf_PROTOC_EXECUTABLE ${PROTOBUF_PROTOC_EXECUTABLE}) - FOREACH(dep ${protobuf_DEPS}) ADD_DEPENDENCIES(protobuf ${dep}) ADD_DEPENDENCIES(protobuf_lite ${dep}) @@ -128,11 +138,11 @@ endmacro() set(PROTOBUF_ROOT "" CACHE PATH "Folder contains protobuf") if (NOT "${PROTOBUF_ROOT}" STREQUAL "") - find_path(PROTOBUF_INCLUDE_DIR google/protobuf/message.h PATHS ${PROTOBUF_ROOT}/include) - find_library(PROTOBUF_LIBRARY protobuf PATHS ${PROTOBUF_ROOT}/lib) - find_library(PROTOBUF_LITE_LIBRARY protobuf-lite PATHS ${PROTOBUF_ROOT}/lib) - find_library(PROTOBUF_PROTOC_LIBRARY protoc PATHS ${PROTOBUF_ROOT}/lib) - find_program(PROTOBUF_PROTOC_EXECUTABLE protoc PATHS ${PROTOBUF_ROOT}/bin) + find_path(PROTOBUF_INCLUDE_DIR google/protobuf/message.h PATHS ${PROTOBUF_ROOT}/include NO_DEFAULT_PATH) + find_library(PROTOBUF_LIBRARY protobuf PATHS ${PROTOBUF_ROOT}/lib NO_DEFAULT_PATH) + find_library(PROTOBUF_LITE_LIBRARY protobuf-lite PATHS ${PROTOBUF_ROOT}/lib NO_DEFAULT_PATH) + find_library(PROTOBUF_PROTOC_LIBRARY protoc PATHS ${PROTOBUF_ROOT}/lib NO_DEFAULT_PATH) + find_program(PROTOBUF_PROTOC_EXECUTABLE protoc PATHS ${PROTOBUF_ROOT}/bin NO_DEFAULT_PATH) if (PROTOBUF_INCLUDE_DIR AND PROTOBUF_LIBRARY AND PROTOBUF_LITE_LIBRARY AND PROTOBUF_PROTOC_LIBRARY AND PROTOBUF_PROTOC_EXECUTABLE) message(STATUS "Using custom protobuf library in ${PROTOBUF_ROOT}.") SET_PROTOBUF_VERSION() diff --git a/cmake/external/zlib.cmake b/cmake/external/zlib.cmake index a98e069b7cd1654ddd5868560d0905eab6d9c692..1638cd8fdfc34575132462859e056a1907f0b2f1 100644 --- a/cmake/external/zlib.cmake +++ b/cmake/external/zlib.cmake @@ -50,6 +50,8 @@ ExternalProject_Add( ) LIST(APPEND external_project_dependencies zlib) +ADD_LIBRARY(zlib_target STATIC IMPORTED GLOBAL) +SET_PROPERTY(TARGET zlib_target PROPERTY IMPORTED_LOCATION ${ZLIB_LIBRARIES}) IF(WITH_C_API) INSTALL(DIRECTORY ${ZLIB_INCLUDE_DIR} DESTINATION third_party/zlib) diff --git a/cmake/generic.cmake b/cmake/generic.cmake index 7b82d409a3b64a5fc8fdfe526a2e82a4e1c9fa8e..c917ca0ff4e087b7caae8876da127bec6b39b798 100644 --- a/cmake/generic.cmake +++ b/cmake/generic.cmake @@ -467,3 +467,50 @@ function(py_test TARGET_NAME) WORKING_DIRECTORY ${CMAKE_CURRENT_SOURCE_DIR}) endif() endfunction() + +# grpc_library generate grpc code using grpc_cpp_plugin and protoc +# then build the generated protobuf code and grpc code with your +# implementation source codes together. Use SRCS argument for your +# implementation source files and PROTO argument for your .proto +# files. +# +# Usage: grpc_library(my_target SRCS my_client.cc PROTO my_target.proto DEPS my_dep) + +function(grpc_library TARGET_NAME) + set(oneValueArgs PROTO) + set(multiValueArgs SRCS DEPS) + set(options "") + cmake_parse_arguments(grpc_library "${options}" "${oneValueArgs}" "${multiValueArgs}" ${ARGN}) + + message(STATUS "generating grpc ${grpc_library_PROTO}") + + get_filename_component(ABS_PROTO ${grpc_library_PROTO} ABSOLUTE) + get_filename_component(PROTO_WE ${grpc_library_PROTO} NAME_WE) + get_filename_component(PROTO_PATH ${ABS_PROTO} PATH) + + protobuf_generate_cpp(grpc_proto_srcs grpc_proto_hdrs "${ABS_PROTO}") + set(grpc_grpc_srcs "${CMAKE_CURRENT_BINARY_DIR}/${PROTO_WE}.grpc.pb.cc") + set(grpc_grpc_hdrs "${CMAKE_CURRENT_BINARY_DIR}/${PROTO_WE}.grpc.pb.h") + cc_library("${TARGET_NAME}_proto" SRCS "${grpc_proto_srcs}") + + add_custom_command( + OUTPUT "${grpc_grpc_srcs}" "${grpc_grpc_hdrs}" + COMMAND ${PROTOBUF_PROTOC_EXECUTABLE} + ARGS --grpc_out "${CMAKE_CURRENT_BINARY_DIR}" -I "${PROTO_PATH}" + --plugin=protoc-gen-grpc="${GRPC_CPP_PLUGIN}" "${ABS_PROTO}" + DEPENDS "${ABS_PROTO}" ${PROTOBUF_PROTOC_EXECUTABLE} extern_grpc) + + # FIXME(typhoonzero): grpc generated code do not generate virtual-dtor, mark it + # as compiler warnings instead of error. Should try remove the warnings also. + set_source_files_properties( + ${grpc_grpc_srcs} + PROPERTIES + COMPILE_FLAGS "-Wno-error=non-virtual-dtor -Wno-error=delete-non-virtual-dtor") + cc_library("${TARGET_NAME}_grpc" SRCS "${grpc_grpc_srcs}") + + set_source_files_properties( + ${grpc_library_SRCS} + PROPERTIES + COMPILE_FLAGS "-Wno-error=non-virtual-dtor -Wno-error=delete-non-virtual-dtor") + cc_library("${TARGET_NAME}" SRCS "${grpc_library_SRCS}" DEPS "${TARGET_NAME}_grpc" "${TARGET_NAME}_proto" "${grpc_library_DEPS}") +endfunction() diff --git a/paddle/framework/lod_tensor.cc b/paddle/framework/lod_tensor.cc index a0f2906c749054c1ff9f624e47df432ec2bd6ac8..fdf6de4babff3bb3c253aaf516636882237e6faf 100644 --- a/paddle/framework/lod_tensor.cc +++ b/paddle/framework/lod_tensor.cc @@ -13,6 +13,8 @@ limitations under the License. */ #include "paddle/framework/lod_tensor.h" +#include "paddle/framework/data_type.h" +#include "paddle/framework/framework.pb.h" #include "paddle/memory/memcpy.h" #include "paddle/memory/memory.h" @@ -27,11 +29,11 @@ namespace paddle { namespace framework { -std::ostream& operator<<(std::ostream& os, const LoD& lod) { +std::ostream &operator<<(std::ostream &os, const LoD &lod) { os << "{"; - for (auto& v : lod) { + for (auto &v : lod) { os << "{"; - for (auto& i : v) { + for (auto &i : v) { os << i << ","; } os << "}"; @@ -41,7 +43,7 @@ std::ostream& operator<<(std::ostream& os, const LoD& lod) { return os; } -LoD SliceLevels(const LoD& in, size_t level_begin, size_t level_end) { +LoD SliceLevels(const LoD &in, size_t level_begin, size_t level_end) { LoD new_lod; new_lod.reserve(level_end - level_begin); for (size_t i = level_begin; i < level_end; i++) { @@ -53,7 +55,7 @@ LoD SliceLevels(const LoD& in, size_t level_begin, size_t level_end) { return new_lod; } -LoD SliceInLevel(const LoD& in, size_t level, size_t elem_begin, +LoD SliceInLevel(const LoD &in, size_t level, size_t elem_begin, size_t elem_end) { PADDLE_ENFORCE_LT(level, in.size()); PADDLE_ENFORCE_LT(elem_end, in[level].size()); @@ -64,9 +66,9 @@ LoD SliceInLevel(const LoD& in, size_t level, size_t elem_begin, res[0].assign(in[level].begin() + elem_begin, in[level].begin() + elem_end + 1); for (size_t lvl = 1; lvl < res.size(); lvl++) { - const auto& in_level = in[level + lvl]; - const auto& above_level = res[lvl - 1]; - auto& out_level = res[lvl]; + const auto &in_level = in[level + lvl]; + const auto &above_level = res[lvl - 1]; + auto &out_level = res[lvl]; out_level.assign(in_level.begin() + above_level.front(), in_level.begin() + above_level.back() + 1); } @@ -74,33 +76,33 @@ LoD SliceInLevel(const LoD& in, size_t level, size_t elem_begin, // to make the first offset equals 0, all the elements minus the first // element size_t front = res[lvl].front(); - for (auto& ele : res[lvl]) { + for (auto &ele : res[lvl]) { ele -= front; } } return res; } -LoD ToAbsOffset(const LoD& in) { +LoD ToAbsOffset(const LoD &in) { // the lowest level stores relative offsets if (in.empty() || in.size() == 1) return in; LoD result = in; for (int level = result.size() - 2; level >= 0; level--) { - for (auto& ele : result[level]) { + for (auto &ele : result[level]) { ele = result[level + 1][ele]; } } return result; } -bool operator==(const LoD& a, const LoD& b) { +bool operator==(const LoD &a, const LoD &b) { if (a.size() != b.size()) { return false; } for (size_t i = 0; i < a.size(); i++) { - const auto& a_level = a[i]; - const auto& b_level = b[i]; + const auto &a_level = a[i]; + const auto &b_level = b[i]; if (a_level.size() != b_level.size()) { return false; } @@ -151,7 +153,7 @@ void LoDTensor::ShrinkInLevel(size_t level, size_t elem_begin, } using LoDAndOffset = std::pair>; -LoDAndOffset GetSubLoDAndAbsoluteOffset(const LoD& lod, size_t start_idx, +LoDAndOffset GetSubLoDAndAbsoluteOffset(const LoD &lod, size_t start_idx, size_t end_idx, size_t start_level) { LoD sub_lod; @@ -170,7 +172,7 @@ LoDAndOffset GetSubLoDAndAbsoluteOffset(const LoD& lod, size_t start_idx, return LoDAndOffset{sub_lod, {start_idx, end_idx}}; } -void AppendLoD(LoD* lod, const LoD& lod_length) { +void AppendLoD(LoD *lod, const LoD &lod_length) { PADDLE_ENFORCE( lod->empty() || lod->size() == lod_length.size(), "The lod_length should has the same size with the appended lod."); @@ -178,12 +180,139 @@ void AppendLoD(LoD* lod, const LoD& lod_length) { *lod = LoD(lod_length.size(), std::vector({0})); } for (size_t i = 0; i < lod->size(); ++i) { - auto& level = (*lod)[i]; + auto &level = (*lod)[i]; for (size_t len : lod_length[i]) { level.push_back(level.back() + len); } } } +void SerializeToStream(std::ostream &os, const LoDTensor &tensor, + const platform::DeviceContext &dev_ctx) { + // TODO(typhoonzero): serialize to ostream + { // the 1st field, uint32_t version + constexpr uint32_t version = 0; + os.write(reinterpret_cast(&version), sizeof(version)); + } + { // the 2nd field, tensor description + // int32_t size + // void* protobuf message + framework::TensorDesc desc; + desc.set_data_type(framework::ToDataType(tensor.type())); + auto dims = framework::vectorize(tensor.dims()); + auto *pb_dims = desc.mutable_dims(); + pb_dims->Resize(static_cast(dims.size()), 0); + std::copy(dims.begin(), dims.end(), pb_dims->begin()); + int32_t size = desc.ByteSize(); + os.write(reinterpret_cast(&size), sizeof(size)); + auto out = desc.SerializeAsString(); + os.write(out.data(), size); + } + { // the 3rd field, tensor data + uint64_t size = tensor.memory_size(); + auto *data_ptr = tensor.data(); + PADDLE_ENFORCE(size < std::numeric_limits::max(), + "Index overflow when writing tensor"); + if (platform::is_gpu_place(tensor.place())) { +#ifdef PADDLE_WITH_CUDA + constexpr size_t kBufSize = 1024 * 1024 * 64; // 64MB + std::unique_ptr buf(new char[kBufSize]); + auto &gpu_dev_ctx = + static_cast(dev_ctx); + platform::CPUPlace cpu; + uintptr_t data = reinterpret_cast(data_ptr); + while (size != 0) { + size_t size_to_write = std::min(kBufSize, static_cast(size)); + memory::Copy(cpu, buf.get(), + boost::get(tensor.place()), + reinterpret_cast(data), size_to_write, + gpu_dev_ctx.stream()); + gpu_dev_ctx.Wait(); + os.write(buf.get(), size_to_write); + data += size_to_write; + size -= size_to_write; + } +#else + PADDLE_THROW("Unexpected branch"); +#endif + } else { + os.write(static_cast(data_ptr), + static_cast(size)); + } + } + { // the 4th field, lod information + // uint64_t lod_level + // uint64_t lod_level_1 size in byte. + // int* lod_level_1 data + // ... + auto lod = tensor.lod(); + uint64_t size = lod.size(); + os.write(reinterpret_cast(&size), sizeof(size)); + + for (auto &each : lod) { + size = each.size() * sizeof(framework::LoD::value_type::value_type); + os.write(reinterpret_cast(&size), sizeof(size)); + os.write(reinterpret_cast(each.data()), + static_cast(size)); + } + } +} + +void DeserializeFromStream(std::istream &is, LoDTensor *tensor) { + uint32_t version; + is.read(reinterpret_cast(&version), sizeof(version)); + PADDLE_ENFORCE_EQ(version, 0U, "Only version 0 is supported"); + framework::TensorDesc desc; + { // int32_t size + // proto buffer + int32_t size; + is.read(reinterpret_cast(&size), sizeof(size)); + std::unique_ptr buf(new char[size]); + is.read(reinterpret_cast(buf.get()), size); + PADDLE_ENFORCE(desc.ParseFromArray(buf.get(), size), + "Cannot parse tensor desc"); + } + { // read tensor + std::vector dims; + dims.reserve(static_cast(desc.dims().size())); + std::copy(desc.dims().begin(), desc.dims().end(), std::back_inserter(dims)); + tensor->Resize(framework::make_ddim(dims)); + + void *buf; + platform::Place cpu = platform::CPUPlace(); + switch (desc.data_type()) { + case framework::FP32: + buf = tensor->mutable_data(cpu); + break; + case framework::FP64: + buf = tensor->mutable_data(cpu); + break; + case framework::INT32: + buf = tensor->mutable_data(cpu); + break; + case framework::INT64: + buf = tensor->mutable_data(cpu); + break; + default: + PADDLE_THROW("DataType %d not supported", desc.data_type()); + } + is.read(static_cast(buf), tensor->memory_size()); + } + { // read lod + uint64_t lod_level; + is.read(reinterpret_cast(&lod_level), sizeof(lod_level)); + auto &lod = *tensor->mutable_lod(); + lod.resize(lod_level); + for (uint64_t i = 0; i < lod_level; ++i) { + uint64_t size; + is.read(reinterpret_cast(&size), sizeof(size)); + std::vector tmp(size / sizeof(size_t)); + is.read(reinterpret_cast(tmp.data()), + static_cast(size)); + lod[i] = tmp; + } + } +} + } // namespace framework } // namespace paddle diff --git a/paddle/framework/lod_tensor.h b/paddle/framework/lod_tensor.h index 21bdfca1111f16d5b8ea71be004ddb8da12fd03c..9411c96aea4c10ebf921cc3e3b442769c8acbefa 100644 --- a/paddle/framework/lod_tensor.h +++ b/paddle/framework/lod_tensor.h @@ -189,5 +189,14 @@ std::pair> GetSubLoDAndAbsoluteOffset( void AppendLoD(LoD* lod, const LoD& lod_length); +/* + * Serialize/Desiralize LoDTensor to std::ostream + * You can pass ofstream or ostringstream to serilize to file + * or to a in memory string. GPU tensor will be copied to CPU. + */ +void SerializeToStream(std::ostream& os, const LoDTensor& tensor, + const platform::DeviceContext& dev_ctx); +void DeserializeFromStream(std::istream& is, LoDTensor* tensor); + } // namespace framework } // namespace paddle diff --git a/paddle/gserver/layers/HierarchicalSigmoidLayer.cpp b/paddle/gserver/layers/HierarchicalSigmoidLayer.cpp index d62a8d846e5b347aa44ce1951c043d5813a5b3ff..236f8096bdb6e024cf3c9c73eba422616a777a23 100644 --- a/paddle/gserver/layers/HierarchicalSigmoidLayer.cpp +++ b/paddle/gserver/layers/HierarchicalSigmoidLayer.cpp @@ -64,49 +64,111 @@ void HierarchicalSigmoidLayer::forward(PassType passType) { batchSize, codeLength_, /* trans */ false, - useGpu(deviceId_)); + false); Matrix::resizeOrCreate(preOutput_.grad, batchSize, codeLength_, /* trans */ false, - useGpu(deviceId_)); - + false); IVectorPtr label = getInput(*getLabelLayer()).ids; - preOutput_.value->zeroMem(); + if (useGpu_) { + Matrix::resizeOrCreate(cpuOutput_, + output_.value->getHeight(), + output_.value->getWidth(), + /* trans */ false, + false); + IVector::resizeOrCreate(cpuLabel_, label->getSize(), false); + cpuLabel_->copyFrom(*label); + cpuOutput_->copyFrom(*output_.value); + } else { + cpuOutput_ = output_.value; + cpuLabel_ = label; + } /* add the bias-vector */ if (biases_.get() != NULL) { - preOutput_.value->addByBitCode(numClasses_, *label, *biases_->getW()); + if (useGpu_) { + Matrix::resizeOrCreate(cpuBias_, + 1, + numClasses_ - 1, + /* trans */ false, + false); + cpuBias_->copyFrom(*biases_->getW()); + } else { + cpuBias_ = biases_->getW(); + } + preOutput_.value->addByBitCode(numClasses_, *cpuLabel_, *cpuBias_); } for (size_t i = 0; i < inputLayers_.size() - 1; ++i) { MatrixPtr input = getInputValue(i); + if (useGpu_) { + Matrix::resizeOrCreate(cpuInput_, + input->getHeight(), + input->getWidth(), + /* trans */ false, + false); + Matrix::resizeOrCreate(cpuWeight_, + weights_[i]->getW()->getHeight(), + weights_[i]->getW()->getWidth(), + /* trans */ false, + false); + cpuInput_->copyFrom(*input); + cpuWeight_->copyFrom(*weights_[i]->getW()); + } else { + cpuInput_ = input; + cpuWeight_ = weights_[i]->getW(); + } preOutput_.value->mulByBitCode( - numClasses_, *label, *weights_[i]->getW(), *input); + numClasses_, *cpuLabel_, *cpuWeight_, *cpuInput_); } // keep consistent with the clipping in the following softrelu preOutput_.value->clip(-40.0, 40.0); preOutput_.value->sumByBitCode(numClasses_, - *label, - *output_.value, + *cpuLabel_, + *cpuOutput_, -1); // scaleSum preOutput_.value->softrelu(*preOutput_.value); - MatrixPtr sum = - Matrix::create(batchSize, 1, /* trans= */ false, useGpu(deviceId_)); + MatrixPtr sum = Matrix::create(batchSize, 1, /* trans= */ false, false); preOutput_.value->rowSum(*sum); - output_.value->add(*sum); + cpuOutput_->add(*sum); + if (useGpu_) { + output_.value->copyFrom(*cpuOutput_); + } else { + output_.value = cpuOutput_; + } } void HierarchicalSigmoidLayer::backward(const UpdateCallback& callback) { IVectorPtr label = getInput(*getLabelLayer()).ids; + if (useGpu_) { + IVector::resizeOrCreate(cpuLabel_, label->getSize(), false); + cpuLabel_->copyFrom(*label); + } else { + cpuLabel_ = label; + } preOutput_.grad->one(); preOutput_.grad->softreluDerivative(*preOutput_.value); - preOutput_.grad->subByBitCode(numClasses_, *label); + preOutput_.grad->subByBitCode(numClasses_, *cpuLabel_); if (biases_ && biases_->getWGrad()) { - preOutput_.grad->addByBitCodeBackward( - numClasses_, *label, *biases_->getWGrad()); - + MatrixPtr biases_grad = biases_->getWGrad(); + if (useGpu_) { + Matrix::resizeOrCreate(cpuBias_, + 1, + numClasses_ - 1, + /* trans */ false, + false); + cpuBias_->copyFrom(*biases_grad); + } else { + cpuBias_ = biases_grad; + } + preOutput_.grad->addByBitCodeBackward(numClasses_, *cpuLabel_, *cpuBias_); + if (useGpu_) { + biases_grad->copyFrom(*cpuBias_); + } else { + biases_grad = cpuBias_; + } /* Increasing the number of gradient */ biases_->getParameterPtr()->incUpdate(callback); } @@ -115,9 +177,31 @@ void HierarchicalSigmoidLayer::backward(const UpdateCallback& callback) { /* Calculate the W-gradient for the current layer */ MatrixPtr input = getInputValue(i); if (weights_[i]->getWGrad()) { + MatrixPtr weights_grad = weights_[i]->getWGrad(); + if (useGpu_) { + Matrix::resizeOrCreate(cpuInput_, + input->getHeight(), + input->getWidth(), + /* trans */ false, + false); + Matrix::resizeOrCreate(cpuWeightGrad_, + weights_grad->getHeight(), + weights_grad->getWidth(), + /* trans */ false, + false); + cpuInput_->copyFrom(*input); + cpuWeightGrad_->copyFrom(*weights_grad); + } else { + cpuInput_ = input; + cpuWeightGrad_ = weights_grad; + } preOutput_.grad->mulByBitCodeBackwardWeight( - numClasses_, *label, *weights_[i]->getWGrad(), *input); - + numClasses_, *cpuLabel_, *cpuWeightGrad_, *cpuInput_); + if (useGpu_) { + weights_grad->copyFrom(*cpuWeightGrad_); + } else { + weights_grad = cpuWeightGrad_; + } /* Increasing the number of gradient */ weights_[i]->getParameterPtr()->incUpdate(callback); } @@ -125,8 +209,30 @@ void HierarchicalSigmoidLayer::backward(const UpdateCallback& callback) { /* Calculate the input layers error */ MatrixPtr inputGrad = getInputGrad(i); if (inputGrad) { + if (useGpu_) { + Matrix::resizeOrCreate(cpuInputGrad_, + inputGrad->getHeight(), + inputGrad->getWidth(), + /* trans */ false, + false); + Matrix::resizeOrCreate(cpuWeight_, + weights_[i]->getW()->getHeight(), + weights_[i]->getW()->getWidth(), + /* trans */ false, + false); + cpuInputGrad_->copyFrom(*inputGrad); + cpuWeight_->copyFrom(*weights_[i]->getW()); + } else { + cpuInputGrad_ = inputGrad; + cpuWeight_ = weights_[i]->getW(); + } preOutput_.grad->mulByBitCodeBackwardError( - numClasses_, *label, *weights_[i]->getW(), *inputGrad); + numClasses_, *cpuLabel_, *cpuWeight_, *cpuInputGrad_); + if (useGpu_) { + inputGrad->copyFrom(*cpuInputGrad_); + } else { + inputGrad = cpuInputGrad_; + } } } } diff --git a/paddle/gserver/layers/HierarchicalSigmoidLayer.h b/paddle/gserver/layers/HierarchicalSigmoidLayer.h index 9afd40b1674680da962d6e51caa56b46279b70de..7f896e61ca26e3e22b99b65b1285384a121f7f02 100644 --- a/paddle/gserver/layers/HierarchicalSigmoidLayer.h +++ b/paddle/gserver/layers/HierarchicalSigmoidLayer.h @@ -80,6 +80,15 @@ protected: int codeLength_; /// temporary result of output_ Argument preOutput_; + + /// The temporary variables in CPU memory. + MatrixPtr cpuWeight_; + MatrixPtr cpuWeightGrad_; + MatrixPtr cpuInput_; + MatrixPtr cpuInputGrad_; + MatrixPtr cpuBias_; + MatrixPtr cpuOutput_; + IVectorPtr cpuLabel_; }; } // namespace paddle diff --git a/paddle/gserver/tests/test_LayerGrad.cpp b/paddle/gserver/tests/test_LayerGrad.cpp index a9fc733d1de441dea9f817c18ec65743836c2f23..c5359f272b4bed4d4d2483bf19d7ae482b0d33dd 100644 --- a/paddle/gserver/tests/test_LayerGrad.cpp +++ b/paddle/gserver/tests/test_LayerGrad.cpp @@ -681,12 +681,13 @@ TEST(Layer, hsigmoidLayer) { config.layerConfig.add_inputs(); config.layerConfig.add_inputs(); - // Not support GPU now - testLayerGrad(config, - "hsigmoid", - 100, - /* trans */ false, /* useGpu */ - false); + for (auto useGpu : {false, true}) { + testLayerGrad(config, + "hsigmoid", + 100, + /* trans */ false, + /* useGpu */ useGpu); + } } TEST(Layer, multi_cross) { diff --git a/paddle/operators/CMakeLists.txt b/paddle/operators/CMakeLists.txt index e86aae292b9d1d73a68ea73d756974393d8b2a64..937441b318095eadb9022c1d7578ad8aca2dadc8 100644 --- a/paddle/operators/CMakeLists.txt +++ b/paddle/operators/CMakeLists.txt @@ -206,8 +206,24 @@ set(DEPS_OPS tensor_array_read_write_op gru_op adagrad_op - sgd_op) + sgd_op + save_op + load_op + send_op + recv_op) +add_subdirectory(detail) +op_library(send_op SRCS send_op.cc DEPS sendrecvop_grpc grpc++_unsecure grpc_unsecure gpr cares zlib_target protobuf) +set_source_files_properties( + send_op.cc + PROPERTIES + COMPILE_FLAGS "-Wno-error=non-virtual-dtor -Wno-error=delete-non-virtual-dtor") + +op_library(recv_op SRCS recv_op.cc DEPS sendrecvop_grpc grpc++_unsecure grpc_unsecure gpr cares zlib_target protobuf) +set_source_files_properties( + recv_op.cc + PROPERTIES + COMPILE_FLAGS "-Wno-error=non-virtual-dtor -Wno-error=delete-non-virtual-dtor") op_library(cond_op SRCS cond_op.cc DEPS framework_proto tensor operator net_op) op_library(cross_entropy_op DEPS cross_entropy) @@ -237,6 +253,10 @@ op_library(conv_transpose_op DEPS vol2col) op_library(gru_op DEPS sequence2batch gru_compute) op_library(recurrent_op SRCS recurrent_op.cc DEPS executor) +# FIXME(typhoonzero): save/load depends lodtensor serialization functions +op_library(save_op DEPS lod_tensor) +op_library(load_op DEPS lod_tensor) + list(REMOVE_ITEM GENERAL_OPS ${DEPS_OPS}) foreach(src ${GENERAL_OPS}) op_library(${src}) @@ -244,6 +264,8 @@ endforeach() set(GLOB_OP_LIB ${OP_LIBRARY} CACHE INTERNAL "Global OP library") + + cc_test(gather_test SRCS gather_test.cc DEPS tensor) cc_test(net_op_test SRCS net_op_test.cc DEPS net_op) cc_test(scatter_test SRCS scatter_test.cc DEPS tensor) @@ -253,3 +275,4 @@ if(WITH_GPU) cc_test(nccl_op_test SRCS nccl_op_test.cu.cc DEPS nccl_op gpu_info device_context) endif() cc_test(save_load_op_test SRCS save_load_op_test.cc DEPS save_op load_op) +cc_test(test_send_recv SRCS send_recv_op_test.cc DEPS send_op recv_op sum_op executor) diff --git a/paddle/operators/batch_norm_op.cc b/paddle/operators/batch_norm_op.cc index f884e6efa917ce3f8554dce0e248f2b29273e3f3..ac97bd83ab7e7838871586cfe5acb832084b6cec 100644 --- a/paddle/operators/batch_norm_op.cc +++ b/paddle/operators/batch_norm_op.cc @@ -62,13 +62,14 @@ class BatchNormOp : public framework::OperatorWithKernel { const auto x_dims = ctx->GetInputDim("X"); const TensorFormat tensor_format = StringToTensorFormat(ctx->Attrs().Get("tensor_format")); + + PADDLE_ENFORCE(x_dims.size() >= 2 && x_dims.size() <= 5, + "Input X must have 2 to 5 dimensions."); + const int C = (tensor_format == TensorFormat::NCHW ? x_dims[1] : x_dims[x_dims.size() - 1]); - PADDLE_ENFORCE(x_dims.size() >= 3 && x_dims.size() <= 5, - "Input X must have 3 to 5 dimensions."); - PADDLE_ENFORCE_EQ(ctx->GetInputDim("Scale").size(), 1UL); PADDLE_ENFORCE_EQ(ctx->GetInputDim("Scale")[0], C); PADDLE_ENFORCE_EQ(ctx->GetInputDim("Bias").size(), 1UL); @@ -146,8 +147,8 @@ class BatchNormKernel : public framework::OpKernel { const auto *x = ctx.Input("X"); const auto &x_dims = x->dims(); - PADDLE_ENFORCE(x_dims.size() >= 3 && x_dims.size() <= 5, - "The Input dim size should be between 3 and 5"); + PADDLE_ENFORCE(x_dims.size() >= 2 && x_dims.size() <= 5, + "The Input dim size should be between 2 and 5"); const int N = x_dims[0]; const int C = (tensor_format == TensorFormat::NCHW ? x_dims[1] @@ -339,8 +340,8 @@ class BatchNormGradKernel // Get the size for each dimension. // NCHW [batch_size, in_channels, in_height, in_width] const auto &x_dims = x->dims(); - PADDLE_ENFORCE(x_dims.size() >= 3 && x_dims.size() <= 5, - "The Input dim size should be between 3 and 5"); + PADDLE_ENFORCE(x_dims.size() >= 2 && x_dims.size() <= 5, + "The Input dim size should be between 2 and 5"); const int N = x_dims[0]; const int C = (tensor_format == TensorFormat::NCHW ? x_dims[1] diff --git a/paddle/operators/batch_norm_op.cu.cc b/paddle/operators/batch_norm_op.cu.cc index 726d1ea1b8d7ced93f94bb0e5bb4df9e43b0ac7b..7b2f3187007fa2491afa75de1cde1910c6ce9bb8 100644 --- a/paddle/operators/batch_norm_op.cu.cc +++ b/paddle/operators/batch_norm_op.cu.cc @@ -29,14 +29,21 @@ void ExtractNCWHD(const framework::DDim &dims, const TensorFormat &tensor_format, int *N, int *C, int *H, int *W, int *D) { *N = dims[0]; - *C = tensor_format == TensorFormat::NCHW ? dims[1] : dims[dims.size() - 1]; - *H = tensor_format == TensorFormat::NCHW ? dims[2] : dims[1]; - *W = dims.size() > 3 - ? (tensor_format == TensorFormat::NCHW ? dims[3] : dims[2]) - : 1; - *D = dims.size() > 4 - ? (tensor_format == TensorFormat::NCHW ? dims[4] : dims[3]) - : 1; + if (dims.size() == 2) { + *C = dims[1]; + *H = 1; + *W = 1; + *D = 1; + } else { + *C = tensor_format == TensorFormat::NCHW ? dims[1] : dims[dims.size() - 1]; + *H = tensor_format == TensorFormat::NCHW ? dims[2] : dims[1]; + *W = dims.size() > 3 + ? (tensor_format == TensorFormat::NCHW ? dims[3] : dims[2]) + : 1; + *D = dims.size() > 4 + ? (tensor_format == TensorFormat::NCHW ? dims[4] : dims[3]) + : 1; + } } template @@ -56,8 +63,8 @@ class BatchNormKernel : public framework::OpKernel { // NCHW [batch_size, in_channels, in_height, in_width] const auto *x = ctx.Input("X"); const auto &x_dims = x->dims(); - PADDLE_ENFORCE(x_dims.size() >= 3 && x_dims.size() <= 5, - "The Input dim size should be between 3 and 5"); + PADDLE_ENFORCE(x_dims.size() >= 2 && x_dims.size() <= 5, + "The Input dim size should be between 2 and 5"); int N, C, H, W, D; ExtractNCWHD(x_dims, tensor_format, &N, &C, &H, &W, &D); @@ -180,8 +187,8 @@ class BatchNormGradKernel const auto &x_dims = x->dims(); - PADDLE_ENFORCE(x_dims.size() >= 3 && x_dims.size() <= 5, - "The Input dim size should be between 3 and 5"); + PADDLE_ENFORCE(x_dims.size() >= 2 && x_dims.size() <= 5, + "The Input dim size should be between 2 and 5"); int N, C, H, W, D; ExtractNCWHD(x_dims, tensor_format, &N, &C, &H, &W, &D); diff --git a/paddle/operators/conv_transpose_op.cc b/paddle/operators/conv_transpose_op.cc index 3e55ef036a7fb976117054574d1347fa943acd55..314b577d0067773028a4e9b333a41b8e7a74d327 100644 --- a/paddle/operators/conv_transpose_op.cc +++ b/paddle/operators/conv_transpose_op.cc @@ -74,12 +74,12 @@ Conv2DTransposeOpMaker::Conv2DTransposeOpMaker( "The format of output tensor is also NCHW."); AddAttr>( "strides", - "(vector defalut:{1, 1}), the strides(h_stride, w_stride) of " + "(vector default:{1, 1}), the strides(h_stride, w_stride) of " "convolution transpose operator.") .SetDefault({1, 1}); AddAttr>( "paddings", - "(vector defalut:{0, 0}), the paddings(h_pad, w_pad) of convolution " + "(vector default:{0, 0}), the paddings(h_pad, w_pad) of convolution " "transpose operator.") .SetDefault({0, 0}); AddComment(R"DOC( @@ -101,8 +101,8 @@ Example: Output: Output shape: (N, C_out, H_out, W_out) where - H_out = (H_in - 1) * strides[0] - 2 * paddings[0] + filter_size[0]; - W_out = (W_in - 1) * strides[1] - 2 * paddings[1] + filter_size[1]; + H_out = (H_in - 1) * strides[0] - 2 * paddings[0] + H_f; + W_out = (W_in - 1) * strides[1] - 2 * paddings[1] + W_f; )DOC"); } @@ -130,12 +130,12 @@ Conv3DTransposeOpMaker::Conv3DTransposeOpMaker( "the number of channels, D is the depth of the feature, H is the " "height of the feature, and W is the width of the feature."); AddAttr>("strides", - "(vector defalut:{1, 1, 1}), the " + "(vector default:{1, 1, 1}), the " "strides{d_stride, h_stride, w_stride} of " "convolution transpose operator.") .SetDefault({1, 1, 1}); AddAttr>("paddings", - "(vector defalut:{0, 0, 0}), paddings(d_pad, " + "(vector default:{0, 0, 0}), paddings(d_pad, " "h_pad, w_pad) of convolution transpose operator.") .SetDefault({0, 0, 0}); AddComment(R"DOC( @@ -158,9 +158,9 @@ Example: Output: Output shape: (N, C_out, D_out, H_out, W_out) where - D_out = (D_in - 1) * strides[0] - 2 * paddings[0] + filter_size[0]; - H_out = (H_in - 1) * strides[1] - 2 * paddings[1] + filter_size[1]; - W_out = (W_in - 1) * strides[2] - 2 * paddings[2] + filter_size[2]; + D_out = (D_in - 1) * strides[0] - 2 * paddings[0] + D_f; + H_out = (H_in - 1) * strides[1] - 2 * paddings[1] + H_f; + W_out = (W_in - 1) * strides[2] - 2 * paddings[2] + W_f; )DOC"); } diff --git a/paddle/operators/detail/CMakeLists.txt b/paddle/operators/detail/CMakeLists.txt new file mode 100644 index 0000000000000000000000000000000000000000..f6bdc63cc2cfae526fe911ee4d989675452d5c5d --- /dev/null +++ b/paddle/operators/detail/CMakeLists.txt @@ -0,0 +1 @@ +grpc_library(sendrecvop_grpc SRCS recv_impl.cc send_impl.cc PROTO send_recv.proto DEPS lod_tensor selected_rows) diff --git a/paddle/operators/detail/recv_impl.cc b/paddle/operators/detail/recv_impl.cc new file mode 100644 index 0000000000000000000000000000000000000000..89dc5045221156eed7aa9411bc96ad86f91136d2 --- /dev/null +++ b/paddle/operators/detail/recv_impl.cc @@ -0,0 +1,44 @@ +/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserve. + + Licensed under the Apache License, Version 2.0 (the "License"); + you may not use this file except in compliance with the License. + You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. */ + +#include "send_recv_impl.h" + +namespace paddle { +namespace operators { +namespace detail { + +Status SendRecvServerImpl::SendVariable(ServerContext *context, + const VariableMessage *in_var, + VariableMessage *out_var) { + framework::LoDTensor t; + // TODO(typhoonzero): desirealize in_tensor and run pserver network. + std::istringstream iss(in_var->serialized()); + framework::DeserializeFromStream(iss, &t); + lodtensor_queue_.Push(std::move(t)); + // Block util the sub graph is done. + t = lodtensor_return_queue_.Pop(); + std::ostringstream oss; + // FIXME(typhoonzero): get context from op. + framework::SerializeToStream(oss, t, platform::CPUDeviceContext()); + std::string *varname = out_var->mutable_varname(); + *varname = in_var->varname(); + std::string *serialized = out_var->mutable_serialized(); + *serialized = oss.str(); + + return Status::OK; +} + +} // namespace detail +} // namespace operators +} // namespace paddle diff --git a/paddle/operators/detail/send_impl.cc b/paddle/operators/detail/send_impl.cc new file mode 100644 index 0000000000000000000000000000000000000000..da1ddf75d2afb85670c5ea0c9884376415f28208 --- /dev/null +++ b/paddle/operators/detail/send_impl.cc @@ -0,0 +1,54 @@ +/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserve. + + Licensed under the Apache License, Version 2.0 (the "License"); + you may not use this file except in compliance with the License. + You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. */ + +#include "send_recv_impl.h" + +namespace paddle { +namespace operators { +namespace detail { + +bool RPCClient::SendVariable(const framework::Scope& scope, + const std::string& inname, + const std::string& outname) { + ClientContext context; + VariableMessage msg, out_msg; + // FIXME(typhoonzero): pass device context to here. + auto ctx = platform::CPUDeviceContext(); + auto* var = scope.FindVar(inname); + PADDLE_ENFORCE(var); + // TODO(typhoonzero): support SelectedRows + PADDLE_ENFORCE(var->IsType(), + "Only support LoDTensor, %s has wrong type", inname); + const framework::LoDTensor& tensor = var->Get(); + std::ostringstream oss; + framework::SerializeToStream(oss, tensor, ctx); + msg.set_varname(inname); + msg.set_serialized(oss.str()); + Status status = stub_->SendVariable(&context, msg, &out_msg); + if (!status.ok()) { + return false; + } + std::istringstream iss(out_msg.serialized()); + framework::LoDTensor ret_tensor; + framework::DeserializeFromStream(iss, &ret_tensor); + auto* outvar = scope.FindVar(outname); + framework::LoDTensor* out_tensor = outvar->GetMutable(); + // FIXME(typhoonzero): do not copy. + framework::CopyFrom(ret_tensor, ctx.GetPlace(), ctx, out_tensor); + return true; +} + +} // namespace detail +} // namespace operators +} // namespace paddle diff --git a/paddle/operators/detail/send_recv.proto b/paddle/operators/detail/send_recv.proto new file mode 100644 index 0000000000000000000000000000000000000000..962c7d59819dede022474aec4a2d7f538d28c688 --- /dev/null +++ b/paddle/operators/detail/send_recv.proto @@ -0,0 +1,35 @@ +/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserved. + + Licensed under the Apache License, Version 2.0 (the "License"); + you may not use this file except in compliance with the License. + You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. */ + +syntax = "proto3"; + +package sendrecv; + +service SendRecvService { + // For parameter server round-robin like hashing, do not split tensors. + // Send and recv only one tensor + rpc SendVariable(VariableMessage) returns (VariableMessage) {} +} + +// VariableMessage is serialized paddle variable message. +// It can be: +// Tensor +// LoDTensor +// SelectedRows +message VariableMessage { + string varname = 1; + bytes serialized = 2; +} + +message VoidMessage {} \ No newline at end of file diff --git a/paddle/operators/detail/send_recv_impl.h b/paddle/operators/detail/send_recv_impl.h new file mode 100644 index 0000000000000000000000000000000000000000..b9a5340a8636db7b5d6ec7b21368632d3916b4aa --- /dev/null +++ b/paddle/operators/detail/send_recv_impl.h @@ -0,0 +1,87 @@ +/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserve. + + Licensed under the Apache License, Version 2.0 (the "License"); + you may not use this file except in compliance with the License. + You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. */ + +#pragma once + +#include "paddle/framework/data_type.h" +#include "paddle/framework/lod_tensor.h" +#include "paddle/framework/scope.h" +#include "paddle/framework/selected_rows.h" +#include "paddle/operators/detail/simple_block_queue.h" + +// #include +// #include +// #include +// #include +#include "paddle/operators/detail/send_recv.grpc.pb.h" +#include "paddle/operators/detail/send_recv.pb.h" + +#include + +using grpc::Channel; +using grpc::Server; +using grpc::ServerContext; +using grpc::ServerReader; +using grpc::ServerBuilder; + +using grpc::ClientContext; +using grpc::ClientReader; +using grpc::ClientReaderWriter; +using grpc::ClientWriter; +using grpc::Status; +using sendrecv::SendRecvService; +using sendrecv::VariableMessage; +using sendrecv::VoidMessage; + +namespace paddle { +namespace operators { +namespace detail { + +class SendRecvServerImpl final : public SendRecvService::Service { + public: + explicit SendRecvServerImpl() {} + + Status SendVariable(ServerContext *context, const VariableMessage *in_var, + VariableMessage *out_var) override; + + const framework::LoDTensor Get() { return this->lodtensor_queue_.Pop(); } + + void Push(const framework::LoDTensor &tensor) { + this->lodtensor_return_queue_.Push(tensor); + } + + private: + SimpleBlockQueue lodtensor_queue_; + SimpleBlockQueue lodtensor_return_queue_; + SimpleBlockQueue selected_rows_queue_; + SimpleBlockQueue selected_rows_return_queue_; +}; + +// RPCClient is a class to send tensors to pserver sub-network +// using different hashing methods. +class RPCClient { + public: + RPCClient(std::shared_ptr channel) + : stub_(SendRecvService::NewStub(channel)) {} + + bool SendVariable(const framework::Scope &scope, const std::string &inname, + const std::string &outname); + + private: + std::unique_ptr stub_; +}; + +} // namespace detail +} // namespace operators +} // namespace paddle diff --git a/paddle/operators/detail/simple_block_queue.h b/paddle/operators/detail/simple_block_queue.h new file mode 100644 index 0000000000000000000000000000000000000000..44899217579532af2c1d2e6074ec0e08231e7b86 --- /dev/null +++ b/paddle/operators/detail/simple_block_queue.h @@ -0,0 +1,52 @@ +/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserved. + + Licensed under the Apache License, Version 2.0 (the "License"); + you may not use this file except in compliance with the License. + You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. */ + +#pragma once + +#include +#include +#include + +namespace paddle { +namespace operators { +namespace detail { + +template +class SimpleBlockQueue { + private: + std::mutex mutex_; + std::condition_variable condition_; + std::deque queue_; + + public: + void Push(T const& value) { + { + std::unique_lock lock(this->mutex_); + queue_.push_front(value); + } + this->condition_.notify_one(); + } + + T Pop() { + std::unique_lock lock(this->mutex_); + this->condition_.wait(lock, [=] { return !this->queue_.empty(); }); + T rc(std::move(this->queue_.back())); + this->queue_.pop_back(); + return rc; + } +}; + +} // namespace detail +} // namespace operators +} // namespace paddle diff --git a/paddle/operators/load_op.cc b/paddle/operators/load_op.cc index b0838eed1611c1d51e57fc2300606f753982dc89..4e58b84430f2a8697bbbc1acf971fd063120f563 100644 --- a/paddle/operators/load_op.cc +++ b/paddle/operators/load_op.cc @@ -38,61 +38,7 @@ class LoadOp : public framework::OperatorBase { out_var_name); auto *tensor = out_var->GetMutable(); - - uint32_t version; - fin.read(reinterpret_cast(&version), sizeof(version)); - PADDLE_ENFORCE_EQ(version, 0U, "Only version 0 is supported"); - framework::TensorDesc desc; - { // int32_t size - // proto buffer - int32_t size; - fin.read(reinterpret_cast(&size), sizeof(size)); - std::unique_ptr buf(new char[size]); - fin.read(reinterpret_cast(buf.get()), size); - PADDLE_ENFORCE(desc.ParseFromArray(buf.get(), size), - "Cannot parse tensor desc"); - } - { // read tensor - std::vector dims; - dims.reserve(static_cast(desc.dims().size())); - std::copy(desc.dims().begin(), desc.dims().end(), - std::back_inserter(dims)); - tensor->Resize(framework::make_ddim(dims)); - - void *buf; - platform::Place cpu = platform::CPUPlace(); - switch (desc.data_type()) { - case framework::FP32: - buf = tensor->mutable_data(cpu); - break; - case framework::FP64: - buf = tensor->mutable_data(cpu); - break; - case framework::INT32: - buf = tensor->mutable_data(cpu); - break; - case framework::INT64: - buf = tensor->mutable_data(cpu); - break; - default: - PADDLE_THROW("DataType %d not supported", desc.data_type()); - } - fin.read(static_cast(buf), tensor->memory_size()); - } - { // read lod - uint64_t lod_level; - fin.read(reinterpret_cast(&lod_level), sizeof(lod_level)); - auto &lod = *tensor->mutable_lod(); - lod.resize(lod_level); - for (uint64_t i = 0; i < lod_level; ++i) { - uint64_t size; - fin.read(reinterpret_cast(&size), sizeof(size)); - std::vector tmp(size / sizeof(size_t)); - fin.read(reinterpret_cast(tmp.data()), - static_cast(size)); - lod[i] = tmp; - } - } + framework::DeserializeFromStream(fin, tensor); auto place = dev_ctx.GetPlace(); if (platform::is_gpu_place(place)) { diff --git a/paddle/operators/recv_op.cc b/paddle/operators/recv_op.cc new file mode 100644 index 0000000000000000000000000000000000000000..c69e416e10f2a9ced1f1b22c39235e4c9338e77c --- /dev/null +++ b/paddle/operators/recv_op.cc @@ -0,0 +1,121 @@ +/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserved. + + Licensed under the Apache License, Version 2.0 (the "License"); + you may not use this file except in compliance with the License. + You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. */ + +#include +#include +#include +#include + +#include + +#include "paddle/framework/data_type.h" +#include "paddle/framework/executor.h" +#include "paddle/framework/framework.pb.h" +#include "paddle/framework/lod_tensor.h" +#include "paddle/framework/op_registry.h" +#include "paddle/operators/detail/send_recv_impl.h" +#include "paddle/operators/detail/simple_block_queue.h" + +namespace paddle { +namespace operators { + +void RunServer(Server **rpc_server, + std::shared_ptr service, + const std::string &server_address) { + ServerBuilder builder; + builder.AddListeningPort(server_address, grpc::InsecureServerCredentials()); + builder.RegisterService(service.get()); + std::unique_ptr server(builder.BuildAndStart()); + *rpc_server = server.get(); + LOG(INFO) << "Server listening on " << server_address << std::endl; + server->Wait(); +} + +class RecvOp : public framework::OperatorBase { + public: + RecvOp(const std::string &type, const framework::VariableNameMap &inputs, + const framework::VariableNameMap &outputs, + const framework::AttributeMap &attrs) + : OperatorBase(type, inputs, outputs, attrs) { + if (!rpc_service_) { + rpc_service_.reset(new detail::SendRecvServerImpl()); + std::string endpoint = Attr("endpoint"); + server_thread_.reset( + new std::thread(RunServer, &rpc_server_, rpc_service_, endpoint)); + } + } + + virtual ~RecvOp() { + rpc_server_->Shutdown(); + server_thread_->join(); + } + + void Run(const framework::Scope &scope, + const platform::DeviceContext &dev_ctx) const override { + // blocking get one var from client. + const framework::LoDTensor &t = rpc_service_->Get(); + framework::Scope &recv_scope = scope.NewScope(); + // set graph input var + auto *var = recv_scope.Var(Input("RX")); + auto *tensor = var->GetMutable(); + // FIXME(typhoonzero): do not copy + framework::CopyFrom(t, dev_ctx.GetPlace(), dev_ctx, tensor); + + auto *block = Attr("OptimizeBlock"); + auto *program = block->Program(); + framework::Executor executor(dev_ctx); + // Run sub graph to get optimized tensor + executor.Run(*program, &recv_scope, block->ID(), + false /*create_local_scope*/); + + auto *out_var = recv_scope.FindVar("Out"); + // push back + rpc_service_->Push(out_var->Get()); + } + + protected: + // grpc server instance to track status and gracefully shutdown. + // borrow an pointer from server thread. + Server *rpc_server_{nullptr}; + // grpc send/recv service implement to register. + std::shared_ptr rpc_service_; + std::shared_ptr server_thread_; +}; + +class RecvOpMaker : public framework::OpProtoAndCheckerMaker { + public: + RecvOpMaker(framework::OpProto *proto, framework::OpAttrChecker *op_checker) + : OpProtoAndCheckerMaker(proto, op_checker) { + AddInput("RX", "(Tensor) Input tensor to be saved"); + AddComment(R"DOC( +Recv operator + +This operator will recv tensor from send_op +)DOC"); + AddAttr("endpoint", + "(string, default 127.0.0.1:6164)" + "IP address to listen on.") + .SetDefault("127.0.0.1:6164") + .AddCustomChecker([](const std::string &ip) { return !ip.empty(); }); + AddAttr("OptimizeBlock", "type BlockDescBind*", + "optimize network run in server"); + } +}; + +} // namespace operators +} // namespace paddle + +namespace ops = paddle::operators; + +REGISTER_OPERATOR(recv, ops::RecvOp, ops::RecvOpMaker); diff --git a/paddle/operators/save_op.cc b/paddle/operators/save_op.cc index 56909fb65f44ad00314103e21bee9535fbd59317..d4921cb80c8d78c52ae1887c36819b52621470eb 100644 --- a/paddle/operators/save_op.cc +++ b/paddle/operators/save_op.cc @@ -88,73 +88,7 @@ class SaveOp : public framework::OperatorBase { "SaveOp only support LoDTensor, %s has wrong type", iname); auto &tensor = var->Get(); - - { // the 1st field, uint32_t version - constexpr uint32_t version = 0; - fout.write(reinterpret_cast(&version), sizeof(version)); - } - { // the 2nd field, tensor description - // int32_t size - // void* protobuf message - framework::TensorDesc desc; - desc.set_data_type(framework::ToDataType(tensor.type())); - auto dims = framework::vectorize(tensor.dims()); - auto *pb_dims = desc.mutable_dims(); - pb_dims->Resize(static_cast(dims.size()), 0); - std::copy(dims.begin(), dims.end(), pb_dims->begin()); - int32_t size = desc.ByteSize(); - fout.write(reinterpret_cast(&size), sizeof(size)); - auto out = desc.SerializeAsString(); - fout.write(out.data(), size); - } - { // the 3rd field, tensor data - uint64_t size = tensor.memory_size(); - auto *data_ptr = tensor.data(); - PADDLE_ENFORCE(size < std::numeric_limits::max(), - "Index overflow when writing tensor"); - if (platform::is_gpu_place(tensor.place())) { -#ifdef PADDLE_WITH_CUDA - constexpr size_t kBufSize = 1024 * 1024 * 64; // 64MB - std::unique_ptr buf(new char[kBufSize]); - auto &gpu_dev_ctx = - static_cast(dev_ctx); - platform::CPUPlace cpu; - uintptr_t data = reinterpret_cast(data_ptr); - while (size != 0) { - size_t size_to_write = std::min(kBufSize, static_cast(size)); - memory::Copy(cpu, buf.get(), - boost::get(tensor.place()), - reinterpret_cast(data), size_to_write, - gpu_dev_ctx.stream()); - gpu_dev_ctx.Wait(); - fout.write(buf.get(), size_to_write); - data += size_to_write; - size -= size_to_write; - } -#else - PADDLE_THROW("Unexpected branch"); -#endif - } else { - fout.write(static_cast(data_ptr), - static_cast(size)); - } - } - { // the 4th field, lod information - // uint64_t lod_level - // uint64_t lod_level_1 size in byte. - // int* lod_level_1 data - // ... - auto lod = tensor.lod(); - uint64_t size = lod.size(); - fout.write(reinterpret_cast(&size), sizeof(size)); - - for (auto &each : lod) { - size = each.size() * sizeof(framework::LoD::value_type::value_type); - fout.write(reinterpret_cast(&size), sizeof(size)); - fout.write(reinterpret_cast(each.data()), - static_cast(size)); - } - } + framework::SerializeToStream(fout, tensor, dev_ctx); } }; diff --git a/paddle/operators/scale_op.cc b/paddle/operators/scale_op.cc index 5745580504fb9bda551f21665bff5c65ae82aeb9..e5c10fec4d840c58a74758a65ddfa93421ab4827 100644 --- a/paddle/operators/scale_op.cc +++ b/paddle/operators/scale_op.cc @@ -77,4 +77,6 @@ REGISTER_OPERATOR(scale, ops::ScaleOp, ops::ScaleOpMaker, ops::ScaleGradMaker); REGISTER_OP_CPU_KERNEL(scale, ops::ScaleKernel, - ops::ScaleKernel); + ops::ScaleKernel, + ops::ScaleKernel, + ops::ScaleKernel); diff --git a/paddle/operators/scale_op.cu b/paddle/operators/scale_op.cu index 820fd4e6855bb192ec3292ea6983d5ecae73b6e6..0d707751598e65bc56bf73a435c10b4acd6d8ed0 100644 --- a/paddle/operators/scale_op.cu +++ b/paddle/operators/scale_op.cu @@ -16,4 +16,6 @@ REGISTER_OP_GPU_KERNEL( scale, paddle::operators::ScaleKernel, - paddle::operators::ScaleKernel); + paddle::operators::ScaleKernel, + paddle::operators::ScaleKernel, + paddle::operators::ScaleKernel); diff --git a/paddle/operators/send_op.cc b/paddle/operators/send_op.cc new file mode 100644 index 0000000000000000000000000000000000000000..a3059847f2d420359b347e3a5d514d8a3829a4e2 --- /dev/null +++ b/paddle/operators/send_op.cc @@ -0,0 +1,84 @@ +/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserved. + + Licensed under the Apache License, Version 2.0 (the "License"); + you may not use this file except in compliance with the License. + You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. */ + +#include + +#include "paddle/framework/data_type.h" +#include "paddle/framework/framework.pb.h" +#include "paddle/framework/lod_tensor.h" +#include "paddle/framework/op_registry.h" + +#include "paddle/operators/detail/send_recv_impl.h" +#include "paddle/operators/detail/simple_block_queue.h" + +namespace paddle { +namespace operators { + +// TODO(typhoonzero): this is a simple implementation which only send +// one tensor +class SendOp : public framework::OperatorBase { + public: + SendOp(const std::string &type, const framework::VariableNameMap &inputs, + const framework::VariableNameMap &outputs, + const framework::AttributeMap &attrs) + : OperatorBase(type, inputs, outputs, attrs) { + // init client when the operator is created at runtime. + if (!client_) { + std::string endpoint = Attr("endpoint"); + client_.reset(new detail::RPCClient( + grpc::CreateChannel(endpoint, grpc::InsecureChannelCredentials()))); + // TODO(typhoonzero): how to call InitVariables + } + } + void Run(const framework::Scope &scope, + const platform::DeviceContext &dev_ctx) const override { + auto iname = Input("X"); + auto oname = Output("Out"); + // TODO(typhoonzero): currently it's non-blocking, + // should block until server responds. + bool ret = client_->SendVariable(scope, iname, oname); + if (!ret) { + LOG(ERROR) << "send variable error"; + } + } + + protected: + std::shared_ptr client_{nullptr}; +}; + +class SendOpMaker : public framework::OpProtoAndCheckerMaker { + public: + SendOpMaker(framework::OpProto *proto, framework::OpAttrChecker *op_checker) + : OpProtoAndCheckerMaker(proto, op_checker) { + AddInput("X", "(Tensor) Input tensor to be saved"); + AddOutput("Out", "(Tensor) Output fetched from server"); + AddComment(R"DOC( +Recv operator + +This operator will recv tensor from send_op +)DOC"); + AddAttr("endpoint", + "(string, default 127.0.0.1:6164)" + "IP address to listen on.") + .SetDefault("127.0.0.1:6164") + .AddCustomChecker([](const std::string &ip) { return !ip.empty(); }); + } +}; + +} // namespace operators +} // namespace paddle + +namespace ops = paddle::operators; + +REGISTER_OPERATOR(send, ops::SendOp, ops::SendOpMaker); diff --git a/paddle/operators/send_recv_op_test.cc b/paddle/operators/send_recv_op_test.cc new file mode 100644 index 0000000000000000000000000000000000000000..ac03eb3752e7cd31dd80f4caa39dc0625f0409d5 --- /dev/null +++ b/paddle/operators/send_recv_op_test.cc @@ -0,0 +1,125 @@ +/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserved. + + Licensed under the Apache License, Version 2.0 (the "License"); + you may not use this file except in compliance with the License. + You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. */ + +// TODO(typhoonzero): add python bindings for this test as +// a RemoteOptimizer. + +#include +#include + +#include "gtest/gtest.h" +#include "paddle/framework/op_registry.h" +#include "paddle/framework/operator.h" +#include "paddle/framework/program_desc.h" + +USE_NO_KERNEL_OP(send); +USE_NO_KERNEL_OP(recv); +USE_OP(sum); + +// global for simplicity. +std::unique_ptr recv_op; + +void InitTensorsInScope(paddle::framework::Scope &scope, + paddle::platform::CPUPlace &place) { + paddle::platform::CPUDeviceContext ctx(place); + auto var = scope.Var("X"); + auto tensor = var->GetMutable(); + tensor->Resize({10, 10}); + float *expect = tensor->mutable_data(place); + for (int64_t i = 0; i < tensor->numel(); ++i) { + expect[i] = static_cast(i); + } + + auto out_var = scope.Var("Out"); + auto out_tensor = out_var->GetMutable(); + out_tensor->Resize({10, 10}); + tensor->mutable_data(place); // allocate +} + +void AddOp(const std::string &type, + const paddle::framework::VariableNameMap &inputs, + const paddle::framework::VariableNameMap &outputs, + paddle::framework::AttributeMap attrs, + paddle::framework::BlockDescBind *block) { + // insert output + for (auto kv : outputs) { + for (auto v : kv.second) { + auto var = block->Var(v); + var->SetDataType(paddle::framework::DataType::FP32); + } + } + + // insert op + auto op = block->AppendOp(); + op->SetType(type); + for (auto &kv : inputs) { + op->SetInput(kv.first, kv.second); + } + for (auto &kv : outputs) { + op->SetOutput(kv.first, kv.second); + } + op->SetAttrMap(attrs); +} + +void StartServerNet() { + paddle::framework::Scope scope; + paddle::platform::CPUPlace place; + InitTensorsInScope(scope, place); + + // sub program run in recv_op, for simple test we use sum + paddle::framework::ProgramDescBind program; + paddle::framework::BlockDescBind *block = program.MutableBlock(0); + // X for server side tensors, RX for received tensers, must be of same shape. + AddOp("sum", {{"X", {"X", "RX"}}}, {{"Out", {"Out"}}}, {}, block); + + paddle::framework::AttributeMap attrs; + attrs.insert({"endpoint", std::string("127.0.0.1:6174")}); + attrs.insert({"OptimizeBlock", block}); + recv_op = paddle::framework::OpRegistry::CreateOp("recv", {{"RX", {"RX"}}}, + {{"Out", {"Out"}}}, attrs); + paddle::platform::CPUDeviceContext ctx(place); + recv_op->Run(scope, ctx); +} + +TEST(SendRecvOp, CPU) { + std::thread server_thread(StartServerNet); + sleep(5); // wait server to start + // local net + paddle::framework::Scope scope; + paddle::platform::CPUPlace place; + InitTensorsInScope(scope, place); + + paddle::framework::AttributeMap attrs; + attrs.insert({"endpoint", std::string("127.0.0.1:6174")}); + + auto send_op = paddle::framework::OpRegistry::CreateOp( + "send", {{"X", {"X"}}}, {{"Out", {"Out"}}}, attrs); + paddle::platform::CPUDeviceContext ctx(place); + send_op->Run(scope, ctx); + + auto in_var = scope.Var("X"); + auto tensor = in_var->GetMutable(); + float *expected = tensor->data(); + + auto out_var = scope.Var("Out"); + auto target = out_var->GetMutable(); + // send fail cause output is none. + EXPECT_NE(target->memory_size(), size_t(0)); + float *actual = target->data(); + for (int64_t i = 0; i < target->numel(); ++i) { + EXPECT_EQ(expected[i] * 2, actual[i]); + } + recv_op.reset(); // dtor can shutdown and join server thread. + server_thread.join(); +} diff --git a/paddle/scripts/docker/build.sh b/paddle/scripts/docker/build.sh index fda2a2f1b764106a7a108e8c56bc90ce3459e9b5..a2fdc5ce69bfdf0fadb808e4b51c8eef4ff7dfd6 100644 --- a/paddle/scripts/docker/build.sh +++ b/paddle/scripts/docker/build.sh @@ -16,11 +16,13 @@ function cmake_gen() { echo "using python abi: $1" if [ "$1" == "cp27-cp27m" ]; then export LD_LIBRARY_PATH=/opt/_internal/cpython-2.7.11-ucs2/lib:${LD_LIBRARY_PATH#/opt/_internal/cpython-2.7.11-ucs4/lib:} + export PATH=/opt/python/cp27-cp27m/bin/:${PATH} PYTHON_FLAGS="-DPYTHON_EXECUTABLE:FILEPATH=/opt/python/cp27-cp27m/bin/python -DPYTHON_INCLUDE_DIR:PATH=/opt/python/cp27-cp27m/include/python2.7 -DPYTHON_LIBRARIES:FILEPATH=/opt/_internal/cpython-2.7.11-ucs2/lib/libpython2.7.so" elif [ "$1" == "cp27-cp27mu" ]; then export LD_LIBRARY_PATH=/opt/_internal/cpython-2.7.11-ucs4/lib:${LD_LIBRARY_PATH#/opt/_internal/cpython-2.7.11-ucs2/lib:} + export PATH=/opt/python/cp27-cp27mu/bin/:${PATH} PYTHON_FLAGS="-DPYTHON_EXECUTABLE:FILEPATH=/opt/python/cp27-cp27mu/bin/python -DPYTHON_INCLUDE_DIR:PATH=/opt/python/cp27-cp27mu/include/python2.7 -DPYTHON_LIBRARIES:FILEPATH=/opt/_internal/cpython-2.7.11-ucs4/lib/libpython2.7.so" diff --git a/paddle/scripts/travis/build_doc.sh b/paddle/scripts/travis/build_doc.sh index 28d82343ed32273740d0c52d0451681e43b3675e..7d54f0254c8ea9367a34233602293db5b8593f9a 100755 --- a/paddle/scripts/travis/build_doc.sh +++ b/paddle/scripts/travis/build_doc.sh @@ -11,8 +11,9 @@ make -j `nproc` gen_proto_py make -j `nproc` paddle_docs paddle_docs_cn # check websites for broken links -linkchecker doc/en/html/index.html -linkchecker doc/cn/html/index.html +# It will be failed now! +#linkchecker doc/en/html/index.html +#linkchecker doc/cn/html/index.html # Parse Github URL REPO=`git config remote.origin.url` diff --git a/python/paddle/v2/__init__.py b/python/paddle/v2/__init__.py index 33a0829ba8d635ebd68b50f3da07da958fb79dcb..70f61e84997efdbe3d6f268d249be8bac15b9ecd 100644 --- a/python/paddle/v2/__init__.py +++ b/python/paddle/v2/__init__.py @@ -83,11 +83,10 @@ def set_omp_mkl_env_vars(trainer_count): '''Get the number of physical cores''' if platform.system() == "Linux": num_sockets = int( - os.popen("lscpu |grep \"Socket\" |awk -F':' '{print $2}'|xargs") + os.popen("grep 'physical id' /proc/cpuinfo | sort -u | wc -l") .read()) num_cores_per_socket = int( - os.popen( - "lscpu |grep \"per socket\" |awk -F':' '{print $2}'|xargs") + os.popen("grep 'core id' /proc/cpuinfo | sort -u | wc -l") .read()) return num_sockets * num_cores_per_socket else: diff --git a/python/paddle/v2/fluid/evaluator.py b/python/paddle/v2/fluid/evaluator.py index bd4a6fda1fd20e68d5a42e76f6ab516bb5c00cff..137c5736226b689340748d5098ca51659d5acff8 100644 --- a/python/paddle/v2/fluid/evaluator.py +++ b/python/paddle/v2/fluid/evaluator.py @@ -26,9 +26,9 @@ class Evaluator(object): name(str): The name of evaluator. such as, "accuracy". Used for generate temporary variable name. main_program(Program, optional): The evaluator should be added to this - main_program. Default g_main_program + main_program. Default default_main_program() startup_program(Program, optional):The parameter should be added to this - startup_program. Default g_startup_program + startup_program. Default default_startup_program() Attributes: states(list): The list of state variables. states will be reset to zero diff --git a/python/paddle/v2/fluid/executor.py b/python/paddle/v2/fluid/executor.py index 3e26d1b983a3c924ce2392c266bcd32e27c7b309..bdc82eede9d93a7cf904999a6b869ce2d23c90dc 100644 --- a/python/paddle/v2/fluid/executor.py +++ b/python/paddle/v2/fluid/executor.py @@ -1,6 +1,6 @@ import numpy as np from . import core -from framework import Program, g_main_program +from framework import Program, default_main_program __all__ = ['Executor', 'g_scope'] @@ -103,7 +103,7 @@ class Executor(object): fetch_list = [] if program is None: - program = g_main_program + program = default_main_program() if not isinstance(program, Program): raise TypeError() diff --git a/python/paddle/v2/fluid/framework.py b/python/paddle/v2/fluid/framework.py index 6d6ea23f55eebc57cb120582a7c82d77eb1df45c..1c42e4d44f5046e0db171fdaeb8e7af38a2cae07 100644 --- a/python/paddle/v2/fluid/framework.py +++ b/python/paddle/v2/fluid/framework.py @@ -6,7 +6,7 @@ import proto.framework_pb2 as framework_pb2 __all__ = [ 'Block', 'Variable', 'Program', 'Operator', 'default_startup_program', - 'default_main_program', 'g_startup_program', 'g_main_program' + 'default_main_program' ] @@ -654,13 +654,13 @@ class Parameter(Variable): # program is a global instance. -g_main_program = Program() -g_startup_program = Program() +_main_program_ = Program() +_startup_program_ = Program() def default_startup_program(): - return g_startup_program + return _startup_program_ def default_main_program(): - return g_main_program + return _main_program_ diff --git a/python/paddle/v2/fluid/io.py b/python/paddle/v2/fluid/io.py index e5b2aa3b919df4cec1091c0bbd39b7e400cc6867..e147ac22ad289eb00c83def66974d875fcdc31f8 100644 --- a/python/paddle/v2/fluid/io.py +++ b/python/paddle/v2/fluid/io.py @@ -1,8 +1,7 @@ import os import cPickle as pickle -from paddle.v2.fluid.framework import Program, Parameter, g_main_program, \ - Variable +from paddle.v2.fluid.framework import Program, Parameter, default_main_program, Variable __all__ = [ 'save_vars', 'save_params', 'save_persistables', 'load_vars', 'load_params', @@ -46,7 +45,7 @@ def save_vars(executor, dirname, main_program=None, vars=None, predicate=None): """ if vars is None: if main_program is None: - main_program = g_main_program + main_program = default_main_program() if not isinstance(main_program, Program): raise TypeError("program should be as Program type or None") @@ -98,7 +97,7 @@ def load_vars(executor, dirname, main_program=None, vars=None, predicate=None): :param executor: executor that save variable :param dirname: directory path :param main_program: program. If vars is None, then filter all variables in this - program which fit `predicate`. Default g_program. + program which fit `predicate`. Default default_main_program(). :param predicate: The Predicate describes a callable that returns a variable as a bool. If it returns true, the variables will be loaded. :param vars: variables need to be loaded. If specify vars, program & @@ -107,7 +106,7 @@ def load_vars(executor, dirname, main_program=None, vars=None, predicate=None): """ if vars is None: if main_program is None: - main_program = g_main_program + main_program = default_main_program() if not isinstance(main_program, Program): raise TypeError("program's type should be Program") @@ -154,7 +153,7 @@ def load_persistables(executor, dirname, main_program=None): def get_inference_program(target_vars, main_program=None): if main_program is None: - main_program = g_main_program + main_program = default_main_program() if not isinstance(target_vars, list): target_vars = [target_vars] @@ -177,12 +176,12 @@ def save_inference_model(dirname, :param target_vars: Variables from which we can get inference results. :param executor: executor that save inference model :param main_program: original program, which will be pruned to build the inference model. - Default g_main_program. + Default default_main_program(). :return: None """ if main_program is None: - main_program = g_main_program + main_program = default_main_program() if not isinstance(target_vars, list): target_vars = [target_vars] @@ -272,10 +271,10 @@ def get_parameter_value_by_name(name, executor, program=None): :param executor: executor for retrieving the value :param name: the name of the parameter :param program: the program where the variable is found - Default g_main_program. + Default default_main_program(). :return: the LoDTensor for the variable """ if program is None: - program = g_main_program + program = default_main_program() var = program.global_block().var(name) return get_parameter_value(var, executor) diff --git a/python/paddle/v2/fluid/layer_helper.py b/python/paddle/v2/fluid/layer_helper.py index 5f8855551114a9a9b671d1630c9e8a3f0cb5c04b..7762b0d88f3a62c3b919d64a6565bfc3951c4e25 100644 --- a/python/paddle/v2/fluid/layer_helper.py +++ b/python/paddle/v2/fluid/layer_helper.py @@ -1,8 +1,7 @@ import copy import itertools -from framework import Variable, g_main_program, \ - g_startup_program, unique_name, dtype_is_floating +from framework import Variable, default_main_program, default_startup_program, unique_name, dtype_is_floating from paddle.v2.fluid.initializer import Constant, Xavier @@ -22,7 +21,7 @@ class LayerHelper(object): def main_program(self): prog = self.kwargs.get('main_program', None) if prog is None: - return g_main_program + return default_main_program() else: return prog @@ -30,7 +29,7 @@ class LayerHelper(object): def startup_program(self): prog = self.kwargs.get('startup_program', None) if prog is None: - return g_startup_program + return default_startup_program() else: return prog diff --git a/python/paddle/v2/fluid/layers.py b/python/paddle/v2/fluid/layers.py index 28bc3d214b559a089efb2bb736eb49cb1ba4de25..6adfac3a32c7c8ecf035ba3a8f757a0efb6f9b68 100644 --- a/python/paddle/v2/fluid/layers.py +++ b/python/paddle/v2/fluid/layers.py @@ -1,7 +1,7 @@ -from . import core +import core import proto.framework_pb2 as framework_pb2 from framework import OpProtoHolder, Variable, Program, Operator -from initializer import Constant, Normal, Xavier +from initializer import Constant, Normal, Xavier, Initializer from paddle.v2.fluid.layer_helper import LayerHelper, unique_name import re import cStringIO @@ -1587,6 +1587,97 @@ def array_length(array, main_program=None): return tmp +def conv2d_transpose(input, + num_filters, + output_size=None, + filter_size=None, + padding=None, + stride=None, + param_attr=None, + param_initializer=None, + main_program=None, + startup_program=None): + """ + The transpose of conv2d layer. + + This layer is also known as deconvolution layer. + + Args: + input(Variable): The input image with [N, C, H, W] format. + num_filters(int): The number of filter. It is as same as the output + image channel. + output_size(int|tuple|None): The output image size. If output size is a + tuple, it must contain two integers, (image_H, image_W). This + parameter only works when filter_size is None. + filter_size(int|tuple|None): The filter size. If filter_size is a tuple, + it must contain two integers, (filter_size_H, filter_size_W). + Otherwise, the filter will be a square. None if use output size to + calculate filter_size + padding(int|tuple): The padding size. If padding is a tuple, it must + contain two integers, (padding_H, padding_W). Otherwise, the + padding_H = padding_W = padding. + stride(int|tuple): The stride size. If stride is a tuple, it must + contain two integers, (stride_H, stride_W). Otherwise, the + stride_H = stride_W = stride. + param_attr: Parameter Attribute. + param_initializer(Initializer): Parameter Initializer. Default is Xavier + main_program(Program): the main program + startup_program(Program): the startup program + + Returns: + Variable: Output image. + """ + helper = LayerHelper("conv2d_transpose", **locals()) + if not isinstance(input, Variable): + raise TypeError("Input of conv2d_transpose must be Variable") + input_channel = input.shape[1] + + op_attr = dict() + + if isinstance(padding, int): + op_attr['paddings'] = [padding, padding] + elif padding is not None: + op_attr['paddings'] = padding + + if isinstance(stride, int): + op_attr['strides'] = stride + elif stride is not None: + op_attr['strides'] = stride + + if filter_size is None: + if output_size is None: + raise ValueError("output_size must be set when filter_size is None") + if isinstance(output_size, int): + output_size = [output_size, output_size] + + padding = op_attr.get('paddings', [0, 0]) + stride = op_attr.get('strides', [1, 1]) + + h_in = input.shape[2] + w_in = input.shape[3] + filter_size_h = output_size[0] - (h_in - 1) * stride[0] + 2 * padding[0] + filter_size_w = output_size[1] - (w_in - 1) * stride[1] + 2 * padding[1] + filter_size = [filter_size_h, filter_size_w] + elif isinstance(filter_size, int): + filter_size = [filter_size, filter_size] + + filter_shape = [input_channel, num_filters] + filter_size + img_filter = helper.create_parameter( + dtype=input.dtype, + shape=filter_shape, + attr=helper.param_attr, + initializer=param_initializer) + + out = helper.create_tmp_variable(dtype=input.dtype) + helper.append_op( + type='conv2d_transpose', + inputs={'Input': [input], + 'Filter': [img_filter]}, + outputs={'Output': out}, + attrs=op_attr) + return out + + class ConditionalBlockGuard(BlockGuard): def __init__(self, block): if not isinstance(block, ConditionalBlock): diff --git a/python/paddle/v2/fluid/tests/book/test_image_classification_train.py b/python/paddle/v2/fluid/tests/book/test_image_classification_train.py index cc45b10b90868858c61334a3a43acf65c3d4eaf5..0f0cc5b5406ef51ac3504a95ea716056ae8730af 100644 --- a/python/paddle/v2/fluid/tests/book/test_image_classification_train.py +++ b/python/paddle/v2/fluid/tests/book/test_image_classification_train.py @@ -69,8 +69,7 @@ def vgg16_bn_drop(input): drop = fluid.layers.dropout(x=conv5, dropout_prob=0.5) fc1 = fluid.layers.fc(input=drop, size=512, act=None) - reshape1 = fluid.layers.reshape(x=fc1, shape=list(fc1.shape + (1, 1))) - bn = fluid.layers.batch_norm(input=reshape1, act='relu') + bn = fluid.layers.batch_norm(input=fc1, act='relu') drop2 = fluid.layers.dropout(x=bn, dropout_prob=0.5) fc2 = fluid.layers.fc(input=drop2, size=512, act=None) return fc2 diff --git a/python/paddle/v2/fluid/tests/test_array_read_write_op.py b/python/paddle/v2/fluid/tests/test_array_read_write_op.py index b7790b01062d480cbd6c9e1a626d318385b4f61e..f6120aedecf1015c279b8f218f5e37f2e598ab91 100644 --- a/python/paddle/v2/fluid/tests/test_array_read_write_op.py +++ b/python/paddle/v2/fluid/tests/test_array_read_write_op.py @@ -3,7 +3,7 @@ import paddle.v2.fluid.core as core import paddle.v2.fluid.layers as layers from paddle.v2.fluid.executor import Executor from paddle.v2.fluid.backward import append_backward_ops -from paddle.v2.fluid.framework import g_main_program +from paddle.v2.fluid.framework import default_main_program import numpy @@ -66,7 +66,7 @@ class TestArrayReadWrite(unittest.TestCase): append_backward_ops(total_sum_scaled) - g_vars = map(g_main_program.global_block().var, + g_vars = map(default_main_program().global_block().var, [each_x.name + "@GRAD" for each_x in x]) g_out = [ item.sum() diff --git a/python/paddle/v2/fluid/tests/test_batch_norm_op.py b/python/paddle/v2/fluid/tests/test_batch_norm_op.py index 71f9599e0de83c86808f7e62547f80d3d50ffc7d..e766a68c0e338b07e47260e40edc544c98555382 100644 --- a/python/paddle/v2/fluid/tests/test_batch_norm_op.py +++ b/python/paddle/v2/fluid/tests/test_batch_norm_op.py @@ -21,6 +21,13 @@ def get_backward_op(scope, op, no_grad_set): def _reference_training(x, scale, offset, epsilon, data_format): + x_shape = x.shape + if len(x_shape) == 2: + if data_format == "NCHW": + x = np.reshape(x, (x.shape[0], x.shape[1], 1, 1)) + else: + x = np.reshape(x, (x.shape[0], 1, 1, x.shape[1])) + if data_format == "NCHW": n, c, h, w = x.shape x_square = x * x @@ -39,6 +46,8 @@ def _reference_training(x, scale, offset, epsilon, data_format): offset_tile = np.reshape(offset, (1, c, 1, 1)) offset_tile = np.reshape(offset_tile, (1, c, 1, 1)) y = normalized * scale_tile + offset_tile + if len(x_shape) == 2: + y = np.reshape(y, (y.shape[0], y.shape[1])) return y, mean, var elif data_format == "NHWC": x_square = x * x @@ -48,7 +57,10 @@ def _reference_training(x, scale, offset, epsilon, data_format): mean = x_sum / element_count var = x_square_sum / element_count - mean * mean normalized = (x - mean) / np.sqrt(var + epsilon) - return (normalized * scale + offset), mean, var + y = normalized * scale + offset + if len(x_shape) == 2: + y = np.reshape(y, x_shape) + return y, mean, var else: raise ValueError("Unknown data order.") @@ -65,6 +77,18 @@ def _reference_grad(x, grad_y, scale, mean, var, epsilon, data_format): # (x - mean) * sum(grad_y * (x - mean)) / (var + epsilon)) # transfer from (N, C, H, W) to (N, H, W, C) to simplify computation + x_shape = x.shape + + if len(x_shape) == 2: + if data_format == "NCHW": + x = np.reshape(x, (x.shape[0], x.shape[1], 1, 1)) + grad_y = np.reshape(grad_y, + (grad_y.shape[0], grad_y.shape[1], 1, 1)) + else: + x = np.reshape(x, (x.shape[0], 1, 1, x.shape[1])) + grad_y = np.reshape(grad_y, + (grad_y.shape[0], 1, 1, grad_y.shape[1])) + if data_format == "NCHW": x = np.transpose(x, (0, 2, 3, 1)) grad_y = np.transpose(grad_y, (0, 2, 3, 1)) @@ -83,6 +107,9 @@ def _reference_grad(x, grad_y, scale, mean, var, epsilon, data_format): grad_x = np.transpose(grad_x, (0, 3, 1, 2)) x = np.transpose(x, (0, 3, 1, 2)) grad_y = np.transpose(grad_y, (0, 3, 1, 2)) + + if len(x_shape) == 2: + grad_x = np.reshape(grad_x, x_shape) return grad_x, grad_scale, grad_offset @@ -127,7 +154,7 @@ class TestBatchNormOp(OpTest): momentum = 0.9 # N, H, W, C: 2, 3, 4, 2 - n, h, w, c = 2, 3, 4, 2 + n, h, w, c = 2, 3, 4, 5 x_shape = [n, h, w, c] scale_shape = [c] @@ -184,20 +211,23 @@ class TestBatchNormOp(OpTest): print 'python: NHWC, NCHW, backward checking passed' def test_forward_backward(self): - def test_with_place(place, tensor_format): + def test_with_place(place, tensor_format, shape): # attr epsilon = 0.00001 momentum = 0.9 - # N, H, W, C: 12, 3, 4, 2 - n, h, w, c = 2, 3, 4, 2 - - if data_format == "NHWC": - x_shape = [n, h, w, c] - elif data_format == "NCHW": - x_shape = [n, c, h, w] + if len(shape) == 2: + x_shape = shape + c = shape[1] else: - raise ValueError("Unknown data type.") + # n, h, w, c = 2, 3, 4, 2 + n, h, w, c = shape[0], shape[1], shape[2], shape[3] + if data_format == "NHWC": + x_shape = [n, h, w, c] + elif data_format == "NCHW": + x_shape = [n, c, h, w] + else: + raise ValueError("Unknown data type.") scale_shape = [c] x_val = np.random.random_sample(x_shape).astype(np.float32) @@ -219,7 +249,10 @@ class TestBatchNormOp(OpTest): # for gradient test # y_grad = np.ones(x_shape).astype(np.float32) y_grad = np.zeros(x_shape).astype(np.float32) - y_grad[0, 0, 0, 0] = 1. + if len(y_grad.shape) == 2: + y_grad[0, 0] = 1. + else: + y_grad[0, 0, 0, 0] = 1. # y_grad = np.random.random_sample(x_shape).astype(np.float32) x_grad_ref, scale_grad_ref, bias_grad_ref = _reference_grad( x_val, y_grad, scale_val, saved_mean, var_ref, epsilon, @@ -313,7 +346,8 @@ class TestBatchNormOp(OpTest): places.append(core.GPUPlace(0)) for place in places: for data_format in ["NCHW", "NHWC"]: - test_with_place(place, data_format) + test_with_place(place, data_format, [2, 3, 4, 5]) + test_with_place(place, data_format, [2, 3]) if __name__ == '__main__': diff --git a/python/paddle/v2/fluid/tests/test_conditional_block.py b/python/paddle/v2/fluid/tests/test_conditional_block.py index d953ee7ddc37d150d87cbd680379410a4d16f6b1..2b9d8f351a2836cd723d629d4790de1e068d0ea3 100644 --- a/python/paddle/v2/fluid/tests/test_conditional_block.py +++ b/python/paddle/v2/fluid/tests/test_conditional_block.py @@ -1,7 +1,7 @@ import unittest import paddle.v2.fluid.layers as layers import paddle.v2.fluid.core as core -from paddle.v2.fluid.framework import g_startup_program, g_main_program +from paddle.v2.fluid.framework import default_startup_program, default_main_program from paddle.v2.fluid.executor import Executor from paddle.v2.fluid.backward import append_backward_ops import numpy @@ -19,7 +19,7 @@ class ConditionalBlock(unittest.TestCase): cpu = core.CPUPlace() exe = Executor(cpu) - exe.run(g_startup_program) + exe.run(default_startup_program()) x = numpy.random.random(size=(10, 1)).astype('float32') @@ -29,7 +29,9 @@ class ConditionalBlock(unittest.TestCase): append_backward_ops(loss=loss) outs = exe.run( feed={'X': x}, - fetch_list=[g_main_program.block(0).var(data.name + "@GRAD")])[0] + fetch_list=[ + default_main_program().block(0).var(data.name + "@GRAD") + ])[0] print outs diff --git a/python/paddle/v2/fluid/tests/test_executor_and_mul.py b/python/paddle/v2/fluid/tests/test_executor_and_mul.py index 558273e30dff7fb74f78751f4fe569f79a453d0d..b1ef87c5cb1711c419b401c5950839816f7f4160 100644 --- a/python/paddle/v2/fluid/tests/test_executor_and_mul.py +++ b/python/paddle/v2/fluid/tests/test_executor_and_mul.py @@ -1,9 +1,10 @@ import unittest -from paddle.v2.fluid.layers import mul, data, sequence_pool + +import numpy import paddle.v2.fluid.core as core + from paddle.v2.fluid.executor import Executor -from paddle.v2.fluid.framework import g_main_program -import numpy +from paddle.v2.fluid.layers import mul, data class TestExecutor(unittest.TestCase): @@ -19,10 +20,7 @@ class TestExecutor(unittest.TestCase): a_np = numpy.random.random((100, 784)).astype('float32') b_np = numpy.random.random((784, 100)).astype('float32') exe = Executor(place) - outs = exe.run(g_main_program, - feed={'a': a_np, - 'b': b_np}, - fetch_list=[out]) + outs = exe.run(feed={'a': a_np, 'b': b_np}, fetch_list=[out]) out = outs[0] self.assertEqual((100, 100), out.shape) self.assertTrue(numpy.allclose(out, numpy.dot(a_np, b_np))) diff --git a/python/paddle/v2/fluid/tests/test_image_classification_layer.py b/python/paddle/v2/fluid/tests/test_image_classification_layer.py index 8e8e1b0a8c07a60cb1404462f976d10fe26e87f6..2fd609d4474e97ecd96adcd146f2f550e0772740 100644 --- a/python/paddle/v2/fluid/tests/test_image_classification_layer.py +++ b/python/paddle/v2/fluid/tests/test_image_classification_layer.py @@ -1,6 +1,6 @@ import unittest -import paddle.v2.fluid.layers as layers +import paddle.v2.fluid as fluid import paddle.v2.fluid.nets as nets from paddle.v2.fluid.framework import Program @@ -29,27 +29,35 @@ class TestLayer(unittest.TestCase): def test_batch_norm_layer(self): main_program = Program() startup_program = Program() - images = layers.data( + images = fluid.layers.data( name='pixel', shape=[3, 48, 48], dtype='float32', main_program=main_program) - layers.batch_norm( + hidden1 = fluid.layers.batch_norm( input=images, main_program=main_program, startup_program=startup_program) + hidden2 = fluid.layers.fc(input=hidden1, + size=128, + act='relu', + main_program=main_program) + hidden3 = fluid.layers.batch_norm( + input=hidden2, + main_program=main_program, + startup_program=startup_program) - # print str(main_program) + print str(main_program) def test_dropout_layer(self): main_program = Program() startup_program = Program() - images = layers.data( + images = fluid.layers.data( name='pixel', shape=[3, 48, 48], dtype='float32', main_program=main_program) - layers.dropout( + fluid.layers.dropout( x=images, dropout_prob=0.5, main_program=main_program, @@ -61,7 +69,7 @@ class TestLayer(unittest.TestCase): main_program = Program() startup_program = Program() - images = layers.data( + images = fluid.layers.data( name='pixel', shape=[3, 48, 48], dtype='float32', @@ -77,19 +85,19 @@ class TestLayer(unittest.TestCase): def test_elementwise_add_with_act(self): main_program = Program() startup_program = Program() - image1 = layers.data( + image1 = fluid.layers.data( name='pixel1', shape=[3, 48, 48], dtype='float32', main_program=main_program, startup_program=startup_program) - image2 = layers.data( + image2 = fluid.layers.data( name='pixel2', shape=[3, 48, 48], dtype='float32', main_program=main_program, startup_program=startup_program) - out = layers.elementwise_add( + out = fluid.layers.elementwise_add( x=image1, y=image2, act='relu', diff --git a/python/paddle/v2/fluid/tests/test_layers.py b/python/paddle/v2/fluid/tests/test_layers.py index 87dc6d1a6270e0f8425b56601d04049450c73380..62b2a0f9a11aa20e170fd108083abe04caedc4f3 100644 --- a/python/paddle/v2/fluid/tests/test_layers.py +++ b/python/paddle/v2/fluid/tests/test_layers.py @@ -65,6 +65,15 @@ class TestBook(unittest.TestCase): print str(program) + def test_conv2d_transpose(self): + program = Program() + kwargs = {'main_program': program} + img = layers.data( + name='pixel', shape=[3, 2, 2], dtype='float32', **kwargs) + layers.conv2d_transpose( + input=img, num_filters=10, output_size=28, **kwargs) + print str(program) + def test_recognize_digits_conv(self): program = Program() diff --git a/python/paddle/v2/fluid/tests/test_lod_rank_table.py b/python/paddle/v2/fluid/tests/test_lod_rank_table.py index bbc11930b9e804c2769cc590c298c6e90dc36ca6..30d619fe318517345195281b17f88e9916b6afb3 100644 --- a/python/paddle/v2/fluid/tests/test_lod_rank_table.py +++ b/python/paddle/v2/fluid/tests/test_lod_rank_table.py @@ -1,6 +1,5 @@ from paddle.v2.fluid.layers import lod_rank_table, data from paddle.v2.fluid.executor import Executor -from paddle.v2.fluid.framework import g_main_program import paddle.v2.fluid.core as core import numpy import unittest @@ -18,7 +17,7 @@ class TestLoDRankTable(unittest.TestCase): tensor = core.LoDTensor() tensor.set(numpy.random.random(size=(17, 100)), cpu) tensor.set_lod([[0, 1, 3], [0, 5, 6, 7], [0, 3, 4, 9, 10, 13, 16, 17]]) - exe.run(g_main_program, scope=scope, feed={'x': tensor}) + exe.run(scope=scope, feed={'x': tensor}) var = scope.find_var(rank_table.name) table = var.get_lod_rank_table() self.assertEqual([(0, 5), (1, 1), (2, 1)], table.items()) diff --git a/python/paddle/v2/fluid/tests/test_operator_desc.py b/python/paddle/v2/fluid/tests/test_operator_desc.py index e8362d2e9c6038c04c24dce35de8c53bfde78142..ce34d95ac8cb2644dee9c551cd8e85b33609919a 100644 --- a/python/paddle/v2/fluid/tests/test_operator_desc.py +++ b/python/paddle/v2/fluid/tests/test_operator_desc.py @@ -1,11 +1,15 @@ import unittest -from paddle.v2.fluid.framework import Variable, Program, g_main_program + import paddle.v2.fluid.core as core +from paddle.v2.fluid.framework import Program, default_startup_program + +main_program = default_startup_program() + class TestOperator(unittest.TestCase): def test_error_type(self): - block = g_main_program.create_block() + block = main_program.create_block() try: block.append_op() self.assertFail() diff --git a/python/paddle/v2/fluid/tests/test_parameter.py b/python/paddle/v2/fluid/tests/test_parameter.py index 13f6278ad8b7244e7980b32463f29d7a824b4572..694344acbbd3b7c80cb0ff48ada843f794061282 100644 --- a/python/paddle/v2/fluid/tests/test_parameter.py +++ b/python/paddle/v2/fluid/tests/test_parameter.py @@ -1,17 +1,19 @@ import unittest -from paddle.v2.fluid.framework import g_main_program +from paddle.v2.fluid.framework import default_main_program import paddle.v2.fluid.core as core from paddle.v2.fluid.executor import Executor import paddle.v2.fluid.io as io from paddle.v2.fluid.initializer import ConstantInitializer import numpy as np +main_program = default_main_program() + class TestParameter(unittest.TestCase): def test_param(self): shape = [784, 100] val = 1.0625 - b = g_main_program.global_block() + b = main_program.global_block() param = b.create_parameter( name='fc.w', shape=shape, @@ -23,9 +25,9 @@ class TestParameter(unittest.TestCase): self.assertEqual(core.DataType.FP32, param.dtype) self.assertEqual(0, param.block.idx) exe = Executor(core.CPUPlace()) - p = exe.run(g_main_program, fetch_list=[param])[0] + p = exe.run(main_program, fetch_list=[param])[0] self.assertTrue(np.allclose(p, np.ones(shape) * val)) - p = io.get_parameter_value_by_name('fc.w', exe, g_main_program) + p = io.get_parameter_value_by_name('fc.w', exe, main_program) self.assertTrue(np.allclose(np.array(p), np.ones(shape) * val)) diff --git a/python/paddle/v2/fluid/tests/test_program.py b/python/paddle/v2/fluid/tests/test_program.py index 15653a1dbf5b1a66edd3f768bee5a36be1bb7a7a..1a9313c68aab165d85ae29051faeacb4927ac2c9 100644 --- a/python/paddle/v2/fluid/tests/test_program.py +++ b/python/paddle/v2/fluid/tests/test_program.py @@ -1,37 +1,38 @@ from __future__ import print_function import unittest -from paddle.v2.fluid.framework import Program -from paddle.v2.fluid.framework import g_main_program +from paddle.v2.fluid.framework import Program, default_main_program import paddle.v2.fluid.layers as layers +main_program = default_main_program() + class TestProgram(unittest.TestCase): def test_program(self): - b = g_main_program.current_block() + b = main_program.current_block() self.assertEqual(-1, b.parent_idx) self.assertEqual(0, b.idx) - b = g_main_program.create_block() + b = main_program.create_block() self.assertEqual(1, b.idx) self.assertEqual(0, b.parent_idx) - b = g_main_program.create_block() + b = main_program.create_block() self.assertEqual(2, b.idx) self.assertEqual(1, b.parent_idx) - g_main_program.rollback() + main_program.rollback() - b = g_main_program.current_block() + b = main_program.current_block() self.assertEqual(1, b.idx) self.assertEqual(0, b.parent_idx) - b = g_main_program.create_block() + b = main_program.create_block() self.assertEqual(3, b.idx) self.assertEqual(1, b.parent_idx) - g_main_program.rollback() - b = g_main_program.current_block() + main_program.rollback() + b = main_program.current_block() self.assertEqual(1, b.idx) self.assertEqual(0, b.parent_idx) diff --git a/python/paddle/v2/fluid/tests/test_shrink_rnn_memory.py b/python/paddle/v2/fluid/tests/test_shrink_rnn_memory.py index 05f6a560644f18da6ff2e015911901cd73cc36c9..86db4c64b493d94cc675ed4bcee7e2925fef1977 100644 --- a/python/paddle/v2/fluid/tests/test_shrink_rnn_memory.py +++ b/python/paddle/v2/fluid/tests/test_shrink_rnn_memory.py @@ -3,9 +3,11 @@ import paddle.v2.fluid.core as core from paddle.v2.fluid.executor import Executor import paddle.v2.fluid.layers as layers from paddle.v2.fluid.backward import append_backward_ops -from paddle.v2.fluid.framework import g_main_program +from paddle.v2.fluid.framework import default_main_program import numpy +main_program = default_main_program() + class TestShrinkRNNMemory(unittest.TestCase): def test_shrink_rnn_memory(self): @@ -36,7 +38,7 @@ class TestShrinkRNNMemory(unittest.TestCase): append_backward_ops(loss=mem3_mean) x_grad = exe.run( feed={'x': tensor}, - fetch_list=[g_main_program.global_block().var('x@GRAD')])[0] + fetch_list=[main_program.global_block().var('x@GRAD')])[0] self.assertAlmostEqual(1.0, x_grad.sum(), delta=0.1) diff --git a/python/paddle/v2/fluid/tests/test_variable.py b/python/paddle/v2/fluid/tests/test_variable.py index 92ffdceb6c84fb2669f8c1bb556c46fb1c03c411..f1e4c0ba21d5c4f10d2b5011bdb5abaebaec5431 100644 --- a/python/paddle/v2/fluid/tests/test_variable.py +++ b/python/paddle/v2/fluid/tests/test_variable.py @@ -1,5 +1,5 @@ import unittest -from paddle.v2.fluid.framework import g_main_program, Program, convert_np_dtype_to_dtype_ +from paddle.v2.fluid.framework import default_main_program, Program, convert_np_dtype_to_dtype_ import paddle.v2.fluid.core as core import numpy as np @@ -18,7 +18,7 @@ class TestVariable(unittest.TestCase): self.assertRaises(ValueError, lambda: convert("int8")) def test_var(self): - b = g_main_program.current_block() + b = default_main_program().current_block() w = b.create_var( dtype="float64", shape=[784, 100], lod_level=0, name="fc.w") self.assertNotEqual(str(w), "")