提交 821899cc 编写于 作者: S sweetsky0901

Merge branch 'develop' of https://github.com/PaddlePaddle/Paddle into my_unpool_max_2d

...@@ -25,4 +25,3 @@ AllowAllParametersOfDeclarationOnNextLine: true ...@@ -25,4 +25,3 @@ AllowAllParametersOfDeclarationOnNextLine: true
BinPackParameters: false BinPackParameters: false
BinPackArguments: false BinPackArguments: false
... ...
...@@ -42,7 +42,7 @@ before_install: ...@@ -42,7 +42,7 @@ before_install:
script: script:
- | - |
timeout 2580 paddle/scripts/travis/${JOB}.sh # 43min timeout timeout 2580 paddle/scripts/travis/${JOB}.sh # 43min timeout
RESULT=$?; if [ $RESULT -eq 0 ] || [ $RESULT -eq 142 ]; then true; else false; fi; RESULT=$?; if [ $RESULT -eq 0 ] || [ $RESULT -eq 142 ]; then true ;else exit 1; fi;
- | - |
if [[ "$JOB" != "build_doc" ]]; then exit 0; fi; if [[ "$JOB" != "build_doc" ]]; then exit 0; fi;
if [[ "$TRAVIS_PULL_REQUEST" != "false" ]]; then exit 0; fi; if [[ "$TRAVIS_PULL_REQUEST" != "false" ]]; then exit 0; fi;
......
...@@ -133,6 +133,8 @@ include(external/any) # download libn::any ...@@ -133,6 +133,8 @@ include(external/any) # download libn::any
include(external/eigen) # download eigen3 include(external/eigen) # download eigen3
include(external/pybind11) # download pybind11 include(external/pybind11) # download pybind11
include(external/nccl) include(external/nccl)
include(external/cares)
include(external/grpc)
include(cudnn) # set cudnn libraries, must before configure include(cudnn) # set cudnn libraries, must before configure
include(configure) # add paddle env configuration include(configure) # add paddle env configuration
......
...@@ -29,7 +29,7 @@ RUN apt-get update && \ ...@@ -29,7 +29,7 @@ RUN apt-get update && \
automake locales clang-format swig doxygen cmake \ automake locales clang-format swig doxygen cmake \
liblapack-dev liblapacke-dev libboost-dev \ liblapack-dev liblapacke-dev libboost-dev \
clang-3.8 llvm-3.8 libclang-3.8-dev \ clang-3.8 llvm-3.8 libclang-3.8-dev \
net-tools && \ net-tools libtool && \
apt-get clean -y apt-get clean -y
# Install Go and glide # Install Go and glide
......
# Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserve.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
#
IF(MOBILE_INFERENCE)
return()
ENDIF()
include (ExternalProject)
# NOTE: c-ares is needed when linking with grpc.
SET(CARES_SOURCES_DIR ${THIRD_PARTY_PATH}/cares)
SET(CARES_INSTALL_DIR ${THIRD_PARTY_PATH}/install/cares)
SET(CARES_INCLUDE_DIR "${CARES_INSTALL_DIR}/include/" CACHE PATH "cares include directory." FORCE)
ExternalProject_Add(
extern_cares
GIT_REPOSITORY "https://github.com/c-ares/c-ares.git"
GIT_TAG "cares-1_13_0"
PREFIX ${CARES_SOURCES_DIR}
UPDATE_COMMAND ""
CONFIGURE_COMMAND ./buildconf && ./configure --disable-shared --prefix=${CARES_INSTALL_DIR}
BUILD_IN_SOURCE 1
BUILD_COMMAND make
INSTALL_COMMAND make install
)
ADD_LIBRARY(cares STATIC IMPORTED GLOBAL)
SET_PROPERTY(TARGET cares PROPERTY IMPORTED_LOCATION
"${CARES_INSTALL_DIR}/lib/libcares.a")
include_directories(${CARES_INCLUDE_DIR})
ADD_DEPENDENCIES(cares extern_cares)
# Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserve.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
#
IF(MOBILE_INFERENCE)
return()
ENDIF()
include (ExternalProject)
SET(GRPC_SOURCES_DIR ${THIRD_PARTY_PATH}/grpc)
SET(GRPC_INSTALL_DIR ${THIRD_PARTY_PATH}/install/grpc)
SET(GRPC_INCLUDE_DIR "${GRPC_INSTALL_DIR}/include/" CACHE PATH "grpc include directory." FORCE)
SET(GRPC_CPP_PLUGIN "${GRPC_INSTALL_DIR}/bin/grpc_cpp_plugin" CACHE FILEPATH "GRPC_CPP_PLUGIN" FORCE)
IF(APPLE)
SET(BUILD_CMD make -n | sed "s/-Werror//g" | sh)
ELSE()
SET(BUILD_CMD make)
ENDIF()
ExternalProject_Add(
extern_grpc
DEPENDS protobuf zlib
GIT_REPOSITORY "https://github.com/grpc/grpc.git"
GIT_TAG "v1.7.x"
PREFIX ${GRPC_SOURCES_DIR}
UPDATE_COMMAND ""
CONFIGURE_COMMAND ""
BUILD_IN_SOURCE 1
# NOTE(yuyang18):
# Disable -Werror, otherwise the compile will fail in MacOS.
# It seems that we cannot configure that by make command.
# Just dry run make command and remove `-Werror`, then use a shell to run make commands
BUILD_COMMAND ${BUILD_CMD}
INSTALL_COMMAND make prefix=${GRPC_INSTALL_DIR} install
)
# FIXME(typhoonzero): hack to get static lib path, try a better way like merge them.
ADD_LIBRARY(grpc++_unsecure STATIC IMPORTED GLOBAL)
SET_PROPERTY(TARGET grpc++_unsecure PROPERTY IMPORTED_LOCATION
"${GRPC_INSTALL_DIR}/lib/libgrpc++_unsecure.a")
ADD_LIBRARY(grpc++ STATIC IMPORTED GLOBAL)
SET_PROPERTY(TARGET grpc++ PROPERTY IMPORTED_LOCATION
"${GRPC_INSTALL_DIR}/lib/libgrpc++.a")
ADD_LIBRARY(gpr STATIC IMPORTED GLOBAL)
SET_PROPERTY(TARGET gpr PROPERTY IMPORTED_LOCATION
"${GRPC_INSTALL_DIR}/lib/libgpr.a")
ADD_LIBRARY(grpc_unsecure STATIC IMPORTED GLOBAL)
SET_PROPERTY(TARGET grpc_unsecure PROPERTY IMPORTED_LOCATION
"${GRPC_INSTALL_DIR}/lib/libgrpc_unsecure.a")
include_directories(${GRPC_INCLUDE_DIR})
ADD_DEPENDENCIES(grpc++_unsecure extern_grpc)
...@@ -15,7 +15,18 @@ ...@@ -15,7 +15,18 @@
INCLUDE(ExternalProject) INCLUDE(ExternalProject)
# Always invoke `FIND_PACKAGE(Protobuf)` for importing function protobuf_generate_cpp # Always invoke `FIND_PACKAGE(Protobuf)` for importing function protobuf_generate_cpp
FIND_PACKAGE(Protobuf QUIET) FIND_PACKAGE(Protobuf QUIET)
SET(PROTOBUF_FOUND "OFF") macro(UNSET_VAR VAR_NAME)
UNSET(${VAR_NAME} CACHE)
UNSET(${VAR_NAME})
endmacro()
UNSET_VAR(PROTOBUF_INCLUDE_DIR)
UNSET_VAR(PROTOBUF_FOUND)
UNSET_VAR(PROTOBUF_PROTOC_EXECUTABLE)
UNSET_VAR(PROTOBUF_PROTOC_LIBRARY)
UNSET_VAR(PROTOBUF_LITE_LIBRARY)
UNSET_VAR(PROTOBUF_LIBRARY)
UNSET_VAR(PROTOBUF_INCLUDE_DIR)
UNSET_VAR(Protobuf_PROTOC_EXECUTABLE)
if(NOT COMMAND protobuf_generate_python) # before cmake 3.4, protobuf_genrerate_python is not defined. if(NOT COMMAND protobuf_generate_python) # before cmake 3.4, protobuf_genrerate_python is not defined.
function(protobuf_generate_python SRCS) function(protobuf_generate_python SRCS)
...@@ -110,7 +121,6 @@ macro(PROMPT_PROTOBUF_LIB) ...@@ -110,7 +121,6 @@ macro(PROMPT_PROTOBUF_LIB)
# FIND_Protobuf.cmake uses `Protobuf_PROTOC_EXECUTABLE`. # FIND_Protobuf.cmake uses `Protobuf_PROTOC_EXECUTABLE`.
# make `protobuf_generate_cpp` happy. # make `protobuf_generate_cpp` happy.
SET(Protobuf_PROTOC_EXECUTABLE ${PROTOBUF_PROTOC_EXECUTABLE}) SET(Protobuf_PROTOC_EXECUTABLE ${PROTOBUF_PROTOC_EXECUTABLE})
FOREACH(dep ${protobuf_DEPS}) FOREACH(dep ${protobuf_DEPS})
ADD_DEPENDENCIES(protobuf ${dep}) ADD_DEPENDENCIES(protobuf ${dep})
ADD_DEPENDENCIES(protobuf_lite ${dep}) ADD_DEPENDENCIES(protobuf_lite ${dep})
...@@ -128,11 +138,11 @@ endmacro() ...@@ -128,11 +138,11 @@ endmacro()
set(PROTOBUF_ROOT "" CACHE PATH "Folder contains protobuf") set(PROTOBUF_ROOT "" CACHE PATH "Folder contains protobuf")
if (NOT "${PROTOBUF_ROOT}" STREQUAL "") if (NOT "${PROTOBUF_ROOT}" STREQUAL "")
find_path(PROTOBUF_INCLUDE_DIR google/protobuf/message.h PATHS ${PROTOBUF_ROOT}/include) find_path(PROTOBUF_INCLUDE_DIR google/protobuf/message.h PATHS ${PROTOBUF_ROOT}/include NO_DEFAULT_PATH)
find_library(PROTOBUF_LIBRARY protobuf PATHS ${PROTOBUF_ROOT}/lib) find_library(PROTOBUF_LIBRARY protobuf PATHS ${PROTOBUF_ROOT}/lib NO_DEFAULT_PATH)
find_library(PROTOBUF_LITE_LIBRARY protobuf-lite PATHS ${PROTOBUF_ROOT}/lib) find_library(PROTOBUF_LITE_LIBRARY protobuf-lite PATHS ${PROTOBUF_ROOT}/lib NO_DEFAULT_PATH)
find_library(PROTOBUF_PROTOC_LIBRARY protoc PATHS ${PROTOBUF_ROOT}/lib) find_library(PROTOBUF_PROTOC_LIBRARY protoc PATHS ${PROTOBUF_ROOT}/lib NO_DEFAULT_PATH)
find_program(PROTOBUF_PROTOC_EXECUTABLE protoc PATHS ${PROTOBUF_ROOT}/bin) find_program(PROTOBUF_PROTOC_EXECUTABLE protoc PATHS ${PROTOBUF_ROOT}/bin NO_DEFAULT_PATH)
if (PROTOBUF_INCLUDE_DIR AND PROTOBUF_LIBRARY AND PROTOBUF_LITE_LIBRARY AND PROTOBUF_PROTOC_LIBRARY AND PROTOBUF_PROTOC_EXECUTABLE) if (PROTOBUF_INCLUDE_DIR AND PROTOBUF_LIBRARY AND PROTOBUF_LITE_LIBRARY AND PROTOBUF_PROTOC_LIBRARY AND PROTOBUF_PROTOC_EXECUTABLE)
message(STATUS "Using custom protobuf library in ${PROTOBUF_ROOT}.") message(STATUS "Using custom protobuf library in ${PROTOBUF_ROOT}.")
SET_PROTOBUF_VERSION() SET_PROTOBUF_VERSION()
......
...@@ -50,6 +50,8 @@ ExternalProject_Add( ...@@ -50,6 +50,8 @@ ExternalProject_Add(
) )
LIST(APPEND external_project_dependencies zlib) LIST(APPEND external_project_dependencies zlib)
ADD_LIBRARY(zlib_target STATIC IMPORTED GLOBAL)
SET_PROPERTY(TARGET zlib_target PROPERTY IMPORTED_LOCATION ${ZLIB_LIBRARIES})
IF(WITH_C_API) IF(WITH_C_API)
INSTALL(DIRECTORY ${ZLIB_INCLUDE_DIR} DESTINATION third_party/zlib) INSTALL(DIRECTORY ${ZLIB_INCLUDE_DIR} DESTINATION third_party/zlib)
......
...@@ -467,3 +467,50 @@ function(py_test TARGET_NAME) ...@@ -467,3 +467,50 @@ function(py_test TARGET_NAME)
WORKING_DIRECTORY ${CMAKE_CURRENT_SOURCE_DIR}) WORKING_DIRECTORY ${CMAKE_CURRENT_SOURCE_DIR})
endif() endif()
endfunction() endfunction()
# grpc_library generate grpc code using grpc_cpp_plugin and protoc
# then build the generated protobuf code and grpc code with your
# implementation source codes together. Use SRCS argument for your
# implementation source files and PROTO argument for your .proto
# files.
#
# Usage: grpc_library(my_target SRCS my_client.cc PROTO my_target.proto DEPS my_dep)
function(grpc_library TARGET_NAME)
set(oneValueArgs PROTO)
set(multiValueArgs SRCS DEPS)
set(options "")
cmake_parse_arguments(grpc_library "${options}" "${oneValueArgs}" "${multiValueArgs}" ${ARGN})
message(STATUS "generating grpc ${grpc_library_PROTO}")
get_filename_component(ABS_PROTO ${grpc_library_PROTO} ABSOLUTE)
get_filename_component(PROTO_WE ${grpc_library_PROTO} NAME_WE)
get_filename_component(PROTO_PATH ${ABS_PROTO} PATH)
protobuf_generate_cpp(grpc_proto_srcs grpc_proto_hdrs "${ABS_PROTO}")
set(grpc_grpc_srcs "${CMAKE_CURRENT_BINARY_DIR}/${PROTO_WE}.grpc.pb.cc")
set(grpc_grpc_hdrs "${CMAKE_CURRENT_BINARY_DIR}/${PROTO_WE}.grpc.pb.h")
cc_library("${TARGET_NAME}_proto" SRCS "${grpc_proto_srcs}")
add_custom_command(
OUTPUT "${grpc_grpc_srcs}" "${grpc_grpc_hdrs}"
COMMAND ${PROTOBUF_PROTOC_EXECUTABLE}
ARGS --grpc_out "${CMAKE_CURRENT_BINARY_DIR}" -I "${PROTO_PATH}"
--plugin=protoc-gen-grpc="${GRPC_CPP_PLUGIN}" "${ABS_PROTO}"
DEPENDS "${ABS_PROTO}" ${PROTOBUF_PROTOC_EXECUTABLE} extern_grpc)
# FIXME(typhoonzero): grpc generated code do not generate virtual-dtor, mark it
# as compiler warnings instead of error. Should try remove the warnings also.
set_source_files_properties(
${grpc_grpc_srcs}
PROPERTIES
COMPILE_FLAGS "-Wno-error=non-virtual-dtor -Wno-error=delete-non-virtual-dtor")
cc_library("${TARGET_NAME}_grpc" SRCS "${grpc_grpc_srcs}")
set_source_files_properties(
${grpc_library_SRCS}
PROPERTIES
COMPILE_FLAGS "-Wno-error=non-virtual-dtor -Wno-error=delete-non-virtual-dtor")
cc_library("${TARGET_NAME}" SRCS "${grpc_library_SRCS}" DEPS "${TARGET_NAME}_grpc" "${TARGET_NAME}_proto" "${grpc_library_DEPS}")
endfunction()
...@@ -13,6 +13,8 @@ ...@@ -13,6 +13,8 @@
limitations under the License. */ limitations under the License. */
#include "paddle/framework/lod_tensor.h" #include "paddle/framework/lod_tensor.h"
#include "paddle/framework/data_type.h"
#include "paddle/framework/framework.pb.h"
#include "paddle/memory/memcpy.h" #include "paddle/memory/memcpy.h"
#include "paddle/memory/memory.h" #include "paddle/memory/memory.h"
...@@ -27,11 +29,11 @@ ...@@ -27,11 +29,11 @@
namespace paddle { namespace paddle {
namespace framework { namespace framework {
std::ostream& operator<<(std::ostream& os, const LoD& lod) { std::ostream &operator<<(std::ostream &os, const LoD &lod) {
os << "{"; os << "{";
for (auto& v : lod) { for (auto &v : lod) {
os << "{"; os << "{";
for (auto& i : v) { for (auto &i : v) {
os << i << ","; os << i << ",";
} }
os << "}"; os << "}";
...@@ -41,7 +43,7 @@ std::ostream& operator<<(std::ostream& os, const LoD& lod) { ...@@ -41,7 +43,7 @@ std::ostream& operator<<(std::ostream& os, const LoD& lod) {
return os; return os;
} }
LoD SliceLevels(const LoD& in, size_t level_begin, size_t level_end) { LoD SliceLevels(const LoD &in, size_t level_begin, size_t level_end) {
LoD new_lod; LoD new_lod;
new_lod.reserve(level_end - level_begin); new_lod.reserve(level_end - level_begin);
for (size_t i = level_begin; i < level_end; i++) { for (size_t i = level_begin; i < level_end; i++) {
...@@ -53,7 +55,7 @@ LoD SliceLevels(const LoD& in, size_t level_begin, size_t level_end) { ...@@ -53,7 +55,7 @@ LoD SliceLevels(const LoD& in, size_t level_begin, size_t level_end) {
return new_lod; return new_lod;
} }
LoD SliceInLevel(const LoD& in, size_t level, size_t elem_begin, LoD SliceInLevel(const LoD &in, size_t level, size_t elem_begin,
size_t elem_end) { size_t elem_end) {
PADDLE_ENFORCE_LT(level, in.size()); PADDLE_ENFORCE_LT(level, in.size());
PADDLE_ENFORCE_LT(elem_end, in[level].size()); PADDLE_ENFORCE_LT(elem_end, in[level].size());
...@@ -64,9 +66,9 @@ LoD SliceInLevel(const LoD& in, size_t level, size_t elem_begin, ...@@ -64,9 +66,9 @@ LoD SliceInLevel(const LoD& in, size_t level, size_t elem_begin,
res[0].assign(in[level].begin() + elem_begin, res[0].assign(in[level].begin() + elem_begin,
in[level].begin() + elem_end + 1); in[level].begin() + elem_end + 1);
for (size_t lvl = 1; lvl < res.size(); lvl++) { for (size_t lvl = 1; lvl < res.size(); lvl++) {
const auto& in_level = in[level + lvl]; const auto &in_level = in[level + lvl];
const auto& above_level = res[lvl - 1]; const auto &above_level = res[lvl - 1];
auto& out_level = res[lvl]; auto &out_level = res[lvl];
out_level.assign(in_level.begin() + above_level.front(), out_level.assign(in_level.begin() + above_level.front(),
in_level.begin() + above_level.back() + 1); in_level.begin() + above_level.back() + 1);
} }
...@@ -74,33 +76,33 @@ LoD SliceInLevel(const LoD& in, size_t level, size_t elem_begin, ...@@ -74,33 +76,33 @@ LoD SliceInLevel(const LoD& in, size_t level, size_t elem_begin,
// to make the first offset equals 0, all the elements minus the first // to make the first offset equals 0, all the elements minus the first
// element // element
size_t front = res[lvl].front(); size_t front = res[lvl].front();
for (auto& ele : res[lvl]) { for (auto &ele : res[lvl]) {
ele -= front; ele -= front;
} }
} }
return res; return res;
} }
LoD ToAbsOffset(const LoD& in) { LoD ToAbsOffset(const LoD &in) {
// the lowest level stores relative offsets // the lowest level stores relative offsets
if (in.empty() || in.size() == 1) return in; if (in.empty() || in.size() == 1) return in;
LoD result = in; LoD result = in;
for (int level = result.size() - 2; level >= 0; level--) { for (int level = result.size() - 2; level >= 0; level--) {
for (auto& ele : result[level]) { for (auto &ele : result[level]) {
ele = result[level + 1][ele]; ele = result[level + 1][ele];
} }
} }
return result; return result;
} }
bool operator==(const LoD& a, const LoD& b) { bool operator==(const LoD &a, const LoD &b) {
if (a.size() != b.size()) { if (a.size() != b.size()) {
return false; return false;
} }
for (size_t i = 0; i < a.size(); i++) { for (size_t i = 0; i < a.size(); i++) {
const auto& a_level = a[i]; const auto &a_level = a[i];
const auto& b_level = b[i]; const auto &b_level = b[i];
if (a_level.size() != b_level.size()) { if (a_level.size() != b_level.size()) {
return false; return false;
} }
...@@ -151,7 +153,7 @@ void LoDTensor::ShrinkInLevel(size_t level, size_t elem_begin, ...@@ -151,7 +153,7 @@ void LoDTensor::ShrinkInLevel(size_t level, size_t elem_begin,
} }
using LoDAndOffset = std::pair<LoD, std::pair<size_t, size_t>>; using LoDAndOffset = std::pair<LoD, std::pair<size_t, size_t>>;
LoDAndOffset GetSubLoDAndAbsoluteOffset(const LoD& lod, size_t start_idx, LoDAndOffset GetSubLoDAndAbsoluteOffset(const LoD &lod, size_t start_idx,
size_t end_idx, size_t start_level) { size_t end_idx, size_t start_level) {
LoD sub_lod; LoD sub_lod;
...@@ -170,7 +172,7 @@ LoDAndOffset GetSubLoDAndAbsoluteOffset(const LoD& lod, size_t start_idx, ...@@ -170,7 +172,7 @@ LoDAndOffset GetSubLoDAndAbsoluteOffset(const LoD& lod, size_t start_idx,
return LoDAndOffset{sub_lod, {start_idx, end_idx}}; return LoDAndOffset{sub_lod, {start_idx, end_idx}};
} }
void AppendLoD(LoD* lod, const LoD& lod_length) { void AppendLoD(LoD *lod, const LoD &lod_length) {
PADDLE_ENFORCE( PADDLE_ENFORCE(
lod->empty() || lod->size() == lod_length.size(), lod->empty() || lod->size() == lod_length.size(),
"The lod_length should has the same size with the appended lod."); "The lod_length should has the same size with the appended lod.");
...@@ -178,12 +180,139 @@ void AppendLoD(LoD* lod, const LoD& lod_length) { ...@@ -178,12 +180,139 @@ void AppendLoD(LoD* lod, const LoD& lod_length) {
*lod = LoD(lod_length.size(), std::vector<size_t>({0})); *lod = LoD(lod_length.size(), std::vector<size_t>({0}));
} }
for (size_t i = 0; i < lod->size(); ++i) { for (size_t i = 0; i < lod->size(); ++i) {
auto& level = (*lod)[i]; auto &level = (*lod)[i];
for (size_t len : lod_length[i]) { for (size_t len : lod_length[i]) {
level.push_back(level.back() + len); level.push_back(level.back() + len);
} }
} }
} }
void SerializeToStream(std::ostream &os, const LoDTensor &tensor,
const platform::DeviceContext &dev_ctx) {
// TODO(typhoonzero): serialize to ostream
{ // the 1st field, uint32_t version
constexpr uint32_t version = 0;
os.write(reinterpret_cast<const char *>(&version), sizeof(version));
}
{ // the 2nd field, tensor description
// int32_t size
// void* protobuf message
framework::TensorDesc desc;
desc.set_data_type(framework::ToDataType(tensor.type()));
auto dims = framework::vectorize(tensor.dims());
auto *pb_dims = desc.mutable_dims();
pb_dims->Resize(static_cast<int>(dims.size()), 0);
std::copy(dims.begin(), dims.end(), pb_dims->begin());
int32_t size = desc.ByteSize();
os.write(reinterpret_cast<const char *>(&size), sizeof(size));
auto out = desc.SerializeAsString();
os.write(out.data(), size);
}
{ // the 3rd field, tensor data
uint64_t size = tensor.memory_size();
auto *data_ptr = tensor.data<void>();
PADDLE_ENFORCE(size < std::numeric_limits<std::streamsize>::max(),
"Index overflow when writing tensor");
if (platform::is_gpu_place(tensor.place())) {
#ifdef PADDLE_WITH_CUDA
constexpr size_t kBufSize = 1024 * 1024 * 64; // 64MB
std::unique_ptr<char[]> buf(new char[kBufSize]);
auto &gpu_dev_ctx =
static_cast<const platform::CUDADeviceContext &>(dev_ctx);
platform::CPUPlace cpu;
uintptr_t data = reinterpret_cast<uintptr_t>(data_ptr);
while (size != 0) {
size_t size_to_write = std::min(kBufSize, static_cast<size_t>(size));
memory::Copy(cpu, buf.get(),
boost::get<platform::GPUPlace>(tensor.place()),
reinterpret_cast<const void *>(data), size_to_write,
gpu_dev_ctx.stream());
gpu_dev_ctx.Wait();
os.write(buf.get(), size_to_write);
data += size_to_write;
size -= size_to_write;
}
#else
PADDLE_THROW("Unexpected branch");
#endif
} else {
os.write(static_cast<const char *>(data_ptr),
static_cast<std::streamsize>(size));
}
}
{ // the 4th field, lod information
// uint64_t lod_level
// uint64_t lod_level_1 size in byte.
// int* lod_level_1 data
// ...
auto lod = tensor.lod();
uint64_t size = lod.size();
os.write(reinterpret_cast<const char *>(&size), sizeof(size));
for (auto &each : lod) {
size = each.size() * sizeof(framework::LoD::value_type::value_type);
os.write(reinterpret_cast<const char *>(&size), sizeof(size));
os.write(reinterpret_cast<const char *>(each.data()),
static_cast<std::streamsize>(size));
}
}
}
void DeserializeFromStream(std::istream &is, LoDTensor *tensor) {
uint32_t version;
is.read(reinterpret_cast<char *>(&version), sizeof(version));
PADDLE_ENFORCE_EQ(version, 0U, "Only version 0 is supported");
framework::TensorDesc desc;
{ // int32_t size
// proto buffer
int32_t size;
is.read(reinterpret_cast<char *>(&size), sizeof(size));
std::unique_ptr<char[]> buf(new char[size]);
is.read(reinterpret_cast<char *>(buf.get()), size);
PADDLE_ENFORCE(desc.ParseFromArray(buf.get(), size),
"Cannot parse tensor desc");
}
{ // read tensor
std::vector<int64_t> dims;
dims.reserve(static_cast<size_t>(desc.dims().size()));
std::copy(desc.dims().begin(), desc.dims().end(), std::back_inserter(dims));
tensor->Resize(framework::make_ddim(dims));
void *buf;
platform::Place cpu = platform::CPUPlace();
switch (desc.data_type()) {
case framework::FP32:
buf = tensor->mutable_data<float>(cpu);
break;
case framework::FP64:
buf = tensor->mutable_data<double>(cpu);
break;
case framework::INT32:
buf = tensor->mutable_data<int>(cpu);
break;
case framework::INT64:
buf = tensor->mutable_data<int64_t>(cpu);
break;
default:
PADDLE_THROW("DataType %d not supported", desc.data_type());
}
is.read(static_cast<char *>(buf), tensor->memory_size());
}
{ // read lod
uint64_t lod_level;
is.read(reinterpret_cast<char *>(&lod_level), sizeof(lod_level));
auto &lod = *tensor->mutable_lod();
lod.resize(lod_level);
for (uint64_t i = 0; i < lod_level; ++i) {
uint64_t size;
is.read(reinterpret_cast<char *>(&size), sizeof(size));
std::vector<size_t> tmp(size / sizeof(size_t));
is.read(reinterpret_cast<char *>(tmp.data()),
static_cast<std::streamsize>(size));
lod[i] = tmp;
}
}
}
} // namespace framework } // namespace framework
} // namespace paddle } // namespace paddle
...@@ -189,5 +189,14 @@ std::pair<LoD, std::pair<size_t, size_t>> GetSubLoDAndAbsoluteOffset( ...@@ -189,5 +189,14 @@ std::pair<LoD, std::pair<size_t, size_t>> GetSubLoDAndAbsoluteOffset(
void AppendLoD(LoD* lod, const LoD& lod_length); void AppendLoD(LoD* lod, const LoD& lod_length);
/*
* Serialize/Desiralize LoDTensor to std::ostream
* You can pass ofstream or ostringstream to serilize to file
* or to a in memory string. GPU tensor will be copied to CPU.
*/
void SerializeToStream(std::ostream& os, const LoDTensor& tensor,
const platform::DeviceContext& dev_ctx);
void DeserializeFromStream(std::istream& is, LoDTensor* tensor);
} // namespace framework } // namespace framework
} // namespace paddle } // namespace paddle
...@@ -64,49 +64,111 @@ void HierarchicalSigmoidLayer::forward(PassType passType) { ...@@ -64,49 +64,111 @@ void HierarchicalSigmoidLayer::forward(PassType passType) {
batchSize, batchSize,
codeLength_, codeLength_,
/* trans */ false, /* trans */ false,
useGpu(deviceId_)); false);
Matrix::resizeOrCreate(preOutput_.grad, Matrix::resizeOrCreate(preOutput_.grad,
batchSize, batchSize,
codeLength_, codeLength_,
/* trans */ false, /* trans */ false,
useGpu(deviceId_)); false);
IVectorPtr label = getInput(*getLabelLayer()).ids; IVectorPtr label = getInput(*getLabelLayer()).ids;
preOutput_.value->zeroMem(); preOutput_.value->zeroMem();
if (useGpu_) {
Matrix::resizeOrCreate(cpuOutput_,
output_.value->getHeight(),
output_.value->getWidth(),
/* trans */ false,
false);
IVector::resizeOrCreate(cpuLabel_, label->getSize(), false);
cpuLabel_->copyFrom(*label);
cpuOutput_->copyFrom(*output_.value);
} else {
cpuOutput_ = output_.value;
cpuLabel_ = label;
}
/* add the bias-vector */ /* add the bias-vector */
if (biases_.get() != NULL) { if (biases_.get() != NULL) {
preOutput_.value->addByBitCode(numClasses_, *label, *biases_->getW()); if (useGpu_) {
Matrix::resizeOrCreate(cpuBias_,
1,
numClasses_ - 1,
/* trans */ false,
false);
cpuBias_->copyFrom(*biases_->getW());
} else {
cpuBias_ = biases_->getW();
}
preOutput_.value->addByBitCode(numClasses_, *cpuLabel_, *cpuBias_);
} }
for (size_t i = 0; i < inputLayers_.size() - 1; ++i) { for (size_t i = 0; i < inputLayers_.size() - 1; ++i) {
MatrixPtr input = getInputValue(i); MatrixPtr input = getInputValue(i);
if (useGpu_) {
Matrix::resizeOrCreate(cpuInput_,
input->getHeight(),
input->getWidth(),
/* trans */ false,
false);
Matrix::resizeOrCreate(cpuWeight_,
weights_[i]->getW()->getHeight(),
weights_[i]->getW()->getWidth(),
/* trans */ false,
false);
cpuInput_->copyFrom(*input);
cpuWeight_->copyFrom(*weights_[i]->getW());
} else {
cpuInput_ = input;
cpuWeight_ = weights_[i]->getW();
}
preOutput_.value->mulByBitCode( preOutput_.value->mulByBitCode(
numClasses_, *label, *weights_[i]->getW(), *input); numClasses_, *cpuLabel_, *cpuWeight_, *cpuInput_);
} }
// keep consistent with the clipping in the following softrelu // keep consistent with the clipping in the following softrelu
preOutput_.value->clip(-40.0, 40.0); preOutput_.value->clip(-40.0, 40.0);
preOutput_.value->sumByBitCode(numClasses_, preOutput_.value->sumByBitCode(numClasses_,
*label, *cpuLabel_,
*output_.value, *cpuOutput_,
-1); // scaleSum -1); // scaleSum
preOutput_.value->softrelu(*preOutput_.value); preOutput_.value->softrelu(*preOutput_.value);
MatrixPtr sum = MatrixPtr sum = Matrix::create(batchSize, 1, /* trans= */ false, false);
Matrix::create(batchSize, 1, /* trans= */ false, useGpu(deviceId_));
preOutput_.value->rowSum(*sum); preOutput_.value->rowSum(*sum);
output_.value->add(*sum); cpuOutput_->add(*sum);
if (useGpu_) {
output_.value->copyFrom(*cpuOutput_);
} else {
output_.value = cpuOutput_;
}
} }
void HierarchicalSigmoidLayer::backward(const UpdateCallback& callback) { void HierarchicalSigmoidLayer::backward(const UpdateCallback& callback) {
IVectorPtr label = getInput(*getLabelLayer()).ids; IVectorPtr label = getInput(*getLabelLayer()).ids;
if (useGpu_) {
IVector::resizeOrCreate(cpuLabel_, label->getSize(), false);
cpuLabel_->copyFrom(*label);
} else {
cpuLabel_ = label;
}
preOutput_.grad->one(); preOutput_.grad->one();
preOutput_.grad->softreluDerivative(*preOutput_.value); preOutput_.grad->softreluDerivative(*preOutput_.value);
preOutput_.grad->subByBitCode(numClasses_, *label); preOutput_.grad->subByBitCode(numClasses_, *cpuLabel_);
if (biases_ && biases_->getWGrad()) { if (biases_ && biases_->getWGrad()) {
preOutput_.grad->addByBitCodeBackward( MatrixPtr biases_grad = biases_->getWGrad();
numClasses_, *label, *biases_->getWGrad()); if (useGpu_) {
Matrix::resizeOrCreate(cpuBias_,
1,
numClasses_ - 1,
/* trans */ false,
false);
cpuBias_->copyFrom(*biases_grad);
} else {
cpuBias_ = biases_grad;
}
preOutput_.grad->addByBitCodeBackward(numClasses_, *cpuLabel_, *cpuBias_);
if (useGpu_) {
biases_grad->copyFrom(*cpuBias_);
} else {
biases_grad = cpuBias_;
}
/* Increasing the number of gradient */ /* Increasing the number of gradient */
biases_->getParameterPtr()->incUpdate(callback); biases_->getParameterPtr()->incUpdate(callback);
} }
...@@ -115,9 +177,31 @@ void HierarchicalSigmoidLayer::backward(const UpdateCallback& callback) { ...@@ -115,9 +177,31 @@ void HierarchicalSigmoidLayer::backward(const UpdateCallback& callback) {
/* Calculate the W-gradient for the current layer */ /* Calculate the W-gradient for the current layer */
MatrixPtr input = getInputValue(i); MatrixPtr input = getInputValue(i);
if (weights_[i]->getWGrad()) { if (weights_[i]->getWGrad()) {
MatrixPtr weights_grad = weights_[i]->getWGrad();
if (useGpu_) {
Matrix::resizeOrCreate(cpuInput_,
input->getHeight(),
input->getWidth(),
/* trans */ false,
false);
Matrix::resizeOrCreate(cpuWeightGrad_,
weights_grad->getHeight(),
weights_grad->getWidth(),
/* trans */ false,
false);
cpuInput_->copyFrom(*input);
cpuWeightGrad_->copyFrom(*weights_grad);
} else {
cpuInput_ = input;
cpuWeightGrad_ = weights_grad;
}
preOutput_.grad->mulByBitCodeBackwardWeight( preOutput_.grad->mulByBitCodeBackwardWeight(
numClasses_, *label, *weights_[i]->getWGrad(), *input); numClasses_, *cpuLabel_, *cpuWeightGrad_, *cpuInput_);
if (useGpu_) {
weights_grad->copyFrom(*cpuWeightGrad_);
} else {
weights_grad = cpuWeightGrad_;
}
/* Increasing the number of gradient */ /* Increasing the number of gradient */
weights_[i]->getParameterPtr()->incUpdate(callback); weights_[i]->getParameterPtr()->incUpdate(callback);
} }
...@@ -125,8 +209,30 @@ void HierarchicalSigmoidLayer::backward(const UpdateCallback& callback) { ...@@ -125,8 +209,30 @@ void HierarchicalSigmoidLayer::backward(const UpdateCallback& callback) {
/* Calculate the input layers error */ /* Calculate the input layers error */
MatrixPtr inputGrad = getInputGrad(i); MatrixPtr inputGrad = getInputGrad(i);
if (inputGrad) { if (inputGrad) {
if (useGpu_) {
Matrix::resizeOrCreate(cpuInputGrad_,
inputGrad->getHeight(),
inputGrad->getWidth(),
/* trans */ false,
false);
Matrix::resizeOrCreate(cpuWeight_,
weights_[i]->getW()->getHeight(),
weights_[i]->getW()->getWidth(),
/* trans */ false,
false);
cpuInputGrad_->copyFrom(*inputGrad);
cpuWeight_->copyFrom(*weights_[i]->getW());
} else {
cpuInputGrad_ = inputGrad;
cpuWeight_ = weights_[i]->getW();
}
preOutput_.grad->mulByBitCodeBackwardError( preOutput_.grad->mulByBitCodeBackwardError(
numClasses_, *label, *weights_[i]->getW(), *inputGrad); numClasses_, *cpuLabel_, *cpuWeight_, *cpuInputGrad_);
if (useGpu_) {
inputGrad->copyFrom(*cpuInputGrad_);
} else {
inputGrad = cpuInputGrad_;
}
} }
} }
} }
......
...@@ -80,6 +80,15 @@ protected: ...@@ -80,6 +80,15 @@ protected:
int codeLength_; int codeLength_;
/// temporary result of output_ /// temporary result of output_
Argument preOutput_; Argument preOutput_;
/// The temporary variables in CPU memory.
MatrixPtr cpuWeight_;
MatrixPtr cpuWeightGrad_;
MatrixPtr cpuInput_;
MatrixPtr cpuInputGrad_;
MatrixPtr cpuBias_;
MatrixPtr cpuOutput_;
IVectorPtr cpuLabel_;
}; };
} // namespace paddle } // namespace paddle
...@@ -681,12 +681,13 @@ TEST(Layer, hsigmoidLayer) { ...@@ -681,12 +681,13 @@ TEST(Layer, hsigmoidLayer) {
config.layerConfig.add_inputs(); config.layerConfig.add_inputs();
config.layerConfig.add_inputs(); config.layerConfig.add_inputs();
// Not support GPU now for (auto useGpu : {false, true}) {
testLayerGrad(config, testLayerGrad(config,
"hsigmoid", "hsigmoid",
100, 100,
/* trans */ false, /* useGpu */ /* trans */ false,
false); /* useGpu */ useGpu);
}
} }
TEST(Layer, multi_cross) { TEST(Layer, multi_cross) {
......
...@@ -206,8 +206,24 @@ set(DEPS_OPS ...@@ -206,8 +206,24 @@ set(DEPS_OPS
tensor_array_read_write_op tensor_array_read_write_op
gru_op gru_op
adagrad_op adagrad_op
sgd_op) sgd_op
save_op
load_op
send_op
recv_op)
add_subdirectory(detail)
op_library(send_op SRCS send_op.cc DEPS sendrecvop_grpc grpc++_unsecure grpc_unsecure gpr cares zlib_target protobuf)
set_source_files_properties(
send_op.cc
PROPERTIES
COMPILE_FLAGS "-Wno-error=non-virtual-dtor -Wno-error=delete-non-virtual-dtor")
op_library(recv_op SRCS recv_op.cc DEPS sendrecvop_grpc grpc++_unsecure grpc_unsecure gpr cares zlib_target protobuf)
set_source_files_properties(
recv_op.cc
PROPERTIES
COMPILE_FLAGS "-Wno-error=non-virtual-dtor -Wno-error=delete-non-virtual-dtor")
op_library(cond_op SRCS cond_op.cc DEPS framework_proto tensor operator net_op) op_library(cond_op SRCS cond_op.cc DEPS framework_proto tensor operator net_op)
op_library(cross_entropy_op DEPS cross_entropy) op_library(cross_entropy_op DEPS cross_entropy)
...@@ -237,6 +253,10 @@ op_library(conv_transpose_op DEPS vol2col) ...@@ -237,6 +253,10 @@ op_library(conv_transpose_op DEPS vol2col)
op_library(gru_op DEPS sequence2batch gru_compute) op_library(gru_op DEPS sequence2batch gru_compute)
op_library(recurrent_op SRCS recurrent_op.cc DEPS executor) op_library(recurrent_op SRCS recurrent_op.cc DEPS executor)
# FIXME(typhoonzero): save/load depends lodtensor serialization functions
op_library(save_op DEPS lod_tensor)
op_library(load_op DEPS lod_tensor)
list(REMOVE_ITEM GENERAL_OPS ${DEPS_OPS}) list(REMOVE_ITEM GENERAL_OPS ${DEPS_OPS})
foreach(src ${GENERAL_OPS}) foreach(src ${GENERAL_OPS})
op_library(${src}) op_library(${src})
...@@ -244,6 +264,8 @@ endforeach() ...@@ -244,6 +264,8 @@ endforeach()
set(GLOB_OP_LIB ${OP_LIBRARY} CACHE INTERNAL "Global OP library") set(GLOB_OP_LIB ${OP_LIBRARY} CACHE INTERNAL "Global OP library")
cc_test(gather_test SRCS gather_test.cc DEPS tensor) cc_test(gather_test SRCS gather_test.cc DEPS tensor)
cc_test(net_op_test SRCS net_op_test.cc DEPS net_op) cc_test(net_op_test SRCS net_op_test.cc DEPS net_op)
cc_test(scatter_test SRCS scatter_test.cc DEPS tensor) cc_test(scatter_test SRCS scatter_test.cc DEPS tensor)
...@@ -253,3 +275,4 @@ if(WITH_GPU) ...@@ -253,3 +275,4 @@ if(WITH_GPU)
cc_test(nccl_op_test SRCS nccl_op_test.cu.cc DEPS nccl_op gpu_info device_context) cc_test(nccl_op_test SRCS nccl_op_test.cu.cc DEPS nccl_op gpu_info device_context)
endif() endif()
cc_test(save_load_op_test SRCS save_load_op_test.cc DEPS save_op load_op) cc_test(save_load_op_test SRCS save_load_op_test.cc DEPS save_op load_op)
cc_test(test_send_recv SRCS send_recv_op_test.cc DEPS send_op recv_op sum_op executor)
...@@ -62,13 +62,14 @@ class BatchNormOp : public framework::OperatorWithKernel { ...@@ -62,13 +62,14 @@ class BatchNormOp : public framework::OperatorWithKernel {
const auto x_dims = ctx->GetInputDim("X"); const auto x_dims = ctx->GetInputDim("X");
const TensorFormat tensor_format = const TensorFormat tensor_format =
StringToTensorFormat(ctx->Attrs().Get<std::string>("tensor_format")); StringToTensorFormat(ctx->Attrs().Get<std::string>("tensor_format"));
PADDLE_ENFORCE(x_dims.size() >= 2 && x_dims.size() <= 5,
"Input X must have 2 to 5 dimensions.");
const int C = const int C =
(tensor_format == TensorFormat::NCHW ? x_dims[1] (tensor_format == TensorFormat::NCHW ? x_dims[1]
: x_dims[x_dims.size() - 1]); : x_dims[x_dims.size() - 1]);
PADDLE_ENFORCE(x_dims.size() >= 3 && x_dims.size() <= 5,
"Input X must have 3 to 5 dimensions.");
PADDLE_ENFORCE_EQ(ctx->GetInputDim("Scale").size(), 1UL); PADDLE_ENFORCE_EQ(ctx->GetInputDim("Scale").size(), 1UL);
PADDLE_ENFORCE_EQ(ctx->GetInputDim("Scale")[0], C); PADDLE_ENFORCE_EQ(ctx->GetInputDim("Scale")[0], C);
PADDLE_ENFORCE_EQ(ctx->GetInputDim("Bias").size(), 1UL); PADDLE_ENFORCE_EQ(ctx->GetInputDim("Bias").size(), 1UL);
...@@ -146,8 +147,8 @@ class BatchNormKernel<platform::CPUPlace, T> : public framework::OpKernel<T> { ...@@ -146,8 +147,8 @@ class BatchNormKernel<platform::CPUPlace, T> : public framework::OpKernel<T> {
const auto *x = ctx.Input<Tensor>("X"); const auto *x = ctx.Input<Tensor>("X");
const auto &x_dims = x->dims(); const auto &x_dims = x->dims();
PADDLE_ENFORCE(x_dims.size() >= 3 && x_dims.size() <= 5, PADDLE_ENFORCE(x_dims.size() >= 2 && x_dims.size() <= 5,
"The Input dim size should be between 3 and 5"); "The Input dim size should be between 2 and 5");
const int N = x_dims[0]; const int N = x_dims[0];
const int C = const int C =
(tensor_format == TensorFormat::NCHW ? x_dims[1] (tensor_format == TensorFormat::NCHW ? x_dims[1]
...@@ -339,8 +340,8 @@ class BatchNormGradKernel<platform::CPUPlace, T> ...@@ -339,8 +340,8 @@ class BatchNormGradKernel<platform::CPUPlace, T>
// Get the size for each dimension. // Get the size for each dimension.
// NCHW [batch_size, in_channels, in_height, in_width] // NCHW [batch_size, in_channels, in_height, in_width]
const auto &x_dims = x->dims(); const auto &x_dims = x->dims();
PADDLE_ENFORCE(x_dims.size() >= 3 && x_dims.size() <= 5, PADDLE_ENFORCE(x_dims.size() >= 2 && x_dims.size() <= 5,
"The Input dim size should be between 3 and 5"); "The Input dim size should be between 2 and 5");
const int N = x_dims[0]; const int N = x_dims[0];
const int C = const int C =
(tensor_format == TensorFormat::NCHW ? x_dims[1] (tensor_format == TensorFormat::NCHW ? x_dims[1]
......
...@@ -29,14 +29,21 @@ void ExtractNCWHD(const framework::DDim &dims, ...@@ -29,14 +29,21 @@ void ExtractNCWHD(const framework::DDim &dims,
const TensorFormat &tensor_format, int *N, int *C, int *H, const TensorFormat &tensor_format, int *N, int *C, int *H,
int *W, int *D) { int *W, int *D) {
*N = dims[0]; *N = dims[0];
*C = tensor_format == TensorFormat::NCHW ? dims[1] : dims[dims.size() - 1]; if (dims.size() == 2) {
*H = tensor_format == TensorFormat::NCHW ? dims[2] : dims[1]; *C = dims[1];
*W = dims.size() > 3 *H = 1;
? (tensor_format == TensorFormat::NCHW ? dims[3] : dims[2]) *W = 1;
: 1; *D = 1;
*D = dims.size() > 4 } else {
? (tensor_format == TensorFormat::NCHW ? dims[4] : dims[3]) *C = tensor_format == TensorFormat::NCHW ? dims[1] : dims[dims.size() - 1];
: 1; *H = tensor_format == TensorFormat::NCHW ? dims[2] : dims[1];
*W = dims.size() > 3
? (tensor_format == TensorFormat::NCHW ? dims[3] : dims[2])
: 1;
*D = dims.size() > 4
? (tensor_format == TensorFormat::NCHW ? dims[4] : dims[3])
: 1;
}
} }
template <typename T> template <typename T>
...@@ -56,8 +63,8 @@ class BatchNormKernel<platform::GPUPlace, T> : public framework::OpKernel<T> { ...@@ -56,8 +63,8 @@ class BatchNormKernel<platform::GPUPlace, T> : public framework::OpKernel<T> {
// NCHW [batch_size, in_channels, in_height, in_width] // NCHW [batch_size, in_channels, in_height, in_width]
const auto *x = ctx.Input<Tensor>("X"); const auto *x = ctx.Input<Tensor>("X");
const auto &x_dims = x->dims(); const auto &x_dims = x->dims();
PADDLE_ENFORCE(x_dims.size() >= 3 && x_dims.size() <= 5, PADDLE_ENFORCE(x_dims.size() >= 2 && x_dims.size() <= 5,
"The Input dim size should be between 3 and 5"); "The Input dim size should be between 2 and 5");
int N, C, H, W, D; int N, C, H, W, D;
ExtractNCWHD(x_dims, tensor_format, &N, &C, &H, &W, &D); ExtractNCWHD(x_dims, tensor_format, &N, &C, &H, &W, &D);
...@@ -180,8 +187,8 @@ class BatchNormGradKernel<platform::GPUPlace, T> ...@@ -180,8 +187,8 @@ class BatchNormGradKernel<platform::GPUPlace, T>
const auto &x_dims = x->dims(); const auto &x_dims = x->dims();
PADDLE_ENFORCE(x_dims.size() >= 3 && x_dims.size() <= 5, PADDLE_ENFORCE(x_dims.size() >= 2 && x_dims.size() <= 5,
"The Input dim size should be between 3 and 5"); "The Input dim size should be between 2 and 5");
int N, C, H, W, D; int N, C, H, W, D;
ExtractNCWHD(x_dims, tensor_format, &N, &C, &H, &W, &D); ExtractNCWHD(x_dims, tensor_format, &N, &C, &H, &W, &D);
......
...@@ -74,12 +74,12 @@ Conv2DTransposeOpMaker::Conv2DTransposeOpMaker( ...@@ -74,12 +74,12 @@ Conv2DTransposeOpMaker::Conv2DTransposeOpMaker(
"The format of output tensor is also NCHW."); "The format of output tensor is also NCHW.");
AddAttr<std::vector<int>>( AddAttr<std::vector<int>>(
"strides", "strides",
"(vector<int> defalut:{1, 1}), the strides(h_stride, w_stride) of " "(vector<int> default:{1, 1}), the strides(h_stride, w_stride) of "
"convolution transpose operator.") "convolution transpose operator.")
.SetDefault({1, 1}); .SetDefault({1, 1});
AddAttr<std::vector<int>>( AddAttr<std::vector<int>>(
"paddings", "paddings",
"(vector<int> defalut:{0, 0}), the paddings(h_pad, w_pad) of convolution " "(vector<int> default:{0, 0}), the paddings(h_pad, w_pad) of convolution "
"transpose operator.") "transpose operator.")
.SetDefault({0, 0}); .SetDefault({0, 0});
AddComment(R"DOC( AddComment(R"DOC(
...@@ -101,8 +101,8 @@ Example: ...@@ -101,8 +101,8 @@ Example:
Output: Output:
Output shape: (N, C_out, H_out, W_out) Output shape: (N, C_out, H_out, W_out)
where where
H_out = (H_in - 1) * strides[0] - 2 * paddings[0] + filter_size[0]; H_out = (H_in - 1) * strides[0] - 2 * paddings[0] + H_f;
W_out = (W_in - 1) * strides[1] - 2 * paddings[1] + filter_size[1]; W_out = (W_in - 1) * strides[1] - 2 * paddings[1] + W_f;
)DOC"); )DOC");
} }
...@@ -130,12 +130,12 @@ Conv3DTransposeOpMaker::Conv3DTransposeOpMaker( ...@@ -130,12 +130,12 @@ Conv3DTransposeOpMaker::Conv3DTransposeOpMaker(
"the number of channels, D is the depth of the feature, H is the " "the number of channels, D is the depth of the feature, H is the "
"height of the feature, and W is the width of the feature."); "height of the feature, and W is the width of the feature.");
AddAttr<std::vector<int>>("strides", AddAttr<std::vector<int>>("strides",
"(vector<int> defalut:{1, 1, 1}), the " "(vector<int> default:{1, 1, 1}), the "
"strides{d_stride, h_stride, w_stride} of " "strides{d_stride, h_stride, w_stride} of "
"convolution transpose operator.") "convolution transpose operator.")
.SetDefault({1, 1, 1}); .SetDefault({1, 1, 1});
AddAttr<std::vector<int>>("paddings", AddAttr<std::vector<int>>("paddings",
"(vector<int> defalut:{0, 0, 0}), paddings(d_pad, " "(vector<int> default:{0, 0, 0}), paddings(d_pad, "
"h_pad, w_pad) of convolution transpose operator.") "h_pad, w_pad) of convolution transpose operator.")
.SetDefault({0, 0, 0}); .SetDefault({0, 0, 0});
AddComment(R"DOC( AddComment(R"DOC(
...@@ -158,9 +158,9 @@ Example: ...@@ -158,9 +158,9 @@ Example:
Output: Output:
Output shape: (N, C_out, D_out, H_out, W_out) Output shape: (N, C_out, D_out, H_out, W_out)
where where
D_out = (D_in - 1) * strides[0] - 2 * paddings[0] + filter_size[0]; D_out = (D_in - 1) * strides[0] - 2 * paddings[0] + D_f;
H_out = (H_in - 1) * strides[1] - 2 * paddings[1] + filter_size[1]; H_out = (H_in - 1) * strides[1] - 2 * paddings[1] + H_f;
W_out = (W_in - 1) * strides[2] - 2 * paddings[2] + filter_size[2]; W_out = (W_in - 1) * strides[2] - 2 * paddings[2] + W_f;
)DOC"); )DOC");
} }
......
grpc_library(sendrecvop_grpc SRCS recv_impl.cc send_impl.cc PROTO send_recv.proto DEPS lod_tensor selected_rows)
/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserve.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
#include "send_recv_impl.h"
namespace paddle {
namespace operators {
namespace detail {
Status SendRecvServerImpl::SendVariable(ServerContext *context,
const VariableMessage *in_var,
VariableMessage *out_var) {
framework::LoDTensor t;
// TODO(typhoonzero): desirealize in_tensor and run pserver network.
std::istringstream iss(in_var->serialized());
framework::DeserializeFromStream(iss, &t);
lodtensor_queue_.Push(std::move(t));
// Block util the sub graph is done.
t = lodtensor_return_queue_.Pop();
std::ostringstream oss;
// FIXME(typhoonzero): get context from op.
framework::SerializeToStream(oss, t, platform::CPUDeviceContext());
std::string *varname = out_var->mutable_varname();
*varname = in_var->varname();
std::string *serialized = out_var->mutable_serialized();
*serialized = oss.str();
return Status::OK;
}
} // namespace detail
} // namespace operators
} // namespace paddle
/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserve.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
#include "send_recv_impl.h"
namespace paddle {
namespace operators {
namespace detail {
bool RPCClient::SendVariable(const framework::Scope& scope,
const std::string& inname,
const std::string& outname) {
ClientContext context;
VariableMessage msg, out_msg;
// FIXME(typhoonzero): pass device context to here.
auto ctx = platform::CPUDeviceContext();
auto* var = scope.FindVar(inname);
PADDLE_ENFORCE(var);
// TODO(typhoonzero): support SelectedRows
PADDLE_ENFORCE(var->IsType<framework::LoDTensor>(),
"Only support LoDTensor, %s has wrong type", inname);
const framework::LoDTensor& tensor = var->Get<framework::LoDTensor>();
std::ostringstream oss;
framework::SerializeToStream(oss, tensor, ctx);
msg.set_varname(inname);
msg.set_serialized(oss.str());
Status status = stub_->SendVariable(&context, msg, &out_msg);
if (!status.ok()) {
return false;
}
std::istringstream iss(out_msg.serialized());
framework::LoDTensor ret_tensor;
framework::DeserializeFromStream(iss, &ret_tensor);
auto* outvar = scope.FindVar(outname);
framework::LoDTensor* out_tensor = outvar->GetMutable<framework::LoDTensor>();
// FIXME(typhoonzero): do not copy.
framework::CopyFrom(ret_tensor, ctx.GetPlace(), ctx, out_tensor);
return true;
}
} // namespace detail
} // namespace operators
} // namespace paddle
/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
syntax = "proto3";
package sendrecv;
service SendRecvService {
// For parameter server round-robin like hashing, do not split tensors.
// Send and recv only one tensor
rpc SendVariable(VariableMessage) returns (VariableMessage) {}
}
// VariableMessage is serialized paddle variable message.
// It can be:
// Tensor
// LoDTensor
// SelectedRows
message VariableMessage {
string varname = 1;
bytes serialized = 2;
}
message VoidMessage {}
\ No newline at end of file
/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserve.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
#pragma once
#include "paddle/framework/data_type.h"
#include "paddle/framework/lod_tensor.h"
#include "paddle/framework/scope.h"
#include "paddle/framework/selected_rows.h"
#include "paddle/operators/detail/simple_block_queue.h"
// #include <grpc++/channel.h>
// #include <grpc++/client_context.h>
// #include <grpc++/create_channel.h>
// #include <grpc++/security/credentials.h>
#include "paddle/operators/detail/send_recv.grpc.pb.h"
#include "paddle/operators/detail/send_recv.pb.h"
#include <grpc++/grpc++.h>
using grpc::Channel;
using grpc::Server;
using grpc::ServerContext;
using grpc::ServerReader;
using grpc::ServerBuilder;
using grpc::ClientContext;
using grpc::ClientReader;
using grpc::ClientReaderWriter;
using grpc::ClientWriter;
using grpc::Status;
using sendrecv::SendRecvService;
using sendrecv::VariableMessage;
using sendrecv::VoidMessage;
namespace paddle {
namespace operators {
namespace detail {
class SendRecvServerImpl final : public SendRecvService::Service {
public:
explicit SendRecvServerImpl() {}
Status SendVariable(ServerContext *context, const VariableMessage *in_var,
VariableMessage *out_var) override;
const framework::LoDTensor Get() { return this->lodtensor_queue_.Pop(); }
void Push(const framework::LoDTensor &tensor) {
this->lodtensor_return_queue_.Push(tensor);
}
private:
SimpleBlockQueue<framework::LoDTensor> lodtensor_queue_;
SimpleBlockQueue<framework::LoDTensor> lodtensor_return_queue_;
SimpleBlockQueue<framework::SelectedRows> selected_rows_queue_;
SimpleBlockQueue<framework::SelectedRows> selected_rows_return_queue_;
};
// RPCClient is a class to send tensors to pserver sub-network
// using different hashing methods.
class RPCClient {
public:
RPCClient(std::shared_ptr<Channel> channel)
: stub_(SendRecvService::NewStub(channel)) {}
bool SendVariable(const framework::Scope &scope, const std::string &inname,
const std::string &outname);
private:
std::unique_ptr<SendRecvService::Stub> stub_;
};
} // namespace detail
} // namespace operators
} // namespace paddle
/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
#pragma once
#include <condition_variable>
#include <deque>
#include <mutex>
namespace paddle {
namespace operators {
namespace detail {
template <typename T>
class SimpleBlockQueue {
private:
std::mutex mutex_;
std::condition_variable condition_;
std::deque<T> queue_;
public:
void Push(T const& value) {
{
std::unique_lock<std::mutex> lock(this->mutex_);
queue_.push_front(value);
}
this->condition_.notify_one();
}
T Pop() {
std::unique_lock<std::mutex> lock(this->mutex_);
this->condition_.wait(lock, [=] { return !this->queue_.empty(); });
T rc(std::move(this->queue_.back()));
this->queue_.pop_back();
return rc;
}
};
} // namespace detail
} // namespace operators
} // namespace paddle
...@@ -38,61 +38,7 @@ class LoadOp : public framework::OperatorBase { ...@@ -38,61 +38,7 @@ class LoadOp : public framework::OperatorBase {
out_var_name); out_var_name);
auto *tensor = out_var->GetMutable<framework::LoDTensor>(); auto *tensor = out_var->GetMutable<framework::LoDTensor>();
framework::DeserializeFromStream(fin, tensor);
uint32_t version;
fin.read(reinterpret_cast<char *>(&version), sizeof(version));
PADDLE_ENFORCE_EQ(version, 0U, "Only version 0 is supported");
framework::TensorDesc desc;
{ // int32_t size
// proto buffer
int32_t size;
fin.read(reinterpret_cast<char *>(&size), sizeof(size));
std::unique_ptr<char[]> buf(new char[size]);
fin.read(reinterpret_cast<char *>(buf.get()), size);
PADDLE_ENFORCE(desc.ParseFromArray(buf.get(), size),
"Cannot parse tensor desc");
}
{ // read tensor
std::vector<int64_t> dims;
dims.reserve(static_cast<size_t>(desc.dims().size()));
std::copy(desc.dims().begin(), desc.dims().end(),
std::back_inserter(dims));
tensor->Resize(framework::make_ddim(dims));
void *buf;
platform::Place cpu = platform::CPUPlace();
switch (desc.data_type()) {
case framework::FP32:
buf = tensor->mutable_data<float>(cpu);
break;
case framework::FP64:
buf = tensor->mutable_data<double>(cpu);
break;
case framework::INT32:
buf = tensor->mutable_data<int>(cpu);
break;
case framework::INT64:
buf = tensor->mutable_data<int64_t>(cpu);
break;
default:
PADDLE_THROW("DataType %d not supported", desc.data_type());
}
fin.read(static_cast<char *>(buf), tensor->memory_size());
}
{ // read lod
uint64_t lod_level;
fin.read(reinterpret_cast<char *>(&lod_level), sizeof(lod_level));
auto &lod = *tensor->mutable_lod();
lod.resize(lod_level);
for (uint64_t i = 0; i < lod_level; ++i) {
uint64_t size;
fin.read(reinterpret_cast<char *>(&size), sizeof(size));
std::vector<size_t> tmp(size / sizeof(size_t));
fin.read(reinterpret_cast<char *>(tmp.data()),
static_cast<std::streamsize>(size));
lod[i] = tmp;
}
}
auto place = dev_ctx.GetPlace(); auto place = dev_ctx.GetPlace();
if (platform::is_gpu_place(place)) { if (platform::is_gpu_place(place)) {
......
/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
#include <stdint.h>
#include <sys/stat.h>
#include <ostream>
#include <thread>
#include <unistd.h>
#include "paddle/framework/data_type.h"
#include "paddle/framework/executor.h"
#include "paddle/framework/framework.pb.h"
#include "paddle/framework/lod_tensor.h"
#include "paddle/framework/op_registry.h"
#include "paddle/operators/detail/send_recv_impl.h"
#include "paddle/operators/detail/simple_block_queue.h"
namespace paddle {
namespace operators {
void RunServer(Server **rpc_server,
std::shared_ptr<detail::SendRecvServerImpl> service,
const std::string &server_address) {
ServerBuilder builder;
builder.AddListeningPort(server_address, grpc::InsecureServerCredentials());
builder.RegisterService(service.get());
std::unique_ptr<Server> server(builder.BuildAndStart());
*rpc_server = server.get();
LOG(INFO) << "Server listening on " << server_address << std::endl;
server->Wait();
}
class RecvOp : public framework::OperatorBase {
public:
RecvOp(const std::string &type, const framework::VariableNameMap &inputs,
const framework::VariableNameMap &outputs,
const framework::AttributeMap &attrs)
: OperatorBase(type, inputs, outputs, attrs) {
if (!rpc_service_) {
rpc_service_.reset(new detail::SendRecvServerImpl());
std::string endpoint = Attr<std::string>("endpoint");
server_thread_.reset(
new std::thread(RunServer, &rpc_server_, rpc_service_, endpoint));
}
}
virtual ~RecvOp() {
rpc_server_->Shutdown();
server_thread_->join();
}
void Run(const framework::Scope &scope,
const platform::DeviceContext &dev_ctx) const override {
// blocking get one var from client.
const framework::LoDTensor &t = rpc_service_->Get();
framework::Scope &recv_scope = scope.NewScope();
// set graph input var
auto *var = recv_scope.Var(Input("RX"));
auto *tensor = var->GetMutable<framework::LoDTensor>();
// FIXME(typhoonzero): do not copy
framework::CopyFrom(t, dev_ctx.GetPlace(), dev_ctx, tensor);
auto *block = Attr<framework::BlockDescBind *>("OptimizeBlock");
auto *program = block->Program();
framework::Executor executor(dev_ctx);
// Run sub graph to get optimized tensor
executor.Run(*program, &recv_scope, block->ID(),
false /*create_local_scope*/);
auto *out_var = recv_scope.FindVar("Out");
// push back
rpc_service_->Push(out_var->Get<framework::LoDTensor>());
}
protected:
// grpc server instance to track status and gracefully shutdown.
// borrow an pointer from server thread.
Server *rpc_server_{nullptr};
// grpc send/recv service implement to register.
std::shared_ptr<detail::SendRecvServerImpl> rpc_service_;
std::shared_ptr<std::thread> server_thread_;
};
class RecvOpMaker : public framework::OpProtoAndCheckerMaker {
public:
RecvOpMaker(framework::OpProto *proto, framework::OpAttrChecker *op_checker)
: OpProtoAndCheckerMaker(proto, op_checker) {
AddInput("RX", "(Tensor) Input tensor to be saved");
AddComment(R"DOC(
Recv operator
This operator will recv tensor from send_op
)DOC");
AddAttr<std::string>("endpoint",
"(string, default 127.0.0.1:6164)"
"IP address to listen on.")
.SetDefault("127.0.0.1:6164")
.AddCustomChecker([](const std::string &ip) { return !ip.empty(); });
AddAttr<framework::BlockDescBind *>("OptimizeBlock", "type BlockDescBind*",
"optimize network run in server");
}
};
} // namespace operators
} // namespace paddle
namespace ops = paddle::operators;
REGISTER_OPERATOR(recv, ops::RecvOp, ops::RecvOpMaker);
...@@ -88,73 +88,7 @@ class SaveOp : public framework::OperatorBase { ...@@ -88,73 +88,7 @@ class SaveOp : public framework::OperatorBase {
"SaveOp only support LoDTensor, %s has wrong type", iname); "SaveOp only support LoDTensor, %s has wrong type", iname);
auto &tensor = var->Get<framework::LoDTensor>(); auto &tensor = var->Get<framework::LoDTensor>();
framework::SerializeToStream(fout, tensor, dev_ctx);
{ // the 1st field, uint32_t version
constexpr uint32_t version = 0;
fout.write(reinterpret_cast<const char *>(&version), sizeof(version));
}
{ // the 2nd field, tensor description
// int32_t size
// void* protobuf message
framework::TensorDesc desc;
desc.set_data_type(framework::ToDataType(tensor.type()));
auto dims = framework::vectorize(tensor.dims());
auto *pb_dims = desc.mutable_dims();
pb_dims->Resize(static_cast<int>(dims.size()), 0);
std::copy(dims.begin(), dims.end(), pb_dims->begin());
int32_t size = desc.ByteSize();
fout.write(reinterpret_cast<const char *>(&size), sizeof(size));
auto out = desc.SerializeAsString();
fout.write(out.data(), size);
}
{ // the 3rd field, tensor data
uint64_t size = tensor.memory_size();
auto *data_ptr = tensor.data<void>();
PADDLE_ENFORCE(size < std::numeric_limits<std::streamsize>::max(),
"Index overflow when writing tensor");
if (platform::is_gpu_place(tensor.place())) {
#ifdef PADDLE_WITH_CUDA
constexpr size_t kBufSize = 1024 * 1024 * 64; // 64MB
std::unique_ptr<char[]> buf(new char[kBufSize]);
auto &gpu_dev_ctx =
static_cast<const platform::CUDADeviceContext &>(dev_ctx);
platform::CPUPlace cpu;
uintptr_t data = reinterpret_cast<uintptr_t>(data_ptr);
while (size != 0) {
size_t size_to_write = std::min(kBufSize, static_cast<size_t>(size));
memory::Copy(cpu, buf.get(),
boost::get<platform::GPUPlace>(tensor.place()),
reinterpret_cast<const void *>(data), size_to_write,
gpu_dev_ctx.stream());
gpu_dev_ctx.Wait();
fout.write(buf.get(), size_to_write);
data += size_to_write;
size -= size_to_write;
}
#else
PADDLE_THROW("Unexpected branch");
#endif
} else {
fout.write(static_cast<const char *>(data_ptr),
static_cast<std::streamsize>(size));
}
}
{ // the 4th field, lod information
// uint64_t lod_level
// uint64_t lod_level_1 size in byte.
// int* lod_level_1 data
// ...
auto lod = tensor.lod();
uint64_t size = lod.size();
fout.write(reinterpret_cast<const char *>(&size), sizeof(size));
for (auto &each : lod) {
size = each.size() * sizeof(framework::LoD::value_type::value_type);
fout.write(reinterpret_cast<const char *>(&size), sizeof(size));
fout.write(reinterpret_cast<const char *>(each.data()),
static_cast<std::streamsize>(size));
}
}
} }
}; };
......
...@@ -77,4 +77,6 @@ REGISTER_OPERATOR(scale, ops::ScaleOp, ops::ScaleOpMaker<float>, ...@@ -77,4 +77,6 @@ REGISTER_OPERATOR(scale, ops::ScaleOp, ops::ScaleOpMaker<float>,
ops::ScaleGradMaker); ops::ScaleGradMaker);
REGISTER_OP_CPU_KERNEL(scale, REGISTER_OP_CPU_KERNEL(scale,
ops::ScaleKernel<paddle::platform::CPUPlace, float>, ops::ScaleKernel<paddle::platform::CPUPlace, float>,
ops::ScaleKernel<paddle::platform::CPUPlace, double>); ops::ScaleKernel<paddle::platform::CPUPlace, double>,
ops::ScaleKernel<paddle::platform::CPUPlace, int>,
ops::ScaleKernel<paddle::platform::CPUPlace, int64_t>);
...@@ -16,4 +16,6 @@ ...@@ -16,4 +16,6 @@
REGISTER_OP_GPU_KERNEL( REGISTER_OP_GPU_KERNEL(
scale, paddle::operators::ScaleKernel<paddle::platform::GPUPlace, float>, scale, paddle::operators::ScaleKernel<paddle::platform::GPUPlace, float>,
paddle::operators::ScaleKernel<paddle::platform::GPUPlace, double>); paddle::operators::ScaleKernel<paddle::platform::GPUPlace, double>,
paddle::operators::ScaleKernel<paddle::platform::GPUPlace, int>,
paddle::operators::ScaleKernel<paddle::platform::GPUPlace, int64_t>);
/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
#include <ostream>
#include "paddle/framework/data_type.h"
#include "paddle/framework/framework.pb.h"
#include "paddle/framework/lod_tensor.h"
#include "paddle/framework/op_registry.h"
#include "paddle/operators/detail/send_recv_impl.h"
#include "paddle/operators/detail/simple_block_queue.h"
namespace paddle {
namespace operators {
// TODO(typhoonzero): this is a simple implementation which only send
// one tensor
class SendOp : public framework::OperatorBase {
public:
SendOp(const std::string &type, const framework::VariableNameMap &inputs,
const framework::VariableNameMap &outputs,
const framework::AttributeMap &attrs)
: OperatorBase(type, inputs, outputs, attrs) {
// init client when the operator is created at runtime.
if (!client_) {
std::string endpoint = Attr<std::string>("endpoint");
client_.reset(new detail::RPCClient(
grpc::CreateChannel(endpoint, grpc::InsecureChannelCredentials())));
// TODO(typhoonzero): how to call InitVariables
}
}
void Run(const framework::Scope &scope,
const platform::DeviceContext &dev_ctx) const override {
auto iname = Input("X");
auto oname = Output("Out");
// TODO(typhoonzero): currently it's non-blocking,
// should block until server responds.
bool ret = client_->SendVariable(scope, iname, oname);
if (!ret) {
LOG(ERROR) << "send variable error";
}
}
protected:
std::shared_ptr<detail::RPCClient> client_{nullptr};
};
class SendOpMaker : public framework::OpProtoAndCheckerMaker {
public:
SendOpMaker(framework::OpProto *proto, framework::OpAttrChecker *op_checker)
: OpProtoAndCheckerMaker(proto, op_checker) {
AddInput("X", "(Tensor) Input tensor to be saved");
AddOutput("Out", "(Tensor) Output fetched from server");
AddComment(R"DOC(
Recv operator
This operator will recv tensor from send_op
)DOC");
AddAttr<std::string>("endpoint",
"(string, default 127.0.0.1:6164)"
"IP address to listen on.")
.SetDefault("127.0.0.1:6164")
.AddCustomChecker([](const std::string &ip) { return !ip.empty(); });
}
};
} // namespace operators
} // namespace paddle
namespace ops = paddle::operators;
REGISTER_OPERATOR(send, ops::SendOp, ops::SendOpMaker);
/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
// TODO(typhoonzero): add python bindings for this test as
// a RemoteOptimizer.
#include <unistd.h>
#include <thread>
#include "gtest/gtest.h"
#include "paddle/framework/op_registry.h"
#include "paddle/framework/operator.h"
#include "paddle/framework/program_desc.h"
USE_NO_KERNEL_OP(send);
USE_NO_KERNEL_OP(recv);
USE_OP(sum);
// global for simplicity.
std::unique_ptr<paddle::framework::OperatorBase> recv_op;
void InitTensorsInScope(paddle::framework::Scope &scope,
paddle::platform::CPUPlace &place) {
paddle::platform::CPUDeviceContext ctx(place);
auto var = scope.Var("X");
auto tensor = var->GetMutable<paddle::framework::LoDTensor>();
tensor->Resize({10, 10});
float *expect = tensor->mutable_data<float>(place);
for (int64_t i = 0; i < tensor->numel(); ++i) {
expect[i] = static_cast<float>(i);
}
auto out_var = scope.Var("Out");
auto out_tensor = out_var->GetMutable<paddle::framework::LoDTensor>();
out_tensor->Resize({10, 10});
tensor->mutable_data<float>(place); // allocate
}
void AddOp(const std::string &type,
const paddle::framework::VariableNameMap &inputs,
const paddle::framework::VariableNameMap &outputs,
paddle::framework::AttributeMap attrs,
paddle::framework::BlockDescBind *block) {
// insert output
for (auto kv : outputs) {
for (auto v : kv.second) {
auto var = block->Var(v);
var->SetDataType(paddle::framework::DataType::FP32);
}
}
// insert op
auto op = block->AppendOp();
op->SetType(type);
for (auto &kv : inputs) {
op->SetInput(kv.first, kv.second);
}
for (auto &kv : outputs) {
op->SetOutput(kv.first, kv.second);
}
op->SetAttrMap(attrs);
}
void StartServerNet() {
paddle::framework::Scope scope;
paddle::platform::CPUPlace place;
InitTensorsInScope(scope, place);
// sub program run in recv_op, for simple test we use sum
paddle::framework::ProgramDescBind program;
paddle::framework::BlockDescBind *block = program.MutableBlock(0);
// X for server side tensors, RX for received tensers, must be of same shape.
AddOp("sum", {{"X", {"X", "RX"}}}, {{"Out", {"Out"}}}, {}, block);
paddle::framework::AttributeMap attrs;
attrs.insert({"endpoint", std::string("127.0.0.1:6174")});
attrs.insert({"OptimizeBlock", block});
recv_op = paddle::framework::OpRegistry::CreateOp("recv", {{"RX", {"RX"}}},
{{"Out", {"Out"}}}, attrs);
paddle::platform::CPUDeviceContext ctx(place);
recv_op->Run(scope, ctx);
}
TEST(SendRecvOp, CPU) {
std::thread server_thread(StartServerNet);
sleep(5); // wait server to start
// local net
paddle::framework::Scope scope;
paddle::platform::CPUPlace place;
InitTensorsInScope(scope, place);
paddle::framework::AttributeMap attrs;
attrs.insert({"endpoint", std::string("127.0.0.1:6174")});
auto send_op = paddle::framework::OpRegistry::CreateOp(
"send", {{"X", {"X"}}}, {{"Out", {"Out"}}}, attrs);
paddle::platform::CPUDeviceContext ctx(place);
send_op->Run(scope, ctx);
auto in_var = scope.Var("X");
auto tensor = in_var->GetMutable<paddle::framework::LoDTensor>();
float *expected = tensor->data<float>();
auto out_var = scope.Var("Out");
auto target = out_var->GetMutable<paddle::framework::LoDTensor>();
// send fail cause output is none.
EXPECT_NE(target->memory_size(), size_t(0));
float *actual = target->data<float>();
for (int64_t i = 0; i < target->numel(); ++i) {
EXPECT_EQ(expected[i] * 2, actual[i]);
}
recv_op.reset(); // dtor can shutdown and join server thread.
server_thread.join();
}
...@@ -16,11 +16,13 @@ function cmake_gen() { ...@@ -16,11 +16,13 @@ function cmake_gen() {
echo "using python abi: $1" echo "using python abi: $1"
if [ "$1" == "cp27-cp27m" ]; then if [ "$1" == "cp27-cp27m" ]; then
export LD_LIBRARY_PATH=/opt/_internal/cpython-2.7.11-ucs2/lib:${LD_LIBRARY_PATH#/opt/_internal/cpython-2.7.11-ucs4/lib:} export LD_LIBRARY_PATH=/opt/_internal/cpython-2.7.11-ucs2/lib:${LD_LIBRARY_PATH#/opt/_internal/cpython-2.7.11-ucs4/lib:}
export PATH=/opt/python/cp27-cp27m/bin/:${PATH}
PYTHON_FLAGS="-DPYTHON_EXECUTABLE:FILEPATH=/opt/python/cp27-cp27m/bin/python PYTHON_FLAGS="-DPYTHON_EXECUTABLE:FILEPATH=/opt/python/cp27-cp27m/bin/python
-DPYTHON_INCLUDE_DIR:PATH=/opt/python/cp27-cp27m/include/python2.7 -DPYTHON_INCLUDE_DIR:PATH=/opt/python/cp27-cp27m/include/python2.7
-DPYTHON_LIBRARIES:FILEPATH=/opt/_internal/cpython-2.7.11-ucs2/lib/libpython2.7.so" -DPYTHON_LIBRARIES:FILEPATH=/opt/_internal/cpython-2.7.11-ucs2/lib/libpython2.7.so"
elif [ "$1" == "cp27-cp27mu" ]; then elif [ "$1" == "cp27-cp27mu" ]; then
export LD_LIBRARY_PATH=/opt/_internal/cpython-2.7.11-ucs4/lib:${LD_LIBRARY_PATH#/opt/_internal/cpython-2.7.11-ucs2/lib:} export LD_LIBRARY_PATH=/opt/_internal/cpython-2.7.11-ucs4/lib:${LD_LIBRARY_PATH#/opt/_internal/cpython-2.7.11-ucs2/lib:}
export PATH=/opt/python/cp27-cp27mu/bin/:${PATH}
PYTHON_FLAGS="-DPYTHON_EXECUTABLE:FILEPATH=/opt/python/cp27-cp27mu/bin/python PYTHON_FLAGS="-DPYTHON_EXECUTABLE:FILEPATH=/opt/python/cp27-cp27mu/bin/python
-DPYTHON_INCLUDE_DIR:PATH=/opt/python/cp27-cp27mu/include/python2.7 -DPYTHON_INCLUDE_DIR:PATH=/opt/python/cp27-cp27mu/include/python2.7
-DPYTHON_LIBRARIES:FILEPATH=/opt/_internal/cpython-2.7.11-ucs4/lib/libpython2.7.so" -DPYTHON_LIBRARIES:FILEPATH=/opt/_internal/cpython-2.7.11-ucs4/lib/libpython2.7.so"
......
...@@ -11,8 +11,9 @@ make -j `nproc` gen_proto_py ...@@ -11,8 +11,9 @@ make -j `nproc` gen_proto_py
make -j `nproc` paddle_docs paddle_docs_cn make -j `nproc` paddle_docs paddle_docs_cn
# check websites for broken links # check websites for broken links
linkchecker doc/en/html/index.html # It will be failed now!
linkchecker doc/cn/html/index.html #linkchecker doc/en/html/index.html
#linkchecker doc/cn/html/index.html
# Parse Github URL # Parse Github URL
REPO=`git config remote.origin.url` REPO=`git config remote.origin.url`
......
...@@ -83,11 +83,10 @@ def set_omp_mkl_env_vars(trainer_count): ...@@ -83,11 +83,10 @@ def set_omp_mkl_env_vars(trainer_count):
'''Get the number of physical cores''' '''Get the number of physical cores'''
if platform.system() == "Linux": if platform.system() == "Linux":
num_sockets = int( num_sockets = int(
os.popen("lscpu |grep \"Socket\" |awk -F':' '{print $2}'|xargs") os.popen("grep 'physical id' /proc/cpuinfo | sort -u | wc -l")
.read()) .read())
num_cores_per_socket = int( num_cores_per_socket = int(
os.popen( os.popen("grep 'core id' /proc/cpuinfo | sort -u | wc -l")
"lscpu |grep \"per socket\" |awk -F':' '{print $2}'|xargs")
.read()) .read())
return num_sockets * num_cores_per_socket return num_sockets * num_cores_per_socket
else: else:
......
...@@ -26,9 +26,9 @@ class Evaluator(object): ...@@ -26,9 +26,9 @@ class Evaluator(object):
name(str): The name of evaluator. such as, "accuracy". Used for generate name(str): The name of evaluator. such as, "accuracy". Used for generate
temporary variable name. temporary variable name.
main_program(Program, optional): The evaluator should be added to this main_program(Program, optional): The evaluator should be added to this
main_program. Default g_main_program main_program. Default default_main_program()
startup_program(Program, optional):The parameter should be added to this startup_program(Program, optional):The parameter should be added to this
startup_program. Default g_startup_program startup_program. Default default_startup_program()
Attributes: Attributes:
states(list): The list of state variables. states will be reset to zero states(list): The list of state variables. states will be reset to zero
......
import numpy as np import numpy as np
from . import core from . import core
from framework import Program, g_main_program from framework import Program, default_main_program
__all__ = ['Executor', 'g_scope'] __all__ = ['Executor', 'g_scope']
...@@ -103,7 +103,7 @@ class Executor(object): ...@@ -103,7 +103,7 @@ class Executor(object):
fetch_list = [] fetch_list = []
if program is None: if program is None:
program = g_main_program program = default_main_program()
if not isinstance(program, Program): if not isinstance(program, Program):
raise TypeError() raise TypeError()
......
...@@ -6,7 +6,7 @@ import proto.framework_pb2 as framework_pb2 ...@@ -6,7 +6,7 @@ import proto.framework_pb2 as framework_pb2
__all__ = [ __all__ = [
'Block', 'Variable', 'Program', 'Operator', 'default_startup_program', 'Block', 'Variable', 'Program', 'Operator', 'default_startup_program',
'default_main_program', 'g_startup_program', 'g_main_program' 'default_main_program'
] ]
...@@ -654,13 +654,13 @@ class Parameter(Variable): ...@@ -654,13 +654,13 @@ class Parameter(Variable):
# program is a global instance. # program is a global instance.
g_main_program = Program() _main_program_ = Program()
g_startup_program = Program() _startup_program_ = Program()
def default_startup_program(): def default_startup_program():
return g_startup_program return _startup_program_
def default_main_program(): def default_main_program():
return g_main_program return _main_program_
import os import os
import cPickle as pickle import cPickle as pickle
from paddle.v2.fluid.framework import Program, Parameter, g_main_program, \ from paddle.v2.fluid.framework import Program, Parameter, default_main_program, Variable
Variable
__all__ = [ __all__ = [
'save_vars', 'save_params', 'save_persistables', 'load_vars', 'load_params', 'save_vars', 'save_params', 'save_persistables', 'load_vars', 'load_params',
...@@ -46,7 +45,7 @@ def save_vars(executor, dirname, main_program=None, vars=None, predicate=None): ...@@ -46,7 +45,7 @@ def save_vars(executor, dirname, main_program=None, vars=None, predicate=None):
""" """
if vars is None: if vars is None:
if main_program is None: if main_program is None:
main_program = g_main_program main_program = default_main_program()
if not isinstance(main_program, Program): if not isinstance(main_program, Program):
raise TypeError("program should be as Program type or None") raise TypeError("program should be as Program type or None")
...@@ -98,7 +97,7 @@ def load_vars(executor, dirname, main_program=None, vars=None, predicate=None): ...@@ -98,7 +97,7 @@ def load_vars(executor, dirname, main_program=None, vars=None, predicate=None):
:param executor: executor that save variable :param executor: executor that save variable
:param dirname: directory path :param dirname: directory path
:param main_program: program. If vars is None, then filter all variables in this :param main_program: program. If vars is None, then filter all variables in this
program which fit `predicate`. Default g_program. program which fit `predicate`. Default default_main_program().
:param predicate: The Predicate describes a callable that returns a variable :param predicate: The Predicate describes a callable that returns a variable
as a bool. If it returns true, the variables will be loaded. as a bool. If it returns true, the variables will be loaded.
:param vars: variables need to be loaded. If specify vars, program & :param vars: variables need to be loaded. If specify vars, program &
...@@ -107,7 +106,7 @@ def load_vars(executor, dirname, main_program=None, vars=None, predicate=None): ...@@ -107,7 +106,7 @@ def load_vars(executor, dirname, main_program=None, vars=None, predicate=None):
""" """
if vars is None: if vars is None:
if main_program is None: if main_program is None:
main_program = g_main_program main_program = default_main_program()
if not isinstance(main_program, Program): if not isinstance(main_program, Program):
raise TypeError("program's type should be Program") raise TypeError("program's type should be Program")
...@@ -154,7 +153,7 @@ def load_persistables(executor, dirname, main_program=None): ...@@ -154,7 +153,7 @@ def load_persistables(executor, dirname, main_program=None):
def get_inference_program(target_vars, main_program=None): def get_inference_program(target_vars, main_program=None):
if main_program is None: if main_program is None:
main_program = g_main_program main_program = default_main_program()
if not isinstance(target_vars, list): if not isinstance(target_vars, list):
target_vars = [target_vars] target_vars = [target_vars]
...@@ -177,12 +176,12 @@ def save_inference_model(dirname, ...@@ -177,12 +176,12 @@ def save_inference_model(dirname,
:param target_vars: Variables from which we can get inference results. :param target_vars: Variables from which we can get inference results.
:param executor: executor that save inference model :param executor: executor that save inference model
:param main_program: original program, which will be pruned to build the inference model. :param main_program: original program, which will be pruned to build the inference model.
Default g_main_program. Default default_main_program().
:return: None :return: None
""" """
if main_program is None: if main_program is None:
main_program = g_main_program main_program = default_main_program()
if not isinstance(target_vars, list): if not isinstance(target_vars, list):
target_vars = [target_vars] target_vars = [target_vars]
...@@ -272,10 +271,10 @@ def get_parameter_value_by_name(name, executor, program=None): ...@@ -272,10 +271,10 @@ def get_parameter_value_by_name(name, executor, program=None):
:param executor: executor for retrieving the value :param executor: executor for retrieving the value
:param name: the name of the parameter :param name: the name of the parameter
:param program: the program where the variable is found :param program: the program where the variable is found
Default g_main_program. Default default_main_program().
:return: the LoDTensor for the variable :return: the LoDTensor for the variable
""" """
if program is None: if program is None:
program = g_main_program program = default_main_program()
var = program.global_block().var(name) var = program.global_block().var(name)
return get_parameter_value(var, executor) return get_parameter_value(var, executor)
import copy import copy
import itertools import itertools
from framework import Variable, g_main_program, \ from framework import Variable, default_main_program, default_startup_program, unique_name, dtype_is_floating
g_startup_program, unique_name, dtype_is_floating
from paddle.v2.fluid.initializer import Constant, Xavier from paddle.v2.fluid.initializer import Constant, Xavier
...@@ -22,7 +21,7 @@ class LayerHelper(object): ...@@ -22,7 +21,7 @@ class LayerHelper(object):
def main_program(self): def main_program(self):
prog = self.kwargs.get('main_program', None) prog = self.kwargs.get('main_program', None)
if prog is None: if prog is None:
return g_main_program return default_main_program()
else: else:
return prog return prog
...@@ -30,7 +29,7 @@ class LayerHelper(object): ...@@ -30,7 +29,7 @@ class LayerHelper(object):
def startup_program(self): def startup_program(self):
prog = self.kwargs.get('startup_program', None) prog = self.kwargs.get('startup_program', None)
if prog is None: if prog is None:
return g_startup_program return default_startup_program()
else: else:
return prog return prog
......
from . import core import core
import proto.framework_pb2 as framework_pb2 import proto.framework_pb2 as framework_pb2
from framework import OpProtoHolder, Variable, Program, Operator from framework import OpProtoHolder, Variable, Program, Operator
from initializer import Constant, Normal, Xavier from initializer import Constant, Normal, Xavier, Initializer
from paddle.v2.fluid.layer_helper import LayerHelper, unique_name from paddle.v2.fluid.layer_helper import LayerHelper, unique_name
import re import re
import cStringIO import cStringIO
...@@ -1587,6 +1587,97 @@ def array_length(array, main_program=None): ...@@ -1587,6 +1587,97 @@ def array_length(array, main_program=None):
return tmp return tmp
def conv2d_transpose(input,
num_filters,
output_size=None,
filter_size=None,
padding=None,
stride=None,
param_attr=None,
param_initializer=None,
main_program=None,
startup_program=None):
"""
The transpose of conv2d layer.
This layer is also known as deconvolution layer.
Args:
input(Variable): The input image with [N, C, H, W] format.
num_filters(int): The number of filter. It is as same as the output
image channel.
output_size(int|tuple|None): The output image size. If output size is a
tuple, it must contain two integers, (image_H, image_W). This
parameter only works when filter_size is None.
filter_size(int|tuple|None): The filter size. If filter_size is a tuple,
it must contain two integers, (filter_size_H, filter_size_W).
Otherwise, the filter will be a square. None if use output size to
calculate filter_size
padding(int|tuple): The padding size. If padding is a tuple, it must
contain two integers, (padding_H, padding_W). Otherwise, the
padding_H = padding_W = padding.
stride(int|tuple): The stride size. If stride is a tuple, it must
contain two integers, (stride_H, stride_W). Otherwise, the
stride_H = stride_W = stride.
param_attr: Parameter Attribute.
param_initializer(Initializer): Parameter Initializer. Default is Xavier
main_program(Program): the main program
startup_program(Program): the startup program
Returns:
Variable: Output image.
"""
helper = LayerHelper("conv2d_transpose", **locals())
if not isinstance(input, Variable):
raise TypeError("Input of conv2d_transpose must be Variable")
input_channel = input.shape[1]
op_attr = dict()
if isinstance(padding, int):
op_attr['paddings'] = [padding, padding]
elif padding is not None:
op_attr['paddings'] = padding
if isinstance(stride, int):
op_attr['strides'] = stride
elif stride is not None:
op_attr['strides'] = stride
if filter_size is None:
if output_size is None:
raise ValueError("output_size must be set when filter_size is None")
if isinstance(output_size, int):
output_size = [output_size, output_size]
padding = op_attr.get('paddings', [0, 0])
stride = op_attr.get('strides', [1, 1])
h_in = input.shape[2]
w_in = input.shape[3]
filter_size_h = output_size[0] - (h_in - 1) * stride[0] + 2 * padding[0]
filter_size_w = output_size[1] - (w_in - 1) * stride[1] + 2 * padding[1]
filter_size = [filter_size_h, filter_size_w]
elif isinstance(filter_size, int):
filter_size = [filter_size, filter_size]
filter_shape = [input_channel, num_filters] + filter_size
img_filter = helper.create_parameter(
dtype=input.dtype,
shape=filter_shape,
attr=helper.param_attr,
initializer=param_initializer)
out = helper.create_tmp_variable(dtype=input.dtype)
helper.append_op(
type='conv2d_transpose',
inputs={'Input': [input],
'Filter': [img_filter]},
outputs={'Output': out},
attrs=op_attr)
return out
class ConditionalBlockGuard(BlockGuard): class ConditionalBlockGuard(BlockGuard):
def __init__(self, block): def __init__(self, block):
if not isinstance(block, ConditionalBlock): if not isinstance(block, ConditionalBlock):
......
...@@ -69,8 +69,7 @@ def vgg16_bn_drop(input): ...@@ -69,8 +69,7 @@ def vgg16_bn_drop(input):
drop = fluid.layers.dropout(x=conv5, dropout_prob=0.5) drop = fluid.layers.dropout(x=conv5, dropout_prob=0.5)
fc1 = fluid.layers.fc(input=drop, size=512, act=None) fc1 = fluid.layers.fc(input=drop, size=512, act=None)
reshape1 = fluid.layers.reshape(x=fc1, shape=list(fc1.shape + (1, 1))) bn = fluid.layers.batch_norm(input=fc1, act='relu')
bn = fluid.layers.batch_norm(input=reshape1, act='relu')
drop2 = fluid.layers.dropout(x=bn, dropout_prob=0.5) drop2 = fluid.layers.dropout(x=bn, dropout_prob=0.5)
fc2 = fluid.layers.fc(input=drop2, size=512, act=None) fc2 = fluid.layers.fc(input=drop2, size=512, act=None)
return fc2 return fc2
......
...@@ -3,7 +3,7 @@ import paddle.v2.fluid.core as core ...@@ -3,7 +3,7 @@ import paddle.v2.fluid.core as core
import paddle.v2.fluid.layers as layers import paddle.v2.fluid.layers as layers
from paddle.v2.fluid.executor import Executor from paddle.v2.fluid.executor import Executor
from paddle.v2.fluid.backward import append_backward_ops from paddle.v2.fluid.backward import append_backward_ops
from paddle.v2.fluid.framework import g_main_program from paddle.v2.fluid.framework import default_main_program
import numpy import numpy
...@@ -66,7 +66,7 @@ class TestArrayReadWrite(unittest.TestCase): ...@@ -66,7 +66,7 @@ class TestArrayReadWrite(unittest.TestCase):
append_backward_ops(total_sum_scaled) append_backward_ops(total_sum_scaled)
g_vars = map(g_main_program.global_block().var, g_vars = map(default_main_program().global_block().var,
[each_x.name + "@GRAD" for each_x in x]) [each_x.name + "@GRAD" for each_x in x])
g_out = [ g_out = [
item.sum() item.sum()
......
...@@ -21,6 +21,13 @@ def get_backward_op(scope, op, no_grad_set): ...@@ -21,6 +21,13 @@ def get_backward_op(scope, op, no_grad_set):
def _reference_training(x, scale, offset, epsilon, data_format): def _reference_training(x, scale, offset, epsilon, data_format):
x_shape = x.shape
if len(x_shape) == 2:
if data_format == "NCHW":
x = np.reshape(x, (x.shape[0], x.shape[1], 1, 1))
else:
x = np.reshape(x, (x.shape[0], 1, 1, x.shape[1]))
if data_format == "NCHW": if data_format == "NCHW":
n, c, h, w = x.shape n, c, h, w = x.shape
x_square = x * x x_square = x * x
...@@ -39,6 +46,8 @@ def _reference_training(x, scale, offset, epsilon, data_format): ...@@ -39,6 +46,8 @@ def _reference_training(x, scale, offset, epsilon, data_format):
offset_tile = np.reshape(offset, (1, c, 1, 1)) offset_tile = np.reshape(offset, (1, c, 1, 1))
offset_tile = np.reshape(offset_tile, (1, c, 1, 1)) offset_tile = np.reshape(offset_tile, (1, c, 1, 1))
y = normalized * scale_tile + offset_tile y = normalized * scale_tile + offset_tile
if len(x_shape) == 2:
y = np.reshape(y, (y.shape[0], y.shape[1]))
return y, mean, var return y, mean, var
elif data_format == "NHWC": elif data_format == "NHWC":
x_square = x * x x_square = x * x
...@@ -48,7 +57,10 @@ def _reference_training(x, scale, offset, epsilon, data_format): ...@@ -48,7 +57,10 @@ def _reference_training(x, scale, offset, epsilon, data_format):
mean = x_sum / element_count mean = x_sum / element_count
var = x_square_sum / element_count - mean * mean var = x_square_sum / element_count - mean * mean
normalized = (x - mean) / np.sqrt(var + epsilon) normalized = (x - mean) / np.sqrt(var + epsilon)
return (normalized * scale + offset), mean, var y = normalized * scale + offset
if len(x_shape) == 2:
y = np.reshape(y, x_shape)
return y, mean, var
else: else:
raise ValueError("Unknown data order.") raise ValueError("Unknown data order.")
...@@ -65,6 +77,18 @@ def _reference_grad(x, grad_y, scale, mean, var, epsilon, data_format): ...@@ -65,6 +77,18 @@ def _reference_grad(x, grad_y, scale, mean, var, epsilon, data_format):
# (x - mean) * sum(grad_y * (x - mean)) / (var + epsilon)) # (x - mean) * sum(grad_y * (x - mean)) / (var + epsilon))
# transfer from (N, C, H, W) to (N, H, W, C) to simplify computation # transfer from (N, C, H, W) to (N, H, W, C) to simplify computation
x_shape = x.shape
if len(x_shape) == 2:
if data_format == "NCHW":
x = np.reshape(x, (x.shape[0], x.shape[1], 1, 1))
grad_y = np.reshape(grad_y,
(grad_y.shape[0], grad_y.shape[1], 1, 1))
else:
x = np.reshape(x, (x.shape[0], 1, 1, x.shape[1]))
grad_y = np.reshape(grad_y,
(grad_y.shape[0], 1, 1, grad_y.shape[1]))
if data_format == "NCHW": if data_format == "NCHW":
x = np.transpose(x, (0, 2, 3, 1)) x = np.transpose(x, (0, 2, 3, 1))
grad_y = np.transpose(grad_y, (0, 2, 3, 1)) grad_y = np.transpose(grad_y, (0, 2, 3, 1))
...@@ -83,6 +107,9 @@ def _reference_grad(x, grad_y, scale, mean, var, epsilon, data_format): ...@@ -83,6 +107,9 @@ def _reference_grad(x, grad_y, scale, mean, var, epsilon, data_format):
grad_x = np.transpose(grad_x, (0, 3, 1, 2)) grad_x = np.transpose(grad_x, (0, 3, 1, 2))
x = np.transpose(x, (0, 3, 1, 2)) x = np.transpose(x, (0, 3, 1, 2))
grad_y = np.transpose(grad_y, (0, 3, 1, 2)) grad_y = np.transpose(grad_y, (0, 3, 1, 2))
if len(x_shape) == 2:
grad_x = np.reshape(grad_x, x_shape)
return grad_x, grad_scale, grad_offset return grad_x, grad_scale, grad_offset
...@@ -127,7 +154,7 @@ class TestBatchNormOp(OpTest): ...@@ -127,7 +154,7 @@ class TestBatchNormOp(OpTest):
momentum = 0.9 momentum = 0.9
# N, H, W, C: 2, 3, 4, 2 # N, H, W, C: 2, 3, 4, 2
n, h, w, c = 2, 3, 4, 2 n, h, w, c = 2, 3, 4, 5
x_shape = [n, h, w, c] x_shape = [n, h, w, c]
scale_shape = [c] scale_shape = [c]
...@@ -184,20 +211,23 @@ class TestBatchNormOp(OpTest): ...@@ -184,20 +211,23 @@ class TestBatchNormOp(OpTest):
print 'python: NHWC, NCHW, backward checking passed' print 'python: NHWC, NCHW, backward checking passed'
def test_forward_backward(self): def test_forward_backward(self):
def test_with_place(place, tensor_format): def test_with_place(place, tensor_format, shape):
# attr # attr
epsilon = 0.00001 epsilon = 0.00001
momentum = 0.9 momentum = 0.9
# N, H, W, C: 12, 3, 4, 2 if len(shape) == 2:
n, h, w, c = 2, 3, 4, 2 x_shape = shape
c = shape[1]
if data_format == "NHWC":
x_shape = [n, h, w, c]
elif data_format == "NCHW":
x_shape = [n, c, h, w]
else: else:
raise ValueError("Unknown data type.") # n, h, w, c = 2, 3, 4, 2
n, h, w, c = shape[0], shape[1], shape[2], shape[3]
if data_format == "NHWC":
x_shape = [n, h, w, c]
elif data_format == "NCHW":
x_shape = [n, c, h, w]
else:
raise ValueError("Unknown data type.")
scale_shape = [c] scale_shape = [c]
x_val = np.random.random_sample(x_shape).astype(np.float32) x_val = np.random.random_sample(x_shape).astype(np.float32)
...@@ -219,7 +249,10 @@ class TestBatchNormOp(OpTest): ...@@ -219,7 +249,10 @@ class TestBatchNormOp(OpTest):
# for gradient test # for gradient test
# y_grad = np.ones(x_shape).astype(np.float32) # y_grad = np.ones(x_shape).astype(np.float32)
y_grad = np.zeros(x_shape).astype(np.float32) y_grad = np.zeros(x_shape).astype(np.float32)
y_grad[0, 0, 0, 0] = 1. if len(y_grad.shape) == 2:
y_grad[0, 0] = 1.
else:
y_grad[0, 0, 0, 0] = 1.
# y_grad = np.random.random_sample(x_shape).astype(np.float32) # y_grad = np.random.random_sample(x_shape).astype(np.float32)
x_grad_ref, scale_grad_ref, bias_grad_ref = _reference_grad( x_grad_ref, scale_grad_ref, bias_grad_ref = _reference_grad(
x_val, y_grad, scale_val, saved_mean, var_ref, epsilon, x_val, y_grad, scale_val, saved_mean, var_ref, epsilon,
...@@ -313,7 +346,8 @@ class TestBatchNormOp(OpTest): ...@@ -313,7 +346,8 @@ class TestBatchNormOp(OpTest):
places.append(core.GPUPlace(0)) places.append(core.GPUPlace(0))
for place in places: for place in places:
for data_format in ["NCHW", "NHWC"]: for data_format in ["NCHW", "NHWC"]:
test_with_place(place, data_format) test_with_place(place, data_format, [2, 3, 4, 5])
test_with_place(place, data_format, [2, 3])
if __name__ == '__main__': if __name__ == '__main__':
......
import unittest import unittest
import paddle.v2.fluid.layers as layers import paddle.v2.fluid.layers as layers
import paddle.v2.fluid.core as core import paddle.v2.fluid.core as core
from paddle.v2.fluid.framework import g_startup_program, g_main_program from paddle.v2.fluid.framework import default_startup_program, default_main_program
from paddle.v2.fluid.executor import Executor from paddle.v2.fluid.executor import Executor
from paddle.v2.fluid.backward import append_backward_ops from paddle.v2.fluid.backward import append_backward_ops
import numpy import numpy
...@@ -19,7 +19,7 @@ class ConditionalBlock(unittest.TestCase): ...@@ -19,7 +19,7 @@ class ConditionalBlock(unittest.TestCase):
cpu = core.CPUPlace() cpu = core.CPUPlace()
exe = Executor(cpu) exe = Executor(cpu)
exe.run(g_startup_program) exe.run(default_startup_program())
x = numpy.random.random(size=(10, 1)).astype('float32') x = numpy.random.random(size=(10, 1)).astype('float32')
...@@ -29,7 +29,9 @@ class ConditionalBlock(unittest.TestCase): ...@@ -29,7 +29,9 @@ class ConditionalBlock(unittest.TestCase):
append_backward_ops(loss=loss) append_backward_ops(loss=loss)
outs = exe.run( outs = exe.run(
feed={'X': x}, feed={'X': x},
fetch_list=[g_main_program.block(0).var(data.name + "@GRAD")])[0] fetch_list=[
default_main_program().block(0).var(data.name + "@GRAD")
])[0]
print outs print outs
......
import unittest import unittest
from paddle.v2.fluid.layers import mul, data, sequence_pool
import numpy
import paddle.v2.fluid.core as core import paddle.v2.fluid.core as core
from paddle.v2.fluid.executor import Executor from paddle.v2.fluid.executor import Executor
from paddle.v2.fluid.framework import g_main_program from paddle.v2.fluid.layers import mul, data
import numpy
class TestExecutor(unittest.TestCase): class TestExecutor(unittest.TestCase):
...@@ -19,10 +20,7 @@ class TestExecutor(unittest.TestCase): ...@@ -19,10 +20,7 @@ class TestExecutor(unittest.TestCase):
a_np = numpy.random.random((100, 784)).astype('float32') a_np = numpy.random.random((100, 784)).astype('float32')
b_np = numpy.random.random((784, 100)).astype('float32') b_np = numpy.random.random((784, 100)).astype('float32')
exe = Executor(place) exe = Executor(place)
outs = exe.run(g_main_program, outs = exe.run(feed={'a': a_np, 'b': b_np}, fetch_list=[out])
feed={'a': a_np,
'b': b_np},
fetch_list=[out])
out = outs[0] out = outs[0]
self.assertEqual((100, 100), out.shape) self.assertEqual((100, 100), out.shape)
self.assertTrue(numpy.allclose(out, numpy.dot(a_np, b_np))) self.assertTrue(numpy.allclose(out, numpy.dot(a_np, b_np)))
......
import unittest import unittest
import paddle.v2.fluid.layers as layers import paddle.v2.fluid as fluid
import paddle.v2.fluid.nets as nets import paddle.v2.fluid.nets as nets
from paddle.v2.fluid.framework import Program from paddle.v2.fluid.framework import Program
...@@ -29,27 +29,35 @@ class TestLayer(unittest.TestCase): ...@@ -29,27 +29,35 @@ class TestLayer(unittest.TestCase):
def test_batch_norm_layer(self): def test_batch_norm_layer(self):
main_program = Program() main_program = Program()
startup_program = Program() startup_program = Program()
images = layers.data( images = fluid.layers.data(
name='pixel', name='pixel',
shape=[3, 48, 48], shape=[3, 48, 48],
dtype='float32', dtype='float32',
main_program=main_program) main_program=main_program)
layers.batch_norm( hidden1 = fluid.layers.batch_norm(
input=images, input=images,
main_program=main_program, main_program=main_program,
startup_program=startup_program) startup_program=startup_program)
hidden2 = fluid.layers.fc(input=hidden1,
size=128,
act='relu',
main_program=main_program)
hidden3 = fluid.layers.batch_norm(
input=hidden2,
main_program=main_program,
startup_program=startup_program)
# print str(main_program) print str(main_program)
def test_dropout_layer(self): def test_dropout_layer(self):
main_program = Program() main_program = Program()
startup_program = Program() startup_program = Program()
images = layers.data( images = fluid.layers.data(
name='pixel', name='pixel',
shape=[3, 48, 48], shape=[3, 48, 48],
dtype='float32', dtype='float32',
main_program=main_program) main_program=main_program)
layers.dropout( fluid.layers.dropout(
x=images, x=images,
dropout_prob=0.5, dropout_prob=0.5,
main_program=main_program, main_program=main_program,
...@@ -61,7 +69,7 @@ class TestLayer(unittest.TestCase): ...@@ -61,7 +69,7 @@ class TestLayer(unittest.TestCase):
main_program = Program() main_program = Program()
startup_program = Program() startup_program = Program()
images = layers.data( images = fluid.layers.data(
name='pixel', name='pixel',
shape=[3, 48, 48], shape=[3, 48, 48],
dtype='float32', dtype='float32',
...@@ -77,19 +85,19 @@ class TestLayer(unittest.TestCase): ...@@ -77,19 +85,19 @@ class TestLayer(unittest.TestCase):
def test_elementwise_add_with_act(self): def test_elementwise_add_with_act(self):
main_program = Program() main_program = Program()
startup_program = Program() startup_program = Program()
image1 = layers.data( image1 = fluid.layers.data(
name='pixel1', name='pixel1',
shape=[3, 48, 48], shape=[3, 48, 48],
dtype='float32', dtype='float32',
main_program=main_program, main_program=main_program,
startup_program=startup_program) startup_program=startup_program)
image2 = layers.data( image2 = fluid.layers.data(
name='pixel2', name='pixel2',
shape=[3, 48, 48], shape=[3, 48, 48],
dtype='float32', dtype='float32',
main_program=main_program, main_program=main_program,
startup_program=startup_program) startup_program=startup_program)
out = layers.elementwise_add( out = fluid.layers.elementwise_add(
x=image1, x=image1,
y=image2, y=image2,
act='relu', act='relu',
......
...@@ -65,6 +65,15 @@ class TestBook(unittest.TestCase): ...@@ -65,6 +65,15 @@ class TestBook(unittest.TestCase):
print str(program) print str(program)
def test_conv2d_transpose(self):
program = Program()
kwargs = {'main_program': program}
img = layers.data(
name='pixel', shape=[3, 2, 2], dtype='float32', **kwargs)
layers.conv2d_transpose(
input=img, num_filters=10, output_size=28, **kwargs)
print str(program)
def test_recognize_digits_conv(self): def test_recognize_digits_conv(self):
program = Program() program = Program()
......
from paddle.v2.fluid.layers import lod_rank_table, data from paddle.v2.fluid.layers import lod_rank_table, data
from paddle.v2.fluid.executor import Executor from paddle.v2.fluid.executor import Executor
from paddle.v2.fluid.framework import g_main_program
import paddle.v2.fluid.core as core import paddle.v2.fluid.core as core
import numpy import numpy
import unittest import unittest
...@@ -18,7 +17,7 @@ class TestLoDRankTable(unittest.TestCase): ...@@ -18,7 +17,7 @@ class TestLoDRankTable(unittest.TestCase):
tensor = core.LoDTensor() tensor = core.LoDTensor()
tensor.set(numpy.random.random(size=(17, 100)), cpu) tensor.set(numpy.random.random(size=(17, 100)), cpu)
tensor.set_lod([[0, 1, 3], [0, 5, 6, 7], [0, 3, 4, 9, 10, 13, 16, 17]]) tensor.set_lod([[0, 1, 3], [0, 5, 6, 7], [0, 3, 4, 9, 10, 13, 16, 17]])
exe.run(g_main_program, scope=scope, feed={'x': tensor}) exe.run(scope=scope, feed={'x': tensor})
var = scope.find_var(rank_table.name) var = scope.find_var(rank_table.name)
table = var.get_lod_rank_table() table = var.get_lod_rank_table()
self.assertEqual([(0, 5), (1, 1), (2, 1)], table.items()) self.assertEqual([(0, 5), (1, 1), (2, 1)], table.items())
......
import unittest import unittest
from paddle.v2.fluid.framework import Variable, Program, g_main_program
import paddle.v2.fluid.core as core import paddle.v2.fluid.core as core
from paddle.v2.fluid.framework import Program, default_startup_program
main_program = default_startup_program()
class TestOperator(unittest.TestCase): class TestOperator(unittest.TestCase):
def test_error_type(self): def test_error_type(self):
block = g_main_program.create_block() block = main_program.create_block()
try: try:
block.append_op() block.append_op()
self.assertFail() self.assertFail()
......
import unittest import unittest
from paddle.v2.fluid.framework import g_main_program from paddle.v2.fluid.framework import default_main_program
import paddle.v2.fluid.core as core import paddle.v2.fluid.core as core
from paddle.v2.fluid.executor import Executor from paddle.v2.fluid.executor import Executor
import paddle.v2.fluid.io as io import paddle.v2.fluid.io as io
from paddle.v2.fluid.initializer import ConstantInitializer from paddle.v2.fluid.initializer import ConstantInitializer
import numpy as np import numpy as np
main_program = default_main_program()
class TestParameter(unittest.TestCase): class TestParameter(unittest.TestCase):
def test_param(self): def test_param(self):
shape = [784, 100] shape = [784, 100]
val = 1.0625 val = 1.0625
b = g_main_program.global_block() b = main_program.global_block()
param = b.create_parameter( param = b.create_parameter(
name='fc.w', name='fc.w',
shape=shape, shape=shape,
...@@ -23,9 +25,9 @@ class TestParameter(unittest.TestCase): ...@@ -23,9 +25,9 @@ class TestParameter(unittest.TestCase):
self.assertEqual(core.DataType.FP32, param.dtype) self.assertEqual(core.DataType.FP32, param.dtype)
self.assertEqual(0, param.block.idx) self.assertEqual(0, param.block.idx)
exe = Executor(core.CPUPlace()) exe = Executor(core.CPUPlace())
p = exe.run(g_main_program, fetch_list=[param])[0] p = exe.run(main_program, fetch_list=[param])[0]
self.assertTrue(np.allclose(p, np.ones(shape) * val)) self.assertTrue(np.allclose(p, np.ones(shape) * val))
p = io.get_parameter_value_by_name('fc.w', exe, g_main_program) p = io.get_parameter_value_by_name('fc.w', exe, main_program)
self.assertTrue(np.allclose(np.array(p), np.ones(shape) * val)) self.assertTrue(np.allclose(np.array(p), np.ones(shape) * val))
......
from __future__ import print_function from __future__ import print_function
import unittest import unittest
from paddle.v2.fluid.framework import Program from paddle.v2.fluid.framework import Program, default_main_program
from paddle.v2.fluid.framework import g_main_program
import paddle.v2.fluid.layers as layers import paddle.v2.fluid.layers as layers
main_program = default_main_program()
class TestProgram(unittest.TestCase): class TestProgram(unittest.TestCase):
def test_program(self): def test_program(self):
b = g_main_program.current_block() b = main_program.current_block()
self.assertEqual(-1, b.parent_idx) self.assertEqual(-1, b.parent_idx)
self.assertEqual(0, b.idx) self.assertEqual(0, b.idx)
b = g_main_program.create_block() b = main_program.create_block()
self.assertEqual(1, b.idx) self.assertEqual(1, b.idx)
self.assertEqual(0, b.parent_idx) self.assertEqual(0, b.parent_idx)
b = g_main_program.create_block() b = main_program.create_block()
self.assertEqual(2, b.idx) self.assertEqual(2, b.idx)
self.assertEqual(1, b.parent_idx) self.assertEqual(1, b.parent_idx)
g_main_program.rollback() main_program.rollback()
b = g_main_program.current_block() b = main_program.current_block()
self.assertEqual(1, b.idx) self.assertEqual(1, b.idx)
self.assertEqual(0, b.parent_idx) self.assertEqual(0, b.parent_idx)
b = g_main_program.create_block() b = main_program.create_block()
self.assertEqual(3, b.idx) self.assertEqual(3, b.idx)
self.assertEqual(1, b.parent_idx) self.assertEqual(1, b.parent_idx)
g_main_program.rollback() main_program.rollback()
b = g_main_program.current_block() b = main_program.current_block()
self.assertEqual(1, b.idx) self.assertEqual(1, b.idx)
self.assertEqual(0, b.parent_idx) self.assertEqual(0, b.parent_idx)
......
...@@ -3,9 +3,11 @@ import paddle.v2.fluid.core as core ...@@ -3,9 +3,11 @@ import paddle.v2.fluid.core as core
from paddle.v2.fluid.executor import Executor from paddle.v2.fluid.executor import Executor
import paddle.v2.fluid.layers as layers import paddle.v2.fluid.layers as layers
from paddle.v2.fluid.backward import append_backward_ops from paddle.v2.fluid.backward import append_backward_ops
from paddle.v2.fluid.framework import g_main_program from paddle.v2.fluid.framework import default_main_program
import numpy import numpy
main_program = default_main_program()
class TestShrinkRNNMemory(unittest.TestCase): class TestShrinkRNNMemory(unittest.TestCase):
def test_shrink_rnn_memory(self): def test_shrink_rnn_memory(self):
...@@ -36,7 +38,7 @@ class TestShrinkRNNMemory(unittest.TestCase): ...@@ -36,7 +38,7 @@ class TestShrinkRNNMemory(unittest.TestCase):
append_backward_ops(loss=mem3_mean) append_backward_ops(loss=mem3_mean)
x_grad = exe.run( x_grad = exe.run(
feed={'x': tensor}, feed={'x': tensor},
fetch_list=[g_main_program.global_block().var('x@GRAD')])[0] fetch_list=[main_program.global_block().var('x@GRAD')])[0]
self.assertAlmostEqual(1.0, x_grad.sum(), delta=0.1) self.assertAlmostEqual(1.0, x_grad.sum(), delta=0.1)
......
import unittest import unittest
from paddle.v2.fluid.framework import g_main_program, Program, convert_np_dtype_to_dtype_ from paddle.v2.fluid.framework import default_main_program, Program, convert_np_dtype_to_dtype_
import paddle.v2.fluid.core as core import paddle.v2.fluid.core as core
import numpy as np import numpy as np
...@@ -18,7 +18,7 @@ class TestVariable(unittest.TestCase): ...@@ -18,7 +18,7 @@ class TestVariable(unittest.TestCase):
self.assertRaises(ValueError, lambda: convert("int8")) self.assertRaises(ValueError, lambda: convert("int8"))
def test_var(self): def test_var(self):
b = g_main_program.current_block() b = default_main_program().current_block()
w = b.create_var( w = b.create_var(
dtype="float64", shape=[784, 100], lod_level=0, name="fc.w") dtype="float64", shape=[784, 100], lod_level=0, name="fc.w")
self.assertNotEqual(str(w), "") self.assertNotEqual(str(w), "")
......
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册