diff --git a/CMakeLists.txt b/CMakeLists.txt index 4117f077219d3b8fc097631073eafa748ff918bc..c23be24c6c8971746f0e7fce7209da3dd3b667ec 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -61,8 +61,10 @@ option(EIGEN_USE_THREADS "Compile with multi-threaded Eigen" OFF) option(WITH_ARM_FP16 "Use half precision support on armv8.2-a cpu" OFF) option(WITH_FAST_BUNDLE_TEST "Bundle tests that can be run in a single process together to reduce launch overhead" OFF) option(WITH_CONTRIB "Compile the third-party contributation" OFF) +option(REPLACE_ENFORCE_GLOG "Replace PADDLE_ENFORCE with glog/CHECK for better debugging." OFF) option(WITH_ANAKIN "Compile with Anakin library" OFF) option(WITH_GRPC "Use grpc as the default rpc framework" ${WITH_DISTRIBUTE}) +option(WITH_BRPC_RDMA "Use brpc rdma as the rpc protocol" OFF) # CMAKE_BUILD_TYPE if(NOT CMAKE_BUILD_TYPE) @@ -131,6 +133,10 @@ if (NOT DEFINED WITH_MKLDNN) set(WITH_MKLDNN OFF) endif() endif() + +if (REPLACE_ENFORCE_GLOG) + add_definitions("-DREPLACE_ENFORCE_GLOG") +endif() ######################################################################################## include(external/mklml) # download mklml package @@ -153,12 +159,24 @@ include(external/cares) if(WITH_DISTRIBUTE) if(WITH_GRPC) include(external/grpc) + message(STATUS "Use grpc framework.") else() + message(STATUS "Use brpc framework.") include(external/leveldb) include(external/brpc) endif() endif() +if(WITH_BRPC_RDMA) + message(STATUS "Use brpc with rdma.") + if(WITH_GRPC) + message(FATAL_ERROR "Can't use grpc with brpc rdma.") + endif() + if(NOT WITH_DISTRIBUTE) + message(FATAL_ERROR "Can't use brpc rdma in a non-distributed build.") + endif() +endif() + include(external/snappy) # download snappy include(external/snappystream) include(external/threadpool) diff --git a/README.md b/README.md index 8d89c6b1ec9e4aefbd64328dedb4e8c7cc50c21b..63abca069a6629ac59739224ded9cd9f06207d0a 100644 --- a/README.md +++ b/README.md @@ -4,7 +4,6 @@ [![Build Status](https://travis-ci.org/PaddlePaddle/Paddle.svg?branch=develop)](https://travis-ci.org/PaddlePaddle/Paddle) [![Documentation Status](https://img.shields.io/badge/docs-latest-brightgreen.svg?style=flat)](http://www.paddlepaddle.org/docs/develop/documentation/en/getstarted/index_en.html) [![Documentation Status](https://img.shields.io/badge/中文文档-最新-brightgreen.svg)](http://www.paddlepaddle.org/docs/develop/documentation/zh/getstarted/index_cn.html) -[![Coverage Status](https://coveralls.io/repos/github/PaddlePaddle/Paddle/badge.svg?branch=develop)](https://coveralls.io/github/PaddlePaddle/Paddle?branch=develop) [![Release](https://img.shields.io/github/release/PaddlePaddle/Paddle.svg)](https://github.com/PaddlePaddle/Paddle/releases) [![License](https://img.shields.io/badge/license-Apache%202-blue.svg)](LICENSE) diff --git a/cmake/configure.cmake b/cmake/configure.cmake index 6a8b15a6b60a2e5635dc78fc877f0c8da9a2a998..e4af34d10ed92c501dd805addb62747c91c00978 100644 --- a/cmake/configure.cmake +++ b/cmake/configure.cmake @@ -174,3 +174,7 @@ endif(WITH_GOLANG) if(WITH_GRPC) add_definitions(-DPADDLE_WITH_GRPC) endif(WITH_GRPC) + +if(WITH_BRPC_RDMA) + add_definitions(-DPADDLE_WITH_BRPC_RDMA) +endif(WITH_BRPC_RDMA) diff --git a/cmake/external/brpc.cmake b/cmake/external/brpc.cmake index 8e2c913b2caae0c4eeb844d2b51a8975e81c1592..30b227b6452abf44171a1a4e04569e66b16e67a4 100644 --- a/cmake/external/brpc.cmake +++ b/cmake/external/brpc.cmake @@ -14,6 +14,15 @@ INCLUDE(ExternalProject) +find_library(SSL_LIBRARY NAMES ssl) +ADD_LIBRARY(ssl SHARED IMPORTED GLOBAL)
+SET_PROPERTY(TARGET ssl PROPERTY IMPORTED_LOCATION ${SSL_LIBRARY}) + +find_library(CRYPTO_LIBRARY NAMES crypto) +ADD_LIBRARY(crypto SHARED IMPORTED GLOBAL) +SET_PROPERTY(TARGET crypto PROPERTY IMPORTED_LOCATION ${CRYPTO_LIBRARY}) + + SET(BRPC_SOURCES_DIR ${THIRD_PARTY_PATH}/brpc) SET(BRPC_INSTALL_DIR ${THIRD_PARTY_PATH}/install/brpc) SET(BRPC_INCLUDE_DIR "${BRPC_INSTALL_DIR}/include" CACHE PATH "brpc include directory." FORCE) @@ -22,14 +31,14 @@ SET(BRPC_LIBRARIES "${BRPC_INSTALL_DIR}/lib/libbrpc.a" CACHE FILEPATH "brpc libr INCLUDE_DIRECTORIES(${BRPC_INCLUDE_DIR}) # Reference https://stackoverflow.com/questions/45414507/pass-a-list-of-prefix-paths-to-externalproject-add-in-cmake-args -set(prefix_path "${THIRD_PARTY_PATH}/install/gflags|${THIRD_PARTY_PATH}/install/leveldb|${THIRD_PARTY_PATH}/install/snappy|${THIRD_PARTY_PATH}/install/gtest|${THIRD_PARTY_PATH}/install/protobuf") +set(prefix_path "${THIRD_PARTY_PATH}/install/gflags|${THIRD_PARTY_PATH}/install/leveldb|${THIRD_PARTY_PATH}/install/snappy|${THIRD_PARTY_PATH}/install/gtest|${THIRD_PARTY_PATH}/install/protobuf|${THIRD_PARTY_PATH}/install/zlib") # If minimal .a is need, you can set WITH_DEBUG_SYMBOLS=OFF ExternalProject_Add( extern_brpc ${EXTERNAL_PROJECT_LOG_ARGS} - GIT_REPOSITORY "https://github.com/brpc/brpc" - GIT_TAG "6d153dd7ff00f960ae6895c9c5fff0ce9f07aff2" + GIT_REPOSITORY "https://github.com/gongweibao/brpc" + GIT_TAG "7dc04defad1fd4173aae170c3fcbde131b65155a" PREFIX ${BRPC_SOURCES_DIR} UPDATE_COMMAND "" CMAKE_ARGS -DCMAKE_CXX_COMPILER=${CMAKE_CXX_COMPILER} @@ -42,6 +51,8 @@ ExternalProject_Add( -DCMAKE_BUILD_TYPE=${THIRD_PARTY_BUILD_TYPE} -DCMAKE_PREFIX_PATH=${prefix_path} -DBRPC_WITH_GLOG=ON + -DIOBUF_WITH_HUGE_BLOCK=ON + -DBRPC_WITH_RDMA=${WITH_BRPC_RDMA} ${EXTERNAL_OPTIONAL_ARGS} LIST_SEPARATOR | CMAKE_CACHE_ARGS -DCMAKE_INSTALL_PREFIX:PATH=${BRPC_INSTALL_DIR} -DCMAKE_POSITION_INDEPENDENT_CODE:BOOL=ON -DCMAKE_BUILD_TYPE:STRING=${THIRD_PARTY_BUILD_TYPE} ) -ADD_DEPENDENCIES(extern_brpc protobuf leveldb gflags glog gtest snappy) +ADD_DEPENDENCIES(extern_brpc protobuf ssl crypto leveldb gflags glog gtest snappy) ADD_LIBRARY(brpc STATIC IMPORTED GLOBAL) SET_PROPERTY(TARGET brpc PROPERTY IMPORTED_LOCATION ${BRPC_LIBRARIES}) ADD_DEPENDENCIES(brpc extern_brpc) diff --git a/cmake/generic.cmake b/cmake/generic.cmake index 9c42044ec163e9db1dd21d5c3915b010c30fdf1c..fd7fc16bff5651f022b484623243048fbd225b5a 100644 --- a/cmake/generic.cmake +++ b/cmake/generic.cmake @@ -96,6 +96,20 @@ if(NOT APPLE AND NOT ANDROID) set(CMAKE_CXX_LINK_EXECUTABLE "${CMAKE_CXX_LINK_EXECUTABLE} -pthread -ldl -lrt") endif(NOT APPLE AND NOT ANDROID) +set_property(GLOBAL PROPERTY FLUID_MODULES "") +# find_fluid_modules collects all fluid modules, used for the paddle fluid +# static library and for building inference libs +function(find_fluid_modules TARGET_NAME) + get_filename_component(__target_path ${TARGET_NAME} ABSOLUTE) + string(REGEX REPLACE "^${PADDLE_SOURCE_DIR}/" "" __target_path ${__target_path}) + string(FIND "${__target_path}" "fluid" pos) + if(pos GREATER 1) + get_property(fluid_modules GLOBAL PROPERTY FLUID_MODULES) + set(fluid_modules ${fluid_modules} ${TARGET_NAME}) + set_property(GLOBAL PROPERTY FLUID_MODULES "${fluid_modules}") + endif() +endfunction(find_fluid_modules) + function(merge_static_libs TARGET_NAME) set(libs ${ARGN}) list(REMOVE_DUPLICATES libs) @@ -250,6 +264,7 @@ function(cc_test TARGET_NAME) WORKING_DIRECTORY ${CMAKE_CURRENT_BINARY_DIR}) if (${cc_test_SERIAL}) set_property(TEST ${TARGET_NAME} PROPERTY SERIAL 1) +
set_property(TEST ${TARGET_NAME} PROPERTY ENVIRONMENT FLAGS_init_allocated_mem=true) endif() endif() endfunction(cc_test) @@ -314,6 +329,7 @@ function(nv_test TARGET_NAME) add_test(${TARGET_NAME} ${TARGET_NAME}) if (nv_test_SERIAL) set_property(TEST ${TARGET_NAME} PROPERTY SERIAL 1) + set_property(TEST ${TARGET_NAME} PROPERTY ENVIRONMENT FLAGS_init_allocated_mem=true) endif() endif() endfunction(nv_test) @@ -561,7 +577,7 @@ function(py_test TARGET_NAME) set(multiValueArgs SRCS DEPS ARGS ENVS) cmake_parse_arguments(py_test "${options}" "${oneValueArgs}" "${multiValueArgs}" ${ARGN}) add_test(NAME ${TARGET_NAME} - COMMAND env PYTHONPATH=${PADDLE_BINARY_DIR}/python ${py_test_ENVS} + COMMAND env FLAGS_init_allocated_mem=true PYTHONPATH=${PADDLE_BINARY_DIR}/python ${py_test_ENVS} ${PYTHON_EXECUTABLE} -u ${py_test_SRCS} ${py_test_ARGS} WORKING_DIRECTORY ${CMAKE_CURRENT_BINARY_DIR}) endif() diff --git a/cmake/inference_lib.cmake b/cmake/inference_lib.cmake index 850098297e1456487cb8a7b83dffd3d2b0478689..c6979713231f631f8757e4139d6f685d4554b54e 100644 --- a/cmake/inference_lib.cmake +++ b/cmake/inference_lib.cmake @@ -12,19 +12,6 @@ # See the License for the specific language governing permissions and # limitations under the License. -set_property(GLOBAL PROPERTY FLUID_MODULES "") -# find all fluid modules is used for paddle fluid static library -function(find_fluid_modules TARGET_NAME) - get_filename_component(__target_path ${TARGET_NAME} ABSOLUTE) - string(REGEX REPLACE "^${PADDLE_SOURCE_DIR}/" "" __target_path ${__target_path}) - string(FIND "${__target_path}" "fluid" pos) - if(pos GREATER 1) - get_property(fluid_modules GLOBAL PROPERTY FLUID_MODULES) - set(fluid_modules ${fluid_modules} ${TARGET_NAME}) - set_property(GLOBAL PROPERTY FLUID_MODULES "${fluid_modules}") - endif() -endfunction(find_fluid_modules) - # make package for paddle fluid shared and static library function(copy TARGET) set(options "") @@ -154,7 +141,7 @@ set(inference_deps paddle_fluid_shared paddle_fluid) if(WITH_CONTRIB) message(STATUS "installing contrib") set(contrib_dst_dir "${FLUID_INSTALL_DIR}/contrib/inference") - if (WITH_ANAKIN) + if (WITH_ANAKIN AND WITH_GPU) copy(contrib_anakin_inference_lib DEPS paddle_inference_api inference_anakin_api SRCS ${PADDLE_BINARY_DIR}/paddle/contrib/inference/libinference_anakin_api* # compiled anakin api @@ -163,9 +150,9 @@ if(WITH_CONTRIB) list(APPEND inference_deps contrib_anakin_inference_lib) endif() - copy(contrib_inference_lib DEPS paddle_inference_api + copy(contrib_inference_lib DEPS paddle_inference_api paddle_inference_api_shared SRCS ${PADDLE_SOURCE_DIR}/paddle/contrib/inference/paddle_inference_api.h - ${PADDLE_BINARY_DIR}/paddle/contrib/inference/libpaddle_inference_api.* + ${PADDLE_BINARY_DIR}/paddle/contrib/inference/libpaddle_inference_api* DSTS ${contrib_dst_dir} ${contrib_dst_dir}) list(APPEND inference_deps contrib_inference_lib) endif() diff --git a/paddle/contrib/inference/CMakeLists.txt b/paddle/contrib/inference/CMakeLists.txt index 2cd6ab2bbf042bced41957193a0269f477eb10d0..a8bbb4eb8081420ae0bbaf761bd27303c0d043cb 100644 --- a/paddle/contrib/inference/CMakeLists.txt +++ b/paddle/contrib/inference/CMakeLists.txt @@ -46,6 +46,10 @@ cc_library(paddle_inference_api SRCS paddle_inference_api.cc paddle_inference_api_impl.cc DEPS ${FLUID_CORE_MODULES} ${GLOB_OP_LIB}) +cc_library(paddle_inference_api_shared SHARED + SRCS paddle_inference_api.cc paddle_inference_api_impl.cc + DEPS ${FLUID_CORE_MODULES} ${GLOB_OP_LIB}) + cc_test(test_paddle_inference_api SRCS 
test_paddle_inference_api.cc DEPS paddle_inference_api) diff --git a/paddle/fluid/framework/data_layout_transform.cc b/paddle/fluid/framework/data_layout_transform.cc index bc48fd3b479157d4aea390cd5f4dc61ea46dca4b..cd00b7de7338982308acfa1f1e8c38e010c6a43b 100644 --- a/paddle/fluid/framework/data_layout_transform.cc +++ b/paddle/fluid/framework/data_layout_transform.cc @@ -147,9 +147,9 @@ void TransDataLayoutFromMKLDNN(const OpKernelType& kernel_type_for_var, "Input tensor type is not supported: ", in.type().name()); memory::data_type out_type = in_type; - auto in_format = MKLDNNFormatForSize(in_tz.size(), in.format()); + auto in_format = platform::MKLDNNFormatForSize(in_tz.size(), in.format()); auto out_format = - MKLDNNFormatForSize(in_tz.size(), ToMKLDNNFormat(out_layout)); + platform::MKLDNNFormatForSize(in_tz.size(), ToMKLDNNFormat(out_layout)); void* in_data = GetDataFromTensor(in, in_type); diff --git a/paddle/fluid/framework/data_layout_transform.h b/paddle/fluid/framework/data_layout_transform.h index 67f91e4e48d3e11ed493c5e6943cb9071aff60c4..90bb206ec6b698bc23ad1a5c9609a25186ec6de8 100644 --- a/paddle/fluid/framework/data_layout_transform.h +++ b/paddle/fluid/framework/data_layout_transform.h @@ -62,12 +62,6 @@ inline MKLDNNDataType ToMKLDNNDataType(const std::type_index type) { return MKLDNNDataType::data_undef; } -inline MKLDNNFormat MKLDNNFormatForSize(size_t dims_size, - MKLDNNFormat default_format) { - return (dims_size == 1 - ? mkldnn::memory::format::x - : dims_size == 2 ? mkldnn::memory::format::nc : default_format); -} #endif void TransDataLayoutFromMKLDNN(const OpKernelType& kernel_type_for_var, diff --git a/paddle/fluid/framework/data_transform.cc b/paddle/fluid/framework/data_transform.cc index 635eb404d4a94e9a765f511bb249ce0cce22c477..82872224501709080ff02a13464d58543a0abda8 100644 --- a/paddle/fluid/framework/data_transform.cc +++ b/paddle/fluid/framework/data_transform.cc @@ -18,6 +18,10 @@ limitations under the License. */ #include "paddle/fluid/framework/data_layout_transform.h" #include "paddle/fluid/framework/data_type_transform.h" +#ifdef PADDLE_WITH_MKLDNN +#include "paddle/fluid/platform/mkldnn_helper.h" +#endif + namespace paddle { namespace framework { @@ -48,8 +52,8 @@ void TransformData(const OpKernelType &expected_kernel_type, // Case1 - transform from Non-MKLDNN OPKernel to MKLDNN OPKernel // Just set layout/format. No real transform occur - auto out_format = - MKLDNNFormatForSize(in.dims().size(), ToMKLDNNFormat(lin)); + auto out_format = platform::MKLDNNFormatForSize(in.dims().size(), + ToMKLDNNFormat(lin)); out.ShareDataWith(input_tensor); out.set_layout(DataLayout::kMKLDNN); diff --git a/paddle/fluid/framework/executor.cc b/paddle/fluid/framework/executor.cc index 87b0ff0c80270c5b29f81dde66fb16c5fc2b9e54..39287501385844a7ee1225d132c9372325135342 100644 --- a/paddle/fluid/framework/executor.cc +++ b/paddle/fluid/framework/executor.cc @@ -20,9 +20,7 @@ limitations under the License. 
*/ #include "paddle/fluid/framework/lod_tensor_array.h" #include "paddle/fluid/framework/op_registry.h" #include "paddle/fluid/framework/reader.h" -#ifdef PADDLE_WITH_DISTRIBUTE -#include "paddle/fluid/operators/distributed/grpc_client.h" -#endif +#include "paddle/fluid/operators/detail/macros.h" #include "paddle/fluid/platform/place.h" #include "paddle/fluid/platform/profiler.h" diff --git a/paddle/fluid/framework/lod_tensor.cc b/paddle/fluid/framework/lod_tensor.cc index d29d8ce1c561e45980d10c17c984ca2ed3b453f3..5373d769a4993bb378b30c3b23885c072b778e5c 100644 --- a/paddle/fluid/framework/lod_tensor.cc +++ b/paddle/fluid/framework/lod_tensor.cc @@ -68,7 +68,7 @@ std::ostream &operator<<(std::ostream &os, const LoDTensor &t) { // only print first ten elements int64_t size = t.numel() < 10 ? t.numel() : 10; for (int64_t i = 0; i < size; ++i) { - if (t.type().hash_code() == typeid(float).hash_code()) { + if (t.type().hash_code() == typeid(float).hash_code()) { // NOLINT os << t.data()[i] << " "; } else if (t.type().hash_code() == typeid(int64_t).hash_code()) { os << t.data()[i] << " "; diff --git a/paddle/fluid/framework/operator.cc b/paddle/fluid/framework/operator.cc index aa1a42fc97310e4c149f2018f1706d8e42630a98..4586183d8d206d07f8dcdc000a5ce0bc65d847d5 100644 --- a/paddle/fluid/framework/operator.cc +++ b/paddle/fluid/framework/operator.cc @@ -748,10 +748,6 @@ proto::VarType::Type OperatorWithKernel::IndicateDataType( t = &var->Get(); } else if (var->IsType()) { t = &(var->Get().value()); - } else if (var->IsType()) { - const LoDTensorArray& arr = var->Get(); - PADDLE_ENFORCE(arr.size() > 0); - t = &(arr[0]); } if (t != nullptr) { int tmp = static_cast(ToDataType(t->type())); diff --git a/paddle/fluid/framework/parallel_executor.cc b/paddle/fluid/framework/parallel_executor.cc index 751b10eeeed10828c08ada4173300c07f81c093e..b53a6f43fbd1f23e69d23ad0fcc54d5c25d352a3 100644 --- a/paddle/fluid/framework/parallel_executor.cc +++ b/paddle/fluid/framework/parallel_executor.cc @@ -253,9 +253,6 @@ void ParallelExecutor::FeedAndSplitTensorIntoLocalScopes( t->set_lod(lod_tensors[j].lod()); } } - for (auto &p : member_->places_) { - platform::DeviceContextPool::Instance().Get(p)->Wait(); - } } ParallelExecutor::~ParallelExecutor() { diff --git a/paddle/fluid/framework/tensor_impl.h b/paddle/fluid/framework/tensor_impl.h index 96114678a9992f2975c4173c7cc003114f04d8df..7f678f869aac4616c8bca440d0431f765da41dd6 100644 --- a/paddle/fluid/framework/tensor_impl.h +++ b/paddle/fluid/framework/tensor_impl.h @@ -23,9 +23,9 @@ namespace framework { template inline const T* Tensor::data() const { check_memory_size(); - PADDLE_ENFORCE(std::is_same::value || - holder_->type() == std::type_index(typeid(T)), - "Tensor holds the wrong type, it holds %s", + bool valid = std::is_same::value || + holder_->type() == std::type_index(typeid(T)); + PADDLE_ENFORCE(valid, "Tensor holds the wrong type, it holds %s", this->holder_->type().name()); return reinterpret_cast( @@ -37,9 +37,9 @@ inline bool Tensor::IsInitialized() const { return holder_ != nullptr; } template inline T* Tensor::data() { check_memory_size(); - PADDLE_ENFORCE(std::is_same::value || - holder_->type() == std::type_index(typeid(T)), - "Tensor holds the wrong type, it holds %s", + bool valid = std::is_same::value || + holder_->type() == std::type_index(typeid(T)); + PADDLE_ENFORCE(valid, "Tensor holds the wrong type, it holds %s", this->holder_->type().name()); return reinterpret_cast(reinterpret_cast(holder_->ptr()) + offset_); diff --git 
a/paddle/fluid/framework/tensor_util.cc b/paddle/fluid/framework/tensor_util.cc index e5bc74755f46449296a153e8b330968e6d9f1e1d..f98011e896f4033ef210e0eb69f93ce7800a3cd6 100644 --- a/paddle/fluid/framework/tensor_util.cc +++ b/paddle/fluid/framework/tensor_util.cc @@ -69,7 +69,22 @@ void TensorCopy(const Tensor& src, const platform::Place& dst_place, PADDLE_ENFORCE(platform::is_gpu_place(ctx_place)); auto stream = reinterpret_cast(ctx).stream(); - memory::Copy(dst_gpu_place, dst_ptr, src_gpu_place, src_ptr, size, stream); + if (platform::is_same_place(src_place, dst_place)) { + memory::Copy(dst_gpu_place, dst_ptr, src_gpu_place, src_ptr, size, + stream); + } else { + if (platform::is_same_place(ctx_place, src_place)) { + memory::Copy(dst_gpu_place, dst_ptr, src_gpu_place, src_ptr, size, + stream); + platform::DeviceContextPool::Instance().Get(src.place())->Wait(); + } else if (platform::is_same_place(ctx_place, dst_place)) { + platform::DeviceContextPool::Instance().Get(src.place())->Wait(); + memory::Copy(dst_gpu_place, dst_ptr, src_gpu_place, src_ptr, size, + stream); + } else { + PADDLE_THROW("ctx does not belong to dst_gpu_place or src_gpu_place."); + } + } } #endif } @@ -78,10 +93,10 @@ void TensorCopy(const Tensor& src, const platform::Place& dst_place, Tensor* dst) { platform::DeviceContextPool& pool = platform::DeviceContextPool::Instance(); const platform::DeviceContext* dev_ctx; - if (platform::is_gpu_place(src.place())) { - dev_ctx = pool.Get(src.place()); - } else { + if (platform::is_gpu_place(dst_place)) { dev_ctx = pool.Get(dst_place); + } else { + dev_ctx = pool.Get(src.place()); } TensorCopy(src, dst_place, *dev_ctx, dst); } diff --git a/paddle/fluid/framework/tensor_util.h b/paddle/fluid/framework/tensor_util.h index dca279b69382b80e055f661cefe84b81326704b5..4457382ade37a12f5f3613fc4113fbf1f6f91124 100644 --- a/paddle/fluid/framework/tensor_util.h +++ b/paddle/fluid/framework/tensor_util.h @@ -23,10 +23,25 @@ limitations under the License. */ namespace paddle { namespace framework { +// NOTE(zcd): Because TensorCopy is an async operation, when src_place +// and dst_place are two different GPUs, TensorCopy performs a wait on the +// src context to ensure that the operation is carried out correctly. +// If ctx_place and src_place are the same, src_ctx.Wait() is added +// after memory::Copy; if ctx_place and dst_place are the same, +// src_ctx.Wait() is added before memory::Copy. void TensorCopy(const Tensor& src, const platform::Place& dst_place, const platform::DeviceContext& ctx, Tensor* dst); + +// NOTE(zcd): If src.place() and dst_place are two different GPUs, +// the copy operation is carried out on dst_place's stream. This is +// very important, because TensorCopy is an async operator, and in most +// cases dst is to be used in dst_place's stream once this copy operator +// returns. If the copy were carried out on src_place's stream instead, +// the copy might not yet be complete when dst is used in dst_place's +// stream.
void TensorCopy(const Tensor& src, const platform::Place& dst_place, Tensor* dst); + void TensorCopySync(const Tensor& src, const platform::Place& dst_place, Tensor* dst); diff --git a/paddle/fluid/inference/analysis/README.md b/paddle/fluid/inference/analysis/README.md new file mode 100644 index 0000000000000000000000000000000000000000..4c5de189cd1eab1ba3de0b2cdfd2294d139ceab2 --- /dev/null +++ b/paddle/fluid/inference/analysis/README.md @@ -0,0 +1,57 @@ +# Inference Analysis + +The `inference/analysis` module is used to analyze and optimize the inference program. +It borrows some philosophy from `LLVM/analysis`, +and makes the various optimization features pluggable so that they can co-exist in a pipeline. + +We borrowed some concepts from LLVM, such as + +- [Pass](./pass.h)es to implement optimizations that traverse the inference program, +- [DataFlowGraph](./data_flow_graph.h) to represent the data flow graph built from a program, +- [PassManager](./pass_manager.h) to manage a sequence of `Pass`es over a graph. + +There are some other basic concepts: + +- [Node](./node.h), the node in a `DataFlowGraph`, + - `Function`, the Operator in Fluid, + - `Value`, the Variable in Fluid; +- [Argument](./argument.h), the argument that is treated as the input and output of all `Pass`es in the pipeline. + +## How it works + +The `inference/analysis` module arranges all the passes into a pipeline, and works in the following way: + +1. Build a `DataFlowGraph` from a Fluid inference ProgramDesc, +2. Call the middle passes one by one; the same `DataFlowGraph` is passed across all the passes, +3. Transform a new ProgramDesc from the modified `DataFlowGraph`. + +New optimization features can be added as independent `Pass`es and controlled by gflags; +each pass generates unified debug information or a visualization for easier debugging. + +## Supported Passes + +### `FluidToDataFlowGraphPass` +Transforms the fluid `ProgramDesc` into a `DataFlowGraph` that gives an abstract representation for all the middle passes; +this should be the first pass of the pipeline. + +### `DataFlowGraphToFluidPass` +Generates a final `ProgramDesc` from a data flow graph; this should be the last pass of the pipeline. + +### `TensorRTSubgraphNodeMarkPass` +Marks the `Node`s that are supported by TensorRT; +this pass generates a visualization file which can be used for debugging. + +### `TensorRTSubGraphPass` +Splits out the sub-graphs that can be accelerated by TensorRT. + +### `DFG_GraphvizDrawPass` +This pass is just for debugging; it visualizes the `DataFlowGraph` using the [graphviz](http://www.graphviz.org) tool. + +It can be used as a helper class that draws the modified graph after each pass. + +## Utilities + +There are some helper functions/classes for analysis. + +- [dot.h](./dot.h) gives an easy-to-use interface for generating `DOT` code, +- [graph_traits.h](./graph_traits.h) contains the graph traversal algorithms; it uses `iterator`s to make the algorithms easy to share across different passes. diff --git a/paddle/fluid/inference/analysis/analyzer.cc b/paddle/fluid/inference/analysis/analyzer.cc index 5d85530969c5bec1c84d5f5b0d2626431a9e1c63..a4625f008c15300b88ef0bce71cd7d8aa473c9a8 100644 --- a/paddle/fluid/inference/analysis/analyzer.cc +++ b/paddle/fluid/inference/analysis/analyzer.cc @@ -13,6 +13,7 @@ // limitations under the License.
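To make the NOTE(zcd) comments in tensor_util.h above concrete, here is a minimal caller-side sketch based only on the `TensorCopy`/`TensorCopySync` declarations in this patch; the helper `CopyThenUse` is hypothetical and for illustration, not code from the patch:

```cpp
#include "paddle/fluid/framework/tensor_util.h"

// Hypothetical helper, for illustration only.
void CopyThenUse(const paddle::framework::Tensor& src,
                 const paddle::platform::Place& dst_place,
                 paddle::framework::Tensor* dst) {
  namespace fw = paddle::framework;

  // Async variant: per the NOTE above, the copy is enqueued on dst_place's
  // stream, so dst may only be consumed by later work on that same stream,
  // where stream ordering guarantees the copy has finished.
  fw::TensorCopy(src, dst_place, dst);

  // Sync variant: waits for the copy to complete, so dst can then be read
  // from any stream or from the host.
  fw::TensorCopySync(src, dst_place, dst);
}
```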
#include "paddle/fluid/inference/analysis/analyzer.h" +#include #include "paddle/fluid/inference/analysis/data_flow_graph_to_fluid_pass.h" #include "paddle/fluid/inference/analysis/dfg_graphviz_draw_pass.h" #include "paddle/fluid/inference/analysis/fluid_to_data_flow_graph_pass.h" @@ -79,4 +80,4 @@ void Analyzer::Run(Argument* argument) { } // namespace analysis } // namespace inference -} // namespace paddle \ No newline at end of file +} // namespace paddle diff --git a/paddle/fluid/inference/analysis/analyzer.h b/paddle/fluid/inference/analysis/analyzer.h index f290a3777d5be2ef64667d8c17ec59adddc3ef1b..e9e14fb1947da059c8d126d3da182ce446f6421e 100644 --- a/paddle/fluid/inference/analysis/analyzer.h +++ b/paddle/fluid/inference/analysis/analyzer.h @@ -12,6 +12,8 @@ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the License for the specific language governing permissions and limitations under the License. */ +#pragma once + /* * This file contains Analyzer, an class that exposed as a library that analyze * and optimize diff --git a/paddle/fluid/inference/analysis/data_flow_graph.h b/paddle/fluid/inference/analysis/data_flow_graph.h index 30c60661f3492034248e164a70a682bae3819d23..a4fefc83e0c551d52bec87299bcbc966e7a2dbf7 100644 --- a/paddle/fluid/inference/analysis/data_flow_graph.h +++ b/paddle/fluid/inference/analysis/data_flow_graph.h @@ -138,7 +138,7 @@ struct GraphTraits { // sub-graph is the inputs nodes and output nodes that doesn't inside the // sub-graph. static std::pair, std::vector> -ExtractInputAndOutputOfSubGraph(std::vector &graph) { +ExtractInputAndOutputOfSubGraph(std::vector &graph) { // NOLINT std::unordered_set nodes(graph.begin(), graph.end()); std::unordered_set inputs; std::unordered_set outputs; diff --git a/paddle/fluid/inference/analysis/data_flow_graph_to_fluid_pass.cc b/paddle/fluid/inference/analysis/data_flow_graph_to_fluid_pass.cc index e74efd17b834db1d0314c8b7082f3e9c15d6eda3..29ca008123addf07959b965a4b54bf55b18c401d 100644 --- a/paddle/fluid/inference/analysis/data_flow_graph_to_fluid_pass.cc +++ b/paddle/fluid/inference/analysis/data_flow_graph_to_fluid_pass.cc @@ -13,6 +13,7 @@ // limitations under the License. 
#include "paddle/fluid/inference/analysis/data_flow_graph_to_fluid_pass.h" +#include #include "paddle/fluid/framework/block_desc.h" #include "paddle/fluid/framework/op_desc.h" #include "paddle/fluid/framework/proto_desc.h" @@ -150,13 +151,14 @@ namespace { class DFG_DebuggerPass : public DFG_GraphvizDrawPass { public: using Config = DFG_GraphvizDrawPass::Config; - DFG_DebuggerPass(const Config& config) : DFG_GraphvizDrawPass(config) {} + explicit DFG_DebuggerPass(const Config& config) + : DFG_GraphvizDrawPass(config) {} std::string repr() const override { return "dfg-to-fluid-debuger-pass"; } bool Finalize() override { return true; } }; -} +} // namespace Pass* DataFlowGraphToFluidPass::CreateGraphvizDebugerPass() const { return new DFG_DebuggerPass(DFG_GraphvizDrawPass::Config( diff --git a/paddle/fluid/inference/analysis/data_flow_graph_to_fluid_pass.h b/paddle/fluid/inference/analysis/data_flow_graph_to_fluid_pass.h index 1726e056ed37e2e5fbe2042851ca9bd188806bac..edc84b02ed20991e3e7c6c437d2b1fac169bae03 100644 --- a/paddle/fluid/inference/analysis/data_flow_graph_to_fluid_pass.h +++ b/paddle/fluid/inference/analysis/data_flow_graph_to_fluid_pass.h @@ -19,6 +19,7 @@ #pragma once +#include #include "paddle/fluid/framework/program_desc.h" #include "paddle/fluid/inference/analysis/data_flow_graph.h" #include "paddle/fluid/inference/analysis/pass.h" diff --git a/paddle/fluid/inference/analysis/dfg_graphviz_draw_pass.h b/paddle/fluid/inference/analysis/dfg_graphviz_draw_pass.h index b064782586f6243353eda67ac8db040509716b20..17445ab4407a159ca11345bc9a9226b3ad0044f0 100644 --- a/paddle/fluid/inference/analysis/dfg_graphviz_draw_pass.h +++ b/paddle/fluid/inference/analysis/dfg_graphviz_draw_pass.h @@ -46,7 +46,7 @@ class DFG_GraphvizDrawPass : public DataFlowGraphPass { const bool display_deleted_node; }; - DFG_GraphvizDrawPass(const Config &config) : config_(config) {} + explicit DFG_GraphvizDrawPass(const Config &config) : config_(config) {} bool Initialize(Argument *argument) override { return true; } void Run(DataFlowGraph *graph) override; diff --git a/paddle/fluid/inference/analysis/fluid_to_data_flow_graph_pass.cc b/paddle/fluid/inference/analysis/fluid_to_data_flow_graph_pass.cc index 5d7eb43b7cbd7bc45b5f0c940bf80ad72348e1b9..e918622d74cfb11d83090555be2a768cc14e7742 100644 --- a/paddle/fluid/inference/analysis/fluid_to_data_flow_graph_pass.cc +++ b/paddle/fluid/inference/analysis/fluid_to_data_flow_graph_pass.cc @@ -15,7 +15,7 @@ limitations under the License. 
*/ #include #include -#include "analyzer.h" +#include "paddle/fluid/inference/analysis/analyzer.h" #include "paddle/fluid/inference/analysis/dfg_graphviz_draw_pass.h" #include "paddle/fluid/inference/analysis/fluid_to_data_flow_graph_pass.h" @@ -88,7 +88,8 @@ namespace { class DFG_DebuggerPass : public DFG_GraphvizDrawPass { public: using Config = DFG_GraphvizDrawPass::Config; - DFG_DebuggerPass(const Config &config) : DFG_GraphvizDrawPass(config) {} + explicit DFG_DebuggerPass(const Config &config) + : DFG_GraphvizDrawPass(config) {} std::string repr() const override { return "fluid-to-dfg-debuger-pass"; } bool Finalize() override { return true; } }; diff --git a/paddle/fluid/inference/analysis/pass_manager_tester.cc b/paddle/fluid/inference/analysis/pass_manager_tester.cc index 6caba8f04237e014c5ddf1a3a077bcbadb0ddb71..dac1c509d728114bd24a2ea1150c407646026fd4 100644 --- a/paddle/fluid/inference/analysis/pass_manager_tester.cc +++ b/paddle/fluid/inference/analysis/pass_manager_tester.cc @@ -12,14 +12,14 @@ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the License for the specific language governing permissions and limitations under the License. */ -#include "paddle/fluid/inference/analysis/pass_manager.h" +#include + #include "paddle/fluid/inference/analysis/data_flow_graph_to_fluid_pass.h" #include "paddle/fluid/inference/analysis/dfg_graphviz_draw_pass.h" #include "paddle/fluid/inference/analysis/fluid_to_data_flow_graph_pass.h" +#include "paddle/fluid/inference/analysis/pass_manager.h" #include "paddle/fluid/inference/analysis/ut_helper.h" -#include - namespace paddle { namespace inference { namespace analysis { diff --git a/paddle/fluid/inference/analysis/tensorrt_subgraph_node_mark_pass.cc b/paddle/fluid/inference/analysis/tensorrt_subgraph_node_mark_pass.cc index 5ad092a9ed201e5e6ab7770bcfd9ddf871779c12..f736e385c11add152dc9ab9485bf1de40f80b2f3 100644 --- a/paddle/fluid/inference/analysis/tensorrt_subgraph_node_mark_pass.cc +++ b/paddle/fluid/inference/analysis/tensorrt_subgraph_node_mark_pass.cc @@ -12,10 +12,12 @@ // See the License for the specific language governing permissions and // limitations under the License. -#include "paddle/fluid/inference/analysis/tensorrt_subgraph_node_mark_pass.h" +#include + #include "paddle/fluid/inference/analysis/analyzer.h" #include "paddle/fluid/inference/analysis/dfg_graphviz_draw_pass.h" #include "paddle/fluid/inference/analysis/node_attr_flags.h" +#include "paddle/fluid/inference/analysis/tensorrt_subgraph_node_mark_pass.h" namespace paddle { namespace inference { @@ -29,7 +31,7 @@ void TensorRTSubgraphNodeMarkPass::Run(DataFlowGraph *graph) { class DfgDebuggerPass : public DFG_GraphvizDrawPass { public: - DfgDebuggerPass(const DFG_GraphvizDrawPass::Config &config) + explicit DfgDebuggerPass(const DFG_GraphvizDrawPass::Config &config) : DFG_GraphvizDrawPass(config) {} std::string repr() const override { diff --git a/paddle/fluid/inference/analysis/tensorrt_subgraph_node_mark_pass.h b/paddle/fluid/inference/analysis/tensorrt_subgraph_node_mark_pass.h index 6cfac55d3b7b501e8ccc141cb7309f1428478672..c558a6ebbde371071c7330a14cc986bf764d1773 100644 --- a/paddle/fluid/inference/analysis/tensorrt_subgraph_node_mark_pass.h +++ b/paddle/fluid/inference/analysis/tensorrt_subgraph_node_mark_pass.h @@ -16,6 +16,10 @@ * This file defines TensorRTSubgraphNodeMarkPass which helps to mark the ops * that supported by TensorRT engine. 
*/ + +#pragma once + +#include #include "paddle/fluid/inference/analysis/pass.h" #include "paddle/fluid/inference/analysis/subgraph_splitter.h" @@ -30,7 +34,8 @@ class TensorRTSubgraphNodeMarkPass : public DataFlowGraphPass { public: using teller_t = SubGraphSplitter::NodeInsideSubgraphTeller; - TensorRTSubgraphNodeMarkPass(const teller_t& teller) : teller_(teller) {} + explicit TensorRTSubgraphNodeMarkPass(const teller_t& teller) + : teller_(teller) {} bool Initialize(Argument* argument) override { return true; } @@ -38,8 +43,10 @@ class TensorRTSubgraphNodeMarkPass : public DataFlowGraphPass { // sub-graph into TensorRT. void Run(DataFlowGraph* graph) override; - std::string repr() const { return "tensorrt-sub-subgraph-mark"; } - std::string description() const { return "tensorrt sub-graph mark pass"; } + std::string repr() const override { return "tensorrt-sub-subgraph-mark"; } + std::string description() const override { + return "tensorrt sub-graph mark pass"; + } Pass* CreateGraphvizDebugerPass() const override; bool Finalize() override; diff --git a/paddle/fluid/inference/analysis/tensorrt_subgraph_pass.h b/paddle/fluid/inference/analysis/tensorrt_subgraph_pass.h index 11e088069538414c79371b920cb8fa1509b24bb1..c6741a92095d33d261a4e1667c87a8ca02e51a9f 100644 --- a/paddle/fluid/inference/analysis/tensorrt_subgraph_pass.h +++ b/paddle/fluid/inference/analysis/tensorrt_subgraph_pass.h @@ -14,6 +14,7 @@ limitations under the License. */ #pragma once +#include #include "paddle/fluid/inference/analysis/node.h" #include "paddle/fluid/inference/analysis/pass.h" #include "paddle/fluid/inference/analysis/subgraph_splitter.h" @@ -30,7 +31,7 @@ class TensorRTSubGraphPass : public DataFlowGraphPass { // Tell whether to transform a sub-graph into TensorRT. using NodeInsideSubgraphTeller = SubGraphFuse::NodeInsideSubgraphTeller; - TensorRTSubGraphPass(const NodeInsideSubgraphTeller& teller); + explicit TensorRTSubGraphPass(const NodeInsideSubgraphTeller& teller); bool Initialize(Argument* argument) override { return true; } @@ -40,8 +41,8 @@ class TensorRTSubGraphPass : public DataFlowGraphPass { bool Finalize() override { return true; } - std::string repr() const { return "tensorrt-sub-graph"; } - std::string description() const { return "tensorrt sub graph pass"; } + std::string repr() const override { return "tensorrt-sub-graph"; } + std::string description() const override { return "tensorrt sub graph pass"; } private: NodeInsideSubgraphTeller node_inside_subgraph_teller_; @@ -49,4 +50,4 @@ } // namespace analysis } // namespace inference -} // paddle +} // namespace paddle diff --git a/paddle/fluid/memory/malloc.cc b/paddle/fluid/memory/malloc.cc index 0c74f62de5c6f5d432ee928945db6dcf385ca209..bd98ed81899440a46415d30b6d74fec2dac4c155 100644 --- a/paddle/fluid/memory/malloc.cc +++ b/paddle/fluid/memory/malloc.cc @@ -20,6 +20,12 @@ limitations under the License. */ #include "paddle/fluid/memory/detail/system_allocator.h" #include "paddle/fluid/platform/gpu_info.h" +DEFINE_bool(init_allocated_mem, false, + "Some op implementations mistakenly assume that memory allocated by " + "BuddyAllocator is always zeroed. To catch such errors early, set " + "init_allocated_mem to initialize allocated memory with a small " + "value during unit testing."); DECLARE_double(fraction_of_gpu_memory_to_use); namespace paddle { @@ -41,6 +47,9 @@ template <> void* Alloc(platform::CPUPlace place, size_t size) { VLOG(10) << "Allocate " << size << " bytes on " << platform::Place(place); void* p = GetCPUBuddyAllocator()->Alloc(size); + if (FLAGS_init_allocated_mem) { + memset(p, 0xEF, size); + } VLOG(10) << " pointer=" << p; return p; } @@ -104,6 +113,9 @@ void* Alloc(platform::CUDAPlace place, size_t size) { LOG(WARNING) << "GPU memory used: " << Used(place); platform::SetDeviceId(cur_dev); } + if (FLAGS_init_allocated_mem) { + cudaMemset(ptr, 0xEF, size); + } return ptr; } @@ -137,6 +149,9 @@ void* Alloc(platform::CUDAPinnedPlace place, LOG(WARNING) << "cudaMallocHost Cannot allocate " << size << " bytes in CUDAPinnedPlace"; } + if (FLAGS_init_allocated_mem) { + memset(ptr, 0xEF, size); + } return ptr; } diff --git a/paddle/fluid/operators/CMakeLists.txt b/paddle/fluid/operators/CMakeLists.txt index 9dc39ad0ddf8c5de3e1960a1171431e026de35ae..ab1d2143330fb8cbfd535758a83bc71de939c4e0 100644 --- a/paddle/fluid/operators/CMakeLists.txt +++ b/paddle/fluid/operators/CMakeLists.txt @@ -184,6 +184,7 @@ else() set(DEPS_OPS ${DEPS_OPS} nccl_op) endif() +set(DISTRIBUTE_DEPS "") if(WITH_DISTRIBUTE) add_subdirectory(distributed) @@ -192,6 +193,18 @@ if(WITH_DISTRIBUTE) set(DISTRIBUTE_DEPS sendrecvop_grpc grpc++_unsecure grpc_unsecure gpr cares zlib protobuf) else() set(DISTRIBUTE_DEPS sendrecvop_brpc brpc leveldb snappystream snappy protobuf ssl crypto zlib) + if(WITH_BRPC_RDMA) + find_library(IBVERBS_LIBRARY NAMES ibverbs) + ADD_LIBRARY(ibverbs SHARED IMPORTED GLOBAL) + SET_PROPERTY(TARGET ibverbs PROPERTY IMPORTED_LOCATION ${IBVERBS_LIBRARY}) + + + find_library(RDMACM_LIBRARY NAMES rdmacm) + ADD_LIBRARY(rdmacm SHARED IMPORTED GLOBAL) + SET_PROPERTY(TARGET rdmacm PROPERTY IMPORTED_LOCATION ${RDMACM_LIBRARY}) + + set(DISTRIBUTE_DEPS ${DISTRIBUTE_DEPS} ibverbs rdmacm) + endif() endif() set(DISTRIBUTE_COMPILE_FLAGS "-Wno-non-virtual-dtor -Wno-error=non-virtual-dtor -Wno-error=delete-non-virtual-dtor") @@ -205,7 +218,7 @@ if(WITH_DISTRIBUTE) # listen_and_serv_op sum_op executor SERIAL) if(WITH_GPU) set_source_files_properties(test_send_nccl_id.cc PROPERTIES COMPILE_FLAGS ${DISTRIBUTE_COMPILE_FLAGS}) - cc_test(test_send_nccl_id SRCS test_send_nccl_id.cc DEPS listen_and_serv_op executor SERIAL) + cc_test(test_send_nccl_id SRCS test_send_nccl_id.cc DEPS listen_and_serv_op ${DISTRIBUTE_DEPS} executor SERIAL) if(WITH_GRPC) op_library(gen_nccl_id_op DEPS nccl_common sendrecvop_grpc) else() @@ -297,6 +310,7 @@ foreach(src ${DETECTION_LIBRARY}) endforeach() set(GLOB_OP_LIB ${OP_LIBRARY} CACHE INTERNAL "Global OP library") +set(GLOB_DISTRIBUTE_DEPS ${DISTRIBUTE_DEPS} CACHE INTERNAL "distributed dependency") cc_test(gather_test SRCS gather_test.cc DEPS tensor) cc_test(scatter_test SRCS scatter_test.cc DEPS tensor) diff --git a/paddle/fluid/operators/batch_norm_mkldnn_op.cc b/paddle/fluid/operators/batch_norm_mkldnn_op.cc index 6ecb43c49c30f9da2a273d506f7b85c0a4f5fa2c..9ab2179b5fe689762704039c5f67dd080e530aa5 100644 --- a/paddle/fluid/operators/batch_norm_mkldnn_op.cc +++ b/paddle/fluid/operators/batch_norm_mkldnn_op.cc @@ -115,9 +115,12 @@ class BatchNormMKLDNNOpKernel : public paddle::framework::OpKernel { if (fuse_with_relu) flags |= mkldnn::fuse_bn_relu; // create mkldnn memory from input x tensor - auto src_memory
= - memory({{{src_tz}, memory::data_type::f32, x->format()}, mkldnn_engine}, - to_void_cast(x_data)); + mkldnn::memory::format input_format = + platform::MKLDNNFormatForSize(src_tz.size(), x->format()); + + auto src_memory = memory( + {{{src_tz}, memory::data_type::f32, input_format}, mkldnn_engine}, + to_void_cast(x_data)); // create primitive descriptor for batch norm forward using bn_fwd_types = bn_type_traits; @@ -251,15 +254,21 @@ class BatchNormMKLDNNGradOpKernel : public paddle::framework::OpKernel { using bn_bwd_types = bn_type_traits; // create mkldnn memory from input diff_y tensor - auto user_diff_dst_memory = - memory({{{diff_dst_tz}, memory::data_type::f32, diff_y->format()}, - mkldnn_engine}, - to_void_cast(diff_y_data)); + + mkldnn::memory::format dst_format = + platform::MKLDNNFormatForSize(src_tz.size(), diff_y->format()); + + auto user_diff_dst_memory = memory( + {{{diff_dst_tz}, memory::data_type::f32, dst_format}, mkldnn_engine}, + to_void_cast(diff_y_data)); // create mkldnn memory from input x tensor - auto src_memory = - memory({{{src_tz}, memory::data_type::f32, x->format()}, mkldnn_engine}, - to_void_cast(x_data)); + mkldnn::memory::format input_format = + platform::MKLDNNFormatForSize(src_tz.size(), x->format()); + + auto src_memory = memory( + {{{src_tz}, memory::data_type::f32, input_format}, mkldnn_engine}, + to_void_cast(x_data)); // for diff_dst, try to use same format as dst in forward pass auto diff_dst_pd = batch_norm_fwd_pd.get()->dst_primitive_desc(); diff --git a/paddle/fluid/operators/detail/macros.h b/paddle/fluid/operators/detail/macros.h index b9e385994efcea0388756e8bd780ebfc719ed08d..6f4a15caa5542a45cd8e26a72b055ca8948069d0 100644 --- a/paddle/fluid/operators/detail/macros.h +++ b/paddle/fluid/operators/detail/macros.h @@ -14,14 +14,22 @@ #pragma once +#ifdef PADDLE_WITH_DISTRIBUTE + #ifdef PADDLE_WITH_GRPC + #include "paddle/fluid/operators/distributed/grpc_client.h" #include "paddle/fluid/operators/distributed/grpc_server.h" -#define RPCSERVER_T distributed::AsyncGRPCServer -#define RPCCLIENT_T distributed::GRPCClient -#else +#define RPCSERVER_T paddle::operators::distributed::AsyncGRPCServer +#define RPCCLIENT_T paddle::operators::distributed::GRPCClient + +#else // PADDLE_WITH_GRPC + #include "paddle/fluid/operators/distributed/brpc_client.h" #include "paddle/fluid/operators/distributed/brpc_server.h" -#define RPCSERVER_T distributed::AsyncBRPCServer -#define RPCCLIENT_T distributed::BRPCClient -#endif +#define RPCSERVER_T paddle::operators::distributed::AsyncBRPCServer +#define RPCCLIENT_T paddle::operators::distributed::BRPCClient + +#endif // PADDLE_WITH_GRPC + +#endif // PADDLE_WITH_DISTRIBUTE diff --git a/paddle/fluid/operators/distributed/rpc_client.cc b/paddle/fluid/operators/distributed/rpc_client.cc index 382b65d637c0be6ac432d2b5feb44e0df3c6fd6e..b5ec9fe5367beb97b3cc7298102deff1e8ca4ec9 100644 --- a/paddle/fluid/operators/distributed/rpc_client.cc +++ b/paddle/fluid/operators/distributed/rpc_client.cc @@ -16,7 +16,7 @@ #include "gflags/gflags.h" // default to 3min to avoid temporary network failures.
-DEFINE_int32(rpc_deadline, 30000, "deadline timeouts for rpc"); +DEFINE_int32(rpc_deadline, 180000, "deadline timeouts for rpc"); namespace paddle { namespace operators { diff --git a/paddle/fluid/operators/fill_zeros_like_op.cc b/paddle/fluid/operators/fill_zeros_like_op.cc index a9d47c017275193cdacc7db8f31e8e874b9b84de..d67bec36b3248be8602da562a88aeb58f5effe39 100644 --- a/paddle/fluid/operators/fill_zeros_like_op.cc +++ b/paddle/fluid/operators/fill_zeros_like_op.cc @@ -26,12 +26,8 @@ class FillZerosLikeOp : public framework::OperatorWithKernel { "Input(X) of FillZerosLikeOp should not be null."); PADDLE_ENFORCE(ctx->HasOutput("Out"), "Output(Out) of FillZerosLikeOp should not be null."); - - if (ctx->IsRuntime() && - ctx->GetOutputsVarType("Out")[0] == - framework::proto::VarType::LOD_TENSOR_ARRAY) { - return; // skip runtime infershape when is tensor array; - } + ctx->SetOutputDim("Out", ctx->GetInputDim("X")); + ctx->ShareLoD("X", /*->*/ "Out"); } }; @@ -43,7 +39,7 @@ class FillZerosLikeOpMaker : public framework::OpProtoAndCheckerMaker { AddComment(R"DOC( FillZerosLike Operator. -Fill up a variable with zeros, supporting both LoDTensor and LoDTensorArray. +Fill up a variable with zeros. The output will have the same size as the input. )DOC"); diff --git a/paddle/fluid/operators/fill_zeros_like_op.h b/paddle/fluid/operators/fill_zeros_like_op.h index daa6521b32e583f733bc040afb61bf13c4236731..4bbe0df6b6890122381c87494e510cf125792377 100644 --- a/paddle/fluid/operators/fill_zeros_like_op.h +++ b/paddle/fluid/operators/fill_zeros_like_op.h @@ -13,7 +13,6 @@ See the License for the specific language governing permissions and limitations under the License. */ #pragma once -#include "paddle/fluid/framework/lod_tensor_array.h" #include "paddle/fluid/framework/op_registry.h" #include "paddle/fluid/operators/math/math_function.h" @@ -24,29 +23,12 @@ template class FillZerosLikeKernel : public framework::OpKernel { public: void Compute(const framework::ExecutionContext& context) const override { - auto var = context.InputVar("X"); - if (var->IsType()) { - auto& input = *context.Input("X"); - auto& output = *context.Output("Out"); - output.Resize(input.dims()); - output.set_lod(input.lod()); - output.mutable_data(context.GetPlace()); - math::SetConstant setter; - setter(context.template device_context(), &(output), - static_cast(0)); - } else if (var->IsType()) { - auto& input = *context.Input("X"); - auto& output = *context.Output("Out"); - output.resize(input.size()); - for (auto i = 0; i < input.size(); i++) { - output[i].Resize(input[i].dims()); - output[i].set_lod(input[i].lod()); - output[i].mutable_data(context.GetPlace()); - math::SetConstant setter; - setter(context.template device_context(), &(output[i]), - static_cast(0)); - } - } + auto* out = context.Output("Out"); + out->mutable_data(context.GetPlace()); + + math::SetConstant setter; + setter(context.template device_context(), out, + static_cast(0)); } }; diff --git a/paddle/fluid/platform/enforce.h b/paddle/fluid/platform/enforce.h index a34e4371cccfd1be0d173fa11595e4368eb65b85..70bc9c4e8340b8e02ef2826d828faff3f6d11965 100644 --- a/paddle/fluid/platform/enforce.h +++ b/paddle/fluid/platform/enforce.h @@ -113,7 +113,11 @@ template inline typename std::enable_if::type throw_on_error( bool stat, const Args&... 
args) { if (UNLIKELY(!(stat))) { +#ifndef REPLACE_ENFORCE_GLOG throw std::runtime_error(string::Sprintf(args...)); +#else + LOG(FATAL) << string::Sprintf(args...); +#endif } } @@ -123,8 +127,12 @@ template inline typename std::enable_if::type throw_on_error( cudaError_t e, const Args&... args) { if (UNLIKELY(e)) { +#ifndef REPLACE_ENFORCE_GLOG throw thrust::system_error(e, thrust::cuda_category(), string::Sprintf(args...)); +#else + LOG(FATAL) << string::Sprintf(args...); +#endif } } @@ -132,8 +140,12 @@ template inline typename std::enable_if::type throw_on_error( curandStatus_t stat, const Args&... args) { if (stat != CURAND_STATUS_SUCCESS) { +#ifndef REPLACE_ENFORCE_GLOG throw thrust::system_error(cudaErrorLaunchFailure, thrust::cuda_category(), string::Sprintf(args...)); +#else + LOG(FATAL) << string::Sprintf(args...); +#endif } } @@ -143,8 +155,12 @@ inline typename std::enable_if::type throw_on_error( if (stat == CUDNN_STATUS_SUCCESS) { return; } else { +#ifndef REPLACE_ENFORCE_GLOG throw std::runtime_error(platform::dynload::cudnnGetErrorString(stat) + string::Sprintf(args...)); +#else + LOG(FATAL) << string::Sprintf(args...); +#endif } } @@ -173,7 +189,11 @@ inline typename std::enable_if::type throw_on_error( } else if (stat == CUBLAS_STATUS_LICENSE_ERROR) { err = "CUBLAS: license error, "; } +#ifndef REPLACE_ENFORCE_GLOG throw std::runtime_error(err + string::Sprintf(args...)); +#else + LOG(FATAL) << err << string::Sprintf(args...); +#endif } #ifndef __APPLE__ @@ -183,8 +203,13 @@ inline typename std::enable_if::type throw_on_error( if (stat == ncclSuccess) { return; } else { +#ifndef REPLACE_ENFORCE_GLOG throw std::runtime_error(platform::dynload::ncclGetErrorString(stat) + string::Sprintf(args...)); +#else + LOG(FATAL) << platform::dynload::ncclGetErrorString(stat) + << string::Sprintf(args...); +#endif } } #endif // __APPLE__ @@ -203,6 +228,7 @@ inline void throw_on_error(T e) { __FILE__, __LINE__); \ } while (false) +#ifndef REPLACE_ENFORCE_GLOG #define PADDLE_ENFORCE(...) \ do { \ try { \ @@ -212,6 +238,9 @@ inline void throw_on_error(T e) { __FILE__, __LINE__); \ } \ } while (false) +#else +#define PADDLE_ENFORCE(...) ::paddle::platform::throw_on_error(__VA_ARGS__); +#endif /* * Some enforce helpers here, usage: diff --git a/paddle/fluid/platform/mkldnn_helper.h b/paddle/fluid/platform/mkldnn_helper.h index ed99932546446eb877c9701de15e2d37d29b5f88..33fec2c1073819d88d85a8872227adcb9df3e8f4 100644 --- a/paddle/fluid/platform/mkldnn_helper.h +++ b/paddle/fluid/platform/mkldnn_helper.h @@ -14,6 +14,7 @@ limitations under the License. 
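The enforce.h edits above all apply one pattern: when REPLACE_ENFORCE_GLOG is defined (the new CMake option at the top of this patch), each failure path goes through glog's LOG(FATAL) instead of throwing, so the process aborts with the message in the log rather than unwinding. A condensed, self-contained sketch of that pattern (the real code formats messages with string::Sprintf and has per-status-type overloads):

```cpp
#include <stdexcept>
#include <glog/logging.h>

// Condensed sketch of the REPLACE_ENFORCE_GLOG switch in enforce.h.
inline void ThrowOnError(bool stat, const char* msg) {
  if (!stat) {
#ifndef REPLACE_ENFORCE_GLOG
    throw std::runtime_error(msg);  // default: unwind with an exception
#else
    LOG(FATAL) << msg;  // glog logs the message and aborts the process
#endif
  }
}
```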
*/ #pragma once #include +#include #include #include "paddle/fluid/framework/operator.h" #include "paddle/fluid/platform/place.h" @@ -182,10 +183,11 @@ class MKLDNNHandler { } std::shared_ptr AcquireMemory( - mkldnn::memory::primitive_desc& mpd, - mkldnn::memory::primitive_desc& user_mpd, + mkldnn::memory::primitive_desc& mpd, // NOLINT + mkldnn::memory::primitive_desc& user_mpd, // NOLINT const std::shared_ptr user_memory_p, - const std::string& suffix, std::vector& pipeline) { + const std::string& suffix, + std::vector& pipeline) { // NOLINT // create reorder primitive if the input format is not the preferred one auto local_key = key_ + suffix; auto key_reorder_p = key_ + suffix + "reorder_p"; @@ -218,7 +220,7 @@ class MKLDNNHandler { return target_memory_p; } - static std::string GetHash(mkldnn::memory::dims& operand_dims, + static std::string GetHash(mkldnn::memory::dims& operand_dims, // NOLINT const std::string& suffix) { auto dims2str = [](const mkldnn::memory::dims& operand_dims) { std::string dstr = ""; @@ -227,8 +229,9 @@ class MKLDNNHandler { } return dstr; }; + return dims2str(operand_dims) + suffix; - }; + } protected: const MKLDNNDeviceContext& dev_ctx_; @@ -237,5 +240,15 @@ class MKLDNNHandler { bool is_reusing_; }; +inline mkldnn::memory::format MKLDNNFormatForSize( + size_t dims_size, mkldnn::memory::format data_format) { + if (dims_size == 1) { + return mkldnn::memory::format::x; + } else if (dims_size == 2) { + return mkldnn::memory::format::nc; + } + return data_format; +} + } // namespace platform } // namespace paddle diff --git a/paddle/fluid/string/printf.h b/paddle/fluid/string/printf.h index 062095a1c3e977c0bcc89346ead765acb023bcf7..e0f6202506868fd0dcbe93297785682edbcfe792 100644 --- a/paddle/fluid/string/printf.h +++ b/paddle/fluid/string/printf.h @@ -84,7 +84,7 @@ void Fprintf(std::ostream& out, const char* fmt, const Args&... args) { } template -std::string Sprintf(const char* fmt, const Args&... args) { +std::string Sprintf(const char* fmt = "", const Args&... args) { std::ostringstream oss; Fprintf(oss, fmt, args...); return oss.str(); diff --git a/python/paddle/fluid/__init__.py b/python/paddle/fluid/__init__.py index 45af83708ea63fc1b6aa86f1e8423bb44b7388a6..3034c1a0875a71421bcba172c16ee32d809df152 100644 --- a/python/paddle/fluid/__init__.py +++ b/python/paddle/fluid/__init__.py @@ -118,7 +118,8 @@ def __bootstrap__(): read_env_flags = [ 'use_pinned_memory', 'check_nan_inf', 'benchmark', 'warpctc_dir', - 'eager_delete_scope', 'use_mkldnn', 'initial_cpu_memory_in_mb' + 'eager_delete_scope', 'use_mkldnn', 'initial_cpu_memory_in_mb', + 'init_allocated_mem' ] if core.is_compiled_with_cuda(): read_env_flags += [ diff --git a/python/paddle/fluid/layers/nn.py b/python/paddle/fluid/layers/nn.py index 61c01b3b0056648b6cc67430d5d3bfffc8412928..bcf520d5a4e3bbe1d949d08f42199dd8c5cdc947 100644 --- a/python/paddle/fluid/layers/nn.py +++ b/python/paddle/fluid/layers/nn.py @@ -95,7 +95,6 @@ __all__ = [ 'relu', 'log', 'crop', - 'fill_zeros_like', ] @@ -5185,40 +5184,3 @@ def crop(x, shape=None, offsets=None, name=None): outputs={'Out': out}, attrs=None if len(attrs) == 0 else attrs) return out - - -def fill_zeros_like(x): - """ - This layer takes an input and outputs a variable that has the same structure as - the input and with all the element values as zero. The variable can be a Tensor - or TensorArray. - - .. code-block:: text - - - Given - X = [[0, 1, 2, 0], - [0, 3, 4, 0], - [0, 0, 0, 0]], - output is: - Out = [[0, 0, 0, 0], - [0, 0, 0, 0], - [0, 0, 0, 0]]. 
- - Args: - x (Variable): The input variable, which could be a tensor or tensor array - - Returns: - Variable: The zero-filled variable, which has the same type and shape as - the input variable. - - Examples: - - .. code-block:: python - y = fluid.layers.fill_zeros_like(x) - """ - helper = LayerHelper('fill_zeros_like', **locals()) - out = helper.create_tmp_variable(dtype=x.dtype) - helper.append_op( - type='fill_zeros_like', inputs={'X': [x]}, outputs={'Out': [out]}) - return out diff --git a/python/paddle/fluid/tests/book/test_fit_a_line.py b/python/paddle/fluid/tests/book/test_fit_a_line.py index 74f96f456a8dc917b715d0f4308bb5ea41947f0b..71bf5f8b3a9b17f24ce35220a9348bb871852623 100644 --- a/python/paddle/fluid/tests/book/test_fit_a_line.py +++ b/python/paddle/fluid/tests/book/test_fit_a_line.py @@ -110,14 +110,23 @@ def infer(use_cuda, save_dirname=None): # The input's dimension should be 2-D and the second dim is 13 # The input data should be >= 0 batch_size = 10 - tensor_x = numpy.random.uniform(0, 10, - [batch_size, 13]).astype("float32") + + test_reader = paddle.batch( + paddle.dataset.uci_housing.test(), batch_size=batch_size) + + test_data = test_reader().next() + test_feat = numpy.array( + [data[0] for data in test_data]).astype("float32") + test_label = numpy.array( + [data[1] for data in test_data]).astype("float32") + assert feed_target_names[0] == 'x' results = exe.run(inference_program, - feed={feed_target_names[0]: tensor_x}, + feed={feed_target_names[0]: numpy.array(test_feat)}, fetch_list=fetch_targets) print("infer shape: ", results[0].shape) print("infer results: ", results[0]) + print("ground truth: ", test_label) def main(use_cuda, is_local=True): diff --git a/python/paddle/fluid/tests/unittests/CMakeLists.txt b/python/paddle/fluid/tests/unittests/CMakeLists.txt index 5f27864c140573086d07415f83caca708889a068..f6c8dcabcbc592024188f4742e6c532a704d2289 100644 --- a/python/paddle/fluid/tests/unittests/CMakeLists.txt +++ b/python/paddle/fluid/tests/unittests/CMakeLists.txt @@ -52,3 +52,4 @@ py_test_modules(test_parallel_executor_crf MODULES test_parallel_executor_crf SE py_test_modules(test_parallel_executor_fetch_feed MODULES test_parallel_executor_fetch_feed SERIAL) set_tests_properties(test_listen_and_serv_op PROPERTIES TIMEOUT 20) set_tests_properties(test_dist_mnist PROPERTIES TIMEOUT 180) +set_tests_properties(test_dist_word2vec PROPERTIES TIMEOUT 180) diff --git a/python/paddle/fluid/tests/unittests/parallel_executor_test_base.py b/python/paddle/fluid/tests/unittests/parallel_executor_test_base.py index 21f2037ad408b0a92718c0ea2bae5e8bf563c665..cddf00765f4894126988c794763c34629449e8e6 100644 --- a/python/paddle/fluid/tests/unittests/parallel_executor_test_base.py +++ b/python/paddle/fluid/tests/unittests/parallel_executor_test_base.py @@ -18,6 +18,8 @@ import unittest import paddle.fluid as fluid import time import numpy as np +import math +import sys __all__ = ['TestParallelExecutorBase'] @@ -93,6 +95,12 @@ class TestParallelExecutorBase(unittest.TestCase): print "%.4f Instance per second" % ( (batch_size * iter + 2) / (end - begin)) + avg_last_loss_val = np.array(last_loss).mean() + avg_first_loss_val = np.array(first_loss).mean() + if math.isnan(float(avg_last_loss_val)) or math.isnan( + float(avg_first_loss_val)): + sys.exit("got NaN loss, training failed.") + print first_loss, last_loss # self.assertGreater(first_loss[0], last_loss[0]) return first_loss, last_loss diff --git a/python/paddle/fluid/tests/unittests/test_dist_word2vec.py 
b/python/paddle/fluid/tests/unittests/test_dist_word2vec.py new file mode 100644 index 0000000000000000000000000000000000000000..712fd5849d80b1915ae3b2ae5108bedee8d88a2c --- /dev/null +++ b/python/paddle/fluid/tests/unittests/test_dist_word2vec.py @@ -0,0 +1,203 @@ +# Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +import numpy as np +import argparse +import time +import math +import paddle +import paddle.fluid as fluid +import paddle.fluid.profiler as profiler +from paddle.fluid import core +import unittest +from multiprocessing import Process +import os +import signal + +IS_SPARSE = True +EMBED_SIZE = 32 +HIDDEN_SIZE = 256 +N = 5 +BATCH_SIZE = 32 +ExecutionStrategy = core.ParallelExecutor.ExecutionStrategy + + +def get_model(): + def __network__(words): + embed_first = fluid.layers.embedding( + input=words[0], + size=[dict_size, EMBED_SIZE], + dtype='float32', + is_sparse=IS_SPARSE, + param_attr='shared_w') + embed_second = fluid.layers.embedding( + input=words[1], + size=[dict_size, EMBED_SIZE], + dtype='float32', + is_sparse=IS_SPARSE, + param_attr='shared_w') + embed_third = fluid.layers.embedding( + input=words[2], + size=[dict_size, EMBED_SIZE], + dtype='float32', + is_sparse=IS_SPARSE, + param_attr='shared_w') + embed_forth = fluid.layers.embedding( + input=words[3], + size=[dict_size, EMBED_SIZE], + dtype='float32', + is_sparse=IS_SPARSE, + param_attr='shared_w') + + concat_embed = fluid.layers.concat( + input=[embed_first, embed_second, embed_third, embed_forth], axis=1) + hidden1 = fluid.layers.fc(input=concat_embed, + size=HIDDEN_SIZE, + act='sigmoid') + predict_word = fluid.layers.fc(input=hidden1, + size=dict_size, + act='softmax') + cost = fluid.layers.cross_entropy(input=predict_word, label=words[4]) + avg_cost = fluid.layers.mean(cost) + return avg_cost, predict_word + + word_dict = paddle.dataset.imikolov.build_dict() + dict_size = len(word_dict) + + first_word = fluid.layers.data(name='firstw', shape=[1], dtype='int64') + second_word = fluid.layers.data(name='secondw', shape=[1], dtype='int64') + third_word = fluid.layers.data(name='thirdw', shape=[1], dtype='int64') + forth_word = fluid.layers.data(name='forthw', shape=[1], dtype='int64') + next_word = fluid.layers.data(name='nextw', shape=[1], dtype='int64') + avg_cost, predict_word = __network__( + [first_word, second_word, third_word, forth_word, next_word]) + + inference_program = paddle.fluid.default_main_program().clone() + + sgd_optimizer = fluid.optimizer.SGD(learning_rate=0.001) + sgd_optimizer.minimize(avg_cost) + + train_reader = paddle.batch( + paddle.dataset.imikolov.train(word_dict, N), BATCH_SIZE) + test_reader = paddle.batch( + paddle.dataset.imikolov.test(word_dict, N), BATCH_SIZE) + + return inference_program, avg_cost, train_reader, test_reader, predict_word + + +def get_transpiler(trainer_id, main_program, pserver_endpoints, trainers): + t = fluid.DistributeTranspiler() + t.transpile( + trainer_id=trainer_id, + program=main_program, + 
+
+
+def get_transpiler(trainer_id, main_program, pserver_endpoints, trainers):
+    t = fluid.DistributeTranspiler()
+    t.transpile(
+        trainer_id=trainer_id,
+        program=main_program,
+        pservers=pserver_endpoints,
+        trainers=trainers)
+    return t
+
+
+def run_pserver(pserver_endpoints, trainers, current_endpoint):
+    get_model()
+    t = get_transpiler(0,
+                       fluid.default_main_program(), pserver_endpoints,
+                       trainers)
+    pserver_prog = t.get_pserver_program(current_endpoint)
+    startup_prog = t.get_startup_program(current_endpoint, pserver_prog)
+
+    place = fluid.CPUPlace()
+    exe = fluid.Executor(place)
+    exe.run(startup_prog)
+
+    exe.run(pserver_prog)
+
+
+class TestDistWord2vec(unittest.TestCase):
+    def setUp(self):
+        self._trainers = 1
+        self._pservers = 1
+        self._ps_endpoints = "127.0.0.1:9123"
+
+    def start_pserver(self, endpoint):
+        p = Process(
+            target=run_pserver,
+            args=(self._ps_endpoints, self._trainers, endpoint))
+        p.start()
+        return p.pid
+
+    def _wait_ps_ready(self, pid):
+        retry_times = 5
+        while True:
+            assert retry_times >= 0, "wait ps ready failed"
+            time.sleep(1)
+            try:
+                # the listen_and_serv_op would touch a file which contains the listen port
+                # on the /tmp directory until it was ready to process all the RPC call.
+                os.stat("/tmp/paddle.%d.port" % pid)
+                return
+            except os.error:
+                retry_times -= 1
+
+    def stop_pserver(self, pid):
+        os.kill(pid, signal.SIGKILL)
+
+    def test_with_place(self):
+        p = fluid.CUDAPlace(0) if core.is_compiled_with_cuda(
+        ) else fluid.CPUPlace()
+
+        pserver_pid = self.start_pserver(self._ps_endpoints)
+        self._wait_ps_ready(pserver_pid)
+
+        self.run_trainer(p, 0)
+
+        self.stop_pserver(pserver_pid)
+
+    def run_trainer(self, place, trainer_id):
+        test_program, avg_cost, train_reader, test_reader, predict = get_model()
+        t = get_transpiler(trainer_id,
+                           fluid.default_main_program(), self._ps_endpoints,
+                           self._trainers)
+
+        trainer_prog = t.get_trainer_program()
+
+        exe = fluid.Executor(place)
+        exe.run(fluid.default_startup_program())
+
+        use_gpu = True if core.is_compiled_with_cuda() else False
+
+        exec_strategy = ExecutionStrategy()
+        exec_strategy.use_cuda = use_gpu
+        train_exe = fluid.ParallelExecutor(
+            use_cuda=use_gpu,
+            main_program=trainer_prog,
+            loss_name=avg_cost.name,
+            exec_strategy=exec_strategy)
+
+        feed_var_list = [
+            var for var in trainer_prog.global_block().vars.itervalues()
+            if var.is_data
+        ]
+
+        feeder = fluid.DataFeeder(feed_var_list, place)
+        for pass_id in xrange(10):
+            for batch_id, data in enumerate(train_reader()):
+                avg_loss_np = train_exe.run(feed=feeder.feed(data),
+                                            fetch_list=[avg_cost.name])
+                loss = np.array(avg_loss_np).mean()
+                if float(loss) < 5.0:
+                    return
+                if math.isnan(loss):
+                    raise AssertionError("got NaN loss, training failed.")
+
+
+if __name__ == "__main__":
+    unittest.main()
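For readers following the new test: the transpiler flow above is the standard fluid recipe for one pserver and one trainer. Each role transpiles the same main program, then extracts its own program from the result. Stripped of the test scaffolding, and with the endpoint and counts taken from the test, it reduces to roughly this sketch:

    import paddle.fluid as fluid

    t = fluid.DistributeTranspiler()
    t.transpile(
        trainer_id=0,                          # rank of this trainer process
        program=fluid.default_main_program(),
        pservers="127.0.0.1:9123",             # comma-separated pserver endpoints
        trainers=1)                            # total number of trainer processes

    # On the parameter-server process: run the generated listen-and-serve program.
    pserver_prog = t.get_pserver_program("127.0.0.1:9123")
    startup_prog = t.get_startup_program("127.0.0.1:9123", pserver_prog)

    # On the trainer process: run the rewritten main program.
    trainer_prog = t.get_trainer_program()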
diff --git a/python/paddle/fluid/tests/unittests/test_fill_zeros_like_op_for_array.py b/python/paddle/fluid/tests/unittests/test_fill_zeros_like_op_for_array.py
deleted file mode 100644
index 23871508d8042ade5253c2f0b3bc9f32ec71a135..0000000000000000000000000000000000000000
--- a/python/paddle/fluid/tests/unittests/test_fill_zeros_like_op_for_array.py
+++ /dev/null
@@ -1,88 +0,0 @@
-# Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
-#
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-#     http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-
-import unittest
-import paddle.fluid.core as core
-import numpy
-import paddle.fluid.layers as layers
-from paddle.fluid.framework import Program, program_guard
-from paddle.fluid.executor import Executor
-
-import paddle.fluid as fluid
-import paddle.fluid.core as core
-
-
-class TestFillZerosLikeOpForTensorArray(unittest.TestCase):
-    def place(self):
-        return core.CPUPlace()
-
-    def test_zero_filling_lod_tensor_array(self):
-        tensor = core.LoDTensor()
-        tensor.set(
-            numpy.arange(20).reshape(20, 1).astype('int32'), self.place())
-        tensor.set_lod([[0, 2, 5], [0, 3, 9, 11, 17, 20]])
-
-        expect = [
-            numpy.array(
-                [0, 0, 0, 0, 0], dtype='int32'), numpy.array(
-                    [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0], dtype='int32'),
-            numpy.array(
-                [0, 0, 0], dtype='int32')
-        ]
-
-        lod = [[[0, 2, 5]], [[0, 6, 12]], [[0, 3]]]
-        self.main(
-            tensor=tensor,
-            expect_array=expect,
-            expect_lod=lod,
-            expect_max_len=3)
-
-    def main(self, tensor, expect_array, expect_lod, expect_max_len, level=0):
-        place = self.place()
-        program = Program()
-        with program_guard(program):
-            x = layers.data(name='x', shape=[10])
-            x.persistable = True
-            table = layers.lod_rank_table(x, level=level)
-            max_len = layers.max_sequence_len(table)
-            max_len.persistable = True
-            array = layers.lod_tensor_to_array(x, table)
-            array = layers.fill_zeros_like(array)
-            array.persistable = True
-
-            result = layers.array_to_lod_tensor(array, table)
-            result.persistable = True
-        exe = Executor(place)
-        scope = core.Scope()
-        exe.run(program, feed={'x': tensor}, scope=scope)
-        var = scope.find_var(array.name)
-        array = var.get_lod_tensor_array()
-        if expect_array is not None and expect_lod is not None:
-            self.check_array_same(array, expect_array, expect_lod)
-
-        self.assertEqual(
-            numpy.array(scope.find_var(max_len.name).get_tensor())[0],
-            expect_max_len)
-
-    def check_array_same(self, array, expect_tensor, expect_lod):
-        self.assertEqual(len(expect_tensor), len(array))
-        for i, exp in enumerate(zip(expect_tensor, expect_lod)):
-            exp_tensor, exp_lod = exp
-            exp_tensor = numpy.expand_dims(exp_tensor, axis=1)
-            self.assertTrue(numpy.allclose(exp_tensor, numpy.array(array[i])))
-            self.assertEqual(exp_lod, array[i].lod())
-
-
-if __name__ == '__main__':
-    unittest.main()
diff --git a/python/paddle/fluid/tests/unittests/test_parallel_executor_test_while_train.py b/python/paddle/fluid/tests/unittests/test_parallel_executor_test_while_train.py
index 252793944462244539084a288e5259f216359650..9a2733927d38f1a2b1af92fcc12f036158b4d06f 100644
--- a/python/paddle/fluid/tests/unittests/test_parallel_executor_test_while_train.py
+++ b/python/paddle/fluid/tests/unittests/test_parallel_executor_test_while_train.py
@@ -16,6 +16,8 @@ import paddle.fluid as fluid
 import numpy as np
 import unittest
 import os
+import sys
+import math
 
 
 def simple_fc_net():
@@ -73,6 +75,14 @@ class ParallelExecutorTestingDuringTraining(unittest.TestCase):
                 train_loss, = train_exe.run([loss.name],
                                             feed=feed_dict)
 
+                avg_test_loss_val = np.array(test_loss).mean()
+                if math.isnan(float(avg_test_loss_val)):
+                    sys.exit("got NaN loss, testing failed.")
+
+                avg_train_loss_val = np.array(train_loss).mean()
+                if math.isnan(float(avg_train_loss_val)):
+                    sys.exit("got NaN loss, training failed.")
+
                 self.assertTrue(
                     np.allclose(
                         train_loss, test_loss, atol=1e-8),