diff --git a/CMakeLists.txt b/CMakeLists.txt index fba5c58dc4a1410e7a8d5a70d78f5d1b1a75259e..fcff1de56742d027b4b10d003fd463e7335720b7 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -36,8 +36,7 @@ include(simd) ################################ Configurations ####################################### option(WITH_GPU "Compile PaddlePaddle with NVIDIA GPU" ${CUDA_FOUND}) option(WITH_AVX "Compile PaddlePaddle with AVX intrinsics" ${AVX_FOUND}) -option(WITH_MKLDNN "Compile PaddlePaddle with mkl-dnn support." ${AVX_FOUND}) -option(WITH_MKLML "Compile PaddlePaddle with mklml package." ${AVX_FOUND}) +option(WITH_MKL "Compile PaddlePaddle with MKL support." ${AVX_FOUND}) option(WITH_DSO "Compile PaddlePaddle with dynamic linked CUDA" ON) option(WITH_TESTING "Compile PaddlePaddle with unit testing" ON) option(WITH_SWIG_PY "Compile PaddlePaddle with inference api" ON) @@ -82,10 +81,8 @@ if(ANDROID OR IOS) "Disable PYTHON when cross-compiling for Android and iOS" FORCE) set(WITH_RDMA OFF CACHE STRING "Disable RDMA when cross-compiling for Android and iOS" FORCE) - set(WITH_MKLDNN OFF CACHE STRING - "Disable MKLDNN when cross-compiling for Android and iOS" FORCE) - set(WITH_MKLML OFF CACHE STRING - "Disable MKLML package when cross-compiling for Android and iOS" FORCE) + set(WITH_MKL OFF CACHE STRING + "Disable MKL when cross-compiling for Android and iOS" FORCE) # Compile PaddlePaddle mobile inference library if (NOT WITH_C_API) @@ -111,6 +108,17 @@ else() set(THIRD_PARTY_BUILD_TYPE Release) endif() +if(WITH_MKL) + set(WITH_MKLML ON) + set(WITH_MKLDNN ${AVX2_FOUND}) + if(NOT WITH_MKLDNN) + message(WARNING "Do not have AVX2 intrinsics and disabled MKL-DNN") + endif() +else() + set(WITH_MKLML OFF) + set(WITH_MKLDNN OFF) +endif() + ######################################################################################## include(external/mklml) # download mklml package @@ -161,8 +169,12 @@ if(WITH_GPU) include(cuda) endif(WITH_GPU) +if(WITH_MKLML) + list(APPEND EXTERNAL_LIBS ${MKLML_IOMP_LIB}) +endif() + if(WITH_MKLDNN) - list(APPEND EXTERNAL_LIBS ${MKLDNN_LIB} ${MKLDNN_IOMP_LIB}) + list(APPEND EXTERNAL_LIBS ${MKLDNN_LIB}) endif() if(USE_NNPACK) diff --git a/cmake/configure.cmake b/cmake/configure.cmake index 24ddb24399dabeec9b8e5faf36be3eb21f420111..e550ec285668ea25757eeee9e7c5dc48fc9d339d 100644 --- a/cmake/configure.cmake +++ b/cmake/configure.cmake @@ -76,27 +76,14 @@ else() include_directories(${CUDA_TOOLKIT_INCLUDE}) endif(NOT WITH_GPU) -if(WITH_MKLDNN) - add_definitions(-DPADDLE_USE_MKLDNN) - if (WITH_MKLML AND MKLDNN_IOMP_DIR) - message(STATUS "Enable Intel OpenMP at ${MKLDNN_IOMP_DIR}") - set(OPENMP_FLAGS "-fopenmp") - set(CMAKE_C_CREATE_SHARED_LIBRARY_FORBIDDEN_FLAGS ${OPENMP_FLAGS}) - set(CMAKE_CXX_CREATE_SHARED_LIBRARY_FORBIDDEN_FLAGS ${OPENMP_FLAGS}) - set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} ${OPENMP_FLAGS}") - set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} ${OPENMP_FLAGS}") - else() - find_package(OpenMP) - if(OPENMP_FOUND) - set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} ${OpenMP_C_FLAGS}") - set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} ${OpenMP_CXX_FLAGS}") - else() - message(WARNING "Can not find OpenMP." - "Some performance features in MKLDNN may not be available") - endif() - endif() - -endif(WITH_MKLDNN) +if (WITH_MKLML AND MKLML_IOMP_LIB) + message(STATUS "Enable Intel OpenMP with ${MKLML_IOMP_LIB}") + set(OPENMP_FLAGS "-fopenmp") + set(CMAKE_C_CREATE_SHARED_LIBRARY_FORBIDDEN_FLAGS ${OPENMP_FLAGS}) + set(CMAKE_CXX_CREATE_SHARED_LIBRARY_FORBIDDEN_FLAGS ${OPENMP_FLAGS}) + set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} ${OPENMP_FLAGS}") + set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} ${OPENMP_FLAGS}") +endif() set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} ${SIMD_FLAG}") set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} ${SIMD_FLAG}") diff --git a/cmake/cross_compiling/ios.cmake b/cmake/cross_compiling/ios.cmake index 310450f7d009dc0cdae9c0079a96445af8ec8f95..d3f5bf6852b3b295f3b5806b0577a880b0ce6ba6 100644 --- a/cmake/cross_compiling/ios.cmake +++ b/cmake/cross_compiling/ios.cmake @@ -76,11 +76,9 @@ set(IOS_PLATFORM ${IOS_PLATFORM} CACHE STRING "Type of iOS Platform") # Set the architecture for iOS if(NOT DEFINED IOS_ARCH) if(IOS_PLATFORM STREQUAL "OS") - # FIXME(liuyiqun): support "armv7;armv7s;arm64" future - set(IOS_ARCH "arm64") + set(IOS_ARCH "armv7;armv7s;arm64") elseif(IOS_PLATFORM STREQUAL "SIMULATOR") - # FIXME(liuyiqun): support "i386;x86_64" future - set(IOS_ARCH "x86_64") + set(IOS_ARCH "i386;x86_64") endif() endif() set(CMAKE_OSX_ARCHITECTURES ${IOS_ARCH} CACHE string "Build architecture for iOS") @@ -248,7 +246,7 @@ set(IOS_COMPILER_FLAGS "${XCODE_IOS_PLATFORM_VERSION_FLAGS} ${XCODE_IOS_BITCODE_ # Hidden visibilty is required for cxx on iOS set(CMAKE_C_FLAGS "${IOS_COMPILER_FLAGS} ${CMAKE_C_FLAGS}" CACHE STRING "C flags") -set(CMAKE_CXX_FLAGS "${IOS_COMPILER_FLAGS} -fvisibility-inlines-hidden ${CMAKE_CXX_FLAGS}" CACHE STRING "CXX flags") +set(CMAKE_CXX_FLAGS "${IOS_COMPILER_FLAGS} -fvisibility=hidden -fvisibility-inlines-hidden ${CMAKE_CXX_FLAGS}" CACHE STRING "CXX flags") set(IOS_LINK_FLAGS "${XCODE_IOS_PLATFORM_VERSION_FLAGS} -Wl,-search_paths_first") diff --git a/cmake/cuda.cmake b/cmake/cuda.cmake index 9c7a52164ae8fa9e841d51fd816a36fed9bc48d4..6bea7cf3022242ce48cc882915f7e71810937283 100644 --- a/cmake/cuda.cmake +++ b/cmake/cuda.cmake @@ -63,7 +63,6 @@ function(select_nvcc_arch_flags out_variable) set(archs_name_default "All") if(NOT CMAKE_CROSSCOMPILING) list(APPEND archs_names "Auto") - set(archs_name_default "Auto") endif() # set CUDA_ARCH_NAME strings (so it will be seen as dropbox in CMake-Gui) diff --git a/cmake/external/mkldnn.cmake b/cmake/external/mkldnn.cmake index 5a06825beb73e85d8a55b7b578b187bee2c4340c..fc52d339d7a336b44c97f2e0a9fc8d6604854365 100644 --- a/cmake/external/mkldnn.cmake +++ b/cmake/external/mkldnn.cmake @@ -40,10 +40,9 @@ INCLUDE_DIRECTORIES(${MKLDNN_INC_DIR}) IF(${CBLAS_PROVIDER} STREQUAL "MKLML") SET(MKLDNN_DEPENDS ${MKLML_PROJECT}) - SET(MKLDNN_MKLROOT ${MKLML_ROOT}) - SET(MKLDNN_IOMP_LIB ${MKLML_IOMP_LIB}) - SET(MKLDNN_IOMP_DIR ${MKLML_LIB_DIR}) - MESSAGE(STATUS "Build MKLDNN with ${MKLDNN_MKLROOT}") + MESSAGE(STATUS "Build MKLDNN with MKLML ${MKLML_ROOT}") +ELSE() + MESSAGE(FATAL_ERROR "Should enable MKLML when build MKLDNN") ENDIF() SET(MKLDNN_CFLAG "${CMAKE_C_FLAGS} -Wno-error=strict-overflow") @@ -57,15 +56,16 @@ ExternalProject_Add( PREFIX ${MKLDNN_SOURCES_DIR} UPDATE_COMMAND "" CMAKE_ARGS -DCMAKE_INSTALL_PREFIX=${MKLDNN_INSTALL_DIR} - CMAKE_ARGS -DMKLROOT=${MKLDNN_MKLROOT} + CMAKE_ARGS -DMKLROOT=${MKLML_ROOT} CMAKE_ARGS -DCMAKE_C_FLAGS=${MKLDNN_CFLAG} CMAKE_ARGS -DCMAKE_CXX_FLAGS=${MKLDNN_CXXFLAG} CMAKE_CACHE_ARGS -DCMAKE_INSTALL_PREFIX:PATH=${MKLDNN_INSTALL_DIR} - -DMKLROOT:PATH=${MKLDNN_MKLROOT} + -DMKLROOT:PATH=${MKLML_ROOT} ) ADD_LIBRARY(mkldnn SHARED IMPORTED GLOBAL) SET_PROPERTY(TARGET mkldnn PROPERTY IMPORTED_LOCATION ${MKLDNN_LIB}) ADD_DEPENDENCIES(mkldnn ${MKLDNN_PROJECT}) -MESSAGE(STATUS "Mkldnn library: ${MKLDNN_LIB}") +MESSAGE(STATUS "MKLDNN library: ${MKLDNN_LIB}") +add_definitions(-DPADDLE_USE_MKLDNN) LIST(APPEND external_project_dependencies mkldnn) diff --git a/cmake/external/openblas.cmake b/cmake/external/openblas.cmake index 324e29f931ecbb6beab2d363daa01a19b1a56b3e..4c4f59656dae68739f2f07f3febd510e727fe2dd 100644 --- a/cmake/external/openblas.cmake +++ b/cmake/external/openblas.cmake @@ -29,7 +29,7 @@ IF(NOT ${CBLAS_FOUND}) "${CBLAS_INSTALL_DIR}/lib/${CMAKE_STATIC_LIBRARY_PREFIX}openblas${CMAKE_STATIC_LIBRARY_SUFFIX}" CACHE FILEPATH "openblas library." FORCE) - SET(OPENBLAS_CC "${CMAKE_C_COMPILER}") + SET(OPENBLAS_CC "${CMAKE_C_COMPILER} -Wno-unused-but-set-variable -Wno-unused-variable") IF(CMAKE_CROSSCOMPILING) SET(OPTIONAL_ARGS HOSTCC=${HOST_C_COMPILER}) @@ -45,15 +45,14 @@ IF(NOT ${CBLAS_FOUND}) SET(OPTIONAL_ARGS ${OPTIONAL_ARGS} TARGET=ARMV8 BINARY=64 USE_THREAD=0) ENDIF() ELSEIF(IOS) - # FIXME(liuyiqun): support multiple architectures - SET(OPENBLAS_COMMIT "b5c96fcfcdc82945502a2303116a64d89985daf5") - SET(OPENBLAS_CC "${OPENBLAS_CC} ${CMAKE_C_FLAGS} -isysroot ${CMAKE_OSX_SYSROOT}") - IF(CMAKE_OSX_ARCHITECTURES MATCHES "armv7") - SET(OPENBLAS_CC "${OPENBLAS_CC} -arch armv7") - SET(OPTIONAL_ARGS ${OPTIONAL_ARGS} TARGET=ARMV7 ARM_SOFTFP_ABI=1 USE_THREAD=0) - ELSEIF(CMAKE_OSX_ARCHITECTURES MATCHES "arm64") + IF(CMAKE_OSX_ARCHITECTURES MATCHES "arm64") + SET(OPENBLAS_COMMIT "b5c96fcfcdc82945502a2303116a64d89985daf5") + SET(OPENBLAS_CC "${OPENBLAS_CC} ${CMAKE_C_FLAGS} -isysroot ${CMAKE_OSX_SYSROOT}") SET(OPENBLAS_CC "${OPENBLAS_CC} -arch arm64") SET(OPTIONAL_ARGS ${OPTIONAL_ARGS} TARGET=ARMV8 BINARY=64 USE_THREAD=0 CROSS_SUFFIX=${CROSS_SUFFIX}) + ELSE() + MESSAGE(FATAL_ERROR "OpenBLAS only support arm64 architectures on iOS. " + "You can set IOS_USE_VECLIB_FOR_BLAS=ON or USE_EIGEN_FOR_BLAS=ON to use other blas library instead.") ENDIF() ELSEIF(RPI) # use hardfp diff --git a/cmake/external/warpctc.cmake b/cmake/external/warpctc.cmake index 8bd058222880b4df3b08da09c02f9fe7f1d0ee66..a8e1aca49c97df256b1269c286b0bce7732fa932 100644 --- a/cmake/external/warpctc.cmake +++ b/cmake/external/warpctc.cmake @@ -12,6 +12,10 @@ # See the License for the specific language governing permissions and # limitations under the License. +IF(MOBILE_INFERENCE) + return() +ENDIF() + INCLUDE(ExternalProject) SET(WARPCTC_SOURCES_DIR ${THIRD_PARTY_PATH}/warpctc) diff --git a/cmake/util.cmake b/cmake/util.cmake index 117ab7f49cdf4a568cd203b2b17767643d0b2d50..ad905ab55ba3537054fa5b30b5fca4d83c406702 100644 --- a/cmake/util.cmake +++ b/cmake/util.cmake @@ -115,8 +115,8 @@ function(link_paddle_exe TARGET_NAME) target_link_libraries(${TARGET_NAME} log) endif(ANDROID) - if(WITH_MKLDNN AND WITH_MKLML AND MKLDNN_IOMP_DIR) - target_link_libraries(${TARGET_NAME} "-L${MKLDNN_IOMP_DIR} -liomp5 -Wl,--as-needed") + if(WITH_MKLML AND MKLML_LIB_DIR AND MKLML_IOMP_LIB) + target_link_libraries(${TARGET_NAME} "-L${MKLML_LIB_DIR} -liomp5 -Wl,--as-needed") endif() add_dependencies(${TARGET_NAME} ${external_project_dependencies}) diff --git a/doc/design/mkldnn/README.MD b/doc/design/mkldnn/README.MD index 16236763a73770f3fe5eadf67645765d0456f875..ec6d4681836e189f46dbb9b915a237dc15cda7cf 100644 --- a/doc/design/mkldnn/README.MD +++ b/doc/design/mkldnn/README.MD @@ -36,13 +36,13 @@ Figure 1. PaddlePaddle on IA. 我们把集成方案大致分为了如下几个方面。 ### CMake -我们会在`CMakeLists.txt`中会添加`WITH_MKLDNN`的选项,当设置这个值为`ON`的时候会启用编译MKL-DNN功能。同时会自动开启OpenMP用于提高MKL-DNN的性能。 +我们会在`CMakeLists.txt`中会给用户添加一个`WITH_MKL`的开关,他是负责`WITH_MKLML`和`WITH_MKLDNN`的总开关。 -同时,我们会引入`WITH_MKLML`选项,用于选择是否使用MKL-DNN自带的MKLML安装包。这个安装包可以独立于MKL-DNN使用,但是建议在开启MKL-DNN的同时也打开MKLML的开关,这样才能发挥最好的性能。 +当打开`WITH_MKL`时,会开启MKLML的功能,作为PaddlePaddle的CBLAS和LAPACK库,同时会开启Intel OpenMP用于提高MKLML的性能。 如果系统支持AVX2指令集及以上,同时会开启MKL-DNN功能。 -所以,我们会在`cmake/external`目录新建`mkldnn.cmake`和`mklml.cmake`文件,它们会在编译PaddlePaddle的时候下载对应的软件包,并放到PaddlePaddle的third party目录中。 +当关闭`WITH_MKL`时,MKLML和MKL-DNN功能会同时关闭。 -**备注**:当`WITH_MKLML=ON`的时候,会优先使用这个包作为PaddlePaddle的CBLAS和LAPACK库,所以会稍微改动`cmake/cblas.cmake`中的逻辑。 +所以,我们会在`cmake/external`目录新建`mkldnn.cmake`和`mklml.cmake`文件,它们会在编译PaddlePaddle的时候下载对应的软件包,并放到PaddlePaddle的third party目录中。 ### Layers 所有MKL-DNN相关的C++ layers,都会按照PaddlePaddle的目录结构存放在 diff --git a/doc/howto/dev/write_docs_cn.rst b/doc/howto/dev/write_docs_cn.rst index 731a63f945c29ba78538b3d71289b234e569354d..61f3a223547b352cf7929615cf3682b29b9a738f 100644 --- a/doc/howto/dev/write_docs_cn.rst +++ b/doc/howto/dev/write_docs_cn.rst @@ -34,7 +34,7 @@ PaddlePaddle的文档构建有两种方式。 cd TO_YOUR_PADDLE_CLONE_PATH mkdir -p build cd build - cmake .. -DCMAKE_BUILD_TYPE=Debug -DWITH_GPU=OFF -DWITH_MKLDNN=OFF -DWITH_MKLML=OFF -DWITH_DOC=ON + cmake .. -DCMAKE_BUILD_TYPE=Debug -DWITH_GPU=OFF -DWITH_MKL=OFF -DWITH_DOC=ON make gen_proto_py make paddle_docs paddle_docs_cn diff --git a/doc/mobile/cross_compiling_for_android_cn.md b/doc/mobile/cross_compiling_for_android_cn.md index 882066f23714f7ab3bba9199b5fa5ff2325ce849..424d7718c64438496cf0895397babd5408e1ca02 100644 --- a/doc/mobile/cross_compiling_for_android_cn.md +++ b/doc/mobile/cross_compiling_for_android_cn.md @@ -1,4 +1,4 @@ -# 构建Android平台上的PaddlePaddle库 +# Android平台编译指南 用户可通过如下两种方式,交叉编译Android平台上适用的PaddlePaddle库: - 基于Docker容器的编译方式 diff --git a/doc/mobile/cross_compiling_for_ios_cn.md b/doc/mobile/cross_compiling_for_ios_cn.md index cda636a67de712e072f4cc7ad859dda75211eaa8..9da48e7f2119ce901fbb3abab73400df27be16d2 100644 --- a/doc/mobile/cross_compiling_for_ios_cn.md +++ b/doc/mobile/cross_compiling_for_ios_cn.md @@ -1,4 +1,4 @@ -# 构建iOS平台上的PaddlePaddle库 +# iOS平台编译指南 交叉编译iOS平台上适用的PaddlePaddle库,需要在MacOS系统上进行。本文的将介绍在MacOS上,从源码交叉编译iOS平台上适用的PaddlePaddle库。 ## 准备交叉编译环境 @@ -25,7 +25,7 @@ iOS平台可选配置参数: - `IOS_PLATFORM`,可设置为`OS/SIMULATOR`,默认值为`OS`。 - `OS`,构建目标为`arm`架构的iPhone或者iPad等物理设备。 - `SIMULATOR`,构建目标为`x86`架构的模拟器平台。 -- `IOS_ARCH`,目标架构。针对不同的`IOS_PLATFORM`,可设置的目标架构如下表所示: +- `IOS_ARCH`,目标架构。针对不同的`IOS_PLATFORM`,可设置的目标架构如下表所示,默认编译所有架构: @@ -41,11 +41,11 @@ iOS平台可选配置参数: - + - +
OSarmv7, armv7s, arm64 (默认)armv7, armv7s, arm64
SIMULATORi386, x86_64 (默认)i386, x86_64
@@ -66,7 +66,7 @@ iOS平台可选配置参数: ```bash cmake -DCMAKE_SYSTEM_NAME=iOS \ -DIOS_PLATFORM=OS \ - -DIOS_ARCH="arm64" \ + -DIOS_ARCH="armv7;arm64" \ -DIOS_ENABLE_BITCODE=ON \ -DIOS_USE_VECLIB_FOR_BLAS=ON \ -DCMAKE_INSTALL_PREFIX=your/path/to/install \ @@ -112,6 +112,6 @@ $ make install - `lib`目录,其中包含PaddlePaddle的C-API静态库 - `third_party`目录,其中包含所依赖的所有第三方库 -注意,不同架构的PaddlePaddle库建议安装到不同的目录下,然后使用`lipo`工具将多个静态库合并成一个支持多个架构的fat库。 +注意,如果PaddlePaddle库需要同时支持真机和模拟器,则需要分别编译真机和模拟器版本,然后使用`lipo`工具合并fat库。 自此,PaddlePaddle库已经安装完成,用户可将合成的fat库用于深度学习相关的iOS App中,调用方法见C-API文档。 diff --git a/doc/mobile/cross_compiling_for_raspberry_cn.md b/doc/mobile/cross_compiling_for_raspberry_cn.md index 6e983645faaed1f67edaeeb82ddbef9cef6bb85f..f8ef9dc8031613831437745995268f3abc392f5b 100644 --- a/doc/mobile/cross_compiling_for_raspberry_cn.md +++ b/doc/mobile/cross_compiling_for_raspberry_cn.md @@ -1,4 +1,4 @@ -# 构建Raspberry Pi平台上的PaddlePaddle库 +# Raspberry Pi平台编译指南 通常有两个方法来构建基于 Rasspberry Pi 的版本: diff --git a/paddle/cuda/include/hl_gpu.h b/paddle/cuda/include/hl_gpu.h index ede2670882ee2b93f610a2261a4ecc1784bc2d0c..4ab8de80d1c7be0f8e3eb848955373dd5e21bc18 100644 --- a/paddle/cuda/include/hl_gpu.h +++ b/paddle/cuda/include/hl_gpu.h @@ -25,7 +25,9 @@ limitations under the License. */ #include "hl_matrix.h" #include "hl_sequence.h" #include "hl_sparse.h" +#ifndef PADDLE_MOBILE_INFERENCE #include "hl_warpctc_wrap.h" +#endif #ifdef HPPL_STUB_FUNC #include "stub/hl_aggregate_stub.h" diff --git a/paddle/framework/backward.cc b/paddle/framework/backward.cc index b3b9c45ded95ce2e735b8898d47760956dcacdce..00d9dd238ec5328be28f58f8118daad3a039e08c 100644 --- a/paddle/framework/backward.cc +++ b/paddle/framework/backward.cc @@ -270,6 +270,19 @@ static bool AllGradInSet(const std::vector& names, return false; } } + if (VLOG_IS_ON(10)) { + std::ostringstream sout; + sout << "All input {"; + for (auto& name : names) { + sout << name << ","; + } + sout << "} is in {"; + for (auto& name : set) { + sout << name << ","; + } + sout << "}"; + VLOG(10) << sout.str(); + } return true; } @@ -290,14 +303,12 @@ static void CreateGradVarInBlock( auto ops = block_desc->AllOps(); for (size_t op_index = grad_op_start_index; op_index < ops.size(); ++op_index) { - bool need_infer_shape = false; std::unordered_set new_vars; ForEachVarName(ops[op_index]->Outputs(), [&](const std::string& grad_var_name) { if (block_desc->HasVar(grad_var_name)) { return false; } - need_infer_shape = true; auto var = block_desc->Var(grad_var_name); new_vars.insert(var->Name()); auto it = param_name_map.find(grad_var_name); @@ -311,23 +322,21 @@ static void CreateGradVarInBlock( grad_record.op_idx_ = static_cast(op_index); return false; /* not break */ }); - if (need_infer_shape) { - ops[op_index]->InferVarType(block_desc); - for (auto& arg : ops[op_index]->OutputArgumentNames()) { - if (new_vars.find(arg) == new_vars.end()) { - continue; - } - auto pname = FwdName(arg); - auto* param = block_desc->FindVarRecursive(pname); - auto* grad = block_desc->FindVar(arg); - if (param == nullptr) { - grad->SetDataType(DataType::FP32); - } else { - grad->SetDataType(param->GetDataType()); - } + ops[op_index]->InferVarType(block_desc); + for (auto& arg : ops[op_index]->OutputArgumentNames()) { + if (new_vars.find(arg) == new_vars.end()) { + continue; + } + auto pname = FwdName(arg); + auto* param = block_desc->FindVarRecursive(pname); + auto* grad = block_desc->FindVar(arg); + if (param == nullptr) { + grad->SetDataType(DataType::FP32); + } else { + grad->SetDataType(param->GetDataType()); } - ops[op_index]->InferShape(*block_desc); } + ops[op_index]->InferShape(*block_desc); } } @@ -387,6 +396,7 @@ std::vector> MakeBlockBackward( ProgramDescBind& program_desc, int block_idx, std::unordered_set* no_grad_vars, std::unordered_map* grad_to_var) { + VLOG(5) << "MakeBlockBackward"; BlockDescBind* cur_block = program_desc.MutableBlock(block_idx); std::vector op_descs = cur_block->AllOps(); std::unordered_map> dup_out_ops; @@ -394,9 +404,10 @@ std::vector> MakeBlockBackward( std::vector> backward_descs; for (auto it = op_descs.rbegin(); it != op_descs.rend(); ++it) { + VLOG(5) << "Making backward " << (*it)->Type() << " op"; std::vector> op_grads; - if ((*it)->Type() == "recurrent") { + if ((*it)->Type() == "recurrent" || (*it)->Type() == "while") { int step_block_idx = (*it)->GetBlockAttr("step_block"); BlockDescBind* backward_block = CreateStepBlock( program_desc, no_grad_vars, grad_to_var, step_block_idx); @@ -410,6 +421,15 @@ std::vector> MakeBlockBackward( op_grads = MakeOpGrad(*it, no_grad_vars, grad_to_var); } + if (VLOG_IS_ON(10)) { + std::ostringstream sout; + sout << "Made "; + for (auto& op_grad : op_grads) { + sout << op_grad->Type() << " "; + } + VLOG(10) << sout.str(); + } + for (const auto& desc : op_grads) { for (const std::string& out_name : desc->OutputArgumentNames()) { if (out_name.find("@GRAD") == std::string::npos) { @@ -425,6 +445,8 @@ std::vector> MakeBlockBackward( op_grads.begin(), op_grads.end(), std::back_inserter(backward_descs), [](std::unique_ptr& ptr) { return std::move(ptr); }); } + + VLOG(5) << "Appending Sums"; // Check whether some variables are written more than once std::list>> pending_sum_ops; for (const auto& dup : dup_out_ops) { @@ -432,16 +454,22 @@ std::vector> MakeBlockBackward( const std::vector dup_op = dup.second; if (out_name != kEmptyVarName && dup_op.size() > 1) { std::vector sum_op_inputs; + std::string next_g_name = out_name; for (size_t i = 0; i < dup_op.size(); ++i) { + VLOG(10) << backward_descs[dup_op[i]]->Type() << " has " << out_name + << " duplicated"; std::string new_name = out_name + "@RENAME@" + std::to_string(i); - backward_descs[dup_op[i]]->Rename(out_name, new_name); + backward_descs[dup_op[i]]->RenameOutput(out_name, new_name); + backward_descs[dup_op[i]]->RenameInput(out_name, next_g_name); sum_op_inputs.emplace_back(new_name); + next_g_name = sum_op_inputs.back(); } std::unique_ptr sum_op(new OpDescBind( "sum", {{"X", sum_op_inputs}}, {{"Out", {out_name}}}, {})); pending_sum_ops.push_back({dup_op.back(), std::move(sum_op)}); } } + pending_sum_ops.sort( [](const std::pair>& a, const std::pair>& b) { @@ -452,6 +480,8 @@ std::vector> MakeBlockBackward( std::move(p.second)); } + VLOG(5) << "MakeBlockBackward Finished"; + return backward_descs; } diff --git a/paddle/framework/data_type.h b/paddle/framework/data_type.h index 3ec88d7a72c3339bf5e7d0ca3957a3f608f039b7..be144d8fc0104fccc08006532a85906ade25c2a1 100644 --- a/paddle/framework/data_type.h +++ b/paddle/framework/data_type.h @@ -29,6 +29,8 @@ inline DataType ToDataType(std::type_index type) { return DataType::INT32; } else if (typeid(int64_t).hash_code() == type.hash_code()) { return DataType::INT64; + } else if (typeid(bool).hash_code() == type.hash_code()) { + return DataType::BOOL; } else { PADDLE_THROW("Not supported"); } diff --git a/paddle/framework/ddim.cc b/paddle/framework/ddim.cc index 53b899a23997b71e723a298ec360a4e018d89878..8b6f42b82df14bfcd25f33ef16b5903fb965a8ba 100644 --- a/paddle/framework/ddim.cc +++ b/paddle/framework/ddim.cc @@ -60,8 +60,7 @@ void make_ddim(DDim& ddim, const int64_t* dims, int n) { ddim = make_dim<9>(dims); break; default: - throw std::invalid_argument( - "Dynamic dimensions must have between [1, 9] dimensions."); + PADDLE_THROW("Dynamic dimensions must have between [1, 9] dimensions."); } } diff --git a/paddle/framework/executor.cc b/paddle/framework/executor.cc index 2fcf41d69f0011b0d9a3d89c97fcebacb0703e97..adedd8cb0e8504fd6fc924e62a2ede3c1c7ce698 100644 --- a/paddle/framework/executor.cc +++ b/paddle/framework/executor.cc @@ -120,6 +120,7 @@ void Executor::Run(const ProgramDescBind& pdesc, Scope* scope, int block_id, for (auto& op_desc : block.AllOps()) { auto op = paddle::framework::OpRegistry::CreateOp(*op_desc); + VLOG(10) << op->DebugString(); op->Run(*local_scope, *device); } if (create_local_scope) { diff --git a/paddle/framework/op_desc.cc b/paddle/framework/op_desc.cc index 39c8def82e1ebb10a0e357a648af760099020c32..48cd131550dea5ad3f368b25c31d753efbe0dff9 100644 --- a/paddle/framework/op_desc.cc +++ b/paddle/framework/op_desc.cc @@ -235,6 +235,23 @@ void OpDescBind::Rename(const std::string &old_name, need_update_ = true; } +void OpDescBind::RenameOutput(const std::string &old_name, + const std::string &new_name) { + for (auto &output : outputs_) { + std::replace(output.second.begin(), output.second.end(), old_name, + new_name); + } + need_update_ = true; +} + +void OpDescBind::RenameInput(const std::string &old_name, + const std::string &new_name) { + for (auto &input : inputs_) { + std::replace(input.second.begin(), input.second.end(), old_name, new_name); + } + need_update_ = true; +} + struct SetAttrDescVisitor : public boost::static_visitor { explicit SetAttrDescVisitor(OpDesc::Attr *attr) : attr_(attr) {} mutable OpDesc::Attr *attr_; @@ -448,7 +465,12 @@ const std::vector &CompileTimeInferShapeContext::Outputs( DDim CompileTimeInferShapeContext::GetDim(const std::string &name) const { auto var = block_.FindVarRecursive(name); PADDLE_ENFORCE(var != nullptr, "Cannot find variable %s", name); - return framework::make_ddim(var->Shape()); + try { + return framework::make_ddim(var->Shape()); + } catch (...) { + VLOG(5) << "GetDim of variable " << name << " error"; + std::rethrow_exception(std::current_exception()); + } } void CompileTimeInferShapeContext::SetDim(const std::string &name, diff --git a/paddle/framework/op_desc.h b/paddle/framework/op_desc.h index e3e96441bbf51729f2ba69c9257e6961b1de0d5c..da032319afa775571d3942bf6ae415db7d233735 100644 --- a/paddle/framework/op_desc.h +++ b/paddle/framework/op_desc.h @@ -73,6 +73,10 @@ class OpDescBind { void Rename(const std::string &old_name, const std::string &new_name); + void RenameOutput(const std::string &old_name, const std::string &new_name); + + void RenameInput(const std::string &old_name, const std::string &new_name); + // Only be used in C++ const AttributeMap &GetAttrMap() const; diff --git a/paddle/framework/operator.cc b/paddle/framework/operator.cc index 3276f8af396fe58450a8dc6713fe61e49d5ca708..93467ab8ac796277b47a861a427de2837fb2d3d4 100644 --- a/paddle/framework/operator.cc +++ b/paddle/framework/operator.cc @@ -403,19 +403,6 @@ class RuntimeInferShapeContext : public InferShapeContext { void OperatorWithKernel::Run(const Scope& scope, const platform::DeviceContext& dev_ctx) const { - if (VLOG_IS_ON(1)) { - auto inputs = this->InputVars(); - auto outputs = this->OutputVars(true); - std::ostringstream sout; - sout << "Run operator " << this->Type() << " From ["; - std::ostream_iterator out_it(sout, ","); - std::copy(inputs.begin(), inputs.end(), out_it); - sout << "] to ["; - std::copy(outputs.begin(), outputs.end(), out_it); - sout << "]"; - VLOG(1) << sout.str(); - } - RuntimeInferShapeContext infer_shape_ctx(*this, scope); this->InferShape(&infer_shape_ctx); diff --git a/paddle/framework/scope.cc b/paddle/framework/scope.cc index 9428b8a07ea0af005f6e960ddaa02da624ad9d97..9ad6272c99dd6a85520ae44c1331ac232bc6a9a2 100644 --- a/paddle/framework/scope.cc +++ b/paddle/framework/scope.cc @@ -38,11 +38,12 @@ Scope& Scope::NewScope() const { Variable* Scope::Var(const std::string& name) { auto iter = vars_.find(name); if (iter != vars_.end()) { + VLOG(3) << "Get existing variable " << name; return iter->second; } Variable* v = new Variable(); vars_[name] = v; - VLOG(3) << "Create variable " << name << " on scope"; + VLOG(3) << "Create variable " << name; v->name_ = &(vars_.find(name)->first); return v; } diff --git a/paddle/framework/shape_inference.h b/paddle/framework/shape_inference.h index 7d36ead2ca85328c7843b3b5d423cf8e921d1c93..05dc47f06ac81f0acb6d0317cbecb3009c7dd7f0 100644 --- a/paddle/framework/shape_inference.h +++ b/paddle/framework/shape_inference.h @@ -53,6 +53,10 @@ class InferShapeContext { virtual bool IsRuntime() const = 0; + // Note: In while op, we need this to be public + void SetDims(const std::vector &names, + const std::vector &dims); + protected: virtual framework::DDim GetDim(const std::string &name) const = 0; virtual void SetDim(const std::string &name, const framework::DDim &dim) = 0; @@ -60,9 +64,6 @@ class InferShapeContext { std::vector GetDims( const std::vector &names) const; - void SetDims(const std::vector &names, - const std::vector &dims); - std::vector GetVarTypes( const std::vector &names) const; diff --git a/paddle/gserver/layers/MKLDNNLayer.cpp b/paddle/gserver/layers/MKLDNNLayer.cpp index e75ac5ba4647a8267b7bc189893bd7adb5c3053f..2125155c6cb807045c1a25f422dc072d0a401716 100644 --- a/paddle/gserver/layers/MKLDNNLayer.cpp +++ b/paddle/gserver/layers/MKLDNNLayer.cpp @@ -22,7 +22,7 @@ namespace paddle { bool MKLDNNLayer::init(const LayerMap& layerMap, const ParameterMap& parameterMap) { CHECK(FLAGS_use_mkldnn) << "MkldnnLayers only support use_mkldnn." - << "Please set WITH_MKLDNN=ON " + << "Please set WITH_MKL=ON " << "and set use_mkldnn=True"; CHECK(!useGpu_) << "Do not support GPU yet"; diff --git a/paddle/math/Storage.cpp b/paddle/math/Storage.cpp index 4adaaef9838f0d178468af3af142031325bfc11d..a2ef731ecbcd18ca4bd0b2381de04650a2686c2d 100644 --- a/paddle/math/Storage.cpp +++ b/paddle/math/Storage.cpp @@ -17,9 +17,13 @@ limitations under the License. */ #include "paddle/utils/StringUtil.h" #include "paddle/utils/Util.h" +#ifndef PADDLE_MOBILE_INFERENCE DEFINE_int32(pool_limit_size, 536870912, "maximum memory size managed by a memory pool, default is 512M"); +#else +DEFINE_int32(pool_limit_size, 0, "default is 0"); +#endif namespace paddle { diff --git a/paddle/operators/array_operator.h b/paddle/operators/array_operator.h index 666043e824f885e9c0e79e319d0a38ba108c209a..233a81198e336d3190565fb18556f96979cec0ce 100644 --- a/paddle/operators/array_operator.h +++ b/paddle/operators/array_operator.h @@ -42,6 +42,7 @@ class ArrayOp : public framework::OperatorBase { } else { offset = static_cast(*i_tensor.data()); } + VLOG(10) << " Offset = " << offset; return offset; } }; diff --git a/paddle/operators/bilinear_tensor_product_op.h b/paddle/operators/bilinear_tensor_product_op.h index ffa4f43a327418498c1f110504127e7d2878409d..1113a4c6f357edb4f6b14b73c6eec9c6cca24ce5 100644 --- a/paddle/operators/bilinear_tensor_product_op.h +++ b/paddle/operators/bilinear_tensor_product_op.h @@ -174,7 +174,7 @@ class BilinearTensorProductGradKernel : public framework::OpKernel { // Caculate the gradient of Input(Bias). if (d_bias) { d_bias->mutable_data(ctx.GetPlace()); - auto d_bias_mat = EigenMatrix::From(*d_bias); + auto d_bias_mat = framework::EigenVector::Flatten(*d_bias); d_bias_mat.device(place) = d_out_mat.sum(Eigen::DSizes(0)); } } diff --git a/paddle/operators/conv_transpose_op.cc b/paddle/operators/conv_transpose_op.cc index 13ac0cd54cbeb8f68c2246f7e1d02f032266a72e..310e3f5c937bd1345663b2c2307610a485a027ef 100644 --- a/paddle/operators/conv_transpose_op.cc +++ b/paddle/operators/conv_transpose_op.cc @@ -30,11 +30,6 @@ void ConvTransposeOp::InferShape(framework::InferShapeContext* ctx) const { std::vector strides = ctx->Attrs().Get>("strides"); std::vector paddings = ctx->Attrs().Get>("paddings"); - for (size_t i = 0; i < paddings.size(); ++i) { - PADDLE_ENFORCE_EQ(paddings[i], 0, - "No Padding allowed in conv transpose op."); - } - PADDLE_ENFORCE(in_dims.size() == 4 || in_dims.size() == 5, "ConvTransposeOp intput should be 4-D or 5-D tensor."); PADDLE_ENFORCE_EQ(in_dims.size(), filter_dims.size(), @@ -52,7 +47,7 @@ void ConvTransposeOp::InferShape(framework::InferShapeContext* ctx) const { std::vector output_shape({in_dims[0], filter_dims[1]}); for (size_t i = 0; i < strides.size(); ++i) { - output_shape.push_back((in_dims[i + 2] - 1) * strides[i] + + output_shape.push_back((in_dims[i + 2] - 1) * strides[i] - 2 * paddings[i] + filter_dims[i + 2]); } ctx->SetOutputDim("Output", framework::make_ddim(output_shape)); diff --git a/paddle/operators/conv_transpose_op.h b/paddle/operators/conv_transpose_op.h index 4b2bd60437da8f58054d8cdd5e6ba1fdac05f0d5..ab336ad23ce1c180b68d04e4c85b299e301d5376 100644 --- a/paddle/operators/conv_transpose_op.h +++ b/paddle/operators/conv_transpose_op.h @@ -62,7 +62,6 @@ class GemmConvTransposeKernel : public framework::OpKernel { Tensor* output = context.Output("Output"); std::vector strides = context.Attr>("strides"); - // Actually, no paddings and groups allowed in conv transpose. std::vector paddings = context.Attr>("paddings"); // TODO(Zhuoyuan): Paddings can be added in future. // groups will alway be disabled in conv2dtranspose. @@ -148,8 +147,8 @@ class GemmConvTransposeKernel : public framework::OpKernel { } else if (filter_shape_vec.size() == 3) { // col2vol: col_matrix -> dy // from (c * k_d * k_h * k_w, d * h * w) to (c, o_d, o_h, o_w) - col2vol(context.device_context(), col, dilations, strides, - std::vector{0, 0, 0}, &output_batch); + col2vol(context.device_context(), col, dilations, strides, paddings, + &output_batch); } } } @@ -173,7 +172,6 @@ class GemmConvTransposeGradKernel : public framework::OpKernel { if ((!input_grad) && (!filter_grad)) return; std::vector strides = context.Attr>("strides"); - // Actually, no paddings and groups allowed in conv transpose. std::vector paddings = context.Attr>("paddings"); const int batch_size = static_cast(input->dims()[0]); diff --git a/paddle/operators/cos_sim_op.h b/paddle/operators/cos_sim_op.h index 68c56f531f941e1b8f66ac7ba6bf318881642c4f..62a4e484eceeabc4cc26e68ac54a50be1ac95df7 100644 --- a/paddle/operators/cos_sim_op.h +++ b/paddle/operators/cos_sim_op.h @@ -132,7 +132,7 @@ class CosSimGradKernel : public framework::OpKernel { // compute dy if (out_grad_y) { out_grad_y->mutable_data(context.GetPlace()); - auto dy = EigenMatrix::Reshape(*out_grad_y, 1); + auto dy = EigenVector::Flatten(*out_grad_y); auto grad = x / norm_prod_bcast - z_bcast * y_bcast / y_snorm_bcast; dy.device(place) = (dz_bcast * grad).sum(Eigen::array({{0}})); } diff --git a/paddle/operators/detail/safe_ref.h b/paddle/operators/detail/safe_ref.h new file mode 100644 index 0000000000000000000000000000000000000000..b71af17309f9f46b5c87f0f479d4e03443fa7f93 --- /dev/null +++ b/paddle/operators/detail/safe_ref.h @@ -0,0 +1,31 @@ +/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserved. + + Licensed under the Apache License, Version 2.0 (the "License"); + you may not use this file except in compliance with the License. + You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. */ + +#pragma once + +namespace paddle { +namespace operators { +namespace detail { +/** + * Get Reference From Pointer with check. The error message is printf format, + * and passed by `args` + */ +template +inline T &Ref(T *ptr, ARGS &&... args) { + PADDLE_ENFORCE(ptr != nullptr, args...); + return *ptr; +} +} // namespace detail +} // namespace operators +} // namespace paddle diff --git a/paddle/operators/fill_constant_batch_size_like_op.cc b/paddle/operators/fill_constant_batch_size_like_op.cc index 85871ebbfcd8ee38ef5e8078d1d6cb6bdda46a7b..985b5d1e865e513d833bff72dcd20a8f20851d8c 100644 --- a/paddle/operators/fill_constant_batch_size_like_op.cc +++ b/paddle/operators/fill_constant_batch_size_like_op.cc @@ -101,4 +101,7 @@ REGISTER_OPERATOR(fill_constant_batch_size_like, REGISTER_OP_CPU_KERNEL( fill_constant_batch_size_like, ops::FillConstantBatchSizeLikeOpKernel, - ops::FillConstantBatchSizeLikeOpKernel); + ops::FillConstantBatchSizeLikeOpKernel, + ops::FillConstantBatchSizeLikeOpKernel, + ops::FillConstantBatchSizeLikeOpKernel); diff --git a/paddle/operators/fill_constant_batch_size_like_op.cu.cc b/paddle/operators/fill_constant_batch_size_like_op.cu.cc index 87e3697e2832e7c60a4293fe7126ae4c9c053e4d..9e7a1eeab863c962ca72908e561e12a04d5021c5 100644 --- a/paddle/operators/fill_constant_batch_size_like_op.cu.cc +++ b/paddle/operators/fill_constant_batch_size_like_op.cu.cc @@ -19,4 +19,7 @@ namespace ops = paddle::operators; REGISTER_OP_GPU_KERNEL( fill_constant_batch_size_like, ops::FillConstantBatchSizeLikeOpKernel, - ops::FillConstantBatchSizeLikeOpKernel); + ops::FillConstantBatchSizeLikeOpKernel, + ops::FillConstantBatchSizeLikeOpKernel, + ops::FillConstantBatchSizeLikeOpKernel); diff --git a/paddle/operators/fill_zeros_like_op.cc b/paddle/operators/fill_zeros_like_op.cc index 8ab39d4fb012b8fa3883f33e4d15be7918500354..95fb5932b8b555e1357adc9fdfb7b6e6db7da71d 100644 --- a/paddle/operators/fill_zeros_like_op.cc +++ b/paddle/operators/fill_zeros_like_op.cc @@ -54,5 +54,8 @@ namespace ops = paddle::operators; REGISTER_OP_WITHOUT_GRADIENT(fill_zeros_like, ops::FillZerosLikeOp, ops::FillZerosLikeOpMaker); REGISTER_OP_CPU_KERNEL( - fill_zeros_like, - ops::FillZerosLikeKernel); + fill_zeros_like, ops::FillZerosLikeKernel, + ops::FillZerosLikeKernel, + ops::FillZerosLikeKernel, + ops::FillZerosLikeKernel, + ops::FillZerosLikeKernel); diff --git a/paddle/operators/fill_zeros_like_op.cu.cc b/paddle/operators/fill_zeros_like_op.cu.cc index 2adb40cf90b42a5ba608302f7985346c949ff6ed..1501a17441072223ba0e8cf5b6c8cdd5e903a467 100644 --- a/paddle/operators/fill_zeros_like_op.cu.cc +++ b/paddle/operators/fill_zeros_like_op.cu.cc @@ -17,5 +17,8 @@ namespace ops = paddle::operators; REGISTER_OP_GPU_KERNEL( - fill_zeros_like, - ops::FillZerosLikeKernel); + fill_zeros_like, ops::FillZerosLikeKernel, + ops::FillZerosLikeKernel, + ops::FillZerosLikeKernel, + ops::FillZerosLikeKernel, + ops::FillZerosLikeKernel); diff --git a/paddle/operators/is_empty_op.cc b/paddle/operators/is_empty_op.cc new file mode 100644 index 0000000000000000000000000000000000000000..54fecf44e881b5c283c81580fd161da9808d253e --- /dev/null +++ b/paddle/operators/is_empty_op.cc @@ -0,0 +1,67 @@ +/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserve. + + Licensed under the Apache License, Version 2.0 (the "License"); + you may not use this file except in compliance with the License. + You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. */ + +#include "paddle/framework/op_registry.h" +#include "paddle/framework/operator.h" + +namespace paddle { +namespace operators { + +constexpr char kInput[] = "X"; +constexpr char kOutput[] = "Out"; + +class IsEmptyOp : public framework::OperatorBase { + public: + IsEmptyOp(const std::string &type, const framework::VariableNameMap &inputs, + const framework::VariableNameMap &outputs, + const framework::AttributeMap &attrs) + : OperatorBase(type, inputs, outputs, attrs) {} + + void Run(const framework::Scope &scope, + const platform::DeviceContext &dev_ctx) const override { + // get input + auto *var = scope.FindVar(Input(kInput)); + PADDLE_ENFORCE_NOT_NULL(var); + auto &tensor = var->Get(); + // get output + auto *out = scope.FindVar(Output(kOutput)); + PADDLE_ENFORCE_NOT_NULL(out); + auto *out_tensor = out->GetMutable(); + + out_tensor->Resize({1}); + out_tensor->mutable_data(platform::CPUPlace())[0] = + framework::product(tensor.dims()) == 0; + } +}; + +class IsEmptyOpProtoMaker : public framework::OpProtoAndCheckerMaker { + public: + IsEmptyOpProtoMaker(framework::OpProto *proto, + framework::OpAttrChecker *op_checker) + : OpProtoAndCheckerMaker(proto, op_checker) { + AddInput(kInput, "(Tensor) Tensor which is to be checked."); + AddOutput(kOutput, "(Tensor) a boolean Tensor that indicate empty or not."); + AddComment(R"DOC( +IsEmpty Operator which checks whether a tensor is empty. + +It will just return product(tensor.ddims()) > 0; + )DOC"); + } +}; + +} // namespace operators +} // namespace paddle + +REGISTER_OP_WITHOUT_GRADIENT(is_empty, paddle::operators::IsEmptyOp, + paddle::operators::IsEmptyOpProtoMaker); diff --git a/paddle/operators/math/CMakeLists.txt b/paddle/operators/math/CMakeLists.txt index b9417f1d7fdc663fff751328d18239af3dbb1216..002b68fecf4f1e294387357f0346d9926a2b2b5a 100644 --- a/paddle/operators/math/CMakeLists.txt +++ b/paddle/operators/math/CMakeLists.txt @@ -1,7 +1,7 @@ add_subdirectory(detail) if(WITH_GPU) - nv_library(math_function SRCS math_function.cc math_function.cu im2col.cc im2col.cu DEPS cblas device_context) + nv_library(math_function SRCS math_function.cc math_function.cu im2col.cc im2col.cu DEPS cblas device_context framework_proto) nv_test(math_function_gpu_test SRCS math_function_test.cu DEPS math_function tensor) nv_library(selected_rows_functor SRCS selected_rows_functor.cc selected_rows_functor.cu DEPS selected_rows math_function) nv_test(selected_rows_functor_gpu_test SRCS selected_rows_functor_test.cu DEPS selected_rows_functor) @@ -15,7 +15,7 @@ if(WITH_GPU) nv_library(lstm_compute SRCS lstm_compute.cc lstm_compute.cu DEPS device_context activation_functions) nv_library(gru_compute SRCS gru_compute.cc gru_compute.cu DEPS device_context activation_functions math_function) else() - cc_library(math_function SRCS math_function.cc im2col.cc DEPS cblas device_context) + cc_library(math_function SRCS math_function.cc im2col.cc DEPS cblas device_context framework_proto) cc_library(selected_rows_functor SRCS selected_rows_functor.cc DEPS selected_rows math_function) cc_library(softmax SRCS softmax.cc DEPS device_context) cc_library(cross_entropy SRCS cross_entropy.cc DEPS device_context) diff --git a/paddle/operators/math/im2col.cu b/paddle/operators/math/im2col.cu index 347df7a0ffdec163c0479a71ec775a813930ba5f..bf7894243919571c2ab15d53690b1ef05bfcc6ee 100644 --- a/paddle/operators/math/im2col.cu +++ b/paddle/operators/math/im2col.cu @@ -119,8 +119,8 @@ __global__ void col2im(int n, const T* data_col, int im_height, int im_width, if (index < n) { T val = 0; - int w = index % im_width; - int h = (index / im_width) % im_height; + int w = index % im_width + padding_width; + int h = (index / im_width) % im_height + padding_height; int c = index / (im_width * im_height); // compute the start and end of the output diff --git a/paddle/operators/math/math_function.cc b/paddle/operators/math/math_function.cc index 5ee091788687133f6eaef7229d9f95e2025a2daf..2e333a8cde721f8e65dbf2cf5e3aac6272172cc0 100644 --- a/paddle/operators/math/math_function.cc +++ b/paddle/operators/math/math_function.cc @@ -250,6 +250,8 @@ void axpy(const platform::DeviceContext& context, template struct SetConstant; template struct SetConstant; template struct SetConstant; +template struct SetConstant; +template struct SetConstant; #define DEFINE_CPU_TRANS(RANK) \ template struct Transpose; \ diff --git a/paddle/operators/math/math_function.cu b/paddle/operators/math/math_function.cu index 38c04b97f9d07b9cca938b09f46ea81328a35322..58356a4b7783241ca0292829bf05dc1a8ed80c6c 100644 --- a/paddle/operators/math/math_function.cu +++ b/paddle/operators/math/math_function.cu @@ -256,6 +256,8 @@ void axpy(const platform::DeviceContext& context, template struct SetConstant; template struct SetConstant; template struct SetConstant; +template struct SetConstant; +template struct SetConstant; #define DEFINE_GPU_TRANS(RANK) \ template struct Transpose; \ diff --git a/paddle/operators/sum_op.cc b/paddle/operators/sum_op.cc index 9837f325e30f68ba927a540d395cc7d7e093a607..c2b7632b2865a3ef66051d815d7722a08c6a8cbd 100644 --- a/paddle/operators/sum_op.cc +++ b/paddle/operators/sum_op.cc @@ -12,6 +12,7 @@ limitations under the License. */ #include "paddle/operators/sum_op.h" #include #include "paddle/framework/var_type_inference.h" +#include "paddle/operators/detail/safe_ref.h" namespace paddle { namespace operators { @@ -59,13 +60,16 @@ class SumOp : public framework::OperatorWithKernel { x_vars[0]->Get().value().type()), ctx.device_context()); } else if (x_vars[0]->IsType()) { - auto& array = x_vars[0]->Get(); - for (auto& each : array) { - if (each.numel() != 0) { - return framework::OpKernelType(framework::ToDataType(each.type()), - ctx.device_context()); + for (auto& x_var : x_vars) { + auto& array = x_var->Get(); + for (auto& each : array) { + if (each.numel() != 0) { + return framework::OpKernelType(framework::ToDataType(each.type()), + ctx.device_context()); + } } } + PADDLE_THROW("Cannot find the input data type by all input data"); } PADDLE_THROW("Unexpected branch. Input type is %s", x_vars[0]->Type().name()); @@ -96,6 +100,11 @@ class SumOpVarTypeInference : public framework::VarTypeInference { auto& inputs = op_desc.Input("X"); auto var_type = framework::VarDesc::SELECTED_ROWS; + for (auto& name : op_desc.Input("X")) { + VLOG(10) << name << " " + << block->FindRecursiveOrCreateVar(name)->GetType(); + } + bool any_input_is_lod_tensor = std::any_of( inputs.begin(), inputs.end(), [block](const std::string& name) { return block->FindRecursiveOrCreateVar(name)->GetType() == @@ -103,7 +112,7 @@ class SumOpVarTypeInference : public framework::VarTypeInference { }); auto is_tensor_array = [block](const std::string& name) { - return block->FindRecursiveOrCreateVar(name)->GetType() == + return detail::Ref(block->FindRecursiveOrCreateVar(name)).GetType() == framework::VarDesc::LOD_TENSOR_ARRAY; }; @@ -113,14 +122,26 @@ class SumOpVarTypeInference : public framework::VarTypeInference { std::all_of(inputs.begin(), inputs.end(), is_tensor_array); if (any_input_is_tensor_array) { - PADDLE_ENFORCE(all_inputs_are_tensor_array); + if (!all_inputs_are_tensor_array) { + std::ostringstream os; + for (auto& each : inputs) { + os << " " << each << " type is " + << detail::Ref(block->FindRecursiveOrCreateVar(each)).GetType() + << "\n"; + } + PADDLE_ENFORCE(all_inputs_are_tensor_array, + "Not all inputs are tensor array:\n%s", os.str()); + } var_type = framework::VarDesc::LOD_TENSOR_ARRAY; } else if (any_input_is_lod_tensor) { var_type = framework::VarDesc::LOD_TENSOR; } auto out_var_name = op_desc.Output("Out").front(); - block->FindRecursiveOrCreateVar(out_var_name)->SetType(var_type); + auto& out_var = detail::Ref(block->FindRecursiveOrCreateVar(out_var_name)); + out_var.SetType(var_type); + auto& in_var = detail::Ref(block->FindVarRecursive(inputs.front())); + out_var.SetDataType(in_var.GetDataType()); } }; diff --git a/paddle/operators/tensor_array_read_write_op.cc b/paddle/operators/tensor_array_read_write_op.cc index 62e15604c47f25c458abc69ecd1cabf964de39bb..ae1b48d7a8e3d573a5134a822a2ed5ef70511077 100644 --- a/paddle/operators/tensor_array_read_write_op.cc +++ b/paddle/operators/tensor_array_read_write_op.cc @@ -12,7 +12,7 @@ See the License for the specific language governing permissions and limitations under the License. */ #include "paddle/operators/array_operator.h" - +#include "paddle/operators/detail/safe_ref.h" namespace paddle { namespace operators { @@ -33,6 +33,8 @@ class WriteToArrayOp : public ArrayOp { auto *out = scope.FindVar(Output("Out"))->GetMutable(); if (offset >= out->size()) { + VLOG(10) << "Resize " << Output("Out") << " from " << out->size() + << " to " << offset + 1; out->resize(offset + 1); } auto *out_tensor = &out->at(offset); @@ -85,11 +87,15 @@ class WriteToArrayInferVarType : public framework::VarTypeInference { public: void operator()(const framework::OpDescBind &op_desc, framework::BlockDescBind *block) const override { - for (auto &out_var : op_desc.OutputArgumentNames()) { - VLOG(10) << "Set Variable " << out_var << " as LOD_TENSOR_ARRAY"; - block->FindRecursiveOrCreateVar(out_var)->SetType( - framework::VarDesc::LOD_TENSOR_ARRAY); - } + auto x_name = op_desc.Input("X")[0]; + auto out_name = op_desc.Output("Out")[0]; + VLOG(10) << "Set Variable " << out_name << " as LOD_TENSOR_ARRAY"; + auto &out = detail::Ref(block->FindRecursiveOrCreateVar(out_name), + "Cannot found %s", out_name); + out.SetType(framework::VarDesc::LOD_TENSOR_ARRAY); + auto &x = + detail::Ref(block->FindVarRecursive(x_name), "Cannot found %s", x_name); + out.SetDataType(x.GetDataType()); } }; @@ -107,11 +113,11 @@ class ReadFromArrayOp : public ArrayOp { auto &x_array = x->Get(); auto *out = scope.FindVar(Output("Out")); PADDLE_ENFORCE(out != nullptr, "Out must be set"); - auto *out_tesnor = out->GetMutable(); + auto *out_tensor = out->GetMutable(); size_t offset = GetOffset(scope, dev_ctx); PADDLE_ENFORCE_LT(offset, x_array.size()); - out_tesnor->CopyFrom(x_array[offset], dev_ctx.GetPlace(), dev_ctx); - out_tesnor->set_lod(x_array[offset].lod()); + out_tensor->CopyFrom(x_array[offset], dev_ctx.GetPlace(), dev_ctx); + out_tensor->set_lod(x_array[offset].lod()); } }; diff --git a/paddle/operators/while_op.cc b/paddle/operators/while_op.cc index 4ca6c8507a48507fd29a9c9acae2bdf36ed936ee..dcc59f5ff2ae3a8ca999d72a20cfd5c759987d89 100644 --- a/paddle/operators/while_op.cc +++ b/paddle/operators/while_op.cc @@ -14,8 +14,10 @@ #include #include "paddle/framework/executor.h" +#include "paddle/framework/lod_tensor_array.h" #include "paddle/framework/op_registry.h" #include "paddle/framework/operator.h" +#include "paddle/operators/detail/safe_ref.h" namespace paddle { namespace operators { @@ -26,8 +28,9 @@ using LoDTensor = framework::LoDTensor; constexpr char kStepBlock[] = "step_block"; constexpr char kCondition[] = "Condition"; constexpr char kStepScopes[] = "StepScopes"; -constexpr char kParamGrads[] = "X@Grad"; constexpr char kParameters[] = "X"; +constexpr char kParamGrads[] = "X@GRAD"; +constexpr char kOutputs[] = "Out"; class WhileOp : public framework::OperatorBase { public: @@ -71,9 +74,9 @@ class WhileOpMaker : public framework::OpProtoAndCheckerMaker { kCondition, "(Bool) An scalar. When it's False, the While Op will be terminated.") .AsDuplicable(); - AddOutput("Out", + AddOutput(kOutputs, "A set of variables, which will be assigned with values " - "generated by perators inside the block of While Op.") + "generated by the operators inside the block of While Op.") .AsDuplicable(); AddOutput(kStepScopes, "(StepScopeVar) A vector of local scope, which size equals the " @@ -104,17 +107,64 @@ class WhileGradOp : public framework::OperatorBase { auto *step_scopes = scope.FindVar(Input(kStepScopes))->GetMutable(); + auto outside_og_names = Inputs(framework::GradVarName(kOutputs)); + auto inside_og_names = + Attr>("original_output_grad"); + + PADDLE_ENFORCE_EQ(outside_og_names.size(), inside_og_names.size()); + for (auto cur_scope_iter = step_scopes->rbegin(); cur_scope_iter != step_scopes->rend(); ++cur_scope_iter) { + VLOG(3) << "Start backward at time_step " + << cur_scope_iter - step_scopes->rbegin(); + framework::Scope &cur_scope = **cur_scope_iter; + // Link OG from outside to inside + for (size_t i = 0; i < outside_og_names.size(); ++i) { + auto outside_og_name = outside_og_names[i]; + auto inside_og_name = inside_og_names[i]; + VLOG(10) << "Linking outside " << outside_og_name << " --> inside " + << inside_og_name; + auto &og_outside = detail::Ref(scope.FindVar(outside_og_name)); + auto &og_inside = detail::Ref(cur_scope.Var(inside_og_name)); + if (og_outside.Type().hash_code() == + typeid(framework::LoDTensor).hash_code()) { + auto &outside_tensor = og_outside.Get(); + auto &inside_tensor = + detail::Ref(og_inside.GetMutable()); + inside_tensor.set_lod(outside_tensor.lod()); + inside_tensor.ShareDataWith(outside_tensor); + } else if (og_outside.Type().hash_code() == + typeid(framework::LoDTensorArray).hash_code()) { + auto &outside_array = og_outside.Get(); + auto &inside_array = + detail::Ref(og_inside.GetMutable()); + VLOG(10) << outside_og_name << " size = " << outside_array.size(); + inside_array.resize(outside_array.size()); + + for (size_t j = 0; j < inside_array.size(); ++j) { + VLOG(10) << j << " " << outside_array[j].numel(); + if (outside_array[j].numel() != 0) { + inside_array[j].set_lod(outside_array[j].lod()); + inside_array[j].ShareDataWith(outside_array[j]); + } else { + PADDLE_ENFORCE_EQ(inside_array[j].numel(), 0); + } + } + } + } + executor.Run(*program, *cur_scope_iter, block->ID(), false); auto &pg_names = Outputs(kParamGrads); auto &p_names = Inputs(kParameters); PADDLE_ENFORCE_EQ(pg_names.size(), p_names.size()); - for (size_t prog_id = 0; prog_id < pg_names.size(); ++prog_id) { - auto inside_grad_name = framework::GradVarName(p_names[prog_id]); + for (size_t param_id = 0; param_id < pg_names.size(); ++param_id) { + if (pg_names[param_id] == framework::kEmptyVarName) { + continue; // iterator doesn't have gradient + } + auto inside_grad_name = framework::GradVarName(p_names[param_id]); - // // TODO(tonyyang-savil: Not sure we need the following + // // TODO(tonyyang-svail): Not sure we need the following // // If does not compute gradient of that variable inside rnn, // just // // continue @@ -126,7 +176,7 @@ class WhileGradOp : public framework::OperatorBase { // zero gradient variable in step 0 if (cur_scope_iter == step_scopes->rbegin()) { auto *var = (*cur_scope_iter)->FindVar(inside_grad_name); - PADDLE_ENFORCE_NOT_NULL(var); + PADDLE_ENFORCE_NOT_NULL(var, "Can not find var %s", inside_grad_name); if (var->IsType()) { auto &inside_tensor = var->Get(); framework::AttributeMap attrs; @@ -135,27 +185,18 @@ class WhileGradOp : public framework::OperatorBase { attrs["value"] = 0.0f; auto zero_op = framework::OpRegistry::CreateOp( - "fill_constant", {}, {{"Out", {pg_names[prog_id]}}}, attrs); + "fill_constant", {}, {{"Out", {pg_names[param_id]}}}, attrs); zero_op->Run(scope, dev_ctx); } } // sum gradient - auto *outside_var = scope.FindVar(pg_names[prog_id]); - PADDLE_ENFORCE_NOT_NULL(outside_var); - auto &outside_tensor = *outside_var->GetMutable(); - - std::string result_var_name; - auto *local_result_var = (*cur_scope_iter)->Var(&result_var_name); - auto &local_result_tensor = - *local_result_var->GetMutable(); - - local_result_tensor.ShareDataWith(outside_tensor); - + auto new_inside_name = cur_scope.Rename(inside_grad_name); auto sum_op = framework::OpRegistry::CreateOp( - "sum", {{"X", {result_var_name, inside_grad_name}}}, - {{"Out", {result_var_name}}}, {}); - sum_op->Run(**cur_scope_iter, dev_ctx); + "sum", {{"X", {pg_names[param_id], new_inside_name}}}, + {{"Out", {pg_names[param_id]}}}, {}); + sum_op->Run(cur_scope, dev_ctx); + cur_scope.Rename(new_inside_name, inside_grad_name); } } } @@ -169,29 +210,110 @@ class WhileGradOpDescMaker : public framework::SingleGradOpDescMaker { virtual std::unique_ptr Apply() const { auto *grad = new framework::OpDescBind(); grad->SetType("while_grad"); - for (auto &input_param : this->InputNames()) { - grad->SetInput(input_param, this->Input(input_param)); - grad->SetOutput(framework::GradVarName(input_param), - this->InputGrad(input_param)); + grad->SetInput(kParameters, Input(kParameters)); + grad->SetOutput( + framework::GradVarName(kParameters), + InputGrad(kParameters, /*do not drop empty gradient*/ false)); + grad->SetInput(kOutputs, Output(kOutputs)); + + // OG should be re-calculated by step blocks, since many outputs of while op + // do not need to calculate gradients. + std::unordered_set block_ins; + { + for (auto &p : Input(kParameters)) { + block_ins.insert(p); + } + for (auto &o : Output(kOutputs)) { + block_ins.insert(o); + } } + std::unordered_set extra_inputs; + for (size_t i = 0; i < grad_block_[0]->OpSize(); ++i) { + for (auto &input_name : grad_block_[0]->Op(i)->InputArgumentNames()) { + if (block_ins.find(input_name) != block_ins.end()) { + continue; + } + extra_inputs.insert(input_name); + } - for (auto &output_param : this->OutputNames()) { - grad->SetInput(output_param, this->Output(output_param)); - if (output_param != kStepScopes) { - grad->SetInput(framework::GradVarName(output_param), - this->OutputGrad(output_param)); + for (auto &output_name : grad_block_[0]->Op(i)->OutputArgumentNames()) { + block_ins.insert(output_name); } } + + std::vector extra_inputs_list; + extra_inputs_list.resize(extra_inputs.size()); + std::copy(extra_inputs.begin(), extra_inputs.end(), + extra_inputs_list.begin()); + grad->SetInput(framework::GradVarName(kOutputs), extra_inputs_list); + grad->SetInput(kStepScopes, Output(kStepScopes)); grad->SetAttrMap(this->Attrs()); grad->SetBlockAttr(kStepBlock, *grad_block_[0]); + // record the original output gradient names, since the gradient name of + // while operator could be renamed. + grad->SetAttr("original_output_grad", extra_inputs_list); return std::unique_ptr(grad); } }; +class WhileGradOpVarTypeInference : public framework::VarTypeInference { + public: + void operator()(const framework::OpDescBind &op_desc, + framework::BlockDescBind *block) const override { + auto p_names = op_desc.Input(kParameters); + auto pg_names = op_desc.Output(framework::GradVarName(kParameters)); + + for (size_t i = 0; i < p_names.size(); ++i) { + auto &p_var = detail::Ref(block->FindVarRecursive(p_names[i])); + auto *g_var = block->FindVarRecursive(pg_names[i]); + if (g_var != nullptr) { // Gradient could be @EMPTY@ + VLOG(5) << "Setting " << pg_names[i] << " following " << p_names[i] + << " type: " << p_var.GetType(); + g_var->SetType(p_var.GetType()); + g_var->SetDataType(p_var.GetDataType()); + } + } + } +}; + +class WhileGradOpShapeInference : public framework::InferShapeBase { + public: + void operator()(framework::InferShapeContext *ctx) const override { + ctx->HasInputs(kParameters); + ctx->HasOutputs(framework::GradVarName(kParameters)); + ctx->HasInputs(kOutputs); + ctx->HasInputs(framework::GradVarName(kOutputs)); + + auto p_names = ctx->Inputs(kParameters); + auto pg_names = ctx->Outputs(kParamGrads); + auto dims = ctx->GetInputsDim(kParameters); + auto var_types = ctx->GetInputsVarType(kParameters); + std::vector names_to_set; + std::vector dims_to_set; + for (size_t i = 0; i < p_names.size(); ++i) { + if (pg_names[i] == framework::kEmptyVarName) { + continue; + } + if (var_types[i] == framework::VarDesc::LOD_TENSOR) { + names_to_set.push_back(pg_names[i]); + dims_to_set.push_back(dims[i]); + } else if (var_types[i] == framework::VarDesc::LOD_TENSOR_ARRAY) { + // not sure how to set the dim of LOD_TENSOR_ARRAY + names_to_set.push_back(pg_names[i]); + dims_to_set.push_back(dims[i]); + } + } + ctx->SetDims(names_to_set, dims_to_set); + } +}; + } // namespace operators } // namespace paddle REGISTER_OPERATOR(while, paddle::operators::WhileOp, paddle::operators::WhileOpMaker, paddle::operators::WhileGradOpDescMaker); +REGISTER_OPERATOR(while_grad, paddle::operators::WhileGradOp, + paddle::operators::WhileGradOpShapeInference, + paddle::operators::WhileGradOpVarTypeInference); diff --git a/paddle/scripts/docker/README.md b/paddle/scripts/docker/README.md index b5fd68839ddb62e76f2fd930248d546bc093a892..f3a6f1dba7588c6b29c1dcae26ec134c1a7f937d 100644 --- a/paddle/scripts/docker/README.md +++ b/paddle/scripts/docker/README.md @@ -57,8 +57,7 @@ Users can specify the following Docker build arguments with either "ON" or "OFF" | `WITH_GPU` | OFF | Generates NVIDIA CUDA GPU code and relies on CUDA libraries. | | `WITH_AVX` | OFF | Set to "ON" to enable AVX support. | | `WITH_TESTING` | ON | Build unit tests binaries. | -| `WITH_MKLDNN` | ON | Build with [Intel® MKL DNN](https://github.com/01org/mkl-dnn) support. | -| `WITH_MKLML` | ON | Build with [Intel® MKL](https://software.intel.com/en-us/mkl) support. | +| `WITH_MKL` | ON | Build with [Intel® MKL](https://software.intel.com/en-us/mkl) and [Intel® MKL-DNN](https://github.com/01org/mkl-dnn) support. | | `WITH_GOLANG` | ON | Build fault-tolerant parameter server written in go. | | `WITH_SWIG_PY` | ON | Build with SWIG python API support. | | `WITH_C_API` | OFF | Build capi libraries for inference. | diff --git a/paddle/scripts/docker/build.sh b/paddle/scripts/docker/build.sh index 8dddb2be9c10fac693d17e92dd0e1c65faa0905e..595d25fd4830b6e69b9a1080803771b0464741db 100644 --- a/paddle/scripts/docker/build.sh +++ b/paddle/scripts/docker/build.sh @@ -34,9 +34,7 @@ function cmake_gen() { ${PYTHON_FLAGS} -DWITH_DOC=OFF -DWITH_GPU=${WITH_GPU:-OFF} - -DCUDA_ARCH_NAME=All - -DWITH_MKLDNN=${WITH_MKLDNN:-ON} - -DWITH_MKLML=${WITH_MKLML:-ON} + -DWITH_MKL=${WITH_MKL:-ON} -DWITH_AVX=${WITH_AVX:-OFF} -DWITH_GOLANG=${WITH_GOLANG:-ON} -DWITH_SWIG_PY=ON @@ -57,9 +55,7 @@ EOF ${PYTHON_FLAGS} \ -DWITH_DOC=OFF \ -DWITH_GPU=${WITH_GPU:-OFF} \ - -DCUDA_ARCH_NAME=All \ - -DWITH_MKLDNN=${WITH_MKLDNN:-ON} \ - -DWITH_MKLML=${WITH_MKLML:-ON} \ + -DWITH_MKL=${WITH_MKL:-ON} \ -DWITH_AVX=${WITH_AVX:-OFF} \ -DWITH_GOLANG=${WITH_GOLANG:-ON} \ -DWITH_SWIG_PY=${WITH_SWIG_PY:-ON} \ diff --git a/paddle/scripts/submit_local.sh.in b/paddle/scripts/submit_local.sh.in index b9a49526a7e02131767a4e9b26cd0b53278176d0..d71cb84df3785008ea5793519fc26a174e1b95f7 100755 --- a/paddle/scripts/submit_local.sh.in +++ b/paddle/scripts/submit_local.sh.in @@ -18,8 +18,8 @@ function version(){ echo "PaddlePaddle @PADDLE_VERSION@, compiled with" echo " with_avx: @WITH_AVX@" echo " with_gpu: @WITH_GPU@" + echo " with_mkl: @WITH_MKL@" echo " with_mkldnn: @WITH_MKLDNN@" - echo " with_mklml: @WITH_MKLML@" echo " with_double: @WITH_DOUBLE@" echo " with_python: @WITH_PYTHON@" echo " with_rdma: @WITH_RDMA@" @@ -45,8 +45,8 @@ function ver2num() { function cpu_config() { # auto set KMP_AFFINITY and OMP_DYNAMIC from Hyper Threading Status - # only when MKLDNN or MKLML enabled - if [ "@WITH_MKLDNN@" == "OFF" ] && [ "@WITH_MKLML@" == "OFF"]; then + # only when MKL enabled + if [ "@WITH_MKL@" == "OFF" ]; then return 0 fi ht=`lscpu |grep "per core"|awk -F':' '{print $2}'|xargs` @@ -70,8 +70,8 @@ function cpu_config() { function threads_config() { # auto set OMP_NUM_THREADS and MKL_NUM_THREADS # according to trainer_count and total processors - # only when MKLDNN or MKLML enabled - if [ "@WITH_MKLDNN@" == "OFF" ] && [ "@WITH_MKLML@" == "OFF"]; then + # only when MKL enabled + if [ "@WITH_MKL@" == "OFF" ]; then return 0 fi processors=`grep "processor" /proc/cpuinfo|sort -u|wc -l` diff --git a/paddle/scripts/travis/build_doc.sh b/paddle/scripts/travis/build_doc.sh index 973b2736e5ce2b733d52df4f5a270b296bca2cac..28d82343ed32273740d0c52d0451681e43b3675e 100755 --- a/paddle/scripts/travis/build_doc.sh +++ b/paddle/scripts/travis/build_doc.sh @@ -6,7 +6,7 @@ mkdir -p $TRAVIS_BUILD_DIR/build cd $TRAVIS_BUILD_DIR/build # Compile Documentation only. -cmake .. -DCMAKE_BUILD_TYPE=Debug -DWITH_GPU=OFF -DWITH_MKLDNN=OFF -DWITH_MKLML=OFF -DWITH_DOC=ON +cmake .. -DCMAKE_BUILD_TYPE=Debug -DWITH_GPU=OFF -DWITH_MKL=OFF -DWITH_DOC=ON make -j `nproc` gen_proto_py make -j `nproc` paddle_docs paddle_docs_cn diff --git a/paddle/trainer/Trainer.cpp b/paddle/trainer/Trainer.cpp index b68e29cd5ea223272151e7a8b52d998832f47103..88e684849df6fbfe4042b92bdb76ef98159eecea 100644 --- a/paddle/trainer/Trainer.cpp +++ b/paddle/trainer/Trainer.cpp @@ -137,6 +137,10 @@ void Trainer::init(const std::shared_ptr& config, } } + if (FLAGS_use_mkldnn) { + CHECK_EQ(FLAGS_trainer_count, 1UL) << "MKLDNN only need 1 trainer"; + } + if (testing) { LOG(INFO) << "trainer: in testing mode"; if (config_->getOptConfig().use_sparse_remote_updater() || diff --git a/python/paddle/v2/fluid/framework.py b/python/paddle/v2/fluid/framework.py index f20567243ae67baecbdbac13f879f4cf2f66d298..a6eca2d7194c30aabeafc34de0957792feeebbec 100644 --- a/python/paddle/v2/fluid/framework.py +++ b/python/paddle/v2/fluid/framework.py @@ -12,9 +12,9 @@ def unique_name(prefix): return "_".join([prefix, str(uid)]) -def _debug_string_(proto): +def _debug_string_(proto, throw_on_error=True): error_fields = list() - if not proto.IsInitialized(error_fields): + if not proto.IsInitialized(error_fields) and throw_on_error: raise ValueError("{0} are not initialized\nThe message is {1}".format( error_fields, proto)) return proto.__str__() @@ -101,9 +101,12 @@ class Variable(object): self.stop_gradient = stop_gradient def __str__(self): + return self.to_string(True) + + def to_string(self, throw_on_error): protostr = self.desc.serialize_to_string() proto = framework_pb2.VarDesc.FromString(str(protostr)) - return _debug_string_(proto) + return _debug_string_(proto, throw_on_error) __repr__ = __str__ @@ -291,10 +294,13 @@ class Operator(object): self.desc.infer_var_type(self.block.desc) self.desc.infer_shape(self.block.desc) - def __str__(self): + def to_string(self, throw_on_error): protostr = self.desc.serialize_to_string() proto = framework_pb2.OpDesc.FromString(str(protostr)) - return _debug_string_(proto) + return _debug_string_(proto, throw_on_error) + + def __str__(self): + return self.to_string(True) __repr__ = __str__ @@ -349,9 +355,12 @@ class Block(object): self.program = program def __str__(self): + return self.to_string(True) + + def to_string(self, throw_on_error): protostr = self.desc.serialize_to_string() proto = framework_pb2.BlockDesc.FromString(str(protostr)) - return _debug_string_(proto) + return _debug_string_(proto, throw_on_error) __repr__ = __str__ @@ -454,9 +463,12 @@ class Program(object): self.current_block_idx = 0 def __str__(self): + return self.to_string(True) + + def to_string(self, throw_on_error): protostr = self.desc.serialize_to_string() proto = framework_pb2.ProgramDesc.FromString(str(protostr)) - return _debug_string_(proto) + return _debug_string_(proto, throw_on_error) def clone(self): p = Program() @@ -512,7 +524,14 @@ class Program(object): assert isinstance(target, Variable) if no_grad_set is None: no_grad_set = set() - param_to_grad_info = self.desc.append_backward(target.desc, no_grad_set) + try: + param_to_grad_info = self.desc.append_backward(target.desc, + no_grad_set) + except Exception as e: + raise core.EnforceNotMet( + str(e) + "\nCurrent protobuf is\n{0}".format( + self.to_string(False))) + self.sync_with_cpp() return param_to_grad_info diff --git a/python/paddle/v2/fluid/net_drawer.py b/python/paddle/v2/fluid/net_drawer.py index 17ad547c2bb5b79ef8225dd1a8f1ef49a6572508..94fdd5e38970b309580de6fc934b158a3c46e464 100644 --- a/python/paddle/v2/fluid/net_drawer.py +++ b/python/paddle/v2/fluid/net_drawer.py @@ -66,10 +66,13 @@ def parse_graph(program, graph, var_dict, **kwargs): if not var_dict.has_key(var): var_dict[var] = "Feed" + temp_id = 0 proto = framework_pb2.ProgramDesc.FromString( program.desc.serialize_to_string()) for block in proto.blocks: for op in block.ops: + op.type = op.type + "_" + str(temp_id) + temp_id += 1 graph.node(**draw_node(op)) for o in op.outputs: for arg in o.arguments: @@ -78,6 +81,7 @@ def parse_graph(program, graph, var_dict, **kwargs): for arg in e.arguments: if var_dict.has_key(arg): graph.edge(**draw_edge(var_dict, op, e, arg)) + break # only plot the first block def draw_graph(startup_program, main_program, **kwargs): diff --git a/python/paddle/v2/fluid/tests/book/test_fit_a_line.py b/python/paddle/v2/fluid/tests/book/test_fit_a_line.py index ee677a2c5670a092c509b9ce1c555223bf22957f..a7f3bfc0caf76302674a00c80c2bd9ebf834f872 100644 --- a/python/paddle/v2/fluid/tests/book/test_fit_a_line.py +++ b/python/paddle/v2/fluid/tests/book/test_fit_a_line.py @@ -1,33 +1,22 @@ +import numpy as np import paddle.v2 as paddle -import paddle.v2.fluid.layers as layers import paddle.v2.fluid.core as core -import paddle.v2.fluid.optimizer as optimizer import paddle.v2.fluid.framework as framework -from paddle.v2.fluid.io import save_persistables, load_persistables +import paddle.v2.fluid.layers as layers from paddle.v2.fluid.executor import Executor +from paddle.v2.fluid.io import save_persistables, load_persistables +from paddle.v2.fluid.optimizer import SGDOptimizer -import numpy as np - -x = layers.data( - name='x', - shape=[13], - data_type='float32') +x = layers.data(name='x', shape=[13], data_type='float32') -y_predict = layers.fc(input=x, - size=1, - act=None) +y_predict = layers.fc(input=x, size=1, act=None) -y = layers.data( - name='y', - shape=[1], - data_type='float32') +y = layers.data(name='y', shape=[1], data_type='float32') -cost = layers.square_error_cost( - input=y_predict, - label=y) +cost = layers.square_error_cost(input=y_predict, label=y) avg_cost = layers.mean(x=cost) -sgd_optimizer = optimizer.SGDOptimizer(learning_rate=0.001) +sgd_optimizer = SGDOptimizer(learning_rate=0.001) opts = sgd_optimizer.minimize(avg_cost) BATCH_SIZE = 20 diff --git a/python/paddle/v2/fluid/tests/book/test_image_classification_train.py b/python/paddle/v2/fluid/tests/book/test_image_classification_train.py index f4be835b3ad57d5b0076e8a816c2c3def46e0663..b8506125501b6e533c4594b37943ec36ca8e7d30 100644 --- a/python/paddle/v2/fluid/tests/book/test_image_classification_train.py +++ b/python/paddle/v2/fluid/tests/book/test_image_classification_train.py @@ -1,21 +1,16 @@ import numpy as np import paddle.v2 as paddle import paddle.v2.fluid.core as core +import paddle.v2.fluid.framework as framework import paddle.v2.fluid.layers as layers import paddle.v2.fluid.nets as nets -import paddle.v2.fluid.optimizer as optimizer from paddle.v2.fluid.executor import Executor -import paddle.v2.fluid.framework as framework from paddle.v2.fluid.initializer import XavierInitializer +from paddle.v2.fluid.optimizer import AdamOptimizer def resnet_cifar10(input, depth=32): - def conv_bn_layer(input, - ch_out, - filter_size, - stride, - padding, - act='relu'): + def conv_bn_layer(input, ch_out, filter_size, stride, padding, act='relu'): tmp = layers.conv2d( input=input, filter_size=filter_size, @@ -24,9 +19,7 @@ def resnet_cifar10(input, depth=32): padding=padding, act=None, bias_attr=False) - return layers.batch_norm( - input=tmp, - act=act) + return layers.batch_norm(input=tmp, act=act) def shortcut(input, ch_in, ch_out, stride, program, init_program): if ch_in != ch_out: @@ -35,28 +28,11 @@ def resnet_cifar10(input, depth=32): else: return input - def basicblock(input, - ch_in, - ch_out, - stride): - tmp = conv_bn_layer( - input, - ch_out, - 3, - stride, - 1) - tmp = conv_bn_layer( - tmp, - ch_out, - 3, - 1, - 1, - act=None) + def basicblock(input, ch_in, ch_out, stride): + tmp = conv_bn_layer(input, ch_out, 3, stride, 1) + tmp = conv_bn_layer(tmp, ch_out, 3, 1, 1, act=None) short = shortcut(input, ch_in, ch_out, stride) - return layers.elementwise_add( - x=tmp, - y=short, - act='relu') + return layers.elementwise_add(x=tmp, y=short, act='relu') def layer_warp(block_func, input, ch_in, ch_out, count, stride): tmp = block_func(input, ch_in, ch_out, stride) @@ -67,45 +43,17 @@ def resnet_cifar10(input, depth=32): assert (depth - 2) % 6 == 0 n = (depth - 2) / 6 conv1 = conv_bn_layer( - input=input, - ch_out=16, - filter_size=3, - stride=1, - padding=1) - res1 = layer_warp( - basicblock, - conv1, - 16, - 16, - n, - 1) - res2 = layer_warp( - basicblock, - res1, - 16, - 32, - n, - 2) - res3 = layer_warp( - basicblock, - res2, - 32, - 64, - n, - 2) + input=input, ch_out=16, filter_size=3, stride=1, padding=1) + res1 = layer_warp(basicblock, conv1, 16, 16, n, 1) + res2 = layer_warp(basicblock, res1, 16, 32, n, 2) + res3 = layer_warp(basicblock, res2, 32, 64, n, 2) pool = layers.pool2d( - input=res3, - pool_size=8, - pool_type='avg', - pool_stride=1) + input=res3, pool_size=8, pool_type='avg', pool_stride=1) return pool def vgg16_bn_drop(input): - def conv_block(input, - num_filter, - groups, - dropouts): + def conv_block(input, num_filter, groups, dropouts): return nets.img_conv_group( input=input, pool_size=2, @@ -123,22 +71,14 @@ def vgg16_bn_drop(input): conv4 = conv_block(conv3, 512, 3, [0.4, 0.4, 0]) conv5 = conv_block(conv4, 512, 3, [0.4, 0.4, 0]) - drop = layers.dropout( - x=conv5, - dropout_prob=0.5) + drop = layers.dropout(x=conv5, dropout_prob=0.5) fc1 = layers.fc(input=drop, size=512, act=None, param_attr={"initializer": XavierInitializer()}) - reshape1 = layers.reshape( - x=fc1, - shape=list(fc1.shape + (1, 1))) - bn = layers.batch_norm( - input=reshape1, - act='relu') - drop2 = layers.dropout( - x=bn, - dropout_prob=0.5) + reshape1 = layers.reshape(x=fc1, shape=list(fc1.shape + (1, 1))) + bn = layers.batch_norm(input=reshape1, act='relu') + drop2 = layers.dropout(x=bn, dropout_prob=0.5) fc2 = layers.fc(input=drop2, size=512, act=None, @@ -165,8 +105,8 @@ cost = layers.cross_entropy(input=predict, label=label) avg_cost = layers.mean(x=cost) accuracy = layers.accuracy(input=predict, label=label) -# optimizer = optimizer.SGDOptimizer(learning_rate=0.001) -optimizer = optimizer.AdamOptimizer(learning_rate=0.001) +# optimizer = SGDOptimizer(learning_rate=0.001) +optimizer = AdamOptimizer(learning_rate=0.001) opts = optimizer.minimize(avg_cost) BATCH_SIZE = 128 diff --git a/python/paddle/v2/fluid/tests/book/test_recognize_digits_conv.py b/python/paddle/v2/fluid/tests/book/test_recognize_digits_conv.py index f330ff58137068e429008bc7aa07bbc8d2e35ac4..75fbaf83e8f3e62eb0d0abef9cfa267b65e72973 100644 --- a/python/paddle/v2/fluid/tests/book/test_recognize_digits_conv.py +++ b/python/paddle/v2/fluid/tests/book/test_recognize_digits_conv.py @@ -1,22 +1,15 @@ +import numpy as np import paddle.v2 as paddle -import paddle.v2.fluid.layers as layers -import paddle.v2.fluid.nets as nets import paddle.v2.fluid.core as core -import paddle.v2.fluid.optimizer as optimizer import paddle.v2.fluid.evaluator as evaluator import paddle.v2.fluid.framework as framework +import paddle.v2.fluid.layers as layers +import paddle.v2.fluid.nets as nets from paddle.v2.fluid.executor import Executor +from paddle.v2.fluid.optimizer import AdamOptimizer -import numpy as np - -images = layers.data( - name='pixel', - shape=[1, 28, 28], - data_type='float32') -label = layers.data( - name='label', - shape=[1], - data_type='int64') +images = layers.data(name='pixel', shape=[1, 28, 28], data_type='float32') +label = layers.data(name='label', shape=[1], data_type='int64') conv_pool_1 = nets.simple_img_conv_pool( input=images, filter_size=5, @@ -32,17 +25,13 @@ conv_pool_2 = nets.simple_img_conv_pool( pool_stride=2, act="relu") -predict = layers.fc(input=conv_pool_2, - size=10, - act="softmax") +predict = layers.fc(input=conv_pool_2, size=10, act="softmax") cost = layers.cross_entropy(input=predict, label=label) avg_cost = layers.mean(x=cost) -optimizer = optimizer.AdamOptimizer(learning_rate=0.01, beta1=0.9, beta2=0.999) +optimizer = AdamOptimizer(learning_rate=0.01, beta1=0.9, beta2=0.999) opts = optimizer.minimize(avg_cost) -accuracy, acc_out = evaluator.accuracy( - input=predict, - label=label) +accuracy, acc_out = evaluator.accuracy(input=predict, label=label) BATCH_SIZE = 50 PASS_NUM = 3 diff --git a/python/paddle/v2/fluid/tests/book/test_recognize_digits_mlp.py b/python/paddle/v2/fluid/tests/book/test_recognize_digits_mlp.py index b0164e3e3659c19edf2af45e706fb48ac1fe2b1c..cf10b1942e6a8243b18b0ae4586fdd7ec1a665fb 100644 --- a/python/paddle/v2/fluid/tests/book/test_recognize_digits_mlp.py +++ b/python/paddle/v2/fluid/tests/book/test_recognize_digits_mlp.py @@ -1,19 +1,15 @@ +import numpy as np import paddle.v2 as paddle -import paddle.v2.fluid.layers as layers import paddle.v2.fluid.core as core -import paddle.v2.fluid.optimizer as optimizer import paddle.v2.fluid.framework as framework +import paddle.v2.fluid.layers as layers from paddle.v2.fluid.executor import Executor -from paddle.v2.fluid.regularizer import L2DecayRegularizer from paddle.v2.fluid.initializer import UniformInitializer - -import numpy as np +from paddle.v2.fluid.optimizer import MomentumOptimizer +from paddle.v2.fluid.regularizer import L2DecayRegularizer BATCH_SIZE = 128 -image = layers.data( - name='x', - shape=[784], - data_type='float32') +image = layers.data(name='x', shape=[784], data_type='float32') param_attr = { 'name': None, @@ -22,32 +18,21 @@ param_attr = { 'regularization': L2DecayRegularizer(0.0005 * BATCH_SIZE) } -hidden1 = layers.fc(input=image, - size=128, - act='relu', - param_attr=param_attr) -hidden2 = layers.fc(input=hidden1, - size=64, - act='relu', - param_attr=param_attr) +hidden1 = layers.fc(input=image, size=128, act='relu', param_attr=param_attr) +hidden2 = layers.fc(input=hidden1, size=64, act='relu', param_attr=param_attr) predict = layers.fc(input=hidden2, size=10, act='softmax', param_attr=param_attr) -label = layers.data( - name='y', - shape=[1], - data_type='int64') +label = layers.data(name='y', shape=[1], data_type='int64') cost = layers.cross_entropy(input=predict, label=label) avg_cost = layers.mean(x=cost) -accuracy = layers.accuracy( - input=predict, - label=label) +accuracy = layers.accuracy(input=predict, label=label) -optimizer = optimizer.MomentumOptimizer(learning_rate=0.001, momentum=0.9) +optimizer = MomentumOptimizer(learning_rate=0.001, momentum=0.9) opts = optimizer.minimize(avg_cost) train_reader = paddle.batch( diff --git a/python/paddle/v2/fluid/tests/book/test_recommender_system.py b/python/paddle/v2/fluid/tests/book/test_recommender_system.py index eefcb55bebff41eb9c67d9f0c8e83a5f1d4599bd..55ded3aed3a23c8cd7795f915dc1cbd512c6d945 100644 --- a/python/paddle/v2/fluid/tests/book/test_recommender_system.py +++ b/python/paddle/v2/fluid/tests/book/test_recommender_system.py @@ -1,12 +1,11 @@ +import numpy as np import paddle.v2 as paddle -import paddle.v2.fluid.layers as layers -import paddle.v2.fluid.nets as nets import paddle.v2.fluid.core as core -import paddle.v2.fluid.optimizer as optimizer import paddle.v2.fluid.framework as framework +import paddle.v2.fluid.layers as layers +import paddle.v2.fluid.nets as nets from paddle.v2.fluid.executor import Executor - -import numpy as np +from paddle.v2.fluid.optimizer import SGDOptimizer IS_SPARSE = True USE_GPU = False @@ -19,10 +18,7 @@ def get_usr_combined_features(): USR_DICT_SIZE = paddle.dataset.movielens.max_user_id() + 1 - uid = layers.data( - name='user_id', - shape=[1], - data_type='int64') + uid = layers.data(name='user_id', shape=[1], data_type='int64') usr_emb = layers.embedding( input=uid, @@ -31,15 +27,11 @@ def get_usr_combined_features(): param_attr={'name': 'user_table'}, is_sparse=IS_SPARSE) - usr_fc = layers.fc(input=usr_emb, - size=32) + usr_fc = layers.fc(input=usr_emb, size=32) USR_GENDER_DICT_SIZE = 2 - usr_gender_id = layers.data( - name='gender_id', - shape=[1], - data_type='int64') + usr_gender_id = layers.data(name='gender_id', shape=[1], data_type='int64') usr_gender_emb = layers.embedding( input=usr_gender_id, @@ -47,14 +39,10 @@ def get_usr_combined_features(): param_attr={'name': 'gender_table'}, is_sparse=IS_SPARSE) - usr_gender_fc = layers.fc(input=usr_gender_emb, - size=16) + usr_gender_fc = layers.fc(input=usr_gender_emb, size=16) USR_AGE_DICT_SIZE = len(paddle.dataset.movielens.age_table) - usr_age_id = layers.data( - name='age_id', - shape=[1], - data_type="int64") + usr_age_id = layers.data(name='age_id', shape=[1], data_type="int64") usr_age_emb = layers.embedding( input=usr_age_id, @@ -62,14 +50,10 @@ def get_usr_combined_features(): is_sparse=IS_SPARSE, param_attr={'name': 'age_table'}) - usr_age_fc = layers.fc(input=usr_age_emb, - size=16) + usr_age_fc = layers.fc(input=usr_age_emb, size=16) USR_JOB_DICT_SIZE = paddle.dataset.movielens.max_job_id() + 1 - usr_job_id = layers.data( - name='job_id', - shape=[1], - data_type="int64") + usr_job_id = layers.data(name='job_id', shape=[1], data_type="int64") usr_job_emb = layers.embedding( input=usr_job_id, @@ -77,16 +61,12 @@ def get_usr_combined_features(): param_attr={'name': 'job_table'}, is_sparse=IS_SPARSE) - usr_job_fc = layers.fc(input=usr_job_emb, - size=16) + usr_job_fc = layers.fc(input=usr_job_emb, size=16) concat_embed = layers.concat( - input=[usr_fc, usr_gender_fc, usr_age_fc, usr_job_fc], - axis=1) + input=[usr_fc, usr_gender_fc, usr_age_fc, usr_job_fc], axis=1) - usr_combined_features = layers.fc(input=concat_embed, - size=200, - act="tanh") + usr_combined_features = layers.fc(input=concat_embed, size=200, act="tanh") return usr_combined_features @@ -95,10 +75,7 @@ def get_mov_combined_features(): MOV_DICT_SIZE = paddle.dataset.movielens.max_movie_id() + 1 - mov_id = layers.data( - name='movie_id', - shape=[1], - data_type='int64') + mov_id = layers.data(name='movie_id', shape=[1], data_type='int64') mov_emb = layers.embedding( input=mov_id, @@ -107,36 +84,24 @@ def get_mov_combined_features(): param_attr={'name': 'movie_table'}, is_sparse=IS_SPARSE) - mov_fc = layers.fc(input=mov_emb, - size=32) + mov_fc = layers.fc(input=mov_emb, size=32) CATEGORY_DICT_SIZE = len(paddle.dataset.movielens.movie_categories()) - category_id = layers.data( - name='category_id', - shape=[1], - data_type='int64') + category_id = layers.data(name='category_id', shape=[1], data_type='int64') mov_categories_emb = layers.embedding( - input=category_id, - size=[CATEGORY_DICT_SIZE, 32], - is_sparse=IS_SPARSE) + input=category_id, size=[CATEGORY_DICT_SIZE, 32], is_sparse=IS_SPARSE) mov_categories_hidden = layers.sequence_pool( - input=mov_categories_emb, - pool_type="sum") + input=mov_categories_emb, pool_type="sum") MOV_TITLE_DICT_SIZE = len(paddle.dataset.movielens.get_movie_title_dict()) - mov_title_id = layers.data( - name='movie_title', - shape=[1], - data_type='int64') + mov_title_id = layers.data(name='movie_title', shape=[1], data_type='int64') mov_title_emb = layers.embedding( - input=mov_title_id, - size=[MOV_TITLE_DICT_SIZE, 32], - is_sparse=IS_SPARSE) + input=mov_title_id, size=[MOV_TITLE_DICT_SIZE, 32], is_sparse=IS_SPARSE) mov_title_conv = nets.sequence_conv_pool( input=mov_title_emb, @@ -146,13 +111,10 @@ def get_mov_combined_features(): pool_type="sum") concat_embed = layers.concat( - input=[mov_fc, mov_categories_hidden, mov_title_conv], - axis=1) + input=[mov_fc, mov_categories_hidden, mov_title_conv], axis=1) # FIXME(dzh) : need tanh operator - mov_combined_features = layers.fc(input=concat_embed, - size=200, - act="tanh") + mov_combined_features = layers.fc(input=concat_embed, size=200, act="tanh") return mov_combined_features @@ -162,18 +124,11 @@ def model(): mov_combined_features = get_mov_combined_features() # need cos sim - inference = layers.cos_sim( - X=usr_combined_features, - Y=mov_combined_features) + inference = layers.cos_sim(X=usr_combined_features, Y=mov_combined_features) - label = layers.data( - name='score', - shape=[1], - data_type='float32') + label = layers.data(name='score', shape=[1], data_type='float32') - square_cost = layers.square_error_cost( - input=inference, - label=label) + square_cost = layers.square_error_cost(input=inference, label=label) avg_cost = layers.mean(x=square_cost) @@ -182,7 +137,7 @@ def model(): def main(): cost = model() - sgd_optimizer = optimizer.SGDOptimizer(learning_rate=0.2) + sgd_optimizer = SGDOptimizer(learning_rate=0.2) opts = sgd_optimizer.minimize(cost) if USE_GPU: diff --git a/python/paddle/v2/fluid/tests/book/test_understand_sentiment_conv.py b/python/paddle/v2/fluid/tests/book/test_understand_sentiment_conv.py index 91fc79a9870a31205098d8a40de6c033d5bf60b9..e69b915a9cfaf9e06075991975563a1fc1196661 100644 --- a/python/paddle/v2/fluid/tests/book/test_understand_sentiment_conv.py +++ b/python/paddle/v2/fluid/tests/book/test_understand_sentiment_conv.py @@ -1,12 +1,11 @@ +import numpy as np import paddle.v2 as paddle -import paddle.v2.fluid.layers as layers -import paddle.v2.fluid.nets as nets import paddle.v2.fluid.core as core -import paddle.v2.fluid.optimizer as optimizer import paddle.v2.fluid.framework as framework +import paddle.v2.fluid.layers as layers +import paddle.v2.fluid.nets as nets from paddle.v2.fluid.executor import Executor - -import numpy as np +from paddle.v2.fluid.optimizer import AdamOptimizer def convolution_net(input_dim, class_dim=2, emb_dim=32, hid_dim=32): @@ -31,7 +30,7 @@ def convolution_net(input_dim, class_dim=2, emb_dim=32, hid_dim=32): act="softmax") cost = layers.cross_entropy(input=prediction, label=label) avg_cost = layers.mean(x=cost) - adam_optimizer = optimizer.AdamOptimizer(learning_rate=0.002) + adam_optimizer = AdamOptimizer(learning_rate=0.002) opts = adam_optimizer.minimize(avg_cost) acc = layers.accuracy(input=prediction, label=label) return avg_cost, acc diff --git a/python/paddle/v2/fluid/tests/book/test_understand_sentiment_dynamic_lstm.py b/python/paddle/v2/fluid/tests/book/test_understand_sentiment_dynamic_lstm.py index 8c3d4488354eb363cd1d378ebd4cb8069e7c1b1d..65d44542501e6531fc1912cbc726a1d903b9c031 100644 --- a/python/paddle/v2/fluid/tests/book/test_understand_sentiment_dynamic_lstm.py +++ b/python/paddle/v2/fluid/tests/book/test_understand_sentiment_dynamic_lstm.py @@ -1,12 +1,10 @@ +import numpy as np import paddle.v2 as paddle -import paddle.v2.fluid.layers as layers -import paddle.v2.fluid.nets as nets import paddle.v2.fluid.core as core -import paddle.v2.fluid.optimizer as optimizer import paddle.v2.fluid.framework as framework +import paddle.v2.fluid.layers as layers from paddle.v2.fluid.executor import Executor - -import numpy as np +from paddle.v2.fluid.optimizer import AdamOptimizer def stacked_lstm_net(input_dim, @@ -41,7 +39,7 @@ def stacked_lstm_net(input_dim, act='softmax') cost = layers.cross_entropy(input=prediction, label=label) avg_cost = layers.mean(x=cost) - adam_optimizer = optimizer.AdamOptimizer(learning_rate=0.002) + adam_optimizer = AdamOptimizer(learning_rate=0.002) opts = adam_optimizer.minimize(avg_cost) acc = layers.accuracy(input=prediction, label=label) return avg_cost, acc diff --git a/python/paddle/v2/fluid/tests/book/test_understand_sentiment_lstm.py b/python/paddle/v2/fluid/tests/book/test_understand_sentiment_lstm.py index a7d791c1f38d4843f084127e879d613b21ae8daf..280f6e902c34512735a27586221c2be68963ef2b 100644 --- a/python/paddle/v2/fluid/tests/book/test_understand_sentiment_lstm.py +++ b/python/paddle/v2/fluid/tests/book/test_understand_sentiment_lstm.py @@ -1,11 +1,10 @@ +import numpy as np import paddle.v2 as paddle -import paddle.v2.fluid.layers as layers import paddle.v2.fluid.core as core -import paddle.v2.fluid.optimizer as optimizer import paddle.v2.fluid.framework as framework +import paddle.v2.fluid.layers as layers from paddle.v2.fluid.executor import Executor - -import numpy as np +from paddle.v2.fluid.optimizer import AdamOptimizer def lstm_net(dict_dim, class_dim=2, emb_dim=32, seq_len=80, batch_size=50): @@ -33,7 +32,7 @@ def lstm_net(dict_dim, class_dim=2, emb_dim=32, seq_len=80, batch_size=50): cost = layers.cross_entropy(input=prediction, label=label) avg_cost = layers.mean(x=cost) - adam_optimizer = optimizer.AdamOptimizer(learning_rate=0.002) + adam_optimizer = AdamOptimizer(learning_rate=0.002) opts = adam_optimizer.minimize(avg_cost) acc = layers.accuracy(input=prediction, label=label) diff --git a/python/paddle/v2/fluid/tests/book/test_word2vec.py b/python/paddle/v2/fluid/tests/book/test_word2vec.py index 9dcb6f2fea06ea8cd061be4f148854408779f990..afa7b285198e0349317e123e4bd98e8336217afa 100644 --- a/python/paddle/v2/fluid/tests/book/test_word2vec.py +++ b/python/paddle/v2/fluid/tests/book/test_word2vec.py @@ -1,11 +1,10 @@ +import numpy as np import paddle.v2 as paddle -import paddle.v2.fluid.layers as layers import paddle.v2.fluid.core as core -import paddle.v2.fluid.optimizer as optimizer import paddle.v2.fluid.framework as framework +import paddle.v2.fluid.layers as layers from paddle.v2.fluid.executor import Executor - -import numpy as np +from paddle.v2.fluid.optimizer import SGDOptimizer PASS_NUM = 100 EMBED_SIZE = 32 @@ -17,26 +16,11 @@ IS_SPARSE = True word_dict = paddle.dataset.imikolov.build_dict() dict_size = len(word_dict) -first_word = layers.data( - name='firstw', - shape=[1], - data_type='int64') -second_word = layers.data( - name='secondw', - shape=[1], - data_type='int64') -third_word = layers.data( - name='thirdw', - shape=[1], - data_type='int64') -forth_word = layers.data( - name='forthw', - shape=[1], - data_type='int64') -next_word = layers.data( - name='nextw', - shape=[1], - data_type='int64') +first_word = layers.data(name='firstw', shape=[1], data_type='int64') +second_word = layers.data(name='secondw', shape=[1], data_type='int64') +third_word = layers.data(name='thirdw', shape=[1], data_type='int64') +forth_word = layers.data(name='forthw', shape=[1], data_type='int64') +next_word = layers.data(name='nextw', shape=[1], data_type='int64') embed_first = layers.embedding( input=first_word, @@ -64,19 +48,12 @@ embed_forth = layers.embedding( param_attr={'name': 'shared_w'}) concat_embed = layers.concat( - input=[embed_first, embed_second, embed_third, embed_forth], - axis=1) -hidden1 = layers.fc(input=concat_embed, - size=HIDDEN_SIZE, - act='sigmoid') -predict_word = layers.fc(input=hidden1, - size=dict_size, - act='softmax') -cost = layers.cross_entropy( - input=predict_word, - label=next_word) + input=[embed_first, embed_second, embed_third, embed_forth], axis=1) +hidden1 = layers.fc(input=concat_embed, size=HIDDEN_SIZE, act='sigmoid') +predict_word = layers.fc(input=hidden1, size=dict_size, act='softmax') +cost = layers.cross_entropy(input=predict_word, label=next_word) avg_cost = layers.mean(x=cost) -sgd_optimizer = optimizer.SGDOptimizer(learning_rate=0.001) +sgd_optimizer = SGDOptimizer(learning_rate=0.001) opts = sgd_optimizer.minimize(avg_cost) train_reader = paddle.batch( diff --git a/python/paddle/v2/fluid/tests/test_conv2d_op.py b/python/paddle/v2/fluid/tests/test_conv2d_op.py index 907b52c405d9e5c02c70f611e4c777ba21948c40..2240dc73cdd31f320fed174dd811e93c6640137f 100644 --- a/python/paddle/v2/fluid/tests/test_conv2d_op.py +++ b/python/paddle/v2/fluid/tests/test_conv2d_op.py @@ -110,13 +110,30 @@ class TestConv2dOp(OpTest): self.op_type = "conv2d" +class TestWithPad(TestConv2dOp): + def init_test_case(self): + self.pad = [1, 1] + self.stride = [1, 1] + self.input_size = [2, 3, 5, 5] # NCHW + assert np.mod(self.input_size[1], self.groups) == 0 + f_c = self.input_size[1] / self.groups + self.filter_size = [6, f_c, 3, 3] + + +class TestWithStride(TestConv2dOp): + def init_test_case(self): + self.pad = [1, 1] + self.stride = [2, 2] + self.input_size = [2, 3, 6, 6] # NCHW + assert np.mod(self.input_size[1], self.groups) == 0 + f_c = self.input_size[1] / self.groups + self.filter_size = [6, f_c, 3, 3] + + class TestWithGroup(TestConv2dOp): def init_group(self): self.groups = 3 - def init_op_type(self): - self.op_type = "conv2d" - class TestWith1x1(TestConv2dOp): def init_test_case(self): @@ -127,15 +144,9 @@ class TestWith1x1(TestConv2dOp): f_c = self.input_size[1] / self.groups self.filter_size = [6, f_c, 1, 1] - def init_dilation(self): - self.dilations = [1, 1] - def init_group(self): self.groups = 3 - def init_op_type(self): - self.op_type = "conv2d" - class TestWithDilation(TestConv2dOp): def init_test_case(self): @@ -152,14 +163,19 @@ class TestWithDilation(TestConv2dOp): def init_group(self): self.groups = 3 + +#----------------Conv2dCudnn---------------- +class TestCudnn(TestConv2dOp): def init_op_type(self): - self.op_type = "conv2d" + self.op_type = "conv_cudnn" -#----------------Conv2dCudnn---------------- +class TestCudnnWithPad(TestWithPad): + def init_op_type(self): + self.op_type = "conv_cudnn" -class TestCudnn(TestConv2dOp): +class TestCudnnWithStride(TestWithStride): def init_op_type(self): self.op_type = "conv_cudnn" diff --git a/python/paddle/v2/fluid/tests/test_conv2d_transpose_op.py b/python/paddle/v2/fluid/tests/test_conv2d_transpose_op.py index 54349c018c4a53b8767d6cd4f94d99c719dc0237..d7b1f2f2a3abf6335998742dbbef8e17794170fa 100644 --- a/python/paddle/v2/fluid/tests/test_conv2d_transpose_op.py +++ b/python/paddle/v2/fluid/tests/test_conv2d_transpose_op.py @@ -4,9 +4,7 @@ from op_test import OpTest def conv2dtranspose_forward_naive(input_, filter_, conv2dtranspose_param): - # [2, 3, 5, 5] in_n, in_c, in_h, in_w = input_.shape - # [3, 6, 3, 3] f_c, out_c, f_h, f_w = filter_.shape assert in_c == f_c @@ -29,6 +27,7 @@ def conv2dtranspose_forward_naive(input_, filter_, conv2dtranspose_param): j1, j2 = j * stride[0], j * stride[0] + f_w out[n, k, i1:i2, j1:j2] += tmp_out + out = out[:, :, pad[0]:out_h - pad[0], pad[1]:out_w - pad[1]] return out @@ -36,8 +35,6 @@ class TestConv2dTransposeOp(OpTest): def setUp(self): # init as conv transpose self.init_op_type() - - # [2, 3, 5, 5] -> kernel [3, 6, 3, 3] -> output [2, 6, 7, 7] self.init_test_case() conv2dtranspose_param = {'stride': self.stride, 'pad': self.pad} @@ -55,7 +52,6 @@ class TestConv2dTransposeOp(OpTest): self.outputs = {'Output': output} def test_check_output(self): - print 'check output here for', self.op_type self.check_output() def test_check_grad_no_input(self): @@ -88,6 +84,26 @@ class TestConv2dTransposeOp(OpTest): self.op_type = "conv2d_transpose" +class TestWithPad(TestConv2dTransposeOp): + def init_test_case(self): + self.pad = [1, 1] + self.stride = [1, 1] + self.dilations = [1, 1] + self.input_size = [2, 3, 5, 5] # NCHW + f_c = self.input_size[1] + self.filter_size = [f_c, 6, 3, 3] + + +class TestWithStride(TestConv2dTransposeOp): + def init_test_case(self): + self.pad = [1, 1] + self.stride = [2, 2] + self.dilations = [1, 1] + self.input_size = [2, 3, 5, 5] # NCHW + f_c = self.input_size[1] + self.filter_size = [f_c, 6, 3, 3] + + # ------------ test_cudnn ------------ class TestCudnn(TestConv2dTransposeOp): def init_op_type(self): diff --git a/python/paddle/v2/fluid/tests/test_conv3d_transpose_op.py b/python/paddle/v2/fluid/tests/test_conv3d_transpose_op.py index 132fe7931438a30cf02e4ad2894c0838e48ffc9f..59a32c40821f2109306e898a6a798fea52b1e0ca 100644 --- a/python/paddle/v2/fluid/tests/test_conv3d_transpose_op.py +++ b/python/paddle/v2/fluid/tests/test_conv3d_transpose_op.py @@ -4,9 +4,7 @@ from op_test import OpTest def conv3dtranspose_forward_naive(input_, filter_, conv3dtranspose_param): - # [2, 3, 5, 5, 5] in_n, in_c, in_d, in_h, in_w = input_.shape - # [3, 6, 3, 3, 3] f_c, out_c, f_d, f_h, f_w = filter_.shape assert in_c == f_c @@ -14,7 +12,6 @@ def conv3dtranspose_forward_naive(input_, filter_, conv3dtranspose_param): out_d = (in_d - 1) * stride[0] + f_d out_h = (in_h - 1) * stride[1] + f_h out_w = (in_w - 1) * stride[2] + f_w - out = np.zeros((in_n, out_c, out_d, out_h, out_w)) for n in range(in_n): @@ -33,6 +30,8 @@ def conv3dtranspose_forward_naive(input_, filter_, conv3dtranspose_param): j1, j2 = j * stride[2], j * stride[2] + f_w out[n, k, d1:d2, i1:i2, j1:j2] += tmp_out + out = out[:, :, pad[0]:out_d - pad[0], pad[1]:out_h - pad[1], pad[2]:out_w - + pad[2]] return out @@ -40,8 +39,6 @@ class TestConv3dTransposeOp(OpTest): def setUp(self): # init as conv transpose self.init_op_type() - - # [2, 3, 5, 5, 5] -> kernel [3, 6, 3, 3, 3] -> output [2, 6, 7, 7, 7] self.init_test_case() conv3dtranspose_param = {'stride': self.stride, 'pad': self.pad} @@ -49,7 +46,6 @@ class TestConv3dTransposeOp(OpTest): filter_ = np.random.random(self.filter_size).astype("float32") output = conv3dtranspose_forward_naive( input_, filter_, conv3dtranspose_param).astype("float32") - # print 'deconv output py', output, output.shape self.inputs = {'Input': input_, 'Filter': filter_} self.attrs = { @@ -60,7 +56,6 @@ class TestConv3dTransposeOp(OpTest): self.outputs = {'Output': output} def test_check_output(self): - print 'check output here' self.check_output() def test_check_grad(self): @@ -85,7 +80,7 @@ class TestConv3dTransposeOp(OpTest): self.pad = [0, 0, 0] self.stride = [1, 1, 1] self.dilations = [1, 1, 1] - self.input_size = [2, 3, 5, 5, 5] # NCHW + self.input_size = [2, 3, 5, 5, 5] # NCDHW f_c = self.input_size[1] self.filter_size = [f_c, 6, 3, 3, 3] @@ -93,5 +88,25 @@ class TestConv3dTransposeOp(OpTest): self.op_type = "conv3d_transpose" +class TestWithPad(TestConv3dTransposeOp): + def init_test_case(self): + self.pad = [1, 1, 1] + self.stride = [1, 1, 1] + self.dilations = [1, 1, 1] + self.input_size = [2, 3, 5, 5, 5] # NCDHW + f_c = self.input_size[1] + self.filter_size = [f_c, 6, 3, 3, 3] + + +class TestWithStride(TestConv3dTransposeOp): + def init_test_case(self): + self.pad = [1, 1, 1] + self.stride = [2, 2, 2] + self.dilations = [1, 1, 1] + self.input_size = [2, 3, 5, 5, 5] # NCDHW + f_c = self.input_size[1] + self.filter_size = [f_c, 6, 3, 3, 3] + + if __name__ == '__main__': unittest.main() diff --git a/python/paddle/v2/fluid/tests/test_is_empty_op.py b/python/paddle/v2/fluid/tests/test_is_empty_op.py new file mode 100644 index 0000000000000000000000000000000000000000..ed6e3fe24f6333c9c90d760787eb13241a7e1868 --- /dev/null +++ b/python/paddle/v2/fluid/tests/test_is_empty_op.py @@ -0,0 +1,43 @@ +import unittest +import numpy as np +from paddle.v2.fluid.op import Operator +import paddle.v2.fluid.core as core + + +def create_tensor(scope, name, np_data): + tensor = scope.var(name).get_tensor() + tensor.set_dims(np_data.shape) + tensor.set(np_data, core.CPUPlace()) + return tensor + + +class TestIsEmptyOp(unittest.TestCase): + def setUp(self): + self.scope = core.Scope() + # create input variables + np_data0 = np.array([0, 1, 2]) + create_tensor(self.scope, "X0", np_data0) + + np_data1 = np.array([1]) + t = create_tensor(self.scope, "X1", np_data1) + t.set_dims([0]) + + # create output variables + self.scope.var("out") + + def test_no_empty(self): + self.one_case("X0", False) + + def test_empty(self): + self.one_case("X1", True) + + def one_case(self, input, target): + op = Operator(type="is_empty", X=input, Out="out") + ctx = core.DeviceContext.create(core.CPUPlace()) + op.run(self.scope, ctx) + out = self.scope.var("out").get_tensor() + self.assertEqual(np.array(out)[0], target) + + +if __name__ == "__main__": + unittest.main() diff --git a/python/paddle/v2/fluid/tests/test_while_op.py b/python/paddle/v2/fluid/tests/test_while_op.py index 0f01acb3b94dc55a3536e751108e785ddc6e47bb..84b432333f950f754a97bc1a051b59c16fb22aed 100644 --- a/python/paddle/v2/fluid/tests/test_while_op.py +++ b/python/paddle/v2/fluid/tests/test_while_op.py @@ -2,6 +2,7 @@ import unittest import paddle.v2.fluid.layers as layers from paddle.v2.fluid.executor import Executor import paddle.v2.fluid.core as core +from paddle.v2.fluid.backward import append_backward_ops import numpy @@ -16,7 +17,7 @@ class TestWhileOp(unittest.TestCase): i = layers.zeros(shape=[1], dtype='int64') i.stop_gradient = True init = layers.zeros(shape=[10], dtype='float32') - mem_array = layers.array_write(init, i=i) + mem_array = layers.array_write(x=init, i=i) data_array = layers.array_write(x=d0, i=i) i = layers.increment(i) @@ -29,17 +30,23 @@ class TestWhileOp(unittest.TestCase): i.stop_gradient = True array_len = layers.fill_constant(shape=[1], dtype='int64', value=3) + array_len.stop_gradient = True cond = layers.less_than(x=i, y=array_len) while_op = layers.While(cond=cond) with while_op.block(): d = layers.array_read(array=data_array, i=i) prev = layers.array_read(array=mem_array, i=i) - i = layers.increment(x=i, in_place=True) result = layers.sums(input=[d, prev]) + + i = layers.increment(x=i, in_place=True) layers.array_write(result, i=i, array=mem_array) layers.less_than(x=i, y=array_len, cond=cond) - sum_result = layers.array_read(mem_array, i=array_len) + + sum_result = layers.array_read(array=mem_array, i=i) + loss = layers.mean(x=sum_result) + + append_backward_ops(loss) cpu = core.CPUPlace() exe = Executor(cpu)