diff --git a/CMakeLists.txt b/CMakeLists.txt
index fba5c58dc4a1410e7a8d5a70d78f5d1b1a75259e..fcff1de56742d027b4b10d003fd463e7335720b7 100644
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@@ -36,8 +36,7 @@ include(simd)
################################ Configurations #######################################
option(WITH_GPU "Compile PaddlePaddle with NVIDIA GPU" ${CUDA_FOUND})
option(WITH_AVX "Compile PaddlePaddle with AVX intrinsics" ${AVX_FOUND})
-option(WITH_MKLDNN "Compile PaddlePaddle with mkl-dnn support." ${AVX_FOUND})
-option(WITH_MKLML "Compile PaddlePaddle with mklml package." ${AVX_FOUND})
+option(WITH_MKL "Compile PaddlePaddle with MKL support." ${AVX_FOUND})
option(WITH_DSO "Compile PaddlePaddle with dynamic linked CUDA" ON)
option(WITH_TESTING "Compile PaddlePaddle with unit testing" ON)
option(WITH_SWIG_PY "Compile PaddlePaddle with inference api" ON)
@@ -82,10 +81,8 @@ if(ANDROID OR IOS)
"Disable PYTHON when cross-compiling for Android and iOS" FORCE)
set(WITH_RDMA OFF CACHE STRING
"Disable RDMA when cross-compiling for Android and iOS" FORCE)
- set(WITH_MKLDNN OFF CACHE STRING
- "Disable MKLDNN when cross-compiling for Android and iOS" FORCE)
- set(WITH_MKLML OFF CACHE STRING
- "Disable MKLML package when cross-compiling for Android and iOS" FORCE)
+ set(WITH_MKL OFF CACHE STRING
+ "Disable MKL when cross-compiling for Android and iOS" FORCE)
# Compile PaddlePaddle mobile inference library
if (NOT WITH_C_API)
@@ -111,6 +108,17 @@ else()
set(THIRD_PARTY_BUILD_TYPE Release)
endif()
+if(WITH_MKL)
+ set(WITH_MKLML ON)
+ set(WITH_MKLDNN ${AVX2_FOUND})
+ if(NOT WITH_MKLDNN)
+ message(WARNING "Do not have AVX2 intrinsics and disabled MKL-DNN")
+ endif()
+else()
+ set(WITH_MKLML OFF)
+ set(WITH_MKLDNN OFF)
+endif()
+
########################################################################################
include(external/mklml) # download mklml package
@@ -161,8 +169,12 @@ if(WITH_GPU)
include(cuda)
endif(WITH_GPU)
+if(WITH_MKLML)
+ list(APPEND EXTERNAL_LIBS ${MKLML_IOMP_LIB})
+endif()
+
if(WITH_MKLDNN)
- list(APPEND EXTERNAL_LIBS ${MKLDNN_LIB} ${MKLDNN_IOMP_LIB})
+ list(APPEND EXTERNAL_LIBS ${MKLDNN_LIB})
endif()
if(USE_NNPACK)
diff --git a/cmake/configure.cmake b/cmake/configure.cmake
index 24ddb24399dabeec9b8e5faf36be3eb21f420111..e550ec285668ea25757eeee9e7c5dc48fc9d339d 100644
--- a/cmake/configure.cmake
+++ b/cmake/configure.cmake
@@ -76,27 +76,14 @@ else()
include_directories(${CUDA_TOOLKIT_INCLUDE})
endif(NOT WITH_GPU)
-if(WITH_MKLDNN)
- add_definitions(-DPADDLE_USE_MKLDNN)
- if (WITH_MKLML AND MKLDNN_IOMP_DIR)
- message(STATUS "Enable Intel OpenMP at ${MKLDNN_IOMP_DIR}")
- set(OPENMP_FLAGS "-fopenmp")
- set(CMAKE_C_CREATE_SHARED_LIBRARY_FORBIDDEN_FLAGS ${OPENMP_FLAGS})
- set(CMAKE_CXX_CREATE_SHARED_LIBRARY_FORBIDDEN_FLAGS ${OPENMP_FLAGS})
- set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} ${OPENMP_FLAGS}")
- set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} ${OPENMP_FLAGS}")
- else()
- find_package(OpenMP)
- if(OPENMP_FOUND)
- set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} ${OpenMP_C_FLAGS}")
- set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} ${OpenMP_CXX_FLAGS}")
- else()
- message(WARNING "Can not find OpenMP."
- "Some performance features in MKLDNN may not be available")
- endif()
- endif()
-
-endif(WITH_MKLDNN)
+if (WITH_MKLML AND MKLML_IOMP_LIB)
+ message(STATUS "Enable Intel OpenMP with ${MKLML_IOMP_LIB}")
+ set(OPENMP_FLAGS "-fopenmp")
+ set(CMAKE_C_CREATE_SHARED_LIBRARY_FORBIDDEN_FLAGS ${OPENMP_FLAGS})
+ set(CMAKE_CXX_CREATE_SHARED_LIBRARY_FORBIDDEN_FLAGS ${OPENMP_FLAGS})
+ set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} ${OPENMP_FLAGS}")
+ set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} ${OPENMP_FLAGS}")
+endif()
set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} ${SIMD_FLAG}")
set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} ${SIMD_FLAG}")
diff --git a/cmake/cross_compiling/ios.cmake b/cmake/cross_compiling/ios.cmake
index 310450f7d009dc0cdae9c0079a96445af8ec8f95..d3f5bf6852b3b295f3b5806b0577a880b0ce6ba6 100644
--- a/cmake/cross_compiling/ios.cmake
+++ b/cmake/cross_compiling/ios.cmake
@@ -76,11 +76,9 @@ set(IOS_PLATFORM ${IOS_PLATFORM} CACHE STRING "Type of iOS Platform")
# Set the architecture for iOS
if(NOT DEFINED IOS_ARCH)
if(IOS_PLATFORM STREQUAL "OS")
- # FIXME(liuyiqun): support "armv7;armv7s;arm64" future
- set(IOS_ARCH "arm64")
+ set(IOS_ARCH "armv7;armv7s;arm64")
elseif(IOS_PLATFORM STREQUAL "SIMULATOR")
- # FIXME(liuyiqun): support "i386;x86_64" future
- set(IOS_ARCH "x86_64")
+ set(IOS_ARCH "i386;x86_64")
endif()
endif()
set(CMAKE_OSX_ARCHITECTURES ${IOS_ARCH} CACHE string "Build architecture for iOS")
@@ -248,7 +246,7 @@ set(IOS_COMPILER_FLAGS "${XCODE_IOS_PLATFORM_VERSION_FLAGS} ${XCODE_IOS_BITCODE_
# Hidden visibilty is required for cxx on iOS
set(CMAKE_C_FLAGS "${IOS_COMPILER_FLAGS} ${CMAKE_C_FLAGS}" CACHE STRING "C flags")
-set(CMAKE_CXX_FLAGS "${IOS_COMPILER_FLAGS} -fvisibility-inlines-hidden ${CMAKE_CXX_FLAGS}" CACHE STRING "CXX flags")
+set(CMAKE_CXX_FLAGS "${IOS_COMPILER_FLAGS} -fvisibility=hidden -fvisibility-inlines-hidden ${CMAKE_CXX_FLAGS}" CACHE STRING "CXX flags")
set(IOS_LINK_FLAGS "${XCODE_IOS_PLATFORM_VERSION_FLAGS} -Wl,-search_paths_first")
diff --git a/cmake/cuda.cmake b/cmake/cuda.cmake
index 9c7a52164ae8fa9e841d51fd816a36fed9bc48d4..6bea7cf3022242ce48cc882915f7e71810937283 100644
--- a/cmake/cuda.cmake
+++ b/cmake/cuda.cmake
@@ -63,7 +63,6 @@ function(select_nvcc_arch_flags out_variable)
set(archs_name_default "All")
if(NOT CMAKE_CROSSCOMPILING)
list(APPEND archs_names "Auto")
- set(archs_name_default "Auto")
endif()
# set CUDA_ARCH_NAME strings (so it will be seen as dropbox in CMake-Gui)
diff --git a/cmake/external/mkldnn.cmake b/cmake/external/mkldnn.cmake
index 5a06825beb73e85d8a55b7b578b187bee2c4340c..fc52d339d7a336b44c97f2e0a9fc8d6604854365 100644
--- a/cmake/external/mkldnn.cmake
+++ b/cmake/external/mkldnn.cmake
@@ -40,10 +40,9 @@ INCLUDE_DIRECTORIES(${MKLDNN_INC_DIR})
IF(${CBLAS_PROVIDER} STREQUAL "MKLML")
SET(MKLDNN_DEPENDS ${MKLML_PROJECT})
- SET(MKLDNN_MKLROOT ${MKLML_ROOT})
- SET(MKLDNN_IOMP_LIB ${MKLML_IOMP_LIB})
- SET(MKLDNN_IOMP_DIR ${MKLML_LIB_DIR})
- MESSAGE(STATUS "Build MKLDNN with ${MKLDNN_MKLROOT}")
+ MESSAGE(STATUS "Build MKLDNN with MKLML ${MKLML_ROOT}")
+ELSE()
+ MESSAGE(FATAL_ERROR "Should enable MKLML when build MKLDNN")
ENDIF()
SET(MKLDNN_CFLAG "${CMAKE_C_FLAGS} -Wno-error=strict-overflow")
@@ -57,15 +56,16 @@ ExternalProject_Add(
PREFIX ${MKLDNN_SOURCES_DIR}
UPDATE_COMMAND ""
CMAKE_ARGS -DCMAKE_INSTALL_PREFIX=${MKLDNN_INSTALL_DIR}
- CMAKE_ARGS -DMKLROOT=${MKLDNN_MKLROOT}
+ CMAKE_ARGS -DMKLROOT=${MKLML_ROOT}
CMAKE_ARGS -DCMAKE_C_FLAGS=${MKLDNN_CFLAG}
CMAKE_ARGS -DCMAKE_CXX_FLAGS=${MKLDNN_CXXFLAG}
CMAKE_CACHE_ARGS -DCMAKE_INSTALL_PREFIX:PATH=${MKLDNN_INSTALL_DIR}
- -DMKLROOT:PATH=${MKLDNN_MKLROOT}
+ -DMKLROOT:PATH=${MKLML_ROOT}
)
ADD_LIBRARY(mkldnn SHARED IMPORTED GLOBAL)
SET_PROPERTY(TARGET mkldnn PROPERTY IMPORTED_LOCATION ${MKLDNN_LIB})
ADD_DEPENDENCIES(mkldnn ${MKLDNN_PROJECT})
-MESSAGE(STATUS "Mkldnn library: ${MKLDNN_LIB}")
+MESSAGE(STATUS "MKLDNN library: ${MKLDNN_LIB}")
+add_definitions(-DPADDLE_USE_MKLDNN)
LIST(APPEND external_project_dependencies mkldnn)
diff --git a/cmake/external/openblas.cmake b/cmake/external/openblas.cmake
index 324e29f931ecbb6beab2d363daa01a19b1a56b3e..4c4f59656dae68739f2f07f3febd510e727fe2dd 100644
--- a/cmake/external/openblas.cmake
+++ b/cmake/external/openblas.cmake
@@ -29,7 +29,7 @@ IF(NOT ${CBLAS_FOUND})
"${CBLAS_INSTALL_DIR}/lib/${CMAKE_STATIC_LIBRARY_PREFIX}openblas${CMAKE_STATIC_LIBRARY_SUFFIX}"
CACHE FILEPATH "openblas library." FORCE)
- SET(OPENBLAS_CC "${CMAKE_C_COMPILER}")
+ SET(OPENBLAS_CC "${CMAKE_C_COMPILER} -Wno-unused-but-set-variable -Wno-unused-variable")
IF(CMAKE_CROSSCOMPILING)
SET(OPTIONAL_ARGS HOSTCC=${HOST_C_COMPILER})
@@ -45,15 +45,14 @@ IF(NOT ${CBLAS_FOUND})
SET(OPTIONAL_ARGS ${OPTIONAL_ARGS} TARGET=ARMV8 BINARY=64 USE_THREAD=0)
ENDIF()
ELSEIF(IOS)
- # FIXME(liuyiqun): support multiple architectures
- SET(OPENBLAS_COMMIT "b5c96fcfcdc82945502a2303116a64d89985daf5")
- SET(OPENBLAS_CC "${OPENBLAS_CC} ${CMAKE_C_FLAGS} -isysroot ${CMAKE_OSX_SYSROOT}")
- IF(CMAKE_OSX_ARCHITECTURES MATCHES "armv7")
- SET(OPENBLAS_CC "${OPENBLAS_CC} -arch armv7")
- SET(OPTIONAL_ARGS ${OPTIONAL_ARGS} TARGET=ARMV7 ARM_SOFTFP_ABI=1 USE_THREAD=0)
- ELSEIF(CMAKE_OSX_ARCHITECTURES MATCHES "arm64")
+ IF(CMAKE_OSX_ARCHITECTURES MATCHES "arm64")
+ SET(OPENBLAS_COMMIT "b5c96fcfcdc82945502a2303116a64d89985daf5")
+ SET(OPENBLAS_CC "${OPENBLAS_CC} ${CMAKE_C_FLAGS} -isysroot ${CMAKE_OSX_SYSROOT}")
SET(OPENBLAS_CC "${OPENBLAS_CC} -arch arm64")
SET(OPTIONAL_ARGS ${OPTIONAL_ARGS} TARGET=ARMV8 BINARY=64 USE_THREAD=0 CROSS_SUFFIX=${CROSS_SUFFIX})
+ ELSE()
+ MESSAGE(FATAL_ERROR "OpenBLAS only support arm64 architectures on iOS. "
+ "You can set IOS_USE_VECLIB_FOR_BLAS=ON or USE_EIGEN_FOR_BLAS=ON to use other blas library instead.")
ENDIF()
ELSEIF(RPI)
# use hardfp
diff --git a/cmake/external/warpctc.cmake b/cmake/external/warpctc.cmake
index 8bd058222880b4df3b08da09c02f9fe7f1d0ee66..a8e1aca49c97df256b1269c286b0bce7732fa932 100644
--- a/cmake/external/warpctc.cmake
+++ b/cmake/external/warpctc.cmake
@@ -12,6 +12,10 @@
# See the License for the specific language governing permissions and
# limitations under the License.
+IF(MOBILE_INFERENCE)
+ return()
+ENDIF()
+
INCLUDE(ExternalProject)
SET(WARPCTC_SOURCES_DIR ${THIRD_PARTY_PATH}/warpctc)
diff --git a/cmake/util.cmake b/cmake/util.cmake
index 117ab7f49cdf4a568cd203b2b17767643d0b2d50..ad905ab55ba3537054fa5b30b5fca4d83c406702 100644
--- a/cmake/util.cmake
+++ b/cmake/util.cmake
@@ -115,8 +115,8 @@ function(link_paddle_exe TARGET_NAME)
target_link_libraries(${TARGET_NAME} log)
endif(ANDROID)
- if(WITH_MKLDNN AND WITH_MKLML AND MKLDNN_IOMP_DIR)
- target_link_libraries(${TARGET_NAME} "-L${MKLDNN_IOMP_DIR} -liomp5 -Wl,--as-needed")
+ if(WITH_MKLML AND MKLML_LIB_DIR AND MKLML_IOMP_LIB)
+ target_link_libraries(${TARGET_NAME} "-L${MKLML_LIB_DIR} -liomp5 -Wl,--as-needed")
endif()
add_dependencies(${TARGET_NAME} ${external_project_dependencies})
diff --git a/doc/design/mkldnn/README.MD b/doc/design/mkldnn/README.MD
index 16236763a73770f3fe5eadf67645765d0456f875..ec6d4681836e189f46dbb9b915a237dc15cda7cf 100644
--- a/doc/design/mkldnn/README.MD
+++ b/doc/design/mkldnn/README.MD
@@ -36,13 +36,13 @@ Figure 1. PaddlePaddle on IA.
We roughly divide the integration plan into the following aspects.
### CMake
-We will add a `WITH_MKLDNN` option in `CMakeLists.txt`; when it is set to `ON`, building with MKL-DNN is enabled, and OpenMP is turned on automatically to improve MKL-DNN performance.
+We will add a `WITH_MKL` switch for users in `CMakeLists.txt`; it is the master switch that controls both `WITH_MKLML` and `WITH_MKLDNN`.
-At the same time, we will introduce a `WITH_MKLML` option to choose whether to use the MKLML package shipped with MKL-DNN. The package can be used independently of MKL-DNN, but it is recommended to enable MKLML whenever MKL-DNN is enabled to get the best performance.
+When `WITH_MKL` is turned on, MKLML is enabled as PaddlePaddle's CBLAS and LAPACK library, and Intel OpenMP is enabled to improve MKLML performance. If the system supports the AVX2 instruction set or above, MKL-DNN is enabled as well.
-Therefore, we will create `mkldnn.cmake` and `mklml.cmake` files in the `cmake/external` directory; they download the corresponding packages when PaddlePaddle is built and place them in PaddlePaddle's third party directory.
+When `WITH_MKL` is turned off, both MKLML and MKL-DNN are disabled.
-**Note**: when `WITH_MKLML=ON`, this package is used preferentially as PaddlePaddle's CBLAS and LAPACK library, so the logic in `cmake/cblas.cmake` is adjusted slightly.
+Therefore, we will create `mkldnn.cmake` and `mklml.cmake` files in the `cmake/external` directory; they download the corresponding packages when PaddlePaddle is built and place them in PaddlePaddle's third party directory.
### Layers
All MKL-DNN related C++ layers will be placed, following PaddlePaddle's directory structure, in
diff --git a/doc/howto/dev/write_docs_cn.rst b/doc/howto/dev/write_docs_cn.rst
index 731a63f945c29ba78538b3d71289b234e569354d..61f3a223547b352cf7929615cf3682b29b9a738f 100644
--- a/doc/howto/dev/write_docs_cn.rst
+++ b/doc/howto/dev/write_docs_cn.rst
@@ -34,7 +34,7 @@ There are two ways to build PaddlePaddle's documentation.
cd TO_YOUR_PADDLE_CLONE_PATH
mkdir -p build
cd build
- cmake .. -DCMAKE_BUILD_TYPE=Debug -DWITH_GPU=OFF -DWITH_MKLDNN=OFF -DWITH_MKLML=OFF -DWITH_DOC=ON
+ cmake .. -DCMAKE_BUILD_TYPE=Debug -DWITH_GPU=OFF -DWITH_MKL=OFF -DWITH_DOC=ON
make gen_proto_py
make paddle_docs paddle_docs_cn
diff --git a/doc/mobile/cross_compiling_for_android_cn.md b/doc/mobile/cross_compiling_for_android_cn.md
index 882066f23714f7ab3bba9199b5fa5ff2325ce849..424d7718c64438496cf0895397babd5408e1ca02 100644
--- a/doc/mobile/cross_compiling_for_android_cn.md
+++ b/doc/mobile/cross_compiling_for_android_cn.md
@@ -1,4 +1,4 @@
-# Building the PaddlePaddle Library for Android
+# Android Platform Compilation Guide
Users can cross-compile the PaddlePaddle library for Android in either of the following two ways:
- Building inside a Docker container
diff --git a/doc/mobile/cross_compiling_for_ios_cn.md b/doc/mobile/cross_compiling_for_ios_cn.md
index cda636a67de712e072f4cc7ad859dda75211eaa8..9da48e7f2119ce901fbb3abab73400df27be16d2 100644
--- a/doc/mobile/cross_compiling_for_ios_cn.md
+++ b/doc/mobile/cross_compiling_for_ios_cn.md
@@ -1,4 +1,4 @@
-# Building the PaddlePaddle Library for iOS
+# iOS Platform Compilation Guide
Cross-compiling the PaddlePaddle library for iOS must be done on macOS. This document describes how to cross-compile the PaddlePaddle library for iOS from source on macOS.
## Preparing the Cross-Compilation Environment
@@ -25,7 +25,7 @@ Optional configuration parameters for the iOS platform:
- `IOS_PLATFORM`, which can be set to `OS/SIMULATOR`; the default is `OS`.
- `OS`, build for physical devices such as iPhone and iPad with the `arm` architecture.
- `SIMULATOR`, build for the simulator platform with the `x86` architecture.
-- `IOS_ARCH`, the target architecture. The architectures that can be set for each `IOS_PLATFORM` are listed in the table below:
+- `IOS_ARCH`, the target architecture. The architectures that can be set for each `IOS_PLATFORM` are listed in the table below; by default all architectures are built:
@@ -41,11 +41,11 @@ Optional configuration parameters for the iOS platform:
OS |
- armv7, armv7s, arm64 (default) |
+ armv7, armv7s, arm64 |
SIMULATOR |
- i386, x86_64 (default) |
+ i386, x86_64 |
@@ -66,7 +66,7 @@ Optional configuration parameters for the iOS platform:
```bash
cmake -DCMAKE_SYSTEM_NAME=iOS \
-DIOS_PLATFORM=OS \
- -DIOS_ARCH="arm64" \
+ -DIOS_ARCH="armv7;arm64" \
-DIOS_ENABLE_BITCODE=ON \
-DIOS_USE_VECLIB_FOR_BLAS=ON \
-DCMAKE_INSTALL_PREFIX=your/path/to/install \
@@ -112,6 +112,6 @@ $ make install
- the `lib` directory, which contains the PaddlePaddle C-API static libraries
- the `third_party` directory, which contains all the third-party libraries that are depended on
-Note that it is recommended to install PaddlePaddle libraries built for different architectures into different directories, and then use the `lipo` tool to merge the static libraries into one fat library that supports multiple architectures.
+Note that if the PaddlePaddle library needs to support both physical devices and the simulator, the device and simulator versions must be built separately and then merged into a fat library with the `lipo` tool.
At this point, the PaddlePaddle library has been installed. Users can use the merged fat library in deep-learning related iOS apps; see the C-API documentation for how to call it.
diff --git a/doc/mobile/cross_compiling_for_raspberry_cn.md b/doc/mobile/cross_compiling_for_raspberry_cn.md
index 6e983645faaed1f67edaeeb82ddbef9cef6bb85f..f8ef9dc8031613831437745995268f3abc392f5b 100644
--- a/doc/mobile/cross_compiling_for_raspberry_cn.md
+++ b/doc/mobile/cross_compiling_for_raspberry_cn.md
@@ -1,4 +1,4 @@
-# Building the PaddlePaddle Library for Raspberry Pi
+# Raspberry Pi Platform Compilation Guide
There are usually two ways to build a Raspberry Pi based version:
diff --git a/paddle/cuda/include/hl_gpu.h b/paddle/cuda/include/hl_gpu.h
index ede2670882ee2b93f610a2261a4ecc1784bc2d0c..4ab8de80d1c7be0f8e3eb848955373dd5e21bc18 100644
--- a/paddle/cuda/include/hl_gpu.h
+++ b/paddle/cuda/include/hl_gpu.h
@@ -25,7 +25,9 @@ limitations under the License. */
#include "hl_matrix.h"
#include "hl_sequence.h"
#include "hl_sparse.h"
+#ifndef PADDLE_MOBILE_INFERENCE
#include "hl_warpctc_wrap.h"
+#endif
#ifdef HPPL_STUB_FUNC
#include "stub/hl_aggregate_stub.h"
diff --git a/paddle/framework/backward.cc b/paddle/framework/backward.cc
index b3b9c45ded95ce2e735b8898d47760956dcacdce..00d9dd238ec5328be28f58f8118daad3a039e08c 100644
--- a/paddle/framework/backward.cc
+++ b/paddle/framework/backward.cc
@@ -270,6 +270,19 @@ static bool AllGradInSet(const std::vector& names,
return false;
}
}
+ if (VLOG_IS_ON(10)) {
+ std::ostringstream sout;
+ sout << "All input {";
+ for (auto& name : names) {
+ sout << name << ",";
+ }
+ sout << "} is in {";
+ for (auto& name : set) {
+ sout << name << ",";
+ }
+ sout << "}";
+ VLOG(10) << sout.str();
+ }
return true;
}
@@ -290,14 +303,12 @@ static void CreateGradVarInBlock(
auto ops = block_desc->AllOps();
for (size_t op_index = grad_op_start_index; op_index < ops.size();
++op_index) {
- bool need_infer_shape = false;
std::unordered_set new_vars;
ForEachVarName(ops[op_index]->Outputs(),
[&](const std::string& grad_var_name) {
if (block_desc->HasVar(grad_var_name)) {
return false;
}
- need_infer_shape = true;
auto var = block_desc->Var(grad_var_name);
new_vars.insert(var->Name());
auto it = param_name_map.find(grad_var_name);
@@ -311,23 +322,21 @@ static void CreateGradVarInBlock(
grad_record.op_idx_ = static_cast(op_index);
return false; /* not break */
});
- if (need_infer_shape) {
- ops[op_index]->InferVarType(block_desc);
- for (auto& arg : ops[op_index]->OutputArgumentNames()) {
- if (new_vars.find(arg) == new_vars.end()) {
- continue;
- }
- auto pname = FwdName(arg);
- auto* param = block_desc->FindVarRecursive(pname);
- auto* grad = block_desc->FindVar(arg);
- if (param == nullptr) {
- grad->SetDataType(DataType::FP32);
- } else {
- grad->SetDataType(param->GetDataType());
- }
+ ops[op_index]->InferVarType(block_desc);
+ for (auto& arg : ops[op_index]->OutputArgumentNames()) {
+ if (new_vars.find(arg) == new_vars.end()) {
+ continue;
+ }
+ auto pname = FwdName(arg);
+ auto* param = block_desc->FindVarRecursive(pname);
+ auto* grad = block_desc->FindVar(arg);
+ if (param == nullptr) {
+ grad->SetDataType(DataType::FP32);
+ } else {
+ grad->SetDataType(param->GetDataType());
}
- ops[op_index]->InferShape(*block_desc);
}
+ ops[op_index]->InferShape(*block_desc);
}
}
@@ -387,6 +396,7 @@ std::vector> MakeBlockBackward(
ProgramDescBind& program_desc, int block_idx,
std::unordered_set* no_grad_vars,
std::unordered_map* grad_to_var) {
+ VLOG(5) << "MakeBlockBackward";
BlockDescBind* cur_block = program_desc.MutableBlock(block_idx);
std::vector op_descs = cur_block->AllOps();
std::unordered_map> dup_out_ops;
@@ -394,9 +404,10 @@ std::vector> MakeBlockBackward(
std::vector> backward_descs;
for (auto it = op_descs.rbegin(); it != op_descs.rend(); ++it) {
+ VLOG(5) << "Making backward " << (*it)->Type() << " op";
std::vector> op_grads;
- if ((*it)->Type() == "recurrent") {
+ if ((*it)->Type() == "recurrent" || (*it)->Type() == "while") {
int step_block_idx = (*it)->GetBlockAttr("step_block");
BlockDescBind* backward_block = CreateStepBlock(
program_desc, no_grad_vars, grad_to_var, step_block_idx);
@@ -410,6 +421,15 @@ std::vector> MakeBlockBackward(
op_grads = MakeOpGrad(*it, no_grad_vars, grad_to_var);
}
+ if (VLOG_IS_ON(10)) {
+ std::ostringstream sout;
+ sout << "Made ";
+ for (auto& op_grad : op_grads) {
+ sout << op_grad->Type() << " ";
+ }
+ VLOG(10) << sout.str();
+ }
+
for (const auto& desc : op_grads) {
for (const std::string& out_name : desc->OutputArgumentNames()) {
if (out_name.find("@GRAD") == std::string::npos) {
@@ -425,6 +445,8 @@ std::vector> MakeBlockBackward(
op_grads.begin(), op_grads.end(), std::back_inserter(backward_descs),
[](std::unique_ptr& ptr) { return std::move(ptr); });
}
+
+ VLOG(5) << "Appending Sums";
// Check whether some variables are written more than once
std::list>> pending_sum_ops;
for (const auto& dup : dup_out_ops) {
@@ -432,16 +454,22 @@ std::vector> MakeBlockBackward(
const std::vector dup_op = dup.second;
if (out_name != kEmptyVarName && dup_op.size() > 1) {
std::vector sum_op_inputs;
+ std::string next_g_name = out_name;
for (size_t i = 0; i < dup_op.size(); ++i) {
+ VLOG(10) << backward_descs[dup_op[i]]->Type() << " has " << out_name
+ << " duplicated";
std::string new_name = out_name + "@RENAME@" + std::to_string(i);
- backward_descs[dup_op[i]]->Rename(out_name, new_name);
+ backward_descs[dup_op[i]]->RenameOutput(out_name, new_name);
+ backward_descs[dup_op[i]]->RenameInput(out_name, next_g_name);
sum_op_inputs.emplace_back(new_name);
+ next_g_name = sum_op_inputs.back();
}
std::unique_ptr sum_op(new OpDescBind(
"sum", {{"X", sum_op_inputs}}, {{"Out", {out_name}}}, {}));
pending_sum_ops.push_back({dup_op.back(), std::move(sum_op)});
}
}
+
pending_sum_ops.sort(
[](const std::pair>& a,
const std::pair>& b) {
@@ -452,6 +480,8 @@ std::vector> MakeBlockBackward(
std::move(p.second));
}
+ VLOG(5) << "MakeBlockBackward Finished";
+
return backward_descs;
}
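Illustrative note (not part of the patch): the `RenameOutput`/`RenameInput` calls above implement a naming scheme for a gradient variable that several backward ops write. Each duplicated output gets a fresh `@RENAME@i` name, each duplicated input is rewired to the name produced just before it, and a trailing `sum` op folds all renamed copies back into the original gradient. The toy function below is only a sketch of the naming, not the real OpDesc plumbing:

```cpp
#include <string>
#include <vector>

// Toy reproduction of the renaming loop in MakeBlockBackward for an output
// variable that `count` backward ops all write. The returned names are what
// the trailing "sum" op would receive as inputs.
std::vector<std::string> RenamedGradInputs(const std::string &out_name,
                                           size_t count) {
  std::vector<std::string> sum_op_inputs;
  std::string next_g_name = out_name;  // name read by the next op's input
  for (size_t i = 0; i < count; ++i) {
    std::string new_name = out_name + "@RENAME@" + std::to_string(i);
    // Op i writes new_name instead of out_name and reads next_g_name.
    sum_op_inputs.emplace_back(new_name);
    next_g_name = sum_op_inputs.back();
  }
  return sum_op_inputs;  // sum(sum_op_inputs) is then written back to out_name
}
```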
diff --git a/paddle/framework/data_type.h b/paddle/framework/data_type.h
index 3ec88d7a72c3339bf5e7d0ca3957a3f608f039b7..be144d8fc0104fccc08006532a85906ade25c2a1 100644
--- a/paddle/framework/data_type.h
+++ b/paddle/framework/data_type.h
@@ -29,6 +29,8 @@ inline DataType ToDataType(std::type_index type) {
return DataType::INT32;
} else if (typeid(int64_t).hash_code() == type.hash_code()) {
return DataType::INT64;
+ } else if (typeid(bool).hash_code() == type.hash_code()) {
+ return DataType::BOOL;
} else {
PADDLE_THROW("Not supported");
}
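Illustrative note (not part of the patch): the new branch lets `ToDataType` map C++ `bool` onto the framework enum, which is what the `bool` kernels registered later in this patch (for example in `fill_zeros_like_op.cc`) rely on. A minimal sketch, assuming the `paddle/framework/data_type.h` header shown above:

```cpp
#include <typeinfo>
#include "paddle/framework/data_type.h"

// Sketch only: query the framework data type of a bool tensor element.
void Example() {
  auto dtype = paddle::framework::ToDataType(typeid(bool));
  // With this patch, dtype == paddle::framework::DataType::BOOL instead of
  // falling through to PADDLE_THROW("Not supported").
}
```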
diff --git a/paddle/framework/ddim.cc b/paddle/framework/ddim.cc
index 53b899a23997b71e723a298ec360a4e018d89878..8b6f42b82df14bfcd25f33ef16b5903fb965a8ba 100644
--- a/paddle/framework/ddim.cc
+++ b/paddle/framework/ddim.cc
@@ -60,8 +60,7 @@ void make_ddim(DDim& ddim, const int64_t* dims, int n) {
ddim = make_dim<9>(dims);
break;
default:
- throw std::invalid_argument(
- "Dynamic dimensions must have between [1, 9] dimensions.");
+ PADDLE_THROW("Dynamic dimensions must have between [1, 9] dimensions.");
}
}
diff --git a/paddle/framework/executor.cc b/paddle/framework/executor.cc
index 2fcf41d69f0011b0d9a3d89c97fcebacb0703e97..adedd8cb0e8504fd6fc924e62a2ede3c1c7ce698 100644
--- a/paddle/framework/executor.cc
+++ b/paddle/framework/executor.cc
@@ -120,6 +120,7 @@ void Executor::Run(const ProgramDescBind& pdesc, Scope* scope, int block_id,
for (auto& op_desc : block.AllOps()) {
auto op = paddle::framework::OpRegistry::CreateOp(*op_desc);
+ VLOG(10) << op->DebugString();
op->Run(*local_scope, *device);
}
if (create_local_scope) {
diff --git a/paddle/framework/op_desc.cc b/paddle/framework/op_desc.cc
index 39c8def82e1ebb10a0e357a648af760099020c32..48cd131550dea5ad3f368b25c31d753efbe0dff9 100644
--- a/paddle/framework/op_desc.cc
+++ b/paddle/framework/op_desc.cc
@@ -235,6 +235,23 @@ void OpDescBind::Rename(const std::string &old_name,
need_update_ = true;
}
+void OpDescBind::RenameOutput(const std::string &old_name,
+ const std::string &new_name) {
+ for (auto &output : outputs_) {
+ std::replace(output.second.begin(), output.second.end(), old_name,
+ new_name);
+ }
+ need_update_ = true;
+}
+
+void OpDescBind::RenameInput(const std::string &old_name,
+ const std::string &new_name) {
+ for (auto &input : inputs_) {
+ std::replace(input.second.begin(), input.second.end(), old_name, new_name);
+ }
+ need_update_ = true;
+}
+
struct SetAttrDescVisitor : public boost::static_visitor {
explicit SetAttrDescVisitor(OpDesc::Attr *attr) : attr_(attr) {}
mutable OpDesc::Attr *attr_;
@@ -448,7 +465,12 @@ const std::vector &CompileTimeInferShapeContext::Outputs(
DDim CompileTimeInferShapeContext::GetDim(const std::string &name) const {
auto var = block_.FindVarRecursive(name);
PADDLE_ENFORCE(var != nullptr, "Cannot find variable %s", name);
- return framework::make_ddim(var->Shape());
+ try {
+ return framework::make_ddim(var->Shape());
+ } catch (...) {
+ VLOG(5) << "GetDim of variable " << name << " error";
+ std::rethrow_exception(std::current_exception());
+ }
}
void CompileTimeInferShapeContext::SetDim(const std::string &name,
diff --git a/paddle/framework/op_desc.h b/paddle/framework/op_desc.h
index e3e96441bbf51729f2ba69c9257e6961b1de0d5c..da032319afa775571d3942bf6ae415db7d233735 100644
--- a/paddle/framework/op_desc.h
+++ b/paddle/framework/op_desc.h
@@ -73,6 +73,10 @@ class OpDescBind {
void Rename(const std::string &old_name, const std::string &new_name);
+ void RenameOutput(const std::string &old_name, const std::string &new_name);
+
+ void RenameInput(const std::string &old_name, const std::string &new_name);
+
// Only be used in C++
const AttributeMap &GetAttrMap() const;
diff --git a/paddle/framework/operator.cc b/paddle/framework/operator.cc
index 3276f8af396fe58450a8dc6713fe61e49d5ca708..93467ab8ac796277b47a861a427de2837fb2d3d4 100644
--- a/paddle/framework/operator.cc
+++ b/paddle/framework/operator.cc
@@ -403,19 +403,6 @@ class RuntimeInferShapeContext : public InferShapeContext {
void OperatorWithKernel::Run(const Scope& scope,
const platform::DeviceContext& dev_ctx) const {
- if (VLOG_IS_ON(1)) {
- auto inputs = this->InputVars();
- auto outputs = this->OutputVars(true);
- std::ostringstream sout;
- sout << "Run operator " << this->Type() << " From [";
- std::ostream_iterator out_it(sout, ",");
- std::copy(inputs.begin(), inputs.end(), out_it);
- sout << "] to [";
- std::copy(outputs.begin(), outputs.end(), out_it);
- sout << "]";
- VLOG(1) << sout.str();
- }
-
RuntimeInferShapeContext infer_shape_ctx(*this, scope);
this->InferShape(&infer_shape_ctx);
diff --git a/paddle/framework/scope.cc b/paddle/framework/scope.cc
index 9428b8a07ea0af005f6e960ddaa02da624ad9d97..9ad6272c99dd6a85520ae44c1331ac232bc6a9a2 100644
--- a/paddle/framework/scope.cc
+++ b/paddle/framework/scope.cc
@@ -38,11 +38,12 @@ Scope& Scope::NewScope() const {
Variable* Scope::Var(const std::string& name) {
auto iter = vars_.find(name);
if (iter != vars_.end()) {
+ VLOG(3) << "Get existing variable " << name;
return iter->second;
}
Variable* v = new Variable();
vars_[name] = v;
- VLOG(3) << "Create variable " << name << " on scope";
+ VLOG(3) << "Create variable " << name;
v->name_ = &(vars_.find(name)->first);
return v;
}
diff --git a/paddle/framework/shape_inference.h b/paddle/framework/shape_inference.h
index 7d36ead2ca85328c7843b3b5d423cf8e921d1c93..05dc47f06ac81f0acb6d0317cbecb3009c7dd7f0 100644
--- a/paddle/framework/shape_inference.h
+++ b/paddle/framework/shape_inference.h
@@ -53,6 +53,10 @@ class InferShapeContext {
virtual bool IsRuntime() const = 0;
+ // Note: In while op, we need this to be public
+ void SetDims(const std::vector &names,
+ const std::vector &dims);
+
protected:
virtual framework::DDim GetDim(const std::string &name) const = 0;
virtual void SetDim(const std::string &name, const framework::DDim &dim) = 0;
@@ -60,9 +64,6 @@ class InferShapeContext {
std::vector GetDims(
const std::vector &names) const;
- void SetDims(const std::vector &names,
- const std::vector &dims);
-
std::vector GetVarTypes(
const std::vector &names) const;
diff --git a/paddle/gserver/layers/MKLDNNLayer.cpp b/paddle/gserver/layers/MKLDNNLayer.cpp
index e75ac5ba4647a8267b7bc189893bd7adb5c3053f..2125155c6cb807045c1a25f422dc072d0a401716 100644
--- a/paddle/gserver/layers/MKLDNNLayer.cpp
+++ b/paddle/gserver/layers/MKLDNNLayer.cpp
@@ -22,7 +22,7 @@ namespace paddle {
bool MKLDNNLayer::init(const LayerMap& layerMap,
const ParameterMap& parameterMap) {
CHECK(FLAGS_use_mkldnn) << "MkldnnLayers only support use_mkldnn."
- << "Please set WITH_MKLDNN=ON "
+ << "Please set WITH_MKL=ON "
<< "and set use_mkldnn=True";
CHECK(!useGpu_) << "Do not support GPU yet";
diff --git a/paddle/math/Storage.cpp b/paddle/math/Storage.cpp
index 4adaaef9838f0d178468af3af142031325bfc11d..a2ef731ecbcd18ca4bd0b2381de04650a2686c2d 100644
--- a/paddle/math/Storage.cpp
+++ b/paddle/math/Storage.cpp
@@ -17,9 +17,13 @@ limitations under the License. */
#include "paddle/utils/StringUtil.h"
#include "paddle/utils/Util.h"
+#ifndef PADDLE_MOBILE_INFERENCE
DEFINE_int32(pool_limit_size,
536870912,
"maximum memory size managed by a memory pool, default is 512M");
+#else
+DEFINE_int32(pool_limit_size, 0, "default is 0");
+#endif
namespace paddle {
diff --git a/paddle/operators/array_operator.h b/paddle/operators/array_operator.h
index 666043e824f885e9c0e79e319d0a38ba108c209a..233a81198e336d3190565fb18556f96979cec0ce 100644
--- a/paddle/operators/array_operator.h
+++ b/paddle/operators/array_operator.h
@@ -42,6 +42,7 @@ class ArrayOp : public framework::OperatorBase {
} else {
offset = static_cast(*i_tensor.data());
}
+ VLOG(10) << " Offset = " << offset;
return offset;
}
};
diff --git a/paddle/operators/bilinear_tensor_product_op.h b/paddle/operators/bilinear_tensor_product_op.h
index ffa4f43a327418498c1f110504127e7d2878409d..1113a4c6f357edb4f6b14b73c6eec9c6cca24ce5 100644
--- a/paddle/operators/bilinear_tensor_product_op.h
+++ b/paddle/operators/bilinear_tensor_product_op.h
@@ -174,7 +174,7 @@ class BilinearTensorProductGradKernel : public framework::OpKernel {
// Calculate the gradient of Input(Bias).
if (d_bias) {
d_bias->mutable_data(ctx.GetPlace());
- auto d_bias_mat = EigenMatrix::From(*d_bias);
+ auto d_bias_mat = framework::EigenVector::Flatten(*d_bias);
d_bias_mat.device(place) = d_out_mat.sum(Eigen::DSizes(0));
}
}
diff --git a/paddle/operators/conv_transpose_op.cc b/paddle/operators/conv_transpose_op.cc
index 13ac0cd54cbeb8f68c2246f7e1d02f032266a72e..310e3f5c937bd1345663b2c2307610a485a027ef 100644
--- a/paddle/operators/conv_transpose_op.cc
+++ b/paddle/operators/conv_transpose_op.cc
@@ -30,11 +30,6 @@ void ConvTransposeOp::InferShape(framework::InferShapeContext* ctx) const {
std::vector strides = ctx->Attrs().Get>("strides");
std::vector paddings = ctx->Attrs().Get>("paddings");
- for (size_t i = 0; i < paddings.size(); ++i) {
- PADDLE_ENFORCE_EQ(paddings[i], 0,
- "No Padding allowed in conv transpose op.");
- }
-
PADDLE_ENFORCE(in_dims.size() == 4 || in_dims.size() == 5,
"ConvTransposeOp intput should be 4-D or 5-D tensor.");
PADDLE_ENFORCE_EQ(in_dims.size(), filter_dims.size(),
@@ -52,7 +47,7 @@ void ConvTransposeOp::InferShape(framework::InferShapeContext* ctx) const {
std::vector output_shape({in_dims[0], filter_dims[1]});
for (size_t i = 0; i < strides.size(); ++i) {
- output_shape.push_back((in_dims[i + 2] - 1) * strides[i] +
+ output_shape.push_back((in_dims[i + 2] - 1) * strides[i] - 2 * paddings[i] +
filter_dims[i + 2]);
}
ctx->SetOutputDim("Output", framework::make_ddim(output_shape));
diff --git a/paddle/operators/conv_transpose_op.h b/paddle/operators/conv_transpose_op.h
index 4b2bd60437da8f58054d8cdd5e6ba1fdac05f0d5..ab336ad23ce1c180b68d04e4c85b299e301d5376 100644
--- a/paddle/operators/conv_transpose_op.h
+++ b/paddle/operators/conv_transpose_op.h
@@ -62,7 +62,6 @@ class GemmConvTransposeKernel : public framework::OpKernel {
Tensor* output = context.Output("Output");
std::vector strides = context.Attr>("strides");
- // Actually, no paddings and groups allowed in conv transpose.
std::vector paddings = context.Attr>("paddings");
// TODO(Zhuoyuan): Paddings can be added in future.
// groups will always be disabled in conv2dtranspose.
@@ -148,8 +147,8 @@ class GemmConvTransposeKernel : public framework::OpKernel {
} else if (filter_shape_vec.size() == 3) {
// col2vol: col_matrix -> dy
// from (c * k_d * k_h * k_w, d * h * w) to (c, o_d, o_h, o_w)
- col2vol(context.device_context(), col, dilations, strides,
- std::vector{0, 0, 0}, &output_batch);
+ col2vol(context.device_context(), col, dilations, strides, paddings,
+ &output_batch);
}
}
}
@@ -173,7 +172,6 @@ class GemmConvTransposeGradKernel : public framework::OpKernel {
if ((!input_grad) && (!filter_grad)) return;
std::vector strides = context.Attr>("strides");
- // Actually, no paddings and groups allowed in conv transpose.
std::vector paddings = context.Attr>("paddings");
const int batch_size = static_cast(input->dims()[0]);
diff --git a/paddle/operators/cos_sim_op.h b/paddle/operators/cos_sim_op.h
index 68c56f531f941e1b8f66ac7ba6bf318881642c4f..62a4e484eceeabc4cc26e68ac54a50be1ac95df7 100644
--- a/paddle/operators/cos_sim_op.h
+++ b/paddle/operators/cos_sim_op.h
@@ -132,7 +132,7 @@ class CosSimGradKernel : public framework::OpKernel {
// compute dy
if (out_grad_y) {
out_grad_y->mutable_data(context.GetPlace());
- auto dy = EigenMatrix::Reshape(*out_grad_y, 1);
+ auto dy = EigenVector::Flatten(*out_grad_y);
auto grad = x / norm_prod_bcast - z_bcast * y_bcast / y_snorm_bcast;
dy.device(place) = (dz_bcast * grad).sum(Eigen::array({{0}}));
}
diff --git a/paddle/operators/detail/safe_ref.h b/paddle/operators/detail/safe_ref.h
new file mode 100644
index 0000000000000000000000000000000000000000..b71af17309f9f46b5c87f0f479d4e03443fa7f93
--- /dev/null
+++ b/paddle/operators/detail/safe_ref.h
@@ -0,0 +1,31 @@
+/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserved.
+
+ Licensed under the Apache License, Version 2.0 (the "License");
+ you may not use this file except in compliance with the License.
+ You may obtain a copy of the License at
+
+ http://www.apache.org/licenses/LICENSE-2.0
+
+ Unless required by applicable law or agreed to in writing, software
+ distributed under the License is distributed on an "AS IS" BASIS,
+ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ See the License for the specific language governing permissions and
+ limitations under the License. */
+
+#pragma once
+
+namespace paddle {
+namespace operators {
+namespace detail {
+/**
+ * Get Reference From Pointer with check. The error message is printf format,
+ * and passed by `args`
+ */
+template <typename T, typename... ARGS>
+inline T &Ref(T *ptr, ARGS &&... args) {
+ PADDLE_ENFORCE(ptr != nullptr, args...);
+ return *ptr;
+}
+} // namespace detail
+} // namespace operators
+} // namespace paddle
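Usage sketch (illustrative, not part of the patch): `detail::Ref` converts a possibly-null pointer returned by a lookup into a checked reference, so call sites such as the ones later in this patch (`sum_op.cc`, `tensor_array_read_write_op.cc`, `while_op.cc`) can chain member access without a separate null check. The function and variable names below are hypothetical:

```cpp
#include <string>
#include "paddle/framework/block_desc.h"  // assumed location of BlockDescBind
#include "paddle/operators/detail/safe_ref.h"

namespace paddle {
namespace operators {

// Sketch only: `block` stands in for the BlockDescBind* passed to the
// VarTypeInference callbacks elsewhere in this patch.
void SetAsLoDTensor(framework::BlockDescBind *block, const std::string &name) {
  // Throws through PADDLE_ENFORCE with the formatted message if the variable
  // cannot be found or created.
  auto &var = detail::Ref(block->FindRecursiveOrCreateVar(name),
                          "Cannot find variable %s", name);
  var.SetType(framework::VarDesc::LOD_TENSOR);
}

}  // namespace operators
}  // namespace paddle
```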
diff --git a/paddle/operators/fill_constant_batch_size_like_op.cc b/paddle/operators/fill_constant_batch_size_like_op.cc
index 85871ebbfcd8ee38ef5e8078d1d6cb6bdda46a7b..985b5d1e865e513d833bff72dcd20a8f20851d8c 100644
--- a/paddle/operators/fill_constant_batch_size_like_op.cc
+++ b/paddle/operators/fill_constant_batch_size_like_op.cc
@@ -101,4 +101,7 @@ REGISTER_OPERATOR(fill_constant_batch_size_like,
REGISTER_OP_CPU_KERNEL(
fill_constant_batch_size_like,
ops::FillConstantBatchSizeLikeOpKernel,
- ops::FillConstantBatchSizeLikeOpKernel);
+ ops::FillConstantBatchSizeLikeOpKernel,
+ ops::FillConstantBatchSizeLikeOpKernel,
+ ops::FillConstantBatchSizeLikeOpKernel);
diff --git a/paddle/operators/fill_constant_batch_size_like_op.cu.cc b/paddle/operators/fill_constant_batch_size_like_op.cu.cc
index 87e3697e2832e7c60a4293fe7126ae4c9c053e4d..9e7a1eeab863c962ca72908e561e12a04d5021c5 100644
--- a/paddle/operators/fill_constant_batch_size_like_op.cu.cc
+++ b/paddle/operators/fill_constant_batch_size_like_op.cu.cc
@@ -19,4 +19,7 @@ namespace ops = paddle::operators;
REGISTER_OP_GPU_KERNEL(
fill_constant_batch_size_like,
ops::FillConstantBatchSizeLikeOpKernel,
- ops::FillConstantBatchSizeLikeOpKernel);
+ ops::FillConstantBatchSizeLikeOpKernel,
+ ops::FillConstantBatchSizeLikeOpKernel,
+ ops::FillConstantBatchSizeLikeOpKernel);
diff --git a/paddle/operators/fill_zeros_like_op.cc b/paddle/operators/fill_zeros_like_op.cc
index 8ab39d4fb012b8fa3883f33e4d15be7918500354..95fb5932b8b555e1357adc9fdfb7b6e6db7da71d 100644
--- a/paddle/operators/fill_zeros_like_op.cc
+++ b/paddle/operators/fill_zeros_like_op.cc
@@ -54,5 +54,8 @@ namespace ops = paddle::operators;
REGISTER_OP_WITHOUT_GRADIENT(fill_zeros_like, ops::FillZerosLikeOp,
ops::FillZerosLikeOpMaker);
REGISTER_OP_CPU_KERNEL(
- fill_zeros_like,
- ops::FillZerosLikeKernel);
+ fill_zeros_like, ops::FillZerosLikeKernel,
+ ops::FillZerosLikeKernel,
+ ops::FillZerosLikeKernel,
+ ops::FillZerosLikeKernel,
+ ops::FillZerosLikeKernel);
diff --git a/paddle/operators/fill_zeros_like_op.cu.cc b/paddle/operators/fill_zeros_like_op.cu.cc
index 2adb40cf90b42a5ba608302f7985346c949ff6ed..1501a17441072223ba0e8cf5b6c8cdd5e903a467 100644
--- a/paddle/operators/fill_zeros_like_op.cu.cc
+++ b/paddle/operators/fill_zeros_like_op.cu.cc
@@ -17,5 +17,8 @@
namespace ops = paddle::operators;
REGISTER_OP_GPU_KERNEL(
- fill_zeros_like,
- ops::FillZerosLikeKernel);
+ fill_zeros_like, ops::FillZerosLikeKernel,
+ ops::FillZerosLikeKernel,
+ ops::FillZerosLikeKernel,
+ ops::FillZerosLikeKernel,
+ ops::FillZerosLikeKernel);
diff --git a/paddle/operators/is_empty_op.cc b/paddle/operators/is_empty_op.cc
new file mode 100644
index 0000000000000000000000000000000000000000..54fecf44e881b5c283c81580fd161da9808d253e
--- /dev/null
+++ b/paddle/operators/is_empty_op.cc
@@ -0,0 +1,67 @@
+/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserved.
+
+ Licensed under the Apache License, Version 2.0 (the "License");
+ you may not use this file except in compliance with the License.
+ You may obtain a copy of the License at
+
+ http://www.apache.org/licenses/LICENSE-2.0
+
+ Unless required by applicable law or agreed to in writing, software
+ distributed under the License is distributed on an "AS IS" BASIS,
+ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ See the License for the specific language governing permissions and
+ limitations under the License. */
+
+#include "paddle/framework/op_registry.h"
+#include "paddle/framework/operator.h"
+
+namespace paddle {
+namespace operators {
+
+constexpr char kInput[] = "X";
+constexpr char kOutput[] = "Out";
+
+class IsEmptyOp : public framework::OperatorBase {
+ public:
+ IsEmptyOp(const std::string &type, const framework::VariableNameMap &inputs,
+ const framework::VariableNameMap &outputs,
+ const framework::AttributeMap &attrs)
+ : OperatorBase(type, inputs, outputs, attrs) {}
+
+ void Run(const framework::Scope &scope,
+ const platform::DeviceContext &dev_ctx) const override {
+ // get input
+ auto *var = scope.FindVar(Input(kInput));
+ PADDLE_ENFORCE_NOT_NULL(var);
+ auto &tensor = var->Get();
+ // get output
+ auto *out = scope.FindVar(Output(kOutput));
+ PADDLE_ENFORCE_NOT_NULL(out);
+ auto *out_tensor = out->GetMutable<framework::LoDTensor>();
+
+ out_tensor->Resize({1});
+ out_tensor->mutable_data<bool>(platform::CPUPlace())[0] =
+ framework::product(tensor.dims()) == 0;
+ }
+};
+
+class IsEmptyOpProtoMaker : public framework::OpProtoAndCheckerMaker {
+ public:
+ IsEmptyOpProtoMaker(framework::OpProto *proto,
+ framework::OpAttrChecker *op_checker)
+ : OpProtoAndCheckerMaker(proto, op_checker) {
+ AddInput(kInput, "(Tensor) Tensor which is to be checked.");
+ AddOutput(kOutput, "(Tensor) a boolean Tensor that indicate empty or not.");
+ AddComment(R"DOC(
+IsEmpty Operator which checks whether a tensor is empty.
+
+It simply returns whether product(tensor.dims()) == 0, i.e. whether the tensor holds no elements.
+ )DOC");
+ }
+};
+
+} // namespace operators
+} // namespace paddle
+
+REGISTER_OP_WITHOUT_GRADIENT(is_empty, paddle::operators::IsEmptyOp,
+ paddle::operators::IsEmptyOpProtoMaker);
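Behavioral sketch (illustrative, not part of the patch): the scalar that `IsEmptyOp::Run` writes is true exactly when the input tensor holds no elements, i.e. the product of its dimension sizes is zero. Stated as plain C++ over the dimension sizes:

```cpp
#include <cstdint>
#include <functional>
#include <numeric>
#include <vector>

// Sketch only: the check the operator performs on the input tensor's dims(),
// written over a plain vector of dimension sizes.
bool IsEmpty(const std::vector<int64_t> &dims) {
  int64_t numel = std::accumulate(dims.begin(), dims.end(),
                                  static_cast<int64_t>(1),
                                  std::multiplies<int64_t>());
  return numel == 0;  // true when the tensor has no elements
}
```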
diff --git a/paddle/operators/math/CMakeLists.txt b/paddle/operators/math/CMakeLists.txt
index b9417f1d7fdc663fff751328d18239af3dbb1216..002b68fecf4f1e294387357f0346d9926a2b2b5a 100644
--- a/paddle/operators/math/CMakeLists.txt
+++ b/paddle/operators/math/CMakeLists.txt
@@ -1,7 +1,7 @@
add_subdirectory(detail)
if(WITH_GPU)
- nv_library(math_function SRCS math_function.cc math_function.cu im2col.cc im2col.cu DEPS cblas device_context)
+ nv_library(math_function SRCS math_function.cc math_function.cu im2col.cc im2col.cu DEPS cblas device_context framework_proto)
nv_test(math_function_gpu_test SRCS math_function_test.cu DEPS math_function tensor)
nv_library(selected_rows_functor SRCS selected_rows_functor.cc selected_rows_functor.cu DEPS selected_rows math_function)
nv_test(selected_rows_functor_gpu_test SRCS selected_rows_functor_test.cu DEPS selected_rows_functor)
@@ -15,7 +15,7 @@ if(WITH_GPU)
nv_library(lstm_compute SRCS lstm_compute.cc lstm_compute.cu DEPS device_context activation_functions)
nv_library(gru_compute SRCS gru_compute.cc gru_compute.cu DEPS device_context activation_functions math_function)
else()
- cc_library(math_function SRCS math_function.cc im2col.cc DEPS cblas device_context)
+ cc_library(math_function SRCS math_function.cc im2col.cc DEPS cblas device_context framework_proto)
cc_library(selected_rows_functor SRCS selected_rows_functor.cc DEPS selected_rows math_function)
cc_library(softmax SRCS softmax.cc DEPS device_context)
cc_library(cross_entropy SRCS cross_entropy.cc DEPS device_context)
diff --git a/paddle/operators/math/im2col.cu b/paddle/operators/math/im2col.cu
index 347df7a0ffdec163c0479a71ec775a813930ba5f..bf7894243919571c2ab15d53690b1ef05bfcc6ee 100644
--- a/paddle/operators/math/im2col.cu
+++ b/paddle/operators/math/im2col.cu
@@ -119,8 +119,8 @@ __global__ void col2im(int n, const T* data_col, int im_height, int im_width,
if (index < n) {
T val = 0;
- int w = index % im_width;
- int h = (index / im_width) % im_height;
+ int w = index % im_width + padding_width;
+ int h = (index / im_width) % im_height + padding_height;
int c = index / (im_width * im_height);
// compute the start and end of the output
diff --git a/paddle/operators/math/math_function.cc b/paddle/operators/math/math_function.cc
index 5ee091788687133f6eaef7229d9f95e2025a2daf..2e333a8cde721f8e65dbf2cf5e3aac6272172cc0 100644
--- a/paddle/operators/math/math_function.cc
+++ b/paddle/operators/math/math_function.cc
@@ -250,6 +250,8 @@ void axpy(const platform::DeviceContext& context,
template struct SetConstant;
template struct SetConstant;
template struct SetConstant;
+template struct SetConstant;
+template struct SetConstant;
#define DEFINE_CPU_TRANS(RANK) \
template struct Transpose; \
diff --git a/paddle/operators/math/math_function.cu b/paddle/operators/math/math_function.cu
index 38c04b97f9d07b9cca938b09f46ea81328a35322..58356a4b7783241ca0292829bf05dc1a8ed80c6c 100644
--- a/paddle/operators/math/math_function.cu
+++ b/paddle/operators/math/math_function.cu
@@ -256,6 +256,8 @@ void axpy(const platform::DeviceContext& context,
template struct SetConstant;
template struct SetConstant;
template struct SetConstant;
+template struct SetConstant;
+template struct SetConstant;
#define DEFINE_GPU_TRANS(RANK) \
template struct Transpose; \
diff --git a/paddle/operators/sum_op.cc b/paddle/operators/sum_op.cc
index 9837f325e30f68ba927a540d395cc7d7e093a607..c2b7632b2865a3ef66051d815d7722a08c6a8cbd 100644
--- a/paddle/operators/sum_op.cc
+++ b/paddle/operators/sum_op.cc
@@ -12,6 +12,7 @@ limitations under the License. */
#include "paddle/operators/sum_op.h"
#include
#include "paddle/framework/var_type_inference.h"
+#include "paddle/operators/detail/safe_ref.h"
namespace paddle {
namespace operators {
@@ -59,13 +60,16 @@ class SumOp : public framework::OperatorWithKernel {
x_vars[0]->Get().value().type()),
ctx.device_context());
} else if (x_vars[0]->IsType()) {
- auto& array = x_vars[0]->Get();
- for (auto& each : array) {
- if (each.numel() != 0) {
- return framework::OpKernelType(framework::ToDataType(each.type()),
- ctx.device_context());
+ for (auto& x_var : x_vars) {
+ auto& array = x_var->Get();
+ for (auto& each : array) {
+ if (each.numel() != 0) {
+ return framework::OpKernelType(framework::ToDataType(each.type()),
+ ctx.device_context());
+ }
}
}
+ PADDLE_THROW("Cannot find the input data type by all input data");
}
PADDLE_THROW("Unexpected branch. Input type is %s",
x_vars[0]->Type().name());
@@ -96,6 +100,11 @@ class SumOpVarTypeInference : public framework::VarTypeInference {
auto& inputs = op_desc.Input("X");
auto var_type = framework::VarDesc::SELECTED_ROWS;
+ for (auto& name : op_desc.Input("X")) {
+ VLOG(10) << name << " "
+ << block->FindRecursiveOrCreateVar(name)->GetType();
+ }
+
bool any_input_is_lod_tensor = std::any_of(
inputs.begin(), inputs.end(), [block](const std::string& name) {
return block->FindRecursiveOrCreateVar(name)->GetType() ==
@@ -103,7 +112,7 @@ class SumOpVarTypeInference : public framework::VarTypeInference {
});
auto is_tensor_array = [block](const std::string& name) {
- return block->FindRecursiveOrCreateVar(name)->GetType() ==
+ return detail::Ref(block->FindRecursiveOrCreateVar(name)).GetType() ==
framework::VarDesc::LOD_TENSOR_ARRAY;
};
@@ -113,14 +122,26 @@ class SumOpVarTypeInference : public framework::VarTypeInference {
std::all_of(inputs.begin(), inputs.end(), is_tensor_array);
if (any_input_is_tensor_array) {
- PADDLE_ENFORCE(all_inputs_are_tensor_array);
+ if (!all_inputs_are_tensor_array) {
+ std::ostringstream os;
+ for (auto& each : inputs) {
+ os << " " << each << " type is "
+ << detail::Ref(block->FindRecursiveOrCreateVar(each)).GetType()
+ << "\n";
+ }
+ PADDLE_ENFORCE(all_inputs_are_tensor_array,
+ "Not all inputs are tensor array:\n%s", os.str());
+ }
var_type = framework::VarDesc::LOD_TENSOR_ARRAY;
} else if (any_input_is_lod_tensor) {
var_type = framework::VarDesc::LOD_TENSOR;
}
auto out_var_name = op_desc.Output("Out").front();
- block->FindRecursiveOrCreateVar(out_var_name)->SetType(var_type);
+ auto& out_var = detail::Ref(block->FindRecursiveOrCreateVar(out_var_name));
+ out_var.SetType(var_type);
+ auto& in_var = detail::Ref(block->FindVarRecursive(inputs.front()));
+ out_var.SetDataType(in_var.GetDataType());
}
};
diff --git a/paddle/operators/tensor_array_read_write_op.cc b/paddle/operators/tensor_array_read_write_op.cc
index 62e15604c47f25c458abc69ecd1cabf964de39bb..ae1b48d7a8e3d573a5134a822a2ed5ef70511077 100644
--- a/paddle/operators/tensor_array_read_write_op.cc
+++ b/paddle/operators/tensor_array_read_write_op.cc
@@ -12,7 +12,7 @@
See the License for the specific language governing permissions and
limitations under the License. */
#include "paddle/operators/array_operator.h"
-
+#include "paddle/operators/detail/safe_ref.h"
namespace paddle {
namespace operators {
@@ -33,6 +33,8 @@ class WriteToArrayOp : public ArrayOp {
auto *out =
scope.FindVar(Output("Out"))->GetMutable();
if (offset >= out->size()) {
+ VLOG(10) << "Resize " << Output("Out") << " from " << out->size()
+ << " to " << offset + 1;
out->resize(offset + 1);
}
auto *out_tensor = &out->at(offset);
@@ -85,11 +87,15 @@ class WriteToArrayInferVarType : public framework::VarTypeInference {
public:
void operator()(const framework::OpDescBind &op_desc,
framework::BlockDescBind *block) const override {
- for (auto &out_var : op_desc.OutputArgumentNames()) {
- VLOG(10) << "Set Variable " << out_var << " as LOD_TENSOR_ARRAY";
- block->FindRecursiveOrCreateVar(out_var)->SetType(
- framework::VarDesc::LOD_TENSOR_ARRAY);
- }
+ auto x_name = op_desc.Input("X")[0];
+ auto out_name = op_desc.Output("Out")[0];
+ VLOG(10) << "Set Variable " << out_name << " as LOD_TENSOR_ARRAY";
+ auto &out = detail::Ref(block->FindRecursiveOrCreateVar(out_name),
+ "Cannot found %s", out_name);
+ out.SetType(framework::VarDesc::LOD_TENSOR_ARRAY);
+ auto &x =
+ detail::Ref(block->FindVarRecursive(x_name), "Cannot found %s", x_name);
+ out.SetDataType(x.GetDataType());
}
};
@@ -107,11 +113,11 @@ class ReadFromArrayOp : public ArrayOp {
auto &x_array = x->Get();
auto *out = scope.FindVar(Output("Out"));
PADDLE_ENFORCE(out != nullptr, "Out must be set");
- auto *out_tesnor = out->GetMutable();
+ auto *out_tensor = out->GetMutable();
size_t offset = GetOffset(scope, dev_ctx);
PADDLE_ENFORCE_LT(offset, x_array.size());
- out_tesnor->CopyFrom(x_array[offset], dev_ctx.GetPlace(), dev_ctx);
- out_tesnor->set_lod(x_array[offset].lod());
+ out_tensor->CopyFrom(x_array[offset], dev_ctx.GetPlace(), dev_ctx);
+ out_tensor->set_lod(x_array[offset].lod());
}
};
diff --git a/paddle/operators/while_op.cc b/paddle/operators/while_op.cc
index 4ca6c8507a48507fd29a9c9acae2bdf36ed936ee..dcc59f5ff2ae3a8ca999d72a20cfd5c759987d89 100644
--- a/paddle/operators/while_op.cc
+++ b/paddle/operators/while_op.cc
@@ -14,8 +14,10 @@
#include
#include "paddle/framework/executor.h"
+#include "paddle/framework/lod_tensor_array.h"
#include "paddle/framework/op_registry.h"
#include "paddle/framework/operator.h"
+#include "paddle/operators/detail/safe_ref.h"
namespace paddle {
namespace operators {
@@ -26,8 +28,9 @@ using LoDTensor = framework::LoDTensor;
constexpr char kStepBlock[] = "step_block";
constexpr char kCondition[] = "Condition";
constexpr char kStepScopes[] = "StepScopes";
-constexpr char kParamGrads[] = "X@Grad";
constexpr char kParameters[] = "X";
+constexpr char kParamGrads[] = "X@GRAD";
+constexpr char kOutputs[] = "Out";
class WhileOp : public framework::OperatorBase {
public:
@@ -71,9 +74,9 @@ class WhileOpMaker : public framework::OpProtoAndCheckerMaker {
kCondition,
"(Bool) An scalar. When it's False, the While Op will be terminated.")
.AsDuplicable();
- AddOutput("Out",
+ AddOutput(kOutputs,
"A set of variables, which will be assigned with values "
- "generated by perators inside the block of While Op.")
+ "generated by the operators inside the block of While Op.")
.AsDuplicable();
AddOutput(kStepScopes,
"(StepScopeVar) A vector of local scope, which size equals the "
@@ -104,17 +107,64 @@ class WhileGradOp : public framework::OperatorBase {
auto *step_scopes =
scope.FindVar(Input(kStepScopes))->GetMutable();
+ auto outside_og_names = Inputs(framework::GradVarName(kOutputs));
+ auto inside_og_names =
+ Attr>("original_output_grad");
+
+ PADDLE_ENFORCE_EQ(outside_og_names.size(), inside_og_names.size());
+
for (auto cur_scope_iter = step_scopes->rbegin();
cur_scope_iter != step_scopes->rend(); ++cur_scope_iter) {
+ VLOG(3) << "Start backward at time_step "
+ << cur_scope_iter - step_scopes->rbegin();
+ framework::Scope &cur_scope = **cur_scope_iter;
+ // Link OG from outside to inside
+ for (size_t i = 0; i < outside_og_names.size(); ++i) {
+ auto outside_og_name = outside_og_names[i];
+ auto inside_og_name = inside_og_names[i];
+ VLOG(10) << "Linking outside " << outside_og_name << " --> inside "
+ << inside_og_name;
+ auto &og_outside = detail::Ref(scope.FindVar(outside_og_name));
+ auto &og_inside = detail::Ref(cur_scope.Var(inside_og_name));
+ if (og_outside.Type().hash_code() ==
+ typeid(framework::LoDTensor).hash_code()) {
+ auto &outside_tensor = og_outside.Get();
+ auto &inside_tensor =
+ detail::Ref(og_inside.GetMutable());
+ inside_tensor.set_lod(outside_tensor.lod());
+ inside_tensor.ShareDataWith(outside_tensor);
+ } else if (og_outside.Type().hash_code() ==
+ typeid(framework::LoDTensorArray).hash_code()) {
+ auto &outside_array = og_outside.Get();
+ auto &inside_array =
+ detail::Ref(og_inside.GetMutable());
+ VLOG(10) << outside_og_name << " size = " << outside_array.size();
+ inside_array.resize(outside_array.size());
+
+ for (size_t j = 0; j < inside_array.size(); ++j) {
+ VLOG(10) << j << " " << outside_array[j].numel();
+ if (outside_array[j].numel() != 0) {
+ inside_array[j].set_lod(outside_array[j].lod());
+ inside_array[j].ShareDataWith(outside_array[j]);
+ } else {
+ PADDLE_ENFORCE_EQ(inside_array[j].numel(), 0);
+ }
+ }
+ }
+ }
+
executor.Run(*program, *cur_scope_iter, block->ID(), false);
auto &pg_names = Outputs(kParamGrads);
auto &p_names = Inputs(kParameters);
PADDLE_ENFORCE_EQ(pg_names.size(), p_names.size());
- for (size_t prog_id = 0; prog_id < pg_names.size(); ++prog_id) {
- auto inside_grad_name = framework::GradVarName(p_names[prog_id]);
+ for (size_t param_id = 0; param_id < pg_names.size(); ++param_id) {
+ if (pg_names[param_id] == framework::kEmptyVarName) {
+ continue; // iterator doesn't have gradient
+ }
+ auto inside_grad_name = framework::GradVarName(p_names[param_id]);
- // // TODO(tonyyang-savil: Not sure we need the following
+ // // TODO(tonyyang-svail): Not sure we need the following
// // If does not compute gradient of that variable inside rnn,
// just
// // continue
@@ -126,7 +176,7 @@ class WhileGradOp : public framework::OperatorBase {
// zero gradient variable in step 0
if (cur_scope_iter == step_scopes->rbegin()) {
auto *var = (*cur_scope_iter)->FindVar(inside_grad_name);
- PADDLE_ENFORCE_NOT_NULL(var);
+ PADDLE_ENFORCE_NOT_NULL(var, "Can not find var %s", inside_grad_name);
if (var->IsType()) {
auto &inside_tensor = var->Get();
framework::AttributeMap attrs;
@@ -135,27 +185,18 @@ class WhileGradOp : public framework::OperatorBase {
attrs["value"] = 0.0f;
auto zero_op = framework::OpRegistry::CreateOp(
- "fill_constant", {}, {{"Out", {pg_names[prog_id]}}}, attrs);
+ "fill_constant", {}, {{"Out", {pg_names[param_id]}}}, attrs);
zero_op->Run(scope, dev_ctx);
}
}
// sum gradient
- auto *outside_var = scope.FindVar(pg_names[prog_id]);
- PADDLE_ENFORCE_NOT_NULL(outside_var);
- auto &outside_tensor = *outside_var->GetMutable();
-
- std::string result_var_name;
- auto *local_result_var = (*cur_scope_iter)->Var(&result_var_name);
- auto &local_result_tensor =
- *local_result_var->GetMutable();
-
- local_result_tensor.ShareDataWith(outside_tensor);
-
+ auto new_inside_name = cur_scope.Rename(inside_grad_name);
auto sum_op = framework::OpRegistry::CreateOp(
- "sum", {{"X", {result_var_name, inside_grad_name}}},
- {{"Out", {result_var_name}}}, {});
- sum_op->Run(**cur_scope_iter, dev_ctx);
+ "sum", {{"X", {pg_names[param_id], new_inside_name}}},
+ {{"Out", {pg_names[param_id]}}}, {});
+ sum_op->Run(cur_scope, dev_ctx);
+ cur_scope.Rename(new_inside_name, inside_grad_name);
}
}
}
@@ -169,29 +210,110 @@ class WhileGradOpDescMaker : public framework::SingleGradOpDescMaker {
virtual std::unique_ptr Apply() const {
auto *grad = new framework::OpDescBind();
grad->SetType("while_grad");
- for (auto &input_param : this->InputNames()) {
- grad->SetInput(input_param, this->Input(input_param));
- grad->SetOutput(framework::GradVarName(input_param),
- this->InputGrad(input_param));
+ grad->SetInput(kParameters, Input(kParameters));
+ grad->SetOutput(
+ framework::GradVarName(kParameters),
+ InputGrad(kParameters, /*do not drop empty gradient*/ false));
+ grad->SetInput(kOutputs, Output(kOutputs));
+
+ // OG should be re-calculated by step blocks, since many outputs of while op
+ // do not need to calculate gradients.
+ std::unordered_set block_ins;
+ {
+ for (auto &p : Input(kParameters)) {
+ block_ins.insert(p);
+ }
+ for (auto &o : Output(kOutputs)) {
+ block_ins.insert(o);
+ }
}
+ std::unordered_set extra_inputs;
+ for (size_t i = 0; i < grad_block_[0]->OpSize(); ++i) {
+ for (auto &input_name : grad_block_[0]->Op(i)->InputArgumentNames()) {
+ if (block_ins.find(input_name) != block_ins.end()) {
+ continue;
+ }
+ extra_inputs.insert(input_name);
+ }
- for (auto &output_param : this->OutputNames()) {
- grad->SetInput(output_param, this->Output(output_param));
- if (output_param != kStepScopes) {
- grad->SetInput(framework::GradVarName(output_param),
- this->OutputGrad(output_param));
+ for (auto &output_name : grad_block_[0]->Op(i)->OutputArgumentNames()) {
+ block_ins.insert(output_name);
}
}
+
+ std::vector extra_inputs_list;
+ extra_inputs_list.resize(extra_inputs.size());
+ std::copy(extra_inputs.begin(), extra_inputs.end(),
+ extra_inputs_list.begin());
+ grad->SetInput(framework::GradVarName(kOutputs), extra_inputs_list);
+ grad->SetInput(kStepScopes, Output(kStepScopes));
grad->SetAttrMap(this->Attrs());
grad->SetBlockAttr(kStepBlock, *grad_block_[0]);
+ // record the original output gradient names, since the gradient name of
+ // while operator could be renamed.
+ grad->SetAttr("original_output_grad", extra_inputs_list);
return std::unique_ptr(grad);
}
};
+class WhileGradOpVarTypeInference : public framework::VarTypeInference {
+ public:
+ void operator()(const framework::OpDescBind &op_desc,
+ framework::BlockDescBind *block) const override {
+ auto p_names = op_desc.Input(kParameters);
+ auto pg_names = op_desc.Output(framework::GradVarName(kParameters));
+
+ for (size_t i = 0; i < p_names.size(); ++i) {
+ auto &p_var = detail::Ref(block->FindVarRecursive(p_names[i]));
+ auto *g_var = block->FindVarRecursive(pg_names[i]);
+ if (g_var != nullptr) { // Gradient could be @EMPTY@
+ VLOG(5) << "Setting " << pg_names[i] << " following " << p_names[i]
+ << " type: " << p_var.GetType();
+ g_var->SetType(p_var.GetType());
+ g_var->SetDataType(p_var.GetDataType());
+ }
+ }
+ }
+};
+
+class WhileGradOpShapeInference : public framework::InferShapeBase {
+ public:
+ void operator()(framework::InferShapeContext *ctx) const override {
+ ctx->HasInputs(kParameters);
+ ctx->HasOutputs(framework::GradVarName(kParameters));
+ ctx->HasInputs(kOutputs);
+ ctx->HasInputs(framework::GradVarName(kOutputs));
+
+ auto p_names = ctx->Inputs(kParameters);
+ auto pg_names = ctx->Outputs(kParamGrads);
+ auto dims = ctx->GetInputsDim(kParameters);
+ auto var_types = ctx->GetInputsVarType(kParameters);
+ std::vector names_to_set;
+ std::vector dims_to_set;
+ for (size_t i = 0; i < p_names.size(); ++i) {
+ if (pg_names[i] == framework::kEmptyVarName) {
+ continue;
+ }
+ if (var_types[i] == framework::VarDesc::LOD_TENSOR) {
+ names_to_set.push_back(pg_names[i]);
+ dims_to_set.push_back(dims[i]);
+ } else if (var_types[i] == framework::VarDesc::LOD_TENSOR_ARRAY) {
+ // not sure how to set the dim of LOD_TENSOR_ARRAY
+ names_to_set.push_back(pg_names[i]);
+ dims_to_set.push_back(dims[i]);
+ }
+ }
+ ctx->SetDims(names_to_set, dims_to_set);
+ }
+};
+
} // namespace operators
} // namespace paddle
REGISTER_OPERATOR(while, paddle::operators::WhileOp,
paddle::operators::WhileOpMaker,
paddle::operators::WhileGradOpDescMaker);
+REGISTER_OPERATOR(while_grad, paddle::operators::WhileGradOp,
+ paddle::operators::WhileGradOpShapeInference,
+ paddle::operators::WhileGradOpVarTypeInference);
diff --git a/paddle/scripts/docker/README.md b/paddle/scripts/docker/README.md
index b5fd68839ddb62e76f2fd930248d546bc093a892..f3a6f1dba7588c6b29c1dcae26ec134c1a7f937d 100644
--- a/paddle/scripts/docker/README.md
+++ b/paddle/scripts/docker/README.md
@@ -57,8 +57,7 @@ Users can specify the following Docker build arguments with either "ON" or "OFF"
| `WITH_GPU` | OFF | Generates NVIDIA CUDA GPU code and relies on CUDA libraries. |
| `WITH_AVX` | OFF | Set to "ON" to enable AVX support. |
| `WITH_TESTING` | ON | Build unit tests binaries. |
-| `WITH_MKLDNN` | ON | Build with [Intel® MKL DNN](https://github.com/01org/mkl-dnn) support. |
-| `WITH_MKLML` | ON | Build with [Intel® MKL](https://software.intel.com/en-us/mkl) support. |
+| `WITH_MKL` | ON | Build with [Intel® MKL](https://software.intel.com/en-us/mkl) and [Intel® MKL-DNN](https://github.com/01org/mkl-dnn) support. |
| `WITH_GOLANG` | ON | Build fault-tolerant parameter server written in go. |
| `WITH_SWIG_PY` | ON | Build with SWIG python API support. |
| `WITH_C_API` | OFF | Build capi libraries for inference. |
diff --git a/paddle/scripts/docker/build.sh b/paddle/scripts/docker/build.sh
index 8dddb2be9c10fac693d17e92dd0e1c65faa0905e..595d25fd4830b6e69b9a1080803771b0464741db 100644
--- a/paddle/scripts/docker/build.sh
+++ b/paddle/scripts/docker/build.sh
@@ -34,9 +34,7 @@ function cmake_gen() {
${PYTHON_FLAGS}
-DWITH_DOC=OFF
-DWITH_GPU=${WITH_GPU:-OFF}
- -DCUDA_ARCH_NAME=All
- -DWITH_MKLDNN=${WITH_MKLDNN:-ON}
- -DWITH_MKLML=${WITH_MKLML:-ON}
+ -DWITH_MKL=${WITH_MKL:-ON}
-DWITH_AVX=${WITH_AVX:-OFF}
-DWITH_GOLANG=${WITH_GOLANG:-ON}
-DWITH_SWIG_PY=ON
@@ -57,9 +55,7 @@ EOF
${PYTHON_FLAGS} \
-DWITH_DOC=OFF \
-DWITH_GPU=${WITH_GPU:-OFF} \
- -DCUDA_ARCH_NAME=All \
- -DWITH_MKLDNN=${WITH_MKLDNN:-ON} \
- -DWITH_MKLML=${WITH_MKLML:-ON} \
+ -DWITH_MKL=${WITH_MKL:-ON} \
-DWITH_AVX=${WITH_AVX:-OFF} \
-DWITH_GOLANG=${WITH_GOLANG:-ON} \
-DWITH_SWIG_PY=${WITH_SWIG_PY:-ON} \
diff --git a/paddle/scripts/submit_local.sh.in b/paddle/scripts/submit_local.sh.in
index b9a49526a7e02131767a4e9b26cd0b53278176d0..d71cb84df3785008ea5793519fc26a174e1b95f7 100755
--- a/paddle/scripts/submit_local.sh.in
+++ b/paddle/scripts/submit_local.sh.in
@@ -18,8 +18,8 @@ function version(){
echo "PaddlePaddle @PADDLE_VERSION@, compiled with"
echo " with_avx: @WITH_AVX@"
echo " with_gpu: @WITH_GPU@"
+ echo " with_mkl: @WITH_MKL@"
echo " with_mkldnn: @WITH_MKLDNN@"
- echo " with_mklml: @WITH_MKLML@"
echo " with_double: @WITH_DOUBLE@"
echo " with_python: @WITH_PYTHON@"
echo " with_rdma: @WITH_RDMA@"
@@ -45,8 +45,8 @@ function ver2num() {
function cpu_config() {
# auto set KMP_AFFINITY and OMP_DYNAMIC from Hyper Threading Status
- # only when MKLDNN or MKLML enabled
- if [ "@WITH_MKLDNN@" == "OFF" ] && [ "@WITH_MKLML@" == "OFF"]; then
+ # only when MKL enabled
+ if [ "@WITH_MKL@" == "OFF" ]; then
return 0
fi
ht=`lscpu |grep "per core"|awk -F':' '{print $2}'|xargs`
@@ -70,8 +70,8 @@ function cpu_config() {
function threads_config() {
# auto set OMP_NUM_THREADS and MKL_NUM_THREADS
# according to trainer_count and total processors
- # only when MKLDNN or MKLML enabled
- if [ "@WITH_MKLDNN@" == "OFF" ] && [ "@WITH_MKLML@" == "OFF"]; then
+ # only when MKL enabled
+ if [ "@WITH_MKL@" == "OFF" ]; then
return 0
fi
processors=`grep "processor" /proc/cpuinfo|sort -u|wc -l`
diff --git a/paddle/scripts/travis/build_doc.sh b/paddle/scripts/travis/build_doc.sh
index 973b2736e5ce2b733d52df4f5a270b296bca2cac..28d82343ed32273740d0c52d0451681e43b3675e 100755
--- a/paddle/scripts/travis/build_doc.sh
+++ b/paddle/scripts/travis/build_doc.sh
@@ -6,7 +6,7 @@ mkdir -p $TRAVIS_BUILD_DIR/build
cd $TRAVIS_BUILD_DIR/build
# Compile Documentation only.
-cmake .. -DCMAKE_BUILD_TYPE=Debug -DWITH_GPU=OFF -DWITH_MKLDNN=OFF -DWITH_MKLML=OFF -DWITH_DOC=ON
+cmake .. -DCMAKE_BUILD_TYPE=Debug -DWITH_GPU=OFF -DWITH_MKL=OFF -DWITH_DOC=ON
make -j `nproc` gen_proto_py
make -j `nproc` paddle_docs paddle_docs_cn
diff --git a/paddle/trainer/Trainer.cpp b/paddle/trainer/Trainer.cpp
index b68e29cd5ea223272151e7a8b52d998832f47103..88e684849df6fbfe4042b92bdb76ef98159eecea 100644
--- a/paddle/trainer/Trainer.cpp
+++ b/paddle/trainer/Trainer.cpp
@@ -137,6 +137,10 @@ void Trainer::init(const std::shared_ptr<TrainerConfigHelper>& config,
}
}
+ if (FLAGS_use_mkldnn) {
+ CHECK_EQ(FLAGS_trainer_count, 1UL) << "MKL-DNN only needs 1 trainer";
+ }
+
if (testing) {
LOG(INFO) << "trainer: in testing mode";
if (config_->getOptConfig().use_sparse_remote_updater() ||
diff --git a/python/paddle/v2/fluid/framework.py b/python/paddle/v2/fluid/framework.py
index f20567243ae67baecbdbac13f879f4cf2f66d298..a6eca2d7194c30aabeafc34de0957792feeebbec 100644
--- a/python/paddle/v2/fluid/framework.py
+++ b/python/paddle/v2/fluid/framework.py
@@ -12,9 +12,9 @@ def unique_name(prefix):
return "_".join([prefix, str(uid)])
-def _debug_string_(proto):
+def _debug_string_(proto, throw_on_error=True):
error_fields = list()
- if not proto.IsInitialized(error_fields):
+ if not proto.IsInitialized(error_fields) and throw_on_error:
raise ValueError("{0} are not initialized\nThe message is {1}".format(
error_fields, proto))
return proto.__str__()
@@ -101,9 +101,12 @@ class Variable(object):
self.stop_gradient = stop_gradient
def __str__(self):
+ return self.to_string(True)
+
+ def to_string(self, throw_on_error):
protostr = self.desc.serialize_to_string()
proto = framework_pb2.VarDesc.FromString(str(protostr))
- return _debug_string_(proto)
+ return _debug_string_(proto, throw_on_error)
__repr__ = __str__
@@ -291,10 +294,13 @@ class Operator(object):
self.desc.infer_var_type(self.block.desc)
self.desc.infer_shape(self.block.desc)
- def __str__(self):
+ def to_string(self, throw_on_error):
protostr = self.desc.serialize_to_string()
proto = framework_pb2.OpDesc.FromString(str(protostr))
- return _debug_string_(proto)
+ return _debug_string_(proto, throw_on_error)
+
+ def __str__(self):
+ return self.to_string(True)
__repr__ = __str__
@@ -349,9 +355,12 @@ class Block(object):
self.program = program
def __str__(self):
+ return self.to_string(True)
+
+ def to_string(self, throw_on_error):
protostr = self.desc.serialize_to_string()
proto = framework_pb2.BlockDesc.FromString(str(protostr))
- return _debug_string_(proto)
+ return _debug_string_(proto, throw_on_error)
__repr__ = __str__
@@ -454,9 +463,12 @@ class Program(object):
self.current_block_idx = 0
def __str__(self):
+ return self.to_string(True)
+
+ def to_string(self, throw_on_error):
protostr = self.desc.serialize_to_string()
proto = framework_pb2.ProgramDesc.FromString(str(protostr))
- return _debug_string_(proto)
+ return _debug_string_(proto, throw_on_error)
def clone(self):
p = Program()
@@ -512,7 +524,14 @@ class Program(object):
assert isinstance(target, Variable)
if no_grad_set is None:
no_grad_set = set()
- param_to_grad_info = self.desc.append_backward(target.desc, no_grad_set)
+ try:
+ param_to_grad_info = self.desc.append_backward(target.desc,
+ no_grad_set)
+ except Exception as e:
+ raise core.EnforceNotMet(
+ str(e) + "\nCurrent protobuf is\n{0}".format(
+ self.to_string(False)))
+
self.sync_with_cpp()
return param_to_grad_info
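The `to_string(throw_on_error)` hook added above to `Variable`, `Operator`, `Block`, and `Program` makes it possible to dump a description whose protobuf is not yet fully initialized; the new `append_backward` error path relies on it via `self.to_string(False)` to attach the current program to the raised `core.EnforceNotMet`. A minimal usage sketch (using only the fluid layers API that appears elsewhere in this patch):

```python
import paddle.v2.fluid.layers as layers

x = layers.data(name='x', shape=[13], data_type='float32')

# Lenient dump: does not raise even if some protobuf fields are still unset.
print x.to_string(throw_on_error=False)

# Strict dump, equivalent to str(x): raises ValueError listing the
# uninitialized fields, as before this change.
print x.to_string(throw_on_error=True)
```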
diff --git a/python/paddle/v2/fluid/net_drawer.py b/python/paddle/v2/fluid/net_drawer.py
index 17ad547c2bb5b79ef8225dd1a8f1ef49a6572508..94fdd5e38970b309580de6fc934b158a3c46e464 100644
--- a/python/paddle/v2/fluid/net_drawer.py
+++ b/python/paddle/v2/fluid/net_drawer.py
@@ -66,10 +66,13 @@ def parse_graph(program, graph, var_dict, **kwargs):
if not var_dict.has_key(var):
var_dict[var] = "Feed"
+ temp_id = 0
proto = framework_pb2.ProgramDesc.FromString(
program.desc.serialize_to_string())
for block in proto.blocks:
for op in block.ops:
+ op.type = op.type + "_" + str(temp_id)
+ temp_id += 1
graph.node(**draw_node(op))
for o in op.outputs:
for arg in o.arguments:
@@ -78,6 +81,7 @@ def parse_graph(program, graph, var_dict, **kwargs):
for arg in e.arguments:
if var_dict.has_key(arg):
graph.edge(**draw_edge(var_dict, op, e, arg))
+ break # only plot the first block
def draw_graph(startup_program, main_program, **kwargs):
diff --git a/python/paddle/v2/fluid/tests/book/test_fit_a_line.py b/python/paddle/v2/fluid/tests/book/test_fit_a_line.py
index ee677a2c5670a092c509b9ce1c555223bf22957f..a7f3bfc0caf76302674a00c80c2bd9ebf834f872 100644
--- a/python/paddle/v2/fluid/tests/book/test_fit_a_line.py
+++ b/python/paddle/v2/fluid/tests/book/test_fit_a_line.py
@@ -1,33 +1,22 @@
+import numpy as np
import paddle.v2 as paddle
-import paddle.v2.fluid.layers as layers
import paddle.v2.fluid.core as core
-import paddle.v2.fluid.optimizer as optimizer
import paddle.v2.fluid.framework as framework
-from paddle.v2.fluid.io import save_persistables, load_persistables
+import paddle.v2.fluid.layers as layers
from paddle.v2.fluid.executor import Executor
+from paddle.v2.fluid.io import save_persistables, load_persistables
+from paddle.v2.fluid.optimizer import SGDOptimizer
-import numpy as np
-
-x = layers.data(
- name='x',
- shape=[13],
- data_type='float32')
+x = layers.data(name='x', shape=[13], data_type='float32')
-y_predict = layers.fc(input=x,
- size=1,
- act=None)
+y_predict = layers.fc(input=x, size=1, act=None)
-y = layers.data(
- name='y',
- shape=[1],
- data_type='float32')
+y = layers.data(name='y', shape=[1], data_type='float32')
-cost = layers.square_error_cost(
- input=y_predict,
- label=y)
+cost = layers.square_error_cost(input=y_predict, label=y)
avg_cost = layers.mean(x=cost)
-sgd_optimizer = optimizer.SGDOptimizer(learning_rate=0.001)
+sgd_optimizer = SGDOptimizer(learning_rate=0.001)
opts = sgd_optimizer.minimize(avg_cost)
BATCH_SIZE = 20
diff --git a/python/paddle/v2/fluid/tests/book/test_image_classification_train.py b/python/paddle/v2/fluid/tests/book/test_image_classification_train.py
index f4be835b3ad57d5b0076e8a816c2c3def46e0663..b8506125501b6e533c4594b37943ec36ca8e7d30 100644
--- a/python/paddle/v2/fluid/tests/book/test_image_classification_train.py
+++ b/python/paddle/v2/fluid/tests/book/test_image_classification_train.py
@@ -1,21 +1,16 @@
import numpy as np
import paddle.v2 as paddle
import paddle.v2.fluid.core as core
+import paddle.v2.fluid.framework as framework
import paddle.v2.fluid.layers as layers
import paddle.v2.fluid.nets as nets
-import paddle.v2.fluid.optimizer as optimizer
from paddle.v2.fluid.executor import Executor
-import paddle.v2.fluid.framework as framework
from paddle.v2.fluid.initializer import XavierInitializer
+from paddle.v2.fluid.optimizer import AdamOptimizer
def resnet_cifar10(input, depth=32):
- def conv_bn_layer(input,
- ch_out,
- filter_size,
- stride,
- padding,
- act='relu'):
+ def conv_bn_layer(input, ch_out, filter_size, stride, padding, act='relu'):
tmp = layers.conv2d(
input=input,
filter_size=filter_size,
@@ -24,9 +19,7 @@ def resnet_cifar10(input, depth=32):
padding=padding,
act=None,
bias_attr=False)
- return layers.batch_norm(
- input=tmp,
- act=act)
+ return layers.batch_norm(input=tmp, act=act)
def shortcut(input, ch_in, ch_out, stride, program, init_program):
if ch_in != ch_out:
@@ -35,28 +28,11 @@ def resnet_cifar10(input, depth=32):
else:
return input
- def basicblock(input,
- ch_in,
- ch_out,
- stride):
- tmp = conv_bn_layer(
- input,
- ch_out,
- 3,
- stride,
- 1)
- tmp = conv_bn_layer(
- tmp,
- ch_out,
- 3,
- 1,
- 1,
- act=None)
+ def basicblock(input, ch_in, ch_out, stride):
+ tmp = conv_bn_layer(input, ch_out, 3, stride, 1)
+ tmp = conv_bn_layer(tmp, ch_out, 3, 1, 1, act=None)
short = shortcut(input, ch_in, ch_out, stride)
- return layers.elementwise_add(
- x=tmp,
- y=short,
- act='relu')
+ return layers.elementwise_add(x=tmp, y=short, act='relu')
def layer_warp(block_func, input, ch_in, ch_out, count, stride):
tmp = block_func(input, ch_in, ch_out, stride)
@@ -67,45 +43,17 @@ def resnet_cifar10(input, depth=32):
assert (depth - 2) % 6 == 0
n = (depth - 2) / 6
conv1 = conv_bn_layer(
- input=input,
- ch_out=16,
- filter_size=3,
- stride=1,
- padding=1)
- res1 = layer_warp(
- basicblock,
- conv1,
- 16,
- 16,
- n,
- 1)
- res2 = layer_warp(
- basicblock,
- res1,
- 16,
- 32,
- n,
- 2)
- res3 = layer_warp(
- basicblock,
- res2,
- 32,
- 64,
- n,
- 2)
+ input=input, ch_out=16, filter_size=3, stride=1, padding=1)
+ res1 = layer_warp(basicblock, conv1, 16, 16, n, 1)
+ res2 = layer_warp(basicblock, res1, 16, 32, n, 2)
+ res3 = layer_warp(basicblock, res2, 32, 64, n, 2)
pool = layers.pool2d(
- input=res3,
- pool_size=8,
- pool_type='avg',
- pool_stride=1)
+ input=res3, pool_size=8, pool_type='avg', pool_stride=1)
return pool
def vgg16_bn_drop(input):
- def conv_block(input,
- num_filter,
- groups,
- dropouts):
+ def conv_block(input, num_filter, groups, dropouts):
return nets.img_conv_group(
input=input,
pool_size=2,
@@ -123,22 +71,14 @@ def vgg16_bn_drop(input):
conv4 = conv_block(conv3, 512, 3, [0.4, 0.4, 0])
conv5 = conv_block(conv4, 512, 3, [0.4, 0.4, 0])
- drop = layers.dropout(
- x=conv5,
- dropout_prob=0.5)
+ drop = layers.dropout(x=conv5, dropout_prob=0.5)
fc1 = layers.fc(input=drop,
size=512,
act=None,
param_attr={"initializer": XavierInitializer()})
- reshape1 = layers.reshape(
- x=fc1,
- shape=list(fc1.shape + (1, 1)))
- bn = layers.batch_norm(
- input=reshape1,
- act='relu')
- drop2 = layers.dropout(
- x=bn,
- dropout_prob=0.5)
+ reshape1 = layers.reshape(x=fc1, shape=list(fc1.shape + (1, 1)))
+ bn = layers.batch_norm(input=reshape1, act='relu')
+ drop2 = layers.dropout(x=bn, dropout_prob=0.5)
fc2 = layers.fc(input=drop2,
size=512,
act=None,
@@ -165,8 +105,8 @@ cost = layers.cross_entropy(input=predict, label=label)
avg_cost = layers.mean(x=cost)
accuracy = layers.accuracy(input=predict, label=label)
-# optimizer = optimizer.SGDOptimizer(learning_rate=0.001)
-optimizer = optimizer.AdamOptimizer(learning_rate=0.001)
+# optimizer = SGDOptimizer(learning_rate=0.001)
+optimizer = AdamOptimizer(learning_rate=0.001)
opts = optimizer.minimize(avg_cost)
BATCH_SIZE = 128
diff --git a/python/paddle/v2/fluid/tests/book/test_recognize_digits_conv.py b/python/paddle/v2/fluid/tests/book/test_recognize_digits_conv.py
index f330ff58137068e429008bc7aa07bbc8d2e35ac4..75fbaf83e8f3e62eb0d0abef9cfa267b65e72973 100644
--- a/python/paddle/v2/fluid/tests/book/test_recognize_digits_conv.py
+++ b/python/paddle/v2/fluid/tests/book/test_recognize_digits_conv.py
@@ -1,22 +1,15 @@
+import numpy as np
import paddle.v2 as paddle
-import paddle.v2.fluid.layers as layers
-import paddle.v2.fluid.nets as nets
import paddle.v2.fluid.core as core
-import paddle.v2.fluid.optimizer as optimizer
import paddle.v2.fluid.evaluator as evaluator
import paddle.v2.fluid.framework as framework
+import paddle.v2.fluid.layers as layers
+import paddle.v2.fluid.nets as nets
from paddle.v2.fluid.executor import Executor
+from paddle.v2.fluid.optimizer import AdamOptimizer
-import numpy as np
-
-images = layers.data(
- name='pixel',
- shape=[1, 28, 28],
- data_type='float32')
-label = layers.data(
- name='label',
- shape=[1],
- data_type='int64')
+images = layers.data(name='pixel', shape=[1, 28, 28], data_type='float32')
+label = layers.data(name='label', shape=[1], data_type='int64')
conv_pool_1 = nets.simple_img_conv_pool(
input=images,
filter_size=5,
@@ -32,17 +25,13 @@ conv_pool_2 = nets.simple_img_conv_pool(
pool_stride=2,
act="relu")
-predict = layers.fc(input=conv_pool_2,
- size=10,
- act="softmax")
+predict = layers.fc(input=conv_pool_2, size=10, act="softmax")
cost = layers.cross_entropy(input=predict, label=label)
avg_cost = layers.mean(x=cost)
-optimizer = optimizer.AdamOptimizer(learning_rate=0.01, beta1=0.9, beta2=0.999)
+optimizer = AdamOptimizer(learning_rate=0.01, beta1=0.9, beta2=0.999)
opts = optimizer.minimize(avg_cost)
-accuracy, acc_out = evaluator.accuracy(
- input=predict,
- label=label)
+accuracy, acc_out = evaluator.accuracy(input=predict, label=label)
BATCH_SIZE = 50
PASS_NUM = 3
diff --git a/python/paddle/v2/fluid/tests/book/test_recognize_digits_mlp.py b/python/paddle/v2/fluid/tests/book/test_recognize_digits_mlp.py
index b0164e3e3659c19edf2af45e706fb48ac1fe2b1c..cf10b1942e6a8243b18b0ae4586fdd7ec1a665fb 100644
--- a/python/paddle/v2/fluid/tests/book/test_recognize_digits_mlp.py
+++ b/python/paddle/v2/fluid/tests/book/test_recognize_digits_mlp.py
@@ -1,19 +1,15 @@
+import numpy as np
import paddle.v2 as paddle
-import paddle.v2.fluid.layers as layers
import paddle.v2.fluid.core as core
-import paddle.v2.fluid.optimizer as optimizer
import paddle.v2.fluid.framework as framework
+import paddle.v2.fluid.layers as layers
from paddle.v2.fluid.executor import Executor
-from paddle.v2.fluid.regularizer import L2DecayRegularizer
from paddle.v2.fluid.initializer import UniformInitializer
-
-import numpy as np
+from paddle.v2.fluid.optimizer import MomentumOptimizer
+from paddle.v2.fluid.regularizer import L2DecayRegularizer
BATCH_SIZE = 128
-image = layers.data(
- name='x',
- shape=[784],
- data_type='float32')
+image = layers.data(name='x', shape=[784], data_type='float32')
param_attr = {
'name': None,
@@ -22,32 +18,21 @@ param_attr = {
'regularization': L2DecayRegularizer(0.0005 * BATCH_SIZE)
}
-hidden1 = layers.fc(input=image,
- size=128,
- act='relu',
- param_attr=param_attr)
-hidden2 = layers.fc(input=hidden1,
- size=64,
- act='relu',
- param_attr=param_attr)
+hidden1 = layers.fc(input=image, size=128, act='relu', param_attr=param_attr)
+hidden2 = layers.fc(input=hidden1, size=64, act='relu', param_attr=param_attr)
predict = layers.fc(input=hidden2,
size=10,
act='softmax',
param_attr=param_attr)
-label = layers.data(
- name='y',
- shape=[1],
- data_type='int64')
+label = layers.data(name='y', shape=[1], data_type='int64')
cost = layers.cross_entropy(input=predict, label=label)
avg_cost = layers.mean(x=cost)
-accuracy = layers.accuracy(
- input=predict,
- label=label)
+accuracy = layers.accuracy(input=predict, label=label)
-optimizer = optimizer.MomentumOptimizer(learning_rate=0.001, momentum=0.9)
+optimizer = MomentumOptimizer(learning_rate=0.001, momentum=0.9)
opts = optimizer.minimize(avg_cost)
train_reader = paddle.batch(
diff --git a/python/paddle/v2/fluid/tests/book/test_recommender_system.py b/python/paddle/v2/fluid/tests/book/test_recommender_system.py
index eefcb55bebff41eb9c67d9f0c8e83a5f1d4599bd..55ded3aed3a23c8cd7795f915dc1cbd512c6d945 100644
--- a/python/paddle/v2/fluid/tests/book/test_recommender_system.py
+++ b/python/paddle/v2/fluid/tests/book/test_recommender_system.py
@@ -1,12 +1,11 @@
+import numpy as np
import paddle.v2 as paddle
-import paddle.v2.fluid.layers as layers
-import paddle.v2.fluid.nets as nets
import paddle.v2.fluid.core as core
-import paddle.v2.fluid.optimizer as optimizer
import paddle.v2.fluid.framework as framework
+import paddle.v2.fluid.layers as layers
+import paddle.v2.fluid.nets as nets
from paddle.v2.fluid.executor import Executor
-
-import numpy as np
+from paddle.v2.fluid.optimizer import SGDOptimizer
IS_SPARSE = True
USE_GPU = False
@@ -19,10 +18,7 @@ def get_usr_combined_features():
USR_DICT_SIZE = paddle.dataset.movielens.max_user_id() + 1
- uid = layers.data(
- name='user_id',
- shape=[1],
- data_type='int64')
+ uid = layers.data(name='user_id', shape=[1], data_type='int64')
usr_emb = layers.embedding(
input=uid,
@@ -31,15 +27,11 @@ def get_usr_combined_features():
param_attr={'name': 'user_table'},
is_sparse=IS_SPARSE)
- usr_fc = layers.fc(input=usr_emb,
- size=32)
+ usr_fc = layers.fc(input=usr_emb, size=32)
USR_GENDER_DICT_SIZE = 2
- usr_gender_id = layers.data(
- name='gender_id',
- shape=[1],
- data_type='int64')
+ usr_gender_id = layers.data(name='gender_id', shape=[1], data_type='int64')
usr_gender_emb = layers.embedding(
input=usr_gender_id,
@@ -47,14 +39,10 @@ def get_usr_combined_features():
param_attr={'name': 'gender_table'},
is_sparse=IS_SPARSE)
- usr_gender_fc = layers.fc(input=usr_gender_emb,
- size=16)
+ usr_gender_fc = layers.fc(input=usr_gender_emb, size=16)
USR_AGE_DICT_SIZE = len(paddle.dataset.movielens.age_table)
- usr_age_id = layers.data(
- name='age_id',
- shape=[1],
- data_type="int64")
+ usr_age_id = layers.data(name='age_id', shape=[1], data_type="int64")
usr_age_emb = layers.embedding(
input=usr_age_id,
@@ -62,14 +50,10 @@ def get_usr_combined_features():
is_sparse=IS_SPARSE,
param_attr={'name': 'age_table'})
- usr_age_fc = layers.fc(input=usr_age_emb,
- size=16)
+ usr_age_fc = layers.fc(input=usr_age_emb, size=16)
USR_JOB_DICT_SIZE = paddle.dataset.movielens.max_job_id() + 1
- usr_job_id = layers.data(
- name='job_id',
- shape=[1],
- data_type="int64")
+ usr_job_id = layers.data(name='job_id', shape=[1], data_type="int64")
usr_job_emb = layers.embedding(
input=usr_job_id,
@@ -77,16 +61,12 @@ def get_usr_combined_features():
param_attr={'name': 'job_table'},
is_sparse=IS_SPARSE)
- usr_job_fc = layers.fc(input=usr_job_emb,
- size=16)
+ usr_job_fc = layers.fc(input=usr_job_emb, size=16)
concat_embed = layers.concat(
- input=[usr_fc, usr_gender_fc, usr_age_fc, usr_job_fc],
- axis=1)
+ input=[usr_fc, usr_gender_fc, usr_age_fc, usr_job_fc], axis=1)
- usr_combined_features = layers.fc(input=concat_embed,
- size=200,
- act="tanh")
+ usr_combined_features = layers.fc(input=concat_embed, size=200, act="tanh")
return usr_combined_features
@@ -95,10 +75,7 @@ def get_mov_combined_features():
MOV_DICT_SIZE = paddle.dataset.movielens.max_movie_id() + 1
- mov_id = layers.data(
- name='movie_id',
- shape=[1],
- data_type='int64')
+ mov_id = layers.data(name='movie_id', shape=[1], data_type='int64')
mov_emb = layers.embedding(
input=mov_id,
@@ -107,36 +84,24 @@ def get_mov_combined_features():
param_attr={'name': 'movie_table'},
is_sparse=IS_SPARSE)
- mov_fc = layers.fc(input=mov_emb,
- size=32)
+ mov_fc = layers.fc(input=mov_emb, size=32)
CATEGORY_DICT_SIZE = len(paddle.dataset.movielens.movie_categories())
- category_id = layers.data(
- name='category_id',
- shape=[1],
- data_type='int64')
+ category_id = layers.data(name='category_id', shape=[1], data_type='int64')
mov_categories_emb = layers.embedding(
- input=category_id,
- size=[CATEGORY_DICT_SIZE, 32],
- is_sparse=IS_SPARSE)
+ input=category_id, size=[CATEGORY_DICT_SIZE, 32], is_sparse=IS_SPARSE)
mov_categories_hidden = layers.sequence_pool(
- input=mov_categories_emb,
- pool_type="sum")
+ input=mov_categories_emb, pool_type="sum")
MOV_TITLE_DICT_SIZE = len(paddle.dataset.movielens.get_movie_title_dict())
- mov_title_id = layers.data(
- name='movie_title',
- shape=[1],
- data_type='int64')
+ mov_title_id = layers.data(name='movie_title', shape=[1], data_type='int64')
mov_title_emb = layers.embedding(
- input=mov_title_id,
- size=[MOV_TITLE_DICT_SIZE, 32],
- is_sparse=IS_SPARSE)
+ input=mov_title_id, size=[MOV_TITLE_DICT_SIZE, 32], is_sparse=IS_SPARSE)
mov_title_conv = nets.sequence_conv_pool(
input=mov_title_emb,
@@ -146,13 +111,10 @@ def get_mov_combined_features():
pool_type="sum")
concat_embed = layers.concat(
- input=[mov_fc, mov_categories_hidden, mov_title_conv],
- axis=1)
+ input=[mov_fc, mov_categories_hidden, mov_title_conv], axis=1)
# FIXME(dzh) : need tanh operator
- mov_combined_features = layers.fc(input=concat_embed,
- size=200,
- act="tanh")
+ mov_combined_features = layers.fc(input=concat_embed, size=200, act="tanh")
return mov_combined_features
@@ -162,18 +124,11 @@ def model():
mov_combined_features = get_mov_combined_features()
# need cos sim
- inference = layers.cos_sim(
- X=usr_combined_features,
- Y=mov_combined_features)
+ inference = layers.cos_sim(X=usr_combined_features, Y=mov_combined_features)
- label = layers.data(
- name='score',
- shape=[1],
- data_type='float32')
+ label = layers.data(name='score', shape=[1], data_type='float32')
- square_cost = layers.square_error_cost(
- input=inference,
- label=label)
+ square_cost = layers.square_error_cost(input=inference, label=label)
avg_cost = layers.mean(x=square_cost)
@@ -182,7 +137,7 @@ def model():
def main():
cost = model()
- sgd_optimizer = optimizer.SGDOptimizer(learning_rate=0.2)
+ sgd_optimizer = SGDOptimizer(learning_rate=0.2)
opts = sgd_optimizer.minimize(cost)
if USE_GPU:
diff --git a/python/paddle/v2/fluid/tests/book/test_understand_sentiment_conv.py b/python/paddle/v2/fluid/tests/book/test_understand_sentiment_conv.py
index 91fc79a9870a31205098d8a40de6c033d5bf60b9..e69b915a9cfaf9e06075991975563a1fc1196661 100644
--- a/python/paddle/v2/fluid/tests/book/test_understand_sentiment_conv.py
+++ b/python/paddle/v2/fluid/tests/book/test_understand_sentiment_conv.py
@@ -1,12 +1,11 @@
+import numpy as np
import paddle.v2 as paddle
-import paddle.v2.fluid.layers as layers
-import paddle.v2.fluid.nets as nets
import paddle.v2.fluid.core as core
-import paddle.v2.fluid.optimizer as optimizer
import paddle.v2.fluid.framework as framework
+import paddle.v2.fluid.layers as layers
+import paddle.v2.fluid.nets as nets
from paddle.v2.fluid.executor import Executor
-
-import numpy as np
+from paddle.v2.fluid.optimizer import AdamOptimizer
def convolution_net(input_dim, class_dim=2, emb_dim=32, hid_dim=32):
@@ -31,7 +30,7 @@ def convolution_net(input_dim, class_dim=2, emb_dim=32, hid_dim=32):
act="softmax")
cost = layers.cross_entropy(input=prediction, label=label)
avg_cost = layers.mean(x=cost)
- adam_optimizer = optimizer.AdamOptimizer(learning_rate=0.002)
+ adam_optimizer = AdamOptimizer(learning_rate=0.002)
opts = adam_optimizer.minimize(avg_cost)
acc = layers.accuracy(input=prediction, label=label)
return avg_cost, acc
diff --git a/python/paddle/v2/fluid/tests/book/test_understand_sentiment_dynamic_lstm.py b/python/paddle/v2/fluid/tests/book/test_understand_sentiment_dynamic_lstm.py
index 8c3d4488354eb363cd1d378ebd4cb8069e7c1b1d..65d44542501e6531fc1912cbc726a1d903b9c031 100644
--- a/python/paddle/v2/fluid/tests/book/test_understand_sentiment_dynamic_lstm.py
+++ b/python/paddle/v2/fluid/tests/book/test_understand_sentiment_dynamic_lstm.py
@@ -1,12 +1,10 @@
+import numpy as np
import paddle.v2 as paddle
-import paddle.v2.fluid.layers as layers
-import paddle.v2.fluid.nets as nets
import paddle.v2.fluid.core as core
-import paddle.v2.fluid.optimizer as optimizer
import paddle.v2.fluid.framework as framework
+import paddle.v2.fluid.layers as layers
from paddle.v2.fluid.executor import Executor
-
-import numpy as np
+from paddle.v2.fluid.optimizer import AdamOptimizer
def stacked_lstm_net(input_dim,
@@ -41,7 +39,7 @@ def stacked_lstm_net(input_dim,
act='softmax')
cost = layers.cross_entropy(input=prediction, label=label)
avg_cost = layers.mean(x=cost)
- adam_optimizer = optimizer.AdamOptimizer(learning_rate=0.002)
+ adam_optimizer = AdamOptimizer(learning_rate=0.002)
opts = adam_optimizer.minimize(avg_cost)
acc = layers.accuracy(input=prediction, label=label)
return avg_cost, acc
diff --git a/python/paddle/v2/fluid/tests/book/test_understand_sentiment_lstm.py b/python/paddle/v2/fluid/tests/book/test_understand_sentiment_lstm.py
index a7d791c1f38d4843f084127e879d613b21ae8daf..280f6e902c34512735a27586221c2be68963ef2b 100644
--- a/python/paddle/v2/fluid/tests/book/test_understand_sentiment_lstm.py
+++ b/python/paddle/v2/fluid/tests/book/test_understand_sentiment_lstm.py
@@ -1,11 +1,10 @@
+import numpy as np
import paddle.v2 as paddle
-import paddle.v2.fluid.layers as layers
import paddle.v2.fluid.core as core
-import paddle.v2.fluid.optimizer as optimizer
import paddle.v2.fluid.framework as framework
+import paddle.v2.fluid.layers as layers
from paddle.v2.fluid.executor import Executor
-
-import numpy as np
+from paddle.v2.fluid.optimizer import AdamOptimizer
def lstm_net(dict_dim, class_dim=2, emb_dim=32, seq_len=80, batch_size=50):
@@ -33,7 +32,7 @@ def lstm_net(dict_dim, class_dim=2, emb_dim=32, seq_len=80, batch_size=50):
cost = layers.cross_entropy(input=prediction, label=label)
avg_cost = layers.mean(x=cost)
- adam_optimizer = optimizer.AdamOptimizer(learning_rate=0.002)
+ adam_optimizer = AdamOptimizer(learning_rate=0.002)
opts = adam_optimizer.minimize(avg_cost)
acc = layers.accuracy(input=prediction, label=label)
diff --git a/python/paddle/v2/fluid/tests/book/test_word2vec.py b/python/paddle/v2/fluid/tests/book/test_word2vec.py
index 9dcb6f2fea06ea8cd061be4f148854408779f990..afa7b285198e0349317e123e4bd98e8336217afa 100644
--- a/python/paddle/v2/fluid/tests/book/test_word2vec.py
+++ b/python/paddle/v2/fluid/tests/book/test_word2vec.py
@@ -1,11 +1,10 @@
+import numpy as np
import paddle.v2 as paddle
-import paddle.v2.fluid.layers as layers
import paddle.v2.fluid.core as core
-import paddle.v2.fluid.optimizer as optimizer
import paddle.v2.fluid.framework as framework
+import paddle.v2.fluid.layers as layers
from paddle.v2.fluid.executor import Executor
-
-import numpy as np
+from paddle.v2.fluid.optimizer import SGDOptimizer
PASS_NUM = 100
EMBED_SIZE = 32
@@ -17,26 +16,11 @@ IS_SPARSE = True
word_dict = paddle.dataset.imikolov.build_dict()
dict_size = len(word_dict)
-first_word = layers.data(
- name='firstw',
- shape=[1],
- data_type='int64')
-second_word = layers.data(
- name='secondw',
- shape=[1],
- data_type='int64')
-third_word = layers.data(
- name='thirdw',
- shape=[1],
- data_type='int64')
-forth_word = layers.data(
- name='forthw',
- shape=[1],
- data_type='int64')
-next_word = layers.data(
- name='nextw',
- shape=[1],
- data_type='int64')
+first_word = layers.data(name='firstw', shape=[1], data_type='int64')
+second_word = layers.data(name='secondw', shape=[1], data_type='int64')
+third_word = layers.data(name='thirdw', shape=[1], data_type='int64')
+forth_word = layers.data(name='forthw', shape=[1], data_type='int64')
+next_word = layers.data(name='nextw', shape=[1], data_type='int64')
embed_first = layers.embedding(
input=first_word,
@@ -64,19 +48,12 @@ embed_forth = layers.embedding(
param_attr={'name': 'shared_w'})
concat_embed = layers.concat(
- input=[embed_first, embed_second, embed_third, embed_forth],
- axis=1)
-hidden1 = layers.fc(input=concat_embed,
- size=HIDDEN_SIZE,
- act='sigmoid')
-predict_word = layers.fc(input=hidden1,
- size=dict_size,
- act='softmax')
-cost = layers.cross_entropy(
- input=predict_word,
- label=next_word)
+ input=[embed_first, embed_second, embed_third, embed_forth], axis=1)
+hidden1 = layers.fc(input=concat_embed, size=HIDDEN_SIZE, act='sigmoid')
+predict_word = layers.fc(input=hidden1, size=dict_size, act='softmax')
+cost = layers.cross_entropy(input=predict_word, label=next_word)
avg_cost = layers.mean(x=cost)
-sgd_optimizer = optimizer.SGDOptimizer(learning_rate=0.001)
+sgd_optimizer = SGDOptimizer(learning_rate=0.001)
opts = sgd_optimizer.minimize(avg_cost)
train_reader = paddle.batch(
diff --git a/python/paddle/v2/fluid/tests/test_conv2d_op.py b/python/paddle/v2/fluid/tests/test_conv2d_op.py
index 907b52c405d9e5c02c70f611e4c777ba21948c40..2240dc73cdd31f320fed174dd811e93c6640137f 100644
--- a/python/paddle/v2/fluid/tests/test_conv2d_op.py
+++ b/python/paddle/v2/fluid/tests/test_conv2d_op.py
@@ -110,13 +110,30 @@ class TestConv2dOp(OpTest):
self.op_type = "conv2d"
+class TestWithPad(TestConv2dOp):
+ def init_test_case(self):
+ self.pad = [1, 1]
+ self.stride = [1, 1]
+ self.input_size = [2, 3, 5, 5] # NCHW
+ assert np.mod(self.input_size[1], self.groups) == 0
+ f_c = self.input_size[1] / self.groups
+ self.filter_size = [6, f_c, 3, 3]
+
+
+class TestWithStride(TestConv2dOp):
+ def init_test_case(self):
+ self.pad = [1, 1]
+ self.stride = [2, 2]
+ self.input_size = [2, 3, 6, 6] # NCHW
+ assert np.mod(self.input_size[1], self.groups) == 0
+ f_c = self.input_size[1] / self.groups
+ self.filter_size = [6, f_c, 3, 3]
+
+
class TestWithGroup(TestConv2dOp):
def init_group(self):
self.groups = 3
- def init_op_type(self):
- self.op_type = "conv2d"
-
class TestWith1x1(TestConv2dOp):
def init_test_case(self):
@@ -127,15 +144,9 @@ class TestWith1x1(TestConv2dOp):
f_c = self.input_size[1] / self.groups
self.filter_size = [6, f_c, 1, 1]
- def init_dilation(self):
- self.dilations = [1, 1]
-
def init_group(self):
self.groups = 3
- def init_op_type(self):
- self.op_type = "conv2d"
-
class TestWithDilation(TestConv2dOp):
def init_test_case(self):
@@ -152,14 +163,19 @@ class TestWithDilation(TestConv2dOp):
def init_group(self):
self.groups = 3
+
+#----------------Conv2dCudnn----------------
+class TestCudnn(TestConv2dOp):
def init_op_type(self):
- self.op_type = "conv2d"
+ self.op_type = "conv_cudnn"
-#----------------Conv2dCudnn----------------
+class TestCudnnWithPad(TestWithPad):
+ def init_op_type(self):
+ self.op_type = "conv_cudnn"
-class TestCudnn(TestConv2dOp):
+class TestCudnnWithStride(TestWithStride):
def init_op_type(self):
self.op_type = "conv_cudnn"
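For reference, the spatial sizes exercised by the new `TestWithPad` and `TestWithStride` cases follow the usual convolution output-size arithmetic; a small sketch (the helper name is illustrative, not part of the test file):

```python
def conv2d_out_size(in_size, filter_size, stride, pad):
    # Standard convolution output size: floor((in + 2*pad - filter) / stride) + 1
    return (in_size + 2 * pad - filter_size) // stride + 1

# TestWithPad: 5x5 input, 3x3 filter, stride 1, pad 1 -> 5x5 output
assert conv2d_out_size(5, 3, 1, 1) == 5
# TestWithStride: 6x6 input, 3x3 filter, stride 2, pad 1 -> 3x3 output
assert conv2d_out_size(6, 3, 2, 1) == 3
```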
diff --git a/python/paddle/v2/fluid/tests/test_conv2d_transpose_op.py b/python/paddle/v2/fluid/tests/test_conv2d_transpose_op.py
index 54349c018c4a53b8767d6cd4f94d99c719dc0237..d7b1f2f2a3abf6335998742dbbef8e17794170fa 100644
--- a/python/paddle/v2/fluid/tests/test_conv2d_transpose_op.py
+++ b/python/paddle/v2/fluid/tests/test_conv2d_transpose_op.py
@@ -4,9 +4,7 @@ from op_test import OpTest
def conv2dtranspose_forward_naive(input_, filter_, conv2dtranspose_param):
- # [2, 3, 5, 5]
in_n, in_c, in_h, in_w = input_.shape
- # [3, 6, 3, 3]
f_c, out_c, f_h, f_w = filter_.shape
assert in_c == f_c
@@ -29,6 +27,7 @@ def conv2dtranspose_forward_naive(input_, filter_, conv2dtranspose_param):
j1, j2 = j * stride[0], j * stride[0] + f_w
out[n, k, i1:i2, j1:j2] += tmp_out
+ out = out[:, :, pad[0]:out_h - pad[0], pad[1]:out_w - pad[1]]
return out
@@ -36,8 +35,6 @@ class TestConv2dTransposeOp(OpTest):
def setUp(self):
# init as conv transpose
self.init_op_type()
-
- # [2, 3, 5, 5] -> kernel [3, 6, 3, 3] -> output [2, 6, 7, 7]
self.init_test_case()
conv2dtranspose_param = {'stride': self.stride, 'pad': self.pad}
@@ -55,7 +52,6 @@ class TestConv2dTransposeOp(OpTest):
self.outputs = {'Output': output}
def test_check_output(self):
- print 'check output here for', self.op_type
self.check_output()
def test_check_grad_no_input(self):
@@ -88,6 +84,26 @@ class TestConv2dTransposeOp(OpTest):
self.op_type = "conv2d_transpose"
+class TestWithPad(TestConv2dTransposeOp):
+ def init_test_case(self):
+ self.pad = [1, 1]
+ self.stride = [1, 1]
+ self.dilations = [1, 1]
+ self.input_size = [2, 3, 5, 5] # NCHW
+ f_c = self.input_size[1]
+ self.filter_size = [f_c, 6, 3, 3]
+
+
+class TestWithStride(TestConv2dTransposeOp):
+ def init_test_case(self):
+ self.pad = [1, 1]
+ self.stride = [2, 2]
+ self.dilations = [1, 1]
+ self.input_size = [2, 3, 5, 5] # NCHW
+ f_c = self.input_size[1]
+ self.filter_size = [f_c, 6, 3, 3]
+
+
# ------------ test_cudnn ------------
class TestCudnn(TestConv2dTransposeOp):
def init_op_type(self):
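The slicing added to `conv2dtranspose_forward_naive` above crops the padded borders from the full transposed-convolution output, so the new `TestWithPad` and `TestWithStride` cases produce smaller maps than the uncropped formula alone. A short sketch of the resulting size arithmetic (the helper name is illustrative only):

```python
def conv2d_transpose_out_size(in_size, filter_size, stride, pad):
    # Full transposed-conv output, then crop `pad` rows/cols from each border.
    full = (in_size - 1) * stride + filter_size
    return full - 2 * pad

# TestWithPad: 5x5 input, 3x3 filter, stride 1, pad 1 -> 5x5 output
assert conv2d_transpose_out_size(5, 3, 1, 1) == 5
# TestWithStride: 5x5 input, 3x3 filter, stride 2, pad 1 -> 9x9 output
assert conv2d_transpose_out_size(5, 3, 2, 1) == 9
```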
diff --git a/python/paddle/v2/fluid/tests/test_conv3d_transpose_op.py b/python/paddle/v2/fluid/tests/test_conv3d_transpose_op.py
index 132fe7931438a30cf02e4ad2894c0838e48ffc9f..59a32c40821f2109306e898a6a798fea52b1e0ca 100644
--- a/python/paddle/v2/fluid/tests/test_conv3d_transpose_op.py
+++ b/python/paddle/v2/fluid/tests/test_conv3d_transpose_op.py
@@ -4,9 +4,7 @@ from op_test import OpTest
def conv3dtranspose_forward_naive(input_, filter_, conv3dtranspose_param):
- # [2, 3, 5, 5, 5]
in_n, in_c, in_d, in_h, in_w = input_.shape
- # [3, 6, 3, 3, 3]
f_c, out_c, f_d, f_h, f_w = filter_.shape
assert in_c == f_c
@@ -14,7 +12,6 @@ def conv3dtranspose_forward_naive(input_, filter_, conv3dtranspose_param):
out_d = (in_d - 1) * stride[0] + f_d
out_h = (in_h - 1) * stride[1] + f_h
out_w = (in_w - 1) * stride[2] + f_w
-
out = np.zeros((in_n, out_c, out_d, out_h, out_w))
for n in range(in_n):
@@ -33,6 +30,8 @@ def conv3dtranspose_forward_naive(input_, filter_, conv3dtranspose_param):
j1, j2 = j * stride[2], j * stride[2] + f_w
out[n, k, d1:d2, i1:i2, j1:j2] += tmp_out
+ out = out[:, :, pad[0]:out_d - pad[0], pad[1]:out_h - pad[1], pad[2]:out_w -
+ pad[2]]
return out
@@ -40,8 +39,6 @@ class TestConv3dTransposeOp(OpTest):
def setUp(self):
# init as conv transpose
self.init_op_type()
-
- # [2, 3, 5, 5, 5] -> kernel [3, 6, 3, 3, 3] -> output [2, 6, 7, 7, 7]
self.init_test_case()
conv3dtranspose_param = {'stride': self.stride, 'pad': self.pad}
@@ -49,7 +46,6 @@ class TestConv3dTransposeOp(OpTest):
filter_ = np.random.random(self.filter_size).astype("float32")
output = conv3dtranspose_forward_naive(
input_, filter_, conv3dtranspose_param).astype("float32")
- # print 'deconv output py', output, output.shape
self.inputs = {'Input': input_, 'Filter': filter_}
self.attrs = {
@@ -60,7 +56,6 @@ class TestConv3dTransposeOp(OpTest):
self.outputs = {'Output': output}
def test_check_output(self):
- print 'check output here'
self.check_output()
def test_check_grad(self):
@@ -85,7 +80,7 @@ class TestConv3dTransposeOp(OpTest):
self.pad = [0, 0, 0]
self.stride = [1, 1, 1]
self.dilations = [1, 1, 1]
- self.input_size = [2, 3, 5, 5, 5] # NCHW
+ self.input_size = [2, 3, 5, 5, 5] # NCDHW
f_c = self.input_size[1]
self.filter_size = [f_c, 6, 3, 3, 3]
@@ -93,5 +88,25 @@ class TestConv3dTransposeOp(OpTest):
self.op_type = "conv3d_transpose"
+class TestWithPad(TestConv3dTransposeOp):
+ def init_test_case(self):
+ self.pad = [1, 1, 1]
+ self.stride = [1, 1, 1]
+ self.dilations = [1, 1, 1]
+ self.input_size = [2, 3, 5, 5, 5] # NCDHW
+ f_c = self.input_size[1]
+ self.filter_size = [f_c, 6, 3, 3, 3]
+
+
+class TestWithStride(TestConv3dTransposeOp):
+ def init_test_case(self):
+ self.pad = [1, 1, 1]
+ self.stride = [2, 2, 2]
+ self.dilations = [1, 1, 1]
+ self.input_size = [2, 3, 5, 5, 5] # NCDHW
+ f_c = self.input_size[1]
+ self.filter_size = [f_c, 6, 3, 3, 3]
+
+
if __name__ == '__main__':
unittest.main()
diff --git a/python/paddle/v2/fluid/tests/test_is_empty_op.py b/python/paddle/v2/fluid/tests/test_is_empty_op.py
new file mode 100644
index 0000000000000000000000000000000000000000..ed6e3fe24f6333c9c90d760787eb13241a7e1868
--- /dev/null
+++ b/python/paddle/v2/fluid/tests/test_is_empty_op.py
@@ -0,0 +1,43 @@
+import unittest
+import numpy as np
+from paddle.v2.fluid.op import Operator
+import paddle.v2.fluid.core as core
+
+
+def create_tensor(scope, name, np_data):
+ tensor = scope.var(name).get_tensor()
+ tensor.set_dims(np_data.shape)
+ tensor.set(np_data, core.CPUPlace())
+ return tensor
+
+
+class TestIsEmptyOp(unittest.TestCase):
+ def setUp(self):
+ self.scope = core.Scope()
+ # create input variables
+ np_data0 = np.array([0, 1, 2])
+ create_tensor(self.scope, "X0", np_data0)
+
+ np_data1 = np.array([1])
+ t = create_tensor(self.scope, "X1", np_data1)
+ t.set_dims([0])
+
+ # create output variables
+ self.scope.var("out")
+
+ def test_no_empty(self):
+ self.one_case("X0", False)
+
+ def test_empty(self):
+ self.one_case("X1", True)
+
+ def one_case(self, input, target):
+ op = Operator(type="is_empty", X=input, Out="out")
+ ctx = core.DeviceContext.create(core.CPUPlace())
+ op.run(self.scope, ctx)
+ out = self.scope.var("out").get_tensor()
+ self.assertEqual(np.array(out)[0], target)
+
+
+if __name__ == "__main__":
+ unittest.main()
diff --git a/python/paddle/v2/fluid/tests/test_while_op.py b/python/paddle/v2/fluid/tests/test_while_op.py
index 0f01acb3b94dc55a3536e751108e785ddc6e47bb..84b432333f950f754a97bc1a051b59c16fb22aed 100644
--- a/python/paddle/v2/fluid/tests/test_while_op.py
+++ b/python/paddle/v2/fluid/tests/test_while_op.py
@@ -2,6 +2,7 @@ import unittest
import paddle.v2.fluid.layers as layers
from paddle.v2.fluid.executor import Executor
import paddle.v2.fluid.core as core
+from paddle.v2.fluid.backward import append_backward_ops
import numpy
@@ -16,7 +17,7 @@ class TestWhileOp(unittest.TestCase):
i = layers.zeros(shape=[1], dtype='int64')
i.stop_gradient = True
init = layers.zeros(shape=[10], dtype='float32')
- mem_array = layers.array_write(init, i=i)
+ mem_array = layers.array_write(x=init, i=i)
data_array = layers.array_write(x=d0, i=i)
i = layers.increment(i)
@@ -29,17 +30,23 @@ class TestWhileOp(unittest.TestCase):
i.stop_gradient = True
array_len = layers.fill_constant(shape=[1], dtype='int64', value=3)
+ array_len.stop_gradient = True
cond = layers.less_than(x=i, y=array_len)
while_op = layers.While(cond=cond)
with while_op.block():
d = layers.array_read(array=data_array, i=i)
prev = layers.array_read(array=mem_array, i=i)
- i = layers.increment(x=i, in_place=True)
result = layers.sums(input=[d, prev])
+
+ i = layers.increment(x=i, in_place=True)
layers.array_write(result, i=i, array=mem_array)
layers.less_than(x=i, y=array_len, cond=cond)
- sum_result = layers.array_read(mem_array, i=array_len)
+
+ sum_result = layers.array_read(array=mem_array, i=i)
+ loss = layers.mean(x=sum_result)
+
+ append_backward_ops(loss)
cpu = core.CPUPlace()
exe = Executor(cpu)